Esempio n. 1
0
 def test_check_series(self):
     """Scaling column ``a`` must agree for DataFrame and Series input.

     One ``MinMaxScaler`` bound to column ``"a"`` is fitted first on the
     whole frame and then on the ``a`` Series alone; the ``a`` values of
     both results are compared element-wise.
     """
     frame = pd.DataFrame({
         'a': [1.2, 2, 3],
         'b': [2, 3, 4],
         'c': [1, 2, 3],
         'd': [2, 2, 2],
     })
     scaler = MinMaxScaler() << "a"
     from_frame = scaler.fit_transform(frame)
     from_series = scaler.fit_transform(frame['a'])
     assert_array_equal(from_frame['a'].values, from_series['a'].values)
Esempio n. 2
0
    def test_minmaxscaler_float_order_noint(self):
        """Scaling float columns keeps the frame's shape and column order.

        The input columns are declared through an ``OrderedDict`` so that
        their order is fixed; the output must list the same column names
        in the same order.
        """
        columns = OrderedDict(xpetal=[-1.1, -2.2, -3.3],
                              ipetal=[1.0, 2.0, 3.0])
        in_df = pandas.DataFrame(data=columns)

        scaler = MinMaxScaler() << ['xpetal', 'ipetal']
        out_df = Pipeline([scaler]).fit_transform(in_df, verbose=0)

        assert_equal(out_df.shape, (3, 2))
        assert_equal(list(out_df.columns), list(in_df.columns))
Esempio n. 3
0
    def test_minmaxscaler_int(self):
        """Scale two integer columns and check the last row of the output.

        After scaling, row 2 is expected to hold -1 for ``xpetal`` and 1
        for ``ipetal``; the whole output frame is included in the error
        message when the ``xpetal`` check fails.
        """
        frame = pandas.DataFrame(
            data={'xpetal': [-1, -2, -3], 'ipetal': [1, 2, 3]})

        scaler = MinMaxScaler() << ['xpetal', 'ipetal']
        out_df = Pipeline([scaler]).fit_transform(frame, verbose=0)

        assert_equal(out_df.shape, (3, 2))
        last_xpetal = out_df.loc[2, 'xpetal']
        if last_xpetal != -1:
            raise Exception("Unexpected:\n" + str(out_df))
        last_ipetal = out_df.loc[2, 'ipetal']
        assert_equal(last_ipetal, 1)
Esempio n. 4
0
    def test_fit_transform(self):
        """A dprep-backed stream and a file-backed stream scale identically.

        The 'infert' dataset file is wrapped once as a ``DprepDataStream``
        and once as a ``FileDataStream``; the same pipeline is fitted on
        each and the resulting columns and values are compared.
        """
        # Imported locally so the module does not require azureml at import.
        import azureml.dataprep as dprep

        csv_path = get_dataset('infert').as_filepath()
        dprep_stream = DprepDataStream(dprep.auto_read_file(path=csv_path))
        file_stream = FileDataStream.read_csv(csv_path)

        scaler = MinMaxScaler(columns={'in': 'induced', 'sp': 'spontaneous'})
        pipeline = Pipeline([scaler])
        from_file = pipeline.fit_transform(file_stream)
        from_dprep = pipeline.fit_transform(dprep_stream)

        assert_array_equal(from_file.columns, from_dprep.columns)
        assert_2d_array_equal(from_file.values, from_dprep.values)
Esempio n. 5
0
def transform_data():
    """Fit a MinMaxScaler pipeline on ``data`` in two output modes.

    ``data`` is a name defined outside this function. The pipeline is
    fitted twice: first with ``as_binary_data_stream=True``, then with
    default arguments.

    Returns:
        A 2-tuple ``(stream_result, default_result)`` holding the two
        ``fit_transform`` results in that order.
    """
    scaler = MinMaxScaler(columns={'in': 'induced', 'sp': 'spontaneous'})
    pipeline = Pipeline([scaler])
    stream_result = pipeline.fit_transform(data, as_binary_data_stream=True)
    default_result = pipeline.fit_transform(data)
    return stream_result, default_result
Esempio n. 6
0
# Build the training and test frames (seeded so the noise is reproducible)
np.random.seed(0)
x = np.arange(100, step=0.1)
y = 10 * x + 10 * np.random.standard_normal(x.size)
train_data = dict(c1=x, c2=y)
train_df = pd.DataFrame(train_data).astype(
    dict.fromkeys(train_data, np.float32))

test_data = dict(c1=[2.5, 30.5], c2=[1, 1])
test_df = pd.DataFrame(test_data).astype(
    dict.fromkeys(test_data, np.float32))

# Fit a pipeline whose only step is a MinMaxScaler
r1 = Pipeline([MinMaxScaler()])
r1.fit(train_df)

# Write the fitted pipeline out as an ONNX model
onnx_path = get_tmp_file('.onnx')
r1.export_to_onnx(onnx_path, 'com.microsoft.ml', onnx_version='Stable')

# Transform the test frame with the standard ML.Net backend
result_standard = r1.transform(test_df)
print(result_standard)
#          c1        c2
# 0  0.025025  0.000998
# 1  0.305305  0.000998

# Perform the transform using the ONNX backend.
# Note, the extra columns and column name differences
Esempio n. 7
0
import numpy
from nimbusml import FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.preprocessing.normalization import MinMaxScaler

# Example: scale two columns of the 'infert' dataset with MinMaxScaler.

# data input (as a FileDataStream)
path = get_dataset('infert').as_filepath()
# Numeric columns are read as float32 because integer input raises an error.
data = FileDataStream.read_csv(
    path, sep=',', numeric_dtype=numpy.float32)
print(data.head())
#    age  case education  induced  parity  pooled.stratum  row_num  ...
# 0  26.0   1.0    0-5yrs      1.0     6.0             3.0      1.0  ...
# 1  42.0   1.0    0-5yrs      1.0     1.0             1.0      2.0  ...
# 2  39.0   1.0    0-5yrs      2.0     6.0             4.0      3.0  ...
# 3  34.0   1.0    0-5yrs      2.0     4.0             2.0      4.0  ...
# 4  35.0   1.0   6-11yrs      1.0     3.0            32.0      5.0  ...

# transform usage: scale 'induced' into a new column 'in' and
# 'spontaneous' into a new column 'sp'
xf = MinMaxScaler(columns={'in': 'induced', 'sp': 'spontaneous'})

# fit and transform
features = xf.fit_transform(data)

# print features
print(features.head())
#    age  case education   in  ... pooled.stratum  row_num   sp  ...
# 0  26.0   1.0    0-5yrs  0.5 ...            3.0      1.0  1.0  ...
# 1  42.0   1.0    0-5yrs  0.5 ...            1.0      2.0  0.0  ...
# 2  39.0   1.0    0-5yrs  1.0 ...            4.0      3.0  0.0  ...
# 3  34.0   1.0    0-5yrs  1.0 ...            2.0      4.0  0.0  ...
# 4  35.0   1.0   6-11yrs  0.5 ...           32.0      5.0  0.5  ...
Esempio n. 8
0
###############################################################################
# MinMaxScaler
import pandas as pd
from nimbusml.preprocessing.normalization import MinMaxScaler

# Small sample frame: three numeric columns plus a string label column
in_df = pd.DataFrame({
    'Sepal_Length': [2.5, 1, 2.1, 1.0],
    'Sepal_Width': [0.75, 0.9, 0.8, 0.76],
    'Petal_Length': [0, 2.5, 2.6, 2.4],
    'Species': ['setosa', 'viginica', 'setosa', 'versicolor'],
})

# Generate two new columns, each mapped as {new column: source column}
normed = MinMaxScaler() << dict(Petal_Normed='Petal_Length',
                                Sepal_Normed='Sepal_Width')
out_df = normed.fit_transform(in_df)

print('MinMaxScaler\n', out_df)