Example 1
0
 def test_SkipFilter(self):
     """SkipFilter(count=2) drops the first 2 of 10 rows, leaving 8."""
     frame = pandas.DataFrame(
         data=dict(review=['row' + str(i) for i in range(10)]))
     result = SkipFilter(count=2).fit_transform(frame)
     print(len(result))
     assert_equal(len(result), 8)
Example 2
0
 'LightGbmBinaryClassifier':
 LightGbmBinaryClassifier(minimum_example_count_per_group=1,
                          minimum_example_count_per_leaf=1),
 'LightGbmClassifier':
 LightGbmClassifier(minimum_example_count_per_group=1,
                    minimum_example_count_per_leaf=1),
 'LightGbmRegressor':
 LightGbmRegressor(minimum_example_count_per_group=1,
                   minimum_example_count_per_leaf=1),
 'LightGbmRanker':
 LightGbmRanker(minimum_example_count_per_group=1,
                minimum_example_count_per_leaf=1),
 'NGramFeaturizer':
 NGramFeaturizer(word_feature_extractor=n_gram()),
 'SkipFilter':
 SkipFilter(count=5),
 'TakeFilter':
 TakeFilter(count=100000),
 'IidSpikeDetector':
 IidSpikeDetector(columns=['F0']),
 'IidChangePointDetector':
 IidChangePointDetector(columns=['F0']),
 'SsaSpikeDetector':
 SsaSpikeDetector(columns=['F0'], seasonal_window_size=2),
 'SsaChangePointDetector':
 SsaChangePointDetector(columns=['F0'], seasonal_window_size=2),
 'SsaForecaster':
 SsaForecaster(columns=['F0'],
               window_size=2,
               series_length=5,
               train_size=5,
Example 3
0
import numpy as np
from nimbusml import FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.preprocessing.filter import SkipFilter, TakeFilter

# Load the bundled 'infert' sample dataset as a FileDataStream,
# reading column 0 as a string id and 'age' as float32.
infert_path = get_dataset('infert').as_filepath()
data = FileDataStream.read_csv(
    infert_path,
    sep=',',
    names={0: 'id'},
    dtype={'id': str, 'age': np.float32})
print(data.head())
#    age  case education id  induced  parity  pooled.stratum  spontaneous  ...
# 0  26.0     1    0-5yrs  1        1       6               3            2  ...
# 1  42.0     1    0-5yrs  2        1       1               1            0  ...
# 2  39.0     1    0-5yrs  3        2       6               4            0  ...
# 3  34.0     1    0-5yrs  4        2       4               2            0  ...
# 4  35.0     1   6-11yrs  5        1       3              32            1  ...

# TakeFilter keeps only the first `count` rows.
print(TakeFilter(count=100).fit_transform(data).shape)
# (100, 9), first 100 rows are preserved

# SkipFilter drops the first `count` rows and keeps the rest.
print(SkipFilter(count=100).fit_transform(data).shape)
# (148, 9), first 100 rows are deleted
Example 4
0
                                  'education_str.6-11yrs',
                                  'education_str.12+ yrs'],
                         label='induced'),
 'PcaAnomalyDetector': PcaAnomalyDetector(rank=3),
 'PcaTransformer':  PcaTransformer(rank=2),
 'PixelExtractor': Pipeline([
     Loader(columns={'ImgPath': 'Path'}),
     PixelExtractor(columns={'ImgPixels': 'ImgPath'}),
 ]),
 'PrefixColumnConcatenator': PrefixColumnConcatenator(columns={'Features': 'Sepal_'}),
 'Resizer': Pipeline([
     Loader(columns={'ImgPath': 'Path'}),
     Resizer(image_width=227, image_height=227,
             columns={'ImgResize': 'ImgPath'})
 ]),
 'SkipFilter': SkipFilter(count=5),
 'SsaSpikeDetector': SsaSpikeDetector(columns=['Sepal_Length'],
                                      seasonal_window_size=2),
 'SsaChangePointDetector': SsaChangePointDetector(columns=['Sepal_Length'],
                                                 seasonal_window_size=2),
 'SsaForecaster': SsaForecaster(columns=['Sepal_Length'],
                                window_size=2,
                                series_length=5,
                                train_size=5,
                                horizon=1),
 'RangeFilter': RangeFilter(min=5.0, max=5.1, columns=['Sepal_Length']),
 'TakeFilter': TakeFilter(count=100),
 'TensorFlowScorer': TensorFlowScorer(
     model_location=os.path.join(
         script_dir,
         '..',
Example 5
0
###############################################################################
# SkipFilter: skip first N rows in a dataset
# TakeFilter: take first N rows in a dataset
import pandas
from nimbusml import Pipeline
from nimbusml.preprocessing.filter import SkipFilter, TakeFilter

# Ten single-column rows: 'row0' .. 'row9'.
reviews = pandas.DataFrame(
    data={'review': ['row{}'.format(i) for i in range(10)]})

# skip the first 5 rows
print(SkipFilter(count=5).fit_transform(reviews))

# take the first 5 rows
print(TakeFilter(count=5).fit_transform(reviews))

# skip 3 then take 5 rows
pipe = Pipeline([SkipFilter(count=3), TakeFilter(count=5)])
print(pipe.fit_transform(reviews))