def test_SkipFilter(self):
    df = pandas.DataFrame(data=dict(
        review=['row' + str(i) for i in range(10)]))
    y = SkipFilter(count=2).fit_transform(df)
    print(len(y))
    assert_equal(len(y), 8)
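For symmetry, here is a hedged sketch of the complementary check for TakeFilter. It is not part of the original snippet and assumes the same test fixture, with pandas, TakeFilter, and assert_equal already imported in the test module:

def test_TakeFilter(self):
    # hypothetical companion test: taking 2 of the 10 rows should keep exactly 2
    df = pandas.DataFrame(data=dict(
        review=['row' + str(i) for i in range(10)]))
    y = TakeFilter(count=2).fit_transform(df)
    assert_equal(len(y), 2)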
'LightGbmBinaryClassifier': LightGbmBinaryClassifier(
    minimum_example_count_per_group=1, minimum_example_count_per_leaf=1),
'LightGbmClassifier': LightGbmClassifier(
    minimum_example_count_per_group=1, minimum_example_count_per_leaf=1),
'LightGbmRegressor': LightGbmRegressor(
    minimum_example_count_per_group=1, minimum_example_count_per_leaf=1),
'LightGbmRanker': LightGbmRanker(
    minimum_example_count_per_group=1, minimum_example_count_per_leaf=1),
'NGramFeaturizer': NGramFeaturizer(word_feature_extractor=n_gram()),
'SkipFilter': SkipFilter(count=5),
'TakeFilter': TakeFilter(count=100000),
'IidSpikeDetector': IidSpikeDetector(columns=['F0']),
'IidChangePointDetector': IidChangePointDetector(columns=['F0']),
'SsaSpikeDetector': SsaSpikeDetector(columns=['F0'],
                                     seasonal_window_size=2),
'SsaChangePointDetector': SsaChangePointDetector(columns=['F0'],
                                                 seasonal_window_size=2),
'SsaForecaster': SsaForecaster(columns=['F0'],
                               window_size=2,
                               series_length=5,
                               train_size=5,
import numpy as np
from nimbusml import FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.preprocessing.filter import SkipFilter, TakeFilter

# data input (as a FileDataStream)
path = get_dataset('infert').as_filepath()
data = FileDataStream.read_csv(
    path, sep=',', names={0: 'id'}, dtype={'id': str, 'age': np.float32})
print(data.head())
#     age  case education  id  induced  parity  pooled.stratum  spontaneous  ...
# 0  26.0     1    0-5yrs   1        1       6               3            2  ...
# 1  42.0     1    0-5yrs   2        1       1               1            0  ...
# 2  39.0     1    0-5yrs   3        2       6               4            0  ...
# 3  34.0     1    0-5yrs   4        2       4               2            0  ...
# 4  35.0     1   6-11yrs   5        1       3              32            1  ...

# fit and transform
print(TakeFilter(count=100).fit_transform(data).shape)
# (100, 9), first 100 rows are preserved
print(SkipFilter(count=100).fit_transform(data).shape)
# (148, 9), first 100 rows are deleted
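The two filters can also be chained on the same FileDataStream to pull out a middle slice of the file. The following is a minimal sketch, not part of the original example, assuming the `data` stream defined above and nimbusml's Pipeline:

from nimbusml import Pipeline

# skip the first 100 rows, then keep the next 20 (rows 100-119 of the file)
middle = Pipeline([SkipFilter(count=100),
                   TakeFilter(count=20)]).fit_transform(data)
print(middle.shape)  # expected (20, 9), given the 148-row remainder shown above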
    'education_str.6-11yrs',
    'education_str.12+ yrs'], label='induced'),
'PcaAnomalyDetector': PcaAnomalyDetector(rank=3),
'PcaTransformer': PcaTransformer(rank=2),
'PixelExtractor': Pipeline([
    Loader(columns={'ImgPath': 'Path'}),
    PixelExtractor(columns={'ImgPixels': 'ImgPath'}),
]),
'PrefixColumnConcatenator': PrefixColumnConcatenator(
    columns={'Features': 'Sepal_'}),
'Resizer': Pipeline([
    Loader(columns={'ImgPath': 'Path'}),
    Resizer(image_width=227, image_height=227,
            columns={'ImgResize': 'ImgPath'})
]),
'SkipFilter': SkipFilter(count=5),
'SsaSpikeDetector': SsaSpikeDetector(columns=['Sepal_Length'],
                                     seasonal_window_size=2),
'SsaChangePointDetector': SsaChangePointDetector(columns=['Sepal_Length'],
                                                 seasonal_window_size=2),
'SsaForecaster': SsaForecaster(columns=['Sepal_Length'],
                               window_size=2,
                               series_length=5,
                               train_size=5,
                               horizon=1),
'RangeFilter': RangeFilter(min=5.0, max=5.1, columns=['Sepal_Length']),
'TakeFilter': TakeFilter(count=100),
'TensorFlowScorer': TensorFlowScorer(
    model_location=os.path.join(
        script_dir, '..',
###############################################################################
# SkipFilter: skip first N rows in a dataset
# TakeFilter: take first N rows in a dataset
import pandas
from nimbusml import Pipeline
from nimbusml.preprocessing.filter import SkipFilter, TakeFilter

df = pandas.DataFrame(data=dict(
    review=['row' + str(i) for i in range(10)]))

# skip the first 5 rows
print(SkipFilter(count=5).fit_transform(df))

# take the first 5 rows
print(TakeFilter(count=5).fit_transform(df))

# skip 3 rows, then take 5 rows
pipe = Pipeline([SkipFilter(count=3), TakeFilter(count=5)])
print(pipe.fit_transform(df))
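The order of the two filters in the pipeline matters. The sketch below, an added illustration rather than part of the original example, reverses them: taking the first 5 rows and then skipping 3 of those should leave only rows 3 and 4.

# reversed order: take the first 5 rows, then skip 3 of those
pipe_reversed = Pipeline([TakeFilter(count=5), SkipFilter(count=3)])
print(pipe_reversed.fit_transform(df))  # expected to contain 'row3' and 'row4' only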