def test_get_schema_returns_correct_value_for_vector_valued_columns(self):
    """Check the output schema of a pipeline that one-hot encodes 'c0'.

    Fits a Pipeline holding a single OneHotVectorizer bound to 'c0' on
    ``train_df`` (a fixture defined outside this block) and verifies that
    ``get_output_columns()`` reports exactly four names: 'c0.a', 'c0.b',
    'c1' and 'c2'.
    """
    pipeline = Pipeline([OneHotVectorizer() << 'c0'])
    pipeline.fit(train_df)

    schema = pipeline.get_output_columns()

    # assertIn reports the missing name and the actual schema on failure,
    # unlike assertTrue(x in schema) which only says "False is not true".
    for expected_column in ('c0.a', 'c0.b', 'c1', 'c2'):
        self.assertIn(expected_column, schema)
    self.assertEqual(len(schema), 4)
def test_get_schema_does_not_work_when_predictor_is_part_of_model(self):
    """Check that get_output_columns() raises when a predictor is present.

    Drops 'c0' from ``train_df`` (a fixture defined outside this block),
    fits a Pipeline holding only an OnlineGradientDescentRegressor with
    label 'c2', and expects ``get_output_columns()`` to raise. The test
    fails if the call returns normally.
    """
    df = train_df.drop(['c0'], axis=1)

    pipeline = Pipeline([OnlineGradientDescentRegressor(label='c2')])
    # Fitting stays outside the guarded region: an error raised here must
    # surface as a test error, not be mistaken for the expected failure.
    pipeline.fit(df)

    with self.assertRaises(Exception):
        pipeline.get_output_columns()
def test_get_schema_returns_correct_value_for_single_valued_columns(self):
    """Check the output schema of a pipeline that only filters rows on 'c2'.

    Drops 'c0' from ``train_df`` (a fixture defined outside this block),
    fits a Pipeline holding a RangeFilter(min=0.0, max=4.5) bound to 'c2',
    runs ``transform`` once, and verifies that ``get_output_columns()``
    reports exactly two names: 'c1' and 'c2'.
    """
    df = train_df.drop(['c0'], axis=1)

    pipeline = Pipeline([RangeFilter(min=0.0, max=4.5) << 'c2'])
    pipeline.fit(df)
    # The transformed frame is not inspected. The call is kept so the schema
    # is still queried after a transform, as in the original test.
    pipeline.transform(df)

    schema = pipeline.get_output_columns()

    # assertIn reports the missing name and the actual schema on failure.
    for expected_column in ('c1', 'c2'):
        self.assertIn(expected_column, schema)
    self.assertEqual(len(schema), 2)
# Example: read the output column names (schema) of a fitted pipeline.
import numpy as np
import pandas as pd
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.feature_extraction.text.extractor import Ngram

# Load the input data as a FileDataStream.
path = get_dataset("wiki_detox_train").as_filepath()

data = FileDataStream.read_csv(path, sep='\t')
print(data.head())
#    Sentiment                                      SentimentText
# 0          1  ==RUDE== Dude, you are rude upload that carl p...
# 1          1  == OK! == IM GOING TO VANDALIZE WILD ONES WIK...
# 2          1  Stop trolling, zapatancas, calling me a liar m...
# 3          1  ==You're cool== You seem like a really cool g...
# 4          1  ::::: Why are you threatening me? I'm not bein...

# Build the n-gram featurizer that writes a 'features' column computed
# from 'SentimentText', then wrap it in a single-step pipeline.
featurizer = NGramFeaturizer(
    word_feature_extractor=Ngram(),
    columns={'features': ['SentimentText']},
)
pipe = Pipeline([featurizer])

pipe.fit(data)

# Show the first five output column names of the fitted pipeline.
schema = pipe.get_output_columns()
print(schema[:5])
# ['Sentiment', 'SentimentText', 'features.Char.<␂>|=|=', 'features.Char.=|=|r', 'features.Char.=|r|u']