def setUpClass(cls):
    """Create the XGBoost regression model plus training and test sources.

    The records follow f(x1, x2) = 2*x1 + 3*x2 over random inputs. The
    first 1800 records become the training source, the rest the test source.
    """
    # Temporary directory in which the trained model is stored.
    cls.model_dir = tempfile.TemporaryDirectory()

    config = XGBRegressorModelConfig(
        features=Features(Feature("Feature1", float, 1), Feature("Feature2")),
        predict=Feature("Target", float, 1),
        directory=cls.model_dir.name,
    )
    cls.model = XGBRegressorModel(config)

    sample_count = 2000
    split_at = 1800
    first_inputs, second_inputs = np.random.rand(2, sample_count)

    generated = []
    for x1, x2 in zip(first_inputs, second_inputs):
        generated.append(
            Record(
                f"x{random.random()}",
                data={
                    "features": {
                        "Feature1": float(x1),
                        "Feature2": float(x2),
                        "Target": 2 * x1 + 3 * x2,
                    }
                },
            )
        )
    cls.records = generated

    training_config = MemorySourceConfig(records=cls.records[:split_at])
    cls.trainingsource = Sources(MemorySource(training_config))
    testing_config = MemorySourceConfig(records=cls.records[split_at:])
    cls.testsource = Sources(MemorySource(testing_config))
def setUpClass(cls):
    """Create the anomaly model plus training and test sources.

    Two columns of normally distributed values (mean 2, std 1) are drawn;
    the label "Y" is 1 when A > 1 - B and 0 otherwise. The first 1400
    records become the training source, the remaining ones the test source.
    """
    # Temporary directory in which the trained model is stored.
    cls.model_dir = tempfile.TemporaryDirectory()

    cls.model = AnomalyModel(
        features=Features(Feature("A", int, 1), Feature("B", int, 2)),
        predict=Feature("Y", int, 1),
        directory=cls.model_dir.name,
    )

    sample_count = 1800
    split_at = 1400
    column_a, column_b = np.random.normal(2, 1, size=(2, sample_count))

    generated = []
    for a_value, b_value in zip(column_a, column_b):
        generated.append(
            Record(
                f"x{random.random()}",
                data={
                    "features": {
                        "A": float(a_value),
                        "B": float(b_value),
                        "Y": (a_value > 1 - b_value).astype(int),
                    }
                },
            )
        )
    cls.records = generated

    training_config = MemorySourceConfig(records=cls.records[:split_at])
    cls.trainingsource = Sources(MemorySource(training_config))
    testing_config = MemorySourceConfig(records=cls.records[split_at:])
    cls.testsource = Sources(MemorySource(testing_config))
def setUpClass(cls):
    """Create question-answering train/test sources and the ``QAModel``.

    ``TRAIN_DATA`` and ``TEST_DATA`` are transposed into six columns each
    (title, context, question, answer text, start position, impossible
    flag) and turned into records keyed by their row index.
    """

    def _as_records(rows):
        # One record per row; every record gets its own empty "answers" list.
        return [
            Record(
                str(index),
                data={
                    "features": {
                        "title": title,
                        "context": context,
                        "question": question,
                        "answer_text": answer_text,
                        "start_pos_char": start_pos,
                        "is_impossible": impossible,
                        "answers": [],
                    }
                },
            )
            for index, (
                title,
                context,
                question,
                answer_text,
                start_pos,
                impossible,
            ) in enumerate(rows)
        ]

    (
        train_titles,
        train_contexts,
        train_questions,
        train_answers,
        train_starts,
        train_flags,
    ) = list(zip(*TRAIN_DATA))
    (
        test_titles,
        test_contexts,
        test_questions,
        test_answers,
        test_starts,
        test_flags,
    ) = list(zip(*TEST_DATA))

    cls.train_records = _as_records(
        zip(
            train_titles,
            train_contexts,
            train_questions,
            train_answers,
            train_starts,
            train_flags,
        )
    )
    cls.test_records = _as_records(
        zip(
            test_titles,
            test_contexts,
            test_questions,
            test_answers,
            test_starts,
            test_flags,
        )
    )

    train_config = MemorySourceConfig(records=cls.train_records)
    cls.train_sources = Sources(MemorySource(train_config))
    test_config = MemorySourceConfig(records=cls.test_records)
    cls.test_sources = Sources(MemorySource(test_config))

    # The temporary directory doubles as model directory and log directory.
    cls.model_dir = tempfile.TemporaryDirectory()
    model_config = QAModelConfig(
        model_name_or_path="bert-base-cased",
        cache_dir=CACHE_DIR,
        directory=cls.model_dir.name,
        log_dir=cls.model_dir.name,
        model_type="bert",
        no_cuda=True,
    )
    cls.model = QAModel(model_config)
def setUpClass(cls):
    """Create NER training/prediction sources and the ``NERModel``.

    Training records carry sentence id, words and NER tag; prediction
    records carry only sentence id and words.
    """
    sentence_ids, word_lists, tags = list(zip(*TRAIN_DATA))
    predict_ids, predict_words = list(zip(*PREDICT_DATA))

    cls.train_records = [
        Record(
            str(index),
            data={
                "features": {
                    "sentence_id": sentence_id,
                    "words": words,
                    "ner_tag": tag,
                }
            },
        )
        for index, (sentence_id, words, tag) in enumerate(
            zip(sentence_ids, word_lists, tags)
        )
    ]
    train_config = MemorySourceConfig(records=cls.train_records)
    cls.train_sources = Sources(MemorySource(train_config))

    cls.predict_records = [
        Record(
            str(index),
            data={"features": {"sentence_id": sentence_id, "words": words}},
        )
        for index, (sentence_id, words) in enumerate(
            zip(predict_ids, predict_words)
        )
    ]
    predict_config = MemorySourceConfig(records=cls.predict_records)
    cls.predict_sources = Sources(MemorySource(predict_config))

    # Model output is written to a temporary directory.
    cls.model_dir = tempfile.TemporaryDirectory()
    model_config = NERModelConfig(
        sid=Feature("sentence_id", int, 1),
        words=Feature("words", str, 1),
        predict=Feature("ner_tag", str, 1),
        output_dir=cls.model_dir.name,
        model_architecture_type="bert",
        model_name_or_path="bert-base-cased",
        no_cuda=True,
    )
    cls.model = NERModel(model_config)
def setUpClass(cls):
    """Create 2000 labelled records and the DNN classifier under test.

    1000 records have the feature set to 1 and the string "a"; 1000 have
    the feature set to 0 and the string "not a".
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.feature = Feature("starts_with_a", int, 1)
    cls.features = Features(cls.feature)

    def _batch(prefix, flag, text):
        # 1000 records with random keys sharing the same feature/label pair.
        return [
            Record(
                f"{prefix}{random.random()}",
                data={"features": {cls.feature.name: flag, "string": text}},
            )
            for _ in range(1000)
        ]

    cls.records = _batch("a", 1, "a") + _batch("b", 0, "not a")
    cls.sources = Sources(MemorySource(MemorySourceConfig(records=cls.records)))

    model_config = DNNClassifierModelConfig(
        directory=cls.model_dir.name,
        steps=1000,
        epochs=40,
        hidden=[50, 20, 10],
        predict=Feature("string", str, 1),
        classifications=["a", "not a"],
        clstype=str,
        features=cls.features,
    )
    cls.model = DNNClassifierModel(model_config)
async def test_02_predict(self):
    """Predict a single hand-made sample and check its relative error.

    The expected target is computed with f(x1, x2) = 2*x1 + 3*x2. The test
    passes when the prediction's relative error is below 0.3.
    """
    # Leading zero is inserted so that the feature values are 1-indexable.
    test_feature_val = [0, 1.5, 2]
    # Should be the same function used to generate data in setUpClass.
    test_target = 2 * test_feature_val[1] + 3 * test_feature_val[2]
    a = Repo(
        "a",
        data={
            "features": {
                self.feature1.NAME: test_feature_val[1],
                self.feature2.NAME: test_feature_val[2],
            }
        },
    )
    async with Sources(
        MemorySource(MemorySourceConfig(repos=[a]))
    ) as sources, self.model as model:
        target_name = model.config.predict.NAME
        async with sources() as sctx, model() as mctx:
            res = [repo async for repo in mctx.predict(sctx.repos())]
            self.assertEqual(len(res), 1)
            self.assertEqual(res[0].key, a.key)
            # The epsilon belongs in the denominator, where it guards the
            # division; previously it was added to the ratio because of
            # operator precedence, which skewed the error instead.
            test_error_norm = abs(
                (test_target - res[0].prediction(target_name).value)
                / (test_target + 1e-6)
            )
            error_threshold = 0.3
            self.assertLess(test_error_norm, error_threshold)
def setUpClass(cls):
    """Create features, repos, sources and the model for ``cls.MODEL_TYPE``.

    "CLASSIFICATION" uses input features A..I and "REGRESSION" uses A..C;
    in both cases the last dataset column is stored as "X", which is the
    value the model is configured to predict.
    """

    def _add_features(names):
        # Every input feature is a single float.
        for name in names:
            cls.features.append(DefFeature(name, float, 1))

    def _repos_from(dataset, keys):
        # Transpose once to validate the column count, as tuple unpacking
        # into one name per column would.
        columns = list(zip(*dataset))
        if len(columns) != len(keys):
            raise ValueError(
                f"expected {len(keys)} columns, got {len(columns)}"
            )
        return [
            Repo(str(index), data={"features": dict(zip(keys, row))})
            for index, row in enumerate(zip(*columns))
        ]

    cls.model_dir = tempfile.TemporaryDirectory()
    cls.features = Features()
    # Strings must be compared with ``==``. ``is`` checks object identity,
    # which only works by accident of interning and raises a SyntaxWarning
    # on Python 3.8+.
    if cls.MODEL_TYPE == "CLASSIFICATION":
        _add_features("ABCDEFGHI")
        cls.repos = _repos_from(
            FEATURE_DATA_CLASSIFICATION,
            ("A", "B", "C", "D", "E", "F", "G", "H", "I", "X"),
        )
    elif cls.MODEL_TYPE == "REGRESSION":
        _add_features("ABC")
        cls.repos = _repos_from(
            FEATURE_DATA_REGRESSION, ("A", "B", "C", "X")
        )
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
    cls.model = cls.MODEL(
        cls.MODEL_CONFIG(
            directory=cls.model_dir.name,
            predict="X",
            features=cls.features,
        )
    )
def setUpClass(cls):
    """Create the DNN regression model and a source of 2000 random repos.

    The repos follow f(x1, x2) = 2*x1 + 3*x2 over random inputs.
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.feature1 = Feature_1()
    cls.feature2 = Feature_2()
    cls.features = Features(cls.feature1, cls.feature2)

    model_config = DNNRegressionModelConfig(
        directory=cls.model_dir.name,
        steps=1000,
        epochs=40,
        hidden=[50, 20, 10],
        predict=DefFeature("TARGET", float, 1),
        features=cls.features,
    )
    cls.model = DNNRegressionModel(model_config)

    sample_count = 2000
    first_inputs, second_inputs = np.random.rand(2, sample_count)

    generated = []
    for x1, x2 in zip(first_inputs, second_inputs):
        generated.append(
            Repo(
                f"x{random.random()}",
                data={
                    "features": {
                        cls.feature1.NAME: float(x1),
                        cls.feature2.NAME: float(x2),
                        "TARGET": 2 * x1 + 3 * x2,
                    }
                },
            )
        )
    cls.repos = generated
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
def setUpClass(cls):
    """Create the ``MiscModel`` under test and 2000 labelled repos.

    1000 repos have the feature set to 1 and the string "a"; 1000 have the
    feature set to 0 and the string "not a".
    """
    cls.feature = StartsWithA()
    cls.features = Features(cls.feature)
    cls.model_dir = tempfile.TemporaryDirectory()

    model_config = MiscModelConfig(
        directory=cls.model_dir.name,
        classifications=["not a", "a"],
        features=cls.features,
    )
    cls.model = MiscModel(model_config)

    def _batch(prefix, flag, text):
        # 1000 repos with random keys sharing the same feature/label pair.
        return [
            Repo(
                f"{prefix}{random.random()}",
                data={"features": {cls.feature.NAME: flag, "string": text}},
            )
            for _ in range(1000)
        ]

    cls.repos = _batch("a", 1, "a") + _batch("b", 0, "not a")
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
def setUpClass(cls):
    """Create the DNN classifier under test and 2000 labelled repos.

    1000 repos have the feature set to 1 and the string "a"; 1000 have the
    feature set to 0 and the string "not a".
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    model_config = DNNClassifierModelConfig(
        directory=cls.model_dir.name,
        steps=1000,
        epochs=30,
        hidden=[10, 20, 10],
        classification="string",
        classifications=["a", "not a"],
        clstype=str,
    )
    cls.model = DNNClassifierModel(model_config)

    cls.feature = StartsWithA()
    cls.features = Features(cls.feature)

    def _batch(prefix, flag, text):
        # 1000 repos with random keys sharing the same feature/label pair.
        return [
            Repo(
                f"{prefix}{random.random()}",
                data={"features": {cls.feature.NAME: flag, "string": text}},
            )
            for _ in range(1000)
        ]

    cls.repos = _batch("a", 1, "a") + _batch("b", 0, "not a")
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
def setUpClass(cls):
    """Create records from ``DATA`` and the text classification model.

    Each ``DATA`` row supplies the string feature "A" and the value "X"
    that the model is configured to predict.
    """
    cls.features = Features()
    cls.features.append(Feature("A", str, 1))

    texts, labels = list(zip(*DATA))
    cls.records = [
        Record(str(index), data={"features": {"A": text, "X": label}})
        for index, (text, label) in enumerate(zip(texts, labels))
    ]
    cls.sources = Sources(MemorySource(MemorySourceConfig(records=cls.records)))

    cls.model_dir = tempfile.TemporaryDirectory()
    # NOTE(review): the second layer passes relu unquoted, unlike the first
    # layer -- confirm this is intentional.
    extra_layers = [
        "Dense(units = 120, activation='relu')",
        "Dense(units = 64, activation=relu)",
        "Dense(units = 2, activation='softmax')",
    ]
    model_config = TextClassifierConfig(
        directory=cls.model_dir.name,
        classifications=[0, 1],
        features=cls.features,
        predict=Feature("X", int, 1),
        add_layers=True,
        layers=extra_layers,
        model_path="https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1",
        epochs=30,
    )
    cls.model = TextClassificationModel(model_config)
def setUpClass(cls):
    """Create the ``Misc`` model under test and 2000 classified repos.

    1000 repos have the feature set to 1 and classification "a"; 1000 have
    the feature set to 0 and classification "not a".
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.model = Misc(ModelConfig(directory=cls.model_dir.name))
    cls.feature = StartsWithA()
    cls.features = Features(cls.feature)
    cls.classifications = ["a", "not a"]

    def _batch(prefix, flag, label):
        # 1000 repos with random keys; the classification sits next to
        # "features" in the repo data rather than inside it.
        return [
            Repo(
                f"{prefix}{random.random()}",
                data={
                    "features": {cls.feature.NAME: flag},
                    "classification": label,
                },
            )
            for _ in range(1000)
        ]

    cls.repos = _batch("a", 1, "a") + _batch("b", 0, "not a")
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
def setUpClass(cls):
    """Create records from ``DATA``, the VW regression model and a scorer.

    Features A..F are floats and G, H are ints; the last ``DATA`` column is
    stored as "X", the value the model is configured to predict.
    """
    cls.model_dir = tempfile.TemporaryDirectory()

    cls.features = Features()
    feature_specs = (
        ("A", float),
        ("B", float),
        ("C", float),
        ("D", float),
        ("E", float),
        ("F", float),
        ("G", int),
        ("H", int),
    )
    for feature_name, feature_type in feature_specs:
        cls.features.append(Feature(feature_name, feature_type, 1))

    A, B, C, D, E, F, G, H, X = list(zip(*DATA))
    cls.records = [
        Record(
            str(index),
            data={
                "features": {
                    "A": a,
                    "B": b,
                    "C": c,
                    "D": d,
                    "E": e,
                    "F": f,
                    "G": g,
                    "H": h,
                    "X": x,
                }
            },
        )
        for index, (a, b, c, d, e, f, g, h, x) in enumerate(
            zip(A, B, C, D, E, F, G, H, X)
        )
    ]
    cls.sources = Sources(MemorySource(MemorySourceConfig(records=cls.records)))

    model_config = VWConfig(
        location=cls.model_dir.name,
        features=cls.features,
        predict=Feature("X", float, 1),
        # A and B will be in namespace n1
        # A and C will be in namespace n2
        namespace=["n1_A_B", "n2_A_C"],
        importance=Feature("H", int, 1),
        tag=Feature("G", int, 1),
        task="regression",
        vwcmd=["l2", "0.1", "loss_function", "squared", "passes", "10"],
    )
    cls.model = VWModel(model_config)
    cls.scorer = MeanSquaredErrorAccuracy()
async def test_02_predict(self):
    """Predict on one repo and check that exactly that repo comes back."""
    a = Repo("a", data={"features": {self.feature.NAME: 1}})
    single_repo_source = MemorySource(MemorySourceConfig(repos=[a]))
    async with Sources(single_repo_source) as sources, self.model as model:
        async with sources() as sctx, model() as mctx:
            res = []
            async for predicted in mctx.predict(sctx.repos()):
                res.append(predicted)
            self.assertEqual(len(res), 1)
            first = res[0]
            self.assertEqual(first.src_url, a.src_url)
            # The prediction value only has to be truthy.
            self.assertTrue(first.prediction().value)
async def _add_memory_source(self):
    """Register an in-memory source and its context on the CLI app.

    The source holds ``self.num_records`` records whose "by_ten" feature is
    ten times the record index. The generator yields once while both the
    source and its context are open.
    """
    by_ten_records = []
    for index in range(self.num_records):
        by_ten_records.append(
            Record(str(index), data={"features": {"by_ten": index * 10}})
        )
    async with MemorySource(records=by_ten_records) as source:
        # Keep a reference on the test case and register it under the label.
        self.source = source
        self.cli.app["sources"][self.slabel] = source
        async with source() as sctx:
            self.sctx = sctx
            self.cli.app["source_contexts"][self.slabel] = sctx
            yield
async def test_02_predict(self):
    """Predict on one record and check that exactly that record comes back."""
    a = Record("a", data={"features": {self.feature.NAME: 1}})
    single_record_source = MemorySource(MemorySourceConfig(records=[a]))
    async with Sources(single_record_source) as sources, self.model as model:
        target_name = model.config.predict.NAME
        async with sources() as sctx, model() as mctx:
            res = []
            async for predicted in mctx.predict(sctx.records()):
                res.append(predicted)
            self.assertEqual(len(res), 1)
            first = res[0]
            self.assertEqual(first.key, a.key)
            # The predicted value for the target only has to be truthy.
            self.assertTrue(first.prediction(target_name).value)
def setUpClass(cls):
    """Create spaCy NER train/test sources, the model and its scorer.

    Every row of ``TRAIN_DATA`` / ``TEST_DATA`` supplies a "sentence" and
    its "entities"; records are keyed by their row index.
    """

    def _as_records(sentences, entity_sets):
        return [
            Record(
                str(index),
                data={"features": {"sentence": sentence, "entities": entities}},
            )
            for index, (sentence, entities) in enumerate(
                zip(sentences, entity_sets)
            )
        ]

    train_sentences, train_entities = list(zip(*TRAIN_DATA))
    test_sentences, test_entities = list(zip(*TEST_DATA))

    cls.train_records = _as_records(train_sentences, train_entities)
    cls.test_records = _as_records(test_sentences, test_entities)

    train_config = MemorySourceConfig(records=cls.train_records)
    cls.train_sources = Sources(MemorySource(train_config))
    test_config = MemorySourceConfig(records=cls.test_records)
    cls.test_sources = Sources(MemorySource(test_config))

    cls.model_dir = tempfile.TemporaryDirectory()
    model_config = SpacyNERModelConfig(
        model_name="en_core_web_sm",
        location=cls.model_dir.name,
        n_iter=10,
        dropout=0.4,
    )
    cls.model = SpacyNERModel(model_config)
    cls.scorer = SpacyNerAccuracy()
async def test_02_predict(self):
    """Predict on one repo, passing the features and classifications."""
    a = Repo("a", data={"features": {self.feature.NAME: 1}})
    single_repo_source = MemorySource(MemorySourceConfig(repos=[a]))
    async with Sources(
        single_repo_source
    ) as sources, self.features as features, self.model as model:
        async with sources() as sctx, model() as mctx:
            res = []
            async for outcome in mctx.predict(
                sctx.repos(), features, self.classifications
            ):
                res.append(outcome)
            self.assertEqual(len(res), 1)
            # Each result is indexable: element 0 is the repo, element 1
            # must be truthy.
            first = res[0]
            self.assertEqual(first[0].src_url, a.src_url)
            self.assertTrue(first[1])
async def _add_memory_source(self):
    """Register an in-memory source and its context on the CLI app.

    The source holds ``self.num_repos`` repos whose "by_ten" feature is ten
    times the repo index. The generator yields once while both the source
    and its context are open.
    """
    self.features = Features(DefFeature("by_ten", int, 1))
    by_ten_repos = []
    for index in range(self.num_repos):
        by_ten_repos.append(
            Repo(str(index), data={"features": {"by_ten": index * 10}})
        )
    async with MemorySource(MemorySourceConfig(repos=by_ten_repos)) as source:
        # Keep a reference on the test case and register it under the label.
        self.source = source
        self.cli.app["sources"][self.slabel] = source
        async with source() as sctx:
            self.sctx = sctx
            self.cli.app["source_contexts"][self.slabel] = sctx
            yield
async def test_02_predict(self):
    """Predict on two repos whose keys equal their expected predictions."""
    a = Repo("a", data={"features": {self.feature.NAME: 1}})
    b = Repo("not a", data={"features": {self.feature.NAME: 0}})
    two_repo_source = MemorySource(MemorySourceConfig(repos=[a, b]))
    async with Sources(two_repo_source) as sources, self.model as model:
        async with sources() as sctx, model() as mctx:
            seen = 0
            async for repo, prediction, _confidence in mctx.predict(
                sctx.repos()
            ):
                # Each repo's key is the classification it should receive.
                with self.subTest(repo=repo):
                    self.assertEqual(prediction, repo.key)
                seen += 1
            self.assertEqual(seen, 2)
def setUpClass(cls):
    """Create the SLR model predicting "Y" and repos from ``FEATURE_DATA``."""
    cls.model_dir = tempfile.TemporaryDirectory()
    slr_config = SLRConfig(directory=cls.model_dir.name, predict="Y")
    cls.model = SLR(slr_config)
    cls.feature = DefFeature("X", float, 1)
    cls.features = Features(cls.feature)

    x_values, y_values = list(zip(*FEATURE_DATA))
    repos = []
    for index, (x_value, y_value) in enumerate(zip(x_values, y_values)):
        repos.append(
            Repo(str(index), data={"features": {"X": x_value, "Y": y_value}})
        )
    cls.repos = repos
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
def setUpClass(self):
    """Create four records from the A, B, C and D sequences and a source."""

    def _record_at(index):
        # Row ``index`` of the four sequences, keyed by the row number.
        return Record(
            str(index),
            data={
                "features": {
                    "Years": A[index],
                    "Expertise": B[index],
                    "Trust": C[index],
                    "Salary": D[index],
                }
            },
        )

    self.records = list(map(_record_at, range(4)))
    memory_config = MemorySourceConfig(records=self.records)
    self.source = Sources(MemorySource(memory_config))
def setUpClass(cls):
    """Create records from ``DATA`` and the HF classification model.

    Each ``DATA`` row supplies the string feature "A" and the value "X"
    that the model is configured to predict.
    """
    cls.features = Features()
    cls.features.append(Feature("A", str, 1))

    texts, labels = list(zip(*DATA))
    cls.records = [
        Record(str(index), data={"features": {"A": text, "X": label}})
        for index, (text, label) in enumerate(zip(texts, labels))
    ]
    cls.sources = Sources(MemorySource(MemorySourceConfig(records=cls.records)))

    # One temporary directory serves as cache, logging and output directory.
    cls.model_dir = tempfile.TemporaryDirectory()
    scratch_dir = cls.model_dir.name
    model_config = HFClassificationModelConfig(
        model_name_or_path="bert-base-cased",
        cache_dir=scratch_dir,
        logging_dir=scratch_dir,
        output_dir=scratch_dir,
        features=cls.features,
        predict=Feature("X", int, 1),
        label_list=["0", "1"],
    )
    cls.model = HFClassificationModel(model_config)
def setUpClass(cls):
    """Create features, records, the model and the scorer for ``cls.MODEL_TYPE``.

    Classifier types use inputs A..H, regressor types and "CLUSTERING" use
    A..D. "X" is additionally appended as a feature for the exact types
    "CLASSIFICATION" and "REGRESSION". What the model predicts depends on
    the estimator type of ``cls.MODEL.SCIKIT_MODEL`` and on whether the
    model type contains "MULTI_".
    """

    def _add_float_features(names):
        # Every feature is a single float.
        for name in names:
            cls.features.append(Feature(name, float, 1))

    cls.is_multi = "MULTI_" in cls.MODEL_TYPE
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.features = Features()

    if cls.MODEL_TYPE in classifier_types:
        A, B, C, D, E, F, G, H, X, Y = list(zip(*FEATURE_DATA_CLASSIFICATION))
        _add_float_features("ABCDEFGH")
        if cls.MODEL_TYPE == "CLASSIFICATION":
            _add_float_features("X")
        cls.records = [
            Record(
                str(index),
                data={
                    "features": {
                        "A": a,
                        "B": b,
                        "C": c,
                        "D": d,
                        "E": e,
                        "F": f,
                        "G": g,
                        "H": h,
                        "X": x,
                        "Y": y,
                    }
                },
            )
            for index, (a, b, c, d, e, f, g, h, x, y) in enumerate(
                zip(A, B, C, D, E, F, G, H, X, Y)
            )
        ]
    elif cls.MODEL_TYPE in regressor_types:
        _add_float_features("ABCD")
        if cls.MODEL_TYPE == "REGRESSION":
            _add_float_features("X")
        A, B, C, D, X, Y = list(zip(*FEATURE_DATA_REGRESSION))
        cls.records = [
            Record(
                str(index),
                data={
                    "features": {
                        "A": a,
                        "B": b,
                        "C": c,
                        "D": d,
                        "X": x,
                        "Y": y,
                    }
                },
            )
            for index, (a, b, c, d, x, y) in enumerate(zip(A, B, C, D, X, Y))
        ]
    elif cls.MODEL_TYPE == "CLUSTERING":
        _add_float_features("ABCD")
        A, B, C, D, X = list(zip(*FEATURE_DATA_CLUSTERING))
        cls.records = [
            Record(
                str(index),
                data={
                    "features": {"A": a, "B": b, "C": c, "D": d, "X": x}
                },
            )
            for index, (a, b, c, d, x) in enumerate(zip(A, B, C, D, X))
        ]

    cls.sources = Sources(MemorySource(MemorySourceConfig(records=cls.records)))

    properties = {
        "location": cls.model_dir.name,
        "features": cls.features,
    }
    config_fields = {}
    estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
    is_supervised = estimator_type in supervised_estimators
    # NOTE(review): for the single-output types "CLASSIFICATION" and
    # "REGRESSION", "X" is both appended to the features above and used as
    # the predict target here -- confirm this is intended.
    if is_supervised and cls.is_multi:
        config_fields["predict"] = Features(
            Feature("X", float, 1), Feature("Y", float, 1)
        )
    elif is_supervised or estimator_type in unsupervised_estimators:
        # TODO If cls.TRUE_CLSTR_PRESENT then we want to use the
        # mutual_info_score scikit accuracy scorer. In this case we might
        # want to change tcluster to a boolean config property.
        # For more info see commit e4f523976bf37d3457cda140ceab7899420ae2c7
        config_fields["predict"] = Feature("X", float, 1)

    cls.model = cls.MODEL(cls.MODEL_CONFIG(**{**properties, **config_fields}))
    cls.scorer = cls.SCORER()
def setUpClass(cls):
    """Create features, repos, sources and the model for ``cls.MODEL_TYPE``.

    "CLASSIFICATION" uses input features A..I, "REGRESSION" uses A..C and
    "CLUSTERING" uses A..D; the last dataset column is always stored as
    "X". Supervised estimators predict "X"; unsupervised estimators receive
    "X" as ``tcluster`` only when ``cls.TRUE_CLSTR_PRESENT`` is truthy.
    """

    def _add_features(names):
        # Every input feature is a single float.
        for name in names:
            cls.features.append(DefFeature(name, float, 1))

    def _repos_from(dataset, keys):
        # Transpose once to validate the column count, as tuple unpacking
        # into one name per column would.
        columns = list(zip(*dataset))
        if len(columns) != len(keys):
            raise ValueError(
                f"expected {len(keys)} columns, got {len(columns)}"
            )
        return [
            Repo(str(index), data={"features": dict(zip(keys, row))})
            for index, row in enumerate(zip(*columns))
        ]

    cls.model_dir = tempfile.TemporaryDirectory()
    cls.features = Features()
    # Strings must be compared with ``==``. ``is`` checks object identity,
    # which only works by accident of interning and raises a SyntaxWarning
    # on Python 3.8+.
    if cls.MODEL_TYPE == "CLASSIFICATION":
        _add_features("ABCDEFGHI")
        cls.repos = _repos_from(
            FEATURE_DATA_CLASSIFICATION,
            ("A", "B", "C", "D", "E", "F", "G", "H", "I", "X"),
        )
    elif cls.MODEL_TYPE == "REGRESSION":
        _add_features("ABC")
        cls.repos = _repos_from(
            FEATURE_DATA_REGRESSION, ("A", "B", "C", "X")
        )
    elif cls.MODEL_TYPE == "CLUSTERING":
        _add_features("ABCD")
        cls.repos = _repos_from(
            FEATURE_DATA_CLUSTERING, ("A", "B", "C", "D", "X")
        )
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))

    properties = {
        "directory": cls.model_dir.name,
        "features": cls.features,
    }
    config_fields = {}
    estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
    if estimator_type in supervised_estimators:
        config_fields["predict"] = DefFeature("X", float, 1)
    elif estimator_type in unsupervised_estimators:
        if cls.TRUE_CLSTR_PRESENT:
            config_fields["tcluster"] = DefFeature("X", float, 1)
    cls.model = cls.MODEL(cls.MODEL_CONFIG(**{**properties, **config_fields}))