def test_it_is_convertable_to_a_dict_as_a_shallow_copy(self):
    """Check that ``toDict`` returns a real dict whose entries equal the manager's."""
    Manager = PropertiesManager()
    # The manager itself is expected not to be a dict (sub)class.
    self.assertNotIsInstance(Manager, dict)

    AsDict = Manager.toDict()
    self.assertIsInstance(AsDict, dict)

    # Every entry of the dict must be reachable via item access on the manager.
    for Key, Value in AsDict.items():
        self.assertEqual(Manager[Key], Value)
def test_it_reassings_properties(self, Services: MagicMock):
    """Check that the classifier handed to ``pipe`` ends up on the served PropertiesManager."""
    Properties = PropertiesManager()
    Properties.classifier = 'is_cancer'

    def locate(Key, __):
        # Only the 'properties' key yields the real manager; any other key
        # gets a fresh throwaway mock.
        if Key == 'properties':
            return Properties
        return MagicMock()

    Services.getService.side_effect = locate

    Pipe = Pipeline.Factory.getInstance()
    Pipe.pipe(
        MagicMock(),
        MagicMock(),
        MagicMock(),
        MagicMock(),
        {'classifier': 'doid'},
    )

    self.assertEqual('doid', Properties.classifier)
def test_it_initlializes_early_stopping_callback( self ):
    """Check that training builds the early-stopping callback with the configured patience."""
    Properties = PropertiesManager()
    Properties.training[ 'patience' ] = 50

    Network = ModelBaseSpec.StubbedFFN( Properties, MagicMock( spec = Sequential ) )
    Network.train( MagicMock(), MagicMock() )

    # Patience is read back after training, exactly when the expectation is built.
    Expected = {
        'monitor': 'val_loss',
        'mode': 'min',
        'verbose': 1,
        'patience': Properties.training[ 'patience' ],
    }
    self.__Stopper.assert_called_once_with( **Expected )
def setUp(self):
    """Patch the four selector classes in the selector manager module and prepare shared fixtures."""
    # Dependency selector: patcher + started mock.
    self.__DP = patch(
        'biomed.vectorizer.selector.selector_manager.DependencySelector',
        spec=Selector,
    )
    self.__D = self.__DP.start()

    # Factor selector.
    self.__FP = patch(
        'biomed.vectorizer.selector.selector_manager.FactorSelector',
        spec=Selector,
    )
    self.__F = self.__FP.start()

    # Linear vector selector.
    self.__LVP = patch(
        'biomed.vectorizer.selector.selector_manager.LinearVectorSelector',
        spec=Selector,
    )
    self.__LV = self.__LVP.start()

    # Logistic regression selector.
    self.__LLVP = patch(
        'biomed.vectorizer.selector.selector_manager.LogisticRegressionSelector',
        spec=Selector,
    )
    self.__LR = self.__LLVP.start()

    # Only the dependency selector mock is wired to return the reference selector.
    self.__ReferenceSelector = MagicMock(spec=Selector)
    self.__D.return_value = self.__ReferenceSelector

    self.__PM = PropertiesManager()
def test_it_gets_the_training_evaluation_for_multi_processing( self ):
    """Check that ``getTrainingScore`` evaluates the test split with multiprocessing enabled."""
    Properties = PropertiesManager()
    Training = Properties[ "training" ]
    Training[ "epochs" ] = 1
    Training[ "batch_size" ] = 2
    Training[ "workers" ] = 3

    Features = InputData( MagicMock(), MagicMock(), MagicMock() )
    Labels = InputData( NP.zeros( ( 2, 3 ) ), MagicMock(), MagicMock() )

    Model = MagicMock( spec = Sequential )
    # The loader mock hands back the very same model after training.
    self.__Loader.return_value = Model

    Network = ModelBaseSpec.StubbedFFN( Properties, Model )
    Network.train( Features, Labels )
    Network.getTrainingScore( Features, Labels )

    Model.evaluate.assert_called_once_with(
        Features.Test,
        Labels.Test,
        batch_size = Properties[ "training" ][ "batch_size" ],
        workers = Properties[ "training" ][ "workers" ],
        use_multiprocessing = True,
        return_dict = True,
        verbose = 0
    )
def test_it_fails_if_the_model_was_not_trained_while_predicting( self ):
    """Check that ``predict`` raises a RuntimeError when ``train`` was never called."""
    Untrained = ModelBaseSpec.StubbedFFN(
        PropertiesManager(),
        MagicMock( spec = Sequential )
    )
    Sample = MagicMock()

    # NOTE(review): `msg` is only the failure message shown when nothing is
    # raised; it does not verify the text of the RuntimeError itself.
    with self.assertRaises( RuntimeError, msg = "The model has not be trained" ):
        Untrained.predict( Sample )
def printResults(Predictions):
    """Print a summary of every configuration's predictions and save each one as a CSV file.

    For each key of ``Predictions`` the configuration id is printed, the
    prediction is summarised on stdout and a ``pmid,<classifier>`` CSV is
    written into the ``results`` directory next to this file's parent
    directory. The directory is created when it does not exist yet.

    Args:
        Predictions: mapping of configuration id to a prediction triple. Index
            0 is tested against zero to detect hits, index 1 holds the value
            written per row, and index 2 is indexed with ``['pmid']`` and read
            positionally via ``.iloc`` (presumably a pandas DataFrame -
            confirm against the caller).
    """
    def outputResults(prediction: list):
        """Print the hits of one prediction triple and return its CSV text."""
        # Rows are collected and joined once instead of growing a string with
        # `+=` on every iteration.
        rows = [f"pmid,{ pm.classifier }\n"]
        found_targets = list()
        found_pmids = list()

        # Index-based access is kept on purpose: the containers may use
        # label-based indexing, so plain iteration is not guaranteed equal.
        for index in range(len(prediction[0])):
            pmid = prediction[2]['pmid'].iloc[index]
            target = prediction[1][index]
            rows.append(f"{pmid},{target}\n")
            if prediction[0][index] != 0:
                found_targets.append(target)
                found_pmids.append(pmid)

        print('number of cancer predictions found_targets:', len(found_targets))
        counter = collections.Counter(found_targets)
        print('(doid, count):', counter.most_common())
        print('cancer found in articles with PMID:', found_pmids)
        return "".join(rows)

    pm = PropertiesManager()
    for key in Predictions:
        print("Configuration ID ", key)
        output_predictions = outputResults(Predictions[key])
        path = OS.path.abspath(
            OS.path.join(
                OS.path.dirname(__file__),
                "..",
                "results",
                f"{'blind_' if pm.is_blind else ''}{'binary' if pm.classifier == 'is_cancer' else 'multi'}_{pm.model}_{pm.preprocessing['variant']}_{ datetime.now().strftime('%Y-%m-%d_%H-%M-%S') }_{ key }.csv"
            ))
        # Without the target directory `open` fails with FileNotFoundError.
        OS.makedirs(OS.path.dirname(path), exist_ok=True)
        with open(path, "w") as file:
            file.write(output_predictions)
def test_it_predicts_with_mulitprocessing( self ):
    """Check that ``predict`` forwards batch size and workers with multiprocessing enabled."""
    Properties = PropertiesManager()
    Training = Properties[ "training" ]
    Training[ "epochs" ] = 1
    Training[ "batch_size" ] = 2
    Training[ "workers" ] = 2

    Model = MagicMock( spec = Sequential )
    Model.predict.return_value = NP.array( [ [ 0., 0. ] ] )
    # The loader mock hands back the very same model after training.
    self.__Loader.return_value = Model

    Features = InputData( MagicMock(), MagicMock(), MagicMock() )
    Labels = InputData( NP.zeros( ( 2, 3 ) ), MagicMock(), MagicMock() )
    ToPredict = MagicMock()

    Network = ModelBaseSpec.StubbedFFN( Properties, Model )
    Network.train( Features, Labels )
    Network.predict( ToPredict )

    Model.predict.assert_called_once_with(
        ToPredict,
        batch_size = Properties.training[ 'batch_size' ],
        workers = Properties.training[ "workers" ],
        use_multiprocessing = True
    )
def test_it_returns_normalized_binary_classified_data( self ):
    """Compare the prediction for a two-column model output against ``[ 0, 1, 0 ]``."""
    Properties = PropertiesManager()
    Training = Properties[ "training" ]
    Training[ "epochs" ] = 1
    Training[ "batch_size" ] = 2
    Training[ "workers" ] = 1

    Model = MagicMock( spec = Sequential )
    Model.predict.return_value = NP.array( [
        [ 0.0, 0.00 ],
        [ 0.0, 0.98867947 ],
        [ 0.0, 0.00 ]
    ] )
    # The loader mock hands back the very same model after training.
    self.__Loader.return_value = Model

    Features = InputData( MagicMock(), MagicMock(), MagicMock() )
    Labels = InputData( NP.zeros( ( 2, 2 ) ), MagicMock(), MagicMock() )
    ToPredict = MagicMock()

    Network = ModelBaseSpec.StubbedFFN( Properties, Model )
    Network.train( Features, Labels )

    # NOTE(review): the result of `arrayEqual` is discarded - this only
    # verifies anything if `arrayEqual` raises on mismatch; confirm its import.
    arrayEqual( Network.predict( ToPredict ), NP.array( [ 0, 1, 0 ] ) )
def test_it_does_nothing_the_saved_model_if_it_is_not_there( self ):
    """Check that the remover is never called when the path mock reports no existing file."""
    self.__Path.exists.return_value = False

    Network = ModelBaseSpec.StubbedFFN(
        PropertiesManager(),
        MagicMock( spec = Sequential )
    )
    Network.train( MagicMock(), MagicMock() )

    self.__Remover.assert_not_called()
def test_it_trains_a_model_with_class_weights_if_weights_are_given( self ):
    """Check that ``train`` passes the given class weights and training settings to ``fit``."""
    Properties = PropertiesManager()
    Training = Properties[ "training" ]
    Training[ "epochs" ] = 1
    Training[ "batch_size" ] = 2
    Training[ "workers" ] = 2

    Model = MagicMock( spec = Sequential )
    Weights = MagicMock()
    Features = InputData( MagicMock(), MagicMock(), MagicMock() )
    Labels = InputData( MagicMock(), MagicMock(), MagicMock() )

    Network = ModelBaseSpec.StubbedFFN( Properties, Model, Weights )
    Network.train( Features, Labels )

    Model.fit.assert_called_once_with(
        x = Features.Training,
        y = Labels.Training,
        class_weight = Weights,
        shuffle = True,
        epochs = Properties[ "training" ][ "epochs" ],
        batch_size = Properties[ "training" ][ "batch_size" ],
        validation_data = ( Features.Validation, Labels.Validation ),
        workers = Properties[ "training" ][ "workers" ],
        use_multiprocessing = True,
        callbacks = [ self.__Stopper, self.__Checkpoint ]
    )
def fakeLocator(ServiceKey, Type):
    """Return a fresh PropertiesManager, but only for the 'properties' key and matching type.

    Raises:
        RuntimeError: if the key is not "properties" or the type is not
            PropertiesManager.
    """
    # The key is validated before the type, so a wrong key always wins.
    if ServiceKey == "properties":
        if Type == PropertiesManager:
            return PropertiesManager()
        raise RuntimeError("Unexpected Type")
    raise RuntimeError("Unexpected ServiceKey")
def setUp(self):
    """Create writer mocks and patch ``mkdir``, ``checkDir`` and ``Time`` of the std evaluator module."""
    self.__PM = PropertiesManager()

    # One mock per writer the tests hand over.
    self.__Simple = MagicMock(spec=FileWriter)
    self.__JSON = MagicMock(spec=FileWriter)
    self.__CSV = MagicMock(spec=FileWriter)

    # File system helpers: the directory check reports success by default.
    self.__mkdirM = patch('biomed.evaluator.std_evaluator.mkdir')
    self.__mkdir = self.__mkdirM.start()
    self.__checkDirM = patch('biomed.evaluator.std_evaluator.checkDir')
    self.__checkDir = self.__checkDirM.start()
    self.__checkDir.return_value = True

    # Freeze the clock: now() yields a mock whose strftime returns a fixed stamp.
    self.__TimeM = patch('biomed.evaluator.std_evaluator.Time')
    self.__Time = self.__TimeM.start()
    self.__TimeObj = MagicMock(spec=datetime)
    self.__Time.now.return_value = self.__TimeObj
    self.__TimeValue = '2020-07-25_14-53-36'
    self.__TimeObj.strftime.return_value = self.__TimeValue
def fakeLocator(ServiceKey, Type):
    """Return a fresh PropertiesManager for any key/type pair registered in ``Dependencies``.

    Raises:
        RuntimeError: if the key is not in ``Dependencies`` or the type does
            not match the entry stored for that key.
    """
    # The key is validated before the type, so an unknown key always wins.
    if ServiceKey in Dependencies:
        if Type == Dependencies[ServiceKey]:
            return PropertiesManager()
        raise RuntimeError("Unexpected Type")
    raise RuntimeError("Unexpected ServiceKey")
def test_it_removes_the_saved_model_if_it_is_there( self ):
    """Check that the remover is called with the joined file name when the path mock reports a file."""
    FileName = 'op'
    self.__Path.join.return_value = FileName
    self.__Path.exists.return_value = True

    Network = ModelBaseSpec.StubbedFFN(
        PropertiesManager(),
        MagicMock( spec = Sequential )
    )
    Network.train( MagicMock(), MagicMock() )

    self.__Remover.assert_called_once_with( FileName )
def test_it_initilizes_the_properties_mananger(self, Locator: MagicMock, M: MagicMock):
    """Check that ``startServices`` builds the manager once and registers it as 'properties'."""
    self.__fullfillDepenendcies(Locator)

    Properties = PropertiesManager()
    # `M` is the injected mock that yields the manager when called.
    M.return_value = Properties

    Services.startServices()

    M.assert_called_once()
    Locator.set.assert_any_call("properties", Properties)
def __initPreprocessorDependencies(self):
    """Create the properties, stub caches and stub normalizer factories used by the preprocessor tests."""
    self.__PM = PropertiesManager()

    # Plain dicts act as backing stores for the two stubbed caches.
    self.__FakeCache = {}
    self.__FakeCache2 = {}

    self.__Complex = StubbedNormalizerFactory(["n", "v", "a"])
    self.__Simple = StubbedNormalizerFactory(["s", "l", "w"])

    self.__Shared = StubbedCache(self.__FakeCache)
    self.__FileCache = StubbedCache(self.__FakeCache2)

    self.__PM.preprocessing["workers"] = 1
def test_it_loads_the_best_model( self ):
    """Check that training loads the model from the joined path and keeps it as ``_Model``."""
    Properties = PropertiesManager()
    Properties.training[ 'patience' ] = 50

    FileName = 'kjf'
    self.__Path.join.return_value = FileName

    Best = MagicMock()
    self.__Loader.return_value = Best

    Network = ModelBaseSpec.StubbedFFN( Properties, MagicMock( spec = Sequential ) )
    Network.train( MagicMock(), MagicMock() )

    self.__Loader.assert_called_once_with( FileName, custom_objects = None )
    # The loaded model must replace the one the network was built with.
    self.assertEqual( Best, Network._Model )
def test_it_uses_the_given_selectors(self):
    """Check that each selection type makes the manager construct the matching selector class."""
    Selectors = {
        "dependency": self.__D,
        "factor": self.__F,
        "linearVector": self.__LV,
        "logisticRegression": self.__LR,
    }

    for SelectorKey, SelectorMock in Selectors.items():
        PM = PropertiesManager()
        PM.selection['type'] = SelectorKey

        # The service getter takes exactly two arguments and always answers
        # with the properties of the current iteration.
        ServiceGetter = MagicMock()
        ServiceGetter.side_effect = lambda _, __: PM

        MyManager = SelectorManager.Factory.getInstance(ServiceGetter)
        MyManager.build(MagicMock(), MagicMock(), MagicMock())

        SelectorMock.assert_called_once_with(PM)
def test_it_returns_normalized_multi_classified_data( self ):
    """Compare the prediction for the 'doid' classifier against ``[ 1, 0, 1 ]``."""
    Properties = PropertiesManager()
    Training = Properties[ "training" ]
    Training[ "epochs" ] = 1
    Training[ "batch_size" ] = 2
    Training[ "workers" ] = 1
    Properties.classifier = 'doid'

    Model = MagicMock( spec = Sequential )
    Model.predict.return_value = NP.array( [
        [ -0.00716622, 23 ],
        [ -23, -98.98867947 ],
        [ -42, 12 ]
    ] )
    # The loader mock hands back the very same model after training.
    self.__Loader.return_value = Model

    Features = InputData( MagicMock(), MagicMock(), MagicMock() )
    Labels = InputData( NP.zeros( ( 4, 4 ) ), MagicMock(), MagicMock() )
    ToPredict = MagicMock()

    Network = ModelBaseSpec.StubbedFFN( Properties, Model )
    Network.train( Features, Labels )

    # NOTE(review): the result of `arrayEqual` is discarded - this only
    # verifies anything if `arrayEqual` raises on mismatch; confirm its import.
    arrayEqual( Network.predict( ToPredict ), NP.array( [ 1, 0, 1 ] ) )
def test_it_initlializes_the_checkpoint_callback( self ):
    """Check that training builds the checkpoint callback for the joined file name."""
    FileName = 'mbc'
    self.__Path.join.return_value = FileName

    Network = ModelBaseSpec.StubbedFFN(
        PropertiesManager(),
        MagicMock( spec = Sequential )
    )
    Network.train( MagicMock(), MagicMock() )

    Expected = {
        'monitor': 'val_accuracy',
        'mode': 'max',
        'verbose': 1,
        'save_best_only': True,
    }
    self.__Checkpoint.assert_called_once_with( FileName, **Expected )
def setUp(self):
    """Build the sample frame, the collaborator mocks and the patched ``InputData`` for the controller tests."""
    # Four sample documents; the column order is fixed explicitly.
    Records = {
        'pmid': ['1a', '2a', '3a', '4a'],
        'text': [
            "My little cute Poney is a Poney",
            "My little farm is cute.",
            "My little programm is a application and runs and runs and runs.",
            "My little keyboard is to small"
        ],
        'is_cancer': [0, 1, 1, 0],
        'doid': [-1, 1, 2, -1],
        'cancer_type': ['no cancer', 'cancer', 'cancer', 'no cancer'],
    }
    self.__Data = DataFrame(
        Records,
        columns=['pmid', 'cancer_type', 'doid', 'is_cancer', 'text'],
    )

    self.__INDF = patch(
        'biomed.text_mining.text_mining_controller.InputData')
    self.__PM = PropertiesManager()

    # One spec'd mock per collaborator.
    self.__Encoder = MagicMock(spec=CategoriesEncoder)
    self.__FacilityManager = MagicMock(spec=FacilityManager)
    self.__Splitter = MagicMock(spec=Splitter)
    self.__Preprocessor = MagicMock(spec=Preprocessor)
    self.__Vectorizer = MagicMock(spec=Vectorizer)
    self.__Measurer = MagicMock(spec=Measurer)
    self.__MLP = MagicMock(spec=MLP)
    self.__Evaluator = MagicMock(spec=Evaluator)

    # Patched InputData: the attributes are set on the patched class mock itself.
    self.__IND = self.__INDF.start()
    self.__IND.Training = MagicMock()
    self.__IND.Training.shape = (MagicMock(), MagicMock())
    self.__IND.Validation = MagicMock()
    self.__IND.Test = MagicMock()

    # Default answers of the collaborators.
    self.__FacilityManager.clean.return_value = self.__Data
    self.__Splitter.trainingSplit.return_value = [(MagicMock(), MagicMock())]
    self.__Splitter.validationSplit.return_value = (MagicMock(), MagicMock())
    self.__Preprocessor.preprocessCorpus.return_value = MagicMock()

    TrainFeatures = MagicMock()
    TrainFeatures.tolist.return_value = []
    self.__Vectorizer.featureizeTrain.return_value = TrainFeatures
    self.__Vectorizer.featureizeTest.return_value = MagicMock()
    self.__Vectorizer.getSupportedFeatures.return_value = MagicMock()

    self.__Measurer.measureClassWeights.return_value = MagicMock()
def test_it_retruns_the_evaluation_score( self ):
    """Check that ``getTrainingScore`` returns whatever the model's ``evaluate`` returns."""
    Score = MagicMock()
    Model = MagicMock( spec = Sequential )
    Model.evaluate.return_value = Score
    # The loader mock hands back the very same model after training.
    self.__Loader.return_value = Model

    Features = InputData( MagicMock(), MagicMock(), MagicMock() )
    Labels = InputData( NP.zeros( ( 2, 3 ) ), MagicMock(), MagicMock() )

    Network = ModelBaseSpec.StubbedFFN( PropertiesManager(), Model )
    Network.train( Features, Labels )

    self.assertEqual( Network.getTrainingScore( Features, Labels ), Score )
def test_it_returns_the_training_history( self ):
    """Check that ``train`` returns the history produced by the model's ``fit``."""
    # `history` points back at the mock itself, so the assertion holds whether
    # `train` returns the fit result or its `.history` attribute.
    History = MagicMock()
    History.history = History

    Model = MagicMock( spec = Sequential )
    Model.fit.return_value = History

    Properties = PropertiesManager()
    Training = Properties[ "training" ]
    Training[ "epochs" ] = 1
    Training[ "batch_size" ] = 2
    Training[ "workers" ] = 2

    Network = ModelBaseSpec.StubbedFFN( Properties, Model )
    Returned = Network.train( MagicMock(), MagicMock() )

    self.assertEqual( Returned, History )
def fullfill(ServiceKey: str, _):
    """Return a freshly built stand-in for the requested service key.

    The whole table is rebuilt on every call; an unknown key raises KeyError.
    The second argument is ignored.
    """
    def specced(Interface):
        # Shorthand for a mock restricted to the given interface.
        return MagicMock(spec=Interface)

    Pair = {
        "properties": PropertiesManager(),
        "preprocessor": specced(Preprocessor),
        "preprocessor.cache.persistent": specced(Cache),
        "preprocessor.cache.shared": specced(Cache),
        "preprocessor.normalizer.simple": specced(NormalizerFactory),
        "preprocessor.normalizer.complex": specced(NormalizerFactory),
        "vectorizer": specced(Vectorizer),
        "vectorizer.selector": specced(Selector),
        "evaluator": specced(Evaluator),
        "evaluator.simple": specced(FileWriter),
        "evaluator.json": specced(FileWriter),
        "evaluator.csv": specced(FileWriter),
        "facilitymanager": specced(FacilityManager),
        "splitter": specced(Splitter),
        "measurer": specced(Measurer),
        "categories": specced(CategoriesEncoder),
        "mlp": specced(MLP)
    }
    return Pair[ServiceKey]
def generate_stats_and_write_to_file(self, preds):
    """Print a classification report for ``preds`` and append it to the shared results file.

    The targets are ``self.Y_test_75_multi`` for the 'doid' classifier and
    ``self.Y_test_75_binary`` otherwise; in the latter case ``preds`` is first
    passed through ``convert_buggy_doid_to_real_binary``. When the lengths of
    predictions and targets differ, a message is printed and nothing is
    written.
    """
    pm = PropertiesManager()

    if pm.classifier == "doid":
        preds_target = self.Y_test_75_multi
    else:
        preds_target = self.Y_test_75_binary
    if pm.classifier != 'doid':
        preds = self.convert_buggy_doid_to_real_binary(preds)

    if len(preds) != len(preds_target):
        print('Prediction lengths don\'t match')
        return

    # Count matching predictions whose label is neither '0' nor '-1'.
    # Access stays index-based because the containers may be label-indexed.
    tp = 0
    for position in range(len(preds)):
        predicted = preds[position]
        if predicted == preds_target[position] and predicted not in ('0', '-1'):
            tp += 1

    score = classification_report(preds_target, preds)
    f1_score_macro_accurate = f1_score(preds_target, preds, average='macro')
    print('classification report:\n', score, '\nmore accurate macro f1 score', f1_score_macro_accurate,
          '\n correctly predicted:', tp)

    # NOTE(review): the path is relative, so the target depends on the
    # current working directory of the process.
    with open('../results/all_results.txt', 'a') as file:
        file.write(f"classifier={pm.classifier}, model={pm.model}, preprocessing={pm.preprocessing['variant']}, "
                   f"ngrams={pm.tfidf_transformation_properties['ngram_range'][1]}\n"
                   f"{score}\nf1_score_macro_accurate={f1_score_macro_accurate}\ntrue positives={tp}\n\n")
def setUp(self):
    """Give every test its own, freshly constructed PropertiesManager."""
    Properties = PropertiesManager()
    self.__PM = Properties
def test_it_fails_if_the_model_was_not_trained_while_evaluating( self ):
    """Check that ``getTrainingScore`` raises a RuntimeError when ``train`` was never called."""
    Untrained = ModelBaseSpec.StubbedFFN(
        PropertiesManager(),
        MagicMock( spec = Sequential )
    )

    # NOTE(review): `msg` is only the failure message shown when nothing is
    # raised; it does not verify the text of the RuntimeError itself.
    with self.assertRaises( RuntimeError, msg = "The model has not be trained" ):
        Untrained.getTrainingScore( MagicMock(), MagicMock() )
def __fakeLocator( self, _, __ ):
    """Return a new PropertiesManager whose ``cache_dir`` is the spec's path; both arguments are ignored."""
    Properties = PropertiesManager()
    Properties.cache_dir = NumpyArrayFileCacheSpec.__Path
    return Properties
def setUp( self ):
    """Create a PropertiesManager with the number of splitting folds set to one."""
    Properties = PropertiesManager()
    Properties.splitting[ 'folds' ] = 1
    self.__PM = Properties