def test_it_brings_the_features_into_model_input_format(self, DataBinding: MagicMock):
    TrainingFeatures = Array([[0., 2.], [0.1, 0.3], [1.2, 4.3], [2.3, 4.2]])
    TestFeatures = Array([[0.1, 2.2], [0.13, 3.3], [1.25, 4.23], [2.1, 4.3]])
    ValidationIds = Series(['4a'])
    TrainingIds = Series(['1a', '2a', '3a'])

    self.__Splitter.validationSplit.return_value = (TrainingIds, ValidationIds)
    self.__Vectorizer.featureizeTrain.return_value = TrainingFeatures
    self.__Vectorizer.featureizeTest.return_value = TestFeatures
    self.__PM.classifier = 'is_cancer'

    MyController = TextminingController.Factory.getInstance(self.__fakeLocator)
    MyController.process(
        Data=self.__Data,
        TestData=self.__TestData,
        ShortName=MagicMock(),
        Description=MagicMock(),
    )

    ArgumentsFeatures, _ = DataBinding.call_args_list[0]
    self.assertEqual(TrainingFeatures.tolist()[0:3], ArgumentsFeatures[0].tolist())
    self.assertEqual([TrainingFeatures.tolist()[3]], ArgumentsFeatures[1].tolist())
    self.assertEqual(TestFeatures.tolist(), ArgumentsFeatures[2].tolist())

def test_it_hot_encodes_the_labels_for_multiclass(self):
    TrainingFeatures = Array([[0., 2.], [0.1, 0.3], [1.2, 4.3], [2.3, 4.2]])
    TestFeatures = Array([[0.1, 2.2], [0.13, 3.3], [1.25, 4.23], [2.1, 4.3]])
    ValidationIds = Series(['4a'])
    TrainingIds = Series(['1a', '2a', '3a'])

    self.__Splitter.validationSplit.return_value = (TrainingIds, ValidationIds)
    self.__Vectorizer.featureizeTrain.return_value = TrainingFeatures
    self.__Vectorizer.featureizeTest.return_value = TestFeatures
    self.__PM.classifier = 'doid'

    MyController = TextminingController.Factory.getInstance(self.__fakeLocator)
    MyController.process(
        Data=self.__Data,
        TestData=self.__TestData,
        ShortName=MagicMock(),
        Description=MagicMock(),
    )

    ArgumentsLabels, _ = self.__Encoder.hotEncode.call_args_list[0]
    self.assertEqual(
        list(self.__Data['doid'].filter(list(TrainingIds))),
        ArgumentsLabels[0].tolist()
    )

    ArgumentsLabels, _ = self.__Encoder.hotEncode.call_args_list[1]
    self.assertEqual(
        list(self.__Data['doid'].filter(list(ValidationIds))),
        ArgumentsLabels[0].tolist()
    )

    self.assertEqual(2, len(self.__Encoder.hotEncode.call_args_list))

def test_it_collects_the_input_data_for_labels(self, DataBinding: MagicMock):
    TrainingFeatures = Array([[0., 2.], [0.1, 0.3], [1.2, 4.3], [2.3, 4.2]])
    TestFeatures = Array([[0.1, 2.2], [0.13, 3.3], [1.25, 4.23], [2.1, 4.3]])
    EncodedTrainingLabels = MagicMock()
    EncodedValidationLabels = MagicMock()
    Encoded = [EncodedTrainingLabels, EncodedValidationLabels]

    self.__Vectorizer.featureizeTrain.return_value = TrainingFeatures
    self.__Vectorizer.featureizeTest.return_value = TestFeatures
    self.__Encoder.hotEncode.side_effect = lambda _: Encoded.pop(0)

    MyController = TextminingController.Factory.getInstance(self.__fakeLocator)
    MyController.process(
        Data=self.__Data,
        TestData=self.__TestData,
        ShortName=MagicMock(),
        Description=MagicMock(),
    )

    ArgumentsLabels, _ = DataBinding.call_args_list[1]
    self.assertEqual(EncodedTrainingLabels, ArgumentsLabels[0])
    self.assertEqual(EncodedValidationLabels, ArgumentsLabels[1])
    self.assertIsNone(ArgumentsLabels[2])

def test_it_maps_the_weights_to_the_given_classes(self, weightFunc: MagicMock):
    self.__PM.weights['use_class_weights'] = True
    Classes = Array([0, 1])
    Weights = Array([0.23, 0.42])
    weightFunc.return_value = Weights

    MyMeasurer = StdMeasurer.Factory.getInstance(self.__fakeLocator)
    self.assertDictEqual(
        {0: 0.23, 1: 0.42},
        MyMeasurer.measureClassWeights(Classes, MagicMock())
    )

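# The assertion above implies measureClassWeights pairs each class label with its
# computed weight positionally. A minimal, hypothetical sketch of that mapping
# (the real StdMeasurer implementation is not shown here; the name is illustrative):
def map_class_weights_sketch(classes, weights):
    # [0, 1] combined with [0.23, 0.42] becomes {0: 0.23, 1: 0.42}.
    return {int(label): float(weight) for label, weight in zip(classes, weights)}
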
def test_it_hotencodes_given_labels(self, HotEncoder: MagicMock):
    Labels = Array([1, 1, 2])
    Encoded = MagicMock()
    ExpectedReturn = MagicMock()

    HotEncoder.return_value = ExpectedReturn
    self.__Encoder.transform.return_value = Encoded
    self.__Encoder.classes_ = Array([1, 2])

    MyEncoder = StdCategoriesEncoder.Factory.getInstance()
    MyEncoder.setCategories(Series([1, 2, 3]))

    self.assertEqual(ExpectedReturn, MyEncoder.hotEncode(Labels))
    HotEncoder.assert_called_once_with(Encoded, 2)

def test_it_returns_the_amount_of_categories(self):
    self.__Encoder.classes_ = Array([1, 2, 3, 4])

    MyEncoder = StdCategoriesEncoder.Factory.getInstance()
    MyEncoder.setCategories(Series([4, 1, 1, 2, 3]))

    self.assertEqual(4, MyEncoder.amountOfCategories())

def test_it_splits_the_validation_data_of_the_training_data_for_multiclass(self):
    TrainingIds = Series(['1a', '3a'], index=[0, 1])
    TestIds = MagicMock()
    TrainingFeatures = Array([[0., 2.], [0.1, 0.3]])

    Expected = self.__Data['doid']
    Expected.index = list(self.__Data['pmid'])
    Expected = Expected.filter(list(TrainingIds))

    self.__PM.classifier = 'doid'
    self.__Splitter.trainingSplit.return_value = [(TrainingIds, TestIds)]
    self.__Vectorizer.featureizeTrain.return_value = TrainingFeatures

    MyController = TextminingController.Factory.getInstance(self.__fakeLocator)
    MyController.process(
        Data=self.__Data,
        TestData=None,
        ShortName=MagicMock(),
        Description=MagicMock(),
    )

    Arguments, _ = self.__Splitter.validationSplit.call_args_list[0]
    self.assertListEqual(list(TrainingIds), list(Arguments[0]))
    self.assertListEqual(list(Expected), list(Arguments[1]))
    self.__Splitter.validationSplit.assert_called_once()

def test_it_fails_to_return_hotencoded_labels_if_no_categories_had_been_set(self):
    MyEncoder = StdCategoriesEncoder.Factory.getInstance()

    with self.assertRaises(RuntimeError, msg="No Categories had been fit in so far"):
        MyEncoder.hotEncode(Array([1, 2, 3]))

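# Taken together, the StdCategoriesEncoder tests in this file imply roughly the
# behaviour sketched below. This is a hypothetical reconstruction for readability,
# not the actual implementation; class and attribute names are illustrative only:
class CategoriesEncoderSketch:
    def __init__(self, label_encoder, hot_encoder):
        self.__encoder = label_encoder    # sklearn-style LabelEncoder
        self.__hot_encoder = hot_encoder  # to_categorical-style callable
        self.__is_fitted = False

    def setCategories(self, categories):
        # Fit the label encoder on all known category values.
        self.__encoder.fit(list(categories))
        self.__is_fitted = True

    def amountOfCategories(self):
        return len(self.__encoder.classes_)

    def encode(self, labels):
        return self.__encoder.transform(labels)

    def hotEncode(self, labels):
        if not self.__is_fitted:
            raise RuntimeError("No Categories had been fit in so far")
        # One-hot encode the integer-encoded labels over all known categories.
        return self.__hot_encoder(self.encode(labels), self.amountOfCategories())
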
def test_it_saves_the_predictions_and_optionally_their_corresponding_labels(self, DF: DataFrame):
    ShortName = "Test"
    Frame = MagicMock(spec=DataFrame)
    Ids = [1, 2, 3, 4]
    Predicted = Array([1, 0, 1, 0])
    Actual = [1, 1, 1, 0]
    Path = OS.path.join(
        self.__PM.result_dir,
        '{}-{}'.format(ShortName, self.__TimeValue)
    )

    DF.return_value = Frame
    self.__PM.classifier = 'is_cancer'

    MyEval = StdEvaluator.Factory.getInstance(self.__fakeLocator)
    MyEval.start(ShortName, "test run")

    # Binary classifier: predictions are saved keyed by pmid.
    MyEval.capturePredictions(Predicted, Ids)
    MyEval.finalize()
    DF.assert_any_call(
        {'pmid': Ids, self.__PM.classifier: Predicted.tolist()},
        columns=['pmid', self.__PM.classifier],
    )
    Frame.to_csv.assert_any_call(OS.path.join(Path, 'predictions.csv'))
    Frame.reset_mock()

    # Multiclass classifier: same layout, keyed by the configured classifier.
    self.__PM.classifier = 'doid'
    MyEval.capturePredictions(Predicted, Ids)
    MyEval.finalize()
    DF.assert_any_call(
        {'pmid': Ids, self.__PM.classifier: Predicted.tolist()},
        columns=['pmid', self.__PM.classifier],
    )
    Frame.to_csv.assert_any_call(OS.path.join(Path, 'predictions.csv'))
    Frame.reset_mock()

    # With actual labels given, predicted and actual values are saved side by side.
    MyEval.capturePredictions(Predicted, Ids, Actual)
    MyEval.finalize()
    DF.assert_any_call(
        {'predicted': Predicted.tolist(), 'actual': Actual},
        columns=['predicted', 'actual'],
        index=Ids,
    )
    Frame.to_csv.assert_any_call(OS.path.join(Path, 'predictions.csv'))

def test_it_makes_a_kfold_split_on_test_and_training_data(self, Splitter: MagicMock):
    Expected = [
        (Series(['a', 'c']), Series(['b', 'd'])),
        (Series(['b', 'd']), Series(['a', 'c'])),
    ]
    X = Series(['a', 'b', 'c', 'd'])
    Y = Series([1, 2, 1, 2])

    Splitter.return_value = Splitter
    Splitter.split.return_value = [
        (Array([0, 2]), Array([1, 3])),
        (Array([1, 3]), Array([0, 2])),
    ]
    self.__PM.splitting['folds'] = 2

    MySplitter = StdSplitter.Factory.getInstance(self.__fakeLocator)
    Splitted = MySplitter.trainingSplit(X, Y)

    for Index in range(0, len(Expected)):
        self.assertEqual(list(Expected[Index][0]), list(Splitted[Index][0]))
        self.assertEqual(list(Expected[Index][1]), list(Splitted[Index][1]))

    Splitter.assert_called_once_with(
        n_splits=self.__PM.splitting['folds'],
        test_size=self.__PM.splitting['test'],
        random_state=self.__PM.splitting['seed'],
    )
    Splitter.split.assert_called_once_with(X, Y)

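# The mocked splitter returns positional index folds while the expected folds hold
# the original Series values. A hypothetical sketch of the index-to-value mapping
# trainingSplit appears to perform (X is assumed to be a pandas Series; this is not
# the actual StdSplitter code):
def map_folds_to_values_sketch(X, folds):
    # ([0, 2], [1, 3]) over Series(['a', 'b', 'c', 'd']) becomes (['a', 'c'], ['b', 'd']).
    return [(X.iloc[train], X.iloc[test]) for train, test in folds]
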
def setUp(self):
    self.__AdapterP = patch('biomed.mlp.util.weighted_crossentropy.KerasAdapter')
    self.__Adapter = self.__AdapterP.start()

    self.__MultiplicationP = patch('biomed.mlp.util.weighted_crossentropy.multiply')
    self.__Multiplication = self.__MultiplicationP.start()

    self.__NPP = patch('biomed.mlp.util.weighted_crossentropy.Numpy')
    self.__NP = self.__NPP.start()
    self.__NP.ones.return_value = Array([[0, 0], [0, 0]])

    self.__BinP = patch('biomed.mlp.util.weighted_crossentropy.Binary')
    self.__Bin = self.__BinP.start()

    self.__CatP = patch('biomed.mlp.util.weighted_crossentropy.Categorical')
    self.__Cat = self.__CatP.start()

    self.__SparseP = patch('biomed.mlp.util.weighted_crossentropy.Sparse')
    self.__Spare = self.__SparseP.start()

def test_it_normalizes_the_axis_of_the_cost_matrix_on_set(self):
    MyEntropy = WeightedCrossentropy('testentropy', MagicMock(), "bin")

    New = Array([0])
    MyEntropy.setCostMatrix(New)

    self.__Adapter.expand_dims.assert_called_with(New, 0)

def test_it_collects_the_input_data_for_labels(self, DataBinding: MagicMock):
    TrainingFeatures = Array([[0., 2.], [0.1, 0.3]])
    TrainingIds = Series(['1a'])
    ValidationIds = Series(['2a'])
    TestFeatures = Array([[0.1, 0.], [0.15, 0.5]])
    TestIds = Series(['3a', '4a'])

    EncodedTrainingLabels = MagicMock()
    EncodedValidationLabels = MagicMock()
    EncodedTestLabels = MagicMock()
    Encoded = [EncodedTrainingLabels, EncodedValidationLabels, EncodedTestLabels]

    self.__Splitter.trainingSplit.return_value = [(Series(['1a', '2a']), TestIds)]
    self.__Splitter.validationSplit.return_value = (TrainingIds, ValidationIds)
    self.__Vectorizer.featureizeTrain.return_value = TrainingFeatures
    self.__Vectorizer.featureizeTest.return_value = TestFeatures
    self.__Encoder.hotEncode.side_effect = lambda _: Encoded.pop(0)

    MyController = TextminingController.Factory.getInstance(self.__fakeLocator)
    MyController.process(
        Data=self.__Data,
        TestData=None,
        ShortName=MagicMock(),
        Description=MagicMock(),
    )

    ArgumentsLabels, _ = DataBinding.call_args_list[1]
    self.assertEqual(EncodedTrainingLabels, ArgumentsLabels[0])
    self.assertEqual(EncodedValidationLabels, ArgumentsLabels[1])
    self.assertEqual(EncodedTestLabels, ArgumentsLabels[2])

def test_it_accepts_lists_as_weights(self):
    Classes = [[1, 2, 3], [2, 0, 0], [3, 0, 0]]
    self.__NP.array = MagicMock(return_value=Array(Classes))

    MyEntropy = WeightedCrossentropy('testentropy', Classes, "bin")

    self.assertListEqual(Classes, MyEntropy.get_config()['ClassWeights'])

def test_it_maps_given_class_weights(self):
    Classes = {0: 1, 1: 2, 2: 3}
    Remapped = Array([[0, 0, 0], [0, 0, 0], [0, 0, 0]])
    self.__NP.ones.return_value = Remapped

    WeightedCrossentropy('testentropy', Classes, "bin")

    self.assertEqual([[1, 2, 3], [2, 0, 0], [3, 0, 0]], Remapped.tolist())

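# The expected matrix above suggests the weight dict is written into the first row
# and first column of the freshly allocated matrix: {0: 1, 1: 2, 2: 3} turns
# [[0, 0, 0], [0, 0, 0], [0, 0, 0]] into [[1, 2, 3], [2, 0, 0], [3, 0, 0]].
# A hypothetical sketch of that expansion (not the actual WeightedCrossentropy code):
def expand_class_weights_sketch(matrix, class_weights):
    for index, weight in class_weights.items():
        # Each weight lands both in the first row and in the first column.
        matrix[0][index] = weight
        matrix[index][0] = weight
    return matrix
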
def test_it_hot_encodes_the_labels_for_binary(self):
    TrainingFeatures = Array([[0., 2.], [0.1, 0.3]])
    TrainingIds = Series(['1a'])
    ValidationIds = Series(['2a'])
    TestFeatures = Array([[0.1, 0.], [0.15, 0.5]])
    TestIds = Series(['3a', '4a'])

    self.__Splitter.trainingSplit.return_value = [(Series(['1a', '2a']), TestIds)]
    self.__Splitter.validationSplit.return_value = (TrainingIds, ValidationIds)
    self.__Vectorizer.featureizeTrain.return_value = TrainingFeatures
    self.__Vectorizer.featureizeTest.return_value = TestFeatures
    self.__PM.classifier = 'is_cancer'

    MyController = TextminingController.Factory.getInstance(self.__fakeLocator)
    MyController.process(
        Data=self.__Data,
        TestData=None,
        ShortName=MagicMock(),
        Description=MagicMock(),
    )

    ArgumentsLabels, _ = self.__Encoder.hotEncode.call_args_list[0]
    self.assertEqual(
        list(self.__Data['is_cancer'].filter(list(TrainingIds))),
        ArgumentsLabels[0].tolist()
    )

    ArgumentsLabels, _ = self.__Encoder.hotEncode.call_args_list[1]
    self.assertEqual(
        list(self.__Data['is_cancer'].filter(list(ValidationIds))),
        ArgumentsLabels[0].tolist()
    )

    ArgumentsLabels, _ = self.__Encoder.hotEncode.call_args_list[2]
    self.assertEqual(
        list(self.__Data['is_cancer'].filter(list(TestIds))),
        ArgumentsLabels[0].tolist()
    )

def read_image(filepath: Path, logger: Logger = None) -> Array:
    """Reads in an image as an array and logs the read to the console.

    Args:
        filepath (Path): Filepath to the image.
        logger (Logger): Logging object for writing output to the console.
            (default: None)

    Returns:
        np.array: Target image.
    """
    pil_pointer = pil_image.open(filepath).convert("RGB")
    image = Array(pil_pointer)
    pil_pointer.close()

    if logger is not None:
        logger.info(f"Read: {filepath}")

    return image

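# A brief, hypothetical usage sketch for read_image; the file name and logger name
# are placeholders, not values from the original code:
def read_image_usage_sketch():
    import logging
    logging.basicConfig(level=logging.INFO)
    image = read_image(Path("sample.png"), logger=logging.getLogger("reader"))
    # The RGB conversion yields a (height, width, 3) array.
    return image.shape
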
def test_it_encodes_given_labels(self):
    Labels = Array([1, 1, 2])
    ExpectedReturn = MagicMock()
    self.__Encoder.transform.return_value = ExpectedReturn

    MyEncoder = StdCategoriesEncoder.Factory.getInstance()
    MyEncoder.setCategories(Series([1, 2, 3]))

    self.assertEqual(ExpectedReturn, MyEncoder.encode(Labels))

    Arguments, _ = self.__Encoder.transform.call_args_list[0]
    self.assertListEqual(Labels.tolist(), Arguments[0].tolist())
    self.__Encoder.transform.assert_called_once()

def test_it_raises_an_error_if_the_given_weights_are_incompatible(self):
    Labels = TF.constant([
        [1, 1],
        [3, 3],
    ])
    Predictions = TF.constant([
        [1, 1],
        [4, 4],
    ])

    self.__NP.ones.return_value = Array([[0, 0]])
    MyEntropy = WeightedCrossentropy('testentropy', dict(), "bin")

    with self.assertRaises(TypeError):
        MyEntropy(Labels, Predictions)

def test_it_casts_the_normalized_Classes_on_set(self):
    Classes = {0: 1, 1: 2}
    NormClasses = MagicMock()
    CastBase = MagicMock()
    self.__Adapter.expand_dims.return_value = NormClasses
    self.__Adapter.floatx.return_value = CastBase

    New = Array([0])
    MyEntropy = WeightedCrossentropy('testentropy', Classes, "bin")
    MyEntropy.setCostMatrix(New)

    self.__Adapter.cast.assert_called_with(NormClasses, CastBase)

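# The two setCostMatrix tests suggest the new matrix gets a leading axis and is then
# cast to the backend float type. A hypothetical sketch of that step, assuming
# KerasAdapter aliases tensorflow.keras.backend (the patch target hints at this,
# but it is not confirmed here):
def set_cost_matrix_sketch(matrix):
    from tensorflow.keras import backend as K
    normalized = K.expand_dims(matrix, 0)   # add a leading axis
    return K.cast(normalized, K.floatx())   # cast to the backend's float dtype
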
def test_it_splits_the_validation_data_of_the_training_data_for_binary(self):
    TrainingFeatures = Array([[0., 2.], [0.1, 0.3], [1.2, 4.3], [2.3, 4.2]])
    Expected = self.__Data['is_cancer']
    Expected.index = list(self.__Data['pmid'])

    self.__Vectorizer.featureizeTrain.return_value = TrainingFeatures

    MyController = TextminingController.Factory.getInstance(self.__fakeLocator)
    MyController.process(
        Data=self.__Data,
        TestData=self.__TestData,
        ShortName=MagicMock(),
        Description=MagicMock(),
    )

    Arguments, _ = self.__Splitter.validationSplit.call_args_list[0]
    self.assertListEqual(list(self.__Data['pmid']), list(Arguments[0]))
    self.assertListEqual(list(Expected), list(Arguments[1]))
    self.__Splitter.validationSplit.assert_called_once()

def test_it_returns_the_current_configuration(self):
    Name = 'testentropy'
    EntropyKey = "bin"
    Classes = {0: 1, 1: 2, 2: 3}
    name = "something"
    self.__NP.ones.return_value = Array([[0, 0, 0], [0, 0, 0], [0, 0, 0]])

    MyEntropy = WeightedCrossentropy(Name, Classes, EntropyKey, {'name': name})

    self.assertDictEqual(
        {
            'Name': Name,
            'ClassWeights': [[1, 2, 3], [2, 0, 0], [3, 0, 0]],
            'EntropyKey': EntropyKey,
            'Reduction': None,
            'KeywordedEntropyArgs': {'name': name},
        },
        MyEntropy.get_config()
    )

def __convertToArray(self, Value: Series):
    # Convert a pandas Series into a plain array of its values.
    return Array(list(Value))

def main(data):
    # Keep only values outside the closed interval [30, 70] and sum them.
    data = Array(data)
    return sum(data[(data < 30) | (data > 70)])

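# A quick illustration of main's filtering with made-up values: entries strictly
# below 30 or strictly above 70 are kept, everything in between is dropped.
def main_usage_sketch():
    return main([10, 50, 80])  # keeps 10 and 80, drops 50, so this returns 90
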
def principal_eigenvalue(c: np.ndarray) -> float:
    # For the principal eigenpair, c @ pe equals eigenvalue * pe, so dividing any
    # nonzero component of c @ pe by the matching component of pe yields the eigenvalue.
    pe = principal_eigenvector(c)
    nz_index = pe.nonzero()[0][0]
    return c.dot(pe)[nz_index] / pe[nz_index]

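# principal_eigenvector is not defined in this snippet. A hypothetical stand-in for
# experimentation, assuming the eigenvector of the largest-magnitude eigenvalue is
# wanted (this is an illustration, not the original helper):
def principal_eigenvector_sketch(c: np.ndarray) -> np.ndarray:
    eigenvalues, eigenvectors = np.linalg.eig(c)
    # Pick the eigenvector column belonging to the dominant eigenvalue.
    return np.real(eigenvectors[:, np.argmax(np.abs(eigenvalues))])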