Example #1
0
    def test_it_brings_the_features_into_model_input_format(
            self, DataBinding: MagicMock):
        """Check the feature arguments of the first ``DataBinding`` call.

        The splitter mock hands back ids '1a'-'3a' and '4a' as the two parts
        of the validation split. The first ``DataBinding`` call is then
        expected to receive the first three training rows, the fourth
        training row on its own, and the complete test feature matrix.
        """
        train_matrix = Array([
            [0., 2.],
            [0.1, 0.3],
            [1.2, 4.3],
            [2.3, 4.2],
        ])
        test_matrix = Array([
            [0.1, 2.2],
            [0.13, 3.3],
            [1.25, 4.23],
            [2.1, 4.3],
        ])
        train_ids = Series(['1a', '2a', '3a'])
        validation_ids = Series(['4a'])

        # Fix what the mocked collaborators hand back to the controller.
        self.__PM.classifier = 'is_cancer'
        self.__Splitter.validationSplit.return_value = (
            train_ids,
            validation_ids,
        )
        self.__Vectorizer.featureizeTrain.return_value = train_matrix
        self.__Vectorizer.featureizeTest.return_value = test_matrix

        controller = TextminingController.Factory.getInstance(
            self.__fakeLocator
        )
        controller.process(
            Data=self.__Data,
            TestData=self.__TestData,
            ShortName=MagicMock(),
            Description=MagicMock(),
        )

        # Only the positional arguments of the first call are inspected.
        bound_features, _ = DataBinding.call_args_list[0]
        train_rows = train_matrix.tolist()

        self.assertEqual(train_rows[0:3], bound_features[0].tolist())
        self.assertEqual([train_rows[3]], bound_features[1].tolist())
        self.assertEqual(test_matrix.tolist(), bound_features[2].tolist())
Example #2
0
    def test_it_hot_encodes_the_labels_for_multiclass(self):
        """Check which 'doid' labels are handed to the encoder's ``hotEncode``.

        The first call must carry the labels selected by the training ids,
        the second call those selected by the validation ids, and there must
        be two calls in total.
        """
        train_matrix = Array([
            [0., 2.],
            [0.1, 0.3],
            [1.2, 4.3],
            [2.3, 4.2],
        ])
        test_matrix = Array([
            [0.1, 2.2],
            [0.13, 3.3],
            [1.25, 4.23],
            [2.1, 4.3],
        ])
        train_ids = Series(['1a', '2a', '3a'])
        validation_ids = Series(['4a'])

        self.__PM.classifier = 'doid'
        self.__Splitter.validationSplit.return_value = (
            train_ids,
            validation_ids,
        )
        self.__Vectorizer.featureizeTrain.return_value = train_matrix
        self.__Vectorizer.featureizeTest.return_value = test_matrix

        controller = TextminingController.Factory.getInstance(
            self.__fakeLocator
        )
        controller.process(
            Data=self.__Data,
            TestData=self.__TestData,
            ShortName=MagicMock(),
            Description=MagicMock(),
        )

        encoder_calls = self.__Encoder.hotEncode.call_args_list

        first_args, _ = encoder_calls[0]
        self.assertEqual(
            list(self.__Data['doid'].filter(list(train_ids))),
            first_args[0].tolist(),
        )

        second_args, _ = encoder_calls[1]
        self.assertEqual(
            list(self.__Data['doid'].filter(list(validation_ids))),
            second_args[0].tolist(),
        )

        self.assertEqual(2, len(encoder_calls))
Example #3
0
    def test_it_collects_the_input_data_for_labels(
        self,
        DataBinding: MagicMock,
    ):
        """Check the label arguments of the second ``DataBinding`` call.

        ``hotEncode`` is stubbed to return two prepared mocks in call order.
        The second ``DataBinding`` call must receive them as its first two
        positional arguments, followed by ``None``.
        """
        train_matrix = Array([
            [0., 2.],
            [0.1, 0.3],
            [1.2, 4.3],
            [2.3, 4.2],
        ])
        test_matrix = Array([
            [0.1, 2.2],
            [0.13, 3.3],
            [1.25, 4.23],
            [2.1, 4.3],
        ])

        encoded_train = MagicMock()
        encoded_validation = MagicMock()
        pending = [encoded_train, encoded_validation]

        def next_encoded(_):
            # Each call consumes the next prepared encoding.
            return pending.pop(0)

        self.__Vectorizer.featureizeTrain.return_value = train_matrix
        self.__Vectorizer.featureizeTest.return_value = test_matrix
        self.__Encoder.hotEncode.side_effect = next_encoded

        controller = TextminingController.Factory.getInstance(
            self.__fakeLocator
        )
        controller.process(
            Data=self.__Data,
            TestData=self.__TestData,
            ShortName=MagicMock(),
            Description=MagicMock(),
        )

        bound_labels, _ = DataBinding.call_args_list[1]

        self.assertEqual(encoded_train, bound_labels[0])
        self.assertEqual(encoded_validation, bound_labels[1])
        self.assertEqual(None, bound_labels[2])
    def test_it_maps_the_weights_to_the_given_classes( self, weightFunc: MagicMock ):
        """Check that ``measureClassWeights`` returns a class -> weight dict.

        The patched weight function yields ``[0.23, 0.42]`` for the classes
        ``[0, 1]``; the measurer is expected to return ``{0: 0.23, 1: 0.42}``.
        """
        class_ids = Array([0, 1])
        computed_weights = Array([0.23, 0.42])
        weightFunc.return_value = computed_weights

        self.__PM.weights['use_class_weights'] = True

        measurer = StdMeasurer.Factory.getInstance(self.__fakeLocator)
        mapping = measurer.measureClassWeights(class_ids, MagicMock())

        self.assertDictEqual({0: 0.23, 1: 0.42}, mapping)
    def test_it_hotencodes_given_labels(self, HotEncoder: MagicMock):
        """Check that ``hotEncode`` forwards the transformed labels.

        The patched ``HotEncoder`` must be called exactly once with the
        result of the encoder mock's ``transform`` and the number 2, and its
        return value must be what ``hotEncode`` returns.
        """
        labels = Array([1, 1, 2])
        transformed = MagicMock()
        one_hot = MagicMock()

        self.__Encoder.classes_ = Array([1, 2])
        self.__Encoder.transform.return_value = transformed
        HotEncoder.return_value = one_hot

        encoder = StdCategoriesEncoder.Factory.getInstance()
        encoder.setCategories(Series([1, 2, 3]))
        result = encoder.hotEncode(labels)

        self.assertEqual(one_hot, result)
        HotEncoder.assert_called_once_with(transformed, 2)
    def test_it_returns_the_amount_of_categories(self):
        """Check that ``amountOfCategories`` reports 4 for four classes."""
        # The encoder mock exposes four distinct classes.
        self.__Encoder.classes_ = Array([1, 2, 3, 4])

        encoder = StdCategoriesEncoder.Factory.getInstance()
        encoder.setCategories(Series([4, 1, 1, 2, 3]))
        amount = encoder.amountOfCategories()

        self.assertEqual(4, amount)
    def test_it_splits_the_validation_data_of_the_trainings_data_for_multiclass(
            self):
        """Check the arguments of the single ``validationSplit`` call.

        With the classifier set to 'doid' and no test data supplied, the
        splitter's ``validationSplit`` must be called once with the training
        ids and the 'doid' labels selected by those ids.
        """
        train_ids = Series(['1a', '3a'], index=[0, 1])
        test_ids = MagicMock()
        train_matrix = Array([[0., 2.], [0.1, 0.3]])

        # Re-index the label column by pmid, then keep the training ids only.
        labels = self.__Data['doid']
        labels.index = list(self.__Data['pmid'])
        expected_labels = labels.filter(list(train_ids))

        self.__PM.classifier = 'doid'
        self.__Splitter.trainingSplit.return_value = [(train_ids, test_ids)]
        self.__Vectorizer.featureizeTrain.return_value = train_matrix

        controller = TextminingController.Factory.getInstance(
            self.__fakeLocator
        )
        controller.process(
            Data=self.__Data,
            TestData=None,
            ShortName=MagicMock(),
            Description=MagicMock(),
        )

        split_args, _ = self.__Splitter.validationSplit.call_args_list[0]

        self.assertListEqual(list(train_ids), list(split_args[0]))
        self.assertListEqual(list(expected_labels), list(split_args[1]))
        self.__Splitter.validationSplit.assert_called_once()
    def test_it_fails_to_return_hotencode_labels_if_no_categories_had_been_set(
            self):
        """Check that ``hotEncode`` raises ``RuntimeError`` without categories."""
        encoder = StdCategoriesEncoder.Factory.getInstance()
        labels = Array([1, 2, 3])

        # NOTE: ``msg`` is only the text reported when the assertion fails;
        # it is not matched against the message of the raised error.
        expectation = self.assertRaises(
            RuntimeError,
            msg="No Categories had been fit in so far",
        )
        with expectation:
            encoder.hotEncode(labels)
    def test_it_saves_the_predictions_and_eventually_their_corresponding_labels(
            self, DF: DataFrame):
        """Check how captured predictions are framed and written to CSV.

        Three capture/finalize rounds are run on one evaluator:

        1. classifier 'is_cancer', predictions and ids only,
        2. classifier 'doid', predictions and ids only,
        3. predictions, ids and the actual labels.

        Rounds 1 and 2 expect a frame with a 'pmid' column and a column named
        after the classifier; round 3 expects 'predicted'/'actual' columns
        indexed by the ids. Every round expects ``to_csv`` to be called with
        the 'predictions.csv' path inside the run directory.
        """
        short_name = "Test"

        frame = MagicMock(spec=DataFrame)
        ids = [1, 2, 3, 4]
        predicted = Array([1, 0, 1, 0])
        actual = [1, 1, 1, 0]

        run_dir = OS.path.join(
            self.__PM.result_dir,
            '{}-{}'.format(short_name, self.__TimeValue),
        )

        def expect_id_frame():
            # Frame layout used when no actual labels were captured.
            label_column = self.__PM.classifier
            DF.assert_any_call(
                {
                    'pmid': ids,
                    label_column: predicted.tolist(),
                },
                columns=['pmid', label_column],
            )

        def expect_csv_written():
            frame.to_csv.assert_any_call(
                OS.path.join(run_dir, 'predictions.csv')
            )

        DF.return_value = frame
        self.__PM.classifier = 'is_cancer'

        evaluator = StdEvaluator.Factory.getInstance(self.__fakeLocator)
        evaluator.start(short_name, "test run")

        # Round 1: binary classifier, no actual labels.
        evaluator.capturePredictions(predicted, ids)
        evaluator.finalize()
        expect_id_frame()
        expect_csv_written()
        frame.reset_mock()

        # Round 2: same evaluator after switching the classifier.
        self.__PM.classifier = 'doid'
        evaluator.capturePredictions(predicted, ids)
        evaluator.finalize()
        expect_id_frame()
        expect_csv_written()
        frame.reset_mock()

        # Round 3: actual labels are supplied alongside the predictions.
        evaluator.capturePredictions(predicted, ids, actual)
        evaluator.finalize()
        DF.assert_any_call(
            {
                'predicted': predicted.tolist(),
                'actual': actual,
            },
            columns=['predicted', 'actual'],
            index=ids,
        )
        expect_csv_written()
    def test_it_makes_a_kfold_split_on_test_and_trainings_data(
        self,
        Splitter: MagicMock
    ):
        """Check that ``trainingSplit`` maps fold indices back onto the data.

        The patched splitter yields two index pairs; the result must hold the
        matching elements of ``samples`` for each pair. The splitter must be
        built once from the 'folds', 'test' and 'seed' settings and its
        ``split`` must be called once with the samples and targets.
        """
        expected_folds = [
            (Series(['a', 'c']), Series(['b', 'd'])),
            (Series(['b', 'd']), Series(['a', 'c'])),
        ]

        samples = Series(['a', 'b', 'c', 'd'])
        targets = Series([1, 2, 1, 2])

        # Instantiating the patched class returns the mock itself, so
        # ``split`` can be configured and inspected on ``Splitter`` directly.
        Splitter.return_value = Splitter
        Splitter.split.return_value = [
            (Array([0, 2]), Array([1, 3])),
            (Array([1, 3]), Array([0, 2])),
        ]

        self.__PM.splitting['folds'] = 2

        splitter = StdSplitter.Factory.getInstance(self.__fakeLocator)
        folds = splitter.trainingSplit(samples, targets)

        for position, (train_part, test_part) in enumerate(expected_folds):
            self.assertEqual(list(train_part), list(folds[position][0]))
            self.assertEqual(list(test_part), list(folds[position][1]))

        settings = self.__PM.splitting
        Splitter.assert_called_once_with(
            n_splits=settings['folds'],
            test_size=settings['test'],
            random_state=settings['seed'],
        )

        Splitter.split.assert_called_once_with(samples, targets)
 def setUp( self ):
     """Start a patch for each name looked up in ``weighted_crossentropy``.

     For every patched name both the patcher (``...P`` attribute) and the
     mock it yields are stored on the test instance. Nothing is stopped
     here; presumably a ``tearDown`` outside this view stops the patchers.
     """
     # Keras backend adapter and the ``multiply`` helper.
     self.__AdapterP = patch( 'biomed.mlp.util.weighted_crossentropy.KerasAdapter' )
     self.__Adapter = self.__AdapterP.start()
     self.__MultiplicationP = patch( 'biomed.mlp.util.weighted_crossentropy.multiply' )
     self.__Multiplication = self.__MultiplicationP.start()
     # Numpy stand-in; ``ones`` hands back a 2x2 zero matrix by default.
     self.__NPP = patch( 'biomed.mlp.util.weighted_crossentropy.Numpy' )
     self.__NP = self.__NPP.start()
     self.__NP.ones.return_value = Array( [ [0,0],[0,0] ] )
     # The three names ``Binary``, ``Categorical`` and ``Sparse``.
     self.__BinP = patch( 'biomed.mlp.util.weighted_crossentropy.Binary' )
     self.__Bin = self.__BinP.start()
     self.__CatP = patch( 'biomed.mlp.util.weighted_crossentropy.Categorical' )
     self.__Cat = self.__CatP.start()
     self.__SparseP = patch( 'biomed.mlp.util.weighted_crossentropy.Sparse' )
     # NOTE(review): attribute is spelled ``__Spare`` (not ``__Sparse``);
     # left untouched because code outside this view may reference it.
     self.__Spare = self.__SparseP.start()
    def test_it_normalizes_the_axis_of_the_cost_matrix_on_set( self ):
        """Check that ``setCostMatrix`` calls ``expand_dims`` on axis 0.

        The adapter mock's ``expand_dims`` must have been called last with
        the new cost matrix and ``0``.
        """
        entropy = WeightedCrossentropy('testentropy', MagicMock(), "bin")

        cost_matrix = Array([0])
        entropy.setCostMatrix(cost_matrix)

        self.__Adapter.expand_dims.assert_called_with(cost_matrix, 0)
    def test_it_collects_the_input_data_for_labels(
        self,
        DataBinding: MagicMock,
    ):
        """Check the label arguments of the second ``DataBinding`` call.

        No test data is passed to the controller. ``hotEncode`` is stubbed to
        return three prepared mocks in call order, and the second
        ``DataBinding`` call must receive them as its three positional
        arguments in that same order.
        """
        train_matrix = Array([[0., 2.], [0.1, 0.3]])
        train_ids = Series(['1a'])
        validation_ids = Series(['2a'])
        test_matrix = Array([[0.1, 0.], [0.15, 0.5]])
        test_ids = Series(['3a', '4a'])

        encoded_train = MagicMock()
        encoded_validation = MagicMock()
        encoded_test = MagicMock()
        pending = [encoded_train, encoded_validation, encoded_test]

        def next_encoded(_):
            # Each call consumes the next prepared encoding.
            return pending.pop(0)

        self.__Splitter.trainingSplit.return_value = [
            (Series(['1a', '2a']), test_ids),
        ]
        self.__Splitter.validationSplit.return_value = (
            train_ids,
            validation_ids,
        )
        self.__Vectorizer.featureizeTrain.return_value = train_matrix
        self.__Vectorizer.featureizeTest.return_value = test_matrix
        self.__Encoder.hotEncode.side_effect = next_encoded

        controller = TextminingController.Factory.getInstance(
            self.__fakeLocator
        )
        controller.process(
            Data=self.__Data,
            TestData=None,
            ShortName=MagicMock(),
            Description=MagicMock(),
        )

        bound_labels, _ = DataBinding.call_args_list[1]

        self.assertEqual(encoded_train, bound_labels[0])
        self.assertEqual(encoded_validation, bound_labels[1])
        self.assertEqual(encoded_test, bound_labels[2])
    def test_it_accetps_lists_as_weights( self ):
        """Check that nested-list weights show up unchanged in the config.

        ``Numpy.array`` is stubbed to return the array form of the list; the
        'ClassWeights' entry of ``get_config()`` must equal the original list.
        """
        weights = [[1, 2, 3], [2, 0, 0], [3, 0, 0]]

        self.__NP.array = MagicMock(return_value=Array(weights))

        entropy = WeightedCrossentropy('testentropy', weights, "bin")
        config = entropy.get_config()

        self.assertListEqual(weights, config['ClassWeights'])
    def test_it_maps_given_class_weights( self ):
        """Check how a class-weight dict is written into the cost matrix.

        ``Numpy.ones`` is stubbed to return a 3x3 zero matrix. After the
        entropy is constructed with ``{0: 1, 1: 2, 2: 3}``, that same matrix
        is expected to hold the weights in its first row and first column.
        """
        class_weights = {0: 1, 1: 2, 2: 3}
        cost_matrix = Array([
            [0, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
        ])
        self.__NP.ones.return_value = cost_matrix

        # Only the effect of construction on ``cost_matrix`` is inspected.
        WeightedCrossentropy('testentropy', class_weights, "bin")

        expected_rows = [[1, 2, 3], [2, 0, 0], [3, 0, 0]]
        self.assertEqual(expected_rows, cost_matrix.tolist())
    def test_it_hot_encodes_the_labels_for_binary(self):
        """Check which 'is_cancer' labels are handed to ``hotEncode``.

        No test data is passed to the controller. The first three
        ``hotEncode`` calls must carry, in order, the labels selected by the
        training ids, the validation ids and the test ids.
        """
        train_matrix = Array([[0., 2.], [0.1, 0.3]])
        train_ids = Series(['1a'])
        validation_ids = Series(['2a'])
        test_matrix = Array([[0.1, 0.], [0.15, 0.5]])
        test_ids = Series(['3a', '4a'])

        self.__Splitter.trainingSplit.return_value = [
            (Series(['1a', '2a']), test_ids),
        ]
        self.__Splitter.validationSplit.return_value = (
            train_ids,
            validation_ids,
        )
        self.__Vectorizer.featureizeTrain.return_value = train_matrix
        self.__Vectorizer.featureizeTest.return_value = test_matrix
        self.__PM.classifier = 'is_cancer'

        controller = TextminingController.Factory.getInstance(
            self.__fakeLocator
        )
        controller.process(
            Data=self.__Data,
            TestData=None,
            ShortName=MagicMock(),
            Description=MagicMock(),
        )

        encoder_calls = self.__Encoder.hotEncode.call_args_list
        id_groups = (train_ids, validation_ids, test_ids)

        # Call number ``position`` must have received the labels of group
        # number ``position``.
        for position, ids in enumerate(id_groups):
            call_args, _ = encoder_calls[position]
            self.assertEqual(
                list(self.__Data['is_cancer'].filter(list(ids))),
                call_args[0].tolist(),
            )
Example #17
0
def read_image(filepath: Path, logger: Logger = None) -> Array:
    """Read an image file into an RGB array and optionally log the read.

    Args:
        filepath (Path): Filepath to image.
        logger (Logger): Logging object that receives an info record after
            the image has been read. Nothing is logged when it is None.
            (default: None)

    Returns:
        np.array: Target image, converted to RGB mode.
    """
    # ``convert`` returns a new image object, so closing only the converted
    # copy would leave the object returned by ``open`` unclosed. The ``with``
    # block closes that one, also when conversion fails.
    with pil_image.open(filepath) as source:
        rgb = source.convert("RGB")
        try:
            image = Array(rgb)
        finally:
            # Release the converted copy even if building the array raises.
            rgb.close()
    if logger is not None:
        logger.info(f"Read: {filepath}")
    return image
    def test_it_encodes_given_labels(self):
        """Check that ``encode`` delegates to the encoder mock's ``transform``.

        ``encode`` must return what ``transform`` returns, and ``transform``
        must have been called exactly once with the given labels.
        """
        labels = Array([1, 1, 2])
        encoded = MagicMock()
        self.__Encoder.transform.return_value = encoded

        encoder = StdCategoriesEncoder.Factory.getInstance()
        encoder.setCategories(Series([1, 2, 3]))
        result = encoder.encode(labels)

        self.assertEqual(encoded, result)

        transform_args, _ = self.__Encoder.transform.call_args_list[0]
        self.assertListEqual(labels.tolist(), transform_args[0].tolist())

        self.__Encoder.transform.assert_called_once()
    def test_it_raises_an_error_if_the_given_weights_are_incompatible( self ):
        """Check that calling the entropy raises ``TypeError``.

        ``Numpy.ones`` is stubbed to return a 1x2 matrix and the entropy is
        built from an empty weight dict; calling it with 2x2 label and
        prediction tensors is expected to raise.
        """
        truth = TF.constant([
            [1, 1],
            [3, 3],
        ])
        predicted = TF.constant([
            [1, 1],
            [4, 4],
        ])

        self.__NP.ones.return_value = Array([[0, 0]])

        entropy = WeightedCrossentropy('testentropy', {}, "bin")

        with self.assertRaises(TypeError):
            entropy(truth, predicted)
    def test_it_casts_the_normalized_Classes_on_set( self ):
        """Check that ``setCostMatrix`` casts the expanded matrix.

        The adapter mock's ``cast`` must have been called last with the
        return values of ``expand_dims`` and ``floatx``.
        """
        class_weights = {0: 1, 1: 2}

        expanded = MagicMock()
        float_type = MagicMock()
        self.__Adapter.expand_dims.return_value = expanded
        self.__Adapter.floatx.return_value = float_type

        cost_matrix = Array([0])
        entropy = WeightedCrossentropy('testentropy', class_weights, "bin")
        entropy.setCostMatrix(cost_matrix)

        self.__Adapter.cast.assert_called_with(expanded, float_type)
Example #21
0
    def test_it_splits_the_validation_data_of_the_trainings_data_for_binary(
            self):
        """Check the arguments of the single ``validationSplit`` call.

        With test data supplied, the splitter's ``validationSplit`` must be
        called once with all pmids and the complete 'is_cancer' label column.
        """
        train_matrix = Array([
            [0., 2.],
            [0.1, 0.3],
            [1.2, 4.3],
            [2.3, 4.2],
        ])

        # The label column is re-indexed by pmid before it is compared.
        labels = self.__Data['is_cancer']
        labels.index = list(self.__Data['pmid'])

        self.__Vectorizer.featureizeTrain.return_value = train_matrix

        controller = TextminingController.Factory.getInstance(
            self.__fakeLocator
        )
        controller.process(
            Data=self.__Data,
            TestData=self.__TestData,
            ShortName=MagicMock(),
            Description=MagicMock(),
        )

        split_args, _ = self.__Splitter.validationSplit.call_args_list[0]

        self.assertListEqual(list(self.__Data['pmid']), list(split_args[0]))
        self.assertListEqual(list(labels), list(split_args[1]))
        self.__Splitter.validationSplit.assert_called_once()
    def test_it_returns_the_current_configuration( self ):
        """Check the dictionary returned by ``get_config``.

        It must contain the entropy name, the weight matrix as nested lists,
        the entropy key, a ``None`` reduction and the keyword arguments that
        were passed as the fourth constructor argument.
        """
        entropy_name = 'testentropy'
        entropy_key = "bin"
        class_weights = {0: 1, 1: 2, 2: 3}
        inner_name = "something"

        self.__NP.ones.return_value = Array([
            [0, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
        ])

        entropy = WeightedCrossentropy(
            entropy_name,
            class_weights,
            entropy_key,
            {'name': inner_name},
        )

        expected_config = {
            'Name': entropy_name,
            'ClassWeights': [[1, 2, 3], [2, 0, 0], [3, 0, 0]],
            'EntropyKey': entropy_key,
            'Reduction': None,
            'KeywordedEntropyArgs': {'name': inner_name},
        }
        self.assertDictEqual(expected_config, entropy.get_config())
Example #23
0
 def __convertToArray( self, Value: Series ):
     """Return an ``Array`` built from the items of ``Value``."""
     # Materialise the items as a list first, then wrap them.
     items = list( Value )
     return Array( items )
Example #24
0
def main(data):
    """Return the sum of all values below 30 or above 70.

    Args:
        data: Values accepted by ``Array`` (for example a list of numbers).

    Returns:
        The sum of the selected values as a NumPy scalar. It equals 0 when
        no value lies outside the closed range 30..70.
    """
    values = Array(data)
    # Boolean mask of the entries strictly outside the 30..70 band.
    outside = (values < 30) | (values > 70)
    # Use the array's own reduction instead of the builtin ``sum``, which
    # would iterate the elements one by one at Python level.
    return values[outside].sum()
Example #25
0
def principal_eigenvalue(c: np.ndarray) -> float:
    """Return the eigenvalue that belongs to the principal eigenvector of ``c``.

    The principal eigenvector ``pe`` is obtained from
    ``principal_eigenvector(c)``. The eigenvalue is then read off as the
    ratio ``(c . pe)[i] / pe[i]`` at the first nonzero component ``i``.

    Args:
        c: Matrix whose principal eigenvalue is wanted.

    Returns:
        The ratio described above.

    Raises:
        IndexError: Presumably when ``pe`` has no nonzero component, because
            the lookup of the first nonzero index then fails.
    """
    pe = principal_eigenvector(c)
    # Divide at a nonzero component so the ratio is well defined.
    nz_index = pe.nonzero()[0][0]
    return c.dot(pe)[nz_index] / pe[nz_index]