Example #1
0
    def test_not_found(self, datafiles):
        """get_multilabel raises KeyError when a profile has no group entry."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'valid'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()
        loaded_frame = profiles.data_frame

        # This mapping has no 'W' key.
        label_groups = {
            'A': [1],
            'B': [2],
            'C': [3],
            'A-B': [1, 2],
            'A-B-C': [1, 2, 3],
            'X': [],
        }

        with pytest.raises(KeyError) as excinfo:
            utils.get_multilabel(loaded_frame, 'profile', label_groups)

        # The raised error is expected to mention the unmapped label.
        assert 'W' in str(excinfo.value)
Example #2
0
    def test_pipe_spread_out_column(self, datafiles):
        """Spread two matrix columns, then select only the DTI_FA features."""
        data_dir = os.path.join(str(datafiles), 'small_data')
        profiles = NeuProfiles(profiles_path=data_dir,
                               profiles_filename='profiles.xlsx')
        assert profiles.data_frame is None

        profiles.load()
        assert profiles.data_frame is not None

        steps = [
            ('spread',
             SpreadOutMatrixTransformer(columns=['DTI_FA', 'DTI_L1'])),
            ('selecting',
             FeatureMatrixTransformer(matrix_columns=['DTI_FA'],
                                      columns=None)),
        ]
        output = Pipeline(steps).fit_transform(profiles.data_frame)

        # (column name, expected value for subject FIS_007), in sorted order.
        expected = [
            ('DTI_FA_1_0', 4),
            ('DTI_FA_2_0', 7),
            ('DTI_FA_2_1', 8),
        ]
        assert sorted(list(output.columns)) == [name for name, _ in expected]

        for name, value in expected:
            assert output[name]['FIS_007'] == value
Example #3
0
    def test_pipe_params(self, datafiles):
        """Configure the combining step through ``Pipeline.set_params``."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'small_data'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()
        assert profiles.data_frame is not None

        pipe = Pipeline([('combining', CombineMatrixTransformer())])
        # Each keyword is prefixed with the step name ('combining__').
        pipe.set_params(
            combining__columns=['DTI_FA', 'DTI_L1'],
            combining__column_name='combined',
        )

        output = pipe.fit_transform(profiles.data_frame)

        assert sorted(list(output.columns)) == ['DTI_FA', 'DTI_L1', 'combined']

        expected_combined = [[6, 7, 8], [9, 10, 11], [12, 13, 14]]
        assert (output.combined.FIS_007 == expected_combined).all()
    def test_spread_out_simple(self, datafiles):
        """Spread the DTI_L1 matrix into one column per cell, dropping the matrix.

        Cell values are read from the first row with ``.iloc[0]``. A plain
        ``series[0]`` on a label-indexed Series relies on the positional
        fallback, which newer pandas versions deprecate.
        """
        path = os.path.join(str(datafiles), 'small_data')
        filename = 'profiles.xlsx'

        profiles = NeuProfiles(profiles_path=path, profiles_filename=filename)
        profiles.load()

        new_dataframe = profiles.spread_out_matrix(['DTI_L1'],
                                                   keep_matrix=False,
                                                   symmetric=False)

        # (spread column, expected first-row value), already in sorted order.
        expected_cells = [
            ('DTI_L1_0_0', 11),
            ('DTI_L1_0_1', 12),
            ('DTI_L1_0_2', 13),
            ('DTI_L1_1_0', 14),
            ('DTI_L1_1_1', 15),
            ('DTI_L1_1_2', 16),
            ('DTI_L1_2_0', 17),
            ('DTI_L1_2_1', 18),
            ('DTI_L1_2_2', 19),
        ]

        # DTI_FA is untouched; DTI_L1 itself is gone because keep_matrix=False.
        expected_columns = ['DTI_FA'] + [name for name, _ in expected_cells]
        assert expected_columns == sorted(new_dataframe.columns)

        for name, value in expected_cells:
            assert new_dataframe[name].iloc[0] == value
Example #5
0
    def test_no_df_simple(self, datafiles):
        """Combine matrices when the pipeline receives a raw array, not a frame."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'small_data'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()
        assert profiles.data_frame is not None

        combiner = CombineMatrixTransformer(columns=['0', '1'],
                                            column_name='combined')
        pipe = Pipeline([('combining', combiner)])

        # Only the underlying values are passed in, so there are no labels.
        output = pipe.fit_transform(profiles.data_frame.values)

        expected = np.asarray([[6, 7, 8], [9, 10, 11], [12, 13, 14]])
        assert (output[0][2] == expected).all()
Example #6
0
    def test_fail_duplicates_data(self, datafiles):
        """Loading the 'duplicated_data' fixture raises ValueError."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'duplicated_data'),
            profiles_filename='profiles.xlsx',
        )

        with pytest.raises(ValueError) as excinfo:
            profiles.load()

        expected_message = (
            "Already existing value at [FIS_007, DTI_FA]: "
            "Value from FIS_007_FA_factor.csv cannot be loaded."
        )
        assert str(excinfo.value) == expected_message
Example #7
0
    def test_warning(self, datafiles, caplog):
        """At INFO level four records are captured; the last one is a WARNING."""
        caplog.set_level(logging.INFO)

        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'index_not_found'),
            profiles_filename='profiles.xlsx',
        )
        profiles.load()

        records = caplog.records
        assert len(records) == 4

        summary = records[3]
        assert summary.levelname == 'WARNING'
        assert summary.message == (
            "Index FIS_007 couldn't be found in main file (total 1 index(es)).")
Example #8
0
    def test_fail_format(self, datafiles):
        """Loading with ``format_file='mat'`` raises NotImplementedError."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'duplicated_data'),
            profiles_filename='profiles.xlsx',
            format_file='mat',
        )

        with pytest.raises(NotImplementedError) as excinfo:
            profiles.load()

        message = str(excinfo.value)
        assert message == 'Format \'mat\' not supported'
Example #9
0
    def test_debug(self, datafiles, caplog):
        """At DEBUG level five records are captured; the third is the DEBUG skip note."""
        caplog.set_level(logging.DEBUG)

        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'index_not_found'),
            profiles_filename='profiles.xlsx',
        )
        profiles.load()

        records = caplog.records
        assert len(records) == 5

        skipped = records[2]
        assert skipped.levelname == 'DEBUG'
        assert skipped.message == (
            "Index (id) FIS_007 couldn't be found, skipped "
            "(file FIS_007_FA_factor.csv)")
Example #10
0
    def test_fail_duplicates_id(self, datafiles):
        """Loading the 'duplicated_id' fixture raises ValueError naming the index."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'duplicated_id'),
            profiles_filename='profiles.xlsx',
        )

        with pytest.raises(ValueError) as excinfo:
            profiles.load()

        # The expected text differs between Python 2 and 3 only by the u'' prefixes.
        if six.PY2:
            expected_message = (
                "Index has duplicate keys: "
                "Index([u'FIS_007', u'TTO_06'], dtype='object', name=u'ID')")
        else:
            expected_message = (
                "Index has duplicate keys: "
                "Index(['FIS_007', 'TTO_06'], dtype='object', name='ID')")

        assert str(excinfo.value) == expected_message
Example #11
0
    def test_pipe_symmetric(self, datafiles):
        """Spread every column with ``symmetric=False`` and check all 18 cells."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'small_data'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()
        assert profiles.data_frame is not None

        spreader = SpreadOutMatrixTransformer(
            columns=profiles.data_frame.columns, symmetric=False)
        pipe = Pipeline([('spread', spreader)])
        output = pipe.fit_transform(profiles.data_frame)

        # Row-major numbering: DTI_FA cells run 1..9, DTI_L1 cells run 11..19.
        # The generated names come out in sorted order.
        expected = [
            ('{}_{}_{}'.format(matrix, row, col), first + 3 * row + col)
            for matrix, first in (('DTI_FA', 1), ('DTI_L1', 11))
            for row in range(3)
            for col in range(3)
        ]

        assert sorted(list(output.columns)) == [name for name, _ in expected]

        for name, value in expected:
            assert output[name]['FIS_007'] == value
Example #12
0
    def test_without_columns(self, datafiles):
        """With no ``columns`` given, the combiner output keeps only the original columns."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'small_data'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()
        assert profiles.data_frame is not None

        combiner = CombineMatrixTransformer(column_name='combined')
        output = Pipeline([('combining', combiner)]).fit_transform(
            profiles.data_frame)

        # No 'combined' column is expected in the result.
        assert sorted(list(output.columns)) == ['DTI_FA', 'DTI_L1']
Example #13
0
    def test_format_info(self, datafiles, caplog):
        """Loading the 'valid' fixture emits 25 INFO records with a fixed wording."""
        caplog.set_level(logging.INFO)

        path = os.path.join(str(datafiles), 'valid')
        profiles = NeuProfiles(profiles_path=path,
                               profiles_filename='profiles.xlsx')
        assert profiles.data_frame is None

        profiles.load()

        assert len(caplog.records) == 25

        # Every message must contain these pieces, including the data path.
        required_fragments = ('Reading content in ', path, 'directory with ')
        for record in caplog.records:
            assert record.levelname == 'INFO'
            for fragment in required_fragments:
                assert fragment in record.message
Example #14
0
    def test_threshold(self, datafiles):
        """Binarize both matrices in place with ``threshold=1``.

        Before the call the corner cells are 1 and 9 (DTI_FA) and 11 and 19
        (DTI_L1). Afterwards the cell equal to the threshold reads 0 and the
        larger cells read 1.
        """
        path = os.path.join(str(datafiles), 'small_data')
        filename = 'profiles.xlsx'

        profiles = NeuProfiles(profiles_path=path, profiles_filename=filename)

        assert profiles.data_frame is None

        profiles.load()

        assert profiles.data_frame is not None
        assert isinstance(profiles.data_frame, pandas.DataFrame)
        assert profiles.data_frame.shape == (1, 2)
        assert profiles.data_frame.index.name == 'ID'
        assert sorted(profiles.data_frame.columns) == ['DTI_FA', 'DTI_L1']

        # Corner cells of the raw matrices.
        assert profiles.data_frame.DTI_FA.FIS_007[0][0] == 1
        assert profiles.data_frame.DTI_FA.FIS_007[2][2] == 9

        assert profiles.data_frame.DTI_L1.FIS_007[0][0] == 11
        assert profiles.data_frame.DTI_L1.FIS_007[2][2] == 19

        profiles.binarize_matrix(['DTI_FA', 'DTI_L1'], threshold=1)

        # Same cells after binarizing; profiles.data_frame itself was updated.
        assert profiles.data_frame.DTI_FA.FIS_007[0][0] == 0
        assert profiles.data_frame.DTI_FA.FIS_007[2][2] == 1

        assert profiles.data_frame.DTI_L1.FIS_007[0][0] == 1
        assert profiles.data_frame.DTI_L1.FIS_007[2][2] == 1
Example #15
0
    def test_no_df_simple(self, datafiles):
        """Spread matrices when the pipeline receives a raw array, not a frame."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'small_data'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()
        assert profiles.data_frame is not None

        spreader = SpreadOutMatrixTransformer(columns=['0', '1'])
        output = Pipeline([('spread', spreader)]).fit_transform(
            profiles.data_frame.values)

        # Either matrix may come first in the result, so both orders are accepted.
        accepted = (
            [[14., 17., 18., 4., 7., 8.]],
            [[4., 7., 8., 14., 17., 18.]],
        )
        assert output.tolist() in accepted
    def test_inplace_false(self, datafiles):
        """``inplace=False`` returns the spread frame and leaves the profiles frame alone."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'small_data'),
            profiles_filename='profiles.xlsx',
        )
        profiles.load()

        # NOTE(review): this is a reference, not a copy, so the final equality
        # check cannot detect an in-place mutation of the same object - confirm
        # whether a copy was intended.
        old_dataframe = profiles.data_frame

        new_dataframe = profiles.spread_out_matrix(['DTI_L1'],
                                                   keep_matrix=False,
                                                   inplace=False,
                                                   symmetric=False)

        # One column per cell of the 3x3 matrix, generated in sorted order.
        spread_columns = [
            'DTI_L1_{}_{}'.format(row, col)
            for row in range(3)
            for col in range(3)
        ]

        assert ['DTI_FA'] + spread_columns == sorted(
            list(new_dataframe.columns))

        for new_column in spread_columns:
            assert new_column not in profiles.data_frame.columns

        assert (old_dataframe == profiles.data_frame).all().all()
Example #17
0
    def test_no_df_simple(self, datafiles):
        """Spread then select features when the pipeline receives a raw array."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'small_data'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()
        assert profiles.data_frame is not None

        steps = [
            ('spread', SpreadOutMatrixTransformer(columns=['0', '1'])),
            ('selecting',
             FeatureMatrixTransformer(matrix_columns=['0'], columns=None)),
        ]
        output = Pipeline(steps).fit_transform(profiles.data_frame.values)

        # Column '0' may hold either matrix, so either first value is accepted.
        first_value = int(output[0])
        assert first_value in [4, 14]
Example #18
0
    def test_pipe_column(self, datafiles):
        """Selecting a plain column through the pipeline returns it unchanged."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'small_data'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()
        assert profiles.data_frame is not None

        selector = FeatureMatrixTransformer(columns=['DTI_FA'],
                                            matrix_columns=None)
        output = Pipeline([('selecting', selector)]).fit_transform(
            profiles.data_frame)

        assert sorted(list(output.columns)) == ['DTI_FA']
        assert (output.DTI_FA == profiles.data_frame.DTI_FA).all()
Example #19
0
    def test_simple(self, datafiles):
        """Check the indicator matrix built by get_multilabel from the 'profile' column.

        Each profile label maps to a list of group ids. The result is expected
        to have one column per group id (1, 2, 3) and one row per subject,
        with 1 where the subject's profile includes that group.
        """
        path = os.path.join(str(datafiles), 'valid')
        filename = 'profiles.xlsx'

        profiles = NeuProfiles(profiles_path=path, profiles_filename=filename)

        assert profiles.data_frame is None

        profiles.load()

        groups = {
            'A': [1],
            'B': [2],
            'C': [3],
            'A-B': [1, 2],
            'A-B-C': [1, 2, 3],
            'X': [],
            'W': [],
        }

        data_frame = profiles.data_frame
        df = utils.get_multilabel(data_frame, 'profile', groups)
        # DataFrame.as_matrix() is deprecated and gone from pandas 1.0;
        # .values gives the same array.
        values = df.values

        # 3 columns, 63 items
        assert values.shape == (63, 3)

        # (profile label, expected indicator row for every subject with it)
        expected_rows = [
            ('A', [1, 0, 0]),
            ('B', [0, 1, 0]),
            ('C', [0, 0, 1]),
            ('A-B', [1, 1, 0]),
            ('A-B-C', [1, 1, 1]),
            ('X', [0, 0, 0]),
        ]
        for label, row in expected_rows:
            mask = profiles.data_frame.profile == label
            assert (values[mask] == row).all()

        assert (df.columns == [1, 2, 3]).all()
        assert (df.index == profiles.data_frame.index).all()
Example #20
0
    def test_default(self, datafiles):
        """Combine DTI_FA and DTI_L1 into a new 'combined' column in place.

        The source matrices are checked at their corner cells first. After
        the call, profiles.data_frame is expected to carry the extra column.
        """
        path = os.path.join(str(datafiles), 'small_data')
        filename = 'profiles.xlsx'

        profiles = NeuProfiles(profiles_path=path, profiles_filename=filename)

        assert profiles.data_frame is None

        profiles.load()

        assert profiles.data_frame is not None
        assert isinstance(profiles.data_frame, pandas.DataFrame)
        assert profiles.data_frame.shape == (1, 2)
        assert profiles.data_frame.index.name == 'ID'
        assert sorted(profiles.data_frame.columns) == ['DTI_FA', 'DTI_L1']

        # Corner cells of the two source matrices.
        assert profiles.data_frame.DTI_FA.FIS_007[0][0] == 1
        assert profiles.data_frame.DTI_FA.FIS_007[2][2] == 9

        assert profiles.data_frame.DTI_L1.FIS_007[0][0] == 11
        assert profiles.data_frame.DTI_L1.FIS_007[2][2] == 19

        profiles.combine_matrix(['DTI_FA', 'DTI_L1'], 'combined')

        assert sorted(profiles.data_frame.columns) == [
            'DTI_FA', 'DTI_L1', 'combined']

        expected_combined = [[6, 7, 8], [9, 10, 11], [12, 13, 14]]
        assert (profiles.data_frame.combined.FIS_007 ==
                expected_combined).all()
Example #21
0
    def test_valid(self, datafiles):
        """Loading the 'valid' fixture gives a 63x8 frame of 76x76 matrices."""
        profiles = NeuProfiles(
            profiles_path=os.path.join(str(datafiles), 'valid'),
            profiles_filename='profiles.xlsx',
        )
        assert profiles.data_frame is None

        profiles.load()

        assert profiles.data_frame is not None
        assert isinstance(profiles.data_frame, pandas.DataFrame)
        assert profiles.data_frame.shape == (63, 8)
        assert profiles.data_frame.index.name == 'ID'

        assert 'profile' == profiles.data_frame.columns[0]

        matrix_columns = (
            'RAW', 'LS', 'DTI_L1', 'DTI_MD', 'DTI_RX', 'DTI_FA', 'FUNC',
        )
        for column_name in matrix_columns:
            assert column_name in profiles.data_frame.columns
            # Check the shape on the first row that has a value in this column.
            column = profiles.data_frame[column_name]
            first_i = column.first_valid_index()
            assert column[first_i].shape == (76, 76)
    def test_extract_multiple_keeping(self, datafiles):
        """Spread two matrices with ``keep_matrix=True`` and check every cell.

        Cell values are read from the first row with ``.iloc[0]``. A plain
        ``series[0]`` on a label-indexed Series relies on the positional
        fallback, which newer pandas versions deprecate.
        """
        path = os.path.join(str(datafiles), 'small_data')
        filename = 'profiles.xlsx'

        profiles = NeuProfiles(profiles_path=path, profiles_filename=filename)
        profiles.load()

        new_dataframe = profiles.spread_out_matrix(['DTI_L1', 'DTI_FA'],
                                                   keep_matrix=True,
                                                   symmetric=False)

        # Row-major numbering: DTI_L1 cells run 11..19, DTI_FA cells run 1..9.
        expected_cells = [
            ('{}_{}_{}'.format(matrix, row, col), first + 3 * row + col)
            for matrix, first in (('DTI_L1', 11), ('DTI_FA', 1))
            for row in range(3)
            for col in range(3)
        ]

        # keep_matrix=True: the source matrix columns stay next to the new ones.
        expected_columns = sorted(
            ['DTI_FA', 'DTI_L1'] + [name for name, _ in expected_cells])
        assert expected_columns == sorted(new_dataframe.columns)

        for name, value in expected_cells:
            assert new_dataframe[name].iloc[0] == value

        # The matrices held by profiles.data_frame still have their values.
        assert profiles.data_frame.DTI_FA.FIS_007[0][0] == 1
        assert profiles.data_frame.DTI_FA.FIS_007[2][2] == 9

        assert profiles.data_frame.DTI_L1.FIS_007[0][0] == 11
        assert profiles.data_frame.DTI_L1.FIS_007[2][2] == 19