Esempio n. 1
0
    def test__subset_and_standardize(self, expression_data_no_na, standardize,
                                     feature_ids, sample_ids):
        from flotilla.data_model.base import BaseData

        base_data = BaseData(expression_data_no_na)
        base_data.subset, base_data.means = \
            base_data._subset_and_standardize(base_data.data,
                                              sample_ids=sample_ids,
                                              feature_ids=feature_ids,
                                              return_means=True,
                                              standardize=standardize)

        subset = base_data._subset(base_data.data,
                                   sample_ids=sample_ids,
                                   feature_ids=feature_ids)
        means = subset.mean().rename_axis(base_data.feature_renamer)
        subset = subset.fillna(means).fillna(0)
        subset = subset.rename_axis(base_data.feature_renamer, 1)

        if standardize:
            data = StandardScaler().fit_transform(subset)
        else:
            data = subset

        subset_standardized = pd.DataFrame(data,
                                           index=subset.index,
                                           columns=subset.columns)

        pdt.assert_frame_equal(subset_standardized, base_data.subset)
        pdt.assert_series_equal(means, base_data.means)
Esempio n. 2
0
    def test__subset_and_standardize(self, expression_data_no_na,
                                     standardize, feature_ids,
                                     sample_ids):
        from flotilla.data_model.base import BaseData

        base_data = BaseData(expression_data_no_na)
        base_data.subset, base_data.means = \
            base_data._subset_and_standardize(base_data.data,
                                              sample_ids=sample_ids,
                                              feature_ids=feature_ids,
                                              return_means=True,
                                              standardize=standardize)

        subset = base_data._subset(base_data.data, sample_ids=sample_ids,
                                   feature_ids=feature_ids)
        means = subset.mean().rename_axis(base_data.feature_renamer)
        subset = subset.fillna(means).fillna(0)
        subset = subset.rename_axis(base_data.feature_renamer, 1)

        if standardize:
            data = StandardScaler().fit_transform(subset)
        else:
            data = subset

        subset_standardized = pd.DataFrame(data, index=subset.index,
                                           columns=subset.columns)

        pdt.assert_frame_equal(subset_standardized, base_data.subset)
        pdt.assert_series_equal(means, base_data.means)
Esempio n. 3
0
    def test__subset(self, expression_data_no_na, sample_ids, feature_ids):
        from flotilla.data_model.base import BaseData

        base_data = BaseData(expression_data_no_na)
        subset = base_data._subset(base_data.data, sample_ids=sample_ids,
                                   feature_ids=feature_ids)
        data = base_data.data
        if feature_ids is None:
            feature_ids = data.columns
        else:
            feature_ids = pd.Index(set(feature_ids).intersection(data.columns))
        if sample_ids is None:
            sample_ids = data.index
        else:
            sample_ids = pd.Index(set(sample_ids).intersection(data.index))

        true_subset = data.ix[sample_ids, feature_ids]

        pdt.assert_frame_equal(subset, true_subset)
Esempio n. 4
0
    def test__subset(self, expression_data_no_na, sample_ids, feature_ids):
        from flotilla.data_model.base import BaseData

        base_data = BaseData(expression_data_no_na)
        subset = base_data._subset(base_data.data,
                                   sample_ids=sample_ids,
                                   feature_ids=feature_ids)
        data = base_data.data
        if feature_ids is None:
            feature_ids = data.columns
        else:
            feature_ids = pd.Index(set(feature_ids).intersection(data.columns))
        if sample_ids is None:
            sample_ids = data.index
        else:
            sample_ids = pd.Index(set(sample_ids).intersection(data.index))

        true_subset = data.ix[sample_ids, feature_ids]

        pdt.assert_frame_equal(subset, true_subset)