Example #1
    def test_setitem(self):
        # not sure what else to do here
        series = self.frame['A'][::2]
        self.frame['col5'] = series
        self.assert_('col5' in self.frame)
        common.assert_dict_equal(series,
                                 self.frame['col5'],
                                 compare_keys=False)

        series = self.frame['A']
        self.frame['col6'] = series
        common.assert_dict_equal(series,
                                 self.frame['col6'],
                                 compare_keys=False)

        # assigning a wrong-length array should raise ('col_wrong_len' is an
        # arbitrary new column name)
        self.assertRaises(Exception, self.frame.__setitem__, 'col_wrong_len',
                          randn(len(self.frame) + 1))

        # set ndarray
        arr = randn(len(self.frame))
        self.frame['col9'] = arr
        self.assert_((self.frame['col9'] == arr).all())

        # set value, do out of order for DataMatrix
        self.frame['col7'] = 5
        assert ((self.frame['col7'] == 5).all())

        self.frame['col8'] = 'foo'
        assert ((self.frame['col8'] == 'foo').all())

        smaller = self.frame[:2]
        smaller['col10'] = ['1', '2']
        self.assertEqual(smaller['col10'].dtype, np.object_)
        self.assert_((smaller['col10'] == ['1', '2']).all())
Example #2
    def test__init__featuredata(self,
                                expression_data_no_na,
                                expression_feature_data,
                                expression_feature_rename_col):
        from flotilla.data_model.base import BaseData, \
            subsets_from_metadata, MINIMUM_FEATURE_SUBSET

        base_data = BaseData(expression_data_no_na,
                             feature_data=expression_feature_data,
                             feature_rename_col=expression_feature_rename_col)

        if expression_feature_rename_col is not None:
            feature_renamer_series = expression_feature_data[
                expression_feature_rename_col]
        else:
            feature_renamer_series = pd.Series(
                expression_feature_data.index,
                index=expression_feature_data.index)
        feature_subsets = subsets_from_metadata(expression_feature_data,
                                                MINIMUM_FEATURE_SUBSET,
                                                'features')
        feature_subsets['variant'] = base_data.variant

        pdt.assert_frame_equal(base_data.data_original, expression_data_no_na)
        pdt.assert_frame_equal(base_data.feature_data, expression_feature_data)
        pdt.assert_frame_equal(base_data.data, expression_data_no_na)
        pdt.assert_series_equal(base_data.feature_renamer_series,
                                feature_renamer_series)
        pdt.assert_dict_equal(base_data.feature_subsets, feature_subsets)
Example #3
    def test_groupby_groups_datetimeindex(self):
        # GH#1430
        periods = 1000
        ind = pd.date_range(start='2012/1/1', freq='5min', periods=periods)
        df = DataFrame({'high': np.arange(periods),
                        'low': np.arange(periods)}, index=ind)
        grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))

        # it works!
        groups = grouped.groups
        assert isinstance(list(groups.keys())[0], datetime)

        # GH#11442
        index = pd.date_range('2015/01/01', periods=5, name='date')
        df = pd.DataFrame({'A': [5, 6, 7, 8, 9],
                           'B': [1, 2, 3, 4, 5]}, index=index)
        result = df.groupby(level='date').groups
        dates = ['2015-01-05', '2015-01-04', '2015-01-03',
                 '2015-01-02', '2015-01-01']
        expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date')
                    for date in dates}
        tm.assert_dict_equal(result, expected)

        grouped = df.groupby(level='date')
        for date in dates:
            result = grouped.get_group(date)
            data = [[df.loc[date, 'A'], df.loc[date, 'B']]]
            expected_index = pd.DatetimeIndex([date], name='date')
            expected = pd.DataFrame(data,
                                    columns=list('AB'),
                                    index=expected_index)
            tm.assert_frame_equal(result, expected)
Example #4
    def test_frame_to_dict_tz(self):
        # GH18372: when converting to dict with orient='records',
        # tz-aware datetime columns were not converted as required
        data = [(datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=pytz.utc),),
                (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc),)]
        df = DataFrame(list(data), columns=["d"])

        result = df.to_dict(orient='records')
        expected = [
            {
                'd': Timestamp('2017-11-18 21:53:00.219225+0000', tz=pytz.utc)
            },
            {
                'd': Timestamp('2017-11-18 22:06:30.061810+0000', tz=pytz.utc)
            },
        ]
        tm.assert_dict_equal(result[0], expected[0])
        tm.assert_dict_equal(result[1], expected[1])
Example #5
    def test_combineFirst(self):
        series = Series(common.makeIntIndex(20).astype(float),
                        index=common.makeIntIndex(20))

        series_copy = series * 2
        series_copy[::2] = np.NaN

        # nothing used from the input
        combined = series.combineFirst(series_copy)

        self.assert_(np.array_equal(combined, series))

        # Holes filled from input
        combined = series_copy.combineFirst(series)
        self.assert_(np.isfinite(combined).all())

        self.assert_(np.array_equal(combined[::2], series[::2]))
        self.assert_(np.array_equal(combined[1::2], series_copy[1::2]))

        # mixed types
        index = common.makeStringIndex(20)
        floats = Series(common.randn(20), index=index)
        strings = Series(common.makeStringIndex(10), index=index[::2])

        combined = strings.combineFirst(floats)

        common.assert_dict_equal(strings, combined, compare_keys=False)
        common.assert_dict_equal(floats[1::2], combined, compare_keys=False)

        # corner case
        s = Series([1., 2, 3], index=[0, 1, 2])
        result = s.combineFirst(Series([], index=[]))
        assert_series_equal(s, result)
Example #6
    def test_setitem(self):
        # not sure what else to do here
        series = self.frame['A'][::2]
        self.frame['col5'] = series
        self.assert_('col5' in self.frame)
        common.assert_dict_equal(series, self.frame['col5'],
                                 compare_keys=False)

        series = self.frame['A']
        self.frame['col6'] = series
        common.assert_dict_equal(series, self.frame['col6'],
                                 compare_keys=False)

        # assigning a wrong-length array should raise ('col_wrong_len' is an
        # arbitrary new column name)
        self.assertRaises(Exception, self.frame.__setitem__, 'col_wrong_len',
                          randn(len(self.frame) + 1))

        # set ndarray
        arr = randn(len(self.frame))
        self.frame['col9'] = arr
        self.assert_((self.frame['col9'] == arr).all())

        # set value, do out of order for DataMatrix
        self.frame['col7'] = 5
        assert((self.frame['col7'] == 5).all())

        self.frame['col8'] = 'foo'
        assert((self.frame['col8'] == 'foo').all())

        smaller = self.frame[:2]
        smaller['col10'] = ['1', '2']
        self.assertEqual(smaller['col10'].dtype, np.object_)
        self.assert_((smaller['col10'] == ['1', '2']).all())
Example #7
    def test_to_dict(self):
        test_data = {
            'A': {
                '1': 1,
                '2': 2
            },
            'B': {
                '1': '1',
                '2': '2',
                '3': '3'
            },
        }
        recons_data = DataFrame(test_data).to_dict()

        for k, v in compat.iteritems(test_data):
            for k2, v2 in compat.iteritems(v):
                assert v2 == recons_data[k][k2]

        recons_data = DataFrame(test_data).to_dict("l")

        for k, v in compat.iteritems(test_data):
            for k2, v2 in compat.iteritems(v):
                assert v2 == recons_data[k][int(k2) - 1]

        recons_data = DataFrame(test_data).to_dict("s")

        for k, v in compat.iteritems(test_data):
            for k2, v2 in compat.iteritems(v):
                assert v2 == recons_data[k][k2]

        recons_data = DataFrame(test_data).to_dict("sp")
        expected_split = {
            'columns': ['A', 'B'],
            'index': ['1', '2', '3'],
            'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]
        }
        tm.assert_dict_equal(recons_data, expected_split)

        recons_data = DataFrame(test_data).to_dict("r")
        expected_records = [{
            'A': 1.0,
            'B': '1'
        }, {
            'A': 2.0,
            'B': '2'
        }, {
            'A': np.nan,
            'B': '3'
        }]
        assert isinstance(recons_data, list)
        assert len(recons_data) == 3
        for l, r in zip(recons_data, expected_records):
            tm.assert_dict_equal(l, r)

        # GH10844
        recons_data = DataFrame(test_data).to_dict("i")

        for k, v in compat.iteritems(test_data):
            for k2, v2 in compat.iteritems(v):
                assert v2 == recons_data[k2][k]
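A note on the single-letter orients used above: older pandas accepted abbreviations for DataFrame.to_dict, with 'l' for 'list', 's' for 'series', 'sp' for 'split', 'r' for 'records' and 'i' for 'index'; recent releases deprecate the short forms in favor of the full names. A minimal sketch of the 'split' orient checked above (plain pandas, no test fixtures assumed):

import pandas as pd

df = pd.DataFrame({'A': {'1': 1, '2': 2},
                   'B': {'1': '1', '2': '2', '3': '3'}})

# 'split' returns index, columns and data separately; column 'A' is
# upcast to float64 so the row it is missing ('3') can hold NaN.
result = df.to_dict(orient='split')
print(result['columns'])  # ['A', 'B']
print(result['index'])    # ['1', '2', '3']
print(result['data'])     # [[1.0, '1'], [2.0, '2'], [nan, '3']]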
Example #8
    def test_init(self, step, vmax, logbf_thresh):
        from flotilla.compute.splicing import ModalityEstimator, \
            ModalityModel

        estimator = ModalityEstimator(step, vmax, logbf_thresh)

        true_parameters = np.arange(2, vmax + step, step).astype(float)
        true_exclusion = ModalityModel(1, true_parameters)
        true_inclusion = ModalityModel(true_parameters, 1)
        true_middle = ModalityModel(true_parameters + 3, true_parameters + 3)
        true_bimodal = ModalityModel(1 / (true_parameters + 3),
                                     1 / (true_parameters + 3))
        true_one_param_models = {'Psi~1': true_inclusion,
                                 'Psi~0': true_exclusion}
        true_two_param_models = {'bimodal': true_bimodal,
                                 'middle': true_middle}

        npt.assert_equal(estimator.step, step)
        npt.assert_equal(estimator.vmax, vmax)
        npt.assert_equal(estimator.logbf_thresh, logbf_thresh)
        npt.assert_equal(estimator.parameters, true_parameters)
        npt.assert_equal(estimator.exclusion_model, true_exclusion)
        npt.assert_equal(estimator.inclusion_model, true_inclusion)
        npt.assert_equal(estimator.middle_model, true_middle)
        npt.assert_equal(estimator.bimodal_model, true_bimodal)
        pdt.assert_dict_equal(estimator.one_param_models,
                              true_one_param_models)
        pdt.assert_dict_equal(estimator.two_param_models,
                              true_two_param_models)
Example #9
    def test_dict_complex(self):
        x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j}
        x_rec = self.encode_decode(x)
        tm.assert_dict_equal(x, x_rec)

        for key in x:
            tm.assert_class_equal(x[key], x_rec[key], obj="complex value")
Example #10
def test_convert_list():
    obj = r('list(a=1, b=2, c=3)')

    converted = convert_robj(obj)
    expected = {'a': [1], 'b': [2], 'c': [3]}

    _test.assert_dict_equal(converted, expected)
Example #11
    def test_dict_numpy_complex(self):
        x = {"foo": np.complex128(1.0 + 1.0j), "bar": np.complex128(2.0 + 2.0j)}
        x_rec = self.encode_decode(x)
        tm.assert_dict_equal(x, x_rec)

        for key in x:
            tm.assert_class_equal(x[key], x_rec[key], obj="numpy complex128")
Example #12
def test_convert_nested_list():
    obj = r('list(a=list(foo=1, bar=2))')

    converted = convert_robj(obj)
    expected = {'a': {'foo': [1], 'bar': [2]}}

    _test.assert_dict_equal(converted, expected)
Example #13
def test_empty_read(_output_dir: str):
    df = MyTfs(plane="X", directory=_output_dir)
    write_tfs(df.get_filename(), df, save_index="NAME")
    df_read = MyTfs(plane="X", directory=_output_dir).read()
    assert_frame_equal(df, df_read,
                       check_exact=False)  # float precision can be an issue
    assert_dict_equal(df.headers, df_read.headers, compare_keys=True)
Example #14
def test_filled_write(_output_dir: str, _filled_tfs: MyTfs):
    df = _filled_tfs(plane="X", directory=_output_dir)
    df.write()
    assert pathlib.Path(df.get_filename()).is_file()
    df_read = read_tfs(df.get_filename(), index="NAME")
    assert_frame_equal(df, df_read)
    assert_dict_equal(df.headers, df_read.headers, compare_keys=True)
Example #15
    def test_booleanindex(self):
        boolIdx = np.repeat(True, len(self.strIndex)).astype(bool)
        boolIdx[5:30:2] = False

        subIndex = self.strIndex[boolIdx]
        common.assert_dict_equal(tseries.map_indices(subIndex),
                                 subIndex.indexMap)
Example #16
    def test_read_dta18(self):
        parsed_118 = self.read_dta(self.dta22_118)
        parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')
        expected = DataFrame.from_records(
            [['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
             ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
             ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
             ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
             ['', '', '', 0, 0.3332999, 'option a', 1/3.]
             ],
            columns=['Things', 'Cities', 'Unicode_Cities_Strl', 'Ints', 'Floats', 'Bytes', 'Longs'])
        expected["Floats"] = expected["Floats"].astype(np.float32)
        for col in parsed_118.columns:
            tm.assert_almost_equal(parsed_118[col], expected[col])

        with StataReader(self.dta22_118) as rdr:
            vl = rdr.variable_labels()
            vl_expected = {u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
                           u'Longs': u'long data',
                           u'Things': u'Here are some things',
                           u'Bytes': u'byte data',
                           u'Ints': u'int data',
                           u'Cities': u'Here are some cities',
                           u'Floats': u'float data'}
            tm.assert_dict_equal(vl, vl_expected)

            self.assertEqual(rdr.data_label, u'This is a  Ünicode data label')
Example #17
    def test_single_exon_alternative_events(self, splice_graph, exon1_i,
                                            exon1_name,
                                            mutually_exclusive_events,
                                            skipped_exon_events):
        test = splice_graph.single_exon_alternative_events(exon1_i, exon1_name)
        true = {'se': skipped_exon_events, 'mxe': mutually_exclusive_events}
        pdt.assert_dict_equal(test, true)
Example #18
def test_tfs_read_write_read_pathlib_input(_tfs_file_pathlib: pathlib.Path,
                                           _test_file: str):
    original = read_tfs(_tfs_file_pathlib)
    write_tfs(_test_file, original)
    new = read_tfs(_test_file)
    assert_frame_equal(original, new)
    assert_dict_equal(original.headers, new.headers, compare_keys=True)
Example #19
def test_large_dataframe(pca_large_dataframe, kwargs):
    from flotilla.visualize.decomposition import DecompositionViz

    dv = DecompositionViz(pca_large_dataframe.reduced_space,
                          pca_large_dataframe.components_,
                          pca_large_dataframe.explained_variance_ratio_,
                          **kwargs)
    x_pc = kwargs['x_pc']
    y_pc = kwargs['y_pc']
    pcs = [x_pc, y_pc]

    true_top_features = set([])
    true_pc_loadings_labels = {}
    true_pc_loadings = {}

    for pc in pcs:
        x = pca_large_dataframe.components_.ix[pc].copy()
        x.sort(ascending=True)
        half_features = int(kwargs['n_top_pc_features'] / 2)
        if len(x) > kwargs['n_top_pc_features']:
            a = x[:half_features]
            b = x[-half_features:]
            labels = np.r_[a.index, b.index]
            true_pc_loadings[pc] = np.r_[a, b]
        else:
            labels = x.index
            true_pc_loadings[pc] = x

        true_pc_loadings_labels[pc] = labels
        true_top_features.update(labels)
    pdt.assert_numpy_array_equal(dv.top_features, true_top_features)
    pdt.assert_dict_equal(dv.pc_loadings_labels, true_pc_loadings_labels)
    pdt.assert_dict_equal(dv.pc_loadings, true_pc_loadings)
Example #20
    def test_init(self, logbf_thresh):
        from anchor import BayesianModalities, ModalityModel
        from anchor.bayesian import ONE_PARAMETER_MODELS, \
            TWO_PARAMETER_MODELS

        estimator = BayesianModalities(
            one_parameter_models=ONE_PARAMETER_MODELS,
            two_parameter_models=TWO_PARAMETER_MODELS,
            logbf_thresh=logbf_thresh)

        true_one_param_models = {
            k: ModalityModel(**v)
            for k, v in ONE_PARAMETER_MODELS.items()
        }

        true_two_param_models = {
            k: ModalityModel(**v)
            for k, v in TWO_PARAMETER_MODELS.items()
        }

        npt.assert_equal(estimator.logbf_thresh, logbf_thresh)
        pdt.assert_dict_equal(estimator.one_param_models,
                              true_one_param_models)
        pdt.assert_dict_equal(estimator.two_param_models,
                              true_two_param_models)
Example #21
    def test_read_dta18(self):
        parsed_118 = self.read_dta(self.dta22_118)
        parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')
        expected = DataFrame.from_records(
            [['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
             ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
             ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
             ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
             ['', '', '', 0, 0.3332999, 'option a', 1 / 3.]],
            columns=[
                'Things', 'Cities', 'Unicode_Cities_Strl', 'Ints', 'Floats',
                'Bytes', 'Longs'
            ])
        expected["Floats"] = expected["Floats"].astype(np.float32)
        for col in parsed_118.columns:
            tm.assert_almost_equal(parsed_118[col], expected[col])

        with StataReader(self.dta22_118) as rdr:
            vl = rdr.variable_labels()
            vl_expected = {
                u'Unicode_Cities_Strl':
                u'Here are some strls with Ünicode chars',
                u'Longs': u'long data',
                u'Things': u'Here are some things',
                u'Bytes': u'byte data',
                u'Ints': u'int data',
                u'Cities': u'Here are some cities',
                u'Floats': u'float data'
            }
            tm.assert_dict_equal(vl, vl_expected)

            self.assertEqual(rdr.data_label, u'This is a  Ünicode data label')
Example #22
    def test_read_dta18(self):
        parsed_118 = self.read_dta(self.dta22_118)
        parsed_118["Bytes"] = parsed_118["Bytes"].astype("O")
        expected = DataFrame.from_records(
            [
                ["Cat", "Bogota", u"Bogotá", 1, 1.0, u"option b Ünicode", 1.0],
                ["Dog", "Boston", u"Uzunköprü", np.nan, np.nan, np.nan, np.nan],
                ["Plane", "Rome", u"Tromsø", 0, 0.0, "option a", 0.0],
                ["Potato", "Tokyo", u"Elâzığ", -4, 4.0, 4, 4],
                ["", "", "", 0, 0.3332999, "option a", 1 / 3.0],
            ],
            columns=["Things", "Cities", "Unicode_Cities_Strl", "Ints", "Floats", "Bytes", "Longs"],
        )
        expected["Floats"] = expected["Floats"].astype(np.float32)
        for col in parsed_118.columns:
            tm.assert_almost_equal(parsed_118[col], expected[col])

        with StataReader(self.dta22_118) as rdr:
            vl = rdr.variable_labels()
            vl_expected = {
                u"Unicode_Cities_Strl": u"Here are some strls with Ãœnicode chars",
                u"Longs": u"long data",
                u"Things": u"Here are some things",
                u"Bytes": u"byte data",
                u"Ints": u"int data",
                u"Cities": u"Here are some cities",
                u"Floats": u"float data",
            }
            tm.assert_dict_equal(vl, vl_expected)

            self.assertEqual(rdr.data_label, u"This is a  Ünicode data label")
Example #23
def test_large_dataframe(pca_large_dataframe, kwargs):
    from flotilla.visualize.decomposition import DecompositionViz

    dv = DecompositionViz(pca_large_dataframe.reduced_space,
                          pca_large_dataframe.components_,
                          pca_large_dataframe.explained_variance_ratio_,
                          **kwargs)
    x_pc = kwargs['x_pc']
    y_pc = kwargs['y_pc']
    pcs = [x_pc, y_pc]

    true_top_features = set([])
    true_pc_loadings_labels = {}
    true_pc_loadings = {}

    for pc in pcs:
        x = pca_large_dataframe.components_.ix[pc].copy()
        x.sort(ascending=True)
        half_features = int(kwargs['n_top_pc_features'] / 2)
        if len(x) > kwargs['n_top_pc_features']:
            a = x[:half_features]
            b = x[-half_features:]
            labels = np.r_[a.index, b.index]
            true_pc_loadings[pc] = np.r_[a, b]
        else:
            labels = x.index
            true_pc_loadings[pc] = x

        true_pc_loadings_labels[pc] = labels
        true_top_features.update(labels)
    pdt.assert_array_equal(dv.top_features, true_top_features)
    pdt.assert_dict_equal(dv.pc_loadings_labels, true_pc_loadings_labels)
    pdt.assert_dict_equal(dv.pc_loadings, true_pc_loadings)
Example #24
    def test_index_groupby(self):
        int_idx = Index(range(6))
        float_idx = Index(np.arange(0, 0.6, 0.1))
        obj_idx = Index("A B C D E F".split())
        dt_idx = pd.date_range("2013-01-01", freq="M", periods=6)

        for idx in [int_idx, float_idx, obj_idx, dt_idx]:
            to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
            tm.assert_dict_equal(
                idx.groupby(to_groupby), {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]}
            )

            to_groupby = Index(
                [
                    datetime(2011, 11, 1),
                    datetime(2011, 12, 1),
                    pd.NaT,
                    pd.NaT,
                    datetime(2011, 12, 1),
                    datetime(2011, 11, 1),
                ],
                tz="UTC",
            ).values

            ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")]
            expected = {ex_keys[0]: idx[[0, 5]], ex_keys[1]: idx[[1, 4]]}
            tm.assert_dict_equal(idx.groupby(to_groupby), expected)
Example #25
    def test_combine_first(self):
        values = tm.makeIntIndex(20).values.astype(float)
        series = Series(values, index=tm.makeIntIndex(20))

        series_copy = series * 2
        series_copy[::2] = np.NaN

        # nothing used from the input
        combined = series.combine_first(series_copy)

        self.assert_(np.array_equal(combined, series))

        # Holes filled from input
        combined = series_copy.combine_first(series)
        self.assert_(np.isfinite(combined).all())

        self.assert_(np.array_equal(combined[::2], series[::2]))
        self.assert_(np.array_equal(combined[1::2], series_copy[1::2]))

        # mixed types
        index = tm.makeStringIndex(20)
        floats = Series(tm.randn(20), index=index)
        strings = Series(tm.makeStringIndex(10), index=index[::2])

        combined = strings.combine_first(floats)

        tm.assert_dict_equal(strings, combined, compare_keys=False)
        tm.assert_dict_equal(floats[1::2], combined, compare_keys=False)

        # corner case
        s = Series([1., 2, 3], index=[0, 1, 2])
        result = s.combine_first(Series([], index=[]))
        assert_series_equal(s, result)
Example #26
    def test_001_process(self):
        """Test ISO2 primary_geo; build a date day, month, year; no primary_date; feature qualifies another feature."""

        # Define mixmasta inputs:
        mp = f"inputs{sep}test1_input.json"
        fp = f"inputs{sep}test1_input.csv"
        geo = 'admin2'
        outf = f"outputs{sep}unittests"

        # Process:
        df, dct = mixmasta.process(fp, mp, geo, outf)

        # Load expected output:
        output_df = pd.read_csv(f'outputs{sep}test1_output.csv', index_col=False)
        output_df = mixmasta.optimize_df_types(output_df)
        with open(f'outputs{sep}test1_dict.json') as f:
            output_dict = json.loads(f.read())

        # Sort both data frames and reindex for comparison.
        cols = ['timestamp', 'country', 'admin1', 'admin2', 'admin3',
                'lat', 'lng', 'feature', 'value']
        df.sort_values(by=cols, inplace=True)
        output_df.sort_values(by=cols, inplace=True)
        df.reset_index(drop=True, inplace=True)
        output_df.reset_index(drop=True, inplace=True)

        # Assertions
        assert_frame_equal(df, output_df)
        assert_dict_equal(dct, output_dict)
Example #27
    def test_convert_list(self):
        obj = r("list(a=1, b=2, c=3)")

        converted = com.convert_robj(obj)
        expected = {"a": [1], "b": [2], "c": [3]}

        tm.assert_dict_equal(converted, expected)
Example #28
    def test__init__featuredata(self, expression_data_no_na,
                                expression_feature_data,
                                expression_feature_rename_col):
        from flotilla.data_model.base import BaseData, \
            subsets_from_metadata, MINIMUM_FEATURE_SUBSET

        base_data = BaseData(expression_data_no_na,
                             feature_data=expression_feature_data,
                             feature_rename_col=expression_feature_rename_col)

        if expression_feature_rename_col is not None:
            feature_renamer_series = expression_feature_data[
                expression_feature_rename_col]
        else:
            feature_renamer_series = pd.Series(
                expression_feature_data.index,
                index=expression_feature_data.index)
        feature_subsets = subsets_from_metadata(expression_feature_data,
                                                MINIMUM_FEATURE_SUBSET,
                                                'features')
        feature_subsets['variant'] = base_data.variant

        pdt.assert_frame_equal(base_data.data_original, expression_data_no_na)
        pdt.assert_frame_equal(base_data.feature_data, expression_feature_data)
        pdt.assert_frame_equal(base_data.data, expression_data_no_na)
        pdt.assert_series_equal(base_data.feature_renamer_series,
                                feature_renamer_series)
        pdt.assert_dict_equal(base_data.feature_subsets, feature_subsets)
Example #29
    def test_convert_nested_list(self):
        obj = r("list(a=list(foo=1, bar=2))")

        converted = com.convert_robj(obj)
        expected = {"a": {"foo": [1], "bar": [2]}}

        tm.assert_dict_equal(converted, expected)
Example #30
    def test_shift(self):
        shifted = self.ts.shift(1)
        unshifted = shifted.shift(-1)

        common.assert_dict_equal(unshifted.valid(),
                                 self.ts,
                                 compare_keys=False)

        offset = datetools.bday
        shifted = self.ts.shift(1, offset=offset)
        unshifted = shifted.shift(-1, offset=offset)

        assert_series_equal(unshifted, self.ts)

        unshifted = self.ts.shift(0, offset=offset)
        assert_series_equal(unshifted, self.ts)

        shifted = self.ts.shift(1, timeRule='WEEKDAY')
        unshifted = shifted.shift(-1, timeRule='WEEKDAY')

        assert_series_equal(unshifted, self.ts)

        # corner case
        unshifted = self.ts.shift(0)
        assert_series_equal(unshifted, self.ts)
Example #31
    def test_006_process(self):
        """Test multi primary_geo, resolve_to_gadm"""

        # Define mixmasta inputs:
        mp = f'inputs{sep}test6_hoa_conflict_input.json'
        fp = f'inputs{sep}test6_hoa_conflict_input.csv'
        geo = 'admin2'
        outf = f'outputs{sep}unittests'

        # Process:
        df, dct = mixmasta.process(fp, mp, geo, outf)

        # Load expected output:
        output_df = pd.read_csv(f'outputs{sep}test6_hoa_conflict_output.csv', index_col=False)
        output_df = mixmasta.optimize_df_types(output_df)
        with open(f'outputs{sep}test6_hoa_conflict_dict.json') as f:
            output_dict = json.loads(f.read())

        # Sort both data frames and reindex for comparison.
        cols = ['timestamp', 'country', 'admin1', 'admin2', 'admin3',
                'lat', 'lng', 'feature', 'value']
        df.sort_values(by=cols, inplace=True)
        output_df.sort_values(by=cols, inplace=True)

        df.reset_index(drop=True, inplace=True)
        output_df.reset_index(drop=True, inplace=True)

        # Make the datatypes the same for value/feature and qualifying columns.
        df['value'] = df['value'].astype('str')
        df['feature'] = df['feature'].astype('str')
        output_df['value'] = output_df['value'].astype('str')
        output_df['feature'] = output_df['feature'].astype('str')

        # Assertions
        assert_frame_equal(df, output_df, check_categorical=False)
        assert_dict_equal(dct, output_dict)
Example #32
    def test_save(self, study, tmpdir):
        from flotilla.datapackage import name_to_resource

        study_name = 'test_save'
        study.supplemental.expression_corr = study.expression.data.corr()
        study.save(study_name, flotilla_dir=tmpdir)

        assert len(tmpdir.listdir()) == 1
        save_dir = tmpdir.listdir()[0]

        with open('{}/datapackage.json'.format(save_dir)) as f:
            test_datapackage = json.load(f)

        assert study_name == save_dir.purebasename

        # resource_keys_to_ignore = ('compression', 'format', 'path',
        #                            'url')
        keys_from_study = {
            'splicing': [],
            'expression': ['thresh', 'log_base', 'plus_one'],
            'metadata': [
                'phenotype_order', 'phenotype_to_color', 'phenotype_col',
                'phenotype_to_marker', 'pooled_col', 'minimum_samples'
            ],
            'mapping_stats': ['number_mapped_col', 'min_reads'],
            'expression_feature': ['rename_col', 'ignore_subset_cols'],
            'splicing_feature':
            ['rename_col', 'ignore_subset_cols', 'expression_id_col'],
            'gene_ontology': []
        }
        resource_names = keys_from_study.keys()

        # Add auto-generated attributes into the true datapackage
        for name, keys in keys_from_study.items():
            resource = name_to_resource(test_datapackage, name)
            for key in keys:
                command = self.get_data_eval_command(name, key)
                test_value = resource[key]
                true_value = eval(command)
                if isinstance(test_value, dict):
                    pdt.assert_dict_equal(test_value, true_value)
                elif isinstance(test_value, Iterable):
                    pdt.assert_array_equal(test_value, true_value)

        for name in resource_names:
            resource = name_to_resource(test_datapackage, name)
            path = '{}.csv.gz'.format(name)
            assert resource['path'] == path
            test_df = pd.read_csv('{}/{}/{}'.format(tmpdir, study_name, path),
                                  index_col=0,
                                  compression='gzip')
            command = self.get_data_eval_command(name, 'data_original')
            true_df = eval(command)
            pdt.assert_frame_equal(test_df, true_df)

        version = semantic_version.Version(study.version)
        version.patch += 1
        assert str(version) == test_datapackage['datapackage_version']
        assert study_name == test_datapackage['name']
Example #33
    def test_single_exon_alternative_events(self, splice_graph, exon1_i,
                                            exon1_name,
                                            mutually_exclusive_events,
                                            skipped_exon_events):
        test = splice_graph.single_exon_alternative_events(
            exon1_i, exon1_name)
        true = {'se': skipped_exon_events, 'mxe': mutually_exclusive_events}
        pdt.assert_dict_equal(test, true)
Example #34
    def test_valid(self):
        ts = self.ts.copy()
        ts[::2] = np.NaN

        result = ts.valid()
        self.assertEqual(len(result), ts.count())

        tm.assert_dict_equal(result, ts, compare_keys=False)
Example #35
def test_tfs_write_read(_dataframe: TfsDataFrame, _test_file: str):
    write_tfs(_test_file, _dataframe)
    assert pathlib.Path(_test_file).is_file()

    new = read_tfs(_test_file)
    assert_frame_equal(_dataframe, new,
                       check_exact=False)  # float precision can be an issue
    assert_dict_equal(_dataframe.headers, new.headers, compare_keys=True)
Example #36
    def test_dict_numpy_complex(self):
        x = {'foo': np.complex128(1.0 + 1.0j),
             'bar': np.complex128(2.0 + 2.0j)}
        x_rec = self.encode_decode(x)
        tm.assert_dict_equal(x, x_rec)

        for key in x:
            tm.assert_class_equal(x[key], x_rec[key], obj="numpy complex128")
Example #37
    def test__create_dict(self, all_transcripts_of_exon, strand_true_exc_nmd,
                          nmd_exons, true_dict):
        test = nmd_exons._get_exons_from_transcripts(all_transcripts_of_exon,
                                                     strand_true_exc_nmd)
        test = dict(
            (key, [v.id for v in values]) for key, values in test.items())
        true = true_dict
        pdt.assert_dict_equal(test, true)
Example #38
        def testit(index):
            pickled = pickle.dumps(index)
            unpickled = pickle.loads(pickled)

            self.assert_(isinstance(unpickled, Index))
            self.assert_(np.array_equal(unpickled, index))

            tm.assert_dict_equal(unpickled.indexMap, index.indexMap)
Example #39
        def testit(index):
            pickled = pickle.dumps(index)
            unpickled = pickle.loads(pickled)

            self.assert_(isinstance(unpickled, Index))
            self.assert_(np.array_equal(unpickled, index))

            common.assert_dict_equal(unpickled.indexMap, index.indexMap)
Example #40
    def test_combineFrame(self):
        frame_copy = self.frame.reindex(self.frame.index[::2])

        del frame_copy['D']
        frame_copy['C'][:5] = nan

        added = self.frame + frame_copy
        tm.assert_dict_equal(added['A'].valid(),
                             self.frame['A'] * 2,
                             compare_keys=False)

        self.assertTrue(
            np.isnan(added['C'].reindex(frame_copy.index)[:5]).all())

        # assert(False)

        self.assertTrue(np.isnan(added['D']).all())

        self_added = self.frame + self.frame
        self.assertTrue(self_added.index.equals(self.frame.index))

        added_rev = frame_copy + self.frame
        self.assertTrue(np.isnan(added['D']).all())
        self.assertTrue(np.isnan(added_rev['D']).all())

        # corner cases

        # empty
        plus_empty = self.frame + self.empty
        self.assertTrue(np.isnan(plus_empty.values).all())

        empty_plus = self.empty + self.frame
        self.assertTrue(np.isnan(empty_plus.values).all())

        empty_empty = self.empty + self.empty
        self.assertTrue(empty_empty.empty)

        # out of order
        reverse = self.frame.reindex(columns=self.frame.columns[::-1])

        assert_frame_equal(reverse + self.frame, self.frame * 2)

        # mix vs float64, upcast
        added = self.frame + self.mixed_float
        _check_mixed_float(added, dtype='float64')
        added = self.mixed_float + self.frame
        _check_mixed_float(added, dtype='float64')

        # mix vs mix
        added = self.mixed_float + self.mixed_float2
        _check_mixed_float(added, dtype=dict(C=None))
        added = self.mixed_float2 + self.mixed_float
        _check_mixed_float(added, dtype=dict(C=None))

        # with int
        added = self.frame + self.mixed_int
        _check_mixed_float(added, dtype='float64')
Example #41
    def test_get_level_lengths_un_sorted(self):
        index = pd.MultiIndex.from_arrays([
            [1, 1, 2, 1],
            ['a', 'b', 'b', 'd']
        ])
        expected = {(0, 0): 2, (0, 2): 1, (0, 3): 1,
                    (1, 0): 1, (1, 1): 1, (1, 2): 1, (1, 3): 1}
        result = _get_level_lengths(index)
        tm.assert_dict_equal(result, expected)
Example #42
def test_tfs_write_read_autoindex(_dataframe: TfsDataFrame, _test_file: str):
    df = _dataframe.set_index("a")
    df1 = _dataframe.set_index("a")
    write_tfs(_test_file, df, save_index=True)
    assert_frame_equal(df, df1)

    df_read = read_tfs(_test_file)
    assert_index_equal(df.index, df_read.index, check_exact=False)
    assert_dict_equal(_dataframe.headers, df_read.headers, compare_keys=True)
Example #43
def test_groupby(idx):
    groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
    labels = idx.get_values().tolist()
    exp = {1: labels[:3], 2: labels[3:]}
    tm.assert_dict_equal(groups, exp)

    # GH5620
    groups = idx.groupby(idx)
    exp = {key: [key] for key in idx}
    tm.assert_dict_equal(groups, exp)
Example #44
def test__get_junction_reads(bamfile, uniquely, multi):
    from outrigger.io.bam import _get_junction_reads

    test_uniquely, test_multi = _get_junction_reads(bamfile)

    true_uniquely = uniquely
    true_multi = multi

    pdt.assert_dict_equal(test_uniquely, true_uniquely)
    pdt.assert_dict_equal(test_multi, true_multi)
Example #45
    def test_to_dict(self, mapping):
        test_data = {
            'A': {'1': 1, '2': 2},
            'B': {'1': '1', '2': '2', '3': '3'},
        }

        # GH16122
        recons_data = DataFrame(test_data).to_dict(into=mapping)

        for k, v in test_data.items():
            for k2, v2 in v.items():
                assert (v2 == recons_data[k][k2])

        recons_data = DataFrame(test_data).to_dict("l", mapping)

        for k, v in test_data.items():
            for k2, v2 in v.items():
                assert (v2 == recons_data[k][int(k2) - 1])

        recons_data = DataFrame(test_data).to_dict("s", mapping)

        for k, v in test_data.items():
            for k2, v2 in v.items():
                assert (v2 == recons_data[k][k2])

        recons_data = DataFrame(test_data).to_dict("sp", mapping)
        expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
                          'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
        tm.assert_dict_equal(recons_data, expected_split)

        recons_data = DataFrame(test_data).to_dict("r", mapping)
        expected_records = [{'A': 1.0, 'B': '1'},
                            {'A': 2.0, 'B': '2'},
                            {'A': np.nan, 'B': '3'}]
        assert isinstance(recons_data, list)
        assert (len(recons_data) == 3)
        for l, r in zip(recons_data, expected_records):
            tm.assert_dict_equal(l, r)

        # GH10844
        recons_data = DataFrame(test_data).to_dict("i")

        for k, v in test_data.items():
            for k2, v2 in v.items():
                assert (v2 == recons_data[k2][k])

        df = DataFrame(test_data)
        df['duped'] = df[df.columns[0]]
        recons_data = df.to_dict("i")
        comp_data = test_data.copy()
        comp_data['duped'] = comp_data[df.columns[0]]
        for k, v in comp_data.items():
            for k2, v2 in v.items():
                assert (v2 == recons_data[k2][k])
Example #46
    def test_establish_reducer_use_existing(self):
        from cupcake.smush.base import SmushPlotterBase

        pca_kws = {}
        n_components = 2
        reducer = PCA(n_components=n_components, **pca_kws)

        p = SmushPlotterBase()
        p.establish_reducer(reducer)

        assert isinstance(p.reducer, type(reducer))
        pdt.assert_dict_equal(p.reducer.get_params(), reducer.get_params())
Example #47
    def test_na_values_dict_aliasing(self):
        na_values = {'a': 2, 'b': 1}
        na_values_copy = na_values.copy()

        names = ['a', 'b']
        data = '1,2\n2,1'

        expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
        out = self.read_csv(StringIO(data), names=names, na_values=na_values)

        tm.assert_frame_equal(out, expected)
        tm.assert_dict_equal(na_values, na_values_copy)
Example #48
def test_na_values_dict_aliasing(all_parsers):
    parser = all_parsers
    na_values = {"a": 2, "b": 1}
    na_values_copy = na_values.copy()

    names = ["a", "b"]
    data = "1,2\n2,1"

    expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
    result = parser.read_csv(StringIO(data), names=names, na_values=na_values)

    tm.assert_frame_equal(result, expected)
    tm.assert_dict_equal(na_values, na_values_copy)
Example #49
def test_observed_groups_with_nan(observed):
    # GH 24740
    df = pd.DataFrame({'cat': pd.Categorical(['a', np.nan, 'a'],
                       categories=['a', 'b', 'd']),
                       'vals': [1, 2, 3]})
    g = df.groupby('cat', observed=observed)
    result = g.groups
    if observed:
        expected = {'a': Index([0, 2], dtype='int64')}
    else:
        expected = {'a': Index([0, 2], dtype='int64'),
                    'b': Index([], dtype='int64'),
                    'd': Index([], dtype='int64')}
    tm.assert_dict_equal(result, expected)
Example #50
    def test_frame_to_dict_tz(self):
        # GH18372: when converting to dict with orient='records',
        # tz-aware datetime columns were not converted as required
        data = [(datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=pytz.utc),),
                (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc,),)]
        df = DataFrame(list(data), columns=["d", ])

        result = df.to_dict(orient='records')
        expected = [
            {'d': Timestamp('2017-11-18 21:53:00.219225+0000', tz=pytz.utc)},
            {'d': Timestamp('2017-11-18 22:06:30.061810+0000', tz=pytz.utc)},
        ]
        tm.assert_dict_equal(result[0], expected[0])
        tm.assert_dict_equal(result[1], expected[1])
Example #51
def test__report_read_positions(bamfile):
    from outrigger.io.bam import _report_read_positions

    bam = pysam.AlignmentFile(bamfile, 'rb')

    test = collections.Counter()

    for read in bam:
        _report_read_positions(read, test)
        break
    bam.close()

    true = {('chr2', 136713559, 136713559, '+'): 1}
    pdt.assert_dict_equal(test, true)
Example #52
    def test_list_grouper_with_nat(self):
        # GH 14715
        df = pd.DataFrame({'date': pd.date_range('1/1/2011',
                                                 periods=365, freq='D')})
        df.iloc[-1] = pd.NaT
        grouper = pd.Grouper(key='date', freq='AS')

        # Grouper in a list grouping
        result = df.groupby([grouper])
        expected = {pd.Timestamp('2011-01-01'): pd.Index(list(range(364)))}
        tm.assert_dict_equal(result.groups, expected)

        # Test case without a list
        result = df.groupby(grouper)
        expected = {pd.Timestamp('2011-01-01'): 365}
        tm.assert_dict_equal(result.groups, expected)
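All of the snippets above reach the same helper under different aliases (tm, pdt, common, _test): pandas' assert_dict_equal, importable in recent releases from pandas._testing and historically from pandas.util.testing (the older version also accepted dict-like objects such as Series, which is how the oldest snippets compare a Series against a frame column). A minimal, self-contained sketch of the call pattern, assuming pandas >= 1.0 for the import path:

import pandas._testing as tm  # older releases: import pandas.util.testing as tm

# By default both the key sets and the values must match.
tm.assert_dict_equal({'a': 1, 'b': 2}, {'a': 1, 'b': 2})

# With compare_keys=False only the keys of the first argument are checked,
# so the second argument may carry extra entries.
tm.assert_dict_equal({'a': 1.0}, {'a': 1.0, 'b': 2.0}, compare_keys=False)

# Any mismatch raises AssertionError, like the other tm.assert_* helpers.
try:
    tm.assert_dict_equal({'a': 1}, {'a': 2})
except AssertionError as err:
    print('values differ:', err)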