def test_invalid_url(self):
    """Reading tables from an unresolvable host must fail in an accepted way.

    Either ``URLError`` is raised by ``read_html``, or a ``ValueError`` whose
    message is exactly 'No tables found'.
    """
    bogus_url = 'http://www.a23950sdfa908sd.com'
    try:
        with tm.assertRaises(URLError):
            self.read_html(bogus_url, match='.*Water.*')
    except ValueError as err:
        # Accepted alternative failure: nothing could be parsed as a table.
        tm.assert_equal(str(err), 'No tables found')
Esempio n. 2
0
    def test_downcast_limits(self):
        """Check the dtype chosen by ``pd.to_numeric`` at downcast boundaries.

        Every case is a two-element series holding a (low, high) pair; after
        downcasting, the series must have the dtype listed for that case.
        Bug: #14401.
        """
        # Check to make sure numpy is new enough to run this test.
        if _np_version_under1p9:
            raise nose.SkipTest("Numpy version is under 1.9")

        signed, unsigned = 'integer', 'unsigned'
        i8, i16, i32, i64 = [iinfo(t) for t in
                             (np.int8, np.int16, np.int32, np.int64)]
        u8, u16, u32, u64 = [iinfo(t) for t in
                             (np.uint8, np.uint16, np.uint32, np.uint64)]

        cases = [
            # exact limits of every integer type
            ('int8', signed, [i8.min, i8.max]),
            ('int16', signed, [i16.min, i16.max]),
            ('int32', signed, [i32.min, i32.max]),
            ('int64', signed, [i64.min, i64.max]),
            ('uint8', unsigned, [u8.min, u8.max]),
            ('uint16', unsigned, [u16.min, u16.max]),
            ('uint32', unsigned, [u32.min, u32.max]),
            ('uint64', unsigned, [u64.min, u64.max]),
            # upper bound one past a signed type's maximum
            ('int16', signed, [i8.min, i8.max + 1]),
            ('int32', signed, [i16.min, i16.max + 1]),
            ('int64', signed, [i32.min, i32.max + 1]),
            # lower bound one below a signed type's minimum
            ('int16', signed, [i8.min - 1, i16.max]),
            ('int32', signed, [i16.min - 1, i32.max]),
            ('int64', signed, [i32.min - 1, i64.max]),
            # upper bound one past an unsigned type's maximum
            ('uint16', unsigned, [u8.min, u8.max + 1]),
            ('uint32', unsigned, [u16.min, u16.max + 1]),
            ('uint64', unsigned, [u32.min, u32.max + 1]),
        ]

        for expected_dtype, how, low_high in cases:
            converted = pd.to_numeric(pd.Series(low_high), downcast=how)
            tm.assert_equal(converted.dtype, expected_dtype)
Esempio n. 3
0
def test_values_consistent(array, expected_type, dtype):
    """Series and Index built from the same input expose matching ``_values``.

    The ``_values`` of both containers must be of ``expected_type`` and must
    compare equal to each other.
    """
    from_series = pd.Series(array)._values
    from_index = pd.Index(array)._values

    series_values_type = type(from_series)
    assert series_values_type is expected_type
    assert series_values_type is type(from_index)

    tm.assert_equal(from_series, from_index)
Esempio n. 4
0
def test_aggregate_normal(resample_method):
    """Compare a TimeGrouper aggregation against a plain groupby.

    The same random data is grouped once by integer keys and once by daily
    datetimes; after the integer-keyed result is relabelled with the matching
    daily index, both aggregations must be equal.
    """
    if resample_method == 'ohlc':
        pytest.xfail(reason='DataError: No numeric types to aggregate')

    value_columns = ['A', 'B', 'C', 'D']
    values = np.random.randn(20, 4)

    normal_df = DataFrame(values, columns=value_columns)
    normal_df['key'] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(values, columns=value_columns)
    dt_df['key'] = [datetime(2013, 1, day) for day in range(1, 6)] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()

    # Relabel the integer keys 1..5 with the five matching calendar days.
    expected.index = date_range(start='2013-01-01', freq='D',
                                periods=5, name='key')
    tm.assert_equal(expected, dt_result)

    # if TimeGrouper is used, 'nth' doesn't work yet

    """
Esempio n. 5
0
    def test_replace_period(self):
        """Replacing file names via a nested dict yields the mapped Periods."""

        def monthly(year, month):
            return pd.Period(year=year, month=month, freq='M')

        period_by_name = {
            'out_augmented_AUG_2011.json': monthly(2011, 8),
            'out_augmented_JAN_2011.json': monthly(2011, 1),
            'out_augmented_MAY_2012.json': monthly(2012, 5),
            'out_augmented_SUBSIDY_WEEK.json': monthly(2011, 4),
            'out_augmented_AUG_2012.json': monthly(2012, 8),
            'out_augmented_MAY_2011.json': monthly(2011, 5),
            'out_augmented_SEP_2013.json': monthly(2013, 9),
        }
        d = {'fname': period_by_name}

        file_names = [
            'out_augmented_AUG_2012.json',
            'out_augmented_SEP_2013.json',
            'out_augmented_SUBSIDY_WEEK.json',
            'out_augmented_MAY_2012.json',
            'out_augmented_MAY_2011.json',
            'out_augmented_AUG_2011.json',
            'out_augmented_JAN_2011.json',
        ]
        df = pd.DataFrame(file_names, columns=['fname'])

        # Sanity check: the frame and the mapping cover the same file names.
        tm.assert_equal(set(df.fname.values), set(period_by_name.keys()))

        mapped = [period_by_name[name] for name in df.fname.values]
        expected = DataFrame({'fname': mapped})
        result = df.replace(d)
        assert_frame_equal(result, expected)
Esempio n. 6
0
    def test_categorical_warnings_and_errors(self):
        """Check ``to_stata`` on categoricals with problematic labels.

        Labels that are too long must raise ``ValueError``; a category set
        that mixes strings with a non-string label must record exactly one
        warning.
        """
        # Error for labels too long
        original = pd.DataFrame.from_records(
            [['a' * 10000],
             ['b' * 10000],
             ['c' * 10000],
             ['d' * 10000]],
            columns=['Too_long'])

        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)
        with tm.ensure_clean() as path:
            tm.assertRaises(ValueError, original.to_stata, path)

        # Warning for non-string labels
        original = pd.DataFrame.from_records(
            [['a'],
             ['b'],
             ['c'],
             ['d'],
             [1]],
            columns=['Too_long'])
        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)

        # Use a fresh temporary path: the previous one has already left its
        # ``ensure_clean`` block, so writing to it again would leave a stray
        # file behind after the test.
        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path)
                # should get a warning for mixed content
                tm.assert_equal(len(w), 1)
Esempio n. 7
0
def test_shift(fill_value):
    """Shifting a Categorical by one position keeps its categories.

    The shifted result must equal a Categorical whose first entry is missing
    and whose remaining entries are the original values moved one slot right.
    """
    categories = ['a', 'b', 'c', 'd']
    ct = pd.Categorical(['a', 'b', 'c', 'd'],
                        categories=categories, ordered=False)
    shifted = ct.shift(1, fill_value=fill_value)

    expected = pd.Categorical([None, 'a', 'b', 'c'],
                              categories=categories, ordered=False)
    assert_equal(shifted, expected)
def test_plot_violins(pca, kwargs, df_norm):
    """The violin figure holds a full 4-column grid of axes.

    The grid must have as many rows as are needed to fit every label that is
    either among the first ``n_vectors`` magnitudes or among the top features.
    """
    from flotilla.visualize.decomposition import DecompositionViz

    # 'singles' is passed explicitly below, so remove it from the copy.
    viz_kwargs = kwargs.copy()
    del viz_kwargs['singles']

    dv = DecompositionViz(pca.reduced_space, pca.components_,
                          pca.explained_variance_ratio_,
                          singles=df_norm, **viz_kwargs)
    dv.plot(plot_violins=True)

    top_features = pd.Index(dv.top_features)
    leading_index = dv.magnitudes[:dv.n_vectors].index
    vector_labels = list(set(leading_index.union(top_features)))

    # Smallest row count (at least one) whose 4-wide grid fits all labels.
    ncols = 4
    nrows = max(1, -(-len(vector_labels) // ncols))

    pdt.assert_equal(len(dv.fig_violins.axes), nrows * ncols)

    # Disabled per-axis check, kept from the original test:
    # for i in np.arange(len(top_features)):
    #     ax = dv.fig_violins.axes[i]
    #     pdt.assert_equal(len(ax.collections), len(dv.grouped.groups))
    plt.close('all')
Esempio n. 9
0
    def test__init_sample_thresholds(self,
                                     expression_data,
                                     expression_thresh,
                                     metadata_minimum_samples,
                                     pooled):
        """``BaseData`` applies thresholds and splits singles from pooled.

        The expected data is rebuilt here by calling ``_threshold`` under the
        same conditions, then compared with the attributes set on the object.
        """
        from flotilla.data_model.base import BaseData

        base_data = BaseData(expression_data,
                             thresh=expression_thresh,
                             minimum_samples=metadata_minimum_samples,
                             pooled=pooled)

        data = expression_data.copy()
        if pooled is None:
            pooled_samples = []
        else:
            pooled_samples = pooled.copy()
        single_samples = data.index[~data.index.isin(pooled_samples)]
        # NOTE(review): `.ix` was removed in pandas 1.0 - confirm the pandas
        # version this suite is pinned to.
        singles_df = data.ix[single_samples]

        needs_threshold = (expression_thresh > -np.inf or
                           metadata_minimum_samples > 0)
        if needs_threshold:
            if singles_df.empty:
                data = base_data._threshold(data)
            else:
                data = base_data._threshold(data, singles_df)

        # Re-split after thresholding, since `data` may have been replaced.
        singles_df = data.ix[single_samples]
        pooled_df = data.ix[pooled_samples]

        pdt.assert_frame_equal(base_data.data_original, expression_data)
        pdt.assert_frame_equal(base_data.data, data)
        pdt.assert_equal(base_data.thresh, expression_thresh)
        pdt.assert_equal(base_data.minimum_samples, metadata_minimum_samples)
        pdt.assert_frame_equal(base_data.pooled, pooled_df)
        pdt.assert_frame_equal(base_data.singles, singles_df)
Esempio n. 10
0
    def test_read_write_dta12(self):
        """Round-trip a frame whose column names differ after writing.

        Writing must record exactly one warning, and the frame read back
        (indexed by "index") must equal ``formatted``: the expected column
        names with int32 values.
        """
        row = [(1, 2, 3, 4, 5, 6)]
        original_names = [
            "astringwithmorethan32characters_1",
            "astringwithmorethan32characters_2",
            "+",
            "-",
            "short",
            "delete",
        ]
        formatted_names = [
            "astringwithmorethan32characters_",
            "_0astringwithmorethan32character",
            "_",
            "_1_",
            "_short",
            "_delete",
        ]

        original = DataFrame(row, columns=original_names)
        formatted = DataFrame(row, columns=formatted_names)
        formatted.index.name = "index"
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as caught:
                original.to_stata(path, None)
                # should get a warning for that format.
                tm.assert_equal(len(caught), 1)

            round_tripped = self.read_dta(path)
            tm.assert_frame_equal(round_tripped.set_index("index"), formatted)
Esempio n. 11
0
    def test_ufunc_compat(self, holder):
        """``np.sin`` on a holder returns a Series for Series, else an Index."""
        if holder is pd.Series:
            box = pd.Series
        else:
            box = pd.Index

        raw = np.arange(5, dtype='int64')
        result = np.sin(holder(raw))

        expected = box(np.sin(np.arange(5, dtype='int64')))
        tm.assert_equal(result, expected)
Esempio n. 12
0
 def test_index_column(self):
     """``index_col`` names the index of the frame returned by ``read_gbq``."""
     index_name = "STRING_1"
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2"

     result_frame = gbq.read_gbq(
         query, project_id=PROJECT_ID, index_col=index_name)

     plain_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': ['b']})
     correct_frame = plain_frame.set_index(index_name)

     tm.assert_equal(result_frame.index.name, correct_frame.index.name)
Esempio n. 13
0
    def test_read_write_dta12(self):
        """Round-trip a frame whose column names differ after writing.

        Writing must record exactly one warning, and the frame read back
        (indexed by 'index') must equal ``formatted``: the expected column
        names with int32 values.
        """
        row = [(1, 2, 3, 4, 5, 6)]
        original_names = ['astringwithmorethan32characters_1',
                          'astringwithmorethan32characters_2',
                          '+',
                          '-',
                          'short',
                          'delete']
        formatted_names = ['astringwithmorethan32characters_',
                           '_0astringwithmorethan32character',
                           '_',
                           '_1_',
                           '_short',
                           '_delete']

        original = DataFrame(row, columns=original_names)
        formatted = DataFrame(row, columns=formatted_names)
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as caught:
                original.to_stata(path, None)
                # should get a warning for that format.
                tm.assert_equal(len(caught), 1)

            round_tripped = self.read_dta(path)
            tm.assert_frame_equal(round_tripped.set_index('index'), formatted)
Esempio n. 14
0
def assert_graph_items_equal(graph1, items1, graph2, items2):
    """Assert both graphs relate the same items in every direction.

    For each item of one graph and each direction in ``DIRECTIONS``, the
    items found from it must match (ignoring order) the items found from the
    same item in the other graph. The check runs from graph1 to graph2 and
    then from graph2 to graph1.
    """
    from outrigger.index.adjacencies import DIRECTIONS

    def related(graph, items, number, direction):
        # Items reachable from vertex `number` along `direction`.
        return [items[i] for i in
                graph.find(getattr(V(number), direction))]

    passes = (
        (graph1, items1, graph2, items2),
        (graph2, items2, graph1, items1),
    )
    for src_graph, src_items, dst_graph, dst_items in passes:
        for src_number, item in enumerate(src_items):
            for direction in DIRECTIONS:
                test = related(src_graph, src_items, src_number, direction)

                # The same item may sit at a different position on the
                # other side, so look it up by value.
                dst_number = dst_items.index(item)
                true = related(dst_graph, dst_items, dst_number, direction)

                pdt.assert_equal(sorted(test), sorted(true))
Esempio n. 15
0
    def test_read_dta2(self):
        """Formats 114, 115 and 117 of the dta2 file parse to the same frame.

        Reading the three files must record exactly one warning in total.
        """
        if LooseVersion(sys.version) < '2.7':
            raise nose.SkipTest('datetime interp under 2.6 is faulty')

        column_names = ['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
                        'monthly_date', 'quarterly_date', 'half_yearly_date',
                        'yearly_date']
        rows = [
            (datetime(2006, 11, 19, 23, 13, 20),
             1479596223000,
             datetime(2010, 1, 20),
             datetime(2010, 1, 8),
             datetime(2010, 1, 1),
             datetime(1974, 7, 1),
             datetime(2010, 1, 1),
             datetime(2010, 1, 1)),
            (datetime(1959, 12, 31, 20, 3, 20),
             -1479590,
             datetime(1953, 10, 2),
             datetime(1948, 6, 10),
             datetime(1955, 1, 1),
             datetime(1955, 7, 1),
             datetime(1955, 1, 1),
             datetime(2, 1, 1)),
            # a row that is missing in every one of the eight columns
            (pd.NaT,) * 8,
        ]
        expected = DataFrame.from_records(rows, columns=column_names)
        expected['yearly_date'] = expected['yearly_date'].astype('O')

        with warnings.catch_warnings(record=True) as caught:
            parsed_114 = self.read_dta(self.dta2_114)
            parsed_115 = self.read_dta(self.dta2_115)
            parsed_117 = self.read_dta(self.dta2_117)
            # 113 is buggy due to limited date format support in Stata
            # parsed_113 = self.read_dta(self.dta2_113)

            # should get a warning for that format.
            tm.assert_equal(len(caught), 1)

        # buggy test because of the NaT comparison on certain platforms
        # Format 113 test fails since it does not support tc and tC formats
        # tm.assert_frame_equal(parsed_113, expected)
        for parsed in (parsed_114, parsed_115, parsed_117):
            tm.assert_frame_equal(parsed, expected)
Esempio n. 16
0
def test_numpy_errstate_is_default():
    """Importing ``pandas.compat.numpy`` leaves numpy's error state alone."""
    import numpy as np
    from pandas.compat import numpy  # noqa

    # The defaults since numpy 1.6.0
    expected = dict(over="warn", divide="warn", invalid="warn", under="ignore")

    # The errstate should be unchanged after that import.
    current = np.geterr()
    tm.assert_equal(current, expected)
Esempio n. 17
0
def test_array_not_registered(registry_without_decimal):
    """``pd.array`` accepts the dtype class while 'decimal' is unregistered.

    The result must equal a ``DecimalArray`` built from the same sequence.
    """
    # check we aren't on it
    assert registry.find('decimal') is None

    data = [decimal.Decimal(text) for text in ('1', '2')]
    expected = DecimalArray._from_sequence(data)

    result = pd.array(data, dtype=DecimalDtype)
    tm.assert_equal(result, expected)
Esempio n. 18
0
    def test_to_csv_with_mix_columns(self):
        """Explicit mixed int/str ``columns`` give the default CSV output.

        GH11637: output was incorrect when a mix of integer and string column
        names was passed as the ``columns`` parameter of ``to_csv``.
        """
        df = DataFrame({0: ['a', 'b', 'c'],
                        1: ['aa', 'bb', 'cc']})
        df['test'] = 'txt'

        default_csv = df.to_csv()
        explicit_csv = df.to_csv(columns=[0, 1, 'test'])
        assert_equal(default_csv, explicit_csv)
Esempio n. 19
0
def test_numpy_errstate_is_default():
    """Importing ``pandas.compat.numpy`` leaves numpy's error state alone."""
    import numpy as np
    from pandas.compat import numpy  # noqa

    # The defaults since numpy 1.6.0
    expected = dict(over='warn', divide='warn', invalid='warn',
                    under='ignore')

    # The errstate should be unchanged after that import.
    current = np.geterr()
    tm.assert_equal(current, expected)
Esempio n. 20
0
 def test_index_column(self):
     """``index_col`` names the index of the frame returned by ``read_gbq``."""
     index_name = "STRING_1"
     query = "SELECT 'a' as STRING_1, 'b' as STRING_2"

     result_frame = gbq.read_gbq(query, project_id=_get_project_id(),
                                 index_col=index_name,
                                 private_key=_get_private_key_path())

     plain_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': ['b']})
     correct_frame = plain_frame.set_index(index_name)

     tm.assert_equal(result_frame.index.name, correct_frame.index.name)
Esempio n. 21
0
 def test_frame_empty(self):
     """Empty frames survive a JSON round trip and serialise as expected."""
     df = DataFrame(columns=['jim', 'joe'])
     self.assertFalse(df._is_mixed_type)

     round_tripped = read_json(df.to_json(), dtype=dict(df.dtypes))
     assert_frame_equal(round_tripped, df, check_index_type=False)

     # GH 7445
     empty_column = pd.DataFrame({'test': []}, index=[])
     result = empty_column.to_json(orient='columns')
     tm.assert_equal(result, '{"test":{}}')
def test_plot_samples(pca, kwargs):
    """``plot_samples`` draws ``n_vectors`` + 1 lines on the current axes."""
    from flotilla.visualize.decomposition import DecompositionViz

    viz = DecompositionViz(pca.reduced_space, pca.components_,
                           pca.explained_variance_ratio_, **kwargs)
    viz.plot_samples()

    n_lines = len(plt.gca().lines)
    pdt.assert_equal(n_lines, kwargs['n_vectors'] + 1)
    plt.close('all')
def test_plot_loadings(pca, kwargs):
    """``plot_loadings`` adds exactly one collection to the current axes."""
    from flotilla.visualize.decomposition import DecompositionViz

    viz = DecompositionViz(pca.reduced_space, pca.components_,
                           pca.explained_variance_ratio_, **kwargs)
    viz.plot_loadings()

    n_collections = len(plt.gca().collections)
    pdt.assert_equal(n_collections, 1)
    plt.close('all')
Esempio n. 24
0
    def test_repeat_preserves_tz(self):
        """Repeating each element once keeps the values and tz-aware dtype."""
        tz_index = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
        arr = DatetimeArray(tz_index)

        # preserves tz and values, but not freq
        expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype)

        repeated = arr.repeat([1, 1])
        tm.assert_equal(repeated, expected)
Esempio n. 25
0
    def test_pow_float(self, op, idx, box):
        """``op`` with the float 2.0 on a boxed index matches the raw values.

        Tests power calculations both ways, GH#14973.
        """
        raw_result = op(idx.values, 2.0)
        expected = tm.box_expected(pd.Float64Index(raw_result), box)

        boxed = tm.box_expected(idx, box)
        result = op(boxed, 2.0)
        tm.assert_equal(result, expected)
Esempio n. 26
0
    def test_nmf_space_ylabel(self, splicing, groupby):
        """The y label is the included label when x is excluded, else excluded."""
        test_ylabel = splicing._nmf_space_ylabel(groupby)

        x_is_excluded = splicing._is_nmf_space_x_axis_excluded(groupby)
        true_ylabel = (splicing.included_label if x_is_excluded
                       else splicing.excluded_label)

        pdt.assert_equal(test_ylabel, true_ylabel)
Esempio n. 27
0
    def test_modulo(self, idx, box):
        """``% 2`` on a boxed index matches ``% 2`` on its raw values.

        GH#9244
        """
        expected = tm.box_expected(Index(idx.values % 2), box)

        boxed = tm.box_expected(idx, box)
        result = boxed % 2
        tm.assert_equal(result, expected)
Esempio n. 28
0
def test_array_unboxes(box):
    """``pd.array`` unwraps boxed data before building the array.

    Passing the boxed data straight to ``DecimalArray2._from_sequence`` must
    raise ``TypeError``, whereas ``pd.array`` with dtype 'decimal2' must give
    the same result as building from ``data.values``.
    """
    data = box([decimal.Decimal(text) for text in ('1', '2')])

    # make sure it works
    with pytest.raises(TypeError):
        DecimalArray2._from_sequence(data)

    result = pd.array(data, dtype='decimal2')
    unboxed = data.values
    expected = DecimalArray2._from_sequence(unboxed)
    tm.assert_equal(result, expected)
Esempio n. 29
0
    def test_objarr_add_str(self, box):
        """Adding 'a' to boxed string data appends it and keeps NaN as NaN."""
        boxed = tm.box_expected(pd.Series(['x', np.nan, 'x']), box)
        expected = tm.box_expected(pd.Series(['xa', np.nan, 'xa']), box)

        result = boxed + 'a'
        tm.assert_equal(result, expected)
Esempio n. 30
0
    def test_guess_modality(self, event, estimator):
        """``_guess_modality`` picks the largest entry once 'uniform' is set.

        The expected value is the ``idxmax`` of the log-sum-exps after their
        'uniform' entry has been set to the estimator's ``logbf_thresh``.
        """
        loglik = estimator._loglik(event)
        logsumexps = estimator._logsumexp(loglik)

        # Call the method under test before `logsumexps` is modified below.
        test_guess_modality = estimator._guess_modality(logsumexps)

        logsumexps['uniform'] = estimator.logbf_thresh
        true_guess_modality = logsumexps.idxmax()

        pdt.assert_equal(test_guess_modality, true_guess_modality)
Esempio n. 31
0
def test_uint_index_does_not_convert_to_float64():
    """``.loc`` with large unsigned labels keeps a uint64 index.

    https://github.com/pandas-dev/pandas/issues/28279
    https://github.com/pandas-dev/pandas/issues/28023
    """
    unique_label = 7606741985629028552
    repeated_label = 17876870360202815256

    labels = [
        unique_label,
        repeated_label,
        repeated_label,
        13106359306506049338,
        8991270399732411471,
        8991270399732411472,
    ]
    series = pd.Series([0, 1, 2, 3, 4, 5], index=labels)

    result = series.loc[[unique_label, repeated_label]]

    # The repeated label occurs twice in the series, so it is selected twice.
    expected = UInt64Index(
        [unique_label, repeated_label, repeated_label],
        dtype="uint64",
    )
    tm.assert_index_equal(result.index, expected)

    tm.assert_equal(result, series[:3])
Esempio n. 32
0
def assert_invalid_comparison(left, right, box):
    """
    Assert that comparison operations with mismatched types behave correctly.

    ``==`` must be all-False and ``!=`` all-True in both operand orders, and
    every ordering comparison must raise ``TypeError`` in both orders.

    Parameters
    ----------
    left : np.ndarray, ExtensionArray, Index, or Series
    right : object
    box : {pd.DataFrame, pd.Series, pd.Index, tm.to_array}
    """
    # Not for tznaive-tzaware comparison

    # Note: not quite the same as how we do this for tm.box_expected
    if box is not Index:
        xbox = box
    else:
        xbox = np.array

    result = left == right
    all_false = xbox(np.zeros(result.shape, dtype=np.bool_))
    tm.assert_equal(result, all_false)
    tm.assert_equal(right == left, all_false)

    tm.assert_equal(left != right, ~all_false)
    tm.assert_equal(right != left, ~all_false)

    orderings = (
        lambda a, b: a < b,
        lambda a, b: a <= b,
        lambda a, b: a > b,
        lambda a, b: a >= b,
    )
    msg = "Invalid comparison between"
    # Every ordering comparison must be rejected in both operand orders.
    for first, second in ((left, right), (right, left)):
        for ordering in orderings:
            with pytest.raises(TypeError, match=msg):
                ordering(first, second)
    def test__init(self, expression_data_no_na, outliers):
        """``BaseData`` stores its inputs and sets up the predictor managers."""
        from flotilla.data_model.base import BaseData
        from flotilla.compute.predict import PredictorConfigManager, \
            PredictorDataSetManager

        base_data = BaseData(expression_data_no_na, outliers=outliers)

        if outliers is None:
            outlier_samples = []
        else:
            outlier_samples = outliers.copy()
        # NOTE(review): `.ix` was removed in pandas 1.0 - confirm the pandas
        # version this suite is pinned to.
        outliers_df = expression_data_no_na.ix[outlier_samples]

        # Expected renamer: every column name maps to itself.
        column_names = expression_data_no_na.columns
        feature_renamer_series = pd.Series(column_names, index=column_names)

        pdt.assert_frame_equal(base_data.data_original, expression_data_no_na)
        pdt.assert_equal(base_data.feature_data, None)
        pdt.assert_frame_equal(base_data.data, expression_data_no_na)
        pdt.assert_series_equal(base_data.feature_renamer_series,
                                feature_renamer_series)
        pdt.assert_frame_equal(base_data.outliers, outliers_df)
        pdt.assert_numpy_array_equal(base_data.outlier_samples,
                                     outlier_samples)

        config_manager = base_data.predictor_config_manager
        assert isinstance(config_manager, PredictorConfigManager)
        dataset_manager = base_data.predictor_dataset_manager
        assert isinstance(dataset_manager, PredictorDataSetManager)
Esempio n. 34
0
    def test_read_write_dta11(self):
        """Round-trip a frame whose column names differ after writing.

        Writing must record exactly one warning, and the frame read back
        (indexed by 'index') must equal ``formatted``: the expected column
        names with int32 values.
        """
        row = [(1, 2, 3, 4)]
        original_names = [
            'good',
            compat.u('b\u00E4d'),
            '8number',
            'astringwithmorethan32characters______',
        ]
        formatted_names = [
            'good',
            'b_d',
            '_8number',
            'astringwithmorethan32characters_',
        ]

        original = DataFrame(row, columns=original_names)
        formatted = DataFrame(row, columns=formatted_names)
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as caught:
                original.to_stata(path, None)
            # should get a warning for that format.
            tm.assert_equal(len(caught), 1)

            round_tripped = self.read_dta(path)
            tm.assert_frame_equal(round_tripped.set_index('index'),
                                  formatted)
Esempio n. 35
0
def test_compare_nucleotide_seqs(nucleotide_seq1, nucleotide_seq2, ksizes):
    """Compare two labelled nucleotide sequences against a fixed CSV table."""
    from orpheum.compare_kmer_content import compare_nucleotide_seqs

    # Each argument is an (id, sequence) pair.
    first = ("seq1", nucleotide_seq1)
    second = ("seq2", nucleotide_seq2)

    test = compare_nucleotide_seqs(first, second, ksizes)

    expected_csv = """id1,id2,ksize,jaccard,alphabet
seq1,seq2,2,1.0,purine_pyrimidine
seq1,seq2,3,0.8,purine_pyrimidine
seq1,seq2,4,0.25,purine_pyrimidine
seq1,seq2,2,1.0,nucleotide
seq1,seq2,3,0.8,nucleotide
seq1,seq2,4,0.25,nucleotide
seq1,seq2,2,1.0,weak_strong
seq1,seq2,3,1.0,weak_strong
seq1,seq2,4,1.0,weak_strong
seq1,seq2,2,1.0,amino_keto
seq1,seq2,3,1.0,amino_keto
seq1,seq2,4,0.75,amino_keto
"""
    true = pd.read_csv(StringIO(expected_csv))
    pdt.assert_equal(test, true)
Esempio n. 36
0
def test_plot_loadings_scatter(pca, kwargs):
    """Scatter loadings give a three-axes figure with one collection."""
    from flotilla.visualize.decomposition import DecompositionViz

    viz = DecompositionViz(pca.reduced_space, pca.components_,
                           pca.explained_variance_ratio_, **kwargs)
    viz.plot(plot_loadings='scatter')

    n_axes = len(viz.fig_reduced.axes)
    pdt.assert_equal(n_axes, 3)

    pdt.assert_equal(len(viz.ax_loading1.collections), 1)
    # NOTE(review): this repeats the check on `ax_loading1`; presumably the
    # second loading axes was intended - confirm against DecompositionViz.
    pdt.assert_equal(len(viz.ax_loading1.collections), 1)
    plt.close('all')
Esempio n. 37
0
    def test_td64arr_add_sub_tdi(self, box, names):
        """Add and subtract a TimedeltaIndex and boxed timedelta data.

        GH#17250 make sure result dtype is correct
        GH#19043 make sure names are propagated correctly
        """
        td64 = 'timedelta64[ns]'

        def check(result, expected):
            # Values must match and the dtype must stay timedelta64[ns].
            tm.assert_equal(result, expected)
            if box is pd.DataFrame:
                assert result.dtypes[0] == td64
            else:
                assert result.dtype == td64

        tdi = TimedeltaIndex(['0 days', '1 day'], name=names[0])
        ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1])
        total = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)],
                       name=names[2])

        ser = tm.box_expected(ser, box)
        total = tm.box_expected(total, box)

        check(tdi + ser, total)
        check(ser + tdi, total)

        difference = Series([Timedelta(hours=-3),
                             Timedelta(days=1, hours=-4)],
                            name=names[2])
        difference = tm.box_expected(difference, box)

        check(tdi - ser, difference)
        # Swapping the operands negates the difference.
        check(ser - tdi, -difference)
Esempio n. 38
0
    def test_td64arr_mul_numeric_scalar(self, box, one, tdser):
        """Multiply boxed timedeltas by a numeric scalar, in both orders.

        GH#4521 divide/multiply by integers
        """
        negative_one = -one
        doubled_one = 2 * one

        negated = Series(['-59 Days', '-59 Days', 'NaT'],
                         dtype='timedelta64[ns]')

        tdser = tm.box_expected(tdser, box)
        negated = tm.box_expected(negated, box)

        tm.assert_equal(tdser * negative_one, negated)
        tm.assert_equal(negative_one * tdser, negated)

        doubled = Series(['118 Days', '118 Days', 'NaT'],
                         dtype='timedelta64[ns]')
        doubled = tm.box_expected(doubled, box)

        tm.assert_equal(tdser * doubled_one, doubled)
        tm.assert_equal(doubled_one * tdser, doubled)
Esempio n. 39
0
    def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other):
        """Adding or subtracting ``other`` gives all-NaT periods.

        ``obj + other``, ``other + obj`` and ``obj - other`` must all equal an
        all-NaT result of the same frequency; ``other - obj`` must raise
        ``TypeError``.
        """
        n_periods = 9
        freq = "19D"

        pi = pd.period_range("1994-04-01", periods=n_periods, freq=freq)
        all_nat = pd.PeriodIndex(["NaT"] * n_periods, freq=freq)

        obj = tm.box_expected(pi, box_with_array)
        all_nat = tm.box_expected(all_nat, box_with_array)

        tm.assert_equal(obj + other, all_nat)
        tm.assert_equal(other + obj, all_nat)
        tm.assert_equal(obj - other, all_nat)
        with pytest.raises(TypeError):
            other - obj
Esempio n. 40
0
    def test___init__(self, junction_exon_triples, graph_items):
        """``SpliceGraph`` exposes the unique exons, junctions and items.

        Its graph must also relate the items the same way as the reference
        graph supplied by ``graph_items``.
        """
        from outrigger.index.events import SpliceGraph

        test = SpliceGraph(junction_exon_triples)

        graph, items = graph_items

        expected_exons = tuple(junction_exon_triples.exon.unique())
        expected_junctions = tuple(junction_exon_triples.junction.unique())

        pdt.assert_equal(test.exons, expected_exons)
        pdt.assert_equal(test.junctions, expected_junctions)
        # Item order is not compared, only membership.
        pdt.assert_equal(sorted(test.items), sorted(items))

        assert_graph_items_equal(test.graph, test.items, graph, items)
Esempio n. 41
0
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a sparse matrix through ``pd.SparseDataFrame``.

    A 2x2 identity array of ``dtype`` is wrapped with ``spmatrix`` and used to
    build a ``SparseDataFrame``.  The test then checks the frame's values, the
    matrix returned by ``to_coo()`` and the resulting dtypes.  It returns
    early, asserting nothing, when ``spmatrix`` rejects the array or does not
    keep its dtype.
    """
    # GH 4343
    tm.skip_if_no_package('scipy')

    # One dense array and one sparse matrix derived from it; both feed the
    # frame constructors and the comparisons below.
    dense = np.eye(2, dtype=dtype)
    try:
        sparse = spmatrix(dense)
        assert sparse.dtype == dense.dtype
    except (TypeError, AssertionError):
        # This spmatrix type cannot hold this dtype unchanged, so the
        # combination is unsupported and there is nothing further to test.
        return

    frame = pd.SparseDataFrame(sparse,
                               index=index,
                               columns=columns,
                               default_fill_value=fill_value)

    # Build the expectation from an object array so that every
    # dtype/fill_value combination fits: zeros become NaN, then NaN is
    # replaced by the fill value (or kept as NaN when none was given).
    as_object = dense.astype(object)
    as_object[dense == 0] = np.nan
    if fill_value is None:
        fill = np.nan
    else:
        fill = fill_value
    expected = pd.SparseDataFrame(as_object, index=index, columns=columns)
    expected = expected.fillna(fill)

    # The frame, viewed as object dtype, must match in sparse and dense form.
    frame_as_object = frame.astype(object)
    tm.assert_sp_frame_equal(frame_as_object, expected)
    tm.assert_frame_equal(frame_as_object.to_dense(), expected.to_dense())

    # Converting back must reproduce the entries of the input matrix.
    tm.assert_equal(dict(frame.to_coo().todok()), dict(sparse.todok()))

    # The dtype is kept where possible: bool stays bool, and a missing or
    # float fill value turns non-object, non-float dtypes into float.
    upcast_to_float = ((fill_value is None or is_float(fill_value))
                       and not is_object_dtype(dtype)
                       and not is_float_dtype(dtype))
    if is_bool_dtype(dtype):
        res_dtype = bool
    elif upcast_to_float:
        res_dtype = float
    else:
        res_dtype = dtype
    tm.assert_contains_all(frame.dtypes, {np.dtype(res_dtype)})
    tm.assert_equal(frame.to_coo().dtype, res_dtype)

    # Adding a string column makes the combined matrix object dtype.
    frame['strings'] = np.arange(len(frame)).astype(str)
    tm.assert_equal(frame.to_coo().dtype, np.object_)
Esempio n. 42
0
    def test_td64arr_add_sub_timestamp(self, box):
        """Check arithmetic between a Timestamp and boxed timedelta64 data.

        Adding in either order must give the shifted dates, ``ts - tdser`` and
        ``ts + (-tdser)`` must give the dates counted backwards, and
        ``tdser - ts`` must raise ``TypeError``.
        """
        # GH#11925
        stamp = Timestamp('2012-01-01')
        # TODO: parametrize over types of datetime scalar?

        deltas = tm.box_expected(
            Series(pd.timedelta_range('1 day', periods=3)), box)

        # Addition commutes.
        later = tm.box_expected(
            Series(pd.date_range('2012-01-02', periods=3)), box)
        tm.assert_equal(stamp + deltas, later)
        tm.assert_equal(deltas + stamp, later)

        # Subtracting the deltas equals adding their negation.
        earlier = tm.box_expected(
            Series(pd.date_range('2011-12-31', periods=3, freq='-1D')), box)
        tm.assert_equal(stamp - deltas, earlier)
        tm.assert_equal(stamp + (-deltas), earlier)

        # A timestamp cannot be subtracted from timedeltas.
        with pytest.raises(TypeError):
            deltas - stamp
Esempio n. 43
0
    def test_parr_add_sub_td64_nat(self, box):
        """Check period data combined with ``np.timedelta64("NaT")``.

        Adding the NaT scalar on either side, or subtracting it from the
        boxed periods, must yield an all-NaT ``PeriodIndex`` with the same
        freq; subtracting the periods from the scalar must raise
        ``TypeError``.
        """
        # GH#23320 special handling for timedelta64("NaT")
        nat = np.timedelta64("NaT")
        periods = pd.period_range("1994-04-01", periods=9, freq="19D")

        boxed = tm.box_expected(periods, box)
        all_nat = tm.box_expected(pd.PeriodIndex(["NaT"] * 9, freq="19D"),
                                  box)

        tm.assert_equal(boxed + nat, all_nat)
        tm.assert_equal(nat + boxed, all_nat)
        tm.assert_equal(boxed - nat, all_nat)

        with pytest.raises(TypeError):
            nat - boxed
Esempio n. 44
0
    def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other):
        """Check period data combined with ``other`` gives all-NaT periods.

        Same checks as for a scalar NaT, but ``other`` comes from a fixture
        defined elsewhere (presumably a NaT-valued timedelta64 array --
        confirm).  The box comes from ``box_df_fail`` rather than the plain
        box fixture.
        """
        # FIXME: DataFrame fails because, when operating column-wise,
        #  timedelta64 entries become NaT and are treated like datetimes
        periods = pd.period_range("1994-04-01", periods=9, freq="19D")

        boxed = tm.box_expected(periods, box_df_fail)
        all_nat = tm.box_expected(pd.PeriodIndex(["NaT"] * 9, freq="19D"),
                                  box_df_fail)

        tm.assert_equal(boxed + other, all_nat)
        tm.assert_equal(other + boxed, all_nat)
        tm.assert_equal(boxed - other, all_nat)

        with pytest.raises(TypeError):
            other - boxed
Esempio n. 45
0
    def test_subclass_sparse_slice(self):
        """Check that slicing a subclassed sparse frame keeps the subclass.

        Row slices taken via ``.loc``, ``.iloc`` and plain ``[]`` must compare
        equal to a ``SubclassedSparseDataFrame`` built from the matching rows
        and must carry over the custom ``testattr`` attribute.  Selecting a
        single row must compare equal to a ``SubclassedSparseSeries``.
        """
        data = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
        frame = tm.SubclassedSparseDataFrame(data)
        frame.testattr = "testattr"

        # ``.loc[:2]`` is compared against three rows, the positional slices
        # against two.
        first_two = tm.SubclassedSparseDataFrame(data[:2])
        tm.assert_sp_frame_equal(frame.loc[:2],
                                 tm.SubclassedSparseDataFrame(data[:3]))
        tm.assert_sp_frame_equal(frame.iloc[:2], first_two)
        tm.assert_sp_frame_equal(frame[:2], first_two)

        # The custom attribute must be present on every kind of slice.
        for indexer in (frame.loc, frame.iloc, frame):
            tm.assert_equal(indexer[:2].testattr, "testattr")

        # Single-row selection; names are not compared.
        for indexer in (frame.loc, frame.iloc):
            tm.assert_sp_series_equal(indexer[1],
                                      tm.SubclassedSparseSeries(data[1]),
                                      check_names=False)
Esempio n. 46
0
    def test_init(self, expression_data_no_na, expression_log_base,
                  expression_plus_one, expression_thresh):
        """Check the attributes set by the ``ExpressionData`` constructor.

        The expected data is rebuilt here from a copy of the input: one is
        added to both the data and the threshold when ``expression_plus_one``
        is truthy, and the data is converted to ``log_base`` logarithms when
        a base is given.  The constructed object must match these values and
        keep the untouched input as ``data_original``.
        """
        from flotilla.data_model import ExpressionData

        expression = ExpressionData(expression_data_no_na.copy(),
                                    log_base=expression_log_base,
                                    plus_one=expression_plus_one,
                                    thresh=expression_thresh)

        # Recompute the expected transformed data and threshold by hand.
        expected_thresh = float(expression_thresh)
        expected_data = expression_data_no_na.copy()
        if expression_plus_one:
            expected_data += 1
            expected_thresh += 1
        if expression_log_base is not None:
            # Change of base: log_b(x) = ln(x) / ln(b).
            expected_data = np.divide(np.log(expected_data),
                                      np.log(expression_log_base))

        scalar_checks = (
            ("plus_one", expression_plus_one),
            ("log_base", expression_log_base),
            ("thresh", expected_thresh),
        )
        for attr, wanted in scalar_checks:
            pdt.assert_equal(getattr(expression, attr), wanted)

        pdt.assert_frame_equal(expression.data_original, expression_data_no_na)
        pdt.assert_frame_equal(expression.data, expected_data)
Esempio n. 47
0
 def test_arith_zero_dim_ndarray(self, other):
     """Check that adding ``np.array(other)`` equals adding ``other`` itself.

     ``other`` is a fixture defined elsewhere; the test name suggests it is
     a scalar, so ``np.array(other)`` would be zero-dimensional -- confirm.
     """
     arr = integer_array([1, None, 2])
     wrapped = np.array(other)
     tm.assert_equal(arr + wrapped, arr + other)
Esempio n. 48
0
 def test_constructor_unwraps_index(self, indices):
     """Check that rebuilding an index from itself keeps the same ``_data``.

     ``type(indices)(indices)`` is called and the ``_data`` of the result is
     compared with the ``_data`` of the original.  ``MultiIndex`` inputs are
     skipped because they have no ``._data``.
     """
     if isinstance(indices, pd.MultiIndex):
         # pytest.skip() raises the skip exception itself, so wrapping it in
         # ``raise`` is unnecessary (the ``raise`` was never reached).
         pytest.skip("MultiIndex has no ._data")
     rebuilt = type(indices)(indices)
     tm.assert_equal(indices._data, rebuilt._data)
Esempio n. 49
0
 def _validate_periodindex(self, pickled, current):
     """Assert that ``pickled`` matches ``current`` and has a monthly freq.

     Besides index equality, the freq of ``pickled`` must be a ``MonthEnd``
     instance equal to ``MonthEnd()``, its ``freqstr`` must be ``'M'``, and
     shifting both indexes by two must give equal results.
     """
     tm.assert_index_equal(pickled, current)

     restored_freq = pickled.freq
     tm.assertIsInstance(restored_freq, MonthEnd)
     tm.assert_equal(restored_freq, MonthEnd())
     tm.assert_equal(pickled.freqstr, 'M')

     # The freq must still work for arithmetic, not merely compare equal.
     tm.assert_index_equal(pickled.shift(2), current.shift(2))
Esempio n. 50
0
    def test_parr_cmp_period_scalar(self, freq, box_with_array):
        """Check the six comparisons between boxed periods and one Period.

        Every operator is checked with the scalar on the right and, using the
        mirrored operator, with the scalar on the left; both must give the
        same boolean result.
        """
        # GH#13200
        # Comparing an Index is expected to give a plain ndarray; every other
        # box is expected to give its own type.
        if box_with_array is pd.Index:
            xbox = np.ndarray
        else:
            xbox = box_with_array

        def boxed_mask(flags):
            # Wrap the expected booleans in the comparison's result type.
            return tm.box_expected(np.array(flags), xbox)

        months = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                             freq=freq)
        base = tm.box_expected(months, box_with_array)
        per = Period('2011-02', freq=freq)

        mask = boxed_mask([False, True, False, False])
        tm.assert_equal(base == per, mask)
        tm.assert_equal(per == base, mask)

        mask = boxed_mask([True, False, True, True])
        tm.assert_equal(base != per, mask)
        tm.assert_equal(per != base, mask)

        mask = boxed_mask([False, False, True, True])
        tm.assert_equal(base > per, mask)
        tm.assert_equal(per < base, mask)

        mask = boxed_mask([True, False, False, False])
        tm.assert_equal(base < per, mask)
        tm.assert_equal(per > base, mask)

        mask = boxed_mask([False, True, True, True])
        tm.assert_equal(base >= per, mask)
        tm.assert_equal(per <= base, mask)

        mask = boxed_mask([True, True, False, False])
        tm.assert_equal(base <= per, mask)
        tm.assert_equal(per >= base, mask)
Esempio n. 51
0
    def test_parr_cmp_pi(self, freq, box_with_array):
        """Check the six comparisons between boxed periods and a PeriodIndex.

        The left operand is boxed into ``box_with_array``; the right operand
        stays an unboxed ``PeriodIndex`` of the same length and freq.
        """
        # GH#13200
        # Comparing an Index is expected to give a plain ndarray; every other
        # box is expected to give its own type.
        if box_with_array is pd.Index:
            xbox = np.ndarray
        else:
            xbox = box_with_array

        def boxed_mask(flags):
            # Wrap the expected booleans in the comparison's result type.
            return tm.box_expected(np.array(flags), xbox)

        months = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                             freq=freq)
        base = tm.box_expected(months, box_with_array)

        # TODO: could also box idx?
        idx = PeriodIndex(['2011-02', '2011-01', '2011-03', '2011-05'],
                          freq=freq)

        mask = boxed_mask([False, False, True, False])
        tm.assert_equal(base == idx, mask)

        mask = boxed_mask([True, True, False, True])
        tm.assert_equal(base != idx, mask)

        mask = boxed_mask([False, True, False, False])
        tm.assert_equal(base > idx, mask)

        mask = boxed_mask([True, False, False, True])
        tm.assert_equal(base < idx, mask)

        mask = boxed_mask([False, True, True, False])
        tm.assert_equal(base >= idx, mask)

        mask = boxed_mask([True, False, True, True])
        tm.assert_equal(base <= idx, mask)
Esempio n. 52
0
    def test_ufunc_coercions(self, holder):
        """Check that float-producing operations on ``holder`` yield float64.

        Each of ``np.sqrt``, ``np.divide`` and the ``+ - * /`` operators with
        a float operand must return an "f8" result of the matching box type
        (Series for a Series holder, Index otherwise), keeping the name "x".
        """
        box = pd.Series if holder is pd.Series else pd.Index
        idx = holder([1, 2, 3, 4, 5], name="x")

        def check(result, values):
            # The result must be float64 and stay in the same container kind.
            assert result.dtype == "f8" and isinstance(result, box)
            expected = tm.box_expected(pd.Float64Index(values, name="x"), box)
            tm.assert_equal(result, expected)

        # NumPy ufuncs applied directly.
        check(np.sqrt(idx), np.sqrt(np.array([1, 2, 3, 4, 5])))
        check(np.divide(idx, 2.0), [0.5, 1.0, 1.5, 2.0, 2.5])

        # _evaluate_numeric_binop
        check(idx + 2.0, [3.0, 4.0, 5.0, 6.0, 7.0])
        check(idx - 2.0, [-1.0, 0.0, 1.0, 2.0, 3.0])
        check(idx * 1.0, [1.0, 2.0, 3.0, 4.0, 5.0])
        check(idx / 2.0, [0.5, 1.0, 1.5, 2.0, 2.5])
Esempio n. 53
0
def compare_series_ts(result, expected, typ, version):
    """Compare two time series, including the freq of their indexes.

    Beyond value equality, the index freq of ``result`` must equal that of
    ``expected``, must not be normalized, and must behave like one day in
    arithmetic with ``Day`` and ``pandas.Timedelta``.  ``typ`` and
    ``version`` are accepted but not used here.
    """
    # GH 7748
    tm.assert_series_equal(result, expected)
    tm.assert_equal(result.index.freq, expected.index.freq)
    tm.assert_equal(result.index.freq.normalize, False)
    tm.assert_series_equal(result > 0, expected > 0)

    # GH 9291
    offset = result.index.freq
    tm.assert_equal(offset + Day(1), Day(2))

    # Adding a Timedelta must give a Timedelta one day larger.
    for extra in (dict(hours=1), dict(nanoseconds=1)):
        total = offset + pandas.Timedelta(**extra)
        tm.assert_equal(isinstance(total, pandas.Timedelta), True)
        tm.assert_equal(total, pandas.Timedelta(days=1, **extra))
Esempio n. 54
0
    def test_ufunc_coercions(self, holder):
        """Check that float-producing operations on ``holder`` yield float64.

        ``np.sqrt``, ``np.divide`` and the four arithmetic operators with a
        float operand are applied to integer data named 'x'.  Every result
        must have dtype 'f8', be a Series when ``holder`` is ``pd.Series``
        and an Index otherwise, and equal the expected float values.
        """
        box = pd.Series if holder is pd.Series else pd.Index
        idx = holder([1, 2, 3, 4, 5], name='x')

        def assert_float_result(result, values):
            # Check dtype and container first, then the values and name.
            assert result.dtype == 'f8' and isinstance(result, box)
            wanted = tm.box_expected(pd.Float64Index(values, name='x'), box)
            tm.assert_equal(result, wanted)

        # Direct ufunc calls.
        assert_float_result(np.sqrt(idx),
                            np.sqrt(np.array([1, 2, 3, 4, 5])))
        assert_float_result(np.divide(idx, 2.), [0.5, 1., 1.5, 2., 2.5])

        # _evaluate_numeric_binop
        assert_float_result(idx + 2., [3., 4., 5., 6., 7.])
        assert_float_result(idx - 2., [-1., 0., 1., 2., 3.])
        assert_float_result(idx * 1., [1., 2., 3., 4., 5.])
        assert_float_result(idx / 2., [0.5, 1., 1.5, 2., 2.5])
Esempio n. 55
0
    def test_mul_int_identity(self, op, numeric_idx, box):
        """Check that ``op(boxed, 1)`` returns data equal to the input.

        ``op`` is a fixture defined elsewhere (presumably a multiplication
        variant, judging by the test name -- confirm).
        """
        boxed = tm.box_expected(numeric_idx, box)
        tm.assert_equal(op(boxed, 1), boxed)
Esempio n. 56
0
 def test_detect_intraday(self, positions, transactions, expected):
     """Check ``detect_intraday`` at threshold 0.25 against ``expected``."""
     assert_equal(
         detect_intraday(positions, transactions, threshold=0.25),
         expected)
Esempio n. 57
0
def _assert_same_values_and_dtype(res, exp):
    """Assert that ``res`` and ``exp`` share a dtype and nearly equal values.

    The dtype is compared first, so a dtype mismatch is reported before any
    difference in the values.
    """
    actual_dtype, wanted_dtype = res.dtype, exp.dtype
    tm.assert_equal(actual_dtype, wanted_dtype)
    tm.assert_almost_equal(res, exp)
Esempio n. 58
0
 def test_estimate_intraday(self, returns, positions, transactions,
                            expected):
     """Check the shape of the ``estimate_intraday`` result.

     Only ``.shape`` is compared with ``expected``; the estimated values
     themselves are not inspected.
     """
     estimated = estimate_intraday(returns, positions, transactions)
     result_shape = estimated.shape
     assert_equal(result_shape, expected)
Esempio n. 59
0
def compare_index_period(result, expected, typ, version):
    """Compare two period indexes and check that the freq is month-end.

    ``result`` must equal ``expected``, have a ``MonthEnd`` freq equal to
    ``MonthEnd()`` with ``freqstr`` 'M', and give the same index as
    ``expected`` after shifting by two.  ``typ`` and ``version`` are
    accepted but not used here.
    """
    tm.assert_index_equal(result, expected)

    loaded_freq = result.freq
    tm.assertIsInstance(loaded_freq, MonthEnd)
    tm.assert_equal(loaded_freq, MonthEnd())
    tm.assert_equal(result.freqstr, 'M')

    # The freq must be usable for arithmetic as well.
    tm.assert_index_equal(result.shift(2), expected.shift(2))
Esempio n. 60
0
def test_null_transformer2(data):
    """Checks impute_algorithm='ts_interpolate'.

    Three ``NullTransformer`` configurations are run on ``data``:

    1. Default ``impute_params`` with ``impute_all=False``: the output is
       expected to equal the input.
    2. ``orders=[1]``, ``agg_func=np.nanmean``, ``iter_num=2`` with
       ``impute_all=False``: some NaNs are expected to remain.
    3. The same parameters with ``impute_all=True``: the expected output
       contains no NaNs.

    ``data`` is a fixture defined elsewhere; the expected frames suggest it
    has columns "a" to "d" and four rows -- confirm against the fixture.
    """
    null_transform = NullTransformer(impute_algorithm="ts_interpolate",
                                     impute_all=False)
    null_transform.fit(data)
    # No impute_params were passed, so after fit() the transformer is
    # expected to report these defaults.
    assert null_transform.impute_params == dict(orders=[7, 14, 21],
                                                agg_func=np.mean,
                                                iter_num=5)
    result = null_transform.transform(data)
    # `orders` is too large for this dataset, nothing is imputed
    assert_equal(result, data)

    # two iterations
    null_transform = NullTransformer(impute_algorithm="ts_interpolate",
                                     impute_params=dict(orders=[1],
                                                        agg_func=np.nanmean,
                                                        iter_num=2),
                                     impute_all=False)
    result = null_transform.fit_transform(data)
    # Columns "b" and "d" are expected to keep a NaN in their first row.
    expected = pd.DataFrame({
        "a": (0.0, 0.0, -1.0, 1.0),
        "b": (np.nan, 2.0, 2.0, 2.0),
        "c": (2.0, 3.0, 3.0, 9.0),
        "d": (np.nan, 4.0, -4.0, 16.0),
    })
    assert_equal(result, expected)
    # Expected per-column bookkeeping: "b" and "d" report one remaining
    # missing value each, matching the NaNs left in `expected` above.
    assert null_transform.missing_info == {
        "a": {
            "initial_missing_num": 1,
            "final_missing_num": 0
        },
        "b": {
            "initial_missing_num": 3,
            "final_missing_num": 1
        },
        "c": {
            "initial_missing_num": 1,
            "final_missing_num": 0
        },
        "d": {
            "initial_missing_num": 1,
            "final_missing_num": 1
        },
    }

    # impute_all=True
    null_transform = NullTransformer(impute_algorithm="ts_interpolate",
                                     impute_params=dict(orders=[1],
                                                        agg_func=np.nanmean,
                                                        iter_num=2),
                                     impute_all=True)
    result = null_transform.fit_transform(data)
    # Same as the previous expectation, except that the first rows of "b"
    # and "d" are now filled.
    expected = pd.DataFrame({
        "a": (0.0, 0.0, -1.0, 1.0),
        "b": (2.0, 2.0, 2.0, 2.0),
        "c": (2.0, 3.0, 3.0, 9.0),
        "d": (4.0, 4.0, -4.0, 16.0),
    })
    assert_equal(result, expected)
    # `final_missing_num` are filled in by the second pass.
    # The counts reflect the first pass.
    # Hence `missing_info` is expected to be identical to the
    # impute_all=False case even though `expected` has no NaNs.
    assert null_transform.missing_info == {
        "a": {
            "initial_missing_num": 1,
            "final_missing_num": 0
        },
        "b": {
            "initial_missing_num": 3,
            "final_missing_num": 1
        },
        "c": {
            "initial_missing_num": 1,
            "final_missing_num": 0
        },
        "d": {
            "initial_missing_num": 1,
            "final_missing_num": 1
        },
    }