def test_ignore_datelike_data(self):
    df = self.df.copy()
    df['date'] = pd.date_range('2010-01-01', periods=len(df), freq='d')
    result = ag.PairGrid(self.df).data
    expected = df.drop('date', axis=1)
    tm.assert_frame_equal(result, expected)
def test_make_forecasting_frame_list(self):
    df, y = dataframe_functions.make_forecasting_frame(x=range(4), kind="test",
                                                       max_timeshift=1, rolling_direction=1)
    expected_df = pd.DataFrame({"id": [1, 2, 3],
                                "kind": ["test"] * 3,
                                "value": [0., 1., 2.],
                                "time": [0., 1., 2.]})
    expected_y = pd.Series(data=[1, 2, 3], index=[1, 2, 3], name="value")
    assert_frame_equal(df.sort_index(axis=1), expected_df.sort_index(axis=1))
    assert_series_equal(y, expected_y)
def test_establish_variables_from_mix(self):
    p = lm._LinearPlotter()
    p.establish_variables(self.df, x="x", y=self.df.y)
    pdt.assert_series_equal(p.x, self.df.x)
    pdt.assert_series_equal(p.y, self.df.y)
    pdt.assert_frame_equal(p.data, self.df)
def test_variables_from_mix(self):
    p = lm._RegressionPlotter("x", self.df.y + 1, data=self.df)
    npt.assert_array_equal(p.x, self.df.x)
    npt.assert_array_equal(p.y, self.df.y + 1)
    pdt.assert_frame_equal(p.data, self.df)
def test_download_insert_hist_data(self):
    async def run(loop, req, broker, insert_limit):
        engine = await aiosa.create_engine(
            user=self.db_info['user'], db=self.db_info['db'],
            host=self.db_info['host'], password=self.db_info['password'],
            loop=loop)
        # Download, insert and query
        dl_blk = await download_insert_hist_data(req, broker, engine, insert_limit)
        db_blk = await query_hist_data(
            engine, req.SecType, req.Symbol, req.DataType, req.BarSize, *insert_limit)
        engine.close()
        await engine.wait_closed()
        return dl_blk, db_blk

    # Execute
    self._clear_db()
    init_db(self.db_info)
    req = testdata_download_insert_hist_data['req']
    broker, login = testdata_download_insert_hist_data['broker']
    insert_limit = testdata_download_insert_hist_data['insert_limit']
    broker.connect(*login)
    loop = asyncio.get_event_loop()
    dl_blk, db_blk = loop.run_until_complete(run(loop, req, broker, insert_limit))
    broker_blk = broker.req_hist_data(req)[0]

    # Verify
    lim0 = insert_limit[0]
    lim1 = insert_limit[1]
    assert_frame_equal(dl_blk.df, broker_blk.df)
    assert_frame_equal(db_blk.df, broker_blk.df.loc(axis=0)[:, :, :, lim0:lim1])
def test_get_hist_data(self):
    async def run(loop, req, blk_db, broker):
        # Populate database
        engine = await aiosa.create_engine(
            user=self.db_info['user'], db=self.db_info['db'],
            host=self.db_info['host'], password=self.db_info['password'],
            loop=loop, echo=False)
        await insert_hist_data(engine, 'Stock', blk_db)
        engine.close()
        await engine.wait_closed()
        # Get hist data
        blk_db = await get_hist_data(req, broker, mysql={**self.db_info, 'loop': loop})
        return blk_db

    from time import sleep
    for data in testdata_get_hist_data:
        sleep(1.5)  # Avoid IB pacing violation
        _logger.debug("\n======= get_hist_data_async: %s ======\n", data['testcase'])
        self._clear_db()
        init_db(self.db_info)
        blk_db = MarketDataBlock(data['df_db'])
        broker = data['broker'][0](*data['broker'][1])
        blk_exp = MarketDataBlock(data['blk_exp.df'])
        blk_exp.tz = data['xchg_tz']
        loop = asyncio.get_event_loop()
        blk_ret = loop.run_until_complete(run(loop, data['req'], blk_db, broker))
        assert_frame_equal(blk_ret.df, blk_exp.df)
def assert_frame_not_equal(df1, df2, **kwargs):
    # assert_frame_equal exists, but we need the ability to assert that frames are *not* equal.
    # The raise must live in the else clause: raising inside the try would be swallowed by the
    # except below, so the assertion could never fail.
    try:
        assert_frame_equal(df1, df2, **kwargs)
    except AssertionError:
        # Frames differ, which is what we expect here.
        pass
    else:
        raise AssertionError('DataFrames are equal.')
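# Minimal usage sketch for assert_frame_not_equal above (the frames, the pytest usage and the
# test name are illustrative, not part of the original suite); extra keyword arguments are
# forwarded to pandas.testing.assert_frame_equal.
import pandas as pd
import pytest

def test_assert_frame_not_equal_example():
    left = pd.DataFrame({"a": [1, 2, 3]})
    right = pd.DataFrame({"a": [1, 2, 4]})
    assert_frame_not_equal(left, right)  # passes: the frames differ
    with pytest.raises(AssertionError):
        assert_frame_not_equal(left, left.copy())  # equal frames raise AssertionError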
def test_two_iterations_with_metadata_where_values_are_unique(self):
    # This should be identical to test_without_metadata_df_two_iterations,
    # with just the `sample-id` replaced with `pet`.
    columns = pd.MultiIndex.from_product([[1, 200], [1, 2]], names=['depth', 'iter'])
    data = pd.DataFrame(data=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
                        columns=columns, index=['russ', 'milo', 'pea'])
    counts = pd.DataFrame(data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
                          columns=columns, index=['russ', 'milo', 'pea'])
    obs = _compute_summary(data, 'pet', counts=counts)

    d = [
        ['russ', 1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2., 1],
        ['russ', 200, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4., 1],
        ['milo', 1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2., 1],
        ['milo', 200, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4., 1],
        ['pea', 1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2., 1],
        ['pea', 200, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4., 1],
    ]
    exp = pd.DataFrame(data=d, columns=['pet', 'depth', 'min', '2%', '9%', '25%',
                                        '50%', '75%', '91%', '98%', 'max', 'count'])
    pdt.assert_frame_equal(exp, obs)
def test_mnl_estimation(obs, alts):
    """
    Confirm that estimated params from the new interface match
    urbansim.urbanchoice. Only runs if the urbansim package has been installed.
    """
    try:
        from urbansim.urbanchoice.mnl import mnl_estimate
    except ImportError:
        print("Comparison of MNL estimation results skipped because urbansim is not installed")
        return

    model_expression = 'obsval + altval - 1'
    mct = MergedChoiceTable(obs, alts, 'choice')

    # new interface
    m = MultinomialLogit(mct, model_expression)
    r = m.fit().get_raw_results()

    # old interface
    dm = dmatrix(model_expression, mct.to_frame())
    chosen = np.reshape(mct.to_frame()[mct.choice_col].values, (100, 5))
    log_lik, fit = mnl_estimate(np.array(dm), chosen, numalts=5)

    for k, v in log_lik.items():
        assert v == pytest.approx(r['log_likelihood'][k], 0.00001)
    assert_frame_equal(fit, r['fit_parameters'][['Coefficient', 'Std. Error', 'T-Score']])
def test_create_lineages(self):
    df_with_lins = clustering.df_add_lineages(self.df_mult_groups, 0.85)
    expected = self.df_mult_groups.reset_index(drop=True)
    expected['lineage'] = [0, 0, 1, 2, 3]
    assert_frame_equal(df_with_lins, expected)
def test_init(self):
    sorted_feature_names = ["is_dutch", "is_english", "value_number"]
    self.assertEquals(
        sorted(self.frame.features.keys()),
        sorted_feature_names
    )
    self.assertTrue(callable(self.frame.content))
    assert_frame_equal(self.frame.data, self.test_frame, check_like=True)
def test_variables_from_frame(self):
    p = lm._RegressionPlotter("x", "y", data=self.df, units="s")
    pdt.assert_series_equal(p.x, self.df.x)
    pdt.assert_series_equal(p.y, self.df.y)
    pdt.assert_series_equal(p.units, self.df.s)
    pdt.assert_frame_equal(p.data, self.df)
def test_munge_metadata_ids_different_order(self):
    md = qiime2.CategoricalMetadataColumn(
        pd.Series(['russ', 'milo', 'russ'], name='pet',
                  index=pd.Index(['S2', 'S1', 'S3'], name='id')))
    obs = _munge_metadata(md, self.table, 'both')

    exp_idx = pd.Index(['milo | S1', 'russ | S2', 'russ | S3'], name='pet | id')
    exp = pd.DataFrame([[0, 10], [10, 12], [10, 11]],
                       columns=['O1', 'O2'], index=exp_idx)
    assert_frame_equal(exp, obs)
def test_munge_metadata_empty_values(self):
    md = qiime2.CategoricalMetadataColumn(
        pd.Series([None, 'russ', np.nan], name='pet',
                  index=pd.Index(['S1', 'S2', 'S3'], name='id')))
    obs = _munge_metadata(md, self.table, 'both')

    exp_idx = pd.Index(['[No Value] | S1', 'russ | S2', '[No Value] | S3'],
                       name='pet | id')
    exp = pd.DataFrame([[0, 10], [10, 12], [10, 11]],
                       columns=['O1', 'O2'], index=exp_idx)
    assert_frame_equal(exp, obs)
def test_munge_metadata_sort_samples(self):
    md = qiime2.CategoricalMetadataColumn(
        pd.Series(['peanut', 'milo', 'russ'], name='pet',
                  index=pd.Index(['S1', 'S2', 'S3'], name='id')))
    obs = _munge_metadata(md, self.table, 'features')

    exp_idx = pd.Index(['milo | S2', 'peanut | S1', 'russ | S3'], name='pet | id')
    exp = pd.DataFrame([[10, 12], [0, 10], [10, 11]],
                       columns=['O1', 'O2'], index=exp_idx)
    assert_frame_equal(exp, obs)
def test_b64(self):
    """Test the binary encoding"""
    if self.should_skip:
        return self.skip('pandas is not importable')
    # An array of substantial size is stored as b64.
    a = np.random.rand(20, 10)
    index = ['Row' + str(i) for i in range(1, a.shape[0] + 1)]
    columns = ['Col' + str(i) for i in range(1, a.shape[1] + 1)]
    df = pd.DataFrame(a, index=index, columns=columns)
    decoded_df = self.roundtrip(df)
    assert_frame_equal(decoded_df, df)
def test_TableFormula_sort(self):
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    table = TableFormula()
    table["A"] = [0, 1]
    table.add_column_vector("B", [6, 7])
    table.sort(lambda row: -row["B"])
    exp = pandas.DataFrame(dict(A=[1, 0], B=[7, 6], C=[1, 0]))
    exp = exp.set_index("C")
    exp.index.rename(None, inplace=True)
    assert_frame_equal(table, exp, check_index_type=False)
def test_run_ccc_example_output(file_name):
    '''
    Tests the script in ../../run_examples/run_ccc_example.py to ensure that
    it produces the expected results that are checked into the repo.
    '''
    run_example_path = os.path.join(CUR_PATH, '..', '..', 'run_examples')
    test_path = os.path.join(run_example_path, file_name + '.csv')
    test_df = pd.read_csv(test_path)
    expected_path = os.path.join(run_example_path, file_name + '_expected.csv')
    expected_df = pd.read_csv(expected_path)
    assert_frame_equal(test_df, expected_df)
def test_get_country():
    # call the function
    df = country.get_country(interim_data, 'Chile')
    # load my previous dataset
    base = pd.read_csv(processed_data)
    # check if I am getting a dataframe
    assert isinstance(df, pd.DataFrame)
    assert isinstance(base, pd.DataFrame)
    # check that they are the same dataframes
    pdt.assert_frame_equal(df, base)
def test_TableFormula_add(self):
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    table = TableFormula()
    table["A"] = [0, 1]
    table.add_column_index([4, 5])
    table.add_column_vector("B", [6, 7])
    table.addc("C", lambda row: row["A"] * row["B"])
    exp = pandas.DataFrame(
        dict(A=[0, 1], B=[6, 7], C=[0, 7], __key__=[4, 5]))
    exp.set_index("__key__", inplace=True)
    exp.index.rename(None, inplace=True)
    assert_frame_equal(table, exp)
def test_json(self):
    ss = self.station.statistics
    ss.calc_temperature_stats()
    ss.calc_precipitation_stats()
    ss.calc_humidity_stats()
    ss.calc_radiation_stats()
    ss.calc_wind_stats()

    with tempfile.NamedTemporaryFile() as tmp:
        ss.to_json(tmp.name)
        tmp.seek(0)
        ss2 = melodist.StationStatistics.from_json(tmp.name)

    assert_series_equal(ss.temp.max_delta, ss2.temp.max_delta)
    assert_frame_equal(ss.temp.mean_course, ss2.temp.mean_course)

    assert_equal(ss.precip.months, ss2.precip.months)
    assert all([cs1 == cs2 for cs1, cs2 in zip(ss.precip.stats, ss2.precip.stats)])

    assert ss.hum.a0 == ss2.hum.a0
    assert ss.hum.a1 == ss2.hum.a1
    assert ss.hum.kr == ss2.hum.kr
    assert_series_equal(ss.hum.month_hour_precip_mean, ss2.hum.month_hour_precip_mean)

    assert_frame_equal(ss.glob.angstroem, ss2.glob.angstroem)
    assert_frame_equal(ss.glob.bristcamp, ss2.glob.bristcamp)
    assert_frame_equal(ss.glob.mean_course, ss2.glob.mean_course)

    assert ss.wind.a == ss2.wind.a
    assert ss.wind.b == ss2.wind.b
    assert ss.wind.t_shift == ss2.wind.t_shift
def test_adding_content_mixed(self):
    self.skipTest("Bug: GH-109")
    old = list(self.get_iterator())[-2:]

    def update(ind):
        ind.properties["value"] = int(ind.properties["value"]) * 5
        return ind

    updated = list(map(update, old))
    self.frame.load_content(
        lambda: iter(list(self.get_extra_iterator()) + updated)
    )
    self.test_frame_extra["value_number"].loc[[7, 8]] *= 5
    assert_frame_equal(self.frame.data, self.test_frame_extra, check_like=True)
def test_from_columns(self):
    tsn = "TEST_TIME_SERIES"

    fset = ComprehensiveFCParameters()
    self.assertRaises(TypeError, from_columns, 42)
    self.assertRaises(ValueError, from_columns, ["This is not a column name"])
    self.assertRaises(ValueError, from_columns, ["This__neither"])
    self.assertRaises(ValueError, from_columns, ["This__also__not"])

    # Aggregate functions
    feature_names = [tsn + '__sum_values', tsn + "__median", tsn + "__length",
                     tsn + "__sample_entropy"]

    # Aggregate functions with params
    feature_names += [tsn + '__quantile__q_10', tsn + '__quantile__q_70',
                      tsn + '__number_peaks__n_30', tsn + '__value_count__value_inf',
                      tsn + '__value_count__value_-inf', tsn + '__value_count__value_nan']

    # Apply functions
    feature_names += [tsn + '__ar_coefficient__k_20__coeff_4',
                      tsn + '__ar_coefficient__coeff_10__k_-1']

    kind_to_fc_parameters = from_columns(feature_names)

    six.assertCountEqual(self, list(kind_to_fc_parameters[tsn].keys()),
                         ["sum_values", "median", "length", "sample_entropy",
                          "quantile", "number_peaks", "ar_coefficient", "value_count"])

    self.assertEqual(kind_to_fc_parameters[tsn]["sum_values"], None)
    self.assertEqual(kind_to_fc_parameters[tsn]["ar_coefficient"],
                     [{"k": 20, "coeff": 4}, {"k": -1, "coeff": 10}])
    self.assertEqual(kind_to_fc_parameters[tsn]["value_count"],
                     [{"value": np.PINF}, {"value": np.NINF}, {"value": np.NaN}])

    # test that it passes for all functions
    fset = ComprehensiveFCParameters()
    X_org = extract_features(pd.DataFrame({"value": [1, 2, 3], "id": [1, 1, 1]}),
                             default_fc_parameters=fset,
                             column_id="id", column_value="value", n_jobs=0)

    inferred_fset = from_columns(X_org)

    X_new = extract_features(pd.DataFrame({"value": [1, 2, 3], "id": [1, 1, 1]}),
                             kind_to_fc_parameters=inferred_fset,
                             column_id="id", column_value="value", n_jobs=0)

    assert_frame_equal(X_org.sort_index(), X_new.sort_index())
def check_load_cached_dataset(name):
    # Test the caching using a temporary file.
    # With Python 3.2+, we could use the tempfile.TemporaryDirectory()
    # context manager instead of this try...finally statement.
    tmpdir = tempfile.mkdtemp()
    try:
        # download and cache
        ds = load_dataset(name, cache=True, data_home=tmpdir)
        # use cached version
        ds2 = load_dataset(name, cache=True, data_home=tmpdir)
        pdt.assert_frame_equal(ds, ds2)
    finally:
        shutil.rmtree(tmpdir)
def test_make_forecasting_frame_pdSeries(self):
    t_index = pd.date_range('1/1/2011', periods=4, freq='H')
    df, y = dataframe_functions.make_forecasting_frame(
        x=pd.Series(data=range(4), index=t_index),
        kind="test", max_timeshift=1, rolling_direction=1)

    expected_y = pd.Series(data=[1, 2, 3],
                           index=pd.DatetimeIndex(["2011-01-01 01:00:00",
                                                   "2011-01-01 02:00:00",
                                                   "2011-01-01 03:00:00"]),
                           name="value")
    expected_df = pd.DataFrame({"id": pd.DatetimeIndex(["2011-01-01 01:00:00",
                                                        "2011-01-01 02:00:00",
                                                        "2011-01-01 03:00:00"]),
                                "kind": ["test"] * 3,
                                "value": [0., 1., 2.],
                                "time": pd.DatetimeIndex(["2011-01-01 00:00:00",
                                                          "2011-01-01 01:00:00",
                                                          "2011-01-01 02:00:00"])})
    assert_frame_equal(df.sort_index(axis=1), expected_df.sort_index(axis=1))
    assert_series_equal(y, expected_y)
def test_market_data_block_merge(self):
    testdata = testdata_market_data_block_merge
    blk = MarketDataBlock(pd.DataFrame(testdata[0]), datatype='TRADES', tz='US/Pacific')
    _logger.info('\n\nBlockTests:merge: Starting blk:\n%s', blk.df)
    for data in testdata[1:]:
        blk.update(pd.DataFrame(data[0]), datatype='TRADES', tz='US/Pacific')
        blk_direct = MarketDataBlock(
            pd.DataFrame(data[1]), datatype='TRADES', tz='US/Pacific')
        _logger.debug('\n\nBlockTests:merge: blk.df\n%s', blk.df[:3])
        _logger.debug('\n\nBlockTests:merge: blk_direct.df\n%s', blk_direct.df[:3])
        assert_frame_equal(blk.df, blk_direct.df)
        self.assertEqual(list(blk.df.index.names), blk.__class__.data_index)
        self.assertEqual(list(blk_direct.df.index.names), blk.__class__.data_index)
def test_dataframe_roundtrip(self):
    if self.should_skip:
        return self.skip('pandas is not importable')
    df = pd.DataFrame({
        'an_int': np.int_([1, 2, 3]),
        'a_float': np.float_([2.5, 3.5, 4.5]),
        'a_nan': np.array([np.nan] * 3),
        'a_minus_inf': np.array([-np.inf] * 3),
        'an_inf': np.array([np.inf] * 3),
        'a_str': np.str_('foo'),
        'a_unicode': np.unicode_('bar'),
        'date': np.array([np.datetime64('2014-01-01')] * 3),
        'complex': np.complex_([1 - 2j, 2 - 1.2j, 3 - 1.3j]),
        # TODO: the following dtypes are not currently supported.
        # 'object': np.object_([{'a': 'b'}]*3),
    })
    decoded_df = self.roundtrip(df)
    assert_frame_equal(decoded_df, df)
def test_resetting_features_no_content(self):
    features = [
        TestNumericFeaturesFrame.is_dutch
    ]
    frame = NumericFeaturesFrame(
        TestNumericFeaturesFrame.get_identifier,
        features
    )
    frame.reset(features=[
        TestNumericFeaturesFrame.value_number,
        TestNumericFeaturesFrame.is_english
    ])
    self.test_frame = self.test_frame.drop(labels="is_dutch", axis=1)
    assert_frame_equal(frame.data, self.test_frame[0:0], check_like=True)
    sorted_feature_names = ["is_english", "value_number"]
    self.assertEquals(
        sorted(frame.features.keys()),
        sorted_feature_names
    )
def test_two_iterations_with_metadata_where_values_are_identical(self):
    columns = pd.MultiIndex.from_product([[1, 200], [1, 2]], names=['depth', 'iter'])
    data = pd.DataFrame(data=[[3, 6, 9, 9]], columns=columns, index=['milo'])
    counts = pd.DataFrame(data=[[3, 3, 3, 3]], columns=columns, index=['milo'])
    obs = _compute_summary(data, 'pet', counts=counts)

    d = [
        ['milo', 1, 3., 3.06, 3.27, 3.75, 4.5, 5.25, 5.73, 5.94, 6., 3],
        ['milo', 200, 9., 9., 9., 9., 9., 9., 9., 9., 9., 3],
    ]
    exp = pd.DataFrame(data=d, columns=['pet', 'depth', 'min', '2%', '9%', '25%',
                                        '50%', '75%', '91%', '98%', 'max', 'count'])
    pdt.assert_frame_equal(exp, obs)
def test_adding_features(self):
    features = [
        TestNumericFeaturesFrame.is_dutch
    ]
    frame = NumericFeaturesFrame(
        TestNumericFeaturesFrame.get_identifier,
        features,
        self.get_iterator
    )
    frame.load_features([
        TestNumericFeaturesFrame.value_number,
        TestNumericFeaturesFrame.is_english
    ])
    assert_frame_equal(frame.data, self.test_frame, check_like=True)
    sorted_feature_names = ["is_dutch", "is_english", "value_number"]
    self.assertEquals(
        sorted(frame.features.keys()),
        sorted_feature_names
    )
def test_test_data(self):
    pd_testing.assert_frame_equal(self.exercises.test_data, self.test_data)
def assert_geodataframe_equal(
    left,
    right,
    check_dtype=True,
    check_index_type="equiv",
    check_column_type="equiv",
    check_frame_type=True,
    check_like=False,
    check_less_precise=False,
    check_geom_type=False,
    check_crs=True,
):
    """
    Check that two GeoDataFrames are equal.

    Parameters
    ----------
    left, right : two GeoDataFrames
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type, check_column_type : bool, default 'equiv'
        Check that index types are equal.
    check_frame_type : bool, default True
        Check that both are same type (*and* are GeoDataFrames). If False,
        will attempt to convert both into GeoDataFrame.
    check_like : bool, default False
        If True, ignore the order of rows & columns.
    check_less_precise : bool, default False
        If True, use geom_almost_equals. If False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs : bool, default True
        If `check_frame_type` is True, then also check that the crs matches.
    """
    try:
        # added from pandas 0.20
        from pandas.testing import assert_frame_equal, assert_index_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal, assert_index_equal

    # instance validation
    if check_frame_type:
        assert isinstance(left, GeoDataFrame)
        assert isinstance(left, type(right))

        if check_crs:
            # no crs can be either None or {}
            if not left.crs and not right.crs:
                pass
            else:
                assert left.crs == right.crs
    else:
        if not isinstance(left, GeoDataFrame):
            left = GeoDataFrame(left)
        if not isinstance(right, GeoDataFrame):
            right = GeoDataFrame(right)

    # shape comparison
    assert left.shape == right.shape, (
        "GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n"
        "Left columns: {lcols!r}, right columns: {rcols!r}".format(
            lshape=left.shape,
            rshape=right.shape,
            lcols=left.columns,
            rcols=right.columns,
        )
    )

    if check_like:
        left, right = left.reindex_like(right), right

    # column comparison
    assert_index_equal(left.columns, right.columns,
                       exact=check_column_type, obj="GeoDataFrame.columns")

    # geometry comparison
    assert_geoseries_equal(
        left.geometry,
        right.geometry,
        check_dtype=check_dtype,
        check_less_precise=check_less_precise,
        check_geom_type=check_geom_type,
        check_crs=False,
    )

    # drop geometries and check remaining columns
    left2 = left.drop([left._geometry_column_name], axis=1)
    right2 = right.drop([right._geometry_column_name], axis=1)
    assert_frame_equal(
        left2,
        right2,
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_column_type=check_column_type,
        obj="GeoDataFrame",
    )
def test_join_project_left_table(how, left, right, df1, df2):
    expr = left.join(right, left.key == right.key, how=how)[left, right.key3]
    result = expr.execute()
    expected = pd.merge(df1, df2, how=how, on='key')[list(left.columns) + ['key3']]
    tm.assert_frame_equal(result[expected.columns], expected)
def test_asof_join(time_left, time_right, time_df1, time_df2):
    expr = time_left.asof_join(time_right, 'time')[time_left, time_right.other_value]
    result = expr.execute()
    expected = pd.merge_asof(time_df1, time_df2, on='time')
    tm.assert_frame_equal(result[expected.columns], expected)
def test_df_trans_acc_disp(self):
    pd_testing.assert_frame_equal(self.exercises.df_trans_acc_disp, self.df_trans_acc_disp)

def test_df_merged(self):
    pd_testing.assert_frame_equal(self.exercises.df_merged, self.df_merged)

def test_df(self):
    pd_testing.assert_frame_equal(self.exercises.bankData, self.bankData, check_dtype=False)
def test_pickle_method(self):
    filename = os.path.join(self.tempdir, "df.pkl")
    self.df.to_pickle(filename)
    unpickled = pd.read_pickle(filename)
    assert_frame_equal(self.df, unpickled)
    assert self.df.crs == unpickled.crs
def test_to_df_types(self, column_type, values, series):
    data = [(v, ) for v in values]
    results = QueryResults(["col"], [column_type], data)
    assert_frame_equal(results.to_df(), pd.DataFrame({"col": series}),
                       check_column_type="exact")
def test_save_and_load(self, versioned_csv_data_set, dummy_dataframe):
    """Test that saved and reloaded data matches the original one for
    the versioned data set."""
    versioned_csv_data_set.save(dummy_dataframe)
    reloaded_df = versioned_csv_data_set.load()
    assert_frame_equal(dummy_dataframe, reloaded_df)
#!/usr/bin/env python
# coding: utf-8

# In[58]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[123]:

movie = pd.read_csv('data/movie.csv', index_col='movie_title')
c1 = movie['title_year'] >= 2010
c2 = movie['title_year'].isnull()
criteria = c1 | c2
movie_mask = movie.mask(criteria).dropna(how='all')
movie_boolean = movie[movie['title_year'] < 2010]

from pandas.testing import assert_frame_equal
assert_frame_equal(movie_boolean, movie_mask, check_dtype=False)


# In[124]:

get_ipython().run_line_magic('timeit', "movie.mask(criteria).dropna(how='all')")


# In[ ]:
def test_gender_job():
    row = dfp.RowTransformer(columns=['Gender', 'Job'], drop_values=['p', 'N/A'])
    out = row.fit_transform(df)
    assert_frame_equal(out, drop_gender_job_df)
def test_create_or_update_move_stop_by_dist_time():
    move_df = MoveDataFrame(
        data=list_data,
        latitude=LATITUDE,
        longitude=LONGITUDE,
        datetime=DATETIME,
        traj_id=TRAJ_ID,
    )
    cols = [
        'segment_stop', 'id', 'lat', 'lon', 'datetime',
        'dist_to_prev', 'time_to_prev', 'speed_to_prev', 'stop',
    ]
    stay_point_detection.create_or_update_move_stop_by_dist_time(
        move_df, dist_radius=3.5, time_radius=0.5, inplace=True)
    expected = DataFrame(
        data=[
            [1, 1, 39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'),
             nan, nan, nan, False],
            [2, 1, 39.984198, 116.319322, Timestamp('2008-10-23 05:53:06'),
             nan, nan, nan, False],
            [3, 2, 39.984224, 116.319402, Timestamp('2008-10-23 05:53:11'),
             nan, nan, nan, True],
            [3, 2, 39.984224, 116.319402, Timestamp('2008-10-23 05:53:15'),
             0.0, 4.0, 0.0, True],
        ],
        columns=cols,
        index=[0, 1, 2, 3],
    )
    print(move_df)
    assert_frame_equal(move_df, expected)
def test_renamedBostonData(self):
    pd_testing.assert_frame_equal(self.exercises.renamedBostonData, self.renamedBostonData)

def test_df(self):
    pd_testing.assert_frame_equal(self.exercises.df, self.df)
def test_ndarray_input(self):
    cg = mat.ClusterGrid(self.x_norm, **self.default_kws)
    pdt.assert_frame_equal(cg.data, pd.DataFrame(self.x_norm))
    assert len(cg.fig.axes) == 4
    assert cg.ax_row_colors is None
    assert cg.ax_col_colors is None
def test_update_depr_methods(monkeypatch):
    '''
    Test of calcfunctions.update_depr_methods
    '''
    p = Specification()
    json_str = """
    {"schema": {
        "labels": {
            "asset_name": {"type": "str"},
            "BEA_code": {"type": "str"},
            "minor_asset_group": {"type": "str"},
            "major_asset_group": {"type": "str"},
            "ADS_life": {"type": "float"},
            "GDS_life": {"type": "float"},
            "system": {"type": "str"},
            "year": {
                "type": "int",
                "validators": {"range": {"min": 2013, "max": 2030}}
            }
        }
    },
    "asset": {
        "title": "Tax depreciation rules for assets",
        "description": "Tax depreciation rules for assets",
        "type": "depreciation_rules",
        "value": [
            {"ADS_life": 10.0, "BEA_code": "1", "GDS_life": 10.0,
             "asset_name": "Steam engines", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 10, "method": "DB 200%"}},
            {"ADS_life": 10.0, "BEA_code": "2", "GDS_life": 10.0,
             "asset_name": "Custom software", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 10, "method": "DB 150%"}},
            {"ADS_life": 3.0, "BEA_code": "3", "GDS_life": 3.0,
             "asset_name": "Other furniture", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 3, "method": "SL"}},
            {"ADS_life": 15.0, "BEA_code": "4", "GDS_life": 15.0,
             "asset_name": "Mining and oilfield machinery",
             "major_asset_group": "Group1", "minor_asset_group": "Group1",
             "system": "GDS", "year": 2020,
             "value": {"life": 15, "method": "Economic"}},
            {"ADS_life": 27.5, "BEA_code": "5", "GDS_life": 27.5,
             "asset_name": "Expensing", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 27.5, "method": "Expensing"}},
            {"ADS_life": 27.5, "BEA_code": "6", "GDS_life": 27.5,
             "asset_name": "PCs", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 27.5, "method": "DB 200%"}},
            {"ADS_life": 10.0, "BEA_code": "7", "GDS_life": 10.0,
             "asset_name": "Terminals", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 10, "method": "DB 150%"}},
            {"ADS_life": 3.0, "BEA_code": "8", "GDS_life": 3.0,
             "asset_name": "Manufacturing", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 3, "method": "SL"}},
            {"ADS_life": 15.0, "BEA_code": "9", "GDS_life": 15.0,
             "asset_name": "Wind and solar", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 15, "method": "Economic"}},
            {"ADS_life": 7.0, "BEA_code": "10", "GDS_life": 7.0,
             "asset_name": "Equipment", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 7, "method": "Expensing"}}
        ]
    }
    }
    """
    monkeypatch.setattr(DepreciationParams, "defaults", json_str)
    dp = DepreciationParams()
    asset_df = pd.DataFrame.from_dict({
        'bea_asset_code': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']})
    expected_df = pd.DataFrame(dp.asset)
    expected_df = pd.concat(
        [expected_df.drop(['value'], axis=1),
         expected_df['value'].apply(pd.Series)], axis=1)
    expected_df.drop(
        columns=['asset_name', 'minor_asset_group', 'major_asset_group'],
        inplace=True)
    expected_df['bea_asset_code'] = pd.Series(
        ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
        index=expected_df.index)
    expected_df['bonus'] = pd.Series(
        [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
        index=expected_df.index)
    expected_df['b'] = pd.Series(
        [2, 1.5, 1, 1, 1, 2, 1.5, 1, 1, 1], index=expected_df.index)
    expected_df['Y'] = pd.Series(
        [10, 10, 3, 15, 27.5, 27.5, 10, 3, 15, 7], index=expected_df.index)
    print('Expected df =', expected_df)
    test_df = cf.update_depr_methods(asset_df, p, dp)
    assert_frame_equal(test_df, expected_df, check_like=True)
def test_df_input(self):
    cg = mat.ClusterGrid(self.df_norm, **self.default_kws)
    pdt.assert_frame_equal(cg.data, self.df_norm)
def test_df_disp_owner(self):
    pd_testing.assert_frame_equal(self.exercises.df_disp_owner, self.df_disp_owner)

def test_df(self):
    pd_testing.assert_frame_equal(self.exercises.bankData, self.bankData)
def check_cox(rossi, x, stratify_by, formula):
    if stratify_by:
        cph_py = CoxPHFitter(strata=stratify_by)
    else:
        cph_py = CoxPHFitter()

    for col in stratify_by:
        rossi[col] = rossi[col].astype('category')

    cph_py.fit(rossi, duration_col='week', event_col='arrest')
    cph_py.print_summary()

    rossi_h2o = h2o.H2OFrame(rossi)
    for col in stratify_by:
        rossi_h2o[col] = rossi_h2o[col].asfactor()

    cph_h2o = H2OCoxProportionalHazardsEstimator(stop_column="week", stratify_by=stratify_by)
    cph_h2o.train(x=x, y="arrest", training_frame=rossi_h2o)

    assert cph_h2o.model_id != ""
    assert cph_h2o.formula() == formula, \
        "Expected formula to be '" + formula + "' but it was " + cph_h2o.formula()

    predH2O = cph_h2o.predict(test_data=rossi_h2o)
    assert len(predH2O) == len(rossi)

    metrics_h2o = cph_h2o.model_performance(rossi_h2o)
    concordance_py = concordance_for_lifelines(cph_py)
    assert abs(concordance_py - metrics_h2o.concordance()) < 0.001

    hazard_h2o = h2o.get_frame(cph_h2o._model_json['output']['baseline_hazard']['name'])
    hazard_h2o_as_pandas = hazard_h2o.as_data_frame(use_pandas=True)

    hazard_py = cph_py.baseline_hazard_
    for col_name in hazard_py.columns:
        hazard_py.rename(columns={col_name: str(col_name)}, inplace=True)

    hazard_py_reordered_columns = hazard_py.reset_index(drop=True).sort_index(axis=1)
    hazard_h2o_reordered_columns = hazard_h2o_as_pandas.drop(
        't', axis="columns").reset_index(drop=True).sort_index(axis=1)
    hazard_py_reordered_columns = fix_py_result_for_older_lifelines(
        hazard_py_reordered_columns)

    print("h2o:")
    print(hazard_h2o_as_pandas.reset_index(drop=True))
    print("lifelines:")
    print(hazard_py_reordered_columns.reset_index(drop=True))

    assert_frame_equal(hazard_py_reordered_columns, hazard_h2o_reordered_columns,
                       check_dtype=False, check_index_type=False, check_column_type=False)

    survival_h2o = h2o.get_frame(cph_h2o._model_json['output']['baseline_survival']['name'])
    survival_h2o_as_pandas = survival_h2o.as_data_frame(use_pandas=True)

    survival_py = cph_py.baseline_survival_
    for col_name in survival_py.columns:
        survival_py.rename(columns={col_name: str(col_name)}, inplace=True)

    survival_py_reordered_columns = survival_py.reset_index(drop=True).sort_index(axis=1)
    survival_h2o_reordered_columns = survival_h2o_as_pandas.drop(
        't', axis="columns").reset_index(drop=True).sort_index(axis=1)
    survival_py_reordered_columns = fix_py_result_for_older_lifelines(
        survival_py_reordered_columns)

    print("h2o:")
    print(survival_h2o_as_pandas.reset_index(drop=True))
    print("lifelines:")
    print(survival_py_reordered_columns.reset_index(drop=True))

    assert_frame_equal(survival_py_reordered_columns, survival_h2o_reordered_columns,
                       check_dtype=False, check_index_type=False, check_column_type=False)
def test_prepare():
    def prep(csvs, args=[]):
        args = ac.parse_cmdline_args(args)
        return ac.prepare_bartables(ld(csvs), args)

    # one category and totals
    in1 = """
Tag,Time
a:x-y,00:01:00
(unmatched time),00:02:00
(total time),00:03:00
"""
    out1 = pd.DataFrame(
        {'Time': ['', '', '00:02:00', '00:01:00', '', '00:03:00'],
         'Type': ['text', 'text', 'bar', 'bar', 'text', 'total_bar'],
         'Frac': [None, None, 2/3, 1/3, None, 1],
         'FracAbove': [None, None, 0, 2/3, None, 0],
         'HourFrac': [None, None, 20, 20, None, 20]},
        index=pd.Index(['a', '═', '(unmatched time)', 'x-y', '', '(total time)'],
                       name='Tag'))
    pdt.assert_frame_equal(prep([in1]), out1)

    # same, different totals
    in1_totals = """
Tag,Time
a:x-y,00:01:00
(unmatched),00:02:00
(screen),00:03:00
"""
    out1_totals = out1.set_index(
        pd.Index(['a', '═', '(unmatched)', 'x-y', '', '(screen)'], name='Tag'))
    pdt.assert_frame_equal(
        prep([in1_totals], args=["--totals-re", "^\\(screen"]), out1_totals)

    # same, subtags
    out1_subtags = out1.set_index(
        pd.MultiIndex.from_tuples(
            [('a', ''), ('═', ''), ('(unmatched time)', ''), ('x', 'y'),
             ('', ''), ('(total time)', '')],
            names=['Tag', 'SubTag']))
    pdt.assert_frame_equal(prep([in1], args=["--subtags"]), out1_subtags)

    # two categories and totals
    in2 = """
Tag,Time
b:z,00:01:00
(unmatched time),00:02:00
(total time),00:03:00
"""
    blank = pd.DataFrame(
        {'Time': [''], 'Type': ['text'], 'Frac': [None],
         'FracAbove': [None], 'HourFrac': [None]},
        index=pd.Index([''], name='Tag'))
    out2 = out1.set_index(
        pd.Index(['b', '═', '(unmatched time)', 'z', '', '(total time)'], name='Tag'))
    pdt.assert_frame_equal(prep([in1, in2]), pd.concat([out1, blank, out2]))

    # three categories, subtags
    in3 = """
Tag,Time
c:z,00:01:00
(unmatched time),00:02:00
(total time),00:03:00
"""
    out2_subtags = out1.set_index(
        pd.MultiIndex.from_tuples(
            [('b', ''), ('═', ''), ('(unmatched time)', ''), ('z', ''),
             ('', ''), ('(total time)', '')],
            names=['Tag', 'SubTag']))
    out3_subtags = out1.set_index(
        pd.MultiIndex.from_tuples(
            [('c', ''), ('═', ''), ('(unmatched time)', ''), ('z', ''),
             ('', ''), ('(total time)', '')],
            names=['Tag', 'SubTag']))
    blank_subtags = blank.set_index(
        pd.MultiIndex.from_tuples([('', '')], names=['Tag', 'SubTag']))
    pdt.assert_frame_equal(
        prep([in1, in2, in3], args=["--subtags"]),
        pd.concat([out1_subtags, blank_subtags, out2_subtags,
                   blank_subtags, out3_subtags]))
def test_diff_data(test_mp):
    """diff() when Scenarios contain the same items, but different data."""
    scen_a = make_dantzig(test_mp)
    scen_b = make_dantzig(test_mp)

    # Modify `scen_a` and `scen_b`
    scen_a.check_out()
    scen_b.check_out()

    # Remove elements from "b"
    drop_args = dict(labels=["value", "unit"], axis=1)
    scen_a.remove_par("b", scen_a.par("b").iloc[0:1, :].drop(**drop_args))
    scen_b.remove_par("b", scen_b.par("b").iloc[1:2, :].drop(**drop_args))

    # Remove elements from "d"
    scen_a.remove_par(
        "d", scen_a.par("d").query("i == 'san-diego'").drop(**drop_args))

    # Modify values in "d"
    scen_b.add_par("d", scen_b.par("d").query("i == 'seattle'").assign(value=123.4))

    # Expected results
    exp_b = pd.DataFrame(
        [
            ["chicago", 300.0, "cases", np.NaN, None, "left_only"],
            ["new-york", np.NaN, None, 325.0, "cases", "right_only"],
            ["topeka", 275.0, "cases", 275.0, "cases", "both"],
        ],
        columns="j value_a unit_a value_b unit_b _merge".split(),
    )
    exp_d = pd.DataFrame(
        [
            ["san-diego", "chicago", np.NaN, None, 1.8, "km", "right_only"],
            ["san-diego", "new-york", np.NaN, None, 2.5, "km", "right_only"],
            ["san-diego", "topeka", np.NaN, None, 1.4, "km", "right_only"],
            ["seattle", "chicago", 1.7, "km", 123.4, "km", "both"],
            ["seattle", "new-york", 2.5, "km", 123.4, "km", "both"],
            ["seattle", "topeka", 1.8, "km", 123.4, "km", "both"],
        ],
        columns="i j value_a unit_a value_b unit_b _merge".split(),
    )

    # Use the specific categorical produced by pd.merge()
    merge_cat = pd.CategoricalDtype(["left_only", "right_only", "both"])
    exp_b = exp_b.astype(dict(_merge=merge_cat))
    exp_d = exp_d.astype(dict(_merge=merge_cat))

    # Compare different scenarios without filters
    for name, df in utils.diff(scen_a, scen_b):
        if name == "b":
            pdt.assert_frame_equal(exp_b, df)
        elif name == "d":
            pdt.assert_frame_equal(exp_d, df)

    # Compare different scenarios with filters
    iterator = utils.diff(scen_a, scen_b, filters=dict(j=["chicago"]))
    for name, df in iterator:
        # Same as above, except only the filtered rows should appear
        if name == "b":
            pdt.assert_frame_equal(exp_b.iloc[0:1, :], df)
        elif name == "d":
            pdt.assert_frame_equal(exp_d.iloc[[0, 3], :].reset_index(drop=True), df)
def test_stream():
    df = pd.DataFrame(data_stream)
    filename = os.path.join(testdir, "health.pdf")
    tables = camelot.read_pdf(filename, flavor="stream")
    assert_frame_equal(df, tables[0].df)
def test_build_base_silva_taxonomy(self):
    input_taxranks = _prep_taxranks(self.taxranks)
    obs_taxonomy = _build_base_silva_taxonomy(self.taxtree, input_taxranks,
                                              ALLOWED_RANKS, rank_propagation=True)
    obs_taxonomy.sort_index(inplace=True)

    tid = {
        'taxid': ['2', '11084', '42913', '42914', '42915',
                  '11089', '24228', '24229', '42916', '42917'],
        'd__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'sk__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                 'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'k__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'ks__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                 'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'sp__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                 'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'p__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeota', 'Aenigmarchaeota',
                'Aenigmarchaeota', 'Aenigmarchaeota', 'Altiarchaeota',
                'Altiarchaeota', 'Altiarchaeota', 'Altiarchaeota'],
        'ps__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeota', 'Aenigmarchaeota',
                 'Aenigmarchaeota', 'Aenigmarchaeota', 'Altiarchaeota',
                 'Altiarchaeota', 'Altiarchaeota', 'Altiarchaeota'],
        'pi__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeota', 'Aenigmarchaeota',
                 'Aenigmarchaeota', 'Aenigmarchaeota', 'Altiarchaeota',
                 'Altiarchaeota', 'Altiarchaeota', 'Altiarchaeota'],
        'sc__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeota', 'Aenigmarchaeota',
                 'Aenigmarchaeota', 'Aenigmarchaeota', 'Altiarchaeota',
                 'Altiarchaeota', 'Altiarchaeota', 'Altiarchaeota'],
        'c__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
                'Aenigmarchaeia', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeia', 'Altiarchaeia'],
        'cs__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
                 'Aenigmarchaeia', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeia', 'Altiarchaeia'],
        'ci__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
                 'Aenigmarchaeia', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeia', 'Altiarchaeia'],
        'so__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
                 'Aenigmarchaeia', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeia', 'Altiarchaeia'],
        'o__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeales'],
        'os__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                 'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeales'],
        'sf__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                 'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeales'],
        'f__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeaceae'],
        'fs__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                 'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeaceae'],
        'g__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                'Candidatus_Aenigmarchaeum', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeaceae'],
    }
    exp_taxonomy = pd.DataFrame(tid)
    exp_taxonomy.set_index('taxid', inplace=True)
    exp_taxonomy.sort_index(inplace=True)
    assert_frame_equal(obs_taxonomy, exp_taxonomy)
def test_join(how, left, right, df1, df2):
    expr = left.join(right, left.key == right.key, how=how)[
        left, right.other_value, right.key3]
    result = expr.execute()
    expected = pd.merge(df1, df2, how=how, on='key')
    tm.assert_frame_equal(result[expected.columns], expected)
def test_write_csv_from_data_vendor():
    """Tests downloading market data from the data vendor and dumping to CSV.
    Checks the written CSV against what is loaded in memory. Also checks data
    is available in each 'usual' market hour.

    Note that we use cached data from disk, as we want to download relatively
    large sections of data, and doing this externally can cause the test to
    run very slowly.
    """
    for data_vendor_name in data_vendor_name_list:
        # database_source = database_source_dict[data_vendor_name]
        database_populator = database_populator_dict[data_vendor_name]
        chunk_int_min = chunk_int_min_dict[data_vendor_name]

        # specifically choose dates which straddle the weekend boundary
        start_date = '27 Apr 2018'
        finish_date = '03 May 2018'
        expected_csv_files = 5  # during British Summer Time in London

        # start_date = '02 Feb 2018'; finish_date = '07 Feb 2018'; expected_csv_files = 4  # during GMT time in London

        split_size = 'daily'
        write_csv = False

        # prepare the CSV folder first
        csv_folder = os.path.join(constants.test_data_harness_folder,
                                  'csv_' + data_vendor_name + '_dump')

        # empty the CSV test harness folder
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        msg, df_dict = database_populator.download_to_csv(
            start_date, finish_date, ['EURUSD'], chunk_int_min=chunk_int_min,
            split_size=split_size, csv_folder=csv_folder, return_df=True,
            write_large_csv=write_csv, remove_duplicates=False,
            web_proxies=web_proxies)

        df_read_direct_from_data_vendor = df_dict['EURUSD']

        # check it has data for every market hour (eg. ignoring Saturdays)
        assert util_func.check_data_frame_points_in_every_hour(
            df_read_direct_from_data_vendor, start_date, finish_date)

        if write_csv:
            # read back the CSVs dumped on disk in the test harness CSV folder
            csv_file_list = glob.glob(csv_folder + '/EURUSD*.csv')
            assert len(csv_file_list) == expected_csv_files

            df_list = []
            for c in csv_file_list:
                df = pd.read_csv(c, index_col=0)
                df.index = pd.to_datetime(df.index)
                df_list.append(df)

            # now compare the CSVs on disk versus those read directly
            df_read_from_csv = pd.concat(df_list).tz_localize(pytz.utc)
            assert_frame_equal(df_read_from_csv, df_read_direct_from_data_vendor)
def test_coord_slice_points(self):
    assert self.df2.cx[-2:-1, -2:-1].empty
    assert_frame_equal(self.df2, self.df2.cx[:, :])
    assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, :])
    assert_frame_equal(self.df2.loc[5:], self.df2.cx[:, 5:])
    assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, 5:])
def test_create_update_move_and_stop_by_radius():
    move_df = MoveDataFrame(
        data=list_data,
        latitude=LATITUDE,
        longitude=LONGITUDE,
        datetime=DATETIME,
        traj_id=TRAJ_ID,
    )
    cols = [
        'id', 'lat', 'lon', 'datetime', 'dist_to_prev',
        'dist_to_next', 'dist_prev_to_next', 'situation',
    ]
    stay_point_detection.create_update_move_and_stop_by_radius(move_df, radius=4.0)
    expected = DataFrame(
        data=[
            [1, 39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'),
             nan, 13.690153134343689, nan, 'nan'],
            [1, 39.984198, 116.319322, Timestamp('2008-10-23 05:53:06'),
             13.690153134343689, nan, nan, 'move'],
            [2, 39.984224, 116.319402, Timestamp('2008-10-23 05:53:11'),
             nan, 0.0, nan, 'nan'],
            [2, 39.984224, 116.319402, Timestamp('2008-10-23 05:53:15'),
             0.0, nan, nan, 'stop'],
        ],
        columns=cols,
        index=[0, 1, 2, 3],
    )
    assert_frame_equal(move_df, expected)
def frame_equal(a, b):
    try:
        assert_frame_equal(a, b)
    except AssertionError:
        return False
    return True
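# Minimal usage sketch for frame_equal above (the frames here are illustrative, not from the
# original suite): unlike assert_frame_equal it returns a bool instead of raising, so it can
# be used in plain boolean checks.
import pandas as pd

df_a = pd.DataFrame({"x": [1, 2]})
df_b = pd.DataFrame({"x": [1, 3]})
assert frame_equal(df_a, df_a.copy())   # identical frames -> True
assert not frame_equal(df_a, df_b)      # differing values -> False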