def test_ignore_datelike_data(self):
    df = self.df.copy()
    df['date'] = pd.date_range('2010-01-01', periods=len(df), freq='d')
    result = ag.PairGrid(self.df).data
    expected = df.drop('date', axis=1)
    tm.assert_frame_equal(result, expected)
def test_make_forecasting_frame_list(self):
    df, y = dataframe_functions.make_forecasting_frame(x=range(4), kind="test",
                                                       max_timeshift=1, rolling_direction=1)
    expected_df = pd.DataFrame({"id": [1, 2, 3],
                                "kind": ["test"] * 3,
                                "value": [0., 1., 2.],
                                "time": [0., 1., 2.]})
    expected_y = pd.Series(data=[1, 2, 3], index=[1, 2, 3], name="value")
    assert_frame_equal(df.sort_index(axis=1), expected_df.sort_index(axis=1))
    assert_series_equal(y, expected_y)
def test_establish_variables_from_mix(self):
    p = lm._LinearPlotter()
    p.establish_variables(self.df, x="x", y=self.df.y)
    pdt.assert_series_equal(p.x, self.df.x)
    pdt.assert_series_equal(p.y, self.df.y)
    pdt.assert_frame_equal(p.data, self.df)
def test_variables_from_mix(self):
    p = lm._RegressionPlotter("x", self.df.y + 1, data=self.df)
    npt.assert_array_equal(p.x, self.df.x)
    npt.assert_array_equal(p.y, self.df.y + 1)
    pdt.assert_frame_equal(p.data, self.df)
def test_download_insert_hist_data(self):
    async def run(loop, req, broker, insert_limit):
        engine = await aiosa.create_engine(
            user=self.db_info['user'], db=self.db_info['db'],
            host=self.db_info['host'], password=self.db_info['password'],
            loop=loop)
        # Download, insert and query
        dl_blk = await download_insert_hist_data(req, broker, engine, insert_limit)
        db_blk = await query_hist_data(
            engine, req.SecType, req.Symbol, req.DataType, req.BarSize, *insert_limit)
        engine.close()
        await engine.wait_closed()
        return dl_blk, db_blk

    # Execute
    self._clear_db()
    init_db(self.db_info)
    req = testdata_download_insert_hist_data['req']
    broker, login = testdata_download_insert_hist_data['broker']
    insert_limit = testdata_download_insert_hist_data['insert_limit']
    broker.connect(*login)
    loop = asyncio.get_event_loop()
    dl_blk, db_blk = loop.run_until_complete(run(loop, req, broker, insert_limit))
    broker_blk = broker.req_hist_data(req)[0]

    # Verify
    lim0 = insert_limit[0]
    lim1 = insert_limit[1]
    assert_frame_equal(dl_blk.df, broker_blk.df)
    assert_frame_equal(db_blk.df, broker_blk.df.loc(axis=0)[:, :, :, lim0:lim1])
def test_get_hist_data(self):
    async def run(loop, req, blk_db, broker):
        # Populate database
        engine = await aiosa.create_engine(
            user=self.db_info['user'], db=self.db_info['db'],
            host=self.db_info['host'], password=self.db_info['password'],
            loop=loop, echo=False)
        await insert_hist_data(engine, 'Stock', blk_db)
        engine.close()
        await engine.wait_closed()
        # Get hist data
        blk_db = await get_hist_data(req, broker, mysql={**self.db_info, 'loop': loop})
        return blk_db

    from time import sleep
    for data in testdata_get_hist_data:
        sleep(1.5)  # Avoid IB pacing violation
        _logger.debug("\n======= get_hist_data_async: %s ======\n", data['testcase'])
        self._clear_db()
        init_db(self.db_info)
        blk_db = MarketDataBlock(data['df_db'])
        broker = data['broker'][0](*data['broker'][1])
        blk_exp = MarketDataBlock(data['blk_exp.df'])
        blk_exp.tz = data['xchg_tz']
        loop = asyncio.get_event_loop()
        blk_ret = loop.run_until_complete(run(loop, data['req'], blk_db, broker))
        assert_frame_equal(blk_ret.df, blk_exp.df)
def assert_frame_not_equal(df1, df2, **kwargs):
    # assert_frame_equal exists, but we need the ability to assert that frames are *not* equal.
    # The raise must live in the else clause: raising inside the try would be swallowed by the
    # except below, so the assertion could never fail.
    try:
        assert_frame_equal(df1, df2, **kwargs)
    except AssertionError:
        # Frames differ, which is what we expect here.
        pass
    else:
        raise AssertionError('DataFrames are equal.')
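# Minimal usage sketch for assert_frame_not_equal above (the frames, the pytest usage and the
# test name are illustrative, not part of the original suite); extra keyword arguments are
# forwarded to pandas.testing.assert_frame_equal.
import pandas as pd
import pytest

def test_assert_frame_not_equal_example():
    left = pd.DataFrame({"a": [1, 2, 3]})
    right = pd.DataFrame({"a": [1, 2, 4]})
    assert_frame_not_equal(left, right)  # passes: the frames differ
    with pytest.raises(AssertionError):
        assert_frame_not_equal(left, left.copy())  # equal frames raise AssertionError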
def test_two_iterations_with_metadata_where_values_are_unique(self):
    # This should be identical to test_without_metadata_df_two_iterations,
    # with just the `sample-id` replaced with `pet`.
    columns = pd.MultiIndex.from_product([[1, 200], [1, 2]], names=['depth', 'iter'])
    data = pd.DataFrame(data=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
                        columns=columns, index=['russ', 'milo', 'pea'])
    counts = pd.DataFrame(data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
                          columns=columns, index=['russ', 'milo', 'pea'])
    obs = _compute_summary(data, 'pet', counts=counts)

    d = [
        ['russ', 1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2., 1],
        ['russ', 200, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4., 1],
        ['milo', 1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2., 1],
        ['milo', 200, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4., 1],
        ['pea', 1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2., 1],
        ['pea', 200, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4., 1],
    ]
    exp = pd.DataFrame(data=d, columns=['pet', 'depth', 'min', '2%', '9%', '25%',
                                        '50%', '75%', '91%', '98%', 'max', 'count'])
    pdt.assert_frame_equal(exp, obs)
def test_mnl_estimation(obs, alts):
    """
    Confirm that estimated params from the new interface match
    urbansim.urbanchoice. Only runs if the urbansim package has been installed.
    """
    try:
        from urbansim.urbanchoice.mnl import mnl_estimate
    except ImportError:
        print("Comparison of MNL estimation results skipped because urbansim is not installed")
        return

    model_expression = 'obsval + altval - 1'
    mct = MergedChoiceTable(obs, alts, 'choice')

    # new interface
    m = MultinomialLogit(mct, model_expression)
    r = m.fit().get_raw_results()

    # old interface
    dm = dmatrix(model_expression, mct.to_frame())
    chosen = np.reshape(mct.to_frame()[mct.choice_col].values, (100, 5))
    log_lik, fit = mnl_estimate(np.array(dm), chosen, numalts=5)

    for k, v in log_lik.items():
        assert v == pytest.approx(r['log_likelihood'][k], 0.00001)
    assert_frame_equal(fit, r['fit_parameters'][['Coefficient', 'Std. Error', 'T-Score']])
def test_create_lineages(self):
    df_with_lins = clustering.df_add_lineages(self.df_mult_groups, 0.85)
    expected = self.df_mult_groups.reset_index(drop=True)
    expected['lineage'] = [0, 0, 1, 2, 3]
    assert_frame_equal(df_with_lins, expected)
def test_init(self):
    sorted_feature_names = ["is_dutch", "is_english", "value_number"]
    self.assertEquals(
        sorted(self.frame.features.keys()),
        sorted_feature_names
    )
    self.assertTrue(callable(self.frame.content))
    assert_frame_equal(self.frame.data, self.test_frame, check_like=True)
def test_variables_from_frame(self):
    p = lm._RegressionPlotter("x", "y", data=self.df, units="s")
    pdt.assert_series_equal(p.x, self.df.x)
    pdt.assert_series_equal(p.y, self.df.y)
    pdt.assert_series_equal(p.units, self.df.s)
    pdt.assert_frame_equal(p.data, self.df)
def test_munge_metadata_ids_different_order(self):
    md = qiime2.CategoricalMetadataColumn(
        pd.Series(['russ', 'milo', 'russ'], name='pet',
                  index=pd.Index(['S2', 'S1', 'S3'], name='id')))
    obs = _munge_metadata(md, self.table, 'both')

    exp_idx = pd.Index(['milo | S1', 'russ | S2', 'russ | S3'], name='pet | id')
    exp = pd.DataFrame([[0, 10], [10, 12], [10, 11]],
                       columns=['O1', 'O2'], index=exp_idx)
    assert_frame_equal(exp, obs)
def test_munge_metadata_empty_values(self):
    md = qiime2.CategoricalMetadataColumn(
        pd.Series([None, 'russ', np.nan], name='pet',
                  index=pd.Index(['S1', 'S2', 'S3'], name='id')))
    obs = _munge_metadata(md, self.table, 'both')

    exp_idx = pd.Index(['[No Value] | S1', 'russ | S2', '[No Value] | S3'],
                       name='pet | id')
    exp = pd.DataFrame([[0, 10], [10, 12], [10, 11]],
                       columns=['O1', 'O2'], index=exp_idx)
    assert_frame_equal(exp, obs)
def test_munge_metadata_sort_samples(self):
    md = qiime2.CategoricalMetadataColumn(
        pd.Series(['peanut', 'milo', 'russ'], name='pet',
                  index=pd.Index(['S1', 'S2', 'S3'], name='id')))
    obs = _munge_metadata(md, self.table, 'features')

    exp_idx = pd.Index(['milo | S2', 'peanut | S1', 'russ | S3'], name='pet | id')
    exp = pd.DataFrame([[10, 12], [0, 10], [10, 11]],
                       columns=['O1', 'O2'], index=exp_idx)
    assert_frame_equal(exp, obs)
def test_b64(self):
    """Test the binary encoding"""
    if self.should_skip:
        return self.skip('pandas is not importable')
    # An array of substantial size is stored as b64.
    a = np.random.rand(20, 10)
    index = ['Row' + str(i) for i in range(1, a.shape[0] + 1)]
    columns = ['Col' + str(i) for i in range(1, a.shape[1] + 1)]
    df = pd.DataFrame(a, index=index, columns=columns)
    decoded_df = self.roundtrip(df)
    assert_frame_equal(decoded_df, df)
def test_TableFormula_sort(self):
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    table = TableFormula()
    table["A"] = [0, 1]
    table.add_column_vector("B", [6, 7])
    table.sort(lambda row: -row["B"])
    exp = pandas.DataFrame(dict(A=[1, 0], B=[7, 6], C=[1, 0]))
    exp = exp.set_index("C")
    exp.index.rename(None, inplace=True)
    assert_frame_equal(table, exp, check_index_type=False)
def test_run_ccc_example_output(file_name):
    '''
    Tests the script in ../../run_examples/run_ccc_example.py to ensure that
    it produces the expected results that are checked into the repo.
    '''
    run_example_path = os.path.join(CUR_PATH, '..', '..', 'run_examples')
    test_path = os.path.join(run_example_path, file_name + '.csv')
    test_df = pd.read_csv(test_path)
    expected_path = os.path.join(run_example_path, file_name + '_expected.csv')
    expected_df = pd.read_csv(expected_path)
    assert_frame_equal(test_df, expected_df)
def test_get_country():
    # call the function
    df = country.get_country(interim_data, 'Chile')
    # load my previous dataset
    base = pd.read_csv(processed_data)
    # check if I am getting a dataframe
    assert isinstance(df, pd.DataFrame)
    assert isinstance(base, pd.DataFrame)
    # check that they are the same dataframes
    pdt.assert_frame_equal(df, base)
def test_TableFormula_add(self):
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    table = TableFormula()
    table["A"] = [0, 1]
    table.add_column_index([4, 5])
    table.add_column_vector("B", [6, 7])
    table.addc("C", lambda row: row["A"] * row["B"])
    exp = pandas.DataFrame(
        dict(A=[0, 1], B=[6, 7], C=[0, 7], __key__=[4, 5]))
    exp.set_index("__key__", inplace=True)
    exp.index.rename(None, inplace=True)
    assert_frame_equal(table, exp)
def test_json(self):
    ss = self.station.statistics
    ss.calc_temperature_stats()
    ss.calc_precipitation_stats()
    ss.calc_humidity_stats()
    ss.calc_radiation_stats()
    ss.calc_wind_stats()

    with tempfile.NamedTemporaryFile() as tmp:
        ss.to_json(tmp.name)
        tmp.seek(0)
        ss2 = melodist.StationStatistics.from_json(tmp.name)

    assert_series_equal(ss.temp.max_delta, ss2.temp.max_delta)
    assert_frame_equal(ss.temp.mean_course, ss2.temp.mean_course)

    assert_equal(ss.precip.months, ss2.precip.months)
    assert all([cs1 == cs2 for cs1, cs2 in zip(ss.precip.stats, ss2.precip.stats)])

    assert ss.hum.a0 == ss2.hum.a0
    assert ss.hum.a1 == ss2.hum.a1
    assert ss.hum.kr == ss2.hum.kr
    assert_series_equal(ss.hum.month_hour_precip_mean, ss2.hum.month_hour_precip_mean)

    assert_frame_equal(ss.glob.angstroem, ss2.glob.angstroem)
    assert_frame_equal(ss.glob.bristcamp, ss2.glob.bristcamp)
    assert_frame_equal(ss.glob.mean_course, ss2.glob.mean_course)

    assert ss.wind.a == ss2.wind.a
    assert ss.wind.b == ss2.wind.b
    assert ss.wind.t_shift == ss2.wind.t_shift
def test_adding_content_mixed(self):
    self.skipTest("Bug: GH-109")
    old = list(self.get_iterator())[-2:]

    def update(ind):
        ind.properties["value"] = int(ind.properties["value"]) * 5
        return ind

    updated = list(map(update, old))
    self.frame.load_content(
        lambda: iter(list(self.get_extra_iterator()) + updated)
    )
    self.test_frame_extra["value_number"].loc[[7, 8]] *= 5
    assert_frame_equal(self.frame.data, self.test_frame_extra, check_like=True)
def test_from_columns(self):
    tsn = "TEST_TIME_SERIES"

    fset = ComprehensiveFCParameters()
    self.assertRaises(TypeError, from_columns, 42)
    self.assertRaises(ValueError, from_columns, ["This is not a column name"])
    self.assertRaises(ValueError, from_columns, ["This__neither"])
    self.assertRaises(ValueError, from_columns, ["This__also__not"])

    # Aggregate functions
    feature_names = [tsn + '__sum_values', tsn + "__median", tsn + "__length",
                     tsn + "__sample_entropy"]

    # Aggregate functions with params
    feature_names += [tsn + '__quantile__q_10', tsn + '__quantile__q_70',
                      tsn + '__number_peaks__n_30', tsn + '__value_count__value_inf',
                      tsn + '__value_count__value_-inf', tsn + '__value_count__value_nan']

    # Apply functions
    feature_names += [tsn + '__ar_coefficient__k_20__coeff_4',
                      tsn + '__ar_coefficient__coeff_10__k_-1']

    kind_to_fc_parameters = from_columns(feature_names)

    six.assertCountEqual(self, list(kind_to_fc_parameters[tsn].keys()),
                         ["sum_values", "median", "length", "sample_entropy",
                          "quantile", "number_peaks", "ar_coefficient", "value_count"])

    self.assertEqual(kind_to_fc_parameters[tsn]["sum_values"], None)
    self.assertEqual(kind_to_fc_parameters[tsn]["ar_coefficient"],
                     [{"k": 20, "coeff": 4}, {"k": -1, "coeff": 10}])
    self.assertEqual(kind_to_fc_parameters[tsn]["value_count"],
                     [{"value": np.PINF}, {"value": np.NINF}, {"value": np.NaN}])

    # test that it passes for all functions
    fset = ComprehensiveFCParameters()
    X_org = extract_features(pd.DataFrame({"value": [1, 2, 3], "id": [1, 1, 1]}),
                             default_fc_parameters=fset,
                             column_id="id", column_value="value", n_jobs=0)

    inferred_fset = from_columns(X_org)

    X_new = extract_features(pd.DataFrame({"value": [1, 2, 3], "id": [1, 1, 1]}),
                             kind_to_fc_parameters=inferred_fset,
                             column_id="id", column_value="value", n_jobs=0)

    assert_frame_equal(X_org.sort_index(), X_new.sort_index())
def check_load_cached_dataset(name):
    # Test the caching using a temporary file.
    # With Python 3.2+, we could use the tempfile.TemporaryDirectory()
    # context manager instead of this try...finally statement.
    tmpdir = tempfile.mkdtemp()
    try:
        # download and cache
        ds = load_dataset(name, cache=True, data_home=tmpdir)
        # use cached version
        ds2 = load_dataset(name, cache=True, data_home=tmpdir)
        pdt.assert_frame_equal(ds, ds2)
    finally:
        shutil.rmtree(tmpdir)
def test_make_forecasting_frame_pdSeries(self):
    t_index = pd.date_range('1/1/2011', periods=4, freq='H')
    df, y = dataframe_functions.make_forecasting_frame(
        x=pd.Series(data=range(4), index=t_index),
        kind="test", max_timeshift=1, rolling_direction=1)

    expected_y = pd.Series(data=[1, 2, 3],
                           index=pd.DatetimeIndex(["2011-01-01 01:00:00",
                                                   "2011-01-01 02:00:00",
                                                   "2011-01-01 03:00:00"]),
                           name="value")
    expected_df = pd.DataFrame({"id": pd.DatetimeIndex(["2011-01-01 01:00:00",
                                                        "2011-01-01 02:00:00",
                                                        "2011-01-01 03:00:00"]),
                                "kind": ["test"] * 3,
                                "value": [0., 1., 2.],
                                "time": pd.DatetimeIndex(["2011-01-01 00:00:00",
                                                          "2011-01-01 01:00:00",
                                                          "2011-01-01 02:00:00"])})
    assert_frame_equal(df.sort_index(axis=1), expected_df.sort_index(axis=1))
    assert_series_equal(y, expected_y)
def test_market_data_block_merge(self):
    testdata = testdata_market_data_block_merge
    blk = MarketDataBlock(pd.DataFrame(testdata[0]), datatype='TRADES', tz='US/Pacific')
    _logger.info('\n\nBlockTests:merge: Starting blk:\n%s', blk.df)
    for data in testdata[1:]:
        blk.update(pd.DataFrame(data[0]), datatype='TRADES', tz='US/Pacific')
        blk_direct = MarketDataBlock(
            pd.DataFrame(data[1]), datatype='TRADES', tz='US/Pacific')
        _logger.debug('\n\nBlockTests:merge: blk.df\n%s', blk.df[:3])
        _logger.debug('\n\nBlockTests:merge: blk_direct.df\n%s', blk_direct.df[:3])
        assert_frame_equal(blk.df, blk_direct.df)
        self.assertEqual(list(blk.df.index.names), blk.__class__.data_index)
        self.assertEqual(list(blk_direct.df.index.names), blk.__class__.data_index)
def test_dataframe_roundtrip(self):
    if self.should_skip:
        return self.skip('pandas is not importable')
    df = pd.DataFrame({
        'an_int': np.int_([1, 2, 3]),
        'a_float': np.float_([2.5, 3.5, 4.5]),
        'a_nan': np.array([np.nan] * 3),
        'a_minus_inf': np.array([-np.inf] * 3),
        'an_inf': np.array([np.inf] * 3),
        'a_str': np.str_('foo'),
        'a_unicode': np.unicode_('bar'),
        'date': np.array([np.datetime64('2014-01-01')] * 3),
        'complex': np.complex_([1 - 2j, 2 - 1.2j, 3 - 1.3j]),
        # TODO: the following dtypes are not currently supported.
        # 'object': np.object_([{'a': 'b'}]*3),
    })
    decoded_df = self.roundtrip(df)
    assert_frame_equal(decoded_df, df)
def test_resetting_features_no_content(self):
    features = [
        TestNumericFeaturesFrame.is_dutch
    ]
    frame = NumericFeaturesFrame(
        TestNumericFeaturesFrame.get_identifier,
        features
    )
    frame.reset(features=[
        TestNumericFeaturesFrame.value_number,
        TestNumericFeaturesFrame.is_english
    ])
    self.test_frame = self.test_frame.drop(labels="is_dutch", axis=1)
    assert_frame_equal(frame.data, self.test_frame[0:0], check_like=True)
    sorted_feature_names = ["is_english", "value_number"]
    self.assertEquals(
        sorted(frame.features.keys()),
        sorted_feature_names
    )
def test_two_iterations_with_metadata_where_values_are_identical(self):
    columns = pd.MultiIndex.from_product([[1, 200], [1, 2]], names=['depth', 'iter'])
    data = pd.DataFrame(data=[[3, 6, 9, 9]], columns=columns, index=['milo'])
    counts = pd.DataFrame(data=[[3, 3, 3, 3]], columns=columns, index=['milo'])
    obs = _compute_summary(data, 'pet', counts=counts)

    d = [
        ['milo', 1, 3., 3.06, 3.27, 3.75, 4.5, 5.25, 5.73, 5.94, 6., 3],
        ['milo', 200, 9., 9., 9., 9., 9., 9., 9., 9., 9., 3],
    ]
    exp = pd.DataFrame(data=d, columns=['pet', 'depth', 'min', '2%', '9%', '25%',
                                        '50%', '75%', '91%', '98%', 'max', 'count'])
    pdt.assert_frame_equal(exp, obs)
def test_adding_features(self):
    features = [
        TestNumericFeaturesFrame.is_dutch
    ]
    frame = NumericFeaturesFrame(
        TestNumericFeaturesFrame.get_identifier,
        features,
        self.get_iterator
    )
    frame.load_features([
        TestNumericFeaturesFrame.value_number,
        TestNumericFeaturesFrame.is_english
    ])
    assert_frame_equal(frame.data, self.test_frame, check_like=True)
    sorted_feature_names = ["is_dutch", "is_english", "value_number"]
    self.assertEquals(
        sorted(frame.features.keys()),
        sorted_feature_names
    )
def test_test_data(self):
    pd_testing.assert_frame_equal(self.exercises.test_data, self.test_data)
def assert_geodataframe_equal(
    left,
    right,
    check_dtype=True,
    check_index_type="equiv",
    check_column_type="equiv",
    check_frame_type=True,
    check_like=False,
    check_less_precise=False,
    check_geom_type=False,
    check_crs=True,
):
    """
    Check that two GeoDataFrames are equal.

    Parameters
    ----------
    left, right : two GeoDataFrames
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type, check_column_type : bool, default 'equiv'
        Check that index types are equal.
    check_frame_type : bool, default True
        Check that both are same type (*and* are GeoDataFrames). If False,
        will attempt to convert both into GeoDataFrame.
    check_like : bool, default False
        If True, ignore the order of rows & columns.
    check_less_precise : bool, default False
        If True, use geom_almost_equals. If False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs : bool, default True
        If `check_frame_type` is True, then also check that the crs matches.
    """
    try:
        # added from pandas 0.20
        from pandas.testing import assert_frame_equal, assert_index_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal, assert_index_equal

    # instance validation
    if check_frame_type:
        assert isinstance(left, GeoDataFrame)
        assert isinstance(left, type(right))

        if check_crs:
            # no crs can be either None or {}
            if not left.crs and not right.crs:
                pass
            else:
                assert left.crs == right.crs
    else:
        if not isinstance(left, GeoDataFrame):
            left = GeoDataFrame(left)
        if not isinstance(right, GeoDataFrame):
            right = GeoDataFrame(right)

    # shape comparison
    assert left.shape == right.shape, (
        "GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n"
        "Left columns: {lcols!r}, right columns: {rcols!r}".format(
            lshape=left.shape,
            rshape=right.shape,
            lcols=left.columns,
            rcols=right.columns,
        )
    )

    if check_like:
        left, right = left.reindex_like(right), right

    # column comparison
    assert_index_equal(left.columns, right.columns,
                       exact=check_column_type, obj="GeoDataFrame.columns")

    # geometry comparison
    assert_geoseries_equal(
        left.geometry,
        right.geometry,
        check_dtype=check_dtype,
        check_less_precise=check_less_precise,
        check_geom_type=check_geom_type,
        check_crs=False,
    )

    # drop geometries and check remaining columns
    left2 = left.drop([left._geometry_column_name], axis=1)
    right2 = right.drop([right._geometry_column_name], axis=1)
    assert_frame_equal(
        left2,
        right2,
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_column_type=check_column_type,
        obj="GeoDataFrame",
    )
def test_join_project_left_table(how, left, right, df1, df2):
    expr = left.join(right, left.key == right.key, how=how)[left, right.key3]
    result = expr.execute()
    expected = pd.merge(df1, df2, how=how, on='key')[list(left.columns) + ['key3']]
    tm.assert_frame_equal(result[expected.columns], expected)
def test_asof_join(time_left, time_right, time_df1, time_df2):
    expr = time_left.asof_join(time_right, 'time')[time_left, time_right.other_value]
    result = expr.execute()
    expected = pd.merge_asof(time_df1, time_df2, on='time')
    tm.assert_frame_equal(result[expected.columns], expected)
def test_df_trans_acc_disp(self):
    pd_testing.assert_frame_equal(self.exercises.df_trans_acc_disp, self.df_trans_acc_disp)

def test_df_merged(self):
    pd_testing.assert_frame_equal(self.exercises.df_merged, self.df_merged)

def test_df(self):
    pd_testing.assert_frame_equal(self.exercises.bankData, self.bankData, check_dtype=False)
def test_pickle_method(self):
    filename = os.path.join(self.tempdir, "df.pkl")
    self.df.to_pickle(filename)
    unpickled = pd.read_pickle(filename)
    assert_frame_equal(self.df, unpickled)
    assert self.df.crs == unpickled.crs
def test_to_df_types(self, column_type, values, series):
    data = [(v, ) for v in values]
    results = QueryResults(["col"], [column_type], data)
    assert_frame_equal(results.to_df(), pd.DataFrame({"col": series}),
                       check_column_type="exact")
def test_save_and_load(self, versioned_csv_data_set, dummy_dataframe):
    """Test that saved and reloaded data matches the original one for
    the versioned data set."""
    versioned_csv_data_set.save(dummy_dataframe)
    reloaded_df = versioned_csv_data_set.load()
    assert_frame_equal(dummy_dataframe, reloaded_df)
#!/usr/bin/env python
# coding: utf-8

# In[58]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[123]:

movie = pd.read_csv('data/movie.csv', index_col='movie_title')
c1 = movie['title_year'] >= 2010
c2 = movie['title_year'].isnull()
criteria = c1 | c2
movie_mask = movie.mask(criteria).dropna(how='all')
movie_boolean = movie[movie['title_year'] < 2010]

from pandas.testing import assert_frame_equal
assert_frame_equal(movie_boolean, movie_mask, check_dtype=False)


# In[124]:

get_ipython().run_line_magic('timeit', "movie.mask(criteria).dropna(how='all')")


# In[ ]:
def test_gender_job():
    row = dfp.RowTransformer(columns=['Gender', 'Job'], drop_values=['p', 'N/A'])
    out = row.fit_transform(df)
    assert_frame_equal(out, drop_gender_job_df)
def test_create_or_update_move_stop_by_dist_time():
    move_df = MoveDataFrame(
        data=list_data,
        latitude=LATITUDE,
        longitude=LONGITUDE,
        datetime=DATETIME,
        traj_id=TRAJ_ID,
    )
    cols = [
        'segment_stop', 'id', 'lat', 'lon', 'datetime',
        'dist_to_prev', 'time_to_prev', 'speed_to_prev', 'stop',
    ]
    stay_point_detection.create_or_update_move_stop_by_dist_time(
        move_df, dist_radius=3.5, time_radius=0.5, inplace=True)
    expected = DataFrame(
        data=[
            [1, 1, 39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'),
             nan, nan, nan, False],
            [2, 1, 39.984198, 116.319322, Timestamp('2008-10-23 05:53:06'),
             nan, nan, nan, False],
            [3, 2, 39.984224, 116.319402, Timestamp('2008-10-23 05:53:11'),
             nan, nan, nan, True],
            [3, 2, 39.984224, 116.319402, Timestamp('2008-10-23 05:53:15'),
             0.0, 4.0, 0.0, True],
        ],
        columns=cols,
        index=[0, 1, 2, 3],
    )
    print(move_df)
    assert_frame_equal(move_df, expected)
def test_renamedBostonData(self):
    pd_testing.assert_frame_equal(self.exercises.renamedBostonData, self.renamedBostonData)

def test_df(self):
    pd_testing.assert_frame_equal(self.exercises.df, self.df)
def test_ndarray_input(self):
    cg = mat.ClusterGrid(self.x_norm, **self.default_kws)
    pdt.assert_frame_equal(cg.data, pd.DataFrame(self.x_norm))
    assert len(cg.fig.axes) == 4
    assert cg.ax_row_colors is None
    assert cg.ax_col_colors is None
def test_update_depr_methods(monkeypatch):
    '''
    Test of calcfunctions.update_depr_methods
    '''
    p = Specification()
    json_str = """
    {"schema": {
        "labels": {
            "asset_name": {"type": "str"},
            "BEA_code": {"type": "str"},
            "minor_asset_group": {"type": "str"},
            "major_asset_group": {"type": "str"},
            "ADS_life": {"type": "float"},
            "GDS_life": {"type": "float"},
            "system": {"type": "str"},
            "year": {
                "type": "int",
                "validators": {"range": {"min": 2013, "max": 2030}}
            }
        }
    },
    "asset": {
        "title": "Tax depreciation rules for assets",
        "description": "Tax depreciation rules for assets",
        "type": "depreciation_rules",
        "value": [
            {"ADS_life": 10.0, "BEA_code": "1", "GDS_life": 10.0,
             "asset_name": "Steam engines", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 10, "method": "DB 200%"}},
            {"ADS_life": 10.0, "BEA_code": "2", "GDS_life": 10.0,
             "asset_name": "Custom software", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 10, "method": "DB 150%"}},
            {"ADS_life": 3.0, "BEA_code": "3", "GDS_life": 3.0,
             "asset_name": "Other furniture", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 3, "method": "SL"}},
            {"ADS_life": 15.0, "BEA_code": "4", "GDS_life": 15.0,
             "asset_name": "Mining and oilfield machinery",
             "major_asset_group": "Group1", "minor_asset_group": "Group1",
             "system": "GDS", "year": 2020,
             "value": {"life": 15, "method": "Economic"}},
            {"ADS_life": 27.5, "BEA_code": "5", "GDS_life": 27.5,
             "asset_name": "Expensing", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 27.5, "method": "Expensing"}},
            {"ADS_life": 27.5, "BEA_code": "6", "GDS_life": 27.5,
             "asset_name": "PCs", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 27.5, "method": "DB 200%"}},
            {"ADS_life": 10.0, "BEA_code": "7", "GDS_life": 10.0,
             "asset_name": "Terminals", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 10, "method": "DB 150%"}},
            {"ADS_life": 3.0, "BEA_code": "8", "GDS_life": 3.0,
             "asset_name": "Manufacturing", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 3, "method": "SL"}},
            {"ADS_life": 15.0, "BEA_code": "9", "GDS_life": 15.0,
             "asset_name": "Wind and solar", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 15, "method": "Economic"}},
            {"ADS_life": 7.0, "BEA_code": "10", "GDS_life": 7.0,
             "asset_name": "Equipment", "major_asset_group": "Group1",
             "minor_asset_group": "Group1", "system": "GDS", "year": 2020,
             "value": {"life": 7, "method": "Expensing"}}
        ]
    }
    }
    """
    monkeypatch.setattr(DepreciationParams, "defaults", json_str)
    dp = DepreciationParams()
    asset_df = pd.DataFrame.from_dict({
        'bea_asset_code': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']})
    expected_df = pd.DataFrame(dp.asset)
    expected_df = pd.concat(
        [expected_df.drop(['value'], axis=1),
         expected_df['value'].apply(pd.Series)], axis=1)
    expected_df.drop(
        columns=['asset_name', 'minor_asset_group', 'major_asset_group'],
        inplace=True)
    expected_df['bea_asset_code'] = pd.Series(
        ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
        index=expected_df.index)
    expected_df['bonus'] = pd.Series(
        [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
        index=expected_df.index)
    expected_df['b'] = pd.Series(
        [2, 1.5, 1, 1, 1, 2, 1.5, 1, 1, 1], index=expected_df.index)
    expected_df['Y'] = pd.Series(
        [10, 10, 3, 15, 27.5, 27.5, 10, 3, 15, 7], index=expected_df.index)
    print('Expected df =', expected_df)
    test_df = cf.update_depr_methods(asset_df, p, dp)
    assert_frame_equal(test_df, expected_df, check_like=True)
def test_df_input(self):
    cg = mat.ClusterGrid(self.df_norm, **self.default_kws)
    pdt.assert_frame_equal(cg.data, self.df_norm)
def test_df_disp_owner(self):
    pd_testing.assert_frame_equal(self.exercises.df_disp_owner, self.df_disp_owner)

def test_df(self):
    pd_testing.assert_frame_equal(self.exercises.bankData, self.bankData)
def check_cox(rossi, x, stratify_by, formula):
    if stratify_by:
        cph_py = CoxPHFitter(strata=stratify_by)
    else:
        cph_py = CoxPHFitter()

    for col in stratify_by:
        rossi[col] = rossi[col].astype('category')

    cph_py.fit(rossi, duration_col='week', event_col='arrest')
    cph_py.print_summary()

    rossi_h2o = h2o.H2OFrame(rossi)
    for col in stratify_by:
        rossi_h2o[col] = rossi_h2o[col].asfactor()

    cph_h2o = H2OCoxProportionalHazardsEstimator(stop_column="week", stratify_by=stratify_by)
    cph_h2o.train(x=x, y="arrest", training_frame=rossi_h2o)

    assert cph_h2o.model_id != ""
    assert cph_h2o.formula() == formula, \
        "Expected formula to be '" + formula + "' but it was " + cph_h2o.formula()

    predH2O = cph_h2o.predict(test_data=rossi_h2o)
    assert len(predH2O) == len(rossi)

    metrics_h2o = cph_h2o.model_performance(rossi_h2o)
    concordance_py = concordance_for_lifelines(cph_py)
    assert abs(concordance_py - metrics_h2o.concordance()) < 0.001

    hazard_h2o = h2o.get_frame(cph_h2o._model_json['output']['baseline_hazard']['name'])
    hazard_h2o_as_pandas = hazard_h2o.as_data_frame(use_pandas=True)

    hazard_py = cph_py.baseline_hazard_
    for col_name in hazard_py.columns:
        hazard_py.rename(columns={col_name: str(col_name)}, inplace=True)

    hazard_py_reordered_columns = hazard_py.reset_index(drop=True).sort_index(axis=1)
    hazard_h2o_reordered_columns = hazard_h2o_as_pandas.drop(
        't', axis="columns").reset_index(drop=True).sort_index(axis=1)
    hazard_py_reordered_columns = fix_py_result_for_older_lifelines(
        hazard_py_reordered_columns)

    print("h2o:")
    print(hazard_h2o_as_pandas.reset_index(drop=True))
    print("lifelines:")
    print(hazard_py_reordered_columns.reset_index(drop=True))

    assert_frame_equal(hazard_py_reordered_columns, hazard_h2o_reordered_columns,
                       check_dtype=False, check_index_type=False, check_column_type=False)

    survival_h2o = h2o.get_frame(cph_h2o._model_json['output']['baseline_survival']['name'])
    survival_h2o_as_pandas = survival_h2o.as_data_frame(use_pandas=True)

    survival_py = cph_py.baseline_survival_
    for col_name in survival_py.columns:
        survival_py.rename(columns={col_name: str(col_name)}, inplace=True)

    survival_py_reordered_columns = survival_py.reset_index(drop=True).sort_index(axis=1)
    survival_h2o_reordered_columns = survival_h2o_as_pandas.drop(
        't', axis="columns").reset_index(drop=True).sort_index(axis=1)
    survival_py_reordered_columns = fix_py_result_for_older_lifelines(
        survival_py_reordered_columns)

    print("h2o:")
    print(survival_h2o_as_pandas.reset_index(drop=True))
    print("lifelines:")
    print(survival_py_reordered_columns.reset_index(drop=True))

    assert_frame_equal(survival_py_reordered_columns, survival_h2o_reordered_columns,
                       check_dtype=False, check_index_type=False, check_column_type=False)
def test_prepare():
    def prep(csvs, args=[]):
        args = ac.parse_cmdline_args(args)
        return ac.prepare_bartables(ld(csvs), args)

    # one category and totals
    in1 = """
Tag,Time
a:x-y,00:01:00
(unmatched time),00:02:00
(total time),00:03:00
"""
    out1 = pd.DataFrame(
        {'Time': ['', '', '00:02:00', '00:01:00', '', '00:03:00'],
         'Type': ['text', 'text', 'bar', 'bar', 'text', 'total_bar'],
         'Frac': [None, None, 2/3, 1/3, None, 1],
         'FracAbove': [None, None, 0, 2/3, None, 0],
         'HourFrac': [None, None, 20, 20, None, 20]},
        index=pd.Index(['a', '═', '(unmatched time)', 'x-y', '', '(total time)'],
                       name='Tag'))
    pdt.assert_frame_equal(prep([in1]), out1)

    # same, different totals
    in1_totals = """
Tag,Time
a:x-y,00:01:00
(unmatched),00:02:00
(screen),00:03:00
"""
    out1_totals = out1.set_index(
        pd.Index(['a', '═', '(unmatched)', 'x-y', '', '(screen)'], name='Tag'))
    pdt.assert_frame_equal(
        prep([in1_totals], args=["--totals-re", "^\\(screen"]), out1_totals)

    # same, subtags
    out1_subtags = out1.set_index(
        pd.MultiIndex.from_tuples(
            [('a', ''), ('═', ''), ('(unmatched time)', ''), ('x', 'y'),
             ('', ''), ('(total time)', '')],
            names=['Tag', 'SubTag']))
    pdt.assert_frame_equal(prep([in1], args=["--subtags"]), out1_subtags)

    # two categories and totals
    in2 = """
Tag,Time
b:z,00:01:00
(unmatched time),00:02:00
(total time),00:03:00
"""
    blank = pd.DataFrame(
        {'Time': [''], 'Type': ['text'], 'Frac': [None],
         'FracAbove': [None], 'HourFrac': [None]},
        index=pd.Index([''], name='Tag'))
    out2 = out1.set_index(
        pd.Index(['b', '═', '(unmatched time)', 'z', '', '(total time)'], name='Tag'))
    pdt.assert_frame_equal(prep([in1, in2]), pd.concat([out1, blank, out2]))

    # three categories, subtags
    in3 = """
Tag,Time
c:z,00:01:00
(unmatched time),00:02:00
(total time),00:03:00
"""
    out2_subtags = out1.set_index(
        pd.MultiIndex.from_tuples(
            [('b', ''), ('═', ''), ('(unmatched time)', ''), ('z', ''),
             ('', ''), ('(total time)', '')],
            names=['Tag', 'SubTag']))
    out3_subtags = out1.set_index(
        pd.MultiIndex.from_tuples(
            [('c', ''), ('═', ''), ('(unmatched time)', ''), ('z', ''),
             ('', ''), ('(total time)', '')],
            names=['Tag', 'SubTag']))
    blank_subtags = blank.set_index(
        pd.MultiIndex.from_tuples([('', '')], names=['Tag', 'SubTag']))
    pdt.assert_frame_equal(
        prep([in1, in2, in3], args=["--subtags"]),
        pd.concat([out1_subtags, blank_subtags, out2_subtags,
                   blank_subtags, out3_subtags]))
def test_diff_data(test_mp):
    """diff() when Scenarios contain the same items, but different data."""
    scen_a = make_dantzig(test_mp)
    scen_b = make_dantzig(test_mp)

    # Modify `scen_a` and `scen_b`
    scen_a.check_out()
    scen_b.check_out()

    # Remove elements from "b"
    drop_args = dict(labels=["value", "unit"], axis=1)
    scen_a.remove_par("b", scen_a.par("b").iloc[0:1, :].drop(**drop_args))
    scen_b.remove_par("b", scen_b.par("b").iloc[1:2, :].drop(**drop_args))

    # Remove elements from "d"
    scen_a.remove_par(
        "d", scen_a.par("d").query("i == 'san-diego'").drop(**drop_args))

    # Modify values in "d"
    scen_b.add_par("d", scen_b.par("d").query("i == 'seattle'").assign(value=123.4))

    # Expected results
    exp_b = pd.DataFrame(
        [
            ["chicago", 300.0, "cases", np.NaN, None, "left_only"],
            ["new-york", np.NaN, None, 325.0, "cases", "right_only"],
            ["topeka", 275.0, "cases", 275.0, "cases", "both"],
        ],
        columns="j value_a unit_a value_b unit_b _merge".split(),
    )
    exp_d = pd.DataFrame(
        [
            ["san-diego", "chicago", np.NaN, None, 1.8, "km", "right_only"],
            ["san-diego", "new-york", np.NaN, None, 2.5, "km", "right_only"],
            ["san-diego", "topeka", np.NaN, None, 1.4, "km", "right_only"],
            ["seattle", "chicago", 1.7, "km", 123.4, "km", "both"],
            ["seattle", "new-york", 2.5, "km", 123.4, "km", "both"],
            ["seattle", "topeka", 1.8, "km", 123.4, "km", "both"],
        ],
        columns="i j value_a unit_a value_b unit_b _merge".split(),
    )

    # Use the specific categorical produced by pd.merge()
    merge_cat = pd.CategoricalDtype(["left_only", "right_only", "both"])
    exp_b = exp_b.astype(dict(_merge=merge_cat))
    exp_d = exp_d.astype(dict(_merge=merge_cat))

    # Compare different scenarios without filters
    for name, df in utils.diff(scen_a, scen_b):
        if name == "b":
            pdt.assert_frame_equal(exp_b, df)
        elif name == "d":
            pdt.assert_frame_equal(exp_d, df)

    # Compare different scenarios with filters
    iterator = utils.diff(scen_a, scen_b, filters=dict(j=["chicago"]))
    for name, df in iterator:
        # Same as above, except only the filtered rows should appear
        if name == "b":
            pdt.assert_frame_equal(exp_b.iloc[0:1, :], df)
        elif name == "d":
            pdt.assert_frame_equal(exp_d.iloc[[0, 3], :].reset_index(drop=True), df)
def test_stream():
    df = pd.DataFrame(data_stream)
    filename = os.path.join(testdir, "health.pdf")
    tables = camelot.read_pdf(filename, flavor="stream")
    assert_frame_equal(df, tables[0].df)
def test_build_base_silva_taxonomy(self):
    input_taxranks = _prep_taxranks(self.taxranks)
    obs_taxonomy = _build_base_silva_taxonomy(self.taxtree, input_taxranks,
                                              ALLOWED_RANKS, rank_propagation=True)
    obs_taxonomy.sort_index(inplace=True)

    tid = {
        'taxid': ['2', '11084', '42913', '42914', '42915',
                  '11089', '24228', '24229', '42916', '42917'],
        'd__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'sk__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                 'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'k__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'ks__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                 'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'sp__': ['Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
                 'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea'],
        'p__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeota', 'Aenigmarchaeota',
                'Aenigmarchaeota', 'Aenigmarchaeota', 'Altiarchaeota',
                'Altiarchaeota', 'Altiarchaeota', 'Altiarchaeota'],
        'ps__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeota', 'Aenigmarchaeota',
                 'Aenigmarchaeota', 'Aenigmarchaeota', 'Altiarchaeota',
                 'Altiarchaeota', 'Altiarchaeota', 'Altiarchaeota'],
        'pi__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeota', 'Aenigmarchaeota',
                 'Aenigmarchaeota', 'Aenigmarchaeota', 'Altiarchaeota',
                 'Altiarchaeota', 'Altiarchaeota', 'Altiarchaeota'],
        'sc__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeota', 'Aenigmarchaeota',
                 'Aenigmarchaeota', 'Aenigmarchaeota', 'Altiarchaeota',
                 'Altiarchaeota', 'Altiarchaeota', 'Altiarchaeota'],
        'c__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
                'Aenigmarchaeia', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeia', 'Altiarchaeia'],
        'cs__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
                 'Aenigmarchaeia', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeia', 'Altiarchaeia'],
        'ci__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
                 'Aenigmarchaeia', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeia', 'Altiarchaeia'],
        'so__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeia',
                 'Aenigmarchaeia', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeia', 'Altiarchaeia'],
        'o__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeales'],
        'os__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                 'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeales'],
        'sf__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                 'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeales'],
        'f__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeaceae'],
        'fs__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                 'Aenigmarchaeales', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                 'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeaceae'],
        'g__': ['Archaea', 'Aenigmarchaeota', 'Aenigmarchaeia', 'Aenigmarchaeales',
                'Candidatus_Aenigmarchaeum', 'Deep_Sea_Euryarchaeotic_Group(DSEG)',
                'Altiarchaeota', 'Altiarchaeia', 'Altiarchaeales', 'Altiarchaeaceae'],
    }
    exp_taxonomy = pd.DataFrame(tid)
    exp_taxonomy.set_index('taxid', inplace=True)
    exp_taxonomy.sort_index(inplace=True)
    assert_frame_equal(obs_taxonomy, exp_taxonomy)
def test_join(how, left, right, df1, df2):
    expr = left.join(right, left.key == right.key, how=how)[
        left, right.other_value, right.key3]
    result = expr.execute()
    expected = pd.merge(df1, df2, how=how, on='key')
    tm.assert_frame_equal(result[expected.columns], expected)
def test_write_csv_from_data_vendor():
    """Tests downloading market data from the data vendor and dumping to CSV.
    Checks the written CSV against what is loaded in memory. Also checks data
    is available in each 'usual' market hour.

    Note that we use cached data from disk, as we want to download relatively
    large sections of data, and doing this externally can cause the test to
    run very slowly.
    """
    for data_vendor_name in data_vendor_name_list:
        # database_source = database_source_dict[data_vendor_name]
        database_populator = database_populator_dict[data_vendor_name]
        chunk_int_min = chunk_int_min_dict[data_vendor_name]

        # specifically choose dates which straddle the weekend boundary
        start_date = '27 Apr 2018'
        finish_date = '03 May 2018'
        expected_csv_files = 5  # during British Summer Time in London

        # start_date = '02 Feb 2018'; finish_date = '07 Feb 2018'; expected_csv_files = 4  # during GMT time in London

        split_size = 'daily'
        write_csv = False

        # prepare the CSV folder first
        csv_folder = os.path.join(constants.test_data_harness_folder,
                                  'csv_' + data_vendor_name + '_dump')

        # empty the CSV test harness folder
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        msg, df_dict = database_populator.download_to_csv(
            start_date, finish_date, ['EURUSD'], chunk_int_min=chunk_int_min,
            split_size=split_size, csv_folder=csv_folder, return_df=True,
            write_large_csv=write_csv, remove_duplicates=False,
            web_proxies=web_proxies)

        df_read_direct_from_data_vendor = df_dict['EURUSD']

        # check it has data for every market hour (eg. ignoring Saturdays)
        assert util_func.check_data_frame_points_in_every_hour(
            df_read_direct_from_data_vendor, start_date, finish_date)

        if write_csv:
            # read back the CSVs dumped on disk in the test harness CSV folder
            csv_file_list = glob.glob(csv_folder + '/EURUSD*.csv')
            assert len(csv_file_list) == expected_csv_files

            df_list = []
            for c in csv_file_list:
                df = pd.read_csv(c, index_col=0)
                df.index = pd.to_datetime(df.index)
                df_list.append(df)

            # now compare the CSVs on disk versus those read directly
            df_read_from_csv = pd.concat(df_list).tz_localize(pytz.utc)
            assert_frame_equal(df_read_from_csv, df_read_direct_from_data_vendor)
def test_coord_slice_points(self):
    assert self.df2.cx[-2:-1, -2:-1].empty
    assert_frame_equal(self.df2, self.df2.cx[:, :])
    assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, :])
    assert_frame_equal(self.df2.loc[5:], self.df2.cx[:, 5:])
    assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, 5:])
def test_create_update_move_and_stop_by_radius():
    move_df = MoveDataFrame(
        data=list_data,
        latitude=LATITUDE,
        longitude=LONGITUDE,
        datetime=DATETIME,
        traj_id=TRAJ_ID,
    )
    cols = [
        'id', 'lat', 'lon', 'datetime', 'dist_to_prev',
        'dist_to_next', 'dist_prev_to_next', 'situation',
    ]
    stay_point_detection.create_update_move_and_stop_by_radius(move_df, radius=4.0)
    expected = DataFrame(
        data=[
            [1, 39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'),
             nan, 13.690153134343689, nan, 'nan'],
            [1, 39.984198, 116.319322, Timestamp('2008-10-23 05:53:06'),
             13.690153134343689, nan, nan, 'move'],
            [2, 39.984224, 116.319402, Timestamp('2008-10-23 05:53:11'),
             nan, 0.0, nan, 'nan'],
            [2, 39.984224, 116.319402, Timestamp('2008-10-23 05:53:15'),
             0.0, nan, nan, 'stop'],
        ],
        columns=cols,
        index=[0, 1, 2, 3],
    )
    assert_frame_equal(move_df, expected)
def frame_equal(a, b):
    try:
        assert_frame_equal(a, b)
    except AssertionError:
        return False
    return True
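# Minimal usage sketch for frame_equal above (the frames here are illustrative, not from the
# original suite): unlike assert_frame_equal it returns a bool instead of raising, so it can
# be used in plain boolean checks.
import pandas as pd

df_a = pd.DataFrame({"x": [1, 2]})
df_b = pd.DataFrame({"x": [1, 3]})
assert frame_equal(df_a, df_a.copy())   # identical frames -> True
assert not frame_equal(df_a, df_b)      # differing values -> False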