def test_esk205(self):
    """Test Esk-205: Concatenate pandas data frames."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk205_concatenate_pandas_dfs.py'))
    ds = process_manager.service(DataStore)

    # concatenated frame and its row count must be in the data store;
    # assertIn gives a clearer failure message than assertTrue(... in ds)
    self.assertIn('outgoing', ds)
    self.assertEqual(ds['n_outgoing'], 12)
def test_esk101(self):
    """Test Esk-101: Hello world."""
    # run Eskapade
    macro = resources.tutorial('esk101_helloworld.py')
    self.eskapade_run(macro)

    # macro must have set both configuration values
    settings = process_manager.service(ConfigObject)
    self.assertTrue(settings['do_hello'])
    self.assertEqual(2, settings['n_repeat'])
def test_esk409(self):
    """Test Esk-409: Unredeemed vouchers."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk409_unredeemed_vouchers.py'))
    ds = process_manager.service(DataStore)

    # generated RooFit data sets must be present with the expected sizes
    for key in ('voucher_redeems', 'voucher_ages'):
        self.assertIn(key, ds)
        self.assertIsInstance(ds[key], ROOT.RooDataSet)
    n_redeems = ds['voucher_redeems'].numEntries()
    self.assertLess(n_redeems, 6000)
    self.assertGreater(n_redeems, 0)
    self.assertEqual(ds['voucher_ages'].numEntries(), 10000)

    # fit must have converged; event-count pull must stay within 3 sigma
    fit_link = process_manager.get('Fitting').get('Fit')
    self.assertEqual(fit_link.fit_result.status(), 0)
    n_ev_val = fit_link.results['n_ev'][0]
    n_ev_err = fit_link.results['n_ev'][1]
    n_ev_pull = (n_ev_val - 6000.) / n_ev_err
    self.assertGreater(n_ev_pull, -3.)
    self.assertLess(n_ev_pull, 3.)

    # a non-empty plot file must have been written
    plot_path = persistence.io_path('results_data', 'voucher_redeem.pdf')
    self.assertTrue(os.path.exists(plot_path))
    statinfo = os.stat(plot_path)
    self.assertGreater(statinfo.st_size, 0)
def test_esk609(self):
    """Test Esk-609: Map data-frame groups."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk609_map_df_groups.py'))
    ds = process_manager.service(DataStore)

    # both mapped outputs must be RDDs in the data store
    for key in ('map_rdd', 'flat_map_rdd'):
        self.assertIn(key, ds, 'no data found with key "{}"'.format(key))
        self.assertIsInstance(
            ds[key], pyspark.RDD,
            'object "{0:s}" is not an RDD (type "{1!s}")'.format(
                key, type(ds[key])))

    # expected sums of the "bar" variable: 27.5 for group 0, increasing
    # by 50 per group (all values exactly representable as floats)
    bar_sums = [(grp, 27.5 + 50. * grp) for grp in range(10)]
    flmap_rows = [(idx, 'foo{:d}'.format(idx), (idx + 1) / 2.,
                   bar_sums[idx // 10][1]) for idx in range(100)]

    # check content of the mapped data sets
    self.assertListEqual(sorted(ds['map_rdd'].collect()), bar_sums,
                         'unexpected values in "map_rdd"')
    self.assertListEqual(sorted(ds['flat_map_rdd'].collect()), flmap_rows,
                         'unexpected values in "flat_map_rdd"')
def test_esk607(self):
    """Test Esk-607: Add column to Spark dataframe."""
    # check if running in local mode; raw string fixes the old pattern
    # 'local\[[.*]\]', whose character class [.*] only matched a literal
    # '.' or '*' and therefore never matched masters like 'local[2]'
    sc = process_manager.service(SparkManager).get_session().sparkContext
    self.assertRegex(
        sc.getConf().get('spark.master', ''), r'local\[.*\]',
        'Spark not running in local mode, required for testing with local files'
    )

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk607_spark_with_column.py'))
    ds = process_manager.service(DataStore)

    # check data frame: type, size, columns, and exact content
    self.assertIn('new_spark_df', ds,
                  'no object with key "new_spark_df" in data store')
    self.assertIsInstance(ds['new_spark_df'], pyspark.sql.DataFrame,
                          '"new_spark_df" is not a Spark data frame')
    self.assertEqual(ds['new_spark_df'].count(), 5,
                     'unexpected number of rows in filtered data frame')
    self.assertListEqual(
        ds['new_spark_df'].columns,
        ['dummy', 'date', 'loc', 'x', 'y', 'pow_xy1', 'pow_xy2'],
        'unexpected columns in data frame')
    self.assertSetEqual(
        set(tuple(r) for r in ds['new_spark_df'].collect()),
        set([('bla', 20090103, 'c', 5, 7, 78125.0, 78125.0),
             ('bal', 20090102, 'b', 3, 8, 6561.0, 6561.0),
             ('flo', 20090104, 'e', 3, 5, 243.0, 243.0),
             ('bar', 20090101, 'a', 1, 9, 1.0, 1.0),
             ('foo', 20090104, 'd', 1, 6, 1.0, 1.0)]),
        'unexpected values in columns')
def test_esk605(self):
    """Test Esk-605: Create Spark data frame."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk605_create_spark_df.py'))
    ds = process_manager.service(DataStore)

    # expected schema and content shared by all created data frames
    expected_schema = (StructField('index', LongType()),
                       StructField('foo', StringType()),
                       StructField('bar', DoubleType()))
    expected_rows = [(num, 'foo{:d}'.format(num), (num + 1) / 2.)
                     for num in range(20, 100)]

    # every creation method must yield the same cached data frame
    for df_key in ('rows_df', 'rdd_df', 'df_df', 'pd_df'):
        self.assertIn(df_key, ds,
                      'no object with key {} in data store'.format(df_key))
        frame = ds[df_key]
        self.assertIsInstance(
            frame, pyspark.sql.DataFrame,
            'object with key {0:s} is not a data frame (type {1!s})'.format(
                df_key, type(frame)))
        self.assertTupleEqual(
            tuple(frame.schema), expected_schema,
            'unexpected data-frame schema for {}'.format(df_key))
        self.assertListEqual(
            sorted(tuple(r) for r in frame.collect()), expected_rows,
            'unexpected data-frame content for {}'.format(df_key))
        self.assertTrue(frame.is_cached,
                        'data frame {} is not cached'.format(df_key))
        self.assertLessEqual(
            frame.rdd.getNumPartitions(), 2,
            'unexpected number of data-frame partitions for {}'.format(
                df_key))
def test_esk604(self):
    """Test Esk-604: Execute Spark-SQL query."""
    # check if running in local mode; raw string fixes the old pattern
    # 'local\[[.*]\]', whose character class [.*] only matched a literal
    # '.' or '*' and therefore never matched masters like 'local[2]'
    sc = process_manager.service(SparkManager).get_session().sparkContext
    self.assertRegex(
        sc.getConf().get('spark.master', ''), r'local\[.*\]',
        'Spark not running in local mode, required for testing with local files'
    )

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk604_spark_execute_query.py'))
    ds = process_manager.service(DataStore)

    # check data frame: type, size, columns, and exact content
    self.assertIn('spark_df_sql', ds,
                  'no object with key "spark_df_sql" in data store')
    self.assertIsInstance(ds['spark_df_sql'], pyspark.sql.DataFrame,
                          '"spark_df_sql" is not a Spark data frame')
    self.assertEqual(ds['spark_df_sql'].count(), 4,
                     'unexpected number of rows in filtered data frame')
    self.assertListEqual(ds['spark_df_sql'].columns,
                         ['loc', 'sumx', 'sumy'],
                         'unexpected columns in data frame')
    # schema must match the one kept by the SparkSQL link
    self.assertEqual(
        ds['spark_df_sql'].schema,
        process_manager.get('ApplySQL').get('SparkSQL').schema,
        'schema of data frame does not correspond to schema stored in link'
    )
    self.assertSetEqual(
        set(tuple(r) for r in ds['spark_df_sql'].collect()),
        set([('e', 10, 15), ('d', 2, 11), ('b', 6, 16), ('a', 2, 18)]),
        'unexpected values in loc/sumx/sumy columns')
def test_esk602(self):
    """Test Esk-602: Read CSV files into a Spark data frame."""
    # check if running in local mode; raw string fixes the old pattern
    # 'local\[[.*]\]', whose character class [.*] only matched a literal
    # '.' or '*' and therefore never matched masters like 'local[2]'
    sc = process_manager.service(SparkManager).get_session().sparkContext
    self.assertRegex(
        sc.getConf().get('spark.master', ''), r'local\[.*\]',
        'Spark not running in local mode, required for testing with local files'
    )

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk602_read_csv_to_spark_df.py'))
    ds = process_manager.service(DataStore)

    # check data frame: type, partitioning, size, columns, and content
    self.assertIn('spark_df', ds,
                  'no object with key "spark_df" in data store')
    self.assertIsInstance(ds['spark_df'], pyspark.sql.DataFrame,
                          '"spark_df" is not a Spark data frame')
    self.assertEqual(ds['spark_df'].rdd.getNumPartitions(), 5,
                     'unexpected number of partitions in data frame')
    self.assertEqual(ds['spark_df'].count(), 12,
                     'unexpected number of rows in data frame')
    self.assertListEqual(ds['spark_df'].columns, ['date', 'loc', 'x', 'y'],
                         'unexpected columns in data frame')
    self.assertSetEqual(
        set((r['date'], r['loc']) for r in ds['spark_df'].collect()),
        set([(20090101, 'a'), (20090102, 'b'), (20090103, 'c'),
             (20090104, 'd'), (20090104, 'e'), (20090106, 'a'),
             (20090107, 'b'), (20090107, 'c'), (20090107, 'd'),
             (20090108, 'e'), (20090109, 'e'), (20090109, 'f')]),
        'unexpected values in date/loc columns')
def test_esk402(self):
    """Test Esk-402: RooDataHist fill."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk402_roodatahist_fill.py'))
    ds = process_manager.service(DataStore)

    # data-generation checks: both account counters and all mapping dicts
    for count_key in ('n_accounts', 'n_rdh_accounts'):
        self.assertIn(count_key, ds)
        self.assertEqual(650, ds[count_key])
    for map_key in ('to_factorized', 'to_original',
                    'map_rdh_accounts_to_original'):
        self.assertIn(map_key, ds)
        self.assertIsInstance(ds[map_key], dict)

    # roofit objects check: argument sets and the filled data histogram
    for set_key, set_len in (('accounts_catset', 2), ('accounts_varset', 6)):
        self.assertIn(set_key, ds)
        self.assertIsInstance(ds[set_key], ROOT.RooArgSet)
        self.assertEqual(set_len, len(ds[set_key]))
    self.assertIn('rdh_accounts', ds)
    self.assertIsInstance(ds['rdh_accounts'], ROOT.RooDataHist)
def test_esk407(self):
    """Test Esk-407: Classification unbiased fit estimate."""
    # run Eskapade
    macro = resources.tutorial(
        'esk407_classification_unbiased_fit_estimate.py')
    self.eskapade_run(macro)
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # roofit objects check in datastore
    self.assertIn('fit_result', ds)
    self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

    # roofit objects check in workspace; assertTrue(x) is the direct form
    # of the original assertFalse(not x) truthiness (non-null) check
    mdata = ws.data('data')
    self.assertTrue(mdata)
    self.assertEqual(1000, mdata.numEntries())
    mpdf = ws.pdf('hist_model')
    self.assertTrue(mpdf)

    # successful fit result
    fit_result = ds['fit_result']
    self.assertEqual(0, fit_result.status())
    self.assertEqual(3, fit_result.covQual())

    # fitted yields; assertLess/assertGreater report the offending value
    # on failure, unlike assertTrue(x < 1000)
    lo_risk = ws.var('N_low_risk')
    self.assertTrue(lo_risk)
    self.assertLess(lo_risk.getVal(), 1000)
    self.assertGreater(lo_risk.getError(), 0)
    hi_risk = ws.var('N_high_risk')
    self.assertTrue(hi_risk)
    self.assertGreater(hi_risk.getVal(), 0)
    self.assertGreater(hi_risk.getError(), 0)
def test_esk211(self):
    """Test Esk-211: Fork read-data iterator."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk211_fork_read_data_itr.py'))
    ds = process_manager.service(DataStore)

    # reduced frame must be present with 24 rows; assertIn gives a
    # clearer failure message than assertTrue(... in ds)
    self.assertIn('reduced_data', ds)
    self.assertEqual(24, len(ds['reduced_data'].index))
def test_esk405(self):
    """Test Esk-405: Simulation based on binned data."""
    # run Eskapade
    self.eskapade_run(
        resources.tutorial('esk405_simulation_based_on_binned_data.py'))
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # data-generation checks
    self.assertIn('n_rdh_accounts', ds)
    self.assertEqual(650, ds['n_rdh_accounts'])

    # roofit objects check in workspace
    for ws_key in ('hpdf_Ndim', 'rdh_accounts'):
        self.assertIn(ws_key, ws)
    mcats = ws.set('rdh_cats')
    self.assertFalse(not mcats)
    self.assertEqual(1, len(mcats))
    mvars = ws.set('rdh_vars')
    self.assertFalse(not mvars)
    self.assertEqual(3, len(mvars))
    self.assertEqual(650, ws.data('rdh_accounts').sumEntries())
def test_esk406(self):
    """Test Esk-406: Simulation based on unbinned data."""
    # run Eskapade
    macro = resources.tutorial(
        'esk406_simulation_based_on_unbinned_data.py')
    self.eskapade_run(macro)
    ds = process_manager.service(DataStore)

    # data-generation checks: record counters and derived objects
    for count_key, count in (('n_correlated_data', 500),
                             ('n_rds_correlated_data', 500),
                             ('n_df_simdata', 5000)):
        self.assertIn(count_key, ds)
        self.assertEqual(count, ds[count_key])
    self.assertIn('df_simdata', ds)
    self.assertIsInstance(ds['df_simdata'], pd.DataFrame)
    self.assertIn('hist', ds)
    self.assertIsInstance(ds['hist'], dict)

    # roofit objects check
    self.assertIn('keys_varset', ds)
    self.assertIsInstance(ds['keys_varset'], ROOT.RooArgSet)
    self.assertEqual(2, len(ds['keys_varset']))
    for rds_key in ('rds_correlated_data', 'simdata'):
        self.assertIn(rds_key, ds)
        self.assertIsInstance(ds[rds_key], ROOT.RooDataSet)
def test_esk301(self):
    """Test Esk-301: Data-frame summary plotter."""
    settings = process_manager.service(ConfigObject)
    settings['batchMode'] = True

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk301_dfsummary_plotter.py'))
    settings = process_manager.service(ConfigObject)
    ds = process_manager.service(DataStore)
    columns = ['var_a', 'var_b', 'var_c']

    # data-generation checks
    self.assertIn('data', ds)
    self.assertIsInstance(ds['data'], pd.DataFrame)
    self.assertListEqual(list(ds['data'].columns), columns)
    self.assertEqual(10000, len(ds['data']))

    # data-summary checks: report plus one histogram per column, non-empty
    file_names = ['report.tex'] + ['hist_{}.pdf'.format(col)
                                   for col in columns]
    for fname in file_names:
        path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(
            settings['resultsDir'], settings['analysisName'], fname)
        self.assertTrue(os.path.exists(path))
        statinfo = os.stat(path)
        # assertGreater reports the actual size on failure,
        # unlike assertTrue(statinfo.st_size > 0)
        self.assertGreater(statinfo.st_size, 0)
def test_esk404(self):
    """Test Esk-404: Workspace create PDF, simulate, fit, plot."""
    # run Eskapade
    self.eskapade_run(
        resources.tutorial(
            'esk404_workspace_createpdf_simulate_fit_plot.py'))
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # data-generation checks
    self.assertIn('n_df_simdata', ds)
    self.assertEqual(1000, ds['n_df_simdata'])

    # roofit objects check in datastore
    self.assertIn('fit_result', ds)
    self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

    # successful fit result
    result = ds['fit_result']
    self.assertEqual(0, result.status())
    self.assertEqual(3, result.covQual())

    # simulated data and its plot
    self.assertIn('simdata', ds)
    self.assertIsInstance(ds['simdata'], ROOT.RooDataSet)
    self.assertIn('simdata_plot', ds)
    self.assertIsInstance(ds['simdata_plot'], ROOT.RooPlot)

    # roofit objects check in workspace
    for pdf_key in ('model', 'bkg', 'sig'):
        self.assertIn(pdf_key, ws)
def test_esk303(self):
    """Test Esk-303: Histogrammar filler and plotter."""
    settings = process_manager.service(ConfigObject)
    settings['batchMode'] = True

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk303_hgr_filler_plotter.py'))
    settings = process_manager.service(ConfigObject)
    ds = process_manager.service(DataStore)

    # data-generation checks
    self.assertIn('n_sum_rc', ds)
    self.assertEqual(650, ds['n_sum_rc'])
    self.assertIn('hist', ds)
    self.assertIsInstance(ds['hist'], dict)
    col_names = ['date', 'isActive', 'age', 'eyeColor', 'gender',
                 'company', 'latitude', 'longitude', 'isActive:age',
                 'latitude:longitude']
    self.assertListEqual(sorted(ds['hist'].keys()), sorted(col_names))

    # data-summary checks: report plus one histogram file per variable
    f_bases = ['date', 'isActive', 'age', 'eyeColor', 'gender', 'company',
               'latitude', 'longitude', 'latitude_vs_longitude']
    file_names = ['report.tex'] + ['hist_{}.pdf'.format(col)
                                   for col in f_bases]
    for fname in file_names:
        path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(
            settings['resultsDir'], settings['analysisName'], fname)
        self.assertTrue(os.path.exists(path))
        statinfo = os.stat(path)
        # assertGreater reports the actual size on failure,
        # unlike assertTrue(statinfo.st_size > 0)
        self.assertGreater(statinfo.st_size, 0)
def test_esk408(self):
    """Test Esk-408: Classification error propagation after fit."""
    # run Eskapade
    self.eskapade_run(
        resources.tutorial(
            'esk408_classification_error_propagation_after_fit.py'))
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # data-generation checks
    self.assertIn('n_df_pvalues', ds)
    self.assertEqual(500, ds['n_df_pvalues'])
    self.assertIn('df_pvalues', ds)
    self.assertIsInstance(ds['df_pvalues'], pd.DataFrame)
    df = ds['df_pvalues']
    # assertIn gives clearer failure output than assertTrue(... in columns)
    self.assertIn('high_risk_pvalue', df.columns)
    self.assertIn('high_risk_perror', df.columns)

    # roofit objects check in workspace; assertTrue(x) is the direct form
    # of the original assertFalse(not x) truthiness (non-null) check
    fit_result = ws.obj('fit_result')
    self.assertTrue(fit_result)
    self.assertIsInstance(fit_result, ROOT.RooFitResult)

    # test for successful fit result
    self.assertEqual(0, fit_result.status())
    self.assertEqual(3, fit_result.covQual())
    frac = ws.var('frac')
    self.assertTrue(frac)
    # assertGreater reports the actual value on failure
    self.assertGreater(frac.getVal(), 0)
    self.assertGreater(frac.getError(), 0)
def test_esk105a(self):
    """Test Esk-105-A: do not store results."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk105_A_dont_store_results.py'))
    settings = process_manager.service(ConfigObject)

    # no results directory may have been created for this analysis
    results_path = '/'.join([settings['resultsDir'],
                             settings['analysisName']])
    self.assertFalse(os.path.exists(results_path))
def test_esk104(self):
    """Test Esk-104: Basic data-store operations."""
    # run Eskapade
    self.eskapade_run(
        resources.tutorial('esk104_basic_datastore_operations.py'))

    # exactly one item should remain in the data store
    store = process_manager.service(DataStore)
    self.assertEqual(1, len(store))
    self.assertEqual(1, store['a'])
def test_esk108reduce(self):
    """Test Esk-108: reduce part of the map/reduce tutorial."""
    conf = process_manager.service(ConfigObject)
    conf['TESTING'] = True

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk108_reduce.py'))

    store = process_manager.service(DataStore)
    self.assertEqual(20, store['n_products'])
def test_esk107(self):
    """Test Esk-107: Chain looper."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk107_chain_looper.py'))

    # chain is repeated 10 times, with nothing put in datastore
    store = process_manager.service(DataStore)
    self.assertEqual(0, len(store))
    first_chain = list(process_manager)[0]
    repeat_link = list(first_chain)[1]
    self.assertEqual(10, repeat_link.maxcount)
def test_esk702_only_ordered(self):
    """Test Esk-702: Mimic data with only unordered categorical variables."""
    # run Eskapade
    self.eskapade_run(
        resources.tutorial('esk702_mimic_data_only_unordered.py'))
    ds = process_manager.service(DataStore)

    # expected data-store keys and the type of each stored object
    expected_types = {
        'df': pd.DataFrame,
        'ids': np.ndarray,
        'maps': dict,
        'new_column_order': list,
        'qts': list,
        'data': np.ndarray,
        'data_smoothed': np.ndarray,
        'data_no_nans': np.ndarray,
        'data_normalized': np.ndarray,
        'unordered_categorical_i': list,
        'ordered_categorical_i': list,
        'continuous_i': list,
        'bw': np.ndarray,
        'data_resample': np.ndarray,
        'df_resample': pd.DataFrame,
        'chi2': np.float64,
        'p_value': np.float64,
    }
    # -- make sure all was saved to the data store, with the right type
    for key, expected_type in expected_types.items():
        self.assertIn(key, ds)
        self.assertIsInstance(ds[key], expected_type)

    # -- check shapes and lengths of the stored objects
    for wide_key in ('df', 'data', 'data_smoothed', 'data_no_nans'):
        self.assertEqual(ds[wide_key].shape[1], 2)
    self.assertEqual(len(ds['data_normalized']), 0)
    self.assertEqual(len(ds['unordered_categorical_i']), 2)
    self.assertEqual(len(ds['ordered_categorical_i']), 0)
    self.assertEqual(len(ds['continuous_i']), 0)
    self.assertEqual(ds['bw'].shape[0], 2)
    self.assertEqual(ds['data_resample'].shape[1], 2)
    self.assertEqual(ds['df_resample'].shape[1], 3)
def test_esk103(self):
    """Test Esk-103: Print data store."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk103_printdatastore.py'))
    ds = process_manager.service(DataStore)

    self.assertEqual('world', ds['hello'])
    # nested dictionary stored under key 'd'
    for key, value in (('a', 1), ('b', 2), ('c', 3)):
        self.assertEqual(value, ds['d'][key])
def test_esk206(self):
    """Test Esk-206: Merge pandas data frames."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk206_merge_pandas_dfs.py'))
    ds = process_manager.service(DataStore)

    # merged frame must exist with the expected dimensions; assertIn
    # gives a clearer failure message than assertTrue(... in ds)
    self.assertIn('outgoing', ds)
    df = ds['outgoing']
    self.assertEqual(len(df.index), 4)
    self.assertEqual(len(df.columns), 5)
def test_esk109(self):
    """Test Esk-109: Debugging tips."""
    settings = process_manager.service(ConfigObject)
    # this flag turns off ipython embed link
    settings['TESTING'] = True

    # macro is expected to end in failure
    self.eskapade_run(resources.tutorial('esk109_debugging_tips.py'),
                      StatusCode.Failure)

    # third link of the first chain must be the Break link;
    # assertIsInstance over assertTrue(isinstance(...)) for better output
    self.assertIsInstance(list(list(process_manager)[0])[2], Break)
def test_esk106_script(self, mock_argv): """Test Eskapade run with esk106 macro from script""" # get file paths settings = process_manager.service(ConfigObject) settings['analysisName'] = 'esk106_cmdline_options' settings_ = settings.copy() macro_path = resources.tutorial('esk106_cmdline_options.py') # mock command-line arguments args = [] mock_argv.__getitem__ = lambda s, k: args.__getitem__(k) # base settings args_ = [macro_path, '-LDEBUG', '--batch-mode'] settings_['macro'] = macro_path settings_['logLevel'] = LogLevel.DEBUG settings_['batchMode'] = True def do_run(name, args, args_, settings_, add_args, add_settings, chains): # set arguments args.clear() args += args_ + add_args settings = settings_.copy() settings.update(add_settings) # run Eskapade process_manager.reset() entry_points.eskapade_run() settings_run = process_manager.service(ConfigObject) # check results self.assertListEqual( [c.name for c in process_manager.chains], chains, 'unexpected chain names in "{}" test'.format(name)) self.assertDictEqual( settings_run, settings, 'unexpected settings in "{}" test'.format(name)) # run both chains do_run( 'both chains', args, args_, settings_, ['--store-all', '-cdo_chain0=True', '-cdo_chain1=True'], dict(storeResultsEachChain=True, do_chain0=True, do_chain1=True), ['Chain0', 'Chain1']) # run only last chain by skipping the first do_run('skip first', args, args_, settings_, ['-bChain1', '-cdo_chain0=True', '-cdo_chain1=True'], dict(beginWithChain='Chain1', do_chain0=True, do_chain1=True), ['Chain0', 'Chain1']) # run only last chain by not defining the first do_run('no first', args, args_, settings_, ['-cdo_chain0=False', '-cdo_chain1=True'], dict(do_chain0=False, do_chain1=True), ['Chain1'])
def test_esk203(self):
    """Test Esk-203: Apply function to pandas data frame."""
    # run Eskapade
    self.eskapade_run(
        resources.tutorial('esk203_apply_func_to_pandas_df.py'))
    ds = process_manager.service(DataStore)

    # transformed frame must exist and contain the new columns;
    # assertIn gives a clearer failure message than assertTrue(... in ...)
    self.assertIn('transformed_data', ds)
    df = ds['transformed_data']
    self.assertIn('xx', df.columns)
    self.assertIn('yy', df.columns)
def test_esk201(self):
    """Test Esk-201: Read data."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk201_readdata.py'))
    ds = process_manager.service(DataStore)

    # both data sets must be present with the expected record counts;
    # assertIn gives a clearer failure message than assertTrue(... in ds)
    self.assertIn('test1', ds)
    self.assertIn('test2', ds)
    self.assertEqual(12, ds['n_test1'])
    self.assertEqual(36, ds['n_test2'])
def test_esk102(self):
    """Test Esk-102: Multiple chains."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk102_multiple_chains.py'))
    settings = process_manager.service(ConfigObject)

    # all three chain flags must be set, and three chains registered
    for idx in range(3):
        self.assertTrue(settings['do_chain{:d}'.format(idx)])
    self.assertEqual(3, len(process_manager))
def test_esk501(self):
    """Test Esk-501: fixing pandas dataframe"""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk501_fix_pandas_dataframe.py'))
    ds = process_manager.service(DataStore)

    # all four frames must be present, each with five rows
    for key in ('vrh', 'vrh_fix1', 'vrh_fix2', 'vrh_fix3'):
        self.assertIn(key, ds)
        self.assertIsInstance(ds[key], pd.DataFrame)
        self.assertEqual(len(ds[key].index), 5)

    # dtype checks: np.object was removed in numpy >= 1.24, and the old
    # assertIsInstance(dtype, np.object) was a no-op anyway (everything is
    # an instance of object) -- compare the dtypes directly instead
    self.assertEqual(ds['vrh']['B'].dtype, np.dtype(object))
    self.assertEqual(ds['vrh']['C'].dtype, np.dtype(object))
    self.assertIsInstance(ds['vrh']['D'].dtype.type(), np.float64)

    # content of the original (unfixed) frame
    self.assertListEqual(ds['vrh']['A'].values.tolist(),
                         [True, False, np.nan, np.nan, np.nan])
    self.assertListEqual(ds['vrh']['B'].values.tolist(),
                         ['foo', 'bar', '3', np.nan, np.nan])
    self.assertListEqual(ds['vrh']['C'].values.tolist(),
                         ['1.0', '2.0', 'bal', np.nan, np.nan])
    self.assertListEqual(ds['vrh']['D'].values.tolist()[:3],
                         [1.0, 2.0, 3.0])
    self.assertListEqual(ds['vrh']['E'].values.tolist(),
                         ['1', '2', 'bla', np.nan, np.nan])
    self.assertListEqual(ds['vrh']['F'].values.tolist(),
                         ['1', '2.5', 'bar', np.nan, np.nan])
    self.assertListEqual(ds['vrh']['G'].values.tolist(),
                         ['a', 'b', 'c', 'd', np.nan])
    self.assertListEqual(ds['vrh']['H'].values.tolist(),
                         ['a', 'b', '1', '2', '3'])

    # content of the first fixed frame
    self.assertListEqual(ds['vrh_fix1']['A'].values.tolist()[:2],
                         [1.0, 0.0])
    self.assertListEqual(ds['vrh_fix1']['B'].values.tolist(),
                         ['foo', 'bar', '3', np.nan, np.nan])
    self.assertListEqual(ds['vrh_fix1']['C'].values.tolist()[:2],
                         [1.0, 2.0])
    self.assertListEqual(ds['vrh_fix1']['D'].values.tolist()[:3],
                         [1.0, 2.0, 3.0])
    self.assertListEqual(ds['vrh_fix1']['E'].values.tolist()[:2],
                         [1, 2])
    self.assertListEqual(ds['vrh_fix1']['F'].values.tolist()[:3],
                         ['1', '2.5', 'bar'])
    self.assertListEqual(ds['vrh_fix1']['G'].values.tolist(),
                         ['a', 'b', 'c', 'd', np.nan])
    self.assertListEqual(ds['vrh_fix1']['H'].values.tolist()[2:5],
                         [1.0, 2.0, 3.0])

    # content of the second fixed frame
    self.assertListEqual(ds['vrh_fix2']['B'].values.tolist()[2:3], [3])
    self.assertListEqual(ds['vrh_fix2']['C'].values.tolist(),
                         ['1.0', '2.0', 'bal', np.nan, np.nan])

    # content of the third fixed frame (NaNs replaced by sentinel values)
    self.assertListEqual(ds['vrh_fix3']['A'].values.tolist()[:2],
                         [1.0, 0.0])
    self.assertListEqual(ds['vrh_fix3']['B'].values.tolist(),
                         ['foo', 'bar', '3', 'not_a_str', 'not_a_str'])
    self.assertListEqual(ds['vrh_fix3']['C'].values.tolist()[:2],
                         [1.0, 2.0])
    self.assertListEqual(ds['vrh_fix3']['D'].values.tolist()[:3],
                         [1.0, 2.0, 3.0])
    self.assertListEqual(ds['vrh_fix3']['E'].values.tolist(),
                         [1, 2, -999, -999, -999])
    self.assertListEqual(ds['vrh_fix3']['F'].values.tolist(),
                         ['1', '2.5', 'bar', 'not_a_str', 'not_a_str'])
    self.assertListEqual(ds['vrh_fix3']['G'].values.tolist(),
                         ['a', 'b', 'c', 'd', 'GREPME'])
    self.assertListEqual(ds['vrh_fix3']['H'].values.tolist(),
                         [-999, -999, 1, 2, 3])