def storeHdf5(data, tag, path):
    hdf = HDFStore(path, 'a')
    # HDFStore.keys() returns paths with a leading '/', so test membership on the
    # store itself; write as a table so later calls can append to the same key
    if tag in hdf:
        hdf.append(tag, data)
    else:
        hdf.put(tag, data, format='table')
    hdf.close()
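# A hypothetical usage sketch for storeHdf5 above (the file name 'readings.h5', the
# key 'sensor' and the sample frame are illustrative, not from the original source):
# the first call creates the table node with put(), later calls extend it with append().
import pandas as pd

sample = pd.DataFrame({'temp': [21.0, 21.4], 'humidity': [40, 42]})
storeHdf5(sample, 'sensor', 'readings.h5')   # first call: put() creates the node
storeHdf5(sample, 'sensor', 'readings.h5')   # later calls: append() extends it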
class HDFStorePanel(BaseIO):

    goal_time = 0.2

    def setup(self):
        self.fname = '__test__.h5'
        with warnings.catch_warnings(record=True):
            self.p = Panel(np.random.randn(20, 1000, 25),
                           items=['Item%03d' % i for i in range(20)],
                           major_axis=date_range('1/1/2000', periods=1000),
                           minor_axis=['E%03d' % i for i in range(25)])
            self.store = HDFStore(self.fname)
            self.store.append('p1', self.p)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.select('p1')

    def time_write_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.append('p2', self.p)
def store_results(self, result, index, columns, hdf5_file):
    self.df = DataFrame(result, columns=columns)
    self.df = self.df.set_index(index)
    self.df.sort_index(inplace=True)

    # Store the DataFrame as an HDF5 file...
    hdf = HDFStore(hdf5_file)
    # Append the dataframe, and ensure addr / host can be 17 chars long
    hdf.append('df', self.df, data_columns=list(columns),
               min_itemsize={'addr': 17, 'host': 17})
    hdf.close()
def test_complibs_default_settings(setup_path):
    # GH15943
    df = tm.makeDataFrame()

    # Set complevel and check if complib is automatically set to
    # default value
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df", complevel=9)
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 9
                assert node.filters.complib == "zlib"

    # Set complib and check to see if compression is disabled
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df", complib="zlib")
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None

    # Check if not setting complib or complevel results in no compression
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df")
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None

    # Check if file-defaults can be overridden on a per table basis
    with ensure_clean_path(setup_path) as tmpfile:
        store = HDFStore(tmpfile)
        store.append("dfc", df, complevel=9, complib="blosc")
        store.append("df", df)
        store.close()

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None
            for node in h5file.walk_nodes(where="/dfc", classname="Leaf"):
                assert node.filters.complevel == 9
                assert node.filters.complib == "blosc"
def store_to_liam(self):
    '''
    Save the data in the format subsequently used by the Til model.
    Select the variables that Til relies on downstream.
    Calls Liam2 functions.
    '''
    path = self._output_name()
    h5file = tables.openFile(path, mode="w")
    ent_node = h5file.createGroup("/", "entities", "Entities")
    for ent_name in ['ind', 'foy', 'men', 'futur', 'past']:
        entity = eval('self.' + ent_name)
        if entity is not None:
            entity = entity.fillna(-1)
            try:
                ent_table = entity.to_records(index=False)
            except:
                pdb.set_trace()
            dtypes = ent_table.dtype
            final_name = of_name_to_til[ent_name]
            try:
                table = h5file.createTable(ent_node, final_name, dtypes,
                                           title="%s table" % final_name)
                table.append(ent_table)
            except:
                pdb.set_trace()
            table.flush()

            if ent_name == 'men':
                entity = entity.loc[entity['id'] > -1]
                ent_table2 = entity[['pond', 'id', 'period']].to_records(index=False)
                dtypes2 = ent_table2.dtype
                table = h5file.createTable(ent_node, 'companies', dtypes2,
                                           title="companies table")
                table.append(ent_table2)
                table.flush()
            if ent_name == 'ind':
                ent_table2 = entity[['agem', 'sexe', 'pere', 'mere', 'id',
                                     'findet', 'period']].to_records(index=False)
                dtypes2 = ent_table2.dtype
                table = h5file.createTable(ent_node, 'register', dtypes2,
                                           title="register table")
                table.append(ent_table2)
                table.flush()
    h5file.close()

    # 3 - longitudinal tables
    # Note: the pandas format is kept here
    store = HDFStore(path)
    for varname, table in self.longitudinal.iteritems():
        table['id'] = table.index
        store.append('longitudinal/' + varname, table)
    store.close()
def pf2pandas(wd, files, vars=None, npwd=None, rmvars=None,
              debug=False):
    """
    Read in GEOS-Chem planeflight output and convert to HDF format
    - Converts date and time columns to datetime format indexes
    - the resultant HDF is in 2D list form
      (aka further processing required for 3D / 2D output)

    Note:
    - This function is limited by the csv read speed. For large csv output expect
      significant processing times, or set to automatically run post run
    - Original files are not removed, so this function will double space usage
      for output unless the original files are deleted.
    """
    # Ensure working directory string has a trailing forward slash
    if wd[-1] != '/':
        wd += '/'

    # pfdate = (re.findall('\d+', file))[-1]
    if not isinstance(vars, list):
        vars, sites = get_pf_headers(files[0], debug=debug)
    if not isinstance(npwd, str):
        npwd = get_dir('npwd')
    hdf = HDFStore(npwd + 'pf_{}_{}.h5'.format(wd.split('/')[-3],
                                               wd.split('/')[-2],
                                               wd.split('/')[-1]))
    if debug:
        print hdf

    for file in files:
        print file  # , pfdate

        # convert planeflight.log to DataFrame
        df = pf_csv2pandas(file, vars)

        if file == files[0]:
            hdf.put('d1', df, format='table', data_columns=True)
        else:
            hdf.append('d1', df, format='table', data_columns=True)
        if debug:
            print hdf['d1'].shape, hdf['d1'].index
        del df
    hdf.close()
def to_frame_hdf(self, store_path, store_key, df_cb=None, max_msg=None,
                 usecols=None, chunk_cnt=CHUNK_CNT):
    """Convert to Pandas DataFrame, save to HDF, then return the HDFStore."""
    store = HDFStore(store_path, 'w')
    _c = self._to_frame_prop('to_frame_hdf', False)
    for df in self._to_frame_gen(_c, usecols, chunk_cnt):
        min_itemsize = {'kind': 20, 'msg': 255}
        # pytables does not support unicode for now
        df['msg'] = df['msg'].apply(lambda m: m.encode('utf8'))
        if df_cb is not None:
            df_cb(df)
        if max_msg is not None:
            min_itemsize['msg'] = max_msg
        store.append(store_key, df, format='table',
                     min_itemsize=min_itemsize)
    store.flush()
    store.close()
    _c.pg.done()
def test_open_args(setup_path):
    with tm.ensure_clean(setup_path) as path:

        df = tm.makeDataFrame()

        # create an in memory store
        store = HDFStore(
            path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0
        )
        store["df"] = df
        store.append("df2", df)

        tm.assert_frame_equal(store["df"], df)
        tm.assert_frame_equal(store["df2"], df)

        store.close()

    # the file should not have actually been written
    assert not os.path.exists(path)
def csv2hdf5(csv_name, h5_name, dfname, option='frame'):
    """
    Convert a csv file to a dataframe in an hdf5 file

    Parameters:

    csv_name: string
              csv file name
    h5_name : string
              hdf5 file name
    dfname  : string
              dataframe name
    option  : string, 'frame' or 'table', default to 'frame'
              storing type in the pytable
    """
    table = read_csv(csv_name)
    store = HDFStore(h5_name)
    if option == 'frame':
        store.put(dfname, table)
    elif option == 'table':  # for frame_table à la pytables
        object_cols = table.dtypes[table.dtypes == 'object']
        print object_cols.index
        try:
            store.append(dfname, table)
        except:
            print table.get_dtype_counts()
            object_cols = table.dtypes[table.dtypes == 'object']
            for col in object_cols.index:
                print 'removing object column :', col
                del table[col]
            store.append(dfname, table)
    print store
    store.close()
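# A hypothetical usage sketch for csv2hdf5 above ('prices.csv', 'prices.h5' and the
# key 'prices' are illustrative names, not from the original source): write the csv
# as a queryable table, then read the frame back with pandas.read_hdf.
from pandas import read_hdf

csv2hdf5('prices.csv', 'prices.h5', 'prices', option='table')
df = read_hdf('prices.h5', 'prices')   # read the stored dataframe back
print(df.head())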
def main(period=None):
    temps = time.clock()
    input_tab = "C:/openfisca/output/liam/" + "LiamLeg.h5"
    output_tab = "C:/Myliam2/Model/SimulTest.h5"

    store = HDFStore(input_tab)
    goal = HDFStore(output_tab)

    name_convertion = {"ind": "person", "foy": "declar", "men": "menage", "fam": "menage"}

    # work first on all the tables, then select each year
    # step 1
    for ent in ("ind", "men", "foy", "fam"):
        dest = name_convertion[ent]
        tab_in = store[ent]
        tab_out = goal["entities/" + dest]
        # adjust the variables to keep
        # TODO: move this up to of_on_liam, but it is convenient here because of
        # the other table
        ident = "id" + ent
        if ent == "ind":
            ident = "noi"
        # keep the initial values
        to_remove = [x for x in tab_in.columns if x in tab_out.columns]
        # remove the identifiers except the one that will become id
        list_id = ["idmen", "idfoy", "idfam", "id", "quifoy", "quifam", "quimen", "noi"]
        list_id.remove(ident)
        to_remove = to_remove + [x for x in tab_in.columns if x in list_id]
        # do not forget to keep period
        to_remove.remove("period")
        tab_in = tab_in.drop(to_remove, axis=1)
        tab_in = tab_in.rename(columns={ident: "id"})
        tab_out = merge(tab_in, tab_out, how="right", on=["id", "period"], sort=False)
        goal.remove("entities/" + dest)
        goal.append("entities/" + dest, tab_out)
    # new_tab = np.array(tab_out.to_records())
    store.close()
    goal.close()
def read_raw_tecplot_case_and_write_pandas_hdf5(
        case_folder,
        root                  = 0,
        output_file           = 0,
        serration_angle       = 0,
        angle_correction      = 0,
        height_correction     = 0,
        streamwise_correction = 0,
        overwrite             = False,
        time_step_limit       = 0,
        airfoil_normal        = False,
):
    from os.path import isfile, join, splitext
    from os import listdir
    from progressbar import ProgressBar, Percentage, Bar, ETA, SimpleProgress
    from pandas import HDFStore

    # File related things ######################################################
    if not output_file:
        output_file = case_folder + ".hdf5"

    if airfoil_normal:
        output_file = output_file + "_AirfoilNormal"

    if not output_file.endswith('.hdf5'):
        output_file = output_file.replace(".hdf5", "") + ".hdf5"

    if isfile(output_file) and not overwrite:
        print "  Exiting; file exists:\n{0}".format(output_file)
        return 0
    else:
        print "  Writing\n{0}".format(output_file)
    # ##########################################################################

    hdf = HDFStore(output_file)

    time_step_files = sorted([f for f in listdir(join(root, case_folder))
                              if splitext(f)[1] == '.dat'])

    if time_step_limit:
        time_step_files = time_step_files[:time_step_limit]

    progress = ProgressBar(
        widgets=[
            Bar(), ' ',
            Percentage(), ' ',
            ETA(), ' (file ',
            SimpleProgress(), ')'],
        maxval=len(time_step_files)
    ).start()

    cnt = 0
    for f, t in zip(time_step_files, range(len(time_step_files))):

        df_t = read_tecplot_file_and_correct_for_location_rotation(
            tecplot_file          = join(root, case_folder, f),
            serration_angle       = serration_angle,
            angle_correction      = angle_correction,
            height_correction     = height_correction,
            streamwise_correction = streamwise_correction,
            time_step             = t,
            airfoil_normal        = airfoil_normal,
        )

        df_t = get_vorticity(df_t)

        if cnt == 0:
            df = df_t.copy()
        else:
            # the original passed drop_index=True, which DataFrame.append does not
            # accept; ignore_index=True is used here instead
            df = df.append(df_t, ignore_index=True)
        #df = df.drop_duplicates()

        # sanity check: every x location should appear the same number of times
        try:
            x_cnt = df.x.value_counts()
        except AttributeError:
            print df
            raise
        if not x_cnt.max() == x_cnt.min():
            print "  There's something wrong, counted {0} instances of x"\
                .format(x_cnt.max())
            return 0

        if t == 30:
            hdf.put(case_folder, df.convert_objects(), format='table',
                    data_columns=True)
        elif cnt == 30 and not t == cnt:
            hdf.append(case_folder, df.convert_objects(), format='table',
                       data_columns=True)
            cnt = 0

        cnt += 1
        progress.update(t)

    progress.finish()
    hdf.close()

    return 1
def test_multiple_open_close(setup_path):
    # gh-4409: open & close multiple times

    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.to_hdf(path, "df", mode="w", format="table")

        # single
        store = HDFStore(path)
        assert "CLOSED" not in store.info()
        assert store.is_open

        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

    with ensure_clean_path(setup_path) as path:

        if pytables._table_file_open_policy_is_strict:
            # multiples
            store1 = HDFStore(path)
            msg = (
                r"The file [\S]* is already opened\. Please close it before "
                r"reopening in write mode\."
            )
            with pytest.raises(ValueError, match=msg):
                HDFStore(path)

            store1.close()
        else:
            # multiples
            store1 = HDFStore(path)
            store2 = HDFStore(path)

            assert "CLOSED" not in store1.info()
            assert "CLOSED" not in store2.info()
            assert store1.is_open
            assert store2.is_open

            store1.close()
            assert "CLOSED" in store1.info()
            assert not store1.is_open
            assert "CLOSED" not in store2.info()
            assert store2.is_open

            store2.close()
            assert "CLOSED" in store1.info()
            assert "CLOSED" in store2.info()
            assert not store1.is_open
            assert not store2.is_open

            # nested close
            store = HDFStore(path, mode="w")
            store.append("df", df)

            store2 = HDFStore(path)
            store2.append("df2", df)
            store2.close()
            assert "CLOSED" in store2.info()
            assert not store2.is_open

            store.close()
            assert "CLOSED" in store.info()
            assert not store.is_open

            # double closing
            store = HDFStore(path, mode="w")
            store.append("df", df)

            store2 = HDFStore(path)
            store.close()
            assert "CLOSED" in store.info()
            assert not store.is_open

            store2.close()
            assert "CLOSED" in store2.info()
            assert not store2.is_open

    # ops on a closed store
    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.to_hdf(path, "df", mode="w", format="table")

        store = HDFStore(path)
        store.close()

        msg = r"[\S]* file is not open!"
        with pytest.raises(ClosedFileError, match=msg):
            store.keys()

        with pytest.raises(ClosedFileError, match=msg):
            "df" in store

        with pytest.raises(ClosedFileError, match=msg):
            len(store)

        with pytest.raises(ClosedFileError, match=msg):
            store["df"]

        with pytest.raises(ClosedFileError, match=msg):
            store.select("df")

        with pytest.raises(ClosedFileError, match=msg):
            store.get("df")

        with pytest.raises(ClosedFileError, match=msg):
            store.append("df2", df)

        with pytest.raises(ClosedFileError, match=msg):
            store.put("df3", df)

        with pytest.raises(ClosedFileError, match=msg):
            store.get_storer("df2")

        with pytest.raises(ClosedFileError, match=msg):
            store.remove("df2")

        with pytest.raises(ClosedFileError, match=msg):
            store.select("df")

        msg = "'HDFStore' object has no attribute 'df'"
        with pytest.raises(AttributeError, match=msg):
            store.df
def load_from_store_or_yahoo(start=None, end=None, symbol=None):
    append = False
    hdf = HDFStore(settings.storage_path)
    today = dt.datetime.today().date()
    yahoo_symbol = symbol
    symbol = clean_symbol(symbol)

    # in this case, earlier data than in the store is requested;
    # the table needs to be rewritten
    if symbol in hdf:
        df = hdf[symbol]
        start_store = df.index.min()
        if isinstance(start, str):
            start = dt.datetime.strptime(start, '%Y-%m-%d')
        if start_store.date() > start:
            hdf.remove(symbol)
            lprint('start date was earlier than the oldest date in the storage. '
                   'storage needs to be rewritten.')

    if symbol in hdf:
        df = hdf[symbol]
        end_store = df.index.max()

        # check if today is a weekend day
        weekday = dt.datetime.today().weekday()
        last_trading_day = today
        if weekday in [5, 6]:
            correction = 1 if weekday == 5 else 2
            last_trading_day = today - dt.timedelta(correction)

        # if the last trading day is the max date in the store, do not reload data
        if last_trading_day == end_store.date():
            lprint('loaded %s data from storage.' % symbol)
            return df

        # if the stored data ends before the last trading day, load the difference
        end = today + dt.timedelta(1)
        start = end_store
        append = True

    # if no store was found, use the start and end from above
    df = None
    count = 0
    while df is None and count < 10:
        try:
            df = get_yahoo_data(start=start, end=end, symbol=yahoo_symbol)
        except RemoteDataError:
            time.sleep(10 + int(np.random.rand() * 10))
            count += 1

    if df is None:
        raise Exception('Even after 10 trials data could not be loaded from yahoo')

    # remove blanks in the header
    df.columns = [x.replace(' ', '_') for x in df.columns]

    # store or append to hdf5 storage
    if symbol in hdf:
        # drop duplicates
        exist_df = hdf[symbol]
        df = df[~df.index.isin(exist_df.index)]
    if append:
        hdf.append(symbol, df, format='table', data_columns=True)
    else:
        df.drop_duplicates(inplace=True)
        hdf.put(symbol, df, format='table', data_columns=True)

    if not df.index.is_unique:
        lprint('index of %s is not unique' % symbol)

    return df
class HDFStoreDataFrame(BaseIO):
    def setup(self):
        N = 25000
        index = tm.makeStringIndex(N)
        self.df = DataFrame(
            {"float1": np.random.randn(N), "float2": np.random.randn(N)}, index=index
        )
        self.df_mixed = DataFrame(
            {
                "float1": np.random.randn(N),
                "float2": np.random.randn(N),
                "string1": ["foo"] * N,
                "bool1": [True] * N,
                "int1": np.random.randint(0, N, size=N),
            },
            index=index,
        )
        self.df_wide = DataFrame(np.random.randn(N, 100))
        self.start_wide = self.df_wide.index[10000]
        self.stop_wide = self.df_wide.index[15000]
        self.df2 = DataFrame(
            {"float1": np.random.randn(N), "float2": np.random.randn(N)},
            index=date_range("1/1/2000", periods=N),
        )
        self.start = self.df2.index[10000]
        self.stop = self.df2.index[15000]
        self.df_wide2 = DataFrame(
            np.random.randn(N, 100), index=date_range("1/1/2000", periods=N)
        )
        self.df_dc = DataFrame(
            np.random.randn(N, 10), columns=["C%03d" % i for i in range(10)]
        )

        self.fname = "__test__.h5"

        self.store = HDFStore(self.fname)
        self.store.put("fixed", self.df)
        self.store.put("fixed_mixed", self.df_mixed)
        self.store.append("table", self.df2)
        self.store.append("table_mixed", self.df_mixed)
        self.store.append("table_wide", self.df_wide)
        self.store.append("table_wide2", self.df_wide2)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store(self):
        self.store.get("fixed")

    def time_read_store_mixed(self):
        self.store.get("fixed_mixed")

    def time_write_store(self):
        self.store.put("fixed_write", self.df)

    def time_write_store_mixed(self):
        self.store.put("fixed_mixed_write", self.df_mixed)

    def time_read_store_table_mixed(self):
        self.store.select("table_mixed")

    def time_write_store_table_mixed(self):
        self.store.append("table_mixed_write", self.df_mixed)

    def time_read_store_table(self):
        self.store.select("table")

    def time_write_store_table(self):
        self.store.append("table_write", self.df)

    def time_read_store_table_wide(self):
        self.store.select("table_wide")

    def time_write_store_table_wide(self):
        self.store.append("table_wide_write", self.df_wide)

    def time_write_store_table_dc(self):
        self.store.append("table_dc_write", self.df_dc, data_columns=True)

    def time_query_store_table_wide(self):
        self.store.select(
            "table_wide", where="index > self.start_wide and index < self.stop_wide"
        )

    def time_query_store_table(self):
        self.store.select("table", where="index > self.start and index < self.stop")

    def time_store_repr(self):
        repr(self.store)

    def time_store_str(self):
        str(self.store)

    def time_store_info(self):
        self.store.info()
#store = {}  # dictionary to store objects
#hdf = store = {}  # dictionary to store objects
am2320 = AM2320(1)

while (1):
    hdf = HDFStore(filename)
    store1 = []
    jj = 12
    curr_time = datetime.datetime.now()
    while (jj > 0):
        jj = jj - 1
        (t, h) = am2320.readSensor()
        #curr_time = datetime.datetime.now()
        #yr = datetime.datetime.strftime(curr_time, "%Y")
        #mn = datetime.datetime.strftime(curr_time, "%m")
        #dy = datetime.datetime.strftime(curr_time, "%d")
        #tm = datetime.datetime.strftime(curr_time, "%H:%m")
        #store1.append([yr, mn, dy, tm, t, h])
        store1.append([t, h])
        sleep(300)

    #df = DataFrame(store1, columns=('Year', 'Month', 'Day', 'Time', 'Temp', 'Humidity'))
    df = DataFrame(store1,
                   index=date_range(start=curr_time, periods=12, freq='300S'),
                   columns=('Temp', 'Humidity'))
    hdf.append('Weather1', df, format='table', data_columns=True)
    # print hdf['Weather1'].shape
#Loading insee data
projection = HDFStore('C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\France\sources\data_fr\proj_pop_insee\proj_pop.h5', 'r')
projection_dataframe = projection['/projpop0760_FECbasESPbasMIGbas']
# <- Do not know the precise meaning of this. For testing only

#Combining
concatened = concat([population, projection_dataframe], verify_integrity=True)
concatened = concatened.reset_index()
concatened['year'] = concatened.year.convert_objects(convert_numeric=True)
concatened = concatened.set_index(['age', 'sex', 'year'])

#Saving as HDF5 file
export = HDFStore('neo_population.h5')
export.append('pop', concatened, data_columns=concatened.columns)
export.close()

export = HDFStore('neo_population.h5', 'r')
print export

#Creating the simulation object
net_payments = Simulation()
net_payments.set_population(population)
France = 'France'
net_payments.set_country(France)
r = 0.0
g = 0.01
net_payments.set_discount_rate(r)
net_payments.set_growth_rate(g)
# print net_payments
def read_raw_tecplot_folder_and_write_pandas_hdf5(
        case_folder,
        root        = 0,
        output_file = 0,
        output_root = 0,
        overwrite   = False,
):
    from os.path import isfile, join, splitext
    from os import listdir
    from progressbar import ProgressBar, Percentage, Bar
    from progressbar import ETA, SimpleProgress
    from pandas import DataFrame, HDFStore

    # File related things ######################################################
    if not output_file:
        output_file = case_folder + "_Aligned.hdf5"

    if not output_root:
        output_root = '/media/carlos/6E34D2CD34D29783/' + \
                      '2015-02_SerrationPIV/TR_Data_Location_Calibrated_Article3'

    if not output_file.endswith('_Aligned.hdf5'):
        output_file = output_file.replace("_Aligned.hdf5", "") + "_Aligned.hdf5"

    if 'STE' in case_folder or 'z10' in case_folder:
        output_file = output_file.replace('.hdf5', '_AirfoilNormal.hdf5')

    if isfile(join(output_root, output_file)) and not overwrite:
        print "  Exiting; file exists:\n  {0}".format(output_file)
        return 0
    else:
        print "  Writing\n  {0}".format(output_file)
    # ##########################################################################

    time_step_files = sorted(
        [join(root, case_folder, f) for f in listdir(join(root, case_folder))
         if splitext(f)[1] == '.dat']
    )

    progress = ProgressBar(
        widgets=[
            Bar(), ' ',
            Percentage(), ' ',
            ETA(), ' (file ',
            SimpleProgress(), ')'],
        maxval=len(time_step_files)
    ).start()

    cnt = 0

    hdf_store = HDFStore(join(output_root, output_file))

    for f, t in zip(time_step_files, range(len(time_step_files))):

        df_t = read_tecplot_file(
            tecplot_folder         = join(root, case_folder),
            tecplot_time_step_file = f,
            time_step              = t,
        )

        if cnt == 0:
            df = df_t.copy()
        else:
            df = df.append(df_t, ignore_index=True)

        if cnt == 50:
            df = correct_df_translation_rotation(df)[['x', 'y', 't', 'u', 'v', 'w']]
            df = df.sort_values(by=['x', 'y', 't'])
            #df.set_index(['x', 'y'], inplace=True)

            if t == 0:
                hdf_store.put('data', df,
                              data_columns=['x', 'y', 't'],
                              format='t')
            else:
                hdf_store.append('data', df,
                                 data_columns=['x', 'y', 't'],
                                 format='t')
            cnt = 0
            df = DataFrame()

        cnt += 1
        progress.update(t)

    progress.finish()
    hdf_store.close()

    return 1
def store_to_liam(self):
    '''
    Save the data in the format subsequently used by the Til model.
    Select the variables that Til relies on downstream.
    Calls Liam2 functions.
    '''
    path_param = os.path.join(path_model, "til_base_model\param", "globals.csv")
    path = os.path.join(path_model, self._output_name())
    h5file = tables.openFile(path, mode="w")

    # 1 - first write the globals, copying the liam2 code
    # globals_def = {'periodic': {'path': 'param/globals.csv'}}
    globals_def = {'periodic': {'path': path_param}}

    const_node = h5file.createGroup("/", "globals", "Globals")
    localdir = path_model
    for global_name, global_def in globals_def.iteritems():
        print("  %s" % global_name)
        req_fields = ([('PERIOD', int)] if global_name == 'periodic' else [])
        kind, info = imp.load_def(localdir, global_name, global_def, req_fields)
        # as in import
        # if kind == 'ndarray':
        #     imp.array_to_disk_array(h5file, const_node, global_name, info,
        #                             title=global_name,
        #                             compression=compression)
        # else:
        assert kind == 'table'
        fields, numlines, datastream, csvfile = info
        imp.stream_to_table(h5file, const_node, global_name, fields,
                            datastream, numlines,
                            title="%s table" % global_name,
                            buffersize=10 * 2 ** 20, compression=None)

    # 2 - then handle the entities
    ent_node = h5file.createGroup("/", "entities", "Entities")
    for ent_name in ['ind', 'foy', 'men', 'futur', 'past']:
        entity = eval('self.' + ent_name)
        if entity is not None:
            entity = entity.fillna(-1)
            ent_table = entity.to_records(index=False)
            dtypes = ent_table.dtype
            final_name = of_name_to_til[ent_name]
            table = h5file.createTable(ent_node, final_name, dtypes,
                                       title="%s table" % final_name)
            table.append(ent_table)
            table.flush()

            if ent_name == 'men':
                entity = entity.loc[entity['id'] > -1]
                ent_table2 = entity[['pond', 'id', 'period']].to_records(index=False)
                dtypes2 = ent_table2.dtype
                table = h5file.createTable(ent_node, 'companies', dtypes2,
                                           title="companies table")
                table.append(ent_table2)
                table.flush()
            if ent_name == 'ind':
                ent_table2 = entity[['agem', 'sexe', 'pere', 'mere', 'id',
                                     'findet', 'period']].to_records(index=False)
                dtypes2 = ent_table2.dtype
                table = h5file.createTable(ent_node, 'register', dtypes2,
                                           title="register table")
                table.append(ent_table2)
                table.flush()
    h5file.close()

    # 3 - longitudinal tables
    # Note: the pandas format is kept here
    store = HDFStore(path)
    for varname, tab in self.longitudinal.iteritems():
        # format for liam
        table = tab
        table['id'] = table.index
        store.append('longitudinal/' + varname, table)
    store.close()
def rd_hydstra(varto, sites=None, data_source='A', from_date=None, to_date=None,
               from_mod_date=None, to_mod_date=None, interval='day',
               qual_codes=[30, 20, 10, 11, 21, 18], concat_data=True, export=None):
    """
    Function to read in data from Hydstra's database using HYDLLP. This function
    extracts all sites with a specific variable code (varto).

    Parameters
    ----------
    varto : int or float
        The hydstra conversion data variable (140.00 is flow).
    sites : list of str
        List of sites to be returned. None includes all sites.
    data_source : str
        Hydstra datasource code (usually 'A').
    from_date : str
        The starting date for the returned data given other constraints.
    to_date : str
        The ending date for the returned data given other constraints.
    from_mod_date : str
        The starting date when the data has been modified.
    to_mod_date : str
        The ending date when the data has been modified.
    interval : str
        The frequency of the output data (year, month, day, hour, minute, second,
        period). If data_type is 'point', then interval cannot be 'period' (use
        anything else, it doesn't matter).
    qual_codes : list of int
        The quality codes for output.
    export : str or dict
        Path string (or sql arguments) describing where the data should be saved,
        or None to not save the data.

    Returns
    -------
    DataFrame
        In long format with site and time as a MultiIndex and data, qual_code,
        and hydstra_var_code as columns.
    """
    ### Parameters
    device_data_type = {100: 'mean', 140: 'mean', 143: 'mean', 450: 'mean',
                        110: 'mean', 130: 'mean', 10: 'tot'}

    today1 = date.today()
    dtype_dict = {'Site': 'varchar', 'HydstraCode': 'smallint', 'Time': 'date',
                  'Value': 'float', 'QualityCode': 'smallint', 'ModDate': 'date'}

    ### Determine the period lengths for all sites and variables
    sites_var_period = hydstra_sites_var_periods(varto=varto, sites=sites,
                                                 data_source=data_source)
    # sites_list = sites_var_period.site.unique().tolist()
    varto_list = sites_var_period.varto.unique().astype('int32').tolist()

    ### Restrict period ranges - optional
    if isinstance(from_date, str):
        from_date1 = Timestamp(from_date)
        from_date_df = sites_var_period.from_date.apply(
            lambda x: x if x > from_date1 else from_date1)
        sites_var_period['from_date'] = from_date_df

    if isinstance(to_date, str):
        to_date1 = Timestamp(to_date)
        to_date_df = sites_var_period.to_date.apply(
            lambda x: x if x > to_date1 else to_date1)
        sites_var_period['to_date'] = to_date_df

    ### Only pull out data according to the modification date ranges - optional
    if isinstance(from_mod_date, str):
        sites_block = sites_var_period[sites_var_period.varfrom == sites_var_period.varto]
        varto_block = sites_block.varto.unique().astype('int32').tolist()
        chg1 = hydstra_data_changes(varto_block, sites_block.site.unique(),
                                    from_mod_date=from_mod_date,
                                    to_mod_date=to_mod_date).drop('to_date', axis=1)
        if 140 in varto_list:
            sites_flow = sites_var_period[
                (sites_var_period.varfrom != sites_var_period.varto)
                & (sites_var_period.varto == 140)]
            chg2 = rating_changes(sites_flow.site.unique().tolist(),
                                  from_mod_date=from_mod_date,
                                  to_mod_date=to_mod_date)
            chg1 = concat([chg1, chg2])

        chg1.rename(columns={'from_date': 'mod_date'}, inplace=True)
        chg3 = merge(sites_var_period, chg1, on=['site', 'varfrom', 'varto'])
        chg4 = chg3[chg3.to_date > chg3.mod_date].copy()
        chg4['from_date'] = chg4['mod_date']
        sites_var_period = chg4.drop('mod_date', axis=1).copy()

    ### Convert datetime to date as str
    sites_var_period2 = sites_var_period.copy()
    sites_var_period2['from_date'] = sites_var_period2['from_date'].dt.date.astype(str)
    sites_var_period2['to_date'] = sites_var_period2['to_date'].dt.date.astype(str)

    site_str_len = sites_var_period2.site.str.len().max()

    if isinstance(export, str):
        if export.endswith('.h5'):
            store = HDFStore(export, mode='a')

    data = DataFrame()
    for tup in sites_var_period2.itertuples(index=False):
        print('Processing site: ' + str(tup.site))

        varto = tup.varto
        data_type = device_data_type[varto]
        df = rd_hydstra_db([tup.site], data_type=data_type, start=tup.from_date,
                           end=tup.to_date, varfrom=tup.varfrom, varto=varto,
                           interval=interval, qual_codes=qual_codes)

        if df.empty:
            continue

        df['HydstraCode'] = varto
        if varto == 143:
            df.loc[:, 'data'] = df.loc[:, 'data'] * 0.001
            df['HydstraCode'] = 140

        ### Make sure the data types are correct
        df.rename(columns={'data': 'Value', 'qual_code': 'QualityCode'}, inplace=True)
        df.index.rename(['Site', 'Time'], inplace=True)
        df.loc[:, 'QualityCode'] = df['QualityCode'].astype('int32')
        df.loc[:, 'HydstraCode'] = df['HydstraCode'].astype('int32')
        df.loc[:, 'ModDate'] = today1

        if isinstance(export, dict):
            df = df.reset_index()
            from_date1 = str(df.Time.min().date())
            to_date1 = str(df.Time.max().date())
            del_rows_dict = {'where_col': {'Site': [str(tup.site)],
                                           'HydstraCode': [str(df['HydstraCode'][0])]},
                             'from_date': from_date1, 'to_date': to_date1,
                             'date_col': 'Time'}
            write_sql(df, dtype_dict=dtype_dict, del_rows_dict=del_rows_dict,
                      drop_table=False, create_table=False, **export)
        elif isinstance(export, str):
            if export.endswith('.h5'):
                try:
                    store.append(key='var_' + str(varto), value=df,
                                 min_itemsize={'site': site_str_len})
                except Exception as err:
                    store.close()
                    raise err
        if concat_data:
            data = concat([data, df])

    if isinstance(export, str):
        store.close()
    if concat_data:
        return data
def extract_relevant_data(case_list=[], exceptions=[], y_delta_locs=[],
                          x_2h_locs=[], plot=False):
    """
    This will extract the wall normal data at the spanwise location
    TE at a certain y density
    """
    from os import listdir
    from os.path import join, split
    from pandas import DataFrame, HDFStore, read_pickle
    from boundary_layer_routines import return_bl_parameters
    from raw_data_processing_routines import decript_case_name
    from progressbar import ProgressBar, Percentage
    from progressbar import Bar, ETA, SimpleProgress
    from numpy import array, round, linspace
    from data_cleaning_routines import show_surface_from_df

    x_2h_locs = round(array(x_2h_locs), 2)
    y_delta_locs = round(array(y_delta_locs), 2)

    # Get the available HDF5 files #############################################
    hdf5_root = '/media/carlos/6E34D2CD34D29783/' + \
                '2015-02_SerrationPIV/TR_Data_Location_Calibrated_Article3'

    if not len(case_list):
        hdf5_files = [f for f in listdir(hdf5_root)
                      if f.endswith('.hdf5')
                      and f not in exceptions]
    else:
        hdf5_files = [f for f in listdir(hdf5_root)
                      if f.endswith('.hdf5')
                      and f in case_list]
    # ##########################################################################

    for hf in [join(hdf5_root, f) for f in hdf5_files]:

        f = split(hf)[1].replace('_AirfoilNormal', '').replace('_Aligned.hdf5', '')

        print "   Extracting data from {0}".format(f)
        print "     at the normalized streamwise locations:"
        print "     {0}".format(x_2h_locs)

        hdf_t = HDFStore(hf, 'r')

        # Get the available coordinates ########################################
        hf_coords = hdf_t.select('data', where=['t = 0'], columns=['x', 'y'])
        # ######################################################################

        # Turn the non-dim requested locations into physical coords ###########
        requested_locations = []
        requested_normalized_locations = []

        #for x, x_norm in zip(x_2h_locs * tooth_length, x_2h_locs):
        #    for y_d in y_delta_locs:
        #        bl_params = return_bl_parameters(f, [x])
        #        d_99 = bl_params.delta_99.values[0]
        #        #if "STE" in f:
        #        #    d_99 = 9.4
        #        y = y_d * d_99
        #        requested_locations.append((x, y))
        #        requested_normalized_locations.append((x_norm, y_d))

        # Get the normalization locations depending on the case ###############
        if 'z00' in f and not 'STE' in f:
            x_bl_loc = 40
        elif 'z05' in f:
            x_bl_loc = 20
        elif 'z10' in f or 'STE' in f:
            x_bl_loc = 0
        bl_params = return_bl_parameters(f, [x_bl_loc])
        d_99 = bl_params.delta_99.values[0]

        for x, x_norm in zip(x_2h_locs * tooth_length, x_2h_locs):
            for y_d in y_delta_locs:
                y = y_d * d_99
                requested_locations.append((x, y))
                requested_normalized_locations.append((x_norm, y_d))

        print "   Normalizing to a BL thickness of {0:.2f} mm".format(d_99)
        # ######################################################################

        available_xy_locs = hf_coords[
            (hf_coords.x > min(x_2h_locs) * 40.)
            & (hf_coords.x < max(x_2h_locs) * 40.)
            & (hf_coords.y > min(y_delta_locs) * d_99)
            & (hf_coords.y < max(y_delta_locs) * d_99)
        ][['x', 'y']]

        available_xy_locs = [tuple(x) for x in available_xy_locs.values]

        if plot:
            trailing_edge, phi, alpha, U, z = decript_case_name(f)

            if trailing_edge == 'serrated':
                device = 'Sr20R21'
            elif trailing_edge == 'straight':
                device = 'STE'
            elif trailing_edge == 'slitted':
                device = 'Slit20R21'

            case_name = "{0}_phi{1}_alpha{2}_U{3}_loc{4}_tr.dat".format(
                device, phi, alpha, U, z)

            df_av = read_pickle('averaged_data/' + case_name + '.p')
            show_surface_from_df(df_av, points=available_xy_locs,
                                 plot_name='ReservedData/' + f + '.png')

        query = ''
        cnt_all = 0
        cnt = 0
        time_series_hdf = HDFStore('ReservedData/' + f + '.hdf5', 'w')

        vertical_split_blocks = 10

        progress = ProgressBar(
            widgets=[
                Bar(), ' ',
                Percentage(), ' ',
                ETA(), ' (query bunch ',
                SimpleProgress(), ')'],
            maxval=vertical_split_blocks
        ).start()

        # Don't try to get it all at once; split the vertical range into blocks
        y_ranges = linspace(min(y_delta_locs), max(y_delta_locs),
                            vertical_split_blocks) * d_99

        xmin = min(x_2h_locs) * 40.
        xmax = max(x_2h_locs) * 40.

        for ymin, ymax in zip(y_ranges[:-1], y_ranges[1:]):

            query = " x>={0} & x<{1} & y>={2} & y<{3} ".format(
                xmin, xmax, ymin, ymax)

            df_t = hdf_t.select(
                key='data',
                where=[query],
            )

            df_t['near_x_2h'] = round(df_t.x / 40., 4)
            df_t['near_y_delta'] = round(df_t.y / d_99, 4)

            if not cnt:
                time_series_hdf.put('data', df_t,
                                    data_columns=['near_x_2h', 'near_y_delta', 't'],
                                    format='t')
            else:
                time_series_hdf.append('data', df_t,
                                       data_columns=['near_x_2h', 'near_y_delta', 't'],
                                       format='t')

            cnt_all += 1
            cnt += 1

            progress.update(cnt_all)

            df_t = DataFrame()

        progress.finish()
        hdf_t.close()
        time_series_hdf.close()
def append_hdfs(self, df, path, key):
    store = None
    try:
        store = HDFStore(path)
        store.append(key, df)
    except Exception:
        print("Error appending data to {0} in {1}".format(key, path))
    finally:
        # release the store even if the append fails
        if store is not None:
            store.close()
def junk():
    population = read_csv('C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\France\sources\data_fr\pop.csv', sep=',')
    # print population.columns
    population = population.set_index(['age', 'sex'])
    population = population.stack()
    population = population.reset_index()
    population['level_2'] = population.level_2.convert_objects(convert_numeric=True)
    population['year'] = population['level_2']
    population['pop'] = population[0]
    del population['level_2']
    del population[0]
    population = population.set_index(['age', 'sex', 'year'])

    #Remove the years 2007 and beyond to ensure integrity when combined with INSEE data
    year = list(range(1991, 2007, 1))
    filter_year = array([x in year for x in population.index.get_level_values(2)])
    population = population.iloc[filter_year, :]

    #Loading insee data
    projection = HDFStore('C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\France\sources\data_fr\proj_pop_insee\proj_pop.h5', 'r')
    projection_dataframe = projection['/projpop0760_FECbasESPbasMIGbas']
    # <- Do not know the precise meaning of this. For testing only

    #Combining
    concatened = concat([population, projection_dataframe], verify_integrity=True)
    concatened = concatened.reset_index()
    concatened['year'] = concatened.year.convert_objects(convert_numeric=True)
    concatened = concatened.set_index(['age', 'sex', 'year'])

    #Saving as HDF5 file
    export = HDFStore('neo_population.h5')
    export.append('pop', concatened, data_columns=concatened.columns)
    export.close()

    export = HDFStore('neo_population.h5', 'r')
    print export

    #Creating the simulation object
    net_payments = Simulation()
    net_payments.set_population(population)
    France = 'France'
    net_payments.set_country(France)
    r = 0.0
    g = 0.01
    net_payments.set_discount_rate(r)
    net_payments.set_growth_rate(g)
    # print net_payments
    # print net_payments.growth_rate, net_payments.discount_rate, net_payments.country

    net_payments.load_population("neo_population.h5", 'pop')
    net_payments.load_profiles("C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\profiles.h5", "profiles.h5")

    year_length = 100
    net_payments.set_population_projection(year_length=year_length,
                                           method="exp_growth", rate=0.02)
    net_payments.set_tax_projection(method="per_capita", typ=None, rate=g,
                                    discount_rate=r)

    net_payments.create_cohorts()

    #Creating a column with total taxes paid.
    for typ in net_payments._types:
        net_payments['total'] += hstack(net_payments[typ])

    print net_payments['total']
import numpy as np
from pandas import HDFStore, DataFrame

# create (or open) an hdf5 file and open it in append mode
hdf = HDFStore('data/hdata.h5')

df = DataFrame(np.random.rand(1000, 3), columns=('A', 'B', 'C'))
# put the dataset in the storage
hdf.put('d1', df, format='table', data_columns=True)
print(hdf['d1'].shape)
hdf.append('d1', DataFrame(np.random.rand(5, 3), columns=('A', 'B', 'C')),
           format='table', data_columns=True)

df = DataFrame(np.random.rand(1000, 3), columns=('A', 'B', 'C'))
# put the dataset in the storage
hdf.put('d2', df, format='table', data_columns=True)
print(hdf['d2'].shape)
hdf.append('d2', DataFrame(np.random.rand(5, 3), columns=('A', 'B', 'C')),
           format='table', data_columns=True)

hdf.close()  # closes the file
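# A minimal follow-up sketch (not part of the original snippet): because 'd1' and
# 'd2' were written with format='table' and data_columns=True, they can be queried
# on reopen with HDFStore.select and a where clause. The file path and column names
# simply reuse the example above.
from pandas import HDFStore

with HDFStore('data/hdata.h5', mode='r') as hdf:
    subset = hdf.select('d1', where='A > 0.5')   # rows of d1 where column A > 0.5
    print(subset.shape)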