def test_chunk():
    print "debut"
    writer = None
    years = range(2011, 2012)
    filename = destination_dir + 'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
        # fname = "Agg_%s.%s" % (str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()

        import time
        tps = {}
        for nb_chunk in range(1, 5):
            deb_chunk = time.clock()
            simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5',
                            num_table=3, chunks_count=nb_chunk, print_missing=False)
            simu.compute()
            tps[nb_chunk] = time.clock() - deb_chunk

        voir = simu.output_table.table3['foy']
        print len(voir)
        pdb.set_trace()

        agg3 = Aggregates()
        agg3.set_simulation(simu)
        agg3.compute()
        df1 = agg3.aggr_frame
        print df1.to_string()
        print tps
    store.close()
def append_store_mod(module, path_store, n_days_refresh=None, b_ptrk=False):
    """Append all new rows in module.field to store. Resize store as appropriate."""
    store = HDFStore(path_store)
    for field in module.__dict__.keys():
        if (type(getattr(module, field)) is DataFrame or
                type(getattr(module, field)) is Panel) and "/{}".format(field) in store.keys():
            if "tdate" in field:
                getattr(module, field).to_hdf(path_store, field, mode='a', format='fixed')
            else:
                solbasic.logger.info("Working on {}...".format(field))
                df = store[field].copy()
                df_new = getattr(module, field).copy()
                if n_days_refresh is None:
                    l_index = sorted(list(set(df_new.index) - set(df.index)))
                else:
                    l_index = sorted(list(df_new.index[-n_days_refresh:]))
                l_columns = sorted(list(set(df_new.columns) - set(df.columns)))
                l_columns_rev = sorted(list(set(df.columns) - set(df_new.columns)))
                if l_columns:
                    solbasic.logger.info("Adding {} instruments: {}".format(len(l_columns), l_columns))
                    for col in l_columns:
                        df[col] = np.nan
                if l_columns_rev:
                    for col in l_columns_rev:
                        df_new[col] = df[col]
                if l_index:
                    solbasic.logger.info("Refreshing {} dates: {}".format(len(l_index), l_index))
                    for ind in l_index:
                        df.ix[ind] = df_new.ix[ind]
                df.to_hdf(path_store, field, mode='a', format='fixed')
    store.close()
    if b_ptrk:
        ptrk_store(path_store)
def build_actualisation_groups(self, filename=None):
    '''
    Builds actualisation groups
    '''
    if filename is None:
        data_dir = CONF.get('paths', 'data_dir')
        fname = "actualisation_groups.h5"
        filename = os.path.join(data_dir, fname)
    store = HDFStore(filename)
    df = store['vars']
    coeff_list = sorted(unique(df['coeff'].dropna()))
    vars = dict()
    for coeff in coeff_list:
        vars[coeff] = list(df[df['coeff'] == coeff]['var'])
    self.actualisation_vars = vars
    self.coeffs_df = store['names']
    self.coeffs_df['coeff'] = self.coeffs_df['coeff'].str.replace(' ', '')  # remove spaces
    yr = 1 * self.survey_year
    self.coeffs_df['value'] = 1
    while yr < self.datesim_year:
        if yr in self.coeffs_df.columns:
            factor = self.coeffs_df[yr]
        else:
            factor = 1
        self.coeffs_df['value'] = self.coeffs_df['value'] * factor
        yr += 1
    self.coeffs_df.set_index(['coeff'], inplace=True)
    store.close()
def save_simulation(self, filename, attribute_list=['cohorts', 'aggregate_pv', 'percapita_pv',
                    'cohorts_alt', 'aggregate_pv_alt', 'percapita_pv_alt'], has_alt=False):
    """
    Saves the output dataframes under the default directory in an HDF store.
    Warning : will overwrite the .h5 file if it already exists!
    Warning : the data is saved as a dataframe; one has to recreate the Cohort when reading.

    Parameters
    ----------
    name : the name of the table inside the store
    filename : the name of the .h5 file where the table is stored. Created if it does not exist.
    """
    # Creating the filepath:
    ERF_HDF5_DATA_DIR = os.path.join(SRC_PATH, 'countries', self.country, 'sources', 'Output_folder/')
    store = HDFStore(os.path.join(os.path.dirname(ERF_HDF5_DATA_DIR), filename + '.h5'))
    # Looping over the simulation's attributes, saving only the ones matching the list
    # AND that aren't empty
    from pandas import DataFrame
    for attrib, value in self.__dict__.iteritems():
        if attrib in attribute_list and value is not None:
            # Transforming the data within a cohort into a dataframe so HDFStore can handle it:
            record = DataFrame(index=value.index)
            for col in value.columns:
                record[col] = value[col]
            print 'saving'
            store[attrib] = record
        else:
            print 'ignored'
    print store
    store.close()
def main():
    # the loaded data is a DataFrame
    genedata = load_gene_dataset()

    # randomly split the dataset into three folds
    # this code should be improved in the future
    kfold = 3.0
    data_kfold = {}
    train, fold1 = train_test_split(genedata, test_size=1 / kfold)
    data_kfold['fold1'] = fold1
    fold3, fold2 = train_test_split(train, test_size=0.5)
    data_kfold['fold2'] = fold2
    data_kfold['fold3'] = fold3

    # now we want to train a network for each fold
    # store the results in an h5 file
    geneStore = HDFStore('predGeneExp1.h5')
    for i, key in enumerate(data_kfold):
        print(key)
        test_data = data_kfold[key]
        X_val, y_val = get_input_output(test_data)
        keys = list(data_kfold.keys())  # list() so that .remove() works in Python 3
        keys.remove(key)
        training_data = pd.concat([data_kfold[keys[0]], data_kfold[keys[1]]])
        X_train, y_train = get_input_output(training_data)
        print(keys)
        # use these data to train the network
        main_training(key, X_train, y_train, X_val, y_val, geneStore)
    # the h5 file must be closed after use
    geneStore.close()
def storeHdf5(data, tag, path):
    hdf = HDFStore(path, 'a')
    if tag in hdf.keys():
        hdf.append(tag, data)
    else:
        hdf.put(tag, data)
    hdf.close()
def SAVE_ChangeDictOrder(_processedEvents):
    '''Change the nesting order for the final HDF database - instead of correct/attention,
    it will go attention/present/correct etc.'''
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    # Strip the '_EVENTS' suffix because the path in the HDF store must match exactly,
    # otherwise nothing was being saved
    all_event_names = sorted([name.replace('_EVENTS', '') for name in events_names
                              if bef_aft_dict[bef_aft_switch + '_mat'] in name])
    store = HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf'])
    for _data, recording in zip(_processedEvents, all_event_names):
        print('I')
        sname = recording.rfind("/") + 1
        subId = recording[sname:-4].replace("-", "_")

        store[subId + '/events/attention/correct'] = _data['correct']['attention'].convert_objects()
        store[subId + '/events/motor/correct'] = _data['correct']['motor'].convert_objects()
        store[subId + '/events/attention/incorrect'] = _data['incorrect']['attention'].convert_objects()
        store[subId + '/events/motor/incorrect'] = _data['incorrect']['motor'].convert_objects()
        # print(_data['incorrect']['motor'].convert_objects())
    store.close()
def download():
    """
    Convenience method that downloads all the weather data required
    for the machine learning examples.
    """
    reader = GSODDataReader()
    year_list = range(2001, 2012)
    austin = reader.collect_data(year_list, exact_station=True,
                                 station_name='AUSTIN CAMP MABRY', state='TX', country='US')
    houston = reader.collect_data(year_list, exact_station=True,
                                  station_name='HOUSTON/D.W. HOOKS', state='TX', country='US')
    new_york = reader.collect_data(year_list, exact_station=True,
                                   station_name='NEW YORK/LA GUARDIA', state='NY', country='US')
    newark = reader.collect_data(year_list, exact_station=True,
                                 station_name='NEWARK INTL AIRPORT', state='NJ', country='US')
    punta_arenas = reader.collect_data(year_list, exact_station=True,
                                       station_name='PUNTA ARENAS', country='CH')
    wellington = reader.collect_data(year_list, exact_station=True,
                                     station_name='WELLINGTON AIRPORT', country='NZ')
    store = HDFStore('weather.h5')
    store['austin'] = austin
    store['houston'] = houston
    store['nyc'] = new_york
    store['newark'] = newark
    store['punta_arenas'] = punta_arenas
    store['wellington'] = wellington
    store.close()
def convert_fiducial(filename, output_type="csv"):
    '''
    Converts the fiducial comparison HDF5 files into a CSV file.

    Parameters
    ----------
    filename : str
        HDF5 file.
    output_type : str, optional
        Type of file to output.
    '''
    store = HDFStore(filename)
    data_columns = dict()
    for key in store.keys():
        data = store[key].sort(axis=1)
        mean_data = data.mean(axis=1)
        data_columns[key[1:]] = mean_data
    store.close()

    df = DataFrame(data_columns)

    output_name = "".join(filename.split(".")[:-1]) + "." + output_type

    df.to_csv(output_name)
class HDFStorePanel(BaseIO):

    goal_time = 0.2

    def setup(self):
        self.fname = '__test__.h5'
        with warnings.catch_warnings(record=True):
            self.p = Panel(np.random.randn(20, 1000, 25),
                           items=['Item%03d' % i for i in range(20)],
                           major_axis=date_range('1/1/2000', periods=1000),
                           minor_axis=['E%03d' % i for i in range(25)])
            self.store = HDFStore(self.fname)
            self.store.append('p1', self.p)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.select('p1')

    def time_write_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.append('p2', self.p)
def init_h5_database(database_name, meta_data, overwrite=False):
    """Initialize an h5 file for storing EEMs using a pandas DataFrame containing EEM meta data

    Args:
        database_name (str): filename and relative path for h5 database
        meta_data (pandas DataFrame): DataFrame containing eem meta data from
            `pyeem.load_eem_meta_data` function or created manually - see
            pyeem.load_eem_meta_data for required columns.
            NOTE: do not use spaces or decimals in column names as this causes
            a warning when saving to H5 file format

    Returns:
        no return - data is saved as h5 and may be loaded using `pyeem.load_eem_data`
    """
    from pandas import HDFStore
    # check if h5 file exists and overwrite or warn
    if os.path.isfile(database_name):
        if overwrite is True:
            print('overwriting ' + database_name)
            os.remove(database_name)
        else:
            raise ValueError("h5 file " + database_name +
                             " exists. Choose new database name or set overwrite=True")
    # create a h5 file to store EEM meta data
    hdf = HDFStore(database_name)
    hdf.put('meta', meta_data, format='table', data_columns=True)
    hdf.close()
    return
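A minimal usage sketch for init_h5_database; the column names below are illustrative only, not the required pyeem schema.

import pandas as pd

meta = pd.DataFrame({
    'sample_name': ['blank1', 'sample1'],        # illustrative columns, no spaces or decimals
    'file_name': ['blank1.csv', 'sample1.csv'],
})
init_h5_database('eem_data.h5', meta, overwrite=True)

# because the meta table is stored under the 'meta' key, it can be read back directly
meta_back = pd.read_hdf('eem_data.h5', 'meta')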
def remove(self, path):
    s = HDFStore(self.path)
    if path in s:
        print("removing %s" % path)
        s.remove(path)
    s.flush(fsync=True)
    s.close()
def get(self, path):
    s = HDFStore(self.path)
    d = None
    if path in s:
        d = s[path]
    s.close()
    return d
class engine(Engine):
    """Engine instance for writing data to a HDF5 file."""

    name = "HDF5"
    abbreviation = "hdf5"
    insert_limit = 1000
    required_opts = [
        ("file", "Enter the filename of your HDF5 file", "hdf5.h5"),
        ("table_name", "Format of table name", "{db}_{table}"),
        ("data_dir", "Install directory", DATA_DIR),
    ]

    def create_db(self):
        """Override create_db since an SQLite dataset needs to be created
        first, followed by the creation of an empty HDFStore file.
        """
        file_path = os.path.join(self.opts["data_dir"], self.opts["file"])
        self.file = HDFStore(file_path)

    def create_table(self):
        """Don't create table for HDF5

        HDF5 doesn't create tables. Each database is a file which has been
        created. This overloads `create_table` to do nothing in this case.
        """
        return None

    def insert_data_from_file(self, filename):
        """Fill the table by fetching the dataframe from the SQLite engine
        and putting it into the HDFStore file.
        """
        table_name = self.table_name()
        df = self.fetch_table(table_name)
        self.file.put(table_name, df, data_columns=True)

    def fetch_table(self, table_name):
        """Return a table from the SQLite dataset as a pandas dataframe."""
        connection = self.get_sqlite_connection()
        sql_query = "SELECT * FROM {};".format(table_name)
        return pd.read_sql_query(sql_query, connection)

    def get_sqlite_connection(self):
        # self.get_input()
        file = self.opts["file"]
        file = (file.split("."))[0] + ".db"
        db_file = self.opts["data_dir"]
        full_path = os.path.join(db_file, file)
        return dbapi.connect(os.path.normpath(full_path))

    def get_connection(self):
        """Gets the db connection."""
        self.get_input()
        return DummyConnection()

    def disconnect(self):
        """Close the file after being written."""
        self.file.close()
        file = self.opts["file"]
        file = (file.split("."))[0] + ".db"
        os.remove(file)
def func_wrapper(*args, **kwargs):
    temporary_store = HDFStore(file_path)
    try:
        return func(*args, temporary_store=temporary_store, **kwargs)
    finally:
        gc.collect()
        temporary_store.close()
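This wrapper only makes sense inside an enclosing decorator factory that supplies func and file_path. A hedged sketch of that typical pattern follows; the factory name and the decorated function are illustrative, not taken from the source.

import functools
import gc
from pandas import HDFStore

def temporary_store_decorator(file_path):  # hypothetical factory name
    def decorator(func):
        @functools.wraps(func)
        def func_wrapper(*args, **kwargs):
            temporary_store = HDFStore(file_path)
            try:
                # inject an open store and guarantee it is closed afterwards
                return func(*args, temporary_store=temporary_store, **kwargs)
            finally:
                gc.collect()
                temporary_store.close()
        return func_wrapper
    return decorator

# usage: the decorated function receives an open store and never closes it itself
@temporary_store_decorator('tmp.h5')
def write_frame(df, temporary_store=None):
    temporary_store.put('frame', df)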
def save_temp(dataframe, name=None, year=None, config_files_directory=default_config_files_directory):
    """
    Save a temporary table

    Parameters
    ----------
    dataframe : pandas DataFrame
        the dataframe to save
    name : string, default None
    year : integer, default None
        year of the data
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(config_files_directory=config_files_directory)
    store = HDFStore(hdf_file_path)
    log.info("{}".format(store))
    store_path = "{}/{}".format(year, name)
    if store_path in store.keys():
        del store["{}/{}".format(year, name)]
    dataframe.to_hdf(hdf_file_path, store_path)
    store.close()
    return True
def in_store(self, path):
    s = HDFStore(self.path)
    val = False
    if path in s:
        val = True
    s.close()
    return val
class PandasHDFHandler(FileHandler):
    r"""
    Handler for HDF5 files using Pandas.
    """
    def _open_for_read(self):
        self.handle = HDFStore(self.fname, mode='r')

    def _open_for_write(self):
        self.handle = HDFStore(self.fname)

    def list_items(self):
        keys = [key.strip('/') for key in self.handle.keys()]
        items = [(key, _get_type_from_attrs(self.handle.get_storer(key).attrs))
                 for key in keys if '/' not in key]
        # ---- for backward compatibility (LArray < 0.33) ----
        # axes
        items += [(key.split('/')[-1], 'Axis_Backward_Comp') for key in keys if '__axes__' in key]
        # groups
        items += [(key.split('/')[-1], 'Group_Backward_Comp') for key in keys if '__groups__' in key]
        return items

    def _read_item(self, key, typename, *args, **kwargs):
        if typename in _supported_typenames:
            hdf_key = '/' + key
        # ---- for backward compatibility (LArray < 0.33) ----
        elif typename == 'Axis_Backward_Comp':
            hdf_key = '__axes__/' + key
        elif typename == 'Group_Backward_Comp':
            hdf_key = '__groups__/' + key
        else:
            raise TypeError()
        return read_hdf(self.handle, hdf_key, *args, **kwargs)

    def _dump_item(self, key, value, *args, **kwargs):
        hdf_key = '/' + key
        if isinstance(value, (Array, Axis)):
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Group):
            hdf_axis_key = '/' + value.axis.name
            value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs)
        elif isinstance(value, _supported_scalars_types):
            s = pd.Series(data=value)
            self.handle.put(hdf_key, s)
            self.handle.get_storer(hdf_key).attrs.type = type(value).__name__
        else:
            raise TypeError()

    def _read_metadata(self):
        metadata = Metadata.from_hdf(self.handle)
        if metadata is None:
            metadata = Metadata()
        return metadata

    def _dump_metadata(self, metadata):
        metadata.to_hdf(self.handle)

    def close(self):
        self.handle.close()
def load_temp(name=None, year=None, variables=None, config_files_directory=default_config_files_directory):
    """
    Load a temporary saved table

    Parameters
    ----------
    name : string, default None
    year : integer, default None
        year of the data
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(config_files_directory=config_files_directory)
    print(hdf_file_path)
    store = HDFStore(hdf_file_path)
    dataframe = store["{}/{}".format(year, name)]
    store.close()
    if variables is None:
        return dataframe
    else:
        return dataframe[variables].copy()
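A hedged round-trip sketch for the save_temp / load_temp pair above, assuming the default config_files_directory resolves to a writable temporary HDF5 file; the table name and data are illustrative.

import pandas as pd

df = pd.DataFrame({'rev': [1.0, 2.0], 'id': [1, 2]})
save_temp(df, name='revenues', year=2012)         # stored under the key "2012/revenues"
df_back = load_temp(name='revenues', year=2012)   # full table
rev_only = load_temp(name='revenues', year=2012, variables=['rev'])  # subset of columns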
def _get(self, path):
    s = HDFStore(self.path)
    d = None
    if path in s:
        d = s[path]
    s.close()
    return d
def writeHD5():
    """Write to local store.h5"""
    global Data1
    store = HDFStore('./store.h5')
    store['listCrisis'] = Data1
    store.close()
def AddSeqComp(mypath):
    """ Loads TestLogAll.h5 from the specified path, then calls
    MeasurementGroupTools.AddSeqComp to recalculate seq components using FFT

    Input: Directory of the measurement campaign, e.g.: "aLabView2"
    Output: Results1.h5, Results1.pdf in the data subdirs.
    """
    from pandas import HDFStore, ExcelWriter
    import MeasurementGroupTools as mgt

    h5logs = HDFStore(mypath + "\\" + 'TestLogsAll.h5')
    TestLog = h5logs['TestLogsAll']

    dirs = TestLog[u'DirName'].unique()
    for dname in dirs:
        mysubdirpath = mypath + "\\" + dname
        print "Processing: " + dname
        mgt.AddSeqComp(mysubdirpath, TestLog, dname)

    h5logs.put('TestLogsAll', TestLog)
    h5logs.close()

    writer = ExcelWriter(mypath + "\\" + 'TestLogsAll.xlsx')
    TestLog.to_excel(writer, 'TestLogsAll')  # the second argument defines the sheet name
    writer.save()
    return
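A hedged usage note based only on the docstring above: a call passes the campaign directory, and the outputs land in the locations the docstring names.

# hypothetical invocation, reusing the example directory from the docstring
AddSeqComp("aLabView2")
# refreshes aLabView2\TestLogsAll.h5 and aLabView2\TestLogsAll.xlsx, and (per the
# docstring) writes Results1.h5 / Results1.pdf into each measurement subdirectory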
def put(self, path, obj):
    s = HDFStore(self.path)
    if path in s:
        print "updating %s" % path
        s.remove(path)
    s[path] = obj
    s.close()
def evaluate(model, test_hdf_file, get_batch, loss_function, batch_size, cuda=False):
    store_test = HDFStore(test_hdf_file)
    test_loss = 0
    accuracy = 0
    count = 0
    model.eval()
    if cuda:
        model = model.cuda()
    test_gen = get_batch(store_test, batch_size)
    for x, target, src_padding, target_padding in test_gen:
        if cuda:
            x, target = x.cuda(), target.cuda()
        out = model(x)
        loss = loss_function(out, target)
        acc = int(torch.all(out.argmax(dim=-1) == target, dim=-1).to(torch.int).sum()) / out.shape[0]
        test_loss += loss.item()
        accuracy += acc
        count += 1
    test_loss /= count
    accuracy /= count
    print("Test Loss :", test_loss)
    print("Test accuracy :", accuracy)
    store_test.close()
def storeEEGinHDF():
    """Load EEG from 64 electrodes x ~30 min at 500 hz (large dataset)"""
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    all_eeg_names = sorted([name for name in eeg_names
                            if bef_aft_dict[bef_aft_switch + '_mat'].replace("_EVENTS", "") in name])

    store = HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf'])
    # Create a HDF database with a single-precision point (float 32)
    cnt = 0
    for recording in all_eeg_names:
        cnt = cnt + 1
        sname = recording.rfind("/") + 1
        subId = recording[sname:-4].replace("-", "_")
        sig = pd.DataFrame(sio.loadmat(recording, struct_as_record=True)['eegToSave']).transpose()
        # Modified here to save a filtered version from: store[subId + "/signal/f"] = sig.convert_objects()
        store[subId + "/signal/filtered_30/"] = sig.convert_objects().apply(FilterData, axis=0)
        print(cnt)
    store.close()
def show_temp(config_files_directory=default_config_files_directory):
    hdf_file_path = get_tmp_file_path(config_files_directory=config_files_directory)
    store = HDFStore(hdf_file_path)
    log.info("{}".format(store))
    store.close()
def build_from_openfisca(directory=None):

    df_age_final = None
    for yr in range(2006, 2010):
        simulation = SurveySimulation()
        simulation.set_config(year=yr)
        simulation.set_param()
        simulation.set_survey()

        df_age = get_age_structure(simulation)
        df_age[yr] = df_age['wprm']
        del df_age['wprm']
        if df_age_final is None:
            df_age_final = df_age
        else:
            df_age_final = df_age_final.merge(df_age)

    if directory is None:
        directory = os.path.dirname(__file__)

    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    print df_age_final.dtypes
    store.put("openfisca", df_age_final)
    store.close()
def build_actualisation_group_names_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print df.to_string()
    store.close()
def get_children_paths(self, node_path):
    s = HDFStore(self.path)
    node = s.get_node(node_path)
    children = []
    for child, df in node._v_children.items():
        children.append(df._v_pathname)
    s.close()
    return children
def save(self, dataFile):
    """ save data to HDF"""
    print 'Saving data to', dataFile
    store = HDFStore(dataFile)
    for symbol in self.wp.items:
        store[symbol] = self.wp[symbol]
    store.close()
def save(self, dataFile):
    """ save data to HDF"""
    print('Saving data to', dataFile)
    store = HDFStore(dataFile)
    for symbol in self.wp.items:
        store[symbol] = self.wp[symbol]
    store.close()
def convert_to_3_tables(year=2006, survey_file=None, output_file=None):

    if survey_file is None:
        raise Exception('You need a .h5 file with the survey to extract the variables from')

    if output_file is None:
        output_file = survey_file
        raise Warning('the survey file will be used to store the created tables')

    store = HDFStore(survey_file)
    output = HDFStore(output_file)
    print output

    simulation = SurveySimulation()
    simulation.set_config(year=year)

    table1 = store['survey_' + str(year)]

    for entity in ['ind', 'foy', 'men', 'fam']:
        key = 'survey_' + str(year) + '/' + str(entity)
        vars_matching_entity = vars_matching_entity_from_table(table1, simulation, entity)
        print entity, vars_matching_entity
        print 'table1 enum'
        if entity == 'ind':
            print 'INDIVIDUALS'
            print table1['noindiv']
            table_entity = table1.loc[:, vars_matching_entity]
        # for the other entities, keep all identifiers and select only rows where qui == 0
        else:
            # print ' entity :', entity
            # print table1['noindiv'].head()
            position = 'qui' + entity
            # print table1[position]
            table_entity = table1.ix[table1[position] == 0,
                                     ['noi', 'idmen', 'idfoy', 'idfam',
                                      'quifoy', 'quimen', 'quifam'] + vars_matching_entity]
            # print table_entity.noi.head()
            table_entity = table_entity.rename_axis(table_entity['id' + entity], axis=1)
            # print ' APRES'
            # print table_entity.noi.head()
        print key
        output.put(key, table_entity)

    del table1
    import gc
    gc.collect()
    store.close()
    output.close()
class PandasHDFHandler(FileHandler):
    """
    Handler for HDF5 files using Pandas.
    """
    def _open_for_read(self):
        self.handle = HDFStore(self.fname, mode='r')

    def _open_for_write(self):
        self.handle = HDFStore(self.fname)

    def list_items(self):
        keys = [key.strip('/') for key in self.handle.keys()]
        # axes
        items = [(key.split('/')[-1], 'Axis') for key in keys if '__axes__' in key]
        # groups
        items += [(key.split('/')[-1], 'Group') for key in keys if '__groups__' in key]
        # arrays
        items += [(key, 'Array') for key in keys if '/' not in key]
        return items

    def _read_item(self, key, type, *args, **kwargs):
        if type == 'Array':
            hdf_key = '/' + key
        elif type == 'Axis':
            hdf_key = '__axes__/' + key
            kwargs['name'] = key
        elif type == 'Group':
            hdf_key = '__groups__/' + key
            kwargs['name'] = key
        else:
            raise TypeError()
        return key, read_hdf(self.handle, hdf_key, *args, **kwargs)

    def _dump_item(self, key, value, *args, **kwargs):
        if isinstance(value, LArray):
            hdf_key = '/' + key
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Axis):
            hdf_key = '__axes__/' + key
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Group):
            hdf_key = '__groups__/' + key
            hdf_axis_key = '__axes__/' + value.axis.name
            value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs)
        else:
            raise TypeError()

    def _read_metadata(self):
        metadata = Metadata.from_hdf(self.handle)
        if metadata is None:
            metadata = Metadata()
        return metadata

    def _dump_metadata(self, metadata):
        metadata.to_hdf(self.handle)

    def close(self):
        self.handle.close()
def test_read_nokey_empty(setup_path):
    with ensure_clean_path(setup_path) as path:
        store = HDFStore(path)
        store.close()
        msg = re.escape(
            "Dataset(s) incompatible with Pandas data types, not table, or no "
            "datasets found in HDF5 file."
        )
        with pytest.raises(ValueError, match=msg):
            read_hdf(path)
def write_file(format):
    outfile = '../inst/exampledata/pytables_' + format + '.h5'
    if os.path.isfile(outfile):
        os.remove(outfile)
    hdf = HDFStore(outfile)
    hdf.put('mydata', df, format=format, data_columns=True, encoding="utf-8")
    hdf.close()
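A hedged companion sketch: writing both supported formats and reading either one back with pandas. The module-level df and the output directory are assumptions carried over from the function above.

from pandas import read_hdf

write_file('fixed')   # fast, write-once format
write_file('table')   # slower, but queryable and appendable

df_fixed = read_hdf('../inst/exampledata/pytables_fixed.h5', 'mydata')
df_table = read_hdf('../inst/exampledata/pytables_table.h5', 'mydata')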
def run_convert(basedir, beam):

    print "Converting for:", beam

    infile = basedir + "/lmon_p" + str(beam) + ".root"
    outfile = basedir + "/HCal_p" + str(beam) + ".h5"

    # lmon input
    inp = TFile.Open(infile)
    tree = inp.Get("DetectorTree")

    # load the tree
    ucal_edep_EMC = rt.EntryD()
    ucal_edep_HAC1 = rt.EntryD()
    ucal_edep_HAC2 = rt.EntryD()
    ucal_edep_layers = std.vector(float)()
    tree.SetBranchAddress("ucal_edep_EMC", AddressOf(ucal_edep_EMC, "v"))
    tree.SetBranchAddress("ucal_edep_HAC1", AddressOf(ucal_edep_HAC1, "v"))
    tree.SetBranchAddress("ucal_edep_HAC2", AddressOf(ucal_edep_HAC2, "v"))
    tree.SetBranchAddress("ucal_edep_layers", ucal_edep_layers)

    tree.GetEntry(0)
    nlay = ucal_edep_layers.size()

    # output DataFrame
    col = ["ucal_edep_EMC", "ucal_edep_HAC1", "ucal_edep_HAC2"]
    for i in range(nlay):
        col.append("ucal_edep_layer" + str(i))
    df_inp = []

    # event loop
    for iev in xrange(tree.GetEntriesFast()):
        tree.GetEntry(iev)
        lin = []
        lin.append(ucal_edep_EMC.v)
        lin.append(ucal_edep_HAC1.v)
        lin.append(ucal_edep_HAC2.v)
        for i in xrange(nlay):
            lin.append(ucal_edep_layers.at(i))
        df_inp.append(lin)

    df = DataFrame(df_inp, columns=col)
    print df

    out = HDFStore(outfile)
    out["hcal"] = df
    out.close()

    inp.Close()
def load(self, dataFile):
    """load data from HDF"""
    if os.path.exists(dataFile):
        store = HDFStore(dataFile)
        symbols = [str(s).strip('/') for s in list(store.keys())]
        data = dict(list(zip(symbols, [store[symbol] for symbol in symbols])))
        self.wp = Panel(data)
        store.close()
    else:
        raise IOError('Data file does not exist')
def load(self, dataFile):
    """load data from HDF"""
    if os.path.exists(dataFile):
        store = HDFStore(dataFile)
        symbols = store.keys()
        data = dict(zip(symbols, [store[symbol] for symbol in symbols]))
        self.wp = WidePanel(data)
        store.close()
    else:
        raise IOError('Data file does not exist')
def _put(self, path, obj):
    s = HDFStore(self.path)
    if path in s:
        print("updating %s" % path)
        s.remove(path)
        s.close()
        s = HDFStore(self.path)
    s[path] = obj
    s.flush(fsync=True)
    s.close()
def anls():
    store = HDFStore('hdf5/divvy.h5')
    pd = store['divvy']
    store.close()
    df = reduce(lambda x, y: x.append(y), [pd[i] for i in pd.items])
    df.index = df.timestamp
    foo = map(lambda x: x[1], df.groupby('id'))
    for i in range(len(foo)):
        foo[i]['diff'] = foo[i].availableBikes.diff()
    for i in range(len(foo)):
        foo[i]['diff'].hist(range=[-5, 5], bins=20)
    plt.show()
def load_df(path, default=None):
    """Load the DataFrame stored under the '/logs' table of the HDF5 store at path"""
    try:
        store = HDFStore(path)
        print store.keys()
        df = store.get('logs')
        store.close()
        return df
    except:
        return default
def load(self, dataFile):
    """load data from HDF"""
    if os.path.exists(dataFile):
        store = HDFStore(dataFile)
        symbols = [str(s).strip('/') for s in store.keys()]
        data = dict(zip(symbols, [store[symbol] for symbol in symbols]))
        self.wp = WidePanel(data)
        store.close()
    else:
        raise IOError('Data file does not exist')
def test_store(self):
    final_store = HDFStore(self.store_path)
    print '----'
    print final_store.keys()
    print '-' * 80

    logs = final_store['/logs']
    print type(logs)
    print len(logs)
    print logs.columns
    final_store.close()
def store_results(self, result, index, columns, hdf5_file):
    self.df = DataFrame(result, columns=columns)
    self.df = self.df.set_index(index)
    self.df.sort_index(inplace=True)

    # Store the DataFrame as an HDF5 file...
    hdf = HDFStore(hdf5_file)
    # Append the dataframe, and ensure addr / host can be 17 chars long
    hdf.append('df', self.df, data_columns=list(columns),
               min_itemsize={'addr': 17, 'host': 17})
    hdf.close()
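A hedged usage sketch for store_results: the scanner object and the row tuples are illustrative, since the owning class is not shown. min_itemsize reserves 17 characters for the addr/host string columns so later appends with longer values still fit in the table.

# hypothetical caller and data
columns = ['addr', 'host', 'latency_ms']
rows = [('10.0.0.1', 'router.local', 1.2),
        ('10.0.0.2', 'switch.local', 0.8)]
scanner.store_results(rows, index='addr', columns=columns, hdf5_file='scan.h5')

# repeated calls keep appending to the same 'df' table inside scan.h5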
def convert_fiducial(filename, output_type="csv", decimal_places=8, append_comp=True, num_fids=5, return_name=True, mode='mean', **kwargs): ''' Converts the fiducial comparison HDF5 files into a CSV file. Parameters ---------- filename : str HDF5 file. output_type : str, optional Type of file to output. decimal_places : int, optional Specify the number of decimal places to keep. append_comp : bool, optional Append on columns with fiducial numbers copy num_fids : int, optional Number of fiducials compared. ''' store = HDFStore(filename) data_columns = dict() for key in store.keys(): data = store[key].sort(axis=1) mean_data = timestep_choose(data, mode=mode, **kwargs) data_columns[key[1:]] = trunc_float(mean_data, decimal_places) comp_fids = store[key].index store.close() df = DataFrame(data_columns) if append_comp: fids = [] for fid, num in zip(np.arange(0, num_fids - 1), np.arange(num_fids - 1, 0, -1)): for _ in range(num): fids.append(fid) df["Fiducial 1"] = Series(np.asarray(fids).T, index=df.index) df["Fiducial 2"] = Series(comp_fids.T, index=df.index) for comp in all_comparisons: if comp in filename: break else: raise StandardError("Could not find a face comparison match for " + filename) output_name = "fiducials" + comp[:-1] + "." + output_type df.to_csv(output_name) if return_name: return output_name