def append_store_mod( module, path_store, n_days_refresh=None, b_ptrk=False ):
    """
    Append all new rows in module.field to store. Resize store as appropriate.

    Every attribute of ``module`` whose type is exactly DataFrame or Panel and
    which already has a key in the store is processed:

    * fields whose name contains "tdate" are rewritten wholesale;
    * any other field is merged with the stored frame: columns missing on
      either side are added, the selected index labels are copied from the
      module's frame, and the result is written back in 'fixed' format.

    Parameters
    ----------
    module : object
        Object whose ``__dict__`` holds the frames to push.
    path_store : str
        Path of the HDF5 file.
    n_days_refresh : int or None
        When None, only index labels absent from the stored frame are copied.
        Otherwise the last ``n_days_refresh`` labels of the new frame are
        (re)copied.
    b_ptrk : bool
        When true, ``ptrk_store( path_store )`` is called after the store has
        been closed.
    """
    store = HDFStore( path_store )
    try:
        # Only keys that already exist are ever rewritten, so the key set is
        # stable for the whole loop and can be computed once.
        stored_keys = set( store.keys() )
        for field in module.__dict__.keys():
            obj = getattr( module, field )
            if not ( type( obj ) is DataFrame or type( obj ) is Panel ):
                continue
            if "/{}".format( field ) not in stored_keys:
                continue
            if "tdate" in field:
                # NOTE(review): this writes through a second handle on the
                # same file while `store` is still open - confirm that the
                # installed PyTables tolerates it.
                obj.to_hdf( path_store, field, mode='a', format='fixed' )
                continue
            solbasic.logger.info( "Working on {}...".format( field ) )
            df = store[ field ].copy()
            df_new = obj.copy()
            if n_days_refresh is None:
                l_index = sorted( list( set( df_new.index ) - set( df.index ) ) )
            else:
                l_index = sorted( list( df_new.index[ -n_days_refresh: ] ) )
            l_columns = sorted( list( set( df_new.columns ) - set( df.columns ) ) )
            l_columns_rev = sorted( list( set( df.columns ) - set( df_new.columns ) ) )
            if l_columns:
                solbasic.logger.info( "Adding {} instruments: {}".format( len( l_columns ), l_columns ) )
                for col in l_columns:
                    df[ col ] = np.nan
            # Columns only present in the stored frame are mirrored into the
            # new frame so the row assignment below sees the same columns.
            for col in l_columns_rev:
                df_new[ col ] = df[ col ]
            if l_index:
                solbasic.logger.info( "Refreshing {} dates: {}".format( len( l_index ), l_index ) )
                for ind in l_index:
                    df.ix[ ind ] = df_new.ix[ ind ]
            df.to_hdf( path_store, field, mode='a', format='fixed' )
    finally:
        # Release the file handle even when a field fails half-way.
        store.close()
    if b_ptrk:
        ptrk_store( path_store )
def build_actualisation_groups(self, filename = None):
    '''
    Builds actualisation groups

    Reads the 'vars' and 'names' tables from the HDF5 file and sets:

    * ``self.actualisation_vars``: dict mapping each non-null 'coeff' value of
      the 'vars' table to the list of 'var' entries sharing it;
    * ``self.coeffs_df``: the 'names' table indexed by 'coeff' (spaces
      removed), with a 'value' column holding the product of the year columns
      from ``self.survey_year`` up to, but excluding, ``self.datesim_year``.
      Years without a column contribute a factor of 1.

    Parameters
    ----------
    filename : str, default None
        Path of the HDF5 file. When None, "actualisation_groups.h5" inside
        the configured 'data_dir' is used.
    '''
    if filename is None:
        data_dir = CONF.get('paths', 'data_dir')
        fname = "actualisation_groups.h5"
        filename = os.path.join(data_dir, fname)

    store = HDFStore(filename)
    try:
        df = store['vars']
        coeff_list = sorted(unique(df['coeff'].dropna()))
        vars_by_coeff = dict()
        for coeff in coeff_list:
            vars_by_coeff[coeff] = list(df[ df['coeff']==coeff ]['var'])
        self.actualisation_vars = vars_by_coeff

        self.coeffs_df = store['names']
        self.coeffs_df['coeff'] = self.coeffs_df['coeff'].str.replace(' ','') # remove spaces

        yr = 1*self.survey_year
        self.coeffs_df['value'] = 1
        while yr < self.datesim_year:
            if yr in self.coeffs_df.columns:
                factor = self.coeffs_df[yr]
            else:
                factor = 1
            self.coeffs_df['value'] = self.coeffs_df['value']*factor
            yr += 1

        self.coeffs_df.set_index(['coeff'], inplace = True)
    finally:
        # Close the file even if a table or column is missing.
        store.close()
def run(self, fname, *args, **kwargs):
    """
    Acquire the lock file, then initialise the underlying HDFStore.

    The lock is taken by creating ``self._lock`` exclusively. While creation
    fails with IOError/OSError the call sleeps 1-3 seconds and retries,
    counting attempts in ``self.countlock``. Once the counter exceeds 10 the
    lock file is removed and the loop tries again.
    """
    lock_flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY
    while True:
        try:
            self._flock = os.open(self._lock, lock_flags)
            log.info("SafeHDF:%s lock:%s" % (self._lock, self._flock))
            break
        except (IOError, OSError) as err:
            log.error("IOError Error:%s" % (err))
            if self.countlock > 10:
                # Too many failed attempts: drop the lock file and retry.
                os.remove(self._lock)
                log.error("count10 remove lock")
            else:
                time.sleep(random.randint(1, 3))
                self.countlock += 1
    HDFStore.__init__(self, fname, *args, **kwargs)
def load_temp(name=None, year=None, variables=None, config_files_directory=default_config_files_directory):
    """
    Load a temporary saved table

    Parameters
    ----------
    name : string, default None
           name of the table; required
    year : integer, default None
           year of the data; required
    variables : default None
           column selection applied to the loaded table; when None the whole
           table is returned
    config_files_directory : directory passed to ``get_tmp_file_path`` to
           locate the temporary HDF5 file

    Returns
    -------
    The table stored under "<year>/<name>", or a copy of the requested
    columns when ``variables`` is given.

    Raises
    ------
    Exception
        If ``year`` or ``name`` is None.
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(
        config_files_directory=config_files_directory)
    print(hdf_file_path)
    store = HDFStore(hdf_file_path)
    try:
        dataframe = store["{}/{}".format(year, name)]
    finally:
        # A missing key raises; make sure the file is not left open.
        store.close()
    if variables is None:
        return dataframe
    return dataframe[variables].copy()
class engine(Engine):
    """Engine instance for writing data to a HDF5 file."""

    name = "HDF5"
    abbreviation = "hdf5"
    insert_limit = 1000
    required_opts = [
        ("file", "Enter the filename of your HDF5 file", "hdf5.h5"),
        ("table_name", "Format of table name", "{db}_{table}"),
        ("data_dir", "Install directory", DATA_DIR),
    ]

    def _sqlite_db_path(self):
        """Return the normalised path of the intermediate SQLite file.

        The name is the part of the "file" option before its first dot with
        ".db" appended, located inside the "data_dir" option.
        """
        db_name = (self.opts["file"].split("."))[0] + ".db"
        return os.path.normpath(os.path.join(self.opts["data_dir"], db_name))

    def create_db(self):
        """Override create_db since an SQLite dataset needs to be created
        first followed by the creation of an empty HDFStore file.
        """
        file_path = os.path.join(self.opts["data_dir"], self.opts["file"])
        self.file = HDFStore(file_path)

    def create_table(self):
        """Don't create table for HDF5

        HDF5 doesn't create tables. Each database is a file which has been
        created. This overloads `create_table` to do nothing in this case.
        """
        return None

    def insert_data_from_file(self, filename):
        """Fill the table by fetching the dataframe from the SQLite engine
        and putting it into the HDFStore file.

        ``filename`` is not used here; the data comes from the SQLite table
        named by ``self.table_name()``.
        """
        table_name = self.table_name()
        df = self.fetch_table(table_name)
        self.file.put(table_name, df, data_columns=True)

    def fetch_table(self, table_name):
        """Return a table from sqlite dataset as pandas dataframe."""
        connection = self.get_sqlite_connection()
        sql_query = "SELECT * FROM {};".format(table_name)
        return pd.read_sql_query(sql_query, connection)

    def get_sqlite_connection(self):
        """Open a connection to the intermediate SQLite file."""
        # self.get_input()
        return dbapi.connect(self._sqlite_db_path())

    def get_connection(self):
        """Gets the db connection."""
        self.get_input()
        return DummyConnection()

    def disconnect(self):
        """Close the file after being written and delete the SQLite file."""
        self.file.close()
        # Remove the same file get_sqlite_connection() opened; the bare name
        # would be resolved against the current working directory instead of
        # data_dir.
        os.remove(self._sqlite_db_path())
def get(self, path):
    """Return the object stored under ``path`` in the HDF5 file at
    ``self.path``, or None when the store has no such key.

    The store is opened for the duration of the call and always closed,
    including when the read raises.
    """
    s = HDFStore(self.path)
    try:
        return s[path] if path in s else None
    finally:
        s.close()
def save_temp(dataframe, name = None, year = None, config_files_directory = default_config_files_directory):
    """
    Save a temporary table

    The table is written under the key "<year>/<name>" of the temporary HDF5
    file returned by ``get_tmp_file_path``; an existing entry with that key
    is deleted first.

    Parameters
    ----------
    dataframe : pandas DataFrame
                the dataframe to save
    name : string, default None
           name of the table; required
    year : integer, default None
           year of the data; required
    config_files_directory : directory passed to ``get_tmp_file_path``

    Returns
    -------
    True

    Raises
    ------
    Exception
        If ``year`` or ``name`` is None.
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(config_files_directory = config_files_directory)
    store = HDFStore(hdf_file_path)
    try:
        log.info("{}".format(store))
        store_path = "{}/{}".format(year, name)
        # store.keys() yields "/"-prefixed paths, so comparing the bare path
        # against it never matched; the store's own membership test handles
        # both spellings.
        if store_path in store:
            del store[store_path]
        # Write through the handle that is already open rather than opening
        # the same file a second time.
        store.put(store_path, dataframe)
    finally:
        store.close()
    return True
def func_wrapper(*args, **kwargs):
    # Open the HDF store at `file_path`, hand it to `func` as the
    # `temporary_store` keyword argument and return func's result. The
    # garbage collector is run and the store closed whether or not func
    # raises.
    opened_store = HDFStore(file_path)
    try:
        result = func(*args, temporary_store=opened_store, **kwargs)
        return result
    finally:
        gc.collect()
        opened_store.close()
def writeHD5():
    """Write to local store.h5

    Stores the module-level ``Data1`` under the key 'listCrisis'.
    """
    global Data1
    # Raw string: same characters as before, without relying on the invalid
    # "\s" escape sequence being passed through unchanged.
    store = HDFStore(r'.\store.h5')
    try:
        store['listCrisis'] = Data1
    finally:
        store.close()
def show_temp(config_files_directory = default_config_files_directory):
    """Log the string representation of the temporary HDF store.

    The file path is obtained from ``get_tmp_file_path``; the store is opened,
    logged at info level and closed again.
    """
    tmp_store = HDFStore(get_tmp_file_path(config_files_directory = config_files_directory))
    description = "{}".format(tmp_store)
    log.info(description)
    tmp_store.close()
def mix_models(output: pd.HDFStore, result_file):
    """Blend the GLM and XGB test predictions and write them to CSV.

    Reads 'test/glm' and 'test/xgb' from ``output``, inner-joins them on
    'Id' and sets 'Sales' to 0.985 times the mean of the two
    'PredictedSales' columns. The ['Id', 'Sales'] frame is written to
    ``result_file`` without the index and returned.

    A warning is logged when the two inputs differ in length. Assertions
    require equal column counts and a join that keeps every GLM row.
    """
    glm = output.get('test/glm')
    xgb = output.get('test/xgb')
    n_glm, n_xgb = glm.shape[0], xgb.shape[0]
    assert glm.shape[1] == xgb.shape[1]
    if n_glm != n_xgb:
        logger.warning(
            'glm and xgb predictions in {0!r} have different lengths: {1}, {2}'
            .format(result_file, n_glm, n_xgb))

    merged = glm.merge(xgb, how='inner', on='Id')
    total = merged['PredictedSales_x'] + merged['PredictedSales_y']
    merged['Sales'] = 0.985 * total / 2
    assert merged.shape[0] == n_glm

    blended = merged[['Id', 'Sales']]
    blended.to_csv(result_file, index=False)
    return blended
def download():
    """
    Convenience method that downloads all the weather data required
    for the machine learning examples.

    Data for the years 2001-2011 is collected for six stations through
    ``GSODDataReader.collect_data`` and written to 'weather.h5', one key per
    station.
    """
    reader = GSODDataReader()
    year_list = range(2001, 2012)
    # (store key, station name, extra location keywords). The two non-US
    # stations are requested without a `state` argument.
    stations = [
        ('austin', 'AUSTIN CAMP MABRY', {'state': 'TX', 'country': 'US'}),
        ('houston', 'HOUSTON/D.W. HOOKS', {'state': 'TX', 'country': 'US'}),
        ('nyc', 'NEW YORK/LA GUARDIA', {'state': 'NY', 'country': 'US'}),
        ('newark', 'NEWARK INTL AIRPORT', {'state': 'NJ', 'country': 'US'}),
        ('punta_arenas', 'PUNTA ARENAS', {'country': 'CH'}),
        ('wellington', 'WELLINGTON AIRPORT', {'country': 'NZ'}),
    ]
    # Collect everything before touching the output file, as before.
    collected = [
        (key, reader.collect_data(year_list, exact_station=True,
                                  station_name=station_name, **location))
        for key, station_name, location in stations
    ]
    store = HDFStore('weather.h5')
    try:
        for key, data in collected:
            store[key] = data
    finally:
        store.close()
def _get(self, path):
    """Return the object stored under ``path`` in the HDF5 file at
    ``self.path``, or None when the key is absent.

    The store is closed before returning, including when the read raises.
    """
    s = HDFStore(self.path)
    try:
        if path in s:
            return s[path]
        return None
    finally:
        s.close()
def convert_fiducial(filename, output_type="csv"):
    '''
    Converts the fiducial comparison HDF5 files into a CSV file.

    For every key in the store the row-wise mean (``axis=1``) of the stored
    table is taken; the means become the columns of the output, named after
    the key without its leading character. The output is written next to the
    input, with the input's extension replaced by ``output_type``.

    Parameters
    ----------
    filename : str
        HDF5 file.
    output_type : str, optional
           Type of file to output.
    '''
    import os

    store = HDFStore(filename)
    data_columns = dict()
    try:
        for key in store.keys():
            # NOTE(review): DataFrame.sort only exists in old pandas
            # releases - confirm the targeted version.
            data = store[key].sort(axis=1)
            data_columns[key[1:]] = data.mean(axis=1)
    finally:
        store.close()

    df = DataFrame(data_columns)

    # Only the final extension is replaced. Splitting on every "." and
    # re-joining dropped dots inside the path ("./run.h5" became "/run.csv").
    output_name = os.path.splitext(filename)[0] + "." + output_type
    df.to_csv(output_name)
def save_simulation(self, filename, attribute_list = ['cohorts', 'aggregate_pv', 'percapita_pv', 'cohorts_alt', 'aggregate_pv_alt', 'percapita_pv_alt'], has_alt = False):
    """
    Saves the output dataframe under default directory in an HDF store.
    Warning : will override .h5 file if already existant !
    Warning : the data is saved as a dataframe, one has to recreate the Cohort when reading.

    Parameters
    ----------
    filename : the name (without the '.h5' extension) of the file where the
               tables are stored. Created if not existant.
    attribute_list : names of the attributes of ``self`` to save; each one
               that is not None is stored under its own name
    has_alt : not used by this method
    """
    # Creating the filepath :
    ERF_HDF5_DATA_DIR = os.path.join(SRC_PATH,'countries',self.country,'sources','Output_folder/')
    store = HDFStore(os.path.join(os.path.dirname(ERF_HDF5_DATA_DIR),filename+'.h5'))
    from pandas import DataFrame
    try:
        # Looping over simulation's attributes, saving only the ones that
        # match the list AND aren't empty
        for attrib, value in self.__dict__.iteritems():
            if attrib in attribute_list and value is not None:
                # Copy the cohort column by column into a plain DataFrame so
                # HDFStore can handle it
                record = DataFrame(index=value.index)
                for col in value.columns:
                    record[col] = value[col]
                print('saving')
                store[attrib] = record
            else:
                print('ignored')
        print(store)
    finally:
        # Do not leave the file open when a write fails.
        store.close()
def main():
    """Train one network per fold of a 3-fold split of the gene dataset.

    The dataset is split randomly into three folds. Each fold in turn is the
    validation set while the other two are concatenated for training;
    ``main_training`` receives the open 'predGeneExp1.h5' store to record its
    results.
    """
    # the loaded data is a DataFrame
    genedata = load_gene_dataset()
    # randomly split the dataset to three folds
    # this code should be improved in the future
    kfold = 3.0
    data_kfold = {}
    train, fold1 = train_test_split(genedata, test_size=1/kfold)
    data_kfold['fold1'] = fold1
    fold3, fold2 = train_test_split(train, test_size=0.5)
    data_kfold['fold2'] = fold2
    data_kfold['fold3'] = fold3
    # now we want to train a network for each fold
    # store the results in h5 file
    geneStore = HDFStore('predGeneExp1.h5')
    try:
        for key in data_kfold:
            print(key)
            test_data = data_kfold[key]
            X_val, y_val = get_input_output(test_data)
            # Build the list explicitly: on Python 3 dict.keys() is a view
            # without .remove() and without indexing.
            keys = [k for k in data_kfold if k != key]
            training_data = pd.concat([data_kfold[keys[0]], data_kfold[keys[1]]])
            X_train, y_train = get_input_output(training_data)
            print(keys)
            # use the these data to train the network
            main_training(key, X_train, y_train, X_val, y_val, geneStore)
    finally:
        # the h5 must be closed after using
        geneStore.close()
def in_store(self, path):
    """Return True when ``path`` is a key of the HDF5 file at ``self.path``,
    False otherwise. The store is always closed before returning.
    """
    s = HDFStore(self.path)
    try:
        return True if path in s else False
    finally:
        s.close()
def AddSeqComp(mypath):
    """ Loads TestLogsAll.h5 from the specified path, then calls
    MeasurementGroupTools.AddSeqComp for every distinct 'DirName' listed in
    the log. The log table is then written back to the HDF5 file and exported
    to TestLogsAll.xlsx in the same directory.

    Input:  Directory of the measurement campaign, e.g.: "aLabView2"
    Output: TestLogsAll.h5 and TestLogsAll.xlsx rewritten in that directory.
            Whatever MeasurementGroupTools.AddSeqComp produces in the data
            subdirectories is not visible from here.
    """
    from pandas import HDFStore, ExcelWriter
    import MeasurementGroupTools as mgt

    h5logs = HDFStore(mypath + "\\" + 'TestLogsAll.h5')
    try:
        TestLog = h5logs['TestLogsAll']

        dirs = TestLog[u'DirName'].unique()
        for dname in dirs:
            mysubdirpath = mypath + "\\" + dname
            print("Processing: " + dname)
            mgt.AddSeqComp(mysubdirpath, TestLog, dname)

        h5logs.put('TestLogsAll',TestLog)
    finally:
        # Release the file even when one sub-directory fails.
        h5logs.close()

    writer = ExcelWriter(mypath + "\\" + 'TestLogsAll.xlsx')
    TestLog.to_excel(writer,'TestLogsAll') # the second argument defines sheet name
    writer.save()

    return
def create_store(sub):
    """Build the seed-to-target table for subject ``sub`` and store it in
    'all.h5' under the key ``sub``.

    For each left-hemisphere ROI directory '../<sub>/out/L<i>' the strength
    and length matrices are read, and one row per (seed, target ROI) pair is
    appended. The matching right-hemisphere matrices are read but not used.

    NOTE(review): the loop stops after i == 1, so only the first ROI is
    processed; this looks like a leftover debugging guard.
    NOTE(review): 'SEED ROI' is written as i + 1 although i already starts at
    1 - confirm the intended numbering.
    """
    columns = [
        'SUB', 'SEED', 'SEED ROI', 'TARGET ROI', 'HEMISPHERE', 'DISTANCE',
        'STRENGTH', 'CAT1', 'CAT2', 'CAT3'
    ]
    hdf = HDFStore('all.h5')
    try:
        d = DataFrame(columns=columns)
        for i in range(1, 181):
            LSfname = '../' + sub + '/out/L' + str(i) + '/matrix_seeds_to_all_targets'
            LDfname = '../' + sub + '/out/L' + str(i) + '/matrix_seeds_to_all_targets_lengths'
            RSfname = '../' + sub + '/out/R' + str(i) + '/matrix_seeds_to_all_targets'
            RDfname = '../' + sub + '/out/R' + str(i) + '/matrix_seeds_to_all_targets_lengths'
            ls = readS2R(LSfname)
            rs = readS2R(RSfname)
            ld = readS2R_L(LDfname)
            rd = readS2R_L(RDfname)
            numSeeds, numROIs = ls.shape
            for j in tqdm(range(numSeeds), total=numSeeds):
                for q in range(numROIs):
                    # Label the values with the frame's column names. An
                    # unlabelled Series is aligned on 0..9 and would be
                    # appended as ten extra columns, leaving the named
                    # columns empty.
                    tmp = Series(
                        [sub, j + 1, i + 1, q + 1, 'L', ld[j, q], ls[j, q], '', '', ''],
                        index=columns)
                    d = d.append(tmp, ignore_index=True)
            if i == 1:
                break
        hdf.put(sub, d)
    finally:
        # The store was never closed before; close it so the table is
        # flushed to disk and the handle released.
        hdf.close()
def test_chunk():
    """Manual timing harness for chunked survey simulations.

    For the single year 2011, runs the simulation with 1 to 4 chunks, records
    the elapsed ``time.clock()`` per chunk count in ``tps`` and prints the
    aggregates frame. The run drops into ``pdb`` on every chunk count, so it
    is interactive.

    NOTE(review): the original indentation was lost; the nesting below
    (debugger and aggregates inside the chunk loop) is a reconstruction -
    confirm against the upstream file.
    """
    print "debut"
    writer = None  # never used below
    years = range(2011, 2012)
    filename = destination_dir + 'output3.h5'
    # The store is only opened here and closed at the end; nothing in this
    # function reads from or writes to it.
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
        # fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        import time
        tps = {}  # chunk count -> elapsed time.clock() for configure + compute
        for nb_chunk in range(1, 5):
            deb_chunk = time.clock()
            simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5', num_table=3, chunks_count=nb_chunk, print_missing=False)
            simu.compute()
            tps[nb_chunk] = time.clock() - deb_chunk
            voir = simu.output_table.table3['foy']
            print len(voir)
            pdb.set_trace()  # interactive breakpoint
            agg3 = Aggregates()
            agg3.set_simulation(simu)
            agg3.compute()
            df1 = agg3.aggr_frame
            print df1.to_string()
        print tps
    store.close()
def compute_and_save_hist_as_pd(values     : np.array           ,
                                out_file   : pd.HDFStore        ,
                                hist_name  : str                ,
                                n_bins     : int                ,
                                range_hist : Tuple[float, float],
                                norm       : bool = False       )->None:
    """
    Histogram ``values`` and store the result as a table in ``out_file``.

    Parameters
    ----------
    values : np.array
        Data to histogram.
    out_file : pd.HDFStore
        Open store that receives the table.
    hist_name : string
        Key under which the table is written.
    n_bins : int
        Number of bins.
    range_hist : length-2 tuple
        Lower and upper edge of the histogram.
    norm : bool
        Passed to ``np.histogram`` as ``density``.

    The stored table has two columns: 'entries' (bin contents) and
    'magnitude' (the bin edges passed through ``shift_to_bin_centers``). It
    is written in 'table' format with all columns as data columns.
    """
    counts, edges = np.histogram(values,
                                 bins    = n_bins,
                                 range   = range_hist,
                                 density = norm)
    columns = {'entries': counts, 'magnitude': shift_to_bin_centers(edges)}
    out_file.put(hist_name, pd.DataFrame(columns), format='table', data_columns=True)
def save_temp(dataframe, name=None, year=None, config_files_directory=default_config_files_directory):
    """
    Save a temporary table

    The table is written under the key "<year>/<name>" of the temporary HDF5
    file returned by ``get_tmp_file_path``; an existing entry with that key
    is deleted first.

    Parameters
    ----------
    dataframe : pandas DataFrame
                the dataframe to save
    name : string, default None
           name of the table; required
    year : integer, default None
           year of the data; required
    config_files_directory : directory passed to ``get_tmp_file_path``

    Returns
    -------
    True

    Raises
    ------
    Exception
        If ``year`` or ``name`` is None.
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(
        config_files_directory=config_files_directory)
    store = HDFStore(hdf_file_path)
    try:
        log.info("{}".format(store))
        store_path = "{}/{}".format(year, name)
        # store.keys() returns "/"-prefixed paths, so testing the bare path
        # against that list never matched; use the store's own membership
        # test instead.
        if store_path in store:
            del store[store_path]
        # Reuse the open handle instead of letting to_hdf open the same file
        # a second time.
        store.put(store_path, dataframe)
    finally:
        store.close()
    return True
def build_from_openfisca( directory = None):
    """Build the age-structure table for 2006-2009 and store it.

    For each year a SurveySimulation is configured and its age structure
    obtained from ``get_age_structure``; the 'wprm' column is renamed to the
    year and the yearly frames are merged together. The result is written
    under the key "openfisca" of H5_FILENAME inside ``directory``.

    Parameters
    ----------
    directory : str, default None
        Target directory; defaults to the directory of this module.
    """
    df_age_final = None
    for yr in range(2006,2010):
        simulation = SurveySimulation()
        simulation.set_config(year = yr)
        simulation.set_param()
        simulation.set_survey()

        df_age = get_age_structure(simulation)
        df_age[yr] = df_age['wprm']
        del df_age['wprm']
        if df_age_final is None:
            df_age_final = df_age
        else:
            df_age_final = df_age_final.merge(df_age)

    if directory is None:
        directory = os.path.dirname(__file__)

    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    try:
        print(df_age_final.dtypes)
        store.put("openfisca", df_age_final)
    finally:
        # Close the file even if the write fails.
        store.close()
def put(self, path, obj):
    """Store ``obj`` under ``path`` in the HDF5 file at ``self.path``.

    An existing entry is reported on stdout and removed before the new
    object is written. The store is always closed, including on error.
    """
    s = HDFStore(self.path)
    try:
        if path in s:
            print("updating %s" % path)
            s.remove(path)
        s[path] = obj
    finally:
        s.close()
def test_chunk():
    """Manual timing harness for chunked survey simulations.

    For the single year 2011, runs the simulation with 1 to 4 chunks, records
    the elapsed ``time.clock()`` per chunk count in ``tps`` and prints the
    aggregates frame. The run drops into ``pdb`` on every chunk count, so it
    is interactive.

    NOTE(review): the original indentation was lost; the nesting below
    (debugger and aggregates inside the chunk loop) is a reconstruction -
    confirm against the upstream file.
    """
    print "debut"
    writer = None  # never used below
    years = range(2011,2012)
    filename = destination_dir+'output3.h5'
    # The store is only opened here and closed at the end; nothing in this
    # function reads from or writes to it.
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
        # fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year = yr)
        simu.set_param()
        import time
        tps = {}  # chunk count -> elapsed time.clock() for configure + compute
        for nb_chunk in range(1,5):
            deb_chunk = time.clock()
            simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5', num_table=3, chunks_count=nb_chunk , print_missing=False)
            simu.compute()
            tps[nb_chunk] = time.clock() - deb_chunk
            voir = simu.output_table.table3['foy']
            print len(voir)
            pdb.set_trace()  # interactive breakpoint
            agg3 = Aggregates()
            agg3.set_simulation(simu)
            agg3.compute()
            df1 = agg3.aggr_frame
            print df1.to_string()
        print tps
    store.close()
def test():
    """Print the HDF store located next to this module and its keys."""
    directory = os.path.dirname(__file__)
    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    try:
        print(store)
        print(store.keys())
    finally:
        # The store used to be left open; release the file handle.
        store.close()
def SAVE_ChangeDictOrder(_processedEvents):
    '''Change the nesting order for the final HDF database - instead of
    correct/attention, entries are stored as
    <subject>/events/<attention|motor>/<correct|incorrect>.

    ``_processedEvents`` is paired positionally with the sorted recording
    names selected from the module-level ``events_names``.
    '''
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    # Replace the '_EVENTS' because the path in HDF must match exactly,
    # otherwise nothing was saved
    all_event_names = sorted([name.replace('_EVENTS', '') for name in events_names if bef_aft_dict[bef_aft_switch + '_mat'] in name])

    store = HDFStore(h_path +bef_aft_dict[bef_aft_switch+ '_hdf'])
    try:
        for _data, recording in zip(_processedEvents, all_event_names):
            print('I')
            sname = recording.rfind("/") +1
            # File name without directory and 4-character extension, dashes
            # replaced by underscores
            subId = recording[sname:-4].replace("-", "_")

            # Same write order as before: both 'correct' tables, then both
            # 'incorrect' ones.
            for outcome in ('correct', 'incorrect'):
                for kind in ('attention', 'motor'):
                    store[subId + '/events/' + kind + '/' + outcome] = _data[outcome][kind].convert_objects()
    finally:
        # Close the file even if one recording fails to convert.
        store.close()
def func_wrapper(*args, **kwargs):
    # Open the HDF store at `file_path`, pass it to `func` as the
    # `temporary_store` keyword argument and return func's result. Whether or
    # not func raises, the garbage collector is run and the store closed.
    opened_store = HDFStore(file_path)
    try:
        outcome = func(*args, temporary_store = opened_store, **kwargs)
        return outcome
    finally:
        gc.collect()
        opened_store.close()
def storeEEGinHDF():
    """Load EEG from 64 electrodes x ~30 min at 500 hz (large dataset)

    Every selected recording is loaded from its .mat file ('eegToSave'
    variable), transposed, passed column by column through ``FilterData``
    and stored under "<subject>/signal/filtered_30/". A running count is
    printed after each recording.
    """
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    all_eeg_names = sorted([
        name for name in eeg_names
        if bef_aft_dict[bef_aft_switch + '_mat'].replace("_EVENTS", "") in name
    ])

    store = HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf'])
    try:
        cnt = 0
        for recording in all_eeg_names:
            cnt = cnt + 1
            sname = recording.rfind("/") + 1
            # File name without directory and 4-character extension, dashes
            # replaced by underscores
            subId = recording[sname:-4].replace("-", "_")
            sig = pd.DataFrame(
                sio.loadmat(recording, struct_as_record=True)['eegToSave']).transpose()
            # Stores the filtered signal; an earlier version stored the
            # unfiltered one under subId + "/signal/f".
            store[subId + "/signal/filtered_30/"] = sig.convert_objects().apply(
                FilterData, axis=0)
            print(cnt)
    finally:
        # Close the file even if one recording fails to load or filter.
        store.close()
def SAVE_ChangeDictOrder(_processedEvents):
    '''Change the nesting order for the final HDF database - instead of
    correct/attention, entries are stored as
    <subject>/events/<attention|motor>/<correct|incorrect>.

    ``_processedEvents`` is paired positionally with the sorted recording
    names selected from the module-level ``events_names``.
    '''
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    # Replace the '_EVENTS' because the path in HDF must match exactly,
    # otherwise nothing was saved
    all_event_names = sorted([
        name.replace('_EVENTS', '') for name in events_names
        if bef_aft_dict[bef_aft_switch + '_mat'] in name
    ])

    store = HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf'])
    try:
        for _data, recording in zip(_processedEvents, all_event_names):
            print('I')
            sname = recording.rfind("/") + 1
            # File name without directory and 4-character extension, dashes
            # replaced by underscores
            subId = recording[sname:-4].replace("-", "_")

            # Same write order as before: both 'correct' tables, then both
            # 'incorrect' ones.
            for outcome in ('correct', 'incorrect'):
                for kind in ('attention', 'motor'):
                    target = subId + '/events/' + kind + '/' + outcome
                    store[target] = _data[outcome][kind].convert_objects()
    finally:
        # Close the file even if one recording fails to convert.
        store.close()
def save(self, store: pandas.HDFStore) -> None:
    """
    Write the model into an already-open HDFStore.

    The configuration returned by ``get_config`` is attached as the
    ``config`` attribute of an empty 'model' table. Every weight and layer
    then saves its own parameters under 'weights/weights<i>' and
    'layers/layers<i>' respectively (keys built with ``os.path.join``).

    Notes:
        Performs an IO operation.
        NOTE(review): os.path.join yields a backslash separator on Windows;
        confirm the matching load code builds its keys the same way.

    Args:
        store (pandas.HDFStore)

    Returns:
        None

    """
    # the config travels as an attribute on an empty placeholder table
    model_config = self.get_config()
    store.put('model', pandas.DataFrame())
    store.get_storer('model').attrs.config = model_config

    # parameters: weights first, then layers
    for index in range(self.num_weights):
        weight_key = os.path.join('weights', 'weights' + str(index))
        self.weights[index].save_params(store, weight_key)
    for index in range(self.num_layers):
        layer_key = os.path.join('layers', 'layers' + str(index))
        self.layers[index].save_params(store, layer_key)
def evaluate(model, test_hdf_file, get_batch, loss_function, batch_size, cuda=False):
    """Run ``model`` over the test set and print mean loss and accuracy.

    Batches come from ``get_batch(store, batch_size)``, which must yield
    ``(x, target, src_padding, target_padding)`` tuples; only ``x`` and
    ``target`` are used. A sample counts as correct when every position of
    its argmax prediction equals the target. Loss and accuracy are averaged
    over batches (not over samples).

    Raises ZeroDivisionError when ``get_batch`` yields no batch at all.
    """
    store_test = HDFStore(test_hdf_file)
    try:
        test_loss = 0
        accuracy = 0
        count = 0
        model.eval()
        if cuda:
            model = model.cuda()
        test_gen = get_batch(store_test, batch_size)
        for x, target, src_padding, target_padding in test_gen:
            if cuda:
                x, target = x.cuda(), target.cuda()
            out = model(x)
            loss = loss_function(out, target)
            # fraction of samples in the batch predicted entirely correctly
            acc = int(
                torch.all(out.argmax(dim=-1) == target, dim=-1).to(
                    torch.int).sum()) / out.shape[0]
            test_loss += loss.item()
            accuracy += acc
            count += 1
        test_loss /= count
        accuracy /= count
        print("Test Loss :", test_loss)
        print("Test accuracy :", accuracy)
    finally:
        # Release the HDF file even if the model or loss raises mid-way.
        store_test.close()
def __init__(self, path: str, table: str, compute: Optional[Callable] = None) -> None:
    """Open the HDF store at ``path`` for the table named ``table``.

    Without ``compute`` the store is opened read-only. With ``compute`` the
    store is opened writable, ``compute()`` is called to build the frame,
    which is sorted by its "where" column, passed through
    ``self._mangle_where`` and written in 'table' format, replacing any
    existing table of that name.
    """
    self.table = table
    compression = dict(complevel=self.complevel, complib=self.complib)

    if not compute:
        self.store = PandasHDFStore(path, mode="r", **compression)
        return

    self.store = PandasHDFStore(path, **compression)
    frame = compute()
    frame.sort_values(by="where", axis=0, inplace=True)
    self._mangle_where(frame)
    indexed_columns = [
        "where_", "where_type", "who", "who_type", "when", "when_type"
    ]
    self.store.put(
        self.table,
        frame,
        append=False,
        format="table",
        expectedrows=len(frame),
        data_columns=indexed_columns,
    )
def build_actualisation_group_names_h5():
    """Copy the 'defs' sheet of 'actualisation_groups.xls' into the 'names'
    table of '../actualisation_groups.h5' and print it.

    Cells equal to 'NA' are read as missing values.
    """
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    try:
        xls = ExcelFile('actualisation_groups.xls')
        df = xls.parse('defs', na_values=['NA'])
        store['names'] = df
        print(df.to_string())
    finally:
        # Close the store even if the spreadsheet is missing or unreadable.
        store.close()
def save(self,dataFile):
    """ save data to HDF

    Every item of ``self.wp`` is written to ``dataFile`` under its own name.
    """
    # Print the message itself, not a tuple: the doubled parentheses were an
    # artefact of an automatic Python 2 -> 3 conversion.
    print('Saving data to', dataFile)
    store = HDFStore(dataFile)
    try:
        for symbol in self.wp.items:
            store[symbol] = self.wp[symbol]
    finally:
        store.close()
def save(self, dataFile):
    """ save data to HDF

    Every item of ``self.wp`` is written to ``dataFile`` under its own name.
    """
    # A single formatted string prints the same text whether `print` is the
    # Python 2 statement or the Python 3 function.
    print('Saving data to %s' % (dataFile,))
    store = HDFStore(dataFile)
    try:
        for symbol in self.wp.items:
            store[symbol] = self.wp[symbol]
    finally:
        # Close the file even if one item fails to serialise.
        store.close()
def __init__(self, delta=1.0, resize=True):
    """Open the label store and create the output image file.

    Reads the 'labels_train' table from '../dataset/labels.h5' (the store is
    kept open on ``self.store``), resolves the AVA data directory against the
    current working directory and opens
    '../dataset/images_299x299_delta_<delta>.h5' for writing.

    ``resize`` is accepted but not used in this method.
    """
    labels_store = HDFStore('../dataset/labels.h5')
    self.store = labels_store
    self.ava_table = labels_store['labels_train']

    relative_data_dir = "../dataset/AVA/data/"
    self.ava_path = relative_data_dir
    self.ava_data_path = os.path.join(os.getcwd(), relative_data_dir)

    images_file = '../dataset/images_299x299_delta_{}.h5'.format(delta)
    self.h5f = h5py.File(images_file, 'w')
    self.delta = delta
def show_temp(config_files_directory=default_config_files_directory):
    """Log the string representation of the temporary HDF store.

    The file path is obtained from ``get_tmp_file_path``; the store is opened,
    logged at info level and closed again.
    """
    tmp_store = HDFStore(
        get_tmp_file_path(config_files_directory=config_files_directory))
    description = "{}".format(tmp_store)
    log.info(description)
    tmp_store.close()
def get_children_paths(self, node_path):
    """Return the ``_v_pathname`` of every direct child of the node at
    ``node_path`` in the HDF5 file at ``self.path``.

    The store is always closed, including when the node lookup fails.
    """
    s = HDFStore(self.path)
    try:
        node = s.get_node(node_path)
        return [child._v_pathname for child in node._v_children.values()]
    finally:
        s.close()
class PandasHDFHandler(FileHandler):
    """
    File handler backed by a pandas HDFStore.

    Arrays are stored at the root of the file, axes under '__axes__/' and
    groups under '__groups__/'.
    """
    def _open_for_read(self):
        # read-only handle
        self.handle = HDFStore(self.fname, mode='r')

    def _open_for_write(self):
        # HDFStore's default mode
        self.handle = HDFStore(self.fname)

    def list_items(self):
        """Return (name, kind) pairs: axes first, then groups, then arrays."""
        names = [raw.strip('/') for raw in self.handle.keys()]
        axes = [(n.split('/')[-1], 'Axis') for n in names if '__axes__' in n]
        groups = [(n.split('/')[-1], 'Group') for n in names if '__groups__' in n]
        # anything stored directly at the root is an array
        arrays = [(n, 'Array') for n in names if '/' not in n]
        return axes + groups + arrays

    def _read_item(self, key, type, *args, **kwargs):
        """Read one item; returns (key, object). Unknown kinds raise TypeError."""
        if type == 'Array':
            hdf_key = '/' + key
        elif type in ('Axis', 'Group'):
            prefix = '__axes__/' if type == 'Axis' else '__groups__/'
            hdf_key = prefix + key
            # axes and groups are read back with their name
            kwargs['name'] = key
        else:
            raise TypeError()
        return key, read_hdf(self.handle, hdf_key, *args, **kwargs)

    def _dump_item(self, key, value, *args, **kwargs):
        """Write one LArray, Axis or Group; anything else raises TypeError."""
        if isinstance(value, (LArray, Axis)):
            prefix = '/' if isinstance(value, LArray) else '__axes__/'
            value.to_hdf(self.handle, prefix + key, *args, **kwargs)
        elif isinstance(value, Group):
            # a group also receives the key of the axis it belongs to
            group_key = '__groups__/' + key
            axis_key = '__axes__/' + value.axis.name
            value.to_hdf(self.handle, group_key, axis_key, *args, **kwargs)
        else:
            raise TypeError()

    def _read_metadata(self):
        """Return the stored metadata, or an empty Metadata when none exists."""
        stored = Metadata.from_hdf(self.handle)
        return Metadata() if stored is None else stored

    def _dump_metadata(self, metadata):
        metadata.to_hdf(self.handle)

    def close(self):
        self.handle.close()
def convert_to_3_tables(year=2006, survey_file=None, output_file=None):
    """Split the survey table of ``year`` into one table per entity.

    The table 'survey_<year>' is read from ``survey_file``. For each entity
    in 'ind', 'foy', 'men', 'fam' the matching variables are selected and
    written to ``output_file`` under 'survey_<year>/<entity>'. For the
    non-individual entities only the rows whose 'qui<entity>' is 0 are kept,
    together with the identifier columns.

    Parameters
    ----------
    year : int, default 2006
    survey_file : str
        HDF5 file holding the survey; required.
    output_file : str, default None
        HDF5 file receiving the tables. When None the survey file itself is
        used and a warning is issued.

    Raises
    ------
    Exception
        If ``survey_file`` is None.
    """
    if survey_file is None:
        raise Exception(
            'You need a .h5 file with the survey to extract the variables from'
        )
    if output_file is None:
        import warnings
        output_file = survey_file
        # Warn instead of raising: `raise Warning(...)` aborted the call and
        # made the documented default unusable.
        warnings.warn(
            'the survey file will be used to store the created tables')

    store = HDFStore(survey_file)
    try:
        output = HDFStore(output_file)
        try:
            print(output)

            simulation = SurveySimulation()
            simulation.set_config(year=year)
            table1 = store['survey_' + str(year)]

            for entity in ['ind', 'foy', 'men', 'fam']:
                key = 'survey_' + str(year) + '/' + str(entity)

                vars_matching_entity = vars_matching_entity_from_table(
                    table1, simulation, entity)
                # Show the variables that were matched; the helper function
                # object itself used to be printed here.
                print("%s %s" % (entity, vars_matching_entity))
                print('table1 enum')

                if entity == 'ind':
                    print('INDIVIDUALS')
                    print(table1['noindiv'])
                    table_entity = table1.loc[:, vars_matching_entity]
                else:
                    # we take care to have all idents and to select qui == 0
                    position = 'qui' + entity
                    table_entity = table1.ix[table1[position] == 0, [
                        'noi', 'idmen', 'idfoy', 'idfam', 'quifoy', 'quimen',
                        'quifam'
                    ] + vars_matching_entity]
                    table_entity = table_entity.rename_axis(
                        table_entity['id' + entity], axis=1)
                print(key)
                output.put(key, table_entity)

            del table1
            import gc
            gc.collect()
        finally:
            output.close()
    finally:
        # Both files are released even when an entity fails.
        store.close()
def setup(self):
    """Create a random 20 x 1000 x 25 Panel and append it to a fresh store.

    The panel is kept on ``self.p`` and written under 'p1' to '__test__.h5',
    which stays open on ``self.store``. Warnings raised meanwhile are
    recorded rather than shown.
    """
    self.fname = '__test__.h5'
    with warnings.catch_warnings(record=True):
        values = np.random.randn(20, 1000, 25)
        item_labels = ['Item%03d' % i for i in range(20)]
        dates = date_range('1/1/2000', periods=1000)
        minor_labels = ['E%03d' % i for i in range(25)]
        self.p = Panel(values,
                       items=item_labels,
                       major_axis=dates,
                       minor_axis=minor_labels)
        self.store = HDFStore(self.fname)
        self.store.append('p1', self.p)
def __init__(self, filename):
    """
    Open the HDF store at ``filename`` and keep it on ``self._store``.

    Parameters
    ----------
    filename : path of an existing HDFStore with valid data in it.
    """
    opened = HDFStore(filename)
    self._store = opened
def test_read_nokey_empty(setup_path):
    """read_hdf without a key on an empty HDF5 file raises ValueError."""
    expected = re.escape(
        "Dataset(s) incompatible with Pandas data types, not table, or no "
        "datasets found in HDF5 file.")
    with ensure_clean_path(setup_path) as path:
        # opening and closing immediately leaves an empty file behind
        HDFStore(path).close()
        with pytest.raises(ValueError, match=expected):
            read_hdf(path)
def run_convert(basedir, beam):
    """Convert the ROOT tree of one beam setting into an HDF5 table.

    Reads the "DetectorTree" from '<basedir>/lmon_p<beam>.root' and writes
    one row per event with the EMC, HAC1 and HAC2 energy deposits followed
    by the per-layer deposits to the key "hcal" of
    '<basedir>/HCal_p<beam>.h5'.
    """
    # One formatted string prints the same text under the Python 2 print
    # statement and the Python 3 function.
    print("Converting for: {0}".format(beam))

    infile = basedir + "/lmon_p"+str(beam)+".root"
    outfile = basedir + "/HCal_p"+str(beam)+".h5"

    #lmon input
    inp = TFile.Open(infile)
    tree = inp.Get("DetectorTree")

    #load the tree
    ucal_edep_EMC = rt.EntryD()
    ucal_edep_HAC1 = rt.EntryD()
    ucal_edep_HAC2 = rt.EntryD()
    ucal_edep_layers = std.vector(float)()
    tree.SetBranchAddress("ucal_edep_EMC", AddressOf(ucal_edep_EMC, "v"))
    tree.SetBranchAddress("ucal_edep_HAC1", AddressOf(ucal_edep_HAC1, "v"))
    tree.SetBranchAddress("ucal_edep_HAC2", AddressOf(ucal_edep_HAC2, "v"))
    tree.SetBranchAddress("ucal_edep_layers", ucal_edep_layers)

    # the number of layers is taken from the first entry
    tree.GetEntry(0)
    nlay = ucal_edep_layers.size()

    #output DataFrame
    col = ["ucal_edep_EMC", "ucal_edep_HAC1", "ucal_edep_HAC2"]
    for i in range(nlay):
        col.append( "ucal_edep_layer"+str(i) )

    df_inp = []

    #event loop
    for iev in xrange(tree.GetEntriesFast()):
        tree.GetEntry(iev)
        lin = [ucal_edep_EMC.v, ucal_edep_HAC1.v, ucal_edep_HAC2.v]
        lin.extend(ucal_edep_layers.at(i) for i in xrange(nlay))
        df_inp.append(lin)

    df = DataFrame(df_inp, columns=col)
    print(df)

    out = HDFStore(outfile)
    try:
        out["hcal"] = df
    finally:
        # Close the output even if the write fails.
        out.close()

    inp.Close()
def test_store(self):
    """Print the keys of the final store and a summary of its '/logs' table
    (type, length and columns)."""
    final_store = HDFStore(self.store_path)
    try:
        print('----')
        print(final_store.keys())
        print('-' * 80)
        logs = final_store['/logs']
        print(type(logs))
        print(len(logs))
        print(logs.columns)
    finally:
        # Close the file even when '/logs' is missing.
        final_store.close()
def _put(self, path, obj):
    """Store ``obj`` under ``path`` in the HDF5 file at ``self.path``.

    An existing entry is reported on stdout and removed first. The file is
    then closed and reopened before the new object is written and flushed to
    disk with fsync. Both handles are closed even when an operation raises.
    """
    s = HDFStore(self.path)
    try:
        if path in s:
            print("updating %s" % path)
            s.remove(path)
    finally:
        s.close()

    s = HDFStore(self.path)
    try:
        s[path] = obj
        s.flush(fsync=True)
    finally:
        s.close()
def load(self,dataFile):
    """load data from HDF

    Every key of the store becomes one item of ``self.wp`` (a WidePanel),
    keyed by the store key as returned by ``store.keys()``.

    Raises IOError when ``dataFile`` does not exist.
    """
    if not os.path.exists(dataFile):
        raise IOError('Data file does not exist')

    store = HDFStore(dataFile)
    try:
        symbols = store.keys()
        data = dict(zip(symbols,[store[symbol] for symbol in symbols]))
        self.wp = WidePanel(data)
    finally:
        # Close the file even if one table fails to load.
        store.close()
def load(self,dataFile):
    """load data from HDF

    Every key of the store becomes one item of ``self.wp`` (a Panel), named
    after the key with its surrounding '/' characters stripped.

    Raises IOError when ``dataFile`` does not exist.
    """
    if not os.path.exists(dataFile):
        raise IOError('Data file does not exist')

    store = HDFStore(dataFile)
    try:
        symbols = [str(s).strip('/') for s in store.keys()]
        data = {symbol: store[symbol] for symbol in symbols}
        self.wp = Panel(data)
    finally:
        # Close the file even if one table fails to load.
        store.close()
def anls():
    """Plot, per station id, a histogram of the change in available bikes.

    Loads the 'divvy' panel from 'hdf5/divvy.h5', stacks its items into one
    frame indexed by timestamp, splits it by 'id', computes the first
    difference of 'availableBikes' for each group and draws a 20-bin
    histogram of it over [-5, 5].
    """
    store = HDFStore('hdf5/divvy.h5')
    panel = store['divvy']
    store.close()

    pieces = [panel[item] for item in panel.items]
    df = reduce(lambda acc, nxt: acc.append(nxt), pieces)
    df.index = df.timestamp

    # one frame per station id
    per_station = [frame for _, frame in df.groupby('id')]
    for frame in per_station:
        frame['diff'] = frame.availableBikes.diff()
    for frame in per_station:
        frame['diff'].hist(range=[-5,5],bins=20)
    plt.show()
def final_check(year=2006):
    """Print which expected variables are missing from the test survey.

    Loads 'survey_2006' from test.h5 and survey.h5 in DATA_SOURCES_DIR,
    prints the expected variable names that are absent from the test table's
    columns, then prints the identifier columns of the rows with
    ``idfoy == 603018901``.

    NOTE(review): ``year`` is not used; the key 'survey_2006' is hard-coded.
    """
    test_filename = os.path.join(DATA_SOURCES_DIR, "test.h5")
    survey_filename = os.path.join(DATA_SOURCES_DIR, "survey.h5")
    store = HDFStore(test_filename)
    try:
        survey = HDFStore(survey_filename)
        try:
            final2 = store.get('survey_2006')
            print(survey)
            finalT = survey.get('survey_2006')
        finally:
            survey.close()
    finally:
        # Neither store used to be closed.
        store.close()

    varlist = [
        'adeben',
        'adfdap',
        'amois',
        'ancchom',
        'ancentr',
        'anciatm',
        'ancrech',
        'anref',
        'contra',
        'datant',
        'dimtyp',
        'ident',
        'idfoy',  # the comma was missing, fusing this entry with the next into 'idfoynoi'
        'noi',
        'nondic',
        'rabs',
        'RABSP',
        'RAISTP',
        'raistp',
        'rdem',
        'retrai',
        'sitant',
        'sp10',
        'sp11',
        'stc',
        'TXTPPB',
    ]
    # sp00 .. sp09
    for i in range(0, 10):
        varname = 'sp0' + str(i)
        varlist.append(varname)

    varlist = set(varlist)
    columns = final2.columns
    columns = set(columns)

    print(varlist.difference(columns))
    print(final2.loc[
        final2.idfoy == 603018901,
        ['idfoy', 'quifoy', 'idfam', 'quifam', 'idmen', 'quimen', 'noi']
    ].to_string())
    return
def refresh_population(self):
    '''
    Refresh after population update

    Reloads the population named by the parameter widget from the configured
    population file into ``self.population`` and pushes it, with its index
    reset, to the population widget.
    '''
    population_file = CONF.get('paths', 'population_file')
    store_pop = HDFStore(population_file,'r')
    try:
        self.population = store_pop[self._param_widget.population_name]
    finally:
        # Close the file even when the population name is not in the store.
        store_pop.close()
    population = self.population.reset_index()
    self._population_widget.set_dataframe(population)
    self._population_widget.update_view()
def store_results(self, result, index, columns, hdf5_file):
    """Build ``self.df`` from ``result`` and append it to an HDF5 file.

    The frame is built with ``columns``, indexed by ``index`` and sorted by
    that index, then appended to the 'df' table of ``hdf5_file`` with every
    entry of ``columns`` declared as a data column.
    """
    self.df = DataFrame(result, columns=columns)
    self.df = self.df.set_index(index)
    self.df.sort_index(inplace=True)

    # Store the DataFrame as an HDF5 file...
    hdf = HDFStore(hdf5_file)
    try:
        # Append the dataframe, and ensure addr / host can be 17 chars long
        hdf.append('df', self.df, data_columns=list(columns),
                   min_itemsize={'addr': 17, 'host': 17})
    finally:
        # Close the file even if the append is rejected.
        hdf.close()
def convert_fiducial(filename, output_type="csv", decimal_places=8, append_comp=True, num_fids=5, return_name=True, mode='mean', **kwargs):
    '''
    Converts the fiducial comparison HDF5 files into a CSV file.

    Each key of the store becomes one output column: the stored table is
    reduced with ``timestep_choose`` and passed through ``trunc_float``. The
    output file is named "fiducials<comparison>.<output_type>", where the
    comparison is the first entry of ``all_comparisons`` found in
    ``filename`` (its last character is dropped).

    NOTE(review): the original indentation was lost; the nesting below is a
    reconstruction - confirm against the upstream file.

    Parameters
    ----------
    filename : str
        HDF5 file.
    output_type : str, optional
        Type of file to output.
    decimal_places : int, optional
        Specify the number of decimal places to keep.
    append_comp : bool, optional
        Append the "Fiducial 1" and "Fiducial 2" columns.
    num_fids : int, optional
        Number of fiducials compared.
    return_name : bool, optional
        Return the name of the file that was written.
    mode : str, optional
        Passed to ``timestep_choose`` together with ``**kwargs``.

    Returns
    -------
    output_name : str
        Only when ``return_name`` is true; otherwise None.
    '''
    store = HDFStore(filename)
    data_columns = dict()
    for key in store.keys():
        data = store[key].sort(axis=1)
        mean_data = timestep_choose(data, mode=mode, **kwargs)
        # key[1:] drops the first character of the store key
        data_columns[key[1:]] = trunc_float(mean_data, decimal_places)
        # Kept from the last key processed; undefined if the store is empty.
        comp_fids = store[key].index
    store.close()
    df = DataFrame(data_columns)
    if append_comp:
        # Fiducial i is repeated (num_fids - 1 - i) times:
        # 0 x (num_fids-1), 1 x (num_fids-2), ...
        fids = []
        for fid, num in zip(np.arange(0, num_fids - 1), np.arange(num_fids - 1, 0, -1)):
            for _ in range(num):
                fids.append(fid)
        df["Fiducial 1"] = Series(np.asarray(fids).T, index=df.index)
        df["Fiducial 2"] = Series(comp_fids.T, index=df.index)
    # for/else: `comp` keeps the first comparison name contained in the
    # filename; the else branch runs only when none matched.
    for comp in all_comparisons:
        if comp in filename:
            break
    else:
        # StandardError only exists on Python 2
        raise StandardError("Could not find a face comparison match for " + filename)
    output_name = "fiducials" + comp[:-1] + "." + output_type
    df.to_csv(output_name)
    if return_name:
        return output_name
def __init__(self, *args, **kwargs):
    """Open an HDFStore guarded by an exclusive lock file.

    The lock file is "<first positional argument>.lock". Its exclusive
    creation is retried every ``probe_interval`` seconds (keyword argument,
    default 1.0, removed before the remaining arguments are forwarded to
    ``HDFStore.__init__``) for as long as the file already exists.

    If opening the store fails, the lock is released again so other
    processes are not blocked forever; the original error is re-raised.
    """
    probe_interval = kwargs.pop("probe_interval", 1.0)
    self._lock = "%s.lock" % args[0]
    while True:
        try:
            self._flock = os.open(self._lock, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            break
        except FileExistsError:
            time.sleep(probe_interval)

    try:
        HDFStore.__init__(self, *args, **kwargs)
    except BaseException:
        # Nothing else would release the lock when construction fails.
        os.close(self._flock)
        os.remove(self._lock)
        raise