class Data(tables.IsDescription):
    '''
    Description of data table, each row refers to an event/trace.

    Besides plain scalar columns the row contains one nested group,
    ``time``, declared below as an inner description class.
    NOTE(review): the three ``*_table_n_i`` columns look like row references
    into the receiver, response and time tables -- confirm against the code
    that fills them.
    '''
    receiver_table_n_i = tables.Int32Col()
    response_table_n_i = tables.Int32Col()
    time_table_n_i = tables.Int32Col()
    #
    # start_time = Time ()                        # Start time of trace
    class time(tables.IsDescription):
        ''' Time, either epoch or human readable '''
        type_s = tables.StringCol(8)  # 'EPOCH', 'ASCII', or 'BOTH'
        epoch_l = tables.Int64Col()  # Seconds since January 1, 1970
        ascii_s = tables.StringCol(32)  # WWW MMM DD HH:MM:SS YYYY
        micro_seconds_i = tables.Int32Col()
    #
    event_number_i = tables.Int32Col()  # Event number
    channel_number_i = tables.Int8Col()  # Channel number
    sample_rate_i = tables.Int16Col()  # Trace sample rate
    sample_rate_multiplier_i = tables.Int16Col(
    )  # This will be needed for sample rates < 1 sps
    sample_count_i = tables.Int32Col()  # Version 2007.191a bleeding
    stream_number_i = tables.Int8Col()  # Stream
    raw_file_name_s = tables.StringCol(32)  # Original file name
    array_name_data_a = tables.StringCol(
        16)  # Name of array that contains trace
    array_name_SOH_a = tables.StringCol(16)  # The SOH array name
    array_name_event_a = tables.StringCol(16)  # The event table array
    array_name_log_a = tables.StringCol(16)  # The log array
class elevation(tb.IsDescription):
    """PyTables row description with nine scalar columns.

    Every column carries an explicit ``pos`` (1 through 9), so the column
    order of the table is the order in which they are written here.
    NOTE(review): the column names suggest altimetry records (orbit, time,
    position, elevation, gain and three flag bytes) -- units and time epoch
    are not stated in this file; confirm with the data producer.
    """
    orbit = tb.Int32Col(pos=1)
    utc85 = tb.Float64Col(pos=2)
    lon = tb.Float64Col(pos=3)
    lat = tb.Float64Col(pos=4)
    elev = tb.Float64Col(pos=5)
    agc = tb.Float64Col(pos=6)
    # Three 8-bit signed columns close the record.
    fmode = tb.Int8Col(pos=7)
    fret = tb.Int8Col(pos=8)
    fprob = tb.Int8Col(pos=9)
class IDR(tb.IsDescription):
    """PyTables row description with nine scalar columns.

    Column order is pinned by explicit ``pos`` values 0 through 8.
    NOTE(review): ``secs85`` presumably counts seconds from a 1985 epoch and
    the ``f*`` columns look like flag bytes -- neither is established by this
    file; confirm before relying on it.
    """
    orbit = tb.Int32Col(pos=0)
    secs85 = tb.Float64Col(pos=1)
    lat = tb.Float64Col(pos=2)
    lon = tb.Float64Col(pos=3)
    elev = tb.Float64Col(pos=4)
    agc = tb.Float64Col(pos=5)
    # Three 8-bit signed columns close the record.
    fmode = tb.Int8Col(pos=6)
    fret = tb.Int8Col(pos=7)
    fprob = tb.Int8Col(pos=8)
class Trades(tables.IsDescription):
    """PyTables row description of one trade record: ten scalar columns.

    NOTE(review): no column declares ``pos``, so the column order of the
    resulting table is chosen by PyTables rather than by the order written
    here -- confirm if any reader depends on column order.
    """
    # 64-bit integer identifiers and timestamp.
    time = tables.Int64Col()
    trader_id = tables.Int64Col()
    trade_id = tables.Int64Col()
    sequence_id = tables.Int64Col()
    side = tables.Int8Col()
    price = tables.Float64Col()
    quantity = tables.Int64Col()
    origin_id = tables.Int8Col()
    # Boolean markers.
    is_auction = tables.BoolCol()
    is_aggressor = tables.BoolCol()
class State(tables.IsDescription):
    """PyTables row description of one recorded state.

    Three columns are arrays: ``board`` is a 22x10 block of 8-bit integers,
    ``policy`` holds ``n_actions`` 32-bit floats and ``child_stats`` holds a
    6 x ``n_actions`` block of 32-bit floats.  ``n_actions`` is not defined
    in this class; it must already exist in the enclosing scope when the
    class body is executed.
    """
    episode = tables.Int32Col()
    board = tables.Int8Col(shape=(22, 10))
    policy = tables.Float32Col(shape=(n_actions, ))
    action = tables.Int8Col()
    combo = tables.Int32Col()
    lines = tables.Int32Col()
    score = tables.Int32Col()
    child_stats = tables.Float32Col(shape=(6, n_actions))
    cycle = tables.Int32Col()
    value = tables.Float32Col()
    variance = tables.Float32Col()
class analDataPoint(tables.IsDescription):
    """PyTables row description of one analysis data point.

    Several columns are fixed-size arrays whose width is ``nb_neurones``;
    that name is not defined here and must exist in the enclosing scope
    when the class body is executed.
    """
    # Scalar 8-bit settings.
    random_seed = tables.Int8Col()
    time_delay = tables.Int8Col()

    # Array-valued columns with 54 leading entries.
    min_q = tables.Float64Col(shape=(54, nb_neurones))
    q_type = tables.Int64Col(shape=54)

    dimension = tables.Float64Col()

    # Real and imaginary parts are stored as two separate float vectors.
    eigvals_real = tables.Float64Col(shape=nb_neurones)
    eigvals_imag = tables.Float64Col(shape=nb_neurones)

    converges = tables.BoolCol()
    train_output = tables.Float64Col(shape=(31, 500, 3))

    # Paired (3, nb_neurones) blocks: a float value and an 8-bit delay.
    max_cov_val = tables.Float64Col(shape=(3, nb_neurones))
    max_cov_delay = tables.Int8Col(shape=(3, nb_neurones))
class GLA(tb.IsDescription):
    """PyTables row description with thirteen scalar columns.

    Column order is pinned by explicit ``pos`` values 0 through 12.
    NOTE(review): ``secs00`` presumably counts seconds from a year-2000
    epoch and the ``f*`` columns look like flag bytes -- not established by
    this file; confirm before relying on it.
    """
    orbit = tb.Int32Col(pos=0)
    secs00 = tb.Float64Col(pos=1)
    lat = tb.Float64Col(pos=2)
    lon = tb.Float64Col(pos=3)
    elev = tb.Float64Col(pos=4)
    agc = tb.Float64Col(pos=5)
    energy = tb.Float64Col(pos=6)
    txenergy = tb.Float64Col(pos=7)
    reflect = tb.Float64Col(pos=8)
    fbuff = tb.Int8Col(pos=9)
    fmask = tb.Int8Col(pos=10)
    fbord = tb.Int8Col(pos=11)
    # The only unsigned byte in the record; the three before it are signed.
    ftrk = tb.UInt8Col(pos=12)
class Record(tb.IsDescription):
    """Row description with one column per PyTables column type.

    ``var1`` to ``var14`` are always present.  ``var15`` to ``var19`` use
    types that only exist on some platforms/builds, so each is declared only
    when PyTables exposes the matching column class.  Every column has a
    non-zero default so default handling can be observed.
    """
    var1 = tb.StringCol(itemsize=4, dflt=b"abcd", pos=0)
    var2 = tb.StringCol(itemsize=1, dflt=b"a", pos=1)
    var3 = tb.BoolCol(dflt=1)
    var4 = tb.Int8Col(dflt=1)
    var5 = tb.UInt8Col(dflt=1)
    var6 = tb.Int16Col(dflt=1)
    var7 = tb.UInt16Col(dflt=1)
    var8 = tb.Int32Col(dflt=1)
    var9 = tb.UInt32Col(dflt=1)
    var10 = tb.Int64Col(dflt=1)
    var11 = tb.Float32Col(dflt=1.0)
    var12 = tb.Float64Col(dflt=1.0)
    var13 = tb.ComplexCol(itemsize=8, dflt=(1.+0.j))
    var14 = tb.ComplexCol(itemsize=16, dflt=(1.+0.j))
    # Platform-dependent types: declare only what this PyTables build offers.
    if hasattr(tb, 'Float16Col'):
        var15 = tb.Float16Col(dflt=1.0)
    if hasattr(tb, 'Float96Col'):
        var16 = tb.Float96Col(dflt=1.0)
    if hasattr(tb, 'Float128Col'):
        var17 = tb.Float128Col(dflt=1.0)
    # A 24-byte complex is 192 bits wide; the previous guard tested the
    # non-existent name 'Complex196Col', so var18 could never be declared.
    if hasattr(tb, 'Complex192Col'):
        var18 = tb.ComplexCol(itemsize=24, dflt=(1.+0.j))
    if hasattr(tb, 'Complex256Col'):
        var19 = tb.ComplexCol(itemsize=32, dflt=(1.+0.j))
def merge_hdf5_hase(args):
    """Merge the per-part HASE genotype files into one compressed HDF5 array.

    Reads ``<args.genotype>/genotype/<i>_<args.study_name>.h5`` for
    ``i = 0 .. N-1``, where N is the number of ``*.h5`` files found in that
    folder, and appends the rounded ``genotype`` dataset of each file to an
    extendable Int8 array called ``data`` stored in
    ``<args.outfolder><args.study_name>_step2_merged_genotype.h5``.

    Args:
        args: namespace providing ``genotype`` (input folder),
            ``study_name``, ``outfolder`` (prefix of the output path) and
            ``comp_level`` (zlib compression level).

    Returns:
        None.  As its last step the function sets
        ``args.outfolder = args.genotype``.
    """
    print(args.genotype, type(args.genotype))
    filepath_hase = args.genotype + '/genotype/{}_' + args.study_name + '.h5'
    merged_path = (args.outfolder + args.study_name +
                   '_step2_merged_genotype.h5')

    # The first part is only needed for the width (second dimension) of the
    # merged array; close it again right away.
    with h5py.File(filepath_hase.format(0), 'r') as first_part:
        num_pat = first_part['genotype'].shape[1]

    number_of_files = len(glob.glob(args.genotype + "/genotype/*.h5"))
    print('number of files ', number_of_files)

    atom = tables.Int8Col()
    filter_zlib = tables.Filters(complib='zlib', complevel=args.comp_level)

    # One ``with`` block keeps the output open for the whole merge and
    # guarantees it is closed even if reading one of the parts fails.
    with tables.open_file(merged_path, mode='w') as f:
        merged = f.create_earray(f.root, 'data', atom, (0, num_pat),
                                 filters=filter_zlib)
        print("\n merge all files...")
        for i in tqdm.tqdm(range(number_of_files)):
            with h5py.File(filepath_hase.format(i), 'r') as part:
                gen_tmp = part['genotype']
                # ``np.int`` no longer exists in NumPy; cast directly to the
                # Int8 type of the target array instead.
                merged.append(np.round(gen_tmp[:, :]).astype(np.int8))

    args.outfolder = args.genotype
class AMASS_Params_Row(tables.IsDescription):
    """PyTables row description with two scalar and four vector columns.

    Column order is fixed by explicit ``pos`` values 1 through 6.  The
    vector columns are one-dimensional float32 arrays of length 16, 156,
    8 and 3 respectively.
    """
    # Scalar identifiers.
    subject = tables.Int16Col(pos=1)
    gender = tables.Int8Col(pos=2)

    # Fixed-length float32 vectors.
    shape = tables.Float32Col(shape=16, pos=3)
    pose = tables.Float32Col(shape=52 * 3, pos=4)
    dmpl = tables.Float32Col(shape=8, pos=5)
    trans = tables.Float32Col(shape=3, pos=6)
class Tasks(tables.IsDescription):
    """PyTables row description of one task.

    The columns are listed below in table order (``pos`` 0 through 8).
    ``task_type`` is an enumerated column stored as ``int32`` whose default
    value is ``'welcoming'``.
    """
    idnumber = tables.Int64Col(pos=0)
    day = tables.Time32Col(pos=1)
    task_type = tables.EnumCol(
        enum=['welcoming', 'balisage', 'logistics', 'security', 'race',
              'other'],
        dflt='welcoming',
        base='int32',
        pos=2)
    name = tables.StringCol(itemsize=128, pos=3)
    time_start = tables.Time32Col(pos=4)
    time_end = tables.Time32Col(pos=5)
    N_needed = tables.Int8Col(pos=6)
    N_filled = tables.Int8Col(pos=7)
    remarqs = tables.StringCol(itemsize=128, pos=8)
    # Columns that are currently disabled:
    # stuff_needed = tables.StringCol(128, pos=9)
    # affected_volunteers = tables.Int64Col(pos=10, shape=50, dflt=-1)
class Trade(tables.IsDescription):
    """PyTables row description of one trade record: eight scalar columns.

    NOTE(review): no column declares ``pos``, so the column order of the
    resulting table is chosen by PyTables rather than by the order written
    here -- confirm if any reader depends on column order.
    """
    time = tables.Int64Col()
    # 64-bit identifiers of the two orders and the two parties.
    buy_order_id = tables.Int64Col()
    sell_order_id = tables.Int64Col()
    buyer = tables.Int64Col()
    seller = tables.Int64Col()
    price = tables.Float64Col()
    quantity = tables.Int64Col()
    side = tables.Int8Col()
class Observation(tb.IsDescription):
    """PyTables table descriptor holding the details of one observation.

    All seventeen columns carry an explicit ``pos`` (0 through 16), so the
    table's column order is exactly the order written below.
    """
    # --- who / where / when ---
    telescope = tb.StringCol(itemsize=32, pos=0)      # telescope name (always Parkes for us)
    receiver = tb.StringCol(itemsize=32, pos=1)       # receiver name (MULTI for us)
    date = tb.Time64Col(pos=2)                        # date; 32 bits would suffice, 64 are used
    project_id = tb.StringCol(itemsize=32, pos=3)     # project ID number, PXXX for Parkes
    project_name = tb.StringCol(itemsize=255, pos=4)  # project name
    observer = tb.StringCol(itemsize=255, pos=5)      # observer's name

    # --- beams ---
    num_beams = tb.Int8Col(pos=6)                     # number of beams being used
    ref_beam = tb.Int8Col(pos=7)                      # reference beam

    # --- integration and frequency set-up ---
    acc_len = tb.Float32Col(pos=8)                    # accumulation length, in seconds
    bandwidth = tb.Int32Col(pos=9)                    # bandwidth (MHz); negative means inverted
    dwell_time = tb.Float32Col(pos=10)                # dwell time (sec)
    frequency = tb.Float32Col(pos=11)                 # central frequency (MHz)

    # --- feed and scanning ---
    feed_rotation = tb.StringCol(itemsize=64, pos=12)  # feed rotation (e.g. STEPPED)
    feed_angle = tb.Float32Col(pos=13)                # feed angle
    freq_switch = tb.BoolCol(pos=14)                  # frequency switching flag
    obs_mode = tb.StringCol(itemsize=16, pos=15)      # observation mode (e.g. SCAN)
    scan_rate = tb.Float32Col(pos=16)                 # scan rate (deg/min)
class RegionDescription(t.IsDescription):
    """
    Row layout of a PyTables Table that stores genomic regions.

    Six columns, ordered by their explicit ``pos`` values 0 through 5.
    """
    ix = t.Int32Col(pos=0)
    # Chromosome name, stored as a byte string of at most 100 bytes.
    chromosome = t.StringCol(itemsize=100, pos=1)
    # Both coordinates are 64-bit integers.
    start = t.Int64Col(pos=2)
    end = t.Int64Col(pos=3)
    strand = t.Int8Col(pos=4)
    _mask_ix = t.Int32Col(pos=5)
class Order(tables.IsDescription):
    """PyTables row description of one order record.

    Besides the order's own fields, each row carries five numbered levels
    (0 to 4) of ``bid``/``ask`` prices, quantities and visible quantities.
    NOTE(review): no column declares ``pos``, so the column order of the
    resulting table is chosen by PyTables rather than by the order written
    here -- confirm if any reader depends on column order.
    """
    # --- fields of the order itself ---
    time = tables.Int64Col()
    end_time = tables.Int64Col()
    order_id = tables.Int64Col()
    trader = tables.Int64Col()
    action = tables.Int8Col()
    side = tables.Int8Col()
    crossed = tables.BoolCol()
    halted = tables.Int8Col()
    price = tables.Float64Col()
    quantity = tables.Int64Col()
    visible = tables.Int64Col()
    # --- prices per level (float64) ---
    bid0 = tables.Float64Col()
    bid1 = tables.Float64Col()
    bid2 = tables.Float64Col()
    bid3 = tables.Float64Col()
    bid4 = tables.Float64Col()
    ask0 = tables.Float64Col()
    ask1 = tables.Float64Col()
    ask2 = tables.Float64Col()
    ask3 = tables.Float64Col()
    ask4 = tables.Float64Col()
    # --- quantities per level (int64) ---
    bid0_quantity = tables.Int64Col()
    bid1_quantity = tables.Int64Col()
    bid2_quantity = tables.Int64Col()
    bid3_quantity = tables.Int64Col()
    bid4_quantity = tables.Int64Col()
    ask0_quantity = tables.Int64Col()
    ask1_quantity = tables.Int64Col()
    ask2_quantity = tables.Int64Col()
    ask3_quantity = tables.Int64Col()
    ask4_quantity = tables.Int64Col()
    # --- visible quantities per level (int64) ---
    bid0_visible = tables.Int64Col()
    bid1_visible = tables.Int64Col()
    bid2_visible = tables.Int64Col()
    bid3_visible = tables.Int64Col()
    bid4_visible = tables.Int64Col()
    ask0_visible = tables.Int64Col()
    ask1_visible = tables.Int64Col()
    ask2_visible = tables.Int64Col()
    ask3_visible = tables.Int64Col()
    ask4_visible = tables.Int64Col()
class Big(tb.IsDescription): name = tb.StringCol(itemsize=16) # 16-character String float1 = tb.Float64Col(shape=32, dflt=np.arange(32)) float2 = tb.Float64Col(shape=32, dflt=2.2) TDCcount = tb.Int8Col() # signed short integer #ADCcount = Int32Col() # ADCcount = Int16Col() # signed short integer grid_i = tb.Int32Col() # integer grid_j = tb.Int32Col() # integer pressure = tb.Float32Col() # float (single-precision) energy = tb.Float64Col() # double (double-precision)
class SimulationParticle(tables.IsDescription):
    """Store information about the particles hitting a detector

    Simulations which track individual particles write particle
    information in this table.  Position, arrival time and energy, as well
    as the detector which detected this particle are stored.

    .. attribute:: id

        a unique identifier for the simulated event (only unique in this
        table)

    .. attribute:: station_id

        station identifier, such that you can do::

            >>> station = cluster.stations[station_id]

    .. attribute:: detector_id

        detector identifier, such that you can do::

            >>> station = cluster.stations[station_id]
            >>> detector = station.detectors[detector_id]

    .. attribute:: pid

        a particle identifier.  Possible values are determined by the
        simulation package.

    .. attribute:: r, phi

        particle position in polar coordinates

    .. attribute:: time

        arrival time of the particle [ns]

    .. attribute:: energy

        particle energy [GeV]

    """
    id = tables.UInt32Col()
    # Station and detector identifiers are unsigned bytes (0-255).
    station_id = tables.UInt8Col()
    detector_id = tables.UInt8Col()
    # The particle identifier is a signed byte.
    pid = tables.Int8Col()
    r = tables.Float32Col()
    phi = tables.Float32Col()
    time = tables.Float32Col()
    energy = tables.Float32Col()
class Event(T.IsDescription):
    """PyTables row description of one located event (ten scalar columns)."""
    # ascii line for all these data are 64 bytes + 4 for station count + 4 for charge = 72
    # Declared column widths: 64 + 32*5 + 8 + 8 + 32 + 4*8 = 304 bits,
    # i.e. 38 bytes per record (time, five float32 values, two single-byte
    # columns, flash_id and the 4-byte mask).
    time = T.Float64Col()  # Seconds elapsed since start of day
    lat = T.Float32Col()  # Decimal latitude
    lon = T.Float32Col()  # Decimal longitude
    alt = T.Float32Col()  # Altitude, km MSL, WGS84
    chi2 = T.Float32Col()  # Chi-squared solution quality
    power= T.Float32Col()  # Radiated power
    stations = T.UInt8Col()  # Station count
    charge = T.Int8Col()  # Inferred storm charge
    flash_id = T.Int32Col()  # Flash ID
    mask = T.StringCol(4)  # Station mask
def impute_hase_hdf5(args):
    """Mean-impute missing genotype calls chunk by chunk.

    Reads the ``data`` array of
    ``<args.genotype><args.study_name>_step2_merged_genotype.h5`` in blocks
    of rows, replaces every value equal to 9 (the missing-call code used
    here) by the mean of the non-missing values of the same row, and writes
    the rounded result as Int8 to
    ``<args.outfolder><args.study_name>_step3_genotype_no_missing.h5``.
    The per-row standard deviation of the imputed (unrounded) values is
    saved to ``<args.outfolder><args.study_name>_std.npy``.

    Args:
        args: namespace providing ``genotype``, ``study_name``,
            ``outfolder``, ``comp_level`` (zlib level) and ``tcm`` (used to
            size the row blocks).

    Returns:
        str: file name (without folder) of the imputed HDF5 file.  Before
        returning, ``args.outfolder`` is set to ``args.genotype``.
    """
    merged_path = (args.genotype + args.study_name +
                   '_step2_merged_genotype.h5')
    hdf5_name = args.study_name + '_step3_genotype_no_missing.h5'
    out_path = args.outfolder + hdf5_name

    # ``with`` closes both files even when a block fails half-way.
    with tables.open_file(merged_path, mode='r') as t:
        print('merged shape =', t.root.data.shape)
        num_SNPS = t.root.data.shape[0]
        num_pat = t.root.data.shape[1]

        # The probe table is only loaded to report its shape.
        p = pd.read_hdf(args.genotype + '/probes/' + args.study_name + ".h5")
        print('probe shape =', p.shape)

        print("\n impute missing...")
        stdSNPs = np.zeros(num_SNPS)

        # NOTE(review): ``chunk`` counts SNP rows, yet it is derived from
        # ``tcm // num_SNPS`` and capped at ``num_pat``; the two dimensions
        # look swapped.  Kept as is because it only affects block size.
        chunk = args.tcm // num_SNPS
        chunk = int(np.clip(chunk, 1, num_pat))
        print(chunk)

        atom = tables.Int8Col()
        filter_zlib = tables.Filters(complib='zlib',
                                     complevel=args.comp_level)
        with tables.open_file(out_path, mode='w') as f:
            imputed = f.create_earray(f.root, 'data', atom, (0, num_pat),
                                      filters=filter_zlib)
            # Stepping by ``chunk`` visits every row exactly once and never
            # produces the trailing empty block the old ``ceil(...) + 1``
            # range created.
            for begins in tqdm.tqdm(range(0, num_SNPS, chunk)):
                tills = min(begins + chunk, num_SNPS)
                d = t.root.data[begins:tills, :].astype("float32")
                d[d == 9] = np.nan
                missing = np.isnan(d)
                row_mean = np.ma.array(d, mask=missing).mean(axis=1)
                a = np.where(missing, row_mean[:, np.newaxis], d)
                stdSNPs[begins:tills] = np.std(a, axis=1)
                # Store the imputed block ``a``; the raw block ``d`` still
                # contains NaN, which has no defined Int8 representation.
                imputed.append(np.round(a).astype(np.int8))

    np.save(args.outfolder + args.study_name + '_std.npy', stdSNPs)
    args.outfolder = args.genotype
    return hdf5_name
def get_hdf5_table_description(used_variables, decimal_precision):
    """Return a ``{column name: column}`` mapping describing a result table.

    Args:
        used_variables: sized, ordered collection of column names; each one
            becomes a ``UInt8Col`` placed at its index.
        decimal_precision: integer; the two string columns are
            ``decimal_precision + 10`` bytes wide.

    Returns:
        dict: the variable columns followed by ``integral_float64``,
        ``error_float64``, ``scale_factor``, ``integral_str`` and
        ``error_str`` at the next five positions.
    """
    description = {
        name: tb.UInt8Col(pos=position)
        for position, name in enumerate(used_variables)
    }

    first_free = len(used_variables)
    # Account for decimal dot and exponent info
    text_width = decimal_precision + 10

    description.update({
        'integral_float64': tb.Float64Col(pos=first_free),
        'error_float64': tb.Float64Col(pos=first_free + 1),
        'scale_factor': tb.Int8Col(pos=first_free + 2),
        'integral_str': tb.StringCol(itemsize=text_width,
                                     pos=first_free + 3),
        'error_str': tb.StringCol(itemsize=text_width,
                                  pos=first_free + 4),
    })
    return description
class ShowerParticle(tables.IsDescription):
    """Store information about shower particles reaching ground level

    This table stores particles from shower simulations.  For example,
    AIRES simulations produce ``grdpcles`` files containing all particles
    which reached ground level.  These files can be read and their contents
    can be stored in this table.

    .. attribute:: id

        a unique identifier for the particle (unique in this table)

    .. attribute:: pid

        a particle identifier.  Possible values are determined by the
        simulation package.

    .. attribute:: core_distance

        distance from the particle position to the shower core

    .. attribute:: polar_angle

        angle of the particle position vector to a reference line

    .. attribute:: x, y

        particle position

    .. attribute:: arrival_time

        arrival time of the particle [ns]

    .. attribute:: energy

        particle energy [GeV]

    """
    id = tables.UInt32Col()
    # The particle identifier is a signed byte.
    pid = tables.Int8Col()
    core_distance = tables.Float32Col()
    polar_angle = tables.Float32Col()
    x = tables.Float32Col()
    y = tables.Float32Col()
    arrival_time = tables.Float32Col()
    energy = tables.Float32Col()
def impute_hase_hdf5_no_chunk(args):
    """Mean-impute missing genotype calls one row at a time.

    Reads the ``data`` array of
    ``<args.genotype><args.study_name>_step2_merged_genotype.h5`` row by
    row, replaces every value equal to 9 (the missing-call code used here)
    by the mean of the remaining values of that row, and appends the rounded
    row as Int8 to
    ``<args.outfolder><args.study_name>_step3_genotype_no_missing.h5``.
    The standard deviation of each imputed (unrounded) row is saved to
    ``<args.outfolder><args.study_name>_std.npy``.

    Args:
        args: namespace providing ``genotype``, ``study_name``,
            ``outfolder`` and ``comp_level`` (zlib compression level).

    Returns:
        str: file name (without folder) of the imputed HDF5 file.  Before
        returning, ``args.outfolder`` is set to ``args.genotype``.
    """
    merged_path = (args.genotype + args.study_name +
                   '_step2_merged_genotype.h5')
    hdf5_name = args.study_name + '_step3_genotype_no_missing.h5'
    out_path = args.outfolder + hdf5_name

    # ``with`` closes both files even when processing a row raises.
    with tables.open_file(merged_path, mode='r') as t:
        print('merged shape =', t.root.data.shape)
        num_SNPS = t.root.data.shape[0]
        num_pat = t.root.data.shape[1]

        # The probe table is only loaded to report its shape.
        p = pd.read_hdf(args.genotype + '/probes/' + args.study_name + ".h5")
        print('probe shape =', p.shape)

        print("\n impute missing...")
        stdSNPs = np.zeros(num_SNPS)

        atom = tables.Int8Col()
        filter_zlib = tables.Filters(complib='zlib',
                                     complevel=args.comp_level)
        # The output stays open for the whole loop instead of being created,
        # closed and reopened in append mode.
        with tables.open_file(out_path, mode='w') as f:
            imputed = f.create_earray(f.root, 'data', atom, (0, num_pat),
                                      filters=filter_zlib)
            for i in tqdm.tqdm(range(num_SNPS)):
                d = t.root.data[i, :].astype("float32")
                m = np.where(d == 9)
                # Blank the missing calls first so that they do not take
                # part in the mean that replaces them.
                d[m] = np.nan
                d[m] = np.nanmean(d)
                d = d[np.newaxis, :]
                imputed.append(np.round(d).astype(np.int8))
                stdSNPs[i] = np.std(d)

    np.save(args.outfolder + args.study_name + '_std.npy', stdSNPs)
    args.outfolder = args.genotype
    return hdf5_name
class orientation(tables.IsDescription): ''' Orientation of sensor ''' #dip = Units32 () # Zero is up class dip(tables.IsDescription): ''' 32 bit float with units ''' _v_pos = 2 units_s = tables.StringCol(16) value_f = tables.Float32Col(pos=1) #azimuth = Units32 () # Zero is north class azimuth(tables.IsDescription): ''' 32 bit float with units ''' _v_pos = 1 units_s = tables.StringCol(16) value_f = tables.Float32Col(pos=1) channel_number_i = tables.Int8Col() description_s = tables.StringCol(1024, pos=3) # Any additional comments
class ProteinTable(tables.IsDescription):
    """PyTables row description of one protein entry.

    Nineteen columns, ordered by their explicit ``pos`` values 1 through 19.
    Several ``*Offset`` / ``*Length`` column pairs are declared; string
    columns are fixed-width byte strings.
    """
    EntryNr = tables.UInt32Col(pos=1)

    # Sequence buffer reference.
    SeqBufferOffset = tables.UInt64Col(pos=2)
    SeqBufferLength = tables.UInt32Col(pos=3)

    # Group membership; defaults are 0 and the empty string.
    OmaGroup = tables.UInt32Col(pos=4, dflt=0)
    OmaHOG = tables.StringCol(itemsize=255, pos=5, dflt=b"")

    # Locus; the strand defaults to 1.
    Chromosome = tables.StringCol(itemsize=255, pos=6)
    LocusStart = tables.UInt32Col(pos=7)
    LocusEnd = tables.UInt32Col(pos=8)
    LocusStrand = tables.Int8Col(pos=9, dflt=1)

    AltSpliceVariant = tables.Int32Col(pos=10, dflt=0)
    CanonicalId = tables.StringCol(itemsize=20, pos=11, dflt=b"")

    # cDNA buffer reference.
    CDNABufferOffset = tables.UInt64Col(pos=12)
    CDNABufferLength = tables.UInt32Col(pos=13)

    MD5ProteinHash = tables.StringCol(itemsize=32, pos=14)

    # Description reference.
    DescriptionOffset = tables.UInt32Col(pos=15)
    DescriptionLength = tables.UInt16Col(pos=16)

    SubGenome = tables.StringCol(itemsize=1, pos=17, dflt=b"")

    # Both default to -1.
    RootHogUpstream = tables.Int32Col(pos=18, dflt=-1)
    RootHogDownStream = tables.Int32Col(pos=19, dflt=-1)
class Elevation(tb.IsDescription):
    """PyTables row description with fourteen scalar columns.

    Column order is pinned by explicit ``pos`` values 1 through 14.  The
    record starts with a 64-byte string ``time`` column in addition to the
    numeric ``utc85`` column.
    NOTE(review): the column names suggest altimetry records with a block of
    flag bytes (``f*``) -- units, epochs and flag meanings are not stated in
    this file; confirm with the data producer.
    """
    time = tb.StringCol(64, pos=1)
    orbit = tb.Int32Col(pos=2)
    utc85 = tb.Float64Col(pos=3)
    lon = tb.Float64Col(pos=4)
    lat = tb.Float64Col(pos=5)
    elev = tb.Float64Col(pos=6)
    agc = tb.Float64Col(pos=7)
    # Six 8-bit signed columns.
    fmode = tb.Int8Col(pos=8)
    fret = tb.Int8Col(pos=9)
    fprob = tb.Int8Col(pos=10)
    fmask = tb.Int8Col(pos=11)
    fbord = tb.Int8Col(pos=12)
    ftrack = tb.Int8Col(pos=13)
    inc = tb.Float64Col(pos=14)
def populate_h5(input_file, out_file, values_slice):
    """Copy a slice of CSV rows into a new HDF5 table named ``clases``.

    The first CSV line is read as the header: every cell that is neither
    empty nor starts with ``#`` becomes the name of an ``Int8`` column.  The
    file is then rewound, so ``values_slice`` counts lines from the top of
    the file and index 0 is the header line itself.  From each selected row
    the first cell is stored in the string column ``object_id``; the next
    ``len(titles)`` cells are converted to integers, with 0 substituted for
    anything that is not a valid integer.

    Args:
        input_file: path of the CSV file to read.
        out_file: path of the HDF5 file to create (overwritten if present).
        values_slice: ``slice`` whose ``start``/``stop``/``step`` select the
            CSV lines to copy.
    """
    def to_int(text):
        """Return ``int(text)``, or 0 when *text* is not a valid integer."""
        try:
            return int(text)
        except ValueError:
            return 0

    # ``newline=""`` is what the csv module asks for; ``with`` closes the
    # handle that was previously left open.
    with open(input_file, "r", newline="") as csv_file:
        reader = csv.reader(csv_file)
        # getting the headers of the file, assume they're there;
        # skipping empty entries
        entries = next(reader)
        titles = [s for s in entries if s != '' and not s.startswith('#')]
        print("Titles are: ", titles)

        csv_file.seek(0)
        all_rows = []
        selected = itertools.islice(reader, values_slice.start,
                                    values_slice.stop, values_slice.step)
        for row in selected:
            class_number = row[0]
            new_row = [class_number]
            # skip class_number and the last two empty elements
            new_row.extend(to_int(x) for x in row[1:1 + len(titles)])
            all_rows.append(new_row)

    # now let's go to the h5
    description = {
        title: tables.Int8Col(pos=index + 1)
        for index, title in enumerate(titles)
    }
    description["object_id"] = tables.StringCol(8, pos=0)

    with tables.open_file(out_file, mode="w", title="Adjectives") as h5file:
        table = h5file.create_table("/", "clases", description)
        # An empty selection simply leaves the table empty.
        if all_rows:
            table.append(all_rows)
        table.flush()
class Weights(tables.IsDescription): value = tables.Float32Col(shape=(batch_size, 512, 14, 14)) # float (single-precision) labels = tables.Int8Col(shape=(batch_size, nb_classes))
class MyTimeRow(tb.IsDescription):
    """Row description mixing an integer column with two time columns.

    Column order is pinned by ``pos`` 0 through 2.  ``t32col`` is a scalar
    32-bit time column; ``t64col`` holds two 64-bit time values per row.
    """
    i8col = tb.Int8Col(pos=0)
    t32col = tb.Time32Col(pos=1)
    t64col = tb.Time64Col(shape=(2, ), pos=2)
class TrialData(tables.IsDescription):
    """PyTables row description of one trial.

    Holds the trial number, two byte strings of at most 26 bytes
    (``trigger`` and ``response``) and one 8-bit integer column for each of
    them (``plot_trigger`` and ``plot_response``).
    """
    trial_num = tables.Int32Col()

    # Fixed-width text columns.
    trigger = tables.StringCol(itemsize=26)
    response = tables.StringCol(itemsize=26)

    # 8-bit integer companions of the two text columns.
    plot_trigger = tables.Int8Col()
    plot_response = tables.Int8Col()
class _Location(tables.IsDescription):
    """PyTables row description of one named location.

    ``name`` is a byte string of at most 128 bytes; latitude, longitude and
    height are float64 values and ``bortle_class`` is an 8-bit integer.
    """
    name = tables.StringCol(itemsize=128)

    # Position, stored in double precision.
    latitude = tables.Float64Col()
    longitude = tables.Float64Col()
    height = tables.Float64Col()

    bortle_class = tables.Int8Col()