class ReconData(tables.IsDescription):
    """PyTables row description for one reconstructed event.

    Columns are laid out in the order given by their ``pos`` values: the
    event number, a first group of fit results, then a second group of
    ``*_sph`` results.
    """

    # Event number.
    EventID = tables.Int64Col(pos=0)

    # First result group: position, time offset, energy, decay time constant.
    x = tables.Float16Col(pos=1)
    y = tables.Float16Col(pos=2)
    z = tables.Float16Col(pos=3)
    t0 = tables.Float16Col(pos=4)
    E = tables.Float16Col(pos=5)
    tau_d = tables.Float16Col(pos=6)
    # Recon failure flag for the first group.
    success = tables.Int64Col(pos=7)

    # ``*_sph`` result group: position and energy.
    x_sph = tables.Float16Col(pos=8)
    y_sph = tables.Float16Col(pos=9)
    z_sph = tables.Float16Col(pos=10)
    l0_sph = tables.Float16Col(pos=11)
    # Recon failure flag for the ``*_sph`` group.
    success_sph = tables.Int64Col(pos=12)
def get_tables_dtype(col_type, type_len):
    """Return the PyTables column instance matching a type name and width.

    Args:
        col_type: Type name, compared case-insensitively. One of
            ``'string'``, ``'int'`` or ``'float'``.
        type_len: For strings, the item size passed to ``StringCol``. For
            ints and floats, the bit width (16, 32 or 64); any other width
            falls back to the 32-bit column.

    Returns:
        A freshly constructed ``tables`` column object.

    Raises:
        ValueError: If ``col_type`` is not one of the supported names.
            (Previously this surfaced as an ``UnboundLocalError``.)
    """
    kind = col_type.lower()

    if kind == 'string':
        return tables.StringCol(type_len)

    if kind == 'int':
        if type_len == 16:
            return tables.Int16Col()
        if type_len == 64:
            return tables.Int64Col()
        # 32 and every unrecognised width map to the 32-bit column.
        return tables.Int32Col()

    if kind == 'float':
        if type_len == 16:
            return tables.Float16Col()
        if type_len == 64:
            return tables.Float64Col()
        # 32 and every unrecognised width map to the 32-bit column.
        return tables.Float32Col()

    raise ValueError(
        "Unsupported column type %r; expected 'string', 'int' or 'float'"
        % (col_type,)
    )
class Record(tables.IsDescription):
    """Table description with one column per scalar column type.

    ``var1`` and ``var2`` are pinned to the first two positions; the other
    columns carry no explicit ``pos``. The extended-precision columns
    (``var15``..``var19``) are declared only when the installed PyTables
    exposes the corresponding column class.
    """

    var1 = StringCol(itemsize=4, dflt=b"abcd", pos=0)
    var2 = StringCol(itemsize=1, dflt=b"a", pos=1)
    var3 = BoolCol(dflt=1)
    var4 = Int8Col(dflt=1)
    var5 = UInt8Col(dflt=1)
    var6 = Int16Col(dflt=1)
    var7 = UInt16Col(dflt=1)
    var8 = Int32Col(dflt=1)
    var9 = UInt32Col(dflt=1)
    var10 = Int64Col(dflt=1)
    var11 = Float32Col(dflt=1.0)
    var12 = Float64Col(dflt=1.0)
    var13 = ComplexCol(itemsize=8, dflt=(1. + 0.j))
    var14 = ComplexCol(itemsize=16, dflt=(1. + 0.j))

    # Platform-dependent types: only declared when available.
    if hasattr(tables, 'Float16Col'):
        var15 = tables.Float16Col(dflt=1.0)
    if hasattr(tables, 'Float96Col'):
        var16 = tables.Float96Col(dflt=1.0)
    if hasattr(tables, 'Float128Col'):
        var17 = tables.Float128Col(dflt=1.0)
    # A 24-byte complex is 192 bits; the guard used to test for the
    # non-existent name 'Complex196Col', so var18 could never be declared.
    if hasattr(tables, 'Complex192Col'):
        var18 = tables.ComplexCol(itemsize=24, dflt=(1. + 0.j))
    if hasattr(tables, 'Complex256Col'):
        var19 = tables.ComplexCol(itemsize=32, dflt=(1. + 0.j))
class PairwiseRelationTable(tables.IsDescription):
    """PyTables row description for a relation between two entry numbers.

    Each row stores the two entry numbers, an enumerated relation type
    (stored as ``uint8``, defaulting to ``'n/a'``) and five numeric
    columns that all default to ``-1``.
    """

    # The pair of entries the row relates.
    EntryNr1 = tables.UInt32Col(pos=0)
    EntryNr2 = tables.UInt32Col(pos=1)

    # Relation type, encoded as a one-byte enum.
    RelType = tables.EnumCol(
        tables.Enum({
            '1:1': 0,
            '1:n': 1,
            'm:1': 2,
            'm:n': 3,
            'close paralog': 4,
            'homeolog': 5,
            'n/a': 6
        }),
        'n/a',
        base='uint8',
        pos=2,
    )

    # Numeric columns; -1 is the default (presumably "not set" -- confirm).
    Score = tables.Float32Col(pos=3, dflt=-1)
    Distance = tables.Float32Col(pos=4, dflt=-1)
    AlignmentOverlap = tables.Float16Col(pos=5, dflt=-1)
    SyntenyConservationLocal = tables.Float16Col(pos=6, dflt=-1)
    Confidence = tables.Float16Col(pos=7, dflt=-1)
class Distribution(tb.IsDescription):
    """Row description for a kind of volume distribution.

    Table structure (notes carried over from the original description):

    - date  : POSIX time (seconds) stored as an integer
    - value : trading volume / (high price - low price)
    - price : column index of each value

    NOTE(review): the notes above mention a ``date`` field, but the first
    column declared here is ``row`` -- confirm which name is intended.
    """

    # Unsigned 64-bit column in the first position.
    row = tb.UInt64Col(pos=0)
    # Double-precision column in the second position.
    price = tb.Float64Col(pos=1)
    # Half-precision column in the third position.
    value = tb.Float16Col(pos=2)
class ReconData(tables.IsDescription):
    """PyTables row description for one reconstructed event.

    Holds a 64-bit event id, six half-precision result columns and a
    64-bit ``success`` column, ordered by their ``pos`` values.
    """

    # 64-bit event identifier.
    EventID = tables.Int64Col(pos=0)

    # Half-precision result columns.
    x = tables.Float16Col(pos=1)
    y = tables.Float16Col(pos=2)
    z = tables.Float16Col(pos=3)
    t0 = tables.Float16Col(pos=4)
    E = tables.Float16Col(pos=5)
    tau_d = tables.Float16Col(pos=6)

    # 64-bit status column.
    success = tables.Int64Col(pos=7)
class SnagData(tables.IsDescription):
    """PyTables row description for snag data.

    The columns mirror these snag array definitions::

        real, dimension(maxcy1,mxsnag) :: hard_density, soft_density &
                , dbh_dead, hard_ht, soft_ht, hard_vol, soft_vol
        integer, dimension(maxcy1,mxsnag) :: spp, year_dead
    """

    # Row identification: 36-character run id, cycle, year, 2-character species.
    run_id = tables.StringCol(36, pos=0)
    cycle = tables.UInt16Col(pos=1)
    year = tables.UInt16Col(pos=2)
    species = tables.StringCol(2, pos=3)

    # Dead-tree attributes.
    dbh_dead = tables.Float16Col(pos=4)
    year_dead = tables.UInt16Col(pos=5)

    # "hard" snag values.
    hard_density = tables.Float16Col(pos=6)
    hard_ht = tables.Float16Col(pos=7)
    hard_vol = tables.Float16Col(pos=8)

    # "soft" snag values.
    soft_density = tables.Float16Col(pos=9)
    soft_ht = tables.Float16Col(pos=10)
    soft_vol = tables.Float16Col(pos=11)
class ReconData(tables.IsDescription):
    """PyTables row description for inner/outer reconstruction results.

    Each row carries the event number, one group of ``*_in`` results, one
    group of ``*_out`` results, the ``*_truth`` values and a trailing
    ``tau_d`` column. Every column has a distinct ``pos``.
    """

    EventID = tables.Int64Col(pos=0)           # EventNo

    # inner recon
    E_sph_in = tables.Float16Col(pos=1)        # energy
    x_sph_in = tables.Float16Col(pos=2)        # x position
    y_sph_in = tables.Float16Col(pos=3)        # y position
    z_sph_in = tables.Float16Col(pos=4)        # z position
    t0_in = tables.Float16Col(pos=5)           # time offset
    success_in = tables.Int64Col(pos=6)        # recon failure
    Likelihood_in = tables.Float16Col(pos=7)

    # outer recon
    E_sph_out = tables.Float16Col(pos=8)       # energy
    x_sph_out = tables.Float16Col(pos=9)       # x position
    y_sph_out = tables.Float16Col(pos=10)      # y position
    z_sph_out = tables.Float16Col(pos=11)      # z position
    t0_out = tables.Float16Col(pos=12)         # time offset
    success_out = tables.Int64Col(pos=13)      # recon failure
    Likelihood_out = tables.Float16Col(pos=14)

    # truth info
    x_truth = tables.Float16Col(pos=15)        # x position
    y_truth = tables.Float16Col(pos=16)        # y position
    z_truth = tables.Float16Col(pos=17)        # z position
    E_truth = tables.Float16Col(pos=18)        # energy

    # unfinished
    # tau_d used to share pos=18 with E_truth. Giving it its own slot keeps
    # it after E_truth without relying on how ties are broken.
    # NOTE(review): PyTables is expected to order equal positions by name,
    # which already placed E_truth first -- confirm the on-disk column
    # order is unchanged for existing files.
    tau_d = tables.Float16Col(pos=19)          # decay time constant
def _build_batch_rows(first_index, num_rows, mels, stfts, text_lens,
                      speech_lens):
    """Build row dicts for the first ``num_rows`` slots of the batch buffers.

    Row ``slot`` receives ``first_index + slot`` as its ``INDEX_COL`` value.
    """
    rows = []
    for slot in range(num_rows):
        rows.append({
            INDEX_COL: first_index + slot,
            MELS_COL: mels[slot],
            MELS_SHAPE_COL: mels[slot].shape,
            STFTS_COL: stfts[slot],
            STFTS_SHAPE_COL: stfts[slot].shape,
            TEXT_LENS_COL: text_lens[slot],
            SPEECH_LENS_COL: speech_lens[slot],
        })
    return rows


def preprocess_to_table(data, data_name, sr=16000):
    """Process prompt/audio pairs and write them to an HDF5 table in batches.

    The data is walked twice. The first pass only determines the longest
    encoded prompt among examples whose audio could be processed, because
    the table's text column needs a fixed width. The second pass fills
    fixed-size buffers and appends them to the table every
    ``example_batch_size`` usable examples, followed by one final partial
    batch. The padded text arrays are written into the table afterwards.

    Args:
        data: Mapping with ``'prompts'`` and ``'audio_files'`` sequences of
            equal length, and optionally ``'speakers'``.
        data_name: Used both as the table file name and in the
            ``data/<data_name>/speakers.npy`` path.
        sr: Sample rate forwarded to ``audio.process_audio``.

    Raises:
        ValueError: If no example yields usable audio.
    """
    filename = data_name
    example_batch_size = 5000

    # get count of examples from text file
    num_examples = len(data['prompts'])

    texts = []
    max_freq_length = audio.maximum_audio_length // (audio.r * audio.hop_length)
    print("num_examples: %s" % str(num_examples))
    print("max_freq_length: %s" % str(max_freq_length))
    print("1025*audio.r: %s" % str(1025 * audio.r))

    # Reusable per-batch buffers; slot k holds the k-th example of the
    # batch currently being filled.
    text_lens = np.zeros((example_batch_size), dtype=np.int32)
    speech_lens = np.zeros((example_batch_size), dtype=np.int32)
    mels = np.zeros((example_batch_size, max_freq_length, 80 * audio.r),
                    dtype=np.float16)
    stfts = np.zeros((example_batch_size, max_freq_length, 1025 * audio.r),
                     dtype=np.float16)

    # ---- Pass 1: find the longest prompt among usable examples. ----
    print("Processing audio...")
    texts_for_length = []
    audio_count = 0
    for text, audio_file in zip(data['prompts'], data['audio_files']):
        mel, stft = audio.process_audio(audio_file, sr=sr)
        if mel is not None:
            texts_for_length.append(
                np.array([process_char(c) for c in text]))
            audio_count += 1
            if audio_count % 500 == 0:
                print("Processed %d audio samples..." % audio_count)
    print("Processed %d audio samples total!" % audio_count)

    if not texts_for_length:
        # max() below would fail with an opaque "empty sequence" message.
        raise ValueError(
            "No usable audio examples found; cannot build table for %r"
            % (data_name,))

    max_text_length = max(r.shape[0] for r in texts_for_length)
    print("max_text_length: %d" % max_text_length)
    padded_texts_for_length = pad_to_dense(texts_for_length, max_text_length)
    print("max_text_length: %s" % str(padded_texts_for_length.shape))

    table_description = {
        INDEX_COL: tables.Int64Col(),
        MELS_COL: tables.Float16Col(shape=(max_freq_length, 80 * audio.r)),
        MELS_SHAPE_COL: tables.Int64Col(shape=(2)),
        STFTS_COL: tables.Float16Col(shape=(max_freq_length, 1025 * audio.r)),
        STFTS_SHAPE_COL: tables.Int64Col(shape=(2)),
        TEXTS_COL: tables.Int32Col(shape=(max_text_length)),
        TEXT_LENS_COL: tables.Int32Col(),
        SPEECH_LENS_COL: tables.Int32Col()
    }
    create_hdf5_table_file(filename, table_description)

    print("len prompts: %d" % len(data["prompts"]))
    print("len audio_files: %d" % len(data["audio_files"]))

    # ---- Pass 2: fill the buffers and flush each full batch. ----
    count = 0
    for text, audio_file in tqdm(zip(data['prompts'], data['audio_files']),
                                 total=num_examples):
        original_text = text
        text = [process_char(c) for c in text]
        mel, stft = audio.process_audio(audio_file, sr=sr)

        if mel is not None:
            slot = count % example_batch_size
            texts.append(np.array(text))
            text_lens[slot] = len(text)
            speech_lens[slot] = mel.shape[0]
            mels[slot] = mel
            stfts[slot] = stft
            count += 1

            if count % example_batch_size == 0:
                print("Writing data on count: %d/%d" % (count, num_examples))
                # The buffers hold examples count-batch .. count-1. Each
                # row gets its own index (previously every row in the
                # batch was written with the same value, count - 1).
                rows = _build_batch_rows(
                    count - example_batch_size, example_batch_size,
                    mels, stfts, text_lens, speech_lens)
                append_rows_to_hdf5_table(filename, rows)
                print("Wrote batch sized '%d' to '%s'"
                      % (example_batch_size, filename))
        else:
            print("mel is None for text: %s" % str(original_text))

    # ---- Final partial batch. ----
    remainder = count % example_batch_size
    # The first unwritten example is count - remainder (the previous
    # formula subtracted one more and so overlapped the preceding batch).
    rows = _build_batch_rows(
        count - remainder, remainder, mels, stfts, text_lens, speech_lens)
    append_rows_to_hdf5_table(filename, rows)
    print("Final batch, wrote batch sized '%d' to '%s'"
          % (remainder, filename))

    # ---- Write the padded texts into the rows created above. ----
    print("texts len: %d" % len(texts))
    max_len = max(r.shape[0] for r in texts)
    i = 0
    while i < len(texts):
        update_all_rows_in_hdf5_table(
            filename, TEXTS_COL,
            pad_to_dense(texts[i:i + example_batch_size], max_len),
            start_index=i)
        i += example_batch_size
        print("Wrote batch sized '%d' to '%s'" % (example_batch_size, filename))

    if len(texts) % example_batch_size != 0:
        # NOTE(review): the loop above already covered this last slice, so
        # this call appears to rewrite the same rows. Kept as-is because
        # update_all_rows_in_hdf5_table is not visible here -- confirm
        # before removing.
        prev_i = i - example_batch_size
        update_all_rows_in_hdf5_table(
            filename, TEXTS_COL,
            pad_to_dense(texts[prev_i:prev_i + example_batch_size], max_len),
            start_index=prev_i)
        print("Final batch, wrote batch sized '%d' to '%s'"
              % ((len(texts) % example_batch_size), filename))

    if 'speakers' in data:
        np.save('data/%s/speakers.npy' % data_name, data['speakers'],
                allow_pickle=False)

    # save vocabulary
    save_vocab(data_name)
DATETIME 12 timestamp YEAR 13 number VARCHAR 15 string BIT 16 number NEWDECIMAL 246 number ENUM 247 string TINY_BLOB 249 binary MEDIUM_BLOB 250 binary LONG_BLOB 251 binary BLOB 252 binary VAR_STRING 253 string STRING 254 string ============== ================= ============= """ import tables as tb import numpy as np # see also: # http://mysql-python.sourceforge.net/MySQLdb-1.2.2/public/MySQLdb.constants.FIELD_TYPE-module.html map_numpy = {'VAR_STRING': str, 'LONG': np.int16, 'FLOAT': np.float} # TODO: Figure out variable length string atoms with references in the actual table. Or calibrate string field width (or both). map_pytables = { 'VAR_STRING': tb.StringCol( 32 ), # 32 is relatively arbitrary - pytables doesn't support variable length strings 'LONG': tb.Int16Col(), 'FLOAT': tb.Float16Col() }
class ReconData(tables.IsDescription):
    """PyTables row description holding four (x, y, z) position triples.

    The event number comes first, followed by the numbered triples in
    order, as fixed by each column's ``pos``.
    """

    # Event number.
    EventID = tables.Int64Col(pos=0)

    # Position triple 1 (x, y, z).
    x1 = tables.Float16Col(pos=1)
    y1 = tables.Float16Col(pos=2)
    z1 = tables.Float16Col(pos=3)

    # Position triple 2 (x, y, z).
    x2 = tables.Float16Col(pos=4)
    y2 = tables.Float16Col(pos=5)
    z2 = tables.Float16Col(pos=6)

    # Position triple 3 (x, y, z).
    x3 = tables.Float16Col(pos=7)
    y3 = tables.Float16Col(pos=8)
    z3 = tables.Float16Col(pos=9)

    # Position triple 4 (x, y, z).
    x4 = tables.Float16Col(pos=10)
    y4 = tables.Float16Col(pos=11)
    z4 = tables.Float16Col(pos=12)
class ReconData(tables.IsDescription):
    """PyTables row description: an event id plus four half-precision values.

    Column order follows the ``pos`` values: ``EventID``, ``x``, ``y``,
    ``z``, ``E``.
    """

    # 64-bit event identifier.
    EventID = tables.Int64Col(pos=0)

    # Half-precision result columns.
    x = tables.Float16Col(pos=1)
    y = tables.Float16Col(pos=2)
    z = tables.Float16Col(pos=3)
    E = tables.Float16Col(pos=4)