Example #1
0
 class ReconData(tables.IsDescription):
     """PyTables row description holding one reconstruction result per row.

     Column order is fixed explicitly through ``pos``. The event id and the
     two status columns are 64-bit integers; every other column is stored as
     a half-precision float (``Float16Col``).

     NOTE(review): the ``_sph`` columns form a second group of position /
     status fields for the same event -- confirm what the suffix stands for
     and what ``l0_sph`` represents (its inline comment says "energy").
     """
     EventID = tables.Int64Col(pos=0)    # EventNo
     x = tables.Float16Col(pos=1)        # x position
     y = tables.Float16Col(pos=2)        # y position
     z = tables.Float16Col(pos=3)        # z position
     t0 = tables.Float16Col(pos=4)       # time offset
     E = tables.Float16Col(pos=5)        # energy
     tau_d = tables.Float16Col(pos=6)    # decay time constant
     success = tables.Int64Col(pos=7)    # recon failure
     # Second group of columns, distinguished by the ``_sph`` suffix.
     x_sph = tables.Float16Col(pos=8)        # x position
     y_sph = tables.Float16Col(pos=9)        # y position
     z_sph = tables.Float16Col(pos=10)        # z position
     l0_sph = tables.Float16Col(pos=11)        # energy
     success_sph = tables.Int64Col(pos=12)    # recon failure
Example #2
0
 def get_tables_dtype(col_type, type_len):
     """
     Get the appropriate pytables.tables data type

     Args:
         col_type: one of ``'string'``, ``'int'`` or ``'float'``
             (matched case-insensitively).
         type_len: for strings, the item size passed to ``StringCol``; for
             ints and floats, the width in bits (16, 32 or 64). Any other
             numeric width falls back to 32 bits.

     Returns:
         A new PyTables column instance of the matching class.

     Raises:
         ValueError: if ``col_type`` is not one of the supported kinds.
     """
     kind = col_type.lower()
     if kind == 'string':
         return tables.StringCol(type_len)
     if kind not in ('int', 'float'):
         # Previously this path fell through to ``return col_class`` with the
         # name never assigned, which surfaced as an UnboundLocalError.
         raise ValueError(
             "unsupported column type %r (expected 'string', 'int' or 'float')"
             % (col_type,))
     # Unrecognised widths use the 32-bit column, exactly as before.
     bits = type_len if type_len in (16, 32, 64) else 32
     # Resolves to e.g. tables.Int16Col or tables.Float64Col.
     col_factory = getattr(tables, '%s%dCol' % (kind.capitalize(), bits))
     return col_factory()
Example #3
0
class Record(tables.IsDescription):
    """Row description with one column of each basic PyTables column type.

    ``var1`` and ``var2`` have explicit positions; the remaining columns
    leave ``pos`` unset. Extended-precision float and complex columns are
    only declared when the installed PyTables build exposes the matching
    column class, because their availability is platform dependent.
    """
    var1 = StringCol(itemsize=4, dflt=b"abcd", pos=0)
    var2 = StringCol(itemsize=1, dflt=b"a", pos=1)
    var3 = BoolCol(dflt=1)
    var4 = Int8Col(dflt=1)
    var5 = UInt8Col(dflt=1)
    var6 = Int16Col(dflt=1)
    var7 = UInt16Col(dflt=1)
    var8 = Int32Col(dflt=1)
    var9 = UInt32Col(dflt=1)
    var10 = Int64Col(dflt=1)
    var11 = Float32Col(dflt=1.0)
    var12 = Float64Col(dflt=1.0)
    var13 = ComplexCol(itemsize=8, dflt=(1. + 0.j))
    var14 = ComplexCol(itemsize=16, dflt=(1. + 0.j))
    if hasattr(tables, 'Float16Col'):
        var15 = tables.Float16Col(dflt=1.0)
    if hasattr(tables, 'Float96Col'):
        var16 = tables.Float96Col(dflt=1.0)
    if hasattr(tables, 'Float128Col'):
        var17 = tables.Float128Col(dflt=1.0)
    # A 24-byte complex is 192 bits wide; the guard previously tested the
    # non-existent name 'Complex196Col', so var18 could never be defined.
    if hasattr(tables, 'Complex192Col'):
        var18 = tables.ComplexCol(itemsize=24, dflt=(1. + 0.j))
    if hasattr(tables, 'Complex256Col'):
        var19 = tables.ComplexCol(itemsize=32, dflt=(1. + 0.j))
Example #4
0
class PairwiseRelationTable(tables.IsDescription):
    """PyTables row description for a relation between two entries.

    Each row names two entries by number, classifies their relation with an
    enumerated ``RelType`` and carries several numeric attributes.

    NOTE(review): the numeric attributes all default to -1, presumably
    meaning "not available" -- confirm against the code that writes rows.
    """
    EntryNr1 = tables.UInt32Col(pos=0)
    EntryNr2 = tables.UInt32Col(pos=1)
    # Enumerated relation type, stored as uint8; defaults to 'n/a'.
    RelType = tables.EnumCol(tables.Enum({
        '1:1': 0,
        '1:n': 1,
        'm:1': 2,
        'm:n': 3,
        'close paralog': 4,
        'homeolog': 5,
        'n/a': 6
    }),
                             'n/a',
                             base='uint8',
                             pos=2)
    # Single-precision attributes.
    Score = tables.Float32Col(pos=3, dflt=-1)
    Distance = tables.Float32Col(pos=4, dflt=-1)
    # Half-precision attributes.
    AlignmentOverlap = tables.Float16Col(pos=5, dflt=-1)
    SyntenyConservationLocal = tables.Float16Col(pos=6, dflt=-1)
    Confidence = tables.Float16Col(pos=7, dflt=-1)
Example #5
0
class Distribution(tb.IsDescription):
    """
    kind of volume distribution
    Table structure:
        - date : POSIX time (in seconds) stored as an integer
        - value : trading volume / (high price - low price)
        - price : column index of each value

    NOTE(review): the list above mentions ``date``, but the class defines a
    ``row`` column instead -- confirm which name is intended.
    """
    row = tb.UInt64Col(pos=0)      # unsigned 64-bit integer, first column
    price = tb.Float64Col(pos=1)   # double precision
    value = tb.Float16Col(pos=2)   # half precision
Example #6
0
 class ReconData(tables.IsDescription):
     """PyTables row description holding one reconstruction result per row.

     Columns, in ``pos`` order: a 64-bit integer event id, six
     half-precision float columns (``x``, ``y``, ``z``, ``t0``, ``E``,
     ``tau_d``) and a 64-bit integer ``success`` column.

     NOTE(review): units and the exact meaning of ``success`` are not
     visible in this definition -- confirm with the code that fills rows.
     """
     EventID = tables.Int64Col(pos=0)
     x = tables.Float16Col(pos=1)
     y = tables.Float16Col(pos=2)
     z = tables.Float16Col(pos=3)
     t0 = tables.Float16Col(pos=4)
     E = tables.Float16Col(pos=5)
     tau_d = tables.Float16Col(pos=6)
     success = tables.Int64Col(pos=7)
Example #7
0
class SnagData(tables.IsDescription):
    """
    Snag data array definitions
    real, dimension(maxcy1,mxsnag) :: hard_density, soft_density &
            , dbh_dead, hard_ht, soft_ht, hard_vol, soft_vol
    integer, dimension(maxcy1,mxsnag) :: spp, year_dead

    The declarations above are the source arrays this row layout mirrors.
    Each row additionally carries ``run_id``, ``cycle`` and ``year`` columns,
    and the integer ``spp`` array is represented here by the 2-character
    string column ``species``.
    """
    # Row identification columns.
    run_id = tables.StringCol(36, pos=0)   # 36-character string
    cycle = tables.UInt16Col(pos=1)
    year = tables.UInt16Col(pos=2)
    species = tables.StringCol(2, pos=3)   # 2-character string
    # Values taken from the snag arrays; reals are stored in half precision.
    dbh_dead = tables.Float16Col(pos=4)
    year_dead = tables.UInt16Col(pos=5)
    hard_density = tables.Float16Col(pos=6)
    hard_ht = tables.Float16Col(pos=7)
    hard_vol = tables.Float16Col(pos=8)
    soft_density = tables.Float16Col(pos=9)
    soft_ht = tables.Float16Col(pos=10)
    soft_vol = tables.Float16Col(pos=11)
Example #8
0
    class ReconData(tables.IsDescription):
        """PyTables row description for one event with two reconstructions.

        Each row stores the event id, an "inner" and an "outer"
        reconstruction result (``_in`` / ``_out`` suffixes), the truth
        values, and a trailing ``tau_d`` column marked as unfinished.
        Every column has a distinct ``pos`` so the on-disk order does not
        depend on how position ties are broken.
        """
        EventID = tables.Int64Col(pos=0)  # EventNo
        # inner recon
        E_sph_in = tables.Float16Col(pos=1)  # Energy
        x_sph_in = tables.Float16Col(pos=2)  # x position
        y_sph_in = tables.Float16Col(pos=3)  # y position
        z_sph_in = tables.Float16Col(pos=4)  # z position
        t0_in = tables.Float16Col(pos=5)  # time offset
        success_in = tables.Int64Col(pos=6)  # recon failure
        Likelihood_in = tables.Float16Col(pos=7)

        # outer recon
        E_sph_out = tables.Float16Col(pos=8)  # Energy
        x_sph_out = tables.Float16Col(pos=9)  # x position
        y_sph_out = tables.Float16Col(pos=10)  # y position
        z_sph_out = tables.Float16Col(pos=11)  # z position
        t0_out = tables.Float16Col(pos=12)  # time offset
        success_out = tables.Int64Col(pos=13)  # recon failure
        Likelihood_out = tables.Float16Col(pos=14)

        # truth info
        x_truth = tables.Float16Col(pos=15)  # x position
        y_truth = tables.Float16Col(pos=16)  # y position
        z_truth = tables.Float16Col(pos=17)  # z position
        E_truth = tables.Float16Col(pos=18)  # Energy

        # unfinished
        # Was pos=18, colliding with E_truth; 19 keeps it as the last column.
        tau_d = tables.Float16Col(pos=19)  # decay time constant
Example #9
0
def _build_batch_rows(first_index, n_rows, mels, stfts, text_lens,
                      speech_lens):
    """Build row dicts for the first ``n_rows`` slots of the batch buffers.

    Slot ``i`` receives ``INDEX_COL = first_index + i``. TEXTS_COL is left
    out on purpose; ``preprocess_to_table`` fills it in a later pass.
    """
    rows = []
    for i in range(n_rows):
        rows.append({
            INDEX_COL: first_index + i,
            MELS_COL: mels[i],
            MELS_SHAPE_COL: mels[i].shape,
            STFTS_COL: stfts[i],
            STFTS_SHAPE_COL: stfts[i].shape,
            TEXT_LENS_COL: text_lens[i],
            SPEECH_LENS_COL: speech_lens[i],
        })
    return rows


def preprocess_to_table(data, data_name, sr=16000):
    """Convert prompt/audio pairs into an HDF5 table and save the vocabulary.

    The work is done in three stages:

    1. Every audio file is processed once to find the usable examples
       (those where ``audio.process_audio`` returns a mel) and the longest
       encoded prompt, which fixes the width of TEXTS_COL.
    2. The table is created and the audio is processed again. Usable
       examples are buffered in fixed-size batches and each full batch is
       appended to the table; the leftover examples are appended as a final
       partial batch.
    3. The encoded prompts are padded to a common length and written into
       TEXTS_COL batch by batch.

    Args:
        data: mapping with ``'prompts'`` and ``'audio_files'`` sequences that
            are iterated in lockstep. If it has a ``'speakers'`` entry, that
            is saved to ``data/<data_name>/speakers.npy``.
        data_name: used as the table file name handed to the HDF5 helpers,
            in the speakers path, and as the argument to ``save_vocab``.
        sr: sample rate forwarded to ``audio.process_audio``.

    Raises:
        ValueError: if no audio file yields a usable mel.
    """
    filename = data_name

    # Number of examples buffered in memory before being flushed to disk.
    example_batch_size = 5000

    num_examples = len(data['prompts'])

    # Encoded prompts of the usable examples, in table-row order.
    texts = []

    max_freq_length = audio.maximum_audio_length // (audio.r *
                                                     audio.hop_length)
    print("num_examples: %s" % str(num_examples))
    print("max_freq_length: %s" % str(max_freq_length))
    print("1025*audio.r: %s" % str(1025 * audio.r))

    # Reusable batch buffers; slot ``count % example_batch_size`` is
    # overwritten once the previous batch has been flushed.
    text_lens = np.zeros((example_batch_size), dtype=np.int32)
    speech_lens = np.zeros((example_batch_size), dtype=np.int32)
    mels = np.zeros((example_batch_size, max_freq_length, 80 * audio.r),
                    dtype=np.float16)
    stfts = np.zeros((example_batch_size, max_freq_length, 1025 * audio.r),
                     dtype=np.float16)

    # Stage 1: the table schema needs the maximum text length up front, so
    # the usable prompts are encoded before any row is written.
    # NOTE: this means every audio file is processed twice.
    print("Processing audio...")
    texts_for_length = []
    audio_count = 0
    for text, audio_file in zip(data['prompts'], data['audio_files']):
        mel, stft = audio.process_audio(audio_file, sr=sr)
        if mel is not None:
            texts_for_length.append(
                np.array([process_char(c) for c in list(text)]))
        audio_count += 1
        if audio_count % 500 == 0:
            print("Processed %d audio samples..." % audio_count)
    print("Processed %d audio samples total!" % audio_count)

    if not texts_for_length:
        # Fail with a clear message instead of "max() arg is an empty
        # sequence".
        raise ValueError(
            "no usable audio examples: process_audio returned no mel for "
            "any of the %d input files" % audio_count)

    max_text_length = max(r.shape[0] for r in texts_for_length)
    print("max_text_length: %d" % max_text_length)
    padded_texts_for_length = pad_to_dense(texts_for_length, max_text_length)
    print("max_text_length: %s" % str(padded_texts_for_length.shape))

    table_description = {
        INDEX_COL: tables.Int64Col(),
        MELS_COL: tables.Float16Col(shape=(max_freq_length, 80 * audio.r)),
        MELS_SHAPE_COL: tables.Int64Col(shape=(2)),
        STFTS_COL: tables.Float16Col(shape=(max_freq_length, 1025 * audio.r)),
        STFTS_SHAPE_COL: tables.Int64Col(shape=(2)),
        TEXTS_COL: tables.Int32Col(shape=(max_text_length)),
        TEXT_LENS_COL: tables.Int32Col(),
        SPEECH_LENS_COL: tables.Int32Col()
    }
    create_hdf5_table_file(filename, table_description)

    print("len prompts: %d" % len(data["prompts"]))
    print("len audio_files: %d" % len(data["audio_files"]))

    # Stage 2: fill the batch buffers and append every full batch.
    count = 0
    for text, audio_file in tqdm(zip(data['prompts'], data['audio_files']),
                                 total=num_examples):

        original_text = text

        text = [process_char(c) for c in list(text)]
        mel, stft = audio.process_audio(audio_file, sr=sr)

        if mel is None:
            print("mel is None for text: %s" % str(original_text))
            continue

        texts.append(np.array(text))

        slot = count % example_batch_size
        text_lens[slot] = len(text)
        speech_lens[slot] = mel.shape[0]
        mels[slot] = mel
        stfts[slot] = stft

        count += 1

        if count % example_batch_size == 0:
            print("Writing data on count: %d/%d" % (count, num_examples))
            # The batch covers examples [count - batch_size, count); each
            # row gets its own index rather than all sharing ``count - 1``.
            rows = _build_batch_rows(count - example_batch_size,
                                     example_batch_size, mels, stfts,
                                     text_lens, speech_lens)
            append_rows_to_hdf5_table(filename, rows)
            print("Wrote batch sized '%d' to '%s'" %
                  (example_batch_size, filename))

    # Leftover examples that did not fill a whole batch. Their indices
    # continue right after the last full batch.
    remainder = count % example_batch_size
    rows = _build_batch_rows(count - remainder, remainder, mels, stfts,
                             text_lens, speech_lens)
    append_rows_to_hdf5_table(filename, rows)
    print("Final batch, wrote batch sized '%d' to '%s'" %
          (remainder, filename))

    # Stage 3: write the padded prompts. Slicing handles the final partial
    # batch, so no separate catch-up write is needed afterwards.
    print("texts len: %d" % len(texts))
    max_len = max(r.shape[0] for r in texts)
    for start in range(0, len(texts), example_batch_size):
        batch = texts[start:start + example_batch_size]
        update_all_rows_in_hdf5_table(filename,
                                      TEXTS_COL,
                                      pad_to_dense(batch, max_len),
                                      start_index=start)
        print("Wrote batch sized '%d' to '%s'" % (len(batch), filename))

    if 'speakers' in data:
        np.save('data/%s/speakers.npy' % data_name,
                data['speakers'],
                allow_pickle=False)

    # save vocabulary
    save_vocab(data_name)
Example #10
0
    DATETIME       12                timestamp
    YEAR           13                number
    VARCHAR        15                string
    BIT            16                number
    NEWDECIMAL     246               number
    ENUM           247               string
    TINY_BLOB      249               binary
    MEDIUM_BLOB    250               binary
    LONG_BLOB      251               binary
    BLOB           252               binary
    VAR_STRING     253               string
    STRING         254               string
    ============== ================= =============
    
"""

import tables as tb
import numpy as np

# see also:
# http://mysql-python.sourceforge.net/MySQLdb-1.2.2/public/MySQLdb.constants.FIELD_TYPE-module.html
# Maps a MySQL field-type name to the NumPy/Python type used for that column.
# The builtin ``float`` is used instead of ``np.float``: the latter was only
# an alias for the builtin and was removed in NumPy 1.24, where accessing it
# raises AttributeError at import time.
# NOTE(review): MySQL LONG is normally a 32-bit integer -- confirm that
# np.int16 is wide enough for the data being converted.
map_numpy = {'VAR_STRING': str, 'LONG': np.int16, 'FLOAT': float}

# TODO: Figure out variable length string atoms with references in the actual table. Or calibrate string field width (or both).
# PyTables column instance to use for each MySQL field-type name.
# NOTE(review): LONG and FLOAT are mapped to 16-bit columns -- confirm this
# width is intended for the values being stored.
map_pytables = dict(
    # 32 is relatively arbitrary - pytables doesn't support variable length
    # strings, so a fixed width has to be chosen.
    VAR_STRING=tb.StringCol(32),
    LONG=tb.Int16Col(),
    FLOAT=tb.Float16Col(),
)
Example #11
0
 class ReconData(tables.IsDescription):
     """PyTables row description holding four (x, y, z) positions per event.

     After the 64-bit integer event id, the columns come in four numbered
     groups (``x1..z1`` through ``x4..z4``), each stored as half-precision
     floats and ordered explicitly through ``pos``.

     NOTE(review): what distinguishes groups 1-4 is not visible in this
     definition -- confirm with the code that fills the rows.
     """
     EventID = tables.Int64Col(pos=0)  # EventNo
     # group 1
     x1 = tables.Float16Col(pos=1)  # x position
     y1 = tables.Float16Col(pos=2)  # y position
     z1 = tables.Float16Col(pos=3)  # z position
     # group 2
     x2 = tables.Float16Col(pos=4)  # x position
     y2 = tables.Float16Col(pos=5)  # y position
     z2 = tables.Float16Col(pos=6)  # z position
     # group 3
     x3 = tables.Float16Col(pos=7)  # x position
     y3 = tables.Float16Col(pos=8)  # y position
     z3 = tables.Float16Col(pos=9)  # z position
     # group 4
     x4 = tables.Float16Col(pos=10)  # x position
     y4 = tables.Float16Col(pos=11)  # y position
     z4 = tables.Float16Col(pos=12)  # z position
Example #12
0
 class ReconData(tables.IsDescription):
     """PyTables row description holding one reconstruction result per row.

     Columns, in ``pos`` order: a 64-bit integer event id followed by four
     half-precision float columns ``x``, ``y``, ``z`` and ``E``.

     NOTE(review): units are not visible in this definition -- confirm with
     the code that fills the rows.
     """
     EventID = tables.Int64Col(pos=0)
     x = tables.Float16Col(pos=1)
     y = tables.Float16Col(pos=2)
     z = tables.Float16Col(pos=3)
     E = tables.Float16Col(pos=4)