Exemple #1
0
def generate_training_data(pick_list, dataset, database, chunk_size):
    """
    Generate TFRecords from database.

    The pick list is split into chunks of ``chunk_size``. Each chunk is
    turned into examples through ``utils.parallel`` and written to its own
    zero-padded, numbered ``.tfrecord`` file inside the dataset directory
    (``config['DATASET_ROOT']`` joined with ``dataset``).

    :param pick_list: List of picks from Pick SQL query.
    :param str dataset: Output directory name.
    :param str database: SQL database.
    :param int chunk_size: Number of picks handed to each TFRecord file.
    """
    config = utils.get_config()
    dataset_dir = os.path.join(config['DATASET_ROOT'], dataset)
    utils.make_dirs(dataset_dir)

    # Ceiling division: a trailing partial chunk still produces a file, so
    # it has to be counted in the reported total (assumes utils.batch also
    # yields the final, shorter chunk -- TODO confirm). Integer arithmetic
    # also avoids the float round trip of int(a / b).
    total_batch = (len(pick_list) + chunk_size - 1) // chunk_size

    # The parallel result is nested two levels deep; flatten is applied twice.
    flatten = itertools.chain.from_iterable

    batch_picks = utils.batch(pick_list, size=chunk_size)
    for index, picks in enumerate(batch_picks):
        example_list = utils.parallel(picks,
                                      func=get_example_list,
                                      database=database)
        flat_list = list(flatten(flatten(example_list)))

        file_name = f'{index:0>5}.tfrecord'
        save_file = os.path.join(dataset_dir, file_name)
        io.write_tfrecord(flat_list, save_file)
        print(f'output {file_name} / {total_batch}')
Exemple #2
0
def read_event_list(sfile):
    """
    Read events from a catalog directory.

    :param sfile: Directory name, joined under ``config['CATALOG_ROOT']``.
    :return: Result of running ``get_event`` over the directory listing
        through ``parallel``.
    """
    sfile_dir = os.path.join(get_config()['CATALOG_ROOT'], sfile)
    file_names = get_dir_list(sfile_dir)
    print(f'reading events from {sfile_dir}')

    events = parallel(par=get_event, file_list=file_names)
    print(f'read {len(events)} events from {sfile}')
    return events
Exemple #3
0
def read_event_list(sfile_dir):
    """
    Collect the events found in an sfile directory.

    :param str sfile_dir: Directory containing SEISAN sfiles, given
        relative to ``config['CATALOG_ROOT']``.
    :rtype: list
    :return: Flat list of events.
    """
    catalog_root = utils.get_config()['CATALOG_ROOT']
    sfile_dir = os.path.join(catalog_root, sfile_dir)
    sfile_paths = utils.get_dir_list(sfile_dir)
    print(f'Reading events from {sfile_dir}')

    nested_events = utils.parallel(sfile_paths, func=get_event)

    # The parallel result is nested two levels deep; unwrap both levels.
    events = [
        event
        for group in nested_events
        for subgroup in group
        for event in subgroup
    ]
    print(f'Read {len(events)} events\n')
    return events
Exemple #4
0
def write_training_dataset(pick_list, geom, dataset, pickset):
    """
    Build examples from a pick list and write them to one TFRecord file.

    The output file lives in ``config['DATASET_ROOT']/dataset`` and is
    named after the station code of the first pick in ``pick_list``.

    :param pick_list: Picks to process; each must expose ``time`` and
        ``waveform_id.station_code``.
    :param geom: Forwarded unchanged to ``_write_picked_stream``.
    :param dataset: Output directory name.
    :param pickset: Forwarded unchanged to ``_write_picked_stream``.
    """
    dataset_dir = os.path.join(get_config()['DATASET_ROOT'], dataset)
    make_dirs(dataset_dir)

    # Pick times, in the same order as pick_list.
    pick_times = [pick.time for pick in pick_list]

    worker = partial(
        _write_picked_stream,
        pick_list=pick_list,
        pick_time_key=pick_times,
        geom=geom,
        pickset=pickset,
    )
    examples = parallel(worker, pick_list)

    station_code = pick_list[0].waveform_id.station_code
    save_file = os.path.join(dataset_dir, '{}.tfrecord'.format(station_code))

    write_tfrecord(examples, save_file)
Exemple #5
0
def parallel_to_tfrecord(batch_list):
    """
    Run ``_to_tfrecord`` over a batch list through ``seisnn.utils.parallel``.

    :param batch_list: Passed to ``parallel`` as ``file_list``.
    :return: Whatever ``parallel`` returns for the batch list.
    """
    # Imported at call time, inside the function body.
    from seisnn.utils import parallel

    return parallel(par=_to_tfrecord, file_list=batch_list)