def generate_training_data(pick_list, dataset, database, chunk_size):
    """
    Generate TFRecords from database.

    :param pick_list: List of picks from Pick SQL query.
    :param str dataset: Output directory name.
    :param str database: SQL database.
    :param int chunk_size: Number of examples stored in each TFRecord.
    """
    config = utils.get_config()
    dataset_dir = os.path.join(config['DATASET_ROOT'], dataset)
    utils.make_dirs(dataset_dir)

    total_batch = int(len(pick_list) / chunk_size)
    batch_picks = utils.batch(pick_list, size=chunk_size)
    for index, picks in enumerate(batch_picks):
        example_list = utils.parallel(picks,
                                      func=get_example_list,
                                      database=database)
        flatten = itertools.chain.from_iterable
        flat_list = list(flatten(flatten(example_list)))

        file_name = f'{index:0>5}.tfrecord'
        save_file = os.path.join(dataset_dir, file_name)
        io.write_tfrecord(flat_list, save_file)
        print(f'output {file_name} / {total_batch}')
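
# A minimal, self-contained sketch of the double-flatten used above (the data
# is hypothetical, not part of the library): utils.parallel appears to return
# nested lists, e.g. one list per worker batch, each holding one example list
# per pick, so the result is flattened twice before writing.
import itertools

per_worker_results = [
    [['ex_a1', 'ex_a2'], ['ex_b1']],  # worker 0 handled two picks
    [['ex_c1']],                      # worker 1 handled one pick
]
flatten = itertools.chain.from_iterable
print(list(flatten(flatten(per_worker_results))))
# ['ex_a1', 'ex_a2', 'ex_b1', 'ex_c1']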
def read_event_list(sfile):
    config = get_config()
    sfile_dir = os.path.join(config['CATALOG_ROOT'], sfile)
    sfile_list = get_dir_list(sfile_dir)
    print(f'reading events from {sfile_dir}')

    events = parallel(par=get_event, file_list=sfile_list)
    print(f'read {len(events)} events from {sfile}')
    return events
def read_event_list(sfile_dir):
    """
    Returns event list from sfile directory.

    :param str sfile_dir: Directory containing SEISAN S-files.
    :rtype: list
    :return: List of events.
    """
    config = utils.get_config()
    sfile_dir = os.path.join(config['CATALOG_ROOT'], sfile_dir)
    sfile_list = utils.get_dir_list(sfile_dir)
    print(f'Reading events from {sfile_dir}')

    event_list = utils.parallel(sfile_list, func=get_event)
    flatten = itertools.chain.from_iterable
    events = list(flatten(flatten(event_list)))
    print(f'Read {len(events)} events\n')
    return events
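
# Hypothetical usage of read_event_list: 'demo_catalog' stands in for a
# directory of SEISAN S-files under CATALOG_ROOT, and the returned objects are
# assumed to be ObsPy-style Event instances (get_event is not shown here).
events = read_event_list('demo_catalog')
for event in events[:3]:
    print(event.origins[0].time, len(event.picks))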
def write_training_dataset(pick_list, geom, dataset, pickset):
    """
    Writes picked streams of a single station into one TFRecord.

    :param pick_list: List of picks from one station.
    :param geom: Station geometry.
    :param str dataset: Output directory name.
    :param pickset: Pick set name.
    """
    config = get_config()
    dataset_dir = os.path.join(config['DATASET_ROOT'], dataset)
    make_dirs(dataset_dir)

    pick_time_key = []
    for pick in pick_list:
        pick_time_key.append(pick.time)

    par = partial(_write_picked_stream,
                  pick_list=pick_list,
                  pick_time_key=pick_time_key,
                  geom=geom,
                  pickset=pickset)
    example_list = parallel(par, pick_list)

    station = pick_list[0].waveform_id.station_code
    file_name = '{}.tfrecord'.format(station)
    save_file = os.path.join(dataset_dir, file_name)
    write_tfrecord(example_list, save_file)
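
# Minimal sketch of the functools.partial pattern above (toy function and
# values, not part of the library): the shared keyword arguments are bound
# once, and the mapping call then only passes each pick positionally.
from functools import partial

def _demo_write(pick, pick_list, pick_time_key, geom, pickset):
    return f'{pick}:{pickset}'

demo = partial(_demo_write, pick_list=[], pick_time_key=[], geom=None,
               pickset='manual')
print([demo(p) for p in ['P', 'S']])  # ['P:manual', 'S:manual']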
def parallel_to_tfrecord(batch_list):
    """
    Converts batches to TFRecord examples in parallel.

    :param batch_list: List of batch files.
    :return: List of examples.
    """
    from seisnn.utils import parallel

    example_list = parallel(par=_to_tfrecord, file_list=batch_list)
    return example_list