Example #1
    def __init__(self):
        # Try not to let tests interfere with actual system:
        if os.getenv('CALLED_FROM_PYTEST') is None:
            # Not called from pytest
            local_proj = os.path.join(os.getcwd(), constants.PROJ_CONF_DIR)
            local_proj_conf = os.path.join(local_proj, 'projects.conf')
            if os.path.isdir(local_proj) and os.path.isfile(local_proj_conf):
                PROJECTS_PATH = local_proj
            else:
                PROJECTS_PATH = constants.PROJECTS_PATH
        else:
            PROJECTS_PATH = constants.PROJECTS_PATH_TEST

        self.PROJECTS_PATH = PROJECTS_PATH
        self.PROJECTS_FILE = os.path.join(PROJECTS_PATH, 'projects.conf')

        self._parse_command_line()
        self._load_config()

        setup_logger(self.args)
        logging.info('Logging level includes INFO.')
        logging.debug('Logging level includes DEBUG.')
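The CALLED_FROM_PYTEST guard above only helps if the test suite actually sets that variable before the constructor runs. Below is a minimal sketch of how a conftest.py could do that with an autouse fixture; the fixture name and the file are assumptions for illustration, not part of the code above.

# conftest.py -- hypothetical sketch; the real gmprocess test setup may differ.
import os

import pytest


@pytest.fixture(scope='session', autouse=True)
def called_from_pytest():
    # Set the flag that the constructor checks, then clean up afterwards.
    os.environ['CALLED_FROM_PYTEST'] = 'True'
    yield
    del os.environ['CALLED_FROM_PYTEST']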
Example #2
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import numpy as np
import json
import pkg_resources

from gmprocess.io.read_directory import directory_to_streams
from gmprocess.utils.logging import setup_logger
from gmprocess.core.streamcollection import StreamCollection

from obspy import UTCDateTime

setup_logger()


def test_StreamCollection():

    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)
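The test relies on pkg_resources.resource_filename to resolve a data path relative to an installed package. Here is a standalone sketch of that lookup pattern; the package name and relative path are placeholders, not gmprocess paths.

import os

import pkg_resources

# Resolve a directory that ships inside an installed package.
dpath = os.path.join('data', 'testdata')  # placeholder relative path
data_dir = pkg_resources.resource_filename('mypackage', dpath)  # placeholder package
print(os.path.isdir(data_dir))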
Example #3
#!/usr/bin/env python
import logging
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from libcomcat.search import search
from gmprocess.utils.logging import setup_logger

setup_logger(args=None, level="info")

DELTA_MAG = 0.5
# Can change if target event only has month, day, and year
DELTA_HOUR = 12
DIST_SCALER = 20
COORD_KM_SCALER = 0.009
ERROR_THRESH = 1.0


def cross_reference_to_usgs_id(project_name, eqids, mags, times, lats, lons):
    """
    Look for potential event matches for events in data and return
    a dictionary mapping the event IDs to the respective match with the
    lowest error.
    Args:
        project_name (string):
            Project or data source name.
        eqids (np.array):
            Earthquake IDs of target events to be cross-referenced
        mags (np.array):
            Magnitudes of target events to be cross-referenced.
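The function body is truncated here, but the module constants above suggest an error score built from magnitude, origin-time, and location offsets. The following is only an illustrative sketch of such a weighting, not the actual gmprocess implementation; the formula and argument names are assumptions.

import numpy as np

# Constants copied from the snippet above.
DELTA_MAG = 0.5
DELTA_HOUR = 12
DIST_SCALER = 20
COORD_KM_SCALER = 0.009
ERROR_THRESH = 1.0


def match_error(dmag, dhours, dlat_deg, dlon_deg):
    # Hypothetical combined error between a target event and a candidate match.
    mag_term = abs(dmag) / DELTA_MAG
    time_term = abs(dhours) / DELTA_HOUR
    # COORD_KM_SCALER is roughly degrees per kilometer, so dividing a
    # coordinate offset (in degrees) by it gives an approximate distance in km.
    dist_km = np.hypot(dlat_deg, dlon_deg) / COORD_KM_SCALER
    dist_term = dist_km / DIST_SCALER
    # Candidates with a score below ERROR_THRESH would count as matches.
    return mag_term + time_term + dist_term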
Example #4
def process_event(event,
                  outdir,
                  pcommands,
                  config,
                  input_directory,
                  process_tag,
                  files_created,
                  output_format,
                  status,
                  recompute_metrics,
                  export_dir=None):

    # setup logging to write to the input logfile
    argthing = namedtuple('args', ['debug', 'quiet'])
    args = argthing(debug=True, quiet=False)
    setup_logger(args)

    logger = logging.getLogger()
    stream_handler = logger.handlers[0]
    logfile = os.path.join(outdir, '%s.log' % event.id)
    fhandler = logging.FileHandler(logfile)
    logger.removeHandler(stream_handler)
    logger.addHandler(fhandler)

    event_dir = os.path.join(outdir, event.id)
    if not os.path.exists(event_dir):
        os.makedirs(event_dir)

    workname = os.path.join(event_dir, WORKSPACE_NAME)
    workspace_exists = os.path.isfile(workname)
    workspace_has_processed = False
    workspace = None
    processing_done = False

    if workspace_exists:
        workspace = StreamWorkspace.open(workname)
        labels = workspace.getLabels()
        if len(labels):
            labels.remove('unprocessed')
        elif 'assemble' not in pcommands:
            print('No data in workspace. Please run assemble.')
            sys.exit(1)

        if len(labels) == 1:
            process_tag = labels[0]
            workspace_has_processed = True
        else:
            if 'process' not in pcommands:
                fmt = '\nThere are %i sets of processed data in %s.'
                tpl = (len(labels), workname)
                print(fmt % tpl)
                print(('This software can only handle one set of '
                       'processed data. Exiting.\n'))
                sys.exit(1)

    download_done = False

    # Need to initialize rstreams/pstreams
    rstreams = []
    pstreams = []

    rupture_file = None
    if 'assemble' in pcommands:
        logging.info('Downloading/loading raw streams...')
        workspace, workspace_file, rstreams, rupture_file = download(
            event, event_dir, config, input_directory)

        download_done = True
        append_file(files_created, 'Workspace', workname)

    else:
        if not workspace_exists:
            print('\nYou opted not to download or process from input.')
            print('No previous HDF workspace file could be found.')
            print('Try re-running with the assemble command with or ')
            print('without the --directory option.\n')
            sys.exit(1)
        if 'process' in pcommands:
            logging.info('Getting raw streams from workspace...')
            with warnings.catch_warnings():
                warnings.simplefilter("ignore",
                                      category=H5pyDeprecationWarning)
                rstreams = workspace.getStreams(event.id,
                                                labels=['unprocessed'])
            download_done = True
        else:
            need_processed = set(['report', 'shakemap'])
            need_pstreams = len(need_processed.intersection(pcommands))
            if workspace_has_processed:
                if need_pstreams:
                    logging.info('Getting processed streams from workspace...')
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore",
                                              category=H5pyDeprecationWarning)
                        pstreams = workspace.getStreams(event.id,
                                                        labels=[process_tag])
                download_done = True
                processing_done = True

    if ('process' in pcommands and download_done and not processing_done
            and len(rstreams)):
        logging.info('Processing raw streams for event %s...' % event.id)
        pstreams = process_streams(rstreams, event, config=config)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            workspace.addStreams(event, pstreams, label=process_tag)
            workspace.calcMetrics(event.id,
                                  labels=[process_tag],
                                  config=config,
                                  streams=pstreams,
                                  stream_label=process_tag,
                                  rupture_file=rupture_file)
        processing_done = True

    if 'export' in pcommands:
        if export_dir is not None:
            if not os.path.isdir(export_dir):
                os.makedirs(export_dir)
            outdir = export_dir

        labels = workspace.getLabels()
        if 'unprocessed' not in labels:
            fmt = ('Workspace file "%s" appears to have no unprocessed '
                   'data. Skipping.')
            logging.info(fmt % workspace_file)
        else:
            labels.remove('unprocessed')
            if not labels:
                fmt = ('Workspace file "%s" appears to have no processed '
                       'data. Skipping.')
                logging.info(fmt % workspace_file)
            else:
                logging.info('Creating tables for event %s...', event.id)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore",
                                          category=H5pyDeprecationWarning)
                    if recompute_metrics:
                        del workspace.dataset.auxiliary_data.WaveFormMetrics
                        del workspace.dataset.auxiliary_data.StationMetrics
                        workspace.calcMetrics(event.id,
                                              labels=labels,
                                              config=config,
                                              rupture_file=rupture_file)
                    event_table, imc_tables, readmes = workspace.getTables(
                        labels[0], streams=pstreams, stream_label=process_tag)
                    ev_fit_spec, fit_readme = workspace.getFitSpectraTable(
                        event.id, labels[0], pstreams)

                # Set the precisions for the imc tables, event table, and
                # fit_spectra table before writing
                imc_tables_formatted = {}
                for imc, imc_table in imc_tables.items():
                    imc_tables_formatted[imc] = set_precisions(imc_table)
                event_table_formatted = set_precisions(event_table)
                df_fit_spectra_formatted = set_precisions(ev_fit_spec)

                if not os.path.isdir(outdir):
                    os.makedirs(outdir)

                filenames = ['events'] + \
                    [imc.lower() for imc in imc_tables_formatted.keys()] + \
                    [imc.lower() + '_README' for imc in readmes.keys()] + \
                    ['fit_spectra_parameters', 'fit_spectra_parameters_README']

                files = [event_table_formatted] + list(
                    imc_tables_formatted.values()) + list(readmes.values()) + [
                        df_fit_spectra_formatted, fit_readme
                    ]

                if output_format != 'csv':
                    output_format = 'xlsx'

                for filename, df in dict(zip(filenames, files)).items():
                    filepath = os.path.join(outdir,
                                            filename + '.%s' % output_format)
                    if os.path.exists(filepath):
                        if 'README' in filename:
                            continue
                        else:
                            mode = 'a'
                            header = False
                    else:
                        mode = 'w'
                        header = True
                        append_file(files_created, 'Tables', filepath)
                    if output_format == 'csv':
                        df.to_csv(filepath,
                                  index=False,
                                  float_format=DEFAULT_FLOAT_FORMAT,
                                  na_rep=DEFAULT_NA_REP,
                                  mode=mode,
                                  header=header)
                    else:
                        df.to_excel(filepath,
                                    index=False,
                                    float_format=DEFAULT_FLOAT_FORMAT,
                                    na_rep=DEFAULT_NA_REP,
                                    mode=mode,
                                    header=header)

    if ('report' in pcommands and processing_done and len(pstreams)):
        logging.info('Creating diagnostic plots for event %s...' % event.id)
        plot_dir = os.path.join(event_dir, 'plots')
        if not os.path.isdir(plot_dir):
            os.makedirs(plot_dir)
        for stream in pstreams:
            summary_plots(stream, plot_dir, event)

        mapfile = draw_stations_map(pstreams, event, event_dir)
        plot_moveout(pstreams,
                     event.latitude,
                     event.longitude,
                     file=os.path.join(event_dir, 'moveout_plot.png'))

        append_file(files_created, 'Station map', mapfile)
        append_file(files_created, 'Moveout plot', 'moveout_plot.png')

        logging.info('Creating diagnostic report for event %s...' % event.id)
        # Build the summary report?
        build_conf = config['build_report']
        report_format = build_conf['format']
        if report_format == 'latex':
            report_file, success = build_report_latex(pstreams,
                                                      event_dir,
                                                      event,
                                                      config=config)
        else:
            report_file = ''
            success = False
        if os.path.isfile(report_file) and success:
            append_file(files_created, 'Summary report', report_file)

    if 'provenance' in pcommands and processing_done and len(pstreams):
        logging.info('Creating provenance table for event %s...' % event.id)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            provdata = workspace.getProvenance(event.id, labels=[process_tag])
        if output_format == 'csv':
            csvfile = os.path.join(event_dir, 'provenance.csv')
            append_file(files_created, 'Provenance', csvfile)
            provdata.to_csv(csvfile)
        else:
            excelfile = os.path.join(event_dir, 'provenance.xlsx')
            append_file(files_created, 'Provenance', excelfile)
            provdata.to_excel(excelfile, index=False)

    if 'shakemap' in pcommands and processing_done and len(pstreams):
        logging.info('Creating shakemap table for event %s...' % event.id)
        shakemap_file, jsonfile = save_shakemap_amps(pstreams, event,
                                                     event_dir)
        append_file(files_created, 'shakemap', shakemap_file)
        append_file(files_created, 'shakemap', jsonfile)

    if status and processing_done and len(pstreams):
        if status == 'short':
            index = 'Failure reason'
            col = ['Number of records']
        elif status == 'long':
            index = 'Station ID'
            col = ['Failure reason']
        elif status == 'net':
            index = 'Network'
            col = ['Number of passed records', 'Number of failed records']

        status_info = pstreams.get_status(status)
        status_info.to_csv(os.path.join(event_dir, 'status.csv'),
                           header=col,
                           index_label=index)

    # since we don't know how many events users will be processing,
    # let's guard against memory issues by clearing out the big data
    # structures
    workspace.close()

    logging.info('Finishing event %s' % event.id)

    return workname
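process_event starts by replacing the root logger's stream handler with a per-event FileHandler so that messages land in the event's own log file. A self-contained standard-library sketch of that handler swap follows; the log path is a placeholder.

import logging
import os
import tempfile


def redirect_root_logger(logfile):
    # Replace the root logger's first handler (if any) with a file handler.
    logger = logging.getLogger()
    if logger.handlers:
        logger.removeHandler(logger.handlers[0])
    logger.addHandler(logging.FileHandler(logfile))
    logger.setLevel(logging.INFO)


logfile = os.path.join(tempfile.gettempdir(), 'event_abc123.log')  # placeholder path
redirect_root_logger(logfile)
logging.info('messages now go to the per-event log file')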
Example #5
def main():
    logging.warning("gmprocess2 (formerly gmprocess) is deprecated "
                    "and will be removed soon.")
    logging.warning("Please use gmrecords instead.")
    description = '''
    Download, process, and extract metrics from raw ground motion data.

This program will allow the user to:
   - download raw data from a number of sources, including:
   - Any FDSN provider which serves waveform data
   - Japan's KNET/KikNet repository (requires login info)
   - ...
'''
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=MyFormatter)

    # ***** Required arguments
    parser.add_argument('-o',
                        '--output-directory',
                        help='Output directory',
                        metavar="DIRECTORY",
                        action='store',
                        type=str,
                        required=True,
                        dest='outdir')

    # ***** Command arguments
    help_assemble = format_helptext(
        'Download data from all available online sources, or load raw data '
        'from files if --directory is selected.')
    parser.add_argument('--assemble',
                        help=help_assemble,
                        action='store_true',
                        dest='assemble')

    help_process = format_helptext(
        'Process data using steps defined in configuration file.')
    parser.add_argument('--process',
                        help=help_process,
                        action='store_true',
                        dest='process')

    help_report = format_helptext(
        'Create a summary report for each event specified.')
    parser.add_argument('--report',
                        help=help_report,
                        action='store_true',
                        dest='report')

    help_provenance = format_helptext(
        'Generate provenance table in --format format.')
    parser.add_argument('--provenance',
                        help=help_provenance,
                        action='store_true',
                        dest='provenance')

    help_export = format_helptext(
        'Generate metrics tables (NGA-style "flat" files) for all events '
        'and IMCs.')
    parser.add_argument('--export',
                        help=help_export,
                        action='store_true',
                        dest='export')

    help_export_dir = format_helptext('Specify an alternate directory for the '
                                      'export files, which defaults to the '
                                      'directory above event directory.')
    parser.add_argument('--export-dir', help=help_export_dir)

    help_shakemap = format_helptext(
        'Generate ShakeMap-friendly peak ground motions table.')
    parser.add_argument('--shakemap',
                        help=help_shakemap,
                        action='store_true',
                        dest='shakemap')

    # ***** Optional arguments
    group = parser.add_mutually_exclusive_group(required=False)
    help_eventids = format_helptext('ComCat Event IDs')
    group.add_argument('--eventids', help=help_eventids, nargs='+')

    help_textfile = format_helptext(
        'Text file containing lines of ComCat Event IDs or event '
        'information (ID TIME LAT LON DEPTH MAG)')
    group.add_argument('--textfile',
                       help=help_textfile,
                       action='store',
                       dest='textfile')

    help_event = format_helptext(
        'Single event information as ID TIME(YYYY-MM-DDTHH:MM:SS) '
        'LAT LON DEP MAG')
    group.add_argument('--eventinfo',
                       help=help_event,
                       type=str,
                       nargs=7,
                       metavar=('ID', 'TIME', 'LAT', 'LON', 'DEPTH', 'MAG',
                                'MAG_TYPE'))

    help_dir = format_helptext(
        'Sidestep online data retrieval and get from local directory instead. '
        'This is the path where data already exists. It must be organized in '
        'a \'raw\' directory inside directories named by event ID. '
        'For example, if `--directory` is \'proj_dir\' and you have data for '
        'event id \'abc123\' then the raw data to be read in should be '
        'located in `proj_dir/abc123/raw/`.')
    parser.add_argument('--directory',
                        help=help_dir,
                        action='store',
                        dest='directory')

    help_format = format_helptext('Output format for tabular information')
    parser.add_argument('--format',
                        help=help_format,
                        choices=['excel', 'csv'],
                        default='csv',
                        dest='format')

    help_tag = format_helptext(
        'Processing label (single word, no spaces) to attach to processed '
        'files. Defaults to the current time in YYYYMMDDHHMMSS format.')
    parser.add_argument('--process-tag',
                        help=help_tag,
                        action='store',
                        type=str,
                        dest='process_tag')

    help_config = format_helptext('Supply custom configuration file')
    parser.add_argument('--config',
                        help=help_config,
                        action='store',
                        type=str,
                        dest='config')

    help_recompute = format_helptext(
        'Recompute metrics (i.e. from new config)')
    parser.add_argument('--recompute-metrics',
                        help=help_recompute,
                        action='store_true',
                        dest='recompute_metrics')

    help_logfile = format_helptext(
        'Supply file name to store processing log info.')
    parser.add_argument('--log-file',
                        help=help_logfile,
                        action='store',
                        dest='log_file')

    nhelpstr = 'Number of parallel processes to run over events.'
    parser.add_argument('-n',
                        '--num-processes',
                        default=0,
                        type=int,
                        help=nhelpstr)

    help_status = format_helptext(
        'Output failure information, either in short form ("short"), '
        'long form ("long"), or network form ("net"). '
        'short: Two column table, where the columns are "failure reason" and '
        '"number of records". net: Three column table where the columns are '
        '"network", "number passed", and "number failed". long: Two column '
        'table, where columns are "station ID" and "failure reason".')
    parser.add_argument('--status',
                        choices=['short', 'long', 'net'],
                        dest='status',
                        help=help_status)

    # ***** Shared arguments
    parser = add_shared_args(parser)
    args = parser.parse_args()

    tstart = datetime.now()
    # get the process tag from the user or define by current datetime
    process_tag = args.process_tag or datetime.utcnow().strftime(TAG_FMT)

    # config handling
    configfile = args.config
    if configfile is not None:
        config = update_config(configfile)
        if config is None:
            print('\nCustom config file %s is invalid. Exiting.' % configfile)
            sys.exit(1)

    else:
        config = get_config()

    outdir = args.outdir
    eventids = args.eventids
    textfile = args.textfile
    eventinfo = args.eventinfo
    input_directory = args.directory

    # get a list of ScalarEvent objects from one of the inputs
    events = get_events(eventids, textfile, eventinfo, input_directory, outdir)
    if not events:
        print('No event information was found. Exiting.')
        sys.exit(1)

    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    workspace_files = []
    files_created = {}

    logbase = 'gmprocess_batch_log_'
    logfmt = logbase + '%i.txt'

    # compare list of all commands with list of actual commands
    process_commands = set(
        ['assemble', 'process', 'report', 'shakemap', 'provenance', 'export'])
    pcommands = []
    if args.assemble:
        pcommands.append('assemble')
    if args.process:
        pcommands.append('process')
    if args.provenance:
        pcommands.append('provenance')
    if args.report:
        pcommands.append('report')
    if args.shakemap:
        pcommands.append('shakemap')
    if args.export:
        pcommands.append('export')

    if len(process_commands.intersection(set(pcommands))) > 0:
        if args.num_processes:
            # parallelize processing on events using forked processes
            try:
                client = Client(n_workers=args.num_processes)
            except OSError:
                sys.stderr.write("Could not create a dask client.\n")
                sys.exit(1)

            # Need a dict holding all args that do not change across calls
            _argdict_ = {
                'outdir': outdir,
                'pcommands': pcommands,
                'config': config,
                'input_directory': input_directory,
                'process_tag': process_tag,
                'files_created': files_created,
                'output_format': args.format,
                'status': args.status,
                'recompute_metrics': args.recompute_metrics,
                'export_dir': args.export_dir
            }

            def dask_process_event(event):
                """
                Wrapper function for multiprocessing of process_event method.
                """
                workname = process_event(event, **_argdict_)
                return event, workname

            futures = client.map(dask_process_event, events)

            for _, result in as_completed(futures, with_results=True):
                print('Completed event: %s, %s' %
                      (result[0].id, str(result[1])))

        else:
            logfile = os.path.join(outdir, logfmt % os.getpid())
            for event in events:
                workname = process_event(event,
                                         outdir,
                                         pcommands,
                                         config,
                                         input_directory,
                                         process_tag,
                                         files_created,
                                         args.format,
                                         args.status,
                                         args.recompute_metrics,
                                         export_dir=args.export_dir)
                workspace_files.append(workname)
                print('Completed event: %s, %s' % (event.id, str(workname)))

    # logging
    logger = None
    setup_logger(args)
    if args.log_file:
        logger = logging.getLogger()
        stream_handler = logger.handlers[0]
        fhandler = logging.FileHandler(args.log_file)
        logger.removeHandler(stream_handler)
        logger.addHandler(fhandler)

    # transfer the logfile contents into our global logger
    # first get the handler
    if logger is None:
        logger = logging.getLogger()
    handler = logger.handlers[0]
    # then get the current formatter
    old_format = handler.formatter
    handler.setFormatter(logging.Formatter('%(message)s'))
    logfiles = glob.glob(os.path.join(outdir, logbase + '*'))
    for logfile in logfiles:
        with open(logfile, 'rt', encoding='utf-8') as logobj:
            for line in logobj.readlines():
                logging.info(line.strip())
        os.remove(logfile)

    # reset handler back to original formatter
    handler.setFormatter(old_format)

    logging.info('%i workspace files created' % len(workspace_files))

    if 'export' in pcommands:

        imc_table_names = [
            file.replace('_README', '') for file in os.listdir(outdir)
            if 'README' in file
        ]
        imc_tables = {}
        for file in imc_table_names:
            imc_tables[file.replace('.%s' % args.format, '')] = pd.read_csv(
                os.path.join(outdir, file))
            if 'fit_spectra_parameters' in imc_tables:
                del imc_tables['fit_spectra_parameters']

        # TODO - where is this being written? Is it a requirement?
        event_file = os.path.join(outdir, 'events.csv')
        if os.path.isfile(event_file):
            event_table = pd.read_csv(event_file)
        else:
            data = [{'id': event.id, 'magnitude': event.magnitude}]
            event_table = pd.DataFrame(data=data)

        # make a regression plot of the most common imc/imt combination we
        # can find
        if not len(imc_tables):
            msg = '''No IMC tables found. It is likely that no streams
            passed checks. If you created reports for the events you
            have been processing, check those to see if this is the case,
            then adjust your configuration as necessary to process the data.
            '''
            logging.warning(msg)
        else:
            pref_imcs = [
                'rotd50.0',
                'greater_of_two_horizontals',
                'h1',
                'h2',
            ]
            pref_imts = ['PGA', 'PGV', 'SA(1.0)']
            found_imc = None
            found_imt = None
            for imc in pref_imcs:
                if imc in imc_tables:
                    for imt in pref_imts:
                        if imt in imc_tables[imc].columns:
                            found_imt = imt
                            found_imc = imc
                            break
                    if found_imc:
                        break
            # now look for whatever IMC/IMT combination we can find
            if imc_tables and not found_imc:
                found_imc = list(imc_tables.keys())[0]
                table_cols = set(imc_tables[found_imc].columns)
                imtlist = list(table_cols - NON_IMT_COLS)
                found_imt = imtlist[0]

            if found_imc and found_imt:
                pngfile = '%s_%s.png' % (found_imc, found_imt)
                regression_file = os.path.join(outdir, pngfile)
                plot_regression(event_table,
                                found_imc,
                                imc_tables[found_imc],
                                found_imt,
                                regression_file,
                                distance_metric='EpicentralDistance',
                                colormap='viridis_r')
                append_file(files_created, 'Multi-event regression plot',
                            regression_file)

    if args.status:
        if args.status == 'short':
            index_col = 'Failure reason'
        elif args.status == 'long':
            index_col = 'Station ID'
        elif args.status == 'net':
            index_col = 'Network'
        statuses = []
        for event in events:
            status_path = os.path.join(outdir, event.id, 'status.csv')
            if os.path.exists(status_path):
                status = pd.read_csv(status_path, index_col=index_col)
                if args.status == 'long':
                    status['Event ID'] = event.id
                statuses.append(status)
        if statuses:
            comp_status_path = os.path.join(outdir, 'complete_status.csv')
            if args.status == 'long':
                for idx, status in enumerate(statuses):
                    if idx == 0:
                        status.to_csv(comp_status_path, mode='w')
                    else:
                        status.to_csv(comp_status_path, mode='a', header=False)
            else:
                df_status = pd.concat(statuses)
                df_status = df_status.groupby(df_status.index).sum()
                df_status.to_csv(comp_status_path)
            append_file(files_created, 'Complete status', comp_status_path)

    print('\nThe following files have been created:')
    for file_type, file_list in files_created.items():
        print('File type: %s' % file_type)
        for fname in file_list:
            print('\t%s' % fname)

    tend = datetime.now()
    dt = (tend - tstart).total_seconds()
    minutes = dt // 60
    seconds = dt % 60
    fmt = '\nElapsed processing time: %i minutes, %i seconds.'
    print(fmt % (minutes, seconds))

    print('\nProcessing is complete.\n')
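For the short and net status forms, the batch driver sums the per-event status tables by index before writing complete_status.csv. A minimal pandas sketch of that aggregation, using made-up failure reasons and counts:

import pandas as pd

# Hypothetical per-event status tables indexed by failure reason.
status_a = pd.DataFrame({'Number of records': [3, 1]},
                        index=['Clipped', 'Low SNR'])
status_b = pd.DataFrame({'Number of records': [2]},
                        index=['Low SNR'])

combined = pd.concat([status_a, status_b])
combined = combined.groupby(combined.index).sum()
combined.to_csv('complete_status.csv', index_label='Failure reason')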
Example #6
def main():
    desc = '''Convert a directory of strong motion data files into any ObsPy
    supported format.

https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.write.html#supported-formats

    The inventory information will be written as an
    accompanying file in station XML format.

    To convert a single file in the NIED KNET format to MiniSEED:

    gmconvert AOM0011801241951.EW

    The following files will be written to the current directory:
        - BO.AOM001.--.HN2.mseed
        - BO.AOM001.--.HN2.xml

    To convert the three files that make up the BO.AOM001 station data into
    one MiniSEED file:

    gmconvert AOM0011801241951.*

    The following files will be written to the current directory:
        - BO.AOM001.HN.mseed
        - BO.AOM001.HN.xml

    To convert a directory "indatadir" full of files to SAC format, and write
    to a directory called "outdatadir":

    gmconvert -i datadir -o outdatadir -f SAC

    Note: The data files in "indatadir" can be distributed through
    subdirectories and gmconvert will find them.

    '''
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=CustomFormatter)
    parser.add_argument('files', help='List of files to convert.',
                        nargs='*', default=None)
    parser.add_argument('-i', '--indir',
                        help='Directory containing input files to convert.')
    parser.add_argument('-o', '--outdir',
                        help='Output directory.', default=os.getcwd())
    parser.add_argument('-f', '--format',
                        help='Output strong motion data format.',
                        choices=FORMATS, default='MSEED')

    # Shared arguments
    parser = add_shared_args(parser)

    args = parser.parse_args()

    setup_logger(args)
    logging.info("Running gmconvert.")

    # gather arguments
    indir = args.indir
    outdir = args.outdir
    oformat = args.format

    has_files = args.files is not None and len(args.files)

    if has_files and args.indir is not None:
        print('Specify input files or an input directory, not both.')
        sys.exit(1)

    if args.files is None and args.indir is None:
        print('You must specify input files or an input directory.')
        sys.exit(1)

    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    if args.files:
        # read all the data files, gather up a list of obspy Stream objects
        allstreams = []
        error_dict = {}
        for dfile in args.files:
            logging.info('Parsing %s...' % dfile)
            try:
                streams = read_data(dfile)
            except BaseException as e:
                error_dict[dfile] = str(e)
                continue
            allstreams += streams
    else:
        # grab all the files in the input directory
        allstreams, unprocessed, errors = directory_to_streams(indir)
        error_dict = dict(zip(unprocessed, errors))

    sc = StreamCollection(allstreams)

    for stream in sc:
        streamid = stream.get_id()
        if len(stream) == 1:
            streamid = stream[0].get_id()
        outfile = os.path.join(outdir, '%s.%s' % (streamid, oformat.lower()))
        invfile = os.path.join(outdir, '%s.xml' % (streamid))
        inv_format = 'STATIONXML'
        inv = stream.getInventory()
        logging.info('Writing data file %s...' % outfile)
        stream.write(outfile, format=oformat)
        logging.info('Writing inventory file %s...' % invfile)
        inv.write(invfile, format=inv_format)

    print('Wrote %i streams to %s' % (len(sc), outdir))
    if len(error_dict):
        print('\nThe following files could not be read:')
        for fname, error in error_dict.items():
            print('\t%s - "%s"' % (fname, error))
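The conversion loop above ultimately calls ObsPy's generic Stream.write. The same idea, stripped down to plain ObsPy without gmprocess, is sketched below; the input file name is a placeholder taken from the help text, and the output name is made up.

from obspy import read

# Read any ObsPy-supported waveform file and rewrite it as MiniSEED.
st = read('AOM0011801241951.EW')  # placeholder: a NIED KNET file, as in the help text
st.write('BO.AOM001.mseed', format='MSEED')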