    def main(self, gmrecords):
        """
        Download data and organize it in the project data directory.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)
        self.gmrecords = gmrecords
        self._check_arguments()

        self._get_events()

        logging.info('Number of events to download: %s' % len(self.events))
        for event in self.events:
            logging.info('Starting event: %s' % event.id)
            event_dir = os.path.join(gmrecords.data_path, event.id)
            if not os.path.exists(event_dir):
                os.makedirs(event_dir)

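            # Fetch raw records for this event only; building the ASDF
            # workspace and StreamCollection is skipped at this stage.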
            _ = download(event=event,
                         event_dir=event_dir,
                         config=gmrecords.conf,
                         directory=None,
                         create_workspace=False,
                         stream_collection=False)

    def _assemble_event(self, event):
        logging.info('Starting event: %s' % event.id)
        event_dir = os.path.join(self.gmrecords.data_path, event.id)
        if not os.path.exists(event_dir):
            os.makedirs(event_dir)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        workspace_exists = os.path.isfile(workname)
        if workspace_exists:
            logging.info("ASDF exists: %s" % workname)
            if not self.gmrecords.args.overwrite:
                logging.info("The --overwrite argument not selected.")
                logging.info("No action taken for %s." % event.id)
                return event.id
            else:
                logging.info(
                    "Removing existing ASDF file: %s" % workname
                )
                os.remove(workname)

        # TODO: probably want to break up `download` into finer steps to
        # call here. Also, files besides the workspace are created that
        # are not being tracked (e.g., raw data plots, event.json).
        workspace, _, _, _ = download(
            event=event,
            event_dir=event_dir,
            config=self.gmrecords.conf,
            directory=self.gmrecords.data_path
        )
        workspace.close()
        self.append_file('Workspace', workname)
        return event.id
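A self-contained sketch of the overwrite guard used in _assemble_event above; prepare_workspace_path and the default workspace_name are hypothetical names used only for illustration.

# Hypothetical standalone version of the ASDF overwrite guard.
import os
import logging

def prepare_workspace_path(event_dir, overwrite, workspace_name='workspace.h5'):
    """Return the workspace path, removing any existing file if allowed."""
    workname = os.path.join(event_dir, workspace_name)
    if os.path.isfile(workname):
        if not overwrite:
            logging.info('ASDF exists and overwrite not selected: %s', workname)
            return None
        logging.info('Removing existing ASDF file: %s', workname)
        os.remove(workname)
    return workname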
Example #3
def process_event(event,
                  outdir,
                  pcommands,
                  config,
                  input_directory,
                  process_tag,
                  files_created,
                  output_format,
                  status,
                  recompute_metrics,
                  export_dir=None):

    # Set up logging to write to a per-event log file in the output directory
    argthing = namedtuple('args', ['debug', 'quiet'])
    args = argthing(debug=True, quiet=False)
    setup_logger(args)

    logger = logging.getLogger()
    stream_handler = logger.handlers[0]
    logfile = os.path.join(outdir, '%s.log' % event.id)
    fhandler = logging.FileHandler(logfile)
    logger.removeHandler(stream_handler)
    logger.addHandler(fhandler)

    event_dir = os.path.join(outdir, event.id)
    if not os.path.exists(event_dir):
        os.makedirs(event_dir)

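    # Track the workspace state for this event: whether the ASDF file
    # exists, whether it already holds processed streams, and whether
    # processing has been completed during this run.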
    workname = os.path.join(event_dir, WORKSPACE_NAME)
    workspace_exists = os.path.isfile(workname)
    workspace_has_processed = False
    workspace = None
    processing_done = False

    if workspace_exists:
        workspace = StreamWorkspace.open(workname)
        labels = workspace.getLabels()
        if len(labels):
            labels.remove('unprocessed')
        elif 'assemble' not in pcommands:
            print('No data in workspace. Please run assemble.')
            sys.exit(1)

        if len(labels) == 1:
            process_tag = labels[0]
            workspace_has_processed = True
        else:
            if 'process' not in pcommands:
                fmt = '\nThere are %i sets of processed data in %s.'
                tpl = (len(labels), workname)
                print(fmt % tpl)
                print(('This software can only handle one set of '
                       'processed data. Exiting.\n'))
                sys.exit(1)

    download_done = False

    # Need to initialize rstreams/pstreams
    rstreams = []
    pstreams = []

    rupture_file = None
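    # 'assemble': fetch the raw streams (or read them from input_directory)
    # and write them into a new workspace file for this event.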
    if 'assemble' in pcommands:
        logging.info('Downloading/loading raw streams...')
        workspace, workspace_file, rstreams, rupture_file = download(
            event, event_dir, config, input_directory)

        download_done = True
        append_file(files_created, 'Workspace', workname)

    else:
        if not workspace_exists:
            print('\nYou opted not to download or process from input.')
            print('No previous HDF workspace file could be found.')
            print('Try re-running with the assemble command with or ')
            print('without the --directory option.\n')
            sys.exit(1)
        if 'process' in pcommands:
            logging.info('Getting raw streams from workspace...')
            with warnings.catch_warnings():
                warnings.simplefilter("ignore",
                                      category=H5pyDeprecationWarning)
                rstreams = workspace.getStreams(event.id,
                                                labels=['unprocessed'])
            download_done = True
        else:
            need_processed = set(['report', 'shakemap'])
            need_pstreams = len(need_processed.intersection(pcommands))
            if workspace_has_processed:
                if need_pstreams:
                    logging.info('Getting processed streams from workspace...')
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore",
                                              category=H5pyDeprecationWarning)
                        pstreams = workspace.getStreams(event.id,
                                                        labels=[process_tag])
                download_done = True
                processing_done = True

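    # 'process': run the configured processing on the raw streams, then
    # add the processed streams and computed metrics to the workspace.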
    if ('process' in pcommands and download_done and not processing_done
            and len(rstreams)):
        logging.info('Processing raw streams for event %s...' % event.id)
        pstreams = process_streams(rstreams, event, config=config)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            workspace.addStreams(event, pstreams, label=process_tag)
            workspace.calcMetrics(event.id,
                                  labels=[process_tag],
                                  config=config,
                                  streams=pstreams,
                                  stream_label=process_tag,
                                  rupture_file=rupture_file)
        processing_done = True

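    # 'export': write the event, IMC, and fit-spectra tables to CSV or
    # Excel, optionally recomputing metrics first.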
    if 'export' in pcommands:
        if export_dir is not None:
            if not os.path.isdir(export_dir):
                os.makedirs(export_dir)
            outdir = export_dir

        labels = workspace.getLabels()
        if 'unprocessed' not in labels:
            fmt = ('Workspace file "%s" appears to have no unprocessed '
                   'data. Skipping.')
            logging.info(fmt % workname)
        else:
            labels.remove('unprocessed')
            if not labels:
                fmt = ('Workspace file "%s" appears to have no processed '
                       'data. Skipping.')
                logging.info(fmt % workname)
            else:
                logging.info('Creating tables for event %s...', event.id)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore",
                                          category=H5pyDeprecationWarning)
                    if recompute_metrics:
                        del workspace.dataset.auxiliary_data.WaveFormMetrics
                        del workspace.dataset.auxiliary_data.StationMetrics
                        workspace.calcMetrics(event.id,
                                              labels=labels,
                                              config=config,
                                              rupture_file=rupture_file)
                    event_table, imc_tables, readmes = workspace.getTables(
                        labels[0], streams=pstreams, stream_label=process_tag)
                    ev_fit_spec, fit_readme = workspace.getFitSpectraTable(
                        event.id, labels[0], pstreams)

                # Set the precisions for the imc tables, event table, and
                # fit_spectra table before writing
                imc_tables_formatted = {}
                for imc, imc_table in imc_tables.items():
                    imc_tables_formatted[imc] = set_precisions(imc_table)
                event_table_formatted = set_precisions(event_table)
                df_fit_spectra_formatted = set_precisions(ev_fit_spec)

                if not os.path.isdir(outdir):
                    os.makedirs(outdir)

                filenames = ['events'] + \
                    [imc.lower() for imc in imc_tables_formatted.keys()] + \
                    [imc.lower() + '_README' for imc in readmes.keys()] + \
                    ['fit_spectra_parameters', 'fit_spectra_parameters_README']

                files = [event_table_formatted] + list(
                    imc_tables_formatted.values()) + list(readmes.values()) + [
                        df_fit_spectra_formatted, fit_readme
                    ]

                if output_format != 'csv':
                    output_format = 'xlsx'

                for filename, df in dict(zip(filenames, files)).items():
                    filepath = os.path.join(outdir,
                                            filename + '.%s' % output_format)
                    if os.path.exists(filepath):
                        if 'README' in filename:
                            continue
                        else:
                            mode = 'a'
                            header = False
                    else:
                        mode = 'w'
                        header = True
                        append_file(files_created, 'Tables', filepath)
                    if output_format == 'csv':
                        df.to_csv(filepath,
                                  index=False,
                                  float_format=DEFAULT_FLOAT_FORMAT,
                                  na_rep=DEFAULT_NA_REP,
                                  mode=mode,
                                  header=header)
                    else:
                        # Note: DataFrame.to_excel() does not accept a
                        # 'mode' argument, so Excel output is rewritten
                        # rather than appended.
                        df.to_excel(filepath,
                                    index=False,
                                    float_format=DEFAULT_FLOAT_FORMAT,
                                    na_rep=DEFAULT_NA_REP,
                                    header=header)

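    # 'report': create per-station diagnostic plots, a station map, a
    # moveout plot, and (for the latex format) a summary report.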
    if ('report' in pcommands and processing_done and len(pstreams)):
        logging.info('Creating diagnostic plots for event %s...' % event.id)
        plot_dir = os.path.join(event_dir, 'plots')
        if not os.path.isdir(plot_dir):
            os.makedirs(plot_dir)
        for stream in pstreams:
            summary_plots(stream, plot_dir, event)

        mapfile = draw_stations_map(pstreams, event, event_dir)
        plot_moveout(pstreams,
                     event.latitude,
                     event.longitude,
                     file=os.path.join(event_dir, 'moveout_plot.png'))

        append_file(files_created, 'Station map', mapfile)
        append_file(files_created, 'Moveout plot', 'moveout_plot.png')

        logging.info('Creating diagnostic report for event %s...' % event.id)
        # Build the summary report?
        build_conf = config['build_report']
        report_format = build_conf['format']
        if report_format == 'latex':
            report_file, success = build_report_latex(pstreams,
                                                      event_dir,
                                                      event,
                                                      config=config)
        else:
            report_file = ''
            success = False
        if os.path.isfile(report_file) and success:
            append_file(files_created, 'Summary report', report_file)

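    # 'provenance': export the processing provenance table for the
    # processed streams.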
    if 'provenance' in pcommands and processing_done and len(pstreams):
        logging.info('Creating provenance table for event %s...' % event.id)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            provdata = workspace.getProvenance(event.id, labels=[process_tag])
        if output_format == 'csv':
            csvfile = os.path.join(event_dir, 'provenance.csv')
            append_file(files_created, 'Provenance', csvfile)
            provdata.to_csv(csvfile)
        else:
            excelfile = os.path.join(event_dir, 'provenance.xlsx')
            append_file(files_created, 'Provenance', excelfile)
            provdata.to_excel(excelfile, index=False)

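    # 'shakemap': write the ShakeMap amplitude and JSON files for this event.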
    if 'shakemap' in pcommands and processing_done and len(pstreams):
        logging.info('Creating shakemap table for event %s...' % event.id)
        shakemap_file, jsonfile = save_shakemap_amps(pstreams, event,
                                                     event_dir)
        append_file(files_created, 'shakemap', shakemap_file)
        append_file(files_created, 'shakemap', jsonfile)

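    # Optional status summary: write status.csv describing passed/failed
    # records at the requested level of detail.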
    if status and processing_done and len(pstreams):
        if status == 'short':
            index = 'Failure reason'
            col = ['Number of records']
        elif status == 'long':
            index = 'Station ID'
            col = ['Failure reason']
        elif status == 'net':
            index = 'Network'
            col = ['Number of passed records', 'Number of failed records']

        status_info = pstreams.get_status(status)
        status_info.to_csv(os.path.join(event_dir, 'status.csv'),
                           header=col,
                           index_label=index)

    # since we don't know how many events users will be processing,
    # let's guard against memory issues by clearing out the big data
    # structures
    workspace.close()

    logging.info('Finishing event %s' % event.id)

    return workname
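A minimal driver sketch showing how process_event might be invoked for a batch of events; run_all, the command list, and the assumption that files_created is a dict are for illustration only, not the package's actual CLI wiring.

# Hypothetical driver around the process_event function defined above.
def run_all(events, config, outdir='data'):
    """Run a fixed command set for each event and collect created files."""
    # files_created is passed through to append_file; a dict is assumed here.
    files_created = {}
    for event in events:
        workname = process_event(
            event,
            outdir=outdir,
            pcommands=['assemble', 'process', 'report'],
            config=config,
            input_directory=None,
            process_tag='default',
            files_created=files_created,
            output_format='csv',
            status='short',
            recompute_metrics=False,
        )
        print('Workspace written to %s' % workname)
    return files_created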