Example #1
    def dump_history(self, save_dir=None):
        if save_dir is None:
            save_dir = os.getcwd()

        logger.info('Dumping sampler history to %s' % save_dir)
        dump_objects(os.path.join(save_dir, self.history.filename),
                     self.history)
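
A minimal usage sketch (the sampler instance and the load_objects counterpart to dump_objects are assumptions for illustration, not part of the snippet above):

import os

# dump to the current working directory (the default), then reload the
# history via the assumed load_objects counterpart:
sampler.dump_history()
history = load_objects(os.path.join(os.getcwd(), sampler.history.filename))
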
Example #2
def run_mpi_sampler(
        sampler_name, model, sampler_args, keep_tmp, n_jobs,
        loglevel='info', project_dir=''):
    """
    Execute a sampling algorithm that requires the call of mpiexec
    as it uses MPI for parallelization.

    A run directory is created unter '/tmp/' where the sampler
    arguments are pickled and then reloaded by the MPI sampler
    script.

    Parameters
    ----------
    sampler_name : string
        valid names see distributed.samplers for available options
    model : :class:`pymc3.model.Model`
        that holds the forward model graph
    sampler_args : list
        of sampler arguments, order is important
    keep_tmp : boolean
        if true dont remove the run directory after execution
    n_jobs : number of processors to call MPI with
    loglevel : string
        loglevels defined by the logging package
    project_dir : string
        main directory path to the logfile, if left empty- written to tmp
        directory
    """

    from beat.info import project_root
    from beat.utility import dump_objects

    try:
        sampler = samplers[sampler_name]
    except KeyError:
        raise NotImplementedError(
            'Currently only samplers: %s supported!' %
            list2string(list(samplers.keys())))

    runner = MPIRunner(keep_tmp=keep_tmp)
    args_path = pjoin(runner.tempdir, mpiargs_name)
    model_path = pjoin(runner.tempdir, pymc_model_name)

    dump_objects(model_path, model)
    dump_objects(args_path, sampler_args)

    samplerdir = pjoin(project_root, sampler)
    logger.info('sampler directory: %s' % samplerdir)
    runner.run(
        samplerdir, n_jobs=n_jobs, loglevel=loglevel, project_dir=project_dir)
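
A hedged call sketch; the sampler name 'pt' and the contents of sampler_args are assumptions here (valid names come from distributed.samplers, as the docstring notes):

# pickle model and arguments into a /tmp run directory, then launch the
# MPI sampler script via mpiexec on 4 processors:
run_mpi_sampler(
    sampler_name='pt',          # assumed entry in distributed.samplers
    model=pymc_model,           # a pymc3.model.Model instance
    sampler_args=[step, 1000],  # order is important (see docstring)
    keep_tmp=False,             # remove the run directory after execution
    n_jobs=4,
    loglevel='info')
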
Example #3
def main():
    parser = OptionParser(usage=usage, description=description)

    parser.add_option('--force',
                      dest='force',
                      action='store_true',
                      default=False,
                      help='allow recreation of output <directory>')

    parser.add_option('--debug',
                      dest='debug',
                      action='store_true',
                      default=False,
                      help='print debugging information to stderr')

    parser.add_option('--dry-run',
                      dest='dry_run',
                      action='store_true',
                      default=False,
                      help='show available stations/channels and exit '
                      '(do not download waveforms)')

    parser.add_option('--continue',
                      dest='continue_',
                      action='store_true',
                      default=False,
                      help='continue download after an interruption')

    parser.add_option('--local-data',
                      dest='local_data',
                      action='append',
                      help='add file/directory with local data')

    parser.add_option('--local-stations',
                      dest='local_stations',
                      action='append',
                      help='add local stations file')

    parser.add_option('--selection',
                      dest='selection_file',
                      action='append',
                      help='add local selection file')

    parser.add_option(
        '--local-responses-resp',
        dest='local_responses_resp',
        action='append',
        help='add file/directory with local responses in RESP format')

    parser.add_option('--local-responses-pz',
                      dest='local_responses_pz',
                      action='append',
                      help='add file/directory with local pole-zero responses')

    parser.add_option(
        '--local-responses-stationxml',
        dest='local_responses_stationxml',
        help='add file with local response information in StationXML format')

    parser.add_option(
        '--window',
        dest='window',
        default='full',
        help='set time window to choose [full, p, "<time-start>,<time-end>"'
        '] (time format is YYYY-MM-DD HH:MM:SS)')

    parser.add_option(
        '--out-components',
        choices=['enu', 'rtu'],
        dest='out_components',
        default='rtu',
        help='set output component orientations to radial-transverse-up [rtu] '
        '(default) or east-north-up [enu]')

    parser.add_option('--out-units',
                      choices=['M', 'M/S', 'M/S**2'],
                      dest='output_units',
                      default='M',
                      help='set output units to displacement "M" (default),'
                      ' velocity "M/S" or acceleration "M/S**2"')

    parser.add_option(
        '--padding-factor',
        type=float,
        default=3.0,
        dest='padding_factor',
        help='extend time window on either side, in multiples of 1/<fmin_hz> '
        '(default: %default)')

    parser.add_option(
        '--zero-padding',
        dest='zero_pad',
        action='store_true',
        default=False,
        help='extend traces by zero-padding if clean restitution requires '
        'longer windows')

    parser.add_option(
        '--credentials',
        dest='user_credentials',
        action='append',
        default=[],
        metavar='SITE,USER,PASSWD',
        help='user credentials for specific site to access restricted data '
        '(this option can be repeated)')

    parser.add_option(
        '--token',
        dest='auth_tokens',
        metavar='SITE,FILENAME',
        action='append',
        default=[],
        help='user authentication token for specific site to access '
        'restricted data (this option can be repeated)')

    parser.add_option(
        '--sites',
        dest='sites',
        metavar='SITE1,SITE2,...',
        default='geofon,iris,orfeus',
        help='sites to query (available: %s, default: "%%default")' %
        ', '.join(g_sites_available))

    parser.add_option(
        '--band-codes',
        dest='priority_band_code',
        metavar='V,L,M,B,H,S,E,...',
        default='B,H',
        help='select and prioritize band codes (default: %default)')

    parser.add_option(
        '--instrument-codes',
        dest='priority_instrument_code',
        metavar='H,L,G,...',
        default='H,L',
        help='select and prioritize instrument codes (default: %default)')

    parser.add_option('--radius-min',
                      dest='radius_min',
                      metavar='VALUE',
                      default=0.0,
                      type=float,
                      help='minimum radius [km]')

    parser.add_option('--nstations-wanted',
                      dest='nstations_wanted',
                      metavar='N',
                      type=int,
                      help='number of stations to select initially')

    (options, args) = parser.parse_args(sys.argv[1:])

    if len(args) not in (10, 7, 6):
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    else:
        util.setup_logging(program_name, 'info')

    if options.local_responses_pz and options.local_responses_resp:
        logger.critical('cannot use local responses in PZ and RESP '
                        'format at the same time')
        sys.exit(1)

    n_resp_opt = 0
    for resp_opt in (options.local_responses_pz, options.local_responses_resp,
                     options.local_responses_stationxml):

        if resp_opt:
            n_resp_opt += 1

    if n_resp_opt > 1:
        logger.critical('can only handle local responses from either PZ or '
                        'RESP or StationXML. Cannot yet merge different '
                        'response formats.')
        sys.exit(1)

    if options.local_responses_resp and not options.local_stations:
        logger.critical('--local-responses-resp can only be used '
                        'when --local-stations is also given.')
        sys.exit(1)

    try:
        ename = ''
        magnitude = None
        mt = None
        if len(args) == 10:
            time = util.str_to_time(args[1] + ' ' + args[2])
            lat = float(args[3])
            lon = float(args[4])
            depth = float(args[5]) * km
            iarg = 6

        elif len(args) == 7:
            if args[2].find(':') == -1:
                sname_or_date = None
                lat = float(args[1])
                lon = float(args[2])
                event = None
                time = None
            else:
                sname_or_date = args[1] + ' ' + args[2]

            iarg = 3

        elif len(args) == 6:
            sname_or_date = args[1]
            iarg = 2

        if len(args) in (7, 6) and sname_or_date is not None:
            events = get_events_by_name_or_date([sname_or_date],
                                                catalog=geofon)
            if len(events) == 0:
                logger.critical('no event found')
                sys.exit(1)
            elif len(events) > 1:
                logger.critical('more than one event found')
                sys.exit(1)

            event = events[0]
            time = event.time
            lat = event.lat
            lon = event.lon
            depth = event.depth
            ename = event.name
            magnitude = event.magnitude
            mt = event.moment_tensor

        radius = float(args[iarg]) * km
        fmin = float(args[iarg + 1])
        sample_rate = float(args[iarg + 2])

        eventname = args[iarg + 3]
        cwd = str(sys.argv[1])
        event_dir = op.join(cwd, 'data', 'events', eventname)
        output_dir = op.join(event_dir, 'waveforms')
    except Exception:
        parser.print_help()
        sys.exit(1)

    if options.force and op.isdir(event_dir):
        if not options.continue_:
            shutil.rmtree(event_dir)

    if op.exists(event_dir) and not options.continue_:
        logger.critical(
            'directory "%s" exists. Delete it first or use the --force option'
            % event_dir)
        sys.exit(1)

    util.ensuredir(output_dir)

    if time is not None:
        event = model.Event(time=time,
                            lat=lat,
                            lon=lon,
                            depth=depth,
                            name=ename,
                            magnitude=magnitude,
                            moment_tensor=mt)

    if options.window == 'full':
        if event is None:
            logger.critical('need event for --window=full')
            sys.exit(1)

        low_velocity = 1500.
        timewindow = VelocityWindow(low_velocity,
                                    tpad=options.padding_factor / fmin)

        tmin, tmax = timewindow(time, radius, depth)

    elif options.window == 'p':
        if event is None:
            logger.critical('need event for --window=p')
            sys.exit(1)

        phases = list(map(cake.PhaseDef, 'P p'.split()))
        emod = cake.load_model()

        tpad = options.padding_factor / fmin
        timewindow = PhaseWindow(emod, phases, -tpad, tpad)

        arrivaltimes = []
        for dist in num.linspace(0, radius, 20):
            try:
                arrivaltimes.extend(timewindow(time, dist, depth))
            except NoArrival:
                pass

        if not arrivaltimes:
            logger.error('required phase arrival not found')
            sys.exit(1)

        tmin = min(arrivaltimes)
        tmax = max(arrivaltimes)

    else:
        try:
            stmin, stmax = options.window.split(',')
            tmin = util.str_to_time(stmin.strip())
            tmax = util.str_to_time(stmax.strip())

            timewindow = FixedWindow(tmin, tmax)

        except ValueError:
            logger.critical('invalid argument to --window: "%s"' %
                            options.window)
            sys.exit(1)

    if event is not None:
        event.name = eventname

    tfade = tfade_factor / fmin

    tpad = tfade

    tmin -= tpad
    tmax += tpad

    tinc = None

    priority_band_code = options.priority_band_code.split(',')
    for s in priority_band_code:
        if len(s) != 1:
            logger.critical('invalid band code: %s' % s)
            sys.exit(1)

    priority_instrument_code = options.priority_instrument_code.split(',')
    for s in priority_instrument_code:
        if len(s) != 1:
            logger.critical('invalid instrument code: %s' % s)
            sys.exit(1)

    station_query_conf = dict(latitude=lat,
                              longitude=lon,
                              minradius=options.radius_min * km * cake.m2d,
                              maxradius=radius * cake.m2d,
                              channel=','.join('%s??' % s
                                               for s in priority_band_code))

    target_sample_rate = sample_rate

    fmax = target_sample_rate

    # target_sample_rate = None
    # priority_band_code = ['H', 'B', 'M', 'L', 'V', 'E', 'S']

    priority_units = ['M/S', 'M', 'M/S**2']

    # output_units = 'M'

    sites = [x.strip() for x in options.sites.split(',') if x.strip()]

    for site in sites:
        if site not in g_sites_available:
            logger.critical('unknown FDSN site: %s' % site)
            sys.exit(1)

    for s in options.user_credentials:
        try:
            site, user, passwd = s.split(',')
            g_user_credentials[site] = user, passwd
        except ValueError:
            logger.critical('invalid format for user credentials: "%s"' % s)
            sys.exit(1)

    for s in options.auth_tokens:
        try:
            site, token_filename = s.split(',')
            with open(token_filename, 'r') as f:
                g_auth_tokens[site] = f.read()
        except (ValueError, OSError, IOError):
            logger.critical('cannot get token from file: %s' % token_filename)
            sys.exit(1)

    fn_template0 = \
        'data_%(network)s.%(station)s.%(location)s.%(channel)s_%(tmin)s.mseed'

    fn_template_raw = op.join(output_dir, 'raw', fn_template0)
    fn_stations_raw = op.join(output_dir, 'stations.raw.txt')
    fn_template_rest = op.join(output_dir, 'rest', fn_template0)
    fn_commandline = op.join(output_dir, 'beatdown.command')

    ftap = (ffade_factors[0] * fmin, fmin, fmax, ffade_factors[1] * fmax)

    # chapter 1: download

    sxs = []
    for site in sites:
        try:
            extra_args = {
                'iris': dict(matchtimeseries=True),
            }.get(site, {})

            extra_args.update(station_query_conf)

            if site == 'geonet':
                extra_args.update(starttime=tmin, endtime=tmax)
            else:
                extra_args.update(startbefore=tmax,
                                  endafter=tmin,
                                  includerestricted=(site in g_user_credentials
                                                     or site in g_auth_tokens))

            logger.info('downloading channel information (%s)' % site)
            sx = fdsn.station(site=site,
                              format='text',
                              level='channel',
                              **extra_args)

        except fdsn.EmptyResult:
            logger.error('No stations matching given criteria. (%s)' % site)
            sx = None

        sxs.append(sx)

    if all(sx is None for sx in sxs) and not options.local_data:
        sys.exit(1)

    nsl_to_sites = defaultdict(list)
    nsl_to_station = {}

    if options.selection_file:
        logger.info('using stations from stations file!')
        stations = []
        for fn in options.selection_file:
            stations.extend(model.load_stations(fn))

        nsls_selected = set(s.nsl() for s in stations)
    else:
        nsls_selected = None

    for sx, site in zip(sxs, sites):
        if sx is None:
            continue

        site_stations = sx.get_pyrocko_stations()
        for s in site_stations:
            nsl = s.nsl()

            nsl_to_sites[nsl].append(site)
            if nsl not in nsl_to_station:
                if nsls_selected:
                    if nsl in nsls_selected:
                        nsl_to_station[nsl] = s
                else:
                    nsl_to_station[
                        nsl] = s  # using first site with this station

        logger.info('number of stations found: %i' % len(nsl_to_station))

    # station weeding
    if options.nstations_wanted:
        nsls_selected = None
        stations_all = [
            nsl_to_station[nsl_] for nsl_ in sorted(nsl_to_station.keys())
        ]

        for s in stations_all:
            s.set_event_relative_data(event)

        stations_selected = weeding.weed_stations(stations_all,
                                                  options.nstations_wanted)[0]

        nsls_selected = set(s.nsl() for s in stations_selected)
        logger.info('number of stations selected: %i' % len(nsls_selected))

    if tinc is None:
        tinc = 3600.

    have_data = set()

    if options.continue_:
        fns = glob.glob(fn_template_raw % starfill())
        p = pile.make_pile(fns)
    else:
        fns = []

    have_data_site = {}
    could_have_data_site = {}
    for site in sites:
        have_data_site[site] = set()
        could_have_data_site[site] = set()

    available_through = defaultdict(set)
    it = 0
    nt = int(math.ceil((tmax - tmin) / tinc))
    for it in range(nt):
        tmin_win = tmin + it * tinc
        tmax_win = min(tmin + (it + 1) * tinc, tmax)
        logger.info('time window %i/%i (%s - %s)' %
                    (it + 1, nt, util.tts(tmin_win), util.tts(tmax_win)))

        have_data_this_window = set()
        if options.continue_:
            trs_avail = p.all(tmin=tmin_win, tmax=tmax_win, load_data=False)
            for tr in trs_avail:
                have_data_this_window.add(tr.nslc_id)
        for site, sx in zip(sites, sxs):
            if sx is None:
                continue

            selection = []
            channels = sx.choose_channels(
                target_sample_rate=target_sample_rate,
                priority_band_code=priority_band_code,
                priority_units=priority_units,
                priority_instrument_code=priority_instrument_code,
                timespan=(tmin_win, tmax_win))

            for nslc in sorted(channels.keys()):
                if nsls_selected is not None and nslc[:3] not in nsls_selected:
                    continue

                could_have_data_site[site].add(nslc)

                if nslc not in have_data_this_window:
                    channel = channels[nslc]
                    if event:
                        lat_, lon_ = event.lat, event.lon
                    else:
                        lat_, lon_ = lat, lon
                    try:
                        dist = orthodrome.distance_accurate50m_numpy(
                            lat_, lon_, channel.latitude.value,
                            channel.longitude.value)
                    except AttributeError:
                        dist = orthodrome.distance_accurate50m_numpy(
                            lat_, lon_, channel.latitude, channel.longitude)

                    if event:
                        depth_ = event.depth
                        time_ = event.time
                    else:
                        depth_ = None
                        time_ = None

                    tmin_, tmax_ = timewindow(time_, dist, depth_)

                    tmin_this = tmin_ - tpad
                    tmax_this = float(tmax_ + tpad)

                    tmin_req = max(tmin_win, tmin_this)
                    tmax_req = min(tmax_win, tmax_this)
                    if channel.sample_rate:
                        try:
                            deltat = 1.0 / int(channel.sample_rate.value)
                        except AttributeError:
                            deltat = 1.0 / int(channel.sample_rate)
                    else:
                        deltat = 1.0

                    if tmin_req < tmax_req:
                        logger.debug('deltat %f' % deltat)
                        # extend time window by some samples because otherwise
                        # sometimes gaps are produced
                        # apparently the WS are only sensitive to full seconds
                        # round to avoid gaps, increase safety window
                        selection.append(nslc +
                                         (math.floor(tmin_req - deltat * 20.0),
                                          math.ceil(tmax_req + deltat * 20.0)))
            if options.dry_run:
                for (net, sta, loc, cha, tmin_, tmax_) in selection:
                    available_through[net, sta, loc, cha].add(site)

            else:
                neach = 100
                i = 0
                nbatches = ((len(selection) - 1) // neach) + 1
                while i < len(selection):
                    selection_now = selection[i:i + neach]
                    f = tempfile.NamedTemporaryFile()
                    try:
                        sbatch = ''
                        if nbatches > 1:
                            sbatch = ' (batch %i/%i)' % (
                                (i // neach) + 1, nbatches)

                        logger.info('downloading data (%s)%s' % (site, sbatch))
                        data = fdsn.dataselect(site=site,
                                               selection=selection_now,
                                               **get_user_credentials(site))

                        while True:
                            buf = data.read(1024)
                            if not buf:
                                break
                            f.write(buf)

                        f.flush()

                        trs = io.load(f.name)
                        for tr in trs:
                            tr.fix_deltat_rounding_errors()
                            logger.debug('cutting window: %f - %f' %
                                         (tmin_win, tmax_win))
                            logger.debug(
                                'available window: %f - %f, nsamples: %g' %
                                (tr.tmin, tr.tmax, tr.ydata.size))
                            try:
                                logger.debug('tmin before snap %f' % tr.tmin)
                                tr.snap(interpolate=True)
                                logger.debug('tmin after snap %f' % tr.tmin)
                                tr.chop(tmin_win,
                                        tmax_win,
                                        snap=(math.floor, math.ceil),
                                        include_last=True)
                                logger.debug(
                                    'cut window: %f - %f, nsamples: %g' %
                                    (tr.tmin, tr.tmax, tr.ydata.size))
                                have_data.add(tr.nslc_id)
                                have_data_site[site].add(tr.nslc_id)
                            except trace.NoData:
                                pass

                        fns2 = io.save(trs, fn_template_raw)
                        for fn in fns2:
                            if fn in fns:
                                logger.warn('overwriting file %s', fn)
                        fns.extend(fns2)

                    except fdsn.EmptyResult:
                        pass

                    except HTTPError:
                        logger.warn('an error occurred while downloading data '
                                    'for channels \n  %s' %
                                    '\n  '.join('.'.join(x[:4])
                                                for x in selection_now))

                    f.close()
                    i += neach

    if options.dry_run:
        nslcs = sorted(available_through.keys())

        all_channels = defaultdict(set)
        all_stations = defaultdict(set)

        def plural_s(x):
            return '' if x == 1 else 's'

        for nslc in nslcs:
            sites = tuple(sorted(available_through[nslc]))
            logger.info('selected: %s.%s.%s.%s from site%s %s' %
                        (nslc + (plural_s(len(sites)), '+'.join(sites))))

            all_channels[sites].add(nslc)
            all_stations[sites].add(nslc[:3])

        nchannels_all = 0
        nstations_all = 0
        for sites in sorted(all_channels.keys(),
                            key=lambda sites: (-len(sites), sites)):

            nchannels = len(all_channels[sites])
            nstations = len(all_stations[sites])
            nchannels_all += nchannels
            nstations_all += nstations
            logger.info('selected (%s): %i channel%s (%i station%s)' %
                        ('+'.join(sites), nchannels, plural_s(nchannels),
                         nstations, plural_s(nstations)))

        logger.info('selected total: %i channel%s (%i station%s)' %
                    (nchannels_all, plural_s(nchannels_all), nstations_all,
                     plural_s(nstations_all)))

        logger.info('dry run done.')
        sys.exit(0)

    for nslc in have_data:
        # if we are in continue mode, we have to guess where the data came from
        if not any(nslc in have_data_site[site] for site in sites):
            for site in sites:
                if nslc in could_have_data_site[site]:
                    have_data_site[site].add(nslc)

    sxs = {}
    for site in sites:
        selection = []
        for nslc in sorted(have_data_site[site]):
            selection.append(nslc + (tmin - tpad, tmax + tpad))

        if selection:
            logger.info('downloading response information (%s)' % site)
            sxs[site] = fdsn.station(site=site,
                                     level='response',
                                     selection=selection)

            sxs[site].dump_xml(filename=op.join(output_dir, 'stations.%s.xml' %
                                                site))

    # chapter 1.5: inject local data

    if options.local_data:
        have_data_site['local'] = set()
        plocal = pile.make_pile(options.local_data, fileformat='detect')
        logger.info(
            'Importing local data from %s between %s (%f) and %s (%f)' %
            (options.local_data, util.time_to_str(tmin), tmin,
             util.time_to_str(tmax), tmax))
        for traces in plocal.chopper_grouped(gather=lambda tr: tr.nslc_id,
                                             tmin=tmin,
                                             tmax=tmax,
                                             tinc=tinc):

            for tr in traces:
                if tr.nslc_id not in have_data:
                    fns.extend(io.save(traces, fn_template_raw))
                    have_data_site['local'].add(tr.nslc_id)
                    have_data.add(tr.nslc_id)

        sites.append('local')

    if options.local_responses_pz:
        sxs['local'] = epz.make_stationxml(
            epz.iload(options.local_responses_pz))

    if options.local_responses_resp:
        local_stations = []
        for fn in options.local_stations:
            local_stations.extend(model.load_stations(fn))

        sxs['local'] = resp.make_stationxml(
            local_stations, resp.iload(options.local_responses_resp))

    if options.local_responses_stationxml:
        sxs['local'] = stationxml.load_xml(
            filename=options.local_responses_stationxml)

    # chapter 1.6: dump raw data stations file

    nsl_to_station = {}
    for site in sites:
        if site in sxs:
            stations = sxs[site].get_pyrocko_stations(timespan=(tmin, tmax))
            for s in stations:
                nsl = s.nsl()
                if nsl not in nsl_to_station:
                    nsl_to_station[nsl] = s

    stations = [nsl_to_station[nsl_] for nsl_ in sorted(nsl_to_station.keys())]

    util.ensuredirs(fn_stations_raw)
    model.dump_stations(stations, fn_stations_raw)

    dump_commandline(sys.argv, fn_commandline)

    # chapter 2: restitution

    if not fns:
        logger.error('no data available')
        sys.exit(1)

    p = pile.make_pile(fns, show_progress=False)
    otinc = 3600.
    otmin = math.floor(p.tmin / otinc) * otinc
    otmax = math.ceil(p.tmax / otinc) * otinc
    otpad = tpad * 2

    fns = []
    rest_traces_b = []
    win_b = None
    for traces_a in p.chopper_grouped(gather=lambda tr: tr.nslc_id,
                                      tmin=otmin,
                                      tmax=otmax,
                                      tinc=otinc,
                                      tpad=otpad):

        rest_traces_a = []
        win_a = None
        for tr in traces_a:
            win_a = tr.wmin, tr.wmax

            if win_b and win_b[0] >= win_a[0]:
                fns.extend(cut_n_dump(rest_traces_b, win_b, fn_template_rest))
                rest_traces_b = []
                win_b = None

            response = None
            failure = []
            for site in sites:
                try:
                    if site not in sxs:
                        continue
                    logger.debug('Getting response for %s' % tr.__str__())
                    response = sxs[site].get_pyrocko_response(
                        tr.nslc_id,
                        timespan=(tr.tmin, tr.tmax),
                        fake_input_units=options.output_units)

                    break

                except stationxml.NoResponseInformation:
                    failure.append('%s: no response information' % site)

                except stationxml.MultipleResponseInformation:
                    failure.append('%s: multiple response information' % site)

            if response is None:
                failure = ', '.join(failure)

            else:
                failure = ''
                try:
                    if tr.tmin > tmin and options.zero_pad:
                        logger.warning(
                            'Trace too short for clean restitution in '
                            'desired frequency band -> zero-padding!')
                        tr.extend(tr.tmin - tfade, tr.tmax + tfade, 'repeat')

                    rest_tr = tr.transfer(tfade, ftap, response, invert=True)
                    rest_traces_a.append(rest_tr)

                except (trace.TraceTooShort, trace.NoData):
                    failure = 'trace too short'

            if failure:
                logger.warn('failed to restitute trace %s.%s.%s.%s (%s)' %
                            (tr.nslc_id + (failure, )))

        if rest_traces_b:
            rest_traces = trace.degapper(rest_traces_b + rest_traces_a,
                                         deoverlap='crossfade_cos')

            fns.extend(cut_n_dump(rest_traces, win_b, fn_template_rest))
            rest_traces_a = []
            if win_a:
                for tr in rest_traces:
                    try:
                        rest_traces_a.append(
                            tr.chop(win_a[0], win_a[1] + otpad, inplace=False))
                    except trace.NoData:
                        pass

        rest_traces_b = rest_traces_a
        win_b = win_a

    fns.extend(cut_n_dump(rest_traces_b, win_b, fn_template_rest))

    # chapter 3: rotated restituted traces for inspection

    if not event:
        sys.exit(0)

    fn_template1 = \
        'DISPL.%(network)s.%(station)s.%(location)s.%(channel)s'

    fn_waveforms = op.join(output_dir, 'prepared', fn_template1)
    fn_stations = op.join(output_dir, 'stations.prepared.txt')
    fn_event = op.join(event_dir, 'event.txt')
    fn_event_yaml = op.join(event_dir, 'event.yaml')

    nsl_to_station = {}
    for site in sites:
        if site in sxs:
            stations = sxs[site].get_pyrocko_stations(timespan=(tmin, tmax))
            for s in stations:
                nsl = s.nsl()
                if nsl not in nsl_to_station:
                    nsl_to_station[nsl] = s

    p = pile.make_pile(fns, show_progress=False)

    deltat = None
    if sample_rate is not None:
        deltat = 1.0 / sample_rate

    traces_beat = []
    used_stations = []
    for nsl, s in nsl_to_station.items():
        s.set_event_relative_data(event)
        traces = p.all(trace_selector=lambda tr: tr.nslc_id[:3] == nsl)

        if options.out_components == 'rtu':
            pios = s.guess_projections_to_rtu(out_channels=('R', 'T', 'Z'))
        elif options.out_components == 'enu':
            pios = s.guess_projections_to_enu(out_channels=('E', 'N', 'Z'))
        else:
            assert False

        for (proj, in_channels, out_channels) in pios:

            proc = trace.project(traces, proj, in_channels, out_channels)
            for tr in proc:
                tr_beat = heart.SeismicDataset.from_pyrocko_trace(tr)
                traces_beat.append(tr_beat)
                for ch in out_channels:
                    if ch.name == tr.channel:
                        s.add_channel(ch)

            if proc:
                io.save(proc, fn_waveforms)
                used_stations.append(s)

    stations = list(used_stations)
    util.ensuredirs(fn_stations)
    model.dump_stations(stations, fn_stations)
    model.dump_events([event], fn_event)

    from pyrocko.guts import dump
    dump([event], filename=fn_event_yaml)

    utility.dump_objects(op.join(cwd, 'seismic_data.pkl'),
                         outlist=[stations, traces_beat])
    logger.info('prepared waveforms from %i stations' % len(stations))
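
The restitution in chapter 2 hinges on the frequency taper ftap and the time fade tfade passed to tr.transfer(). A standalone sketch of that call, assuming tr and response are given; the numeric factors stand in for ffade_factors and tfade_factor, which are defined outside this excerpt:

fmin, fmax = 0.01, 5.0                        # corner frequencies [Hz]
tfade = 2.0 / fmin                            # time-domain cosine fade [s]
ftap = (0.5 * fmin, fmin, fmax, 2.0 * fmax)   # frequency-domain taper corners

# tr: a pyrocko.trace.Trace; response: the instrument response obtained
# from StationXML as above. invert=True deconvolves the response,
# i.e. restitutes the trace to the requested output units.
rest_tr = tr.transfer(tfade, ftap, transfer_function=response, invert=True)
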
Example #4
def Metropolis_sample(n_stages=10, n_steps=10000, trace=None, start=None,
                      progressbar=False, stage=None, rm_flag=False,
                      step=None, model=None, n_jobs=1, update=None,
                      burn=0.5, thin=2):
    """
    Execute Metropolis algorithm repeatedly depending on the number of stages.
    The start point of each stage set to the end point of the previous stage.
    Update covariances if given.
    """

    model = pm.modelcontext(model)

    if n_steps < 1:
        raise Exception('Argument `n_steps` should be above 0.')

    if step is None:
        raise Exception('Argument `step` has to be a TMCMC step object.')

    if trace is None:
        raise Exception('Argument `trace` should be path to result_directory.')

    step.n_steps = int(n_steps)

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise Exception('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise Exception('Argument `start` should contain as many dicts '
                            'as there are chains (step.n_chains).')
        else:
            step.population = start

    if not any(
            step.likelihood_name in var.name for var in model.deterministics):
        raise Exception('Model (deterministic) variables need to contain '
                        'a variable %s '
                        'as defined in `step`.' % step.likelihood_name)

    homepath = trace

    util.ensuredir(homepath)

    chains, step, update = init_stage(
        homepath=homepath,
        step=step,
        stage=stage,
        n_jobs=n_jobs,
        progressbar=progressbar,
        update=update,
        model=model,
        rm_flag=rm_flag)

    # set beta to 1 - standard Metropolis sampling
    step.beta = 1.
    step.n_jobs = n_jobs

    with model:

        for s in range(int(stage), n_stages):

            stage_path = os.path.join(homepath, 'stage_%i' % s)
            logger.info('Sampling stage %s' % stage_path)

            if s == 0:
                draws = 1
            else:
                draws = n_steps

            if not os.path.exists(stage_path):
                chains = None

            step.stage = s

            sample_args = {
                'draws': draws,
                'step': step,
                'stage_path': stage_path,
                'progressbar': progressbar,
                'model': model,
                'n_jobs': n_jobs,
                'chains': chains}

            _iter_parallel_chains(**sample_args)

            mtrace = backend.load(stage_path, model)

            step.population, step.array_population, step.likelihoods = \
                step.select_end_points(mtrace)

            pdict, step.covariance = get_trace_stats(
                mtrace, step, burn, thin)

            if step.proposal_name == 'MultivariateNormal':
                step.proposal_dist = choose_proposal(
                    step.proposal_name, scale=step.covariance)

            if update is not None:
                logger.info('Updating Covariances ...')
                update.update_weights(pdict['dist_mean'], n_jobs=n_jobs)

                mtrace = update_last_samples(
                    homepath, step, progressbar, model, n_jobs, rm_flag)

            elif update is not None and stage == 0:
                update.engine.close_cashed_stores()

            step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

            outpath = os.path.join(stage_path, sample_p_outname)
            outparam_list = [step, update]
            utility.dump_objects(outpath, outparam_list)

        get_final_stage(homepath, n_stages, model=model)
        outpath = os.path.join(homepath, 'stage_final', sample_p_outname)
        utility.dump_objects(outpath, outparam_list)
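
A hedged invocation sketch; step, update and pymc_model are assumed to be preconfigured beat objects, and the paths and counts are illustrative:

# run 5 Metropolis stages of 1000 steps each; step.n_chains must be
# divisible by n_jobs:
Metropolis_sample(
    n_stages=5,
    n_steps=1000,
    trace='./results',   # result directory, created if missing
    stage=0,
    step=step,
    update=update,
    model=pymc_model,
    n_jobs=2,
    burn=0.5,
    thin=2)
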
Example #5
    def dump_atmip_params(self, stage_number, outlist):
        """
        Save atmip params to file.
        """
        utility.dump_objects(self.atmip_path(stage_number), outlist)
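
Usage mirrors the parameter dumps in the samplers above; outlist typically bundles the step and update objects (a hypothetical call):

problem.dump_atmip_params(stage_number=3, outlist=[step, update])
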
Example #6
def ATMIP_sample(n_steps,
                 step=None,
                 start=None,
                 trace=None,
                 chain=0,
                 stage=None,
                 n_jobs=1,
                 tune=None,
                 progressbar=False,
                 model=None,
                 update=None,
                 random_seed=None,
                 rm_flag=False):
    """
    (C)ATMIP sampling algorithm
    (Cascading - (C) not always relevant)

    Samples the solution space with n_chains Metropolis chains, where each
    chain has n_steps iterations. Once finished, the sampled traces are
    evaluated:

    (1) Based on the likelihoods of the final samples, chains are weighted
    (2) the weighted covariance of the ensemble is calculated and set as new
        proposal distribution
    (3) the variation in the ensemble is calculated and the next tempering
        parameter (beta) calculated
    (4) New n_chains Metropolis chains are seeded on the traces with high
        weight for n_steps iterations
    (5) Repeat until beta > 1.

    Parameters
    ----------
    n_steps : int
        The number of samples to draw for each Markov-chain per stage
    step : :class:`ATMCMC`
        ATMCMC initialisation object
    start : list of dicts
        of length n_chains; starting points in parameter space (or partial
        points). Defaults to random draws from the variables.
    chain : int
        Chain number used to store sample in backend. If `n_jobs` is
        greater than one, chain numbers will start here.
    stage : str
        Stage where to start or continue the calculation. It is possible to
        continue after completed stages (stage should be the number of the
        completed stage + 1). If None the start will be at stage = 0.
    n_jobs : int
        The number of cores to be used in parallel. Be aware that theano has
        internal parallelisation. Sometimes this is more efficient especially
        for simple models.
        step.n_chains / n_jobs has to be an integer number!
    tune : int
        Number of iterations to tune, if applicable (defaults to None)
    trace : string
        result folder for storing stages; created if it does not exist.
    progressbar : bool
        Flag for displaying a progress bar
    model : :class:`pymc3.Model`
        (optional if in `with` context) has to contain a deterministic
        variable named as defined under `step.likelihood_name` that
        contains the model likelihood
    update : :py:class:`models.Problem`
        Problem object that contains all the observed data and (if applicable)
        covariances to be updated each transition step.
    rm_flag : bool
        If True existing stage result folders are being deleted prior to
        sampling.

    References
    ----------
    .. [Minson2013] Minson, S. E., Simons, M. and Beck, J. L. (2013),
        Bayesian inversion for finite fault earthquake source models
        I - Theory and algorithm. Geophysical Journal International,
        194(3), pp. 1701-1726,
        `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__
    """

    model = pm.modelcontext(model)

    if n_steps < 1:
        raise Exception('Argument `n_steps` should be above 0.')

    if step is None:
        raise Exception('Argument `step` has to be a TMCMC step object.')

    if trace is None:
        raise Exception('Argument `trace` should be path to result_directory.')

    step.n_steps = int(n_steps)

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise Exception('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise Exception('Argument `start` should contain as many dicts '
                            'as there are chains (step.n_chains).')
        else:
            step.population = start

    if not any(step.likelihood_name in var.name
               for var in model.deterministics):
        raise Exception('Model (deterministic) variables need to contain '
                        'a variable %s '
                        'as defined in `step`.' % step.likelihood_name)

    homepath = trace

    util.ensuredir(homepath)

    if progressbar and n_jobs > 1:
        progressbar = False

    chains, step, update = init_stage(homepath=homepath,
                                      step=step,
                                      stage=stage,
                                      n_jobs=n_jobs,
                                      progressbar=progressbar,
                                      update=update,
                                      model=model)

    with model:
        while step.beta < 1.:
            if step.stage == 0:
                # Initial stage
                logger.info('Sample initial stage: ...')
                draws = 1
            else:
                draws = n_steps

            logger.info('Beta: %f Stage: %i' % (step.beta, step.stage))

            # Metropolis sampling intermediate stages
            stage_path = os.path.join(homepath, 'stage_%i' % step.stage)

            if not os.path.exists(stage_path):
                chains = None

            sample_args = {
                'draws': draws,
                'step': step,
                'stage_path': stage_path,
                'progressbar': progressbar,
                'model': model,
                'n_jobs': n_jobs,
                'chains': chains
            }

            _iter_parallel_chains(**sample_args)

            mtrace = backend.load(stage_path, model)

            step.population, step.array_population, step.likelihoods = \
                step.select_end_points(mtrace)
            step.beta, step.old_beta, step.weights = step.calc_beta()

            step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

            if update is not None:
                logger.info('Updating Covariances ...')
                mean_pt = step.mean_end_points()
                update.update_weights(mean_pt, n_jobs=n_jobs)

            if step.beta > 1.:
                logger.info('Beta > 1.: %f' % step.beta)
                step.beta = 1.
                outpath = os.path.join(stage_path, sample_p_outname)
                outparam_list = [step, update]
                utility.dump_objects(outpath, outparam_list)
                if stage == 'final':
                    chains = []
                else:
                    chains = None
                break

            step.covariance = step.calc_covariance()
            step.proposal_dist = choose_proposal(step.proposal_name,
                                                 scale=step.covariance)
            step.resampling_indexes = step.resample()

            outpath = os.path.join(stage_path, sample_p_outname)
            outparam_list = [step, update]
            utility.dump_objects(outpath, outparam_list)

            step.stage += 1

            del mtrace

        # Metropolis sampling final stage
        logger.info('Sample final stage')
        stage_path = os.path.join(homepath, 'stage_final')
        temp = np.exp((1 - step.old_beta) *
                      (step.likelihoods - step.likelihoods.max()))
        step.weights = temp / np.sum(temp)
        step.covariance = step.calc_covariance()
        step.proposal_dist = choose_proposal(step.proposal_name,
                                             scale=step.covariance)
        step.resampling_indexes = step.resample()

        sample_args['step'] = step
        sample_args['stage_path'] = stage_path
        sample_args['chains'] = chains
        _iter_parallel_chains(**sample_args)

        outpath = os.path.join(stage_path, sample_p_outname)
        outparam_list = [step, update]
        utility.dump_objects(outpath, outparam_list)
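
The final-stage re-weighting above implements steps (1)-(2) of the algorithm summary: chains are weighted by their end-point likelihoods under the remaining tempering increment. A standalone numpy sketch with illustrative values:

import numpy as np

# end-point log-likelihoods of four chains (illustrative values):
likelihoods = np.array([-12.3, -10.1, -11.7, -9.8])
old_beta = 0.6   # last tempering parameter before beta would exceed 1

# importance weights for the jump from old_beta to beta = 1, shifted by
# the maximum for numerical stability, exactly as in the code above:
temp = np.exp((1. - old_beta) * (likelihoods - likelihoods.max()))
weights = temp / np.sum(temp)
assert np.isclose(np.sum(weights), 1.0)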