Esempio n. 1
0
class GeodeticSourceComposite(GeodeticComposite):
    """
    Composite that solves the non-linear geodetic forward model.

    Parameters
    ----------
    gc : :class:`config.GeodeticConfig`
        configuration object of the geodetic setup; its
        ``gf_config.store_superdir`` is handed to the engine
    project_dir : str
        directory of the model project, forwarded to the parent class
    sources : list
        of :class:`pyrocko.gf.seismosizer.Source`
    event : :class:`pyrocko.model.Event`
        reference event, forwarded to the parent class
    hypers : boolean
        forwarded to the parent class; if true the object is initialised
        for hyper parameter optimization
    """
    def __init__(self, gc, project_dir, sources, event, hypers=False):
        super(GeodeticSourceComposite, self).__init__(
            gc, project_dir, event, hypers=hypers)

        store_superdirs = [gc.gf_config.store_superdir]
        self.engine = LocalEngine(store_superdirs=store_superdirs)
        self.sources = sources

    def __getstate__(self):
        """
        Close the stores cached by the engine and return a shallow copy of
        the instance dictionary as state.
        """
        self.engine.close_cashed_stores()
        state = self.__dict__.copy()
        return state

    def point2sources(self, point):
        """
        Update the composite source(s) in place with the values of the point.

        The point itself is left untouched; a deep copy is adjusted and
        filtered before it is applied to the sources.
        """
        tpoint = utility.adjust_point_units(copy.deepcopy(point))

        # hyperparameters are not source attributes -> drop them
        for hypername in self.config.get_hypernames():
            if hypername in tpoint:
                tpoint.pop(hypername)

        # only keep entries that the (first) source knows as parameter
        known_params = list(self.sources[0].keys())
        unknown_names = [
            name for name in list(tpoint.keys())
            if name not in known_params]
        for name in unknown_names:
            tpoint.pop(name)

        source_points = utility.split_point(tpoint)
        for idx, source in enumerate(self.sources):
            utility.update_source(source, **source_points[idx])
            # reset source time may result in store error otherwise
            source.time = 0.

    def get_formula(self, input_rvs, fixed_rvs, hyperparams, problem_config):
        """
        Build the geodetic likelihood formula of the model. Has to be called
        within a with model context.
        Part of the pymc3 model.

        Note that ``input_rvs`` is updated in place with ``fixed_rvs``.

        Parameters
        ----------
        input_rvs : dict
            of :class:`pymc3.distribution.Distribution`
        fixed_rvs : dict
            of :class:`numpy.array`
        hyperparams : dict
            of :class:`pymc3.distribution.Distribution`
        problem_config : :class:`config.ProblemConfig`

        Returns
        -------
        posterior_llk : :class:`theano.tensor.Tensor`
        """
        hp_specific = self.config.dataset_specific_residual_noise_estimation

        self.input_rvs = input_rvs
        self.fixed_rvs = fixed_rvs

        rv_names = ', '.join(self.input_rvs.keys())
        logger.info('Geodetic optimization on: \n '
                    '%s' % rv_names)

        self.input_rvs.update(fixed_rvs)

        tstart = time()
        disp = self.get_synths(self.input_rvs)
        tstop = time()
        elapsed = tstop - tstart
        logger.debug(
            'Geodetic forward model on test model takes: %f' % elapsed)

        los_disp = (disp * self.slos_vectors).sum(axis=1)

        weighted_misfit = tt.cast(
            (self.sdata - los_disp) * self.sodws, tconfig.floatX)
        residuals = self.Bij.srmap(weighted_misfit)

        self.init_hierarchicals(problem_config)
        if len(self.hierarchicals) > 0:
            residuals = self.remove_ramps(residuals)

        logpts = multivariate_normal_chol(
            self.datasets,
            self.weights,
            hyperparams,
            residuals,
            hp_specific=hp_specific)

        return Deterministic(self._like_name, logpts).sum()
Esempio n. 2
0
class SeismicComposite(Composite):
    """
    Comprises how to solve the non-linear seismic forward model.

    Parameters
    ----------
    sc : :class:`config.SeismicConfig`
        configuration object containing seismic setup parameters
    event: :class:`pyrocko.model.Event`
    project_dir : str
        directory of the model project, where to find the data
    hypers : boolean
        if true initialise object for hyper parameter optimization
    """
    # lazily filled caches of the corresponding properties
    _datasets = None
    _weights = None
    _targets = None

    def __init__(self, sc, event, project_dir, hypers=False):

        super(SeismicComposite, self).__init__()

        logger.debug('Setting up seismic structure ...\n')
        self.name = 'seismic'
        self._like_name = 'seis_like'
        self.correction_name = 'time_shift'

        self.event = event
        self.engine = LocalEngine(
            store_superdirs=[sc.gf_config.store_superdir])

        seismic_data_path = os.path.join(
            project_dir, bconfig.seismic_data_name)

        self.datahandler = heart.init_datahandler(
            seismic_config=sc, seismic_data_path=seismic_data_path)

        self.noise_analyser = cov.SeismicNoiseAnalyser(
            structure=sc.noise_estimator.structure,
            pre_arrival_time=sc.noise_estimator.pre_arrival_time,
            engine=self.engine,
            event=self.event,
            chop_bounds=['b', 'c'])

        # only waveform configs flagged with "include" get a wavemap
        self.wavemaps = []
        for i, wc in enumerate(sc.waveforms):
            if wc.include:
                wmap = heart.init_wavemap(
                    waveformfit_config=wc,
                    datahandler=self.datahandler,
                    event=event,
                    mapnumber=i)

                self.wavemaps.append(wmap)
            else:
                logger.info(
                    'The waveform defined in "%s %i" config is not '
                    'included in the optimization!' % (wc.name, i))

        if hypers:
            # one shared likelihood variable per n_t entry; these are
            # the variables that update_llks writes to
            self._llks = [
                shared(num.array([1.]), name='seis_llk_%i' % t, borrow=True)
                for t in range(self.n_t)]

    def __getstate__(self):
        """
        Close the stores cached by the engine and return a shallow copy of
        the instance dictionary as state.
        """
        self.engine.close_cashed_stores()
        return self.__dict__.copy()

    def analyse_noise(self, tpoint=None):
        """
        Analyse seismic noise in datatraces and set
        data-covariance matrixes accordingly.

        Parameters
        ----------
        tpoint : dict
            point in solution space; only used (and then required) if the
            configured noise structure is 'non-toeplitz', where results
            are assembled for it
        """
        if self.config.noise_estimator.structure == 'non-toeplitz':
            results = self.assemble_results(
                tpoint, order='wmap', chop_bounds=['b', 'c'])
        else:
            results = [None] * len(self.wavemaps)

        for wmap, wmap_results in zip(self.wavemaps, results):
            logger.info(
                'Retrieving seismic data-covariances with structure "%s" '
                'for %s ...' % (
                    self.config.noise_estimator.structure, wmap._mapid))

            cov_ds_seismic = self.noise_analyser.get_data_covariances(
                wmap=wmap, results=wmap_results,
                sample_rate=self.config.gf_config.sample_rate)

            for j, trc in enumerate(wmap.datasets):
                if trc.covariance is None:
                    trc.covariance = heart.Covariance(data=cov_ds_seismic[j])
                else:
                    trc.covariance.data = cov_ds_seismic[j]

                # the sum of all entries equalling the data length is taken
                # as indication for an identity matrix
                if int(trc.covariance.data.sum()) == trc.data_len():
                    logger.warning('Data covariance is identity matrix!'
                                   ' Please double check!!!')

    def init_hierarchicals(self, problem_config):
        """
        Initialise random variables for temporal station corrections.

        Parameters
        ----------
        problem_config : :class:`config.ProblemConfig`
            its ``hierarchicals`` are checked against the composite config

        Raises
        ------
        ConfigInconsistentError
            if the station_corrections flag of the composite config and the
            hierarchicals of the problem config contradict each other
        """
        if not self.config.station_corrections and \
                self.correction_name in problem_config.hierarchicals:
            raise ConfigInconsistentError(
                'Station corrections disabled, but they are defined'
                ' in the problem configuration!')

        if self.config.station_corrections and \
                self.correction_name not in problem_config.hierarchicals:
            raise ConfigInconsistentError(
                'Station corrections enabled, but they are not defined'
                ' in the problem configuration!')

        if self.correction_name in problem_config.hierarchicals:
            # one correction per unique station
            nhierarchs = len(self.get_unique_stations())
            param = problem_config.hierarchicals[self.correction_name]
            logger.info(
                'Estimating time shift for each station...')
            kwargs = dict(
                name=self.correction_name,
                shape=nhierarchs,
                lower=num.repeat(param.lower, nhierarchs),
                upper=num.repeat(param.upper, nhierarchs),
                testval=num.repeat(param.testvalue, nhierarchs),
                transform=None,
                dtype=tconfig.floatX)

            try:
                station_corrs_rv = Uniform(**kwargs)

            except TypeError:
                # fall back to a distribution without name
                kwargs.pop('name')
                station_corrs_rv = Uniform.dist(**kwargs)

            self.hierarchicals[self.correction_name] = station_corrs_rv

    def init_weights(self):
        """
        Initialise shared weights in wavemaps.

        The inverse Cholesky factor of each dataset covariance is wrapped
        into a shared variable and added to the respective wavemap.
        """
        for wmap in self.wavemaps:
            weights = []
            for j, trc in enumerate(wmap.datasets):
                icov = trc.covariance.chol_inverse
                weights.append(
                    shared(
                        icov,
                        name='seis_%s_weight_%i' % (wmap._mapid, j),
                        borrow=True))

            wmap.add_weights(weights)

    def get_unique_stations(self):
        """
        Return unique list of the station names of all wavemaps.
        """
        us = []
        for wmap in self.wavemaps:
            us.extend(wmap.get_station_names())
        return utility.unique_list(us)

    @property
    def n_t(self):
        """
        Sum of the ``n_t`` attributes of all wavemaps.
        """
        return sum(wmap.n_t for wmap in self.wavemaps)

    @property
    def datasets(self):
        """
        Datasets of all wavemaps in one list, cached after first access.
        """
        if self._datasets is None:
            ds = []
            for wmap in self.wavemaps:
                ds.extend(wmap.datasets)

            self._datasets = ds
        return self._datasets

    @property
    def weights(self):
        """
        Weights of all wavemaps in one list, cached after first access.
        """
        if self._weights is None:
            ws = []
            for wmap in self.wavemaps:
                ws.extend(wmap.weights)

            self._weights = ws
        return self._weights

    @property
    def targets(self):
        """
        Targets of all wavemaps in one list, cached after first access.
        """
        if self._targets is None:
            ts = []
            for wmap in self.wavemaps:
                ts.extend(wmap.targets)

            self._targets = ts
        return self._targets

    def assemble_results(
            self, point, chop_bounds=None, order='list',
            outmode='stacked_traces'):
        """
        Assemble seismic traces for given point in solution space.

        Parameters
        ----------
        point : :func:`pymc3.Point`
            Dictionary with model parameters
        chop_bounds : list of str
            forwarded to get_synthetics, default: ['a', 'd']
        order : str
            'list' returns a flat list of results,
            'wmap' returns one list of results per wavemap
        outmode : str
            forwarded to get_synthetics

        Returns
        -------
        List with :class:`heart.SeismicResult`

        Raises
        ------
        ValueError
            if no point is given or the order is not supported
        """
        if point is None:
            raise ValueError('A point has to be provided!')

        # fail before the expensive forward calculations
        if order not in ('list', 'wmap'):
            raise ValueError('Order "%s" is not supported' % order)

        if chop_bounds is None:
            # fresh list per call instead of a shared mutable default
            chop_bounds = ['a', 'd']

        logger.debug('Assembling seismic waveforms ...')

        syn_proc_traces, obs_proc_traces = self.get_synthetics(
            point, outmode=outmode,
            chop_bounds=chop_bounds, order='wmap')

        # will yield exactly the same as previous call needs wmap.prepare data
        # to be aware of taper_tolerance_factor
        syn_filt_traces, obs_filt_traces = self.get_synthetics(
            point, outmode=outmode, taper_tolerance_factor=0.,
            chop_bounds=chop_bounds, order='wmap')

        results = []
        for i, wmap in enumerate(self.wavemaps):
            at = wmap.config.arrival_taper

            wmap_results = []
            for j, obs_tr in enumerate(obs_proc_traces[i]):

                # residual traces: observed minus synthetic
                dtrace_proc = obs_tr.copy()
                dtrace_proc.set_ydata(
                    (obs_tr.get_ydata() - syn_proc_traces[i][j].get_ydata()))

                dtrace_filt = obs_filt_traces[i][j].copy()
                dtrace_filt.set_ydata(
                    (obs_filt_traces[i][j].get_ydata() -
                        syn_filt_traces[i][j].get_ydata()))

                taper = at.get_pyrocko_taper(
                    float(obs_tr.tmin - at.a))

                wmap_results.append(heart.SeismicResult(
                    processed_obs=obs_tr,
                    processed_syn=syn_proc_traces[i][j],
                    processed_res=dtrace_proc,
                    filtered_obs=obs_filt_traces[i][j],
                    filtered_syn=syn_filt_traces[i][j],
                    filtered_res=dtrace_filt,
                    taper=taper))

            if order == 'list':
                results.extend(wmap_results)
            else:
                results.append(wmap_results)

        return results

    def update_llks(self, point):
        """
        Update posterior likelihoods of the composite with respect to one point
        in the solution space.

        Writes to the shared variables in ``self._llks``, which are only
        created if the composite was initialised with hypers=True.

        Parameters
        ----------
        point : dict
            with numpy array-like items and variable name keys
        """
        results = self.assemble_results(point, chop_bounds=['b', 'c'])
        for k, result in enumerate(results):
            choli = self.datasets[k].covariance.chol_inverse
            tmp = choli.dot(result.processed_res.ydata)
            _llk = num.asarray([num.dot(tmp, tmp)])
            self._llks[k].set_value(_llk)
Esempio n. 3
0
class SeismicComposite(Composite):
    """
    Comprises how to solve the non-linear seismic forward model.

    Parameters
    ----------
    sc : :class:`config.SeismicConfig`
        configuration object containing seismic setup parameters
    events: list
        of :class:`pyrocko.model.Event`
    project_dir : str
        directory of the model project, where to find the data
    hypers : boolean
        if true initialise object for hyper parameter optimization
    """
    # lazily filled caches of the corresponding properties
    _datasets = None
    _weights = None
    _targets = None
    # names of the hierarchical RVs created by init_hierarchicals
    _hierarchicalnames = None

    def __init__(self, sc, events, project_dir, hypers=False):

        super(SeismicComposite, self).__init__(events)

        logger.debug('Setting up seismic structure ...\n')
        self.name = 'seismic'
        self._like_name = 'seis_like'
        self.correction_name = 'time_shift'

        self.engine = LocalEngine(
            store_superdirs=[sc.gf_config.store_superdir])

        if sc.responses_path is not None:
            responses_path = os.path.join(sc.responses_path,
                                          bconfig.response_file_name)
        else:
            responses_path = sc.responses_path

        # load data, one datahandler per event
        self.datahandlers = []
        for i in range(self.nevents):
            seismic_data_path = os.path.join(
                project_dir, bconfig.multi_event_seismic_data_name(i))

            logger.info('Loading seismic data for event %i'
                        ' from: %s ' % (i, seismic_data_path))
            self.datahandlers.append(
                heart.init_datahandler(seismic_config=sc,
                                       seismic_data_path=seismic_data_path,
                                       responses_path=responses_path))

        self.noise_analyser = cov.SeismicNoiseAnalyser(
            structure=sc.noise_estimator.structure,
            pre_arrival_time=sc.noise_estimator.pre_arrival_time,
            engine=self.engine,
            events=self.events,
            chop_bounds=['b', 'c'])

        # only waveform configs flagged with "include" get a wavemap, each
        # is tied to the datahandler and event of its event_idx
        self.wavemaps = []
        for i, wc in enumerate(sc.waveforms):
            if wc.include:
                wmap = heart.init_wavemap(
                    waveformfit_config=wc,
                    datahandler=self.datahandlers[wc.event_idx],
                    event=self.events[wc.event_idx],
                    mapnumber=i)

                self.wavemaps.append(wmap)
            else:
                logger.info('The waveform defined in "%s %i" config is not '
                            'included in the optimization!' % (wc.name, i))

        if hypers:
            # one shared likelihood variable per n_t entry; these are
            # the variables that update_llks writes to
            self._llks = [
                shared(num.array([1.]), name='seis_llk_%i' % t, borrow=True)
                for t in range(self.n_t)]

    def _hyper2wavemap(self, hypername):
        """
        Return the wavemap whose ``_mapid`` equals the hyperparameter name
        stripped of its first and last underscore separated parts.

        Raises
        ------
        ValueError
            if no wavemap matches
        """
        dummy = '_'.join(hypername.split('_')[1:-1])
        for wmap in self.wavemaps:
            if wmap._mapid == dummy:
                return wmap

        raise ValueError('No waveform mapping found for hyperparameter! %s' %
                         hypername)

    def get_hypersize(self, hp_name):
        """
        Return size of the hyperparameter

        Parameters
        ----------
        hp_name: str
            of hyperparameter name

        Returns
        -------
        int
            hypersize of the associated wavemap if dataset specific residual
            noise estimation is configured, else 1
        """
        if self.config.dataset_specific_residual_noise_estimation:
            wmap = self._hyper2wavemap(hp_name)
            return wmap.hypersize
        else:
            return 1

    def __getstate__(self):
        """
        Close the stores cached by the engine and return a shallow copy of
        the instance dictionary as state.
        """
        self.engine.close_cashed_stores()
        return self.__dict__.copy()

    def analyse_noise(self, tpoint=None, chop_bounds=None):
        """
        Analyse seismic noise in datatraces and set
        data-covariance matrixes accordingly.

        Parameters
        ----------
        tpoint : dict
            point in solution space; only used (and then required) if the
            configured noise structure is 'non-toeplitz', where results
            are assembled for it
        chop_bounds : list of str
            forwarded to assemble_results and the noise analyser,
            default: ['b', 'c']
        """
        if chop_bounds is None:
            # fresh list per call instead of a shared mutable default
            chop_bounds = ['b', 'c']

        if self.config.noise_estimator.structure == 'non-toeplitz':
            results = self.assemble_results(tpoint,
                                            order='wmap',
                                            chop_bounds=chop_bounds)
        else:
            results = [None] * len(self.wavemaps)

        for wmap, wmap_results in zip(self.wavemaps, results):
            logger.info(
                'Retrieving seismic data-covariances with structure "%s" '
                'for %s ...' %
                (self.config.noise_estimator.structure, wmap._mapid))

            cov_ds_seismic = self.noise_analyser.get_data_covariances(
                wmap=wmap,
                results=wmap_results,
                sample_rate=self.config.gf_config.sample_rate,
                chop_bounds=chop_bounds)

            for j, trc in enumerate(wmap.datasets):
                if trc.covariance is None:
                    trc.covariance = heart.Covariance(data=cov_ds_seismic[j])
                else:
                    trc.covariance.data = cov_ds_seismic[j]

                # the sum of all entries equalling the data length is taken
                # as indication for an identity matrix
                if int(trc.covariance.data.sum()) == trc.data_len():
                    logger.warning('Data covariance is identity matrix!'
                                   ' Please double check!!!')

    def init_hierarchicals(self, problem_config):
        """
        Initialise random variables for temporal station corrections.

        For each wavemap either the wavemap specific hierarchical parameter
        (key: ``wmap.time_shifts_id``) or a copy of the global one
        (key: ``self.correction_name``), repeated for each station of the
        wavemap, is used. Parameters with identical lower and upper bounds
        are not turned into random variables but fixed at the lower bound.

        Parameters
        ----------
        problem_config : :class:`config.ProblemConfig`
            its ``hierarchicals`` are checked against the composite config

        Raises
        ------
        ConfigInconsistentError
            if the station_corrections flag of the composite config and the
            hierarchicals of the problem config contradict each other
        """
        hierarchicals = problem_config.hierarchicals
        self._hierarchicalnames = []
        if not self.config.station_corrections and \
                self.correction_name in hierarchicals:
            raise ConfigInconsistentError(
                'Station corrections disabled, but they are defined'
                ' in the problem configuration!')

        if self.config.station_corrections and \
                self.correction_name not in hierarchicals:
            raise ConfigInconsistentError(
                'Station corrections enabled, but they are not defined'
                ' in the problem configuration!')

        if self.correction_name in hierarchicals:
            logger.info(
                'Estimating time shift for each station and waveform map...')
            for wmap in self.wavemaps:
                hierarchical_name = wmap.time_shifts_id
                nhierarchs = len(wmap.get_station_names())

                logger.info('For %s with %i shifts' %
                            (hierarchical_name, nhierarchs))

                if hierarchical_name in hierarchicals:
                    logger.info('Using wavemap specific imported:'
                                ' %s ' % hierarchical_name)
                    param = hierarchicals[hierarchical_name]
                else:
                    logger.info('Using global %s' % self.correction_name)
                    # work on a copy, the global parameter is shared
                    # between the wavemaps
                    param = copy.deepcopy(
                        hierarchicals[self.correction_name])
                    param.lower = num.repeat(param.lower, nhierarchs)
                    param.upper = num.repeat(param.upper, nhierarchs)
                    param.testvalue = num.repeat(param.testvalue, nhierarchs)

                if hierarchical_name not in self.hierarchicals:
                    if not num.array_equal(param.lower, param.upper):
                        kwargs = dict(name=hierarchical_name,
                                      shape=param.dimension,
                                      lower=param.lower,
                                      upper=param.upper,
                                      testval=param.testvalue,
                                      transform=None,
                                      dtype=tconfig.floatX)

                        try:
                            self.hierarchicals[hierarchical_name] = Uniform(
                                **kwargs)
                        except TypeError:
                            # fall back to a distribution without name
                            kwargs.pop('name')
                            self.hierarchicals[hierarchical_name] = \
                                Uniform.dist(**kwargs)

                        self._hierarchicalnames.append(hierarchical_name)
                    else:
                        logger.info(
                            'not solving for %s, got fixed at %s' %
                            (param.name,
                             utility.list2string(param.lower.flatten())))
                        self.hierarchicals[hierarchical_name] = param.lower

    def export(self,
               point,
               results_path,
               stage_number,
               fix_output=False,
               force=False,
               update=False,
               chop_bounds=None):
        """
        Save results for given point to result path.

        Parameters
        ----------
        point : dict
            point in solution space to assemble the results for
        results_path : str
            directory the output files are written to
        stage_number : int
            becomes part of the mseed file names
        fix_output : boolean
            if true and station codes are too long for mseed, truncate them
            to their last 5 characters and set the location code to the
            reference model index before saving again
        force : boolean
            forwarded as ``overwrite`` to :func:`pyrocko.io.save`
        update : boolean
            if true update the weights for the point and save the 'pred_v'
            covariance matrixes as well
        chop_bounds : list of str
            forwarded to assemble_results, analyse_noise and update_weights,
            default: ['b', 'c']

        Raises
        ------
        ValueError
            if station codes are too long and fix_output is false
        """
        if chop_bounds is None:
            # fresh list per call instead of a shared mutable default
            chop_bounds = ['b', 'c']

        def save_covs(wmap, cov_mat='pred_v'):
            """
            Save covariance matrixes of given attribute
            """
            covs = {
                utility.list2string(dataset.nslc_id):
                getattr(dataset.covariance, cov_mat)
                for dataset in wmap.datasets
            }

            outname = os.path.join(
                results_path, '%s_C_%s_%s' % ('seismic', cov_mat, wmap._mapid))
            logger.info('"%s" to: %s' % (wmap._mapid, outname))
            num.savez(outname, **covs)

        from pyrocko import io

        # synthetics and data
        results = self.assemble_results(point, chop_bounds=chop_bounds)
        for traces, attribute in heart.results_for_export(results=results,
                                                          datatype='seismic'):

            filename = '%s_%i.mseed' % (attribute, stage_number)
            outpath = os.path.join(results_path, filename)
            try:
                io.save(traces, outpath, overwrite=force)
            except io.mseed.CodeTooLong:
                if fix_output:
                    for tr in traces:
                        tr.set_station(tr.station[-5:])
                        tr.set_location(
                            str(self.config.gf_config.reference_model_idx))

                    io.save(traces, outpath, overwrite=force)
                else:
                    raise ValueError(
                        'Some station codes are too long! '
                        '(the --fix_output option will truncate to '
                        'last 5 characters!)')

        # (re-)estimate the data covariances for the point before saving
        self.analyse_noise(point, chop_bounds=chop_bounds)
        if update:
            logger.info('Saving velocity model covariance matrixes...')
            self.update_weights(point, chop_bounds=chop_bounds)
            for wmap in self.wavemaps:
                save_covs(wmap, 'pred_v')

        logger.info('Saving data covariance matrixes...')
        for wmap in self.wavemaps:
            save_covs(wmap, 'data')

    def init_weights(self):
        """
        Initialise shared weights in wavemaps.

        The inverse Cholesky factor of each dataset covariance is wrapped
        into a shared variable and added to the respective wavemap.
        """
        logger.info('Initialising weights ...')
        for wmap in self.wavemaps:
            weights = []
            for j, trc in enumerate(wmap.datasets):
                icov = trc.covariance.chol_inverse
                weights.append(
                    shared(icov,
                           name='seis_%s_weight_%i' % (wmap._mapid, j),
                           borrow=True))

            wmap.add_weights(weights)

    def get_all_station_names(self):
        """
        Returns list of station names in the order of wavemaps.
        """
        us = []
        for wmap in self.wavemaps:
            us.extend(wmap.get_station_names())

        return us

    def get_unique_time_shifts_ids(self):
        """
        Return unique time_shifts ids from wavemaps, which are keys to
        hierarchical RVs of station corrections
        """
        ts = []
        for wmap in self.wavemaps:
            ts.append(wmap.time_shifts_id)

        return utility.unique_list(ts)

    def get_unique_station_names(self):
        """
        Return unique station names from all wavemaps
        """
        return utility.unique_list(self.get_all_station_names())

    @property
    def n_t(self):
        """
        Sum of the ``n_t`` attributes of all wavemaps.
        """
        return sum(wmap.n_t for wmap in self.wavemaps)

    @property
    def datasets(self):
        """
        Datasets of all wavemaps in one list, cached after first access.
        """
        if self._datasets is None:
            ds = []
            for wmap in self.wavemaps:
                ds.extend(wmap.datasets)

            self._datasets = ds
        return self._datasets

    @property
    def weights(self):
        """
        Weights of all wavemaps in one list.

        The list is re-collected on each access as long as it is empty,
        wavemaps without weights are skipped.
        """
        if self._weights is None or len(self._weights) == 0:
            ws = []
            for wmap in self.wavemaps:
                if wmap.weights:
                    ws.extend(wmap.weights)

            self._weights = ws
        return self._weights

    @property
    def targets(self):
        """
        Targets of all wavemaps in one list, cached after first access.
        """
        if self._targets is None:
            ts = []
            for wmap in self.wavemaps:
                ts.extend(wmap.targets)

            self._targets = ts
        return self._targets

    def assemble_results(self,
                         point,
                         chop_bounds=None,
                         order='list',
                         outmode='stacked_traces'):
        """
        Assemble seismic traces for given point in solution space.

        Parameters
        ----------
        point : :func:`pymc3.Point`
            Dictionary with model parameters
        chop_bounds : list of str
            forwarded to get_synthetics, default: ['a', 'd']
        order : str
            'list' returns a flat list of results,
            'wmap' returns one list of results per wavemap
        outmode : str
            forwarded to get_synthetics; for 'tapered_data' the synthetics
            of a trace are passed on as they are, otherwise they are
            wrapped into a single-element list of source contributions

        Returns
        -------
        List with :class:`heart.SeismicResult`

        Raises
        ------
        ValueError
            if no point is given or the order is not supported
        """
        if point is None:
            raise ValueError('A point has to be provided!')

        # fail before the expensive forward calculation
        if order not in ('list', 'wmap'):
            raise ValueError('Order "%s" is not supported' % order)

        if chop_bounds is None:
            # fresh list per call instead of a shared mutable default
            chop_bounds = ['a', 'd']

        logger.debug('Assembling seismic waveforms ...')

        syn_proc_traces, obs_proc_traces = self.get_synthetics(
            point, outmode=outmode, chop_bounds=chop_bounds, order='wmap')

        results = []
        for i, wmap in enumerate(self.wavemaps):
            at = wmap.config.arrival_taper

            wmap_results = []
            for j, obs_tr in enumerate(obs_proc_traces[i]):

                taper = at.get_pyrocko_taper(float(obs_tr.tmin - at.a))

                if outmode != 'tapered_data':
                    source_contributions = [syn_proc_traces[i][j]]
                else:
                    source_contributions = syn_proc_traces[i][j]

                wmap_results.append(
                    heart.SeismicResult(
                        point=point,
                        processed_obs=obs_tr,
                        source_contributions=source_contributions,
                        taper=taper))

            if order == 'list':
                results.extend(wmap_results)
            else:
                results.append(wmap_results)

        return results

    def update_llks(self, point):
        """
        Update posterior likelihoods of the composite with respect to one point
        in the solution space.

        Writes to the shared variables in ``self._llks``, which are only
        created if the composite was initialised with hypers=True.

        Parameters
        ----------
        point : dict
            with numpy array-like items and variable name keys
        """
        results = self.assemble_results(point, chop_bounds=['b', 'c'])
        for k, result in enumerate(results):
            choli = self.datasets[k].covariance.chol_inverse
            tmp = choli.dot(result.processed_res.ydata)
            _llk = num.asarray([num.dot(tmp, tmp)])
            self._llks[k].set_value(_llk)

    def get_standardized_residuals(self, point, chop_bounds=None):
        """
        Parameters
        ----------
        point : dict
            with parameters to point in solution space to calculate
            standardized residuals
        chop_bounds : list of str
            forwarded to assemble_results and update_weights,
            default: ['b', 'c']

        Returns
        -------
        dict of arrays of standardized residuals,
            keys are the nslc_ids of the datasets
        """
        if chop_bounds is None:
            # fresh list per call instead of a shared mutable default
            chop_bounds = ['b', 'c']

        results = self.assemble_results(point,
                                        order='list',
                                        chop_bounds=chop_bounds)
        self.update_weights(point, chop_bounds=chop_bounds)

        counter = utility.Counter()
        hp_specific = self.config.dataset_specific_residual_noise_estimation
        stdz_res = OrderedDict()
        for data_trc, result in zip(self.datasets, results):
            hp_name = get_hyper_name(data_trc)
            if hp_specific:
                # one hyperparameter entry per dataset, the counter keeps
                # track of the running index for each hyperparameter name
                hp = point[hp_name][counter(hp_name)]
            else:
                hp = point[hp_name]

            # NOTE(review): this evaluates as (chol * exp(hp)) / 2. --
            # confirm that this is the intended hyperparameter scaling
            choli = num.linalg.inv(data_trc.covariance.chol * num.exp(hp) / 2.)
            stdz_res[data_trc.nslc_id] = choli.dot(
                result.processed_res.get_ydata())

        return stdz_res

    def get_variance_reductions(self,
                                point,
                                results=None,
                                weights=None,
                                chop_bounds=None):
        """
        Parameters
        ----------
        point : dict
            with parameters to point in solution space to calculate
            variance reductions
        results : list
            of :class:`heart.SeismicResult`, one per dataset; if None they
            are assembled for the point
        weights : list
            one per dataset; if None the noise is analysed, the weights are
            updated for the point and ``self.weights`` is used
        chop_bounds : list of str
            forwarded to assemble_results, analyse_noise and update_weights,
            default: ['a', 'd']

        Returns
        -------
        dict of floats,
            keys are the nslc_ids of the datasets converted to strings
        """
        if chop_bounds is None:
            # fresh list per call instead of a shared mutable default
            chop_bounds = ['a', 'd']

        if results is None:
            results = self.assemble_results(point,
                                            order='list',
                                            chop_bounds=chop_bounds)

        ndatasets = len(self.datasets)

        assert len(results) == ndatasets

        if weights is None:
            self.analyse_noise(point, chop_bounds=chop_bounds)
            self.update_weights(point, chop_bounds=chop_bounds)
            weights = self.weights

        nweights = len(weights)
        assert nweights == ndatasets

        logger.debug('n weights %i , n datasets %i' % (nweights, ndatasets))

        logger.debug('Calculating variance reduction for solution ...')

        var_reds = OrderedDict()
        # the weights only take part in the iteration, the misfits are
        # weighted with the inverse covariance of each dataset
        for data_trc, _, result in zip(self.datasets, weights, results):

            icov = data_trc.covariance.inverse

            data = result.processed_obs.get_ydata()
            residual = result.processed_res.get_ydata()

            nom = residual.T.dot(icov).dot(residual)
            denom = data.T.dot(icov).dot(data)

            logger.debug('nom %f, denom %f' % (float(nom), float(denom)))
            var_red = 1 - (nom / denom)

            nslc_id = utility.list2string(data_trc.nslc_id)
            logger.debug('Variance reduction for %s is %f' %
                         (nslc_id, var_red))

            var_reds[nslc_id] = var_red

        return var_reds
Esempio n. 4
0
class SeismicComposite(Composite):
    """
    Comprises how to solve the non-linear seismic forward model.

    Parameters
    ----------
    sc : :class:`config.SeismicConfig`
        configuration object containing seismic setup parameters
    event : :class:`pyrocko.model.Event`
        reference event; it is handed to the wavemap initialisation and to
        the data-covariance estimation
    project_dir : str
        directory of the model project, where to find the data
    hypers : boolean
        if true initialise object for hyper parameter optimization
    """
    # Caches behind the ``datasets``, ``weights`` and ``targets`` properties;
    # each is filled on first access while it is still None.
    _datasets = None
    _weights = None
    _targets = None

    def __init__(self, sc, event, project_dir, hypers=False):
        """
        Set up engine, data handler, wavemaps, covariances and weights.

        For every waveform configuration in ``sc.waveforms`` flagged with
        ``include`` a wavemap is created. Each trace of the wavemap gets a
        :class:`heart.Covariance` attached (estimated if
        ``sc.calc_data_cov`` is set, otherwise taken from the trace, with
        the identity matrix as fallback) and the ``chol_inverse`` of that
        covariance is registered as a weight of the wavemap.

        Parameters
        ----------
        sc : :class:`config.SeismicConfig`
            configuration object containing seismic setup parameters
        event : :class:`pyrocko.model.Event`
            reference event
        project_dir : str
            directory of the model project, where to find the data
        hypers : boolean
            if true, additionally create one ``seis_llk_<i>`` shared
            variable per trace (``self.n_t`` in total)

        Raises
        ------
        ValueError
            if an imported covariance does not have as many rows as the
            number of samples derived from the arrival-taper duration and
            the sample rate
        """

        super(SeismicComposite, self).__init__()

        logger.debug('Setting up seismic structure ...\n')
        self.name = 'seismic'
        self._like_name = 'seis_like'
        self.correction_name = 'time_shift'

        self.event = event
        self.engine = LocalEngine(
            store_superdirs=[sc.gf_config.store_superdir])

        seismic_data_path = os.path.join(project_dir,
                                         bconfig.seismic_data_name)

        self.datahandler = heart.init_datahandler(
            seismic_config=sc, seismic_data_path=seismic_data_path)

        self.wavemaps = []
        for wc in sc.waveforms:
            if wc.include:
                wmap = heart.init_wavemap(waveformfit_config=wc,
                                          datahandler=self.datahandler,
                                          event=event)

                if sc.calc_data_cov:
                    logger.info('Estimating seismic data-covariances '
                                'for %s ...\n' % wmap.name)

                    cov_ds_seismic = cov.seismic_data_covariance(
                        data_traces=wmap.datasets,
                        filterer=wc.filterer,
                        sample_rate=sc.gf_config.sample_rate,
                        arrival_taper=wc.arrival_taper,
                        engine=self.engine,
                        event=self.event,
                        targets=wmap.targets)
                else:
                    logger.info('No data-covariance estimation, using imported'
                                ' covariances...\n')

                    cov_ds_seismic = []
                    at = wc.arrival_taper
                    # expected matrix size: taper duration times sample rate
                    n_samples = int(
                        num.ceil(at.duration * sc.gf_config.sample_rate))

                    for trc in wmap.datasets:
                        if trc.covariance is None:
                            # ``Logger.warn`` is deprecated, use ``warning``
                            logger.warning(
                                'No data covariance given/estimated! '
                                'Setting default: eye')
                            cov_ds_seismic.append(num.eye(n_samples))
                        else:
                            data_cov = trc.covariance.data
                            if data_cov.shape[0] != n_samples:
                                raise ValueError(
                                    'Imported covariance %i does not agree '
                                    ' with taper duration %i!' %
                                    (data_cov.shape[0], n_samples))
                            cov_ds_seismic.append(data_cov)

                weights = []
                for t, trc in enumerate(wmap.datasets):
                    trc.covariance = heart.Covariance(data=cov_ds_seismic[t])
                    # heuristic identity check: the truncated sum of all
                    # matrix entries equals the trace length
                    if int(trc.covariance.data.sum()) == trc.data_len():
                        logger.warning('Data covariance is identity matrix!'
                                       ' Please double check!!!')
                    icov = trc.covariance.chol_inverse
                    weights.append(
                        shared(icov,
                               name='seis_%s_weight_%i' % (wc.name, t),
                               borrow=True))

                wmap.add_weights(weights)

                self.wavemaps.append(wmap)
            else:
                logger.info('The waveform defined in "%s" config is not '
                            'included in the optimization!' % wc.name)

        if hypers:
            # one likelihood container per trace over all wavemaps
            self._llks = []
            for t in range(self.n_t):
                self._llks.append(
                    shared(num.array([1.]),
                           name='seis_llk_%i' % t,
                           borrow=True))

    def __getstate__(self):
        self.engine.close_cashed_stores()
        return self.__dict__.copy()

    def init_hierarchicals(self, problem_config):
        """
        Initialise random variables for temporal station corrections.
        """
        if not self.config.station_corrections and \
                self.correction_name in problem_config.hierarchicals:
            raise ConfigInconsistentError(
                'Station corrections disabled, but they are defined'
                ' in the problem configuration!')

        if self.config.station_corrections and \
                self.correction_name not in problem_config.hierarchicals:
            raise ConfigInconsistentError(
                'Station corrections enabled, but they are not defined'
                ' in the problem configuration!')

        if self.correction_name in problem_config.hierarchicals:
            nhierarchs = len(self.get_unique_stations())
            param = problem_config.hierarchicals[self.correction_name]
            logger.info('Estimating time shift for each station...')
            kwargs = dict(name=self.correction_name,
                          shape=nhierarchs,
                          lower=num.repeat(param.lower, nhierarchs),
                          upper=num.repeat(param.upper, nhierarchs),
                          testval=num.repeat(param.testvalue, nhierarchs),
                          transform=None,
                          dtype=tconfig.floatX)

            try:
                station_corrs_rv = Uniform(**kwargs)

            except TypeError:
                kwargs.pop('name')
                station_corrs_rv = Uniform.dist(**kwargs)

            self.hierarchicals[self.correction_name] = station_corrs_rv
        else:
            nhierarchs = 0

    def get_unique_stations(self):
        sl = [wmap.stations for wmap in self.wavemaps]
        us = []
        map(us.extend, sl)
        return list(set(us))

    @property
    def n_t(self):
        return sum(wmap.n_t for wmap in self.wavemaps)

    @property
    def datasets(self):
        if self._datasets is None:
            ds = []
            for wmap in self.wavemaps:
                ds.extend(wmap.datasets)

            self._datasets = ds
        return self._datasets

    @property
    def weights(self):
        if self._weights is None:
            ws = []
            for wmap in self.wavemaps:
                ws.extend(wmap.weights)

            self._weights = ws
        return self._weights

    @property
    def targets(self):
        if self._targets is None:
            ts = []
            for wmap in self.wavemaps:
                ts.extend(wmap.targets)

            self._targets = ts
        return self._targets

    def assemble_results(self, point):
        """
        Collect observed, synthetic and residual traces for a point.

        Synthetics are requested twice through ``get_synthetics``: once
        with default settings (the "processed" traces) and once with
        ``taper_tolerance_factor=2.`` (the "filtered" traces). Residuals
        are observed minus synthetic samples.

        Parameters
        ----------
        point : :func:`pymc3.Point`
            Dictionary with model parameters

        Returns
        -------
        List with :class:`heart.SeismicResult`
        """
        logger.debug('Assembling seismic waveforms ...')

        syn_proc_traces, obs_proc_traces = self.get_synthetics(
            point, outmode='stacked_traces')

        syn_filt_traces, obs_filt_traces = self.get_synthetics(
            point, outmode='stacked_traces', taper_tolerance_factor=2.)

        # the arrival taper of each wavemap, repeated ``n_t`` times
        arrival_tapers = []
        for wmap in self.wavemaps:
            arrival_tapers.extend(wmap.n_t * [wmap.config.arrival_taper])

        def residual_trace(obs, syn):
            # copy of the observed trace carrying obs - syn as samples
            res = obs.copy()
            res.set_ydata((obs.get_ydata() - syn.get_ydata()))
            return res

        results = []
        pairs = zip(obs_proc_traces, arrival_tapers)
        for idx, (obs_proc, at) in enumerate(pairs):
            syn_proc = syn_proc_traces[idx]
            res_proc = residual_trace(obs_proc, syn_proc)

            obs_filt = obs_filt_traces[idx]
            syn_filt = syn_filt_traces[idx]
            res_filt = residual_trace(obs_filt, syn_filt)

            taper_start = float(obs_proc.tmin + num.abs(at.a))
            taper = at.get_pyrocko_taper(taper_start)

            result = heart.SeismicResult(
                processed_obs=obs_proc,
                processed_syn=syn_proc,
                processed_res=res_proc,
                filtered_obs=obs_filt,
                filtered_syn=syn_filt,
                filtered_res=res_filt,
                taper=taper)
            results.append(result)

        return results

    def update_llks(self, point):
        """
        Update posterior likelihoods of the composite with respect to one point
        in the solution space.

        Parameters
        ----------
        point : dict
            with numpy array-like items and variable name keys
        """
        results = self.assemble_results(point)
        for k, result in enumerate(results):
            choli = self.datasets[k].covariance.chol_inverse
            tmp = choli.dot(result.processed_res.ydata)
            _llk = num.asarray([num.dot(tmp, tmp)])
            self._llks[k].set_value(_llk)