Example 1
    def get_metadata_for_file(self, absolute_filename):
        """
        Returns the metadata for a certain file.

        :param absolute_filename: The absolute path of the file.
        """
        if os.path.commonprefix([absolute_filename, self._data_folder]) == \
                self._data_folder:
            relpath = os.path.relpath(absolute_filename, self._data_folder)
            event, type_or_tag, filename = relpath.split(os.path.sep)
            if type_or_tag == "raw":
                c = self.get_waveform_cache(event, "raw")
            else:
                c = self.get_waveform_cache(event, "processed",
                                            type_or_tag)
        elif os.path.commonprefix([absolute_filename,
                                   self._synthetics_folder]) == \
                self._synthetics_folder:
            relpath = os.path.relpath(absolute_filename,
                                      self._synthetics_folder)
            event, iteration, filename = relpath.split(os.path.sep)
            c = self.get_waveform_cache(event, "synthetic", iteration)
        else:
            raise LASIFError("Invalid path.")

        return c.get_details(absolute_filename)
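A minimal usage sketch (the path and the `comm.waveforms` component handle are assumptions based on the path handling above):

    # Hypothetical: resolve the cached metadata for one raw waveform file
    # living under DATA/<event>/raw/. Paths outside the data and synthetics
    # folders raise LASIFError("Invalid path.").
    details = comm.waveforms.get_metadata_for_file(
        "/path/to/project/DATA/GCMT_event_EXAMPLE/raw/BW.ALTM..EHZ.mseed")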
Example 2
    def __init__(self,
                 project_root_path,
                 init_project=False,
                 read_only_caches=False):
        """
        Upon initialization, set the paths and read the config file.

        :type project_root_path: str
        :param project_root_path: The root path of the project.
        :type init_project: bool or str
        :param init_project: Determines whether or not to initialize a new
            project, e.g. create the necessary folder structure. If a string is
            passed, the project will be given this name. Otherwise a default
            name will be chosen. Defaults to False.
        :type read_only_caches: bool
        :param read_only_caches: If True, all caches are read-only. This is
            important for concurrent access as otherwise you might end up
            with race conditions. Make sure to build all necessary caches
            before enabling this, otherwise LASIF will not find all files it
            requires to work.
        """
        # Setup the paths.
        self.__setup_paths(project_root_path)

        if init_project:
            if read_only_caches:
                raise ValueError("Cannot initialize a project with disabled "
                                 "cache-writes.")
            if not os.path.exists(project_root_path):
                os.makedirs(project_root_path)
            self.__init_new_project(init_project)

        # Project-wide flag indicating whether the caches are read-only.
        self.read_only_caches = bool(read_only_caches)

        if not os.path.exists(self.paths["config_file"]):
            msg = ("Could not find the project's config file. Wrong project "
                   "path or uninitialized project?")
            raise LASIFError(msg)

        self.__project_function_cache = {}

        # Setup the communicator and register this component.
        self.__comm = Communicator()
        super(Project, self).__init__(self.__comm, "project")

        # Setup the different components. The CACHE folder must already be
        # present.
        if not os.path.exists(self.paths["cache"]):
            os.makedirs(self.paths["cache"])
        self.__setup_components()

        # Finally update the folder structure.
        self.__update_folder_structure()

        self._read_config_file()

        self.__copy_fct_templates(init_project=init_project)
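A hedged construction sketch (the import path `lasif.components.project` and the concrete directories are assumptions):

    from lasif.components.project import Project  # assumed module path

    # Open an existing project with read-only caches, e.g. for concurrent
    # workers. All caches must have been built beforehand.
    proj = Project("/path/to/MyProject", read_only_caches=True)

    # Or initialize a brand new project named "MyProject".
    proj = Project("/path/to/MyProject", init_project="MyProject")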
Example 3
    def create_new_iteration(self,
                             iteration_name,
                             solver_name,
                             events_dict,
                             min_period,
                             max_period,
                             seconds_prior_arrival=5.,
                             window_length_in_sec=50.,
                             quiet=False,
                             create_folders=True):
        """
        Creates a new iteration XML file.

        :param iteration_name: The name of the iteration.
        :param solver_name: The name of the solver to be used for the new
            iteration.
        :param events_dict: A dictionary specifying the used events.
        :param min_period: The minimum period in seconds for the new iteration.
        :param max_period: The maximum period in seconds for the new iteration.
        :param seconds_prior_arrival: Number of seconds before the theoretical
            phase arrival time used to window seismograms for quality
            control. Defaults to 5.
        :param window_length_in_sec: Length in seconds of the time window
            used to window seismograms for quality control. Defaults to 50.
        :param quiet: Do not print anything if set to `True`.
        :param create_folders: If True, create the folders for this
            iteration's synthetic waveforms. Defaults to True.

        >>> comm = getfixture('iterations_comm')
        >>> comm.iterations.has_iteration("3")
        False
        >>> comm.iterations.create_new_iteration("3", "ses3d_4_1",
        ...     {"EVENT_1": ["AA.BB", "CC.DD"], "EVENT_2": ["EE.FF"]},
        ...     10.0, 20.0, quiet=True, create_folders=False)
        >>> comm.iterations.has_iteration("3")
        True
        >>> os.remove(comm.iterations.get_iteration_dict()["3"])
        """
        iteration_name = str(iteration_name)
        if iteration_name in self.get_iteration_dict():
            msg = "Iteration %s already exists." % iteration_name
            raise LASIFError(msg)

        from lasif.iteration_xml import create_iteration_xml_string
        xml_string = create_iteration_xml_string(iteration_name,
                                                 solver_name,
                                                 events_dict,
                                                 min_period,
                                                 max_period,
                                                 seconds_prior_arrival,
                                                 window_length_in_sec,
                                                 quiet=quiet)
        with open(self.get_filename_for_iteration(iteration_name), "wt")\
                as fh:
            fh.write(xml_string)

        if create_folders:
            self.create_synthetics_folder_for_iteration(iteration_name)
            self.create_stf_folder_for_iteration(iteration_name)
Example 4
    def get_project_function(self, fct_type):
        """
        Helper importing the project specific function.

        :param fct_type: The desired function.
        """
        # Cache to avoid repeated imports.
        if fct_type in self.__project_function_cache:
            return self.__project_function_cache[fct_type]

        # type / filename map
        fct_type_map = {
            "window_picking_function": "window_picking_function.py",
            "preprocessing_function": "preprocessing_function.py",
            "data_svd_selection": "data_svd_selection.py",
            "process_synthetics": "process_synthetics.py",
            "source_time_function": "source_time_function.py",
            "instaseis_synthetics_function":
            "instaseis_synthetics_function.py",
            "stf_deconvolution": "stf_deconvolution.py",
        }

        if fct_type not in fct_type_map:
            msg = "Function '%s' not found. Available types: %s" % (
                fct_type, str(list(fct_type_map.keys())))
            raise LASIFNotFoundError(msg)

        filename = os.path.join(self.paths["functions"],
                                fct_type_map[fct_type])
        if not os.path.exists(filename):
            msg = "No file '%s' in existence." % filename
            raise LASIFNotFoundError(msg)

        fct_template = imp.load_source("_lasif_fct_template", filename)
        try:
            fct = getattr(fct_template, fct_type)
        except AttributeError:
            raise LASIFNotFoundError(
                "Could not find function %s in file '%s'" %
                (fct_type, filename))

        if not callable(fct):
            raise LASIFError("Attribute %s in file '%s' is not a function." %
                             (fct_type, filename))

        # Add to cache.
        self.__project_function_cache[fct_type] = fct
        return fct
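A brief sketch of fetching and calling such a function (the component handle and both arguments are placeholders, based on the other examples on this page):

    # Hypothetical: load the project's preprocessing function and apply it
    # to one file description. Raises LASIFNotFoundError if the project
    # does not provide it.
    fct = comm.project.get_project_function("preprocessing_function")
    fct(processing_info, iteration)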
Example 5
    def plot_Q_model(self, iteration_name):
        """
        Plots the Q model for a given iteration. Will only work if the
        iteration uses SES3D as its solver.

        :param iteration_name: The name of the iteration.
        """
        from lasif.tools.Q_discrete import plot

        iteration = self.get(iteration_name)
        if iteration.solver_settings["solver"].lower() != "ses3d 4.1":
            msg = "Only works for SES3D 4.1"
            raise LASIFError(msg)

        proc_params = iteration.get_process_params()
        f_min = proc_params["highpass"]
        f_max = proc_params["lowpass"]

        relax = iteration.solver_settings["solver_settings"][
            "relaxation_parameter_list"]
        tau_p = relax["tau"]
        weights = relax["w"]

        plot(D_p=weights, tau_p=tau_p, f_min=f_min, f_max=f_max)
Example 6
def _get_default_solver_settings(solver, min_period, max_period, quiet=False):
    """
    Helper function returning etree representation of a solver's default
    settings.

    :param quiet: Do not print anything if set to `True`.
    """
    known_solvers = [
        "ses3d_4_1", "ses3d_2_0", "specfem3d_cartesian", "specfem3d_globe_cem"
    ]
    if solver.lower() == "ses3d_4_1":
        from lasif.tools import Q_discrete
        from lasif.utils import generate_ses3d_4_1_template

        # Generate the relaxation weights for SES3D.
        w_p, tau_p = Q_discrete.calculate_Q_model(
            N=3,
            # These are suitable for the default frequency range.
            f_min=1.0 / max_period,
            f_max=1.0 / min_period,
            iterations=10000,
            initial_temperature=0.1,
            cooling_factor=0.9998,
            quiet=quiet)

        return generate_ses3d_4_1_template(w_p, tau_p)
    elif solver.lower() == "ses3d_2_0":
        from lasif.utils import generate_ses3d_2_0_template
        return generate_ses3d_2_0_template()
    elif solver.lower() == "specfem3d_cartesian":
        from lasif.utils import generate_specfem3d_cartesian_template
        return generate_specfem3d_cartesian_template()
    elif solver.lower() == "specfem3d_globe_cem":
        from lasif.utils import generate_specfem3d_globe_cem_template
        return generate_specfem3d_globe_cem_template()
    else:
        msg = "Solver '%s' not known. Known solvers: %s" % (
            solver, ",".join(known_solvers))
        raise LASIFError(msg)
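A short call sketch (solver names come from `known_solvers`; treating the return value as an lxml element matches the docstring's "etree representation" but is otherwise an assumption):

    from lxml import etree  # assumed to match the docstring's "etree"

    settings = _get_default_solver_settings(
        "ses3d_4_1", min_period=10.0, max_period=100.0, quiet=True)
    print(etree.tostring(settings, pretty_print=True).decode())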
Example 7
    def what_is(self, path):
        """
        Debug function returning a string with information about the file.
        Useful as a debug function and to figure out what LASIF is doing.

        :param path: The path to the file.
        """
        path = os.path.normpath(os.path.abspath(path))

        # File does not exist.
        if not os.path.exists(path):
            raise LASIFNotFoundError("Path '%s' does not exist." % path)
        # File not part of the project.
        if os.path.commonprefix([path, self.comm.project.paths["root"]]) \
                != self.comm.project.paths["root"]:
            raise LASIFError("File '%s' is not part of the LASIF project." %
                             path)

        # Branch on directory vs. file to ease the rest.
        if os.path.isdir(path):
            return self.__what_is_this_folder(path)
        else:
            return self.__what_is_this_file(path)
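A hedged usage sketch (the owning component and the path are assumptions):

    # Ask LASIF what a path is. Raises LASIFNotFoundError for missing paths
    # and LASIFError for paths outside the project root.
    print(comm.query.what_is("/path/to/project/DATA/GCMT_event_EXAMPLE"))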
Example 8
    def plot_events(self, plot_type="map"):
        """
        Plots the domain and beachballs for all events on the map.

        :param plot_type: Determines the type of plot created.
            * ``map`` (default) - a map view of the events
            * ``depth`` - a depth distribution histogram
            * ``time`` - a time distribution histogram
        """
        from lasif import visualization

        events = self.comm.events.get_all_events().values()

        if plot_type == "map":
            m = self.comm.project.domain.plot()
            visualization.plot_events(events, map_object=m)
        elif plot_type == "depth":
            visualization.plot_event_histogram(events, "depth")
        elif plot_type == "time":
            visualization.plot_event_histogram(events, "time")
        else:
            msg = "Unknown plot_type"
            raise LASIFError(msg)
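A minimal sketch (the component handle is an assumption; the plot types are the documented ones):

    import matplotlib.pyplot as plt

    # Histogram of event depths; use "map" or "time" for the other views.
    comm.visualizations.plot_events(plot_type="depth")
    plt.show()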
Example 9
def preprocessing_function(processing_info, iteration):  # NOQA
    """
    Function to perform the actual preprocessing for one individual seismogram.
    This is part of the project so it can change depending on the project.

    Please keep in mind that you will have to manually update this file to a
    new version if LASIF is ever updated.

    You can do whatever you want in this function as long as the function
    signature is honored. The file is read from ``"input_filename"`` and
    written to ``"output_filename"``.

    One goal of this function is to make sure that the data is available at the
    same time steps as the synthetics. The first time sample of the synthetics
    will always be the origin time of the event.

    Furthermore the data has to be converted to m/s.

    :param processing_info: A dictionary containing information about the
        file to be processed. It will have the following structure.
    :type processing_info: dict

    .. code-block:: python

        {'event_information': {
            'depth_in_km': 22.0,
            'event_name': 'GCMT_event_VANCOUVER_ISLAND...',
            'filename': '/.../GCMT_event_VANCOUVER_ISLAND....xml',
            'latitude': 49.53,
            'longitude': -126.89,
            'm_pp': 2.22e+18,
            'm_rp': -2.78e+18,
            'm_rr': -6.15e+17,
            'm_rt': 1.98e+17,
            'm_tp': 5.14e+18,
            'm_tt': -1.61e+18,
            'magnitude': 6.5,
            'magnitude_type': 'Mwc',
            'origin_time': UTCDateTime(2011, 9, 9, 19, 41, 34, 200000),
            'region': u'VANCOUVER ISLAND, CANADA REGION'},
         'input_filename': u'/.../raw/7D.FN01A..HHZ.mseed',
         'output_filename': u'/.../processed_.../7D.FN01A..HHZ.mseed',
         'process_params': {
            'dt': 0.75,
            'highpass': 0.007142857142857143,
            'lowpass': 0.0125,
            'npts': 2000},
         'station_coordinates': {
            'elevation_in_m': -54.0,
            'latitude': 46.882,
            'local_depth_in_m': None,
            'longitude': -124.3337},
         'station_filename': u'/.../STATIONS/RESP/RESP.7D.FN01A..HH*'}

    Please note that you also got the iteration object here, so if you
    want some parameters to change depending on the iteration, just use
    if/else on the iteration objects.

    >>> iteration.name  # doctest: +SKIP
    '11'
    >>> iteration.get_process_params()  # doctest: +SKIP
    {'dt': 0.75,
     'highpass': 0.01,
     'lowpass': 0.02,
     'npts': 500}

    Use ``$ lasif shell`` to play around and figure out what the iteration
    objects can do.

    """
    def zerophase_chebychev_lowpass_filter(trace, freqmax):
        """
        Custom Chebychev type two zerophase lowpass filter useful for
        decimation filtering.

        This filter is stable up to a reduction in frequency with a factor of
        10. If more reduction is desired, simply decimate in steps.

        Partly based on a filter in ObsPy.

        :param trace: The trace to be filtered.
        :param freqmax: The desired lowpass frequency.

        Will be replaced once ObsPy has a proper decimation filter.
        """
        # rp - maximum ripple of passband, rs - attenuation of stopband
        rp, rs, order = 1, 96, 1e99
        ws = freqmax / (trace.stats.sampling_rate * 0.5)  # stop band frequency
        wp = ws  # pass band frequency

        while True:
            if order <= 12:
                break
            wp *= 0.99
            order, wn = signal.cheb2ord(wp, ws, rp, rs, analog=0)

        b, a = signal.cheby2(order, rs, wn, btype="low", analog=0, output="ba")

        # Apply twice to get rid of the phase distortion.
        trace.data = signal.filtfilt(b, a, trace.data)

    def signal_to_noise_ratio(data, first_tt_arrival, process_params):

        minimum_period = 1. / process_params["highpass"]
        dt = process_params["dt"]

        # Estimate noise level from waveforms prior to the first arrival.
        idx_noise_end = int(
            np.ceil((first_tt_arrival - 0.5 * minimum_period) / dt)) - 1
        idx_noise_end = max(10, idx_noise_end)
        idx_noise_start = int(
            np.ceil((first_tt_arrival - 5 * minimum_period) / dt))
        idx_noise_start = max(10, idx_noise_start)
        idx_sigwin_start = int(
            np.ceil((first_tt_arrival - 0.5 * minimum_period) / dt))
        idx_sigwin_end = idx_sigwin_start + int(minimum_period / dt)

        if idx_noise_start >= idx_noise_end:
            idx_noise_start = max(10, idx_noise_end - 10)

        abs_data = np.abs(data[idx_sigwin_start:idx_sigwin_end])
        noise_absolute = np.abs(data[idx_noise_start:idx_noise_end]).max()
        noise_relative = noise_absolute / abs_data.max()

        return noise_relative, noise_absolute

    # =========================================================================
    # Define the default noise_threshold if not given as an argument
    # =========================================================================
    noise_threshold = processing_info.get("noise_threshold")
    if noise_threshold is None:
        noise_threshold = 0.1

    # =========================================================================
    # Read seismograms and gather basic information.
    # =========================================================================
    starttime = processing_info["event_information"]["origin_time"]
    endtime = starttime + processing_info["process_params"]["dt"] * \
        (processing_info["process_params"]["npts"] - 1)
    duration = endtime - starttime

    st = obspy.read(processing_info["input_filename"])

    if len(st) != 1:
        warnings.warn("The file '%s' has %i traces and not 1. "
                      "Skip all but the first" %
                      (processing_info["input_filename"], len(st)))
    tr = st[0]

    # fill the data file header with station coordinates
    receiver = processing_info["station_coordinates"]
    tr.stats.coordinates = AttribDict({
        'latitude': receiver["latitude"],
        'elevation': receiver["elevation_in_m"],
        'longitude': receiver["longitude"]
    })

    # Make sure the seismograms are long enough. If not, skip them.
    if starttime < tr.stats.starttime or endtime > tr.stats.endtime:
        msg = ("The seismogram does not cover the required time span.\n"
               "Seismogram time span: %s - %s\n"
               "Requested time span: %s - %s" %
               (tr.stats.starttime, tr.stats.endtime, starttime, endtime))
        raise LASIFError(msg)

    # Trim to reduce processing cost.
    # starttime is the origin time of the event
    # endtime is the origin time plus the length of the synthetics
    tr.trim(starttime - 0.2 * duration, endtime + 0.2 * duration)

    # =========================================================================
    # Some basic checks on the data.
    # =========================================================================
    # Non-zero length
    if not len(tr):
        msg = "No data found in time window around the event. File skipped."
        raise LASIFError(msg)

    # No nans or infinity values allowed.
    if not np.isfinite(tr.data).all():
        msg = "Data contains NaNs or Infs. File skipped"
        raise LASIFError(msg)

    # =========================================================================
    # Step 1: Decimation
    # Decimate with the factor closest to the sampling rate of the synthetics.
    # The data is still oversampled by a large amount so there should be no
    # problems. This has to be done here so that the instrument correction is
    # reasonably fast even for input data with a large sampling rate.
    # =========================================================================
    while True:
        decimation_factor = int(processing_info["process_params"]["dt"] /
                                tr.stats.delta)
        # Decimate in steps for large sample rate reductions.
        if decimation_factor > 8:
            decimation_factor = 8
        if decimation_factor > 1:
            new_nyquist = tr.stats.sampling_rate / 2.0 / float(
                decimation_factor)
            zerophase_chebychev_lowpass_filter(tr, new_nyquist)
            tr.decimate(factor=decimation_factor, no_filter=True)
        else:
            break
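    # Worked example of the loop above (numbers assumed for illustration):
    # for synthetics with dt = 0.75 s and raw data with delta = 0.01 s the
    # factor is int(0.75 / 0.01) = 75, capped to 8. After one pass delta is
    # 0.08 s and the next factor is int(9.375) = 8 again, giving 0.64 s. The
    # third pass yields int(0.75 / 0.64) = 1, so the loop breaks; the exact
    # match to dt is left to the Lanczos interpolation in Step 5.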

    # =========================================================================
    # Step 2: Detrend and taper.
    # =========================================================================
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(max_percentage=0.05, type="hann")

    # =========================================================================
    # Step 3: Instrument correction
    # Correct seismograms to velocity in m/s.
    # =========================================================================
    output_units = "VEL"
    station_file = processing_info["station_filename"]

    # check if the station file actually exists ==============================
    if not processing_info["station_filename"]:
        msg = "No station file found for the relevant time span. File skipped"
        raise LASIFError(msg)

    # This is really necessary as other filters are just not sharp enough
    # and lots of energy from other frequency bands leaks into the frequency
    # band of interest
    freqmin = processing_info["process_params"]["highpass"]
    freqmax = processing_info["process_params"]["lowpass"]

    f2 = 0.9 * freqmin
    f3 = 1.1 * freqmax
    # Recommendations from the SAC manual.
    f1 = 0.5 * f2
    f4 = 2.0 * f3
    pre_filt = (f1, f2, f3, f4)
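    # Worked example (numbers assumed for illustration): for a highpass of
    # 0.01 Hz and a lowpass of 0.1 Hz this gives f2 = 0.009, f3 = 0.11,
    # f1 = 0.0045 and f4 = 0.22, i.e. pre_filt = (0.0045, 0.009, 0.11, 0.22).
    # The response removal thus rolls off smoothly outside the band.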

    # processing for seed files ==============================================
    if "/SEED/" in station_file:
        # XXX: Check if this is m/s. In all cases encountered so far it
        # always is, but SEED is in theory also able to specify corrections
        # to other units...
        parser = Parser(station_file)
        try:
            # The simulate call might fail but still modify the data. The
            # backup is needed so the fallback can correct using only poles
            # and zeros.
            backup_tr = tr.copy()
            try:
                tr.simulate(seedresp={
                    "filename": parser,
                    "units": output_units,
                    "date": tr.stats.starttime
                },
                            pre_filt=pre_filt,
                            zero_mean=False,
                            taper=False)
            except ValueError:
                warnings.warn("Evalresp failed, will only use the Poles and "
                              "Zeros stage")
                tr = backup_tr
                paz = parser.get_paz(tr.id, tr.stats.starttime)
                if paz["sensitivity"] == 0:
                    warnings.warn("Sensitivity is 0 in SEED file and will "
                                  "not be taken into account!")
                    tr.simulate(paz_remove=paz,
                                remove_sensitivity=False,
                                pre_filt=pre_filt,
                                zero_mean=False,
                                taper=False)
                else:
                    tr.simulate(paz_remove=paz,
                                pre_filt=pre_filt,
                                zero_mean=False,
                                taper=False)
        except Exception as e:
            msg = ("File  could not be corrected with the help of the "
                   "SEED file '%s'. Will be skipped due to: %s") \
                % (processing_info["station_filename"], str(e))
            raise LASIFError(msg)
    # processing with RESP files =============================================
    elif "/RESP/" in station_file:
        try:
            tr.simulate(seedresp={
                "filename": station_file,
                "units": output_units,
                "date": tr.stats.starttime
            },
                        pre_filt=pre_filt,
                        zero_mean=False,
                        taper=False)
        except ValueError as e:
            msg = ("File  could not be corrected with the help of the "
                   "RESP file '%s'. Will be skipped. Due to: %s") \
                % (processing_info["station_filename"], str(e))
            raise LASIFError(msg)
    elif "/StationXML/" in station_file:
        try:
            inv = obspy.read_inventory(station_file, format="stationxml")
        except Exception as e:
            msg = ("Could not open StationXML file '%s'. Due to: %s. Will be "
                   "skipped." % (station_file, str(e)))
            raise LASIFError(msg)
        tr.attach_response(inv)
        try:
            tr.remove_response(output=output_units,
                               pre_filt=pre_filt,
                               zero_mean=False,
                               taper=False)
        except Exception as e:
            msg = ("File  could not be corrected with the help of the "
                   "StationXML file '%s'. Due to: '%s'  Will be skipped.") \
                % (processing_info["station_filename"], e.__repr__()),
            raise LASIFError(msg)
    else:
        raise NotImplementedError(
            "Unknown station file type: '%s'" % station_file)

    # =========================================================================
    # Step 4: Bandpass filtering
    # This has to be exactly the same filter as in the source time function
    # in the case of SES3D.
    # =========================================================================
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(0.05, type="cosine")
    tr.filter("bandpass",
              freqmin=freqmin,
              freqmax=freqmax,
              corners=3,
              zerophase=False)
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(0.05, type="cosine")
    tr.filter("bandpass",
              freqmin=freqmin,
              freqmax=freqmax,
              corners=3,
              zerophase=False)

    # =========================================================================
    # Step 5: Sinc interpolation
    # =========================================================================
    # Make sure that the data array is at least as long as the
    # synthetics array.
    tr.interpolate(sampling_rate=1.0 / processing_info["process_params"]["dt"],
                   method="lanczos",
                   starttime=starttime,
                   window="blackman",
                   a=12,
                   npts=processing_info["process_params"]["npts"])

    # =========================================================================
    # Step 6: Waveform selection based on SNR
    # =========================================================================
    # Compute the relative noise level (maximum noise amplitude divided by
    # the maximum signal amplitude; smaller means cleaner data).
    noise_relative = signal_to_noise_ratio(
        tr.data, processing_info["first_P_arrival"],
        processing_info["process_params"])[0]
    # Selection: only write traces whose relative noise is below the
    # threshold.
    if noise_relative < noise_threshold:
        # =========================================================================
        # Save processed data and clean up.
        # =========================================================================
        # Convert to single precision to save some space.
        tr.data = np.require(tr.data, dtype="float32", requirements="C")
        if hasattr(tr.stats, "mseed"):
            tr.stats.mseed.encoding = "FLOAT32"

        tr.write(processing_info["output_filename"], format=tr.stats._format)
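A hedged driving sketch (the dictionary mirrors the structure documented in the docstring; every value, path and the `iteration` handle are placeholders):

    processing_info = {
        "event_information": {"origin_time": obspy.UTCDateTime(2011, 9, 9)},
        "input_filename": "DATA/EVENT/raw/7D.FN01A..HHZ.mseed",
        "output_filename": "DATA/EVENT/processed/7D.FN01A..HHZ.mseed",
        "process_params": {"dt": 0.75, "highpass": 0.007142857142857143,
                           "lowpass": 0.0125, "npts": 2000},
        "station_coordinates": {"latitude": 46.882, "longitude": -124.3337,
                                "elevation_in_m": -54.0,
                                "local_depth_in_m": None},
        "station_filename": "STATIONS/RESP/RESP.7D.FN01A..HHZ",
        "first_P_arrival": 120.0,  # seconds after origin, placeholder
        "noise_threshold": 0.1,
    }
    preprocessing_function(processing_info, iteration)  # iteration: LASIF object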
Example 10
def preprocessing_function_asdf(dir_obs, eventname, time_increment, end_time,
                                min_period, max_period):
    """
    Preprocess the observed waveforms of one event stored in an ASDF file.
    """
    processdir = eventname + 'preprocessed'
    processdata = ('preprocessed_' + str(min_period) + 's_to_' +
                   str(max_period) + 's.h5')
    tag_name = ('preprocessed_' + str(min_period) + 's_to_' +
                str(max_period) + 's')
    # Work on a fresh copy of the observed ASDF file: remove any stale
    # output first, then copy the input so it is never modified in place.
    processed_path = os.path.join(processdir, processdata)
    if os.path.exists(processed_path):
        os.remove(processed_path)
    os.system('cp ' + dir_obs + ' ' + processed_path)
    def zerophase_chebychev_lowpass_filter(trace, freqmax):
        """
        Custom Chebychev type two zerophase lowpass filter useful for
        decimation filtering.

        This filter is stable up to a reduction in frequency with a factor of
        10. If more reduction is desired, simply decimate in steps.

        Partly based on a filter in ObsPy.

        :param trace: The trace to be filtered.
        :param freqmax: The desired lowpass frequency.

        Will be replaced once ObsPy has a proper decimation filter.
        """
        # rp - maximum ripple of passband, rs - attenuation of stopband
        rp, rs, order = 1, 96, 1e99
        ws = freqmax / (trace.stats.sampling_rate * 0.5)  # stop band frequency
        wp = ws  # pass band frequency

        while True:
            if order <= 12:
                break
            wp *= 0.99
            order, wn = signal.cheb2ord(wp, ws, rp, rs, analog=0)

        b, a = signal.cheby2(order, rs, wn, btype="low", analog=0, output="ba")

        # Apply twice to get rid of the phase distortion.
        trace.data = signal.filtfilt(b, a, trace.data)

    # =========================================================================
    # Read ASDF file
    # =========================================================================

    ds = pyasdf.ASDFDataSet(processed_path)
    station_list = ds.waveforms.list()
    event = ds.events[0]
    dt = time_increment
    sampling_rate = 1.0 / dt
    start_time = -time_increment
    npts = int(round((end_time - start_time) / time_increment) + 1)
    origin = event.preferred_origin() or event.origins[0]
    start = UTCDateTime(origin.time)
    starttime = start_time + float(start)
    endtime = end_time + starttime
    duration = end_time - start_time

    f2 = 0.9 / max_period
    f3 = 1.1 / min_period
    # Recommendations from the SAC manual.
    f1 = 0.5 * f2
    f4 = 2.0 * f3
    pre_filt = (f1, f2, f3, f4)

    for stid in station_list:
        stla, stlo, evz = ds.waveforms[stid].coordinates.values()
        st = ds.waveforms[stid].raw_recording
        for tr in st:
            # Trim to reduce processing costs (currently disabled).
            # tr.trim(starttime - 0.2 * duration, endtime + 0.2 * duration)
            while True:
                decimation_factor = int(dt / tr.stats.delta)
                # Decimate in steps for large sample rate reductions.
                if decimation_factor > 8:
                    decimation_factor = 8
                if decimation_factor > 1:
                    new_nyquist = tr.stats.sampling_rate / 2.0 / float(
                        decimation_factor)
                    zerophase_chebychev_lowpass_filter(tr, new_nyquist)
                    tr.decimate(factor=decimation_factor, no_filter=True)
                else:
                    break
        inv = ds.waveforms[stid].StationXML
        # Detrend and taper.
        st.detrend("linear")
        st.detrend("demean")
        st.taper(max_percentage=0.05, type="hann")
        # Instrument correction
        try:
            st.attach_response(inv)
            st.remove_response(output="DISP", pre_filt=pre_filt,
                               zero_mean=False, taper=False)
        except Exception as e:
            net = inv.get_contents()['channels'][0].split('.', 2)[0]
            sta = inv.get_contents()['channels'][0].split('.', 2)[1]

            msg = ("Station: %s.%s could not be corrected with the help of"
                   " asdf file: '%s'. Due to: '%s'  Will be skipped.") \
                % (net, sta,
                   processing_info["asdf_input_filename"], e.__repr__()),
            raise LASIFError(msg)

        # Bandpass filtering
        st.detrend("linear")
        st.detrend("demean")
        st.taper(0.05, type="cosine")
        st.filter("bandpass", freqmin=1.0 / max_period,
                  freqmax=1.0 / min_period, corners=3, zerophase=False)

        st.detrend("linear")
        st.detrend("demean")
        st.taper(0.05, type="cosine")
        st.filter("bandpass", freqmin=1.0 / max_period,
                  freqmax=1.0 / min_period, corners=3, zerophase=False)

        # Sinc interpolation
        for tr in st:
            tr.data = np.require(tr.data, requirements="C")
        st.interpolate(sampling_rate=sampling_rate, method="lanczos",
                       starttime=starttime, window="blackman", a=12, npts=npts)
        # Convert to single precision to save space.
        for tr in st:
            tr.data = np.require(tr.data, dtype="float32", requirements="C")

        ds.add_waveforms(st, tag=tag_name)
        del ds.waveforms[stid].raw_recording
        del ds.waveforms[stid].preprocess
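A hedged invocation sketch (all argument values are placeholders; the function expects the observed ASDF file plus the time axis and period band of the synthetics):

    # Hypothetical: preprocess the observed data of one event to match
    # synthetics sampled at 0.5 s over 1000 s, in the 10-100 s band.
    preprocessing_function_asdf(dir_obs="EVENT_1/observed.h5",
                                eventname="EVENT_1/",
                                time_increment=0.5, end_time=1000.0,
                                min_period=10, max_period=100)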
Example 11
    def get_matching_waveforms(self, event, iteration, station_or_channel_id):
        seed_id = station_or_channel_id.split(".")
        if len(seed_id) == 2:
            channel = None
            station_id = station_or_channel_id
        elif len(seed_id) == 4:
            network, station, _, channel = seed_id
            station_id = ".".join((network, station))
        else:
            raise ValueError("'station_or_channel_id' must either have "
                             "2 or 4 parts.")

        iteration = self.comm.iterations.get(iteration)
        event = self.comm.events.get(event)

        # Get the metadata for the processed and synthetics for this
        # particular station.
        data = self.comm.waveforms.get_waveforms_processed(
            event["event_name"], station_id, tag=iteration.processing_tag)
        synthetics = self.comm.waveforms.get_waveforms_synthetic(
            event["event_name"],
            station_id,
            long_iteration_name=iteration.long_name)
        coordinates = self.comm.query.get_coordinates_for_station(
            event["event_name"], station_id)

        # Clear data and synthetics!
        for _st, name in ((data, "observed"), (synthetics, "synthetic")):
            # Get all components and loop over all components.
            _comps = set(tr.stats.channel[-1].upper() for tr in _st)
            for _c in _comps:
                traces = [
                    _i for _i in _st if _i.stats.channel[-1].upper() == _c
                ]
                if len(traces) == 1:
                    continue
                elif len(traces) > 1:
                    traces = sorted(traces, key=lambda x: x.id)
                    warnings.warn(
                        "%s data for event '%s', iteration '%s', "
                        "station '%s', and component '%s' has %i traces: "
                        "%s. LASIF will select the first one, but please "
                        "clean up your data." %
                        (name.capitalize(), event["event_name"],
                         iteration.iteration_name, station_id, _c, len(traces),
                         ", ".join(tr.id for tr in traces)), LASIFWarning)
                    for tr in traces[1:]:
                        _st.remove(tr)
                else:
                    # Should not happen.
                    raise NotImplementedError

        # Make sure all data has the corresponding synthetics. It should not
        # happen that one has three channels of data but only two channels
        # of synthetics...in that case, discard the additional data and
        # raise a warning.
        temp_data = []
        for data_tr in data:
            component = data_tr.stats.channel[-1].upper()
            synthetic_tr = [
                tr for tr in synthetics
                if tr.stats.channel[-1].upper() == component
            ]
            if not synthetic_tr:
                warnings.warn(
                    "Station '%s' has observed data for component '%s' but no "
                    "matching synthetics." % (station_id, component),
                    LASIFWarning)
                continue
            temp_data.append(data_tr)
        data.traces = temp_data

        if len(data) == 0:
            raise LASIFError("No data remaining for station '%s'." %
                             station_id)

        # Scale the data if required.
        if iteration.scale_data_to_synthetics:
            for data_tr in data:
                synthetic_tr = [
                    tr for tr in synthetics if tr.stats.channel[-1].lower() ==
                    data_tr.stats.channel[-1].lower()
                ][0]
                scaling_factor = synthetic_tr.data.ptp() / \
                    data_tr.data.ptp()
                # Store and apply the scaling.
                data_tr.stats.scaling_factor = scaling_factor
                data_tr.data *= scaling_factor

        data.sort()
        synthetics.sort()

        # Select component if necessary.
        if channel is not None:
            # Only use the last letter of the channel for the selection.
            # Different solvers have different conventions for the location
            # and channel codes.
            component = channel[-1].upper()
            data.traces = [
                i for i in data.traces
                if i.stats.channel[-1].upper() == component
            ]
            synthetics.traces = [
                i for i in synthetics.traces
                if i.stats.channel[-1].upper() == component
            ]

        return DataTuple(data=data,
                         synthetics=synthetics,
                         coordinates=coordinates)
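A minimal sketch (event, iteration and channel identifiers are placeholders; the owning component is assumed):

    # Fetch matched processed data and synthetics for one station, plus its
    # coordinates. A 4-part SEED id restricts the result to one component.
    dt = comm.query.get_matching_waveforms(
        "GCMT_event_EXAMPLE", "1", "BW.ALTM..EHZ")
    print(dt.data, dt.synthetics, dt.coordinates)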
Example 12
def get_inventory(resp_file, remove_duplicates=False):
    """
    Simple function reading a RESP file and returning a list of dictionaries.
    Each dictionary contains the following keys for each channel found in the
    RESP file:

        * network
        * station
        * location
        * channel
        * start_date
        * end_date
        * channel_id

    :param resp_file: Resp file to open.
    :param remove_duplicates: Some RESP files contain the same values twice.
        This option removes the duplicates. Defaults to False.
    """
    channels = []
    with open(resp_file, "rU") as open_file:
        current_channel = {}
        for line in open_file:
            line = line.strip().upper()
            if line.startswith("B050F03"):
                current_channel["station"] = line.split()[-1]
                if _is_channel_complete(current_channel):
                    channels.append(current_channel)
                    current_channel = {}
            elif line.startswith("B050F16"):
                current_channel["network"] = line.split()[-1]
                if _is_channel_complete(current_channel):
                    channels.append(current_channel)
                    current_channel = {}
            elif line.startswith("B052F03"):
                location = line.split()[-1]
                if location == "??":
                    location = ""
                current_channel["location"] = location
                if _is_channel_complete(current_channel):
                    channels.append(current_channel)
                    current_channel = {}
            elif line.startswith("B052F04"):
                current_channel["channel"] = line.split()[-1]
                if _is_channel_complete(current_channel):
                    channels.append(current_channel)
                    current_channel = {}
            elif line.startswith("B052F22"):
                current_channel["start_date"] = _parse_resp_datetime_string(
                    line.split()[-1])
                if _is_channel_complete(current_channel):
                    channels.append(current_channel)
                    current_channel = {}
            elif line.startswith("B052F23"):
                current_channel["end_date"] = _parse_resp_datetime_string(
                    line.split()[-1])
                if _is_channel_complete(current_channel):
                    channels.append(current_channel)
                    current_channel = {}
    for channel in channels:
        channel["channel_id"] = "{network}.{station}.{location}.{channel}"\
            .format(**channel)
    # Make unique list if requested.
    if remove_duplicates is True:
        unique_list = []
        for channel in channels:
            if channel in unique_list:
                continue
            unique_list.append(channel)
        channels = unique_list
    if not channels:
        raise LASIFError("'%s' is not a valid RESP file." % resp_file)
    return channels
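A short usage sketch (the RESP path is a placeholder):

    # Parse all channels from one RESP file and print their ids and epochs.
    for chan in get_inventory("RESP.7D.FN01A..HHZ", remove_duplicates=True):
        print(chan["channel_id"], chan["start_date"], chan["end_date"])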
Example 13
def stf_deconvolution(to_be_processed, output_folder, components=['E', 'N', 'Z'],):  # NOQA
    """
    Function to estimate a source time function by deconvolving synthetic
    Green's functions from the observed seismograms of one event.
    This is part of the project so it can change depending on the project.

    Please keep in mind that you will have to manually update this file to a
    new version if LASIF is ever updated.

    You can do whatever you want in this function as long as the function
    signature is honored. The file is read from ``"input_filename"`` and
    written to ``"output_filename"``.

    One goal of this function is to make sure that the data is available at the
    same time steps as the synthetics. The first time sample of the synthetics
    will always be the origin time of the event.

    Furthermore the data has to be converted to m/s.

    :param to_be_processed: A list of dictionaries, one per waveform. Each
        contains a ``"processing_info"`` dictionary with the following
        structure.
    :type to_be_processed: list of dict
    :param output_folder: Folder the estimated source time function is
        written to.
    :param components: Components to process. Defaults to ``['E', 'N', 'Z']``.

    .. code-block:: python

        {'event_information': {
            'depth_in_km': 22.0,
            'event_name': 'GCMT_event_VANCOUVER_ISLAND...',
            'filename': '/.../GCMT_event_VANCOUVER_ISLAND....xml',
            'latitude': 49.53,
            'longitude': -126.89,
            'm_pp': 2.22e+18,
            'm_rp': -2.78e+18,
            'm_rr': -6.15e+17,
            'm_rt': 1.98e+17,
            'm_tp': 5.14e+18,
            'm_tt': -1.61e+18,
            'magnitude': 6.5,
            'magnitude_type': 'Mwc',
            'origin_time': UTCDateTime(2011, 9, 9, 19, 41, 34, 200000),
            'region': u'VANCOUVER ISLAND, CANADA REGION'},
         'input_filename': u'/.../raw/7D.FN01A..HHZ.mseed',
         'output_filename': u'/.../processed_.../7D.FN01A..HHZ.mseed',
         'process_params': {
            'dt': 0.75,
            'highpass': 0.007142857142857143,
            'lowpass': 0.0125,
            'npts': 2000},
         'station_coordinates': {
            'elevation_in_m': -54.0,
            'latitude': 46.882,
            'local_depth_in_m': None,
            'longitude': -124.3337},
         'station_filename': u'/.../STATIONS/RESP/RESP.7D.FN01A..HH*'}

    Please note that you also got the iteration object here, so if you
    want some parameters to change depending on the iteration, just use
    if/else on the iteration objects.

    >>> iteration.name  # doctest: +SKIP
    '11'
    >>> iteration.get_process_params()  # doctest: +SKIP
    {'dt': 0.75,
     'highpass': 0.01,
     'lowpass': 0.02,
     'npts': 500}

    Use ``$ lasif shell`` to play around and figure out what the iteration
    objects can do.

    """

    def source_deconvolution_freq(
            stream_data,
            stream_green,
            lambd=0.001,
            recompute_syn=False):

        # Calculate the STF:
        # deconvolve the Green's functions from the observed seismograms,
        # following Pratt (1999), equation 17.
        nfft = stream_data[0].stats.npts
        num = np.zeros(nfft, dtype=complex)
        den = np.zeros(nfft, dtype=complex)
        chi_obs = []
        for tr, sy in zip(stream_data, stream_green):
            tr_fft = np.fft.fft(tr.data, nfft)
            sy_fft = np.fft.fft(sy.data, nfft)

            num += np.conjugate(sy_fft) * tr_fft
            den += np.conjugate(sy_fft) * sy_fft
            chi_obs.append(np.sum(tr.data**2))
        chi_obs = 0.5 * np.sum(chi_obs)

        water_level = lambd * np.max(np.abs(den))
        s = num / (den + water_level)
        src = np.real(np.fft.ifft(s))
        stream_src = obspy.Stream()
        # Reuse the header of the last data trace for the STF trace.
        stream_src += tr.copy()
        stream_src[0].stats.station = ''
        stream_src[0].data = src
        residual = []
        stream_syn = obspy.Stream()

        # recompute synthetics with the estimated STF
        if recompute_syn:
            src_fft = np.fft.fft(src, nfft)
            chi_syn = []
            for tr, sy in zip(stream_data, stream_green):
                sy_fft = np.fft.fft(sy.data, nfft)
                cal = sy.copy()
                cal.data = np.real(np.fft.ifft(src_fft * sy_fft))
                stream_syn += cal
                res = tr.data - cal.data
                chi_syn.append(np.sum(res**2))
            chi_syn = 0.5 * np.sum(chi_syn)
            residual = chi_syn / chi_obs

        return stream_src, stream_syn, residual

    # =========================================================================
    # Entering the function
    # =========================================================================
    from matplotlib.dates import date2num, num2date
    SECONDS_PER_DAY = 3600 * 24

    process_params = to_be_processed[0]["processing_info"]["process_params"]
    seconds_prior_arrival = process_params["seconds_prior_arrival"]
    window_length_in_sec = process_params["window_length_in_sec"]

    for comp in components:
        # =========================================================================
        # Component selection
        # =========================================================================
        # TODO: replace first_P_arrival with the phase of interest, to be
        # calculated in preprocess_data and passed via processing_info.

        wav_file_list = [wav["processing_info"]["input_filename"]
                         for wav in to_be_processed
                         if comp in wav["processing_info"]["channel"]]
        syn_file_list = [wav["processing_info"]["output_filename"]
                         for wav in to_be_processed
                         if comp in wav["processing_info"]["channel"]]
        first_arrival = [wav["processing_info"]["first_P_arrival"]
                         for wav in to_be_processed
                         if comp in wav["processing_info"]["channel"]]

        idx_sigwin_start = int(
            np.ceil(
                (np.min(first_arrival) -
                 process_params["seconds_prior_arrival"]) /
                process_params["dt"]))
        idx_sigwin_end = int(
            np.ceil(
                (np.max(first_arrival) +
                 process_params["window_length_in_sec"]) /
                process_params["dt"]))

        Time = np.arange(
            0,
            process_params["npts"] *
            process_params["dt"],
            process_params["dt"])
        starttime = to_be_processed[0]["processing_info"]["event_information"]["origin_time"]
        t_start = num2date(((Time[idx_sigwin_start] / SECONDS_PER_DAY)
                            + date2num(starttime.datetime)))
        t_end = num2date(((Time[idx_sigwin_end] / SECONDS_PER_DAY)
                          + date2num(starttime.datetime)))
        startdate = obspy.UTCDateTime(
            t_start.year,
            t_start.month,
            t_start.day,
            t_start.hour,
            t_start.minute,
            t_start.second,
            t_start.microsecond)
        enddate = obspy.UTCDateTime(t_end.year, t_end.month, t_end.day,
                                    t_end.hour, t_end.minute, t_end.second,
                                    t_end.microsecond)

        # =========================================================================
        # read traces, window around phase of interest
        # =========================================================================
        st_wav = obspy.Stream()
        st_syn = obspy.Stream()
        for wav_file, syn_file in zip(wav_file_list, syn_file_list):
            wav = obspy.read(wav_file)
            syn = obspy.read(syn_file)
            wav.trim(startdate, enddate)
            syn.trim(startdate, enddate)
            #wav[0].data = wav[0].data[idx_sigwin_start:idx_sigwin_end]
            #syn[0].data = syn[0].data[idx_sigwin_start:idx_sigwin_end]
            wav[0].data /= np.max(wav[0].data)
            syn[0].data /= np.max(syn[0].data)
            st_wav += wav
            st_syn += syn

        # If no waveforms were selected in the previous step (SNR criterion),
        # skip the STF estimation.
        if not st_wav or not st_syn:
            raise LASIFError(
                "No data for this event, will skip the stf estimation")
        else:

            st_wav.taper(0.01)
            st_syn.taper(0.01)

            # =========================================================================
            # stf deconvolution
            # =========================================================================
            # stf, new_syn, residual = source_deconvolution_freq(st_wav, st_syn,
            # lambd=0.001, recompute_syn=True)
            stf, _syn, _residual = source_deconvolution_freq(
                st_wav, st_syn, lambd=0.001, recompute_syn=False)

            '''
            src = obspy.read(wav_file_list[0])
            src[0].stats.station=''
            src_trace = np.zeros(process_params["npts"], dtype=float)
            src_trace[idx_sigwin_start:idx_sigwin_end] = stf[0].data
            src[0].data = src_trace.copy()
            stf = src.copy()
            '''

            # =========================================================================
            # write stf file
            # =========================================================================
            # Convert to single precision to save some space.
            tr = stf[0].copy()
            tr.data = np.require(tr.data, dtype="float32", requirements="C")
            tr.stats._format = wav[0].stats._format
            if hasattr(tr.stats, "mseed"):  # to be fixed
                tr.stats.mseed.encoding = "FLOAT32"

            # channel_id = [item["processing_info"]["channel"]
            #         for item in to_be_processed
            #         if comp in item["processing_info"]["channel"]][0]
            stf_filename = os.path.join(output_folder, "stf_%s__%s__%s"
                                        % (comp,
                                           to_be_processed[0]["processing_info"]["output_filename"].split('/')[-1].split('__')[-2],
                                           to_be_processed[0]["processing_info"]["output_filename"].split('/')[-1].split('__')[-1]))
            tr.write(stf_filename, format=tr.stats._format)

            """