Example 1
def test_detailed_event_association_is_persistent_through_processing(
        example_data_set):
    """
    Processing a file with an associated event and storing it again should
    keep the association for all the possible event tags.
    """
    data_set = ASDFDataSet(example_data_set.filename)
    # Store a new waveform.
    event = data_set.events[0]
    origin = event.origins[0]
    magnitude = event.magnitudes[0]
    focmec = event.focal_mechanisms[0]

    tr = obspy.read()[0]
    tr.stats.network = "BW"
    tr.stats.station = "RJOB"

    data_set.add_waveforms(tr, tag="random", event_id=event,
                           origin_id=origin, focal_mechanism_id=focmec,
                           magnitude_id=magnitude)

    new_st = data_set.waveforms.BW_RJOB.random
    new_st.taper(max_percentage=0.05, type="cosine")

    data_set.add_waveforms(new_st, tag="processed")
    processed_st = data_set.waveforms.BW_RJOB.processed
    assert event.resource_id == processed_st[0].stats.asdf.event_id
    assert origin.resource_id == processed_st[0].stats.asdf.origin_id
    assert magnitude.resource_id == processed_st[0].stats.asdf.magnitude_id
    assert focmec.resource_id == processed_st[0].stats.asdf.focal_mechanism_id
Example 2
def test_adding_arbitrary_files(tmpdir):
    """
    Tests that adding arbitrary files works.
    """
    test_filename = os.path.join(tmpdir.strpath, "temp.json")
    test_dict = {"a": 1, "b": 2}
    with open(test_filename, "wt") as fh:
        json.dump(test_dict, fh, sort_keys=True)

    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    data_set.add_auxiliary_data_file(
        test_filename, tag="test_file", parameters={"1": 1})

    data_set.__del__()
    del data_set

    new_data_set = ASDFDataSet(asdf_filename)
    # Extraction works the same as always, but now has a special attribute,
    # that returns the data as a BytesIO.
    aux_data = new_data_set.auxiliary_data.File.test_file
    assert aux_data.parameters == {"1": 1}
    assert aux_data.tag == "test_file"

    new_test_dict = json.loads(aux_data.file.read().decode())
    assert test_dict == new_test_dict

    aux_data.file.seek(0, 0)

    with open(test_filename, "rb") as fh:
        assert fh.read() == aux_data.file.read()
Example 3
def test_get_provenance_document_for_id(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    filename = os.path.join(data_dir,
                            "example_schematic_processing_chain.xml")

    doc = prov.read(filename)
    data_set.provenance["test_provenance"] = doc

    assert data_set.provenance.get_provenance_document_for_id(
            '{http://seisprov.org/seis_prov/0.1/#}sp002_dt_f87sf7sf78') == \
        {"name": "test_provenance", "document": doc}

    assert data_set.provenance.get_provenance_document_for_id(
            '{http://seisprov.org/seis_prov/0.1/#}sp004_lp_f87sf7sf78') == \
        {"name": "test_provenance", "document": doc}

    # Id not found.
    with pytest.raises(ASDFValueError) as err:
        data_set.provenance.get_provenance_document_for_id(
            '{http://seisprov.org/seis_prov/0.1/#}bogus_id')

    assert err.value.args[0] == (
        "Document containing id "
        "'{http://seisprov.org/seis_prov/0.1/#}bogus_id'"
        " not found in the data set.")

    # Not a qualified id.
    with pytest.raises(ASDFValueError) as err:
        data_set.provenance.get_provenance_document_for_id("bla")

    assert err.value.args[0] == ("Not a valid qualified name.")

    data_set.__del__()
Example 4
    def write_new_synt_asdf(self, file_prefix):
        new_synt_dict = self._sort_new_synt()

        for tag, win_array in new_synt_dict.items():
            filename = "%s.%s.h5" % (file_prefix, tag)
            if os.path.exists(filename):
                os.remove(filename)
                logger.info("Output file exists, removed: %s" % filename)
            else:
                logger.info("Output new synt asdf: %s" % filename)

            ds = ASDFDataSet(filename, mode='w')
            added_list = []
            for window in win_array:
                synt_id = window.datalist['new_synt'].id
                # skip duplicate obsd location id.
                # for example, II.AAK.00.BHZ and II.AAK.10.BHZ will
                # be treated as different traces. But the synt and
                # new synt will be the same. So we only add one
                if synt_id in added_list:
                    continue
                else:
                    added_list.append(synt_id)
                ds.add_waveforms(window.datalist['new_synt'], tag=tag)
            # add stationxml
            _staxml_asdf = self._asdf_file_dict['synt']
            ds_sta = ASDFDataSet(_staxml_asdf)
            self.__add_staxml_from_other_asdf(ds, ds_sta)
            ds.flush()
Example 5
def test_tag_iterator(example_data_set):
    """
    Tests the tag iterator.
    """
    data_set = ASDFDataSet(example_data_set.filename)

    expected_ids = ["AE.113A..BHE", "AE.113A..BHN", "AE.113A..BHZ",
                    "TA.POKR..BHE", "TA.POKR..BHN", "TA.POKR..BHZ"]

    for st, inv in data_set.itertag("raw_recording"):
        for tr in st:
            assert tr.id in expected_ids
            expected_ids.remove(tr.id)
            assert bool(
                inv.select(
                    network=tr.stats.network,
                    station=tr.stats.station,
                    channel=tr.stats.channel,
                    location=tr.stats.location,
                ).networks
            )

    assert expected_ids == []

    # It will only return matching tags.
    count = 0
    for _ in data_set.itertag("random"):
        count += 1
    assert count == 0
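Note: the tag-based iteration above can also be reproduced with the plain waveform accessors used in the other examples. The following is a minimal sketch (the file name "test.h5" and the "raw_recording" tag are placeholders) that walks the stations of an already written data set.

from pyasdf import ASDFDataSet

with ASDFDataSet("test.h5", mode="r") as ds:
    for station in ds.waveforms.list():              # e.g. "AE.113A"
        accessor = ds.waveforms[station]
        # Only act on stations that actually carry the tag of interest.
        if hasattr(accessor, "raw_recording"):
            st = accessor.raw_recording               # obspy.Stream
            print(station, len(st))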
Example 6
    def write_new_syn_file(self, file_format="sac", outputdir=".",
                           eventname=None, suffix=None):
        """
        Write out new synthetic file based on new cmtsolution
        :return:
        """
        if not os.path.exists(outputdir):
            os.makedirs(outputdir)

        # sort the new synthetic data
        new_synt_dict = {}
        for window in self.window:
            tag = window.tag['synt']
            if tag not in new_synt_dict.keys():
                new_synt_dict[tag] = []
            new_synt_dict[tag].append(window)

        if file_format.upper() == "SAC":
            for tag, win_array in new_synt_dict.items():
                if eventname is None:
                    targetdir = os.path.join(outputdir, tag)
                else:
                    targetdir = os.path.join(outputdir, "%s_%s"
                                             % (eventname, tag))
                if not os.path.exists(targetdir):
                    os.makedirs(targetdir)
                for window in win_array:
                    sta = window.station
                    nw = window.network
                    component = window.component
                    location = window.location
                    filename = "%s.%s.%s.%s.sac" \
                               % (sta, nw, location, component)
                    outputfn = os.path.join(targetdir, filename)
                    new_synt = window.datalist['new_synt']
                    new_synt.write(outputfn, format='SAC')
        elif file_format.upper() == "ASDF":
            for tag, win_array in new_synt_dict.items():
                if eventname is None:
                    outputfn = os.path.join(outputdir, "new_synt.%s.h5" % tag)
                else:
                    if suffix is None:
                        outputfn = os.path.join(outputdir, "%s.new_synt.%s.h5"
                                                % (eventname, tag))
                    else:
                        outputfn = os.path.join(
                            outputdir, "%s.%s.new_synt.%s.h5"
                            % (eventname, suffix, tag))
                if os.path.exists(outputfn):
                    os.remove(outputfn)
                ds = ASDFDataSet(outputfn)
                for window in win_array:
                    ds.add_waveforms(window.datalist['new_synt'], tag=tag)
                # add stationxml
        else:
            raise NotImplementedError
Example 7
    def _core(self, path, param):
        """
        Core function that handles one pair of asdf files (observed and
        synthetic), windows and configuration for adjoint source

        :param path: path information, path of observed asdf, synthetic
            asdf, windows files, observed tag, synthetic tag, output adjoint
            file, figure mode and figure directory
        :type path: dict
        :param param: parameter information for constructing adjoint source
        :type param: dict
        :return:
        """
        adjoint_param = param["adjoint_config"]

        obsd_file = path["obsd_asdf"]
        synt_file = path["synt_asdf"]
        obsd_tag = path["obsd_tag"]
        synt_tag = path["synt_tag"]
        window_file = path["window_file"]
        output_filename = path["output_file"]

        self.check_input_file(obsd_file)
        self.check_input_file(synt_file)
        self.check_input_file(window_file)
        self.check_output_file(output_filename)

        obsd_ds = self.load_asdf(obsd_file, mode="r")
        synt_ds = self.load_asdf(synt_file, mode="r")

        windows = self.load_windows(window_file)

        adj_src_type = adjoint_param["adj_src_type"]
        adjoint_param.pop("adj_src_type", None)

        config = load_adjoint_config(adjoint_param, adj_src_type)

        if self.mpi_mode and self.rank == 0:
            output_ds = ASDFDataSet(output_filename, mpi=False)
            if output_ds.events:
                output_ds.events = obsd_ds.events
            del output_ds
        if self.mpi_mode:
            self.comm.barrier()

        measure_adj_func = \
            partial(measure_adjoint_wrapper, config=config,
                    obsd_tag=obsd_tag, synt_tag=synt_tag,
                    windows=windows,
                    adj_src_type=adj_src_type)

        results = obsd_ds.process_two_files(synt_ds, measure_adj_func)

        if self.rank == 0:
            print("output filename: %s" % output_filename)
            write_measurements(results, output_filename)
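For context, process_two_files() pairs up the stations of the two data sets and calls the supplied function once per matching pair. The following is a minimal sketch of such a callable, not the actual measure_adjoint_wrapper from the project above; it assumes the callback receives the matching per-station waveform groups of the observed and synthetic data sets, and the tag names are placeholders.

def count_traces(obsd_station_group, synt_station_group,
                 obsd_tag="observed", synt_tag="synthetic"):
    # Pull the tagged streams off both station groups and return something
    # picklable; process_two_files() collects these return values per station.
    obsd_st = getattr(obsd_station_group, obsd_tag)
    synt_st = getattr(synt_station_group, synt_tag)
    return {"n_obsd": len(obsd_st), "n_synt": len(synt_st)}

# results = obsd_ds.process_two_files(synt_ds, count_traces)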
Example 8
def test_str_method_provenance_documents(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    filename = os.path.join(data_dir, "example_schematic_processing_chain.xml")
    data_set.add_provenance_document(filename, name="test_provenance")

    assert str(data_set.provenance) == (
        "1 Provenance Document(s):\n\ttest_provenance"
    )
Example 9
def test_provenance_list_command(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    filename = os.path.join(data_dir,
                            "example_schematic_processing_chain.xml")

    # Add it as a document.
    doc = prov.read(filename, format="xml")
    data_set.add_provenance_document(doc, name="test_provenance")

    assert data_set.provenance.list() == ["test_provenance"]
Example 10
def convert_to_asdf(asdf_fn,
                    waveform_filelist,
                    tag,
                    quakemlfile=None,
                    staxml_filelist=None,
                    verbose=False,
                    status_bar=False,
                    create_simple_inv=False):
    """
    Convert files (sac or mseed) to asdf
    """

    if verbose:
        print("*" * 10 + " ASDF Converter " + "*" * 10)

    nwaveform = len(waveform_filelist)
    if nwaveform == 0:
        print("No file specified. Return...")
        return
    if os.path.exists(asdf_fn):
        raise Exception("File '%s' exists." % asdf_fn)

    ds = ASDFDataSet(asdf_fn, mode='a')

    # Add event
    if quakemlfile:
        if not os.path.exists(quakemlfile):
            raise ValueError("Quakeml file not exists:%s" % quakemlfile)
        ds.add_quakeml(quakemlfile)
        event = ds.events[0]
        if status_bar:
            drawProgressBar(1.0, "Adding Quakeml data")
    else:
        raise ValueError("No Event file")

    sta_dict = add_waveform_to_asdf(ds,
                                    waveform_filelist,
                                    tag,
                                    event=event,
                                    create_simple_inv=create_simple_inv,
                                    status_bar=status_bar)

    add_stationxml_to_asdf(ds,
                           staxml_filelist,
                           event=event,
                           create_simple_inv=create_simple_inv,
                           sta_dict=sta_dict,
                           status_bar=status_bar)

    if verbose:
        print("ASDF filesize: %s" % ds.pretty_filesize)
    del ds
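A hypothetical call to the convert_to_asdf() helper above; all file names are placeholders and assume SAC waveforms, a QuakeML file and StationXML files already exist on disk.

import glob

waveform_files = sorted(glob.glob("waveforms/*.sac"))
staxml_files = sorted(glob.glob("stationxml/*.xml"))

convert_to_asdf("event.observed.h5", waveform_files, "observed",
                quakemlfile="quake.xml",
                staxml_filelist=staxml_files,
                verbose=True)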
Example 11
    def load_asdf(self, filename, mode="a"):
        """
        Load asdf file

        :param filename:
        :param mode:
        :return:
        """
        if self.mpi_mode:
            return ASDFDataSet(filename, compression=None, debug=self._debug,
                               mode=mode, mpi=self.mpi_mode)
        else:
            return ASDFDataSet(filename, mode=mode)
Example 12
def test_coordinate_extraction_but_no_stationxml(tmpdir):
    """
    Tests what happens if no stationxml is defined for a station.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")

    data_set = ASDFDataSet(asdf_filename)
    for filename in glob.glob(os.path.join(data_path, "*.mseed")):
        data_set.add_waveforms(filename, tag="raw_recording")

    # If no stationxml exists it should just return an empty dictionary.
    assert data_set.get_all_coordinates() == {}
Example 13
def test_accessing_non_existent_tag_raises(example_data_set):
    """
    Accessing a non-existing station should raise.
    """
    data_set = ASDFDataSet(example_data_set.filename)

    try:
        with pytest.raises(WaveformNotInFileException) as excinfo:
            data_set.waveforms.AE_113A.asdfasdf

        assert excinfo.value.args[0] == ("Tag 'asdfasdf' not part of the data "
                                         "set for station 'AE.113A'.")
    finally:
        data_set.__del__()
Example 14
def test_event_association_is_persistent_through_processing(example_data_set):
    """
    Processing a file with an associated event and storing it again should
    keep the association.
    """
    data_set = ASDFDataSet(example_data_set.filename)
    st = data_set.waveforms.TA_POKR.raw_recording
    event_id = st[0].stats.asdf.event_id

    st.taper(max_percentage=0.05, type="cosine")

    data_set.add_waveforms(st, tag="processed")
    processed_st = data_set.waveforms.TA_POKR.processed
    assert event_id == processed_st[0].stats.asdf.event_id
Example 15
def test_waveform_accessor_printing(example_data_set):
    """
    Pretty printing of the waveform accessor proxy objects.
    """
    data_set = ASDFDataSet(example_data_set.filename)

    assert data_set.waveforms.AE_113A.__str__() == (
        "Contents of the data set for station AE.113A:\n"
        "    - Has a StationXML file\n"
        "    - 1 Waveform Tag(s):\n"
        "        raw_recording")

    data_set.__del__()
    del data_set
Example 16
def writeraw(event: obspy.core.event.event.Event, rawfolder: str, statloc: str,
             verbose: bool):
    """
    Write the downloaded miniseed, event, and stationxmls to a single asdf
    file.

    :param event: Event that all the waveforms are associated to.
    :type event: obspy.core.event.event.Event
    :param rawfolder: Folder to save the .h5 file to.
    :type rawfolder: str
    :param statloc: Folder, in which the station xmls can be found
    :type statloc: str
    :param verbose: show warnings?
    :type verbose: bool
    """
    # Folder to save asdf to
    outfolder = os.path.join(rawfolder, os.pardir)

    # Start out by adding the event, which later will be associated to
    # each of the waveforms
    with ASDFDataSet(os.path.join(outfolder, 'raw.h5')) as ds:
        # Retrieve eventid - not the most elegant way, but works
        evtid = event.resource_id
        try:
            ds.add_quakeml(event)
        except ValueError:
            if verbose:
                warn(
                    'Event with event-id %s already in DB, skipping...' %
                    str(evtid), UserWarning)
            else:
                pass

    # Read all the waveforms associated to this event
    try:
        st = read(os.path.join(rawfolder, '*.mseed'))
        # Write the waveforms to the asdf
        with ASDFDataSet(os.path.join(outfolder, 'raw.h5')) as ds:
            ds.add_waveforms(st, tag='raw_recording', event_id=evtid)

        # Lastly, we will want to save the stationxmls
        statxml = read_inventory(os.path.join(statloc, '*.xml'))
        with ASDFDataSet(os.path.join(outfolder, 'raw.h5')) as ds:
            ds.add_stationxml(statxml)
    except Exception:
        # For some cases, there will be events without
        # waveforms associated to them
        pass
Example 17
def extract_stations(d: Directory, dst: str):
    """Extract STATIONS from ASDFDataSet."""
    from os.path import join

    from pyasdf import ASDFDataSet

    for src in d.ls():
        event = src.split('.')[0]
        lines = {}
        out = join(dst, f'STATIONS.{event}')

        if d.has(out):
            continue

        with ASDFDataSet(src, mode='r', mpi=False) as ds:
            for station in ds.waveforms.list():
                if not hasattr(ds.waveforms[station], 'StationXML'):
                    print('  ' + station)
                    continue

                sta = ds.waveforms[station].StationXML.networks[0].stations[
                    0]  # type: ignore

                ll = station.split('.')
                ll.reverse()
                ll.append(f'{sta.latitude:.4f}')
                ll.append(f'{sta.longitude:.4f}')
                ll.append(f'{sta.elevation:.1f}')
                ll.append(f'{sta.channels[0].depth:.1f}')

                _format_station(lines, ll)

        d.writelines(lines.values(), join(dst, f'STATIONS.{event}'))
Example 18
def convert_adjsrcs_from_asdf(asdf_fn, outputdir, _verbose=True):
    """
    Convert adjoint sources from asdf to ASCII files (for specfem3d_globe use)
    """
    if not os.path.exists(asdf_fn):
        raise ValueError("No asdf file: %s" % asdf_fn)
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    print("Input ASDF: %s" % asdf_fn)
    print("Output dir: %s" % outputdir)

    ds = ASDFDataSet(asdf_fn, mode='r')
    if "AdjointSources" not in ds.auxiliary_data:
        print("No adjoint source exists in asdf file: %s" % asdf_fn)
        return
    adjsrcs = ds.auxiliary_data.AdjointSources
    nadj = len(adjsrcs)
    print("Number of adjoint sources: %d" % nadj)

    for idx, adj in enumerate(adjsrcs):
        if _verbose:
            print("Adjoint sources(%d/%d) from: %s" % (idx, nadj, adj.path))
        time_offset = adj.parameters["time_offset"]
        dt = adj.parameters['dt']
        npts = len(adj.data)
        times = np.array([time_offset + i * dt for i in range(npts)])
        _data = np.zeros([npts, 2])
        _data[:, 0] = times[:]
        _data[:, 1] = adj.data[:]
        adj_path = adj.path.replace("_", ".")
        filename = os.path.join(outputdir, "%s.adj" % adj_path)
        np.savetxt(filename, _data)
Example 19
def test_pyaflowa_process_station(tmpdir, seisflows_workdir, seed_data,
                                  source_name, station_name, PAR, PATH):
    """
    Test the single station processing function 
    """
    # Turn off client to avoid searching FDSN, force local data search
    PAR.CLIENT = None
    PATH.DATA = tmpdir.strpath
    pyaflowa = Pyaflowa(structure="seisflows",
                        sfpaths=PATH,
                        sfpar=PAR,
                        iteration=1,
                        step_count=0)

    # Copy working directory to tmpdir to avoid creating unnecessary files
    shutil.copytree(src=seisflows_workdir, dst=os.path.join(tmpdir, "scratch"))
    shutil.copytree(src=seed_data, dst=os.path.join(tmpdir, "seed"))

    # Set up the same machinery as process_event()
    io = pyaflowa.setup(source_name)
    with ASDFDataSet(io.paths.ds_file) as ds:
        mgmt = Manager(ds=ds, config=io.config)
        mgmt, io = pyaflowa.process_station(mgmt=mgmt,
                                            code="NZ.BFZ.??.???",
                                            io=io)

    assert (io.nwin == mgmt.stats.nwin == 3)
    assert (io.misfit == pytest.approx(65.39037, .001))
Example 20
def test_asdf_event_fetch(internal_fetcher, dataset_fid):
    """
    Get event from an ASDFDataSet.
    """
    with ASDFDataSet(dataset_fid) as ds:
        internal_fetcher.ds = ds
        internal_fetcher.asdf_event_fetch()
Example 21
def write_st(st: Stream,
             event: Event,
             outfolder: str,
             statxml: Inventory,
             resample: bool = True):
    """
    Write raw waveform data to an asdf file. This includes the corresponding
    (teleseismic) event and the station inventory (i.e., response information).

    :param st: The stream holding the raw waveform data.
    :type st: Stream
    :param event: The seismic event associated to the recorded data.
    :type event: Event
    :param outfolder: Output folder to write the asdf file to.
    :type outfolder: str
    :param statxml: The station inventory
    :type statxml: Inventory
    :param resample: Resample the data to 10Hz sampling rate? Defaults to True.
    :type resample: bool, optional
    """
    fname = '%s.%s.h5' % (st[0].stats.network, st[0].stats.station)
    if resample:
        st.filter('lowpass_cheby_2', freq=4, maxorder=12)
        st = resample_or_decimate(st, 10, filter=False)
    with ASDFDataSet(os.path.join(outfolder, fname)) as ds:
        # Events should not be added because it will read the whole
        # catalogue every single time!
        ds.add_waveforms(st, tag='raw_recording')
        ds.add_stationxml(statxml)  # If there are still problems, we will have
Example 22
def test_pyaflowa_setup(source_name, PAR, PATH):
    """
    Test the one-time setup of Pyaflowa which creates the IO object
    """
    pyaflowa = Pyaflowa(structure="seisflows", sfpaths=PATH, sfpar=PAR)

    # Requirement that STATION file exists for Pyaflowa to run setup
    with pytest.raises(FileNotFoundError):
        pyaflowa.path_structure.format(source_name=source_name)
    open(os.path.join(PATH.SOLVER, source_name, "DATA", "STATIONS"), "w")

    # SeisFlows usually takes care of placing source files into the data
    # directory, so we need to do it manually here
    src = os.path.join("test_data", "test_CMTSOLUTION_2018p130600")
    dst = os.path.join(PATH.SOLVER, source_name, "DATA", "CMTSOLUTION")
    shutil.copy(src, dst)

    # Initiate Pyaflowa which will create directory structure, read in source
    # file and create an ASDFDataSet
    io = pyaflowa.setup(source_name=source_name)

    # Simple check to make sure event id is set correctly and event reading
    # machinery is working
    assert (io.config.event_id == source_name)
    assert (os.path.exists(io.paths.ds_file))
    with ASDFDataSet(io.paths.ds_file) as ds:
        assert (source_name in ds.events[0].resource_id.id)
Example 23
def test_extract_all_coordinates(example_data_set):
    """
    Tests the extraction of all coordinates.
    """
    data_set = ASDFDataSet(example_data_set.filename)

    assert data_set.get_all_coordinates() == {
        "AE.113A": {
            "latitude": 32.7683,
            "longitude": -113.7667,
            "elevation_in_m": 118.0},
        "TA.POKR": {
            "latitude": 65.1171,
            "longitude": -147.4335,
            "elevation_in_m": 501.0}}
Example 24
def empty_dataset(tmpdir):
    """Re-used test data pointing to STATIONS file"""
    fid = os.path.join(tmpdir, "empty_dataset.h5")
    # Make sure the dataset is actually empty
    if os.path.exists(fid):
        os.remove(fid)
    return ASDFDataSet(fid)
Example 25
def test_adding_same_event_twice_raises(tmpdir):
    """
    Adding the same event twice raises.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")

    data_set = ASDFDataSet(asdf_filename)

    # Add once, all good.
    data_set.add_quakeml(os.path.join(data_path, "quake.xml"))
    assert len(data_set.events) == 1

    # Adding again should raise an error.
    with pytest.raises(ValueError):
        data_set.add_quakeml(os.path.join(data_path, "quake.xml"))
Example 26
def test_asdf_station_fetch(internal_fetcher, dataset_fid, code):
    """
    Get station from an ASDFDataSet
    """
    with ASDFDataSet(dataset_fid) as ds:
        internal_fetcher.ds = ds
        inv = internal_fetcher.asdf_station_fetch(code)
        assert len(inv[0][0]) == 3
Example 27
def test_adding_a_provenance_record(tmpdir):
    """
    Tests adding a provenance record.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    filename = os.path.join(data_dir, "example_schematic_processing_chain.xml")

    # Add it as a document.
    doc = prov.read(filename, format="xml")
    data_set.add_provenance_document(doc, name="test_provenance")
    del data_set

    # Read it again.
    data_set = ASDFDataSet(asdf_filename)
    assert data_set.provenance.test_provenance == doc
Example 28
def example_data_set(tmpdir):
    """
    Fixture creating a small example file.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")

    data_set = ASDFDataSet(asdf_filename)

    for filename in glob.glob(os.path.join(data_path, "*.xml")):
        if "quake.xml" in filename:
            data_set.add_quakeml(filename)
        else:
            data_set.add_stationxml(filename)

    for filename in glob.glob(os.path.join(data_path, "*.mseed")):
        data_set.add_waveforms(filename, tag="raw_recording",
                               event_id=data_set.events[0])

    # Flush and finish writing.
    del data_set

    # Return filename and path to tempdir, no need to always create a
    # new one.
    return Namespace(filename=asdf_filename, tmpdir=tmpdir.strpath)
Example 30
def test_data_set_creation(tmpdir):
    """
    Test data set creation with a small test dataset.

    It tests that the data that goes in is correctly saved and
    can be retrieved again.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")

    data_set = ASDFDataSet(asdf_filename)

    for filename in glob.glob(os.path.join(data_path, "*.mseed")):
        data_set.add_waveforms(filename, tag="raw_recording")

    for filename in glob.glob(os.path.join(data_path, "*.xml")):
        if "quake.xml" in filename:
            data_set.add_quakeml(filename)
        else:
            data_set.add_stationxml(filename)

    # Flush and finish writing.
    del data_set

    # Open once again
    data_set = ASDFDataSet(asdf_filename)

    # ObsPy is tested enough to make this comparison meaningful.
    for station in (("AE", "113A"), ("TA", "POKR")):
        # Test the waveforms
        stream_asdf = \
            getattr(data_set.waveforms, "%s_%s" % station).raw_recording
        stream_file = obspy.read(os.path.join(
            data_path, "%s.%s.*.mseed" % station))
        # Delete the file format specific stats attributes. These are
        # meaningless inside ASDF data sets.
        for trace in stream_file:
            del trace.stats.mseed
            del trace.stats._format
        for trace in stream_asdf:
            del trace.stats.asdf
            del trace.stats._format
        assert stream_asdf == stream_file

        # Test the inventory data.
        inv_asdf = \
            getattr(data_set.waveforms, "%s_%s" % station).StationXML
        inv_file = obspy.read_inventory(
            os.path.join(data_path, "%s.%s..BH*.xml" % station))
        assert inv_file == inv_asdf
    # Test the event.
    cat_file = obspy.read_events(os.path.join(data_path, "quake.xml"))
    cat_asdf = data_set.events
    assert cat_file == cat_asdf
Example 31
def load_asdf_info(asdf_fname):
    # asdf file, we don't use parallel io here
    with ASDFDataSet(asdf_fname, mode="r", mpi=False) as asdf_file:
        lat = asdf_file.events[0].preferred_origin().latitude
        lon = asdf_file.events[0].preferred_origin().longitude
        dep = asdf_file.events[0].preferred_origin().depth
        time = asdf_file.events[0].preferred_origin().time

    return lat, lon, dep, time
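A small defensive variant of load_asdf_info() above, assuming the QuakeML in the file may lack a preferred origin; it falls back to the first listed origin instead of failing on None.

from pyasdf import ASDFDataSet

def load_asdf_origin(asdf_fname):
    with ASDFDataSet(asdf_fname, mode="r", mpi=False) as asdf_file:
        event = asdf_file.events[0]
        # preferred_origin() returns None if no preferred origin is set.
        origin = event.preferred_origin() or event.origins[0]
        return origin.latitude, origin.longitude, origin.depth, origin.time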
Example 32
def test_save_adjsrcs(tmpdir, mgmt_post):
    """
    Checks that adjoint sources can be written to dataset and will match the 
    formatting required by Specfem3D
    """
    with ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5")) as ds:
        mgmt_post.ds = ds
        mgmt_post.save_adjsrcs()
        assert(hasattr(ds.auxiliary_data.AdjointSources.default, "NZ_BFZ_BXN"))
Example 33
    def __init__(self, ds_fid):
        """
        The class contains a PyASDF Dataset that will be used to load observed
        and synthetic data, based on user input
        """
        self.ds = ASDFDataSet(ds_fid)
        # Initiate an empty Manager to get access to its config
        self.mgmt = Manager()
        self.info = Info()
Example 34
def test_processing_multiprocessing(example_data_set):
    """
    Tests the processing using multiprocessing.
    """
    def null_processing(st, inv):
        return st

    data_set = ASDFDataSet(example_data_set.filename)
    output_filename = os.path.join(example_data_set.tmpdir, "output.h5")
    # Do not actually do anything. Apply an empty function.
    data_set.process(null_processing, output_filename,
                     {"raw_recording": "raw_recording"})

    del data_set
    data_set = ASDFDataSet(example_data_set.filename)
    out_data_set = ASDFDataSet(output_filename)

    assert data_set == out_data_set
Example 35
def test_asdf_waveform_fetch(internal_fetcher, dataset_fid, code, config):
    """
    Get waveforms from an ASDFDataSet
    """
    with ASDFDataSet(dataset_fid) as ds:
        internal_fetcher.ds = ds
        for tag in [config.observed_tag, config.synthetic_tag]:
            st = internal_fetcher.asdf_waveform_fetch(code, tag)
            assert len(st) == 3
Example 36
def load_asdf_info(asdf_fname):
    # asdf file
    with ASDFDataSet(asdf_fname, mode="r") as asdf_file:
        lat = asdf_file.events[0].preferred_origin().latitude
        lon = asdf_file.events[0].preferred_origin().longitude
        dep = asdf_file.events[0].preferred_origin().depth
        time = asdf_file.events[0].preferred_origin().time

    return lat, lon, dep, time
Example 37
def test_str_method_of_aux_data(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    # With provenance id.
    data = np.random.random((10, 10))
    # The data must NOT start with a number.
    data_type = "RandomArray"
    tag = "test_data"
    parameters = {"a": 1, "b": 2.0, "e": "hallo"}
    provenance_id = "{http://example.org}test"

    data_set.add_auxiliary_data(data=data, data_type=data_type,
                                tag=tag, parameters=parameters,
                                provenance_id=provenance_id)
    assert \
        str(data_set.auxiliary_data.RandomArray.test_data) == (
            "Auxiliary Data of Type 'RandomArray'\n"
            "\tTag: 'test_data'\n"
            "\tProvenance ID: '{http://example.org}test'\n"
            "\tData shape: '(10, 10)', dtype: 'float64'\n"
            "\tParameters:\n"
            "\t\ta: 1\n"
            "\t\tb: 2.0\n"
            "\t\te: hallo")

    # Without.
    data = np.random.random((10, 10))
    # The data must NOT start with a number.
    data_type = "RandomArray"
    tag = "test_data_2"
    parameters = {"a": 1, "b": 2.0, "e": "hallo"}

    data_set.add_auxiliary_data(data=data, data_type=data_type,
                                tag=tag, parameters=parameters)
    assert \
        str(data_set.auxiliary_data.RandomArray.test_data_2) == (
            "Auxiliary Data of Type 'RandomArray'\n"
            "\tTag: 'test_data_2'\n"
            "\tData shape: '(10, 10)', dtype: 'float64'\n"
            "\tParameters:\n"
            "\t\ta: 1\n"
            "\t\tb: 2.0\n"
            "\t\te: hallo")
Example 38
def test_gather_event(gatherer, dataset_fid):
    """
    Ensure gatherer can get an event from the correct sources
    """
    assert gatherer.gather_event(try_fm=False) is not None

    with ASDFDataSet(dataset_fid) as ds:
        gatherer.ds = ds
        gatherer.Client = None
        assert gatherer.gather_event(try_fm=False) is not None
Example 39
def convert_to_asdf(asdf_fn, waveform_filelist, tag, quakemlfile=None,
                    staxml_filelist=None, verbose=False, status_bar=False,
                    create_simple_inv=False):
    """
    Convert files (sac or mseed) to asdf
    """

    if verbose:
        print("*"*10 + " ASDF Converter " + "*"*10)

    nwaveform = len(waveform_filelist)
    if nwaveform == 0:
        print("No file specified. Return...")
        return
    if os.path.exists(asdf_fn):
        raise Exception("File '%s' exists." % asdf_fn)

    ds = ASDFDataSet(asdf_fn, mode='a')

    # Add event
    if quakemlfile:
        if not os.path.exists(quakemlfile):
            raise ValueError("Quakeml file not exists:%s" % quakemlfile)
        ds.add_quakeml(quakemlfile)
        event = ds.events[0]
        if status_bar:
            drawProgressBar(1.0, "Adding Quakeml data")
    else:
        raise ValueError("No Event file")

    sta_dict = add_waveform_to_asdf(ds, waveform_filelist, tag, event=event,
                                    create_simple_inv=create_simple_inv,
                                    status_bar=status_bar)

    add_stationxml_to_asdf(ds, staxml_filelist, event=event,
                           create_simple_inv=create_simple_inv,
                           sta_dict=sta_dict,
                           status_bar=status_bar)

    if verbose:
        print("ASDF filesize: %s" % ds.pretty_filesize)
    del ds
Example 40
def test_reading_and_writing_auxiliary_data_with_provenance_id(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    data = np.random.random((10, 10))
    # The data must NOT start with a number.
    data_type = "RandomArray"
    tag = "test_data"
    parameters = {"a": 1, "b": 2.0, "e": "hallo"}
    provenance_id = "{http://example.org}test"

    data_set.add_auxiliary_data(data=data, data_type=data_type,
                                tag=tag, parameters=parameters,
                                provenance_id=provenance_id)
    data_set.__del__()
    del data_set

    new_data_set = ASDFDataSet(asdf_filename)
    assert new_data_set.auxiliary_data.RandomArray.test_data.provenance_id \
        == provenance_id
Example 41
def test_adding_auxiliary_data_with_invalid_data_type_name_raises(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    data = np.random.random((10, 10))
    # The data must NOT start with a number.
    data_type = "2DRandomArray"
    tag = "test_data"
    parameters = {"a": 1, "b": 2.0, "e": "hallo"}

    try:
        with pytest.raises(ASDFValueError) as err:
            data_set.add_auxiliary_data(data=data, data_type=data_type,
                                        tag=tag, parameters=parameters)

        assert err.value.args[0] == (
            "Data type name '2DRandomArray' is invalid. It must validate "
            "against the regular expression '^[A-Z][A-Za-z0-9]*$'.")
    finally:
        data_set.__del__()
Example 42
def test_trying_to_add_provenance_record_with_invalid_name_fails(tmpdir):
    """
    The name must be valid according to a particular regular expression.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    filename = os.path.join(data_dir, "example_schematic_processing_chain.xml")

    # First try adding it as a prov document.
    doc = prov.read(filename, format="xml")
    with pytest.raises(ASDFValueError) as err:
        data_set.add_provenance_document(doc, name="a-b-c")

    assert err.value.args[0] == (
        "Name 'a-b-c' is invalid. It must validate against the regular "
        "expression '^[0-9a-z][0-9a-z_]*[0-9a-z]$'.")

    # Must sometimes be called to get around some bugs.
    data_set.__del__()
Example 43
def test_station_fetch(internal_fetcher, dataset_fid, code):
    """
    Test the mid level fetching function which chooses whether to search via
    ASDFDataSet or directory structure
    """
    internal_fetcher.config.paths["responses"] = "./test_data/test_seed"
    assert internal_fetcher.station_fetch(code) is not None
    internal_fetcher.config.paths["responses"] = None

    with ASDFDataSet(dataset_fid) as ds:
        internal_fetcher.ds = ds
        assert internal_fetcher.station_fetch(code) is not None
Example 44
def test_adding_auxiliary_data_with_wrong_tag_name_raises(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    # With provenance id.
    data = np.random.random((10, 10))
    # The data must NOT start with a number.
    data_type = "RandomArray"
    tag = "A.B.C"

    with pytest.raises(ASDFValueError) as err:
        data_set.add_auxiliary_data(
            data=data, data_type=data_type,
            tag=tag, parameters={})

    assert err.value.args[0] == (
        "Tag name 'A.B.C' is invalid. It must validate "
        "against the regular expression "
        "'^[a-zA-Z0-9][a-zA-Z0-9_]*[a-zA-Z0-9]$'.")

    data_set.__del__()
Example 45
def test_item_access_of_auxiliary_data(tmpdir):
    """
    Make sure all auxiliary data types, and the data itself, can be accessed
    via dictionary-like access.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    assert str(data_set.auxiliary_data) == (
        "Data set contains no auxiliary data.")

    data = np.random.random((10, 10))
    data_type = "RandomArray"
    tag = "test_data_1"
    parameters = {"a": 1, "b": 2.0, "e": "hallo"}

    data_set.add_auxiliary_data(data=data, data_type=data_type,
                                tag=tag, parameters=parameters)

    assert data_set.auxiliary_data["RandomArray"]["test_data_1"].tag == \
        data_set.auxiliary_data.RandomArray.test_data_1.tag
Example 46
def save_adjoint_to_asdf(outputfile, events, adjoint_sources, stations):
    """
    Save events (obspy.Catalog) and adjoint sources, together with
    station information, to an asdf file on disk.
    """
    print("=" * 15 + "\nWrite to file: %s" % outputfile)
    outputdir = os.path.dirname(outputfile)
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    if os.path.exists(outputfile):
        print("Output file exists and removed:%s" % outputfile)
        os.remove(outputfile)

    ds = ASDFDataSet(outputfile, mode='a', compression=None)
    ds.add_quakeml(events)
    for adj_id in sorted(adjoint_sources):
        adj = adjoint_sources[adj_id]
        sta_tag = "%s_%s" % (adj.network, adj.station)
        sta_info = stations[sta_tag]
        adj_array, adj_path, parameters = \
            dump_adjsrc(adj, sta_info)
        ds.add_auxiliary_data(adj_array,
                              data_type="AdjointSources",
                              path=adj_path,
                              parameters=parameters)
Example 47
    def dump_to_asdf(self, outputfile):
        """
        Dump self.adjoint_sources to the adjoint file
        """
        print("=" * 15 + "\nWrite to file: %s" % outputfile)
        if os.path.exists(outputfile):
            print("Output file exists and removed:%s" % outputfile)
            os.remove(outputfile)

        ds = ASDFDataSet(outputfile, mode='a', compression=None)
        ds.add_quakeml(self.events)
        event = self.events[0]
        origin = event.preferred_origin()
        event_time = origin.time

        for adj_id in sorted(self.adjoint_sources):
            adj = self.adjoint_sources[adj_id]
            sta_tag = "%s_%s" % (adj.network, adj.station)
            sta_info = self.stations[sta_tag]
            adj_array, adj_path, parameters = \
                dump_adjsrc(adj, sta_info, event_time)
            ds.add_auxiliary_data(adj_array,
                                  data_type="AdjointSources",
                                  path=adj_path,
                                  parameters=parameters)
Example 48
def test_reading_and_writing_auxiliary_data(tmpdir):
    """
    Tests reading and writing auxiliary data.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    # Define some auxiliary data and add it.
    data = np.random.random(100)
    data_type = "RandomArrays"
    tag = "test_data"
    parameters = {"a": 1, "b": 2.0, "e": "hallo"}

    data_set.add_auxiliary_data(data=data, data_type=data_type, tag=tag,
                                parameters=parameters)
    del data_set

    new_data_set = ASDFDataSet(asdf_filename)
    aux_data = new_data_set.auxiliary_data.RandomArrays.test_data
    np.testing.assert_equal(data, aux_data.data)
    assert aux_data.data_type == data_type
    assert aux_data.tag == tag
    assert aux_data.parameters == parameters
Example 49
def test_event_fetch(internal_fetcher, dataset_fid, event_id):
    """
    Test the mid level fetching function which chooses whether to search via
    ASDFDataSet or directory structure for event information
    """
    internal_fetcher.config.paths["events"] = "./test_data"
    assert internal_fetcher.event_fetch(event_id,
                                        prefix="test_CMTSOLUTION_") is not None
    internal_fetcher.config.paths["events"] = None

    with ASDFDataSet(dataset_fid) as ds:
        internal_fetcher.ds = ds
        assert internal_fetcher.event_fetch(event_id) is not None
Example 50
File: misfit.py Project: icui/pyper
def read_misfit(src: str) -> float:
    """Get output misfit value."""
    from pyasdf import ASDFDataSet

    mf = 0.0

    with ASDFDataSet(Directory(src).abs('adjoint.h5'), mode='r',
                     mpi=False) as ds:
        group = ds.auxiliary_data.AdjointSources

        for sta in group.list():
            mf += group[sta].parameters['misfit']

    return mf
Example 51
    def process_event(self, source_name, codes=None, loc="*", cha="*",
                      **kwargs):
        """
        The main processing function for Pyaflowa misfit quantification.

        Processes waveform data for all stations related to a given event,
        produces waveform and map plots during the processing step, saves data
        to an ASDFDataSet and writes adjoint sources and STATIONS_ADJOINT file,
        required by SPECFEM3D's adjoint simulations, to disk.

        Kwargs passed to pyatoa.Manager.flow() function.

        :type source_name: str
        :param source_name: event id to be used for data gathering, processing
        :type codes: list of str
        :param codes: list of station codes to be used for processing. If None,
            will read station codes from the provided STATIONS file
        :type loc: str
        :param loc: if codes is None, Pyatoa will generate station codes based 
            on the SPECFEM STATIONS file, which does not contain location info.
            This allows user to set the location values manually when building
            the list of station codes. Defaults to wildcard '??', which is 
            usually acceptable
        :type cha: str
        :param cha: if codes is None, Pyatoa will generate station codes based
            on the SPECFEM STATIONS file, which does not contain channel info. 
            This variable allows the user to set channel searching manually,
            wildcards okay. Defaults to 'HH?' for high-gain, high-sampling rate
            broadband seismometers, but this is dependent on the available data.
        :rtype: float
        :return: the total scaled misfit collected during the processing chain
        """
        # Create the event specific configurations and attribute container (io)
        io = self.setup(source_name, **kwargs)
       
        # Allow user to provide a list of codes, else read from station file 
        if codes is None:
            codes = read_station_codes(io.paths.stations_file, 
                                       loc=loc, cha=cha)

        # Open the dataset as a context manager and process all stations in serial
        with ASDFDataSet(io.paths.ds_file) as ds:
            mgmt = pyatoa.Manager(ds=ds, config=io.config)
            for code in codes:
                mgmt_out, io = self.process_station(mgmt=mgmt, code=code,
                                                    io=io, **kwargs)

        scaled_misfit = self.finalize(io)

        return scaled_misfit
Example 52
def convert_to_sac(asdf_fn, outputdir, tag=None, type="sac"):
    """
    Convert asdf to different types of file
    """
    if not os.path.exists(asdf_fn):
        raise ValueError("No asdf file: %s" % asdf_fn)

    if not os.path.exists(outputdir):
        raise ValueError("No output dir: %s" % outputdir)

    ds = ASDFDataSet(asdf_fn)

    sta_list = ds.get_station_list()
    print(sta_list)
    print("tag", tag)

    for sta_tag in sta_list:
        station_name = sta_tag.replace(".", "_")
        station = getattr(ds.waveforms, station_name)
        attr_list = dir(station)
        attr_list.remove('StationXML')
        print(station_name, attr_list)
        if tag is None or tag == "":
            if len(attr_list) == 1:
                stream = getattr(station, attr_list[0])
        else:
            stream = getattr(station, tag)
        #print sta_tag, stream
        for tr in stream:
            network = tr.stats.network
            station = tr.stats.station
            location = tr.stats.location
            channel = tr.stats.channel
            filename = ".".join([network, station, location, channel, type])
            filename = os.path.join(outputdir, filename)
            print "output filename:", filename
            tr.write(filename, format=type)
Example 54
def check_event_information_in_asdf_files(asdf_files):
    if len(asdf_files) == 0:
        raise ValueError("Number of input asdf files is 0")

    asdf_events = {}
    # extract event information from asdf file
    for asdf_fn in asdf_files:
        ds = ASDFDataSet(asdf_fn, mode='r')
        asdf_events[asdf_fn] = ds.events

    check_events_consistent(asdf_events)

    event_base = asdf_events[list(asdf_events.keys())[0]]
    origin = event_base[0].preferred_origin()
    return event_base, origin
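A sketch of the same event-collection step written with the context-manager form of ASDFDataSet used in other examples, so each HDF5 file is explicitly closed after its catalog has been read.

from pyasdf import ASDFDataSet

def collect_asdf_events(asdf_files):
    asdf_events = {}
    for asdf_fn in asdf_files:
        with ASDFDataSet(asdf_fn, mode="r") as ds:
            # ds.events is parsed into an obspy.Catalog, which stays usable
            # after the data set is closed.
            asdf_events[asdf_fn] = ds.events
    return asdf_events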
Example 55
def test_provenance_dictionary_behaviour(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)

    filename = os.path.join(data_dir,
                            "example_schematic_processing_chain.xml")

    # Add it as a document.
    doc = prov.read(filename, format="xml")
    # Setting via setitem.
    data_set.provenance["test_provenance"] = doc

    data_set.__del__()
    del data_set

    new_data_set = ASDFDataSet(asdf_filename)
    assert new_data_set.provenance.list() == ["test_provenance"]

    assert new_data_set.provenance["test_provenance"] == doc
    assert getattr(new_data_set.provenance, "test_provenance") == doc

    assert list(new_data_set.provenance.keys()) == ["test_provenance"]
    assert list(new_data_set.provenance.values()) == [doc]
    assert list(new_data_set.provenance.items()) == [("test_provenance", doc)]
Example 56
File: ortho.py Project: icui/pyper
    def _diff(self):
        import numpy as np
        from pyasdf import ASDFDataSet

        with ASDFDataSet(self.path_synthetic, mode='r', mpi=False) as syn_ds, \
            ASDFDataSet(self.path_observed, mode='r', mpi=False) as obs_ds:
            fellows = {}

            syn_aux = syn_ds.auxiliary_data.FT
            obs_aux = obs_ds.auxiliary_data.FT

            syn_keys = syn_aux.list()
            obs_keys = obs_aux.list()

            for key in syn_keys:
                if key not in obs_keys:
                    continue

                keypath = key.split('_')
                cha = keypath[-1]
                station = '.'.join(keypath[:-1])

                if cha not in fellows:
                    fellows[cha] = {}

                # phase and amplitude difference
                syn = np.array(syn_aux[key].data)
                obs = np.array(obs_aux[key].data)

                phase_diff = np.angle(syn / obs)
                amp_diff = np.abs(syn) / np.abs(obs)

                fellows[cha][station] = phase_diff, amp_diff, np.squeeze(
                    np.where(np.isnan(syn) | np.isnan(obs)))

            self.dump(fellows, 'fellows.pickle')
Example 57
def save_adjoint_to_asdf(outputfile, events, adjoint_sources, stations):
    """
    Save events (obspy.Catalog) and adjoint sources, together with
    station information, to an asdf file on disk.
    """
    print("="*15 + "\nWrite to file: %s" % outputfile)
    outputdir = os.path.dirname(outputfile)
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    if os.path.exists(outputfile):
        print("Output file exists and removed:%s" % outputfile)
        os.remove(outputfile)

    ds = ASDFDataSet(outputfile, mode='a', compression=None)
    ds.add_quakeml(events)
    for adj_id in sorted(adjoint_sources):
        adj = adjoint_sources[adj_id]
        sta_tag = "%s_%s" % (adj.network, adj.station)
        sta_info = stations[sta_tag]
        adj_array, adj_path, parameters = \
            dump_adjsrc(adj, sta_info)
        ds.add_auxiliary_data(adj_array, data_type="AdjointSources",
                              path=adj_path, parameters=parameters)
Example 58
def convert_to_asdf(filelist,
                    asdf_fn,
                    quakemlfile,
                    staxml_filelist=None,
                    tag=None):
    """
    Convert files (sac or mseed) to asdf
    """

    nfiles = len(filelist)
    if nfiles == 0:
        print "No file specified. Return..."
        return

    if os.path.exists(asdf_fn):
        raise Exception("File '%s' exists." % asdf_fn)

    ds = ASDFDataSet(asdf_fn)

    # Add event
    if quakemlfile is not None and os.path.exists(quakemlfile):
        print "Event info added"
        ds.add_quakeml(quakemlfile)
        event = ds.events[0]
    else:
        raise ValueError("No Event file")

    # Add waveforms.
    print "Adding Waveform data"
    for _i, filename in enumerate(filelist):
        if os.path.exists(filename):
            #print("Adding file %i of %i: %s" % (_i + 1,
            #       len(filelist), os.path.basename(filename)))
            ds.add_waveforms(filename, tag=tag, event_id=event)
        else:
            print("File not exist %i of %i")

    # Add StationXML files.
    if staxml_filelist is not None and len(staxml_filelist) > 0:
        for _i, filename in enumerate(staxml_filelist):
            if os.path.exists(filename):
                #print("Adding StationXML file %i of %i..." % (_i + 1, len(filenames)))
                ds.add_stationxml(filename)
    else:
        print("No stationxml added")
Example 59
def extract_adjoint_misfit(asdf_file, verbose):

    print("Input asdf file: %s" % asdf_file)

    if not os.path.exists(asdf_file):
        raise ValueError("ASDF file not exists: %s" % asdf_file)
    ds = ASDFDataSet(asdf_file, mode='r')
    try:
        adjsrc_group = ds.auxiliary_data.AdjointSources
    except Exception as err:
        raise ValueError("Can not get adjoint misfit information(due to %s). "
                         "Check if the adjoint source group exists in the "
                         "file" % err)

    nadj = 0
    nadj_cat = {}
    misfit_cat = {}
    misfit_dict = {}
    for adj in adjsrc_group:
        nadj += 1

        nw = adj.parameters["station_id"].split(".")[0]
        sta = adj.parameters["station_id"].split(".")[1]
        comp = adj.parameters["component"]
        loc = adj.parameters["location"]
        station_id = "%s.%s.%s.%s" % (nw, sta, loc, comp)
        misfit = adj.parameters["misfit"]

        misfit_dict[station_id] = misfit
        if comp not in misfit_cat:
            misfit_cat[comp] = 0
            nadj_cat[comp] = 0
        misfit_cat[comp] += misfit
        nadj_cat[comp] += 1

    content = {
        "asdf_file": asdf_file,
        "misfit": misfit_dict,
        "misfit_category": misfit_cat,
        "nadj_total": nadj,
        "nadj_category": nadj_cat
    }

    if verbose:
        print("Number of adjoint sources:", nadj)

    return content
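A hypothetical usage of the extract_adjoint_misfit() helper above; "adjoint.h5" is a placeholder file name.

content = extract_adjoint_misfit("adjoint.h5", verbose=True)
for comp, total in content["misfit_category"].items():
    print("%s: %d adjoint sources, summed misfit %.3f"
          % (comp, content["nadj_category"][comp], total))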