Example #1
def get_ref_data(stream,
                 inv,
                 model='ak135f_1s',
                 eventid=None,
                 origins=None,
                 m_tensor=None,
                 source_dc=None):
    """Request Syngine synthetics at the epicentral distance of every trace
    in ``stream`` and return them as a reference Stream."""

    ref_stream = Stream()

    rlats = []
    rlons = []
    geom = geometrical_center(inv)
    d, az, baz = gps2dist_azimuth(origins.latitude, origins.longitude,
                                  geom.latitude, geom.longitude)
    for i, trace in enumerate(stream):
        dist = degrees2kilometers(trace.stats.distance) * 1000.

        rlat, rlon = dist_azimuth2gps(origins.latitude, origins.longitude, az,
                                      dist)
        if rlon > 180:
            rlon -= 360.  # wrap longitude into the [-180, 180] range

        print(rlat, rlon)
        rlats.append(rlat)
        rlons.append(rlon)
        print('Receiving trace %i of %i.' % (i + 1, len(stream)))
        # Request synthetics for the current receiver only; passing the
        # accumulated rlats/rlons would re-request every previous receiver,
        # and stream_tmp[0] would always hold the trace of the first one.
        stream_tmp, cat_void = get_syngine_data(model,
                                                reclat=[rlat],
                                                reclon=[rlon],
                                                eventid=eventid,
                                                origins=origins,
                                                m_tensor=m_tensor,
                                                source_dc=source_dc)

        trace_tmp = stream_tmp[0].copy()
        trace_tmp.stats.station = trace.stats.station
        trace_tmp.stats.starttime = trace.stats.starttime
        trace_tmp.stats.distance = trace.stats.distance
        ref_stream.append(trace_tmp)

    return ref_stream
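
All of the snippets on this page lean on ObsPy's geodetic helpers; a minimal, self-contained sketch of the two calls used above (gps2dist_azimuth works on the WGS84 ellipsoid by default, degrees2kilometers assumes a 6371 km sphere by default):

from obspy.geodetics.base import gps2dist_azimuth, degrees2kilometers

# epicentral distance in meters plus azimuth A->B and back-azimuth B->A
d_m, az, baz = gps2dist_azimuth(48.0, 11.0, 46.0, 7.0)
print(d_m / 1000., az, baz)

# trace.stats.distance above is in degrees; degrees2kilometers(...) * 1000.
# converts it to meters, as done inside the loop
print(degrees2kilometers(30.0) * 1000.)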
Example #2
    def test_init(self):
        test_model = SeisModel(
            TestClassSeisModel.test_model_6column,
            flattening=False,
            use_kappa=False)
        test_source = SourceModel(sdep=12, srcType="dc")
        test_config = Config(
            model=test_model,
            source=test_source,
            receiver_distance=[10, 20, 30])
        newmodel = np.array([
            [5.5, 3.18, 5.501, 2.53, 600, 1100],
            [6.5, 3.64, 6.301, 2.55, 700, 1300],
            [4, 3.64, 6.301, 2.55, 700, 1300],
            [16.0, 3.87, 6.699, 2.59, 800, 1600],
            [0, 4.50, 7.799, 2.6, 900, 1800]
        ])
        assert np.all(test_config.model.model_values == newmodel)

        test_config = Config(
            model=test_model,
            source=test_source,
            receiver_distance=[10, 20, 30],
            rdep=16,
            degrees=True)
        receiver_distance_km = [degrees2kilometers(d) for d in (10, 20, 30)]
        assert np.allclose(test_config.receiver_distance, receiver_distance_km)

        test_config = Config(
            model=test_model,
            source=test_source,
            receiver_distance=[10, 20, 30],
            rdep=16)
        assert np.all(test_config.model.model_values == newmodel)

        test_config = Config(
            model=test_model,
            source=test_source,
            receiver_distance=[10, 20, 30],
            rdep=30)
        newmodel = np.array([
            [5.5, 3.18, 5.501, 2.53, 600, 1100],
            [6.5, 3.64, 6.301, 2.55, 700, 1300],
            [4, 3.64, 6.301, 2.55, 700, 1300],
            [14.0, 3.87, 6.699, 2.59, 800, 1600],
            [2.0, 3.87, 6.699, 2.59, 800, 1600],
            [0, 4.50, 7.799, 2.6, 900, 1800]
        ])
        assert np.all(test_config.model.model_values == newmodel)

        test_config = Config(
            model=test_model,
            source=test_source,
            receiver_distance=[10, 20, 30],
            rdep=13)
        newmodel = np.array([
            [5.5, 3.18, 5.501, 2.53, 600, 1100],
            [6.5, 3.64, 6.301, 2.55, 700, 1300],
            [1, 3.64, 6.301, 2.55, 700, 1300],
            [3, 3.64, 6.301, 2.55, 700, 1300],
            [16.0, 3.87, 6.699, 2.59, 800, 1600],
            [0, 4.50, 7.799, 2.6, 900, 1800]
        ])
        assert np.all(test_config.model.model_values == newmodel)

        test_config = Config(
            model=test_model,
            source=test_source,
            receiver_distance=[10, 20, 30],
            rdep=7)
        newmodel = np.array([
            [5.5, 3.18, 5.501, 2.53, 600, 1100],
            [1.5, 3.64, 6.301, 2.55, 700, 1300],
            [5, 3.64, 6.301, 2.55, 700, 1300],
            [4, 3.64, 6.301, 2.55, 700, 1300],
            [16.0, 3.87, 6.699, 2.59, 800, 1600],
            [0, 4.50, 7.799, 2.6, 900, 1800]
        ])
        assert np.all(test_config.model.model_values == newmodel)
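
These assertions encode a simple layer-splitting rule: interface depths are the cumulative layer thicknesses, and a receiver depth rdep splits the layer containing it (no split is needed when rdep already falls on an interface, as in the rdep=16 case). A small sketch of the arithmetic for the rdep=13 case:

import numpy as np

thickness = np.array([5.5, 6.5, 4.0, 16.0])  # layers above the half-space
interfaces = np.cumsum(thickness)            # -> [ 5.5, 12., 16., 32.]

rdep = 13.0
layer = np.searchsorted(interfaces, rdep)    # index of the layer holding rdep
top = interfaces[layer - 1] if layer else 0.0
print(rdep - top, interfaces[layer] - rdep)  # -> 1.0 3.0, as asserted above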
Example #3
    def __init__(self,
                 model: Optional[SeisModel] = None,
                 source: Optional[SourceModel] = None,
                 receiver_distance: Optional[Union[list, np.ndarray]] = None,
                 planet_radius: float = 6371.,
                 degrees: bool = False,
                 taper: float = 0.3,
                 filter: Tuple[float, float] = (0, 0),
                 npt: int = 256,
                 dt: float = 1.,
                 dk: float = 0.3,
                 smth: float = 1.,
                 pmin: float = 0.,
                 pmax: float = 1.,
                 kmax: float = 15.,
                 rdep: float = 0.,
                 updn: str = "all",
                 samples_before_first_arrival: int = 50,
                 suppression_sigma: float = 2.,
                 cuda: bool = False) -> None:
        """
        The configuration class used to generate Green's functions and synthetic waveforms.

        :param model: the Earth model used in calculation, defaults to None
        :type model: Optional[SeisModel]
        :param source: the source model used in calculation, defaults to None
        :type source: Optional[SourceModel]
        :param receiver_distance: a list of receiver distances in km, defaults to None
        :type receiver_distance: Optional[Union[list, np.ndarray]]
        :param planet_radius: the radius of the planet in km, defaults to 6371.
        :type planet_radius: float, optional
        :param degrees: use degrees instead of km, defaults to False
        :type degrees: bool, optional
        :param taper: taper applies a low-pass cosine filter at fc=(1-taper)*f_Nyquist, defaults to 0.3
        :type taper: float, optional
        :param filter: apply a high-pass filter with a cosine transition zone between frequencies f1 and f2 in Hz, defaults to (0, 0)
        :type filter: Tuple[float, float], optional
        :param npt: the number of points, defaults to 256
        :type npt: int, optional
        :param dt: the sampling interval in seconds, defaults to 1.
        :type dt: float, optional
        :param dk: the non-dimensional sampling interval of wavenumber, defaults to 0.3
        :type dk: float, optional
        :param smth: makes the final sampling interval dt/smth, defaults to 1.
        :type smth: float, optional
        :param pmin: the minimum slowness in terms of 1/vs_at_the_source, defaults to 0.
        :type pmin: float, optional
        :param pmax: the maximum slowness in terms of 1/vs_at_the_source, defaults to 1.
        :type pmax: float, optional
        :param kmax: kmax at zero frequency in terms of 1/hs, defaults to 15.
        :type kmax: float, optional
        :param rdep: the depth for the receivers in km, defaults to 0.
        :type rdep: float, optional
        :param updn: "up" for up-going wave only, "down" for down-going wave only, "all" for both "up" and "down", defaults to "all"
        :type updn: str, optional
        :param samples_before_first_arrival: the number of points before the first arrival, defaults to 50
        :type samples_before_first_arrival: int, optional
        :param suppression_sigma: the suppression factor of the numerical noise, defaults to 2
        :type suppression_sigma: float, optional
        :param cuda: whether to use the CUDA mode. If PYFK_USE_CUDA=1 is set, this flag is ignored and CUDA is always used, defaults to False
        :type cuda: bool
        :raises PyfkError: Must provide a list of receiver distance
        :raises PyfkError: Can't set receiver distance as 0, please consider using a small value instead
        :raises PyfkError: planet_radius should be positive
        :raises PyfkError: Taper must be within (0,1)
        :raises PyfkError: Filter must be a tuple (f1,f2), f1 and f2 should be within [0,1]
        :raises PyfkError: npt should be positive.
        :raises PyfkError: dt should be positive.
        :raises PyfkError: dk should be within (0,0.5)
        :raises PyfkError: smth should be positive.
        :raises PyfkError: pmin should be within [0,1]
        :raises PyfkError: pmax should be within [0,1]
        :raises PyfkError: pmin should be smaller than pmax
        :raises PyfkError: kmax should be larger than or equal to 10
        :raises PyfkError: the selection of phases should be either 'up', 'down' or 'all'
        :raises PyfkError: samples_before_first_arrival should be positive
        :raises PyfkError: suppression_sigma should be positive
        :raises PyfkError: Must provide a source
        :raises PyfkError: Must provide a seisModel
        """
        # * read in and validate parameters
        # receiver_distance
        if receiver_distance is None:
            raise PyfkError("Must provide a list of receiver distance")
        self.receiver_distance: np.ndarray = np.array(receiver_distance,
                                                      dtype=np.float64)
        if 0 in self.receiver_distance:
            raise PyfkError(
                "Can't set receiver distance as 0, please consider using a small value instead"
            )
        # planet_radius
        if planet_radius <= 0:
            raise PyfkError("planet_radius should be positive")
        self.planet_radius = planet_radius
        # degrees
        if degrees:
            self.receiver_distance = np.array([
                degrees2kilometers(dis, radius=self.planet_radius)
                for dis in self.receiver_distance
            ])
        # taper
        if taper <= 0 or taper > 1:
            raise PyfkError("Taper must be with (0,1)")
        self.taper = taper
        # filter
        self.filter = filter
        # npt
        if npt <= 0:
            raise PyfkError("npt should be positive.")
        self.npt = npt
        if self.npt == 1:
            # we don't use st_fk
            self.npt = 2
        # dt
        if dt <= 0:
            raise PyfkError("dt should be positive.")
        if self.npt == 2 and dt < 1000:
            self.dt = 1000
        else:
            self.dt = dt
        # dk
        if dk <= 0 or dk >= 0.5:
            raise PyfkError("dk should be within (0,0.5)")
        if dk <= 0.1 or dk >= 0.4:
            warnings.warn(
                PyfkWarning("dk is recommended to be within (0.1,0.4)"))
        self.dk = dk
        # smth
        if smth <= 0:
            raise PyfkError("smth should be positive.")
        self.smth = smth
        # pmin
        if pmin < 0 or pmin > 1:
            raise PyfkError("pmin should be within [0,1]")
        self.pmin = pmin
        # pmax
        if pmax < 0 or pmax > 1:
            raise PyfkError("pmax should be within [0,1]")
        if pmin >= pmax:
            raise PyfkError("pmin should be smaller than pmax")
        self.pmax = pmax
        # kmax
        if kmax < 10:
            raise PyfkError("kmax should be larger or equal to 10")
        self.kmax = kmax
        # rdep
        self.rdep = rdep
        # updn
        if updn not in ["all", "up", "down"]:
            raise PyfkError(
                "the selection of phases should be either 'up', 'down' or 'all'"
            )
        self.updn = updn
        # samples_before_first_arrival
        if samples_before_first_arrival <= 0:
            raise PyfkError("samples_before_first_arrival should be positive")
        self.samples_before_first_arrival = samples_before_first_arrival
        # suppression_sigma
        if suppression_sigma <= 0:
            raise PyfkError("suppression_sigma should be positive")
        self.suppression_sigma = suppression_sigma
        # cuda
        self.cuda = cuda
        # source and model
        if (source is None) or (not isinstance(source, SourceModel)):
            raise PyfkError("Must provide a source")
        if (model is None) or (not isinstance(model, SeisModel)):
            raise PyfkError("Must provide a seisModel")
        self.source = source
        # use copy since the model will be modified
        self.model = copy(model)
        self._couple_model_and_source()
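
A minimal construction sketch for this class, built from the calls visible in Example #2 above (the top-level import path from the pyfk package is assumed, and the column meaning of the model rows is inferred from the tests):

import numpy as np
from pyfk import Config, SeisModel, SourceModel  # import path assumed

# six-column model rows as in Example #2 (thickness in km first; the
# remaining columns are inferred to be vs, vp, rho, Qs, Qp)
model_values = np.array([
    [5.5, 3.18, 5.501, 2.53, 600, 1100],
    [10.5, 3.64, 6.301, 2.55, 700, 1300],
    [16.0, 3.87, 6.699, 2.59, 800, 1600],
    [0, 4.50, 7.799, 2.6, 900, 1800],
])
model = SeisModel(model_values, flattening=False, use_kappa=False)
source = SourceModel(sdep=12, srcType="dc")

# distances given in degrees are converted to km internally when degrees=True
config = Config(model=model, source=source,
                receiver_distance=[10, 20, 30], degrees=True)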
Example #4
def stream_add_stats(data_stream,
                     inv,
                     evt,
                     id_evt=0,
                     write_sac=False,
                     rotate_in_obspy=False,
                     taup_model='ak135',
                     plot_seismogram=True,
                     verbose=False):
    """
    this function will add event, station, distance and other sac header
    information for all data stream for one particular event 
    add stats headers including trace.stats.distance/latitude/longitude
    figure out channel code number before select stream ...
    this works for all data stream belongs to one event! 

    Todo: 1) test if there are multiple location codes; right now only work with one stream. update in future to deal
             with multiple channel (total_number_of channels)
          2) check if rotation is fine for BH[12] or BH[EN] when the cmpaz is not 0/90. 
    """
    model = TauPyModel(model=taup_model)

    # loop over all station in the inventory and find the stream for sta.net
    for net in inv:
        for sta in net:
            str1 = data_stream.select(network=net.code, station=sta.code)
            if len(str1) == 0:
                continue

            if len(str1) != 3:  # does not deal with multi-channel-code case yet
                sys.exit('Problem: missing components in %s' % str1)

            # initialize P and S arrival times (and ray info) to be set for str1
            Ptime = 0
            Stime = 0
            Prayp = Pinc_angle = Srayp = Sinc_angle = None

            for (j, tr) in enumerate(str1):
                # check the consistency of channel code
                for chan in sta:
                    if tr.stats.channel == chan.code and tr.stats.location == chan.location_code:
                        break
                else:
                    sys.exit('Problem finding channel in inventory for trace %s'
                             % tr.id)
                # add trace statistics from inventory and event catalog for sac writing later
                tr.stats.station_coordinates = {
                    'latitude': chan.latitude,
                    'longitude': chan.longitude,
                    'elevation': chan.elevation
                }
                (tr.stats.gcarc, tr.stats.azimuth,
                 tr.stats.back_azimuth) = taup_geo.calc_dist_azi(
                     evt.origins[0].latitude,
                     evt.origins[0].longitude,
                     chan.latitude,
                     chan.longitude,
                     radius_of_planet_in_km=radiusOfEarth,
                     flattening_of_planet=0)
                tr.stats.distance = degrees2kilometers(
                    tr.stats.gcarc) * 1000  # in meters
                tr.stats.event_origin = evt.origins[0]  # depth in m
                tr.stats.event_mag = evt.magnitudes[0]
                tr.stats.dip = chan.dip
                tr.stats.cmpaz = chan.azimuth
                if verbose:
                    print('tr.stats=', tr.stats)
                if j == 0:
                    # set P and S arrival times and ray angles based on taupmodel(ak135)
                    arrivals = model.get_travel_times(
                        source_depth_in_km=evt.origins[0].depth / 1000.,
                        distance_in_degree=tr.stats.gcarc)
                    for k in range(len(
                            arrivals)):  # always take the first P or S arrival
                        if (arrivals[k].name == 'P'
                                or arrivals[k].name == 'p') and Ptime == 0:
                            # ray parameter=rsin(th)/v in s/radians
                            (Ptime, Prayp,
                             Pinc_angle) = (arrivals[k].time,
                                            arrivals[k].ray_param,
                                            arrivals[k].incident_angle)
                            #  print('P traveltime, ray_p, inc_angle=: ', arrivals[k].time,arrivals[k].ray_param,arrivals[k].incident_angle,evt.origins[0].depth/1000.)
                        elif (arrivals[k].name == 'S'
                              or arrivals[k].name == 's') and Stime == 0:
                            (Stime, Srayp,
                             Sinc_angle) = (arrivals[k].time,
                                            arrivals[k].ray_param,
                                            arrivals[k].incident_angle)

                    if verbose:
                        print('%s.%s Dist %4.1f   Ptime %4.1f   Stime %4.1f' %
                              (tr.stats.network, tr.stats.station,
                               tr.stats.distance / 1000., Ptime, Stime))
                    (Parr, Sarr) = (evt.origins[0].time + Ptime,
                                    evt.origins[0].time + Stime)
                tr.stats.Parr = {
                    'arrival_time': Ptime,
                    'time': Parr,
                    'rayp': Prayp,
                    'inc_angle': Pinc_angle
                }
                tr.stats.Sarr = {
                    'arrival_time': Stime,
                    'time': Sarr,
                    'rayp': Srayp,
                    'inc_angle': Sinc_angle
                }

            if write_sac:
                write_stream_to_sac(str1, ext=str(id_evt))
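
The first-arrival search in the loop above can be narrowed with get_travel_times' phase_list argument; a small sketch of the same lookup:

from obspy.taup import TauPyModel

model = TauPyModel(model='ak135')
arrivals = model.get_travel_times(source_depth_in_km=10.0,
                                  distance_in_degree=45.0,
                                  phase_list=['P', 'p'])
first_p = arrivals[0]  # arrivals are sorted by time
print(first_p.time, first_p.ray_param, first_p.incident_angle)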
Example #5
import numpy as np
import scipy as sp
import scipy.signal
import matplotlib.pyplot as plt
from obspy.geodetics.base import degrees2kilometers

# epidist (epicentral distances) and signal (the sampled amplitudes) are
# assumed to be defined earlier in the script
#lam = np.linspace(0.1 * degrees2kilometers(1)*1000., (epidist.max()-epidist.min())*degrees2kilometers(1)*1000.)

lam = np.linspace(0.1, (epidist.max() - epidist.min()))

waveno = 1./lam

angular_k = waveno * 2. * np.pi
pgram = sp.signal.lombscargle(epidist, signal, angular_k)
norm_pgram = np.sqrt( 4.*(pgram / signal.shape[0])  )


plt.figure(figsize=(14,4))
plt.plot(lam, norm_pgram)
plt.xlabel(r"Wavelength $\lambda$ (deg)")
# NOTE: these tick positions are in meters and match the commented-out
# definition of lam above; with lam in degrees use deg_ticks = np.arange(10)
deg_ticks = np.arange(10) * degrees2kilometers(1) * 1000
deg_labels = ['{:2.1f}'.format(d) for d in np.arange(10)]
plt.xticks(deg_ticks, deg_labels)
plt.tight_layout()

# test with fft
nout = 1000
T = 3.0
d = T / nout

x = np.linspace(0, T * 2.0 * np.pi, nout)
y = np.sin(x)
f = np.fft.fft(y)
freq = np.fft.fftfreq(len(y), d)

# will produce fft and its frequencies
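
A self-contained check of the sqrt(4*pgram/N) normalization used above: for a unit-amplitude sinusoid, the normalized periodogram peaks at ~1 at the true wavelength:

import numpy as np
from scipy.signal import lombscargle

rng = np.random.default_rng(0)
x = np.sort(rng.uniform(0., 50., 500))   # irregular sampling points
y = np.sin(2. * np.pi * x / 2.0)         # unit amplitude, wavelength 2.0

lam = np.linspace(0.5, 10., 200)         # trial wavelengths
angular_k = 2. * np.pi / lam             # angular wavenumbers
pgram = lombscargle(x, y, angular_k)
norm_pgram = np.sqrt(4. * pgram / y.shape[0])

print(lam[np.argmax(norm_pgram)], norm_pgram.max())  # ~2.0, ~1.0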
Example #6
import numpy as np
from obspy.geodetics.base import degrees2kilometers
from obspy.taup.utils import get_phase_names
from obspy.taup.helper_classes import SlownessModelError
from obspy.taup.tau_model import TauModel
from obspy.taup.taup_time import TauPTime

from stream2segment.utils import get_progressbar

# global vars
DEFAULT_SD_MAX = 700.0  # in km
DEFAULT_RD_MAX = 0.0  # in km
DEFAULT_DIST_MAX = 180.0  # in degrees
DEFAULT_PWAVEVELOCITY = 5.0  # in km/sec (must be a float)
DEFAULT_SWAVEVELOCITY = 3.0  # in km/sec (must be a float)
DEFAULT_DEG2KM = degrees2kilometers(1)


def timemaxdecimaldigits(time_err_tolerance):
    """Return the number of decimal digits needed to represent
    ``time_err_tolerance`` exactly (e.g. 0.01 -> 2). Raises ValueError if
    the tolerance is finer than one millisecond."""
    numdigits = 0
    value = time_err_tolerance
    while int(value) != value:
        value *= 10
        numdigits += 1
        if numdigits > 3:
            raise ValueError(
                "MAX_TIME_ERR_TOL cannot be lower than 0.001 (one millisecond)"
            )
    return numdigits
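
For example, a tolerance of 0.01 s needs two decimal digits, and anything finer than one millisecond raises:

print(timemaxdecimaldigits(0.5))    # 1
print(timemaxdecimaldigits(0.01))   # 2
print(timemaxdecimaldigits(0.001))  # 3
# timemaxdecimaldigits(0.0001) raises ValueError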

Example #7
def get_domain(lat_source,
               lon_source,
               lat_max_in_,
               lat_min_in_,
               lon_max_in_,
               lon_min_in_,
               dimension,
               dchosen=50):
    """Build a simulation domain around the source location and return its
    geometry (Cartesian bounds in meters, grid steps, and lat/lon extent)."""

    lat_max_in = lat_max_in_
    lat_min_in = lat_min_in_
    if (abs(lat_min_in - lat_max_in) < 1e-3):
        lat_min_in -= 0.1
    lon_max_in = lon_max_in_
    lon_min_in = lon_min_in_
    if (abs(lon_min_in - lon_max_in) < 1e-3):
        lon_min_in -= 0.1

    factor = 0
    dshift = 15000.
    #dchosen = 80

    diff = abs(lat_max_in_ - lat_min_in_)
    if diff < 0.25:
        lat_max_in = lat_max_in_ + diff / 2.
        lat_min_in = lat_min_in_ - diff / 2.

    diff = abs(lon_max_in_ - lon_min_in_)
    if diff < 0.25:
        lon_max_in = lon_max_in_ + diff / 2.
        lon_min_in = lon_min_in_ - diff / 2.

    dlon = abs(lon_max_in - lon_min_in) / dchosen
    dlat = abs(lat_max_in - lat_min_in) / dchosen

    lat_max = degrees2kilometers(lat_max_in) * 1000.
    lat_min = degrees2kilometers(lat_min_in) * 1000.
    lon_max = degrees2kilometers(lon_max_in) * 1000.
    lon_min = degrees2kilometers(lon_min_in) * 1000.

    dx = abs(lon_max - lon_min) / dchosen
    dy = abs(lat_max - lat_min) / dchosen
    dz = 200.

    xmin, xmax = lon_min - factor * dy - dshift, lon_max + factor * dy + dshift
    ymin, ymax = lat_min - factor * dx - dshift, lat_max + factor * dx + dshift
    zmax = 30000.

    ## Transform domain to make x and y powers of two
    xmin_, xmax_, dx_ = transform_domain_power2(xmin, xmax, dx)
    #ymin_, ymax_, dy_ = transform_domain_power2(ymin, ymax, dy)
    xmin, xmax, dx = xmin_, xmax_, dx_
    #ymin, ymax, dy = ymin_, ymax_, dy_
    #int(2**nextpow2((xmax-xmin)/dx))

    if dimension == 3:

        if abs(dy) < 1e-5:
            dy = (ymax - ymin) / 10  ## DEFAULT VALUE

        ymin_, ymax_, dy_ = transform_domain_power2(ymin, ymax, dy)
        ymin, ymax, dy = ymin_, ymax_, dy_

        yy = np.arange(ymin, ymax, dy)
        ymin = yy[0]
        ymax = yy[-1]
        loc_ = np.argmin(abs(yy))
        if abs(yy[loc_]) < 1e-5:
            ymax -= yy[loc_]
            ymin -= yy[loc_]

    ## OLD before Jul 13 2020
    dx, dy = abs(xmax - xmin) / dchosen, abs(ymax - ymin) / dchosen

    domain = {}
    domain.update({'origin': (lat_source, lon_source)})
    domain.update({
        'latmin': lat_source + kilometer2degrees(ymin / 1000.),
        'latmax': lat_source + kilometer2degrees(ymax / 1000.)
    })
    domain.update({
        'lonmin': lon_source + kilometer2degrees(xmin / 1000.),
        'lonmax': lon_source + kilometer2degrees(xmax / 1000.)
    })
    domain.update({'xmin': xmin, 'xmax': xmax})
    domain.update({'ymin': ymin, 'ymax': ymax})
    domain.update({'zmin': 0., 'zmax': zmax})
    domain.update({'dx': dx, 'dy': dy, 'dz': dz})

    return domain
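
The lat/lon extent in the returned dict relies on ObsPy's spherical conversion between kilometers and degrees; a round-trip sanity check:

from obspy.geodetics.base import degrees2kilometers, kilometer2degrees

d_km = degrees2kilometers(1.0)        # ~111.19 km on a 6371 km sphere
print(d_km, kilometer2degrees(d_km))  # -> ~111.19, 1.0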
Example #8
def main(args):
    
    random.seed(datetime.now())
    
    if args.n_distances < 1:
        args.n_distances = None
    # print distance classifications
    if args.n_distances != None:
        print 'dist_class, dist_deg, dist_km'
        for dclass in range(0, args.n_distances, 1):
            dist_deg = util.classification2distance(dclass, args.n_distances)
            dist_km = geo.degrees2kilometers(dist_deg)
            print "{}   {:.2f}   {:.1f}".format(dclass, dist_deg, dist_km)
        print ''
     
    if args.n_magnitudes < 1:
        args.n_magnitudes = None
    # print magtitude classifications
    if args.n_magnitudes != None:
        print 'mag_class, mag'
        for mclass in range(0, args.n_magnitudes, 1):
            mag = util.classification2magnitude(mclass, args.n_magnitudes)
            print "{}   {:.2f}".format(mclass, mag)
        print ''
     
    if args.n_depths < 1:
        args.n_depths = None
    # print depth classifications
    if args.n_depths != None:
        print 'depth_class, depth'
        for dclass in range(0, args.n_depths, 1):
            depth = util.classification2depth(dclass, args.n_depths)
            print "{}   {:.1f}".format(dclass, depth)
        print ''
     
    if args.n_azimuths < 1:
        args.n_azimuths = None
    # print azimuth classifications
    if args.n_azimuths != None:
        print 'azimuth_class, azimuth'
        for aclass in range(0, args.n_azimuths, 1):
            azimuth = util.classification2azimuth(aclass, args.n_azimuths)
            print "{}   {:.1f}".format(aclass, azimuth)
        print ''
     
    
    if not os.path.exists(args.outpath):
        os.makedirs(args.outpath)
        
    # save arguments
    with open(os.path.join(args.outpath, 'params.pkl'), 'w') as file:
        file.write(pickle.dumps(args)) # use `pickle.loads` to do the reverse
        
    for dataset in ['train', 'validate', 'test']:
        for datatype in ['events', 'noise']:
            datapath = os.path.join(args.outpath, dataset, datatype)
            if not os.path.exists(datapath):
                os.makedirs(datapath)
            mseedpath = os.path.join(datapath, 'mseed')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            mseedpath = os.path.join(datapath, 'mseed_raw')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            if datatype == 'events':
                xmlpath = os.path.join(datapath, 'xml')
                if not os.path.exists(xmlpath):
                    os.makedirs(xmlpath)

        
    # read catalog of events
    #filenames = args.event_files_path + os.sep + '*.xml'
    catalog_dict = {}
    catalog_all = []
    for dirpath, dirnames, filenames in os.walk(args.event_files_path):
        for name in filenames:
            if name.endswith(".xml"):
                file = os.path.join(dirpath, name)
                catalog = read_events(file)
                target_count = int(args.event_fraction * float(catalog.count()))
                print catalog.count(), 'events:', 'read from:', file, 'will use:', target_count, 'since args.event_fraction=', args.event_fraction
                if (args.event_fraction < 1.0):
                    while catalog.count() > target_count:
                        del catalog[random.randint(0, catalog.count() - 1)]
                if not args.systematic:
                    tokens = name.split('_')
                    net_sta = tokens[0] + '_' + tokens[1]
                    if not net_sta in catalog_dict:
                        catalog_dict[net_sta] = catalog
                    else:
                        catalog_dict[net_sta] += catalog
                    # sort catalog by date
                    catalog_dict[net_sta] = Catalog(sorted(catalog_dict[net_sta], key=lambda e: e.origins[0].time))
                else:
                    catalog_all += catalog
    
    # read list of channels to use
    inventory_full = read_inventory(args.channel_file)
    inventory_full = inventory_full.select(channel=args.channel_prefix+'Z', sampling_rate=args.sampling_rate)
    #print(inventory)
    
    client = fdsn.Client(args.base_url)
    
    # get existing already processed event channel dictionary
    try:
        with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'r') as file:
            event_channel_dict = pickle.load(file)
    except IOError:
        event_channel_dict = {}
    print 'Existing event_channel_dict size:', len(event_channel_dict)

    n_noise = int(0.5 + float(args.n_streams) * args.noise_fraction)
    n_events = args.n_streams - n_noise
    n_validate = int(0.5 + float(n_events) * args.validation_fraction)
    n_test = int(0.5 + float(n_events) * args.test_fraction)
    n_train = n_events - n_validate - n_test
    n_count = 0
    n_streams = 0
    
    if args.systematic:
        event_ndx = 0
        net_ndx = 0
        sta_ndx = 0
        channel_ndx = -1


    
#     distance_id_count = {}
#     max_num_for_distance_id = {}
#     if args.n_distances != None:
#         # train
#         distance_id_count['train'] = [0] * args.n_distances
#         max_num_for_distance_id['train'] = 1 + int(2.0 * float(n_train) / float(args.n_distances))
#         print 'Maximum number events for each distance bin train:', max_num_for_distance_id['train']
#         # validate
#         distance_id_count['validate'] = [0] * args.n_distances
#         max_num_for_distance_id['validate'] = 1 + int(2.0 * float(n_validate) / float(args.n_distances))
#         print 'Maximum number events for each distance bin validate:', max_num_for_distance_id['validate']
#         # test
#         distance_id_count['test'] = [0] * args.n_distances
#         max_num_for_distance_id['test'] = 1 + int(2.0 * float(n_test) / float(args.n_distances))
#         print 'Maximum number events for each distance bin test:', max_num_for_distance_id['test']
        
    while args.systematic or n_streams < args.n_streams:
        
        try:
        
            # choose event or noise
            is_noise = n_streams >= n_events
            
            # reset validate test count if switching from event to  noise
            if n_streams == n_events:
                n_validate = int(0.5 + float(n_noise) * args.validation_fraction)
                n_test = int(0.5 + float(n_noise) * args.test_fraction)
                n_train = n_noise - n_validate - n_test
                n_count = 0
                
            # set out paths
            if is_noise:
                datatype = 'noise'
            else:
                datatype = 'events'
            if n_count < n_train:
                dataset = 'train'
            elif n_count < n_train + n_validate:
                dataset = 'validate'
            else:
                dataset = 'test'
            datapath = os.path.join(args.outpath, dataset, datatype)

            # get random channel from Inventory
            #inventory = inventory_full.select(time=origin.time)
            inventory = inventory_full
            
            if args.systematic:
                try:
                    catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx \
                        = get_systematic_channel(inventory, catalog_all, is_noise, event_ndx, net_ndx, sta_ndx, channel_ndx)
                except ValueError:
                    break
            else:
                try:
                    catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx = get_random_channel(inventory, catalog_dict, is_noise)
                except ValueError:
                    continue
                                
            distance_id = 0
            distance = -999.0
            magnitude = -999.0
            depth = -999.0
            azimuth = -999.0
            if not is_noise:
                dist_meters, azim, bazim = geo.gps2dist_azimuth(channel.latitude, channel.longitude, origin.latitude, origin.longitude, a=geo.WGS84_A, f=geo.WGS84_F)
                distance = geo.kilometer2degrees(dist_meters / 1000.0, radius=6371)
                azimuth = azim
                magnitude = event.preferred_magnitude().mag
                depth = origin.depth / 1000.0
                if args.n_distances != None:
                    distance_id = util.distance2classification(distance, args.n_distances)
#                                 if distance_id_count[dataset][distance_id] >= max_num_for_distance_id[dataset]:
#                                     print 'Skipping event_channel: distance bin', distance_id, 'for', dataset, 'already full:', \
#                                         distance_id_count[dataset][distance_id], '/', max_num_for_distance_id[dataset]
#                                     continue

            print ''
            print 'Event:', origin.time.isoformat(), event.event_descriptions[0].text, \
            ', Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(distance, geo.degrees2kilometers(distance), distance_id), \
            ', Mag: {:.2f}'.format(magnitude), \
            ', Depth(km): {:.1f}'.format(depth), \
            ', Az(deg): {:.1f}'.format(azimuth)
            print 'Retrieving channels:', (n_streams + 1), '/ ', args.n_streams, (', NOISE, ' if  is_noise else ', EVENT, '), 'event', event_ndx, origin.time, \
                ', net', net_ndx, ', sta', sta_ndx, ', chan', channel_ndx, \
                ', ', net.code, sta.code, \
                channel.code, channel.location_code, \
                channel.sample_rate
            # check station was available at origin.time
            if not sta.is_active(time=origin.time):
                print 'Skipping event_channel: station not active at origin.time:'
                continue
            #key = str(event_ndx) + '_' + str(net_ndx) + '_' + str(sta_ndx) + '_' + str(channel_ndx) + '_' + str(is_noise)
            key = str(event_ndx) + '_' + net.code + '_' + sta.code + '_' + channel.code + '_' + str(is_noise)
            if key in event_channel_dict:
                print 'Skipping event_channel: already processed.'
                continue
            event_channel_dict[key] = 1
                
            # get start time for waveform request
            ttime = get_first_P_travel_time(origin, channel)
            arrival_time = origin.time + ttime
            if is_noise:
                # get start time of next event
                event2 = catalog[event_ndx + 1]
                origin2 = event2.preferred_origin()
                # check that origins are at least min time apart
                if origin2.time - origin.time < MIN_INTER_EVENT_TIME:
                    print 'Skipping noise event_channel: inter event time too small: ', str(origin2.time - origin.time), \
                        origin2.time, origin.time
                    continue
                ttime2 = get_first_P_travel_time(origin2, channel)
                arrival_time2 = origin2.time + ttime2
                arrival_time = (arrival_time + ((arrival_time2 - arrival_time) / 2.0)) - args.window_start
            
            start_time = arrival_time - args.window_start
                                    
            # request data for 3 channels
            
            #for orientation in ['Z', 'N', 'E', '1', '2']:
            #    req_chan = args.channel_prefix + orientation
            channel_name = net.code + '_' + sta.code + '_' + channel.location_code + '_' + args.channel_prefix
            padded_start_time = start_time - WINDOW_PADDING_FDSN
            padded_end_time = start_time + args.window_length + 2.0 * WINDOW_PADDING_FDSN
            chan_param = args.channel_prefix + '?'
            # kluge to get url used for data request
            kwargs = {'network': net.code, 'station': sta.code, 'location': channel.location_code, 'channel': chan_param,
                      'starttime': padded_start_time, 'endtime': padded_end_time}                      
            #url = client._create_url_from_parameters('dataselect', DEFAULT_PARAMETERS['dataselect'],  **kwargs)
            url = fdsn.client.build_url(client.base_url, 'dataselect', client.major_versions['dataselect'], "query", parameters=kwargs)
            print '  java net.alomax.seisgram2k.SeisGram2K', '\"', url, '\"'
            try:
                stream = client.get_waveforms(  \
                                               net.code, sta.code, channel.location_code, chan_param, \
                                               padded_start_time, padded_end_time, \
                                               attach_response=True)
                
            except fdsn.header.FDSNException as ex:
                print 'Skipping channel:', channel_name, 'FDSNException:', ex, 
                continue
                                    
            print stream
            # TEST
#                         for trace in stream:
#                             print '==========> trace.stats', trace.stats
                
            # check some things
            if (len(stream) != 3):
                print 'Skipping channel: len(stream) != 3:', channel_name
                continue
            ntrace = 0
            for trace in stream:
                if (len(trace) < 1):
                    print 'Skipping trace: len(trace) < 1:', channel_name
                    continue
                if (trace.stats.starttime > start_time or trace.stats.endtime < start_time + args.window_length):
                    print 'Skipping trace: does not contain required time window:', channel_name
                    continue
                ntrace += 1
            if (ntrace != 3):
                print 'Skipping channel: ntrace != 3:', channel_name
                continue
            
            # pre-process streams
            # sort so that channels will be ingested in NN always in same order ENZ
            stream.sort(['channel'])
            # detrend - this is meant to be equivalent to detrend or a long period low-pass (e.g. at 100sec) applied to real-time data
            stream.detrend(type='linear')
            for trace in stream:
                # correct for required sampling rate
                if abs(trace.stats.sampling_rate - args.sampling_rate) / args.sampling_rate > 0.01:
                    trace.resample(args.sampling_rate)
                    
            # apply high-pass filter if requested
            if args.hp_filter_freq > 0.0:
                stream.filter('highpass', freq=args.hp_filter_freq, corners=args.hp_filter_corners)
            
            # check signal to noise ratio, if fail, repeat on 1sec hp data to capture local/regional events in longer period microseismic noise
            sn_type = 'BRB'
            first_pass = True
            while True:
                if is_noise:
                    snrOK = True
                else:
                    snrOK = False
                for trace in stream:
                    # slice with 1sec margin of error for arrival time to: 1) avoid increasing noise amplitude with signal, 2) avoid missing first P in signal
                    if (first_pass):
                        signal_slice = trace.slice(starttime=arrival_time - 1.0, endtime=arrival_time - 1.0 + args.snr_window_length)
                        noise_slice = trace.slice(endtime=arrival_time - 1.0) 
                    else:
                        # highpass at 1sec
                        filt_trace = trace.copy()
                        filt_trace.filter('highpass', freq=1.0, corners=4)
                        signal_slice = filt_trace.slice(starttime=arrival_time - 1.0, endtime=arrival_time - 1.0 + args.snr_window_length)
                        noise_slice = filt_trace.slice(endtime=arrival_time - 1.0) 
                        sn_type = '1HzHP'
                    # check signal to noise around arrival_time
                    # ratio of std
                    asignal = signal_slice.std()
                    anoise = noise_slice.std()
                    snr = asignal / anoise
                    print trace.id, sn_type, 'snr:', snr, 'std_signal:', asignal, 'std_noise:', anoise
                    # ratio of peak amplitudes (DO NOT USE, GIVE UNSTABLE RESULTS!)
#                                 asignal = signal_slice.max()
#                                 anoise = noise_slice.max()
#                                 snr = np.absolute(asignal / anoise)
#                                 print trace.id, sn_type, 'snr:', snr, 'amax_signal:', asignal, 'amax_noise:', anoise
                    if is_noise:
                        snrOK = snrOK and snr <= MAX_SNR_NOISE
                        if not snrOK:
                            break
                    else:
                        snrOK = snrOK or snr >= args.snr_accept
                if (first_pass and not snrOK and args.hp_filter_freq < 0.0):
                    first_pass = False
                    continue
                else:
                    break

            if (not snrOK):
                if is_noise:
                    print 'Skipping channel:', sn_type, 'snr >', MAX_SNR_NOISE,  'on one or more traces:', channel_name
                else:
                    print 'Skipping channel:', sn_type, 'snr < args.snr_accept:', args.snr_accept, 'on all traces:', channel_name
                continue
               
            # trim data to required window
            # try to make sure samples and start/end times align as closely as possible to first trace
            trace = stream.traces[0]
            trace = trace.slice(starttime=start_time, endtime=start_time + args.window_length, nearest_sample=True)
            start_time = trace.stats.starttime
            stream = stream.slice(starttime=start_time, endtime=start_time + args.window_length, nearest_sample=True)
            
            cstart_time = '%04d.%02d.%02d.%02d.%02d.%02d.%03d' % \
                (start_time.year, start_time.month, start_time.day, start_time.hour, start_time.minute, \
                 start_time.second, start_time.microsecond // 1000)

            # process each trace
            try:
                for trace in stream:
                    # correct for overall sensitivity or gain
                    trace.normalize(trace.stats.response.instrument_sensitivity.value)
                    trace.data = trace.data.astype(np.float32)
                    # write miniseed
                    #tracefile = os.path.join(datapath, 'mseed', trace.id + '.' + cstart_time + '.mseed')
                    #trace.write(tracefile, format='MSEED', encoding='FLOAT32')
                    #print 'Channel written:', tracefile, trace.count(), 'samples'
            except AttributeError as err:
                print 'Skipping channel:', channel_name, ': Error applying trace.normalize():', err
                continue
                
            filename_root =  channel_name + '.' + cstart_time

            # write raw miniseed
            streamfile = os.path.join(datapath, 'mseed_raw', filename_root + '.mseed')
            stream.write(streamfile, format='MSEED', encoding='FLOAT32')
            print 'Stream written:', stream.count(), 'traces:'
            print '  java net.alomax.seisgram2k.SeisGram2K', streamfile
                
            # store absolute maximum
            stream_max = np.absolute(stream.max()).max()
            # normalize by absolute maximum
            stream.normalize(global_max=True)
            
            # 20180521 AJL
            # spherical coordinates
            # raw data always in same order ENZ
            # tensor indexing is [traces, datapoints, comps]
            if args.spherical:
                rad2deg = 180.0 / math.pi
                # calculate modulus
                temp_square = np.add(np.square(stream.traces[0].data), np.add(np.square(stream.traces[1].data), np.square(stream.traces[2].data)))
                temp_modulus = np.sqrt(temp_square)
                # calculate azimuth
                temp_azimuth = np.add( np.multiply(np.arctan2(stream.traces[0].data, stream.traces[1].data), rad2deg), 180.0)
                # calculate inclination
                temp_inclination = np.multiply(np.arcsin(np.divide(stream.traces[2].data, temp_modulus)), rad2deg)
                # reset stream data to spherical coordinates
                stream.traces[0].data = temp_inclination
                stream.traces[1].data = temp_azimuth
                temp_modulus = np.multiply(temp_modulus, 100.0)  # increase scale for plotting purposes
                stream.traces[2].data = temp_modulus


            # put absolute maximum normalization in first element of data array, to seed NN magnitude estimation
            # 20180816 AJL - do not mix max with data
            # for trace in stream:
            #    trace.data[0] = stream_max
            print 'stream_max', stream_max
            

            # write processed miniseed
            streamfile = os.path.join(datapath, 'mseed', filename_root + '.mseed')
            stream.write(streamfile, format='MSEED', encoding='FLOAT32')
            print 'Stream written:', stream.count(), 'traces:'
            print '  java net.alomax.seisgram2k.SeisGram2K', streamfile
                
            # write event waveforms and distance_id in .tfrecords
            magnitude_id = 0
            depth_id = 0
            azimuth_id = 0
            if not is_noise:
#                             if args.n_distances != None:
#                                 distance_id_count[dataset][distance_id] += 1
                if args.n_magnitudes != None:
                    magnitude_id = util.magntiude2classification(magnitude, args.n_magnitudes)
                if args.n_depths != None:
                    depth_id = util.depth2classification(depth, args.n_depths)
                if args.n_azimuths != None:
                    azimuth_id = util.azimuth2classification(azimuth, args.n_azimuths)
            else:
                distance_id = -1
                distance = 0.0
            output_name = filename_root + '.tfrecords'
            output_path = os.path.join(datapath, output_name)
            writer = DataWriter(output_path)
            writer.write(stream, stream_max, distance_id, magnitude_id, depth_id, azimuth_id, distance, magnitude, depth, azimuth)
            if not is_noise:
                print '==== Event stream tfrecords written:', output_name, \
                'Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(distance, geo.degrees2kilometers(distance), distance_id), \
                ', Mag: {:.2f} ID: {}'.format(magnitude, magnitude_id), \
                ', Depth(km): {:.1f} ID: {}'.format(depth, depth_id), \
                ', Az(deg): {:.1f} ID: {}'.format(azimuth, azimuth_id)
            else:
                print '==== Noise stream tfrecords written:', output_name, 'ID: Dist {}, Mag {}, Depth {}, Az {}'.format(distance_id, magnitude_id, depth_id, azimuth_id)
                
            # write event data
            if not is_noise:
                filename = os.path.join(datapath, 'xml', filename_root + '.xml')
                event.write(filename, 'QUAKEML')
           
            n_streams += 1
            n_count += 1
                    
        except KeyboardInterrupt:
            print 'Stopping: KeyboardInterrupt'
            break

        except Exception as ex:
            print 'Skipping stream: Exception:', ex
            traceback.print_exc()
            continue

    print n_streams, 'streams:', 'written to:', args.outpath

    # save event_channel_dict
    with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'w') as file:
        file.write(pickle.dumps(event_channel_dict))
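
The accept/reject logic in the SNR loop above reduces to a ratio of standard deviations around the predicted arrival; a minimal sketch of that measurement (tr is assumed to be an obspy Trace, arrival_time an obspy UTCDateTime, and a fixed 10 s window stands in for args.snr_window_length):

# minimal SNR measurement, assuming tr (obspy.Trace) and
# arrival_time (obspy.UTCDateTime) are defined
signal_slice = tr.slice(starttime=arrival_time - 1.0,
                        endtime=arrival_time - 1.0 + 10.0)
noise_slice = tr.slice(endtime=arrival_time - 1.0)
snr = signal_slice.std() / noise_slice.std()
snr_ok = snr >= 10.0  # compare against the chosen acceptance threshold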
Example #9
import csv
import glob

from obspy import read_events
from obspy.geodetics.base import degrees2kilometers

evtfiles = glob.glob(
    '/home/ubuntu/isc-out-final/2010_iloc_phase_defined/*.xml')
with open('/home/ubuntu/p_stats_in_2010-m.csv',
          'w') as p_out, open('/home/ubuntu/s_stats_in_2010-m.csv',
                              'w') as s_out:
    p_writer = csv.writer(p_out)
    s_writer = csv.writer(s_out)
    for f in evtfiles:
        evts = read_events(f)
        if evts and evts[0]:
            evt = evts[0]
            origin = evt.preferred_origin()
            for pick in evt.picks:
                for arrival in origin.arrivals:
                    if arrival.pick_id != pick.resource_id.id:
                        continue
                    dist_deg = arrival.distance
                    dist_km = degrees2kilometers(dist_deg)
                    tt = pick.time - origin.time
                    res = arrival.time_residual
                    # dist_km / tt is the apparent velocity in km/s
                    if arrival.phase == 'P':
                        p_writer.writerow([dist_km / tt, dist_deg, res])
                    elif arrival.phase == 'S':
                        s_writer.writerow([dist_km / tt, dist_deg, res])
                    else:
                        print("New phase uncovered: " + str(arrival.phase))