Example 1
def ingv_horus(fname):
    """
    Reader for the INGV (Istituto Nazionale di Geofisica e Vulcanologia - Italy)
    Homogenized instrumental seismic catalog (HORUS).
    It reads a catalog in plain-text format, directly downloaded from
    http://horus.bo.ingv.it/.

    The CSEP Format has the following dtype:

    dtype = numpy.dtype([('id', 'S256'),
                         ('origin_time', '<i8'),
                         ('latitude', '<f4'),
                         ('longitude', '<f4'),
                         ('depth', '<f4'),
                         ('magnitude', '<f4')])

    """

    ind = {
        'year': (0, "<i4"),
        'month': (1, "<i4"),
        'day': (2, "<i4"),
        'hour': (3, "<i4"),
        'minute': (4, "<i4"),
        'second': (5, "<f4"),
        'lat': (6, "<f4"),
        'lon': (7, "<f4"),
        'depth': (8, "<f4"),
        'Mw': (9, "<f4")
    }
    out = []

    data = numpy.genfromtxt(fname,
                            skip_header=1,
                            names=ind.keys(),
                            usecols=[i[0] for i in ind.values()],
                            dtype=[i[1] for i in ind.values()])
    for n, line in enumerate(data):
        dt = datetime.timedelta(0, 0, 0)
        if line['second'] >= 60.:
            line['second'] -= 60.
            dt += datetime.timedelta(minutes=1)
        if line['minute'] >= 60.:
            dt += datetime.timedelta(hours=1)
            line['minute'] -= 60.
        if line['hour'] >= 24.:
            dt += datetime.timedelta(days=1)
            line['hour'] -= 24.
        time = datetime.datetime(int(line['year']), int(line['month']),
                                 int(line['day']), int(line['hour']),
                                 int(line['minute']), int(line['second'])) + dt
        event_tuple = (time, datetime_to_utc_epoch(time), float(line["lat"]),
                       float(line["lon"]), float(line["depth"]),
                       float(line["Mw"]))
        out.append(event_tuple)

    return out
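The reader above returns a plain list of event tuples, so its output can be handed straight to a catalog constructor, following the same pattern shown in the zmap_ascii docstring in Example 6. A minimal usage sketch (the file path is hypothetical, and the constructor call assumes the tuple order matches the catalog dtype):

import csep.core.catalogs

# hypothetical local download from http://horus.bo.ingv.it/
events = ingv_horus('horus_catalog.txt')
catalog = csep.core.catalogs.CSEPCatalog(catalog=events)
print(catalog)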
Example 2
 def to_dict(self):
     adict = {
         'id': self.id,
         'magnitude': self.magnitude,
         'latitude': self.latitude,
         'longitude': self.longitude,
         'time': datetime_to_utc_epoch(self.time)
     }
     return adict
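Because every value in the returned dict is a plain number or string (the event time is converted to an epoch in milliseconds), the result is directly JSON-serializable. A small hedged sketch, where `event` stands for any instance of the class defining to_dict above:

import json

summary = event.to_dict()
print(json.dumps(summary, indent=2))  # e.g. {"id": ..., "magnitude": ..., "time": <epoch millis>}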
Example 3
    def __next__(self):
        """ Allows the class to be used in a for-loop. Handles the case where the catalogs are stored as a list or
        loaded in using a generator function. The latter solves the problem where memory is a concern or all of the
        catalogs should not be held in memory at once. """
        is_generator = True
        try:
            n_items = len(self.catalogs)
            is_generator = False
            assert self.n_cat == n_items
            # here, we have reached the end of the list, simply reset the index to the front
            if self._idx >= self.n_cat:
                self._idx = 0
                raise StopIteration()
            catalog = self.catalogs[self._idx]
            self._idx += 1
        except TypeError:
            # handle the generator case: a generator has no __len__ attribute, but a list (or other sized container) does.
            try:
                catalog = next(self.catalogs)
                self._idx += 1
            except StopIteration:
                # gets a new generator function after the old one is exhausted
                if not self.store:
                    self.catalogs = self.loader(format=self.catalog_format,
                                                filename=self.filename,
                                                region=self.region,
                                                name=self.name)
                else:
                    self.catalogs = self._catalogs
                    del self._catalogs
                    if self.apply_filters:
                        self.apply_filters = False

                self.n_cat = self._idx
                self._idx = 0
                raise StopIteration()

        # apply filtering to catalogs, these can throw errors if not configured properly
        if self.apply_filters:
            if self.filters:
                catalog = catalog.filter(self.filters)
            if self.apply_mct:
                catalog = catalog.apply_mct(
                    self.event.magnitude,
                    datetime_to_utc_epoch(self.event.time))
            if self.filter_spatial:
                catalog = catalog.filter_spatial(self.region)

        if is_generator and self.store:
            self._catalogs.append(catalog)

        # return potentially filtered data
        return catalog
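Because __next__ resets its index (or rebuilds the exhausted generator) before raising StopIteration, the container can be iterated more than once with an ordinary for-loop. A minimal sketch of the intended call pattern, assuming the class also defines __iter__ returning self and that `forecast` is an instance holding the catalogs (`process` is a placeholder for any per-catalog work):

# first pass: filters are applied (and catalogs are stored when self.store is True)
for catalog in forecast:
    process(catalog)

# second pass: the stored (or reloaded) catalogs are yielded again without re-filtering
for catalog in forecast:
    process(catalog)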
Example 4
    def _get_catalog_as_ndarray(self):
        """
        This function will be called anytime that a catalog is assigned to self.catalog

        The purpose of this function is to ensure that the catalog is being properly parsed into the correct format, and
        to prevent users of the catalog classes from assigning improper data types.

        This also acts as a convenience to allow easy assignment of different types to the catalog. The default
        implementation of this function expects that the data are arranged as a collection of tuples corresponding to
        the catalog data type.
        """
        """
        Converts eventlist into ndarray format.

        Note:
         Failure state exists if self.catalog is not bound
            to instance explicity.
        """
        # short-circuit
        if isinstance(self.catalog, numpy.ndarray):
            return self.catalog
        # if catalog is not a numpy array, class must have dtype information
        catalog_length = len(self.catalog)
        catalog = numpy.empty(catalog_length, dtype=self.dtype)
        if catalog_length == 0:
            return catalog
        if isinstance(self.catalog[0], (list, tuple)):
            for i, event in enumerate(self.catalog):
                catalog[i] = tuple(event)
        elif isinstance(self.catalog[0], SummaryEvent):
            for i, event in enumerate(self.catalog):
                catalog[i] = (event.id, datetime_to_utc_epoch(event.time),
                              event.latitude, event.longitude, event.depth, event.magnitude)
        else:
            raise TypeError("Catalog data must be list of events tuples with order:\n"
                            f"{', '.join(self.dtype.names)} or \n"
                            "list of SummaryEvent type.")
        return catalog
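As the docstring notes, the same conversion path accepts either tuples in dtype order or SummaryEvent objects. A hedged sketch of the two assignment forms, reusing the CSEPCatalog constructor pattern from the zmap_ascii docstring in Example 6 (the field values are made up for illustration):

import csep.core.catalogs

cat = csep.core.catalogs.CSEPCatalog(catalog=[])  # empty catalog with the default dtype
# a list of tuples ordered as: id, origin_time (epoch millis), latitude, longitude, depth, magnitude
cat.catalog = [('ev0', 0, 34.10, -117.20, 8.0, 4.5)]
# alternatively, a list of SummaryEvent objects (e.g. from a ComCat query) is also accepted:
# cat.catalog = summary_events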
Example 5
def ingv_emrcmt(fname):
    """
    Reader for the INGV (Istituto Nazionale di Geofisica e Vulcanologia - Italy)  European-
    Mediterranean regional Centroid Moment Tensor Catalog.
    It reads a catalog in .csv format, directly downloaded from http://rcmt2.bo.ingv.it/ using the Catalog Search (Beta
    version).
    
    
    The CSEP Format has the following dtype:

    dtype = numpy.dtype([('id', 'S256'),
                         ('origin_time', '<i8'),
                         ('latitude', '<f4'),
                         ('longitude', '<f4'),
                         ('depth', '<f4'),
                         ('magnitude', '<f4')])

    Events with implausible magnitudes (treated as typographical errors) and repeated
    events with the same id are discarded.

    """

    ind = {'evcat_id': 0,
           'date': 1,
           'time': 2,
           'sec_dec': 3,
           'lat': 4,
           'lon': 5,
           'depth': 6,
           'Mw': 61}

    def is_header_line(line):
        return line[0] == 'ev_id'

    out = []
    evcat_id = []
    n_event = 0
    with open(fname) as file_:
        reader = csv.reader(file_)
        for n, line in enumerate(reader):
            if is_header_line(line):
                continue
            try:
                date = line[ind['date']].replace('-', '/')
                time = line[ind['time']].replace(' ', '0')
                sec_frac = line[ind['sec_dec']].replace(' ', '')
                if time.endswith(':'):
                    time += '00'
                date_time_dict = _parse_datetime_to_zmap(date,
                                                         time + '.' + sec_frac)
            except ValueError:
                msg = ("Could not parse date/time string '%s' and '%s' to a valid "
                       "time" % (line[ind['date']], line[ind['time']]))
                warnings.warn(msg, RuntimeWarning)
                continue

            dt = datetime.datetime(
                date_time_dict['year'],
                date_time_dict['month'],
                date_time_dict['day'],
                date_time_dict['hour'],
                date_time_dict['minute'],
                date_time_dict['second']
            )
            if 0. < float(line[ind["Mw"]]) < 10.0:
                event_tuple = (
                    n_event,
                    datetime_to_utc_epoch(dt),
                    float(line[ind["lat"]]),
                    float(line[ind["lon"]]),
                    float(line[ind["depth"]]),
                    float(line[ind["Mw"]])
                )
                n_event += 1
                evcat_id.append(line[ind["evcat_id"]])
                out.append(event_tuple)

        # indices of duplicate ids (the first occurrence of each id is kept)
        rep_events = [i for i in range(len(evcat_id))
                      if i not in numpy.unique(numpy.array(evcat_id),
                                               return_index=True)[1]]
        # pop from the highest index first so earlier indices remain valid
        for rep_id in sorted(rep_events, reverse=True):
            out.pop(rep_id)
        print('Removed %i badly formatted events' % (n + 1 - n_event))
        print('Removed %i repeated events' % len(rep_events))
        
    return out
Example 6
def zmap_ascii(fname, delimiter=None):
    """
    Reads the CSEP1 ZMAP ASCII format into a list of event tuples. This can be passed into a catalog object
    constructor using

    $ catalog = csep.core.catalogs.CSEPCatalog(catalog=zmap_ascii(fname), **kwargs)

    Many of the catalogs from the CSEP1 testing center were empty, indicating that no observed earthquakes were
    available during the time period of the catalog. In the case of an empty catalog, this function will return an
    empty list. The catalog object should still be created, but it will contain zero events, and it can still be
    used for evaluations and plotting as normal.

    The CSEP Format has the following dtype:

    dtype = numpy.dtype([('longitude', numpy.float32),
                        ('latitude', numpy.float32),
                        ('year', numpy.int32),
                        ('month', numpy.int32),
                        ('day', numpy.int32),
                        ('magnitude', numpy.float32),
                        ('depth', numpy.float32),
                        ('hour', numpy.int32),
                        ('minute', numpy.int32),
                        ('second', numpy.int32)])

    Args:
        fname: absolute path to csep1 catalog file

    Returns:
        list: list of tuples representing above type, empty if no events were found
    """

    # IntEnum so that the column members can be used directly as array indices
    class ColumnIndex(enum.IntEnum):
        Longitude = 0
        Latitude = 1
        DecimalYear = 2
        Month = 3
        Day = 4
        Magnitude = 5
        Depth = 6
        Hour = 7
        Minute = 8
        Second = 9

        # Error columns
        HorizontalError = 10
        DepthError = 11
        MagnitudeError = 12

        NetworkName = 13
        NumColumns = 14

    # short-circuit for empty file
    if os.stat(fname).st_size == 0:
        return []

    # arrange file into list of tuples
    out = []
    zmap_catalog_data = numpy.loadtxt(fname, delimiter=delimiter)
    for event_id, line in enumerate(zmap_catalog_data):
        dt = datetime.datetime(
            int(line[ColumnIndex.DecimalYear]),
            int(line[ColumnIndex.Month]),
            int(line[ColumnIndex.Day]),
            int(line[ColumnIndex.Hour]),
            int(line[ColumnIndex.Minute]),
            int(line[ColumnIndex.Second])
        )
        event_tuple = (
            event_id,
            datetime_to_utc_epoch(dt),
            line[ColumnIndex.Latitude],
            line[ColumnIndex.Longitude],
            line[ColumnIndex.Depth],
            line[ColumnIndex.Magnitude],
        )
        out.append(event_tuple)
    return out
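The docstring's claim about empty CSEP1 catalogs can be exercised directly: a zero-byte file short-circuits to an empty list, and the resulting catalog simply contains zero events. A hedged sketch (the file name is hypothetical, and event_count is assumed from the catalog API):

import csep.core.catalogs

events = zmap_ascii('empty_csep1_catalog.dat')  # zero-byte file -> []
catalog = csep.core.catalogs.CSEPCatalog(catalog=events)
print(catalog.event_count)  # expected to print 0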
Example 7
def ndk(filename):
    """
    Reads an NDK file to a tuple of events.

    This code was modified from the obspy v1.2.2 implementation to work with CSEP Catalog objects. The original source
    code can be found at https://github.com/obspy/obspy/blob/master/obspy/io/ndk/core.py.

    Args:
        filename: file or file-like object
    """
    # this function first parses the data into a human-readable dict with appropriate values and then returns a
    # list of event tuples that can be used to construct a CSEP catalog object.

    def _read_lines(line1, line2, line3, line4, line5):
        # First line: Hypocenter line
        # [1-4]   Hypocenter reference catalog (e.g., PDE for USGS location,
        #         ISC for #ISC catalog, SWE for surface-wave location,
        #         [Ekstrom, BSSA, 2006])
        # [6-15]  Date of reference event
        # [17-26] Time of reference event
        # [28-33] Latitude
        # [35-41] Longitude
        # [43-47] Depth
        # [49-55] Reported magnitudes, usually mb and MS
        # [57-80] Geographical location (24 characters)
        rec = {}
        rec["hypocenter_reference_catalog"] = line1[:4].strip()
        rec["date"] = line1[5:15].strip()
        rec["time"] = line1[16:26]
        rec["hypo_lat"] = float(line1[27:33])
        rec["hypo_lng"] = float(line1[34:41])
        rec["hypo_depth_in_km"] = float(line1[42:47])
        rec["mb"], rec["MS"] = map(float, line1[48:55].split())
        rec["location"] = line1[56:80].strip()

        # Second line: CMT info (1)
        # [1-16]  CMT event name. This string is a unique CMT-event identifier.
        #         Older events have 8-character names, current ones have
        #         14-character names.  See note (1) below for the naming
        #         conventions used.
        # [18-61] Data used in the CMT inversion. Three data types may be used:
        #         Long-period body waves (B), Intermediate-period surface waves
        #         (S), and long-period mantle waves (M). For each data type,
        #         three values are given: the number of stations used, the
        #         number  of components used, and the shortest period used.
        # [63-68] Type of source inverted for:
        #         "CMT: 0" - general moment tensor;
        #         "CMT: 1" - moment tensor with constraint of zero trace
        #             (standard);
        #         "CMT: 2" - double-couple source.
        # [70-80] Type and duration of moment-rate function assumed in the
        #         inversion.  "TRIHD" indicates a triangular moment-rate
        #         function, "BOXHD" indicates a boxcar moment-rate function.
        #         The value given is half the duration of the moment-rate
        #         function. This value is assumed in the inversion, following a
        #         standard scaling relationship (see note (2) below), and is
        #         not derived from the analysis.
        rec["cmt_event_name"] = line2[:16].strip()

        data_used = line2[17:61].strip()
        # Use regex to get the data used in case the data types are in a
        # different order.
        data_used = re.findall(r"[A-Z]:\s*\d+\s+\d+\s+\d+", data_used)
        rec["data_used"] = []
        for data in data_used:
            data_type, count = data.split(":")
            if data_type == "B":
                data_type = "body waves"
            elif data_type == "S":
                data_type = "surface waves"
            elif data_type == "M":
                data_type = "mantle waves"
            else:
                msg = "Unknown data type '%s'." % data_type
                raise ValueError(msg)

            sta, comp, period = count.strip().split()

            rec["data_used"].append({
                "wave_type": data_type,
                "station_count": int(sta),
                "component_count": int(comp),
                "shortest_period": float(period)
            })

        source_type = line2[62:68].strip().upper().replace(" ", "")
        if source_type == "CMT:0":
            rec["source_type"] = "general"
        elif source_type == "CMT:1":
            rec["source_type"] = "zero trace"
        elif source_type == "CMT:2":
            rec["source_type"] = "double couple"
        else:
            msg = "Unknown source type."
            raise ValueError(msg)

        mr_type, mr_duration = [i.strip() for i in line2[69:].split(":")]
        mr_type = mr_type.strip().upper()
        if mr_type == "TRIHD":
            rec["moment_rate_type"] = "triangle"
        elif mr_type == "BOXHD":
            rec["moment_rate_type"] = "box car"
        else:
            msg = "Moment rate function '%s' unknown." % mr_type
            raise ValueError(msg)

        # Specified as half the duration in the file.
        rec["moment_rate_duration"] = float(mr_duration) * 2.0

        # Third line: CMT info (2)
        # [1-58]  Centroid parameters determined in the inversion. Centroid
        #         time, given with respect to the reference time, centroid
        #         latitude, centroid longitude, and centroid depth. The value
        #         of each variable is followed by its estimated standard error.
        #         See note (3) below for cases in which the hypocentral
        #         coordinates are held fixed.
        # [60-63] Type of depth. "FREE" indicates that the depth was a result
        #         of the inversion; "FIX " that the depth was fixed and not
        #         inverted for; "BDY " that the depth was fixed based on
        #         modeling of broad-band P waveforms.
        # [65-80] Timestamp. This 16-character string identifies the type of
        #         analysis that led to the given CMT results and, for recent
        #         events, the date and time of the analysis. This is useful to
        #         distinguish Quick CMTs ("Q-"), calculated within hours of an
        #         event, from Standard CMTs ("S-"), which are calculated later.
        if line3[0:9] != "CENTROID:":
            raise IOError("parse error: file should have CENTROID ")
        numbers = [line3[10:18], line3[18:22], line3[22:29], line3[29:34],
                   line3[34:42], line3[42:47], line3[47:53], line3[53:58]]
        rec["centroid_time"], rec["centroid_time_error"], \
        rec["centroid_latitude"], rec["centroid_latitude_error"], \
        rec["centroid_longitude"], rec["centroid_longitude_error"], \
        rec["centroid_depth_in_km"], rec["centroid_depth_in_km_error"] = \
            map(float, numbers)
        type_of_depth = line3[59:63].strip().upper()

        if type_of_depth == "FREE":
            rec["type_of_centroid_depth"] = "from moment tensor inversion"
        elif type_of_depth == "FIX":
            rec["type_of_centroid_depth"] = "from location"
        elif type_of_depth == "BDY":
            rec["type_of_centroid_depth"] = "from modeling of broad-band P " \
                                            "waveforms"
        else:
            msg = "Unknown type of depth '%s'." % type_of_depth
            raise ValueError(msg)

        timestamp = line3[64:].strip().upper()
        rec["cmt_timestamp"] = timestamp
        if timestamp.startswith("Q-"):
            rec["cmt_type"] = "quick"
        elif timestamp.startswith("S-"):
            rec["cmt_type"] = "standard"
        # This is invalid but occurs a lot so we include it here.
        elif timestamp.startswith("O-"):
            rec["cmt_type"] = "unknown"
        else:
            msg = "Invalid CMT timestamp '%s' for event %s." % (
                timestamp, rec["cmt_event_name"])
            raise ValueError(msg)

        # Fourth line: CMT info (3)
        # [1-2]   The exponent for all following moment values. For example, if
        #         the exponent is given as 24, the moment values that follow,
        #         expressed in dyne-cm, should be multiplied by 10**24.
        # [3-80]  The six moment-tensor elements: Mrr, Mtt, Mpp, Mrt, Mrp, Mtp,
        #         where r is up, t is south, and p is east. See Aki and
        #         Richards for conversions to other coordinate systems. The
        #         value of each moment-tensor element is followed by its
        #         estimated standard error. See note (4) below for cases in
        #         which some elements are constrained in the inversion.
        # Exponent converts to dyne*cm. To convert to N*m it has to be decreased
        # seven orders of magnitude.
        exponent = int(line4[:2]) - 7
        # Directly set the exponent instead of calculating it to enhance
        # precision.
        rec["m_rr"], rec["m_rr_error"], rec["m_tt"], rec["m_tt_error"], \
        rec["m_pp"], rec["m_pp_error"], rec["m_rt"], rec["m_rt_error"], \
        rec["m_rp"], rec["m_rp_error"], rec["m_tp"], rec["m_tp_error"] = \
            map(lambda x: float("%sE%i" % (x, exponent)), line4[2:].split())

        # Fifth line: CMT info (4)
        # [1-3]   Version code. This three-character string is used to track
        #         the version of the program that generates the "ndk" file.
        # [4-48]  Moment tensor expressed in its principal-axis system:
        #         eigenvalue, plunge, and azimuth of the three eigenvectors.
        #         The eigenvalue should be multiplied by 10**(exponent) as
        #         given on line four.
        # [50-56] Scalar moment, to be multiplied by 10**(exponent) as given on
        #         line four.
        # [58-80] Strike, dip, and rake for first nodal plane of the
        #         best-double-couple mechanism, repeated for the second nodal
        #         plane.  The angles are defined as in Aki and Richards. The
        #         format for this string should not be considered fixed.
        rec["version_code"] = line5[:3].strip()
        rec["scalar_moment"] = float(line5[49:56]) * (10 ** exponent)
        # Calculate the moment magnitude.
        rec["Mw"] = 2.0 / 3.0 * (math.log10(rec["scalar_moment"]) - 9.1)

        principal_axis = line5[3:48].split()
        rec["principal_axis"] = []
        for axis in zip(*[iter(principal_axis)] * 3):
            rec["principal_axis"].append({
                # Again set the exponent directly to avoid even more rounding
                # errors.
                "length": "%sE%i" % (axis[0], exponent),
                "plunge": float(axis[1]),
                "azimuth": float(axis[2])
            })

        nodal_planes = map(float, line5[57:].strip().split())
        rec["nodal_plane_1"] = {
            "strike": next(nodal_planes),
            "dip": next(nodal_planes),
            "rake": next(nodal_planes)
        }
        rec["nodal_plane_2"] = {
            "strike": next(nodal_planes),
            "dip": next(nodal_planes),
            "rake": next(nodal_planes)
        }

        return rec

    out = []

    if not hasattr(filename, "read"):
        # Try to open it as a path; otherwise assume the raw data itself was passed in.
        try:
            with open(filename, "rt") as fh:
                data = fh.read()
        except Exception:
            try:
                data = filename.decode()
            except Exception:
                data = str(filename)
            data = data.strip()
    else:
        data = filename.read()
        if hasattr(data, "decode"):
            data = data.decode()

    # Create iterator that yields lines.
    def lines_iter():
        prev_line = -1
        while True:
            next_line = data.find("\n", prev_line + 1)
            if next_line < 0:
                break
            yield data[prev_line + 1: next_line]
            prev_line = next_line
        if len(data) > prev_line + 1:
            yield data[prev_line + 1:]

    # Loop over 5 lines at once.
    for _i, lines in enumerate(zip_longest(*[lines_iter()] * 5)):
        if None in lines:
            msg = "Skipped last %i lines. Not a multiple of 5 lines." % (
                lines.count(None))
            warnings.warn(msg, RuntimeWarning)
            continue

        # Parse the lines to a human readable dictionary.
        try:
            record = _read_lines(*lines)
        # skip records that cannot be parsed
        except (ValueError, IOError):
            # exc = traceback.format_exc()
            msg = (
                "Could not parse event %i (faulty file?). Will be "
                "skipped." % (_i + 1))
            warnings.warn(msg, RuntimeWarning)
            continue

        # Assemble the time for the reference origin.
        try:
            date_time_dict = _parse_datetime_to_zmap(record["date"], record["time"])
        except ValueError:
            msg = ("Invalid time in event %i. '%s' and '%s' cannot be "
                   "assembled to a valid time. Event will be skipped.") % \
                  (_i + 1, record["date"], record["time"])
            warnings.warn(msg, RuntimeWarning)
            continue

        # we are stripping off a significant amount of information from the gCMT catalog
        # if more information is required please use the obspy implementation
        dt = datetime.datetime(
            date_time_dict['year'],
            date_time_dict['month'],
            date_time_dict['day'],
            date_time_dict['hour'],
            date_time_dict['minute'],
            date_time_dict['second']
        )
        out_tup = (_i,
                   datetime_to_utc_epoch(dt),
                   record['hypo_lat'],
                   record['hypo_lng'],
                   record["hypo_depth_in_km"],
                   record["Mw"])
        out.append(out_tup)
    return out
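The moment magnitude parsed from the fifth line is computed from the scalar moment as Mw = 2/3 * (log10(M0) - 9.1), with M0 in N*m after the exponent has been shifted down from dyne*cm. A quick worked check with a made-up scalar moment:

import math

scalar_moment = 1.1e19  # hypothetical M0 in N*m
mw = 2.0 / 3.0 * (math.log10(scalar_moment) - 9.1)
print(round(mw, 2))     # roughly 6.63 for this value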
Example 8
 def end_epoch(self):
     return datetime_to_utc_epoch(self.end_time)
Example 9
 def start_epoch(self):
     return datetime_to_utc_epoch(self.start_time)
Example 10
 def test_for_consistency_dt_to_dt(self):
     dt = datetime.datetime(1984,4,24,21,15,18,760000, tzinfo=datetime.timezone.utc)
     dt_test = epoch_time_to_utc_datetime(datetime_to_utc_epoch(dt))
     self.assertEqual(dt, dt_test)
Example 11
 def test_for_consistency_epoch_to_epoch(self):
     epoch = 709732845000.0
     test_epoch = datetime_to_utc_epoch(epoch_time_to_utc_datetime(epoch))
     self.assertEqual(epoch, test_epoch)
Example 12
 def test_datetime_to_utc_epoch_one_hour(self):
     epoch = datetime.datetime(1970,1,1,1,0,0,0)
     test_time = datetime_to_utc_epoch(epoch)
     self.assertEqual(test_time, 60*60*1000)
Example 13
 def test_datetime_to_utc_epoch(self):
     epoch = datetime.datetime(1970,1,1)
     test_time = datetime_to_utc_epoch(epoch)
     self.assertEqual(test_time, 0)
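The four tests above pin down the expected behavior of datetime_to_utc_epoch: naive datetimes are treated as UTC, the result is in milliseconds since 1970-01-01T00:00:00Z, and it round-trips with epoch_time_to_utc_datetime. A plausible sketch consistent with these tests (not necessarily pyCSEP's exact implementation):

import datetime

def datetime_to_utc_epoch_sketch(dt):
    # treat naive datetimes as UTC, matching the tests above
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=datetime.timezone.utc)
    epoch_start = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    return (dt - epoch_start).total_seconds() * 1000.0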
Example 14
print(catalog)

####################################################################################################################################
# Filter to desired spatial region
# --------------------------------
#
# We use a circular spatial region with a radius of 3 average fault lengths as defined by the Wells and Coppersmith scaling
# relationship. PyCSEP provides :func:`csep.utils.spatial.generate_aftershock_region` to create an aftershock region
# based on the magnitude and epicenter of an event.
#
# We use :func:`csep.utils.comcat.get_event_by_id`, which queries the ComCat API provided by the USGS, to obtain the
# event information for the M7.1 Ridgecrest mainshock.

m71_event_id = 'ci38457511'
event = comcat.get_event_by_id(m71_event_id)
m71_epoch = time_utils.datetime_to_utc_epoch(event.time)

# build aftershock region
aftershock_region = regions.generate_aftershock_region(event.magnitude,
                                                       event.longitude,
                                                       event.latitude)

# apply new aftershock region and magnitude of completeness
catalog = catalog.filter_spatial(aftershock_region).apply_mct(
    event.magnitude, m71_epoch)
print(catalog)

####################################################################################################################################
# Write catalog
# -------------
#
Example 15
def ucerf3_consistency_testing(sim_dir,
                               event_id,
                               end_epoch,
                               n_cat=None,
                               plot_dir=None,
                               generate_markdown=True,
                               catalog_repo=None,
                               save_results=False,
                               force_plot_all=False,
                               skip_processing=False,
                               event_repo=None,
                               name=''):
    """
    Computes all CSEP consistency tests for the simulation located in sim_dir, using the ComCat event given by event_id.

    Args:
        sim_dir (str): directory where results and configuration are stored
        event_id (str): event_id corresponding to comcat event
    """
    # set up directories
    matplotlib.use('agg')
    matplotlib.rcParams['figure.max_open_warning'] = 150
    sns.set()

    # try using two different files
    print(f"Processing simulation in {sim_dir}", flush=True)
    filename = os.path.join(sim_dir, 'results_complete.bin')
    if not os.path.exists(filename):
        filename = os.path.join(sim_dir, 'results_complete_partial.bin')
    if not os.path.exists(filename):
        raise FileNotFoundError(
            'could not find results_complete.bin or results_complete_partial.bin'
        )

    if plot_dir is None:
        plot_dir = sim_dir
        print(f'No plotting directory specified, defaulting to {plot_dir}')
    else:
        print(f"Using user specified plotting directory: {plot_dir}")

    # config file can be either config.json or basename of simulation-config.json
    config_file = os.path.join(sim_dir, 'config.json')
    if not os.path.exists(config_file):
        config_file = os.path.join(sim_dir,
                                   os.path.basename(sim_dir) + '-config.json')
    mkdirs(os.path.join(plot_dir))

    # observed_catalog filename
    catalog_fname = os.path.join(plot_dir, 'evaluation_catalog.json')

    # load ucerf3 configuration
    with open(os.path.join(config_file), 'r') as f:
        u3etas_config = json.load(f)

    if plot_dir != sim_dir:
        print(
            "Plotting dir is different than simulation directory. copying simulation configuration to plot directory"
        )
        copy_file(config_file, os.path.join(plot_dir, 'config.json'))

    # determine how many catalogs to process
    if n_cat is None or n_cat > u3etas_config['numSimulations']:
        n_cat = u3etas_config['numSimulations']

    # download comcat information, sometimes times out but usually doesn't fail twice in a row
    if event_repo is not None:
        print("Using event information stored instead of accessing ComCat.")
        event_repo = FileSystem(url=event_repo)
        event = event_repo.load(Event())
    else:
        try:
            event = get_event_by_id(event_id)
        except Exception:
            # retry once; ComCat queries occasionally time out
            event = get_event_by_id(event_id)

    # filter to aftershock radius
    rupture_length = WellsAndCoppersmith.mag_length_strike_slip(
        event.magnitude) * 1000
    aftershock_polygon = Polygon.from_great_circle_radius(
        (event.longitude, event.latitude), 3 * rupture_length, num_points=100)
    aftershock_region = masked_region(california_relm_region(dh_scale=4),
                                      aftershock_polygon)

    # event timing
    event_time = event.time.replace(tzinfo=datetime.timezone.utc)
    event_epoch = datetime_to_utc_epoch(event.time)
    origin_epoch = u3etas_config['startTimeMillis']

    # this is a bit hacky; a dedicated argument for the time delta would be cleaner
    if type(end_epoch) == str:
        print(
            f'Found end_epoch as time_delta string (in days), adding {end_epoch} days to simulation start time'
        )
        time_delta = 1000 * 24 * 60 * 60 * int(end_epoch)
        end_epoch = origin_epoch + time_delta

    # convert epoch time (millis) to years
    time_horizon = (end_epoch -
                    origin_epoch) / SECONDS_PER_ASTRONOMICAL_YEAR / 1000

    # Download the ComCat observed catalog; a failure usually means the request timed out, so just try again
    if catalog_repo is None:
        print(
            "Catalog repository not specified; downloading a new observed catalog from ComCat."
        )

        # Sometimes ComCat fails for non-critical reasons, try twice just to make sure.
        try:
            comcat = query_comcat(epoch_time_to_utc_datetime(origin_epoch),
                                  epoch_time_to_utc_datetime(end_epoch),
                                  min_magnitude=2.50,
                                  min_latitude=31.50,
                                  max_latitude=43.00,
                                  min_longitude=-125.40,
                                  max_longitude=-113.10)
            comcat = comcat.filter_spatial(aftershock_region).apply_mct(
                event.magnitude, event_epoch)
            print(comcat)
        except Exception:
            comcat = query_comcat(event_time,
                                  epoch_time_to_utc_datetime(end_epoch),
                                  min_magnitude=2.50,
                                  min_latitude=31.50,
                                  max_latitude=43.00,
                                  min_longitude=-125.40,
                                  max_longitude=-113.10)
            comcat = comcat.filter_spatial(aftershock_region).apply_mct(
                event.magnitude, event_epoch)
            print(comcat)
    else:
        # if this fails it should stop the program, therefore no try-catch block
        print(
            f"Reading the observed catalog from repository at location {catalog_repo}"
        )
        catalog_repo = FileSystem(url=catalog_repo)
        comcat = catalog_repo.load(ComcatCatalog(query=False))
        comcat = comcat.filter(f'origin_time >= {origin_epoch}').filter(
            f'origin_time < {end_epoch}')
        comcat = comcat.filter_spatial(aftershock_region).apply_mct(
            event.magnitude, event_epoch)
        print(comcat)

    # define products to compute on simulation, this could be extracted
    data_products = {
        'n-test': NumberTest(),
        'm-test': MagnitudeTest(),
        'l-test': LikelihoodAndSpatialTest(),
        'cum-plot': CumulativeEventPlot(origin_epoch, end_epoch),
        'mag-hist': MagnitudeHistogram(),
        'arp-plot': ApproximateRatePlot(),
        'prob-plot': SpatialProbabilityPlot(),
        'prob-test': SpatialProbabilityTest(),
        'carp-plot': ConditionalApproximateRatePlot(comcat),
        'terd-test': TotalEventRateDistribution(),
        'iedd-test': InterEventDistanceDistribution(),
        'ietd-test': InterEventTimeDistribution(),
        'bv-test': BValueTest()
    }

    # try and read metadata file from plotting dir
    metadata_fname = os.path.join(plot_dir, 'meta.json')
    meta_repo = FileSystem(url=metadata_fname)
    try:
        eval_config = meta_repo.load(EvaluationConfiguration())
    except IOError:
        print(
            'Unable to load metadata file due to filesystem error or file not existing. Replotting everything by default.'
        )
        eval_config = EvaluationConfiguration()

    if eval_config.n_cat is None or n_cat > eval_config.n_cat:
        force_plot_all = True

    # determine which data we are actually computing and whether the data should be shared
    active_data_products = {}
    for task_name, calc in data_products.items():
        version = eval_config.get_evaluation_version(task_name)
        if calc.version != version or force_plot_all:
            active_data_products[task_name] = calc

    # set 'calc' status on relevant items, we always share from pair[0] with pair[1]
    calc_pairs = [('l-test', 'arp-plot'), ('m-test', 'mag-hist'),
                  ('l-test', 'carp-plot'), ('prob-test', 'prob-plot')]

    # this should probably be a part of the class-state when we refactor the code
    print(
        'Trying to determine if we can share calculation data between processing tasks...'
    )
    for pair in calc_pairs:
        if set(pair).issubset(set(active_data_products.keys())):
            class_name0 = active_data_products[pair[0]].__class__.__name__
            class_name1 = active_data_products[pair[1]].__class__.__name__
            print(
                f'Found {class_name0} and {class_name1} in workload manifest that can share data, thus skipping calculations for {class_name1}.'
            )
            active_data_products[pair[1]].calc = False

    # output some info for the user
    print(f'Will process {n_cat} catalogs from simulation\n')
    for k, v in active_data_products.items():
        print(f'Computing {v.__class__.__name__}')
    print('\n')

    if not name:
        days_since_mainshock = numpy.round(
            millis_to_days(origin_epoch - event_epoch))
        if u3etas_config['griddedOnly']:
            name = f'NoFaults, M{event.magnitude} + {days_since_mainshock} days'
        else:
            name = f'U3ETAS, M{event.magnitude} + {days_since_mainshock} days'

    # read the catalogs
    print('Begin processing catalogs', flush=True)
    t0 = time.time()
    loaded = 0
    u3 = load_stochastic_event_sets(filename=filename,
                                    type='ucerf3',
                                    name=name,
                                    region=aftershock_region)
    if not skip_processing:
        try:
            for i, cat in enumerate(u3):
                cat_filt = cat.filter(
                    f'origin_time < {end_epoch}').filter_spatial(
                        aftershock_region).apply_mct(event.magnitude,
                                                     event_epoch)
                for task_name, calc in active_data_products.items():
                    calc.process(copy.copy(cat_filt))
                tens_exp = numpy.floor(numpy.log10(i + 1))
                if (i + 1) % 10**tens_exp == 0:
                    t1 = time.time()
                    print(f'Processed {i+1} catalogs in {t1-t0} seconds',
                          flush=True)
                if (i + 1) % n_cat == 0:
                    break
                loaded += 1
        except Exception as e:
            print(
                f'Failed loading at catalog {i+1} with {str(e)}. This may happen normally if the simulation is incomplete.\nProceeding to finalize plots'
            )
            n_cat = loaded

        t2 = time.time()
        print(f'Finished processing catalogs in {t2-t0} seconds\n', flush=True)

        print('Processing catalogs again for distribution-based tests',
              flush=True)
        for k, v in active_data_products.items():
            if v.needs_two_passes:
                print(v.__class__.__name__)
        print('\n')

        # share data if needed
        print('Sharing data between related tasks...')
        for pair in calc_pairs:
            if set(pair).issubset(set(active_data_products.keys())):
                class_name0 = active_data_products[pair[0]].__class__.__name__
                class_name1 = active_data_products[pair[1]].__class__.__name__
                print(f'Sharing data from {class_name0} with {class_name1}.')
                active_data_products[pair[1]].data = active_data_products[
                    pair[0]].data

        # old iterator is expired, need new one
        t2 = time.time()
        u3 = load_stochastic_event_sets(filename=filename,
                                        type='ucerf3',
                                        name=name,
                                        region=aftershock_region)
        for i, cat in enumerate(u3):
            cat_filt = cat.filter(f'origin_time < {end_epoch}').filter_spatial(
                aftershock_region).apply_mct(event.magnitude, event_epoch)
            for task_name, calc in active_data_products.items():
                calc.process_again(copy.copy(cat_filt),
                                   args=(time_horizon, n_cat, end_epoch,
                                         comcat))
            # if we failed earlier, just stop there again
            tens_exp = numpy.floor(numpy.log10(i + 1))
            if (i + 1) % 10**tens_exp == 0:
                t3 = time.time()
                print(f'Processed {i + 1} catalogs in {t3 - t2} seconds',
                      flush=True)
            if (i + 1) % n_cat == 0:
                break

        # evaluate the catalogs and store results
        t1 = time.time()

        # make plot directory
        fig_dir = os.path.join(plot_dir, 'plots')
        mkdirs(fig_dir)

        # make results directory
        results_dir = os.path.join(plot_dir, 'results')
        if save_results:
            mkdirs(results_dir)

        # finalize each active calculation, plot the result, and optionally store it
        for task_name, calc in active_data_products.items():
            print(f'Finalizing calculations for {task_name} and plotting')
            result = calc.post_process(comcat,
                                       args=(u3, time_horizon, end_epoch,
                                             n_cat))
            # plot, and store in plot_dir
            calc.plot(result, fig_dir, show=False)

            if save_results:
                # could expose this, but hard-coded for now
                print(f"Storing results from evaluations in {results_dir}",
                      flush=True)
                calc.store_results(result, results_dir)

        t2 = time.time()
        print(f"Evaluated forecasts in {t2-t1} seconds", flush=True)

        # update evaluation config
        print("Updating evaluation metadata file", flush=True)
        eval_config.compute_time = utc_now_epoch()
        eval_config.catalog_file = catalog_fname
        eval_config.forecast_file = filename
        eval_config.forecast_name = name
        eval_config.n_cat = n_cat
        eval_config.eval_start_epoch = origin_epoch
        eval_config.eval_end_epoch = end_epoch
        eval_config.git_hash = current_git_hash()
        for task_name, calc in active_data_products.items():
            eval_config.update_version(task_name, calc.version, calc.fnames)
        # save new meta data
        meta_repo.save(eval_config.to_dict())

        # write the observed catalog used for the evaluation
        print("Saving ComCat observed catalog used for evaluation",
              flush=True)
        evaluation_repo = FileSystem(url=catalog_fname)
        evaluation_repo.save(comcat.to_dict())

        print(
            f"Finished evaluating everything in {t2-t0} seconds with an average time per catalog of {(t2-t0)/n_cat} seconds",
            flush=True)
    else:
        print(
            'Skip processing flag enabled so skipping straight to report generation.'
        )

    # create the markdown report for the results; this should really be part of the processing task so it can
    # support an arbitrary set of inputs. Right now it is hard-coded to these types of analysis.
    if generate_markdown:
        md = MarkdownReport('README.md')

        md.add_introduction(
            adict={
                'simulation_name': u3etas_config['simulationName'],
                'origin_time': epoch_time_to_utc_datetime(origin_epoch),
                'evaluation_time': epoch_time_to_utc_datetime(end_epoch),
                'catalog_source': 'ComCat',
                'forecast_name': 'UCERF3-ETAS',
                'num_simulations': n_cat
            })

        md.add_sub_heading(
            'Visual Overview of Forecast', 1,
            "These plots show qualitative comparisons between the forecast "
            f"and the target data obtained from ComCat. Plots contain events within {numpy.round(millis_to_days(end_epoch-origin_epoch))} days "
            f"of the forecast start time and within {numpy.round(3*rupture_length/1000)} kilometers from the epicenter of the mainshock.  \n  \n"
            "All catalogs (synthetic and observed) are processed using the time-dependent magnitude of completeness model from Helmstetter et al., (2006).\n"
        )

        md.add_result_figure(
            'Cumulative Event Counts',
            2,
            list(map(get_relative_path, eval_config.get_fnames('cum-plot'))),
            ncols=2,
            text=
            "Percentiles for cumulative event counts are aggregated within one-day bins. \n"
        )

        md.add_result_figure(
            'Magnitude Histogram',
            2,
            list(map(get_relative_path, eval_config.get_fnames('mag-hist'))),
            text=
            "Forecasted magnitude number distribution compared with the observed magnitude number "
            "distribution from ComCat. The forecasted number distribution in each magnitude bin is "
            "shown using a box and whisker plot. The box indicates the 95th percentile range and the "
            "whiskers indicate the minimum and maximum values. The horizontal line indicates the median.\n"
        )

        md.add_result_figure(
            'Approximate Rate Density with Observations',
            2,
            list(map(get_relative_path, eval_config.get_fnames('arp-plot'))),
            ncols=2,
            text=
            "The approximate rate density is computed from the expected number of events within a spatial cell and normalized over "
            "the time horizon of the forecast and the area of the spatial cell.\n"
        )

        md.add_result_figure(
            'Conditional Rate Density',
            2,
            list(map(get_relative_path, eval_config.get_fnames('carp-plot'))),
            ncols=2,
            text=
            "Plots are conditioned on number of target events ± 5%, and can be used to create "
            "statistical tests conditioned on the number of observed events. In general, these plots will tend to "
            "be undersampled with respect to the entire distribution from the forecast.\n"
        )

        md.add_result_figure(
            'Spatial Probability Plot',
            2,
            list(map(get_relative_path, eval_config.get_fnames('prob-plot'))),
            ncols=2,
            text=
            "Probability of one or more events occuring in an individual spatial cell. This figure shows another way of "
            "visualizing the spatial distribution of a forecast.")

        md.add_sub_heading(
            'CSEP Consistency Tests', 1,
            "<b>Note</b>: These tests are explained in detail by Savran et al., (In review).\n"
        )

        md.add_result_figure(
            'Number Test',
            2,
            list(map(get_relative_path, eval_config.get_fnames('n-test'))),
            text=
            "The number test compares the earthquake counts within the forecast region aginst observations from the"
            " target observed_catalog.\n")

        md.add_result_figure(
            'Magnitude Test',
            2,
            list(map(get_relative_path, eval_config.get_fnames('m-test'))),
            text=
            "The magnitude test computes the sum of squared residuals between normalized "
            "incremental magnitude number distributions."
            " The test distribution is built from statistics scored between individal catalogs and the"
            " expected magnitude number distribution of the forecast.\n")

        md.add_result_figure(
            'Likelihood Test',
            2,
            list(
                map(get_relative_path,
                    eval_config.get_fnames('l-test')['l-test'])),
            text=
            "The likelihood tests uses a statistic based on the continuous point-process "
            "likelihood function. We approximate the rate-density of the forecast "
            "by stacking synthetic catalogs in spatial bins. The rate-density represents the "
            "probability of observing an event selected at random from the forecast. "
            "Event log-likelihoods are aggregated for each event in the observed_catalog. This "
            "approximation to the continuous rate-density is unconditional in the sense that it does "
            "not consider the number of target events. Additionally, we do not include the magnitude component "
            "of the forecast to minimize the amount of undersampling present in these simulations.\n"
        )

        md.add_result_figure(
            'Probability Test',
            2,
            list(map(get_relative_path, eval_config.get_fnames('prob-test'))),
            text=
            "This test uses a probability map to build the test distribution and the observed "
            "statistic. Unlike the pseudo-likelihood based tests, the test statistic is built "
            "by summing probabilities associated with cells where earthquakes occurred once. In effect,"
            "two simulations that have the exact same spatial distribution, but different numbers of events "
            "will product the same statistic.")

        md.add_result_figure(
            'Spatial Test',
            2,
            list(
                map(get_relative_path,
                    eval_config.get_fnames('l-test')['s-test'])),
            text=
            "The spatial test is based on the same likelihood statistic from above. However, "
            "the scores are normalized so that differences in earthquake rates are inconsequential. "
            "As above, this statistic is unconditional.\n")

        md.add_sub_heading('One-point Statistics', 1, "")
        md.add_result_figure(
            'B-Value Test',
            2,
            list(map(get_relative_path, eval_config.get_fnames('bv-test'))),
            text=
            "This test compares the estimated b-value from the observed observed_catalog along with the "
            "b-value distribution from the forecast. This test can be considered an alternate form to the Magnitude Test.\n"
        )

        md.add_sub_heading('Distribution-based Tests', 1, "")
        md.add_result_figure(
            'Inter-event Time Distribution',
            2,
            list(map(get_relative_path, eval_config.get_fnames('ietd-test'))),
            text=
            'This test compares inter-event time distributions based on a Kolmogorov-Smirnov type statistic '
            'computed from the empirical CDF.\n')

        md.add_result_figure(
            'Inter-event Distance Distribution',
            2,
            list(map(get_relative_path, eval_config.get_fnames('iedd-test'))),
            text=
            'This test compares inter-event distance distributions based on a Kolmogorov-Smirnov type statistic '
            'computed from the empirical CDF.\n')

        md.add_result_figure(
            'Total Earthquake Rate Distribution',
            2,
            list(map(get_relative_path, eval_config.get_fnames('terd-test'))),
            text=
            'The total earthquake rate distribution provides another form of insight into the spatial '
            'consistency of the forecast with observations. The total earthquake rate distribution is computed from the '
            'cumulative probability distribution of earthquake occurrence against the earthquake rate per spatial bin.\n'
        )

        md.finalize(plot_dir)

    t1 = time.time()
    print(f'Completed all processing in {t1-t0} seconds')
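A hedged invocation sketch of the driver above; the paths are hypothetical, the event id is the Ridgecrest M7.1 id used in Example 14, and a string end_epoch is interpreted by the function as a number of days after the simulation start time:

ucerf3_consistency_testing(
    sim_dir='/path/to/u3etas_simulation',  # must contain results_complete.bin (or the partial file) and config.json
    event_id='ci38457511',                 # M7.1 Ridgecrest mainshock
    end_epoch='30',                        # 30 days after startTimeMillis in the U3ETAS config
    n_cat=1000,
    plot_dir='/path/to/plots',
    save_results=True,
)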