예제 #1
0
 def test_filter_with_list(self):
     """Two origin_time bounds applied together should keep only event 2."""
     lower = strptime_to_utc_epoch('2009-07-01 00:00:00.0')
     upper = strptime_to_utc_epoch('2010-07-01 00:00:00.0')
     # half-open window [lower, upper) selects just the 2010-01-01 event
     statements = [f'origin_time >= {lower}', f'origin_time < {upper}']
     catalog = copy.deepcopy(self.test_cat1)
     catalog.filter(statements)
     numpy.testing.assert_array_equal(numpy.array([b'2'], dtype='S256'), catalog.get_event_ids())
예제 #2
0
    def setUp(self):
        """Build the three-event dummy catalog shared by the tests (one event per year)."""
        epochs = [strptime_to_utc_epoch(f'{year}-01-01 00:00:00.0000')
                  for year in (2009, 2010, 2011)]
        # rows are (event_id, origin_time, lat, lon, depth, magnitude)
        rows = [(str(i).encode('ascii'), epoch, float(i), float(i), float(i), float(i))
                for i, epoch in enumerate(epochs, start=1)]
        self.test_cat1 = CSEPCatalog(data=rows)
예제 #3
0
 def parse_datetime(dt_string):
     """Parse an ISO-like datetime string into a UTC epoch time.

     Tries '%Y-%m-%dT%H:%M:%S.%f' first (fractional seconds), then falls back
     to '%Y-%m-%dT%H:%M:%S'.

     Args:
         dt_string (str): datetime string to parse

     Returns:
         epoch time as returned by strptime_to_utc_epoch

     Raises:
         CSEPIOException: if neither supported format matches
     """
     for fmt in ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S'):
         try:
             return strptime_to_utc_epoch(dt_string, format=fmt)
         except ValueError:
             # wrong format for this candidate; try the next one.
             # note: was a bare `except:`, which also swallowed
             # KeyboardInterrupt/SystemExit — strptime parse failures raise ValueError
             continue
     raise CSEPIOException("Supported time-string formats are '%Y-%m-%dT%H:%M:%S.%f' and '%Y-%m-%dT%H:%M:%S'")
예제 #4
0
    def setUp(self):
        """Create an 8x10 Cartesian grid whose origins start at (1.5, 1.5), plus a dummy catalog."""
        self.nx = 8
        self.ny = 10
        self.dh = 1
        xs = numpy.arange(1.5, self.nx) * self.dh
        ys = numpy.arange(1.5, self.ny) * self.dh

        # grid starts at (1.5, 1.5); an event at (1, 1) therefore falls outside it
        self.origins = list(itertools.product(xs, ys))
        self.num_nodes = len(self.origins)
        polygons = [Polygon(bbox) for bbox in compute_vertices(self.origins, self.dh)]
        self.cart_grid = CartesianGrid2D(polygons, self.dh)

        # three-event dummy catalog: rows are (event_id, origin_time, lat, lon, depth, magnitude)
        events = []
        for idx, year in enumerate(('2009', '2010', '2011'), start=1):
            epoch = strptime_to_utc_epoch(f'{year}-01-01 00:00:00.0000')
            events.append((str(idx).encode('ascii'), epoch, float(idx), float(idx), float(idx), float(idx)))
        self.test_cat1 = CSEPCatalog(data=events)
예제 #5
0
 def test_utc_epoch_time_from_strptime(self):
     """The Unix epoch origin (1970-01-01 00:00:00 UTC) should map to time 0."""
     epoch_origin = '1970-1-1 0:0:0.0'
     self.assertEqual(strptime_to_utc_epoch(epoch_origin), 0)
예제 #6
0
    def load_ascii_catalogs(cls, filename, **kwargs):
        """ Loads multiple CSEP catalogs in ASCII format.

        This function can load multiple catalogs stored in a single file or directories. This typically called to
        load a catalog-based forecast. Catalogs missing from the file (empty catalogs) are yielded as
        empty instances so that catalog ids come out monotonically increasing with no gaps.

        Args:
            filename (str): filepath or directory of catalog files
            **kwargs (dict): passed to class constructor

        Return:
            yields CSEPCatalog class
        """

        def parse_filename(filename):
            # extract forecast name and start time from 'name_%Y-%m-%dT%H-%M-%S-%f.*'
            # this works for unix
            basename = str(os.path.basename(filename.rstrip('/')).split('.')[0])
            split_fname = basename.split('_')
            name = split_fname[0]
            start_time = strptime_to_utc_datetime(split_fname[1], format="%Y-%m-%dT%H-%M-%S-%f")
            return (name, start_time)

        def is_header_line(line):
            # header rows start with the 'lon' column label
            return line[0] == 'lon'

        name_from_file, start_time = parse_filename(filename)
        # overwrite filename, if user specifies
        kwargs.setdefault('name', name_from_file)
        # handle all catalogs in single file
        if os.path.isfile(filename):
            with open(filename, 'r', newline='') as input_file:
                catalog_reader = csv.reader(input_file, delimiter=',')
                # csv treats everything as a string; convert to correct types below
                events = []
                # all catalogs should start at zero
                prev_id = None
                for line in catalog_reader:
                    # skip header line on first read if included in file
                    if prev_id is None and is_header_line(line):
                        continue
                    # convert to correct types
                    lon = float(line[0])
                    lat = float(line[1])
                    magnitude = float(line[2])
                    # maybe fractional seconds are not included
                    try:
                        origin_time = strptime_to_utc_epoch(line[3], format='%Y-%m-%dT%H:%M:%S.%f')
                    except ValueError:
                        origin_time = strptime_to_utc_epoch(line[3], format='%Y-%m-%dT%H:%M:%S')
                    depth = float(line[4])
                    catalog_id = int(line[5])
                    event_id = line[6]
                    event = (event_id, origin_time, lat, lon, depth, magnitude)
                    # first event: catalog ids should start at zero; if not, emit leading empty catalogs
                    if prev_id is None:
                        prev_id = 0
                        if catalog_id != prev_id:
                            # buffer this event for the catalog it belongs to
                            events = [event]
                            for empty_id in range(catalog_id):
                                yield cls(data=[], catalog_id=empty_id, **kwargs)
                            prev_id = catalog_id
                            # bugfix: event is already buffered; skip the append below,
                            # which previously duplicated the first event
                            continue
                    # event belongs to the catalog currently being buffered
                    if catalog_id == prev_id:
                        events.append(event)
                    # event starts the next catalog; flush the buffered one
                    elif catalog_id == prev_id + 1:
                        catalog = cls(data=events, catalog_id=prev_id, **kwargs)
                        prev_id = catalog_id
                        # start a new event buffer with this event
                        events = [event]
                        yield catalog
                    # this implies there are empty catalogs, because they are not listed in the ascii file
                    elif catalog_id > prev_id + 1:
                        # bugfix: yield the buffered catalog (smallest id) FIRST so that
                        # catalog ids remain monotonically increasing in the output stream
                        yield cls(data=events, catalog_id=prev_id, **kwargs)
                        # e.g. prev_id = 0 and catalog_id = 2 means catalog 1 was skipped
                        for empty_id in range(prev_id + 1, catalog_id):
                            yield cls(data=[], catalog_id=empty_id, **kwargs)
                        prev_id = catalog_id
                        events = [event]
                    else:
                        raise ValueError(
                            "catalog_id should be monotonically increasing and events should be ordered by catalog_id")
                # yield final catalog, note: since this is just loading catalogs, it has no idea how many should be there
                yield cls(data=events, catalog_id=prev_id, **kwargs)

        if os.path.isdir(filename):
            raise NotImplementedError("reading from directory or batched files not implemented yet!")
예제 #7
0
    def filter(self, statements=None, in_place=True):
        """
        Filters the catalog based on statements. This function takes about 60% of the run-time for processing UCERF3-ETAS
        simulations, so likely all other simulations. Implementations should try and limit how often this function
        will be called.

        Args:
            statements (str, iter): logical statements to evaluate, e.g., ['magnitude > 4.0', 'year >= 1995'].
                A 'datetime' statement ('datetime <oper> <date> <time>') is translated into an
                'origin_time' comparison against the corresponding UTC epoch.
            in_place (bool): if True, modify and return this catalog; otherwise return a new instance

        Returns:
            self: instance of AbstractBaseCatalog, so that this function can be chained.

        Raises:
            CSEPCatalogException: if no statements are given here or stored on the instance
            ValueError: if statements is neither a string nor a list/tuple of strings
        """
        if not self.filters and statements is None:
            raise CSEPCatalogException("Must provide filter statements to function or class to filter")

        # programmatically assign operators
        operators = {'>': operator.gt,
                     '<': operator.lt,
                     '>=': operator.ge,
                     '<=': operator.le,
                     '==': operator.eq}

        # filter catalogs, implied logical and
        if statements is None:
            statements = self.filters

        def _parse_statement(statement):
            """Parse one 'name oper value' statement into (column, operator_fn, numeric value)."""
            name = statement.split(' ')[0]
            if name == 'datetime':
                _, oper, date, time = statement.split(' ')
                # map the requested datetime to an epoch time so we act like the user requested
                # origin_time. if we want to support filtering on meta data it can happen here,
                # but need to determine what to do if entry are not present bc meta data does not
                # need to be square
                name = 'origin_time'
                value = strptime_to_utc_epoch(' '.join([date, time]))
            else:
                name, oper, value = statement.split(' ')
            return name, operators[oper], float(value)

        if isinstance(statements, str):
            name, oper_fn, value = _parse_statement(statements)
            filtered = self.catalog[oper_fn(self.catalog[name], value)]
        elif isinstance(statements, (list, tuple)):
            # slower but at the convenience of not having to call multiple times;
            # each statement narrows the working copy (implied logical and)
            filtered = numpy.copy(self.catalog)
            for statement in statements:
                name, oper_fn, value = _parse_statement(statement)
                filtered = filtered[oper_fn(filtered[name], value)]
        else:
            raise ValueError('statements should be either a string or list or tuple of strings')
        # can return new instance of class or original instance
        self.filters = statements
        if in_place:
            self.catalog = filtered
            return self
        else:
            # make and return new object
            cls = self.__class__
            inst = cls(data=filtered, catalog_id=self.catalog_id, format=self.format, name=self.name,
                       region=self.region, filters=statements)
            return inst