def test_filter_with_list(self):
    # Apply a lower and an upper origin-time bound in a single filter() call.
    start_epoch = strptime_to_utc_epoch('2009-07-01 00:00:00.0')
    end_epoch = strptime_to_utc_epoch('2010-07-01 00:00:00.0')
    time_window = [
        f'origin_time >= {start_epoch}',
        f'origin_time < {end_epoch}',
    ]

    # Filter a deep copy so the fixture built in setUp is left untouched.
    catalog_copy = copy.deepcopy(self.test_cat1)
    catalog_copy.filter(time_window)

    # Only event 2 should fall inside the window.
    expected_ids = numpy.array([b'2'], dtype='S256')
    numpy.testing.assert_array_equal(expected_ids, catalog_copy.get_event_ids())
def setUp(self):
    # Dummy catalog: three events, one per year, whose remaining four fields
    # all equal the event number.
    origin_times = [
        strptime_to_utc_epoch(stamp)
        for stamp in (
            '2009-01-01 00:00:00.0000',
            '2010-01-01 00:00:00.0000',
            '2011-01-01 00:00:00.0000',
        )
    ]
    event_ids = (b'1', b'2', b'3')
    field_values = (1.0, 2.0, 3.0)
    events = [
        (event_id, origin_time, value, value, value, value)
        for event_id, origin_time, value in zip(event_ids, origin_times, field_values)
    ]
    self.test_cat1 = CSEPCatalog(data=events)
def parse_datetime(dt_string):
    """ Converts an ISO-like time string into an epoch time.

    The string is tried against each supported format in turn, first with fractional seconds
    and then without; the result of the first successful conversion is returned.

    Args:
        dt_string (str): time string formatted as '%Y-%m-%dT%H:%M:%S.%f' or '%Y-%m-%dT%H:%M:%S'

    Returns:
        value returned by strptime_to_utc_epoch for the first matching format

    Raises:
        CSEPIOException: if the string matches none of the supported formats
    """
    supported_formats = ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S')
    for time_format in supported_formats:
        try:
            return strptime_to_utc_epoch(dt_string, format=time_format)
        except (ValueError, TypeError):
            # Only parse failures move on to the next format. A bare ``except`` would also
            # swallow KeyboardInterrupt/SystemExit and hide unrelated programming errors.
            # NOTE(review): assumes the helper signals a bad string with ValueError (or
            # TypeError for non-string input), as strptime does -- confirm against its definition.
            continue
    raise CSEPIOException("Supported time-string formats are '%Y-%m-%dT%H:%M:%S.%f' and '%Y-%m-%dT%H:%M:%S'")
def setUp(self):
    # Arbitrary grid parameters.
    self.nx = 8
    self.ny = 10
    self.dh = 1

    # The spatial grid starts at (1.5, 1.5), so the event at (1, 1) should be removed.
    x_points = numpy.arange(1.5, self.nx) * self.dh
    y_points = numpy.arange(1.5, self.ny) * self.dh
    self.origins = [(x, y) for x in x_points for y in y_points]
    self.num_nodes = len(self.origins)

    # One polygon per grid origin, each built from its computed vertices.
    cell_polygons = [Polygon(bbox) for bbox in compute_vertices(self.origins, self.dh)]
    self.cart_grid = CartesianGrid2D(cell_polygons, self.dh)

    # Dummy catalog: three events, one per year, whose remaining four fields
    # all equal the event number.
    event_specs = (
        (b'1', '2009-01-01 00:00:00.0000', 1.0),
        (b'2', '2010-01-01 00:00:00.0000', 2.0),
        (b'3', '2011-01-01 00:00:00.0000', 3.0),
    )
    events = []
    for event_id, stamp, value in event_specs:
        events.append((event_id, strptime_to_utc_epoch(stamp), value, value, value, value))
    self.test_cat1 = CSEPCatalog(data=events)
def test_utc_epoch_time_from_strptime(self):
    # 1970-01-01 00:00:00, written without zero padding, must map to epoch time 0.
    epoch_origin = '1970-1-1 0:0:0.0'
    converted = strptime_to_utc_epoch(epoch_origin)
    self.assertEqual(converted, 0)
def load_ascii_catalogs(cls, filename, **kwargs):
    """ Loads multiple CSEP catalogs in ASCII format.

    This function loads multiple catalogs stored in a single comma-separated file. This is typically
    called to load a catalog-based forecast. Reading from a directory is not implemented yet.

    Each row is read as ``lon, lat, magnitude, origin_time, depth, catalog_id, event_id``. Rows must be
    ordered by catalog_id. Catalog ids that do not appear in the file (before the first listed id, or
    in a gap between two listed ids) are yielded as empty catalogs, and catalogs are yielded in
    increasing catalog_id order.

    Args:
        filename (str): filepath of the catalog file; the part of the basename before the first '_' is
                        used as the default catalog name
        **kwargs (dict): passed to class constructor

    Yields:
        instances of cls, one per catalog_id

    Raises:
        ValueError: if catalog_id decreases from one row to the next
        NotImplementedError: if filename is a directory
    """

    def parse_filename(filename):
        # this works for unix
        basename = str(os.path.basename(filename.rstrip('/')).split('.')[0])
        split_fname = basename.split('_')
        name = split_fname[0]
        start_time = strptime_to_utc_datetime(split_fname[1], format="%Y-%m-%dT%H-%M-%S-%f")
        return (name, start_time)

    def is_header_line(line):
        return line[0] == 'lon'

    # the start time is parsed from the filename but is not used any further here
    name_from_file, _ = parse_filename(filename)
    # a name supplied by the user takes precedence over the one parsed from the filename
    kwargs.setdefault('name', name_from_file)

    # handle all catalogs in single file
    if os.path.isfile(filename):
        with open(filename, 'r', newline='') as input_file:
            catalog_reader = csv.reader(input_file, delimiter=',')
            # events buffered for the catalog currently being read
            events = []
            # prev_id stays None until the first event row has been read
            prev_id = None
            for line in catalog_reader:
                # blank rows carry no event; indexing them would raise IndexError
                if not line:
                    continue
                # skip header line on first read if included in file
                if prev_id is None and is_header_line(line):
                    continue
                # csv treats everything as a string, convert to correct types
                lon = float(line[0])
                lat = float(line[1])
                magnitude = float(line[2])
                # maybe fractional seconds are not included
                try:
                    origin_time = strptime_to_utc_epoch(line[3], format='%Y-%m-%dT%H:%M:%S.%f')
                except ValueError:
                    origin_time = strptime_to_utc_epoch(line[3], format='%Y-%m-%dT%H:%M:%S')
                depth = float(line[4])
                catalog_id = int(line[5])
                event_id = line[6]
                event = (event_id, origin_time, lat, lon, depth, magnitude)

                if prev_id is None:
                    # first event: catalog ids are expected to start at zero, so any ids before the
                    # first listed one are emitted as empty catalogs
                    for empty_id in range(catalog_id):
                        yield cls(data=[], catalog_id=empty_id, **kwargs)
                    # buffer the event exactly once; the previous implementation fell through to the
                    # append branch and stored it twice when the first id was not zero
                    events = [event]
                    prev_id = catalog_id
                    continue

                if catalog_id == prev_id:
                    # same catalog as the previous row
                    events.append(event)
                elif catalog_id > prev_id:
                    # the buffered catalog is complete; yield it before anything with a larger id
                    yield cls(data=events, catalog_id=prev_id, **kwargs)
                    # ids skipped between the two rows are empty catalogs, because they are not
                    # listed in the ascii file (e.g. prev_id = 0 and catalog_id = 2 skips id 1)
                    for empty_id in range(prev_id + 1, catalog_id):
                        yield cls(data=[], catalog_id=empty_id, **kwargs)
                    # start buffering the new catalog with this event
                    events = [event]
                    prev_id = catalog_id
                else:
                    raise ValueError(
                        "catalog_id should be monotonically increasing and events should be ordered by catalog_id")
        # yield final catalog, note: since this is just loading catalogs, it has no idea how many should be there
        yield cls(data=events, catalog_id=prev_id, **kwargs)
    elif os.path.isdir(filename):
        raise NotImplementedError("reading from directory or batched files not implemented yet!")
def filter(self, statements=None, in_place=True):
    """ Filters the catalog based on statements.

    This function takes about 60% of the run-time for processing UCERF3-ETAS simulations, so likely
    all other simulations. Implementations should try and limit how often this function will be called.

    Each statement has the form '<name> <operator> <value>', where name is a field of the catalog
    array, operator is one of '>', '<', '>=', '<=', '==' and value is convertible to float. The special
    form 'datetime <operator> <date> <time>' is converted to an epoch time and compared against the
    'origin_time' field. Tokens may be separated by any amount of whitespace. Multiple statements are
    combined with a logical and.

    Args:
        statements (str, iter): logical statements to evaluate, e.g., ['magnitude > 4.0', 'year >= 1995'].
                                If None, the filters stored on the catalog are used.
        in_place (bool): if True, the filtered events replace the events of this catalog and this
                         catalog is returned; if False, a new instance of the same class is returned.
                         In both cases the statements are stored in self.filters.

    Returns:
        self or a new instance of the same class, so that this function can be chained.

    Raises:
        CSEPCatalogException: if no statements are passed and none are stored on the catalog
        ValueError: if statements is not a string, list or tuple, or a statement does not have the
                    expected number of tokens
        KeyError: if a statement uses an operator that is not supported
    """
    if not self.filters and statements is None:
        raise CSEPCatalogException("Must provide filter statements to function or class to filter")
    # programmatically assign operators
    operators = {'>': operator.gt,
                 '<': operator.lt,
                 '>=': operator.ge,
                 '<=': operator.le,
                 '==': operator.eq}
    if statements is None:
        statements = self.filters
    # normalize to a list of statements so that both input kinds share one code path
    if isinstance(statements, str):
        filters = [statements]
        # indexing with a boolean mask already produces a new array, no explicit copy needed
        filtered = self.catalog
    elif isinstance(statements, (list, tuple)):
        # slower but at the convenience of not having to call multiple times
        filters = list(statements)
        filtered = numpy.copy(self.catalog)
    else:
        raise ValueError('statements should be either a string or list or tuple of strings')
    # filter catalogs, implied logical and
    for filt in filters:
        # split on any run of whitespace so stray or repeated spaces do not break parsing
        tokens = filt.split()
        if tokens and tokens[0] == 'datetime':
            _, oper, date, time = tokens
            # we map the requested datetime to an epoch time so we act like the user requested origin_time
            name = 'origin_time'
            value = strptime_to_utc_epoch(' '.join([date, time]))
        else:
            name, oper, value = tokens
        filtered = filtered[operators[oper](filtered[name], float(value))]
    # can return new instance of class or original instance
    self.filters = statements
    if in_place:
        self.catalog = filtered
        return self
    # make and return new object
    cls = self.__class__
    inst = cls(data=filtered, catalog_id=self.catalog_id, format=self.format, name=self.name,
               region=self.region, filters=statements)
    return inst