def __init__(self, engine=DEFAULT_ENGINE, **engine_params):
    """
    Initialize the Catalogue Database.

    :param engine: name of the database engine class to use.
    :param engine_params: keyword params forwarded verbatim to the
        engine constructor. A truthy ``drop`` param or the presence
        of a ``memory`` param resets the in-memory measure cache.
    """
    log.logger(__name__).info(
        "initializing Catalogue Database (engine=%s, params %s)",
        engine, engine_params)
    self._engine_class = self.__class__.get_engine(engine)
    self._engine = self._engine_class(**engine_params)
    # Check the *value* of `drop`: the former membership test
    # (`'drop' in engine_params`) reset the cache even when the
    # caller explicitly passed drop=False.
    if engine_params.get('drop') or 'memory' in engine_params:
        log.logger(__name__).info("reset catalogue data")
        self._cache = collections.defaultdict(dict)
def __init__(self, drop=False, engine=DEFAULT_ENGINE, **engine_params):
    """
    Set up the Catalogue Database on the selected engine.

    :param drop: when true, recreate (reset) the catalogue data.
    :param engine: name of the database engine class to use.
    :param engine_params: keyword params forwarded verbatim to the
        engine constructor; a ``memory`` param also triggers a reset.
    """
    log.logger(__name__).info(
        "initializing Catalogue Database (engine=%s, params %s)",
        engine, engine_params)
    engine_cls = self.__class__.get_engine(engine)
    self._engine_class = engine_cls
    self._engine = engine_cls(**engine_params)
    must_reset = drop or 'memory' in engine_params
    if must_reset:
        log.logger(__name__).info("reset catalogue data")
        self.recreate()
def select(self, grouped_measures, native_scale, target_scale, mus):
    """
    Delegate the measure selection to the class-level ``do_select``
    and log a summary of the outcome.

    :returns: whatever ``do_select`` returns — a pair whose first
        element is the list of selected native measures.
    """
    native_and_target = self.__class__.do_select(
        grouped_measures, native_scale, target_scale, mus)
    log.logger(__name__).debug(
        "selected %d measures over %d groups on scales %s,%s (%s)",
        len(native_and_target[0]), len(grouped_measures),
        native_scale, target_scale, mus)
    return native_and_target
def group_measures_by_time(self, measures):
    """
    Partition `measures` in time, driven by ``self.time_window``
    and ``self.time_distance_fn``.
    """
    time_groups = self.group_measures_by_var(
        measures, self.time_distance_fn, self.time_window)
    log.logger(__name__).debug(
        "grouping by time returned %d groups", len(time_groups))
    return time_groups
def set_criteria(self, criteria=None):
    """
    Set the criteria used to filter measures. E.g.::

        homogeniser = Homogeniser()
        homogeniser.set_criteria(
            C(agency__in=a_list_agency) or C(magnitude__gt=4))

    :param criteria: the new criteria; a falsy value installs an
        empty ``Criteria()``.
    :returns: the (native, target) selected measures under the new
        criteria.
    """
    if criteria:
        self._criteria = criteria
    else:
        self._criteria = Criteria()
    log.logger(__name__).debug("Changed criteria to %s", self._criteria)
    return self.selected_native_measures(), self.selected_target_measures()
def group(cls, measures):
    """
    Group the measures using the event source key as aggregator.

    :param measures: iterable of measure objects exposing an
        ``event_key`` attribute.
    :returns: a dict mapping each event key to the list of its
        measures, preserving input order within each group.
    """
    groups = {}
    for measure in measures:
        # setdefault replaces the non-idiomatic
        # `if not key in groups: groups[key] = []` pattern
        groups.setdefault(measure.event_key, []).append(measure)
    # lazy %-args instead of eager string interpolation, so the
    # message is only formatted when INFO is enabled
    log.logger(__name__).info(
        "Measure grouper by source key returned %d groups", len(groups))
    return groups
def load_file(self, filename, importer_module_name, **kwargs):
    """
    Load `filename` by using an Importer defined in
    `importer_module_name`. Other kwargs are passed to the store
    method of the importer.

    :param filename: path of the catalogue file to import.
    :param importer_module_name: dotted module path of the importer;
        a bare name is resolved under ``eqcatalogue.importers``.
    """
    if '.' not in importer_module_name:
        importer_module_name = (
            'eqcatalogue.importers.' + importer_module_name)
    module = __import__(importer_module_name, fromlist=['Importer'])
    # `open` replaces the removed Python-2 `file` builtin; the
    # context manager also closes the handle, which was leaked.
    with open(filename) as input_file:
        importer = module.Importer(input_file, self)
        summary = importer.store(**kwargs)
    log.logger(__name__).info(summary)
def harmonise(self, measures, path_finder_cls=FormulaPathFinder,
              measure_uncertainty=0.0, allow_trivial_conversion=True):
    """
    Harmonise an iterator of measures.

    :param measures: the measures to be converted
    :param path_finder_cls:
      the class used to find the sequence of formulas to apply to
      get a conversion.
    :param measure_uncertainty:
      default grade of uncertainty, error related to the measures
      if no standard error for measures is no defined.
    :param allow_trivial_conversion:
      when true, a measure already expressed in the target scale is
      converted through the identity formula instead of being
      rejected.
    :returns: the converted and the unconverted measures
    :rtype: a 2-tuple. The former is dictionary where the keys
      are the converted measures and the value is a dictionary
      storing the converted value and the formula used for the
      conversion. The latter is a list of the unconverted measures
    """
    result = HarmoniserResult()
    path_finder = path_finder_cls(self._formulas)
    # identity formula for the trivial (same-scale) conversion branch
    identity = ConversionFormula.make_identity(self.target_scale)
    # NOTE(review): len() needs a sized collection, so despite the
    # docstring `measures` cannot be a plain iterator — confirm callers.
    log.logger(__name__).debug("Start harmonization of %d measures",
                               len(measures))
    for m in measures:
        formulas = path_finder.find_formulas_for(m, self.target_scale)
        if formulas:
            # Chain the formula sequence: the first formula converts
            # the measure itself, each subsequent one converts the
            # intermediate value produced by the previous step.
            value = formulas[0].apply(m, measure_uncertainty)
            for formula in formulas[1:]:
                value = formula.apply(value, measure_uncertainty)
            result.append(m, value)
            log.logger(__name__).debug(
                "Measure %s harmonised with formulas %s", m, formulas)
        elif m.scale == self.target_scale and allow_trivial_conversion:
            # Already in the target scale: apply the identity formula
            # so the measure still appears among the converted ones.
            result.append(
                m, m.convert(m.value, identity, m.standard_error))
            log.logger(__name__).debug(
                "Measure %s harmonised with trivial conversion", m)
        else:
            # No conversion path found: record as unconverted.
            result.append(m)
            log.logger(__name__).debug("Could not convert measure %s", m)
    log.logger(__name__).info(
        "Harmonized %s measures. %s measures not converted",
        len(result.converted), len(result.unconverted))
    return result
def group_measures_by_magnitude_value(self, measures):
    """
    Partition `measures` by magnitude, driven by
    ``self.magnitude_window`` and ``self.magnitude_distance_fn``.

    :raises RuntimeError: when no magnitude window has been set.
    """
    if not self.magnitude_window:
        raise RuntimeError("Please provide a magnitude window")
    magnitude_groups = self.group_measures_by_var(
        measures, self.magnitude_distance_fn, self.magnitude_window)
    log.logger(__name__).debug(
        "grouping by magnitude value returned %d groups",
        len(magnitude_groups))
    return magnitude_groups
def select(self, grouped_measures, native_scale, target_scale, mus):
    """
    Build two lists for native_measure and target_measure. Each
    list is built by selecting a measure from a grouped_measures
    item. The selection is driven by the agency ranking.

    :py:param:: grouped_measures
    A dictionary where the keys identifies the events and the
    value are the list of measures associated with it

    :py:param:: native_scale, target_scale
    The native and target scale used

    :py:param:: mus
    A missing uncertainty strategy object used to handle the case
    when no standard error of a measure is provided
    """
    native_measures = []
    target_measures = []
    for measures in grouped_measures.values():
        ranked_native = []
        ranked_target = []
        for measure in measures:
            if mus.should_be_discarded(measure):
                continue
            if measure.scale == native_scale:
                ranked_native.append(
                    (self.calculate_rank(measure), measure))
            elif measure.scale == target_scale:
                ranked_target.append(
                    (self.calculate_rank(measure), measure))
            # fill a missing (falsy) standard error with the
            # strategy's default — mutates the measure in place
            if not measure.standard_error:
                measure.standard_error = mus.get_default(measure)
        # Sort on the rank only: sorting the (rank, measure) tuples
        # directly falls back to comparing measure objects on rank
        # ties, which raises TypeError for unorderable types on
        # Python 3. Ties keep insertion order (stable sort).
        ranked_native.sort(key=lambda pair: pair[0], reverse=True)
        ranked_target.sort(key=lambda pair: pair[0], reverse=True)
        # keep the group only when both scales are represented
        if ranked_native and ranked_target:
            native_measures.append(ranked_native[0][1])
            target_measures.append(ranked_target[0][1])
    log.logger(__name__).debug(
        "selected %d measures over %d groups on scales %s,%s (%s)",
        len(native_measures), len(grouped_measures), native_scale,
        target_scale, mus)
    return native_measures, target_measures
def export_measures(measures, filename, header=True, mode="w", **kwargs):
    """
    Export `measures` to `filename` by using the csv module from
    the standard python library.

    If `header` is true and there is at least one measure, the
    first row of the csv will be an header. The remaining arguments
    of the function are passed as they are to the csv writer
    constructor.

    :param measures: a sequence of dict-like measures (must expose
        ``keys()`` and ``values()``).
    """
    with open(filename, mode) as csvfile:
        measure_writer = csv.writer(csvfile, **kwargs)
        # guard against empty input: `measures[0]` raised IndexError
        # when header=True and no measures were given
        if header and measures:
            measure_writer.writerow(measures[0].keys())
        for measure in measures:
            measure_writer.writerow(measure.values())
    # lazy %-args, consistent with the rest of the module's logging
    log.logger(__name__).info(
        "Exported %d measures to %s", len(measures), filename)
def set_grouper(self, grouper_class, **grouper_args):
    """
    Set the algorithm used to group measures by event.

    :param grouper_class:
      A class that implements the MeasureGrouper protocol. See
      :py:class:`eqcatalogue.grouping` for the current admitted
      values

    Any other parameter is given as input to the constructor of
    `grouper_class`. E.g.::

     from eqcatalogue.grouping import GroupMeasuresByHierarchicalClustering
     an_homogeniser.set_grouper(GroupMeasuresByHierarchicalClustering)
    """
    self._grouper = grouper_class(**grouper_args)
    # lazy %-args (was eager `%` interpolation), consistent with the
    # sibling setters' logging style
    log.logger(__name__).debug("Changed grouper to %s", self._grouper)
    return self.grouped_measures()
def set_selector(self, selector_class, **selector_args):
    """
    Set the algorithm used to select a measure among grouped
    measures.

    :param selector_class:
      A class that implements the MeasureSelection protocol. See
      :py:class:`eqcatalogue.selection` for the current admitted
      values

    Any other parameter is given as input to the constructor of
    `selector_class`. E.g.::

     from eqcatalogue.selection import Precise
     an_homogeniser.set_selector(Precise)
    """
    self._selector = selector_class(**selector_args)
    # lazy %-args (was eager `%` interpolation), consistent with the
    # sibling setters' logging style
    log.logger(__name__).debug("Changed selector to %s", self._selector)
    return self.selected_native_measures(), self.selected_target_measures()
def group_measures(self, measures):
    """
    Group the measures by clustering on time.

    :param measures: a sequence of measures; ``self._key_fn`` maps
        each to the scalar clustered on.
    :returns: a dict mapping cluster id to the list of its measures.
    """
    data = np.array([self._key_fn(m) for m in measures])
    # reshape `data` directly: the original wrapped it in a second,
    # redundant np.array call
    npdata = np.reshape(data, [len(data), 1])
    clusters = hierarchy.fclusterdata(npdata, **self._clustering_args)
    grouped = {}
    for index, cluster in enumerate(clusters):
        # setdefault replaces the manual get/append/reassign pattern
        grouped.setdefault(cluster, []).append(measures[index])
    # lazy %-args instead of eager string interpolation
    log.logger(__name__).info(
        "Measure grouper by clustering on time returned %d groups",
        len(grouped))
    return grouped
def group_measures(self, measures):
    """
    Group `measures` by sequentially applying a clustering on the
    time, space and (optionally) magnitude variables.

    :returns: a dict mapping a progressive integer index to each
        resulting group.
    """
    groups = []
    for time_group in self.group_measures_by_time(measures):
        groups.extend(self.group_measures_by_space(time_group))
    if self.magnitude_window:
        refined = []
        for group in groups:
            refined.extend(self.group_measures_by_magnitude_value(group))
        groups = refined
    log.logger(__name__).info(
        "Measure grouper (sequential clustering) returned %d groups",
        len(groups))
    return dict(enumerate(groups))
def set_missing_uncertainty_strategy(self, mu_strategy_class,
                                     **mu_strategy_args):
    """
    Set the algorithm used to handle situations where uncertainty
    data are missing in a measure.

    :param mu_strategy_class:
      A class that implements the MissingUncertaintyStrategy
      protocol. See :py:class:`eqcatalogue.selection` for the
      current admitted values

    Any other parameter is given as input to the constructor of
    `mu_strategy_class`. E.g.::

     from eqcatalogue.selection import MUSSetDefault
     an_homogeniser.set_missing_uncertainty_strategy(
         MUSSetDefault, default=3)
    """
    self._mu_strategy = mu_strategy_class(**mu_strategy_args)
    # bug fix: the message previously logged self._selector (a
    # copy-paste from set_selector) instead of the strategy that
    # was just installed
    log.logger(__name__).debug(
        "Changed missing uncertainty strategy to %s", self._mu_strategy)
    return self.selected_native_measures(), self.selected_target_measures()
events from catalogue saved in the ISF Format http://www.isc.ac.uk/standards/isf/ """ import re import datetime from sqlalchemy.exc import IntegrityError from eqcatalogue import models as catalogue from eqcatalogue.log import logger from eqcatalogue.importers import BaseImporter from eqcatalogue.exceptions import ParsingFailure LOG = logger(__name__) CATALOG_URL = 'http://www.isc.ac.uk/cgi-bin/web-db-v4' ANALYSIS_TYPES = {'a': 'automatic', 'm': 'manual', 'g': 'guess'} LOCATION_METHODS = { 'i': 'inversion', 'p': 'pattern recognition', 'g': 'ground truth', 'o': 'other' } EVENT_TYPES = { 'uk': 'unknown',