def __init__(self, config, cache):
    """
    Create CIPRES gateway data source object

    Reads the database parameters and the resource list for the
    "gateway.cipres" config section, then loads the tool ids from the
    pickle file data/tool_ids.pkl.

    :param config: AmassConfig object containing configuration information
    :param cache: Passed through unchanged to the base Gateway constructor
    """
    amass.features.gateway.Gateway.__init__(self, config, cache)
    self.name = "gateway.cipres"
    self.db_conn = None
    try:
        self.logger.info('Reading db params for CIPRES database')
        self.db = config.get_db_vars(self.name)
    except Exception as e:
        amass.abort(
            'Problem reading db params from "%s" section in config file: %s'
            % (self.name, str(e)))
    try:
        self.resources_as_string = config.get_resources(self.name, True)
        self.resources = config.get_resources(self.name)
    except Exception as e:
        # The original message left its "%s" placeholder unformatted; fill in
        # the section name and include the underlying error.
        amass.abort(
            'Problem finding "resources" in "%s" section in config file: %s'
            % (self.name, str(e)))

    # TODO get rid of pickle file
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # safe while data/tool_ids.pkl is a trusted, locally generated file.
    with open("data/tool_ids.pkl", "rb") as tool_file:
        self.tools = pickle.load(tool_file)
    self.logger.debug("Read %d tools from file" % len(self.tools))
def __init__(self, config):
    """
    Constructor for FeatureCache.

    Initialises an empty in-memory cache, reads the db and resource
    settings of the "cache" config section, and finally calls self.load().

    :param config: AmassConfig object containing config info for local SQL
      db cache.

    :return: A new FeatureCache object.
    """
    self.name = "cache"
    self.cache_prefix = "amass"
    self.cache = {}
    self.db_conn = None
    self.config = config
    self.logger = logging.getLogger(self.__module__)

    # Both lookups report failures with the same message template.
    section_error = 'Problem reading "cache" section in config file: %s'

    try:
        self.logger.debug('Reading db params for cache')
        self.db = config.get_db_vars(self.name)
    except Exception as db_error:
        amass.abort(section_error % (str(db_error)))

    try:
        self.logger.debug('Reading resource params for cache')
        self.resources = config.get_resources(self.name)
    except Exception as resource_error:
        amass.abort(section_error % (str(resource_error)))

    self.load()
def generate_features(self, features_name, gateway_name, refresh=False):
    """
    Generate features for every gateway job and store them in the cache.

    :param features_name: Name of the feature definition to generate
    :param gateway_name: Name of the gateway the features belong to
    :param refresh: When cached features already exist, drop and rebuild
      them if True; otherwise amass.abort is called

    :return: True once every job returned by the gateway has been processed
    """
    table_name = self.cache.features_to_table_name(
        features_name, gateway_name)
    gateway = self.sources["gateway"]

    already_cached = self.cache.has_features(features_name, gateway_name)
    if already_cached and refresh:
        self.cache.drop_features(features_name, gateway_name)
    elif already_cached:
        amass.abort(
            "Cached features already exists; " +
            "re-run with refresh if you want to re-generate them")

    self.cache.create_features(
        features_name, gateway_name, self.columns, self.primary_keys)

    jobs = gateway.query_jobs()
    self.logger.info("Found %i gateway jobs" % jobs.rowcount)
    for job in jobs:
        job_features = self.generate_feature(features_name, job)
        row = {}
        row["GATEWAY_JOB_ID"] = gateway.get_job_field(job, "JOB_ID")
        # Features are stored as a JSON string in a single column.
        row["FEATURES"] = json.dumps(job_features)
        self.cache.insert(table_name, row)
    return True
def run(self, config, args):
    """
    Load (or fetch) the feature sources and generate features.

    When the "jobid" argument is empty, features are generated for the
    whole gateway via Features.generate_features; otherwise the features
    of that single job are generated and printed.

    :param config: Configuration object handed to get_django_models and
      amass.features.Features
    :param args: Raw command arguments, parsed with self.parse_args
    """
    arg_vals = self.parse_args(args)
    server_model_names = ["SourceConfig", "Resource", "SourceResource"]
    server_models = self.get_django_models(config, *server_model_names)
    # Map each model name to the model returned in the same position.
    server_config = dict(zip(server_model_names, server_models))

    features = amass.features.Features(config, server_config)
    features.cache_init()
    refresh_features = self.is_arg_true(arg_vals["refresh-features"])

    refresh_sources = []
    if arg_vals["refresh-sources"] != "":
        # Comma separated list; whitespace around the commas is ignored.
        refresh_sources = re.split(r"\s*,\s*", arg_vals["refresh-sources"])

    if not features.load_or_fetch_sources(
            arg_vals["features"], arg_vals["gateway"],
            arg_vals["startdate"], arg_vals["enddate"], refresh_sources):
        amass.abort("Unable to fetch source data for features %s" %
                    arg_vals["features"])

    if arg_vals["jobid"] == "":
        features.generate_features(
            arg_vals["features"], arg_vals["gateway"], refresh_features)
    else:
        gateway = features.get_gateway(arg_vals["gateway"])
        job = gateway.query_job(arg_vals["jobid"])
        print(features.generate_feature(arg_vals["features"], job))
def run(self, config, args):
    """
    Add a new gateway type record.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; the
      "type" value names the gateway type to create
    """
    arg_vals = self.parse_args(args)
    [source_gateway_type] = self.get_django_models(
        config, "SourceGatewayType")
    try:
        source_gateway_type.objects.create(type=arg_vals["type"])
        print("Gateway type '%s' sucessfully added" % arg_vals["type"])
    except Exception as e:
        # Report the underlying cause, as the sibling commands do, instead
        # of swallowing it with a bare except.
        amass.abort("Problem creating gateway type '%s': %s" %
                    (arg_vals["type"], str(e)))
def run(self, config, args):
    """
    Add a configuration parameter for a source.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; the
      "source", "name" and "value" entries describe the new parameter
    """
    parsed = self.parse_args(args)
    models = self.get_django_models(config, "SourceConfig")
    [source_config] = models
    source = parsed["source"]
    name = parsed["name"]
    try:
        source_config.objects.create(
            source=source, name=name, value=parsed["value"])
        success = "Config parameter '%s' sucessfully added for source '%s'"
        print(success % (name, source))
    except Exception as err:
        failure = "Problem adding config '%s' for source '%s': %s"
        amass.abort(failure % (name, source, str(err)))
def run(self, config, args):
    """
    Add a new resource record.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; the
      "resource" value is the name of the resource to create
    """
    parsed = self.parse_args(args)
    models = self.get_django_models(config, "Resource")
    [resource] = models
    resource_name = parsed["resource"]
    try:
        resource.objects.create(name=resource_name)
        print("Resource '%s' sucessfully added" % resource_name)
    except Exception as err:
        failure = "Problem adding resource '%s': %s"
        amass.abort(failure % (resource_name, str(err)))
def run(self, config, args):
    """
    Delete a configuration parameter of a source.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; the
      "source" and "name" entries identify the parameter to delete
    """
    parsed = self.parse_args(args)
    models = self.get_django_models(config, "SourceConfig")
    [source_config] = models
    source = parsed["source"]
    name = parsed["name"]
    try:
        source_config.objects.get(source=source, name=name).delete()
        success = ("Configuration parameter '%s' for source '%s' "
                   "sucessfully deleted")
        print(success % (name, source))
    except Exception as err:
        failure = ("Problem deleting configuration parameter '%s' "
                   "for source '%s': %s")
        amass.abort(failure % (name, source, str(err)))
def run(self, config, args):
    """
    Add an error type (name plus regex) for a gateway type.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; uses
      the "gateway type", "error" and "regex" entries
    """
    arg_vals = self.parse_args(args)
    [source_gateway_type, source_gateway_error] = self.get_django_models(
        config, "SourceGatewayType", "SourceGatewayError")
    try:
        gw_type = source_gateway_type.objects.get(
            type=arg_vals["gateway type"])
    except Exception:
        # Any lookup failure is reported as an unknown gateway type.
        amass.abort("'%s' is not a known gateway type" %
                    arg_vals["gateway type"])
    try:
        source_gateway_error.objects.create(
            gateway_type=gw_type, error=arg_vals["error"],
            regex=arg_vals["regex"])
        print("Error type '%s' sucessfully added for gateway '%s'" %
              (arg_vals["error"], arg_vals["gateway type"]))
    except Exception as e:
        amass.abort(
            "Problem adding gateway error '%s' for gateway '%s': %s" %
            (arg_vals["error"], arg_vals["gateway type"], str(e)))
def run(self, config, args):
    """
    Delete a gateway record by name.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; the
      "gateway" value names the gateway to delete
    """
    arg_vals = self.parse_args(args)
    [source_gateway] = self.get_django_models(config, "SourceGateway")
    gw = None
    try:
        gw = source_gateway.objects.get(name=arg_vals["gateway"])
    except Exception:
        # Any lookup failure is reported as a missing gateway; unlike a bare
        # except this lets KeyboardInterrupt/SystemExit propagate.
        amass.abort("Gateway '%s' does not exist" % arg_vals["gateway"])
    try:
        gw.delete()
        print("Gateway '%s' sucessfully deleted" % arg_vals["gateway"])
    except Exception as e:
        amass.abort("Problem deleting gateway '%s': %s" %
                    (arg_vals["gateway"], str(e)))
def load_or_fetch_sources(self, features_def, gw_name, start_date, end_date,
                          refreshes=None):
    """
    Load or fetch data from available sources to satisfy features in
    specified feature definition

    :param features_def: A string containing the name of a feature def
    :param gw_name: A string containing the name of the gateway
    :param start_date: Start date passed to each source's cache_history
    :param end_date: End date passed to each source's cache_history
    :param refreshes: Optional collection of source names whose cached data
      should be dropped and re-fetched; defaults to no refreshes

    :return: True if sources were successfully loaded; otherwise False
    """
    if refreshes is None:
        refreshes = []

    if features_def not in self.features:
        self.logger.error("Unable to find feature set %s" % features_def)
        # Previously fell through and raised KeyError on the lookup below.
        return False

    self.logger.info("Adding feature sources")
    for feature in self.features[features_def]:
        # Feature names have the form "<source>.<feature>".
        feature_source, feature_name = feature.split(".")
        if feature_source not in self.sources:
            self.sources[feature_source] = self.get_feature_source(
                feature_source, gw_name)
        self.sources[feature_source].add_feature(feature_name)

    for source_name, source in self.sources.items():
        self.logger.debug("Looking at source %s in '%s'" %
                          (source_name, ", ".join(refreshes)))
        self.logger.debug("%s" % self.cache.has_source(source_name))
        # Parenthesised: "%" binds tighter than "in".
        self.logger.debug("%s" % (source_name in refreshes))
        if self.cache.has_source(source_name) and source_name in refreshes:
            self.logger.info("Refreshing feature source %s" % source_name)
            if self.cache.drop_source(source_name):
                self.logger.info(
                    "Dropped feature source table %s" % source_name)
            else:
                amass.abort(
                    "Unable to drop feature source %s" % source_name)
        # NOTE(review): assumed to run for every source, not only refreshed
        # ones; the original indentation was lost - confirm.
        source.cache_history(start_date, end_date)
    return True
def run(self, config, args):
    """
    Add a new gateway of an existing gateway type.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; uses
      the "gateway" (name) and "type" (gateway type) entries
    """
    arg_vals = self.parse_args(args)
    [source_gateway, source_gateway_type] = self.get_django_models(
        config, "SourceGateway", "SourceGatewayType")
    gw_type = None
    try:
        gw_type = source_gateway_type.objects.get(type=arg_vals["type"])
    except Exception:
        # Any lookup failure is reported as an unknown gateway type; unlike
        # a bare except this lets KeyboardInterrupt/SystemExit propagate.
        amass.abort("'%s' is not a known gateway type" % arg_vals["type"])
    try:
        source_gateway.objects.create(name=arg_vals["gateway"], type=gw_type)
        print("Gateway '%s' sucessfully added" % arg_vals["gateway"])
    except Exception as e:
        amass.abort("Problem adding gateway '%s': %s" %
                    (arg_vals["gateway"], str(e)))
def run(self, config, args):
    """
    Train a prediction model on the leading portion of the cached features.

    The "split" argument is the fraction of rows used for training; the
    result of Prediction.train on that portion is printed.

    :param config: Configuration object handed to amass.features.Features
    :param args: Raw command arguments, parsed with self.parse_args; uses
      the "features", "gateway", "filtererrors" and "split" entries
    """
    arg_vals = self.parse_args(args)
    self.logger.info("Running prediction")
    # Kept under its own name so the feature rows below do not shadow it.
    feature_store = amass.features.Features(config)

    filter_errors = None
    if arg_vals["filtererrors"] != "":
        # Comma separated list; whitespace around the commas is ignored.
        filter_errors = re.split(r"\s*,\s*", arg_vals["filtererrors"])

    features, results = feature_store.get_features_results(
        arg_vals["features"], arg_vals["gateway"], filter_errors)
    if len(results) < 1:
        amass.abort("Unable to find matching features to run prediction")

    split_percentage = float(arg_vals["split"])
    split_index = int(split_percentage * len(features))
    # NOTE: rows from split_index onward are the held-out portion; nothing
    # consumes them yet, so they are no longer sliced into unused locals.
    train_features = features[:split_index]
    train_results = results[:split_index]

    predict = amass.predict.Prediction()
    print(predict.train(train_features, train_results))
def run(self, config, args):
    """
    Map a source-specific resource name onto an existing AMASS resource.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; uses
      the "resource", "source" and "source resource" entries
    """
    arg_vals = self.parse_args(args)
    [resource, source_resource] = self.get_django_models(
        config, "Resource", "SourceResource")
    amass_resource = None
    try:
        amass_resource = resource.objects.get(name=arg_vals["resource"])
    except Exception:
        amass.abort("'%s' is not a known AMASS resource" %
                    arg_vals["resource"])
    try:
        source_resource.objects.create(
            resource=amass_resource, source=arg_vals["source"],
            source_name=arg_vals["source resource"])
        print("Successfully added source resource '%s' for source '%s'" %
              (arg_vals["source resource"], arg_vals["source"]))
    except Exception as e:
        # Name the source resource that failed (the message previously
        # printed the AMASS resource name instead).
        amass.abort("Problem adding source resource '%s': %s" %
                    (arg_vals["source resource"], str(e)))
def run(self, config, args):
    """
    Delete an error type belonging to a gateway type.

    :param config: Configuration object handed to get_django_models
    :param args: Raw command arguments, parsed with self.parse_args; uses
      the "gateway type" and "error" entries
    """
    arg_vals = self.parse_args(args)
    [source_gateway_type, source_gateway_error] = self.get_django_models(
        config, "SourceGatewayType", "SourceGatewayError")
    gw_type = None
    try:
        gw_type = source_gateway_type.objects.get(
            type=arg_vals["gateway type"])
    except Exception:
        # Any lookup failure is reported as a missing gateway type; unlike a
        # bare except this lets KeyboardInterrupt/SystemExit propagate.
        amass.abort("Gateway type '%s' does not exist" %
                    arg_vals["gateway type"])
    try:
        # Distinct name so the record is not confused with the exception
        # bound by the handler below.
        gw_error = source_gateway_error.objects.get(
            gateway_type=gw_type, error=arg_vals["error"])
        gw_error.delete()
        print("Gateway error type '%s' for gateway '%s' sucessfully deleted"
              % (arg_vals["error"], arg_vals["gateway type"]))
    except Exception as e:
        amass.abort(
            "Problem deleting gateway error type '%s' for gateway '%s': %s"
            % (arg_vals["error"], arg_vals["gateway type"], str(e)))