class StompListener(object):
    '''
    STOMP listener that adds extracted features to incoming articles.

    Every message body is decoded from JSON, features are computed from its
    'clean_content' entry, and the enriched article is sent on to the
    "queue/features" destination.
    '''

    def __init__(self, config):
        '''
        Set up logger and feature extractor.

        config -- mapping; its 'prefix' entry is passed to
                  EsaFeatureExtractor.
        '''
        self.config_ = config
        self.logger_ = logging.getLogger("main")
        self.extractor = EsaFeatureExtractor(prefix=config['prefix'])
        # Assigned later through set_stomp_connection(); defined here so the
        # attribute always exists on a constructed listener.
        self.conn_ = None

    def __extract_features(self, message):
        '''
        Extracts features from clean content and sends it on

        message -- decoded article; 'headline' and 'clean_content' are read,
                   and a 'features' entry is added in place.
        '''
        self.logger_.debug("Got article '%s'", message['headline'])

        features = self.extractor.get_features(message['clean_content'])
        version = self.extractor.get_version()

        # add features to json representation of article
        message['features'] = {'version': version, 'data': features}

        # send message on to Article Ranker
        try:
            self.conn_.send(json.dumps(message), destination="queue/features")
        except Exception as inst:
            # Best effort: a failed send (including a connection that was
            # never set) is logged and the article is dropped.
            self.logger_.error("Could not send message to feature queue. "
                               "Unknown Error %s: %s", type(inst), inst)

    def on_error(self, hears, message):
        '''
        Log an error reported to this listener.

        The first parameter keeps its original (misspelled) name so that
        the method signature stays unchanged; it is not used.
        '''
        self.logger_.error('received an error %s', message)

    def on_message(self, headers, message):
        '''
        Decode a JSON message body and run feature extraction on it.

        A body that cannot be decoded as JSON is logged and dropped instead
        of raising into the caller.
        '''
        try:
            received_message = json.loads(message)
        except (TypeError, ValueError) as inst:
            # ValueError covers malformed JSON; TypeError covers a body that
            # is not a string at all (e.g. None).
            self.logger_.error("Could not decode message as JSON: %s", inst)
            return
        self.__extract_features(received_message)

    def set_stomp_connection(self, connection):
        '''
        Store the connection used to send enriched articles.
        '''
        self.conn_ = connection
def __init__(self, config):
    '''
    Set up logger and feature extractor.

    config -- mapping; its 'prefix' entry is passed to EsaFeatureExtractor.
    '''
    self.config_ = config
    self.logger_ = logging.getLogger("main")
    self.extractor = EsaFeatureExtractor(prefix=config['prefix'])
    # The connection is supplied later through set_stomp_connection();
    # define the attribute here so it always exists on a constructed
    # listener.
    self.conn_ = None
logger.info("Load config from %s" % options.config)
config = load_config(options.config, logger, exit_with_error=True)

# Connect to mongo database; without it nothing below can run, so exit.
try:
    connect(config['database']['db-name'],
            username=config['database']['user'],
            password=config['database']['passwd'],
            port=config['database']['port'])
except connection.ConnectionError as e:
    logger.error("Could not connect to mongodb: %s" % e)
    sys.exit(1)

feature_extractor = EsaFeatureExtractor(prefix=config['prefix'])

# Ask the extractor *instance* for its version once, and reuse the value
# for both the query and the per-article check (the original called
# get_version() on the class inside the loop).
current_version = feature_extractor.get_version()

# go through each article and convert features
count = 0
for article in Article.objects(features__version__ne=current_version):
    if count % 10 == 0:
        logger.info("PROGRESS: processing article #%d" % count)
    count += 1

    # Skip articles whose features already carry the current version.
    if article.features.version == current_version:
        continue

    clean_content = article.clean_content

    # get new features
    new_features = feature_extractor.get_features(clean_content)