import json

# Publisher and Consumer are the project's RabbitMQ wrapper classes; their
# import is not shown in this excerpt (a hypothetical sketch of what they
# might look like follows below).


class AnnotatorWorker(object):

    def __init__(self):
        pass

    def send_to_broker(self, annotated_article):
        # Publish the annotated article back onto the 'data_distributor'
        # exchange with the 'articles' routing key so it can be persisted.
        publisher = Publisher('data_distributor')
        publisher.send_message(annotated_article, 'articles')

    def handle_delivery(self, channel, method, header, body):
        article = json.loads(body)
        # 1. TODO: All annotator work would ideally be done here.
        #    Import the annotator module being developed.
        # 2. TODO: Once the article that was sent is annotated,
        #    send it back to be saved to the database.
        annotated_article = json.dumps('new article here')
        self.send_to_broker(annotated_article)
        channel.basic_ack(delivery_tag=method.delivery_tag)
        print '--Done Annotating Article--'

    def run(self):
        self.consumer = Consumer('data_distributor', 'annotator_worker', 'articles',
                                 handle_message=self.handle_delivery)
        self.consumer.start()
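# Sketch (an assumption, not the project's actual implementation): minimal
# Publisher/Consumer wrappers over pika, illustrating what the workers in this
# listing expect -- a direct exchange named 'data_distributor', a named queue
# per worker, and 'articles' as the routing key. Host and durability defaults
# here are illustrative guesses.

import pika


class Publisher(object):

    def __init__(self, exchange, host='localhost'):
        self.exchange = exchange
        self.connection = pika.BlockingConnection(pika.ConnectionParameters(host))
        self.channel = self.connection.channel()
        self.channel.exchange_declare(exchange=exchange, exchange_type='direct')

    def send_message(self, message, routing_key):
        # 'message' is already a JSON string when the workers call this.
        self.channel.basic_publish(exchange=self.exchange,
                                   routing_key=routing_key,
                                   body=message)


class Consumer(object):

    def __init__(self, exchange, queue, routing_key, handle_message=None,
                 host='localhost'):
        self.connection = pika.BlockingConnection(pika.ConnectionParameters(host))
        self.channel = self.connection.channel()
        self.channel.exchange_declare(exchange=exchange, exchange_type='direct')
        self.channel.queue_declare(queue=queue, durable=True)
        self.channel.queue_bind(queue=queue, exchange=exchange,
                                routing_key=routing_key)
        # handle_message receives (channel, method, header, body), matching
        # the handle_delivery methods defined by the workers.
        self.channel.basic_consume(queue=queue, on_message_callback=handle_message)

    def start(self):
        self.channel.start_consuming()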
import json
import os
from datetime import datetime

import yaml
from elasticsearch import Elasticsearch


class ElasticSearchWorker(object):

    def __init__(self):
        # Resolve ../configs/settings.yaml relative to this module.
        self.module_path = os.path.dirname(os.path.realpath(__file__))
        self.config_dir = os.path.join(self.module_path, '..', 'configs')
        self.file_path = os.path.abspath(self.config_dir) + os.sep + 'settings.yaml'
        self.config_file = open(self.file_path, 'r')
        self.config_data = yaml.load(self.config_file)
        # Should ideally read the connection settings from the config file.
        self.es = Elasticsearch(timeout=30)

    def convert_to_datetime(self, timestamp):
        # Incoming timestamps are milliseconds since the epoch.
        date = datetime.fromtimestamp(timestamp / 1000)
        return date

    def handle_delivery(self, channel, method, header, body):
        article = json.loads(body)
        article['time_of_crawl'] = self.convert_to_datetime(article['time_of_crawl'])
        if 'date_published' in article['meta_information']:
            article['meta_information']['date_published'] = self.convert_to_datetime(
                article['meta_information']['date_published'])
        es_result = self.es.index(index="news", doc_type="article", body=article)
        self.es.indices.refresh(index="news")
        channel.basic_ack(delivery_tag=method.delivery_tag)
        print '--Done Inserting To ElasticSearch--'

    def run(self):
        self.consumer = Consumer('data_distributor', 'database_queue', 'articles',
                                 handle_message=self.handle_delivery)
        self.consumer.start()
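# Usage sketch (an assumption, not part of the original code): one way the two
# workers could be launched. Assumes both worker classes are importable in this
# scope; in the real project they likely live in separate modules and are
# started independently.

from multiprocessing import Process


def run_annotator():
    # Consumes from the 'annotator_worker' queue and republishes results.
    AnnotatorWorker().run()


def run_indexer():
    # Consumes from the 'database_queue' queue and indexes into Elasticsearch.
    ElasticSearchWorker().run()


if __name__ == '__main__':
    # start() blocks inside the consumer, so each worker gets its own process.
    Process(target=run_annotator).start()
    Process(target=run_indexer).start()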