class AnnotatorWorker(object):
    """Worker that consumes article messages and republishes an annotated version.

    The annotation step itself is not implemented yet: ``handle_delivery``
    currently publishes a fixed placeholder payload (see the TODOs there).
    """

    def __init__(self):
        pass

    def send_to_broker(self, annotated_article):
        """Publish ``annotated_article`` via a new ``Publisher('data_distributor')``.

        :param annotated_article: payload passed unchanged to
            ``Publisher.send_message`` together with ``'articles'``
            (presumably the routing key -- confirm against ``Publisher``).
        """
        # NOTE(review): run() consumes with the same 'data_distributor' /
        # 'articles' arguments that are used for publishing here -- confirm
        # the published message cannot be delivered back to this worker.
        publisher = Publisher('data_distributor')
        publisher.send_message(annotated_article, 'articles')

    def handle_delivery(self, channel, method, header, body):
        """Process one delivered message and acknowledge it.

        :param channel: object whose ``basic_ack`` is called once the
            annotated article has been handed to ``send_to_broker``.
        :param method: delivery metadata; only ``delivery_tag`` is read.
        :param header: unused.
        :param body: JSON document describing the article.
        :raises ValueError: if ``body`` is not valid JSON (raised by
            ``json.loads`` before anything is published or acknowledged).
        """
        # Parsed up front so that a malformed body fails before publishing;
        # the parsed article is the input for the annotation step below.
        article = json.loads(body)

        # 1. TODO: All annotator work would ideally be done here. Import the
        #    annotator module being developed.
        # 2. TODO: Once the article that was sent is annotated, send it back
        #    to be saved to the database.

        annotated_article = json.dumps('new article here')
        self.send_to_broker(annotated_article)

        # Acknowledge only after the publish call has returned.
        channel.basic_ack(delivery_tag=method.delivery_tag)
        # Call form of print: same output on Python 2 and valid on Python 3.
        print('--Done Annotating Article--')

    def run(self):
        """Create the ``Consumer`` for this worker, keep it on ``self`` and start it.

        The positional arguments are presumably exchange, queue and routing
        key -- confirm against ``Consumer``.
        """
        self.consumer = Consumer(
            'data_distributor',
            'annotator_worker',
            'articles',
            handle_message=self.handle_delivery,
        )

        self.consumer.start()
Example #2
0
	def run(self):
		"""Build the consumer for 'database_queue', store it on ``self`` and start it.

		The three positional arguments are presumably exchange, queue and
		routing key -- confirm against ``Consumer``.
		"""
		consumer = Consumer(
			'data_distributor',
			'database_queue',
			'articles',
			handle_message=self.handle_delivery,
		)
		# Stored on the instance before starting, exactly as before.
		self.consumer = consumer
		self.consumer.start()
Example #3
0
    def run(self):
        """Build the consumer for 'annotator_worker', store it on ``self`` and start it.

        The three positional arguments are presumably exchange, queue and
        routing key -- confirm against ``Consumer``.
        """
        consumer = Consumer(
            'data_distributor',
            'annotator_worker',
            'articles',
            handle_message=self.handle_delivery,
        )
        # Stored on the instance before starting, exactly as before.
        self.consumer = consumer
        self.consumer.start()
Example #4
0
class AnnotatorWorker(object):
    """Consume article messages, annotate them and publish the result.

    Annotation is still a stub: ``handle_delivery`` publishes a fixed
    placeholder payload until the annotator module is wired in.
    """

    def __init__(self):
        pass

    def send_to_broker(self, annotated_article):
        """Send ``annotated_article`` through a fresh ``Publisher('data_distributor')``.

        :param annotated_article: payload forwarded as-is to
            ``Publisher.send_message`` along with ``'articles'``
            (presumably the routing key -- confirm against ``Publisher``).
        """
        publisher = Publisher('data_distributor')
        publisher.send_message(annotated_article, 'articles')

    def handle_delivery(self, channel, method, header, body):
        """Handle a single delivered message, then acknowledge it.

        :param channel: object providing ``basic_ack``; called after the
            annotated article has been passed to ``send_to_broker``.
        :param method: delivery metadata; only ``delivery_tag`` is used.
        :param header: unused.
        :param body: JSON-encoded article.
        :raises ValueError: if ``body`` is not valid JSON; nothing is
            published or acknowledged in that case.
        """
        # Decoded first so invalid JSON fails before any publish/ack; this is
        # also the input the annotation step (TODO below) will work on.
        article = json.loads(body)

        # 1. TODO: All annotator work would ideally be done here. Import the
        #    annotator module being developed.
        # 2. TODO: Once the article that was sent is annotated, send it back
        #    to be saved to the database.

        annotated_article = json.dumps('new article here')
        self.send_to_broker(annotated_article)

        channel.basic_ack(delivery_tag=method.delivery_tag)
        # Call form of print: identical output on Python 2, valid on Python 3.
        print('--Done Annotating Article--')

    def run(self):
        """Create the ``Consumer`` for 'annotator_worker', keep it on ``self`` and start it."""
        self.consumer = Consumer(
            'data_distributor',
            'annotator_worker',
            'articles',
            handle_message=self.handle_delivery,
        )

        self.consumer.start()
	def run(self):
		"""Build the consumer for 'annotator_worker', store it on ``self`` and start it.

		The three positional arguments are presumably exchange, queue and
		routing key -- confirm against ``Consumer``.
		"""
		consumer = Consumer(
			'data_distributor',
			'annotator_worker',
			'articles',
			handle_message=self.handle_delivery,
		)
		# Stored on the instance before starting, exactly as before.
		self.consumer = consumer
		self.consumer.start()
Example #6
0
class ElasticSearchWorker(object):
    """Worker that indexes delivered article messages into Elasticsearch."""

    def __init__(self):
        """Load ``../configs/settings.yaml`` (relative to this module) and create the client.

        :raises IOError: if the settings file cannot be opened.
        """
        self.module_path = os.path.dirname(os.path.realpath(__file__))
        self.config_dir = os.path.join(self.module_path, '..', 'configs')
        self.file_path = os.path.join(os.path.abspath(self.config_dir), 'settings.yaml')

        # The file is closed as soon as it has been parsed instead of being
        # left open for the lifetime of the worker. ``config_file`` is still
        # set for backward compatibility, but refers to a closed file.
        with open(self.file_path, 'r') as config_file:
            self.config_file = config_file
            # safe_load instead of yaml.load: a bare yaml.load(stream) can
            # construct arbitrary Python objects and requires an explicit
            # Loader on recent PyYAML releases. NOTE(review): if the settings
            # file relies on Python-specific YAML tags, this needs a full
            # loader instead -- confirm.
            self.config_data = yaml.safe_load(config_file)

        # Should ideally read from config file
        self.es = Elasticsearch(timeout=30)

    def convert_to_datetime(self, timestamp):
        """Return ``datetime.fromtimestamp(timestamp / 1000)``.

        The input is therefore treated as an epoch value in milliseconds and
        the result is a naive ``datetime`` in local time. With integer input
        on Python 2 the division truncates to whole seconds.

        :param timestamp: numeric epoch timestamp in milliseconds.
        """
        return datetime.fromtimestamp(timestamp / 1000)

    def handle_delivery(self, channel, method, header, body):
        """Index one delivered article into the "news" index and acknowledge it.

        :param channel: object whose ``basic_ack`` is called after indexing.
        :param method: delivery metadata; only ``delivery_tag`` is read.
        :param header: unused.
        :param body: JSON document; must contain ``time_of_crawl`` and
            ``meta_information`` (``meta_information.date_published`` is
            optional).
        :raises ValueError: if ``body`` is not valid JSON.
        :raises KeyError: if ``time_of_crawl`` or ``meta_information`` is
            missing; the message is not acknowledged in that case.
        """
        article = json.loads(body)

        # Timestamps arrive as numbers; convert them to datetime objects
        # before handing the document to the Elasticsearch client.
        article['time_of_crawl'] = self.convert_to_datetime(article['time_of_crawl'])
        meta_information = article['meta_information']
        if 'date_published' in meta_information:
            meta_information['date_published'] = self.convert_to_datetime(
                meta_information['date_published'])

        self.es.index(index="news", doc_type="article", body=article)
        # Refresh is requested after every single document.
        self.es.indices.refresh(index="news")

        # Acknowledge only after the index and refresh calls have returned.
        channel.basic_ack(delivery_tag=method.delivery_tag)
        # Call form of print: same output on Python 2 and valid on Python 3.
        print('--Done Inserting To ElasticSearch--')

    def run(self):
        """Create the ``Consumer`` for 'database_queue', keep it on ``self`` and start it.

        The positional arguments are presumably exchange, queue and routing
        key -- confirm against ``Consumer``.
        """
        self.consumer = Consumer(
            'data_distributor',
            'database_queue',
            'articles',
            handle_message=self.handle_delivery,
        )

        self.consumer.start()