def _enqueue(self, resource):
    # Discovered URIs are tracked in a capped collection (db.discovered_uri)
    mongoConnection = Connection()
    db = mongoConnection.feed_reading_discovery

    # settings
    max_content_length = config['discovery']['resource_enqueueing']['max_content_length']

    # message queue
    mq_client = message_queue_client_from_config(config['message_queue']['client'])
    mq_client.connect()
    mq_codec = JSONCodec()
    queue = 'discovered_resources'

    try:
        # insert into the capped collection; a duplicate _id means the URI was already discovered
        db.discovered_uri.insert({'_id': makeIdFromURI(resource.uri), 'uri': resource.uri})

        # insert into the queue, skipping resources whose content is too large
        if len(resource.content) > max_content_length:
            self._logger.warning('Skipped %s: Content length is %s.' % (resource, len(resource.content)))
        else:
            msg_body = mq_codec.encode(resource)
            mq_client.put_message(queue, msg_body)
            self._logger.debug("Enqueued: %s" % resource._id)
    except DuplicateKeyError:
        pass

    # remove the resource from the working collection either way
    self._resources_collection.remove_model(resource)
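# The method above assumes db.discovered_uri already exists as a capped
# collection with a unique _id per URI. A minimal sketch of how it could be
# created with pymongo is shown below; the 10 MB size limit and the helper
# name are assumptions for illustration, not taken from the original code.
from pymongo import Connection

def create_discovered_uri_collection():
    db = Connection().feed_reading_discovery
    if 'discovered_uri' not in db.collection_names():
        # a capped collection keeps insertion order and discards the oldest
        # documents once the size limit is reached
        db.create_collection('discovered_uri', capped=True, size=10 * 1024 * 1024)
    return db.discovered_uri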
# MongoDB
host = 'localhost'
port = 27017
mcm = MongoConnectionManager(host, port, MongoCodec())
database = 'processed'
resource_collection = mcm.get_collection(database, 'resources', Resource)

# message queue
mq_config = {
    'transport': 'socket',
    'protocol': 'binary',
    'host': 'localhost',
    'port': 9091
}
mq_client = message_queue_client_from_config(mq_config)
mq_codec = JSONCodec()
processed_resource_queue = 'processed_resources'

# ElasticSearch
es = ES('localhost:9200', timeout=60)
es_index = 'topic_tracking'

# dequeue one resource
mq_client.connect()
message = mq_client.get_message(processed_resource_queue)
resource = mq_codec.decode(message.body, Resource)
mq_client.delete_message(processed_resource_queue, message.id)
mq_client.disconnect()

# save the resource to mongo
resource._id = makeIdFromURI(resource.uri)
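# The snippet above sets up es and es_index but stops before the actual
# indexing step. A minimal sketch of how a dequeued resource could be pushed
# into ElasticSearch with pyes is shown below; the doc_type 'resource', the
# indexed field names, and the function name are assumptions for illustration.
def index_resource(es, es_index, resource):
    # build a plain document from the resource fields used elsewhere in
    # these snippets (uri, content, _id)
    doc = {'uri': resource.uri, 'content': resource.content}
    # index under the resource's deterministic _id so that re-processing the
    # same URI overwrites the previous document instead of duplicating it
    es.index(doc, es_index, 'resource', id=resource._id)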
EMPTY_QUEUE_TIMEOUT = 1

if __name__ == '__main__':
    config_file = sys.argv[1]
    config = yaml.load(file(config_file, 'r'))

    # logging
    logging.config.dictConfig(config['logging'])
    logger = logging.getLogger()

    # message queue
    mq_client = message_queue_client_from_config(config['message_queue']['client'])
    mq_codec = JSONCodec()
    input_queue = 'discovered_resources'
    output_queue = 'processed_resources'

    # processing
    p_client = processing_client_from_config(config['processing']['client'])

    # start service clients
    mq_client.connect()
    p_client.connect()

    # begin processing
    while True:
        try:
            # input
            input_message = mq_client.get_message(input_queue)
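# The loop above is cut off right after the get_message() call, and
# EMPTY_QUEUE_TIMEOUT is defined but not yet used in the visible code.
# A minimal sketch of one polling iteration is shown below; treating a None
# return value as "queue empty", the p_client.process() call, and the
# function name are assumptions about APIs not shown in the snippet.
import time

def poll_once(mq_client, p_client, mq_codec, input_queue, output_queue, logger):
    input_message = mq_client.get_message(input_queue)
    if input_message is None:
        # nothing to do; back off before polling again
        time.sleep(EMPTY_QUEUE_TIMEOUT)
        return
    resource = mq_codec.decode(input_message.body, Resource)
    processed = p_client.process(resource)  # hypothetical processing call
    mq_client.put_message(output_queue, mq_codec.encode(processed))
    mq_client.delete_message(input_queue, input_message.id)
    logger.debug('Processed: %s' % resource.uri)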