コード例 #1
0
    def _enqueue(self, resource):

        # Discovered URI - capped collection --db.discovered_uri
        mongoConnection = Connection()
        db = mongoConnection.feed_reading_discovery

        # settings
        max_content_length = config['discovery']['resource_enqueueing']['max_content_length']

        # message queue
        mq_client = message_queue_client_from_config(config['message_queue']['client'])
        mq_client.connect()
        mq_codec = JSONCodec()
        queue = 'discovered_resources'

        try:
            #insert into capped
            db.discovered_uri.insert({'_id':makeIdFromURI(resource.uri), 'uri':resource.uri})
            #insert into queue
            if len(resource.content) > max_content_length:
                self._logger.warning('Skipped %s: Content length is %s.' %
                    (resource, len(resource.content)))
            msg_body = mq_codec.encode(resource)
            mq_client.put_message(queue, msg_body)
            self._logger.debug("Enqueued: %s" % resource._id)
        except DuplicateKeyError:
            pass

        # remove resource from collection
        self._resources_collection.remove_model(resource)
コード例 #2
0
    # MongoDB
    host = 'localhost'
    port = 27017
    mcm = MongoConnectionManager(host, port, MongoCodec())
    database = 'processed'
    resource_collection = mcm.get_collection(database, 'resources', Resource)

    # message queue
    mq_config = {
        'transport': 'socket',
        'protocol': 'binary',
        'host': 'localhost',
        'port': 9091
        }
    mq_client = message_queue_client_from_config(mq_config)
    mq_codec = JSONCodec()
    processed_resource_queue = 'processed_resources'

    # ElasticSearch
    es = ES('localhost:9200', timeout=60)
    es_index = 'topic_tracking'

    # dequeue one resource
    mq_client.connect()
    message = mq_client.get_message(processed_resource_queue)
    resource = mq_codec.decode(message.body, Resource)
    mq_client.delete_message(processed_resource_queue, message.id)
    mq_client.disconnect()

    # save the resource to mongo
    resource._id = makeIdFromURI(resource.uri)
コード例 #3
0
EMPTY_QUEUE_TIMEOUT = 1


if __name__ == '__main__':

    config_file = sys.argv[1]
    config = yaml.load(file(config_file, 'r'))

    # logging
    logging.config.dictConfig(config['logging'])
    logger = logging.getLogger()

    # message queue
    mq_client = message_queue_client_from_config(config['message_queue']['client'])
    mq_codec = JSONCodec()
    input_queue = 'discovered_resources'
    output_queue = 'processed_resources'

    # processing
    p_client = processing_client_from_config(config['processing']['client'])

    # start service clients
    mq_client.connect()
    p_client.connect()

    # begin processing
    while True:
        try:
            # input
            input_message = mq_client.get_message(input_queue)