Ejemplo n.º 1
0
def process(data):
    """Run named-entity extraction over each sentence and publish ``data``.

    For every ``sid``/sentence pair in ``data['sents']`` the sentence text is
    tokenized with ``tokenize`` and the tokens are passed to
    ``NER.extract_entities``.  The tokens are stored on the sentence under
    ``'tokens'`` and the entities under ``data['ner_info'][sid]`` as
    ``(span, tag, score)`` tuples.

    A sentence that raises is logged with its traceback and skipped; the
    remaining sentences are still processed and ``data`` is sent to
    ``PUBLISH`` regardless.

    Args:
        data: Mapping holding a ``'sents'`` mapping of sentence id to a dict
            with at least a ``'text'`` entry.  Mutated in place.
    """
    rabbit_publish = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    data['ner_info'] = {}
    # .items() exists on both Python 2 and 3 dicts (iteritems() is 2-only).
    # Only the per-sentence dicts are mutated below, never the key set.
    for sid, sent in data['sents'].items():
        try:
            logger.debug('Processing sentence %s', sid)
            tokens = tokenize(sent['text'])
            entities = NER.extract_entities(tokens)

            new_ents = []
            for entity in entities:
                # MITIE returns xrange iters. __reduce__ exposes the
                # constructor arguments; keep the first two (start and stop
                # for an xrange) as a plain tuple of ints.
                first, second = entity[0].__reduce__()[1][:2]
                new_ents.append(((first, second), entity[1], entity[2]))
            sent['tokens'] = tokens
            data['ner_info'][sid] = new_ents
        except Exception:
            # Best effort: record the failure with its traceback and carry on
            # with the next sentence.
            logger.exception('Failed to extract entities for sentence %s',
                             sid)

    logger.info('Finished processing content.')

    rabbit_publish.send(data, PUBLISH)
Ejemplo n.º 2
0
def process(data, tfidf, clf):
    """Predict relevancy from ``data['title']`` and publish ``data``.

    The title is transformed with ``tfidf.transform`` and the first element
    of ``clf.predict`` on the result is stored under
    ``data['predicted_relevancy']``.  If any of that raises, the error is
    logged with its traceback and ``data`` is still published, just without
    the ``'predicted_relevancy'`` entry.

    Args:
        data: Mapping with a ``'title'`` entry.  Mutated in place.
        tfidf: Object exposing ``transform(list_of_titles)``.
        clf: Object exposing ``predict(matrix)`` returning an indexable.
    """
    rabbit_publish = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    try:
        mat = tfidf.transform([data['title']])
        data['predicted_relevancy'] = clf.predict(mat)[0]
    except Exception:
        # Best effort: keep the traceback in the log and fall through so the
        # message is still forwarded.
        logger.exception('Failed to predict relevancy.')
    else:
        logger.info('Finished processing content.')

    rabbit_publish.send(data, PUBLISH)
Ejemplo n.º 3
0
    def post(self):
        """Prepare the posted data, key it by SHA-1 and send it to ``PUBLISH``.

        The request arguments are parsed with ``self.reqparse``, the
        ``'data'`` argument is run through ``utils.prep_data``, and the SHA-1
        hex digest of the joined ``data['sents']`` is stored under
        ``data['pipeline_key']`` before the data is sent.

        Returns:
            The hex digest stored as ``data['pipeline_key']``.
        """
        args = self.reqparse.parse_args()

        rabbit = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')

        logger.info('Received data...')
        data = utils.prep_data(args['data'])
        # NOTE(review): hashlib.sha1 requires bytes on Python 3, so this line
        # presumably relies on the joined value being a Python 2 byte string
        # -- confirm before porting.
        key = hashlib.sha1(''.join(data['sents'])).hexdigest()
        data['pipeline_key'] = key

        logger.info('Sending downstream with key {}...'.format(key))
        rabbit.send(data, PUBLISH)

        # Use the same logger as the rest of the method rather than the root
        # ``logging`` module so all three messages are routed identically.
        logger.info('Sent {}'.format(key))
        return key
Ejemplo n.º 4
0
    def post(self):
        """Prepare the posted data, key it by a UUID and send it to ``PUBLISH``.

        The request arguments are parsed with ``self.reqparse``, the
        ``'data'`` argument is run through ``utils.prep_data``, and a fresh
        ``uuid.uuid4()`` string is stored under ``data['pipeline_key']``
        before the data is sent.

        Returns:
            The UUID string stored as ``data['pipeline_key']``.
        """
        args = self.reqparse.parse_args()

        rabbit = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')

        logger.info('Received data...')
        data = utils.prep_data(args['data'])
        pipeline_key = str(uuid.uuid4())
        data['pipeline_key'] = pipeline_key

        logger.info('Sending to the downstream with key {}...'.format(pipeline_key))
        rabbit.send(data, PUBLISH)

        # Use the same logger as the rest of the method rather than the root
        # ``logging`` module so all three messages are routed identically.
        logger.info('Sent {}'.format(pipeline_key))
        return pipeline_key
Ejemplo n.º 5
0
def process(data):
    """Parse each sentence with ``ParseyPredFace`` and publish ``data``.

    For every ``sid``/sentence pair in ``data['sents']`` the UTF-8 encoded
    sentence text is passed to ``ParseyPredFace.parse`` and the output is
    stored under ``data['predicate_info'][sid]``.

    A sentence that raises is logged with its traceback and skipped; the
    remaining sentences are still processed and ``data`` is sent to
    ``PUBLISH`` regardless.

    Args:
        data: Mapping holding a ``'sents'`` mapping of sentence id to a dict
            with at least a ``'text'`` entry.  Mutated in place.
    """
    rabbit_publish = utils.RabbitClient(queue=PUBLISH,
                                        host='rabbitmq')
    data['predicate_info'] = {}
    # .items() exists on both Python 2 and 3 dicts (iteritems() is 2-only).
    for sid, sent in data['sents'].items():
        try:
            output = ParseyPredFace.parse(sent['text'].encode('utf-8'))
            data['predicate_info'][sid] = output
        except Exception:
            # Best effort: record the failure with its traceback and carry on
            # with the next sentence.
            logger.exception('Failed to parse sentence %s', sid)

    logger.info('Finished processing content.')

    rabbit_publish.send(data, PUBLISH)
Ejemplo n.º 6
0
def extract(message):
    """Code the events of every ``event_info`` entry and publish the story.

    For each key in ``message['event_info']`` the entry's sentence text is
    UTF-8 encoded and passed to ``send_to_corenlp``; the result goes to
    ``send_to_petr``.  The truthy items of the decoded response are stored
    under the entry's ``'coded'`` list.

    Failures are per entry and never abort the whole message: if PETR raises
    or its response cannot be decoded, the problem is logged, that entry is
    left without a ``'coded'`` list, and processing moves on.  The story is
    sent to ``PUBLISH`` once all entries have been attempted.

    Args:
        message: Mapping with an ``'event_info'`` mapping whose values hold
            ``['sent']['text']``.  Mutated in place.
    """
    rabbit_publish = utils.RabbitClient(queue=PUBLISH,
                                        host='rabbitmq')

    story = message

    # Iterate over a snapshot of the keys so the loop is unaffected by
    # anything that touches the mapping while an entry is processed.
    for val in list(story['event_info']):
        logger.info('Processing {}'.format(val))
        text = story['event_info'][val]['sent']['text']
        text = text.encode('utf-8')

        event_dict = send_to_corenlp(story, text)

        try:
            events_r = send_to_petr(event_dict)
        except Exception as e:
            logger.info('There was an exception with PETR. {}\n'.format(e))
            # There is no response object to decode, so skip this entry
            # instead of failing later on a placeholder value.
            continue

        # Decode exactly once: calling .json() again inside an error handler
        # would re-raise and take the whole message down with it.
        try:
            event_updated = events_r.json()
        except Exception:
            logger.exception('Something went wrong in the formatting.')
            continue

        try:
            coded = story['event_info'][val]['coded'] = []
            coded.extend(event for event in event_updated if event)
        except Exception:
            logger.exception('Something went wrong in the formatting.')
            logger.info(json.dumps(event_updated))

    rabbit_publish.send(story, PUBLISH)
Ejemplo n.º 7
0
def main():
    """Create a client for the ``CONSUME`` queue and pass ``callback`` to its ``receive``."""
    utils.RabbitClient(queue=CONSUME, host='rabbitmq').receive(callback)
Ejemplo n.º 8
0
def publish(data):
    """Send ``data`` to ``PUBLISH`` through a newly created ``utils.RabbitClient``."""
    utils.RabbitClient(queue=PUBLISH, host='rabbitmq').send(data, PUBLISH)