# -*- coding: utf-8 -*- from datetime import datetime from os import listdir from os.path import isfile, join from concurrent import futures from epoms.db import * from epoms.entity_extract import EntityExtract INDEX_NAME = 'epoms' TIMEOUT = 300 MAX_WORKER = 1 config = EPOMSConfig() en = EntityExtract() news = (News().select()) for n in news: print '>> Extracting Entity [%5d]' % ( n.id ) try: names = en.extract_name( n.content ) keys = names.keys() for i in keys: for j in range(names[i]): print i except Exception as exc: pass
start = new_start return res for n in news: try: content = n.content sentences = sent_tokenize(content) sentences = merge_sentence(sentences, mode) print "Getting %d groups of text from document %d" % (len(sentences), n.id) for s in sentences: names = en.extract_name(s) keys = names.keys() for i in range(len(keys)): name1, created = Name().get_or_create(name=keys[i]) orig = keys[i].replace(" ", "_") for j in range(len(keys)): if i != j: # Save graph to db name2, created = Name().get_or_create(name=keys[j]) Name_Graph.create(name1=name1, name2=name2, doc_id=n.id) dest = keys[j].replace(" ", "_") print "### ", orig, dest except Exception as exc: pass # print '--> Error %s' % n.as_dict()