Example no. 1
0
 def get_articles_by_story(self, story_id):
     """Return all stored articles for *story_id*, fetching on a cache miss.

     Looks the story up locally first; if it is not yet stored, pulls it
     (with up to 100 documents) from the Relegence API, persists the story
     and each of its articles, and then returns the stored articles.

     :param story_id: identifier of the story to fetch articles for
     :return: the result of ``Article.find_by_story`` for the story
     """
     story = Story.find_by_id(story_id)
     if story is None:  # fixed: compare to None with `is`, not `==` (PEP 8)
         s = relegence_API.stories.by_story_id(story_id, {'numDocs': 100})
         story = smodel = Story(story_id=s['id'], title=s['title'],
                                mag_score=s['magScore'],
                                num_total_docs=s['numTotalDocs'],
                                num_original_docs=s['numOriginalDocs']
                                # ,topic_creation_time=s['topicCreationTime'],
                                # last_topic_activity_time=s['lastTopicActivityTime']
                                )
         smodel.save()
         for a in s['articles']:
             # Article ids may contain '/', which is unsafe in a file name,
             # so the id doubles as file name with '/' mapped to '|'.
             fname = a['id'].replace('/', '|')
             amodel = Article(article_id=fname, story=smodel,
                              source=a['source']['title'],
                              link=a['link'], title=a['title'],
                              snippet=a['snippet'],
                              author=a['author']['name'],
                              text=a['content'], file_name=fname,
                              source_id=a['source']['id'])
             amodel.save()
     return Article.find_by_story(story)
Example no. 2
0
def run_entity_dive_method(articles_collection):
    """Run the entity-dive pipeline over *articles_collection*.

    Annotates the collection (sentiment, key terms, entities), groups the
    entity mentions by wikidata id, and for each entity clusters its
    filtered sentences by sentiment.

    :param articles_collection: corpus object with a ``story_id`` attribute,
        accepted by ``Pipeline`` — assumed, confirm against callers
    :return: the clustering-list model built by ``make_clustering_list_model``,
        tagged with the collection's story id and story title
    """
    ent_store = AnnotationStore('Entity')
    kt_store = AnnotationStore('KeyTerm')

    doc_store = DocumentStore('~')

    # Processing resources: the stores at the end collect what the
    # annotators before them produce.
    prs = [
        DuplicateClearingPR(),
        SentimentAnalyserPR('Sentence'),
        SentimentHighlighter(),
        KeyTermAnnotatorPR(),
        RelEntityTagger(),
        BratEmbeddingToMongoPR(['KeyTerm', 'PosSentiment', 'NegSentiment', 'Entity']),
        ent_store, kt_store, doc_store]

    pipe = Pipeline(articles_collection)
    pipe.setPRs(prs).setCorpus(articles_collection)

    result = pipe.process()
    ent_annots = ent_store.annots

    # Group mention texts by wikidata id, then keep the longest surface
    # form as each entity's display name (ties broken arbitrarily).
    unique_ent_dict = defaultdict(list)  # fixed: idiomatic over lambda: []
    for ent in ent_annots:
        unique_ent_dict[ent['wikidata']].append(ent.text)
    # fixed: .items() instead of Py2-only .iteritems(); max() instead of
    # sorting just to take the longest element.
    for ent_id, ent_texts in unique_ent_dict.items():
        unique_ent_dict[ent_id] = max(ent_texts, key=len)

    # For each entity, extract its sentences and cluster them by sentiment.
    clust_dicts = {}
    for ent_id, ename in unique_ent_dict.items():
        fe = CustomEntityFeatureExtractor(ent_id)
        fann_store = AnnotationStore('Sentence', filterby_attrib_exist='FSentence')
        Pipeline(result).setPRs([fe, fann_store]).process()
        fsents = fann_store.annots
        X = get_sentiment(fsents)
        clust_dicts[ename] = cluster_by_sentiment(fsents, X)

    # fixed: corrected the misspelled local name "clutering_list_model".
    clustering_list_model = make_clustering_list_model(clust_dicts)
    clustering_list_model.collection_id = articles_collection.story_id
    clustering_list_model.name = Story.find_by_id(articles_collection.story_id).title
    return clustering_list_model