def get_articles_by_story(self, story_id):
    """Return the stored articles of a story, importing the story if needed.

    The story is looked up with ``Story.find_by_id``. When that lookup
    yields ``None``, the story is requested from ``relegence_API`` (with
    ``numDocs`` set to 100), saved as a ``Story`` and every entry of its
    ``articles`` list is saved as an ``Article`` linked to that story.

    Args:
        story_id: Identifier handed to ``Story.find_by_id`` and, on a miss,
            to ``relegence_API.stories.by_story_id``.

    Returns:
        The result of ``Article.find_by_story`` for the found or newly
        created story.
    """
    story = Story.find_by_id(story_id)

    # Identity test on purpose: ``== None`` would be routed through any
    # ``__eq__`` the model class defines.
    if story is None:
        payload = relegence_API.stories.by_story_id(story_id, {'numDocs': 100})

        # NOTE: payload['topicCreationTime'] and
        # payload['lastTopicActivityTime'] are currently not stored.
        story = Story(
            story_id=payload['id'],
            title=payload['title'],
            mag_score=payload['magScore'],
            num_total_docs=payload['numTotalDocs'],
            num_original_docs=payload['numOriginalDocs'],
        )
        story.save()

        for item in payload['articles']:
            # The slash-free id doubles as article id and file name.
            safe_id = item['id'].replace('/', '|')
            Article(
                article_id=safe_id,
                story=story,
                source=item['source']['title'],
                link=item['link'],
                title=item['title'],
                snippet=item['snippet'],
                author=item['author']['name'],
                text=item['content'],
                file_name=safe_id,
                source_id=item['source']['id'],
            ).save()

    return Article.find_by_story(story)
def run_entity_dive_method(articles_collection):
    """Build a clustering list model with one sentiment clustering per entity.

    Steps, as performed below:

    1. Run the annotation pipeline over ``articles_collection``.
    2. Group the texts of the ``Entity`` annotations by their ``'wikidata'``
       value and use the longest text of each group as the entity name.
    3. For every entity, run a second pipeline over the first pipeline's
       result, take the ``Sentence`` annotations gathered by a store
       filtered on ``'FSentence'``, and cluster them with
       ``cluster_by_sentiment``.
    4. Wrap the per-entity clusterings with ``make_clustering_list_model``.

    Args:
        articles_collection: Corpus handed to ``Pipeline``; its ``story_id``
            attribute is read to label the returned model.

    Returns:
        The object produced by ``make_clustering_list_model``, with
        ``collection_id`` set to the story id and ``name`` set to the title
        of the matching ``Story``.
    """
    ent_store = AnnotationStore('Entity')
    # kt_store is part of the pipeline, but its annotations are not read here.
    kt_store = AnnotationStore('KeyTerm')
    doc_store = DocumentStore('~')

    prs = [
        DuplicateClearingPR(),
        SentimentAnalyserPR('Sentence'),
        SentimentHighlighter(),
        KeyTermAnnotatorPR(),
        RelEntityTagger(),
        BratEmbeddingToMongoPR(
            ['KeyTerm', 'PosSentiment', 'NegSentiment', 'Entity']),
        ent_store,
        kt_store,
        doc_store,
    ]
    pipe = Pipeline(articles_collection)
    pipe.setPRs(prs).setCorpus(articles_collection)
    result = pipe.process()

    # Collect every surface text seen for each 'wikidata' value.
    texts_by_entity = defaultdict(list)
    for ent in ent_store.annots:
        texts_by_entity[ent['wikidata']].append(ent.text)

    clust_dicts = {}
    # ``.items()`` instead of ``.iteritems()`` so the loop runs on both
    # Python 2 and Python 3.
    for ent_id, ent_texts in texts_by_entity.items():
        # Longest text names the entity; because the sort is stable, the
        # last-seen text wins among equally long candidates.
        ename = sorted(ent_texts, key=len)[-1]

        fe = CustomEntityFeatureExtractor(ent_id)
        fann_store = AnnotationStore(
            'Sentence', filterby_attrib_exist='FSentence')
        Pipeline(result).setPRs([fe, fann_store]).process()

        fsents = fann_store.annots
        X = get_sentiment(fsents)
        # Entities that share a name overwrite each other here.
        clust_dicts[ename] = cluster_by_sentiment(fsents, X)

    clustering_list_model = make_clustering_list_model(clust_dicts)
    clustering_list_model.collection_id = articles_collection.story_id
    clustering_list_model.name = Story.find_by_id(
        articles_collection.story_id).title
    return clustering_list_model