Example 1
 def get(self):
   # Task-queue worker: fetch the full comment tree, then archive it on
   # the Story entity.
   id = self.request.get('id')
   miner = Miner('/comments/' + id)
   tree = miner.populate()
   story = Story().getById(id)
   story.status = 'archived'
   story.name = tree['data']['title']
   story.comments = tree['data']['num_comments']
   story.permalink = tree['data']['permalink']
   story.compressed = True
   # Store the tree as zlib-compressed (level 9), base64-encoded JSON.
   story.json = base64.b64encode(zlib.compress(json.dumps(tree), 9))
   story.put()
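The archive blob above is a zlib-compressed, base64-encoded JSON dump. A minimal, standalone sketch of that round trip (pure standard library; the sample tree is made up):

import base64
import json
import zlib

tree = {'data': {'title': 'Example', 'num_comments': 301}}

# Encode: serialize, compress at maximum level (9), then base64 for storage.
blob = base64.b64encode(zlib.compress(json.dumps(tree).encode('utf-8'), 9))

# Decode: reverse each step; the original structure survives intact.
assert json.loads(zlib.decompress(base64.b64decode(blob)).decode('utf-8')) == tree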
Example 2
  def get(self):
    result = dict()
    id = self.request.get('id')
    story = Story().getById(id)
    if story is False:
      #Just take a little peek for sanity's sake
      miner = Miner('/comments/' + id)
      info = miner.get_info()
      if info:
        if info['data']['num_comments'] > 300:
          # Big threads take a while to fetch, so hand the job off to a
          # task-queue worker (Example 1) rather than doing it inline.
          taskqueue.add(url='/worker/save_story?id=' + id, method='GET')
          story = Story()
          story.id = id
          story.status = 'queued'
          story.put()
          result['message'] = 'Story added to queue for archiving'
        else:
          result['message'] = "That story doesn't have over 300 comments"
      else:
        result['message'] = "That story doesn't seem to exist. What does existence even mean?"
    elif story.status == 'queued':
      result['message'] = 'Still in the queue to be archived'
    else:
      result['message'] = 'Story already archived'

    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json.dumps(result))
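Both handlers above rely on a Story model whose getById helper returns False when nothing matches. The original model isn't shown; a plausible minimal sketch, with field names taken from the examples (an assumption, not the original code):

from google.appengine.ext import db

class Story(db.Model):
    id = db.StringProperty()
    name = db.StringProperty()
    status = db.StringProperty()          # 'queued' or 'archived'
    comments = db.IntegerProperty()
    permalink = db.StringProperty()
    compressed = db.BooleanProperty(default=False)
    json = db.TextProperty()

    def getById(self, story_id):
        # Return the matching entity, or False so callers can test
        # `story is False` as in Example 2. (Assumed behavior.)
        story = Story.all().filter('id = ', story_id).get()
        return story if story is not None else False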
Example 3
 def get(self):
     # Render the index template with every archived story,
     # ordered by comment count, highest first.
     stories = Story.all().filter('status = ',
                                  'archived').order('-comments')
     self.response.out.write(
         template.render(TEMPLATE_DIR + 'index.html', {
             'stories': stories,
             'page': 'index'
         }))
Example 4
 def get(self):
     id = self.request.get('id')
     story = Story().getById(id)
     # Default to a JSON `false` body when the story isn't archived yet.
     result = json.dumps(False)
     if story:
         if story.compressed:
             # Mirror of Example 1: base64-decode, then inflate back to JSON.
             result = zlib.decompress(base64.b64decode(story.json))
         else:
             result = story.json
     self.response.headers['Content-Type'] = 'application/json'
     self.response.out.write(result)
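Examples 1 through 4 fit together as one small webapp application: Example 2 queues work, Example 1 is the task-queue worker it points at, and Examples 3 and 4 serve the results. A hedged sketch of the wiring (handler class names and every route except /worker/save_story are assumptions):

from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app

application = webapp.WSGIApplication([
    ('/', IndexHandler),                      # Example 3: list archived stories
    ('/archive', ArchiveHandler),             # Example 2: queue a story
    ('/story.json', StoryJsonHandler),        # Example 4: serve the stored JSON
    ('/worker/save_story', SaveStoryWorker),  # Example 1: the queue worker
], debug=True)

run_wsgi_app(application)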
Example 5
 def get_articles_by_story(self, story_id):
     story = Story.find_by_id(story_id)
     if story is None:
         # Cache miss: pull the story and up to 100 articles from the
         # Relegence API and persist them locally.
         s = relegence_API.stories.by_story_id(story_id, {'numDocs': 100})
         story = smodel = Story(story_id=s['id'], title=s['title'], mag_score=s['magScore'],
                                num_total_docs=s['numTotalDocs'], num_original_docs=s['numOriginalDocs']
                                # ,topic_creation_time=s['topicCreationTime'], last_topic_activity_time=s['lastTopicActivityTime']
                                )
         smodel.save()
         for a in s['articles']:
             # Article ids contain '/', which clashes with file paths.
             fname = a['id'].replace('/', '|')
             amodel = Article(article_id=fname, story=smodel, source=a['source']['title'],
                              link=a['link'], title=a['title'], snippet=a['snippet'],
                              author=a['author']['name'], text=a['content'], file_name=fname,
                              source_id=a['source']['id'])
             amodel.save()
     return Article.find_by_story(story)
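Note that a['author']['name'] and a['source']['title'] raise KeyError whenever the API omits a field. A defensive lookup helper (hypothetical; not in the original) would make the loop tolerant of sparse records:

def safe_get(d, *keys, **kwargs):
    # Walk nested dicts, returning `default` as soon as a key is missing.
    default = kwargs.get('default', '')
    for k in keys:
        if not isinstance(d, dict) or k not in d:
            return default
        d = d[k]
    return d

# e.g. author=safe_get(a, 'author', 'name'), source=safe_get(a, 'source', 'title')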
Example 6
def run_entity_dive_method(articles_collection):
    # Stores that collect the annotations produced by the pipeline below.
    ent_store = AnnotationStore('Entity')
    kt_store = AnnotationStore('KeyTerm')

    doc_store = DocumentStore('~')

    prs = [
        DuplicateClearingPR(),
        SentimentAnalyserPR('Sentence'),
        SentimentHighlighter(),
        KeyTermAnnotatorPR(),
        RelEntityTagger(),
        BratEmbeddingToMongoPR(['KeyTerm', 'PosSentiment', 'NegSentiment', 'Entity']),
        ent_store, kt_store, doc_store]

    pipe = Pipeline(articles_collection)
    pipe.setPRs(prs).setCorpus(articles_collection)

    result = pipe.process()
    ent_annots = ent_store.annots
    # kt_annots = kt_store.annots
    # unique_kt = set([kt.text for kt in kt_annots])

    # Group entity mentions by their Wikidata id, then keep the longest
    # surface form as the display name for each entity.
    unique_ent_dict = defaultdict(list)
    for ent in ent_annots:
        unique_ent_dict[ent['wikidata']].append(ent.text)
    for key, ent_texts in unique_ent_dict.iteritems():
        unique_ent_dict[key] = max(ent_texts, key=len)

    # For each entity, pull out the sentences that mention it and cluster
    # them by sentiment.
    clust_dicts = {}
    for ent_id, ename in unique_ent_dict.iteritems():
        fe = CustomEntityFeatureExtractor(ent_id)
        fann_store = AnnotationStore('Sentence', filterby_attrib_exist='FSentence')
        Pipeline(result).setPRs([fe, fann_store]).process()
        fsents = fann_store.annots
        X = get_sentiment(fsents)
        clust_dicts[ename] = cluster_by_sentiment(fsents, X)

    clustering_list_model = make_clustering_list_model(clust_dicts)
    clustering_list_model.collection_id = articles_collection.story_id
    clustering_list_model.name = Story.find_by_id(articles_collection.story_id).title
    return clustering_list_model
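Examples 5 and 6 chain naturally: one fetches and caches the articles for a story, the other runs the entity-sentiment pipeline over them. A hedged usage sketch, assuming the collection returned by get_articles_by_story exposes the story_id attribute that run_entity_dive_method reads, and that `repo` stands in for whatever object defines get_articles_by_story:

# Hypothetical wiring; `repo` and the story id are placeholders.
articles_collection = repo.get_articles_by_story('some-story-id')
clustering_model = run_entity_dive_method(articles_collection)
clustering_model.save()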