コード例 #1
0
ファイル: test.py プロジェクト: gui11aume/pubcron
 def test_fetch_ids(self):
    """Fetch two known PMIDs and compare the returned titles.

    Per the original note, this also tests 'SAXmed.eFetchResultHandler'.
    """
    expected_titles = (
        u'Update on PARP1 inhibitors in ovarian cancer.',
        u'Replication clamps and clamp loaders.',
    )
    # Unpacking doubles as a check that exactly two records come back.
    first, second = eUtils.fetch_ids(['22180407', '23580981'])
    self.assertEqual(first['title'], expected_titles[0])
    self.assertEqual(second['title'], expected_titles[1])
コード例 #2
0
ファイル: feedback.py プロジェクト: gui11aume/pubcron
   def post(self):
      """Record a user's relevance feedback on PubMed abstracts.

      Reads 'uid' and 'checksum' from the POST parameters, plus any
      number of parameters named after a PMID whose value is 'Yes'
      (relevant) or 'No' (irrelevant). Once the identity and PMID
      checks pass, the user's stored relevant/irrelevant document
      lists are updated with freshly fetched records (reduced to
      'pmid', 'title' and 'tfidf'), serialized as zlib-compressed
      JSON, saved with 'data.put()', and 'feedback.html' is written
      to the response.

      Any failed check ends the request silently (nothing is written
      and nothing is saved).
      """
      # Who is it? Get it from the POST parameters.
      uid = self.request.get('uid')
      data = models.UserData.get_by_key_name(uid)
      if data is None:
         # Unknown user: there is nothing to validate or to update.
         return

      # Check that POST is issued from PubCron mail. The outcome of
      # 'validate_request' must actually be honored: the mere presence
      # of a 'checksum' parameter proves nothing.
      # NOTE(review): 'validate_request' is defined outside this block.
      # A plain True is taken as "request is valid"; any other truthy
      # value is taken as the expected checksum and must match the
      # posted one -- confirm against its definition.
      verdict = self.validate_request(data)
      posted_checksum = self.request.get('checksum')
      if not verdict or not posted_checksum:
         # Could not check identity (hacked?!!): good-bye.
         return
      if verdict is not True and posted_checksum != verdict:
         # Checksum mismatch: good-bye.
         return

      # Identity check successful. Do the update.
      new_relevant_pmids = []
      new_irrelevant_pmids = []

      # Process key/value pairs.
      for name in self.request.arguments():
         # NB: only PMID updates correspond to a value equal to
         # "Yes" or "No". The other cases are either no answer
         # or non PMID POST parameters (like uid or checksum).
         answer = self.request.get(name)
         if answer == 'Yes':
            new_relevant_pmids.append(name)
         elif answer == 'No':
            new_irrelevant_pmids.append(name)

      # It is unlikely that a malicious request went
      # until here, but because we are about to save user-
      # submitted data, we do a validity (security) check.
      pmids_to_update = new_relevant_pmids + new_irrelevant_pmids
      if not self.validate_pmid(pmids_to_update):
         # Validation failed: good-bye.
         return

      # From here, PMIDs have been parsed and checked.
      # Now recall and parse user JSON data.
      mu_corpus = utils.decrypt(data, 'mu_corpus')
      relevant_docs = utils.decrypt(data, 'relevant_docs')
      irrelevant_docs = utils.decrypt(data, 'irrelevant_docs')

      # Clear new docs from user data (in case users are notifying
      # that they change their mind on relevance). The lists are
      # rebuilt in place: removing items from a list while iterating
      # over it skips the element following each removal.
      updated_pmids = set(pmids_to_update)
      for stored_docs in (relevant_docs, irrelevant_docs):
         stored_docs[:] = [
            doc for doc in stored_docs
            if doc.get('pmid') not in updated_pmids
         ]

      # Now, get the PubMed data and compute tf-idf.
      for (new_ids, doc_list) in (
            (new_relevant_pmids, relevant_docs),
            (new_irrelevant_pmids, irrelevant_docs)):

         new_docs = eUtils.fetch_ids(new_ids)
         new_tfidf = tfidf.compute_from_texts(
             [abstr.get('text', '') for abstr in new_docs],
             mu_corpus.values()
         )
         for (doc, tfidf_dict) in zip(new_docs, new_tfidf):
            # Keep only fields 'pmid' and 'title'. Iterate over a
            # snapshot of the keys because the dict is modified.
            for field_name in list(doc.keys()):
               if field_name not in ('pmid', 'title'):
                  doc.pop(field_name, None)
            # Add field 'tfidf'.
            doc['tfidf'] = tfidf_dict
         # Append to user data.
         doc_list.extend(new_docs)
         and_finally_remove_junk_from(doc_list)

      # Update the documents...
      data.relevant_docs = zlib.compress(json.dumps(relevant_docs))
      data.irrelevant_docs = zlib.compress(json.dumps(irrelevant_docs))
      # ... and put.
      data.put()

      # Now reassure the user.
      self.response.out.write(utils.render('feedback.html'))