def test_fetch_ids(self):
    """This also tests 'SAXmed.eFetchResultHandler'."""
    expected_titles = (
        u'Update on PARP1 inhibitors in ovarian cancer.',
        u'Replication clamps and clamp loaders.',
    )
    # Fetch both abstracts in a single request and compare titles
    # position by position.
    abstracts = eUtils.fetch_ids(['22180407', '23580981'])
    for (abstract, expected) in zip(abstracts, expected_titles):
        self.assertEqual(abstract['title'], expected)
def post(self):
    """Handle a relevance-feedback POST from a PubCron mail.

    Authenticates the request, collects the "Yes"/"No" PMID answers
    from the POST parameters, re-fetches those abstracts from PubMed,
    computes their tf-idf vectors and stores the updated relevant /
    irrelevant document lists back into the user's datastore entity.
    Returns nothing; on failed validation the request is dropped
    silently (no feedback page is rendered).
    """
    # Who is it? Get it from the POST parameters.
    uid = self.request.get('uid')
    data = models.UserData.get_by_key_name(uid)
    # Check that POST is issued from PubCron mail.
    # BUG FIX: the original tested the raw 'checksum' POST parameter
    # instead of the result of validate_request(), so any request
    # carrying a non-empty 'checksum' value passed the identity check.
    checksum = self.validate_request(data)
    if not checksum:
        # Could not check identity (hacked?!!): good-bye.
        return
    # Identity check successful. Do the update.
    new_relevant_pmids = []
    new_irrelevant_pmids = []
    # Process key/value pairs.
    # NB: only PMID updates correspond to a value of "Yes" or "No".
    # The other cases are either no answer or non-PMID POST
    # parameters (like uid or checksum).
    for name in self.request.arguments():
        answer = self.request.get(name)
        if answer == 'Yes':
            new_relevant_pmids.append(name)
        elif answer == 'No':
            new_irrelevant_pmids.append(name)
    # It is unlikely that a malicious request went until here, but
    # because we are about to save user-submitted data, we do a
    # validity (security) check.
    pmids_to_update = new_relevant_pmids + new_irrelevant_pmids
    if not self.validate_pmid(pmids_to_update):
        # Validation failed: good-bye.
        return
    # From here, PMIDs have been parsed and checked.
    # Now recall and parse user JSON data.
    mu_corpus = utils.decrypt(data, 'mu_corpus')
    relevant_docs = utils.decrypt(data, 'relevant_docs')
    irrelevant_docs = utils.decrypt(data, 'irrelevant_docs')
    # Clear new docs from user data (in case users are notifying
    # that they changed their mind on relevance).
    # BUG FIX: the original removed items from the list while
    # iterating it, which skips the element after each removal.
    # Rebuild each list in place instead (set for O(1) membership).
    updated_set = set(pmids_to_update)
    for doc_list in (relevant_docs, irrelevant_docs):
        doc_list[:] = [doc for doc in doc_list
                       if doc.get('pmid') not in updated_set]
    # Now, get the PubMed data and compute tf-idf.
    for (new_ids, doc_list) in (
            (new_relevant_pmids, relevant_docs),
            (new_irrelevant_pmids, irrelevant_docs)):
        new_docs = eUtils.fetch_ids(new_ids)
        new_tfidf = tfidf.compute_from_texts(
            [abstr.get('text', '') for abstr in new_docs],
            mu_corpus.values()
        )
        for (doc, tfidf_dict) in zip(new_docs, new_tfidf):
            # Keep only fields 'pmid' and 'title'.
            # BUG FIX: pop over a list copy of the keys — mutating
            # a dict while iterating its key view raises
            # RuntimeError on Python 3.
            for field_name in list(doc.keys()):
                if field_name not in ('pmid', 'title'):
                    doc.pop(field_name, None)
            # Add field 'tfidf'.
            doc['tfidf'] = tfidf_dict
        # Append to user data.
        # (The original also called the undefined helper
        # 'and_finally_remove_junk_from' here — a NameError; removed.)
        doc_list.extend(new_docs)
    # Update the documents...
    data.relevant_docs = zlib.compress(json.dumps(relevant_docs))
    data.irrelevant_docs = zlib.compress(json.dumps(irrelevant_docs))
    # ... and put.
    data.put()
    # Now reassure the user.
    self.response.out.write(utils.render('feedback.html'))