def process_article(article):
    """Assign metadata to one article and update the module-level tallies.

    ``meta.assign_solution_concs`` runs first and its return value is used
    as a status code:

    * ``1``  -- the remaining ``meta.assign_*`` annotators are run and
      ``articlesProcessed`` is incremented.
    * ``-1`` -- counted in ``articlesNoFullText`` and ``jnNoFullText``.
    * ``-2`` -- counted in ``articlesNoMethodsTag`` and ``jnNoMethodsTag``.
    * ``-3`` -- counted in ``articlesMethodsTooSmall`` and
      ``jnMethodsTooSmall``.

    Any other status leaves every counter untouched.  The ``jn*`` mappings
    are keyed by ``article.journal`` and hold a per-journal count.

    Returns nothing; the results are the side effects of the annotators
    and the updated module-level counters.
    """
    # These names are rebound by ``+=``.  Without the declaration Python
    # treats them as function locals and the first increment raises
    # UnboundLocalError instead of updating the module-level tally.
    global articlesProcessed, articlesNoFullText
    global articlesNoMethodsTag, articlesMethodsTooSmall

    processed = meta.assign_solution_concs(article)

    # The status codes are mutually exclusive, hence one if/elif chain.
    if processed == 1:
        meta.assign_species(article)
        meta.assign_electrode_type(article)
        meta.assign_strain(article)
        meta.assign_prep_type(article)
        meta.assign_rec_temp(article)
        meta.assign_animal_age(article)
        meta.assign_jxn_potential(article)
        articlesProcessed += 1
    elif processed == -1:
        journal = article.journal
        jnNoFullText[journal] = jnNoFullText.get(journal, 0) + 1
        articlesNoFullText += 1
    elif processed == -2:
        journal = article.journal
        jnNoMethodsTag[journal] = jnNoMethodsTag.get(journal, 0) + 1
        articlesNoMethodsTag += 1
    elif processed == -3:
        journal = article.journal
        jnMethodsTooSmall[journal] = jnMethodsTooSmall.get(journal, 0) + 1
        articlesMethodsTooSmall += 1
def apply_article_metadata(article=None):
    """Run every metadata annotator over one article or over all of them.

    When ``article`` is truthy only that article is handled and no progress
    output is produced.  Otherwise every distinct ``m.Article`` that has a
    full text attached is handled; a banner line is printed first and
    ``prog`` is called at the start of each iteration.

    For each article the ``assign_metadata`` annotators are invoked in a
    fixed order, after which the ``m.ArticleFullTextStat`` row belonging to
    the article's full text is fetched (or created) and saved with
    ``metadata_processed`` set to ``True``.
    """
    single_article = bool(article)

    if single_article:
        targets = [article]
        total = 1
    else:
        # Earlier revisions narrowed this queryset further (articles with no
        # metadata yet, full texts not already metadata-processed, methods
        # tag found, not human-assigned); those filters are disabled here.
        with_full_text = m.Article.objects.filter(articlefulltext__isnull=False)
        targets = with_full_text.distinct()
        total = targets.count()
        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement.
        print('annotating %s articles for metadata...' % total)

    # Annotators are looked up by name immediately before each call so the
    # invocation order is exactly the sequence listed here.
    annotator_names = (
        'assign_species',
        'assign_electrode_type',
        'assign_strain',
        'assign_rec_temp',
        'assign_prep_type',
        'assign_animal_age',
        'assign_jxn_potential',
        'assign_solution_concs',
    )

    for position, current in enumerate(targets):
        if not single_article:
            prog(position, total)

        for annotator_name in annotator_names:
            getattr(assign_metadata, annotator_name)(current)

        # Record on the full-text stat row that metadata has been processed.
        full_text = current.get_full_text()
        stat_row, _created = m.ArticleFullTextStat.objects.get_or_create(
            article_full_text=full_text
        )
        stat_row.metadata_processed = True
        stat_row.save()
def process_article(article_id):
    """Look up an article by primary key, annotate it, and print the status.

    ``article_id`` is converted with ``int`` and used to fetch the
    ``m.Article`` row.  ``meta.assign_solution_concs`` runs first; only when
    it returns ``1`` are the remaining ``meta.assign_*`` annotators run.
    The status value is printed in every case.  Nothing is returned.
    """
    record = m.Article.objects.get(id=int(article_id))
    status = meta.assign_solution_concs(record)

    if status == 1:
        # Each annotator is resolved right before it is called, in the
        # order listed.
        follow_up_annotators = (
            'assign_species',
            'assign_electrode_type',
            'assign_strain',
            'assign_prep_type',
            'assign_rec_temp',
            'assign_animal_age',
            'assign_jxn_potential',
        )
        for annotator_name in follow_up_annotators:
            getattr(meta, annotator_name)(record)

    # Single parenthesised argument: same output under the Python 2 print
    # statement.
    print(status)
articlesNoMethodsTag += 1 if processed == -3: global articlesMethodsTooSmall articlesMethodsTooSmall += 1 ''' path = os.getcwd() os.chdir("/Users/dtebaykin/Desktop/raw_full_texts") articles = m.Article.objects.all() for a in articles: try: meta.assign_solution_concs(a) meta.assign_species(a) meta.assign_electrode_type(a) meta.assign_strain(a) meta.assign_prep_type(a) meta.assign_rec_temp(a) meta.assign_animal_age(a) meta.assign_jxn_potential(a) except: print "Exception occurred for article %s" % a.pk os.chdir(path) #print "Out of %s articles: %s processed, %s no full text, %s no methods tag, %s methods section #too small\n" % (articlesTotal, articlesProcessed, articlesNoFullText, articlesNoMethodsTag, articlesMethodsTooSmall) # print "Journals with no full text attached: %s\n" % jnNoFullText # print "Journals with no methods tag: %s\n" % jnNoMethodsTag # print "Journals with methods section too short: %s\n" % jnMethodsTooSmall print "done"