def generate_all_article_info(csv_dir="1. excel files"):
    '''
    1. Parse all .xls files in the directory specified by csv_dir, and,
    2. Generate corresponding ".article_info.txt" files that include PMIDs
       and citation info (title, MeSH terms, authors, abstract, affiliation,
       journal, volume) for the articles annotated in those spreadsheets.

    Titles that cannot be matched to a PubMed entry are collected and
    reported at the end instead of aborting the run.
    '''
    failures = []
    for csv_f in [f for f in os.listdir(csv_dir) if f.endswith(".xls")]:
        cur_title = get_article_title(os.path.join(csv_dir, csv_f))
        # Resolve the spreadsheet's article title to a PubMed ID.
        pmid = pubmedpy.get_pmid_from_title(cur_title)
        if pmid:
            # Parenthesized print works identically under Python 2 and 3.
            print("matched %s for %s." % (pmid, csv_f))
            citation_info = list(pubmedpy.fetch_articles([pmid]))[0]
            (title, mesh, authors, abstract, affiliation,
             journal, volume, _article) = get_fields_from_article(citation_info)
            out_path = os.path.join(csv_dir, csv_f).replace(
                ".xls", ".article_info.txt")
            # 'wb' is the required mode for the csv module under Python 2.
            with open(out_path, 'wb') as out_f:
                csv_writer = csv.writer(out_f)
                csv_writer.writerow([pmid, title, mesh, authors, abstract,
                                     affiliation, journal, volume])
        else:
            # No PubMed match; remember the title so we can report it below.
            failures.append(cur_title)
    print("ok! %s failures (see below, if any). \n %s"
          % (len(failures), "\n".join(failures)))
def generate_all_article_info(csv_dir="1. excel files"):
    '''
    1. Parse all .xls files in the directory specified by csv_dir, and,
    2. Generate corresponding ".article_info.txt" files that include PMIDs
       and citation info (title, MeSH terms, authors, abstract, affiliation,
       journal, volume) for the articles annotated in those spreadsheets.

    Titles that cannot be matched to a PubMed entry are collected and
    reported at the end instead of aborting the run.

    NOTE(review): this is a duplicate definition of the function above and,
    being later in the file, is the one that takes effect — consider
    removing one copy.
    '''
    failures = []
    for csv_f in [f for f in os.listdir(csv_dir) if f.endswith(".xls")]:
        cur_title = get_article_title(os.path.join(csv_dir, csv_f))
        # Resolve the spreadsheet's article title to a PubMed ID.
        pmid = pubmedpy.get_pmid_from_title(cur_title)
        if pmid:
            # Parenthesized print works identically under Python 2 and 3.
            print("matched %s for %s." % (pmid, csv_f))
            citation_info = list(pubmedpy.fetch_articles([pmid]))[0]
            (title, mesh, authors, abstract, affiliation,
             journal, volume, _article) = get_fields_from_article(citation_info)
            out_path = os.path.join(csv_dir, csv_f).replace(
                ".xls", ".article_info.txt")
            # 'wb' is the required mode for the csv module under Python 2.
            with open(out_path, 'wb') as out_f:
                csv_writer = csv.writer(out_f)
                csv_writer.writerow([pmid, title, mesh, authors, abstract,
                                     affiliation, journal, volume])
        else:
            # No PubMed match; remember the title so we can report it below.
            failures.append(cur_title)
    print("ok! %s failures (see below, if any). \n %s"
          % (len(failures), "\n".join(failures)))
# Resolve PMIDs for the citations listed in the tab-delimited input file and
# write them, alongside the original columns, to OUT_PATH.
# 'rU' (universal newlines) is the Python 2 idiom for portable text reading.
with open(FILE_PATH, 'rU') as f:
    reader = csv.DictReader(f, dialect='excel', delimiter='\t')
    with open(OUT_PATH, 'w') as csvfile:
        fieldnames = [hPmid, hCitation, hTitle, hPrediction, hHard]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        for row in reader:
            title = row[hTitle]
            try:
                pmid = int(pubmedpy.get_pmid_from_title(title))
            except Exception:
                # Lookup failed (no match or a non-numeric PMID); record a
                # placeholder instead of aborting the whole run.
                pmid = '-- Cannot find --'
            writer.writerow({
                hPmid: pmid,
                hCitation: row[hCitation],
                hTitle: title,
                hPrediction: row[hPrediction],
                hHard: row[hHard],
            })
###############################################################################################
# Resolve PMIDs for the citations listed in the tab-delimited input file and
# write them, alongside the original columns, to OUT_PATH.
# NOTE(review): this duplicates the script block above — consider removing
# one copy.
# 'rU' (universal newlines) is the Python 2 idiom for portable text reading.
with open(FILE_PATH, 'rU') as f:
    reader = csv.DictReader(f, dialect='excel', delimiter='\t')
    with open(OUT_PATH, 'w') as csvfile:
        fieldnames = [hPmid, hCitation, hTitle, hPrediction, hHard]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        for row in reader:
            title = row[hTitle]
            try:
                pmid = int(pubmedpy.get_pmid_from_title(title))
            except Exception:
                # Lookup failed (no match or a non-numeric PMID); record a
                # placeholder instead of aborting the whole run.
                pmid = '-- Cannot find --'
            writer.writerow({
                hPmid: pmid,
                hCitation: row[hCitation],
                hTitle: title,
                hPrediction: row[hPrediction],
                hHard: row[hHard],
            })
###############################################################################################
#### This is fixing Emily's project. The refman ids weren't saved when she imported the project
#PROJECT_ID = 219
#FILE_PATH = './Abstraktr_Update_Lit_Review_11.12.13.txt'
#citations_q = Session.query(model.Citation)