Beispiel #1
0
def generate_all_article_info(csv_dir="1. excel files"):
    '''
    Write a citation-info file next to every .xls file in csv_dir.

    For each file in csv_dir whose name ends with ".xls":
    1. Read the article title from it (get_article_title).
    2. Look up the PMID for that title (pubmedpy.get_pmid_from_title).
    3. If a PMID is found, fetch the citation and write one CSV row
        (pmid, title, mesh, authors, abstract, affiliation, journal,
        volume) to "<name>.article_info.txt" in the same directory.

    Titles for which no PMID is found are collected and printed at the
    end. Nothing is returned.
    '''
    xls_suffix = ".xls"
    failures = []
    for csv_f in os.listdir(csv_dir):
        if not csv_f.endswith(xls_suffix):
            continue

        xls_path = os.path.join(csv_dir, csv_f)
        cur_title = get_article_title(xls_path)
        # get the PubMed id from the title
        pmid = pubmedpy.get_pmid_from_title(cur_title)
        if not pmid:
            failures.append(cur_title)
            continue

        print("matched %s for %s." % (pmid, csv_f))
        citation_info = list(pubmedpy.fetch_articles([pmid]))[0]
        # The last unpacked field is not written to the output file.
        (title, mesh, authors, abstract, affiliation, journal, volume,
         _article) = get_fields_from_article(citation_info)

        # Swap only the trailing extension. A plain str.replace on the whole
        # path would also rewrite ".xls" occurring in the directory name or
        # in the middle of the file name, producing a wrong output path.
        out_path = xls_path[:-len(xls_suffix)] + ".article_info.txt"
        # Binary mode is kept from the original code (Python 2 csv usage).
        with open(out_path, 'wb') as out_f:
            csv.writer(out_f).writerow([
                pmid, title, mesh, authors, abstract, affiliation, journal,
                volume
            ])

    print("ok! %s failures (see below, if any). \n %s" % (
        len(failures), "\n".join(failures)))
def generate_all_article_info(csv_dir="1. excel files"):
    '''
    Write a citation-info file next to every .xls file in csv_dir.

    For each file in csv_dir whose name ends with ".xls":
    1. Read the article title from it (get_article_title).
    2. Look up the PMID for that title (pubmedpy.get_pmid_from_title).
    3. If a PMID is found, fetch the citation and write one CSV row
        (pmid, title, mesh, authors, abstract, affiliation, journal,
        volume) to "<name>.article_info.txt" in the same directory.

    Titles for which no PMID is found are collected and printed at the
    end. Nothing is returned.
    '''
    xls_suffix = ".xls"
    failures = []
    for csv_f in os.listdir(csv_dir):
        if not csv_f.endswith(xls_suffix):
            continue

        xls_path = os.path.join(csv_dir, csv_f)
        cur_title = get_article_title(xls_path)
        # get the PubMed id from the title
        pmid = pubmedpy.get_pmid_from_title(cur_title)
        if not pmid:
            failures.append(cur_title)
            continue

        print("matched %s for %s." % (pmid, csv_f))
        citation_info = list(pubmedpy.fetch_articles([pmid]))[0]
        # The last unpacked field is not written to the output file.
        (title, mesh, authors, abstract, affiliation, journal, volume,
         _article) = get_fields_from_article(citation_info)

        # Swap only the trailing extension. A plain str.replace on the whole
        # path would also rewrite ".xls" occurring in the directory name or
        # in the middle of the file name, producing a wrong output path.
        out_path = xls_path[:-len(xls_suffix)] + ".article_info.txt"
        # Binary mode is kept from the original code (Python 2 csv usage).
        with open(out_path, 'wb') as out_f:
            csv.writer(out_f).writerow([
                pmid, title, mesh, authors, abstract, affiliation, journal,
                volume
            ])

    print("ok! %s failures (see below, if any). \n %s" % (
        len(failures), "\n".join(failures)))
Beispiel #3
0
# Read the tab-delimited input at FILE_PATH, look up a PubMed ID for each
# row's title, and write the rows (with the PMID added) to OUT_PATH.
with open(FILE_PATH, 'rU') as f:
    reader = csv.DictReader(f, dialect='excel', delimiter='\t')

    with open(OUT_PATH, 'w') as csvfile:
        fieldnames = [hPmid, hCitation, hTitle, hPrediction, hHard]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Only data rows are written; no header row is emitted.
        for row in reader:
            citation_id = row[hCitation]
            title = row[hTitle]
            predicted = row[hPrediction]
            hard = row[hHard]

            # Any failure in the lookup or the int conversion falls back to
            # a placeholder so that the row is still written.
            try:
                pmid = int(pubmedpy.get_pmid_from_title(title))
            except Exception:
                pmid = '-- Cannot find --'

            # Values are listed in the same order as fieldnames.
            output_row = dict(
                zip(fieldnames, [pmid, citation_id, title, predicted, hard]))
            writer.writerow(output_row)

###############################################################################################
Beispiel #4
0
# Read the tab-delimited input at FILE_PATH, look up a PubMed ID for each
# row's title, and write the rows (with the PMID added) to OUT_PATH.
with open(FILE_PATH, 'rU') as f:
    reader = csv.DictReader(f, dialect='excel', delimiter='\t')

    with open(OUT_PATH, 'w') as csvfile:
        fieldnames = [hPmid, hCitation, hTitle, hPrediction, hHard]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Only data rows are written; no header row is emitted.
        for row in reader:
            citation_id = row[hCitation]
            title = row[hTitle]
            predicted = row[hPrediction]
            hard = row[hHard]

            # Any failure in the lookup or the int conversion falls back to
            # a placeholder so that the row is still written.
            try:
                pmid = int(pubmedpy.get_pmid_from_title(title))
            except Exception:
                pmid = '-- Cannot find --'

            # Values are listed in the same order as fieldnames.
            output_row = dict(
                zip(fieldnames, [pmid, citation_id, title, predicted, hard]))
            writer.writerow(output_row)

###############################################################################################

#### This is fixing Emily's project. The refman ids weren't saved when she imported the project
#PROJECT_ID = 219
#FILE_PATH = './Abstraktr_Update_Lit_Review_11.12.13.txt'
#citations_q = Session.query(model.Citation)