예제 #1
0
def getfactors(RL, articletype, cancer, short_cancer,
               output_path=r'C:\Users\JAG\USN-dz\RL_dz_pm_impacts_meshb.csv'):
    """Add per-row paper counts and summed impact factors to ``RL``.

    For every row of ``RL`` a query is assembled from the value in the row's
    second column (wrapped in parentheses), the fixed date range
    ``2008:2013 [DP]``, an optional ``[PT]`` filter and ``cancer``.
    ``pmhits`` supplies the hit count for that query.  When the count is
    positive, every record returned by ``pubmedsearch`` has its ``'TA'``
    field normalised (upper-cased, periods removed) and looked up in
    ``IFdict``; the values found are summed into the row's impact factor.

    ``pmhits``, ``pubmedsearch``, ``IFdict`` and ``URLError`` are expected
    to be provided by the enclosing module.

    Args:
        RL: pandas DataFrame; its second column holds the organisation
            search term.
        articletype: publication type used as a ``[PT]`` filter; the value
            ``'All'`` disables the filter.
        cancer: query fragment appended to the end of every search.
        short_cancer: short label used in the new column titles and echoed
            to stdout.
        output_path: CSV file the updated table is written to.  Defaults to
            the path that used to be hard-coded.

    Returns:
        ``RL`` itself, with the columns
        ``'Impact factor - <short_cancer> <articletype>'`` and
        ``'Paper count - <short_cancer> <articletype>'`` added.

    Side effects:
        Prints the sorted, de-duplicated list of journal abbreviations that
        had no usable entry in ``IFdict`` followed by ``short_cancer``, and
        writes ``RL`` to ``output_path``.
    """
    columntitlecount = 'Paper count - ' + short_cancer + ' ' + articletype
    columntitleIF = 'Impact factor - ' + short_cancer + ' ' + articletype

    searchterm = "2008:2013 [DP] "
    if articletype != 'All':
        searchterm += articletype + "[PT] "

    article_count = []
    article_factors = []
    bad_journals = set()

    # Column 1 (positional) is the organisation term, as in the original
    # RL.iloc[b][1] lookup.
    for org_term in RL.iloc[:, 1]:
        full_searchterm = '(' + org_term + ') ' + searchterm + cancer

        # Try the count twice.  If both attempts fail, record a missing
        # value instead of crashing on an unbound name or silently reusing
        # the previous row's count.
        cancer_count = None
        for _attempt in range(2):
            try:
                cancer_count = pmhits(full_searchterm)
                break
            except URLError:
                continue
        article_count.append(cancer_count)

        cancer_factor = 0
        if cancer_count is not None and cancer_count > 0:
            # Sum the impact factor of the journal of every record found.
            for record in pubmedsearch(full_searchterm):
                TA = record.get('TA', '?').upper().replace('.', '')
                try:
                    cancer_factor += float(IFdict.get(TA))
                except (TypeError, ValueError):
                    # TypeError: journal missing from IFdict (None).
                    # ValueError: entry present but not numeric.
                    bad_journals.add(TA)
        article_factors.append(cancer_factor)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(sorted(bad_journals))
    print(short_cancer)
    RL[columntitleIF] = article_factors
    RL[columntitlecount] = article_count
    RL.to_csv(output_path, sep=',', index=False)
    return RL
예제 #2
0
def getfactors(articletype, cancer, short_cancer):
    """Return the hit count and summed impact factor for one search.

    The query is the fixed date range ``2008:2013 [DP]``, an optional
    ``[PT]`` filter and ``cancer``.  ``pmhits`` supplies the hit count.
    Every record returned by ``pubmedsearch`` (called with
    ``MAX_COUNT = 100000``) has its ``'TA'`` field normalised (upper-cased,
    periods removed) and looked up in ``IFdict``; the values found are
    summed.

    ``pmhits``, ``pubmedsearch``, ``IFdict`` and ``URLError`` are expected
    to be provided by the enclosing module.

    Args:
        articletype: publication type used as a ``[PT]`` filter; the value
            ``'All'`` disables the filter.
        cancer: query fragment appended to the end of the search.
        short_cancer: not used by this function; kept so existing callers
            keep working.

    Returns:
        A ``(cancer_count, cancer_factor)`` tuple.  ``cancer_count`` is
        ``None`` when ``pmhits`` raised ``URLError`` on both attempts.

    Side effects:
        Prints the full search term, ``articletype`` and the sorted list of
        distinct journal abbreviations with no usable ``IFdict`` entry.
    """
    searchterm = "2008:2013 [DP] "
    if articletype != 'All':
        searchterm += articletype + "[PT] "
    full_searchterm = searchterm + cancer

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(full_searchterm)

    # Try the count twice.  If both attempts fail, return None for the count
    # rather than hitting an unbound local at the return statement.
    cancer_count = None
    for _attempt in range(2):
        try:
            cancer_count = pmhits(full_searchterm)
            break
        except URLError:
            continue

    cancer_factor = 0
    bad_journals = set()
    # Sum the impact factor of the journal of every record found.
    for record in pubmedsearch(full_searchterm, MAX_COUNT=100000):
        TA = record.get('TA', '?').upper().replace('.', '')
        try:
            cancer_factor += float(IFdict.get(TA))
        except (TypeError, ValueError):
            # TypeError: journal missing from IFdict (None).
            # ValueError: entry present but not numeric.
            bad_journals.add(TA)

    print(articletype)
    # Sorted so the diagnostic output is deterministic between runs.
    print(sorted(bad_journals))
    return (cancer_count, cancer_factor)
예제 #3
0
'''usnews wrangle
To wrangle complicated files with stacked information

'''
import pandas as pd
import numpy as np
import pubmedsearch as pm

# Raw strings keep the Windows backslashes literal.  Without the r prefix,
# Python 3 rejects '\U...' as a malformed unicode escape; on Python 2 the
# resulting value is unchanged.
List_path = r'C:\Users\JAG\USnewsy\RL-trials-5yr.csv'
# NOTE(review): same file as List_path, so the input CSV is overwritten by
# the result -- confirm this is intended.
Completed_path = r'C:\Users\JAG\USnewsy\RL-trials-5yr.csv'

# Read in the csv
RL = pd.read_csv(List_path, index_col=False, header=0)

# One hit count per institution, in the same order as RL['Search_Term'].
phase2 = []
phase3 = []
clinicaltrials = []

# NOTE(review): the Phase III and Clinical Trial queries carry a comma before
# 'Cancer[MAJR]' while the Phase II query does not.  The strings are kept
# exactly as they were; confirm whether the difference is intentional.
for institution in RL['Search_Term']:
    # Shared prefix: institution term plus the 2008-2013 date range.
    prefix = institution + '[AD] 2008:2013 [DP] '
    phase2.append(pm.pmhits(prefix + 'Clinical Trial, Phase II[PT] Cancer[MAJR]'))
    phase3.append(pm.pmhits(prefix + 'Clinical Trial, Phase III[PT], Cancer[MAJR]'))
    clinicaltrials.append(pm.pmhits(prefix + 'Clinical Trial[PT], Cancer[MAJR]'))

RL['Phase 2'] = phase2
RL['Phase 3'] = phase3
RL['Clinical Trials'] = clinicaltrials

# NOTE(review): to_csv writes the row index as an extra first column by
# default, and this file is also the input -- verify that repeated runs do
# not accumulate index columns.
RL.to_csv(Completed_path)