def getfactors (RL, articletype, cancer, short_cancer,
                csv_path = r'C:\Users\JAG\USN-dz\RL_dz_pm_impacts_meshb.csv'):
    """Add paper-count and summed impact-factor columns to RL and save it.

    One PubMed query is issued per row of ``RL``.  The query is the value in
    the row's second column wrapped in parentheses, followed by the fixed
    ``2008:2013 [DP]`` date filter, an optional ``[PT]`` publication-type
    filter and ``cancer``.

    Parameters:
        RL: pandas DataFrame with one row per organization.  It must have a
            ``Search_Term`` column; the query text is read from the column
            at position 1.
        articletype: publication type used for the ``[PT]`` filter, or
            ``'All'`` to omit that filter.
        cancer: query fragment appended to the end of every search.
        short_cancer: label used in the two new column titles.
        csv_path: file the updated table is written to.  Defaults to the
            path that used to be hard-coded in the function body.

    Returns:
        ``RL`` itself, with ``'Impact factor - <short_cancer> <articletype>'``
        and ``'Paper count - <short_cancer> <articletype>'`` columns added.

    Side effects:
        Prints the sorted, de-duplicated journal titles for which no usable
        impact factor was found, prints ``short_cancer`` and writes ``RL``
        to ``csv_path``.

    NOTE(review): ``pmhits``, ``pubmedsearch``, ``IFdict`` and ``URLError``
    are expected to be defined or imported elsewhere in the module.
    """
    columntitlecount = 'Paper count - ' + short_cancer + ' ' + articletype
    columntitleIF = 'Impact factor - ' + short_cancer + ' ' + articletype

    searchterm = "2008:2013 [DP] "
    if articletype != 'All':
        searchterm += articletype + "[PT] "

    article_count = []
    article_factors = []
    bad_journals = set()

    for org_term in RL.iloc[:, 1]:
        full_searchterm = '(' + org_term + ') ' + searchterm + cancer

        # The hit count gets exactly one retry.  If that fails as well the
        # row is recorded as 0 hits; previously the name was left unbound on
        # the first row, or silently kept the previous organization's count.
        try:
            cancer_count = pmhits(full_searchterm)
        except URLError:
            try:
                cancer_count = pmhits(full_searchterm)
            except URLError:
                print('PubMed hit count failed twice, recording 0 for: '
                      + full_searchterm)
                cancer_count = 0
        article_count.append(cancer_count)

        cancer_factor = 0
        if cancer_count > 0:
            # Sum the impact factor of the journal of every returned record.
            for record in pubmedsearch(full_searchterm):
                # Normalise the title abbreviation: upper case, no periods.
                journal = record.get('TA', '?').upper().replace('.', '')
                try:
                    cancer_factor += float(IFdict.get(journal))
                except (TypeError, ValueError):
                    # TypeError: journal missing from IFdict (value is None).
                    # ValueError: stored value is not a parseable number.
                    bad_journals.add(journal)
        article_factors.append(cancer_factor)

    print(sorted(bad_journals))
    print(short_cancer)
    RL[columntitleIF] = article_factors
    RL[columntitlecount] = article_count
    RL.to_csv(csv_path, sep = ',', index = False)
    return RL
Example #2
0
def getfactors (RL, articletype):
    """Add summed impact-factor and paper-count columns to RL.

    For every value in ``RL['Search_Term']`` a PubMed search is run with the
    suffix ``[AD] 2008:2013 [DP] ... Cancer[MAJR]`` (plus a ``[PT]``
    publication-type filter unless ``articletype`` is ``'All'``).  Each
    returned record counts as one paper, and the impact factor of its
    journal is added to the organization's total.

    Parameters:
        RL: pandas DataFrame with a ``Search_Term`` column.
        articletype: publication type for the ``[PT]`` filter, or ``'All'``.

    Returns:
        ``RL`` itself, with ``'Impact factor - <articletype>'`` and
        ``'Paper count - <articletype>'`` columns added.

    Side effects:
        Prints ``articletype``, the number of records with and without a
        usable impact factor, and the count and sorted list of distinct
        journal titles without one.

    NOTE(review): ``pubmedsearch`` and ``IFdict`` are expected to be defined
    or imported elsewhere in the module.
    """
    columntitleIF = 'Impact factor - ' + articletype
    columntitlecount = 'Paper count - ' + articletype

    if articletype != 'All':
        searchterm = " [AD] 2008:2013 [DP] " + articletype + "[PT] Cancer[MAJR]"
    else:
        searchterm = " [AD] 2008:2013 [DP] Cancer[MAJR]"

    IFs = []
    article_count = []
    unmatched = set()
    hit_count = 0
    miss_count = 0

    for org in RL['Search_Term']:
        org_factor = 0
        org_count = 0
        for record in pubmedsearch(org + searchterm):
            org_count += 1
            # Normalise the title abbreviation: upper case, no periods.
            journal = record.get('TA', '?').upper().replace('.', '')
            try:
                factor = float(IFdict.get(journal))
            except (TypeError, ValueError):
                # TypeError: journal missing from IFdict (value is None).
                # ValueError: stored value is not a parseable number.
                unmatched.add(journal)
                miss_count += 1
            else:
                org_factor += factor
                hit_count += 1
        IFs.append(org_factor)
        article_count.append(org_count)

    bad_journals = sorted(unmatched)
    RL[columntitleIF] = IFs
    RL[columntitlecount] = article_count

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(articletype)
    print(hit_count)
    print(miss_count)
    print(len(bad_journals))
    print(bad_journals)

    return RL
Example #3
0
def getfactors (articletype, cancer, short_cancer):
    """Return the PubMed hit count and summed impact factor for one query.

    The query is the fixed ``2008:2013 [DP]`` date filter, an optional
    ``[PT]`` publication-type filter and ``cancer``.

    Parameters:
        articletype: publication type for the ``[PT]`` filter, or ``'All'``
            to omit that filter.
        cancer: query fragment appended to the search term.
        short_cancer: accepted for signature compatibility; not used here.

    Returns:
        ``(cancer_count, cancer_factor)``: the value returned by ``pmhits``
        for the query, and the sum of the impact factors of the journals of
        all records returned by ``pubmedsearch``.

    Raises:
        URLError: if the hit-count query fails twice in a row.  The second
        failure used to be swallowed, which left ``cancer_count`` unbound
        and made the function die with ``UnboundLocalError`` only after the
        whole search had been processed.

    Side effects:
        Prints the full search term, ``articletype`` and the distinct
        journal titles for which no usable impact factor was found.

    NOTE(review): ``pmhits``, ``pubmedsearch``, ``IFdict`` and ``URLError``
    are expected to be defined or imported elsewhere in the module.
    """
    searchterm = "2008:2013 [DP] "
    if articletype != 'All':
        searchterm += articletype + "[PT] "
    full_searchterm = searchterm + cancer

    print(full_searchterm)
    try:
        cancer_count = pmhits(full_searchterm)
    except URLError:
        # One retry; a second failure propagates to the caller.
        cancer_count = pmhits(full_searchterm)

    cancer_factor = 0
    bad_journals = set()
    for record in pubmedsearch(full_searchterm, MAX_COUNT = 100000):
        # Normalise the title abbreviation: upper case, no periods.
        journal = record.get('TA', '?').upper().replace('.', '')
        try:
            cancer_factor += float(IFdict.get(journal))
        except (TypeError, ValueError):
            # TypeError: journal missing from IFdict (value is None).
            # ValueError: stored value is not a parseable number.
            bad_journals.add(journal)

    print(articletype)
    print(list(bad_journals))
    return (cancer_count, cancer_factor)
Example #4
0
import pandas as pd
from pubmedsearch import pubmedsearch, pmhits

# Read in the rank-list which includes the organizations.
# Raw string so the backslashes (notably "\U") are never treated as escapes.
RL_path = r'C:\Users\JAG\USnewsy\RL-trials-5yr.csv'
# squeeze=True is dropped: it only has an effect on single-column files, the
# table is indexed by column name below, and current pandas rejects it.
RL = pd.read_csv(RL_path, index_col=False, header=0)

# Parallel lists, one entry per publication record.
PMIDs = []
DPs = []
ADs = []
TAs = []

# get publications for each organization
for org in RL['Search_Term']:
    records = pubmedsearch(org + " [AD] 2008:2013 [DP] Clinical Trial, Phase III[PT] Cancer[MAJR]")

    for record in records:
        PMIDs.append(record.get('PMID', '?'))
        DPs.append(record.get('DP', '?'))
        ADs.append(org)  # the search term that produced this record
        TAs.append(record.get('TA', '?'))

d = {'PMID' : PMIDs, 'DATE' : DPs, 'ORGANIZATION' : ADs, 'JOURNAL' : TAs}
df = pd.DataFrame(d)

# Number of PMIDs per (organization, journal) pair.  `index`/`columns`
# replace the removed `rows`/`cols` keywords.
pivot = df.pivot_table(values='PMID', index='ORGANIZATION', columns='JOURNAL', aggfunc='count')
Example #5
0
def getfactors (cancers, articletype, org = None, orgshort = None):
    """Add summed impact-factor and paper-count columns to cancers.

    For every row of ``cancers`` the values in column positions 2 to 8 are
    combined into one PubMed title/abstract query: each value that is not
    NaN becomes ``<keyword>[TIAB]``, and a keyword ending in ``cancer`` also
    contributes the same phrase with ``carcinoma`` in its place.  The
    clauses are joined with ``OR`` and appended to ``org``, the fixed
    ``2008:2013 [DP]`` date filter and an optional ``[PT]`` filter.

    Parameters:
        cancers: pandas DataFrame with a ``kw1`` column; keywords are read
            from column positions 2 to 8 of each row.
        articletype: publication type for the ``[PT]`` filter, or ``'All'``.
        org: text placed in front of the search term.  ``None`` (the
            default) adds nothing.
        orgshort: suffix for the two new column titles.  ``None`` (the
            default) adds nothing.

    Returns:
        ``cancers`` itself, with ``'Impact factor - <articletype><orgshort>'``
        and ``'Paper count - <articletype><orgshort>'`` columns added.

    Side effects:
        Prints each search term, followed by its impact-factor total and
        record count.

    NOTE(review): ``pubmedsearch`` and ``IFdict`` are expected to be defined
    or imported elsewhere in the module.
    """
    # The declared None defaults used to raise TypeError on concatenation.
    org = '' if org is None else org
    orgshort = '' if orgshort is None else orgshort

    columntitleIF = 'Impact factor - ' + articletype + orgshort
    columntitlecount = 'Paper count - ' + articletype + orgshort

    searchterm = "2008:2013 [DP] "
    if articletype != 'All':
        searchterm += articletype + "[PT] "

    IFs = []
    article_count = []

    for _, row in cancers.iterrows():
        # string together all the synonyms to make a single string
        clauses = []
        for keyword in row.iloc[2:9]:
            if str(keyword) == 'nan':  # empty cell
                continue
            clauses.append(keyword + '[TIAB] OR ')
            if keyword.endswith('cancer'):
                clauses.append(keyword[:-6] + 'carcinoma[TIAB] OR ')
        # Drop the trailing "OR " left by the last clause.
        keys = ''.join(clauses)[:-3]

        full_searchterm = org + searchterm + keys
        print(full_searchterm)

        cancer_factor = 0
        cancer_count = 0
        for record in pubmedsearch(full_searchterm):
            cancer_count += 1
            # Normalise the title abbreviation: upper case, no periods.
            journal = record.get('TA', '?').upper().replace('.', '')
            try:
                cancer_factor += float(IFdict.get(journal))
            except (TypeError, ValueError):
                # No usable impact factor for this journal (missing entry
                # or non-numeric value); it contributes nothing to the sum.
                pass
        IFs.append(cancer_factor)
        article_count.append(cancer_count)
        print(cancer_factor)
        print(cancer_count)

    cancers[columntitleIF] = IFs
    cancers[columntitlecount] = article_count

    return cancers