def getfactors(RL, articletype, cancer, short_cancer,
               csv_path=r'C:\Users\JAG\USN-dz\RL_dz_pm_impacts_meshb.csv'):
    """Add paper-count and summed-impact-factor columns to RL for one cancer.

    For every row of RL a PubMed query is built from the value in the row's
    second column (wrapped in parentheses), the fixed 2008:2013 [DP] date
    filter, an optional [PT] publication-type filter and ``cancer``.  The
    number of hits is taken from ``pmhits``; when it is positive the records
    are fetched with ``pubmedsearch`` and the impact factor of each record's
    journal, looked up in the module-level ``IFdict``, is added up.

    Args:
        RL: pandas DataFrame with a ``Search_Term`` column whose second
            column (position 1) holds the organization search term.  It is
            modified in place.
        articletype: publication type used in the ``[PT]`` filter, or
            ``'All'`` to search without a publication-type filter.
        cancer: query fragment appended to the end of every search.
        short_cancer: label used in the two new column titles.
        csv_path: file the updated DataFrame is written to.  Defaults to
            the path that used to be hard-coded in this function.

    Returns:
        RL, with the columns ``'Impact factor - <short_cancer> <articletype>'``
        and ``'Paper count - <short_cancer> <articletype>'`` added.  A row
        whose hit count could not be retrieved after two attempts gets None
        in both columns.

    Side effects:
        Prints the sorted journal abbreviations for which no impact factor
        could be used, prints ``short_cancer`` and writes RL to ``csv_path``.
    """
    article_count = []
    article_factors = []
    bad_journals = set()
    columntitlecount = 'Paper count - ' + short_cancer + ' ' + articletype
    columntitleIF = 'Impact factor - ' + short_cancer + ' ' + articletype
    if articletype != 'All':
        searchterm = "2008:2013 [DP] " + articletype + "[PT] "
    else:
        searchterm = "2008:2013 [DP] "

    for b in range(len(RL.Search_Term)):
        # The organization term is read positionally from the second column.
        org = '(' + RL.iloc[b, 1] + ') '
        full_searchterm = org + searchterm + cancer

        # Try the count lookup twice.  Previously a second URLError was
        # swallowed, which left cancer_count unbound on the first row and
        # silently reused the previous organization's count on later rows.
        for _attempt in range(2):
            try:
                cancer_count = pmhits(full_searchterm)
            except URLError:
                continue
            break
        else:
            print('PubMed count lookup failed twice, no data recorded for: '
                  + full_searchterm)
            article_count.append(None)
            article_factors.append(None)
            continue

        article_count.append(cancer_count)
        cancer_factor = 0
        if cancer_count > 0:
            for record in pubmedsearch(full_searchterm):
                # Normalize the journal abbreviation before the lookup:
                # upper case, periods removed.
                TA = record.get('TA', '?').upper().replace('.', '')
                factor = IFdict.get(TA)
                try:
                    # Add this journal's impact factor to the row total.
                    cancer_factor = cancer_factor + float(factor)
                except TypeError:
                    # float(None): the journal has no entry in IFdict.
                    bad_journals.add(TA)
        article_factors.append(cancer_factor)

    print(sorted(bad_journals))
    print(short_cancer)
    RL[columntitleIF] = article_factors
    RL[columntitlecount] = article_count
    RL.to_csv(csv_path, sep=',', index=False)
    return RL
def getfactors(RL, articletype):
    """Attach cancer paper counts and summed impact factors to the rank list.

    Each value of ``RL['Search_Term']`` is searched in PubMed as an
    affiliation ([AD]) restricted to 2008:2013 [DP] and Cancer[MAJR], and,
    unless ``articletype`` is ``'All'``, to that publication type ([PT]).
    Every returned record counts as one paper; the impact factor of its
    journal is looked up in the module-level ``IFdict`` and summed.

    Args:
        RL: pandas DataFrame with a ``Search_Term`` column; modified in place.
        articletype: publication type for the [PT] filter, or ``'All'``.

    Returns:
        RL with the columns ``'Impact factor - <articletype>'`` and
        ``'Paper count - <articletype>'`` added.

    Side effects:
        Prints, one per line: articletype, the number of records with a
        usable impact factor, the number without, the number of distinct
        journals without one, and the sorted list of those journals.
    """
    if_column = 'Impact factor - ' + articletype
    count_column = 'Paper count - ' + articletype

    query_suffix = " [AD] 2008:2013 [DP] Cancer[MAJR]"
    if articletype != 'All':
        query_suffix = " [AD] 2008:2013 [DP] " + articletype + "[PT] Cancer[MAJR]"

    factor_totals = []
    paper_totals = []
    unmatched = []
    hits = 0
    misses = 0

    for org in RL['Search_Term']:
        total = 0
        seen = 0
        for record in pubmedsearch(org + query_suffix):
            seen += 1
            # Journal abbreviation, normalized to upper case without periods.
            journal = record.get('TA', '?').upper().replace('.', '')
            raw_factor = IFdict.get(journal)
            try:
                value = float(raw_factor)
            except TypeError:
                # No usable impact factor for this journal.
                unmatched.append(journal)
                misses += 1
            else:
                # Add this journal's impact factor to the organization total.
                total = total + value
                hits += 1
        factor_totals.append(total)
        paper_totals.append(seen)

    unmatched = sorted(set(unmatched))
    RL[if_column] = factor_totals
    RL[count_column] = paper_totals

    for line in (articletype, hits, misses, len(unmatched), unmatched):
        print(line)
    return RL
def getfactors(articletype, cancer, short_cancer):
    """Return the PubMed hit count and summed impact factor for one cancer.

    The query is the fixed 2008:2013 [DP] date filter, an optional [PT]
    publication-type filter and ``cancer``.  The hit count comes from
    ``pmhits``; the records are fetched with ``pubmedsearch`` (up to
    100000) and the impact factor of each record's journal, looked up in
    the module-level ``IFdict``, is added up.

    Args:
        articletype: publication type for the [PT] filter, or ``'All'`` to
            search without a publication-type filter.
        cancer: query fragment appended to the search.
        short_cancer: not used by this function; kept so existing callers
            keep working.

    Returns:
        Tuple ``(cancer_count, cancer_factor)``.

    Raises:
        URLError: if ``pmhits`` fails on both attempts.  Previously the
            second failure was swallowed, the whole record fetch still ran,
            and the function then died with UnboundLocalError at ``return``.

    Side effects:
        Prints the full search term, ``articletype`` and the sorted journal
        abbreviations for which no impact factor could be used.
    """
    if articletype != 'All':
        searchterm = "2008:2013 [DP] " + articletype + "[PT] "
    else:
        searchterm = "2008:2013 [DP] "
    full_searchterm = searchterm + cancer
    print(full_searchterm)

    try:
        cancer_count = pmhits(full_searchterm)
    except URLError:
        # One retry; a second failure propagates to the caller before the
        # expensive record fetch below is started.
        cancer_count = pmhits(full_searchterm)

    records = pubmedsearch(full_searchterm, MAX_COUNT=100000)
    cancer_factor = 0
    bad_journals = set()
    for record in records:
        # Normalize the journal abbreviation before the lookup:
        # upper case, periods removed.
        TA = record.get('TA', '?').upper().replace('.', '')
        factor = IFdict.get(TA)
        try:
            # Add this journal's impact factor to the total.
            cancer_factor = cancer_factor + float(factor)
        except TypeError:
            # float(None): the journal has no entry in IFdict.
            bad_journals.add(TA)

    print(articletype)
    print(sorted(bad_journals))
    return (cancer_count, cancer_factor)
import pandas as pd
from pubmedsearch import pubmedsearch, pmhits

# Collects, for every organization in the rank list, the PubMed records that
# match "<organization> [AD] 2008:2013 [DP] Clinical Trial, Phase III[PT]
# Cancer[MAJR]" and tabulates them per organization and journal.

# Read in the rank-list which includes the organizations.
# Raw string: the Windows path contains backslashes that must stay literal
# ("\U", "\J" and "\R" are not valid escapes; the value is unchanged).
RL_path = r'C:\Users\JAG\USnewsy\RL-trials-5yr.csv'
# NOTE(review): squeeze= is a keyword of older pandas releases - confirm
# against the installed version.
RL = pd.read_csv(RL_path, index_col=False, header=0, squeeze=True)

# Parallel lists: entry i of each list describes the same PubMed record.
PMIDs = []
DPs = []
ADs = []
TAs = []

# get publications for each organization
for org in RL['Search_Term']:
    records = pubmedsearch(org + " [AD] 2008:2013 [DP] Clinical Trial, Phase III[PT] Cancer[MAJR]")
    for record in records:
        # Fields missing from a record are stored as '?'.
        uid = record.get('PMID', '?')
        PMIDs.append(uid)
        DP = record.get('DP', '?')
        DPs.append(DP)
        # The organization is the search term that produced the record,
        # not a field read from the record itself.
        ADs.append(org)
        TA = record.get('TA', '?')
        TAs.append(TA)

d = {'PMID' : PMIDs, 'DATE' : DPs, 'ORGANIZATION' : ADs, 'JOURNAL' : TAs}
df = pd.DataFrame(d)

# Number of records per (organization, journal) pair.
# NOTE(review): rows=/cols= are the old pivot_table keyword names; newer
# pandas releases spell them index=/columns= - confirm the target version.
pivot = df.pivot_table('PMID', rows ='ORGANIZATION', cols ='JOURNAL', aggfunc = 'count')
def getfactors(cancers, articletype, org = None, orgshort = None):
    """Add per-cancer paper counts and summed impact factors to ``cancers``.

    For every row of ``cancers`` the keyword synonyms in the columns at
    positions 2 through 8 are combined into one OR group of [TIAB] terms; a
    keyword ending in "cancer" also contributes the matching "carcinoma"
    form.  The group is searched in PubMed together with ``org``, the fixed
    2008:2013 [DP] date filter and an optional [PT] publication-type
    filter.  Every returned record counts as one paper; the impact factor
    of its journal, looked up in the module-level ``IFdict``, is summed.

    Args:
        cancers: pandas DataFrame with a ``kw1`` column; modified in place.
        articletype: publication type for the [PT] filter, or ``'All'``.
        org: query fragment placed in front of the search, or None for no
            organization restriction.
        orgshort: suffix for the two new column titles, or None for none.

    Returns:
        ``cancers`` with the columns
        ``'Impact factor - <articletype><orgshort>'`` and
        ``'Paper count - <articletype><orgshort>'`` added.

    Side effects:
        Prints every full search term followed by the summed impact factor
        and the paper count for that row.
    """
    # The declared defaults are None, which used to raise TypeError in the
    # string concatenations below; None now means "nothing to add".
    if org is None:
        org = ''
    if orgshort is None:
        orgshort = ''

    columntitleIF = 'Impact factor - ' + articletype + orgshort
    columntitlecount = 'Paper count - ' + articletype + orgshort
    if articletype != 'All':
        searchterm = "2008:2013 [DP] " + articletype + "[PT] "
    else:
        searchterm = "2008:2013 [DP] "

    IFs = []
    article_count = []
    for a in range(len(cancers.kw1)):
        # string together all the synonyms to make a single OR group
        terms = []
        for keyword in cancers.iloc[a][2:9]:
            # Skip cells whose text form is 'nan' (presumably empty
            # synonym cells - verify against the input file).
            if str(keyword) == 'nan':
                continue
            terms.append(keyword + '[TIAB]')
            if keyword[-6:] == 'cancer':
                terms.append(keyword[:-6] + 'carcinoma[TIAB]')

        full_searchterm = org + searchterm
        if terms:
            # PubMed evaluates Boolean operators left to right, so without
            # the parentheses the organization/date/type filters would only
            # apply to the first keyword of the OR list.
            full_searchterm = full_searchterm + '(' + ' OR '.join(terms) + ')'
        print(full_searchterm)

        cancer_factor = 0
        cancer_count = 0
        for record in pubmedsearch(full_searchterm):
            cancer_count += 1
            # Normalize the journal abbreviation before the lookup:
            # upper case, periods removed.
            TA = record.get('TA', '?').upper().replace('.', '')
            try:
                # Add this journal's impact factor to the row total.
                cancer_factor = cancer_factor + float(IFdict.get(TA))
            except TypeError:
                # float(None): no entry in IFdict; the record is counted
                # as a paper but adds nothing to the impact-factor total.
                continue

        IFs.append(cancer_factor)
        article_count.append(cancer_count)
        print(cancer_factor)
        print(cancer_count)

    cancers[columntitleIF] = IFs
    cancers[columntitlecount] = article_count
    return cancers