def getfactors(RL, articletype, cancer, short_cancer,
               output_path=r'C:\Users\JAG\USN-dz\RL_dz_pm_impacts_meshb.csv'):
    """Add PubMed paper counts and summed impact factors per organization to RL.

    For every row of ``RL`` a PubMed query is built from the organization
    search term (the value in the second column, position 1), the fixed
    2008:2013 publication-date filter, an optional publication-type filter
    and the ``cancer`` query fragment. The number of hits and the sum of the
    impact factors of the journals of the returned records are stored in two
    new columns, and the resulting table is written to ``output_path``.

    Parameters:
        RL: pandas DataFrame with one row per organization. It is modified
            in place (two columns are added) and also returned.
        articletype: PubMed publication type used as a ``[PT]`` filter, or
            ``'All'`` to apply no publication-type filter.
        cancer: query fragment appended to the end of every search.
        short_cancer: short label used in the new column titles and printed
            at the end of the run.
        output_path: CSV file the updated table is written to. Defaults to
            the location the original script always used.

    Returns:
        The same DataFrame ``RL`` with the columns
        ``'Impact factor - <short_cancer> <articletype>'`` and
        ``'Paper count - <short_cancer> <articletype>'`` added.

    Notes:
        ``pmhits``, ``pubmedsearch``, ``IFdict`` and ``URLError`` are expected
        to be defined at module level elsewhere in the file.
        The hit count is attempted twice; if both attempts raise ``URLError``
        a warning is printed and the organization is recorded with 0 papers
        instead of crashing or silently reusing the previous row's count.
    """
    count_column = 'Paper count - ' + short_cancer + ' ' + articletype
    factor_column = 'Impact factor - ' + short_cancer + ' ' + articletype

    if articletype != 'All':
        searchterm = "2008:2013 [DP] " + articletype + "[PT] "
    else:
        searchterm = "2008:2013 [DP] "

    article_count = []
    article_factors = []
    # Journals for which no usable impact factor was found (deduplicated).
    bad_journals = set()

    # Column position 1 holds the organization search term, as in the
    # original positional lookup.
    for org_term in RL.iloc[:, 1]:
        full_searchterm = '(' + org_term + ') ' + searchterm + cancer

        # One try plus one retry, because the remote call fails transiently.
        cancer_count = None
        for _attempt in range(2):
            try:
                cancer_count = pmhits(full_searchterm)
                break
            except URLError:
                continue
        if cancer_count is None:
            print('WARNING: PubMed hit count failed twice for: ' + full_searchterm)
            cancer_count = 0
        article_count.append(cancer_count)

        cancer_factor = 0
        if cancer_count > 0:
            for record in pubmedsearch(full_searchterm):
                # Normalize the journal abbreviation before the lookup:
                # upper case, no periods.
                journal = record.get('TA', '?').upper().replace('.', '')
                factor = IFdict.get(journal)
                try:
                    # Add this journal's impact factor to the organization total.
                    cancer_factor = cancer_factor + float(factor)
                except (TypeError, ValueError):
                    # None (journal not in IFdict) or a non-numeric entry.
                    bad_journals.add(journal)
        article_factors.append(cancer_factor)

    # NOTE(review): the original indentation was lost; these summary prints
    # are assumed to run once, after all organizations were processed.
    print(sorted(bad_journals))
    print(short_cancer)

    RL[factor_column] = article_factors
    RL[count_column] = article_count
    RL.to_csv(output_path, sep=',', index=False)
    return RL
def getfactors(articletype, cancer, short_cancer):
    """Return the PubMed hit count and summed impact factor for one query.

    The query is the fixed 2008:2013 publication-date filter, an optional
    publication-type filter and the ``cancer`` query fragment.

    Parameters:
        articletype: PubMed publication type used as a ``[PT]`` filter, or
            ``'All'`` to apply no publication-type filter.
        cancer: query fragment appended to the end of the search.
        short_cancer: not used by this function; kept so existing callers
            keep working.

    Returns:
        A tuple ``(cancer_count, cancer_factor)``: the number of hits
        reported by ``pmhits`` and the sum of the impact factors of the
        journals of all records returned by ``pubmedsearch``.

    Notes:
        ``pmhits``, ``pubmedsearch``, ``IFdict`` and ``URLError`` are expected
        to be defined at module level elsewhere in the file.
        The hit count is attempted twice; if both attempts raise ``URLError``
        a warning is printed and 0 is returned as the count instead of
        failing with an unbound variable at the ``return``.
    """
    if articletype != 'All':
        searchterm = "2008:2013 [DP] " + articletype + "[PT] "
    else:
        searchterm = "2008:2013 [DP] "
    full_searchterm = searchterm + cancer
    print(full_searchterm)

    # One try plus one retry, because the remote call fails transiently.
    cancer_count = None
    for _attempt in range(2):
        try:
            cancer_count = pmhits(full_searchterm)
            break
        except URLError:
            continue
    if cancer_count is None:
        print('WARNING: PubMed hit count failed twice for: ' + full_searchterm)
        cancer_count = 0

    # Journals for which no usable impact factor was found (deduplicated).
    bad_journals = set()
    cancer_factor = 0
    for record in pubmedsearch(full_searchterm, MAX_COUNT=100000):
        # Normalize the journal abbreviation before the lookup:
        # upper case, no periods.
        journal = record.get('TA', '?').upper().replace('.', '')
        factor = IFdict.get(journal)
        try:
            # Add this journal's impact factor to the running total.
            cancer_factor = cancer_factor + float(factor)
        except (TypeError, ValueError):
            # None (journal not in IFdict) or a non-numeric entry.
            bad_journals.add(journal)

    print(articletype)
    # Sorted so the report of missing journals is stable between runs.
    print(sorted(bad_journals))
    return (cancer_count, cancer_factor)
'''usnews wrangle

Wrangle complicated files that contain stacked information: for every
institution in the input list, look up PubMed hit counts for phase II,
phase III and all clinical-trial publications and store them as new columns.
'''

import pandas as pd
import numpy as np
import pubmedsearch as pm

# Raw strings: a plain literal containing \U is a syntax error on Python 3
# and backslashes in Windows paths should never be left to escape handling.
# NOTE(review): input and output are the same file, so the input list is
# overwritten by the completed table - confirm this is intended.
List_path = r'C:\Users\JAG\USnewsy\RL-trials-5yr.csv'
Completed_path = r'C:\Users\JAG\USnewsy\RL-trials-5yr.csv'

# Query fragments appended to each institution search term.
# NOTE(review): the phase II query has no comma after [PT] while the other
# two do - confirm that PubMed treats both spellings the same way.
PHASE2_SUFFIX = '[AD] 2008:2013 [DP] Clinical Trial, Phase II[PT] Cancer[MAJR]'
PHASE3_SUFFIX = '[AD] 2008:2013 [DP] Clinical Trial, Phase III[PT], Cancer[MAJR]'
CLINICAL_TRIAL_SUFFIX = '[AD] 2008:2013 [DP] Clinical Trial[PT], Cancer[MAJR]'

# Read in the csv
RL = pd.read_csv(List_path, index_col=False, header=0)

phase2 = []
phase3 = []
clinicaltrials = []

# One PubMed hit count per institution and trial category, queried in the
# same order as before (phase II, phase III, all clinical trials).
for institution in RL['Search_Term']:
    phase2.append(pm.pmhits(institution + PHASE2_SUFFIX))
    phase3.append(pm.pmhits(institution + PHASE3_SUFFIX))
    clinicaltrials.append(pm.pmhits(institution + CLINICAL_TRIAL_SUFFIX))

RL['Phase 2'] = phase2
RL['Phase 3'] = phase3
RL['Clinical Trials'] = clinicaltrials

# The index is written as an extra leading column (to_csv default); kept
# as-is because later processing may rely on the resulting column positions.
RL.to_csv(Completed_path)