-
Notifications
You must be signed in to change notification settings - Fork 0
/
pm_impacts.py
80 lines (61 loc) · 2.47 KB
/
pm_impacts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Runs through a list of organizations and, for each one, totals the impact
# factors of its publications and counts those publications
# Impact factors from: http://www.citefactor.org/impact-factor-list-2012.html
import pandas as pd
from pubmedsearch import pubmedsearch, pmhits
# Read in the rank-list which includes the organizations.
# The paths are raw strings so the Windows backslashes stay literal: in a plain
# literal "\U" is a syntax error on Python 3 and only works on Python 2 because
# it happens not to be a recognized escape there.
RL_path = r'C:\Users\JAG\USnewsy\RL_USN_Grants_Impact_CTg_d.csv'
# squeeze=True was dropped: it only affects single-column files (both tables
# are indexed by column name below) and the keyword was removed in pandas 2.0.
RL = pd.read_csv(RL_path, index_col=False, header=0)

# Read in the impact-factor list
IF_path = r'C:\Users\JAG\USnewsy\impact_factors.csv'
IF = pd.read_csv(IF_path, index_col=False, header=0)
journals = IF['Pubmed Journal Title']
factors = IF['Impact Factor']
# Lookup table: journal title -> impact factor
IFdict = dict(zip(journals, factors))

# Which article types to consider ('All' means no publication-type filter)
types = [
    'Clinical Trial',
    'Clinical Trial, Phase I',
    'Clinical Trial, Phase II',
    'Clinical Trial, Phase III',
    'Review',
    'All',
]
# get publications for each organization
def getfactors(RL, articletype, search=None, impact_factors=None):
    """Add impact-factor and paper-count columns for one article type.

    For every entry in ``RL['Search_Term']`` a PubMed query is built by
    appending a fixed filter (2008:2013, Cancer[MAJR], plus the publication
    type unless *articletype* is ``'All'``) to the organization's search term.
    Each returned record's ``'TA'`` field is upper-cased, stripped of periods
    and looked up in the impact-factor table; the factors found are summed per
    organization.

    Args:
        RL: Table with a ``'Search_Term'`` column; it is modified in place.
        articletype: Publication type to filter on, or ``'All'`` for no
            publication-type filter.
        search: Callable taking the query string and returning an iterable of
            records that support ``.get('TA', default)``. Defaults to the
            module-level ``pubmedsearch``.
        impact_factors: Mapping of normalized journal title to impact factor.
            Defaults to the module-level ``IFdict``.

    Returns:
        The same ``RL`` object with two new columns,
        ``'Impact factor - <articletype>'`` (summed impact factor) and
        ``'Paper count - <articletype>'`` (number of records, including those
        whose journal had no usable impact factor).

    Side effects:
        Prints the article type, the hit and miss counts, the number of
        distinct unmatched journals, and the sorted list of those journals.
    """
    import math

    if search is None:
        search = pubmedsearch
    if impact_factors is None:
        impact_factors = IFdict

    if articletype != 'All':
        searchterm = " [AD] 2008:2013 [DP] " + articletype + "[PT] Cancer[MAJR]"
    else:
        searchterm = " [AD] 2008:2013 [DP] Cancer[MAJR]"

    IFs = []
    article_count = []
    bad_journals = set()
    hit_count = 0
    miss_count = 0

    for org in RL['Search_Term']:
        org_factor = 0
        org_count = 0
        for record in search(org + searchterm):
            org_count += 1
            # Normalize the journal title the same way the lookup keys are
            # expected to be written: upper case, no periods.
            TA = record.get('TA', '?').upper().replace('.', '')
            try:
                factor = float(impact_factors.get(TA))
            except (TypeError, ValueError):
                # TypeError: journal not in the table (lookup gave None).
                # ValueError: table cell is non-numeric text.
                factor = None
            # A blank table cell arrives as NaN; adding it would turn the
            # whole organization total into NaN, so count it as a miss.
            if factor is None or math.isnan(factor):
                bad_journals.add(TA)
                miss_count += 1
            else:
                org_factor += factor  # add this IF to the organization total
                hit_count += 1
        IFs.append(org_factor)
        article_count.append(org_count)

    bad_journals = sorted(bad_journals)
    RL['Impact factor - ' + articletype] = IFs
    RL['Paper count - ' + articletype] = article_count

    # Single-argument print(...) produces identical output on Python 2 and 3.
    print(articletype)
    print(hit_count)
    print(miss_count)
    print(len(bad_journals))
    print(bad_journals)
    return RL
# Add the impact-factor and paper-count columns for every article type.
for PT in types:
    RL = getfactors(RL, PT)

# Write the augmented rank-list once all article types have been processed.
# The path is a raw string so the Windows backslashes stay literal ("\U" in a
# plain literal is a syntax error on Python 3).
RL.to_csv(r'C:\Users\JAG\USnewsy\RL_USN_Grants_Impact_CTg_e.csv', sep=',', index=False)