-
Notifications
You must be signed in to change notification settings - Fork 0
/
Impact-by-dz.py
95 lines (73 loc) · 2.91 KB
/
Impact-by-dz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Runs through a list of cancers and determines the number and impact factor
# of their publications
# Impact factors from: http://www.citefactor.org/impact-factor-list-2012.html
import pandas as pd
from pubmedsearch import pubmedsearch, pmhits
from urllib2 import URLError
# Read in the list of cancers, one row per cancer. The rest of the script
# reads its 'kw1' and 'mesh' columns and appends result columns to it.
# Raw string: the path contains '\U', which is an invalid escape sequence in
# a Python 3 string literal and only worked by accident in Python 2.
cancer_path = r'C:\Users\JAG\USN-dz\cancers_cases_keys_mesh.csv'
cancers = pd.read_csv(cancer_path, index_col=False, header=0)
# Read in the impact-factor list and map each journal title to its factor.
# NOTE(review): getfactors() looks journals up in upper case with periods
# removed, so the 'Pubmed Journal Title' column is presumably stored in that
# form already -- confirm against the CSV.
IF_path = r'C:\Users\JAG\USnewsy\impact_factors.csv'
IF = pd.read_csv(IF_path, index_col=False, header=0)
journals = IF['Pubmed Journal Title']
factors = IF['Impact Factor']
IFdict = dict(zip(journals, factors))
# Which article types to consider. Only the phased clinical trials are run.
# Other values this list has held: 'Clinical Trial', 'Review', and 'All'
# ('All' makes getfactors() search without a publication-type filter).
types = ['Clinical Trial, Phase I', 'Clinical Trial, Phase II', 'Clinical Trial, Phase III']
# Count the publications for one cancer and total their journal impact factors
def getfactors(articletype, cancer, short_cancer):
    """Query PubMed for one cancer and sum the impact factors of its hits.

    The query is restricted to publication dates 2008:2013 and, unless
    ``articletype`` is ``'All'``, to the given publication type.

    Args:
        articletype: Publication type used as the ``[PT]`` filter, or
            ``'All'`` to search without a publication-type filter.
        cancer: Search expression for the cancer; appended verbatim to the
            date / publication-type prefix.
        short_cancer: Short name of the cancer. Currently unused; kept so
            existing callers keep working.

    Returns:
        A ``(cancer_count, cancer_factor)`` tuple. ``cancer_count`` is the
        value returned by ``pmhits`` for the query, or ``None`` when
        ``pmhits`` raised ``URLError`` on both attempts. ``cancer_factor`` is
        the sum of the impact factors of every retrieved record whose
        journal is found in ``IFdict`` (``0`` when none is).
    """
    if articletype != 'All':
        searchterm = "2008:2013 [DP] " + articletype + "[PT] "
    else:
        searchterm = "2008:2013 [DP] "
    full_searchterm = searchterm + cancer
    print(full_searchterm)

    # Try the hit count twice before giving up. Start from None so that two
    # failures in a row yield an explicit "unknown" count instead of an
    # UnboundLocalError at the return statement.
    cancer_count = None
    for _attempt in range(2):
        try:
            cancer_count = pmhits(full_searchterm)
            break
        except URLError:
            continue

    records = pubmedsearch(full_searchterm, MAX_COUNT = 100000)

    cancer_factor = 0
    bad_journals = set()  # journals with no usable impact factor
    for record in records:
        # Normalise the record's journal field ('TA') to the form used for
        # the IFdict lookup: upper case, periods removed.
        TA = record.get('TA', '?').upper().replace('.', '')
        factor = IFdict.get(TA)
        try:
            factor = float(factor)
        except (TypeError, ValueError):
            # TypeError: journal not in IFdict (factor is None).
            # ValueError: journal is listed but its factor is not numeric.
            bad_journals.add(TA)
            continue
        # NOTE(review): a NaN factor (e.g. a blank CSV cell) passes float()
        # and would turn the whole total into NaN -- confirm the impact
        # factor list has no blanks.
        cancer_factor = cancer_factor + factor  # add this IF to the cancer's total

    print(articletype)
    bad_journals = list(bad_journals)
    print(bad_journals)
    return (cancer_count, cancer_factor)
# For every article type, query each cancer and add two result columns
# ('Publications - <type>' and 'Impacts - <type>') to the cancers table.
# Raw string: the path contains '\U', which is an invalid escape sequence in
# a Python 3 string literal and only worked by accident in Python 2.
output_path = r'C:\Users\JAG\USN-dz\Impact_by_dz.csv'
for PT in types:
    counts = []
    impacts = []
    # One getfactors() call per row, in row order: 'mesh' supplies the search
    # expression and 'kw1' the short name.
    for shortkey, keys in zip(cancers['kw1'], cancers['mesh']):
        (count, impact) = getfactors(PT, keys, shortkey)
        counts.append(count)
        impacts.append(impact)
    count_title = 'Publications - ' + PT
    cancers[count_title] = counts
    impact_title = 'Impacts - ' + PT
    cancers[impact_title] = impacts
    # Save after each article type so finished columns survive a failure in
    # a later one.
    # NOTE(review): the original indentation was lost; this write is assumed
    # to belong inside the per-type loop -- confirm.
    cancers.to_csv(output_path, sep = ',' , index = False)
# Final write; also covers the case where `types` is empty.
cancers.to_csv(output_path, sep = ',' , index = False)