def format_nct(nct):
    """Attach a title and a formatted condition string to each trial entry.

    Each element of ``nct`` is indexed as ``ct[0]`` (the trial id used in the
    clinicaltrials.gov URL and as the cache key) and ``ct[1]`` (passed through
    unchanged).  Titles and conditions are read from the ``ctgov_idx`` mapping
    stored in ``app.jinja_env.globals``; on a cache miss the trial record is
    downloaded, parsed and added to that mapping.

    Returns a list of ``(ct[0], ct[1], title, formatted_condition)`` tuples in
    input order.  If the record cannot be downloaded, the trial id is used as
    the title, the condition is empty and nothing is cached.
    """
    snct = []
    url = 'http://clinicaltrials.gov/ct2/show/%s?displayxml=true'
    for ct in nct:
        trial_id = ct[0]
        ctgov_idx = app.jinja_env.globals['ctgov_idx']
        if trial_id in ctgov_idx:
            title = ctgov_idx[trial_id][0]
            cond = ctgov_idx[trial_id][1]
        else:
            xml = download_web_data(url % trial_id)
            if xml is None:
                # Other callers in this code base check download_web_data()
                # for None, so do the same here instead of letting
                # fromstring() fail.  The miss is not cached so that a later
                # call can retry the download.
                title = trial_id
                cond = ''
                log.info('could not download %s from CTGOV; not added to the idx'
                         % trial_id)
            else:
                xmltree = ElementTree.fromstring(xml)
                # title (fall back to the trial id when the record has none)
                brief_title = xmltree.find('brief_title')
                title = trial_id if brief_title is None else brief_title.text
                # condition: skip elements without text so that the literal
                # string "None" never ends up in the joined list
                cond = '; '.join(c.text for c in xmltree.findall('condition')
                                 if c.text)
                ctgov_idx[trial_id] = (title, cond)
                log.info('added %s to the CTGOV idx (updated to %d trials)'
                         % (trial_id, len(ctgov_idx)))
        snct.append((trial_id, ct[1], title, format_condition(cond)))
    return snct
def get_nct_number(url):
    """Return the integer ``count`` attribute of the XML served at ``url``.

    The payload is fetched with ``download_web_data``.  When that yields
    ``None`` the function returns 0; otherwise the payload is parsed and the
    root element's ``count`` attribute is converted with ``int``.
    """
    payload = download_web_data(url)
    if payload is not None:
        root = ElementTree.fromstring(payload)
        return int(root.get('count'))
    return 0
def format_nct(nct):
    """Build display tuples (id, rank, title, conditions) for a list of trials.

    Every element of ``nct`` is read as ``ct[0]`` (trial id, also the key of
    the ``ctgov_idx`` mapping kept in ``app.jinja_env.globals``) and ``ct[1]``
    (copied to the output untouched).  A trial missing from the mapping is
    downloaded from clinicaltrials.gov, parsed and stored in the mapping as
    ``(title, conditions)`` before being returned.

    Returns a list of ``(ct[0], ct[1], title, format_condition(conditions))``
    tuples, one per input element, in the same order.  A failed download
    yields the trial id as title and an empty condition, and is not cached.
    """
    snct = []
    url = 'http://clinicaltrials.gov/ct2/show/%s?displayxml=true'
    for ct in nct:
        key = ct[0]
        index = app.jinja_env.globals['ctgov_idx']
        if key in index:
            title = index[key][0]
            cond = index[key][1]
        else:
            page = download_web_data(url % key)
            if page is None:
                # download_web_data() is None-checked elsewhere in this code
                # base; guard here as well rather than crash in fromstring().
                # Nothing is cached so the next call retries the download.
                title = key
                cond = ''
                log.info('could not download %s from CTGOV; not added to the idx'
                         % key)
            else:
                xmltree = ElementTree.fromstring(page)
                # title: use the trial id when no <brief_title> is present
                node = xmltree.find('brief_title')
                title = key if node is None else node.text
                # condition: join the texts, ignoring empty elements so the
                # string "None" is never emitted
                texts = [c.text for c in xmltree.findall('condition') if c.text]
                cond = '; '.join(texts)
                index[key] = (title, cond)
                log.info('added %s to the CTGOV idx (updated to %d trials)'
                         % (key, len(index)))
        snct.append((key, ct[1], title, format_condition(cond)))
    return snct
def get_initial_nct_from_url_ad(url, max_count=2000):
    """Return ranked trial ids for the search results behind ``url``.

    A single request for ``'%s&count=%d' % (url, max_count)`` is issued and
    every ``nct_id`` element found anywhere in the returned XML is collected
    in document order.

    url: search URL to which ``&count=<max_count>`` is appended.
    max_count: value sent as ``count`` in that request; defaults to 2000, the
        value that used to be hard-coded here.

    Returns a list of ``'<nct_id text>;<rank>'`` strings with ranks starting
    at 1, or an empty list when the download yields ``None``.
    """
    xml = download_web_data('%s&count=%d' % (url, max_count))
    if xml is None:
        # Same convention as the other download callers: no data, no trials.
        return []
    xmltree = ElementTree.fromstring(xml)
    # Element.iter() only yields real elements, so no None check is needed.
    return ['%s;%d' % (ids.text, rank)
            for rank, ids in enumerate(xmltree.iter('nct_id'), 1)]
def get_nct_number(url):
    """Read the ``count`` attribute from the XML document at ``url``.

    Returns 0 when ``download_web_data`` gives back ``None``; otherwise the
    root element's ``count`` attribute converted to ``int``.
    """
    data = download_web_data(url)
    if data is None:
        # nothing was downloaded, so report a count of zero
        return 0
    return int(ElementTree.fromstring(data).get('count'))
def get_disease_clinical_trials(ldisease):
    """Retrieve trial ids from clinicaltrials.gov for one condition.

    ldisease: condition text used as the ``cond`` query parameter; commas are
        removed and spaces become ``+``.  The special value ``'all'`` drops
        the ``cond`` filter from the query.

    The total is read from the ``count`` attribute of the first response and
    the results are then requested one trial per call via ``start``/``count``.
    The URL, the total and a progress line every 5000 requests are printed.

    Returns the list of ``nct_id`` texts in the order fetched.  An empty list
    is returned when the first download yields ``None``; individual pages
    that yield ``None`` are skipped.
    """
    # retrieve Trial IDs from clinicaltrials.gov
    fd = ldisease.replace(',', '').replace(' ', '+')
    url = 'https://clinicaltrials.gov/ct2/results?cond=%s&displayxml=true'
    if ldisease == 'all':
        url = 'https://clinicaltrials.gov/ct2/results?%sdisplayxml=true'
        fd = ''
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print(url)

    # num. of studies available
    xml = download_web_data(url % fd)
    if xml is None:
        return []
    n = int(xml_parser.fromstring(xml).get('count'))
    print(n)

    trial_ids = []
    url_final = url + '&start=%d&count=%s'
    # NOTE(review): range(1, n) issues n - 1 single-trial requests, so one of
    # the n reported trials is never requested.  Confirm whether ``start`` is
    # 1-based and the upper bound should be n + 1 before changing it.
    for j in range(1, n, 1):
        if j % 5000 == 0:
            print('id: 0- %d  ...' % j)
        page = download_web_data(url_final % (fd, j, 1))
        if page is None:
            # a failed page should not abort the whole harvest
            continue
        for ct in xml_parser.fromstring(page).findall('clinical_study'):
            cod = ct.find('nct_id')
            if cod is not None:
                trial_ids.append(cod.text)
    return trial_ids
def retrieve_trials(url, npag):
    """Fetch one page of search results together with the formatted total.

    url: search URL; ``&count=0`` is appended for the count request and
        ``&pg=<npag>`` for the page request.
    npag: value substituted for the ``pg`` query parameter.

    Returns ``(n, nct)``.  ``n`` is ``of.format_nct_number`` applied to the
    result of ``get_nct_number``.  ``nct`` holds one
    ``(pct[0], pct[1], pct[2], of.format_condition(pct[3]))`` tuple for every
    ``clinical_study`` element whose ``parse_xml_nct`` result has a non-None
    first item.  When the page download yields ``None`` the result is
    ``(0, [])``.
    """
    # num. of studies available
    total = of.format_nct_number(get_nct_number('%s&count=0' % url))

    # get the list of clinical studies
    page = download_web_data('%s&pg=%s' % (url, npag))
    if page is None:
        return (0, [])

    studies = ElementTree.fromstring(page).findall('clinical_study')
    # The inner generator keeps parsing and formatting interleaved per study.
    trials = [
        (parsed[0], parsed[1], parsed[2], of.format_condition(parsed[3]))
        for parsed in (parse_xml_nct(study) for study in studies)
        if parsed[0] is not None
    ]
    return (total, trials)
def get_initial_nct_from_url(url):
    """Return ranked trial ids for all search results behind ``url``.

    The number of results is obtained from ``get_nct_number`` (queried with
    ``&count=0``) and then requested in a single call with ``&count=<n>``.

    Returns a list of ``'<nct_id text>;<rank>'`` strings.  Ranks start at 1
    and only advance for ``clinical_study`` elements that contain an
    ``nct_id``.  An empty list is returned when the count is 0 or when the
    result download yields ``None``.
    """
    # num. of studies available
    n = get_nct_number('%s&count=0' % url)
    if n == 0:
        return []

    # get the list of clinical studies
    xml = download_web_data('%s&count=%d' % (url, n))
    if xml is None:
        # The count request succeeded but this one did not; treat it like
        # "no results" instead of crashing inside fromstring().
        return []

    rnct = []
    for ct in ElementTree.fromstring(xml).findall('clinical_study'):
        ids = ct.find('nct_id')
        if ids is not None:
            # rank = position among the studies that actually carry an id
            rnct.append('%s;%d' % (ids.text, len(rnct) + 1))
    return rnct
def retrieve_trials(url, npag):
    """Fetch one page of search results and record new trials in the index.

    url: search URL; ``&count=0`` is appended for the count request and
        ``&pg=<npag>`` for the page request.
    npag: value substituted for the ``pg`` query parameter.

    For every ``clinical_study`` element whose ``parse_xml_nct`` result has a
    non-None first item, ``(pct[2], pct[3])`` is stored under ``pct[0]`` in
    the ``ctgov_idx`` mapping of ``app.jinja_env.globals`` unless that key is
    already present.

    Returns ``(n, nct)`` where ``n`` is ``of.format_nct_number`` applied to
    the result of ``get_nct_number`` and ``nct`` is a list of
    ``(pct[0], pct[1], pct[2], of.format_condition(pct[3]))`` tuples.  When
    the page download yields ``None`` the result is ``(0, [])``.
    """
    # num. of studies available
    total = of.format_nct_number(get_nct_number('%s&count=0' % url))

    # get the list of clinical studies
    page = download_web_data('%s&pg=%s' % (url, npag))
    if page is None:
        return (0, [])

    trials = []
    for study in ElementTree.fromstring(page).findall('clinical_study'):
        pct = parse_xml_nct(study)
        if pct[0] is None:
            continue
        ctgov_idx = app.jinja_env.globals['ctgov_idx']
        if pct[0] not in ctgov_idx:
            ctgov_idx[pct[0]] = (pct[2], pct[3])
        trials.append((pct[0], pct[1], pct[2], of.format_condition(pct[3])))
    return (total, trials)
def extract_criteria(cid):
    """Return the eligibility-criteria text block of trial ``cid``.

    The trial record is downloaded as XML and the path
    ``eligibility/criteria/textblock`` is followed from the root.

    Returns ``""`` when ``cid`` is ``None``, when the download yields
    ``None`` or when any element on that path is missing; otherwise the
    ``text`` of the ``textblock`` element.
    """
    # Using IDs to retrieve eligibility criteria
    if cid is None:
        return ""
    page = download_web_data(
        'http://clinicaltrials.gov/show/%s?displayxml=true' % cid)
    if page is None:
        return ""

    # Walk down one level per tag, bailing out at the first missing element.
    node = xml_parser.fromstring(page)
    for tag in ('eligibility', 'criteria', 'textblock'):
        node = node.find(tag)
        if node is None:
            return ""
    return node.text
def extract_description(cid):
    """Return the brief summary followed by the detailed description of a trial.

    The trial record for ``cid`` is downloaded as XML.  The ``textblock``
    texts of the ``brief_summary`` and ``detailed_description`` elements are
    concatenated in that order, with no separator.

    Returns ``""`` when ``cid`` is ``None``, when the download yields ``None``
    or when neither section has text.  A missing or empty section is skipped.
    """
    # Using IDs to retrieve the trial description
    output = ""
    if cid is None:
        return output

    url_trial = 'http://clinicaltrials.gov/show/%s?displayxml=true'
    page = download_web_data(url_trial % cid)
    if page is None:
        return output

    ct_xml = xml_parser.fromstring(page)
    for section in ('brief_summary', 'detailed_description'):
        parent = ct_xml.find(section)
        if parent is None:
            continue
        # Read each textblock from its own section.  The detailed description
        # used to be looked up under the summary element, which duplicated
        # the summary and failed when the summary was absent.
        txt = parent.find('textblock')
        if txt is not None and txt.text:
            output += txt.text
    return output