Exemple #1
0
def format_nct(nct):
    """Attach a title and a formatted condition string to each trial in *nct*.

    Each item of *nct* is indexed as ``ct[0]`` (the trial id used in the
    ClinicalTrials.gov URL) and ``ct[1]`` (passed through untouched).

    Titles and raw condition strings are looked up in the
    ``app.jinja_env.globals['ctgov_idx']`` mapping; on a miss the trial
    record is downloaded, parsed and stored in that mapping.

    Returns a list of ``(ct[0], ct[1], title, formatted_condition)`` tuples.
    """
    url = 'http://clinicaltrials.gov/ct2/show/%s?displayxml=true'
    snct = []
    for ct in nct:
        nct_id = ct[0]
        ctgov_idx = app.jinja_env.globals['ctgov_idx']
        if nct_id in ctgov_idx:
            cached = ctgov_idx[nct_id]
            title = cached[0]
            cond = format_condition(cached[1])
        else:
            xml = download_web_data(url % nct_id)
            if xml is None:
                # Download failed: fall back to the id as title and do not
                # cache, so a later call can retry instead of crashing here.
                title = nct_id
                cond = format_condition('')
            else:
                xmltree = ElementTree.fromstring(xml)
                title_node = xmltree.find('brief_title')
                title = nct_id if title_node is None else title_node.text
                # Skip <condition> elements without text so the literal
                # string "None" never leaks into the joined result.
                raw_cond = '; '.join(
                    c.text for c in xmltree.findall('condition')
                    if c.text is not None)
                ctgov_idx[nct_id] = (title, raw_cond)
                cond = format_condition(raw_cond)
                log.info('added %s to the CTGOV idx (updated to %d trials)' % (nct_id, len(ctgov_idx)))
        snct.append((nct_id, ct[1], title, cond))
    return snct
Exemple #2
0
def get_nct_number(url):
    """Return the root element's ``count`` attribute of the XML at *url* as an int.

    Returns 0 when ``download_web_data`` yields ``None``.
    """
    payload = download_web_data(url)
    if payload is not None:
        root = ElementTree.fromstring(payload)
        return int(root.get('count'))
    return 0
Exemple #3
0
def _lookup_ctgov_entry(nct_id):
    """Download one trial record; return ``(title, conditions)`` or ``None`` on a failed download."""
    xml = download_web_data(
        'http://clinicaltrials.gov/ct2/show/%s?displayxml=true' % nct_id)
    if xml is None:
        return None
    xmltree = ElementTree.fromstring(xml)
    title_node = xmltree.find('brief_title')
    # A record without <brief_title> is shown under its own id.
    title = nct_id if title_node is None else title_node.text
    # Text-less <condition> elements are skipped so "None" is never joined in.
    conditions = '; '.join(
        c.text for c in xmltree.findall('condition') if c.text is not None)
    return (title, conditions)


def format_nct(nct):
    """Attach a title and a formatted condition string to each trial in *nct*.

    Each item of *nct* is indexed as ``ct[0]`` (trial id) and ``ct[1]``
    (passed through unchanged). Title and raw conditions come from the
    ``app.jinja_env.globals['ctgov_idx']`` mapping; missing entries are
    downloaded, stored in that mapping and logged.

    Returns a list of ``(ct[0], ct[1], title, formatted_condition)`` tuples.
    """
    snct = []
    for ct in nct:
        nct_id = ct[0]
        ctgov_idx = app.jinja_env.globals['ctgov_idx']
        if nct_id in ctgov_idx:
            title = ctgov_idx[nct_id][0]
            cond = format_condition(ctgov_idx[nct_id][1])
        else:
            entry = _lookup_ctgov_entry(nct_id)
            if entry is None:
                # Failed download: degrade to the bare id and leave the
                # cache untouched so the lookup is retried next time.
                title = nct_id
                cond = format_condition('')
            else:
                title = entry[0]
                ctgov_idx[nct_id] = entry
                cond = format_condition(entry[1])
                log.info('added %s to the CTGOV idx (updated to %d trials)' %
                         (nct_id, len(ctgov_idx)))
        snct.append((nct_id, ct[1], title, cond))
    return snct
def get_initial_nct_from_url_ad(url, max_count=2000):
    """Return ``'<nct_id>;<rank>'`` strings for the search results at *url*.

    A single request is made with ``&count=<max_count>`` appended to *url*;
    every ``nct_id`` element found anywhere in the response is numbered
    from 1 in document order.

    url       -- search URL to which ``&count=...`` is appended.
    max_count -- value sent as the ``count`` query parameter (default 2000,
                 the value that used to be hard-coded).

    Returns an empty list when the download fails (``download_web_data``
    returns ``None``).
    """
    xml = download_web_data('%s&count=%d' % (url, max_count))
    if xml is None:
        return []
    xmltree = ElementTree.fromstring(xml)
    return ['%s;%d' % (node.text, rank)
            for rank, node in enumerate(xmltree.iter('nct_id'), 1)]
Exemple #5
0
def get_nct_number(url):
    """Fetch *url* and return the integer ``count`` attribute of its XML root.

    A ``None`` download result is reported as 0.
    """
    document = download_web_data(url)
    return 0 if document is None else int(ElementTree.fromstring(document).get('count'))
Exemple #6
0
def get_disease_clinical_trials(ldisease):
    """Retrieve trial ids from clinicaltrials.gov for one condition.

    ldisease -- condition name; commas are dropped and spaces become ``+``
                before it is placed in the query string. The special value
                ``'all'`` queries without a condition filter.

    The total is read from the ``count`` attribute of the first response,
    then one request per trial is issued (``start=<j>&count=1``) and every
    ``clinical_study/nct_id`` text is collected.

    Returns the list of collected ids; an empty list if the initial
    download fails.
    """
    fd = ldisease.replace(',', '').replace(' ', '+')
    url = 'https://clinicaltrials.gov/ct2/results?cond=%s&displayxml=true'
    if ldisease == 'all':
        url = 'https://clinicaltrials.gov/ct2/results?%sdisplayxml=true'
        fd = ''
    # Single-argument print() emits the same text on Python 2 and 3.
    print(url)

    xml = download_web_data(url % (fd))
    if xml is None:
        return []
    n = int(xml_parser.fromstring(xml).get('count'))
    print(n)

    trial_ids = []
    url_final = url + '&start=%d&count=%s'
    # n + 1 so that the last of the n trials is requested as well; the
    # previous upper bound of n stopped one request short.
    # NOTE(review): one request per trial is slow; a larger count per
    # request would help if the endpoint supports it -- confirm.
    for j in range(1, n + 1):
        if j % 5000 == 0:
            print("id: 0- %s  ..." % j)
        page = download_web_data(url_final % (fd, j, 1))
        if page is None:
            # Skip a failed page rather than aborting the whole harvest.
            continue
        for ct in xml_parser.fromstring(page).findall('clinical_study'):
            cod = ct.find('nct_id')
            if cod is not None:
                trial_ids.append(cod.text)

    return trial_ids
Exemple #7
0
def retrieve_trials(url, npag):
    """Fetch one page of search results and return ``(total, trials)``.

    ``total`` is ``of.format_nct_number`` applied to the result count for
    *url*; ``trials`` is a list of 4-tuples built from the first three
    fields of ``parse_xml_nct`` plus the formatted fourth field. Studies
    whose first parsed field is ``None`` are left out. When the page
    download yields ``None`` the result is ``(0, [])``.
    """
    total = of.format_nct_number(get_nct_number('%s&count=0' % url))
    page_xml = download_web_data('%s&pg=%s' % (url, npag))
    if page_xml is None:
        return (0, [])
    trials = []
    for study in ElementTree.fromstring(page_xml).findall('clinical_study'):
        parsed = parse_xml_nct(study)
        if parsed[0] is None:
            continue
        formatted = of.format_condition(parsed[3])
        trials.append((parsed[0], parsed[1], parsed[2], formatted))
    return (total, trials)
Exemple #8
0
def get_initial_nct_from_url(url):
    """Return ``'<nct_id>;<rank>'`` strings for every study found at *url*.

    The number of available studies is obtained first (``&count=0``), then
    all of them are requested in one call. Studies without an ``nct_id``
    child are skipped and do not consume a rank; ranks start at 1.

    Returns an empty list when no studies are reported or when the listing
    download fails (``download_web_data`` returns ``None``).
    """
    n = get_nct_number('%s&count=0' % url)
    if n == 0:
        return []
    xml = download_web_data('%s&count=%d' % (url, n))
    if xml is None:
        # The count request succeeded but the listing did not; treat it
        # like "no results" instead of crashing inside the XML parser.
        return []
    xmltree = ElementTree.fromstring(xml)
    id_nodes = (ct.find('nct_id') for ct in xmltree.findall('clinical_study'))
    present = (node for node in id_nodes if node is not None)
    return ['%s;%d' % (node.text, rank) for rank, node in enumerate(present, 1)]
Exemple #9
0
def retrieve_trials(url, npag):
    """Fetch one page of search results, caching each trial, and return ``(total, trials)``.

    ``total`` is ``of.format_nct_number`` applied to the result count for
    *url*. For every study whose first parsed field is not ``None``, the
    pair (third field, fourth field) is stored in
    ``app.jinja_env.globals['ctgov_idx']`` under the first field unless it
    is already present, and a 4-tuple with the formatted fourth field is
    appended to ``trials``. A ``None`` page download gives ``(0, [])``.
    """
    total = of.format_nct_number(get_nct_number('%s&count=0' % url))
    page_xml = download_web_data('%s&pg=%s' % (url, npag))
    if page_xml is None:
        return (0, [])
    trials = []
    for study in ElementTree.fromstring(page_xml).findall('clinical_study'):
        parsed = parse_xml_nct(study)
        trial_id = parsed[0]
        if trial_id is None:
            continue
        index = app.jinja_env.globals['ctgov_idx']
        if trial_id not in index:
            index[trial_id] = (parsed[2], parsed[3])
        formatted = of.format_condition(parsed[3])
        trials.append((trial_id, parsed[1], parsed[2], formatted))
    return (total, trials)
Exemple #10
0
def extract_criteria(cid):
    """Return the eligibility-criteria text of trial *cid*.

    The trial record is downloaded and the text of
    ``eligibility/criteria/textblock`` is returned. An empty string is
    returned when *cid* is ``None``, the download yields ``None``, or any
    element on that path is missing.
    """
    if cid is None:
        return ""
    page = download_web_data(
        'http://clinicaltrials.gov/show/%s?displayxml=true' % cid)
    if page is None:
        return ""
    node = xml_parser.fromstring(page)
    # Walk down the element path one level at a time, bailing out on a gap.
    for tag in ('eligibility', 'criteria', 'textblock'):
        node = node.find(tag)
        if node is None:
            return ""
    return node.text
Exemple #11
0
def extract_description(cid):
    """Return the brief summary followed by the detailed description of trial *cid*.

    The trial record is downloaded and the ``textblock`` text of
    ``brief_summary`` and of ``detailed_description`` are concatenated (no
    separator), in that order. Sections that are missing or have no text
    are skipped.

    Returns an empty string when *cid* is ``None``, the download yields
    ``None``, or neither section has text.
    """
    if cid is None:
        return ""
    url_trial = 'http://clinicaltrials.gov/show/%s?displayxml=true'
    page = download_web_data(url_trial % cid)
    if page is None:
        return ""
    ct_xml = xml_parser.fromstring(page)
    parts = []
    for tag in ('brief_summary', 'detailed_description'):
        section = ct_xml.find(tag)
        if section is None:
            continue
        # Read the textblock from the section being processed; the detailed
        # description used to be read from the summary element by mistake.
        txt = section.find('textblock')
        if txt is not None and txt.text is not None:
            parts.append(txt.text)
    return "".join(parts)