import codecs
from datetime import datetime

import bottle


def index():
    """ The index page """
    # get parameters
    cond = bottle.request.query.get('cond')
    if cond is not None and len(cond) < 1:
        cond = None
    csv_name = None
    trials = []
    num_trials = 0

    # if we got a condition, search for trials
    if cond is not None:
        dump = bottle.request.query.get('criteria') is not None
        csv = bottle.request.query.get('csv') is not None
        lilly = LillyCOI()
        args = ['id', 'eligibility'] if dump or csv else None
        found_trials = lilly.search_for_condition(cond, True, args)
        num_trials = len(found_trials)

        # list criteria
        if dump:
            trials = found_trials

        # return CSV
        elif csv:
            # timestamped filename, truncated to second precision
            csv_name = 'criteria-%s.csv' % datetime.now().isoformat()[:-7]
            with codecs.open(csv_name, 'w', 'utf-8') as handle:
                heads = [
                    "format", "num in", "num ex", "w age", "w gender",
                    "w pregnancy", "incomplete", "overly complex",
                    "sub-populations", "negated inclusions", "labs",
                    "scores", "acronyms", "temporal components",
                    "patient behavior/abilities",
                    "investigator-subjective components", "sum"
                ]
                headers = ','.join('""' for h in heads)

                # CSV header
                handle.write('"NCT","first received yrs ago","last update yrs ago","has completion","completion and status compatible","criteria",%s\n' % ','.join(['"%s"' % h for h in heads]))

                # CSV rows; `every` allows sampling every n-th trial
                i = 0
                every = 1
                for study in found_trials:
                    if 0 == i % every:
                        study.load()
                        handle.write('"%s","","","","","%s",%s\n' % (study.nct, study.criteria_text.replace('"', '""'), headers))
                    i += 1

    # render index
    template = _jinja_templates.get_template('index.html')
    return template.render(cond=cond, trials=trials, csv=csv_name, num=num_trials)
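# Usage sketch (assumption, not from the original source): `index` is a
# bottle handler, so it could be mounted and served like this. The route
# path, host and port are illustrative; bottle.route() and bottle.run() are
# standard bottle calls.
if __name__ == '__main__':
    bottle.route('/', callback=index)
    bottle.run(host='localhost', port=8080)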
import codecs
import datetime
import random


def get_trials(condition, recruiting=True, filename='years.csv'):
    lilly = LillyCOI()
    fields = [
        'id', 'lastchanged_date', 'firstreceived_date',
        'primary_completion_date', 'completion_date', 'verification_date'
    ]
    found = lilly.search_for_condition(condition, recruiting, fields)
    if len(found) > 0:
        print "Found %d" % len(found)
        now = datetime.datetime.now()

        # list trials ("handle", not "csv", to avoid shadowing the csv module)
        with codecs.open(filename, 'w') as handle:
            handle.write('NCT,"first received yrs ago","last update yrs ago",primary,completion,veri,"has completion","completion and status compatible",criteria\n')
            if len(found) > 150:
                found = random.sample(found, len(found) / 4)

            for trial in found:
                # date comparison: age of each date in years, one decimal;
                # 99 serves as the "no date" sentinel
                first = trial.date('firstreceived_date')
                first_y = round((now - first[1]).days / 365.25 * 10) / 10 if first[1] else 99
                last = trial.date('lastchanged_date')
                last_y = round((now - last[1]).days / 365.25 * 10) / 10 if last[1] else 99
                comp = trial.date('primary_completion_date')
                comp_y = round((now - comp[1]).days / 365.25 * 10) / 10 if comp[1] else 99
                done = trial.date('completion_date')
                done_y = round((now - done[1]).days / 365.25 * 10) / 10 if done[1] else 99
                veri = trial.date('verification_date')
                veri_y = round((now - veri[1]).days / 365.25 * 10) / 10 if veri[1] else 99

                handle.write('"%s",%.1f,%.1f,%.1f,%.1f,%.1f,%s,%s,""\n' % (
                    trial.nct, first_y, last_y, comp_y, done_y, veri_y,
                    'TRUE' if done[1] else 'FALSE',
                    'TRUE' if done[1] and done[1] > now else 'FALSE'))

        print 'Written to "%s"' % filename
    else:
        print "None found"
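# Usage sketch (assumption): dump recruiting trials for one condition to CSV.
# 'spondylitis' mirrors the default condition used elsewhere in this repo;
# the filename is illustrative.
if __name__ == '__main__':
    get_trials('spondylitis', recruiting=True, filename='spondylitis-years.csv')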
import codecs
import os
import sys

# ask for NCT list
if _list_path is None:
    _list_path = raw_input('Path to the NCT list: ')

# look for the list
if not os.path.exists(_list_path):
    print 'x> The list file at %s does not exist' % _list_path
    sys.exit(1)

# read list; blank lines become None so they can be skipped below
with codecs.open(_list_path, 'r') as handle:
    ncts = [nct.strip() if len(nct.strip()) > 0 else None for nct in handle.readlines()]

assert len(ncts) > 0
trials = {}
rows_and_years = []
lilly = LillyCOI()

# retrieve from our database
if not _force_update:
    existing = Study.retrieve(ncts)
    for ex in existing:
        trials[ex.nct] = ex

# loop trials
for nct in ncts:
    if not nct:
        continue

    # get the trial fresh via web unless we already have it
    if nct in trials:
        trial = trials[nct]
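# Illustrative input format (assumption): the NCT list is plain text with one
# ClinicalTrials.gov identifier per line, e.g.
#
#   NCT01234567
#   NCT00000102
#
# Blank lines turn into None entries in `ncts` and are skipped by the loop.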
# main
if __name__ == "__main__":
    Study.setup_ctakes(CTAKES)
    Study.setup_tables()
    UMLS.setup_umls(UMLS_FILE)
    UMLS.setup_tables()

    # ask for a condition
    condition = raw_input("Condition: ")
    if condition is None or len(condition) < 1:
        condition = 'spondylitis'

    # search for studies
    print "Fetching %s studies..." % condition
    lilly = LillyCOI()
    results = lilly.search_for(condition)

    # process all studies
    run_ctakes = False
    i = 0
    for study in results:
        i += 1
        print 'Processing %d of %d...' % (i, len(results))

        study.sync_with_db()
        study.process_eligibility_from_text()
        study.codify_eligibility()
        if study.waiting_for_ctakes():
            run_ctakes = True

        Study.sqlite_commit_if_needed()
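# Example session (illustrative; the script name and trial count are made up,
# while the prompt and status lines match the code above):
#
#   $ python run.py
#   Condition: spondylitis
#   Fetching spondylitis studies...
#   Processing 1 of 87...
#   Processing 2 of 87...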
def _run(self, fields=None, callback=None):
    """ Runs the whole toolchain.
    Currently writes all status to a file associated with run_id. If the
    first word in that file is "error", the process is assumed to have
    stopped. If it is "done" the work here is done.
    """
    # check prerequisites
    if self.condition is None and self.term is None:
        raise Exception("No 'condition' and no 'term' provided")
    self.assure_run_directory()
    self.status = "Searching for %s trials..." % (self.condition if self.condition is not None else self.term)

    # anonymous callback for progress reporting
    def cb(inst, progress):
        if progress > 0:
            self.status = "Fetching (%d%%)" % (100 * progress)

    # make sure we retrieve the properties that we want to analyze
    if self.analyze_keypaths:
        if fields is None:
            fields = []
        fields.extend(self.analyze_keypaths)
        fields.append('eligibility')

    # start the search
    self.status = "Fetching %s trials..." % (self.condition if self.condition is not None else self.term)
    lilly = LillyCOI()
    if self.condition is not None:
        trials = lilly.search_for_condition(self.condition, True, fields, cb)
    else:
        trials = lilly.search_for_term(self.term, True, fields, cb)

    if self.limit and len(trials) > self.limit:
        trials = trials[:self.limit]

    # process found trials
    self.status = "Processing..."
    sqlite = SQLite.get(self.sqlite_db)
    progress = 0
    progress_tot = len(trials)
    progress_each = max(5, progress_tot / 25)
    ncts = []
    num_nlp_trials = 0
    nlp_to_run = set()
    for trial in trials:
        ncts.append(trial.nct)
        trial.analyze_keypaths = self.analyze_keypaths

        if self.catch_exceptions:
            try:
                trial.codify_analyzables(self.nlp_pipelines, self.discard_cached)
            except Exception as e:
                self.status = 'Error processing trial: %s' % e
                return
        else:
            trial.codify_analyzables(self.nlp_pipelines, self.discard_cached)

        trial.store()
        self.write_trial(sqlite, trial)

        # make sure we run the NLP pipeline if needed
        to_run = trial.waiting_for_nlp(self.nlp_pipelines)
        if len(to_run) > 0:
            nlp_to_run.update(to_run)
            num_nlp_trials += 1

        # progress
        progress += 1
        if 0 == progress % progress_each:
            self.status = "Processing (%d%%)" % (float(progress) / progress_tot * 100)

    sqlite.commit()

    # run the needed NLP pipelines
    success = True
    for nlp in self.nlp_pipelines:
        if nlp.name in nlp_to_run:
            self.status = "Running %s for %d trials (this may take a while)" % (nlp.name, num_nlp_trials)
            if self.catch_exceptions:
                try:
                    nlp.run()
                except Exception as e:
                    self.status = "Running %s failed: %s" % (nlp.name, str(e))
                    success = False
                    break
            else:
                nlp.run()

    # make sure we codified all criteria
    if success:
        for trial in trials:
            trial.codify_analyzables(self.nlp_pipelines, False)
        self.status = 'done'

    # run the callback
    if callback is not None:
        callback(success, trials)
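# Worked example (illustrative) of the progress strings used above: the fetch
# callback receives a fraction in (0, 1], the processing loop a count/total
# ratio.
print "Fetching (%d%%)" % (100 * 0.42)              # -> Fetching (42%)
print "Processing (%d%%)" % (float(7) / 25 * 100)   # -> Processing (28%)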
# (assumption: this raise closes an existence check on the input path)
if not os.path.exists(csv_path):
    raise Exception("There is no such file (%s)" % csv_path)

# read CSV
with codecs.open(csv_path, 'r') as handle:
    reader = csv.reader(handle)
    header = reader.next()
    idx_nct = header.index('NCT')
    idx_drop = header.index('criteria')
    idx_first = header.index('first received yrs ago')
    idx_last = header.index('last update yrs ago')

    # open output file
    csv_new = "%s-auto-updated.csv" % os.path.splitext(csv_path)[0].replace('-manual', '')
    with codecs.open(csv_new, 'w') as w_handle:
        lilly = LillyCOI()
        # ref_date = datetime.datetime(2013, 7, 30)  # can NOT be used against "date last updated", of course
        ref_date = datetime.datetime.now()
        writer = csv.writer(w_handle)
        header.pop(idx_drop)
        writer.writerow(header)

        # loop trials
        for row in reader:
            trial = lilly.get_trial(row[idx_nct])

            # date calculations
            first = trial.date('firstreceived_date')
            first_y = round((ref_date - first[1]).days / 365.25 * 10) / 10 if first[1] else 99
            last = trial.date('lastchanged_date')
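# Worked example (illustrative) of the year arithmetic above: an age in days
# is converted to years and rounded to one decimal; 99 is the "no date"
# sentinel used when a trial lacks the date.
import datetime
ref = datetime.datetime(2013, 7, 30)
first = datetime.datetime(2012, 3, 18)              # 499 days earlier
print round((ref - first).days / 365.25 * 10) / 10  # -> 1.4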