def index():
    """ The index page.

    Reads the `cond` query parameter. When it is non-empty, trials for that
    condition are searched via `LillyCOI`; the presence of the `criteria`
    query parameter makes the found trials available to the template, the
    presence of `csv` instead writes a criteria CSV file into the current
    working directory.

    Returns the rendered `index.html` template, which receives `cond`,
    `trials`, `csv` (name of the written CSV file or None) and `num`
    (number of trials found).
    """

    # get parameters; an empty `cond` is treated like a missing one
    cond = bottle.request.query.get('cond')
    if cond is not None and len(cond) < 1:
        cond = None

    csv_name = None
    trials = []
    num_trials = 0

    # if we got a condition
    if cond is not None:
        dump = bottle.request.query.get('criteria') is not None
        want_csv = bottle.request.query.get('csv') is not None

        # only restrict the retrieved fields when we will look at criteria
        lilly = LillyCOI()
        args = ['id', 'eligibility'] if dump or want_csv else None
        found_trials = lilly.search_for_condition(cond, True, args)
        num_trials = len(found_trials)

        # list criteria
        if dump:
            trials = found_trials

        # return CSV
        elif want_csv:
            # Drop the microseconds explicitly: isoformat() omits them when
            # they are 0, so slicing off a fixed number of characters would
            # then truncate the time itself.
            stamp = datetime.now().replace(microsecond=0).isoformat()
            csv_name = 'criteria-%s.csv' % stamp
            with codecs.open(csv_name, 'w', 'utf-8') as handle:
                heads = [
                    "format", "num in", "num ex", "w age", "w gender",
                    "w pregnancy", "incomplete", "overly complex",
                    "sub-populations", "negated inclusions", "labs",
                    "scores", "acronyms", "temporal components",
                    "patient behavior/abilities",
                    "investigator-subjective components", "sum",
                ]
                # one empty quoted cell per annotation column
                blanks = ','.join(['""'] * len(heads))

                # CSV header
                handle.write('"NCT","first received yrs ago","last update yrs ago","has completion","completion and status compatible","criteria",%s\n' % ','.join(['"%s"' % h for h in heads]))

                # CSV rows; double quotes in the criteria are escaped by
                # doubling them
                for study in found_trials:
                    study.load()
                    handle.write('"%s","","","","","%s",%s\n' % (study.nct, study.criteria_text.replace('"', '""'), blanks))

    # render index
    template = _jinja_templates.get_template('index.html')
    return template.render(cond=cond, trials=trials, csv=csv_name, num=num_trials)
def get_trials(condition, recruiting=True, filename='years.csv'):
    """ Search trials for `condition` and write their date statistics as CSV.

    The search goes through `LillyCOI.search_for_condition`, requesting the
    id and the date fields only. If more than 150 trials are found, a random
    quarter of them is written. Each row holds the NCT, the age in years
    (one decimal, 99 when the date is missing) of the first-received,
    last-changed, primary-completion, completion and verification dates,
    whether a completion date exists and whether it lies in the future.

    Prints a short status message; returns nothing.
    """
    wanted = [
        'id',
        'lastchanged_date',
        'firstreceived_date',
        'primary_completion_date',
        'completion_date',
        'verification_date',
    ]
    matches = LillyCOI().search_for_condition(condition, recruiting, wanted)

    if not len(matches) > 0:
        print("None found")
        return

    print("Found %d" % len(matches))
    now = datetime.datetime.now()

    def years_ago(when):
        """ Years between `when` and now with one decimal, 99 if missing. """
        if not when:
            return 99
        return round((now - when).days / 365.25 * 10) / 10

    # list trials
    with codecs.open(filename, 'w') as out:
        out.write('NCT,"first received yrs ago","last update yrs ago",primary,completion,veri,"has completion","completion and status compatible",criteria\n')

        # large result sets are reduced to a random quarter
        if len(matches) > 150:
            matches = random.sample(matches, len(matches) // 4)

        for trial in matches:
            # date comparison; trial.date() yields a sequence whose second
            # item is the value we compare against now
            first_y = years_ago(trial.date('firstreceived_date')[1])
            last_y = years_ago(trial.date('lastchanged_date')[1])
            comp_y = years_ago(trial.date('primary_completion_date')[1])
            done_at = trial.date('completion_date')[1]
            done_y = years_ago(done_at)
            veri_y = years_ago(trial.date('verification_date')[1])

            row = (
                trial.nct,
                first_y,
                last_y,
                comp_y,
                done_y,
                veri_y,
                'TRUE' if done_at else 'FALSE',
                'TRUE' if done_at and done_at > now else 'FALSE',
            )
            out.write('"%s",%.1f,%.1f,%.1f,%.1f,%.1f,%s,%s,""\n' % row)

    print('Written to "%s"' % filename)
def get_trials(condition, recruiting=True, filename='years.csv'):
    """ Search trials for `condition` and write their date statistics as CSV.

    The search goes through `LillyCOI.search_for_condition`, requesting the
    id and the date fields only. If more than 150 trials are found, a random
    quarter of them is written. Each row holds the NCT, the age in years
    (one decimal, 99 when the date is missing) of the first-received,
    last-changed, primary-completion, completion and verification dates,
    whether a completion date exists and whether it lies in the future.

    Prints a short status message; returns nothing.
    """
    wanted = [
        'id',
        'lastchanged_date',
        'firstreceived_date',
        'primary_completion_date',
        'completion_date',
        'verification_date',
    ]
    matches = LillyCOI().search_for_condition(condition, recruiting, wanted)

    if not len(matches) > 0:
        print("None found")
        return

    print("Found %d" % len(matches))
    now = datetime.datetime.now()

    def years_ago(when):
        """ Years between `when` and now with one decimal, 99 if missing. """
        if not when:
            return 99
        return round((now - when).days / 365.25 * 10) / 10

    # list trials
    with codecs.open(filename, 'w') as out:
        out.write('NCT,"first received yrs ago","last update yrs ago",primary,completion,veri,"has completion","completion and status compatible",criteria\n')

        # large result sets are reduced to a random quarter
        if len(matches) > 150:
            matches = random.sample(matches, len(matches) // 4)

        for trial in matches:
            # date comparison; trial.date() yields a sequence whose second
            # item is the value we compare against now
            first_y = years_ago(trial.date('firstreceived_date')[1])
            last_y = years_ago(trial.date('lastchanged_date')[1])
            comp_y = years_ago(trial.date('primary_completion_date')[1])
            done_at = trial.date('completion_date')[1]
            done_y = years_ago(done_at)
            veri_y = years_ago(trial.date('verification_date')[1])

            row = (
                trial.nct,
                first_y,
                last_y,
                comp_y,
                done_y,
                veri_y,
                'TRUE' if done_at else 'FALSE',
                'TRUE' if done_at and done_at > now else 'FALSE',
            )
            out.write('"%s",%.1f,%.1f,%.1f,%.1f,%.1f,%s,%s,""\n' % row)

    print('Written to "%s"' % filename)
def _run(self, fields=None, callback=None):
    """ Runs the whole toolchain.

    Currently writes all status to a file associated with run_id. If the
    first word in that file is "error", the process is assumed to have
    stopped. If it is "done" the work here is done.

    :param fields: optional list of fields to retrieve for each trial; the
        analyzed keypaths and 'eligibility' are added when
        `self.analyze_keypaths` is set. The passed list is not modified.
    :param callback: optional callable invoked as `callback(success, trials)`
        once the NLP pipelines have been run (or one of them failed)
    :raises Exception: if neither `self.condition` nor `self.term` is set
    """

    # check prerequisites
    if self.condition is None and self.term is None:
        raise Exception("No 'condition' and no 'term' provided")

    self.assure_run_directory()
    self.status = "Searching for %s trials..." % (self.condition if self.condition is not None else self.term)

    # anonymous callback for progress reporting
    def cb(inst, progress):
        if progress > 0:
            self.status = "Fetching (%d%%)" % (100 * progress)

    # make sure we retrieve the properties that we want to analyze; work on
    # a copy so the list handed in by the caller is never mutated
    if self.analyze_keypaths:
        fields = list(fields) if fields is not None else []
        fields.extend(self.analyze_keypaths)
        fields.append('eligibility')

    # start the search
    self.status = "Fetching %s trials..." % (self.condition if self.condition is not None else self.term)

    lilly = LillyCOI()
    if self.condition is not None:
        trials = lilly.search_for_condition(self.condition, True, fields, cb)
    else:
        trials = lilly.search_for_term(self.term, True, fields, cb)

    if self.limit and len(trials) > self.limit:
        trials = trials[:self.limit]

    # process found trials
    self.status = "Processing..."
    sqlite = SQLite.get(self.sqlite_db)

    progress_tot = len(trials)
    # update the status every 1/25th of the trials, but at most every 5th trial
    progress_each = max(5, progress_tot // 25)
    num_nlp_trials = 0
    nlp_to_run = set()
    for progress, trial in enumerate(trials, 1):
        trial.analyze_keypaths = self.analyze_keypaths

        if self.catch_exceptions:
            try:
                trial.codify_analyzables(self.nlp_pipelines, self.discard_cached)
            except Exception as e:
                # NOTE(review): this path returns without committing and
                # without invoking `callback` -- confirm that is intended
                self.status = 'Error processing trial: %s' % e
                return
        else:
            trial.codify_analyzables(self.nlp_pipelines, self.discard_cached)
        trial.store()
        self.write_trial(sqlite, trial)

        # make sure we run the NLP pipeline if needed
        to_run = trial.waiting_for_nlp(self.nlp_pipelines)
        if len(to_run) > 0:
            nlp_to_run.update(to_run)
            num_nlp_trials = num_nlp_trials + 1

        # progress
        if 0 == progress % progress_each:
            self.status = "Processing (%d %%)" % (float(progress) / progress_tot * 100)

    sqlite.commit()

    # run the needed NLP pipelines
    success = True
    for nlp in self.nlp_pipelines:
        if nlp.name in nlp_to_run:
            self.status = "Running %s for %d trials (this may take a while)" % (nlp.name, num_nlp_trials)
            if self.catch_exceptions:
                try:
                    nlp.run()
                except Exception as e:
                    self.status = "Running %s failed: %s" % (nlp.name, str(e))
                    success = False
                    break
            else:
                nlp.run()

    # make sure we codified all criteria
    if success:
        for trial in trials:
            trial.codify_analyzables(self.nlp_pipelines, False)

        self.status = 'done'

    # run the callback
    if callback is not None:
        callback(success, trials)
def index():
    """ The index page.

    Reads the `cond` query parameter. When it is non-empty, trials for that
    condition are searched via `LillyCOI`; the presence of the `criteria`
    query parameter makes the found trials available to the template, the
    presence of `csv` instead writes a criteria CSV file into the current
    working directory.

    Returns the rendered `index.html` template, which receives `cond`,
    `trials`, `csv` (name of the written CSV file or None) and `num`
    (number of trials found).
    """

    # get parameters; an empty `cond` is treated like a missing one
    cond = bottle.request.query.get('cond')
    if cond is not None and len(cond) < 1:
        cond = None

    csv_name = None
    trials = []
    num_trials = 0

    # if we got a condition
    if cond is not None:
        dump = bottle.request.query.get('criteria') is not None
        want_csv = bottle.request.query.get('csv') is not None

        # only restrict the retrieved fields when we will look at criteria
        lilly = LillyCOI()
        args = ['id', 'eligibility'] if dump or want_csv else None
        found_trials = lilly.search_for_condition(cond, True, args)
        num_trials = len(found_trials)

        # list criteria
        if dump:
            trials = found_trials

        # return CSV
        elif want_csv:
            # Drop the microseconds explicitly: isoformat() omits them when
            # they are 0, so slicing off a fixed number of characters would
            # then truncate the time itself.
            stamp = datetime.now().replace(microsecond=0).isoformat()
            csv_name = 'criteria-%s.csv' % stamp
            with codecs.open(csv_name, 'w', 'utf-8') as handle:
                heads = [
                    "format", "num in", "num ex", "w age", "w gender",
                    "w pregnancy", "incomplete", "overly complex",
                    "sub-populations", "negated inclusions", "labs",
                    "scores", "acronyms", "temporal components",
                    "patient behavior/abilities",
                    "investigator-subjective components", "sum",
                ]
                # one empty quoted cell per annotation column
                blanks = ','.join(['""'] * len(heads))

                # CSV header
                handle.write('"NCT","first received yrs ago","last update yrs ago","has completion","completion and status compatible","criteria",%s\n' % ','.join(['"%s"' % h for h in heads]))

                # CSV rows; double quotes in the criteria are escaped by
                # doubling them
                for study in found_trials:
                    study.load()
                    handle.write('"%s","","","","","%s",%s\n' % (study.nct, study.criteria_text.replace('"', '""'), blanks))

    # render index
    template = _jinja_templates.get_template('index.html')
    return template.render(cond=cond, trials=trials, csv=csv_name, num=num_trials)
def _run(self, fields=None, callback=None):
    """ Runs the whole toolchain.

    Currently writes all status to a file associated with run_id. If the
    first word in that file is "error", the process is assumed to have
    stopped. If it is "done" the work here is done.

    :param fields: optional list of fields to retrieve for each trial; the
        analyzed keypaths and 'eligibility' are added when
        `self.analyze_keypaths` is set. The passed list is not modified.
    :param callback: optional callable invoked as `callback(success, trials)`
        once the NLP pipelines have been run (or one of them failed)
    :raises Exception: if neither `self.condition` nor `self.term` is set
    """

    # check prerequisites
    if self.condition is None and self.term is None:
        raise Exception("No 'condition' and no 'term' provided")

    self.assure_run_directory()
    self.status = "Searching for %s trials..." % (self.condition if self.condition is not None else self.term)

    # anonymous callback for progress reporting
    def cb(inst, progress):
        if progress > 0:
            self.status = "Fetching (%d%%)" % (100 * progress)

    # make sure we retrieve the properties that we want to analyze; work on
    # a copy so the list handed in by the caller is never mutated
    if self.analyze_keypaths:
        fields = list(fields) if fields is not None else []
        fields.extend(self.analyze_keypaths)
        fields.append('eligibility')

    # start the search
    self.status = "Fetching %s trials..." % (self.condition if self.condition is not None else self.term)

    lilly = LillyCOI()
    if self.condition is not None:
        trials = lilly.search_for_condition(self.condition, True, fields, cb)
    else:
        trials = lilly.search_for_term(self.term, True, fields, cb)

    if self.limit and len(trials) > self.limit:
        trials = trials[:self.limit]

    # process found trials
    self.status = "Processing..."
    sqlite = SQLite.get(self.sqlite_db)

    progress_tot = len(trials)
    # update the status every 1/25th of the trials, but at most every 5th trial
    progress_each = max(5, progress_tot // 25)
    num_nlp_trials = 0
    nlp_to_run = set()
    for progress, trial in enumerate(trials, 1):
        trial.analyze_keypaths = self.analyze_keypaths

        if self.catch_exceptions:
            try:
                trial.codify_analyzables(self.nlp_pipelines, self.discard_cached)
            except Exception as e:
                # NOTE(review): this path returns without committing and
                # without invoking `callback` -- confirm that is intended
                self.status = 'Error processing trial: %s' % e
                return
        else:
            trial.codify_analyzables(self.nlp_pipelines, self.discard_cached)
        trial.store()
        self.write_trial(sqlite, trial)

        # make sure we run the NLP pipeline if needed
        to_run = trial.waiting_for_nlp(self.nlp_pipelines)
        if len(to_run) > 0:
            nlp_to_run.update(to_run)
            num_nlp_trials = num_nlp_trials + 1

        # progress
        if 0 == progress % progress_each:
            self.status = "Processing (%d %%)" % (float(progress) / progress_tot * 100)

    sqlite.commit()

    # run the needed NLP pipelines
    success = True
    for nlp in self.nlp_pipelines:
        if nlp.name in nlp_to_run:
            self.status = "Running %s for %d trials (this may take a while)" % (nlp.name, num_nlp_trials)
            if self.catch_exceptions:
                try:
                    nlp.run()
                except Exception as e:
                    self.status = "Running %s failed: %s" % (nlp.name, str(e))
                    success = False
                    break
            else:
                nlp.run()

    # make sure we codified all criteria
    if success:
        for trial in trials:
            trial.codify_analyzables(self.nlp_pipelines, False)

        self.status = 'done'

    # run the callback
    if callback is not None:
        callback(success, trials)