Code example #1 (0 votes) — file: gen_csv.py, project: p2/clinical-trials-eval
def get_trials(condition, recruiting=True, filename='years.csv'):
    """Search for trials matching a condition and dump date ages to a CSV.

    Queries LillyCOI for trials matching `condition`, then writes one CSV
    row per trial with how many years ago each of its key dates occurred.

    :param condition: condition string to search for
    :param recruiting: if True, restrict the search to recruiting trials
    :param filename: path of the CSV file to write
    """
    lilly = LillyCOI()
    fields = [
        'id', 'lastchanged_date', 'firstreceived_date',
        'primary_completion_date', 'completion_date', 'verification_date'
    ]
    found = lilly.search_for_condition(condition, recruiting, fields)
    if not found:
        print("None found")
        return

    print("Found %d" % len(found))
    now = datetime.datetime.now()

    def _years_ago(pair):
        # trial.date() appears to return a (raw, parsed-datetime) pair —
        # TODO confirm against Trial.date(). A missing parsed date is
        # flagged with the sentinel 99 (rendered as "99.0" in the CSV).
        if not pair[1]:
            return 99
        return round((now - pair[1]).days / 365.25 * 10) / 10

    # list trials; named `out` so we don't shadow the `csv` module
    with codecs.open(filename, 'w') as out:
        out.write(
            'NCT,"first received yrs ago","last update yrs ago",primary,completion,veri,"has completion","completion and status compatible",criteria\n'
        )

        # keep large result sets manageable by sampling a quarter of them;
        # `//` keeps this an integer count on Python 3 as well
        if len(found) > 150:
            found = random.sample(found, len(found) // 4)

        for trial in found:
            # age of each relevant date, in years (99 = missing)
            first_y = _years_ago(trial.date('firstreceived_date'))
            last_y = _years_ago(trial.date('lastchanged_date'))
            comp_y = _years_ago(trial.date('primary_completion_date'))
            done = trial.date('completion_date')
            done_y = _years_ago(done)
            veri_y = _years_ago(trial.date('verification_date'))

            out.write('"%s",%.1f,%.1f,%.1f,%.1f,%.1f,%s,%s,""\n' %
                      (trial.nct, first_y, last_y, comp_y, done_y, veri_y,
                       'TRUE' if done[1] else 'FALSE',
                       'TRUE' if done[1] and done[1] > now else 'FALSE'))
    print('Written to "%s"' % filename)
Code example #2 (0 votes)
def index():
    """The index page.

    Reads the `cond` query parameter; if present, searches LillyCOI for
    matching trials. With the `criteria` parameter set, the trials are
    passed to the template for display; with `csv` set, a criteria CSV
    file is written instead and its name handed to the template.
    """

    # get parameters; treat an empty `cond` like a missing one
    cond = bottle.request.query.get('cond')
    if cond is not None and len(cond) < 1:
        cond = None
    criteria = None
    csv_name = None
    trials = []
    num_trials = 0

    # if we got a condition
    if cond is not None:
        # presence of the parameters is what matters, not their values
        dump = bottle.request.query.get('criteria') is not None
        want_csv = bottle.request.query.get('csv') is not None

        lilly = LillyCOI()
        # only fetch eligibility data when we actually need it
        args = ['id', 'eligibility'] if dump or want_csv else None
        found_trials = lilly.search_for_condition(cond, True, args)
        num_trials = len(found_trials)

        # list criteria
        if dump:
            trials = found_trials

        # return CSV
        elif want_csv:
            csv_name = 'criteria-%s.csv' % datetime.now().isoformat()[:-7]
            with codecs.open(csv_name, 'w', 'utf-8') as handle:
                heads = [
                    "format", "num in", "num ex", "w age", "w gender",
                    "w pregnancy", "incomplete", "overly complex",
                    "sub-populations", "negated inclusions", "labs", "scores",
                    "acronyms", "temporal components",
                    "patient behavior/abilities",
                    "investigator-subjective components", "sum"
                ]
                # empty placeholder cells, one per manual-annotation column
                headers = ','.join('""' for _ in heads)

                # CSV header
                handle.write(
                    '"NCT","first received yrs ago","last update yrs ago","has completion","completion and status compatible","criteria",%s\n'
                    % ','.join(['"%s"' % h for h in heads]))

                # CSV rows; `every` is a manual sampling knob (1 = keep all)
                i = 0
                every = 1
                for study in found_trials:
                    if 0 == i % every:
                        study.load()
                        # double inner quotes per the CSV escaping convention
                        handle.write(
                            '"%s","","","","","%s",%s\n' %
                            (study.nct, study.criteria_text.replace(
                                '"', '""'), headers))
                    i += 1

    # render index
    template = _jinja_templates.get_template('index.html')
    return template.render(cond=cond,
                           trials=trials,
                           csv=csv_name,
                           num=num_trials)
Code example #3 (0 votes)
    # read CSV: re-resolve each trial by NCT id and recompute its date ages
    # (fragment of a larger function; csv_path comes from the enclosing scope)
    with codecs.open(csv_path, 'r') as handle:
        reader = csv.reader(handle)
        # Python 2 iterator protocol; would be next(reader) on Python 3
        header = reader.next()

        # locate the columns we need by name rather than position
        idx_nct = header.index('NCT')
        idx_drop = header.index('criteria')
        idx_first = header.index('first received yrs ago')
        idx_last = header.index('last update yrs ago')

        # open output file; derive its name from the input, stripping any
        # "-manual" suffix from the base name
        csv_new = "%s-auto-updated.csv" % os.path.splitext(
            csv_path)[0].replace('-manual', '')
        with codecs.open(csv_new, 'w') as w_handle:
            lilly = LillyCOI()
            # ref_date = datetime.datetime(2013, 7, 30)		# this can NOT be used against date last updated, of course
            ref_date = datetime.datetime.now()

            writer = csv.writer(w_handle)
            # drop the free-text criteria column from the output header
            header.pop(idx_drop)
            writer.writerow(header)

            # loop trials: fetch each trial fresh by its NCT id
            for row in reader:
                trial = lilly.get_trial(row[idx_nct])

                # date calculations: years since first received, one decimal;
                # 99 is the sentinel for a missing date
                first = trial.date('firstreceived_date')
                first_y = round((ref_date - first[1]).days / 365.25 *
                                10) / 10 if first[1] else 99
Code example #4 (0 votes) — file: runner.py, project: vedantja/ClinicalTrialsNLP
    def _run(self, fields=None, callback=None):
        """Run the whole toolchain.

        Currently writes all status to a file associated with run_id. If the
        first word in that file is "error", the process is assumed to have
        stopped. If it is "done" the work here is done.

        :param fields: optional list of trial fields to retrieve; extended
            with the analyzable keypaths when needed (the caller's list is
            never mutated)
        :param callback: optional callable invoked at the end as
            ``callback(success, trials)``
        :raises Exception: if neither `condition` nor `term` is set
        """

        # check prerequisites
        if self.condition is None and self.term is None:
            raise Exception("No 'condition' and no 'term' provided")

        self.assure_run_directory()
        self.status = "Searching for %s trials..." % (
            self.condition if self.condition is not None else self.term)

        # anonymous callback for progress reporting
        def cb(inst, progress):
            if progress > 0:
                self.status = "Fetching (%d%%)" % (100 * progress)

        # make sure we retrieve the properties that we want to analyze;
        # copy `fields` first so we never mutate the caller's list
        if self.analyze_keypaths:
            fields = [] if fields is None else list(fields)
            fields.extend(self.analyze_keypaths)
            fields.append('eligibility')

        # start the search
        self.status = "Fetching %s trials..." % (
            self.condition if self.condition is not None else self.term)

        lilly = LillyCOI()
        if self.condition is not None:
            trials = lilly.search_for_condition(self.condition, True, fields,
                                                cb)
        else:
            trials = lilly.search_for_term(self.term, True, fields, cb)

        if self.limit and len(trials) > self.limit:
            trials = trials[:self.limit]

        # process found trials
        self.status = "Processing..."
        sqlite = SQLite.get(self.sqlite_db)

        progress = 0
        progress_tot = len(trials)
        # report every ~4% of trials, but never more often than every 5;
        # `//` keeps this an int on Python 3 as well
        progress_each = max(5, progress_tot // 25)
        ncts = []
        num_nlp_trials = 0
        nlp_to_run = set()
        for trial in trials:
            ncts.append(trial.nct)
            trial.analyze_keypaths = self.analyze_keypaths

            # codify; optionally trap per-trial errors so one bad trial
            # aborts with a status message instead of a traceback
            if self.catch_exceptions:
                try:
                    trial.codify_analyzables(self.nlp_pipelines,
                                             self.discard_cached)
                except Exception as e:
                    self.status = 'Error processing trial: %s' % e
                    return
            else:
                trial.codify_analyzables(self.nlp_pipelines,
                                         self.discard_cached)

            trial.store()
            self.write_trial(sqlite, trial)

            # make sure we run the NLP pipeline if needed
            to_run = trial.waiting_for_nlp(self.nlp_pipelines)
            if len(to_run) > 0:
                nlp_to_run.update(to_run)
                num_nlp_trials += 1

            # progress
            progress += 1
            if 0 == progress % progress_each:
                self.status = "Processing (%d %%)" % (float(progress) /
                                                      progress_tot * 100)

        sqlite.commit()

        # run the needed NLP pipelines
        success = True
        for nlp in self.nlp_pipelines:
            if nlp.name in nlp_to_run:
                self.status = "Running %s for %d trials (this may take a while)" % (
                    nlp.name, num_nlp_trials)
                if self.catch_exceptions:
                    try:
                        nlp.run()
                    except Exception as e:
                        self.status = "Running %s failed: %s" % (nlp.name,
                                                                 str(e))
                        success = False
                        break
                else:
                    nlp.run()

        # make sure we codified all criteria
        if success:
            for trial in trials:
                trial.codify_analyzables(self.nlp_pipelines, False)

            self.status = 'done'

        # run the callback
        if callback is not None:
            callback(success, trials)