Example #1
def lookup_phenotype_results_by_id(id_list: list):
    client = util.mongo_client()
    db = client[util.mongo_db]
    obj = dict()
    obj['results'] = list()
    obj['indexes'] = dict()

    try:
        # db.phenotype_results.find({"_id": { $in: [ObjectId("5b117352bcf26f020e392a9c"),
        # ObjectId("5b117352bcf26f020e3926e2")]}})
        ids = [ObjectId(x) for x in id_list]
        res = db.phenotype_results.find({"_id": {"$in": ids}})
        obj['results'] = list(res)
        for n, o in enumerate(obj['results']):
            # keep the mapped document and record each id's position in the results list
            mapped = display_mapping(o)
            obj['results'][n] = mapped
            obj['indexes'][str(mapped['_id'])] = n

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        obj['success'] = False
    finally:
        client.close()

    return obj
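A minimal usage sketch (not part of the source); the ObjectId string reuses the value from the comment above and is purely illustrative:
out = lookup_phenotype_results_by_id(['5b117352bcf26f020e392a9c'])
pos = out['indexes'].get('5b117352bcf26f020e392a9c')
if pos is not None:
    first_result = out['results'][pos]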
Example #2
def writeResultFeedback(data):
    try:

        # Parsing info
        job_id = data['job_id']
        result_id = data['result_id']

        # connecting to the Mongo DB
        client = util.mongo_client()
        db = client[util.mongo_db]

        # get the result_feedback collection (MongoDB creates it lazily on first write)
        collection = db['result_feedback']

        # checking if the result exists in Mongo
        query = {'result_id': result_id}
        existing_entry = collection.find_one(query)
        if existing_entry is None:
            # Writing a new result to Mongo
            collection.insert_one(data)
        else:
            # Updating the existing result in Mongo
            collection.update_one(query, {"$set": data})

        # returning 200 response
        return Response("Successfully wrote result feedback",
                        status=200,
                        mimetype='application/json')
    except Exception as e:
        # returning 400 response
        return Response(str(e), status=400, mimetype='application/json')
Example #3
def phenotype_subjects(job_id: str, phenotype_final: bool):
    client = util.mongo_client()
    db = client[util.mongo_db]
    res = []
    # db.phenotype_results.aggregate([  {"$match":{"job_id":{"$eq":10201}, "phenotype_final":{"$eq":true}}},
    #  {"$group" : {_id:"$subject", count:{$sum:1}}} ])
    try:
        q = [{
            "$match": {
                "phenotype_final": {
                    "$eq": phenotype_final
                },
                "job_id": {
                    "$eq": int(job_id)
                }
            }
        }, {
            "$group": {
                "_id": "$subject",
                "count": {
                    "$sum": 1
                }
            }
        }]
        res = list(db.phenotype_results.aggregate(q))
        res = sorted(res, key=lambda r: r['count'], reverse=True)
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
    finally:
        client.close()

    return res
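A minimal usage sketch (hypothetical caller); the job id reuses the value from the comment above:
subjects = phenotype_subjects('10201', True)
for row in subjects:
    # each row is {'_id': <subject>, 'count': <number of matching results>}
    print(row['_id'], row['count'])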
Example #4
def phenotype_subject_results(job_id: str, phenotype_final: bool,
                              subject: str):
    client = util.mongo_client()
    db = client[util.mongo_db]
    res = []
    try:
        query = {
            "job_id": int(job_id),
            "phenotype_final": phenotype_final,
            "subject": subject
        }

        temp = list(db["phenotype_results"].find(query))
        for r in temp:
            # drop NaN values; only floats can be NaN, so the int check is unnecessary
            obj = {k: v for k, v in r.items()
                   if not (isinstance(v, float) and math.isnan(v))}
            res.append(obj)

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
    finally:
        client.close()

    return res
Example #5
def delete_job(job_id: str, connection_string: str):
    conn = psycopg2.connect(connection_string)
    client = util.mongo_client()

    cursor = conn.cursor()
    flag = -1  # set to 1 when the delete succeeds

    try:
        cursor.execute("DELETE FROM nlp.nlp_job_status  WHERE nlp_job_id=" + job_id)
        cursor.execute("DELETE FROM nlp.nlp_job WHERE nlp_job_id=" + job_id)
        conn.commit()

        db = client[util.mongo_db]
        db.phenotype_results.remove({
            "job_id": int(job_id)
        })

        flag = 1
    except Exception as e:
        flag = -1
        traceback.print_exc(file=sys.stdout)
    finally:
        conn.close()
        client.close()

    return flag
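A minimal usage sketch (hypothetical job id), assuming util.conn_string holds the Postgres connection string as in the other examples:
status = delete_job('1234', util.conn_string)
if status == 1:
    print('job 1234 deleted')
else:
    print('delete failed; see traceback above')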
Example #6
    def run(self):
        task_family_name = str(self.task_family)
        if self.task_name == "ClarityNLPLuigiTask":
            self.task_name = task_family_name
        client = util.mongo_client()

        try:
            with self.output().open('w') as temp_file:
                temp_file.write("start writing custom task")
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS,
                                       "Running Batch %s" % self.batch)

                self.pipeline_config = config.get_pipeline_config(
                    self.pipeline, util.conn_string)
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS, "Running Solr query")
                self.docs = solr_data.query(
                    self.solr_query,
                    rows=util.row_count,
                    start=self.start,
                    solr_url=util.solr_url,
                    tags=self.pipeline_config.report_tags,
                    mapper_inst=util.report_mapper_inst,
                    mapper_url=util.report_mapper_url,
                    mapper_key=util.report_mapper_key,
                    types=self.pipeline_config.report_types,
                    sources=self.pipeline_config.sources,
                    filter_query=self.pipeline_config.filter_query,
                    cohort_ids=self.pipeline_config.cohort,
                    job_results_filters=self.pipeline_config.job_results)

                for d in self.docs:
                    doc_id = d[util.solr_report_id_field]
                    if util.use_memory_caching == "true":
                        k = keys.hashkey(doc_id)
                        document_cache[k] = d
                    if util.use_redis_caching == "true":
                        util.write_to_redis_cache("doc:" + doc_id,
                                                  json.dumps(d))
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS,
                                       "Running %s main task" % self.task_name)
                self.run_custom_task(temp_file, client)
                temp_file.write("Done writing custom task!")

            self.docs = list()
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.WARNING,
                                   ''.join(traceback.format_stack()))
            print(ex)
        finally:
            client.close()
Example #7
def paged_phenotype_results(job_id: str,
                            phenotype_final: bool,
                            last_id: str = ''):
    client = util.mongo_client()
    db = client[util.mongo_db]
    obj = dict()

    try:
        columns = sorted(get_columns(db, job_id, 'phenotype', phenotype_final))
        # first page: no id to resume from
        if last_id == '':
            res = list(
                db.phenotype_results.find({
                    "job_id": int(job_id),
                    "phenotype_final": phenotype_final
                }).limit(page_size))
            # count_documents replaces the deprecated Cursor.count()
            obj['count'] = int(
                db.phenotype_results.count_documents({
                    "job_id": int(job_id),
                    "phenotype_final": phenotype_final
                }))
        else:
            res = list(
                db.phenotype_results.find({
                    "_id": {
                        "$gt": ObjectId(last_id)
                    },
                    "job_id": int(job_id),
                    "phenotype_final": phenotype_final
                }).limit(page_size))

        results_length = len(res)
        no_more = results_length < page_size
        if results_length > 0:
            # return the id as a string so it round-trips through the API
            new_last_id = str(res[-1]['_id'])
        else:
            new_last_id = ''

        obj['results'] = list(map(display_mapping, res))
        obj['no_more'] = no_more
        obj['new_last_id'] = new_last_id
        obj['columns'] = columns
        obj['result_count'] = results_length
        obj['success'] = True

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        obj['success'] = False
    finally:
        client.close()

    return obj
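A minimal usage sketch (not part of the source) showing how a caller might page through results by feeding each page's new_last_id back in as last_id; handle_page is a hypothetical consumer:
last_id = ''
while True:
    page = paged_phenotype_results('1234', True, last_id)
    if not page.get('success'):
        break
    handle_page(page['results'])  # hypothetical consumer of one page of results
    if page['no_more']:
        break
    last_id = page['new_last_id']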
Example #8
def lookup_phenotype_result_by_id(id: str):
    client = util.mongo_client()
    db = client[util.mongo_db]
    obj = dict()

    try:
        result = db.phenotype_results.find_one({'_id': ObjectId(id)})
        if result is not None:
            obj = display_mapping(result)
        else:
            # no document with this id; report failure instead of crashing on None
            obj['success'] = False
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        obj['success'] = False
    finally:
        client.close()

    return obj
Example #9
def phenotype_performance_results(jobs: list):
    metrics = dict()

    # nothing to do; avoid opening a Mongo connection for an empty job list
    if len(jobs) < 1:
        return metrics

    client = util.mongo_client()
    db = client[util.mongo_db]
    try:
        for job in jobs:
            performance = {
                'total_answered': 0,
                'total_correct': 0,
                'total_incorrect': 0,
                'accuracy_score': 0.0,
                'total_comments': 0
            }

            query = {"job_id": int(job.strip())}

            has_comments = 0
            count = 0
            correct = 0
            query_results = db['result_feedback'].find(query)
            # ['comments', 'feature', 'is_correct', 'job_id', 'subject', 'report_id', 'result_id']
            for res in query_results:
                if len(res['comments']) > 0:
                    has_comments += 1
                else:
                    count += 1
                    # is_correct is stored as a string such as 'true' or 'True'
                    if str(res['is_correct']).lower() == 'true':
                        correct += 1

            if count > 0:
                performance['accuracy_score'] = correct / count
            else:
                performance['accuracy_score'] = 0.0
            performance['total_incorrect'] = count - correct
            performance['total_correct'] = correct
            performance['total_answered'] = count
            performance['total_comments'] = has_comments

            metrics[job] = performance
    except Exception as e:
        log(e, ERROR)
    finally:
        client.close()

    return metrics
Example #10
    def cleanup(self, pipeline_id, job, owner, pipeline_type, p_config):
        client = util.mongo_client()
        db = client[util.mongo_db]

        try:
            jobs.update_job_status(job, util.conn_string, jobs.IN_PROGRESS,
                                   "Running Collector Cleanup")
            self.custom_cleanup(pipeline_id, job, owner, pipeline_type,
                                p_config, client, db)
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            jobs.update_job_status(job, util.conn_string, jobs.WARNING,
                                   ''.join(traceback.format_stack()))
            print(ex)
        finally:
            client.close()
Example #11
    def cleanup(self, pipeline_id, job, owner, pipeline_type, p_config):
        client = util.mongo_client()
        db = client[util.mongo_db]

        try:
            jobs.update_job_status(job, util.conn_string, jobs.IN_PROGRESS,
                                   "Running Collector Cleanup")
            self.custom_cleanup(pipeline_id, job, owner, pipeline_type,
                                p_config, client, db)
        except Exception as ex:
            jobs.update_job_status(job, util.conn_string, jobs.WARNING,
                                   ''.join(traceback.format_stack()))
            log(ex, ERROR)
        finally:
            client.close()
Example #12
def phenotype_results_by_context(context: str, query_filters: dict):
    client = util.mongo_client()
    db = client[util.mongo_db]
    res = []
    try:
        if context.lower() in ('patient', 'subject'):
            project = 'subject'
        else:
            project = 'report_id'
        res = list(db["phenotype_results"].find(query_filters, {project: 1}))

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
    finally:
        client.close()

    return res
Example #13
def generic_results(job: str, job_type: str, phenotype_final: bool = False):
    client = util.mongo_client()
    db = client[util.mongo_db]
    today = datetime.today().strftime('%m_%d_%Y_%H%M')
    filename = '/tmp/job%s_%s_%s.csv' % (job, job_type, today)
    try:
        with open(filename, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile,
                                    delimiter=util.delimiter,
                                    quotechar=util.quote_character,
                                    quoting=csv.QUOTE_MINIMAL)

            header_written = False
            if job_type == 'phenotype':
                query = {
                    "job_id": int(job),
                    "phenotype_final": phenotype_final
                }
            else:
                query = {"job_id": int(job)}

            query_results = db[job_type + "_results"].find(query)
            columns = sorted(get_columns(db, job, job_type, phenotype_final))
            for res in query_results:
                if not header_written:
                    csv_writer.writerow(columns)
                    header_written = True

                # one value per column; blank when the document lacks the key
                csv_writer.writerow([res.get(key, '') for key in columns])

    except Exception as e:
        log(e)
    finally:
        client.close()

    return filename
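A minimal usage sketch (hypothetical job id): export the final phenotype results for a job to CSV and report where the file landed:
csv_path = generic_results('1234', 'phenotype', phenotype_final=True)
print('results written to', csv_path)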
Example #14
def phenotype_feature_results(job_id: str, feature: str, subject: str):
    client = util.mongo_client()
    db = client[util.mongo_db]
    res = []
    try:
        query = {
            "job_id": int(job_id),
            "nlpql_feature": feature,
            "subject": subject
        }

        res = list(db["phenotype_results"].find(query))
    except Exception as e:
        traceback.print_exc(file=sys.stdout)

    return res
Example #15
def pipeline_results(job: str):
    client = util.mongo_client()
    today = datetime.today().strftime('%m_%d_%Y_%H%M')
    filename = '/tmp/job%s_pipeline_%s.csv' % (job, today)

    db = client[util.mongo_db]

    try:
        with open(filename, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile,
                                    delimiter=util.delimiter,
                                    quotechar=util.quote_character,
                                    quoting=csv.QUOTE_MINIMAL)

            header_written = False
            # copy so that extending the header does not mutate the module-level list
            header_values = list(pipeline_output_positions)

            for res in db.pipeline_results.find({"job_id": int(job)}):
                if not header_written:
                    # append any extra document fields (sorted) after the fixed columns
                    new_cols = sorted(k for k in res.keys() if k not in header_values)
                    header_values.extend(new_cols)
                    csv_writer.writerow(header_values)
                    header_written = True

                # one value per column; blank when the document lacks the key
                csv_writer.writerow([res.get(key, '') for key in header_values])
    except Exception as e:
        print(e)
    finally:
        client.close()

    return filename
Example #16
def phenotype_feedback_results(job: str):
    job_type = 'annotations'
    client = util.mongo_client()
    db = client[util.mongo_db]
    today = datetime.today().strftime('%m_%d_%Y_%H%M')
    filename = '/tmp/job_feedback%s_%s_%s.csv' % (job, job_type, today)
    try:
        with open(filename, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile,
                                    delimiter=util.delimiter,
                                    quotechar=util.quote_character,
                                    quoting=csv.QUOTE_MINIMAL)

            header_written = False
            query = {"job_id": int(job)}

            query_results = db['result_feedback'].find(query)
            columns = sorted([
                'comments', 'feature', 'is_correct', 'job_id', 'subject',
                'report_id', 'result_id'
            ])

            for res in query_results:
                if not header_written:
                    csv_writer.writerow(columns)
                    header_written = True

                # one value per column; blank when the document lacks the key
                csv_writer.writerow([res.get(key, '') for key in columns])

    except Exception as e:
        log(e)
    finally:
        client.close()

    return filename
Example #17
def mongo_process_operations(expr_obj_list,
                             db,
                             job_id,
                             phenotype: PhenotypeModel,
                             phenotype_id,
                             phenotype_owner,
                             c: PhenotypeOperations,
                             final=False):
    """
    Use MongoDB aggregation to evaluate NLPQL expressions.
    """

    print('mongo_process_operations expr_object_list: ')
    for expr_obj in expr_obj_list:
        print(expr_obj)

    context_var = phenotype.context.lower()
    if 'document' == context_var:
        # document IDs are in the report_id field
        context_field = 'report_id'
    else:
        # patient IDs are in the subject field
        context_field = 'subject'

    # setup access to the Mongo collection
    client = util.mongo_client()
    mongo_db_obj = client[util.mongo_db]
    mongo_collection_obj = mongo_db_obj['phenotype_results']

    is_final_save = c['final']
    
    for expr_obj in expr_obj_list:

        # the 'is_final' flag only applies to the last subexpression
        if expr_obj != expr_obj_list[-1]:
            is_final = False
        else:
            is_final = is_final_save
        
        # evaluate the (sub)expression in expr_obj
        eval_result = expr_eval.evaluate_expression(expr_obj,
                                                    job_id,
                                                    context_field,
                                                    mongo_collection_obj)
            
        # query MongoDB to get result docs
        cursor = mongo_collection_obj.find({'_id': {'$in': eval_result.doc_ids}})

        # initialize for MongoDB result document generation
        phenotype_info = expr_result.PhenotypeInfo(
            job_id = job_id,
            phenotype_id = phenotype_id,
            owner = phenotype_owner,
            context_field = context_field,
            is_final = is_final
        )

        # generate result documents
        if expr_eval.EXPR_TYPE_MATH == eval_result.expr_type:

            output_docs = expr_result.to_math_result_docs(eval_result,
                                                          phenotype_info,
                                                          cursor)
        else:
            assert expr_eval.EXPR_TYPE_LOGIC == eval_result.expr_type

            # flatten the result set into a set of Mongo documents
            doc_map, oid_list_of_lists = expr_eval.flatten_logical_result(eval_result,
                                                                          mongo_collection_obj)
            
            output_docs = expr_result.to_logic_result_docs(eval_result,
                                                           phenotype_info,
                                                           doc_map,
                                                           oid_list_of_lists)
            
        if len(output_docs) > 0:
            mongo_collection_obj.insert_many(output_docs)
        else:
            print('mongo_process_operations ({0}): ' \
                  'no phenotype matches on "{1}".'.format(eval_result.expr_type,
                                                          eval_result.expr_text))

    client.close()
Example #18
def mongo_process_operations(expr_obj_list,
                             db,
                             job_id,
                             phenotype: PhenotypeModel,
                             phenotype_id,
                             phenotype_owner,
                             c: PhenotypeOperations,
                             final=False):
    """
    Use MongoDB aggregation to evaluate NLPQL expressions.
    """

    log('mongo_process_operations expr_object_list: ')
    for expr_obj in expr_obj_list:
        log(expr_obj)

    context_var = phenotype.context.lower()
    if 'document' == context_var:
        # document IDs are in the report_id field
        context_field = 'report_id'
    else:
        # patient IDs are in the subject field
        context_field = 'subject'

    # setup access to the Mongo collection
    client = util.mongo_client()
    mongo_db_obj = client[util.mongo_db]
    mongo_collection_obj = mongo_db_obj['phenotype_results']

    # ensure integer job_id; expression evaluator needs to lookup results
    # by job_id, but a job id of 42 is different from a job_id of '42'
    job_id = int(job_id)
    
    try:
        is_final_save = c['final']

        for expr_obj in expr_obj_list:

            # the 'is_final' flag only applies to the last subexpression
            if expr_obj != expr_obj_list[-1]:
                is_final = False
            else:
                is_final = is_final_save

            # evaluate the (sub)expression in expr_obj
            eval_result = expr_eval.evaluate_expression(expr_obj,
                                                        job_id,
                                                        context_field,
                                                        mongo_collection_obj)

            # query MongoDB to get result docs
            cursor = mongo_collection_obj.find({'_id': {'$in': eval_result.doc_ids}})

            # initialize for MongoDB result document generation
            phenotype_info = expr_result.PhenotypeInfo(
                job_id=job_id,
                phenotype_id=phenotype_id,
                owner=phenotype_owner,
                context_field=context_field,
                is_final=is_final
            )

            # generate result documents
            if expr_eval.EXPR_TYPE_MATH == eval_result.expr_type:

                output_docs = expr_result.to_math_result_docs(eval_result,
                                                              phenotype_info,
                                                              cursor)
            else:
                assert expr_eval.EXPR_TYPE_LOGIC == eval_result.expr_type

                # flatten the result set into a set of Mongo documents
                doc_map, oid_list_of_lists = expr_eval.flatten_logical_result(eval_result,
                                                                              mongo_collection_obj)

                output_docs = expr_result.to_logic_result_docs(eval_result,
                                                               phenotype_info,
                                                               doc_map,
                                                               oid_list_of_lists)

            if len(output_docs) > 0:
                log('***** mongo_process_operations: writing {0} ' \
                    'output_docs *****'.format(len(output_docs)))
                try:
                    mongo_collection_obj.insert_many(output_docs)
                except pymongo.errors.BulkWriteError as e:
                    log('****** mongo_process_operations: ' \
                        'mongo insert_many failure ******')
                    log(e)
            else:
                log('mongo_process_operations ({0}): ' \
                      'no phenotype matches on "{1}".'.format(eval_result.expr_type,
                                                              eval_result.expr_text))
    except Exception as exc:
        log(exc, ERROR)
    finally:
        client.close()