Example #1
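    # Base cleanup hook: marks the job in progress, runs the subclass's
    # custom_cleanup(), and downgrades the job to WARNING on failure.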
    def cleanup(self, pipeline_id, job, owner, pipeline_type, p_config):
        client = util.mongo_client()
        db = client[util.mongo_db]

        try:
            jobs.update_job_status(job, util.conn_string, jobs.IN_PROGRESS,
                                   "Running Collector Cleanup")
            self.custom_cleanup(pipeline_id, job, owner, pipeline_type,
                                p_config, client, db)
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            # format_exc() records the exception's own traceback;
            # format_stack() here would only capture the handler's call stack.
            jobs.update_job_status(job, util.conn_string, jobs.WARNING,
                                   traceback.format_exc())
            print(ex)
        finally:
            client.close()
Example #2
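    # Variant of the cleanup hook above that reports errors through
    # log(..., ERROR) instead of printing to stdout/stderr.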
    def cleanup(self, pipeline_id, job, owner, pipeline_type, p_config):
        client = util.mongo_client()
        db = client[util.mongo_db]

        try:
            jobs.update_job_status(job, util.conn_string, jobs.IN_PROGRESS,
                                   "Running Collector Cleanup")
            self.custom_cleanup(pipeline_id, job, owner, pipeline_type,
                                p_config, client, db)
        except Exception as ex:
            jobs.update_job_status(job, util.conn_string, jobs.WARNING,
                                   traceback.format_exc())
            log(ex, ERROR)
        finally:
            client.close()
Example #3
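    # ProviderAssertion batch: queries Solr for a page of documents, matches
    # the configured terms with TermFinder, and writes each match to Mongo.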
    def run(self):

        client = MongoClient(util.mongo_host, util.mongo_port)

        try:
            jobs.update_job_status(
                str(self.job), util.conn_string, jobs.IN_PROGRESS,
                "Running ProviderAssertion Batch %s" % self.batch)

            pipeline_config = config.get_pipeline_config(
                self.pipeline, util.conn_string)

            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.IN_PROGRESS, "Running Solr query")
            docs = solr_data.query(
                self.solr_query,
                rows=util.row_count,
                start=self.start,
                solr_url=util.solr_url,
                tags=pipeline_config.report_tags,
                report_type_query=pipeline_config.report_type_query,
                mapper_inst=util.report_mapper_inst,
                mapper_url=util.report_mapper_url,
                mapper_key=util.report_mapper_key,
                cohort_ids=pipeline_config.cohort)
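            # Build the term matcher; the term list can be expanded with
            # synonyms, descendants, and ancestors from the vocabulary.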
            term_matcher = TermFinder(pipeline_config.terms,
                                      pipeline_config.include_synonyms,
                                      pipeline_config.include_descendants,
                                      pipeline_config.include_ancestors,
                                      pipeline_config.vocabulary)
            # Copy the shared filter template so per-pipeline sections are not
            # written into the module-level dict (assumption: no caller relies
            # on that mutation).
            pa_filters = dict(provider_assertion_filters)
            if pipeline_config.sections and len(pipeline_config.sections) > 0:
                pa_filters[SECTIONS_FILTER] = pipeline_config.sections

            with self.output().open('w') as outfile:
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS,
                                       "Finding terms with TermFinder")
                for doc in docs:
                    terms_found = term_matcher.get_term_full_text_matches(
                        doc["report_text"], pa_filters)
                    for term in terms_found:
                        inserted = mongo_writer(client, self.pipeline,
                                                self.job, self.batch,
                                                pipeline_config, term, doc,
                                                "ProviderAssertion")
                        outfile.write(str(inserted))
                        outfile.write('\n')
                    del terms_found
            del docs
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.WARNING, traceback.format_exc())
            print(ex)
        finally:
            client.close()
Example #4
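    # Collector entry point: same status/error-handling pattern as the cleanup
    # hooks above, but dispatching to the subclass's run_custom_task().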
    def run(self, pipeline_id, job, owner, pipeline_type, p_config):
        client = MongoClient(util.mongo_host, util.mongo_port)
        db = client[util.mongo_db]

        try:
            jobs.update_job_status(job, util.conn_string, jobs.IN_PROGRESS,
                                   "Running Collector")
            self.run_custom_task(pipeline_id, job, owner, pipeline_type,
                                 p_config, client, db)
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            jobs.update_job_status(job, util.conn_string, jobs.WARNING,
                                   traceback.format_exc())
            print(ex)
        finally:
            client.close()
Example #5
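    # Generic custom-task runner: resolves the concrete task name, queries
    # Solr, caches every document, then calls the subclass's run_custom_task().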
    def run(self):
        task_family_name = str(self.task_family)
        if self.task_name == "ClarityNLPLuigiTask":
            self.task_name = task_family_name
        client = util.mongo_client()

        try:
            with self.output().open('w') as temp_file:
                temp_file.write("start writing custom task")
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS,
                                       "Running Batch %s" % self.batch)

                self.pipeline_config = config.get_pipeline_config(
                    self.pipeline, util.conn_string)
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS, "Running Solr query")
                self.docs = solr_data.query(
                    self.solr_query,
                    rows=util.row_count,
                    start=self.start,
                    solr_url=util.solr_url,
                    tags=self.pipeline_config.report_tags,
                    mapper_inst=util.report_mapper_inst,
                    mapper_url=util.report_mapper_url,
                    mapper_key=util.report_mapper_key,
                    types=self.pipeline_config.report_types,
                    sources=self.pipeline_config.sources,
                    filter_query=self.pipeline_config.filter_query,
                    cohort_ids=self.pipeline_config.cohort,
                    job_results_filters=self.pipeline_config.job_results)

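                # Cache each document by report id, in-process and/or in
                # Redis, depending on the configured caching flags.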
                for d in self.docs:
                    doc_id = d[util.solr_report_id_field]
                    if util.use_memory_caching == "true":
                        k = keys.hashkey(doc_id)
                        document_cache[k] = d
                    if util.use_redis_caching == "true":
                        util.write_to_redis_cache("doc:" + doc_id,
                                                  json.dumps(d))
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS,
                                       "Running %s main task" % self.task_name)
                self.run_custom_task(temp_file, client)
                temp_file.write("Done writing custom task!")

            self.docs = list()
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.WARNING, traceback.format_exc())
            print(ex)
        finally:
            client.close()
Example #6
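    # MeasurementFinder batch: queries Solr for the batch's documents, runs the
    # measurement finder over each report text, and writes results to Mongo.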
    def run(self):
        client = MongoClient(util.mongo_host, util.mongo_port)

        try:
            jobs.update_job_status(
                str(self.job), util.conn_string, jobs.IN_PROGRESS,
                "Running MeasurementFinder Batch %s" % self.batch)

            pipeline_config = config.get_pipeline_config(
                self.pipeline, util.conn_string)

            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.IN_PROGRESS, "Running Solr query")
            docs = solr_data.query(self.solr_query,
                                   rows=util.row_count,
                                   start=self.start,
                                   solr_url=util.solr_url,
                                   tags=pipeline_config.report_tags,
                                   mapper_inst=util.report_mapper_inst,
                                   mapper_url=util.report_mapper_url,
                                   mapper_key=util.report_mapper_key,
                                   cohort_ids=pipeline_config.cohort)

            filters = dict()
            if pipeline_config.sections and len(pipeline_config.sections) > 0:
                filters[SECTIONS_FILTER] = pipeline_config.sections

            with self.output().open('w') as outfile:
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS,
                                       "Finding terms with MeasurementFinder")
                # TODO incorporate sections and filters
                for doc in docs:
                    meas_results = run_measurement_finder_full(
                        doc["report_text"], pipeline_config.terms)
                    for meas in meas_results:
                        inserted = mongo_writer(client, self.pipeline,
                                                self.job, self.batch,
                                                pipeline_config, meas, doc,
                                                "MeasurementFinder")
                        outfile.write(str(inserted))
                        outfile.write('\n')
                    del meas_results
            del docs
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.WARNING, traceback.format_exc())
            print(ex)
        finally:
            client.close()
Example #7
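    # Slimmer variant of the custom-task runner in Example #5: no document
    # caching, and a narrower set of Solr query parameters.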
    def run(self):
        task_family_name = str(self.task_family)
        if self.task_name == "ClarityNLPLuigiTask":
            self.task_name = task_family_name
        client = MongoClient(util.mongo_host, util.mongo_port)

        try:
            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.IN_PROGRESS,
                                   "Running Batch %s" % self.batch)

            self.pipeline_config = config.get_pipeline_config(
                self.pipeline, util.conn_string)
            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.IN_PROGRESS, "Running Solr query")
            self.docs = solr_data.query(
                self.solr_query,
                rows=util.row_count,
                start=self.start,
                solr_url=util.solr_url,
                tags=self.pipeline_config.report_tags,
                mapper_inst=util.report_mapper_inst,
                mapper_url=util.report_mapper_url,
                mapper_key=util.report_mapper_key,
                types=self.pipeline_config.report_types,
                filter_query=self.pipeline_config.filter_query)

            with self.output().open('w') as temp_file:
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS,
                                       "Running %s main task" % self.task_name)
                self.run_custom_task(temp_file, client)
                temp_file.write("Done writing custom task!")

            self.docs = list()
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.WARNING, traceback.format_exc())
            print(ex)
        finally:
            client.close()
Example #8
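    # ValueExtractor batch: current_doc tracks the document being processed so
    # an error report can identify which report triggered the failure.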
    def run(self):
        client = MongoClient(util.mongo_host, util.mongo_port)

        current_doc = None
        try:
            jobs.update_job_status(
                str(self.job), util.conn_string, jobs.IN_PROGRESS,
                "Running ValueExtractor Batch %s" % self.batch)

            pipeline_config = config.get_pipeline_config(
                self.pipeline, util.conn_string)

            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.IN_PROGRESS, "Running Solr query")
            docs = solr_data.query(self.solr_query,
                                   rows=util.row_count,
                                   start=self.start,
                                   solr_url=util.solr_url,
                                   tags=pipeline_config.report_tags,
                                   mapper_inst=util.report_mapper_inst,
                                   mapper_url=util.report_mapper_url,
                                   mapper_key=util.report_mapper_key,
                                   cohort_ids=pipeline_config.cohort)

            filters = dict()
            if pipeline_config.sections and len(pipeline_config.sections) > 0:
                filters[SECTIONS_FILTER] = pipeline_config.sections

            with self.output().open('w') as outfile:
                jobs.update_job_status(str(self.job), util.conn_string,
                                       jobs.IN_PROGRESS,
                                       "Finding terms with ValueExtractor")
                # TODO incorporate sections and filters
                for doc in docs:
                    current_doc = doc
                    result = run_value_extractor_full(
                        pipeline_config.terms, doc["report_text"],
                        float(pipeline_config.minimum_value),
                        float(pipeline_config.maximum_value),
                        pipeline_config.case_sensitive)
                    if result:
                        for val in result:
                            inserted = mongo_writer(client, self.pipeline,
                                                    self.job, self.batch,
                                                    pipeline_config, val, doc,
                                                    "ValueExtractor")
                            outfile.write(str(inserted))
                            outfile.write('\n')
                        del result
                    else:
                        outfile.write("no matches!\n")
            del docs
        except AssertionError as a:
            print(a)
            print(current_doc)
        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            # current_doc is still None if the failure happened before the
            # document loop began.
            report_id = current_doc['report_id'] if current_doc else 'unknown'
            jobs.update_job_status(
                str(self.job), util.conn_string, jobs.WARNING,
                'Report ID: ' + str(report_id) + '\n' +
                traceback.format_exc())
            print(ex)
        finally:
            client.close()
Example #9
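    # Thin wrapper for pushing a job-status update from a task.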
    def write_log_data(self, job_status, status_message):
        jobs.update_job_status(str(self.job), util.conn_string, job_status,
                               status_message)