Example #1
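# assumed context: util, data_access, and the log helper are module-level
# imports in the ClarityNLP source these examples come from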
def _get_custom_arg(str_key, str_variable_name, job_id, custom_arg_dict):
    """
    Extract the value at the given key from the given dict, or return None
    if not found. Falls back to the util.properties map if the dict does
    not supply a value.
    """

    value = None
    if str_key in custom_arg_dict:
        value = custom_arg_dict[str_key]

    if value is None:
        if str_key in util.properties:
            value = util.properties[str_key]

    # treat empty strings as None
    if isinstance(value, str) and len(value) == 0:
        value = None

    # echo in job status and in log file
    msg = 'CQLExecutionTask: {0} == {1}'.format(str_variable_name, value)
    data_access.update_job_status(job_id, util.conn_string,
                                  data_access.IN_PROGRESS, msg)
    # write msg to log file
    log(msg)

    return value
Example #2
def _get_custom_arg(str_key, str_variable_name, job_id, custom_arg_dict):
    """
    Extract a value at the given key from the given dict, or return None
    if not found.
    """

    value = None
    if str_key in custom_arg_dict:
        value = custom_arg_dict[str_key]

    # echo in job status and on stdout
    msg = '{0}: {1}'.format(str_variable_name, value)
    data_access.update_job_status(job_id, util.conn_string,
                                  data_access.IN_PROGRESS, msg)
    # write msg to stdout
    print(msg)

    return value
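A minimal usage sketch for the helper above, assuming a ClarityNLP-style
custom arguments dict; the 'source' and 'patient_limit' keys and the job id
are illustrative, not taken from the examples:

job_id = 1234
custom_arg_dict = {'source': 'MIMIC', 'patient_limit': ''}

# key present in the dict, value returned as-is
source = _get_custom_arg('source', 'source', job_id, custom_arg_dict)

# with the Example #1 variant, an empty string is coerced to None
limit = _get_custom_arg('patient_limit', 'patient_limit', job_id,
                        custom_arg_dict)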
Example #3
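    # assumed context: a Luigi task method; MongoClient comes from pymongo,
    # BulkWriteError from pymongo.errors, and util, data_access, and
    # phenotype_helper are ClarityNLP modules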
    def run(self):
        print('dependencies done; run phenotype reconciliation')
        client = MongoClient(util.mongo_host, util.mongo_port)

        try:
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.IN_PROGRESS,
                                          "Finished Pipelines")

            phenotype = data_access.query_phenotype(int(self.phenotype),
                                                    util.conn_string)
            print(phenotype)

            db = client[util.mongo_db]

            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.IN_PROGRESS,
                                          "Filtering Results")

            with self.output().open('w') as outfile:
                phenotype_helper.write_phenotype_results(
                    db, self.job, phenotype, self.phenotype, self.phenotype)
                data_access.update_job_status(str(self.job), util.conn_string,
                                              data_access.COMPLETED,
                                              "Job completed successfully")
                outfile.write("DONE!")
                outfile.write('\n')
        except BulkWriteError as bwe:
            print(bwe.details)
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.WARNING,
                                          str(bwe.details))
        except Exception as ex:
            traceback.print_exc(file=sys.stdout)
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.FAILURE, str(ex))
            print(ex)
        finally:
            client.close()
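For context, a minimal sketch (assuming a reachable local MongoDB and an
illustrative database name) of how pymongo raises BulkWriteError; its
.details dict is what the except clause above writes into the job status:

from pymongo import MongoClient
from pymongo.errors import BulkWriteError

client = MongoClient('localhost', 27017)
coll = client['test_db']['phenotype_results']
try:
    # the duplicate _id makes the second insert fail
    coll.insert_many([{'_id': 1}, {'_id': 1}])
except BulkWriteError as bwe:
    print(bwe.details['writeErrors'][0]['errmsg'])
finally:
    client.close()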
Example #4
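    # assumed context: ILock and ILockException come from the ilock package;
    # _MAX_ATTEMPTS, _LOCK_NAME, and _LOCK_WAIT_SECS are module-level
    # constants defined alongside this task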
    def run(self):
        log('dependencies done; run phenotype reconciliation')
        client = util.mongo_client()

        try:
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.IN_PROGRESS,
                                          "Finished Pipelines")

            phenotype = data_access.query_phenotype(int(self.phenotype),
                                                    util.conn_string)
            # log(phenotype)

            db = client[util.mongo_db]

            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.IN_PROGRESS,
                                          "Filtering Results")

            stats = phenotype_stats(str(self.job), True)
            intermediate_stats = phenotype_stats(str(self.job), False)
            data_access.update_job_status(
                str(self.job), util.conn_string,
                data_access.STATS + "_INTERMEDIATE_RESULTS",
                str(intermediate_stats["results"]))
            data_access.update_job_status(
                str(self.job), util.conn_string,
                data_access.STATS + "_INTERMEDIATE_SUBJECTS",
                str(intermediate_stats["subjects"]))
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.STATS + "_FINAL_RESULTS",
                                          str(stats["results"]))
            data_access.update_job_status(
                str(self.job), util.conn_string,
                data_access.STATS + "_FINAL_SUBJECTS", str(stats["subjects"]))
            log("writing job stats....")
            log(json.dumps(stats, indent=4))
            # data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_QUERY_COUNTS",
            #                               str(util.get_cache_query_count()))
            # data_access.update_job_status(str(self.job), util.conn_string,data_access.STATS + "_CACHE_COMPUTE_COUNTS",
            #                               str(util.get_cache_compute_count()))
            # data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_HIT_RATIO",
            #                               str(util.get_cache_hit_ratio()))

            for k in util.properties.keys():
                data_access.update_job_status(str(self.job), util.conn_string,
                                              data_access.PROPERTIES + "_" + k,
                                              util.properties[k])
            with self.output().open('w') as outfile:
                phenotype_helper.write_phenotype_results(
                    db, self.job, phenotype, self.phenotype, self.phenotype)

                # do tuple processing now that all tasks have completed
                succeeded = tuple_processor.process_tuples(
                    db['phenotype_results'], int(self.job))
                if not succeeded:
                    log('*** ERROR: tuple processing failed ***')

                # force all mongo writes to complete by calling fsync on the admin db, then releasing the lock

                wrote_docs = False
                for tries in range(1, _MAX_ATTEMPTS):

                    try:
                        with ILock(_LOCK_NAME, timeout=_LOCK_WAIT_SECS):

                            # only a SINGLE ClarityNLP process can execute this code at any time

                            # force writes to disk by locking the Mongo admin database
                            log('*** Job {0}: FORCING MONGO WRITES ***'.format(
                                self.job))

                            admin_db = client['admin']
                            fsync_result = admin_db.command('fsync', lock=True)
                            assert 1 == fsync_result['lockCount']
                            unlock_result = admin_db.command('fsyncUnlock')
                            assert 0 == unlock_result['lockCount']

                            log('*** Job {0}: ALL MONGO WRITES COMPLETED ***'.
                                format(self.job))

                            wrote_docs = True

                    except ILockException:
                        # timed out before acquiring the lock, will try again
                        pass

                    if wrote_docs:
                        break

                if not wrote_docs:
                    log('Job {0} failed to lock the Mongo admin database.'.
                        format(self.job))

                data_access.update_job_status(str(self.job), util.conn_string,
                                              data_access.COMPLETED,
                                              "Job completed successfully")
                outfile.write("DONE!")
                outfile.write('\n')

            log("job {} done!".format(self.job))
        except BulkWriteError as bwe:
            log(bwe.details)
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.WARNING,
                                          str(bwe.details))
        except Exception as ex:
            traceback.print_exc(file=sys.stdout)
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.FAILURE, str(ex))
            log(ex)
        finally:
            client.close()
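The lock-and-flush retry loop above can be isolated into a small sketch. It
assumes the ilock package for the cross-process lock and a plain pymongo
client; the constants mirror the module-level names used in Example #4 but
their values here are illustrative:

from ilock import ILock, ILockException
from pymongo import MongoClient

_LOCK_NAME = 'claritynlp_mongo_fsync'   # illustrative value
_LOCK_WAIT_SECS = 10                    # illustrative value
_MAX_ATTEMPTS = 5                       # illustrative value

def flush_mongo_writes(client):
    """Force pending Mongo writes to disk; return True on success."""
    for _ in range(1, _MAX_ATTEMPTS):
        try:
            # only one process may hold the named lock at a time
            with ILock(_LOCK_NAME, timeout=_LOCK_WAIT_SECS):
                admin_db = client['admin']
                # lock the server and flush, then release the lock
                admin_db.command('fsync', lock=True)
                admin_db.command('fsyncUnlock')
                return True
        except ILockException:
            # timed out before acquiring the lock; try again
            pass
    return False

client = MongoClient('localhost', 27017)
print(flush_mongo_writes(client))
client.close()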
Example #5
    def run(self):
        log('dependencies done; run phenotype reconciliation')
        client = util.mongo_client()

        try:
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.IN_PROGRESS,
                                          "Finished Pipelines")

            phenotype = data_access.query_phenotype(int(self.phenotype),
                                                    util.conn_string)
            # log(phenotype)

            db = client[util.mongo_db]

            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.IN_PROGRESS,
                                          "Filtering Results")

            stats = phenotype_stats(str(self.job), True)
            intermediate_stats = phenotype_stats(str(self.job), False)
            data_access.update_job_status(
                str(self.job), util.conn_string,
                data_access.STATS + "_INTERMEDIATE_RESULTS",
                str(intermediate_stats["results"]))
            data_access.update_job_status(
                str(self.job), util.conn_string,
                data_access.STATS + "_INTERMEDIATE_SUBJECTS",
                str(intermediate_stats["subjects"]))
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.STATS + "_FINAL_RESULTS",
                                          str(stats["results"]))
            data_access.update_job_status(
                str(self.job), util.conn_string,
                data_access.STATS + "_FINAL_SUBJECTS", str(stats["subjects"]))
            log("writing job stats....")
            log(json.dumps(stats, indent=4))
            # data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_QUERY_COUNTS",
            #                               str(util.get_cache_query_count()))
            # data_access.update_job_status(str(self.job), util.conn_string,data_access.STATS + "_CACHE_COMPUTE_COUNTS",
            #                               str(util.get_cache_compute_count()))
            # data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_HIT_RATIO",
            #                               str(util.get_cache_hit_ratio()))

            for k in util.properties.keys():
                data_access.update_job_status(str(self.job), util.conn_string,
                                              data_access.PROPERTIES + "_" + k,
                                              util.properties[k])
            with self.output().open('w') as outfile:
                phenotype_helper.write_phenotype_results(
                    db, self.job, phenotype, self.phenotype, self.phenotype)
                data_access.update_job_status(str(self.job), util.conn_string,
                                              data_access.COMPLETED,
                                              "Job completed successfully")
                outfile.write("DONE!")
                outfile.write('\n')
            log("job {} done!".format(self.job))
        except BulkWriteError as bwe:
            log(bwe.details)
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.WARNING,
                                          str(bwe.details))
        except Exception as ex:
            traceback.print_exc(file=sys.stdout)
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.FAILURE, str(ex))
            log(ex)
        finally:
            client.close()
Example #6
    def run(self):
        print('dependencies done; run phenotype reconciliation')
        client = MongoClient(util.mongo_host, util.mongo_port)

        try:
            data_access.update_job_status(str(self.job), util.conn_string, data_access.IN_PROGRESS,
                                          "Finished Pipelines")

            phenotype = data_access.query_phenotype(int(self.phenotype), util.conn_string)
            print(phenotype)

            db = client[util.mongo_db]

            data_access.update_job_status(str(self.job), util.conn_string, data_access.IN_PROGRESS,
                                          "Filtering Results")

            stats = phenotype_stats(str(self.job), True)
            intermediate_stats = phenotype_stats(str(self.job), False)
            data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_INTERMEDIATE_RESULTS",
                                          str(intermediate_stats["results"]))
            data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_INTERMEDIATE_SUBJECTS",
                                          str(intermediate_stats["subjects"]))
            data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_FINAL_RESULTS",
                                          str(stats["results"]))
            data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_FINAL_SUBJECTS",
                                          str(stats["subjects"]))
            data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_QUERY_COUNTS",
                                          str(util.get_cache_query_count()))
            data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_COMPUTE_COUNTS",
                                          str(util.get_cache_compute_count()))
            data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_HIT_RATIO",
                                          str(util.get_cache_hit_ratio()))

            for k in util.properties.keys():
                data_access.update_job_status(str(self.job), util.conn_string, data_access.PROPERTIES + "_" + k,
                                              util.properties[k])
            with self.output().open('w') as outfile:
                phenotype_helper.write_phenotype_results(db, self.job, phenotype, self.phenotype, self.phenotype)
                data_access.update_job_status(str(self.job), util.conn_string, data_access.COMPLETED,
                                              "Job completed successfully")
                outfile.write("DONE!")
                outfile.write('\n')
        except BulkWriteError as bwe:
            print(bwe.details)
            data_access.update_job_status(str(self.job), util.conn_string, data_access.WARNING, str(bwe.details))
        except Exception as ex:
            traceback.print_exc(file=sys.stdout)
            data_access.update_job_status(str(self.job), util.conn_string, data_access.FAILURE, str(ex))
            print(ex)
        finally:
            client.close()