def _get_custom_arg(str_key, str_variable_name, job_id, custom_arg_dict):
    """Resolve a custom-argument value for a CQLExecutionTask job.

    Looks up str_key first in custom_arg_dict and then, if unresolved, in
    util.properties. Empty strings are normalized to None. The resolved
    value is echoed both to the job status table and to the log file.

    Args:
        str_key: key to look up in custom_arg_dict / util.properties.
        str_variable_name: human-readable name used in the echoed message.
        job_id: id of the job whose status row receives the message.
        custom_arg_dict: per-job custom argument mapping.

    Returns:
        The resolved value, or None if the key is unknown or its value is
        an empty string.
    """
    # prefer the per-job custom args, then fall back to the global properties
    value = custom_arg_dict.get(str_key)
    if value is None and str_key in util.properties:
        value = util.properties[str_key]

    # treat empty strings as None
    # (isinstance instead of `str == type(value)`: idiomatic and also
    # correct for str subclasses)
    if isinstance(value, str) and len(value) == 0:
        value = None

    # echo in job status and in log file
    msg = 'CQLExecutionTask: {0} == {1}'.format(str_variable_name, value)
    data_access.update_job_status(job_id, util.conn_string,
                                  data_access.IN_PROGRESS, msg)
    log(msg)
    return value
def _get_custom_arg(str_key, str_variable_name, job_id, custom_arg_dict):
    """Return the value stored under str_key in custom_arg_dict, or None
    when the key is absent.

    The resolved value is echoed to the job status table and to stdout.
    """
    # dict.get yields None for a missing key, matching the documented contract
    value = custom_arg_dict.get(str_key)

    # echo in job status and in log file
    msg = '{0}: {1}'.format(str_variable_name, value)
    data_access.update_job_status(job_id, util.conn_string,
                                  data_access.IN_PROGRESS, msg)
    print(msg)
    return value
def run(self):
    """Reconcile phenotype results once all dependency pipelines finish.

    Queries the phenotype definition, writes the filtered results to the
    task's output target, and records job progress/completion in the job
    status table. BulkWriteError downgrades the job to WARNING; any other
    exception marks it FAILURE. The Mongo client is always closed.
    """
    print('dependencies done; run phenotype reconciliation')
    mongo = MongoClient(util.mongo_host, util.mongo_port)
    job_id = str(self.job)
    try:
        data_access.update_job_status(job_id, util.conn_string,
                                      data_access.IN_PROGRESS,
                                      "Finished Pipelines")
        phenotype = data_access.query_phenotype(int(self.phenotype),
                                                util.conn_string)
        print(phenotype)

        db = mongo[util.mongo_db]
        data_access.update_job_status(job_id, util.conn_string,
                                      data_access.IN_PROGRESS,
                                      "Filtering Results")

        with self.output().open('w') as sink:
            phenotype_helper.write_phenotype_results(db, self.job, phenotype,
                                                     self.phenotype,
                                                     self.phenotype)
            data_access.update_job_status(job_id, util.conn_string,
                                          data_access.COMPLETED,
                                          "Job completed successfully")
            sink.write("DONE!")
            sink.write('\n')
    except BulkWriteError as bulk_err:
        print(bulk_err.details)
        data_access.update_job_status(job_id, util.conn_string,
                                      data_access.WARNING,
                                      str(bulk_err.details))
    except Exception as exc:
        traceback.print_exc(file=sys.stdout)
        data_access.update_job_status(job_id, util.conn_string,
                                      data_access.FAILURE, str(exc))
        print(exc)
    finally:
        mongo.close()
def run(self):
    """Reconcile phenotype results after all dependency pipelines finish.

    Records intermediate/final stats and configuration properties in the
    job status table, writes the reconciled phenotype results, runs tuple
    post-processing, forces Mongo writes to disk via an fsync lock on the
    admin database, and marks the job COMPLETED (WARNING on bulk-write
    errors, FAILURE on anything else). The Mongo client is always closed.
    """
    log('dependencies done; run phenotype reconciliation')
    client = util.mongo_client()
    try:
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.IN_PROGRESS,
                                      "Finished Pipelines")

        phenotype = data_access.query_phenotype(int(self.phenotype),
                                                util.conn_string)
        # log(phenotype)

        db = client[util.mongo_db]
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.IN_PROGRESS,
                                      "Filtering Results")

        # second argument presumably toggles final (True) vs. intermediate
        # (False) stats -- TODO confirm against phenotype_stats
        stats = phenotype_stats(str(self.job), True)
        intermediate_stats = phenotype_stats(str(self.job), False)

        # echo result/subject counts into the job status table
        data_access.update_job_status(
            str(self.job), util.conn_string,
            data_access.STATS + "_INTERMEDIATE_RESULTS",
            str(intermediate_stats["results"]))
        data_access.update_job_status(
            str(self.job), util.conn_string,
            data_access.STATS + "_INTERMEDIATE_SUBJECTS",
            str(intermediate_stats["subjects"]))
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_FINAL_RESULTS",
                                      str(stats["results"]))
        data_access.update_job_status(
            str(self.job), util.conn_string,
            data_access.STATS + "_FINAL_SUBJECTS",
            str(stats["subjects"]))
        log("writing job stats....")
        log(json.dumps(stats, indent=4))

        # data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_QUERY_COUNTS",
        #                               str(util.get_cache_query_count()))
        # data_access.update_job_status(str(self.job), util.conn_string,data_access.STATS + "_CACHE_COMPUTE_COUNTS",
        #                               str(util.get_cache_compute_count()))
        # data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_HIT_RATIO",
        #                               str(util.get_cache_hit_ratio()))

        # snapshot every configured property into the job status table
        for k in util.properties.keys():
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.PROPERTIES + "_" + k,
                                          util.properties[k])

        with self.output().open('w') as outfile:
            phenotype_helper.write_phenotype_results(
                db, self.job, phenotype, self.phenotype, self.phenotype)

            # do tuple processing now that all tasks have completed
            succeeded = tuple_processor.process_tuples(
                db['phenotype_results'], int(self.job))
            if not succeeded:
                log('*** ERROR: tuple processing failed ***')

            # force all mongo writes to complete by calling fsync on the
            # admin db, then releasing the lock; retried up to
            # _MAX_ATTEMPTS - 1 times if the inter-process lock times out
            wrote_docs = False
            for tries in range(1, _MAX_ATTEMPTS):
                try:
                    with ILock(_LOCK_NAME, timeout=_LOCK_WAIT_SECS):
                        # only a SINGLE ClarityNLP process can execute this
                        # code at any time; force writes to disk by locking
                        # the Mongo admin database
                        log('*** Job {0}: FORCING MONGO WRITES ***'.format(
                            self.job))
                        admin_db = client['admin']
                        fsync_result = admin_db.command('fsync', lock=True)
                        # lockCount should be exactly 1 after locking and 0
                        # after unlocking -- anything else means another
                        # writer interfered
                        assert 1 == fsync_result['lockCount']
                        unlock_result = admin_db.command('fsyncUnlock')
                        assert 0 == unlock_result['lockCount']
                        log('*** Job {0}: ALL MONGO WRITES COMPLETED ***'.
                            format(self.job))
                        wrote_docs = True
                except ILockException:
                    # timed out before acquiring the lock, will try again
                    pass
                if wrote_docs:
                    break

            if not wrote_docs:
                log('Job {0} failed to lock the Mongo admin database.'.
                    format(self.job))

            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.COMPLETED,
                                          "Job completed successfully")
            outfile.write("DONE!")
            outfile.write('\n')
            log("job {} done!".format(self.job))
    except BulkWriteError as bwe:
        # bulk-write problems downgrade the job to WARNING, not FAILURE
        log(bwe.details)
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.WARNING, str(bwe.details))
    except Exception as ex:
        traceback.print_exc(file=sys.stdout)
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.FAILURE, str(ex))
        log(ex)
    finally:
        client.close()
def run(self):
    """Reconcile phenotype results after all dependency pipelines finish.

    Records intermediate/final stats and configuration properties in the
    job status table, writes the reconciled phenotype results to the
    task's output target, and marks the job COMPLETED (WARNING on
    bulk-write errors, FAILURE on anything else). The Mongo client is
    always closed.
    """
    log('dependencies done; run phenotype reconciliation')
    client = util.mongo_client()
    try:
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.IN_PROGRESS,
                                      "Finished Pipelines")

        phenotype = data_access.query_phenotype(int(self.phenotype),
                                                util.conn_string)
        # log(phenotype)

        db = client[util.mongo_db]
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.IN_PROGRESS,
                                      "Filtering Results")

        # second argument presumably toggles final (True) vs. intermediate
        # (False) stats -- TODO confirm against phenotype_stats
        stats = phenotype_stats(str(self.job), True)
        intermediate_stats = phenotype_stats(str(self.job), False)

        # echo result/subject counts into the job status table
        data_access.update_job_status(
            str(self.job), util.conn_string,
            data_access.STATS + "_INTERMEDIATE_RESULTS",
            str(intermediate_stats["results"]))
        data_access.update_job_status(
            str(self.job), util.conn_string,
            data_access.STATS + "_INTERMEDIATE_SUBJECTS",
            str(intermediate_stats["subjects"]))
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_FINAL_RESULTS",
                                      str(stats["results"]))
        data_access.update_job_status(
            str(self.job), util.conn_string,
            data_access.STATS + "_FINAL_SUBJECTS",
            str(stats["subjects"]))
        log("writing job stats....")
        log(json.dumps(stats, indent=4))

        # data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_QUERY_COUNTS",
        #                               str(util.get_cache_query_count()))
        # data_access.update_job_status(str(self.job), util.conn_string,data_access.STATS + "_CACHE_COMPUTE_COUNTS",
        #                               str(util.get_cache_compute_count()))
        # data_access.update_job_status(str(self.job), util.conn_string, data_access.STATS + "_CACHE_HIT_RATIO",
        #                               str(util.get_cache_hit_ratio()))

        # snapshot every configured property into the job status table
        for k in util.properties.keys():
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.PROPERTIES + "_" + k,
                                          util.properties[k])

        with self.output().open('w') as outfile:
            phenotype_helper.write_phenotype_results(
                db, self.job, phenotype, self.phenotype, self.phenotype)
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.COMPLETED,
                                          "Job completed successfully")
            outfile.write("DONE!")
            outfile.write('\n')
            log("job {} done!".format(self.job))
    except BulkWriteError as bwe:
        # bulk-write problems downgrade the job to WARNING, not FAILURE
        log(bwe.details)
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.WARNING, str(bwe.details))
    except Exception as ex:
        traceback.print_exc(file=sys.stdout)
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.FAILURE, str(ex))
        log(ex)
    finally:
        client.close()
def run(self):
    """Reconcile phenotype results after all dependency pipelines finish.

    Records intermediate/final stats, cache counters, and configuration
    properties in the job status table, writes the reconciled phenotype
    results to the task's output target, and marks the job COMPLETED
    (WARNING on bulk-write errors, FAILURE on anything else). The Mongo
    client is always closed.
    """
    print('dependencies done; run phenotype reconciliation')
    client = MongoClient(util.mongo_host, util.mongo_port)
    try:
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.IN_PROGRESS,
                                      "Finished Pipelines")
        phenotype = data_access.query_phenotype(int(self.phenotype),
                                                util.conn_string)
        print(phenotype)

        db = client[util.mongo_db]
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.IN_PROGRESS,
                                      "Filtering Results")

        # second argument presumably toggles final (True) vs. intermediate
        # (False) stats -- TODO confirm against phenotype_stats
        stats = phenotype_stats(str(self.job), True)
        intermediate_stats = phenotype_stats(str(self.job), False)

        # echo result/subject counts and cache counters into the job
        # status table
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_INTERMEDIATE_RESULTS",
                                      str(intermediate_stats["results"]))
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_INTERMEDIATE_SUBJECTS",
                                      str(intermediate_stats["subjects"]))
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_FINAL_RESULTS",
                                      str(stats["results"]))
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_FINAL_SUBJECTS",
                                      str(stats["subjects"]))
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_CACHE_QUERY_COUNTS",
                                      str(util.get_cache_query_count()))
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_CACHE_COMPUTE_COUNTS",
                                      str(util.get_cache_compute_count()))
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.STATS + "_CACHE_HIT_RATIO",
                                      str(util.get_cache_hit_ratio()))

        # snapshot every configured property into the job status table
        for k in util.properties.keys():
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.PROPERTIES + "_" + k,
                                          util.properties[k])

        with self.output().open('w') as outfile:
            phenotype_helper.write_phenotype_results(db, self.job, phenotype,
                                                     self.phenotype,
                                                     self.phenotype)
            data_access.update_job_status(str(self.job), util.conn_string,
                                          data_access.COMPLETED,
                                          "Job completed successfully")
            outfile.write("DONE!")
            outfile.write('\n')
    except BulkWriteError as bwe:
        # bulk-write problems downgrade the job to WARNING, not FAILURE
        print(bwe.details)
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.WARNING, str(bwe.details))
    except Exception as ex:
        traceback.print_exc(file=sys.stdout)
        data_access.update_job_status(str(self.job), util.conn_string,
                                      data_access.FAILURE, str(ex))
        print(ex)
    finally:
        client.close()