def start_init_task():
    """Begin the initialization task and flag every client as initializing.

    Stores the current wall-clock time in the module-level ``TIME``,
    registers a ``Commands.INIT`` / ``"Start"`` task with the task registry,
    and then sets the state of each registered client to ``Commands.INIT``.
    """
    global TIME
    TIME = time.time()

    task_registry = tasks.TaskReg.get_instance()
    task_registry.set_up_task(task=Commands.INIT, subtask="Start")

    registry = Registry.get_instance()
    for entry in registry.list_clients():
        registry.set_client_state(entry['name'], Commands.INIT)
def filter_finished(client_name, state):
    """Mark a client as done filtering and report whether the stage is over.

    Args:
        client_name: Name of the client that reported completion.
        state: The state label clients carry while still filtering; the
            stage is considered finished once no client is left in it.

    Returns:
        ``True`` when no client remains in ``state`` (a post-filter plot is
        then produced and the elapsed time logged), otherwise ``False``.
    """
    registry = Registry.get_instance()
    # "Filterd" (sic) is the runtime state label; it is kept verbatim because
    # other code may compare against it.
    registry.set_client_state(client_name, "Filterd")

    if registry.num_clients_in_state(state):
        return False

    # Report the stage that actually finished; the message previously always
    # named Commands.QC even when called for another stage.
    logging.info(f"Done with filtering in {state} stage.")
    # NOTE(review): the plot file name and the timing message below are
    # QC-labelled regardless of `state` -- confirm whether that is intended.
    task_init.make_plots("QC_post_filter.png")
    logging.info(f"QC took roughly {time.time() - TIME:.1f} seconds.")
    return True
def start_client_qc_task(filters, stage=Commands.QC):
    """Send the filter settings to the clients and mark them as in ``stage``.

    Args:
        filters: Dict of filter settings. It is modified in place: a
            ``"mask_prefix"`` entry is set to ``"QC"`` when ``stage`` equals
            ``Commands.QC`` and to ``"PCA"`` otherwise.
        stage: State label assigned to every registered client.

    Side effects:
        Stores the start time in the module-level ``TIME``, sets each
        client's state, and posts the pickled filters to the clients'
        ``"qc"`` endpoint.
    """
    global TIME
    TIME = time.time()

    filters["mask_prefix"] = "QC" if stage == Commands.QC else "PCA"
    data = pickle.dumps(filters)

    # Query the registry now rather than relying on a module-level snapshot
    # of the client list, which can miss clients registered after import.
    registry = Registry.get_instance()
    for client in registry.list_clients():
        registry.set_client_state(client['name'], stage)

    # States are set before messaging so that a client reporting completion
    # early cannot have its new state overwritten by the loop above.
    networking.message_clients("qc", data=data, env=app.config["ENV"])
def start_subtask(task_name, subtask_name, client_name):
    """Dispatch a client's subtask report to the matching server-side handler.

    The payload is read from the current request (``request.data``).

    Args:
        task_name: Task identifier. Compared for equality with
            ``Commands.INIT`` and ``Commands.ECHO`` and by prefix with
            ``Commands.QC``, ``Commands.PCA`` and ``Commands.ASSO``.
        subtask_name: Subtask identifier within the task.
        client_name: Name of the reporting client.

    Returns:
        A 200 response from ``networking.create_response``; unknown
        task/subtask combinations are silently ignored.
    """
    if task_name == Commands.INIT:
        if subtask_name == 'POS':
            task_init.store_positions(request.data, client_name)
        elif subtask_name == 'COUNT':
            task_init.store_counts(request.data, client_name)

    elif task_name.startswith(Commands.QC):
        if subtask_name == "FIN":
            if task_qc.filter_finished(client_name, Commands.QC):
                logging.info("Done with QC.")

    elif task_name.startswith(Commands.PCA):
        if subtask_name == "FIN":
            if task_qc.filter_finished(client_name, Commands.PCA):
                logging.info("Done with PCA filters. Initiating pruning")
                reset_states("PRUNE")
                # NOTE(review): the meaning of the literal 50 is not visible
                # here -- confirm against CovarianceAggregator.
                ld_agg = task_pca.CovarianceAggregator.get_instance(
                    len(Registry.get_instance().list_clients()), 50)
                # send message to start LD pruning
                ld_agg.send_request({})
        elif subtask_name == "LD":
            ld_agg = task_pca.CovarianceAggregator.get_instance(
                len(Registry.get_instance().list_clients()), 50)
            ld_agg.update(request.data)
        elif subtask_name == "PCAPOS":
            task_pca.Position_reporter.get_instance().report_pos()
        elif subtask_name == "COV":
            task_pca.store_covariance(client_name, request.data)

    elif task_name.startswith(Commands.ASSO):
        # The aggregator is obtained for every association subtask, including
        # unrecognised ones, exactly as before.
        ass_agg = task_ass.LogisticAdmm.get_instance({}, active=2)
        if subtask_name == "adjust":
            ass_agg.update_stats(request.data)
        elif subtask_name == "estimate":
            ass_agg.update(request.data)
        elif subtask_name == "pval":
            ass_agg.update_pval(request.data)
        elif subtask_name == "hessians":
            model, have_all_info = ass_agg.newton_stats_update(request.data)
            if have_all_info:
                ass_agg.newton_iter(model)
        elif subtask_name == "valback":
            ass_agg.collect_likelihoods(request.data)

    elif task_name == Commands.ECHO:
        if subtask_name == "ITR":
            echo = task_init.Echo.get_instance(1)
            # Register the report exactly once. It used to be registered
            # twice, with the first return value (possibly the final average)
            # thrown away.
            avg_t = echo.echo(client_name)
            if avg_t is not None:
                logging.info(f"Avg echo time={avg_t}")

    return networking.create_response(200)
def start_client_qc_task(filters, stage=Commands.QC):
    """Register the QC task with the filter settings and mark clients as in ``stage``.

    Args:
        filters: Dict of filter settings. It is modified in place: a
            ``"mask_prefix"`` entry is set to ``"QC"`` when ``stage`` equals
            ``Commands.QC`` and to ``"PCA"`` otherwise.
        stage: State label assigned to every registered client.

    Side effects:
        Stores the start time in the module-level ``TIME``, sets up a
        ``Commands.QC`` / ``"start"`` task carrying the filters in the task
        registry, and updates each client's state.
    """
    global TIME
    TIME = time.time()

    filters["mask_prefix"] = "QC" if stage == Commands.QC else "PCA"

    # The task is registered as Commands.QC for either stage; only the mask
    # prefix inside `filters` distinguishes them.
    tr = tasks.TaskReg.get_instance()
    tr.set_up_task(task=Commands.QC, subtask="start", other={"data": filters})

    # Query the registry now rather than relying on a module-level snapshot
    # of the client list, which can miss clients registered after import.
    registry = Registry.get_instance()
    for client in registry.list_clients():
        registry.set_client_state(client['name'], stage)
def lst_clients():
    """
    Build a 200 response carrying the registry's list of clients
    """
    return networking.create_response(
        200, Registry.get_instance().list_clients())
def remove_client(client_name):
    """
    Drop a client from the registry.

    Responds with 400 when ``client_name`` is not a string; otherwise asks
    the registry to remove the client, logs the removal and responds with 200.
    """
    if isinstance(client_name, str):
        Registry.get_instance().remove_client(client_name)
        logging.info(f"Removed {client_name}")
        return networking.create_response(200)

    return networking.create_response(400, 'client name must be a string')
def add_client(client):
    """
    Add a client to the registry.

    Args:
        client: Mapping describing the client; the ``'name'``,
            ``'listen_host'`` and ``'external_host'`` entries are read for
            the log message.

    Returns:
        A 400 response when the registry reports the client was not added
        (treated as "already registered"), otherwise a 200 response.
    """
    registry = Registry.get_instance()
    if not registry.add_client(client):
        return networking.create_response(400, 'Client already registered')

    # Adjacent f-strings keep the message on one logical line; a backslash
    # continuation inside a triple-quoted string would embed the source
    # indentation into the logged text.
    logging.info(
        f"Added {client['name']}, listening host: {client['listen_host']}, "
        f"external_host: {client['external_host']}")
    return networking.create_response(200)
def count_stats():
    """Derive per-chromosome summary statistics from the stored counts.

    For every top-level key (chromosome) in ``store`` this reads
    ``<chrom>/counts`` and writes four new datasets next to it:

    * ``missing_rates`` -- column 3 divided by the total ``N`` stored in
      ``store.attrs["N"]``;
    * ``allele_freq``   -- ``(2 * col2 + col1) / (2 * sum(col0..col2))``;
    * ``var``           -- ``2 * af * (1 - af)``;
    * ``hwe``           -- the result of ``hweP`` on columns 0-2.

    The same values are collected (as lists) into a dict keyed by chromosome
    and attached to a ``Commands.INIT`` / ``"stats"`` task. Every client is
    then moved to the ``"DONE_INIT"`` state, a pre-filter plot is made, and
    the time elapsed since the module-level ``TIME`` is logged.
    """
    N = float(store.attrs["N"])
    clients = Registry.get_instance().list_clients()
    msg = {}

    for chrom in store.keys():
        # `Dataset.value` was removed in h5py 3.0; indexing with `()` reads
        # the whole dataset into memory on both old and new versions.
        counts_dset = store["{}/counts".format(chrom)][()]

        missing_rate = counts_dset[:, 3] / N
        store.create_dataset(f"{chrom}/missing_rates", data=missing_rate)

        af = (counts_dset[:, 2] * 2 + counts_dset[:, 1]).astype(float)
        af /= (np.sum(counts_dset[:, :3], axis=1) * 2).astype(float)
        store.create_dataset("{}/allele_freq".format(chrom), data=af)

        # Variance is taken as 2*af*(1-af) rather than estimated from the
        # individual genotype counts.
        var = 2 * af * (1 - af)
        store.create_dataset("{}/var".format(chrom), data=var)

        # Need to Recompile HWEP with uint32
        hwe = hweP(counts_dset[:, :3].astype(np.int32), 1, 0)
        msg[f"{chrom}"] = {
            "HWE": hwe.tolist(),
            "MISS": missing_rate.tolist(),
            "AF": af.tolist(),
            "VAR": var.tolist()
        }
        store.create_dataset("{}/hwe".format(chrom), data=hwe)

    tr = tasks.TaskReg.get_instance()
    tr.set_up_task(task=Commands.INIT, subtask="stats", other={"data": msg})

    for client in clients:
        Registry.get_instance().set_client_state(client['name'], "DONE_INIT")

    logging.info("Done with initialization")
    make_plots("QC_pre_filter.png")
    logging.info(
        f"Initialization took roughly {time.time()-TIME:.1f} seconds.")
def store_counts(data, client_name):
    """Accumulate one client's genotype counts for a chromosome into ``store``.

    ``data`` is unpickled into a message with the keys ``"n"``, ``"CHROM"``
    and ``"COUNTS"``, plus optional ``"START"`` / ``"END"`` markers:

    * With ``"START"`` present, ``n`` is added to ``store.attrs["N"]``
      (created as 0 first if missing).
    * The ``<CHROM>/counts`` dataset (created with 4 int64 columns when
      absent) is incremented by ``COUNTS`` prefixed with a column holding
      ``n`` minus the row sums of ``COUNTS``.
    * With ``"END"`` present, the client moves to ``Commands.INIT_STATS``;
      once no client is left in ``Commands.INIT``, ``count_stats()`` runs.
    """
    # NOTE(review): pickle.loads executes arbitrary code on malicious input;
    # `data` appears to come from a client request -- confirm clients are
    # trusted.
    payload = pickle.loads(data)
    n_samples = payload["n"]

    if "START" in payload:
        if "N" not in store.attrs:
            store.attrs["N"] = 0
            logging.info('Storing counts.')
        store.attrs["N"] += n_samples

    chromosome = payload["CHROM"]
    n_rows = len(payload["COUNTS"])
    dataset_name = f"{chromosome}/counts"
    if dataset_name in store:
        dset = store[dataset_name]
    else:
        dset = store.require_dataset(
            dataset_name, (n_rows, 4), dtype=np.int64)

    counts = payload["COUNTS"]
    row_totals = np.sum(counts, axis=1)[:, np.newaxis].astype(np.int64)
    homo_ref = n_samples - row_totals
    dset[:] += np.hstack((homo_ref, counts))

    if "END" in payload:
        registry = Registry.get_instance()
        registry.set_client_state(client_name, Commands.INIT_STATS)
        if registry.num_clients_in_state(Commands.INIT) == 0:
            logging.info('Transfering QC summary stats.')
            count_stats()
def echo(self, client_name):
    """Record one echo reply and advance the echo round when all have replied.

    The client is put in the ``Commands.ECHO`` state. Nothing else happens
    until the number of clients in that state equals ``len`` of the
    registry. At that point ``Echo.echos_left`` is decremented and every
    client's state is reset to ``None``; then either another ``"echo"``
    message is sent (rounds remain) or the average time per echo is
    computed, sent with ``"End_echo"`` and returned.

    Returns:
        The average echo time when the final round completes, else ``None``.
    """
    registry = Registry.get_instance()
    registry.set_client_state(client_name, Commands.ECHO)

    # Still waiting for other clients in this round.
    if registry.num_clients_in_state(Commands.ECHO) != len(registry):
        return None

    Echo.echos_left -= 1
    for entry in registry.list_clients():
        registry.set_client_state(entry["name"], None)

    if Echo.echos_left:
        networking.message_clients("echo", env=app.config["ENV"])
        return None

    Echo.__instance = None  # remove the instance (essentially)
    avg_t = (time.time() - Echo.t) / Echo.count
    networking.message_clients("End_echo", env=app.config["ENV"], data=avg_t)
    return avg_t
def message_clients(address, client_name=None, args=None, env='production', data=None):
    """POST a message to one or all registered clients.

    Args:
        address: Path appended after ``/api/`` in each client's URL.
        client_name: When given, only clients whose ``"name"`` equals it are
            contacted; otherwise every registered client is.
        args: Optional query parameters, forwarded as ``params``.
        env: Passed to ``get_protocol`` to choose the URL scheme.
        data: Request body forwarded to ``requests.post``.
    """
    recipients = Registry.get_instance().list_clients()
    if client_name is not None:
        recipients = [
            entry for entry in recipients if entry["name"] == client_name]

    for target in recipients:
        url = f'{get_protocol(env)}://{target["external_host"]}:{target["port"]}/api/{address}'
        if args is None:
            requests.post(url, data=data)
        else:
            requests.post(url, params=args, data=data)
# third party lib import h5py import numpy as np from lib import networking from flask import current_app as app # internal lib from lib.settings import Settings, Options, Commands from lib.client_registry import Registry from lib import networking from server.lib import task_init from lib import tasks storePath = os.path.join(Settings.local_scratch, "central.h5py") store = h5py.File(storePath, "a") clients = Registry.get_instance().list_clients() def split_command(command): command = command.upper() refloat = "[0-9,E,.,-]*" filters = {} name = Options.HWE x = re.search(name + refloat, command) if x: # hwe filter filters[name] = float(x.group()[len(name):]) name = Options.MAF x = re.search(name + refloat, command) if x: # maf filter filters[name] = float(x.group()[len(name):]) name = Options.MPS
def start_init_task():
    """Mark every client as initializing, then tell the clients to start.

    Stores the current wall-clock time in the module-level ``TIME``, sets
    each registered client's state to ``Commands.INIT`` and finally posts an
    ``"init"`` message to the clients.
    """
    global TIME
    TIME = time.time()

    registry = Registry.get_instance()
    for entry in registry.list_clients():
        registry.set_client_state(entry['name'], Commands.INIT)

    networking.message_clients("init", env=app.config["ENV"])
def reset_states(state):
    """Assign ``state`` to every client currently listed by the registry."""
    registry = Registry.get_instance()
    for entry in registry.list_clients():
        client_name = entry["name"]
        registry.set_client_state(client_name, state)
def set_clients_state(self, state):
    """Assign ``state`` to every client currently in the registry.

    Args:
        state: State label handed to ``Registry.set_client_state`` for each
            client.
    """
    # Read the client list at call time; a module-level snapshot of the
    # list can miss clients that registered after import.
    registry = Registry.get_instance()
    for client in registry.list_clients():
        registry.set_client_state(client['name'], state)