def run_permutations(self, temp_uuid):
    """Run permutation tests and store the suggestive/significant LOD cutoffs.

    For each of ``self.num_perm`` permutations the phenotype vector built
    from ``self.vals`` is shuffled and p-values are computed for it: via
    ``self.gen_human_results`` when the group's species is "human",
    otherwise by handing the data to the external pylmm command through
    Redis. ``-log10`` of the smallest p-value (capped at 1) is recorded as
    that permutation's top LOD score.

    Args:
        temp_uuid: String appended to the "pylmm:input:" and
            "pylmm:results:" Redis key prefixes for this run.

    Side effects:
        Sets ``self.suggestive`` and ``self.significant`` to the 67th and
        95th percentiles of the recorded top LOD scores.
    """
    # Missing phenotypes are encoded as "x" or "" (the same convention
    # gen_data uses). Previously only "x" was recognised here, so an empty
    # string crashed in float(""). The values do not change between
    # permutations, so they are parsed once.
    base_pheno_vector = np.array([
        np.nan if (val == "x" or val == "") else float(val)
        for val in self.vals
    ])
    key = "pylmm:input:" + temp_uuid

    top_lod_scores = []
    for _ in range(self.num_perm):
        # np.random.shuffle shuffles in place, so work on a fresh copy.
        pheno_vector = base_pheno_vector.copy()
        np.random.shuffle(pheno_vector)

        if self.dataset.group.species == "human":
            p_values, t_stats = self.gen_human_results(
                pheno_vector, key, temp_uuid)
        else:
            genotype_data = [
                marker['genotypes']
                for marker in self.dataset.group.markers.markers
            ]

            no_val_samples = self.identify_empty_samples()
            trimmed_genotype_data = self.trim_genotypes(
                genotype_data, no_val_samples)

            genotype_matrix = np.array(trimmed_genotype_data).T

            params = dict(
                pheno_vector=pheno_vector.tolist(),
                genotype_matrix=genotype_matrix.tolist(),
                restricted_max_likelihood=True,
                refit=False,
                temp_uuid=temp_uuid,

                # meta data
                timestamp=datetime.datetime.now().isoformat(),
            )

            json_params = json.dumps(params)
            Redis.set(key, json_params)
            Redis.expire(key, 60 * 60)

            command = PYLMM_COMMAND + ' --key {} --species {}'.format(
                key, "other")
            shell(command)

            # NOTE(review): json_results is indexed without a None check;
            # presumably blpop yields None when the 45*60 timeout elapses,
            # which would surface here as a TypeError - confirm.
            json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
            results = json.loads(json_results[1])
            p_values = [float(result) for result in results['p_values']]

        # Seeding the search with 1 keeps the original cap: the recorded
        # p-value never exceeds 1, even when p_values is empty.
        lowest_p_value = min([1] + list(p_values))
        top_lod_scores.append(-math.log10(lowest_p_value))

    self.suggestive = np.percentile(top_lod_scores, 67)
    self.significant = np.percentile(top_lod_scores, 95)
def gen_data(self, temp_uuid):
    """Compute a p-value for every marker and attach them to the marker set.

    The phenotype vector is built from ``self.vals``, with "x" and ""
    mapped to NaN. For the "human" species the work is delegated to
    ``self.gen_human_results``. For any other species the phenotype vector
    and genotype matrix are stored in Redis as JSON, the external pylmm
    command is run, and its results are read back from a Redis list.

    Args:
        temp_uuid: String appended to the "pylmm:input:" and
            "pylmm:results:" Redis key prefixes for this run.

    Returns:
        ``self.dataset.group.markers.markers`` after ``add_pvalues`` has
        been called with the computed p-values.
    """
    logger.debug("self.vals is:", self.vals)

    pheno_vector = np.array([
        np.nan if (val == "x" or val == "") else float(val)
        for val in self.vals
    ])

    key = "pylmm:input:" + temp_uuid
    logger.debug("key is:", pf(key))

    if self.dataset.group.species != "human":
        logger.debug("NOW CWD IS:", os.getcwd())

        # One genotype row per marker; transposed below before being sent.
        marker_genotypes = [
            marker['genotypes']
            for marker in self.dataset.group.markers.markers
        ]

        # NOTE(review): the trimmed genotypes are computed but never used.
        # The untrimmed data is what gets sent to pylmm here, whereas
        # run_permutations sends the trimmed data - confirm which is
        # intended.
        empty_samples = self.identify_empty_samples()
        trimmed_genotype_data = self.trim_genotypes(
            marker_genotypes, empty_samples)

        genotype_matrix = np.array(marker_genotypes).T

        params = dict(
            pheno_vector=pheno_vector.tolist(),
            genotype_matrix=genotype_matrix.tolist(),
            restricted_max_likelihood=True,
            refit=False,
            temp_uuid=temp_uuid,

            # meta data
            timestamp=datetime.datetime.now().isoformat(),
        )

        # Publish the input under the key and set its expiry to 60*60.
        Redis.set(key, json.dumps(params))
        Redis.expire(key, 60 * 60)

        logger.debug("before printing command")
        command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other")
        logger.debug("command is:", command)
        logger.debug("after printing command")

        shell(command)

        # NOTE(review): the blpop result is indexed without a None check;
        # presumably a timeout yields None and a TypeError here - confirm.
        results_key = "pylmm:results:" + temp_uuid
        json_results = Redis.blpop(results_key, 45 * 60)
        results = json.loads(json_results[1])

        p_values = [float(p_value) for p_value in results['p_values']]
        t_stats = results['t_stats']
    else:
        p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)

    self.dataset.group.markers.add_pvalues(p_values)
    return self.dataset.group.markers.markers
def gen_data(self, temp_uuid):
    """Generate per-marker p-values and record them on the marker collection.

    Steps:
      1. Convert ``self.vals`` to a float array, using NaN for "x" and "".
      2. For the "human" species, obtain p-values from
         ``self.gen_human_results``.
      3. Otherwise, serialise the phenotype vector and genotype matrix to
         JSON in Redis, invoke the pylmm command via ``shell``, and parse
         the JSON popped from the "pylmm:results:" list.
      4. Pass the p-values to ``self.dataset.group.markers.add_pvalues``.

    Args:
        temp_uuid: String suffix for the "pylmm:input:" / "pylmm:results:"
            Redis keys.

    Returns:
        ``self.dataset.group.markers.markers``.
    """
    logger.debug("self.vals is:", self.vals)

    # "x" and "" both denote a missing phenotype value.
    parsed_vals = []
    for val in self.vals:
        if val == "x" or val == "":
            parsed_vals.append(np.nan)
        else:
            parsed_vals.append(float(val))
    pheno_vector = np.array(parsed_vals)

    key = "pylmm:input:" + temp_uuid
    logger.debug("key is:", pf(key))

    if self.dataset.group.species == "human":
        p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
    else:
        logger.debug("NOW CWD IS:", os.getcwd())

        genotype_rows = []
        for marker in self.dataset.group.markers.markers:
            genotype_rows.append(marker['genotypes'])

        # NOTE(review): the result of trim_genotypes is never used. The
        # matrix below is built from the untrimmed rows, unlike
        # run_permutations - confirm which is intended.
        samples_without_values = self.identify_empty_samples()
        trimmed_genotype_data = self.trim_genotypes(
            genotype_rows, samples_without_values)

        genotype_matrix = np.array(genotype_rows).T

        pheno_list = pheno_vector.tolist()
        genotype_list = genotype_matrix.tolist()
        params = dict(
            pheno_vector=pheno_list,
            genotype_matrix=genotype_list,
            restricted_max_likelihood=True,
            refit=False,
            temp_uuid=temp_uuid,

            # meta data
            timestamp=datetime.datetime.now().isoformat(),
        )

        json_params = json.dumps(params)
        Redis.set(key, json_params)
        Redis.expire(key, 60 * 60)

        logger.debug("before printing command")
        command_args = ' --key {} --species {}'.format(key, "other")
        command = PYLMM_COMMAND + command_args
        logger.debug("command is:", command)
        logger.debug("after printing command")

        shell(command)

        # Block on the results list, passing 45*60 as the timeout.
        # NOTE(review): the result is indexed without a None check -
        # presumably a timeout yields None and a TypeError here; confirm.
        json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
        raw_payload = json_results[1]
        results = json.loads(raw_payload)

        p_values = []
        for raw_p_value in results['p_values']:
            p_values.append(float(raw_p_value))
        t_stats = results['t_stats']

    marker_set = self.dataset.group.markers
    marker_set.add_pvalues(p_values)
    return self.dataset.group.markers.markers