def localpermutation(self, nperm, random_seed, gene_group=False):
    '''
    Run permutation on local computer

    The permutation jobs are executed through
    self.plink.run_permutation_single_core and merged with merge_results.
    For every merged entry a ".perm" table is written and registered in
    self.files under the stringified key. When self.no_emp equals False
    an ".empp" file with the empirical p-values is written as well, and,
    if self.in_out.run_rproject is set, a distribution plot is drawn.

    nperm: number of permutations, converted with int().
    random_seed: seed handed to the job creation and used as prefix of the
        ".perm" file names; when None, _create_random_seed(8) supplies one.
    gene_group: stored in self.gene_based and forwarded to
        common.map_snp_to_geneset.
    '''
    self.gene_based = gene_group
    aa_results = self.get_assoc_analysis()

    # create a random seed when the caller did not supply one
    if random_seed is None:
        random_seed = _create_random_seed(8)

    snp_to_group = common.map_snp_to_geneset(self.group, gene_group)
    self.plink.set_snp_to_group_map(snp_to_group)

    nperm = int(nperm)
    # a single permutation per job when a covariate file is set, else 50
    permperjob = 1 if self.plink.covar_file else 50

    jobs = _create_seeds_for_jobs(nperm, permperjob, random_seed)
    resultfiles = self.plink.run_permutation_single_core(jobs)
    results_merged = merge_results(resultfiles)

    # Kept as an equality test on purpose: values that do not compare equal
    # to False (for instance None) skip the empirical step, as before.
    compute_empirical = (self.no_emp == False)  # noqa: E712

    # The column-number -> index lookup does not depend on the permutation
    # result being processed, so it is built once instead of per iteration.
    aa_index_by_column = {}
    if compute_empirical:
        aa_index_by_column = {
            str(result.column_number): index
            for index, result in enumerate(aa_results)
        }

    # items() is available on Python 2 and Python 3 mappings alike
    for key, perm_score_per_group in results_merged.items():
        key = str(key)

        # save permutation score as table with random seed in filename
        perm_out_string = perm_score_per_group.format_permout()
        perm_out_filename = str(random_seed) + ".P" + key + ".perm"
        perm_filename = self.in_out.save_text_to_filename(perm_out_filename,
                                                          perm_out_string)
        log.info("\nSaved permutation results as " + perm_filename)
        self.files[key] = {"perm": perm_filename}

        if not compute_empirical:
            continue

        # a key without a matching association result raises KeyError here
        current_aa_results = aa_results[aa_index_by_column[key]]
        empp_out_text = perm_score_per_group.format_permutated_results(
            current_aa_results)
        emp_filename = self.in_out.save_text_to_filename("P" + key + ".empp",
                                                         empp_out_text)
        log.info("Saved empirical pvalues as " + emp_filename)
        self.files[key]["empp"] = emp_filename

        # NOTE(review): the original indentation was lost; the plot is
        # assumed to belong to the empirical-p-value branch - confirm.
        if self.in_out.run_rproject:
            # imported here so the module is only loaded when plotting is on
            import jag.plot_with_r as plot_with_r
            plotter = plot_with_r.call_r(self.in_out)
            plotter.draw_dist_plot(self.files, key)
def run_step1(self, plink, geneset, gene_group=False, adjusted=False):
    """
    main function of association analysis.
    this function calls all function needed for aa analysis.

    plink: a plink object. Example to create this kind of object can be
        found in test/test_jag.py
    geneset: path of the gene-set file; it is handed to
        common.map_snp_to_geneset and stored on every Clusterresults
        object. When it is None the program is terminated with sys.exit
        as soon as the first association file is processed.
    gene_group: forwarded to common.map_snp_to_geneset.
    adjusted: when true only plink result files ending in "adjusted" are
        processed, otherwise exactly those files are skipped. The flag is
        also forwarded to the p-value mapping and to the QQ plots.

    Returns self.files, which holds one dictionary per processed
    association file, keyed by the file's column number (as string).
    """
    # read geneset as dictionary (key=snp, value=geneset_name)
    snptogeneset = None
    if geneset is not None:
        snptogeneset = common.map_snp_to_geneset(geneset, gene_group)

    pheno_arg = ""
    if plink.pheno_file is not None:
        # str() now wraps the value only, not the whole concatenation
        pheno_arg = " --pheno " + str(plink.pheno_file) + " --all-pheno "

    if plink.covar_file is not None:
        pheno_arg += " --covar " + plink.covar_file + " --hide-covar "
        plink.set_plink_arguments("--bfile " + plink.bfile + " " + pheno_arg)
    else:
        plink.set_plink_arguments("--bfile " + plink.bfile + " --assoc "
                                  + pheno_arg)

    resultfile = plink.run_plink()

    # keep either only the "adjusted" result files or only the others
    want_adjusted = bool(adjusted)
    selected_files = [
        path for path in glob.glob(resultfile + "*.*assoc*")
        if path.endswith("adjusted") == want_adjusted
    ]

    # pheno_nr counts the processed association files, starting at 1
    for pheno_nr, assoc_file in enumerate(selected_files, 1):
        clusterresults = CR.Clusterresults()
        clusterresults.pheno_path = plink.pheno_file
        clusterresults.total_columns = plink.get_number_of_pheno_types()
        column = str(in_out.get_assoc_file_number(resultfile, assoc_file))
        clusterresults.column_number = column
        self.files[column] = {}
        clusterresults.bfile_path = plink.bfile
        clusterresults.geneset_path = geneset
        moved_assoc_file = self.in_and_out.copy_assoc_file(assoc_file)

        if geneset is None:
            log.info("No geneset file is set: no sumlog file is made.")
            sys.exit("JAG is terminated.")

        # extract the needed p values from the assoc file and look them up
        # in the snp-to-geneset map
        clusterresults.map_p_values_from_assoc_file(snptogeneset, assoc_file,
                                                    adjusted)
        self.files[column]["raw_p_files"] = clusterresults.write_all_pvalues()
        clusterresults.createcleanresults()
        self.files[column]["clusterresults"] = clusterresults
        # generate .sumlog
        self.in_and_out.save_sumlog(assoc_file, clusterresults)

        # code for QQ plots
        # saving full file path for creating plot later on
        self.files[column]["assoc_files"] = moved_assoc_file

        # Distinct names are used here: the original reused the loop
        # variable inside its comprehensions, which rebinds it on Python 2.
        assoc_pheno = [entry["assoc_files"]
                       for entry in self.files.values()]
        plot_assoc_files = list(plot_with_r.from_iterable(assoc_pheno))
        raw_p_pheno = [entry["raw_p_files"]
                       for entry in self.files.values()]

        if self.in_and_out.run_rproject:
            plotter = plot_with_r.call_r(self.in_and_out)
            plotter.draw_qq_plots(plot_assoc_files, pheno_nr, adjusted)
            plotter.draw_qq_plots_for_sets(raw_p_pheno, pheno_nr)

    # remove all output from plink
    common.remove_plink_output(resultfile)
    return self.files