Exemple #1
0
    def localpermutation(self, nperm, random_seed, gene_group=False):
        '''
        Run the permutation analysis on the local computer.

        nperm: total number of permutations; converted with int().
        random_seed: seed handed to _create_seeds_for_jobs and used as prefix
            of the ".perm" file names. When None, a seed is generated with
            _create_random_seed(8).
        gene_group: stored as self.gene_based and forwarded to
            common.map_snp_to_geneset.

        For every key of the merged permutation results the formatted scores
        are saved and registered as self.files[key]["perm"]. When
        self.no_emp equals False the empirical p-values are saved as well
        (self.files[key]["empp"]) and, if self.in_out.run_rproject is true,
        a distribution plot is drawn.

        Raises KeyError when a result key has no association result with a
        matching column_number.
        '''
        self.gene_based = gene_group

        aa_results = self.get_assoc_analysis()

        # create a random seed when the caller did not supply one
        if random_seed is None:
            random_seed = _create_random_seed(8)

        snp_to_group = common.map_snp_to_geneset(self.group, gene_group)
        self.plink.set_snp_to_group_map(snp_to_group)

        nperm = int(nperm)

        # with a covariate file every job gets a single permutation,
        # otherwise 50 permutations are bundled per job
        permperjob = 1 if self.plink.covar_file else 50

        jobs = _create_seeds_for_jobs(nperm, permperjob, random_seed)

        resultfiles = self.plink.run_permutation_single_core(jobs)
        results_merged = merge_results(resultfiles)

        # column number (as string) -> association result. Built on first use
        # only, so aa_results is not touched when no empirical p-values are
        # requested or when there are no permutation results.
        results_by_column = None

        for key, perm_score_per_group in results_merged.iteritems():
            key = str(key)

            # save permutation score as table with random seed in filename
            perm_out_string = perm_score_per_group.format_permout()
            perm_out_filename = str(random_seed) + ".P" + key + ".perm"
            perm_filename = self.in_out.save_text_to_filename(perm_out_filename, perm_out_string)
            log.info("\nSaved permutation results as " + perm_filename)

            self.files[key] = {"perm": perm_filename}

            # Kept as an equality test on purpose: a no_emp of None must not
            # enable this branch, which "not self.no_emp" would do.
            if self.no_emp == False:  # noqa: E712
                if results_by_column is None:
                    # later entries win when column numbers repeat
                    results_by_column = dict(
                        (str(result.column_number), result) for result in aa_results
                    )

                current_aa_results = results_by_column[key]
                empp_out_text = perm_score_per_group.format_permutated_results(current_aa_results)

                emp_filename = self.in_out.save_text_to_filename("P" + key + ".empp", empp_out_text)
                log.info("Saved empirical pvalues as " + emp_filename)
                self.files[key]["empp"] = emp_filename

                if self.in_out.run_rproject:
                    # imported here so the R plotting module is only loaded
                    # when plotting is actually requested
                    import jag.plot_with_r as plot_with_r
                    plotter = plot_with_r.call_r(self.in_out)
                    plotter.draw_dist_plot(self.files, key)
    def run_step1(self, plink, geneset, gene_group=False, adjusted=False):
        """
        Main function of the association analysis: runs plink and processes
        every association file that it produced.

        plink: a plink object (an example of how to create one can be found
            in test/test_jag.py). Its pheno_file, covar_file and bfile
            attributes are used to build the plink arguments.
        geneset: path to the gene set file, passed to
            common.map_snp_to_geneset. When it is None no sumlog file can be
            made and the run is terminated with sys.exit while processing
            the first association file.
        gene_group: forwarded to common.map_snp_to_geneset.
        adjusted: when true only association files whose name ends with
            "adjusted" are processed, otherwise only the other ones.

        Returns self.files. For each processed association file it holds,
        under the file's column number (a string), a dict with the keys
        "raw_p_files", "clusterresults" and "assoc_files".
        """
        # read geneset as dictionary (key=snp, value=geneset_name)
        snptogeneset = None
        if geneset is not None:
            snptogeneset = common.map_snp_to_geneset(geneset, gene_group)

        pheno_arg = ""
        if plink.pheno_file is not None:
            # str() is applied to the path itself so that a non-string
            # pheno_file can still be concatenated
            pheno_arg = " --pheno " + str(plink.pheno_file) + " --all-pheno "

        if plink.covar_file is not None:
            pheno_arg += " --covar " + plink.covar_file + " --hide-covar "
            # NOTE(review): no "--assoc" is passed when covariates are used;
            # presumably the plink object adds the model option itself - confirm
            plink.set_plink_arguments("--bfile " + plink.bfile + " " + pheno_arg)
        else:
            plink.set_plink_arguments("--bfile " + plink.bfile + " --assoc " + pheno_arg)

        resultfile = plink.run_plink()
        plink_output = glob.glob(resultfile + "*.*assoc*")

        if adjusted:
            assoc_files = [path for path in plink_output if path.endswith("adjusted")]
        else:
            assoc_files = [path for path in plink_output if not path.endswith("adjusted")]

        # pheno_nr counts the processed association files, starting at 1
        for pheno_nr, assoc_file in enumerate(assoc_files, 1):
            column_number = str(in_out.get_assoc_file_number(resultfile, assoc_file))

            clusterresults = CR.Clusterresults()
            clusterresults.pheno_path = plink.pheno_file
            clusterresults.total_columns = plink.get_number_of_pheno_types()
            clusterresults.column_number = column_number
            clusterresults.bfile_path = plink.bfile
            clusterresults.geneset_path = geneset

            file_info = {}
            self.files[column_number] = file_info

            moved_assoc_file = self.in_and_out.copy_assoc_file(assoc_file)

            if geneset is None:
                log.info ("No geneset file is set: no sumlog file is made.")
                sys.exit("JAG is terminated.")

            # read values from the assoc file and look them up in the
            # snp-to-geneset map to extract the needed p values
            clusterresults.map_p_values_from_assoc_file(snptogeneset, assoc_file, adjusted)
            file_info["raw_p_files"] = clusterresults.write_all_pvalues()
            clusterresults.createcleanresults()
            file_info["clusterresults"] = clusterresults
            self.in_and_out.save_sumlog(assoc_file, clusterresults)  # generate .sumlog

            # code for QQ plots:
            # save the full file path for creating the plot later on
            file_info["assoc_files"] = moved_assoc_file

            # Collected over everything registered in self.files so far, not
            # only over the current association file.
            assoc_per_pheno = [values["assoc_files"] for values in self.files.itervalues()]
            # presumably from_iterable flattens the per-phenotype entries
            # (like itertools.chain.from_iterable) - confirm in plot_with_r
            all_assoc_files = list(plot_with_r.from_iterable(assoc_per_pheno))
            raw_p_pheno = [values["raw_p_files"] for values in self.files.itervalues()]

            if self.in_and_out.run_rproject:
                plotter = plot_with_r.call_r(self.in_and_out)
                plotter.draw_qq_plots(all_assoc_files, pheno_nr, adjusted)
                plotter.draw_qq_plots_for_sets(raw_p_pheno, pheno_nr)

        common.remove_plink_output(resultfile)          # remove all output from plink

        return self.files