예제 #1
0
파일: plink.py 프로젝트: dposthuma/jag
    def run_permutation(self, seeds):
        """
        Run PLINK once on permuted phenotype data and collect the scores.

        The file returned by ``common.create_pheno_permutation_file`` is passed
        to PLINK via ``--pheno``/``--all-pheno`` and ``self.group`` via
        ``--extract``. When ``self.covar_file`` is set, the file returned by
        ``common.create_covar_permutation_file`` is added via
        ``--covar``/``--hide-covar``; otherwise ``--assoc`` is requested.

        seeds: forwarded unchanged to the permutation-file helpers, to
               ``point_outputfile_to_phenotype`` and to
               ``extract_permutated_scores``.

        Returns the value produced by ``extract_permutated_scores``.

        The permuted phenotype file and the PLINK output are removed even when
        a step raises, so a failed permutation leaves no temporary files behind.
        """
        permuted_pheno_file = common.create_pheno_permutation_file(self, seeds)
        resultfile = None  # stays None until PLINK has actually produced output

        try:
            pheno_arg = " --pheno " + str(
                permuted_pheno_file + " --all-pheno" + " --extract " + self.group)

            if self.covar_file:
                # NOTE(review): the permuted covariate file is not deleted by
                # this method -- confirm whether it is cleaned up elsewhere.
                permuted_covar_file = common.create_covar_permutation_file(self, seeds)
                pheno_arg += " --covar " + permuted_covar_file + " --hide-covar"
                self.set_plink_arguments("--bfile " + self.bfile + pheno_arg)
            else:
                self.set_plink_arguments("--bfile " + self.bfile + " --assoc " + pheno_arg)

            resultfile = self.run_plink()  # run plink
            results_extension = common.plink_results_extension(resultfile)
            resultfiles = self.point_outputfile_to_phenotype(seeds, resultfile, results_extension)
            results = extract_permutated_scores(self.snp_to_group_map, resultfiles, seeds)
        finally:
            # Same cleanup order as before: phenotype file first, then PLINK output.
            os.remove(permuted_pheno_file)  # remove permuted pheno file
            if resultfile is not None:
                common.remove_plink_output(resultfile)  # remove all output from plink

        return results
예제 #2
0
    def run_step1(self, plink, geneset, gene_group=False, adjusted=False):
        """
        Main function of the association analysis.

        Runs PLINK once and then, for every selected association output file,
        stores the results in ``self.files`` and (when
        ``self.in_and_out.run_rproject`` is set) draws QQ plots.

        plink: a plink object. An example of how to create this kind of object
               can be found in test/test_jag.py
        geneset: value handed to ``common.map_snp_to_geneset`` and stored as
                 ``clusterresults.geneset_path``. When it is None the run is
                 terminated with ``sys.exit`` as soon as the first association
                 file is processed.
        gene_group: forwarded to ``common.map_snp_to_geneset``.
        adjusted: when true only PLINK outputs whose name ends in "adjusted"
                  are processed, otherwise only those that do not. Also
                  forwarded to the p-value extraction and the QQ plotting.

        Returns ``self.files``: one dict per association file, keyed by the
        string in ``clusterresults.column_number``, holding "raw_p_files",
        "clusterresults" and "assoc_files".
        """
        # read geneset as dictionary (key=snp, value=geneset_name)
        if geneset is not None:
            snptogeneset = common.map_snp_to_geneset(geneset, gene_group)

        pheno_arg = ""
        if plink.pheno_file is not None:
            pheno_arg = " --pheno " + str(plink.pheno_file + " --all-pheno ")

        if plink.covar_file is not None:
            pheno_arg += " --covar " + plink.covar_file + " --hide-covar "
            plink.set_plink_arguments("--bfile " + plink.bfile + " " + pheno_arg)
        else:
            plink.set_plink_arguments("--bfile " + plink.bfile + " --assoc " + pheno_arg)

        resultfile = plink.run_plink()

        # Keep either only the "adjusted" outputs or only the other ones.
        wants_adjusted = bool(adjusted)
        assoc_files = [
            candidate for candidate in glob.glob(resultfile + "*.*assoc*")
            if candidate.endswith("adjusted") == wants_adjusted
        ]

        for pheno_nr, assoc_file in enumerate(assoc_files, 1):
            # read values from the assoc file and look them up in the snp-to-geneset map
            clusterresults = CR.Clusterresults()
            clusterresults.pheno_path = plink.pheno_file
            clusterresults.total_columns = plink.get_number_of_pheno_types()
            clusterresults.column_number = str(in_out.get_assoc_file_number(resultfile, assoc_file))
            self.files[clusterresults.column_number] = {}
            clusterresults.bfile_path = plink.bfile
            clusterresults.geneset_path = geneset

            moved_assoc_file = self.in_and_out.copy_assoc_file(assoc_file)

            if geneset is None:
                # Without a geneset nothing can be summarised; the check sits
                # after the copy above so the assoc file is still copied first.
                log.info ("No geneset file is set: no sumlog file is made.")
                sys.exit("JAG is terminated.")

            # extract the needed p values from the assoc file
            clusterresults.map_p_values_from_assoc_file(snptogeneset, assoc_file, adjusted)
            self.files[clusterresults.column_number]["raw_p_files"] = clusterresults.write_all_pvalues()
            clusterresults.createcleanresults()
            self.files[clusterresults.column_number]["clusterresults"] = clusterresults
            self.in_and_out.save_sumlog(assoc_file, clusterresults)  # generate .sumlog

            # code for QQ plots
            # saving full file path for creating plot later on
            self.files[clusterresults.column_number]["assoc_files"] = moved_assoc_file

            # The plot inputs are rebuilt on every iteration from everything
            # collected in self.files so far. A separate name is used so the
            # list being iterated is not rebound.
            assoc_pheno = [values["assoc_files"] for values in self.files.values()]
            all_assoc_files = list(plot_with_r.from_iterable(assoc_pheno))
            raw_p_pheno = [values["raw_p_files"] for values in self.files.values()]

            if self.in_and_out.run_rproject:
                plotter = plot_with_r.call_r(self.in_and_out)
                plotter.draw_qq_plots(all_assoc_files, pheno_nr, adjusted)
                plotter.draw_qq_plots_for_sets(raw_p_pheno, pheno_nr)

        common.remove_plink_output(resultfile)  # remove all output from plink

        return self.files