Ejemplo n.º 1
0
    def run_analysis(self, requestform):
        """Prepare the inputs for an ePheWAS run from the submitted form.

        Reads ``trait_id`` and ``dataset`` from *requestform*, stores them
        (plus the dataset object created from the dataset name) on ``self``,
        and builds the SNP and phenotype aligner objects.

        Args:
            requestform: Mapping holding at least the keys ``"trait_id"`` and
                ``"dataset"``; a missing key raises ``KeyError``.

        Returns:
            None. The aligner objects are only bound to local names here;
            nothing in this method reads them afterwards.
        """
        print("Starting ePheWAS analysis on dataset")
        genofilelocation = locate(
            "BXD.geno", "genotype")  # Get the location of the BXD genotypes
        # NOTE(review): never read below; the call is kept in case locate()
        # has side effects or is relied on to fail for a missing file.
        tissuealignerloc = locate(
            "Tissue_color_aligner.csv",
            "auwerx")  # Get the location of the Tissue_color_aligner

        # Get user parameters, trait_id and dataset, and store/update them in self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)

        # Debug output. Written as single-argument print() calls so the text
        # is identical under Python 2 and Python 3 (the former print
        # statements were a SyntaxError on Python 3).
        print("self.trait_id:" + self.trait_id + "\n")
        print("self.datasetname:" + self.datasetname + "\n")
        print("self.dataset.type:" + self.dataset.type + "\n")

        # Load in the genotypes file to make the marker map
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()

        # Flatten the markers row by row (name, chr, Mb) so that the matrix
        # below can be filled with byrow=True.
        snpinfo = [
            marker[field]
            for marker in parser.markers
            for field in ("name", "chr", "Mb")
        ]

        marker_count = len(parser.markers)
        rnames = r_seq(1, marker_count)
        # Create the snp aligner object out of the BXD genotypes
        snpaligner = ro.r.matrix(snpinfo,
                                 nrow=marker_count,
                                 dimnames=r_list(rnames,
                                                 r_c("SNP", "Chr", "Pos")),
                                 ncol=3,
                                 byrow=True)

        # Create the phenotype aligner object using R
        phenoaligner = self.r_create_Pheno_aligner()

        print("Initialization of ePheWAS done !")
    def run_analysis(self, requestform):
        """Run a CTL scan for the requested traits and collect the output.

        Reads the scan parameters from *requestform*, builds genotype and
        phenotype data frames in R, runs the scan, renders one line plot plus
        one plot per trait, and fills ``self.results`` and ``self.elements``.

        Args:
            requestform: Mapping with the keys
                ``"trait_list"`` (comma-separated ``name:dataset`` entries),
                ``"strategy"``, ``"nperm"`` (integer), ``"parametric"``
                (boolean flag) and ``"significance"`` (float).

        Returns:
            None. Results are stored on ``self``:
            ``self.results`` holds the image names/paths (``imgurlN`` /
            ``imglocN``), the significant interactions (``ctlresult``) and the
            original form (``requestform``); ``self.elements`` is the JSON
            dump of ``self.nodes_list + self.edges_list``.

        Raises:
            KeyError: if ``"trait_list"`` is missing from *requestform*.
            IndexError: if the trait list is empty or an entry has no
                ``:dataset`` part.
            TypeError, ValueError: if ``nperm`` or ``significance`` is missing
                or not numeric.

        Note:
            ``self.nodes_list`` and ``self.edges_list`` are read but not
            created here; presumably they are initialised elsewhere and
            filled by ``addNode`` / ``addEdge`` - verify against the class.
        """
        print("Starting CTL analysis on dataset")
        stripped_entries = (
            entry.strip() for entry in requestform['trait_list'].split(','))
        # Drop empty entries (e.g. from a trailing comma).
        self.trait_db_list = [entry for entry in stripped_entries if entry]

        strategy = requestform.get("strategy")
        print("strategy:", strategy)

        raw_nperm = requestform.get("nperm")
        print("nperm:", raw_nperm)
        nperm = int(raw_nperm)

        raw_parametric = requestform.get("parametric")
        print("parametric:", raw_parametric)
        # bool() of any non-empty string is True, so a submitted "False" or
        # "0" used to switch the option on. Only explicitly negative strings
        # are mapped to False; every other value keeps its previous meaning.
        if hasattr(raw_parametric, "strip"):
            parametric = raw_parametric.strip().lower() not in (
                "", "0", "false", "f", "no", "n", "off")
        else:
            parametric = bool(raw_parametric)

        raw_significance = requestform.get("significance")
        print("significance:", raw_significance)
        significance = float(raw_significance)

        # Get the name of the .geno file belonging to the first phenotype
        datasetname = self.trait_db_list[0].split(":")[1]
        dataset = data_set.create_dataset(datasetname)

        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        print(dataset.group)

        # Create a genotype matrix: one row per marker, one column per
        # individual, transposed afterwards.
        individuals = parser.individuals
        markernames = [marker["name"] for marker in parser.markers]
        genotypes = list(itertools.chain.from_iterable(
            marker["genotypes"] for marker in parser.markers))
        print(len(genotypes) / len(individuals), "==", len(parser.markers))

        rGeno = r_t(ro.r.matrix(r_unlist(genotypes),
                                nrow=len(markernames),
                                ncol=len(individuals),
                                dimnames=r_list(markernames, individuals),
                                byrow=True))

        # Create a phenotype matrix: one row per trait, one column per
        # individual, transposed afterwards.
        traits = []
        for trait in self.trait_db_list:
            print("retrieving data for", trait)
            ts = trait.split(':')
            gt = TRAIT.GeneralTrait(name=ts[0], dataset_name=ts[1])
            gt = TRAIT.retrieve_sample_data(gt, dataset, individuals)
            for ind in individuals:
                if ind in gt.data:
                    traits.append(gt.data[ind].value)
                else:
                    # Placeholder for individuals without a value for this trait.
                    traits.append("-999")

        rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)),
                                 nrow=len(self.trait_db_list),
                                 ncol=len(individuals),
                                 dimnames=r_list(self.trait_db_list, individuals),
                                 byrow=True))

        print(rPheno)

        # Use a data frame to store the objects
        rPheno = r_data_frame(rPheno, check_names=False)
        rGeno = r_data_frame(rGeno, check_names=False)

        # Perform the CTL scan
        res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, nperm=nperm,
                             parametric=parametric, ncores=6)

        # Get significant interactions
        significant = self.r_CTLsignificant(res, significance=significance)

        # Create an image for output
        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

        self.results['ctlresult'] = significant
        # Store the user specified parameters for the output page
        self.results['requestform'] = requestform

        # Create the lineplot; always close the device, even if plotting fails,
        # so a failed request does not leave an R graphics device open.
        r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png')
        try:
            self.r_lineplot(res, significance=significance)
        finally:
            r_dev_off()

        # Image numbering starts at 2 because image 1 is the line plot above;
        # the phenotype index passed to R is 1-based.
        for pheno_index, trait in enumerate(self.trait_db_list, start=1):
            img_key = str(pheno_index + 1)
            # Create the QTL like CTL plots
            self.results['imgurl' + img_key] = webqtlUtil.genRandStr("CTL_") + ".png"
            self.results['imgloc' + img_key] = (
                GENERATED_IMAGE_DIR + self.results['imgurl' + img_key])
            r_png(self.results['imgloc' + img_key], width=1000, height=600,
                  type='cairo-png')
            try:
                self.r_plotCTLobject(res, pheno_index,
                                     significance=significance,
                                     main='Phenotype ' + trait)
            finally:
                r_dev_off()

        # Flush any output from R
        sys.stdout.flush()

        # Create the interactive graph for cytoscape visualization (Nodes and Edges)
        print(type(significant))
        if not isinstance(significant, ri.RNULLType):
            for x in range(len(significant[0])):
                # Debug to console
                print(significant[0][x], significant[1][x], significant[2][x])
                tsS = significant[0][x].split(':')  # Source
                tsT = significant[2][x].split(':')  # Target
                # Retrieve Source and Target info from the DB
                gtS = TRAIT.GeneralTrait(name=tsS[0], dataset_name=tsS[1])
                gtT = TRAIT.GeneralTrait(name=tsT[0], dataset_name=tsT[1])
                self.addNode(gtS)
                self.addNode(gtT)
                self.addEdge(gtS, gtT, significant, x)

                # Update the trait names for the displayed table
                significant[0][x] = gtS.symbol + " (" + gtS.name + ")"
                significant[2][x] = gtT.symbol + " (" + gtT.name + ")"

        self.elements = json.dumps(self.nodes_list + self.edges_list)
Ejemplo n.º 3
0
    def run_analysis(self, requestform):
        """Run the PheWAS Manhattan analysis for one trait and store the output.

        Reads ``trait_id``, ``dataset``, ``num_region`` and ``sel_mtadjust``
        from *requestform*, looks up the trait to obtain its chromosome and
        position, builds the SNP and phenotype aligner objects, and calls
        ``self.r_PheWASManhattan``.

        Args:
            requestform: Mapping providing the four keys listed above;
                ``num_region`` must be convertible with ``int()``.

        Returns:
            None. ``self.results`` is filled with the keys ``imgurl1``,
            ``imgloc1``, ``mtadjust``, ``phewas1``, ``phewas2``,
            ``tabulardata`` and ``R_debuglog``.
        """
        logger.info("Starting PheWAS analysis on dataset")

        # Input files: the BXD genotypes and the pre-computed EMMA results.
        geno_path = locate("BXD.geno", "genotype")
        precompfile = locate_phewas("PheWAS_pval_EMMA_norm.RData", "auwerx")

        # User parameters are kept on self.
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)
        self.region = int(requestform["num_region"])
        self.mtadjust = str(requestform["sel_mtadjust"])

        # Debug output of the parameters just read.
        logger.info("self.trait_id:" + self.trait_id + "\n")
        logger.info("self.datasetname:" + self.datasetname + "\n")
        logger.info("self.dataset.type:" + self.dataset.type + "\n")

        # Look the trait up without QTL or sample information; only its
        # chromosome and position are read below.
        self.this_trait = GeneralTrait(
            dataset=self.dataset,
            name=self.trait_id,
            get_qtl_info=False,
            get_sample_info=False,
        )
        logger.info(vars(self.this_trait))

        self.chr = str(self.this_trait.chr)
        self.mb = int(self.this_trait.mb)

        location_msg = ("location:" + self.chr + ":" + str(self.mb) + "+/-" +
                        str(self.region) + "\n")
        logger.info(location_msg)

        # Parse the genotype file to obtain the marker map.
        geno_parser = genofile_parser.ConvertGenoFile(geno_path)
        geno_parser.process_csv()
        marker_count = len(geno_parser.markers)

        # Flattened row by row (name, chr, Mb) for the byrow=True matrix below.
        marker_fields = [
            marker[column]
            for marker in geno_parser.markers
            for column in ("name", "chr", "Mb")
        ]

        # SNP aligner: one row per marker with columns SNP / Chr / Pos.
        row_labels = r_seq(1, marker_count)
        column_labels = r_c("SNP", "Chr", "Pos")
        snpaligner = ro.r.matrix(
            marker_fields,
            nrow=marker_count,
            dimnames=r_list(row_labels, column_labels),
            ncol=3,
            byrow=True,
        )

        # Phenotype aligner, created in R.
        phenoaligner = self.r_create_Pheno_aligner()

        self.results = {}
        image_name = webqtlUtil.genRandStr("phewas_") + ".png"
        self.results['imgurl1'] = image_name
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + image_name
        self.results['mtadjust'] = self.mtadjust
        logger.info("IMAGE AT:", self.results['imgurl1'])
        logger.info("IMAGE AT:", self.results['imgloc1'])

        # Create the PheWAS plot (the gene/probe name, chromosome and
        # positions should come from the user input).
        # TODO: generate the PDF in the temp folder, with a unique name
        assert precompfile
        assert phenoaligner
        assert snpaligner
        phewasres = self.r_PheWASManhattan(
            "Test",
            precompfile,
            phenoaligner,
            snpaligner,
            "None",
            self.chr,
            self.mb,
            self.region,
            self.results['imgloc1'],
            self.mtadjust,
        )

        # The first four elements of the R result, stored by position.
        result_keys = ('phewas1', 'phewas2', 'tabulardata', 'R_debuglog')
        for position, key in enumerate(result_keys):
            self.results[key] = phewasres[position]

        logger.info("Initialization of PheWAS done !")