def read_genotype_file(self):
    """Read the genotype from a .geno file instead of the database.

    Uses ``self.genofile`` when it is set and ``self.name + '.geno'``
    otherwise.  Sets ``self.samplelist`` from the chosen genotype object's
    ``prgy`` attribute and resets ``self.incparentsf1`` to 0 when the
    variant with parents is not used.

    Returns the selected reaper dataset object.
    """
    # Dataset without parents and F1.
    progeny_only = reaper.Dataset()

    geno_name = self.genofile if self.genofile else self.name + '.geno'
    # reaper barfs on unicode filenames, so make sure it gets a plain str.
    full_filename = str(locate(geno_name, 'genotype'))
    progeny_only.read(full_filename)

    # Dataset with parents added (only built for "group" crosses that
    # have a parent list); otherwise it is the same object.
    with_parents = progeny_only
    if progeny_only.type == "group" and self.parlist:
        with_parents = progeny_only.add(Mat=self.parlist[0],
                                        Pat=self.parlist[1])

    # Determine the default genotype object.
    if self.incparentsf1 and progeny_only.type != "intercross":
        chosen = with_parents
    else:
        self.incparentsf1 = 0
        chosen = progeny_only

    self.samplelist = list(chosen.prgy)
    return chosen
def read_genotype_file(self, use_reaper=False):
    """Read the genotype from a .geno file instead of the database.

    ``use_reaper`` is accepted but not consulted by this implementation.
    Sets ``self.samplelist`` from the chosen genotype object's ``prgy``
    attribute and resets ``self.incparentsf1`` to 0 when the variant with
    parents is not used.

    Returns the selected genotype object.
    """
    if not self.genofile:
        geno_name = self.name + '.geno'
    elif "RData" in self.genofile:
        # ZS: temporary fix -- the JSON files that point to multiple
        # genotype files still need restructuring to reference other
        # file types like RData, so map the RData name to its .geno.
        geno_name = self.genofile.split(".")[0] + ".geno"
    else:
        geno_name = self.genofile

    # Make sure the path is a plain str.
    full_filename = str(locate(geno_name, 'genotype'))

    # Genotype object without parents and F1.
    progeny_only = gen_geno_ob.genotype(full_filename)

    # Genotype object with parents added (not for intercross).
    with_parents = progeny_only
    if progeny_only.type == "group" and self.parlist:
        with_parents = progeny_only.add(Mat=self.parlist[0],
                                        Pat=self.parlist[1])

    # Determine the default genotype object.
    if self.incparentsf1 and progeny_only.type != "intercross":
        chosen = with_parents
    else:
        self.incparentsf1 = 0
        chosen = progeny_only

    self.samplelist = list(chosen.prgy)
    return chosen
def __init__(self, name):
    """Load the marker list for group *name* from its BIMBAM SNP file.

    Each marker is a dict with the keys ``'name'``, ``'Mb'`` and ``'chr'``.
    Chromosomes other than X, Y and M are converted to ``int``.  If the
    file cannot be located or parsed, ``self.markers`` is an empty list
    (best-effort, as before).

    Fixes over the previous revision, which always produced an empty list:
    the open mode ``'r'`` was passed to ``locate()`` instead of ``open()``,
    the file object was subscripted, ``marker['Mb']`` was never assigned,
    and one dict instance was shared by every row.  The unused (and never
    closed) JSON file handle is no longer opened.
    """
    markers = []
    try:
        # NOTE(review): 'genotype/bimbam' is the directory the sibling
        # implementation reads "<name>_snps.txt" from -- confirm it is the
        # right sub-directory for locate().
        with open(locate(name + "_snps.txt", 'genotype/bimbam'), 'r') as bimbam_fh:
            lines = [line for line in bimbam_fh if line.strip()]

        if lines:
            # Probe the first record once to find the column separator.
            delimiter = " "
            for candidate in (", ", ",", "\t"):
                if len(lines[0].split(candidate)) > 2:
                    delimiter = candidate
                    break

            for line in lines:
                fields = line.split(delimiter)
                markers.append({
                    'name': fields[0].rstrip(),
                    # Column 2 is treated as a base-pair position, as in
                    # the sibling implementation -- TODO confirm.
                    'Mb': float(fields[1].rstrip()) / 1000000,
                    'chr': fields[2].rstrip(),
                })
    except Exception:
        # Deliberate best-effort: a missing or malformed file yields no
        # markers rather than an error.
        markers = []

    for marker in markers:
        if marker['chr'] not in ("X", "Y", "M"):
            marker['chr'] = int(marker['chr'])

    self.markers = markers
def run_analysis(self, requestform):
    """Initialise an ePheWAS analysis from the submitted form.

    Reads ``trait_id`` and ``dataset`` from *requestform*, stores them on
    ``self`` (together with the dataset object created from the name) and
    builds the R SNP aligner matrix from the BXD genotype file and the
    phenotype aligner via ``self.r_create_Pheno_aligner()``.

    The debug output now uses the ``print(...)`` call form throughout; the
    previous ``print "..."`` statements are a SyntaxError on Python 3.
    """
    print("Starting ePheWAS analysis on dataset")
    # Location of the BXD genotypes.
    genofilelocation = locate("BXD.geno", "genotype")
    # Location of the Tissue_color_aligner (located but not used further
    # in the visible code).
    tissuealignerloc = locate("Tissue_color_aligner.csv", "auwerx")

    # Get user parameters, trait_id and dataset, and store them in self.
    self.trait_id = requestform["trait_id"]
    self.datasetname = requestform["dataset"]
    self.dataset = data_set.create_dataset(self.datasetname)

    # Print some debug
    print("self.trait_id:" + self.trait_id + "\n")
    print("self.datasetname:" + self.datasetname + "\n")
    print("self.dataset.type:" + self.dataset.type + "\n")

    # Load in the genotypes file to make the marker map.
    parser = genofile_parser.ConvertGenoFile(genofilelocation)
    parser.process_csv()

    # Flattened (name, chr, Mb) triples, one per marker, filled row-wise
    # into a three-column R matrix below.
    snpinfo = []
    for marker in parser.markers:
        snpinfo.append(marker["name"])
        snpinfo.append(marker["chr"])
        snpinfo.append(marker["Mb"])

    rnames = r_seq(1, len(parser.markers))
    # Create the snp aligner object out of the BXD genotypes.
    snpaligner = ro.r.matrix(snpinfo,
                             nrow=len(parser.markers),
                             dimnames=r_list(rnames, r_c("SNP", "Chr", "Pos")),
                             ncol=3,
                             byrow=True)

    # Create the phenotype aligner object using R.
    phenoaligner = self.r_create_Pheno_aligner()

    print("Initialization of ePheWAS done !")
def __init__(self, name):
    """Load the marker list for group *name* from its BIMBAM SNP file.

    Each marker is a dict with the keys ``'name'``, ``'Mb'`` and ``'chr'``.
    Chromosomes other than X, Y and M are converted to ``int``.  If the
    file cannot be located or parsed, ``self.markers`` is an empty list
    (best-effort, as before).

    Fixes over the previous revision, which always produced an empty list:
    the open mode ``'r'`` was passed to ``locate()`` instead of ``open()``,
    the file object was subscripted, ``marker['Mb']`` was never assigned,
    and one dict instance was shared by every row.  The unused (and never
    closed) JSON file handle is no longer opened.
    """
    markers = []
    try:
        # NOTE(review): 'genotype/bimbam' is the directory the sibling
        # implementation reads "<name>_snps.txt" from -- confirm it is the
        # right sub-directory for locate().
        with open(locate(name + "_snps.txt", 'genotype/bimbam'), 'r') as bimbam_fh:
            lines = [line for line in bimbam_fh if line.strip()]

        if lines:
            # Probe the first record once to find the column separator.
            delimiter = " "
            for candidate in (", ", ",", "\t"):
                if len(lines[0].split(candidate)) > 2:
                    delimiter = candidate
                    break

            for line in lines:
                fields = line.split(delimiter)
                markers.append({
                    'name': fields[0].rstrip(),
                    # Column 2 is treated as a base-pair position, as in
                    # the sibling implementation -- TODO confirm.
                    'Mb': float(fields[1].rstrip()) / 1000000,
                    'chr': fields[2].rstrip(),
                })
    except Exception:
        # Deliberate best-effort: a missing or malformed file yields no
        # markers rather than an error.
        markers = []

    for marker in markers:
        if marker['chr'] not in ("X", "Y", "M"):
            marker['chr'] = int(marker['chr'])

    self.markers = markers
def __init__(self, name):
    """Load the marker list for group *name* from its BIMBAM SNP file.

    Reads ``<flat_files('genotype/bimbam')>/<name>_snps.txt``.  Each marker
    is a dict with ``'name'``, ``'Mb'`` (second column divided by
    1,000,000) and ``'chr'``; chromosomes other than X, Y and M are
    converted to ``int``.

    The column separator is detected from the first line only.  The
    previous revision called ``readline()`` once per candidate separator,
    so each candidate was tested against a different line and one to
    three records were silently dropped.  The file is rewound after the
    probe so every record is parsed.  The unused (and never closed) JSON
    file handle is no longer opened.
    """
    markers = []
    with open("%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name), 'r') as bimbam_fh:
        first_line = bimbam_fh.readline()
        delimiter = " "
        for candidate in (", ", ",", "\t"):
            if len(first_line.split(candidate)) > 2:
                delimiter = candidate
                break

        # Rewind so the probed line is parsed as a record too.
        # NOTE(review): assumes the file has no header row -- confirm.
        bimbam_fh.seek(0)
        for line in bimbam_fh:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields = line.split(delimiter)
            markers.append({
                'name': fields[0].rstrip(),
                'Mb': float(fields[1].rstrip()) / 1000000,
                'chr': fields[2].rstrip(),
            })

    for marker in markers:
        if marker['chr'] not in ("X", "Y", "M"):
            marker['chr'] = int(marker['chr'])

    self.markers = markers
def gen_human_results(self, pheno_vector, key, temp_uuid):
    """Run the external pylmm command for a human dataset.

    Stores the JSON-encoded parameters in Redis under *key* (expiring
    after one hour), runs ``PYLMM_COMMAND`` with ``--species human``, then
    blocks for up to 45 minutes on the ``"pylmm:results:" + temp_uuid``
    list for the result.

    Returns ``(p_values, t_stats)`` taken from the decoded result.

    The kinship file is now read inside a ``with`` block; previously the
    handle was opened inline and never closed.
    """
    file_base = locate(self.dataset.group.name, "mapping")

    plink_input = input.plink(file_base, type='b')
    input_file_name = os.path.join(webqtlConfig.SNP_PATH,
                                   self.dataset.group.name + ".snps.gz")

    # Column vector of phenotypes and a matching all-ones covariate column.
    pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
    covariate_matrix = np.ones((pheno_vector.shape[0], 1))

    with open(file_base + '.kin', 'r') as kin_fh:
        kinship_matrix = np.fromfile(kin_fh, sep=" ")
    # Reshape in place to individuals x individuals.
    kinship_matrix.resize(
        (len(plink_input.indivs), len(plink_input.indivs)))

    logger.debug("Before creating params")

    params = dict(
        pheno_vector=pheno_vector.tolist(),
        covariate_matrix=covariate_matrix.tolist(),
        input_file_name=input_file_name,
        kinship_matrix=kinship_matrix.tolist(),
        refit=False,
        temp_uuid=temp_uuid,
        # meta data
        timestamp=datetime.datetime.now().isoformat(),
    )

    logger.debug("After creating params")

    json_params = json.dumps(params)
    Redis.set(key, json_params)
    Redis.expire(key, 60 * 60)

    logger.debug("Before creating the command")

    command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "human")

    logger.debug("command is:", command)

    os.system(command)

    # NOTE(review): presumably blpop yields nothing on timeout, in which
    # case the indexing below fails -- confirm and handle if needed.
    json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
    results = json.loads(json_results[1])
    t_stats = results['t_stats']
    p_values = results['p_values']

    return p_values, t_stats
def generate_cross_from_rdata(dataset):
    """Define the R function ``generate_cross_from_rdata`` in the R session.

    The R function's default ``filename`` is the located
    ``<group name>.RData`` file; when called it loads that file and
    returns the object named ``cunique``.
    """
    rdata_path = locate(dataset.group.name + ".RData", "genotype/rdata")
    r_source = """
       generate_cross_from_rdata <- function(filename = '%s') {
           load(file=filename)
           cross = cunique
           return(cross)
       }
    """ % (rdata_path)
    ro.r(r_source)
def gen_human_results(self, pheno_vector, key, temp_uuid):
    """Run the external pylmm command for a human dataset.

    Stores the JSON-encoded parameters in Redis under *key* (expiring
    after one hour), runs ``PYLMM_COMMAND`` with ``--species human``, then
    blocks for up to 45 minutes on the ``"pylmm:results:" + temp_uuid``
    list for the result.

    Returns ``(p_values, t_stats)`` taken from the decoded result.

    The kinship file is now read inside a ``with`` block; previously the
    handle was opened inline and never closed.
    """
    file_base = locate(self.dataset.group.name, "mapping")

    plink_input = input.plink(file_base, type='b')
    input_file_name = os.path.join(webqtlConfig.SNP_PATH,
                                   self.dataset.group.name + ".snps.gz")

    # Column vector of phenotypes and a matching all-ones covariate column.
    pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
    covariate_matrix = np.ones((pheno_vector.shape[0], 1))

    with open(file_base + '.kin', 'r') as kin_fh:
        kinship_matrix = np.fromfile(kin_fh, sep=" ")
    # Reshape in place to individuals x individuals.
    kinship_matrix.resize(
        (len(plink_input.indivs), len(plink_input.indivs)))

    logger.debug("Before creating params")

    params = dict(
        pheno_vector=pheno_vector.tolist(),
        covariate_matrix=covariate_matrix.tolist(),
        input_file_name=input_file_name,
        kinship_matrix=kinship_matrix.tolist(),
        refit=False,
        temp_uuid=temp_uuid,
        # meta data
        timestamp=datetime.datetime.now().isoformat(),
    )

    logger.debug("After creating params")

    json_params = json.dumps(params)
    Redis.set(key, json_params)
    Redis.expire(key, 60 * 60)

    logger.debug("Before creating the command")

    command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "human")

    logger.debug("command is:", command)

    os.system(command)

    # NOTE(review): presumably blpop yields nothing on timeout, in which
    # case the indexing below fails -- confirm and handle if needed.
    json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
    results = json.loads(json_results[1])
    t_stats = results['t_stats']
    p_values = results['p_values']

    return p_values, t_stats
def run_analysis(self, requestform):
    """Initialise an ePheWAS analysis from the submitted form.

    Reads ``trait_id`` and ``dataset`` from *requestform*, stores them on
    ``self`` (together with the dataset object created from the name) and
    builds the R SNP aligner matrix from the BXD genotype file and the
    phenotype aligner via ``self.r_create_Pheno_aligner()``.

    The debug output now uses the ``print(...)`` call form throughout; the
    previous ``print "..."`` statements are a SyntaxError on Python 3.
    """
    print("Starting ePheWAS analysis on dataset")
    # Location of the BXD genotypes.
    genofilelocation = locate("BXD.geno", "genotype")
    # Location of the Tissue_color_aligner (located but not used further
    # in the visible code).
    tissuealignerloc = locate("Tissue_color_aligner.csv", "auwerx")

    # Get user parameters, trait_id and dataset, and store them in self.
    self.trait_id = requestform["trait_id"]
    self.datasetname = requestform["dataset"]
    self.dataset = data_set.create_dataset(self.datasetname)

    # Print some debug
    print("self.trait_id:" + self.trait_id + "\n")
    print("self.datasetname:" + self.datasetname + "\n")
    print("self.dataset.type:" + self.dataset.type + "\n")

    # Load in the genotypes file to make the marker map.
    parser = genofile_parser.ConvertGenoFile(genofilelocation)
    parser.process_csv()

    # Flattened (name, chr, Mb) triples, one per marker, filled row-wise
    # into a three-column R matrix below.
    snpinfo = []
    for marker in parser.markers:
        snpinfo.append(marker["name"])
        snpinfo.append(marker["chr"])
        snpinfo.append(marker["Mb"])

    rnames = r_seq(1, len(parser.markers))
    # Create the snp aligner object out of the BXD genotypes.
    snpaligner = ro.r.matrix(snpinfo,
                             nrow=len(parser.markers),
                             dimnames=r_list(rnames, r_c("SNP", "Chr", "Pos")),
                             ncol=3,
                             byrow=True)

    # Create the phenotype aligner object using R.
    phenoaligner = self.r_create_Pheno_aligner()

    print("Initialization of ePheWAS done !")
def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, cofactors):
    """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)

    Returns ``(perm_results, suggestive, significant, results)`` when
    ``num_perm > 0`` and just ``results`` otherwise.
    """
    pheno_file = write_phenotype_file(trait_name, samples, vals, dataset,
                                      cofactors, perm_strata_list)

    # Prefer the group's explicit genotype file, else "<group>.geno".
    group = dataset.group
    geno_name = group.genofile if group.genofile else group.name + ".geno"
    geno_file = locate(geno_name, "genotype")

    post_data = {
        "pheno_file": pheno_file,
        "geno_file": geno_file,
        "model": model,
        "method": method,
        "nperm": num_perm,
        "scale": mapping_scale
    }

    # Optional request fields, added only when they apply.
    if do_control == "true" and control_marker:
        post_data["control"] = control_marker
    if not manhattan_plot:
        post_data["interval"] = True
    if cofactors:
        post_data["addcovar"] = True
    if perm_strata_list:
        post_data["pstrata"] = True

    response = requests.post(GN3_LOCAL_URL + "api/rqtl/compute",
                             data=post_data)
    rqtl_output = response.json()

    if num_perm > 0:
        return (rqtl_output['perm_results'],
                rqtl_output['suggestive'],
                rqtl_output['significant'],
                rqtl_output['results'])
    return rqtl_output['results']
def __init__(self, name):
    """Load the marker list for group *name* from its JSON marker file.

    The file is ``locate(name + '.json', 'genotype/json')``.  If its
    content is not valid JSON, ``self.markers`` is an empty list.  For
    every marker, chromosomes other than X and Y are converted to ``int``
    and ``'Mb'`` is converted to ``float``.

    The file is now read inside a ``with`` block (the handle used to be
    leaked), and the bare ``except`` is narrowed to ``ValueError``, which
    covers JSON decoding errors.
    """
    with open(locate(name + '.json', 'genotype/json')) as json_data_fh:
        try:
            markers = json.load(json_data_fh)
        except ValueError:
            # Unparseable marker file: fall back to no markers.
            markers = []

    for marker in markers:
        if (marker['chr'] != "X") and (marker['chr'] != "Y"):
            marker['chr'] = int(marker['chr'])
        marker['Mb'] = float(marker['Mb'])

    self.markers = markers
def __init__(self, name):
    """Load the marker list for group *name* from its JSON marker file.

    The file is ``locate(name + ".json", 'genotype/json')``.  If its
    content is not valid JSON, ``self.markers`` is an empty list.  For
    every marker, chromosomes other than X and Y are converted to ``int``
    and ``'Mb'`` is converted to ``float``.

    The file is now read inside a ``with`` block (the handle used to be
    leaked), and the bare ``except`` is narrowed to ``ValueError``, which
    covers JSON decoding errors.
    """
    with open(locate(name + ".json", 'genotype/json')) as json_data_fh:
        try:
            markers = json.load(json_data_fh)
        except ValueError:
            # Unparseable marker file: fall back to no markers.
            markers = []

    for marker in markers:
        if (marker['chr'] != "X") and (marker['chr'] != "Y"):
            marker['chr'] = int(marker['chr'])
        marker['Mb'] = float(marker['Mb'])

    self.markers = markers
def __init__(self, name, specified_markers=None):
    """Load markers for *name* from ``<locate('genotype')>/<name>.bim``.

    Each whitespace-separated line contributes a dict with ``'chr'``
    (column 1 as ``int``), ``'name'`` (column 2) and ``'Mb'`` (column 4
    divided by 1,000,000).

    Parameters:
        name: base name of the ``.bim`` file.
        specified_markers: optional collection of marker names; when it is
            non-empty only those markers are kept.  ``None`` or an empty
            collection keeps every marker.

    Changes from the previous revision: the default is ``None`` instead of
    a shared mutable list, the file is closed after reading, the names are
    put in a set once instead of scanning a list per line, and blank lines
    are skipped rather than raising ``IndexError``.
    """
    wanted = None
    if specified_markers is not None and len(specified_markers) > 0:
        wanted = set(specified_markers)

    self.markers = []
    with open(locate('genotype') + '/' + name + '.bim') as marker_data_fh:
        for line in marker_data_fh:
            splat = line.strip().split()
            if not splat:
                continue
            if wanted is not None and splat[1] not in wanted:
                continue
            self.markers.append({
                'chr': int(splat[0]),
                'name': splat[1],
                'Mb': float(splat[3]) / 1000000,
            })
def run_analysis(self, requestform):
    """Run a CTL scan for the traits listed in *requestform*.

    Form fields read: ``trait_list`` (comma-separated ``trait:dataset``
    entries), ``strategy``, ``nperm``, ``parametric`` and ``significance``.
    The genotype matrix comes from the ``.geno`` file of the first trait's
    dataset group.  Results (image names/locations, the significant
    interactions and the form itself) are stored in ``self.results``.

    ``parametric`` is now parsed as text: ``bool("False")`` is ``True``,
    so the previous ``bool(...)`` conversion turned every non-empty form
    value, including "False" and "0", into a parametric scan.
    """
    print("Starting CTL analysis on dataset")
    self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
    self.trait_db_list = [x for x in self.trait_db_list if x]

    print("strategy:", requestform.get("strategy"))
    strategy = requestform.get("strategy")

    print("nperm:", requestform.get("nperm"))
    nperm = int(requestform.get("nperm"))

    print("parametric:", requestform.get("parametric"))
    # Anything except an explicit "false"-like value stays truthy, so
    # previously working truthy values behave as before.
    parametric_text = str(requestform.get("parametric")).strip().lower()
    parametric = parametric_text not in ("", "none", "false", "0", "no", "off")

    print("significance:", requestform.get("significance"))
    significance = float(requestform.get("significance"))

    # Get the name of the .geno file belonging to the first phenotype
    datasetname = self.trait_db_list[0].split(":")[1]
    dataset = data_set.create_dataset(datasetname)

    genofilelocation = locate(dataset.group.name + ".geno", "genotype")
    parser = genofile_parser.ConvertGenoFile(genofilelocation)
    parser.process_csv()

    # Create a genotype matrix
    individuals = parser.individuals
    markers = []
    markernames = []
    for marker in parser.markers:
        markernames.append(marker["name"])
        markers.append(marker["genotypes"])

    genotypes = list(itertools.chain(*markers))
    print(len(genotypes) / len(individuals), "==", len(parser.markers))

    # Filled row-wise as markers x individuals, then passed through r_t.
    rGeno = r_t(ro.r.matrix(r_unlist(genotypes),
                            nrow=len(markernames),
                            ncol=len(individuals),
                            dimnames=r_list(markernames, individuals),
                            byrow=True))

    # Create a phenotype matrix
    traits = []
    for trait in self.trait_db_list:
        print("retrieving data for", trait)
        # Empty entries were already filtered out of trait_db_list above.
        ts = trait.split(':')
        gt = TRAIT.GeneralTrait(name=ts[0], dataset_name=ts[1])
        gt.retrieve_sample_data(individuals)
        for ind in individuals:
            if ind in gt.data:
                traits.append(gt.data[ind].value)
            else:
                # Placeholder for individuals without a value.
                traits.append("-999")

    rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)),
                             nrow=len(self.trait_db_list),
                             ncol=len(individuals),
                             dimnames=r_list(self.trait_db_list, individuals),
                             byrow=True))

    # Use a data frame to store the objects
    rPheno = r_data_frame(rPheno)
    rGeno = r_data_frame(rGeno)

    # Debug: print the genotype and phenotype files to disk
    # r_write_table(rGeno, "~/outputGN/geno.csv")
    # r_write_table(rPheno, "~/outputGN/pheno.csv")

    # Perform the CTL scan
    res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, nperm=nperm,
                         parametric=parametric, ncores=6)

    # Get significant interactions
    significant = self.r_CTLsignificant(res, significance=significance)

    # Create an image for output
    self.results = {}
    self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
    self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

    self.results['ctlresult'] = significant
    # Store the user specified parameters for the output page
    self.results['requestform'] = requestform

    # Create the lineplot
    r_png(self.results['imgloc1'], width=1000, height=600)
    self.r_lineplot(res, significance=significance)
    r_dev_off()

    # Create the QTL like CTL plots; image keys start at 2 because
    # image 1 is the line plot, and phenotype indices start at 1.
    for n, trait in enumerate(self.trait_db_list, 2):
        self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
        self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)]
        r_png(self.results['imgloc' + str(n)], width=1000, height=600)
        self.r_plotCTLobject(res, (n - 1), significance=significance,
                             main='Phenotype ' + trait)
        r_dev_off()

    # Flush any output from R
    sys.stdout.flush()
def run_rqtl_geno(self):
    """Run an R/qtl scan on the group's .geno file through rpy2.

    Converts the genotype file to a cross object, adds the phenotype and
    covariates, then runs ``scantwo`` (when ``self.pair_scan`` is set) or
    ``scanone`` with optional permutations.  Returns the processed
    pair-scan or single-scan results.
    """
    print("Calling R/qtl")

    self.geno_to_rqtl_function()

    ## Handles to some common R functions
    r_library = ro.r["library"]
    r_c = ro.r["c"]
    r_sum = ro.r["sum"]
    plot = ro.r["plot"]
    postscript = ro.r["postscript"]
    png = ro.r["png"]
    dev_off = ro.r["dev.off"]

    print(r_library("qtl"))  # Load R/qtl

    ## Handles to some R/qtl functions
    scanone = ro.r["scanone"]
    scantwo = ro.r["scantwo"]
    calc_genoprob = ro.r["calc.genoprob"]
    read_cross = ro.r["read.cross"]
    write_cross = ro.r["write.cross"]
    GENOtoCSVR = ro.r["GENOtoCSVR"]  # the local GENOtoCSVR function

    crossname = self.dataset.group.name
    genofilelocation = locate(crossname + ".geno", "genotype")
    crossfilelocation = TMPDIR + crossname + ".cross"

    print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)

    # TODO: Add the SEX if that is available
    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)

    if self.manhattan_plot:
        cross_object = calc_genoprob(cross_object)
    else:
        cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")

    # Add the phenotype
    cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype())

    # for debug: write_cross(cross_object, "csvr", "test.csvr")

    # Scan for QTLs: create the additive covariate matrix first
    covar = self.create_covariates(cross_object)

    use_covar = self.do_control == "true"

    if self.pair_scan:
        # If sum(covar) > 0 we have a covariate matrix
        print("Using covariate" if use_covar else "No covariates")
        pair_args = {"pheno": "the_pheno"}
        if use_covar:
            pair_args["addcovar"] = covar
        pair_args["model"] = self.model
        pair_args["method"] = self.method
        pair_args["n_cluster"] = 16
        result_data_frame = scantwo(cross_object, **pair_args)

        print("Pair scan results:", result_data_frame)

        self.pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
        png(file=TMPDIR+self.pair_scan_filename)
        plot(result_data_frame)
        dev_off()

        return self.process_pair_scan_results(result_data_frame)

    print("Using covariate" if use_covar else "No covariates")
    scan_args = {"pheno": "the_pheno"}
    if use_covar:
        scan_args["addcovar"] = covar
    scan_args["model"] = self.model
    scan_args["method"] = self.method
    result_data_frame = scanone(cross_object, **scan_args)

    # Do permutation (if requested by user)
    if int(self.num_perm) > 0:
        perm_args = {"pheno_col": "the_pheno"}
        if use_covar:
            perm_args["addcovar"] = covar
        perm_args["n_perm"] = int(self.num_perm)
        perm_args["model"] = self.model
        perm_args["method"] = self.method
        perm_data_frame = scanone(cross_object, **perm_args)

        # Sets the thresholds for the webinterface
        self.process_rqtl_perm_results(perm_data_frame)

    return self.process_rqtl_results(result_data_frame)
def run_analysis(self, requestform):
    """Run a PheWAS analysis for the trait named in *requestform*.

    Form fields read: ``trait_id``, ``dataset``, ``num_region`` and
    ``sel_mtadjust``.  Builds the R SNP and phenotype aligners, calls
    ``self.r_PheWASManhattan`` and stores the image name/location and the
    four returned result items in ``self.results``.
    """
    logger.info("Starting PheWAS analysis on dataset")
    # Location of the BXD genotypes
    geno_path = locate("BXD.geno", "genotype")
    # Location of the pre-computed EMMA results
    precompfile = locate_phewas("PheWAS_pval_EMMA_norm.RData", "auwerx")

    # Get user parameters, trait_id and dataset, and store/update them in self
    self.trait_id = requestform["trait_id"]
    self.datasetname = requestform["dataset"]
    self.dataset = data_set.create_dataset(self.datasetname)
    self.region = int(requestform["num_region"])
    self.mtadjust = str(requestform["sel_mtadjust"])

    # Log some debug information
    logger.info("self.trait_id:" + self.trait_id + "\n")
    logger.info("self.datasetname:" + self.datasetname + "\n")
    logger.info("self.dataset.type:" + self.dataset.type + "\n")

    # GN Magic ?
    self.this_trait = GeneralTrait(dataset=self.dataset,
                                   name=self.trait_id,
                                   get_qtl_info=False,
                                   get_sample_info=False)
    logger.info(vars(self.this_trait))

    # Set the values we need
    self.chr = str(self.this_trait.chr)
    self.mb = int(self.this_trait.mb)

    # Log the region being analysed
    logger.info("location:" + self.chr + ":" + str(self.mb) + "+/-" + str(self.region) + "\n")

    # Load in the genotypes file to make the marker map
    parser = genofile_parser.ConvertGenoFile(geno_path)
    parser.process_csv()

    # Flattened (name, chr, Mb) triples, filled row-wise into R below
    snpinfo = []
    for marker in parser.markers:
        snpinfo.extend((marker["name"], marker["chr"], marker["Mb"]))

    marker_count = len(parser.markers)
    rnames = r_seq(1, marker_count)
    # Create the snp aligner object out of the BXD genotypes
    snpaligner = ro.r.matrix(snpinfo,
                             nrow=len(parser.markers),
                             dimnames=r_list(rnames, r_c("SNP", "Chr", "Pos")),
                             ncol=3,
                             byrow=True)

    # Create the phenotype aligner object using R
    phenoaligner = self.r_create_Pheno_aligner()

    self.results = {}
    self.results['imgurl1'] = webqtlUtil.genRandStr("phewas_") + ".png"
    self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
    self.results['mtadjust'] = self.mtadjust
    for image_key in ('imgurl1', 'imgloc1'):
        logger.info("IMAGE AT:", self.results[image_key])

    # Create the PheWAS plot (the gene/probe name, chromosome and
    # gene/probe positions should come from the user input)
    # TODO: generate the PDF in the temp folder, with a unique name
    assert (precompfile)
    assert (phenoaligner)
    assert (snpaligner)
    phewasres = self.r_PheWASManhattan("Test", precompfile, phenoaligner,
                                       snpaligner, "None", self.chr,
                                       self.mb, self.region,
                                       self.results['imgloc1'],
                                       self.mtadjust)
    self.results['phewas1'] = phewasres[0]
    self.results['phewas2'] = phewasres[1]
    self.results['tabulardata'] = phewasres[2]
    self.results['R_debuglog'] = phewasres[3]

    logger.info("Initialization of PheWAS done !")
def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
    """Run an R/qtl scan on the group's .geno file through rpy2.

    Returns the processed pair-scan results when *pair_scan* is set;
    otherwise the processed ``scanone`` results, preceded by
    ``(perm_output, suggestive, significant)`` when permutations were
    requested (``num_perm > 0`` and ``permCheck == "ON"``).
    """
    geno_to_rqtl_function(dataset)

    ## Handles to some common R functions
    r_library = ro.r["library"]
    r_c = ro.r["c"]
    r_sum = ro.r["sum"]
    plot = ro.r["plot"]
    postscript = ro.r["postscript"]
    png = ro.r["png"]
    dev_off = ro.r["dev.off"]

    print(r_library("qtl"))  # Load R/qtl

    ## Handles to some R/qtl functions
    scanone = ro.r["scanone"]
    scantwo = ro.r["scantwo"]
    calc_genoprob = ro.r["calc.genoprob"]
    read_cross = ro.r["read.cross"]
    write_cross = ro.r["write.cross"]
    GENOtoCSVR = ro.r["GENOtoCSVR"]  # the local GENOtoCSVR function

    crossname = dataset.group.name
    genofilelocation = locate(crossname + ".geno", "genotype")
    crossfilelocation = TMPDIR + crossname + ".cross"

    # TODO: Add the SEX if that is available
    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)

    if manhattan_plot:
        cross_object = calc_genoprob(cross_object)
    else:
        cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")

    # Add the phenotype
    cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals))

    # for debug: write_cross(cross_object, "csvr", "test.csvr")

    # Scan for QTLs: create the additive covariate matrix first
    covar = create_covariates(control_marker, cross_object)

    use_covar = do_control == "true"

    if pair_scan:
        # If sum(covar) > 0 we have a covariate matrix
        print("Using covariate" if use_covar else "No covariates")
        pair_args = {"pheno": "the_pheno"}
        if use_covar:
            pair_args["addcovar"] = covar
        pair_args["model"] = model
        pair_args["method"] = method
        pair_args["n_cluster"] = 16
        result_data_frame = scantwo(cross_object, **pair_args)

        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
        png(file=TEMPDIR + pair_scan_filename)
        plot(result_data_frame)
        dev_off()

        return process_pair_scan_results(result_data_frame)

    print("Using covariate" if use_covar else "No covariates")
    scan_args = {"pheno": "the_pheno"}
    if use_covar:
        scan_args["addcovar"] = covar
    scan_args["model"] = model
    scan_args["method"] = method
    result_data_frame = scanone(cross_object, **scan_args)

    # Do permutation (if requested by user)
    if num_perm > 0 and permCheck == "ON":
        perm_args = {"pheno_col": "the_pheno"}
        if use_covar:
            perm_args["addcovar"] = covar
        perm_args["n_perm"] = num_perm
        perm_args["model"] = model
        perm_args["method"] = method
        perm_data_frame = scanone(cross_object, **perm_args)

        # Sets the thresholds for the webinterface
        perm_output, suggestive, significant = process_rqtl_perm_results(
            num_perm, perm_data_frame)
        return perm_output, suggestive, significant, process_rqtl_results(
            result_data_frame)

    return process_rqtl_results(result_data_frame)
def run_analysis(self, requestform):
    """Run a PheWAS analysis for the trait named in *requestform*.

    Form fields read: ``trait_id``, ``dataset``, ``num_region`` and
    ``sel_mtadjust``.  Builds the R SNP and phenotype aligners, calls
    ``self.r_PheWASManhattan`` and stores the image name/location and the
    four returned result items in ``self.results``.
    """
    logger.info("Starting PheWAS analysis on dataset")
    # Location of the BXD genotypes
    geno_path = locate("BXD.geno", "genotype")
    # Location of the pre-computed EMMA results
    precompfile = locate_phewas("PheWAS_pval_EMMA_norm.RData", "auwerx")

    # Get user parameters, trait_id and dataset, and store/update them in self
    self.trait_id = requestform["trait_id"]
    self.datasetname = requestform["dataset"]
    self.dataset = data_set.create_dataset(self.datasetname)
    self.region = int(requestform["num_region"])
    self.mtadjust = str(requestform["sel_mtadjust"])

    # Log some debug information
    logger.info("self.trait_id:" + self.trait_id + "\n")
    logger.info("self.datasetname:" + self.datasetname + "\n")
    logger.info("self.dataset.type:" + self.dataset.type + "\n")

    # GN Magic ?
    self.this_trait = GeneralTrait(dataset=self.dataset,
                                   name=self.trait_id,
                                   get_qtl_info=False,
                                   get_sample_info=False)
    logger.info(vars(self.this_trait))

    # Set the values we need
    self.chr = str(self.this_trait.chr)
    self.mb = int(self.this_trait.mb)

    # Log the region being analysed
    logger.info("location:" + self.chr + ":" + str(self.mb) + "+/-" + str(self.region) + "\n")

    # Load in the genotypes file to make the marker map
    parser = genofile_parser.ConvertGenoFile(geno_path)
    parser.process_csv()

    # Flattened (name, chr, Mb) triples, filled row-wise into R below
    snpinfo = []
    for marker in parser.markers:
        snpinfo.extend((marker["name"], marker["chr"], marker["Mb"]))

    marker_count = len(parser.markers)
    rnames = r_seq(1, marker_count)
    # Create the snp aligner object out of the BXD genotypes
    snpaligner = ro.r.matrix(snpinfo,
                             nrow=len(parser.markers),
                             dimnames=r_list(rnames, r_c("SNP", "Chr", "Pos")),
                             ncol=3,
                             byrow=True)

    # Create the phenotype aligner object using R
    phenoaligner = self.r_create_Pheno_aligner()

    self.results = {}
    self.results['imgurl1'] = webqtlUtil.genRandStr("phewas_") + ".png"
    self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
    self.results['mtadjust'] = self.mtadjust
    for image_key in ('imgurl1', 'imgloc1'):
        logger.info("IMAGE AT:", self.results[image_key])

    # Create the PheWAS plot (the gene/probe name, chromosome and
    # gene/probe positions should come from the user input)
    # TODO: generate the PDF in the temp folder, with a unique name
    assert(precompfile)
    assert(phenoaligner)
    assert(snpaligner)
    phewasres = self.r_PheWASManhattan("Test", precompfile, phenoaligner,
                                       snpaligner, "None", self.chr,
                                       self.mb, self.region,
                                       self.results['imgloc1'],
                                       self.mtadjust)
    self.results['phewas1'] = phewasres[0]
    self.results['phewas2'] = phewasres[1]
    self.results['tabulardata'] = phewasres[2]
    self.results['R_debuglog'] = phewasres[3]

    logger.info("Initialization of PheWAS done !")
def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
    """Run an R/qtl scan on the group's genotype file through rpy2.

    Returns the processed pair-scan results when *pair_scan* is set.
    Otherwise returns ``(results, the_scale)``, or
    ``(perm_output, suggestive, significant, results, the_scale)`` when
    permutations were requested (``num_perm > 0`` and
    ``permCheck == "ON"``).
    """
    ## Handles to some common R functions
    r_library = ro.r["library"]
    r_c = ro.r["c"]
    plot = ro.r["plot"]
    png = ro.r["png"]
    dev_off = ro.r["dev.off"]

    print(r_library("qtl"))  # Load R/qtl

    ## Handles to some R/qtl functions
    scanone = ro.r["scanone"]
    scantwo = ro.r["scantwo"]
    calc_genoprob = ro.r["calc.genoprob"]

    crossname = dataset.group.name

    generate_cross_from_geno(dataset)
    GENOtoCSVR = ro.r["GENOtoCSVR"]  # the local GENOtoCSVR function
    crossfilelocation = TMPDIR + crossname + ".cross"

    # Prefer the group's explicit genotype file, else "<group>.geno".
    if dataset.group.genofile:
        geno_name = dataset.group.genofile
    else:
        geno_name = dataset.group.name + ".geno"
    genofilelocation = locate(geno_name, "genotype")

    # TODO: Add the SEX if that is available
    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)

    if manhattan_plot:
        cross_object = calc_genoprob(cross_object)
    else:
        cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")

    pheno_string = sanitize_rqtl_phenotype(vals)

    # Add the phenotype
    cross_object = add_phenotype(cross_object, pheno_string, "the_pheno")

    # Scan for QTLs: create the additive covariate markers first
    marker_covars = create_marker_covariates(control_marker, cross_object)

    has_cofactors = cofactors != ""
    if has_cofactors:
        # Create the covariates from selected traits
        cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples)
        ro.r('all_covars <- cbind(marker_covars, trait_covars)')
    else:
        ro.r('all_covars <- marker_covars')

    covars = ro.r['all_covars']

    if pair_scan:
        pair_with_covars = do_control == "true"
        logger.info("Using covariate" if pair_with_covars else "No covariates")
        pair_args = {"pheno": "the_pheno"}
        if pair_with_covars:
            pair_args["addcovar"] = covars
        pair_args["model"] = model
        pair_args["method"] = method
        pair_args["n_cluster"] = 16
        result_data_frame = scantwo(cross_object, **pair_args)

        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
        png(file=TEMPDIR+pair_scan_filename)
        plot(result_data_frame)
        dev_off()

        return process_pair_scan_results(result_data_frame)

    use_covars = do_control == "true" or has_cofactors
    logger.info("Using covariate" if use_covars else "No covariates")
    scan_args = {"pheno": "the_pheno"}
    if use_covars:
        scan_args["addcovar"] = covars
    scan_args["model"] = model
    scan_args["method"] = method
    result_data_frame = scanone(cross_object, **scan_args)

    # Do permutation (if requested by user)
    if num_perm > 0 and permCheck == "ON":
        perm_args = {"pheno_col": "the_pheno"}
        # ZS: The strata list would only be populated if "Stratified"
        # was checked on before mapping
        stratified = len(perm_strata_list) > 0
        if stratified:
            cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list)
        if use_covars:
            perm_args["addcovar"] = covars
            perm_args["n_perm"] = int(num_perm)
        else:
            perm_args["n_perm"] = num_perm
        if stratified:
            perm_args["perm_strata"] = strata_ob
        perm_args["model"] = model
        perm_args["method"] = method
        perm_data_frame = scanone(cross_object, **perm_args)

        # Sets the thresholds for the webinterface
        perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame)
        the_scale = check_mapping_scale(genofilelocation)
        return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species), the_scale

    the_scale = check_mapping_scale(genofilelocation)
    return process_rqtl_results(result_data_frame, dataset.group.species), the_scale
def run_analysis(self, requestform):
    """Run a CTL scan for the traits listed in ``requestform`` and store the output.

    Keys read from ``requestform``:
        trait_list: comma-separated entries of the form ``name:dataset``;
            surrounding whitespace is stripped and empty entries are dropped.
        strategy: passed unchanged to ``self.r_CTLscan``.
        nperm: converted with ``int`` and passed to ``self.r_CTLscan``.
        parametric: flag passed to ``self.r_CTLscan``. Textual values such as
            "False", "0", "no", "off" or "" (case-insensitive) count as false;
            any other non-empty text counts as true.
        significance: converted with ``float``; used for the significance
            filter and for the plots.

    Side effects:
        Sets ``self.trait_db_list``, ``self.results`` and ``self.elements``,
        calls ``self.addNode`` / ``self.addEdge`` for every significant
        interaction, and writes PNG files below ``GENERATED_IMAGE_DIR``.

    Returns:
        None.
    """
    print("Starting CTL analysis on dataset")
    stripped_names = (entry.strip() for entry in requestform['trait_list'].split(','))
    self.trait_db_list = [name for name in stripped_names if name]

    print("strategy:", requestform.get("strategy"))
    strategy = requestform.get("strategy")

    print("nperm:", requestform.get("nperm"))
    nperm = int(requestform.get("nperm"))

    print("parametric:", requestform.get("parametric"))
    raw_parametric = requestform.get("parametric")
    if hasattr(raw_parametric, "strip"):
        # Form values arrive as text, and bool("False") is True because any
        # non-empty string is truthy; interpret the text instead.
        parametric = raw_parametric.strip().lower() not in ("", "false", "0", "no", "off")
    else:
        parametric = bool(raw_parametric)

    print("significance:", requestform.get("significance"))
    significance = float(requestform.get("significance"))

    # Get the name of the .geno file belonging to the first phenotype
    datasetname = self.trait_db_list[0].split(":")[1]
    dataset = data_set.create_dataset(datasetname)

    genofilelocation = locate(dataset.group.name + ".geno", "genotype")
    parser = genofile_parser.ConvertGenoFile(genofilelocation)
    parser.process_csv()
    print(dataset.group)

    # Create a genotype matrix: filled row by row with one row per marker,
    # then passed through r_t.
    individuals = parser.individuals
    markernames = [marker["name"] for marker in parser.markers]
    markers = [marker["genotypes"] for marker in parser.markers]

    genotypes = list(itertools.chain.from_iterable(markers))
    print(len(genotypes) / len(individuals), "==", len(parser.markers))

    rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames=r_list(markernames, individuals), byrow=True))

    # Create a phenotype matrix: one row per trait, one column per individual.
    # Every entry of self.trait_db_list is non-empty (filtered above), so each
    # trait contributes exactly len(individuals) values.
    traits = []
    for trait in self.trait_db_list:
        print("retrieving data for", trait)
        ts = trait.split(':')
        gt = TRAIT.GeneralTrait(name=ts[0], dataset_name=ts[1])
        gt = TRAIT.retrieve_sample_data(gt, dataset, individuals)
        for ind in individuals:
            # "-999" is the placeholder for individuals without a value.
            traits.append(gt.data[ind].value if ind in gt.data else "-999")

    rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True))

    print(rPheno)

    # Use a data frame to store the objects
    rPheno = r_data_frame(rPheno, check_names=False)
    rGeno = r_data_frame(rGeno, check_names=False)

    # Debug: Print the genotype and phenotype files to disk
    #r_write_table(rGeno, "~/outputGN/geno.csv")
    #r_write_table(rPheno, "~/outputGN/pheno.csv")

    # Perform the CTL scan
    res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, nperm=nperm, parametric=parametric, ncores=6)

    # Get significant interactions
    significant = self.r_CTLsignificant(res, significance=significance)

    # Create an image for output
    self.results = {}
    self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
    self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

    self.results['ctlresult'] = significant
    self.results['requestform'] = requestform  # Store the user specified parameters for the output page

    # Create the lineplot
    r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png')
    self.r_lineplot(res, significance=significance)
    r_dev_off()

    # Create the QTL like CTL plots. Image numbering starts at 2 because
    # image 1 is the line plot; the index handed to R is n - 1.
    for n, trait in enumerate(self.trait_db_list, start=2):
        url_key = 'imgurl' + str(n)
        loc_key = 'imgloc' + str(n)
        self.results[url_key] = webqtlUtil.genRandStr("CTL_") + ".png"
        self.results[loc_key] = GENERATED_IMAGE_DIR + self.results[url_key]
        r_png(self.results[loc_key], width=1000, height=600, type='cairo-png')
        self.r_plotCTLobject(res, (n - 1), significance=significance, main='Phenotype ' + trait)
        r_dev_off()

    # Flush any output from R
    sys.stdout.flush()

    # Create the interactive graph for cytoscape visualization (Nodes and Edges)
    print(type(significant))
    if type(significant) is not ri.RNULLType:
        for x in range(len(significant[0])):
            print(significant[0][x], significant[1][x], significant[2][x])  # Debug to console
            tsS = significant[0][x].split(':')  # Source
            tsT = significant[2][x].split(':')  # Target
            gtS = TRAIT.GeneralTrait(name=tsS[0], dataset_name=tsS[1])  # Source trait object
            gtT = TRAIT.GeneralTrait(name=tsT[0], dataset_name=tsT[1])  # Target trait object
            self.addNode(gtS)
            self.addNode(gtT)
            self.addEdge(gtS, gtT, significant, x)

            # Update the trait names for the displayed table
            significant[0][x] = gtS.symbol + " (" + gtS.name + ")"
            significant[2][x] = gtT.symbol + " (" + gtT.name + ")"

    # NOTE(review): self.nodes_list / self.edges_list are not created here;
    # presumably they are initialised elsewhere on the instance - confirm.
    self.elements = json.dumps(self.nodes_list + self.edges_list)
def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
    """Build an R/qtl cross for ``dataset`` and run a one- or two-dimensional scan.

    The additive covariate matrix from ``create_covariates`` is handed to the
    scans only when ``do_control == "true"``.

    Returns:
        * ``pair_scan`` truthy: ``process_pair_scan_results(...)`` of the
          ``scantwo`` result, after the result has been plotted to a PNG.
        * ``num_perm > 0`` and ``permCheck == "ON"``: the tuple
          ``(perm_output, suggestive, significant, results)``.
        * otherwise: ``process_rqtl_results(...)`` of the ``scanone`` result.
    """
    geno_to_rqtl_function(dataset)

    # Handles to some common R functions
    r_library = ro.r["library"]
    r_c = ro.r["c"]
    plot = ro.r["plot"]
    png = ro.r["png"]
    dev_off = ro.r["dev.off"]

    # Load R/qtl before looking up its functions
    print(r_library("qtl"))

    # Handles to the R/qtl functions and the local GENOtoCSVR function
    scanone = ro.r["scanone"]
    scantwo = ro.r["scantwo"]
    calc_genoprob = ro.r["calc.genoprob"]
    GENOtoCSVR = ro.r["GENOtoCSVR"]

    crossname = dataset.group.name
    genofilelocation = locate(crossname + ".geno", "genotype")
    # NOTE(review): TMPDIR here, TEMPDIR for the pair-scan image below -
    # confirm that both names are defined and intended.
    crossfilelocation = TMPDIR + crossname + ".cross"

    # TODO: Add the SEX if that is available
    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)

    # Without a Manhattan plot the genotype probabilities are computed with
    # step=1 and stepwidth="max"; otherwise calc.genoprob's defaults are used.
    genoprob_options = {} if manhattan_plot else {"step": 1, "stepwidth": "max"}
    cross_object = calc_genoprob(cross_object, **genoprob_options)

    # Add the phenotype
    cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals))

    # Create the additive covariate matrix
    covar = create_covariates(control_marker, cross_object)

    # Every scan below takes the covariates under the same condition, so the
    # optional keyword is prepared once.
    if do_control == "true":
        logger.info("Using covariate")
        covar_options = {"addcovar": covar}
    else:
        logger.info("No covariates")
        covar_options = {}

    if pair_scan:
        result_data_frame = scantwo(cross_object, pheno="the_pheno", model=model, method=method, n_cluster=16, **covar_options)

        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
        png(file=TEMPDIR + pair_scan_filename)
        plot(result_data_frame)
        dev_off()

        return process_pair_scan_results(result_data_frame)

    result_data_frame = scanone(cross_object, pheno="the_pheno", model=model, method=method, **covar_options)

    # Do permutation (if requested by user)
    if num_perm > 0 and permCheck == "ON":
        perm_data_frame = scanone(cross_object, pheno_col="the_pheno", n_perm=num_perm, model=model, method=method, **covar_options)

        # Sets the thresholds for the web interface
        perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame)
        return perm_output, suggestive, significant, process_rqtl_results(result_data_frame)

    return process_rqtl_results(result_data_frame)