def test_run_gemma_firstrun_set_false(self, mock_parse_loco):
    """Check run_gemma's return value when ``first_run`` is False.

    ``parse_loco`` is mocked to return an empty list; the call is expected
    to yield ``([], "file1")``, i.e. the supplied ``output_files`` value
    comes back as the second element.
    """
    mock_parse_loco.return_value = []

    group = AttributeSetter({"genofile": "genofile.geno"})
    gemma_kwargs = {
        "this_trait": AttributeSetter({"name": "t1"}),
        "this_dataset": AttributeSetter({"group": group}),
        "samples": [],
        "vals": [],
        "covariates": "",
        "use_loco": True,
        "first_run": False,
        "output_files": "file1",
    }

    outcome = run_gemma(**gemma_kwargs)

    self.assertEqual(([], "file1"), outcome)
def test_run_gemma_firstrun_set_true(self, mock_gen_pheno_txt, mock_os,
                                     mock_choice, mock_gen_covar,
                                     mock_flat_files, mock_parse_loco):
    """Check run_gemma when ``first_run`` is left at its default.

    All collaborators are mocked; the assertions pin how often each mock
    is expected to be used and the ``([], "GP1_GWA_RRRRRR")`` return value.
    """
    # Fixture objects: four chromosomes CH1..CH4 wrapped the way the
    # dataset exposes them (dataset.species.chromosomes.chromosomes).
    chromosome_map = {
        f"CH{number}": AttributeSetter({"name": f"CH{number}"})
        for number in range(1, 5)
    }
    species = AttributeSetter(
        {"chromosomes": AttributeSetter({"chromosomes": chromosome_map})})
    dataset = AttributeSetter({
        "group": MockGroup({"name": "GP1", "genofile": "file_geno"}),
        "name": "dataset1_name",
        "species": species,
    })
    trait = AttributeSetter({"name": "trait1"})

    # Mock behaviour.
    mock_gen_pheno_txt.return_value = None
    mock_gen_covar.return_value = None
    mock_parse_loco.return_value = []
    mock_os.path.isfile.return_value = True
    # Every random pick is "R", so the generated suffix is "RRRRRR".
    mock_choice.return_value = "R"
    mock_flat_files.return_value = "/home/genotype/bimbam"

    results = run_gemma(this_trait=trait,
                        this_dataset=dataset,
                        samples=[],
                        vals=[],
                        covariates="",
                        use_loco=True)

    expected_name = "GP1_GWA_RRRRRR"
    self.assertEqual(mock_os.system.call_count, 2)
    mock_gen_pheno_txt.assert_called_once()
    mock_parse_loco.assert_called_once_with(dataset, expected_name, True)
    mock_os.path.isfile.assert_called_once_with(
        '/home/user/imgfile_output.assoc.txt')
    self.assertEqual(mock_flat_files.call_count, 4)
    self.assertEqual(results, ([], expected_name))
def __init__(self, start_vars, temp_uuid):
    """Run the mapping method chosen in ``start_vars`` and prepare display data.

    Steps, in order:

    1. ``helper_functions.get_species_dataset_trait`` populates ``self``
       (the code below relies on ``self.dataset``, ``self.this_trait`` and
       ``self.species``).
    2. Sample names/values to map are collected into ``self.samples`` and
       ``self.vals``.
    3. Display options are copied from ``start_vars`` onto ``self``.
    4. The backend named by ``start_vars['method']`` is run ("gemma",
       "rqtl_plink", "rqtl_geno", "reaper", "plink" or "pylmm").
    5. The returned markers are converted into ``self.qtl_results``,
       ``self.trimmed_markers``, ``self.json_data`` and ``self.js_data``.

    Args:
        start_vars: Mapping of request fields. ``method``,
            ``manhattan_plot`` and ``maf`` are always read; the other keys
            depend on the chosen method and on whether ``startMb`` is
            present (it is absent on the first page load).
        temp_uuid: Stored as ``self.temp_uuid``; its ``str()`` form is
            passed to ``run_permutations`` / ``gen_data`` for "pylmm".
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    self.temp_uuid = temp_uuid #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

    self.json_data = {'lodnames': ['lod.hk']}

    self.samples = [] # Want only ones with values
    self.vals = []

    # Neither of these two locals is read again in this method; they are
    # kept unchanged because the call's side effects are not visible here.
    all_samples_ordered = self.dataset.group.all_samples_ordered()
    primary_sample_names = list(all_samples_ordered)

    trait_data = self.this_trait.data
    for sample in self.dataset.group.samplelist: # sample is actually the name of an individual
        for item in trait_data:
            sample_name = trait_data[item].name
            if sample_name == sample:
                # Sample belongs to the trait: its value field is required.
                value = start_vars['value:' + sample_name]
                self.samples.append(sample_name)
                self.vals.append(value)
                break
        else:
            # Sample not in the trait data: include it only when a
            # non-empty value was submitted for it.
            value = start_vars.get('value:' + sample)
            if value:
                self.samples.append(sample)
                self.vals.append(value)

    self.mapping_method = start_vars['method']
    self.manhattan_plot = start_vars['manhattan_plot'] == "True"

    self.maf = start_vars['maf'] # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    self.pair_scan = False # Initializing this since it is checked in views to determine which template to use
    self.score_type = "LRS" #ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []

    #ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        selected_chr = int(start_vars['selected_chr'])
        #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
        self.selected_chr = (selected_chr + 1) if selected_chr != -1 else -1

    # Optional display settings: copied onto self only when submitted.
    for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                   "haplotypeAnalystCheck"):
        if option in start_vars:
            setattr(self, option, start_vars[option])

    if "startMb" in start_vars: #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])

        self.LRSCheck = start_vars['LRSCheck']

        self.showSNP = start_vars['showSNP'] if "showSNP" in start_vars else False
        self.showGenes = start_vars['showGenes'] if "showGenes" in start_vars else False
        self.viewLegend = start_vars['viewLegend'] if "viewLegend" in start_vars else False
    else:
        # First load: a missing or non-numeric num_perm means "no
        # permutations". Only conversion/lookup failures are absorbed.
        try:
            requested_perms = int(start_vars['num_perm'])
        except (KeyError, ValueError, TypeError):
            requested_perms = 0
        if requested_perms > 0:
            self.num_perm = requested_perms

        self.permCheck = "ON" if self.num_perm > 0 else False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.dataset.group.genofile = start_vars['genofile']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True

        # With permutations the backend returns a 4-tuple, otherwise just
        # the markers; the call arguments are the same in both cases.
        with_permutations = self.permCheck and self.num_perm > 0
        rqtl_output = rqtl_mapping.run_rqtl_geno(
            self.vals, self.dataset, self.method, self.model,
            self.permCheck, self.num_perm, self.do_control,
            self.control_marker, self.manhattan_plot, self.pair_scan)
        if with_permutations:
            self.perm_output, self.suggestive, self.significant, results = rqtl_output
        else:
            results = rqtl_output
    elif self.mapping_method == "reaper":
        if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON
            self.additiveCheck = start_vars['additiveCheck'] if "additiveCheck" in start_vars else False
            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            # Missing or non-numeric num_bootstrap disables bootstrapping.
            try:
                requested_bootstraps = int(start_vars['num_bootstrap'])
            except (KeyError, ValueError, TypeError):
                requested_bootstraps = 0
            if requested_bootstraps > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = requested_bootstraps
            else:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.dataset.group.genofile = start_vars['genofile']
        logger.info("Running qtlreaper")
        (results, self.json_data, self.perm_output, self.suggestive,
         self.significant, self.bootstrap_results) = qtlreaper_mapping.gen_reaper_results(
             self.this_trait, self.dataset, self.samples, self.json_data,
             self.num_perm, self.bootCheck, self.num_bootstrap,
             self.do_control, self.control_marker, self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf)
        #results = self.run_plink()
    elif self.mapping_method == "pylmm":
        logger.debug("RUNNING PYLMM")
        self.dataset.group.genofile = start_vars['genofile']
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        # NOTE(review): `results` is never bound on this path, so the
        # loops below raise NameError for an unrecognised method.
        logger.debug("RUNNING NOTHING")

    # NOTE(review): on Python 3 the `>` comparisons below raise TypeError
    # when a chromosome is the string "X"/"X/Y" and the other operand is an
    # int - confirm the types stored in the marker dicts.
    if self.pair_scan:
        self.qtl_results = []
        highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr1']
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if 'lod_score' in marker.keys():
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # The original iterated `enumerate(self.qtl_results)` and indexed
        # the resulting (index, item) tuples with string keys, and it
        # appended to json_data lists that were never created. Create the
        # lists and iterate the markers themselves.
        for key in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(key, [])
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr']
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                    self.qtl_results.append(marker)

        self.trimmed_markers = trim_markers_for_table(results)

        if self.mapping_method != "gemma":
            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for qtl in self.qtl_results:
                # The highest numeric chromosome is reported as "X".
                if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                # Prefer the LRS value when the marker carries one.
                if 'lrs_value' in qtl.keys():
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

            #Get chromosome lengths for drawing the interval map plot
            chromosome_mb_lengths = {}
            self.json_data['chrnames'] = []
            chromosomes = self.species.chromosomes.chromosomes
            for key in chromosomes.keys():
                chromosome = chromosomes[key]
                self.json_data['chrnames'].append([chromosome.name, chromosome.mb_length])
                chromosome_mb_lengths[key] = chromosome.mb_length

            self.js_data = dict(
                result_score_type=self.score_type,
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                chromosomes=chromosome_mb_lengths,
                qtl_results=self.qtl_results,
                num_perm=self.num_perm,
                perm_results=self.perm_output,
            )
def __init__(self, start_vars, temp_uuid):
    """Run the mapping method chosen in ``start_vars`` and prepare display data.

    ``helper_functions.get_species_dataset_trait`` populates ``self`` first
    (the code below relies on ``self.dataset``, ``self.this_trait`` and
    ``self.species``). One value per entry of
    ``self.dataset.group.samplelist`` is then read from ``start_vars``, the
    backend named by ``start_vars["method"]`` is run ("gemma",
    "rqtl_plink", "rqtl_geno", "plink" or "pylmm"), and the resulting
    markers are converted into ``self.qtl_results``, ``self.json_data`` and
    ``self.js_data``.

    Args:
        start_vars: Mapping of request fields. ``method``,
            ``manhattan_plot``, ``maf`` and one ``"value:<sample>"`` entry
            per sample are always read; other keys depend on the method.
        temp_uuid: Its ``str()`` form is passed to ``run_permutations`` /
            ``gen_data`` for the "pylmm" method.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # tempdata = temp_data.TempData(temp_uuid)

    self.json_data = {"lodnames": ["lod.hk"]}

    self.samples = []  # Want only ones with values
    self.vals = []

    for sample in self.dataset.group.samplelist:
        value = start_vars["value:" + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars["method"]
    self.manhattan_plot = start_vars["manhattan_plot"] == "true"

    self.maf = start_vars["maf"]  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
    self.score_type = "LRS"  # ZS: LRS or LOD

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        self.score_type = "LOD"
        included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
        self.dataset.group.get_specified_markers(markers=included_markers)
        self.dataset.group.markers.add_pvalues(p_values)
        results = self.dataset.group.markers.markers
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        # An empty field means "no permutations"; otherwise the submitted
        # value is stored as received (not converted here).
        if start_vars["num_perm"] == "":
            self.num_perm = 0
        else:
            self.num_perm = start_vars["num_perm"]
        self.control = start_vars["control_marker"]
        self.do_control = start_vars["do_control"]
        print("StartVars:", start_vars)
        self.method = start_vars["mapmethod_rqtl_geno"]
        self.model = start_vars["mapmodel_rqtl_geno"]
        if start_vars["pair_scan"] == "true":
            self.pair_scan = True
        results = self.run_rqtl_geno()
        print("qtl_results:", results)
    elif self.mapping_method == "plink":
        results = self.run_plink()
        # print("qtl_results:", pf(results))
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        self.num_perm = start_vars["num_perm"]
        if self.num_perm != "":
            if int(self.num_perm) > 0:
                self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        # NOTE(review): `results` is never bound on this path, so the
        # loops below raise NameError for an unrecognised method.
        print("RUNNING NOTHING")

    # NOTE(review): on Python 3 the `>` comparisons below raise TypeError
    # when a chromosome is the string "X"/"X/Y" and the other operand is an
    # int - confirm the types stored in the marker dicts.
    if self.pair_scan:
        self.qtl_results = []
        highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker["chr1"]
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if "lod_score" in marker:
                    self.qtl_results.append(marker)

        # The original iterated `enumerate(self.qtl_results)` and indexed
        # the resulting (index, item) tuples with string keys, and it
        # appended to json_data lists that were never created. Create the
        # lists and iterate the markers themselves.
        for key in ("chr1", "chr2", "Mb", "markernames"):
            self.json_data.setdefault(key, [])
        for qtl in self.qtl_results:
            self.json_data["chr1"].append(str(qtl["chr1"]))
            self.json_data["chr2"].append(str(qtl["chr2"]))
            self.json_data["Mb"].append(qtl["Mb"])
            self.json_data["markernames"].append(qtl["name"])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker["chr"]
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if "lod_score" in marker:
                    self.qtl_results.append(marker)

        self.json_data["chr"] = []
        self.json_data["pos"] = []
        self.json_data["lod.hk"] = []
        self.json_data["markernames"] = []

        self.json_data["suggestive"] = self.suggestive
        self.json_data["significant"] = self.significant

        # Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
        for index, qtl in enumerate(self.qtl_results):
            if index < 40:
                print("lod score is:", qtl["lod_score"])
            # The highest numeric chromosome is reported as "X".
            if qtl["chr"] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                print("changing to X")
                self.json_data["chr"].append("X")
            else:
                self.json_data["chr"].append(str(qtl["chr"]))
            self.json_data["pos"].append(qtl["Mb"])
            # Prefer the LRS value when the marker carries one.
            if "lrs_value" in qtl:
                self.json_data["lod.hk"].append(str(qtl["lrs_value"]))
            else:
                self.json_data["lod.hk"].append(str(qtl["lod_score"]))
            self.json_data["markernames"].append(qtl["name"])

        # Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data["chrnames"] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes.keys():
            chromosome = chromosomes[key]
            self.json_data["chrnames"].append([chromosome.name, chromosome.mb_length])
            chromosome_mb_lengths[key] = chromosome.mb_length

        # print("json_data:", self.json_data)

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
        )
def do_mapping_for_api(start_vars):
    """Run a GEMMA or R/qtl mapping for an API request.

    ``start_vars`` must contain ``'db'`` (dataset name) and ``'trait_id'``;
    the remaining settings come from ``initialize_parameters``.

    Returns a 2-tuple whose second element echoes the output format:
    ``(rows, "csv")`` where ``rows`` is the header row followed by one row
    per marker, ``(markers, "json")``, or ``(markers, None)`` for any other
    format value.
    """
    assert ('db' in start_vars)
    assert ('trait_id' in start_vars)

    dataset = data_set.create_dataset(dataset_name=start_vars['db'])
    dataset.group.get_markers()
    this_trait = retrieve_sample_data(
        GeneralTrait(dataset=dataset, name=start_vars['trait_id']), dataset)

    # `vals` gets one entry per sample in the group ("x" when the trait has
    # no data for it); `samples` only receives the keys of matched entries.
    samples = []
    vals = []
    for sample in dataset.group.samplelist:
        for key in this_trait.data:
            if this_trait.data[key].name == sample:
                value = str(this_trait.data[key].value)
                samples.append(key)
                vals.append(value)
                break
        else:
            vals.append("x")

    mapping_params = initialize_parameters(start_vars, dataset, this_trait)

    covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed.

    method = mapping_params['mapping_method']
    if method == "gemma":
        header_row = ["name", "chr", "Mb", "lod_score", "p_value"]
        #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api
        loco_requested = mapping_params['use_loco'] == "True"
        gemma_output = gemma_mapping.run_gemma(
            this_trait, dataset, samples, vals, covariates,
            mapping_params['use_loco'], mapping_params['maf'])
        result_markers = gemma_output[0] if loco_requested else gemma_output
    elif method == "rqtl":
        header_row = ["name", "chr", "cM", "lod_score"]
        # With permutations the backend returns a 4-tuple; only the
        # markers (last element) are used here.
        with_permutations = mapping_params['num_perm'] > 0
        rqtl_output = rqtl_mapping.run_rqtl_geno(
            vals, dataset, mapping_params['rqtl_method'],
            mapping_params['rqtl_model'], mapping_params['perm_check'],
            mapping_params['num_perm'], mapping_params['do_control'],
            mapping_params['control_marker'],
            mapping_params['manhattan_plot'], mapping_params['pair_scan'])
        if with_permutations:
            _sperm_output, _suggestive, _significant, result_markers = rqtl_output
        else:
            result_markers = rqtl_output

    limit = mapping_params['limit_to']
    if limit:
        result_markers = result_markers[:limit]

    output_format = mapping_params['format']
    if output_format == "csv":
        output_rows = [header_row]
        output_rows.extend(
            [marker[header] for header in header_row]
            for marker in result_markers)
        return output_rows, output_format
    if output_format == "json":
        return result_markers, output_format
    return result_markers, None
def __init__(self, start_vars, temp_uuid):
    """Run the mapping method chosen in ``start_vars`` and prepare display data.

    ``helper_functions.get_species_dataset_trait`` populates ``self`` first
    (the code below relies on ``self.dataset``, ``self.this_trait`` and
    ``self.species``). One value per entry of
    ``self.dataset.group.samplelist`` is read from ``start_vars``, display
    options are copied onto ``self``, the backend named by
    ``start_vars['method']`` is run ("gemma", "rqtl_plink", "rqtl_geno",
    "reaper", "plink" or "pylmm"), and the resulting markers are converted
    into ``self.qtl_results``, ``self.json_data`` and ``self.js_data``.

    Args:
        start_vars: Mapping of request fields. ``method``,
            ``manhattan_plot``, ``maf`` and one ``'value:<sample>'`` entry
            per sample are always read; the other keys depend on the chosen
            method and on whether ``startMb`` is present (it is absent on
            the first page load).
        temp_uuid: Stored as ``self.temp_uuid``; its ``str()`` form is
            passed to ``run_permutations`` / ``gen_data`` for "pylmm".
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    #tempdata = temp_data.TempData(temp_uuid)

    self.temp_uuid = temp_uuid #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

    self.json_data = {'lodnames': ['lod.hk']}

    self.samples = [] # Want only ones with values
    self.vals = []

    for sample in self.dataset.group.samplelist:
        value = start_vars['value:' + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars['method']
    self.manhattan_plot = start_vars['manhattan_plot'] == "true"

    self.maf = start_vars['maf'] # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    self.pair_scan = False # Initializing this since it is checked in views to determine which template to use
    self.score_type = "LRS" #ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.bootstrap_results = []

    #ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        selected_chr = int(start_vars['selected_chr'])
        #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
        self.selected_chr = (selected_chr + 1) if selected_chr != -1 else -1

    # Optional display settings: copied onto self only when submitted.
    for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                   "haplotypeAnalystCheck"):
        if option in start_vars:
            setattr(self, option, start_vars[option])

    if "startMb" in start_vars: #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])

        self.LRSCheck = start_vars['LRSCheck']

        self.showSNP = start_vars['showSNP'] if "showSNP" in start_vars else False
        self.showGenes = start_vars['showGenes'] if "showGenes" in start_vars else False
        self.viewLegend = start_vars['viewLegend'] if "viewLegend" in start_vars else False
    else:
        # First load: a missing, non-numeric or non-positive num_perm
        # means "no permutations". Only conversion/lookup failures are
        # absorbed.
        try:
            requested_perms = int(start_vars['num_perm'])
        except (KeyError, ValueError, TypeError):
            requested_perms = 0
        self.num_perm = requested_perms if requested_perms > 0 else 0

        self.LRSCheck = self.score_type
        self.permCheck = "ON"
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        self.score_type = "LOD"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            included_markers, p_values = gemma_mapping.run_gemma(
                self.dataset, self.samples, self.vals)
        with Bench("Getting markers from csv"):
            marker_obs = get_markers_from_csv(included_markers, p_values,
                                              self.dataset.group.name)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        results = self.run_rqtl_geno()
    elif self.mapping_method == "reaper":
        if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON
            self.additiveCheck = start_vars['additiveCheck'] if "additiveCheck" in start_vars else False
            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            # Missing or non-numeric num_bootstrap disables bootstrapping.
            try:
                requested_bootstraps = int(start_vars['num_bootstrap'])
            except (KeyError, ValueError, TypeError):
                requested_bootstraps = 0
            if requested_bootstraps > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = requested_bootstraps
            else:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        results = self.gen_reaper_results()
    elif self.mapping_method == "plink":
        results = self.run_plink()
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        # NOTE(review): `results` is never bound on this path, so the
        # loops below raise NameError for an unrecognised method.
        print("RUNNING NOTHING")

    # NOTE(review): on Python 3 the `>` comparisons below raise TypeError
    # when a chromosome is the string "X"/"X/Y" and the other operand is an
    # int - confirm the types stored in the marker dicts.
    if self.pair_scan:
        self.qtl_results = []
        highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr1']
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if 'lod_score' in marker.keys():
                    self.qtl_results.append(marker)

        # The original iterated `enumerate(self.qtl_results)` and indexed
        # the resulting (index, item) tuples with string keys, and it
        # appended to json_data lists that were never created. Create the
        # lists and iterate the markers themselves.
        for key in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(key, [])
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr']
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                    self.qtl_results.append(marker)

        self.trimmed_markers = trim_markers_for_table(results)

        self.json_data['chr'] = []
        self.json_data['pos'] = []
        self.json_data['lod.hk'] = []
        self.json_data['markernames'] = []

        self.json_data['suggestive'] = self.suggestive
        self.json_data['significant'] = self.significant

        #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
        for qtl in self.qtl_results:
            # The highest numeric chromosome is reported as "X".
            if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                self.json_data['chr'].append("X")
            else:
                self.json_data['chr'].append(str(qtl['chr']))
            self.json_data['pos'].append(qtl['Mb'])
            # Prefer the LRS value when the marker carries one.
            if 'lrs_value' in qtl.keys():
                self.json_data['lod.hk'].append(str(qtl['lrs_value']))
            else:
                self.json_data['lod.hk'].append(str(qtl['lod_score']))
            self.json_data['markernames'].append(qtl['name'])

        #Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes.keys():
            chromosome = chromosomes[key]
            self.json_data['chrnames'].append([chromosome.name, chromosome.mb_length])
            chromosome_mb_lengths[key] = chromosome.mb_length

        # print("json_data:", self.json_data)

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
        )
def __init__(self, start_vars, temp_uuid): helper_functions.get_species_dataset_trait(self, start_vars) self.temp_uuid = temp_uuid #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) #ZS: Needed to zoom in or remap temp traits like PCA traits if "temp_trait" in start_vars and start_vars['temp_trait'] != "False": self.temp_trait = "True" self.group = self.dataset.group.name self.json_data = {} self.json_data['lodnames'] = ['lod.hk'] #ZS: Sometimes a group may have a genofile that only includes a subset of samples genofile_samplelist = [] if 'genofile' in start_vars: if start_vars['genofile'] != "": self.genofile_string = start_vars['genofile'] self.dataset.group.genofile = self.genofile_string.split( ":")[0] genofile_samplelist = get_genofile_samplelist(self.dataset) all_samples_ordered = self.dataset.group.all_samples_ordered() self.vals = [] self.samples = [] self.sample_vals = start_vars['sample_vals'] sample_val_dict = json.loads(self.sample_vals) samples = sample_val_dict.keys() if (len(genofile_samplelist) != 0): for sample in genofile_samplelist: self.samples.append(sample) if sample in samples: self.vals.append(sample_val_dict[sample]) else: self.vals.append("x") else: for sample in self.dataset.group.samplelist: if sample in samples: self.vals.append(sample_val_dict[sample]) self.samples.append(sample) if 'n_samples' in start_vars: self.n_samples = start_vars['n_samples'] else: self.n_samples = len([val for val in self.vals if val != "x"]) #ZS: Check if genotypes exist in the DB in order to create links for markers self.geno_db_exists = geno_db_exists(self.dataset) self.mapping_method = start_vars['method'] if "results_path" in start_vars: self.mapping_results_path = start_vars['results_path'] else: mapping_results_filename = self.dataset.group.name + "_" + ''.join( random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) self.mapping_results_path = "{}{}.csv".format( webqtlConfig.GENERATED_IMAGE_DIR, 
mapping_results_filename) self.manhattan_plot = False if 'manhattan_plot' in start_vars: if start_vars['manhattan_plot'].lower() != "false": self.color_scheme = "alternating" if "color_scheme" in start_vars: self.color_scheme = start_vars['color_scheme'] if self.color_scheme == "single": self.manhattan_single_color = start_vars[ 'manhattan_single_color'] self.manhattan_plot = True self.maf = start_vars['maf'] # Minor allele frequency if "use_loco" in start_vars: self.use_loco = start_vars['use_loco'] else: self.use_loco = None self.suggestive = "" self.significant = "" self.pair_scan = False # Initializing this since it is checked in views to determine which template to use if 'transform' in start_vars: self.transform = start_vars['transform'] else: self.transform = "" self.score_type = "LRS" #ZS: LRS or LOD self.mapping_scale = "physic" if "mapping_scale" in start_vars: self.mapping_scale = start_vars['mapping_scale'] self.num_perm = 0 self.perm_output = [] self.bootstrap_results = [] self.covariates = start_vars[ 'covariates'] if "covariates" in start_vars else "" self.categorical_vars = [] #ZS: This is passed to GN1 code for single chr mapping self.selected_chr = -1 if "selected_chr" in start_vars: if int( start_vars['selected_chr'] ) != -1: #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this self.selected_chr = int(start_vars['selected_chr']) + 1 else: self.selected_chr = int(start_vars['selected_chr']) if "startMb" in start_vars: self.startMb = start_vars['startMb'] if "endMb" in start_vars: self.endMb = start_vars['endMb'] if "graphWidth" in start_vars: self.graphWidth = start_vars['graphWidth'] if "lrsMax" in start_vars: self.lrsMax = start_vars['lrsMax'] if "haplotypeAnalystCheck" in start_vars: self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck'] if "startMb" in start_vars: #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during 
the first load if "permCheck" in start_vars: self.permCheck = "ON" else: self.permCheck = False self.num_perm = int(start_vars['num_perm']) self.LRSCheck = start_vars['LRSCheck'] if "showSNP" in start_vars: self.showSNP = start_vars['showSNP'] else: self.showSNP = False if "showGenes" in start_vars: self.showGenes = start_vars['showGenes'] else: self.showGenes = False if "viewLegend" in start_vars: self.viewLegend = start_vars['viewLegend'] else: self.viewLegend = False else: try: if int(start_vars['num_perm']) > 0: self.num_perm = int(start_vars['num_perm']) except: self.num_perm = 0 if self.num_perm > 0: self.permCheck = "ON" else: self.permCheck = False self.showSNP = "ON" self.showGenes = "ON" self.viewLegend = "ON" #self.dataset.group.get_markers() if self.mapping_method == "gemma": self.first_run = True self.output_files = None if 'output_files' in start_vars: self.output_files = start_vars['output_files'] if 'first_run' in start_vars: #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) self.first_run = False self.score_type = "-logP" self.manhattan_plot = True with Bench("Running GEMMA"): if self.use_loco == "True": marker_obs, self.output_files = gemma_mapping.run_gemma( self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) else: marker_obs, self.output_files = gemma_mapping.run_gemma( self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) results = marker_obs elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() elif self.mapping_method == "rqtl_geno": perm_strata = [] if "perm_strata" in start_vars and "categorical_vars" in start_vars: self.categorical_vars = start_vars["categorical_vars"].split( ",") if len(self.categorical_vars ) and start_vars["perm_strata"] == "True": primary_samples = 
SampleList(dataset=self.dataset, sample_names=self.samples, this_trait=self.this_trait) perm_strata = get_perm_strata(self.this_trait, primary_samples, self.categorical_vars, self.samples) self.score_type = "LOD" self.control_marker = start_vars['control_marker'] self.do_control = start_vars['do_control'] if 'mapmethod_rqtl_geno' in start_vars: self.method = start_vars['mapmethod_rqtl_geno'] else: self.method = "em" self.model = start_vars['mapmodel_rqtl_geno'] #if start_vars['pair_scan'] == "true": # self.pair_scan = True if self.permCheck and self.num_perm > 0: self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno( self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) else: results = rqtl_mapping.run_rqtl_geno( self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) elif self.mapping_method == "reaper": if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: self.additiveCheck = start_vars['additiveCheck'] else: self.additiveCheck = False if "bootCheck" in start_vars: self.bootCheck = "ON" else: self.bootCheck = False self.num_bootstrap = int(start_vars['num_bootstrap']) else: self.additiveCheck = "ON" try: if int(start_vars['num_bootstrap']) > 0: self.bootCheck = "ON" self.num_bootstrap = int(start_vars['num_bootstrap']) else: self.bootCheck = False self.num_bootstrap = 0 except: self.bootCheck = False self.num_bootstrap = 0 self.reaper_version = start_vars['reaper_version'] self.control_marker = start_vars['control_marker'] self.do_control = start_vars['do_control'] logger.info("Running qtlreaper") if self.reaper_version == "new": 
self.first_run = True self.output_files = None if 'first_run' in start_vars: #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) self.first_run = False if 'output_files' in start_vars: self.output_files = start_vars['output_files'].split( ",") results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper( self.this_trait, self.dataset, self.samples, self.vals, self.json_data, self.num_perm, self.bootCheck, self.num_bootstrap, self.do_control, self.control_marker, self.manhattan_plot, self.first_run, self.output_files) else: results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper( self.this_trait, self.dataset, self.samples, self.vals, self.json_data, self.num_perm, self.bootCheck, self.num_bootstrap, self.do_control, self.control_marker, self.manhattan_plot) elif self.mapping_method == "plink": self.score_type = "-logP" self.manhattan_plot = True results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf) #results = self.run_plink() else: logger.debug("RUNNING NOTHING") self.no_results = False if len(results) == 0: self.no_results = True else: if self.pair_scan == True: self.qtl_results = [] highest_chr = 1 #This is needed in order to convert the highest chr to X/Y for marker in results: if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[ 'chr1'] == "X/Y": if marker['chr1'] > highest_chr or marker[ 'chr1'] == "X" or marker['chr1'] == "X/Y": highest_chr = marker['chr1'] if 'lod_score' in list(marker.keys()): self.qtl_results.append(marker) self.trimmed_markers = results for qtl in enumerate(self.qtl_results): self.json_data['chr1'].append(str(qtl['chr1'])) self.json_data['chr2'].append(str(qtl['chr2'])) self.json_data['Mb'].append(qtl['Mb']) self.json_data['markernames'].append(qtl['name']) self.js_data 
= dict(json_data=self.json_data, this_trait=self.this_trait.name, data_set=self.dataset.name, maf=self.maf, manhattan_plot=self.manhattan_plot, mapping_scale=self.mapping_scale, qtl_results=self.qtl_results) else: self.qtl_results = [] self.results_for_browser = [] self.annotations_for_browser = [] highest_chr = 1 #This is needed in order to convert the highest chr to X/Y for marker in results: if 'Mb' in marker: this_ps = marker['Mb'] * 1000000 else: this_ps = marker['cM'] * 1000000 browser_marker = dict(chr=str(marker['chr']), rs=marker['name'], ps=this_ps, url="/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno") if self.geno_db_exists == "True": annot_marker = dict(name=str(marker['name']), chr=str(marker['chr']), rs=marker['name'], pos=this_ps, url="/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno") else: annot_marker = dict(name=str(marker['name']), chr=str(marker['chr']), rs=marker['name'], pos=this_ps) if 'lrs_value' in marker and marker['lrs_value'] > 0: browser_marker['p_wald'] = 10**-(marker['lrs_value'] / 4.61) elif 'lod_score' in marker and marker['lod_score'] > 0: browser_marker['p_wald'] = 10**-(marker['lod_score']) else: browser_marker['p_wald'] = 0 self.results_for_browser.append(browser_marker) self.annotations_for_browser.append(annot_marker) if str(marker['chr']) > '0' or str( marker['chr']) == "X" or str( marker['chr']) == "X/Y": if str(marker['chr']) > str(highest_chr) or str( marker['chr']) == "X" or str( marker['chr']) == "X/Y": highest_chr = marker['chr'] if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): if 'Mb' in marker.keys(): marker['display_pos'] = "Chr" + str( marker['chr']) + ": " + "{:.6f}".format( marker['Mb']) elif 'cM' in marker.keys(): marker['display_pos'] = "Chr" + str( marker['chr']) + ": " + "{:.3f}".format( marker['cM']) else: marker['display_pos'] = "N/A" self.qtl_results.append(marker) total_markers = len(self.qtl_results) with 
Bench("Exporting Results"): export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type) with Bench("Trimming Markers for Figure"): if len(self.qtl_results) > 30000: self.qtl_results = trim_markers_for_figure( self.qtl_results) self.results_for_browser = trim_markers_for_figure( self.results_for_browser) filtered_annotations = [] for marker in self.results_for_browser: for annot_marker in self.annotations_for_browser: if annot_marker['rs'] == marker['rs']: filtered_annotations.append(annot_marker) break self.annotations_for_browser = filtered_annotations browser_files = write_input_for_browser( self.dataset, self.results_for_browser, self.annotations_for_browser) else: browser_files = write_input_for_browser( self.dataset, self.results_for_browser, self.annotations_for_browser) with Bench("Trimming Markers for Table"): self.trimmed_markers = trim_markers_for_table(results) chr_lengths = get_chr_lengths(self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) #ZS: For zooming into genome browser, need to pass chromosome name instead of number if self.dataset.group.species == "mouse": if self.selected_chr == 20: this_chr = "X" else: this_chr = str(self.selected_chr) elif self.dataset.group.species == "rat": if self.selected_chr == 21: this_chr = "X" else: this_chr = str(self.selected_chr) else: if self.selected_chr == 22: this_chr = "X" elif self.selected_chr == 23: this_chr = "Y" else: this_chr = str(self.selected_chr) if self.mapping_method != "gemma": if self.score_type == "LRS": significant_for_browser = self.significant / 4.61 else: significant_for_browser = self.significant self.js_data = dict( #result_score_type = self.score_type, #this_trait = self.this_trait.name, #data_set = self.dataset.name, #maf = self.maf, #manhattan_plot = self.manhattan_plot, #mapping_scale = self.mapping_scale, #chromosomes = chromosome_mb_lengths, #qtl_results = self.qtl_results, 
categorical_vars=self.categorical_vars, chr_lengths=chr_lengths, num_perm=self.num_perm, perm_results=self.perm_output, significant=significant_for_browser, browser_files=browser_files, selected_chr=this_chr, total_markers=total_markers) else: self.js_data = dict(chr_lengths=chr_lengths, browser_files=browser_files, selected_chr=this_chr, total_markers=total_markers)
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and prepare its results for display.

    Reads the submitted form values, collects the samples that have a value,
    dispatches on ``start_vars['method']`` to one of the mapping back ends,
    exports the results (non pair-scan path) and builds ``self.js_data``.

    Args:
        start_vars: Mapping of submitted values. ``method``,
            ``manhattan_plot`` and ``maf`` are always read; the other keys
            are optional or only read for a specific mapping method.
        temp_uuid: Stored on the instance and passed, as a string, to the
            pylmm helpers.

    Raises:
        KeyError: If a key that is read unconditionally is missing.
        ValueError: If ``start_vars['method']`` is not one of the handled
            methods, or a numeric field that is parsed without a fallback
            (for example ``selected_chr``) is not an integer.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    self.samples = []  # Want only ones with values
    self.vals = []

    # NOTE(review): the returned list was never used; the call is kept only
    # in case it has side effects -- confirm and drop.
    self.dataset.group.all_samples_ordered()

    # Build the set of trait sample names once instead of rescanning the
    # trait data for every sample in the group.
    trait_data = self.this_trait.data
    trait_sample_names = {trait_data[key].name for key in trait_data}

    # "sample" is actually the name of an individual
    for sample in self.dataset.group.samplelist:
        if sample in trait_sample_names:
            # Samples present in the trait data must have a submitted value.
            value = start_vars['value:' + sample]
        else:
            # Other samples are only kept when a non-empty value was sent.
            value = start_vars.get('value:' + sample)
            if not value:
                continue
        self.samples.append(sample)
        self.vals.append(value)

    # ZS: Check if genotypes exist in the DB in order to create links for
    # markers. The flag is kept as the string it is submitted as.
    if "geno_db_exists" in start_vars:
        self.geno_db_exists = start_vars['geno_db_exists']
    else:
        self.geno_db_exists = "True"

    self.mapping_method = start_vars['method']

    if "results_path" in start_vars:
        self.mapping_results_path = start_vars['results_path']
    else:
        # No path supplied: derive one from the group name plus a random
        # six character suffix.
        random_suffix = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(6))
        mapping_results_filename = (
            self.dataset.group.name + "_" + random_suffix)
        self.mapping_results_path = "{}{}.csv".format(
            webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

    self.manhattan_plot = start_vars['manhattan_plot'] == "True"

    self.maf = start_vars['maf']  # Minor allele frequency
    if "use_loco" in start_vars:
        self.use_loco = start_vars['use_loco']
    else:
        self.use_loco = None
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which
    # template to use
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []
    if "covariates" in start_vars:
        self.covariates = start_vars['covariates']
    else:
        self.covariates = None

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        requested_chr = int(start_vars['selected_chr'])
        # ZS: Needs to be -1 if showing full map; there's probably a better
        # way to fix this
        if requested_chr != -1:
            self.selected_chr = requested_chr + 1
        else:
            self.selected_chr = requested_chr

    # Display options that are copied onto the instance only when submitted.
    for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                   "haplotypeAnalystCheck"):
        if option in start_vars:
            setattr(self, option, start_vars[option])

    if "startMb" in start_vars:
        # ZS: This is to ensure showGenes, Legend, etc are checked the first
        # time you open the mapping page, since startMb will only not be set
        # during the first load
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])

        self.LRSCheck = start_vars['LRSCheck']

        for option in ("showSNP", "showGenes", "viewLegend"):
            if option in start_vars:
                setattr(self, option, start_vars[option])
            else:
                setattr(self, option, False)
    else:
        # First load: a missing or non-numeric permutation count means
        # "no permutations" rather than an error.
        try:
            requested_perm = int(start_vars['num_perm'])
        except (KeyError, TypeError, ValueError):
            requested_perm = 0
        if requested_perm > 0:
            self.num_perm = requested_perm

        self.permCheck = "ON" if self.num_perm > 0 else False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    if 'genofile' in start_vars:
        if start_vars['genofile'] != "":
            self.genofile_string = start_vars['genofile']
            # Only the part before the first ":" is used as the genofile.
            self.dataset.group.genofile = self.genofile_string.split(":")[0]

    self.dataset.group.get_markers()

    if self.mapping_method == "gemma":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            results = gemma_mapping.run_gemma(self.dataset, self.samples,
                                              self.vals, self.covariates,
                                              self.use_loco)
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        #if start_vars['pair_scan'] == "true":
        #    self.pair_scan = True
        rqtl_output = rqtl_mapping.run_rqtl_geno(
            self.vals, self.dataset, self.method, self.model,
            self.permCheck, self.num_perm, self.do_control,
            self.control_marker, self.manhattan_plot, self.pair_scan)
        # The call is the same either way; with permutations requested the
        # return value is unpacked into four parts.
        if self.permCheck and self.num_perm > 0:
            (self.perm_output, self.suggestive, self.significant,
             results) = rqtl_output
        else:
            results = rqtl_output
    elif self.mapping_method == "reaper":
        if "startMb" in start_vars:
            # ZS: Check if first time page loaded, so it can default to ON
            if "additiveCheck" in start_vars:
                self.additiveCheck = start_vars['additiveCheck']
            else:
                self.additiveCheck = False

            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            # A missing or non-numeric bootstrap count disables
            # bootstrapping.
            try:
                requested_bootstrap = int(start_vars['num_bootstrap'])
            except (KeyError, TypeError, ValueError):
                requested_bootstrap = 0
            if requested_bootstrap > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = requested_bootstrap
            else:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        logger.info("Running qtlreaper")
        (results, self.json_data, self.perm_output, self.suggestive,
         self.significant,
         self.bootstrap_results) = qtlreaper_mapping.gen_reaper_results(
             self.this_trait, self.dataset, self.samples, self.vals,
             self.json_data, self.num_perm, self.bootCheck,
             self.num_bootstrap, self.do_control, self.control_marker,
             self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                          self.species, self.vals, self.maf)
        #results = self.run_plink()
    elif self.mapping_method == "pylmm":
        logger.debug("RUNNING PYLMM")
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        logger.debug("RUNNING NOTHING")
        # Without this, the code below failed with a NameError because
        # "results" was never assigned.
        raise ValueError("Unsupported mapping method: {!r}".format(
            self.mapping_method))

    # NOTE(review): the ordering comparisons on chromosome values below
    # compare whatever type the back end returned; on Python 3 a string
    # chromosome other than "X"/"X/Y" (or a number compared with a stored
    # "X") would raise TypeError -- confirm the value types.
    if self.pair_scan:
        self.qtl_results = []
        for marker in results:
            chrom = marker['chr1']
            if chrom == "X" or chrom == "X/Y" or chrom > 0:
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # These lists are not created anywhere else in this method.
        for key in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(key, [])

        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(json_data=self.json_data,
                            this_trait=self.this_trait.name,
                            data_set=self.dataset.name,
                            maf=self.maf,
                            manhattan_plot=self.manhattan_plot,
                            mapping_scale=self.mapping_scale,
                            qtl_results=self.qtl_results)
    else:
        self.qtl_results = []
        # This is needed in order to convert the highest chr to X/Y
        highest_chr = 1
        for marker in results:
            chrom = marker['chr']
            is_sex_chr = chrom == "X" or chrom == "X/Y"
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if 'lod_score' in marker or 'lrs_value' in marker:
                    self.qtl_results.append(marker)

        with Bench("Exporting Results"):
            export_mapping_results(self.dataset, self.this_trait,
                                   self.qtl_results,
                                   self.mapping_results_path,
                                   self.mapping_scale, self.score_type)

        with Bench("Trimming Markers for Figure"):
            if len(self.qtl_results) > 30000:
                self.qtl_results = trim_markers_for_figure(self.qtl_results)

        with Bench("Trimming Markers for Table"):
            self.trimmed_markers = trim_markers_for_table(results)

        if self.mapping_method != "gemma":
            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            # Need to convert the QTL objects that qtl reaper returns into a
            # json serializable dictionary
            for qtl in self.qtl_results:
                if (qtl['chr'] == highest_chr and highest_chr != "X"
                        and highest_chr != "X/Y"):
                    # The highest numbered chromosome is reported as "X".
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                if 'lrs_value' in qtl:
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

        # NOTE(review): the original indentation of this file was lost; the
        # statements below are assumed to run for every mapping method
        # (including GEMMA) so that js_data is always set -- confirm.

        # Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes:
            chromosome = chromosomes[key]
            self.json_data['chrnames'].append(
                [chromosome.name, chromosome.mb_length])
            chromosome_mb_lengths[key] = chromosome.mb_length

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
            num_perm=self.num_perm,
            perm_results=self.perm_output,
        )
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and build ``self.js_data``.

    Reads the submitted value of every sample in the group, dispatches on
    ``start_vars['method']`` to one of the mapping back ends and converts
    the resulting markers into JSON-serializable lists.

    Args:
        start_vars: Mapping of submitted values. ``method``,
            ``manhattan_plot``, ``maf`` and one ``value:<sample>`` entry per
            sample are always read; other keys depend on the method.
        temp_uuid: Stored on the instance and passed, as a string, to the
            pylmm helpers.

    Raises:
        KeyError: If a key that is read unconditionally is missing.
        ValueError: If ``start_vars['method']`` is not one of the handled
            methods, or a numeric field such as ``selected_chr`` is not an
            integer.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    #tempdata = temp_data.TempData(temp_uuid)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    self.samples = []  # Want only ones with values
    self.vals = []

    for sample in self.dataset.group.samplelist:
        value = start_vars['value:' + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars['method']
    # Note the lower-case "true" expected for this flag.
    self.manhattan_plot = start_vars['manhattan_plot'] == "true"

    self.maf = start_vars['maf']  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which
    # template to use
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        self.selected_chr = int(start_vars['selected_chr'])

    self.dataset.group.get_markers()

    if self.mapping_method == "gemma":
        self.score_type = "LOD"
        included_markers, p_values = gemma_mapping.run_gemma(
            self.dataset, self.samples, self.vals)
        self.dataset.group.get_specified_markers(markers=included_markers)
        self.dataset.group.markers.add_pvalues(p_values)
        results = self.dataset.group.markers.markers
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        # NOTE(review): unlike the "reaper" branch, a non-empty num_perm is
        # stored here without int() conversion -- confirm that
        # run_rqtl_geno expects the raw submitted value.
        if start_vars['num_perm'] == "":
            self.num_perm = 0
        else:
            self.num_perm = start_vars['num_perm']
        self.control = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        results = self.run_rqtl_geno()
    elif self.mapping_method == "reaper":
        if start_vars['num_perm'] == "":
            self.num_perm = 0
        else:
            self.num_perm = int(start_vars['num_perm'])
        self.additive = False
        self.control = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        results = self.gen_reaper_results()
    elif self.mapping_method == "plink":
        results = self.run_plink()
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        self.num_perm = start_vars['num_perm']
        if self.num_perm != "":
            if int(self.num_perm) > 0:
                self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        print("RUNNING NOTHING")
        # Without this, the code below failed with a NameError because
        # "results" was never assigned.
        raise ValueError("Unsupported mapping method: {!r}".format(
            self.mapping_method))

    # NOTE(review): the ordering comparisons on chromosome values below
    # compare whatever type the back end returned; on Python 3 a string
    # chromosome other than "X"/"X/Y" (or a number compared with a stored
    # "X") would raise TypeError -- confirm the value types.
    if self.pair_scan:
        self.qtl_results = []
        for marker in results:
            chrom = marker['chr1']
            if chrom == "X" or chrom == "X/Y" or chrom > 0:
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        # These lists are not created anywhere else in this method.
        for key in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(key, [])

        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        # This is needed in order to convert the highest chr to X/Y
        highest_chr = 1
        for marker in results:
            chrom = marker['chr']
            is_sex_chr = chrom == "X" or chrom == "X/Y"
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if 'lod_score' in marker or 'lrs_value' in marker:
                    self.qtl_results.append(marker)

        self.json_data['chr'] = []
        self.json_data['pos'] = []
        self.json_data['lod.hk'] = []
        self.json_data['markernames'] = []

        self.json_data['suggestive'] = self.suggestive
        self.json_data['significant'] = self.significant

        # Need to convert the QTL objects that qtl reaper returns into a
        # json serializable dictionary
        for qtl in self.qtl_results:
            if (qtl['chr'] == highest_chr and highest_chr != "X"
                    and highest_chr != "X/Y"):
                # The highest numbered chromosome is reported as "X".
                print("changing to X")
                self.json_data['chr'].append("X")
            else:
                self.json_data['chr'].append(str(qtl['chr']))
            self.json_data['pos'].append(qtl['Mb'])
            if 'lrs_value' in qtl:
                self.json_data['lod.hk'].append(str(qtl['lrs_value']))
            else:
                self.json_data['lod.hk'].append(str(qtl['lod_score']))
            self.json_data['markernames'].append(qtl['name'])

        # Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes:
            chromosome = chromosomes[key]
            self.json_data['chrnames'].append(
                [chromosome.name, chromosome.mb_length])
            chromosome_mb_lengths[key] = chromosome.mb_length

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
        )