Beispiel #1
0
    def test_run_gemma_firstrun_set_false(self, mock_parse_loco):
        """add tests for gemma function where  first run is set to false"""
        dataset = AttributeSetter(
            {"group": AttributeSetter({"genofile": "genofile.geno"})})

        output_file = "file1"
        mock_parse_loco.return_value = []
        this_trait = AttributeSetter({"name": "t1"})

        result = run_gemma(this_trait=this_trait,
                           this_dataset=dataset,
                           samples=[],
                           vals=[],
                           covariates="",
                           use_loco=True,
                           first_run=False,
                           output_files=output_file)

        expected_results = ([], "file1")
        self.assertEqual(expected_results, result)
Beispiel #2
0
    def test_run_gemma_firstrun_set_true(self, mock_gen_pheno_txt, mock_os,
                                         mock_choice, mock_gen_covar,
                                         mock_flat_files, mock_parse_loco):
        """add tests for run_gemma where first run is set to true"""
        this_chromosomes = {}
        for i in range(1, 5):
            this_chromosomes[f'CH{i}'] = (AttributeSetter({"name": f"CH{i}"}))
        chromosomes = AttributeSetter({"chromosomes": this_chromosomes})

        dataset_group = MockGroup({"name": "GP1", "genofile": "file_geno"})
        dataset = AttributeSetter({
            "group":
            dataset_group,
            "name":
            "dataset1_name",
            "species":
            AttributeSetter({"chromosomes": chromosomes})
        })
        trait = AttributeSetter({"name": "trait1"})
        samples = []
        mock_gen_pheno_txt.return_value = None
        mock_os.path.isfile.return_value = True
        mock_gen_covar.return_value = None
        mock_choice.return_value = "R"
        mock_flat_files.return_value = "/home/genotype/bimbam"
        mock_parse_loco.return_value = []
        results = run_gemma(this_trait=trait,
                            this_dataset=dataset,
                            samples=[],
                            vals=[],
                            covariates="",
                            use_loco=True)
        self.assertEqual(mock_os.system.call_count, 2)
        mock_gen_pheno_txt.assert_called_once()
        mock_parse_loco.assert_called_once_with(dataset, "GP1_GWA_RRRRRR",
                                                True)
        mock_os.path.isfile.assert_called_once_with(
            ('/home/user/imgfile_output.assoc.txt'))
        self.assertEqual(mock_flat_files.call_count, 4)
        self.assertEqual(results, ([], "GP1_GWA_RRRRRR"))
    def __init__(self, start_vars, temp_uuid):

        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        self.samples = [] # Want only ones with values
        self.vals = []

        all_samples_ordered = self.dataset.group.all_samples_ordered()
        primary_sample_names = list(all_samples_ordered)

        for sample in self.dataset.group.samplelist:
            # sample is actually the name of an individual
            in_trait_data = False
            for item in self.this_trait.data:
                if self.this_trait.data[item].name == sample:
                    value = start_vars['value:' + self.this_trait.data[item].name]
                    self.samples.append(self.this_trait.data[item].name)
                    self.vals.append(value)
                    in_trait_data = True
                    break
            if not in_trait_data:
                value = start_vars.get('value:' + sample)
                if value:
                    self.samples.append(sample)
                    self.vals.append(value)

        self.mapping_method = start_vars['method']
        if start_vars['manhattan_plot'] == "True":
            self.manhattan_plot = True
        else:
            self.manhattan_plot = False

        self.maf = start_vars['maf'] # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS" #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(start_vars['selected_chr']) != -1: #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars: #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
            except:
                self.num_perm = 0

            if self.num_perm > 0:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.dataset.group.genofile = start_vars['genofile']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            if start_vars['pair_scan'] == "true":
                self.pair_scan = True
            if self.permCheck and self.num_perm > 0:
                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan)
            else:
                results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan)
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.dataset.group.genofile = start_vars['genofile']
            logger.info("Running qtlreaper")
            results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(self.this_trait,
                                                                                                                                                        self.dataset,
                                                                                                                                                        self.samples,
                                                                                                                                                        self.json_data,
                                                                                                                                                        self.num_perm,
                                                                                                                                                        self.bootCheck,
                                                                                                                                                        self.num_bootstrap,
                                                                                                                                                        self.do_control,
                                                                                                                                                        self.control_marker,
                                                                                                                                                        self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf)
            #results = self.run_plink()
        elif self.mapping_method == "pylmm":
            logger.debug("RUNNING PYLMM")
            self.dataset.group.genofile = start_vars['genofile']
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            logger.debug("RUNNING NOTHING")

        if self.pair_scan == True:
            self.qtl_results = []
            highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            for qtl in enumerate(self.qtl_results):
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(
                json_data = self.json_data,
                this_trait = self.this_trait.name,
                data_set = self.dataset.name,
                maf = self.maf,
                manhattan_plot = self.manhattan_plot,
                mapping_scale = self.mapping_scale,
                qtl_results = self.qtl_results
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker['chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker['chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                        self.qtl_results.append(marker)

            self.trimmed_markers = trim_markers_for_table(results)

            if self.mapping_method != "gemma":
                self.json_data['chr'] = []
                self.json_data['pos'] = []
                self.json_data['lod.hk'] = []
                self.json_data['markernames'] = []

                self.json_data['suggestive'] = self.suggestive
                self.json_data['significant'] = self.significant

                #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
                for index, qtl in enumerate(self.qtl_results):
                    #if index<40:
                    #    logger.debug("lod score is:", qtl['lod_score'])
                    if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                        #logger.debug("changing to X")
                        self.json_data['chr'].append("X")
                    else:
                        self.json_data['chr'].append(str(qtl['chr']))
                    self.json_data['pos'].append(qtl['Mb'])
                    if 'lrs_value' in qtl.keys():
                        self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                    else:
                        self.json_data['lod.hk'].append(str(qtl['lod_score']))
                    self.json_data['markernames'].append(qtl['name'])

                #Get chromosome lengths for drawing the interval map plot
                chromosome_mb_lengths = {}
                self.json_data['chrnames'] = []
                for key in self.species.chromosomes.chromosomes.keys():
                    self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
                    chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length

                # logger.debug("json_data:", self.json_data)

                self.js_data = dict(
                    result_score_type = self.score_type,
                    json_data = self.json_data,
                    this_trait = self.this_trait.name,
                    data_set = self.dataset.name,
                    maf = self.maf,
                    manhattan_plot = self.manhattan_plot,
                    mapping_scale = self.mapping_scale,
                    chromosomes = chromosome_mb_lengths,
                    qtl_results = self.qtl_results,
                    num_perm = self.num_perm,
                    perm_results = self.perm_output,
                )
    def __init__(self, start_vars, temp_uuid):

        helper_functions.get_species_dataset_trait(self, start_vars)

        # tempdata = temp_data.TempData(temp_uuid)

        self.json_data = {}
        self.json_data["lodnames"] = ["lod.hk"]

        self.samples = []  # Want only ones with values
        self.vals = []

        for sample in self.dataset.group.samplelist:
            value = start_vars["value:" + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        self.mapping_method = start_vars["method"]
        if start_vars["manhattan_plot"] == "true":
            self.manhattan_plot = True
        else:
            self.manhattan_plot = False

        self.maf = start_vars["maf"]  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  # ZS: LRS or LOD

        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "LOD"
            included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
            self.dataset.group.get_specified_markers(markers=included_markers)
            self.dataset.group.markers.add_pvalues(p_values)
            results = self.dataset.group.markers.markers
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            if start_vars["num_perm"] == "":
                self.num_perm = 0
            else:
                self.num_perm = start_vars["num_perm"]
            self.control = start_vars["control_marker"]
            self.do_control = start_vars["do_control"]
            print("StartVars:", start_vars)
            self.method = start_vars["mapmethod_rqtl_geno"]
            self.model = start_vars["mapmodel_rqtl_geno"]

            if start_vars["pair_scan"] == "true":
                self.pair_scan = True

            results = self.run_rqtl_geno()
            print("qtl_results:", results)
        elif self.mapping_method == "plink":
            results = self.run_plink()
            # print("qtl_results:", pf(results))
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            self.num_perm = start_vars["num_perm"]
            if self.num_perm != "":
                if int(self.num_perm) > 0:
                    self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")

        if self.pair_scan == True:
            self.qtl_results = []
            highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker["chr1"] > 0 or marker["chr1"] == "X" or marker["chr1"] == "X/Y":
                    if marker["chr1"] > highest_chr or marker["chr1"] == "X" or marker["chr1"] == "X/Y":
                        highest_chr = marker["chr1"]
                    if "lod_score" in marker:
                        self.qtl_results.append(marker)

            for qtl in enumerate(self.qtl_results):
                self.json_data["chr1"].append(str(qtl["chr1"]))
                self.json_data["chr2"].append(str(qtl["chr2"]))
                self.json_data["Mb"].append(qtl["Mb"])
                self.json_data["markernames"].append(qtl["name"])

            self.js_data = dict(
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                qtl_results=self.qtl_results,
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker["chr"] > 0 or marker["chr"] == "X" or marker["chr"] == "X/Y":
                    if marker["chr"] > highest_chr or marker["chr"] == "X" or marker["chr"] == "X/Y":
                        highest_chr = marker["chr"]
                    if "lod_score" in marker:
                        self.qtl_results.append(marker)

            self.json_data["chr"] = []
            self.json_data["pos"] = []
            self.json_data["lod.hk"] = []
            self.json_data["markernames"] = []

            self.json_data["suggestive"] = self.suggestive
            self.json_data["significant"] = self.significant

            # Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for index, qtl in enumerate(self.qtl_results):
                if index < 40:
                    print("lod score is:", qtl["lod_score"])
                if qtl["chr"] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    print("changing to X")
                    self.json_data["chr"].append("X")
                else:
                    self.json_data["chr"].append(str(qtl["chr"]))
                self.json_data["pos"].append(qtl["Mb"])
                if "lrs_value" in qtl:
                    self.json_data["lod.hk"].append(str(qtl["lrs_value"]))
                else:
                    self.json_data["lod.hk"].append(str(qtl["lod_score"]))
                self.json_data["markernames"].append(qtl["name"])

            # Get chromosome lengths for drawing the interval map plot
            chromosome_mb_lengths = {}
            self.json_data["chrnames"] = []
            for key in self.species.chromosomes.chromosomes.keys():
                self.json_data["chrnames"].append(
                    [
                        self.species.chromosomes.chromosomes[key].name,
                        self.species.chromosomes.chromosomes[key].mb_length,
                    ]
                )
                chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length

            # print("json_data:", self.json_data)

            self.js_data = dict(
                result_score_type=self.score_type,
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                chromosomes=chromosome_mb_lengths,
                qtl_results=self.qtl_results,
            )
Beispiel #5
0
def do_mapping_for_api(start_vars):
    assert ('db' in start_vars)
    assert ('trait_id' in start_vars)

    dataset = data_set.create_dataset(dataset_name=start_vars['db'])
    dataset.group.get_markers()
    this_trait = GeneralTrait(dataset=dataset, name=start_vars['trait_id'])
    this_trait = retrieve_sample_data(this_trait, dataset)

    samples = []
    vals = []

    for sample in dataset.group.samplelist:
        in_trait_data = False
        for item in this_trait.data:
            if this_trait.data[item].name == sample:
                value = str(this_trait.data[item].value)
                samples.append(item)
                vals.append(value)
                in_trait_data = True
                break
        if not in_trait_data:
            vals.append("x")

    mapping_params = initialize_parameters(start_vars, dataset, this_trait)

    covariates = ""  #ZS: It seems to take an empty string as default. This should probably be changed.

    if mapping_params['mapping_method'] == "gemma":
        header_row = ["name", "chr", "Mb", "lod_score", "p_value"]
        if mapping_params[
                'use_loco'] == "True":  #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api
            result_markers = gemma_mapping.run_gemma(
                this_trait, dataset, samples, vals, covariates,
                mapping_params['use_loco'], mapping_params['maf'])[0]
        else:
            result_markers = gemma_mapping.run_gemma(
                this_trait, dataset, samples, vals, covariates,
                mapping_params['use_loco'], mapping_params['maf'])
    elif mapping_params['mapping_method'] == "rqtl":
        header_row = ["name", "chr", "cM", "lod_score"]
        if mapping_params['num_perm'] > 0:
            _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(
                vals, dataset, mapping_params['rqtl_method'],
                mapping_params['rqtl_model'], mapping_params['perm_check'],
                mapping_params['num_perm'], mapping_params['do_control'],
                mapping_params['control_marker'],
                mapping_params['manhattan_plot'], mapping_params['pair_scan'])
        else:
            result_markers = rqtl_mapping.run_rqtl_geno(
                vals, dataset, mapping_params['rqtl_method'],
                mapping_params['rqtl_model'], mapping_params['perm_check'],
                mapping_params['num_perm'], mapping_params['do_control'],
                mapping_params['control_marker'],
                mapping_params['manhattan_plot'], mapping_params['pair_scan'])

    if mapping_params['limit_to']:
        result_markers = result_markers[:mapping_params['limit_to']]

    if mapping_params['format'] == "csv":
        output_rows = []
        output_rows.append(header_row)
        for marker in result_markers:
            this_row = [marker[header] for header in header_row]
            output_rows.append(this_row)

        return output_rows, mapping_params['format']
    elif mapping_params['format'] == "json":
        return result_markers, mapping_params['format']
    else:
        return result_markers, None
Beispiel #6
0
    def __init__(self, start_vars, temp_uuid):

        helper_functions.get_species_dataset_trait(self, start_vars)

        #tempdata = temp_data.TempData(temp_uuid)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        self.samples = []  # Want only ones with values
        self.vals = []

        for sample in self.dataset.group.samplelist:
            value = start_vars['value:' + sample]
            self.samples.append(str(sample))
            self.vals.append(value)

        self.mapping_method = start_vars['method']
        if start_vars['manhattan_plot'] == "true":
            self.manhattan_plot = True
        else:
            self.manhattan_plot = False

        self.maf = start_vars['maf']  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.bootstrap_results = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(
                    start_vars['selected_chr']
            ) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
                else:
                    self.num_perm = 0
            except:
                self.num_perm = 0

            self.LRSCheck = self.score_type
            self.permCheck = "ON"
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "LOD"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                included_markers, p_values = gemma_mapping.run_gemma(
                    self.dataset, self.samples, self.vals)
            with Bench("Getting markers from csv"):
                marker_obs = get_markers_from_csv(included_markers, p_values,
                                                  self.dataset.group.name)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            if start_vars['pair_scan'] == "true":
                self.pair_scan = True
            results = self.run_rqtl_geno()
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            results = self.gen_reaper_results()
        elif self.mapping_method == "plink":
            results = self.run_plink()
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")

        if self.pair_scan == True:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                        'chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker[
                            'chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            for qtl in enumerate(self.qtl_results):
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                qtl_results=self.qtl_results,
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker[
                        'chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker[
                            'chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value'
                                                          in marker.keys()):
                        self.qtl_results.append(marker)

            self.trimmed_markers = trim_markers_for_table(results)

            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for index, qtl in enumerate(self.qtl_results):
                #if index<40:
                #    print("lod score is:", qtl['lod_score'])
                if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    #print("changing to X")
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                if 'lrs_value' in qtl.keys():
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

            #Get chromosome lengths for drawing the interval map plot
            chromosome_mb_lengths = {}
            self.json_data['chrnames'] = []
            for key in self.species.chromosomes.chromosomes.keys():
                self.json_data['chrnames'].append([
                    self.species.chromosomes.chromosomes[key].name,
                    self.species.chromosomes.chromosomes[key].mb_length
                ])
                chromosome_mb_lengths[
                    key] = self.species.chromosomes.chromosomes[key].mb_length

            # print("json_data:", self.json_data)

            self.js_data = dict(
                result_score_type=self.score_type,
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                chromosomes=chromosome_mb_lengths,
                qtl_results=self.qtl_results,
            )
    def __init__(self, start_vars, temp_uuid):
        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        #ZS: Needed to zoom in or remap temp traits like PCA traits
        if "temp_trait" in start_vars and start_vars['temp_trait'] != "False":
            self.temp_trait = "True"
            self.group = self.dataset.group.name

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        #ZS: Sometimes a group may have a genofile that only includes a subset of samples
        genofile_samplelist = []
        if 'genofile' in start_vars:
            if start_vars['genofile'] != "":
                self.genofile_string = start_vars['genofile']
                self.dataset.group.genofile = self.genofile_string.split(
                    ":")[0]
                genofile_samplelist = get_genofile_samplelist(self.dataset)

        all_samples_ordered = self.dataset.group.all_samples_ordered()

        self.vals = []
        self.samples = []
        self.sample_vals = start_vars['sample_vals']
        sample_val_dict = json.loads(self.sample_vals)
        samples = sample_val_dict.keys()
        if (len(genofile_samplelist) != 0):
            for sample in genofile_samplelist:
                self.samples.append(sample)
                if sample in samples:
                    self.vals.append(sample_val_dict[sample])
                else:
                    self.vals.append("x")
        else:
            for sample in self.dataset.group.samplelist:
                if sample in samples:
                    self.vals.append(sample_val_dict[sample])
                    self.samples.append(sample)

        if 'n_samples' in start_vars:
            self.n_samples = start_vars['n_samples']
        else:
            self.n_samples = len([val for val in self.vals if val != "x"])

        #ZS: Check if genotypes exist in the DB in order to create links for markers

        self.geno_db_exists = geno_db_exists(self.dataset)

        self.mapping_method = start_vars['method']
        if "results_path" in start_vars:
            self.mapping_results_path = start_vars['results_path']
        else:
            mapping_results_filename = self.dataset.group.name + "_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))
            self.mapping_results_path = "{}{}.csv".format(
                webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

        self.manhattan_plot = False
        if 'manhattan_plot' in start_vars:
            if start_vars['manhattan_plot'].lower() != "false":
                self.color_scheme = "alternating"
                if "color_scheme" in start_vars:
                    self.color_scheme = start_vars['color_scheme']
                    if self.color_scheme == "single":
                        self.manhattan_single_color = start_vars[
                            'manhattan_single_color']
                self.manhattan_plot = True

        self.maf = start_vars['maf']  # Minor allele frequency
        if "use_loco" in start_vars:
            self.use_loco = start_vars['use_loco']
        else:
            self.use_loco = None
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        if 'transform' in start_vars:
            self.transform = start_vars['transform']
        else:
            self.transform = ""
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        if "mapping_scale" in start_vars:
            self.mapping_scale = start_vars['mapping_scale']
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []
        self.covariates = start_vars[
            'covariates'] if "covariates" in start_vars else ""
        self.categorical_vars = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(
                    start_vars['selected_chr']
            ) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
            except:
                self.num_perm = 0

            if self.num_perm > 0:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        #self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.first_run = True
            self.output_files = None
            if 'output_files' in start_vars:
                self.output_files = start_vars['output_files']
            if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                self.first_run = False
            self.score_type = "-logP"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                if self.use_loco == "True":
                    marker_obs, self.output_files = gemma_mapping.run_gemma(
                        self.this_trait, self.dataset, self.samples, self.vals,
                        self.covariates, self.use_loco, self.maf,
                        self.first_run, self.output_files)
                else:
                    marker_obs, self.output_files = gemma_mapping.run_gemma(
                        self.this_trait, self.dataset, self.samples, self.vals,
                        self.covariates, self.use_loco, self.maf,
                        self.first_run, self.output_files)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            perm_strata = []
            if "perm_strata" in start_vars and "categorical_vars" in start_vars:
                self.categorical_vars = start_vars["categorical_vars"].split(
                    ",")
                if len(self.categorical_vars
                       ) and start_vars["perm_strata"] == "True":
                    primary_samples = SampleList(dataset=self.dataset,
                                                 sample_names=self.samples,
                                                 this_trait=self.this_trait)

                    perm_strata = get_perm_strata(self.this_trait,
                                                  primary_samples,
                                                  self.categorical_vars,
                                                  self.samples)
            self.score_type = "LOD"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            if 'mapmethod_rqtl_geno' in start_vars:
                self.method = start_vars['mapmethod_rqtl_geno']
            else:
                self.method = "em"
            self.model = start_vars['mapmodel_rqtl_geno']
            #if start_vars['pair_scan'] == "true":
            #    self.pair_scan = True
            if self.permCheck and self.num_perm > 0:
                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.samples, self.dataset, self.mapping_scale,
                    self.method, self.model, self.permCheck, self.num_perm,
                    perm_strata, self.do_control, self.control_marker,
                    self.manhattan_plot, self.pair_scan, self.covariates)
            else:
                results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.samples, self.dataset, self.mapping_scale,
                    self.method, self.model, self.permCheck, self.num_perm,
                    perm_strata, self.do_control, self.control_marker,
                    self.manhattan_plot, self.pair_scan, self.covariates)
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.reaper_version = start_vars['reaper_version']

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            logger.info("Running qtlreaper")

            if self.reaper_version == "new":
                self.first_run = True
                self.output_files = None
                if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                    self.first_run = False
                    if 'output_files' in start_vars:
                        self.output_files = start_vars['output_files'].split(
                            ",")

                results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(
                    self.this_trait, self.dataset, self.samples, self.vals,
                    self.json_data, self.num_perm, self.bootCheck,
                    self.num_bootstrap, self.do_control, self.control_marker,
                    self.manhattan_plot, self.first_run, self.output_files)
            else:
                results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper(
                    self.this_trait, self.dataset, self.samples, self.vals,
                    self.json_data, self.num_perm, self.bootCheck,
                    self.num_bootstrap, self.do_control, self.control_marker,
                    self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-logP"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                              self.species, self.vals,
                                              self.maf)
            #results = self.run_plink()
        else:
            logger.debug("RUNNING NOTHING")

        self.no_results = False
        if len(results) == 0:
            self.no_results = True
        else:
            if self.pair_scan == True:
                self.qtl_results = []
                highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
                for marker in results:
                    if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                            'chr1'] == "X/Y":
                        if marker['chr1'] > highest_chr or marker[
                                'chr1'] == "X" or marker['chr1'] == "X/Y":
                            highest_chr = marker['chr1']
                        if 'lod_score' in list(marker.keys()):
                            self.qtl_results.append(marker)

                self.trimmed_markers = results

                for qtl in enumerate(self.qtl_results):
                    self.json_data['chr1'].append(str(qtl['chr1']))
                    self.json_data['chr2'].append(str(qtl['chr2']))
                    self.json_data['Mb'].append(qtl['Mb'])
                    self.json_data['markernames'].append(qtl['name'])

                self.js_data = dict(json_data=self.json_data,
                                    this_trait=self.this_trait.name,
                                    data_set=self.dataset.name,
                                    maf=self.maf,
                                    manhattan_plot=self.manhattan_plot,
                                    mapping_scale=self.mapping_scale,
                                    qtl_results=self.qtl_results)

            else:
                self.qtl_results = []
                self.results_for_browser = []
                self.annotations_for_browser = []
                highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
                for marker in results:
                    if 'Mb' in marker:
                        this_ps = marker['Mb'] * 1000000
                    else:
                        this_ps = marker['cM'] * 1000000

                    browser_marker = dict(chr=str(marker['chr']),
                                          rs=marker['name'],
                                          ps=this_ps,
                                          url="/show_trait?trait_id=" +
                                          marker['name'] + "&dataset=" +
                                          self.dataset.group.name + "Geno")

                    if self.geno_db_exists == "True":
                        annot_marker = dict(name=str(marker['name']),
                                            chr=str(marker['chr']),
                                            rs=marker['name'],
                                            pos=this_ps,
                                            url="/show_trait?trait_id=" +
                                            marker['name'] + "&dataset=" +
                                            self.dataset.group.name + "Geno")
                    else:
                        annot_marker = dict(name=str(marker['name']),
                                            chr=str(marker['chr']),
                                            rs=marker['name'],
                                            pos=this_ps)

                    if 'lrs_value' in marker and marker['lrs_value'] > 0:
                        browser_marker['p_wald'] = 10**-(marker['lrs_value'] /
                                                         4.61)
                    elif 'lod_score' in marker and marker['lod_score'] > 0:
                        browser_marker['p_wald'] = 10**-(marker['lod_score'])
                    else:
                        browser_marker['p_wald'] = 0

                    self.results_for_browser.append(browser_marker)
                    self.annotations_for_browser.append(annot_marker)
                    if str(marker['chr']) > '0' or str(
                            marker['chr']) == "X" or str(
                                marker['chr']) == "X/Y":
                        if str(marker['chr']) > str(highest_chr) or str(
                                marker['chr']) == "X" or str(
                                    marker['chr']) == "X/Y":
                            highest_chr = marker['chr']
                        if ('lod_score'
                                in marker.keys()) or ('lrs_value'
                                                      in marker.keys()):
                            if 'Mb' in marker.keys():
                                marker['display_pos'] = "Chr" + str(
                                    marker['chr']) + ": " + "{:.6f}".format(
                                        marker['Mb'])
                            elif 'cM' in marker.keys():
                                marker['display_pos'] = "Chr" + str(
                                    marker['chr']) + ": " + "{:.3f}".format(
                                        marker['cM'])
                            else:
                                marker['display_pos'] = "N/A"
                            self.qtl_results.append(marker)

                total_markers = len(self.qtl_results)

                with Bench("Exporting Results"):
                    export_mapping_results(self.dataset, self.this_trait,
                                           self.qtl_results,
                                           self.mapping_results_path,
                                           self.mapping_scale, self.score_type)

                with Bench("Trimming Markers for Figure"):
                    if len(self.qtl_results) > 30000:
                        self.qtl_results = trim_markers_for_figure(
                            self.qtl_results)
                        self.results_for_browser = trim_markers_for_figure(
                            self.results_for_browser)
                        filtered_annotations = []
                        for marker in self.results_for_browser:
                            for annot_marker in self.annotations_for_browser:
                                if annot_marker['rs'] == marker['rs']:
                                    filtered_annotations.append(annot_marker)
                                    break
                        self.annotations_for_browser = filtered_annotations
                        browser_files = write_input_for_browser(
                            self.dataset, self.results_for_browser,
                            self.annotations_for_browser)
                    else:
                        browser_files = write_input_for_browser(
                            self.dataset, self.results_for_browser,
                            self.annotations_for_browser)

                with Bench("Trimming Markers for Table"):
                    self.trimmed_markers = trim_markers_for_table(results)

                chr_lengths = get_chr_lengths(self.mapping_scale,
                                              self.mapping_method,
                                              self.dataset, self.qtl_results)

                #ZS: For zooming into genome browser, need to pass chromosome name instead of number
                if self.dataset.group.species == "mouse":
                    if self.selected_chr == 20:
                        this_chr = "X"
                    else:
                        this_chr = str(self.selected_chr)
                elif self.dataset.group.species == "rat":
                    if self.selected_chr == 21:
                        this_chr = "X"
                    else:
                        this_chr = str(self.selected_chr)
                else:
                    if self.selected_chr == 22:
                        this_chr = "X"
                    elif self.selected_chr == 23:
                        this_chr = "Y"
                    else:
                        this_chr = str(self.selected_chr)

                if self.mapping_method != "gemma":
                    if self.score_type == "LRS":
                        significant_for_browser = self.significant / 4.61
                    else:
                        significant_for_browser = self.significant

                    self.js_data = dict(
                        #result_score_type = self.score_type,
                        #this_trait = self.this_trait.name,
                        #data_set = self.dataset.name,
                        #maf = self.maf,
                        #manhattan_plot = self.manhattan_plot,
                        #mapping_scale = self.mapping_scale,
                        #chromosomes = chromosome_mb_lengths,
                        #qtl_results = self.qtl_results,
                        categorical_vars=self.categorical_vars,
                        chr_lengths=chr_lengths,
                        num_perm=self.num_perm,
                        perm_results=self.perm_output,
                        significant=significant_for_browser,
                        browser_files=browser_files,
                        selected_chr=this_chr,
                        total_markers=total_markers)
                else:
                    self.js_data = dict(chr_lengths=chr_lengths,
                                        browser_files=browser_files,
                                        selected_chr=this_chr,
                                        total_markers=total_markers)
Beispiel #8
0
    def __init__(self, start_vars, temp_uuid):

        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        self.samples = []  # Want only ones with values
        self.vals = []

        all_samples_ordered = self.dataset.group.all_samples_ordered()
        primary_sample_names = list(all_samples_ordered)

        for sample in self.dataset.group.samplelist:
            # sample is actually the name of an individual
            in_trait_data = False
            for item in self.this_trait.data:
                if self.this_trait.data[item].name == sample:
                    value = start_vars['value:' +
                                       self.this_trait.data[item].name]
                    self.samples.append(self.this_trait.data[item].name)
                    self.vals.append(value)
                    in_trait_data = True
                    break
            if not in_trait_data:
                value = start_vars.get('value:' + sample)
                if value:
                    self.samples.append(sample)
                    self.vals.append(value)

        #ZS: Check if genotypes exist in the DB in order to create links for markers
        if "geno_db_exists" in start_vars:
            self.geno_db_exists = start_vars['geno_db_exists']
        else:
            try:
                self.geno_db_exists = "True"
            except:
                self.geno_db_exists = "False"

        self.mapping_method = start_vars['method']
        if "results_path" in start_vars:
            self.mapping_results_path = start_vars['results_path']
        else:
            mapping_results_filename = self.dataset.group.name + "_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))
            self.mapping_results_path = "{}{}.csv".format(
                webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

        if start_vars['manhattan_plot'] == "True":
            self.manhattan_plot = True
        else:
            self.manhattan_plot = False

        self.maf = start_vars['maf']  # Minor allele frequency
        if "use_loco" in start_vars:
            self.use_loco = start_vars['use_loco']
        else:
            self.use_loco = None
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []
        self.covariates = start_vars[
            'covariates'] if "covariates" in start_vars else None

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(
                    start_vars['selected_chr']
            ) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
            except:
                self.num_perm = 0

            if self.num_perm > 0:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        if 'genofile' in start_vars:
            if start_vars['genofile'] != "":
                self.genofile_string = start_vars['genofile']
                self.dataset.group.genofile = self.genofile_string.split(
                    ":")[0]
        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                marker_obs = gemma_mapping.run_gemma(self.dataset,
                                                     self.samples, self.vals,
                                                     self.covariates,
                                                     self.use_loco)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            #if start_vars['pair_scan'] == "true":
            #    self.pair_scan = True
            if self.permCheck and self.num_perm > 0:
                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.dataset, self.method, self.model,
                    self.permCheck, self.num_perm, self.do_control,
                    self.control_marker, self.manhattan_plot, self.pair_scan)
            else:
                results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.dataset, self.method, self.model,
                    self.permCheck, self.num_perm, self.do_control,
                    self.control_marker, self.manhattan_plot, self.pair_scan)
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            logger.info("Running qtlreaper")
            results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.json_data, self.num_perm, self.bootCheck,
                self.num_bootstrap, self.do_control, self.control_marker,
                self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                              self.species, self.vals,
                                              self.maf)
            #results = self.run_plink()
        elif self.mapping_method == "pylmm":
            logger.debug("RUNNING PYLMM")
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            logger.debug("RUNNING NOTHING")

        if self.pair_scan == True:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                        'chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker[
                            'chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            for qtl in enumerate(self.qtl_results):
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(json_data=self.json_data,
                                this_trait=self.this_trait.name,
                                data_set=self.dataset.name,
                                maf=self.maf,
                                manhattan_plot=self.manhattan_plot,
                                mapping_scale=self.mapping_scale,
                                qtl_results=self.qtl_results)

        else:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker[
                        'chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker[
                            'chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value'
                                                          in marker.keys()):
                        self.qtl_results.append(marker)

            with Bench("Exporting Results"):
                export_mapping_results(self.dataset, self.this_trait,
                                       self.qtl_results,
                                       self.mapping_results_path,
                                       self.mapping_scale, self.score_type)

            with Bench("Trimming Markers for Figure"):
                if len(self.qtl_results) > 30000:
                    self.qtl_results = trim_markers_for_figure(
                        self.qtl_results)

            with Bench("Trimming Markers for Table"):
                self.trimmed_markers = trim_markers_for_table(results)

            if self.mapping_method != "gemma":
                self.json_data['chr'] = []
                self.json_data['pos'] = []
                self.json_data['lod.hk'] = []
                self.json_data['markernames'] = []

                self.json_data['suggestive'] = self.suggestive
                self.json_data['significant'] = self.significant

                #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
                for index, qtl in enumerate(self.qtl_results):
                    #if index<40:
                    #    logger.debug("lod score is:", qtl['lod_score'])
                    if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                        #logger.debug("changing to X")
                        self.json_data['chr'].append("X")
                    else:
                        self.json_data['chr'].append(str(qtl['chr']))
                    self.json_data['pos'].append(qtl['Mb'])
                    if 'lrs_value' in qtl.keys():
                        self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                    else:
                        self.json_data['lod.hk'].append(str(qtl['lod_score']))
                    self.json_data['markernames'].append(qtl['name'])

                #Get chromosome lengths for drawing the interval map plot
                chromosome_mb_lengths = {}
                self.json_data['chrnames'] = []
                for key in self.species.chromosomes.chromosomes.keys():
                    self.json_data['chrnames'].append([
                        self.species.chromosomes.chromosomes[key].name,
                        self.species.chromosomes.chromosomes[key].mb_length
                    ])
                    chromosome_mb_lengths[
                        key] = self.species.chromosomes.chromosomes[
                            key].mb_length

                self.js_data = dict(
                    result_score_type=self.score_type,
                    json_data=self.json_data,
                    this_trait=self.this_trait.name,
                    data_set=self.dataset.name,
                    maf=self.maf,
                    manhattan_plot=self.manhattan_plot,
                    mapping_scale=self.mapping_scale,
                    chromosomes=chromosome_mb_lengths,
                    qtl_results=self.qtl_results,
                    num_perm=self.num_perm,
                    perm_results=self.perm_output,
                )
    def __init__(self, start_vars, temp_uuid):

        helper_functions.get_species_dataset_trait(self, start_vars)

        #tempdata = temp_data.TempData(temp_uuid)
        
        self.temp_uuid = temp_uuid #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']
        
        self.samples = [] # Want only ones with values
        self.vals = []

        for sample in self.dataset.group.samplelist:
            value = start_vars['value:' + sample]
            self.samples.append(str(sample))
            self.vals.append(value)
 
        self.mapping_method = start_vars['method']
        if start_vars['manhattan_plot'] == "true":
            self.manhattan_plot = True
        else:
            self.manhattan_plot = False

        self.maf = start_vars['maf'] # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS" #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1        
        if "selected_chr" in start_vars:
            self.selected_chr = int(start_vars['selected_chr'])
 
        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "LOD"
            included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
            self.dataset.group.get_specified_markers(markers = included_markers)
            self.dataset.group.markers.add_pvalues(p_values)
            results = self.dataset.group.markers.markers
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            if start_vars['num_perm'] == "":
                self.num_perm = 0
            else:
                self.num_perm = start_vars['num_perm']
            self.control = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']

            if start_vars['pair_scan'] == "true":
                self.pair_scan = True

            results = self.run_rqtl_geno()
        elif self.mapping_method == "reaper":
            if start_vars['num_perm'] == "":
                self.num_perm = 0
            else:
                self.num_perm = int(start_vars['num_perm'])
            self.additive = False
            self.control = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            results = self.gen_reaper_results()
        elif self.mapping_method == "plink":
            results = self.run_plink()
        elif self.mapping_method == "pylmm":
            print("RUNNING PYLMM")
            self.num_perm = start_vars['num_perm']
            if self.num_perm != "":
                if int(self.num_perm) > 0:
	             self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            print("RUNNING NOTHING")
            
        if self.pair_scan == True:  
            self.qtl_results = []
            highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            for qtl in enumerate(self.qtl_results):
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(
                json_data = self.json_data,
                this_trait = self.this_trait.name,
                data_set = self.dataset.name,
                maf = self.maf,
                manhattan_plot = self.manhattan_plot,
                mapping_scale = self.mapping_scale,
                qtl_results = self.qtl_results,
            )

        else:
            self.cutoff = 2    
            self.qtl_results = []
            highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker['chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker['chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                        self.qtl_results.append(marker)

            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for index, qtl in enumerate(self.qtl_results):
                #if index<40:
                #    print("lod score is:", qtl['lod_score'])
                if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    print("changing to X")
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                if 'lrs_value' in qtl.keys():
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

            #Get chromosome lengths for drawing the interval map plot
            chromosome_mb_lengths = {}
            self.json_data['chrnames'] = []
            for key in self.species.chromosomes.chromosomes.keys():
                self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
                chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
        
            # print("json_data:", self.json_data)
        

            self.js_data = dict(
                result_score_type = self.score_type,
                json_data = self.json_data,
                this_trait = self.this_trait.name,
                data_set = self.dataset.name,
                maf = self.maf,
                manhattan_plot = self.manhattan_plot,
                mapping_scale = self.mapping_scale,
                chromosomes = chromosome_mb_lengths,
                qtl_results = self.qtl_results,
            )