def gen_search_result(self):
        """
        Get the info displayed in the search result table from the set of results computed in
        the "search" function

        """
        self.trait_list = []
        
        species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name)
        
        # result_set represents the results for each search term; a search of 
        # "shh grin2b" would have two sets of results, one for each term
        print("self.results is:", pf(self.results))
        for result in self.results:
            if not result:
                continue
            
            #### Excel file needs to be generated ####

            print("foo locals are:", locals())
            trait_id = result[0]
            this_trait = GeneralTrait(dataset=self.dataset, name=trait_id)
            this_trait.retrieve_info(get_qtl_info=True)
            self.trait_list.append(this_trait)

        self.dataset.get_trait_info(self.trait_list, species)
Example #2
def gen_covariates_file(this_dataset, covariates):
    covariate_list = covariates.split(",")
    covariate_data_object = []
    for covariate in covariate_list:
        this_covariate_data = []
        trait_name = covariate.split(":")[0]
        dataset_ob = create_dataset(covariate.split(":")[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)

        #trait_samples = this_dataset.group.all_samples_ordered()
        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", trait_samples)
        trait_sample_data = trait_ob.data
        logger.debug("SAMPLE DATA:", trait_sample_data)
        for index, sample in enumerate(trait_samples):
            if sample in trait_sample_data:
                sample_value = trait_sample_data[sample].value
                this_covariate_data.append(sample_value)
            else:
                this_covariate_data.append("-9")
        covariate_data_object.append(this_covariate_data)

    with open(
            "{}/{}_covariates.txt".format(flat_files('mapping'),
                                          this_dataset.group.name),
            "w") as outfile:
        for i in range(len(covariate_data_object[0])):
            for this_covariate in covariate_data_object:
                outfile.write(str(this_covariate[i]) + "\t")
            outfile.write("\n")
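
A minimal usage sketch for the helper above, assuming a dataset object whose group is already populated; each covariate entry is a "trait_name:dataset_name" pair, and the identifiers below are placeholders rather than values from the source:

# Hypothetical call (trait and dataset names are illustrative)
covariates = "10001:HC_M2_0606_P,10002:HC_M2_0606_P"
gen_covariates_file(this_dataset, covariates)
# Writes <flat_files('mapping')>/<group name>_covariates.txt with one tab-separated
# row per sample position and one column per covariate; missing samples become "-9".
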
Example #3
    def gen_search_result(self):
        """
        Get the info displayed in the search result table from the set of results computed in
        the "search" function

        """
        self.trait_list = []

        species = webqtlDatabaseFunction.retrieve_species(
            self.dataset.group.name)

        # result_set represents the results for each search term; a search of
        # "shh grin2b" would have two sets of results, one for each term
        print("self.results is:", pf(self.results))
        for result in self.results:
            if not result:
                continue

            #### Excel file needs to be generated ####

            #print("foo locals are:", locals())
            trait_id = result[0]
            this_trait = GeneralTrait(dataset=self.dataset,
                                      name=trait_id,
                                      get_qtl_info=True,
                                      get_sample_info=False)
            self.trait_list.append(this_trait)

        self.dataset.get_trait_info(self.trait_list, species)
Example #4
 def get_trait_db_obs(self, trait_db_list):
     self.trait_list = []
     for i, trait_db in enumerate(trait_db_list):
         if i == (len(trait_db_list) - 1):
             break
         trait_name, dataset_name = trait_db.split(":")
         dataset_ob = data_set.create_dataset(dataset_name)
         trait_ob = GeneralTrait(dataset=dataset_ob,
                                 name=trait_name,
                                 cellid=None)
         self.trait_list.append((trait_ob, dataset_ob))
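
Each entry in trait_db_list is a "trait_name:dataset_name" string, and the loop above deliberately stops before the final entry. A sketch of the expected input, using made-up identifiers:

# Hypothetical input; the last entry is skipped by the i == len(trait_db_list) - 1 check
trait_db_list = ["1427571_at:HC_M2_0606_P", "10001:BXDPublish", "primary_trait:SomeDataset"]
# get_trait_db_obs(trait_db_list) would fill self.trait_list with
# (GeneralTrait, dataset) tuples for the first two entries only
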
Example #5
def do_correlation(start_vars):
    assert ('db' in start_vars)
    assert ('target_db' in start_vars)
    assert ('trait_id' in start_vars)

    this_dataset = data_set.create_dataset(dataset_name=start_vars['db'])
    target_dataset = data_set.create_dataset(
        dataset_name=start_vars['target_db'])
    this_trait = GeneralTrait(dataset=this_dataset,
                              name=start_vars['trait_id'])
    this_trait = retrieve_sample_data(this_trait, this_dataset)

    corr_params = init_corr_params(start_vars)

    corr_results = calculate_results(this_trait, this_dataset, target_dataset,
                                     corr_params)
    #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0])))

    final_results = []
    for _trait_counter, trait in enumerate(
            corr_results.keys()[:corr_params['return_count']]):
        if corr_params['type'] == "tissue":
            [sample_r, num_overlap, sample_p, symbol] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "#_strains": num_overlap,
                "p_value": sample_p,
                "symbol": symbol
            }
        elif corr_params['type'] == "literature" or corr_params[
                'type'] == "lit":
            [gene_id, sample_r] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "gene_id": gene_id
            }
        else:
            [sample_r, sample_p, num_overlap] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "#_strains": num_overlap,
                "p_value": sample_p
            }

        final_results.append(result_dict)

    # json_corr_results = generate_corr_json(final_corr_results, this_trait, this_dataset, target_dataset, for_api = True)

    return final_results
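
Only the three asserted keys are certain requirements of do_correlation; the sketch below uses placeholder values, and anything beyond those keys is an assumption about what init_corr_params reads:

# Hypothetical request parameters (values are placeholders)
start_vars = {
    "db": "HC_M2_0606_P",       # dataset of the query trait
    "target_db": "BXDPublish",  # dataset to correlate against
    "trait_id": "1427571_at",   # name of the query trait
    # ...plus whatever init_corr_params expects for the correlation type and return count
}
final_results = do_correlation(start_vars)
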
Example #6
def add_cofactors(cross, this_dataset, covariates, samples):
    ro.numpy2ri.activate()

    covariate_list = covariates.split(",")
    covar_name_string = "c("
    for i, covariate in enumerate(covariate_list):
        this_covar_data = []
        covar_as_string = "c("
        trait_name = covariate.split(":")[0]
        dataset_ob = create_dataset(covariate.split(":")[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)

        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        trait_sample_data = trait_ob.data
        for index, sample in enumerate(trait_samples):
            if sample in samples:
                if sample in trait_sample_data:
                    sample_value = trait_sample_data[sample].value
                    this_covar_data.append(sample_value)
                else:
                    this_covar_data.append("NA")

        for j, item in enumerate(this_covar_data):
            if j < (len(this_covar_data) - 1):
                covar_as_string += str(item) + ","
            else:
                covar_as_string += str(item)

        covar_as_string += ")"

        col_name = "covar_" + str(i)
        cross = add_phenotype(cross, covar_as_string, col_name)

        if i < (len(covariate_list) - 1):
            covar_name_string += '"' + col_name + '", '
        else:
            covar_name_string += '"' + col_name + '"'

    covar_name_string += ")"

    covars_ob = pull_var("trait_covars", cross, covar_name_string)

    return cross, covars_ob
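
add_cofactors serialises each covariate into an R vector literal before attaching it to the cross object as covar_0, covar_1, and so on; a sketch of the intermediate strings for two covariates over three overlapping samples (values are made up):

# Illustrative intermediate values built inside the loop above
covar_as_string = "c(11.2,NA,9.7)"               # one covariate's values; "NA" marks missing samples
covar_name_string = 'c("covar_0", "covar_1")'    # column names later handed to pull_var
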
Example #7
    def __init__(self, kw):
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_" + kw['species'] + "_" + kw[
                'group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            # Put values in Redis so they can be looked up later if added to a collection
            Redis.set(self.trait_id, kw['trait_paste'])
            self.trait_vals = kw['trait_paste'].split()
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            self.temp_species = self.trait_id.split("_")[1]
            self.temp_group = self.trait_id.split("_")[2]
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()

        #ZS: Get verify/rna-seq link URLs
        try:
            blatsequence = self.this_trait.blatseq
            if not blatsequence:
                #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
                query1 = """SELECT Probe.Sequence, Probe.Name
                           FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                           WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                                 ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                                 ProbeSetFreeze.Name = '%s' AND
                                 ProbeSet.Name = '%s' AND
                                 Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (
                    self.this_trait.dataset.name, self.this_trait.name)
                seqs = g.db.execute(query1).fetchall()
                if not seqs:
                    raise ValueError
                else:
                    blatsequence = ''
                    for seqt in seqs:
                        if int(seqt[1][-1]) % 2 == 1:
                            blatsequence += string.strip(seqt[0])

            #--------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe
            blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'
            #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
            query2 = """SELECT Probe.Sequence, Probe.Name
                        FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                        WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                              ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                              ProbeSetFreeze.Name = '%s' AND
                              ProbeSet.Name = '%s' AND
                              Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (
                self.this_trait.dataset.name, self.this_trait.name)

            seqs = g.db.execute(query2).fetchall()
            for seqt in seqs:
                if int(seqt[1][-1]) % 2 == 1:
                    blatsequence += '%3EProbe_' + string.strip(
                        seqt[1]) + '%0A' + string.strip(seqt[0]) + '%0A'

            if self.dataset.group.species == "rat":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn6',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = ""
            elif self.dataset.group.species == "mouse":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('mouse', 'mm10',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % (
                    'mouse', 'mm10', blatsequence)
            elif self.dataset.group.species == "human":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('human', 'hg38',
                                                               blatsequence)
                self.UTHSC_BLAT_URL = ""
            else:
                self.UCSC_BLAT_URL = ""
                self.UTHSC_BLAT_URL = ""
        except:
            self.UCSC_BLAT_URL = ""
            self.UTHSC_BLAT_URL = ""

        if self.dataset.type == "ProbeSet":
            self.show_probes = "True"

        trait_units = get_trait_units(self.this_trait)
        self.get_external_links()
        self.build_correlation_tools()

        self.ncbi_summary = get_ncbi_summary(self.this_trait)

        #Get nearest marker for composite mapping
        if not self.temp_trait:
            if check_if_attr_exists(
                    self.this_trait, 'locus_chr'
            ) and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                self.nearest_marker = get_nearest_marker(
                    self.this_trait, self.dataset)
                #self.nearest_marker1 = get_nearest_marker(self.this_trait, self.dataset)[0]
                #self.nearest_marker2 = get_nearest_marker(self.this_trait, self.dataset)[1]
            else:
                self.nearest_marker = ""
                #self.nearest_marker1 = ""
                #self.nearest_marker2 = ""

        self.make_sample_lists()

        self.qnorm_vals = quantile_normalize_vals(self.sample_groups)
        self.z_scores = get_z_scores(self.sample_groups)

        self.temp_uuid = uuid.uuid4()

        self.sample_group_types = OrderedDict()
        if len(self.sample_groups) > 1:
            self.sample_group_types[
                'samples_primary'] = self.dataset.group.name
            self.sample_group_types['samples_other'] = "Other"
            self.sample_group_types['samples_all'] = "All"
        else:
            self.sample_group_types[
                'samples_primary'] = self.dataset.group.name
        sample_lists = [group.sample_list for group in self.sample_groups]

        categorical_var_list = []
        if not self.temp_trait:
            categorical_var_list = get_categorical_variables(
                self.this_trait, self.sample_groups[0]
            )  #ZS: Only using first samplelist, since I think mapping only uses those samples

        #ZS: Get list of chromosomes to select for mapping
        self.chr_list = [["All", -1]]
        for i, this_chr in enumerate(
                self.dataset.species.chromosomes.chromosomes):
            self.chr_list.append([
                self.dataset.species.chromosomes.chromosomes[this_chr].name, i
            ])

        self.genofiles = self.dataset.group.get_genofiles()

        self.has_num_cases = has_num_cases(self.this_trait)

        self.stats_table_width, self.trait_table_width = get_table_widths(
            self.sample_groups, self.has_num_cases)

        #ZS: Needed to know whether to display bar chart + get max sample name length in order to set table column width
        self.num_values = 0
        self.binary = "true"  #ZS: So it knows whether to display the Binary R/qtl mapping method, which doesn't work unless all values are 0 or 1
        self.negative_vals_exist = "false"  #ZS: Since we don't want to show log2 transform option for situations where it doesn't make sense
        max_samplename_width = 1
        for group in self.sample_groups:
            for sample in group.sample_list:
                if len(sample.name) > max_samplename_width:
                    max_samplename_width = len(sample.name)
                if sample.display_value != "x":
                    self.num_values += 1
                    if sample.display_value != 0 and sample.display_value != 1:
                        self.binary = "false"
                    if sample.value < 0:
                        self.negative_vals_exist = "true"

        sample_column_width = max_samplename_width * 8

        if self.num_values >= 5000:
            self.maf = 0.01
        else:
            self.maf = 0.05

        trait_symbol = None
        short_description = None
        if not self.temp_trait:
            if self.this_trait.symbol:
                trait_symbol = self.this_trait.symbol
                short_description = trait_symbol

            elif hasattr(self.this_trait, 'post_publication_abbreviation'):
                short_description = self.this_trait.post_publication_abbreviation

            elif hasattr(self.this_trait, 'pre_publication_abbreviation'):
                short_description = self.this_trait.pre_publication_abbreviation

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            hddn['allsamples'] = string.join(self.dataset.group.allsamples,
                                             ' ')
        hddn['primary_samples'] = string.join(self.primary_sample_names, ',')
        hddn['trait_id'] = self.trait_id
        hddn['trait_display_name'] = self.this_trait.display_name
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = False
        if self.temp_trait:
            hddn['temp_trait'] = True
            hddn['group'] = self.temp_group
            hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "gemma"
        hddn['selected_chr'] = -1
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['categorical_vars'] = ""
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if not self.temp_trait:
            if hasattr(
                    self.this_trait, 'locus_chr'
            ) and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                hddn['control_marker'] = self.nearest_marker
                #hddn['control_marker'] = self.nearest_marker1+","+self.nearest_marker2
        hddn['do_control'] = False
        hddn['maf'] = 0.05
        hddn['compare_traits'] = []
        hddn['export_data'] = ""
        hddn['export_format'] = "excel"

        # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
        self.hddn = hddn

        js_data = dict(trait_id=self.trait_id,
                       trait_symbol=trait_symbol,
                       short_description=short_description,
                       unit_type=trait_units,
                       dataset_type=self.dataset.type,
                       data_scale=self.dataset.data_scale,
                       sample_group_types=self.sample_group_types,
                       sample_lists=sample_lists,
                       attribute_names=self.sample_groups[0].attributes,
                       categorical_vars=",".join(categorical_var_list),
                       num_values=self.num_values,
                       qnorm_values=self.qnorm_vals,
                       zscore_values=self.z_scores,
                       sample_column_width=sample_column_width,
                       temp_uuid=self.temp_uuid)
        self.js_data = js_data
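
A rough sketch of the keyword dictionaries that can reach this constructor, inferred only from the branches above; every value is a placeholder:

# Regular trait page
kw = {"trait_id": "1427571_at", "dataset": "HC_M2_0606_P"}
# Pasted temporary trait; its values are stored in Redis under a
# "Temp_<species>_<group>_<timestamp>" trait id
kw = {"species": "mouse", "group": "BXD", "trait_paste": "12.1 x 9.8 10.4"}
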
Example #8
    def __init__(self, kw):
        self.type = kw['type']
        self.terms = kw['terms']
        if self.type == "gene":
            sql = """
                SELECT
                Species.`Name` AS species_name,
                InbredSet.`Name` AS inbredset_name,
                Tissue.`Name` AS tissue_name,
                ProbeSetFreeze.Name AS probesetfreeze_name,
                ProbeSet.Name AS probeset_name,
                ProbeSet.Symbol AS probeset_symbol,
                ProbeSet.`description` AS probeset_description,
                ProbeSet.Chr AS chr,
                ProbeSet.Mb AS mb,
                ProbeSetXRef.Mean AS mean,
                ProbeSetXRef.LRS AS lrs,
                ProbeSetXRef.`Locus` AS locus,
                ProbeSetXRef.`pValue` AS pvalue,
                ProbeSetXRef.`additive` AS additive
                FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue
                WHERE InbredSet.`SpeciesId`=Species.`Id`
                AND ProbeFreeze.InbredSetId=InbredSet.`Id`
                AND ProbeFreeze.`TissueId`=Tissue.`Id`
                AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
                AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
                AND ProbeSet.Id = ProbeSetXRef.ProbeSetId
                AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
                AND ProbeSetFreeze.confidentiality < 1
                AND ProbeSetFreeze.public > 0
                ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
                LIMIT 6000
                """ % (self.terms)
            with Bench("Running query"):
                logger.sql(sql)
                re = g.db.execute(sql).fetchall()
            self.trait_list = []
            with Bench("Creating trait objects"):
                for line in re:
                    dataset = create_dataset(line[3],
                                             "ProbeSet",
                                             get_samplelist=False)
                    trait_id = line[4]
                    #with Bench("Building trait object"):
                    this_trait = GeneralTrait(dataset=dataset,
                                              name=trait_id,
                                              get_qtl_info=True,
                                              get_sample_info=False)
                    self.trait_list.append(this_trait)

        elif self.type == "phenotype":
            sql = """
                SELECT
                Species.`Name`,
                InbredSet.`Name`,
                PublishFreeze.`Name`,
                PublishXRef.`Id`,
                Phenotype.`Post_publication_description`,
                Publication.`Authors`,
                Publication.`Year`,
                PublishXRef.`LRS`,
                PublishXRef.`Locus`,
                PublishXRef.`additive`
                FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
                WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
                AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
                AND InbredSet.`SpeciesId`=Species.`Id`
                AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
                AND PublishXRef.`PublicationId`=Publication.`Id`
                AND	  (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]"
                    OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]")
                ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
                LIMIT 6000
                """ % (self.terms, self.terms, self.terms, self.terms,
                       self.terms, self.terms, self.terms, self.terms,
                       self.terms, self.terms)
            logger.sql(sql)
            re = g.db.execute(sql).fetchall()
            self.trait_list = []
            with Bench("Creating trait objects"):
                for line in re:
                    dataset = create_dataset(line[2], "Publish")
                    trait_id = line[3]
                    this_trait = GeneralTrait(dataset=dataset,
                                              name=trait_id,
                                              get_qtl_info=True,
                                              get_sample_info=False)
                    self.trait_list.append(this_trait)
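
A sketch of the kw mapping this search constructor expects; the search terms are illustrative:

# Placeholder search parameters
kw = {"type": "gene", "terms": "Shh"}          # or {"type": "phenotype", "terms": "anxiety"}
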
Example #9
    def __init__(self, params):
        self.data_set_1 = data_set.create_dataset(params['dataset_1'])
        self.data_set_2 = data_set.create_dataset(params['dataset_2'])
        #self.data_set_3 = data_set.create_dataset(params['dataset_3'])
        self.trait_1 = GeneralTrait(name=params['trait_1'],
                                    dataset=self.data_set_1)
        self.trait_2 = GeneralTrait(name=params['trait_2'],
                                    dataset=self.data_set_2)
        #self.trait_3 = GeneralTrait(name=params['trait_3'], dataset=self.data_set_3)

        samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(
            self.trait_1.data, self.trait_2.data)

        self.data = []
        self.indIDs = samples_1.keys()
        vals_1 = []
        for sample in samples_1.keys():
            vals_1.append(samples_1[sample].value)
        self.data.append(vals_1)
        vals_2 = []
        for sample in samples_2.keys():
            vals_2.append(samples_2[sample].value)
        self.data.append(vals_2)

        slope, intercept, r_value, p_value, std_err = stats.linregress(
            vals_1, vals_2)

        if abs(slope) < 0.001:
            slope_string = '%.3E' % slope
        else:
            slope_string = '%.3f' % slope

        x_buffer = (max(vals_1) - min(vals_1)) * 0.1
        y_buffer = (max(vals_2) - min(vals_2)) * 0.1

        x_range = [min(vals_1) - x_buffer, max(vals_1) + x_buffer]
        y_range = [min(vals_2) - y_buffer, max(vals_2) + y_buffer]

        intercept_coords = get_intercept_coords(slope, intercept, x_range,
                                                y_range)

        rx = stats.rankdata(vals_1)
        ry = stats.rankdata(vals_2)
        self.rdata = []
        self.rdata.append(rx.tolist())
        self.rdata.append(ry.tolist())
        srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(
            rx, ry)

        if abs(srslope) < 0.001:
            srslope_string = '%.3E' % srslope
        else:
            srslope_string = '%.3f' % srslope

        x_buffer = (max(rx) - min(rx)) * 0.1
        y_buffer = (max(ry) - min(ry)) * 0.1

        sr_range = [min(rx) - x_buffer, max(rx) + x_buffer]

        sr_intercept_coords = get_intercept_coords(srslope, srintercept,
                                                   sr_range, sr_range)

        self.collections_exist = "False"
        if g.user_session.num_collections > 0:
            self.collections_exist = "True"

        self.js_data = dict(
            data=self.data,
            rdata=self.rdata,
            indIDs=self.indIDs,
            trait_1=self.trait_1.dataset.name + ": " + str(self.trait_1.name),
            trait_2=self.trait_2.dataset.name + ": " + str(self.trait_2.name),
            samples_1=samples_1,
            samples_2=samples_2,
            num_overlap=num_overlap,
            vals_1=vals_1,
            vals_2=vals_2,
            x_range=x_range,
            y_range=y_range,
            sr_range=sr_range,
            intercept_coords=intercept_coords,
            sr_intercept_coords=sr_intercept_coords,
            slope=slope,
            slope_string=slope_string,
            intercept=intercept,
            r_value=r_value,
            p_value=p_value,
            srslope=srslope,
            srslope_string=srslope_string,
            srintercept=srintercept,
            srr_value=srr_value,
            srp_value=srp_value

            #trait3 = self.trait_3.data,
            #vals_3 = vals_3
        )
        self.jsdata = self.js_data
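
The params mapping only needs the two trait/dataset pairs read at the top of the constructor; a minimal sketch with placeholder identifiers:

# Hypothetical params (identifiers are illustrative)
params = {
    "dataset_1": "HC_M2_0606_P", "trait_1": "1427571_at",
    "dataset_2": "BXDPublish", "trait_2": "10001",
}
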
Example #10
    def __init__(self, start_vars):
        # get trait list from db (database name)
        # calculate correlation with Base vector and targets

        # Check parameters
        assert ('corr_type' in start_vars)
        assert (is_str(start_vars['corr_type']))
        assert ('dataset' in start_vars)
        # assert('group' in start_vars) permitted to be empty?
        assert ('corr_sample_method' in start_vars)
        assert ('corr_samples_group' in start_vars)
        assert ('corr_dataset' in start_vars)
        #assert('min_expr' in start_vars)
        assert ('corr_return_results' in start_vars)
        if 'loc_chr' in start_vars:
            assert ('min_loc_mb' in start_vars)
            assert ('max_loc_mb' in start_vars)

        with Bench("Doing correlations"):
            if start_vars['dataset'] == "Temp":
                self.dataset = data_set.create_dataset(
                    dataset_name="Temp",
                    dataset_type="Temp",
                    group_name=start_vars['group'])
                self.trait_id = start_vars['trait_id']
                self.this_trait = GeneralTrait(dataset=self.dataset,
                                               name=self.trait_id,
                                               cellid=None)
            else:
                helper_functions.get_species_dataset_trait(self, start_vars)

            #self.dataset.group.read_genotype_file()

            corr_samples_group = start_vars['corr_samples_group']

            self.sample_data = {}
            self.corr_type = start_vars['corr_type']
            self.corr_method = start_vars['corr_sample_method']
            self.min_expr = get_float(start_vars, 'min_expr')
            self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0)
            self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0)

            if ('loc_chr' in start_vars and 'min_loc_mb' in start_vars
                    and 'max_loc_mb' in start_vars):

                self.location_chr = get_string(start_vars, 'loc_chr')
                self.min_location_mb = get_int(start_vars, 'min_loc_mb')
                self.max_location_mb = get_int(start_vars, 'max_loc_mb')
            else:
                self.location_chr = self.min_location_mb = self.max_location_mb = None

            self.get_formatted_corr_type()
            self.return_number = int(start_vars['corr_return_results'])

            #The two if statements below append samples to the sample list based upon whether the user
            #selected Primary Samples Only, Other Samples Only, or All Samples

            primary_samples = self.dataset.group.samplelist
            if self.dataset.group.parlist != None:
                primary_samples += self.dataset.group.parlist
            if self.dataset.group.f1list != None:
                primary_samples += self.dataset.group.f1list

            #If either BXD/whatever Only or All Samples, append all of that group's samplelist
            if corr_samples_group != 'samples_other':
                self.process_samples(start_vars, primary_samples)

            #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
            #exclude the primary samples (because they would have been added in the previous
            #if statement if the user selected All Samples)
            if corr_samples_group != 'samples_primary':
                if corr_samples_group == 'samples_other':
                    primary_samples = [
                        x for x in primary_samples
                        if x not in (self.dataset.group.parlist +
                                     self.dataset.group.f1list)
                    ]
                self.process_samples(start_vars, self.this_trait.data.keys(),
                                     primary_samples)

            self.target_dataset = data_set.create_dataset(
                start_vars['corr_dataset'])
            self.target_dataset.get_trait_data(self.sample_data.keys())

            self.header_fields = get_header_fields(self.target_dataset.type,
                                                   self.corr_method)

            if self.target_dataset.type == "ProbeSet":
                self.filter_cols = [7, 6]
            elif self.target_dataset.type == "Publish":
                self.filter_cols = [6, 0]
            else:
                self.filter_cols = [4, 0]

            self.correlation_results = []

            self.correlation_data = {}

            if self.corr_type == "tissue":
                self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

                tissue_corr_data = self.do_tissue_correlation_for_all_traits()
                if tissue_corr_data != None:
                    for trait in tissue_corr_data.keys()[:self.return_number]:
                        self.get_sample_r_and_p_values(
                            trait, self.target_dataset.trait_data[trait])
                else:
                    for trait, values in self.target_dataset.trait_data.iteritems(
                    ):
                        self.get_sample_r_and_p_values(trait, values)

            elif self.corr_type == "lit":
                self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
                lit_corr_data = self.do_lit_correlation_for_all_traits()

                for trait in lit_corr_data.keys()[:self.return_number]:
                    self.get_sample_r_and_p_values(
                        trait, self.target_dataset.trait_data[trait])

            elif self.corr_type == "sample":
                for trait, values in self.target_dataset.trait_data.iteritems(
                ):
                    self.get_sample_r_and_p_values(trait, values)

            self.correlation_data = collections.OrderedDict(
                sorted(self.correlation_data.items(),
                       key=lambda t: -abs(t[1][0])))

            if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":
                #ZS: Convert min/max chromosome to an int for the location range option
                range_chr_as_int = None
                for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.iteritems(
                ):
                    if 'loc_chr' in start_vars:
                        if chr_info.name == self.location_chr:
                            range_chr_as_int = order_id

            for _trait_counter, trait in enumerate(
                    self.correlation_data.keys()[:self.return_number]):
                trait_object = GeneralTrait(dataset=self.target_dataset,
                                            name=trait,
                                            get_qtl_info=True,
                                            get_sample_info=False)

                if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":
                    #ZS: Convert trait chromosome to an int for the location range option
                    chr_as_int = 0
                    for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.iteritems(
                    ):
                        if chr_info.name == trait_object.chr:
                            chr_as_int = order_id

                if (float(
                        self.correlation_data[trait][0]) >= self.p_range_lower
                        and float(self.correlation_data[trait][0]) <=
                        self.p_range_upper):

                    if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":

                        if (self.min_expr != None) and (float(
                                trait_object.mean) < self.min_expr):
                            continue
                        elif range_chr_as_int != None and (chr_as_int !=
                                                           range_chr_as_int):
                            continue
                        elif (self.min_location_mb != None) and (float(
                                trait_object.mb) < float(
                                    self.min_location_mb)):
                            continue
                        elif (self.max_location_mb != None) and (float(
                                trait_object.mb) > float(
                                    self.max_location_mb)):
                            continue

                    (trait_object.sample_r, trait_object.sample_p,
                     trait_object.num_overlap) = self.correlation_data[trait]

                    # Set some sane defaults
                    trait_object.tissue_corr = 0
                    trait_object.tissue_pvalue = 0
                    trait_object.lit_corr = 0
                    if self.corr_type == "tissue" and tissue_corr_data != None:
                        trait_object.tissue_corr = tissue_corr_data[trait][1]
                        trait_object.tissue_pvalue = tissue_corr_data[trait][2]
                    elif self.corr_type == "lit":
                        trait_object.lit_corr = lit_corr_data[trait][1]
                    self.correlation_results.append(trait_object)

            self.target_dataset.get_trait_info(
                self.correlation_results, self.target_dataset.group.species)

            if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_lit_correlation_for_trait_list()

            if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_tissue_correlation_for_trait_list()

        self.json_results = generate_corr_json(self.correlation_results,
                                               self.this_trait, self.dataset,
                                               self.target_dataset)
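
The asserts at the top of this constructor spell out most of the required start_vars keys; a minimal sketch with placeholder values:

# Placeholder form values implied by the asserts above
start_vars = {
    "corr_type": "sample",                    # "sample", "tissue" or "lit"
    "dataset": "HC_M2_0606_P",
    "trait_id": "1427571_at",                 # not asserted, but needed by the Temp branch
    "corr_sample_method": "pearson",          # assumed value; read by get_formatted_corr_type
    "corr_samples_group": "samples_primary",
    "corr_dataset": "BXDPublish",
    "corr_return_results": 500,
}
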
Example #11
def do_mapping_for_api(start_vars):
    assert ('db' in start_vars)
    assert ('trait_id' in start_vars)

    dataset = data_set.create_dataset(dataset_name=start_vars['db'])
    dataset.group.get_markers()
    this_trait = GeneralTrait(dataset=dataset, name=start_vars['trait_id'])
    this_trait = retrieve_sample_data(this_trait, dataset)

    samples = []
    vals = []

    for sample in dataset.group.samplelist:
        in_trait_data = False
        for item in this_trait.data:
            if this_trait.data[item].name == sample:
                value = str(this_trait.data[item].value)
                samples.append(item)
                vals.append(value)
                in_trait_data = True
                break
        if not in_trait_data:
            vals.append("x")

    mapping_params = initialize_parameters(start_vars, dataset, this_trait)

    covariates = ""  #ZS: It seems to take an empty string as default. This should probably be changed.

    if mapping_params['mapping_method'] == "gemma":
        header_row = ["name", "chr", "Mb", "lod_score", "p_value"]
        #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api
        if mapping_params['use_loco'] == "True":
            result_markers = gemma_mapping.run_gemma(
                this_trait, dataset, samples, vals, covariates,
                mapping_params['use_loco'], mapping_params['maf'])[0]
        else:
            result_markers = gemma_mapping.run_gemma(
                this_trait, dataset, samples, vals, covariates,
                mapping_params['use_loco'], mapping_params['maf'])
    elif mapping_params['mapping_method'] == "rqtl":
        header_row = ["name", "chr", "cM", "lod_score"]
        if mapping_params['num_perm'] > 0:
            _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(
                vals, dataset, mapping_params['rqtl_method'],
                mapping_params['rqtl_model'], mapping_params['perm_check'],
                mapping_params['num_perm'], mapping_params['do_control'],
                mapping_params['control_marker'],
                mapping_params['manhattan_plot'], mapping_params['pair_scan'])
        else:
            result_markers = rqtl_mapping.run_rqtl_geno(
                vals, dataset, mapping_params['rqtl_method'],
                mapping_params['rqtl_model'], mapping_params['perm_check'],
                mapping_params['num_perm'], mapping_params['do_control'],
                mapping_params['control_marker'],
                mapping_params['manhattan_plot'], mapping_params['pair_scan'])

    if mapping_params['limit_to']:
        result_markers = result_markers[:mapping_params['limit_to']]

    if mapping_params['format'] == "csv":
        output_rows = []
        output_rows.append(header_row)
        for marker in result_markers:
            this_row = [marker[header] for header in header_row]
            output_rows.append(this_row)

        return output_rows, mapping_params['format']
    elif mapping_params['format'] == "json":
        return result_markers, mapping_params['format']
    else:
        return result_markers, None
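
A hedged sketch of an API mapping request; only 'db' and 'trait_id' are asserted above, and any further keys (mapping method, output format, permutations and so on) are handled by initialize_parameters, which is not shown here:

# Placeholder request values
start_vars = {"db": "HC_M2_0606_P", "trait_id": "1427571_at"}
results, out_format = do_mapping_for_api(start_vars)
# out_format is "csv", "json", or None depending on mapping_params['format']
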
Example #12
    def __init__(self, kw):
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_"+kw['species']+ "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = kw['trait_paste'].split()

            # Put values in Redis so they can be looked up later if added to a collection
            Redis.set(self.trait_id, kw['trait_paste'])
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            self.temp_species = self.trait_id.split("_")[1]
            self.temp_group = self.trait_id.split("_")[2]
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()

        #self.dataset.group.read_genotype_file()

        #if this_trait:
        #    if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet':
        #            self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" %
        #                                this_trait.mysqlid)
        #            heritability = self.cursor.fetchone()

        #ZS: Get verify/rna-seq link URLs
        try:
            blatsequence = self.this_trait.blatseq
            if not blatsequence:
                #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
                query1 = """SELECT Probe.Sequence, Probe.Name
                           FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                           WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                                 ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                                 ProbeSetFreeze.Name = '%s' AND
                                 ProbeSet.Name = '%s' AND
                                 Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (self.this_trait.dataset.name, self.this_trait.name)
                seqs = g.db.execute(query1).fetchall()
                if not seqs:
                    raise ValueError
                else:
                    blatsequence = ''
                    for seqt in seqs:
                        if int(seqt[1][-1]) % 2 == 1:
                            blatsequence += string.strip(seqt[0])

            #--------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe
            blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'
            #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
            query2 = """SELECT Probe.Sequence, Probe.Name
                        FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                        WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                              ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                              ProbeSetFreeze.Name = '%s' AND
                              ProbeSet.Name = '%s' AND
                              Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (self.this_trait.dataset.name, self.this_trait.name)

            seqs = g.db.execute(query2).fetchall()
            for seqt in seqs:
                if int(seqt[1][-1]) %2 == 1:
                    blatsequence += '%3EProbe_' + string.strip(seqt[1]) + '%0A' + string.strip(seqt[0]) + '%0A'

            if self.dataset.group.species == "rat":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn3', blatsequence)
                self.UTHSC_BLAT_URL = ""
            elif self.dataset.group.species == "mouse":
                self.UCSC_BLAT_URL = webqtlConfig.UTHSC_BLAT2 % ('mouse', 'mm10', blatsequence)
                self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ('mouse', 'mm10', blatsequence)
            elif self.dataset.group.species == "human":
                self.UCSC_BLAT_URL = webqtlConfig.UTHSC_BLAT2 % ('human', 'hg19', blatsequence)
                self.UTHSC_BLAT_URL = ""
            else:
                self.UCSC_BLAT_URL = ""
                self.UTHSC_BLAT_URL = ""
        except:
            self.UCSC_BLAT_URL = ""
            self.UTHSC_BLAT_URL = ""

        self.build_correlation_tools()

        #Get nearest marker for composite mapping
        if not self.temp_trait:
            if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
                #self.nearest_marker1 = get_nearest_marker(self.this_trait, self.dataset)[0]
                #self.nearest_marker2 = get_nearest_marker(self.this_trait, self.dataset)[1]
            else:
                self.nearest_marker = ""
                #self.nearest_marker1 = ""
                #self.nearest_marker2 = ""

        self.make_sample_lists()

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            hddn['allsamples'] = string.join(self.dataset.group.allsamples, ' ')

        hddn['trait_id'] = self.trait_id
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = False
        if self.temp_trait:
            hddn['temp_trait'] = True
            hddn['group'] = self.temp_group
            hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "pylmm"
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if not self.temp_trait:
            if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                hddn['control_marker'] = self.nearest_marker
                #hddn['control_marker'] = self.nearest_marker1+","+self.nearest_marker2
        hddn['do_control'] = False
        hddn['maf'] = 0.01
        hddn['compare_traits'] = []
        hddn['export_data'] = ""

        # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
        self.hddn = hddn

        self.temp_uuid = uuid.uuid4()

        self.sample_group_types = OrderedDict()
        if len(self.sample_groups) > 1:
            self.sample_group_types['samples_primary'] = self.dataset.group.name
            self.sample_group_types['samples_other'] = "Other"
            self.sample_group_types['samples_all'] = "All"
        else:
            self.sample_group_types['samples_primary'] = self.dataset.group.name
        sample_lists = [group.sample_list for group in self.sample_groups]

        self.get_mapping_methods()

        self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups)

        trait_symbol = None
        if not self.temp_trait:
            if self.this_trait.symbol:
                trait_symbol = self.this_trait.symbol

        js_data = dict(trait_id = self.trait_id,
                       trait_symbol = trait_symbol,
                       dataset_type = self.dataset.type,
                       data_scale = self.dataset.data_scale,
                       sample_group_types = self.sample_group_types,
                       sample_lists = sample_lists,
                       attribute_names = self.sample_groups[0].attributes,
                       temp_uuid = self.temp_uuid)
        self.js_data = js_data
Example #13
    def run_analysis(self, requestform):
        logger.info("Starting PheWAS analysis on dataset")
        genofilelocation = locate(
            "BXD.geno", "genotype")  # Get the location of the BXD genotypes
        precompfile = locate_phewas(
            "PheWAS_pval_EMMA_norm.RData",
            "auwerx")  # Get the location of the pre-computed EMMA results

        # Get user parameters, trait_id and dataset, and store/update them in self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)
        self.region = int(requestform["num_region"])
        self.mtadjust = str(requestform["sel_mtadjust"])

        # Log some debug info
        logger.info("self.trait_id:" + self.trait_id + "\n")
        logger.info("self.datasetname:" + self.datasetname + "\n")
        logger.info("self.dataset.type:" + self.dataset.type + "\n")

        # GN Magic ?
        self.this_trait = GeneralTrait(dataset=self.dataset,
                                       name=self.trait_id,
                                       get_qtl_info=False,
                                       get_sample_info=False)
        logger.info(vars(self.this_trait))

        # Set the values we need
        self.chr = str(self.this_trait.chr)
        self.mb = int(self.this_trait.mb)

        # Log some debug info
        logger.info("location:" + self.chr + ":" + str(self.mb) + "+/-" +
                    str(self.region) + "\n")

        # Load in the genotypes file *sigh* to make the markermap
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        snpinfo = []
        for marker in parser.markers:
            snpinfo.append(marker["name"])
            snpinfo.append(marker["chr"])
            snpinfo.append(marker["Mb"])

        rnames = r_seq(1, len(parser.markers))
        # Create the snp aligner object out of the BXD genotypes
        snpaligner = ro.r.matrix(snpinfo,
                                 nrow=len(parser.markers),
                                 dimnames=r_list(rnames,
                                                 r_c("SNP", "Chr", "Pos")),
                                 ncol=3,
                                 byrow=True)

        # Create the phenotype aligner object using R
        phenoaligner = self.r_create_Pheno_aligner()

        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("phewas_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
        self.results['mtadjust'] = self.mtadjust
        logger.info("IMAGE AT:", self.results['imgurl1'])
        logger.info("IMAGE AT:", self.results['imgloc1'])
        # Create the PheWAS plot (The gene/probe name, chromosome and gene/probe positions should come from the user input)
        # TODO: generate the PDF in the temp folder, with a unique name
        assert (precompfile)
        assert (phenoaligner)
        assert (snpaligner)
        phewasres = self.r_PheWASManhattan("Test", precompfile, phenoaligner,
                                           snpaligner, "None", self.chr,
                                           self.mb, self.region,
                                           self.results['imgloc1'],
                                           self.mtadjust)
        self.results['phewas1'] = phewasres[0]
        self.results['phewas2'] = phewasres[1]
        self.results['tabulardata'] = phewasres[2]
        self.results['R_debuglog'] = phewasres[3]

        #self.r_PheWASManhattan(allpvalues)
        #self.r_Stop()

        logger.info("Initialization of PheWAS done !")
Example #14
    def __init__(self, kw):
        assert ('type' in kw)
        assert ('terms' in kw)

        self.type = kw['type']
        self.terms = kw['terms']
        assert (is_str(self.type))

        if self.type == "gene":
            sql = """
                SELECT
                Species.`Name` AS species_name,
                InbredSet.`Name` AS inbredset_name,
                Tissue.`Name` AS tissue_name,
                ProbeSetFreeze.Name AS probesetfreeze_name,
                ProbeSetFreeze.FullName AS probesetfreeze_fullname,
                ProbeSet.Name AS probeset_name,
                ProbeSet.Symbol AS probeset_symbol,
                ProbeSet.`description` AS probeset_description,
                ProbeSet.Chr AS chr,
                ProbeSet.Mb AS mb,
                ProbeSetXRef.Mean AS mean,
                ProbeSetXRef.LRS AS lrs,
                ProbeSetXRef.`Locus` AS locus,
                ProbeSetXRef.`pValue` AS pvalue,
                ProbeSetXRef.`additive` AS additive
                FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue
                WHERE InbredSet.`SpeciesId`=Species.`Id`
                AND ProbeFreeze.InbredSetId=InbredSet.`Id`
                AND ProbeFreeze.`TissueId`=Tissue.`Id`
                AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
                AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
                AND ProbeSet.Id = ProbeSetXRef.ProbeSetId
                AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
                AND ProbeSetFreeze.confidentiality < 1
                AND ProbeSetFreeze.public > 0
                ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
                LIMIT 6000
                """ % (self.terms)
            with Bench("Running query"):
                logger.sql(sql)
                re = g.db.execute(sql).fetchall()

            trait_list = []
            with Bench("Creating trait objects"):
                for i, line in enumerate(re):
                    this_trait = {}
                    this_trait['index'] = i + 1
                    this_trait['name'] = line[5]
                    this_trait['dataset'] = line[3]
                    this_trait['dataset_fullname'] = line[4]
                    this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(
                        line[5], line[3]))
                    this_trait['species'] = line[0]
                    this_trait['group'] = line[1]
                    this_trait['tissue'] = line[2]
                    this_trait['symbol'] = line[6]
                    this_trait['description'] = line[7].decode(
                        'utf-8', 'replace')
                    this_trait['location_repr'] = 'N/A'
                    if (line[8] != "NULL"
                            and line[8] != "") and (line[9] != 0):
                        this_trait['location_repr'] = 'Chr%s: %.6f' % (
                            line[8], float(line[9]))
                    try:
                        this_trait['mean'] = '%.3f' % line[10]
                    except:
                        this_trait['mean'] = "N/A"
                    this_trait['LRS_score_repr'] = "N/A"
                    if line[11] != "" and line[11] != None:
                        this_trait['LRS_score_repr'] = '%3.1f' % line[11]
                    this_trait['additive'] = "N/A"
                    if line[14] != "" and line[14] != None:
                        this_trait['additive'] = '%.3f' % line[14]

                    #dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False)
                    #trait_id = line[4]
                    #with Bench("Building trait object"):
                    trait_ob = GeneralTrait(dataset_name=this_trait['dataset'],
                                            name=this_trait['name'],
                                            get_qtl_info=True,
                                            get_sample_info=False)
                    max_lrs_text = "N/A"
                    if trait_ob.locus_chr != "" and trait_ob.locus_mb != "":
                        max_lrs_text = "Chr" + str(
                            trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb)
                    this_trait['max_lrs_text'] = max_lrs_text

                    trait_list.append(this_trait)

            self.trait_count = len(trait_list)
            self.trait_list = json.dumps(trait_list)

            self.header_fields = [
                'Index', 'Record', 'Species', 'Group', 'Tissue', 'Dataset',
                'Symbol', 'Description', 'Location', 'Mean', 'Max LRS',
                'Max LRS Location', 'Additive Effect'
            ]

        elif self.type == "phenotype":
            search_term = self.terms
            group_clause = ""
            if "_" in self.terms:
                if len(self.terms.split("_")[0]) == 3:
                    search_term = self.terms.split("_")[1]
                    group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format(
                        self.terms.split("_")[0])
            sql = """
                SELECT
                Species.`Name`,
                InbredSet.`Name`,
                PublishFreeze.`Name`,
                PublishFreeze.`FullName`,
                PublishXRef.`Id`,
                Phenotype.`Pre_publication_description`,
                Phenotype.`Post_publication_description`,
                Publication.`Authors`,
                Publication.`Year`,
                Publication.`PubMed_ID`,
                PublishXRef.`LRS`,
                PublishXRef.`additive`,
                InbredSet.`InbredSetCode`,
                PublishXRef.`mean`
                FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
                WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
                AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
                AND InbredSet.`SpeciesId`=Species.`Id`
                {0}
                AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
                AND PublishXRef.`PublicationId`=Publication.`Id`
                AND	  (Phenotype.Post_publication_description REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Phenotype.Pre_publication_description REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Phenotype.Lab_code REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Publication.PubMed_ID REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Publication.Abstract REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Publication.Title REGEXP "[[:<:]]{1}[[:>:]]"
                    OR Publication.Authors REGEXP "[[:<:]]{1}[[:>:]]"
                    OR PublishXRef.Id REGEXP "[[:<:]]{1}[[:>:]]")
                ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
                LIMIT 6000
                """.format(group_clause, search_term)
            logger.sql(sql)
            re = g.db.execute(sql).fetchall()
            trait_list = []
            with Bench("Creating trait objects"):
                for i, line in enumerate(re):
                    this_trait = {}
                    this_trait['index'] = i + 1
                    this_trait['name'] = str(line[4])
                    if len(str(line[12])) == 3:
                        this_trait['display_name'] = str(
                            line[12]) + "_" + this_trait['name']
                    else:
                        this_trait['display_name'] = this_trait['name']
                    this_trait['dataset'] = line[2]
                    this_trait['dataset_fullname'] = line[3]
                    this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(
                        line[4], line[2]))
                    this_trait['species'] = line[0]
                    this_trait['group'] = line[1]
                    if line[9] != None and line[6] != None:
                        this_trait['description'] = line[6].decode(
                            'utf-8', 'replace')
                    elif line[5] != None:
                        this_trait['description'] = line[5].decode(
                            'utf-8', 'replace')
                    else:
                        this_trait['description'] = "N/A"
                    if line[13] != None and line[13] != "":
                        this_trait['mean'] = line[13]
                    else:
                        this_trait['mean'] = "N/A"
                    this_trait['authors'] = line[7]
                    this_trait['year'] = line[8]
                    if this_trait['year'].isdigit():
                        this_trait['pubmed_text'] = this_trait['year']
                    else:
                        this_trait['pubmed_text'] = "N/A"
                    if line[9] != "" and line[9] != None:
                        this_trait[
                            'pubmed_link'] = webqtlConfig.PUBMEDLINK_URL % line[
                                8]
                    else:
                        this_trait['pubmed_link'] = "N/A"
                        if line[12]:
                            this_trait['display_name'] = line[12] + "_" + str(
                                this_trait['name'])
                    this_trait['LRS_score_repr'] = "N/A"
                    if line[10] != "" and line[10] != None:
                        this_trait['LRS_score_repr'] = '%3.1f' % line[10]
                    this_trait['additive'] = "N/A"
                    if line[11] != "" and line[11] != None:
                        this_trait['additive'] = '%.3f' % line[11]

                    #dataset = create_dataset(line[2], "Publish")
                    #trait_id = line[3]
                    #this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False)
                    this_trait['max_lrs_text'] = "N/A"
                    if this_trait[
                            'dataset'] == this_trait['group'] + "Publish":
                        try:
                            trait_ob = GeneralTrait(
                                dataset_name=this_trait['dataset'],
                                name=this_trait['name'],
                                get_qtl_info=True,
                                get_sample_info=False)
                            if trait_ob.locus_chr != "" and trait_ob.locus_mb != "":
                                this_trait['max_lrs_text'] = "Chr" + str(
                                    trait_ob.locus_chr) + ": " + str(
                                        trait_ob.locus_mb)
                        except:
                            this_trait['max_lrs_text'] = "N/A"

                    trait_list.append(this_trait)

            self.trait_count = len(trait_list)
            self.trait_list = json.dumps(trait_list)

            self.header_fields = [
                'Index', 'Species', 'Group', 'Record', 'Description',
                'Authors', 'Year', 'Max LRS', 'Max LRS Location',
                'Additive Effect'
            ]
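    # Minimal usage sketch for the search constructor above (assumption: it belongs
    # to the class behind the global-search page, e.g. GSearch; the class name is
    # not shown in this snippet):
    #     results = GSearch({"type": "gene", "terms": "shh"})
    #     print(results.trait_count)
    #     print(results.header_fields)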
    def __init__(self, kw):
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_" + kw['species'] + "_" + kw[
                'group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = kw['trait_paste'].split()

            # Put values in Redis so they can be looked up later if added to a collection
            Redis.set(self.trait_id, kw['trait_paste'])
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            self.temp_species = self.trait_id.split("_")[1]
            self.temp_group = self.trait_id.split("_")[2]
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()

        #self.dataset.group.read_genotype_file()

        #if this_trait:
        #    if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet':
        #            self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" %
        #                                this_trait.mysqlid)
        #            heritability = self.cursor.fetchone()

        self.build_correlation_tools()

        #Get nearest marker for composite mapping
        if not self.temp_trait:
            if hasattr(
                    self.this_trait, 'locus_chr'
            ) and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                self.nearest_marker = get_nearest_marker(
                    self.this_trait, self.dataset)
                #self.nearest_marker1 = get_nearest_marker(self.this_trait, self.dataset)[0]
                #self.nearest_marker2 = get_nearest_marker(self.this_trait, self.dataset)[1]
            else:
                self.nearest_marker = ""
                #self.nearest_marker1 = ""
                #self.nearest_marker2 = ""

        self.make_sample_lists()

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            hddn['allsamples'] = ' '.join(self.dataset.group.allsamples)

        hddn['trait_id'] = self.trait_id
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = False
        if self.temp_trait:
            hddn['temp_trait'] = True
            hddn['group'] = self.temp_group
            hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "pylmm"
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if not self.temp_trait:
            if hasattr(
                    self.this_trait, 'locus_chr'
            ) and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                hddn['control_marker'] = self.nearest_marker
                #hddn['control_marker'] = self.nearest_marker1+","+self.nearest_marker2
        hddn['do_control'] = False
        hddn['maf'] = 0.01
        hddn['compare_traits'] = []
        hddn['export_data'] = ""

        # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
        self.hddn = hddn

        self.temp_uuid = uuid.uuid4()

        self.sample_group_types = OrderedDict()
        if len(self.sample_groups) > 1:
            self.sample_group_types[
                'samples_primary'] = self.dataset.group.name + " Only"
            self.sample_group_types[
                'samples_other'] = "Non-" + self.dataset.group.name
            self.sample_group_types['samples_all'] = "All Cases"
        else:
            self.sample_group_types[
                'samples_primary'] = self.dataset.group.name
        sample_lists = [group.sample_list for group in self.sample_groups]

        self.get_mapping_methods()

        self.trait_table_width = get_trait_table_width(self.sample_groups)

        trait_symbol = None
        if not self.temp_trait:
            if self.this_trait.symbol:
                trait_symbol = self.this_trait.symbol

        js_data = dict(trait_id=self.trait_id,
                       trait_symbol=trait_symbol,
                       dataset_type=self.dataset.type,
                       data_scale=self.dataset.data_scale,
                       sample_group_types=self.sample_group_types,
                       sample_lists=sample_lists,
                       attribute_names=self.sample_groups[0].attributes,
                       temp_uuid=self.temp_uuid)
        self.js_data = js_data
    def __init__(self, params):
        self.data_set_1 = data_set.create_dataset(params['dataset_1'])
        self.data_set_2 = data_set.create_dataset(params['dataset_2'])
        self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1)
        self.trait_2 = GeneralTrait(name=params['trait_2'], dataset=self.data_set_2)

        try:
            width = int(params['width'])
        except:
            width = 800
        self.width = width

        try:
            height = int(params['height'])
        except:
            height = 600
        self.height = height

        try:
            circle_color = params['circle_color']
        except:
            circle_color = '#3D85C6'
        self.circle_color = circle_color

        try:
            circle_radius = int(params['circle_radius'])
        except:
            circle_radius = 5
        self.circle_radius = circle_radius

        try:
            line_color = params['line_color']
        except:
            line_color = '#FF0000'
        self.line_color = line_color

        try:
            line_width = int(params['line_width'])
        except:
            line_width = 1
        self.line_width = line_width

        samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data)

        self.data = []
        self.indIDs = samples_1.keys()
        vals_1 = []
        for sample in samples_1.keys():
            vals_1.append(samples_1[sample].value)
        self.data.append(vals_1)
        vals_2 = []
        for sample in samples_2.keys():
            vals_2.append(samples_2[sample].value)
        self.data.append(vals_2)

        x = np.array(vals_1)
        y = np.array(vals_2)
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
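        # Below, both vectors are rank-transformed and regressed again; the Pearson r
        # of ranks equals Spearman's rho, so srr_value is the Spearman correlation.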
        
        rx = stats.rankdata(x)
        ry = stats.rankdata(y)        
        self.rdata = []
        self.rdata.append(rx.tolist())
        self.rdata.append(ry.tolist())        
        srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry)

        self.js_data = dict(
            data = self.data,
            rdata = self.rdata,
            indIDs = self.indIDs,
            trait_1 = self.trait_1.dataset.name + ": " + str(self.trait_1.name),
            trait_2 = self.trait_2.dataset.name + ": " + str(self.trait_2.name),
            samples_1 = samples_1,
            samples_2 = samples_2,
            num_overlap = num_overlap,
            vals_1 = vals_1,
            vals_2 = vals_2,
            
            slope = slope,
            intercept = intercept,
            r_value = r_value,
            p_value = p_value,
            
            srslope = srslope,
            srintercept = srintercept,
            srr_value = srr_value,
            srp_value = srp_value,
            
            width = width,
            height = height,
            circle_color = circle_color,
            circle_radius = circle_radius,
            line_color = line_color,
            line_width = line_width
        )
        self.jsdata = self.js_data
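    # Minimal usage sketch for the constructor above (assumptions: the enclosing class
    # is the correlation scatter-plot view, e.g. CorrScatterPlot, and the dataset and
    # trait names below are placeholders, not real records):
    #     plot = CorrScatterPlot(dict(dataset_1="SomeProbeSetData", trait_1="12345_at",
    #                                 dataset_2="SomeProbeSetData", trait_2="67890_at",
    #                                 width="800", height="600", circle_color="#3D85C6",
    #                                 circle_radius="5", line_color="#FF0000",
    #                                 line_width="1"))
    #     # plot.js_data holds everything the scatter-plot template needs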
Exemple #17
0
    def __init__(self, params):
        self.data_set_1 = data_set.create_dataset(params['dataset_1'])
        self.data_set_2 = data_set.create_dataset(params['dataset_2'])
        #self.data_set_3 = data_set.create_dataset(params['dataset_3'])
        self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1)
        self.trait_2 = GeneralTrait(name=params['trait_2'], dataset=self.data_set_2)
        #self.trait_3 = GeneralTrait(name=params['trait_3'], dataset=self.data_set_3)

        samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data)

        self.data = []
        self.indIDs = samples_1.keys()
        vals_1 = []
        for sample in samples_1.keys():
            vals_1.append(samples_1[sample].value)
        self.data.append(vals_1)
        vals_2 = []
        for sample in samples_2.keys():
            vals_2.append(samples_2[sample].value)
        self.data.append(vals_2)

        x = np.array(vals_1)
        y = np.array(vals_2)
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        
        rx = stats.rankdata(x)
        ry = stats.rankdata(y)        
        self.rdata = []
        self.rdata.append(rx.tolist())
        self.rdata.append(ry.tolist())        
        srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry)

        #vals_3 = []
        #for sample in self.trait_3.data:
        #    vals_3.append(self.trait_3.data[sample].value)

        self.js_data = dict(
            data = self.data,
            rdata = self.rdata,
            indIDs = self.indIDs,
            trait_1 = self.trait_1.dataset.name + ": " + str(self.trait_1.name),
            trait_2 = self.trait_2.dataset.name + ": " + str(self.trait_2.name),
            samples_1 = samples_1,
            samples_2 = samples_2,
            num_overlap = num_overlap,
            vals_1 = vals_1,
            vals_2 = vals_2,

            slope = slope,
            intercept = intercept,
            r_value = r_value,
            p_value = p_value,

            srslope = srslope,
            srintercept = srintercept,
            srr_value = srr_value,
            srp_value = srp_value

            #trait3 = self.trait_3.data,
            #vals_3 = vals_3
        )
        self.jsdata = self.js_data
    def __init__(self, start_vars):
        # get trait list from db (database name)
        # calculate correlation with Base vector and targets

        # Check parameters
        assert('corr_type' in start_vars)
        assert(is_str(start_vars['corr_type']))
        assert('dataset' in start_vars)
        # assert('group' in start_vars) permitted to be empty?
        assert('corr_sample_method' in start_vars)
        assert('corr_samples_group' in start_vars)
        assert('corr_dataset' in start_vars)
        assert('min_expr' in start_vars)
        assert('corr_return_results' in start_vars)
        if 'loc_chr' in start_vars:
            assert('min_loc_mb' in start_vars)
            assert('max_loc_mb' in start_vars)

        with Bench("Doing correlations"):
            if start_vars['dataset'] == "Temp":
                self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
                self.trait_id = "Temp"
                self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            else:
                helper_functions.get_species_dataset_trait(self, start_vars)

            self.dataset.group.read_genotype_file()

            corr_samples_group = start_vars['corr_samples_group']

            self.sample_data = {}
            self.corr_type = start_vars['corr_type']
            self.corr_method = start_vars['corr_sample_method']
            self.min_expr = get_float(start_vars,'min_expr')
            self.p_range_lower = get_float(start_vars,'p_range_lower',-1.0)
            self.p_range_upper = get_float(start_vars,'p_range_upper',1.0)

            if ('loc_chr' in start_vars and
                'min_loc_mb' in start_vars and
                'max_loc_mb' in start_vars):

                self.location_chr = get_string(start_vars,'loc_chr')
                self.min_location_mb = get_int(start_vars,'min_loc_mb')
                self.max_location_mb = get_int(start_vars,'max_loc_mb')

            self.get_formatted_corr_type()
            self.return_number = int(start_vars['corr_return_results'])

            # The two if statements below append samples to the sample list based upon whether the user
            # selected Primary Samples Only, Other Samples Only, or All Samples

            primary_samples = self.dataset.group.samplelist
            if self.dataset.group.parlist != None:
                primary_samples += self.dataset.group.parlist
            if self.dataset.group.f1list != None:
                primary_samples += self.dataset.group.f1list

            #If either BXD/whatever Only or All Samples, append all of that group's samplelist
            if corr_samples_group != 'samples_other':
                self.process_samples(start_vars, primary_samples)

            #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
            #exclude the primary samples (because they would have been added in the previous
            #if statement if the user selected All Samples)
            if corr_samples_group != 'samples_primary':
                if corr_samples_group == 'samples_other':
                    primary_samples = [x for x in primary_samples if x not in (
                                    self.dataset.group.parlist + self.dataset.group.f1list)]
                self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)

            self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
            self.target_dataset.get_trait_data(self.sample_data.keys())

            self.correlation_results = []

            self.correlation_data = {}
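            # Three correlation types are handled below: "tissue" and "lit" first rank
            # the target traits by tissue/literature correlation and keep only the top
            # return_number before computing sample r and p values; "sample" computes
            # r and p for every trait in the target dataset.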

            if self.corr_type == "tissue":
                self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

                tissue_corr_data = self.do_tissue_correlation_for_all_traits()
                if tissue_corr_data != None:
                    for trait in tissue_corr_data.keys()[:self.return_number]:
                        self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
                else:
                    for trait, values in self.target_dataset.trait_data.iteritems():
                        self.get_sample_r_and_p_values(trait, values)

            elif self.corr_type == "lit":
                self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
                lit_corr_data = self.do_lit_correlation_for_all_traits()

                for trait in lit_corr_data.keys()[:self.return_number]:
                    self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])

            elif self.corr_type == "sample":
                for trait, values in self.target_dataset.trait_data.iteritems():
                    self.get_sample_r_and_p_values(trait, values)

            self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
                                                                   key=lambda t: -abs(t[1][0])))
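            # correlation_data maps each target trait name to (sample_r, sample_p,
            # num_overlap); sorting by -abs(sample_r) puts the strongest correlations,
            # positive or negative, first.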


            if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":
                #ZS: Convert min/max chromosome to an int for the location range option
                range_chr_as_int = None
                for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.iteritems():
                    if chr_info.name == self.location_chr:
                        range_chr_as_int = order_id

            for _trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
                trait_object = GeneralTrait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)

                if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":
                    #ZS: Convert trait chromosome to an int for the location range option
                    chr_as_int = 0
                    for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.iteritems():
                        if chr_info.name == trait_object.chr:
                            chr_as_int = order_id

                if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
                    float(self.correlation_data[trait][0]) <= self.p_range_upper):

                    if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":

                        if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
                            continue
                        elif range_chr_as_int != None and (chr_as_int != range_chr_as_int):
                            continue
                        elif (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
                            continue
                        elif (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
                            continue

                        (trait_object.sample_r,
                        trait_object.sample_p,
                        trait_object.num_overlap) = self.correlation_data[trait]

                        # Set some sane defaults
                        trait_object.tissue_corr = 0
                        trait_object.tissue_pvalue = 0
                        trait_object.lit_corr = 0
                        if self.corr_type == "tissue" and tissue_corr_data != None:
                            trait_object.tissue_corr = tissue_corr_data[trait][1]
                            trait_object.tissue_pvalue = tissue_corr_data[trait][2]
                        elif self.corr_type == "lit":
                            trait_object.lit_corr = lit_corr_data[trait][1]
                        self.correlation_results.append(trait_object)
                    else:
                        (trait_object.sample_r,
                        trait_object.sample_p,
                        trait_object.num_overlap) = self.correlation_data[trait]

                        # Set some sane defaults
                        trait_object.tissue_corr = 0
                        trait_object.tissue_pvalue = 0
                        trait_object.lit_corr = 0
                        if self.corr_type == "tissue":
                            trait_object.tissue_corr = tissue_corr_data[trait][1]
                            trait_object.tissue_pvalue = tissue_corr_data[trait][2]
                        elif self.corr_type == "lit":
                            trait_object.lit_corr = lit_corr_data[trait][1]
                        self.correlation_results.append(trait_object)

            self.target_dataset.get_trait_info(self.correlation_results, self.target_dataset.group.species)

            if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_lit_correlation_for_trait_list()

            if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_tissue_correlation_for_trait_list()

        self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)