Example #1
0
    def get_sample_r_and_p_values(self, trait, target_samples):
        """Correlate the primary trait with one target trait and store the result.

        For every sample of ``self.target_dataset.samplelist`` that the primary
        trait has a value for (``self.sample_data``), the primary value is
        paired with the entry of ``target_samples`` at the same position.
        Both lists then go through ``corr_result_helpers.normalize_values``.

        When more than 5 overlapping values remain, the correlation chosen by
        ``self.corr_method`` is computed: ``'bicor'`` uses ``do_bicor``,
        ``'pearson'`` uses ``scipy.stats.pearsonr`` and any other value uses
        ``scipy.stats.spearmanr``.

        Nothing is returned.  A non-NaN coefficient is stored as
        ``self.correlation_data[trait] = [r, p, num_overlap]``; the normalized
        primary values are left in ``self.this_trait_vals``.
        """
        # (primary value, target value) for each sample the primary trait covers.
        value_pairs = [
            (self.sample_data[sample_name], target_samples[position])
            for position, sample_name in enumerate(self.target_dataset.samplelist)
            if sample_name in self.sample_data
        ]
        self.this_trait_vals = [primary for primary, _ in value_pairs]
        target_vals = [target for _, target in value_pairs]

        normalized = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals)
        self.this_trait_vals, target_vals, num_overlap = normalized

        # Too few shared samples: record nothing for this trait.
        if num_overlap <= 5:
            return

        # ZS (2015) note on biweight correlation:
        # http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
        method = self.corr_method
        if method == 'bicor':
            sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals)
        elif method == 'pearson':
            sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals)
        else:
            sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals)

        # A NaN coefficient is silently dropped rather than stored.
        if not numpy.isnan(sample_r):
            self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
Example #2
0
def get_sample_r_and_p_values(this_trait, this_dataset, target_vals,
                              target_dataset, type):
    """Calculate the sample r (or rho) and p-value for a trait pair.

    For every sample of ``target_dataset.group.samplelist`` that is present
    in ``this_trait.data``, the primary trait's ``.value`` is paired with the
    entry of ``target_vals`` at the same position.  Both lists are passed
    through ``corr_result_helpers.normalize_values`` before correlating.

    Args:
        this_trait: primary trait; ``this_trait.data[sample].value`` is read.
        this_dataset: not used by this function (kept for the call signature).
        target_vals: target trait values, indexed by position in the target
            dataset's sample list.
        target_dataset: dataset whose ``group.samplelist`` drives the pairing.
        type: ``'pearson'`` selects ``scipy.stats.pearsonr``; any other value
            selects ``scipy.stats.spearmanr``.

    Returns:
        ``[sample_r, sample_p, num_overlap]``, or ``None`` when 5 or fewer
        values overlap or the coefficient is NaN.
    """
    # Function-scope import so the fix does not depend on the file's imports.
    import math

    this_trait_vals = []
    shared_target_vals = []
    for i, sample in enumerate(target_dataset.group.samplelist):
        if sample in this_trait.data:
            this_trait_vals.append(this_trait.data[sample].value)
            shared_target_vals.append(target_vals[i])

    this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(
        this_trait_vals, shared_target_vals)

    # Check the overlap *before* correlating: the result would be discarded
    # anyway, and SciPy's pearsonr rejects inputs shorter than two samples.
    if num_overlap <= 5:
        return None

    if type == 'pearson':
        sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals,
                                                  shared_target_vals)
    else:
        sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals,
                                                   shared_target_vals)

    # math.isnan replaces scipy.isnan, a deprecated NumPy re-export that
    # newer SciPy releases no longer provide.
    if math.isnan(sample_r):
        return None
    return [sample_r, sample_p, num_overlap]
    def get_sample_r_and_p_values(self, trait, target_samples):
        """Calculate the sample r (or rho) and p-value and store the result.

        For every sample of ``self.target_dataset.samplelist`` that the
        primary trait has a value for (``self.sample_data``), the primary
        value is paired with the entry of ``target_samples`` at the same
        position.  Both lists are passed through
        ``corr_result_helpers.normalize_values`` before correlating with
        ``scipy.stats.pearsonr`` (``self.corr_method == 'pearson'``) or
        ``scipy.stats.spearmanr`` (any other method).

        Nothing is returned.  When more than 5 values overlap and the
        coefficient is not NaN, the result is stored as
        ``self.correlation_data[trait] = [r, p, num_overlap]``.
        """
        # Function-scope import so the fix does not depend on the file's imports.
        import math

        self.this_trait_vals = []
        target_vals = []
        for index, sample in enumerate(self.target_dataset.samplelist):
            if sample in self.sample_data:
                self.this_trait_vals.append(self.sample_data[sample])
                target_vals.append(target_samples[index])

        self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals)

        # Check the overlap *before* correlating: the result would be discarded
        # anyway, and SciPy's pearsonr rejects inputs shorter than two samples.
        if num_overlap <= 5:
            return

        #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
        if self.corr_method == 'pearson':
            sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals)
        else:
            sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals)

        # An undefined (NaN) coefficient is not recorded, matching the variant
        # of this method that checks numpy.isnan before storing.
        if not math.isnan(sample_r):
            self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
Example #4
0
    def __init__(self, start_vars):
        """Build the pairwise correlation matrix and, when possible, the PCA.

        ``start_vars['trait_list']`` is a comma-separated string; its stripped
        items are handed to ``helper_functions.get_trait_db_obs``.  After that
        call ``self.trait_list`` is read as a sequence whose items hold a
        trait at index 0 and its dataset at index 1.

        Attributes set here:
            traits: the trait objects, in ``trait_list`` order.
            all_sample_list: every sample found in any trait's data, in
                first-seen order.
            sample_data: one row per trait aligned with ``all_sample_list``;
                ``''`` marks a sample the trait has no value for.
            shared_samples_list: ``all_sample_list`` minus every sample that
                was missing from at least one compared trait pair.
            lowest_overlap: smallest overlap seen for any pair, capped at 8.
            corr_results: per trait, a row of ``[target_trait, r, overlap]``.
                Pearson is reported until a coefficient equal to 1 has been
                seen in the row; later cells of the row use Spearman.
            pca_corr_results: per trait, a row of Pearson coefficients
                (0 where fewer than 2 values overlap).
            export_filename, export_filepath: from ``export_corr_matrix``.
            trait_data_array: per trait, its values restricted to
                ``shared_samples_list``.
            do_PCA: False when there are fewer samples than traits.
            pca_works: the string ``"True"`` or ``"False"``.
            js_data: dict of the data handed to the front end.
        """
        trait_db_list = [
            trait.strip() for trait in start_vars['trait_list'].split(',')
        ]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.all_sample_list = []
        self.traits = []
        self.insufficient_shared_samples = False
        self.do_PCA = True
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)

            for sample in this_trait.data:
                if sample not in self.all_sample_list:
                    self.all_sample_list.append(sample)

        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append([
                this_sample_data[sample].value
                if sample in this_sample_data else ''
                for sample in self.all_sample_list
            ])

        # Shouldn't do PCA if there are more traits than observations/samples.
        # Every sample_data row has one entry per sample in all_sample_list.
        if len(self.all_sample_list) < len(self.trait_list):
            self.do_PCA = False

        #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)
        self.lowest_overlap = 8

        self.corr_results = []
        self.pca_corr_results = []
        # Work on a copy: samples are removed from the shared list below, and
        # all_sample_list must stay aligned with the rows of sample_data.
        self.shared_samples_list = list(self.all_sample_list)
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data

            corr_result_row = []
            pca_corr_result_row = []
            is_spearman = False  #ZS: To determine if it's above or below the diagonal
            for target in self.trait_list:
                target_trait = target[0]
                target_samples = target[1].group.all_samples_ordered()
                target_sample_data = target_trait.data

                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if sample in this_sample_data and sample in target_sample_data:
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)
                    elif sample in self.shared_samples_list:
                        self.shared_samples_list.remove(sample)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                    this_trait_vals, target_vals)

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap
                if num_overlap < 2:
                    corr_result_row.append([target_trait, 0, num_overlap])
                    pca_corr_result_row.append(0)
                    continue

                pearson_r, pearson_p = scipy.stats.pearsonr(
                    this_trait_vals, target_vals)
                if not is_spearman:
                    sample_r, sample_p = pearson_r, pearson_p
                    if sample_r == 1:
                        # From the next cell of this row on, report Spearman.
                        is_spearman = True
                else:
                    sample_r, sample_p = scipy.stats.spearmanr(
                        this_trait_vals, target_vals)

                corr_result_row.append([target_trait, sample_r, num_overlap])
                pca_corr_result_row.append(pearson_r)

            self.corr_results.append(corr_result_row)
            self.pca_corr_results.append(pca_corr_result_row)

        self.export_filename, self.export_filepath = export_corr_matrix(
            self.corr_results)

        self.trait_data_array = []
        for trait_db in self.trait_list:
            this_db_samples = trait_db[1].group.all_samples_ordered()
            this_sample_data = trait_db[0].data
            self.trait_data_array.append([
                this_sample_data[sample].value
                for sample in this_db_samples
                if sample in this_sample_data
                and sample in self.shared_samples_list
            ])

        groups = [1] * len(self.all_sample_list)

        if self.do_PCA:
            try:
                self.pca_works = "True"
                self.pca_trait_ids = []
                # The eigen decomposition is part of the guarded section so a
                # matrix numpy cannot decompose disables PCA instead of
                # aborting the whole page.
                corr_result_eigen = np.linalg.eig(
                    np.array(self.pca_corr_results))
                corr_eigen_value, corr_eigen_vectors = sortEigenVectors(
                    corr_result_eigen)
                self.calculate_pca(list(range(len(self.traits))),
                                   corr_eigen_value, corr_eigen_vectors)
                self.loadings_array = self.process_loadings()
            except Exception:
                # Best effort: any PCA failure only turns the PCA output off.
                self.pca_works = "False"
        else:
            self.pca_works = "False"

        self.js_data = dict(
            traits=[trait.name for trait in self.traits],
            groups=groups,
            cols=list(range(len(self.traits))),
            rows=list(range(len(self.traits))),
            samples=self.all_sample_list,
            sample_data=self.sample_data,
        )
    def __init__(self, start_vars):
        """Build the pairwise correlation matrix and the PCA loadings.

        ``start_vars['trait_list']`` is a comma-separated string; its stripped
        items are handed to ``helper_functions.get_trait_db_obs``.  After that
        call ``self.trait_list`` is read as a sequence whose items hold a
        trait at index 0 and its dataset at index 1.

        Attributes set here:
            traits: the trait objects, in ``trait_list`` order.
            all_sample_list: every sample found in any trait's data, in
                first-seen order.
            sample_data: one row per trait aligned with ``all_sample_list``;
                ``''`` marks a sample the trait has no value for.
            lowest_overlap: smallest overlap seen for any pair, capped at 8.
            corr_results: per trait, a row of ``[target_trait, r, overlap]``.
                Pearson is reported until a coefficient equal to 1 has been
                seen in the row; later cells of the row use Spearman.
            pca_corr_results: per trait, a row of Pearson coefficients
                (0 where fewer than 2 values overlap).
            loadings_array: result of ``self.process_loadings()``.
            js_data: dict of the data handed to the front end.
        """
        trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.all_sample_list = []
        self.traits = []
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)

            for sample in this_trait.data:
                if sample not in self.all_sample_list:
                    self.all_sample_list.append(sample)

        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append([
                this_sample_data[sample].value if sample in this_sample_data else ''
                for sample in self.all_sample_list
            ])

        self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)

        self.corr_results = []
        self.pca_corr_results = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data

            corr_result_row = []
            pca_corr_result_row = []
            is_spearman = False #ZS: To determine if it's above or below the diagonal
            for target in self.trait_list:
                target_trait = target[0]
                target_samples = target[1].group.samplelist

                target_sample_data = target_trait.data
                print("target_samples", len(target_samples))

                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if (sample in this_sample_data) and (sample in target_sample_data):
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap
                # A correlation needs at least two paired values; a single
                # overlapping sample is treated like no overlap at all.
                if num_overlap < 2:
                    corr_result_row.append([target_trait, 0, num_overlap])
                    pca_corr_result_row.append(0)
                    continue

                pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
                if not is_spearman:
                    sample_r, sample_p = pearson_r, pearson_p
                    if sample_r == 1:
                        # From the next cell of this row on, report Spearman.
                        is_spearman = True
                else:
                    sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)

                corr_result_row.append([target_trait, sample_r, num_overlap])
                pca_corr_result_row.append(pearson_r)

            self.corr_results.append(corr_result_row)
            self.pca_corr_results.append(pca_corr_result_row)

        print("corr_results:", pf(self.corr_results))

        groups = [1] * len(self.all_sample_list)

        # Index lists are materialised so they are real lists on Python 3 too
        # (range objects are not JSON-serialisable there).
        trait_indexes = list(range(len(self.traits)))

        self.calculate_pca(self.pca_corr_results, list(trait_indexes))

        self.loadings_array = self.process_loadings()

        self.js_data = dict(traits = [trait.name for trait in self.traits],
                            groups = groups,
                            cols = list(trait_indexes),
                            rows = list(trait_indexes),
                            samples = self.all_sample_list,
                            sample_data = self.sample_data,)
    def __init__(self, start_vars):
        """Build the pairwise correlation matrix for a list of traits.

        ``start_vars['trait_list']`` is a comma-separated string; its stripped
        items are handed to ``helper_functions.get_trait_db_obs``.  After that
        call ``self.trait_list`` is read as a sequence whose items hold a
        trait at index 0 and its dataset at index 1.

        Attributes set here:
            traits: the trait objects, in ``trait_list`` order.
            all_sample_list: every sample found in any trait's data, in
                first-seen order.
            sample_data: one row per trait aligned with ``all_sample_list``;
                ``''`` marks a sample the trait has no value for.
            corr_results: per trait, a row of ``[target_trait, r, overlap]``.
                Pearson is reported until a coefficient equal to 1 has been
                seen in the row; later cells of the row use Spearman.
            corr_results_for_pca: initialised to an empty list and not filled
                in by this method.
            js_data: dict of the data handed to the front end.
        """
        trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.all_sample_list = []
        self.traits = []
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)

            for sample in this_trait.data:
                if sample not in self.all_sample_list:
                    self.all_sample_list.append(sample)

        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append([
                this_sample_data[sample].value if sample in this_sample_data else ''
                for sample in self.all_sample_list
            ])

        self.corr_results = []
        self.corr_results_for_pca = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            print("this_sample_data", len(this_sample_data))

            corr_result_row = []
            is_spearman = False #ZS: To determine if it's above or below the diagonal
            for target in self.trait_list:
                target_trait = target[0]
                target_samples = target[1].group.samplelist

                target_sample_data = target_trait.data
                print("target_samples", len(target_samples))

                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if (sample in this_sample_data) and (sample in target_sample_data):
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)
                # A correlation needs at least two paired values; a single
                # overlapping sample is treated like no overlap at all.
                if num_overlap < 2:
                    corr_result_row.append([target_trait, 0, num_overlap])
                    continue

                if not is_spearman:
                    sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
                    if sample_r == 1:
                        # From the next cell of this row on, report Spearman.
                        is_spearman = True
                else:
                    sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)

                corr_result_row.append([target_trait, sample_r, num_overlap])

            self.corr_results.append(corr_result_row)

        print("corr_results:", pf(self.corr_results))

        groups = [1] * len(self.all_sample_list)

        # Index lists are materialised so they are real lists on Python 3 too
        # (range objects are not JSON-serialisable there).
        self.js_data = dict(traits = [trait.name for trait in self.traits],
                            groups = groups,
                            cols = list(range(len(self.traits))),
                            rows = list(range(len(self.traits))),
                            samples = self.all_sample_list,
                            sample_data = self.sample_data,)
Example #7
0
    def __init__(self, start_vars):
        """Build the pairwise correlation matrix and the PCA loadings.

        ``start_vars['trait_list']`` is a comma-separated string; its stripped
        items are handed to ``helper_functions.get_trait_db_obs``.  After that
        call ``self.trait_list`` is read as a sequence whose items hold a
        trait at index 0 and its dataset at index 1.

        Construction stops early, with ``self.insufficient_shared_samples``
        set to True, when a trait belongs to a different group than the first
        trait or when there are fewer samples than traits.  In that case the
        attributes listed after ``all_sample_list`` below are not populated
        (``sample_data`` is populated only in the fewer-samples case).

        Attributes set here:
            traits: the trait objects, in ``trait_list`` order.
            all_sample_list: every sample found in any trait's data, in
                first-seen order.
            sample_data: one row per trait aligned with ``all_sample_list``;
                ``''`` marks a sample the trait has no value for.
            lowest_overlap: smallest overlap seen for any pair, capped at 8.
            trait_data_array: per trait, its values in the order returned by
                the dataset group's ``all_samples_ordered()``.
            corr_results: per trait, a row of ``[target_trait, r, overlap]``.
                Pearson is reported until a coefficient equal to 1 has been
                seen in the row; later cells of the row use Spearman.
            pca_corr_results: per trait, a row of Pearson coefficients
                (0 where fewer than 2 values overlap).
            loadings_array: result of ``self.process_loadings()``.
            js_data: dict of the data handed to the front end.
        """
        trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.all_sample_list = []
        self.traits = []
        self.insufficient_shared_samples = False
        this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop
        for trait_db in self.trait_list:
            if trait_db[1].group.name != this_group:
                self.insufficient_shared_samples = True
                break
            this_trait = trait_db[0]
            self.traits.append(this_trait)

            for sample in this_trait.data:
                if sample not in self.all_sample_list:
                    self.all_sample_list.append(sample)

        if self.insufficient_shared_samples:
            return

        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append([
                this_sample_data[sample].value if sample in this_sample_data else ''
                for sample in self.all_sample_list
            ])

        # Shouldn't do PCA if there are more traits than observations/samples.
        # Every sample_data row has one entry per sample in all_sample_list.
        if len(self.all_sample_list) < len(self.trait_list):
            # __init__ must return None: the former ``return False`` raised
            # TypeError on every construction that reached this branch.
            # NOTE(review): the flag is raised so callers can tell the matrix
            # was not built, the same way the group-mismatch path reports
            # it - confirm this is the message the view should show.
            self.insufficient_shared_samples = True
            return

        self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)

        self.corr_results = []
        self.pca_corr_results = []
        self.trait_data_array = []
        for trait_db in self.trait_list:
            this_db_samples = trait_db[1].group.all_samples_ordered()
            this_sample_data = trait_db[0].data

            self.trait_data_array.append([
                this_sample_data[sample].value
                for sample in this_db_samples
                if sample in this_sample_data
            ])

            corr_result_row = []
            pca_corr_result_row = []
            is_spearman = False #ZS: To determine if it's above or below the diagonal
            for target in self.trait_list:
                target_trait = target[0]
                target_samples = target[1].group.all_samples_ordered()
                target_sample_data = target_trait.data

                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if (sample in this_sample_data) and (sample in target_sample_data):
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap
                # A correlation needs at least two paired values; a single
                # overlapping sample is treated like no overlap at all.
                if num_overlap < 2:
                    corr_result_row.append([target_trait, 0, num_overlap])
                    pca_corr_result_row.append(0)
                    continue

                pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
                if not is_spearman:
                    sample_r, sample_p = pearson_r, pearson_p
                    if sample_r == 1:
                        # From the next cell of this row on, report Spearman.
                        is_spearman = True
                else:
                    sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)

                corr_result_row.append([target_trait, sample_r, num_overlap])
                pca_corr_result_row.append(pearson_r)

            self.corr_results.append(corr_result_row)
            self.pca_corr_results.append(pca_corr_result_row)

        corr_result_eigen = la.eigenvectors(numarray.array(self.pca_corr_results))
        corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)

        groups = [1] * len(self.all_sample_list)

        # Index lists are materialised so they are real lists on Python 3 too
        # (range objects are not JSON-serialisable there).
        self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)

        self.loadings_array = self.process_loadings()

        self.js_data = dict(traits = [trait.name for trait in self.traits],
                            groups = groups,
                            cols = list(range(len(self.traits))),
                            rows = list(range(len(self.traits))),
                            samples = self.all_sample_list,
                            sample_data = self.sample_data,)
Example #8
0
    def __init__(self, start_vars):
        """Build the node/edge lists of a trait correlation network.

        ``start_vars['trait_list']`` is a comma-separated string; its stripped
        items are handed to ``helper_functions.get_trait_db_obs``.  After that
        call ``self.trait_list`` is read as a sequence whose items hold a
        trait at index 0 and its dataset at index 1.

        Every ordered pair of traits is correlated with
        ``scipy.stats.pearsonr`` over the samples both traits have values for.
        Pairs with fewer than 2 overlapping values, and pairs whose
        coefficient equals exactly 1, produce no edge.

        Attributes set here:
            traits: the trait objects, in ``trait_list`` order.
            all_sample_list: every sample found in any trait's data, in
                first-seen order.
            sample_data: one row per trait aligned with ``all_sample_list``;
                ``''`` marks a sample the trait has no value for.
            lowest_overlap: smallest overlap seen for any pair, capped at 8.
            edges_list: ``{'data': {...}}`` dicts, one per emitted edge.
            nodes_list: ``{'data': {...}}`` dicts, one per trait; ``max_corr``
                is the largest absolute coefficient among the trait's edges.
            elements: JSON string of ``nodes_list + edges_list``.
            gn2_url: copy of ``GN2_BRANCH_URL``.
            js_data: dict of the data handed to the front end.
        """
        trait_db_list = [
            trait.strip() for trait in start_vars['trait_list'].split(',')
        ]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.all_sample_list = []
        self.traits = []
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)

            for sample in this_trait.data:
                if sample not in self.all_sample_list:
                    self.all_sample_list.append(sample)

        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append([
                this_sample_data[sample].value
                if sample in this_sample_data else ''
                for sample in self.all_sample_list
            ])

        self.lowest_overlap = 8  #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)

        self.nodes_list = []
        self.edges_list = []
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            this_sample_data = this_trait.data

            max_corr = 0  #ZS: Used to determine whether node should be hidden when correlation coefficient slider is used

            for target in self.trait_list:
                target_trait = target[0]
                target_samples = target[1].group.samplelist
                target_sample_data = target_trait.data

                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if sample in this_sample_data and sample in target_sample_data:
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                    this_trait_vals, target_vals)

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap
                # A correlation needs at least two paired values; a single
                # overlapping sample is treated like no overlap at all.
                if num_overlap < 2:
                    continue

                # The former Spearman branch was unreachable (its switch was
                # never turned on), so every edge carries the Pearson result.
                sample_r, sample_p = scipy.stats.pearsonr(
                    this_trait_vals, target_vals)
                if sample_r == 1:
                    # Perfect correlation (e.g. a trait with itself): no edge.
                    continue

                if -1 <= sample_r < -0.7:
                    color = "#0000ff"
                    width = 3
                elif -0.7 <= sample_r < -0.5:
                    color = "#00ff00"
                    width = 2
                elif -0.5 <= sample_r < 0:
                    color = "#000000"
                    width = 0.5
                elif 0 <= sample_r < 0.5:
                    color = "#ffc0cb"
                    width = 0.5
                elif 0.5 <= sample_r < 0.7:
                    color = "#ffa500"
                    width = 2
                elif 0.7 <= sample_r <= 1:
                    color = "#ff0000"
                    width = 3
                else:
                    # Outside [-1, 1] or NaN: zero-width edge.
                    color = "#000000"
                    width = 0

                if abs(sample_r) > max_corr:
                    max_corr = abs(sample_r)

                edge_data = {
                    'id': str(this_trait.name) + '_to_' + str(target_trait.name),
                    'source': str(this_trait.name) + ":" + str(this_trait.dataset.name),
                    'target': str(target_trait.name) + ":" + str(target_trait.dataset.name),
                    'correlation': round(sample_r, 3),
                    'abs_corr': abs(round(sample_r, 3)),
                    'p_value': round(sample_p, 3),
                    'overlap': num_overlap,
                    'color': color,
                    'width': width,
                }

                self.edges_list.append({'data': edge_data})

            node_id = str(this_trait.name) + ":" + str(this_trait.dataset.name)
            if trait_db[1].type == "ProbeSet":
                node_data = {
                    'id': node_id,
                    'label': this_trait.symbol,
                    'symbol': this_trait.symbol,
                    'geneid': this_trait.geneid,
                    'omim': this_trait.omim,
                    'max_corr': max_corr,
                }
            else:
                # "Publish" and every other dataset type share this layout.
                node_data = {
                    'id': node_id,
                    'label': this_trait.name,
                    'max_corr': max_corr,
                }
            self.nodes_list.append({'data': node_data})

        self.elements = json.dumps(self.nodes_list + self.edges_list)
        self.gn2_url = GN2_BRANCH_URL

        groups = [1] * len(self.all_sample_list)

        # Index lists are materialised so they are real lists on Python 3 too
        # (range objects are not JSON-serialisable there).
        self.js_data = dict(
            traits=[trait.name for trait in self.traits],
            groups=groups,
            cols=list(range(len(self.traits))),
            rows=list(range(len(self.traits))),
            samples=self.all_sample_list,
            sample_data=self.sample_data,
            elements=self.elements,
        )
Example #9
0
    def __init__(self, start_vars):
        """Build the node and edge data for the trait correlation network.

        ``start_vars['trait_list']`` is a comma-separated string of trait
        identifiers. The stripped identifiers are passed to
        ``helper_functions.get_trait_db_obs``.
        NOTE(review): that helper presumably populates ``self.trait_list``
        with ``(trait, dataset)`` pairs -- inferred from how the entries
        are indexed below; confirm against the helper.

        Attributes set here:
            all_sample_list: every sample name found in any trait's data,
                in first-seen order, without duplicates.
            traits: the trait objects, in ``self.trait_list`` order.
            sample_data: one list per trait with the trait's value for
                each entry of ``all_sample_list`` ('' where missing).
            lowest_overlap: the smallest number of overlapping samples
                found for any trait pair, or 8 if every pair has more.
            network_data: an empty dict (nothing is stored in it here).
            nodes_list: one ``{'data': {...}}`` dict per trait.
            edges_list: one ``{'data': {...}}`` dict per ordered trait
                pair that produced a correlation.
            elements: JSON string of ``nodes_list + edges_list``.
            js_data: dict of the values above for the front end.
        """
        trait_db_list = [
            trait.strip() for trait in start_vars['trait_list'].split(',')
        ]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.all_sample_list = []
        self.traits = []
        # A set gives O(1) duplicate checks; the list keeps first-seen order.
        seen_samples = set()
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)

            for sample in this_trait.data:
                if sample not in seen_samples:
                    seen_samples.add(sample)
                    self.all_sample_list.append(sample)

        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append([
                this_sample_data[sample].value
                if sample in this_sample_data else ''
                for sample in self.all_sample_list
            ])

        # ZS: Lowest number of overlapping samples, capped at 8 (8 is the
        # threshold below which a warning is displayed to the user).
        self.lowest_overlap = 8

        self.network_data = {}
        self.nodes_list = []
        self.edges_list = []
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            this_sample_data = this_trait.data
            node_id = str(this_trait.name) + ":" + str(this_trait.dataset.name)

            # ZS: Used to determine whether the node should be hidden when
            # the correlation coefficient slider is used.
            max_corr = 0

            for target in self.trait_list:
                target_trait = target[0]
                target_samples = target[1].group.samplelist
                target_sample_data = target_trait.data

                # Collect the values of samples present in both traits.
                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if (sample in this_sample_data
                            and sample in target_sample_data):
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = (
                    corr_result_helpers.normalize_values(
                        this_trait_vals, target_vals))

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap

                # pearsonr needs at least two paired values (current SciPy
                # raises ValueError otherwise), so such pairs get no edge.
                if num_overlap < 2:
                    continue

                sample_r, sample_p = scipy.stats.pearsonr(
                    this_trait_vals, target_vals)

                # NOTE(review): this looks intended to drop the edge from a
                # trait to itself, but it also drops any other pair whose
                # correlation is exactly 1 -- confirm.
                if sample_r == 1:
                    continue

                if -1 <= sample_r < -0.7:
                    color = "#0000ff"
                    width = 3
                elif -0.7 <= sample_r < -0.5:
                    color = "#00ff00"
                    width = 2
                elif -0.5 <= sample_r < 0:
                    color = "#000000"
                    width = 0.5
                elif 0 <= sample_r < 0.5:
                    color = "#ffc0cb"
                    width = 0.5
                elif 0.5 <= sample_r < 0.7:
                    color = "#ffa500"
                    width = 2
                elif 0.7 <= sample_r <= 1:
                    color = "#ff0000"
                    width = 3
                else:
                    # Reached only when no range matches (e.g. NaN).
                    color = "#000000"
                    width = 0

                if abs(sample_r) > max_corr:
                    max_corr = abs(sample_r)

                edge_data = {
                    'id': str(this_trait.name) + '_to_' + str(target_trait.name),
                    'source': node_id,
                    'target': (str(target_trait.name) + ":" +
                               str(target_trait.dataset.name)),
                    'correlation': round(sample_r, 3),
                    'abs_corr': abs(round(sample_r, 3)),
                    'p_value': round(sample_p, 3),
                    'overlap': num_overlap,
                    'color': color,
                    'width': width,
                }
                self.edges_list.append({'data': edge_data})

            if trait_db[1].type == "ProbeSet":
                # ProbeSet traits are labelled by symbol and carry gene info.
                node_data = {
                    'id': node_id,
                    'label': this_trait.symbol,
                    'symbol': this_trait.symbol,
                    'geneid': this_trait.geneid,
                    'omim': this_trait.omim,
                    'max_corr': max_corr,
                }
            else:
                # "Publish" and every other dataset type share this shape.
                node_data = {
                    'id': node_id,
                    'label': this_trait.name,
                    'max_corr': max_corr,
                }
            self.nodes_list.append({'data': node_data})

        self.elements = json.dumps(self.nodes_list + self.edges_list)

        # Every sample is assigned to group 1.
        groups = [1] * len(self.all_sample_list)

        self.js_data = dict(
            traits=[trait.name for trait in self.traits],
            groups=groups,
            cols=range(len(self.traits)),
            rows=range(len(self.traits)),
            samples=self.all_sample_list,
            sample_data=self.sample_data,
            elements=self.elements,
        )
Example #10
0
    def __init__(self, start_vars):
        """Compute the pairwise trait correlation matrix and the PCA inputs.

        ``start_vars['trait_list']`` is a comma-separated string of trait
        identifiers. The stripped identifiers are passed to
        ``helper_functions.get_trait_db_obs``.
        NOTE(review): that helper presumably populates ``self.trait_list``
        with ``(trait, dataset)`` pairs -- inferred from how the entries
        are indexed below; confirm against the helper.

        Attributes set here:
            all_sample_list: every sample name found in any trait's data,
                in first-seen order, without duplicates.
            traits: the trait objects, in ``self.trait_list`` order.
            sample_data: one list per trait with the trait's value for
                each entry of ``all_sample_list`` ('' where missing).
            lowest_overlap: the smallest number of overlapping samples
                found for any trait pair, or 8 if every pair has more.
            corr_results: one row per trait; each cell is
                ``[target_trait, r, num_overlap]``. Cells on or before the
                diagonal hold the Pearson r, cells after it the Spearman
                rho.
            pca_corr_results: same layout, always holding the Pearson r.
            loadings_array: the result of ``self.process_loadings()``.
            js_data: dict of the values above for the front end.
        """
        trait_db_list = [
            trait.strip() for trait in start_vars['trait_list'].split(',')
        ]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.all_sample_list = []
        self.traits = []
        # A set gives O(1) duplicate checks; the list keeps first-seen order.
        seen_samples = set()
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)

            for sample in this_trait.data:
                if sample not in seen_samples:
                    seen_samples.add(sample)
                    self.all_sample_list.append(sample)

        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append([
                this_sample_data[sample].value
                if sample in this_sample_data else ''
                for sample in self.all_sample_list
            ])

        # ZS: Lowest number of overlapping samples, capped at 8 (8 is the
        # threshold below which a warning is displayed to the user).
        self.lowest_overlap = 8

        self.corr_results = []
        self.pca_corr_results = []
        for row_index, trait_db in enumerate(self.trait_list):
            this_trait = trait_db[0]
            this_sample_data = this_trait.data

            corr_result_row = []
            pca_corr_result_row = []
            for col_index, target in enumerate(self.trait_list):
                target_trait = target[0]
                target_samples = target[1].group.samplelist
                target_sample_data = target_trait.data

                # Collect the values of samples present in both traits.
                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if (sample in this_sample_data
                            and sample in target_sample_data):
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = (
                    corr_result_helpers.normalize_values(
                        this_trait_vals, target_vals))

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap

                # pearsonr needs at least two paired values (current SciPy
                # raises ValueError otherwise); record such cells as 0,
                # exactly like cells with no overlap at all.
                if num_overlap < 2:
                    corr_result_row.append([target_trait, 0, num_overlap])
                    pca_corr_result_row.append(0)
                    continue

                pearson_r = scipy.stats.pearsonr(this_trait_vals,
                                                 target_vals)[0]

                # ZS: Cells after the diagonal show Spearman, the rest
                # Pearson. Rows and columns iterate the same list, so the
                # position identifies the diagonal reliably; testing for
                # r == 1 misfires when another pair correlates perfectly.
                if col_index > row_index:
                    sample_r = scipy.stats.spearmanr(this_trait_vals,
                                                     target_vals)[0]
                else:
                    sample_r = pearson_r

                corr_result_row.append([target_trait, sample_r, num_overlap])
                pca_corr_result_row.append(pearson_r)

            self.corr_results.append(corr_result_row)
            self.pca_corr_results.append(pca_corr_result_row)

        print("corr_results:", pf(self.corr_results))

        # Every sample is assigned to group 1.
        groups = [1] * len(self.all_sample_list)

        # The return value is not used here; the call is kept and must
        # stay ahead of process_loadings().
        # NOTE(review): presumably process_loadings() reads state that
        # calculate_pca() stores on self -- confirm.
        self.calculate_pca(self.pca_corr_results, range(len(self.traits)))

        self.loadings_array = self.process_loadings()

        self.js_data = dict(
            traits=[trait.name for trait in self.traits],
            groups=groups,
            cols=range(len(self.traits)),
            rows=range(len(self.traits)),
            samples=self.all_sample_list,
            sample_data=self.sample_data,
        )
 def test_normalize_values(self):
     """Pairs with a None on either side are dropped from both lists."""
     first_vals = [2.3, None, None, 3.2, 4.1, 5]
     second_vals = [3.4, 7.2, 1.3, None, 6.2, 4.1]
     # Only positions 0, 4 and 5 hold a value in both input lists.
     expected = ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)
     self.assertEqual(normalize_values(first_vals, second_vals), expected)