def get_sample_r_and_p_values(self, trait, target_samples):
    """Correlate the primary trait with one target trait and record the result.

    Walks ``self.target_dataset.samplelist`` and, for every sample that also
    has a value in ``self.sample_data``, pairs the primary value with the
    entry of ``target_samples`` at the same position. The paired lists are
    then passed through ``corr_result_helpers.normalize_values``.

    When more than 5 samples overlap, r (or rho) and the p-value are computed
    with ``do_bicor`` (``self.corr_method == 'bicor'``),
    ``scipy.stats.pearsonr`` (``'pearson'``) or ``scipy.stats.spearmanr``
    (any other value). A non-NaN result is stored as
    ``self.correlation_data[trait] = [r, p, num_overlap]``; otherwise nothing
    is stored. The method itself returns None.
    """
    self.this_trait_vals = []
    paired_target_vals = []
    for position, sample_name in enumerate(self.target_dataset.samplelist):
        if sample_name not in self.sample_data:
            continue
        primary_value = self.sample_data[sample_name]
        target_value = target_samples[position]
        self.this_trait_vals.append(primary_value)
        paired_target_vals.append(target_value)

    normalized = corr_result_helpers.normalize_values(
        self.this_trait_vals, paired_target_vals)
    self.this_trait_vals, paired_target_vals, num_overlap = normalized

    # Too few shared samples: no correlation is computed or stored.
    if num_overlap <= 5:
        return

    # ZS (2015): background on biweight correlation,
    # see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
    method = self.corr_method
    if method == 'bicor':
        correlate = do_bicor
    elif method == 'pearson':
        correlate = scipy.stats.pearsonr
    else:
        correlate = scipy.stats.spearmanr
    sample_r, sample_p = correlate(self.this_trait_vals, paired_target_vals)

    if not numpy.isnan(sample_r):
        self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type):
    """Calculate the sample r (or rho) and p-value for one target trait.

    Given a primary trait and a target trait's sample values, calculates
    either the Pearson r or the Spearman rho and the p-value using the
    corresponding scipy functions.

    Args:
        this_trait: primary trait; ``this_trait.data`` is looked up by sample
            name and each entry exposes a ``.value`` attribute.
        this_dataset: not used by this function; kept so existing callers
            keep working.
        target_vals: values of the target trait, indexed by the position of
            the sample in ``target_dataset.group.samplelist``.
        target_dataset: dataset whose ``group.samplelist`` gives the sample
            order of ``target_vals``.
        type: ``'pearson'`` selects ``scipy.stats.pearsonr``; any other value
            selects ``scipy.stats.spearmanr``. (The name shadows the builtin
            but is part of the existing interface.)

    Returns:
        ``[sample_r, sample_p, num_overlap]``, or ``None`` when 5 or fewer
        samples overlap after normalization or when r is NaN.
    """
    import math  # local import so the block does not depend on file-level imports

    primary_data = this_trait.data
    this_trait_vals = []
    shared_target_vals = []
    for i, sample in enumerate(target_dataset.group.samplelist):
        if sample in primary_data:
            this_sample_value = primary_data[sample].value
            target_sample_value = target_vals[i]
            this_trait_vals.append(this_sample_value)
            shared_target_vals.append(target_sample_value)

    this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(
        this_trait_vals, shared_target_vals)

    # Check the overlap BEFORE correlating: the result was discarded anyway
    # for <= 5 samples, and scipy raises on inputs shorter than 2 values.
    if num_overlap <= 5:
        return None

    if type == 'pearson':
        sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals)
    else:
        sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals)

    # math.isnan replaces scipy.isnan, a deprecated NumPy alias in the scipy
    # root namespace.
    if math.isnan(sample_r):
        return None
    return [sample_r, sample_p, num_overlap]
def get_sample_r_and_p_values(self, trait, target_samples):
    """Calculate the sample r (or rho) and p-value and store them.

    Given a primary trait and a target trait's sample values, calculates
    either the Pearson r or the Spearman rho and the p-value using the
    corresponding scipy functions.

    Args:
        trait: key under which the result is stored in
            ``self.correlation_data``.
        target_samples: values of the target trait, indexed by the position
            of the sample in ``self.target_dataset.samplelist``.

    Side effects:
        ``self.this_trait_vals`` is replaced by the normalized primary
        values. When more than 5 samples overlap,
        ``self.correlation_data[trait]`` is set to
        ``[sample_r, sample_p, num_overlap]`` (a NaN r is stored as-is).
        Returns None.
    """
    self.this_trait_vals = []
    target_vals = []
    for index, sample in enumerate(self.target_dataset.samplelist):
        if sample in self.sample_data:
            sample_value = self.sample_data[sample]
            target_sample_value = target_samples[index]
            self.this_trait_vals.append(sample_value)
            target_vals.append(target_sample_value)

    self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
        self.this_trait_vals, target_vals)

    # Check the overlap BEFORE correlating: the result was only ever stored
    # for > 5 samples, and scipy raises on inputs shorter than 2 values.
    if num_overlap <= 5:
        return

    #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
    if self.corr_method == 'pearson':
        sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals)
    else:
        sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals)

    self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
def __init__(self, start_vars):
    """Build the correlation matrix, PCA inputs and JS payload for a trait set.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a comma-separated
            string of trait identifiers. The stripped identifiers are handed
            to ``helper_functions.get_trait_db_obs`` (presumably the call
            that populates ``self.trait_list`` -- confirm against the helper).
            Each ``self.trait_list`` entry is indexed as ``[0]`` = trait and
            ``[1]`` = dataset.

    Attributes set:
        all_sample_list: every sample with data in at least one trait, in
            first-seen order.
        sample_data: one row per trait, one column per ``all_sample_list``
            entry; ``''`` marks a missing value.
        shared_samples_list: samples left after removing every sample that
            was missing from some trait pair.
        corr_results / pca_corr_results: per-trait rows of
            ``[target_trait, r, num_overlap]`` and of plain Pearson r.
        lowest_overlap: smallest pairwise overlap seen, capped at 8.
        trait_data_array, export_filename, export_filepath, do_PCA,
        pca_works, pca_trait_ids, loadings_array (only when PCA ran), js_data.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.all_sample_list = []
    self.traits = []
    self.insufficient_shared_samples = False
    self.do_PCA = True

    # A set makes the "already seen" test O(1); the list keeps first-seen order.
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    # Shouldn't do PCA if there are more traits than observations/samples
    # (every sample_data row has exactly len(all_sample_list) entries).
    if len(self.all_sample_list) < len(self.trait_list):
        self.do_PCA = False

    #ZS: Variable set to the lowest overlapping samples in order to notify
    #    user, or 8, whichever is lower (since 8 is when we want to display warning)
    self.lowest_overlap = 8

    self.corr_results = []
    self.pca_corr_results = []
    # Must be a COPY: samples are removed from this list below, while
    # all_sample_list has to stay aligned with the columns of sample_data.
    self.shared_samples_list = list(self.all_sample_list)
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data

        corr_result_row = []
        pca_corr_result_row = []
        #ZS: To determine if it's above or below the diagonal. Flips once a
        #    Pearson r of exactly 1 is seen; later cells in the row use Spearman.
        is_spearman = False
        for target in self.trait_list:
            target_trait = target[0]
            target_samples = target[1].group.all_samples_ordered()
            target_sample_data = target_trait.data

            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)
                elif sample in self.shared_samples_list:
                    self.shared_samples_list.remove(sample)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            if num_overlap < 2:
                # A correlation needs at least two paired values.
                corr_result_row.append([target_trait, 0, num_overlap])
                pca_corr_result_row.append(0)
                continue

            pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
            if not is_spearman:
                sample_r, sample_p = pearson_r, pearson_p
                if sample_r == 1:
                    is_spearman = True
            else:
                sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)

            corr_result_row.append([target_trait, sample_r, num_overlap])
            pca_corr_result_row.append(pearson_r)

        self.corr_results.append(corr_result_row)
        self.pca_corr_results.append(pca_corr_result_row)

    self.export_filename, self.export_filepath = export_corr_matrix(self.corr_results)

    # Per-trait values restricted to the samples shared by every trait pair.
    self.trait_data_array = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        this_db_samples = trait_db[1].group.all_samples_ordered()
        self.trait_data_array.append([
            this_sample_data[sample].value
            for sample in this_db_samples
            if sample in this_sample_data and sample in self.shared_samples_list
        ])

    corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
    corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)

    groups = [1 for _ in self.all_sample_list]

    # PCA is best-effort: any failure just marks it as not working.
    # "True"/"False" are kept as strings, exactly as before.
    self.pca_works = "False"
    if self.do_PCA:
        try:
            self.pca_works = "True"
            self.pca_trait_ids = []
            self.calculate_pca(list(range(len(self.traits))),
                               corr_eigen_value, corr_eigen_vectors)
            self.loadings_array = self.process_loadings()
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            self.pca_works = "False"

    self.js_data = dict(
        traits=[trait.name for trait in self.traits],
        groups=groups,
        cols=list(range(len(self.traits))),
        rows=list(range(len(self.traits))),
        samples=self.all_sample_list,
        sample_data=self.sample_data,
    )
def __init__(self, start_vars):
    """Build the correlation matrix, PCA loadings and JS payload for a trait set.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a comma-separated
            string of trait identifiers. The stripped identifiers are handed
            to ``helper_functions.get_trait_db_obs`` (presumably the call
            that populates ``self.trait_list`` -- confirm against the helper).
            Each ``self.trait_list`` entry is indexed as ``[0]`` = trait and
            ``[1]`` = dataset.

    Attributes set:
        all_sample_list, traits, sample_data (``''`` marks a missing value),
        lowest_overlap (smallest pairwise overlap seen, capped at 8),
        corr_results (rows of ``[target_trait, r, num_overlap]``),
        pca_corr_results (rows of Pearson r), loadings_array, js_data.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.all_sample_list = []
    self.traits = []
    # A set makes the "already seen" test O(1); the list keeps first-seen order.
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    #ZS: Variable set to the lowest overlapping samples in order to notify
    #    user, or 8, whichever is lower (since 8 is when we want to display warning)
    self.lowest_overlap = 8

    self.corr_results = []
    self.pca_corr_results = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data

        corr_result_row = []
        pca_corr_result_row = []
        #ZS: To determine if it's above or below the diagonal. Flips once a
        #    Pearson r of exactly 1 is seen; later cells in the row use Spearman.
        is_spearman = False
        for target in self.trait_list:
            target_trait = target[0]
            target_samples = target[1].group.samplelist
            target_sample_data = target_trait.data
            print("target_samples", len(target_samples))

            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            # A correlation needs at least two paired values; the previous
            # ``== 0`` guard let a single shared sample reach pearsonr.
            if num_overlap < 2:
                corr_result_row.append([target_trait, 0, num_overlap])
                pca_corr_result_row.append(0)
                continue

            pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
            if not is_spearman:
                sample_r, sample_p = pearson_r, pearson_p
                if sample_r == 1:
                    is_spearman = True
            else:
                sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)

            corr_result_row.append([target_trait, sample_r, num_overlap])
            pca_corr_result_row.append(pearson_r)

        self.corr_results.append(corr_result_row)
        self.pca_corr_results.append(pca_corr_result_row)

    print("corr_results:", pf(self.corr_results))

    groups = [1 for _ in self.all_sample_list]

    self.calculate_pca(self.pca_corr_results, range(len(self.traits)))
    self.loadings_array = self.process_loadings()

    self.js_data = dict(traits = [trait.name for trait in self.traits],
                        groups = groups,
                        cols = range(len(self.traits)),
                        rows = range(len(self.traits)),
                        samples = self.all_sample_list,
                        sample_data = self.sample_data,)
def __init__(self, start_vars):
    """Build the trait-by-trait correlation matrix and the JS payload.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a comma-separated
            string of trait identifiers. The stripped identifiers are handed
            to ``helper_functions.get_trait_db_obs`` (presumably the call
            that populates ``self.trait_list`` -- confirm against the helper).
            Each ``self.trait_list`` entry is indexed as ``[0]`` = trait and
            ``[1]`` = dataset.

    Attributes set:
        all_sample_list, traits, sample_data (``''`` marks a missing value),
        corr_results (rows of ``[target_trait, r, num_overlap]``),
        corr_results_for_pca (left empty here), js_data.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.all_sample_list = []
    self.traits = []
    # A set makes the "already seen" test O(1); the list keeps first-seen order.
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    self.corr_results = []
    self.corr_results_for_pca = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        print("this_sample_data", len(this_sample_data))

        corr_result_row = []
        #ZS: To determine if it's above or below the diagonal. Flips once a
        #    Pearson r of exactly 1 is seen; later cells in the row use Spearman.
        is_spearman = False
        for target in self.trait_list:
            target_trait = target[0]
            target_samples = target[1].group.samplelist
            target_sample_data = target_trait.data
            print("target_samples", len(target_samples))

            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            # A correlation needs at least two paired values; the previous
            # ``== 0`` guard let a single shared sample reach pearsonr.
            if num_overlap < 2:
                corr_result_row.append([target_trait, 0, num_overlap])
                continue

            if not is_spearman:
                sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
                if sample_r == 1:
                    is_spearman = True
            else:
                sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)

            corr_result_row.append([target_trait, sample_r, num_overlap])

        self.corr_results.append(corr_result_row)

    print("corr_results:", pf(self.corr_results))

    groups = [1 for _ in self.all_sample_list]

    self.js_data = dict(traits = [trait.name for trait in self.traits],
                        groups = groups,
                        cols = range(len(self.traits)),
                        rows = range(len(self.traits)),
                        samples = self.all_sample_list,
                        sample_data = self.sample_data,)
def __init__(self, start_vars):
    """Build the correlation matrix, PCA loadings and JS payload for a trait set.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a comma-separated
            string of trait identifiers. The stripped identifiers are handed
            to ``helper_functions.get_trait_db_obs`` (presumably the call
            that populates ``self.trait_list`` -- confirm against the helper).
            Each ``self.trait_list`` entry is indexed as ``[0]`` = trait and
            ``[1]`` = dataset.

    Behaviour:
        * If the traits do not all belong to the same group,
          ``self.insufficient_shared_samples`` is set to True and nothing
          else is computed.
        * If there are more traits than samples, ``self.do_PCA`` is set to
          False and the PCA step is skipped; the correlation matrix and
          ``js_data`` are still built. (This used to be ``return False``,
          which Python rejects in ``__init__`` with a TypeError.)

    Attributes set (same-group case):
        sample_data (``''`` marks a missing value), lowest_overlap (capped at
        8), corr_results (rows of ``[target_trait, r, num_overlap]``),
        pca_corr_results, trait_data_array, do_PCA, js_data, and
        loadings_array only when PCA ran.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.all_sample_list = []
    self.traits = []
    self.insufficient_shared_samples = False
    self.do_PCA = True

    #ZS: Getting initial group name before verifying all traits are in the
    #    same group in the following loop
    this_group = self.trait_list[0][1].group.name
    seen_samples = set()
    for trait_db in self.trait_list:
        if trait_db[1].group.name != this_group:
            self.insufficient_shared_samples = True
            break

        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    if self.insufficient_shared_samples:
        return

    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    # Shouldn't do PCA if there are more traits than observations/samples
    # (every sample_data row has exactly len(all_sample_list) entries).
    if len(self.all_sample_list) < len(self.trait_list):
        self.do_PCA = False

    #ZS: Variable set to the lowest overlapping samples in order to notify
    #    user, or 8, whichever is lower (since 8 is when we want to display warning)
    self.lowest_overlap = 8

    self.corr_results = []
    self.pca_corr_results = []
    self.trait_data_array = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        this_db_samples = trait_db[1].group.all_samples_ordered()

        self.trait_data_array.append([
            this_sample_data[sample].value
            for sample in this_db_samples
            if sample in this_sample_data
        ])

        corr_result_row = []
        pca_corr_result_row = []
        #ZS: To determine if it's above or below the diagonal. Flips once a
        #    Pearson r of exactly 1 is seen; later cells in the row use Spearman.
        is_spearman = False
        for target in self.trait_list:
            target_trait = target[0]
            target_samples = target[1].group.all_samples_ordered()
            target_sample_data = target_trait.data

            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            # A correlation needs at least two paired values; the previous
            # ``== 0`` guard let a single shared sample reach pearsonr.
            if num_overlap < 2:
                corr_result_row.append([target_trait, 0, num_overlap])
                pca_corr_result_row.append(0)
                continue

            pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
            if not is_spearman:
                sample_r, sample_p = pearson_r, pearson_p
                if sample_r == 1:
                    is_spearman = True
            else:
                sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)

            corr_result_row.append([target_trait, sample_r, num_overlap])
            pca_corr_result_row.append(pearson_r)

        self.corr_results.append(corr_result_row)
        self.pca_corr_results.append(pca_corr_result_row)

    if self.do_PCA:
        corr_result_eigen = la.eigenvectors(numarray.array(self.pca_corr_results))
        corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)
        self.calculate_pca(range(len(self.traits)), corr_eigen_value, corr_eigen_vectors)
        self.loadings_array = self.process_loadings()

    groups = [1 for _ in self.all_sample_list]

    self.js_data = dict(traits = [trait.name for trait in self.traits],
                        groups = groups,
                        cols = range(len(self.traits)),
                        rows = range(len(self.traits)),
                        samples = self.all_sample_list,
                        sample_data = self.sample_data,)
def __init__(self, start_vars):
    """Build the node/edge lists of the trait correlation network.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a comma-separated
            string of trait identifiers. The stripped identifiers are handed
            to ``helper_functions.get_trait_db_obs`` (presumably the call
            that populates ``self.trait_list`` -- confirm against the helper).
            Each ``self.trait_list`` entry is indexed as ``[0]`` = trait and
            ``[1]`` = dataset.

    Attributes set:
        all_sample_list, traits, sample_data (``''`` marks a missing value),
        lowest_overlap (smallest pairwise overlap seen, capped at 8),
        nodes_list (one node per trait), edges_list (one edge per ordered
        trait pair with at least 2 shared samples and Pearson r != 1),
        elements (JSON string of nodes followed by edges), gn2_url, js_data.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.all_sample_list = []
    self.traits = []
    # A set makes the "already seen" test O(1); the list keeps first-seen order.
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    #ZS: Variable set to the lowest overlapping samples in order to notify
    #    user, or 8, whichever is lower (since 8 is when we want to display warning)
    self.lowest_overlap = 8

    self.nodes_list = []
    self.edges_list = []
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        this_sample_data = this_trait.data
        this_node_id = str(this_trait.name) + ":" + str(this_trait.dataset.name)

        #ZS: Used to determine whether node should be hidden when
        #    correlation coefficient slider is used
        max_corr = 0

        for target in self.trait_list:
            target_trait = target[0]
            target_samples = target[1].group.samplelist
            target_sample_data = target_trait.data

            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            # A correlation needs at least two paired values; the previous
            # ``== 0`` guard let a single shared sample reach pearsonr.
            if num_overlap < 2:
                continue

            # Only Pearson is used: the old Spearman branch was unreachable
            # because its flag was never switched on.
            sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
            if sample_r == 1:
                # Skips perfect correlations (e.g. a trait paired with itself).
                continue

            # Edge colour and width by correlation band; the final ``else``
            # catches values outside [-1, 1], such as NaN.
            if -1 <= sample_r < -0.7:
                color = "#0000ff"
                width = 3
            elif -0.7 <= sample_r < -0.5:
                color = "#00ff00"
                width = 2
            elif -0.5 <= sample_r < 0:
                color = "#000000"
                width = 0.5
            elif 0 <= sample_r < 0.5:
                color = "#ffc0cb"
                width = 0.5
            elif 0.5 <= sample_r < 0.7:
                color = "#ffa500"
                width = 2
            elif 0.7 <= sample_r <= 1:
                color = "#ff0000"
                width = 3
            else:
                color = "#000000"
                width = 0

            if abs(sample_r) > max_corr:
                max_corr = abs(sample_r)

            edge_data = {
                'id': str(this_trait.name) + '_to_' + str(target_trait.name),
                'source': this_node_id,
                'target': str(target_trait.name) + ":" + str(target_trait.dataset.name),
                'correlation': round(sample_r, 3),
                'abs_corr': abs(round(sample_r, 3)),
                'p_value': round(sample_p, 3),
                'overlap': num_overlap,
                'color': color,
                'width': width
            }
            self.edges_list.append({'data': edge_data})

        if trait_db[1].type == "ProbeSet":
            node_data = {
                'id': this_node_id,
                'label': this_trait.symbol,
                'symbol': this_trait.symbol,
                'geneid': this_trait.geneid,
                'omim': this_trait.omim,
                'max_corr': max_corr
            }
        else:
            # "Publish" and every other dataset type share the same node shape.
            node_data = {
                'id': this_node_id,
                'label': this_trait.name,
                'max_corr': max_corr
            }
        self.nodes_list.append({'data': node_data})

    self.elements = json.dumps(self.nodes_list + self.edges_list)
    self.gn2_url = GN2_BRANCH_URL

    groups = [1 for _ in self.all_sample_list]

    self.js_data = dict(
        traits=[trait.name for trait in self.traits],
        groups=groups,
        cols=range(len(self.traits)),
        rows=range(len(self.traits)),
        samples=self.all_sample_list,
        sample_data=self.sample_data,
        elements=self.elements,
    )
def __init__(self, start_vars):
    """Build the node/edge lists of the trait correlation network.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a comma-separated
            string of trait identifiers. The stripped identifiers are handed
            to ``helper_functions.get_trait_db_obs`` (presumably the call
            that populates ``self.trait_list`` -- confirm against the helper).
            Each ``self.trait_list`` entry is indexed as ``[0]`` = trait and
            ``[1]`` = dataset.

    Attributes set:
        all_sample_list, traits, sample_data (``''`` marks a missing value),
        lowest_overlap (smallest pairwise overlap seen, capped at 8),
        network_data (left as an empty dict here), nodes_list (one node per
        trait), edges_list (one edge per ordered trait pair with at least 2
        shared samples and Pearson r != 1), elements (JSON string of nodes
        followed by edges), js_data.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.all_sample_list = []
    self.traits = []
    # A set makes the "already seen" test O(1); the list keeps first-seen order.
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    #ZS: Variable set to the lowest overlapping samples in order to notify
    #    user, or 8, whichever is lower (since 8 is when we want to display warning)
    self.lowest_overlap = 8

    self.network_data = {}
    self.nodes_list = []
    self.edges_list = []
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        this_sample_data = this_trait.data
        this_node_id = str(this_trait.name) + ":" + str(this_trait.dataset.name)

        #ZS: Used to determine whether node should be hidden when
        #    correlation coefficient slider is used
        max_corr = 0

        for target in self.trait_list:
            target_trait = target[0]
            target_samples = target[1].group.samplelist
            target_sample_data = target_trait.data

            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            # A correlation needs at least two paired values; the previous
            # ``== 0`` guard let a single shared sample reach pearsonr.
            if num_overlap < 2:
                continue

            # Only Pearson is used: the old Spearman branch was unreachable
            # because its flag was never switched on.
            sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
            if sample_r == 1:
                # Skips perfect correlations (e.g. a trait paired with itself).
                continue

            # Edge colour and width by correlation band; the final ``else``
            # catches values outside [-1, 1], such as NaN.
            if -1 <= sample_r < -0.7:
                color = "#0000ff"
                width = 3
            elif -0.7 <= sample_r < -0.5:
                color = "#00ff00"
                width = 2
            elif -0.5 <= sample_r < 0:
                color = "#000000"
                width = 0.5
            elif 0 <= sample_r < 0.5:
                color = "#ffc0cb"
                width = 0.5
            elif 0.5 <= sample_r < 0.7:
                color = "#ffa500"
                width = 2
            elif 0.7 <= sample_r <= 1:
                color = "#ff0000"
                width = 3
            else:
                color = "#000000"
                width = 0

            if abs(sample_r) > max_corr:
                max_corr = abs(sample_r)

            edge_data = {'id' : str(this_trait.name) + '_to_' + str(target_trait.name),
                         'source' : this_node_id,
                         'target' : str(target_trait.name) + ":" + str(target_trait.dataset.name),
                         'correlation' : round(sample_r, 3),
                         'abs_corr' : abs(round(sample_r, 3)),
                         'p_value' : round(sample_p, 3),
                         'overlap' : num_overlap,
                         'color' : color,
                         'width' : width }
            self.edges_list.append({ 'data' : edge_data })

        if trait_db[1].type == "ProbeSet":
            node_data = {'id' : this_node_id,
                         'label' : this_trait.symbol,
                         'symbol' : this_trait.symbol,
                         'geneid' : this_trait.geneid,
                         'omim' : this_trait.omim,
                         'max_corr' : max_corr }
        else:
            # "Publish" and every other dataset type share the same node shape.
            node_data = {'id' : this_node_id,
                         'label' : this_trait.name,
                         'max_corr' : max_corr }
        self.nodes_list.append({ 'data' : node_data })

    self.elements = json.dumps(self.nodes_list + self.edges_list)

    groups = [1 for _ in self.all_sample_list]

    self.js_data = dict(traits = [trait.name for trait in self.traits],
                        groups = groups,
                        cols = range(len(self.traits)),
                        rows = range(len(self.traits)),
                        samples = self.all_sample_list,
                        sample_data = self.sample_data,
                        elements = self.elements,)
def __init__(self, start_vars):
    """Build the correlation matrix, PCA loadings and JS payload for a trait set.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a comma-separated
            string of trait identifiers. The stripped identifiers are handed
            to ``helper_functions.get_trait_db_obs`` (presumably the call
            that populates ``self.trait_list`` -- confirm against the helper).
            Each ``self.trait_list`` entry is indexed as ``[0]`` = trait and
            ``[1]`` = dataset.

    Attributes set:
        all_sample_list, traits, sample_data (``''`` marks a missing value),
        lowest_overlap (smallest pairwise overlap seen, capped at 8),
        corr_results (rows of ``[target_trait, r, num_overlap]``),
        pca_corr_results (rows of Pearson r), loadings_array, js_data.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.all_sample_list = []
    self.traits = []
    # A set makes the "already seen" test O(1); the list keeps first-seen order.
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    #ZS: Variable set to the lowest overlapping samples in order to notify
    #    user, or 8, whichever is lower (since 8 is when we want to display warning)
    self.lowest_overlap = 8

    self.corr_results = []
    self.pca_corr_results = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data

        corr_result_row = []
        pca_corr_result_row = []
        #ZS: To determine if it's above or below the diagonal. Flips once a
        #    Pearson r of exactly 1 is seen; later cells in the row use Spearman.
        is_spearman = False
        for target in self.trait_list:
            target_trait = target[0]
            target_samples = target[1].group.samplelist
            target_sample_data = target_trait.data

            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            # A correlation needs at least two paired values; the previous
            # ``== 0`` guard let a single shared sample reach pearsonr.
            if num_overlap < 2:
                corr_result_row.append([target_trait, 0, num_overlap])
                pca_corr_result_row.append(0)
                continue

            pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
            if not is_spearman:
                sample_r, sample_p = pearson_r, pearson_p
                if sample_r == 1:
                    is_spearman = True
            else:
                sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)

            corr_result_row.append([target_trait, sample_r, num_overlap])
            pca_corr_result_row.append(pearson_r)

        self.corr_results.append(corr_result_row)
        self.pca_corr_results.append(pca_corr_result_row)

    print("corr_results:", pf(self.corr_results))

    groups = [1 for _ in self.all_sample_list]

    self.calculate_pca(self.pca_corr_results, range(len(self.traits)))
    self.loadings_array = self.process_loadings()

    self.js_data = dict(
        traits=[trait.name for trait in self.traits],
        groups=groups,
        cols=range(len(self.traits)),
        rows=range(len(self.traits)),
        samples=self.all_sample_list,
        sample_data=self.sample_data,
    )
def test_normalize_values(self):
    """Check that pairs containing a ``None`` on either side are dropped.

    Only positions 0, 4 and 5 hold a value in both input lists, so the
    expected result is the two filtered lists plus an overlap count of 3.
    """
    primary_values = [2.3, None, None, 3.2, 4.1, 5]
    target_values = [3.4, 7.2, 1.3, None, 6.2, 4.1]
    expected = ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)

    actual = normalize_values(primary_values, target_values)

    self.assertEqual(actual, expected)