def get_RSA_frequencies(natural_proteins, lower_RSA_boundary, upper_RSA_boundary):
    """Return mean column values, relative to column 0, for sites in an RSA window.

    Every site whose RSA value lies in the closed interval
    [lower_RSA_boundary, upper_RSA_boundary] is selected.  For each of the
    first five entries of the selected sites the mean over those sites is
    taken and divided by the mean of entry 0, so the first element of the
    result is the reference divided by itself.

    Parameters:
        natural_proteins: passed unchanged to ``af.get_AA_distribution`` and
            ``af.get_RSA_Values`` (presumably an alignment data file name --
            confirm against callers).
        lower_RSA_boundary: inclusive lower bound of the RSA window.
        upper_RSA_boundary: inclusive upper bound of the RSA window.

    Returns:
        A list of five values.  When no site falls inside the window the
        list is ``[0.0, 0.0, 0.0, 0.0, 0.0]``.

    Side effects:
        Prints the resulting list and the number of selected sites, plus a
        warning line when the reference mean is zero.
    """
    num_columns = 5  # only entries 0..4 of each site are used

    natural_distribution = af.get_AA_distribution(natural_proteins)
    natural_RSA = af.get_RSA_Values(natural_proteins)
    natural_RSA_array = af.make_array(natural_RSA)

    # The RSA array is indexed by position (not zipped) so that a
    # distribution longer than the RSA array still raises IndexError.
    selected_sites = [
        site
        for index, site in enumerate(natural_distribution)
        if lower_RSA_boundary <= natural_RSA_array[index] <= upper_RSA_boundary
    ]
    count = len(selected_sites)

    if count == 0:
        frequency_data = [0.0] * num_columns
    else:
        column_means = [
            mean([site[column] for site in selected_sites])
            for column in range(num_columns)
        ]
        reference_mean = column_means[0]
        # Warn *before* dividing, so the message is emitted even if the
        # division itself fails.  The ratios are still computed afterwards
        # so the returned values are the same as before.
        if reference_mean == 0.0:
            print("MEAN OF BIN 1 is ZERO!!!!")
        frequency_data = [
            column_mean / reference_mean for column_mean in column_means
        ]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(frequency_data)
    print("Number of residues in bin: " + str(count))
    return frequency_data
def get_RSA_frequencies(natural_proteins, lower_RSA_boundary, upper_RSA_boundary):
    """Return log ratios of mean column values for sites in an RSA window.

    Every site whose RSA value lies in the closed interval
    [lower_RSA_boundary, upper_RSA_boundary] is selected.  For each of the
    first twenty entries of the selected sites the mean over those sites is
    divided by the mean of entry 0, and the natural logarithm of that ratio
    is reported.

    Parameters:
        natural_proteins: passed unchanged to ``af.get_AA_distribution`` and
            ``af.get_RSA_Values`` (presumably an alignment data file name --
            confirm against callers).
        lower_RSA_boundary: inclusive lower bound of the RSA window.
        upper_RSA_boundary: inclusive upper bound of the RSA window.

    Returns:
        A list of twenty values, one per column.  A ratio that is exactly
        0.0 is reported as 0.0 instead of log(0).  When no site falls inside
        the window the sentinel list ``[-1]`` is returned.
    """
    num_columns = 20  # entries 0..19 of each site are used

    natural_distribution = af.get_AA_distribution(natural_proteins)
    natural_RSA = af.get_RSA_Values(natural_proteins)
    natural_RSA_array = af.make_array(natural_RSA)

    # The RSA array is indexed by position (not zipped) so that a
    # distribution longer than the RSA array still raises IndexError.
    selected_sites = [
        site
        for index, site in enumerate(natural_distribution)
        if lower_RSA_boundary <= natural_RSA_array[index] <= upper_RSA_boundary
    ]

    if not selected_sites:
        # Need to find a way to exclude the point; until then callers have
        # to recognise this sentinel themselves.
        return [-1]

    column_means = [
        np.mean([site[column] for site in selected_sites])
        for column in range(num_columns)
    ]
    reference_mean = column_means[0]

    frequency_data = []
    for column_mean in column_means:
        ratio = column_mean / reference_mean
        # A zero ratio is stored as 0.0 rather than passed to np.log.
        frequency_data.append(0.0 if ratio == 0.0 else np.log(ratio))
    return frequency_data
chain_names.append(chain_id)
# NOTE(review): `file` is bound outside this fragment (presumably the current
# natural-alignment data file -- confirm); it shadows the Python 2 builtin.
natural_proteins = file

# Build the names of the result files.  The trailing number in each designed
# file name is str() of the value shown (presumably a design temperature --
# confirm).
designed_prefix = "align_data_array_" + pdb_id + "_" + chain_id + "_"
designed_proteins_00 = designed_prefix + str(0.0) + ".dat"
designed_proteins_01 = designed_prefix + str(0.1) + ".dat"
designed_proteins_03 = designed_prefix + str(0.3) + ".dat"
designed_proteins_06 = designed_prefix + str(0.6) + ".dat"
designed_proteins_09 = designed_prefix + str(0.9) + ".dat"
designed_proteins_12 = designed_prefix + str(1.2) + ".dat"
designed_proteins_003 = designed_prefix + str(0.03) + ".dat"

split_natural_1 = "align_natural_sample1_data_array_" + pdb_id + "_" + chain_id + ".dat"
split_natural_2 = "align_natural_sample2_data_array_" + pdb_id + "_" + chain_id + ".dat"

# Compute the quantities used for comparison (distribution, entropy, RSA)
# for the natural alignment and record their means for this chain.
natural_distribution = analysis_functions.get_AA_distribution(natural_proteins)
natural_entropy = analysis_functions.get_native_entropy(natural_proteins)
natural_entropy_array = analysis_functions.make_array(natural_entropy)
natural_RSA = analysis_functions.get_RSA_Values(natural_proteins)
natural_RSA_array = analysis_functions.make_array(natural_RSA)
natural_mean_RSA_values.append(mean(natural_RSA_array))
natural_mean_entropy_values.append(mean(natural_entropy_array))

# Same quantities for the designed alignment whose file name ends in "0.0".
designed_distribution_00 = analysis_functions.get_AA_distribution(designed_proteins_00)
designed_entropy_00 = analysis_functions.get_native_entropy(designed_proteins_00)
designed_entropy_array_00 = analysis_functions.make_array(designed_entropy_00)
designed_RSA_00 = analysis_functions.get_RSA_Values(designed_proteins_00)
designed_RSA_array_00 = analysis_functions.make_array(designed_RSA_00)
designed_mean_RSA_values_00.append(mean(designed_RSA_array_00))
designed_mean_entropy_values_00.append(mean(designed_entropy_array_00))