import phase_2_model
import pickle

# NOTE(review): `np`, `home` and `protein_model_tools` are used below but are
# not imported in the visible part of this file -- confirm they are in scope.

if __name__ == "__main__":
    # Get data
    # RASH: alignment length and path to the jackhmmer alignment file.
    L = 166
    msa_file = home + '/Documents/Protein_data/RASH/RASH_HUMAN2_833a6535-26d0-4c47-8463-7970dae27a32_evfold_result/alignment/RASH_HUMAN2_RASH_HUMAN2_jackhmmer_e-10_m30_complete_run.fa'
    msa, n_aa = protein_model_tools.convert_msa(L, msa_file)
    # Single pre-formatted argument prints the same text on Python 2 and 3.
    print('%s %s' % (len(msa), len(msa[0])))

    # Load the pickled weights
    # Placeholder for now: an all-zero array instead of the pickled weights.
    # This must come after L and n_aa are assigned; the original allocated it
    # first, which raised NameError.
    experts_weights = np.zeros((L, L, n_aa, n_aa))

    # Learn hyperparameters - Grid search
    # Each list holds the candidate values for one hyperparameter.
    nu_list = [.1]
    rho_list = [.00001]
    sigma_adj_list = [.01]
    sigma_repel_list = [.1]

    # Best result so far; -1 marks "no error recorded yet".
    best_error = -1
    best_hypers = []
    best_coords = []

    for iter1 in range(1):
        print('Iteration ' + str(iter1))
# RASH: alignment length.
L = 166

# Machine selector flags. Each flag set to 1 picks its own alignment path;
# `mac` is checked last, so it wins if both are set.
rao = 0
mac = 1
if rao == 1:
    msa_file = home + '/protein_data/RASH_HUMAN2_RASH_HUMAN2_jackhmmer_e-10_m30_complete_run.fa'
if mac == 1:
    msa_file = home + '/Documents/Protein_data/RASH/RASH_HUMAN2_833a6535-26d0-4c47-8463-7970dae27a32_evfold_result/alignment/RASH_HUMAN2_RASH_HUMAN2_jackhmmer_e-10_m30_complete_run.fa'

msa, n_aa = tools.convert_msa(L, msa_file)
print('%s %s %s' % (len(msa), len(msa[0]), n_aa))

# Convert to matrix: one flattened one-hot vector per sequence, using only
# the first 2000 sequences of the alignment.
msa_vectors = np.array([
    np.ndarray.flatten(tools.convert_samp_to_one_hot(msa[samp], n_aa))
    for samp in range(2000)
])
print(msa_vectors.shape)

# PCA: fit 20 components on rows 1000 onwards, then project both halves.
rows_from_1000 = msa_vectors[1000:]
rows_before_1000 = msa_vectors[:1000]
pca = PCA(n_components=20)
pca.fit(rows_from_1000)
a_samps_pca = pca.transform(rows_from_1000)
b_samps_pca = pca.transform(rows_before_1000)
if __name__ == "__main__":
    # Get data

    # MAKE MY OWN DATA
    # Disabled alternative: generate data with make_data instead of reading
    # the RASH alignment below.
    # L=166
    # n_aa=22
    # msa = protein_model_tools.make_data(n_samps=20000, L=166, n_aa=22)

    # RASH: alignment length and path to the jackhmmer alignment file.
    L = 166
    msa_file = home + '/Documents/Protein_data/RASH/RASH_HUMAN2_833a6535-26d0-4c47-8463-7970dae27a32_evfold_result/alignment/RASH_HUMAN2_RASH_HUMAN2_jackhmmer_e-10_m30_complete_run.fa'
    msa, n_aa = protein_model_tools.convert_msa(L, msa_file)
    # Report the number of entries in msa and the length of the first one.
    print len(msa), len(msa[0])

    # Learn the experts
    # Zero-initialised array of shape (L, L, n_aa, n_aa); presumably one
    # n_aa x n_aa weight matrix per position pair (i, j) -- TODO confirm.
    experts_weights = np.zeros((L,L,n_aa,n_aa))
    # experts_biases = np.zeros((L,L,n_aa))
    # Number of (i, j) pairs visited by the loops below, including i == j.
    # NOTE(review): its use is not visible in this part of the file.
    total = L*L
    for i in range(L):
        for j in range(L):
            # Skip the diagonal: a position is not paired with itself.
            if i == j:
                continue