def load_LatticeProteins(local_env, path=''):
    """Fill ``local_env`` with the lattice-protein train/test split and contact map.

    The alignment is read from ``path + 'Lattice_Proteins_MSA.fasta'``,
    permuted using the random state returned by
    ``utilities.check_random_state(0)``, and split 80% / 20% into train and
    test sets.  No sample weights are provided for this dataset, so both
    weight entries are ``None``.

    Args:
        local_env: Mapping that receives the keys ``'train_data'``,
            ``'test_data'``, ``'train_weights'``, ``'test_weights'`` and
            ``'contact_map'``.
        path: Prefix prepended (by plain string concatenation) to the file
            name, so it must end with a path separator when non-empty.
    """
    sequences = Proteins_utils.load_FASTA(path + 'Lattice_Proteins_MSA.fasta')
    n_sequences = sequences.shape[0]
    n_train = int(0.8 * n_sequences)

    rng = utilities.check_random_state(0)
    order = np.argsort(rng.rand(n_sequences))
    shuffled = sequences[order]

    # Hard-coded contact pairs, given with 1-based site indices.
    true_contacts_A = [
        (21, 12), (26, 3), (9, 6), (18, 1), (27, 8), (25, 18), (27, 16),
        (20, 1), (19, 12), (13, 10), (15, 8), (19, 14), (7, 4), (17, 14),
        (25, 16), (26, 7), (23, 2), (23, 20), (24, 19), (24, 15), (15, 10),
        (24, 7), (24, 11), (23, 4), (22, 11), (22, 5), (25, 2), (11, 6),
    ]
    # Shift to 0-based indices and mark both (i, j) and (j, i) so the map
    # is symmetric.
    pairs = np.asarray(true_contacts_A) - 1
    contact_map = np.zeros([27, 27])
    contact_map[pairs[:, 0], pairs[:, 1]] = 1
    contact_map[pairs[:, 1], pairs[:, 0]] = 1

    local_env['train_data'] = shuffled[:n_train].astype(curr_int)
    local_env['test_data'] = shuffled[n_train:].astype(curr_int)
    local_env['train_weights'] = None
    local_env['test_weights'] = None
    local_env['contact_map'] = contact_map.astype(curr_int)
def load_WW(local_env, path=''):
    """Fill ``local_env`` with the WW-domain split, weights and experimental set.

    The alignment is read from ``path + 'WW_domain_MSA.fasta'``.  Each
    sequence is weighted by the inverse of the value returned for it by
    ``Proteins_utils.count_neighbours``.  Data and weights are permuted with
    the random state returned by ``utilities.check_random_state(0)`` and
    split 80% / 20% into train and test sets.

    Experimental sequences are read from the pickle file
    ``path + 'WW_test_sequences.data'`` (keys ``'sequences_1'`` to
    ``'sequences_4'``) and concatenated in that order.  Their labels are
    0 for group 1, 1 for groups 2 and 3, and 2 for group 4.

    Args:
        local_env: Mapping that receives the keys ``'train_data'``,
            ``'test_data'``, ``'train_weights'``, ``'test_weights'``,
            ``'experimental_data'`` and ``'experimental_labels'``.
        path: Prefix prepended (by plain string concatenation) to the file
            names, so it must end with a path separator when non-empty.
    """
    all_data = Proteins_utils.load_FASTA(path + 'WW_domain_MSA.fasta')
    num_neighbours = Proteins_utils.count_neighbours(all_data)
    all_weights = 1.0 / num_neighbours

    B = all_data.shape[0]
    n_train = int(0.8 * B)
    seed = utilities.check_random_state(0)
    shuffle = np.argsort(seed.rand(B))

    # Apply the permutation once, then slice the train/test parts.
    shuffled_data = all_data[shuffle]
    shuffled_weights = all_weights[shuffle]
    train_data = shuffled_data[:n_train]
    test_data = shuffled_data[n_train:]
    train_weights = shuffled_weights[:n_train]
    test_weights = shuffled_weights[n_train:]

    # NOTE: pickle can execute arbitrary code while loading; only use this
    # loader on trusted data files.
    # The context manager guarantees the file handle is closed even if
    # unpickling raises.
    with open(path + 'WW_test_sequences.data', 'rb') as handle:
        env = pickle.load(handle, encoding=encoding)

    experimental_data = np.asarray(
        np.concatenate([
            env['sequences_1'],
            env['sequences_2'],
            env['sequences_3'],
            env['sequences_4'],
        ], axis=0),
        dtype='int')
    # Groups 2 and 3 share label 1.
    experimental_labels = np.asarray(
        np.concatenate([
            np.zeros(len(env['sequences_1'])),
            np.ones(len(env['sequences_2']) + len(env['sequences_3'])),
            2 * np.ones(len(env['sequences_4'])),
        ], axis=0),
        dtype='int')

    local_env['train_data'] = train_data.astype(curr_int)
    local_env['test_data'] = test_data.astype(curr_int)
    local_env['train_weights'] = train_weights.astype(curr_float)
    local_env['test_weights'] = test_weights.astype(curr_float)
    local_env['experimental_data'] = experimental_data.astype(curr_int)
    local_env['experimental_labels'] = experimental_labels.astype(curr_int)
def __init__(self, N=100, nature='Bernoulli', n_c=1, random_state=None, gauge='zerosum', zero_field=False):
    """Set up a single-layer model with one coupled visible layer.

    Args:
        N: Size of the layer.
        nature: Base layer type; ``'_coupled'`` is appended to it before it
            is handed to the parent constructor and to ``layer.initLayer``.
        n_c: Used only when ``nature == 'Potts'``; every other nature is
            stored with ``n_c = 1``.
        random_state: Passed through ``utilities.check_random_state``.
        gauge: Forwarded to ``layer.initLayer``.
        zero_field: Forwarded to ``layer.initLayer``.
    """
    self.N = N
    self.nature = nature
    self.random_state = utilities.check_random_state(random_state)
    self.n_c = n_c if self.nature == 'Potts' else 1
    self.zero_field = zero_field
    self.interpolate = False

    # The parent constructor is called before ``gauge`` and ``layer`` are
    # set, in the same order as the attributes were originally assigned.
    parent_kwargs = dict(
        n_layers=1,
        layers_size=[self.N],
        layers_nature=[self.nature + '_coupled'],
        layers_n_c=[self.n_c],
        layers_name=['layer'],
    )
    super(BM, self).__init__(**parent_kwargs)

    self.gauge = gauge
    layer_kwargs = dict(
        N=self.N,
        nature=self.nature + '_coupled',
        position='visible',
        n_c=self.n_c,
        random_state=self.random_state,
        zero_field=self.zero_field,
        gauge=self.gauge,
    )
    self.layer = layer.initLayer(**layer_kwargs)

    self.init_couplings(0.01)
    self.tmp_l2_fields = 0
def load_Audition_souris(local_env, path=''):
    """Fill ``local_env`` with the train/test split of the ``Audition.mat`` data.

    The variable ``'binNinf_double_seg'`` is read from
    ``path + 'Audition.mat'`` and converted to an integer array.  Its rows
    are permuted with the random state returned by
    ``utilities.check_random_state(0)`` and split 80% / 20% into train and
    test sets.  No sample weights are provided, so both weight entries are
    ``None``.

    Args:
        local_env: Mapping that receives the keys ``'train_data'``,
            ``'test_data'``, ``'train_weights'`` and ``'test_weights'``.
        path: Prefix prepended (by plain string concatenation) to the file
            name, so it must end with a path separator when non-empty.
    """
    from scipy.io import loadmat

    mat_contents = loadmat(path + 'Audition.mat')
    recordings = np.asarray(mat_contents['binNinf_double_seg'], dtype='int')

    n_samples = recordings.shape[0]
    n_train = int(0.8 * n_samples)
    rng = utilities.check_random_state(0)
    order = np.argsort(rng.rand(n_samples))
    shuffled = recordings[order]

    local_env['train_data'] = shuffled[:n_train].astype(curr_int)
    local_env['test_data'] = shuffled[n_train:].astype(curr_int)
    local_env['train_weights'] = None
    local_env['test_weights'] = None
def load_Hsp70(local_env, path=''):
    """Fill ``local_env`` with the Hsp70 train/test split and sample weights.

    Sequences are read from ``path + 'Hsp70_protein_MSA.fasta'``; the labels
    returned alongside them are not used.  Weights come from the
    ``'all_weights'`` entry of the pickle file ``path + 'Hsp70_info.data'``.
    Data and weights are permuted with the random state returned by
    ``utilities.check_random_state(0)`` and split 80% / 20% into train and
    test sets.

    Args:
        local_env: Mapping that receives the keys ``'train_data'``,
            ``'test_data'``, ``'train_weights'`` and ``'test_weights'``.
        path: Prefix prepended (by plain string concatenation) to the file
            names, so it must end with a path separator when non-empty.
    """
    # The labels are loaded (with_labels=True changes the return shape) but
    # are not stored anywhere, so they are discarded explicitly.
    all_data, _ = Proteins_utils.load_FASTA(
        path + 'Hsp70_protein_MSA.fasta', with_labels=True)

    # NOTE: pickle can execute arbitrary code while loading; only use this
    # loader on trusted data files.
    # The context manager guarantees the file handle is closed even if
    # unpickling raises.
    with open(path + 'Hsp70_info.data', 'rb') as handle:
        all_weights = pickle.load(handle, encoding=encoding)['all_weights']

    B = all_data.shape[0]
    n_train = int(0.8 * B)
    seed = utilities.check_random_state(0)
    shuffle = np.argsort(seed.rand(B))

    # Apply the permutation once, then slice the train/test parts.
    shuffled_data = all_data[shuffle]
    shuffled_weights = all_weights[shuffle]

    local_env['train_data'] = shuffled_data[:n_train].astype(curr_int)
    local_env['test_data'] = shuffled_data[n_train:].astype(curr_int)
    local_env['train_weights'] = shuffled_weights[:n_train].astype(curr_float)
    local_env['test_weights'] = shuffled_weights[n_train:].astype(curr_float)
def load_Kunitz(local_env, path=''):
    """Fill ``local_env`` with the Kunitz-domain split, weights and contact map.

    The alignment is read from ``path + 'Kunitz_domain_MSA.fasta'``.  Each
    sequence is weighted by the inverse of the value returned for it by
    ``Proteins_utils.count_neighbours``.  Data and weights are permuted with
    the random state returned by ``utilities.check_random_state(0)`` and
    split 80% / 20% into train and test sets.

    The contact map is the ``'cm'`` variable of
    ``path + 'contact_map14_extended.mat'``, thresholded at ``> 0`` and
    symmetrised by combining it with its transpose.

    Args:
        local_env: Mapping that receives the keys ``'train_data'``,
            ``'test_data'``, ``'train_weights'``, ``'test_weights'`` and
            ``'contact_map'``.
        path: Prefix prepended (by plain string concatenation) to the file
            names, so it must end with a path separator when non-empty.
    """
    from scipy.io import loadmat

    all_data = Proteins_utils.load_FASTA(path + 'Kunitz_domain_MSA.fasta')
    num_neighbours = Proteins_utils.count_neighbours(all_data)
    all_weights = 1.0 / num_neighbours

    B = all_data.shape[0]
    n_train = int(0.8 * B)
    seed = utilities.check_random_state(0)
    shuffle = np.argsort(seed.rand(B))

    # Apply the permutation once, then slice the train/test parts.
    shuffled_data = all_data[shuffle]
    shuffled_weights = all_weights[shuffle]

    # Load contact map.
    contact_map = loadmat(path + 'contact_map14_extended.mat')['cm'] > 0
    # Symmetrise out of place: an in-place `+=` would read from a transposed
    # view of the very array being written, which is only correct when the
    # library detects the memory overlap.  For boolean arrays `+` acts as a
    # logical OR, so the result keeps its boolean dtype.
    contact_map = contact_map + contact_map.T

    local_env['train_data'] = shuffled_data[:n_train].astype(curr_int)
    local_env['test_data'] = shuffled_data[n_train:].astype(curr_int)
    local_env['train_weights'] = shuffled_weights[:n_train].astype(curr_float)
    local_env['test_weights'] = shuffled_weights[n_train:].astype(curr_float)
    local_env['contact_map'] = contact_map
def __init__(self, N=10, M=5, n_c=2, nature='Potts', gauge='zerosum', random_state=None):
    """Allocate the model parameters with uniform / zero initial values.

    Args:
        N: Second dimension of the per-component parameter arrays.
        M: Number of entries of ``muh`` / ``gh`` and first dimension of the
            per-component parameter arrays.
        n_c: Third dimension of the parameter arrays; only used when
            ``nature == 'Potts'``.
        nature: One of ``'Bernoulli'``, ``'Spin'`` or ``'Potts'``.
        gauge: Stored on the instance unchanged.
        random_state: Passed through ``utilities.check_random_state``.

    Raises:
        ValueError: If ``nature`` is not one of the supported values.
    """
    # Fail early with a clear message: without this check an unsupported
    # nature leaves ``cond_muv`` undefined and the cumulative sum below
    # fails with an unrelated-looking AttributeError.
    if nature not in ('Bernoulli', 'Spin', 'Potts'):
        raise ValueError(
            "Unsupported nature %r; expected 'Bernoulli', 'Spin' or 'Potts'"
            % (nature,))

    self.N = N
    self.M = M
    self.n_c = n_c
    self.gauge = gauge
    self.nature = nature

    # Uniform vector of length M summing to one, and its running sum.
    self.muh = np.ones(M, dtype=curr_float) / M
    self.cum_muh = np.cumsum(self.muh)
    self.gh = np.zeros(M, dtype=curr_float)

    # Potts parameters carry an extra axis of length n_c.
    if nature == 'Potts':
        self.weights = np.zeros([M, N, n_c], dtype=curr_float)
    else:
        self.weights = np.zeros([M, N], dtype=curr_float)

    # Initial conditional values: 1/2 for Bernoulli, 0 for Spin and
    # 1/n_c per state for Potts.
    if nature == 'Bernoulli':
        self.cond_muv = np.ones([M, N], dtype=curr_float) / 2
    elif nature == 'Spin':
        self.cond_muv = np.zeros([M, N], dtype=curr_float)
    else:  # 'Potts' (validated above)
        self.cond_muv = np.ones([M, N, n_c], dtype=curr_float) / n_c

    # Running sum along the last axis, whatever the nature.
    self.cum_cond_muv = np.cumsum(self.cond_muv, axis=-1)
    self.random_state = utilities.check_random_state(random_state)
    self.logpartition()
figsize=(h, w), ticks_every=10, ticks_labels_size=10, title_size=12, dpi=400, molecule=molecule) dest = './trial/' g15p = './trial/v3_c0_all.txt' # import data and weights all_data = Proteins_utils.load_FASTA(g15p, drop_duplicates=True, type='dna') affs = get_affinities(g15p, all_data) seed = utilities.check_random_state(0) permutation = np.argsort(seed.rand(all_data.shape[0])) affs = affs[permutation] all_data = all_data[permutation] # Shuffle data. #WEIGHTS num_neighbours = Proteins_utils.count_neighbours(all_data) # all_weights = 1.0/num_neighbours weights = np.asarray([float(i) / 1000. for i in affs], dtype='float') #mu = utilities.average(all_data,c=4,weights=all_weights) #sequence_logo.Sequence_logo(mu,ticks_every=5); #PARAMETERS