Example #1
0
def load_LatticeProteins(local_env, path=''):
    """Load the lattice-protein alignment and publish a train/test split.

    The sequences are read from ``path + 'Lattice_Proteins_MSA.fasta'``,
    reordered with a permutation drawn from ``utilities.check_random_state(0)``
    (presumably a generator seeded with 0, so the split is reproducible --
    confirm in ``utilities``), and cut 80% / 20% into train and test sets.

    Args:
        local_env: Mapping that receives the results under the keys
            ``'train_data'``, ``'test_data'``, ``'train_weights'``,
            ``'test_weights'`` and ``'contact_map'``.
        path: Prefix prepended verbatim to the data file name.

    Returns:
        None. All results are written into ``local_env``; both weight entries
        are ``None`` for this data set.
    """
    sequences = Proteins_utils.load_FASTA(path + 'Lattice_Proteins_MSA.fasta')
    n_sequences = sequences.shape[0]
    n_train = int(0.8 * n_sequences)

    rng = utilities.check_random_state(0)
    order = np.argsort(rng.rand(n_sequences))
    shuffled = sequences[order]

    # Contact pairs are written with 1-based site numbers.
    true_contacts_A = [(21, 12), (26, 3), (9, 6), (18, 1), (27, 8), (25, 18),
                       (27, 16), (20, 1), (19, 12), (13, 10), (15, 8),
                       (19, 14), (7, 4), (17, 14), (25, 16), (26, 7), (23, 2),
                       (23, 20), (24, 19), (24, 15), (15, 10), (24, 7),
                       (24, 11), (23, 4), (22, 11), (22, 5), (25, 2), (11, 6)]

    # Shift to 0-based indices and mark both (i, j) and (j, i) so that the
    # resulting 27 x 27 map is symmetric.
    zero_based = np.asarray(true_contacts_A) - 1
    rows, cols = zero_based[:, 0], zero_based[:, 1]
    contact_map = np.zeros([27, 27])
    contact_map[rows, cols] = 1
    contact_map[cols, rows] = 1

    results = (
        ('train_data', shuffled[:n_train].astype(curr_int)),
        ('test_data', shuffled[n_train:].astype(curr_int)),
        ('train_weights', None),
        ('test_weights', None),
        ('contact_map', contact_map.astype(curr_int)),
    )
    for key, value in results:
        local_env[key] = value
Example #2
0
def load_WW(local_env, path=''):
    """Load the WW-domain alignment, its weights and the experimental set.

    The alignment is read from ``path + 'WW_domain_MSA.fasta'``. Each sequence
    gets the weight ``1 / num_neighbours`` where ``num_neighbours`` comes from
    ``Proteins_utils.count_neighbours``. Data and weights are reordered with
    the same permutation (drawn from ``utilities.check_random_state(0)``) and
    cut 80% / 20% into train and test sets.

    The experimentally tested sequences are read from the pickle file
    ``path + 'WW_test_sequences.data'``. The four groups ``sequences_1`` ..
    ``sequences_4`` are concatenated and labelled 0, 1, 1 and 2 respectively.

    Args:
        local_env: Mapping that receives the results under the keys
            ``'train_data'``, ``'test_data'``, ``'train_weights'``,
            ``'test_weights'``, ``'experimental_data'`` and
            ``'experimental_labels'``.
        path: Prefix prepended verbatim to both data file names.

    Returns:
        None. All results are written into ``local_env``.
    """
    all_data = Proteins_utils.load_FASTA(path + 'WW_domain_MSA.fasta')
    num_neighbours = Proteins_utils.count_neighbours(all_data)
    all_weights = 1.0 / num_neighbours

    B = all_data.shape[0]
    n_train = int(0.8 * B)
    seed = utilities.check_random_state(0)
    shuffle = np.argsort(seed.rand(B))

    # Apply the permutation once per array instead of once per slice.
    shuffled_data = all_data[shuffle]
    shuffled_weights = all_weights[shuffle]
    train_data, test_data = shuffled_data[:n_train], shuffled_data[n_train:]
    train_weights = shuffled_weights[:n_train]
    test_weights = shuffled_weights[n_train:]

    # NOTE: pickle can execute arbitrary code while loading; only point
    # `path` at trusted data files.
    # The context manager closes the file even if unpickling fails.
    with open(path + 'WW_test_sequences.data', 'rb') as handle:
        env = pickle.load(handle, encoding=encoding)

    experimental_data = np.asarray(
        np.concatenate([
            env['sequences_1'], env['sequences_2'], env['sequences_3'],
            env['sequences_4']
        ], axis=0),
        dtype='int')
    # Groups 2 and 3 share label 1; group 1 is label 0 and group 4 is label 2.
    experimental_labels = np.asarray(
        np.concatenate([
            np.zeros(len(env['sequences_1'])),
            np.ones(len(env['sequences_2']) + len(env['sequences_3'])),
            2 * np.ones(len(env['sequences_4']))
        ], axis=0),
        dtype='int')

    local_env['train_data'] = train_data.astype(curr_int)
    local_env['test_data'] = test_data.astype(curr_int)
    local_env['train_weights'] = train_weights.astype(curr_float)
    local_env['test_weights'] = test_weights.astype(curr_float)
    local_env['experimental_data'] = experimental_data.astype(curr_int)
    local_env['experimental_labels'] = experimental_labels.astype(curr_int)
Example #3
0
File: bm.py Project: jertubiana/PGM
    def __init__(self,
                 N=100,
                 nature='Bernoulli',
                 n_c=1,
                 random_state=None,
                 gauge='zerosum',
                 zero_field=False):
        """Set up a single-layer model with a coupled visible layer.

        Args:
            N: Size of the layer.
            nature: Layer nature; ``'_coupled'`` is appended to it when the
                layer is declared and created.
            n_c: Number of states per site. Only kept when ``nature`` is
                ``'Potts'``; any other nature forces it to 1.
            random_state: Passed through ``utilities.check_random_state``.
            gauge: Gauge option forwarded to ``layer.initLayer``.
            zero_field: Forwarded to ``layer.initLayer``.
        """
        self.N = N
        self.nature = nature
        self.random_state = utilities.check_random_state(random_state)
        # The number of states is only meaningful for Potts variables.
        self.n_c = n_c if self.nature == 'Potts' else 1
        self.zero_field = zero_field
        self.interpolate = False

        # Declare the single layer to the parent class.
        super(BM, self).__init__(n_layers=1,
                                 layers_size=[self.N],
                                 layers_nature=[self.nature + '_coupled'],
                                 layers_n_c=[self.n_c],
                                 layers_name=['layer'])

        self.gauge = gauge

        # Build the layer object itself with the same settings.
        layer_options = dict(
            N=self.N,
            nature=self.nature + '_coupled',
            position='visible',
            n_c=self.n_c,
            random_state=self.random_state,
            zero_field=self.zero_field,
            gauge=self.gauge)
        self.layer = layer.initLayer(**layer_options)

        self.init_couplings(0.01)
        self.tmp_l2_fields = 0
Example #4
0
def load_Audition_souris(local_env, path=''):
    """Load the recording stored in ``Audition.mat`` and publish a split.

    The array stored under the key ``'binNinf_double_seg'`` of
    ``path + 'Audition.mat'`` is converted to integers, reordered with a
    permutation drawn from ``utilities.check_random_state(0)`` and cut
    80% / 20% along its first axis into train and test sets.

    Args:
        local_env: Mapping that receives the results under the keys
            ``'train_data'``, ``'test_data'``, ``'train_weights'`` and
            ``'test_weights'``.
        path: Prefix prepended verbatim to the data file name.

    Returns:
        None. All results are written into ``local_env``; both weight entries
        are ``None`` for this data set.
    """
    from scipy.io import loadmat

    mat_contents = loadmat(path + 'Audition.mat')
    recordings = np.asarray(mat_contents['binNinf_double_seg'], dtype='int')

    n_samples = recordings.shape[0]
    n_train = int(0.8 * n_samples)

    rng = utilities.check_random_state(0)
    order = np.argsort(rng.rand(n_samples))
    shuffled = recordings[order]

    local_env['train_data'] = shuffled[:n_train].astype(curr_int)
    local_env['test_data'] = shuffled[n_train:].astype(curr_int)
    # This data set carries no per-sample weights.
    local_env['train_weights'] = None
    local_env['test_weights'] = None
Example #5
0
def load_Hsp70(local_env, path=''):
    """Load the Hsp70 alignment with precomputed weights and publish a split.

    The sequences are read from ``path + 'Hsp70_protein_MSA.fasta'`` (the
    labels returned alongside them are not used). The per-sequence weights are
    taken from the ``'all_weights'`` entry of the pickle file
    ``path + 'Hsp70_info.data'``. Data and weights are reordered with the same
    permutation (drawn from ``utilities.check_random_state(0)``) and cut
    80% / 20% into train and test sets.

    Args:
        local_env: Mapping that receives the results under the keys
            ``'train_data'``, ``'test_data'``, ``'train_weights'`` and
            ``'test_weights'``.
        path: Prefix prepended verbatim to both data file names.

    Returns:
        None. All results are written into ``local_env``.
    """
    all_data, _ = Proteins_utils.load_FASTA(path + 'Hsp70_protein_MSA.fasta',
                                            with_labels=True)

    # NOTE: pickle can execute arbitrary code while loading; only point
    # `path` at trusted data files.
    # The context manager closes the file even if unpickling fails.
    with open(path + 'Hsp70_info.data', 'rb') as handle:
        all_weights = pickle.load(handle, encoding=encoding)['all_weights']

    B = all_data.shape[0]
    n_train = int(0.8 * B)
    seed = utilities.check_random_state(0)
    shuffle = np.argsort(seed.rand(B))

    # Apply the permutation once per array instead of once per slice.
    shuffled_data = all_data[shuffle]
    shuffled_weights = all_weights[shuffle]

    local_env['train_data'] = shuffled_data[:n_train].astype(curr_int)
    local_env['test_data'] = shuffled_data[n_train:].astype(curr_int)
    local_env['train_weights'] = shuffled_weights[:n_train].astype(curr_float)
    local_env['test_weights'] = shuffled_weights[n_train:].astype(curr_float)
Example #6
0
def load_Kunitz(local_env, path=''):
    """Load the Kunitz-domain alignment, weights and contact map.

    The alignment is read from ``path + 'Kunitz_domain_MSA.fasta'``. Each
    sequence gets the weight ``1 / num_neighbours`` where ``num_neighbours``
    comes from ``Proteins_utils.count_neighbours``. Data and weights are
    reordered with the same permutation (drawn from
    ``utilities.check_random_state(0)``) and cut 80% / 20% into train and
    test sets.

    The contact map is the ``'cm'`` entry of
    ``path + 'contact_map14_extended.mat'`` thresholded at ``> 0`` and
    combined with its own transpose.

    Args:
        local_env: Mapping that receives the results under the keys
            ``'train_data'``, ``'test_data'``, ``'train_weights'``,
            ``'test_weights'`` and ``'contact_map'``.
        path: Prefix prepended verbatim to both data file names.

    Returns:
        None. All results are written into ``local_env``.
    """
    sequences = Proteins_utils.load_FASTA(path + 'Kunitz_domain_MSA.fasta')
    neighbour_counts = Proteins_utils.count_neighbours(sequences)
    sequence_weights = 1.0 / neighbour_counts

    n_sequences = sequences.shape[0]
    n_train = int(0.8 * n_sequences)

    rng = utilities.check_random_state(0)
    order = np.argsort(rng.rand(n_sequences))
    shuffled_sequences = sequences[order]
    shuffled_weights = sequence_weights[order]

    from scipy.io import loadmat
    mat_contents = loadmat(path + 'contact_map14_extended.mat')
    contact_map = mat_contents['cm'] > 0
    # Combine with the transpose so a contact stored as (i, j) is also
    # present as (j, i).
    contact_map += contact_map.T

    local_env['train_data'] = shuffled_sequences[:n_train].astype(curr_int)
    local_env['test_data'] = shuffled_sequences[n_train:].astype(curr_int)
    local_env['train_weights'] = shuffled_weights[:n_train].astype(curr_float)
    local_env['test_weights'] = shuffled_weights[n_train:].astype(curr_float)
    # Stored as produced by the thresholding above, without a dtype cast.
    local_env['contact_map'] = contact_map
Example #7
0
    def __init__(self, N=10, M=5, n_c=2, nature='Potts', gauge='zerosum', random_state=None):
        """Initialise the parameters of a model with ``M`` components.

        Args:
            N: Number of sites.
            M: Number of components.
            n_c: Number of states per site; only used for array shapes when
                ``nature`` is ``'Potts'``.
            nature: ``'Potts'``, ``'Bernoulli'`` or ``'Spin'``. Any other
                value leaves ``cond_muv`` unset.
            gauge: Stored on the instance as ``self.gauge``.
            random_state: Passed through ``utilities.check_random_state``.
        """
        self.N = N
        self.M = M
        self.n_c = n_c
        self.gauge = gauge
        self.nature = nature

        # Uniform component weights, their running sum, and zero ``gh``.
        self.muh = np.ones(M, dtype=curr_float) / M
        self.cum_muh = np.cumsum(self.muh)
        self.gh = np.zeros(M, dtype=curr_float)

        if nature == 'Potts':
            # Potts variables carry an extra axis of length n_c.
            self.weights = np.zeros([M, N, n_c], dtype=curr_float)
            self.cond_muv = np.ones([M, N, n_c], dtype=curr_float) / n_c
            self.cum_cond_muv = np.cumsum(self.cond_muv, axis=-1)
        else:
            self.weights = np.zeros([M, N], dtype=curr_float)
            if nature == 'Bernoulli':
                self.cond_muv = np.ones([M, N], dtype=curr_float) / 2
            elif nature == 'Spin':
                self.cond_muv = np.zeros([M, N], dtype=curr_float)

        self.random_state = utilities.check_random_state(random_state)
        self.logpartition()
Example #8
0
                                          figsize=(h, w),
                                          ticks_every=10,
                                          ticks_labels_size=10,
                                          title_size=12,
                                          dpi=400,
                                          molecule=molecule)


# Output directory and input sequence file for this run.
dest = './trial/'
g15p = './trial/v3_c0_all.txt'

# Load the sequences from the input file and the affinity value associated
# with each of them (get_affinities is defined elsewhere in the project).
all_data = Proteins_utils.load_FASTA(g15p, drop_duplicates=True, type='dna')
affs = get_affinities(g15p, all_data)

# Draw one permutation of the sample indices.
# NOTE(review): presumably check_random_state(0) returns a generator seeded
# with 0, which would make the shuffle reproducible -- confirm in utilities.
seed = utilities.check_random_state(0)
permutation = np.argsort(seed.rand(all_data.shape[0]))

# Apply the same permutation to both arrays so they stay aligned.
affs = affs[permutation]
all_data = all_data[permutation]  # Shuffle data.

# WEIGHTS
# num_neighbours is only consumed by the commented-out weighting below; the
# weights actually used are the affinities divided by 1000.
num_neighbours = Proteins_utils.count_neighbours(all_data)
# all_weights = 1.0/num_neighbours
weights = np.asarray([float(i) / 1000. for i in affs], dtype='float')

#mu = utilities.average(all_data,c=4,weights=all_weights)

#sequence_logo.Sequence_logo(mu,ticks_every=5);

#PARAMETERS