예제 #1
0
  def batch_to_feed_dict(self, batch):
    """Build a tensorflow feed_dict from a batch of ConvMol objects.

    The molecules in ``batch`` are combined with ``ConvMol.agglomerate_mols``
    and the combined atom features, degree slice, membership and the
    adjacency lists for degrees 1..``self.max_deg`` are keyed by the
    corresponding placeholder tensors.

    params
    ------
    batch : np.ndarray
      Array of ConvMol objects

    returns
    -------
    feed_dict : dict
      Output of ``merge_dicts`` over the atom-level entries and the
      per-degree adjacency entries.
    """
    merged_mol = ConvMol.agglomerate_mols(batch)
    atom_features = merged_mol.get_atom_features()

    # Degree 0 has no adjacency placeholder, so collection starts at degree 1.
    adjacency_by_degree = [
        merged_mol.deg_adj_lists[degree]
        for degree in range(1, self.max_deg + 1)
    ]
    adjacency_entries = {
        placeholder: adjacency
        for placeholder, adjacency in zip(self.deg_adj_lists_placeholders,
                                          adjacency_by_degree)
    }

    atom_entries = {
        self.atom_features_placeholder: atom_features,
        self.deg_slice_placeholder: merged_mol.deg_slice,
        self.membership_placeholder: merged_mol.membership,
    }
    return merge_dicts([atom_entries, adjacency_entries])
예제 #2
0
  def test_graph_gather(self):
    """Check that a GraphGather layer can be built and evaluated."""
    n_mols = 2
    feature_dim = 75
    # 'CCC' and 'C' together contain 4 atoms.
    smiles = ['CCC', 'C']
    rdkit_mols = [rdkit.Chem.MolFromSmiles(smi) for smi in smiles]
    conv_mols = ConvMolFeaturizer().featurize(rdkit_mols)
    merged = ConvMol.agglomerate_mols(conv_mols)
    raw_atom_features = merged.get_atom_features()
    raw_degree_slice = merged.deg_slice
    raw_membership = merged.membership
    # The degree-0 adjacency list is not passed to the layer.
    raw_adjacency = merged.get_deg_adjacency_lists()[1:]

    with self.session() as sess:
      layer_inputs = [
          tf.convert_to_tensor(raw_atom_features, dtype=tf.float32),
          tf.convert_to_tensor(raw_degree_slice, dtype=tf.int32),
          tf.convert_to_tensor(raw_membership, dtype=tf.int32),
      ]
      layer_inputs.extend(
          tf.convert_to_tensor(adjacency, dtype=tf.int32)
          for adjacency in raw_adjacency)
      gathered = GraphGather(n_mols)(*layer_inputs)
      sess.run(tf.global_variables_initializer())
      result = gathered.eval()
      # TODO(rbharath): Why is it 2*n_features instead of n_features?
      assert result.shape == (n_mols, 2 * feature_dim)
예제 #3
0
 def default_generator(self,
                       dataset,
                       epochs=1,
                       predict=False,
                       deterministic=True,
                       pad_batches=True):
   """Yield one feed dict per minibatch of ``dataset``.

   Each batch's molecules are merged with ``ConvMol.agglomerate_mols`` and
   the merged graph's arrays are keyed by this model's input tensors.

   Parameters
   ----------
   dataset
     Object whose ``iterbatches(batch_size, pad_batches=...,
     deterministic=...)`` yields ``(X_b, y_b, w_b, ids_b)`` tuples.
   epochs : int
     Number of passes over ``dataset``.
   predict : bool
     Not read by this generator.
   deterministic : bool
     Forwarded to ``dataset.iterbatches``.
   pad_batches : bool
     Forwarded to ``dataset.iterbatches``.

   Yields
   ------
   dict
     Maps ``self.labels[0]``, ``self.task_weights[0]``,
     ``self.atom_features``, ``self.degree_slice``, ``self.membership`` and
     the entries of ``self.deg_adjs`` to the values for one batch.
   """
   for _epoch in range(epochs):
     for X_b, y_b, w_b, _ids_b in dataset.iterbatches(
         self.batch_size,
         pad_batches=pad_batches,
         deterministic=deterministic):
       feed = {}
       if self.mode == 'classification':
         # One-hot encode the flattened labels, then restore the
         # (sample, task, class) layout.
         one_hot = to_one_hot(y_b.flatten(), self.n_classes)
         feed[self.labels[0]] = one_hot.reshape(-1, self.n_tasks,
                                                self.n_classes)
       else:
         feed[self.labels[0]] = y_b
       feed[self.task_weights[0]] = w_b

       merged = ConvMol.agglomerate_mols(X_b)
       feed[self.atom_features] = merged.get_atom_features()
       feed[self.degree_slice] = merged.deg_slice
       feed[self.membership] = merged.membership

       # Fetch the adjacency lists once instead of on every loop iteration.
       deg_adj_lists = merged.get_deg_adjacency_lists()
       # Index 0 (degree 0) is skipped; deg_adjs[k] receives degree k + 1.
       for degree in range(1, len(deg_adj_lists)):
         feed[self.deg_adjs[degree - 1]] = deg_adj_lists[degree]
       yield feed
예제 #4
0
  def test_agglomerate_molecules(self):
    """Check ConvMol.agglomerate_mols on three hand-built molecules."""
    # (artificial atom feature rows, adjacency list) for each molecule;
    # every atom has 4 features.
    molecule_specs = [
        ([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
         [[1], [0, 2], [1]]),
        ([[20, 21, 22, 23], [24, 25, 26, 27], [28, 29, 30, 31],
          [32, 33, 34, 35]],
         [[1, 2], [0, 3], [0, 3], [1, 2]]),
        ([[40, 41, 42, 43], [44, 45, 46, 47], [48, 49, 50, 51],
          [52, 53, 54, 55], [56, 57, 58, 59]],
         [[1, 2], [0, 3], [0, 3], [1, 2, 4], [3]]),
    ]
    molecules = [
        ConvMol(np.array(feature_rows), adjacency)
        for feature_rows, adjacency in molecule_specs
    ]

    concat_mol = ConvMol.agglomerate_mols(molecules)

    assert concat_mol.get_num_atoms() == 12
    assert concat_mol.get_num_molecules() == 3

    # Spot-check a few rows of the merged feature matrix.
    atom_features = concat_mol.get_atom_features()
    expected_rows = [
        (0, [1, 2, 3, 4]),
        (2, [56, 57, 58, 59]),
        (11, [52, 53, 54, 55]),
        (4, [20, 21, 22, 23]),
    ]
    for row, expected in expected_rows:
      assert np.array_equal(atom_features[row, :], expected)

    # Expected adjacency list for each degree, indexed by degree.
    deg_adj_lists = concat_mol.get_deg_adjacency_lists()
    expected_adjacency = [
        np.zeros([0, 0]),  # no atoms of degree 0
        [[3], [3], [11]],  # 3 atoms of degree 1
        [[0, 1], [5, 6], [4, 7], [4, 7], [5, 6], [9, 10], [8, 11],
         [8, 11]],  # 8 atoms of degree 2
        [[9, 10, 2]],  # 1 atom of degree 3
        np.zeros([0, 4]),  # 0 atoms of degree 4
        np.zeros([0, 5]),  # 0 atoms of degree 5
    ]
    for degree, expected in enumerate(expected_adjacency):
      assert np.array_equal(deg_adj_lists[degree], expected)
 def feed_dict_generator(dataset, batch_size, epochs=1):
   """Yield one feed dict per padded minibatch of ``dataset``.

   The keys (``labels``, ``task_weights``, ``atom_features``,
   ``degree_slice``, ``membership``, ``deg_adjs``) are names resolved from
   the surrounding scope, which is not shown here.

   Parameters
   ----------
   dataset
     Object whose ``iterbatches(batch_size, pad_batches=True)`` yields
     ``(X_b, y_b, w_b, ids_b)`` tuples.
   batch_size : int
     Forwarded to ``dataset.iterbatches``.
   epochs : int
     Number of passes over ``dataset``.

   Yields
   ------
   dict
     One entry per label (one-hot encoded column of ``y_b``), the weights,
     and the arrays of the merged ``ConvMol`` for the batch.
   """
   for _epoch in range(epochs):
     for X_b, y_b, w_b, _ids_b in dataset.iterbatches(
         batch_size, pad_batches=True):
       feed = {}
       for task_index, label in enumerate(labels):
         feed[label] = to_one_hot(y_b[:, task_index])
       feed[task_weights] = w_b

       merged = ConvMol.agglomerate_mols(X_b)
       feed[atom_features] = merged.get_atom_features()
       feed[degree_slice] = merged.deg_slice
       feed[membership] = merged.membership

       # Fetch the adjacency lists once instead of on every loop iteration.
       adjacency_lists = merged.get_deg_adjacency_lists()
       # Index 0 (degree 0) is skipped; deg_adjs[k] receives degree k + 1.
       for degree in range(1, len(adjacency_lists)):
         feed[deg_adjs[degree - 1]] = adjacency_lists[degree]
       yield feed
예제 #6
0
 def _construct_feed_dict(self, X_b, y_b, w_b, ids_b):
   """Build the feed dict for one batch.

   Entries are added only for the inputs that are available: labels when
   ``y_b`` is given, weights when both ``self.task_weights`` and ``w_b`` are
   given, and graph features when ``self.features`` is set.

   Parameters
   ----------
   X_b
     Batch of molecules passed to ``ConvMol.agglomerate_mols``.
   y_b
     Label array indexed as ``y_b[:, task]``, or None.
   w_b
     Weights for the batch, or None.
   ids_b
     Not read by this method.

   Returns
   -------
   dict
     Maps ``out_tensor`` attributes of the label / weight / feature layers
     to the values for this batch.
   """
   feed_dict = {}
   if y_b is not None:
     for task_index, label in enumerate(self.labels):
       feed_dict[label.out_tensor] = to_one_hot(y_b[:, task_index])
   if self.task_weights is not None and w_b is not None:
     feed_dict[self.task_weights[0].out_tensor] = w_b
   if self.features is not None:
     merged = ConvMol.agglomerate_mols(X_b)
     feed_dict[self.features[0].out_tensor] = merged.get_atom_features()
     feed_dict[self.features[1].out_tensor] = merged.deg_slice
     feed_dict[self.features[2].out_tensor] = merged.membership
     # Fetch the adjacency lists once rather than once per degree.
     deg_adj_lists = merged.get_deg_adjacency_lists()
     # features[3:] hold the adjacency inputs for degrees 1..max_degree.
     for degree in range(1, self.max_degree + 1):
       feed_dict[self.features[degree + 2].out_tensor] = deg_adj_lists[degree]
   return feed_dict
예제 #7
0
def data_generator(dataset, epochs=1, predict=False, pad_batches=True):
    """Yield one feed dict per minibatch of ``dataset``.

    The batch size and all dictionary keys (``batch_size``, ``labels``,
    ``weights``, ``atom_features``, ``degree_slice``, ``membership``,
    ``deg_adjs``) are names resolved from the surrounding scope, which is
    not shown here.

    Parameters
    ----------
    dataset
        Object whose ``iterbatches`` yields ``(X_b, y_b, w_b, ids_b)``
        tuples.
    epochs : int
        Number of passes over ``dataset``.
    predict : bool
        Not read by this generator.
    pad_batches : bool
        Forwarded to ``dataset.iterbatches``.

    Yields
    ------
    dict
        One entry per label (one-hot encoded column of ``y_b``), the
        weights, and the arrays of the merged ``ConvMol`` for the batch.
    """
    for _epoch in range(epochs):
        batches = dataset.iterbatches(batch_size,
                                      pad_batches=pad_batches,
                                      deterministic=True)
        for X_b, y_b, w_b, _ids_b in batches:
            feed = {}
            for task_index, label in enumerate(labels):
                feed[label] = to_one_hot(y_b[:, task_index])
            feed[weights] = w_b

            merged = ConvMol.agglomerate_mols(X_b)
            feed[atom_features] = merged.get_atom_features()
            feed[degree_slice] = merged.deg_slice
            feed[membership] = merged.membership

            # Fetch the adjacency lists once instead of per iteration.
            adjacency_lists = merged.get_deg_adjacency_lists()
            # Index 0 (degree 0) is skipped; deg_adjs[k] gets degree k + 1.
            for degree in range(1, len(adjacency_lists)):
                feed[deg_adjs[degree - 1]] = adjacency_lists[degree]
            yield feed
예제 #8
0
 def _construct_feed_dict(self, X_b, y_b, w_b, ids_b):
     """Assemble the feed dict for a single batch.

     Labels are added when ``y_b`` is not None, weights when both
     ``self.task_weights`` and ``w_b`` are not None, and graph inputs when
     ``self.features`` is not None.

     Parameters
     ----------
     X_b
         Batch of molecules passed to ``ConvMol.agglomerate_mols``.
     y_b
         Label array indexed as ``y_b[:, task]``, or None.
     w_b
         Weights for the batch, or None.
     ids_b
         Not read by this method.

     Returns
     -------
     dict
         Keys are the ``out_tensor`` attributes of the label, weight and
         feature layers.
     """
     feed_dict = {}
     if y_b is not None:
         for task_index, label in enumerate(self.labels):
             feed_dict[label.out_tensor] = to_one_hot(y_b[:, task_index])
     if self.task_weights is not None and w_b is not None:
         feed_dict[self.task_weights[0].out_tensor] = w_b
     if self.features is not None:
         merged = ConvMol.agglomerate_mols(X_b)
         atom_input, slice_input, membership_input = (self.features[0],
                                                      self.features[1],
                                                      self.features[2])
         feed_dict[atom_input.out_tensor] = merged.get_atom_features()
         feed_dict[slice_input.out_tensor] = merged.deg_slice
         feed_dict[membership_input.out_tensor] = merged.membership
         # One call to the getter instead of one per degree.
         deg_adj_lists = merged.get_deg_adjacency_lists()
         # The feature at position offset + 3 takes the list for degree
         # offset + 1 (the degree-0 list is never fed).
         for offset in range(self.max_degree):
             adjacency_input = self.features[offset + 3]
             feed_dict[adjacency_input.out_tensor] = deg_adj_lists[offset + 1]
     return feed_dict
예제 #9
0
def data_generator(dataset, n_epoch=1, predict=False):
    """Yield one feed dict per padded, deterministic minibatch.

    The batch size and the dictionary keys (``n_batch``, ``label15``,
    ``atom_features``, ``degree_slice``, ``membership``, ``deg_adjs``) are
    names resolved from the surrounding scope, which is not shown here.

    Parameters
    ----------
    dataset
        Object whose ``iterbatches`` yields ``(X_b, y_b, w_b, ids_b)``
        tuples.
    n_epoch : int
        Number of passes over ``dataset``.
    predict : bool
        When false, a progress line is printed at the start of each epoch.

    Yields
    ------
    dict
        One entry per label (one-hot encoded column of ``y_b``) plus the
        arrays of the merged ``ConvMol``; batch weights are not included.
    """
    for epoch_index in range(n_epoch):
        if not predict:
            print('Starting epoch %i' % epoch_index)
        batches = dataset.iterbatches(n_batch,
                                      pad_batches=True,
                                      deterministic=True)
        for X_b, y_b, _w_b, _ids_b in batches:
            feed = {
                label_t: to_one_hot(y_b[:, task_index])
                for task_index, label_t in enumerate(label15)
            }
            merged = ConvMol.agglomerate_mols(X_b)
            feed[atom_features] = merged.get_atom_features()
            feed[degree_slice] = merged.deg_slice
            feed[membership] = merged.membership
            adjacency_lists = merged.get_deg_adjacency_lists()
            # Exactly ten adjacency inputs are fed: deg_adjs[0..9] receive
            # the lists for degrees 1..10.
            for slot in range(10):
                feed[deg_adjs[slot]] = adjacency_lists[slot + 1]
            yield feed
def data_generator(dataset, predict=False, pad_batches=True):
    """Yield ``(inputs, labels, weights)`` for each minibatch of ``dataset``.

    ``batch_size`` and ``n_tasks`` are names resolved from the surrounding
    scope, which is not shown here.

    Parameters
    ----------
    dataset
        Object whose ``iterbatches`` yields ``(X_b, y_b, w_b, ids_b)``
        tuples.
    predict : bool
        Not read by this generator.
    pad_batches : bool
        Forwarded to ``dataset.iterbatches``.

    Yields
    ------
    tuple
        ``inputs`` is a list holding the merged atom features, the degree
        slice, the membership array and then the adjacency lists for every
        degree except 0. ``labels`` is a one-element list with the labels
        one-hot encoded over 2 classes and reshaped to
        ``(-1, n_tasks, 2)``. ``weights`` is ``[w_b]``.
    """
    batches = dataset.iterbatches(batch_size,
                                  pad_batches=pad_batches,
                                  deterministic=True)
    for X_b, y_b, w_b, _ids_b in batches:
        # Merge the batch's ConvMol objects into a single molecule object.
        merged = ConvMol.agglomerate_mols(X_b)
        inputs = [
            merged.get_atom_features(),
            merged.deg_slice,
            np.array(merged.membership),
        ]

        # Fetch the degree-grouped adjacency lists once, then append all but
        # the degree-0 entry.
        adjacency_lists = merged.get_deg_adjacency_lists()
        inputs.extend(adjacency_lists[degree]
                      for degree in range(1, len(adjacency_lists)))

        batch_labels = [to_one_hot(y_b.flatten(), 2).reshape(-1, n_tasks, 2)]
        batch_weights = [w_b]
        yield (inputs, batch_labels, batch_weights)
예제 #11
0
  def amino_acid_embedding(self, name=None):
    """Build a graph-convolution sub-network over the amino-acid molecules.

    Every SMILES string in ``AminoAcid_SMILES`` is featurized with
    ``ConvMolFeaturizer`` and the results are merged into one ``ConvMol``
    whose arrays are embedded in the graph as constants. On top of these
    sit two GraphConv -> BatchNorm -> GraphPool stages, a Dense + BatchNorm
    stage, a GraphGather readout and a final ``AminoAcidPad`` layer.

    Parameters
    ----------
    name : str, optional
      Prefix for the names of all layers created here. When omitted, a
      prefix is derived from ``self.module_count``, which is then
      incremented.

    Returns
    -------
    The ``AminoAcidPad`` layer that terminates the sub-network.
    """
    if name is None:
      name = 'AAEmbedding_' + str(self.module_count) + '_'
      self.module_count += 1

    # Maximum atom degree used for agglomeration and by every graph layer.
    max_deg = 3

    featurizer = ConvMolFeaturizer()
    featurized_AA = [
        featurizer._featurize(Chem.MolFromSmiles(smile))
        for smile in AminoAcid_SMILES
    ]
    multiConvMol = ConvMol.agglomerate_mols(featurized_AA, max_deg=max_deg)

    atom_features = TensorWrapper(
        tf.constant(multiConvMol.get_atom_features(), dtype=tf.float32),
        name=name + 'atom_features')
    degree_slice = TensorWrapper(
        tf.constant(multiConvMol.deg_slice, dtype=tf.int32),
        name=name + 'degree')
    membership = TensorWrapper(
        tf.constant(multiConvMol.membership, dtype=tf.int32),
        name=name + 'membership')

    # Fetch the adjacency lists once; the degree-0 entry is not used, so
    # the wrapper named 'deg_<i>' holds the list for degree i + 1.
    deg_adj_lists = multiConvMol.get_deg_adjacency_lists()
    deg_adjs = [
        TensorWrapper(
            tf.constant(deg_adj_lists[i + 1], dtype=tf.int32),
            name=name + 'deg_' + str(i)) for i in range(max_deg)
    ]
    graph_inputs = [degree_slice, membership] + deg_adjs

    gc1 = GraphConv(
        64,
        max_deg=max_deg,
        activation_fn=tf.nn.relu,
        in_layers=[atom_features] + graph_inputs,
        name=name + 'gc1')
    batch_norm1 = BatchNorm(
        in_layers=[gc1, self.training_placeholder], name=name + 'bn1')
    gp1 = GraphPool(
        max_degree=max_deg,
        in_layers=[batch_norm1] + graph_inputs,
        name=name + 'gp1')
    gc2 = GraphConv(
        64,
        max_deg=max_deg,
        activation_fn=tf.nn.relu,
        in_layers=[gp1] + graph_inputs,
        name=name + 'gc2')
    batch_norm2 = BatchNorm(
        in_layers=[gc2, self.training_placeholder], name=name + 'bn2')
    gp2 = GraphPool(
        max_degree=max_deg,
        in_layers=[batch_norm2] + graph_inputs,
        name=name + 'gp2')
    # Floor division keeps the channel count an integer under Python 3,
    # where ``/`` would produce a float.
    dense = Dense(
        out_channels=self.embedding_length // 2,
        activation_fn=tf.nn.relu,
        in_layers=[gp2],
        name=name + 'dense1')
    batch_norm3 = BatchNorm(
        in_layers=[dense, self.training_placeholder], name=name + 'bn3')
    # NOTE(review): 21 is presumably the number of entries in
    # AminoAcid_SMILES -- confirm before changing either.
    readout = GraphGather(
        batch_size=21,
        activation_fn=tf.nn.tanh,
        in_layers=[batch_norm3] + graph_inputs,
        name=name + 'gg')
    padding = AminoAcidPad(
        embedding_length=self.embedding_length,
        in_layers=[readout],
        name=name + 'pad')
    return padding
예제 #12
0
 def default_generator(self,
                       dataset,
                       epochs=1,
                       mode='fit',
                       deterministic=True,
                       pad_batches=True):
   """Yield ``(inputs, labels, weights)`` for each minibatch of ``dataset``.

   Parameters
   ----------
   dataset
     Object whose ``iterbatches`` yields ``(X_b, y_b, w_b, ids_b)`` tuples.
   epochs : int
     Number of passes over ``dataset``.
   mode : str
     Not read by this generator; the classification check below uses
     ``self.mode`` instead.
   deterministic : bool
     Forwarded to ``dataset.iterbatches``.
   pad_batches : bool
     Forwarded to ``dataset.iterbatches``.

   Yields
   ------
   tuple
     ``inputs`` is a list holding the merged atom features, the degree
     slice, the membership array, the number of samples in the batch and
     then the adjacency lists for every degree except 0. Labels and weights
     are each wrapped in a one-element list.
   """
   for _epoch in range(epochs):
     for X_b, y_b, w_b, _ids_b in dataset.iterbatches(
         batch_size=self.batch_size,
         deterministic=deterministic,
         pad_batches=pad_batches):
       if self.mode == 'classification':
         # One-hot encode the flattened labels, then restore the
         # (sample, task, class) layout.
         y_b = to_one_hot(y_b.flatten(), self.n_classes).reshape(
             -1, self.n_tasks, self.n_classes)
       merged = ConvMol.agglomerate_mols(X_b)
       n_samples = np.array(X_b.shape[0])
       inputs = [
           merged.get_atom_features(),
           merged.deg_slice,
           np.array(merged.membership),
           n_samples,
       ]
       # Fetch the adjacency lists once, then append all but the degree-0
       # entry.
       deg_adj_lists = merged.get_deg_adjacency_lists()
       inputs.extend(
           deg_adj_lists[degree] for degree in range(1, len(deg_adj_lists)))
       yield (inputs, [y_b], [w_b])
예제 #13
0
 def default_generator(self,
                       dataset,
                       epochs=1,
                       predict=False,
                       pad_batches=True):
   """Yield one feed dict per padded, deterministic minibatch.

   Parameters
   ----------
   dataset
     Object whose ``iterbatches`` yields ``(X_b, y_b, w_b, ids_b)`` tuples.
   epochs : int
     Number of passes over ``dataset``.
   predict : bool
     Not read by this generator.
   pad_batches : bool
     Not read by this generator; batches are always requested with
     ``pad_batches=True``.

   Yields
   ------
   dict
     One entry per element of ``self.my_labels`` (one-hot encoded for
     classification, a trailing axis added for regression), the task
     weights, and the arrays of the merged ``ConvMol`` for the batch.
   """
   for _epoch in range(epochs):
     # NOTE(review): the ``pad_batches`` argument is ignored here and padding
     # is always on -- confirm whether that is intentional before wiring
     # the parameter through.
     for X_b, y_b, w_b, _ids_b in dataset.iterbatches(
         self.batch_size, pad_batches=True, deterministic=True):
       feed = {}
       for task_index, label in enumerate(self.my_labels):
         if self.mode == 'classification':
           feed[label] = to_one_hot(y_b[:, task_index])
         elif self.mode == 'regression':
           feed[label] = np.expand_dims(y_b[:, task_index], -1)
       feed[self.my_task_weights] = w_b

       merged = ConvMol.agglomerate_mols(X_b)
       feed[self.atom_features] = merged.get_atom_features()
       feed[self.degree_slice] = merged.deg_slice
       feed[self.membership] = merged.membership

       # Fetch the adjacency lists once instead of on every loop iteration.
       deg_adj_lists = merged.get_deg_adjacency_lists()
       # Index 0 (degree 0) is skipped; deg_adjs[k] receives degree k + 1.
       for degree in range(1, len(deg_adj_lists)):
         feed[self.deg_adjs[degree - 1]] = deg_adj_lists[degree]
       yield feed
예제 #14
0
파일: model.py 프로젝트: truongbuu/Invivo
    def data_generator(self, dataset, prior_label , task = None, num_prior = 0\
                                   , epochs=1, pad_batches=True):
        """Data generator for training and evaluation.

        Yields one feed dict per deterministic minibatch of ``dataset``.
        Column 0 of each ``X_b`` is merged with ``ConvMol.agglomerate_mols``;
        the remaining columns are fed to ``self.circular_feat``.

        Parameters
        ----------
        dataset
            Object whose ``iterbatches`` yields ``(X_b, y_b, w_b, ids_b)``.
        prior_label
            When truthy, a per-sample "prior" vector of length
            ``2 * self.num_task`` encoding some known labels is appended to
            the features of every atom of that sample.
        task : int or None
            When None, the revealed tasks are drawn at random per sample.
            Otherwise the index of a task that is never revealed and whose
            weight is forced to 1.0.
        num_prior : int
            Number of other tasks to reveal per sample when ``task`` is
            given.
        epochs : int
            Number of passes over ``dataset``.
        pad_batches : bool
            Forwarded to ``dataset.iterbatches``.

        Yields
        ------
        dict
            Keyed by ``self.labels``, ``self.circular_feat``,
            ``self.atom_features``, ``self.weights``, ``self.degree_slice``,
            ``self.membership`` and the entries of ``self.deg_adjs``.
        """
        for epoch in range(epochs):
            for ind, (X_b, y_b, w_b, ids_b) in enumerate(
                dataset.iterbatches(
                    self.batch_size, pad_batches=pad_batches, deterministic=True)):
              d = {}
              # One-hot labels are taken from the unmodified y_b.
              for index, label in enumerate(self.labels):
                d[label] = to_one_hot(y_b[:, index])
              #if epochs < 12:
              # Rescale the weights: 0.1 * w_b, plus an extra
              # w_b * y_b * self.scaled_w / 10 term. This rebinds w_b to the
              # result of the expression; the in-place edits further down act
              # on that result (presumably a fresh array -- verify).
              w_b = w_b*(0.1) + (w_b*y_b*self.scaled_w)/10.0
              # Column 0 is merged as ConvMol objects; the other columns are
              # passed through unchanged as the circular features.
              multiConvMol = ConvMol.agglomerate_mols(X_b[:,0])
              circular_feat = X_b[:,1:]
              d[self.circular_feat] = circular_feat
              """Encode labels into the atom_features"""
              if prior_label:
                  # prior[e] is a vector of length 2 * num_task for sample e:
                  # slot 2*t + 1 is set when task t is revealed with label 1,
                  # slot 2*t when it is revealed with any other label.
                  prior = []
                  # NOTE(review): both branches iterate over
                  # range(self.batch_size), so they assume every batch has
                  # exactly self.batch_size rows -- confirm this holds when
                  # pad_batches is False.
                  if task is None:
                      for e in range(self.batch_size):
                          arr = np.zeros(self.num_task * 2)
                          # With probability 0.5 reveal a random subset of
                          # 0..num_task-1 tasks; otherwise reveal none.
                          if random.random() < 0.5:
                              index = random.sample(range(self.num_task), \
                                           random.randint(0, self.num_task -1))
                          else:
                              index = []
                          if len(index) != 0:
                              for sth in index:
                                  # Only tasks with a non-zero weight are
                                  # revealed.
                                  if w_b[e,sth] != 0:
                                      if y_b[e,sth] == 1:
                                          arr[2*sth+1] = 1
                                      else:
                                          arr[2*sth] = 1
                                      # A revealed task's weight is scaled
                                      # down by a factor of 1000.
                                      w_b[e,sth] = w_b[e,sth]*0.001
                          prior.append(arr)
                  else:
                      for e in range(self.batch_size):
                          arr = np.zeros(self.num_task * 2)
                          # Candidate tasks are all tasks except `task`.
                          list_t = list(range(self.num_task))
                          list_t.pop(task)
                          index = random.sample(list_t, num_prior)
                          if len(index) != 0:
                              for sth in index:
                                  # Only tasks with a non-zero weight are
                                  # revealed.
                                  if w_b[e,sth] != 0:
                                      if y_b[e,sth] == 1:
                                          arr[2*sth+1] = 1
                                      else:
                                          arr[2*sth] = 1
                                      # A revealed task's weight is scaled
                                      # down by a factor of 1000.
                                      w_b[e,sth] = w_b[e,sth]*0.001
                          prior.append(arr)
                          # Force the target task's weight to 1.0 and clear
                          # its prior slots; `arr` is the object already
                          # appended above, so the list entry sees the change.
                          w_b[e,task] = 1.0
                          arr[2*task] = 0.
                          arr[2*task+1] = 0.

                  prior = np.array(prior)
                  atom_feat = multiConvMol.get_atom_features()
                  member = multiConvMol.membership

                  # Extend each atom's feature row with the prior vector
                  # selected by that atom's membership entry.
                  new_atom_feats = []
                  for i in range(atom_feat.shape[0]):
                      new_atom_feat = np.concatenate((atom_feat[i],\
                                                        prior[member[i]]))
                      new_atom_feats.append(new_atom_feat)
                  new_atom_feats = np.array(new_atom_feats)
                  d[self.atom_features] = new_atom_feats
              else:
                  d[self.atom_features] = multiConvMol.get_atom_features()

              # Weights are stored after any per-task adjustments made above.
              d[self.weights] = w_b
              d[self.degree_slice] = multiConvMol.deg_slice
              d[self.membership] = multiConvMol.membership
              # Index 0 (degree 0) is skipped; deg_adjs[k] receives the list
              # for degree k + 1.
              for i in range(1, len(multiConvMol.get_deg_adjacency_lists())):
                  d[self.deg_adjs[i - 1]] = multiConvMol.get_deg_adjacency_lists()[i]
              yield d