def __call__(self, inputs, state, scope=None):
    """
    :param inputs: [N, d + JQ + JQ * d]
    :param state: [N, d]
    :param scope:
    :return:
    """
    with tf.variable_scope(scope or self.__class__.__name__):
        c_prev, h_prev = state
        # split the packed input back into word vector, question mask, and question words
        x = tf.slice(inputs, [0, 0], [-1, self._input_size])  # [N, d]
        q_mask = tf.slice(inputs, [0, self._input_size], [-1, self._q_len])  # [N, JQ]
        qs = tf.slice(inputs, [0, self._input_size + self._q_len], [-1, -1])
        qs = tf.reshape(qs, [-1, self._q_len, self._input_size])  # [N, JQ, d]
        x_tiled = tf.tile(tf.expand_dims(x, 1), [1, self._q_len, 1])  # [N, JQ, d]
        h_prev_tiled = tf.tile(tf.expand_dims(h_prev, 1), [1, self._q_len, 1])  # [N, JQ, d]
        # attend over question words, conditioned on the current input and previous state
        f = tf.tanh(linear([qs, x_tiled, h_prev_tiled], self._input_size, True, scope='f'))  # [N, JQ, d]
        a = tf.nn.softmax(exp_mask(linear(f, 1, True, squeeze=True, scope='a'), q_mask))  # [N, JQ]
        q = tf.reduce_sum(qs * tf.expand_dims(a, -1), 1)  # [N, d]
        z = tf.concat([x, q], 1)  # [N, 2d]
        return self._cell(z, state)
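# The cell above depends on two helpers not shown here: `linear`, an affine map
# over the last axis, and `exp_mask`, which suppresses padded question positions
# before the softmax. A minimal sketch of `exp_mask`, assuming the mask is a 0/1
# (or boolean) tensor with the same shape as the logits:
VERY_NEGATIVE_NUMBER = -1e30

def exp_mask(val, mask):
    # masked-out positions get a huge negative logit, so the softmax assigns
    # them (numerically) zero probability
    return val + (1.0 - tf.cast(mask, tf.float32)) * VERY_NEGATIVE_NUMBER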
def linear_controller(inputs, state, memory, bias=True,
                      input_keep_prob=1.0, is_train=None):
    # memory is [N, M, d]; score every memory slot against the controller
    # input and state with a single linear layer
    rank = len(memory.get_shape())  # 3 for [N, M, d]
    _memory_size = tf.shape(memory)[rank - 2]
    tiled_inputs = tf.tile(tf.expand_dims(inputs, 1), [1, _memory_size, 1])
    if isinstance(state, tuple):
        tiled_states = [tf.tile(tf.expand_dims(each, 1), [1, _memory_size, 1])
                        for each in state]
    else:
        tiled_states = [tf.tile(tf.expand_dims(state, 1), [1, _memory_size, 1])]
    # [N, M, d]
    in_ = tf.concat([tiled_inputs] + tiled_states + [memory], 2)
    out = linear(in_, 1, bias, squeeze=True,
                 input_keep_prob=input_keep_prob, is_train=is_train)  # [N, M]
    return out
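# Hypothetical usage of linear_controller: it emits one unnormalized score per
# memory slot, which a softmax turns into attention weights for a soft read.
# `inputs` [N, d], `state`, and `memory` [N, M, d] follow the signature above;
# everything else is illustrative.
logits = linear_controller(inputs, state, memory)             # [N, M]
attn = tf.nn.softmax(logits)                                  # [N, M]
read = tf.reduce_sum(memory * tf.expand_dims(attn, -1), 1)    # [N, d]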
def highway(input_, size, layer_size=1, bias=-2, f=tf.nn.relu):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).

    t = sigmoid(Wy + b)
    z = t * g(Wy + b) + (1 - t) * y
    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.
    """
    with tf.variable_scope('Highway'):
        output = input_
        for idx in range(layer_size):
            # both the transform and its gate read the previous layer's output,
            # so stacked layers compose instead of all gating against the raw input
            transformed = f(linear(output, size, 0, scope='output_lin_%d' % idx, init='he'))
            transform_gate = tf.sigmoid(
                linear(output, size, 0, scope='transform_lin_%d' % idx) + bias)
            carry_gate = 1. - transform_gate
            output = transform_gate * transformed + carry_gate * output
        return output
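# A minimal sketch of dropping the highway block between an encoder output and
# a classifier head; `embedded`, `hidden_size`, and `num_classes` are assumed
# names, and `linear` is the same helper the block itself calls.
hw = highway(embedded, hidden_size, layer_size=2)          # [N, hidden_size]
logits = linear(hw, num_classes, 0, scope='logits')        # [N, num_classes]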
ainput = linearND(input_atom, hidden_size, "atom_embedding", init_bias=None)
# aggregate the neighbor messages selected by the atom adjacency graph
nei_message = linearND(tf.matmul(atom_graph, message), hidden_size, "output",
                       init_bias=None)
atom_hidden = tf.nn.relu(ainput + nei_message)

# masked sum-pooling over atoms gives a fixed-size molecular fingerprint
fp = node_mask * atom_hidden
fp = tf.reduce_sum(fp, 1)
fp = tf.nn.relu(linearND(fp, hidden_size, "pooling"))
score = tf.squeeze(linear(fp, 1, "score"), [1])
loss = tf.nn.l2_loss(score - label) * 2  # sum of squared errors over the batch

lr = tf.placeholder(tf.float32, [])
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
param_norm = tf.global_norm(tf.trainable_variables())
grads_and_vars = optimizer.compute_gradients(loss / batch_size)
grads, var = zip(*grads_and_vars)
grad_norm = tf.global_norm(grads)
backprop = optimizer.apply_gradients(grads_and_vars)

tf.global_variables_initializer().run(session=session)

from functools import reduce  # needed on Python 3
size_func = lambda v: reduce(lambda x, y: x * y, v.get_shape().as_list())
n = sum(size_func(v) for v in tf.trainable_variables())
print("Model size: %dK" % (n // 1000,))
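# A sketch of the training step this graph supports, assuming `session`, a
# `next_batch()` feeder returning a feed_dict for the input placeholders, and
# `num_steps`; all three names are assumptions, not from the original code.
for step in range(num_steps):
    feed = next_batch()                        # placeholder -> numpy array dict
    feed[lr] = 0.001                           # learning rate is fed per step
    _, cur_loss, gnorm = session.run([backprop, loss, grad_norm], feed_dict=feed)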
input_atom, input_bond, atom_graph, bond_graph, num_nbs, label = q.dequeue()
input_atom.set_shape([None, None, adim])
input_bond.set_shape([None, None, bdim])
atom_graph.set_shape([None, None, max_nb, 2])
bond_graph.set_shape([None, None, max_nb, 2])
num_nbs.set_shape([None, None])
label.set_shape([None])

graph_inputs = (input_atom, input_bond, atom_graph, bond_graph, num_nbs)
with tf.variable_scope("encoder"):
    _, fp = rcnn_wl_last(graph_inputs, hidden_size=hidden_size, depth=depth)

# row 0 is the reactant; the remaining rows are candidate products, scored by
# the difference between their fingerprints and the reactant's
reactant = fp[0:1, :]
candidates = fp[1:, :]
candidates = candidates - reactant
candidates = linear(candidates, hidden_size, "candidate")
match = tf.nn.relu(candidates)
score = tf.squeeze(linear(match, 1, "score"), [1])
loss = tf.nn.softmax_cross_entropy_with_logits(logits=score, labels=label)
pred = tf.argmax(score, 0)

_lr = tf.placeholder(tf.float32, [])
optimizer = tf.train.AdamOptimizer(learning_rate=_lr)
param_norm = tf.global_norm(tf.trainable_variables())
grads_and_vars = optimizer.compute_gradients(loss)
grads, var = zip(*grads_and_vars)
grad_norm = tf.global_norm(grads)
backprop = optimizer.apply_gradients(grads_and_vars)

tf.global_variables_initializer().run(session=session)
size_func = lambda v: reduce(lambda x, y: x * y, v.get_shape().as_list())
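# At test time the same graph can rank candidates directly; a sketch assuming
# the dequeue above is fed by a running input pipeline:
cur_pred, cur_score = session.run([pred, score])   # argmax index and raw logits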
num_nbs = tf.placeholder(tf.int32, [batch_size, None])
node_mask = tf.placeholder(tf.float32, [batch_size, None])
src_holder = [input_atom, input_bond, atom_graph, bond_graph, num_nbs, node_mask]
label = tf.placeholder(tf.float32, [batch_size])

# keep the 2-D placeholder in src_holder for feeding; the graph itself uses the
# mask broadcast over the hidden dimension
node_mask = tf.expand_dims(node_mask, -1)
graph_inputs = (input_atom, input_bond, atom_graph, bond_graph, num_nbs, node_mask)
with tf.variable_scope("encoder"):
    _, fp = gated_wln(graph_inputs, batch_size=batch_size,
                      hidden_size=hidden_size, depth=depth)

fp = linear(fp, hidden_size, "output")
fp = tf.nn.relu(fp)
score = tf.squeeze(linear(fp, 1, "score"), [1])
loss = tf.nn.l2_loss(score - label) * 2

tf.global_variables_initializer().run(session=session)

def load_data(path):
    data = []
    with open(path) as f:
        f.readline()  # skip the header line
        for line in f:
            r, v = line.strip("\r\n ").split()
            data.append((r, float(v)))
    return data
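# load_data expects a whitespace-separated file with a one-line header, each
# row holding a molecule string and a target value. A sketch of a train/valid
# split; the path and the 90/10 ratio are assumptions:
data = load_data('train.txt')
split = int(len(data) * 0.9)
train_data, valid_data = data[:split], data[split:]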
num_nbs.set_shape([None, None])

graph_inputs = (input_atom, input_bond, atom_graph, bond_graph, num_nbs)
with tf.variable_scope("mol_encoder"):
    fp_all_atoms = rcnn_wl_only(graph_inputs, hidden_size=hidden_size, depth=depth)

# row 0 is the reactant; candidates are represented by their fingerprint
# difference from the reactant, with the reactant itself kept in row 0
reactant = fp_all_atoms[0:1, :]
candidates = fp_all_atoms[1:, :]
candidates = candidates - reactant
candidates = tf.concat([reactant, candidates], 0)

with tf.variable_scope("diff_encoder"):
    reaction_fp = wl_diff_net(graph_inputs, candidates,
                              hidden_size=hidden_size, depth=depth)
reaction_fp = reaction_fp[1:]
reaction_fp = tf.nn.relu(linear(reaction_fp, hidden_size, "rex_hidden"))

score = tf.squeeze(linear(reaction_fp, 1, "score"), [1])
tk = tf.minimum(TOPK, tf.shape(score)[0])
_, pred_topk = tf.nn.top_k(score, tk)

tf.global_variables_initializer().run(session=session)

queue = Queue()  # requires `from queue import Queue`
def read_data(coord):
    data = []
    data_f = open(opts.test_path, 'r')
    cand_f = open(opts.cand_path, 'r')
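# read_data is truncated above; the usual TF1 pattern is to run it on a
# background thread that fills `queue` until the coordinator requests a stop.
# A minimal sketch of that wiring, assuming `import threading`:
coord = tf.train.Coordinator()
t = threading.Thread(target=read_data, args=(coord,))
t.start()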