def __init__(self, input_size, output_size, batch_size,
             activation_function=None, name=None, init_weights=True):
    """Record the layer dimensions and optionally create its weights.

    Args:
        input_size: Row count of the weight matrix ``W``.
        output_size: Column count of ``W`` and width of the bias ``b``.
        batch_size: Stored on the instance as given.
        activation_function: Stored on the instance as given.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create ``W`` and ``b`` and build
            ``self.eval_model``: an instance of the same class with batch
            size 1 that points at the very same ``W`` and ``b`` variables.
    """
    self.name = NameCreator.name_it(self, name)
    self.input_size = input_size
    self.output_size = output_size
    self.batch_size = batch_size
    self.activation_function = activation_function

    if not init_weights:
        return

    with tf.variable_scope(self.name):
        # Uniform initialisation in [-1/sqrt(input_size), 1/sqrt(input_size)].
        limit = 1. / input_size**0.5
        self.W = tf.Variable(
            tf.random_uniform([input_size, output_size], -limit, limit),
            name='W')
        self.b = tf.Variable(tf.zeros([1, output_size]), name='b')

    # The twin is built with init_weights=False so that it creates no
    # variables of its own (and does not recurse into another twin).
    twin = type(self)(input_size, output_size, 1, activation_function,
                      self.name + '_eval', False)
    twin.W = self.W
    twin.b = self.b
    self.eval_model = twin
def __init__(self, size, batch_size, n_hidden_layers, name=None,
             init_weights=True, trainable=True, connection_mode='head'):
    """Build a stack of ``n_hidden_layers`` tridirectional highway layers.

    Args:
        size: Used as both ``input_size`` and ``output_size``; also passed
            to every ``TridirectionalHighwayLayer``.
        batch_size: Batch size handed to every hidden layer.
        n_hidden_layers: Number of hidden layers to create.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create the hidden layers and build
            ``self.eval_model`` (same class, batch size 1) whose layers are
            the ``eval_model`` of each hidden layer.
        trainable: Forwarded to every hidden layer and to the eval twin.
        connection_mode: Stored on the instance and forwarded to the eval
            twin so both models are configured identically.
    """
    self.name = NameCreator.name_it(self, name)
    self.n_hidden_layers = n_hidden_layers
    self.size = size
    self.batch_size = batch_size
    self.input_size = size
    self.output_size = size
    self.connection_mode = connection_mode

    with tf.variable_scope(self.name):
        self.layers = []
        if init_weights:
            for _ in range(n_hidden_layers):
                self.layers.append(
                    TridirectionalHighwayLayer(size, batch_size,
                                               trainable=trainable))
            # Previously the twin was created without connection_mode and
            # therefore always used the default 'head', regardless of how
            # this instance was configured.
            self.eval_model = type(self)(size, 1, n_hidden_layers,
                                         self.name + '_eval', False,
                                         trainable,
                                         connection_mode=connection_mode)
            self.eval_model.layers = [
                layer.eval_model for layer in self.layers
            ]

    self.tail = self.head = None
def __init__(self, input_size, output_size, batch_size,
             activation_function=tf.nn.sigmoid, name=None,
             init_weights=True):
    """Create the recurrent state buffer and, optionally, the weights.

    Args:
        input_size: Row count of the input weight matrix ``iW``.
        output_size: Column count of ``iW``; ``oW`` is square with this
            size and ``vb`` has this width.
        batch_size: Leading dimension of the saved-output buffer.
        activation_function: Stored on the instance as given.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create ``iW``, ``oW`` and ``vb`` and build
            ``self.eval_model`` (same class, batch size 1) that reuses
            those three variables.
    """
    self.name = NameCreator.name_it(self, name)
    self.input_size = input_size
    self.output_size = output_size
    self.batch_size = batch_size
    self.activation_function = activation_function
    self._state_shape = [batch_size, output_size]

    with tf.variable_scope(self.name):
        if init_weights:
            input_weights_shape = [input_size, output_size]
            recurrent_weights_shape = [output_size, output_size]
            self.iW = tf.Variable(
                tf.truncated_normal(input_weights_shape, 0, 0.1))
            self.oW = tf.Variable(
                tf.truncated_normal(recurrent_weights_shape, 0, 0.1))
            self.vb = tf.Variable(tf.zeros([1, output_size]))
        # The state buffer exists for every instance and is never trained.
        state_init = tf.zeros(self._state_shape)
        self.saved_output = tf.Variable(state_init, trainable=False)

    self.output = self.saved_output

    if init_weights:
        twin = type(self)(input_size, output_size, 1, activation_function,
                          self.name + '_eval', False)
        # Share the trained parameters; the twin keeps its own state buffer.
        for attr in ('iW', 'oW', 'vb'):
            setattr(twin, attr, getattr(self, attr))
        self.eval_model = twin
def __init__(self, mem_key_size, mem_content_size, num_cells, batch_size,
             name=None, init_weights=True):
    """Create the memory keys and the per-batch content buffer.

    Args:
        mem_key_size: Width of each key row.
        mem_content_size: Width of the content stored per cell.
        num_cells: Number of memory cells. ``num_cells + 1`` key rows are
            allocated; the extra row is the NOP key.
        batch_size: Leading dimension of the content buffer.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create ``keys`` and build
            ``self.eval_model`` (a ``Tape`` with batch size 1) that reuses
            the same ``keys`` variable.
    """
    self.name = NameCreator.name_it(self, name)
    self.mem_key_size = mem_key_size
    self.mem_content_size = mem_content_size
    self.num_cells = num_cells
    self.batch_size = batch_size
    self._content_size = [batch_size, num_cells, mem_content_size]

    with tf.variable_scope(self.name):
        if init_weights:
            # Additional key for NOP.
            # tf.truncated_normal takes (shape, mean, stddev); the previous
            # arguments (-0.1, 0.1) set the mean to -0.1 instead of drawing
            # values centred on zero like every other weight in this file.
            self.keys = tf.Variable(
                tf.truncated_normal([num_cells + 1, mem_key_size], 0, 0.1),
                name='keys')
        # The buffer used to be registered under the name 'keys' as well,
        # colliding with the key matrix inside this scope.
        # NOTE(review): checkpoints written before this rename refer to the
        # old graph name for this variable.
        self.saved_content = tf.Variable(tf.zeros(self._content_size),
                                         dtype=tf.float32,
                                         trainable=False,
                                         name='saved_content')

    self.content = self.saved_content

    if init_weights:
        self.eval_model = Tape(mem_key_size, mem_content_size, num_cells, 1,
                               self.name + '_eval', False)
        self.eval_model.keys = self.keys
def __init__(self, input_size, output_size, batch_size, controller,
             mem_key_size, mem_content_size, num_cells,
             activation_function=None, name=None, init_weights=True):
    """Wire a controller to a ``Tape`` through two ``FeedForward`` adapters.

    Args:
        input_size: External input width.
        output_size: External output width.
        batch_size: Batch size for the tape, the adapters and the
            read-result buffer.
        controller: Layer exposing ``input_size`` and ``output_size``;
            placed between the input and output adapters.
        mem_key_size: Key width forwarded to the tape.
        mem_content_size: Content width forwarded to the tape.
        num_cells: Number of tape cells.
        activation_function: Stored on the instance as given.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create the tape, the adapters and the
            extended controller; an eval twin is then built if
            ``batch_size > 1``.
    """
    self.name = NameCreator.name_it(self, name)
    self.input_size = input_size
    self.output_size = output_size
    self.batch_size = batch_size
    self.mem_key_size = mem_key_size
    self.mem_content_size = mem_content_size
    self.num_cells = num_cells
    self.activation_function = activation_function

    # The controller sees the external input plus the last read result.
    self.extended_input_size = input_size + mem_content_size
    # Output layout width: external output, one content vector, two keys
    # and three extra scalars.
    self.extended_output_size = (output_size + mem_content_size
                                 + 2 * mem_key_size + 3)
    self._read_result_shape = (batch_size, mem_content_size)

    if init_weights:
        self.tape = Tape(mem_key_size, mem_content_size, num_cells,
                         batch_size, self.name + '_Tape')
        with tf.variable_scope(self.name):
            self.input_adapter = FeedForward(self.extended_input_size,
                                             controller.input_size,
                                             batch_size)
            self.output_adapter = FeedForward(controller.output_size,
                                              self.extended_output_size,
                                              batch_size)
        self.controller = controller
        self.extended_controller = ConnectLayers(
            [self.input_adapter, controller, self.output_adapter])

    with tf.variable_scope(self.name):
        # NOTE(review): unlike the saved-state variables of the other
        # layers in this file, this buffer is left trainable - confirm
        # that this is intended.
        self.saved_read_result = tf.Variable(
            tf.zeros(self._read_result_shape))
    self.read_result = self.saved_read_result

    # The twin borrows self.tape and self.extended_controller, which only
    # exist when init_weights is true; without that guard an instance made
    # with init_weights=False and batch_size > 1 raised AttributeError.
    # NOTE(review): as before, no eval_model is created for batch_size == 1.
    if init_weights and batch_size > 1:
        self.eval_model = NTM(input_size, output_size, 1, controller,
                              mem_key_size, mem_content_size, num_cells,
                              activation_function, self.name + '_eval',
                              init_weights=False)
        self.eval_model.tape = self.tape.eval_model
        self.eval_model.extended_controller = (
            self.extended_controller.eval_model)
def __init__(self, input_size, output_size, batch_size, name=None,
             init_weights=True):
    """Create the saved-output buffer and, optionally, the gate weights.

    Args:
        input_size: Row count of the input-side matrices ``iW_g``/``iW``.
        output_size: Row count of the recurrent matrices ``oW_g``/``oW``;
            the gate matrices and ``b_g`` are ``2 * output_size`` wide,
            the remaining ones ``output_size`` wide.
        batch_size: Leading dimension of the saved-output buffer.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create all six parameters and build
            ``self.eval_model`` (same class, batch size 1) that reuses
            them.
    """
    self.name = NameCreator.name_it(self, name)
    self.input_size = input_size
    self.output_size = output_size
    self.batch_size = batch_size
    self._state_shape = [batch_size, output_size]

    gate_width = 2 * output_size

    with tf.variable_scope(self.name):
        if init_weights:
            self.iW_g = tf.Variable(
                tf.truncated_normal([input_size, gate_width], 0, 0.1),
                name='iW_g')
            self.oW_g = tf.Variable(
                tf.truncated_normal([output_size, gate_width], 0, 0.1),
                name='oW_g')
            self.iW = tf.Variable(
                tf.truncated_normal([input_size, output_size], 0, 0.1),
                name='iW')
            self.oW = tf.Variable(
                tf.truncated_normal([output_size, output_size], 0, 0.1),
                name='oW')
            # Gate biases start at 2, the plain bias at 0.
            self.b_g = tf.Variable(2 * tf.ones([1, gate_width]), name='b_g')
            self.b = tf.Variable(tf.zeros([1, output_size]), name='b')
        # The state buffer exists for every instance and is never trained.
        self.saved_output = tf.Variable(tf.zeros(self._state_shape),
                                        trainable=False,
                                        name='saved_output')

    self.output = self.saved_output

    if init_weights:
        twin = type(self)(input_size, output_size, 1,
                          name=self.name + '_eval', init_weights=False)
        # Share every parameter; the twin keeps its own state buffer.
        for attr in ('iW_g', 'oW_g', 'iW', 'oW', 'b_g', 'b'):
            setattr(twin, attr, getattr(self, attr))
        self.eval_model = twin
def __init__(self, input_size, output_size, batch_size, name=None,
             init_weights=True):
    """Create the output/state buffers and, optionally, the fused weights.

    Args:
        input_size: Row count of ``iW``.
        output_size: Row count of ``oW``; ``iW``, ``oW`` and ``b`` are all
            ``4 * output_size`` wide.
        batch_size: Leading dimension of both saved buffers.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create ``iW``, ``oW`` and ``b`` and build
            ``self.eval_model`` (same class, batch size 1) that reuses
            them.
    """
    self.name = NameCreator.name_it(self, name)
    self.input_size = input_size
    self.output_size = output_size
    self.batch_size = batch_size
    self._state_shape = [batch_size, output_size]

    with tf.variable_scope(self.name):
        self.saved_output = tf.Variable(tf.zeros(self._state_shape),
                                        trainable=False,
                                        name='saved_output')
        self.saved_state = tf.Variable(tf.zeros(self._state_shape),
                                       trainable=False,
                                       name='saved_input')
    self.output = self.saved_output
    self.state = self.saved_state

    if not init_weights:
        return

    fused_width = 4 * output_size
    with tf.variable_scope(self.name):
        # Bias layout along the fused axis: one block of -2, two blocks
        # of +2, one block of 0, each output_size wide.
        bias_blocks = (
            -2 * np.ones((1, output_size)),
            2 * np.ones((1, 2 * output_size)),
            np.zeros((1, output_size)),
        )
        initial_bias = np.hstack(bias_blocks).astype(np.float32)
        self.iW = tf.Variable(
            tf.truncated_normal([input_size, fused_width], 0, 0.1),
            name='iW')
        self.oW = tf.Variable(
            tf.truncated_normal([output_size, fused_width], 0, 0.1),
            name='oW')
        self.b = tf.Variable(initial_bias)

    twin = type(self)(input_size, output_size, 1,
                      name=self.name + '_eval', init_weights=False)
    # Share the parameters; the twin keeps its own output/state buffers.
    for attr in ('iW', 'oW', 'b'):
        setattr(twin, attr, getattr(self, attr))
    self.eval_model = twin
def __init__(self, layer, name=None, init_weights=True):
    """Wrap ``layer`` and optionally create this wrapper's own ``W``/``b``.

    Args:
        layer: Wrapped layer; its ``input_size``, ``output_size`` and
            ``batch_size`` are copied onto this instance, and its
            ``eval_model`` is wrapped by this instance's eval twin.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create ``W`` and ``b`` and build
            ``self.eval_model`` (same class, around ``layer.eval_model``)
            that reuses those two variables.
    """
    self.name = NameCreator.name_it(self, name)
    self.input_size = layer.input_size
    self.output_size = layer.output_size
    self.batch_size = layer.batch_size
    self.layer = layer

    if not init_weights:
        return

    with tf.variable_scope(self.name):
        # Uniform initialisation in [-1/sqrt(input_size), 1/sqrt(input_size)].
        limit = 1. / self.input_size**0.5
        weight_shape = [self.input_size, self.output_size]
        self.W = tf.Variable(
            tf.random_uniform(weight_shape, -limit, limit), name='W')
        # The bias starts at 3 rather than 0.
        self.b = tf.Variable(3 * tf.ones([1, self.output_size]), name='b')

    twin = type(self)(layer.eval_model, self.name + '_eval', False)
    twin.W = self.W
    twin.b = self.b
    self.eval_model = twin
def __init__(self, size, batch_size, name=None, init_weights=True,
             trainable=True):
    """Create the saved-output buffer and, optionally, all parameters.

    Args:
        size: Used as both ``input_size`` and ``output_size``; every
            parameter shape is derived from it.
        batch_size: Leading dimension of the saved-output buffer.
        name: Forwarded to ``NameCreator.name_it`` to obtain ``self.name``.
        init_weights: When true, create the twelve parameters
            (``b_*``/``W_*`` for ``g1``, ``g2``, ``o1``, ``o2``, ``i1``,
            ``i2``) and build ``self.eval_model`` (same class, batch size
            1) that reuses them.
        trainable: Passed as the ``trainable`` flag of every parameter
            variable and forwarded to the eval twin.
    """
    self.name = NameCreator.name_it(self, name)
    self.size = size
    self.batch_size = batch_size
    self.input_size = size
    self.output_size = size
    self._state_shape = [batch_size, size]

    def _uniform(shape):
        # One draw from U(-0.1, 0.1) with the given shape.
        return np.random.uniform(-0.1, 0.1, shape)

    def _noisy_identity():
        # Ones on the diagonal, U(-0.1, 0.1) noise everywhere else.
        off_diagonal = np.ones((size, size)) - np.eye(size)
        return np.array(off_diagonal * _uniform((size, size))
                        + np.eye(size), dtype=np.float32)

    shared_names = ('b_g1', 'b_g2', 'W_g1', 'W_g2',
                    'b_o1', 'b_o2', 'W_o1', 'W_o2',
                    'b_i1', 'b_i2', 'W_i1', 'W_i2')

    with tf.variable_scope(self.name):
        if init_weights:
            # Draw order (g1, g2, o1, o2, i1, i2) is kept fixed so a
            # seeded NumPy generator yields the same initial values.
            gate_1 = _uniform((2 * size, 3 * size))
            gate_2 = _uniform((2 * size, 3 * size))
            out_1 = _noisy_identity()
            out_2 = _noisy_identity()
            in_1 = np.array(_uniform((size, size)), dtype=np.float32)
            in_2 = np.array(_uniform((size, size)), dtype=np.float32)

            self.b_g1 = tf.Variable(2 * tf.ones((1, 3 * size)),
                                    trainable=trainable)
            self.b_g2 = tf.Variable(10 * tf.ones((1, 3 * size)),
                                    trainable=trainable)
            self.W_g1 = tf.Variable(gate_1, trainable=trainable,
                                    dtype=tf.float32)
            self.W_g2 = tf.Variable(gate_2, trainable=trainable,
                                    dtype=tf.float32)
            self.b_o1 = tf.Variable(tf.zeros((1, size)),
                                    trainable=trainable)
            self.b_o2 = tf.Variable(tf.zeros((1, size)),
                                    trainable=trainable)
            self.W_o1 = tf.Variable(out_1, trainable=trainable)
            self.W_o2 = tf.Variable(out_2, trainable=trainable)
            self.b_i1 = tf.Variable(tf.zeros((1, size)),
                                    trainable=trainable)
            self.b_i2 = tf.Variable(tf.zeros((1, size)),
                                    trainable=trainable)
            self.W_i1 = tf.Variable(in_1, trainable=trainable)
            self.W_i2 = tf.Variable(in_2, trainable=trainable)

            # The twin shares every parameter and keeps its own buffer.
            self.eval_model = type(self)(size, 1, self.name + '_eval',
                                         False, trainable)
            for attr in shared_names:
                setattr(self.eval_model, attr, getattr(self, attr))

        # The state buffer exists for every instance and is never trained.
        self.saved_output = tf.Variable(
            tf.zeros(self._state_shape, name="default_output"),
            trainable=False)

    self.output = self.saved_output
    self.feed_direction = "f"