def __init__(self, name, x, attn1, attn2, mask, x_in, attn1_in, attn2_in,
             n_out, path=None, init_func=normal_weight):
    SuperLayer.__init__(self, name, path)
    if path is None:
        self.params['Wx'] = to_shared(init_func((x_in, n_out)), self.name + '_Wx')
        self.params['Wv'] = to_shared(init_func((n_out, )), self.name + '_Wv')
        self.params['Wa_1'] = to_shared(init_func((attn1_in, n_out)), self.name + '_Wa_1')
        self.params['Wa_2'] = to_shared(init_func((attn2_in, n_out)), self.name + '_Wa_2')
        self.params['b'] = to_shared(init_bias((n_out, )), self.name + '_b')
    self.output = self.stream(x, attn1, attn2, mask)
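# A hedged sketch of what `stream` presumably computes from these parameters:
# an additive attention score mixing the sequence x with the two context
# vectors, followed by a masked softmax over time. The real `stream` is not
# shown in this section, so the function below (and its name) is an
# assumption about the layer's form, not the author's code.
import theano.tensor as T

def attn_stream_sketch(x, attn1, attn2, mask, p):
    # x: (steps, batch, x_in); attn1: (batch, attn1_in);
    # attn2: (batch, attn2_in); mask: (steps, batch) with 1 = valid position.
    act = T.tanh(T.dot(x, p['Wx']) + T.dot(attn1, p['Wa_1'])
                 + T.dot(attn2, p['Wa_2']) + p['b'])
    e = T.dot(act, p['Wv'])                             # scores: (steps, batch)
    e = T.exp(e - e.max(axis=0, keepdims=True)) * mask  # masked, stabilised
    alpha = e / (e.sum(axis=0, keepdims=True) + 1e-8)   # attention weights
    return (x * alpha.dimshuffle(0, 1, 'x')).sum(axis=0)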
def __init__(self, name, x, n_in, n_out, path=None, activation=T.tanh,
             init_func=normal_weight):
    SuperLayer.__init__(self, name, path)
    self.activation = activation
    if path is None:
        self.params['W'] = to_shared(init_func((n_in, n_out)), self.name + '_W')
        self.params['b'] = to_shared(init_bias((n_out,)), self.name + '_b')
    self.output = self.stream(x)
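# A minimal sketch of the forward pass this dense layer's parameters imply,
# assuming `stream` is a standard affine map followed by the activation
# (`stream` itself is not shown here, so this is an assumption):
import theano.tensor as T

def dense_stream_sketch(x, W, b, activation=T.tanh):
    # x: (batch, n_in), W: (n_in, n_out), b: (n_out,)
    return activation(T.dot(x, W) + b)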
def __init__(self, name, x, mask, n_in, hidden_size, n_out, path=None,
             init_func=normal_weight):
    SuperLayer.__init__(self, name, path)
    if path is None:
        # Input-to-hidden weights for the input, forget, output and cell gates.
        self.params['Wi'] = to_shared(init_func((n_in, hidden_size)), self.name + '_Wi')
        self.params['Wf'] = to_shared(init_func((n_in, hidden_size)), self.name + '_Wf')
        self.params['Wo'] = to_shared(init_func((n_in, hidden_size)), self.name + '_Wo')
        self.params['Wc'] = to_shared(init_func((n_in, hidden_size)), self.name + '_Wc')
        # Hidden-to-hidden (recurrent) weights.
        self.params['Ui'] = to_shared(init_func((hidden_size, hidden_size)), self.name + '_Ui')
        self.params['Uf'] = to_shared(init_func((hidden_size, hidden_size)), self.name + '_Uf')
        self.params['Uo'] = to_shared(init_func((hidden_size, hidden_size)), self.name + '_Uo')
        self.params['Uc'] = to_shared(init_func((hidden_size, hidden_size)), self.name + '_Uc')
        # Gate biases must match the gate pre-activations, which have
        # hidden_size units (the original used n_out here, a shape bug
        # whenever n_out != hidden_size).
        self.params['bi'] = to_shared(init_bias((hidden_size, )), self.name + '_bi')
        self.params['bf'] = to_shared(init_bias((hidden_size, )), self.name + '_bf')
        self.params['bo'] = to_shared(init_bias((hidden_size, )), self.name + '_bo')
        self.params['bc'] = to_shared(init_bias((hidden_size, )), self.name + '_bc')
    self.output = self.stream(x, mask)
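# A hedged sketch of the per-timestep recurrence these parameters suggest,
# written as a `theano.scan`-style step function. The real `stream` method is
# not shown in this section, so the function name and the masking convention
# below are assumptions.
import theano.tensor as T

def lstm_step_sketch(x_t, m_t, h_prev, c_prev, p):
    # One scan step: x_t is (batch, n_in), m_t is (batch,),
    # h_prev / c_prev are (batch, hidden_size), p is the params dict.
    i = T.nnet.sigmoid(T.dot(x_t, p['Wi']) + T.dot(h_prev, p['Ui']) + p['bi'])
    f = T.nnet.sigmoid(T.dot(x_t, p['Wf']) + T.dot(h_prev, p['Uf']) + p['bf'])
    o = T.nnet.sigmoid(T.dot(x_t, p['Wo']) + T.dot(h_prev, p['Uo']) + p['bo'])
    g = T.tanh(T.dot(x_t, p['Wc']) + T.dot(h_prev, p['Uc']) + p['bc'])
    c = f * c_prev + i * g
    h = o * T.tanh(c)
    # Where the mask is 0 (padding), carry the previous state forward.
    m = m_t.dimshuffle(0, 'x')
    return m * h + (1. - m) * h_prev, m * c + (1. - m) * c_prev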
def __init__(self, name, image, filter_shape, image_shape, path=None,
             init_func=normal_weight):
    SuperLayer.__init__(self, name, path)
    self.filter_shape = filter_shape
    self.image_shape = image_shape
    if path is None:
        self.params['W'] = to_shared(init_func(filter_shape), self.name + '_W')
        self.params['b'] = to_shared(init_bias((filter_shape[0], )), self.name + '_b')
    self.output = self.stream(image)
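# A hedged sketch of the convolution these parameters imply, assuming `stream`
# wraps Theano's conv2d and adds the per-feature-map bias (the actual `stream`
# is not shown, so this is an assumption about its form):
from theano.tensor.nnet import conv2d

def conv_stream_sketch(image, W, b, filter_shape, image_shape):
    # image: (batch, channels, rows, cols);
    # W has shape filter_shape = (n_filters, channels, f_rows, f_cols).
    out = conv2d(image, W, input_shape=image_shape, filter_shape=filter_shape)
    # Broadcast the per-feature-map bias over batch and spatial dimensions.
    return out + b.dimshuffle('x', 0, 'x', 'x')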
def __init__(self, params):
    self.p = params
    if self.p.name == 'mnist':
        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(
            self.p.data_dir + 'mnist.npz')
        num_train = np.shape(x_train)[0]
        num_test = np.shape(x_test)[0]
        # Scale pixels to [0, 1] and flatten each image to a vector.
        x_train = np.reshape(x_train / 255., [num_train, -1]).astype(np.float32)
        x_test = np.reshape(x_test / 255., [num_test, -1]).astype(np.float32)
        self.init_bias, self.init_m = u.init_bias(x_train)
    else:
        raise ValueError('Unknown dataset.')
    self.data_train = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(num_train).batch(self.p.batch_size)
    self.train_steps_per_epoch = num_train // self.p.batch_size
    self.data_test = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test)).batch(self.p.test_batch_size)
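# A usage sketch for the loader above. `Params` and `MnistData` are
# hypothetical names standing in for the real parameter container and the
# class this __init__ belongs to; iteration assumes TF eager execution,
# where tf.data datasets are directly iterable.
import tensorflow as tf

class Params:
    name = 'mnist'
    data_dir = './data/'
    batch_size = 128
    test_batch_size = 1000

data = MnistData(Params())
for x_batch, y_batch in data.data_train:
    # x_batch: (batch_size, 784) float32 in [0, 1]; y_batch: (batch_size,)
    break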
def __init__(self, word_to_idx, batch_size=100, dim_feature=[196, 512],
             dim_embed=128, dim_hidden=128, n_time_step=None,
             cell_type='rnn', dtype=tf.float32):
    if cell_type not in {'rnn', 'lstm'}:
        raise ValueError('Invalid cell_type "%s"' % cell_type)

    # Initialize some hyperparameters.
    self.cell_type = cell_type
    self.word_to_idx = word_to_idx
    self.idx_to_word = {i: w for w, i in word_to_idx.items()}
    self.V = len(word_to_idx)   # vocabulary size
    self.N = batch_size
    self.H = dim_hidden
    self.M = dim_embed
    self.L = dim_feature[0]     # number of spatial feature locations
    self.D = dim_feature[1]     # feature dimension per location
    self.T = n_time_step
    self.dtype = dtype
    self.params = {}
    self._null = word_to_idx['<NULL>']
    self._start = word_to_idx.get('<START>', None)
    self._end = word_to_idx.get('<END>', None)

    # Initialize parameters for generating initial hidden and cell states.
    self.params['W_init_h'] = init_weight('W_init_h', [self.D, self.H])
    self.params['b_init_h'] = init_bias('b_init_h', [self.H])
    self.params['W_init_c'] = init_weight('W_init_c', [self.D, self.H])
    self.params['b_init_c'] = init_bias('b_init_c', [self.H])

    # Initialize word vectors.
    self.params['W_embed'] = init_weight('W_embed', [self.V, self.M])

    # Initialize parameters for the attention layer.
    self.params['W_proj_x'] = init_weight('W_proj_x', [self.D, self.D])
    self.params['W_proj_h'] = init_weight('W_proj_h', [self.H, self.D])
    self.params['b_proj'] = init_bias('b_proj', [self.D])
    self.params['W_att'] = init_weight('W_att', [self.D, 1])

    # Initialize parameters for the RNN/LSTM (an LSTM needs 4x the
    # projection width, one slice per gate).
    dim_mul = {'lstm': 4, 'rnn': 1}[cell_type]
    self.params['Wx'] = init_weight('Wx', [self.M, self.H * dim_mul])
    self.params['Wh'] = init_weight('Wh', [self.H, self.H * dim_mul])
    self.params['Wz'] = init_weight('Wz', [self.D, self.H * dim_mul])
    self.params['b'] = init_bias('b', [self.H * dim_mul])

    # Initialize parameters for the output-to-vocab projection.
    self.params['W_vocab'] = init_weight('W_vocab', [self.H, self.V])
    self.params['b_vocab'] = init_bias('b_vocab', [self.V])

    # Cast parameters to the requested dtype. (The Python 2 `iteritems()`
    # calls here and above are replaced with `items()`, which works in both
    # Python 2 and 3.)
    for k, v in self.params.items():
        self.params[k] = tf.cast(v, self.dtype)

    # Placeholders for features and captions.
    self.features = tf.placeholder(tf.float32, [self.N, self.L, self.D])
    self.captions = tf.placeholder(tf.int32, [self.N, self.T + 1])
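# A hedged sketch of the soft attention that W_proj_x, W_proj_h, b_proj and
# W_att suggest: project the L feature vectors and the hidden state into a
# common D-dimensional space, score each location, and take the expected
# feature. The model's attention code is not shown in this section, so the
# ReLU and the exact wiring below are assumptions about its form.
def attention_sketch(params, features, h):
    # features: (N, L, D) CNN features; h: (N, H) decoder hidden state.
    proj = tf.nn.relu(
        tf.einsum('nld,de->nle', features, params['W_proj_x'])
        + tf.expand_dims(tf.matmul(h, params['W_proj_h']), 1)
        + params['b_proj'])                                         # (N, L, D)
    scores = tf.squeeze(tf.einsum('nld,dk->nlk', proj, params['W_att']), axis=2)
    alpha = tf.nn.softmax(scores)                                   # (N, L)
    z = tf.reduce_sum(features * tf.expand_dims(alpha, 2), axis=1)  # (N, D)
    return z, alpha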