Exemple #1
0
 def __init__(self, name, x, attn1, attn2, mask,
              x_in, attn1_in, attn2_in, n_out,
              path=None, init_func=normal_weight):
     """Register the layer's parameters and compute its output.

     ``SuperLayer.__init__`` is called with ``name`` and ``path``. When
     ``path`` is None, fresh shared parameters are created here; otherwise
     none are created in this method (presumably ``SuperLayer`` restores
     them from ``path`` -- confirm).

     Args:
         name: Layer name; used as the prefix of every shared-variable name.
         x, attn1, attn2, mask: Forwarded unchanged to ``self.stream``.
         x_in: First dimension of ``Wx``.
         attn1_in: First dimension of ``Wa_1``.
         attn2_in: First dimension of ``Wa_2``.
         n_out: Second dimension of the matrices, and the length of ``Wv``
             and ``b``.
         path: Passed to ``SuperLayer.__init__``; None triggers fresh
             initialisation.
         init_func: Callable taking a shape tuple and returning the initial
             weight values.
     """
     SuperLayer.__init__(self, name, path)

     if path is None:
         # (key, shape) pairs; created strictly in this order so that the
         # sequence of init_func calls matches the explicit version.
         weight_specs = (
             ('Wx', (x_in, n_out)),
             ('Wv', (n_out, )),
             ('Wa_1', (attn1_in, n_out)),
             ('Wa_2', (attn2_in, n_out)),
         )
         for key, shape in weight_specs:
             initial = init_func(shape)
             self.params[key] = to_shared(initial, self.name + '_' + key)

         # The bias goes through init_bias, not the weight initialiser.
         self.params['b'] = to_shared(init_bias((n_out, )), self.name + '_b')

     self.output = self.stream(x, attn1, attn2, mask)
Exemple #2
0
 def __init__(self, name, x, n_in, n_out, path=None, activation=T.tanh, init_func=normal_weight):
     """Set up a layer with one weight matrix ``W`` and one bias ``b``.

     Args:
         name: Layer name; prefix of the shared-variable names.
         x: Input forwarded to ``self.stream``.
         n_in: First dimension of ``W``.
         n_out: Second dimension of ``W`` and length of ``b``.
         path: Passed to ``SuperLayer.__init__``; when None, ``W`` and ``b``
             are freshly initialised here.
         activation: Stored on ``self.activation`` (default ``T.tanh``).
         init_func: Callable taking a shape tuple, used to initialise ``W``.
     """
     SuperLayer.__init__(self, name, path)
     self.activation = activation

     if path is None:
         weight = to_shared(init_func((n_in, n_out)), self.name + '_W')
         self.params['W'] = weight

         bias = to_shared(init_bias((n_out,)), self.name + '_b')
         self.params['b'] = bias

     self.output = self.stream(x)
Exemple #3
0
    def __init__(self, name, x, mask, n_in, hidden_size, n_out,
                 path=None, init_func=normal_weight):
        """Create the ``W*``, ``U*`` and ``b*`` parameters and the output.

        For each suffix in ``i``, ``f``, ``o``, ``c`` (presumably the input,
        forget, output and cell components of an LSTM -- confirm against
        ``stream``) three parameters are registered when ``path`` is None:

        * ``W<s>`` with shape ``(n_in, hidden_size)``
        * ``U<s>`` with shape ``(hidden_size, hidden_size)``
        * ``b<s>`` with shape ``(n_out,)``

        NOTE(review): the biases are sized by ``n_out`` while the weights
        use ``hidden_size``; verify this is intended.

        Args:
            name: Layer name; prefix of every shared-variable name.
            x, mask: Forwarded to ``self.stream``.
            n_in: First dimension of the ``W*`` matrices.
            hidden_size: Second dimension of ``W*`` and both dimensions of
                ``U*``.
            n_out: Length of each bias vector.
            path: Passed to ``SuperLayer.__init__``; None triggers fresh
                initialisation.
            init_func: Callable taking a shape tuple, used for ``W*``/``U*``.
        """
        SuperLayer.__init__(self, name, path)

        if path is None:
            suffixes = ('i', 'f', 'o', 'c')

            # Same creation order as the unrolled form: all W*, then all
            # U*, then all b*.
            for s in suffixes:
                key = 'W' + s
                self.params[key] = to_shared(
                    init_func((n_in, hidden_size)), self.name + '_' + key)

            for s in suffixes:
                key = 'U' + s
                self.params[key] = to_shared(
                    init_func((hidden_size, hidden_size)),
                    self.name + '_' + key)

            for s in suffixes:
                key = 'b' + s
                self.params[key] = to_shared(
                    init_bias((n_out, )), self.name + '_' + key)

        self.output = self.stream(x, mask)
Exemple #4
0
    def __init__(self, name, image, filter_shape, image_shape,
                 path=None, init_func=normal_weight):
        """Store the shapes, create the filter parameters, build the output.

        Args:
            name: Layer name; prefix of the shared-variable names.
            image: Input forwarded to ``self.stream``.
            filter_shape: Shape handed to ``init_func`` for ``W``; its first
                entry is the length of the bias ``b``.
            image_shape: Stored on ``self.image_shape``; not otherwise used
                in this method.
            path: Passed to ``SuperLayer.__init__``; None triggers fresh
                initialisation of ``W`` and ``b``.
            init_func: Callable taking a shape, used to initialise ``W``.
        """
        SuperLayer.__init__(self, name, path)
        self.filter_shape, self.image_shape = filter_shape, image_shape

        if path is None:
            kernel = init_func(filter_shape)
            self.params['W'] = to_shared(kernel, self.name + '_W')

            # One bias entry per leading index of the filter shape.
            bias = init_bias((filter_shape[0], ))
            self.params['b'] = to_shared(bias, self.name + '_b')

        self.output = self.stream(image)
Exemple #5
0
    def __init__(self, params):
        """Load the dataset selected by ``params.name`` and build pipelines.

        Only ``'mnist'`` is supported. The images are divided by 255,
        flattened to one vector per example and cast to float32. The
        training set is shuffled (buffer = number of training examples) and
        batched with ``params.batch_size``; the test set is batched with
        ``params.test_batch_size`` and not shuffled.

        Args:
            params: Object exposing ``name``, ``data_dir``, ``batch_size``
                and ``test_batch_size``.

        Raises:
            ValueError: If ``params.name`` is not ``'mnist'``.
        """
        # Function-scope import so the block does not depend on the file's
        # import header.
        import os

        self.p = params
        if self.p.name == 'mnist':
            # os.path.join gives the same path as plain concatenation when
            # data_dir ends with a separator, and a correct one when it
            # does not.
            mnist_path = os.path.join(self.p.data_dir, 'mnist.npz')
            (x_train, y_train), (x_test, y_test) = \
                tf.keras.datasets.mnist.load_data(mnist_path)

            num_train = np.shape(x_train)[0]
            num_test = np.shape(x_test)[0]
            x_train = np.reshape(x_train / 255.,
                                 [num_train, -1]).astype(np.float32)
            x_test = np.reshape(x_test / 255.,
                                [num_test, -1]).astype(np.float32)
            # Values computed from the training data by the project helper.
            self.init_bias, self.init_m = u.init_bias(x_train)

        else:
            raise ValueError('Unknown dataset.')

        self.data_train = tf.data.Dataset.from_tensor_slices(
            (x_train, y_train)).shuffle(num_train).batch(self.p.batch_size)
        # Floor division: a trailing partial batch is not counted as a step.
        self.train_steps_per_epoch = num_train // self.p.batch_size
        self.data_test = tf.data.Dataset.from_tensor_slices(
            (x_test, y_test)).batch(self.p.test_batch_size)
    def __init__(self,
                 word_to_idx,
                 batch_size=100,
                 dim_feature=[196, 512],
                 dim_embed=128,
                 dim_hidden=128,
                 n_time_step=None,
                 cell_type='rnn',
                 dtype=tf.float32):
        """Store hyper-parameters, create parameters and input placeholders.

        Args:
            word_to_idx: Dict mapping word -> integer index. Must contain
                ``'<NULL>'``; ``'<START>'`` and ``'<END>'`` are optional.
            batch_size: Stored as ``self.N``; first placeholder dimension.
            dim_feature: Two-element sequence ``[L, D]``; only indexed,
                never mutated here.
            dim_embed: Stored as ``self.M`` (word-embedding width).
            dim_hidden: Stored as ``self.H`` (hidden-state width).
            n_time_step: Stored as ``self.T``. The captions placeholder has
                ``n_time_step + 1`` columns, so the default ``None`` raises
                ``TypeError``; pass an integer.
            cell_type: ``'rnn'`` or ``'lstm'``; ``'lstm'`` multiplies the
                recurrent parameter width by 4.
            dtype: Every parameter is cast to this dtype.

        Raises:
            ValueError: If ``cell_type`` is neither ``'rnn'`` nor ``'lstm'``.
        """
        if cell_type not in {'rnn', 'lstm'}:
            raise ValueError('Invalid cell_type "%s"' % cell_type)

        # Initialize some hyper parameters
        self.cell_type = cell_type
        self.word_to_idx = word_to_idx
        # .items() works on both Python 2 and 3; .iteritems() is Python 2
        # only.
        self.idx_to_word = {i: w for w, i in word_to_idx.items()}
        self.V = len(word_to_idx)
        self.N = batch_size
        self.H = dim_hidden
        self.M = dim_embed
        self.L = dim_feature[0]
        self.D = dim_feature[1]
        self.T = n_time_step
        self.dtype = dtype
        self.params = {}

        self._null = word_to_idx['<NULL>']
        self._start = word_to_idx.get('<START>', None)
        self._end = word_to_idx.get('<END>', None)

        # Initialize parameters for generating initial hidden and cell states
        self.params['W_init_h'] = init_weight('W_init_h', [self.D, self.H])
        self.params['b_init_h'] = init_bias('b_init_h', [self.H])
        self.params['W_init_c'] = init_weight('W_init_c', [self.D, self.H])
        self.params['b_init_c'] = init_bias('b_init_c', [self.H])

        # Initialize word vectors
        self.params['W_embed'] = init_weight('W_embed', [self.V, self.M])

        # Initialize parameters for attention layer
        self.params['W_proj_x'] = init_weight('W_proj_x', [self.D, self.D])
        self.params['W_proj_h'] = init_weight('W_proj_h', [self.H, self.D])
        self.params['b_proj'] = init_bias('b_proj', [self.D])
        self.params['W_att'] = init_weight('W_att', [self.D, 1])

        # Initialize parameters for the RNN/LSTM
        dim_mul = {'lstm': 4, 'rnn': 1}[cell_type]
        self.params['Wx'] = init_weight('Wx', [self.M, self.H * dim_mul])
        self.params['Wh'] = init_weight('Wh', [self.H, self.H * dim_mul])
        self.params['Wz'] = init_weight('Wz', [self.D, self.H * dim_mul])
        self.params['b'] = init_bias('b', [self.H * dim_mul])

        # Initialize parameters for output-to-vocab
        self.params['W_vocab'] = init_weight('W_vocab', [self.H, self.V])
        self.params['b_vocab'] = init_bias('b_vocab', [self.V])

        # Cast parameters to correct dtype
        self.params = {
            key: tf.cast(value, self.dtype)
            for key, value in self.params.items()
        }

        # Place holder for features and captions
        self.features = tf.placeholder(tf.float32, [self.N, self.L, self.D])
        self.captions = tf.placeholder(tf.int32, [self.N, self.T + 1])