def __init__(self, cl_logits_input_dim=None):
  """Sets up shared state and the layers reused across graphs.

  Args:
    cl_logits_input_dim: input dimension passed to the classification
      logits subgraph. When falsy (e.g. None), FLAGS.rnn_cell_size is used.
  """
  self.global_step = tf.train.get_or_create_global_step()
  vocab_freqs = _get_vocab_freqs()
  self.vocab_freqs = vocab_freqs

  # Slots for cached VatxtInput objects.
  self.cl_inputs = None
  self.lm_inputs = None

  # Intermediate Tensors that get reused are cached here.
  self.tensors = {}

  # Every layer shared by the LM and classification graphs is instantiated
  # exactly once, here, so that variable reuse works correctly. The layers
  # are built in the same order as they are registered below.
  embedding = layers_lib.Embedding(
      FLAGS.vocab_size, FLAGS.embedding_dims, FLAGS.normalize_embeddings,
      vocab_freqs, FLAGS.keep_prob_emb)
  lstm = layers_lib.LSTM(
      FLAGS.rnn_cell_size, FLAGS.rnn_num_layers, FLAGS.keep_prob_lstm_out)
  lm_loss = layers_lib.SoftmaxLoss(
      FLAGS.vocab_size,
      FLAGS.num_candidate_samples,
      vocab_freqs,
      name='LM_loss')

  if not cl_logits_input_dim:
    cl_logits_input_dim = FLAGS.rnn_cell_size
  hidden_sizes = [FLAGS.cl_hidden_size] * FLAGS.cl_num_layers
  cl_logits = layers_lib.cl_logits_subgraph(
      hidden_sizes, cl_logits_input_dim, FLAGS.num_classes,
      FLAGS.keep_prob_cl_hidden)

  self.layers = {
      'embedding': embedding,
      'lstm': lstm,
      'lm_loss': lm_loss,
      'cl_logits': cl_logits,
  }
def _init_embed(self):
    """Create the embedding variables and recurrent layers.

    Populates ``self.char_embed``, ``self.word_embed_matrix``,
    ``self.layers``, ``self.label_embedding`` and ``self.input_data_embed``.
    Graph ops are created in the same order as before so that op naming is
    unaffected.
    """
    # Trainable character table: one row per entry in character2id.
    char_vocab_size = len(self.model_args.characterEmbed.character2id)
    char_table_init = tf.truncated_normal(
        [char_vocab_size, self.args.char_rnn_size], stddev=0.01)
    self.char_embed = tf.Variable(
        initial_value=char_table_init,
        trainable=True,
        name="char_embeddings",
        dtype=tf.float32)

    # Word table is initialised from self.word_embed_pl and is not trained.
    self.word_embed_matrix = tf.Variable(
        initial_value=self.word_embed_pl, trainable=False)

    self.layers = {
        'BiLSTM': layers_lib.BiLSTM(self.args.rnn_size),
        # The size used for the char-level LSTM was picked arbitrarily.
        'LSTM': layers_lib.LSTM(self.args.char_rnn_size),
    }

    # NOTE(review): the 500 rows are hard-coded; what they index is not
    # visible from this method -- confirm against the caller.
    label_table_init = tf.truncated_normal(
        [500, self.args.class_size], stddev=0.01)
    self.label_embedding = tf.get_variable(
        'label_embeddings', initializer=label_table_init, trainable=True)

    self.input_data_embed = tf.nn.embedding_lookup(
        self.word_embed_matrix, self.input_data)
def __init__(self):
  """Builds the base model's layers, then adds the reverse-direction ones.

  The parent constructor is given twice FLAGS.rnn_cell_size as the
  classification logits input dimension.
  """
  bidir_input_dim = FLAGS.rnn_cell_size * 2
  super(VatxtBidirModel, self).__init__(cl_logits_input_dim=bidir_input_dim)

  # Bidirectional models additionally need a reverse LSTM and a reverse
  # LM loss.
  reverse_lstm = layers_lib.LSTM(
      FLAGS.rnn_cell_size,
      FLAGS.rnn_num_layers,
      FLAGS.keep_prob_lstm_out,
      name='LSTM_Reverse')
  self.layers['lstm_reverse'] = reverse_lstm

  reverse_lm_loss = layers_lib.SoftmaxLoss(
      FLAGS.vocab_size,
      FLAGS.num_candidate_samples,
      self.vocab_freqs,
      name='LM_loss_reverse')
  self.layers['lm_loss_reverse'] = reverse_lm_loss
def __init__(self, cl_logits_input_dim=None):
  """Instantiates every layer of the model once and stores it in self.layers.

  Args:
    cl_logits_input_dim: accepted for signature compatibility; it is not
      read anywhere in this constructor.
  """
  self.layers = {}
  self.initialize_vocab()
  registry = self.layers

  # Both embedding layers take the same positional configuration; only the
  # second one is given an explicit name.
  embedding_args = (self.vocab_size, FLAGS.embedding_dims,
                    FLAGS.normalize_embeddings, self.vocab_freqs,
                    FLAGS.keep_prob_emb)
  registry['embedding'] = layers_lib.Embedding(*embedding_args)
  registry['embedding_1'] = layers_lib.Embedding(
      *embedding_args, name='embedding_1')

  registry['lstm'] = layers_lib.LSTM(FLAGS.rnn_cell_size,
                                     FLAGS.rnn_num_layers)
  registry['lstm_1'] = layers_lib.BiLSTM(
      FLAGS.rnn_cell_size, FLAGS.rnn_num_layers, name="Bilstm")

  # The action-selection layer is built with 5 when FLAGS.action is 'all',
  # otherwise with 4.
  if FLAGS.action == 'all':
    action_type = 5
  else:
    action_type = 4
  registry['action_select'] = layers_lib.Actionselect(
      action_type, FLAGS.keep_prob_dense, name='action_output')

  registry['cl_logits'] = layers_lib.Project_layer(
      FLAGS.num_classes, FLAGS.keep_prob_dense, name='project_layer')
def __init__(self, n_inputs: int, n_outputs: int, state_size: int, n_layers: int):
    """Create the recurrent layer and the projection applied to its state.

    Args:
        n_inputs: first argument given to ``layers.LSTM``.
        n_outputs: second argument given to ``layers.Linear``.
        state_size: passed to both the LSTM and the linear projection.
        n_layers: third argument given to ``layers.LSTM``.
    """
    super().__init__()

    # The LSTM is created before the projection, as in the original order.
    recurrent = layers.LSTM(n_inputs, state_size, n_layers)
    self.lstm = recurrent

    projection = layers.Linear(state_size, n_outputs)
    self.output_projection = projection
def build(self):
    """Build the symbolic graph and compile the Theano functions.

    Creates the input variables, the word-embedding matrix, three LSTM
    layers (video features, context "b", context "a") and the output
    classifier, then compiles the training, statistics-estimation and
    inference functions.

    Attributes set on ``self``: ``params``, ``Wemb``, ``lstm_vid``,
    ``lstm_ctxb``, ``lstm_ctxa``, ``W``, ``b``, ``f_grad_shared``,
    ``f_update``, ``f_train``, ``f_stat_estimator`` (``None`` when there are
    no estimators) and ``f_inference``.

    ``self.options.use_popstats`` is switched to True while the inference
    graph is built and is left at False afterwards.

    ``print`` is always called with a single parenthesised argument so the
    method parses under both Python 2 and Python 3.
    """
    if self.options.debug:
        theano.config.compute_test_value = "warn"

    # ---- Create Theano variables -------------------------------------
    # Test values come from self.test_values, indexed in declaration order.
    features = T.ftensor3('features')  # float32, 3-D: (m,f,c)
    features.tag.test_value = self.test_values[0]
    features_mask = T.fmatrix('features_mask')  # float32, 2-D: (m,f)
    features_mask.tag.test_value = self.test_values[1]
    # NOTE(review): the context variables are int64 matrices (2-D); the
    # previous "(m,f,c)" comment could not apply to them -- presumably word
    # indices, confirm the axis order against the caller.
    context_b = T.lmatrix('context b')
    context_b.tag.test_value = self.test_values[2]
    context_b_mask = T.fmatrix('context_b_mask')  # float32, 2-D
    context_b_mask.tag.test_value = self.test_values[3]
    context_a = T.lmatrix('context a')
    context_a.tag.test_value = self.test_values[4]
    context_a_mask = T.fmatrix('context_a_mask')  # float32, 2-D
    context_a_mask.tag.test_value = self.test_values[5]
    label = T.lvector('label')  # int64, 1-D: (m,)
    label.tag.test_value = self.test_values[6]

    # ---- Initialize model parameters ---------------------------------
    self.params = []
    self.Wemb = theano.shared(
        common.norm_weight(self.options.n_words, self.options.dim_word),
        name='Wemb')
    # The embedding matrix is only optimised when train_emb is set.
    if self.options.train_emb:
        self.params.append(self.Wemb)

    self.lstm_vid = layers.LSTM(options=self.options,
                                num_inputs=self.options.input_dim,
                                num_hiddens=self.options.hdims,
                                prefix="lstm_vid")
    self.lstm_vid.init_tparams()
    # The video LSTM is always created, but its parameters are left out of
    # the optimised set in text-only mode.
    if not self.options.text_only:
        self.params += self.lstm_vid.params

    self.lstm_ctxb = layers.LSTM(options=self.options,
                                 num_inputs=self.options.dim_word,
                                 num_hiddens=self.options.hdims,
                                 prefix="ctxb_lstm")
    self.lstm_ctxb.init_tparams()
    self.params += self.lstm_ctxb.params

    self.lstm_ctxa = layers.LSTM(options=self.options,
                                 num_inputs=self.options.dim_word,
                                 num_hiddens=self.options.hdims,
                                 prefix="ctxa_lstm")
    self.lstm_ctxa.init_tparams()
    self.params += self.lstm_ctxa.params

    # ---- Population statistics ---------------------------------------
    popstats = OrderedDict()
    popstats.update(self.lstm_vid.get_popstat())
    popstats.update(self.lstm_ctxb.get_popstat())
    popstats.update(self.lstm_ctxa.get_popstat())
    popstats['ff_x_mean'] = T.fvector('x_mean_ff')
    popstats['ff_x_var'] = T.fvector('x_var_ff')

    # ---- Classification ----------------------------------------------
    n_words = self.options.n_words_out
    # The classifier input is 2*hdims wide in text-only mode and 3*hdims
    # otherwise.
    if self.options.text_only:
        self.W, self.b = init_tparams_fc(nin=2*self.options.hdims,
                                         nout=n_words, prefix='logit')
    else:
        self.W, self.b = init_tparams_fc(nin=3*self.options.hdims,
                                         nout=n_words, prefix='logit')
    self.params += [self.W, self.b]

    # ---- Construct Theano graph --------------------------------------
    [cost, ce, err, y_hat] = self.get_fprop(features, features_mask,
                                            context_b, context_b_mask,
                                            context_a, context_a_mask,
                                            label)

    # ---- Compute gradients -------------------------------------------
    grads = T.grad(cost, wrt=self.params)

    if self.options.clip_c > 0.:
        # Global-norm clipping: when the squared norm over all gradients
        # exceeds clip_c**2, each gradient is rescaled by clip_c / norm.
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        grads = [
            T.switch(g2 > (self.options.clip_c**2),
                     g / T.sqrt(g2) * self.options.clip_c,
                     g)
            for g in grads
        ]

    print('start compiling theano fns')
    t0 = time.time()
    inputs = [features, features_mask, context_b, context_b_mask,
              context_a, context_a_mask, label]
    print('compile train fns')
    # SECURITY NOTE: the optimizer is resolved by eval() on a configuration
    # string. This is only acceptable while options.optimizer comes from a
    # trusted source; never feed it untrusted input.
    self.f_grad_shared, self.f_update = eval(self.options.optimizer)(
        T.scalar(name='lr'), self.params, grads, inputs, cost,
        extra=[ce, err])
    # No `updates` are passed here, so this function only evaluates the
    # listed outputs.
    self.f_train = theano.function(inputs, [cost, ce, err, y_hat],
                                   name='f_train',
                                   on_unused_input='warn')
    ### Write y from y_hat and dictionary

    # ---- Batch-norm population graph ---------------------------------
    print('get estimate for inference')
    symbatchstats, estimators = get_stat_estimator([ce])
    sample_stat_inputs = []
    self.f_stat_estimator = None
    # NOTE(review): the source this method was recovered from had lost its
    # indentation; everything up to the use_popstats reset is assumed to
    # belong to this `if` -- confirm against version control.
    if len(estimators) > 0:
        # NOTE(review): context_a comes BEFORE context_b here, unlike
        # `inputs` above, and `label` is not an input. Callers must pass
        # arguments in this order -- confirm this is intended.
        self.f_stat_estimator = theano.function(
            [features, features_mask, context_a, context_a_mask,
             context_b, context_b_mask],
            estimators, on_unused_input='warn')
        self.options.use_popstats = True
        for v in symbatchstats:
            print(v.tag.bn_label)
            sample_stat_inputs.append(popstats[v.tag.bn_label])
        # Get inference graph
        [cost, ce, err, y_hat] = self.get_fprop(features, features_mask,
                                                context_b, context_b_mask,
                                                context_a, context_a_mask,
                                                label, popstats=popstats)
        self.options.use_popstats = False

    print('compile inference fns')
    # The inference function takes the training inputs followed by one
    # population-statistic variable per collected batch statistic.
    inputs = inputs + sample_stat_inputs
    self.f_inference = theano.function(inputs, [cost, ce, err, y_hat],
                                       on_unused_input='warn',
                                       name='f_inference')
    print('compiling theano fns used %.2f sec' % (time.time() - t0))