def __init__(self, input, embeddings, features, mini_batch_size=32, nhu=300, width=5, activation=hardtanh, seed=1234, n_out=9, name='SennaNER', params=None):
    """Build a window-based NER network.

    Word-embedding and (static) feature-embedding lookups feed a
    two-input hidden layer followed by a logistic-regression output.

    :param input: int index tensor; input[:, :, 0] are word ids and
        input[:, :, 1] are feature ids (assumed (batch, width, 2) --
        TODO confirm against caller)
    :param embeddings: pre-trained word embedding matrix (vocab x dim)
    :param features: feature embedding matrix (n_features x feature_dim)
    :param mini_batch_size: samples per mini-batch
    :param nhu: number of hidden units
    :param width: context-window width in tokens
    :param activation: hidden-layer activation function
    :param seed: RNG seed for weight initialisation
    :param n_out: number of output tag classes
    :param name: model name
    :param params: optional pre-trained parameters to restore
    """
    self.name = name
    self.layers = []
    self.input = input
    self.output = None

    embedding_dim = embeddings.shape[1]
    features_dim = features.shape[1]
    rng = np.random.RandomState(seed)

    # Trainable word-embedding lookup over the word-id channel.
    self.EmbeddingLayer = EmbeddingLayer(input=input[:, :, 0], w_values=embeddings, embedding_dim=embedding_dim, mini_batch_size=mini_batch_size, width=width, params=params)
    # BUG FIX: this second lookup was previously assigned to
    # self.EmbeddingLayer (clobbering the word-embedding layer) while the
    # code below reads self.StaticEmbeddingLayer, which was never set and
    # raised AttributeError.  Build the static (non-trainable) feature
    # lookup under its intended name, as in the reference implementation.
    self.StaticEmbeddingLayer = StaticEmbeddingLayer(input=input[:, :, 1], w_values=features, embedding_dim=features_dim, mini_batch_size=mini_batch_size, width=width)
    self.HiddenLayer = DoubleInputHiddenLayer(input1=self.EmbeddingLayer.output, input2=self.StaticEmbeddingLayer.output, n_in1=embedding_dim * width, n_in2=features_dim * width, n_out=nhu, rng=rng, activation=activation, params=params)
    self.LogisticRegressionLayer = LogisticRegressionLayer(input=self.HiddenLayer.output, n_in=nhu, n_out=n_out, rng=rng, params=params)
    self.layers = [self.EmbeddingLayer, self.StaticEmbeddingLayer, self.HiddenLayer, self.LogisticRegressionLayer]

    # Regularisation terms: only layers that actually define L1/L2 contribute.
    self.L1 = T.sum([layer.L1 for layer in self.layers if "L1" in layer.__dict__])
    self.L2 = T.sum([layer.L2 for layer in self.layers if "L2" in layer.__dict__])

    self.params = list(itertools.chain(*[layer.params for layer in self.layers]))

    self.negative_log_likelihood = self.LogisticRegressionLayer.negative_log_likelihood
    self.errors = self.LogisticRegressionLayer.errors
    self.predictions = self.LogisticRegressionLayer.y_pred
    # One n_in entry per parameter (e.g. for per-layer learning-rate scaling).
    self.n_ins = list(itertools.chain(*[[layer.n_in] * len(layer.params) for layer in self.layers]))
    print(self.n_ins)
    print(self.params)
def __init__(self):
    """Build the rationale model: an embedding lookup feeding a Generator
    (selects rationale tokens) and an Encoder (predicts from the selection),
    wired together in a single TF1 interactive session.
    """
    self.session = tf.InteractiveSession()
    # Seed both TF and NumPy so runs are reproducible.
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)

    print('Reading embeddings...', end='', flush=True)
    self.embedding = EmbeddingLayer(FLAGS.embedding)
    print('done')

    # Define network parameters
    nkwargs = {
        'in_size':    self.embedding.shape[1],
        'out_size':   FLAGS.hidden,
        'depth':      FLAGS.depth,
        'batch_size': FLAGS.batch,
    }

    # Define the inputs, and their respective type & size
    inputs = {
        'x':         [tf.int32, [None, FLAGS.batch]],
        'y':         [tf.float32, [FLAGS.batch, 5]],
        'kp':        [tf.float32, []],          # dropout keep-probability
        'lambda':    [tf.float32, []],          # L2 regularisation weight
        'sparsity':  [tf.float32, []],
        'coherency': [tf.float32, []],
    }

    # Create placeholders
    with tf.name_scope('Placeholders'):
        p = {name: tf.placeholder(*args, name=name) for name, args in inputs.items()}

    # Feed-dict factory; kp defaults to the training keep-prob and can be
    # overridden (e.g. 1.0 at test time).
    self.train_fd = lambda x, y, kp=FLAGS.keep_prob: {
        p['x']:         x,
        p['y']:         y,
        p['kp']:        kp,
        p['lambda']:    FLAGS.l2_reg,
        p['sparsity']:  FLAGS.sparsity,
        p['coherency']: FLAGS.coherency,
    }

    dropout = lambda x: tf.nn.dropout(x, p['kp'])
    # Binary cross-entropy with a 1e-8 epsilon to avoid log(0).
    bxentropy = lambda x, y: -(y * tf.log(x + 1e-8) + (1. - y) * tf.log(1. - x + 1e-8))
    sq_err = lambda x, y: (x - y) ** 2

    # Mask of non-padding positions in the input ids.
    pad_mask = tf.to_float(tf.not_equal(p['x'], self.embedding.pad_id))
    embedding = dropout(self.embedding.forward(p['x']))

    print('Creating model...', end='', flush=True)
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.generator = Generator(embedding, pad_mask, p, nkwargs, dropout, bxentropy)
    self.encoder = Encoder(embedding, pad_mask, p, nkwargs, dropout, sq_err)
    # Encoder consumes the generator's selection z; generator is trained
    # against the encoder's per-sample loss.
    self.encoder.create_minimization(self.generator.z)
    self.generator.create_minimization(self.encoder.loss_vec, self.global_step)
    print('done')
def _build_graph(self):
    """ Defines the model graph. """
    with tf.variable_scope('{:s}_model'.format(self.name)):
        # Resolve the vocabulary sizes used by the embedding tables.
        if self.config.untie_enc_dec_embeddings:
            enc_vocab_size = self.source_vocab_size
            dec_vocab_size = self.target_vocab_size
        else:
            assert self.source_vocab_size == self.target_vocab_size, \
                'Input and output vocabularies should be identical when tying embedding tables.'
            enc_vocab_size = dec_vocab_size = self.source_vocab_size

        def _make_embedding(vocab_size, layer_name):
            # Build one embedding table with the shared dimensions/dtype.
            return EmbeddingLayer(vocab_size,
                                  self.config.embedding_size,
                                  self.config.hidden_size,
                                  self.float_dtype,
                                  name=layer_name)

        # Encoder table is always created; the decoder table and the softmax
        # projection are shared with it unless the config unties them.
        encoder_embedding_layer = _make_embedding(enc_vocab_size, 'encoder_embedding_layer')
        decoder_embedding_layer = (
            _make_embedding(dec_vocab_size, 'decoder_embedding_layer')
            if self.config.untie_enc_dec_embeddings
            else encoder_embedding_layer)
        softmax_projection_layer = (
            _make_embedding(dec_vocab_size, 'softmax_projection_layer')
            if self.config.untie_decoder_embeddings
            else decoder_embedding_layer)

        # Instantiate the component networks
        self.enc = TransformerEncoder(self.config,
                                      encoder_embedding_layer,
                                      self.training,
                                      self.float_dtype,
                                      self.gate_tracker,
                                      'encoder')
        self.dec = TransformerDecoder(self.config,
                                      decoder_embedding_layer,
                                      softmax_projection_layer,
                                      self.training,
                                      self.int_dtype,
                                      self.float_dtype,
                                      self.gate_tracker,
                                      'decoder')
    return dec_vocab_size
def __init__(self, hidden_size, batch_size, K, W_init, config, max_sen_len):
    """HAQA reader: embedding + K hops of gated/hop attention over BiGRUs.

    :param hidden_size: GRU hidden size (outputs are 2*hidden_size, bidirectional)
    :param batch_size: mini-batch size passed to the BiGRU layers
    :param K: number of attention hops
    :param W_init: pre-trained word embedding matrix; W_init.shape[1] is the
        word-vector dimension
    :param config: dict with at least 'char_filter_size'
    :param max_sen_len: maximum sentence length for sentence-level max attention
    """
    super(HAQA, self).__init__()
    self.embedding = EmbeddingLayer(W_init, config)
    # Word vectors are concatenated with character-CNN features.
    embedding_size = W_init.shape[1] + config['char_filter_size']

    self.ga = GatedAttentionLayer()           # non-parametrized
    self.gaao = GatedAttentionAttOnly()       # non-parametrized
    self.ha = HopAttentionLayer()             # parametrized

    # BUG FIX: this was `Variable(torch.Tensor([0.5]), requires_grad=True).to(device)`.
    # `.to(device)` returns a NON-leaf tensor, and a plain Variable is not
    # registered by nn.Module, so the gating weight never appeared in
    # model.parameters() and was never optimized.  nn.Parameter registers it
    # and it moves with the module on model.to(device).
    self.gating_w = torch.nn.Parameter(torch.Tensor([0.5]))

    self.pred = AnswerPredictionLayer()       # non-parametrized
    self.K = K
    self.hidden_size = hidden_size

    # Hop 0 reads raw embeddings for both context and query; later context
    # hops read the previous hop's 2*hidden_size output, while each query
    # hop re-encodes the query embeddings.
    self.context_gru_0 = BiGRU(embedding_size, hidden_size, batch_size)
    self.query_gru_0 = BiGRU(embedding_size, hidden_size, batch_size)
    self.context_gru_1 = BiGRU(2 * hidden_size, hidden_size, batch_size)
    self.query_gru_1 = BiGRU(embedding_size, hidden_size, batch_size)
    self.context_gru_2 = BiGRU(2 * hidden_size, hidden_size, batch_size)
    self.query_gru_2 = BiGRU(embedding_size, hidden_size, batch_size)
    self.context_gru_3 = BiGRU(2 * hidden_size, hidden_size, batch_size)
    self.query_gru_3 = BiGRU(embedding_size, hidden_size, batch_size)

    self.max_sentence = MaxAttSentence(max_sen_len, 2 * hidden_size)
def build_Emb_layer(self):
    """Construct and return this model's embedding layer from its
    configured dimensions and options."""
    layer_options = {
        'activation': self.activation,
        'side_information': self.side_information,
        'bias': self.bias,
        'dropout': self.dropout,
        'use_cuda': self.use_cuda,
    }
    return EmbeddingLayer(self.in_dim,
                          self.ins_dim,
                          self.hid_dim,
                          self.out_dim,
                          **layer_options)
def main_graph(self, trained_model, scope, emb_dim, cell, rnn_dim, rnn_num, drop_out=0.5, emb=None):
    """Build the multi-GPU bucketed BiRNN tagging graph (TF1, Python 2).

    :param trained_model: path to pickle the hyper-parameter dict to, or None
    :param scope: variable scope whose variables are reused from bucket 1 on
    :param emb_dim: character embedding dimension
    :param cell: 'gru' for GRU cells, anything else for LSTM
    :param rnn_dim: RNN hidden size
    :param rnn_num: number of stacked RNN layers
    :param drop_out: dropout rate
    :param emb: optional pre-trained embedding weights
    """
    if trained_model is not None:
        # Persist hyper-parameters so the model can be reloaded later.
        param_dic = {
            'nums_chars': self.nums_chars,
            'nums_tags': self.nums_tags,
            'crf': self.crf,
            'emb_dim': emb_dim,
            'cell': cell,
            'rnn_dim': rnn_dim,
            'rnn_num': rnn_num,
            'drop_out': drop_out,
            'buckets_char': self.buckets_char,
            'ngram': self.ngram,
            'is_space': self.is_space,
            'sent_seg': self.sent_seg,
            'emb_path': self.emb_path,
            'tag_scheme': self.tag_scheme
        }
        #print param_dic
        f_model = open(trained_model, 'w')
        pickle.dump(param_dic, f_model)
        f_model.close()

    # define shared weights and variables
    # Per-GPU batch size is fed at run time through this placeholder.
    batch_size_h = tf.placeholder(tf.int32, [], name='batch_size_holder')
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.batch_size_h = batch_size_h
    self.drop_out = dr
    self.drop_out_v = drop_out
    # pdb.set_trace()
    # +20 extra rows reserved for unknown/special symbols -- TODO confirm.
    self.emb_layer = EmbeddingLayer(self.nums_chars + 20, emb_dim, weights=emb, name='emb_layer')

    if self.ngram is not None:
        # One embedding table per n-gram order; vocab padded by 5000*(n).
        ng_embs = [None for _ in range(len(self.ngram))]
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(
                EmbeddingLayer(n_gram + 5000 * (i + 2), emb_dim, weights=ng_embs[i],
                               name=str(i + 2) + 'gram_layer'))

    with tf.variable_scope('BiRNN'):
        if cell == 'gru':
            fw_rnn_cell = tf.contrib.rnn.GRUCell(rnn_dim)  #forward
            bw_rnn_cell = tf.contrib.rnn.GRUCell(rnn_dim)  #backward
        else:
            fw_rnn_cell = tf.contrib.rnn.LSTMCell(rnn_dim, state_is_tuple=True)
            bw_rnn_cell = tf.contrib.rnn.LSTMCell(rnn_dim, state_is_tuple=True)
        if rnn_num > 1:
            fw_rnn_cell = tf.contrib.rnn.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
            bw_rnn_cell = tf.contrib.rnn.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)

    # Linear projection from BiRNN output (2*rnn_dim) to tag scores.
    output_wrapper = HiddenLayer(rnn_dim * 2, self.nums_tags, activation='linear', name='hidden')

    #define model for each bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            # All buckets after the first share the same weights.
            scope.reuse_variables()
        t1 = time()
        batch_size = self.real_batches[idx]  # NOTE(review): assigned but unused below
        input_v1 = tf.placeholder(tf.int32, [None, bucket], name='input_1' + str(bucket))
        input_v2 = tf.placeholder(tf.int32, [None, bucket], name='input_2' + str(bucket))
        self.input_v1.append([input_v1])
        self.input_v2.append([input_v2])
        #output = None
        output = []
        # Split the batch across GPUs: each device gets a batch_size_h slice.
        for i in range(self.num_gpus):
            with tf.device('/gpu:{}'.format(i)):
                input_1 = input_v1[i * batch_size_h:(i + 1) * batch_size_h]
                input_2 = input_v2[i * batch_size_h:(i + 1) * batch_size_h]
                emb_set1 = []
                emb_set2 = []
                word_out1 = self.emb_layer(input_1)
                word_out2 = self.emb_layer(input_2)
                emb_set1.append(word_out1)
                emb_set2.append(word_out2)

                # if self.ngram is not None:
                #     for i in range(len(self.ngram)):
                #         input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                #         self.input_v[-1].append(input_g)
                #         gram_out = self.gram_layers[i](input_g)
                #         emb_set.append(gram_out)

                if len(emb_set1) > 1:
                    emb_out1 = tf.concat(axis=2, values=emb_set1)
                    emb_out2 = tf.concat(axis=2, values=emb_set2)
                else:
                    emb_out1 = emb_set1[0]
                    emb_out2 = emb_set2[0]

                emb_out1 = DropoutLayer(dr)(emb_out1)
                emb_out2 = DropoutLayer(dr)(emb_out2)

                rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                                 name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out1, emb_out2, input_v1)
                output_g = output_wrapper(rnn_out)
                # if output == None:
                #     output = output_g
                # else:
                #     output = tf.concat([output,output_g],axis = 0)
                #pdb.set_trace()
                output.append(output_g)
        self.output.append([output])
        # Gold tags: bucket - 1 because tags are per transition -- TODO confirm.
        self.output_.append([
            tf.placeholder(tf.int32, [None, bucket - 1], name='tags' + str(bucket))
        ])
        self.bucket_dit[bucket] = idx
        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)
    assert len(self.input_v1) == len(self.output)
    self.params = tf.trainable_variables()
    self.saver = tf.train.Saver()
def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30,
               emb=None, ng_embs=None, pixels=None, con_width=None, filters=None, pooling_size=None):
    """Build the bucketed BiRNN tagging graph with optional radical, n-gram
    and graphic (glyph-image CNN) features (TF1, Python 2).

    :param trained_model: path to pickle the hyper-parameter dict to, or None
    :param scope: variable scope reused from the second bucket onwards
    :param emb_dim: character/n-gram embedding dimension
    :param gru: True for GRU cells, False for LSTM
    :param rnn_dim: RNN hidden size
    :param rnn_num: number of stacked RNN layers
    :param drop_out: dropout rate
    :param rad_dim: radical embedding dimension
    :param emb: optional pre-trained character embeddings
    :param ng_embs: optional pre-trained n-gram embeddings (one per order)
    :param pixels: flattened square glyph images (only when self.graphic)
    :param con_width: CNN filter width for glyph images
    :param filters: number of CNN filters
    :param pooling_size: max-pooling window size
    """
    if trained_model is not None:
        # Persist hyper-parameters so the model can be reloaded later.
        param_dic = {}
        param_dic['nums_chars'] = self.nums_chars
        param_dic['nums_tags'] = self.nums_tags
        param_dic['tag_scheme'] = self.tag_scheme
        param_dic['graphic'] = self.graphic
        param_dic['pic_size'] = self.pic_size
        param_dic['word_vec'] = self.word_vec
        param_dic['radical'] = self.radical
        param_dic['crf'] = self.crf
        param_dic['emb_dim'] = emb_dim
        param_dic['gru'] = gru
        param_dic['rnn_dim'] = rnn_dim
        param_dic['rnn_num'] = rnn_num
        param_dic['drop_out'] = drop_out
        param_dic['filter_size'] = con_width
        param_dic['filters'] = filters
        param_dic['pooling_size'] = pooling_size
        param_dic['font'] = self.font
        param_dic['buckets_char'] = self.buckets_char
        param_dic['ngram'] = self.ngram
        #print param_dic
        f_model = open(trained_model, 'w')
        pickle.dump(param_dic, f_model)
        f_model.close()

    # define shared weights and variables
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.drop_out = dr
    self.drop_out_v = drop_out

    if self.word_vec:
        # +500 extra rows reserved for unseen symbols -- TODO confirm.
        self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

    if self.radical:
        # 216 = 214 Kangxi radicals + 2 special symbols.
        self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

    if self.ngram is not None:
        if ng_embs is not None:
            assert len(ng_embs) == len(self.ngram)
        else:
            ng_embs = [None for _ in range(len(self.ngram))]
        # One embedding table per n-gram order; vocab padded by 1000*(n).
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ng_embs[i],
                                                   name=str(i + 2) + 'gram_layer'))

    wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = \
        None, None, None, None, None, None

    if self.graphic:
        # Two conv+maxpool stages over each glyph image, then a dense layer,
        # all applied per time step via TimeDistributed.
        self.input_p = []
        assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

        self.pixels = pixels
        pixel_dim = int(math.sqrt(len(pixels[0])))

        wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
        wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

        p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

        wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
        wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

        p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

        wrapper_dense = TimeDistributed(
            HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'), name='wrapper_3')
        wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

    with tf.variable_scope('BiRNN'):
        if gru:
            fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
        else:
            fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
        if rnn_num > 1:
            fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

    # Linear projection from BiRNN output (2*rnn_dim) to tag scores.
    output_wrapper = TimeDistributed(
        HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'), name='wrapper')

    #define model for each bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            # All buckets after the first share the same weights.
            scope.reuse_variables()
        t1 = time()
        input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))
        self.input_v.append([input_v])
        emb_set = []

        if self.word_vec:
            word_out = self.emb_layer(input_v)
            emb_set.append(word_out)

        if self.radical:
            input_r = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))
            self.input_v[-1].append(input_r)
            radical_out = self.radical_layer(input_r)
            emb_set.append(radical_out)

        if self.ngram is not None:
            for i in range(len(self.ngram)):
                input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                self.input_v[-1].append(input_g)
                gram_out = self.gram_layers[i](input_g)
                emb_set.append(gram_out)

        if self.graphic:
            input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim*pixel_dim])
            self.input_p.append(input_p)

            # Reshape flat pixels to (batch, time, H, W, 1) and unpack per step.
            pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])
            pix_out = tf.unpack(pix_out, axis=1)

            conv_out_1 = wrapper_conv_1(pix_out)
            pooling_out_1 = wrapper_mp_1(conv_out_1)

            conv_out_2 = wrapper_conv_2(pooling_out_1)
            pooling_out_2 = wrapper_mp_2(conv_out_2)

            assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
            pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
            pooling_out = tf.unpack(pooling_out, axis=1)

            graphic_out = wrapper_dense(pooling_out)
            graphic_out = wrapper_dr(graphic_out)

            emb_set.append(graphic_out)

        # Concatenate all feature embeddings along the feature axis, then
        # unpack to a per-time-step list for the (old-API) BiLSTM wrapper.
        if len(emb_set) > 1:
            emb_out = tf.concat(2, emb_set)
            emb_out = tf.unpack(emb_out)
        else:
            emb_out = emb_set[0]

        rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                         name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

        output = output_wrapper(rnn_out)

        output_c = tf.pack(output, axis=1)

        self.output.append([output_c])
        self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
        self.bucket_dit[bucket] = idx
        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

    assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)
    self.params = tf.trainable_variables()
    self.saver = tf.train.Saver()
def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, emb=None):
    """Build the bucketed BiRNN tagging graph with character and optional
    n-gram embeddings (TF1, Python 2).

    :param trained_model: path to pickle the hyper-parameter dict to, or None
    :param scope: variable scope reused from the second bucket onwards
    :param emb_dim: character/n-gram embedding dimension
    :param gru: True for GRU cells, False for LSTM
    :param rnn_dim: RNN hidden size
    :param rnn_num: number of stacked RNN layers
    :param drop_out: dropout rate
    :param emb: optional pre-trained character embeddings
    """
    if trained_model is not None:
        # Persist hyper-parameters so the model can be reloaded later.
        param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags, 'crf': self.crf,
                     'emb_dim': emb_dim, 'gru': gru, 'rnn_dim': rnn_dim, 'rnn_num': rnn_num,
                     'drop_out': drop_out, 'buckets_char': self.buckets_char, 'ngram': self.ngram,
                     'is_space': self.is_space, 'sent_seg': self.sent_seg, 'emb_path': self.emb_path,
                     'tag_scheme': self.tag_scheme}
        #print param_dic
        f_model = open(trained_model, 'w')
        pickle.dump(param_dic, f_model)
        f_model.close()

    # define shared weights and variables
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.drop_out = dr
    self.drop_out_v = drop_out

    # +20 extra rows reserved for unknown/special symbols -- TODO confirm.
    self.emb_layer = EmbeddingLayer(self.nums_chars + 20, emb_dim, weights=emb, name='emb_layer')

    if self.ngram is not None:
        # One embedding table per n-gram order; vocab padded by 5000*(n).
        ng_embs = [None for _ in range(len(self.ngram))]
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(EmbeddingLayer(n_gram + 5000 * (i + 2), emb_dim, weights=ng_embs[i],
                                                   name=str(i + 2) + 'gram_layer'))

    with tf.variable_scope('BiRNN'):
        if gru:
            fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
        else:
            fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
        if rnn_num > 1:
            fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

    # Linear projection from BiRNN output (2*rnn_dim) to tag scores.
    output_wrapper = TimeDistributed(
        HiddenLayer(rnn_dim * 2, self.nums_tags, activation='linear', name='hidden'), name='wrapper')

    #define model for each bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            # All buckets after the first share the same weights.
            scope.reuse_variables()
        t1 = time()
        input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))
        self.input_v.append([input_v])
        emb_set = []

        word_out = self.emb_layer(input_v)
        emb_set.append(word_out)

        if self.ngram is not None:
            for i in range(len(self.ngram)):
                input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                self.input_v[-1].append(input_g)
                gram_out = self.gram_layers[i](input_g)
                emb_set.append(gram_out)

        # Concatenate feature embeddings, apply dropout, and unpack to a
        # per-time-step list for the (old-API) BiLSTM wrapper.
        if len(emb_set) > 1:
            emb_out = tf.concat(2, emb_set)
        else:
            emb_out = emb_set[0]

        emb_out = DropoutLayer(dr)(emb_out)
        emb_out = tf.unpack(emb_out)

        rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                         name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

        output = output_wrapper(rnn_out)

        output_c = tf.pack(output, axis=1)

        self.output.append([output_c])
        self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
        self.bucket_dit[bucket] = idx
        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

    assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)
    self.params = tf.trainable_variables()
    self.saver = tf.train.Saver()
def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, fnn_dim, window_size,
               drop_out=0.5, rad_dim=30, emb=None, ng_embs=None, pixels=None, con_width=None,
               filters=None, pooling_size=None):
    """Build the bucketed tagging graph in either BiRNN ('RNN' mode) or
    windowed convolutional feed-forward (FNN) form, with optional radical,
    n-gram and graphic (glyph-image CNN) features (TF1, Python 2).

    :param trained_model: path to pickle the hyper-parameter dict to, or None
    :param scope: variable scope reused from the second bucket onwards
    :param emb_dim: character/n-gram embedding dimension
    :param gru: True for GRU cells, False for LSTM (RNN mode only)
    :param rnn_dim: RNN hidden size (RNN mode only)
    :param rnn_num: number of stacked RNN layers (RNN mode only)
    :param fnn_dim: hidden size of the FNN convolution (FNN mode only)
    :param window_size: half-width of the FNN context window
    :param drop_out: dropout rate
    :param rad_dim: radical embedding dimension
    :param emb: optional pre-trained character embeddings
    :param ng_embs: optional pre-trained n-gram embeddings (one per order)
    :param pixels: flattened square glyph images (only when self.graphic)
    :param con_width: CNN filter width for glyph images
    :param filters: number of CNN filters
    :param pooling_size: max-pooling window size
    """
    if trained_model is not None:
        # Persist hyper-parameters so the model can be reloaded later.
        param_dic = {}
        param_dic['nums_chars'] = self.nums_chars
        param_dic['nums_tags'] = self.nums_tags
        param_dic['tag_scheme'] = self.tag_scheme
        param_dic['graphic'] = self.graphic
        param_dic['pic_size'] = self.pic_size
        param_dic['word_vec'] = self.word_vec
        param_dic['radical'] = self.radical
        param_dic['crf'] = self.crf
        param_dic['emb_dim'] = emb_dim
        param_dic['gru'] = gru
        param_dic['rnn_dim'] = rnn_dim
        param_dic['rnn_num'] = rnn_num
        param_dic['fnn_dim'] = fnn_dim
        param_dic['window_size'] = window_size
        param_dic['drop_out'] = drop_out
        param_dic['filter_size'] = con_width
        param_dic['filters'] = filters
        param_dic['pooling_size'] = pooling_size
        param_dic['font'] = self.font
        param_dic['buckets_char'] = self.buckets_char
        param_dic['ngram'] = self.ngram
        param_dic['mode'] = self.mode
        #print param_dic
        if self.metric == 'All':
            # One copy per metric, each with the metric name prefixed onto
            # the model file name.
            pindex = trained_model.rindex('/') + 1
            for m in self.all_metrics:
                f_model = open(trained_model[:pindex] + m + '_' + trained_model[pindex:], 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()
        else:
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

    # define shared weights and variables
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.drop_out = dr
    self.drop_out_v = drop_out
    #concat_emb_dim = emb_dim * 2
    # Running total of the concatenated per-character feature dimension.
    concat_emb_dim = 0

    if self.word_vec:
        # +500 extra rows reserved for unseen symbols -- TODO confirm.
        self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')
        concat_emb_dim += emb_dim

    if self.radical:
        # 216 = 214 Kangxi radicals + 2 special symbols.
        self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')
        concat_emb_dim += rad_dim

    if self.ngram is not None:
        if ng_embs is not None:
            assert len(ng_embs) == len(self.ngram)
        else:
            ng_embs = [None for _ in range(len(self.ngram))]
        # One embedding table per n-gram order; vocab padded by 1000*(n).
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(
                EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ng_embs[i],
                               name=str(i + 2) + 'gram_layer'))
            concat_emb_dim += emb_dim

    wrapper_conv_1, wrapper_mp_1, wrapper_conv_2 = None, None, None
    wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None

    if self.graphic:
        # Two conv+maxpool stages over each glyph image, then a dense layer.
        self.input_p = []
        assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

        self.pixels = pixels
        pixel_dim = int(math.sqrt(len(pixels[0])))

        wrapper_conv_1 = Convolution(con_width, 1, filters, name='conv_1')
        wrapper_mp_1 = Maxpooling(pooling_size, pooling_size, name='pooling_1')

        p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

        wrapper_conv_2 = Convolution(con_width, filters, filters, name='conv_2')
        wrapper_mp_2 = Maxpooling(pooling_size, pooling_size, name='pooling_2')

        p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

        wrapper_dense = HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense')
        wrapper_dr = DropoutLayer(self.drop_out)

        concat_emb_dim += 100

    fw_rnn_cell, bw_rnn_cell = None, None

    if self.mode == 'RNN':
        with tf.variable_scope('BiRNN'):
            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)
        output_wrapper = HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='out_wrapper')
        fnn_weights, fnn_bias = None, None
    else:
        # FNN mode: a single conv over a (2*window_size+1)-character window.
        with tf.variable_scope('FNN'):
            fnn_weights = tf.get_variable('conv_w', [2 * window_size + 1, concat_emb_dim, 1, fnn_dim])
            fnn_bias = tf.get_variable('conv_b', [fnn_dim], initializer=tf.constant_initializer(0.1))
        output_wrapper = HiddenLayer(fnn_dim, self.nums_tags[0], activation='linear', name='out_wrapper')

    #define model for each bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            # All buckets after the first share the same weights.
            scope.reuse_variables()
        t1 = time()
        input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))
        self.input_v.append([input_v])
        emb_set = []

        if self.word_vec:
            word_out = self.emb_layer(input_v)
            emb_set.append(word_out)

        if self.radical:
            input_r = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))
            self.input_v[-1].append(input_r)
            radical_out = self.radical_layer(input_r)
            emb_set.append(radical_out)

        if self.ngram is not None:
            for i in range(len(self.ngram)):
                input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                self.input_v[-1].append(input_g)
                gram_out = self.gram_layers[i](input_g)
                emb_set.append(gram_out)

        if self.graphic:
            input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim * pixel_dim])
            self.input_p.append(input_p)

            pix_out = tf.reshape(input_p, [-1, pixel_dim, pixel_dim, 1])

            conv_out_1 = wrapper_conv_1(pix_out)
            pooling_out_1 = wrapper_mp_1(conv_out_1)

            conv_out_2 = wrapper_conv_2(pooling_out_1)
            pooling_out_2 = wrapper_mp_2(conv_out_2)

            assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
            pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])

            graphic_out = wrapper_dense(pooling_out)
            graphic_out = wrapper_dr(graphic_out)

            emb_set.append(graphic_out)

        if len(emb_set) > 1:
            emb_out = tf.concat(axis=2, values=emb_set)
        else:
            emb_out = emb_set[0]

        if self.mode == 'RNN':
            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                             name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)
            output = output_wrapper(rnn_out)
        else:
            # Pad so every position has a full window, then run the conv as
            # a VALID conv2d over (time, feature) and squash with tanh.
            emb_out = tf.pad(emb_out, [[0, 0], [window_size, window_size], [0, 0]])
            emb_out = tf.reshape(emb_out, [-1, bucket + 2 * window_size, concat_emb_dim, 1])
            conv_out = tf.nn.conv2d(emb_out, fnn_weights, [1, 1, 1, 1], padding='VALID') + fnn_bias
            fnn_out = tf.nn.tanh(conv_out)
            fnn_out = tf.reshape(fnn_out, [-1, bucket, fnn_dim])
            output = output_wrapper(fnn_out)

        self.output.append([output])
        self.output_.append([
            tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))
        ])
        self.bucket_dit[bucket] = idx
        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

    assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) \
        and len(self.output) == len(self.counts)
    self.params = tf.trainable_variables()
    self.saver = tf.train.Saver()
class Model(object):
    """Generator/Encoder rationale model (TF1): the Generator selects a
    subset of input tokens (the rationale) and the Encoder predicts the
    targets from that selection; both are trained jointly."""

    def __init__(self):
        """Build the graph: embeddings, placeholders, Generator and Encoder."""
        self.session = tf.InteractiveSession()
        # Seed both TF and NumPy so runs are reproducible.
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(FLAGS.seed)

        print('Reading embeddings...', end='', flush=True)
        self.embedding = EmbeddingLayer(FLAGS.embedding)
        print('done')

        # Define network parameters
        nkwargs = {
            'in_size':    self.embedding.shape[1],
            'out_size':   FLAGS.hidden,
            'depth':      FLAGS.depth,
            'batch_size': FLAGS.batch,
        }

        # Define the inputs, and their respective type & size
        inputs = {
            'x':         [tf.int32, [None, FLAGS.batch]],
            'y':         [tf.float32, [FLAGS.batch, 5]],
            'kp':        [tf.float32, []],          # dropout keep-probability
            'lambda':    [tf.float32, []],          # L2 regularisation weight
            'sparsity':  [tf.float32, []],
            'coherency': [tf.float32, []],
        }

        # Create placeholders
        with tf.name_scope('Placeholders'):
            p = {name: tf.placeholder(*args, name=name) for name, args in inputs.items()}

        # Feed-dict factory; kp defaults to the training keep-prob and can be
        # overridden (e.g. 1.0 at test time).
        self.train_fd = lambda x, y, kp=FLAGS.keep_prob: {
            p['x']:         x,
            p['y']:         y,
            p['kp']:        kp,
            p['lambda']:    FLAGS.l2_reg,
            p['sparsity']:  FLAGS.sparsity,
            p['coherency']: FLAGS.coherency,
        }

        dropout = lambda x: tf.nn.dropout(x, p['kp'])
        # Binary cross-entropy with a 1e-8 epsilon to avoid log(0).
        bxentropy = lambda x, y: -(y * tf.log(x + 1e-8) + (1. - y) * tf.log(1. - x + 1e-8))
        sq_err = lambda x, y: (x - y) ** 2

        # Mask of non-padding positions in the input ids.
        pad_mask = tf.to_float(tf.not_equal(p['x'], self.embedding.pad_id))
        embedding = dropout(self.embedding.forward(p['x']))

        print('Creating model...', end='', flush=True)
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.generator = Generator(embedding, pad_mask, p, nkwargs, dropout, bxentropy)
        self.encoder = Encoder(embedding, pad_mask, p, nkwargs, dropout, sq_err)
        # Encoder consumes the generator's selection z; generator is trained
        # against the encoder's per-sample loss.
        self.encoder.create_minimization(self.generator.z)
        self.generator.create_minimization(self.encoder.loss_vec, self.global_step)
        print('done')

    def train(self):
        """Run the training loop: restore any checkpoint, then per epoch
        train on batches, evaluate on the test split, dump rationales to
        FLAGS.output and save a checkpoint."""
        print('Initializing variables...', end='', flush=True)
        logdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'log')
        writer = tf.summary.FileWriter(logdir, self.session.graph)
        saver = tf.train.Saver()
        self.session.run(tf.global_variables_initializer())

        # Resume from the latest checkpoint if one exists.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint)
        if checkpoint and checkpoint.model_checkpoint_path:
            print('restoring previous checkpoint...', end='', flush=True)
            name = os.path.basename(checkpoint.model_checkpoint_path)
            saver.restore(self.session, os.path.join(FLAGS.checkpoint, name))
        merger = tf.summary.merge_all()
        print('done')

        print('Fetching data...', end='', flush=True)
        x, y = read_data(FLAGS.training)
        train = ([self.embedding.words_to_ids(s) for s in x], y)
        x, y = read_data(FLAGS.testing)
        test = ([self.embedding.words_to_ids(s) for s in x], y)
        print('done')

        for epoch in range(FLAGS.epochs):
            start_time = time.time()
            train_x, train_y = preprocess(train, FLAGS.batch, self.embedding.pad_id, FLAGS.maxlen)
            scost = ocost = tcost = p_one = 0
            for bx, by in zip(train_x, train_y):
                # One optimisation step for both generator and encoder.
                result = self.session.run(
                    [merger, self.generator.train_g, self.encoder.train_e,
                     self.generator.reg, self.generator.obj, self.encoder.loss,
                     self.generator.z, self.global_step],
                    feed_dict=self.train_fd(bx, by))
                writer.add_summary(result[0], result[7])
                scost += result[3]
                ocost += result[4]
                tcost += result[5]
                # Fraction of tokens the generator selected in this batch.
                p_one += np.sum(result[6]) / FLAGS.batch / len(bx[0])
            print('Regularization: ', scost / float(len(train_x)))
            print('Objective: ', ocost / float(len(train_x)))
            print('Prediction loss: ', tcost / float(len(train_x)))
            print('Generator Selection %: ', p_one / float(len(train_x)))

            # Evaluate every epoch (condition kept as written).
            if not epoch % 1:
                results = []
                ocost = tcost = 0
                test_x, test_y = preprocess(test, FLAGS.batch, self.embedding.pad_id, FLAGS.maxlen)
                for bx, by in zip(test_x, test_y):
                    # kp=1.0 disables dropout at test time.
                    preds, bz, gobj, eloss = self.session.run(
                        [self.encoder.preds, self.generator.z,
                         self.generator.obj, self.encoder.loss],
                        feed_dict=self.train_fd(bx, by, 1.))
                    ocost += gobj
                    tcost += eloss
                    # Render each rationale: keep selected words, blank the rest.
                    for p, x, y, z in zip(preds, bx.T, by, bz.T):
                        w = self.embedding.ids_to_words(x)
                        w = [u.replace('<pad>', '_') for u in w]
                        r = [u if v == 1 else '_' for u, v in zip(w, z)]
                        results.append((p, r, w, y))
                print('Test Objective: ', ocost / float(len(test_x)))
                print('Test Prediction loss: ', tcost / float(len(test_x)))

                with open(FLAGS.output, 'w+') as f:
                    for p, r, w, y in results:
                        f.write(json.dumps({
                            'rationale': ' '.join(r),
                            'original': ' '.join(w),
                            'y': str(list(y)),
                            'p': str(list(p)),
                        }) + '\n')
                saver.save(self.session, os.path.join(FLAGS.checkpoint, 'GEN.model'),
                           global_step=self.global_step)
            print('Finished epoch %s in %.2f seconds\n' % (epoch, time.time() - start_time))
def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30, emb=None,
               ngram_embedding=None, pixels=None, con_width=None, filters=None, pooling_size=None):
    """Build the bucketed tagging graph (with optional radical / n-gram / glyph features).

    :param trained_model: path where the model hyper-parameters are stored
    :param scope: outer tf.variable_scope used for variable reuse across buckets
    :param emb_dim: per-character embedding dimension
    :param gru: use GRU cells if true, otherwise LSTM cells
    :param rnn_dim: RNN hidden dimension
    :param rnn_num: number of stacked RNN layers
    :param drop_out: dropout rate
    :param rad_dim: radical embedding dimension
    :param emb: pretrained character embeddings
    :param ngram_embedding: pretrained n-gram embedding files
    :param pixels: flattened glyph pixel data (square images)
    :param con_width: convolution filter width
    :param filters: number of convolution filters
    :param pooling_size: max-pooling window size
    :return: None (populates self.input_v, self.output, self.output_, self.params, self.saver, ...)
    """
    # trained_model: model storage path
    if trained_model is not None:
        param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags, 'tag_scheme': self.tag_scheme,
                     'graphic': self.graphic, 'pic_size': self.pic_size, 'word_vec': self.word_vec,
                     'radical': self.radical, 'crf': self.crf, 'emb_dim': emb_dim, 'gru': gru,
                     'rnn_dim': rnn_dim, 'rnn_num': rnn_num, 'drop_out': drop_out, 'filter_size': con_width,
                     'filters': filters, 'pooling_size': pooling_size, 'font': self.font,
                     'buckets_char': self.buckets_char, 'ngram': self.ngram}
        print "RNN dimension is %d" % rnn_dim
        print "RNN number is %d" % rnn_num
        print "Character embedding size is %d" % emb_dim
        print "Ngram embedding dimension is %d" % emb_dim
        # persist the model hyper-parameters
        if self.metric == 'All':
            # rindex() returns the last occurrence of the substring,
            # used here to split off the model file name
            pindex = trained_model.rindex('/') + 1
            for m in self.all_metrics:
                f_model = open(trained_model[:pindex] + m + '_' + trained_model[pindex:], 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()
        else:
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

    # define shared weights and variables
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.drop_out = dr
    self.drop_out_v = drop_out

    # character embedding layer
    # why add 500 to the character count? (vocabulary headroom — TODO confirm)
    # emb_dim is the per-character feature dimension, settable from the command line
    # weights are the pretrained character embeddings, settable from the command line
    if self.word_vec:
        self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

    # radical embeddings
    # the Kangxi dictionary lists 214 radicals; only radicals of common
    # characters are used, with two extra special symbols for uncommon and
    # non-Chinese characters, so there are 216 radicals in total
    if self.radical:
        self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

    if self.ngram is not None:
        if ngram_embedding is not None:
            assert len(ngram_embedding) == len(self.ngram)
        else:
            ngram_embedding = [None for _ in range(len(self.ngram))]
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ngram_embedding[i],
                                                   name=str(i + 2) + 'gram_layer'))

    wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = \
        None, None, None, None, None, None

    if self.graphic:
        # glyph (image) features require a small CNN
        self.input_p = []
        assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

        self.pixels = pixels
        # glyph images are square: side length from the flattened pixel count
        pixel_dim = int(math.sqrt(len(pixels[0])))

        wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
        wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

        p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

        wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
        wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

        p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

        wrapper_dense = TimeDistributed(
            HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'),
            name='wrapper_3')
        wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

    with tf.variable_scope('BiRNN'):
        if gru:
            fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
        else:
            fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

        if rnn_num > 1:
            fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
            bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)

    # hidden layer: input is the forward RNN output concatenated with the
    # backward RNN output, hence rnn_dim * 2; output size is the tag count
    output_wrapper = TimeDistributed(
        HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'),
        name='wrapper')

    # define model for each bucket
    # sentences in each bucket share one length, so each bucket gets its own model
    # bucket: sentence length of the bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            # scope is tf.variable_scope("tagger", reuse=None, initializer=initializer);
            # reuse only needs to be enabled once — every later bucket reuses too
            scope.reuse_variables()
        t1 = time()

        # input sentences as integer id vectors
        # shape = (batch_size, sentence length)
        input_sentences = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))
        self.input_v.append([input_sentences])

        emb_set = []

        if self.word_vec:
            # look up the character embedding for each id
            # word_out: shape=(batch_size, sentence length, embedding dim)
            word_out = self.emb_layer(input_sentences)
            emb_set.append(word_out)

        if self.radical:
            # radical ids, shape = (batch_size, sentence length)
            input_radicals = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))
            self.input_v[-1].append(input_radicals)
            radical_out = self.radical_layer(input_radicals)
            emb_set.append(radical_out)

        if self.ngram is not None:
            for i in range(len(self.ngram)):
                input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                self.input_v[-1].append(input_g)
                gram_out = self.gram_layers[i](input_g)
                emb_set.append(gram_out)

        if self.graphic:
            # flattened glyph pixels per character
            input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim * pixel_dim])
            self.input_p.append(input_p)

            pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])

            conv_out_1 = wrapper_conv_1(pix_out)
            pooling_out_1 = wrapper_mp_1(conv_out_1)

            conv_out_2 = wrapper_conv_2(pooling_out_1)
            pooling_out_2 = wrapper_mp_2(conv_out_2)

            assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
            pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
            pooling_out = tf.unstack(pooling_out, axis=1)

            graphic_out = wrapper_dense(pooling_out)
            graphic_out = wrapper_dr(graphic_out)

            emb_set.append(graphic_out)

        if self.window_size > 1:
            # multi-width character convolutions over a padded sentence,
            # followed by a top-k selection and a highway combination
            padding_size = int(np.floor(self.window_size / 2))
            word_padded = tf.pad(word_out, [[0, 0], [padding_size, padding_size], [0, 0]], 'CONSTANT')

            Ws = []
            for q in range(1, self.window_size + 1):
                Ws.append(tf.get_variable("W_%d" % q, shape=[q * emb_dim, self.filters_number]))
            b = tf.get_variable("b", shape=[self.filters_number])

            z = [None for _ in range(0, bucket)]

            for q in range(1, self.window_size + 1):
                for i in range(padding_size, bucket + padding_size):
                    # window of width q centred (asymmetrically for even q) on i
                    low = i - int(np.floor((q - 1) / 2))
                    high = i + int(np.ceil((q + 1) / 2))
                    x = word_padded[:, low, :]
                    for j in range(low + 1, high):
                        x = tf.concat(values=[x, word_padded[:, j, :]], axis=1)
                    z_iq = tf.tanh(tf.nn.xw_plus_b(x, Ws[q - 1], b))
                    if z[i - padding_size] is None:
                        z[i - padding_size] = z_iq
                    else:
                        z[i - padding_size] = tf.concat([z[i - padding_size], z_iq], axis=1)

            z = tf.stack(z, axis=1)
            values, indices = tf.nn.top_k(z, sorted=False, k=emb_dim)

            # highway layer: gate between the raw embedding and the conv features
            X = tf.unstack(word_out, axis=1)
            Conv_X = tf.unstack(values, axis=1)
            X_hat = []
            W_t = tf.get_variable("W_t", shape=[emb_dim, emb_dim])
            b_t = tf.get_variable("b_t", shape=[emb_dim])
            for x, conv_x in zip(X, Conv_X):
                T_x = tf.sigmoid(tf.nn.xw_plus_b(x, W_t, b_t))
                X_hat.append(tf.multiply(conv_x, T_x) + tf.multiply(x, 1 - T_x))
            X_hat = tf.stack(X_hat, axis=1)

            emb_set.append(X_hat)

        if len(emb_set) > 1:
            # concatenate all feature embeddings
            # (characters, radicals, n-grams, glyph features, ...)
            emb_out = tf.concat(axis=2, values=emb_set)
        else:
            emb_out = emb_set[0]

        # rnn_out is the forward RNN output concatenated with the backward RNN output
        rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                         name='BiLSTM' + str(bucket), scope='BiRNN')(self.highway(emb_out, "tag"),
                                                                     input_sentences)

        # fully connected layer (Wx+b) producing the final tag scores
        output = output_wrapper(rnn_out)
        # why [output] rather than output? (kept for downstream list handling — TODO confirm)
        self.output.append([output])

        self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
        self.bucket_dit[bucket] = idx

        # language model head over the character embeddings only (emb_set[0])
        lm_rnn_dim = rnn_dim
        with tf.variable_scope('LM-BiRNN'):
            if gru:
                lm_fw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
                lm_bw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
            else:
                lm_fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)
                lm_bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)
            if rnn_num > 1:
                lm_fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_fw_rnn_cell] * rnn_num, state_is_tuple=True)
                lm_bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_bw_rnn_cell] * rnn_num, state_is_tuple=True)
        lm_rnn_output = BiLSTM(lm_rnn_dim, fw_cell=lm_fw_rnn_cell, bw_cell=lm_bw_rnn_cell, p=dr,
                               name='LM-BiLSTM' + str(bucket), scope='LM-BiRNN')(self.highway(emb_set[0]),
                                                                                 input_sentences)
        lm_output_wrapper = TimeDistributed(
            HiddenLayer(lm_rnn_dim * 2, self.nums_chars + 2, activation='linear', name='lm_hidden'),
            name='lm_wrapper')
        lm_final_output = lm_output_wrapper(lm_rnn_output)
        self.lm_predictions.append([lm_final_output])
        self.lm_groundtruthes.append([tf.placeholder(tf.int32, [None, bucket],
                                                     name='lm_targets' + str(bucket))])

        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

    # sanity check: one output / target / LM head per bucket
    assert \
        len(self.input_v) == len(self.output) and \
        len(self.output) == len(self.output_) and \
        len(self.lm_predictions) == len(self.lm_groundtruthes) and \
        len(self.output) == len(self.counts)

    self.params = tf.trainable_variables()

    self.saver = tf.train.Saver()
def build_model(self, load_model=None):
    """Assemble and compile the ESIM-style sentence-pair model.

    :param load_model: optional path to a weights file; when given, the
        pre-trained weights are loaded into the compiled model.
    :return: the compiled Keras ``Model`` taking [premise, hypothesis]
        id sequences and producing class predictions.
    """
    # Integer token-id inputs for the two sentences.
    premise = Input(shape=(self.max_length, ), dtype='int32', name='words_1')
    hypothesis = Input(shape=(self.max_length, ), dtype='int32', name='words_2')

    # ------- Embedding Layer -------
    # One shared embedding layer, initialised from the pre-trained (GloVe) matrix.
    embed = EmbeddingLayer(self.vocab_size, self.embedding_size, self.max_length,
                           self.hidden_unit, init_weights=self.embedding_matrix,
                           dropout=self.dropout_rate, nr_tune=5000)
    premise_emb = embed(premise)
    hypothesis_emb = embed(hypothesis)

    # ------- BiLSTM Layer -------
    # Each sentence is encoded in context by its own BiLSTM.
    premise_enc = BiLSTM_Layer(self.max_length, self.hidden_unit)(premise_emb)
    hypothesis_enc = BiLSTM_Layer(self.max_length, self.hidden_unit)(hypothesis_emb)

    # ------- Attention Layer -------
    # Raw attention scores between every premise/hypothesis token pair.
    raw_attention = Lambda(attention, attention_output,
                           name='attention')([premise_enc, hypothesis_enc])

    # ------- Soft-Alignment Layer -------
    # Softmax-normalised attention softly aligns each word of one sentence
    # with the content of the other.
    aligned_premise = Lambda(attention_softmax3d, attention_softmax3d_output,
                             name='soft_alignment_a')([raw_attention, hypothesis_enc])
    aligned_hypothesis = Lambda(attention_softmax3d, attention_softmax3d_output,
                                name='soft_alignment_b')([raw_attention, premise_enc])

    # ------- Enhancement Layer -------
    # Difference and element-wise product of each encoding with its alignment
    # sharpen local inference information (e.g. contradiction cues).
    diff_premise = Lambda(substract, substract_output,
                          name='substract_a')([premise_enc, aligned_premise])
    prod_premise = Lambda(multiply, multiply_output,
                          name='multiply_a')([premise_enc, aligned_premise])
    diff_hypothesis = Lambda(substract, substract_output,
                             name='substract_b')([hypothesis_enc, aligned_hypothesis])
    prod_hypothesis = Lambda(multiply, multiply_output,
                             name='multiply_b')([hypothesis_enc, aligned_hypothesis])

    # shape=(batch_size, time-steps, 4 * units)
    enhanced_premise = merge([premise_enc, aligned_premise, diff_premise, prod_premise],
                             mode='concat')
    enhanced_hypothesis = merge([hypothesis_enc, aligned_hypothesis, diff_hypothesis, prod_hypothesis],
                                mode='concat')

    # ------- Composition Layer -------
    composed_premise = Composition_Layer(self.hidden_unit, self.max_length)(enhanced_premise)
    composed_hypothesis = Composition_Layer(self.hidden_unit, self.max_length)(enhanced_hypothesis)

    # ------- Pooling Layer -------
    preds = Pooling_Layer(self.hidden_unit, self.n_classes, dropout=self.dropout_rate,
                          l2_weight_decay=self.l2_weight_decay)(composed_premise, composed_hypothesis)

    model = Model(inputs=[premise, hypothesis], outputs=[preds])
    model.compile(optimizer=Adam(lr=self.learning_rate),
                  loss='binary_crossentropy', metrics=['accuracy'])

    if load_model is not None:
        print('Loading pre-trained weights from \'{}\'...'.format(load_model))
        model.load_weights(load_model)
    return model
def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, emb=None,
               ngram_embedding=None):
    """Build the bucketed tagging graph with optional co-trained LM and
    character-frequency heads.

    :param trained_model: path where the model hyper-parameters are stored
    :param scope: outer tf.variable_scope used for variable reuse across buckets
    :param emb_dim: per-character embedding dimension
    :param gru: use GRU cells if true, otherwise LSTM cells
    :param rnn_dim: RNN hidden dimension
    :param rnn_num: number of stacked RNN layers
    :param drop_out: dropout rate
    :param emb: pretrained character embeddings
    :param ngram_embedding: pretrained n-gram embedding files
    :return: None (populates self.input_v, self.output, self.output_, self.params, self.saver, ...)
    """
    # trained_model: model storage path
    if trained_model is not None:
        param_dic = {
            'nums_chars': self.nums_chars,
            'nums_tags': self.nums_tags,
            'tag_scheme': self.tag_scheme,
            'crf': self.crf,
            'emb_dim': emb_dim,
            'gru': gru,
            'rnn_dim': rnn_dim,
            'rnn_num': rnn_num,
            'drop_out': drop_out,
            'buckets_char': self.buckets_char,
            'ngram': self.ngram
        }
        print "RNN dimension is %d" % rnn_dim
        print "RNN number is %d" % rnn_num
        print "Character embedding size is %d" % emb_dim

        # persist the model hyper-parameters
        if self.metric == 'All':
            # rindex() returns the last occurrence of the substring,
            # used here to split off the model file name
            pindex = trained_model.rindex('/') + 1
            for m in self.all_metrics:
                f_model = open(
                    trained_model[:pindex] + m + '_' + trained_model[pindex:], 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()
        else:
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

    # define shared weights and variables
    dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
    self.drop_out = dr
    self.drop_out_v = drop_out

    # character embedding layer
    # why add 500 to the character count? (vocabulary headroom — TODO confirm)
    # emb_dim is the per-character feature dimension, settable from the command line
    # weights are the pretrained character embeddings, settable from the command line
    self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

    if self.ngram is not None:
        if ngram_embedding is not None:
            assert len(ngram_embedding) == len(self.ngram)
        else:
            ngram_embedding = [None for _ in range(len(self.ngram))]
        for i, n_gram in enumerate(self.ngram):
            self.gram_layers.append(
                EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ngram_embedding[i],
                               name=str(i + 2) + 'gram_layer'))

    # hidden layer: input is the forward RNN output concatenated with the
    # backward RNN output, hence rnn_dim * 2; output size is the tag count
    tag_output_wrapper = TimeDistributed(
        HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='tag_hidden'),
        name='tag_output_wrapper')

    if self.char_freq_loss:
        # auxiliary head: one sigmoid unit per position predicting character frequency
        freq_output_wrapper = TimeDistributed(
            HiddenLayer(rnn_dim * 2, 1, activation='sigmoid', name='freq_hidden'),
            name='freq_output_wrapper')

    if self.co_train:
        # co-trained language-model heads: one over the forward RNN stream,
        # one over the backward stream; vocabulary is nums_chars + 2
        lm_fw_wrapper = TimeDistributed(
            HiddenLayer(rnn_dim, self.nums_chars + 2, activation='linear', name='lm_fw_hidden'),
            name='lm_fw_wrapper')
        lm_bw_wrapper = TimeDistributed(
            HiddenLayer(rnn_dim, self.nums_chars + 2, activation='linear', name='lm_bw_hidden'),
            name='lm_bw_wrapper')

    # define model for each bucket
    # sentences in each bucket share one length, so each bucket gets its own model
    # bucket: sentence length of the bucket
    for idx, bucket in enumerate(self.buckets_char):
        if idx == 1:
            # scope is tf.variable_scope("tagger", reuse=None, initializer=initializer);
            # reuse only needs to be enabled once — every later bucket reuses too
            scope.reuse_variables()
        t1 = time()

        # input sentences as integer id vectors
        # shape = (batch_size, sentence length)
        input_sentences = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))
        self.input_v.append([input_sentences])

        emb_set = []
        # look up character embeddings from ids
        word_out = self.emb_layer(input_sentences)
        emb_set.append(word_out)

        if self.ngram is not None:
            for i in range(len(self.ngram)):
                input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                self.input_v[-1].append(input_g)
                gram_out = self.gram_layers[i](input_g)
                emb_set.append(gram_out)

        if len(emb_set) > 1:
            # concatenate all feature embeddings (characters, n-grams, ...)
            word_embeddings = tf.concat(axis=2, values=emb_set)
        else:
            word_embeddings = emb_set[0]

        # forward and backward RNN outputs, kept separate (concat_output=False)
        rnn_out_fw, rnn_out_bw = BiRNN(rnn_dim, p=dr, concat_output=False, gru=gru,
                                       name='BiLSTM' + str(bucket), scope='Tag-BiRNN')(word_embeddings,
                                                                                       input_sentences)

        tag_rnn_out_fw, tag_rnn_out_bw = rnn_out_fw, rnn_out_bw
        if self.co_train:
            # task-specific highway transforms on top of the shared RNN streams
            if self.highway_layers > 0:
                tag_rnn_out_fw = highway_network(rnn_out_fw, self.highway_layers, True, is_train=True,
                                                 scope="tag_fw")
                tag_rnn_out_bw = highway_network(rnn_out_bw, self.highway_layers, True, is_train=True,
                                                 scope="tag_bw")
        tag_rnn_out = tf.concat(values=[tag_rnn_out_fw, tag_rnn_out_bw], axis=2)

        # fully connected layer (Wx+b) producing the final tag scores
        output = tag_output_wrapper(tag_rnn_out)
        # why [output] rather than output? (kept for downstream list handling — TODO confirm)
        self.output.append([output])

        self.output_.append([
            tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))
        ])
        self.bucket_dit[bucket] = idx

        if self.co_train:
            # language model heads share the tagger's RNN streams
            lm_rnn_out_fw, lm_rnn_out_bw = rnn_out_fw, rnn_out_bw
            if self.highway_layers > 0:
                lm_rnn_out_fw = highway_network(rnn_out_fw, self.highway_layers, True, is_train=True,
                                                scope="lm_fw")
                lm_rnn_out_bw = highway_network(rnn_out_bw, self.highway_layers, True, is_train=True,
                                                scope="lm_bw")
            self.lm_fw_predictions.append([lm_fw_wrapper(lm_rnn_out_fw)])
            self.lm_bw_predictions.append([lm_bw_wrapper(lm_rnn_out_bw)])
            self.lm_fw_groundtruthes.append([
                tf.placeholder(tf.int32, [None, bucket], name='lm_fw_targets' + str(bucket))
            ])
            self.lm_bw_groundtruthes.append([
                tf.placeholder(tf.int32, [None, bucket], name='lm_bw_targets' + str(bucket))
            ])

        if self.char_freq_loss:
            # character-frequency head, also on top of the shared RNN streams
            freq_rnn_out_fw, freq_rnn_out_bw = rnn_out_fw, rnn_out_bw
            if self.highway_layers > 0:
                freq_rnn_out_fw = highway_network(rnn_out_fw, self.highway_layers, True, is_train=True,
                                                  scope="freq_fw")
                freq_rnn_out_bw = highway_network(rnn_out_bw, self.highway_layers, True, is_train=True,
                                                  scope="freq_bw")
            freq_rnn_out = tf.concat(values=[freq_rnn_out_fw, freq_rnn_out_bw], axis=2)
            self.char_freq_groundtruthes.append([
                tf.placeholder(tf.float32, [None, bucket], name='freq_targets_%d' % bucket)
            ])
            self.char_freq_predictions.append(
                [freq_output_wrapper(freq_rnn_out)])

        print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

    # sanity check: one output / target list per bucket
    assert \
        len(self.input_v) == len(self.output) and \
        len(self.output) == len(self.output_) and \
        len(self.output) == len(self.counts)

    self.params = tf.trainable_variables()
    self.saver = tf.train.Saver()