Example #1
    def __init__(self, input,embeddings,features,mini_batch_size=32,nhu=300,width=5,activation=hardtanh,seed=1234,n_out=9,name='SennaNER',params=None):
        self.name = name
        self.layers = []
        self.input = input
        self.output = None

        embedding_dim=embeddings.shape[1]
        features_dim = features.shape[1]

        rng=np.random.RandomState(seed)
        self.EmbeddingLayer = EmbeddingLayer(input=input[:,:,0],w_values=embeddings,embedding_dim=embedding_dim,mini_batch_size=mini_batch_size,width=width,params=params)
        self.StaticEmbeddingLayer = StaticEmbeddingLayer(input=input[:,:,1],w_values=features,embedding_dim=features_dim,mini_batch_size=mini_batch_size,width=width)
        self.HiddenLayer = DoubleInputHiddenLayer(input1=self.EmbeddingLayer.output, input2=self.StaticEmbeddingLayer.output, n_in1=embedding_dim*width, n_in2=features_dim*width, n_out=nhu, rng=rng, activation=activation,params=params)
        self.LogisticRegressionLayer = LogisticRegressionLayer(input=self.HiddenLayer.output,n_in=nhu,n_out=n_out, rng=rng, params=params)
        self.layers=[self.EmbeddingLayer,self.StaticEmbeddingLayer,self.HiddenLayer,self.LogisticRegressionLayer]

        self.L1 = T.sum([layer.L1 for layer in self.layers if "L1" in layer.__dict__])
        self.L2 = T.sum([layer.L2 for layer in self.layers if "L2" in layer.__dict__])

        self.params = list(itertools.chain(*[layer.params for layer in self.layers]))
       
        self.negative_log_likelihood = self.LogisticRegressionLayer.negative_log_likelihood
        self.errors = self.LogisticRegressionLayer.errors
        self.predictions = self.LogisticRegressionLayer.y_pred
        self.n_ins = list(itertools.chain(*[[layer.n_in]*len(layer.params) for layer in self.layers]))
        print(self.n_ins)
        print(self.params)

#class SennaNER_alt(Network):
#    def __init__(self, input,embeddings,mini_batch_size=32,nhu=300,width=5,activation=hardtanh,seed=1234,n_out=9,name='SennaNER',params=None):
#        self.name = name
#        self.layers = []
#        self.input = input
#        self.output = None
#
#        embedding_dim=embeddings.shape[1]
#        features = np.eye(4)
#
#        rng=np.random.RandomState(seed)
#        self.HiddenLayer = DoubleEmbeddingHiddenLayer(input=input,embeddings_values=embeddings,features_values=features,n_in1=embedding_dim*width,n_in2=features.shape[0]*width, batch_size=mini_batch_size, n_out=nhu, rng=rng, activation=activation,params=params)
##        self.EmbeddingLayer = EmbeddingLayer(input=input[:,:,0],w_values=embeddings,embedding_dim=embedding_dim,mini_batch_size=mini_batch_size,width=width,params=params)
##        self.StaticEmbeddingLayer = StaticEmbeddingLayer(input=input[:,:,1],w_values=features,embedding_dim=features.shape[0],mini_batch_size=mini_batch_size,width=width)
##        self.HiddenLayer = DoubleInputHiddenLayer(input1=self.EmbeddingLayer.output, input2=self.StaticEmbeddingLayer.output, n_in1=embedding_dim*width, n_in2=features.shape[0]*width, n_out=nhu, rng=rng, activation=activation,params=params)
#        self.LogisticRegressionLayer = LogisticRegressionLayer(input=self.HiddenLayer.output,n_in=nhu,n_out=n_out, rng=rng, params=params)
#        self.layers=[self.HiddenLayer,self.LogisticRegressionLayer]
#
#        self.L1 = T.sum([layer.L1 for layer in self.layers if "L1" in layer.__dict__])
#        self.L2 = T.sum([layer.L2 for layer in self.layers if "L2" in layer.__dict__])
#
#        self.params = list(itertools.chain(*[layer.params for layer in self.layers]))
#       
#        self.negative_log_likelihood = self.LogisticRegressionLayer.negative_log_likelihood
#        self.errors = self.LogisticRegressionLayer.errors
#        self.predictions = self.LogisticRegressionLayer.y_pred
#        self.n_ins = list(itertools.chain(*[[layer.n_in]*len(layer.params) for layer in self.layers]))
#        print self.n_ins
#        print self.params
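
For reference, a minimal NumPy sketch of the lookup-and-concatenate step implied by the hidden layer's input widths (embedding_dim*width and features_dim*width); the reshape order is an assumption, not taken from the layer code:

import numpy as np

# Hypothetical window lookup: each of the `width` token ids in a context
# window selects one embedding row, and the rows are concatenated, giving
# the embedding_dim * width input expected by DoubleInputHiddenLayer.
embeddings = np.random.randn(100, 50)      # vocab 100, embedding_dim 50
window = np.array([3, 17, 42, 8, 99])      # width = 5 token ids
window_features = embeddings[window].reshape(-1)
assert window_features.shape == (50 * 5,)  # embedding_dim * width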
Example #3
    def _build_graph(self):
        """ Defines the model graph. """
        with tf.variable_scope('{:s}_model'.format(self.name)):
            # Instantiate embedding layer(s)
            if self.config.untie_enc_dec_embeddings:
                enc_vocab_size = self.source_vocab_size
                dec_vocab_size = self.target_vocab_size
            else:
                assert self.source_vocab_size == self.target_vocab_size, \
                    'Input and output vocabularies should be identical when tying embedding tables.'
                enc_vocab_size = dec_vocab_size = self.source_vocab_size

            encoder_embedding_layer = EmbeddingLayer(enc_vocab_size,
                                                     self.config.embedding_size,
                                                     self.config.hidden_size,
                                                     self.float_dtype,
                                                     name='encoder_embedding_layer')
            if self.config.untie_enc_dec_embeddings:
                decoder_embedding_layer = EmbeddingLayer(dec_vocab_size,
                                                         self.config.embedding_size,
                                                         self.config.hidden_size,
                                                         self.float_dtype,
                                                         name='decoder_embedding_layer')
            else:
                decoder_embedding_layer = encoder_embedding_layer

            if self.config.untie_decoder_embeddings:
                softmax_projection_layer = EmbeddingLayer(dec_vocab_size,
                                                          self.config.embedding_size,
                                                          self.config.hidden_size,
                                                          self.float_dtype,
                                                          name='softmax_projection_layer')
            else:
                softmax_projection_layer = decoder_embedding_layer

            # Instantiate the component networks
            self.enc = TransformerEncoder(self.config,
                                          encoder_embedding_layer,
                                          self.training,
                                          self.float_dtype,
                                          self.gate_tracker,
                                          'encoder')
            self.dec = TransformerDecoder(self.config,
                                          decoder_embedding_layer,
                                          softmax_projection_layer,
                                          self.training,
                                          self.int_dtype,
                                          self.float_dtype,
                                          self.gate_tracker,
                                          'decoder')

        return dec_vocab_size
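
When untie_enc_dec_embeddings is off, a single EmbeddingLayer object serves the encoder input, the decoder input, and (unless untie_decoder_embeddings) the softmax projection. A small TF1-style sketch of that weight-tying idea; the names (shared_table, token_ids) are illustrative, not from the model:

import tensorflow as tf

# Hypothetical tied embeddings: one table for input lookup and output logits.
vocab_size, hidden_size = 1000, 64
shared_table = tf.get_variable('shared_table', [vocab_size, hidden_size])

token_ids = tf.placeholder(tf.int32, [None])
inputs = tf.nn.embedding_lookup(shared_table, token_ids)            # [batch, hidden]
decoder_state = inputs  # stand-in for the real decoder stack
logits = tf.matmul(decoder_state, shared_table, transpose_b=True)   # [batch, vocab]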
Example #4
    def __init__(self, hidden_size, batch_size, K, W_init, config,
                 max_sen_len):
        super(HAQA, self).__init__()
        self.embedding = EmbeddingLayer(W_init, config)
        embedding_size = W_init.shape[1] + config['char_filter_size']

        self.ga = GatedAttentionLayer()  # non-parametrized
        self.gaao = GatedAttentionAttOnly()  # non-parametrized
        self.ha = HopAttentionLayer()  # parametrized
        self.gating_w = Variable(torch.Tensor([0.5]),
                                 requires_grad=True).to(device)
        self.pred = AnswerPredictionLayer()  # non-parametrized
        self.K = K
        self.hidden_size = hidden_size

        self.context_gru_0 = BiGRU(embedding_size, hidden_size, batch_size)
        self.query_gru_0 = BiGRU(embedding_size, hidden_size, batch_size)

        self.context_gru_1 = BiGRU(2 * hidden_size, hidden_size, batch_size)
        self.query_gru_1 = BiGRU(embedding_size, hidden_size, batch_size)

        self.context_gru_2 = BiGRU(2 * hidden_size, hidden_size, batch_size)
        self.query_gru_2 = BiGRU(embedding_size, hidden_size, batch_size)

        self.context_gru_3 = BiGRU(2 * hidden_size, hidden_size, batch_size)
        self.query_gru_3 = BiGRU(embedding_size, hidden_size, batch_size)

        self.max_sentence = MaxAttSentence(max_sen_len, 2 * hidden_size)
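
GatedAttentionLayer is noted above as non-parametrized; in GA-Reader-style models this is typically an element-wise product between each context token and its attention-weighted query summary. A hedged PyTorch sketch of that operation (the shapes and softmax axis are assumptions, not taken from this class):

import torch
import torch.nn.functional as F

# Hypothetical gated attention between context and query encodings.
context = torch.randn(2, 30, 128)   # (batch, context_len, 2*hidden_size)
query = torch.randn(2, 10, 128)     # (batch, query_len, 2*hidden_size)

scores = torch.bmm(context, query.transpose(1, 2))   # (2, 30, 10)
alpha = F.softmax(scores, dim=2)                     # attend over query tokens
attended = torch.bmm(alpha, query)                   # (2, 30, 128)
gated_context = context * attended                   # element-wise gating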
Example #5
    def build_Emb_layer(self):
        return EmbeddingLayer(self.in_dim,
                              self.ins_dim,
                              self.hid_dim,
                              self.out_dim,
                              activation=self.activation,
                              side_information=self.side_information,
                              bias=self.bias,
                              dropout=self.dropout,
                              use_cuda=self.use_cuda)
Example #6
    def main_graph(self,
                   trained_model,
                   scope,
                   emb_dim,
                   cell,
                   rnn_dim,
                   rnn_num,
                   drop_out=0.5,
                   emb=None):
        if trained_model is not None:
            param_dic = {
                'nums_chars': self.nums_chars,
                'nums_tags': self.nums_tags,
                'crf': self.crf,
                'emb_dim': emb_dim,
                'cell': cell,
                'rnn_dim': rnn_dim,
                'rnn_num': rnn_num,
                'drop_out': drop_out,
                'buckets_char': self.buckets_char,
                'ngram': self.ngram,
                'is_space': self.is_space,
                'sent_seg': self.sent_seg,
                'emb_path': self.emb_path,
                'tag_scheme': self.tag_scheme
            }
            #print param_dic
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

        # define shared weights and variables
        batch_size_h = tf.placeholder(tf.int32, [], name='batch_size_holder')
        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.batch_size_h = batch_size_h
        self.drop_out = dr
        self.drop_out_v = drop_out
        # pdb.set_trace()
        self.emb_layer = EmbeddingLayer(self.nums_chars + 20,
                                        emb_dim,
                                        weights=emb,
                                        name='emb_layer')

        if self.ngram is not None:
            ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(
                    EmbeddingLayer(n_gram + 5000 * (i + 2),
                                   emb_dim,
                                   weights=ng_embs[i],
                                   name=str(i + 2) + 'gram_layer'))

        with tf.variable_scope('BiRNN'):

            if cell == 'gru':
                fw_rnn_cell = tf.contrib.rnn.GRUCell(rnn_dim)  #forward
                bw_rnn_cell = tf.contrib.rnn.GRUCell(rnn_dim)  #backward
            else:
                fw_rnn_cell = tf.contrib.rnn.LSTMCell(rnn_dim,
                                                      state_is_tuple=True)
                bw_rnn_cell = tf.contrib.rnn.LSTMCell(rnn_dim,
                                                      state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.contrib.rnn.MultiRNNCell([fw_rnn_cell] *
                                                          rnn_num,
                                                          state_is_tuple=True)
                bw_rnn_cell = tf.contrib.rnn.MultiRNNCell([bw_rnn_cell] *
                                                          rnn_num,
                                                          state_is_tuple=True)

        output_wrapper = HiddenLayer(rnn_dim * 2,
                                     self.nums_tags,
                                     activation='linear',
                                     name='hidden')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                scope.reuse_variables()
            t1 = time()
            batch_size = self.real_batches[idx]

            input_v1 = tf.placeholder(tf.int32, [None, bucket],
                                      name='input_1' + str(bucket))
            input_v2 = tf.placeholder(tf.int32, [None, bucket],
                                      name='input_2' + str(bucket))
            self.input_v1.append([input_v1])
            self.input_v2.append([input_v2])
            #output = None
            output = []
            for i in range(self.num_gpus):
                with tf.device('/gpu:{}'.format(i)):
                    input_1 = input_v1[i * batch_size_h:(i + 1) * batch_size_h]

                    input_2 = input_v2[i * batch_size_h:(i + 1) * batch_size_h]

                    emb_set1 = []
                    emb_set2 = []

                    word_out1 = self.emb_layer(input_1)
                    word_out2 = self.emb_layer(input_2)
                    emb_set1.append(word_out1)
                    emb_set2.append(word_out2)

                    # if self.ngram is not None:
                    # 	for i in range(len(self.ngram)):
                    # 		input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    # 		self.input_v[-1].append(input_g)
                    # 		gram_out = self.gram_layers[i](input_g)
                    # 		emb_set.append(gram_out)

                    if len(emb_set1) > 1:
                        emb_out1 = tf.concat(axis=2, values=emb_set1)
                        emb_out2 = tf.concat(axis=2, values=emb_set2)

                    else:
                        emb_out1 = emb_set1[0]
                        emb_out2 = emb_set2[0]

                    emb_out1 = DropoutLayer(dr)(emb_out1)
                    emb_out2 = DropoutLayer(dr)(emb_out2)

                    rnn_out = BiLSTM(rnn_dim,
                                     fw_cell=fw_rnn_cell,
                                     bw_cell=bw_rnn_cell,
                                     p=dr,
                                     name='BiLSTM' + str(bucket),
                                     scope='BiRNN')(emb_out1, emb_out2,
                                                    input_v1)

                    output_g = output_wrapper(rnn_out)
                    # if output == None:
                    # output = output_g
                    # else:
                    # output = tf.concat([output,output_g],axis = 0)
                    #pdb.set_trace()
                    output.append(output_g)
            self.output.append([output])

            self.output_.append([
                tf.placeholder(tf.int32, [None, bucket - 1],
                               name='tags' + str(bucket))
            ])
            self.bucket_dit[bucket] = idx

            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert len(self.input_v1) == len(self.output)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
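
Because main_graph builds a separate sub-graph per sentence-length bucket, feeding a batch means locating the right bucket's placeholders through self.bucket_dit. A sketch of that routing; pick_bucket and the feed wiring are illustrative helpers, not part of the class:

# Hypothetical routing of a padded batch to its bucket's placeholders.
def pick_bucket(sentence_len, buckets_char):
    """Return the smallest bucket that can hold the sentence."""
    for bucket in sorted(buckets_char):
        if sentence_len <= bucket:
            return bucket
    return max(buckets_char)

# idx = self.bucket_dit[pick_bucket(max_len_in_batch, self.buckets_char)]
# feed_dict = {self.input_v1[idx][0]: batch_1, self.input_v2[idx][0]: batch_2}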
Example #7
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30, emb=None, ng_embs=None, pixels=None, con_width=None, filters=None, pooling_size=None):
        if trained_model is not None:
            param_dic = {}
            param_dic['nums_chars'] = self.nums_chars
            param_dic['nums_tags'] = self.nums_tags
            param_dic['tag_scheme'] = self.tag_scheme
            param_dic['graphic'] = self.graphic
            param_dic['pic_size'] = self.pic_size
            param_dic['word_vec'] = self.word_vec
            param_dic['radical'] = self.radical
            param_dic['crf'] = self.crf
            param_dic['emb_dim'] = emb_dim
            param_dic['gru'] = gru
            param_dic['rnn_dim'] = rnn_dim
            param_dic['rnn_num'] = rnn_num
            param_dic['drop_out'] = drop_out
            param_dic['filter_size'] = con_width
            param_dic['filters'] = filters
            param_dic['pooling_size'] = pooling_size
            param_dic['font'] = self.font
            param_dic['buckets_char'] = self.buckets_char
            param_dic['ngram'] = self.ngram
            #print param_dic
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

        if self.radical:
            self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

        if self.ngram is not None:
            if ng_embs is not None:
                assert len(ng_embs) == len(self.ngram)
            else:
                ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ng_embs[i], name= str(i + 2) + 'gram_layer'))

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None, None, None, None

        if self.graphic:
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
            wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
            wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = TimeDistributed(HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'), name='wrapper_3')
            wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

        output_wrapper = TimeDistributed(HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'), name='wrapper')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            self.input_v.append([input_v])

            emb_set = []

            if self.word_vec:
                word_out = self.emb_layer(input_v)
                emb_set.append(word_out)

            if self.radical:
                input_r = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))

                self.input_v[-1].append(input_r)
                radical_out = self.radical_layer(input_r)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim*pixel_dim])
                self.input_p.append(input_p)

                pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])
                pix_out = tf.unpack(pix_out, axis=1)

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
                pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
                pooling_out = tf.unpack(pooling_out, axis=1)

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)


            if len(emb_set) > 1:
                emb_out = tf.concat(2, emb_set)
                emb_out = tf.unpack(emb_out)

            else:
                emb_out = emb_set[0]

            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr, name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

            output = output_wrapper(rnn_out)

            output_c = tf.pack(output, axis=1)

            self.output.append([output_c])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])

            self.bucket_dit[bucket] = idx

            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
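
The assert on p_size_2 only holds if toolbox.down_pool mirrors the pooling arithmetic of the graph. Assuming non-overlapping max-pooling (window equal to stride) with SAME padding, the spatial size shrinks by ceiling division; a sketch of that assumed behaviour:

import math

# Assumed behaviour of toolbox.down_pool: spatial size after a pooling
# layer whose window equals its stride, with SAME padding.
def down_pool(dim, pooling_size):
    return int(math.ceil(dim / float(pooling_size)))

# e.g. a 32x32 glyph pooled twice with pooling_size=2: 32 -> 16 -> 8
assert down_pool(down_pool(32, 2), 2) == 8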
Example #8
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, emb=None):
        if trained_model is not None:
            param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags, 'crf': self.crf, 'emb_dim': emb_dim,
                         'gru': gru, 'rnn_dim': rnn_dim, 'rnn_num': rnn_num, 'drop_out': drop_out, 'buckets_char': self.buckets_char,
                         'ngram': self.ngram, 'is_space': self.is_space, 'sent_seg': self.sent_seg, 'emb_path': self.emb_path,
                         'tag_scheme': self.tag_scheme}
            #print param_dic
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        self.emb_layer = EmbeddingLayer(self.nums_chars + 20, emb_dim, weights=emb, name='emb_layer')

        if self.ngram is not None:
            ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 5000 * (i + 2), emb_dim, weights=ng_embs[i], name= str(i + 2) + 'gram_layer'))

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

        output_wrapper = TimeDistributed(HiddenLayer(rnn_dim * 2, self.nums_tags, activation='linear', name='hidden'), name='wrapper')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            self.input_v.append([input_v])

            emb_set = []

            word_out = self.emb_layer(input_v)
            emb_set.append(word_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if len(emb_set) > 1:
                emb_out = tf.concat(2, emb_set)

            else:
                emb_out = emb_set[0]

            emb_out = DropoutLayer(dr)(emb_out)
            emb_out = tf.unpack(emb_out)

            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr, name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

            output = output_wrapper(rnn_out)
            output_c = tf.pack(output, axis=1)

            self.output.append([output_c])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
            self.bucket_dit[bucket] = idx

            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Example #9
    def main_graph(self,
                   trained_model,
                   scope,
                   emb_dim,
                   gru,
                   rnn_dim,
                   rnn_num,
                   fnn_dim,
                   window_size,
                   drop_out=0.5,
                   rad_dim=30,
                   emb=None,
                   ng_embs=None,
                   pixels=None,
                   con_width=None,
                   filters=None,
                   pooling_size=None):
        if trained_model is not None:
            param_dic = {}
            param_dic['nums_chars'] = self.nums_chars
            param_dic['nums_tags'] = self.nums_tags
            param_dic['tag_scheme'] = self.tag_scheme
            param_dic['graphic'] = self.graphic
            param_dic['pic_size'] = self.pic_size
            param_dic['word_vec'] = self.word_vec
            param_dic['radical'] = self.radical
            param_dic['crf'] = self.crf
            param_dic['emb_dim'] = emb_dim
            param_dic['gru'] = gru
            param_dic['rnn_dim'] = rnn_dim
            param_dic['rnn_num'] = rnn_num
            param_dic['fnn_dim'] = fnn_dim
            param_dic['window_size'] = window_size
            param_dic['drop_out'] = drop_out
            param_dic['filter_size'] = con_width
            param_dic['filters'] = filters
            param_dic['pooling_size'] = pooling_size
            param_dic['font'] = self.font
            param_dic['buckets_char'] = self.buckets_char
            param_dic['ngram'] = self.ngram
            param_dic['mode'] = self.mode
            #print param_dic
            if self.metric == 'All':
                pindex = trained_model.rindex('/') + 1
                for m in self.all_metrics:
                    f_model = open(
                        trained_model[:pindex] + m + '_' +
                        trained_model[pindex:], 'w')
                    pickle.dump(param_dic, f_model)
                    f_model.close()
            else:
                f_model = open(trained_model, 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        #concat_emb_dim = emb_dim * 2
        concat_emb_dim = 0

        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500,
                                            emb_dim,
                                            weights=emb,
                                            name='emb_layer')
            concat_emb_dim += emb_dim

        if self.radical:
            self.radical_layer = EmbeddingLayer(216,
                                                rad_dim,
                                                name='radical_layer')
            concat_emb_dim += rad_dim

        if self.ngram is not None:
            if ng_embs is not None:
                assert len(ng_embs) == len(self.ngram)
            else:
                ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(
                    EmbeddingLayer(n_gram + 1000 * (i + 2),
                                   emb_dim,
                                   weights=ng_embs[i],
                                   name=str(i + 2) + 'gram_layer'))
                concat_emb_dim += emb_dim

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2 = None, None, None
        wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None

        if self.graphic:
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = Convolution(con_width, 1, filters, name='conv_1')
            wrapper_mp_1 = Maxpooling(pooling_size,
                                      pooling_size,
                                      name='pooling_1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = Convolution(con_width,
                                         filters,
                                         filters,
                                         name='conv_2')
            wrapper_mp_2 = Maxpooling(pooling_size,
                                      pooling_size,
                                      name='pooling_2')
            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = HiddenLayer(p_size_2 * p_size_2 * filters,
                                        100,
                                        activation='tanh',
                                        name='conv_dense')
            wrapper_dr = DropoutLayer(self.drop_out)

            concat_emb_dim += 100

        fw_rnn_cell, bw_rnn_cell = None, None

        if self.mode == 'RNN':
            with tf.variable_scope('BiRNN'):

                if gru:
                    fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                    bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                else:
                    fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim,
                                                          state_is_tuple=True)
                    bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim,
                                                          state_is_tuple=True)

                if rnn_num > 1:
                    fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(
                        [fw_rnn_cell] * rnn_num, state_is_tuple=True)
                    bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(
                        [bw_rnn_cell] * rnn_num, state_is_tuple=True)

            output_wrapper = HiddenLayer(rnn_dim * 2,
                                         self.nums_tags[0],
                                         activation='linear',
                                         name='out_wrapper')
            fnn_weights, fnn_bias = None, None

        else:

            with tf.variable_scope('FNN'):
                fnn_weights = tf.get_variable(
                    'conv_w',
                    [2 * window_size + 1, concat_emb_dim, 1, fnn_dim])
                fnn_bias = tf.get_variable(
                    'conv_b', [fnn_dim],
                    initializer=tf.constant_initializer(0.1))

            output_wrapper = HiddenLayer(fnn_dim,
                                         self.nums_tags[0],
                                         activation='linear',
                                         name='out_wrapper')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket],
                                     name='input_' + str(bucket))

            self.input_v.append([input_v])

            emb_set = []

            if self.word_vec:
                word_out = self.emb_layer(input_v)
                emb_set.append(word_out)

            if self.radical:
                input_r = tf.placeholder(tf.int32, [None, bucket],
                                         name='input_r' + str(bucket))

                self.input_v[-1].append(input_r)
                radical_out = self.radical_layer(input_r)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket],
                                             name='input_g' + str(i) +
                                             str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32,
                                         [None, bucket, pixel_dim * pixel_dim])
                self.input_p.append(input_p)
                pix_out = tf.reshape(input_p, [-1, pixel_dim, pixel_dim, 1])

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]

                pooling_out = tf.reshape(
                    pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)

            if len(emb_set) > 1:
                emb_out = tf.concat(axis=2, values=emb_set)

            else:
                emb_out = emb_set[0]

            if self.mode == 'RNN':
                rnn_out = BiLSTM(rnn_dim,
                                 fw_cell=fw_rnn_cell,
                                 bw_cell=bw_rnn_cell,
                                 p=dr,
                                 name='BiLSTM' + str(bucket),
                                 scope='BiRNN')(emb_out, input_v)

                output = output_wrapper(rnn_out)

            else:
                emb_out = tf.pad(emb_out,
                                 [[0, 0], [window_size, window_size], [0, 0]])
                emb_out = tf.reshape(
                    emb_out, [-1, bucket + 2 * window_size, concat_emb_dim, 1])
                conv_out = tf.nn.conv2d(emb_out,
                                        fnn_weights, [1, 1, 1, 1],
                                        padding='VALID') + fnn_bias
                fnn_out = tf.nn.tanh(conv_out)
                fnn_out = tf.reshape(fnn_out, [-1, bucket, fnn_dim])

                output = output_wrapper(fnn_out)

            self.output.append([output])

            self.output_.append([
                tf.placeholder(tf.int32, [None, bucket],
                               name='tags' + str(bucket))
            ])

            self.bucket_dit[bucket] = idx

            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) \
               and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
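
In the non-RNN branch a windowed feed-forward layer is expressed as a single 2-D convolution over the padded embedding tensor. A quick shape check (the sizes are illustrative) of why the VALID convolution yields exactly one fnn_dim vector per character:

# Padded input:  [batch, bucket + 2*window_size, concat_emb_dim, 1]
# Filter:        [2*window_size + 1, concat_emb_dim, 1, fnn_dim]
bucket, window_size, concat_emb_dim = 50, 2, 94
height_in = bucket + 2 * window_size              # after tf.pad
filter_height = 2 * window_size + 1
height_out = height_in - filter_height + 1        # VALID convolution
width_out = concat_emb_dim - concat_emb_dim + 1   # filter spans the embedding
assert (height_out, width_out) == (bucket, 1)     # -> [batch, bucket, 1, fnn_dim]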
Example #10
class Model(object):

	def __init__(self):	
		self.session = tf.InteractiveSession()
		tf.set_random_seed(FLAGS.seed)
		np.random.seed(FLAGS.seed)

		print('Reading embeddings...', end='', flush=True)
		self.embedding = EmbeddingLayer(FLAGS.embedding)
		print('done')

		# Define network parameters
		nkwargs = { 
			'in_size' 	: self.embedding.shape[1],
			'out_size'	: FLAGS.hidden,
			'depth'   	: FLAGS.depth,
			'batch_size': FLAGS.batch,
		}

		# Define the inputs, and their respective type & size
		inputs = {  
			'x'   		: [tf.int32,   [None, FLAGS.batch]],                
			'y'   		: [tf.float32, [FLAGS.batch, 5]],    
			'kp'  		: [tf.float32, []], 
			'lambda'	: [tf.float32, []],
			'sparsity' 	: [tf.float32, []],
			'coherency'	: [tf.float32, []],
		}

		# Create placeholders
		with tf.name_scope('Placeholders'):
			p = { name: tf.placeholder(*args, name=name) 
				  for name, args in inputs.items() }

		self.train_fd = lambda x,y, kp=FLAGS.keep_prob: {
			p['x'] 			: x,
			p['y'] 			: y,
			p['kp'] 		: kp,
			p['lambda'] 	: FLAGS.l2_reg,
			p['sparsity'] 	: FLAGS.sparsity,
			p['coherency'] 	: FLAGS.coherency,
		}

		dropout   = lambda x: tf.nn.dropout(x, p['kp'])
		bxentropy = lambda x,y: -(y * tf.log(x + 1e-8) + (1. - y) * tf.log(1. - x + 1e-8))
		sq_err    = lambda x,y: (x - y) ** 2

		pad_mask  = tf.to_float(tf.not_equal(p['x'], self.embedding.pad_id))
		embedding = dropout( self.embedding.forward(p['x']) ) 

		print('Creating model...', end='', flush=True)
		self.global_step = tf.Variable(0, name='global_step', trainable=False)
		self.generator = Generator(embedding, pad_mask, p, nkwargs, dropout, bxentropy)
		self.encoder   = Encoder(embedding, pad_mask, p, nkwargs, dropout, sq_err)
		self.encoder.create_minimization(self.generator.z)
		self.generator.create_minimization(self.encoder.loss_vec, self.global_step)
		print('done')


	def train(self):
		print('Initializing variables...', end='', flush=True)
		logdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'log')
		writer = tf.summary.FileWriter(logdir, self.session.graph)
		saver  = tf.train.Saver()
		self.session.run(tf.global_variables_initializer())		
	
		checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint)
		if checkpoint and checkpoint.model_checkpoint_path:
			print('restoring previous checkpoint...', end='', flush=True)
			name = os.path.basename(checkpoint.model_checkpoint_path)
			saver.restore(self.session, os.path.join(FLAGS.checkpoint, name))
		merger = tf.summary.merge_all()
		print('done')

		print('Fetching data...', end='', flush=True)
		x,y   = read_data(FLAGS.training)
		train = ([self.embedding.words_to_ids(s) for s in x], y)
		x,y  = read_data(FLAGS.testing)
		test = ([self.embedding.words_to_ids(s) for s in x], y)
		print('done')
		
		for epoch in range(FLAGS.epochs):
			start_time = time.time()
			train_x, train_y = preprocess(train, FLAGS.batch, self.embedding.pad_id, 
										  FLAGS.maxlen)

			scost = ocost = tcost = p_one = 0
			for bx,by in zip(train_x, train_y):
				result = self.session.run([merger, 
									self.generator.train_g, self.encoder.train_e, 
									self.generator.reg, self.generator.obj, 
									self.encoder.loss, self.generator.z, 
									self.global_step],
									feed_dict=self.train_fd(bx, by))
				writer.add_summary(result[0], result[7])
				
				scost += result[3]
				ocost += result[4]
				tcost += result[5]
				p_one += np.sum(result[6]) / FLAGS.batch / len(bx[0])

			print('Regularization: ', scost / float(len(train_x)))
			print('Objective: ', ocost / float(len(train_x)))
			print('Prediction loss: ', tcost / float(len(train_x)))
			print('Generator Selection %: ', p_one / float(len(train_x)))

			if not epoch % 1:
				results = []
				ocost = tcost = 0
				test_x, test_y = preprocess(test, FLAGS.batch, self.embedding.pad_id, 
											FLAGS.maxlen)
				for bx,by in zip(test_x, test_y):
					preds, bz, gobj, eloss = self.session.run([
												self.encoder.preds, 
												self.generator.z, 
												self.generator.obj,
												self.encoder.loss],
												feed_dict=self.train_fd(bx, by, 1.))
					ocost += gobj
					tcost += eloss
					for p, x, y, z in zip(preds, bx.T, by, bz.T):
						w = self.embedding.ids_to_words(x)
						w = [u.replace('<pad>', '_') for u in w]
						r = [u if v == 1 else '_' for u,v in zip(w,z)]
						results.append((p, r, w, y))
					
				print('Test Objective: ', ocost / float(len(test_x)))
				print('Test Prediction loss: ',tcost / float(len(test_x)))

				with open(FLAGS.output, 'w+') as f:
					for p, r, w, y in results:
						f.write(json.dumps({
							'rationale' : ' '.join(r),
							'original'  : ' '.join(w),
							'y' : str(list(y)),
							'p' : str(list(p)),
						}) + '\n')

				saver.save(self.session, 
							os.path.join(FLAGS.checkpoint, 'GEN.model'), 
							global_step=self.global_step)

			print('Finished epoch %s in %.2f seconds\n' % (epoch, time.time() - start_time))
Example #11
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30, emb=None,
                   ngram_embedding=None, pixels=None, con_width=None, filters=None, pooling_size=None):
        """

        :param trained_model:
        :param scope:
        :param emb_dim:
        :param gru:
        :param rnn_dim:
        :param rnn_num:
        :param drop_out:
        :param rad_dim: radical embedding dimension
        :param emb:
        :param ngram_embedding: pretrained ngram embedding file
        :param pixels:
        :param con_width:
        :param filters:
        :param pooling_size:
        :return:
        """
        # trained_model: path where the trained model is stored
        if trained_model is not None:
            param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags, 'tag_scheme': self.tag_scheme,
                         'graphic': self.graphic, 'pic_size': self.pic_size, 'word_vec': self.word_vec,
                         'radical': self.radical, 'crf': self.crf, 'emb_dim': emb_dim, 'gru': gru, 'rnn_dim': rnn_dim,
                         'rnn_num': rnn_num, 'drop_out': drop_out, 'filter_size': con_width, 'filters': filters,
                         'pooling_size': pooling_size, 'font': self.font, 'buckets_char': self.buckets_char,
                         'ngram': self.ngram}
            print("RNN dimension is %d" % rnn_dim)
            print("RNN number is %d" % rnn_num)
            print("Character embedding size is %d" % emb_dim)
            print("Ngram embedding dimension is %d" % emb_dim)
            # save the model hyperparameters
            if self.metric == 'All':
                # rindex() returns the position of the last occurrence of the substring
                # used here to split the path so the metric name can be prefixed to the file name
                pindex = trained_model.rindex('/') + 1
                for m in self.all_metrics:
                    f_model = open(trained_model[:pindex] + m + '_' + trained_model[pindex:], 'w')
                    pickle.dump(param_dic, f_model)
                    f_model.close()
            else:
                f_model = open(trained_model, 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        # character embedding layer
        # why add 500 to the number of characters?
        # emb_dim is the feature dimension per character, settable via a command-line flag
        # weights holds the pretrained character embeddings, settable via a command-line flag
        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

        # radical embeddings
        # the Kangxi Dictionary lists 214 radicals.
        # only the radicals of common characters are used; rare characters and non-Chinese
        # characters fall back to two special symbols, giving 216 radical entries in total
        if self.radical:
            self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

        if self.ngram is not None:
            if ngram_embedding is not None:
                assert len(ngram_embedding) == len(self.ngram)
            else:
                ngram_embedding = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ngram_embedding[i],
                                                       name=str(i + 2) + 'gram_layer'))

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = \
            None, None, None, None, None, None

        if self.graphic:
            # using glyph image features requires a CNN
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
            wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
            wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = TimeDistributed(
                HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'), name='wrapper_3')
            wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)

        # hidden layer: its input concatenates the forward and backward RNN outputs, so the
        # input dimension is rnn_dim * 2; the output dimension is the number of tags
        output_wrapper = TimeDistributed(
            HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'),
            name='wrapper')

        # define model for each bucket
        # sentences in different buckets have different lengths, so each bucket needs its own model
        # bucket: the sentence length for this bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                # scope is tf.variable_scope("tagger", reuse=None, initializer=initializer)
                # reuse only needs to be set once; every later bucket then reuses the variables
                scope.reuse_variables()
            t1 = time()

            # the input sentences, as vectors of character ids
            # shape = (batch_size, sentence length)
            input_sentences = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            self.input_v.append([input_sentences])

            emb_set = []

            if self.word_vec:
                # look up the character embedding for each id
                # word_out: shape = (batch_size, sentence length, embedding dim (64))
                word_out = self.emb_layer(input_sentences)
                emb_set.append(word_out)

            if self.radical:
                # radical ids, shape = (batch_size, sentence length)
                input_radicals = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))

                self.input_v[-1].append(input_radicals)
                radical_out = self.radical_layer(input_radicals)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim * pixel_dim])
                self.input_p.append(input_p)

                pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
                pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
                pooling_out = tf.unstack(pooling_out, axis=1)

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)

            if self.window_size > 1:

                padding_size = int(np.floor(self.window_size / 2))
                word_padded = tf.pad(word_out, [[0, 0], [padding_size, padding_size], [0, 0]], 'CONSTANT')

                Ws = []
                for q in range(1, self.window_size + 1):
                    Ws.append(tf.get_variable("W_%d" % q, shape=[q * emb_dim, self.filters_number]))
                b = tf.get_variable("b", shape=[self.filters_number])

                z = [None for _ in range(0, bucket)]

                for q in range(1, self.window_size + 1):
                    for i in range(padding_size, bucket + padding_size):
                        low = i - int(np.floor((q - 1) / 2))
                        high = i + int(np.ceil((q + 1) / 2))
                        x = word_padded[:, low, :]
                        for j in range(low + 1, high):
                            x = tf.concat(values=[x, word_padded[:, j, :]], axis=1)
                        z_iq = tf.tanh(tf.nn.xw_plus_b(x, Ws[q - 1], b))
                        if z[i - padding_size] is None:
                            z[i - padding_size] = z_iq
                        else:
                            z[i - padding_size] = tf.concat([z[i - padding_size], z_iq], axis=1)

                z = tf.stack(z, axis=1)
                values, indices = tf.nn.top_k(z, sorted=False, k=emb_dim)

                # highway layer
                X = tf.unstack(word_out, axis=1)
                Conv_X = tf.unstack(values, axis=1)
                X_hat = []
                W_t = tf.get_variable("W_t", shape=[emb_dim, emb_dim])
                b_t = tf.get_variable("b_t", shape=[emb_dim])
                for x, conv_x in zip(X, Conv_X):
                    T_x = tf.sigmoid(tf.nn.xw_plus_b(x, W_t, b_t))
                    X_hat.append(tf.multiply(conv_x, T_x) + tf.multiply(x, 1 - T_x))
                X_hat = tf.stack(X_hat, axis=1)
                emb_set.append(X_hat)
            if len(emb_set) > 1:
                # concatenate all per-character features (character, radical, n-gram, glyph, etc.)
                emb_out = tf.concat(axis=2, values=emb_set)

            else:
                emb_out = emb_set[0]

            # rnn_out concatenates the outputs of the forward and backward RNNs
            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                             name='BiLSTM' + str(bucket), scope='BiRNN')(self.highway(emb_out, "tag"), input_sentences)

            # apply the fully connected layer, Wx + b, to get the final output
            output = output_wrapper(rnn_out)
            # why [output] rather than output?
            self.output.append([output])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])

            self.bucket_dit[bucket] = idx

            # language model
            lm_rnn_dim = rnn_dim
            with tf.variable_scope('LM-BiRNN'):
                if gru:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
                else:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)

                if rnn_num > 1:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_fw_rnn_cell] * rnn_num, state_is_tuple=True)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_bw_rnn_cell] * rnn_num, state_is_tuple=True)
            lm_rnn_output = BiLSTM(lm_rnn_dim, fw_cell=lm_fw_rnn_cell,
                                   bw_cell=lm_bw_rnn_cell, p=dr,
                                   name='LM-BiLSTM' + str(bucket),
                                   scope='LM-BiRNN')(self.highway(emb_set[0]), input_sentences)

            lm_output_wrapper = TimeDistributed(
                HiddenLayer(lm_rnn_dim * 2, self.nums_chars + 2, activation='linear', name='lm_hidden'),
                name='lm_wrapper')
            lm_final_output = lm_output_wrapper(lm_rnn_output)
            self.lm_predictions.append([lm_final_output])
            self.lm_groundtruthes.append([tf.placeholder(tf.int32, [None, bucket], name='lm_targets' + str(bucket))])

            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert \
            len(self.input_v) == len(self.output) and \
            len(self.output) == len(self.output_) and \
            len(self.lm_predictions) == len(self.lm_groundtruthes) and \
            len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
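
The highway step above gates each original character vector against its convolutional counterpart: X_hat = T(x) * conv_x + (1 - T(x)) * x with T(x) = sigmoid(x W_t + b_t). A minimal NumPy restatement of that formula (shapes are illustrative):

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

emb_dim = 64
x = np.random.randn(8, emb_dim)        # original embeddings at one position
conv_x = np.random.randn(8, emb_dim)   # top-k convolutional features
W_t = np.random.randn(emb_dim, emb_dim)
b_t = np.zeros(emb_dim)

T_x = sigmoid(x.dot(W_t) + b_t)            # transform gate
x_hat = conv_x * T_x + x * (1.0 - T_x)     # highway output, same shape as x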
Example #12
    def build_model(self, load_model=None):

        a = Input(shape=(self.max_length, ), dtype='int32',
                  name='words_1')  # For "premise"
        b = Input(shape=(self.max_length, ), dtype='int32',
                  name='words_2')  # For "hypothesis"

        # ------- Embedding Layer -------
        # Using "Glove" pre-trained embedding matrix as our initial weights
        embedding_layer = EmbeddingLayer(self.vocab_size,
                                         self.embedding_size,
                                         self.max_length,
                                         self.hidden_unit,
                                         init_weights=self.embedding_matrix,
                                         dropout=self.dropout_rate,
                                         nr_tune=5000)
        embedded_a = embedding_layer(a)
        embedded_b = embedding_layer(b)

        # ------- BiLSTM Layer -------
        # BiLSTM learns to represent a word and its context
        encoded_a = BiLSTM_Layer(self.max_length, self.hidden_unit)(embedded_a)
        encoded_b = BiLSTM_Layer(self.max_length, self.hidden_unit)(embedded_b)

        # ------- Attention Layer -------
        attention_ab = Lambda(attention, attention_output,
                              name='attention')([encoded_a, encoded_b])

        # ------- Soft-Alignment Layer -------
        # Modeling local inference requires some form of hard or soft alignment to associate the
        # relevant sub-components between a premise and a hypothesis.
        # Inter-sentence "alignment" (attention) softly aligns each word to the content of the hypothesis (or premise).
        align_alpha = Lambda(
            attention_softmax3d,
            attention_softmax3d_output,
            name='soft_alignment_a')([attention_ab, encoded_b])
        align_beta = Lambda(attention_softmax3d,
                            attention_softmax3d_output,
                            name='soft_alignment_b')([attention_ab, encoded_a])

        # ------- Enhancement Layer -------
        # Compute the difference and the element-wise product for the tuples
        # <encoded_a, align_alpha> and <encoded_b, align_beta>.
        # These operations help sharpen local inference information between the elements of each
        # tuple and capture inference relationships such as contradiction.
        sub_a = Lambda(substract, substract_output,
                       name='substract_a')([encoded_a, align_alpha])
        mul_a = Lambda(multiply, multiply_output,
                       name='multiply_a')([encoded_a, align_alpha])

        sub_b = Lambda(substract, substract_output,
                       name='substract_b')([encoded_b, align_beta])
        mul_b = Lambda(multiply, multiply_output,
                       name='multiply_b')([encoded_b, align_beta])

        # Note: merge(..., mode='concat') is the Keras 1.x API; Keras 2 replaced it with concatenate([...]).
        m_a = merge([encoded_a, align_alpha, sub_a, mul_a],
                    mode='concat')  # shape=(batch_size, time-steps, 4 * units)
        m_b = merge([encoded_b, align_beta, sub_b, mul_b],
                    mode='concat')  # shape=(batch_size, time-steps, 4 * units)

        # ------- Composition Layer -------
        comp_a = Composition_Layer(self.hidden_unit, self.max_length)(m_a)
        comp_b = Composition_Layer(self.hidden_unit, self.max_length)(m_b)

        # ------- Pooling Layer -------
        preds = Pooling_Layer(self.hidden_unit,
                              self.n_classes,
                              dropout=self.dropout_rate,
                              l2_weight_decay=self.l2_weight_decay)(comp_a,
                                                                    comp_b)

        model = Model(inputs=[a, b], outputs=[preds])
        model.compile(optimizer=Adam(lr=self.learning_rate),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        if load_model is not None:
            print('Loading pre-trained weights from \'{}\'...'.format(
                load_model))
            model.load_weights(load_model)

        return model
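
The attention and soft-alignment steps above delegate to helpers (attention, attention_softmax3d, and their output-shape companions) defined elsewhere in the repository. In the ESIM formulation they amount to a batched dot-product score matrix e_ij = a_i . b_j, a softmax over one sentence axis, and a weighted sum of the other sentence's encodings. A hedged sketch of what the helpers likely compute, using the Keras backend (the originals may differ in details such as how the score matrix is transposed for the hypothesis side):

    from keras import backend as K

    def attention(inputs):
        # Unnormalized alignment scores e_ij = a_i . b_j
        a, b = inputs                          # (batch, len_a, units), (batch, len_b, units)
        return K.batch_dot(a, b, axes=[2, 2])  # (batch, len_a, len_b)

    def attention_softmax3d(inputs):
        # Softmax over the last axis, then a weighted sum of the other sentence.
        scores, vectors = inputs               # (batch, len_a, len_b), (batch, len_b, units)
        exp = K.exp(scores - K.max(scores, axis=-1, keepdims=True))
        weights = exp / K.sum(exp, axis=-1, keepdims=True)
        return K.batch_dot(weights, vectors, axes=[2, 1])  # (batch, len_a, units)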
    def main_graph(self,
                   trained_model,
                   scope,
                   emb_dim,
                   gru,
                   rnn_dim,
                   rnn_num,
                   drop_out=0.5,
                   emb=None,
                   ngram_embedding=None):
        """
        :param trained_model:
        :param scope:
        :param emb_dim:
        :param gru:
        :param rnn_dim:
        :param rnn_num:
        :param drop_out:
        :param emb:
        :return:
        """
        # trained_model: path where the model is stored
        if trained_model is not None:
            param_dic = {
                'nums_chars': self.nums_chars,
                'nums_tags': self.nums_tags,
                'tag_scheme': self.tag_scheme,
                'crf': self.crf,
                'emb_dim': emb_dim,
                'gru': gru,
                'rnn_dim': rnn_dim,
                'rnn_num': rnn_num,
                'drop_out': drop_out,
                'buckets_char': self.buckets_char,
                'ngram': self.ngram
            }
            print "RNN dimension is %d" % rnn_dim
            print "RNN number is %d" % rnn_num
            print "Character embedding size is %d" % emb_dim
            # Save the model hyper-parameters
            if self.metric == 'All':
                # rindex() returns the position of the last occurrence of the substring,
                # used here to split the directory from the model file name
                pindex = trained_model.rindex('/') + 1
                for m in self.all_metrics:
                    f_model = open(
                        trained_model[:pindex] + m + '_' +
                        trained_model[pindex:], 'w')
                    pickle.dump(param_dic, f_model)
                    f_model.close()
            else:
                f_model = open(trained_model, 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        # Character embedding layer.
        # Why add 500 to the number of characters? Presumably to reserve slots for
        # characters unseen during training (the n-gram layers below add headroom
        # in the same way).
        # emb_dim is the feature dimension per character and can be set from the command line.
        # weights holds the pre-trained character embeddings and can also be set from the command line.
        self.emb_layer = EmbeddingLayer(self.nums_chars + 500,
                                        emb_dim,
                                        weights=emb,
                                        name='emb_layer')

        if self.ngram is not None:
            if ngram_embedding is not None:
                assert len(ngram_embedding) == len(self.ngram)
            else:
                ngram_embedding = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(
                    EmbeddingLayer(n_gram + 1000 * (i + 2),
                                   emb_dim,
                                   weights=ngram_embedding[i],
                                   name=str(i + 2) + 'gram_layer'))

        # Hidden layer: the input is the forward RNN output concatenated with the
        # backward RNN output, so the input dimension is rnn_dim * 2.
        # The output dimension is the number of tags.
        tag_output_wrapper = TimeDistributed(HiddenLayer(rnn_dim * 2,
                                                         self.nums_tags[0],
                                                         activation='linear',
                                                         name='tag_hidden'),
                                             name='tag_output_wrapper')

        if self.char_freq_loss:
            freq_output_wrapper = TimeDistributed(HiddenLayer(
                rnn_dim * 2, 1, activation='sigmoid', name='freq_hidden'),
                                                  name='freq_output_wrapper')

        if self.co_train:
            lm_fw_wrapper = TimeDistributed(HiddenLayer(rnn_dim,
                                                        self.nums_chars + 2,
                                                        activation='linear',
                                                        name='lm_fw_hidden'),
                                            name='lm_fw_wrapper')
            lm_bw_wrapper = TimeDistributed(HiddenLayer(rnn_dim,
                                                        self.nums_chars + 2,
                                                        activation='linear',
                                                        name='lm_bw_hidden'),
                                            name='lm_bw_wrapper')

        # define model for each bucket
        # Sentences in different buckets have different lengths, so each bucket needs its own model
        # bucket: the sentence length of this bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                # scope is tf.variable_scope("tagger", reuse=None, initializer=initializer);
                # reuse only needs to be turned on once, and all later buckets then reuse the variables
                scope.reuse_variables()
            t1 = time()

            # The input sentences, as sequences of integer character IDs
            # (not one-hot vectors; the embedding lookup happens below)
            # shape = (batch_size, sentence length)
            input_sentences = tf.placeholder(tf.int32, [None, bucket],
                                             name='input_' + str(bucket))

            self.input_v.append([input_sentences])

            emb_set = []
            word_out = self.emb_layer(input_sentences)
            emb_set.append(word_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket],
                                             name='input_g' + str(i) +
                                             str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if len(emb_set) > 1:
                # Concatenate the different character-level embeddings directly
                # (character embeddings, radicals, n-grams, image features, etc.)
                word_embeddings = tf.concat(axis=2, values=emb_set)
            else:
                word_embeddings = emb_set[0]

            # The BiRNN returns the forward and backward outputs separately here
            # (concat_output=False); they are concatenated further below
            rnn_out_fw, rnn_out_bw = BiRNN(rnn_dim,
                                           p=dr,
                                           concat_output=False,
                                           gru=gru,
                                           name='BiLSTM' + str(bucket),
                                           scope='Tag-BiRNN')(word_embeddings,
                                                              input_sentences)

            tag_rnn_out_fw, tag_rnn_out_bw = rnn_out_fw, rnn_out_bw
            if self.co_train:
                if self.highway_layers > 0:
                    tag_rnn_out_fw = highway_network(rnn_out_fw,
                                                     self.highway_layers,
                                                     True,
                                                     is_train=True,
                                                     scope="tag_fw")
                    tag_rnn_out_bw = highway_network(rnn_out_bw,
                                                     self.highway_layers,
                                                     True,
                                                     is_train=True,
                                                     scope="tag_bw")
            tag_rnn_out = tf.concat(values=[tag_rnn_out_fw, tag_rnn_out_bw],
                                    axis=2)

            # Apply the fully connected layer (Wx + b) to obtain the final output
            output = tag_output_wrapper(tag_rnn_out)
            # Outputs are wrapped in a list per bucket (see the note on [output] above)
            self.output.append([output])

            self.output_.append([
                tf.placeholder(tf.int32, [None, bucket],
                               name='tags' + str(bucket))
            ])

            self.bucket_dit[bucket] = idx

            if self.co_train:
                # language model
                lm_rnn_out_fw, lm_rnn_out_bw = rnn_out_fw, rnn_out_bw
                if self.highway_layers > 0:
                    lm_rnn_out_fw = highway_network(rnn_out_fw,
                                                    self.highway_layers,
                                                    True,
                                                    is_train=True,
                                                    scope="lm_fw")
                    lm_rnn_out_bw = highway_network(rnn_out_bw,
                                                    self.highway_layers,
                                                    True,
                                                    is_train=True,
                                                    scope="lm_bw")

                self.lm_fw_predictions.append([lm_fw_wrapper(lm_rnn_out_fw)])
                self.lm_bw_predictions.append([lm_bw_wrapper(lm_rnn_out_bw)])
                self.lm_fw_groundtruthes.append([
                    tf.placeholder(tf.int32, [None, bucket],
                                   name='lm_fw_targets' + str(bucket))
                ])
                self.lm_bw_groundtruthes.append([
                    tf.placeholder(tf.int32, [None, bucket],
                                   name='lm_bw_targets' + str(bucket))
                ])

            if self.char_freq_loss:
                freq_rnn_out_fw, freq_rnn_out_bw = rnn_out_fw, rnn_out_bw
                if self.highway_layers > 0:
                    freq_rnn_out_fw = highway_network(rnn_out_fw,
                                                      self.highway_layers,
                                                      True,
                                                      is_train=True,
                                                      scope="freq_fw")
                    freq_rnn_out_bw = highway_network(rnn_out_bw,
                                                      self.highway_layers,
                                                      True,
                                                      is_train=True,
                                                      scope="freq_bw")
                freq_rnn_out = tf.concat(
                    values=[freq_rnn_out_fw, freq_rnn_out_bw], axis=2)

                self.char_freq_groundtruthes.append([
                    tf.placeholder(tf.float32, [None, bucket],
                                   name='freq_targets_%d' % bucket)
                ])
                self.char_freq_predictions.append(
                    [freq_output_wrapper(freq_rnn_out)])

            print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

        assert \
            len(self.input_v) == len(self.output) and \
            len(self.output) == len(self.output_) and \
            len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
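
The bucket loop relies on the TF 1.x variable-sharing idiom: placeholders are created per bucket, while scope.reuse_variables() (called once, after the first bucket) makes every subsequent tf.get_variable call return the weights created for the first bucket. A minimal sketch of that pattern (scope name, table size, and bucket lengths are illustrative assumptions):

    import tensorflow as tf

    def bucket_graph(bucket_len):
        # Placeholders are per-bucket; variables fetched with get_variable are shared.
        x = tf.placeholder(tf.int32, [None, bucket_len], name='input_%d' % bucket_len)
        table = tf.get_variable('emb_table', [6000, 100])
        return tf.nn.embedding_lookup(table, x)

    with tf.variable_scope('tagger') as scope:
        outputs = []
        for idx, bucket_len in enumerate([10, 20, 50]):
            if idx == 1:
                scope.reuse_variables()  # later buckets reuse the first bucket's weights
            outputs.append(bucket_graph(bucket_len))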