Ejemplo n.º 1
0
        def model_fn(features, labels, mode, params):
            """Estimator model_fn: embed -> encode -> softmax, branch on mode."""
            # --- embedding ---
            if self.use_language_model:
                # LM encoders consume raw features directly; no embedding layer.
                self.embedding = None
            else:
                self.init_embedding()
                self.embed_query = self.embedding(features=features,
                                                  name='x_query')

            # --- encoder (runtime knobs arrive through params) ---
            self.encoder.keep_prob = params['keep_prob']
            self.encoder.is_training = params['is_training']
            global_step = tf.train.get_or_create_global_step()
            if self.use_language_model:
                encoded = self.encoder(features=features)
            else:
                encoded = self.encoder(self.embed_query,
                                       name='x_query',
                                       features=features)
            # NOTE(review): assumes the encoder output is already class
            # logits (softmax is applied directly) — confirm encoder contract.
            probs = tf.nn.softmax(encoded)

            # --- predict ---
            if mode == tf.estimator.ModeKeys.PREDICT:
                return tf.estimator.EstimatorSpec(
                    mode,
                    predictions={
                        'encode': encoded,
                        'logit': probs,
                        'label': features['label']
                    })

            # --- loss ---
            loss = self.cal_loss(probs, labels, self.batch_size, self.conf)

            # --- train ---
            if mode == tf.estimator.ModeKeys.TRAIN:
                if self.use_clr:
                    # Cyclic LR replaces the static learning rate.
                    self.learning_rate = cyclic_learning_rate(
                        global_step=global_step,
                        learning_rate=self.learning_rate,
                        mode=self.clr_mode)
                train_op = get_train_op(global_step,
                                        self.optimizer_type,
                                        loss,
                                        self.learning_rate,
                                        clip_grad=5)
                return tf.estimator.EstimatorSpec(mode,
                                                  loss=loss,
                                                  train_op=train_op)

            # --- eval ---
            if mode == tf.estimator.ModeKeys.EVAL:
                # Metrics intentionally left empty for now.
                eval_metric_ops = {}
                return tf.estimator.EstimatorSpec(
                    mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
Ejemplo n.º 2
0
        def model_fn(features, labels, mode, params):
            """Matching/similarity model_fn.

            Embeds the query (and, for pair-style records, the sample),
            encodes according to ``self.sim_mode``, then branches on the
            Estimator mode.
            """
            ############# embedding #################
            if self.use_language_model:
                self.embedding = None
            else:
                self.init_embedding()
                # Every tfrecords mode embeds the query; pair modes also
                # embed the candidate sample (original duplicated this line
                # in both branches).
                self.embed_query = self.embedding(features=features,
                                                  name='x_query')
                if self.tfrecords_mode != 'class':
                    self.embed_sample = self.embedding(features=features,
                                                       name='x_sample')
            #############  encoder  #################
            self.encoder.keep_prob = params['keep_prob']
            self.encoder.is_training = params['is_training']
            global_step = tf.train.get_or_create_global_step()
            if self.sim_mode == 'cross':
                # NOTE(review): 'cross' requires embed_sample, which is only
                # set when tfrecords_mode != 'class' — verify caller config.
                if not self.use_language_model:
                    pred = self.encoder(x_query=self.embed_query,
                                        x_sample=self.embed_sample,
                                        features=features)
                else:
                    pred = self.encoder(features=features)
            elif self.sim_mode == 'represent':
                if not self.use_language_model:
                    pred = self.encoder(self.embed_query,
                                        name='x_query',
                                        features=features)
                else:
                    pred = self.encoder(features=features)
            else:
                raise ValueError('unknown sim mode')

            ############### predict ##################
            if mode == tf.estimator.ModeKeys.PREDICT:
                predictions = {
                    'pred': pred,
                    'label': features['label']
                }
                return tf.estimator.EstimatorSpec(mode, predictions=predictions)

            ############### loss ##################
            # Fix: build the pos/neg target tensors only after the PREDICT
            # early-return; they feed cal_loss exclusively, and creating them
            # earlier added dead nodes to the inference graph.
            half_batch = int(self.batch_size / 2)
            pos_target = tf.ones(shape=[half_batch], dtype=tf.float32)
            neg_target = tf.zeros(shape=[half_batch], dtype=tf.float32)
            loss = self.cal_loss(pred,
                                 labels,
                                 pos_target,
                                 neg_target,
                                 self.batch_size,
                                 self.conf)
            ############### train ##################
            if mode == tf.estimator.ModeKeys.TRAIN:
                if self.use_clr:
                    # Cyclic LR replaces the static learning rate.
                    self.learning_rate = cyclic_learning_rate(
                        global_step=global_step,
                        learning_rate=self.learning_rate,
                        mode=self.clr_mode)
                optimizer = get_train_op(global_step,
                                         self.optimizer_type,
                                         loss,
                                         self.learning_rate,
                                         clip_grad=5)
                return tf.estimator.EstimatorSpec(mode,
                                                  loss=loss,
                                                  train_op=optimizer)
            ############### eval ##################
            if mode == tf.estimator.ModeKeys.EVAL:
                # Metrics intentionally left empty for now.
                eval_metric_ops = {}
                return tf.estimator.EstimatorSpec(
                    mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
Ejemplo n.º 3
0
    def __init__(self, conf):
        """Build the NER model graph from a config mapping.

        Every key of *conf* is also exposed as an instance attribute, then
        data is loaded, the embedding/encoder graph is assembled, and a
        session, saver and (optionally) LM warm-start are set up.
        """
        self.conf = conf
        for attr in conf:
            setattr(self, attr, conf[attr])
        self.task_type = 'ner'
        self.clip_grad = 5.0
        # (Fix: removed a no-op ``self.optimizer_type = self.optimizer_type``
        # self-assignment; the attribute is already set by the loop above.)
        # Inverse mapping: tag id -> tag string.
        self.label2tag = {
            self.tag2label[item]: item
            for item in self.tag2label
        }
        self.shuffle = True

        self.is_training = tf.placeholder(tf.bool, [], name="is_training")
        self.global_step = tf.Variable(0, trainable=False)
        # Dropout keep prob: 0.5 while training, 1.0 at inference time.
        self.keep_prob = tf.where(self.is_training, 0.5, 1.0)

        self.pre = Preprocess()
        self.text_list, self.label_list = load_ner_data(self.train_path)
        if self.maxlen == -1:
            # -1 means "derive maxlen from the longest training sentence".
            self.maxlen = max([len(text.split()) for text in self.text_list])
        self.trans_label_list(self.label_list, self.tag2label)

        self.text_list = [
            self.pre.get_dl_input_by_text(text) for text in self.text_list
        ]

        if not self.use_language_model:
            # Build vocabulary map using training data.
            self.vocab_dict = embedding[self.embedding_type].build_dict(
                dict_path=self.dict_path, text_list=self.text_list)

            # Define embedding object by embedding_type.
            self.embedding = embedding[self.embedding_type](
                text_list=self.text_list,
                vocab_dict=self.vocab_dict,
                dict_path=self.dict_path,
                random=self.rand_embedding,
                batch_size=self.batch_size,
                maxlen=self.maxlen,
                embedding_size=self.embedding_size,
                conf=self.conf)
            self.embed = self.embedding(name='x')
        else:
            self.embedding = None
        self.labels = tf.placeholder(tf.int32,
                                     shape=[None, None],
                                     name="labels")
        self.sequence_lengths = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name="sequence_lengths")

        # Model params for the encoder.
        # Fix: copy conf instead of aliasing it, so the caller's dict is not
        # mutated with graph placeholders by the update() below.
        params = dict(conf)
        params.update({
            "maxlen": self.maxlen,
            "embedding_size": self.embedding_size,
            "keep_prob": self.keep_prob,
            "is_training": self.is_training,
            "batch_size": self.batch_size,
            "num_output": self.num_class
        })

        self.encoder = encoder[self.encoder_type](**params)
        if not self.use_language_model:
            self.out = self.encoder(self.embed, 'query', middle_flag=True)
        else:
            self.out = self.encoder()
        self.output_nodes = self.out.name.split(':')[0]
        # NOTE(review): self.loss looks like a method that presumably rebinds
        # self.loss to a loss tensor — confirm before refactoring this call.
        self.loss(self.out)
        # NOTE(review): the positional order here (clip_grad before
        # learning_rate) differs from other get_train_op call sites in the
        # project — verify against get_train_op's signature.
        self.optimizer = get_train_op(self.global_step, self.optimizer_type,
                                      self.loss, self.clip_grad,
                                      self.learning_rate)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(tf.global_variables())
        if self.use_language_model:
            # Warm-start LM weights from the configured checkpoint.
            tvars = tf.trainable_variables()
            init_checkpoint = conf['init_checkpoint_path']
            (assignment_map,
             initialized_variable_names) = get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
Ejemplo n.º 4
0
        def model_fn(features, labels, mode, params):
            """Similarity model_fn: embed, encode per sim_mode, branch on mode."""
            ############# embedding #################
            if not self.use_language_model:
                # NOTE(review): bare init_embedding() / cal_loss() (not
                # self.*) — confirm these are module-level helpers.
                self.embedding = init_embedding()
                # Every tfrecords mode embeds the query; pair modes also
                # embed the candidate sample (original duplicated this line
                # in both branches).
                self.embed_query = self.embedding(features=features,
                                                  name='x_query')
                if self.tfrecords_mode != 'class':
                    self.embed_sample = self.embedding(features=features,
                                                       name='x_sample')
            else:
                self.embedding = None
            #############  encoder  #################
            self.encoder.keep_prob = params['keep_prob']
            self.encoder.is_training = params['is_training']
            global_step = tf.train.get_or_create_global_step()
            if self.sim_mode == 'cross':
                if not self.use_language_model:
                    output = self.encoder(x_query=self.embed_query,
                                          x_sample=self.embed_sample,
                                          features=features)
                else:
                    output = self.encoder(features=features)

            elif self.sim_mode == 'represent':
                if not self.use_language_model:
                    output = self.encoder(self.embed_query,
                                          name='x_query',
                                          features=features)
                else:
                    output = self.encoder(features=features)
            else:
                raise ValueError('unknown sim mode')

            ############### predict ##################
            if mode == tf.estimator.ModeKeys.PREDICT:
                # Fix: build the probability tensor once; the original
                # constructed tf.nn.softmax(output) twice in the graph
                # (once for 'pred', once for 'score').
                if self.num_output == 2:
                    score = tf.nn.softmax(output)[:, 0]
                else:
                    score = tf.nn.sigmoid(output)
                predictions = {
                    'encode': output,
                    'pred': tf.cast(tf.greater(score, 0.5), tf.int32),
                    'score': score,
                    'label': features['label']
                }
                return tf.estimator.EstimatorSpec(mode,
                                                  predictions=predictions)
            ############### loss ##################
            loss = cal_loss(output, labels, self.batch_size, self.conf)
            ############### train ##################
            if mode == tf.estimator.ModeKeys.TRAIN:
                if self.use_clr:
                    # Cyclic LR replaces the static learning rate.
                    self.learning_rate = cyclic_learning_rate(
                        global_step=global_step,
                        learning_rate=self.learning_rate,
                        mode=self.clr_mode)
                optimizer = get_train_op(global_step,
                                         self.optimizer_type,
                                         loss,
                                         self.learning_rate,
                                         clip_grad=5)
                return tf.estimator.EstimatorSpec(mode,
                                                  loss=loss,
                                                  train_op=optimizer)
            ############### eval ##################
            if mode == tf.estimator.ModeKeys.EVAL:
                # Metrics intentionally left empty for now.
                eval_metric_ops = {}
                return tf.estimator.EstimatorSpec(
                    mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)