Example 1
    def infer(self, dev_data):
        hparams = self.hparams
        sess = self.sess
        assert len(dev_data[0]) == len(
            dev_data[1]), "Size of features data must be equal to size of labels"
        preds = []
        for idx in range(len(dev_data[0]) // hparams.batch_size + 1):
            # Slice one batch; the last batch may be smaller than batch_size.
            batch = dev_data[0][idx * hparams.batch_size:
                                min((idx + 1) * hparams.batch_size, len(dev_data[0]))]
            if len(batch) == 0:
                break
            batch = utils.hash_batch(batch, hparams)
            label = dev_data[1][idx * hparams.batch_size:
                                min((idx + 1) * hparams.batch_size, len(dev_data[1]))]
            # Dropout and batch normalization are disabled at inference time:
            # use_norm is False and every dropout keep probability is set to 1.
            pred = sess.run(self.prob,
                            feed_dict={self.features: batch,
                                       self.label: label,
                                       self.use_norm: False,
                                       self.dnn_dropout: len(hparams.dnn_dropout) * [1]})
            preds.append(pred)
        preds = np.concatenate(preds)
        return preds
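
The repository's utils.hash_batch is not part of this listing; the sketch below is only a minimal guess at the feature-hashing idea the comments describe, where each raw feature value is mapped into a fixed id space so it can index an embedding table without building a vocabulary. The function name matches the call above, but the hash_ids field and the md5 scheme are assumptions, not the actual implementation.

import hashlib

import numpy as np

def hash_batch(batch, hparams):
    # Map every raw feature value into [0, hparams.hash_ids) with a stable hash.
    # hparams.hash_ids is a hypothetical field; the real code may differ.
    def stable_hash(value):
        digest = hashlib.md5(str(value).encode('utf-8')).hexdigest()
        return int(digest, 16) % hparams.hash_ids

    batch = np.asarray(batch)
    hashed = [stable_hash(v) for v in batch.reshape(-1)]
    return np.asarray(hashed).reshape(batch.shape)

A stable hash (rather than Python's built-in, per-process-salted hash()) keeps feature ids consistent between training and inference runs.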
Example 2
    def train(self, train_data, dev_data):
        hparams = self.hparams  # hyperparameters
        sess = self.sess        # TensorFlow session
        assert len(train_data[0]) == len(
            train_data[1]), "Size of features data must be equal to size of labels"
        # Training loop.
        for epoch in range(hparams.epoch):
            info = {'loss': [], 'norm': []}
            start_time = time.time()
            for idx in range(math.ceil(
                    len(train_data[0]) / hparams.batch_size)):
                # train_data[0] holds the features, train_data[1] the labels.
                # Slice one batch; the last batch may be smaller than batch_size.
                batch = train_data[0][idx * hparams.batch_size:min(
                    (idx + 1) * hparams.batch_size, len(train_data[0]))]

                # Hash every feature value (similar in spirit to one-hot encoding).
                batch = utils.hash_batch(batch, hparams)

                # Slice the matching batch of labels.
                label = train_data[1][idx * hparams.batch_size:min(
                    (idx + 1) * hparams.batch_size, len(train_data[1]))]

                # One optimization step with batch normalization and dropout enabled.
                loss, _, norm = sess.run(
                    [self.loss, self.update, self.grad_norm],
                    feed_dict={self.features: batch,
                               self.label: label,
                               self.use_norm: True,
                               self.second_order_dropout: hparams.second_order_dropout,
                               self.dnn_dropout: hparams.dnn_dropout})

                info['loss'].append(loss)
                info['norm'].append(norm)
                if (idx + 1) % hparams.num_display_steps == 0:
                    info['learning_rate'] = hparams.learning_rate
                    info['train_ppl'] = np.mean(info['loss'])
                    info['avg_grad_norm'] = np.mean(info['norm'])
                    utils.print_step_info("  ", epoch, idx + 1, info)
                    info = {'loss': [], 'norm': []}
                # eval saves a checkpoint whenever the model improves on dev_data.
                if (idx + 1) % hparams.num_eval_steps == 0 and dev_data:
                    T = (time.time() - start_time)
                    self.eval(T, dev_data, hparams, sess)

        # Restore the best checkpoint saved during eval, run a final evaluation,
        # then remove the temporary checkpoint directory.
        self.saver.restore(sess, 'model_tmp/model')
        T = (time.time() - start_time)
        self.eval(T, dev_data, hparams, sess)
        os.system("rm -r model_tmp")
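
eval itself is not shown in this listing, but the unconditional saver.restore(sess, 'model_tmp/model') at the end of train only makes sense if eval checkpoints the weights whenever the dev metric improves. A minimal sketch of that pattern, assuming an AUC metric and a best_score attribute (both hypothetical; the repository's actual eval may differ):

import os

from sklearn.metrics import roc_auc_score  # assumed metric; the repo may use another

class ModelEvalSketch:
    # Hypothetical: illustrates the checkpoint-on-improvement pattern that the
    # final saver.restore in train relies on.
    def eval(self, T, dev_data, hparams, sess):
        preds = self.infer(dev_data)               # dropout/BN disabled (Example 1)
        score = roc_auc_score(dev_data[1], preds)  # dev metric (assumption)
        if score > getattr(self, 'best_score', float('-inf')):
            self.best_score = score                # new best: overwrite checkpoint
            if not os.path.exists('model_tmp'):
                os.mkdir('model_tmp')
            self.saver.save(sess, 'model_tmp/model')
        print('eval: time %.1fs, score %.4f' % (T, score))

Under this scheme, model_tmp/model always holds the weights from the best dev-set evaluation, which is exactly what train restores before its final eval.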
Example 3
    def train(self, train_data, dev_data=None):
        """
        train_data: (num_train_data x field_size features, num_train_data x 1 labels)
        dev_data:   (num_dev_data x field_size features, num_dev_data x 1 labels)
        """

        hparams = self.hparams
        sess = self.sess
        assert len(train_data[0]) == len(train_data[1])

        for epoch in range(hparams.epoch):
            info = {'loss': [], 'norm': []}
            start_time = time.time()
            for idx in range(math.ceil(
                    len(train_data[0]) / hparams.batch_size)):
                # Slice one batch and hash every feature value.
                batch = train_data[0][idx * hparams.batch_size:
                                      min((idx + 1) * hparams.batch_size, len(train_data[0]))]
                batch = utils.hash_batch(batch, hparams)
                label = train_data[1][idx * hparams.batch_size:
                                      min((idx + 1) * hparams.batch_size, len(train_data[1]))]
                # One training step.
                loss, _, norm = sess.run([self.loss, self.update, self.grad_norm],
                                         feed_dict={self.features: batch, self.label: label})
                # Log progress.
                info['loss'].append(loss)
                info['norm'].append(norm)
                if (idx + 1) % hparams.num_display_steps == 0:
                    info['learning_rate'] = hparams.learning_rate
                    info['train_ppl'] = np.mean(info['loss'])
                    info['avg_grad_norm'] = np.mean(info['norm'])
                    utils.print_step_info("  ", epoch, idx + 1, info)
                    info = {'loss': [], 'norm': []}
                if (idx + 1) % hparams.num_eval_steps == 0 and dev_data:
                    T = (time.time() - start_time)
                    self.eval(T, dev_data, hparams, sess)
        # Restore the best checkpoint saved during eval, evaluate once more,
        # then remove the temporary checkpoint directory.
        self.saver.restore(sess, 'model_tmp/model')
        T = (time.time() - start_time)
        self.eval(T, dev_data, hparams, sess)
        os.system("rm -r model_tmp")
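
Putting the pieces together, a driver for this train method only needs an hparams object exposing the fields the loop reads (epoch, batch_size, learning_rate, num_display_steps, num_eval_steps) plus (features, labels) tuples. The Model class name and every value below are illustrative assumptions, not the repository's defaults:

from argparse import Namespace

hparams = Namespace(
    epoch=3,                 # passes over the training data
    batch_size=1024,
    learning_rate=0.001,
    num_display_steps=100,   # log every 100 batches
    num_eval_steps=1000,     # evaluate (and maybe checkpoint) every 1000 batches
)

model = Model(hparams)               # hypothetical constructor
model.train(train_data, dev_data)    # train_data = (features, labels)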
Example 4
    def get_embedding(self, dev_data):
        hparams = self.hparams
        sess = self.sess
        assert len(dev_data[0]) == len(
            dev_data[1]), "Size of features data must be equal to size of labels"
        embedding = []
        for idx in range(len(dev_data[0]) // hparams.batch_size + 1):
            # Slice one batch; the last batch may be smaller than batch_size.
            batch = dev_data[0][idx * hparams.batch_size:
                                min((idx + 1) * hparams.batch_size, len(dev_data[0]))]
            if len(batch) == 0:
                break
            batch = utils.hash_batch(batch, hparams)
            label = dev_data[1][idx * hparams.batch_size:
                                min((idx + 1) * hparams.batch_size, len(dev_data[1]))]
            # Fetch the embedding tensor for this batch.
            temp = sess.run(self.emb_inp_v2,
                            feed_dict={self.features: batch, self.label: label})
            embedding.append(temp)
        embedding = np.concatenate(embedding, 0)
        return embedding
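
The returned array stacks one row per dev example along axis 0; the trailing shape depends on how self.emb_inp_v2 is defined in the graph. A small usage sketch, with the shape comment and file name as assumptions:

import numpy as np

embeddings = model.get_embedding(dev_data)  # e.g. (num_dev, field_num, k) -- assumption
np.save('dev_embeddings.npy', embeddings)   # persist for downstream models

# Example: find the dev examples most similar to example 0 by cosine similarity.
flat = embeddings.reshape(len(embeddings), -1)
sim = flat @ flat[0] / (np.linalg.norm(flat, axis=1) * np.linalg.norm(flat[0]) + 1e-8)
nearest = np.argsort(-sim)[:5]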