def eval_classification():

    max_step = 0.
    precision = 0.
    recall = 0.

    for step_i, batch_data in DataInput(self.test_set, self.FLAGS.test_batch_size):
        max_step += 1

        step_precision, step_recall = self.model.metrics_classification(sess=self.sess,
                                                                        batch_data=batch_data,
                                                                        global_step=self.global_step,
                                                                        topk=self.FLAGS.top_k)
        precision += step_precision[0]
        recall += step_recall[0]

    precision_val = precision / max_step
    recall_val = recall / max_step

    if precision_val > self.precision and recall_val > self.recall:
        self.precision = precision_val
        self.recall = recall_val

    print('----test precision: %.5f, recall: %.5f-----' % (precision_val, recall_val))
    print('----MAX precision: %.5f, MAX recall: %.5f-----' % (self.precision, self.recall))
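
Every snippet on this page consumes batches through DataInput(dataset, batch_size), whose implementation is not shown. Below is a minimal sketch of what the loops assume it does; a hypothetical reconstruction, not the original class.

class DataInput:
    # Hypothetical reconstruction: the snippets only require an iterable
    # that yields (step index, batch slice) pairs over the dataset.
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size

    def __iter__(self):
        # 1-based step index, matching `for step_i, batch_data in DataInput(...)`.
        for step_i, start in enumerate(
                range(0, len(self.data), self.batch_size), start=1):
            yield step_i, self.data[start:start + self.batch_size]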
Example #2
        def eval_mse():

            max_step = 0.
            mse_lst = []

            for step_i, batch_data in DataInput(self.test_set,
                                                self.FLAGS.test_batch_size):
                max_step = 1 + max_step

                step_mse = self.model.metrics_mse(sess=self.sess,
                                                  batch_data=batch_data,
                                                  global_step=self.global_step,
                                                  topk=self.FLAGS.top_k)
                mse_lst.extend(list(step_mse[0]))

            mse_val = np.mean(mse_lst)

            if mse_val < self.mse:
                self.mse = mse_val

            print('----test mse: %.5f-----' % mse_val)
            print('----MIN mse: %.5f-----' % (self.mse))
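
Note the contrast with eval_classification above: extending a list of per-example errors and taking one global np.mean weights every example equally, whereas dividing per-batch averages by max_step gives a short final batch the same weight as a full one. A toy illustration of the gap:

import numpy as np

# Batch 1 holds four examples with error 1.0; batch 2 holds one with error 3.0.
batch_means = [1.0, 3.0]                 # per-batch averages
per_example = [1.0, 1.0, 1.0, 1.0, 3.0]  # all individual errors

print(np.mean(batch_means))  # 2.0 -- the one-example batch is over-weighted
print(np.mean(per_example))  # 1.4 -- every example counts once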
Example #3
            def eval_model():

                type_prob = []
                target_type = []  # stored in one-hot form
                seq_llh = []
                time_llh = []
                type_llh = []
                squared_error = []
                total_pred_num = len(self.test_set)
                for step_i, batch_data in DataInput(
                        self.test_set, self.FLAGS.test_batch_size):
                    step_type_prob, step_target_type, \
                    step_seq_llh, step_time_llh, step_type_llh, \
                    step_cross_entropy, step_se_loss = self.model.metrics_likelihood(sess=self.sess,
                                                                                     batch_data=batch_data)
                    type_prob.extend(list(step_type_prob))
                    target_type.extend(list(step_target_type))
                    seq_llh.extend(list(step_seq_llh))
                    time_llh.extend(list(step_time_llh))
                    type_llh.extend(list(step_type_llh))
                    squared_error.extend(list(step_se_loss))

                correct_num = 0
                for i in range(len(type_prob)):
                    pred_probs = type_prob[i]
                    truth_probs = target_type[i]
                    idx_pred = np.argmax(pred_probs)
                    idx_truth = np.argmax(truth_probs)
                    if idx_pred == idx_truth:
                        correct_num += 1

                accuracy = correct_num / total_pred_num  # TODO: the accuracy calculation needs to be corrected

                avg_log_likelihood = np.mean(seq_llh)
                avg_time_llh = np.mean(time_llh)
                avg_type_llh = np.mean(type_llh)
                rmse = np.sqrt(np.mean(squared_error))

                return avg_log_likelihood, accuracy, rmse
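
The accuracy loop above collapses into a few NumPy calls. A hypothetical helper is sketched below; it also divides by the number of scored examples rather than len(self.test_set), which appears to be what the TODO comment is pointing at.

import numpy as np

def onehot_accuracy(type_prob, target_type):
    # Vectorized argmax comparison over stacked (n_examples, n_types) rows.
    pred_idx = np.argmax(np.asarray(type_prob), axis=1)
    truth_idx = np.argmax(np.asarray(target_type), axis=1)
    return float(np.mean(pred_idx == truth_idx))

# onehot_accuracy([[0.1, 0.9], [0.8, 0.2]], [[0, 1], [0, 1]]) -> 0.5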
Example #4
    def train(self):

        start_time = time.time()
        # Config GPU options
        if self.FLAGS.per_process_gpu_memory_fraction == 0.0:
            gpu_options = tf.GPUOptions(allow_growth=True)
        elif self.FLAGS.per_process_gpu_memory_fraction == 1.0:
            gpu_options = tf.GPUOptions()

        else:
            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=self.FLAGS.per_process_gpu_memory_fraction)

        os.environ['CUDA_VISIBLE_DEVICES'] = self.FLAGS.cuda_visible_devices

        # Initiate TF session
        # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        global_step_lr = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(
            learning_rate=self.FLAGS.learning_rate, global_step=global_step_lr, decay_steps=100, decay_rate=0.995,
            staircase=True)
        with self.sess.as_default():

            # Create a new model or reload existing checkpoint
            if self.FLAGS.experiment_type == "atrank":
                self.model = Atrank_model(self.FLAGS, self.emb, self.sess)

            elif self.FLAGS.experiment_type == "slirec":
                self.model = SLiRec_model(self.FLAGS, self.emb, self.sess)

            elif self.FLAGS.experiment_type == "bpr":
                self.model = Bprmf_model(self.FLAGS, self.emb,self.sess)

            elif self.FLAGS.experiment_type == 'dib':
                self.model = DIB_model(self.FLAGS, self.emb,self.sess)

            elif self.FLAGS.experiment_type == 'no_emb_dib':
                self.model = DIB_model(self.FLAGS, self.emb,self.sess)

            elif self.FLAGS.experiment_type == 'sasrec':
                self.model = SASRec_model(self.FLAGS, self.emb,self.sess)

            elif self.FLAGS.experiment_type == 'grurec':
                self.model = GRU4Rec_model(self.FLAGS, self.emb,self.sess)

            elif self.FLAGS.experiment_type == 'lstur':
                self.model = LSTUR_model(self.FLAGS, self.emb,self.sess)

            elif self.FLAGS.experiment_type == 'bert':
                self.model = BERT4Rec_model(self.FLAGS, self.emb,self.sess)

            elif self.FLAGS.experiment_type == 'dmpn':
                self.model = DMPN_model(self.FLAGS, self.emb,self.sess)

            elif self.FLAGS.experiment_type == 'dmpn2':
                self.model = DMPN2_model(self.FLAGS, self.emb,self.sess)
            elif self.FLAGS.experiment_type == 'dmpn3':
                self.model = DMPN3_model(self.FLAGS, self.emb,self.sess)
            elif self.FLAGS.experiment_type == 'dmpn4':
                self.model = DMPN4_model(self.FLAGS, self.emb,self.sess)
            elif self.FLAGS.experiment_type == 'dfm':
                self.model = DFM_model(self.FLAGS, self.emb,self.sess)


            self.logger.info('Init model finished.\tCost time: %.2fs' % (time.time() - start_time))

            # test_auc = self.model.metrics(sess=self.sess,
            #                               batch_data=self.test_set,
            #                               global_step=self.global_step,
            #                               name='test auc')

            # Eval init AUC
            # self.logger.info('Init AUC: %.4f' % test_auc)

            recall_rate, avg_ndcg = self.model.metrics_topK(sess=self.sess,
                                                            batch_data=self.test_set,
                                                            global_step=self.global_step,
                                                            topk=self.FLAGS.top_k)

            self.logger.info('Init recall_rate: %.4f' % recall_rate)
            self.logger.info('Init avg_ndcg: %.4f' % avg_ndcg)

            # Start training
            self.logger.info('Training....\tmax_epochs:%d\tbatch_size:%d' % (self.FLAGS.max_epochs, self.FLAGS.train_batch_size))
            start_time = time.time()
            avg_loss = 0.0
            self.best_hr_5, self.best_ndcg_5, \
            self.best_hr_10, self.best_ndcg_10, \
            self.best_hr_20, self.best_ndcg_20 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
            for epoch in range(self.FLAGS.max_epochs):

                random.shuffle(self.train_set)
                self.logger.info('train_set:%d\t test_set:%d' % (len(self.train_set), len(self.test_set)))

                epoch_start_time = time.time()

                for step_i, train_batch_data in DataInput(self.train_set, self.FLAGS.train_batch_size):
                    try:
                        lr = self.sess.run(learning_rate, feed_dict={global_step_lr: self.global_step})
                        add_summary = bool(self.global_step % self.FLAGS.display_freq == 0)
                        step_loss, merge = self.model.train(self.sess,train_batch_data,lr,add_summary,self.global_step)

                        if self.FLAGS.add_summary:
                            self.model.train_writer.add_summary(merge, self.global_step)
                        avg_loss = avg_loss + step_loss
                        self.global_step = self.global_step + 1
                        self.one_epoch_step = self.one_epoch_step + 1

                        #evaluate for eval steps
                        if self.global_step % self.FLAGS.eval_freq == 0:

                            self.logger.info("epoch_step:%d  global_step:%d batch_loss:%.4f" % (self.one_epoch_step,
                                                                                                self.global_step,
                                                                                                (avg_loss / self.FLAGS.eval_freq)))

                            # train_auc = self.model.metrics(sess=self.sess, batch_data=train_batch_data,
                            #                               global_step=self.global_step,name='train auc')
                            # self.logger.info('Batch Train AUC: %.4f' % train_auc)
                            # self.test_auc = self.model.metrics(sess=self.sess, batch_data=self.test_set,
                            #                               global_step=self.global_step,name='test auc')
                            # self.logger.info('Test AUC: %.4f' % self.test_auc)
                            # self.recall_rate, self.avg_ndcg = self.model.metrics_topK(sess=self.sess, batch_data=self.test_set,
                            #                                                 global_step=self.global_step, topk=self.FLAGS.top_k)
                            self.hr_5, self.ndcg_5, \
                            self.hr_10, self.ndcg_10, \
                            self.hr_20, self.ndcg_20 = self.model.test(self.sess, self.test_set, self.global_step)

                            self.logger.info('HR@5: %.4f NDCG@5: %.4f\t'
                                             'HR@10: %.4f NDCG@10: %.4f\t'
                                             'HR@20: %.4f NDCG@20: %.4f' % (self.hr_5, self.ndcg_5,
                                                                            self.hr_10, self.ndcg_10,
                                                                            self.hr_20, self.ndcg_20))
                            avg_loss = 0

                            self.save_model()
                            if self.FLAGS.draw_pic:
                                self.save_fig()

                    except Exception as e:
                        self.logger.info("Error!!!!!!!!!!!!")
                        self.logger.info(e)


                self.logger.info('One epoch cost time: %.2f' % (time.time() - epoch_start_time))

                #evaluate test auc and train auc for an epoch
                # test_auc = self.model.metrics(sess=self.sess, batch_data=self.test_set,
                #                                       global_step=self.global_step,name='test auc')
                # self.logger.info('Test AUC for epoch %d: %.4f' % (epoch, test_auc))

                self.one_epoch_step = 0
                # if self.global_step > 1000:
                #     lr = lr / 2
                # elif lr < 10e-5:
                #     lr = lr * 0.88
                # else:
                #     lr = lr * 0.95

                self.logger.info('Epoch %d DONE\tCost time: %.2f' % (self.now_epoch, time.time() - start_time))
                self.logger.info("----------------------------------------------------------------------")

                self.now_epoch = self.now_epoch + 1
                self.one_epoch_step = 0


        self.model.save(self.sess,self.global_step)
        # self.logger.info('best test_auc: ' + str(self.best_auc))
        self.logger.info('best HR@5: ' + str(self.best_hr_5))
        self.logger.info('best HR@10: ' + str(self.best_hr_10))
        self.logger.info('best HR@20: ' + str(self.best_hr_20))

        self.logger.info('best NDCG@5: ' + str(self.best_ndcg_5))
        self.logger.info('best NDCG@10: ' + str(self.best_ndcg_10))
        self.logger.info('best NDCG@20: ' + str(self.best_ndcg_20))

        self.logger.info('Finished')
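
The long if/elif dispatch in train() above can be replaced with a lookup table, since every branch calls the same (FLAGS, emb, sess) constructor signature. A sketch of that refactor, assuming all the model classes listed above are importable where this runs:

# Maps FLAGS.experiment_type to its model class; an unknown key raises KeyError.
MODEL_REGISTRY = {
    'atrank': Atrank_model, 'slirec': SLiRec_model, 'bpr': Bprmf_model,
    'dib': DIB_model, 'no_emb_dib': DIB_model, 'sasrec': SASRec_model,
    'grurec': GRU4Rec_model, 'lstur': LSTUR_model, 'bert': BERT4Rec_model,
    'dmpn': DMPN_model, 'dmpn2': DMPN2_model, 'dmpn3': DMPN3_model,
    'dmpn4': DMPN4_model, 'dfm': DFM_model,
}

# Inside train(), this one line replaces the whole chain:
self.model = MODEL_REGISTRY[self.FLAGS.experiment_type](self.FLAGS, self.emb, self.sess)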
Example #5
            def eval_topk():

                sum_hr_1, sum_ndcg_1, sum_hr_5, sum_ndcg_5, \
                sum_hr_10, sum_ndcg_10, sum_hr_30, sum_ndcg_30, \
                sum_hr_50, sum_ndcg_50 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
                result_list_hr_all = []
                result_list_ndcg_all = []

                max_step = 0

                for step_i, batch_data in DataInput(
                        self.test_set, self.FLAGS.test_batch_size):
                    max_step = 1 + max_step
                    if self.FLAGS.experiment_type == "NARM" or \
                            self.FLAGS.experiment_type == "NARM+" or \
                            self.FLAGS.experiment_type == "NARM++"   or \
                            self.FLAGS.experiment_type == "SR_GNN" :
                        hr_1, ndcg_1, hr_5, ndcg_5, hr_10, ndcg_10,  hr_30, ndcg_30, hr_50, ndcg_50, \
                        result_list_hr, result_list_ndcg= \
                                                          self.model.metrics_topK_concat(sess=self.sess,
                                                                                         batch_data=batch_data,
                                                                                         global_step=self.global_step,
                                                                                         topk=self.FLAGS.top_k)
                    else:
                        hr_1, ndcg_1, hr_5, ndcg_5, hr_10, ndcg_10,  hr_30, ndcg_30, hr_50, ndcg_50, \
                        result_list_hr, result_list_ndcg = \
                                                          self.model.metrics_topK(sess=self.sess,
                                                                                  batch_data=batch_data,
                                                                                  global_step=self.global_step,
                                                                                  topk=self.FLAGS.top_k)
                    sum_hr_1 = sum_hr_1 + hr_1
                    sum_ndcg_1 = sum_ndcg_1 + ndcg_1
                    sum_hr_5 = sum_hr_5 + hr_5
                    sum_ndcg_5 = sum_ndcg_5 + ndcg_5
                    sum_hr_10 = sum_hr_10 + hr_10
                    sum_ndcg_10 = sum_ndcg_10 + ndcg_10
                    sum_hr_30 = sum_hr_30 + hr_30
                    sum_ndcg_30 = sum_ndcg_30 + ndcg_30
                    sum_hr_50 = sum_hr_50 + hr_50
                    sum_ndcg_50 = sum_ndcg_50 + ndcg_50
                    result_list_hr_all = result_list_hr_all + result_list_hr
                    result_list_ndcg_all = result_list_ndcg_all + result_list_ndcg

                sum_hr_1 /= max_step
                sum_ndcg_1 /= max_step
                sum_hr_5 /= max_step
                sum_ndcg_5 /= max_step
                sum_hr_10 /= max_step
                sum_ndcg_10 /= max_step
                sum_hr_30 /= max_step
                sum_ndcg_30 /= max_step
                sum_hr_50 /= max_step
                sum_ndcg_50 /= max_step

                if sum_hr_1 > self.hr_1 and sum_ndcg_1 > self.ndcg_1:
                    self.hr_1, self.ndcg_1 = sum_hr_1, sum_ndcg_1
                if sum_hr_5 > self.hr_5 and sum_ndcg_5 > self.ndcg_5:
                    self.hr_5, self.ndcg_5 = sum_hr_5, sum_ndcg_5
                if sum_hr_10 > self.hr_10 and sum_ndcg_10 > self.ndcg_10:
                    self.hr_10, self.ndcg_10 = sum_hr_10, sum_ndcg_10
                    self.best_result_hr = result_list_hr_all
                    self.best_result_ndcg = result_list_ndcg_all

                if sum_hr_30 > self.hr_30 and sum_ndcg_30 > self.ndcg_30:
                    self.hr_30, self.ndcg_30 = sum_hr_30, sum_ndcg_30
                if sum_hr_50 > self.hr_50 and sum_ndcg_50 > self.ndcg_50:
                    self.hr_50, self.ndcg_50 = sum_hr_50, sum_ndcg_50

                def summary(k, hr, ndcg):
                    tag_recall = 'recall@' + str(k)
                    tag_ndcg = 'ndcg@' + str(k)
                    summary_recall_rate = tf.Summary(value=[
                        tf.Summary.Value(tag=tag_recall, simple_value=hr)
                    ])
                    self.model.train_writer.add_summary(
                        summary_recall_rate, global_step=self.global_step)
                    summary_avg_ndcg = tf.Summary(value=[
                        tf.Summary.Value(tag=tag_ndcg, simple_value=ndcg)
                    ])
                    self.model.train_writer.add_summary(
                        summary_avg_ndcg, global_step=self.global_step)
                    self.logger.info(
                        'Test recall rate @ %d : %.4f   ndcg @ %d: %.4f' %
                        (k, hr, k, ndcg))

                summary(1, sum_hr_1, sum_ndcg_1)
                summary(5, sum_hr_5, sum_ndcg_5)
                summary(10, sum_hr_10, sum_ndcg_10)
                summary(30, sum_hr_30, sum_ndcg_30)
                summary(50, sum_hr_50, sum_ndcg_50)
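
The per-batch hr_k and ndcg_k values come from model.metrics_topK, which is not shown on this page. For reference, one common definition of both metrics for a single test example; this is an assumption about what that method computes, not a copy of it:

import numpy as np

def hit_and_ndcg_at_k(ranked_item_ids, target_id, k):
    # HR@k: 1 if the target appears in the top-k ranking, else 0.
    # NDCG@k: 1 / log2(rank + 2) at the target's 0-based rank, else 0.
    topk = list(ranked_item_ids[:k])
    if target_id in topk:
        rank = topk.index(target_id)
        return 1.0, 1.0 / np.log2(rank + 2)
    return 0.0, 0.0

# hit_and_ndcg_at_k([7, 3, 9], target_id=3, k=2) -> (1.0, 1 / log2(3))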
Example #6
    def train(self):

        start_time = time.time()

        # Config GPU options
        if self.FLAGS.per_process_gpu_memory_fraction == 0.0:
            gpu_options = tf.GPUOptions(allow_growth=True)
        elif self.FLAGS.per_process_gpu_memory_fraction == 1.0:
            gpu_options = tf.GPUOptions()

        else:
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=self.
                                        FLAGS.per_process_gpu_memory_fraction,
                                        allow_growth=True)

        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ['CUDA_VISIBLE_DEVICES'] = self.FLAGS.cuda_visible_devices

        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        if not tf.test.gpu_device_name():
            self.logger.warning("No GPU is found")
        else:
            self.logger.info(tf.test.gpu_device_name())

        global_step_lr = tf.Variable(0, trainable=False)
        lr1 = tf.train.exponential_decay(
            learning_rate=self.FLAGS.learning_rate,
            global_step=global_step_lr,
            decay_steps=1000,
            decay_rate=0.995,
            staircase=True)
        lr2 = tf.train.exponential_decay(learning_rate=0.001,
                                         global_step=global_step_lr,
                                         decay_steps=1000,
                                         decay_rate=0.995,
                                         staircase=True)

        with self.sess.as_default():

            if self.FLAGS.experiment_type == "AttentiveGrnnRec":
                self.model = AttentiveGrnnRec(self.FLAGS, self.emb, self.sess)

            elif self.FLAGS.experiment_type == "GatRnnRec":
                self.model = GatRnnRec(self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.experiment_type == "OrderedGatRnnRec":
                self.model = OrderedGatRnnRec(self.FLAGS, self.emb, self.sess)
            self.logger.info('Init finished.\tCost time: %.2fs' %
                             (time.time() - start_time))

            # Skip the AUC evaluation for now
            # test_auc = self.model.metrics(sess=self.sess,
            #                               batch_data=self.test_set,
            #                               global_step=self.global_step,
            #                               name='test auc')

            # Eval init AUC
            # self.logger.info('Init AUC: %.4f' % test_auc)

            test_start = time.time()
            self.hr_1, self.ndcg_1, self.hr_5, self.ndcg_5, \
            self.hr_10, self.ndcg_10, self.hr_30, self.ndcg_30, \
            self.hr_50, self.ndcg_50 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            self.best_result_hr = []
            self.best_result_ndcg = []

            def eval_topk():

                sum_hr_1, sum_ndcg_1, sum_hr_5, sum_ndcg_5, \
                sum_hr_10, sum_ndcg_10, sum_hr_30, sum_ndcg_30, \
                sum_hr_50, sum_ndcg_50 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
                result_list_hr_all = []
                result_list_ndcg_all = []

                max_step = 0

                for step_i, batch_data in DataInput(
                        self.test_set, self.FLAGS.test_batch_size):
                    max_step = 1 + max_step
                    if self.FLAGS.experiment_type == "NARM" or \
                            self.FLAGS.experiment_type == "NARM+" or \
                            self.FLAGS.experiment_type == "NARM++"   or \
                            self.FLAGS.experiment_type == "SR_GNN" :
                        hr_1, ndcg_1, hr_5, ndcg_5, hr_10, ndcg_10,  hr_30, ndcg_30, hr_50, ndcg_50, \
                        result_list_hr, result_list_ndcg= \
                                                          self.model.metrics_topK_concat(sess=self.sess,
                                                                                         batch_data=batch_data,
                                                                                         global_step=self.global_step,
                                                                                         topk=self.FLAGS.top_k)
                    else:
                        hr_1, ndcg_1, hr_5, ndcg_5, hr_10, ndcg_10,  hr_30, ndcg_30, hr_50, ndcg_50, \
                        result_list_hr, result_list_ndcg = \
                                                          self.model.metrics_topK(sess=self.sess,
                                                                                  batch_data=batch_data,
                                                                                  global_step=self.global_step,
                                                                                  topk=self.FLAGS.top_k)
                    sum_hr_1 = sum_hr_1 + hr_1
                    sum_ndcg_1 = sum_ndcg_1 + ndcg_1
                    sum_hr_5 = sum_hr_5 + hr_5
                    sum_ndcg_5 = sum_ndcg_5 + ndcg_5
                    sum_hr_10 = sum_hr_10 + hr_10
                    sum_ndcg_10 = sum_ndcg_10 + ndcg_10
                    sum_hr_30 = sum_hr_30 + hr_30
                    sum_ndcg_30 = sum_ndcg_30 + ndcg_30
                    sum_hr_50 = sum_hr_50 + hr_50
                    sum_ndcg_50 = sum_ndcg_50 + ndcg_50
                    result_list_hr_all = result_list_hr_all + result_list_hr
                    result_list_ndcg_all = result_list_ndcg_all + result_list_ndcg

                sum_hr_1 /= max_step
                sum_ndcg_1 /= max_step
                sum_hr_5 /= max_step
                sum_ndcg_5 /= max_step
                sum_hr_10 /= max_step
                sum_ndcg_10 /= max_step
                sum_hr_30 /= max_step
                sum_ndcg_30 /= max_step
                sum_hr_50 /= max_step
                sum_ndcg_50 /= max_step

                if sum_hr_1 > self.hr_1 and sum_ndcg_1 > self.ndcg_1:
                    self.hr_1, self.ndcg_1 = sum_hr_1, sum_ndcg_1
                if sum_hr_5 > self.hr_5 and sum_ndcg_5 > self.ndcg_5:
                    self.hr_5, self.ndcg_5 = sum_hr_5, sum_ndcg_5
                if sum_hr_10 > self.hr_10 and sum_ndcg_10 > self.ndcg_10:
                    self.hr_10, self.ndcg_10 = sum_hr_10, sum_ndcg_10
                    self.best_result_hr = result_list_hr_all
                    self.best_result_ndcg = result_list_ndcg_all

                if sum_hr_30 > self.hr_30 and sum_ndcg_30 > self.ndcg_30:
                    self.hr_30, self.ndcg_30 = sum_hr_30, sum_ndcg_30
                if sum_hr_50 > self.hr_50 and sum_ndcg_50 > self.ndcg_50:
                    self.hr_50, self.ndcg_50 = sum_hr_50, sum_ndcg_50

                def summary(k, hr, ndcg):
                    tag_recall = 'recall@' + str(k)
                    tag_ndcg = 'ndcg@' + str(k)
                    summary_recall_rate = tf.Summary(value=[
                        tf.Summary.Value(tag=tag_recall, simple_value=hr)
                    ])
                    self.model.train_writer.add_summary(
                        summary_recall_rate, global_step=self.global_step)
                    summary_avg_ndcg = tf.Summary(value=[
                        tf.Summary.Value(tag=tag_ndcg, simple_value=ndcg)
                    ])
                    self.model.train_writer.add_summary(
                        summary_avg_ndcg, global_step=self.global_step)
                    self.logger.info(
                        'Test recall rate @ %d : %.4f   ndcg @ %d: %.4f' %
                        (k, hr, k, ndcg))

                summary(1, sum_hr_1, sum_ndcg_1)
                summary(5, sum_hr_5, sum_ndcg_5)
                summary(10, sum_hr_10, sum_ndcg_10)
                summary(30, sum_hr_30, sum_ndcg_30)
                summary(50, sum_hr_50, sum_ndcg_50)

            eval_topk()
            self.logger.info('End test.\tTest cost time: %.2fs' %
                             (time.time() - test_start))

            # Start training

            self.logger.info(
                'Training....\tmax_epochs:%d\tbatch_size:%d' %
                (self.FLAGS.max_epochs, self.FLAGS.train_batch_size))
            start_time, avg_loss = time.time(), 0.0
            self.best_auc, self.best_recall, self.best_ndcg = 0.0, 0.0, 0.0
            for epoch in range(self.FLAGS.max_epochs):
                #if epoch > 2:
                #lr = lr/1.5

                random.shuffle(self.train_set)
                self.logger.info('train_set:%d' % len(self.train_set))
                epoch_start_time = time.time()
                learning_rate = self.FLAGS.learning_rate

                for step_i, train_batch_data in DataInput(
                        self.train_set, self.FLAGS.train_batch_size):

                    # try:

                    #print(self.sess.run(global_step_lr))
                    if learning_rate > 0.001:
                        learning_rate = self.sess.run(
                            lr1, feed_dict={global_step_lr: self.global_step})
                    else:
                        learning_rate = self.sess.run(
                            lr2, feed_dict={global_step_lr: self.global_step})
                    #print(learning_rate)
                    add_summary = bool(self.global_step %
                                       self.FLAGS.display_freq == 0)
                    step_loss, merge = self.model.train(
                        self.sess, train_batch_data, learning_rate,
                        add_summary, self.global_step, epoch)

                    self.sess.graph.finalize()
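                    # NOTE: finalize() marks the TF graph read-only; any op
                    # construction after this point would raise, so every op
                    # (including summaries) must be built before this loop.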
                    self.model.train_writer.add_summary(
                        merge, self.global_step)
                    avg_loss = avg_loss + step_loss
                    self.global_step = self.global_step + 1
                    self.one_epoch_step = self.one_epoch_step + 1

                    #evaluate for eval steps
                    if self.global_step % self.FLAGS.eval_freq == 0:
                        self.logger.info('Learning rate is ' + str(learning_rate))
                        self.logger.info("Epoch step is " +
                                         str(self.one_epoch_step))
                        self.logger.info("Global step is " +
                                         str(self.global_step))
                        self.logger.info("Train_loss is " +
                                         str(avg_loss / self.FLAGS.eval_freq))
                        # train_auc = self.model.metrics(sess=self.sess, batch_data=train_batch_data,
                        #                               global_step=self.global_step,name='train auc')
                        # self.logger.info('Batch Train AUC: %.4f' % train_auc)
                        # self.test_auc = self.model.metrics(sess=self.sess, batch_data=self.test_set,
                        #                               global_step=self.global_step,name='test auc')
                        # self.logger.info('Test AUC: %.4f' % self.test_auc)

                        eval_topk()
                        avg_loss = 0

                        self.save_model()
                        if self.FLAGS.draw_pic:
                            self.save_fig()

                    # except Exception as e:
                    #     self.logger.info("Error!!!!!!!!!!!!")
                    #     self.logger.info(e)
                    #     traceback.print_exc()

                self.logger.info('One epoch cost time: %.2f' %
                                 (time.time() - epoch_start_time))
                self.logger.info("Epoch step is " + str(self.one_epoch_step))
                self.logger.info("Global step is " + str(self.global_step))
                self.logger.info("Train_loss is " + str(step_loss))

                eval_topk()
                with open('best_result_hr_' + self.FLAGS.version, 'w+') as f:
                    f.write(str(self.best_result_hr))
                with open('best_result_ndcg_' + self.FLAGS.version, 'w+') as f:
                    f.write(str(self.best_result_ndcg))
                self.logger.info('Max recall rate @ 1: %.4f   ndcg @ 1: %.4f' %
                                 (self.hr_1, self.ndcg_1))
                self.logger.info('Max recall rate @ 5: %.4f   ndcg @ 5: %.4f' %
                                 (self.hr_5, self.ndcg_5))
                self.logger.info(
                    'Max recall rate @ 10: %.4f   ndcg @ 10: %.4f' %
                    (self.hr_10, self.ndcg_10))
                self.logger.info(
                    'Max recall rate @ 30: %.4f   ndcg @ 30: %.4f' %
                    (self.hr_30, self.ndcg_30))
                self.logger.info(
                    'Max recall rate @ 50: %.4f   ndcg @ 50: %.4f' %
                    (self.hr_50, self.ndcg_50))

                self.one_epoch_step = 0
                #if self.global_step > 1000:
                #lr = lr / 2
                #if lr < 0.0005:
                #lr = lr * 0.99
                #elif self.FLAGS.type == "tmall":
                #lr = lr * 0.5
                #else:
                #lr = lr * 0.98

                self.logger.info('Epoch %d DONE\tCost time: %.2f' %
                                 (self.now_epoch, time.time() - start_time))

                self.now_epoch = self.now_epoch + 1
                self.one_epoch_step = 0

        self.model.save(self.sess, self.global_step)
        self.logger.info('best test_auc: ' + str(self.best_auc))
        self.logger.info('best recall: ' + str(self.best_recall))

        self.logger.info('Finished')
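
The summary helper above relies on a TF 1.x pattern worth noting on its own: a plain Python float can be written to TensorBoard by building a tf.Summary proto by hand, with no graph op involved. A minimal standalone sketch (the log directory and tag are arbitrary examples):

import tensorflow as tf  # TF 1.x API

writer = tf.summary.FileWriter('/tmp/logs')
# Wrap the scalar in a Summary proto and attach it to a global step.
summary = tf.Summary(value=[tf.Summary.Value(tag='recall@10', simple_value=0.42)])
writer.add_summary(summary, global_step=100)
writer.flush()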
Example #7
    def train(self):

        start_time = time.time()

        # Config GPU options
        if self.FLAGS.per_process_gpu_memory_fraction == 0.0:
            gpu_options = tf.GPUOptions(allow_growth=True)
        elif self.FLAGS.per_process_gpu_memory_fraction == 1.0:
            gpu_options = tf.GPUOptions()

        else:
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=self.
                                        FLAGS.per_process_gpu_memory_fraction,
                                        allow_growth=True)

        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ['CUDA_VISIBLE_DEVICES'] = self.FLAGS.cuda_visible_devices

        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        if not tf.test.gpu_device_name():
            self.logger.warning("No GPU is found")
        else:
            self.logger.info(tf.test.gpu_device_name())

        global_step_lr = tf.Variable(0, trainable=False)
        lr1 = tf.train.exponential_decay(
            learning_rate=self.FLAGS.learning_rate,
            global_step=global_step_lr,
            decay_steps=1000,
            decay_rate=0.995,
            staircase=True)
        lr2 = tf.train.exponential_decay(learning_rate=0.001,
                                         global_step=global_step_lr,
                                         decay_steps=1000,
                                         decay_rate=0.995,
                                         staircase=True)

        def eval_mse():

            max_step = 0.
            mse_lst = []

            for step_i, batch_data in DataInput(self.test_set,
                                                self.FLAGS.test_batch_size):
                max_step = 1 + max_step

                step_mse = self.model.metrics_mse(sess=self.sess,
                                                  batch_data=batch_data,
                                                  global_step=self.global_step,
                                                  topk=self.FLAGS.top_k)
                mse_lst.extend(list(step_mse[0]))

            mse_val = np.mean(mse_lst)

            if mse_val < self.mse:
                self.mse = mse_val

            print('----test mse: %.5f-----' % mse_val)
            print('----MIN mse: %.5f-----' % (self.mse))

        with self.sess.as_default():

            self.model = TimePred(self.FLAGS, self.emb, self.sess)

            self.logger.info('Init finished.\tCost time: %.2fs' %
                             (time.time() - start_time))

            test_start = time.time()
            self.mse = 100

            self.logger.info('End test.\tTest cost time: %.2fs' %
                             (time.time() - test_start))

            # Start training

            self.logger.info(
                'Training....\tmax_epochs:%d\tbatch_size:%d' %
                (self.FLAGS.max_epochs, self.FLAGS.train_batch_size))
            start_time, avg_loss = time.time(), 0.0
            self.best_auc, self.best_recall, self.best_ndcg = 0.0, 0.0, 0.0
            loss_origin = []
            loss_time = []
            eval_mse()
            for epoch in range(self.FLAGS.max_epochs):
                #if epoch > 2:
                #lr = lr/1.5

                random.shuffle(self.train_set)
                self.logger.info('train_set:%d' % len(self.train_set))
                epoch_start_time = time.time()
                learning_rate = self.FLAGS.learning_rate

                for step_i, train_batch_data in DataInput(
                        self.train_set, self.FLAGS.train_batch_size):
                    try:
                        #print(self.sess.run(global_step_lr))
                        if learning_rate > 0.001:
                            learning_rate = self.sess.run(
                                lr1,
                                feed_dict={global_step_lr: self.global_step})
                        else:
                            learning_rate = self.sess.run(
                                lr2,
                                feed_dict={global_step_lr: self.global_step})
                        #print(learning_rate)
                        add_summary = bool(self.global_step %
                                           self.FLAGS.display_freq == 0)
                        step_loss, step_loss_time, merge = self.model.train(
                            self.sess, train_batch_data, learning_rate,
                            add_summary, self.global_step, epoch)

                        self.sess.graph.finalize()

                        self.model.train_writer.add_summary(
                            merge, self.global_step)
                        avg_loss = avg_loss + step_loss
                        loss_time.extend(step_loss_time)
                        self.global_step = self.global_step + 1
                        self.one_epoch_step = self.one_epoch_step + 1

                        #evaluate for eval steps
                        if self.global_step % self.FLAGS.eval_freq == 0:
                            self.logger.info('Learning rate is ' + str(learning_rate))
                            self.logger.info("Epoch step is " +
                                             str(self.one_epoch_step))
                            self.logger.info("Global step is " +
                                             str(self.global_step))
                            self.logger.info("Train_loss is " +
                                             str(avg_loss /
                                                 self.FLAGS.eval_freq))
                            self.logger.info("Time Loss is " +
                                             str(np.mean(np.array(loss_time))))
                            eval_mse()
                            avg_loss = 0
                            loss_origin = []
                            loss_time = []
                            loss_reconsume = []

                    except Exception as e:
                        self.logger.info("Error!!!!!!!!!!!!")
                        self.logger.info(e)
                        traceback.print_exc()

                self.logger.info('One epoch cost time: %.2f' %
                                 (time.time() - epoch_start_time))
                self.logger.info("Epoch step is " + str(self.one_epoch_step))
                self.logger.info("Global step is " + str(self.global_step))
                self.logger.info("Train_loss is " + str(step_loss))
                self.logger.info("Time Loss is " +
                                 str(np.mean(np.array(loss_time))))
                eval_mse()
                self.one_epoch_step = 0

                self.logger.info('Epoch %d DONE\tCost time: %.2f' %
                                 (self.now_epoch, time.time() - start_time))

                self.now_epoch = self.now_epoch + 1
                self.one_epoch_step = 0

        self.model.save(self.sess, self.global_step)
        self.logger.info('best test_auc: ' + str(self.best_auc))
        self.logger.info('best recall: ' + str(self.best_recall))

        self.logger.info('Finished')
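
All the trainers here feed a step variable into tf.train.exponential_decay with staircase=True and switch from lr1 to lr2 once the rate falls below 0.001. The staircase schedule reduces to a one-line formula; a pure-Python sketch for checking what a given step will use:

def staircase_lr(base_lr, global_step, decay_steps=1000, decay_rate=0.995):
    # tf.train.exponential_decay with staircase=True computes
    # base_lr * decay_rate ** floor(global_step / decay_steps).
    return base_lr * decay_rate ** (global_step // decay_steps)

# staircase_lr(0.01, 2500) == 0.01 * 0.995 ** 2

One consequence worth noting: lr2 is indexed by the same global step as lr1, so at the moment of the switch the rate can jump down sharply rather than hand off smoothly.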
Example #8
    def train(self):

        start_time = time.time()

        # Config GPU options
        if self.FLAGS.per_process_gpu_memory_fraction == 0.0:
            gpu_options = tf.GPUOptions(allow_growth=True)
        elif self.FLAGS.per_process_gpu_memory_fraction == 1.0:
            gpu_options = tf.GPUOptions()

        else:
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=self.
                                        FLAGS.per_process_gpu_memory_fraction,
                                        allow_growth=True)

        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ['CUDA_VISIBLE_DEVICES'] = self.FLAGS.cuda_visible_devices

        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        if not tf.test.gpu_device_name():
            self.logger.warning("No GPU is found")
        else:
            self.logger.info(tf.test.gpu_device_name())

        global_step_lr = tf.Variable(0, trainable=False)
        lr1 = tf.train.exponential_decay(
            learning_rate=self.FLAGS.learning_rate,
            global_step=global_step_lr,
            decay_steps=100,
            decay_rate=0.99,
            staircase=True)
        lr2 = tf.train.exponential_decay(learning_rate=0.001,
                                         global_step=global_step_lr,
                                         decay_steps=100,
                                         decay_rate=self.FLAGS.decay_rate,
                                         staircase=True)

        with self.sess.as_default():

            # Create a new model or reload existing checkpoint
            if self.FLAGS.experiment_type == "Vallina_Gru4Rec":
                self.model = Vallina_Gru4Rec(self.FLAGS, self.emb, self.sess)
            #1 baseline for all models
            if self.FLAGS.experiment_type == "Gru4Rec":
                self.model = Gru4Rec(self.FLAGS, self.emb, self.sess)
            #2
            elif self.FLAGS.experiment_type == "T_SeqRec":
                self.model = T_SeqRec(self.FLAGS, self.emb, self.sess)
            #3
            elif self.FLAGS.experiment_type == "NARM":
                self.model = NARM(self.FLAGS, self.emb, self.sess)
            #4
            elif self.FLAGS.experiment_type == "NARM+":
                self.model = NARM_time_att(self.FLAGS, self.emb, self.sess)
            #5
            elif self.FLAGS.experiment_type == "NARM++":
                self.model = NARM_time_att_time_rnn(self.FLAGS, self.emb,
                                                    self.sess)
            #6
            elif self.FLAGS.experiment_type == "LSTUR":
                self.model = LSTUR(self.FLAGS, self.emb, self.sess)
            #7
            elif self.FLAGS.experiment_type == "STAMP":
                self.model = STAMP(self.FLAGS, self.emb, self.sess)
            #8 the proposed model
            elif self.FLAGS.experiment_type == 'MTAM':
                self.model = MTAM(self.FLAGS, self.emb, self.sess)
            # 9
            elif self.FLAGS.experiment_type == "MTAM_no_time_aware_rnn":
                self.model = MTAM_no_time_aware_rnn(self.FLAGS, self.emb,
                                                    self.sess)
            # 10
            elif self.FLAGS.experiment_type == "MTAM_no_time_aware_att":
                self.model = MTAM_no_time_aware_att(self.FLAGS, self.emb,
                                                    self.sess)
            #11
            elif self.FLAGS.experiment_type == 'MTAM_via_T_GRU':
                self.model = MTAM_via_T_GRU(self.FLAGS, self.emb, self.sess)
            # 12
            elif self.FLAGS.experiment_type == 'MTAM_via_rnn':
                self.model = MTAM_via_rnn(self.FLAGS, self.emb, self.sess)
            #13 the proposed model
            elif self.FLAGS.experiment_type == 'T_GRU':
                self.model = MTAM_only_time_aware_RNN(self.FLAGS, self.emb,
                                                      self.sess)
            # 14 the proposed model
            elif self.FLAGS.experiment_type == 'SASrec':
                self.model = Self_Attention_Model(self.FLAGS, self.emb,
                                                  self.sess)
            # 15 the proposed model
            elif self.FLAGS.experiment_type == 'Time_Aware_Self_Attention_Model':
                self.model = Time_Aware_Self_Attention_Model(
                    self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.experiment_type == 'Ti_Self_Attention_Model':
                self.model = Ti_Self_Attention_Model(self.FLAGS, self.emb,
                                                     self.sess)
            # 16 the proposed model
            elif self.FLAGS.experiment_type == 'bpr':
                self.model = BPRMF(self.FLAGS, self.emb, self.sess)
            # 17 the proposed model
            elif self.FLAGS.experiment_type == "MTAM_with_T_SeqRec":
                self.model = MTAM_with_T_SeqRec(self.FLAGS, self.emb,
                                                self.sess)

            self.logger.info('Init finished.\tCost time: %.2fs' %
                             (time.time() - start_time))

            # Skip the AUC evaluation for now
            # test_auc = self.model.metrics(sess=self.sess,
            #                               batch_data=self.test_set,
            #                               global_step=self.global_step,
            #                               name='test auc')

            # Eval init AUC
            # self.logger.info('Init AUC: %.4f' % test_auc)

            test_start = time.time()
            self.hr_1, self.ndcg_1, self.hr_5, self.ndcg_5, \
            self.hr_10, self.ndcg_10, self.hr_30, self.ndcg_30, \
            self.hr_50, self.ndcg_50 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            self.best_result_hr = []
            self.best_result_ndcg = []

            def eval_topk():
                sum_hr_1, sum_ndcg_1, sum_hr_5, sum_ndcg_5, \
                sum_hr_10, sum_ndcg_10, sum_hr_30, sum_ndcg_30, \
                sum_hr_50, sum_ndcg_50 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
                result_list_hr_all = []
                result_list_ndcg_all = []

                max_step = 0

                for step_i, batch_data in DataInput(
                        self.test_set, self.FLAGS.test_batch_size):
                    max_step = 1 + max_step
                    if self.FLAGS.experiment_type == "NARM" or \
                            self.FLAGS.experiment_type == "NARM+" or \
                            self.FLAGS.experiment_type == "NARM++": # or \
                        #self.FLAGS.experiment_type == "MTAM" :
                        hr_1, ndcg_1, hr_5, ndcg_5,result_list_hr, result_list_ndcg= \
                                                          self.model.metrics_topK_concat(sess=self.sess,
                                                                                         batch_data=batch_data,
                                                                                         global_step=self.global_step,
                                                                                         topk=self.FLAGS.top_k)
                    else:
                        hr_1, ndcg_1, hr_5, ndcg_5,result_list_hr, result_list_ndcg = \
                                                          self.model.metrics_topK(sess=self.sess,
                                                                                  batch_data=batch_data,
                                                                                  global_step=self.global_step,
                                                                                  topk=self.FLAGS.top_k)
                    sum_hr_1 = sum_hr_1 + hr_1
                    sum_ndcg_1 = sum_ndcg_1 + ndcg_1
                    sum_hr_5 = sum_hr_5 + hr_5
                    sum_ndcg_5 = sum_ndcg_5 + ndcg_5
                    result_list_hr_all = result_list_hr_all + result_list_hr
                    result_list_ndcg_all = result_list_ndcg_all + result_list_ndcg

                sum_hr_1 /= max_step
                sum_ndcg_1 /= max_step
                sum_hr_5 /= max_step
                sum_ndcg_5 /= max_step

                if sum_hr_1 > self.hr_1 and sum_ndcg_1 > self.ndcg_1:
                    self.hr_1, self.ndcg_1 = sum_hr_1, sum_ndcg_1
                if sum_hr_5 > self.hr_5 and sum_ndcg_5 > self.ndcg_5:
                    self.hr_5, self.ndcg_5 = sum_hr_5, sum_ndcg_5

                def summary(k, hr, ndcg):
                    tag_recall = 'recall@' + str(k)
                    tag_ndcg = 'ndcg@' + str(k)
                    summary_recall_rate = tf.Summary(value=[
                        tf.Summary.Value(tag=tag_recall, simple_value=hr)
                    ])
                    self.model.train_writer.add_summary(
                        summary_recall_rate, global_step=self.global_step)
                    summary_avg_ndcg = tf.Summary(value=[
                        tf.Summary.Value(tag=tag_ndcg, simple_value=ndcg)
                    ])
                    self.model.train_writer.add_summary(
                        summary_avg_ndcg, global_step=self.global_step)
                    self.logger.info(
                        'Test recall rate @ %d : %.4f   ndcg @ %d: %.4f' %
                        (k, hr, k, ndcg))

                summary(1, sum_hr_1, sum_ndcg_1)
                summary(5, sum_hr_5, sum_ndcg_5)

            eval_topk()
            self.logger.info('End test.\tTest cost time: %.2fs' %
                             (time.time() - test_start))

            # Start training

            self.logger.info(
                'Training....\tmax_epochs:%d\tbatch_size:%d' %
                (self.FLAGS.max_epochs, self.FLAGS.train_batch_size))
            start_time, avg_loss = time.time(), 0.0
            self.best_auc, self.best_recall, self.best_ndcg = 0.0, 0.0, 0.0
            for epoch in range(self.FLAGS.max_epochs):
                #if epoch > 2:
                #lr = lr/1.5

                random.shuffle(self.train_set)
                self.logger.info('train_set:%d' % len(self.train_set))
                epoch_start_time = time.time()
                learning_rate = self.FLAGS.learning_rate

                for step_i, train_batch_data in DataInput(
                        self.train_set, self.FLAGS.train_batch_size):

                    try:

                        #print(self.sess.run(global_step_lr))
                        if learning_rate > 0.001:
                            learning_rate = self.sess.run(
                                lr1,
                                feed_dict={global_step_lr: self.global_step})
                        else:
                            learning_rate = self.sess.run(
                                lr2,
                                feed_dict={global_step_lr: self.global_step})
                        #print(learning_rate)
                        add_summary = bool(self.global_step %
                                           self.FLAGS.display_freq == 0)
                        step_loss, merge = self.model.train(
                            self.sess, train_batch_data, learning_rate,
                            add_summary, self.global_step, epoch)

                        self.model.train_writer.add_summary(
                            merge, self.global_step)
                        avg_loss = avg_loss + step_loss
                        self.global_step = self.global_step + 1
                        self.one_epoch_step = self.one_epoch_step + 1

                        #evaluate for eval steps
                        if self.global_step % self.FLAGS.eval_freq == 0:
                            self.logger.info('Learning rate is ' + str(learning_rate))
                            self.logger.info("Epoch step is " +
                                             str(self.one_epoch_step))
                            self.logger.info("Global step is " +
                                             str(self.global_step))
                            self.logger.info("Train_loss is " +
                                             str(avg_loss /
                                                 self.FLAGS.eval_freq))
                            # train_auc = self.model.metrics(sess=self.sess, batch_data=train_batch_data,
                            #                               global_step=self.global_step,name='train auc')
                            # self.logger.info('Batch Train AUC: %.4f' % train_auc)
                            # self.test_auc = self.model.metrics(sess=self.sess, batch_data=self.test_set,
                            #                               global_step=self.global_step,name='test auc')
                            # self.logger.info('Test AUC: %.4f' % self.test_auc)

                            eval_topk()
                            avg_loss = 0

                            self.save_model()
                            if self.FLAGS.draw_pic:
                                self.save_fig()

                    except Exception as e:
                        self.logger.info("Error!!!!!!!!!!!!")
                        self.logger.info(e)

                self.logger.info('One epoch cost time: %.2f' %
                                 (time.time() - epoch_start_time))
                self.logger.info("Epoch step is " + str(self.one_epoch_step))
                self.logger.info("Global step is " + str(self.global_step))
                self.logger.info("Train_loss is " + str(step_loss))

                eval_topk()
                with open('best_result_hr_' + self.FLAGS.version, 'w+') as f:
                    f.write(str(self.best_result_hr))
                with open('best_result_ndcg_' + self.FLAGS.version, 'w+') as f:
                    f.write(str(self.best_result_ndcg))
                self.logger.info('Max recall rate @ 1: %.4f   ndcg @ 1: %.4f' %
                                 (self.hr_1, self.ndcg_1))
                self.logger.info('Max recall rate @ 5: %.4f   ndcg @ 5: %.4f' %
                                 (self.hr_5, self.ndcg_5))
                self.logger.info(
                    'Max recall rate @ 10: %.4f   ndcg @ 10: %.4f' %
                    (self.hr_10, self.ndcg_10))
                self.logger.info(
                    'Max recall rate @ 30: %.4f   ndcg @ 30: %.4f' %
                    (self.hr_30, self.ndcg_30))
                self.logger.info(
                    'Max recall rate @ 50: %.4f   ndcg @ 50: %.4f' %
                    (self.hr_50, self.ndcg_50))

                self.one_epoch_step = 0
                #if self.global_step > 1000:
                #lr = lr / 2
                #if lr < 0.0005:
                #lr = lr * 0.99
                #elif self.FLAGS.type == "tmall":
                #lr = lr * 0.5
                #else:
                #lr = lr * 0.98

                self.logger.info('Epoch %d DONE\tCost time: %.2f' %
                                 (self.now_epoch, time.time() - start_time))

                self.now_epoch = self.now_epoch + 1
                self.one_epoch_step = 0

        self.model.save(self.sess, self.global_step)
        self.logger.info('best test_auc: ' + str(self.best_auc))
        self.logger.info('best recall: ' + str(self.best_recall))

        self.logger.info('Finished')
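
One caveat that applies to every eval_topk above: a new best is recorded only when HR and NDCG improve together at the same cutoff, so an evaluation where one metric rises while the other falls is discarded entirely. If independent maxima are wanted instead, a small hypothetical helper like this does it:

def update_best(best, current):
    # Keep a per-metric running maximum instead of a joint condition.
    return {k: max(best[k], current[k]) for k in best}

# update_best({'hr@5': 0.20, 'ndcg@5': 0.10}, {'hr@5': 0.25, 'ndcg@5': 0.09})
# -> {'hr@5': 0.25, 'ndcg@5': 0.10}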
Example #9
    def train(self):
        start_time = time.time()

        if self.FLAGS.per_process_gpu_memory_fraction == 0.0:
            gpu_option = tf.GPUOptions(allow_growth=True)
        elif self.FLAGS.per_process_gpu_memory_fraction == 1.0:
            gpu_option = tf.GPUOptions()
        else:
            gpu_option = tf.GPUOptions(per_process_gpu_memory_fraction=self.
                                       FLAGS.per_process_gpu_memory_fraction,
                                       allow_growth=True)

        os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        os.environ['CUDA_VISIBLE_DEVICES'] = self.FLAGS.cuda_visible_devices

        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_option))

        if not tf.test.gpu_device_name():
            self.logger.warning('No GPU is found')
        else:
            self.logger.info(tf.test.gpu_device_name())

        global_step_lr = tf.Variable(0, trainable=False)
        decay_rate = tf.train.exponential_decay(learning_rate=1.,
                                                global_step=global_step_lr,
                                                decay_steps=100,
                                                decay_rate=0.99,
                                                staircase=True)

        with self.sess.as_default():

            if self.FLAGS.model_name == 'THP':
                self.model = THP(self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.model_name == 'MTAM_TPP_wendy_att_time':
                self.model = MTAM_TPP_wendy_att_time(self.FLAGS, self.emb,
                                                     self.sess)
            elif self.FLAGS.model_name == 'TANHP_v2':
                self.model = TANHP_v2(self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.model_name == 'TANHP_v3':
                self.model = TANHP_v3(self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.model_name == 'NHP':
                self.model = NHP(self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.model_name == 'RMTPP':
                self.model = RMTPP(self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.model_name == 'SAHP':
                self.model = SAHP(self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.model_name == 'HP':
                self.model = HP(self.FLAGS, self.emb, self.sess)
            elif self.FLAGS.model_name == 'IHP':
                self.model = IHP(self.FLAGS, self.emb, self.sess)
            self.logger.info('Init finished.\tCost time: %.2fs' %
                             (time.time() - start_time))

            def eval_model():

                type_prob = []
                target_type = []  # stored in one-hot form
                seq_llh = []
                time_llh = []
                type_llh = []
                squared_error = []
                total_pred_num = len(self.test_set)
                for step_i, batch_data in DataInput(
                        self.test_set, self.FLAGS.test_batch_size):
                    step_type_prob, step_target_type,\
                    step_seq_llh,step_time_llh,step_type_llh,\
                    step_cross_entropy, step_se_loss = self.model.metrics_likelihood(sess = self.sess,
                                                                       batch_data = batch_data)
                    type_prob.extend(list(step_type_prob))
                    target_type.extend(list(step_target_type))
                    seq_llh.extend(list(step_seq_llh))
                    time_llh.extend(list(step_time_llh))
                    type_llh.extend(list(step_type_llh))
                    squared_error.extend(list(step_se_loss))

                correct_num = 0
                for i in range(len(type_prob)):
                    pred_probs = type_prob[i]
                    truth_probs = target_type[i]
                    idx_pred = np.argmax(pred_probs)
                    idx_truth = np.argmax(truth_probs)
                    if idx_pred == idx_truth:
                        correct_num += 1

                # TODO: computation needs correcting -- total_pred_num is
                # len(self.test_set), which can differ from len(type_prob)
                # when the loader drops or pads a partial final batch
                accuracy = correct_num / total_pred_num

                avg_log_likelihood = np.mean(seq_llh)
                avg_time_llh = np.mean(time_llh)
                avg_type_llh = np.mean(type_llh)
                rmse = np.sqrt(np.mean(squared_error))

                return avg_log_likelihood, accuracy, rmse
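
            # A minimal sketch of the fix the TODO inside eval_model refers
            # to: divide by the number of predictions actually collected, so a
            # dropped or padded final batch cannot skew the accuracy. The
            # helper name is hypothetical, not part of the original code.
            def accuracy_from_probs(type_prob, target_type):
                correct = sum(
                    int(np.argmax(p) == np.argmax(t))
                    for p, t in zip(type_prob, target_type))
                return correct / max(len(type_prob), 1)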

            self.logger.info('learning rate: %f' % (self.FLAGS.learning_rate))
            self.logger.info('train set: %d' % len(self.train_set))

            self.global_step = 0
            avg_loss = 0.0
            sum_seq_llh = 0.0
            sum_time_llh = 0.0
            sum_type_llh = 0.0
            sum_ce_loss = 0.0
            sum_se_loss = 0.0
            count = 0
            learning_rate = self.FLAGS.learning_rate

            llh_lst = [-100000]
            acc_lst = [0]
            rmse_lst = [100000]
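            # the three lists start with sentinel values (impossibly low llh
            # and accuracy, impossibly high rmse) so that the first epoch
            # always registers as a new best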

            early_stop = 0

            for epoch in range(self.FLAGS.max_epochs):
                epoch_start_time = time.time()

                random.shuffle(self.train_set)

                for step_i, train_batch_data in DataInput(
                        self.train_set, self.FLAGS.train_batch_size):
                    llh_decay_rate = self.sess.run(
                        decay_rate,
                        feed_dict={global_step_lr: self.global_step})

                    self.global_step += 1

                    # target_time,predict_target_emb,last_time,target_lambda,\
                    # test_output,\
                    step_loss, \
                    seq_llh,time_llh,type_llh,\
                    ce_loss,se_loss,\
                    l2_norm, merge,_ = self.model.train(self.sess, train_batch_data, learning_rate,llh_decay_rate)
                    self.model.train_writer.add_summary(
                        merge, self.global_step)

                    # freeze the graph: any op accidentally created inside
                    # the training loop now raises instead of silently
                    # growing the graph every step
                    self.sess.graph.finalize()

                    count += len(train_batch_data)

                    avg_loss += step_loss
                    sum_seq_llh += np.sum(seq_llh)
                    sum_time_llh += np.sum(time_llh)
                    sum_type_llh += np.sum(type_llh)
                    sum_ce_loss += np.sum(ce_loss)
                    sum_se_loss += np.sum(se_loss)

                self.logger.info("epoch : %d" % (epoch))
                avg_llh, accuracy, rmse = eval_model()
                self.logger.info(
                    "log likelihood: %.5f, accuracy: %.5f, sqrt mean squared error: %.5f"
                    % (avg_llh, accuracy, rmse))
                self.logger.info('one epoch Cost time: %.2f' %
                                 (time.time() - epoch_start_time))

                if avg_llh <= np.max(llh_lst) and accuracy <= np.max(
                        acc_lst) and rmse >= np.min(rmse_lst):
                    early_stop += 1
                    self.logger.info('llh: %.5f, accuracy: %.5f, rmse: %.5f' %
                                     (avg_llh, accuracy, rmse))
                    self.logger.info(
                        'max llh: %.5f, accuracy: %.5f, rmse: %.5f' %
                        (np.max(llh_lst), np.max(acc_lst), np.min(rmse_lst)))
                else:
                    early_stop = 0

                llh_lst.append(avg_llh)
                acc_lst.append(accuracy)
                rmse_lst.append(rmse)
                self.logger.info(
                    "MAX log likelihood: %.5f, MAX accuracy: %.5f,MIN sqrt mean squared error: %.5f"
                    % (np.max(llh_lst), np.max(acc_lst), np.min(rmse_lst)))
                # stop after 5 consecutive epochs without beating any best
                # metric, or as soon as any metric turns NaN
                if early_stop >= 5 or np.isnan(avg_llh) or np.isnan(
                        accuracy) or np.isnan(rmse):
                    break
Beispiel #10
            def eval_topk():

                dev_sum_hr_1, dev_sum_ndcg_1, dev_sum_mrr_1, dev_sum_hr_5, dev_sum_ndcg_5, dev_sum_mrr_5, \
                dev_sum_hr_10, dev_sum_ndcg_10, dev_sum_mrr_10, dev_sum_hr_20, dev_sum_ndcg_20, dev_sum_mrr_20, \
                dev_sum_hr_50, dev_sum_ndcg_50, dev_sum_mrr_50 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

                max_step = 0

                for step_i, batch_data in DataInput(
                        self.dev_set, self.FLAGS.test_batch_size):
                    max_step = 1 + max_step
                    if self.FLAGS.experiment_type == "NARM" or \
                            self.FLAGS.experiment_type == "NARM+" or \
                            self.FLAGS.experiment_type == "NARM++" or \
                            self.FLAGS.experiment_type == "SR_GNN":
                        hr_1, ndcg_1, mrr_1, hr_5, ndcg_5, mrr_5, \
                        hr_10, ndcg_10, mrr_10, hr_20, ndcg_20, mrr_20, \
                        hr_50, ndcg_50, mrr_50, \
                        result_list_hr, result_list_ndcg = \
                            self.model.metrics_topK_concat(sess=self.sess,
                                                           batch_data=batch_data,
                                                           global_step=self.global_step,
                                                           topk=self.FLAGS.top_k)
                    else:
                        hr_1, ndcg_1, mrr_1, hr_5, ndcg_5, mrr_5, \
                        hr_10, ndcg_10, mrr_10, hr_20, ndcg_20, mrr_20, \
                        hr_50, ndcg_50, mrr_50, \
                        result_list_hr, result_list_ndcg = \
                            self.model.metrics_topK(sess=self.sess,
                                                    batch_data=batch_data,
                                                    global_step=self.global_step,
                                                    topk=self.FLAGS.top_k)
                    dev_sum_hr_1 = dev_sum_hr_1 + hr_1
                    dev_sum_ndcg_1 = dev_sum_ndcg_1 + ndcg_1
                    dev_sum_mrr_1 = dev_sum_mrr_1 + mrr_1
                    dev_sum_hr_5 = dev_sum_hr_5 + hr_5
                    dev_sum_ndcg_5 = dev_sum_ndcg_5 + ndcg_5
                    dev_sum_mrr_5 = dev_sum_mrr_5 + mrr_5
                    dev_sum_hr_10 = dev_sum_hr_10 + hr_10
                    dev_sum_ndcg_10 = dev_sum_ndcg_10 + ndcg_10
                    dev_sum_mrr_10 = dev_sum_mrr_10 + mrr_10
                    dev_sum_hr_20 = dev_sum_hr_20 + hr_20
                    dev_sum_ndcg_20 = dev_sum_ndcg_20 + ndcg_20
                    dev_sum_mrr_20 = dev_sum_mrr_20 + mrr_20
                    dev_sum_hr_50 = dev_sum_hr_50 + hr_50
                    dev_sum_ndcg_50 = dev_sum_ndcg_50 + ndcg_50
                    dev_sum_mrr_50 = dev_sum_mrr_50 + mrr_50

                dev_sum_hr_1 /= max_step
                dev_sum_ndcg_1 /= max_step
                dev_sum_mrr_1 /= max_step
                dev_sum_hr_5 /= max_step
                dev_sum_ndcg_5 /= max_step
                dev_sum_mrr_5 /= max_step
                dev_sum_hr_10 /= max_step
                dev_sum_ndcg_10 /= max_step
                dev_sum_mrr_10 /= max_step
                dev_sum_hr_20 /= max_step
                dev_sum_ndcg_20 /= max_step
                dev_sum_mrr_20 /= max_step
                dev_sum_hr_50 /= max_step
                dev_sum_ndcg_50 /= max_step
                dev_sum_mrr_50 /= max_step
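                # note: dividing by max_step averages over batches rather than
                # over examples, so a smaller final batch weighs slightly more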

                sum_hr_1, sum_ndcg_1, sum_mrr_1, sum_hr_5, sum_ndcg_5, sum_mrr_5, \
                sum_hr_10, sum_ndcg_10, sum_mrr_10, sum_hr_20, sum_ndcg_20, sum_mrr_20, \
                sum_hr_50, sum_ndcg_50, sum_mrr_50 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
                result_list_hr_all = []
                result_list_ndcg_all = []

                max_step = 0

                for step_i, batch_data in DataInput(
                        self.test_set, self.FLAGS.test_batch_size):
                    max_step = 1 + max_step
                    if self.FLAGS.experiment_type == "NARM" or \
                            self.FLAGS.experiment_type == "NARM+" or \
                            self.FLAGS.experiment_type == "NARM++" or \
                            self.FLAGS.experiment_type == "SR_GNN":
                        hr_1, ndcg_1, mrr_1, hr_5, ndcg_5, mrr_5, \
                        hr_10, ndcg_10, mrr_10, hr_20, ndcg_20, mrr_20, \
                        hr_50, ndcg_50, mrr_50, \
                        result_list_hr, result_list_ndcg = \
                            self.model.metrics_topK_concat(sess=self.sess,
                                                           batch_data=batch_data,
                                                           global_step=self.global_step,
                                                           topk=self.FLAGS.top_k)
                    else:
                        hr_1, ndcg_1, mrr_1, hr_5, ndcg_5, mrr_5, \
                        hr_10, ndcg_10, mrr_10, hr_20, ndcg_20, mrr_20, \
                        hr_50, ndcg_50, mrr_50, \
                        result_list_hr, result_list_ndcg = \
                            self.model.metrics_topK(sess=self.sess,
                                                    batch_data=batch_data,
                                                    global_step=self.global_step,
                                                    topk=self.FLAGS.top_k)
                    sum_hr_1 = sum_hr_1 + hr_1
                    sum_ndcg_1 = sum_ndcg_1 + ndcg_1
                    sum_mrr_1 = sum_mrr_1 + mrr_1
                    sum_hr_5 = sum_hr_5 + hr_5
                    sum_ndcg_5 = sum_ndcg_5 + ndcg_5
                    sum_mrr_5 = sum_mrr_5 + mrr_5

                    sum_hr_10 = sum_hr_10 + hr_10
                    sum_ndcg_10 = sum_ndcg_10 + ndcg_10
                    sum_mrr_10 = sum_mrr_10 + mrr_10
                    sum_hr_20 = sum_hr_20 + hr_20
                    sum_ndcg_20 = sum_ndcg_20 + ndcg_20
                    sum_mrr_20 = sum_mrr_20 + mrr_20

                    sum_hr_50 = sum_hr_50 + hr_50
                    sum_ndcg_50 = sum_ndcg_50 + ndcg_50
                    sum_mrr_50 = sum_mrr_50 + mrr_50
                    result_list_hr_all = result_list_hr_all + result_list_hr
                    result_list_ndcg_all = result_list_ndcg_all + result_list_ndcg

                sum_hr_1 /= max_step
                sum_ndcg_1 /= max_step
                sum_mrr_1 /= max_step
                sum_hr_5 /= max_step
                sum_ndcg_5 /= max_step
                sum_mrr_5 /= max_step
                sum_hr_10 /= max_step
                sum_ndcg_10 /= max_step
                sum_mrr_10 /= max_step
                sum_hr_20 /= max_step
                sum_ndcg_20 /= max_step
                sum_mrr_20 /= max_step
                sum_hr_50 /= max_step
                sum_ndcg_50 /= max_step
                sum_mrr_50 /= max_step

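                # model selection on the dev set: test metrics are snapshotted
                # whenever dev HR@10 and NDCG@10 both improve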
                if dev_sum_hr_10 > self.best_dev_hr_10 and dev_sum_ndcg_10 > self.best_dev_ndcg_10:
                    self.best_dev_hr_10 = dev_sum_hr_10
                    self.best_dev_ndcg_10 = dev_sum_ndcg_10

                    self.hr_1, self.ndcg_1, self.mrr_1 = sum_hr_1, sum_ndcg_1, sum_mrr_1
                    self.hr_5, self.ndcg_5, self.mrr_5 = sum_hr_5, sum_ndcg_5, sum_mrr_5
                    self.hr_10, self.ndcg_10, self.mrr_10 = sum_hr_10, sum_ndcg_10, sum_mrr_10
                    self.best_result_hr = result_list_hr_all
                    self.best_result_ndcg = result_list_ndcg_all
                    self.hr_20, self.ndcg_20, self.mrr_20 = sum_hr_20, sum_ndcg_20, sum_mrr_20
                    self.hr_50, self.ndcg_50, self.mrr_50 = sum_hr_50, sum_ndcg_50, sum_mrr_50

                def dev_log(k, hr, ndcg, mrr):
                    self.logger.info(
                        'Dev recall rate @ %d : %.4f   ndcg @ %d: %.4f  mrr @ %d: %.4f'
                        % (k, hr, k, ndcg, k, mrr))

                dev_log(1, dev_sum_hr_1, dev_sum_ndcg_1, dev_sum_mrr_1)
                dev_log(5, dev_sum_hr_5, dev_sum_ndcg_5, dev_sum_mrr_5)
                dev_log(10, dev_sum_hr_10, dev_sum_ndcg_10, dev_sum_mrr_10)
                dev_log(20, dev_sum_hr_20, dev_sum_ndcg_20, dev_sum_mrr_20)
                dev_log(50, dev_sum_hr_50, dev_sum_ndcg_50, dev_sum_mrr_50)

                def test_log(k, hr, ndcg, mrr):
                    tag_recall = 'test recall@' + str(k)
                    tag_ndcg = 'test ndcg@' + str(k)
                    summary_recall_rate = tf.Summary(value=[
                        tf.Summary.Value(tag=tag_recall, simple_value=hr)
                    ])
                    self.model.train_writer.add_summary(
                        summary_recall_rate, global_step=self.global_step)
                    summary_avg_ndcg = tf.Summary(value=[
                        tf.Summary.Value(tag=tag_ndcg, simple_value=ndcg)
                    ])
                    self.model.train_writer.add_summary(
                        summary_avg_ndcg, global_step=self.global_step)
                    self.logger.info(
                        'Test recall rate @ %d : %.4f   ndcg @ %d: %.4f  mrr @ %d: %.4f'
                        % (k, hr, k, ndcg, k, mrr))

                test_log(1, sum_hr_1, sum_ndcg_1, sum_mrr_1)
                test_log(5, sum_hr_5, sum_ndcg_5, sum_mrr_5)
                test_log(10, sum_hr_10, sum_ndcg_10, sum_mrr_10)
                test_log(20, sum_hr_20, sum_ndcg_20, sum_mrr_20)
                test_log(50, sum_hr_50, sum_ndcg_50, sum_mrr_50)
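
                # Hypothetical refactor sketch (not part of the original
                # code): the fifteen parallel accumulators above could be
                # collapsed into a dict keyed by cutoff k.
                def accumulate(totals, ks, flat_metrics):
                    # flat_metrics is ordered (hr, ndcg, mrr) for each k in ks
                    for i, k in enumerate(ks):
                        hr, ndcg, mrr = flat_metrics[3 * i:3 * i + 3]
                        bucket = totals.setdefault(k, [0.0, 0.0, 0.0])
                        bucket[0] += hr
                        bucket[1] += ndcg
                        bucket[2] += mrr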