def _eval_logdata_improve(sess, dataset, batch_size, type):
    """Score `dataset` in mini-batches, compute ROC-AUC, and track the best score.

    Updates the module-level `best_auc_train` / `best_auc_test` trackers and
    checkpoints the session when the test AUC improves.

    NOTE(review): the parameter `type` shadows the builtin; expected values are
    'train' or 'test'.
    """
    score_arr = []
    y = []
    for _, uij in DataInput(dataset, batch_size):
        score_ = sess.run([batch_logits_test], feed_dict={
            i_batch: uij[1],
            j_batch: uij[2],
            # NOTE(review): j_batch and y_batch are both fed uij[2] — confirm
            # this is intended and not a copy-paste slip.
            y_batch: uij[2],
            hist_i_batch: uij[3],
            sl_batch: uij[4]
        })
        score_arr.append(np.squeeze(score_[0]))
        y.append(np.asarray(uij[2]))
    score_arr = np.hstack(score_arr)
    y = np.hstack(np.asarray(y))
    # Labels are truncated to the prediction length before AUC; presumably this
    # guards against a ragged last batch — TODO confirm (the sibling
    # _eval_logdata uses the full label vector).
    Auc = roc_auc_score(y[:len(score_arr)], score_arr)
    global best_auc_train
    global best_auc_test
    if type=='train' and best_auc_train<Auc:
        best_auc_train = Auc
    if type=='test' and best_auc_test<Auc:
        best_auc_test = Auc
        # Checkpoint only when the test AUC improves.
        saver = tf.train.Saver()
        saver.save(sess, save_path='save_path/ckpt')
    return Auc
def fit(self, sess, train_data):
    """Run 5 training epochs over `train_data` in mini-batches of 128,
    printing loss and streaming AUC every 1000 batches."""
    epochs = 5
    for epoch_idx in range(epochs):
        for batch_idx, batch in DataInput(train_data, batch_size=128):
            feed = {
                self.user: batch[0],
                self.item: batch[1],
                self.y: batch[2],
                self.click_hist: batch[3],
                self.click_len: batch[4],
            }
            batch_loss, _, _ = sess.run(
                [self.loss, self.optimizer, self.auc_update_op],
                feed_dict=feed)
            # Periodic progress line.
            if batch_idx % 1000 == 0:
                print(
                    "itr : {itr} , step: {step}, loss : {loss},auc : {auc}"
                    .format(itr=epoch_idx, step=batch_idx, loss=batch_loss,
                            auc=self.auc.eval(session=sess)))
def eval(self, sess, data_set, name='eval'):
    """Evaluate on `data_set`: accumulate loss and logits over large batches,
    then print average loss and ROC-AUC."""
    total_loss = 0
    all_logits = np.array([])
    all_labels = np.array([])
    for _, row in DataInput(data_set, 128000):
        reviewerId, asin, y, hist, hist_len = row
        feed = {
            self.uid: reviewerId,
            self.tid: asin,
            self.y: y,
            self.hist_i: hist,
            self.hist_len: hist_len,
        }
        batch_loss, batch_logits = sess.run(
            [self.loss, self.logits], feed_dict=feed)
        total_loss += batch_loss
        all_logits = np.append(all_logits, batch_logits)
        all_labels = np.append(all_labels, y)
    # AUC over all concatenated predictions; loss averaged per example.
    auc = roc_auc_score(all_labels, all_logits)
    log_data = {
        'name': name,
        'loss': total_loss / len(data_set),
        'auc': auc,
    }
    print('Eval {name} : avg loss = {loss} auc = {auc}'.format(**log_data))
def _eval(sess, merge, model, total_step, test_writer):
    """Evaluate on a random slice of the global `test_set`.

    Writes summaries to `test_writer` at `total_step` and returns
    (auc, recall, precision, accuracy, avg_loss).
    """
    score_arr = []
    loss_sum = 0
    # Evaluate on a random subsample to keep evaluation cheap.
    slice_test_set = np.array(random.sample(test_set, slice_size))
    for i, (y, user_feature, item_feature, cate, keyword, keyword2, tag1, tag2,
            tag3, ks1, ks2, hist_cate, hist_keyword, hist_keyword2, hist_tag1,
            hist_tag2, hist_tag3, hist_ks1, hist_ks2,
            sl) in DataInput(slice_test_set, eval_batch_size):
        # Trailing 1.0 is presumably the keep_prob (no dropout at eval) — TODO confirm.
        summary, score, loss = model.test(
            sess, merge,
            (y, user_feature, item_feature, cate, keyword, keyword2, tag1,
             tag2, tag3, ks1, ks2, hist_cate, hist_keyword, hist_keyword2,
             hist_tag1, hist_tag2, hist_tag3, hist_ks1, hist_ks2, sl, 1.0))
        test_writer.add_summary(summary, global_step=total_step)
        loss_sum = loss_sum + loss
        score_arr += list(score)
    # Column 0 of the slice holds the labels (see the tuple unpack above).
    true_y = slice_test_set[:, 0]
    true_y = list(true_y)
    # Fixed 0.5 threshold for the classification metrics; AUC uses raw scores.
    score_arr_binary = [int(i > 0.5) for i in score_arr]
    auc = roc_auc_score(true_y, score_arr)
    recall = recall_score(true_y, score_arr_binary)
    precision = precision_score(true_y, score_arr_binary)
    accuracy = accuracy_score(true_y, score_arr_binary)
    # output_graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
    #                                                                 output_node_names=['sig_logits'])
    # with tf.gfile.FastGFile('save_path/din.pb' + '.' + str(epoch) + '.' + str(step), mode='wb') as f:
    #     f.write(output_graph_def.SerializeToString())
    return auc, recall, precision, accuracy, (loss_sum * eval_batch_size) / slice_size
def _eval_train(sess, model):
    """Evaluate on a random slice of the global `train_set` (no summaries).

    Returns (auc, recall, precision, accuracy, avg_loss) — the training-set
    counterpart of `_eval`.
    """
    score_arr = []
    loss_sum = 0
    slice_train_set = np.array(random.sample(train_set, slice_size))
    for _, (y, user_feature, item_feature, cate, keyword, keyword2, tag1, tag2,
            tag3, ks1, ks2, hist_cate, hist_keyword, hist_keyword2, hist_tag1,
            hist_tag2, hist_tag3, hist_ks1, hist_ks2,
            sl) in DataInput(slice_train_set, eval_batch_size):
        # Trailing 1.0 is presumably the keep_prob — TODO confirm.
        score, loss = model.eval_train(
            sess,
            (y, user_feature, item_feature, cate, keyword, keyword2, tag1,
             tag2, tag3, ks1, ks2, hist_cate, hist_keyword, hist_keyword2,
             hist_tag1, hist_tag2, hist_tag3, hist_ks1, hist_ks2, sl, 1.0))
        loss_sum = loss_sum + loss
        score_arr += list(score)
    # Column 0 holds the labels.
    true_y = slice_train_set[:, 0]
    true_y = list(true_y)
    score_arr_binary = [int(i > 0.5) for i in score_arr]
    auc = roc_auc_score(true_y, score_arr)
    recall = recall_score(true_y, score_arr_binary)
    precision = precision_score(true_y, score_arr_binary)
    accuracy = accuracy_score(true_y, score_arr_binary)
    return auc, recall, precision, accuracy, (loss_sum * eval_batch_size) / slice_size
def eval(self, sess, data_set, name='eval'):
    """Evaluate on `data_set` in batches of 12800; print the summed loss
    and the ROC-AUC over all predictions."""
    total_loss = 0
    pred_arr = np.array([])
    label_arr = np.array([])
    for batch_idx, batch in DataInput(data_set, batch_size=12800):
        feed = {
            self.user: batch[0],
            self.item: batch[1],
            self.y: batch[2],
            self.click_hist: batch[3],
            self.click_len: batch[4],
        }
        batch_loss, batch_logits = sess.run(
            [self.loss, self.logits], feed_dict=feed)
        total_loss += batch_loss
        pred_arr = np.append(pred_arr, batch_logits)
        label_arr = np.append(label_arr, batch[2])
    from sklearn.metrics import roc_auc_score
    auc = roc_auc_score(label_arr, pred_arr)
    print('Eval {name} : loss = {loss} auc = {auc}'.format(
        name=name, loss=total_loss, auc=auc))
def fit(self, sess, train_set, eval_set=None):
    """Train the model for 5 epochs over `train_set`.

    Tensors defined on the model:
    - self.uid        user ID
    - self.tid        product ID
    - self.y          target
    - self.hist_i     user's historical asin sequence
    - self.hist_len   history sequence length
    - self.loss       loss
    - self.train_op   training op
    - self.auc        auc
    - self.auc_update_op  auc update op
    - self.global_step    step counter
    - self.logits     model output log-odds

    :param sess: TF session
    :param train_set: training set
    :param eval_set: validation set
    :return: None
    """
    loss_sum = 0
    for epoch in range(5):
        for _, row in DataInput(train_set, 128):
            reviewerId, asin, y, hist, hist_len = row
            # One optimization step; also updates the streaming AUC.
            loss, _, _ = sess.run(
                [self.loss, self.train_op, self.auc_update_op],
                feed_dict={
                    self.uid: reviewerId,
                    self.tid: asin,
                    self.y: y,
                    self.hist_i: hist,
                    self.hist_len: hist_len
                })
            loss_sum += loss
            # Debug line every 1000 global steps (loss_sum is the loss
            # accumulated since the previous line).
            if self.global_step.eval(session=sess) % 1000 == 0:
                log_data = {
                    'loss': loss_sum,
                    'global_step': self.global_step.eval(session=sess),
                    'auc': self.auc.eval(session=sess),
                    'epoch': epoch
                }
                print(
                    'Epoch {epoch}, Global step = {global_step}, Loss = {loss}, AUC = {auc}'
                    .format(**log_data))
                loss_sum = 0
        # After each epoch, evaluate on the training set and, if provided,
        # the validation set.
        print('Epoch {epoch}, '.format(epoch=epoch), end='')
        self.eval(sess, train_set, name='train')
        if eval_set is not None:
            print('Epoch {epoch}, '.format(epoch=epoch), end='')
            self.eval(sess, eval_set, name='eval')
def eval_recall(sess, test_set, model):
    """Run every test batch through the model's recall update, then fetch
    the accumulated recall@K metrics (K = 1, 10, 20, 30, 40, 50)."""
    for _, batch in DataInput(test_set, FLAGS.test_batch_size):
        model.eval_recall(sess, batch)
    recall_tensors = [
        model.recall_1, model.recall_10, model.recall_20,
        model.recall_30, model.recall_40, model.recall_50,
    ]
    return sess.run(recall_tensors)
def eval_prec(sess, test_set, model):
    """Run every test batch through the model's precision update, then fetch
    the accumulated precision@K metrics (K = 1, 10, 20, 30, 40, 50)."""
    for _, batch in DataInput(test_set, FLAGS.test_batch_size):
        model.eval_prec(sess, batch)
    prec_tensors = [
        model.prec_1, model.prec_10, model.prec_20,
        model.prec_30, model.prec_40, model.prec_50,
    ]
    return sess.run(prec_tensors)
def eval_auc(sess, test_set, model):
    """Compute the test AUC as a batch-size-weighted average of per-batch
    AUCs, log it to the model's eval summary writer, and return it."""
    weighted_sum = 0.0
    for _, batch in DataInput(test_set, FLAGS.test_batch_size):
        weighted_sum += model.eval_auc(sess, batch) * len(batch[0])
    res = weighted_sum / len(test_set)
    summary = tf.Summary(value=[tf.Summary.Value(tag='AUC', simple_value=res)])
    model.eval_writer.add_summary(summary=summary,
                                  global_step=model.global_step.eval())
    return res
def fit(self, sess, train_set, eval_set=None):
    """Train the model for 5 epochs over `train_set`.

    Tensors defined on the model:
    - self.uid        user ID
    - self.tid        product ID
    - self.y          target
    - self.hist_i     user's historical asin sequence
    - self.hist_len   history sequence length
    - self.loss       loss
    - self.train_op   training op
    - self.auc        auc
    - self.auc_update_op  auc update op
    - self.global_step    step counter
    - self.logits     model output log-odds

    :param sess: TF session
    :param train_set: training set
    :param eval_set: validation set
    :return: None
    """
    loss_sum = 0
    for epoch in range(5):
        for _, row in DataInput(train_set, 128):
            reviewerId, asin, y, hist, hist_len = row
            # One optimization step; also updates the streaming AUC.
            loss, _, _, = sess.run(
                [self.loss, self.train_op, self.auc_update_op],
                feed_dict={
                    self.uid: reviewerId,
                    self.tid: asin,
                    self.y: y,
                    self.hist_i: hist,
                    self.hist_len: hist_len
                })
            loss_sum += loss
            # Debug line every 1000 global steps; loss_sum is the loss
            # accumulated since the previous print.
            if self.global_step.eval(session=sess) % 1000 == 0:
                log_data = {
                    'loss': loss_sum,
                    'global_step': self.global_step.eval(session=sess),
                    'auc': self.auc.eval(session=sess),
                    'epoch': epoch
                }
                print(
                    'Epoch {epoch}, Global step = {global_step}, Loss = {loss}, AUC = {auc}'
                    .format(**log_data))
                loss_sum = 0
        # After each epoch, evaluate on the train set and (optionally) eval set.
        print('Epoch {epoch}, '.format(epoch=epoch), end='')
        self.eval(sess, train_set, name='train')
        if eval_set is not None:
            print('Epoch {epoch}, '.format(epoch=epoch), end='')
            self.eval(sess, eval_set, name='eval')
def eval(self, sess, data_set, name='eval'):
    """Evaluate the model on `data_set`: accumulate loss and logits over
    large batches, compute ROC-AUC, and print an evaluation log line.

    :param sess: TF session
    :param data_set: evaluation data
    :param name: label used in the printed log line
    :return: None
    """
    loss_sum = 0
    logits_arr = np.array([])
    y_arr = np.array([])
    for _, row in DataInput(data_set, 128000):
        reviewerId, asin, y, hist, hist_len = row
        # Forward pass only — no training ops.
        loss, logits = sess.run(
            [self.loss, self.logits],
            feed_dict={
                self.uid: reviewerId,
                self.tid: asin,
                self.y: y,
                self.hist_i: hist,
                self.hist_len: hist_len
            })
        loss_sum += loss
        logits_arr = np.append(logits_arr, logits)
        y_arr = np.append(y_arr, y)
    # AUC over all concatenated predictions; loss averaged per example.
    auc = roc_auc_score(y_arr, logits_arr)
    log_data = {
        'name': name,
        'loss': loss_sum / len(data_set),
        'auc': auc,
    }
    print('Eval {name} : avg loss = {loss} auc = {auc}'.format(**log_data))
def eval(sess, test_set, model, behavior_type=None):
    """Score every batch of `test_set` and return the overall ROC-AUC."""
    scores, labels = [], []
    for _, uij in DataInput(test_set, FLAGS.test_batch_size):
        labels.extend(uij[2])
        scores.extend(model.eval(sess, uij, behavior_type).tolist())
    # Sanity check: one score per label.
    assert(len(labels) == len(scores))
    test_auc = metrics.roc_auc_score(labels, scores)
    # model.eval_writer.add_summary(
    #     summary=tf.Summary(value=[tf.Summary.Value(tag='Eval AUC', simple_value=test_auc)]),
    #     global_step=model.global_step.eval())
    return test_auc
def predict(self, sess, data_set, **kwargs):
    """Score `data_set` and return click probabilities (sigmoid of the
    model's logits) as a flat numpy array."""
    raw_logits = np.array([])
    for _, row in DataInput(data_set, 128000):
        reviewerId, asin, y, hist, hist_len = row
        feed = {
            self.uid: reviewerId,
            self.tid: asin,
            self.y: y,
            self.hist_i: hist,
            self.hist_len: hist_len,
        }
        batch_logits = sess.run([self.logits], feed_dict=feed)
        raw_logits = np.append(raw_logits, batch_logits)
    # Map log-odds to probabilities.
    return 1 / (1 + np.exp(-raw_logits))
def _eval(sess, model):
    """Evaluate `model` over the global `test_set` using the social/read
    side-inputs, and return (avg_loss, Precision, NDCG, MAE, RMSE).

    NOTE(review): get_metric returns 7 metrics but only 5 are returned here
    (AUC, GPrecision, GAUC are discarded) — confirm intended.
    """
    loss_sum = 0.
    batch = 0
    score_label = []
    for _, datainput, u_readinput, u_friendinput, uf_readinput, u_read_l, u_friend_l, uf_read_linput, \
            i_readinput, i_friendinput, if_readinput, i_linkinput, i_read_l, i_friend_l, if_read_linput in \
            DataInput(test_set, u_read_list, u_friend_list, uf_read_list, i_read_list, i_friend_list,
                      if_read_list, i_link_list, test_batch_size, trunc_len):
        score_, loss = model.eval(sess, datainput, u_readinput, u_friendinput, uf_readinput, u_read_l,
                                  u_friend_l, uf_read_linput, i_readinput, i_friendinput, if_readinput,
                                  i_linkinput, i_read_l, i_friend_l, if_read_linput, lambda1, lambda2)
        # Collect (item?, score, label) triples for get_metric — presumably
        # datainput[1] is the item/user id and datainput[2] the label; verify
        # against DataInput.
        for i in range(len(score_)):
            score_label.append([datainput[1][i], score_[i], datainput[2][i]])
        loss_sum += loss
        batch += 1
    Precision, NDCG, AUC, GPrecision, GAUC, MAE, RMSE = get_metric(score_label)
    return loss_sum / batch, Precision, NDCG, MAE, RMSE
def _eval_logdata(sess, model, dataset, batch_size, type):
    """Score `dataset`, compute ROC-AUC, and update the global best-AUC
    trackers; saves a checkpoint when the test AUC improves.

    NOTE(review): the parameter `type` shadows the builtin; expected values
    are 'train' or 'test'.
    """
    score_arr = []
    y = []
    for _, uij in DataInput(dataset, batch_size):
        score_ = model.eval_logdata(sess, uij)
        score_arr.append(np.squeeze(score_[0]))
        y.append(np.asarray(uij[2]))
    score_arr = np.hstack(score_arr)
    y = np.hstack(np.asarray(y))
    Auc = roc_auc_score(y, score_arr)
    global best_auc_train
    global best_auc_test
    if type == 'train' and best_auc_train < Auc:
        best_auc_train = Auc
    if type == 'test' and best_auc_test < Auc:
        best_auc_test = Auc
        # Checkpoint only on test-AUC improvement.
        model.save(sess, 'save_path/ckpt')
    return Auc
def main_train():
    """Train the model for 50 epochs over the global `train_set`, evaluating
    every 10000 global steps and dropping the LR at step 336000."""
    tf.reset_default_graph()
    model = Model()
    # saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # Baseline evaluation before training.
        print('test_gauc: %.4f\t test_auc: %.4f' % _eval2(sess, model))
        sys.stdout.flush()
        lr = 1.0
        start_time = time.time()
        for _ in range(50):
            random.shuffle(train_set)
            epoch_size = round(len(train_set) / hp.train_batch_size)
            loss_sum = 0.0
            for _, uij in DataInput(train_set, hp.train_batch_size):
                loss = model.train(sess, uij, lr)
                loss_sum += loss
                if model.global_step.eval() % 10000 == 0:
                    test_gauc, Auc = _eval(sess, model,
                                           model.global_step.eval(),
                                           model.saver)
                    # NOTE(review): loss is divided by 1000 but accumulated
                    # over a 10000-step window — confirm the denominator.
                    print(
                        'Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_GAUC: %.4f\tEval_AUC: %.4f'
                        % (model.global_epoch_step.eval(),
                           model.global_step.eval(), loss_sum / 1000,
                           test_gauc, Auc))
                    sys.stdout.flush()
                    loss_sum = 0.0
                # One-shot learning-rate drop late in training.
                if model.global_step.eval() % 336000 == 0:
                    lr = 0.1
            print('Epoch %d DONE\tCost time: %.2f' %
                  (model.global_epoch_step.eval(), time.time() - start_time))
            sys.stdout.flush()
            model.global_epoch_step_op.eval()
        print('best test_gauc:', best_auc)
        sys.stdout.flush()
def inference(self, data):
    """Restore the saved ATRank checkpoint and return the top-10 item (asin)
    IDs for the final batch of `data`.

    :param data: raw input converted via self.convert_data
    :return: list of 10 argmax indices into the logit vector
    """
    converted_data = self.convert_data(data)
    with tf.Session() as sess:
        meta_path = './save_path/atrank-815240.meta'
        saver = tf.train.import_meta_graph(meta_path)
        saver.restore(sess, "./save_path/atrank-815240")
        print('model restored')
        # whether it's training or not
        # is_training = tf.placeholder(tf.bool, [])
        # BUG FIX: do NOT run tf.global_variables_initializer() after restore —
        # it overwrites the restored weights with fresh random values. Local
        # variables (e.g. metric accumulators) are not stored in checkpoints,
        # so they still need explicit initialization.
        sess.run(tf.local_variables_initializer())
        for _, uij in DataInput(converted_data, config['item_count']):
            max_asin = []
            logit = self.Model.inference(sess, uij)
            # Greedy top-10: repeatedly take the argmax and mask it out.
            for i in range(10):
                max_asin.append(np.argmax(logit))
                logit[max_asin[i]] = -1
        # NOTE(review): only the last batch's top-10 is returned — confirm
        # the loop is expected to run exactly once.
        return max_asin
# Training driver (script-level): builds the model, then runs up to 10000
# shuffled epochs, evaluating every 1000 global steps.
model = Model(user_count, item_count)
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
#model.restore(sess, '/home/myronwu/save_model/DUAL_GAT.ckpt')
sys.stdout.flush()
start_time = time.time()
Train_loss_pre = 100
bestP, bestR, bestF1, bestAUC = 0.0, 0.0, 0.0, 0.0
for _ in range(10000):
    random.shuffle(train_set)
    epoch_size = round(len(train_set) / train_batch_size)
    iter_num, loss_r_sum, loss_reg_sum = 0, 0., 0.
    for _, datainput1, datainput2, u_hisinput, u_posinput, u_his_l, i_hisinput, i_posinput, i_his_l in DataInput(
            train_set, u_his_list, i_his_list, train_batch_size, trunc_len,
            user_count, item_count):
        # One step; returns the ranking loss and the regularization loss.
        loss_r, loss_reg = model.train(sess, datainput1, datainput2,
                                       u_hisinput, u_posinput, u_his_l,
                                       i_hisinput, i_posinput, i_his_l,
                                       learning_rate, keep_prob, lambda_reg,
                                       h1, h2)
        iter_num += 1
        loss_r_sum += loss_r
        loss_reg_sum += loss_reg
        # Periodic evaluation every 1000 global steps.
        if model.global_step.eval() % 1000 == 0:
            Train_loss_r = loss_r_sum / iter_num
            Train_loss_reg = loss_reg_sum / iter_num
            Test_loss, P, R, F1, AUC, NDCG, GP, GAUC = _eval(
                sess, model, test_set_list)
# Training driver (script-level) for the social/read-history model:
# up to 10000 shuffled epochs, evaluating every 1000 global steps.
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
sys.stdout.flush()
lr = learning_rate
start_time = time.time()
Train_loss_pre = 100
best_mae = 0.
for _ in range(10000):
    random.shuffle(train_set)
    epoch_size = round(len(train_set) / train_batch_size)
    iter_num, loss_sum = 0, 0.
    for _, datainput, u_readinput, u_friendinput, uf_readinput, u_read_l, u_friend_l, uf_read_linput, \
            i_readinput, i_friendinput, if_readinput, i_linkinput, i_read_l, i_friend_l, if_read_linput in \
            DataInput(train_set, u_read_list, u_friend_list, uf_read_list, i_read_list, i_friend_list,
                      if_read_list, i_link_list, train_batch_size, trunc_len):
        loss = model.train(sess, datainput, u_readinput, u_friendinput, uf_readinput, u_read_l, u_friend_l,
                           uf_read_linput, i_readinput, i_friendinput, if_readinput, i_linkinput, i_read_l,
                           i_friend_l, if_read_linput, lr, keep_prob, lambda1, lambda2)
        iter_num += 1
        loss_sum += loss
        if model.global_step.eval() % 1000 == 0:
            Train_loss = loss_sum / iter_num
            # loss, precision, NDCG, MAE, MRSE
            Test_loss, P, N, MAE, MRSE = _eval(sess, model)
            # NOTE(review): Train_loss is recomputed here with the same
            # expression as above — the second assignment is redundant.
            Train_loss = loss_sum / iter_num
            print(
                'Epoch %d Step %d Train_loss: %.4f Test_loss: %.4f P@3: %.4f P@5: %.4f P@10: %.4f NDCG@3: %.4f NDCG@5: %.4f NDCG@10: %.4f MAE: %.4f MRSE: %.4f'
                % (model.global_epoch_step.eval(), model.global_step.eval(),
                   Train_loss, Test_loss, P[2], P[4], P[9], N[2], N[4], N[9],
                   MAE, MRSE))
def run():
    """Full training driver: builds the model, wires up TensorBoard writers
    and a SavedModel export signature, then trains for 10 epochs. Every 100
    batches it evaluates on train/test slices, checkpoints, and exports both
    a SavedModel and a frozen .pb graph."""
    with tf.Session() as sess:
        count_dict = read_dict('/data/count_dict.txt')
        count_dict['lamda'] = 0.1
        model = Model(**count_dict)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        merge = tf.summary.merge_all()
        # Fresh TensorBoard log directories each run.
        if tf.gfile.Exists("logs"):
            tf.gfile.DeleteRecursively("logs")
        train_writer = tf.summary.FileWriter("logs/train", sess.graph)
        test_writer = tf.summary.FileWriter("logs/test")
        # model export
        saver = tf.train.Saver()
        # Tensor-info map for the SavedModel serving signature.
        inputs = {
            "user_feature": tf.saved_model.utils.build_tensor_info(model.user_feature),
            "hist_cate": tf.saved_model.utils.build_tensor_info(model.hist_cate),
            "hist_keyword": tf.saved_model.utils.build_tensor_info(model.hist_keyword),
            "hist_keyword2": tf.saved_model.utils.build_tensor_info(model.hist_keyword2),
            "hist_tag1": tf.saved_model.utils.build_tensor_info(model.hist_tag1),
            "hist_tag2": tf.saved_model.utils.build_tensor_info(model.hist_tag2),
            "hist_tag3": tf.saved_model.utils.build_tensor_info(model.hist_tag3),
            "hist_ks1": tf.saved_model.utils.build_tensor_info(model.hist_ks1),
            "hist_ks2": tf.saved_model.utils.build_tensor_info(model.hist_ks2),
            "sl": tf.saved_model.utils.build_tensor_info(model.sl),
            "item_feature": tf.saved_model.utils.build_tensor_info(model.item_feature),
            "cate": tf.saved_model.utils.build_tensor_info(model.cate),
            "keyword": tf.saved_model.utils.build_tensor_info(model.keyword),
            "keyword2": tf.saved_model.utils.build_tensor_info(model.keyword2),
            "tag1": tf.saved_model.utils.build_tensor_info(model.tag1),
            "tag2": tf.saved_model.utils.build_tensor_info(model.tag2),
            "tag3": tf.saved_model.utils.build_tensor_info(model.tag3),
            "ks1": tf.saved_model.utils.build_tensor_info(model.ks1),
            "ks2": tf.saved_model.utils.build_tensor_info(model.ks2),
            "keep_prob": tf.saved_model.utils.build_tensor_info(model.keep_prob)
        }
        outputs = {
            "sig_logits": tf.saved_model.utils.build_tensor_info(model.sig_logits)
        }
        signature_def_map = {
            "predict": tf.saved_model.signature_def_utils.build_signature_def(
                inputs=inputs,
                outputs=outputs,
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
        }
        # model_exporter = tf.contrib.session_bundle.exporter.Exporter(saver)
        # model_exporter.init(sess.graph.as_graph_def(),
        #     named_graph_signatures={
        #         'inputs': tf.contrib.session_bundle.exporter.generic_signature(inputs),
        #         'outputs': tf.contrib.session_bundle.exporter.generic_signature(outputs)})
        sys.stdout.flush()
        lr = 0.001
        keep_prob = 0.6
        start_time = time.time()
        for epoch in range(10):
            # random.shuffle(train_set)
            loss_sum = 0.0
            for i, (y, user_feature, item_feature, cate, keyword, keyword2,
                    tag1, tag2, tag3, ks1, ks2, hist_cate, hist_keyword,
                    hist_keyword2, hist_tag1, hist_tag2, hist_tag3, hist_ks1,
                    hist_ks2, sl) in \
                    DataInput(train_set, train_batch_size):
                step = epoch * batch_step + i
                # if step / 2000 % 3 == 0:
                #     lr = 0.001
                # if step / 2000 % 3 == 1:
                #     lr = 0.0005
                # if step / 2000 % 3 == 2:
                #     lr = 0.0001
                train_summary, loss = model.train(
                    sess, merge,
                    (y, user_feature, item_feature, cate, keyword, keyword2,
                     tag1, tag2, tag3, ks1, ks2, hist_cate, hist_keyword,
                     hist_keyword2, hist_tag1, hist_tag2, hist_tag3, hist_ks1,
                     hist_ks2, sl), lr, keep_prob)
                loss_sum += loss
                # Every 100 batches: evaluate, log, checkpoint, and export.
                if i % 100 == 0:
                    test_auc, test_recall, test_precision, test_accuracy, test_loss = _eval(
                        sess, merge, model, step, test_writer)
                    train_auc, train_recall, train_precision, train_accuracy, train_loss = _eval_train(
                        sess, model)
                    print(
                        'epoch %d step %d t_step %d,train:b_loss:%.4f loss:%.4f auc:%.4f recall:%.4f pre:%.4f acc:%.4f'
                        % (epoch, i, step, loss_sum / 100, train_loss,
                           train_auc, train_recall, train_precision,
                           train_accuracy))
                    print(
                        'epoch %d step %d t_step %d,test: loss:%.4f auc:%.4f recall:%.4f pre:%.4f acc:%.4f'
                        % (epoch, i, step, test_loss, test_auc, test_recall,
                           test_precision, test_accuracy))
                    sys.stdout.flush()
                    loss_sum = 0.0
                    saver.save(sess, 'save_path/ckpt')
                    # Export a SavedModel tagged with the current step.
                    builder = tf.saved_model.builder.SavedModelBuilder(
                        'model_path/' + str(int(step)))
                    builder.add_meta_graph_and_variables(
                        sess,
                        [tf.saved_model.tag_constants.SERVING],
                        signature_def_map=signature_def_map)
                    builder.save()
                    # Also freeze the graph to a .pb with the sig_logits output.
                    output_graph_def = tf.graph_util.convert_variables_to_constants(
                        sess, sess.graph_def, output_node_names=['sig_logits'])
                    with tf.gfile.FastGFile('save_path/din.pb' + '.' + str(int(step)),
                                            mode='wb') as f:
                        f.write(output_graph_def.SerializeToString())
                else:
                    train_writer.add_summary(train_summary, global_step=step)
            print('Epoch %d DONE\tCost time: %.2f' %
                  (epoch, time.time() - start_time))
            sys.stdout.flush()
        sys.stdout.flush()
def train():
    """Training driver: loads the pickled dataset, picks the train/test split
    by net_type suffix ('i'/'q'/'c'), builds the model, and trains for
    FLAGS.max_epochs with periodic AUC evaluation."""
    start_time = time.time()
    if FLAGS.from_scratch:
        if tf.gfile.Exists(FLAGS.model_dir):
            tf.gfile.DeleteRecursively(FLAGS.model_dir)
        tf.gfile.MakeDirs(FLAGS.model_dir)
    # Loading data
    print('Loading data..', flush=True)
    # Pickle stream order must match the writer exactly.
    with open('dataset.pkl', 'rb') as f:
        ai_train_set = pickle.load(f)
        ai_test_set = pickle.load(f)
        item_feat_shop_list = pickle.load(f)
        item_feat_cate_list = pickle.load(f)
        item_feat_brand_list = pickle.load(f)
        coupon_feat_shop_list = pickle.load(f)
        coupon_feat_cate_list = pickle.load(f)
        coupon_feat_type_list = pickle.load(f)
        user_count, item_count, shop_count, cate_count, brand_count, action_count, query_count, coupon_count = pickle.load(f)
        aq_train_set = pickle.load(f)
        aq_test_set = pickle.load(f)
        ac_train_set = pickle.load(f)
        ac_test_set = pickle.load(f)
    # Config GPU options
    if FLAGS.per_process_gpu_memory_fraction == 0.0:
        gpu_options = tf.GPUOptions(allow_growth=True)
    elif FLAGS.per_process_gpu_memory_fraction == 1.0:
        gpu_options = tf.GPUOptions()
    else:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.per_process_gpu_memory_fraction)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.cuda_visible_devices
    # Build Config
    config = OrderedDict(sorted(FLAGS.__flags.items()))
    config['user_count'] = user_count
    config['item_count'] = item_count
    config['shop_count'] = shop_count
    config['cate_count'] = cate_count
    config['brand_count'] = brand_count
    config['action_count'] = action_count
    config['query_count'] = query_count
    config['coupon_count'] = coupon_count
    # The net_type suffix selects which train/test pair is used.
    if config['net_type'].endswith('i'):
        train_set = ai_train_set
        test_set = ai_test_set
    elif config['net_type'].endswith('q'):
        train_set = aq_train_set
        test_set = aq_test_set
    elif config['net_type'].endswith('c'):
        train_set = ac_train_set
        test_set = ac_test_set
    else:
        print('net_type error')
        exit(1)
    # Initiate TF session
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Create a new model or reload existing checkpoint
        model = create_model(sess, config,
                             (item_feat_shop_list, item_feat_cate_list,
                              item_feat_brand_list, coupon_feat_shop_list,
                              coupon_feat_cate_list, coupon_feat_type_list))
        print('Init finish.\tCost time: %.2fs' % (time.time()-start_time),
              flush=True)
        # Eval init AUC
        best_auc = eval(sess, test_set, model)
        print('Init AUC: %.4f' % best_auc)
        # Start training
        lr = FLAGS.learning_rate
        epoch_size = round(len(train_set) / FLAGS.train_batch_size)
        print('Training..\tmax_epochs: %d\tepoch_size: %d' %
              (FLAGS.max_epochs, epoch_size), flush=True)
        start_time, avg_loss = time.time(), 0.0
        for _ in range(FLAGS.max_epochs):
            random.shuffle(train_set)
            for _, uij in DataInput(train_set, FLAGS.train_batch_size):
                # Only emit summaries on display_freq boundaries.
                add_summary = True if model.global_step.eval() % FLAGS.display_freq == 0 else False
                step_loss = model.train(sess, uij, lr, add_summary)
                avg_loss += step_loss
                if model.global_step.eval() % FLAGS.eval_freq == 0:
                    test_auc = eval(sess, test_set, model)
                    print('Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_AUC: %.4f' %
                          (model.global_epoch_step.eval(),
                           model.global_step.eval(),
                           avg_loss / FLAGS.eval_freq, test_auc), flush=True)
                    avg_loss = 0.0
                    # Track (but do not save — saving is commented out) the best AUC.
                    if test_auc > 0.60 and test_auc > best_auc:
                        best_auc = test_auc
                        # model.save(sess)
            # if model.global_epoch_step.eval() == 2:
            #     lr = 0.1
            print('Epoch %d DONE\tCost time: %.2f' %
                  (model.global_epoch_step.eval(), time.time()-start_time),
                  flush=True)
            model.global_epoch_step_op.eval()
        print('best test_auc:', best_auc)
        print('Finished', flush=True)
def train():
    """Training driver for the sequence model: loads the pickled dataset for
    `p_model`, writes the run arguments to a file, and trains for
    FLAGS.max_epochs, evaluating NDCG@K / HR@K on valid and test sets every
    Config.eval_every_num_epochs epochs."""
    start_time = time.time()
    if FLAGS.from_scratch:
        if tf.gfile.Exists(FLAGS.model_dir):
            tf.gfile.DeleteRecursively(FLAGS.model_dir)
        tf.gfile.MakeDirs(FLAGS.model_dir)
    # Load file: dataset_model.pickle
    dataset_file_name = os.path.join(folder_dataset,
                                     Config.dict_pkl_dataset[p_model])
    # BUG FIX: was a Python-2 print statement ("print \"load file :\", ...")
    # in an otherwise Python-3 file — SyntaxError on py3.
    print("load file :", dataset_file_name)
    (train_set, valid_set, test_set, user_count, item_count, cate_count,
     dict_item_cate) = \
        Dataset.load_dataset(p_model, dataset_file_name)
    # Config GPU options
    if FLAGS.per_process_gpu_memory_fraction == 0.0:
        gpu_options = tf.GPUOptions(allow_growth=True)
    elif FLAGS.per_process_gpu_memory_fraction == 1.0:
        gpu_options = tf.GPUOptions()
    else:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.per_process_gpu_memory_fraction)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.cuda_visible_devices
    # Build Config
    #config = OrderedDict(sorted(FLAGS.__flags.items()))
    config = OrderedDict(tf.app.flags.FLAGS.flag_values_dict().items())
    config['user_count'] = user_count
    config['item_count'] = item_count
    config['cate_count'] = cate_count
    # Write args to a per-train-type file for reproducibility.
    file_arg = "args.txt_%s" % config["train_type"]
    with open(os.path.join(config["model_dir"], file_arg), 'w') as f:
        # BUG FIX: was sorted(vars(config).items(), ...) — vars() on an
        # OrderedDict raises TypeError (no __dict__); iterate the dict itself.
        # (The redundant f.close() inside the with-block was also removed.)
        f.write('\n'.join([
            str(k) + ',' + str(v)
            for k, v in sorted(config.items(), key=lambda x: x[0])
        ]))
    # Initiate TF session
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Create a new model or reload existing checkpoint
        model = create_model(sess, config, dict_item_cate)
        print('Init finish.\tCost time: %.2fs' % (time.time() - start_time))
        # Baseline ranking metrics before training.
        (NDCG_K, Hit_K) = _eval_negN(sess, test_set, model, item_count)
        print('Init NDCG_K: %.4f, Hit_K: %.4f' % (NDCG_K, Hit_K))
        # Start training
        lr = FLAGS.learning_rate
        epoch_size = round(len(train_set) / FLAGS.train_batch_size)
        print('Training..\tmax_epochs: %d\tepoch_size: %d' %
              (FLAGS.max_epochs, epoch_size))
        start_time, avg_loss, epoch_loss, best_auc = time.time(), 0.0, 0.0, 0.0
        best_epoch, best_NDCG_K, best_Hit_K = 0, 0., 0.
        best_valid = [0., 0.]
        best_test = [0., 0.]
        T = 0.0
        t0 = time.time()
        for epoch in range(FLAGS.max_epochs):
            epoch_loss = 0.
            random.shuffle(train_set)
            batch_cnt = 0
            for _, uij in DataInput(train_set, FLAGS.train_batch_size):
                add_summary = bool(
                    model.global_step.eval() % FLAGS.display_freq == 0)
                step_loss = model.train(sess, uij, lr, add_summary)
                avg_loss += step_loss
                epoch_loss += step_loss
                batch_cnt += 1
                # Periodic loss report.
                if model.global_step.eval() % FLAGS.eval_freq == 0:
                    print('Epoch %d Global_step %d\tTrain_loss: %.4f\t' %
                          (model.global_epoch_step.eval(),
                           model.global_step.eval(),
                           avg_loss / FLAGS.eval_freq))
                    avg_loss = 0.0
            epoch_loss_avg = float(epoch_loss) / batch_cnt
            print("Epoch %d DONE\tepoch_loss_avg:%.4f\tCost time: %.2f(s)" %
                  (model.global_epoch_step.eval(), epoch_loss_avg,
                   time.time() - start_time))
            # Periodic ranking evaluation on valid and test sets.
            if epoch % Config.eval_every_num_epochs == 0:
                t1 = time.time() - t0
                T += t1
                t_valid = _eval_negN(sess, valid_set, model, item_count)
                t_test = _eval_negN(sess, test_set, model, item_count)
                str_info = "\tEpoch:%d, Global_step %d, time: %f(s), valid (NDCG@10: %.4f, HR@10: %.4f), test (NDCG@10: %.4f, HR@10: %.4f)\n" % (
                    epoch, model.global_step.eval(), T, t_valid[0],
                    t_valid[1], t_test[0], t_test[1])
                print(str_info)
                t0 = time.time()
                best_epoch, best_valid, best_test = Metrics.save_best_result(
                    epoch, t_valid, t_test, best_epoch, best_valid, best_test)
            model.global_epoch_step_op.eval()
        model.save(sess)
        str_info = "Best epoch:%d, best_test_ndcg_K:%f, best_test_hit_K:%f, K:%d\n" % (
            best_epoch, best_test[0], best_test[1], Config.metrics_K)
        print(str_info)
        print('Finished')
def main(_):
    """Entry point: builds paths from the architecture string, then either
    inspects item bias, runs test-only prediction, or trains for
    FLAGS.num_epochs with evaluation every 100 global steps."""
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Architecture string is "emb_f1_f2", e.g. "64_80_40".
        s = FLAGS.architecture.split('_')
        arch_dict = {
            'emb_dim': int(s[0]),
            'f1_dim': int(s[1]),
            'f2_dim': int(s[2])
        }
        path_suffix = _make_suffix(arch_dict)
        model_prefix = os.path.join("save_path", "save_{}".format(path_suffix))
        model_path = os.path.join(model_prefix, "ckpt")
        log_dir = os.path.join("log", "log_{}".format(path_suffix))
        pred_dir = "prediction"
        item_b_pred_file = os.path.join(
            pred_dir, "item_b_pred_{}.csv".format(path_suffix))
        prediction_file = os.path.join(
            pred_dir, "wepick_pred_{}.csv".format(path_suffix))
        if os.path.exists(model_prefix) == False:
            os.makedirs(model_prefix)
        if os.path.exists(log_dir) == False:
            os.makedirs(log_dir)
        if os.path.exists(pred_dir) == False:
            os.makedirs(pred_dir)
        model = Model(user_count, item_count, cate_count, cate_list,
                      arch_dict, FLAGS.use_item_embedding)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # Inspection mode: dump item-bias predictions and exit.
        if FLAGS.inspect_item_b:
            model.restore(sess, model_path)
            inspect_item_b(sess, model, item_b_pred_file)
            return 0
        # Test-only mode: restore and predict, no training.
        if FLAGS.testonly:
            model.restore(sess, model_path)
            _predict(sess, model, prediction_file)
            return 0
        writer = tf.summary.FileWriter(log_dir, sess.graph)
        # Baseline evaluation before training.
        print('test_gauc: %.4f\t test_auc: %.4f' %
              _eval(sess, model, model_path))
        sys.stdout.flush()
        lr = 1.0
        start_time = time.time()
        for _ in range(FLAGS.num_epochs):
            random.shuffle(train_set)
            epoch_size = round(len(train_set) / FLAGS.batch_size)
            loss_sum = 0.0
            loss_count = 0
            for _, uij in DataInput(train_set, FLAGS.batch_size):
                summary, loss = model.train(sess, uij, lr)
                loss_sum += loss
                loss_count += 1
                # Evaluate + log every 100 global steps.
                if model.global_step.eval() % 100 == 0:
                    test_gauc, Auc = _eval(sess, model, model_path)
                    writer.add_summary(summary, model.global_step.eval())
                    print(
                        'Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_GAUC: %.4f\tEval_AUC: %.4f'
                        % (model.global_epoch_step.eval(),
                           model.global_step.eval(),
                           loss_sum / loss_count, test_gauc, Auc))
                    sys.stdout.flush()
                    loss_sum = 0.0
                    loss_count = 0
                # One-shot learning-rate drop late in training.
                if model.global_step.eval() % 336000 == 0:
                    lr = 0.1
            print('Epoch %d DONE\tCost time: %.2f' %
                  (model.global_epoch_step.eval(), time.time() - start_time))
            sys.stdout.flush()
            model.global_epoch_step_op.eval()
        writer.close()
        print('best test_gauc:', best_auc)
        sys.stdout.flush()
        return 0
def run_experiment(hparams):
    """Train or test the DIN-style model depending on hparams.run_mode.

    Loads the pickled train/test sets, defines local evaluation/prediction
    helpers, then either restores+predicts ("test" mode) or trains for 50
    epochs with GAUC/AUC evaluation every 1000 steps and a SavedModel export
    at the end.
    """
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    random.seed(1234)
    np.random.seed(1234)
    tf.set_random_seed(1234)
    train_batch_size = hparams.train_batch_size
    test_batch_size = hparams.test_batch_size
    with file_io.FileIO(",".join(hparams.train_files), 'rb') as f:
        train_set = pickle.load(f)
        test_set = pickle.load(f)
        cate_list = pickle.load(f)
        user_count, item_count, cate_count = pickle.load(f)
    best_auc = 0.0

    def calc_auc(raw_arr):
        """Compute AUC from [noclick, click, score] triples via the
        trapezoidal sweep over scores sorted ascending."""
        # sort by pred value, from small to big
        arr = sorted(raw_arr, key=lambda d: d[2])
        auc = 0.0
        fp1, tp1, fp2, tp2 = 0.0, 0.0, 0.0, 0.0
        for record in arr:
            fp2 += record[0]  # noclick
            tp2 += record[1]  # click
            auc += (fp2 - fp1) * (tp2 + tp1)
            fp1, tp1 = fp2, tp2
        # if all nonclick or click, disgard
        threshold = len(arr) - 1e-3
        if tp2 > threshold or fp2 > threshold:
            return -0.5
        if tp2 * fp2 > 0.0:  # normal auc
            return (1.0 - auc / (2.0 * tp2 * fp2))
        else:
            return None

    def _auc_arr(score):
        """Expand (pos, neg) score pairs into calc_auc's triple format."""
        score_p = score[:, 0]
        score_n = score[:, 1]
        score_arr = []
        for s in score_p.tolist():
            score_arr.append([0, 1, s])
        for s in score_n.tolist():
            score_arr.append([1, 0, s])
        return score_arr

    def _eval(sess, model, best_auc):
        """Return (test_gauc, Auc); saves the model when GAUC improves.

        NOTE(review): rebinding the `best_auc` parameter does not update the
        enclosing variable, so the save-on-improvement check effectively
        compares against the value passed in each call.
        """
        auc_sum = 0.0
        score_arr = []
        for _, uij in DataInputTest(test_set, test_batch_size):
            auc_, score_ = model.eval(sess, uij)
            score_arr += _auc_arr(score_)
            auc_sum += auc_ * len(uij[0])
        test_gauc = auc_sum / len(test_set)
        Auc = calc_auc(score_arr)
        if best_auc < test_gauc:
            best_auc = test_gauc
            model.save(sess, hparams.job_dir)
        return test_gauc, Auc

    def _export(sess, uij, lr, logits_all, path):
        """Export the trained graph as a SavedModel under hparams.export_dir."""
        tf.saved_model.simple_save(sess,
                                   hparams.export_dir,
                                   inputs={
                                       'u': model.u,
                                       'hist_i': model.hist_i,
                                       'sl': model.sl
                                   },
                                   outputs={'logits_all': model.logits_all})

    def build_i_map(keys):
        """
        Make inverse map for keys: i -> item
        :param keys: listing items in order.
        :return:
        """
        # BUG FIX: was dict(zip(range(len(keys), keys))) — the misplaced
        # parenthesis made range() receive two bad args (TypeError) and zip
        # a single argument.
        return dict(zip(range(len(keys)), keys))

    def restore_info(uij, predicted, dic):
        # NOTE(review): this helper only rebinds `u` repeatedly and returns
        # nothing — looks unfinished; confirm before relying on it.
        iu, ii, _, ihist_i, _ = uij
        u = dic['deal_key'][iu]
        for i in range(len(iu[0])):
            u = dic['deal_key'][iu[i]]

    def _predict(sess):
        """Score the test set and write per-user top-30 ranked deals to
        hparams.pred_out_path as 'user,hist,deal/score:...' lines."""
        with file_io.FileIO(",".join(hparams.train_files), 'rb') as f:
            wepick_data = pickle.load(f)
        total_model = 0
        total_time = 0
        start = time.time()
        with file_io.FileIO(",".join(hparams.pred_out_path), 'w') as pred_f:
            for _, uij in DataInputTest(test_set, test_batch_size):
                outputs = []
                inf_start = time.time()
                users, histories, lengths = uij[0], uij[3], uij[4]
                predicted = model.test(sess, users, histories, lengths)
                inf_end = time.time()
                total_model += inf_end - inf_start
                for k in range(len(users)):
                    u = wepick_data['user_key'][users[k]]
                    hist = list(
                        map(lambda x: wepick_data['deal_key'][x],
                            histories[k][0:lengths[k]]))
                    # Rank deals by predicted score, descending.
                    sort_i = np.argsort(predicted[k, :])
                    sort_i = np.fliplr([sort_i])[0]
                    order = list(
                        map(
                            lambda x:
                            (wepick_data['deal_key'][x], predicted[k, x]),
                            sort_i))
                    outputs.append((u, hist, order))
                for u, hist, order in outputs:
                    h = '-'.join(map(lambda x: str(x), hist))
                    s = ':'.join(
                        map(lambda x: "{}/{:.2f}".format(x[0], x[1]),
                            order[:30]))
                    pred_f.write("{},{},{}\n".format(u, h, s))
        total_time += (time.time() - start)
        # BUG FIX: was sys.stderr.wirte(...) — AttributeError at runtime.
        sys.stderr.write("Elapsed total {}: model {}\n".format(
            total_time, total_model))

    def restore(sess, path):
        saver = tf.train.Saver()
        saver.restore(sess, save_path=path)

    model = Model(user_count, item_count, cate_count, cate_list,
                  hparams.variable_strategy)
    # gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        if hparams.run_mode == "test":
            restore(sess, hparams.job_dir)
            _predict(sess)
            return 0
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        writer = tf.summary.FileWriter('log', sess.graph)
        print('test_gauc: %.4f\t test_auc: %.4f' %
              _eval(sess, model, best_auc))
        sys.stdout.flush()
        lr = 1.0
        start_time = time.time()
        for _ in range(50):
            random.shuffle(train_set)
            epoch_size = round(len(train_set) / train_batch_size)
            loss_sum = 0.0
            for _, uij in DataInput(train_set, train_batch_size):
                loss, logits_all = model.train(sess, uij, lr,
                                               hparams.variable_strategy)
                loss_sum += loss
                if model.global_step.eval() % 1000 == 0:
                    test_gauc, Auc = _eval(sess, model, best_auc)
                    print(
                        'Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_GAUC: %.4f\tEval_AUC: %.4f'
                        % (model.global_epoch_step.eval(),
                           model.global_step.eval(), loss_sum / 1000,
                           test_gauc, Auc))
                    sys.stdout.flush()
                    loss_sum = 0.0
                if model.global_step.eval() % 336000 == 0:
                    lr = 0.1
            print('Epoch %d DONE\tCost time: %.2f' %
                  (model.global_epoch_step.eval(), time.time() - start_time))
            sys.stdout.flush()
            model.global_epoch_step_op.eval()
        print('best test_gauc:', best_auc)
        _export(sess, uij, lr, logits_all, hparams.job_dir)
        sys.stdout.flush()
def _print_at_k(values):
    """Print metric@k values (k = 1, 10, 20, 30, 40, 50) on a single line.

    Extracted: the original repeated this exact loop six times inside train().
    """
    for i, k in zip(range(6), [1, 10, 20, 30, 40, 50]):
        print('@' + str(k) + ' = %.4f' % values[i], end=' ')
    print()


def train():
    """Train the model, periodically evaluating AUC / precision@k / recall@k.

    Reads train/test sets from 'dataset.pkl', builds a config dict from FLAGS,
    and checkpoints the model whenever test AUC improves past 0.8 (after step
    20000). No parameters; all configuration comes from module-level FLAGS.
    """
    start_time = time.time()
    if FLAGS.from_scratch:
        # Wipe any previous checkpoints for a fresh run.
        if tf.gfile.Exists(FLAGS.model_dir):
            tf.gfile.DeleteRecursively(FLAGS.model_dir)
        tf.gfile.MakeDirs(FLAGS.model_dir)

    # Loading data
    print('Loading data..', flush=True)
    with open('dataset.pkl', 'rb') as f:
        train_set = pickle.load(f)
        test_set = pickle.load(f)
        user_count, item_count, cate_count = pickle.load(f)
        item_cate_list = pickle.load(f)

    # Config GPU options
    if FLAGS.per_process_gpu_memory_fraction == 0.0:
        gpu_options = tf.GPUOptions(allow_growth=True)
    elif FLAGS.per_process_gpu_memory_fraction == 1.0:
        gpu_options = tf.GPUOptions()
    else:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.per_process_gpu_memory_fraction)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.cuda_visible_devices

    # Build Config: flatten FLAGS values plus dataset statistics.
    config = OrderedDict(sorted(FLAGS.__flags.items()))
    for k, v in config.items():
        config[k] = v.value
    config['user_count'] = user_count
    config['item_count'] = item_count
    config['cate_count'] = cate_count

    # Initiate TF session
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Create a new model or reload existing checkpoint
        model = create_model(sess, config, item_cate_list)
        print('Init finish.\tCost time: %.2fs' % (time.time()-start_time),
              flush=True)

        # Eval init AUC / precision / recall before any training.
        print('Init AUC: %.4f' % eval_auc(sess, test_set, model, config))
        print('Init precision:')
        prec = eval_prec(sess, test_set, model, config)
        _print_at_k(prec)
        print('Init recall:')
        recall = eval_recall(sess, test_set, model, config)
        _print_at_k(recall)

        # Start training
        lr = FLAGS.learning_rate
        epoch_size = round(len(train_set) / FLAGS.train_batch_size)
        print('Training..\tmax_epochs: %d\tepoch_size: %d' %
              (FLAGS.max_epochs, epoch_size), flush=True)

        start_time, avg_loss, best_auc = time.time(), 0.0, 0.0
        best_prec = [0, 0, 0, 0, 0, 0]
        best_recall = [0, 0, 0, 0, 0, 0]
        for _ in range(FLAGS.max_epochs):
            random.shuffle(train_set)

            for _, batch in DataInput(train_set, FLAGS.train_batch_size,
                                      config['SL']):
                add_summary = bool(
                    model.global_step.eval() % FLAGS.display_freq == 0)
                step_loss = model.train(sess, batch, lr, add_summary)
                avg_loss += step_loss

                if model.global_step.eval() % FLAGS.eval_freq == 0:
                    test_auc = eval_auc(sess, test_set, model, config)
                    # Training curve (time_line/auc_value are module-level).
                    time_line.append(time.time()-start_time)
                    auc_value.append(test_auc)
                    print('Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_auc: %.4f\t' %
                          (model.global_epoch_step.eval(),
                           model.global_step.eval(),
                           avg_loss / FLAGS.eval_freq,
                           test_auc),
                          flush=True)
                    print('Precision:')
                    prec = eval_prec(sess, test_set, model, config)
                    _print_at_k(prec)
                    print('Recall:')
                    recall = eval_recall(sess, test_set, model, config)
                    _print_at_k(recall)
                    avg_loss = 0.0

                    # Only track bests after warm-up; checkpoint on AUC record.
                    if model.global_step.eval() > 20000:
                        for i in range(6):
                            if prec[i] > best_prec[i]:
                                best_prec[i] = prec[i]
                            if recall[i] > best_recall[i]:
                                best_recall[i] = recall[i]
                        if test_auc > 0.8 and test_auc > best_auc:
                            best_auc = test_auc
                            model.save(sess)

                if model.global_step.eval() == 150000:
                    # Hard-coded learning-rate decay point.
                    lr = 0.1

            print('Epoch %d DONE\tCost time: %.2f' %
                  (model.global_epoch_step.eval(), time.time()-start_time),
                  flush=True)
            model.global_epoch_step_op.eval()

        model.save(sess)
        print('Best test_auc:', best_auc)
        print('Best precision:')
        _print_at_k(best_prec)
        print('Best recall:')
        _print_at_k(best_recall)
        print('Finished', flush=True)
def train():
    """Train the model and checkpoint it whenever test AUC exceeds 0.88.

    Loads train/test sets from 'dataset.pkl', assembles a config dict from
    FLAGS plus dataset statistics, and runs FLAGS.max_epochs epochs with
    periodic evaluation every FLAGS.eval_freq steps.
    """
    start_time = time.time()

    # Optionally discard previous checkpoints for a from-scratch run.
    if FLAGS.from_scratch:
        if tf.gfile.Exists(FLAGS.model_dir):
            tf.gfile.DeleteRecursively(FLAGS.model_dir)
        tf.gfile.MakeDirs(FLAGS.model_dir)

    print('Loading data..', flush=True)
    with open('dataset.pkl', 'rb') as pkl:
        train_set = pickle.load(pkl)
        test_set = pickle.load(pkl)
        cate_list = pickle.load(pkl)
        user_count, item_count, cate_count = pickle.load(pkl)

    # GPU memory policy: 0.0 -> grow on demand, 1.0 -> defaults,
    # anything else -> fixed fraction.
    mem_frac = FLAGS.per_process_gpu_memory_fraction
    if mem_frac == 0.0:
        gpu_options = tf.GPUOptions(allow_growth=True)
    elif mem_frac == 1.0:
        gpu_options = tf.GPUOptions()
    else:
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_frac)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.cuda_visible_devices

    # Flatten FLAGS into an ordered config dict, then attach dataset stats.
    config = OrderedDict(sorted(FLAGS.__flags.items()))
    for name, flag in config.items():
        config[name] = flag.value
    config['user_count'] = user_count
    config['item_count'] = item_count
    config['cate_count'] = cate_count

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Fresh model or restored checkpoint, depending on what exists.
        model = create_model(sess, config, cate_list)
        print('Init finish.\tCost time: %.2fs' % (time.time()-start_time),
              flush=True)

        # Baseline AUC before any training step.
        print('Init AUC: %.4f' % _eval(sess, test_set, model))

        lr = FLAGS.learning_rate
        epoch_size = round(len(train_set) / FLAGS.train_batch_size)
        print('Training..\tmax_epochs: %d\tepoch_size: %d' %
              (FLAGS.max_epochs, epoch_size), flush=True)

        start_time, avg_loss, best_auc = time.time(), 0.0, 0.0
        for _ in range(FLAGS.max_epochs):
            random.shuffle(train_set)

            for _, uij in DataInput(train_set, FLAGS.train_batch_size):
                add_summary = bool(
                    model.global_step.eval() % FLAGS.display_freq == 0)
                avg_loss += model.train(sess, uij, lr, add_summary)

                if model.global_step.eval() % FLAGS.eval_freq == 0:
                    test_auc = _eval(sess, test_set, model)
                    print('Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_AUC: %.4f' %
                          (model.global_epoch_step.eval(),
                           model.global_step.eval(),
                           avg_loss / FLAGS.eval_freq,
                           test_auc),
                          flush=True)
                    avg_loss = 0.0
                    # Checkpoint only genuinely good, record-setting models.
                    if test_auc > 0.88 and test_auc > best_auc:
                        best_auc = test_auc
                        model.save(sess)

                if model.global_step.eval() == 336000:
                    # Hard-coded learning-rate decay point.
                    lr = 0.1

            print('Epoch %d DONE\tCost time: %.2f' %
                  (model.global_epoch_step.eval(), time.time()-start_time),
                  flush=True)
            model.global_epoch_step_op.eval()
            model.save(sess)

        print('best test_auc:', best_auc)
        print('Finished', flush=True)
def run_experiment(hparams):
    """Train the model for 50 epochs, evaluating GAUC/AUC on the test set.

    BUG FIX vs. original: `best_auc` was only rebound *inside* the nested
    `_eval`, where assignment to the parameter cannot update the enclosing
    variable. Consequences in the original: the final "best test_gauc" print
    always showed 0.0, and the model was re-saved on every evaluation where
    test_gauc exceeded 0. The running best is now tracked by the caller and
    passed into `_eval`, so the checkpoint is written only on improvement.

    :param hparams: object with train_files, train_batch_size,
        test_batch_size, variable_strategy, job_dir attributes.
    """
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    random.seed(1234)
    np.random.seed(1234)
    tf.set_random_seed(1234)

    train_batch_size = hparams.train_batch_size
    test_batch_size = hparams.test_batch_size

    with file_io.FileIO(",".join(hparams.train_files), 'rb') as f:
        # NOTE(review): pickle.load — ensure these files are trusted input.
        train_set = pickle.load(f)
        test_set = pickle.load(f)
        cate_list = pickle.load(f)
        user_count, item_count, cate_count = pickle.load(f)

    best_auc = 0.0

    def calc_auc(raw_arr):
        """AUC from [noclick, click, score] triples via trapezoid accumulation.

        Returns -0.5 when all records are one class, None when either class
        count is zero, otherwise the AUC in [0, 1].
        """
        # sort by pred value, from small to big
        arr = sorted(raw_arr, key=lambda d: d[2])
        auc = 0.0
        fp1, tp1, fp2, tp2 = 0.0, 0.0, 0.0, 0.0
        for record in arr:
            fp2 += record[0]  # noclick
            tp2 += record[1]  # click
            auc += (fp2 - fp1) * (tp2 + tp1)
            fp1, tp1 = fp2, tp2
        # if all nonclick or click, discard
        threshold = len(arr) - 1e-3
        if tp2 > threshold or fp2 > threshold:
            return -0.5
        if tp2 * fp2 > 0.0:  # normal auc
            return (1.0 - auc / (2.0 * tp2 * fp2))
        else:
            return None

    def _auc_arr(score):
        """Expand (pos, neg) score columns into [noclick, click, score] rows."""
        score_p = score[:, 0]
        score_n = score[:, 1]
        score_arr = []
        for s in score_p.tolist():
            score_arr.append([0, 1, s])
        for s in score_n.tolist():
            score_arr.append([1, 0, s])
        return score_arr

    def _eval(sess, model, best_so_far):
        """Return (test_gauc, Auc); checkpoints when gauc beats best_so_far."""
        auc_sum = 0.0
        score_arr = []
        for _, uij in DataInputTest(test_set, test_batch_size):
            auc_, score_ = model.eval(sess, uij)
            score_arr += _auc_arr(score_)
            auc_sum += auc_ * len(uij[0])
        test_gauc = auc_sum / len(test_set)
        Auc = calc_auc(score_arr)
        if best_so_far < test_gauc:
            model.save(sess, hparams.job_dir)
        return test_gauc, Auc

    # gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        model = Model(user_count, item_count, cate_count, cate_list,
                      hparams.variable_strategy)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        test_gauc, Auc = _eval(sess, model, best_auc)
        best_auc = max(best_auc, test_gauc)  # FIX: caller tracks the best
        print('test_gauc: %.4f\t test_auc: %.4f' % (test_gauc, Auc))
        sys.stdout.flush()

        lr = 1.0
        start_time = time.time()
        for _ in range(50):
            random.shuffle(train_set)
            epoch_size = round(len(train_set) / train_batch_size)
            loss_sum = 0.0
            for _, uij in DataInput(train_set, train_batch_size):
                loss, logits_all = model.train(sess, uij, lr,
                                               hparams.variable_strategy)
                loss_sum += loss
                if model.global_step.eval() % 1000 == 0:
                    test_gauc, Auc = _eval(sess, model, best_auc)
                    best_auc = max(best_auc, test_gauc)  # FIX (see docstring)
                    print('Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_GAUC: %.4f\tEval_AUC: %.4f' %
                          (model.global_epoch_step.eval(),
                           model.global_step.eval(),
                           loss_sum / 1000, test_gauc, Auc))
                    sys.stdout.flush()
                    loss_sum = 0.0
                if model.global_step.eval() % 336000 == 0:
                    # Hard-coded learning-rate decay point.
                    lr = 0.1
            print('Epoch %d DONE\tCost time: %.2f' %
                  (model.global_epoch_step.eval(), time.time()-start_time))
            sys.stdout.flush()
            model.global_epoch_step_op.eval()
        print('best test_gauc:', best_auc)
        sys.stdout.flush()
# NOTE(review): fragment of a per-epoch training loop — the enclosing loop /
# function is not visible in this view, so it is documented rather than
# restructured. When global_step hits a multiple of 1000, the model is saved
# and the loss is logged twice (two near-identical print statements — likely a
# merge artifact; confirm whether one can be dropped).
# English translation of the Chinese block note below:
#   uij is a tuple containing (u, i, y, hist_i, sl, b, lt, qr).
#   u is each user's id; i is the item index of the pos/neg sample the user
#   clicked this time; y is the 0/1 label for that sample; hist_i packs all
#   users' clicked item indices into a matrix with equal column length — but
#   item index 0 is a real index AND the padding value here, so the embedding
#   lookup may be wrong for padded positions.
random.shuffle(train_set) epoch_size = round(len(train_set) / train_batch_size) loss_sum = 0.0 """ uij是一个tupple,里面包括 (u, i, y, hist_i, sl, b, lt, qr) u是每个玩家的id i这个玩家当次点击正负样本item的index y是上面那个正负样本item的index对应的label,1和0 hist_i是吧所有玩家点击的item index对应到一个column长度均等的一个方阵,但是这里item的index对应是0的然后这里hist_i填充的也是0,所以在embedding的时候可能会出错 """ for _, uij in DataInput(train_set, train_batch_size): loss = model.train(sess, uij, lr, keep_prob=0.95) loss_sum += loss if model.global_step.eval() % 1000 == 0: model.save(sess, checkpoint_dir) print('Global_step %d\tTrain_loss: %.4f' % (model.global_step.eval(), loss_sum / 1000)) print('Epoch %d Global_step %d\tTrain_loss: %.4f' % (model.global_epoch_step.eval(), model.global_step.eval(), loss_sum / 1000)) sys.stdout.flush() loss_sum = 0.0 if model.global_step.eval() % 336000 == 0: lr = 0.1
def train():
    """Train on the tianchi dataset, logging two AUC variants per eval point.

    Loads pickled train/test sets, trains for FLAGS.max_epochs epochs,
    checkpoints when the `_eval_auc` metric sets a record above 0.88, and
    dumps the (epoch, step, loss, auc, auc_new) history to
    'result_dropout0.2_adam.pkl'.

    FIX vs. original: `_eval` and `_eval_auc` were each invoked twice per
    logging point (once for the print, once for `result.append`) and twice
    for the init print — the computed values are now reused, halving the
    evaluation cost without changing what is logged (evaluation is
    deterministic for fixed weights — confirm if eval uses dropout).
    """
    start_time = time.time()
    if FLAGS.from_scratch:
        # Wipe previous checkpoints for a fresh run.
        if tf.gfile.Exists(FLAGS.model_dir):
            tf.gfile.DeleteRecursively(FLAGS.model_dir)
        tf.gfile.MakeDirs(FLAGS.model_dir)

    # Loading data
    print('Loading data.....', flush=True)
    with open('../BisIE_mask/tianchi/dataset.pkl', 'rb') as f:
        train_set = pickle.load(f)
        test_set = pickle.load(f)
        cate_list = pickle.load(f)
        action_list = pickle.load(f)
        # print(cate_list)
        user_count, item_count, cate_count, action_count = pickle.load(f)
        # print(user_count, item_count, cate_count, action_count)

    # Config GPU options
    if FLAGS.per_process_gpu_memory_fraction == 0.0:
        gpu_options = tf.GPUOptions(allow_growth=True)
    elif FLAGS.per_process_gpu_memory_fraction == 1.0:
        gpu_options = tf.GPUOptions()
    else:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.per_process_gpu_memory_fraction)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.cuda_visible_devices

    # Build Config
    config = OrderedDict(sorted(FLAGS.__flags.items()))
    # for k, v in config.items():
    #     config[k] = v.value
    print(config.items())
    config['user_count'] = user_count
    config['item_count'] = item_count
    config['cate_count'] = cate_count
    # +1 reserves an id (presumably for padding — TODO confirm).
    config['action_count'] = action_count + 1

    # Initiate TF session
    # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    with sess.as_default():
        # Create a new model or reload existing checkpoint
        model = create_model(sess, config, cate_list, action_list)
        print('Init finish.\tCost time: %.2fs' % (time.time() - start_time),
              flush=True)

        result = []
        # Eval init AUC (computed once each and reused — see docstring).
        init_auc = _eval(sess, test_set, model)
        init_auc_new = _eval_auc(sess, test_set, model)
        print('Init AUC: %.4f, new %.4f' % (init_auc, init_auc_new))
        result.append((0, 0, 0, init_auc, init_auc_new))

        # Start training
        lr = FLAGS.learning_rate
        epoch_size = round(len(train_set) / FLAGS.train_batch_size)
        print('Training....\tmax_epochs:%d\tepoch_size:%d' %
              (FLAGS.max_epochs, epoch_size), flush=True)

        start_time, avg_loss, best_auc = time.time(), 0.0, 0.0
        for _ in range(FLAGS.max_epochs):
            random.shuffle(train_set)  # reshuffle all samples each epoch
            print('tain_set:%d' % len(train_set))
            for _, uij in DataInput(train_set, FLAGS.train_batch_size):
                # print('uij:%d'%len(uij[0]))
                add_summary = bool(
                    model.global_step.eval() % FLAGS.display_freq == 0)
                step_loss = model.train(sess, uij, lr, add_summary)
                avg_loss += step_loss

                if model.global_step.eval() % FLAGS.eval_freq == 0:
                    test_auc = _eval(sess, test_set, model)
                    test_auc_new = _eval_auc(sess, test_set, model)
                    # print('test_auc:%.4f,best_auc:%.4f'%(test_auc, best_auc))
                    print('Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_AUC: %.4f, new %.4f' %
                          (model.global_epoch_step.eval(),
                           model.global_step.eval(),
                           avg_loss / FLAGS.eval_freq,
                           test_auc, test_auc_new),
                          flush=True)
                    result.append((model.global_epoch_step.eval(),
                                   model.global_step.eval(),
                                   avg_loss / FLAGS.eval_freq,
                                   test_auc, test_auc_new))
                    avg_loss = 0.0
                    # if test_auc > 0.88 and test_auc > best_auc:
                    #     best_auc = test_auc
                    #     model.save(sess)
                    if test_auc_new > 0.88 and test_auc_new > best_auc:
                        best_auc = test_auc_new
                        model.save(sess)

                # if model.global_epoch_step.eval() < 2000:
                #     lr = 0.95 * lr
                if model.global_epoch_step.eval() % 5:
                    # NOTE(review): likely intended `% 5 == 0` (decay once
                    # every 5 epochs). As written, lr decays on every batch of
                    # every epoch NOT divisible by 5. Behavior kept to avoid
                    # changing training dynamics — confirm intent.
                    lr = lr * 0.9998

            # print for every epoch (original comment typo '#pirnt' fixed)
            test_auc = _eval(sess, test_set, model)
            test_auc_new = _eval_auc(sess, test_set, model)
            print('Epoch %d Global_step %d\tEval_AUC: %.4f, new %.4f' %
                  (model.global_epoch_step.eval(), model.global_step.eval(),
                   test_auc, test_auc_new),
                  flush=True)
            print('Epoch %d DONE\tCost time: %.2f' %
                  (model.global_epoch_step.eval(), time.time() - start_time),
                  flush=True)
            model.global_epoch_step_op.eval()

        model.save(sess)
        with open('result_dropout0.2_adam.pkl', 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
        print('best test_auc:', best_auc)
        print('Finished', flush=True)