def load_model(fileName):
    """Restore the latest DKT checkpoint and run inference on the test set.

    :param fileName: path to the raw data file handed to DataGenerator.
    Side effects: prints per-step AUC/accuracy and the final means.
    """
    # Instantiate the configuration object
    config = Config()
    # Build the data generator and derive its attributes (train/test split)
    dataGen = DataGenerator(fileName, config)
    dataGen.gen_attr()
    test_seqs = dataGen.test_seqs

    with tf.Session() as sess:
        # Restore ONCE, before the batch loop. The original restored inside the
        # loop: every tf.train.import_meta_graph call appends a fresh copy of
        # the whole graph to the default graph, bloating memory and slowing
        # each subsequent step while producing the same predictions.
        checkpoint_file = tf.train.latest_checkpoint("model/")
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Grab the restored default graph
        graph = tf.get_default_graph()

        # Placeholders the model must be fed (inputs the outputs depend on)
        input_x = graph.get_operation_by_name("test/dkt/input_x").outputs[0]
        target_id = graph.get_operation_by_name("test/dkt/target_id").outputs[0]
        keep_prob = graph.get_operation_by_name("test/dkt/keep_prob").outputs[0]
        max_steps = graph.get_operation_by_name("test/dkt/max_steps").outputs[0]
        sequence_len = graph.get_operation_by_name("test/dkt/sequence_len").outputs[0]

        # Output tensors. Use *_t names so the fetched numpy results below
        # do not clobber the tensor handles.
        pred_all_t = graph.get_tensor_by_name("test/dkt/pred_all:0")
        pred_t = graph.get_tensor_by_name("test/dkt/pred:0")
        binary_pred_t = graph.get_tensor_by_name("test/dkt/binary_pred:0")

        accuracys = []
        aucs = []
        for step, params in enumerate(dataGen.next_batch(test_seqs), start=1):
            print("step: {}".format(step))
            target_correctness = params['target_correctness']
            pred_all, pred, binary_pred = sess.run(
                [pred_all_t, pred_t, binary_pred_t],
                feed_dict={input_x: params["input_x"],
                           target_id: params["target_id"],
                           keep_prob: 1.0,  # no dropout at inference time
                           max_steps: params["max_len"],
                           sequence_len: params["seq_len"]})

            auc, acc = gen_metrics(params["seq_len"], binary_pred, pred,
                                   target_correctness)
            print(auc, acc)
            accuracys.append(acc)
            aucs.append(auc)

        aucMean = mean(aucs)
        accMean = mean(accuracys)
        print("inference auc: {} acc: {}".format(aucMean, accMean))
def train(args, train_dkt):
    """Train a GIKT model, or evaluate a saved checkpoint on the test set.

    :param args: hyper-parameter namespace (num_epochs, batch_size, max_step,
        feature_answer_size, hist_neighbor_num, checkpoint_dir, *_seqs, ...).
    :param train_dkt: truthy -> train + validate each epoch, checkpointing the
        best validation AUC; falsy -> restore the latest checkpoint and run
        the test set only.
    """
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True
    with tf.Session(config=run_config) as sess:
        print(args.model)
        model = GIKT(args)
        saver = tf.train.Saver()
        if train_dkt:
            sess.run(tf.global_variables_initializer())
            model_dir = save_model_dir(args)
            best_valid_auc = 0
            best_epoch = 0  # guard: defined even if validation AUC never improves
            for epoch in tqdm(range(args.num_epochs)):
                train_generator = DataGenerator(args.train_seqs, args.max_step,
                                                batch_size=args.batch_size,
                                                feature_size=args.feature_answer_size - 2,
                                                hist_num=args.hist_neighbor_num)
                valid_generator = DataGenerator(args.valid_seqs, args.max_step,
                                                batch_size=args.batch_size,
                                                feature_size=args.feature_answer_size - 2,
                                                hist_num=args.hist_neighbor_num)
                print("epoch:", epoch)
                overall_loss = 0
                train_generator.shuffle()
                preds, binary_preds, targets = list(), list(), list()
                train_step = 0
                # ---- training pass ----
                while not train_generator.end:
                    train_step += 1
                    [features_answer_index, target_answers, seq_lens,
                     hist_neighbor_index] = train_generator.next_batch()
                    binary_pred, pred, loss = model.train(
                        sess, features_answer_index, target_answers,
                        seq_lens, hist_neighbor_index)
                    overall_loss += loss
                    # Only the valid (unpadded) prefix of each sequence counts
                    # toward the metrics.
                    for seq_idx, seq_len in enumerate(seq_lens):
                        preds.append(pred[seq_idx, 0:seq_len])
                        binary_preds.append(binary_pred[seq_idx, 0:seq_len])
                        targets.append(target_answers[seq_idx, 0:seq_len])
                train_loss = overall_loss / train_step
                preds = np.concatenate(preds)
                binary_preds = np.concatenate(binary_preds)
                targets = np.concatenate(targets)
                auc_value = roc_auc_score(targets, preds)
                accuracy = accuracy_score(targets, binary_preds)
                precision, recall, f_score, _ = precision_recall_fscore_support(
                    targets, binary_preds)
                print("\ntrain loss = {0},auc={1}, accuracy={2}".format(
                    train_loss, auc_value, accuracy))
                write_log(args, model_dir, auc_value, accuracy, epoch, name='train_')

                # ---- validation pass ----
                valid_generator.reset()
                preds, binary_preds, targets = list(), list(), list()
                valid_step = 0
                while not valid_generator.end:
                    valid_step += 1
                    [features_answer_index, target_answers, seq_lens,
                     hist_neighbor_index] = valid_generator.next_batch()
                    binary_pred, pred = model.evaluate(
                        sess, features_answer_index, target_answers,
                        seq_lens, hist_neighbor_index, valid_step)
                    for seq_idx, seq_len in enumerate(seq_lens):
                        preds.append(pred[seq_idx, 0:seq_len])
                        binary_preds.append(binary_pred[seq_idx, 0:seq_len])
                        targets.append(target_answers[seq_idx, 0:seq_len])
                preds = np.concatenate(preds)
                binary_preds = np.concatenate(binary_preds)
                targets = np.concatenate(targets)
                auc_value = roc_auc_score(targets, preds)
                accuracy = accuracy_score(targets, binary_preds)
                precision, recall, f_score, _ = precision_recall_fscore_support(
                    targets, binary_preds)
                print("\nvalid auc={0}, accuracy={1}, precision={2}, recall={3}".format(
                    auc_value, accuracy, precision, recall))
                write_log(args, model_dir, auc_value, accuracy, epoch, name='valid_')

                # Checkpoint whenever validation AUC improves
                if auc_value > best_valid_auc:
                    print('%3.4f to %3.4f' % (best_valid_auc, auc_value))
                    best_valid_auc = auc_value
                    best_epoch = epoch
                    checkpoint_dir = os.path.join(args.checkpoint_dir, model_dir)
                    save(best_epoch, sess, checkpoint_dir, saver)
            print(model_dir + "\t" + str(best_valid_auc))
        else:
            # Evaluation-only mode. The original called self.load() /
            # self.num_skills inside this plain function (NameError); restore
            # the latest checkpoint with the Saver instead.
            model_dir = save_model_dir(args)
            checkpoint_file = tf.train.latest_checkpoint(
                os.path.join(args.checkpoint_dir, model_dir))
            if checkpoint_file:
                saver.restore(sess, checkpoint_file)
                print('CKPT loaded')
            else:
                raise Exception('CKPT need')
            test_data_generator = DataGenerator(args.test_seqs, args.max_step,
                                                batch_size=args.batch_size,
                                                feature_size=args.feature_answer_size - 2,
                                                hist_num=args.hist_neighbor_num)
            # Was `data_generator.reset()` — an undefined name (NameError).
            test_data_generator.reset()
            preds, binary_preds, targets = list(), list(), list()
            while not test_data_generator.end:
                # Was `valid_generator.next_batch()` — undefined in this branch.
                [features_answer_index, target_answers, seq_lens,
                 hist_neighbor_index] = test_data_generator.next_batch()
                binary_pred, pred = model.evaluate(
                    sess, features_answer_index, target_answers,
                    seq_lens, hist_neighbor_index)
                for seq_idx, seq_len in enumerate(seq_lens):
                    preds.append(pred[seq_idx, 0:seq_len])
                    binary_preds.append(binary_pred[seq_idx, 0:seq_len])
                    targets.append(target_answers[seq_idx, 0:seq_len])
            preds = np.concatenate(preds)
            binary_preds = np.concatenate(binary_preds)
            targets = np.concatenate(targets)
            auc_value = roc_auc_score(targets, preds)
            accuracy = accuracy_score(targets, binary_preds)
            precision, recall, f_score, _ = precision_recall_fscore_support(
                targets, binary_preds)
            print("\ntest auc={0}, accuracy={1}, precision={2}, recall={3}".format(
                auc_value, accuracy, precision, recall))
            print(model_dir)
            # `epoch` was undefined here (NameError); there is no training
            # epoch in eval-only mode, so log 0.
            write_log(args, model_dir, auc_value, accuracy, 0, name='test_')
def run_epoch(self, fileName):
    """Build the DKT train/test graphs and run the full training loop.

    :param fileName: path to the raw data file handed to DataGenerator.
    :return: None. Side effects: stores sess/models/global_step on self,
        writes TensorBoard summaries under runs/<timestamp>/, and saves
        checkpoints under model/.
    """
    # Instantiate the configuration object
    config = Config()
    # Build the data generator and derive its attributes
    dataGen = DataGenerator(fileName, config)
    dataGen.gen_attr()
    # Train / test sequence splits
    train_seqs = dataGen.train_seqs
    test_seqs = dataGen.test_seqs
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    sess = tf.Session(config=session_conf)
    self.sess = sess
    with sess.as_default():
        # Two DKT model instances sharing one set of "dkt" variables:
        # reuse=None creates them under train/, reuse=True reuses them
        # under test/ (same weights, separate input pipelines).
        with tf.name_scope("train"):
            with tf.variable_scope("dkt", reuse=None):
                train_dkt = TensorFlowDKT(config)
        with tf.name_scope("test"):
            with tf.variable_scope("dkt", reuse=True):
                test_dkt = TensorFlowDKT(config)
        self.train_dkt = train_dkt
        self.test_dkt = test_dkt

        global_step = tf.Variable(0, name="global_step", trainable=False)
        self.global_step = global_step
        # Define the optimizer
        optimizer = tf.train.AdamOptimizer(
            config.trainConfig.learning_rate)
        grads_and_vars = optimizer.compute_gradients(train_dkt.loss)
        # Clip gradients by norm (gradient-noise addition is disabled below)
        grads_and_vars = [
            (tf.clip_by_norm(g, config.trainConfig.max_grad_norm), v)
            for g, v in grads_and_vars if g is not None
        ]
        # grads_and_vars = [(self.add_gradient_noise(g), v) for g, v in grads_and_vars]
        # Final op of the graph: applies gradients and bumps global_step
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step,
                                             name="train_op")

        # Summaries recording gradient histograms and sparsity per variable
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar(
                    "{}/grad/sparsity".format(v.name),
                    tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Each run writes to its own timestamped directory
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(
            os.path.join(os.path.curdir, "runs", timestamp))
        print("writing to {}".format(out_dir))

        # Training summaries
        train_loss_summary = tf.summary.scalar("loss", train_dkt.loss)
        train_summary_op = tf.summary.merge(
            [train_loss_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(
            train_summary_dir, sess.graph)

        # Evaluation ("dev") summaries
        test_loss_summary = tf.summary.scalar("loss", test_dkt.loss)
        dev_summary_op = tf.summary.merge([test_loss_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                   sess.graph)

        saver = tf.train.Saver(tf.global_variables())
        sess.run(tf.global_variables_initializer())
        print("初始化完毕,开始训练")
        for i in range(config.trainConfig.epochs):
            # Reshuffle training sequences every epoch
            np.random.shuffle(train_seqs)
            for params in dataGen.next_batch(train_seqs):
                # Train the model on one batch
                self.train_step(params, train_op, train_summary_op,
                                train_summary_writer)
                current_step = tf.train.global_step(sess, global_step)
                # train_step.run(feed_dict={x: batch_train[0], y_actual: batch_train[1], keep_prob: 0.5})
                # Periodically evaluate on the test split
                if current_step % config.trainConfig.evaluate_every == 0:
                    print("\nEvaluation:")
                    # Collect per-batch metrics over the whole test set
                    losses = []
                    accuracys = []
                    aucs = []
                    for params in dataGen.next_batch(test_seqs):
                        loss, accuracy, auc = self.dev_step(params,
                                                           dev_summary_op,
                                                           writer=None)
                        losses.append(loss)
                        accuracys.append(accuracy)
                        aucs.append(auc)
                    time_str = datetime.datetime.now().isoformat()
                    print("dev: {}, step: {}, loss: {}, acc: {}, auc: {}".
                          format(time_str, current_step, mean(losses),
                                 mean(accuracys), mean(aucs)))
                # Periodically checkpoint the model
                if current_step % config.trainConfig.checkpoint_every == 0:
                    path = saver.save(sess, "model/my-model",
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))