def main(_):  # 1. load data (X: list of int, y: int).
    """Train the Transformer text classifier with an auxiliary sentence-pair
    comparison input (WikiQA x1/x2/features).

    Pipeline: build vocabulary, load and pad data, construct the model in a
    TF session, restore from checkpoint or initialise (optionally assigning
    pre-trained embeddings), then run the epoch/batch training loop with
    periodic validation, learning-rate halving when validation loss stops
    improving, and best-model checkpointing.
    """
    # Disabled cache branch: if a pickle cache exists on disk, load the
    # already vocabulary-indexed data instead of rebuilding it.
    # if os.path.exists(FLAGS.cache_path):
    #     with open(FLAGS.cache_path, 'r') as data_f:
    #         trainX, trainY, testX, testY, vocabulary_index2word = pickle.load(data_f)
    #         vocab_size = len(vocabulary_index2word)
    # else:
    if 1 == 1:  # always-true stand-in for the disabled 'else' branch above
        trainX, trainY, testX, testY = None, None, None, None
        vocabulary_word2index, vocabulary_index2word = create_vocabulary(
            word2vec_model_path=FLAGS.word2vec_model_path,
            name_scope="transformer_classification")
        vocab_size = len(vocabulary_word2index)
        print("transformer.vocab_size:", vocab_size)
        train, test, _ = load_data_multilabel_new(
            vocabulary_word2index, training_data_path=FLAGS.training_data_path)
        # Auxiliary sentence-pair data feeding the comparison component.
        compare_train_data = WikiQA(word2vec=Word2Vec(),
                                    max_len=FLAGS.max_len_compare)
        compare_train_data.open_file(mode="train")
        compare_test_data = WikiQA(word2vec=Word2Vec(),
                                   max_len=FLAGS.max_len_compare)
        compare_test_data.open_file(mode="valid")
        trainX, trainY, = train
        testX, testY = test
        # Pad/truncate every sequence to exactly FLAGS.sequence_length.
        trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)
        testX = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0.)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # grow GPU memory on demand
    with tf.Session(config=config) as sess:
        model = Transformer(FLAGS.num_classes, FLAGS.learning_rate,
                            FLAGS.batch_size, FLAGS.decay_steps,
                            FLAGS.decay_rate, FLAGS.sequence_length,
                            vocab_size, FLAGS.embed_size, FLAGS.d_model,
                            FLAGS.d_k, FLAGS.d_v, FLAGS.h, FLAGS.num_layer,
                            FLAGS.is_training, compare_train_data.num_features,
                            di=50, s=compare_train_data.max_len, w=4,
                            l2_reg=0.0004, l2_lambda=FLAGS.l2_lambda)
        print("=" * 50)
        print("List of Variables:")
        for v in tf.trainable_variables():
            print(v.name)
        print("=" * 50)
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print('Initializing Variables')
            sess.run(tf.global_variables_initializer())
            if FLAGS.use_embedding:  # load pre-trained word embedding
                assign_pretrained_word_embedding(
                    sess, vocabulary_index2word, vocab_size, model,
                    word2vec_model_path=FLAGS.word2vec_model_path)
        # Resume from the epoch persisted in the graph (non-zero after restore).
        curr_epoch = sess.run(model.epoch_step)
        number_of_training_data = len(trainX)
        print("number_of_training_data:", number_of_training_data)
        previous_eval_loss = 10000
        best_eval_loss = 10000
        batch_size = FLAGS.batch_size
        for epoch in range(curr_epoch, FLAGS.num_epochs):
            loss, acc, counter = 0.0, 0.0, 0
            compare_train_data.reset_index()
            # NOTE: the zip of offset ranges drops the final partial batch.
            for start, end in zip(
                    range(0, number_of_training_data, batch_size),
                    range(batch_size, number_of_training_data, batch_size)):
                if epoch == 0 and counter == 0:  # sanity-print the first batch once
                    print("trainX[start:end]:", trainX[start:end])
                batch_x1, batch_x2, _, batch_features = compare_train_data.next_batch(
                    batch_size=end - start)
                feed_dict = {
                    model.input_x: trainX[start:end],
                    model.dropout_keep_prob: 0.9,
                    model.x1: batch_x1,
                    model.x2: batch_x2,
                    model.features: batch_features
                }
                feed_dict[model.input_y_label] = trainY[start:end]
                curr_loss, curr_acc, _ = sess.run(
                    [model.loss_val, model.accuracy, model.train_op],
                    feed_dict)  # curr_acc ---> TextCNN.accuracy
                loss, counter, acc = loss + curr_loss, counter + 1, acc + curr_acc
                if counter % 50 == 0:  # report running averages every 50 batches
                    print(
                        "transformer.classification==>Epoch %d\tBatch %d\tTrain Loss:%.3f\tTrain Accuracy:%.3f"
                        % (epoch, counter, loss / float(counter),
                           acc / float(counter)))
                ## VALIDATION VALIDATION VALIDATION PART ##
                # Evaluate every FLAGS.validate_step batches (start is a
                # multiple of validate_step * batch_size).
                if FLAGS.batch_size != 0 and (
                        start % (FLAGS.validate_step * FLAGS.batch_size) == 0):
                    eval_loss, eval_acc = do_eval(sess, model, testX, testY,
                                                  compare_test_data,
                                                  batch_size)
                    print(
                        "transformer.classification.validation.part. previous_eval_loss:",
                        previous_eval_loss, ";current_eval_loss:", eval_loss)
                    if eval_loss > previous_eval_loss:  # if loss is not decreasing
                        # reduce the learning rate by a factor of 0.5
                        print(
                            "transformer.classification.==>validation.part.going to reduce the learning rate."
                        )
                        learning_rate1 = sess.run(model.learning_rate)
                        lrr = sess.run([model.learning_rate_decay_half_op])
                        learning_rate2 = sess.run(model.learning_rate)
                        print(
                            "transformer.classification==>validation.part.learning_rate1:",
                            learning_rate1, " ;learning_rate2:",
                            learning_rate2)
                    else:  # loss is decreasing
                        if eval_loss < best_eval_loss:
                            print(
                                "transformer.classification==>going to save the model.eval_loss:",
                                eval_loss, ";best_eval_loss:", best_eval_loss)
                            # save model to checkpoint (only on new best loss)
                            save_path = FLAGS.ckpt_dir + "model.ckpt"
                            saver.save(sess, save_path, global_step=epoch)
                            best_eval_loss = eval_loss
                    previous_eval_loss = eval_loss
                    compare_test_data.reset_index()
            # epoch increment: advance the persisted epoch counter in the graph
            print("going to increment epoch counter....")
            sess.run(model.epoch_increment)
def main(_):
    """Main entry: preprocess data, then train and evaluate the TextCNN model.

    Pipeline: build word/label vocabularies, vectorise and pad the text,
    create the TF session and model, restore a checkpoint or initialise
    variables (optionally loading pre-trained embeddings), run the
    epoch/batch training loop with TensorBoard summaries and periodic
    validation, and finish with a test-set evaluation.
    """
    print("数据预处理阶段:......")
    trainX, trainY, testX, testY = None, None, None, None
    # word -> index mapping derived from the word2vec model
    vocabulary_word2index, vocabulary_index2word = create_vocabulary(
        word2vec_model_path=FLAGS.word2vec_model_path, name_scope="cnn")
    vocab_size = len(vocabulary_word2index)
    # label -> index mapping
    vocabulary_word2index_label, vocabulary_index2word_label = create_vocabulary_label(
        vocabulary_label=FLAGS.training_data_path, name_scope="cnn")
    # vectorise the text and split into train/test sets
    train, test, _ = load_data_multilabel_new(
        vocabulary_word2index, vocabulary_word2index_label,
        training_data_path=FLAGS.training_data_path)
    trainX, trainY = train
    testX, testY = test
    # pad short sequences with 0 up to sequence_length
    trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0)
    testX = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0)
    print("数据预处理部分完成.....")
    print("创建session 对话.......")
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        textCNN = TextCNN(filter_size, FLAGS.num_filters, FLAGS.num_classes,
                          FLAGS.learning_rate, FLAGS.batch_size,
                          FLAGS.sequence_length, vocab_size, FLAGS.embed_size,
                          FLAGS.decay_steps, FLAGS.decay_rate, FLAGS.is_decay,
                          FLAGS.is_dropout, FLAGS.is_l2)
        # TensorBoard summary ops and writer
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(FLAGS.tensorboard_dir, sess.graph)
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):  # model checkpoint exists?
            print("从模型中恢复变量")
            # BUGFIX: the restore call was commented out while the
            # initializer only runs in the else-branch, so with an existing
            # checkpoint every variable stayed uninitialised and the first
            # sess.run raised FailedPreconditionError. Restore latest state.
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("初始化变量")
            sess.run(tf.global_variables_initializer())  # initialise all variables
            if FLAGS.use_embedding:  # load pre-trained word embeddings
                assign_pretrained_word_embedding(
                    sess, vocabulary_index2word, vocab_size, textCNN,
                    word2vec_model_path=FLAGS.word2vec_model_path)
        curr_epoch = sess.run(textCNN.epoch_step)
        # split the training data into batches
        num_train_data = len(trainX)
        batch_size = FLAGS.batch_size
        index = 0  # global summary-step counter across epochs
        # BUGFIX: build the epoch-increment op ONCE, before the loop.
        # The original constructed a new tf.assign/tf.add/tf.constant trio
        # every epoch, growing the graph on each iteration.
        epoch_increment = tf.assign(textCNN.epoch_step,
                                    tf.add(textCNN.epoch_step, tf.constant(1)))
        for epoch in range(curr_epoch, FLAGS.num_epochs):
            loss, acc, counter = 0.0, 0.0, 0.0
            # NOTE: the zip of offset ranges drops the final partial batch.
            for start, end in zip(
                    range(0, num_train_data, batch_size),
                    range(batch_size, num_train_data, batch_size)):
                feed_dict = {textCNN.input_x: trainX[start:end],
                             textCNN.input_y: trainY[start:end],
                             textCNN.dropout_keep_prob: 0.9}
                curr_loss, curr_acc, logits, _ = sess.run(
                    [textCNN.loss_val, textCNN.accuracy, textCNN.logits,
                     textCNN.train_op], feed_dict)
                index += 1
                loss, counter, acc = loss + curr_loss, counter + 1, acc + curr_acc
                if counter % 100 == 0:
                    rs = sess.run(merged, feed_dict)  # record summaries
                    writer.add_summary(rs, index)
                    print("Epoch %d\tBatch %d\tTrain Loss:%.3f\tTrain Accuracy:%.3f\tGlobal Step %d"
                          % (epoch, counter, loss / float(counter),
                             acc / float(counter),
                             sess.run(textCNN.global_step)))
                    # print("Train Logits{}".format(logits))
            # advance the persisted epoch counter
            sess.run(epoch_increment)
            # validation
            print("迭代次数:{}".format(epoch))
            if epoch % FLAGS.validate_every == 0:
                eval_loss, eval_acc = do_eval(sess, textCNN, testX, testY,
                                              batch_size)
                print("迭代次数:{}\t验证损失值:{}\t准确率:{}".format(
                    epoch, eval_loss, eval_acc))
                # checkpoint saving (left disabled, as in the original)
                # save_path = FLAGS.ckpt_dir + "model.ckpt"
                # saver.save(sess, save_path, global_step=epoch)
        print("验证集上进行损失,准确率计算.....")
        test_loss, test_acc = do_eval(sess, textCNN, testX, testY, batch_size)
        print("测试集中损失值:{}\t准确率:{}".format(test_loss, test_acc))
def main(_):
    """Train and evaluate a MALLET LDA topic model as a multi-label text
    classifier.

    Uses hold-out validation when FLAGS.kfold == -1, otherwise k-fold
    cross-validation (with a single shared test set). For each fold it
    trains LDA on the fold's corpus, evaluates on validation and test data,
    then reports per-fold and aggregated (mean/min/max/std) metrics and
    writes them to a text report plus valid/test CSV files.
    """
    # os.environ['CUDA_VISIBLE_DEVICES'] = ''
    # Per-dataset configuration: embedding path, training-data path,
    # sequence length and average labels per document (used as hamming_q).
    if FLAGS.dataset == "bibsonomy-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_bib
        traning_data_path = FLAGS.training_data_path_bib
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 11.59
    elif FLAGS.dataset == "zhihu-sample":
        word2vec_model_path = FLAGS.word2vec_model_path_zhihu
        traning_data_path = FLAGS.training_data_path_zhihu
        FLAGS.sequence_length = 100
        FLAGS.ave_labels_per_doc = 2.45
    elif FLAGS.dataset == "citeulike-a-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_cua
        traning_data_path = FLAGS.training_data_path_cua
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 11.6
    elif FLAGS.dataset == "citeulike-t-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_cut
        traning_data_path = FLAGS.training_data_path_cut
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 7.68
    # 1. create trainlist, validlist and testlist
    trainX, trainY, testX, testY = None, None, None, None
    vocabulary_word2index, vocabulary_index2word = create_voabulary(
        word2vec_model_path,
        name_scope=FLAGS.dataset + "-lda")  # simple='simple'
    vocabulary_word2index_label, vocabulary_index2word_label = create_voabulary_label(
        voabulary_label=traning_data_path, name_scope=FLAGS.dataset + "-lda")
    num_classes = len(vocabulary_word2index_label)
    print(vocabulary_index2word_label[0], vocabulary_index2word_label[1])
    vocab_size = len(vocabulary_word2index)
    print("vocab_size:", vocab_size)
    # choosing whether to use k-fold cross-validation or hold-out validation
    if FLAGS.kfold == -1:  # hold-out
        train, valid, test = load_data_multilabel_new(
            vocabulary_word2index,
            vocabulary_word2index_label,
            keep_label_percent=FLAGS.keep_label_percent,
            valid_portion=FLAGS.valid_portion,
            test_portion=FLAGS.test_portion,
            multi_label_flag=FLAGS.multi_label_flag,
            traning_data_path=traning_data_path)
        # here train, test are tuples; turn train into trainlist.
        trainlist, validlist, testlist = list(), list(), list()
        trainlist.append(train)
        validlist.append(valid)
        testlist.append(test)
    else:  # k-fold
        trainlist, validlist, testlist = load_data_multilabel_new_k_fold(
            vocabulary_word2index,
            vocabulary_word2index_label,
            keep_label_percent=FLAGS.keep_label_percent,
            kfold=FLAGS.kfold,
            test_portion=FLAGS.test_portion,
            multi_label_flag=FLAGS.multi_label_flag,
            traning_data_path=traning_data_path)
        # here trainlist, testlist are list of tuples.
    # get and pad testing data: there is only one testing data, but kfold
    # training and validation data
    assert len(testlist) == 1
    testX, testY = testlist[0]
    testX = pad_sequences(testX, maxlen=FLAGS.sequence_length,
                          value=0.)  # padding to max length
    # 3. transform trainlist to the format. x_train, x_test: training and
    # test feature matrices of size (n_samples, n_features)
    num_runs = len(trainlist)
    # validation results variables: one slot per run/fold
    valid_acc_th, valid_prec_th, valid_rec_th, valid_fmeasure_th, valid_hamming_loss_th = [
        0
    ] * num_runs, [0] * num_runs, [0] * num_runs, [0] * num_runs, [
        0
    ] * num_runs  # initialise the result lists
    final_valid_acc_th, final_valid_prec_th, final_valid_rec_th, final_valid_fmeasure_th, final_valid_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    min_valid_acc_th, min_valid_prec_th, min_valid_rec_th, min_valid_fmeasure_th, min_valid_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    max_valid_acc_th, max_valid_prec_th, max_valid_rec_th, max_valid_fmeasure_th, max_valid_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    std_valid_acc_th, std_valid_prec_th, std_valid_rec_th, std_valid_fmeasure_th, std_valid_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    # testing results variables
    test_acc_th, test_prec_th, test_rec_th, test_fmeasure_th, test_hamming_loss_th = [
        0
    ] * num_runs, [0] * num_runs, [0] * num_runs, [0] * num_runs, [
        0
    ] * num_runs  # initialise the testing result lists
    final_test_acc_th, final_test_prec_th, final_test_rec_th, final_test_fmeasure_th, final_test_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    min_test_acc_th, min_test_prec_th, min_test_rec_th, min_test_fmeasure_th, min_test_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    max_test_acc_th, max_test_prec_th, max_test_rec_th, max_test_fmeasure_th, max_test_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    std_test_acc_th, std_test_prec_th, std_test_rec_th, std_test_fmeasure_th, std_test_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    # output variables (accumulated human-readable report and CSV rows)
    output_valid = ""
    output_test = ""
    output_csv_valid = "fold,hamming_loss,acc,prec,rec,f1"
    output_csv_test = "fold,hamming_loss,acc,prec,rec,f1"
    time_train = [0] * num_runs  # get time spent in training
    num_run = 0
    # LDA hyperparameters
    mallet_path = FLAGS.mallet_path
    num_topics = FLAGS.num_topics
    alpha = 50 / num_topics  # common heuristic for the Dirichlet prior
    iterations = FLAGS.iterations
    k_num_doc = FLAGS.k_num_doc
    remove_pad_id = True
    remove_dot = True
    # Turn the padded index sequences back into token documents for gensim.
    docs_test = generateLDAdocFromIndex(testX, vocabulary_index2word,
                                        remove_pad_id=remove_pad_id,
                                        remove_dot=remove_dot)
    for trainfold in trainlist:
        # get training and validation data
        trainX, trainY = trainfold
        trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)
        # generate training data for gensim MALLET wrapper for LDA
        docs = generateLDAdocFromIndex(trainX, vocabulary_index2word,
                                       remove_pad_id=remove_pad_id,
                                       remove_dot=remove_dot)
        id2word = corpora.Dictionary(docs)
        corpus = [id2word.doc2bow(text) for text in docs]
        # generate validation data for gensim MALLET wrapper for LDA
        validX, validY = validlist[num_run]
        validX = pad_sequences(validX, maxlen=FLAGS.sequence_length, value=0.)
        docs_valid = generateLDAdocFromIndex(validX, vocabulary_index2word,
                                             remove_pad_id=remove_pad_id,
                                             remove_dot=remove_dot)
        corpus_valid = [id2word.doc2bow(text) for text in docs_valid]
        # generate testing data for gensim MALLET wrapper for LDA
        # (rebuilt every fold because id2word is fold-specific)
        corpus_test = [id2word.doc2bow(text) for text in docs_test]
        # training
        start_time_train = time.time()
        print('start training fold', str(num_run))
        model = gensim.models.wrappers.LdaMallet(mallet_path, corpus=corpus,
                                                 num_topics=num_topics,
                                                 alpha=alpha, id2word=id2word,
                                                 iterations=iterations)
        pprint(model.show_topics(formatted=False))
        print('num_run', str(num_run), 'train done.')
        time_train[num_run] = time.time() - start_time_train
        print("--- training of fold %s took %s seconds ---" %
              (num_run, time_train[num_run]))
        # represent each document as a topic vector
        # mat_train = np.array(model[corpus])  # causes an Error with large
        # num_topics, e.g. 1000 or higher. Thus, we turn the MALLET LDA
        # model into a native Gensim LDA model first.
        model = gensim.models.wrappers.ldamallet.malletmodel2ldamodel(model)
        mat_train = np.array(
            model.get_document_topics(corpus, minimum_probability=0.0))
        # keep only the probability column: documents in training set as a
        # matrix of topic probabilities
        mat_train = mat_train[:, :, 1]
        # validation
        valid_acc_th[num_run], valid_prec_th[num_run], valid_rec_th[num_run], valid_fmeasure_th[num_run], valid_hamming_loss_th[num_run] = do_eval_lda(
            model, k_num_doc, mat_train, trainY, corpus_valid, validY,
            vocabulary_index2word_label, hamming_q=FLAGS.ave_labels_per_doc)
        print(
            "LDA==>Run %d Validation Accuracy: %.3f\tValidation Hamming Loss: %.3f\tValidation Precision: %.3f\tValidation Recall: %.3f\tValidation F-measure: %.3f"
            % (num_run, valid_acc_th[num_run], valid_hamming_loss_th[num_run],
               valid_prec_th[num_run], valid_rec_th[num_run],
               valid_fmeasure_th[num_run]))
        output_valid = output_valid + "\n" + "LDA==>Run %d Validation Accuracy: %.3f\tValidation Hamming Loss: %.3f\tValidation Precision: %.3f\tValidation Recall: %.3f\tValidation F-measure: %.3f" % (
            num_run, valid_acc_th[num_run], valid_hamming_loss_th[num_run],
            valid_prec_th[num_run], valid_rec_th[num_run],
            valid_fmeasure_th[num_run]
        ) + "\n"  # also output the results of each run.
        output_csv_valid = output_csv_valid + "\n" + str(num_run) + "," + str(
            valid_hamming_loss_th[num_run]) + "," + str(
                valid_acc_th[num_run]) + "," + str(
                    valid_prec_th[num_run]) + "," + str(
                        valid_rec_th[num_run]) + "," + str(
                            valid_fmeasure_th[num_run])
        start_time_test = time.time()
        # evaluate on testing data
        test_acc_th[num_run], test_prec_th[num_run], test_rec_th[num_run], test_fmeasure_th[num_run], test_hamming_loss_th[num_run] = do_eval_lda(
            model, k_num_doc, mat_train, trainY, corpus_test, testY,
            vocabulary_index2word_label, hamming_q=FLAGS.ave_labels_per_doc)
        print(
            "LDA==>Run %d Test Accuracy: %.3f\tTest Hamming Loss: %.3f\tTest Precision: %.3f\tTest Recall: %.3f\tTest F-measure: %.3f"
            % (num_run, test_acc_th[num_run], test_hamming_loss_th[num_run],
               test_prec_th[num_run], test_rec_th[num_run],
               test_fmeasure_th[num_run]))
        output_test = output_test + "\n" + "LDA==>Run %d Test Accuracy: %.3f\tTest Hamming Loss: %.3f\tTest Precision: %.3f\tTest Recall: %.3f\tTest F-measure: %.3f" % (
            num_run, test_acc_th[num_run], test_hamming_loss_th[num_run],
            test_prec_th[num_run], test_rec_th[num_run],
            test_fmeasure_th[num_run]
        ) + "\n"  # also output the results of each run.
        output_csv_test = output_csv_test + "\n" + str(num_run) + "," + str(
            test_hamming_loss_th[num_run]) + "," + str(
                test_acc_th[num_run]) + "," + str(
                    test_prec_th[num_run]) + "," + str(
                        test_rec_th[num_run]) + "," + str(
                            test_fmeasure_th[num_run])
        print("--- testing of fold %s took %s seconds ---" %
              (num_run, time.time() - start_time_test))
        # NOTE(review): prediction_str is reset on every fold, so only the
        # LAST fold's predictions survive to the final report — confirm
        # this is intended.
        prediction_str = ""
        # output final predictions for qualitative analysis
        if FLAGS.report_rand_pred == True:
            prediction_str = display_for_qualitative_evaluation(
                model, k_num_doc, mat_train, trainY, corpus_test, testX,
                testY, vocabulary_index2word, vocabulary_index2word_label,
                hamming_q=FLAGS.ave_labels_per_doc)
        # update the num_run
        num_run = num_run + 1
    print('\n--Final Results--\n')
    # report min, max, std, average for the validation results
    min_valid_acc_th = min(valid_acc_th)
    min_valid_prec_th = min(valid_prec_th)
    min_valid_rec_th = min(valid_rec_th)
    min_valid_fmeasure_th = min(valid_fmeasure_th)
    min_valid_hamming_loss_th = min(valid_hamming_loss_th)
    max_valid_acc_th = max(valid_acc_th)
    max_valid_prec_th = max(valid_prec_th)
    max_valid_rec_th = max(valid_rec_th)
    max_valid_fmeasure_th = max(valid_fmeasure_th)
    max_valid_hamming_loss_th = max(valid_hamming_loss_th)
    # stdev needs >= 2 samples, so it is only computed in k-fold mode
    if FLAGS.kfold != -1:
        std_valid_acc_th = statistics.stdev(valid_acc_th)  # to change
        std_valid_prec_th = statistics.stdev(valid_prec_th)
        std_valid_rec_th = statistics.stdev(valid_rec_th)
        std_valid_fmeasure_th = statistics.stdev(valid_fmeasure_th)
        std_valid_hamming_loss_th = statistics.stdev(valid_hamming_loss_th)
    final_valid_acc_th = sum(valid_acc_th) / num_runs
    final_valid_prec_th = sum(valid_prec_th) / num_runs
    final_valid_rec_th = sum(valid_rec_th) / num_runs
    final_valid_fmeasure_th = sum(valid_fmeasure_th) / num_runs
    final_valid_hamming_loss_th = sum(valid_hamming_loss_th) / num_runs
    print(
        "LDA==>Final Validation results Validation Accuracy: %.3f ± %.3f (%.3f - %.3f)\tValidation Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tValidation Precision: %.3f ± %.3f (%.3f - %.3f)\tValidation Recall: %.3f ± %.3f (%.3f - %.3f)\tValidation F-measure: %.3f ± %.3f (%.3f - %.3f)"
        % (final_valid_acc_th, std_valid_acc_th, min_valid_acc_th,
           max_valid_acc_th, final_valid_hamming_loss_th,
           std_valid_hamming_loss_th, min_valid_hamming_loss_th,
           max_valid_hamming_loss_th, final_valid_prec_th, std_valid_prec_th,
           min_valid_prec_th, max_valid_prec_th, final_valid_rec_th,
           std_valid_rec_th, min_valid_rec_th, max_valid_rec_th,
           final_valid_fmeasure_th, std_valid_fmeasure_th,
           min_valid_fmeasure_th, max_valid_fmeasure_th))
    # output the result to a file
    output_valid = output_valid + "\n" + "LDA==>Final Validation results Validation Accuracy: %.3f ± %.3f (%.3f - %.3f)\tValidation Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tValidation Precision: %.3f ± %.3f (%.3f - %.3f)\tValidation Recall: %.3f ± %.3f (%.3f - %.3f)\tValidation F-measure: %.3f ± %.3f (%.3f - %.3f)" % (
        final_valid_acc_th, std_valid_acc_th, min_valid_acc_th,
        max_valid_acc_th, final_valid_hamming_loss_th,
        std_valid_hamming_loss_th, min_valid_hamming_loss_th,
        max_valid_hamming_loss_th, final_valid_prec_th, std_valid_prec_th,
        min_valid_prec_th, max_valid_prec_th, final_valid_rec_th,
        std_valid_rec_th, min_valid_rec_th, max_valid_rec_th,
        final_valid_fmeasure_th, std_valid_fmeasure_th,
        min_valid_fmeasure_th, max_valid_fmeasure_th) + "\n"
    output_csv_valid = output_csv_valid + "\n" + "average" + "," + str(
        round(final_valid_hamming_loss_th, 3)) + "±" + str(
            round(std_valid_hamming_loss_th, 3)
        ) + "," + str(round(final_valid_acc_th, 3)) + "±" + str(
            round(std_valid_acc_th, 3)) + "," + str(
                round(final_valid_prec_th, 3)) + "±" + str(
                    round(std_valid_prec_th, 3)) + "," + str(
                        round(final_valid_rec_th, 3)) + "±" + str(
                            round(std_valid_rec_th, 3)) + "," + str(
                                round(final_valid_fmeasure_th, 3)) + "±" + str(
                                    round(std_valid_fmeasure_th, 3))
    # report min, max, std, average for the testing results
    min_test_acc_th = min(test_acc_th)
    min_test_prec_th = min(test_prec_th)
    min_test_rec_th = min(test_rec_th)
    min_test_fmeasure_th = min(test_fmeasure_th)
    min_test_hamming_loss_th = min(test_hamming_loss_th)
    max_test_acc_th = max(test_acc_th)
    max_test_prec_th = max(test_prec_th)
    max_test_rec_th = max(test_rec_th)
    max_test_fmeasure_th = max(test_fmeasure_th)
    max_test_hamming_loss_th = max(test_hamming_loss_th)
    if FLAGS.kfold != -1:
        std_test_acc_th = statistics.stdev(test_acc_th)  # to change
        std_test_prec_th = statistics.stdev(test_prec_th)
        std_test_rec_th = statistics.stdev(test_rec_th)
        std_test_fmeasure_th = statistics.stdev(test_fmeasure_th)
        std_test_hamming_loss_th = statistics.stdev(test_hamming_loss_th)
    final_test_acc_th = sum(test_acc_th) / num_runs
    final_test_prec_th = sum(test_prec_th) / num_runs
    final_test_rec_th = sum(test_rec_th) / num_runs
    final_test_fmeasure_th = sum(test_fmeasure_th) / num_runs
    final_test_hamming_loss_th = sum(test_hamming_loss_th) / num_runs
    print(
        "LDA==>Final Test results Test Accuracy: %.3f ± %.3f (%.3f - %.3f)\tTest Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tTest Precision: %.3f ± %.3f (%.3f - %.3f)\tTest Recall: %.3f ± %.3f (%.3f - %.3f)\tTest F-measure: %.3f ± %.3f (%.3f - %.3f)"
        % (final_test_acc_th, std_test_acc_th, min_test_acc_th,
           max_test_acc_th, final_test_hamming_loss_th,
           std_test_hamming_loss_th, min_test_hamming_loss_th,
           max_test_hamming_loss_th, final_test_prec_th, std_test_prec_th,
           min_test_prec_th, max_test_prec_th, final_test_rec_th,
           std_test_rec_th, min_test_rec_th, max_test_rec_th,
           final_test_fmeasure_th, std_test_fmeasure_th,
           min_test_fmeasure_th, max_test_fmeasure_th))
    # output the result to a file
    output_test = output_test + "\n" + "LDA==>Final Test results Test Accuracy: %.3f ± %.3f (%.3f - %.3f)\tTest Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tTest Precision: %.3f ± %.3f (%.3f - %.3f)\tTest Recall: %.3f ± %.3f (%.3f - %.3f)\tTest F-measure: %.3f ± %.3f (%.3f - %.3f)" % (
        final_test_acc_th, std_test_acc_th, min_test_acc_th, max_test_acc_th,
        final_test_hamming_loss_th, std_test_hamming_loss_th,
        min_test_hamming_loss_th, max_test_hamming_loss_th,
        final_test_prec_th, std_test_prec_th, min_test_prec_th,
        max_test_prec_th, final_test_rec_th, std_test_rec_th,
        min_test_rec_th, max_test_rec_th, final_test_fmeasure_th,
        std_test_fmeasure_th, min_test_fmeasure_th,
        max_test_fmeasure_th) + "\n"
    output_csv_test = output_csv_test + "\n" + "average" + "," + str(
        round(final_test_hamming_loss_th, 3)) + "±" + str(
            round(std_test_hamming_loss_th, 3)) + "," + str(
                round(final_test_acc_th, 3)
            ) + "±" + str(round(std_test_acc_th, 3)) + "," + str(
                round(final_test_prec_th, 3)) + "±" + str(
                    round(std_test_prec_th, 3)) + "," + str(
                        round(final_test_rec_th, 3)) + "±" + str(
                            round(std_test_rec_th, 3)) + "," + str(
                                round(final_test_fmeasure_th, 3)) + "±" + str(
                                    round(std_test_fmeasure_th, 3))
    # summary of the hyperparameter setting for the report header
    setting = "dataset:" + str(FLAGS.dataset) + "\nT: " + str(
        FLAGS.num_topics) + "\nk: " + str(FLAGS.k_num_doc) + ' \ni: ' + str(
            FLAGS.iterations)
    # start_time is assumed to be a module-level timestamp set at startup
    print("--- The whole program took %s seconds ---" %
          (time.time() - start_time))
    time_used = "--- The whole program took %s seconds ---" % (time.time() -
                                                               start_time)
    if FLAGS.kfold != -1:
        print("--- The average training took %s ± %s seconds ---" %
              (sum(time_train) / num_runs, statistics.stdev(time_train)))
        average_time_train = "--- The average training took %s ± %s seconds ---" % (
            sum(time_train) / num_runs, statistics.stdev(time_train))
    else:
        print("--- The average training took %s ± %s seconds ---" %
              (sum(time_train) / num_runs, 0))
        average_time_train = "--- The average training took %s ± %s seconds ---" % (
            sum(time_train) / num_runs, 0)
    # output setting configuration, results, prediction and time used
    output_to_file(
        'lda ' + str(FLAGS.dataset) + " T" + str(FLAGS.num_topics) + ' k' +
        str(FLAGS.k_num_doc) + ' i' + str(FLAGS.iterations) + ' gp_id' +
        str(FLAGS.marking_id) + '.txt',
        setting + '\n' + output_valid + '\n' + output_test + '\n' +
        prediction_str + '\n' + time_used + '\n' + average_time_train)
    # output structured evaluation results
    output_to_file(
        'lda ' + str(FLAGS.dataset) + " T" + str(FLAGS.num_topics) + ' k' +
        str(FLAGS.k_num_doc) + ' i' + str(FLAGS.iterations) + ' gp_id' +
        str(FLAGS.marking_id) + ' valid.csv', output_csv_valid)
    output_to_file(
        'lda ' + str(FLAGS.dataset) + " T" + str(FLAGS.num_topics) + ' k' +
        str(FLAGS.k_num_doc) + ' i' + str(FLAGS.iterations) + ' gp_id' +
        str(FLAGS.marking_id) + ' test.csv', output_csv_test)
def main(_): #os.environ['CUDA_VISIBLE_DEVICES'] = '' if FLAGS.dataset == "bibsonomy-clean": word2vec_model_path = FLAGS.word2vec_model_path_bib traning_data_path = FLAGS.training_data_path_bib FLAGS.sequence_length = 300 FLAGS.ave_labels_per_doc = 11.59 elif FLAGS.dataset == "zhihu-sample": word2vec_model_path = FLAGS.word2vec_model_path_zhihu traning_data_path = FLAGS.training_data_path_zhihu FLAGS.sequence_length = 100 FLAGS.ave_labels_per_doc = 2.45 elif FLAGS.dataset == "citeulike-a-clean": word2vec_model_path = FLAGS.word2vec_model_path_cua traning_data_path = FLAGS.training_data_path_cua FLAGS.sequence_length = 300 FLAGS.ave_labels_per_doc = 11.6 elif FLAGS.dataset == "citeulike-t-clean": word2vec_model_path = FLAGS.word2vec_model_path_cut traning_data_path = FLAGS.training_data_path_cut FLAGS.sequence_length = 300 FLAGS.ave_labels_per_doc = 7.68 # 1. create trainlist, validlist and testlist trainX, trainY, testX, testY = None, None, None, None vocabulary_word2index, vocabulary_index2word = create_voabulary(word2vec_model_path,name_scope=FLAGS.dataset + "-svm") #simple='simple' vocabulary_word2index_label,vocabulary_index2word_label = create_voabulary_label(voabulary_label=traning_data_path, name_scope=FLAGS.dataset + "-svm") num_classes=len(vocabulary_word2index_label) print(vocabulary_index2word_label[0],vocabulary_index2word_label[1]) vocab_size = len(vocabulary_word2index) print("vocab_size:",vocab_size) # choosing whether to use k-fold cross-validation or hold-out validation if FLAGS.kfold == -1: # hold-out train, valid, test = load_data_multilabel_new(vocabulary_word2index, vocabulary_word2index_label,keep_label_percent=FLAGS.keep_label_percent,valid_portion=FLAGS.valid_portion,test_portion=FLAGS.test_portion,multi_label_flag=FLAGS.multi_label_flag,traning_data_path=traning_data_path) # here train, test are tuples; turn train into trainlist. 
trainlist, validlist, testlist = list(), list(), list() trainlist.append(train) validlist.append(valid) testlist.append(test) else: # k-fold trainlist, validlist, testlist = load_data_multilabel_new_k_fold(vocabulary_word2index, vocabulary_word2index_label,keep_label_percent=FLAGS.keep_label_percent,kfold=FLAGS.kfold,test_portion=FLAGS.test_portion,multi_label_flag=FLAGS.multi_label_flag,traning_data_path=traning_data_path) # here trainlist, testlist are list of tuples. # get and pad testing data: there is only one testing data, but kfold training and validation data assert len(testlist) == 1 testX, testY = testlist[0] testX = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0.) # padding to max length # 2. get word_embedding matrix: shape (21425,100) word2vec_model = word2vec.load(word2vec_model_path, kind='bin') word2vec_dict = {} for word, vector in zip(word2vec_model.vocab, word2vec_model.vectors): word2vec_dict[word] = vector word_embedding_2dlist = [[]] * vocab_size # create an empty word_embedding list: which is a list of list, i.e. a list of word, where each word is a list of values as an embedding vector. word_embedding_2dlist[0] = np.zeros(FLAGS.embed_size) # assign empty for first word:'PAD' bound = np.sqrt(6.0) / np.sqrt(vocab_size) # bound for random variables. count_exist = 0; count_not_exist = 0 for i in range(1, vocab_size): # loop each word word = vocabulary_index2word[i] # get a word embedding = None try: embedding = word2vec_dict[word] # try to get vector:it is an array. except Exception: embedding = None if embedding is not None: # the 'word' exist a embedding word_embedding_2dlist[i] = embedding; count_exist = count_exist + 1 # assign array to this word. else: # no embedding for this word word_embedding_2dlist[i] = np.random.uniform(-bound, bound, FLAGS.embed_size); count_not_exist = count_not_exist + 1 # init a random value for the word. word_embedding_final = np.array(word_embedding_2dlist) # covert to 2d array. 
print('embedding per word:',word_embedding_final) print('embedding per word, shape:',word_embedding_final.shape) # 3. transform trainlist to the format. x_train, x_test: training and test feature matrices of size (n_samples, n_features) #print(len(trainlist)) #trainX,trainY = trainlist[0] #trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.) #print(len(trainX)) #print(len(trainX[0])) #print(trainX[0]) #print(len(trainY)) #print(len(trainY[0])) #print(trainY[0]) #print(np.asarray(trainY).shape) num_runs = len(trainlist) #validation results variables valid_acc_th,valid_prec_th,valid_rec_th,valid_fmeasure_th,valid_hamming_loss_th =[0]*num_runs,[0]*num_runs,[0]*num_runs,[0]*num_runs,[0]*num_runs # initialise the result lists final_valid_acc_th,final_valid_prec_th,final_valid_rec_th,final_valid_fmeasure_th,final_valid_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0 min_valid_acc_th,min_valid_prec_th,min_valid_rec_th,min_valid_fmeasure_th,min_valid_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0 max_valid_acc_th,max_valid_prec_th,max_valid_rec_th,max_valid_fmeasure_th,max_valid_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0 std_valid_acc_th,std_valid_prec_th,std_valid_rec_th,std_valid_fmeasure_th,std_valid_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0 #testing results variables test_acc_th,test_prec_th,test_rec_th,test_fmeasure_th,test_hamming_loss_th = [0]*num_runs,[0]*num_runs,[0]*num_runs,[0]*num_runs,[0]*num_runs # initialise the testing result lists final_test_acc_th,final_test_prec_th,final_test_rec_th,final_test_fmeasure_th,final_test_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0 min_test_acc_th,min_test_prec_th,min_test_rec_th,min_test_fmeasure_th,min_test_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0 max_test_acc_th,max_test_prec_th,max_test_rec_th,max_test_fmeasure_th,max_test_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0 std_test_acc_th,std_test_prec_th,std_test_rec_th,std_test_fmeasure_th,std_test_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0 #output variables output_valid = "" output_test = "" 
# Cross-validated training / evaluation of classifier chains (CC): one run per
# entry of trainlist. Per-fold metrics go into the *_th lists and into
# human-readable / CSV report strings, which are aggregated after the loop and
# written to disk via output_to_file.
output_csv_valid = "fold,hamming_loss,acc,prec,rec,f1"
output_csv_test = "fold,hamming_loss,acc,prec,rec,f1"
time_train = [0] * num_runs  # training time (seconds) per fold
num_run = 0

# The test split is shared by every fold, so embed it once up front.
testX_embedded = get_embedded_words(testX, word_embedding_final, vocab_size)
print('testX_embedded:', testX_embedded)
print('testX_embedded:', testX_embedded.shape)

for trainfold in trainlist:
    # -- fold data preparation: pad token ids, then map them to dense
    #    per-document feature vectors (see get_embedded_words) --
    trainX, trainY = trainfold
    trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)
    trainX_embedded = get_embedded_words(trainX, word_embedding_final, vocab_size)
    print('trainX_embedded:', trainX_embedded)
    print('trainX_embedded:', trainX_embedded.shape)

    validX, validY = validlist[num_run]
    validX = pad_sequences(validX, maxlen=FLAGS.sequence_length, value=0.)
    validX_embedded = get_embedded_words(validX, word_embedding_final, vocab_size)
    print('validX_embedded:', validX_embedded)
    print('validX_embedded:', validX_embedded.shape)

    # ** training **
    start_time_train = time.time()
    print('start training fold', str(num_run))
    trainY_int = np.asarray(trainY).astype(int)

    # Labels with no positive example in this fold's training data cannot be
    # fitted; drop their columns and remember their indices so do_eval_chains
    # can re-align predictions with the full label set.
    one_class_label_list = list()
    for k in range(num_classes):
        if sum(trainY_int[:, k]) == 0:
            one_class_label_list.append(k)
    trainY_int_pruned = np.delete(trainY_int, one_class_label_list, 1)
    print(trainY_int_pruned.shape)

    chains = train_cc(trainX_embedded, trainY_int_pruned, num_chains=FLAGS.num_chains)
    time_train[num_run] = time.time() - start_time_train
    print("--- training of fold %s took %s seconds ---" % (num_run, time_train[num_run]))

    # Evaluate on the training data (result is computed but not reported;
    # kept for parity with the original script).
    acc, prec, rec, f_measure, hamming_loss = do_eval_chains(
        chains, one_class_label_list, trainX_embedded, np.asarray(trainY),
        hamming_q=FLAGS.ave_labels_per_doc)

    # -- validation evaluation --
    valid_acc_th[num_run], valid_prec_th[num_run], valid_rec_th[num_run], valid_fmeasure_th[num_run], valid_hamming_loss_th[num_run] = do_eval_chains(
        chains, one_class_label_list, validX_embedded, validY,
        hamming_q=FLAGS.ave_labels_per_doc)
    valid_line = "CC==>Run %d Validation Accuracy: %.3f\tValidation Hamming Loss: %.3f\tValidation Precision: %.3f\tValidation Recall: %.3f\tValidation F-measure: %.3f" % (
        num_run, valid_acc_th[num_run], valid_hamming_loss_th[num_run],
        valid_prec_th[num_run], valid_rec_th[num_run], valid_fmeasure_th[num_run])
    print(valid_line)
    output_valid = output_valid + "\n" + valid_line + "\n"  # also output the results of each run
    output_csv_valid = output_csv_valid + "\n" + str(num_run) + "," + str(valid_hamming_loss_th[num_run]) + "," + str(valid_acc_th[num_run]) + "," + str(valid_prec_th[num_run]) + "," + str(valid_rec_th[num_run]) + "," + str(valid_fmeasure_th[num_run])

    # -- test evaluation --
    start_time_test = time.time()
    test_acc_th[num_run], test_prec_th[num_run], test_rec_th[num_run], test_fmeasure_th[num_run], test_hamming_loss_th[num_run] = do_eval_chains(
        chains, one_class_label_list, testX_embedded, testY,
        hamming_q=FLAGS.ave_labels_per_doc)
    test_line = "CC==>Run %d Test Accuracy: %.3f\tTest Hamming Loss: %.3f\tTest Precision: %.3f\tTest Recall: %.3f\tTest F-measure: %.3f" % (
        num_run, test_acc_th[num_run], test_hamming_loss_th[num_run],
        test_prec_th[num_run], test_rec_th[num_run], test_fmeasure_th[num_run])
    print(test_line)
    output_test = output_test + "\n" + test_line + "\n"  # also output the results of each run
    output_csv_test = output_csv_test + "\n" + str(num_run) + "," + str(test_hamming_loss_th[num_run]) + "," + str(test_acc_th[num_run]) + "," + str(test_prec_th[num_run]) + "," + str(test_rec_th[num_run]) + "," + str(test_fmeasure_th[num_run])
    print("--- testing of fold %s took %s seconds ---" % (num_run, time.time() - start_time_test))

    # Optionally dump predictions for qualitative analysis.
    # NOTE(review): prediction_str is reset every fold, so only the LAST
    # fold's sample reaches the final report — original behaviour, kept as-is.
    prediction_str = ""
    if FLAGS.report_rand_pred == True:
        prediction_str = display_for_qualitative_evaluation_chains(
            chains, one_class_label_list, testX_embedded, testX, testY,
            vocabulary_index2word, vocabulary_index2word_label)

    # update the num_run
    num_run = num_run + 1

print('\n--Final Results--\n')

# -- aggregate validation results: min, max, std (multi-fold only), mean --
min_valid_acc_th = min(valid_acc_th)
min_valid_prec_th = min(valid_prec_th)
min_valid_rec_th = min(valid_rec_th)
min_valid_fmeasure_th = min(valid_fmeasure_th)
min_valid_hamming_loss_th = min(valid_hamming_loss_th)
max_valid_acc_th = max(valid_acc_th)
max_valid_prec_th = max(valid_prec_th)
max_valid_rec_th = max(valid_rec_th)
max_valid_fmeasure_th = max(valid_fmeasure_th)
max_valid_hamming_loss_th = max(valid_hamming_loss_th)
if FLAGS.kfold != -1:  # stdev requires at least two folds
    std_valid_acc_th = statistics.stdev(valid_acc_th)
    std_valid_prec_th = statistics.stdev(valid_prec_th)
    std_valid_rec_th = statistics.stdev(valid_rec_th)
    std_valid_fmeasure_th = statistics.stdev(valid_fmeasure_th)
    std_valid_hamming_loss_th = statistics.stdev(valid_hamming_loss_th)
final_valid_acc_th = sum(valid_acc_th) / num_runs
final_valid_prec_th = sum(valid_prec_th) / num_runs
final_valid_rec_th = sum(valid_rec_th) / num_runs
final_valid_fmeasure_th = sum(valid_fmeasure_th) / num_runs
final_valid_hamming_loss_th = sum(valid_hamming_loss_th) / num_runs
valid_summary = "CC==>Final Validation results Validation Accuracy: %.3f \u00b1 %.3f (%.3f - %.3f)\tValidation Hamming Loss: %.3f \u00b1 %.3f (%.3f - %.3f)\tValidation Precision: %.3f \u00b1 %.3f (%.3f - %.3f)\tValidation Recall: %.3f \u00b1 %.3f (%.3f - %.3f)\tValidation F-measure: %.3f \u00b1 %.3f (%.3f - %.3f)" % (
    final_valid_acc_th, std_valid_acc_th, min_valid_acc_th, max_valid_acc_th,
    final_valid_hamming_loss_th, std_valid_hamming_loss_th, min_valid_hamming_loss_th, max_valid_hamming_loss_th,
    final_valid_prec_th, std_valid_prec_th, min_valid_prec_th, max_valid_prec_th,
    final_valid_rec_th, std_valid_rec_th, min_valid_rec_th, max_valid_rec_th,
    final_valid_fmeasure_th, std_valid_fmeasure_th, min_valid_fmeasure_th, max_valid_fmeasure_th)
print(valid_summary)
# output the result to a file
output_valid = output_valid + "\n" + valid_summary + "\n"
output_csv_valid = output_csv_valid + "\n" + "average" + "," + str(round(final_valid_hamming_loss_th, 3)) + "\u00b1" + str(round(std_valid_hamming_loss_th, 3)) + "," + str(round(final_valid_acc_th, 3)) + "\u00b1" + str(round(std_valid_acc_th, 3)) + "," + str(round(final_valid_prec_th, 3)) + "\u00b1" + str(round(std_valid_prec_th, 3)) + "," + str(round(final_valid_rec_th, 3)) + "\u00b1" + str(round(std_valid_rec_th, 3)) + "," + str(round(final_valid_fmeasure_th, 3)) + "\u00b1" + str(round(std_valid_fmeasure_th, 3))

# -- aggregate test results: min, max, std (multi-fold only), mean --
min_test_acc_th = min(test_acc_th)
min_test_prec_th = min(test_prec_th)
min_test_rec_th = min(test_rec_th)
min_test_fmeasure_th = min(test_fmeasure_th)
min_test_hamming_loss_th = min(test_hamming_loss_th)
max_test_acc_th = max(test_acc_th)
max_test_prec_th = max(test_prec_th)
max_test_rec_th = max(test_rec_th)
max_test_fmeasure_th = max(test_fmeasure_th)
max_test_hamming_loss_th = max(test_hamming_loss_th)
if FLAGS.kfold != -1:  # stdev requires at least two folds
    std_test_acc_th = statistics.stdev(test_acc_th)
    std_test_prec_th = statistics.stdev(test_prec_th)
    std_test_rec_th = statistics.stdev(test_rec_th)
    std_test_fmeasure_th = statistics.stdev(test_fmeasure_th)
    std_test_hamming_loss_th = statistics.stdev(test_hamming_loss_th)
final_test_acc_th = sum(test_acc_th) / num_runs
final_test_prec_th = sum(test_prec_th) / num_runs
final_test_rec_th = sum(test_rec_th) / num_runs
final_test_fmeasure_th = sum(test_fmeasure_th) / num_runs
final_test_hamming_loss_th = sum(test_hamming_loss_th) / num_runs
# BUGFIX: this summary was labelled "SVM==>" — a leftover from the SVM baseline
# script — while every other line of this classifier-chain run says "CC==>".
test_summary = "CC==>Final Test results Test Accuracy: %.3f \u00b1 %.3f (%.3f - %.3f)\tTest Hamming Loss: %.3f \u00b1 %.3f (%.3f - %.3f)\tTest Precision: %.3f \u00b1 %.3f (%.3f - %.3f)\tTest Recall: %.3f \u00b1 %.3f (%.3f - %.3f)\tTest F-measure: %.3f \u00b1 %.3f (%.3f - %.3f)" % (
    final_test_acc_th, std_test_acc_th, min_test_acc_th, max_test_acc_th,
    final_test_hamming_loss_th, std_test_hamming_loss_th, min_test_hamming_loss_th, max_test_hamming_loss_th,
    final_test_prec_th, std_test_prec_th, min_test_prec_th, max_test_prec_th,
    final_test_rec_th, std_test_rec_th, min_test_rec_th, max_test_rec_th,
    final_test_fmeasure_th, std_test_fmeasure_th, min_test_fmeasure_th, max_test_fmeasure_th)
print(test_summary)
# output the result to a file
output_test = output_test + "\n" + test_summary + "\n"
output_csv_test = output_csv_test + "\n" + "average" + "," + str(round(final_test_hamming_loss_th, 3)) + "\u00b1" + str(round(std_test_hamming_loss_th, 3)) + "," + str(round(final_test_acc_th, 3)) + "\u00b1" + str(round(std_test_acc_th, 3)) + "," + str(round(final_test_prec_th, 3)) + "\u00b1" + str(round(std_test_prec_th, 3)) + "," + str(round(final_test_rec_th, 3)) + "\u00b1" + str(round(std_test_rec_th, 3)) + "," + str(round(final_test_fmeasure_th, 3)) + "\u00b1" + str(round(std_test_fmeasure_th, 3))

# -- record configuration and timing, then write everything to disk --
# NOTE(review): `setting` and the output filenames still record SVM
# hyper-parameters (C, gamma) and an 'svm ' prefix — presumably leftovers from
# the SVM baseline; confirm downstream tooling before renaming the files.
setting = "dataset:" + str(FLAGS.dataset) + "\nC: " + str(FLAGS.C) + "\ngamma: " + str(FLAGS.gamma)
# Build each timing line once so the printed and recorded values agree.
time_used = "--- The whole program took %s seconds ---" % (time.time() - start_time)
print(time_used)
if FLAGS.kfold != -1:
    average_time_train = "--- The average training took %s \u00b1 %s seconds ---" % (sum(time_train) / num_runs, statistics.stdev(time_train))
else:
    average_time_train = "--- The average training took %s \u00b1 %s seconds ---" % (sum(time_train) / num_runs, 0)
print(average_time_train)

# output setting configuration, results, prediction and time used
output_to_file('svm ' + str(FLAGS.dataset) + " C " + str(FLAGS.C) + ' gamma' + str(FLAGS.gamma) + ' gp_id' + str(FLAGS.marking_id) + '.txt', setting + '\n' + output_valid + '\n' + output_test + '\n' + prediction_str + '\n' + time_used + '\n' + average_time_train)
# output structured evaluation results
output_to_file('svm ' + str(FLAGS.dataset) + " C " + str(FLAGS.C) + ' gamma' + str(FLAGS.gamma) + ' gp_id' + str(FLAGS.marking_id) + ' valid.csv', output_csv_valid)
output_to_file('svm ' + str(FLAGS.dataset) + " C " + str(FLAGS.C) + ' gamma' + str(FLAGS.gamma) + ' gp_id' + str(FLAGS.marking_id) + ' test.csv', output_csv_test)