def run_validation(self, session, tg, num_classifiers_to_test, feed_dict, specific_classifiers=None):
    """Run one validation pass over classifiers and log per-batch and averaged
    precision/recall/F1 summaries to ``self.valid_summary_writer``.

    Args:
        session: live TF session used to run summary ops.
        tg: training graph object exposing ``classifiers[j]`` summary tensors.
        num_classifiers_to_test: how many classifiers to validate when no
            explicit list is given; also the divisor for the global averages.
        feed_dict: base feed dict forwarded to ``executeTrainStep``.
        specific_classifiers: optional list of classifier indices; when given,
            only those classifiers are validated and no global average summary
            is emitted.

    Side effects: advances ``self.global_counter`` by one, accumulates into
    ``self.precision_accum`` / ``self.recall_accum``, and mutates
    ``self.valid_batch`` cursors/batch_size.
    """
    # FIX: use None instead of a mutable default list ([] shared across calls).
    specific_classifiers = [] if specific_classifiers is None else specific_classifiers
    avg_f1, avg_prec, avg_rec = 0.0, 0.0, 0.0
    run_on_classifiers = range(num_classifiers_to_test) if len(specific_classifiers) == 0 else specific_classifiers
    for j in run_on_classifiers:
        print("Running validation tests on classifier " + str(j))
        clsfr = tg.classifiers[j]
        # Consume the whole validation split for classifier j as one batch.
        self.valid_batch.batch_size = len(self.valid_batch.dataset[id2label(j)])
        num_iters = 1  # len(self.valid_batch.dataset[id2label(j)]) // self.valid_batch.batch_size
        self.valid_batch.resetCursors(j)
        # Average f1, precision, recall across all batches of this classifier.
        f1_s, prec_s, rec_s = 0.0, 0.0, 0.0
        for i in range(num_iters):
            batch = self.valid_batch.classifier_next_batch(j)
            op = executeTrainStep(session, j, tg, batch['nodes'], batch['labels'], feed_dict, True)
            pred = hotEncodeDistribution(op['classifier_ops'])
            f1, prec, rec = get_accuracy([pred], [batch])
            f1_summ, prec_summ, rec_summ = session.run(
                [clsfr.f1_summary, clsfr.precision_summary, clsfr.recall_summary],
                feed_dict={clsfr.precision: prec, clsfr.f1: f1, clsfr.recall: rec})
            self.valid_summary_writer.add_summary(f1_summ, i)
            self.valid_summary_writer.add_summary(prec_summ, i)
            self.valid_summary_writer.add_summary(rec_summ, i)
            f1_s += f1
            prec_s += prec
            rec_s += rec
        self.precision_accum += prec_s
        # FIX: previously accumulated only the final batch's recall (`rec`)
        # rather than the per-classifier total, unlike precision_accum above.
        self.recall_accum += rec_s
        f1_s /= num_iters
        prec_s /= num_iters
        rec_s /= num_iters
        f1_summ, prec_summ, rec_summ = session.run(
            [clsfr.avg_f1_summary, clsfr.avg_precision_summary, clsfr.avg_recall_summary],
            feed_dict={clsfr.avg_f1: f1_s, clsfr.avg_precision: prec_s, clsfr.avg_recall: rec_s})
        print("f1: {}, precision: {}, recall: {}".format(f1_s, prec_s, rec_s))
        self.valid_summary_writer.add_summary(f1_summ, self.global_counter)
        self.valid_summary_writer.add_summary(prec_summ, self.global_counter)
        self.valid_summary_writer.add_summary(rec_summ, self.global_counter)
        avg_f1 += f1_s
        avg_prec += prec_s
        avg_rec += rec_s
    if len(specific_classifiers) == 0:
        # Only emit the global cross-classifier averages when we validated all
        # num_classifiers_to_test classifiers (otherwise the divisor is wrong).
        avg_f1 = avg_f1 / num_classifiers_to_test
        avg_prec = avg_prec / num_classifiers_to_test
        avg_rec = avg_rec / num_classifiers_to_test
        f1_summ, prec_summ, rec_summ = session.run(
            [self.avg_f1_summary, self.avg_prec_summary, self.avg_rec_summary],
            feed_dict={self.avg_f1: avg_f1, self.avg_prec: avg_prec, self.avg_rec: avg_rec})
        self.valid_summary_writer.add_summary(f1_summ, self.global_counter)
        self.valid_summary_writer.add_summary(prec_summ, self.global_counter)
        self.valid_summary_writer.add_summary(rec_summ, self.global_counter)
        print("avg f1: {}, avg prec: {}, avg rec: {}".format(avg_f1, avg_prec, avg_rec))
    self.global_counter += 1
def create_record(self, index):
    """Assemble one batch of node ids and one-hot labels for classifier
    *index*, advancing that classifier's dataset cursor with wraparound.

    Returns a dict with keys ``'nodes'`` (node ids) and ``'labels'``
    (``[1, 0]`` for positive entries, ``[0, 1]`` otherwise).
    """
    label_key = id2label(index)  # key into self.dataset for this classifier
    records = self.dataset[label_key]
    nodes, labels = [], []
    for _ in range(self.batch_size):
        entry = records[self.cursors[index]]
        nodes.append(node2id(entry[0]))
        # One-hot encode the binary target: 1 -> [1, 0], anything else -> [0, 1].
        labels.append([1, 0] if entry[1] == 1 else [0, 1])
        # Advance the cursor circularly over this classifier's records.
        self.cursors[index] = (self.cursors[index] + 1) % len(records)
    return {'nodes': nodes, 'labels': labels}
def trainSingleClassifier(classifierId, graph, session, trainingGraph, dataset, batch_size,
                          embeddings, batchGen, num_epochs, train_summary_writer, saver,
                          train_model_file, is_training, valid_test, summary_frequency=-1):
    """Train the binary classifier ``classifierId`` for ``num_epochs`` epochs.

    Runs ``executeTrainStep`` over mini-batches from ``batchGen``, writes
    per-step loss/metric summaries via ``train_summary_writer``, periodically
    checkpoints (every ``summary_frequency`` steps when > 0) and runs a
    validation pass, then logs a final macro-F1 summary over the whole run.

    Args:
        classifierId: index of the classifier within the training graph.
        graph: the tf.Graph to build summary placeholders in.
        session: live TF session.
        trainingGraph: object exposing ``embeddings`` and per-classifier ops.
        dataset: mapping from label name (via ``id2label``) to records.
        batch_size / num_epochs: control total iterations.
        summary_frequency: checkpoint + validation cadence; <= 0 disables it.
    """
    tg = trainingGraph
    with graph.as_default():
        # Placeholders for feeding externally-computed metrics into summaries.
        precision_tf = tf.placeholder(shape=[], dtype=tf.float32, name='precision')
        recall_tf = tf.placeholder(shape=[], dtype=tf.float32, name='recall')
        f1_tf = tf.placeholder(shape=[], dtype=tf.float32, name='f1')
        with tf.variable_scope('label_classifier-' + str(classifierId)):
            # Scoped per classifier so the tag is unique across classifiers.
            macro_f1_tf = tf.placeholder(shape=[], dtype=tf.float32, name='macro_f1')
        total_prec = 0.0
        total_rec = 0.0
        precision_summary = tf.summary.scalar('precision_summary', precision_tf)
        recall_summary = tf.summary.scalar('recall_summary', recall_tf)
        f1_summary = tf.summary.scalar('f1_summary', f1_tf)
        macro_f1_summary = tf.summary.scalar('macro_f1_summary', macro_f1_tf)
        # NOTE(review): stat_summary is built but never run anywhere in this
        # function — kept so graph construction matches previous behavior;
        # either wire it into the loop below or delete it.
        stat_summary = tf.summary.merge([precision_summary, recall_summary, f1_summary])
        recordLength = len(dataset[id2label(classifierId)])
        num_iters = (recordLength // batch_size) * num_epochs
        feed_dict = {}
        feed_dict[tg.embeddings] = embeddings
        print("Classifier {} will take {} iters".format(classifierId, num_iters))
        for i in range(num_iters):
            batch = batchGen.classifier_next_batch(classifierId)
            op = executeTrainStep(session, classifierId, tg, batch['nodes'], batch['labels'],
                                  feed_dict, is_training)
            net_loss = op['net_loss']
            pred = [hotEncodeDistribution(op['classifier_ops'])]
            f1, prec, rec = get_accuracy(pred, [batch])
            total_rec += rec
            total_prec += prec
            print("step: {} loss:{} f1:{}".format(i, net_loss, f1))
            summaries = op['summaries_calculated']
            for j in range(len(summaries)):
                train_summary_writer.add_summary(summaries[j], i)
            if summary_frequency > 0 and i % summary_frequency == 0:
                # Checkpoint, then a mid-training validation pass on this
                # classifier only. (Removed unused `save_loc` local.)
                saver.save(session, train_model_file, global_step=i)
                valid_test.run_validation(session, tg, 1, feed_dict, [classifierId])
                print(pred, batch['labels'])
        # Final validation plus a macro-F1 summary over the whole training run.
        valid_test.run_validation(session, tg, 1, feed_dict, [classifierId])
        avg_prec = total_prec / num_iters
        avg_rec = total_rec / num_iters
        # Epsilon guards against 0/0 when both averages are zero.
        macro_f1 = 2 * avg_prec * avg_rec / (avg_prec + avg_rec + 1e-7)
        macro_f1_summ = session.run(macro_f1_summary, feed_dict={macro_f1_tf: macro_f1})
        train_summary_writer.add_summary(macro_f1_summ, 0)
def hotDecode(hotVec):
    """Decode a (multi-)hot score vector into its active label names.

    A position counts as active when its score is at least 0.5; each active
    index is mapped to a label via ``id2label``.
    """
    active = []
    for idx, score in enumerate(hotVec):
        if score >= 0.5:
            active.append(id2label(idx))
    return active