def _train(self, ts, dropout, batchsz, model, sess, word_keep):
    """Run one training epoch over ``ts`` and print the loss and elapsed time.

    ``floor(len(ts) / batchsz)`` batches are visited, each exactly once, in a
    random order.  Trailing examples that do not fill a whole batch are not
    used in this epoch.

    Args:
        ts: Training data; must support ``len()`` and be accepted by the
            ``batch`` helper.
        dropout: Dropout rate; ``1.0 - dropout`` is what gets handed to
            ``model.ex2dict`` (presumably as a keep probability -- confirm
            against ``ex2dict``).
        batchsz: Number of examples per batch.
        model: Object providing ``ex2dict(batch, value, flag, word_keep)``,
            which builds the feed dict for one batch.
        sess: Object providing ``run(fetches, feed_dict=...)`` (presumably a
            TensorFlow session).
        word_keep: Forwarded unchanged to ``model.ex2dict``.

    Returns:
        None.  A summary is written to ``self.train_writer`` for every step
        and a one-line report is printed at the end.
    """
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    shuffle = np.random.permutation(np.arange(steps))
    total_loss = 0

    for si in shuffle:
        ts_i = batch(ts, si, batchsz)
        # The literal True is passed through as ex2dict's third argument
        # (presumably a "training" flag -- confirm against ex2dict).
        feed_dict = model.ex2dict(ts_i, 1.0 - dropout, True, word_keep)
        _, step, summary_str, lossv = sess.run(
            [self.train_op, self.global_step, self.summary_op, self.loss],
            feed_dict=feed_dict)
        self.train_writer.add_summary(summary_str, step)
        total_loss += lossv

    duration = time.time() - start_time
    # The summed batch losses are normalised by the dataset size; guard the
    # division so an empty dataset reports 0 instead of raising.
    n_examples = len(ts)
    avg_loss = float(total_loss) / n_examples if n_examples else 0.0
    print('Train (Loss %.4f) (%.3f sec)' % (avg_loss, duration))
def _train(self, ts, dropout, batchsz, model, sess, word_keep):
    """Run one training epoch over ``ts`` and print the loss and elapsed time.

    ``floor(len(ts) / batchsz)`` batches are visited, each exactly once, in a
    random order.  Trailing examples that do not fill a whole batch are not
    used in this epoch.  When ``self.drop_words`` is positive, word dropout
    is applied to each batch before it is fed to the model.

    Args:
        ts: Training data; must support ``len()`` and be accepted by the
            ``batch`` helper.
        dropout: Dropout rate; ``1.0 - dropout`` is what gets handed to
            ``model.ex2dict`` (presumably as a keep probability -- confirm
            against ``ex2dict``).
        batchsz: Number of examples per batch.
        model: Object providing ``ex2dict(batch, value, flag, word_keep)``
            and a ``loss`` attribute that is fetched on every step.
        sess: Object providing ``run(fetches, feed_dict=...)`` (presumably a
            TensorFlow session).
        word_keep: Forwarded unchanged to ``model.ex2dict``.

    Returns:
        None.  A summary is written to ``self.train_writer`` for every step
        and a one-line report is printed at the end.
    """
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    shuffle = np.random.permutation(np.arange(steps))
    total_loss = 0

    for si in shuffle:
        ts_i = batch(ts, si, batchsz)

        if self.drop_words > 0.0:
            pad_idx = self.vocab['<PAD>']
            oov_idx = self.vocab['<OOV>']
            x = ts_i['x']
            word_probs = np.random.random(x.shape)
            # A uniform draw falls below p with probability p, so `<` makes
            # self.drop_words the fraction of tokens replaced.  The previous
            # `>` comparison replaced a fraction of 1 - drop_words instead.
            # Padding positions are never replaced.
            drop_mask = (word_probs < self.drop_words) & (x != pad_idx)
            # NOTE(review): this writes into the batch array in place; if
            # `batch` returns views of `ts`, the replacements persist across
            # epochs -- confirm against `batch`.
            x[drop_mask] = oov_idx

        # The literal True is passed through as ex2dict's third argument
        # (presumably a "training" flag -- confirm against ex2dict).
        feed_dict = model.ex2dict(ts_i, 1.0 - dropout, True, word_keep)
        _, step, summary_str, lossv = sess.run(
            [self.train_op, self.global_step, self.summary_op, model.loss],
            feed_dict=feed_dict)
        self.train_writer.add_summary(summary_str, step)
        total_loss += lossv

    duration = time.time() - start_time
    # The summed batch losses are normalised by the dataset size; guard the
    # division so an empty dataset reports 0 instead of raising.
    n_examples = len(ts)
    avg_loss = float(total_loss) / n_examples if n_examples else 0.0
    print('Train (Loss %.4f) (%.3f sec)' % (avg_loss, duration))
def test(self, ts, batchsz=1, phase='Test', conll_file=None, txts=None):
    """Evaluate on ``ts``, print a one-line report, and return the metrics.

    ``floor(len(ts) / batchsz)`` batches are evaluated in order; trailing
    examples that do not fill a whole batch are not evaluated.

    Args:
        ts: Evaluation data; must support ``len()`` and be accepted by the
            ``batch`` helper.
        batchsz: Number of examples per batch.
        phase: Label printed at the start of the report line.
        conll_file: Path of an output file.  It is opened for writing only
            when ``txts`` is also given; the handle is passed to
            ``self._batch``.
        txts: Raw texts, passed through to ``self._batch``.

    Returns:
        A ``(total_acc, fscore)`` tuple.  ``fscore`` is 0 unless
        ``self.fscore > 0``; ``total_acc`` is 0.0 when nothing was counted.
    """
    total_correct = total_sum = fscore = 0
    total_gold_count = total_guess_count = total_overlap_count = 0
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))

    # Only if they provide a file and the raw txts, we can write CONLL file
    handle = None
    if conll_file is not None and txts is not None:
        handle = open(conll_file, "w")

    try:
        for i in range(steps):
            ts_i = batch(ts, i, batchsz)
            correct, count, overlaps, golds, guesses = self._batch(
                ts_i, handle, txts)
            total_correct += correct
            total_sum += count
            total_gold_count += golds
            total_guess_count += guesses
            total_overlap_count += overlaps
        duration = time.time() - start_time
    finally:
        # Close the output file even when a batch raises, so the handle is
        # never leaked and buffered output is flushed.
        if handle is not None:
            handle.close()

    # Guard the division: with no full batch (or empty data) nothing is
    # counted and the accuracy is reported as 0 instead of raising.
    total_acc = total_correct / float(total_sum) if total_sum else 0.0

    # Only show the fscore if requested
    # NOTE: per-label metrics from a confusion matrix were sketched here as
    # commented-out code but never enabled; that dead code has been dropped.
    if self.fscore > 0:
        fscore = fScore(total_overlap_count,
                        total_gold_count,
                        total_guess_count,
                        self.fscore)
        print('%s (F%d = %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
              (phase, self.fscore, float(fscore),
               total_correct, total_sum, total_acc, duration))
    else:
        print('%s (Acc %d/%d = %.4f) (%.3f sec)' %
              (phase, total_correct, total_sum, total_acc, duration))

    return total_acc, fscore
def test(self, ts, batchsz=1, phase='Test', conll_file=None, txts=None):
    """Evaluate on ``ts``, print a one-line report, and return the metrics.

    ``floor(len(ts) / batchsz)`` batches are evaluated in order; trailing
    examples that do not fill a whole batch are not evaluated.

    Args:
        ts: Evaluation data; must support ``len()`` and be accepted by the
            ``batch`` helper.
        batchsz: Number of examples per batch.
        phase: Label printed at the start of the report line.
        conll_file: Path of an output file.  It is opened for writing only
            when ``txts`` is also given; the handle is passed to
            ``self._batch``.
        txts: Raw texts, passed through to ``self._batch``.

    Returns:
        A ``(total_acc, fscore)`` tuple.  ``fscore`` is 0 unless
        ``self.fscore > 0``; ``total_acc`` is 0.0 when nothing was counted.
    """
    total_correct = total_sum = fscore = 0
    total_gold_count = total_guess_count = total_overlap_count = 0
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))

    # Only if they provide a file and the raw txts, we can write CONLL file
    handle = None
    if conll_file is not None and txts is not None:
        handle = open(conll_file, "w")

    try:
        for i in range(steps):
            ts_i = batch(ts, i, batchsz)
            correct, count, overlaps, golds, guesses = self._batch(
                ts_i, handle, txts)
            total_correct += correct
            total_sum += count
            total_gold_count += golds
            total_guess_count += guesses
            total_overlap_count += overlaps
        duration = time.time() - start_time
    finally:
        # Close the output file even when a batch raises, so the handle is
        # never leaked and buffered output is flushed.
        if handle is not None:
            handle.close()

    # Guard the division: with no full batch (or empty data) nothing is
    # counted and the accuracy is reported as 0 instead of raising.
    total_acc = total_correct / float(total_sum) if total_sum else 0.0

    # Only show the fscore if requested
    if self.fscore > 0:
        fscore = fScore(total_overlap_count,
                        total_gold_count,
                        total_guess_count,
                        self.fscore)
        print('%s (F%d = %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
              (phase, self.fscore, float(fscore),
               total_correct, total_sum, total_acc, duration))
    else:
        print('%s (Acc %d/%d = %.4f) (%.3f sec)' %
              (phase, total_correct, total_sum, total_acc, duration))

    return total_acc, fscore