Example #1
    def evaluate(self, batches, mode, output_path=None):
        results = []
        used_batches = []
        for i, batch in enumerate(batches):
            input_feed = self.get_input_feed(batch, False)
            outputs = self.sess.run(self.predictions, input_feed)
            try:
                used_batches += flatten_batch(batch)
            except Exception as e:
                pprint(batch)
                print(e)
                exit(1)
            results.append(outputs)
        results = np.concatenate(results, axis=0)

        sys.stdout = open(output_path, 'w') if output_path else sys.stdout
        accuracy = evaluate_and_print(used_batches,
                                      results,
                                      vocab=self.encoder.vocab)
        sys.stdout = sys.__stdout__
        if output_path:
            sys.stderr.write(
                "Output the testing results to '{}'.\n".format(output_path))

        summary_dict = {}
        summary_dict['category/%s/Accuracy' % mode] = accuracy
        summary = make_summary(summary_dict)
        return accuracy, summary
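The bare sys.stdout swap above (also used in Examples #5 and #9-#11) never closes the file and is not restored if the evaluation raises. A minimal sketch of the same redirect with contextlib; evaluate_and_print comes from the example, the wrapper name is illustrative:

import contextlib
import sys

def print_eval_results(used_batches, results, vocab, output_path=None):
    # Redirect stdout only inside the with-block: the file is closed on exit
    # and sys.stdout is restored even if evaluate_and_print raises.
    if output_path:
        with open(output_path, 'w') as f, contextlib.redirect_stdout(f):
            accuracy = evaluate_and_print(used_batches, results, vocab=vocab)
        sys.stderr.write("Output the testing results to '%s'.\n" % output_path)
    else:
        accuracy = evaluate_and_print(used_batches, results, vocab=vocab)
    return accuracy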
Example #2
  def run_epoch_one_task(self, task_name, batches, is_training):
    task_model = self.tasks[task_name]
    loss = 0.0
    start_time = time.time()
    for i, raw_batch in enumerate(batches[task_name]):
      batch = {task_name:raw_batch}
      input_feed = self.get_input_feed(batch, is_training)
      output_feed = [self.tasks[task_name].loss]
      if is_training:
        output_feed.append(self.updates[task_name])

      t = time.time()
      outputs = self.sess.run(output_feed, input_feed)
      t = time.time() - t
      
      step_loss = outputs[0]
      loss += step_loss

      print('epoch: %d,' % self.epoch.eval(), 
            'step: %d,' % i,
            'task: %s,' % task_name, 
            'step_loss: %.3f,' % step_loss, 
            'step_time: %f,' % t)
      sys.stdout.flush()
      if math.isnan(step_loss):
        raise ValueError(
          "Nan loss detection ... (%s: step %d)" % (task_name, i))
    loss /= (i + 1)  # enumerate starts at 0, so i + 1 steps were run
    mode = 'train' if is_training else 'valid'
    summary_dict = {'%s/%s/loss' % (task_name, mode): loss}
    summary = tf_utils.make_summary(summary_dict)
    epoch_time = (time.time() - start_time)
    return epoch_time, loss, summary
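All of these snippets report metrics through make_summary / tf_utils.make_summary, whose definition is not shown. A plausible sketch for TF1-style event files, assuming every value is a scalar; the project's actual helper may differ:

import tensorflow as tf

def make_summary(summary_dict):
    # One scalar Summary.Value per metric, ready for
    # summary_writer.add_summary(summary, global_step).
    return tf.compat.v1.Summary(value=[
        tf.compat.v1.Summary.Value(tag=tag, simple_value=float(value))
        for tag, value in summary_dict.items()
    ])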
Example #3
  def run_epoch(self, batches, is_training):
    start_time = time.time()
    num_steps_in_epoch = [0 for _ in self.tasks]
    loss = [0.0 for _ in self.tasks]
    is_incomplete = True
    while is_incomplete:
      is_incomplete = False
      t = time.time()
      for i, (task_name, task_model) in enumerate(self.tasks.items()):
        try:
          raw_batch = next(batches[task_name])
          batch = {task_name:raw_batch}
          input_feed = self.get_input_feed(batch, is_training)
          if task_model.debug_ops:
            for ops, res in zip(task_model.debug_ops, 
                                self.sess.run(task_model.debug_ops, input_feed)):
              print(ops)
              print(res)
          output_feed = [task_model.loss]
          if is_training:
            output_feed.append(self.updates[task_name])
          t = time.time()
          outputs = self.sess.run(output_feed, input_feed)
          t = time.time() - t
          step_loss = outputs[0]

          print('epoch: %d,' % self.epoch.eval(), 
                'step: %d,' % num_steps_in_epoch[i],
                'task: %s,' % task_name, 
                'step_loss: %.3f,' % step_loss, 
                'step_time: %f,' % t)
          sys.stdout.flush()
          if math.isnan(step_loss):
            raise ValueError(
              "Nan loss detection ... (%s: step %d)" % (task_name, num_steps_in_epoch[i])
            )
          num_steps_in_epoch[i] += 1
          loss[i] += step_loss
          is_incomplete = True
        except StopIteration:
          pass
        except ValueError as e:
          print(e)
          exit(1)

    epoch_time = (time.time() - start_time)
    loss = [l/num_steps for l, num_steps in zip(loss, num_steps_in_epoch)]
    mode = 'train' if is_training else 'valid'
    summary_dict = {'%s/%s/loss' % (task_name, mode): l for task_name, l in zip(self.tasks, loss)}
    summary = tf_utils.make_summary(summary_dict)
    return epoch_time, loss, summary
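The while/try loop above draws one batch per task per pass and keeps going until every task iterator is exhausted. The same round-robin schedule can be written as a generator; a sketch assuming batches maps task names to iterators:

def round_robin_batches(batches, task_names):
    # Yield (task_name, raw_batch) pairs one task at a time, skipping tasks
    # whose iterator is exhausted, until no task can produce a batch.
    exhausted = set()
    while len(exhausted) < len(task_names):
        for task_name in task_names:
            try:
                yield task_name, next(batches[task_name])
            except StopIteration:
                exhausted.add(task_name)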
Example #4
    def train(self):
        model = self.create_model(self.config)

        for epoch in range(model.epoch.eval(), self.config.max_epoch):
            sys.stdout.write(
                'Save the model at the beginning of epoch %02d as model.ckpt-%d\n'
                % (epoch, epoch))

            self.save_model(model)
            self.output_variables_as_text(model)
            sente_win_rate, gote_win_rate = self.evaluate(model)
            self.logger.info(
                'Epoch %d, Win Rate (sente, gote) = (%.3f, %.3f)' %
                (epoch, sente_win_rate, gote_win_rate))
            batches = self.dataset.get_batches(self.config.batch_size,
                                               model.epoch.eval(),
                                               is_training=True)
            average_loss = 0.0
            for i, batch in enumerate(batches):
                q_values, loss, _ = model.step(batch, i)
                average_loss += loss
                print('step = %d, loss = %f' % (i, loss))
            average_loss /= (i + 1)
            self.logger.info('Epoch %d, Average loss = %f' %
                             (epoch, average_loss))
            summary_dict = {}
            summary_dict["train/loss"] = average_loss
            summary = tf_utils.make_summary(summary_dict)
            self.summary_writer.add_summary(summary, model.epoch.eval())
            model.add_epoch()
        self.evaluate(model)
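model.epoch.eval() and model.add_epoch() imply the epoch counter is a non-trainable TensorFlow variable bumped by an assign op, so it survives checkpointing. A sketch of that pattern under TF1 semantics; the attribute names follow the call sites, the body is an assumption:

import tensorflow as tf

class ModelBase(object):
    def __init__(self):
        # Non-trainable counter saved in checkpoints alongside the weights.
        self.epoch = tf.compat.v1.get_variable(
            'epoch', initializer=0, trainable=False)
        self._add_epoch = tf.compat.v1.assign_add(self.epoch, 1)

    def add_epoch(self):
        # epoch.eval() reads the counter; this op increments it by one.
        tf.compat.v1.get_default_session().run(self._add_epoch)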
Example #5
    def evaluate(self, batches, mode, output_path=None):
        start_time = time.time()
        results = []
        used_batches = []
        for i, batch in enumerate(batches):
            input_feed = self.get_input_feed(batch, False)
            # Fetch the loss and outputs in one pass instead of two runs.
            ce, outputs = self.sess.run([self.loss, self.outputs], input_feed)
            used_batches += flatten_batch(batch)
            results.append(outputs)
        results = np.concatenate(results, axis=0)
        epoch_time = time.time() - start_time
        sys.stdout = open(output_path, 'w') if output_path else sys.stdout
        acc, prec, recall = evaluate_and_print(used_batches,
                                               results,
                                               vocab=self.encoder.vocab)
        # F1 is the harmonic mean of precision and recall, not their average.
        f1 = 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.0
        print('acc, p, r, f = %.2f %.2f %.2f %.2f' %
              (100.0 * acc, 100.0 * prec, 100.0 * recall, 100.0 * f1))
        sys.stdout = sys.__stdout__

        summary_dict = {}
        summary_dict['graph/%s/Accuracy' % mode] = acc
        summary_dict['graph/%s/Precision' % mode] = prec
        summary_dict['graph/%s/Recall' % mode] = recall
        summary_dict['graph/%s/F1' % mode] = f1
        summary = make_summary(summary_dict)
        return (acc, prec, recall), summary
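The fix above matters in practice: F1 is the harmonic mean, which is pulled toward the smaller of precision and recall, while the arithmetic mean hides the imbalance.

def f1_score(prec, recall):
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.0

# With prec=0.9 and recall=0.1 the arithmetic mean is 0.50,
# but f1_score(0.9, 0.1) == 0.18.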
Example #6
    def run_epoch(self, batches, is_training):
        start_time = time.time()
        num_steps = 0
        loss = np.array(
            [0.0 for t in self.tasks.values() if t.loss is not None])
        mode = 'train' if is_training else 'valid'
        sys.stdout.write('<%s>\n' % mode)
        while True:
            # End the epoch as soon as any task's batch iterator is exhausted.
            batch = self.yield_examples(batches, self.tasks)
            if batch is None:
                break
            num_steps += 1

            input_feed = self.get_input_feed(batch, is_training)
            output_feed = []
            output_feed.extend(self.losses)

            if is_training:
                output_feed.append(self.updates)


            t = time.time()
            outputs = self.sess.run(output_feed, input_feed)
            t = time.time() - t
            step_loss = outputs[:len(loss)]
            loss += np.array(step_loss)

            print('epoch: %d,' % self.epoch.eval(),
                  'step: %d,' % self.global_step.eval(),
                  'task: %s,' % ' '.join(self.trainable_tasks.keys()),
                  'step_loss: %s,' % ' '.join(["%.3f" % l for l in step_loss]),
                  'step_time: %f' % t)
            sys.stdout.flush()

        epoch_time = (time.time() - start_time)
        step_time = epoch_time / num_steps
        loss = [l / num_steps for l in loss]
        summary_dict = {
            '%s/%s/loss' % (task_model.scopename, mode): l
            for task_model, l in zip(self.trainable_tasks.values(), loss)
        }
        summary = make_summary(summary_dict)
        return epoch_time, step_time, loss, summary
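self.yield_examples is not shown here; Example #7 below inlines equivalent logic. A sketch of what such a helper could look like, returning None once any task's iterator is exhausted (the signature follows the call site, the body is an assumption):

def yield_examples(self, batches, tasks):
    # Collect one raw batch per task; report exhaustion with None so the
    # caller can end the epoch as soon as any task runs out of data.
    batch = {}
    for task_name in tasks:
        try:
            batch[task_name] = next(batches[task_name]) if task_name in batches else {}
        except StopIteration:
            return None
    return batch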
Example #7
  def run_epoch(self, batches, is_training):
    start_time = time.time()
    loss = np.array([0.0 for _ in self.tasks])

    num_steps = 0
    while True:
      batch = {}
      for task_name in self.tasks:
        try:
          if task_name in batches:
            batch[task_name] = next(batches[task_name])
          else:
            batch[task_name] = {}
        except StopIteration:
          pass
        except ValueError as e:
          print(e)
          exit(1)

      # End the epoch as soon as any task's batch iterator is exhausted.
      if not all(task_name in batch for task_name in self.tasks):
        break

      input_feed = self.get_input_feed(batch, is_training)
      output_feed = []
      output_feed += self.losses
      if is_training:
        output_feed.append(self.updates)
      t = time.time()
      outputs = self.sess.run(output_feed, input_feed)
      t = time.time() - t
      step_loss = outputs[:len(self.tasks)]
      loss += np.array(step_loss)

      print('epoch: %d,' % self.epoch.eval(), 
            'step: %d,' % self.global_step.eval(),
            'task: %s,' % ' '.join(self.tasks.keys()),
            'step_loss: %s,' % ' '.join(["%.3f" % l for l in step_loss]), 
            'step_time: %f,' % t)
      num_steps += 1
      sys.stdout.flush()

    epoch_time = (time.time() - start_time)
    loss = [l/num_steps for l in loss]
    mode = 'train' if is_training else 'valid'
    summary_dict = {'%s/%s/loss' % (task_name, mode): l for task_name, l in zip(self.tasks, loss)}
    summary = tf_utils.make_summary(summary_dict)
    return epoch_time, loss, summary
Example #8
    def evaluate(self, model):
        epoch = model.epoch.eval()
        self.output_variables_as_text(model)
        cmd = './simulator/evaluateAgent.sh %s' % self.root_path
        os.system(cmd)
        sente_log_path = self.root_path + '/evaluation/sente/%03d/sente_summary' % epoch
        with open(sente_log_path) as f:
            for i, l in enumerate(f):
                if i == 1:
                    sente_win_rate = float(l.split('/')[0])

        gote_log_path = self.root_path + '/evaluation/gote/%03d/gote_summary' % epoch
        with open(gote_log_path) as f:
            for i, l in enumerate(f):
                if i == 1:
                    gote_win_rate = float(l.split('/')[1])

        summary_dict = {}
        summary_dict["test/win_rate/sente"] = sente_win_rate
        summary_dict["test/win_rate/gote"] = gote_win_rate
        summary = tf_utils.make_summary(summary_dict)
        self.summary_writer.add_summary(summary, model.epoch.eval())
        return sente_win_rate, gote_win_rate
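os.system above discards the script's exit status, so a failed evaluation run goes unnoticed. A sketch of the same call via subprocess, using the script path from the example; whether the shell script signals failure through its exit code is an assumption:

import subprocess

def run_evaluation(root_path):
    # check=True raises CalledProcessError when the script exits non-zero,
    # instead of silently reading stale evaluation files afterwards.
    subprocess.run(['./simulator/evaluateAgent.sh', root_path], check=True)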
Example #9
    def test(self, batches, mode, logger, output_path=None):
        results = []
        used_batches = []
        for i, batch in enumerate(batches):
            input_feed = self.get_input_feed(batch, False)
            relations, mentions = self.sess.run(self.predictions, input_feed)
            try:
                used_batches += flatten_batch(batch)
            except Exception as e:
                pprint(batch)
                print(e)
                exit(1)
            for rel, mention in zip(relations.tolist(), mentions.tolist()):
                results.append((rel, mention))

        sys.stdout = open(output_path, 'w') if output_path else sys.stdout

        triples, mentions = dataset_class.formatize_and_print(
            used_batches, results, vocab=self.encoder.vocab)
        triple_precision, triple_recall, triple_f1 = dataset_class.evaluate_triples(
            triples)
        mention_precision, mention_recall, mention_f1 = dataset_class.evaluate_mentions(
            mentions)

        sys.stdout = sys.__stdout__
        if output_path:
            sys.stderr.write(
                "Output the testing results to '{}'.\n".format(output_path))
        summary_dict = {}
        summary_dict['relex/%s/triple/f1' % mode] = triple_f1
        summary_dict['relex/%s/triple/precision' % mode] = triple_precision
        summary_dict['relex/%s/triple/recall' % mode] = triple_recall
        summary_dict['relex/%s/mention/f1' % mode] = mention_f1
        summary_dict['relex/%s/mention/precision' % mode] = mention_precision
        summary_dict['relex/%s/mention/recall' % mode] = mention_recall
        summary = make_summary(summary_dict)
        return triple_f1, summary
Example #10
    def test(self, batches, mode, logger, output_path):
        results = np.zeros([0, 2])
        used_batches = []
        sys.stderr.write('Start decoding (%s) ...\n' % mode)
        for i, batch in enumerate(batches):
            input_feed = self.get_input_feed(batch, False)
            output_feed = self.predictions
            outputs = self.sess.run(output_feed, input_feed)

            # Flatten the batch and outputs.
            used_batches += flatten_batch(batch)
            results = np.concatenate([results, outputs])

        sys.stdout = open(output_path, 'w') if output_path else sys.stdout
        sys.stderr.write('%d %d\n' % (len(results), len(used_batches)))
        acc = evaluate_and_print(used_batches, results)
        sys.stdout = sys.__stdout__
        summary_dict = {}
        summary_dict['%s/%s/accuracy' % (self.scopename, mode)] = acc
        summary = make_summary(summary_dict)
        return acc, summary
Example #11
    def test(self, batches, mode, logger, output_path):
        results = []
        used_batches = []
        for i, batch in enumerate(batches):
            input_feed = self.get_input_feed(batch, False)
            outputs = self.sess.run(self.predictions, input_feed)
            try:
                used_batches += flatten_batch(batch)
            except Exception as e:
                pprint(batch)
                print(e)
                exit(1)
            results.append(outputs[:, 0, :])
        results = flatten([r.tolist() for r in results])
        sys.stdout = open(output_path, 'w') if output_path else sys.stdout
        bleu = evaluate_and_print(used_batches, results, vocab=self.vocab)
        if output_path:
            sys.stderr.write(
                "Output the testing results to '{}'.\n".format(output_path))
        sys.stdout = sys.__stdout__
        summary_dict = {}
        summary_dict['desc/%s/BLEU' % mode] = bleu
        summary = make_summary(summary_dict)
        return bleu, summary
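flatten here (like flatten_batch in the other examples) collapses one level of nesting, turning per-batch prediction lists into a single flat list. A one-liner with the behavior the call site implies; the real helper may differ:

def flatten(list_of_lists):
    # One level only: [[a, b], [c]] -> [a, b, c].
    return [item for sublist in list_of_lists for item in sublist]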
Example #12
    def run_epoch(self, batches, is_training):
        start_time = time.time()
        num_steps = 0  # counted per completed step, as in the other run_epoch variants
        loss = np.array([0.0 for _ in self.tasks])
        mode = 'train' if is_training else 'valid'
        print('<%s>' % mode)
        while True:
            try:
                model = self.tasks[self.taskname]
                batch = [
                    next(batches[self.taskname])
                    for _ in range(model.n_models)
                ]
            except StopIteration:
                break

            input_feed = self.get_input_feed(batch, is_training)
            task_model = self.tasks[self.taskname]

            if task_model.debug_ops:
                print(task_model)
                for ops, res in zip(
                        task_model.debug_ops,
                        self.sess.run(task_model.debug_ops, input_feed)):
                    print(ops, res.shape)
                    print(res)
                exit(1)

            output_feed = []
            # extend (not append) so outputs[:len(self.tasks)] lines up with
            # the per-task losses, as in the other run_epoch variants.
            output_feed.extend(self.losses)
            if is_training:
                output_feed.append(self.updates)

            t = time.time()
            outputs = self.sess.run(output_feed, input_feed)
            t = time.time() - t
            step_loss = outputs[:len(self.tasks)]
            loss += np.array(step_loss)

            print('epoch: %d,' % self.epoch.eval(),
                  'step: %d,' % self.global_step.eval(),
                  'task: %s,' % ' '.join(self.tasks.keys()),
                  'step_loss: %s,' % ' '.join(["%.3f" % l for l in step_loss]),
                  'step_time: %.3f,' % t)
            sys.stdout.flush()
            num_steps += 1

        epoch_time = (time.time() - start_time)
        # Guard before dividing: no completed steps means the data was too small.
        if num_steps == 0:
            raise ValueError(
                'Set max_rows of the data more than batch_size * num_gpus.')
        step_time = epoch_time / num_steps
        loss = [l / num_steps for l in loss]

        mode = 'train' if is_training else 'valid'
        summary_dict = {
            '%s/%s/loss' % (task_model.scopename, mode): l
            for task_model, l in zip(self.tasks.values(), loss)
        }
        summary = tf_utils.make_summary(summary_dict)
        return epoch_time, step_time, loss, summary
Example #13
    def evaluate(self, batches, gold_path, mode, official_stdout=False):
        def _k_to_tag(k):
            if k == -3:
                return "oracle"  # use only gold spans.
            elif k == -2:
                return "actual"  # use mention_spans as a result of pruning candidate_spans.
            elif k == -1:
                return "exact"  # use the same number of candidate_spans as the gold_spans.
            elif k == 0:
                return "threshold"  # use only candidate_spans with a score greater than 0.
            else:
                return "{}%".format(k)

        mention_evaluators = {
            k: coref_util.RetrievalEvaluator()
            for k in [-3, -2, -1, 0]
        }

        coref_predictions = {}
        coref_evaluator = metrics.CorefEvaluator()
        results = OrderedDict()

        for example_num, example in enumerate(batches):
            input_feed = self.get_input_feed(example, False)
            gold_starts = input_feed[self.ph.gold_starts]
            gold_ends = input_feed[self.ph.gold_ends]


            outputs = self.sess.run(self.outputs, input_feed)
            (candidate_starts, candidate_ends, candidate_mention_scores,
             mention_starts, mention_ends, antecedents,
             antecedent_scores) = outputs[:7]

            self.evaluate_mentions(candidate_starts, candidate_ends,
                                   mention_starts, mention_ends,
                                   candidate_mention_scores, gold_starts,
                                   gold_ends, example, mention_evaluators)
            predicted_antecedents = self.get_predicted_antecedents(
                antecedents, antecedent_scores)
            coref_predictions[example.doc_key] = self.evaluate_coref(
                mention_starts, mention_ends, predicted_antecedents,
                example.clusters, coref_evaluator)

            results[example.doc_key] = dotDict({
                'raw_text': example.text.raw,
                'speakers': example.speakers,
                'extracted_mentions': [(begin, end) for begin, end
                                       in zip(mention_starts, mention_ends)],
                'predicted_antecedents': predicted_antecedents,
            })
            if len(outputs) > 7:
                mention_descs = {}

                pred_mention_desc = [
                    self.vocab.decoder.word.ids2tokens(s)
                    for s in outputs[7][:, 0, :]
                ]
                gold_mention_desc = [
                    self.vocab.decoder.word.ids2tokens(s)
                    for s in outputs[8][:, 0, :]
                ]
                for s, e, desc in zip(mention_starts, mention_ends,
                                      pred_mention_desc):
                    mention_descs[(s, e)] = desc
                # Use the decoded gold descriptions here; the original reused
                # pred_mention_desc and left gold_mention_desc unused.
                for s, e, desc in zip(gold_starts, gold_ends,
                                      gold_mention_desc):
                    mention_descs[(s, e)] = desc

                results[example.doc_key].mention_descs = mention_descs
            else:
                results[example.doc_key].mention_descs = []
        summary_dict = {}

        for k, evaluator in sorted(list(mention_evaluators.items()),
                                   key=operator.itemgetter(0)):
            tags = [
                "mention/{} @ {}".format(t, _k_to_tag(k))
                for t in ("R", "P", "F")
            ]
            results_to_print = []
            for t, v in zip(tags, evaluator.metrics()):
                results_to_print.append("{:<10}: {:.2f}".format(t, v))
                summary_dict["coref/%s/" % mode + t] = v
            print(", ".join(results_to_print))

        conll_results = conll.evaluate_conll(gold_path, coref_predictions,
                                             official_stdout)
        val_types = ('p', 'r', 'f')
        for metric in conll_results:
            for val_type in val_types:
                summary_dict["coref/%s/%s/%s" %
                             (mode, metric,
                              val_type)] = conll_results[metric][val_type]
            print("%s (%s) : %s" % (metric, ", ".join(val_types), " ".join(
                ["%.2f" % x for x in conll_results[metric].values()])))

        average_f1 = sum(
            conll_res["f"]
            for conll_res in list(conll_results.values())) / len(conll_results)
        summary_dict["coref/%s/Average F1 (conll)" % mode] = average_f1
        print("Average F1 (conll): {:.2f}%".format(average_f1))

        p, r, f = coref_evaluator.get_prf()
        summary_dict["coref/%s/Average F1 (py)" % mode] = f
        print("Average F1 (py): {:.2f}%".format(f * 100))
        summary_dict["coref/%s/Average precision (py)" % mode] = p
        print("Average precision (py): {:.2f}%".format(p * 100))
        summary_dict["coref/%s/Average recall (py)" % mode] = r
        print("Average recall (py): {:.2f}%".format(r * 100))

        aligned_results = coref_evaluator.get_aligned_results()
        for doc_key, aligned in zip(results, aligned_results):
            results[doc_key]['aligned_results'] = aligned

        return tf_utils.make_summary(summary_dict), average_f1, results