Example #1
    def test(self, ts, steps=0, **kwargs):
        """Method that evaluates on some data.  There are 2 modes this can run in, `feed_dict` and `dataset`

        In `feed_dict` mode, the model cycles the test data batch-wise and feeds each batch in with a `feed_dict`.
        In `dataset` mode, the data is still passed in to this method, but it is not passed in a `feed_dict` and is
        mostly superfluous since the features are grafted right onto the graph.  However, we do use it for supplying
        the ground truth, ids and text, so it is essential that the caller does not shuffle the data
        :param ts: The test set
        :param conll_output: (`str`) An optional file output
        :param txts: A list of text data associated with the encoded batch
        :param dataset: (`bool`) Is this using `tf.dataset`s
        :return: The metrics
        """
        SET_TRAIN_FLAG(False)

        total_correct = total_sum = 0
        gold_spans = []
        pred_spans = []

        self.cm = ConfusionMatrix(self.idx2classlabel)

        handle = None
        if kwargs.get("conll_output") is not None and kwargs.get(
                'txts') is not None:
            handle = open(kwargs.get("conll_output"), "w")

        try:
            pg = create_progress_bar(steps)
            metrics = {}
            for (features, y), batch in pg(
                    zip_longest(ts, kwargs.get('batches', []), fillvalue={})):
                correct, count, golds, guesses = self.process_batch(
                    features,
                    y,
                    handle=handle,
                    txts=kwargs.get("txts"),
                    ids=batch.get("ids"))
                total_correct += correct
                total_sum += count
                gold_spans.extend(golds)
                pred_spans.extend(guesses)

            total_acc = total_correct / float(total_sum)
            # Only show the fscore if requested
            metrics['tagging_f1'] = span_f1(gold_spans, pred_spans)
            metrics['tagging_acc'] = total_acc
            metrics.update({
                f"classification_{k}": v
                for k, v in self.cm.get_all_metrics().items()
            })
            if self.verbose:
                conll_metrics = per_entity_f1(gold_spans, pred_spans)
                conll_metrics['acc'] = total_acc * 100
                conll_metrics['tokens'] = total_sum
                logger.info(conlleval_output(conll_metrics))
        finally:
            if handle is not None:
                handle.close()

        return metrics
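
The `zip_longest(..., fillvalue={})` pairing above lets the side-channel `batches` metadata be optional. A minimal, standalone sketch of that behaviour with made-up data: when fewer metadata batches than test batches are supplied, the remainder are paired with an empty dict, so `batch.get("ids")` simply returns None.

from itertools import zip_longest

test_feed = [({"x": [1, 2]}, [0, 1]), ({"x": [3, 4]}, [1, 0])]   # (features, y) pairs
batches = [{"ids": ["doc-1", "doc-2"]}]                          # shorter on purpose

for (features, y), batch in zip_longest(test_feed, batches, fillvalue={}):
    print(features["x"], batch.get("ids"))
# [1, 2] ['doc-1', 'doc-2']
# [3, 4] None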
Example #2
    def _test(self, loader, **kwargs):
        self.model.eval()
        steps = len(loader)
        pg = create_progress_bar(steps)
        metrics = [LAS(), UAS(), LCM(), UCM()]

        with torch.no_grad():
            for batch_dict in pg(loader):
                example = self._make_input(batch_dict)
                labels_gold = example.pop('labels')
                heads_gold = example.pop('heads')
                batchsz = self._get_batchsz(batch_dict)
                greedy_heads_pred, greedy_labels_pred = self.model.decode(example)
                T = greedy_labels_pred.shape[1]
                labels_gold_trimmed = labels_gold[:, :T]
                heads_gold_trimmed = heads_gold[:, :T]

                for i in range(batchsz):
                    for m in metrics:
                        if self.punct_eval is False:
                            labels_gold_trimmed[i].masked_fill_(labels_gold_trimmed[i] == self.model.punct, Offsets.PAD)
                        m.add(greedy_heads_pred[i], heads_gold_trimmed[i], greedy_labels_pred[i], labels_gold_trimmed[i])

        metrics = {m.name: m.score for m in metrics} 
        return metrics
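
The LAS/UAS/LCM/UCM objects above are assumed to follow a small accumulator protocol: `add(...)` folds in one example, and `.name`/`.score` are read out at the end. A simplified, hypothetical stand-in (unlabeled attachment only, ignoring the label arguments):

class SimpleUAS:
    """Hypothetical accumulator matching the .add/.name/.score usage above."""
    name = 'uas'

    def __init__(self):
        self.correct = 0
        self.total = 0

    def add(self, heads_pred, heads_gold, labels_pred=None, labels_gold=None):
        # Count how many predicted head indices match the gold heads.
        for p, g in zip(heads_pred, heads_gold):
            self.correct += int(p == g)
            self.total += 1

    @property
    def score(self):
        return self.correct / max(self.total, 1)


m = SimpleUAS()
m.add([2, 0, 2], [2, 0, 4])
print({m.name: m.score})   # {'uas': 0.666...}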
Example #3
    def __init__(self, directory, pattern, vocabs, vectorizers, nctx):
        super().__init__()
        self.src_vectorizer = vectorizers['src']
        self.tgt_vectorizer = vectorizers['tgt']
        self.pattern = pattern
        self.nctx = nctx
        self.directory = directory
        self.vocab = vocabs
        self.samples = 0
        self.rank = 0
        self.world_size = 1
        if torch.distributed.is_initialized():
            self.rank = torch.distributed.get_rank()
            self.world_size = torch.distributed.get_world_size()

        if os.path.exists(f"{directory}/md.yml"):
            f = read_yaml(f"{directory}/md.yml")
            self.samples = f['num_samples']
        else:
            files = list(glob.glob(f"{directory}/{self.pattern}"))
            pg = create_progress_bar(len(files))
            for file in pg(files):
                with open(file) as rf:
                    for _ in rf:
                        self.samples += 1
            write_yaml({'num_samples': self.samples}, f"{directory}/md.yml")
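
A rough, self-contained version of the sample-count caching above, with plain PyYAML standing in for the `read_yaml`/`write_yaml` helpers (an assumption about what those helpers do):

import glob
import os
import yaml  # assumed stand-in for read_yaml / write_yaml

def count_samples(directory, pattern):
    md_path = os.path.join(directory, 'md.yml')
    if os.path.exists(md_path):
        # Reuse the cached count instead of re-reading every file.
        with open(md_path) as rf:
            return yaml.safe_load(rf)['num_samples']
    samples = 0
    for file in glob.glob(os.path.join(directory, pattern)):
        with open(file) as rf:
            samples += sum(1 for _ in rf)
    with open(md_path, 'w') as wf:
        yaml.safe_dump({'num_samples': samples}, wf)
    return samples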
Example #4
    def __init__(
            self, directory, pattern, vocabs, src_vectorizer, tgt_vectorizer, last_turn_only=False,
            distribute=True, shuffle=True, record_keys=[]
    ):
        super().__init__()
        self.record_keys = record_keys
        self.src_vectorizer = src_vectorizer
        self.tgt_vectorizer = tgt_vectorizer
        self.pattern = pattern
        self.directory = directory
        self.vocab = vocabs
        self.samples = 0
        self.rank = 0
        self.world_size = 1
        self.shuffle = shuffle
        self.last_turn_only = last_turn_only
        self.distribute = distribute
        if torch.distributed.is_initialized() and distribute:
            self.rank = torch.distributed.get_rank()
            self.world_size = torch.distributed.get_world_size()

        if os.path.exists(f"{directory}/md.yml"):
            f = read_yaml(f"{directory}/md.yml")
            self.samples = f['num_samples']
        else:
            files = list(glob.glob(f"{directory}/{self.pattern}"))
            pg = create_progress_bar(len(files))
            for file in pg(files):
                with open(file) as rf:
                    for _ in rf:
                        self.samples += 1
            write_yaml({'num_samples': self.samples}, f"{directory}/md.yml")
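
The dataset's `__iter__` isn't shown in this snippet; a common reason to capture `rank` and `world_size` as above is to shard the file list so each distributed worker reads a disjoint slice. A hypothetical illustration of that sharding:

def shard_files(files, rank, world_size):
    # Worker `rank` takes every world_size-th file, starting at its own offset.
    return files[rank::world_size]

print(shard_files(['a.txt', 'b.txt', 'c.txt', 'd.txt', 'e.txt'], rank=1, world_size=2))
# ['b.txt', 'd.txt']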
Example #5
    def _train(self, loader, steps=0, **kwargs):
        """Train an epoch of data using either the input loader or using `tf.dataset`

        In non-`tf.dataset` mode, we cycle the loader data feed, and pull a batch and feed it to the feed dict
        When we use `tf.dataset`s under the hood, this function simply uses the loader to know how many steps
        to train.  The `TRAIN_FLAG` is set for training via `SET_TRAIN_FLAG` in either case.

        :param loader: A data feed
        :param steps: (`int`) The number of steps in the epoch, used to size the progress bar
        :param kwargs: See below

        :Keyword Arguments:
         * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
         * *reporting_fns* (`list`) A list of reporting hooks to use

        :return: Metrics
        """
        SET_TRAIN_FLAG(True)
        reporting_fns = kwargs.get('reporting_fns', [])
        pg = create_progress_bar(steps)
        epoch_loss = tf.Variable(0.0)
        epoch_div = tf.Variable(0, dtype=tf.int32)
        nstep_loss = tf.Variable(0.0)
        nstep_div = tf.Variable(0, dtype=tf.int32)
        self.nstep_start = time.perf_counter()

        @tf.function
        def _train_step(inputs):
            features, y = inputs
            loss = self.optimizer.update(self.model, features, y)
            batchsz = get_shape_as_list(y)[0]
            report_loss = loss * batchsz
            return report_loss, batchsz

        with autograph_options({
                "function_optimization": False,
                "layout_optimizer": False
        }):
            for inputs in pg(loader):
                step_report_loss, step_batchsz = _train_step(inputs)
                epoch_loss.assign_add(step_report_loss)
                nstep_loss.assign_add(step_report_loss)
                epoch_div.assign_add(step_batchsz)
                nstep_div.assign_add(step_batchsz)

                step = self.optimizer.global_step.numpy() + 1
                if step % self.nsteps == 0:
                    metrics = self.calc_metrics(nstep_loss.numpy(),
                                                nstep_div.numpy())
                    self.report(step, metrics, self.nstep_start, 'Train',
                                'STEP', reporting_fns, self.nsteps)
                    nstep_loss.assign(0.0)
                    nstep_div.assign(0)
                    self.nstep_start = time.perf_counter()

        epoch_loss = epoch_loss.numpy()
        epoch_div = epoch_div.numpy()
        metrics = self.calc_metrics(epoch_loss, epoch_div)
        return metrics
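
`calc_metrics` isn't defined here; since the loss is accumulated as `loss * batchsz` and divided by the example count, it presumably reduces to an average loss, consistent with the `avg_loss` computations in the later examples. A minimal sketch of that assumption:

def calc_metrics(agg_loss, div):
    # Batch-size-weighted average of the per-example loss.
    return {'avg_loss': agg_loss / float(div)}

print(calc_metrics(12.5, 50))   # {'avg_loss': 0.25}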
Example #6
def run(args):
    # Limit it to a single GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    conn = create_db(args.db)
    m = Manager()

    logs = args.logging
    datasets = args.datasets
    embeddings = args.embeddings
    settings = args.settings

    # So we don't litter the fs
    dir_ = tempfile.mkdtemp(prefix='baseline-speed-test-')

    try:
        configs = get_configs(args.config)
        if not args.single:
            full_configs = []
            for config in configs:
                full_configs.extend(
                    edit_config(config, args.frameworks, args.no_crf,
                                args.no_attn))
            configs = full_configs
        if args.verbose:
            for config in configs:
                pprint(config)
                print()
            print()
        steps = len(configs)
        pg = create_progress_bar(steps)
        for config in configs:
            write_config = deepcopy(config)
            config['train']['epochs'] = args.trials
            task_name = config['task']

            system_info = m.dict()
            p = Process(target=run_model,
                        args=(system_info, config, logs, settings, datasets,
                              embeddings, task_name, dir_, int(args.gpu)))
            p.start()
            pid = p.pid
            p.join()
            log_file = os.path.join(dir_, 'timing-{}.log'.format(pid))
            speeds = parse_logs(log_file)

            save_data(conn, speeds, write_config, system_info)
            pg.update()
        pg.done()
    finally:
        shutil.rmtree(dir_)
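
A minimal sketch of the `Manager().dict()` pattern used above: the child process fills a shared dict that the parent reads back after `join()`. The `collect_system_info` target here is illustrative, not the script's real `run_model`:

from multiprocessing import Manager, Process

def collect_system_info(system_info):
    # Stand-in for run_model: report facts about the worker back to the parent.
    system_info['framework'] = 'pytorch'
    system_info['gpu'] = 0

if __name__ == '__main__':
    m = Manager()
    system_info = m.dict()
    p = Process(target=collect_system_info, args=(system_info,))
    p.start()
    p.join()
    print(dict(system_info))   # {'framework': 'pytorch', 'gpu': 0}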
Example #7
    def _test(self, ts, **kwargs):

        self.model.eval()
        total_sum = 0
        total_correct = 0

        gold_spans = []
        pred_spans = []
        cm = ConfusionMatrix(self.idx2classlabel)
        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):

            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            class_labels = inputs["class_label"]
            with torch.no_grad():
                class_pred, pred = self.model(inputs)
            correct, count, golds, guesses = self.process_output(
                pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)
            _add_to_cm(cm, class_labels, class_pred)

        total_acc = total_correct / float(total_sum)
        metrics['tagging_acc'] = total_acc
        metrics['tagging_f1'] = span_f1(gold_spans, pred_spans)
        metrics.update({
            f"classification_{k}": v
            for k, v in cm.get_all_metrics().items()
        })
        if self.verbose:
            # TODO: Add programmatic access to these metrics?
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum.item()
            logger.info(conlleval_output(conll_metrics))
        return metrics
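
`_add_to_cm` isn't defined in this snippet; a plausible version reduces the class logits to predicted label ids and feeds gold/guess pairs to the confusion matrix, mirroring the `cm.add_batch(y, guess)` calls in the other examples (an assumption, not the library's actual helper):

def _add_to_cm(cm, class_labels, class_pred):
    # class_pred: [B, num_classes] logits; class_labels: [B] gold label ids.
    guesses = class_pred.argmax(dim=-1).tolist()
    golds = class_labels.tolist()
    cm.add_batch(golds, guesses)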
Example #8
    def test(self, vs, reporting_fns, phase='Valid'):
        """Run an epoch of testing over the dataset

        If we are using a `tf.dataset`-based `fit_func`, we will just
        cycle the number of steps and let the `dataset` yield new batches.

        If we are using `feed_dict`s, we convert each batch from the `DataFeed`
        and pass that into TF as the `feed_dict`.

        :param vs: A validation set
        :param reporting_fns: Reporting hooks
        :param phase: The phase of evaluation (`Test`, `Valid`)
        :return: Metrics
        """
        if phase == 'Test':
            return self._evaluate(vs, reporting_fns)
        self.valid_epochs += 1

        total_loss = 0
        total_toks = 0
        preds = []
        golds = []

        start = time.perf_counter()
        pg = create_progress_bar(len(vs))
        for batch_dict in pg(vs):

            feed_dict = self.model.make_input(batch_dict)
            lossv, top_preds = self.model.sess.run(
                [self.test_loss, self.model.decoder.best], feed_dict=feed_dict)
            toks = self._num_toks(batch_dict['tgt_lengths'])
            total_loss += lossv * toks
            total_toks += toks

            preds.extend(convert_seq2seq_preds(top_preds.T, self.tgt_rlut))
            golds.extend(
                convert_seq2seq_golds(batch_dict['tgt'],
                                      batch_dict['tgt_lengths'],
                                      self.tgt_rlut))

        metrics = self.calc_metrics(total_loss, total_toks)
        metrics['bleu'] = bleu(preds, golds, self.bleu_n_grams)[0]
        self.report(self.valid_epochs, metrics, start, phase, 'EPOCH',
                    reporting_fns)
        return metrics
Example #9
    def _train(self, loader, **kwargs):
        """Train an epoch of data using either the input loader or using `tf.dataset`

        In non-`tf.dataset` mode, we cycle the loader data feed, and pull a batch and feed it to the feed dict
        When we use `tf.dataset`s under the hood, this function simply uses the loader to know how many steps
        to train.  We do use a `feed_dict` for passing the `TRAIN_FLAG` in either case

        :param loader: A data feed
        :param kwargs: See below

        :Keyword Arguments:
         * *reporting_fns* (`list`) A list of reporting hooks to use

        :return: Metrics
        """
        if self.ema:
            self.sess.run(self.ema_restore)

        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_div = 0
        steps = len(loader)
        pg = create_progress_bar(steps)
        for batch_dict in pg(loader):
            feed_dict = self.model.make_input(batch_dict, True)
            _, step, lossv = self.sess.run(
                [self.train_op, self.global_step, self.loss],
                feed_dict=feed_dict)

            batchsz = self._get_batchsz(batch_dict)
            report_lossv = lossv * batchsz
            epoch_loss += report_lossv
            epoch_div += batchsz
            self.nstep_agg += report_lossv
            self.nstep_div += batchsz

            if (step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(step + 1, metrics, self.nstep_start, 'Train',
                            'STEP', reporting_fns, self.nsteps)
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_div)
        return metrics
Example #10
    def test(self, ts, conll_output=None, txts=None):
        """Method that evaluates on some data.

        :param ts: The test set
        :param conll_output: (`str`) An optional file output
        :param txts: A list of text data associated with the encoded batch
        :return: The metrics
        """
        total_correct = total_sum = 0
        gold_spans = []
        pred_spans = []

        steps = len(ts)
        pg = create_progress_bar(steps)
        metrics = {}
        # Only if they provide a file and the raw txts, we can write CONLL file
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")

        try:
            for batch_dict in pg(ts):
                correct, count, golds, guesses = self.process_batch(
                    batch_dict, handle, txts)
                total_correct += correct
                total_sum += count
                gold_spans.extend(golds)
                pred_spans.extend(guesses)

            total_acc = total_correct / float(total_sum)
            # Only show the fscore if requested
            metrics['f1'] = span_f1(gold_spans, pred_spans)
            metrics['acc'] = total_acc
            if self.verbose:
                conll_metrics = per_entity_f1(gold_spans, pred_spans)
                conll_metrics['acc'] = total_acc * 100
                conll_metrics['tokens'] = total_sum
                logger.info(conlleval_output(conll_metrics))
        finally:
            if handle is not None:
                handle.close()

        return metrics
Example #11
 def _evaluate(self, es, reporting_fns, **kwargs):
     self.model.eval()
     pg = create_progress_bar(len(es))
     preds = []
     golds = []
     start = time.perf_counter()
     for batch_dict in pg(es):
         tgt = batch_dict['tgt']
         tgt_lens = batch_dict['tgt_lengths']
         pred = [
             p[0] for p in self._predict(
                 batch_dict, numpy_to_tensor=False, **kwargs)[0]
         ]
         preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
         golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
     metrics = {'bleu': bleu(preds, golds, self.bleu_n_grams)[0]}
     metrics['acc'] = self._acc(preds, golds)
     self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
     return metrics
Example #12
    def _test(self, loader, **kwargs):
        """Test an epoch of data using either the input loader or using `tf.dataset`

        In non-`tf.dataset` mode, we cycle the loader data feed, and pull a batch and feed it to the feed dict
        When we use `tf.dataset`s under the hood, this function simply uses the loader to know how many steps
        to test.

        :param loader: A data feed
        :param kwargs: See below

        :Keyword Arguments:
          * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
          * *reporting_fns* (`list`) A list of reporting hooks to use
          * *verbose* (`dict`) A dictionary containing `console` boolean and `file` name if on

        :return: Metrics
        """
        if self.ema:
            self.sess.run(self.ema_load)

        cm = ConfusionMatrix(self.model.labels)
        steps = len(loader)
        total_loss = 0
        total_norm = 0
        verbose = kwargs.get("verbose", None)

        pg = create_progress_bar(steps)
        for i, batch_dict in enumerate(pg(loader)):
            y = batch_dict['y']
            feed_dict = self.model.make_input(batch_dict, False)
            guess, lossv = self.sess.run([self.model.best, self.test_loss],
                                         feed_dict=feed_dict)

            batchsz = len(guess)
            total_loss += lossv * batchsz
            total_norm += batchsz
            cm.add_batch(y, guess)

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_norm)
        verbose_output(verbose, cm)

        return metrics
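
Accumulating `lossv * batchsz` and dividing by `total_norm` at the end gives a batch-size-weighted mean, which matters when the final batch is smaller than the rest. A quick worked example:

# Two batches of sizes 32 and 8 with per-example losses 0.5 and 1.0:
total_loss = 0.5 * 32 + 1.0 * 8   # 24.0
total_norm = 32 + 8               # 40
print(total_loss / float(total_norm))   # 0.6, not the naive (0.5 + 1.0) / 2 = 0.75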
Example #13
    def _evaluate(self, es, reporting_fns):
        """Run the model with beam search and report Bleu.

        :param es: `DataFeed` of input
        :param reporting_fns: Reporting hooks
        :return: Metrics
        """
        pg = create_progress_bar(len(es))
        preds = []
        golds = []
        start = time.perf_counter()
        for batch_dict in pg(es):
            tgt = batch_dict.pop('tgt')
            tgt_lens = batch_dict.pop('tgt_lengths')
            pred = [p[0] for p in self.model.predict(batch_dict)[0]]
            preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
        metrics = {'bleu': bleu(preds, golds, self.bleu_n_grams)[0]}
        self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
        return metrics
Example #14
    def _test(self, loader, steps=0, **kwargs):
        """Test an epoch of data using either the input loader or using `tf.dataset`

        In non-`tf.dataset` mode, we cycle the loader data feed, and pull a batch and feed it to the feed dict
        When we use `tf.dataset`s under the hood, this function simply uses the loader to know how many steps
        to test.

        :param loader: A data feed
        :param steps: (`int`) The number of steps in the epoch, used to size the progress bar
        :param kwargs: See below

        :Keyword Arguments:
          * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
          * *reporting_fns* (`list`) A list of reporting hooks to use
          * *verbose* (`dict`) A dictionary containing `console` boolean and `file` name if on

        :return: Metrics
        """

        cm = ConfusionMatrix(self.model.labels)
        total_loss = 0
        total_norm = 0
        verbose = kwargs.get("verbose", None)

        pg = create_progress_bar(steps)

        SET_TRAIN_FLAG(False)
        for features, y in pg(loader):
            logits = self.model(features)
            y_ = tf.argmax(logits, axis=1, output_type=tf.int32)
            lossv = tf.compat.v1.losses.sparse_softmax_cross_entropy(
                labels=y, logits=logits).numpy()
            batchsz = int(y.shape[0])
            assert len(y_) == batchsz
            total_loss += lossv * batchsz
            total_norm += batchsz
            cm.add_batch(y, y_)

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_norm)
        verbose_output(verbose, cm)

        return metrics
Example #15
    def _test(self, loader, steps=0, **kwargs):
        """Test an epoch of data using either the input loader or using `tf.dataset`

        In non-`tf.dataset` mode, we cycle the loader data feed, and pull a batch and feed it to the feed dict
        When we use `tf.dataset`s under the hood, this function simply uses the loader to know how many steps
        to test.

        :param loader: A data feed
        :param steps: (`int`) The number of steps in the epoch, used to size the progress bar
        :param kwargs: See below

        :Keyword Arguments:
          * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
          * *reporting_fns* (`list`) A list of reporting hooks to use
          * *verbose* (`dict`) A dictionary containing `console` boolean and `file` name if on

        :return: Metrics
        """

        metrics = [LAS(), UAS(), LCM(), UCM()]

        pg = create_progress_bar(steps)

        SET_TRAIN_FLAG(False)
        for features, y in pg(loader):
            heads_gold, labels_gold = y
            greedy_heads_pred, greedy_labels_pred = self.model.decode(features)
            B, T = get_shape_as_list(greedy_labels_pred)[:2]
            labels_gold_trimmed = labels_gold[:, :T].numpy()
            heads_gold_trimmed = heads_gold[:, :T].numpy()

            for i in range(B):
                for m in metrics:
                    if self.punct_eval is False:
                        labels_gold_trimmed[i] = masked_fill(
                            labels_gold_trimmed[i],
                            labels_gold_trimmed[i] == self.model.punct,
                            Offsets.PAD)
                    m.add(greedy_heads_pred[i], heads_gold_trimmed[i],
                          greedy_labels_pred[i], labels_gold_trimmed[i])

        metrics = {m.name: m.score for m in metrics}
        return metrics
Example #16
    def _test(self, ts, **kwargs):

        self.model.eval()
        total_sum = 0
        total_correct = 0

        gold_spans = []
        pred_spans = []

        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):

            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            with torch.no_grad():
                pred = self.model(inputs)
            correct, count, golds, guesses = self.process_output(
                pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)

        total_acc = total_correct / float(total_sum)
        metrics['acc'] = total_acc
        metrics['f1'] = span_f1(gold_spans, pred_spans)
        if self.verbose:
            # TODO: Add programmatic access to these metrics?
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum.item()
            logger.info(conlleval_output(conll_metrics))
        return metrics
Example #17
    def _test(self, loader, **kwargs):
        self.model.eval()
        total_loss = 0
        total_norm = 0
        steps = len(loader)
        pg = create_progress_bar(steps)
        no_cm = bool(kwargs.get('no_cm', False))
        cm = None if no_cm else ConfusionMatrix(self.labels)
        verbose = kwargs.get("verbose", None)
        output = kwargs.get('output')
        txts = kwargs.get('txts')
        handle = None
        line_number = 0
        if output is not None and txts is not None:
            handle = open(output, "w")

        with torch.no_grad():
            for batch_dict in pg(loader):
                example = self._make_input(batch_dict)
                ys = example.pop('y')
                pred = self.model(example)
                loss = self.crit(pred, ys)
                if handle is not None:
                    for p, y in zip(pred, ys):
                        handle.write('{}\t{}\t{}\n'.format(
                            " ".join(txts[line_number]), self.model.labels[p],
                            self.model.labels[y]))
                        line_number += 1
                batchsz = self._get_batchsz(batch_dict)
                total_loss += loss.item() * batchsz
                total_norm += batchsz
                _add_to_cm(cm, ys, pred)

        metrics = cm.get_all_metrics() if cm is not None else {}
        metrics['avg_loss'] = total_loss / float(total_norm)
        if cm is not None:
            verbose_output(verbose, cm)
        if handle is not None:
            handle.close()

        return metrics
Example #18
    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        no_cm = bool(kwargs.get('no_cm', False))
        cm = None if no_cm else ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            _add_to_cm(cm, y, pred)
            self.optimizer.step()

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = cm.get_all_metrics() if cm is not None else {}
        metrics['lr'] = self.optimizer.current_lr

        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
Example #19
    def _train(self, ts, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        steps = len(ts)
        pg = create_progress_bar(steps)
        self.optimizer.zero_grad()

        for i, batch_dict in enumerate(pg(ts)):
            inputs = self.model.make_input(batch_dict)
            loss = self.model.compute_loss(inputs)
            loss.backward()

            if (i + 1) % self.grad_accum == 0 or (i + 1) == steps:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.clip)
                self.optimizer.step()
                self.optimizer.zero_grad()

            bsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        metrics['lr'] = self.optimizer.current_lr

        return metrics
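
The `(i + 1) % self.grad_accum` cadence above sums gradients from several batches before taking a single optimizer step. A stripped-down illustration of that pattern with a toy model (not the trainer's actual loop):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
grad_accum, steps = 2, 5

optimizer.zero_grad()
for i in range(steps):
    x = torch.randn(8, 4)
    loss = model(x).pow(2).mean()
    loss.backward()                                   # gradients accumulate in .grad
    if (i + 1) % grad_accum == 0 or (i + 1) == steps:
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()                              # one update per grad_accum batches
        optimizer.zero_grad()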
Example #20
    def test(self, vs, reporting_fns, phase, **kwargs):
        if phase == 'Test':
            return self._evaluate(vs, reporting_fns, **kwargs)

        self.model.eval()
        total_loss = total_toks = 0
        steps = len(vs)
        self.valid_epochs += 1
        preds = []
        golds = []

        start = time.perf_counter()
        pg = create_progress_bar(steps)
        for batch_dict in pg(vs):
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            tgt_lens = input_['tgt_len']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            toks = self._num_toks(tgt_lens)
            total_loss += loss.item() * toks
            total_toks += toks
            greedy_preds = [
                p[0]
                for p in self._predict(input_, beam=1, make_input=False)[0]
            ]
            preds.extend(convert_seq2seq_preds(greedy_preds, self.tgt_rlut))
            golds.extend(
                convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens,
                                      self.tgt_rlut))

        metrics = self.calc_metrics(total_loss, total_toks)
        metrics['bleu'] = bleu(preds, golds, self.bleu_n_grams)[0]
        metrics['acc'] = self._acc(preds, golds)
        self.report(self.valid_epochs, metrics, start, phase, 'EPOCH',
                    reporting_fns)
        return metrics
Example #21
    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            heads_gold = example.pop('heads')
            labels_gold = example.pop('labels')
            heads_pred, labels_pred = self.model(example)
            loss = self.crit(heads_pred, heads_gold, labels_pred, labels_gold)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = {}
        metrics['lr'] = self.optimizer.current_lr
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
Example #22
def main():
    parser = argparse.ArgumentParser(
        "Load a dual-encoder model and do response selection on testing data")
    parser.add_argument(
        "--embed_type",
        type=str,
        default='default',
        choices=["default", "positional", "learned-positional"],
        help="register label of the embeddings")
    parser.add_argument("--d_model",
                        type=int,
                        default=512,
                        help="Model dimension (and embedding dsz)")
    parser.add_argument("--d_ff", type=int, default=2048, help="FFN dimension")
    parser.add_argument(
        "--d_k",
        type=int,
        default=None,
        help="Dimension per head.  Use if num_heads=1 to reduce dims")
    parser.add_argument("--num_heads",
                        type=int,
                        default=8,
                        help="Number of heads")
    parser.add_argument("--num_layers",
                        type=int,
                        default=8,
                        help="Number of layers")
    parser.add_argument("--windowed_ra",
                        type=str2bool,
                        default=False,
                        help="whether prevent attention beyond rpr_k")
    parser.add_argument("--num_train_workers",
                        type=int,
                        default=4,
                        help="Number train workers")
    parser.add_argument("--nctx",
                        type=int,
                        default=256,
                        help="Max input length")
    parser.add_argument("--file_type", default='json', help="Suffix for data")
    parser.add_argument("--record_keys", default=['x', 'y'], nargs='+')
    parser.add_argument("--model_type",
                        default="dual-encoder",
                        choices=["dual-encoder", "transformer-bow"])
    parser.add_argument("--batch_size",
                        type=int,
                        default=256,
                        help="Batch Size")
    parser.add_argument("--subword_model_file",
                        type=str,
                        help="The BPE model file",
                        required=True)
    parser.add_argument("--subword_vocab_file",
                        type=str,
                        help="The BPE subword vocab",
                        required=True)
    parser.add_argument("--reduction_d_k",
                        type=int,
                        default=64,
                        help="Dimensions of Key and Query in the single headed"
                        "reduction layers")
    parser.add_argument(
        "--reduction_type",
        type=str,
        default="2ha",
        help="Method of reduction, defaults to 2-headed attention")
    parser.add_argument(
        "--stacking_layers",
        type=int,
        nargs='+',
        help="Hidden sizes of the dense stack (ff2 from the convert paper)")

    parser.add_argument("--reader_type",
                        type=str,
                        default='preprocessed',
                        choices=['ntp', 'nsp', 'preprocessed', 'tfrecord'])
    parser.add_argument("--output_file", type=str)
    parser.add_argument(
        '--rpr_k',
        help="Relative attention positional sizes; pass 0 if you don't want relative attention",
        type=int,
        default=[8],
        nargs='+')
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--num_test_workers",
                        type=int,
                        default=1,
                        help="Number valid workers")
    parser.add_argument("--ckpt",
                        type=str,
                        help="path to the model checkpoint",
                        required=True)
    parser.add_argument("--test_file",
                        type=str,
                        help="path to the testing data")
    parser.add_argument("--recall_k",
                        type=int,
                        default=100,
                        help="select the response from how many candidates")
    parser.add_argument("--recall_top",
                        type=int,
                        default=1,
                        help="whether the correct response is ranked top x")
    parser.add_argument("--num_batches", type=int, default=1_000_000)
    parser.add_argument("--extra_tokens",
                        help="What extra tokens should we use",
                        nargs="+",
                        default=["[CLS]", "[MASK]"])
    args = parser.parse_args()

    reader = MultiFileDatasetReader(args.nctx,
                                    args.nctx,
                                    model_file=args.subword_model_file,
                                    vocab_file=args.subword_vocab_file,
                                    file_type=args.file_type,
                                    reader_type=args.reader_type,
                                    record_keys=args.record_keys,
                                    extra_tokens=args.extra_tokens)

    vocab = reader.build_vocab()
    # If we are not using chars, then use 'x' for both input and output
    preproc_data = baseline.embeddings.load_embeddings(
        'x',
        dsz=args.d_model,
        known_vocab=vocab['x'],
        preserve_vocab_indices=True,
        embed_type=args.embed_type)

    vocabs = preproc_data['vocab']
    embeddings = preproc_data['embeddings']
    logger.info("Loaded embeddings")

    test_set = reader.load(args.test_file, vocabs)
    ind2tok = {ind: tok for tok, ind in vocabs.items()}

    # use other samples in a batch as negative samples. Don't shuffle to compare with conveRT benchmarks
    test_loader = DataLoader(test_set,
                             batch_size=args.batch_size,
                             num_workers=args.num_test_workers)
    logger.info("Loaded datasets")
    model = create_model(args.model_type,
                         embeddings,
                         d_model=args.d_model,
                         d_ff=args.d_ff,
                         num_heads=args.num_heads,
                         num_layers=args.num_layers,
                         rpr_k=args.rpr_k,
                         d_k=args.d_k,
                         reduction_d_k=args.reduction_d_k,
                         stacking_layers=args.stacking_layers,
                         windowed_ra=args.windowed_ra,
                         reduction_type=args.reduction_type,
                         logger=logger)

    if os.path.isdir(args.ckpt):
        checkpoint, _ = find_latest_checkpoint(args.ckpt)
        logger.warning("Found latest checkpoint %s", checkpoint)
    else:
        checkpoint = args.ckpt
    if checkpoint.endswith(".npz"):
        load_transformer_de_npz(model, checkpoint)
    else:
        model.load_state_dict(
            torch.load(checkpoint, map_location=torch.device('cpu')))
    model.to(args.device)

    numerator = 0
    denominator = 0
    model.eval()
    num_batches = min(len(test_loader), args.num_batches)
    pg = create_progress_bar(num_batches)

    for i, batch in enumerate(get_next_k(test_loader, args.recall_k)):

        if i >= num_batches or batch[0].shape[0] != args.recall_k:
            break
        with torch.no_grad():
            inputs, targets = batch
            inputs = inputs.to(args.device)
            targets = targets.to(args.device)

            query = model.encode_query(inputs).unsqueeze(1)  # [B, 1, H]
            response = model.encode_response(targets).unsqueeze(0)  # [1, B, H]
            all_score = nn.CosineSimilarity(dim=-1)(query, response)
            _, indices = torch.topk(all_score, args.recall_top, dim=1)
            correct = (indices == torch.arange(
                args.recall_k, device=all_score.device).unsqueeze(1).expand(
                    -1, args.recall_top)).sum()
            numerator += correct
            print(
                f"Selected {correct} correct responses out of {args.recall_k}")
            denominator += args.recall_k
        pg.update()
    pg.done()
    acc = float(numerator) / denominator

    print(f"{args.recall_top}@{args.recall_k} acc: {acc}")

    if args.output_file:
        with open(args.output_file, 'a') as wf:
            wf.write(
                f"Checkpoint: {checkpoint}; {args.recall_top}@{args.recall_k} accuracy: {acc}\n"
            )
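
The scoring block above relies on broadcasting: queries shaped [B, 1, H] against responses shaped [1, B, H] give a [B, B] cosine-similarity matrix whose diagonal holds each query's true response. A small self-contained version of the recall@top bookkeeping:

import torch

k, top = 4, 1
query = torch.randn(k, 1, 8)                                   # [B, 1, H]
response = torch.randn(1, k, 8)                                # [1, B, H]
scores = torch.nn.CosineSimilarity(dim=-1)(query, response)    # [B, B]
_, indices = torch.topk(scores, top, dim=1)                    # [B, top]
correct = (indices == torch.arange(k).unsqueeze(1).expand(-1, top)).sum()
print(f"{int(correct)} of {k} queries ranked their own response in the top {top}")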
Example #23
 def _report_hook(count, block_size, total_size):
     if Context.pg is None:
         length = int((total_size + block_size - 1) /
                      float(block_size)) if total_size != -1 else 1
         Context.pg = create_progress_bar(length)
     Context.pg.update()
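
The `(count, block_size, total_size)` signature matches urllib's `reporthook` protocol, so a hook like the one above would typically be wired up as below; the URL and filename are placeholders:

import urllib.request

urllib.request.urlretrieve('https://example.com/big-file.bin',
                           'big-file.bin',
                           reporthook=_report_hook)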
Example #24
if os.path.isdir(args.ckpt):
    checkpoint, _ = find_latest_checkpoint(args.ckpt)
    logger.warning("Found latest checkpoint %s", checkpoint)
else:
    checkpoint = args.ckpt
if checkpoint.endswith(".npz"):
    load_transformer_de_npz(model, checkpoint)
else:
    model.load_state_dict(
        torch.load(checkpoint, map_location=torch.device('cpu')))
model.to(args.device)

numerator = 0
denominator = 0
model.eval()
num_batches = min(len(test_loader), args.num_batches)
pg = create_progress_bar(num_batches)

for i, batch in enumerate(get_next_k(test_loader, args.recall_k)):

    if i >= num_batches or batch[0].shape[0] != args.recall_k:
        break
    uniq = set()
    with torch.no_grad():
        inputs, targets = batch
        inputs = inputs.to(args.device)
        targets = targets.to(args.device)

        query = model.encode_query(inputs).unsqueeze(1)  # [B, 1, H]
        response = model.encode_response(targets).unsqueeze(0)  # [1, B, H]
        all_score = nn.CosineSimilarity(dim=-1)(query, response)
        _, indices = torch.topk(all_score, args.recall_top, dim=1)
Example #25
                     reduction_d_k=args.reduction_d_k,
                     ff_pdrop=0.,
                     logger=logger)

if os.path.isdir(args.ckpt):
    checkpoint, _ = find_latest_checkpoint(args.ckpt)
    logger.warning("Found latest checkpoint %s", checkpoint)
else:
    checkpoint = args.ckpt
model.load_state_dict(torch.load(checkpoint, map_location=torch.device('cpu')))
model.to(args.device)

numerator = 0
denominator = 0
model.eval()
pg = create_progress_bar(len(test_loader) // args.recall_k)
for batch in test_loader:
    if batch[0].shape[0] != args.recall_k:
        break
    with torch.no_grad():
        x, y = batch
        inputs = x.to(args.device)
        targets = y.to(args.device)
        query = model.encode_query(inputs).unsqueeze(1)  # [B, 1, H]
        response = model.encode_response(targets).unsqueeze(0)  # [1, B, H]
        all_score = nn.CosineSimilarity(dim=-1)(query, response).to('cpu')

        _, indices = torch.topk(all_score, args.recall_top, dim=1)
        correct = (indices == torch.arange(args.recall_k).unsqueeze(1).expand(
            -1, args.recall_top)).sum()
        numerator += correct