def test(self, ts, steps=0, **kwargs):
    """Method that evaluates on some data.

    There are 2 modes this can run in, `feed_dict` and `dataset`.

    In `feed_dict` mode, the model cycles the test data batch-wise and feeds
    each batch in with a `feed_dict`.

    In `dataset` mode, the data is still passed in to this method, but it is
    not passed in a `feed_dict` and is mostly superfluous since the features
    are grafted right onto the graph. However, we do use it for supplying the
    ground truth, ids and text, so it is essential that the caller does not
    shuffle the data.

    :param ts: The test set
    :param steps: (`int`) The number of steps (batches) to expect
    :param conll_output: (`str`) An optional file output
    :param txts: A list of text data associated with the encoded batch
    :param batches: (`list`) Optional raw batch dicts supplying example `ids`
    :return: The metrics
    """
    SET_TRAIN_FLAG(False)
    total_correct = total_sum = 0
    gold_spans = []
    pred_spans = []
    self.cm = ConfusionMatrix(self.idx2classlabel)

    handle = None
    if kwargs.get("conll_output") is not None and kwargs.get('txts') is not None:
        handle = open(kwargs.get("conll_output"), "w")

    try:
        pg = create_progress_bar(steps)
        metrics = {}
        for (features, y), batch in pg(zip_longest(ts, kwargs.get('batches', []), fillvalue={})):
            correct, count, golds, guesses = self.process_batch(
                features, y, handle=handle, txts=kwargs.get("txts"), ids=batch.get("ids")
            )
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)

        total_acc = total_correct / float(total_sum)
        metrics['tagging_f1'] = span_f1(gold_spans, pred_spans)
        metrics['tagging_acc'] = total_acc
        metrics.update({f"classification_{k}": v for k, v in self.cm.get_all_metrics().items()})
        if self.verbose:
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum
            logger.info(conlleval_output(conll_metrics))
    finally:
        if handle is not None:
            handle.close()
    return metrics
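# Hedged usage sketch (not part of the source): how a caller might drive the
# tagger evaluation above. `trainer`, `test_set`, `raw_texts` and `raw_batches`
# are hypothetical names; only the keyword arguments mirror the method's kwargs.
#
#     metrics = trainer.test(
#         test_set,
#         steps=len(test_set),
#         conll_output='test.conll',  # optional CONLL dump, requires txts too
#         txts=raw_texts,             # raw text aligned with the unshuffled batches
#         batches=raw_batches,        # raw batch dicts supplying example ids
#     )
#     print(metrics['tagging_f1'], metrics['tagging_acc'])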
def _test(self, loader, **kwargs):
    self.model.eval()
    steps = len(loader)
    pg = create_progress_bar(steps)
    metrics = [LAS(), UAS(), LCM(), UCM()]
    with torch.no_grad():
        for batch_dict in pg(loader):
            example = self._make_input(batch_dict)
            labels_gold = example.pop('labels')
            heads_gold = example.pop('heads')
            batchsz = self._get_batchsz(batch_dict)
            greedy_heads_pred, greedy_labels_pred = self.model.decode(example)
            T = greedy_labels_pred.shape[1]
            labels_gold_trimmed = labels_gold[:, :T]
            heads_gold_trimmed = heads_gold[:, :T]
            for i in range(batchsz):
                # Mask punctuation once per example; the in-place fill is idempotent
                if self.punct_eval is False:
                    labels_gold_trimmed[i].masked_fill_(labels_gold_trimmed[i] == self.model.punct, Offsets.PAD)
                for m in metrics:
                    m.add(
                        greedy_heads_pred[i],
                        heads_gold_trimmed[i],
                        greedy_labels_pred[i],
                        labels_gold_trimmed[i],
                    )
    metrics = {m.name: m.score for m in metrics}
    return metrics
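# Minimal standalone sketch of the punctuation-masking step used above: gold
# labels equal to the punctuation id are overwritten in place with the PAD id
# so the attachment metrics skip those tokens. PUNCT and PAD are stand-in ids
# for illustration, not values from the source.
import torch

PUNCT, PAD = 3, 0
labels_gold = torch.tensor([5, 3, 7, 3, 2])
labels_gold.masked_fill_(labels_gold == PUNCT, PAD)  # in-place: -> [5, 0, 7, 0, 2]
print(labels_gold)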
def __init__(self, directory, pattern, vocabs, vectorizers, nctx):
    super().__init__()
    self.src_vectorizer = vectorizers['src']
    self.tgt_vectorizer = vectorizers['tgt']
    self.pattern = pattern
    self.nctx = nctx
    self.directory = directory
    self.vocab = vocabs
    self.samples = 0
    self.rank = 0
    self.world_size = 1
    if torch.distributed.is_initialized():
        self.rank = torch.distributed.get_rank()
        self.world_size = torch.distributed.get_world_size()
    if os.path.exists(f"{directory}/md.yml"):
        f = read_yaml(f"{directory}/md.yml")
        self.samples = f['num_samples']
    else:
        files = list(glob.glob(f"{directory}/{self.pattern}"))
        pg = create_progress_bar(len(files))
        for file in pg(files):
            with open(file) as rf:
                for _ in rf:
                    self.samples += 1
        write_yaml({'num_samples': self.samples}, f"{directory}/md.yml")
def __init__(
    self,
    directory,
    pattern,
    vocabs,
    src_vectorizer,
    tgt_vectorizer,
    last_turn_only=False,
    distribute=True,
    shuffle=True,
    record_keys=None,
):
    super().__init__()
    # Avoid a mutable default argument: fall back to a fresh list here
    self.record_keys = record_keys if record_keys is not None else []
    self.src_vectorizer = src_vectorizer
    self.tgt_vectorizer = tgt_vectorizer
    self.pattern = pattern
    self.directory = directory
    self.vocab = vocabs
    self.samples = 0
    self.rank = 0
    self.world_size = 1
    self.shuffle = shuffle
    self.last_turn_only = last_turn_only
    self.distribute = distribute
    if torch.distributed.is_initialized() and distribute:
        self.rank = torch.distributed.get_rank()
        self.world_size = torch.distributed.get_world_size()
    if os.path.exists(f"{directory}/md.yml"):
        f = read_yaml(f"{directory}/md.yml")
        self.samples = f['num_samples']
    else:
        files = list(glob.glob(f"{directory}/{self.pattern}"))
        pg = create_progress_bar(len(files))
        for file in pg(files):
            with open(file) as rf:
                for _ in rf:
                    self.samples += 1
        write_yaml({'num_samples': self.samples}, f"{directory}/md.yml")
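# Self-contained sketch of the sample-count caching idiom both constructors
# above share: count lines across all shard files once, persist the total to
# md.yml, and reuse it on later runs. This uses PyYAML directly instead of the
# repo's read_yaml/write_yaml helpers (an assumption made for portability).
import glob
import os
import yaml

def cached_num_samples(directory: str, pattern: str) -> int:
    md_path = os.path.join(directory, "md.yml")
    if os.path.exists(md_path):
        with open(md_path) as rf:
            return yaml.safe_load(rf)["num_samples"]
    samples = 0
    for file in glob.glob(os.path.join(directory, pattern)):
        with open(file) as rf:
            samples += sum(1 for _ in rf)  # one sample per line
    with open(md_path, "w") as wf:
        yaml.safe_dump({"num_samples": samples}, wf)
    return samples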
def _train(self, loader, steps=0, **kwargs):
    """Train an epoch of data using either the input loader or using `tf.dataset`.

    In non-`tf.dataset` mode, we cycle the loader data feed and pull a batch,
    feeding it via a `feed_dict`. When we use `tf.dataset`s under the hood,
    this function simply uses the loader to know how many steps to train.
    We set the `TRAIN_FLAG` explicitly in either case.

    :param loader: A data feed
    :param steps: (`int`) The number of steps in an epoch
    :param kwargs: See below

    :Keyword Arguments:
     * *reporting_fns* (`list`) A list of reporting hooks to use

    :return: Metrics
    """
    SET_TRAIN_FLAG(True)
    reporting_fns = kwargs.get('reporting_fns', [])
    pg = create_progress_bar(steps)
    epoch_loss = tf.Variable(0.0)
    epoch_div = tf.Variable(0, dtype=tf.int32)
    nstep_loss = tf.Variable(0.0)
    nstep_div = tf.Variable(0, dtype=tf.int32)
    self.nstep_start = time.perf_counter()

    @tf.function
    def _train_step(inputs):
        features, y = inputs
        loss = self.optimizer.update(self.model, features, y)
        batchsz = get_shape_as_list(y)[0]
        report_loss = loss * batchsz
        return report_loss, batchsz

    with autograph_options({"function_optimization": False, "layout_optimizer": False}):
        for inputs in pg(loader):
            step_report_loss, step_batchsz = _train_step(inputs)
            epoch_loss.assign_add(step_report_loss)
            nstep_loss.assign_add(step_report_loss)
            epoch_div.assign_add(step_batchsz)
            nstep_div.assign_add(step_batchsz)
            step = self.optimizer.global_step.numpy() + 1
            if step % self.nsteps == 0:
                metrics = self.calc_metrics(nstep_loss.numpy(), nstep_div.numpy())
                self.report(step, metrics, self.nstep_start, 'Train', 'STEP', reporting_fns, self.nsteps)
                nstep_loss.assign(0.0)
                nstep_div.assign(0)
                self.nstep_start = time.perf_counter()

    epoch_loss = epoch_loss.numpy()
    epoch_div = epoch_div.numpy()
    metrics = self.calc_metrics(epoch_loss, epoch_div)
    return metrics
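# Toy TF2 sketch of the accumulation pattern above: running totals live in
# tf.Variables so a @tf.function-compiled step can feed them without leaking
# Python state into the traced graph. The loss and batches here are stand-ins,
# not the trainer's own model or optimizer.
import tensorflow as tf

loss_total = tf.Variable(0.0)
count = tf.Variable(0, dtype=tf.int32)

@tf.function
def step(x):
    loss = tf.reduce_mean(tf.square(x))  # stand-in loss
    batchsz = tf.shape(x)[0]
    return loss * tf.cast(batchsz, tf.float32), batchsz

for batch in [tf.ones([4, 2]), tf.ones([2, 2])]:
    l, n = step(batch)
    loss_total.assign_add(l)
    count.assign_add(n)
print((loss_total / tf.cast(count, tf.float32)).numpy())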
def run(args):
    # Limit it to a single GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    conn = create_db(args.db)
    m = Manager()
    logs = args.logging
    datasets = args.datasets
    embeddings = args.embeddings
    settings = args.settings
    # So we don't litter the fs
    dir_ = tempfile.mkdtemp(prefix='baseline-speed-test-')
    try:
        configs = get_configs(args.config)
        if not args.single:
            full_configs = []
            for config in configs:
                full_configs.extend(edit_config(config, args.frameworks, args.no_crf, args.no_attn))
            configs = full_configs
        if args.verbose:
            for config in configs:
                pprint(config)
                print()
                print()
        steps = len(configs)
        pg = create_progress_bar(steps)
        for config in configs:
            write_config = deepcopy(config)
            config['train']['epochs'] = args.trials
            task_name = config['task']
            system_info = m.dict()
            p = Process(
                target=run_model,
                args=(system_info, config, logs, settings, datasets, embeddings, task_name, dir_, int(args.gpu)),
            )
            p.start()
            pid = p.pid
            p.join()
            log_file = os.path.join(dir_, 'timing-{}.log'.format(pid))
            speeds = parse_logs(log_file)
            save_data(conn, speeds, write_config, system_info)
            pg.update()
        pg.done()
    finally:
        shutil.rmtree(dir_)
def _test(self, ts, **kwargs):
    self.model.eval()
    total_sum = 0
    total_correct = 0
    gold_spans = []
    pred_spans = []
    cm = ConfusionMatrix(self.idx2classlabel)
    metrics = {}
    steps = len(ts)

    conll_output = kwargs.get('conll_output', None)
    txts = kwargs.get('txts', None)
    handle = None
    if conll_output is not None and txts is not None:
        handle = open(conll_output, "w")

    pg = create_progress_bar(steps)
    try:
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            class_labels = inputs["class_label"]
            with torch.no_grad():
                class_pred, pred = self.model(inputs)
            correct, count, golds, guesses = self.process_output(pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)
            _add_to_cm(cm, class_labels, class_pred)
    finally:
        # Close the CONLL output file even if evaluation fails
        if handle is not None:
            handle.close()

    total_acc = total_correct / float(total_sum)
    metrics['tagging_acc'] = total_acc
    metrics['tagging_f1'] = span_f1(gold_spans, pred_spans)
    metrics.update({f"classification_{k}": v for k, v in cm.get_all_metrics().items()})
    if self.verbose:
        # TODO: Add programmatic access to these metrics?
        conll_metrics = per_entity_f1(gold_spans, pred_spans)
        conll_metrics['acc'] = total_acc * 100
        conll_metrics['tokens'] = total_sum.item()
        logger.info(conlleval_output(conll_metrics))
    return metrics
def test(self, vs, reporting_fns, phase='Valid'):
    """Run an epoch of testing over the dataset.

    If we are using a `tf.dataset`-based `fit_func`, we will just cycle the
    number of steps and let the `dataset` yield new batches. If we are using
    `feed_dict`s, we convert each batch from the `DataFeed` and pass that into
    TF as the `feed_dict`.

    :param vs: A validation set
    :param reporting_fns: Reporting hooks
    :param phase: The phase of evaluation (`Test`, `Valid`)
    :return: Metrics
    """
    if phase == 'Test':
        return self._evaluate(vs, reporting_fns)

    self.valid_epochs += 1
    total_loss = 0
    total_toks = 0
    preds = []
    golds = []
    start = time.perf_counter()
    pg = create_progress_bar(len(vs))
    for batch_dict in pg(vs):
        feed_dict = self.model.make_input(batch_dict)
        lossv, top_preds = self.model.sess.run([self.test_loss, self.model.decoder.best], feed_dict=feed_dict)
        toks = self._num_toks(batch_dict['tgt_lengths'])
        total_loss += lossv * toks
        total_toks += toks
        preds.extend(convert_seq2seq_preds(top_preds.T, self.tgt_rlut))
        golds.extend(convert_seq2seq_golds(batch_dict['tgt'], batch_dict['tgt_lengths'], self.tgt_rlut))
    metrics = self.calc_metrics(total_loss, total_toks)
    metrics['bleu'] = bleu(preds, golds, self.bleu_n_grams)[0]
    self.report(self.valid_epochs, metrics, start, phase, 'EPOCH', reporting_fns)
    return metrics
def _train(self, loader, **kwargs):
    """Train an epoch of data using either the input loader or using `tf.dataset`.

    In non-`tf.dataset` mode, we cycle the loader data feed and pull a batch,
    feeding it via a `feed_dict`. When we use `tf.dataset`s under the hood,
    this function simply uses the loader to know how many steps to train.
    We do use a `feed_dict` for passing the `TRAIN_FLAG` in either case.

    :param loader: A data feed
    :param kwargs: See below

    :Keyword Arguments:
     * *reporting_fns* (`list`) A list of reporting hooks to use

    :return: Metrics
    """
    if self.ema:
        self.sess.run(self.ema_restore)
    reporting_fns = kwargs.get('reporting_fns', [])
    epoch_loss = 0
    epoch_div = 0
    steps = len(loader)
    pg = create_progress_bar(steps)
    for batch_dict in pg(loader):
        feed_dict = self.model.make_input(batch_dict, True)
        _, step, lossv = self.sess.run([self.train_op, self.global_step, self.loss], feed_dict=feed_dict)
        batchsz = self._get_batchsz(batch_dict)
        report_lossv = lossv * batchsz
        epoch_loss += report_lossv
        epoch_div += batchsz
        self.nstep_agg += report_lossv
        self.nstep_div += batchsz
        if (step + 1) % self.nsteps == 0:
            metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
            self.report(step + 1, metrics, self.nstep_start, 'Train', 'STEP', reporting_fns, self.nsteps)
            self.reset_nstep()
    metrics = self.calc_metrics(epoch_loss, epoch_div)
    return metrics
def test(self, ts, conll_output=None, txts=None):
    """Method that evaluates on some data.

    :param ts: The test set
    :param conll_output: (`str`) An optional file output
    :param txts: A list of text data associated with the encoded batch
    :return: The metrics
    """
    total_correct = total_sum = 0
    gold_spans = []
    pred_spans = []
    steps = len(ts)
    pg = create_progress_bar(steps)
    metrics = {}

    # Only if they provide a file and the raw txts can we write a CONLL file
    handle = None
    if conll_output is not None and txts is not None:
        handle = open(conll_output, "w")

    try:
        for batch_dict in pg(ts):
            correct, count, golds, guesses = self.process_batch(batch_dict, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)

        total_acc = total_correct / float(total_sum)
        metrics['f1'] = span_f1(gold_spans, pred_spans)
        metrics['acc'] = total_acc
        if self.verbose:
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum
            logger.info(conlleval_output(conll_metrics))
    finally:
        if handle is not None:
            handle.close()
    return metrics
def _evaluate(self, es, reporting_fns, **kwargs):
    self.model.eval()
    pg = create_progress_bar(len(es))
    preds = []
    golds = []
    start = time.perf_counter()
    for batch_dict in pg(es):
        tgt = batch_dict['tgt']
        tgt_lens = batch_dict['tgt_lengths']
        pred = [p[0] for p in self._predict(batch_dict, numpy_to_tensor=False, **kwargs)[0]]
        preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
        golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
    metrics = {'bleu': bleu(preds, golds, self.bleu_n_grams)[0]}
    metrics['acc'] = self._acc(preds, golds)
    self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
    return metrics
def _test(self, loader, **kwargs):
    """Test an epoch of data using either the input loader or using `tf.dataset`.

    In non-`tf.dataset` mode, we cycle the loader data feed and pull a batch,
    feeding it via a `feed_dict`. When we use `tf.dataset`s under the hood,
    this function simply uses the loader to know how many steps to test.

    :param loader: A data feed
    :param kwargs: See below

    :Keyword Arguments:
     * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
     * *reporting_fns* (`list`) A list of reporting hooks to use
     * *verbose* (`dict`) A dictionary containing `console` boolean and `file` name if on

    :return: Metrics
    """
    if self.ema:
        self.sess.run(self.ema_load)
    cm = ConfusionMatrix(self.model.labels)
    steps = len(loader)
    total_loss = 0
    total_norm = 0
    verbose = kwargs.get("verbose", None)
    pg = create_progress_bar(steps)
    for batch_dict in pg(loader):
        y = batch_dict['y']
        feed_dict = self.model.make_input(batch_dict, False)
        guess, lossv = self.sess.run([self.model.best, self.test_loss], feed_dict=feed_dict)
        batchsz = len(guess)
        total_loss += lossv * batchsz
        total_norm += batchsz
        cm.add_batch(y, guess)
    metrics = cm.get_all_metrics()
    metrics['avg_loss'] = total_loss / float(total_norm)
    verbose_output(verbose, cm)
    return metrics
def _evaluate(self, es, reporting_fns):
    """Run the model with beam search and report Bleu.

    :param es: `DataFeed` of input
    :param reporting_fns: Input hooks
    """
    pg = create_progress_bar(len(es))
    preds = []
    golds = []
    start = time.perf_counter()
    for batch_dict in pg(es):
        tgt = batch_dict.pop('tgt')
        tgt_lens = batch_dict.pop('tgt_lengths')
        pred = [p[0] for p in self.model.predict(batch_dict)[0]]
        preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
        golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
    metrics = {'bleu': bleu(preds, golds, self.bleu_n_grams)[0]}
    self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
    return metrics
def _test(self, loader, steps=0, **kwargs):
    """Test an epoch of data using either the input loader or using `tf.dataset`.

    In non-`tf.dataset` mode, we cycle the loader data feed and pull a batch,
    feeding it via a `feed_dict`. When we use `tf.dataset`s under the hood,
    this function simply uses the loader to know how many steps to test.

    :param loader: A data feed
    :param steps: (`int`) The number of steps in an epoch
    :param kwargs: See below

    :Keyword Arguments:
     * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
     * *reporting_fns* (`list`) A list of reporting hooks to use
     * *verbose* (`dict`) A dictionary containing `console` boolean and `file` name if on

    :return: Metrics
    """
    cm = ConfusionMatrix(self.model.labels)
    total_loss = 0
    total_norm = 0
    verbose = kwargs.get("verbose", None)
    pg = create_progress_bar(steps)
    SET_TRAIN_FLAG(False)
    for features, y in pg(loader):
        logits = self.model(features)
        y_ = tf.argmax(logits, axis=1, output_type=tf.int32)
        cm.add_batch(y, y_)
        lossv = tf.compat.v1.losses.sparse_softmax_cross_entropy(labels=y, logits=logits).numpy()
        batchsz = int(y.shape[0])
        assert len(y_) == batchsz
        total_loss += lossv * batchsz
        total_norm += batchsz
    metrics = cm.get_all_metrics()
    metrics['avg_loss'] = total_loss / float(total_norm)
    verbose_output(verbose, cm)
    return metrics
def _test(self, loader, steps=0, **kwargs):
    """Test an epoch of data using either the input loader or using `tf.dataset`.

    In non-`tf.dataset` mode, we cycle the loader data feed and pull a batch,
    feeding it via a `feed_dict`. When we use `tf.dataset`s under the hood,
    this function simply uses the loader to know how many steps to test.

    :param loader: A data feed
    :param steps: (`int`) The number of steps in an epoch
    :param kwargs: See below

    :Keyword Arguments:
     * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
     * *reporting_fns* (`list`) A list of reporting hooks to use
     * *verbose* (`dict`) A dictionary containing `console` boolean and `file` name if on

    :return: Metrics
    """
    metrics = [LAS(), UAS(), LCM(), UCM()]
    pg = create_progress_bar(steps)
    SET_TRAIN_FLAG(False)
    for features, y in pg(loader):
        heads_gold, labels_gold = y
        greedy_heads_pred, greedy_labels_pred = self.model.decode(features)
        B, T = get_shape_as_list(greedy_labels_pred)[:2]
        labels_gold_trimmed = labels_gold[:, :T].numpy()
        heads_gold_trimmed = heads_gold[:, :T].numpy()
        for i in range(B):
            # Mask punctuation once per example; the fill is idempotent
            if self.punct_eval is False:
                labels_gold_trimmed[i] = masked_fill(
                    labels_gold_trimmed[i], labels_gold_trimmed[i] == self.model.punct, Offsets.PAD
                )
            for m in metrics:
                m.add(
                    greedy_heads_pred[i],
                    heads_gold_trimmed[i],
                    greedy_labels_pred[i],
                    labels_gold_trimmed[i],
                )
    metrics = {m.name: m.score for m in metrics}
    return metrics
def _test(self, ts, **kwargs):
    self.model.eval()
    total_sum = 0
    total_correct = 0
    gold_spans = []
    pred_spans = []
    metrics = {}
    steps = len(ts)

    conll_output = kwargs.get('conll_output', None)
    txts = kwargs.get('txts', None)
    handle = None
    if conll_output is not None and txts is not None:
        handle = open(conll_output, "w")

    pg = create_progress_bar(steps)
    try:
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            with torch.no_grad():
                pred = self.model(inputs)
            correct, count, golds, guesses = self.process_output(pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)
    finally:
        # Close the CONLL output file even if evaluation fails
        if handle is not None:
            handle.close()

    total_acc = total_correct / float(total_sum)
    metrics['acc'] = total_acc
    metrics['f1'] = span_f1(gold_spans, pred_spans)
    if self.verbose:
        # TODO: Add programmatic access to these metrics?
        conll_metrics = per_entity_f1(gold_spans, pred_spans)
        conll_metrics['acc'] = total_acc * 100
        conll_metrics['tokens'] = total_sum.item()
        logger.info(conlleval_output(conll_metrics))
    return metrics
def _test(self, loader, **kwargs):
    self.model.eval()
    total_loss = 0
    total_norm = 0
    steps = len(loader)
    pg = create_progress_bar(steps)
    no_cm = bool(kwargs.get('no_cm', False))
    cm = None if no_cm else ConfusionMatrix(self.labels)
    verbose = kwargs.get("verbose", None)
    output = kwargs.get('output')
    txts = kwargs.get('txts')
    handle = None
    line_number = 0
    if output is not None and txts is not None:
        handle = open(output, "w")
    with torch.no_grad():
        for batch_dict in pg(loader):
            example = self._make_input(batch_dict)
            ys = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, ys)
            if handle is not None:
                for p, y in zip(pred, ys):
                    handle.write('{}\t{}\t{}\n'.format(
                        " ".join(txts[line_number]), self.model.labels[p], self.model.labels[y]))
                    line_number += 1
            batchsz = self._get_batchsz(batch_dict)
            total_loss += loss.item() * batchsz
            total_norm += batchsz
            _add_to_cm(cm, ys, pred)
    metrics = cm.get_all_metrics() if cm is not None else {}
    metrics['avg_loss'] = total_loss / float(total_norm)
    verbose_output(verbose, cm)
    if handle is not None:
        handle.close()
    return metrics
def _train(self, loader, **kwargs):
    self.model.train()
    reporting_fns = kwargs.get('reporting_fns', [])
    steps = len(loader)
    pg = create_progress_bar(steps)
    no_cm = bool(kwargs.get('no_cm', False))
    cm = None if no_cm else ConfusionMatrix(self.labels)
    epoch_loss = 0
    epoch_div = 0
    for batch_dict in pg(loader):
        self.optimizer.zero_grad()
        example = self._make_input(batch_dict)
        y = example.pop('y')
        pred = self.model(example)
        loss = self.crit(pred, y)
        batchsz = self._get_batchsz(batch_dict)
        report_loss = loss.item() * batchsz
        epoch_loss += report_loss
        epoch_div += batchsz
        self.nstep_agg += report_loss
        self.nstep_div += batchsz
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        _add_to_cm(cm, y, pred)
        self.optimizer.step()
        if (self.optimizer.global_step + 1) % self.nsteps == 0:
            metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
            metrics['lr'] = self.optimizer.current_lr
            self.report(self.optimizer.global_step + 1, metrics, self.nstep_start,
                        'Train', 'STEP', reporting_fns, self.nsteps)
            self.reset_nstep()
    metrics = cm.get_all_metrics() if cm is not None else {}
    metrics['lr'] = self.optimizer.current_lr
    metrics['avg_loss'] = epoch_loss / float(epoch_div)
    return metrics
def _train(self, ts, **kwargs):
    self.model.train()
    reporting_fns = kwargs.get('reporting_fns', [])
    epoch_loss = 0
    epoch_norm = 0
    steps = len(ts)
    pg = create_progress_bar(steps)
    self.optimizer.zero_grad()
    for i, batch_dict in enumerate(pg(ts)):
        inputs = self.model.make_input(batch_dict)
        loss = self.model.compute_loss(inputs)
        loss.backward()
        # Step only every grad_accum batches (and on the final batch)
        if (i + 1) % self.grad_accum == 0 or (i + 1) == steps:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            self.optimizer.zero_grad()
        bsz = self._get_batchsz(batch_dict)
        report_loss = loss.item() * bsz
        epoch_loss += report_loss
        epoch_norm += bsz
        self.nstep_agg += report_loss
        self.nstep_div += bsz
        if (self.optimizer.global_step + 1) % self.nsteps == 0:
            metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
            metrics['lr'] = self.optimizer.current_lr
            self.report(self.optimizer.global_step + 1, metrics, self.nstep_start,
                        'Train', 'STEP', reporting_fns, self.nsteps)
            self.reset_nstep()
    metrics = self.calc_metrics(epoch_loss, epoch_norm)
    metrics['lr'] = self.optimizer.current_lr
    return metrics
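# Standalone sketch of the gradient-accumulation schedule used in _train above:
# backward() runs on every batch, but step()/zero_grad() fire only every
# `grad_accum` batches, plus once on the final batch so no gradient is dropped.
# The model, data, and hyperparameters are toy stand-ins.
import torch

model = torch.nn.Linear(4, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
batches = [torch.randn(8, 4) for _ in range(5)]
grad_accum = 2

opt.zero_grad()
for i, x in enumerate(batches):
    loss = model(x).pow(2).mean()  # stand-in loss
    loss.backward()                # gradients accumulate across batches
    if (i + 1) % grad_accum == 0 or (i + 1) == len(batches):
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        opt.step()
        opt.zero_grad()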
def test(self, vs, reporting_fns, phase, **kwargs):
    if phase == 'Test':
        return self._evaluate(vs, reporting_fns, **kwargs)
    self.model.eval()
    total_loss = total_toks = 0
    steps = len(vs)
    self.valid_epochs += 1
    preds = []
    golds = []
    start = time.perf_counter()
    pg = create_progress_bar(steps)
    for batch_dict in pg(vs):
        input_ = self._input(batch_dict)
        tgt = input_['tgt']
        tgt_lens = input_['tgt_len']
        pred = self.model(input_)
        loss = self.crit(pred, tgt)
        toks = self._num_toks(tgt_lens)
        total_loss += loss.item() * toks
        total_toks += toks
        greedy_preds = [p[0] for p in self._predict(input_, beam=1, make_input=False)[0]]
        preds.extend(convert_seq2seq_preds(greedy_preds, self.tgt_rlut))
        golds.extend(convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens, self.tgt_rlut))
    metrics = self.calc_metrics(total_loss, total_toks)
    metrics['bleu'] = bleu(preds, golds, self.bleu_n_grams)[0]
    metrics['acc'] = self._acc(preds, golds)
    self.report(self.valid_epochs, metrics, start, phase, 'EPOCH', reporting_fns)
    return metrics
def _train(self, loader, **kwargs):
    self.model.train()
    reporting_fns = kwargs.get('reporting_fns', [])
    steps = len(loader)
    pg = create_progress_bar(steps)
    epoch_loss = 0
    epoch_div = 0
    for batch_dict in pg(loader):
        self.optimizer.zero_grad()
        example = self._make_input(batch_dict)
        heads_gold = example.pop('heads')
        labels_gold = example.pop('labels')
        heads_pred, labels_pred = self.model(example)
        loss = self.crit(heads_pred, heads_gold, labels_pred, labels_gold)
        batchsz = self._get_batchsz(batch_dict)
        report_loss = loss.item() * batchsz
        epoch_loss += report_loss
        epoch_div += batchsz
        self.nstep_agg += report_loss
        self.nstep_div += batchsz
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        self.optimizer.step()
        if (self.optimizer.global_step + 1) % self.nsteps == 0:
            metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
            metrics['lr'] = self.optimizer.current_lr
            self.report(self.optimizer.global_step + 1, metrics, self.nstep_start,
                        'Train', 'STEP', reporting_fns, self.nsteps)
            self.reset_nstep()
    metrics = {}
    metrics['lr'] = self.optimizer.current_lr
    metrics['avg_loss'] = epoch_loss / float(epoch_div)
    return metrics
def main():
    parser = argparse.ArgumentParser(
        "Load a dual-encoder model and do response selection on testing data")
    parser.add_argument("--embed_type", type=str, default='default',
                        choices=["default", "positional", "learned-positional"],
                        help="register label of the embeddings")
    parser.add_argument("--d_model", type=int, default=512, help="Model dimension (and embedding dsz)")
    parser.add_argument("--d_ff", type=int, default=2048, help="FFN dimension")
    parser.add_argument("--d_k", type=int, default=None,
                        help="Dimension per head. Use if num_heads=1 to reduce dims")
    parser.add_argument("--num_heads", type=int, default=8, help="Number of heads")
    parser.add_argument("--num_layers", type=int, default=8, help="Number of layers")
    parser.add_argument("--windowed_ra", type=str2bool, default=False,
                        help="whether to prevent attention beyond rpr_k")
    parser.add_argument("--num_train_workers", type=int, default=4, help="Number of train workers")
    parser.add_argument("--nctx", type=int, default=256, help="Max input length")
    parser.add_argument("--file_type", default='json', help="Suffix for data")
    parser.add_argument("--record_keys", default=['x', 'y'], nargs='+')
    parser.add_argument("--model_type", default="dual-encoder",
                        choices=["dual-encoder", "transformer-bow"])
    parser.add_argument("--batch_size", type=int, default=256, help="Batch Size")
    parser.add_argument("--subword_model_file", type=str, help="The BPE model file", required=True)
    parser.add_argument("--subword_vocab_file", type=str, help="The BPE subword vocab", required=True)
    parser.add_argument("--reduction_d_k", type=int, default=64,
                        help="Dimensions of Key and Query in the single-headed reduction layers")
    parser.add_argument("--reduction_type", type=str, default="2ha",
                        help="Method of reduction, defaults to 2-headed attention")
    parser.add_argument("--stacking_layers", type=int, nargs='+',
                        help="Hidden sizes of the dense stack (ff2 from the ConveRT paper)")
    parser.add_argument("--reader_type", type=str, default='preprocessed',
                        choices=['ntp', 'nsp', 'preprocessed', 'tfrecord'])
    parser.add_argument("--output_file", type=str)
    parser.add_argument("--rpr_k", type=int, default=[8], nargs='+',
                        help="Relative attention positional sizes; pass 0 if you don't want relative attention")
    parser.add_argument("--device", type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--num_test_workers", type=int, default=1, help="Number of test workers")
    parser.add_argument("--ckpt", type=str, help="path to the model checkpoint", required=True)
    parser.add_argument("--test_file", type=str, help="path to the testing data")
    parser.add_argument("--recall_k", type=int, default=100,
                        help="select the response from how many candidates")
    parser.add_argument("--recall_top", type=int, default=1,
                        help="whether the correct response is ranked in the top x")
    parser.add_argument("--num_batches", type=int, default=1_000_000)
    parser.add_argument("--extra_tokens", nargs="+", default=["[CLS]", "[MASK]"],
                        help="What extra tokens should we use")
    args = parser.parse_args()

    reader = MultiFileDatasetReader(args.nctx, args.nctx,
                                    model_file=args.subword_model_file,
                                    vocab_file=args.subword_vocab_file,
                                    file_type=args.file_type,
                                    reader_type=args.reader_type,
                                    record_keys=args.record_keys,
                                    extra_tokens=args.extra_tokens)
    vocab = reader.build_vocab()
    # If we are not using chars, then use 'x' for both input and output
    preproc_data = baseline.embeddings.load_embeddings('x',
                                                       dsz=args.d_model,
                                                       known_vocab=vocab['x'],
                                                       preserve_vocab_indices=True,
                                                       embed_type=args.embed_type)
    vocabs = preproc_data['vocab']
    embeddings = preproc_data['embeddings']
    logger.info("Loaded embeddings")

    test_set = reader.load(args.test_file, vocabs)
    ind2tok = {ind: tok for tok, ind in vocabs.items()}
    # Use other samples in a batch as negative samples. Don't shuffle, to compare with ConveRT benchmarks
    test_loader = DataLoader(test_set, batch_size=args.batch_size, num_workers=args.num_test_workers)
    logger.info("Loaded datasets")

    model = create_model(args.model_type,
                         embeddings,
                         d_model=args.d_model,
                         d_ff=args.d_ff,
                         num_heads=args.num_heads,
                         num_layers=args.num_layers,
                         rpr_k=args.rpr_k,
                         d_k=args.d_k,
                         reduction_d_k=args.reduction_d_k,
                         stacking_layers=args.stacking_layers,
                         windowed_ra=args.windowed_ra,
                         reduction_type=args.reduction_type,
                         logger=logger)
    if os.path.isdir(args.ckpt):
        checkpoint, _ = find_latest_checkpoint(args.ckpt)
        logger.warning("Found latest checkpoint %s", checkpoint)
    else:
        checkpoint = args.ckpt
    if checkpoint.endswith(".npz"):
        load_transformer_de_npz(model, checkpoint)
    else:
        model.load_state_dict(torch.load(checkpoint, map_location=torch.device('cpu')))
    model.to(args.device)

    numerator = 0
    denominator = 0
    model.eval()
    num_batches = min(len(test_loader), args.num_batches)
    pg = create_progress_bar(num_batches)
    for i, batch in enumerate(get_next_k(test_loader, args.recall_k)):
        if i >= num_batches or batch[0].shape[0] != args.recall_k:
            break
        with torch.no_grad():
            inputs, targets = batch
            inputs = inputs.to(args.device)
            targets = targets.to(args.device)
            query = model.encode_query(inputs).unsqueeze(1)         # [B, 1, H]
            response = model.encode_response(targets).unsqueeze(0)  # [1, B, H]
            all_score = nn.CosineSimilarity(dim=-1)(query, response)
            _, indices = torch.topk(all_score, args.recall_top, dim=1)
            correct = (indices == torch.arange(args.recall_k, device=all_score.device)
                       .unsqueeze(1).expand(-1, args.recall_top)).sum()
            numerator += correct
            print(f"Selected {correct} correct responses out of {args.recall_k}")
        denominator += args.recall_k
        pg.update()
    pg.done()
    acc = float(numerator) / denominator
    print(f"{args.recall_top}@{args.recall_k} acc: {acc}")
    if args.output_file:
        with open(args.output_file, 'a') as wf:
            wf.write(f"Checkpoint: {checkpoint}; {args.recall_top}@{args.recall_k} accuracy: {acc}\n")
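# Toy illustration of the recall_top@recall_k bookkeeping in main() above: with
# in-batch negatives, row i of the query-response score matrix should rank
# column i highest, so a prediction counts as correct when i appears among its
# row's top-k indices. The scores here are made up for demonstration.
import torch

scores = torch.tensor([[0.9, 0.1, 0.2],
                       [0.3, 0.8, 0.1],
                       [0.7, 0.2, 0.4]])  # row i: query i scored against all responses
recall_top = 1
_, indices = torch.topk(scores, recall_top, dim=1)
correct = (indices == torch.arange(3).unsqueeze(1).expand(-1, recall_top)).sum()
print(int(correct), "of 3")  # rows 0 and 1 rank themselves first; row 2 does not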
def _report_hook(count, block_size, total_size):
    if Context.pg is None:
        length = int((total_size + block_size - 1) / float(block_size)) if total_size != -1 else 1
        Context.pg = create_progress_bar(length)
    Context.pg.update()
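# Hedged usage sketch: _report_hook matches the (count, block_size, total_size)
# reporthook contract of urllib.request.urlretrieve, so a download might be
# wired up like this. The URL and destination below are placeholders.
#
#     from urllib.request import urlretrieve
#     urlretrieve("https://example.com/big.file", "big.file", reporthook=_report_hook)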
if os.path.isdir(args.ckpt):
    checkpoint, _ = find_latest_checkpoint(args.ckpt)
    logger.warning("Found latest checkpoint %s", checkpoint)
else:
    checkpoint = args.ckpt
if checkpoint.endswith(".npz"):
    load_transformer_de_npz(model, checkpoint)
else:
    model.load_state_dict(torch.load(checkpoint, map_location=torch.device('cpu')))
model.to(args.device)

numerator = 0
denominator = 0
model.eval()
num_batches = min(len(test_loader), args.num_batches)
pg = create_progress_bar(num_batches)
for i, batch in enumerate(get_next_k(test_loader, args.recall_k)):
    if i >= num_batches or batch[0].shape[0] != args.recall_k:
        break
    uniq = set()
    with torch.no_grad():
        inputs, targets = batch
        inputs = inputs.to(args.device)
        targets = targets.to(args.device)
        query = model.encode_query(inputs).unsqueeze(1)         # [B, 1, H]
        response = model.encode_response(targets).unsqueeze(0)  # [1, B, H]
        all_score = nn.CosineSimilarity(dim=-1)(query, response)
        _, indices = torch.topk(all_score, args.recall_top, dim=1)
                     reduction_d_k=args.reduction_d_k,
                     ff_pdrop=0.,
                     logger=logger)
if os.path.isdir(args.ckpt):
    checkpoint, _ = find_latest_checkpoint(args.ckpt)
    logger.warning("Found latest checkpoint %s", checkpoint)
else:
    checkpoint = args.ckpt
model.load_state_dict(torch.load(checkpoint, map_location=torch.device('cpu')))
model.to(args.device)

numerator = 0
denominator = 0
model.eval()
pg = create_progress_bar(len(test_loader) // args.recall_k)
for batch in test_loader:
    if batch[0].shape[0] != args.recall_k:
        break
    with torch.no_grad():
        x, y = batch
        inputs = x.to(args.device)
        targets = y.to(args.device)
        query = model.encode_query(inputs).unsqueeze(1)         # [B, 1, H]
        response = model.encode_response(targets).unsqueeze(0)  # [1, B, H]
        all_score = nn.CosineSimilarity(dim=-1)(query, response).to('cpu')
        _, indices = torch.topk(all_score, args.recall_top, dim=1)
        correct = (indices == torch.arange(args.recall_k).unsqueeze(1).expand(-1, args.recall_top)).sum()
        numerator += correct