def from_config(cls, config: Config, **kwargs):
    """
    Create the tensorizer described by ``config``.

    The tokenizer is built from ``config.tokenizer``. When it is a
    WordPieceTokenizer its own vocab entries become the vocabulary; otherwise
    the vocabulary is loaded from ``config.vocab_file`` with BertDictionary.
    In both cases the special-token strings named in the config are handed to
    Vocabulary as ``replacements``. Any extra ``kwargs`` are forwarded to
    ``cls`` unchanged.
    """
    tokenizer = create_component(ComponentType.TOKENIZER, config.tokenizer)

    # Config-provided surface forms, keyed to the special tokens they stand for.
    special_tokens = {
        config.unk_token: UNK,
        config.pad_token: PAD,
        config.bos_token: BOS,
        config.eos_token: EOS,
        config.mask_token: MASK,
    }

    if not isinstance(tokenizer, WordPieceTokenizer):
        bert_dict = BertDictionary.load(config.vocab_file)
        vocab = Vocabulary(
            bert_dict.symbols, bert_dict.count, replacements=special_tokens
        )
    else:
        wordpieces = [piece for piece, _ in tokenizer.vocab.items()]
        vocab = Vocabulary(wordpieces, replacements=special_tokens)

    return cls(
        columns=config.columns,
        tokenizer=tokenizer,
        add_bos_token=config.add_bos_token,
        add_eos_token=config.add_eos_token,
        use_eos_token_for_bos=config.use_eos_token_for_bos,
        max_seq_len=config.max_seq_len,
        vocab=vocab,
        **kwargs,
    )
def test_torchscript_intent_slot_output_layer(self, num_doc_labels, num_word_labels, seq_lens):
    """
    Compare the TorchScript predictions of IntentSlotOutputLayer with the
    results of ``get_pred``, first for word tagging inputs and then for BPE
    tagging inputs (which additionally carry token indices).
    """
    make_label = OutputLayerTest._generate_random_string
    batch_size = len(seq_lens)
    doc_vocab = Vocabulary([make_label() for _ in range(num_doc_labels)])
    word_vocab = Vocabulary([make_label() for _ in range(num_word_labels)])
    output_layer = IntentSlotOutputLayer.from_config(
        config=IntentSlotOutputLayer.Config(),
        doc_labels=doc_vocab,
        word_labels=word_vocab,
    )

    def assert_same_predictions(eager, scripted):
        # Index 0 holds the document-level result, index 1 the word-level one.
        self._validate_doc_classification_result(eager[0], scripted[0], doc_vocab)
        self._validate_word_tagging_result(eager[1], scripted[1], word_vocab)

    doc_logits = OutputLayerTest._generate_doc_classification_inputs(
        batch_size, num_doc_labels
    )
    word_logits, seq_lens_tensor = OutputLayerTest._generate_word_tagging_inputs(
        batch_size, num_word_labels, seq_lens
    )
    context = {"seq_lens": seq_lens_tensor}
    scripted_layer = output_layer.torchscript_predictions()

    # Word-level tagging inputs.
    eager_out = output_layer.get_pred((doc_logits, word_logits), None, context)[1]
    scripted_out = scripted_layer((doc_logits, word_logits), seq_lens_tensor)
    assert_same_predictions(eager_out, scripted_out)

    # BPE-level tagging inputs.
    bpe_inputs = OutputLayerTest._generate_bpe_tagging_inputs(
        batch_size, num_word_labels, seq_lens
    )
    word_bpe_logits, seq_lens_tensor, token_indices_tensor = bpe_inputs
    context = {
        "seq_lens": seq_lens_tensor,
        "token_indices": token_indices_tensor,
    }
    eager_out = output_layer.get_pred((doc_logits, word_bpe_logits), None, context)[1]
    scripted_out = scripted_layer(
        (doc_logits, word_bpe_logits), seq_lens_tensor, token_indices_tensor
    )
    assert_same_predictions(eager_out, scripted_out)
def test_torchscript_intent_slot_output_layer(
    self, num_doc_labels, num_word_labels, seq_lens
):
    """
    Compare the TorchScript predictions of IntentSlotOutputLayer with the
    results of ``get_pred`` for word tagging and BPE tagging inputs.

    The first scripted call is also run with stdout redirected so the test
    can assert that the log_softmax implicit-dimension deprecation message
    was not printed.
    """
    make_label = OutputLayerTest._generate_random_string
    batch_size = len(seq_lens)
    doc_vocab = Vocabulary([make_label() for _ in range(num_doc_labels)])
    word_vocab = Vocabulary([make_label() for _ in range(num_word_labels)])
    output_layer = IntentSlotOutputLayer.from_config(
        config=IntentSlotOutputLayer.Config(),
        doc_labels=doc_vocab,
        word_labels=word_vocab,
    )

    def assert_same_predictions(eager, scripted):
        # Index 0 holds the document-level result, index 1 the word-level one.
        self._validate_doc_classification_result(eager[0], scripted[0], doc_vocab)
        self._validate_word_tagging_result(eager[1], scripted[1], word_vocab)

    doc_logits = OutputLayerTest._generate_doc_classification_inputs(
        batch_size, num_doc_labels
    )
    word_logits, seq_lens_tensor = OutputLayerTest._generate_word_tagging_inputs(
        batch_size, num_word_labels, seq_lens
    )
    context = {"seq_lens": seq_lens_tensor}
    scripted_layer = output_layer.torchscript_predictions()

    # Word-level tagging inputs.
    eager_out = output_layer.get_pred((doc_logits, word_logits), None, context)[1]
    with redirect_stdout() as redirected_stdout:
        scripted_out = scripted_layer((doc_logits, word_logits), context)
        captured = redirected_stdout.getvalue()
        assert (
            "Implicit dimension choice for log_softmax has been deprecated"
            not in captured
        )
    assert_same_predictions(eager_out, scripted_out)

    # BPE-level tagging inputs.
    bpe_inputs = OutputLayerTest._generate_bpe_tagging_inputs(
        batch_size, num_word_labels, seq_lens
    )
    word_bpe_logits, seq_lens_tensor, token_indices_tensor = bpe_inputs
    context = {"seq_lens": seq_lens_tensor, "token_indices": token_indices_tensor}
    eager_out = output_layer.get_pred((doc_logits, word_bpe_logits), None, context)[1]
    scripted_out = scripted_layer((doc_logits, word_bpe_logits), context)
    assert_same_predictions(eager_out, scripted_out)
def test_wordblstm_export_to_caffe2(self, export_num_words, num_word_classes, test_num_words, num_predictions):
    """
    For every entry of WORD_CONFIGS, export a word tagging model to Caffe2
    and check that the exported net's word scores match the scores returned
    by the PyTorch model's ``get_pred`` on the same random inputs.
    """
    for word_config in WORD_CONFIGS:
        config = self._get_config(WordTaggingTask.Config, word_config)
        tensorizers, data = _NewTask._init_tensorizers(config)
        word_labels = [SpecialTokens.PAD, SpecialTokens.UNK, "NoLabel", "person"]
        tensorizers["labels"].vocab = Vocabulary(word_labels)
        tensorizers["tokens"].vocab = Vocabulary(WORD_VOCAB)
        py_model = _NewTask._init_model(config.model, tensorizers)

        dummy_test_input = self._get_rand_input_intent_slot(
            BATCH_SIZE, W_VOCAB_SIZE, test_num_words
        )
        exporter = ModelExporter(
            ModelExporter.Config(),
            py_model.get_export_input_names(tensorizers),
            dummy_test_input,
            py_model.vocab_to_export(tensorizers),
            py_model.get_export_output_names(tensorizers),
        )

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".{}".format(".predictor")
        ) as pred_file:
            exporter.export_to_caffe2(py_model, pred_file.name)
            workspace.ResetWorkspace()

        pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)

        for _ in range(num_predictions):
            test_inputs = self._get_rand_input_intent_slot(
                BATCH_SIZE, W_VOCAB_SIZE, test_num_words
            )
            self._feed_c2_input(
                workspace, test_inputs, exporter.input_names, exporter.vocab_map
            )
            workspace.RunNetOnce(pred_net)

            # One output blob per label, named "word_scores:<label>".
            blob_names = [
                "{}:{}".format("word_scores", class_name)
                for class_name in word_labels
            ]

            py_model.eval()
            logits = py_model(*test_inputs)
            # The last test input is used as the sequence lengths.
            context = {"seq_lens": test_inputs[-1]}
            _, scores = py_model.get_pred(logits, None, context)

            caffe2_scores = [
                value
                for blob_name in blob_names
                for value in workspace.FetchBlob(blob_name)
            ]

            expected = torch.transpose(scores, 1, 2).contiguous().view(-1)
            np.testing.assert_array_almost_equal(
                expected.detach().numpy(),
                np.array(caffe2_scores).flatten(),
            )
def test_seq_nn_export_to_caffe2(
    self,
    export_num_words,
    num_doc_classes,
    test_num_words,
    num_predictions,
    test_num_seq,
):
    """
    Export a SeqNN model to Caffe2 and check that the exported net's outputs
    match the log-softmax of the PyTorch model's outputs on the same random
    inputs.
    """
    config = self._get_config(SeqNNTask.Config, SEQ_NN_CONFIG)
    tensorizers, data = _NewTask._init_tensorizers(config)
    doc_labels = [SpecialTokens.UNK, "cu:other", "cu:address_Person"]
    tensorizers["labels"].vocab = Vocabulary(doc_labels)
    tensorizers["tokens"].vocab = Vocabulary(WORD_VOCAB)
    py_model = _NewTask._init_model(config.model, tensorizers)

    dummy_test_input = self._get_seq_nn_rand_input(
        BATCH_SIZE, W_VOCAB_SIZE, test_num_words, test_num_seq
    )
    exporter = ModelExporter(
        ModelExporter.Config(),
        py_model.get_export_input_names(tensorizers),
        dummy_test_input,
        py_model.vocab_to_export(tensorizers),
        py_model.get_export_output_names(tensorizers),
    )

    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".{}".format(".predictor")
    ) as pred_file:
        output_names = exporter.export_to_caffe2(py_model, pred_file.name)
        workspace.ResetWorkspace()

    pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)

    for _ in range(num_predictions):
        test_inputs = self._get_seq_nn_rand_input(
            BATCH_SIZE, W_VOCAB_SIZE, test_num_words, test_num_seq
        )
        self._feed_c2_input(
            workspace, test_inputs, exporter.input_names, exporter.vocab_map
        )
        workspace.RunNetOnce(pred_net)
        caffe2_outputs = [
            list(workspace.FetchBlob(blob_name)) for blob_name in output_names
        ]

        py_model.eval()
        # The predictor net is exported with log_softmax applied, so apply
        # the same transformation to the PyTorch outputs before comparing.
        log_probs = F.log_softmax(py_model(*test_inputs), 1)

        np.testing.assert_array_almost_equal(
            log_probs.view(-1).detach().numpy(),
            np.array(caffe2_outputs).flatten(),
        )
def test_lookup_tokens(self):
    """
    Check the token ids and character offsets returned by ``lookup_tokens``,
    both without and with BOS/EOS tokens added.
    """
    text = "let's tokenize this"
    shared = {
        "tokenizer": Tokenizer(),
        "vocab": Vocabulary(text.split() + [BOS, EOS]),
    }

    # Without BOS/EOS: one id and one (start, end) pair per word.
    ids, starts, ends = lookup_tokens(
        text, add_bos_token=False, add_eos_token=False, **shared
    )
    self.assertEqual(ids, [0, 1, 2])
    self.assertEqual(starts, (0, 6, 15))
    self.assertEqual(ends, (5, 14, 19))

    # With BOS/EOS: the added tokens are expected to carry -1 offsets.
    ids, starts, ends = lookup_tokens(
        text, add_bos_token=True, add_eos_token=True, **shared
    )
    self.assertEqual(ids, [3, 0, 1, 2, 4])
    self.assertEqual(starts, (-1, 0, 6, 15, -1))
    self.assertEqual(ends, (-1, 5, 14, 19, -1))
def from_config(cls, config: Config, **kwargs):
    """
    Build the tensorizer from its config, creating both the tokenizer and
    the Vocabulary object.

    Extra keyword arguments are forwarded to ``cls`` so that this function
    can be reused with a variable number of arguments (e.g. by classes
    deriving from this one).
    """
    tokenizer = create_component(ComponentType.TOKENIZER, config.tokenizer)

    # BERT-style special-token strings and the tokens they are replaced by.
    bert_special_tokens = {
        "[UNK]": UNK,
        "[PAD]": PAD,
        "[CLS]": BOS,
        "[MASK]": MASK,
        "[SEP]": EOS,
    }

    if not isinstance(tokenizer, WordPieceTokenizer):
        with PathManager.open(config.vocab_file) as vocab_handle:
            vocab = build_fairseq_vocab(
                dictionary_class=BertDictionary,
                vocab_file=vocab_handle,
                special_token_replacements=bert_special_tokens,
            )
    else:
        wordpieces = [piece for piece, _ in tokenizer.vocab.items()]
        vocab = Vocabulary(wordpieces, replacements=bert_special_tokens)

    return cls(
        columns=config.columns,
        vocab=vocab,
        tokenizer=tokenizer,
        max_seq_len=config.max_seq_len,
        **kwargs,
    )
def test_torchscript_word_tagging_output_layer(self, num_labels, seq_lens):
    """
    Compare the TorchScript predictions of WordTaggingOutputLayer and
    CRFOutputLayer with the results of their ``get_pred`` methods.
    """
    batch_size = len(seq_lens)
    label_vocab = Vocabulary(
        [OutputLayerTest._generate_random_string() for _ in range(num_labels)]
    )
    word_layer = WordTaggingOutputLayer.from_config(
        config=WordTaggingOutputLayer.Config(), labels=label_vocab
    )
    crf_layer = CRFOutputLayer.from_config(
        config=CRFOutputLayer.Config(), labels=label_vocab
    )

    logits, seq_lens_tensor = OutputLayerTest._generate_word_tagging_inputs(
        batch_size, num_labels, seq_lens
    )
    context = {"seq_lens": seq_lens_tensor}

    scripted_word_layer = word_layer.torchscript_predictions()
    scripted_crf_layer = crf_layer.torchscript_predictions()

    def assert_same_predictions(layer, scripted_layer):
        eager_result = layer.get_pred(logits, None, context)[1]
        scripted_result = scripted_layer(logits, context)
        self._validate_word_tagging_result(
            eager_result, scripted_result, label_vocab
        )

    assert_same_predictions(word_layer, scripted_word_layer)
    assert_same_predictions(crf_layer, scripted_crf_layer)
def build_fairseq_vocab(
    vocab_file: str,
    dictionary_class: Dictionary = Dictionary,
    special_token_replacements: Dict[str, Token] = None,
    max_vocab: int = -1,
    min_count: int = -1,
    tokens_to_add: Optional[List[str]] = None,
) -> Vocabulary:
    """
    Build a PyText vocabulary for models pre-trained using Fairseq modules.

    ``dictionary_class`` may be any Fairseq Dictionary class; its ``load``
    method is used to read ``vocab_file``. The dictionary is only finalized
    when ``min_count`` or ``max_vocab`` is positive, and every entry of
    ``tokens_to_add`` is added as a symbol afterwards.
    """
    fairseq_dict = dictionary_class.load(vocab_file)

    # finalize sorts the dictionary by frequency, so it is skipped unless a
    # min_count or max_vocab limit was actually requested.
    needs_finalize = min_count > 0 or max_vocab > 0
    if needs_finalize:
        fairseq_dict.finalize(
            threshold=min_count, nwords=max_vocab, padding_factor=1
        )

    for extra_token in tokens_to_add or ():
        fairseq_dict.add_symbol(extra_token)

    return Vocabulary(
        fairseq_dict.symbols,
        fairseq_dict.count,
        replacements=special_token_replacements,
    )
def from_config(cls, config: Config, tensorizers):
    """
    Build the model from ``config`` and the task's tensorizers.

    Side effect: the vocab of ``tensorizers["has_answer"]`` is replaced by a
    Vocabulary over the labels "False" and "True".

    The encoder is sized from the vocab of ``tensorizers["squad_input"]``;
    the position decoder has an output dimension of 2 and the has-answer
    decoder has one output per has-answer label.
    """
    has_answer_labels = ["False", "True"]
    tensorizers["has_answer"].vocab = Vocabulary(has_answer_labels)

    vocab = tensorizers["squad_input"].vocab
    encoder = create_module(
        config.encoder,
        output_encoded_layers=True,
        padding_idx=vocab.get_pad_index(),
        # Use the len() builtin rather than calling the dunder directly.
        vocab_size=len(vocab),
    )
    pos_decoder = create_module(
        config.pos_decoder, in_dim=encoder.representation_dim, out_dim=2
    )
    has_ans_decoder = create_module(
        config.has_ans_decoder,
        in_dim=encoder.representation_dim,
        out_dim=len(has_answer_labels),
    )
    # The output layer receives the plain label list, not the Vocabulary.
    output_layer = create_module(
        config.output_layer, labels=has_answer_labels, is_kd=config.is_kd
    )
    return cls(
        encoder, pos_decoder, has_ans_decoder, output_layer, is_kd=config.is_kd
    )
def test_doc_classification_output_layer(self):
    """
    Check the ``ignore_index`` of the loss created by
    ClassificationOutputLayer: 0 when the label vocab starts with PAD, and
    -1 when the vocab contains no PAD token.
    """
    tensorizer = LabelTensorizer()
    cases = (
        ([SpecialTokens.PAD, "foo", "bar"], 0),
        # No PAD in the vocab: the default pad index is used.
        (["foo", "bar"], -1),
    )
    for labels, expected_ignore_index in cases:
        tensorizer.vocab = Vocabulary(labels)
        layer = ClassificationOutputLayer.from_config(
            config=ClassificationOutputLayer.Config(loss=CrossEntropyLoss.Config()),
            labels=tensorizer.vocab,
        )
        self.assertEqual(layer.loss_fn.ignore_index, expected_ignore_index)
def build_fairseq_vocab(
    vocab_file: str,
    dictionary_class: Dictionary = Dictionary,
    special_token_replacements: Dict[str, SpecialToken] = None,
    max_vocab: int = -1,
    min_count: int = -1,
    tokens_to_add: Optional[List[str]] = None,
):
    """
    Build a PyText vocabulary for models pre-trained using Fairseq modules.

    ``dictionary_class`` may be any Fairseq Dictionary class; its ``load``
    method reads the vocab from ``vocab_file``, which is opened through
    PathManager. When no (or an empty) ``special_token_replacements`` mapping
    is given, a default mapping for "<pad>", "<s>", "</s>", "<unk>" and
    "<mask>" is used.
    """
    replacements = special_token_replacements or {
        "<pad>": SpecialTokens.PAD,
        "<s>": SpecialTokens.BOS,
        "</s>": SpecialTokens.EOS,
        "<unk>": SpecialTokens.UNK,
        "<mask>": SpecialTokens.MASK,
    }

    with PathManager.open(vocab_file) as vocab_handle:
        fairseq_dict = dictionary_class.load(vocab_handle)

    # finalize sorts the dictionary by frequency, so it is skipped unless a
    # min_count or max_vocab limit was actually requested.
    needs_finalize = min_count > 0 or max_vocab > 0
    if needs_finalize:
        fairseq_dict.finalize(
            threshold=min_count, nwords=max_vocab, padding_factor=1
        )

    for extra_token in tokens_to_add or ():
        fairseq_dict.add_symbol(extra_token)

    return Vocabulary(
        fairseq_dict.symbols,
        fairseq_dict.count,
        replacements=replacements,
    )
def setUp(self):
    """Create the fixture rows (one "text" field each) and the matching vocab."""
    sentences = (
        "hello world",
        "feeling lucky today",
        "hello",
        "lucky world",
        "today world",
    )
    self.input_iterator = [{"text": sentence} for sentence in sentences]
    self.vocab = Vocabulary(["hello", "world", "feeling", "lucky", "today"])
def __init__(self, poss_slots: List[str], tokenizer: nn.Module = None):
    """
    Build the slot vocabulary from ``poss_slots``.

    The input list is copied, so the caller's list is left untouched. The
    "NoLabel" token is inserted at index 0 and the PAD token at index 1
    whenever they are not already present. ``tokenizer`` is accepted but not
    used here.
    """
    super().__init__()
    self.NO_LABEL = Token("NoLabel")

    slot_names = [*poss_slots]
    # Position in this tuple is the index at which a missing token is inserted.
    for position, required in enumerate((self.NO_LABEL, SpecialTokens.PAD)):
        if required not in slot_names:
            slot_names.insert(position, required)

    self.vocab = Vocabulary(slot_names)
def __init__(self, bpe, dictionary: Dictionary):
    """
    Store ``bpe`` and build a Vocabulary from ``dictionary``.

    The pad, bos and eos tokens are the string forms of the dictionary
    entries at ``dictionary.pad()``, ``dictionary.bos()`` and
    ``dictionary.eos()``. The vocabulary's bos and eos tokens are also
    exposed as ``self.bos`` and ``self.eos``.
    """
    self.bpe = bpe

    def symbol_at(index):
        return str(dictionary[index])

    self.vocab = Vocabulary(
        dictionary.symbols,
        pad_token=symbol_at(dictionary.pad()),
        bos_token=symbol_at(dictionary.bos()),
        eos_token=symbol_at(dictionary.eos()),
    )
    self.bos = self.vocab.bos_token
    self.eos = self.vocab.eos_token
def test_create_word_tagging_output_layer(self):
    """
    Check that ``label_weights`` from the config end up in the loss weights:
    "foo" gets 2.2 and the unlisted label "bar" gets 1.
    """
    tensorizer = LabelTensorizer()
    tensorizer.vocab = Vocabulary(["foo", "bar"])
    tensorizer.pad_idx = 0

    layer_config = WordTaggingOutputLayer.Config(label_weights={"foo": 2.2})
    layer = WordTaggingOutputLayer.from_config(
        config=layer_config, labels=tensorizer.vocab
    )

    expected_weights = np.array([2.2, 1])
    actual_weights = layer.loss_fn.weight.detach().numpy()
    np.testing.assert_array_almost_equal(expected_weights, actual_weights)
def build_dumb_slot_labelling_model():
    """
    Return a slot labelling model built from fixed placeholder arguments and
    a four-entry vocabulary (UNK, PAD, "the", "cat").
    """
    tiny_vocab = Vocabulary([SpecialTokens.UNK, SpecialTokens.PAD, "the", "cat"])
    hundred_tens = [10] * 100
    return build_slot_labelling_model(
        None,
        5,
        100,
        hundred_tens,
        0.4,
        False,
        None,
        None,
        5,
        tiny_vocab,
    )
def _build_vocab(self, vocab_file: str, max_vocab: int, min_count: int) -> Vocabulary:
    """
    Build the vocab for XLM using the vocab reader that matches the model
    source: ``read_fairseq_vocab`` when ``self.is_fairseq`` is set,
    ``read_vocab`` otherwise.
    """
    reader = read_fairseq_vocab if self.is_fairseq else read_vocab
    tokens, frequencies, special_tokens = reader(vocab_file, max_vocab, min_count)
    return Vocabulary(tokens, frequencies, replacements=special_tokens)
def build_legacy_pytext_vocab_pipeline(vocab_file):
    """
    Build a tokenize-then-lookup pipeline backed by a legacy PyText Vocabulary.

    The vocab entries are counted from ``vocab_file``, ordered by descending
    frequency, and prefixed with "<unk>", which is also passed to Vocabulary
    as its unknown token.

    Args:
        vocab_file: path of the vocab file to read.

    Returns:
        A ``(pipeline, None, None)`` tuple, where ``pipeline`` applies the
        "basic_english" tokenizer followed by the PyText vocab transform.
    """
    from pytext.data.utils import Vocabulary

    tokenizer = get_tokenizer("basic_english")

    # Read inside a context manager so the file handle is always closed.
    with open(vocab_file, 'r') as f:
        # NOTE(review): iterating over ``line.rstrip()`` yields individual
        # characters, not whole lines or words - kept as in the original,
        # but confirm this is the intended token granularity.
        vocab_counter = Counter(token for line in f for token in line.rstrip())

    # most_common() orders by descending count and keeps first-seen order
    # among equal counts, matching a stable sort with reverse=True.
    vocab_list = [token for token, _ in vocab_counter.most_common()]
    vocab_list.insert(0, "<unk>")

    pipeline = sequential_transforms(
        tokenizer_func(tokenizer),
        PyTextVocabTransform(Vocabulary(vocab_list, unk_token="<unk>")),
    )
    return pipeline, None, None
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """
    Build the DrQA model from ``config`` and the task's tensorizers.

    Side effects: the dropout of both RNN configs is overwritten with
    ``config.dropout``, and the vocab of ``tensorizers["has_answer"]`` is
    replaced by a Vocabulary over the labels "False" and "True".
    """
    # The RNN params are configurable, but for DrQA the dropout of both
    # RNNs is always tied to the model-level dropout.
    for rnn_config in (config.ques_rnn, config.doc_rnn):
        rnn_config.dropout = config.dropout

    embedding = cls.create_embedding(config, tensorizers)
    embed_dim = embedding.embedding_dim

    ques_aligned_doc_attn = SequenceAlignedAttention(embed_dim)
    ques_rnn = create_module(config.ques_rnn, input_size=embed_dim)
    # The document RNN takes an input twice as wide as the embedding.
    doc_rnn = create_module(config.doc_rnn, input_size=embed_dim * 2)
    ques_dim = ques_rnn.representation_dim
    doc_dim = doc_rnn.representation_dim

    ques_self_attn = DotProductSelfAttention(ques_dim)
    start_attn = MultiplicativeAttention(doc_dim, ques_dim, normalize=False)
    end_attn = MultiplicativeAttention(doc_dim, ques_dim, normalize=False)
    doc_rep_pool = SelfAttention(
        SelfAttention.Config(dropout=config.dropout),
        n_input=doc_dim,
    )

    has_answer_labels = ["False", "True"]
    tensorizers["has_answer"].vocab = Vocabulary(has_answer_labels)
    has_ans_decoder = MLPDecoder(
        config=MLPDecoder.Config(),
        in_dim=doc_dim,
        out_dim=len(has_answer_labels),
    )
    # The output layer receives the plain label list, not the Vocabulary.
    output_layer = create_module(
        config.output_layer, labels=has_answer_labels, is_kd=config.is_kd
    )

    return cls(
        dropout=nn.Dropout(config.dropout),
        embedding=embedding,
        ques_rnn=ques_rnn,
        doc_rnn=doc_rnn,
        ques_self_attn=ques_self_attn,
        ques_aligned_doc_attn=ques_aligned_doc_attn,
        start_attn=start_attn,
        end_attn=end_attn,
        doc_rep_pool=doc_rep_pool,
        has_ans_decoder=has_ans_decoder,
        output_layer=output_layer,
        is_kd=config.is_kd,
    )
def build_dumb_intent_slot_model():
    """
    Return a joint intent/slot model built from fixed placeholder arguments
    and a four-entry vocabulary (UNK, PAD, "the", "cat").
    """
    tiny_vocab = Vocabulary([SpecialTokens.UNK, SpecialTokens.PAD, "the", "cat"])
    # Slot and doc kernels get separate (equal) lists, as in a fresh literal each.
    slot_sizes = [10] * 100
    doc_sizes = [10] * 100
    return build_intent_joint_model(
        use_intent=False,
        loss_doc_weight=0.4,
        pretrain_embed=None,
        embed_dim=10,
        slot_kernel_num=10,
        slot_kernel_sizes=slot_sizes,
        doc_kernel_num=10,
        doc_kernel_sizes=doc_sizes,
        slot_bias=True,
        slot_decoder_hidden_dims=None,
        doc_bias=True,
        doc_decoder_hidden_dims=None,
        num_slots=26,
        num_intents=43,
        vocab=tiny_vocab,
        dropout=0.4,
        add_feat_len=0,
    )
def from_config(cls, config: Config, **kwargs):
    """
    Build the tensorizer from ``config``.

    A single tokenizer is created and shared by the document and question
    TokenTensorizers. A vocabulary is only built up front when the tokenizer
    is a WordPieceTokenizer (from the tokenizer's own vocab); otherwise
    ``vocab`` is passed along as None. Extra ``kwargs`` are forwarded to
    ``cls``.
    """
    tokenizer = create_component(ComponentType.TOKENIZER, config.tokenizer)

    vocab = None
    if isinstance(tokenizer, WordPieceTokenizer):
        print("Using WordPieceTokenizer")
        # BERT-style special-token strings and the tokens they are replaced by.
        bert_special_tokens = {
            "[UNK]": UNK,
            "[PAD]": PAD,
            "[CLS]": BOS,
            "[SEP]": EOS,
            "[MASK]": MASK,
        }
        wordpieces = [piece for piece, _ in tokenizer.vocab.items()]
        vocab = Vocabulary(wordpieces, replacements=bert_special_tokens)

    def make_token_tensorizer(column, max_len):
        return TokenTensorizer(
            text_column=column,
            tokenizer=tokenizer,
            vocab=vocab,
            max_seq_len=max_len,
        )

    doc_tensorizer = make_token_tensorizer(config.doc_column, config.max_doc_seq_len)
    ques_tensorizer = make_token_tensorizer(
        config.ques_column, config.max_ques_seq_len
    )

    return cls(
        doc_tensorizer=doc_tensorizer,
        ques_tensorizer=ques_tensorizer,
        doc_column=config.doc_column,
        ques_column=config.ques_column,
        answers_column=config.answers_column,
        answer_starts_column=config.answer_starts_column,
        tokenizer=tokenizer,
        vocab=vocab,
        **kwargs,
    )
def test_lookup_tokens(self):
    """
    Check the token ids and character offsets returned by ``lookup_tokens``,
    both without and with explicit BOS/EOS tokens.
    """
    text = "let's tokenize this"
    shared = {
        "tokenizer": Tokenizer(),
        "vocab": Vocabulary(text.split() + [SpecialTokens.BOS, SpecialTokens.EOS]),
    }

    # No BOS/EOS: one id and one (start, end) pair per word.
    ids, starts, ends = lookup_tokens(text, bos_token=None, eos_token=None, **shared)
    self.assertEqual(ids, [0, 1, 2])
    self.assertEqual(starts, (0, 6, 15))
    self.assertEqual(ends, (5, 14, 19))

    # With BOS/EOS: the added tokens are expected to carry -1 offsets.
    ids, starts, ends = lookup_tokens(
        text,
        bos_token=SpecialTokens.BOS,
        eos_token=SpecialTokens.EOS,
        **shared,
    )
    self.assertEqual(ids, [3, 0, 1, 2, 4])
    self.assertEqual(starts, (-1, 0, 6, 15, -1))
    self.assertEqual(ends, (-1, 5, 14, 19, -1))
def test_contextual_intent_slot_export_to_caffe2(self, test_num_words, num_predictions, test_num_seq):
    """
    Export a contextual intent-slot model to Caffe2 and check that the
    exported net's document and word scores match the scores returned by
    the PyTorch model's ``get_pred`` on the same random inputs.
    """
    config = self._get_config(IntentSlotTask.Config, CONTEXTUAL_INTENT_SLOT_CONFIG)
    tensorizers, data = _NewTask._init_tensorizers(config)
    doc_labels = ["__UNKNOWN__", "cu:other", "cu:address_Person"]
    word_labels = ["__UNKNOWN__", "NoLabel", "person"]
    tensorizers["word_labels"].vocab = Vocabulary(word_labels)
    tensorizers["doc_labels"].vocab = Vocabulary(doc_labels)
    tensorizers["tokens"].vocab = Vocabulary(WORD_VOCAB)
    tensorizers["seq_tokens"].vocab = Vocabulary(WORD_VOCAB)
    py_model = _NewTask._init_model(config.model, tensorizers)

    dummy_test_input = self._get_rand_input_intent_slot(
        BATCH_SIZE, W_VOCAB_SIZE, test_num_words, test_num_seq
    )
    exporter = ModelExporter(
        ModelExporter.Config(),
        py_model.get_export_input_names(tensorizers),
        dummy_test_input,
        py_model.vocab_to_export(tensorizers),
        py_model.get_export_output_names(tensorizers),
    )

    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".{}".format(".predictor")
    ) as pred_file:
        print(pred_file.name)
        exporter.export_to_caffe2(py_model, pred_file.name)
        workspace.ResetWorkspace()

    pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)

    for _ in range(num_predictions):
        test_inputs = self._get_rand_input_intent_slot(
            BATCH_SIZE, W_VOCAB_SIZE, test_num_words, test_num_seq
        )
        self._feed_c2_input(
            workspace, test_inputs, exporter.input_names, exporter.vocab_map
        )
        workspace.RunNetOnce(pred_net)

        # One output blob per label, named "<prefix>:<label>".
        doc_blob_names = [
            "{}:{}".format("doc_scores", class_name) for class_name in doc_labels
        ]
        word_blob_names = [
            "{}:{}".format("word_scores", class_name) for class_name in word_labels
        ]

        py_model.eval()
        logits = py_model(*test_inputs)
        # The last test input is used as the sequence lengths.
        context = {SEQ_LENS: test_inputs[-1]}
        (_, _), (doc_scores, word_scores) = py_model.get_pred(logits, None, context)

        caffe2_doc_scores = [
            value
            for blob_name in doc_blob_names
            for value in workspace.FetchBlob(blob_name)
        ]
        caffe2_word_scores = [
            value
            for blob_name in word_blob_names
            for value in workspace.FetchBlob(blob_name)
        ]

        np.testing.assert_array_almost_equal(
            doc_scores.view(-1).detach().numpy(),
            np.array(caffe2_doc_scores).flatten(),
        )
        expected_word = torch.transpose(word_scores, 1, 2).contiguous().view(-1)
        np.testing.assert_array_almost_equal(
            expected_word.detach().numpy(),
            np.array(caffe2_word_scores).flatten(),
        )
def __init__(self, label_names: List[str]):
    """Initialise the parent class and wrap ``label_names`` in a Vocabulary."""
    super().__init__()
    label_vocab = Vocabulary(label_names)
    self.vocab = label_vocab
def __init__(self, label_names: List[str]):
    """
    Build the label vocabulary, making sure it contains the UNK token.

    Args:
        label_names: the label strings. When UNK is missing it is placed at
            index 0 of the vocabulary. The list passed in is not modified.
    """
    super().__init__()
    # Work on a copy: inserting UNK directly into the argument would
    # silently change the caller's list.
    label_names = list(label_names)
    if SpecialTokens.UNK not in label_names:
        label_names.insert(0, SpecialTokens.UNK)
    self.vocab = Vocabulary(label_names)