Beispiel #1
0
 def __init__(self, vocab_list, unk_idx: int = 0):
     """Store the vocabulary, the unknown index and a word-to-index map.

     Each value is wrapped in ``jit.Attribute`` together with its declared
     type.

     Args:
         vocab_list: Words of the vocabulary; a word's position in this
             sequence becomes its index in ``self.idx``.
         unk_idx: Value stored as ``self.unk_idx``. Defaults to 0.
     """
     super().__init__()
     self.vocab = jit.Attribute(vocab_list, List[str])
     self.unk_idx = jit.Attribute(unk_idx, int)

     # Build the reverse lookup; if a word occurs more than once, the last
     # position seen is the one that is kept.
     word_to_index = {}
     for position, word in enumerate(vocab_list):
         word_to_index[word] = position
     self.idx = jit.Attribute(word_to_index, Dict[str, int])
Beispiel #2
0
 def __init__(self):
     """Assemble the module from objects defined outside this method.

     ``input_vocab``, ``traced_model``, ``output_layer``, ``max_seq_len``,
     ``UNK`` and ``PAD`` are not parameters; they are looked up from the
     surrounding scope when the constructor runs.
     """
     super().__init__()

     # Vocabulary wrapper, told which index stands for the UNK entry.
     unk_index = input_vocab.idx[UNK]
     self.vocab = ScriptVocabulary(input_vocab, unk_idx=unk_index)

     self.model = traced_model
     self.output_layer = output_layer

     # Plain integers are stored as typed ``jit.Attribute`` values.
     pad_index = input_vocab.idx[PAD]
     self.pad_idx = jit.Attribute(pad_index, int)
     self.max_seq_len = jit.Attribute(max_seq_len, int)
Beispiel #3
0
 def __init__(self):
     """Assemble the module from objects defined outside this method.

     ``input_vocab``, ``max_byte_len``, ``byte_offset_for_non_padding``,
     ``traced_model``, ``output_layer``, ``UNK`` and ``PAD`` are not
     parameters; they are looked up from the surrounding scope when the
     constructor runs.
     """
     super().__init__()

     # Vocabulary wrapper, told which index stands for the UNK entry.
     unk_index = input_vocab.idx[UNK]
     self.vocab = Vocabulary(input_vocab, unk_idx=unk_index)

     # Integer settings are stored as typed ``jit.Attribute`` values.
     self.max_byte_len = jit.Attribute(max_byte_len, int)
     byte_offset_attr = jit.Attribute(byte_offset_for_non_padding, int)
     self.byte_offset_for_non_padding = byte_offset_attr
     pad_index = input_vocab.idx[PAD]
     self.pad_idx = jit.Attribute(pad_index, int)

     self.model = traced_model
     self.output_layer = output_layer
Beispiel #4
0
 def __init__(self):
     """Assemble the module from objects defined outside this method.

     ``input_vocab``, ``traced_model``, ``output_layer`` and
     ``max_seq_len`` are not parameters; they are looked up from the
     surrounding scope when the constructor runs.
     """
     super().__init__()

     # Vocabulary wrapper receives the vocab plus its unk and pad indices.
     unk_index = input_vocab.get_unk_index()
     vocab_pad_index = input_vocab.get_pad_index()
     self.vocab = ScriptVocabulary(input_vocab, unk_index, vocab_pad_index)

     self.model = traced_model
     self.output_layer = output_layer

     # The pad index is queried again here and stored as a typed attribute.
     pad_index = input_vocab.get_pad_index()
     self.pad_idx = jit.Attribute(pad_index, int)
     self.max_seq_len = jit.Attribute(max_seq_len, int)
Beispiel #5
0
 def __init__(self):
     """Assemble the module from objects defined outside this method.

     ``input_vocab``, ``tensorizers``, ``traced_model``, ``output_layer``,
     ``max_seq_len`` and ``scripted_tokenizer`` are not parameters; they
     are looked up from the surrounding scope when the constructor runs.
     """
     super().__init__()

     # Vocabulary wrapper receives the vocab plus its unk and pad indices.
     unk_index = input_vocab.get_unk_index()
     vocab_pad_index = input_vocab.get_pad_index()
     self.vocab = ScriptVocabulary(input_vocab, unk_index, vocab_pad_index)

     # Reuse the normalizer owned by the "dense" tensorizer.
     dense_tensorizer = tensorizers["dense"]
     self.normalizer = dense_tensorizer.normalizer

     self.model = traced_model
     self.output_layer = output_layer

     # The pad index is queried again here and stored as a typed attribute.
     pad_index = input_vocab.get_pad_index()
     self.pad_idx = jit.Attribute(pad_index, int)
     self.max_seq_len = jit.Attribute(max_seq_len, int)

     self.tokenizer = scripted_tokenizer
Beispiel #6
0
    def __init__(self, vocab: Dict[str, int], eow: str = "_EOW"):
        """Store the BPE vocabulary and the EOW marker as typed attributes.

        Args:
            vocab: Maps every BPE segment (including any EOW elements) to
                its join priority. Priorities must be integers, should be
                non-negative and should contain no ties. Segments with a
                negative priority are ignored. Ties are broken by
                left-to-right byte order precedence, but that behavior is
                not guaranteed and may change in the future.
            eow: The EOW string used in ``vocab``. Defaults to ``"_EOW"``.
        """
        super().__init__()
        vocab_attr = jit.Attribute(vocab, Dict[str, int])
        self.vocab = vocab_attr
        eow_attr = jit.Attribute(eow, str)
        self.eow = eow_attr
Beispiel #7
0
 def __init__(self):
     """Assemble the module from objects defined outside this method.

     ``input_vocab``, ``tensorizers``, ``max_seq_len``, ``max_byte_len``,
     ``byte_offset_for_non_padding``, ``traced_model`` and
     ``output_layer`` are not parameters; they are looked up from the
     surrounding scope when the constructor runs.
     """
     super().__init__()

     # Vocabulary wrapper receives the vocab plus its unk and pad indices.
     unk_index = input_vocab.get_unk_index()
     vocab_pad_index = input_vocab.get_pad_index()
     self.vocab = ScriptVocabulary(input_vocab, unk_index, vocab_pad_index)

     # Reuse the normalizer owned by the "dense" tensorizer.
     dense_tensorizer = tensorizers["dense"]
     self.normalizer = dense_tensorizer.normalizer

     # Integer settings are stored as typed ``jit.Attribute`` values.
     self.max_seq_len = jit.Attribute(max_seq_len, int)
     self.max_byte_len = jit.Attribute(max_byte_len, int)
     byte_offset_attr = jit.Attribute(byte_offset_for_non_padding, int)
     self.byte_offset_for_non_padding = byte_offset_attr
     # The pad index is queried again here rather than reused from above.
     pad_index = input_vocab.get_pad_index()
     self.pad_idx = jit.Attribute(pad_index, int)

     self.model = traced_model
     self.output_layer = output_layer
Beispiel #8
0
 def __init__(self):
     """Assemble the module from objects defined outside this method.

     ``input_vocab``, ``tensorizers``, ``traced_model``, ``output_layer``,
     ``UNK`` and ``PAD`` are not parameters; they are looked up from the
     surrounding scope when the constructor runs.
     """
     super().__init__()

     # Vocabulary wrapper, told which index stands for the UNK entry.
     unk_index = input_vocab.idx[UNK]
     self.vocab = ScriptVocabulary(input_vocab, unk_idx=unk_index)

     # Reuse the normalizer owned by the "dense" tensorizer.
     dense_tensorizer = tensorizers["dense"]
     self.normalizer = dense_tensorizer.normalizer

     self.model = traced_model
     self.output_layer = output_layer

     # The PAD index is stored as a typed ``jit.Attribute`` value.
     pad_index = input_vocab.idx[PAD]
     self.pad_idx = jit.Attribute(pad_index, int)
Beispiel #9
0
 def __init__(self, classes, score_function):
     """Store the class names and the scoring callable on the instance.

     Args:
         classes: Class names, stored as a ``jit.Attribute`` declared as
             ``List[str]``.
         score_function: Object stored unchanged as
             ``self.score_function``.
     """
     super().__init__()
     class_names_attr = jit.Attribute(classes, List[str])
     self.classes = class_names_attr
     self.score_function = score_function