Esempio n. 1
0
def build_char_lang():
    """Return a ``Lang`` whose vocabulary is a fixed set of characters.

    A new ``Lang`` is created and its ``word2index``, ``index2word`` and
    ``n_words`` attributes are overwritten with empty values before any
    character is added. Each character of the alphabet below is then
    registered, in order, through ``Lang.addWord``.
    """
    vocab = Lang()
    # Overwrite whatever Lang() set up so only the characters added below
    # end up in the vocabulary.
    vocab.word2index, vocab.index2word, vocab.n_words = {}, {}, 0

    alphabet = "!\"$%&'()*+,-./0123456789:;<>?[]abcdefghijklmnopqrstuvwxyz"
    for symbol in alphabet:
        vocab.addWord(symbol)

    return vocab
Esempio n. 2
0
    def __init__(self,
                 word_vectors,
                 max_length,
                 char_embed=False,
                 seeder=None):
        """Set up an encoder that uses pre-trained word vectors.

        Args:
            word_vectors: Pre-trained embedding container. Its
                ``vector_size`` attribute is forwarded to the parent
                constructor and the object itself is kept on
                ``self.word_vectors``.
            max_length: Forwarded unchanged to the parent constructor.
            char_embed: When truthy, additionally build a character-level
                ``WordEncoderBiRNN`` over the characters ``a-z`` and
                ``0-9`` and keep it on ``self.charbased_model``.
            seeder: Seed forwarded to the parent constructor and to the
                character-level encoder. ``None`` (the default) means
                "use the current Unix time in whole seconds".
        """
        # Resolve the default at call time. A default written as
        # ``int(time.time())`` in the signature is evaluated only once,
        # when the ``def`` statement runs, so every instance created
        # without an explicit seed would share that single value.
        if seeder is None:
            seeder = int(time.time())

        super(PreTrainedEmbeddingEncoderBiRNN,
              self).__init__(word_vectors.vector_size,
                             max_length,
                             seeder=seeder)
        self.model_type = 'pre_trained_embedding'

        # define word vector embedding
        self.word_vectors = word_vectors

        # empty vector for oov
        # NOTE(review): ``self.empty_vector`` is read before it is assigned
        # here, so it is presumably initialised by the parent constructor
        # - confirm. It is wrapped and reshaped to (1, 1, -1).
        self.empty_vector = Variable(torch.Tensor(self.empty_vector)).view(
            1, 1, -1)

        # char embed
        self.char_embed = char_embed
        if self.char_embed:
            # Character vocabulary: start from empty tables so that only
            # the characters listed below are registered.
            lang = Lang()
            lang.word2index = dict()
            lang.index2word = dict()
            lang.n_words = 0
            chars = 'abcdefghijklmnopqrstuvwxyz0123456789'
            for c in chars:
                lang.addWord(c)
            # The character-level encoder is built with half of this
            # encoder's hidden size and the same seed.
            self.charbased_model = WordEncoderBiRNN(self.hidden_size // 2,
                                                    params.CHAR_LENGTH,
                                                    lang,
                                                    seeder=seeder)

        # word vector for start of string: all ones, shaped (1, 1, -1)
        sos = torch.ones(self.hidden_size)
        self.sos_vector = Variable(sos).view(1, 1, -1)

        # word vector for end of string: all minus ones, shaped (1, 1, -1)
        eos = torch.ones(self.hidden_size) * -1
        self.eos_vector = Variable(eos).view(1, 1, -1)

        if params.USE_CUDA:
            self.cuda()
            # These vectors are plain attributes created above, so they
            # are moved to the GPU explicitly.
            self.empty_vector = self.empty_vector.cuda()
            self.sos_vector = self.sos_vector.cuda()
            self.eos_vector = self.eos_vector.cuda()

        # Cache of token -> vector, pre-filled with the two marker tokens.
        self.cache_dict = dict()
        self.cache_dict[params.SOS_TOKEN] = self.sos_vector
        self.cache_dict[params.EOS_TOKEN] = self.eos_vector