Example #1
    def __init__(self,
                 model,
                 ldims=400,
                 input_size=100,
                 output_size=100,
                 dropout=0.33):

        self.input = input_size
        self.ldims = ldims
        self.output = output_size
        self.dropout = dropout
        # Character-level LSTM encoder.
        self.charlstm = LSTM(model, self.input, self.ldims, forget_bias=0.0)
        # Attention weight vector over the LSTM hidden states.
        self.W_atten = model.add_parameters((self.ldims, 1),
                                            init=dy.ConstInitializer(0))
        # Linear projection from a 2 * ldims representation down to the
        # output size, plus its bias.
        self.W_linear = model.add_parameters((self.output, self.ldims * 2),
                                             init=dy.ConstInitializer(0))
        self.b_linear = model.add_parameters((self.output),
                                             init=dy.ConstInitializer(0))
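
Example #3 below instantiates an equivalent constructor as HybridCharacterAttention; under that assumption, a minimal usage sketch (not part of the original listing) looks like this:

import dynet as dy

model = dy.ParameterCollection()
# 100-dim character embeddings in, 400-dim LSTM states, 100-dim vectors out.
char_encoder = HybridCharacterAttention(model,
                                        ldims=400,
                                        input_size=100,
                                        output_size=100,
                                        dropout=0.33)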
Example #2
    def __init__(self, xs, ys, rl, eo, lr):
        # initial input (first word)
        self.x = np.zeros(xs)
        # input size
        self.xs = xs
        # expected output (next word)
        self.y = np.zeros(ys)
        # output size
        self.ys = ys
        # weight matrix for interpreting results from the LSTM cell
        # (num words x num words)
        self.w = np.random.random((ys, ys))
        # matrix used in RMSprop
        self.G = np.zeros_like(self.w)
        # length of the recurrent network - the number of recurrences,
        # i.e. the number of words
        self.rl = rl
        # learning rate
        self.lr = lr
        # array for storing inputs
        self.ia = np.zeros((rl + 1, xs))
        # array for storing cell states
        self.ca = np.zeros((rl + 1, ys))
        # array for storing outputs
        self.oa = np.zeros((rl + 1, ys))
        # array for storing hidden states
        self.ha = np.zeros((rl + 1, ys))
        # forget gate
        self.af = np.zeros((rl + 1, ys))
        # input gate
        self.ai = np.zeros((rl + 1, ys))
        # cell state
        self.ac = np.zeros((rl + 1, ys))
        # output gate
        self.ao = np.zeros((rl + 1, ys))
        # array of expected output values
        self.eo = np.vstack((np.zeros(eo.shape[0]), eo.T))
        # declare LSTM cell (input, output, amount of recurrence, learning rate)
        self.LSTM = LSTM(xs, ys, rl, lr)
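
A hedged usage sketch for this constructor; the class name RecurrentNetwork is made up, and the shapes follow from the np.vstack call above, which expects eo to be (output size x number of recurrences):

import numpy as np

vocab_size = 50   # xs and ys: one-hot word dimension
seq_len = 10      # rl: number of recurrences (words per sequence)
expected = np.random.random((vocab_size, seq_len))

net = RecurrentNetwork(xs=vocab_size, ys=vocab_size,
                       rl=seq_len, eo=expected, lr=0.001)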
Example #3
    def __init__(self, vocab, pos, xpos, rels, w2i, c2i, ext_words_train,
                 ext_words_devtest, options):

        self.model = dy.ParameterCollection()
        self.pretrained_embs = dy.ParameterCollection()
        self.learning_rate = options.learning_rate
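        # Note: beta_2 is 0.9 rather than Adam's usual 0.999, matching the
        # optimizer setup of Dozat & Manning's deep biaffine parser.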
        self.trainer = dy.AdamTrainer(self.model,
                                      alpha=self.learning_rate,
                                      beta_1=0.9,
                                      beta_2=0.9,
                                      eps=1e-12)

        self.dropout = float(options.dropout)
        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.posdims = options.posembedding_dims
        self.pred_batch_size = options.pred_batch_size
        self.ext_words_train = {
            word: ind + 2
            for word, ind in ext_words_train.items()
        }
        self.ext_words_devtest = {
            word: ind + 2
            for word, ind in ext_words_devtest.items()
        }
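        # Vocabulary and tag indices are shifted by 2: index 0 is reserved
        # for unknown tokens and index 1 for the 'PAD' entry assigned below.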
        self.wordsCount = vocab
        self.vocab = {word: ind + 2 for word, ind in w2i.items()}
        self.pos = {word: ind + 2 for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for word, ind in self.pos.items()}
        self.xpos = {word: ind + 2 for ind, word in enumerate(xpos)}
        self.id2xpos = {ind: word for word, ind in self.xpos.items()}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = {ind: word for word, ind in self.rels.items()}
        self.vocab['PAD'] = 1
        self.pos['PAD'] = 1
        self.xpos['PAD'] = 1

        self.external_embedding, self.edim, self.edim_out = None, 0, 0
        if options.external_embedding is not None:

            self.external_embedding = np.load(options.external_embedding)
            self.ext_voc = pickle.load(
                open(options.external_embedding_voc, "rb"))
            self.edim = self.external_embedding.shape[1]
            self.projected_embs = Lin_Projection(self.model, self.edim,
                                                 self.wdims)
            self.elookup_train = self.pretrained_embs.add_lookup_parameters(
                (len(self.ext_words_train) + 2, self.edim))
            for word, i in self.ext_words_train.items():
                self.elookup_train.init_row(
                    i, self.external_embedding[self.ext_voc[word], :])
            self.elookup_train.init_row(0, np.zeros(self.edim))
            self.elookup_train.init_row(1, np.zeros(self.edim))

            self.elookup_devtest = self.pretrained_embs.add_lookup_parameters(
                (len(self.ext_words_devtest) + 2, self.edim))
            for word, i in self.ext_words_devtest.items():
                self.elookup_devtest.init_row(
                    i, self.external_embedding[self.ext_voc[word], :])
            self.elookup_devtest.init_row(0, np.zeros(self.edim))
            self.elookup_devtest.init_row(1, np.zeros(self.edim))

            self.ext_words_train['PAD'] = 1
            self.ext_words_devtest['PAD'] = 1

            print(
                'Loaded external embeddings. External embedding vector dimension:',
                self.edim)

        # LSTMs: three stacked bidirectional layers
        self.fwdLSTM1 = LSTM(self.model,
                             self.wdims + self.posdims,
                             self.ldims,
                             forget_bias=0.0)
        self.bwdLSTM1 = LSTM(self.model,
                             self.wdims + self.posdims,
                             self.ldims,
                             forget_bias=0.0)
        self.fwdLSTM2 = LSTM(self.model,
                             self.ldims,
                             self.ldims,
                             forget_bias=0.0)
        self.bwdLSTM2 = LSTM(self.model,
                             self.ldims,
                             self.ldims,
                             forget_bias=0.0)
        self.fwdLSTM3 = LSTM(self.model,
                             self.ldims,
                             self.ldims,
                             forget_bias=0.0)
        self.bwdLSTM3 = LSTM(self.model,
                             self.ldims,
                             self.ldims,
                             forget_bias=0.0)

        self.biaffineParser = DeepBiaffineAttentionDecoder(
            self.model,
            len(self.rels),
            src_ctx_dim=self.ldims * 2,
            n_arc_mlp_units=400,
            n_label_mlp_units=100,
            arc_mlp_dropout=self.dropout,
            label_mlp_dropout=self.dropout)

        self.HybridCharembs = HybridCharacterAttention(self.model,
                                                       ldims=400,
                                                       input_size=self.cdims,
                                                       output_size=self.wdims,
                                                       dropout=self.dropout)

        # Each lookup table below has two extra rows: row 0 for unknown
        # entries, row 1 for [PAD].
        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 2, self.wdims), init=dy.ConstInitializer(0))
        self.poslookup = self.model.add_lookup_parameters(
            (len(self.pos) + 2, self.posdims), init=dy.ConstInitializer(0))
        self.xposlookup = self.model.add_lookup_parameters(
            (len(self.xpos) + 2, self.posdims), init=dy.ConstInitializer(0))

        self.clookup = self.model.add_lookup_parameters(
            (len(c2i), self.cdims), init=dy.NormalInitializer())
        self.ROOT = self.model.add_parameters((self.wdims * 2),
                                              init=dy.ConstInitializer(0))
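
For context, a sketch of how such a parser might be constructed; the class name BiaffineParser, the options namespace, and the toy vocabularies are assumptions, not part of the original listing:

from types import SimpleNamespace

# Hypothetical hyperparameters mirroring the option fields read above;
# external_embedding=None skips the pretrained-embedding branch.
options = SimpleNamespace(learning_rate=0.001, dropout=0.33,
                          lstm_dims=400, wembedding_dims=100,
                          cembedding_dims=50, posembedding_dims=50,
                          pred_batch_size=32, external_embedding=None)

parser = BiaffineParser(vocab={'the': 10, 'cat': 7},   # word counts
                        pos=['DET', 'NOUN'], xpos=['DT', 'NN'],
                        rels=['det', 'nsubj'],
                        w2i={'the': 0, 'cat': 1},
                        c2i={c: i for i, c in enumerate('abcdefgh')},
                        ext_words_train={}, ext_words_devtest={},
                        options=options)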