def __init__(self, model, ldims=400, input_size=100, output_size=100, dropout=0.33):
    """Character-level encoder: a char LSTM followed by an attention
    vector and a linear projection to the output embedding size.

    Args:
        model: dynet ParameterCollection the parameters are added to.
        ldims: hidden size of the character LSTM.
        input_size: character embedding dimension fed to the LSTM.
        output_size: dimension of the produced word-level embedding.
        dropout: dropout rate stored for use by the forward pass.
    """
    # Remember the layer sizes for later use.
    self.input = input_size
    self.ldims = ldims
    self.output = output_size
    self.dropout = dropout

    # Character-level LSTM over the char embeddings.
    self.charlstm = LSTM(model, self.input, self.ldims, forget_bias=0.0)

    # Attention vector and output projection, all zero-initialised.
    zeros = dy.ConstInitializer(0)
    self.W_atten = model.add_parameters((self.ldims, 1), init=zeros)
    self.W_linear = model.add_parameters((self.output, self.ldims * 2), init=zeros)
    self.b_linear = model.add_parameters((self.output), init=zeros)
def __init__(self, xs, ys, rl, eo, lr):
    """Recurrent network wrapper around a single LSTM cell.

    Args:
        xs: input vector size (one-hot width of the first word).
        ys: output vector size (one-hot width of the predicted word).
        rl: recurrence length -- number of unrolled steps (words).
        eo: 2-D array of expected output values.
        lr: learning rate.
    """
    # Sizes and hyper-parameters.
    self.xs = xs
    self.ys = ys
    self.rl = rl
    self.lr = lr

    # Initial input (first word) and expected output (next word).
    self.x = np.zeros(xs)
    self.y = np.zeros(ys)

    # Weight matrix interpreting the LSTM cell's results
    # (num words x num words), plus its RMSprop accumulator.
    self.w = np.random.random((ys, ys))
    self.G = np.zeros_like(self.w)

    # Per-step input history: one row per recurrence plus the initial step.
    self.ia = np.zeros((rl + 1, xs))

    # Per-step histories for cell states (ca), outputs (oa), hidden
    # states (ha), and the gate activations: forget (af), input (ai),
    # cell candidate (ac), output (ao).
    for history in ("ca", "oa", "ha", "af", "ai", "ac", "ao"):
        setattr(self, history, np.zeros((rl + 1, ys)))

    # Expected outputs, transposed and prefixed with a zero row so that
    # row t aligns with recurrence step t.
    self.eo = np.vstack((np.zeros(eo.shape[0]), eo.T))

    # The underlying LSTM cell (input, output, recurrence count, lr).
    self.LSTM = LSTM(xs, ys, rl, lr)
def __init__(self, vocab, pos, xpos, rels, w2i, c2i, ext_words_train, ext_words_devtest, options):
    """Build the parser model: vocabularies, optional pretrained
    embeddings, a 3-layer BiLSTM encoder, a deep biaffine decoder,
    a character-attention word encoder, and the lookup tables.

    Args:
        vocab: word -> count mapping over the training corpus.
        pos, xpos, rels: iterables of POS tags, fine-grained tags, and
            dependency relation labels.
        w2i, c2i: word -> index and character -> index mappings.
        ext_words_train, ext_words_devtest: word -> index mappings into
            the external embedding matrix for train and dev/test words.
        options: namespace of hyper-parameters (dims, dropout, paths...).
    """
    # Main trainable parameters vs. frozen pretrained embeddings.
    self.model = dy.ParameterCollection()
    self.pretrained_embs = dy.ParameterCollection()
    self.learning_rate = options.learning_rate
    # NOTE(review): beta_2=0.9 (not the Adam default 0.999) — matches the
    # Dozat & Manning biaffine-parser recipe, presumably intentional.
    self.trainer = dy.AdamTrainer(self.model, alpha=self.learning_rate, beta_1=0.9, beta_2=0.9, eps=1e-12)
    self.dropout = float(options.dropout)
    self.ldims = options.lstm_dims
    self.wdims = options.wembedding_dims
    self.cdims = options.cembedding_dims
    self.posdims = options.posembedding_dims
    self.pred_batch_size = options.pred_batch_size
    # All word/tag indices are shifted by +2: index 0 is reserved for
    # unknown tokens, index 1 for [PAD] (set explicitly below).
    self.ext_words_train = {
        word: ind + 2
        for word, ind in ext_words_train.items()
    }
    self.ext_words_devtest = {
        word: ind + 2
        for word, ind in ext_words_devtest.items()
    }
    self.wordsCount = vocab
    self.vocab = {word: ind + 2 for word, ind in w2i.items()}
    self.pos = {word: ind + 2 for ind, word in enumerate(pos)}
    self.id2pos = {ind: word for word, ind in self.pos.items()}
    self.xpos = {word: ind + 2 for ind, word in enumerate(xpos)}
    self.id2xpos = {ind: word for word, ind in self.xpos.items()}
    self.c2i = c2i
    # Relation labels are not shifted: the decoder sizes on len(rels).
    self.rels = {word: ind for ind, word in enumerate(rels)}
    self.irels = {ind: word for word, ind in self.rels.items()}
    self.vocab['PAD'] = 1
    self.pos['PAD'] = 1
    self.xpos['PAD'] = 1

    self.external_embedding, self.edim, self.edim_out = None, 0, 0
    if options.external_embedding is not None:
        # Pretrained embedding matrix (rows indexed by self.ext_voc) and
        # its vocabulary, loaded from disk.
        self.external_embedding = np.load(options.external_embedding)
        self.ext_voc = pickle.load(
            open(options.external_embedding_voc, "rb"))
        self.edim = self.external_embedding.shape[1]
        # Linear projection from pretrained dim to the word-embedding dim.
        self.projected_embs = Lin_Projection(self.model, self.edim, self.wdims)
        # Separate (frozen) lookup tables for train and dev/test words;
        # rows 0 and 1 (unknown / PAD) are zero vectors.
        self.elookup_train = self.pretrained_embs.add_lookup_parameters(
            (len(self.ext_words_train) + 2, self.edim))
        for word, i in self.ext_words_train.items():
            self.elookup_train.init_row(
                i, self.external_embedding[self.ext_voc[word], :])
        self.elookup_train.init_row(0, np.zeros(self.edim))
        self.elookup_train.init_row(1, np.zeros(self.edim))
        self.elookup_devtest = self.pretrained_embs.add_lookup_parameters(
            (len(self.ext_words_devtest) + 2, self.edim))
        for word, i in self.ext_words_devtest.items():
            self.elookup_devtest.init_row(
                i, self.external_embedding[self.ext_voc[word], :])
        self.elookup_devtest.init_row(0, np.zeros(self.edim))
        self.elookup_devtest.init_row(1, np.zeros(self.edim))
        self.ext_words_train['PAD'] = 1
        self.ext_words_devtest['PAD'] = 1
        print(
            'Load external embeddings. External embeddings vectors dimension',
            self.edim)

    #LSTMs
    # Three stacked bidirectional layers; layer 1 consumes word + POS
    # embeddings, layers 2-3 consume the previous layer's hidden states.
    self.fwdLSTM1 = LSTM(self.model, self.wdims + self.posdims, self.ldims, forget_bias=0.0)
    self.bwdLSTM1 = LSTM(self.model, self.wdims + self.posdims, self.ldims, forget_bias=0.0)
    self.fwdLSTM2 = LSTM(self.model, self.ldims, self.ldims, forget_bias=0.0)
    self.bwdLSTM2 = LSTM(self.model, self.ldims, self.ldims, forget_bias=0.0)
    self.fwdLSTM3 = LSTM(self.model, self.ldims, self.ldims, forget_bias=0.0)
    self.bwdLSTM3 = LSTM(self.model, self.ldims, self.ldims, forget_bias=0.0)

    # Biaffine arc/label scorer over the concatenated fwd+bwd states.
    self.biaffineParser = DeepBiaffineAttentionDecoder(
        self.model,
        len(self.rels),
        src_ctx_dim=self.ldims * 2,
        n_arc_mlp_units=400,
        n_label_mlp_units=100,
        arc_mlp_dropout=self.dropout,
        label_mlp_dropout=self.dropout)
    # Character-attention encoder producing word-sized embeddings.
    self.HybridCharembs = HybridCharacterAttention(self.model,
                                                   ldims=400,
                                                   input_size=self.cdims,
                                                   output_size=self.wdims,
                                                   dropout=self.dropout)

    self.wlookup = self.model.add_lookup_parameters(
        (len(vocab) + 2, self.wdims),
        init=dy.ConstInitializer(0))  #0 for unknown 1 for [PAD]
    # NOTE(review): xposlookup uses self.posdims, same as poslookup —
    # presumably the two tag embeddings share a dimension; confirm.
    self.poslookup = self.model.add_lookup_parameters(
        (len(self.pos) + 2, self.posdims),
        init=dy.ConstInitializer(0))  #0 for unknown 1 for [PAD]
    self.xposlookup = self.model.add_lookup_parameters(
        (len(self.xpos) + 2, self.posdims),
        init=dy.ConstInitializer(0))  #0 for unknown 1 for [PAD]
    # Character embeddings, randomly initialised (no reserved rows).
    self.clookup = self.model.add_lookup_parameters(
        (len(c2i), self.cdims), init=dy.NormalInitializer())
    # Learned ROOT token representation; wdims * 2 — assumes the word
    # representation is a concatenation of two wdims-sized vectors
    # (word + char embedding) — TODO confirm against the forward pass.
    self.ROOT = self.model.add_parameters((self.wdims * 2),
                                          init=dy.ConstInitializer(0))