def __init__(self, args, model, full=False):
    """Initialize the TSP (token-sequence prediction) component.

    Args:
        args: configuration namespace (token_dim, hid_dim, tree_vecs, ...).
        model: parameter collection (or wrapper) the parameters are added to.
        full: if True, use the "full" variant and skip the two extra LSTMs.
    """
    super().__init__(args, model)
    # Keys used to select input/output token sequences from a sample.
    self.train_input_key = 'input_tokens'
    self.train_output_key = 'gold_linearized_tokens'
    self.pred_input_key = 'input_tokens'
    self.pred_output_key = 'linearized_tokens'
    self.vec_key = 'tsp_vec'
    # Optional tree-representation encoders, enabled via args.tree_vecs.
    if 'seq' in self.args.tree_vecs:
        self.seq_encoder = SeqEncoder(self.args, self.model, 'tsp_seq')
    if 'bag' in self.args.tree_vecs:
        self.bag_encoder = BagEncoder(self.args, self.model, 'tsp_bag')
    if 'tree' in self.args.tree_vecs:
        self.tree_encoder = TreeEncoder(self.args, self.model, 'tsp_tree')
    self.full = full
    # Two special token embeddings (presumably boundary markers — confirm
    # against usage elsewhere in the file).
    self.special = self.model.add_lookup_parameters(
        (2, self.args.token_dim))
    self.biaffine = dm.BiaffineAttention(self.model, self.args.token_dim,
                                         self.args.hid_dim)
    if not full:
        # Forward/backward LSTMs used only by the non-full variant.
        self.f_lstm = dy.VanillaLSTMBuilder(1, self.args.token_dim,
                                            self.args.token_dim, model)
        self.b_lstm = dy.VanillaLSTMBuilder(1, self.args.token_dim,
                                            self.args.token_dim, model)
    self.log(
        f'Initialized <{self.__class__.__name__}>, params = {self.model.parameter_count()}'
    )
def __init__(self, w2i, options):
    """Similarity experiment: word lookup + (bi)LSTM phrase encoder + scorer."""
    print('Similarity Experiment - init')
    self.options = options
    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model)
    self.w2i = w2i
    self.wdims = options.wembedding_dims
    self.ldims = options.lstm_dims
    self.ext_embeddings = None
    # Model parameters.
    self.wlookup = self.model.add_lookup_parameters((len(w2i), self.wdims))
    self.__load_model()
    # Optional external-information features widen the scorer input.
    extra_dim = 0
    if self.options.external_info != "no_info":
        extra_dim = self.options.external_info_dim
        self.__load_external_info()
    if self.options.lstm_type == "lstm":
        self.phrase_rnn = [
            dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model)
        ]
        self.mlp_w = self.model.add_parameters((1, extra_dim + self.ldims))
    elif self.options.lstm_type == "bilstm":
        # One builder per direction; scorer reads both directions.
        self.phrase_rnn = [
            dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model)
            for _ in range(2)
        ]
        self.mlp_w = self.model.add_parameters(
            (1, extra_dim + 2 * self.ldims))
    self.mlp_b = self.model.add_parameters(1)
def __init__(self, c2i, options):
    """Character-level encoder/decoder model.

    Args:
        c2i: dict mapping characters to integer indices.
        options: hyper-parameter namespace (dropout_rate, lstm_dims,
            cembedding_dims).
    """
    self.model = dy.ParameterCollection()
    random.seed(1)  # fixed seed for reproducibility
    self.trainer = dy.AdamTrainer(self.model)
    # NOTE(review): attribute name has a typo ("dropput"); other methods
    # likely read self.dropput_rate, so renaming here would break them —
    # fix across the whole class in one pass.
    self.dropput_rate = options.dropout_rate
    self.ldims = options.lstm_dims
    self.cdims = options.cembedding_dims
    self.c2i = c2i
    # Projection from 2*ldims bidirectional context down to ldims.
    self.W_d = self.model.add_parameters((self.ldims, 2 * self.ldims))
    self.W_db = self.model.add_parameters(self.ldims)
    self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
    # Char-level word encoder (single forward LSTM).
    self.word_encoder = RNNSequencePredictor(
        dy.VanillaLSTMBuilder(1, self.cdims, self.ldims, self.model))
    # Forward and backward context encoders.
    self.context_encoder = [
        dy.VanillaLSTMBuilder(1, self.ldims, self.ldims, self.model),
        dy.VanillaLSTMBuilder(1, self.ldims, self.ldims, self.model)
    ]
    self.output_encoder = dy.VanillaLSTMBuilder(1, self.cdims, self.ldims,
                                                self.model)
    # Two-layer decoder over character embeddings.
    self.decoder = dy.VanillaLSTMBuilder(2, self.cdims, self.ldims,
                                         self.model)
    # Output softmax over the character vocabulary.
    self.W_s = self.model.add_parameters((len(self.c2i), self.ldims))
    self.W_sb = self.model.add_parameters((len(self.c2i)))
def __init__(self, args, model):
    """Swap (pairwise token ordering) component.

    Chooses train/predict input keys based on which tasks are enabled and
    builds the optional tree encoders, ordering LSTMs and a binary MLP.
    """
    super().__init__(args, model)
    self.train_input_key = 'gold_projective_tokens'  # or 'linearized tokens'
    self.train_output_key = 'gold_linearized_tokens'
    # NOTE(review): this 'gen' assignment is always overwritten by the
    # if/else just below — confirm whether 'gen' should take precedence.
    if 'gen' in self.args.tasks:
        self.pred_input_key = 'generated_tokens'
    if any(task in ['tsp', 'tsp-full', 'lin'] for task in self.args.tasks):
        self.pred_input_key = 'linearized_tokens'
    else:
        self.pred_input_key = 'gold_projective_tokens'
    self.pred_output_key = 'sorted_tokens'
    # self.vec_key = 'sum_tree'
    self.vec_key = 'swap_vec'
    # Optional tree-representation encoders, enabled via args.tree_vecs.
    if 'seq' in self.args.tree_vecs:
        self.seq_encoder = SeqEncoder(self.args, self.model, 'swap_seq')
    if 'bag' in self.args.tree_vecs:
        self.bag_encoder = BagEncoder(self.args, self.model, 'swap_bag')
    if 'tree' in self.args.tree_vecs:
        self.tree_encoder = TreeEncoder(self.args, self.model, 'swap_tree')
    # Two special token embeddings (boundary markers — confirm usage).
    self.special = self.model.add_lookup_parameters(
        (2, self.args.token_dim))
    self.f_lstm = dy.VanillaLSTMBuilder(1, self.args.token_dim,
                                        self.args.token_dim, model)
    self.b_lstm = dy.VanillaLSTMBuilder(1, self.args.token_dim,
                                        self.args.token_dim, model)
    # Binary decision over concatenated forward/backward states.
    self.mlp = dm.MLP(self.model, self.args.token_dim * 2, 2,
                      self.args.hid_dim)
    self.log(
        f'Initialized <{self.__class__.__name__}>, params = {self.model.parameter_count()}'
    )
    self.stats = defaultdict(int)
def build_model(self, nwords, nchars, ntags):
    """Build a hierarchical character-level BiLSTM tagger; return its trainer."""
    self.model = dy.Model()
    trainer = dy.AdamTrainer(self.model)
    char_emb_size = 32
    char_hid_size = 64
    # Character embeddings feed a lower char-level BiLSTM whose outputs
    # feed an upper BiLSTM; the softmax reads the upper hidden states.
    self.C_emb = self.model.add_lookup_parameters((nchars, char_emb_size))
    self.char_lower_fwdLSTM = dy.VanillaLSTMBuilder(1, char_emb_size,
                                                    char_emb_size, self.model)
    self.char_lower_bwdLSTM = dy.VanillaLSTMBuilder(1, char_emb_size,
                                                    char_emb_size, self.model)
    self.char_upper_fwdLSTM = dy.VanillaLSTMBuilder(1, 2 * char_emb_size,
                                                    char_hid_size, self.model)
    self.char_upper_bwdLSTM = dy.VanillaLSTMBuilder(1, 2 * char_emb_size,
                                                    char_hid_size, self.model)
    # Softmax weights and bias over the concatenated upper directions.
    self.W_sm = self.model.add_parameters((ntags, 2 * char_hid_size))
    self.b_sm = self.model.add_parameters((ntags))
    return trainer
def __init__(self,
             character_embeddings_size,
             encodings,
             rnn_size=100,
             rnn_layers=1,
             embeddings_size=100,
             lang_embeddings_size=100,
             model=None,
             runtime=False):
    """Multilingual character-level embedder: stacked BiLSTM + attention.

    Args:
        character_embeddings_size: per-character embedding size.
        encodings: object exposing char2int (character vocabulary).
        rnn_size: hidden size per LSTM direction.
        rnn_layers: number of stacked BiLSTM layers.
        embeddings_size: output embedding size.
        lang_embeddings_size: language-embedding size appended to inputs.
        model: existing dy.Model to share; a new one is created if None.
        runtime: if True use plain LSTMs; otherwise use orthonormally
            initialized builders (training time).
    """
    if model is None:
        self.model = dy.Model()
    else:
        self.model = model
    self.encodings = encodings
    self.character_embeddings_size = character_embeddings_size
    self.embeddings_size = embeddings_size
    self.num_characters = len(encodings.char2int)
    self.character_lookup = self.model.add_lookup_parameters(
        (self.num_characters, character_embeddings_size))
    self.rnn_fw = []
    self.rnn_bw = []
    self.rnn_layers = rnn_layers
    self.rnn_size = rnn_size
    # Input per char: embedding + 3 extra features + language embedding.
    # (The +3 presumably encodes case/position flags — confirm with caller.)
    input_size = character_embeddings_size + 3 + lang_embeddings_size
    for _ in range(rnn_layers):
        if runtime:
            self.rnn_fw.append(
                dy.VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
            self.rnn_bw.append(
                dy.VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
        else:
            # Lazy import keeps the training-only dependency out of runtime.
            from cube.generic_networks.utils import orthonormal_VanillaLSTMBuilder
            self.rnn_fw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, rnn_size,
                                               self.model))
            self.rnn_bw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, rnn_size,
                                               self.model))
        # Deeper layers read both directions plus the language embedding.
        input_size = rnn_size * 2 + lang_embeddings_size
    lemb_size = 2 * lang_embeddings_size
    #if rnn_layers > 1:
    #    lemb_size = 2 * lang_embeddings_size
    self.linearW = self.model.add_parameters(
        (embeddings_size,
         rnn_size * 4 + lemb_size))  # last state and attention over the other states
    self.linearB = self.model.add_parameters((embeddings_size))
    # Additive attention parameters.
    self.att_w1 = self.model.add_parameters(
        (rnn_size, rnn_size * 2 + lang_embeddings_size))
    self.att_w2 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_v = self.model.add_parameters((1, rnn_size))
def initializeParameters(self, model):
    """Create the character lookup table, root vector and char-level LSTMs."""
    entries = len(self.__chars) + self.__addEntries
    self.__lookup = model.add_lookup_parameters((entries, self.__dim))
    self.__rootVec = model.add_parameters((self.getDim()))
    # One single-layer LSTM per direction over character embeddings.
    self.__forwardLstm = dynet.VanillaLSTMBuilder(1, self.__dim,
                                                  self.__lstmDim, model)
    self.__backwardLstm = dynet.VanillaLSTMBuilder(1, self.__dim,
                                                   self.__lstmDim, model)
def __init__(self, vocab_size, num_labels, LSTM_params, embed_vec, P_rows,
             model, improvement):
    """Premise/hypothesis BiLSTM entailment model.

    Args:
        vocab_size: number of rows in the word-embedding lookup.
        num_labels: number of output classes.
        LSTM_params: [premise #layers, hypothesis #layers, parameter dim].
        embed_vec: pretrained embedding matrix used to init the lookup.
        P_rows: maximum sequence length (size of the initial attend vectors).
        model: parameter collection the parameters are added to.
        improvement: not used in this constructor; kept for interface
            compatibility with callers.
    """
    # LSTM_layers - [#layers in the premise BiLSTM, #layers in the
    # hypothesis BiLSTM, dimension of parameters]
    embed_size = LSTM_params[2]
    self.params = {}
    self.max_seq_len = P_rows
    self.params_size = LSTM_params[2]
    # Word lookup initialized from pretrained vectors.
    self.params["lookup"] = model.add_lookup_parameters(
        (vocab_size, embed_size))
    self.params["lookup"].init_from_array(embed_vec)
    # Premise bi-LSTM: each direction gets half the parameter dimension.
    # BUG FIX: use integer division — `/` yields a float in Python 3, which
    # is not a valid LSTM hidden size.
    half_dim = LSTM_params[2] // 2
    self.fw_premise_builder = dy.VanillaLSTMBuilder(LSTM_params[0],
                                                    embed_size, half_dim,
                                                    model)
    self.bw_premise_builder = dy.VanillaLSTMBuilder(LSTM_params[0],
                                                    embed_size, half_dim,
                                                    model)
    # Hypothesis bi-LSTM (full parameter dimension per direction).
    self.fw_hypo_builder = dy.VanillaLSTMBuilder(LSTM_params[1], embed_size,
                                                 LSTM_params[2], model)
    self.bw_hypo_builder = dy.VanillaLSTMBuilder(LSTM_params[1], embed_size,
                                                 LSTM_params[2], model)
    # Initial attend vectors, one per direction.
    self.params["fw_A_t0"] = model.add_parameters((self.max_seq_len))
    self.params["bw_A_t0"] = model.add_parameters((self.max_seq_len))
    # reRead parameters (forward direction).
    self.params["fw_Wp"] = model.add_parameters(
        (LSTM_params[2], LSTM_params[2]))
    self.params["fw_Wm"] = model.add_parameters(
        (LSTM_params[2], LSTM_params[2]))
    self.params["fw_Wc"] = model.add_parameters(
        (LSTM_params[2], LSTM_params[2]))
    self.params["fw_Walpha"] = model.add_parameters((LSTM_params[2]))
    # reRead parameters (backward direction).
    self.params["bw_Wp"] = model.add_parameters(
        (LSTM_params[2], LSTM_params[2]))
    self.params["bw_Wm"] = model.add_parameters(
        (LSTM_params[2], LSTM_params[2]))
    self.params["bw_Wc"] = model.add_parameters(
        (LSTM_params[2], LSTM_params[2]))
    self.params["bw_Walpha"] = model.add_parameters((LSTM_params[2]))
    # Output layer over both directions.
    self.params["W"] = model.add_parameters((num_labels, LSTM_params[2] * 2))
    self.params["b"] = model.add_parameters((num_labels))
def __init__(self, encodings):
    """Grapheme-to-phoneme seq2seq model (BiLSTM encoder, attentive decoder)."""
    self.losses = []
    self.model = dy.Model()
    # Adam with non-default beta_2=0.9 (less second-moment smoothing).
    self.trainer = dy.AdamTrainer(self.model,
                                  alpha=2e-3,
                                  beta_1=0.9,
                                  beta_2=0.9)
    self.encodings = encodings
    self.DECODER_SIZE = 100
    self.ENCODER_SIZE = 100
    self.CHAR_EMB_SIZE = 100
    self.HIDDEN_SIZE = 100
    self.lexicon = {}
    self.char_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.char2int), self.CHAR_EMB_SIZE))
    self.phoneme_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.phoneme2int) + 1,
         self.CHAR_EMB_SIZE))  # +1 is for special START
    self.start_lookup = self.model.add_lookup_parameters(
        (1, self.CHAR_EMB_SIZE + self.ENCODER_SIZE * 2))  # START SYMBOL
    # Two-layer BiLSTM encoder over characters.
    self.encoder_fw = []
    self.encoder_bw = []
    input_layer_size = self.CHAR_EMB_SIZE
    for ii in range(2):
        self.encoder_fw.append(
            dy.VanillaLSTMBuilder(1, input_layer_size, self.ENCODER_SIZE,
                                  self.model))
        self.encoder_bw.append(
            dy.VanillaLSTMBuilder(1, input_layer_size, self.ENCODER_SIZE,
                                  self.model))
        # Deeper layers read the concatenated fw/bw outputs.
        input_layer_size = self.ENCODER_SIZE * 2
    # Decoder input: attended encoder context + previous phoneme embedding.
    self.decoder = dy.VanillaLSTMBuilder(
        2, self.ENCODER_SIZE * 2 + self.CHAR_EMB_SIZE, self.DECODER_SIZE,
        self.model)
    # Additive attention over encoder states.
    self.att_w1 = self.model.add_parameters((100, self.ENCODER_SIZE * 2))
    self.att_w2 = self.model.add_parameters((100, self.DECODER_SIZE))
    self.att_v = self.model.add_parameters((1, 100))
    # Hidden layer plus softmax over the phoneme inventory.
    self.hidden_w = self.model.add_parameters(
        (self.HIDDEN_SIZE, self.DECODER_SIZE))
    self.hidden_b = self.model.add_parameters((self.HIDDEN_SIZE))
    self.softmax_w = self.model.add_parameters(
        (len(self.encodings.phoneme2int) + 1,
         self.HIDDEN_SIZE))  # +1 is for EOS
    self.softmax_b = self.model.add_parameters(
        (len(self.encodings.phoneme2int) + 1))
def __init__(self, data, opt):
    """Sentence encoder with attention; single or bidirectional LSTM/GRU.

    Args:
        data: provides w2i vocabulary and optional ext_embeddings.
        opt: options namespace — embedding_size, hidden_size,
            attention_size, encoder_dir in {"single", "bidirectional"},
            encoder_type in {"lstm", "gru"}.
    """
    self.opt = opt
    self.model = dy.ParameterCollection()
    self.trainer = dy.MomentumSGDTrainer(self.model)
    self.w2i = data.w2i
    self.wdims = opt.embedding_size
    self.ldims = opt.hidden_size
    self.attsize = opt.attention_size
    self.ext_embeddings = data.ext_embeddings
    # Model Parameters
    self.wlookup = self.model.add_lookup_parameters(
        (len(self.w2i), self.wdims))
    self.__load_external_embeddings()
    if self.opt.encoder_dir == "single":
        if self.opt.encoder_type == "lstm":
            self.sentence_rnn = [
                dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model)
            ]
        elif self.opt.encoder_type == "gru":
            self.sentence_rnn = [
                dy.GRUBuilder(1, self.wdims, self.ldims, self.model)
            ]
        # Attention over ldims-sized encoder states.
        self.attention_w = self.model.add_parameters(
            (self.attsize, self.ldims))
        self.attention_b = self.model.add_parameters(self.attsize)
        self.att_context = self.model.add_parameters(self.attsize)
        # Scorer input: ldims encoder state + 2*ldims extra features.
        self.mlp_w = self.model.add_parameters(
            (1, self.ldims + 2 * self.ldims))
        self.mlp_b = self.model.add_parameters(1)
    elif self.opt.encoder_dir == "bidirectional":
        if self.opt.encoder_type == "lstm":
            self.sentence_rnn = [
                dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model),
                dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model),
            ]
        elif self.opt.encoder_type == "gru":
            self.sentence_rnn = [
                dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
            ]
        # Doubled sizes for the concatenated directions.
        self.attention_w = self.model.add_parameters(
            (self.attsize, 2 * self.ldims))
        self.attention_b = self.model.add_parameters(self.attsize)
        self.att_context = self.model.add_parameters(self.attsize)
        self.mlp_w = self.model.add_parameters(
            (1, 2 * self.ldims + 4 * self.ldims))
        self.mlp_b = self.model.add_parameters(1)
def build_model(self, nwords, nchars, ntags):
    """Build a word-level BiLSTM tagger; return an Adam trainer for it."""
    self.model = dy.Model()
    trainer = dy.AdamTrainer(self.model)
    emb_size = 64
    hid_size = 64
    # Word embeddings.
    self.W_emb = self.model.add_lookup_parameters((nwords, emb_size))
    # One LSTM per direction.
    self.fwdLSTM = dy.VanillaLSTMBuilder(1, emb_size, hid_size, self.model)
    self.bwdLSTM = dy.VanillaLSTMBuilder(1, emb_size, hid_size, self.model)
    # Softmax weights and bias over the concatenated directions.
    self.W_sm = self.model.add_parameters((ntags, 2 * hid_size))
    self.b_sm = self.model.add_parameters((ntags))
    return trainer
def __init__(self, char_dim, feat_dim, hidden_dim, char_size, feat_sizes):
    """Char-level seq2seq model with feature embeddings and attention.

    Args:
        char_dim: character embedding size.
        feat_dim: per-feature embedding size.
        hidden_dim: LSTM hidden size.
        char_size: character vocabulary size.
        feat_sizes: list of vocabulary sizes, one per feature.
    """
    self._global_step = 0
    self._char_dim = char_dim
    self._feat_dim = feat_dim
    self._hidden_dim = hidden_dim
    self._pc = dy.ParameterCollection()
    if config.adam:
        self._trainer = dy.AdamTrainer(self._pc, config.learning_rate,
                                       config.beta_1, config.beta_2,
                                       config.epsilon)
    else:
        # BUG FIX: the SGD trainer was previously bound to a local variable
        # and discarded, leaving self._trainer unset on this branch.
        self._trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate)
        self._trainer.set_clip_threshold(config.clip_threshold)
    self.params = dict()
    self.lp_c = self._pc.add_lookup_parameters((char_size, char_dim))
    # One zero-initialized lookup table per feature.
    self.lp_feats = []
    for idx in range(len(feat_sizes)):
        self.lp_feats.append(
            self._pc.add_lookup_parameters((feat_sizes[idx], feat_dim),
                                           init=dy.ConstInitializer(0.)))
    # Dropout rates for embeddings, LSTMs and MLPs.
    self._pdrop_embs = config.pdrop_embs
    self._pdrop_lstm = config.pdrop_lstm
    self._pdrop_mlp = config.pdrop_mlp
    # Stacked BiLSTM encoder: (forward, backward) builder pairs per layer;
    # layer 0 reads char embeddings, deeper layers read both directions.
    self.LSTM_builders = []
    f = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)
    b = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)
    self.LSTM_builders.append((f, b))
    for i in range(config.layers - 1):
        f = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
        b = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
        self.LSTM_builders.append((f, b))
    self.dec_LSTM = dy.VanillaLSTMBuilder(config.layers, hidden_dim,
                                          hidden_dim, self._pc)
    # MLP input: 3x hidden state + char embedding + feature embeddings
    # (feat_dim * 10 — presumably 10 features; confirm against feat_sizes).
    self.MLP = self._pc.add_parameters(
        (hidden_dim, hidden_dim * 3 + char_dim + feat_dim * 10))
    self.MLP_bias = self._pc.add_parameters((hidden_dim))
    self.classifier = self._pc.add_parameters((char_size, hidden_dim))
    self.classifier_bias = self._pc.add_parameters((char_size))
    # Attention MLP and scoring vector.
    self.MLP_attn = self._pc.add_parameters(
        (hidden_dim * 2, char_dim + feat_dim * 10 + hidden_dim))
    self.MLP_attn_bias = self._pc.add_parameters((hidden_dim * 2))
    self.attn_weight = self._pc.add_parameters((hidden_dim * 2))
def __init__(self, model, input_size, num_layers, dropout):
    """Bidirectional recurrent composition module.

    Args:
        model: parent parameter collection; a subcollection is created.
        input_size: input dimensionality; must be even because each
            direction produces input_size // 2 units.
        num_layers: LSTM depth per direction.
        dropout: dropout rate stored for use at composition time.
    """
    # FIX: removed duplicated word in the assertion message ("size size").
    assert input_size % 2 == 0, 'input size must be even'
    self.model = model.add_subcollection('BiRecurrentComposition')
    self.fwd_rnn_builder = dy.VanillaLSTMBuilder(num_layers, input_size,
                                                 input_size // 2, self.model)
    self.bwd_rnn_builder = dy.VanillaLSTMBuilder(num_layers, input_size,
                                                 input_size // 2, self.model)
    self.dropout = dropout
def __init__(self, dsz, pc, hsz=None, rnntype='blstm', layers=1, pdrop=0.5,
             residual=False, create_src_mask=True, name='rnn-encoder',
             **kwargs):
    """RNN encoder; 'blstm' splits the hidden size across two directions."""
    pc = pc.add_subcollection(name=name)
    super(RNNEncoder, self).__init__(pc)
    self.residual = residual
    hidden = dsz if hsz is None else hsz
    if rnntype == 'blstm':
        # Half-width LSTM per direction so the concatenation is `hidden`.
        self.lstm_forward = dy.VanillaLSTMBuilder(layers, dsz, hidden // 2,
                                                  self.pc)
        self.lstm_backward = dy.VanillaLSTMBuilder(layers, dsz, hidden // 2,
                                                   self.pc)
    else:
        self.lstm_forward = dy.VanillaLSTMBuilder(layers, dsz, hidden,
                                                  self.pc)
        self.lstm_backward = None
    if create_src_mask:
        self.src_mask_fn = sequence_mask
    else:
        self.src_mask_fn = lambda x, y: (None, None)
    self.pdrop = pdrop
def __init__(self, args, model, lost_map):
    """Token-generation component: predicts "lost" tokens on each side of a
    head using side-specific LSTMs, classifiers and attention.

    Args:
        args: configuration (tasks, tree_vecs, token_dim, hid_dim, ...).
        model: parameter collection wrapper passed to the base class.
        lost_map: mapping of generatable lost-token types to indices.
    """
    super().__init__(args, model)
    self.lost_map = lost_map
    self.train_input_key = 'gold_linearized_domain'
    self.train_output_key = 'gold_generated_tokens'
    # Prediction input depends on which upstream ordering tasks ran.
    if any(task in ['tsp', 'tsp-full', 'lin'] for task in self.args.tasks):
        self.pred_input_key = 'linearized_domain'
    else:
        self.pred_input_key = 'domain'
    self.pred_output_key = 'generated_tokens'
    self.vec_key = 'gen_vec'
    # Optional tree-representation encoders, enabled via args.tree_vecs.
    if 'seq' in self.args.tree_vecs:
        self.seq_encoder = SeqEncoder(self.args, self.model, 'gen_seq')
    if 'bag' in self.args.tree_vecs:
        self.bag_encoder = BagEncoder(self.args, self.model, 'gen_bag')
    if 'tree' in self.args.tree_vecs:
        self.tree_encoder = TreeEncoder(self.args, self.model, 'gen_tree')
    # Left/right forward+backward LSTMs over the head's domain.
    self.lf_lstm = dy.VanillaLSTMBuilder(1, self.args.token_dim,
                                         self.args.token_dim, self.model)
    self.lb_lstm = dy.VanillaLSTMBuilder(1, self.args.token_dim,
                                         self.args.token_dim, self.model)
    self.rf_lstm = dy.VanillaLSTMBuilder(1, self.args.token_dim,
                                         self.args.token_dim, self.model)
    self.rb_lstm = dy.VanillaLSTMBuilder(1, self.args.token_dim,
                                         self.args.token_dim, self.model)
    # Side-specific classifiers over the lost-token inventory.
    self.l_gen_mlp = dm.MLP(self.model, self.args.token_dim * 2,
                            len(lost_map), self.args.hid_dim)
    self.r_gen_mlp = dm.MLP(self.model, self.args.token_dim * 2,
                            len(lost_map), self.args.hid_dim)
    self.l_attention = dm.Attention(self.model, self.args.token_dim,
                                    self.args.token_dim,
                                    self.args.token_dim)
    self.r_attention = dm.Attention(self.model, self.args.token_dim,
                                    self.args.token_dim,
                                    self.args.token_dim)
    # for T2 generating lost tokens (no beam search for now since it's
    # tricky to make the lost stable)
    self.special = self.model.add_lookup_parameters(
        (2, self.args.token_dim))
    self.end = self.model.add_parameters(self.args.token_dim)
    # NOTE(review): self.lost_map was already assigned at the top — this
    # second assignment is redundant.
    self.lost_map = lost_map
    self.lost_emb = self.model.add_lookup_parameters(
        (len(self.lost_map), self.args.token_dim))
    self.gen_tokens = [LostToken(l) for l in self.lost_map]
    self.log(
        f'Initialized <{self.__class__.__name__}>, params = {self.model.parameter_count()}'
    )
def create_multilayer_lstm_params(num_layers, in_size, state_size, model,
                                  name=""):
    """Adds a multilayer LSTM to the model parameters.

    Inputs:
        num_layers (int): Number of layers to create.
        in_size (int): The input size to the first layer.
        state_size (int): The size of the states.
        model (dy.ParameterCollection): The parameter collection for the model.
        name (str, optional): The name of the multilayer LSTM.

    Returns:
        list of dy.VanillaLSTMBuilder, one builder per layer.
    """
    # Removed the redundant `in_size = in_size` / `state_size = state_size`
    # self-assignments and switched the log line to an f-string (same text).
    params = []
    for i in range(num_layers):
        layer_name = f"{name}-{i}"
        print(f"LSTM {layer_name}: {in_size} x {state_size}; "
              "default Dynet initialization of hidden weights")
        params.append(dy.VanillaLSTMBuilder(1, in_size, state_size, model))
        # Each subsequent layer consumes the previous layer's states.
        in_size = state_size
    return params
def __init__(
    self,
    model,
    word_vocab,
    word_embedding_dim,
    lstm_layers,
    lstm_dim,
    dropout,
):
    """Word-level LSTM language model with an affine output layer."""
    # Capture constructor args for serialization (must be the first
    # statement so no extra locals leak into the spec).
    self.spec = locals()
    self.spec.pop("self")
    self.spec.pop("model")
    self.model = model.add_subcollection("LanguageModel")
    self.word_vocab = word_vocab
    self.lstm_dim = lstm_dim
    self.embeddings = self.model.add_lookup_parameters(
        (word_vocab.size, word_embedding_dim))
    self.rnn_builder = dy.VanillaLSTMBuilder(lstm_layers,
                                             word_embedding_dim, lstm_dim,
                                             self.model)
    # Projection from hidden state to vocabulary logits.
    self.out = Affine(self.model, lstm_dim, word_vocab.size)
    self.dropout = dropout
    self.training = True
def __init__(self, tgt_embeddings, **kwargs):
    """RNN seq2seq decoder with attention and optional weight tying.

    kwargs: pc (parent collection, required), hsz, layers, rnntype,
        feed_input, dropout, tie_weights, name, plus arc-policy options.
    """
    pc = kwargs.pop('pc').add_subcollection(
        name=kwargs.get('name', 'rnn-decoder'))
    super(RNNDecoder, self).__init__(pc)
    self.hsz = kwargs['hsz']
    self.arc_policy = create_seq2seq_arc_policy(**kwargs)
    self.tgt_embeddings = tgt_embeddings
    rnntype = kwargs.get('rnntype', 'lstm')
    layers = kwargs['layers']
    feed_input = kwargs.get('feed_input', True)
    dsz = tgt_embeddings.get_dsz()
    if feed_input:
        # Input feeding: the previous attention output is concatenated to
        # the decoder input, enlarging it by hsz.
        self.input_i = self._feed_input
        dsz += self.hsz
    else:
        self.input_i = self._basic_input
    self.pdrop = kwargs.get('dropout', 0.5)
    # NOTE(review): rnntype is read above but never used — the decoder is
    # always a VanillaLSTMBuilder; confirm whether other types were intended.
    self.decoder_rnn = dy.VanillaLSTMBuilder(layers, dsz, self.hsz, self.pc)
    self.init_attn(**kwargs)
    do_weight_tying = bool(kwargs.get('tie_weights', False))
    if do_weight_tying:
        if self.hsz == tgt_embeddings.get_dsz():
            # Share the embedding matrix with the output projection.
            self.preds = WeightShareLinear(tgt_embeddings.get_vsz(),
                                           tgt_embeddings.embeddings,
                                           self.pc,
                                           transform=squeeze_and_transpose,
                                           name=tgt_embeddings.pc.name())
        else:
            raise ValueError("weight tying only valid when prediction projection \
layer's hidden size == embedding weight dimensions")
    else:
        self.preds = Linear(self.tgt_embeddings.get_vsz(), self.hsz,
                            self.pc)
def __init__(self, nl, di, dh, du, vs, pc, dr=0.0, pre_embs=None):
    """Bidirectional user-LSTM encoder: base encoder plus a reverse LSTM."""
    super(BiUserLSTMEncoder, self).__init__(nl, di, dh, du, vs, pc, dr,
                                            pre_embs)
    # Output dimension grows by the backward hidden size.
    self.dim += dh
    # Backward (reversed-sequence) encoder.
    self.rev_lstm = dy.VanillaLSTMBuilder(self.nl, self.di, self.dh, self.pc)
    scale = 1 / np.sqrt(dh)
    self.rev_Th_p = self.pc.add_parameters((dh, du),
                                           init=dy.UniformInitializer(scale),
                                           name='revTh')
def __init__(self, vocabs, pc):
    """Word predictor over word/POS/suffix features with bilinear attention."""
    self.word_vocab, self.pos_vocab, self.suffix_vocab = vocabs
    self.pc = pc
    # Embedding sizes per feature type.
    self.WORD_EMBED_SIZE = 100
    self.POS_EMBED_SIZE = 20
    self.SUFFIX_EMBED_SIZE = 40
    self.SYLL_SIZE = 1
    self.total_input_size = (self.WORD_EMBED_SIZE + self.POS_EMBED_SIZE +
                             self.SUFFIX_EMBED_SIZE + self.SYLL_SIZE)
    self.RNN_HIDDEN_SIZE = 80
    self.rnn = dy.VanillaLSTMBuilder(1, self.total_input_size,
                                     self.RNN_HIDDEN_SIZE, pc)
    self.attender = BilinearAttender(self.pc, self.RNN_HIDDEN_SIZE)
    # One lookup table per feature vocabulary.
    self.word_vecs = self.pc.add_lookup_parameters(
        (len(self.word_vocab), self.WORD_EMBED_SIZE))
    self.pos_vecs = self.pc.add_lookup_parameters(
        (len(self.pos_vocab), self.POS_EMBED_SIZE))
    self.suffix_vecs = self.pc.add_lookup_parameters(
        (len(self.suffix_vocab), self.SUFFIX_EMBED_SIZE))
    # Output projection over the word vocabulary.
    self.W = self.pc.add_parameters(
        (len(self.word_vocab), self.RNN_HIDDEN_SIZE))
    self.b = self.pc.add_parameters((len(self.word_vocab)))
def __init__(self, model, dec_emb_dim, enc_output_size, dec_hidden_dim,
             dec_num_layers, dec_vocab_size, dec_lstm_dropout, dec_dropout,
             attention_type):
    """Attentive LSTM decoder: embedding, RNN, output projection, attention.

    Args:
        model: parameter collection the layers are added to.
        dec_emb_dim: decoder-side token embedding size.
        enc_output_size: size of the encoder context vectors.
        dec_hidden_dim: decoder LSTM hidden size.
        dec_num_layers: decoder LSTM depth.
        dec_vocab_size: output vocabulary size.
        dec_lstm_dropout / dec_dropout: dropout rates (stored for later).
        attention_type: attention variant selector (stored for later).
    """
    self.model = model
    self.dec_emb_dim = dec_emb_dim
    self.enc_output_size = enc_output_size
    self.dec_hidden_dim = dec_hidden_dim
    self.dec_num_layers = dec_num_layers
    self.dec_vocab_size = dec_vocab_size
    self.dec_lstm_dropout = dec_lstm_dropout
    self.dec_dropout = dec_dropout
    self.attention_type = attention_type
    # layers
    self.embedding = self.model.add_lookup_parameters(
        (dec_vocab_size, dec_emb_dim))
    # The RNN consumes its own embedding concatenated with encoder context.
    self.rnn = dy.VanillaLSTMBuilder(dec_num_layers,
                                     dec_emb_dim + enc_output_size,
                                     dec_hidden_dim, model)
    self.output_linear_W = self.model.add_parameters(
        (dec_vocab_size, dec_hidden_dim))
    self.output_linear_b = self.model.add_parameters(dec_vocab_size)
    # Additive attention parameters.
    self.att_w1 = self.model.add_parameters(
        (enc_output_size, enc_output_size))
    self.att_w2 = self.model.add_parameters(
        (enc_output_size, dec_hidden_dim))
    self.att_v = self.model.add_parameters((1, enc_output_size))
    # other initializations
    self._train()
def rnn_from_spec(spec, num_layers, input_dim, hidden_dim, model,
                  residual_to_output):
    """Instantiate a decoder RNN builder from a spec string.

    Supported specs (case-insensitive): "lstm", "residuallstm".
    Raises RuntimeError for anything else.
    """
    kind = spec.lower()
    if kind == "lstm":
        return dy.VanillaLSTMBuilder(num_layers, input_dim, hidden_dim, model)
    if kind == "residuallstm":
        return residual.ResidualRNNBuilder(num_layers, input_dim, hidden_dim,
                                           model, dy.VanillaLSTMBuilder,
                                           residual_to_output)
    raise RuntimeError("Unknown decoder type {}".format(spec))
def __init__(self, args, model):
    """Head-to-dependent linearizer: two pointers plus two ordering LSTMs."""
    print('<H2DLinearizer>')
    self.args = args
    # Select the pointer implementation from the configured type.
    pointer_cls = {
        'simple': dm.SimplePointer,
        'glimpse': dm.GlimpsePointer,
        'self': dm.SelfPointer
    }[self.args.pointer_type]
    tdim = self.args.token_dim
    # One pointer per side of the head.
    self.l_pointer = pointer_cls(model, tdim, tdim)
    self.r_pointer = pointer_cls(model, tdim, tdim)
    # LSTMs ordering dependents left/right of the head.
    self.h2l_lstm = dy.VanillaLSTMBuilder(1, tdim, tdim, model)
    self.h2r_lstm = dy.VanillaLSTMBuilder(1, tdim, tdim, model)
def __init__(self, model, vocab_size, start):
    """LSTM language model over a vocabulary of size vocab_size."""
    self.start = start  # index of the start-of-sequence token
    self.embeddings = model.add_lookup_parameters(
        (vocab_size, FLAGS_word_dim))
    self.rnn = dy.VanillaLSTMBuilder(FLAGS_layers, FLAGS_word_dim,
                                     FLAGS_hidden_dim, model)
    # Hidden-to-logits projection and bias.
    self.h2l = model.add_parameters((vocab_size, FLAGS_hidden_dim))
    self.lb = model.add_parameters(vocab_size)
def initializeParameters(self, model):
    """Build the token embedder and the stacked forward/backward LSTMs."""
    self.__logger.debug("Initialize: (%i, %i)" %
                        (self.__tokBuilder.getDim(), 2 * self.__lstmDim))
    self.__tokBuilder.initializeParameters(model)
    for layer in range(self.__lstmLayers):
        # Layer 0 reads token embeddings; deeper layers read the
        # concatenated (2 * lstmDim) outputs of the previous layer.
        in_dim = self.__tokBuilder.getDim() if layer == 0 else 2 * self.__lstmDim
        self.__forwardLstms[layer] = dynet.VanillaLSTMBuilder(
            1, in_dim, self.__lstmDim, model)
        self.__backwardLstms[layer] = dynet.VanillaLSTMBuilder(
            1, in_dim, self.__lstmDim, model)
def _init_pool(self, dsz, layers=1, **kwargs):
    """Create an LSTM pooling layer.

    Args:
        dsz: input dimensionality.
        layers: number of LSTM layers.
        kwargs: may carry 'rnnsz' (preferred) or 'hsz' for the hidden size;
            a list value means per-layer sizes, of which the first is used.

    Returns:
        (hidden size, pooling function over (input_, lengths)).
    """
    hsz = kwargs.get('rnnsz', kwargs.get('hsz', 100))
    # Idiom fix: isinstance instead of `type(...) is list`.
    if isinstance(hsz, list):
        hsz = hsz[0]
    self.rnn = dy.VanillaLSTMBuilder(layers, dsz, hsz, self.pc)

    def pool(input_, lengths):
        # Defers to the module-level RNN encoder helper.
        return rnn_encode(self.rnn, input_, lengths)

    return hsz, pool
def __init__(self, character_embeddings_size, encodings, rnn_size=100,
             rnn_layers=1, embeddings_size=100, model=None):
    """Character-level token embedder: stacked BiLSTM + attention readout."""
    self.model = dy.Model() if model is None else model
    self.encodings = encodings
    self.character_embeddings_size = character_embeddings_size
    self.embeddings_size = embeddings_size
    self.num_characters = len(encodings.char2int)
    self.character_lookup = self.model.add_lookup_parameters(
        (self.num_characters, character_embeddings_size))
    self.rnn_fw, self.rnn_bw = [], []
    self.rnn_layers = rnn_layers
    self.rnn_size = rnn_size
    # Input per char: embedding plus 3 extra features.
    layer_input = character_embeddings_size + 3
    for _layer in range(rnn_layers):
        self.rnn_fw.append(
            dy.VanillaLSTMBuilder(1, layer_input, rnn_size, self.model))
        self.rnn_bw.append(
            dy.VanillaLSTMBuilder(1, layer_input, rnn_size, self.model))
        # Deeper layers read the concatenated fw/bw outputs.
        layer_input = rnn_size * 2
    # Readout: last state and attention over the other states (rnn_size * 4).
    self.linearW = self.model.add_parameters((embeddings_size, rnn_size * 4))
    self.linearB = self.model.add_parameters((embeddings_size))
    # Additive attention parameters.
    self.att_w1 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_w2 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_v = self.model.add_parameters((1, rnn_size))
def __init__(self, args, model, train_sents=None):
    """Feature-embedding component; builds a lookup table per enabled feature.

    Args:
        args: configuration (features, tasks, hid_dim, ...).
        model: parameter collection wrapper passed to the base class.
        train_sents: training sentences; when given, vocab maps are built
            from them, otherwise previously saved maps are loaded.
    """
    super().__init__(args, model)
    # train mode
    if train_sents:
        self.get_maps(train_sents)
    else:
        self.load_maps()
    # create parameters — one lookup table per enabled feature
    if 'word' in self.args.features:
        self.word_emb = self.model.add_lookup_parameters(
            (len(self.word_map), self.args.hid_dim))
    if 'lemma' in self.args.features:
        self.lemma_emb = self.model.add_lookup_parameters(
            (len(self.lemma_map), self.args.hid_dim))
    if 'upos' in self.args.features:
        self.upos_emb = self.model.add_lookup_parameters(
            (len(self.upos_map), self.args.hid_dim))
    if 'xpos' in self.args.features:
        self.xpos_emb = self.model.add_lookup_parameters(
            (len(self.xpos_map), self.args.hid_dim))
    if 'morph' in self.args.features:
        self.morph_emb = self.model.add_lookup_parameters(
            (len(self.morph_map), self.args.hid_dim))
        # Morph features are a sequence, encoded with their own LSTM.
        self.morph_lstm_encoder = dy.VanillaLSTMBuilder(
            1, self.args.hid_dim, self.args.hid_dim, self.model)
    if 'label' in self.args.features:
        self.label_emb = self.model.add_lookup_parameters(
            (len(self.label_map), self.args.hid_dim))
    if 'char_lstm' in self.args.features or 'inf' in self.args.tasks or 'con' in self.args.tasks:
        self.char_emb = self.model.add_lookup_parameters(
            (len(self.char_map), self.args.hid_dim))
    if 'char_lstm' in self.args.features:
        # BUG FIX: use integer division — `/` yields a float in Python 3,
        # which is not a valid LSTM hidden dimension.
        half_dim = self.args.hid_dim // 2
        self.char_lstm_f_encoder = dy.VanillaLSTMBuilder(
            1, self.args.hid_dim, half_dim, self.model)
        self.char_lstm_b_encoder = dy.VanillaLSTMBuilder(
            1, self.args.hid_dim, half_dim, self.model)
    self.log(
        f'Initialized <{self.__class__.__name__}>, params = {self.model.parameter_count()}'
    )
def __init__(self, in_dim, out_dim, model, dropout_rate=None,
             direction='backward', layers=1):
    """Single LSTM encoder over a surface sequence."""
    self.dropout_rate = dropout_rate
    # The builder itself is direction-agnostic; `direction` is stored for
    # the caller to decide how the sequence is fed.
    self.surfaceBuilder = dy.VanillaLSTMBuilder(layers, in_dim, out_dim,
                                                model)
    self.direction = direction
def __init__(self, indim, hdim, paramcol, loadname=None):
    '''
    @param indim: int, input dimension of the biLSTM
    @param hdim: int, hidden state dimension of both forward and backward LSTM
    @param paramcol: parameter collection holding the biLSTM's local parameters
    @param loadname: string, default=None; when given, parameters are loaded
        via dy.load() with this basename instead of being created from scratch
    '''
    if loadname is not None:
        # Restore a previously saved forward/backward pair.
        self.flstm, self.blstm = dy.load(loadname, paramcol)
    else:
        self.flstm = dy.VanillaLSTMBuilder(1, indim, hdim, paramcol)
        self.blstm = dy.VanillaLSTMBuilder(1, indim, hdim, paramcol)
        self.flstm.set_dropouts(config.dropout, config.dropout)
        self.blstm.set_dropouts(config.dropout, config.dropout)