def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
    """Build the aspect-extraction model and its optimizer.

    :param params: hyper-parameter namespace (dims, dropout rates, optimizer name, ...)
    :param vocab: word vocabulary; its length sizes the embedding table
    :param label2tag: mapping from aspect label ids to tag strings
    :param pretrained_embeddings: optional pre-trained word vectors passed to WDEmb
    """
    self.dim_w = params.dim_w
    self.win = params.win
    self.vocab = vocab
    self.n_words = len(self.vocab)
    self.dim_asp = params.dim_asp
    self.dim_y_asp = params.n_asp_tags
    self.n_steps = params.n_steps
    self.asp_label2tag = label2tag
    self.dropout_asp = params.dropout_asp
    self.dropout = params.dropout
    self.ds_name = params.ds_name
    self.model_name = params.model_name
    self.attention_type = params.attention_type
    self.pc = dy.ParameterCollection()
    # NOTE: sub-module creation order is preserved — DyNet registers
    # parameters in creation order, which serialization relies on.
    self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                     pretrained_embeddings=pretrained_embeddings)
    self.DEP_RecNN = DTreeBuilder(pc=self.pc, n_in=self.win * self.dim_w,
                                  n_out=self.dim_asp,
                                  dropout_rate=self.dropout_asp)
    self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
    self.BiAttention_F = BiAttention(pc=self.pc, n_in=self.dim_asp,
                                     n_out=self.dim_asp,
                                     dropout_rate=self.dropout_asp)
    self.BiAttention_B = BiAttention(pc=self.pc, n_in=self.dim_asp,
                                     n_out=self.dim_asp,
                                     dropout_rate=self.dropout_asp)
    self.BiAttention_T = BiAttention(pc=self.pc, n_in=self.dim_asp,
                                     n_out=self.dim_asp,
                                     dropout_rate=self.dropout_asp)
    self.MultiWeightLayer = MultiWeightLayer(pc=self.pc, n_in=self.dim_asp,
                                             n_out=self.dim_asp,
                                             dropout_rate=self.dropout_asp)
    # Final projection from aspect representation to tag scores.
    self.ASP_FC = Linear(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_y_asp)
    self.layers = [self.ASP_FC, self.DEP_RecNN, self.BiAttention_F,
                   self.BiAttention_B, self.BiAttention_T, self.MultiWeightLayer]
    if params.optimizer == 'sgd':
        self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
    elif params.optimizer == 'momentum':
        self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
    elif params.optimizer == 'adam':
        # NOTE(review): beta_2=0.9 deviates from the common Adam default of
        # 0.999 — confirm this is intentional.
        self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
    elif params.optimizer == 'adagrad':
        self.optimizer = dy.AdagradTrainer(self.pc)
    elif params.optimizer == 'adadelta':
        self.optimizer = dy.AdadeltaTrainer(self.pc)
    else:
        # ValueError is more specific than a bare Exception and remains
        # backward-compatible for callers catching Exception.
        raise ValueError("Invalid optimizer!!")
def __init__(self, vw, vt, nwords, ntags):
    """Set up a BiLSTM POS-tagger: embeddings, LSTMs, MLP, and trainer.

    :param vw: word vocabulary object exposing a w2i mapping
    :param vt: tag vocabulary object
    :param nwords: number of distinct words (embedding rows)
    :param ntags: number of distinct tags (output classes)
    """
    self.vw = vw
    self.vt = vt
    self.nwords = nwords
    self.ntags = ntags
    # Id of the unknown-word token, used for out-of-vocabulary lookups.
    self.UNK = self.vw.w2i["_UNK_"]
    self._model = dynet.Model()
    self._sgd = dynet.SimpleSGDTrainer(self._model)
    # 128-dim word embeddings and 30-dim previous-tag embeddings.
    self._E = self._model.add_lookup_parameters((self.nwords, 128))
    self._p_t1 = self._model.add_lookup_parameters((self.ntags, 30))
    # MLP on top of the BiLSTM output: (2 * 50) -> 32 -> ntags.
    self._pH = self._model.add_parameters((32, 50 * 2))
    self._pO = self._model.add_parameters((self.ntags, 32))
    # One-layer forward/backward LSTMs over the 128-dim embeddings.
    self._fwd_lstm = dynet.LSTMBuilder(1, 128, 50, self._model)
    self._bwd_lstm = dynet.LSTMBuilder(1, 128, 50, self._model)
    # Minibatch accumulators, flushed every _minibatch_size sentences.
    self._words_batch = []
    self._tags_batch = []
    self._minibatch_size = 32
def _init_monolingual_params(wf2id, lemma2id, char2id, msd2id):
    """Create encoder/decoder LSTMs, lookup tables and attention parameters
    for the monolingual inflection model, registering them on the
    module-level ``model``.

    Returns all created components as a flat tuple.
    """
    # NOTE(review): this local appears unused in the body — confirm.
    num_embedded_context_items = 8
    # Bidirectional encoder over concatenated state + embedding context.
    enc_fwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS,
                                  2 * STATE_SIZE + 2 * EMBEDDINGS_SIZE,
                                  STATE_SIZE, model)
    enc_bwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS,
                                  2 * STATE_SIZE + 2 * EMBEDDINGS_SIZE,
                                  STATE_SIZE, model)
    # Decoder input: attended encoder states (2 * STATE_SIZE) plus the
    # previous output character embedding.
    dec_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS,
                              STATE_SIZE * 2 + EMBEDDINGS_SIZE,
                              STATE_SIZE, model)
    # Embedding tables: characters, word forms, lemmas, MSD tags.
    character_lookup = model.add_lookup_parameters(
        (len(char2id), EMBEDDINGS_SIZE))
    word_lookup = model.add_lookup_parameters((len(wf2id), EMBEDDINGS_SIZE))
    lemma_lookup = model.add_lookup_parameters(
        (len(lemma2id), EMBEDDINGS_SIZE))
    msd_lookup = model.add_lookup_parameters((len(msd2id), EMBEDDINGS_SIZE))
    # Additive attention parameters.
    attention_w1 = model.add_parameters((ATTENTION_SIZE, STATE_SIZE * 2))
    attention_w2 = model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE * LSTM_NUM_OF_LAYERS * 2))
    attention_v = model.add_parameters((1, ATTENTION_SIZE))
    # Output projection over the character vocabulary.
    decoder_w = model.add_parameters((len(char2id), STATE_SIZE))
    decoder_b = model.add_parameters((len(char2id)))
    output_lookup = model.add_lookup_parameters(
        (len(char2id), EMBEDDINGS_SIZE))  # TODO: use the same lookup param for input and output
    # Context encoders over triples of embeddings (3 * EMBEDDINGS_SIZE).
    context_fwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS,
                                      3 * EMBEDDINGS_SIZE, STATE_SIZE, model)
    context_bwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS,
                                      3 * EMBEDDINGS_SIZE, STATE_SIZE, model)
    return enc_fwd_lstm, enc_bwd_lstm, dec_lstm, character_lookup,\
        word_lookup, lemma_lookup, msd_lookup, attention_w1, attention_w2, \
        attention_v, decoder_w, decoder_b, output_lookup, context_fwd_lstm, context_bwd_lstm
def __init__(self, c2i, num_lstm_layers=DEFAULT_LSTM_LAYERS,
             char_dim=DEFAULT_CHAR_DIM, hidden_dim=DEFAULT_HIDDEN_DIM,
             word_embedding_dim=DEFAULT_WORD_DIM, file=None):
    """Character-BiLSTM word-embedding model.

    :param c2i: character -> integer id mapping (sizes the char lookup)
    :param num_lstm_layers: layers in each character LSTM
    :param char_dim: character embedding size
    :param hidden_dim: per-direction LSTM hidden size
    :param word_embedding_dim: output word-representation size
    :param file: optional path of a saved dynet 2.0 model to populate from
    """
    self.c2i = c2i
    self.model = dy.Model()
    # Char LSTM Parameters
    self.char_lookup = self.model.add_lookup_parameters((len(c2i), char_dim), name="ce")
    self.char_fwd_lstm = dy.LSTMBuilder(num_lstm_layers, char_dim, hidden_dim, self.model)
    self.char_bwd_lstm = dy.LSTMBuilder(num_lstm_layers, char_dim, hidden_dim, self.model)
    # Post-LSTM Parameters: project the concatenated fwd+bwd final states
    # (2 * hidden_dim) to the word-embedding space, then one more MLP layer.
    self.lstm_to_rep_params = self.model.add_parameters((word_embedding_dim, hidden_dim * 2), name="H")
    self.lstm_to_rep_bias = self.model.add_parameters(word_embedding_dim, name="Hb")
    self.mlp_out = self.model.add_parameters((word_embedding_dim, word_embedding_dim), name="O")
    self.mlp_out_bias = self.model.add_parameters(word_embedding_dim, name="Ob")
    if file is not None:
        # read from saved file; see old_load() for dynet 1.0 format
        ### NOTE - dynet 2.0 only supports explicit loading into params, so
        ### dimensionalities all need to be specified in init
        self.model.populate(file)
def init_parameters(self):
    """Create the parameter collection and register all model parameters.

    Reads self.vocab_size, self.hidden_dim, self.num_layers and
    self.num_clusters, which must be set before this is called.
    """
    self.params = dy.ParameterCollection()
    self.embeddings = self.params.add_lookup_parameters(
        (self.vocab_size, self.hidden_dim))
    self.sentence_encoder = dy.LSTMBuilder(self.num_layers, self.hidden_dim,
                                           self.hidden_dim, self.params)
    # TODO: Edit context encoder
    # NOTE(review): the context encoder's input size is num_clusters,
    # unlike the other LSTMs — confirm this is intended.
    self.context_encoder = dy.LSTMBuilder(self.num_layers, self.num_clusters,
                                          self.hidden_dim, self.params)
    # Decoder consumes a concatenation of two hidden-size vectors.
    self.output_decoder = dy.LSTMBuilder(self.num_layers,
                                         2 * self.hidden_dim,
                                         self.hidden_dim, self.params)
    # Output projection to the vocabulary and its bias.
    self.R = self.params.add_parameters((self.vocab_size, self.hidden_dim))
    self.b = self.params.add_parameters((self.vocab_size, ))
    # Cluster scoring matrix over concatenated states.
    self.W = self.params.add_parameters(
        (self.num_clusters, 2 * self.hidden_dim))
def __init__(self, pc, layers, emb_dim, hidden_dim, vocab_size, tied):
    """RNN language-model component.

    Registers an LSTM, learnable initial states, an output MLP and —
    unless embeddings are tied to the output layer — a word-embedding
    table, all under a fresh sub-collection of ``pc``.
    """
    self.spec = (layers, emb_dim, hidden_dim, vocab_size)
    self.pc = pc.add_subcollection()
    self.rnn = dy.LSTMBuilder(layers, emb_dim, hidden_dim, self.pc)
    # One learnable (h, c) pair per layer, hence 2 * layers vectors.
    self.initial_state_params = []
    for _layer_state in range(2 * layers):
        self.initial_state_params.append(
            self.pc.add_parameters((hidden_dim, )))
    self.output_mlp = MLP(self.pc, [hidden_dim, hidden_dim, vocab_size])
    self.tied = tied
    if not self.tied:
        # Separate input embeddings only when not sharing with the output.
        self.word_embs = self.pc.add_lookup_parameters(
            (vocab_size, emb_dim))
    self.dropout_rate = 0.0
def __init__(self):
    """Char-level LSTM tagger: embeddings, LSTM, and a two-layer MLP.

    All sizes come from module-level constants (C2I, T2I, EMBEDDINGS_DIM,
    LAYERS, HIDDEN_DIM, HIDDEN_MLP_DIM).
    """
    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model)
    self.embeddings = self.model.add_lookup_parameters(
        (len(C2I), EMBEDDINGS_DIM))
    self.builder = dy.LSTMBuilder(LAYERS, EMBEDDINGS_DIM, HIDDEN_DIM,
                                  self.model)
    # MLP - hidden layer
    self.W1 = self.model.add_parameters((HIDDEN_MLP_DIM, HIDDEN_DIM))
    self.b1 = self.model.add_parameters(HIDDEN_MLP_DIM)
    # MLP - output layer
    self.W2 = self.model.add_parameters((len(T2I), HIDDEN_MLP_DIM))
    self.b2 = self.model.add_parameters(len(T2I))
def __init__(self, vocab_actions, vocab, char_to_int):
    """Construct the encoder/decoder components and the optimizer.

    :param vocab_actions: output action vocabulary (sizes the output layer)
    :param vocab: input token vocabulary (sizes the input embeddings)
    :param char_to_int: character -> integer id mapping
    """
    self._vocab_actions = vocab_actions
    self._vocab = vocab
    self._char_to_int = char_to_int
    self._LSTM_NUM_OF_LAYERS = 2
    self._EMBEDDINGS_SIZE = 50
    # NOTE(review): _CHAR_EMBEDDINGS_SIZE is declared but the char lookup
    # below uses _EMBEDDINGS_SIZE (matching the 50-dim char LSTM input) —
    # confirm which was intended.
    self._CHAR_EMBEDDINGS_SIZE = 20
    self._STATE_SIZE = 100
    self._ATTENTION_SIZE = 100
    self._VOCAB_SIZE = len(vocab)
    self._ACTION_SIZE = len(vocab_actions)
    # BUG FIX: was len(conv_int_to_char), an undefined global; the char
    # vocabulary size comes from the char_to_int argument.
    self._VOCAB_SIZE_CHAR = len(char_to_int)
    self._pc = dy.ParameterCollection()
    # Token encoder and decoder (decoder also consumes the 2*STATE_SIZE
    # attention context).
    self._ENC_FWD_LSTM = dy.LSTMBuilder(self._LSTM_NUM_OF_LAYERS,
                                        self._EMBEDDINGS_SIZE,
                                        self._STATE_SIZE, self._pc)
    self._DEC_LSTM = dy.LSTMBuilder(self._LSTM_NUM_OF_LAYERS,
                                    self._EMBEDDINGS_SIZE + self._STATE_SIZE * 2,
                                    self._STATE_SIZE, self._pc)
    # Character-level encoder (50 -> 75).
    self._ENC_FWD_LSTM_CHAR = dy.LSTMBuilder(self._LSTM_NUM_OF_LAYERS,
                                             50, 75, self._pc)
    self._input_lookup = self._pc.add_lookup_parameters(
        (self._VOCAB_SIZE, self._EMBEDDINGS_SIZE))
    self._char_lookup = self._pc.add_lookup_parameters(
        (self._VOCAB_SIZE_CHAR, self._EMBEDDINGS_SIZE))
    self._output_lookup = self._pc.add_lookup_parameters(
        (self._ACTION_SIZE, self._EMBEDDINGS_SIZE))
    # Position embeddings: 7 positions, 25 dims each.
    self._pos_lookup = self._pc.add_lookup_parameters((7, 25))
    # Output projection over actions.
    self._R = self._pc.add_parameters((self._ACTION_SIZE, self._STATE_SIZE))
    self._bias = self._pc.add_parameters((self._ACTION_SIZE))
    # Additive attention parameters.
    self._attention_v = self._pc.add_parameters((1, self._ATTENTION_SIZE))
    self._attention_w1 = self._pc.add_parameters(
        (self._ATTENTION_SIZE, self._STATE_SIZE))
    self._attention_w2 = self._pc.add_parameters(
        (self._ATTENTION_SIZE, self._STATE_SIZE))
    self._attention_b1 = self._pc.add_parameters(
        (self._ATTENTION_SIZE, self._STATE_SIZE))
    self._attention_b2 = self._pc.add_parameters((self._ATTENTION_SIZE))
    self._trainer = dy.SimpleSGDTrainer(self._pc)
def __init__(self, rep):
    """BiLSTM tagger whose word representation depends on ``rep``:
    'a' word embeddings, 'b' char-LSTM, 'c' prefix/suffix embeddings,
    'd' word + char-LSTM combined through a linear layer.
    """
    self.rep = rep
    self.W2I = utils.W2I
    self.T2I = utils.T2I
    self.C2I = utils.C2I
    self.I2W = utils.I2W
    self.I2T = utils.I2T
    self.I2C = utils.I2C
    self.P2I = utils.P2I
    self.S2I = utils.S2I
    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, LR)
    # word embedding matrix
    if rep == 'a':
        self.E = self.model.add_lookup_parameters(
            (len(self.W2I), WORD_EMBEDDING_DIM))
    if rep == 'b':
        self.E_CHAR = self.model.add_lookup_parameters(
            (len(self.C2I), CHAR_EMBEDDING_DIM))
        # acceptor, for chars to word size of 100
        self.char_LSTM = dy.LSTMBuilder(1, CHAR_EMBEDDING_DIM,
                                        WORD_EMBEDDING_DIM, self.model)
    if rep == 'c':
        # Prefix and suffix embedding tables.
        self.E_PREF = self.model.add_lookup_parameters(
            (len(self.P2I), WORD_EMBEDDING_DIM))
        self.E_SUFF = self.model.add_lookup_parameters(
            (len(self.S2I), WORD_EMBEDDING_DIM))
    if rep == 'd':
        self.E = self.model.add_lookup_parameters(
            (len(self.W2I), WORD_EMBEDDING_DIM))
        self.E_CHAR = self.model.add_lookup_parameters(
            (len(self.C2I), CHAR_EMBEDDING_DIM))
        # acceptor, for chars to word size of 100
        self.char_LSTM = dy.LSTMBuilder(1, CHAR_EMBEDDING_DIM,
                                        WORD_EMBEDDING_DIM, self.model)
        # Linear layer combining the word and char representations.
        self.W = self.model.add_parameters(
            (WORD_EMBEDDING_DIM, WORD_EMBEDDING_DIM * 2))
        self.b = self.model.add_parameters(WORD_EMBEDDING_DIM)
    # builders = [first forward, first backward,....]
    self.builders = [
        dy.LSTMBuilder(1, WORD_EMBEDDING_DIM, LSTM_DIM, self.model),
        dy.LSTMBuilder(1, WORD_EMBEDDING_DIM, LSTM_DIM, self.model),
        dy.LSTMBuilder(1, WORD_EMBEDDING_DIM, LSTM_DIM, self.model),
        dy.LSTMBuilder(1, WORD_EMBEDDING_DIM, LSTM_DIM, self.model),
    ]
    # MLP mult on: b'1,..b'n
    # PH is from first BiLSTM so I choose the output dim
    # PO dim is same as TAG length - (classifier..)
    self.PH = self.model.add_parameters((HID_DIM, LSTM_DIM * 2))
    self.PO = self.model.add_parameters((len(self.T2I), HID_DIM))
def __init__(self, model, training_src, training_tgt, embed_size=256,
             hidden_size=256, attention_size=32):
    """Attentional encoder-decoder MT model.

    Builds source/target vocabularies from the training corpora and
    registers embeddings, a bidirectional encoder, a decoder with
    dropout, and additive-attention parameters on ``model``.
    """
    self.vw_src = Vocab.from_corpus(training_src)
    self.vw_tgt = Vocab.from_corpus(training_tgt)
    self.src_vocab_size = self.vw_src.size()
    self.tgt_vocab_size = self.vw_tgt.size()
    self.model = model
    # Paired training sentences.
    self.training = list(zip(training_src, training_tgt))
    self.src_token_to_id, self.src_id_to_token = self.vw_src.w2i, self.vw_src.i2w
    self.tgt_token_to_id, self.tgt_id_to_token = self.vw_tgt.w2i, self.vw_tgt.i2w
    self.src_pad = '<S>'
    self.tgt_pad = '</S>'
    # Decoding hyper-parameters.
    self.beam_size = 10
    self.max_len = 80
    self.BATCH_SIZE = 32
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.attention_size = attention_size
    self.layers = 1
    # Source/target embedding tables.
    self.src_lookup = model.add_lookup_parameters(
        (self.src_vocab_size, self.embed_size))
    self.tgt_lookup = model.add_lookup_parameters(
        (self.tgt_vocab_size, self.embed_size))
    # Bidirectional encoder with dropout 0.2 on each direction.
    self.l2r_builder = dy.LSTMBuilder(self.layers, self.embed_size,
                                      self.hidden_size, model)
    self.r2l_builder = dy.LSTMBuilder(self.layers, self.embed_size,
                                      self.hidden_size, model)
    self.l2r_builder.set_dropout(0.2)
    self.r2l_builder.set_dropout(0.2)
    # Decoder input: bidirectional context (2 * hidden) + embedding.
    self.dec_builder = dy.LSTMBuilder(
        self.layers, 2 * self.hidden_size + self.embed_size,
        self.hidden_size, model)
    self.dec_builder.set_dropout(0.2)
    # Output projection and additive-attention parameters.
    self.W_y = model.add_parameters((self.tgt_vocab_size, self.hidden_size))
    self.b_y = model.add_parameters((self.tgt_vocab_size))
    self.W1_att_f = model.add_parameters(
        (self.attention_size, 2 * self.hidden_size))
    self.W1_att_e = model.add_parameters(
        (self.attention_size, self.hidden_size))
    self.w2_att = model.add_parameters((self.attention_size))
def init_model_c(vocab, tag_set, embedding_file):
    # Build a 2-layer BiLSTM tagger whose embedding table is initialized
    # from a whitespace-separated vector file; rows missing for vocabulary
    # entries are filled with uniform random vectors.
    # NOTE: this block is Python 2 (print statement).
    model = dy.ParameterCollection()
    params = {}
    TAGSET_SIZE = len(tag_set)
    VOCAB_SIZE = len(vocab)
    with open(embedding_file) as f:
        numbers = 0
        print "load word vectors ..."
        input_wordVectors = []
        for line in f:
            number_strings = line.split( )  # Split the line on runs of whitespace
            numbers = [float(n) for n in number_strings]  # Convert to floats
            input_wordVectors.append(numbers)
    # Pad with Xavier-style uniform random vectors until every vocabulary
    # entry has a row; `numbers` holds the last-read vector, fixing the dim.
    while len(input_wordVectors) < len(vocab):
        eps = np.sqrt(6) / np.sqrt(len(numbers))
        vec = np.random.uniform(-eps, eps, len(numbers))
        input_wordVectors.append(vec)
    params["lookup"] = model.add_lookup_parameters(
        (VOCAB_SIZE, len(input_wordVectors[0])))
    params["lookup"].init_from_array(np.array(input_wordVectors))
    # Two stacked BiLSTM layers.
    f1_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_STATE_DIM, model)
    b1_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_STATE_DIM, model)
    f2_lstm = dy.LSTMBuilder(LSTM_LAYERS, 2 * LSTM_STATE_DIM, LSTM_STATE_DIM, model)
    b2_lstm = dy.LSTMBuilder(LSTM_LAYERS, 2 * LSTM_STATE_DIM, LSTM_STATE_DIM, model)
    lstms = (f1_lstm, b1_lstm, f2_lstm, b2_lstm)
    # Linear read-out over the concatenated BiLSTM output.
    params["w"] = model.add_parameters((TAGSET_SIZE, 2 * LSTM_STATE_DIM))
    params["bias"] = model.add_parameters((TAGSET_SIZE))
    trainer = dy.AdamTrainer(model)
    return (lstms, params, model, trainer)
def __init__(self, name, **kwargs):
    """Character embeddings composed by a bidirectional LSTM.

    :param name: name used for the embedding weights
    :param kwargs: pc (parent collection), vsz, dsz, weights (numpy array),
        finetune, lstmsz, layers, pdrop
    """
    # NOTE(review): the sub-collection uses kwargs['name'] (default
    # 'char-lstm'), not the positional `name` parameter — confirm both
    # are intended to coexist.
    pc = kwargs['pc'].add_subcollection(
        name=kwargs.get('name', 'char-lstm'))
    super(CharLSTMEmbeddings, self).__init__(pc)
    self.vsz = kwargs.get('vsz')
    self.dsz = kwargs.get('dsz')
    self.finetune = kwargs.get('finetune', True)
    self.name = name
    # Pretrained character-embedding weights.
    weights = kwargs.get('weights')
    self.embeddings = self.pc.lookup_parameters_from_numpy(weights, name=name)
    self.lstmsz = kwargs.get('lstmsz', 50)
    layers = kwargs.get('layers', 1)
    self.pdrop = kwargs.get('pdrop', 0.5)
    self.lookup = dy.lookup_batch
    # Each direction gets half of lstmsz so the concatenation is lstmsz.
    self.lstm_fwd = dy.LSTMBuilder(layers, self.dsz, self.lstmsz // 2,
                                   model=self.pc)
    self.lstm_bwd = dy.LSTMBuilder(layers, self.dsz, self.lstmsz // 2,
                                   model=self.pc)
def init_params(self):
    """Register graph-encoder parameters on top of the base class's.

    Reads self.pc, self.embedding_size, self.counter_size, self.vocab
    and self.counters, which the base class / constructor must provide.
    """
    super().init_params()
    # Encoders over (head, relation, tail) triples: 3 * embedding_size in.
    self.entity_encoder = self.pc.add_parameters(
        (self.embedding_size, self.embedding_size * 3))  # e N e
    self.relation_encoder = self.pc.add_parameters(
        (self.embedding_size, self.embedding_size * 3))  # N e N
    # Stand-in vector used when no entity is present.
    self.no_ent = self.pc.add_parameters(self.embedding_size)
    self.vocab.create_lookup(self.pc, self.embedding_size)
    self.counters.create_lookup(self.pc, self.counter_size)
    # 3-layer decoder over embedding + 4 counter embeddings.
    self.decoder = dy.LSTMBuilder(
        3, self.embedding_size + self.counter_size * 4,
        self.embedding_size, self.pc)
def init():
    """Create the module-level seq2seq model, LSTMs, lookups and
    attention parameters; sizes come from module-level constants."""
    global model, enc_fwd_lstm, enc_bwd_lstm, dec_lstm, input_lookup, attention_w1,\
        attention_w2, attention_v, decoder_w, decoder_b, output_lookup, VOCAB_SIZE
    VOCAB_SIZE = len(char2int)
    model = dy.Model()
    # Bidirectional character encoder.
    enc_fwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model)
    enc_bwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model)
    # Decoder input: attended encoder states + previous output embedding.
    dec_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE*2+EMBEDDINGS_SIZE, STATE_SIZE, model)
    input_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMBEDDINGS_SIZE))
    # Additive attention parameters.
    attention_w1 = model.add_parameters( (ATTENTION_SIZE, STATE_SIZE*2))
    attention_w2 = model.add_parameters( (ATTENTION_SIZE, STATE_SIZE*LSTM_NUM_OF_LAYERS*2))
    attention_v = model.add_parameters( (1, ATTENTION_SIZE))
    # Output projection over the character vocabulary.
    decoder_w = model.add_parameters( (VOCAB_SIZE, STATE_SIZE))
    decoder_b = model.add_parameters( (VOCAB_SIZE))
    output_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMBEDDINGS_SIZE))
def __init__(self, model, LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE,
             INPUT_VOCAB_SIZE, OUTPUT_VOCAB_SIZE):
    """Attentional seq2seq model with separate input/output vocabularies.

    :param model: DyNet model all parameters are registered on
    :param LSTM_NUM_OF_LAYERS: layers for each LSTM
    :param EMBEDDINGS_SIZE: embedding dimensionality
    :param STATE_SIZE: LSTM hidden size
    :param INPUT_VOCAB_SIZE: rows of the input embedding table
    :param OUTPUT_VOCAB_SIZE: rows of the output table / output layer
    """
    self.model = model
    # Bidirectional encoder.
    self.enc_fwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                       STATE_SIZE, model)
    self.enc_bwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                       STATE_SIZE, model)
    # Decoder input: attended encoder states (2 * STATE_SIZE) + embedding.
    self.dec_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS,
                                   STATE_SIZE * 2 + EMBEDDINGS_SIZE,
                                   STATE_SIZE, model)
    self.input_lookup = model.add_lookup_parameters(
        (INPUT_VOCAB_SIZE, EMBEDDINGS_SIZE))
    # Additive attention parameters; ATTENTION_SIZE is a module constant.
    self.attention_w1 = model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE * 2))
    self.attention_w2 = model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE * LSTM_NUM_OF_LAYERS * 2))
    self.attention_v = model.add_parameters((1, ATTENTION_SIZE))
    # Output projection over the output vocabulary.
    self.decoder_w = model.add_parameters((OUTPUT_VOCAB_SIZE, STATE_SIZE))
    self.decoder_b = model.add_parameters((OUTPUT_VOCAB_SIZE))
    self.output_lookup = model.add_lookup_parameters(
        (OUTPUT_VOCAB_SIZE, EMBEDDINGS_SIZE))
def init_model_b(vocab, tag_set, trained_model):
    """Build the char-LSTM + 2-layer BiLSTM tagger and populate its
    weights from a previously saved model file.

    :param vocab: word vocabulary (sizes the lookup table)
    :param tag_set: tag inventory (sizes the output layer)
    :param trained_model: path to the saved model to populate from
    :return: (lstms, params, model, trainer)
    """
    TAGSET_SIZE = len(tag_set)
    VOCAB_SIZE = len(vocab)
    model = dy.ParameterCollection()
    # Character LSTM feeding the word-level BiLSTM stack.
    c_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_INPUT_DIM, model)
    f1_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_STATE_DIM, model)
    b1_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_STATE_DIM, model)
    f2_lstm = dy.LSTMBuilder(LSTM_LAYERS, 2 * LSTM_STATE_DIM, LSTM_STATE_DIM, model)
    b2_lstm = dy.LSTMBuilder(LSTM_LAYERS, 2 * LSTM_STATE_DIM, LSTM_STATE_DIM, model)
    lstms = (c_lstm, f1_lstm, b1_lstm, f2_lstm, b2_lstm)
    params = {}
    # Uniform init with a Xavier-style scale.
    params["lookup"] = model.add_lookup_parameters(
        (VOCAB_SIZE, LSTM_INPUT_DIM), init='uniform',
        scale=(np.sqrt(6) / np.sqrt(LSTM_INPUT_DIM)))
    params["w"] = model.add_parameters((TAGSET_SIZE, 2 * LSTM_STATE_DIM))
    params["bias"] = model.add_parameters((TAGSET_SIZE))
    # Overwrite all weights with the saved model (must match layout above).
    model.populate(trained_model)
    trainer = dy.AdamTrainer(model)
    return (lstms, params, model, trainer)
def init_model(wf2id, lemma2id, char2id, msd2id):
    """Create the module-level inflection model: encoder/decoder LSTMs,
    lookup tables for word forms, lemmas, characters and MSD tags, and
    attention parameters."""
    global model, enc_fwd_lstm, enc_bwd_lstm, dec_lstm, character_lookup,\
        word_lookup, lemma_lookup, msd_lookup, attention_w1, attention_w2, \
        attention_v, decoder_w, decoder_b, output_lookup
    model = dy.Model()
    # Encoder input is a concatenation of 8 embedded context items.
    enc_fwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, 8*EMBEDDINGS_SIZE, STATE_SIZE, model)
    enc_bwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, 8*EMBEDDINGS_SIZE, STATE_SIZE, model)
    # Decoder input: attended encoder states + previous output embedding.
    dec_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE*2+EMBEDDINGS_SIZE, STATE_SIZE, model)
    character_lookup = model.add_lookup_parameters((len(char2id), EMBEDDINGS_SIZE))
    word_lookup = model.add_lookup_parameters((len(wf2id), EMBEDDINGS_SIZE))
    lemma_lookup = model.add_lookup_parameters((len(lemma2id), EMBEDDINGS_SIZE))
    msd_lookup = model.add_lookup_parameters((len(msd2id), EMBEDDINGS_SIZE))
    # Additive attention parameters.
    attention_w1 = model.add_parameters( (ATTENTION_SIZE, STATE_SIZE*2))
    attention_w2 = model.add_parameters( (ATTENTION_SIZE, STATE_SIZE*LSTM_NUM_OF_LAYERS*2))
    attention_v = model.add_parameters( (1, ATTENTION_SIZE))
    # Output projection over the character vocabulary.
    decoder_w = model.add_parameters( (len(char2id), STATE_SIZE))
    decoder_b = model.add_parameters( (len(char2id)))
    output_lookup = model.add_lookup_parameters((len(char2id), EMBEDDINGS_SIZE))
def build_model(self):
    """Create the POS-tagger network.

    :return: (model, params, builders) — the DyNet model, a dict of
        named parameters, and the forward/backward LSTM builders.
    """
    model = dy.Model()
    params = {}
    # 128-dim word embeddings and 30-dim previous-tag embeddings.
    params["E"] = model.add_lookup_parameters((self.n_words, 128))
    params["p_t1"] = model.add_lookup_parameters((self.n_tags, 30))
    if self.use_mlp:
        # Hidden layer on top of the BiLSTM output (2 * 50 units).
        params["H"] = model.add_parameters((32, 50 * 2))
        params["O"] = model.add_parameters((self.n_tags, 32))
    else:
        # Linear read-out straight from the BiLSTM output.
        params["O"] = model.add_parameters((self.n_tags, 50 * 2))
    # Forward and backward one-layer LSTMs.
    builders = [dy.LSTMBuilder(1, 128, 50, model) for _ in range(2)]
    return model, params, builders
def __init__(self, vocab_size, arr_size, deps_size,
             params=(0.3, 0.3, 1, 12, 12)):
    """Two-sequence (constituency + dependency) classifier model.

    :param vocab_size: word vocabulary size
    :param arr_size: arrow-symbol vocabulary size
    :param deps_size: dependency-label vocabulary size
    :param params: (dep_dropout, cons_dropout, embed_dim, cons_dim, deps_dim)
    """
    dep_dropout, cons_dropout, embed_dim, cons_dim, deps_dim = params
    self.deps_dropout = dep_dropout
    self.cons_dropout = cons_dropout
    self.embed_dim = embed_dim
    self.cons_dim = cons_dim
    self.deps_dim = deps_dim
    # NOTE(review): `self._model` is assigned here but everything below
    # accesses `self.model` — presumably a property defined elsewhere on
    # this class; verify, otherwise this raises AttributeError.
    self._model = dy.ParameterCollection()
    # lookups for the inputs
    # idea - have different lookup for the different sequence models
    self.word_lookup = self.model.add_lookup_parameters(
        (vocab_size, embed_dim))
    self.arrow_lookup = self.model.add_lookup_parameters(
        (arr_size, embed_dim))
    self.dep_lookup = self.model.add_lookup_parameters(
        (deps_size, embed_dim))
    # sequence LSTM's
    self.cons_lstm = dy.LSTMBuilder(1, embed_dim, self.cons_dim, self.model)
    self.deps_lstm = dy.LSTMBuilder(1, embed_dim, self.deps_dim, self.model)
    # idea - add b's (biases vectors)
    # Three-layer MLP classifier; input also carries 4 extra features.
    dims = (128, 64)
    self.pW1 = self.model.add_parameters(
        (dims[0], 4 + self.cons_dim + self.deps_dim))
    self.pb1 = self.model.add_parameters(dims[0])
    self.pW2 = self.model.add_parameters((dims[1], dims[0]))
    self.pb2 = self.model.add_parameters(dims[1])
    # 3-way output layer.
    self.pW3 = self.model.add_parameters((3, dims[1]))
    self.pb3 = self.model.add_parameters(3)
def init(self):
    """Build the dual-encoder (pre/pos context) attentional generator:
    two bidirectional encoders, a decoder attending over both, shared
    embeddings, and the softmax read-out."""
    dy.renew_cg()
    self.VOCAB_SIZE = len(self.vocab)
    self.model = dy.Model()
    # ENCODERS
    # Bidirectional encoder over the preceding context, with dropout.
    self.encpre_fwd_lstm = dy.LSTMBuilder(self.config.lstm_depth, self.config.embedding_dim, self.config.state_dim, self.model)
    self.encpre_bwd_lstm = dy.LSTMBuilder(self.config.lstm_depth, self.config.embedding_dim, self.config.state_dim, self.model)
    self.encpre_fwd_lstm.set_dropout(self.config.dropout)
    self.encpre_bwd_lstm.set_dropout(self.config.dropout)
    # Bidirectional encoder over the following context, with dropout.
    self.encpos_fwd_lstm = dy.LSTMBuilder(self.config.lstm_depth, self.config.embedding_dim, self.config.state_dim, self.model)
    self.encpos_bwd_lstm = dy.LSTMBuilder(self.config.lstm_depth, self.config.embedding_dim, self.config.state_dim, self.model)
    self.encpos_fwd_lstm.set_dropout(self.config.dropout)
    self.encpos_bwd_lstm.set_dropout(self.config.dropout)
    # DECODER
    # Input: attended states from both encoders (state_dim * 4) plus
    # four embedded inputs (embedding_dim * 4).
    self.dec_lstm = dy.LSTMBuilder(self.config.lstm_depth, (self.config.state_dim * 4) + (self.config.embedding_dim * 4), self.config.state_dim, self.model)
    self.dec_lstm.set_dropout(self.config.dropout)
    # EMBEDDINGS
    self.lookup = self.model.add_lookup_parameters((self.VOCAB_SIZE, self.config.embedding_dim))
    # ATTENTION
    # Separate additive-attention parameters for the pre and pos encoders.
    self.attention_w1_pre = self.model.add_parameters((self.config.attention_dim, self.config.state_dim * 2))
    self.attention_w2_pre = self.model.add_parameters((self.config.attention_dim, self.config.state_dim * self.config.lstm_depth * 2))
    self.attention_v_pre = self.model.add_parameters((1, self.config.attention_dim))
    self.attention_w1_pos = self.model.add_parameters((self.config.attention_dim, self.config.state_dim * 2))
    self.attention_w2_pos = self.model.add_parameters((self.config.attention_dim, self.config.state_dim * self.config.lstm_depth * 2))
    self.attention_v_pos = self.model.add_parameters((1, self.config.attention_dim))
    # SOFTMAX
    self.decoder_w = self.model.add_parameters((self.VOCAB_SIZE, self.config.state_dim))
    self.decoder_b = self.model.add_parameters((self.VOCAB_SIZE))
def old_style_save_and_load():
    """Exercise the old-style whole-model save/load round trip.

    Builds two models with identical parameter layouts, asserts their
    weights differ, loads the first model's saved file into the second,
    then asserts the weights match. Cleans up the saved file.
    """
    # create a model and add parameters.
    m = dy.Model()
    a = m.add_parameters((100, 100))
    b = m.add_lookup_parameters((20, 2))
    t1 = Transfer(5, 6, dy.softmax, m)
    t2 = Transfer(7, 8, dy.softmax, m)
    tt = MultiTransfer([10, 10, 10, 10], dy.tanh, m)
    c = m.add_parameters((100))
    lb = dy.LSTMBuilder(1, 2, 3, m)
    lb2 = dy.LSTMBuilder(2, 4, 4, m)
    # save
    m.save("test1")
    # create new model (same parameters):
    m2 = dy.Model()
    a2 = m2.add_parameters((100, 100))
    b2 = m2.add_lookup_parameters((20, 2))
    t12 = Transfer(5, 6, dy.softmax, m2)
    t22 = Transfer(7, 8, dy.softmax, m2)
    tt2 = MultiTransfer([10, 10, 10, 10], dy.tanh, m2)
    c2 = m2.add_parameters((100))
    # BUG FIX: these were named lb2/lb22, so the second model's first
    # builder silently clobbered the first model's `lb2` reference.
    lb12 = dy.LSTMBuilder(1, 2, 3, m2)
    lb22 = dy.LSTMBuilder(2, 4, 4, m2)
    # parameters should be different
    for p1, p2 in [(a, a2), (b, b2), (c, c2), (t1.W, t12.W),
                   (tt.transfers[0].W, tt2.transfers[0].W)]:
        assert(not numpy.array_equal(p1.as_array(), p2.as_array()))
    m2.load("test1")
    # parameters should be same
    for p1, p2 in [(a, a2), (b, b2), (c, c2), (t1.W, t12.W),
                   (tt.transfers[0].W, tt2.transfers[0].W)]:
        assert(numpy.array_equal(p1.as_array(), p2.as_array()))
    os.remove("test1")
def init_models(chars, fn=None):
    """Initialize the module-level character seq2seq model.

    :param chars: character set used to build the vocabulary from scratch
    :param fn: optional base filename; when given, the character inventory
        is loaded from '<fn>.chars.pkl' instead of `chars`
    """
    global characters, int2char, char2int, model, enc_fwd_lstm, enc_bwd_lstm, \
        dec_lstm, input_lookup, attention_w1, attention_w2, attention_v, \
        decoder_w, decoder_b, output_lookup
    characters = None
    int2char = None
    char2int = None
    if fn:
        # Restore a previously pickled character inventory.
        characters, int2char, char2int = \
            pickle.load(open('%s.chars.pkl' % fn, 'rb'))
    else:
        # Build the inventory from the provided character set.
        characters = chars
        int2char = sorted(list(characters))
        char2int = {c: i for i, c in enumerate(int2char)}
    VOCAB_SIZE = len(characters)
    model = dy.Model()
    # Bidirectional character encoder.
    enc_fwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                  STATE_SIZE, model)
    enc_bwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                  STATE_SIZE, model)
    # Decoder input: attended encoder states + previous output embedding.
    dec_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS,
                              STATE_SIZE * 2 + EMBEDDINGS_SIZE,
                              STATE_SIZE, model)
    input_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMBEDDINGS_SIZE))
    # Additive attention parameters.
    attention_w1 = model.add_parameters((ATTENTION_SIZE, STATE_SIZE * 2))
    attention_w2 = model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE * LSTM_NUM_OF_LAYERS * 2))
    attention_v = model.add_parameters((1, ATTENTION_SIZE))
    # Output projection over the character vocabulary.
    decoder_w = model.add_parameters((VOCAB_SIZE, STATE_SIZE))
    decoder_b = model.add_parameters((VOCAB_SIZE))
    output_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMBEDDINGS_SIZE))
def __init__(self, lemmatizer_config, encodings, embeddings, runtime=False):
    """Attention-based neural lemmatizer.

    :param lemmatizer_config: config object with tag/char/rnn sizes
    :param encodings: label/char/tag encodings (upos2int, char2int, ...)
    :param embeddings: external word embeddings, stored for later use
    :param runtime: when True use a plain LSTMBuilder; when False use the
        orthonormally-initialized variant for training
    """
    self.config = lemmatizer_config
    self.encodings = encodings
    # Bug in encodings - this will be removed after UD Shared Task
    self.has_bug = False
    if self.encodings.char2int[' '] != 1:
        self.has_bug = True
    self.embeddings = embeddings
    self.losses = []
    self.model = dy.Model()
    # NOTE(review): beta_2=0.9 differs from Adam's usual 0.999 — confirm.
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9)
    # Character-level encoder shared with the tag embeddings.
    self.character_network = CharacterNetwork(self.config.tag_embeddings_size, encodings,
                                              rnn_size=self.config.char_rnn_size,
                                              rnn_layers=self.config.char_rnn_layers,
                                              embeddings_size=self.config.char_embeddings,
                                              model=self.model, runtime=runtime)
    # Morphological tag embedding tables.
    self.upos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.upos2int), self.config.tag_embeddings_size))
    self.xpos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
    self.attrs_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
    self.char_lookup = self.model.add_lookup_parameters((len(self.encodings.char2int), self.config.char_embeddings))
    if runtime:
        self.rnn = dy.LSTMBuilder(self.config.rnn_layers,
                                  self.config.char_rnn_size * 2 + self.config.char_embeddings,
                                  self.config.rnn_size, self.model)
    else:
        # Orthonormal initialization improves training stability.
        from utils import orthonormal_VanillaLSTMBuilder
        self.rnn = orthonormal_VanillaLSTMBuilder(self.config.rnn_layers,
                                                  self.config.char_rnn_size * 2 + self.config.char_embeddings,
                                                  self.config.rnn_size, self.model)
    # Additive attention over the character encoder states.
    self.att_w1 = self.model.add_parameters((200, self.config.char_rnn_size * 2))
    self.att_w2 = self.model.add_parameters((200, self.config.rnn_size + self.config.tag_embeddings_size))
    self.att_v = self.model.add_parameters((1, 200))
    # Learned start-of-sequence input for the decoder RNN.
    self.start_lookup = self.model.add_lookup_parameters(
        (1, self.config.char_rnn_size * 2 + self.config.char_embeddings))
    # Character softmax (+1 output is presumably an end-of-word symbol —
    # TODO confirm) and a separate 2-way casing classifier.
    self.softmax_w = self.model.add_parameters((len(self.encodings.char2int) + 1, self.config.rnn_size))
    self.softmax_b = self.model.add_parameters((len(self.encodings.char2int) + 1))
    self.softmax_casing_w = self.model.add_parameters((2, self.config.rnn_size))
    self.softmax_casing_b = self.model.add_parameters((2))
def __init__(self, word2vec, model):
    """Attentional encoder-decoder initialized from word2vec vectors.

    :param word2vec: gensim-style model; its wv.vectors seed the input
        embeddings
    :param model: DyNet model parameters are registered on
    """
    self.LSTM_NUM_OF_LAYERS = 2
    self.VOCAB_SIZE = len(word2vec.wv.vocab)
    self.EMBEDDINGS_SIZE = 300
    self.ATTENTION_SIZE = 32
    self.STATE_SIZE = 32
    self.model = model
    # Additive attention parameters.
    self.attention_w1 = model.add_parameters(
        (self.ATTENTION_SIZE, self.STATE_SIZE * 2))
    self.attention_w2 = model.add_parameters(
        (self.ATTENTION_SIZE,
         self.STATE_SIZE * self.LSTM_NUM_OF_LAYERS * 2))
    self.attention_v = model.add_parameters((1, self.ATTENTION_SIZE))
    # Bidirectional encoder over word2vec embeddings.
    self.enc_fwd_lstm = dy.LSTMBuilder(self.LSTM_NUM_OF_LAYERS,
                                       self.EMBEDDINGS_SIZE,
                                       self.STATE_SIZE, model)
    self.enc_bwd_lstm = dy.LSTMBuilder(self.LSTM_NUM_OF_LAYERS,
                                       self.EMBEDDINGS_SIZE,
                                       self.STATE_SIZE, model)
    # VOCAB_SIZE + 1 rows: the extra final row is an all-zeros vector
    # (appended below via np.zeros).
    self.input_lookup = model.add_lookup_parameters(
        (self.VOCAB_SIZE + 1, self.EMBEDDINGS_SIZE))
    self.input_lookup.init_from_array(
        np.concatenate(
            (word2vec.wv.vectors,
             np.zeros(self.EMBEDDINGS_SIZE, dtype=float).reshape(1, -1)),
            axis=0))
    # Output projection over the (unpadded) vocabulary.
    self.decoder_w = model.add_parameters(
        (self.VOCAB_SIZE, self.STATE_SIZE))
    self.decoder_b = model.add_parameters((self.VOCAB_SIZE))
    # Decoder input: bidirectional context + previous output embedding.
    self.dec_lstm = dy.LSTMBuilder(
        self.LSTM_NUM_OF_LAYERS,
        self.STATE_SIZE * 2 + self.EMBEDDINGS_SIZE,
        self.STATE_SIZE, model)
    # Two-layer MLP producing a binary decision from the decoder state.
    self.mlp_w = model.add_parameters((200, self.STATE_SIZE))
    self.mlp_b1 = model.add_parameters(200)
    self.mlp_u = model.add_parameters((2, 200))
    self.mlp_b2 = model.add_parameters(2)
def __init__(self, model, num_input, num_hidden, num_output, act=dy.tanh):
    # Bidirectional encoder + decoder with MLP heads for duration and f0.
    # NOTE: this block is Python 2 (print statement).
    # :param model: DyNet model parameters are registered on
    # :param num_input: input feature dimensionality
    # :param num_hidden: per-direction encoder hidden size
    # :param num_output: declared output size (stored via num_out)
    # :param act: activation (stored by callers; unused here)
    self.num_input = int(num_input)
    self.num_hidden = int(num_hidden)
    self.num_out = int(num_output)
    self.model = model
    print "Loaded params"
    # LSTM Parameters
    self.enc_lstm_fwd_builder = dy.LSTMBuilder(1, self.num_input, num_hidden, model)
    self.enc_lstm_bwd_builder = dy.LSTMBuilder(1, self.num_input, num_hidden, model)
    # Decoder hidden size is 2 * num_hidden to match the concatenated
    # bidirectional encoder output.
    self.dec_lstm_builder = dy.LSTMBuilder(1, self.num_input, num_hidden * 2, model)
    # MLP to predict the duration
    # NOTE(review): weight rows (num_hidden) and bias shape (1) disagree —
    # confirm the intended head dimensionality.
    self.W_duration = self.model.add_parameters(
        (self.num_hidden, self.num_hidden * 2))
    self.b_duration = self.model.add_parameters((1))
    # MLP to predict f0
    self.W_f0 = self.model.add_parameters(
        (self.num_hidden, self.num_hidden * 2))
    self.b_f0 = self.model.add_parameters((1))
def __init__(self, dataTrain, repr): self.repr = repr # creates all dictionaries self.create_dic(dataTrain) # model parameters self.model = dy.Model() LAYERS = 1 EMB_DIM = 50 HID_DIM = 70 # is_emb: self.E_v = self.model.add_lookup_parameters( (len(dataTrain.vocab) + 1, EMB_DIM)) # is_LSTM: self.E_c = self.model.add_lookup_parameters( (len(dataTrain.chars) + 1, EMB_DIM)) self.cLSTM = dy.LSTMBuilder(LAYERS, EMB_DIM, EMB_DIM, self.model) # is_sub: self.E_p = self.model.add_lookup_parameters( (len(dataTrain.pref) + 1, EMB_DIM)) self.E_s = self.model.add_lookup_parameters( (len(dataTrain.suff) + 1, EMB_DIM)) self.LSTMf1 = dy.LSTMBuilder(LAYERS, EMB_DIM, HID_DIM, self.model) self.LSTMb1 = dy.LSTMBuilder(LAYERS, EMB_DIM, HID_DIM, self.model) self.LSTMf2 = dy.LSTMBuilder(LAYERS, HID_DIM * 2, HID_DIM, self.model) self.LSTMb2 = dy.LSTMBuilder(LAYERS, HID_DIM * 2, HID_DIM, self.model) self.linear = self.model.add_parameters((EMB_DIM, EMB_DIM * 2)) self.out = self.model.add_parameters( (len(dataTrain.tags), HID_DIM * 2)) self.trainer = dy.AdamTrainer(self.model)
def __init__(self, num_layers, input_dim, hidden_dim, word_num,
             init_scale_rnn, init_scale_params, x_dropout, h_dropout,
             w_dropout_rate, lr, clip_thr):
    """Optionally bidirectional LSTM language model.

    :param num_layers: LSTM layers
    :param input_dim: embedding / LSTM input size
    :param hidden_dim: per-direction LSTM hidden size
    :param word_num: vocabulary size (embedding rows)
    :param init_scale_rnn: scale for RNN weight initialization
    :param init_scale_params: scale for embedding/parameter init (falsy to skip)
    :param x_dropout: input dropout rate
    :param h_dropout: hidden dropout rate
    :param w_dropout_rate: word dropout rate
    :param lr: SGD learning rate
    :param clip_thr: gradient-clipping threshold (<= 0 disables)
    """
    model = dy.Model()
    rnn = dy.LSTMBuilder(num_layers, input_dim, hidden_dim, model)
    self.init_rnn(rnn, init_scale_rnn)
    # NOTE(review): `bilstm` is a free (presumably module-level) name —
    # confirm it is defined wherever this class is instantiated.
    # BUG FIX: rnn_b was unbound when bilstm is false, so the assignment
    # to self._rnn_b below raised NameError.
    rnn_b = None
    if bilstm:
        rnn_b = dy.LSTMBuilder(num_layers, input_dim, hidden_dim, model)
        self.init_rnn(rnn_b, init_scale_rnn)
        # Downstream layers see the concatenated fwd+bwd states.
        hidden_dim = hidden_dim * 2
    params = {}
    params["embeds"] = model.add_lookup_parameters((word_num, input_dim))
    params["W_p"] = model.add_parameters((1, hidden_dim))
    if init_scale_params:
        # BUG FIX: was `self.init_lookup(embeds, ...)` where `embeds` was
        # an undefined local name.
        self.init_lookup(params["embeds"], init_scale_params)
        self.init_param(params["W_p"], init_scale_params)
    trainer = dy.SimpleSGDTrainer(model, lr)
    if clip_thr > 0:
        trainer.set_clip_threshold(clip_thr)
    self._model = model
    self._rnn = rnn
    self._rnn_b = rnn_b
    self._params = params
    self._x_dropout = x_dropout
    self._h_dropout = h_dropout
    self._w_dropout_rate = w_dropout_rate
    self._trainer = trainer
    self._input_dim = input_dim
def initParams(self, model, Cemb, options):
    # initialize the model parameters
    # NOTE: this block is Python 2 (xrange).
    # :param model: DyNet model parameters are registered on
    # :param Cemb: pretrained character-embedding matrix (rows copied in)
    # :param options: dict with word_dims, char_dims, nhiddens, max_word_len
    # :return: dict of all created parameters
    params = dict()
    ## ===== Lookup parameters
    # Embedding table shaped like Cemb; each row is copied from Cemb.
    params['embed'] = model.add_lookup_parameters(Cemb.shape)
    for row_num, vec in enumerate(Cemb):
        params['embed'].init_row(row_num, vec)
    # 1-layer LSTM: word_dims input, nhiddens hidden units.
    params['lstm'] = dy.LSTMBuilder(1, options['word_dims'],
                                    options['nhiddens'], model)
    # Per-word-length parameter lists, filled in the loop below.
    params['reset_gate_W'] = []
    params['reset_gate_b'] = []
    params['com_W'] = []
    params['com_b'] = []
    # The learnable parameter to judge if the word is legality.
    params['word_score_U'] = model.add_parameters(
        options['word_dims'])
    # The W and b to predict possibility in LSTM.
    params['predict_W'] = model.add_parameters(
        (options['word_dims'], options['nhiddens']))
    params['predict_b'] = model.add_parameters(
        options['word_dims'])
    for wlen in xrange(1, options['max_word_len'] + 1):
        # Reset gate W_r_l/b_r_l select which parts of the character
        # vectors to retrieve; r element-wise multiplies the concatenated
        # char vectors, so both sides are wlen * char_dims.
        params['reset_gate_W'].append(
            model.add_parameters((wlen * options['char_dims'],
                                  wlen * options['char_dims'])))
        params['reset_gate_b'].append(
            model.add_parameters(wlen * options['char_dims']))
        # Composition W_c_l/b_c_l map the gated character vectors
        # (wlen * char_dims) into the word space (word_dims).
        params['com_W'].append(
            model.add_parameters(
                (options['word_dims'], wlen * options['char_dims'])))
        params['com_b'].append(model.add_parameters(options['word_dims']))
    # Begin-of-state vector.
    params['<BoS>'] = model.add_parameters(
        options['word_dims'])
    return params
def __init__(self, model, vocab_size, hidden_dim, num_layers=1, max_len=100):
    """Attentional decoder over bidirectional encoder states.

    :param model: DyNet model parameters are registered on
    :param vocab_size: output vocabulary size
    :param hidden_dim: decoder hidden size (encoder side is 2 * hidden_dim)
    :param num_layers: decoder LSTM layers
    :param max_len: maximum sequence length for the pre-attention matrix
    """
    self.decoder_rnn = dy.LSTMBuilder(num_layers, hidden_dim * 2,
                                      hidden_dim, model)
    # Fixed-size position-to-position pre-attention matrix.
    self.pre_attend = model.add_parameters((max_len, max_len))
    # Attention scoring parameters.
    self.v = model.add_parameters((hidden_dim, hidden_dim))
    self.u = model.add_parameters((1, hidden_dim))
    self.attender = model.add_parameters((hidden_dim, 2 * hidden_dim))
    #self.attender=model.add_parameters((max_len, hidden_dim*(max_len+1)))
    # Output projection over the vocabulary.
    self.pred = model.add_parameters((vocab_size, hidden_dim))
    self.hdim = hidden_dim
    self.max_len = max_len
def build_model(vocab):
    """Assemble the encoder network on a fresh DyNet model.

    :param vocab: vocabulary; its length sizes the embedding table
    :return: (model, embeddings_lookup, hidden_W, hidden_bias,
        MLP_W, MLP_bias, encoder_lstm)
    """
    model = dy.Model()
    # One embedding row per vocabulary entry.
    embeddings_lookup = model.add_lookup_parameters((len(vocab), INPUT_DIM))
    # Hidden affine layer over the encoder output.
    hidden_W = model.add_parameters((HIDDEN_DIM, HIDDEN_DIM))
    hidden_bias = model.add_parameters(HIDDEN_DIM)
    # MLP read-out layer.
    MLP_W = model.add_parameters((OUTPUT_DIM, HIDDEN_DIM))
    MLP_bias = model.add_parameters(OUTPUT_DIM)
    # Single-layer LSTM encoder over the embeddings.
    encoder_lstm = dy.LSTMBuilder(1, INPUT_DIM, HIDDEN_DIM, model)
    return (model, embeddings_lookup, hidden_W, hidden_bias,
            MLP_W, MLP_bias, encoder_lstm)