def __init__(self, vocab, word_dims, pret_dims, lemma_dims, tag_dims,
             dropout_dim, lstm_layers, lstm_hiddens, dropout_lstm_input,
             dropout_lstm_hidden, mlp_size, dropout_mlp):
    pc = dy.ParameterCollection()
    self._vocab = vocab
    self.word_embs = pc.lookup_parameters_from_numpy(
        vocab.get_word_embs(word_dims))
    self.pret_word_embs = pc.lookup_parameters_from_numpy(
        vocab.get_pret_embs(pret_dims))
    self.lemma_embs = pc.lookup_parameters_from_numpy(
        vocab.get_lemma_embs(lemma_dims))
    self.tag_embs = pc.lookup_parameters_from_numpy(
        vocab.get_tag_embs(tag_dims))

    self.LSTM_builders = []
    input_dims = word_dims + pret_dims + lemma_dims + tag_dims
    f = orthonormal_VanillaLSTMBuilder(1, input_dims, lstm_hiddens, pc)
    b = orthonormal_VanillaLSTMBuilder(1, input_dims, lstm_hiddens, pc)
    self.LSTM_builders.append((f, b))
    for i in range(lstm_layers - 1):
        f = orthonormal_VanillaLSTMBuilder(1, 2 * lstm_hiddens, lstm_hiddens, pc)
        b = orthonormal_VanillaLSTMBuilder(1, 2 * lstm_hiddens, lstm_hiddens, pc)
        self.LSTM_builders.append((f, b))
    self.dropout_lstm_input = dropout_lstm_input
    self.dropout_lstm_hidden = dropout_lstm_hidden

    W = orthonormal_initializer(mlp_size, 2 * lstm_hiddens)
    self.mlp_arg_W = pc.parameters_from_numpy(W)
    self.mlp_pred_W = pc.parameters_from_numpy(W)
    self.mlp_arg_b = pc.add_parameters((mlp_size,), init=dy.ConstInitializer(0.))
    self.mlp_pred_b = pc.add_parameters((mlp_size,), init=dy.ConstInitializer(0.))
    self.mlp_size = mlp_size
    self.dropout_mlp = dropout_mlp

    self.rel_W = pc.add_parameters(
        (vocab.rel_size * (mlp_size + 1), mlp_size + 1),
        init=dy.ConstInitializer(0.))
    self._pc = pc

    def _emb_mask_generator(seq_len, batch_size):
        ret = []
        for i in range(seq_len):
            word_mask = np.random.binomial(1, 1. - dropout_dim,
                                           batch_size).astype(np.float32)
            tag_mask = np.random.binomial(1, 1. - dropout_dim,
                                          batch_size).astype(np.float32)
            # Rescale so the expected total mass of the two masks is preserved.
            scale = 3. / (2. * word_mask + tag_mask + 1e-12)
            word_mask *= scale
            tag_mask *= scale
            word_mask = dy.inputTensor(word_mask, batched=True)
            tag_mask = dy.inputTensor(tag_mask, batched=True)
            ret.append((word_mask, tag_mask))
        return ret

    self.generate_emb_mask = _emb_mask_generator
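# A hedged usage sketch (assumed, not part of the source): how the masks from
# `generate_emb_mask` are typically consumed during training. Whole word/tag
# embeddings are dropped per timestep, and the `scale` factor computed above
# keeps the expected magnitude of the concatenated input constant.
# `model`, `word_exprs`, and `tag_exprs` are hypothetical: an instance of the
# class above and lists of batched embedding expressions of length `seq_len`.
masks = model.generate_emb_mask(seq_len, batch_size)
emb_inputs = [dy.concatenate([dy.cmult(w, wm), dy.cmult(t, tm)])
              for w, t, (wm, tm) in zip(word_exprs, tag_exprs, masks)]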
def __init__(self,
             input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
             hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
             dropout: numbers.Real = Ref("exp_global.dropout", default=0.0)) -> None:
    self.dropout_rate = dropout
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    my_params = param_collections.ParamManager.my_params(self)
    # Input, output, and cell-candidate gates are stacked as [i; o; g] so all
    # three can be computed with a single affine transform.
    self.p_Wx_iog = my_params.add_parameters(dim=(hidden_dim * 3, input_dim))
    self.p_Wh_iog = my_params.add_parameters(dim=(hidden_dim * 3, hidden_dim))
    self.p_b_iog = my_params.add_parameters(dim=(hidden_dim * 3,),
                                            init=dy.ConstInitializer(0.0))
    # The forget gate has separate parameters, with its bias initialized to 1
    # so the cell state is initially carried through.
    self.p_Wx_f = my_params.add_parameters(dim=(hidden_dim, input_dim))
    self.p_Wh_f = my_params.add_parameters(dim=(hidden_dim, hidden_dim))
    self.p_b_f = my_params.add_parameters(dim=(hidden_dim,),
                                          init=dy.ConstInitializer(1.0))
    self.dropout_mask_x = None
    self.dropout_mask_h = None
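# A hedged sketch (assumed, not shown in the source) of the single recurrence
# step this parameter layout implies: i, o, and g come out of one affine
# transform over the stacked [i; o; g] weights, while the forget gate is
# computed from its own weights. `x_t`, `h_prev`, and `c_prev` are
# hypothetical expressions for the current input and previous hidden/cell state.
gates = dy.affine_transform([self.p_b_iog, self.p_Wx_iog, x_t]) + self.p_Wh_iog * h_prev
i_t = dy.logistic(dy.pick_range(gates, 0, self.hidden_dim))
o_t = dy.logistic(dy.pick_range(gates, self.hidden_dim, 2 * self.hidden_dim))
g_t = dy.tanh(dy.pick_range(gates, 2 * self.hidden_dim, 3 * self.hidden_dim))
f_t = dy.logistic(dy.affine_transform([self.p_b_f, self.p_Wx_f, x_t]) + self.p_Wh_f * h_prev)
c_t = dy.cmult(f_t, c_prev) + dy.cmult(i_t, g_t)
h_t = dy.cmult(o_t, dy.tanh(c_t))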
def __init__(self, n_hid, model=None):
    if model is None:
        model = pm.global_collection()
    self.p_g = model.add_parameters(dim=n_hid, init=dy.ConstInitializer(1.0))
    self.p_b = model.add_parameters(dim=n_hid, init=dy.ConstInitializer(0.0))
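# A hedged usage sketch (assumed): these gain/bias parameters pair with
# DyNet's built-in layer normalization, which normalizes `x` and then scales
# by `p_g` and shifts by `p_b`. `norm` is a hypothetical instance of the
# enclosing class and `x` an n_hid-dimensional expression.
h = dy.layer_norm(x, norm.p_g, norm.p_b)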
def __init__(self, vocab, options):
    random.seed(1)
    self.model = dy.ParameterCollection()
    self.trainer = helpers.get_trainer(options, self.model)
    self.get_violation = helpers.update_method(options)
    word_count = vocab.word_freq
    word_vocab = vocab.wordlookup_tbl
    pos_vocab = vocab.poslookup_tbl
    rel_vocab = vocab.rellookup_tbl
    self.rels = rel_vocab
    self._enc = helpers.get_encoder(self.model, options, word_count,
                                    word_vocab, pos_vocab)
    self._tree_enc = TreeEncoder.get_tree_encoder(self.model, options, rel_vocab)
    self.mlp_rel_size = options.mlp_rel_size
    self.hidden_dim = options.compos_outdim
    W = orthonormal_initializer(self.mlp_rel_size, self.hidden_dim)
    self.mlp_dep_W = self.model.parameters_from_numpy(W)
    self.mlp_head_W = self.model.parameters_from_numpy(W)
    self.mlp_dep_b = self.model.add_parameters((self.mlp_rel_size,),
                                               init=dy.ConstInitializer(0.))
    self.mlp_head_b = self.model.add_parameters((self.mlp_rel_size,),
                                                init=dy.ConstInitializer(0.))
    # self.dropout_mlp = options.dropout_mlp
    self.rel_W = self.model.add_parameters(
        (len(rel_vocab) * (self.mlp_rel_size + 1), self.mlp_rel_size + 1),
        init=dy.ConstInitializer(0.))
    self._train_flag = True
    self.oracle = options.oracle
    self.exploration_rate = options.exploration_rate
def __init__(self, vocab, properties):
    self.properties = properties
    self.vocab = vocab
    self.model = dynet.Model()
    self.updater = dynet.AdamTrainer(self.model)
    self.word_embedding = self.model.add_lookup_parameters(
        (vocab.num_words(), properties.word_embed_dim))
    self.pos_embedding = self.model.add_lookup_parameters(
        (vocab.num_pos(), properties.pos_embed_dim))
    self.label_embedding = self.model.add_lookup_parameters(
        (vocab.num_label(), properties.label_embed_dim))
    self.transfer = dynet.rectify
    input_dim = (20 * properties.word_embed_dim
                 + 20 * properties.pos_embed_dim
                 + 12 * properties.label_embed_dim)
    self.hidden_layer_1 = self.model.add_parameters(
        (properties.hidden_dim_1, input_dim))
    self.hidden_layer_bias_1 = self.model.add_parameters(
        properties.hidden_dim_1, init=dynet.ConstInitializer(0.2))
    self.hidden_layer_2 = self.model.add_parameters(
        (properties.hidden_dim_2, properties.hidden_dim_1))
    self.hidden_layer_bias_2 = self.model.add_parameters(
        properties.hidden_dim_2, init=dynet.ConstInitializer(0.2))
    self.output_layer = self.model.add_parameters(
        (vocab.num_action(), properties.hidden_dim_2))
    self.output_bias = self.model.add_parameters(
        vocab.num_action(), init=dynet.ConstInitializer(0))
def __init__(self, model, pos_labels, xpos_labels, src_ctx_dim=400,
             n_pos_tagger_mlp_units=200, n_xpos_tagger_mlp_units=200,
             mlps_dropout=0.33):
    self.src_ctx_dim = src_ctx_dim
    self.dropout = mlps_dropout
    self.pos_labels = pos_labels
    self.xpos_labels = xpos_labels
    Saxe_initializer = Saxe.Orthogonal(gain='leaky_relu', alpha=0.1)
    self.W_pos = model.add_parameters(
        (n_pos_tagger_mlp_units, src_ctx_dim),
        init=dy.NumpyInitializer(
            Saxe_initializer((n_pos_tagger_mlp_units, src_ctx_dim))))
    self.b_pos = model.add_parameters((n_pos_tagger_mlp_units,),
                                      init=dy.ConstInitializer(0))
    self.W_xpos = model.add_parameters(
        (n_xpos_tagger_mlp_units, src_ctx_dim),
        init=dy.NumpyInitializer(
            Saxe_initializer((n_xpos_tagger_mlp_units, src_ctx_dim))))
    self.b_xpos = model.add_parameters((n_xpos_tagger_mlp_units,),
                                       init=dy.ConstInitializer(0))
    self.W_affine_pos = model.add_parameters(
        (n_pos_tagger_mlp_units, pos_labels), init=dy.ConstInitializer(0))
    self.b_affine_pos = model.add_parameters((pos_labels,),
                                             init=dy.ConstInitializer(0))
    self.W_affine_xpos = model.add_parameters(
        (n_xpos_tagger_mlp_units, xpos_labels), init=dy.ConstInitializer(0))
    self.b_affine_xpos = model.add_parameters((xpos_labels,),
                                              init=dy.ConstInitializer(0))
def __init__(self, model, options, rel_vocab):
    self.model = model.add_subcollection('scorer')
    self.activation = get_activation(options)
    self.dropout_rate = options.dropout_rate
    self.arc_dims = options.scr_arc_dim
    self.rel_dims = options.scr_rel_dim
    self.in_dims = options.scorer_indim
    self.u_weight = options.unlabel_weight
    self.rel_size = len(rel_vocab)
    mlp_size = self.arc_dims + self.rel_dims
    # head_W = orthonormal_initializer(mlp_size, self.in_dims)
    # dep_W = orthonormal_initializer(mlp_size, self.in_dims)
    W = orthonormal_initializer(mlp_size, self.in_dims)
    self.mlp_head_W = self.model.parameters_from_numpy(W)
    self.mlp_dep_W = self.model.parameters_from_numpy(W)
    # self.mlp_head_W = self.model.add_parameters((mlp_size, self.in_dims))
    # self.mlp_dep_W = self.model.add_parameters((mlp_size, self.in_dims))
    self.mlp_head_b = self.model.add_parameters((mlp_size,),
                                                init=dy.ConstInitializer(0.))
    self.mlp_dep_b = self.model.add_parameters((mlp_size,),
                                               init=dy.ConstInitializer(0.))
    self.arc_Wp = self.model.add_parameters((self.arc_dims, self.arc_dims + 1))
    self.rel_Wp = self.model.add_parameters(
        (self.rel_size * (self.rel_dims + 1), self.rel_dims + 1))
def __init__(self, pc, dim_asp, dim_opi):
    self.pc = pc.add_subcollection()
    self.dim_asp = dim_asp
    self.dim_opi = dim_opi
    self._W_A = self.pc.add_parameters((2 * self.dim_opi, 2 * self.dim_asp),
                                       init=dy.UniformInitializer(0.2))
    self._W_O = self.pc.add_parameters((2 * self.dim_opi, 2 * self.dim_opi),
                                       init=dy.UniformInitializer(0.2))
    self._b = self.pc.add_parameters((2 * self.dim_opi,),
                                     init=dy.ConstInitializer(0.0))
    self._W_bilinear = self.pc.add_parameters((2 * self.dim_asp, 2 * self.dim_opi),
                                              init=dy.UniformInitializer(0.2))
    self._b_bilinear = self.pc.add_parameters((1,), init=dy.ConstInitializer(0.0))
def __init__(self, model, input_dim, output_dim, act=dy.rectify,
             init_gain=math.sqrt(2.), ln=False):
    self.pc = model.add_subcollection()
    self.act = act
    self.ln = ln
    self.W = self.pc.add_parameters((output_dim, input_dim),
                                    init=dy.GlorotInitializer(gain=init_gain))
    self.b = self.pc.add_parameters(output_dim, init=dy.ConstInitializer(0.))
    if ln:
        self.g = self.pc.add_parameters(output_dim, init=dy.ConstInitializer(1.))
    self.spec = (input_dim, output_dim, act, init_gain, ln)
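# A hedged sketch (assumed, the actual forward pass is not shown in the
# source) of what this constructor supports: an affine transform plus
# activation, where `ln=True` swaps in layer normalization with `g` as the
# gain and `b` reused as the shift.
def __call__(self, x):
    if self.ln:
        return self.act(dy.layer_norm(self.W * x, self.g, self.b))
    return self.act(dy.affine_transform([self.b, self.W, x]))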
def __init__(self, vocab, properties, pre_train_weights=None):
    self.properties = properties
    self.vocab = vocab
    # first initialize a computation graph container (or model).
    self.model = dynet.Model()
    # assign the algorithm for backpropagation updates.
    self.updater = dynet.AdamTrainer(self.model)
    # create embeddings for words and tag features.
    self.word_embedding = self.model.add_lookup_parameters(
        (vocab.num_words(), properties.word_embed_dim))
    if pre_train_weights:
        self.word_embedding.init_from_array(
            self.load_init_words(vocab, pre_train_weights,
                                 properties.word_embed_dim))
    self.pos_embedding = self.model.add_lookup_parameters(
        (vocab.num_pos_feats(), properties.pos_embed_dim))
    self.label_embedding = self.model.add_lookup_parameters(
        (vocab.num_labels(), properties.label_embed_dim))
    # assign transfer function
    self.transfer = dynet.rectify  # can be dynet.logistic or dynet.tanh as well.
    # define the input dimension for the embedding layer:
    # 20 word features, 20 POS features, and 12 label features, each replaced
    # by its embedding and concatenated.
    self.input_dim = 20 * (properties.word_embed_dim + properties.pos_embed_dim) \
        + 12 * properties.label_embed_dim
    # define hidden layer 1.
    self.hidden_layer1 = self.model.add_parameters(
        (properties.hidden_dim1, self.input_dim))
    # define the hidden layer 1 bias term.
    self.hidden_layer_bias1 = self.model.add_parameters(
        properties.hidden_dim1, init=dynet.ConstInitializer(0.2))
    # define hidden layer 2.
    self.hidden_layer2 = self.model.add_parameters(
        (properties.hidden_dim2, properties.hidden_dim1))
    # define the hidden layer 2 bias term.
    self.hidden_layer_bias2 = self.model.add_parameters(
        properties.hidden_dim2, init=dynet.ConstInitializer(0.2))
    # define the output weight.
    self.output_layer = self.model.add_parameters(
        (vocab.num_actions(), properties.hidden_dim2))
    # define the output bias vector and initialize it as zero.
    self.output_bias = self.model.add_parameters(
        vocab.num_actions(), init=dynet.ConstInitializer(0))
def __init__(self, model, vocab_form, d_form, v_train, dropout_emb, vocab_pos,
             d_pos, vocab_deprel, layers, d_lstm, dropout_lstm_input,
             dropout_lstm_hidden, mlp_arc_size, mlp_rel_size, dropout_mlp):
    spc = model.add_subcollection("deepbiaffine")
    # lookup parameters
    self.vocab_form = vocab_form
    self.vocab_pos = vocab_pos
    self.vocab_deprel = vocab_deprel
    self.v_train = v_train
    self.dropout_emb = dropout_emb
    self.e_form = spc.lookup_parameters_from_numpy(
        np.random.randn(v_train, d_form)
        if vocab_form.vectors is not None else np.zeros((v_train, d_form)))
    self.e_ext = (spc.lookup_parameters_from_numpy(vocab_form.vectors)
                  if vocab_form.vectors is not None else None)
    self.e_tag = spc.add_lookup_parameters((len(vocab_pos), d_pos))
    # bidirectional LSTM builders, one forward/backward pair per layer
    self.lstm_builders = []
    f = orthonormal_VanillaLSTMBuilder(1, d_form + d_pos, d_lstm, spc)
    b = orthonormal_VanillaLSTMBuilder(1, d_form + d_pos, d_lstm, spc)
    self.lstm_builders.append((f, b))
    for i in range(layers - 1):
        f = orthonormal_VanillaLSTMBuilder(1, 2 * d_lstm, d_lstm, spc)
        b = orthonormal_VanillaLSTMBuilder(1, 2 * d_lstm, d_lstm, spc)
        self.lstm_builders.append((f, b))
    self.dropout_lstm_input = dropout_lstm_input
    self.dropout_lstm_hidden = dropout_lstm_hidden
    # the arc and rel MLPs are concatenated into one matrix to speed things up
    mlp_size = mlp_arc_size + mlp_rel_size
    W = orthonormal_initializer(mlp_size, 2 * d_lstm)
    self.mlp_dep_W = spc.parameters_from_numpy(W)
    self.mlp_head_W = spc.parameters_from_numpy(W)
    self.mlp_dep_b = spc.add_parameters((mlp_size,), init=dy.ConstInitializer(0.))
    self.mlp_head_b = spc.add_parameters((mlp_size,), init=dy.ConstInitializer(0.))
    self.mlp_arc_size = mlp_arc_size
    self.mlp_rel_size = mlp_rel_size
    self.dropout_mlp = dropout_mlp
    self.arc_W = spc.add_parameters((mlp_arc_size, mlp_arc_size + 1),
                                    init=dy.ConstInitializer(0.))
    self.rel_W = spc.add_parameters(
        (len(vocab_deprel) * (mlp_rel_size + 1), mlp_rel_size + 1),
        init=dy.ConstInitializer(0.))
    self.spec = (vocab_form, d_form, v_train, dropout_emb, vocab_pos, d_pos,
                 vocab_deprel, layers, d_lstm, dropout_lstm_input,
                 dropout_lstm_hidden, mlp_arc_size, mlp_rel_size, dropout_mlp)
    self.pc = spc
def setUp(self):
    # Create model
    self.m = dy.ParameterCollection()
    # Parameters
    self.p1 = self.m.add_parameters((10, 10), init=dy.ConstInitializer(1))
    self.p2 = self.m.add_parameters((10, 10), init=dy.ConstInitializer(1))
    self.lp1 = self.m.add_lookup_parameters((10, 10), init=dy.ConstInitializer(1))
    self.lp2 = self.m.add_lookup_parameters((10, 10), init=dy.ConstInitializer(1))
    # Trainer
    self.trainer = dy.SimpleSGDTrainer(self.m, learning_rate=0.1)
    self.trainer.set_clip_threshold(-1)  # a non-positive threshold disables clipping
def __init__(self, vocab, config, pretrained_embedding):
    pc = dy.ParameterCollection()
    self.config = config
    word_init = np.zeros((vocab.vocab_size, config.word_dims), dtype=np.float32)
    self.word_embs = pc.lookup_parameters_from_numpy(word_init)
    self.pret_word_embs = pc.lookup_parameters_from_numpy(pretrained_embedding)
    tag_init = np.random.randn(vocab.tag_size,
                               config.tag_dims).astype(np.float32)
    # tag_init = tag_init / np.sqrt(config.tag_dims)
    self.tag_embs = pc.lookup_parameters_from_numpy(tag_init)
    self.dropout_emb = config.dropout_emb
    self.rel_size = vocab.rel_size
    self.LSTM_builders = []
    f = orthonormal_VanillaLSTMBuilder(1, config.word_dims + config.tag_dims,
                                       config.lstm_hiddens, pc)
    b = orthonormal_VanillaLSTMBuilder(1, config.word_dims + config.tag_dims,
                                       config.lstm_hiddens, pc)
    self.LSTM_builders.append((f, b))
    for i in range(config.lstm_layers - 1):
        f = orthonormal_VanillaLSTMBuilder(1, 2 * config.lstm_hiddens,
                                           config.lstm_hiddens, pc)
        b = orthonormal_VanillaLSTMBuilder(1, 2 * config.lstm_hiddens,
                                           config.lstm_hiddens, pc)
        self.LSTM_builders.append((f, b))
    self.dropout_lstm_input = config.dropout_lstm_input
    self.dropout_lstm_hidden = config.dropout_lstm_hidden
    mlp_size = config.mlp_arc_size + config.mlp_rel_size
    W = orthonormal_initializer(mlp_size, 2 * config.lstm_hiddens)
    self.mlp_dep_W = pc.parameters_from_numpy(W)
    self.mlp_head_W = pc.parameters_from_numpy(W)
    self.mlp_dep_b = pc.add_parameters((mlp_size,), init=dy.ConstInitializer(0.))
    self.mlp_head_b = pc.add_parameters((mlp_size,), init=dy.ConstInitializer(0.))
    self.mlp_arc_size = config.mlp_arc_size
    self.mlp_rel_size = config.mlp_rel_size
    self.dropout_mlp = config.dropout_mlp
    self.arc_W = pc.add_parameters((config.mlp_arc_size, config.mlp_arc_size + 1),
                                   init=dy.ConstInitializer(0.))
    self.rel_W = pc.add_parameters(
        (vocab.rel_size * (config.mlp_rel_size + 1), config.mlp_rel_size + 1),
        init=dy.ConstInitializer(0.))
    self._pc = pc
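# A hedged sketch of how `arc_W` is consumed in the standard deep-biaffine
# formulation (the forward pass itself is not shown in the source): each head
# vector scores each dependent vector, with a bias column appended to the
# dependent side. `dep` and `head` are hypothetical (mlp_arc_size, n)
# expressions from the two MLPs for a sentence of length n.
ones = dy.inputTensor(np.ones((1, n), dtype=np.float32))
dep_b = dy.concatenate([dep, ones])                 # (mlp_arc_size + 1, n)
arc_logits = dy.transpose(head) * (arc_W * dep_b)   # (n, n) head-by-dependent scores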
def __init__(self, model, input_dim, hidden_dim, output_dim, dropout=0,
             softmax=False):
    self.input = input_dim
    self.hidden = hidden_dim
    self.output = output_dim
    self.dropout = dropout
    self.softmax = softmax
    self.WI2H = model.add_parameters((self.hidden, self.input))
    self.bI2H = model.add_parameters((self.hidden,), init=dy.ConstInitializer(0))
    self.WH2O = model.add_parameters((self.output, self.hidden))
    self.bH2O = model.add_parameters((self.output,), init=dy.ConstInitializer(0))
def __init__(self, bigrams_size, unigrams_size, bigrams_dims, unigrams_dims,
             lstm_units, hidden_units, label_size, span_nums, droprate=0):
    self.bigrams_size = bigrams_size
    self.bigrams_dims = bigrams_dims
    self.unigrams_dims = unigrams_dims
    self.unigrams_size = unigrams_size
    self.lstm_units = lstm_units
    self.hidden_units = hidden_units
    self.span_nums = span_nums
    self.droprate = droprate
    self.label_size = label_size
    self.model = dynet.Model()
    self.trainer = dynet.AdadeltaTrainer(self.model, eps=1e-7, rho=0.99)
    random.seed(1)
    self.activation = dynet.rectify
    self.bigram_embed = self.model.add_lookup_parameters(
        (self.bigrams_size, self.bigrams_dims))
    self.unigram_embed = self.model.add_lookup_parameters(
        (self.unigrams_size, self.unigrams_dims))
    self.fwd_lstm1 = LSTM(self.bigrams_dims + self.unigrams_dims,
                          self.lstm_units, self.model)
    self.back_lstm1 = LSTM(self.bigrams_dims + self.unigrams_dims,
                           self.lstm_units, self.model)
    self.fwd_lstm2 = LSTM(2 * self.lstm_units, self.lstm_units, self.model)
    self.back_lstm2 = LSTM(2 * self.lstm_units, self.lstm_units, self.model)
    self.p_hidden_W = self.model.add_parameters(
        (self.hidden_units, 2 * self.span_nums * self.lstm_units),
        dynet.UniformInitializer(0.01))
    self.p_hidden_b = self.model.add_parameters((self.hidden_units,),
                                                dynet.ConstInitializer(0))
    self.p_output_W = self.model.add_parameters(
        (self.label_size, self.hidden_units), dynet.ConstInitializer(0))
    self.p_output_b = self.model.add_parameters((self.label_size,),
                                                dynet.ConstInitializer(0))
def __init__(self, vocab, properties):
    self.properties = properties
    self.vocab = vocab
    # first initialize a computation graph container (or model).
    self.model = dynet.Model()
    # assign the algorithm for backpropagation updates.
    self.updater = dynet.AdamTrainer(self.model)
    # create embeddings for words and tag features.
    self.word_embedding = self.model.add_lookup_parameters(
        (vocab.num_words(), properties.word_embed_dim))
    self.tag_embedding = self.model.add_lookup_parameters(
        (vocab.num_tags(), properties.pos_embed_dim))
    self.dep_embedding = self.model.add_lookup_parameters(
        (vocab.num_dep(), properties.dep_embed_dim))
    # assign transfer function
    self.transfer = dynet.rectify  # can be dynet.logistic or dynet.tanh as well.
    # define the input dimension for the embedding layer:
    # 20 word features, 20 POS features, and 12 dependency-label features,
    # each replaced by its embedding and concatenated.
    self.input_dim = (20 * properties.word_embed_dim
                      + 20 * properties.pos_embed_dim
                      + 12 * properties.dep_embed_dim)
    # define the first hidden layer.
    self.hidden_layer1 = self.model.add_parameters(
        (properties.hidden_dim, self.input_dim))
    # dynet.dropout(self.hidden_layer1, 0.3)
    # define the first hidden layer bias term and initialize it as constant 0.2.
    self.hidden_layer_bias1 = self.model.add_parameters(
        properties.hidden_dim, init=dynet.ConstInitializer(0.2))
    # define the second hidden layer.
    self.hidden_layer2 = self.model.add_parameters(
        (properties.hidden_dim, properties.hidden_dim))
    # define the second hidden layer bias term and initialize it as constant 0.2.
    self.hidden_layer_bias2 = self.model.add_parameters(
        properties.hidden_dim, init=dynet.ConstInitializer(0.2))
    # define the output weight.
    self.output_layer = self.model.add_parameters(
        (vocab.num_actions(), properties.hidden_dim))
    # define the output bias vector and initialize it as zero.
    self.output_bias = self.model.add_parameters(
        vocab.num_actions(), init=dynet.ConstInitializer(0))
def __init__(self, model, size_list, act_fun, dropout_rate):
    self.param_layers = []
    for i in range(len(size_list) - 1):
        self.param_layers.append([
            model.add_parameters(size_list[i + 1]),                           # bias
            model.add_parameters((size_list[i + 1], size_list[i])),           # matrix
            model.add_parameters(size_list[i], init=dy.ConstInitializer(1)),  # norm gain
            model.add_parameters(size_list[i], init=dy.ConstInitializer(0)),  # norm bias
        ])
    self.act_fun = act_fun
    self.dropout_rate = dropout_rate
    self.expressions = []
def __init__(self, pc, n_in, n_out, dropout_rate):
    self.n_in = n_in
    self.n_out = n_out
    self.dropout_rate = dropout_rate
    self.pc = pc.add_subcollection()
    self._WC = self.pc.add_parameters((self.n_out, self.n_in),
                                      init=dy.UniformInitializer(0.2))
    self._WP = self.pc.add_parameters((self.n_out, self.n_in),
                                      init=dy.UniformInitializer(0.2))
    self._WR = self.pc.add_parameters((self.n_out, self.n_in),
                                      init=dy.UniformInitializer(0.2))
    self._UP = self.pc.add_parameters((self.n_out, self.n_out),
                                      init=dy.UniformInitializer(0.2))
    self._UR = self.pc.add_parameters((self.n_out, self.n_out),
                                      init=dy.UniformInitializer(0.2))
    self._bc = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
    self._bp = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
    self._br = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
def test_set_value(self):
    # add parameter
    p = self.m.add_parameters((2, 3), init=dy.ConstInitializer(1))
    value_to_set = np.arange(6).reshape(2, 3)
    # set the value
    p.set_value(value_to_set)
    self.assertTrue(np.allclose(p.as_array(), value_to_set))
def __init__(self, v, nu, de, pc, pretrained_BU=None):
    super(FullVocabUserRecognizer, self).__init__(pc)
    # prediction parameters
    self.Wh_p = self.pc.add_parameters((de, de), name='Wh')
    self.bh_p = self.pc.add_parameters((de,), name='bh',
                                       init=dy.ConstInitializer(0))
    self.Su_p = self.pc.add_parameters((nu, de), name='Su')
    self.bu_p = self.pc.add_parameters((nu,), name='bu',
                                       init=dy.ConstInitializer(0))
    self.v, self.nu = v, nu
    if pretrained_BU is None:
        init = dy.ConstInitializer(0)
        self.BU_p = self.pc.add_lookup_parameters((self.nu, self.v),
                                                  init=init, name='BU')
    else:
        self.BU_p = self.pc.lookup_parameters_from_numpy(pretrained_BU,
                                                         name='BU')
    self.avg = None
def __init__(self, params, source_alphabet_size, embedding_size, hidden_units,
             stack_embedding_size):
    input_size = source_alphabet_size + 2
    output_size = source_alphabet_size + 1
    self.stack_embedding_size = stack_embedding_size
    self.input_embeddings = params.add_lookup_parameters(
        (input_size, embedding_size), name='input-embeddings')
    self.output_embeddings = params.add_lookup_parameters(
        (output_size, embedding_size), name='output-embeddings')
    self.controller = dy.CoupledLSTMBuilder(
        1, embedding_size + stack_embedding_size, hidden_units, params)
    # Intentionally set the gain for the sigmoid layers low, since this
    # seems to work better.
    gain = 0.5
    self.pop_strength_layer = add_layer(
        params, hidden_units, 1, sigmoid,
        weights_initializer=dy.GlorotInitializer(False, gain=gain),
        # Initialize the pop bias to -1 to allow information to propagate
        # through the stack.
        bias_initializer=dy.ConstInitializer(-1.0),
        name='pop-strength')
    self.push_strength_layer = add_layer(
        params, hidden_units, 1, sigmoid,
        weights_initializer=dy.GlorotInitializer(False, gain=gain),
        bias_initializer=dy.GlorotInitializer(False, gain=gain),
        name='push-strength')
    self.push_value_layer = add_layer(params, hidden_units,
                                      stack_embedding_size, tanh,
                                      name='push-value')
    self.output_layer = combine_layers([
        add_layer(params, hidden_units, hidden_units, tanh, name='output'),
        # This adds an extra affine layer between the tanh and the softmax.
        add_layer(params, hidden_units, output_size, linear, name='softmax'),
    ])
def HighwayConnection(funcs, sz, pc, name="highway"):
    """Highway connection around arbitrary functions.

    This highway block creates highway connections that short-circuit each
    function in `funcs`.

    :param funcs: A list of functions you can pass input_ to
    :param sz: int The size of the input
    :param pc: dy.ParameterCollection
    :param name: str The name of the layer
    """
    highway_pc = pc.add_subcollection(name=name)
    weights = []
    biases = []
    for i in range(len(funcs)):
        weights.append(
            highway_pc.add_parameters((sz, sz), name="weight-{}".format(i)))
        # The transform-gate bias starts negative so the block initially
        # behaves close to the identity (carry) path.
        biases.append(
            highway_pc.add_parameters((sz,), init=dy.ConstInitializer(-2),
                                      name="bias-{}".format(i)))

    def highway(input_, train):
        for func, weight, bias in zip(funcs, weights, biases):
            proj = dy.rectify(func(input_, train))
            transform = dy.logistic(dy.affine_transform([bias, weight, input_]))
            input_ = dy.cmult(transform, proj) + dy.cmult(input_, 1 - transform)
        return input_

    return highway
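# A hedged usage sketch (assumed): wrapping two single-layer transforms in
# highway connections. `dense1` and `dense2` are hypothetical callables with
# signature f(input_, train) whose output size matches their 200-dim input,
# and `pc` is an existing dy.ParameterCollection.
highway = HighwayConnection([dense1, dense2], sz=200, pc=pc)
y = highway(x, train=True)  # x: a 200-dim dy.Expression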
def test_delete_parent_model(self):
    # The parent collection is never stored, so it becomes garbage right away;
    # the subcollection must keep it alive for the parameter to stay valid.
    model = dy.ParameterCollection().add_subcollection()
    p = dy.parameter(model.add_parameters((1,), init=dy.ConstInitializer(1)))
    p.value()
    gc.collect()
    p.value()  # must still work after collection of the parent
def __init__(self, model, options, rel_vocab):
    self.model = model.add_subcollection('scorer')
    self.activation = get_activation(options)
    self.dropout_rate = options.dropout_rate
    self.hid_dims = options.hid_dim
    self.hid2_dims = options.hid2_dim
    self.in_dims = options.scorer_indim
    self.u_weight = options.unlabel_weight
    self.hid_Wp = self.model.add_parameters((self.hid_dims, self.in_dims))
    self.hid_bp = self.model.add_parameters((self.hid_dims,))
    self.lhid_Wp = self.model.add_parameters((self.hid_dims, self.in_dims))
    self.lhid_bp = self.model.add_parameters((self.hid_dims,))
    if self.hid2_dims > 0:
        self.hid2_Wp = self.model.add_parameters((self.hid2_dims, self.hid_dims))
        self.hid2_bp = self.model.add_parameters((self.hid2_dims,))
        self.lhid2_Wp = self.model.add_parameters((self.hid2_dims, self.hid_dims))
        self.lhid2_bp = self.model.add_parameters((self.hid2_dims,))
        self.out_Wp = self.model.add_parameters((1, self.hid2_dims))
        self.lout_Wp = self.model.add_parameters((len(rel_vocab), self.hid2_dims))
    else:
        self.out_Wp = self.model.add_parameters((1, self.hid_dims))
        self.lout_Wp = self.model.add_parameters((len(rel_vocab), self.hid_dims))
    self.out_bp = self.model.add_parameters((1,), init=dy.ConstInitializer(0))
    self.lout_bp = self.model.add_parameters((len(rel_vocab),))
def __init__(self, v, du, nu, de, pc, pretrained_BU=None):
    super(FactVocabUserRecognizer, self).__init__(pc)
    # prediction parameters
    self.Wh_p = self.pc.add_parameters((de, de), name='Wh')
    self.bh_p = self.pc.add_parameters((de,), name='bh',
                                       init=dy.ConstInitializer(0))
    self.Su_p = self.pc.add_parameters((du, de), name='Su')
    self.bu_p = self.pc.add_parameters((du,), name='bu',
                                       init=dy.ConstInitializer(0))
    self.du = du
    self.v, self.nu = v, nu
    # User vectors
    self.U_p = self.pc.add_lookup_parameters((nu, du),
                                             init=dy.ConstInitializer(0),
                                             name='U')
    # Biases
    init = dy.NormalInitializer(1 / self.du, np.sqrt(1 / self.du))
    self.B_p = self.pc.add_parameters((v, du), init=init, name='B')
    self.avg = None
    self.BU_p = None
def __init__(self, pc, n_in, n_out, use_bias=False):
    self.pc = pc.add_subcollection()
    self.n_in = n_in
    self.n_out = n_out
    self.use_bias = use_bias
    self._W = self.pc.add_parameters((self.n_out, self.n_in),
                                     init=dy.UniformInitializer(0.2))
    if self.use_bias:
        self._b = self.pc.add_parameters((self.n_out,),
                                         init=dy.ConstInitializer(0.0))
def __init__(self, model, input_dim, output_dim):
    self.input = input_dim
    self.output = output_dim
    Saxe_initializer = Saxe.Orthogonal()
    self.W = model.add_parameters(
        (self.output, self.input),
        init=dy.NumpyInitializer(Saxe_initializer((self.output, self.input))))
    self.b = model.add_parameters((self.output,), init=dy.ConstInitializer(0))
def __init__(self, input_dims, output_dims, model):
    self.input_dims = input_dims
    self.output_dims = output_dims
    self.model = model
    self.W_i = model.add_parameters((output_dims, input_dims + output_dims),
                                    init=dynet.UniformInitializer(0.01))
    self.b_i = model.add_parameters((output_dims,),
                                    init=dynet.ConstInitializer(0))
    self.W_f = model.add_parameters((output_dims, input_dims + output_dims),
                                    init=dynet.UniformInitializer(0.01))
    self.b_f = model.add_parameters((output_dims,),
                                    init=dynet.ConstInitializer(0))
    self.W_c = model.add_parameters((output_dims, input_dims + output_dims),
                                    init=dynet.UniformInitializer(0.01))
    self.b_c = model.add_parameters((output_dims,),
                                    init=dynet.ConstInitializer(0))
    self.W_o = model.add_parameters((output_dims, input_dims + output_dims),
                                    init=dynet.UniformInitializer(0.01))
    self.b_o = model.add_parameters((output_dims,),
                                    init=dynet.ConstInitializer(0))
    self.c0 = model.add_parameters((output_dims,),
                                   init=dynet.ConstInitializer(0))
    self.W_params = [self.W_i, self.W_f, self.W_c, self.W_o]
    self.b_params = [self.b_i, self.b_f, self.b_c, self.b_o]
    self.params = self.W_params + self.b_params + [self.c0]
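# A hedged sketch (assumed) of the single LSTM step these parameters support,
# following the standard recurrence; the actual step method is not shown in
# the source. `x`, `h_prev`, and `c_prev` are hypothetical expressions for the
# input and previous hidden/cell states.
def step(self, x, h_prev, c_prev):
    xh = dynet.concatenate([x, h_prev])
    i = dynet.logistic(self.W_i * xh + self.b_i)  # input gate
    f = dynet.logistic(self.W_f * xh + self.b_f)  # forget gate
    o = dynet.logistic(self.W_o * xh + self.b_o)  # output gate
    g = dynet.tanh(self.W_c * xh + self.b_c)      # candidate cell update
    c = dynet.cmult(f, c_prev) + dynet.cmult(i, g)
    h = dynet.cmult(o, dynet.tanh(c))
    return h, c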
def __init__(self, vocab, properties):
    self.vocab = vocab
    self.properties = properties
    # first initialize a computation graph container (or model).
    self.model = dynet.Model()
    # create embeddings for word, tag, and dependency label features.
    self.word_embedding = self.model.add_lookup_parameters(
        (vocab.word_size(), properties.word_embed_dim))
    self.pos_embedding = self.model.add_lookup_parameters(
        (vocab.pos_size(), properties.pos_embed_dim))
    self.dep_embedding = self.model.add_lookup_parameters(
        (vocab.labels_size(), properties.dep_embed_dim))
    # assign the transfer and training functions as defined in the network
    # properties.
    self.transfer = properties.transfer_f
    self.updater = properties.training_f(self.model)
    # define the input dimension to the embedding layer.
    self.input_dim = (20 * properties.word_embed_dim
                      + 20 * properties.pos_embed_dim
                      + 12 * properties.dep_embed_dim)
    # define the first hidden layer.
    self.hidden_layer_1 = self.model.add_parameters(
        (properties.h1_dim, self.input_dim))
    # define the first hidden layer bias term and initialize it as constant 0.2.
    self.hl1_bias = self.model.add_parameters(
        properties.h1_dim, init=dynet.ConstInitializer(0.2))
    # define the second hidden layer.
    self.hidden_layer_2 = self.model.add_parameters(
        (properties.h2_dim, properties.h1_dim))
    # define the second hidden layer bias term and initialize it as constant 0.2.
    self.hl2_bias = self.model.add_parameters(
        properties.h2_dim, init=dynet.ConstInitializer(0.2))
    # define the output weight.
    self.output_layer = self.model.add_parameters(
        (vocab.actions_size(), properties.h2_dim))
    # define the output bias vector and initialize it as zero.
    self.output_bias = self.model.add_parameters(
        vocab.actions_size(), init=dynet.ConstInitializer(0))
def __init__(self, model, ldims=400, input_size=100, output_size=100,
             dropout=0.33):
    self.input = input_size
    self.ldims = ldims
    self.output = output_size
    self.dropout = dropout
    self.charlstm = LSTM(model, self.input, self.ldims, forget_bias=0.0)
    self.W_atten = model.add_parameters((self.ldims, 1),
                                        init=dy.ConstInitializer(0))
    self.W_linear = model.add_parameters((self.output, self.ldims * 2),
                                         init=dy.ConstInitializer(0))
    self.b_linear = model.add_parameters((self.output,),
                                         init=dy.ConstInitializer(0))