def __init__(self, model, l_dim, v_dim, h_dim, s_dim, layers=1):
    pc = model.add_subcollection()
    self.layers = layers
    self.V = ([pc.add_parameters((1, v_dim)) for _ in range(layers - 1)] +
              [pc.add_parameters((l_dim, v_dim))])
    self.W1 = [pc.add_parameters((v_dim, h_dim)) for _ in range(layers)]
    self.W2 = ([pc.add_parameters((v_dim, s_dim))] +
               [pc.add_parameters((v_dim, h_dim + s_dim))
                for _ in range(layers - 1)])
    self.B1 = pc.add_parameters((l_dim, h_dim), init=dy.ConstInitializer(0))
    self.B2 = pc.add_parameters((l_dim, s_dim), init=dy.ConstInitializer(0))
    ## Only single layer support
    # self._W1 = pc.add_parameters((v_dim, h_dim))
    # self._W2 = pc.add_parameters((v_dim, s_dim))
    # self._V = pc.add_parameters((l_dim, v_dim))
    self.pc = pc
    self.spec = (l_dim, v_dim, h_dim, s_dim, layers)
def __init__(self, model, n_layers, x_dim, h_dim, LSTMBuilder,
             param_init=False, fb_fusion=False):
    pc = model.add_subcollection()
    if not fb_fusion:
        self.f = LSTMBuilder(n_layers, x_dim, h_dim, pc)
        self.b = LSTMBuilder(n_layers, x_dim, h_dim, pc)
        if param_init:
            # one learned (zero-initialized) initial vector per state (h and c) per layer
            self.f_init = [pc.add_parameters(h_dim, init=dy.ConstInitializer(0))
                           for _ in range(n_layers * 2)]
            self.b_init = [pc.add_parameters(h_dim, init=dy.ConstInitializer(0))
                           for _ in range(n_layers * 2)]
    else:
        # fusion path: the first BiLSTM layer feeds its concatenated [fwd; bwd]
        # output (of width h_dim * 2) into the remaining stacked layers
        self.DeepBiLSTM = []
        f = LSTMBuilder(1, x_dim, h_dim, pc)
        b = LSTMBuilder(1, x_dim, h_dim, pc)
        self.DeepBiLSTM.append((f, b))
        if n_layers > 1:
            f = LSTMBuilder(n_layers - 1, h_dim * 2, h_dim, pc)
            b = LSTMBuilder(n_layers - 1, h_dim * 2, h_dim, pc)
            self.DeepBiLSTM.append((f, b))
    self.param_init = param_init
    self.fb_fusion = fb_fusion
    self.pc = pc
    self.spec = (n_layers, x_dim, h_dim, LSTMBuilder, param_init, fb_fusion)
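# A minimal transduction sketch for the non-fusion path above. The method name
# `transduce` and the body are assumptions (the rest of the class is not shown);
# it pairs each forward state with the backward state for the same position.
def transduce(self, xs):
    fwd = self.f.initial_state().transduce(xs)
    bwd = self.b.initial_state().transduce(list(reversed(xs)))
    return [dy.concatenate([f, b]) for f, b in zip(fwd, reversed(bwd))]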
def __init__(self, model, h_dim, s_dim, n_label, h_bias=False, s_bias=False):
    pc = model.add_subcollection()
    if h_bias:
        h_dim += 1
    if s_bias:
        s_dim += 1
    if n_label == 1:
        self.U = pc.add_parameters((h_dim, s_dim),
                                   init=dy.ConstInitializer(0.))
    else:
        self.U = pc.add_parameters((h_dim * n_label, s_dim),
                                   init=dy.ConstInitializer(0.))
    self.pc = pc
    self.h_dim = h_dim
    self.s_dim = s_dim
    self.n_label = n_label
    self.h_bias = h_bias
    self.s_bias = s_bias
    self.spec = (h_dim, s_dim, n_label, h_bias, s_bias)
def __init__(self, model, h_dim: int, s_dim: int, n_label: int,
             h_bias=False, s_bias=False, init=dy.ConstInitializer(0.)):
    pc = model.add_subcollection()
    # the bools act as 0/1: each bias flag widens the opposite side by one
    h_dim += s_bias
    s_dim += h_bias
    init_U = init_wrap(init, (h_dim * n_label, s_dim))
    self.U = pc.add_parameters((h_dim * n_label, s_dim), init=init_U)
    self.h_dim, self.s_dim, self.n_label = h_dim, s_dim, n_label
    self.pc, self.h_bias, self.s_bias = pc, h_bias, s_bias
    self.spec = (h_dim, s_dim, n_label, h_bias, s_bias, init)
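# How a label-stacked biaffine weight like `U` is typically applied; a sketch
# only, since the scoring method is not part of the snippet above, and the
# name `score` is an assumption. Inputs are single (bias-augmented) vectors.
def score(self, h, s):
    # h: (h_dim,) head vector, s: (s_dim,) dependent vector
    lin = dy.reshape(self.U * s, (self.h_dim, self.n_label))  # column l holds U_l s
    return dy.transpose(lin) * h                              # (n_label,) scores h^T U_l s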
def __init__(self, model, n_char, char_dim, n_filter, win_sizes):
    pc = model.add_subcollection()
    self.clookup = pc.add_lookup_parameters((n_char, char_dim))
    self.Ws = [pc.add_parameters((char_dim, size, 1, n_filter),
                                 init=dy.GlorotInitializer(gain=0.5))
               for size in win_sizes]
    self.bs = [pc.add_parameters(n_filter, init=dy.ConstInitializer(0))
               for _ in win_sizes]
    self.win_sizes = win_sizes
    self.pc = pc
    self.spec = (n_char, char_dim, n_filter, win_sizes)
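# A sketch of how these filters are usually convolved over a word's characters
# and max-pooled into a single vector. The `__call__` name is an assumption,
# and valid convolution assumes len(char_ids) >= max(win_sizes) (no padding).
def __call__(self, char_ids):
    L = len(char_ids)
    emb = dy.concatenate([self.clookup[c] for c in char_ids], d=1)  # (char_dim, L)
    x = dy.reshape(emb, (emb.dim()[0][0], L, 1))  # add a channel dimension
    pooled = []
    for W, b, size in zip(self.Ws, self.bs, self.win_sizes):
        conv = dy.conv2d_bias(x, W, b, stride=[1, 1], is_valid=True)  # (1, L-size+1, n_filter)
        mat = dy.reshape(conv, (L - size + 1, conv.dim()[0][2]))
        pooled.append(dy.max_dim(mat, 0))  # max-over-time -> (n_filter,)
    return dy.concatenate(pooled)  # (n_filter * len(win_sizes),)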
def _add_parameters0(self, shape, lookup=False, init="default"):
    def ortho_weight(ndim):
        # random orthogonal matrix via SVD
        W = np.random.randn(ndim, ndim)
        u, s, v = np.linalg.svd(W)
        return u.astype(np.float64)

    def get_init(shape, init):
        # shape is a tuple of dims
        assert init in ["default", "const", "glorot", "ortho", "gaussian"], \
            "Unknown init method %s" % init
        if len(shape) == 1:  # set bias to 0
            return dy.ConstInitializer(0.)
        elif len(shape) == 2:
            if init == "default" or init == "glorot":
                return dy.GlorotInitializer()
            elif init == "const":
                return dy.ConstInitializer(0.)
            elif init == "gaussian":
                return dy.NormalInitializer(var=0.01 * 0.01)
            elif init == "ortho":
                assert shape[0] % shape[1] == 0, \
                    "Bad shape %s for ortho_init" % str(shape)
                num = shape[0] // shape[1]
                arr = ortho_weight(shape[1]) if num == 1 else \
                    np.concatenate([ortho_weight(shape[1]) for _ in range(num)])
                return dy.NumpyInitializer(arr)
        else:
            raise NotImplementedError(
                "Currently only support parameter dim <= 2.")

    # first, if init is an np-array, copy it verbatim
    if isinstance(init, np.ndarray):
        return self.model.add_parameters(shape, init=dy.NumpyInitializer(init))
    # then lookup parameters (DyNet's default Glorot init)
    if lookup:
        return self.model.add_lookup_parameters(shape)
    if len(shape) == 1:  # set bias to 0
        return self.model.add_parameters(shape, init=dy.ConstInitializer(0.))
    return self.model.add_parameters(shape, init=get_init(shape, init))
def get_init(shape, init):
    # shape is a tuple of dims; relies on the ortho_weight helper defined above
    assert init in ["default", "const", "glorot", "ortho", "gaussian"], \
        "Unknown init method %s" % init
    if len(shape) == 1:  # set bias to 0
        return dy.ConstInitializer(0.)
    elif len(shape) == 2:
        if init == "default" or init == "glorot":
            return dy.GlorotInitializer()
        elif init == "const":
            return dy.ConstInitializer(0.)
        elif init == "gaussian":
            return dy.NormalInitializer(var=0.01 * 0.01)
        elif init == "ortho":
            assert shape[0] % shape[1] == 0, \
                "Bad shape %s for ortho_init" % str(shape)
            num = shape[0] // shape[1]
            arr = ortho_weight(shape[1]) if num == 1 else \
                np.concatenate([ortho_weight(shape[1]) for _ in range(num)])
            return dy.NumpyInitializer(arr)
    else:
        raise NotImplementedError("Currently only support parameter dim <= 2.")
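# Example use with a DyNet ParameterCollection; `pc` and the shapes below are
# illustrative values, not taken from the snippets above.
pc = dy.ParameterCollection()
W = pc.add_parameters((400, 100), init=get_init((400, 100), "ortho"))  # 4 stacked 100x100 blocks
b = pc.add_parameters((400,), init=get_init((400,), "default"))        # bias -> zeros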
def __init__(self, model, hidden_sizes, act=dy.tanh, bias=True, dropout=0.0):
    pc = model.add_subcollection()
    sizes = hidden_sizes
    self.W = [pc.add_parameters((x, y))
              for x, y in zip(sizes[1:], sizes[:-1])]
    self.b = [pc.add_parameters((y,), init=dy.ConstInitializer(0))
              for y in sizes[1:]]
    self.pc = pc
    self.act = act
    self.bias = bias
    self.dropout = dropout
    self.spec = (hidden_sizes, act, bias, dropout)
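# A plausible forward pass for the MLP above; a sketch only, since the class
# body is not shown (the method name and dropout placement are assumptions).
def __call__(self, x, train=False):
    n = len(self.W)
    for i, (W, b) in enumerate(zip(self.W, self.b)):
        x = W * x + b if self.bias else W * x
        if i < n - 1:  # no nonlinearity after the output layer
            x = self.act(x)
            if train and self.dropout > 0.0:
                x = dy.dropout(x, self.dropout)
    return x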
def __init__(self, model, h_dim: int, s_dim: int, n_label: int,
             bias=False, init=dy.ConstInitializer(0.)):
    pc = model.add_subcollection()
    if bias:
        if n_label == 1:
            self.B = pc.add_parameters((h_dim,), init=dy.ConstInitializer(0.))
        else:
            self.V = pc.add_parameters((n_label, h_dim + s_dim),
                                       init=dy.ConstInitializer(0.))
            self.B = pc.add_parameters((n_label,), init=dy.ConstInitializer(0.))
    if init != 'orthonormal':
        self.U = pc.add_parameters((h_dim * n_label, s_dim), init)
    else:
        self.U = pc.parameters_from_numpy(
            orthonormal_initializer(h_dim * n_label, s_dim))
    self.h_dim, self.s_dim, self.n_label = h_dim, s_dim, n_label
    self.pc, self.bias = pc, bias
    self.spec = (h_dim, s_dim, n_label, bias, init)
def initialize_graph(self, num_words=None, num_chars=None):
    """build graph and link to parameters"""
    num_words = num_words if num_words is not None else len(self.w2i)
    num_chars = num_chars if num_chars is not None else len(self.c2i)
    if num_words == 0 or num_chars == 0:
        raise ValueError('Word2id and char2id have to be loaded before '
                         'initializing the graph.')
    print('Initializing the graph...')

    # initialize the word embeddings and the parameters
    self.cembeds = None
    if self.embeds_file:
        print("loading embeddings", file=sys.stderr)
        embeddings, emb_dim = load_embeddings_file(self.embeds_file)
        assert emb_dim == self.in_dim
        # cover all words that appear in either the embeddings or w2i
        num_words = len(set(embeddings.keys()).union(set(self.w2i.keys())))

        self.wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim), init=dynet.ConstInitializer(0.01))
        if self.c_in_dim > 0:
            self.cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim), init=dynet.ConstInitializer(0.01))

        init = 0
        for word in embeddings.keys():
            # for words already in w2i, update the vector; otherwise add the
            # word to w2i first (since data is kept as integers)
            if word not in self.w2i:
                self.w2i[word] = len(self.w2i.keys())  # add new word
            self.wembeds.init_row(self.w2i[word], embeddings[word])
            init += 1
        print("initialized: {}".format(init), file=sys.stderr)
    else:
        self.wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim), init=dynet.ConstInitializer(0.01))
        if self.c_in_dim > 0:
            self.cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim), init=dynet.ConstInitializer(0.01))

    # stack as many BiLSTM layers as specified by h_layers
    layers = []
    for layer_num in range(0, self.h_layers):
        if layer_num == 0:
            # first layer reads word (+ char) representations
            input_dim = (self.in_dim + self.c_in_dim * 2
                         if self.c_in_dim > 0 else self.in_dim)
            f_builder = dynet.CoupledLSTMBuilder(1, input_dim, self.h_dim,
                                                 self.model)
            b_builder = dynet.CoupledLSTMBuilder(1, input_dim, self.h_dim,
                                                 self.model)
            # returns forward and backward sequences
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))
        else:
            # add inner layers (if h_layers > 1)
            f_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            b_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))

    # output layer that predicts the task labels
    task_num_labels = len(self.tag2idx)
    output_layer = FFSequencePredictor(
        Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax))

    if self.c_in_dim > 0:
        self.char_rnn = BiRNNSequencePredictor(
            dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model),
            dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model))
    else:
        self.char_rnn = None

    self.predictors = dict()
    self.predictors["inner"] = layers
    self.predictors["output_layers_dict"] = output_layer
    self.predictors["task_expected_at"] = self.h_layers
"""various helper mappings"""
import _dynet as dynet

# DyNet added an init option to choose the initializer:
# https://github.com/clab/dynet/blob/master/python/CHANGES.md
INITIALIZER_MAP = {
    'glorot': dynet.GlorotInitializer(),
    'constant': dynet.ConstInitializer(0.01),
    'uniform': dynet.UniformInitializer(0.1),
    'normal': dynet.NormalInitializer(mean=0, var=1)
}

TRAINER_MAP = {
    "sgd": dynet.SimpleSGDTrainer,
    "adam": dynet.AdamTrainer,
    "adadelta": dynet.AdadeltaTrainer,
    "adagrad": dynet.AdagradTrainer,
    "momentum": dynet.MomentumSGDTrainer
}

ACTIVATION_MAP = {
    "tanh": dynet.tanh,
    "rectify": dynet.rectify
}

BUILDERS = {
    "lstm": dynet.LSTMBuilder,  # is dynet.VanillaLSTMBuilder (cf. https://github.com/clab/dynet/issues/474)
    "lstmc": dynet.CoupledLSTMBuilder,
    "gru": dynet.GRUBuilder,
    "rnn": dynet.SimpleRNNBuilder
}
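# Example of wiring the maps together; the dimensions are illustrative values.
# Note that when importing _dynet directly (as above), DyNet has to be
# initialized explicitly before any parameters are created.
dynet.DynetParams().init()
model = dynet.ParameterCollection()
trainer = TRAINER_MAP["adam"](model)
builder = BUILDERS["lstm"](1, 128, 256, model)  # layers, input dim, hidden dim
W = model.add_parameters((10, 256), init=INITIALIZER_MAP["glorot"])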