def initialize_params(self, n_in, n_out, activation):
    if USE_XAVIER_INIT:
        if activation == ReLU:
            # Xavier-style scale with a larger gain for ReLU, plus a small
            # positive bias so the units start out active
            scale = np.sqrt(4.0 / (n_in + n_out), dtype=theano.config.floatX)
            b_vals = np.ones(n_out, dtype=theano.config.floatX) * 0.01
        elif activation == softmax:
            # very small weights for the softmax output layer
            scale = np.float32(0.001).astype(theano.config.floatX)
            b_vals = np.zeros(n_out, dtype=theano.config.floatX)
        else:
            scale = np.sqrt(2.0 / (n_in + n_out), dtype=theano.config.floatX)
            b_vals = np.zeros(n_out, dtype=theano.config.floatX)
        W_vals = random_init((n_in, n_out), rng_type="normal") * scale
    else:
        W_vals = random_init((n_in, n_out))
        if activation == softmax:
            W_vals *= 0.00
        if activation == ReLU:
            b_vals = np.ones(n_out, dtype=theano.config.floatX) * 0.01
        else:
            b_vals = random_init((n_out,))
    self.W = create_shared(W_vals, name="W")
    if self.has_bias:
        self.b = create_shared(b_vals, name="b")
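# A minimal NumPy sketch of the Xavier-style scaling used above, for
# illustration only; `xavier_scale` and the concrete sizes are assumptions,
# not part of the library.
import numpy as np

def xavier_scale(n_in, n_out, relu=False):
    # gain of 4 for ReLU units, 2 otherwise, mirroring the branches above
    gain = 4.0 if relu else 2.0
    return np.sqrt(gain / (n_in + n_out))

W = (np.random.randn(256, 128) * xavier_scale(256, 128, relu=True)).astype(np.float32)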
def create_parameters(self):
    n_d = self.n_d
    self.W1_c = create_shared(random_init((n_d, n_d)), name="W1_c")
    self.W1_h = create_shared(random_init((n_d, n_d)), name="W1_h")
    self.w = create_shared(random_init((n_d,)), name="w")
    self.W2_r = create_shared(random_init((n_d, n_d)), name="W2_r")
    self.W2_h = create_shared(random_init((n_d, n_d)), name="W2_h")
    self.lst_params = [self.W1_h, self.W1_c, self.W2_h, self.W2_r, self.w]
def create_parameters(self):
    n_in, n_hidden = self.n_in, self.n_hidden
    activation = self.activation
    self.w1 = create_shared(random_init((n_in,)), name="w1")
    self.w2 = create_shared(random_init((n_hidden,)), name="w2")
    bias_val = random_init((1,))[0]
    self.bias = theano.shared(np.cast[theano.config.floatX](bias_val))
    rlayer = RCNN((n_in + 1), n_hidden, activation=activation, order=2)
    self.rlayer = rlayer
    self.layers = [rlayer]
def create_parameters(self):
    n_in, n_out = self.n_in, self.n_out
    rng_type = "uniform"
    scale = 1.0 / self.n_out ** 0.5
    # rng_type = None
    # scale = 1.0
    self.P = create_shared(random_init((n_in, n_out), rng_type=rng_type) * scale, name="P")
    self.Q = create_shared(random_init((n_in, n_out), rng_type=rng_type) * scale, name="Q")
    self.R = create_shared(random_init((n_in, n_out), rng_type=rng_type) * scale, name="R")
    self.O = create_shared(random_init((n_out, n_out), rng_type=rng_type) * scale, name="O")
    if self.activation == ReLU:
        self.b = create_shared(np.ones(n_out, dtype=theano.config.floatX) * 0.01, name="b")
    else:
        self.b = create_shared(random_init((n_out,)), name="b")
def __init__(self, n_in, n_out, activation=tanh,
             order=1, has_outgate=False, mode=1, clip_gradients=False):
    """
    :param n_in: input dimension
    :param n_out: hidden (output) dimension
    :param activation: non-linear activation applied by the layer
    :param order: CNN feature width
    :param has_outgate: whether to add an output gate as in LSTM; this can be
        useful for language modeling
    :param mode: 0 if non-linear filter; 1 if linear filter (default)
    :param clip_gradients: whether to clip gradients that flow through this layer
    """
    self.n_in = n_in
    self.n_out = n_out
    self.activation = activation
    self.order = order
    self.clip_gradients = clip_gradients
    self.has_outgate = has_outgate
    self.mode = mode

    internal_layers = self.internal_layers = []
    for i in range(order):
        input_layer = Layer(n_in, n_out, linear, has_bias=False,
                            clip_gradients=clip_gradients)
        internal_layers.append(input_layer)

    forget_gate = RecurrentLayer(n_in, n_out, sigmoid, clip_gradients)
    internal_layers.append(forget_gate)

    self.bias = create_shared(random_init((n_out,)), name="bias")

    if has_outgate:
        self.out_gate = RecurrentLayer(n_in, n_out, sigmoid, clip_gradients)
        self.internal_layers += [self.out_gate]
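# A hedged sketch of how a forget gate like the one created above is commonly
# combined with an input projection, c_t = f_t * c_{t-1} + (1 - f_t) * x_t.
# This is an assumption for illustration; the layer's actual recurrence is
# defined elsewhere in the library.
import numpy as np

def gated_step(c_prev, x_t, f_t):
    # element-wise interpolation between the previous state and the new input
    return f_t * c_prev + (1.0 - f_t) * x_t

c = gated_step(np.zeros(8), np.random.randn(8), np.full(8, 0.5))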
def initialization(n_clusters, initialize, x, random_state, metric):
    # this method initializes the first set of centroids
    if initialize == 'random':
        centroids = init.random_init(x, n_clusters, random_state)
    elif initialize == 'k-means++':
        centroids = init.kpp_init(x, n_clusters, metric)
    else:
        raise ValueError("unknown initialization method: {}".format(initialize))
    return centroids
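# A minimal NumPy stand-in for the 'random' branch above: pick n_clusters
# distinct rows of x as the initial centroids. `random_centroids` is an
# illustrative helper, not the library's `init.random_init`.
import numpy as np

def random_centroids(x, n_clusters, random_state=0):
    rng = np.random.RandomState(random_state)
    idx = rng.choice(len(x), size=n_clusters, replace=False)
    return x[idx]

x = np.random.rand(100, 5)
centroids = random_centroids(x, n_clusters=3, random_state=42)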
def __init__(self, n_in, n_out, activation=tanh, order=1, clip_gradients=False):
    self.n_in = n_in
    self.n_out = n_out
    self.activation = activation
    self.order = order
    self.clip_gradients = clip_gradients

    internal_layers = self.internal_layers = []
    for i in range(order):
        input_layer = Layer(n_in, n_out, linear, has_bias=False,
                            clip_gradients=clip_gradients)
        internal_layers.append(input_layer)

    self.bias = create_shared(random_init((n_out,)), name="bias")
def create_parameters(self):
    n_d = self.n_d
    self.W1_c = create_shared(random_init((n_d, n_d)), name="W1_c")
    self.W1_q = create_shared(random_init((n_d, n_d)), name="W1_q")
    if self.model_type == 0:
        say('\nAttention: standard\n')
        self.W2_r = create_shared(random_init((n_d, n_d)), name="W2_r")
        self.w = create_shared(random_init((n_d,)), name="w")
        self.lst_params = [self.W1_q, self.W1_c, self.W2_r, self.w]
        self.forward = self.attention
    else:
        say('\nAttention: decomposition\n')
        self.W2_r_a = create_shared(random_init((n_d, n_d)), name="W2_r_a")
        self.W2_r_b = create_shared(random_init((n_d, n_d)), name="W2_r_b")
        self.w_a = create_shared(random_init((n_d,)), name="w_a")
        self.w_b = create_shared(random_init((n_d,)), name="w_b")
        self.lst_params = [self.W1_q, self.W1_c, self.W2_r_a, self.W2_r_b,
                           self.w_a, self.w_b]
        self.forward = self.attention_decomp
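# A hedged NumPy sketch of additive attention consistent with the parameter
# names above (scores of the form w^T tanh(C W1_c + q W1_q)). The actual
# `attention` / `attention_decomp` methods are defined elsewhere; the shapes
# and values here are illustrative assumptions.
import numpy as np

n_d, n_ctx = 8, 5
W1_c = np.random.randn(n_d, n_d) * 0.1
W1_q = np.random.randn(n_d, n_d) * 0.1
w = np.random.randn(n_d) * 0.1
C = np.random.randn(n_ctx, n_d)            # context vectors c_i
q = np.random.randn(n_d)                   # query vector
scores = np.dot(np.tanh(np.dot(C, W1_c) + np.dot(q, W1_q)), w)
alpha = np.exp(scores - scores.max())
alpha /= alpha.sum()                       # softmax attention weights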
def __init__(self, n_d, vocab, oov="<unk>", embs=None, fix_init_embs=True):
    """
    :param n_d: dimension of word embeddings; may be over-written if embs is specified
    :param vocab: an iterator of string tokens; the layer will allocate an ID
        and a vector for each token in it
    :param oov: out-of-vocabulary token
    :param embs: an iterator of (word, vector) pairs; these will be added to the layer
    :param fix_init_embs: whether to fix the initial word vectors loaded from embs
    """
    if embs is not None:
        lst_words = []
        vocab_map = {}
        emb_vals = []
        for word, vector in embs:
            assert word not in vocab_map, "Duplicate words in initial embeddings"
            vocab_map[word] = len(vocab_map)
            emb_vals.append(vector)
            lst_words.append(word)

        self.init_end = len(emb_vals) if fix_init_embs else -1
        if n_d != len(emb_vals[0]):
            say("WARNING: n_d ({}) != init word vector size ({}). Use {} instead.\n".format(
                n_d, len(emb_vals[0]), len(emb_vals[0])
            ))
            n_d = len(emb_vals[0])

        say("{} pre-trained embeddings loaded.\n".format(len(emb_vals)))

        ###########################
        # Set special vocab vecs  #
        #   vocab = [UNK, PAD]    #
        #   UNK is the zero vec   #
        # PAD is not the zero vec #
        ###########################
        for word in vocab:
            if word not in vocab_map:
                vocab_map[word] = len(vocab_map)
                emb_vals.append(random_init((n_d,)) * (0.001 if word != oov else 0.0))
                lst_words.append(word)

        emb_vals = np.vstack(emb_vals).astype(theano.config.floatX)
        self.vocab_map = vocab_map
        self.lst_words = lst_words
    else:
        lst_words = []
        vocab_map = {}
        for word in vocab:
            if word not in vocab_map:
                vocab_map[word] = len(vocab_map)
                lst_words.append(word)
        self.lst_words = lst_words
        self.vocab_map = vocab_map
        emb_vals = random_init((len(self.vocab_map), n_d))
        self.init_end = -1

    if oov is not None and oov is not False:
        assert oov in self.vocab_map, "oov {} not in vocab".format(oov)
        self.oov_tok = oov
        self.oov_id = self.vocab_map[oov]
    else:
        self.oov_tok = None
        self.oov_id = -1

    self.embeddings = create_shared(emb_vals)
    if self.init_end > -1:
        self.embeddings_trainable = self.embeddings[self.init_end:]
    else:
        self.embeddings_trainable = self.embeddings

    self.n_V = len(self.vocab_map)
    self.n_d = n_d
def __init__(self, n_d, vocab, oov="<unk>", embs=None, fix_init_embs=True):
    if embs is not None:
        lst_words = []
        vocab_map = {}
        emb_vals = []
        self.init_end = None
        for word in vocab:
            if word in embs:
                vocab_map[word] = len(vocab_map)
                vector = embs[word]
                emb_vals.append(vector)
                lst_words.append(word)
            else:
                if self.init_end is None:
                    self.init_end = len(emb_vals) if fix_init_embs else -1
                vocab_map[word] = len(vocab_map)
                emb_vals.append(random_init((n_d,)) * (0.0 if (word == oov) else 0.001))
                lst_words.append(word)

        if self.init_end is None:
            # every vocab word had a pre-trained vector
            self.init_end = len(emb_vals) if fix_init_embs else -1

        if n_d != len(emb_vals[0]):
            say("WARNING: n_d ({}) != init word vector size ({}). Use {} instead.\n".format(
                n_d, len(emb_vals[0]), len(emb_vals[0])
            ))
            n_d = len(emb_vals[0])

        say("{} pre-trained embeddings loaded.\n".format(len(emb_vals)))

        emb_vals = np.vstack(emb_vals).astype(theano.config.floatX)
        self.vocab_map = vocab_map
        self.lst_words = lst_words
    else:  # TODO: Update to above
        lst_words = []
        vocab_map = {}
        for word in vocab:
            if word not in vocab_map:
                vocab_map[word] = len(vocab_map)
                lst_words.append(word)
        self.lst_words = lst_words
        self.vocab_map = vocab_map
        emb_vals = random_init((len(self.vocab_map), n_d))
        self.init_end = -1

    if oov is not None and oov is not False:
        assert oov in self.vocab_map, "oov {} not in vocab".format(oov)
        self.oov_tok = oov
        self.oov_id = self.vocab_map[oov]
    else:
        self.oov_tok = None
        self.oov_id = -1

    self.embeddings = create_shared(emb_vals)
    if self.init_end > -1:
        self.embeddings_trainable = self.embeddings[self.init_end:]
    else:
        self.embeddings_trainable = self.embeddings

    self.n_V = len(self.vocab_map)
    self.n_d = n_d
def create_parameters(self):
    self.w1 = create_shared(random_init((self.n_d * 2, self.n_d * 2)), name="w")
def __init__(self, n_d, vocab, oov="<unk>", embs=None,
             fix_init_embs=True, trainable=True):
    self.init_embeddings = None
    if embs is not None:
        lst_words = []
        vocab_map = {}  # e.g. {'word1': 0, 'word2': 1, ...}
        emb_vals = []
        for word, vector in embs:
            assert word not in vocab_map, "Duplicate words in initial embeddings"
            vocab_map[word] = len(vocab_map)
            emb_vals.append(vector)
            lst_words.append(word)

        self.init_end = len(emb_vals) if fix_init_embs else -1
        if n_d != len(emb_vals[0]):
            print("WARNING: n_d ({}) != init word vector size ({}). Use {} instead.\n"
                  .format(n_d, len(emb_vals[0]), len(emb_vals[0])))
            n_d = len(emb_vals[0])

        print("{} pre-trained embeddings loaded.\n".format(len(emb_vals)))

        for word in vocab:
            if word not in vocab_map:
                # continue adding words to the embedding matrix;
                # the out-of-vocab token is initialized as the zero vector
                vocab_map[word] = len(vocab_map)
                emb_vals.append(random_init((n_d,)) * (0.001 if word != oov else 0.0))
                lst_words.append(word)

        emb_vals = np.vstack(emb_vals).astype(np.float32)
        self.vocab_map = vocab_map
        self.lst_words = lst_words
    else:
        # no embeddings given: randomly initialize the whole embedding matrix
        lst_words = []
        vocab_map = {}
        for word in vocab:
            if word not in vocab_map:
                vocab_map[word] = len(vocab_map)
                lst_words.append(word)
        self.lst_words = lst_words
        self.vocab_map = vocab_map
        emb_vals = random_init((len(self.vocab_map), n_d))
        self.init_end = -1

    if oov is not None and oov is not False:
        # if oov is given, it should already be in vocab_map
        assert oov in self.vocab_map, "oov {} not in vocab".format(oov)
        self.oov_tok = oov
        self.oov_id = self.vocab_map[oov]
    else:
        # if oov is not given, set the id to -1 so that it is never used
        self.oov_tok = None
        self.oov_id = -1

    self.n_V = len(self.vocab_map)
    self.n_d = n_d
    self._initialize_params(emb_vals, trainable)
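# A self-contained NumPy sketch of the vocabulary/embedding bookkeeping done by
# the embedding-layer constructors above: the OOV token gets the zero vector,
# other new words get tiny random vectors, and unknown words map to the OOV id.
# The toy vocabulary and sizes are illustrative assumptions.
import numpy as np

n_d, oov = 4, "<unk>"
vocab = ["<unk>", "<pad>", "the", "cat"]
vocab_map = {w: i for i, w in enumerate(vocab)}
emb_vals = np.vstack([
    np.zeros(n_d) if w == oov else np.random.uniform(-1, 1, n_d) * 0.001
    for w in vocab
]).astype(np.float32)
ids = [vocab_map.get(w, vocab_map[oov]) for w in ["the", "dog"]]  # unseen word -> oov id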
def create_parameters(self):
    self.W_e = create_shared(random_init((self.n_in, self.n_out)), name="W_e")
    self.W = create_shared(random_init((self.n_out * 2, self.n_out)), name="W")
    self.U = theano.shared(random_init((self.n_out * 3, self.n_out * 3)), name="U")
    self.G = theano.shared(random_init((self.n_out * 2, self.n_out * 2)), name="G")
    self.lst_params = [self.W_e, self.W, self.G, self.U]
def create_parameters(self):
    # filter shape: (n. output feature maps, n. input channels, window height, width 1)
    w_shp = (self.n_out, self.n_in, self.window, 1)
    self.filter = create_shared(random_init(w_shp))
    self.bias = create_shared(random_init((w_shp[0],)), name="bias")
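# A hedged NumPy illustration of the filter tensor allocated above. The
# dimension order mirrors a Theano conv2d filter (output maps, input channels,
# rows, columns); the concrete numbers are assumptions for the example.
import numpy as np

n_out, n_in, window = 64, 100, 3
w_shp = (n_out, n_in, window, 1)
filt = (np.random.uniform(-1, 1, size=w_shp) * 0.01).astype(np.float32)
bias = np.zeros(w_shp[0], dtype=np.float32)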