class Transformer(nn.Module):
    def __init__(self, encoder_vocab_size, decoder_vocab_size, d_model, d_hidden, n_heads, N):
        """Main class for the transformer model"""
        super(Transformer, self).__init__()
        d_k = d_model // n_heads
        self.linear = nn.Linear(d_model, decoder_vocab_size, bias=False)
        # Share the weights between the output linear layer and the input & output embeddings
        self.input_embeddings = Embedding(encoder_vocab_size, d_model, self.linear.weight)
        self.output_embeddings = Embedding(decoder_vocab_size, d_model, self.linear.weight)
        self.positional_encodings_input = PositionalEncodings(d_model)
        self.positional_encodings_output = PositionalEncodings(d_model)
        self.encoder = Encoder(d_model, d_k, d_k, n_heads, N)
        self.decoder = Decoder(d_model, d_k, d_k, n_heads, N)
        # Init with Glorot rather than the default Kaiming since we're not using ReLUs
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
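# A minimal, self-contained sketch (not from the original project) of the weight tying
# done above: the output projection and the token embedding share one parameter tensor.
# The project's custom Embedding(vocab, d_model, weight) wrapper is assumed to do this
# internally; plain torch modules are used here instead, with illustrative sizes.
import torch
import torch.nn as nn

d_model, vocab = 16, 100
proj = nn.Linear(d_model, vocab, bias=False)
emb = nn.Embedding(vocab, d_model)
emb.weight = proj.weight                  # tie: both modules now update the same tensor
nn.init.xavier_uniform_(proj.weight)      # Glorot init, as in the constructor above

tokens = torch.randint(0, vocab, (2, 5))
logits = proj(emb(tokens))                # (2, 5, vocab)
assert emb.weight.data_ptr() == proj.weight.data_ptr()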
import numpy as np


class Fasttext:
    def __init__(self, input_size, embed_size, hidden, output, padding_idx):
        self.embed = Embedding(input_size, embed_size, padding_idx)
        #self.hidden = Linear(embed_size, hidden)
        self.output_layer = Linear(hidden, output)
        self.layer = [self.embed, self.output_layer]
        self.params = []
        self.grads = []
        for layer in self.layer:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, x):
        '''
        x = list of vocab(index) = (batch, S)
        '''
        if len(x.shape) != 0:
            self.length = len(x)
        else:
            x = [x]
            self.length = 1
        output = self.embed.forward(x)
        # Average of the word embeddings (eps guards against division by zero)
        output = np.sum(output, axis=0, keepdims=True) / (self.length + 1e-6)
        #output = self.hidden.forward(output)
        output = self.output_layer.forward(output)
        return output

    def backward(self, dev, lr):
        '''
        dev = (Batch, class)
        '''
        dout = self.output_layer.backward(dev, lr)
        #dout = self.hidden.backward(dout, lr)
        self.embed.backward(dout, lr)
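# Self-contained NumPy sketch (illustrative sizes, not the project's Embedding/Linear
# classes) of the fastText-style forward above: look up the word vectors of a document,
# average them, and apply a linear output layer.
import numpy as np

rng = np.random.default_rng(0)
vocab, embed, n_class = 50, 8, 3
W_embed = rng.normal(size=(vocab, embed))
W_out = rng.normal(size=(embed, n_class))

x = np.array([4, 17, 9])                          # word indices of one document
hidden = W_embed[x].mean(axis=0, keepdims=True)   # (1, embed) average of word vectors
logits = hidden @ W_out                           # (1, n_class)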
def __init__(self, cfg):
    super().__init__()
    self.label_conditioning = cfg.label_conditioning
    if cfg.label_conditioning:
        self.label_embedding = Embedding(input_dim=cfg.labels_size, output_dim=cfg.latent_size)
    # Calculate number of layers from resolution for latent broadcast
    num_layers = int(np.log2(cfg.resolution)) * 2 - 2
    # Build model sequentially
    model = tf.keras.Sequential()
    if cfg.normalize_latents:
        model.add(NormalizePixels())
    for layer_id in range(cfg.num_dense_layers):
        units = cfg.dlatent_size if layer_id == cfg.num_dense_layers - 1 else cfg.hidden_size
        model.add(DenseMod(units=units, lr_multiplier=cfg.lr_multiplier))
        model.add(LeakyReLU(alpha=cfg.alpha, gain=np.sqrt(2)))
    model.add(Broadcast(dlatent_broadcast=num_layers))
    self.model = model
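# Rough stand-alone sketch of the latent-mapping structure built above (normalize the
# latent, run it through a small dense stack, broadcast once per synthesis layer).
# Plain Keras layers stand in for the project's NormalizePixels / DenseMod / Broadcast,
# and the resolution and layer sizes below are illustrative assumptions.
import numpy as np
import tensorflow as tf

resolution, latent, hidden, dlatent = 64, 32, 64, 32
num_layers = int(np.log2(resolution)) * 2 - 2          # 10 broadcast copies at 64x64

z = tf.keras.Input(shape=(latent,))
x = tf.keras.layers.Lambda(lambda t: t * tf.math.rsqrt(
        tf.reduce_mean(tf.square(t), axis=-1, keepdims=True) + 1e-8))(z)
for i in range(3):
    x = tf.keras.layers.Dense(dlatent if i == 2 else hidden)(x)
    x = tf.keras.layers.LeakyReLU(0.2)(x)
w = tf.keras.layers.Lambda(lambda t: tf.tile(t[:, None, :], [1, num_layers, 1]))(x)
mapping = tf.keras.Model(z, w)                         # (batch, num_layers, dlatent)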
class Encoder(nn.Module):
    def __init__(self, hparams):
        super(Encoder, self).__init__()
        self.embedding = Embedding(hparams)
        self.convolutions = nn.ModuleList()
        for i in range(hparams.encoder_n_convolutions):
            self.convolutions.append(nn.Sequential(
                Conv1d(hparams.symbols_embedding_dim if i == 0 else hparams.encoder_embedding_dim,
                       hparams.encoder_embedding_dim,
                       kernel_size=hparams.encoder_kernel_size,
                       w_init_gain='relu'),
                nn.BatchNorm1d(hparams.encoder_embedding_dim)))
        self.lstm = nn.LSTM(hparams.encoder_embedding_dim,
                            int(hparams.encoder_embedding_dim / 2),
                            batch_first=True, bidirectional=True)
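# Stand-alone sketch using plain torch layers (the project's Embedding and Conv1d
# wrappers are replaced by nn.Embedding / nn.Conv1d, and the sizes are illustrative)
# showing the same shape flow: embedded symbols -> conv stack over time -> BiLSTM.
import torch
import torch.nn as nn

n_symbols, emb_dim, kernel = 40, 32, 5
embed = nn.Embedding(n_symbols, emb_dim)
convs = nn.ModuleList([
    nn.Sequential(nn.Conv1d(emb_dim, emb_dim, kernel, padding=kernel // 2),
                  nn.BatchNorm1d(emb_dim), nn.ReLU())
    for _ in range(3)])
lstm = nn.LSTM(emb_dim, emb_dim // 2, batch_first=True, bidirectional=True)

text = torch.randint(0, n_symbols, (2, 7))     # (batch, time)
x = embed(text).transpose(1, 2)                # (batch, emb_dim, time) for Conv1d
for conv in convs:
    x = conv(x)
outputs, _ = lstm(x.transpose(1, 2))           # (batch, time, emb_dim)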
import numpy as np
import pickle


class HS_skipgram:
    def __init__(self, vocab_size, projection, lr):
        self.Embedding = Embedding(vocab_size, projection)
        self.HSvector = Embedding(vocab_size - 1, projection)
        self.lr = lr
        self.layers = [self.Embedding, self.HSvector]
        self.params = []
        self.grads = []
        for layer in self.layers:
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

    def forward(self, x, idx_path):
        '''
        inputs : 1 x D(projection)
        label : 1 x [direction_path(1, depth), idx_path(1, depth)]
        '''
        self.x = x
        self.hidden = self.Embedding.forward(self.x)
        self.hirearchy_vectors = self.HSvector.forward(idx_path)
        #out = np.sum(self.hirearchy_vectors * self.hidden, axis=1, keepdims=True)
        out = np.matmul(self.hirearchy_vectors, self.hidden.T)
        return out

    def backward(self, dout):
        # d_lin: gradient for the path node vectors, (path length x hidden)
        d_lin = np.matmul(dout, self.hidden)
        #d_h = np.sum(dout * self.hirearchy_vectors, axis=0)
        d_h = np.matmul(dout.T, self.hirearchy_vectors)
        self.HSvector.backward(d_lin, self.lr)
        self.Embedding.backward(d_h, self.lr)
        # Commented-out sanity check that the stored grads are shared with the layers
        '''
        print((self.grads[0] == self.Embedding.grads[0]).all())
        print((self.grads[1] == self.HSvector.grads[0]).all())
        '''

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self.params, f, pickle.HIGHEST_PROTOCOL)

    def load(self, path):
        with open(path, 'rb') as f:
            x = pickle.load(f)
        self.params = x
        for param, layer in zip(self.params, self.layers):
            layer.params = [param]

    def query(self, word, word2idx, idx2word, top=5):
        if word not in word2idx:
            print("%s is not in the corpus" % word)
            return
        W_in, _ = self.params
        query_id = word2idx[word]
        query_vec = W_in[query_id]
        query_vec = np.expand_dims(query_vec, 0)
        # Similarities sorted in ascending order, so the most similar words come last
        similarity = cosine_similarity(W_in, query_vec)
        # Exclude the query word itself (it is always the most similar entry)
        result = similarity.argsort(axis=0)[-top-1:-1]
        print(word)
        for i in range(top):
            print(idx2word[int(result[i])], similarity[int(result[i])])
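# Self-contained NumPy sketch (illustrative sizes, not the project's Embedding class)
# of the hierarchical-softmax scoring done in forward(): the centre word's hidden
# vector is dotted with the inner-node vectors along the target word's tree path,
# giving one logit per node; the path indices and directions here are made up.
import numpy as np

rng = np.random.default_rng(0)
vocab, proj = 20, 8
W_in = rng.normal(size=(vocab, proj))          # word embeddings
W_node = rng.normal(size=(vocab - 1, proj))    # one vector per inner tree node

centre = 3
idx_path = np.array([0, 5, 11])                # inner nodes from root to the target leaf
direction = np.array([1, -1, 1])               # +1 = left child, -1 = right child

hidden = W_in[centre:centre + 1]               # (1, proj)
logits = W_node[idx_path] @ hidden.T           # (depth, 1), as in forward()
loss = -np.log(1.0 / (1.0 + np.exp(-direction[:, None] * logits))).sum()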
import numpy as np
import pickle


class Negative_Sampling:
    def __init__(self, vocab_size, projection, lr):
        self.Embedding = Embedding(vocab_size, projection)
        self.N_Embedding = Embedding(vocab_size, projection)
        self.lr = lr
        self.layers = [self.Embedding, self.N_Embedding]
        self.params = []
        self.grads = []
        for layer in self.layers:
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

    def forward(self, x, sampled):
        '''
        x = (N, 1) Batch x 1
        sampled = (N, sampled(k) * skip_size + 1)
        '''
        self.x = x
        # N x projection
        self.hidden = self.Embedding.forward(x)
        # N x 1 x projection
        out = np.expand_dims(self.hidden, axis=1)
        # N x sampled x projection
        self.vec = self.N_Embedding.forward(sampled)
        # N x sampled
        output = np.sum(out * self.vec, axis=2)
        return output

    def backward(self, dout):
        # dout: (N x sampled)
        # d_nemb ==> (sampled, projection)
        d_nemb = np.matmul(dout.T, self.hidden)
        # d_emb ==> (N, projection)
        dout = np.expand_dims(dout, axis=2)
        d_emb = np.sum(dout * self.vec, axis=1)
        self.N_Embedding.backward(d_nemb, self.lr)
        self.Embedding.backward(d_emb, self.lr)

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self.params, f, pickle.HIGHEST_PROTOCOL)

    def load(self, path):
        with open(path, 'rb') as f:
            x = pickle.load(f)
        self.params = x
        for param, layer in zip(self.params, self.layers):
            layer.params = [param]

    def query(self, word, word2idx, idx2word, top=5):
        if word not in word2idx:
            print("%s is not in the corpus" % word)
            return
        W_in, _ = self.params
        query_id = word2idx[word]
        query_vec = W_in[query_id]
        # Similarities sorted in ascending order, so the most similar words come last
        similarity = cosine_similarity(W_in, query_vec)
        # Exclude the query word itself (it is always the most similar entry)
        result = similarity.argsort()[-top-1:-1]
        print(word)
        for i in range(top):
            print(idx2word[int(result[i])], similarity[int(result[i])])
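# Self-contained NumPy sketch (illustrative sizes, not the project's Embedding class)
# of the negative-sampling score computed in forward(): each centre word's hidden
# vector is dotted with its positive target plus k sampled negatives, and a binary
# cross-entropy loss is taken over those k+1 logits.
import numpy as np

rng = np.random.default_rng(0)
vocab, proj, batch, k = 30, 8, 4, 5
W_in = rng.normal(size=(vocab, proj))
W_out = rng.normal(size=(vocab, proj))

centre = rng.integers(0, vocab, size=batch)             # (N,)
sampled = rng.integers(0, vocab, size=(batch, k + 1))   # positive + k negatives per row

hidden = W_in[centre]                                   # (N, proj)
vec = W_out[sampled]                                    # (N, k+1, proj)
scores = np.sum(hidden[:, None, :] * vec, axis=2)       # (N, k+1), as in forward()
labels = np.zeros_like(scores)
labels[:, 0] = 1.0                                      # first column is the true context word
probs = 1.0 / (1.0 + np.exp(-scores))
loss = -np.mean(labels * np.log(probs) + (1 - labels) * np.log(1 - probs))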