def forward(self, words, dropout=0.1, scale=None):
    # Embedding dropout: zero out entire rows of the embedding matrix (one Bernoulli
    # draw per vocabulary word) before the lookup, so a dropped word is dropped at
    # every position in the batch.
    if dropout:
        size = (self.embed.weight.size(0), 1)
        mask = Variable(dropout_mask(self.embed.weight.data, size, dropout))
        masked_embed_weight = mask * self.embed.weight
    else:
        masked_embed_weight = self.embed.weight
    if scale:
        masked_embed_weight = scale * masked_embed_weight

    padding_idx = self.embed.padding_idx
    if padding_idx is None:
        padding_idx = -1

    if IS_TORCH_04:
        X = F.embedding(words, masked_embed_weight, padding_idx, self.embed.max_norm,
                        self.embed.norm_type, self.embed.scale_grad_by_freq, self.embed.sparse)
    else:
        X = self.embed._backend.Embedding.apply(words, masked_embed_weight, padding_idx,
                                                self.embed.max_norm, self.embed.norm_type,
                                                self.embed.scale_grad_by_freq, self.embed.sparse)
    return X
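# For reference, a minimal sketch of the dropout_mask helper this method assumes,
# following the AWD-LSTM/fastai convention (a Bernoulli keep-mask rescaled by
# 1/(1 - p) so activations keep the same expectation); the project's real helper
# may differ in detail:
def dropout_mask(x, sz, dropout):
    return x.new(*sz).bernoulli_(1 - dropout) / (1 - dropout)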
def positional_encoder(embedded_sentence):
    # embedded_sentence.size() = (batch_size, num_sentences, num_tokens, embedding_length)
    # l.size() = (num_tokens, embedding_length)
    # output.size() = (batch_size, num_sentences, embedding_length)
    # The outputs are the fact vectors f1, f2, f3, ... which go into the input fusion
    # layer in the next step, where a bidirectional GRU shares information between sentences.
    batch_size, num_sentences, num_tokens, embedding_length = embedded_sentence.size()
    l = []
    # The weight matrix is the same for all sentences in all batches, since num_tokens
    # and embedding_length are fixed for the entire dataset.
    for j in range(num_tokens):
        x = []
        for d in range(embedding_length):
            x.append((1 - (j / (num_tokens - 1))) - (d / (embedding_length - 1)) * (1 - 2 * j / (num_tokens - 1)))
        l.append(x)
    l = torch.FloatTensor(l)
    l = l.unsqueeze(0)  # add a leading dimension for batch_size
    l = l.unsqueeze(1)  # add a second dimension for num_sentences
    l = l.expand_as(embedded_sentence)  # l.size() = (batch_size, num_sentences, num_tokens, embedding_length)
    mat = embedded_sentence * Variable(l.cuda())
    f_ids = torch.sum(mat, dim=2)  # sum along the token dimension (keepdim is False by default, so no extra squeeze is needed)
    return f_ids
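# The double loop above recomputes the same (num_tokens, embedding_length) weight
# matrix on every call. A minimal vectorized sketch of the same formula, assumed
# equivalent, using broadcasting instead of Python loops:
def positional_weights(num_tokens, embedding_length):
    j = torch.arange(num_tokens).float().unsqueeze(1) / (num_tokens - 1)              # (J, 1)
    d = torch.arange(embedding_length).float().unsqueeze(0) / (embedding_length - 1)  # (1, D)
    return (1 - j) - d * (1 - 2 * j)  # broadcasts to (J, D)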
def forward(self, x):
    # h0.size() = (num_layers, batch_size, hidden_size); note that torch.zeros takes
    # no batch_first argument -- that flag belongs on the RNN module itself.
    h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
    out, _ = self.rnn(x, h0)
    out = self.fc(out[:, -1, :])  # classify from the last time step
    return out
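# A hedged sketch of the enclosing module this forward appears to belong to; the
# attribute names (rnn, fc, num_layers, hidden_size) are read off the method body,
# and the constructor arguments are hypothetical:
class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)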
def prediction(k_data):
    # k_data (json) holds the previous k data-points of the given currency.
    # k is determined by the number of previous points used for training (currently k = 5).
    m = int(k_data['next'])
    k_data = np.array(k_data['data'])
    # Model directory here
    model = torch.load('../prediction/model.pt')
    output = []
    with torch.no_grad():
        for i in range(m):
            data = Variable(torch.from_numpy(k_data))
            out = model(data)[0].cpu().float().numpy()
            k_data = np.append(k_data, out)  # numpy arrays have no .append method
            k_data = k_data[1:]  # slide the window: drop only the oldest point
            output.append(out.tolist())
    return jsonify(output)
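# Hypothetical usage, assuming the JSON payload shape the function parses
# ('data' holds the k most recent points, 'next' the number of steps to predict);
# jsonify requires this to run inside a Flask application context:
sample = {'data': [0.12, 0.15, 0.11, 0.14, 0.13], 'next': 3}
response = prediction(sample)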
def forward(self, facts, G):
    # facts.size() = (batch_size, num_sentences, embedding_length)
    # fact.size() = (batch_size, embedding_length=hidden_size)
    # G.size() = (batch_size, num_sentences)
    # g.size() = (batch_size, )
    h_0 = Variable(torch.zeros(self.hidden_size)).cuda()
    for sen in range(facts.size()[1]):
        fact = facts[:, sen, :]
        g = G[:, sen]
        if sen == 0:  # initialization before the first sentence only
            hi_1 = h_0.unsqueeze(0).expand_as(fact)
        hi_1 = self.AttnGRUCell(fact, hi_1, g)
    C = hi_1  # final hidden vector: the contextual vector used for updating memory
    return C
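# A minimal sketch of the attention-gated GRU cell the loop above calls, following
# the DMN+ formulation (Xiong et al., 2016) in which the usual update gate is
# replaced by the attention score g; the layer names Wr/Ur/W/U are illustrative:
class AttnGRUCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(AttnGRUCell, self).__init__()
        self.Wr = nn.Linear(input_size, hidden_size)
        self.Ur = nn.Linear(hidden_size, hidden_size)
        self.W = nn.Linear(input_size, hidden_size)
        self.U = nn.Linear(hidden_size, hidden_size)

    def forward(self, fact, hi_1, g):
        r = torch.sigmoid(self.Wr(fact) + self.Ur(hi_1))       # reset gate
        h_tilda = torch.tanh(self.W(fact) + r * self.U(hi_1))  # candidate state
        g = g.unsqueeze(1).expand_as(h_tilda)                  # broadcast the attention gate
        return g * h_tilda + (1 - g) * hi_1                    # g replaces the update gate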
def forward(self, input, word_embedding):
    # input.size() = (batch_size, num_sentences, num_tokens)
    # word_embedding -> (batch_size, num_sentences, num_tokens, embedding_length)
    # positional_encoder(word_embedding(input)) -> (batch_size, num_sentences, embedding_length)
    # The bidirectional GRU then turns the positionally encoded sentences into facts.
    # facts.size() = (batch_size, num_sentences, embedding_length), embedding_length = hidden_size
    batch_size, num_sentences, num_tokens = input.size()
    input = input.view(batch_size, -1)  # flatten so the embedding lookup sees a 2-D index tensor
    input = word_embedding(input)
    input = input.view(batch_size, num_sentences, num_tokens, -1)  # restore the sentence/token axes
    input = self.positional_encoder(input)
    input = self.dropout(input)
    # Initialize the hidden state (t = 0); first dim is 2 for the two GRU directions.
    h0 = Variable(torch.zeros(2, batch_size, self.hidden_size).cuda())
    facts, hdn = self.gru(input, h0)
    # Sum the forward and backward GRU outputs into a single fact vector per sentence.
    facts = facts[:, :, :self.hidden_size] + facts[:, :, self.hidden_size:]
    return facts
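# A hedged sketch of the attributes this forward relies on; the exact constructor
# arguments (dropout rate, batch_first) are assumptions read off the method body:
class InputModule(nn.Module):
    def __init__(self, hidden_size):
        super(InputModule, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, bidirectional=True, batch_first=True)
        self.dropout = nn.Dropout(0.1)
        self.positional_encoder = positional_encoder  # the function defined above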
def predict_model(image, checkpoint, topk=5, labels='', gpu=False):
    if args.image:
        image = args.image
    if args.checkpoint:
        checkpoint = args.checkpoint
    if args.topk:
        topk = args.topk
    if args.labels:
        labels = args.labels
    if args.gpu:
        gpu = args.gpu

    checkpoint_dict = torch.load(checkpoint)
    arch = checkpoint_dict['arch']
    num_labels = len(checkpoint_dict['class_to_idx'])
    hidden_units = checkpoint_dict['hidden_units']
    model = load_model(arch=arch, num_labels=num_labels, hidden_units=hidden_units)

    if gpu and torch.cuda.is_available():
        model.cuda()

    was_training = model.training
    model.eval()

    image = process_image(image)
    image = Variable(torch.FloatTensor(image), requires_grad=True)
    image = image.unsqueeze(0)
    if gpu and torch.cuda.is_available():
        image = image.cuda()

    result = model(image).topk(topk)
    # .numpy is a method and must be called; the same lines work on CPU and GPU,
    # so the duplicated if/else branches collapse into one.
    probs = torch.nn.functional.softmax(result[0].data, dim=1).cpu().numpy()[0]
    classes = result[1].data.cpu().numpy()[0]

    if labels:
        with open(labels, 'r') as f:
            cat_to_name = json.load(f)
        labels = list(cat_to_name.values())
        classes = [labels[x] for x in classes]

    model.train(mode=was_training)

    if args.image:
        print('Prediction and probabilities:', list(zip(classes, probs)))

    return probs, classes
def init_hidden(self, batch_size):
    return Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size))
f_scheduler = optim.lr_scheduler.StepLR(f_opt, step_size=5000, gamma=0.1)
g_scheduler = optim.lr_scheduler.StepLR(g_opt, step_size=1000, gamma=0.1)

# Gradient Penalty Hyper-parameters.
c = 1e-2
batch_size = 128
lmbda = 10
max_iters = 1000

sample_gen = mog_gen(d)
train_log = open('train.log', 'w')

# This loop implements the gradient penalty (WGAN-GP) and PacGAN learning algorithm.
for it in range(max_iters):
    # Train the critic f for several steps per generator step.
    for t_critic in range(5):
        data = sample_gen.get_random_sample(batch_size)
        x = Variable(torch.from_numpy(data).cuda().float(), requires_grad=True)
        z = Variable(torch.randn(batch_size, d).cuda(), requires_grad=True)

        f_x = f(x)
        g_z = g(z)
        fg_z = f(g_z)

        # Interpolate between real and generated samples: x_hat = eps * x + (1 - eps) * g(z),
        # with a per-sample eps applied via a diagonal matrix.
        eps = Variable(torch.rand(batch_size), requires_grad=True).cuda().float()
        x1 = Variable(torch.matmul(torch.diag(eps), x), requires_grad=True)
        x2 = Variable(torch.matmul(torch.diag(1 - eps), g_z), requires_grad=True)
        x_hat = Variable(x1 + x2, requires_grad=True)
        f_xh = f(x_hat)

        # Per-sample gradient norms of the critic at the interpolates.
        grad_xh_norm = torch.zeros(batch_size).cuda().float()
        for b in range(f_xh.size()[0]):
            g_x_hat = ag.grad(f_xh[b][0], x_hat, retain_graph=True)[0]
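# For context, a hedged sketch of how a WGAN-GP critic step typically concludes once
# the per-sample gradients are collected; this is the standard algorithm, not
# necessarily this project's exact continuation:
#     grad_xh_norm[b] = g_x_hat.norm(2)                  # inside the loop above
#     gp = lmbda * ((grad_xh_norm - 1) ** 2).mean()      # gradient penalty term
#     f_loss = fg_z.mean() - f_x.mean() + gp             # critic (Wasserstein) loss
#     f_opt.zero_grad(); f_loss.backward(); f_opt.step()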
def one_hidden(self, l):
    nh = (self.n_hid if l != self.n_layers - 1 else self.emb_sz) // self.ndir
    if IS_TORCH_04:
        return Variable(self.weights.new(self.ndir, self.bs, nh).zero_())
    else:
        return Variable(self.weights.new(self.ndir, self.bs, nh).zero_(), volatile=not self.training)
train_load = DataLoader(dataset, batch_size=100, shuffle=True,
                        collate_fn=pad_collate)  ### Loading the bAbI dataset

model.train()  ### Put the network in training mode
if not early_stop_flag:
    total_acc = 0
    count = 0
    for batch_id, data in enumerate(train_load):
        optim.zero_grad()
        context, questions, answers = data
        batch_size = context.size()[0]
        context = Variable(context.long())  ## context.size() = (batch_size, num_sentences, num_tokens)
        questions = Variable(questions.long())  ## questions.size() = (batch_size, num_tokens)
        answers = Variable(answers)
        ## The loss is calculated and gradients are backpropagated through the layers.
        total_loss, acc = model.loss(context, questions, answers)
        total_loss.backward()
        total_acc += acc * batch_size
        count += batch_size
        if batch_id % 20 == 0:
            print('training error')
            print('task ' + str(task_id) + ',epoch ' +
def make_std_mask(tgt, pad):
    "Create a mask to hide padding and future words, so we cannot attend to the future."
    tgt_mask = (tgt != pad).unsqueeze(-2)
    tgt_mask = tgt_mask & Variable(
        subsequent_mask(tgt.size(-1)).type_as(tgt_mask.data))
    return tgt_mask
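# For completeness, the subsequent_mask helper assumed above, as defined in the
# Annotated Transformer (assumes numpy is imported as np): each position may attend
# only to itself and earlier positions, i.e. the strict upper triangle is masked out.
def subsequent_mask(size):
    attn_shape = (1, size, size)
    mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(mask) == 0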