def predict_snli(net, vocab, premise, hypothesis):
    """Predict the logical relationship between a premise and a hypothesis."""
    premise = np.array(vocab[premise], ctx=d2l.try_gpu())
    hypothesis = np.array(vocab[hypothesis], ctx=d2l.try_gpu())
    label = np.argmax(net([premise.reshape((1, -1)),
                           hypothesis.reshape((1, -1))]), axis=1)
    return 'entailment' if label == 0 else 'contradiction' if label == 1 \
        else 'neutral'
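# Example usage (assumes `net` and `vocab` come from the SNLI training
# pipeline; the sentence pair is illustrative, not from the original source):
predict_snli(net, vocab, ['he', 'is', 'good', '.'], ['he', 'is', 'bad', '.'])
# expected: 'contradiction'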
def train_ch6(net, train_iter, test_iter, num_epochs, lr,
              device=d2l.try_gpu()):
    """Train a model with a GPU (defined in Chapter 6)."""
    net.initialize(force_reinit=True, ctx=device, init=init.Xavier())
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr})
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc'])
    timer, num_batches = d2l.Timer(), len(train_iter)
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy, no. of examples
        metric = d2l.Accumulator(3)
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            # Here is the major difference from `d2l.train_epoch_ch3`
            X, y = X.as_in_ctx(device), y.as_in_ctx(device)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(X.shape[0])
            metric.add(l.sum(), d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc, None))
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc))
    print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')
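# Minimal usage sketch (hypothetical values; the two-layer net and the
# Fashion-MNIST loader are stand-ins, assuming the d2l helpers from the
# surrounding chapter are available):
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'), nn.Dense(10))
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=256)
train_ch6(net, train_iter, test_iter, num_epochs=10, lr=0.1)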
def get_bert_encoding(net, tokens_a, tokens_b=None):
    tokens, segments = d2l.get_tokens_and_segments(tokens_a, tokens_b)
    ctx = d2l.try_gpu()
    # Note: relies on a `vocab` defined in the enclosing scope
    token_ids = np.expand_dims(np.array(vocab[tokens], ctx=ctx), axis=0)
    segments = np.expand_dims(np.array(segments, ctx=ctx), axis=0)
    valid_len = np.expand_dims(np.array(len(tokens), ctx=ctx), axis=0)
    encoded_X, _, _ = net(token_ids, segments, valid_len)
    return encoded_X
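# Example usage (hedged: assumes `net` is the pretrained BERT encoder and
# `vocab` its vocabulary, as in the surrounding chapter):
tokens_a = ['a', 'crane', 'is', 'flying']
encoded_text = get_bert_encoding(net, tokens_a)
# shape: (1, number of tokens incl. '<cls>' and '<sep>', num_hiddens)
print(encoded_text.shape)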
def __init__(self, num_inputs, num_hiddens, num_outputs,
             ctx=d2l.try_gpu()):
    self.num_inputs = num_inputs
    self.num_hiddens = num_hiddens
    self.num_outputs = num_outputs
    self.ctx = ctx
    # Weight shapes: input-to-hidden, hidden-to-hidden, hidden-to-output
    self.i_shape = (num_inputs, num_hiddens)
    self.h_shape = (num_hiddens, num_hiddens)
    self.o_shape = (num_hiddens, num_outputs)
    self.params = self._params_init(ctx=self.ctx)
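# A minimal sketch of what `_params_init` could look like given the shapes
# stored above (hypothetical -- the real method is not shown in this snippet):
from mxnet import np

def _params_init(self, ctx):
    # Gaussian-initialized weights and zero biases for an RNN-style cell,
    # allocated on `ctx` with gradients attached
    W_xh = np.random.normal(0, 0.01, self.i_shape, ctx=ctx)
    W_hh = np.random.normal(0, 0.01, self.h_shape, ctx=ctx)
    b_h = np.zeros(self.num_hiddens, ctx=ctx)
    W_ho = np.random.normal(0, 0.01, self.o_shape, ctx=ctx)
    b_o = np.zeros(self.num_outputs, ctx=ctx)
    params = [W_xh, W_hh, b_h, W_ho, b_o]
    for param in params:
        param.attach_grad()
    return params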
def gather(self, dim, index):
    """Gathers values along an axis specified by ``dim``.

    For a 3-D tensor the output is specified by:

        out[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0
        out[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1
        out[i][j][k] = input[i][j][index[i][j][k]]  # if dim == 2

    Parameters
    ----------
    dim : int
        The axis along which to index
    index : NDArray
        A tensor of indices of elements to gather

    Returns
    -------
    NDArray
        Output tensor
    """
    idx_xsection_shape = index.shape[:dim] + index.shape[dim + 1:]
    self_xsection_shape = self.shape[:dim] + self.shape[dim + 1:]
    if idx_xsection_shape != self_xsection_shape:
        raise ValueError(
            "Except for dimension " + str(dim) +
            ", all dimensions of index and self should be the same size")
    if index.dtype != np.dtype('int_'):
        raise TypeError("The values of index must be integers")
    # Move the gather axis to the front so `np.choose` can select along it,
    # then move it back afterwards
    data_swaped = nd.swapaxes(self, 0, dim).asnumpy()
    index_swaped = nd.swapaxes(index, 0, dim).asnumpy()
    gathered = nd.from_numpy(
        np.choose(index_swaped, data_swaped)).as_in_context(d2l.try_gpu())
    return nd.swapaxes(gathered, 0, dim)
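# Small sanity check (illustrative): gather along dim 1 of a 2x3 matrix.
# On platforms where numpy's default integer is not int64, the dtype check
# above may require a matching index dtype.
from mxnet import nd

x = nd.array([[1, 2, 3], [4, 5, 6]])
idx = nd.array([[0, 2], [1, 0]], dtype='int64')
print(gather(x, 1, idx))  # [[1. 3.] [5. 4.]]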
class TransformerDecoder(d2l.Decoder):
    def __init__(self, vocab_size, num_hiddens, ffn_num_hiddens, num_heads,
                 num_layers, dropout, **kwargs):
        super(TransformerDecoder, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, num_hiddens)
        self.pos_encoding = d2l.PositionalEncoding(num_hiddens, dropout)
        self.blks = nn.Sequential()
        for i in range(num_layers):
            self.blks.add(
                DecoderBlock(num_hiddens, ffn_num_hiddens, num_heads,
                             dropout, i))
        self.dense = nn.Dense(vocab_size, flatten=False)

    def init_state(self, enc_outputs, enc_valid_len, *args):
        return [enc_outputs, enc_valid_len, [None] * self.num_layers]

    def forward(self, X, state):
        X = self.pos_encoding(self.embedding(X) * math.sqrt(self.num_hiddens))
        for blk in self.blks:
            X, state = blk(X, state)
        return self.dense(X), state

num_hiddens, num_layers, dropout, batch_size, num_steps = 32, 2, 0.0, 64, 10
lr, num_epochs, ctx = 0.005, 100, d2l.try_gpu()
ffn_num_hiddens, num_heads = 64, 4

src_vocab, tgt_vocab, train_iter = d2l.load_data_nmt(batch_size, num_steps)

encoder = TransformerEncoder(len(src_vocab), num_hiddens, ffn_num_hiddens,
                             num_heads, num_layers, dropout)
decoder = TransformerDecoder(len(tgt_vocab), num_hiddens, ffn_num_hiddens,
                             num_heads, num_layers, dropout)
model = d2l.EncoderDecoder(encoder, decoder)
d2l.train_s2s_ch9(model, train_iter, lr, num_epochs, ctx)

'''
for sentence in ['Go .', 'Wow !', "I'm OK .", 'I won !']:
    print(sentence + ' => ' + d2l.predict_s2s_ch9(
        model, sentence, src_vocab, tgt_vocab, num_steps, ctx))
'''
def run():
    # get options
    opt = options_train_executor.parse()
    print('===== arguments: program executor =====')
    for key, val in vars(opt).items():
        print("{:20} {}".format(key, val))
    print('===== arguments: program executor =====')

    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)

    # build dataloaders
    # assumed symmetric to `val_set` below; the original never defined it
    train_set = PartPrimitive(opt.train_file)
    train_loader = gdata.DataLoader(
        dataset=train_set,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
    )
    val_set = PartPrimitive(opt.val_file)
    val_loader = gdata.DataLoader(
        dataset=val_set,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
    )

    # build the model
    ctx = d2l.try_gpu()
    model = RenderNet(opt)
    model.initialize(init=init.Xavier(), ctx=ctx)
    loss = gloss.SoftmaxCrossEntropyLoss(axis=1, weight=5)
    optimizer = Trainer(
        model.collect_params(), "adam", {
            "learning_rate": opt.learning_rate,
            "wd": opt.weight_decay,
            'beta1': opt.beta1,
            'beta2': opt.beta2
        })

    # optionally resume from previously saved parameters and trainer state
    train_from0 = False
    if train_from0:
        if os.path.exists('./model of executor'):
            model.load_parameters('model of executor')
            print("loaded parameter of model")
        if os.path.exists('./optimizer of executor'):
            optimizer.load_states('optimizer of executor')
            print("loaded state of trainer")

    for epoch in range(1, opt.epochs + 1):
        adjust_learning_rate(epoch, opt, optimizer)
        print("###################")
        print("training")
        train(epoch, train_loader, model, loss, optimizer, opt, ctx,
              train_loss, train_iou)
        print("###################")
        print("testing")
        '''
        gen_shapes, ori_shapes = validate(epoch, val_loader, model, loss, opt,
                                          ctx, val_loss, val_iou,
                                          gen_shape=True)
        gen_shapes = (gen_shapes > 0.5)
        gen_shapes = gen_shapes.astype(np.float32)
        iou = BatchIoU(ori_shapes, gen_shapes)
        print("Mean IoU: {:.3f}".format(iou.mean().asscalar()))
        '''
        if epoch % opt.save_interval == 0:
            print('Saving...')
            optimizer.save_states("optimizer of executor_3")
            model.save_parameters("model of executor_3")

    # final save after training
    print('Saving...')
    optimizer.save_states("optimizer of executor_3")
    model.save_parameters("model of executor_3")
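# Standard entry point (a sketch; the original flattened snippet does not
# show how `run` is invoked):
if __name__ == '__main__':
    run()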
def predict_sentiment(net, vocab, sentence):
    """Predict the sentiment of a text sentence."""
    sentence = np.array(vocab[sentence.split()], ctx=d2l.try_gpu())
    label = np.argmax(net(sentence.reshape(1, -1)), axis=1)
    return 'positive' if label == 1 else 'negative'
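# Example usage (assumes `net` and `vocab` from the sentiment-analysis
# training above; the review text is illustrative):
predict_sentiment(net, vocab, 'this movie is so great')  # -> 'positive'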
        # ... tail of `train_recsys_rating`: evaluate at the end of each epoch
        if len(kwargs) > 0:  # It will be used in section AutoRec
            test_rmse = evaluator(net, test_iter, kwargs['inter_mat'],
                                  ctx_list)
        else:
            test_rmse = evaluator(net, test_iter, ctx_list)
        train_l = l / (i + 1)
        animator.add(epoch + 1, (train_l, test_rmse))
    print(f'train loss {metric[0] / metric[1]:.3f}, '
          f'test RMSE {test_rmse:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(ctx_list)}')

ctx = d2l.try_all_gpus()
num_users, num_items, train_iter, test_iter = d2l.split_and_load_ml100k(
    test_ratio=0.1, batch_size=512)
net = MF(30, num_users, num_items)
net.initialize(ctx=ctx, force_reinit=True, init=mx.init.Normal(0.01))
lr, num_epochs, wd, optimizer = 0.002, 20, 1e-5, 'adam'
loss = gluon.loss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), optimizer,
                        {"learning_rate": lr, 'wd': wd})
train_recsys_rating(net, train_iter, test_iter, loss, trainer, num_epochs,
                    ctx, evaluator)

# Test one score: user 20's predicted rating on item 30
scores = net(np.array([20], dtype='int', ctx=d2l.try_gpu()),
             np.array([30], dtype='int', ctx=d2l.try_gpu()))
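# Hypothetical follow-up: read the single predicted rating out as a float.
print('predicted rating of user 20 on item 30:', scores.asnumpy().item())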
def run():
    # get options
    opt = options_guided_adaptation.parse()
    opt_gen = options_train_generator.parse()
    opt_exe = options_train_executor.parse()
    print('===== arguments: guided adaptation =====')
    for key, val in vars(opt).items():
        print("{:20} {}".format(key, val))
    print('===== arguments: guided adaptation =====')

    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)

    # build loaders
    train_set = ShapeNet3D(opt.train_file)
    train_loader = gdata.DataLoader(dataset=train_set,
                                    batch_size=opt.batch_size,
                                    shuffle=True,
                                    num_workers=opt.num_workers)
    val_set = ShapeNet3D(opt.val_file)
    val_loader = gdata.DataLoader(dataset=val_set,
                                  batch_size=opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers)

    def visual(path, epoch, gen_shapes, file_name, nums_samples):
        # voxel grids come in as (batch, x, y, z); reorient before rendering
        data = gen_shapes.transpose((0, 3, 2, 1))
        data = np.flip(data, axis=2)
        num_shapes = data.shape[0]
        for i in range(min(nums_samples, num_shapes)):
            voxels = data[i]
            save_name = os.path.join(path, file_name.format(epoch, i))
            visualization(voxels, threshold=0.1, save_name=save_name,
                          uniform_size=0.9)

    ctx = d2l.try_gpu()

    # load program generator
    generator = BlockOuterNet(opt_gen)
    generator.init_blocks(ctx)
    generator.load_parameters("model of blockouternet")

    # load program executor
    executor = RenderNet(opt_exe)
    executor.initialize(init=init.Xavier(), ctx=ctx)
    executor.load_parameters("model of executor")

    # build loss function and optimizer
    criterion = gloss.SoftmaxCrossEntropyLoss(axis=1, from_logits=True)
    optimizer = Trainer(
        generator.collect_params(), "adam", {
            "learning_rate": opt.learning_rate,
            "wd": opt.weight_decay,
            'beta1': opt.beta1,
            'beta2': opt.beta2,
            'clip_gradient': opt.grad_clip
        })

    print("###################")
    print("testing")
    gen_shapes, ori_shapes = validate(0, val_loader, generator, opt, ctx,
                                      gen_shape=True)
    # visual('imgs of chairs/adaption/chair/', 0, ori_shapes, 'GT {}-{}.png', 8)
    # visual('imgs of chairs/adaption/chair/', 0, gen_shapes, 'epoch{}-{}.png', 8)
    gen_shapes = nd.from_numpy(gen_shapes)
    ori_shapes = nd.from_numpy(ori_shapes)
    IoU = BatchIoU(gen_shapes, ori_shapes)
    print("iou: ", IoU.mean())

    best_iou = 0
    print(opt.epochs)
    for epoch in range(1, opt.epochs + 1):
        print("###################")
        print("adaptation")
        train(epoch, train_loader, generator, executor, criterion, optimizer,
              opt, ctx)
        print("###################")
        print("testing")
        gen_shapes, ori_shapes = validate(epoch, val_loader, generator, opt,
                                          ctx, gen_shape=True)
        # visual('imgs of chairs/adaption/chair/', epoch, gen_shapes, 'epoch{}-{}.png', 8)
        gen_shapes = nd.from_numpy(gen_shapes)
        ori_shapes = nd.from_numpy(ori_shapes)
        IoU = BatchIoU(gen_shapes, ori_shapes)
        print("iou: ", IoU.mean())

        if epoch % opt.save_interval == 0:
            print('Saving...')
            generator.save_parameters("generator of GA on shapenet")
            optimizer.save_states("optimizer of generator of GA on shapenet")
        if IoU.mean() >= best_iou:
            print('Saving best model')
            generator.save_parameters("generator of GA on shapenet")
            optimizer.save_states("optimizer of generator of GA on shapenet")
            best_iou = IoU.mean()
def run():
    opt = options_train_generator.parse()
    print('===== arguments: program generator =====')
    for key, val in vars(opt).items():
        print("{:20} {}".format(key, val))
    print('===== arguments: program generator =====')

    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)

    # build model
    ctx = d2l.try_gpu()
    model = BlockOuterNet(opt)
    model.init_blocks(ctx)
    crit_cls = LSTMClassCriterion()
    crit_reg = LSTMRegressCriterion()
    crit_cls.initialize(ctx=ctx)
    crit_reg.initialize(ctx=ctx)
    optimizer = Trainer(
        model.collect_params(), "adam", {
            "learning_rate": opt.learning_rate,
            "wd": opt.weight_decay,
            'beta1': opt.beta1,
            'beta2': opt.beta2,
            'clip_gradient': opt.grad_clip
        })

    # build dataloaders
    train_set = Synthesis3D(opt.train_file, n_block=opt.outer_seq_length)
    train_loader = gdata.DataLoader(
        dataset=train_set,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
    )
    val_set = Synthesis3D(opt.val_file, n_block=opt.outer_seq_length)
    val_loader = gdata.DataLoader(
        dataset=val_set,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
    )

    for epoch in range(1, opt.epochs + 1):
        print("###################")
        print("training")
        train(epoch, train_loader, model, crit_cls, crit_reg, optimizer, opt,
              ctx)
        print("###################")
        print("testing")
        validate(epoch, val_loader, model, crit_cls, crit_reg, opt, ctx, True)
        if epoch % 1 == 0:  # save every epoch
            print('Saving...')
            optimizer.save_states("optimizer of PG")
            model.save_parameters("model of blockouternet")

    # final save after training
    optimizer.save_states("optimizer of PG")
    model.save_parameters("model of blockouternet")