def run():
    """Guided-adaptation driver: fine-tune the program generator against the
    (frozen) program executor on ShapeNet3D voxel data, tracking IoU.

    Side effects: creates opt.save_folder, loads pretrained generator/executor
    parameters from fixed file names, and periodically saves the adapted
    generator and trainer state.
    """
    # get options
    opt = options_guided_adaptation.parse()
    opt_gen = options_train_generator.parse()
    opt_exe = options_train_executor.parse()
    print('===== arguments: guided adaptation =====')
    for key, val in vars(opt).items():
        print("{:20} {}".format(key, val))
    print('===== arguments: guided adaptation =====')
    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)

    # build loaders
    train_set = ShapeNet3D(opt.train_file)
    train_loader = gdata.DataLoader(dataset=train_set, batch_size=opt.batch_size,
                                    shuffle=True, num_workers=opt.num_workers)
    val_set = ShapeNet3D(opt.val_file)
    val_loader = gdata.DataLoader(dataset=val_set, batch_size=opt.batch_size,
                                  shuffle=False, num_workers=opt.num_workers)

    def visual(path, epoch, gen_shapes, file_name, nums_samples):
        # Debug helper: render up to `nums_samples` voxel grids to image files.
        # Currently only referenced from the commented-out calls below.
        data = gen_shapes.transpose((0, 3, 2, 1))
        data = np.flip(data, axis=2)
        num_shapes = data.shape[0]
        for i in range(min(nums_samples, num_shapes)):
            voxels = data[i]
            save_name = os.path.join(path, file_name.format(epoch, i))
            visualization(voxels, threshold=0.1, save_name=save_name, uniform_size=0.9)

    ctx = d2l.try_gpu()
    # load program generator
    generator = BlockOuterNet(opt_gen)
    generator.init_blocks(ctx)
    generator.load_parameters("model of blockouternet")
    # load program executor (kept fixed; it provides the training signal)
    executor = RenderNet(opt_exe)
    executor.initialize(init=init.Xavier(), ctx=ctx)
    executor.load_parameters("model of executor")
    # build loss functions; from_logits=True means inputs are log-probabilities
    criterion = gloss.SoftmaxCrossEntropyLoss(axis=1, from_logits=True)
    optimizer = Trainer(
        generator.collect_params(), "adam", {
            "learning_rate": opt.learning_rate,
            "wd": opt.weight_decay,
            'beta1': opt.beta1,
            'beta2': opt.beta2,
            'clip_gradient': opt.grad_clip
        })
    print("###################")
    print("testing")
    # Baseline IoU before any adaptation (epoch 0).
    gen_shapes, ori_shapes = validate(0, val_loader, generator, opt, ctx,
                                      gen_shape=True)
    #visual('imgs of chairs/adaption/chair/',0,ori_shapes,'GT {}-{}.png',8)
    #visual('imgs of chairs/adaption/chair/',0,gen_shapes,'epoch{}-{}.png',8)
    gen_shapes = nd.from_numpy(gen_shapes)
    ori_shapes = nd.from_numpy(ori_shapes)
    #print(gen_shapes.dtype,ori_shapes.dtype)
    #print("done",ori_shapes.shape,gen_shapes.shape)
    IoU = BatchIoU(gen_shapes, ori_shapes)
    #print(IoU)
    print("iou: ", IoU.mean())
    best_iou = 0
    print(opt.epochs)
    for epoch in range(1, opt.epochs + 1):
        print("###################")
        print("adaptation")
        train(epoch, train_loader, generator, executor, criterion, optimizer,
              opt, ctx)
        print("###################")
        print("testing")
        gen_shapes, ori_shapes = validate(epoch, val_loader, generator, opt,
                                          ctx, gen_shape=True)
        #visual('imgs of chairs/adaption/chair/',epoch,gen_shapes,'epoch{}-{}.png',8)
        gen_shapes = nd.from_numpy(gen_shapes)
        ori_shapes = nd.from_numpy(ori_shapes)
        IoU = BatchIoU(gen_shapes, ori_shapes)
        print("iou: ", IoU.mean())
        # Periodic checkpoint (note: same file names as the best-model save
        # below, so the two checkpoints overwrite each other).
        if epoch % opt.save_interval == 0:
            print('Saving...')
            generator.save_parameters("generator of GA on shapenet")
            optimizer.save_states("optimazer of generator of GA on shapenet")
        # Best-so-far checkpoint, keyed on mean validation IoU.
        if IoU.mean() >= best_iou:
            print('Saving best model')
            generator.save_parameters("generator of GA on shapenet")
            optimizer.save_states("optimazer of generator of GA on shapenet")
            best_iou = IoU.mean()
n += y.size test_acc = d2l.evaluate_accuracy(test_iter, net) print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc)) if __name__ == '__main__': batch_size = 256 train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size) net = nn.Sequential() #sequential用来存储所有神经网络中的所有单元 net.add(nn.Dense(10)) #设置网络输出个数为10 net.initialize( init.Normal(sigma=0.01)) #初始化网络参数为从均值为0、标准差为0.01的正态分布的随机取样的值 loss = gloss.SoftmaxCrossEntropyLoss() # softmax运算和交叉熵损失计算的函数 trainer = gluon.Trainer( net.collect_params(), 'sgd', {'learning_rate': 0.1}) # 使⽤学习率为0.1的小批量随机梯度下降作为优化算法 num_epochs = 10 d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer) #训练并输出结果 for X, y in test_iter: break true_labels = d2l.get_fashion_mnist_labels(y.asnumpy()) pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy()) print(true_labels) print(pred_labels) print( float(
[512, 238]) model.collect_params().initialize(mx.init.Xavier(rnd_type="gaussian"), ctx=ctx) model.embedding.weight.set_data(weight) model.embedding.collect_params().setattr('grad_req', 'null') mask = False num_epochs = 10 lr = 0.001 start_time = time.time() # best_acc = 0 # best_f1 = 0 # best_lost = 999 # _max_round = 10 # max_round = _max_round pad_id = index[pad] loss = gloss.SoftmaxCrossEntropyLoss(sparse_label=False) for epoch in range(num_epochs): # data reset train_data.reset() valid_data.reset() # Epoch training stats epoch_L = 0.0 epoch_sent_num = 0 if epoch % 2: trainer_name = "adagrad" trainer = Trainer(model.collect_params(), trainer_name, { 'learning_rate': lr, "wd": 0.001 }) else: trainer_name = "sgd"
import mxnet as mx from mxnet.gluon import nn from mxnet.gluon import data as gdata from mxnet.gluon import loss as gloss from gluoncv import loss as gcvloss from mxnet import autograd, init, contrib, nd, sym from utils.utils import calc_loss, cls_eval, bbox_eval cls_lossfunc = gloss.SoftmaxCrossEntropyLoss() # cls_lossfunc = gcvloss.FocalLoss() bbox_lossfunc = gloss.L1Loss() def training(data_iter, num_epoches, cls_lossfunc, bbox_lossfunc): # TODO: define the way that the model should be trained # wth gluon.Trainer(...) for eph in range(num_epoches): pass pass def validate(val_iter, net, ctx=mx.gpu()): acc_cls, acc_bbox, acc_l, n, m = 0, 0, 0, 0, 0 val_iter.reset() for batch in val_iter: X = batch.data[0].as_in_context(ctx) Y = batch.label[0].as_in_context(ctx) # generate anchors and generate bboxes anchors, cls_preds, bbox_preds = net(X) # assign classes and bboxes for each anchor bbox_labels, bbox_masks, cls_labels = nd.contrib.MultiBoxTarget(
def train_and_predict_rnn_gluon(
    model,
    num_hiddens,
    vocab_size,
    ctx,
    corpus_indices,
    idx_to_char,
    char_to_idx,
    num_epochs,
    num_steps,
    lr,
    clipping_theta,
    batch_size,
    pred_period,
    pred_len,
    prefixes,
):
    """Train a Gluon RNN language model with consecutive sampling and
    periodically print perplexity plus sample generations from `prefixes`.

    Uses SGD with gradient clipping (`clipping_theta`); the hidden state is
    carried across minibatches within an epoch and detached from the graph
    so backprop stays within one batch.
    """
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(
        model.collect_params(), "sgd", {"learning_rate": lr, "momentum": 0, "wd": 0}
    )

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(
            corpus_indices, batch_size, num_steps, ctx
        )
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            # Detach the carried-over state so gradients do not flow into
            # previous batches.
            for s in state:
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                # Flatten targets to (batch_size * num_steps,) to match output rows.
                y = Y.T.reshape((-1,))
                l = loss(output, y).mean()
            l.backward()
            # Gradient clipping.
            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)  # loss is already a mean, so no further averaging
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            # exp(mean cross-entropy) == perplexity
            print(
                "epoch %d, perplexity %f, time %.2f sec"
                % (epoch + 1, math.exp(l_sum / n), time.time() - start)
            )
            for prefix in prefixes:
                print(
                    " -",
                    predict_rnn_gluon(
                        prefix,
                        pred_len,
                        model,
                        vocab_size,
                        ctx,
                        idx_to_char,
                        char_to_idx,
                    ),
                )
def train_eval(opt):
    """Train an R2Plus2D video classifier and evaluate it after every epoch.

    Two datasets are supported, selected by ``opt.dataset``:
      - 'ucf101' / 'ucf': 101-class single-label classification; validation
        reports accuracy.
      - 'meitu': 63-class multi-label tagging; the loss is picked via
        ``opt.loss_type`` and validation reports top-1..top-4 IoU between the
        predicted label ranking and the ground-truth label set.

    :param opt: parsed options namespace (gpus, lr, dataset paths, batch
                sizes, logging intervals, ...).
    Side effects: logs/plots through a Visulizer and saves net parameters
    once per epoch under ``./{opt.output}/``.
    """
    mx.random.seed(123)
    np.random.seed(123)
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    # BUGFIX: was `opt.gpus is ''` — identity comparison with a str literal
    # relies on interning (SyntaxWarning on CPython >= 3.8); use equality.
    gpus = [] if opt.gpus is None or opt.gpus == '' else [
        int(gpu) for gpu in opt.gpus.split(',')]
    num_gpus = len(gpus)
    batch_size = opt.batch_per_device * max(1, num_gpus)
    context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()]
    steps = [int(step) for step in opt.lr_scheduler_steps.split(',')]
    vis_env = opt.dataset + opt.output
    vis = Visulizer(env=vis_env)
    vis.log(opt)
    # optional ucf101 or meitu: get net structure, loss criterion, train/val loader
    if opt.dataset == 'ucf101' or opt.dataset == 'ucf':
        net = R2Plus2D(num_class=101, model_depth=opt.model_depth)
        loss_criterion = gloss.SoftmaxCrossEntropyLoss()  # loss function
        train_loader, val_loader = get_ucf101trainval(
            datadir='/data/jh/notebooks/hudengjun/DeepVideo/UCF-101',
            batch_size=batch_size,
            n_frame=opt.n_frame,
            crop_size=opt.crop_size,
            scale_h=opt.scale_h,
            scale_w=opt.scale_w,
            num_workers=opt.num_workers)  # the train and evaluation data loader
    elif opt.dataset == 'meitu':
        net = R2Plus2D(num_class=63, model_depth=opt.model_depth,
                       final_temporal_kernel=opt.n_frame // 8)  # labels set 63
        # use multi gpus to load data
        train_loader, val_loader = get_meitu_dataloader(
            data_dir=opt.meitu_dir,
            device_id=opt.decoder_gpu,
            batch_size=batch_size,
            num_workers=opt.num_workers,
            n_frame=opt.n_frame,
            crop_size=opt.crop_size,
            scale_h=opt.scale_h,
            scale_w=opt.scale_w,
            cache_size=opt.cache_size)
    # Multi-label loss registry for the meitu branch; 'lsep_nnh'/'bce' are
    # special-cased below because they need extra构造 arguments / hybridize.
    loss_dict = {'bce': gloss.SigmoidBinaryCrossEntropyLoss,
                 'warp_nn': WarpLoss,
                 'warp_fn': WARP_funcLoss,
                 'lsep_nn': LsepLoss,
                 'lsep_fn': LSEP_funcLoss}
    if opt.loss_type == 'lsep_nnh':
        loss_criterion = LsepLossHy(batch_size=batch_size // num_gpus,
                                    num_class=opt.num_class)
        loss_criterion.hybridize()
    elif opt.loss_type == 'bce':
        loss_criterion = gloss.SigmoidBinaryCrossEntropyLoss()
        loss_criterion.hybridize()
    else:
        loss_criterion = loss_dict[opt.loss_type]()
    net.initialize(mx.init.Xavier(), ctx=context)  # init parameters on all cards
    # IDIOM: `opt.pretrained is not None` instead of `not ... is None`.
    if opt.pretrained is not None:
        if opt.pretrained.endswith('.pkl'):
            net.load_from_caffe2_pickle(opt.pretrained)
        elif opt.pretrained.endswith('.params'):
            try:
                print("load pretrained params ", opt.pretrained)
                net.load_from_sym_params(opt.pretrained, ctx=context)
            except Exception as e:
                # Fall back: the file may hold native gluon parameters.
                print("load as sym params failed,reload as gluon params")
                net.load_params(opt.pretrained, ctx=context)  # load params to net context
    net.hybridize()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': opt.lr, 'momentum': 0.9,
                             'wd': opt.wd},
                            kvstore=opt.kvstore)  # the trainer
    lr_steps = lr_schedualer.MultiFactorScheduler(steps, opt.lr_schedualer_factor)
    lr_steps.base_lr = opt.lr
    best_eval = 0.0
    for epoch in range(opt.num_epoch):
        tic = time()
        pre_loss, cumulative_loss = 0.0, 0.0
        trainer.set_learning_rate(lr_steps(epoch))
        vis.log('Epoch %d learning rate %f' % (epoch, trainer.learning_rate))
        # ---- training ----
        for i, (data, label) in enumerate(train_loader):
            try:
                data_list = gluon.utils.split_and_load(data, ctx_list=context, batch_axis=0)
                label_list = gluon.utils.split_and_load(label, ctx_list=context, batch_axis=0)
            except Exception as e:
                # Best-effort: a bad/undersized batch is skipped, not fatal.
                logging.info(e)
                continue
            Ls = []
            with autograd.record():
                for x, y in zip(data_list, label_list):
                    y_hat = net(x)
                    loss = loss_criterion(y_hat, y)
                    Ls.append(loss)
                    cumulative_loss += nd.mean(loss).asscalar()
            for L in Ls:
                L.backward()
            trainer.step(data.shape[0])
            if (i + 1) % opt.log_interval == 0:
                vis.log('[Epoch %d,Iter %d ] training loss= %f' % (
                    epoch, i + 1, cumulative_loss - pre_loss))
                vis.plot('loss', cumulative_loss - pre_loss)
                pre_loss = cumulative_loss
            if opt.debug:
                if (i + 1) // (opt.log_interval) == 3:
                    break
        vis.log('[Epoch %d] training loss=%f' % (epoch, cumulative_loss))
        vis.log('[Epoch %d] time used: %f' % (epoch, time() - tic))
        # BUGFIX: the epoch number was missing from the format expansion
        # (the literal string '[Epoch %d] saving net' was logged).
        vis.log('[Epoch %d] saving net' % epoch)
        save_path = './{0}/{1}_test-val{2}.params'.format(
            opt.output, str(opt.dataset + opt.loss_type), str(epoch))
        vis.log("save path %s" % (save_path))
        net.save_parameters(save_path)
        best_iou = 0.0
        # ---- validation ----
        if opt.dataset == 'ucf101' or opt.dataset == 'ucf':
            acc = nd.array([0], ctx=mx.cpu())
            test_iter = 0
            for i, (data, label) in enumerate(val_loader):
                try:
                    data_list = gluon.utils.split_and_load(data, ctx_list=context, batch_axis=0)
                    label_list = gluon.utils.split_and_load(label, ctx_list=context, batch_axis=0)
                except Exception as e:
                    logging.info(e)
                    continue
                for x, y in zip(data_list, label_list):
                    y_hat = net(x)
                    test_iter += 1  # single iter
                    y_pred = y_hat.argmax(axis=1)
                    acc += (y_pred == y.astype('float32')).mean().asscalar()  # acc in cpu
                val_acc = acc.asscalar() / test_iter
                if (i + 1) % (opt.log_interval) == 0:
                    logging.info("[Epoch %d,Iter %d],acc=%f" % (epoch, i, val_acc))
                if opt.debug:
                    if (i + 1) // opt.log_interval == 3:
                        break
            vis.plot('acc', val_acc)
        elif opt.dataset == 'meitu':
            k = 4
            topk_inter = np.array([1e-4] * k)  # epsilon guards divide-by-zero
            topk_union = np.array([1e-4] * k)
            for i, (data, label) in enumerate(val_loader):
                try:
                    data_list = gluon.utils.split_and_load(data, ctx_list=context, batch_axis=0)
                    label_list = gluon.utils.split_and_load(label, ctx_list=context, batch_axis=0)
                except Exception as e:
                    logging.info(e)
                    continue
                for x, y in zip(data_list, label_list):
                    y_hat = net(x)
                    pred_order = y_hat.argsort()[:, ::-1]  # descending score order
                    # just compute top-k labels
                    pred_order_np = pred_order.asnumpy()
                    y_np = y.asnumpy()
                    if opt.debug:
                        print("pred shape and target shape", pred_order_np.shape, y_np.shape)
                    for pred_vec, y_vec in zip(pred_order_np, y_np):
                        label_set = set(
                            index for index, value in enumerate(y_vec) if value > 0.1)
                        # BUGFIX: the comprehension variable used to shadow
                        # the outer `k`; use a distinct name.
                        pred_topk = [set(pred_vec[0:top_n]) for top_n in range(1, k + 1)]
                        topk_inter += np.array(
                            [len(p_k.intersection(label_set)) for p_k in pred_topk])
                        topk_union += np.array(
                            [len(p_k.union(label_set)) for p_k in pred_topk])
                if (i + 1) % (opt.log_interval) == 0:
                    logging.info("[Epoch %d,Iter %d],time %s,Iou %s" % (epoch, i,
                                 tmm.strftime("%Y-%D:%H-%S"),
                                 str(topk_inter / topk_union)))
                    # BUGFIX: this loop previously reused `i`, clobbering the
                    # enumerate counter and breaking the debug early-exit below.
                    for ki in range(k):
                        vis.plot('val_iou_{0}'.format(ki + 1),
                                 topk_inter[ki] / topk_union[ki])
                if opt.debug:
                    if (i + 1) // (opt.log_interval) == 2:
                        break
    vis.log("""----------------------------------------
----XXXX------finished------------------
----------------------------------------""")
def __init__(self, x_train, y_train, x_test, y_test, labels_num, ctx):
    """Initialise the classifier.

    Data splits, label count and compute context are handed straight to the
    base class; this subclass only fixes the loss function (softmax
    cross-entropy) and defers network construction.
    """
    super().__init__(x_train, y_train, x_test, y_test, labels_num, ctx)
    # The concrete network is built later; None marks "not built yet".
    self.net = None
    self.loss_func = gloss.SoftmaxCrossEntropyLoss()
import d2lzh as d2l from mxnet import gluon, init, autograd from mxnet.gluon import loss as gloss, nn batch_size = 256 train_iter, test_iter = d2l.load_data_fashion_mnist((batch_size)) net = nn.Sequential() net.add(nn.Flatten()) net.add(nn.Dense(256, activation='relu')) net.add(nn.Dense(10)) net.initialize(init.Normal(sigma=0.01)) loss = gloss.SoftmaxCrossEntropyLoss() # 定义损失函数Softmax trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1}) # 训练器初始化 num_epochs = 5 lr = 0.1 for epoch in range(num_epochs): train_l_sum, train_acc_sum, n = 0.0, 0.0, 0 for X, y in train_iter: with autograd.record(): y_hat = net(X) l = loss(y_hat, y).sum() l.backward() # 求导 trainer.step(batch_size) # 迭代并更新 y = y.astype('float32') train_l_sum += l.asscalar() train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
def train(self, train_data, log_folder, params_folder, epochs, batch_size, ctx, init_lr, lr_step=5, lr_factor=0.1):
    """
    Train network.
    :param train_data: Data and Label for training. Instance of tuple(dict).
        - valid_keys: valid keys of current category of clothes.
        - images: Instance of tuple. All images info of current category:
            - orig_images_id: Instance of list. (image_count)
            - orig_images_shape: Instance of np.array. (image_count, orig_h, orig_w)
            - orig_keypoints: Instance of np.array. (image_count, keypoints_count, 3)
            - norm_images: Instance of np.array. (image_count, 3, h, w)
            - belief_maps: Instance of np.array. (image_count, keypoints_count, h, w)
            - norm_centermap: Instance of np.array. (h, w)
    :param log_folder: Folder the training log file is written to.
    :param params_folder: Folder holds saved params.
    :param epochs: Total number of epochs to train up to.
    :param batch_size: Per-step batch size (split across the ctx list).
    :param ctx: Instance of list. Devices to train on.
    :param init_lr: Initial learning rate; decayed by `lr_factor` every `lr_step` epochs.
    :return: None. Parameters are checkpointed after every batch.
    """
    logging.basicConfig(level=logging.INFO,
                        handlers=[logging.StreamHandler(),
                                  logging.FileHandler(log_folder + 'train_' + self._name + '_batch_' + str(epochs) + '_' + str(batch_size))])
    # 1. check params files and get last epoch and batch
    epoch_index, batch_index, file = self.utils_params_file(batch_size, 'check', params_folder)
    # (1) begin a new training
    if epoch_index == -1 and batch_index == -1:
        logging.info("No params files detected. Begin a new training.")
        self.initialize(mx.init.Xavier(magnitude=2.34), ctx=ctx)
        epoch_index = 0
        batch_index = 0
    # (2) resume training from params file
    else:
        logging.info("Params file '%s' detected. Last (epoch, batch): (%d, %d). Resuming training."
                     % (file, epoch_index, batch_index))
        self.collect_params().load(params_folder + file, ctx=ctx)
        batch_index += 1
    # 2. train
    # (1) trainer and loss function for total training mode
    model_trainer = trainer.Trainer(self.collect_params(), 'sgd',
                                    {'learning_rate': init_lr, 'momentum': 0.9, 'wd': 5e-4})
    # sparse_label=False: ground truth is a per-pixel distribution, not an index.
    loss_function = loss.SoftmaxCrossEntropyLoss(sparse_label=False)
    # (2) train each epoch and batch
    for e in range(epoch_index, epochs):
        # Only the resumed epoch continues from a mid-epoch batch index.
        if e != epoch_index:
            batch_index = 0
        # 1> set learning rate (step decay every lr_step epochs)
        model_trainer.set_learning_rate(init_lr * pow(lr_factor, int(e / lr_step)))
        if e % lr_step == 0:
            logging.info('Learning rate now is set to be %.6f' % model_trainer.learning_rate)
        # 2> train batch
        while True:
            # (1) get data; the loader signals end-of-epoch by returning Nones.
            _, _, orig_images_shape_batch, orig_keypoints_batch, norm_images_batch, norm_center_maps_batch, belief_maps_batch, _ = \
                train_data.get_batch_data(if_data_aug=True, loss_mode='softmax',
                                          batch_index=batch_index, batch_size=batch_size)
            if norm_images_batch is None and norm_center_maps_batch is None and belief_maps_batch is None:
                break
            # (2) split data into multiple GPU
            norm_images_batch_LIST = split_and_load(norm_images_batch, ctx_list=ctx)
            norm_center_maps_batch_LIST = split_and_load(norm_center_maps_batch, ctx_list=ctx)
            belief_maps_batch_LIST = split_and_load(belief_maps_batch, ctx_list=ctx)
            #-------------------------------------------------------------------------------------------------------
            # (3) train total
            pred_beliefMaps_batch = []
            # 1> record auto grad
            with autograd.record():
                # 1.initiate gpu losses
                gpu_losses = []
                # 2.calculate losses on each gpu of each stage
                # NOTE(review): the loop variables below intentionally shadow the
                # full-batch arrays split above; the full-batch values are not
                # needed again afterwards.
                for norm_images_batch, norm_center_maps_batch, belief_maps_batch in zip(
                        norm_images_batch_LIST, norm_center_maps_batch_LIST, belief_maps_batch_LIST):
                    # (1) initiate current gpu loss
                    current_gpu_loss = None
                    # (2) network forward
                    pred_beliefMaps = self.forward(input_images=norm_images_batch,
                                                   center_maps=norm_center_maps_batch)
                    # Keep the final-stage predictions for the NE metric below.
                    for p_b in pred_beliefMaps[-1].asnumpy():
                        pred_beliefMaps_batch.append(p_b)
                    # (3) shape groud-truth belief maps to use softmax loss
                    shaped_gt_beliefMaps = nd.reshape(belief_maps_batch,
                                                      shape=(belief_maps_batch.shape[0],
                                                             belief_maps_batch.shape[1],
                                                             belief_maps_batch.shape[2] * belief_maps_batch.shape[3]))
                    # (4) calculate each and every stage loss on current gpu
                    for stage in range(len(self._block_stage)):
                        # 1> shape predicted belief map of current stage
                        shaped_pred_beliefMap = nd.reshape(pred_beliefMaps[stage],
                                                           shape=(pred_beliefMaps[stage].shape[0],
                                                                  pred_beliefMaps[stage].shape[1],
                                                                  pred_beliefMaps[stage].shape[2] * pred_beliefMaps[stage].shape[3]))
                        # 2> calculate current stage loss on current gpu
                        current_loss = loss_function(shaped_pred_beliefMap, shaped_gt_beliefMaps)
                        # 3> summary (intermediate supervision: sum over stages)
                        current_gpu_loss = current_loss if current_gpu_loss is None else (current_gpu_loss + current_loss)
                    # (5) append & save
                    gpu_losses.append(current_gpu_loss)
            # 3> backward and update
            for gpu_loss in gpu_losses:
                gpu_loss.backward()
            model_trainer.step(batch_size)
            nd.waitall()
            # 4> calculate batch average loss
            batch_loss = sum([nd.sum(gpu_loss).asscalar() for gpu_loss in gpu_losses]) / (batch_size * len(self._block_stage))
            NE = self.calculate_error(valid_keys=utils.keypoints_order[train_data.category],
                                      category=train_data.category,
                                      predicted_keypoints=self.transform_beliefMaps_into_origKeypoints(
                                          predicted_beliefMaps=np.array(pred_beliefMaps_batch),
                                          orig_images_shape=orig_images_shape_batch),
                                      orig_keypoints=np.array(orig_keypoints_batch))
            # 5> print
            logging.info("Epoch[%d]-Batch[%d] lr: %f. Average loss: %f. NE:%.2f%%"
                         % (e, batch_index, model_trainer.learning_rate, batch_loss, NE*100))
            #-------------------------------------------------------------------------------------------------------
            # (4) save params with batch info (batch_size, batch_index),
            # then delete the previous batch's checkpoint.
            params_file = self.utils_params_file(operation='generate', batch_size=batch_size,
                                                 epoch_index=e, batch_index=batch_index)
            params_old_file = self.utils_params_file(operation='generate', batch_size=batch_size,
                                                     epoch_index=e, batch_index=batch_index - 1,
                                                     batches=train_data.calc_batches_count(batch_size))
            self.collect_params().save(params_folder + params_file)
            if os.path.exists(params_folder + params_old_file):
                os.remove(params_folder + params_old_file)
            batch_index += 1
    # 3.finish
    logging.info("Training completed.")
def method0():
    """Run the d2l chapter-3 training loop on Fashion-MNIST.

    Relies on the module-level globals ``net`` and ``params`` defined
    elsewhere in this file; trains for 5 epochs with lr 0.5.
    """
    batch_size = 256
    num_epochs = 5
    lr = 0.5
    # Softmax cross-entropy pairs with a raw-logit output layer.
    criterion = gloss.SoftmaxCrossEntropyLoss()
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    d2l.train_ch3(net, train_iter, test_iter, criterion, num_epochs,
                  batch_size, params, lr)
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    """Train a from-scratch RNN/LSTM language model and periodically print
    perplexity plus sample generations for each prefix in `prefixes`.

    `is_random_iter` selects random vs. consecutive minibatch sampling, which
    changes how the hidden state is (re)initialised/detached between batches.
    """
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        # With consecutive sampling, the hidden state only needs to be
        # initialised once at the start of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # With random sampling, the hidden state must be re-initialised
            # before every minibatch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            # With consecutive sampling, detach the state from the previous
            # batch's computation graph instead.
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                # outputs shape: (batch_size, vocab_size) per time step.
                if is_lstm:
                    outputs, state_h, state_c = rnn(
                        get_inputs(X, vocab_size), state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(
                        get_inputs(X, vocab_size), state_h, *params)
                # Let t_ib_j be element j of the batch at time step i:
                # y shape: (batch_size * num_steps,)
                # y = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
                y = Y.T.reshape((-1, ))
                # Concatenate outputs, shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            # Clip gradients to mitigate exploding gradients.
            grad_clipping(params, state_h, Y, clipping_theta, ctx)
            gb.sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            # exp(mean cross-entropy) == perplexity
            print("\nepoch %d, perplexity %f"
                  % (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(rnn, prefix, pred_len, params,
                                         num_hiddens, vocab_size, ctx,
                                         idx_to_char, char_to_idx, get_inputs,
                                         is_lstm))
def train_and_valid(transformer_model):
    """Train the transformer translator: BERT-embed the English source,
    pad to ghp.max_seq_len, and optimise with Adam under a warmup LR schedule.

    Checkpoints parameters every 3000 batches; updates whose accuracy jumps
    by more than 0.1 (above an 0.3 floor) are skipped as suspected outliers.
    """
    loss = gloss.SoftmaxCrossEntropyLoss()
    bert = bert_embedding.BertEmbedding(ctx=ghp.ctx)
    global_step = 0
    for epoch in range(ghp.epoch_num):
        train_data_loader = get_data_loader()
        print("********开始训练********")
        # Warmup-style learning rate, recomputed from the global step.
        learning_rate = get_learning_rate(ghp.learning_rate, ghp.model_dim,
                                          ghp.learning_rate_warmup_steps, global_step)
        optimizer = mx.optimizer.Adam(learning_rate=learning_rate,
                                      beta1=ghp.optimizer_adam_beta1,
                                      beta2=ghp.optimizer_adam_beta2,
                                      epsilon=ghp.optimizer_adam_epsilon)
        model_trainer = gluon.Trainer(transformer_model.collect_params(), optimizer)
        count = 0
        last_acc = 0
        for en_sentences, zh_idxs in train_data_loader:
            learning_rate = get_learning_rate(ghp.learning_rate, ghp.model_dim,
                                              ghp.learning_rate_warmup_steps, global_step)
            count += 1
            print("现在是第{}个epoch(总计{}个epoch),第{}批数据。(lr:{}s)".format(
                epoch + 1, ghp.epoch_num, count, model_trainer.learning_rate))
            result = bert(en_sentences)
            all_sentences_emb = []
            all_sentences_idx = []
            real_batch_size = len(en_sentences)
            # Build padded embedding tensors and a 1/0 validity mask per sentence.
            for i in range(real_batch_size):
                one_sent_emb = []
                seq_valid_len = len(result[i][0])
                one_sent_idx = [1] * (seq_valid_len) + [0] * (ghp.max_seq_len - seq_valid_len)
                # embedding
                for word_emb in result[i][1]:
                    one_sent_emb.append(word_emb.tolist())
                # padding (near-zero vectors for positions past the valid length)
                for n in range(ghp.max_seq_len - seq_valid_len):
                    one_sent_emb.append([1e-9] * 768)
                all_sentences_emb.append(one_sent_emb)
                all_sentences_idx.append(one_sent_idx)
            x_en_emb = nd.array(all_sentences_emb, ctx=ghp.ctx)
            x_en_idx = nd.array(all_sentences_idx, ctx=ghp.ctx)
            y_zh_idx = zh_idxs
            with autograd.record():
                loss_mean, acc = batch_loss(transformer_model, en_sentences,
                                            x_en_emb, x_en_idx, y_zh_idx, loss)
            loss_scalar = loss_mean.asscalar()
            acc_scalar = acc.asscalar()
            sw.add_scalar(tag='cross_entropy', value=loss_scalar, global_step=global_step)
            sw.add_scalar(tag='acc', value=acc_scalar, global_step=global_step)
            global_step += 1
            loss_mean.backward()
            model_trainer.set_learning_rate(learning_rate)
            if acc_scalar > 0.3:
                acc_diff = acc_scalar - last_acc
                # NOTE(review): last_acc is overwritten BEFORE the message below
                # prints it as the "previous" accuracy, so the skip message shows
                # the current accuracy twice — looks like a bug; confirm intent.
                last_acc = acc_scalar
                if acc_diff > 0.1:
                    print("上一步acc:{},此步acc:{},差值为{}过大,放弃更新参数。".format(
                        str(last_acc)[:5], str(acc_scalar)[:5], str(acc_diff)[:5]))
                    continue
            model_trainer.step(1)
            print("loss:{0}, acc:{1}".format(
                str(loss_scalar)[:5], str(acc_scalar)[:5]))
            print("\n")
            # Periodic checkpoint every 3000 batches.
            if count % 3000 == 0:
                if not os.path.exists("parameters"):
                    os.makedirs("parameters")
                model_params_file = "parameters/" + "epoch{}_batch{}_loss{}_acc{}.params".format(
                    epoch, count, str(loss_scalar)[:5], str(acc_scalar)[:5])
                transformer_model.save_parameters(model_params_file)
def train_model(net, train_iter, num_epochs=5, batch_size=2, save_interval=10): num_steps = len(tct_train) // batch_size # 设置训练参数 trainer = gluon.Trainer( net.collect_params(), 'sgd', { 'learning_rate': 0.1, 'wd': 0.0005, 'momentum': 0.9, 'lr_scheduler': mx.lr_scheduler.PolyScheduler( # 训练计划? num_steps * num_epochs, 0.1, 2, 0.0001) }) soft_loss = gloss.SoftmaxCrossEntropyLoss(axis=1, sparse_label=False) for epoch in range(num_epochs): t0 = time.time() # images:[batch, N, H, W] for i, (images, labels) in enumerate(train_iter): tmp = time.time() images = images.as_in_context(ctx) labels = labels.as_in_context(ctx) yz = labels[:, 0, :, :] labels = nd.stack(yz, 255 - yz, axis=1) labels = labels.astype('float32') / 255 with autograd.record(): # [batch, 2, H, W], 2 is num_classes outputs = net(images.astype('float32')) loss = soft_loss(outputs, labels) loss.backward() # mean the loss using the batch to update the params print('Epoch %s/%s, iter:%s, time:%.7f, loss:%s' % (epoch, num_epochs, i, time.time() - tmp, loss.mean().asscalar())) trainer.step( images.shape[0] ) # images.shape[0]是图像的高,images.shape[1]是宽,images.shape[2]是通道数 cost_time = time.time() - t0 print('Epoch time:{}'.format(cost_time)) # 一步迭代使用的时间 if epoch % save_interval == 0 and epoch != num_epochs - 1: # 每5步保存一次模型 print('Epoch [{}/{}], Loss:{}'.format(epoch, num_epochs, loss.mean().asscalar())) # save the model if not os.path.exists('./output'): os.mkdir('output') prefix_file = './output' print('save the model to output...') net.hybridize() # test the model x_test = nd.random.uniform(shape=(1, 3, 224, 224), ctx=mx.gpu()) net(x_test) # 这个x_test传入net后就是Unet那里的x net.export(path=prefix_file, epoch=epoch) print('save finished~~~') if epoch == num_epochs - 1: # 结束时再保存模型 print('save the model to output...') net.hybridize() x_test = nd.random.uniform(shape=(1, 3, 224, 224), ctx=mx.gpu()) net(x_test) net.export(path=prefix_file, epoch=epoch) print('save finished~~~')
from mxnet.gluon import loss

# Shared softmax cross-entropy criterion: softmax over the class axis
# (axis=1); sparse_label=False means targets are one-hot / per-class
# probabilities rather than integer class indices.
loss_softmax = loss.SoftmaxCrossEntropyLoss(sparse_label=False, axis=1)
def __init__(
        self,
        num_classes,
        # rpn
        anchor_scales,
        anchor_ratios,
        rpn_fg_threshold=0.5,
        rpn_bg_threshold=0.3,
        rpn_batch_size_per_image=256,
        rpn_positive_fraction=0.3,
        rpn_pre_nms_top_n_in_train=2000,
        rpn_post_nms_top_n_in_train=1000,
        rpn_pre_nms_top_n_in_test=2000,
        rpn_post_nms_top_n_in_test=1000,
        rpn_nms_thresh=0.7,
        use_fpn=False,
        # head
        fg_threshold=0.5,
        batch_size_per_image=256,
        positive_fraction=0.5,
        max_objs_per_images=100,
        nms_thresh=0.7,
        backbone_pretrained=True,
        ctx=cpu(),
        **kwargs):
    """Assemble a Faster R-CNN detector.

    Wires together: a ResNet-50 backbone (optionally with an FPN), a region
    proposal network, an RoI feature extractor, and a two-FC-layer box head
    with per-class classification and regression outputs.

    :param num_classes: number of object classes (drives cls/reg head widths).
    :param anchor_scales/anchor_ratios: RPN anchor generation parameters.
    :param rpn_*: RPN sampling / NMS thresholds and proposal counts
                  (train vs. test variants).
    :param use_fpn: if True, build an FPN and extract RoIs from its levels;
                    otherwise RoIs come from the single "c5" feature map.
    :param fg_threshold/batch_size_per_image/positive_fraction: box-head
           training-time sampling parameters.
    :param max_objs_per_images: cap on detections kept per image.
    :param nms_thresh: box-head NMS IoU threshold.
    :param backbone_pretrained: load pretrained backbone weights.
    :param ctx: device the backbone weights are loaded on.
    """
    super(FasterRCNNDetector, self).__init__(**kwargs)
    self.backbone = Resnet50Backbone(backbone_pretrained, ctx)
    self.use_fpn = use_fpn
    if use_fpn:
        self.fpn = FeaturePyramidNetwork(prefix='fpn_')
    self.rpn = RegionProposalNetwork(anchor_scales, anchor_ratios,
                                     rpn_fg_threshold, rpn_bg_threshold,
                                     rpn_batch_size_per_image,
                                     rpn_positive_fraction,
                                     rpn_pre_nms_top_n_in_train,
                                     rpn_post_nms_top_n_in_train,
                                     rpn_pre_nms_top_n_in_test,
                                     rpn_post_nms_top_n_in_test,
                                     rpn_nms_thresh,
                                     prefix="rpn_")
    # The last FPN output level is excluded from RoI extraction.
    if use_fpn:
        self.roi_extractor = RoIExtractor(self.fpn.output_layers[:-1], use_fpn)
    else:
        self.roi_extractor = RoIExtractor(["c5"])
    # Two shared fully-connected layers before the cls/reg branches.
    self.head = nn.Sequential(prefix="head_")
    with self.head.name_scope():
        self.head.add(nn.Dense(1024, activation='relu'))
        self.head.add(nn.Dense(1024, activation='relu'))
    self.num_classes = num_classes
    self.cls = nn.Dense(num_classes)
    self.reg = nn.Dense(num_classes * 4)  # per-class box deltas
    self.fg_threshold = fg_threshold
    self.batch_size_per_image = batch_size_per_image
    self.positive_fraction = positive_fraction
    self.max_objs_per_images = max_objs_per_images
    self.nms_thresh = nms_thresh
    # Softmax CE for classification, Huber (smooth-L1) for box regression.
    self.cls_loss = gloss.SoftmaxCrossEntropyLoss()
    self.reg_loss = gloss.HuberLoss()
def run():
    """Train the program executor (RenderNet) on part-primitive voxel data,
    checkpointing model parameters and trainer state every save_interval
    epochs and once more at the end.
    """
    opt = options_train_executor.parse()
    print('===== arguments: program executor =====')
    for key, val in vars(opt).items():
        print("{:20} {}".format(key, val))
    print('===== arguments: program executor =====')
    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)
    # build dataloader
    # NOTE(review): `train_set` is not defined in this function — presumably a
    # module-level global; otherwise this raises NameError. Expected a
    # `train_set = PartPrimitive(opt.train_file)` mirroring val_set; confirm.
    train_loader = gdata.DataLoader(
        dataset=train_set,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
    )
    val_set = PartPrimitive(opt.val_file)
    val_loader = gdata.DataLoader(
        dataset=val_set,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
    )
    # build the model
    ctx = d2l.try_gpu()
    model = RenderNet(opt)
    model.initialize(init = init.Xavier(),ctx = ctx)
    # Class axis is 1; `weight` rescales the loss globally.
    loss = gloss.SoftmaxCrossEntropyLoss(axis = 1,weight = 5)
    optimizer = Trainer(model.collect_params(),"adam",
                        {"learning_rate":opt.learning_rate,"wd":opt.weight_decay,
                         'beta1':opt.beta1, 'beta2':opt.beta2})
    # Flip to True to resume from a previous checkpoint.
    train_from0 = False;
    if train_from0:
        if os.path.exists('./model of executor'):
            model.load_parameters('model of executor')
            print("loaded parameter of model")
        if os.path.exists('./optimizer of executor'):
            optimizer.load_states('optimizer of executor')
            print("loaded state of trainer")
    for epoch in range(1, opt.epochs+1):
        adjust_learning_rate(epoch, opt, optimizer)
        print("###################")
        print("training")
        # train_loss / train_iou are accumulators defined at module level
        # (not visible in this chunk) — TODO confirm.
        train(epoch, train_loader, model,loss,optimizer, opt,ctx,train_loss,train_iou)
        print("###################")
        print("testing")
        '''
        gen_shapes, ori_shapes = validate(epoch, val_loader, model, loss,
                                          opt,ctx, val_loss,val_iou,
                                          gen_shape=True)
        gen_shapes = (gen_shapes > 0.5)
        gen_shapes = gen_shapes.astype(np.float32)
        iou = BatchIoU(ori_shapes, gen_shapes)
        print("Mean IoU: {:.3f}".format(iou.mean().asscalar()))
        '''
        # Periodic checkpoint.
        if epoch % opt.save_interval == 0:
            print('Saving...')
            optimizer.save_states("optimizer of executor_3"), model.save_parameters("model of executor_3")
    # Final checkpoint after the last epoch (same file names as above).
    print('Saving...')
    optimizer.save_states("optimizer of executor_3"), model.save_parameters("model of executor_3")
train_iter = gdata.DataLoader(d2l.VOCSegDataset(True, crop_size, voc_dir, colormap2label), batch_size, shuffle=True, last_batch='discard', num_workers=num_workers) test_iter = gdata.DataLoader(d2l.VOCSegDataset(False, crop_size, voc_dir, colormap2label), batch_size, last_batch='discard', num_workers=num_workers) #9.10.5-训练模型 print('try_all_gpus') ctx = d2l.try_all_gpus() loss = gloss.SoftmaxCrossEntropyLoss(axis=1) net.collect_params().reset_ctx(ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', { 'learning_rate': 0.1, 'wd': 1e-3 }) print("start train...\n") d2l.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs=1) # 5 print("end train...\n") #9.10.6-预测像素类别 def predict(img): X = test_iter._dataset.normalize_image(img) X = X.transpose((2, 0, 1)).expand_dims(axis=0) pred = nd.argmax(net(X.as_in_context(ctx[0])), axis=1)
def train_eval(opt):
    """Train and evaluate the multi-task R(2+1)D network.

    Each epoch runs one optimization pass over the training loader using
    ``scene_loss + opt.action_rate * action_loss``, checkpoints the net,
    then runs a validation pass reporting action top-1 accuracy and scene
    top-1 IoU against the positive-label set.

    :param opt: parsed argparse namespace (gpus, lr, meitu_dir, ...).
    """
    mx.random.seed(123)
    np.random.seed(123)
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    # Fix: compare strings with `==`, not `is` -- identity comparison with
    # a literal is unreliable and a SyntaxWarning on CPython >= 3.8.
    gpus = [] if opt.gpus is None or opt.gpus == '' else [
        int(gpu) for gpu in opt.gpus.split(',')]
    num_gpus = len(gpus)
    batch_size = opt.batch_per_device * max(1, num_gpus)
    # Fix: both branches now yield a single Context -- the loops below call
    # data.as_in_context(context), which requires a Context, not a list.
    context = [mx.gpu(i) for i in gpus][0] if num_gpus > 0 else mx.cpu()
    steps = [int(step) for step in opt.lr_scheduler_steps.split(',')]
    vis_env = opt.dataset + opt.output
    vis = Visulizer(env=vis_env)
    vis.log(opt)
    net = R2Plus2D_MT(num_scenes=19, num_actions=44,
                      model_depth=opt.model_depth,
                      final_temporal_kernel=opt.n_frame // 8)
    # Multi-GPU-decoding dataloader for the Meitu multi-task dataset.
    train_loader, val_loader, sample_weight = get_meitu_multi_task_dataloader(
        data_dir=opt.meitu_dir,
        device_id=opt.decoder_gpu,
        batch_size=batch_size,
        num_workers=opt.num_workers,
        n_frame=opt.n_frame,
        crop_size=opt.crop_size,
        scale_h=opt.scale_h,
        scale_w=opt.scale_w,
        cache_size=opt.cache_size)
    action_loss = gloss.SoftmaxCrossEntropyLoss()
    # The scene branch is multi-label; its loss is selected by name.
    loss_dict = {'bce': gloss.SigmoidBinaryCrossEntropyLoss,
                 'warp_nn': WarpLoss,
                 'warp_fn': WARP_funcLoss,
                 'lsep_nn': LsepLoss,
                 'lsep_fn': LSEP_funcLoss}
    scene_loss = loss_dict[opt.loss_type]()
    net.initialize(mx.init.Xavier(), ctx=context)
    if opt.pretrained is not None:
        if opt.pretrained.endswith('.pkl'):
            net.load_from_caffe2_pickle(opt.pretrained)
        elif opt.pretrained.endswith('.params'):
            try:
                print("load pretrained params ", opt.pretrained)
                net.load_from_sym_params(opt.pretrained, ctx=context)
            except Exception:
                # Fall back to plain gluon-format parameters.
                print("load as sym params failed,reload as gluon params")
                net.load_params(opt.pretrained, ctx=context)
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': opt.lr, 'momentum': 0.9,
                             'wd': opt.wd},
                            kvstore=opt.kvstore)
    lr_steps = lr_schedualer.MultiFactorScheduler(steps,
                                                  opt.lr_schedualer_factor)
    lr_steps.base_lr = opt.lr
    best_eval = 0.0
    for epoch in range(opt.num_epoch):
        tic = time()
        scene_pre_loss, scene_cumulative_loss = 0.0, 0.0
        action_pre_loss, action_cumulative_loss = 0.0, 0.0
        trainer.set_learning_rate(lr_steps(epoch))
        vis.log('Epoch %d learning rate %f' % (epoch, trainer.learning_rate))
        for i, (data, scene_label, action_label) in enumerate(train_loader):
            # Single card: no batch splitting.
            with autograd.record():
                data = data.as_in_context(context)
                scene_label = scene_label.as_in_context(context)
                action_label = action_label.as_in_context(context)
                pred_scene, pred_action = net(data)
                loss_scene = scene_loss(pred_scene, scene_label)
                loss_action = action_loss(pred_action, action_label)
                loss = loss_scene + opt.action_rate * loss_action.mean()
                scene_cumulative_loss += nd.mean(loss_scene).asscalar()
                action_cumulative_loss += nd.mean(loss_action).asscalar()
            loss.backward()
            trainer.step(data.shape[0])
            if (i + 1) % opt.log_interval == 0:
                # Report the loss accumulated since the previous report.
                vis.log('[Epoch %d,Iter %d ] scene loss= %f' %
                        (epoch, i + 1,
                         scene_cumulative_loss - scene_pre_loss))
                vis.plot('scene_loss',
                         scene_cumulative_loss - scene_pre_loss)
                scene_pre_loss = scene_cumulative_loss
                vis.log('[Epoch %d,Iter %d ] action loss= %f' %
                        (epoch, i + 1,
                         action_cumulative_loss - action_pre_loss))
                vis.plot("action_loss",
                         action_cumulative_loss - action_pre_loss)
                action_pre_loss = action_cumulative_loss
            if opt.debug:
                # Debug mode: stop after roughly three log intervals.
                if (i + 1) // (opt.log_interval) == 3:
                    break
        vis.log('[Epoch %d] scene loss=%f,action loss=%f' %
                (epoch, scene_cumulative_loss, action_cumulative_loss))
        vis.log('[Epoch %d] time used: %f' % (epoch, time() - tic))
        # Fix: supply the missing epoch argument so '%d' is formatted
        # instead of being logged literally.
        vis.log('[Epoch %d] saving net' % epoch)
        save_path = './{0}/{1}_test-val{2}.params'.format(
            opt.output, str(opt.dataset + 'multi'), str(epoch))
        vis.log("save path %s" % (save_path))
        net.save_parameters(save_path)
        # Validation. The 1e-4 seeds avoid division by zero before the
        # first sample is counted.
        label_inter = 1e-4
        label_union = 1e-4
        acc = nd.array([0], ctx=mx.cpu())
        val_iter = 0
        for i, (data, scene_label, action_label) in enumerate(val_loader):
            data = data.as_in_context(context)
            action_label = action_label.as_in_context(context)
            scene_pred, action_pred = net(data)
            # Scene scores sorted descending; top-1 is compared to the
            # positive-label set via intersection-over-union.
            scene_order = scene_pred.argsort()[:, ::-1]
            scene_order_np = scene_order.asnumpy()
            scene_label_np = scene_label.asnumpy()
            for scene_pred_v, scene_label_v in zip(scene_order_np,
                                                   scene_label_np):
                label_set = set([index for index, value in
                                 enumerate(scene_label_v) if value > 0.1])
                pred_top1 = set([scene_pred_v[0]])
                label_inter += len(pred_top1.intersection(label_set))
                label_union += len(pred_top1.union(label_set))
            action_pred = action_pred.argmax(axis=1)
            acc += (action_pred ==
                    action_label.astype('float32')).mean().asscalar()
            val_iter += 1
            if (i + 1) % (opt.log_interval) == 0:
                vis.log("[Epoch %d,Iter %d],action_acc= %f" %
                        (epoch, i, acc.asscalar() / val_iter))
                vis.log("[Epoch %d,Iter %d],scene_top1=%f" %
                        (epoch, i, label_inter / label_union))
            if opt.debug:
                if (i + 1) // (opt.log_interval) == 2:
                    break
    vis.log("""----------------------------------------
----XXXX------finished------------------
----------------------------------------""")
    # NOTE(review): fragment starts mid-function -- the enclosing
    # `def get_net()` header is not visible in this chunk.
    with net.name_scope():
        # Two conv->BN->ReLU->pool stages followed by a small MLP head.
        net.add(nn.Conv2D(channels=20, kernel_size=5),
                nn.BatchNorm(axis=1),
                nn.Activation('relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Conv2D(channels=50, kernel_size=3),
                nn.BatchNorm(axis=1),
                nn.Activation('relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Flatten(),
                nn.Dense(128, activation='relu'),
                nn.Dense(10))
    return net


# --- MNIST training script ---
batch_size = 128
train_iter, test_iter = utils.loadMnistData(batch_size)
net = get_net()
net.initialize()
softmax_loss = loss.SoftmaxCrossEntropyLoss()
trainer = Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
epochs = 5
for epoch in range(epochs):
    total_loss = .0
    total_acc = .0
    for data, label in train_iter:
        with autograd.record():
            output = net(data)
            losses = softmax_loss(output, label)
        losses.backward()
        trainer.step(batch_size)
        total_loss += nd.mean(losses).asscalar()
        # NOTE(review): `=` overwrites rather than accumulates -- only the
        # last batch's accuracy survives; `+=` was likely intended.
        total_acc = utils.accuracy(output, label)
    test_acc = utils.evaluate_accuracy(test_iter, net)
def __init__(self, net, ctx=mx.cpu()):
    """Set up a classification task: cross-entropy loss plus an accuracy metric."""
    super(Classification, self).__init__(net=net, ctx=ctx)
    # Accuracy is tracked alongside the softmax cross-entropy loss.
    self.metric = mx.metric.Accuracy()
    self.loss_fun = gloss.SoftmaxCrossEntropyLoss()
def train_and_predit_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                               corpus_indices, idx_to_char, char_to_idx,
                               num_epochs, num_steps, lr, clipping_theta,
                               batch_size, pred_period, pred_len, prefixes):
    # Train an RNN through the Gluon API ("predit" typo kept -- callers
    # may use this name).
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(force_reinit=True, ctx=ctx, init=init.Normal(sigma=0.01))
    # NOTE(review): the optimizer dict below is malformed -- 'momentum' has
    # no value, mixing dict and set item syntax, which is a SyntaxError.
    # The function body is also truncated at this point in the chunk.
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate':lr, 'momentum'})
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus_indices, idx_to_char,
                          char_to_idx, is_random_iter, num_epochs, num_steps,
                          lr, clipping_theta, batch_size, pred_period,
                          pred_len, prefixes):
    """Train a from-scratch RNN and periodically print perplexity and samples.

    :param rnn: the recurrent network forward function
    :param get_params: builds the weight parameters
    :param init_rnn_state: builds the initial hidden state
    :param num_hiddens: hidden layer size
    :param vocab_size: number of distinct characters
    :param ctx: computation context
    :param corpus_indices: character indices of the corpus
    :param is_random_iter: whether batches are sampled randomly
    :param num_epochs: total number of epochs
    :param lr: learning rate
    :param clipping_theta: gradient clipping threshold
    :param batch_size: batch size
    :param pred_period: epochs between sampled predictions
    :param prefixes: prompts to continue when sampling
    """
    if is_random_iter:
        data_iter_fn = d2l.data_iter_random
    else:
        data_iter_fn = d2l.data_iter_consecutive
    params = get_params(vocab_size, num_hiddens, vocab_size, ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        if not is_random_iter:
            # Consecutive sampling: initialize the hidden state once per
            # epoch and carry it across batches.
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            if is_random_iter:
                # Random sampling: fresh state for every batch.
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                # Detach the carried state from the graph so gradients do
                # not flow across batch boundaries.
                for s in state:
                    s.detach()
            with autograd.record():
                # inputs: num_steps arrays of shape (batch_size, vocab_size)
                inputs = to_onehot(X, vocab_size)
                (outputs, state) = rnn(inputs, state, params)
                # Concatenate to (num_steps * batch_size, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Y is (batch_size, num_steps); transpose + flatten aligns
                # labels row-for-row with `outputs`.
                y = Y.T.reshape((-1, ))
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            # Loss is already averaged, so gradients are not re-averaged.
            d2l.sgd(params, lr, 1)
            l_sum += l.asscalar() * y.size
            # Fix: this accumulator was commented out in the original,
            # making the perplexity below divide by zero.
            n += y.size
        if (epoch + 1) % pred_period == 0:
            print('epoch %d,perplexity %f,time %.2f sec' %
                  (epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn(prefix, pred_len, rnn, params,
                                        init_rnn_state, num_hiddens,
                                        vocab_size, ctx, idx_to_char,
                                        char_to_idx))
def train_and_predit_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                               corpus_indices, idx_to_char, char_to_idx,
                               num_epochs, num_steps, lr, clipping_theta,
                               batch_size, pred_period, pred_len, prefixes):
    # Gluon-API RNN training entry point ("predit" typo kept).
    loss = gloss.SoftmaxCrossEntropyLoss()
    # NOTE(review): truncated mid-statement -- `init=)` is a SyntaxError;
    # the initializer argument (e.g. init.Normal(sigma=0.01)) is missing
    # and the rest of the body is absent from this chunk.
    model.initialize(force_reinit=True, ctx=ctx, init=)
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus_indices, idx_to_char,
                          char_to_idx, num_epochs, num_steps, lr,
                          clipping_theta, batch_size, pred_period, pred_len,
                          prefixes):
    # Train a from-scratch RNN with consecutive sampling, resuming from a
    # checkpoint file when one exists, decaying lr by 0.85 every 20 epochs,
    # and saving parameters each epoch.
    # NOTE(review): depends on module-level names: data_iter_consecutive,
    # para_file, to_onehot, grad_clipping, sgd, predict_rnn.
    data_iter_fn = data_iter_consecutive
    if os.path.exists(para_file):
        params = nd.load(para_file)
        for param in params:
            param.attach_grad()
        print('successfully load the params before...\n')
    else:
        params = get_params()
        print('generated new params to training')
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        if epoch > 0:
            if epoch % 20 == 0:
                # Learning-rate decay every 20 epochs.
                lr = 0.85 * lr
        print('training...now is on epoch' + str(epoch) + '\n')
        # Initialize the hidden state at the start of each epoch.
        state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            # Detach the state so gradients do not cross batch boundaries.
            for s in state:
                s.detach()
            with autograd.record():
                inputs = to_onehot(X, vocab_size)
                # outputs: num_steps arrays of shape (batch_size, vocab_size)
                (outputs, state) = rnn(inputs, state, params)
                # Concatenated shape: (num_steps * batch_size, vocab_size)
                outputs = nd.concat(*outputs, dim=0)
                # Y is (batch_size, num_steps); transpose + flatten makes a
                # batch*num_steps vector aligned row-for-row with outputs.
                y = Y.T.reshape((-1, ))
                # Average classification error via cross entropy.
                l = loss(outputs, y).mean()
            l.backward()
            # Clip gradients.
            grad_clipping(params, clipping_theta)
            sgd(params, lr, 1)
            l_sum += l.asscalar() * y.size
            n += y.size
            # print loss
            print('loss now is' + str(l_sum))
        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' %
                  (epoch + 1, math.exp(l_sum / n), time.time() - start))
            print('\nlearning rate now is' + str(lr))
            for prefix in prefixes:
                print(' -', predict_rnn(prefix, pred_len, rnn, params,
                                        num_hiddens, vocab_size, idx_to_char,
                                        char_to_idx, batch_size))
        # Save the parameters (placement per epoch inferred from the
        # collapsed source -- confirm against the original).
        nd.save(para_file, params)
        print('save success to params_rnn.para')
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    # NOTE(review): depends on module-level names: data_iter_random,
    # data_iter_consecutive, grad_clipping, sgd, predict_rnn.
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(1, num_epochs + 1):
        if not is_random_iter:
            # Consecutive sampling: state initialized once per epoch and
            # carried (detached) across batches.
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                # LSTMs carry an additional cell state.
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            if is_random_iter:
                # Random sampling: fresh state every batch.
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            else:
                # Detach so gradients do not flow across batch boundaries.
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                if is_lstm:
                    outputs, state_h, state_c = rnn(
                        get_inputs(X, vocab_size), state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(
                        get_inputs(X, vocab_size), state_h, *params)
                # Labels flattened to align with the concatenated outputs.
                y = Y.T.reshape((-1, ))
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            # Perplexity = exp(mean cross-entropy).
            print("\nepoch %d, perplexity %f" %
                  (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(rnn, prefix, pred_len, params,
                                         num_hiddens, vocab_size, ctx,
                                         idx_to_char, char_to_idx,
                                         get_inputs, is_lstm))
def __init__(self, vocab, word_dims, tag_dims, dropout_dim, lstm_layers,
             lstm_hiddens, dropout_lstm_input, dropout_lstm_hidden,
             mlp_arc_size, mlp_rel_size, dropout_mlp, debug=False):
    """Build a biaffine dependency parser: embeddings, stacked forward and
    backward LSTMs, MLP projections, and biaffine arc/relation scorers.

    :param vocab: vocabulary object providing embeddings and rel_size
    :param word_dims: word embedding size
    :param tag_dims: POS-tag embedding size
    :param dropout_dim: embedding (word-level) dropout rate
    :param lstm_layers: number of BiLSTM layers
    :param lstm_hiddens: LSTM hidden size per direction
    :param mlp_arc_size: arc-MLP output size
    :param mlp_rel_size: relation-MLP output size
    :param dropout_mlp: MLP dropout rate
    :param debug: passed through to the orthonormal initializers
    """
    super(BiaffineParser, self).__init__()

    def embedding_from_numpy(_we, trainable=True):
        # Wrap a pre-built numpy embedding matrix in an nn.Embedding with
        # word-level weight drop; freeze it when trainable=False.
        word_embs = nn.Embedding(_we.shape[0], _we.shape[1],
                                 weight_initializer=mx.init.Constant(_we))
        apply_weight_drop(word_embs, 'weight', dropout_dim, axes=(1,))
        if not trainable:
            word_embs.collect_params().setattr('grad_req', 'null')
        return word_embs

    self._vocab = vocab
    # Trainable word embeddings, optional frozen pretrained embeddings,
    # and POS-tag embeddings.
    self.word_embs = embedding_from_numpy(vocab.get_word_embs(word_dims))
    self.pret_word_embs = embedding_from_numpy(vocab.get_pret_embs(),
                                               trainable=False) if vocab.has_pret_embs() \
        else None
    self.tag_embs = embedding_from_numpy(vocab.get_tag_embs(tag_dims))
    # Forward/backward LSTM stacks: first layer consumes word+tag embedding
    # concatenation; deeper layers consume the bidirectional output.
    self.f_lstm = nn.Sequential()
    self.b_lstm = nn.Sequential()
    self.f_lstm.add(utils.orthonormal_VanillaLSTMBuilder(
        1, word_dims + tag_dims, lstm_hiddens, dropout_lstm_hidden, debug))
    self.b_lstm.add(utils.orthonormal_VanillaLSTMBuilder(
        1, word_dims + tag_dims, lstm_hiddens, dropout_lstm_hidden, debug))
    for _ in range(lstm_layers - 1):
        self.f_lstm.add(utils.orthonormal_VanillaLSTMBuilder(
            1, 2 * lstm_hiddens, lstm_hiddens, dropout_lstm_hidden, debug))
        self.b_lstm.add(utils.orthonormal_VanillaLSTMBuilder(
            1, 2 * lstm_hiddens, lstm_hiddens, dropout_lstm_hidden, debug))
    self.dropout_lstm_input = dropout_lstm_input
    self.dropout_lstm_hidden = dropout_lstm_hidden
    # One shared orthonormal matrix, split between the arc and relation
    # MLPs for both dependent and head projections.
    mlp_size = mlp_arc_size + mlp_rel_size
    W = utils.orthonormal_initializer(mlp_size, 2 * lstm_hiddens, debug)
    self.mlp_dep_W = self.parameter_from_numpy('mlp_dep_W', W)
    self.mlp_head_W = self.parameter_from_numpy('mlp_head_W', W)
    self.mlp_dep_b = self.parameter_init('mlp_dep_b', (mlp_size,),
                                         mx.init.Zero())
    self.mlp_head_b = self.parameter_init('mlp_head_b', (mlp_size,),
                                          mx.init.Zero())
    self.mlp_arc_size = mlp_arc_size
    self.mlp_rel_size = mlp_rel_size
    self.dropout_mlp = dropout_mlp
    # Biaffine scoring tensors; the +1 columns accommodate bias terms.
    self.arc_W = self.parameter_init('arc_W',
                                     (mlp_arc_size, mlp_arc_size + 1),
                                     init=mx.init.Zero())
    self.rel_W = self.parameter_init('rel_W',
                                     (vocab.rel_size * (mlp_rel_size + 1),
                                      mlp_rel_size + 1),
                                     init=mx.init.Zero())
    # axis=0/batch_axis=-1 matches the parser's transposed score layout.
    self.softmax_loss = loss.SoftmaxCrossEntropyLoss(axis=0, batch_axis=-1)
    self.initialize()
# Micro-benchmark: time 1000 SGD steps of VGG16 on synthetic data (GPU).
import mxnet as mx
from mxnet.gluon import model_zoo
from mxnet.gluon import loss
from mxnet import autograd
from mxnet.gluon import trainer
import time
import os

# Disable cuDNN autotune so per-iteration timings are stable.
os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'

vgg16 = model_zoo.vision.vgg16(ctx=mx.gpu())
vgg16.initialize(ctx=mx.gpu())
criterion = loss.SoftmaxCrossEntropyLoss()
update = trainer.Trainer(vgg16.collect_params(), optimizer='sgd')
begin = time.time()
bs = 60
for i in range(1000):
    print(i)
    # Synthetic batch: random images, all labels zero.
    inputs = mx.nd.normal(shape=(bs, 3, 224, 224), ctx=mx.gpu())
    labels = mx.nd.array([0] * bs, ctx=mx.gpu())
    with autograd.record():
        logits = vgg16(inputs)
        # Fix: use a distinct local name for the per-batch loss tensor
        # instead of rebinding the imported `mxnet.gluon.loss` module.
        batch_loss = criterion(logits, labels)
    batch_loss.backward()
    update.step(batch_size=bs)
print("time ", time.time() - begin)
def test(ctx=mx.cpu()):
    # Train (or load) an SSD detector on UAV data, then run it on a video
    # stream, drawing detections frame by frame.
    # NOTE(review): the nesting of predict/display/the video loop inside
    # this function is inferred from the collapsed source (they reference
    # the local `net`) -- confirm against the original file.
    net = MySSD(1, num_anchors)
    net.initialize(init="Xavier", ctx=ctx)
    net.hybridize()
    # print(net)
    # x = nd.random.normal(0,1,(100,3,256,256), ctx=ctx)
    # net(x)
    batch_size, edge_size = args.batch_size, args.input_size
    train_iter, _ = predata.load_data_uav(args.data_path, batch_size,
                                          edge_size)
    batch = train_iter.next()
    batch.data[0].shape, batch.label[0].shape
    if batch_size >= 25:
        # show f*****g pikachuus in grid
        imgs = (batch.data[0][0:25].transpose((0, 2, 3, 1))) / 255
        axes = utils.show_images(imgs, 5, 5).flatten()
        for ax, label in zip(axes, batch.label[0][0:25]):
            utils.show_bboxes(ax, [label[0][1:5] * edge_size], colors=['w'])
        plt.show()
    # net.initialize(init=init.Xavier(), ctx=ctx)
    trainer = mx.gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': 0.2,
        'wd': 5e-4
    })
    cls_loss = gloss.SoftmaxCrossEntropyLoss()
    bbox_loss = gloss.L1Loss()

    def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels,
                  bbox_masks):
        # Joint objective: classification + masked bbox regression.
        cls = cls_loss(cls_preds, cls_labels)
        bbox = bbox_loss(bbox_preds * bbox_masks, bbox_labels * bbox_masks)
        return cls + bbox

    def cls_eval(cls_preds, cls_labels):
        # the result from class prediction is at the last dim
        # argmax() should be assigned with the last dim of cls_preds
        return (cls_preds.argmax(axis=-1) == cls_labels).sum().asscalar()

    def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
        # Sum of absolute bbox errors over unmasked anchors.
        return ((bbox_labels - bbox_preds) * bbox_masks).abs().sum().asscalar()

    IF_LOAD_MODEL = args.load
    if IF_LOAD_MODEL:
        net.load_parameters(args.model_path)
    else:
        for epoch in range(args.num_epoches):
            acc_sum, mae_sum, n, m = 0.0, 0.0, 0, 0
            train_iter.reset(
            )  # reset data iterator to read-in images from beginning
            start = time.time()
            for batch in train_iter:
                X = batch.data[0].as_in_context(ctx)
                Y = batch.label[0].as_in_context(ctx)
                with autograd.record():
                    # generate anchors and generate bboxes
                    im, anchors, cls_preds, bbox_preds = net(X)
                    # assign classes and bboxes for each anchor
                    bbox_labels, bbox_masks, cls_labels = nd.contrib.MultiBoxTarget(
                        anchors, Y, cls_preds.transpose((0, 2, 1)))
                    # calc loss
                    l = calc_loss(cls_preds, cls_labels, bbox_preds,
                                  bbox_labels, bbox_masks)
                l.backward()
                trainer.step(batch_size)
                acc_sum += cls_eval(cls_preds, cls_labels)
                n += cls_labels.size
                mae_sum += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
                m += bbox_labels.size
            if (epoch + 1) % 1 == 0:
                print('epoch %2d, class err %.2e, bbox mae %.2e, time %.1f sec'
                      % (epoch + 1, 1 - acc_sum / n, mae_sum / m,
                         time.time() - start))
        net.save_parameters("myssd.params")

    def predict(X):
        # Forward one frame and run non-maximum suppression; raises when
        # nothing is detected.
        im, anchors, cls_preds, bbox_preds = net(X.as_in_context(ctx))
        # im = im.transpose((2, 3, 1, 0)).asnumpy()
        # imgs = [im[1:-2,1:-2, k, 0] for k in range(256)]
        # why are there boundary effect?
        # utils.show_images_np(imgs, 16, 16)
        # plt.show()
        # plt.savefig("./activdation/figbase%s"%nd.random.randint(0,100,1).asscalar())
        # plt.imshow(nd.sum(nd.array(im[1:-2, 1:-2, :, :]), axis=2).asnumpy()[:, :, 0], cmap='gray')
        # plt.savefig("./suming_act")
        cls_probs = cls_preds.softmax().transpose((0, 2, 1))
        output = contrib.nd.MultiBoxDetection(cls_probs, bbox_preds, anchors)
        # Keep rows whose class id is not -1 (valid detections).
        idx = [i for i, row in enumerate(output[0])
               if row[0].asscalar() != -1]
        if idx == []:
            raise ValueError("NO TARGET. Seq Terminated.")
        return output[0, idx]

    def display(img, output, threshold):
        # Draw boxes whose confidence clears min(best score, threshold).
        lscore = []
        for row in output:
            lscore.append(row[1].asscalar())
        for row in output:
            score = row[1].asscalar()
            if score < min(max(lscore), threshold):
                continue
            h, w = img.shape[0:2]
            bbox = [row[2:6] * nd.array((w, h, w, h), ctx=row.context)]
            cv.rectangle(img,
                         (bbox[0][0].asscalar(), bbox[0][1].asscalar()),
                         (bbox[0][2].asscalar(), bbox[0][3].asscalar()),
                         (0, 255, 0), 3)
        cv.imshow("res", img)
        cv.waitKey(60)

    # Video inference loop: resize each frame to 512x512, predict, export
    # the symbol once on the first frame, and display detections.
    cap = cv.VideoCapture(args.test_path)
    rd = 0
    while True:
        ret, frame = cap.read()
        img = nd.array(frame)
        feature = image.imresize(img, 512, 512).astype('float32')
        X = feature.transpose((2, 0, 1)).expand_dims(axis=0)
        countt = time.time()
        output = predict(X)
        if rd == 0:
            net.export('ssd')
        countt = time.time() - countt
        print("SPF: %3.2f" % countt)
        utils.set_figsize((5, 5))
        display(frame / 255, output, threshold=0.8)
        plt.show()
        rd += 1
        # NOTE(review): fragment starts mid-function -- the enclosing
        # `def resnet_block(num_channels, num_residuals, first_block=...)`
        # header and its loop over `i` are not visible in this chunk.
        if i == 0 and not first_block:
            # First unit of a non-first stage downsamples (stride 2) and
            # matches channels via a 1x1 conv on the shortcut.
            blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
        else:
            blk.add(Residual(num_channels))
    return blk


# --- ResNet-18-style network on Fashion-MNIST ---
net = nn.Sequential()
# Stem: 7x7 conv, BN, ReLU, 3x3 max-pool.
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))
net.add(resnet_block(64, 2, first_block=True),
        resnet_block(128, 2),
        resnet_block(256, 2),
        resnet_block(512, 2))
net.add(nn.GlobalAvgPool2D(), nn.Dense(10))

# Shape check: print each layer's output for a dummy input.
X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

lr = 0.05
num_epochs = 5
batch_size = 256
ctx = gb.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gloss.SoftmaxCrossEntropyLoss()
trainer_iter, test_iter = gb.load_data_fashion_mnist(batch_size, resize=96)
gb.train(trainer_iter, test_iter, net, loss, trainer, ctx, num_epochs)
def __init__(self, **kwargs):
    """Initialize the softmax classifier wrapper with a cross-entropy loss and no network yet."""
    super(SoftMaxGluon, self).__init__(**kwargs)
    # The concrete network is expected to be attached after construction.
    self.loss = g_loss.SoftmaxCrossEntropyLoss()
    self.net = None