def model_trainer(self, config: Configuration, model_path, network_model, model_name): model_checkpoint_path = '/checkpoints/' + config.weights_name + '/' + model_name inference_configuration = self.define_inference_configuration( config.processor) # add network name to config.json inference_configuration['network'] = config.weights_name if not os.path.exists(model_checkpoint_path): os.makedirs(model_checkpoint_path, exist_ok=True) with open(model_checkpoint_path + '/config.json', 'w') as outfile: json.dump(inference_configuration, outfile) classes = self.get_classes(model_path, config.weights_name, model_name) ctx = self.get_ctx(config.processor, config.gpus_count) if (config.weights_type == "from_scratch"): net = network_model network = str(net) if (config.Xavier == True): net.initialize(init.Xavier(), ctx=ctx) else: net.initialize(mx.init.MSRAPrelu(), ctx=ctx) elif (config.weights_type == 'pre_trained') or (config.weights_type == 'pretrained_offline'): net = network_model network = str(net) network = str(net) print(net.name) output_exists = hasattr(net, 'output') ##check if output exists network_name = net.name ##get the model's name if ("resnext" in network_name): ##check if model is resnext with net.name_scope(): net.output = nn.Dense(classes) net.initialize() elif (output_exists): ##check if output exists If_HybridSequential = False if ("HybridSequential" in str(net.output) ): ##check if output contains HybridSequential If_HybridSequential = True if If_HybridSequential: If_HybridSequential_2 = len( net.output ) ##check if HybridSequential contains more than 2 items if (If_HybridSequential_2 > 2): with net.name_scope(): print('2------------------------') print('------------------------') x = nn.HybridSequential() x.add(nn.Conv2D(classes, 1, strides=1)) x.add(net.output[1]) x.add(net.output[2]) x.add(net.output[3]) net.output = x else: with net.name_scope(): print('3------------------------') # print(net.output[1]) print('------------------------') x = 
nn.HybridSequential() x.add(nn.Conv2D(classes, 1, strides=1)) x.add(net.output[1]) net.output = x else: print("4") with net.name_scope(): net.output = nn.Dense(classes) if (config.Xavier == True): net.output.initialize(init.Xavier(), ctx=ctx) else: net.output.initialize(mx.init.MSRAPrelu(), ctx=ctx) else: print("5") with net.name_scope(): net.fc = nn.Dense(classes) if (config.Xavier == True): net.fc.initialize(init.Xavier(), ctx=ctx) else: net.fc.initialize(mx.init.MSRAPrelu(), ctx=ctx) else: net = network_model net.collect_params().reset_ctx(ctx) net.hybridize() trainer = gluon.Trainer( net.collect_params(), 'sgd', { 'learning_rate': config.lr, 'momentum': config.momentum, 'wd': config.wd }) metric = mx.metric.Accuracy() L = gluon.loss.SoftmaxCrossEntropyLoss() return trainer, metric, L
from mxnet import init # net = get_R2plus1d(101,model_depth=34) # net.initialize() # print(net) # x = nd.random.uniform(shape=(2,3,32,112,112)) # for layer in net: # x = layer(x) # print(layer.name,'output shape',x.shape) net2 = R2Plus2D(num_class=101, model_depth=34) import os os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3' context = mx.gpu(2) #context = mx.cpu() net2.initialize(init=init.Xavier(), ctx=context) net2.hybridize() x = nd.random.uniform(shape=(1, 3, 8, 112, 112)) #net2.collect_params().reset_ctx(mx.gpu(1)) x = x.as_in_context(context) print("beging to fintune time") for i in range(10): y = net2(x) y.wait_to_read() print("begin to calculate time") tic = time() for i in range(100): y = net2(x) y.wait_to_read()
if resize: transformer += [gdata.vision.transforms.Resize(resize)] transformer += [gdata.vision.transforms.ToTensor()] transformer = gdata.vision.transforms.Compose(transformer) mnist_train = gdata.vision.FashionMNIST(root=root, train=True) mnist_test = gdata.vision.FashionMNIST(root=root, train=False) num_workers = 0 if sys.platform.startswith('win32') else 4 train_iter = gdata.DataLoader(mnist_train.transform_first(transformer), batch_size, shuffle=True, num_workers=num_workers) test_iter = gdata.DataLoader(mnist_test.transform_first(transformer), batch_size, shuffle=False, num_workers=num_workers) return train_iter, test_iter batch_size = 128 # 如出现“ out of memory”的报错信息,可减⼩batch_size或resize train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224) ############################################################################## # 开始训练 ############################################################################## lr, num_epochs, ctx = 0.01, 5, mx.cpu() # d2l.try_gpu() net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier()) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr}) d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
def main(args): with open(args.file, 'r') as f: settings = yaml.load(f) assert args.file[:-5].endswith(settings['model']['name']), \ 'The model name is not consistent! %s != %s' % (args.file[:-5], settings['model']['name']) mx.random.seed(settings['seed']) np.random.seed(settings['seed']) random.seed(settings['seed']) setting_dataset = settings['dataset'] setting_model = settings['model'] setting_train = settings['training'] name = os.path.join(PARAM_PATH, setting_model['name']) model_type = getattr(model, setting_model['type']) net = model_type.net(settings) try: logger = Logger.load('%s.yaml' % name) net.load_parameters('%s-%04d.params' % (name, logger.best_epoch()), ctx=args.gpus) logger.set_net(net) print('Successfully loading the model %s [epoch: %d]' % (setting_model['name'], logger.best_epoch())) except: logger = Logger(name, net, setting_train['early_stop_metric'], setting_train['early_stop_epoch']) net.initialize(init.Xavier(), ctx=args.gpus) print('Initialize the model') num_params = 0 for v in net.collect_params().values(): num_params += np.prod(v.shape) print(net.collect_params()) print('NUMBER OF PARAMS:', num_params) flow_train, flow_eval, flow_test, flow_scaler = getattr( data.dataloader, setting_dataset['flow'])(settings) model_trainer = ModelTrainer( net=net, trainer=gluon.Trainer( net.collect_params(), mx.optimizer.Adam( learning_rate=setting_train['lr'], lr_scheduler=mx.lr_scheduler.FactorScheduler( step=setting_train['lr_decay_step'] * len(args.gpus), factor=setting_train['lr_decay_factor'], stop_factor_lr=1e-6)), update_on_kvstore=False), clip_gradient=setting_train['clip_gradient'], logger=logger, ctx=args.gpus) flow_metrics = [ MAE(scaler=flow_scaler, pred_name='flow_pred', label_name='flow_label', name='flow_mae'), RMSE(scaler=flow_scaler, pred_name='flow_pred', label_name='flow_label', name='flow_rmse'), MAPE(scaler=flow_scaler, pred_name='flow_pred', label_name='flow_label', name='flow_mape'), SMAPE(scaler=flow_scaler, 
pred_name='flow_pred', label_name='flow_label', name='flow_smape') ] model_trainer.fit(begin_epoch=logger.best_epoch(), num_epochs=args.epochs, train=flow_train, eval=flow_eval, test=flow_test, metrics=flow_metrics) net.load_parameters('%s-%04d.params' % (name, logger.best_epoch()), ctx=args.gpus) model_trainer.fit(begin_epoch=0, num_epochs=1, train=None, eval=flow_eval, test=flow_test, metrics=flow_metrics)
# net首次被初始化,使用默认初始化方式 net.initialize() X = nd.random.uniform(shape=(2, 20)) Y = net(X) # 前向计算 # net再次被初始化,使用init模块中正太分布初始化方法 net.initialize(init=init.Normal(sigma=0.01), force_reinit=True) print(net[0].weight.data()[0]) # net再次被初始化,使用init模块的常数来初始化权重参数 net.initialize(init=init.Constant(1), force_reinit=True) print(net[0].weight.data()[0]) # net再次被初始化,使用init模块的Xavier随机初始化方法 net.initialize(init=init.Xavier(), force_reinit=True) print(net[0].weight.data()[0]) # net再次被初始化,使用自定义的初始化方法 net.initialize(init=MyInit(), force_reinit=True) print(net[0].weight.data()[0]) # 测试共享模型参数,第二隐藏层和第三隐藏层共享模型参数 net2 = nn.Sequential() second = nn.Dense(8, activation='relu') third = nn.Dense(8, activation='relu', params=second.params) net2.add( nn.Dense(8, activation='relu'), second, # 第二隐藏层 third, # 第三隐藏层 nn.Dense(10))
def get_net(ctx): num_outputs = 100 net = Alexnet(num_outputs) net.initialize(ctx=ctx, init=init.Xavier()) return net
def train(encoder, decoder, max_seq_len, ctx, eval_fr_ens): encoder.initialize(init.Xavier(), ctx=ctx) decoder.initialize(init.Xavier(), ctx=ctx) encoder_optimizer = gluon.Trainer(encoder.collect_params(), 'adam', {'learning_rate': lr}) decoder_optimizer = gluon.Trainer(decoder.collect_params(), 'adam', {'learning_rate': lr}) data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True) l_sum = 0 #loss? for epoch in range(1, num_epochs + 1): for x, y in data_iter: cur_batch_size = x.shape[0] with autograd.record(): l = nd.array([0], ctx=ctx) valid_length = nd.array([0], ctx=ctx) encoder_state = encoder.begin_state(func=nd.zeros, batch_size=cur_batch_size, ctx=ctx) # encoder_outputs 包含了编码器在每个时间步的隐藏状态。 encoder_outputs, encoder_state = encoder(x, encoder_state) encoder_outputs = encoder_outputs.flatten() # 解码器的第一个输入为 BOS 符号。 decoder_input = nd.array([output_vocab.token_to_idx[BOS]] * cur_batch_size, ctx=ctx) mask = nd.ones(shape=(cur_batch_size, ), ctx=ctx) #用处 decoder_state = decoder.begin_state(func=nd.zeros, batch_size=cur_batch_size, ctx=ctx) for i in range(max_seq_len): #print(i) decoder_output, decoder_state = decoder( decoder_input, decoder_state, encoder_outputs) decoder_input = y[:, i] valid_length = valid_length + mask.sum() l = l + (mask * loss(decoder_output, y[:, i])).sum() #print(l) mask = mask * (y[:, i] != eos_id) #print(y[:,i]) #print(mask) l = l / valid_length print(l) with open('train_loss.txt', 'a', encoding="utf-8") as f: f.write('epoch:' + str(epoch) + 'batch_size_loss' + str(l) + '\n') l.backward() encoder_optimizer.step(1) decoder_optimizer.step(1) l_sum += l.asscalar() / max_seq_len
def train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs): # 训练函数,与之前写的一样 print('training on', ctx) loss = gloss.SoftmaxCrossEntropyLoss() for epoch in range(num_epochs): train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time() for X, y in train_iter: X, y = X.as_in_context(ctx), y.as_in_context(ctx) with autograd.record(): y_hat = net(X) l = loss(y_hat, y).sum() l.backward() trainer.step(batch_size) y = y.astype('float32') train_l_sum += l.asscalar() train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar() n += y.size test_acc = evaluate_accuracy(test_iter, net, ctx) print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, ' 'time %.1f sec' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc, time.time() - start)) ctx = try_gpu() lr, num_epochs = 0.9, 5 net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier()) # 用Xaiver函数进行初始化 trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr}) train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
batch_size=batch_size, shuffle=True, num_workers=num_workers) val_data = gluon.data.DataLoader( gluon.data.vision.ImageFolderDataset(val_path).transform_first(transform_test), batch_size=batch_size, shuffle=False, num_workers = num_workers) test_data = gluon.data.DataLoader( gluon.data.vision.ImageFolderDataset(test_path).transform_first(transform_test), batch_size=batch_size, shuffle=False, num_workers = num_workers) # Model and Trainer model_name = sys.argv[1] finetune_net = get_model(model_name, pretrained=True) with finetune_net.name_scope(): finetune_net.output = nn.Dense(classes) finetune_net.output.initialize(init.Xavier(), ctx = ctx) finetune_net.collect_params().reset_ctx(ctx) finetune_net.hybridize() trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', { 'learning_rate': lr, 'momentum': momentum, 'wd': wd}) metric = mx.metric.Accuracy() L = gluon.loss.SoftmaxCrossEntropyLoss() # we define a evaluation function for validation and testing def test(net, val_data, ctx): metric = mx.metric.Accuracy() for i, batch in enumerate(val_data): data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False) label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False) outputs = [net(X) for X in data]
def Cam_resp(network, image, model_path, ctx, data_shape, class_names, thresh, num_class=14): for i in range(num_class): net = getattr(models, network)(classes=1) with net.name_scope(): net.output = nn.Dense(1, activation="sigmoid") net.output.initialize(init.Xavier()) params_features = os.path.join(model_path, '%s_f.params' % class_names[i]) params_output = os.path.join(model_path, '%s_o.params' % class_names[i]) net.features.load_params(params_features, ctx=ctx) net.output.load_params(params_output, ctx=mx.cpu(0)) net.collect_params().reset_ctx(ctx) net.hybridize() params = net.output.collect_params() class_weights = params[list(params.keys())[0]] c = nn.Conv2D(channels=1, kernel_size=1) c.initialize(ctx=ctx) test = nd.random.normal(shape=(8, 1024, 7, 7), ctx=ctx) c(test) c.weight.set_data(class_weights.data().reshape((1, 1024, 1, 1))) n = len(image) X = np.zeros((n, 3, data_shape, data_shape), dtype=np.float32) mean = np.array([0.485, 0.456, 0.406]) std = np.array([0.229, 0.224, 0.225]) for j in range(n): img = cv2.imread(image[j]) X[j] = ((cv2.resize(img, (data_shape, data_shape))[:, :, ::-1] / 255.0 - mean) / std).transpose((2, 0, 1)) predictions, cams = forward(X[np.arange(n)], net, ctx, c) predictions = predictions.asnumpy() cams = cams.asnumpy() for j in range(n): img = cv2.imread(image[j]) X[j] = ((cv2.resize(img, (data_shape, data_shape))[:, :, ::-1] / 255.0 - mean) / std).transpose((2, 0, 1)) if predictions[j, 0] > thresh: cam = cams[j][0] cam -= cam.min() cam /= cam.max() cam = cv2.resize((cam * 255).astype(np.uint8), (img.shape[1], img.shape[0])) heatmap = cv2.applyColorMap(cam, cv2.COLORMAP_JET) out = cv2.addWeighted(img, 0.8, heatmap, 0.4, 0) cv2.imshow('Image:%s pred:%s' % (image[j], class_names[i]), out) cv2.waitKey(0) cv2.destroyAllWindows()
def train_cifar10(args, config, reporter): vars(args).update(config) np.random.seed(args.seed) random.seed(args.seed) mx.random.seed(args.seed) # Set Hyper-params batch_size = args.batch_size * max(args.num_gpus, 1) ctx = [mx.gpu(i) for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()] # Define DataLoader transform_train = transforms.Compose([ gcv_transforms.RandomCrop(32, pad=4), transforms.RandomFlipLeftRight(), transforms.ToTensor(), transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) ]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) ]) train_data = gluon.data.DataLoader( gluon.data.vision.CIFAR10(train=True).transform_first(transform_train), batch_size=batch_size, shuffle=True, last_batch="discard", num_workers=args.num_workers) test_data = gluon.data.DataLoader( gluon.data.vision.CIFAR10(train=False).transform_first(transform_test), batch_size=batch_size, shuffle=False, num_workers=args.num_workers) # Load model architecture and Initialize the net with pretrained model finetune_net = get_model(args.model, pretrained=True) with finetune_net.name_scope(): finetune_net.fc = nn.Dense(args.classes) finetune_net.fc.initialize(init.Xavier(), ctx=ctx) finetune_net.collect_params().reset_ctx(ctx) finetune_net.hybridize() # Define trainer trainer = gluon.Trainer(finetune_net.collect_params(), "sgd", { "learning_rate": args.lr, "momentum": args.momentum, "wd": args.wd }) L = gluon.loss.SoftmaxCrossEntropyLoss() metric = mx.metric.Accuracy() def train(epoch): for i, batch in enumerate(train_data): data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False) label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False) with ag.record(): outputs = [finetune_net(X) for X in data] loss = [L(yhat, y) for yhat, y in zip(outputs, label)] for l in loss: l.backward() trainer.step(batch_size) 
mx.nd.waitall() def test(): test_loss = 0 for i, batch in enumerate(test_data): data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False) label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False) outputs = [finetune_net(X) for X in data] loss = [L(yhat, y) for yhat, y in zip(outputs, label)] test_loss += sum(l.mean().asscalar() for l in loss) / len(loss) metric.update(label, outputs) _, test_acc = metric.get() test_loss /= len(test_data) reporter(mean_loss=test_loss, mean_accuracy=test_acc) for epoch in range(1, args.epochs + 1): train(epoch) test()
nn.GlobalAvgPool2D(), # 将输出4D->2D(N,10) nn.Flatten()) # Test X = nd.uniform(shape=(1, 1, 224, 224)) net.initialize() # 默认初始化init=initializer.Uniform() for layer in net: X = layer(X) print(layer.name, 'output shape:\t', X.shape) # 读取数据 batch_size = 32 train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224) # 重新初始化模型 ctx = d2l.try_gpu() net.initialize(force_reinit=True, init=init.Xavier(), ctx=ctx) # 模型重新初始化 # 优化函数, 0.1 lr = 0.05 trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':lr}) # 这行代码要放在模型初始化完成后,否则collect_params出错 # 训练 num_epochs = 5 d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
def train(encoder, decoder, decoder_init_state, max_seq_len, ctx, eval_fr_ens): encoder.initialize(init.Xavier(), ctx=ctx) decoder.initialize(init.Xavier(), ctx=ctx) decoder_init_state.initialize(init.Xavier(), ctx=ctx) encoder_optimizer = gluon.Trainer(encoder.collect_params(), 'adam', {'learning_rate': lr}) decoder_optimizer = gluon.Trainer(decoder.collect_params(), 'adam', {'learning_rate': lr}) decoder_init_state_optimizer = gluon.Trainer( decoder_init_state.collect_params(), 'adam', {'learning_rate': lr}) data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True) l_sum = 0 for epoch in range(1, epochs + 1): for x, y in data_iter: cur_batch_size = x.shape[0] with autograd.record(): l = nd.array([0], ctx=ctx) valid_length = nd.array([0], ctx=ctx) # encoder状态尺寸: (num_layers, batch_size, hidden_dim) encoder_state = encoder.begin_state( func=nd.zeros, batch_size=cur_batch_size, ctx=ctx) # encoder_outputs尺寸: (step_size, batch_size, hidden_dim) encoder_outputs, encoder_state = encoder(x, encoder_state) # For an input array with shape (d1, d2, ..., dk), flatten operation reshapes the input array into # an output array of shape (d1, d2*...*dk) # flatten将矩阵变换为二维的,encoder_outputs : (step_size, batch_size * hidden_dim) encoder_outputs = encoder_outputs.flatten() # 解码器的第一个输入为 BOS 字符。 decoder_input = nd.array( [output_vocab.token_to_idx[BOS]] * cur_batch_size, ctx=ctx) mask = nd.ones(shape=(cur_batch_size,), ctx=ctx) # encoder_state是一个list,其中的每个元素是一个() #print(encoder_state[0].shape) #print(len(encoder_state)) # 编码器的最终状态来初始化解码器的初始状态 decoder_state = decoder_init_state(encoder_state[0]) #print(decoder_state) for i in range(max_seq_len): # 解码器的输入: 解码器前一时刻状态, 前一时刻输出, 编码器的输出 decoder_output, decoder_state = decoder( decoder_input, decoder_state, encoder_outputs) # 解码器使用当前时刻的预测结果作为下一时刻的输入。 # 输出所有词的概率,argmax在第1维上(第0维是bath_size)选择概率最大的那个 decoder_input = decoder_output.argmax(axis=1) valid_length = valid_length + mask.sum() l = l + (mask * loss(decoder_output, y[:, i])).sum() 
mask = mask * (y[:, i] != eos_id) l = l / valid_length l.backward() encoder_optimizer.step(1) decoder_optimizer.step(1) decoder_init_state_optimizer.step(1) l_sum += l.asscalar() / max_seq_len if epoch % epoch_period == 0 or epoch == 1: if epoch == 1: print('epoch %d, loss %f, ' % (epoch, l_sum / len(data_iter))) else: print('epoch %d, loss %f, ' % (epoch, l_sum / epoch_period / len(data_iter))) if epoch != 1: l_sum = 0 translate(encoder, decoder, decoder_init_state, eval_fr_ens, ctx, max_seq_len)
num_convs_in_dense_blocks = [4,4,4,4] for i,num_convs in enumerate(num_convs_in_dense_blocks): net.add(DenseBlock(num_convs, grow_rate)) num_channels += num_convs * grow_rate if i != len(num_convs_in_dense_blocks) - 1: net.add(transition_block(num_channels // 2)) net.add(nn.BatchNorm(), nn.Activation('relu'), nn.GlobalAvgPool2D(), nn.Dense(10)) #【初始化模型参数】 net.initialize(init=init.Xavier(), ctx=mx.gpu()) #【定义损失函数】 loss = gloss.SoftmaxCrossEntropyLoss() #【定义优化算法】 trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.05}) #【训练模型】 def batch_accuracy(y_hat, y): return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar() def all_accuracy(data_iter, net): acc = 0 for X, y in data_iter: X, y = X.as_in_context(mx.gpu()), y.as_in_context(mx.gpu())
for i in range(int(60000/batch_size)): X = X_train[i * batch_size: i * batch_size + batch_size] # 批量 y = y_train[i * batch_size: i * batch_size + batch_size] X,y = X.as_in_context(ctx),y.as_in_context(ctx) with autograd.record(): y_hat = net(X) l = loss(y_hat,y).sum() l.backward() # 更新权重 trainer.step(batch_size) # 训练 y = y.astype('float32') train_l_sum += l.asscalar() train_acc_sum +=(y_hat.argmax(axis=1)==y).sum().asscalar() n += y.size test_acc = evaluate_accuracy(net) loss_list.append(train_l_sum / n) acc_list.append(train_acc_sum / n) test_acc_list.append(test_acc) print('epoch %d,loss %.4f,train acc %.3f,test acc %.3f,time %.1f sec' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc, time.time() - start)) return loss_list, acc_list, test_acc_list if __name__ == '__main__': lr,num_epochs = 0.9,10 batch_size = 200 net = Net() # 定义网络模型 net.initialize(force_reinit=True,init=init.Xavier()) # 首次对模型初始化需要指定force_reinit为真 # init=init.Xavier() 一种参数初始化的方式 trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr}) loss_list, acc_list, test_acc_list = train(batch_size,trainer,num_epochs) plot(loss_list, acc_list, test_acc_list)
## 2.从文件夹读取数据 print("读取数据") train_data, test_data = d2l.read_imdb('train'), d2l.read_imdb('test') print("读取数据 ok") ## 3.整理数据 vocab = d2l.get_vocab_imdb(train_data) train_iter = gdata.DataLoader( gdata.ArrayDataset(*d2l.preprocess_imdb(train_data, vocab)), batch_size, shuffle=True) test_iter = gdata.DataLoader( gdata.ArrayDataset(*d2l.preprocess_imdb(test_data, vocab)), batch_size) ## 4.指定参数并加载模型 embed_size, num_hiddens, num_layers, ctx = 100, 200, 2, d2l.try_all_gpus() net = BiRNN(vocab, embed_size, num_hiddens, num_layers) #实例化一个双向RNN net.initialize(init.Xavier(), ctx=ctx) #对模型进行初始化 ## 4.1 加载词向量预训练集 print("加载预训练的词向量,若电脑没有预训练数据集将会自动从网上下载") glove_embedding = text.embedding.create( 'glove', pretrained_file_name='glove.6B.100d.txt', vocabulary=vocab) #其中的100指的是词向量长度为100 print("加载与训练的词向量 ok") ## 4.2 初试化模型参数 net.embedding.weight.set_data(glove_embedding.idx_to_vec) net.embedding.collect_params().setattr('grad_req', 'null') ## 5. 指定损失函数下降速度,训练轮数,并训练模型 lr, num_epochs = 0.01, 5 trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr}) loss = gloss.SoftmaxCrossEntropyLoss()
from mxnet import nd, init from mxnet.gluon import nn import matplotlib.pyplot as plt ################################################################################ # init.Constant # init.Normal # init.Zero # init.One # ... ## init with Xavier (search for more) layer = nn.Conv2D(channels=1, kernel_size=(3, 3), in_channels=1) layer.initialize(init.Xavier()) print(layer.weight.data()) # [[[[ 0.05636501 0.10720772 0.24847925] # [ 0.39752382 0.11866093 0.41332 ] # [ 0.05182666 0.4009717 -0.08815584]]]] # <NDArray 1x1x3x3 @cpu(0)> ## init with ones # set it directly layer.weight.set_data(nd.ones((1, 1, 3, 3), ctx=mx.cpu())) print(layer.weight.data()) # [[[[1. 1. 1.] # [1. 1. 1.] # [1. 1. 1.]]]] # <NDArray 1x1x3x3 @cpu(0)>
def train(): logging.info('Start Training for Task: %s\n' % (task)) # Initialize the net with pretrained model pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True) finetune_net = gluon.model_zoo.vision.get_model(model_name, classes=task_num_class) finetune_net.features = pretrained_net.features finetune_net.output.initialize(init.Xavier(), ctx = ctx) finetune_net.collect_params().reset_ctx(ctx) finetune_net.hybridize() train_transform = transforms.Compose([ transforms.Resize(input_scale), #transforms.RandomResizedCrop(448,scale=(0.76, 1.0),ratio=(0.999, 1.001)), transforms.RandomFlipLeftRight(), transforms.RandomBrightness(0.2), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) train_dataset = gluon.data.vision.ImageFolderDataset(os.path.join('.','train_valid_allset', task, 'train')) train_data = gluon.data.DataLoader(train_dataset.transform_first(train_transform), batch_size=batch_size, shuffle=True, num_workers=num_workers, last_batch='discard') val_transform = transforms.Compose([ transforms.Resize(input_scale), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) val_dataset = gluon.data.vision.ImageFolderDataset(os.path.join('.','train_valid_allset', task, 'val')) val_data = gluon.data.DataLoader(val_dataset.transform_first(val_transform), batch_size=batch_size, shuffle=False, num_workers = num_workers, last_batch='discard') trainer = gluon.Trainer(finetune_net.collect_params(), 'adam', { 'learning_rate': lr}) metric = mx.metric.Accuracy() L = gluon.loss.SoftmaxCrossEntropyLoss() lr_counter = 0 num_batch = len(train_data) # Start Training best_AP = 0 best_acc = 0 for epoch in range(epochs): if epoch == lr_steps[lr_counter]: finetune_net.collect_params().load(best_path, ctx= ctx) trainer.set_learning_rate(trainer.learning_rate*lr_factor) lr_counter += 1 tic = time.time() train_loss = 0 metric.reset() AP = 0. 
AP_cnt = 0 for i, batch in enumerate(train_data): data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False) label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False) with ag.record(): outputs = [finetune_net(X) for X in data] loss = [L(yhat, y) for yhat, y in zip(outputs, label)] for l in loss: l.backward() trainer.step(batch_size) train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss) metric.update(label, outputs) #ap, cnt = calculate_ap(label, outputs) #AP += ap #AP_cnt += cnt #progressbar(i, num_batch-1) #train_map = AP / AP_cnt _, train_acc = metric.get() train_loss /= num_batch this_AP, val_acc, val_loss = validate(finetune_net, val_data, ctx) logging.info('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f' % (epoch, train_acc, train_loss, val_acc, this_AP, val_loss, time.time() - tic, trainer.learning_rate)) f_val.writelines('[Epoch %d] Train-acc: %.3f, , loss: %.3f | Val-acc: %.3f, mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' % (epoch, train_acc, train_loss, val_acc, this_AP, val_loss, time.time() - tic, trainer.learning_rate)) if val_acc > best_acc: best_AP = this_AP best_acc = val_acc best_path = os.path.join('.', 'models', '%s_%s_%s_%s_staging.params' % (task, model_name, epoch, best_acc)) finetune_net.collect_params().save(best_path) logging.info('\n') finetune_net.collect_params().load(best_path, ctx= ctx) f_val.writelines('Best val acc is :[Epoch %d] Train-acc: %.3f, loss: %.3f | Best-val-acc: %.3f, Best-mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' % (epoch, train_acc, train_loss, best_acc, best_AP, val_loss, time.time() - tic, trainer.learning_rate)) return (finetune_net)
def create_and_train(self, model_name: str, batch_size: int, learning_rate: float, epochs: int): """ Create a ModelCreationService and Train it :param model_name: model name :param batch_size: size of the batch :param learning_rate: learning rate to be used :param epochs: nb of epochs to train the model :return: """ model = model_creation_service.ModelCreationService() acc = accuracy_calculation_service.AccuracyCalculationService() save_model = model_manipulation_service.ModelManipulationService() # datasets try: cifar_train = datasets.CIFAR10(train=True) X, y = cifar_train[0:10] except Exception as ex: raise ex # transform image try: transform = data_transformation_service.DataTransformationService() transformer = transform.data_transformation() cifar_train = cifar_train.transform_first(transformer) # train data train_data = gluon.data.DataLoader(cifar_train, batch_size=batch_size, shuffle=True) cifar_valid = gluon.data.vision.CIFAR10(train=False) valid_data = gluon.data.DataLoader( cifar_valid.transform_first(transformer), batch_size=batch_size) except Exception as ex: raise ex # build model try: net = model.create_model() net.initialize(init=init.Xavier()) softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() except Exception as ex: raise ex try: trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate}) for epoch in range(epochs): train_loss, train_acc, valid_acc = 0., 0., 0. 
tic = time.time() for data, label in train_data: # forward + backward with autograd.record(): output = net(data) loss = softmax_cross_entropy(output, label) loss.backward() # update parameters trainer.step(batch_size) # calculate training metrics train_loss += loss.mean().asscalar() train_acc += acc.acc(output=output, label=label) # calculate validation accuracy for data, label in valid_data: valid_acc += acc.acc(net(data), label) print( "Epoch %d: loss %.3f, train acc %.3f, test acc %.3f, in %.1f sec" % (epoch, train_loss / len(train_data), train_acc / len(train_data), valid_acc / len(valid_data), time.time() - tic)) except Exception as ex: raise ex try: save_model.save_model( net, os.path.join(self.path.model_dir, model_name)) except Exception as ex: raise ex
def train_net_resp(network, train_csv, num_classes, batch_size, data_shape, ctx, epochs, learning_rate, momentum, weight_decay, lr_refactor_step, lr_refactor_ratio, identifier, class_names=None, optimizer='sgd'): """ Wrapper for training phase. Parameters: ---------- network : str name for the network structure train_csv : str .csv file path for training num_classes : int number of object classes, not including background batch_size : int training batch-size data_shape : int or tuple width/height as integer or (3, height, width) tuple ctx : [mx.cpu()] or [mx.gpu(x)] list of mxnet contexts epochs : int epochs of training optimizer : str usage of different optimizers, other then default sgd learning_rate : float training learning rate momentum : float trainig momentum weight_decay : float training weight decay param lr_refactor_ratio : float multiplier for reducing learning rate lr_refactor_step : comma separated integers at which epoch to rescale learning rate, e.g. '30, 60, 90' identifier : int identifier(number) of the object of class to classify """ # load data df = pd.read_csv(train_csv) n = len(df) X = np.zeros((n, 3, data_shape, data_shape), dtype=np.float32) Y = np.zeros((n, 1), dtype=np.float32) mean = np.array([0.485, 0.456, 0.406]) std = np.array([0.229, 0.224, 0.225]) # if for i, dfv in enumerate(df.values): img = cv2.imread('./images/%s' % dfv[0]) X[i] = ( (cv2.resize(img, (data_shape, data_shape))[:, :, ::-1] / 255.0 - mean) / std).transpose((2, 0, 1)) Y[i, 0] = dfv[identifier + 2] X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, random_state=8) w_train = 1. - np.sum(Y_train) / len(Y_train) w_val = 1. 
- np.sum(Y_valid) / len(Y_valid) # fine-tune net pretrained_net = getattr(models, network)(pretrained=True) net = getattr(models, network)(classes=1) with net.name_scope(): net.features = pretrained_net.features net.output = nn.Dense(1, activation="sigmoid") net.output.initialize(init.Xavier()) # init net.collect_params().reset_ctx(ctx) net.hybridize() loss = wSigmoidBinaryCrossEntropyLoss(from_sigmoid=True) best_auc_avg = 0 # optimizer opt, opt_params = get_optimizer_params(optimizer=optimizer, learning_rate=learning_rate, momentum=momentum, weight_decay=weight_decay, ctx=ctx) train_data = gluon.data.DataLoader(gluon.data.ArrayDataset( X_train, Y_train), batch_size, shuffle=True) test_data = gluon.data.DataLoader( gluon.data.ArrayDataset(X_valid, Y_valid), batch_size) print('Running on', ctx) trainer = gluon.Trainer(net.collect_params(), opt, opt_params) for epoch in range(epochs): train_loss = 0. steps = len(train_data) if len(lr_refactor_step) > 0: if epoch == lr_refactor_step[0]: trainer.set_learning_rate(trainer.learning_rate * lr_refactor_ratio) del lr_refactor_step[0] for data, label in train_data: data_list = gluon.utils.split_and_load(data, ctx) label_list = gluon.utils.split_and_load(label, ctx) with autograd.record(): losses = [ loss(net(x), y, w_train) for x, y in zip(data_list, label_list) ] for l in losses: l.backward() lmean = [l.mean().asscalar() for l in losses] train_loss += sum(lmean) / len(lmean) trainer.step(batch_size) val_loss = evaluate_resp(net, test_data, w_val, ctx[0]) val_aucs = AUC(net, test_data, 1, ctx[0]) val_aucs_avg = val_aucs.mean() print("Epoch %d. loss: %.4f, val_loss %.4f" % (epoch, train_loss / steps, val_loss)) print('The AUROC of {} is {}'.format(class_names[identifier], val_aucs_avg)) if val_aucs_avg >= best_auc_avg: best_auc_avg = val_aucs_avg net.features.save_params('./model/%s_f_Epoch%d.params' % (class_names[identifier], epoch)) net.output.save_params('./model/%s_o_Epoch%d.params' % (class_names[identifier], epoch))
if __name__ == '__main__': from train import arg_parse config = arg_parse() config['embedding_dim'] = 300 config['vocab_size'] = 10 net = Source2TokenAttention(config) data = nd.array([[np.random.randint(10) for _ in range(200)] for _ in range(1000)]) label = nd.array([np.random.randint(2) for _ in range(1000)]) dataset_train = gluon.data.ArrayDataset(data, label) train_data = gluon.data.DataLoader(dataset_train, batch_size=50, shuffle=True, last_batch='rollover') embedding = text.embedding.CustomEmbedding( 'embedding_files/dummy.embedding', elem_delim=' ') net.collect_params().initialize(init.Xavier(), ctx=mx.cpu()) net.embedding.weight.set_data(embedding.idx_to_vec) softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() for data, label in train_data: with autograd.record(): out = net(data) loss = softmax_cross_entropy(out, label) loss.backward() print(loss.sum().asscalar())
def get_net(ctx, num_classes=10):
    """Build a ResNet-18 classifier and Xavier-initialize it on *ctx*.

    Parameters
    ----------
    ctx : Context or list of Context
        Device(s) the parameters are allocated on.
    num_classes : int, optional
        Number of output classes. Defaults to 10, the previously
        hard-coded value, so existing callers are unaffected.

    Returns
    -------
    The initialized network.
    """
    net = resnet18(num_classes)
    net.initialize(ctx=ctx, init=init.Xavier())
    return net
num_anchors = len(sizes[0]) + len(ratios[0]) - 1 # 创建模型 net = TinySSD(num_classes=1) net.initialize() X = nd.zeros((32.3, 256, 256)) anchors, cls_preds, bbox_preds = net(X) print('output anchors:', anchors.shape) print('output class preds:', cls_preds.shape) print('output bbox preds:', bbox_preds.shape) # 读物数据集,初始化 batch_size = 32 train_iter, _ = util.load_data_pikachu(batch_size) ctx, net = util.try_gpu(), TinySSD(num_classes=1) net.initialize(init=init.Xavier(), ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', { 'learning_rate': 0.2, 'wd': 5e-4 }) # 定义损失函数和评价函数 cls_loss = gluon.loss.SoftmaxCrossEntropyLoss() bbox_loss = gluon.loss.L1Loss() # 训练模型 for epoch in range(20): acc_sum, mae_sum, n, m = 0.0, 0.0, 0, 0 train_iter.reset() # 从头读取数据 start = time.time() for batch in train_iter:
def function_set(self):
    """Build the 10-class GoogLeNet and Xavier-initialize it on the stored context."""
    model = GoogLeNet(num_classes=10, verbose=False)
    model.initialize(init=init.Xavier(), ctx=self.__ctx)
    self.__net = model
ctx = [mx.gpu(0), mx.gpu(1)] net.collect_params().reset_ctx(ctx) net.hybridize() loss = gloss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), "sgd", {"learning_rate": lr, "wd": 0.001}) gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs) if __name__ == "__main__": train_imgs = gdata.vision.ImageFolderDataset("../data/hotdog/train") test_imgs = gdata.vision.ImageFolderDataset("../data/hotdog/test") train_augs = gdata.vision.transforms.Compose([ gdata.vision.transforms.RandomResizedCrop(224), gdata.vision.transforms.RandomFlipLeftRight(), gdata.vision.transforms.ToTensor(), gdata.vision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) test_augs = gdata.vision.transforms.Compose([ gdata.vision.transforms.Resize(256), gdata.vision.transforms.CenterCrop(224), gdata.vision.transforms.ToTensor(), gdata.vision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) pretrained_net = model_zoo.vision.resnet18_v2(pretrained=True, root="../models") finetune_net = model_zoo.vision.resnet18_v2(classes=2) finetune_net.features = pretrained_net.features finetune_net.features.collect_params().setattr("grad_req", "null") finetune_net.output.initialize(init=init.Xavier()) finetune_net.output.collect_params().setattr("lr_mult", 10) train_fine_tunning(finetune_net, lr=0.01, batch_size=128, num_epochs=5)
loss = F.smooth_l1((output - label) * mask, scalar=1.0) return loss.mean(self._batch_axis, exclude=True) box_loss = SmoothL1Loss() from mxnet import init from mxnet import gpu ctx = gpu(0) # the CUDA implementation requres each image has at least 3 lables. # Padd two -1 labels for each instance train_data.reshape(label_shape=(3, 5)) train_data = test_data.sync_label_shape(train_data) net = ToySSD(num_class) net.initialize(init.Xavier(magnitude=2), ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', { 'learning_rate': 0.1, 'wd': 5e-4 }) from mxnet import metric cls_metric = metric.Accuracy() box_metric = metric.MAE() import time from mxnet import autograd for epoch in range(5): # reset data iterators and metrics train_data.reset() cls_metric.reset()
test_pic = np.transpose(test_pic, (0, 3, 1, 2)) train_data = gluon.data.DataLoader(gluon.data.ArrayDataset( train_pic, train_label.astype('float32')), batch_size, shuffle=True) test_data = gluon.data.DataLoader(gluon.data.ArrayDataset( test_pic, test_label.astype('float32')), batch_size, shuffle=False) aug_train = image.CreateAugmenter(data_shape=(3, 32, 32), rand_crop=True, rand_mirror=True) net = ResNet(10) net.initialize(ctx=ctx, init=init.Xavier()) loss = gluon.loss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'nag', { 'learning_rate': 0.1, 'momentum': 0.9, 'wd': 5e-4 }) epochs = 160 a, b = [], [] for epoch in range(epochs): if epoch == 80: trainer.set_learning_rate(0.01) if epoch == 140: trainer.set_learning_rate(0.001)
net = nn.Sequential() with net.name_scope(): # net.add(nn.Conv2D(256, kernel_size=(5, DIMENSION), padding=(1, 0), activation='relu')) net.add( nn.Conv2D(256, kernel_size=(3, DIMENSION), padding=(1, 0), activation='relu')) # net.add(nn.MaxPool2D(pool_size=(FIXED_WORD_LENGTH - 2, 1))) net.add(nn.MaxPool2D(pool_size=(FIXED_WORD_LENGTH, 1))) net.add(nn.Dense(256, activation='relu')) net.add(nn.Dropout(0.5)) net.add(nn.Dense(18)) net.collect_params().initialize(init=init.Xavier(), ctx=ctx) print(net) batch_size = 100 num_epochs = 100 decay_rate = 0.1 gap = 25 loss = gloss.SoftmaxCrossEntropyLoss() # trainer = gluon.Trainer(net.collect_params(), 'AdaDelta', {'rho': 0.95, 'epsilon': 1e-6, 'wd': 0.01}) # trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.0001}) # trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .01}) if ADAPTIVE_LEARNING_RATE: trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.01}) else:
## Validation loader over the transformed MNIST split.
valid_data = gluon.data.DataLoader(
    mnist_valid.transform_first(transformer),
    batch_size=batch_size,
    num_workers=4)
## LeNet-style CNN: two conv/pool stages, then three fully-connected layers.
net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Conv2D(channels=16, kernel_size=3, activation='relu'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Flatten())
net.add(nn.Dense(120, activation="relu"))
net.add(nn.Dense(84, activation="relu"))
net.add(nn.Dense(10))
net.initialize(init=init.Xavier())
## Loss and plain-SGD trainer.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
##
def acc(output, label):
    """Fraction of rows whose argmax matches the label.

    output: (batch, num_output) float32 ndarray
    label: (batch, ) int32 ndarray
    """
    predictions = output.argmax(axis=1)
    return (predictions == label.astype('float32')).mean().asscalar()
def __init__(
    self,
    d_hidden: int,
    kernel_sizes: List[int],
    n_head: int = 1,
    bias: bool = True,
    bidirectional: bool = False,
    dist_enc: Optional[str] = None,
    share_values: bool = False,
    dropout: float = 0.0,
    temperature: float = 1.0,
    **kwargs,
):
    """
    Self-attention module with q,k,v from the same input

    Parameters
    ----------
    d_hidden : int
        hidden dimension
    kernel_sizes : List[int]
        kernel sizes of convolutions to generate queries and keys
    n_head : int, optional
        number of attention heads, by default 1
    bias : bool, optional
        add bias term in input and output projections, by default True
    bidirectional : bool, optional
        if False, add a mask to avoid backward attention, by default False
    dist_enc : Optional[str], optional
        add relative distance embeddings to dot-product attention,
        can be 'add' (linearly combine key and dist),
        'dot' (dot product between key and dist),
        or None (disabled),
        by default None
    share_values : bool, optional
        if True, a value representation is shared by all attention heads,
        by default False
        ref. https://arxiv.org/abs/1912.09363
    dropout : float, optional
        dropout rate, by default 0.0
    temperature : float, optional
        softmax temperature, by default 1.0
    """
    super(SelfAttention, self).__init__(**kwargs)

    # One "group" of q/k projections per kernel size; d_hidden must split
    # evenly across both heads and groups, and heads across groups.
    n_groups = len(kernel_sizes)
    assert (
        d_hidden % n_head == 0
    ), f"hidden dim {d_hidden} cannot be split into {n_head} heads."
    assert (
        d_hidden % n_groups == 0
    ), f"hidden dim {d_hidden} cannot be split into {n_groups} groups."
    # BUG FIX: the message previously interpolated an undefined name
    # `n_heads`, so a failing check raised NameError, not AssertionError.
    assert (
        n_head % n_groups == 0
    ), f"num_heads {n_head} cannot be allocated for {n_groups} groups."

    self.d_hidden = d_hidden
    self.kernel_sizes = kernel_sizes
    self.n_groups = n_groups
    self.d_group = self.d_hidden // self.n_groups
    self.n_head = n_head
    self.d_head = self.d_hidden // self.n_head
    self.bias = bias
    self.dist_enc = dist_enc
    self.bidirectional = bidirectional
    self.share_values = share_values
    self.temperature = temperature

    with self.name_scope():
        # One causal conv per kernel size; each emits 2 * d_group channels
        # that are later split into queries and keys.
        self.qk_proj = HybridConcurrent(axis=-1, prefix="qk_proj_")
        for ksize in self.kernel_sizes:
            self.qk_proj.add(
                CausalConv1D(
                    channels=self.d_group * 2,
                    kernel_size=ksize,
                    prefix=f"conv{ksize}_",
                ))
        # Value projection collapses to a single shared head when
        # share_values is set.
        self.v_proj = nn.Dense(
            units=self.d_head if self.share_values else d_hidden,
            use_bias=bias,
            flatten=False,
            weight_initializer=init.Xavier(),
            prefix="v_proj_",
        )
        self.out_proj = nn.Dense(
            units=d_hidden,
            use_bias=bias,
            flatten=False,
            weight_initializer=init.Xavier(),
            prefix="out_proj_",
        )
        if self.dist_enc is not None:
            assert self.dist_enc in [
                "dot",
                "add",
            ], f"distance encoding type {self.dist_enc} is not supported"
            # Relative-distance attention machinery (positional embeddings
            # plus their projection; extra bias terms for the 'add' variant).
            self.posemb = SinusoidalPositionalEmbedding(d_hidden)
            self.pos_proj = nn.Dense(
                units=d_hidden,
                use_bias=bias,
                flatten=False,
                weight_initializer=init.Xavier(),
                prefix="pos_proj_",
            )
            if self.dist_enc == "add":
                self._ctt_bias_weight = Parameter(
                    "_ctt_bias_weight",
                    shape=(1, n_head, 1, self.d_head),
                    init=init.Xavier(),
                )
                self._pos_bias_weight = Parameter(
                    "_pos_bias_weight",
                    shape=(1, n_head, 1, self.d_head),
                    init=init.Xavier(),
                )
        self.dropout = nn.Dropout(dropout)