def build_fn(batchnorm, dropout, l2):
    """Assemble the network, loss and optimizer into a training step.

    Args:
        batchnorm: forwarded to CosineNet to enable batch normalization.
        dropout: forwarded to CosineNet to enable dropout.
        l2: when truthy, apply WEIGHT_DECAY as L2 regularization.

    Returns:
        Tuple of (train_step cell, loss-wrapped cell, bare network).
    """
    model = CosineNet(batchnorm=batchnorm, dropout=dropout)
    criterion = nn.loss.MSELoss()
    decay = WEIGHT_DECAY if l2 else 0.0
    optimizer = nn.optim.Adam(model.trainable_params(),
                              learning_rate=LEARNING_RATE,
                              weight_decay=decay)
    # Wrap the network with its loss, then with the one-step trainer.
    loss_cell = nn.WithLossCell(model, criterion)
    step_cell = nn.TrainOneStepCell(loss_cell, optimizer).set_train()
    return step_cell, loss_cell, model
def train_net__(data_dir, seg_dir, run_distribute, config=None):
    """Train UNet3d on random synthetic volumes, printing per-step loss/time.

    Args:
        data_dir: unused in the visible code (kept for interface compatibility).
        seg_dir: unused in the visible code (kept for interface compatibility).
        run_distribute: unused in the visible code (kept for interface compatibility).
        config: configuration object; must provide ``loss_scale`` and
            ``epoch_size`` and whatever ``dynamic_lr``/``UNet3d`` require.
    """
    train_data_size = 5  # synthetic dataset length (steps per epoch)
    print("train dataset length is:", train_data_size)
    network = UNet3d(config=config)
    loss = SoftmaxCrossEntropyWithLogits()
    lr = Tensor(dynamic_lr(config, train_data_size), mstype.float32)
    optimizer = nn.Adam(params=network.trainable_params(), learning_rate=lr)
    scale_manager = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    network.set_train()
    # Mixed precision: cast the network to fp16 but keep BatchNorm in fp32.
    network.to_float(mstype.float16)
    _do_keep_batchnorm_fp32(network)
    network = _add_loss_network(network, loss, mstype.float16)
    # FIX: removed dead ``loss_scale = 1.0`` that was immediately overwritten.
    loss_scale = scale_manager.get_loss_scale()
    update_cell = scale_manager.get_update_cell()
    if update_cell is not None:
        model = nn.TrainOneStepWithLossScaleCell(
            network, optimizer, scale_sense=update_cell).set_train()
    else:
        model = nn.TrainOneStepCell(network, optimizer, loss_scale).set_train()
    # Random input volume (1 channel) and random 4-class soft labels.
    inputs = mindspore.Tensor(np.random.rand(1, 1, 224, 224, 96), mstype.float32)
    labels = mindspore.Tensor(np.random.rand(1, 4, 224, 224, 96), mstype.float32)
    step_per_epoch = train_data_size
    print("============== Starting Training ==============")
    # BUG FIX: the original iterated ``range(cfg.epoch_size)`` using the
    # undefined global ``cfg``; use the ``config`` argument consistently.
    for epoch_id in range(config.epoch_size):
        time_epoch = 0.0
        for step_id in range(step_per_epoch):
            time_start = time.time()
            step_loss = model(inputs, labels).asnumpy()
            time_step = time.time() - time_start
            time_epoch += time_step
            print('Epoch: [%3d/%3d], step: [%5d/%5d], loss: [%6.4f], time: [%.4f]'
                  % (epoch_id, config.epoch_size, step_id, step_per_epoch,
                     step_loss, time_step))
        print('Epoch time: %10.4f, per step time: %7.4f'
              % (time_epoch, time_epoch / step_per_epoch))
    print("============== End Training ==============")
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    """Train ``net`` on one (input, label) batch for ``epoch_size`` epochs.

    ``label_np`` is expected to be one-hot; it is converted to sparse int32
    class indices to match the sparse cross-entropy loss.
    """
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    # Momentum(params, lr schedule, momentum, weight_decay, loss_scale).
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024)
    # FIX: removed dead ``Model(net, loss, opt)`` whose result was discarded;
    # training goes through the explicit cell wrappers below instead.
    _network = nn.WithLossCell(net, loss)
    _train_net = nn.TrainOneStepCell(_network, opt)
    _train_net.set_train()
    label_np = np.argmax(label_np, axis=-1).astype(np.int32)
    for epoch in range(epoch_size):
        # FIX: was ``f"epoch %d" % (epoch)`` — a useless f-string prefix
        # combined with %-formatting; use a plain f-string.
        print(f"epoch {epoch}")
        _train_net(Tensor(input_np), Tensor(label_np))
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    """Train ``net`` in graph mode for ``epoch_size`` epochs, printing the output."""
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    opt = nn.Momentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                      filter(lambda x: x.requires_grad, net.get_parameters()))
    context.set_context(mode=context.GRAPH_MODE)
    # FIX: removed dead ``Model(net, loss, opt)`` whose result was discarded.
    _network = nn.WithLossCell(net, loss)
    _train_net = MsWrapper(nn.TrainOneStepCell(_network, opt))
    _train_net.set_train()
    for epoch in range(epoch_size):
        # FIX: was ``f"epoch %d" % (epoch)`` — useless f prefix on a %-format.
        print(f"epoch {epoch}")
        output = _train_net(Tensor(input_np), Tensor(label_np))
        print(output.asnumpy())
def __init__(self, **kwargs):
    """Build policy/target DQNs, their optimizer and the one-step trainer.

    Every keyword argument is copied verbatim onto the instance; callers
    must at least supply ``state_space_dim``, ``action_space_dim`` and
    ``lr``, which are read below.
    """
    for name in kwargs:
        setattr(self, name, kwargs[name])
    hidden_size = 256
    self.policy_net = DQN(self.state_space_dim, hidden_size, self.action_space_dim)
    self.target_net = DQN(self.state_space_dim, hidden_size, self.action_space_dim)
    self.optimizer = nn.RMSProp(self.policy_net.trainable_params(),
                                learning_rate=self.lr)
    # Only the policy net is trained; the target net is updated elsewhere.
    wrapped_loss = WithLossCell(self.policy_net, nn.MSELoss())
    self.policy_net_train = nn.TrainOneStepCell(wrapped_loss, self.optimizer)
    self.policy_net_train.set_train(mode=True)
    self.buffer = []  # replay buffer
    self.steps = 0    # environment step counter
def TrainWrap(net, loss_fn=None, optimizer=None, weights=None):
    """Wrap ``net`` into a TrainOneStepCell, supplying sensible defaults.

    Defaults: sparse softmax cross-entropy loss, the net's own trainable
    parameters, and an Adam optimizer.
    """
    if loss_fn is None:
        loss_fn = nn.SoftmaxCrossEntropyWithLogits(reduction='mean', sparse=True)
    wrapped = nn.WithLossCell(net, loss_fn)
    wrapped.set_train()
    if weights is None:
        weights = ParameterTuple(net.trainable_params())
    if optimizer is None:
        optimizer = nn.Adam(weights,
                            learning_rate=0.003,
                            beta1=0.9,
                            beta2=0.999,
                            eps=1e-5,
                            use_locking=False,
                            use_nesterov=False,
                            weight_decay=4e-5,
                            loss_scale=1.0)
    return nn.TrainOneStepCell(wrapped, optimizer)
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    """Train ``net`` in graph mode, recording each epoch to a summary file."""
    context.set_context(mode=context.GRAPH_MODE)
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = ApplyMomentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                        filter(lambda x: x.requires_grad, net.get_parameters()))
    # FIX: removed dead ``Model(net, loss, opt)`` whose result was discarded.
    _network = wrap.WithLossCell(net, loss)
    _train_net = MsWrapper(wrap.TrainOneStepCell(_network, opt))
    _train_net.set_train()
    with SummaryRecord(SUMMARY_DIR, file_suffix="_MS_GRAPH",
                       network=_train_net) as summary_writer:
        for epoch in range(epoch_size):
            # FIX: was ``f"epoch %d" % (epoch)`` — useless f prefix.
            print(f"epoch {epoch}")
            output = _train_net(Tensor(input_np), Tensor(label_np))
            # BUG FIX: the original called ``summary_writer.record(i)`` with
            # an undefined name ``i`` (NameError at runtime); record the
            # current epoch index instead.
            summary_writer.record(epoch)
            print("********output***********")
            print(output.asnumpy())
def define_network(args): """Define train network with TrainOneStepCell.""" # backbone and loss num_classes = args.num_classes num_anchors_list = args.num_anchors_list anchors = args.anchors anchors_mask = args.anchors_mask momentum = args.momentum args.logger.info('train opt momentum:{}'.format(momentum)) weight_decay = args.weight_decay * float(args.batch_size) args.logger.info('real weight_decay:{}'.format(weight_decay)) lr_scale = args.world_size / 8 args.logger.info('lr_scale:{}'.format(lr_scale)) args.lr = warmup_step_new(args, lr_scale=lr_scale) network = backbone_HwYolov3(num_classes, num_anchors_list, args) criterion0 = YoloLoss(num_classes, anchors, anchors_mask[0], 64, 0, head_idx=0.0) criterion1 = YoloLoss(num_classes, anchors, anchors_mask[1], 32, 0, head_idx=1.0) criterion2 = YoloLoss(num_classes, anchors, anchors_mask[2], 16, 0, head_idx=2.0) # load pretrain model if os.path.isfile(args.pretrained): param_dict = load_checkpoint(args.pretrained) param_dict_new = {} for key, values in param_dict.items(): if key.startswith('moments.'): continue elif key.startswith('network.'): param_dict_new[key[8:]] = values else: param_dict_new[key] = values load_param_into_net(network, param_dict_new) args.logger.info('load model {} success'.format(args.pretrained)) train_net = BuildTrainNetworkV2(network, criterion0, criterion1, criterion2, args) # optimizer opt = nn.Momentum(params=train_net.trainable_params(), learning_rate=Tensor(args.lr), momentum=momentum, weight_decay=weight_decay) # package training process if args.use_loss_scale: train_net = TrainOneStepWithLossScaleCell(train_net, opt) else: train_net = nn.TrainOneStepCell(train_net, opt) if args.world_size != 1: train_net.set_broadcast_flag() return train_net
def test_qat_mobile_train():
    """Smoke-test a single training step of a quantization-aware MobileNetV2."""
    network = MobileNetV2(num_class=10)
    image = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
    target = Tensor(np.ones((1, 10)).astype(np.float32))
    # Convert to a fake-quantized network (8-bit weights and activations).
    network = qat.convert_quant_network(network, quant_delay=0, bn_fold=False,
                                        freeze_bn=10000, weight_bits=8, act_bits=8)
    criterion = nn.SoftmaxCrossEntropyWithLogits(reduction='mean')
    momentum_opt = nn.Momentum(network.trainable_params(),
                               learning_rate=0.1, momentum=0.9)
    network = nn.WithLossCell(network, criterion)
    network = nn.TrainOneStepCell(network, momentum_opt)
    network(image, target)
# Validation pipeline: a single batch holding the entire validation set.
ds_valid = ds.NumpySlicesDataset(
    {
        'R': valid_data['R'],
        'F': valid_data['F'],
        'E': valid_data['E']
    },
    shuffle=False)
ds_valid = ds_valid.batch(len(valid_data['E']))
ds_valid = ds_valid.repeat(1)
# Square losses on both energies and forces.
# NOTE(review): 'opeartion' is a typo for 'operation' in the two names
# below; left unchanged because later (unseen) code may reference them.
loss_opeartion = WithForceLossCell(net, SquareLoss(), SquareLoss())
eval_opeartion = WithForceEvalCell(net)
optim = nn.Adam(params=net.trainable_params(), learning_rate=1e-4)
train_net = nn.TrainOneStepCell(loss_opeartion, optim)
# Metric names used as keys in the Model's metrics dict.
energy_mae = 'EnergyMAE'
forces_mae = 'ForcesMAE'
forces_mse = 'ForcesMSE'
# model = Model(train_net,eval_network=eval_opeartion,metrics={energy_mae:MAE([2,3]),forces_mae:MAE([4,5]),forces_mse:MSE([4,5])},amp_level='O3')
model = Model(train_net,
              eval_network=eval_opeartion,
              metrics={
                  energy_mae: MAE([2, 3]),
                  forces_mae: MAE([4, 5]),
                  forces_mse: MSE([4, 5])
              },
              amp_level='O0')
params_name = mol_name + '_' + network_name
def train():
    """Train a GraphSAGE model on Cora and evaluate on the test split.

    Parses CLI arguments, trains for a fixed number of epochs/batches while
    checkpointing the best validation model, then reloads that checkpoint
    and reports test accuracy and loss.
    """
    # Node counts are configurable; more training parameters could be added later.
    parser = argparse.ArgumentParser(description='Graphsage')
    parser.add_argument('--data_dir', type=str, default='../data_mr/cora',
                        help='Dataset directory')
    parser.add_argument('--train_nodes_num', type=int, default=1208,
                        help='Nodes numbers for training')
    parser.add_argument('--eval_nodes_num', type=int, default=500,
                        help='Nodes numbers for evaluation')
    parser.add_argument('--test_nodes_num', type=int, default=1000,
                        help='Nodes numbers for test')
    args = parser.parse_args()
    # Directory for saving the best training checkpoint.
    if not os.path.exists("ckpts_graphsage"):
        os.mkdir("ckpts_graphsage")
    # Configure execution mode and device.
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU",
                        save_graphs=False)
    # Load train/validation/test data.
    features, labels, train_mask, test_mask, eval_mask = load_and_process(
        args.data_dir, args.train_nodes_num, args.eval_nodes_num,
        args.test_nodes_num)
    # Random permutation of node ids, split into train/val/test index sets.
    rand_incides = np.random.permutation(features.shape[0])
    test_nodes = rand_incides[args.train_nodes_num+args.eval_nodes_num:]
    val_nodes = rand_incides[args.train_nodes_num:args.train_nodes_num+args.eval_nodes_num]
    train_nodes = rand_incides[:args.train_nodes_num]
    feature_size = features.shape[2]
    num_nodes = features.shape[0]
    num_class = labels.max() + 1
    print("feature size: ", feature_size)
    print("nodes number: ", num_nodes)
    print("node classes: ", num_class)
    # Training hyper-parameters, loss, optimizer and the train step.
    early_stopping = 15  # NOTE(review): unused in the visible code
    eval_acc_max = 0.8   # only checkpoint models beating this accuracy
    net_original = Graphsage(input_dim=1433, hidden_dim=128, output_dim=7,
                             hops=[10, 10])
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
    opt_Adam = nn.Adam(net_original.trainable_params())
    net_with_loss = nn.WithLossCell(net_original, loss_fn=loss)
    net_train_step = nn.TrainOneStepCell(net_with_loss, opt_Adam)
    for epoch in range(10):
        net_train_step.set_train(mode=True)
        for batch in range(20):
            # Sample the training data for this batch (16 random nodes).
            batch_src_index = np.random.choice(train_nodes, size=(16,))
            features_sampled = []
            for node in batch_src_index:
                features_sampled.append((features[node]))
            batch_train_mask = train_mask[batch_src_index]  # NOTE(review): unused
            label_source = labels[batch_src_index]
            train_step_loss = net_train_step(
                Tensor(features_sampled, mindspore.float32),
                Tensor(label_source[:, 0], mindspore.int32))
            step_loss = P.ReduceSum()(train_step_loss).asnumpy()
            # Evaluate on the whole validation split every batch.
            batch_eval_index = val_nodes
            eval_fea_sampled = []
            for node in batch_eval_index:
                eval_fea_sampled.append((features[node]))
            batch_eval_mask = eval_mask[batch_eval_index]  # NOTE(review): unused
            eval_label_source = labels[batch_eval_index]
            eval_lable = Tensor(eval_label_source[:, 0], mindspore.int32)
            eval_soln = net_original(Tensor(eval_fea_sampled, mindspore.float32))
            eval_logits = P.Argmax()(eval_soln)
            eval_acc = P.ReduceMean()(P.Cast()(
                (P.Equal()(eval_lable, eval_logits)), mindspore.float32))
            print("Epoch:", epoch + 1, " Batch: ", batch + 1,
                  "'s train loss =", step_loss, " val accuracy =", eval_acc)
            # Checkpoint the best model seen so far.
            if eval_acc.asnumpy() > eval_acc_max:
                eval_acc_max = eval_acc
                print("a more accurate model!")
                if os.path.exists("ckpts_graphsage/graphsage.ckpt"):
                    os.remove("ckpts_graphsage/graphsage.ckpt")
                save_checkpoint(net_train_step, "ckpts_graphsage/graphsage.ckpt")
    # Fetch the test data.
    batch_test_index = test_nodes
    test_fea_sampled = []
    for node in batch_test_index:
        test_fea_sampled.append((features[node]))
    batch_test_mask = eval_mask[batch_test_index]  # NOTE(review): unused
    test_label_source = labels[batch_test_index]
    test_lable = Tensor(test_label_source[:, 0], mindspore.int32)
    # Load the best checkpoint and predict on the test set.
    test_net = Graphsage(input_dim=1433, hidden_dim=128, output_dim=7,
                         hops=[10, 10])
    test_net.set_train(mode=False)
    load_checkpoint("ckpts_graphsage/graphsage.ckpt", net=test_net)
    loss_test = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
    test_soln = test_net(Tensor(test_fea_sampled, mindspore.float32))
    test_logits = P.Argmax()(test_soln)
    print("test accuracy:", P.ReduceMean()(P.Cast()(
        (P.Equal()(test_lable, test_logits)), mindspore.float32)))
    test_with_loss = nn.WithLossCell(test_net, loss_fn=loss_test)
    test_loss = test_with_loss(Tensor(test_fea_sampled, mindspore.float32),
                               Tensor(test_label_source[:, 0], mindspore.int32))
    print("test loss:", P.ReduceSum()(test_loss).asnumpy())
def __init__(self, network, loss_fn, optimizer):
    # Trainer wrapper: bundle network + loss into a one-step training cell.
    # NOTE(review): no super().__init__() call here — confirm the (unseen)
    # base class does not require one.
    self.optimizer = optimizer
    self.step = nn.TrainOneStepCell(nn.WithLossCell(network, loss_fn), self.optimizer)
def _train(self, train_data, test_data): """ Applying augmented Lagrangian to solve the continuous constrained problem. Parameters ---------- train_data: NormalizationData object train samples test_data: NormalizationData object test samples for validation """ # Initialize stuff for learning loop aug_lagrangians_val = [] hs = [] not_nlls = [] # Augmented Lagrangian minus (pseudo) NLL trainable_para_list = self.model.get_trainable_params() if self.optimizer == "sgd": optimizer = nn.optim.SGD(trainable_para_list, learning_rate=self.lr) elif self.optimizer == "rmsprop": optimizer = nn.optim.RMSProp(trainable_para_list, learning_rate=self.lr) else: raise ValueError("optimizer should be in {'sgd', 'rmsprop'}") # Package training information net_loss = GranLoss() net = nn.WithLossCell(self.model, net_loss) net = nn.TrainOneStepCell(net, optimizer) # Learning loop: for iter_num in tqdm(range(self.iterations), desc='Training Iterations'): x, _ = train_data.sample(self.batch_size) ds_data = self._create_dataset(x.asnumpy(), batch_size=self.batch_size) w_adj = self.model.get_w_adj() expm_input = expm(w_adj.asnumpy()) h = np.trace(expm_input) - self.input_dim # model train self.model.set_train(True) net(*list(ds_data)[0]) self.model.set_train(False) # clamp edges, thresholding if self.edge_clamp_range != 0: to_keep = (w_adj > self.edge_clamp_range) * 1 self.model.adjacency *= to_keep # logging not_nlls.append(0.5 * self.model.mu * h**2 + self.model.lamb * h) # compute loss on whole validation set if iter_num % self.stop_crit_win == 0: x, _ = test_data.sample(test_data.n_samples) loss_val = -ops.reduce_mean( self.model.compute_log_likelihood(x)) aug_lagrangians_val.append( [iter_num, loss_val.asnumpy().item() + not_nlls[-1]]) # compute delta for lambda if iter_num >= 2 * self.stop_crit_win \ and iter_num % (2 * self.stop_crit_win) == 0: t0 = aug_lagrangians_val[-3][1] t_half = aug_lagrangians_val[-2][1] t1 = aug_lagrangians_val[-1][1] # if the validation loss went up and down, 
# do not update lagrangian and penalty coefficients. if not (min(t0, t1) < t_half < max(t0, t1)): delta_lambda = -np.inf else: delta_lambda = (t1 - t0) / self.stop_crit_win else: delta_lambda = -np.inf # do not update lambda nor mu # Does the augmented lagrangian converged? # if h value less than equal self.h_threshold value, # means augmented lagrangian has converged, stop model training if h > self.h_threshold: # if we have found a stationary point of the augmented loss if abs(delta_lambda) < self.omega_lambda or delta_lambda > 0: self.model.lamb += self.model.mu * h # Did the constraint improve sufficiently? hs.append(h) if len(hs) >= 2: if hs[-1] > hs[-2] * self.omega_mu: self.model.mu *= 10 # little hack to make sure the moving average is going down. gap_in_not_nll = 0.5 * self.model.mu * h ** 2 +\ self.model.lamb * h - not_nlls[-1] aug_lagrangians_val[-1][1] += gap_in_not_nll trainable_para_list = self.model.get_trainable_params() if self.optimizer == "rmsprop": optimizer = nn.optim.RMSProp(trainable_para_list, learning_rate=self.lr) else: optimizer = nn.optim.SGD(trainable_para_list, learning_rate=self.lr) net_loss = GranLoss() net = nn.WithLossCell(self.model, net_loss) net = nn.TrainOneStepCell(net, optimizer) else: # Final clamping of all edges == 0 to_keep = (w_adj > 0).astype(mstype.float32) self.model.adjacency *= to_keep return self.model
import numpy as np
import mindspore as ms
from mindspore import nn
from mindspore import context

# Run in graph mode on an Ascend device.
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

# Synthetic data: 32 samples of y = -5*x plus scaled Gaussian noise.
x = np.arange(-5, 5, 0.3)[:32].reshape((32, 1))
y = -5 * x + 0.1 * np.random.normal(loc=0.0, scale=20.0, size=x.shape)

# Single-neuron linear model trained with MSE loss and SGD.
net = nn.Dense(1, 1)
loss_fn = nn.loss.MSELoss()
opt = nn.optim.SGD(net.trainable_params(), learning_rate=0.01)
with_loss = nn.WithLossCell(net, loss_fn)
train_step = nn.TrainOneStepCell(with_loss, opt).set_train()

# Full-batch training: every epoch feeds all 32 samples at once.
for epoch in range(20):
    loss = train_step(ms.Tensor(x, ms.float32), ms.Tensor(y, ms.float32))
    print('epoch: {0}, loss is {1}'.format(epoch, loss))

# Extract the learned weight and bias for display.
wb = [x.data.asnumpy() for x in net.trainable_params()]
w, b = np.squeeze(wb[0]), np.squeeze(wb[1])
print('The true linear function is y = -5 * x + 0.1')
# works in MindSpore0.3.0 or later.
print('The trained linear model is y = {0} * x + {1}'.format(w, b))

# Predict at a few sample points to eyeball the fit.
for i in range(-10, 11, 5):
    print('x = {0}, predicted y = {1}'.format(
        i, net(ms.Tensor([[i]], ms.float32))))
def __init__(self, net, loss, opt):
    """Wrap ``net`` with ``loss`` and ``opt`` into a training cell in train mode."""
    super(Linear_Train, self).__init__()
    self.netwithloss = nn.WithLossCell(net, loss)
    self.train_net = nn.TrainOneStepCell(self.netwithloss, opt)
    self.train_net.set_train()
def construct(self, x):
    # Forward pass: fc1 -> sigmoid -> fc2.
    x = self.fc1(x)
    x = self.sig(x)
    x = self.fc2(x)
    return x

m = Net(HIDDEN_SIZE)
# create your optimizer
optim = nn.Momentum(m.trainable_params(), learning_rate=0.15, momentum=0.9)
loss_fn = nn.MSELoss()
loss_net = nn.WithLossCell(m, loss_fn)
train_net = nn.TrainOneStepCell(loss_net, optim)
train_net.set_train(True)
# Train on the four XOR input/label pairs each iteration: mi enumerates
# the (x1, x2) truth-table rows and the label is x1 XOR x2.
for e in range(ITERATIONS):
    mloss = 0.0
    for mi in range(4):
        x1 = mi % 2
        x2 = (mi // 2) % 2
        data = Tensor([[1. if x1 else 0., 1. if x2 else 0.]], mindspore.float32)
        label = Tensor([[1. if x1 != x2 else 0.]], mindspore.float32)
        #import pdb;pdb.set_trace()
        loss = train_net(data, label)
        print(f"data: {data}, label: {label}, pred: {m(data)}, loss: %0.9f"
              % loss.asnumpy())
        mloss += loss.asnumpy()
else: for _, cell in net.cells_and_names(): if isinstance(cell, nn.Conv2d): cell.weight.set_data( weight_init.initializer(weight_init.XavierUniform(), cell.weight.shape, cell.weight.dtype)) if isinstance(cell, nn.Dense): cell.weight.set_data( weight_init.initializer(weight_init.TruncatedNormal(), cell.weight.shape, cell.weight.dtype)) optimizer = get_optimizer(net, dataset.get_dataset_size(), args) loss = NT_Xent_Loss(args.batch_size, args.temperature) net_loss = NetWithLossCell(net, loss) train_net = nn.TrainOneStepCell(net_loss, optimizer) model = Model(train_net) time_cb = TimeMonitor(data_size=dataset.get_dataset_size()) config_ck = CheckpointConfig( save_checkpoint_steps=args.save_checkpoint_epochs) ckpts_dir = os.path.join(args.train_output_path, "checkpoint") ckpoint_cb = ModelCheckpoint(prefix="checkpoint_simclr", directory=ckpts_dir, config=config_ck) print("============== Starting Training ==============") model.train(args.epoch_size, dataset, callbacks=[time_cb, ckpoint_cb, LossMonitor()]) if args.run_cloudbrain and args.device_id == 0: mox.file.copy_parallel(src_url=_local_train_url,