def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
    self.n_hidden = n_hidden
    self.n_iterations = n_iterations
    self.learning_rate = learning_rate
    self.hidden_activation = Sigmoid()
    self.output_activation = Softmax()
    self.loss = CrossEntropy()
def costs(self):
    p = self.output(self.input)
    cost = CrossEntropy()(p, self.target)
    pred = tensor.argmax(p, axis=1)
    true = tensor.argmax(self.target, axis=1)
    acc = tensor.mean(tensor.eq(pred, true))
    return [cost, acc]
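# Illustration only (not part of the snippet above): the same cost/accuracy
# pair in plain NumPy with hypothetical one-hot targets, instead of the
# Theano symbolic graph used by costs().
import numpy as np

p = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.6, 0.3]])             # predicted class probabilities
target = np.array([[1, 0, 0],
                   [0, 0, 1]])              # one-hot ground truth
cost = np.mean(-np.sum(target * np.log(p), axis=1))               # mean cross-entropy
acc = np.mean(np.argmax(p, axis=1) == np.argmax(target, axis=1))  # accuracy
print(cost, acc)                            # ~0.78, 0.5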
def test_resnet50_quant():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # define network
    net = resnet50_quant(class_num=config.class_num)
    net.set_train(True)

    # define loss
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)
    # loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)

    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    net = quant.convert_quant_network(net,
                                      bn_fold=True,
                                      per_channel=[True, False],
                                      symmetric=[True, False])

    # get learning rate
    lr = Tensor(get_lr(lr_init=config.lr_init,
                       lr_end=0.0,
                       lr_max=config.lr_max,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=config.epoch_size,
                       steps_per_epoch=step_size,
                       lr_decay_mode='cosine'))

    # define optimization
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   lr, config.momentum, config.weight_decay, config.loss_scale)

    # define model
    # model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
    model = Model(net, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callbacks = [monitor]
    model.train(epoch_size, dataset, callbacks=callbacks, dataset_sink_mode=False)
    print("============== End Training ==============")

    expect_avg_step_loss = 2.40
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
def costs(self):
    p = self.output(self.input)
    cost = CrossEntropy()(p, self.target.flatten())
    pp = T.exp(cost)  # already take the mean operator
    scaled_cost = self.sample_size * cost  # mean over batch
    return [scaled_cost, pp]
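# Illustration only (not part of the snippet above): perplexity is the
# exponential of the mean cross-entropy, which is what pp = T.exp(cost)
# computes symbolically. Hypothetical numbers, plain NumPy instead of Theano.
import numpy as np

probs = np.array([0.5, 0.25, 0.1])   # model probability assigned to each true token
cost = np.mean(-np.log(probs))       # mean cross-entropy over the batch
pp = np.exp(cost)                    # perplexity = exp of the mean cost
print(cost, pp)                      # ~1.46, ~4.31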
def main(cfg, gpus):
    torch.backends.cudnn.enabled = False
    # cudnn.deterministic = False
    # cudnn.enabled = True

    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    if cfg.MODEL.arch_decoder == 'ocr':
        print('Using cross entropy loss')
        crit = CrossEntropy(ignore_label=-1)
    else:
        crit = nn.NLLLoss(ignore_index=-1)

    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(
            net_encoder, net_decoder, crit, cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(
            net_encoder, net_decoder, crit)

    # Dataset and Loader
    dataset_train = TrainDataset(
        cfg.DATASET.root_dataset,
        cfg.DATASET.list_train,
        cfg.DATASET,
        batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)

    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),
        shuffle=False,  # parameter is not used
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)

    # create loader iterator
    iterator_train = iter(loader_train)

    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    if cfg.TRAIN.eval:
        # Dataset and Loader for validation data
        dataset_val = ValDataset(
            cfg.DATASET.root_dataset,
            cfg.DATASET.list_val,
            cfg.DATASET)
        loader_val = torch.utils.data.DataLoader(
            dataset_val,
            batch_size=cfg.VAL.batch_size,
            shuffle=False,
            collate_fn=user_scattered_collate,
            num_workers=5,
            drop_last=True)
        iterator_val = iter(loader_val)

    # load nets into gpu
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(
            segmentation_module,
            device_ids=gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': [],
                         'last_score': 0, 'best_score': cfg.TRAIN.best_score}}

    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history, epoch + 1, cfg)

        # calculate segmentation score every eval_step epochs
        # (range() takes the step as a positional argument, not a keyword)
        if cfg.TRAIN.eval and epoch in range(cfg.TRAIN.start_epoch,
                                             cfg.TRAIN.num_epoch,
                                             cfg.TRAIN.eval_step):
            iou, acc = evaluate(segmentation_module, iterator_val, cfg, gpus)
            history['train']['last_score'] = (iou + acc) / 2
            if history['train']['last_score'] > history['train']['best_score']:
                history['train']['best_score'] = history['train']['last_score']
                checkpoint(nets, history, cfg, 'best_score')

        # checkpointing
        checkpoint(nets, history, cfg, epoch + 1)

    print('Training Done!')
class MultilayerPerceptron():

    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit = 1 / math.sqrt(self.n_hidden)
        self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def fit(self, X, y):
        self._initialize_weights(X, y)

        for i in range(self.n_iterations):

            # ..............
            #  Forward Pass
            # ..............

            # HIDDEN LAYER
            hidden_input = X.dot(self.W) + self.w0
            hidden_output = self.hidden_activation(hidden_input)
            # OUTPUT LAYER
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t input of output layer
            grad_wrt_o_layer = self.loss.gradient(y, y_pred) * \
                self.output_activation.gradient(output_layer_input)
            grad_v = hidden_output.T.dot(grad_wrt_o_layer)
            grad_v0 = np.sum(grad_wrt_o_layer, axis=0, keepdims=True)
            # HIDDEN LAYER
            # Grad. w.r.t input of hidden layer
            grad_wrt_hidden_layer = grad_wrt_o_layer.dot(self.V.T) * \
                self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_layer)
            grad_w0 = np.sum(grad_wrt_hidden_layer, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    # Use the trained model to predict labels of X
    def predict(self, X):
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred
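# Minimal usage sketch for the class above, not part of the original example.
# The data is synthetic, and the Sigmoid/Softmax/CrossEntropy helpers are
# assumed to be importable from the surrounding module; targets must be
# one-hot encoded because CrossEntropy and Softmax operate per class column.
import math
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(100, 4)                     # 100 samples, 4 features
labels = rng.randint(0, 3, size=100)      # 3 classes
y = np.eye(3)[labels]                     # one-hot targets, shape (100, 3)

clf = MultilayerPerceptron(n_hidden=16, n_iterations=1000, learning_rate=0.01)
clf.fit(X, y)
pred = np.argmax(clf.predict(X), axis=1)  # predicted class indices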