Example #1
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()
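Examples #1 and #6 pull Sigmoid, Softmax and CrossEntropy from the surrounding project rather than from a library. For reference, a minimal NumPy sketch of such a CrossEntropy class; the clipping constant and the exact reduction are assumptions, not the project's verbatim code:

import numpy as np

class CrossEntropy:
    def __call__(self, y, p):
        # clip to avoid log(0), then average the elementwise cross-entropy
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return np.mean(-y * np.log(p) - (1 - y) * np.log(1 - p))

    def gradient(self, y, p):
        # derivative of the elementwise loss w.r.t. the predictions p
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return -(y / p) + (1 - y) / (1 - p)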
Example #2
    def costs(self):
        # Build symbolic Theano expressions for the mean cross-entropy cost
        # and the classification accuracy against one-hot targets.
        p = self.output(self.input)
        cost = CrossEntropy()(p, self.target)

        pred = tensor.argmax(p, axis=1)
        true = tensor.argmax(self.target, axis=1)
        acc = tensor.mean(tensor.eq(pred, true))

        return [cost, acc]
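For context, the same cost/accuracy pair can be built as a self-contained Theano graph. This sketch substitutes Theano's built-in categorical cross-entropy for the project's CrossEntropy class, so it is an illustration of the pattern, not the original code:

import theano
import theano.tensor as tensor

p = tensor.matrix('p')       # predicted class probabilities, one row per sample
target = tensor.matrix('y')  # one-hot target matrix

cost = tensor.nnet.categorical_crossentropy(p, target).mean()
pred = tensor.argmax(p, axis=1)
true = tensor.argmax(target, axis=1)
acc = tensor.mean(tensor.eq(pred, true))

evaluate = theano.function([p, target], [cost, acc])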
Example #3
def test_resnet50_quant():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # define network
    net = resnet50_quant(class_num=config.class_num)
    net.set_train(True)

    # define loss
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)
    #loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)

    # define dataset (dataset_path is assumed to be defined elsewhere in the
    # test module)
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    net = quant.convert_quant_network(net,
                                      bn_fold=True,
                                      per_channel=[True, False],
                                      symmetric=[True, False])

    # get learning rate
    lr = Tensor(
        get_lr(lr_init=config.lr_init,
               lr_end=0.0,
               lr_max=config.lr_max,
               warmup_epochs=config.warmup_epochs,
               total_epochs=config.epoch_size,
               steps_per_epoch=step_size,
               lr_decay_mode='cosine'))

    # define optimizer
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
                   config.momentum, config.weight_decay, config.loss_scale)

    # define model
    #model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
    model = Model(net, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)

    callbacks = [monitor]
    model.train(epoch_size,
                dataset,
                callbacks=callbacks,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    expect_avg_step_loss = 2.40
    avg_step_loss = np.mean(np.array(monitor.losses))

    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
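Example #3's CrossEntropy(smooth_factor=..., num_classes=...) appears to be a label-smoothed cross entropy: the true class gets probability 1 - smooth_factor and the remainder is spread uniformly over the other classes. A NumPy sketch of the idea; the function name and signature here are illustrative, not the model zoo's API:

import numpy as np

def smoothed_cross_entropy(logits, labels, smooth_factor, num_classes):
    # smoothed one-hot targets: on_value for the true class, off_value elsewhere
    on_value = 1.0 - smooth_factor
    off_value = smooth_factor / (num_classes - 1)
    targets = np.full((len(labels), num_classes), off_value)
    targets[np.arange(len(labels)), labels] = on_value
    # numerically stable log-softmax of the logits
    z = logits - logits.max(axis=1, keepdims=True)
    log_probs = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    return -np.mean(np.sum(targets * log_probs, axis=1))

print(smoothed_cross_entropy(np.array([[2.0, 0.5, -1.0]]),
                             np.array([0]), smooth_factor=0.1, num_classes=3))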
Example #4
    def costs(self):
        p = self.output(self.input)
        cost = CrossEntropy()(p, self.target.flatten())
        pp = T.exp(cost)  # perplexity: cost is already a mean, so exp(cost) is the metric directly
        scaled_cost = self.sample_size * cost  # scale the mean cost back up by the sample count
        return [scaled_cost, pp]
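The pp = T.exp(cost) line computes perplexity: since cost is the mean cross-entropy in nats, exponentiating it gives the standard language-model metric, equal to the inverse geometric mean of the true-class probabilities. A NumPy illustration:

import numpy as np

def perplexity(true_token_probs):
    # mean negative log-likelihood in nats, then exponentiate
    mean_nll = -np.mean(np.log(true_token_probs))
    return np.exp(mean_nll)

print(perplexity(np.array([0.5, 0.25, 0.125])))  # 4.0 = (0.5 * 0.25 * 0.125) ** (-1/3)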
Example #5
def main(cfg, gpus):
    torch.backends.cudnn.enabled = False
    # cudnn.deterministic = False
    # cudnn.enabled = True
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    if cfg.MODEL.arch_decoder == 'ocr':
        print('Using cross entropy loss')
        crit = CrossEntropy(ignore_label=-1)
    else:
        crit = nn.NLLLoss(ignore_index=-1)

    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit,
                                                 cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit)

    # Dataset and Loader
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)

    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),
        shuffle=False,  # parameter is not used
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)
    # create loader iterator
    iterator_train = iter(loader_train)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    if cfg.TRAIN.eval:
        # Dataset and Loader for validation data
        dataset_val = ValDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_val, cfg.DATASET)
        loader_val = torch.utils.data.DataLoader(
            dataset_val,
            batch_size=cfg.VAL.batch_size,
            shuffle=False,
            collate_fn=user_scattered_collate,
            num_workers=5,
            drop_last=True)
        iterator_val = iter(loader_val)

    # load nets into gpu
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Main loop
    history = {
        'train': {
            'epoch': [],
            'loss': [],
            'acc': [],
            'last_score': 0,
            'best_score': cfg.TRAIN.best_score
        }
    }
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch + 1, cfg)
        # calculate segmentation score
        if cfg.TRAIN.eval and epoch in range(cfg.TRAIN.start_epoch,
                                             cfg.TRAIN.num_epoch,
                                             cfg.TRAIN.eval_step):
            iou, acc = evaluate(segmentation_module, iterator_val, cfg, gpus)
            history['train']['last_score'] = (iou + acc) / 2
            if history['train']['last_score'] > history['train']['best_score']:
                history['train']['best_score'] = history['train']['last_score']
                checkpoint(nets, history, cfg, 'best_score')
        # checkpointing
        checkpoint(nets, history, cfg, epoch + 1)
    print('Training Done!')
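The crit = CrossEntropy(ignore_label=-1) used for the OCR decoder above is a project-specific class. A minimal PyTorch sketch of the same idea, assuming the core is a cross entropy that skips pixels labelled -1 (the real implementation may add class weighting or hard-example mining):

import torch.nn as nn

class CrossEntropy(nn.Module):
    def __init__(self, ignore_label=-1):
        super().__init__()
        # skip positions whose label equals ignore_label when averaging
        self.criterion = nn.CrossEntropyLoss(ignore_index=ignore_label)

    def forward(self, logits, labels):
        # logits: (N, C, H, W) raw scores; labels: (N, H, W) integer classes
        return self.criterion(logits, labels)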
Example #6
import math

import numpy as np

# Sigmoid, Softmax and CrossEntropy are activation/loss classes assumed to be
# provided elsewhere in the same project.

class MultilayerPerceptron():
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit = 1 / math.sqrt(self.n_hidden)
        self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def fit(self, X, y):

        self._initialize_weights(X, y)

        for i in range(self.n_iterations):

            # ..............
            #  Forward Pass
            # ..............

            # HIDDEN LAYER
            hidden_input = X.dot(self.W) + self.w0
            hidden_output = self.hidden_activation(hidden_input)
            # OUTPUT LAYER
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t input of output layer
            grad_wrt_o_layer = self.loss.gradient(
                y,
                y_pred) * self.output_activation.gradient(output_layer_input)
            grad_v = hidden_output.T.dot(grad_wrt_o_layer)
            grad_v0 = np.sum(grad_wrt_o_layer, axis=0, keepdims=True)
            # HIDDEN LAYER
            # Grad. w.r.t input of hidden layer
            grad_wrt_hidden_layer = grad_wrt_o_layer.dot(
                self.V.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_layer)
            grad_w0 = np.sum(grad_wrt_hidden_layer, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    # Use the trained model to predict labels of X
    def predict(self, X):
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred
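A hypothetical usage of the class above on a toy binary problem, assuming Sigmoid, Softmax and CrossEntropy are importable from the same project and that targets are one-hot encoded:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(200, 4)                         # 200 samples, 4 features
labels = (X[:, 0] + X[:, 1] > 0).astype(int)  # linearly separable toy labels
y = np.eye(2)[labels]                         # one-hot targets, shape (200, 2)

clf = MultilayerPerceptron(n_hidden=16, n_iterations=3000, learning_rate=0.01)
clf.fit(X, y)
pred = np.argmax(clf.predict(X), axis=1)
print("train accuracy:", np.mean(pred == labels))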