Example No. 1
def test_spares_ftrl():
    """ test sparse ftrl"""
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
    net = NetWithSparseGatherV2()
    net.set_train()

    optimizer = FTRL(net.trainable_params(), weight_decay=0.9, loss_scale=2.0)
    # Run the sparse FTRL update on the Ascend device rather than the host CPU.
    optimizer.target = 'Ascend'
    train_network = TrainOneStepCell(net, optimizer)
    _executor.compile(train_network, indices, label)
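The sparse tests in this listing depend on NetWithSparseGatherV2, whose definition is not shown. A minimal sketch of such a cell, assuming MindSpore's SparseGatherV2 operator and hand-picked weight shapes (treat the class body as an illustration, not the project's exact definition):

import numpy as np
import mindspore.nn as nn
from mindspore import Parameter, Tensor
from mindspore.ops import operations as P

class NetWithSparseGatherV2(nn.Cell):
    """Toy cell whose forward pass produces sparse gradients via SparseGatherV2."""
    def __init__(self):
        super(NetWithSparseGatherV2, self).__init__()
        # weight1 is gathered by indices, so its gradient arrives as a sparse (row-indexed) tensor.
        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
        # weight2 receives an ordinary dense gradient.
        self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
        self.axis = 0
        self.gather = P.SparseGatherV2()

    def construct(self, indices, label):
        return self.gather(self.weight1, indices, self.axis) + self.weight2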
Example No. 2
    def __init__(self, network, sens=1024.0, host_device_mix=False, parameter_server=False, sparse=False):
        super(TrainStepWrap, self).__init__()
        parallel_mode = context.get_auto_parallel_context("parallel_mode")
        is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
        self.network = network
        self.network.set_train()
        self.trainable_params = network.trainable_params()
        # Split trainable parameters by name: "wide" parameters get FTRL, the rest get Adam/LazyAdam.
        weights_w = []
        weights_d = []
        for params in self.trainable_params:
            if 'wide' in params.name:
                weights_w.append(params)
            else:
                weights_d.append(params)
        self.weights_w = ParameterTuple(weights_w)
        self.weights_d = ParameterTuple(weights_d)

        if (sparse and is_auto_parallel) or parameter_server:
            # Sparse path: LazyAdam only touches the embedding rows present in the sparse
            # gradient, while FTRL updates the wide (linear) parameters.
            self.optimizer_d = LazyAdam(
                self.weights_d, learning_rate=3.5e-4, eps=1e-8, loss_scale=sens)
            self.optimizer_w = FTRL(learning_rate=5e-2, params=self.weights_w,
                                    l1=1e-8, l2=1e-8, initial_accum=1.0, loss_scale=sens)
            if host_device_mix or parameter_server:
                # Run the sparse optimizer kernels on the host CPU instead of the device.
                self.optimizer_w.target = "CPU"
                self.optimizer_d.target = "CPU"
        else:
            self.optimizer_d = Adam(
                self.weights_d, learning_rate=3.5e-4, eps=1e-8, loss_scale=sens)
            self.optimizer_w = FTRL(learning_rate=5e-2, params=self.weights_w,
                                    l1=1e-8, l2=1e-8, initial_accum=1.0, loss_scale=sens)
        self.hyper_map = C.HyperMap()
        self.grad_w = C.GradOperation(get_by_list=True,
                                      sens_param=True)
        self.grad_d = C.GradOperation(get_by_list=True,
                                      sens_param=True)
        self.sens = sens
        self.loss_net_w = IthOutputCell(network, output_index=0)
        self.loss_net_d = IthOutputCell(network, output_index=1)
        self.loss_net_w.set_grad()
        self.loss_net_d.set_grad()

        self.reducer_flag = False
        self.grad_reducer_w = None
        self.grad_reducer_d = None
        self.reducer_flag = parallel_mode in (ParallelMode.DATA_PARALLEL,
                                              ParallelMode.HYBRID_PARALLEL)
        if self.reducer_flag:
            mean = context.get_auto_parallel_context("gradients_mean")
            degree = context.get_auto_parallel_context("device_num")
            self.grad_reducer_w = DistributedGradReducer(self.optimizer_w.parameters, mean, degree)
            self.grad_reducer_d = DistributedGradReducer(self.optimizer_d.parameters, mean, degree)
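Only __init__ is shown here; the matching construct usually builds a constant sensitivity tensor per head, pulls the per-head gradients, all-reduces them when running data/hybrid parallel, and then applies the two optimizers. A hedged sketch of that step, assuming "from mindspore.ops import operations as P, functional as F" (the operator choices are illustrative, not this project's exact code):

    def construct(self, batch_ids, batch_wts, label):
        # Forward pass returns one loss per head (wide, deep).
        loss_w, loss_d = self.network(batch_ids, batch_wts, label)
        # Constant sensitivity (loss scale) fed into the backward pass of each head.
        sens_w = P.Fill()(P.DType()(loss_w), P.Shape()(loss_w), self.sens)
        sens_d = P.Fill()(P.DType()(loss_d), P.Shape()(loss_d), self.sens)
        grads_w = self.grad_w(self.loss_net_w, self.weights_w)(batch_ids, batch_wts, label, sens_w)
        grads_d = self.grad_d(self.loss_net_d, self.weights_d)(batch_ids, batch_wts, label, sens_d)
        if self.reducer_flag:
            # All-reduce gradients across devices in data/hybrid parallel mode.
            grads_w = self.grad_reducer_w(grads_w)
            grads_d = self.grad_reducer_d(grads_d)
        # Apply FTRL to the wide parameters and (Lazy)Adam to the deep parameters.
        return (F.depend(loss_w, self.optimizer_w(grads_w)),
                F.depend(loss_d, self.optimizer_d(grads_d)))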
Example No. 3
    def __init__(self, network, sens=1000.0):
        super(TrainStepWarp, self).__init__()
        self.network = network
        self.network.set_train()
        self.trainable_params = network.trainable_params()
        # Here every parameter lands in both groups, so FTRL and Adam each see the full set.
        weights_w = []
        weights_d = []
        for params in self.trainable_params:
            weights_w.append(params)
            weights_d.append(params)
        self.weights_w = ParameterTuple(weights_w)
        self.weights_d = ParameterTuple(weights_d)
        self.optimizer_w = FTRL(learning_rate=1e-2,
                                params=self.weights_w,
                                l1=1e-8,
                                l2=1e-8,
                                initial_accum=1.0)
        self.optimizer_d = Adam(self.weights_d,
                                learning_rate=3.5e-4,
                                eps=1e-8,
                                loss_scale=sens)
        self.hyper_map = C.HyperMap()
        self.grad_w = C.GradOperation(get_by_list=True, sens_param=True)
        self.grad_d = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.loss_net_w = IthOutputCell(network, output_index=0)
        self.loss_net_d = IthOutputCell(network, output_index=1)
Example No. 4
def test_ftrl_net():
    indices = Tensor(np.array([0, 0, 1]).astype(np.int32))
    label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
    net = NetWithSparseGatherV2()

    optimizer = FTRL(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, loss_scale=2.0)
    optimizer.target = 'Ascend'
    train_network = TrainOneStepCell(net, optimizer)
    output = train_network(indices, label)
    # Without assert, np.allclose's result would be silently discarded.
    assert np.allclose(output.asnumpy(), np.array([[[2, 2]], [[2, 2]], [[2, 2]]]))
    assert np.allclose(net.weight1.asnumpy(), np.array([[[0.7884067, 0.7884067]],
                                                        [[0.68213105, 0.68213105]],
                                                        [[1.0, 1.0]]]))
    assert np.allclose(net.weight2.asnumpy(), np.array([[[0.6821311, 0.6821311]],
                                                        [[0.6821311, 0.6821311]],
                                                        [[0.6821311, 0.6821311]]]))
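The expected values above are produced by the FTRL-proximal update. A plain-NumPy sketch of one dense FTRL step with the default lr_power = -0.5, following the usual ApplyFtrl formulation (an illustration of the update rule, not the device kernel):

import numpy as np

def ftrl_step(w, accum, linear, grad, lr=0.1, l1=0.0, l2=0.0, lr_power=-0.5):
    """One FTRL-proximal update on dense arrays (illustrative only)."""
    accum_new = accum + grad * grad
    # With lr_power = -0.5, accum ** -lr_power is simply sqrt(accum).
    linear = linear + grad - (accum_new ** -lr_power - accum ** -lr_power) / lr * w
    quadratic = accum_new ** -lr_power / lr + 2.0 * l2
    w_new = np.where(np.abs(linear) > l1,
                     (np.sign(linear) * l1 - linear) / quadratic,
                     np.zeros_like(w))
    return w_new, accum_new, linear

# Example call on a tiny weight vector (values are placeholders, not the test's numbers):
w = np.ones(2, np.float32)
accum = np.full(2, 0.1, np.float32)
linear = np.zeros(2, np.float32)
w, accum, linear = ftrl_step(w, accum, linear, grad=np.full(2, 2.0, np.float32), lr=0.1)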
Example No. 5
def test_ftrl():
    epoch = 3
    net = NetFtrl()
    optimizer = FTRL(filter(lambda x: x.requires_grad,
                            net.get_parameters()), learning_rate=0.01)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(
        net_with_criterion, optimizer)
    train_network.set_train()

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    losses1 = []
    for _ in range(epoch):
        data = Tensor(np.arange(0, 16).reshape(
            1, 1, 4, 4).astype(np.float32) * 0.01)
        label = Tensor(np.array([0]).astype(np.int32))
        loss = train_network(data, label)
        losses1.append(loss.asnumpy())
    assert losses1[0] > losses1[1]
    assert losses1[1] > losses1[2]

    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
    losses2 = []
    for _ in range(epoch):
        data = Tensor(np.arange(0, 16).reshape(
            1, 1, 4, 4).astype(np.float32) * 0.01)
        label = Tensor(np.array([0]).astype(np.int32))
        loss = train_network(data, label)
        losses2.append(loss.asnumpy())
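NetFtrl is not defined in this snippet. For a self-contained run, a small cell along these lines would fit the (1, 1, 4, 4) input and 10-class label used above (layer choice and initializer are assumptions):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P

class NetFtrl(nn.Cell):
    """Tiny classifier: flatten the 1x1x4x4 input and project it to 10 logits."""
    def __init__(self):
        super(NetFtrl, self).__init__()
        self.reshape = P.Reshape()
        # Dense expects a [out_channels, in_channels] weight, here [10, 16].
        self.fc = nn.Dense(16, 10, weight_init=Tensor(np.ones([10, 16]).astype(np.float32) * 0.01))

    def construct(self, x):
        x = self.reshape(x, (1, -1))
        return self.fc(x)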
Example No. 6
def test_ftrl_target():
    """ test_ftrl_target """
    net = NetWithSparseGatherV2()
    net.set_train()

    optimizer = FTRL(net.trainable_params(), weight_decay=0.9, loss_scale=2.0)
    if optimizer.target not in ('CPU', 'Ascend'):
        raise ValueError("The value must be 'CPU' or 'Ascend', but got value {}".format(optimizer.target))
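The target attribute controls where the sparse FTRL kernel executes; host-device mixed or parameter-server setups usually point it at the host CPU instead. Continuing from the test above, a short hedged sketch:

    # Offload the sparse optimizer computation to the host CPU, as is commonly done
    # for host-device mixed or parameter-server training.
    optimizer = FTRL(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, loss_scale=2.0)
    optimizer.target = 'CPU'      # valid values are 'CPU' and 'Ascend', as the check above shows
    train_network = TrainOneStepCell(net, optimizer)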
Example No. 7
    def __init__(self, network, config, sens=1000.0):
        super(TrainStepWrap, self).__init__()
        self.network = network
        self.network.set_train()
        self.trainable_params = network.trainable_params()
        weights_w = []
        weights_d = []
        for params in self.trainable_params:
            if 'wide' in params.name:
                weights_w.append(params)
            else:
                weights_d.append(params)

        self.weights_w = ParameterTuple(weights_w)
        self.weights_d = ParameterTuple(weights_d)
        self.optimizer_w = FTRL(learning_rate=config.ftrl_lr,
                                params=self.weights_w,
                                l1=5e-4,
                                l2=5e-4,
                                initial_accum=0.1,
                                loss_scale=sens)

        # self.optimizer_d = ProximalAdagrad(self.weights_d, learning_rate=config.adam_lr, loss_scale=sens)
        self.optimizer_d = Adam(self.weights_d,
                                learning_rate=config.adam_lr,
                                eps=1e-6,
                                loss_scale=sens)

        self.hyper_map = C.HyperMap()

        self.grad_w = C.GradOperation('grad_w',
                                      get_by_list=True,
                                      sens_param=True)
        self.grad_d = C.GradOperation('grad_d',
                                      get_by_list=True,
                                      sens_param=True)

        self.sens = sens
        self.loss_net_w = IthOutputCell(network, output_index=0)
        self.loss_net_d = IthOutputCell(network, output_index=1)

        self.reducer_flag = False
        self.grad_reducer_w = None
        self.grad_reducer_d = None
        parallel_mode = _get_parallel_mode()
        if parallel_mode in (ParallelMode.DATA_PARALLEL,
                             ParallelMode.HYBRID_PARALLEL):
            self.reducer_flag = True
        if self.reducer_flag:
            mean = _get_mirror_mean()
            degree = _get_device_num()
            self.grad_reducer_w = DistributedGradReducer(
                self.optimizer_w.parameters, mean, degree)
            self.grad_reducer_d = DistributedGradReducer(
                self.optimizer_d.parameters, mean, degree)
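This variant reads its learning rates from a config object. A minimal stand-in for experimentation, exposing only the fields the wrapper touches (field names come from the code above, the values are placeholders):

class WideDeepConfig:
    """Minimal config stub with only the attributes this TrainStepWrap reads."""
    def __init__(self, ftrl_lr=5e-2, adam_lr=3.5e-4):
        self.ftrl_lr = ftrl_lr
        self.adam_lr = adam_lr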
Example No. 8
def test_ftrl():
    """ test_ftrl """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = FTRL(net.trainable_params(), weight_decay=0.9, loss_scale=2.0)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _executor.compile(train_network, inputs, label)
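Net is the usual toy dense model in this kind of unit test. A hedged sketch that matches the (1, 64) input and (1, 10) label shapes used above (the source's exact definition is not shown):

import numpy as np
import mindspore.nn as nn
from mindspore import Parameter, Tensor
from mindspore.ops import operations as P

class Net(nn.Cell):
    """64 -> 10 affine layer built from raw MatMul/BiasAdd so FTRL sees plain dense gradients."""
    def __init__(self):
        super(Net, self).__init__()
        self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight")
        self.bias = Parameter(Tensor(np.ones([10]).astype(np.float32)), name="bias")
        self.matmul = P.MatMul()
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        return self.bias_add(self.matmul(x, self.weight), self.bias)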