Example 1
    def testSetNumpyBeforeTrain(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [0.0]
            # this is a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                # keep lr at 0.0 so parameters are not updated
                new_lr = 0.0
                lr_arr.append(new_lr)

            place = (fluid.CUDAPlace(0) if core.is_compiled_with_cuda()
                     else fluid.CPUPlace())
            adam = Adam(learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr),
                        beta1=0.8,
                        beta2=0.6,
                        parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            np_opti_dict = {}
            np_state_dict = {}

            for k, v in self.opti_dict.items():
                np_opti_dict[v.name] = v.numpy()

            for k, v in self.state_dict.items():
                np_state_dict[k] = v.numpy()

            adam.set_dict(np_opti_dict)
            ptb_model.set_dict(np_state_dict)
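            # with lr = 0.0 the single step below leaves every parameter
            # unchanged while the optimizer state still advances by one step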
            for i in range(1):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()

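            # check the optimizer state: global_step should have advanced by
            # one and the beta power accumulators should have been scaled once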
            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "global_step":
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            # check parameter

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]
                self.assertTrue(np.array_equal(new_t, base_t))
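
The test above boils down to exporting both state dicts as numpy arrays, loading them back with set_dict, and then verifying that nothing drifted after a zero-lr step. A stripped-down sketch of that numpy round trip, using a small fluid.dygraph.Linear layer and a dummy loss in place of PtbModel (both purely illustrative), might look like this:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.optimizer import Adam
from paddle.fluid.dygraph import Linear, to_variable

with fluid.dygraph.guard():
    # hypothetical tiny layer, only to illustrate the numpy round trip
    net = Linear(4, 2)
    adam = Adam(learning_rate=0.0, parameter_list=net.parameters())

    # one step so the optimizer has accumulators to export
    loss = fluid.layers.reduce_mean(net(to_variable(np.ones((1, 4), 'float32'))))
    loss.backward()
    adam.minimize(loss)
    net.clear_gradients()

    # export both state dicts as plain numpy arrays ...
    np_state_dict = {k: v.numpy() for k, v in net.state_dict().items()}
    np_opti_dict = {v.name: v.numpy() for k, v in adam.state_dict().items()}

    # ... and load them back; set_dict accepts numpy values directly
    net.set_dict(np_state_dict)
    adam.set_dict(np_opti_dict)
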
Example 2
class Training(Base):
    '''
    Train the searched network.
    cf: config.yml path.
    cv_i: which fold of the cross validation to use. If cv_i >= n_fold, the whole training dataset is used.
    for_train: if True, run the training process; otherwise run the searching process.
    new_lr: if True, check_resume() will not load the saved states of the optimizer and lr scheduler.
    '''
    def __init__(self, cf='config.yml', cv_i=0, for_train=True, new_lr=False):
        super().__init__(cf=cf, cv_i=cv_i, for_train=for_train)
        self._init_model()
        self.check_resume(new_lr=new_lr)

    def _init_model(self):
        geno_file = os.path.join(self.log_path,
                                 self.config['search']['geno_file'])
        with open(geno_file, 'rb') as f:
            gene = eval(pickle.load(f)[0])
        self.model = SearchedNet(
            gene=gene,
            in_channels=self.config['data']['in_channels'],
            init_node_c=self.config['search']['init_node_c'],
            out_channels=self.config['data']['out_channels'],
            depth=self.config['search']['depth'],
            n_nodes=self.config['search']['n_nodes'],
            drop_rate=self.config['train']['drop_rate'])
        print('Param size = {:.3f} MB'.format(
            calc_param_size(self.model.parameters())))
        self.loss = lambda props, y_truth: fluid.layers.reduce_mean(
            fluid.layers.softmax_with_cross_entropy(props, y_truth))

        self.optim = Adam(parameter_list=self.model.parameters())
        self.scheduler = ReduceLROnPlateau(self.optim)

    def check_resume(self, new_lr=False):
        self.last_save = os.path.join(self.log_path,
                                      self.config['train']['last_save'])
        self.last_aux = os.path.join(self.log_path,
                                     self.config['train']['last_aux'])
        self.best_shot = os.path.join(self.log_path,
                                      self.config['train']['best_shot'])
        self.best_aux = os.path.join(self.log_path,
                                     self.config['train']['best_aux'])
        if os.path.exists(self.last_aux):
            self.model.set_dict(fluid.dygraph.load_dygraph(self.last_save)[0])
            with open(self.last_aux, 'rb') as f:
                state_dicts = pickle.load(f)
            self.epoch = state_dicts['epoch'] + 1
            self.history = state_dicts['history']
            if not new_lr:
                self.optim.set_dict(
                    fluid.dygraph.load_dygraph(self.last_save)[1])
                self.scheduler.load_state_dict(state_dicts['scheduler'])
            self.best_val_loss = state_dicts['best_loss']
        else:
            self.epoch = 0
            self.history = defaultdict(list)
            self.best_val_loss = float('inf')

    def main_run(self):
        #         pdb.set_trace()
        n_epochs = self.config['train']['epochs']

        for epoch in range(n_epochs):
            is_best = False
            loss, acc1, acc5 = self.train()
            val_loss, val_acc1, val_acc5 = self.validate()
            self.scheduler.step(val_loss)
            self.history['loss'].append(loss)
            self.history['acc1'].append(acc1)
            self.history['acc5'].append(acc5)
            self.history['val_loss'].append(val_loss)
            self.history['val_acc1'].append(val_acc1)
            self.history['val_acc5'].append(val_acc5)
            if val_loss < self.best_val_loss:
                is_best = True
                self.best_val_loss = val_loss

            # Save what the current epoch ends up with.
            fluid.save_dygraph(self.model.state_dict(), self.last_save)
            fluid.save_dygraph(self.optim.state_dict(), self.last_save)
            state_dicts = {
                'epoch': self.epoch,
                'history': self.history,
                'scheduler': self.scheduler.state_dict(),
                'best_loss': self.best_val_loss
            }
            with open(self.last_aux, 'wb') as f:
                pickle.dump(state_dicts, f)

            if is_best:
                shutil.copy(self.last_save + '.pdparams',
                            self.best_shot + '.pdparams')
                shutil.copy(self.last_save + '.pdopt',
                            self.best_shot + '.pdopt')
                shutil.copy(self.last_aux, self.best_aux)

            self.epoch += 1
            if self.epoch > n_epochs:
                break

            if DEBUG_FLAG and epoch >= 1:
                break
        print('Training Finished.')
        return

    def train(self):
        '''
        Training | Training process
        '''
        self.model.train()
        n_steps = self.train_generator.steps_per_epoch
        sum_loss = 0
        sum_acc1 = 0
        sum_acc5 = 0
        with tqdm(self.train_generator.epoch(),
                  total=n_steps,
                  desc='Training | Epoch {} | Training'.format(
                      self.epoch)) as pbar:
            for step, (x, y_truth) in enumerate(pbar):
                x = fluid.dygraph.to_variable(x.astype('float32'))
                y_truth = fluid.dygraph.to_variable(
                    y_truth.astype('int64')[:, np.newaxis])

                y_pred = self.model(x)
                loss = self.loss(y_pred, y_truth)
                sum_loss += loss.numpy()[0]
                acc1 = fluid.layers.accuracy(y_pred, y_truth, k=1)
                acc5 = fluid.layers.accuracy(y_pred, y_truth, k=5)
                sum_acc1 += acc1.numpy()[0]
                sum_acc5 += acc5.numpy()[0]
                loss.backward()
                self.optim.minimize(loss)
                self.optim.clear_gradients()

                postfix = OrderedDict()
                postfix['Loss'] = round(sum_loss / (step + 1), 3)
                postfix['Top-1-Acc'] = round(sum_acc1 / (step + 1), 3)
                postfix['Top-5-Acc'] = round(sum_acc5 / (step + 1), 3)
                pbar.set_postfix(postfix)

                if DEBUG_FLAG and step >= 1:
                    break

        return [round(i / n_steps, 3) for i in [sum_loss, sum_acc1, sum_acc5]]

    def validate(self):
        '''
        Training | Validation process
        '''
        self.model.eval()
        n_steps = self.val_generator.steps_per_epoch
        sum_loss = 0
        sum_acc1 = 0
        sum_acc5 = 0
        with tqdm(self.val_generator.epoch(),
                  total=n_steps,
                  desc='Training | Epoch {} | Val'.format(self.epoch)) as pbar:
            for step, (x, y_truth) in enumerate(pbar):
                x = fluid.dygraph.to_variable(x.astype('float32'))
                y_truth = fluid.dygraph.to_variable(
                    y_truth.astype('int64')[:, np.newaxis])
                y_pred = self.model(x)
                loss = self.loss(y_pred, y_truth)
                sum_loss += loss.numpy()[0]
                acc1 = fluid.layers.accuracy(y_pred, y_truth, k=1)
                acc5 = fluid.layers.accuracy(y_pred, y_truth, k=5)
                sum_acc1 += acc1.numpy()[0]
                sum_acc5 += acc5.numpy()[0]

                postfix = OrderedDict()
                postfix['Loss'] = round(sum_loss / (step + 1), 3)
                postfix['Top-1-Acc'] = round(sum_acc1 / (step + 1), 3)
                postfix['Top-5-Acc'] = round(sum_acc5 / (step + 1), 3)
                pbar.set_postfix(postfix)

                if DEBUG_FLAG and step >= 1:
                    break
        return [round(i / n_steps, 3) for i in [sum_loss, sum_acc1, sum_acc5]]
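
For context, a minimal driver for the class above might look like the following. The __main__ guard, the config path, and entering the dygraph guard at the call site are assumptions, since the snippet does not show how Base sets up the execution environment:

if __name__ == '__main__':
    import paddle.fluid as fluid

    # assumed entry point; Base is expected to parse config.yml and to build
    # the train/val generators used by train() and validate()
    with fluid.dygraph.guard():
        trainer = Training(cf='config.yml', cv_i=0, for_train=True, new_lr=False)
        trainer.main_run()
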
Example 3
    def testSetNumpy(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [1.0]
            # this is a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                new_lr = 1.0
                lr_arr.append(new_lr)

            place = (fluid.CUDAPlace(0) if core.is_compiled_with_cuda()
                     else fluid.CPUPlace())
            adam = Adam(learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr),
                        parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            # check optimizer
            opti_dict = adam.state_dict()
            np_opti_dict = {}
            # save a numpy copy of each optimizer variable, then zero it in place
            for k, v in opti_dict.items():
                np_t = v.numpy()
                np_opti_dict[v.name] = np_t
                var = v.value().get_tensor()
                var.set(np.zeros_like(np_t), place)

                self.assertTrue(np.sum(np.abs(v.numpy())) == 0)

            if isinstance(adam._learning_rate, LearningRateDecay):
                adam._learning_rate.step_num = 0

            adam.set_dict(np_opti_dict)

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                self.assertTrue(
                    np.array_equal(v.numpy(), self.base_opti[v.name]))

            # check parameter
            state_dict = ptb_model.state_dict()
            np_state_dict = {}
            for k, v in state_dict.items():
                np_t = v.numpy()
                np_state_dict[k] = np_t
                var = v.value().get_tensor()

                var.set(np.zeros_like(np_t), place)

            ptb_model.set_dict(np_state_dict)

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]

                self.assertTrue(np.array_equal(new_t, base_t))
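
The core of the check above is the in-place reset: keep a numpy copy of every variable, overwrite the underlying tensor with zeros, then restore it through set_dict and compare. A stripped-down sketch of just that pattern, using a hypothetical fluid.dygraph.Linear layer in place of PtbModel, might read:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core

with fluid.dygraph.guard():
    place = (fluid.CUDAPlace(0) if core.is_compiled_with_cuda()
             else fluid.CPUPlace())
    layer = fluid.dygraph.Linear(3, 3)  # hypothetical stand-in for PtbModel

    saved = {}
    for k, v in layer.state_dict().items():
        saved[k] = v.numpy()  # keep a numpy copy
        # wipe the underlying tensor in place
        v.value().get_tensor().set(np.zeros_like(saved[k]), place)
        assert np.sum(np.abs(v.numpy())) == 0

    layer.set_dict(saved)  # restore from the numpy copies
    for k, v in layer.state_dict().items():
        assert np.array_equal(v.numpy(), saved[k])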