def setUp(self):
        # enable dygraph mode
        place = paddle.CPUPlace()
        paddle.disable_static(place)

        # config seed
        paddle.seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        # create network
        self.layer = LinearNet()
        self.loss_fn = nn.CrossEntropyLoss()
        self.sgd = opt.SGD(learning_rate=0.001,
                           parameters=self.layer.parameters())

        # create data loader
        dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
        self.loader = paddle.io.DataLoader(
            dataset,
            places=place,
            batch_size=BATCH_SIZE,
            shuffle=True,
            drop_last=True,
            num_workers=0)

        # train
        train(self.layer, self.loader, self.loss_fn, self.sgd)

        # save
        self.model_path = "linear.example.model"
        paddle.jit.save(self.layer, self.model_path)
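For context, the LinearNet, RandomDataset, and train helpers (and the SEED/BATCH_* constants) used in this setUp are defined elsewhere; a minimal sketch in the spirit of the Paddle jit.save example might look like the following, where the constants and shapes are assumptions:

import numpy as np
import paddle
import paddle.nn as nn
import paddle.optimizer as opt

SEED = 2021        # assumed value
IMAGE_SIZE = 784   # assumed input width
CLASS_NUM = 10     # assumed number of classes
BATCH_SIZE = 16    # assumed batch size
BATCH_NUM = 4      # assumed number of batches

class LinearNet(nn.Layer):
    def __init__(self):
        super().__init__()
        self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)

    @paddle.jit.to_static  # lets paddle.jit.save trace the layer without an input_spec
    def forward(self, x):
        return self._linear(x)

class RandomDataset(paddle.io.Dataset):
    def __init__(self, num_samples):
        self.num_samples = num_samples

    def __getitem__(self, idx):
        image = np.random.random([IMAGE_SIZE]).astype('float32')
        label = np.random.randint(0, CLASS_NUM, (1,)).astype('int64')
        return image, label

    def __len__(self):
        return self.num_samples

def train(layer, loader, loss_fn, optimizer):
    # single pass over the loader; returns the loss of the last batch
    for image, label in loader:
        out = layer(image)
        loss = loss_fn(out, label)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()
    return loss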
Example #2
def train_worker(rank, train_config, network, config):
    # # set the parallel
    # torch.distributed.init_process_group(backend='nccl',
    #     init_method='env://', world_size=train_config.world_size, rank=rank)
    # initialize model
    net = network()
    # load pretrain model

    # backbone_dict = torch.load(train_config.init_weights)
    # del backbone_dict['state_dict']['fc.weight']
    # del backbone_dict['state_dict']['fc.bias']
    # net.resnet50.load_state_dict(backbone_dict['state_dict'])

    #net.cuda(rank)
    begin_epoch = 1
    # build optimizer
    # optimizer = SGD_bias.SGD(net.parameters(),
    optimizer = optim.SGD(
        parameters=net.parameters(),
        learning_rate=train_config.lr,  #momentum=train_config.momentum,
        weight_decay=train_config.weight_decay)
    if train_config.resume_weights:
        model_file = os.path.join(
            train_config.model_dir,
            'dump-{}.pth'.format(train_config.resume_weights))
        check_point = torch.load(model_file)
        net.load_state_dict(check_point['state_dict'])
        begin_epoch = train_config.resume_weights + 1
    # using distributed data parallel
    # net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[rank], broadcast_buffers=False)
    # build data provider
    crowdhuman = CrowdHuman(config, if_train=True)
    data_iter = DataLoader(dataset=crowdhuman,
                           batch_size=train_config.mini_batch_size,
                           num_workers=1,
                           collate_fn=crowdhuman.merge_batch,
                           shuffle=True)
    for epoch_id in range(begin_epoch, train_config.total_epoch + 1):
        do_train_epoch(net, data_iter, optimizer, rank, epoch_id, train_config)
        if rank == 0:
            # save the model
            fpath = os.path.join(train_config.model_dir,
                                 'dump-{}.pth'.format(epoch_id))
            model = dict(epoch=epoch_id,
                         state_dict=net.state_dict(),  # DDP wrapper is commented out above, so no .module
                         optimizer=optimizer.state_dict())
            torch.save(model, fpath)
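train_worker is written as a per-process worker (its first argument is a rank) even though the torch.distributed setup is commented out. A hypothetical launcher, assuming train_config.world_size is set and a single machine is used, might be:

import torch.multiprocessing as mp

if __name__ == '__main__':
    # While the distributed lines stay commented out, a plain call also works:
    #     train_worker(0, train_config, network, config)
    # With them re-enabled, spawn one worker per rank (train_config, network and
    # config come from the surrounding project and are assumptions here):
    mp.spawn(train_worker,
             args=(train_config, network, config),
             nprocs=train_config.world_size,
             join=True)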
Example #3
    def load_and_fine_tuning(self):
        # load
        translated_layer = paddle.jit.load(self.model_path)

        # continue training the original layer
        self.layer.train()
        orig_loss = train(self.layer, self.loader, self.loss_fn, self.sgd)

        # fine-tuning
        translated_layer.train()
        sgd = opt.SGD(learning_rate=0.001,
                      parameters=translated_layer.parameters())
        loss = train(translated_layer, self.loader, self.loss_fn, sgd)

        self.assertTrue(np.array_equal(orig_loss.numpy(), loss.numpy()),
                        msg="original loss:\n{}\nnew loss:\n{}\n".format(
                            orig_loss.numpy(), loss.numpy()))
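The layer returned by paddle.jit.load can also be used for plain inference rather than fine-tuning; a minimal sketch (the input width 784 is an assumption about IMAGE_SIZE at save time):

import paddle

loaded = paddle.jit.load("linear.example.model")
loaded.eval()
x = paddle.randn([1, 784], dtype='float32')  # assumed input shape
pred = loaded(x)
print(pred.shape)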
Example #4
def get_optimizer(config, parameters):
    clip = nn.ClipGradByNorm(clip_norm=config.optim.grad_clip)
    if config.optim.optimizer == 'Adam':
        return optim.Adam(parameters=parameters,
                          learning_rate=config.optim.lr,
                          weight_decay=config.optim.weight_decay,
                          beta1=config.optim.beta1,
                          beta2=0.999,
                          epsilon=config.optim.eps,
                          grad_clip=clip)
    elif config.optim.optimizer == 'RMSProp':
        return optim.RMSProp(parameters=parameters,
                             learning_rate=config.optim.lr,
                             weight_decay=config.optim.weight_decay,
                             grad_clip=clip)
    elif config.optim.optimizer == 'SGD':
        # Paddle's SGD takes no momentum argument; Momentum is SGD with momentum
        return optim.Momentum(parameters=parameters,
                              learning_rate=config.optim.lr,
                              momentum=0.9,
                              grad_clip=clip)
    else:
        raise NotImplementedError('Optimizer {} not understood.'.format(
            config.optim.optimizer))
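A hypothetical call site for get_optimizer, with types.SimpleNamespace standing in for the config object (field names mirror the ones read above; the values are placeholders):

from types import SimpleNamespace

import paddle.nn as nn

config = SimpleNamespace(optim=SimpleNamespace(
    optimizer='Adam', lr=2e-4, weight_decay=0.0,
    beta1=0.9, eps=1e-8, grad_clip=1.0))

model = nn.Linear(16, 16)  # placeholder model
optimizer = get_optimizer(config, model.parameters())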
Example #5
# ]))

print(net)

for param in net.parameters():
    print(param)

# 3.3.4 Initialize model parameters
# Set the global parameter initializer
fluid.set_global_initializer(initializer.Uniform(), initializer.Constant())

# 3.3.5 Define the loss function
loss = nn.MSELoss()

# 3.3.6 Define the optimization algorithm
optimizer = optim.SGD(learning_rate=0.03, parameters=net.parameters())
print(optimizer)

# Set different learning rates for different sub-networks (to be revised)
# optimizer = optim.SGD([
#     {'params': net._sub_layers1.parameters()},
#     {'params': net._sub_layers2.parameters(), 'lr': 0.01}
# ], learning_rate=0.03)

# for param_group in optimizer.param_groups:
#     param_group['lr'] *= 0.1

# 3.3.7 Train the model
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter: