Example #1
import os
import time

import torch as t
from torch.utils.data import DataLoader
from torchnet.meter import AverageValueMeter

# The following local modules are assumptions inferred from usage below;
# adjust them to the actual project layout.
import dataset                       # provides FeatureDataset
from models import FeatureNet        # network definition
from utils import Visualizer, val    # Visdom wrapper and validation helper
from config import opt               # module-level Config instance

def train(**kwargs):
    # `opt` is the module-level Config instance (imported above);
    # keyword arguments override its fields:
    for k, v in kwargs.items():
        setattr(opt, k, v)
    vis = Visualizer(opt.env, opt.port)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    lr = opt.lr

    # configure the network
    featurenet = FeatureNet(4, 5)
    if opt.model_path:
        featurenet.load_state_dict(
            t.load(opt.model_path, map_location=lambda _s, _: _s))
    featurenet.to(device)

    # load the data
    data_set = dataset.FeatureDataset(root=opt.data_root,
                                      train=True,
                                      test=False)
    dataloader = DataLoader(data_set,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers)
    val_dataset = dataset.FeatureDataset(root=opt.data_root,
                                         train=False,
                                         test=False)
    val_dataloader = DataLoader(val_dataset,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    # define the optimizer and loss function
    optimizer = t.optim.SGD(featurenet.parameters(), lr)
    criterion = t.nn.CrossEntropyLoss().to(device)

    # meters for key metrics
    loss_meter = AverageValueMeter()

    # start training
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        for ii, (data, label) in enumerate(dataloader):
            feature = data.to(device)
            target = label.to(device)

            optimizer.zero_grad()
            prob = featurenet(feature)
            # print(prob)
            # print(target)
            loss = criterion(prob, target)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())

            if (ii + 1) % opt.plot_every == 0:
                vis.plot('train_loss', loss_meter.value()[0])
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        t.save(
            featurenet.state_dict(),
            'checkpoints/{epoch}_{time}_{loss}.pth'.format(
                epoch=epoch,
                time=time.strftime('%m%d_%H_%M_%S'),
                loss=loss_meter.value()[0]))

        # validate and visualize
        accu, loss = val(featurenet, val_dataloader, criterion)
        featurenet.train()  # val() leaves the model in eval mode; switch back
        vis.plot('val_loss', loss)
        vis.log('epoch: {epoch}, loss: {loss}, accu: {accu}'.format(
            epoch=epoch, loss=loss, accu=accu))

        # decay the learning rate after each epoch
        lr = lr * 0.9
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
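
Both snippets call a val() helper that is not shown. Below is a minimal
sketch of what Example #1 assumes: a function that evaluates the model on a
dataloader and returns (accuracy, mean loss). The project's real
implementation may differ.

import torch as t

@t.no_grad()
def val(model, dataloader, criterion):
    # switch to eval mode; the caller restores train() afterwards
    model.eval()
    device = next(model.parameters()).device
    total_loss, correct, total = 0.0, 0, 0
    for data, label in dataloader:
        data, label = data.to(device), label.to(device)
        logits = model(data)
        # weight the batch loss by batch size so the mean is exact
        total_loss += criterion(logits, label).item() * label.size(0)
        correct += (logits.argmax(dim=1) == label).sum().item()
        total += label.size(0)
    return correct / total, total_loss / total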
Example #2
import os

import torch
import torchnet as tnt
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm import tqdm

# The following local modules are assumptions inferred from usage below;
# adjust them to the actual project layout.
import get_data                                    # provides the Ali dataset
from models import deepfm                          # provides deepfm.FNN
from utils import Visualizer, to_var, val, test    # assumed helpers
from config import config                          # module-level config object

def train(**kwargs):
    for k_, v_ in kwargs.items():
        setattr(config, k_, v_)

    vis_ = Visualizer()

    # data
    train_dataset = get_data.Ali(config.train_path, 'train',
                                 config.feature_index_path)
    val_dataset = get_data.Ali(config.val_path, 'val',
                               config.feature_index_path)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              drop_last=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=config.batch_size)

    # model
    model = deepfm.FNN(config.feature_index_path)
    print(model)

    # print('initializing...')
    # model.apply(weight_init)

    # testing
    if config.test_flag:
        test_dataset = get_data.Ali(config.test_path, 'test',
                                    config.feature_index_path)
        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=config.batch_size)
        model.load_state_dict(
            torch.load(os.path.join(config.model_path, '_best')))
        test(model, test_loader, config.output_path)

    # criterion and optimizer
    criterion = torch.nn.BCELoss()
    lr = config.lr
    optimizer = Adam(model.parameters(),
                     lr=lr,
                     betas=(config.beta1, config.beta2),
                     weight_decay=config.weight_decay)
    previous_loss = 1e6
    if torch.cuda.is_available():
        model.cuda()
        criterion.cuda()

    # meters
    loss_meter = tnt.meter.AverageValueMeter()
    # class_err = tnt.meter.ClassErrorMeter()
    # confusion_matrix = tnt.meter.ConfusionMeter(2, normalized=True)

    # val(model, val_loader, criterion)
    # resume training
    start = 0
    if config.resume:
        model_epoch = [
            int(fname.split('_')[-1])
            for fname in os.listdir(config.model_path) if 'best' not in fname
        ]
        start = max(model_epoch)
        model.load_state_dict(
            torch.load(os.path.join(config.model_path, f'_epoch_{start}')))
    if start >= config.epochs:
        print('Training already Done!')
        return

    # train
    print('start training...')
    for i in range(start, config.epochs):
        loss_meter.reset()
        # class_err.reset()
        # confusion_matrix.reset()
        for ii, (c_data, labels) in enumerate(tqdm(train_loader)):
            c_data = to_var(c_data)
            labels = to_var(labels).float()
            # labels = labels.view(-1, 1)

            pred = model(c_data)
            # print(pred, labels)
            loss = criterion(pred, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            # confusion_matrix.add(pred.data.squeeze(), labels.data.type(torch.LongTensor))

            if (ii + 1) % config.print_every == 0:
                vis_.plot('train_loss', loss_meter.value()[0])
                print(f'epochs: {i + 1}/{config.epochs} '
                      f'batch: {ii + 1}/{len(train_loader)} '
                      f'train_loss: {loss.item()}')

        print('evaluating...')
        # train_cm = confusion_matrix.value()
        val_cm, val_accuracy, val_loss = val(model, val_loader, criterion)
        vis_.plot('val_loss', val_loss)
        vis_.log(f"epoch:{start + 1},lr:{lr},loss:{val_loss}")

        torch.save(model.state_dict(),
                   os.path.join(config.model_path, f'_epoch_{i}'))

        # if the epoch loss stopped improving, checkpoint and decay the learning rate
        if loss_meter.value()[0] > previous_loss:
            torch.save(model.state_dict(),
                       os.path.join(config.model_path, '_best'))
            lr = lr * config.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
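
Both examples rely on the same pattern: a module-level config object whose
attributes are overridden by whatever keyword arguments are passed to
train(). Below is a hypothetical sketch of such a config for Example #2;
the field names are taken from the usages above, but every default value is
made up for illustration.

class Config:
    lr = 1e-3
    lr_decay = 0.5
    beta1, beta2 = 0.9, 0.999
    weight_decay = 0.0
    batch_size = 256
    epochs = 20
    print_every = 100
    resume = False
    test_flag = False
    train_path = 'data/train'
    val_path = 'data/val'
    test_path = 'data/test'
    feature_index_path = 'data/feature_index.json'
    model_path = 'checkpoints'
    output_path = 'output'

config = Config()

# Any field can then be overridden per run, e.g.:
# train(lr=1e-4, batch_size=512, resume=True)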