Example #1
n_classes = len(train_data_loader.dataset.label2id)
model = MultiInstanceLearning(n_classes=n_classes,
                              vocab_size=len(vocab),
                              device=device,
                              bert_checkpoint=bert_checkpoint,
                              dense_layer_checkpoint=dense_layer_checkpoint,
                              **cfg.config)
model.to(device)
logger.info(model)

# optimizer and criterion

param = filter(lambda p: p.requires_grad, model.parameters())
# param = [p for p in model.parameters() if p.requires_grad]
optimizer = get_optimizer(cfg.config['optimizer'], param, lr=cfg.config['lr'])
lr_scheduler = ReduceLROnPlateau(optimizer,
                                 'max',
                                 factor=cfg.config['lr_decay'],
                                 patience=cfg.config['lr_decay_patience'])
criterion = nn.CrossEntropyLoss(
    weight=train_data_loader.dataset.weights.to(device))

# trainer
trainer = MILTrainer(model=model,
                     optimizer=optimizer,
                     criterion=criterion,
                     cfg=cfg.config,
                     logger=logger,
                     data_loader=train_data_loader,
                     valid_data_loader=valid_data_loader)
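
The `get_optimizer` helper is used throughout these examples but never shown. Below is a minimal sketch consistent with the call shape in Examples #1, #2, #4, and #5 (the helper in Examples #6 and #7 clearly has a different signature); the name-to-class mapping is an assumption, not the original implementation:

# Hypothetical sketch of get_optimizer (assumed; the real helper may differ).
import torch.optim as optim

def get_optimizer(name, parameters, lr):
    """Instantiate a torch.optim optimizer from a config name."""
    optimizers = {
        'adam': optim.Adam,
        'adamw': optim.AdamW,
        'sgd': optim.SGD,
    }
    return optimizers[name.lower()](parameters, lr=lr)
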
Example #2
def train(conf, data_category):
    print(json.dumps(conf, indent=4))

    os.environ["CUDA_VISIBLE_DEVICES"] = str(conf['device'])
    device = torch.device(0)

    model_name = conf['model']['name']
    optimizer_name = conf['optimizer']['name']
    data_set = conf['data']['dataset']
    graph = h5py.File(os.path.join('data', data_set, 'all_graph.h5'), 'r')
    scheduler_name = conf['scheduler']['name']
    loss = get_loss(**conf['loss'])
    # data_category = conf['data']['data_category']

    loss.to(device)
    encoder, decoder, support = None, None, None
    if model_name == 'Costnet':
        base_model_name = conf['Base']['name']
        encoder, decoder = preprocessing(base_model_name, conf, loss, graph,
                                         data_category, device, data_set,
                                         optimizer_name, scheduler_name)
    if model_name in ('Metricnet', 'GWNET', 'Evonet', 'STGCN', 'DCRNN',
                      'STG2Seq', 'Evonet2'):
        support = preprocessing_for_metric(
            data_category=data_category,
            dataset=conf['data']['dataset'],
            Normal_Method=conf['data']['Normal_Method'],
            _len=conf['data']['_len'],
            **conf['preprocess'])
    model, trainer = create_model(model_name, loss, conf['model'][model_name],
                                  data_category, device, graph, encoder,
                                  decoder, support)

    optimizer = get_optimizer(optimizer_name, model.parameters(),
                              conf['optimizer'][optimizer_name]['lr'])
    scheduler = get_scheduler(scheduler_name, optimizer,
                              **conf['scheduler'][scheduler_name])
    if torch.cuda.device_count() > 1:
        print("use", torch.cuda.device_count(), "GPUs")
        model = nn.DataParallel(model)
    model.to(device)  # needed in both cases; DataParallel does not move parameters itself

    save_folder = os.path.join('save', conf['name'],
                               f'{data_set}_{"".join(data_category)}',
                               conf['tag'])
    run_folder = os.path.join('run', conf['name'],
                              f'{data_set}_{"".join(data_category)}',
                              conf['tag'])

    shutil.rmtree(save_folder, ignore_errors=True)
    os.makedirs(save_folder)
    shutil.rmtree(run_folder, ignore_errors=True)
    os.makedirs(run_folder)

    with open(os.path.join(save_folder, 'config.yaml'), 'w+') as _f:
        yaml.safe_dump(conf, _f)

    data_loader, normal = get_data_loader(**conf['data'],
                                          data_category=data_category,
                                          device=device,
                                          model_name=model_name)

    if len(data_category) == 2:
        train_model(model=model,
                    dataloaders=data_loader,
                    trainer=trainer,
                    node_num=conf['node_num'],
                    loss_func=loss,
                    optimizer=optimizer,
                    normal=normal,
                    scheduler=scheduler,
                    folder=save_folder,
                    tensorboard_folder=run_folder,
                    device=device,
                    **conf['train'])
        # test_model(folder = save_folder)
    else:
        train_baseline(model=model,
                       dataloaders=data_loader,
                       trainer=trainer,
                       optimizer=optimizer,
                       normal=normal,
                       scheduler=scheduler,
                       folder=save_folder,
                       tensorboard_folder=run_folder,
                       device=device,
                       **conf['train'])
        test_baseline(folder=save_folder,
                      trainer=trainer,
                      model=model,
                      normal=normal,
                      dataloaders=data_loader,
                      device=device)
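
`get_scheduler` is another assumed helper. A plausible sketch matching the call signature above, with the name-to-class mapping as an assumption:

# Hypothetical sketch of get_scheduler (assumed; the real helper may differ).
import torch.optim.lr_scheduler as lr_sched

def get_scheduler(name, optimizer, **kwargs):
    """Instantiate a torch LR scheduler by config name, forwarding its kwargs."""
    schedulers = {
        'StepLR': lr_sched.StepLR,
        'MultiStepLR': lr_sched.MultiStepLR,
        'ReduceLROnPlateau': lr_sched.ReduceLROnPlateau,
    }
    return schedulers[name](optimizer, **kwargs)
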
Example #3
def train(label, phi, t_label, t_phi, cfg):
    # writer = SummaryWriter()
    train_label, validate_label, _, _ = train_test_split(
        label.label, test_size=cfg.tv_value, random_state=20, shuffle=True)

    train_dataset = ds.SnapshotDataset(phi, train_label)
    validate_dataset = ds.SnapshotDataset(phi, validate_label)
    t_dataset = ds.SnapshotDataset(t_phi, t_label)

    phi = phi.to(cfg.device)
    model = End2end(phi, cfg)
    print(sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = model.to(cfg.device)
    optimizer = util.get_optimizer(cfg.o_name, model, cfg.learning_rate)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', 0.5,
                                               cfg.scheduler)
    loss_func = get_loss(cfg)

    # with writer as w:
    #     dummy_x = torch.zeros_like(label[0].unsqueeze(0))
    #     dummy_y = torch.zeros_like(label[0, 0].unsqueeze(0))
    #     w.add_graph(model, (dummy_x, dummy_y, phi))

    losses = []
    val_losses = []
    best_val_loss = 1
    best_psnr = 0

    accumulation_steps = cfg.poor

    train_data_loader = DataLoader(train_dataset,
                                   batch_size=cfg.batch,
                                   shuffle=True,
                                   drop_last=True)
    validate_data_loader = DataLoader(validate_dataset,
                                      batch_size=math.floor(cfg.batch / 2),
                                      shuffle=False,
                                      drop_last=True)
    for ep in range(cfg.epoch):
        optimizer.zero_grad()
        for ep_i, batch in enumerate(train_data_loader):
            label, y = batch
            initial = y.repeat(cfg.frame, 1, 1, 1).permute(1, 0, 2, 3).mul(
                phi.cpu()).div(phi.cpu().sum(0) + 0.0001)
            initial = initial.to(cfg.device)
            y = y.to(cfg.device)
            label = label.to(cfg.device)
            model.train()
            layers, symmetric = model(initial, y, phi)
            net_output = layers[-1]
            loss = loss_func(layers, label, symmetric)
            loss.backward()
            if (ep_i + 1) % accumulation_steps == 0:
                print("ep", ep, "ep_i ", ep_i, "loss ", loss.item())
                optimizer.step()
                optimizer.zero_grad()

        with torch.no_grad():
            losses.append(loss.item())
            val_loss = torch.zeros([1])
            for v_ep_i, v_batch in enumerate(validate_data_loader):
                v_label, v_y = v_batch  # unpack the batch, mirroring the training loop
                v_initial = v_y.repeat(cfg.frame, 1, 1, 1).permute(
                    1, 0, 2, 3).mul(phi.cpu()).div(phi.cpu().sum(0) + 0.0001)
                v_initial = v_initial.to(cfg.device)
                v_y = v_y.to(cfg.device)
                v_label = v_label.to(cfg.device)
                model.eval()
                v_layers, symmetric = model(v_initial, v_y, phi)
                net_output = v_layers[-1]
                val_loss += loss_func(v_layers, v_label, symmetric)
            # step the plateau scheduler once per epoch on the accumulated validation loss
            scheduler.step(val_loss)
            val_losses.append(val_loss.item())

            print("ep ", ep, "loss ", loss.item(), "val loss ", val_loss.item(),
                  "lr", optimizer.param_groups[0]['lr'], "time ", time())

            if ep % cfg.store == 0:
                best_val_loss = val_loss
                best_img = np.clip(net_output.detach().cpu().numpy(), 0,
                                   1).astype(np.float64)
                best_psnr = compare_psnr(v_label.cpu().numpy(), best_img)
                print("PSNR: ", np.round(best_psnr, 2))
                util.save(model, best_psnr, best_img,
                          v_label.cpu().numpy(), cfg)

    t_phi = t_phi.to(cfg.device)
    data_loader = DataLoader(t_dataset,
                             batch_size=t_label.shape[0],
                             shuffle=False)
    label, y = next(iter(data_loader))
    initial = y.repeat(cfg.frame, 1, 1, 1).permute(1, 0, 2, 3).mul(
        t_phi.cpu()).div(t_phi.cpu().sum(0) + 0.0001)
    initial = initial.to(cfg.device)
    y = y.to(cfg.device)
    layers, _ = model(initial, y, t_phi)
    net_output = layers[-1].detach().cpu().numpy()
    psnr = compare_psnr(label.numpy(),
                        np.clip(net_output, 0, 1).astype(np.float64))
    return model, psnr, net_output
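
The training loop above accumulates gradients over `cfg.poor` batches before each optimizer step; note it does not divide the loss by the number of accumulation steps. Stripped of the model-specific details, the pattern looks like this (a generic, self-contained sketch with illustrative names, including the loss scaling the original omits):

# Generic gradient-accumulation sketch (illustrative, not this repo's code).
import torch
import torch.nn as nn

model = nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()
loader = [(torch.randn(4, 8), torch.randn(4, 1)) for _ in range(8)]
accumulation_steps = 4  # plays the role of cfg.poor above

optimizer.zero_grad()
for i, (x, target) in enumerate(loader):
    # Scale so the accumulated gradient matches one large batch.
    loss = criterion(model(x), target) / accumulation_steps
    loss.backward()  # gradients accumulate across iterations
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()  # apply the accumulated gradient
        optimizer.zero_grad()
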
Example #4
def preprocessing(base_model_name, conf, loss, graph, data_category, device,
                  data_set, optimizer_name, scheduler_name):

    if base_model_name == 'LinearDecompose':
        data_loader = get_data_loader_base(base_model_name=base_model_name,
                                           dataset=conf['data']['dataset'],
                                           batch_size=conf['batch_size_base'],
                                           _len=conf['data']['_len'],
                                           data_category=data_category,
                                           device=device)
        model, trainer = create_model(base_model_name, loss,
                                      conf['Base'][base_model_name],
                                      data_category, device, graph)
        save_folder = os.path.join('saves',
                                   f"{conf['name']}_{base_model_name}",
                                   f'{data_set}_{"".join(data_category)}')
        run_folder = os.path.join('run', f"{conf['name']}_{base_model_name}",
                                  f'{data_set}_{"".join(data_category)}')
        optimizer = get_optimizer(optimizer_name, model.parameters(),
                                  conf['optimizerbase'][optimizer_name]['lr'])
        scheduler = get_scheduler(scheduler_name, optimizer,
                                  **conf['scheduler'][scheduler_name])
        shutil.rmtree(save_folder, ignore_errors=True)
        os.makedirs(save_folder)
        shutil.rmtree(run_folder, ignore_errors=True)
        os.makedirs(run_folder)
        model = train_decompose(model=model,
                                dataloaders=data_loader,
                                trainer=trainer,
                                optimizer=optimizer,
                                scheduler=scheduler,
                                folder=save_folder,
                                tensorboard_folder=run_folder,
                                device=device,
                                **conf['train'])
        model.load_state_dict(
            torch.load(os.path.join(save_folder, 'best_model.pkl'))
            ['model_state_dict'])
        return model.encoder, model.decoder
    if base_model_name == 'SvdDecompose':
        data = get_data_loader_base(base_model_name=base_model_name,
                                    dataset=conf['data']['dataset'],
                                    batch_size=conf['batch_size_base'],
                                    _len=conf['data']['_len'],
                                    data_category=data_category,
                                    device=device)
        data = torch.from_numpy(data).float().to(device)
        save_folder = os.path.join('saves',
                                   f"{conf['name']}_{base_model_name}",
                                   f'{data_set}_{"".join(data_category)}')
        run_folder = os.path.join('run', f"{conf['name']}_{base_model_name}",
                                  f'{data_set}_{"".join(data_category)}')
        model, trainer = create_model(base_model_name, loss,
                                      conf['Base'][base_model_name],
                                      data_category, device, graph)
        shutil.rmtree(save_folder, ignore_errors=True)
        os.makedirs(save_folder)
        shutil.rmtree(run_folder, ignore_errors=True)
        os.makedirs(run_folder)
        model.decompose(data)
        return model.encoder, model.decoder
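
The `load_state_dict` call above implies checkpoints are saved as dicts with a 'model_state_dict' key. A minimal save/load pair under that assumption (the trainer's real checkpoints may hold more keys, such as optimizer state):

# Sketch of the implied checkpoint convention (assumed layout).
import torch
import torch.nn as nn

def save_checkpoint(model: nn.Module, path: str) -> None:
    torch.save({'model_state_dict': model.state_dict()}, path)

def load_checkpoint(model: nn.Module, path: str) -> nn.Module:
    model.load_state_dict(torch.load(path)['model_state_dict'])
    return model
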
Example #5
def train(conf, data_category):
    print(json.dumps(conf, indent=4))

    os.environ["CUDA_VISIBLE_DEVICES"] = str(conf['device'])
    device = torch.device(0)

    model_name = conf['model']['name']
    optimizer_name = conf['optimizer']['name']
    data_set = conf['data']['dataset']
    scheduler_name = conf['scheduler']['name']
    loss = get_loss(**conf['loss'])

    loss.to(device)

    support = preprocessing_for_metric(data_category=data_category,
                                       dataset=conf['data']['dataset'],
                                       Normal_Method=conf['data']['Normal_Method'],
                                       _len=conf['data']['_len'],
                                       **conf['preprocess'])
    model, trainer = create_model(model_name,
                                  loss,
                                  conf['model'][model_name],
                                  data_category,
                                  device,
                                  support)

    optimizer = get_optimizer(optimizer_name, model.parameters(),
                              conf['optimizer'][optimizer_name]['lr'])
    scheduler = get_scheduler(scheduler_name, optimizer,
                              **conf['scheduler'][scheduler_name])
    if torch.cuda.device_count() > 1:
        print("use", torch.cuda.device_count(), "GPUs")
        model = nn.DataParallel(model)
    model.to(device)  # needed in both cases; DataParallel does not move parameters itself

    save_folder = os.path.join('save', conf['name'], f'{data_set}_{"".join(data_category)}', conf['tag'])
    run_folder = os.path.join('run', conf['name'], f'{data_set}_{"".join(data_category)}', conf['tag'])

    shutil.rmtree(save_folder, ignore_errors=True)
    os.makedirs(save_folder)
    shutil.rmtree(run_folder, ignore_errors=True)
    os.makedirs(run_folder)

    with open(os.path.join(save_folder, 'config.yaml'), 'w+') as _f:
        yaml.safe_dump(conf, _f)

    data_loader, normal = get_data_loader(**conf['data'],
                                          data_category=data_category,
                                          device=device,
                                          model_name=model_name)

    train_model(model=model,
                dataloaders=data_loader,
                trainer=trainer,
                optimizer=optimizer,
                normal=normal,
                scheduler=scheduler,
                folder=save_folder,
                tensorboard_folder=run_folder,
                device=device,
                **conf['train'])
    test_model(folder=save_folder,
               trainer=trainer,
               model=model,
               normal=normal,
               dataloaders=data_loader,
               device=device)
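
`get_loss(**conf['loss'])` evidently builds a loss module from the config. A plausible sketch, with the 'name' key and the mapping as assumptions:

# Hypothetical sketch of get_loss (assumed; the repo's version may differ).
import torch.nn as nn

def get_loss(name, **kwargs):
    """Build a loss module from a config name, forwarding extra kwargs."""
    losses = {
        'mse': nn.MSELoss,
        'mae': nn.L1Loss,
        'smooth_l1': nn.SmoothL1Loss,
    }
    return losses[name](**kwargs)
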
Example #6
def train_seg(**kwargs):
    '''
    Train the segmentation network.
    '''
    parse(kwargs)

    loss_function = getattr(Loss_, opt.seg_loss_function)
    model = getattr(models, opt.seg_model)().cuda()
    if opt.seg_model_path is not None:
        model.load(opt.seg_model_path)
    dataset = SegDataLoader()
    dataloader = t.utils.data.DataLoader(dataset,
                                         opt.batch_size,
                                         num_workers=opt.num_workers,
                                         shuffle=opt.shuffle,
                                         pin_memory=opt.pin_memory)

    pre_loss = 100
    lr = opt.lr
    optimizer = get_optimizer(model, opt.lr)
    loss_meter = tnt.meter.AverageValueMeter()

    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        start = time.time()

        for ii, (input, mask) in enumerate(dataloader):

            optimizer.zero_grad()
            input = input.cuda()  # Variable wrappers are unnecessary in modern PyTorch
            target = mask.cuda()

            output = model(input)
            loss, _ = loss_function(output, target)
            # othter_info = [jj.data.cpu().tolist() for jj in othter_info]
            # vis.vis.text(othter_info, win='othter_info')

            loss_meter.add(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.step()

            ### Visualization, logging, printing
            if ii % opt.plot_every == 0 and ii > opt.plot_every:
                if os.path.exists(opt.seg_debug_file):
                    import ipdb
                    ipdb.set_trace()
                vis_plots = {'loss': loss_meter.value()[0], 'ii': ii}
                vis.plot_many(vis_plots)

                # Randomly pick one image to display
                k = t.randperm(input.size(0))[0]
                vis.vis.histogram(output.data[k].view(-1).cpu(),
                                  win=u'output_hist',
                                  opts=dict(title='output_hist'))
                #!TODO: tell 代成 to make 1/3 and 1, not 1 and 3
                vis_imgs = {
                    'input': input.data[k],
                    'mask': target.data[k],
                    'output': output.data[k]
                }
                vis.img_grid_many(vis_imgs)

                print("epoch:%4d, iter:%4d, time: %.8f, loss: %.8f" % (
                    epoch, ii, time.time() - start, loss_meter.value()[0]))

        model.save()
        vis.log({
            ' epoch:': epoch,
            ' loss:': str(loss_meter.value()[0]),
            ' lr: ': lr
        })

        # info = time.strftime('[%m%d %H:%M] epoch') + str(epoch) + ':' + \
        #     str(loss_meter.value()[0]) + str('; lr:') + str(self.lr) + '<br>'
        # vis.vis.texts += info
        # vis.vis.text(vis.vis.texts, win=u'log')

        # Learning-rate decay
        if loss_meter.value()[0] > pre_loss:
            lr = lr * opt.lr_decay
            optimizer = get_optimizer(model, lr)

        pre_loss = loss_meter.value()[0]
        if lr < opt.min_lr:
            break
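
This example decays the learning rate by rebuilding the optimizer, which discards momentum and any other optimizer state. Updating `param_groups` in place avoids that, as the comment in Example #7 below notes; a self-contained sketch of the in-place approach:

# In-place learning-rate decay: momentum buffers survive, unlike when
# the optimizer is rebuilt as above.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
lr_decay = 0.5

for param_group in optimizer.param_groups:
    param_group['lr'] *= lr_decay
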
Example #7
def train_cls(**kwargs):
    '''
    Train the classification network.
    '''
    parse(kwargs)

    loss_function = getattr(Loss_, opt.cls_loss_function)
    model = getattr(models, opt.cls_model)().cuda()
    if opt.cls_model_path is not None:
        model.load(opt.cls_model_path)
    dataset = ClsDataset()
    dataloader = t.utils.data.DataLoader(dataset,
                                         opt.batch_size,
                                         num_workers=opt.num_workers,
                                         shuffle=opt.shuffle,
                                         pin_memory=opt.pin_memory)

    pre_loss = 100
    lr = opt.lr
    optimizer = get_optimizer(model, opt.lr, weight_decay=opt.weight_decay)
    loss_meter = tnt.meter.AverageValueMeter()

    confusem = tnt.meter.ConfusionMeter(2)
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusem.reset()
        start = time.time()
        for ii, (input, label) in enumerate(dataloader):

            optimizer.zero_grad()
            input = input.cuda()  # Variable wrappers are unnecessary in modern PyTorch
            #!TODO: modify label
            target = label.cuda()
            #!TODO: output maybe a list
            output = model(input)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()

            # loss1,loss2,loss3 = loss_function(score1,target),loss_function(score2,target),loss_function(score3,target)
            # loss = loss1+loss2+loss3
            # prob1,prob2,prob3=t.nn.functional.softmax(score1),t.nn.functional.softmax(score2),t.nn.functional.softmax(score3)
            # prob=(prob1+prob2+prob3)/3.0
            confusem.add(get_pro(output).data, target)
            loss_meter.add(loss.item())

            if ii % opt.plot_every == 0 and ii > 0:

                vis_plots = {'loss': loss_meter.value()[0], 'ii': ii}
                vis.plot_many(vis_plots)
                vis.img_grid(label[0], input.data[0])

                vis.vis.text('cm:%s, loss:%s' %
                             (str(confusem.value()), loss.item()),
                             win=u'confusionmatrix')
                if os.path.exists(opt.cls_debug_file):
                    import ipdb
                    ipdb.set_trace()

                print("epoch:%4d, iter:%4d, time:%.8f, loss:%.8f" % (
                    epoch, ii, time.time() - start, loss_meter.value()[0]))

        model.save()
        val_cm, val_loss = val_cls(model, loss_function)
        vis.log(
            'epoch:{epoch},loss:{loss:.4f},lr:{lr:.6f},cm:{cm},val_loss:{val_loss:.4f},val_cm:{val_cm}'
            .format(epoch=epoch,
                    loss=(loss_meter.value()[0]),
                    lr=lr,
                    cm=str(confusem.value()),
                    val_loss=(val_loss.value()[0]),
                    val_cm=str(val_cm.value())))
        vis.plot('val_loss', (val_loss.value()[0]))

        if loss_meter.value()[0] > pre_loss * 1.:
            lr = lr * opt.lr_decay
            # Second way to lower the learning rate: optimizer state such as momentum is not lost
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            # optimizer = get_optimizer(model, lr)
        pre_loss = loss_meter.value()[0]
        if lr < opt.min_lr:
            break
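
Both of the last two examples hand-roll plateau-based decay by comparing the running loss to the previous epoch's value. PyTorch's ReduceLROnPlateau (already used in Examples #1 and #3) implements the same idea; a minimal equivalent sketch:

# Sketch: plateau-based LR decay via the built-in scheduler instead of the
# manual pre_loss comparison above.
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=0)

for epoch in range(5):
    epoch_loss = 1.0  # stand-in for loss_meter.value()[0]
    scheduler.step(epoch_loss)  # decays lr when the loss stops improving
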