Example #1
 def build_optimizer(self):
     self.logger.write("Building optimizer")
     optimizer_name = self.config.training.optimizer
     if optimizer_name == "adam":
         self.optimizer = Adam(self.model.parameters(), lr=self.lr)
     elif optimizer_name == "adadelta":
         self.optimizer = Adadelta(self.model.parameters(), lr=self.lr)
     else:
         raise ValueError(
             "{} optimizer is not supported.".format(optimizer_name))
Example #2
 def compile(self, optimizer='adam', initial_lr=0.002):
     if optimizer.lower() == 'adam':
         self.optimizer = Adam(self.parameters(), lr=initial_lr)
     elif optimizer.lower() == 'adadelta':
         self.optimizer = Adadelta(self.parameters(), lr=initial_lr, rho=0.95, eps=1e-08)
     else:
         raise NotImplementedError("the optimizer hasn't been implemented")
Example #3
 def _set_optimizer(self, lr, opt_conf):
     """optimizerとしてself._optimizerの指示の元、インスタンスを立てるメソッド
     """
     if self._optimizer in adam:
         return Adam([{
             'params': self.model.parameters()
         }],
                     lr=lr,
                     **opt_conf)
     elif self._optimizer in sgd:
         return SGD([{
             'params': self.model.parameters()
         }],
                    lr=lr,
                    **opt_conf)
     elif self._optimizer in rmsprop:
         return RMSprop([{
             'params': self.model.parameters()
         }],
                        lr=lr,
                        **opt_conf)
     elif self._optimizer in adadelta:
         return Adadelta([{
             'params': self.model.parameters()
         }],
                         lr=lr,
                         **opt_conf)
     elif self._optimizer in adagrad:
         return Adagrad([{
             'params': self.model.parameters()
         }],
                        lr=lr,
                        **opt_conf)
     else:
         raise ValueError(f'optimizer={self._optimizer}は用意されていません')
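The membership tests above (self._optimizer in adam, in sgd, ...) imply module-level collections of accepted optimizer names. A minimal sketch of what those collections might look like; the exact aliases are an assumption, not taken from the original project:

adam = {'adam', 'Adam'}
sgd = {'sgd', 'SGD'}
rmsprop = {'rmsprop', 'RMSprop'}
adadelta = {'adadelta', 'Adadelta'}
adagrad = {'adagrad', 'Adagrad'}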
Example #4
def build_optimizer(cfg, model):
    # params = [p for p in model.parameters() if p.requires_grad]
    _params = []
    # filter(lambda p: p.requires_grad, model.parameters())
    for n, p in dict(model.named_parameters()).items():
        if p.requires_grad:
            _args = deepcopy(cfg.OPTIMIZER.BIAS_PARAMS if "bias" in n else cfg.OPTIMIZER.WEIGHT_PARAMS)
            _args.pop("data")
            _params += [{"params": [p], "lr": cfg.INIT_LR, **_args}]
            if "bias" in n:
                _params[-1]["lr"] *= cfg.OPTIMIZER.BIAS_LR_MULTIPLIER or 1.0

    opt_type = cfg.OPTIMIZER.TYPE.lower()

    if opt_type == "sgd":
        '''torch.optim.SGD(params, lr=0.001, momentum=0, dampening=0, weight_decay=0, nesterov=False)'''
        optimizer = SGD(_params)
    elif opt_type == "adam":
        '''torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)'''
        optimizer = Adam(_params)
    elif opt_type == "adamw":
        '''torch.optim.AdamW(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)'''
        optimizer = AdamW(_params)
    elif opt_type == "adadelta":
        '''torch.optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)'''
        optimizer = Adadelta(_params)
    elif opt_type == 'rmsprop':
        '''torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)'''
        optimizer = RMSprop(_params)
    else:
        raise ValueError("Unsupported optimizer type: {}, Expected optimizer method in [SGD, Adam, Adadelta, RMSprop]".format(cfg.OPTIMIZER.TYPE))

    return optimizer
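build_optimizer above reads INIT_LR, OPTIMIZER.TYPE, OPTIMIZER.BIAS_PARAMS / OPTIMIZER.WEIGHT_PARAMS (each carrying a "data" key that gets popped) and OPTIMIZER.BIAS_LR_MULTIPLIER from its cfg argument. A minimal usage sketch, assuming build_optimizer and its torch.optim / deepcopy imports are in scope; SimpleNamespace and the concrete values merely stand in for the project's real config:

from types import SimpleNamespace

import torch.nn as nn

cfg = SimpleNamespace(
    INIT_LR=0.01,
    OPTIMIZER=SimpleNamespace(
        TYPE="adadelta",
        BIAS_LR_MULTIPLIER=2.0,
        # the "data" entry is removed before the remaining kwargs reach the optimizer
        BIAS_PARAMS={"data": None, "weight_decay": 0.0},
        WEIGHT_PARAMS={"data": None, "weight_decay": 5e-4},
    ),
)

model = nn.Linear(4, 2)
optimizer = build_optimizer(cfg, model)  # Adadelta with one parameter group per tensor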
Example #5
 def build(self, params):
     from torch.optim import Adadelta
     return Adadelta(
         params,
         lr=self.lr,
         rho=self.rho,
         eps=self.eps,
         weight_decay=self.weight_decay
     )
Example #6
 def init_optimizer(self):
     if self.args.optimizer.lower() == 'adam':
         self.optimizer = Adam(self.parameters(),
                               lr=self.args.lr,
                               weight_decay=1e-3)
     elif self.args.optimizer.lower() == 'sgd':
         self.optimizer = SGD(self.parameters(),
                              lr=self.args.lr,
                              weight_decay=0.99999)
     elif self.args.optimizer.lower() == 'adad':
         self.optimizer = Adadelta(self.parameters(), lr=self.args.lr)
     else:
          raise ValueError('No such optimizer implemented.')
Example #7
def adadelta(parameters):
    # pick defaults
    if not ("rho" in parameters["optimizer"]):
        parameters["optimizer"]["rho"] = 0.9
    if not ("eps" in parameters["optimizer"]):
        parameters["optimizer"]["eps"] = 1e-6
    if not ("weight_decay" in parameters["optimizer"]):
        parameters["optimizer"]["weight_decay"] = 0
    return Adadelta(
        parameters["model_parameters"],
        lr=parameters["learning_rate"],
        rho=parameters["optimizer"]["rho"],
        eps=parameters["optimizer"]["eps"],
        weight_decay=parameters["optimizer"]["weight_decay"],
    )
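The factory above expects a nested parameters dict. A minimal usage sketch under that assumption; the key names are inferred from the code, the values are illustrative, and the adadelta factory above (with its torch.optim import) is assumed to be in scope:

import torch.nn as nn

model = nn.Linear(10, 2)
parameters = {
    "model_parameters": model.parameters(),
    "learning_rate": 1.0,
    "optimizer": {},  # rho, eps and weight_decay fall back to the defaults picked above
}
optimizer = adadelta(parameters)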
Example #8
def get_optimizer(net):
	if args.optimizer == 'sgd':
		optimizer = SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
	elif args.optimizer == 'nesterov':
		optimizer = SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
						weight_decay=args.weight_decay,nesterov=True)
	elif args.optimizer == 'adagrad':
		optimizer = Adagrad(net.parameters(), weight_decay=args.weight_decay)
	elif args.optimizer == 'adadelta':
		optimizer = Adadelta(net.parameters(), weight_decay=args.weight_decay)
	elif args.optimizer == 'adam':
		optimizer = Adam(net.parameters(), weight_decay=args.weight_decay)
	else:
		raise Exception('Invalid optimizer specified.')

	return optimizer
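get_optimizer above reads a module-level args object. A hedged argparse sketch of the fields it touches; the defaults here are illustrative, not taken from the original script:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--optimizer', default='adadelta',
                    choices=['sgd', 'nesterov', 'adagrad', 'adadelta', 'adam'])
parser.add_argument('--lr', type=float, default=0.1)
parser.add_argument('--momentum', type=float, default=0.9)
parser.add_argument('--weight_decay', type=float, default=5e-4)
args = parser.parse_args()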
Example #9
def main(cfg):  # DIFF
    print(cfg.pretty())
    use_cuda = not cfg.no_cuda and torch.cuda.is_available()  # DIFF
    torch.manual_seed(cfg.seed)  # DIFF
    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {"batch_size": cfg.batch_size}  # DIFF
    test_kwargs = {"batch_size": cfg.test_batch_size}  # DIFF
    if use_cuda:
        cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    dataset1 = datasets.MNIST("../data",
                              train=True,
                              download=True,
                              transform=transform)
    dataset2 = datasets.MNIST("../data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)

    optimizer = Adadelta(
        lr=cfg.adadelta.lr,
        rho=cfg.adadelta.rho,
        eps=cfg.adadelta.eps,
        weight_decay=cfg.adadelta.weight_decay,
        params=model.parameters(),
    )  # DIFF
    scheduler = StepLR(
        step_size=cfg.steplr.step_size,
        gamma=cfg.steplr.gamma,
        last_epoch=cfg.steplr.last_epoch,
        optimizer=optimizer,
    )  # DIFF

    for epoch in range(1, cfg.epochs + 1):  # DIFF
        train(cfg, model, device, train_loader, optimizer, epoch)  # DIFF
        test(model, device, test_loader)
        scheduler.step()

    if cfg.save_model:  # DIFF
        torch.save(model.state_dict(), cfg.checkpoint_name)  # DIFF
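The # DIFF markers suggest this is the standard argparse MNIST example ported to Hydra/OmegaConf (cfg.pretty() points to an older OmegaConf release). A hedged sketch of a config object that would satisfy the accesses above; the field names come from the snippet, the values are borrowed from the upstream MNIST example and may differ from the real config files:

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "no_cuda": False, "seed": 1, "batch_size": 64, "test_batch_size": 1000,
    "epochs": 14, "save_model": False, "checkpoint_name": "mnist_cnn.pt",
    "adadelta": {"lr": 1.0, "rho": 0.9, "eps": 1e-6, "weight_decay": 0.0},
    "steplr": {"step_size": 1, "gamma": 0.7, "last_epoch": -1},
})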
Example #10
def get_optimizer(params, cfg):
    if cfg.optimizer == 'SGD':
        return SGD(params,
                   lr=cfg.lr,
                   momentum=cfg.momentum,
                   weight_decay=cfg.weight_decay)
    elif cfg.optimizer == 'Adadelta':
        return Adadelta(params, lr=cfg.lr, weight_decay=cfg.weight_decay)
    elif cfg.optimizer == 'Adagrad':
        return Adagrad(params, lr=cfg.lr, weight_decay=cfg.weight_decay)
    elif cfg.optimizer == 'Adam':
        return Adam(params, lr=cfg.lr, weight_decay=cfg.weight_decay)
    elif cfg.optimizer == 'RMSprop':
        return RMSprop(params,
                       lr=cfg.lr,
                       momentum=cfg.momentum,
                       weight_decay=cfg.weight_decay)
    else:
        raise Exception('Unknown optimizer : {}'.format(cfg.optimizer))
Example #11
 def generate_optimizer(opt, lr, params):
     if opt == 'adam':
         return Adam(params,
                     lr=lr,
                     betas=betas,
                     weight_decay=gamma,
                     eps=eps)
     elif opt == 'sgd':
         return SGD(params,
                    lr=lr,
                    momentum=momentum,
                    weight_decay=gamma,
                    nesterov=True)
     elif opt == 'adadelta':
         return Adadelta(params,
                         lr=lr,
                         rho=rho,
                         weight_decay=gamma,
                         eps=eps)
     else:
         raise ValueError('Unknown optimization algorithm: %s' % opt)
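generate_optimizer reads betas, gamma, eps, momentum and rho from its enclosing scope. A hedged guess at those module-level hyperparameters; the values are illustrative, not recovered from the original source:

betas = (0.9, 0.999)  # Adam moment coefficients
gamma = 1e-4          # reused as weight_decay for every optimizer
eps = 1e-8
momentum = 0.9
rho = 0.95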
Example #12
def DICTAVAILOPTIMIZERS(option, model_params, lr):
    list_optimizer_avail = [
        'SGD', 'SGDmom', 'Adagrad', 'RMSprop', 'Adadelta', 'Adam'
    ]

    if (option == 'SGD'):
        return SGD(model_params, lr=lr)
    elif (option == 'SGDmom'):
        return SGD(model_params, lr=lr, momentum=0.9)
    elif (option == 'Adagrad'):
        return Adagrad(model_params, lr=lr)
    elif (option == 'RMSprop'):
        return RMSprop(model_params, lr=lr)
    elif (option == 'Adadelta'):
        return Adadelta(model_params, lr=lr)
    elif (option == 'Adam'):
        return Adam(model_params, lr=lr)
    else:
        message = 'Optimizer chosen not found. Optimizers available: (%s)' % (
            ', '.join(list_optimizer_avail))
        CatchErrorException(message)
        return NotImplemented
Example #13
def initialize_optimizer(params, cfg):
    """
    Create an optimizer for the given params based on the given cfg.

    :param params: The parameters of the model we optimize.
    :param cfg: The config from which we configure the optimizer.

    :returns: An optimizer for given `params` based on the `cfg`.
    """
    optimizer = cfg.optimizer.lower()
    assert optimizer in ["adam", "adadelta", "adamax", "rmsprop", "adagrad"]
    if optimizer == "adam":
        return Adam(params, lr=cfg.learning_rate)
    if optimizer == "adadelta":
        return Adadelta(params, lr=cfg.learning_rate)
    if optimizer == "adamax":
        return Adamax(params, lr=cfg.learning_rate)
    if optimizer == "rmsprop":
        return RMSprop(params, lr=cfg.learning_rate)
    if optimizer == "adagrad":
        return Adagrad(params,
                       lr=cfg.learning_rate,
                       initial_accumulator_value=cfg.adagrad_init_acc)
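A minimal usage sketch for initialize_optimizer, assuming a namespace-style cfg and that the function plus its torch.optim imports are in scope; adagrad_init_acc is only consulted by the Adagrad branch:

from types import SimpleNamespace

import torch.nn as nn

model = nn.Linear(8, 3)
cfg = SimpleNamespace(optimizer="Adadelta", learning_rate=1.0, adagrad_init_acc=0.1)
optimizer = initialize_optimizer(model.parameters(), cfg)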
Example #14
def train(train_loader, validation_loader, net, embeddings):
    """
    Trains the network with a given training data loader and validation data
    loader.
    """
    optimizer = Adadelta(net.parameters())
    evaluate(validation_loader, net, 'validation', log=False)
    prev_best_acc = 0

    for i in range(10):
        print('Epoch:', i)
        net.train()

        avg_loss = 0
        avg_acc = 0

        for i, (vectors, targets) in enumerate(train_loader):
            optimizer.zero_grad()
            logits = net(vectors)
            loss = F.cross_entropy(logits, targets)
            loss.backward()
            optimizer.step()

            corrects = float((torch.max(logits, 1)[1].view(
                targets.size()).data == targets.data).sum())
            accuracy = 100.0 * corrects / batch_size
            avg_loss += float(loss)
            avg_acc += accuracy

        avg_loss /= i + 1
        avg_acc /= i + 1

        logger('training', 'loss', avg_loss)
        logger('training', 'accuracy', avg_acc)

        acc = evaluate(validation_loader, net, 'validation')

        if acc > prev_best_acc:
            torch.save(net.state_dict(), params_file)
            prev_best_acc = acc
Example #15
def main(args):
    # first, we define some pre-processing
    data_transforms = transforms.Compose([
        # extra augmentations
        # transforms.ColorJitter(brightness=0.3),
        # necessary transformations
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # build our data loaders
    train_dataset = MNIST('../data', train=True, download=True, transform=data_transforms)
    test_dataset = MNIST('../data', train=False, transform=data_transforms)

    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size)

    # create the model
    model = Net()

    # build an optimizer for optimizing the parameters of our model
    optimizer = Adadelta(model.parameters(), lr=args.lr)

    # if we want to use cuda, we have to copy all parameters to the GPU
    model.to(args.device)

    # build object that handles updating routine
    updater = MNISTUpdater(
        iterators={'images': train_dataloader},
        networks={'net': model},
        optimizers={'main': optimizer},
        device=args.device,
        copy_to_device=True,
    )

    # build the trainer
    trainer = Trainer(
        updater,
        stop_trigger=get_trigger((args.epochs, 'epoch'))
    )

    # prepare logging
    logger = TensorboardLogger(
        args.log_dir,
        args,
        {},
        Path(__file__).resolve().parent,
        trigger=get_trigger((100, 'iteration'))
    )

    # make sure we are evaluating
    trainer.extend(Evaluator(
        test_dataloader,
        logger,
        MNISTEvaluator(model),
        args.device,
    ))

    # make sure we are saving the trained models to disk, including the optimizer. This allows us to resume training.
    snapshotter = Snapshotter(
        {
            'network': model,
            'optimizer': optimizer
        },
        args.log_dir
    )
    trainer.extend(snapshotter)

    # add learning rate scheduling, in this case Cosine Annealing
    schedulers = {
        "encoder": CosineAnnealingLR(optimizer, trainer.num_epochs * trainer.iterations_per_epoch, eta_min=1e-8)
    }
    lr_scheduler = LRScheduler(schedulers, trigger=get_trigger((1, 'iteration')))
    trainer.extend(lr_scheduler)

    trainer.extend(logger)

    trainer.train()
Example #16
def main(args):
    # ================================================
    # Preparation
    # ================================================
    if not torch.cuda.is_available():
        raise Exception('At least one GPU must be available.')
    gpu = torch.device('cuda:0')

    # create result directory (if necessary)
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)
    for phase in ['phase_1', 'phase_2', 'phase_3']:
        if not os.path.exists(os.path.join(args.result_dir, phase)):
            os.makedirs(os.path.join(args.result_dir, phase))

    # load dataset
    trnsfm = transforms.Compose([
        transforms.Resize(args.cn_input_size),
        transforms.RandomCrop((args.cn_input_size, args.cn_input_size)),
        transforms.ToTensor(),
    ])
    print('loading dataset... (it may take a few minutes)')
    train_dset = ImageDataset(os.path.join(args.data_dir, 'train'),
                              trnsfm,
                              recursive_search=args.recursive_search)
    test_dset = ImageDataset(os.path.join(args.data_dir, 'test'),
                             trnsfm,
                             recursive_search=args.recursive_search)
    train_loader = DataLoader(train_dset,
                              batch_size=(args.bsize // args.bdivs),
                              shuffle=True)

    # compute mpv (mean pixel value) of training dataset
    if args.mpv is None:
        mpv = np.zeros(shape=(1, ))
        pbar = tqdm(total=len(train_dset.imgpaths),
                    desc='computing mean pixel value of training dataset...')
        for imgpath in train_dset.imgpaths:
            img = Image.open(imgpath)
            x = np.array(img) / 255.
            mpv += x.mean(axis=(0, 1))
            pbar.update()
        mpv /= len(train_dset.imgpaths)
        pbar.close()
    else:
        mpv = np.array(args.mpv)

    # save training config
    mpv_json = []
    for i in range(1):
        mpv_json.append(float(mpv[i]))
    args_dict = vars(args)
    # args_dict['mpv'] = mpv_json
    with open(os.path.join(args.result_dir, 'config.json'), mode='w') as f:
        json.dump(args_dict, f)

    # make mpv & alpha tensors
    mpv = torch.tensor(mpv.reshape(1, 1, 1, 1), dtype=torch.float32).to(gpu)
    alpha = torch.tensor(args.alpha, dtype=torch.float32).to(gpu)

    # ================================================
    # Training Phase 1
    # ================================================
    # load completion network
    model_cn = CompletionNetwork()
    if args.init_model_cn is not None:
        model_cn.load_state_dict(
            torch.load(args.init_model_cn, map_location='cpu'))
    if args.data_parallel:
        model_cn = DataParallel(model_cn)
    model_cn = model_cn.to(gpu)
    opt_cn = Adadelta(model_cn.parameters())

    # training
    cnt_bdivs = 0
    pbar = tqdm(total=args.steps_1)
    while pbar.n < args.steps_1:
        for x in train_loader:
            # forward
            x = x.to(gpu)
            mask = gen_input_mask(shape=(x.shape[0], 1, x.shape[2],
                                         x.shape[3]), ).to(gpu)
            x_mask = x - x * mask + mpv * mask
            input = torch.cat((x_mask, mask), dim=1)
            output = model_cn(input)
            loss = completion_network_loss(x, output, mask)

            # backward
            loss.backward()
            cnt_bdivs += 1
            if cnt_bdivs >= args.bdivs:
                cnt_bdivs = 0

                # optimize
                opt_cn.step()
                opt_cn.zero_grad()
                pbar.set_description('phase 1 | train loss: %.5f' % loss.cpu())
                pbar.update()

                # test
                if pbar.n % args.snaperiod_1 == 0:
                    model_cn.eval()
                    with torch.no_grad():
                        x = sample_random_batch(
                            test_dset,
                            batch_size=args.num_test_completions).to(gpu)
                        mask = gen_input_mask(shape=(x.shape[0], 1, x.shape[2],
                                                     x.shape[3]), ).to(gpu)
                        x_mask = x - x * mask + mpv * mask
                        input = torch.cat((x_mask, mask), dim=1)
                        output = model_cn(input)
                        completed = rejoiner(x_mask, output, mask)
                        imgs = torch.cat(
                            (x.cpu(), x_mask.cpu(), completed.cpu()), dim=0)
                        imgpath = os.path.join(args.result_dir, 'phase_1',
                                               'step%d.png' % pbar.n)
                        model_cn_path = os.path.join(
                            args.result_dir, 'phase_1',
                            'model_cn_step%d' % pbar.n)
                        save_image(imgs, imgpath, nrow=len(x))
                        if args.data_parallel:
                            torch.save(model_cn.module.state_dict(),
                                       model_cn_path)
                        else:
                            torch.save(model_cn.state_dict(), model_cn_path)
                    model_cn.train()
                if pbar.n >= args.steps_1:
                    break
    pbar.close()

    # ================================================
    # Training Phase 2
    # ================================================
    # load context discriminator
    model_cd = ContextDiscriminator(
        local_input_shape=(1, args.ld_input_size, args.ld_input_size),
        global_input_shape=(1, args.cn_input_size, args.cn_input_size),
    )
    if args.init_model_cd is not None:
        model_cd.load_state_dict(
            torch.load(args.init_model_cd, map_location='cpu'))
    if args.data_parallel:
        model_cd = DataParallel(model_cd)
    model_cd = model_cd.to(gpu)
    opt_cd = Adadelta(model_cd.parameters(), lr=0.1)
    bceloss = BCELoss()

    # training
    cnt_bdivs = 0
    pbar = tqdm(total=args.steps_2)
    while pbar.n < args.steps_2:
        for x in train_loader:
            # fake forward
            x = x.to(gpu)
            hole_area_fake = gen_hole_area(
                (args.ld_input_size, args.ld_input_size),
                (x.shape[3], x.shape[2]))
            mask = gen_input_mask(shape=(x.shape[0], 1, x.shape[2],
                                         x.shape[3]), ).to(gpu)
            fake = torch.zeros((len(x), 1)).to(gpu)
            x_mask = x - x * mask + mpv * mask
            input_cn = torch.cat((x_mask, mask), dim=1)
            output_cn = model_cn(input_cn)
            input_gd_fake = output_cn.detach()
            input_ld_fake = crop(input_gd_fake, hole_area_fake)
            output_fake = model_cd(
                (input_ld_fake.to(gpu), input_gd_fake.to(gpu)))
            loss_fake = bceloss(output_fake, fake)

            # real forward
            hole_area_real = gen_hole_area(
                (args.ld_input_size, args.ld_input_size),
                (x.shape[3], x.shape[2]))
            real = torch.ones((len(x), 1)).to(gpu)
            input_gd_real = x
            input_ld_real = crop(input_gd_real, hole_area_real)
            output_real = model_cd((input_ld_real, input_gd_real))
            loss_real = bceloss(output_real, real)

            # reduce
            loss = (loss_fake + loss_real) / 2.

            # backward
            loss.backward()
            cnt_bdivs += 1
            if cnt_bdivs >= args.bdivs:
                cnt_bdivs = 0

                # optimize
                opt_cd.step()
                opt_cd.zero_grad()
                pbar.set_description('phase 2 | train loss: %.5f' % loss.cpu())
                pbar.update()

                # test
                if pbar.n % args.snaperiod_2 == 0:
                    model_cn.eval()
                    with torch.no_grad():
                        x = sample_random_batch(
                            test_dset,
                            batch_size=args.num_test_completions).to(gpu)
                        mask = gen_input_mask(shape=(x.shape[0], 1, x.shape[2],
                                                     x.shape[3]), ).to(gpu)
                        x_mask = x - x * mask + mpv * mask
                        input = torch.cat((x_mask, mask), dim=1)
                        output = model_cn(input)
                        completed = rejoiner(x_mask, output, mask)
                        imgs = torch.cat(
                            (x.cpu(), x_mask.cpu(), completed.cpu()), dim=0)
                        imgpath = os.path.join(args.result_dir, 'phase_2',
                                               'step%d.png' % pbar.n)
                        model_cd_path = os.path.join(
                            args.result_dir, 'phase_2',
                            'model_cd_step%d' % pbar.n)
                        save_image(imgs, imgpath, nrow=len(x))
                        if args.data_parallel:
                            torch.save(model_cd.module.state_dict(),
                                       model_cd_path)
                        else:
                            torch.save(model_cd.state_dict(), model_cd_path)
                    model_cn.train()
                if pbar.n >= args.steps_2:
                    break
    pbar.close()

    # ================================================
    # Training Phase 3
    # ================================================
    cnt_bdivs = 0
    pbar = tqdm(total=args.steps_3)
    while pbar.n < args.steps_3:
        for x in train_loader:
            # forward model_cd
            x = x.to(gpu)
            hole_area_fake = gen_hole_area(
                (args.ld_input_size, args.ld_input_size),
                (x.shape[3], x.shape[2]))
            mask = gen_input_mask(shape=(x.shape[0], 1, x.shape[2],
                                         x.shape[3]), ).to(gpu)

            # fake forward
            fake = torch.zeros((len(x), 1)).to(gpu)
            x_mask = x - x * mask + mpv * mask
            input_cn = torch.cat((x_mask, mask), dim=1)
            output_cn = model_cn(input_cn)
            input_gd_fake = output_cn.detach()
            input_ld_fake = crop(input_gd_fake, hole_area_fake)
            output_fake = model_cd((input_ld_fake, input_gd_fake))
            loss_cd_fake = bceloss(output_fake, fake)

            # real forward
            hole_area_real = gen_hole_area(
                (args.ld_input_size, args.ld_input_size),
                (x.shape[3], x.shape[2]))
            real = torch.ones((len(x), 1)).to(gpu)
            input_gd_real = x
            input_ld_real = crop(input_gd_real, hole_area_real)
            output_real = model_cd((input_ld_real, input_gd_real))
            loss_cd_real = bceloss(output_real, real)

            # reduce
            loss_cd = (loss_cd_fake + loss_cd_real) * alpha / 2.

            # backward model_cd
            loss_cd.backward()
            cnt_bdivs += 1
            if cnt_bdivs >= args.bdivs:
                # optimize
                opt_cd.step()
                opt_cd.zero_grad()

            # forward model_cn
            loss_cn_1 = completion_network_loss(x, output_cn, mask)
            input_gd_fake = output_cn
            input_ld_fake = crop(input_gd_fake, hole_area_fake)
            output_fake = model_cd((input_ld_fake, (input_gd_fake)))
            loss_cn_2 = bceloss(output_fake, real)

            # reduce
            loss_cn = (loss_cn_1 + alpha * loss_cn_2) / 2.

            # backward model_cn
            loss_cn.backward()
            if cnt_bdivs >= args.bdivs:
                cnt_bdivs = 0

                # optimize
                opt_cn.step()
                opt_cn.zero_grad()
                pbar.set_description(
                    'phase 3 | train loss (cd): %.5f (cn): %.5f' %
                    (loss_cd.cpu(), loss_cn.cpu()))
                pbar.update()

                # test
                if pbar.n % args.snaperiod_3 == 0:
                    model_cn.eval()
                    with torch.no_grad():
                        x = sample_random_batch(
                            test_dset,
                            batch_size=args.num_test_completions).to(gpu)
                        mask = gen_input_mask(shape=(x.shape[0], 1, x.shape[2],
                                                     x.shape[3]), ).to(gpu)
                        x_mask = x - x * mask + mpv * mask
                        input = torch.cat((x_mask, mask), dim=1)
                        output = model_cn(input)
                        completed = rejoiner(x_mask, output, mask)
                        imgs = torch.cat(
                            (x.cpu(), x_mask.cpu(), completed.cpu()), dim=0)
                        imgpath = os.path.join(args.result_dir, 'phase_3',
                                               'step%d.png' % pbar.n)
                        model_cn_path = os.path.join(
                            args.result_dir, 'phase_3',
                            'model_cn_step%d' % pbar.n)
                        model_cd_path = os.path.join(
                            args.result_dir, 'phase_3',
                            'model_cd_step%d' % pbar.n)
                        save_image(imgs, imgpath, nrow=len(x))
                        if args.data_parallel:
                            torch.save(model_cn.module.state_dict(),
                                       model_cn_path)
                            torch.save(model_cd.module.state_dict(),
                                       model_cd_path)
                        else:
                            torch.save(model_cn.state_dict(), model_cn_path)
                            torch.save(model_cd.state_dict(), model_cd_path)
                    model_cn.train()
                if pbar.n >= args.steps_3:
                    break
    pbar.close()
Example #17
def main(rank, args):

    # Distributed setup

    if args.distributed:
        setup_distributed(rank, args.world_size)

    not_main_rank = args.distributed and rank != 0

    logging.info("Start time: %s", datetime.now())

    # Explicitly set seed to make sure models created in separate processes
    # start from same random weights and biases
    torch.manual_seed(args.seed)

    # Empty CUDA cache
    torch.cuda.empty_cache()

    # Change backend for flac files
    torchaudio.set_audio_backend("soundfile")

    # Transforms

    melkwargs = {
        "n_fft": args.win_length,
        "n_mels": args.n_bins,
        "hop_length": args.hop_length,
    }

    sample_rate_original = 16000

    if args.type == "mfcc":
        transforms = torch.nn.Sequential(
            torchaudio.transforms.MFCC(
                sample_rate=sample_rate_original,
                n_mfcc=args.n_bins,
                melkwargs=melkwargs,
            ), )
        num_features = args.n_bins
    elif args.type == "waveform":
        transforms = torch.nn.Sequential(UnsqueezeFirst())
        num_features = 1
    else:
        raise ValueError("Model type not supported")

    if args.normalize:
        transforms = torch.nn.Sequential(transforms, Normalize())

    augmentations = torch.nn.Sequential()
    if args.freq_mask:
        augmentations = torch.nn.Sequential(
            augmentations,
            torchaudio.transforms.FrequencyMasking(
                freq_mask_param=args.freq_mask),
        )
    if args.time_mask:
        augmentations = torch.nn.Sequential(
            augmentations,
            torchaudio.transforms.TimeMasking(time_mask_param=args.time_mask),
        )

    # Text preprocessing

    char_blank = "*"
    char_space = " "
    char_apostrophe = "'"
    labels = char_blank + char_space + char_apostrophe + string.ascii_lowercase
    language_model = LanguageModel(labels, char_blank, char_space)

    # Dataset

    training, validation = split_process_librispeech(
        [args.dataset_train, args.dataset_valid],
        [transforms, transforms],
        language_model,
        root=args.dataset_root,
        folder_in_archive=args.dataset_folder_in_archive,
    )

    # Decoder

    if args.decoder == "greedy":
        decoder = GreedyDecoder()
    else:
        raise ValueError("Selected decoder not supported")

    # Model

    model = Wav2Letter(
        num_classes=language_model.length,
        input_type=args.type,
        num_features=num_features,
    )

    if args.jit:
        model = torch.jit.script(model)

    if args.distributed:
        n = torch.cuda.device_count() // args.world_size
        devices = list(range(rank * n, (rank + 1) * n))
        model = model.to(devices[0])
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=devices)
    else:
        devices = ["cuda" if torch.cuda.is_available() else "cpu"]
        model = model.to(devices[0], non_blocking=True)
        model = torch.nn.DataParallel(model)

    n = count_parameters(model)
    logging.info("Number of parameters: %s", n)

    # Optimizer

    if args.optimizer == "adadelta":
        optimizer = Adadelta(
            model.parameters(),
            lr=args.learning_rate,
            weight_decay=args.weight_decay,
            eps=args.eps,
            rho=args.rho,
        )
    elif args.optimizer == "sgd":
        optimizer = SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adam":
        optimizer = Adam(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adamw":
        optimizer = AdamW(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    else:
        raise ValueError("Selected optimizer not supported")

    if args.scheduler == "exponential":
        scheduler = ExponentialLR(optimizer, gamma=args.gamma)
    elif args.scheduler == "reduceonplateau":
        scheduler = ReduceLROnPlateau(optimizer, patience=10, threshold=1e-3)
    else:
        raise ValueError("Selected scheduler not supported")

    criterion = torch.nn.CTCLoss(blank=language_model.mapping[char_blank],
                                 zero_infinity=False)

    # Data Loader

    collate_fn_train = collate_factory(model_length_function, augmentations)
    collate_fn_valid = collate_factory(model_length_function)

    loader_training_params = {
        "num_workers": args.workers,
        "pin_memory": True,
        "shuffle": True,
        "drop_last": True,
    }
    loader_validation_params = loader_training_params.copy()
    loader_validation_params["shuffle"] = False

    loader_training = DataLoader(
        training,
        batch_size=args.batch_size,
        collate_fn=collate_fn_train,
        **loader_training_params,
    )
    loader_validation = DataLoader(
        validation,
        batch_size=args.batch_size,
        collate_fn=collate_fn_valid,
        **loader_validation_params,
    )

    # Setup checkpoint

    best_loss = 1.0

    load_checkpoint = args.checkpoint and os.path.isfile(args.checkpoint)

    if args.distributed:
        torch.distributed.barrier()

    if load_checkpoint:
        logging.info("Checkpoint: loading %s", args.checkpoint)
        checkpoint = torch.load(args.checkpoint)

        args.start_epoch = checkpoint["epoch"]
        best_loss = checkpoint["best_loss"]

        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        scheduler.load_state_dict(checkpoint["scheduler"])

        logging.info("Checkpoint: loaded '%s' at epoch %s", args.checkpoint,
                     checkpoint["epoch"])
    else:
        logging.info("Checkpoint: not found")

        save_checkpoint(
            {
                "epoch": args.start_epoch,
                "state_dict": model.state_dict(),
                "best_loss": best_loss,
                "optimizer": optimizer.state_dict(),
                "scheduler": scheduler.state_dict(),
            },
            False,
            args.checkpoint,
            not_main_rank,
        )

    if args.distributed:
        torch.distributed.barrier()

    torch.autograd.set_detect_anomaly(False)

    for epoch in range(args.start_epoch, args.epochs):

        logging.info("Epoch: %s", epoch)

        train_one_epoch(
            model,
            criterion,
            optimizer,
            scheduler,
            loader_training,
            decoder,
            language_model,
            devices[0],
            epoch,
            args.clip_grad,
            not_main_rank,
            not args.reduce_lr_valid,
        )

        loss = evaluate(
            model,
            criterion,
            loader_validation,
            decoder,
            language_model,
            devices[0],
            epoch,
            not_main_rank,
        )

        if args.reduce_lr_valid and isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(loss)

        is_best = loss < best_loss
        best_loss = min(loss, best_loss)
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "state_dict": model.state_dict(),
                "best_loss": best_loss,
                "optimizer": optimizer.state_dict(),
                "scheduler": scheduler.state_dict(),
            },
            is_best,
            args.checkpoint,
            not_main_rank,
        )

    logging.info("End time: %s", datetime.now())

    if args.distributed:
        torch.distributed.destroy_process_group()
Example #18
def train_model(args):
  # Read and process data
  train, dev, test, batch_size, test_batch_size, train_ques_to_para,\
  dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\
  dev_tokenized_paras, test_tokenized_paras, train_order, dev_order, test_order,\
  train_data, dev_data, test_data, train_tokenized_paras_chars,\
  dev_tokenized_paras_chars, test_tokenized_paras_chars = read_and_process_data(args)

  # Build model
  model, config = build_model(args, train_data.dictionary.size(),
                              train_data.dictionary.index_to_word,
                              train_data.dictionary.word_to_index,
                              train_data.dictionary.char_to_index,
                              train_data.dictionary.index_to_char)

  if not os.path.exists(args.model_dir):
    os.mkdir(args.model_dir)

  #------------------------------ Train System ----------------------------------#
  # Should we resume running from an existing checkpoint?
  last_done_epoch = config['ckpt']
  if last_done_epoch > 0:
    model = model.load(args.model_dir, last_done_epoch)
    print "Loaded model."
    if not args.disable_glove:
      print "Embedding shape:", model.embedding.shape

  start_time = time.time()
  print "Starting training."

  # Decide which optimizer to use.
  if args.optimizer == "SGD":
    print "Using SGD optimizer."
    optimizer = SGD(model.parameters(), lr = args.learning_rate)
  elif args.optimizer == "Adamax":
    print "Using Adamax optimizer."
    optimizer = Adamax(model.parameters(), lr= args.learning_rate)
  elif args.optimizer == "Adadelta":
    print "Using Adadelta optimizer."
    optimizer = Adadelta(model.parameters(), lr=args.learning_rate, rho=0.95)
  else:
    assert False, "Unrecognized optimizer."

  if last_done_epoch > 0:
    if os.path.exists(args.model_dir + "/optim_%d.pt" % last_done_epoch):
      optimizer = torch.load(args.model_dir + "/optim_%d.pt" % last_done_epoch)
    else:
      print "Optimizer saved state not found. Not loading optimizer."

  # Model summary.
  print(model)

  for EPOCH in range(last_done_epoch+1, args.epochs):
    start_t = time.time()
    train_loss_sum = 0.0
    model.train()
    for i, num in enumerate(train_order):
      print "\rTrain epoch %d, %.2f s - (Done %d of %d)" %\
            (EPOCH, (time.time()-start_t)*(len(train_order)-i-1)/(i+1), i+1,
             len(train_order)),

      # Create next batch by getting lengths and padding
      train_batch = train[num:num+batch_size]

      passage_input_f, passage_input_b, question_input_f, question_input_b,\
      passage_input_lens, question_input_lens, passage_input_chars_f,\
      passage_input_chars_b, question_input_chars_f, question_input_chars_b,\
      passage_input_chars_lens, question_input_chars_lens, answer_input =\
        get_minibatch_input(train_batch, train_tokenized_paras,
                            train_tokenized_paras_chars, train_ques_to_para)

      # Zero previous gradient.
      model.zero_grad()
      model((passage_input_chars_f, passage_input_chars_lens),\
            (passage_input_chars_b, passage_input_chars_lens),\
            (question_input_chars_f, question_input_chars_lens),\
            (question_input_chars_b, question_input_chars_lens),\
            (passage_input_f, passage_input_lens),\
            (passage_input_b, passage_input_lens),\
            (question_input_f, question_input_lens),\
            (question_input_b, question_input_lens),\
            answer_input)

      model.loss.backward()
      optimizer.step()
      train_loss_sum += model.loss.data[0]
      model.free_memory()

      print "Loss: %.5f (in time %.2fs)" % \
            (train_loss_sum/(i+1), time.time() - start_t),
      sys.stdout.flush()

    print "\nLoss: %.5f (in time %.2fs)" % \
          (train_loss_sum/len(train_order), time.time() - start_t)

    # End of epoch.
    random.shuffle(train_order)
    model.zero_grad()
    model.save(args.model_dir, EPOCH)

    # Updating LR for optimizer
    for param in optimizer.param_groups:
      param['lr'] *= config['decay']

    torch.save(optimizer, args.model_dir + "/optim_%d.pt" % EPOCH)

    # Run pass over dev data.
    dev_start_t = time.time()
    dev_loss_sum = 0.0
    all_predictions = {}
    print "\nRunning on Dev."

    model.eval()
    for i, num in enumerate(dev_order):
      print "\rDev: %.2f s (Done %d of %d)" %\
            ((time.time()-dev_start_t)*(len(dev_order)-i-1)/(i+1), i+1,
            len(dev_order)),

      dev_batch = dev[num:num+test_batch_size]

      passage_input_f, passage_input_b, question_input_f, question_input_b,\
      passage_input_lens, question_input_lens, passage_input_chars_f,\
      passage_input_chars_b, question_input_chars_f, question_input_chars_b,\
      passage_input_chars_lens, question_input_chars_lens, answer_input =\
        get_minibatch_input(dev_batch, dev_tokenized_paras,
                            dev_tokenized_paras_chars, dev_ques_to_para)

      # distributions[{0,1}].shape = (batch, max_passage_len)
      distributions = \
        model((passage_input_chars_f, passage_input_chars_lens),\
              (passage_input_chars_b, passage_input_chars_lens),\
              (question_input_chars_f, question_input_chars_lens),\
              (question_input_chars_b, question_input_chars_lens),\
              (passage_input_f, passage_input_lens),\
              (passage_input_b, passage_input_lens),\
              (question_input_f, question_input_lens),\
              (question_input_b, question_input_lens),\
              answer_input)
      distributions[0] = distributions[0].data.cpu().numpy()
      distributions[1] = distributions[1].data.cpu().numpy()

      # Add all batch qids to predictions dict, if they don't already exist.
      qids = [ example[2] for example in dev_batch ]
      for qid in qids:
        if not qid in all_predictions:
          all_predictions[qid] = []

      best_idxs = []
      for idx in range(len(dev_batch)):
        best_prob = -1
        best = [0, 0]
        max_end = passage_input_lens[idx]
        for j, start_prob in enumerate(distributions[0][idx][:max_end]):
          cur_end_idx = min(j + args.max_answer_span, max_end)
          end_idx = np.argmax(distributions[1][idx][j:cur_end_idx])
          prob = distributions[1][idx][j+end_idx] * start_prob
          if prob > best_prob:
            best_prob = prob
            best = [j, j+end_idx]
        best_idxs.append(best)

      tokenized_paras = dev_data.tokenized_paras
      answers = [ tokenized_paras[dev_ques_to_para[qids[idx]]][start:end+1] \
                    for idx, (start, end) in enumerate(best_idxs) ]
      answers = [ " ".join([ dev_data.dictionary.get_word(idx) for idx in ans ]) \
                    for ans in answers ]

      for qid, answer in zip(qids, answers):
        all_predictions[qid] = answer

      dev_loss_sum += model.loss.data[0]
      model.free_memory()
      print "[Average loss : %.5f]" % (dev_loss_sum/(i+1)),
      sys.stdout.flush()

    # Print dev stats for epoch
    print "\nDev Loss: %.4f (in time: %.2f s)" %\
          (dev_loss_sum/len(dev_order), (time.time() - dev_start_t))

    # Dump the results json in the required format
    print "Dumping prediction results."
    json.dump(
      all_predictions,
      open(args.model_dir + "/dev_predictions_" + str(EPOCH) + ".json", "w"))
    print "Done."
Example #19
# Create a loader for the training set
train_loader = DataLoader(train_set,batch_size=256,shuffle=True,num_workers=4)

# Load the test set, note that train is set to False
test_set = CIFAR10(root="./data", train=False, transform=transformations, download=True)
# Create a loader for the test set; note that shuffle is set to False for the test loader
test_loader = DataLoader(test_set, batch_size=64, shuffle=False, num_workers=4)

# Create model, optimizer and loss function
# model = AlexNet(num_classes=10)
model = SqueezeNet(version=1.1, num_classes=10)
# model = SqueezeNet(version=1.2, num_classes=10)
# model = SqueezeNetPReLu(version=1.1, num_classes=10)
# model = SqueezeNetPReLu(version=1.2, num_classes=10)

if torch.cuda.is_available():
    model.cuda()

optimizer = Adadelta(model.parameters())
loss_fn = nn.CrossEntropyLoss()

# Print the number of model parameters
print(count_parameters(model))

# Train the model for 100 epochs
train(100)




Example #20
def train_predict(batch_size=100, epochs=10, topk=30, L2=1e-8):
    patients = getTrainData(4000000)  # patients × visits × medical_code

    patients_num = len(patients)
    train_patient_num = int(patients_num * 0.8)
    patients_train = patients[0:train_patient_num]
    test_patient_num = patients_num - train_patient_num
    patients_test = patients[train_patient_num:]

    train_batch_num = int(np.ceil(float(train_patient_num) / batch_size))
    test_batch_num = int(np.ceil(float(test_patient_num) / batch_size))

    model = Dipole(input_dim=3393,
                   day_dim=200,
                   rnn_hiddendim=300,
                   output_dim=283)

    params = list(model.parameters())
    k = 0
    for i in params:
        l = 1
        print("该层的结构:" + str(list(i.size())))
        for j in i.size():
            l *= j
        print("该层参数和:" + str(l))
        k = k + l
    print("总参数数量和:" + str(k))

    optimizer = Adadelta(model.parameters(), lr=1, weight_decay=L2)
    loss_mce = nn.BCELoss(reduction='sum')
    model = model.cuda(device=1)

    for epoch in range(epochs):
        starttime = time.time()
        # training
        model.train()
        all_loss = 0.0
        for batch_index in range(train_batch_num):
            patients_batch = patients_train[batch_index *
                                            batch_size:(batch_index + 1) *
                                            batch_size]
            patients_batch_reshape, patients_lengths = model.padTrainMatrix(
                patients_batch)  # maxlen × n_samples × inputDimSize
            batch_x = patients_batch_reshape[0:-1]  # take the first n-1 visits as x to predict the next n-1 days
            # batch_y = patients_batch_reshape[1:]
            batch_y = patients_batch_reshape[1:, :, :283]  # take the drug codes as y
            optimizer.zero_grad()
            # h0 = model.initHidden(batch_x.shape[1])
            batch_x = torch.tensor(batch_x, device=torch.device('cuda:1'))
            batch_y = torch.tensor(batch_y, device=torch.device('cuda:1'))
            y_hat = model(batch_x)
            mask = out_mask2(y_hat, patients_lengths)  # build a mask that zeroes the padded outputs
            # use the mask to zero the network outputs beyond each sequence length
            y_hat = y_hat.mul(mask)
            batch_y = batch_y.mul(mask)
            # (seq_len, batch_size, out_dim)->(seq_len*batch_size*out_dim, 1)->(seq_len*batch_size*out_dim, )
            y_hat = y_hat.view(-1, 1).squeeze()
            batch_y = batch_y.view(-1, 1).squeeze()

            loss = loss_mce(y_hat, batch_y)
            loss.backward()
            optimizer.step()
            all_loss += loss.item()
        print("Train:Epoch-" + str(epoch) + ":" + str(all_loss) +
              " Train Time:" + str(time.time() - starttime))

        # evaluation
        model.eval()
        NDCG = 0.0
        RECALL = 0.0
        DAYNUM = 0.0
        all_loss = 0.0
        gbert_pred = []
        gbert_true = []
        gbert_len = []

        for batch_index in range(test_batch_num):
            patients_batch = patients_test[batch_index *
                                           batch_size:(batch_index + 1) *
                                           batch_size]
            patients_batch_reshape, patients_lengths = model.padTrainMatrix(
                patients_batch)
            batch_x = patients_batch_reshape[0:-1]
            batch_y = patients_batch_reshape[1:, :, :283]
            batch_x = torch.tensor(batch_x, device=torch.device('cuda:1'))
            batch_y = torch.tensor(batch_y, device=torch.device('cuda:1'))
            y_hat = model(batch_x)
            mask = out_mask2(y_hat, patients_lengths)
            loss = loss_mce(y_hat.mul(mask), batch_y.mul(mask))

            all_loss += loss.item()
            y_hat = y_hat.detach().cpu().numpy()
            ndcg, recall, daynum = validation(y_hat, patients_batch,
                                              patients_lengths, topk)
            NDCG += ndcg
            RECALL += recall
            DAYNUM += daynum
            gbert_pred.append(y_hat)
            gbert_true.append(batch_y.cpu())
            gbert_len.append(patients_lengths)

        avg_NDCG = NDCG / DAYNUM
        avg_RECALL = RECALL / DAYNUM
        y_pred_all, y_true_all = batch_squeeze(gbert_pred, gbert_true,
                                               gbert_len)
        acc_container = metric_report(y_pred_all, y_true_all, 0.2)
        print("Test:Epoch-" + str(epoch) + " Loss:" + str(all_loss) +
              " Test Time:" + str(time.time() - starttime))
        print("Test:Epoch-" + str(epoch) + " NDCG:" + str(avg_NDCG) +
              " RECALL:" + str(avg_RECALL))
        print("Test:Epoch-" + str(epoch) + " Jaccard:" +
              str(acc_container['jaccard']) + " f1:" +
              str(acc_container['f1']) + " prauc:" +
              str(acc_container['prauc']) + " roauc:" +
              str(acc_container['auc']))

        print("")
Example #21
def train(training_data_file, valid_data_file, super_batch_size, tokenizer, mode, kw, p_key, model1, device, model2, model3, \
            batch_size, num_epoch, gradient_accumulation_steps, lr1, lr2, lambda_, valid_critic, early_stop):
    '''Train three models
    
    Train models through bundles
    
    Args:
        training_data_file (str): path to the raw training-data json file used to load data
        super_batch_size (int): how many samples will be loaded into memory at once
        tokenizer: SentencePiece tokenizer used to obtain the token ids
        mode (str): mode of the passage format, could be a list (processed) or a long string (unprocessed).
        kw (str): the key that maps to the passage in each data dictionary. Defaults to 'abstract'
        p_key (str): the key used to look up a specific passage. Defaults to 'title'
        model1 (nn.DataParallel): local dependency encoder
        device (torch.device): The device which models and data are on.
        model2 (nn.Module): global coherence encoder
        model3 (nn.Module): attention decoder
        batch_size (int): Defaults to 4.
        num_epoch (int): Defaults to 1.
        gradient_accumulation_steps (int): Defaults to 1.
        lr1, lr2 (float): Default to 1e-4. The starting learning rates.
        lambda_ (float): Defaults to 0.01. Balance factor for param normalization.
        valid_critic (str): which metric to use when evaluating for early stopping
        early_stop (int): the early-stopping patience in epochs. Defaults to 5

    '''

    # Prepare optimizer for Sys1
    param_optimizer_bert = list(model1.named_parameters())
    param_optimizer_others = list(model2.named_parameters()) + list(
        model3.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    # We tend to fix the embedding. For now we have not located the embedding layer.
    optimizer_grouped_parameters_bert = [{
        'params': [
            p for n, p in param_optimizer_bert
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        lambda_
    }, {
        'params': [
            p for n, p in param_optimizer_bert
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]

    optimizer_grouped_parameters_others = [{
        'params': [
            p for n, p in param_optimizer_others
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        lambda_
    }, {
        'params': [
            p for n, p in param_optimizer_others
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    # We should add a module here to count the number of parameters
    critic = nn.NLLLoss(reduction='none')

    line_num = int(os.popen("wc -l " + training_data_file).read().split()[0])
    global_step = 0  # global step
    opt1 = BertAdam(optimizer_grouped_parameters_bert,
                    lr=lr1,
                    warmup=0.1,
                    t_total=line_num / batch_size * num_epoch)  # optimizer 1
    # opt = Adam(optimizer_grouped_parameter, lr=lr)
    opt2 = Adadelta(optimizer_grouped_parameters_others, lr=lr2, rho=0.95)
    model1.to(device)  #
    model1.train()  #
    model2.to(device)  #
    model2.train()  #
    model3.to(device)  #
    model3.train()  #
    warmed = True
    for epoch in trange(num_epoch, desc='Epoch'):

        smooth_mean = WindowMean()
        opt1.zero_grad()
        opt2.zero_grad()

        for superbatch, line_num in load_superbatch(training_data_file,
                                                    super_batch_size):
            bundles = []

            for data in superbatch:
                try:
                    bundles.append(
                        convert_passage_to_samples_bundle(
                            tokenizer, data, mode, kw, p_key))

                except:
                    print_exc()

            num_batch, dataloader = homebrew_data_loader(bundles,
                                                         batch_size=batch_size)

            tqdm_obj = tqdm(dataloader, total=num_batch)
            num_steps = line_num  #
            for step, batch in enumerate(tqdm_obj):
                try:
                    #batch[0] = batch[0].to(device)
                    #batch[1] = batch[1].to(device)
                    #batch[2] = batch[2].to(device)
                    batch = tuple(t for t in batch)
                    log_prob_loss, pointers_output, ground_truth = calculate_loss(
                        batch, model1, model2, model3, device, critic)
                    # here we need to add code to cal rouge-w and acc
                    rouge_ws = []
                    accs = []
                    ken_taus = []
                    pmrs = []
                    for pred, true in zip(pointers_output, ground_truth):
                        rouge_ws.append(rouge_w(pred, true))
                        accs.append(acc(pred, true))
                        ken_taus.append(kendall_tau(pred, true))
                        pmrs.append(pmr(pred, true))

                    log_prob_loss.backward()

                    # ******** In the following code we gonna edit it and made early stop ************

                    if (step + 1) % gradient_accumulation_steps == 0:
                        # modify learning rate with special warm up BERT uses. From BERT pytorch examples
                        lr_this_step = lr1 * warmup_linear(
                            global_step / num_steps, warmup=0.1)
                        for param_group in opt1.param_groups:
                            param_group['lr'] = lr_this_step
                        global_step += 1

                        opt2.step()
                        opt2.zero_grad()
                        smooth_mean_loss = smooth_mean.update(
                            log_prob_loss.item())
                        tqdm_obj.set_description(
                            '{}: {:.4f}, {}: {:.4f}, smooth_mean_loss: {:.4f}'.
                            format('accuracy', np.mean(accs), 'rouge-w',
                                   np.mean(rouge_ws), smooth_mean_loss))
                        # During warming period, model1 is frozen and model2 is trained to normal weights
                        if smooth_mean_loss < 1.0 and step > 100:  # ugly manual hyperparam
                            warmed = True
                        if warmed:
                            opt1.step()
                        opt1.zero_grad()
                        if step % 1000 == 0:
                            output_model_file = './models/bert-base-cased.bin.tmp'
                            saved_dict = {
                                'params1': model1.module.state_dict()
                            }
                            saved_dict['params2'] = model2.state_dict()
                            saved_dict['params3'] = model3.state_dict()
                            torch.save(saved_dict, output_model_file)

                except Exception as err:
                    traceback.print_exc()
                    exit()
                    # if mode == 'list':
                    #     print(batch._id)

        if epoch < 5:
            best_score = 0
            continue

        with torch.no_grad():
            print('valid..............')

            valid_critic_dict = {
                'rouge-w': rouge_w,
                'acc': acc,
                'ken-tau': kendall_tau,
                'pmr': pmr
            }

            for superbatch, _ in load_superbatch(valid_data_file,
                                                 super_batch_size):
                bundles = []

                for data in superbatch:
                    try:
                        bundles.append(
                            convert_passage_to_samples_bundle(
                                tokenizer, data, mode, kw, p_key))
                    except Exception:
                        print_exc()

                num_batch, valid_dataloader = homebrew_data_loader(
                    bundles, batch_size=1)

                valid_value = []
                for step, batch in enumerate(valid_dataloader):
                    try:
                        batch = tuple(batch)
                        pointers_output, ground_truth \
                            = dev_test(batch, model1, model2, model3, device)
                        valid_value.append(valid_critic_dict[valid_critic](
                            pointers_output, ground_truth))

                    except Exception as err:
                        traceback.print_exc()
                        # if mode == 'list':
                        #     print(batch._id)

                score = np.mean(valid_value)
            print('epc:{}, {} : {:.2f} best : {:.2f}\n'.format(
                epoch, valid_critic, score, best_score))

            if score > best_score:
                best_score = score
                best_iter = epoch

                print('Saving model to {}'.format(output_model_file))
                # collect and save the parameters of all three sub-models
                saved_dict = {'params1': model1.module.state_dict()}
                saved_dict['params2'] = model2.state_dict()
                saved_dict['params3'] = model3.state_dict()
                torch.save(saved_dict, output_model_file)

                # print('save best model at epc={}'.format(epc))
                # checkpoint = {'model': model.state_dict(),
                #             'args': args,
                #             'loss': best_score}
                # torch.save(checkpoint, '{}/{}.best.pt'.format(args.model_path, args.model))

            if early_stop and (epoch - best_iter) >= early_stop:
                print('early stop at epc {}'.format(epoch))
                break
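For reference, a minimal sketch of the warmup_linear schedule assumed above; this follows the helper shipped with the early BERT PyTorch examples, and the repository's own version may differ.

def warmup_linear(x, warmup=0.002):
    # x is the fraction of training completed (global_step / num_steps):
    # ramp the multiplier up linearly during the warm-up phase, then decay linearly
    if x < warmup:
        return x / warmup
    return 1.0 - x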
Beispiel #22
0
        self.map3 = Linear(hidden_size, output_size)

    def forward(self, x):
        x = leaky_relu(self.map1(x), 0.1)
        x = leaky_relu(self.map2(x), 0.1)
        return sigmoid(self.map3(x))


generator = SimpleMLP(input_size=z_dim, hidden_size=50, output_size=DIMENSION)
discriminator = SimpleMLP(input_size=DIMENSION, hidden_size=100, output_size=1)
if GPU_NUMS > 0:
    generator.cuda()
    discriminator.cuda()
criterion = BCELoss()

d_optimizer = Adadelta(discriminator.parameters(), lr=1)
g_optimizer = Adadelta(generator.parameters(), lr=1)
progBar = ProgressBar(1, iterations,
                      "D Loss:(real/fake) %.3f/%.3f,G Loss:%.3f")
for train_iter in range(1, iterations + 1):
    for d_index in range(3):
        # 1. Train D on real+fake
        discriminator.zero_grad()

        #  1A: Train D on real
        real_samples = sample_2d(lut_2d, bs)
        d_real_data = Variable(torch.Tensor(real_samples))
        if GPU_NUMS > 0:
            d_real_data = d_real_data.cuda()
        d_real_decision = discriminator(d_real_data)
        labels = Variable(torch.ones(bs))
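The example is cut off before the discriminator loss is computed. Below is a hedged continuation sketch of how the real/fake update is commonly finished from this point; the fake-sample half and the label handling are illustrative, not taken from the original.

        if GPU_NUMS > 0:
            labels = labels.cuda()
        d_real_error = criterion(d_real_decision.squeeze(), labels)       # real samples should be scored as 1
        d_real_error.backward()

        d_gen_input = Variable(torch.randn(bs, z_dim))                    # noise for fake samples
        if GPU_NUMS > 0:
            d_gen_input = d_gen_input.cuda()
        d_fake_data = generator(d_gen_input).detach()                     # detach: do not update G in this step
        d_fake_decision = discriminator(d_fake_data)
        fake_labels = Variable(torch.zeros(bs))
        if GPU_NUMS > 0:
            fake_labels = fake_labels.cuda()
        d_fake_error = criterion(d_fake_decision.squeeze(), fake_labels)  # fake samples should be scored as 0
        d_fake_error.backward()
        d_optimizer.step()                                                 # single Adadelta update for D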
Beispiel #23
0
def main():
    args = read_args(default_config="confs/kim_cnn_sst2.json")
    set_seed(args.seed)
    os.makedirs(args.workspace, exist_ok=True)
    torch.backends.cudnn.deterministic = True

    dataset_cls = find_dataset(args.dataset_name)
    training_iter, dev_iter, test_iter = dataset_cls.iters(args.dataset_path, args.vectors_file, args.vectors_dir,
        batch_size=args.batch_size, device=args.device, train=args.train_file, dev=args.dev_file, test=args.test_file)

    args.dataset = training_iter.dataset
    args.words_num = len(training_iter.dataset.TEXT_FIELD.vocab)
    model = mod.SiameseRNNModel(args).to(args.device)
    ckpt_attrs = mod.load_checkpoint(model, args.workspace,
        best=args.load_best_checkpoint) if args.load_last_checkpoint or args.load_best_checkpoint else {}
    offset = ckpt_attrs.get("epoch_idx", -1) + 1
    args.epochs -= offset

    training_pbar = tqdm(total=len(training_iter), position=2)
    training_pbar.set_description("Training")
    dev_pbar = tqdm(total=args.epochs, position=1)
    dev_pbar.set_description("Dev")

    criterion = nn.CrossEntropyLoss()
    kd_criterion = nn.KLDivLoss(reduction="batchmean")
    params = list(filter(lambda x: x.requires_grad, model.parameters()))
    optimizer = Adadelta(params, lr=args.lr, rho=0.95)
    increment_fn = mod.make_checkpoint_incrementer(model, args.workspace, save_last=True,
        best_loss=ckpt_attrs.get("best_dev_loss", 10000))
    non_embedding_params = model.non_embedding_params()

    if args.use_data_parallel:
        model = nn.DataParallel(model)
    if args.eval_test_only:
        test_acc, _ = evaluate(model, test_iter, criterion, export_eval_labels=args.export_eval_labels)
        print(test_acc)
        return
    if args.epochs == 0:
        print("No epochs left from loaded model.", file=sys.stderr)
        return
    for epoch_idx in tqdm(range(args.epochs), position=0):
        training_iter.init_epoch()
        model.train()
        training_pbar.n = 0
        training_pbar.refresh()
        for batch in training_iter:
            training_pbar.update(1)
            optimizer.zero_grad()
            logits = model(batch.sentence1, batch.sentence2)
            kd_logits = torch.stack((batch.logits_0, batch.logits_1, batch.logits_2), 1)
            kd = args.distill_lambda * kd_criterion(F.log_softmax(logits / args.distill_temperature, 1),
                F.softmax(kd_logits / args.distill_temperature, 1))
            loss = args.ce_lambda * criterion(logits, batch.gold_label) + kd
            loss.backward()
            clip_grad_norm_(non_embedding_params, args.clip_grad)
            optimizer.step()
            acc = ((logits.max(1)[1] == batch.gold_label).float().sum() / batch.gold_label.size(0)).item()
            training_pbar.set_postfix(accuracy=f"{acc:.2}")

        model.eval()
        dev_acc, dev_loss = evaluate(model, dev_iter, criterion)
        dev_pbar.update(1)
        dev_pbar.set_postfix(accuracy=f"{dev_acc:.4}")
        is_best_dev = increment_fn(dev_loss, dev_acc=dev_acc, epoch_idx=epoch_idx + offset)

        if is_best_dev:
            dev_pbar.set_postfix(accuracy=f"{dev_acc:.4} (best loss)")
            test_acc, _ = evaluate(model, test_iter, criterion, export_eval_labels=args.export_eval_labels)
    training_pbar.close()
    dev_pbar.close()
    print(f"Test accuracy of the best model: {test_acc:.4f}", file=sys.stderr)
    print(test_acc)
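The training loop above blends a temperature-scaled KL distillation term with the usual cross-entropy. A self-contained sketch of that loss follows; the function name and default weights are illustrative, not part of the repository's API.

import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels,
                      temperature=2.0, ce_lambda=1.0, distill_lambda=1.0):
    # KL divergence between temperature-softened student and teacher distributions
    kd = F.kl_div(F.log_softmax(student_logits / temperature, dim=1),
                  F.softmax(teacher_logits / temperature, dim=1),
                  reduction="batchmean")
    # standard cross-entropy against the gold labels
    ce = F.cross_entropy(student_logits, labels)
    return ce_lambda * ce + distill_lambda * kd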
Beispiel #24
0
model = MwanModel(
    num_class=len(data_info.vocabs[Const.TARGET]),
    EmbLayer=StaticEmbedding(data_info.vocabs[Const.INPUT],
                             requires_grad=False,
                             normalize=False),
    ElmoLayer=None,
    args_of_imm={
        "input_size": 300,
        "hidden_size": arg.hidden_size,
        "dropout": arg.dropout,
        "use_allennlp": False,
    },
)

optimizer = Adadelta(lr=arg.lr, params=model.parameters())
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

callbacks = [
    LRScheduler(scheduler),
]

if arg.task in ['snli']:
    callbacks.append(
        FitlogCallback(data_info.datasets[arg.testset_name], verbose=1))
elif arg.task == 'mnli':
    callbacks.append(
        FitlogCallback(
            {
                'dev_matched': data_info.datasets['dev_matched'],
                'dev_mismatched': data_info.datasets['dev_mismatched']
Beispiel #25
0
#lesion_model = UNet3D_2(input_size=len(options['input_data']), output_size=2)
lesion_model.cuda()
input_tensor = torch.rand(5, 3, 2, 160, 200).cuda()
pred = lesion_model(input_tensor)

options['model_name'] = lesion_model.__class__.__name__
model_name = 'ms_lesion_segmentation'

# define the torch.device
device = torch.device('cuda') if options['gpu_use'] else torch.device('cpu')

# define the optimizer
if options['optimizer'] == "adam":
    optimizer = Adam(lesion_model.parameters())
elif options['optimizer'] == "adadelta":
    optimizer = Adadelta(lesion_model.parameters())

# send the model to the device
lesion_model = lesion_model.to(device)

early_stopping = EarlyStopping(patience=options['patience'], verbose=True)

train_losses = []
val_losses = []
train_jaccs = []
val_jaccs = []

# training loop
training = True
train_complete = False
epoch = 1
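The training loop itself is not shown. Below is a minimal sketch of how the flags and lists set up above are commonly used; run_epoch is a hypothetical helper, and the EarlyStopping call signature and early_stop attribute are assumptions based on the usual PyTorch early-stopping helpers, not on this project.

while training:
    # run_epoch is hypothetical: trains or evaluates for one epoch and returns (loss, jaccard)
    train_loss, train_jacc = run_epoch(lesion_model, optimizer, device, train=True)
    val_loss, val_jacc = run_epoch(lesion_model, optimizer, device, train=False)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_jaccs.append(train_jacc)
    val_jaccs.append(val_jacc)

    early_stopping(val_loss, lesion_model)   # assumed signature: (monitored metric, model)
    if early_stopping.early_stop:            # assumed attribute, set after `patience` bad epochs
        train_complete = True
        training = False
    epoch += 1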
Beispiel #26
0
                          args.get('min_length'), args.get('max_length'))
val_dataloader = TextDataLoader(dataset=val_dataset,
                                dictionary=dictionary,
                                batch_size=args.get('batch_size'),
                                shuffle=not args.get('sort_dataset'),
                                num_workers=args.get('num_workers'))
# test_dataset = TextDataset(test_data, dictionary, args.get('sort_dataset'), args.get('min_length'), args.get('max_length'))
# test_dataloader = TextDataLoader(dataset=test_dataset, dictionary=dictionary, batch_size=args.get('batch_size'), shuffle = not args.get('sort_dataset'))

logger.info("Training...")
trainable_params = [p for p in model.parameters() if p.requires_grad]  # needed by the Adadelta branch below
if args.get('optimizer') == 'Adam':
    optimizer = Adam(model.parameters(), lr=args.get('initial_lr'))
elif args.get('optimizer') == 'Adadelta':
    optimizer = Adadelta(params=trainable_params,
                         lr=args.get('initial_lr'),
                         weight_decay=0.95)
else:
    raise NotImplementedError()

lr_plateau = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5)
criterion = nn.CrossEntropyLoss()
trainer = Trainer(model,
                  train_dataloader,
                  val_dataloader,
                  criterion=criterion,
                  optimizer=optimizer,
                  lr_schedule=args.get('lr_schedule'),
                  lr_scheduler=lr_plateau,
                  use_gpu=args.get('use_gpu'),
                  logger=logger)
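ReduceLROnPlateau only changes the learning rate when it is stepped with the monitored metric after each validation pass. A small self-contained illustration of that call pattern follows; the toy_* names are illustrative, and the Trainer above is assumed to do the equivalent internally.

import torch.nn as nn
from torch.optim import Adadelta
from torch.optim import lr_scheduler

toy_model = nn.Linear(4, 2)
toy_opt = Adadelta(toy_model.parameters(), lr=1.0)
toy_sched = lr_scheduler.ReduceLROnPlateau(toy_opt, factor=0.2, patience=5)

for epoch in range(20):
    val_loss = 1.0                 # a metric that never improves
    toy_sched.step(val_loss)       # LR is multiplied by 0.2 after `patience` epochs without improvement
print(toy_opt.param_groups[0]['lr'])   # noticeably smaller than the initial 1.0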
Beispiel #27
0
def main():
    args = read_args(default_config="confs/kim_cnn_sst2.json")
    set_seed(args.seed)
    os.makedirs(args.workspace, exist_ok=True)
    torch.backends.cudnn.deterministic = True

    dataset_cls = find_dataset(args.dataset_name)
    training_iter, dev_iter, test_iter = dataset_cls.iters(
        args.dataset_path,
        args.vectors_file,
        args.vectors_dir,
        batch_size=args.batch_size,
        device=args.device,
        train=args.train_file,
        dev=args.dev_file,
        test=args.test_file)

    args.dataset = training_iter.dataset
    args.words_num = len(training_iter.dataset.TEXT_FIELD.vocab)
    model = mod.SiameseRNNModel(args).to(args.device)

    sd = torch.load('sst.pt')['state_dict']
    del sd['static_embed.weight']
    del sd['non_static_embed.weight']
    del sd['fc1.weight']
    del sd['fc1.bias']
    del sd['fc2.weight']
    del sd['fc2.bias']
    model.load_state_dict(sd, strict=False)
    mod.init_embedding(model, args)
    # embs, field_src  = torch.load('embs_tmp.pt')
    # field_mappings = list_field_mappings(dataset_cls.TEXT_FIELD, field_src)
    # replace_embeds(model.non_static_embed, embs, field_mappings)
    model.to(args.device)

    ckpt_attrs = mod.load_checkpoint(
        model, args.workspace, best=args.load_best_checkpoint
    ) if args.load_last_checkpoint or args.load_best_checkpoint else {}
    torch.save((model.non_static_embed, dataset_cls.TEXT_FIELD.vocab),
               'qqp-embs.pt')
    return
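    # NOTE: the early return above means the training code below is not executed
    # in this variant; the function only exports the embeddings and the vocabulary.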
    offset = ckpt_attrs.get("epoch_idx", -1) + 1
    args.epochs -= offset

    training_pbar = tqdm(total=len(training_iter), position=2)
    training_pbar.set_description("Training")
    dev_pbar = tqdm(total=args.epochs, position=1)
    dev_pbar.set_description("Dev")

    criterion = nn.CrossEntropyLoss()
    kd_criterion = nn.MSELoss()  # KLDivLoss(reduction="batchmean")
    filter_params = [(n, p) for n, p in model.named_parameters()
                     if p.requires_grad and 'fc' in n]
    params = list(map(lambda x: x[1], filter_params))
    # print([x[0] for x in filter_params])
    optimizer = Adadelta(params, lr=args.lr, rho=0.95)
    #optimizer = Adam(params, lr=args.lr)
    increment_fn = mod.make_checkpoint_incrementer(model,
                                                   args.workspace,
                                                   save_last=True,
                                                   best_loss=ckpt_attrs.get(
                                                       "best_dev_loss", 10000))
    non_embedding_params = model.non_embedding_params()

    if args.use_data_parallel:
        model = nn.DataParallel(model)
    if args.eval_test_only:
        test_acc, _ = evaluate(model,
                               test_iter,
                               criterion,
                               export_eval_labels=args.export_eval_labels)
        print(test_acc)
        return
    if args.epochs == 0:
        print("No epochs left from loaded model.", file=sys.stderr)
        return
    for epoch_idx in tqdm(range(args.epochs), position=0):
        training_iter.init_epoch()
        model.train()
        training_pbar.n = 0
        training_pbar.refresh()
        for batch in training_iter:
            training_pbar.update(1)
            optimizer.zero_grad()
            logits = model(batch.question1, batch.question2)
            # kd_logits = torch.stack((batch.logits_0, batch.logits_1), 1)
            #kd = args.distill_lambda * kd_criterion(F.log_softmax(logits / args.distill_temperature, 1),
            #    F.softmax(kd_logits / args.distill_temperature, 1))
            # kd = args.distill_lambda * kd_criterion(logits, kd_logits)
            loss = criterion(logits, batch.is_duplicate)
            loss.backward()
            clip_grad_norm_(non_embedding_params, args.clip_grad)
            optimizer.step()
            acc = ((logits.max(1)[1] == batch.is_duplicate).float().sum() /
                   batch.is_duplicate.size(0)).item()
            training_pbar.set_postfix(accuracy=f"{acc:.2}")

        model.eval()
        dev_acc, dev_loss = evaluate(model, dev_iter, criterion)
        dev_pbar.update(1)
        dev_pbar.set_postfix(accuracy=f"{dev_acc:.4}")
        is_best_dev = increment_fn(dev_loss,
                                   dev_acc=dev_acc,
                                   epoch_idx=epoch_idx + offset)

        if is_best_dev:
            dev_pbar.set_postfix(accuracy=f"{dev_acc:.4} (best loss)")
            # test_acc, _ = evaluate(model, test_iter, criterion, export_eval_labels=args.export_eval_labels)
    training_pbar.close()
    dev_pbar.close()
    print(f"Test accuracy of the best model: {test_acc:.4f}", file=sys.stderr)
    print(test_acc)
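This example deletes the task-specific keys from a checkpoint and then loads the remainder with strict=False. A generic, hedged sketch of that pattern is given below; the checkpoint path and prefix names are illustrative.

import torch

def load_partial_checkpoint(model, ckpt_path,
                            drop_prefixes=('fc1', 'fc2', 'static_embed', 'non_static_embed')):
    # keep only the weights whose names do not start with a dropped prefix,
    # then let strict=False ignore whatever is missing on the model side
    sd = torch.load(ckpt_path, map_location='cpu')['state_dict']
    sd = {k: v for k, v in sd.items() if not k.startswith(drop_prefixes)}
    missing, unexpected = model.load_state_dict(sd, strict=False)
    return missing, unexpected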
Beispiel #28
0
def train(pretrain=PRETRAIN):
    logging.debug('pretrain:{}'.format(pretrain))
    if DEVICE == 'cuda':
        if not torch.cuda.is_available():
            logging.error("can't find a GPU device")
            pdb.set_trace()
    #model=DenseLSTM(NUM_CLASS)
    #model=VGGLSTM(NUM_CLASS)
    #model=DenseCNN(NUM_CLASS)
    #model=VGGFC(NUM_CLASS)
    model = ResNetLSTM(NUM_CLASS)
    if not os.path.exists(MODEL_PATH):
        os.makedirs(MODEL_PATH)
    if not os.path.exists(PATH + DICTIONARY_NAME):
        logging.error("can't find the dictionary")
        pdb.set_trace()
    with open(PATH + DICTIONARY_NAME, 'r') as f:
        dictionary = json.load(f)
    if pretrain:
        model.load_state_dict(
            torch.load(MODEL_PATH + MODEL_NAME, map_location=DEVICE))
    model.to(DEVICE).train()
    model.register_backward_hook(backward_hook)  #transforms.Resize((32,400))
    dataset = ICDARRecTs_2DataSet(IMAGE_PATH,
                                  dictionary,
                                  BATCH_SIZE,
                                  img_transform=transforms.Compose([
                                      transforms.ColorJitter(brightness=0.5,
                                                             contrast=0.5,
                                                             saturation=0.5,
                                                             hue=0.3),
                                      transforms.ToTensor(),
                                      transforms.Normalize(
                                          (0.485, 0.456, 0.406),
                                          (0.229, 0.224, 0.225))
                                  ]))
    dataloader = DataLoader(dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=True,
                            num_workers=4,
                            drop_last=False)  #collate_fn=dataset.collate
    #optimizer=Adam(model.parameters(),lr=LR,betas=(0.9,0.999),weight_decay=0)
    optimizer = Adadelta(model.parameters(), lr=0.01, rho=0.9, weight_decay=0)
    criterion = CTCLoss(blank=0)
    length = len(dataloader)
    max_accuracy = 0
    if os.path.exists('max_accuracy.txt'):
        with open('max_accuracy.txt', 'r') as f:
            max_accuracy = float(f.read())
    for epoch in range(EPOCH):
        epoch_time = datetime.now()
        epoch_correct = 0
        epoch_loss = 0
        min_loss = 100
        for step, data in enumerate(dataloader):
            step_time = datetime.now()
            imgs, names, label_size, img_name = data
            #print(names,label_size)
            logging.debug("imgs' size:{}".format(imgs.size()))
            imgs = Variable(imgs, requires_grad=True).to(DEVICE)
            label, batch_label = dataset.transform_label(batch_name=names)
            label = Variable(label).to(DEVICE)
            label_size = Variable(label_size).to(DEVICE)
            preds = model(imgs)
            logging.debug("preds size:{}".format(preds.size()))
            preds_size = Variable(
                torch.LongTensor([preds.size(0)] * BATCH_SIZE)).to(DEVICE)
            loss = criterion(preds, label, preds_size, label_size)
            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()
            if min_loss > loss.item():
                min_loss = loss.item()
                torch.save(model.state_dict(), MODEL_PATH + MODEL_NAME)
            num_same = if_same(preds.cpu().data, batch_label)
            epoch_correct += num_same
            logging.debug(
                "Epoch:{}|length:{}|step:{}|num_same:{}|loss:{:.4f}|min loss:{:.4f}"
                .format(epoch, length, step, num_same, loss.item(), min_loss))
            logging.debug("the time of one step:{}".format(datetime.now() -
                                                           step_time))
            if step % 100 == 0:
                clear_output(wait=True)
        accuracy = epoch_correct / (length * BATCH_SIZE)  # fraction of correctly recognised samples
        if accuracy > max_accuracy:
            max_accuracy = accuracy
            with open('max_accuracy.txt', 'w') as f:
                f.write(str(max_accuracy))
            torch.save(model.state_dict(), MODEL_PATH + MODEL_NAME)
            torch.save(model.state_dict(),
                       MODEL_PATH + 'optimal' + str(max_accuracy) + MODEL_NAME)
        mean_loss = epoch_loss / length
        logging.info(
            'Epoch:{}|accuracy:{}|mean loss:{}|the time of one epoch:{}|max accuracy:{}'
            .format(epoch, accuracy, mean_loss,
                    datetime.now() - epoch_time, max_accuracy))
        with open('accuracy.txt', 'a+') as f:
            f.write(
                'Epoch:{}|accuracy:{}|mean loss:{}|the time of one epoch:{}|max accuracy:{}\n'
                .format(epoch, accuracy, mean_loss,
                        datetime.now() - epoch_time, max_accuracy))
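The CTC loss call above depends on getting the tensor shapes right. The following small, self-contained sketch shows the expected layout; the sizes are arbitrary, with blank index 0 as in the example.

import torch
from torch.nn import CTCLoss

T, N, C = 50, 4, 37                                              # time steps, batch size, classes incl. blank
log_probs = torch.randn(T, N, C).log_softmax(2)                  # (T, N, C) log-probabilities
targets = torch.randint(1, C, (N, 12), dtype=torch.long)         # labels, blank (0) excluded
input_lengths = torch.full((N,), T, dtype=torch.long)            # per-sample prediction lengths
target_lengths = torch.full((N,), 12, dtype=torch.long)          # per-sample label lengths
loss = CTCLoss(blank=0)(log_probs, targets, input_lengths, target_lengths)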
Beispiel #29
0
        # static : pre-trained vectors are kept
        pass
    else:
        # MODE == 'nonstatic'
        if isinstance(U, torch.Tensor):
            U = nn.Parameter(U)
        else:
            LOGGER.info("Type error, U should be torch.Tensor. ")

    model = TextCNN(cfg.hidden_units[0] * 3, cfg.hidden_units[0],
                    cfg.filter_hs)
    model.train()

    LR = 1e-04
    #optimizer = Adam(model.parameters(), lr=LR) # 0.001
    optimizer = Adadelta(model.parameters(), lr=1.)

    ###################################
    ### Training
    ###################################
    best_loss = float('inf')
    best_acc = float('-inf')
    for epoch in range(cfg.n_epochs):
        epoch_loss = 0.0
        steps = 0
        num = 0
        pbar = tqdm(train_loader)
        pbar_eval = tqdm(test_loader)
        for field in pbar:
            # check shape
            if DEBUG:
Beispiel #30
0
    def train(self):
        os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        device_ids = [0]
        self.classifier = classifier()
        get_paprams(self.classifier)
        get_paprams(self.classifier.base)
        # data_set_eval = my_dataset(eval=True)
        # data_set = my_dataset_10s()
        # data_set_test = my_dataset_10s()
        data_set = my_dataset_10s_smote()
        data_set_test = my_dataset_10s_smote(test=True, all_data=data_set.all_data, all_label=data_set.all_label,
                                             index_=data_set.index)
        # data_set_eval = my_dataset_10s(eval=True)
        # data_set_combine = my_dataset(combine=True)
        batch = 300
        totoal_epoch = 2000
        print('batch:{}'.format(batch))
        # self.evaluation = evaluation
        data_loader = DataLoader(data_set, batch, shuffle=True, collate_fn=detection_collate)
        data_loader_test = DataLoader(data_set_test, batch, False, collate_fn=detection_collate)
        # data_loader_eval = DataLoader(data_set_eval, batch, False, collate_fn=detection_collate)
        self.classifier = self.classifier.cuda()
        self.classifier = DataParallel(self.classifier, device_ids=device_ids)
        optim = Adadelta(self.classifier.parameters(), lr=0.1, rho=0.9, weight_decay=1e-5)

        self.cretion = smooth_focal_weight()

        self.classifier.apply(weights_init)
        start_time = time.time()
        count = 0
        epoch = -1
        while 1:
            epoch += 1
            runing_losss = [0] * 5
            for data in data_loader:
                loss = [0] * 5
                y = data[1].cuda()
                x = data[0].cuda()
                optim.zero_grad()

                weight = torch.Tensor([0.5, 2, 0.5, 2]).cuda()

                inputs, targets_a, targets_b, lam = mixup_data(x, y)
                predict = self.classifier(inputs)  # use the mixed-up inputs, not the raw batch
                ############################

                loss_func = mixup_criterion(targets_a, targets_b, lam, weight)
                loss5 = loss_func(self.cretion, predict[0])
                loss4 = loss_func(self.cretion, predict[1]) * 0.4
                loss3 = loss_func(self.cretion, predict[2]) * 0.3
                loss2 = loss_func(self.cretion, predict[3]) * 0.2
                loss1 = loss_func(self.cretion, predict[4]) * 0.1

                tmp = loss5 + loss4 + loss3 + loss2 + loss1

                # tmp = sum(loss)
                tmp.backward()
                optim.step()
                for i in range(5):
                    # runing_losss[i] += (loss[i].item())
                    runing_losss[i] += (tmp.item())

                count += 1
                # torch.cuda.empty_cache()
            end_time = time.time()
            print(
                "epoch:{a}: loss:{b} spend_time:{c} time:{d}".format(a=epoch, b=sum(runing_losss),
                                                                     c=int(end_time - start_time),
                                                                     d=time.asctime()))
            start_time = end_time

            # vis.line(np.asarray([optim.param_groups[0]['lr']]), np.asarray([epoch]), win="lr", update='append',
            #          opts=dict(title='lr'))
            # if (epoch > 20):
            #     runing_losss = np.asarray(runing_losss).reshape(1, 5)

            # vis.line(runing_losss,
            #          np.asarray([epoch] * 5).reshape(1, 5), win="loss-epoch", update='append',
            #          opts=dict(title='loss', legend=['loss1', 'loss2', 'loss3', 'loss4', 'loss5', 'loss6']))
            save(self.classifier.module.base.state_dict(),
                 str(epoch) + 'base_c2.p')
            save(self.classifier.module.state_dict(),
                 str(epoch) + 'base_all_c2.p')
            # print('eval:{}'.format(time.asctime(time.localtime(time.time()))))
            self.classifier.eval()
            # self.evaluation(self.classifier, data_loader_eval)
            # print('test:{}'.format(time.asctime(time.localtime(time.time()))))
            # self.evaluation(self.classifier, data_loader_eval, epoch)
            self.evaluation(self.classifier, data_loader_test, epoch)
            # self.evaluation(self.classifier, data_loader, epoch)

            # print('combine:{}'.format(time.asctime(time.localtime(time.time()))))
            # evaluation(self.classifier, data_loader_combine)
            self.classifier.train()
            if epoch % 10 == 0:
                adjust_learning_rate(optim, 0.9, epoch, totoal_epoch, 0.1)
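mixup_data and mixup_criterion are assumed above. A hedged sketch following the widely used reference implementation of mixup is given below; the project's own helpers, and the extra weight argument, may differ.

import numpy as np
import torch

def mixup_data(x, y, alpha=0.2):
    # sample a mixing coefficient and blend each sample with a random partner
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1.0 - lam) * x[index]
    return mixed_x, y, y[index], lam

def mixup_criterion(y_a, y_b, lam, weight=None):
    # weight (e.g. class weights) is accepted for signature compatibility but ignored here;
    # the returned closure blends the loss over both label sets with weight lam
    def loss_fn(criterion, pred):
        return lam * criterion(pred, y_a) + (1.0 - lam) * criterion(pred, y_b)
    return loss_fn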