예제 #1
0
def _train_step(optim: AdaBound,
                train_iter: t.Iterator,
                train_loader: ComLoader,
                model_prior: Prior,
                model_vae: GraphInf,
                clip_grad: float,
                num_post_samples: int
                ) -> torch.Tensor:
    """
    Perform one-step training

    Args:
        optim (AdaBound): The optimizer
        train_iter (t.Iterator): The iterator for training
        train_loader (ComLoader): The Loader for training
        model_prior (Prior): The prior network
        model_vae (GraphInf): The VAE model
        clip_grad (float): Value threshold for element-wise gradient clipping
        num_post_samples (int): Number of posterior samples forwarded to _loss

    Returns:
        torch.Tensor: The calculated loss
    """
    model_prior.train()
    optim.zero_grad()
    # Pull the next mini-batch (the helper also returns a refreshed iterator,
    # but note the refreshed iterator is local and not returned to the caller).
    record, train_iter = _next(train_iter, train_loader)
    # NOTE(review): there is no explicit loss.backward() in this function —
    # presumably _loss(...) runs the backward pass internally (perhaps the
    # trailing True flag); confirm, otherwise optim.step() below would update
    # with zero gradients.
    loss = _loss(record, model_prior, model_vae, num_post_samples, True)
    # Clip gradient
    torch.nn.utils.clip_grad_value_(model_prior.parameters(), clip_grad)
    optim.step()
    return loss
예제 #2
0
 def test_same(self):
     """Train a Torch linear layer and a Keras one with AdaBound in lockstep.

     The Keras model is round-tripped through disk first to exercise
     serialization of the custom optimizer; final losses and weights of the
     two frameworks must agree to within 1e-4.
     """
     self.reset_seed(0xcafe)
     init_w, init_b = self.gen_random_weights()
     torch_linear = self.gen_torch_linear(init_w, init_b)
     keras_linear = self.gen_keras_linear(init_w, init_b)
     # Save/load round trip: the custom AdaBound optimizer must deserialize.
     model_path = os.path.join(tempfile.gettempdir(), 'keras_adabound.h5')
     keras_linear.save(model_path)
     keras_linear = keras.models.load_model(model_path, custom_objects={'AdaBound': AdaBound})
     # Fresh target mapping, distinct from the models' initial weights.
     w, b = self.gen_random_weights()
     criterion = torch.nn.MSELoss()
     optimizer = OfficialAdaBound(torch_linear.parameters(), lr=1e-3, final_lr=0.1, eps=1e-8)
     torch_loss = keras_loss = None
     for _ in range(300):
         x = np.random.standard_normal((1, 3))
         y = np.dot(x, w) + b
         optimizer.zero_grad()
         prediction = torch_linear(torch.Tensor(x.tolist()))
         loss = criterion(prediction, torch.Tensor(y.tolist()))
         torch_loss = loss.tolist()
         loss.backward()
         optimizer.step()
         keras_loss = keras_linear.train_on_batch(x, y)
     self.assertTrue(abs(torch_loss - keras_loss) < 1e-4)
     keras_w, keras_b = keras_linear.get_weights()[0], keras_linear.get_weights()[1]
     self.assertTrue(np.allclose(
         torch_linear.weight.detach().numpy().transpose(),
         keras_w,
         atol=1e-4,
     ))
     self.assertTrue(np.allclose(
         torch_linear.bias.detach().numpy(),
         keras_b,
         atol=1e-4,
     ))
예제 #3
0
 def test_same_amsgrad(self):
     """Same lockstep comparison as test_same, with the AMS variants enabled.

     Torch uses amsbound=True, Keras uses amsgrad=True; tolerances are looser
     (1e-2) than in the plain test. Skipped on Python 2.
     """
     if sys.version_info[0] < 3:
         return
     self.reset_seed(0xcafe)
     init_w, init_b = self.gen_random_weights()
     torch_linear = self.gen_torch_linear(init_w, init_b)
     keras_linear = self.gen_keras_linear(init_w, init_b, amsgrad=True)
     # Fresh target mapping, distinct from the models' initial weights.
     w, b = self.gen_random_weights()
     criterion = torch.nn.MSELoss()
     optimizer = OfficialAdaBound(
         torch_linear.parameters(),
         lr=1e-3,
         final_lr=0.1,
         eps=K.epsilon(),
         amsbound=True,
     )
     torch_loss = keras_loss = None
     for _ in range(300):
         x = np.random.standard_normal((1, 3))
         y = np.dot(x, w) + b
         optimizer.zero_grad()
         prediction = torch_linear(torch.Tensor(x.tolist()))
         loss = criterion(prediction, torch.Tensor(y.tolist()))
         torch_loss = loss.tolist()
         loss.backward()
         optimizer.step()
         keras_loss = keras_linear.train_on_batch(x, y).tolist()
     self.assertTrue(abs(torch_loss - keras_loss) < 1e-2)
     keras_w, keras_b = keras_linear.get_weights()[0], keras_linear.get_weights()[1]
     self.assertTrue(
         np.allclose(
             torch_linear.weight.detach().numpy().transpose(),
             keras_w,
             atol=1e-2,
         ))
     self.assertTrue(
         np.allclose(
             torch_linear.bias.detach().numpy(),
             keras_b,
             atol=1e-2,
         ))
예제 #4
0
class CQTModel(BaseModel):
    """Audio-translation model: two WaveNet decoders trained on mu-law/CQT features.

    netD_A / netD_B each learn to decode their own domain during training;
    at test time each decoder is applied to the *other* domain's conditioning
    input (fake_B from A's decoder, fake_A from B's decoder) via NV-WaveNet.
    """

    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        """Add new model-specific options and rewrite default values for existing options.

        Parameters:
            parser -- the option parser
            is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options.

        Returns:
            the modified parser.
        """
        preprocess = 'mulaw,normalize,cqt'
        parser.set_defaults(preprocess=preprocess)
        parser.add_argument('--wavenet_layers', type=int, default=30, help='wavenet layers')
        parser.add_argument('--wavenet_blocks', type=int, default=15, help='wavenet blocks')
        parser.add_argument('--width', type=int, default=128, help='width')
        return parser

    def __init__(self, opt):
        """Build the decoders and, in train mode, their losses and optimizers.

        Parameters:
            opt -- parsed options; expects mu, width, tensor_height, lr,
                   wavenet_layers/blocks and the preprocess string.

        Raises:
            ValueError -- if the preprocess pipeline contains neither 'stft'
                          nor 'cqt' (window/stride would be undefined).
        """
        BaseModel.__init__(self, opt)  # call the initialization method of BaseModel
        self.loss_names = ['D_A', 'D_B']
        if opt.isTrain:
            self.output_names = []  # ['aug_A', 'aug_B', 'rec_A', 'rec_B']
        else:
            self.output_names = ['real_A', 'real_B', 'fake_B', 'fake_A']
            self.params_names = ['params_A', 'params_B'] * 2
        self.model_names = ['D_A', 'D_B']

        # Conditioning window/stride depend on which spectral transform is used.
        if 'stft' in self.preprocess:
            stride = 2 * ((opt.nfft // 8) - 1)
            window = opt.nfft // opt.duration_ratio
        elif 'cqt' in self.preprocess:
            stride = opt.hop_length
            window = opt.hop_length
        else:
            # Previously fell through leaving `stride`/`window` unbound
            # (NameError at the WaveNet call below); fail fast instead.
            raise ValueError(
                "CQTModel requires 'stft' or 'cqt' in --preprocess, got: "
                + str(self.preprocess))

        self.netD_A = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks,
                              opt.width, 256, 256,
                              opt.tensor_height, window, stride).to(self.devices[-1])
        self.netD_B = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks,
                              opt.width, 256, 256,
                              opt.tensor_height, window, stride).to(self.devices[-1])
        self.softmax = nn.LogSoftmax(dim=1)  # (1, 256, audio_len) -> pick 256

        if self.isTrain:
            self.criterionDecode = nn.CrossEntropyLoss(reduction='mean')
            self.optimizer_D_A = AdaBound(self.netD_A.parameters(), lr=opt.lr, final_lr=0.1)
            self.optimizer_D_B = AdaBound(self.netD_B.parameters(), lr=opt.lr, final_lr=0.1)
            self.optimizers = [self.optimizer_D_A, self.optimizer_D_B]
        else:
            self.preprocesses = []
            load_suffix = str(opt.load_iter) if opt.load_iter > 0 else opt.epoch
            self.load_networks(load_suffix)
            self.netD_A.eval()
            self.netD_B.eval()

            # Wrap the trained decoders for fast NV-WaveNet inference.
            self.infer_A = NVWaveNet(**(self.netD_A.export_weights()))
            self.infer_B = NVWaveNet(**(self.netD_B.export_weights()))

    def set_input(self, input):
        """Unpack an (A, B) batch pair and move tensors onto the first device."""
        A, params_A = input[0]
        B, params_B = input[1]

        self.real_A = params_A['original'].to(self.devices[0])
        self.real_B = params_B['original'].to(self.devices[0])
        self.aug_A = A.to(self.devices[0])
        self.aug_B = B.to(self.devices[0])

        self.params_A = self.decollate_params(params_A)
        self.params_B = self.decollate_params(params_B)

    def get_indices(self, y):
        """Map audio in [-1, 1] to integer class indices in [0, mu]."""
        y = (y + 1.) * .5 * self.opt.mu
        return y.long()

    def inv_indices(self, y):
        """Inverse of get_indices: class indices back to floats in [-1, 1]."""
        return y.float() / self.opt.mu * 2. - 1.

    def train(self):
        """Run one optimization step for each decoder on its own domain."""
        self.optimizer_D_A.zero_grad()
        real_A = self.get_indices(self.real_A).to(self.devices[-1])
        pred_D_A = self.netD_A((self.aug_A, real_A))
        self.loss_D_A = self.criterionDecode(pred_D_A, real_A)
        self.loss_D_A.backward()
        self.optimizer_D_A.step()

        self.optimizer_D_B.zero_grad()
        real_B = self.get_indices(self.real_B).to(self.devices[-1])
        pred_D_B = self.netD_B((self.aug_B, real_B))
        self.loss_D_B = self.criterionDecode(pred_D_B, real_B)
        self.loss_D_B.backward()
        self.optimizer_D_B.step()

    def test(self):
        """Cross-domain inference with the NV-WaveNet wrappers (no gradients)."""
        with torch.no_grad():
            self.fake_B = self.infer_A.infer(self.netD_A.get_cond_input(self.aug_A), Impl.AUTO)
            self.fake_A = self.infer_B.infer(self.netD_B.get_cond_input(self.aug_B), Impl.AUTO)
            self.fake_B = self.inv_indices(self.fake_B)
            self.fake_A = self.inv_indices(self.fake_A)
예제 #5
0
파일: train.py 프로젝트: kondo-kk/flu_trend
def train(region):
    """Train an encoder/decoder forecaster for one region, save weights,
    and report RMSE / Pearson r on the held-out set.

    Args:
        region: region identifier, used to select the dataset and to name
            the saved model files.

    Returns:
        str: a human-readable summary line "<region> RMSE <rmse> r <r>".
    """
    # Fixed seeds for reproducibility of both numpy and torch RNG streams.
    np.random.seed(0)
    torch.manual_seed(0)

    # Hyper-parameters (hard-coded for this experiment).
    input_len = 10
    encoder_units = 32
    decoder_units = 64
    encoder_rnn_layers = 3
    encoder_dropout = 0.2
    decoder_dropout = 0.2
    input_size = 2
    output_size = 1
    predict_len = 5
    batch_size = 16
    epochs = 500
    force_teacher = 0.8  # probability of teacher forcing per decode step

    train_dataset, test_dataset, train_max, train_min = create_dataset(
        input_len, predict_len, region)
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

    enc = Encoder(input_size, encoder_units, input_len,
                  encoder_rnn_layers, encoder_dropout)
    # Decoder consumes the bidirectional(?) encoder output, hence units*2
    # — presumably the encoder is bidirectional; TODO confirm in Encoder.
    dec = Decoder(encoder_units*2, decoder_units, input_len,
                  input_len, decoder_dropout, output_size)

    optimizer = AdaBound(list(enc.parameters()) +
                         list(dec.parameters()), 0.01, final_lr=0.1)
    # optimizer = optim.Adam(list(enc.parameters()) + list(dec.parameters()), 0.01)
    criterion = nn.MSELoss()

    mb = master_bar(range(epochs))
    for ep in mb:
        train_loss = 0
        enc.train()
        dec.train()
        for encoder_input, decoder_input, target in progress_bar(train_loader, parent=mb):
            optimizer.zero_grad()
            enc_vec = enc(encoder_input)
            # Decoder hidden state starts from the encoder's last time step;
            # only the cell state comes from initHidden.
            h = enc_vec[:, -1, :]
            _, c = dec.initHidden(batch_size)
            x = decoder_input[:, 0]
            pred = []
            for pi in range(predict_len):
                x, h, c = dec(x, h, c, enc_vec)
                rand = np.random.random()
                pred += [x]
                # Teacher forcing: with prob force_teacher feed the ground
                # truth instead of the model's own prediction.
                if rand < force_teacher:
                    x = decoder_input[:, pi]
            pred = torch.cat(pred, dim=1)
            # loss = quantile_loss(pred, target)
            loss = criterion(pred, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        test_loss = 0
        enc.eval()
        dec.eval()
        for encoder_input, decoder_input, target in progress_bar(test_loader, parent=mb):
            with torch.no_grad():
                enc_vec = enc(encoder_input)
                h = enc_vec[:, -1, :]
                _, c = dec.initHidden(batch_size)
                x = decoder_input[:, 0]
                pred = []
                for pi in range(predict_len):
                    x, h, c = dec(x, h, c, enc_vec)
                    pred += [x]
                pred = torch.cat(pred, dim=1)
            # loss = quantile_loss(pred, target)
            loss = criterion(pred, target)
            test_loss += loss.item()
        print(
            f"Epoch {ep} Train Loss {train_loss/len(train_loader)} Test Loss {test_loss/len(test_loader)}")

    if not os.path.exists("models"):
        os.mkdir("models")
    torch.save(enc.state_dict(), f"models/{region}_enc.pth")
    torch.save(dec.state_dict(), f"models/{region}_dec.pth")

    # Re-create the test loader with batch_size=1 for per-sample evaluation.
    test_loader = DataLoader(test_dataset, batch_size=1,
                             shuffle=False, drop_last=False)

    rmse = 0
    p = 0  # NOTE(review): fixed at 0 — only the first forecast step is scored.
    predicted = []
    true_target = []
    enc.eval()
    dec.eval()
    for encoder_input, decoder_input, target in progress_bar(test_loader, parent=mb):
        with torch.no_grad():
            enc_vec = enc(encoder_input)
            x = decoder_input[:, 0]
            # NOTE(review): unlike the loops above, BOTH h and c come from
            # initHidden here (the encoder's last hidden state is unused) —
            # looks inconsistent; confirm which initialization is intended.
            h, c = dec.initHidden(1)
            pred = []
            for pi in range(predict_len):
                x, h, c = dec(x, h, c, enc_vec)
                pred += [x]
            pred = torch.cat(pred, dim=1)
            predicted += [pred[0, p].item()]
            true_target += [target[0, p].item()]
    # Undo min-max normalization before computing metrics.
    predicted = np.array(predicted).reshape(1, -1)
    predicted = predicted * (train_max - train_min) + train_min
    true_target = np.array(true_target).reshape(1, -1)
    true_target = true_target * (train_max - train_min) + train_min
    rmse, peasonr = calc_metric(predicted, true_target)
    print(f"{region} RMSE {rmse}")
    print(f"{region} r {peasonr[0]}")
    return f"{region} RMSE {rmse} r {peasonr[0]}"
예제 #6
0
class ECGTrainer(object):
    """Trainer for a DenseNet-based multi-label ECG classifier.

    Builds model, combo loss, AdaBound optimizer and an LR scheduler in the
    constructor; run() drives the train/valid loop, checkpoints the best
    model by validation F1, and saves a searched decision threshold.
    """

    def __init__(self, block_config='small', num_threads=2):
        # block_config selects the DenseNet block layout; num_threads limits
        # intra-op CPU parallelism.
        torch.set_num_threads(num_threads)
        self.n_epochs = 60
        self.batch_size = 128
        self.scheduler = None
        self.num_threads = num_threads
        self.cuda = torch.cuda.is_available()

        if block_config == 'small':
            self.block_config = (3, 6, 12, 8)
        else:
            self.block_config = (6, 12, 24, 16)

        self.__build_model()
        self.__build_criterion()
        self.__build_optimizer()
        self.__build_scheduler()
        return

    def __build_model(self):
        """Instantiate the 55-class DenseNet and move it to GPU if available."""
        self.model = DenseNet(
            num_classes=55, block_config=self.block_config
        )
        if self.cuda:
            self.model.cuda()
        return

    def __build_criterion(self):
        """Combined loss: multilabel-softmargin + F1 + focal, weighted 1:1:3."""
        self.criterion = ComboLoss(
            losses=['mlsml', 'f1', 'focal'], weights=[1, 1, 3]
        )
        return

    def __build_optimizer(self):
        """AdaBound optimizer (amsbound variant) over all model parameters."""
        opt_params = {'lr': 1e-3, 'weight_decay': 0.0,
                      'params': self.model.parameters()}
        self.optimizer = AdaBound(amsbound=True, **opt_params)
        return

    def __build_scheduler(self):
        """Reduce LR when validation F1 ('max' mode) plateaus for 5 epochs."""
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, 'max', factor=0.333, patience=5,
            verbose=True, min_lr=1e-5)
        return

    def run(self, trainset, validset, model_dir):
        """Train for n_epochs, checkpointing the best-F1 model into model_dir.

        Args:
            trainset: training samples handed to ECGLoader.
            validset: validation samples handed to ECGLoader.
            model_dir: output directory for 'model.pth' and 'threshold.npy'.
        """
        print('=' * 100 + '\n' + 'TRAINING MODEL\n' + '-' * 100 + '\n')
        model_path = os.path.join(model_dir, 'model.pth')
        thresh_path = os.path.join(model_dir, 'threshold.npy')

        dataloader = {
            'train': ECGLoader(trainset, self.batch_size, True, self.num_threads).build(),
            'valid': ECGLoader(validset, 64, False, self.num_threads).build()
        }

        best_metric, best_preds = None, None
        for epoch in range(self.n_epochs):
            e_message = '[EPOCH {:0=3d}/{:0=3d}]'.format(epoch + 1, self.n_epochs)

            for phase in ['train', 'valid']:
                ep_message = e_message + '[' + phase.upper() + ']'
                if phase == 'train':
                    self.model.train()
                else:
                    self.model.eval()

                losses, preds, labels = [], [], []
                batch_num = len(dataloader[phase])
                for ith_batch, data in enumerate(dataloader[phase]):
                    ecg, label = [d.cuda() for d in data] if self.cuda else data

                    pred = self.model(ecg)
                    loss = self.criterion(pred, label)
                    # Backprop only in the train phase; valid phase just scores.
                    if phase == 'train':
                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()

                    pred = torch.sigmoid(pred)
                    pred = pred.data.cpu().numpy()
                    label = label.data.cpu().numpy()

                    # Binarize at 0.5 for the per-batch F1 progress readout.
                    bin_pred = np.copy(pred)
                    bin_pred[bin_pred > 0.5] = 1
                    bin_pred[bin_pred <= 0.5] = 0
                    f1 = f1_score(label.flatten(), bin_pred.flatten())

                    losses.append(loss.item())
                    preds.append(pred)
                    labels.append(label)

                    sr_message = '[STEP {:0=3d}/{:0=3d}]-[Loss: {:.6f} F1: {:.6f}]'
                    sr_message = ep_message + sr_message
                    print(sr_message.format(ith_batch + 1, batch_num, loss, f1), end='\r')

                # Epoch-level metrics over the accumulated predictions.
                preds = np.concatenate(preds, axis=0)
                labels = np.concatenate(labels, axis=0)
                bin_preds = np.copy(preds)
                bin_preds[bin_preds > 0.5] = 1
                bin_preds[bin_preds <= 0.5] = 0

                avg_loss = np.mean(losses)
                avg_f1 = f1_score(labels.flatten(), bin_preds.flatten())
                er_message = '-----[Loss: {:.6f} F1: {:.6f}]'
                er_message = '\n\033[94m' + ep_message + er_message + '\033[0m'
                print(er_message.format(avg_loss, avg_f1))

                if phase == 'valid':
                    # Scheduler steps on validation F1 ('max' mode above).
                    if self.scheduler is not None:
                        self.scheduler.step(avg_f1)
                    if best_metric is None or best_metric < avg_f1:
                        best_metric = avg_f1
                        best_preds = [labels, preds]
                        best_loss_metrics = [epoch + 1, avg_loss, avg_f1]
                        torch.save(self.model.state_dict(), model_path)
                        print('[Best validation metric, model: {}]'.format(model_path))
                    print()

        # Search a per-class threshold that maximizes F1 on the best epoch's
        # raw predictions, and persist it next to the model.
        best_f1, best_th = best_f1_score(*best_preds)
        np.save(thresh_path, np.array(best_th))
        print('[Searched Best F1: {:.6f}]\n'.format(best_f1))
        res_message = '[VALIDATION PERFORMANCE: BEST F1]' + '\n' \
            + '[EPOCH:{} LOSS:{:.6f} F1:{:.6f} BEST F1:{:.6f}]\n'.format(
                best_loss_metrics[0], best_loss_metrics[1],
                best_loss_metrics[2], best_f1) \
            + '[BEST THRESHOLD:\n{}]\n'.format(best_th) \
            + '=' * 100 + '\n'
        print(res_message)
        return
예제 #7
0
class TrainNetwork(object):
    """The main train network.

    Builds dataset queues, model, criterion, optimizer and scheduler from the
    parsed args, optionally resumes from a checkpoint, and drives the
    train/validate loop in run().
    """

    def __init__(self, args):
        super(TrainNetwork, self).__init__()
        self.args = args
        self.dur_time = 0  # accumulated wall time carried across resumes
        self.logger = self._init_log()

        # Training is GPU-only; bail out early when CUDA is missing.
        if not torch.cuda.is_available():
            self.logger.info('no gpu device available')
            sys.exit(1)

        self._init_hyperparam()
        self._init_random_and_device()
        self._init_model()

    def _init_hyperparam(self):
        """Override dataset-dependent hyper-parameters on self.args in place."""
        if 'cifar100' == self.args.train_dataset:
            # cifar10:  6000 images per class, 10 classes, 50000 training images and 10000 test images
            # cifar100: 600 images per class, 100 classes, 500 training images and 100 testing images per class
            self.args.num_classes = 100
            self.args.layers = 20
            self.args.data = '/train_tiny_data/train_data/cifar100'
        elif 'imagenet' == self.args.train_dataset:
            self.args.data = '/train_data/imagenet'
            self.args.num_classes = 1000
            self.args.weight_decay = 3e-5
            self.args.report_freq = 100
            self.args.init_channels = 50
            self.args.drop_path_prob = 0
        elif 'tiny-imagenet' == self.args.train_dataset:
            self.args.data = '/train_tiny_data/train_data/tiny-imagenet'
            self.args.num_classes = 200
        elif 'food101' == self.args.train_dataset:
            self.args.data = '/train_tiny_data/train_data/food-101'
            self.args.num_classes = 101
            self.args.init_channels = 48

    def _init_log(self):
        """Create the experiment directory and a file+stdout logger."""
        self.args.save = '../logs/eval/' + self.args.arch + '/' + self.args.train_dataset + '/eval-{}-{}'.format(self.args.save, time.strftime('%Y%m%d-%H%M'))
        dutils.create_exp_dir(self.args.save, scripts_to_save=None)

        log_format = '%(asctime)s %(message)s'
        logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                            format=log_format, datefmt='%m/%d %I:%M:%S %p')
        fh = logging.FileHandler(os.path.join(self.args.save, 'log.txt'))
        fh.setFormatter(logging.Formatter(log_format))
        logger = logging.getLogger('Architecture Training')
        logger.addHandler(fh)
        return logger

    def _init_random_and_device(self):
        # Set random seed and cuda device
        np.random.seed(self.args.seed)
        cudnn.benchmark = True
        torch.manual_seed(self.args.seed)
        cudnn.enabled = True
        torch.cuda.manual_seed(self.args.seed)
        # Pick the GPU with the most free memory (device 0 in multi-GPU mode).
        max_free_gpu_id, gpus_info = dutils.get_gpus_memory_info()
        self.device_id = max_free_gpu_id
        self.gpus_info = gpus_info
        self.device = torch.device('cuda:{}'.format(0 if self.args.multi_gpus else self.device_id))

    def _init_model(self):
        """Build data queues, network, criterion, optimizer and scheduler;
        optionally resume all of them from self.args.resume."""

        self.train_queue, self.valid_queue = self._load_dataset_queue()

        def _init_scheduler():
            # Cosine annealing for CIFAR, fixed-step decay otherwise.
            if 'cifar' in self.args.train_dataset:
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, float(self.args.epochs))
            else:
                scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, self.args.decay_period,
                                                            gamma=self.args.gamma)
            return scheduler

        genotype = eval('geno_types.%s' % self.args.arch)
        reduce_level = (0 if 'cifar10' in self.args.train_dataset else 0)
        model = EvalNetwork(self.args.init_channels, self.args.num_classes, 0,
                            self.args.layers, self.args.auxiliary, genotype, reduce_level)

        # Try move model to multi gpus
        if torch.cuda.device_count() > 1 and self.args.multi_gpus:
            self.logger.info('use: %d gpus', torch.cuda.device_count())
            model = nn.DataParallel(model)
        else:
            self.logger.info('gpu device = %d' % self.device_id)
            torch.cuda.set_device(self.device_id)
        self.model = model.to(self.device)

        self.logger.info('param size = %fM', dutils.calc_parameters_count(model))

        # Label smoothing for the larger label spaces (imagenet-like datasets).
        criterion = nn.CrossEntropyLoss()
        if self.args.num_classes >= 50:
            criterion = CrossEntropyLabelSmooth(self.args.num_classes, self.args.label_smooth)
        self.criterion = criterion.to(self.device)

        if self.args.opt == 'adam':
            self.optimizer = torch.optim.Adamax(
                model.parameters(),
                self.args.learning_rate,
                weight_decay=self.args.weight_decay
            )
        elif self.args.opt == 'adabound':
            self.optimizer = AdaBound(model.parameters(),
                                      self.args.learning_rate,
                                      weight_decay=self.args.weight_decay)
        else:
            self.optimizer = torch.optim.SGD(
                model.parameters(),
                self.args.learning_rate,
                momentum=self.args.momentum,
                weight_decay=self.args.weight_decay
            )

        self.best_acc_top1 = 0
        # optionally resume from a checkpoint
        if self.args.resume:
            if os.path.isfile(self.args.resume):
                print("=> loading checkpoint {}".format(self.args.resume))
                checkpoint = torch.load(self.args.resume)
                self.dur_time = checkpoint['dur_time']
                self.args.start_epoch = checkpoint['epoch']
                self.best_acc_top1 = checkpoint['best_acc_top1']
                self.args.drop_path_prob = checkpoint['drop_path_prob']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(self.args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.args.resume))

        # Scheduler must be created after the optimizer exists (it wraps it),
        # then restored separately when resuming.
        self.scheduler = _init_scheduler()
        # reload the scheduler if possible
        if self.args.resume and os.path.isfile(self.args.resume):
            checkpoint = torch.load(self.args.resume)
            self.scheduler.load_state_dict(checkpoint['scheduler'])

    def _load_dataset_queue(self):
        """Return (train_queue, valid_queue) DataLoaders for the chosen dataset."""
        if 'cifar' in self.args.train_dataset:
            train_transform, valid_transform = dutils.data_transforms_cifar(self.args)
            if 'cifar10' == self.args.train_dataset:
                train_data = dset.CIFAR10(root=self.args.data, train=True, download=True, transform=train_transform)
                valid_data = dset.CIFAR10(root=self.args.data, train=False, download=True, transform=valid_transform)
            else:
                train_data = dset.CIFAR100(root=self.args.data, train=True, download=True, transform=train_transform)
                valid_data = dset.CIFAR100(root=self.args.data, train=False, download=True, transform=valid_transform)

            train_queue = torch.utils.data.DataLoader(
                train_data, batch_size = self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4
            )
            valid_queue = torch.utils.data.DataLoader(
                valid_data, batch_size = self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4
            )
        elif 'tiny-imagenet' == self.args.train_dataset:
            train_transform, valid_transform = dutils.data_transforms_tiny_imagenet()
            train_data = dartsdset.TinyImageNet200(self.args.data, train=True, download=True, transform=train_transform)
            valid_data = dartsdset.TinyImageNet200(self.args.data, train=False, download=True, transform=valid_transform)
            train_queue = torch.utils.data.DataLoader(
                train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4
            )
            valid_queue = torch.utils.data.DataLoader(
                valid_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4
            )
        elif 'imagenet' == self.args.train_dataset:
            traindir = os.path.join(self.args.data, 'train')
            validdir = os.path.join(self.args.data, 'val')
            train_transform, valid_transform = dutils.data_transforms_imagenet()
            train_data = dset.ImageFolder(
                traindir,train_transform)
            valid_data = dset.ImageFolder(
                validdir,valid_transform)

            train_queue = torch.utils.data.DataLoader(
                train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4)

            valid_queue = torch.utils.data.DataLoader(
                valid_data, batch_size=self.args.batch_size, shuffle=False, pin_memory=True, num_workers=4)
        elif 'food101' == self.args.train_dataset:
            traindir = os.path.join(self.args.data, 'train')
            validdir = os.path.join(self.args.data, 'val')
            train_transform, valid_transform = dutils.data_transforms_food101()
            train_data = dset.ImageFolder(
                traindir,train_transform)
            valid_data = dset.ImageFolder(
                validdir,valid_transform)

            train_queue = torch.utils.data.DataLoader(
                train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4)

            valid_queue = torch.utils.data.DataLoader(
                valid_data, batch_size=self.args.batch_size, shuffle=False, pin_memory=True, num_workers=4)

        return train_queue, valid_queue

    def run(self):
        """Main epoch loop: train, validate, checkpoint (best top-1 tracked)."""
        self.logger.info('args = %s', self.args)
        run_start = time.time()
        for epoch in range(self.args.start_epoch, self.args.epochs):
            # NOTE(review): scheduler.step() before the epoch's training keeps
            # the original schedule; newer PyTorch recommends stepping after
            # the optimizer — left unchanged to preserve behavior.
            self.scheduler.step()
            self.logger.info('epoch % d / %d  lr %e', epoch, self.args.epochs, self.scheduler.get_lr()[0])

            if self.args.no_dropout:
                self.model._drop_path_prob = 0
            else:
                # Linearly ramp drop-path probability over the training run.
                self.model._drop_path_prob = self.args.drop_path_prob * epoch / self.args.epochs
                self.logger.info('drop_path_prob %e', self.model._drop_path_prob)

            train_acc, train_obj = self.train()
            self.logger.info('train loss %e, train acc %f', train_obj, train_acc)

            valid_acc_top1, valid_acc_top5, valid_obj = self.infer()
            self.logger.info('valid loss %e, top1 valid acc %f top5 valid acc %f',
                        valid_obj, valid_acc_top1, valid_acc_top5)
            self.logger.info('best valid acc %f', self.best_acc_top1)

            is_best = False
            if valid_acc_top1 > self.best_acc_top1:
                self.best_acc_top1 = valid_acc_top1
                is_best = True

            dutils.save_checkpoint({
                'epoch': epoch+1,
                'dur_time': self.dur_time + time.time() - run_start,
                'state_dict': self.model.state_dict(),
                'drop_path_prob': self.args.drop_path_prob,
                'best_acc_top1': self.best_acc_top1,
                'optimizer': self.optimizer.state_dict(),
                'scheduler': self.scheduler.state_dict()
            }, is_best, self.args.save)
        self.logger.info('train epoches %d, best_acc_top1 %f, dur_time %s',
                         self.args.epochs, self.best_acc_top1, dutils.calc_time(self.dur_time + time.time() - run_start))

    def train(self):
        """One training epoch; returns (top1_avg, loss_avg)."""
        objs = dutils.AverageMeter()
        top1 = dutils.AverageMeter()
        top5 = dutils.AverageMeter()

        self.model.train()

        for step, (input, target) in enumerate(self.train_queue):

            input = input.cuda(self.device, non_blocking=True)
            target = target.cuda(self.device, non_blocking=True)

            self.optimizer.zero_grad()
            logits, logits_aux = self.model(input)
            loss = self.criterion(logits, target)
            if self.args.auxiliary:
                loss_aux = self.criterion(logits_aux, target)
                loss += self.args.auxiliary_weight*loss_aux
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
            self.optimizer.step()

            prec1, prec5 = dutils.accuracy(logits, target, topk=(1,5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            # BUGFIX: was `args.report_freq` (undefined module global) —
            # use the args stored on the instance.
            if step % self.args.report_freq == 0:
                self.logger.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

        return top1.avg, objs.avg

    def infer(self):
        """One validation pass; returns (top1_avg, top5_avg, loss_avg)."""
        objs = dutils.AverageMeter()
        top1 = dutils.AverageMeter()
        top5 = dutils.AverageMeter()
        self.model.eval()
        with torch.no_grad():
            for step, (input, target) in enumerate(self.valid_queue):
                input = input.cuda(self.device, non_blocking=True)
                target = target.cuda(self.device, non_blocking=True)

                logits, _ = self.model(input)
                loss = self.criterion(logits, target)

                prec1, prec5 = dutils.accuracy(logits, target, topk=(1,5))
                n = input.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

                # BUGFIX: was `args.report_freq` (undefined module global).
                if step % self.args.report_freq == 0:
                    self.logger.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
            return top1.avg, top5.avg, objs.avg
예제 #8
0
    try:
        for epoch in range(opt.max_epoch):
            scheduler.step()
            model.train()
            callback_manager.on_epoch_start(epoch)

            for i, data in enumerate(train_loader):
                callback_manager.on_batch_start(n_batch=i)
                data_input, label = data
                data_input = data_input.to(device)
                label = label.to(device).long()
                feature = model(data_input)
                output = metric_fc(feature, label)
                loss = criterion(output, label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                iters = epoch * len(train_loader) + i

                metric = calculate_metrics(output, label)
                metric[Config.loss] = loss.item()
                metric['lr'] = get_lr(optimizer)
                callback_manager.on_batch_end(loss=loss.item(),
                                              n_batch=i,
                                              train_metric=metric)
                if Config.is_debug:
                    break
            if epoch % opt.save_interval == 0 or epoch == opt.max_epoch:
                save_model(model, opt.checkpoints_path, opt.backbone, epoch)
예제 #9
0
class OriginalModel(BaseModel):
    """Adversarial audio-translation model between two domains (A and B).

    A shared encoder ``netE`` maps audio from either domain into a common
    latent space, a domain classifier ``netC`` predicts which domain a
    latent code came from, and two WaveNet decoders ``netD_A``/``netD_B``
    reconstruct raw (mu-law indexed) audio of their own domain conditioned
    on the latent code.  Training alternates between the classifier and the
    encoder+decoders (see ``train``); at test time NVWaveNet inference
    engines built from the trained decoders synthesize the translations.
    """
    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        """Add new model-specific options and rewrite default values for existing options.

        Parameters:
            parser -- the option parser
            is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options.

        Returns:
            the modified parser.
        """
        preprocess = 'normalize,mulaw,cqt'
        parser.set_defaults(preprocess=preprocess, flatten=True)
        parser.add_argument('--wavenet_layers', type=int, default=40, help='wavenet layers')
        parser.add_argument('--wavenet_blocks', type=int, default=10, help='wavenet layers')
        parser.add_argument('--width', type=int, default=128, help='width')
        parser.add_argument('--dc_lambda', type=float, default=0.01, help='dc lambda')
        parser.add_argument('--tanh', action='store_true', help='tanh')
        parser.add_argument('--sigmoid', action='store_true', help='sigmoid')
        return parser

    def __init__(self, opt):
        """Build encoder/classifier/decoders; in train mode also losses and
        optimizers, in test mode load weights and build inference engines."""
        BaseModel.__init__(self, opt)  # call the initialization method of BaseModel
        self.loss_names = ['C_A_right', 'C_B_right', 'C_A_wrong', 'C_B_wrong', 'D_A', 'D_B']
        if opt.isTrain:
            self.output_names = [] # ['aug_A', 'aug_B', 'rec_A', 'rec_B']
        else:
            self.output_names = ['real_A', 'real_B', 'fake_B', 'fake_A']
        self.params_names = ['params_A', 'params_B'] * 2
        self.model_names = ['E', 'C', 'D_A', 'D_B']

        # use get generator
        self.netE = getGenerator(self.devices[0], opt)
        self.netC = getDiscriminator(opt, self.devices[0])

        # Decoders live on the last device; encoder/classifier on the first.
        self.netD_A = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks,
                              opt.width, 256, 256,
                              opt.tensor_height, 1, 1).to(self.devices[-1]) # opt.pool_length, opt.pool_length
        self.netD_B = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks,
                              opt.width, 256, 256,
                              opt.tensor_height, 1, 1).to(self.devices[-1]) # opt.pool_length, opt.pool_length
        # NOTE(review): self.softmax is defined but not used in any method
        # visible here -- possibly needed by external callers; confirm.
        self.softmax = nn.LogSoftmax(dim=1) # (1, 256, audio_len) -> pick 256

        if self.isTrain:
            # Domain labels for the classifier: 0 for domain A, 1 for domain B.
            self.A_target = torch.zeros(opt.batch_size).to(self.devices[0])
            self.B_target = torch.ones(opt.batch_size).to(self.devices[0])
            self.criterionDC = nn.MSELoss(reduction='mean')
            self.criterionDecode = nn.CrossEntropyLoss(reduction='mean')
            self.optimizer_C = AdaBound(self.netC.parameters(), lr=opt.lr, final_lr=0.1)
            self.optimizer_D = AdaBound(itertools.chain(self.netE.parameters(), self.netD_A.parameters(), self.netD_B.parameters()), lr=opt.lr, final_lr=0.1)
            self.optimizers = [self.optimizer_C, self.optimizer_D]
        else:
            self.preprocesses = []
            # TODO change structure of test.py and setup() instead
            load_suffix = str(opt.load_iter) if opt.load_iter > 0 else opt.epoch
            self.load_networks(load_suffix)
            self.netC.eval()
            self.netD_A.eval()
            self.netD_B.eval()

            # Fast autoregressive inference engines built from trained decoder weights.
            self.infer_A = NVWaveNet(**(self.netD_A.export_weights()))
            self.infer_B = NVWaveNet(**(self.netD_B.export_weights()))

    def set_input(self, input):
        """Unpack a two-domain batch: input[0] is (augmented A, params_A),
        input[1] is (augmented B, params_B); 'original' holds the clean audio."""
        A, params_A = input[0]
        B, params_B = input[1]

        self.real_A = params_A['original'].to(self.devices[0])
        self.real_B = params_B['original'].to(self.devices[0])
        self.aug_A = A.to(self.devices[0])
        self.aug_B = B.to(self.devices[0])

        self.params_A = self.decollate_params(params_A)
        self.params_B = self.decollate_params(params_B)

    def get_indices(self, y):
        """Map audio samples in [-1, 1] to integer mu-law bucket indices in [0, mu]."""
        y = (y + 1.) * .5 * self.opt.mu
        return y.long()

    def inv_indices(self, y):
        """Inverse of get_indices: map bucket indices [0, mu] back to [-1, 1]."""
        return y.float() / self.opt.mu * 2. - 1.

    def train(self):
        """Run one alternating optimization step on the current mini-batch.

        Phase 1 (optimizer_C): train the domain classifier to recognize the
        true domain of each latent code.
        Phase 2 (optimizer_D): re-encode the inputs, then train the encoder
        and decoders to reconstruct the raw audio while fooling the
        classifier (the dc_lambda-weighted confusion term is subtracted).
        """
        self.optimizer_C.zero_grad()
        encoded_A = self.netE(self.aug_A) # Input range: (-1, 1) Output: R^64
        encoded_A = nn.functional.interpolate(encoded_A, size=self.opt.audio_length).to(self.devices[-1])
        pred_C_A = self.netC(encoded_A)
        self.loss_C_A_right = self.opt.dc_lambda * self.criterionDC(pred_C_A, self.A_target)
        self.loss_C_A_right.backward()

        encoded_B = self.netE(self.aug_B)
        encoded_B = nn.functional.interpolate(encoded_B, size=self.opt.audio_length).to(self.devices[-1])
        pred_C_B = self.netC(encoded_B)
        self.loss_C_B_right = self.opt.dc_lambda * self.criterionDC(pred_C_B, self.B_target)
        self.loss_C_B_right.backward()
        self.optimizer_C.step()

        # Phase 2: re-encode (fresh graph after the classifier update).
        self.optimizer_D.zero_grad()
        encoded_A = self.netE(self.aug_A) # Input range: (-1, 1) Output: R^64
        encoded_A = nn.functional.interpolate(encoded_A, size=self.opt.audio_length).to(self.devices[-1])
        pred_C_A = self.netC(encoded_A)
        self.loss_C_A_wrong = self.criterionDC(pred_C_A, self.A_target)
        real_A = self.get_indices(self.real_A).to(self.devices[-1])
        pred_D_A = self.netD_A((encoded_A, real_A))
        self.loss_D_A = self.criterionDecode(pred_D_A, real_A)
        # Reconstruction loss minus the (weighted) domain-confusion term.
        loss = self.loss_D_A - self.opt.dc_lambda * self.loss_C_A_wrong
        loss.backward()

        encoded_B = self.netE(self.aug_B)
        encoded_B = nn.functional.interpolate(encoded_B, size=self.opt.audio_length).to(self.devices[-1])
        pred_C_B = self.netC(encoded_B)
        self.loss_C_B_wrong = self.criterionDC(pred_C_B, self.B_target)
        real_B = self.get_indices(self.real_B).to(self.devices[-1])
        pred_D_B = self.netD_B((encoded_B, real_B))
        self.loss_D_B = self.criterionDecode(pred_D_B, real_B)
        loss = self.loss_D_B - self.opt.dc_lambda * self.loss_C_B_wrong
        loss.backward()
        self.optimizer_D.step()

    def test(self):
        """Translate each domain's audio through the other domain's decoder.

        NOTE(review): ``fake_B`` is produced by the domain-A inference engine
        conditioned on B's latent code (and vice versa) -- confirm this
        naming matches the intended translation direction.
        """
        with torch.no_grad():
            encoded_A = self.netE(self.aug_A)
            encoded_B = self.netE(self.aug_B)
            self.fake_B = self.infer_A.infer(self.netD_A.get_cond_input(encoded_B), Impl.AUTO)
            self.fake_A = self.infer_B.infer(self.netD_B.get_cond_input(encoded_A), Impl.AUTO)
            self.fake_B = self.inv_indices(self.fake_B)
            self.fake_A = self.inv_indices(self.fake_A)
예제 #10
0
def main(args):
    """Train a BiDAF model (word + char embeddings) on SQuAD.

    Sets up logging, TensorBoard and devices, seeds every RNG for
    reproducibility, builds the model (optionally resuming from a
    checkpoint), then runs an AdaBound-optimized training loop with
    gradient clipping, EMA weight averaging, and periodic dev-set
    evaluation with metric-driven checkpointing.

    Args:
        args: Parsed command-line namespace (save paths, hyperparameters,
            record files, evaluation settings).
    """
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    # Scale the effective batch size with the number of DataParallel GPUs.
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  char_vectors=char_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    #optimizer = optim.Adamax(model.parameters(), args.lr,
    #                            weight_decay=args.l2_wd)
    #scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    optimizer = AdaBound(model.parameters())

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    # Derive the starting epoch from the resumed global step count.
    epoch = step // len(train_dataset)
    # BUGFIX: use '<' instead of '!='. If a resumed checkpoint's step count
    # puts `epoch` at or beyond num_epochs, '!=' would never terminate.
    while epoch < args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids, cwf in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                cwf = cwf.to(device)
                optimizer.zero_grad()

                # Forward: model returns log-probabilities for start/end spans.
                log_p1, log_p2 = model(cc_idxs, qc_idxs, cw_idxs, qw_idxs, cwf)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward with gradient clipping for stability.
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                #scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint using EMA-averaged weights,
                    # then restore the raw training weights afterwards.
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
예제 #11
0
class NNProcess:
    """Training pipeline for a ResNet-based stock-movement classifier.

    Builds the stock-graph dataset, creates (or loads) a ResNet classifier,
    then trains it with BCE loss and the AdaBound optimizer, tracking
    per-epoch loss/accuracy history and optionally saving the final model.
    """

    def __init__(self,
                 import_trained=(False, ''),
                 model_pretrained=(True, True),
                 save_model=True,
                 resnet_depth=50,
                 lr=1e-3,
                 momentum=0.09,
                 nesterov=False,
                 threshold=0.5,
                 epochs=50,
                 batch_size=64,
                 train_val_split=0.7,
                 data_interval='1min',
                 predict_period=1,
                 mins_interval=30,
                 start_date='2020-08-24',
                 end_date='2020-08-29'):
        """
        Args:
            import_trained: (load a trained .pth file?, filename without extension)
            model_pretrained: (use a pretrained backbone?, train only the linear layers?)
            save_model: whether to save the model when training finishes
            resnet_depth: depth of the residual network
            lr: learning rate for the optimizer
            momentum: kept for interface compatibility -- unused (AdaBound, not SGD, is used)
            nesterov: kept for interface compatibility -- unused (AdaBound, not SGD, is used)
            threshold: investment threshold; advise to invest if probability > threshold
            epochs: number of times the entire dataset is shown to the network
            batch_size: number of samples per mini-batch
            train_val_split: train/validation split ratio
            data_interval: time interval between datapoints
            predict_period: number of time periods to predict forwards
            mins_interval: number of minutes shown in each graph
            start_date: first date to get data (each day covers 9am-8pm)
            end_date: last date to get data (each day covers 9am-8pm)
        """
        self.__import_trained = import_trained
        self.__model_pretrained = model_pretrained
        self.__saveModel = save_model
        self.__resnet_depth = resnet_depth
        self.__threshold = threshold
        self.__epochs = epochs
        self.__batch_size = batch_size
        data = dataset.stockGraphGenerator(split=train_val_split,
                                           interval=data_interval,
                                           predict_period=predict_period,
                                           mins_interval=mins_interval,
                                           start_date=start_date,
                                           end_date=end_date,
                                           stride=15)
        self.__train_set = torch.utils.data.DataLoader(
            data.train_data, batch_size=self.__batch_size, shuffle=False)
        self.__test_set = torch.utils.data.DataLoader(
            data.test_data, batch_size=self.__batch_size, shuffle=False)
        self.__model = self.__loadmodelInstance(
        ) if self.__import_trained[0] else self.__createmodelInstance()
        self.__criterion = nn.BCELoss()
        self.__optim = AdaBound(self.__model.parameters(),
                                amsbound=True,
                                lr=lr,
                                final_lr=0.1)
        # [train_loss, test_loss, train_acc, test_acc], one entry per epoch.
        self.__trainHist = [[], [], [], []]

    def __loadmodelInstance(self):
        """Load a previously saved model from '<name>.pth'.

        map_location='cpu' lets GPU-saved checkpoints load on CPU-only
        machines; the model is moved back to CUDA below when available.
        """
        model = torch.load(self.__import_trained[1] + '.pth', map_location='cpu')
        return model.cuda() if torch.cuda.is_available() else model

    def __createmodelInstance(self):
        """Create a fresh ResNet classifier, on GPU when available."""
        return ResNetClassifier(
            pretrained=self.__model_pretrained,
            resnet_depth=self.__resnet_depth).cuda(
            ) if torch.cuda.is_available() else ResNetClassifier(
                pretrained=self.__model_pretrained,
                resnet_depth=self.__resnet_depth)

    def __softmax(self, x):
        """Numerically stable softmax over a 1-D numpy array."""
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

    def __setModelTrain(self):
        """Switch the model to training mode (enables dropout/batch-norm updates)."""
        self.__model = self.__model.train()

    def __setModelEval(self):
        """Switch the model to evaluation mode."""
        self.__model = self.__model.eval()

    def fetch_model(self):
        """Return the model in eval mode for external inference."""
        return self.__model.eval()

    def fetch_trainHist(self):
        """Return [train_losses, test_losses, train_accs, test_accs] per epoch."""
        return self.__trainHist

    def __get_acc(self, output, label):
        """Fraction of predictions matching labels after rounding to {0, 1}."""
        output = torch.round(output)
        num_correct = sum(
            [1 if output[i] == label[i] else 0 for i in range(len(output))])
        return num_correct / output.shape[0]

    def train(self):
        """Run the full training loop, alternating train and validation epochs."""
        for epochs in range(self.__epochs):
            start_time = time.time()
            avg_train_loss, avg_test_loss = 0, 0
            avg_train_acc, avg_test_acc = 0, 0
            train_total, test_total = 0, 0
            self.__setModelTrain()
            for im, label in self.__train_set:
                train_total += 1
                im, label = Variable(im), Variable(label)
                pred = self.__model(im)
                train_loss = self.__criterion(pred, label)
                self.__optim.zero_grad()
                train_loss.backward()
                self.__optim.step()
                # Hoisted: loss/accuracy were each computed twice per batch.
                batch_loss = train_loss.item()
                batch_acc = self.__get_acc(pred, label)
                avg_train_loss += batch_loss
                avg_train_acc += batch_acc
                print(
                    'Training Batch No.: {:3d}\nTraining Loss: {:.5f} ; Training Acc.: {:.5f}'
                    .format(train_total, batch_loss, batch_acc))

            self.__setModelEval()
            with torch.no_grad():  # no autograd graph needed during validation
                for im, label in self.__test_set:
                    test_total += 1
                    pred = self.__model(im)
                    test_loss = self.__criterion(pred, label)
                    batch_loss = test_loss.item()
                    batch_acc = self.__get_acc(pred, label)
                    avg_test_loss += batch_loss
                    avg_test_acc += batch_acc
                    print(
                        'Testing Batch No.: {:3d}\nTesting Loss: {:.5f} ; Testing Acc.: {:.5f}'
                        .format(test_total, batch_loss, batch_acc))

            self.__trainHist[0].append(avg_train_loss / train_total)
            self.__trainHist[1].append(avg_test_loss / test_total)
            self.__trainHist[2].append(avg_train_acc / train_total)
            self.__trainHist[3].append(avg_test_acc / test_total)
            print(
                'Epoch: {:3d} / {:3d}\nAverage Training Loss: {:.6f} ; Average Validation Loss: {:.6f}\nTrain Accuracy: {:.3f} ; Test Accuracy: {:.3f}\nTime Taken: {:.6f}\n'
                .format(epochs + 1, self.__epochs,
                        avg_train_loss / train_total,
                        avg_test_loss / test_total,
                        avg_train_acc / train_total,
                        # BUGFIX: was divided by train_total, mis-reporting
                        # test accuracy whenever the loader sizes differ.
                        avg_test_acc / test_total,
                        time.time() - start_time))

        if self.__saveModel:
            torch.save(self.__model, './resnet_market_predictor.pth')
예제 #12
0
class SRSolver(BaseSolver):
    """Training/inference solver for single-image super-resolution models.

    Owns the model, pixel loss, optimizer and LR scheduler; feeds batches
    through preallocated LR/HR tensors; and provides checkpointing, record
    logging, and test-time enhancements (x8 self-ensemble and overlap-crop
    forwarding for low-memory inference).
    """

    def __init__(self, opt):
        super(SRSolver, self).__init__(opt)
        self.train_opt = opt['solver']
        # Preallocated input/target buffers, resized per batch in feed_data().
        self.LR = self.Tensor()
        self.HR = self.Tensor()
        self.SR = None

        # Per-epoch history, persisted to CSV by save_current_log().
        self.records = {'train_loss': [],
                        'val_loss': [],
                        'psnr': [],
                        'ssim': [],
                        'lr': []}

        self.model = create_model(opt)
        self.print_network()

        if self.is_train:
            self.model.train()

            # set cl_loss (per-output weights for curriculum-style training)
            if self.use_cl:
                self.cl_weights = self.opt['solver']['cl_weights']
                assert self.cl_weights, "[Error] 'cl_weights' is not be declared when 'use_cl' is true"

            # set loss
            loss_type = self.train_opt['loss_type']
            if loss_type == 'l1':
                self.criterion_pix = nn.L1Loss()
            elif loss_type == 'l2':
                self.criterion_pix = nn.MSELoss()
            else:
                raise NotImplementedError('Loss type [%s] is not implemented!'%loss_type)

            if self.use_gpu:
                self.criterion_pix = self.criterion_pix.cuda()

            # set optimizer
            weight_decay = self.train_opt['weight_decay'] if self.train_opt['weight_decay'] else 0
            optim_type = self.train_opt['type'].upper()
            if optim_type == "ADAM":
                self.optimizer = optim.Adam(self.model.parameters(),
                                            lr=self.train_opt['learning_rate'], weight_decay=weight_decay)
            elif optim_type == 'ADABOUND':
                self.optimizer = AdaBound(self.model.parameters(),
                                          lr = self.train_opt['learning_rate'], weight_decay=weight_decay)
            elif optim_type == 'SGD':
                self.optimizer = optim.SGD(self.model.parameters(),
                                           lr = self.train_opt['learning_rate'], momentum=0.90, weight_decay=weight_decay)
            else:
                # BUGFIX: message previously said 'Loss type', which was misleading here.
                raise NotImplementedError('Optimizer type [%s] is not implemented!' % optim_type)

            # set lr_scheduler
            if self.train_opt['lr_scheme'].lower() == 'multisteplr':
                self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer,
                                                                self.train_opt['lr_steps'],
                                                                self.train_opt['lr_gamma'])
            elif self.train_opt['lr_scheme'].lower() == 'cos':
                self.scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer,
                                                                      T_max =  self.opt['solver']['num_epochs'],
                                                                      eta_min = self.train_opt['lr_min']
                                                                      )
            else:
                # BUGFIX: message previously claimed only MultiStepLR is supported.
                raise NotImplementedError('Only MultiStepLR and CosineAnnealingLR schemes are supported!')

        self.load()

        print('===> Solver Initialized : [%s] || Use CL : [%s] || Use GPU : [%s]'%(self.__class__.__name__,
                                                                                       self.use_cl, self.use_gpu))
        if self.is_train:
            print("optimizer: ", self.optimizer)
            if self.train_opt['lr_scheme'].lower() == 'multisteplr':
                print("lr_scheduler milestones: %s   gamma: %f"%(self.scheduler.milestones, self.scheduler.gamma))

    def _net_init(self, init_type='kaiming'):
        """Initialize network weights with the given scheme (default Kaiming)."""
        print('==> Initializing the network using [%s]'%init_type)
        init_weights(self.model, init_type)


    def feed_data(self, batch, need_HR=True):
        """Copy a batch's LR (and optionally HR) images into the preallocated buffers."""
        input = batch['LR']
        self.LR.resize_(input.size()).copy_(input)

        if need_HR:
            target = batch['HR']
            self.HR.resize_(target.size()).copy_(target)


    def train_step(self):
        """Run one optimization step over the current batch.

        The batch is split into `split_batch` sub-batches whose gradients are
        accumulated; the optimizer step is skipped when the loss explodes
        past `skip_threshold * last_epoch_loss` for training stability.

        Returns:
            float: accumulated loss over the whole batch.
        """
        self.model.train()
        self.optimizer.zero_grad()

        loss_batch = 0.0
        sub_batch_size = int(self.LR.size(0) / self.split_batch)
        for i in range(self.split_batch):
            loss_sbatch = 0.0
            split_LR = self.LR.narrow(0, i*sub_batch_size, sub_batch_size)
            split_HR = self.HR.narrow(0, i*sub_batch_size, sub_batch_size)
            if self.use_cl:
                # Multi-output model: weighted sum of per-step losses.
                outputs = self.model(split_LR)
                loss_steps = [self.criterion_pix(sr, split_HR) for sr in outputs]
                for step in range(len(loss_steps)):
                    loss_sbatch += self.cl_weights[step] * loss_steps[step]
            else:
                output = self.model(split_LR)
                loss_sbatch = self.criterion_pix(output, split_HR)

            loss_sbatch /= self.split_batch
            loss_sbatch.backward()

            loss_batch += (loss_sbatch.item())

        # for stable training: skip divergent batches entirely
        if loss_batch < self.skip_threshold * self.last_epoch_loss:
            self.optimizer.step()
            self.last_epoch_loss = loss_batch
        else:
            print('[Warning] Skip this batch! (Loss: {})'.format(loss_batch))

        self.model.eval()
        return loss_batch

    def test(self):
        """Run inference on the current LR buffer into self.SR.

        Uses overlap-crop forwarding when `use_chop` is set and x8
        self-ensemble at (non-training) test time when enabled.  Returns the
        pixel loss against HR when training, otherwise None.
        """
        self.model.eval()
        with torch.no_grad():  # inference only; no autograd graph needed
            forward_func = self._overlap_crop_forward if self.use_chop else self.model.forward
            if self.self_ensemble and not self.is_train:
                SR = self._forward_x8(self.LR, forward_func)
            else:
                SR = forward_func(self.LR)

            # Multi-output models return a list; keep the final prediction.
            if isinstance(SR, list):
                self.SR = SR[-1]
            else:
                self.SR = SR

        self.model.train()
        if self.is_train:
            loss_pix = self.criterion_pix(self.SR, self.HR)
            return loss_pix.item()


    def _forward_x8(self, x, forward_function):
        """Self-ensemble: average predictions over 8 flip/transpose variants.

        Each variant is forwarded, the inverse transform is applied to its
        output, and the results are averaged.
        """
        def _transform(v, op):
            # Apply one geometric transform ('v'/'h' flip or 't'ranspose) via numpy.
            v = v.float()

            v2np = v.data.cpu().numpy()
            if op == 'v':
                tfnp = v2np[:, :, :, ::-1].copy()
            elif op == 'h':
                tfnp = v2np[:, :, ::-1, :].copy()
            elif op == 't':
                tfnp = v2np.transpose((0, 1, 3, 2)).copy()

            ret = self.Tensor(tfnp)

            return ret

        # Build all 8 variants: identity, v, h, hv, t, tv, th, thv.
        lr_list = [x]
        for tf in 'v', 'h', 't':
            lr_list.extend([_transform(t, tf) for t in lr_list])

        sr_list = []
        for aug in lr_list:
            sr = forward_function(aug)
            if isinstance(sr, list):
                sr_list.append(sr[-1])
            else:
                sr_list.append(sr)

        # Undo each variant's transforms (inverse order of application).
        for i in range(len(sr_list)):
            if i > 3:
                sr_list[i] = _transform(sr_list[i], 't')
            if i % 4 > 1:
                sr_list[i] = _transform(sr_list[i], 'h')
            if (i % 4) % 2 == 1:
                sr_list[i] = _transform(sr_list[i], 'v')

        output_cat = torch.cat(sr_list, dim=0)
        output = output_cat.mean(dim=0, keepdim=True)

        return output


    def _overlap_crop_forward(self, x, shave=10, min_size=100000, bic=None):
        """Forward in overlapping quadrants for less memory consumption during test.

        Large inputs are recursively split into four overlapping patches
        (overlap width `shave`), forwarded separately (batched across
        `n_GPUs`), and stitched back together at `scale` resolution.
        """
        n_GPUs = 2
        scale = self.scale
        b, c, h, w = x.size()
        h_half, w_half = h // 2, w // 2
        h_size, w_size = h_half + shave, w_half + shave
        lr_list = [
            x[:, :, 0:h_size, 0:w_size],
            x[:, :, 0:h_size, (w - w_size):w],
            x[:, :, (h - h_size):h, 0:w_size],
            x[:, :, (h - h_size):h, (w - w_size):w]]

        if bic is not None:
            bic_h_size = h_size*scale
            bic_w_size = w_size*scale
            bic_h = h*scale
            bic_w = w*scale

            bic_list = [
                bic[:, :, 0:bic_h_size, 0:bic_w_size],
                bic[:, :, 0:bic_h_size, (bic_w - bic_w_size):bic_w],
                bic[:, :, (bic_h - bic_h_size):bic_h, 0:bic_w_size],
                bic[:, :, (bic_h - bic_h_size):bic_h, (bic_w - bic_w_size):bic_w]]

        if w_size * h_size < min_size:
            # Small enough: forward the four patches in n_GPUs-sized batches.
            sr_list = []
            for i in range(0, 4, n_GPUs):
                lr_batch = torch.cat(lr_list[i:(i + n_GPUs)], dim=0)
                if bic is not None:
                    bic_batch = torch.cat(bic_list[i:(i + n_GPUs)], dim=0)

                sr_batch_temp = self.model(lr_batch)

                if isinstance(sr_batch_temp, list):
                    sr_batch = sr_batch_temp[-1]
                else:
                    sr_batch = sr_batch_temp

                sr_list.extend(sr_batch.chunk(n_GPUs, dim=0))
        else:
            # Still too large: recurse on each quadrant.
            sr_list = [
                self._overlap_crop_forward(patch, shave=shave, min_size=min_size) \
                for patch in lr_list
                ]

        # Stitch the four SR patches, discarding the overlapping margins.
        h, w = scale * h, scale * w
        h_half, w_half = scale * h_half, scale * w_half
        h_size, w_size = scale * h_size, scale * w_size
        shave *= scale

        output = x.new(b, c, h, w)
        output[:, :, 0:h_half, 0:w_half] \
            = sr_list[0][:, :, 0:h_half, 0:w_half]
        output[:, :, 0:h_half, w_half:w] \
            = sr_list[1][:, :, 0:h_half, (w_size - w + w_half):w_size]
        output[:, :, h_half:h, 0:w_half] \
            = sr_list[2][:, :, (h_size - h + h_half):h_size, 0:w_half]
        output[:, :, h_half:h, w_half:w] \
            = sr_list[3][:, :, (h_size - h + h_half):h_size, (w_size - w + w_half):w_size]

        return output


    def save_checkpoint(self, epoch, is_best):
        """
        save checkpoint to experimental dir

        Always writes 'last_ckp.pth'; also writes 'best_ckp.pth' when
        `is_best`, and a per-epoch snapshot every `save_ckp_step` epochs.
        """
        filename = os.path.join(self.checkpoint_dir, 'last_ckp.pth')
        print('===> Saving last checkpoint to [%s] ...]'%filename)
        ckp = {
            'epoch': epoch,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_pred': self.best_pred,
            'best_epoch': self.best_epoch,
            'records': self.records
        }
        torch.save(ckp, filename)
        if is_best:
            print('===> Saving best checkpoint to [%s] ...]' % filename.replace('last_ckp','best_ckp'))
            torch.save(ckp, filename.replace('last_ckp','best_ckp'))

        if epoch % self.train_opt['save_ckp_step'] == 0:
            print('===> Saving checkpoint [%d] to [%s] ...]' % (epoch,
                                                                filename.replace('last_ckp','epoch_%d_ckp.pth'%epoch)))

            torch.save(ckp, filename.replace('last_ckp','epoch_%d_ckp.pth'%epoch))


    def load(self):
        """
        load or initialize network
        """
        if (self.is_train and self.opt['solver']['pretrain']) or not self.is_train:
            model_path = self.opt['solver']['pretrained_path']
            if model_path is None: raise ValueError("[Error] The 'pretrained_path' does not declarate in *.json")

            print('===> Loading model from [%s]...' % model_path)
            if self.is_train:
                checkpoint = torch.load(model_path)
                self.model.load_state_dict(checkpoint['state_dict'])

                # if self.opt['solver']['pretrain'] == 'resume':
                #     self.cur_epoch = checkpoint['epoch'] + 1
                #     self.optimizer.load_state_dict(checkpoint['optimizer'])
                #     self.best_pred = checkpoint['best_pred']
                #     self.best_epoch = checkpoint['best_epoch']
                #     self.records = checkpoint['records']

            else:
                checkpoint = torch.load(model_path)
                if 'state_dict' in checkpoint.keys(): checkpoint = checkpoint['state_dict']
                # BUGFIX: branches were swapped. A plain (non-DataParallel)
                # model has no `.module`, so the original raised
                # AttributeError on every non-DataParallel load; DataParallel
                # checkpoints load through the wrapped `.module`.
                load_func = self.model.module.load_state_dict if isinstance(self.model, nn.DataParallel) \
                    else self.model.load_state_dict
                load_func(checkpoint)
        else:
            print('===> Initialize model')
            self._net_init()


    def get_current_visual(self, need_np=True, need_HR=True):
        """
        return LR SR (HR) images

        With need_np, tensors are converted to numpy arrays in the
        configured rgb_range.
        """
        out_dict = OrderedDict()
        out_dict['LR'] = self.LR.data[0].float().cpu()
        out_dict['SR'] = self.SR.data[0].float().cpu()
        if need_np:  out_dict['LR'], out_dict['SR'] = util.Tensor2np([out_dict['LR'], out_dict['SR']],
                                                                        self.opt['rgb_range'])
        if need_HR:
            out_dict['HR'] = self.HR.data[0].float().cpu()
            if need_np: out_dict['HR'] = util.Tensor2np([out_dict['HR']],
                                                           self.opt['rgb_range'])[0]
        return out_dict


    def save_current_visual(self, epoch, iter):
        """
        save visual results for comparison

        Writes an HR/SR side-by-side grid every `save_vis_step` epochs.
        """
        if epoch % self.save_vis_step == 0:
            visuals_list = []
            visuals = self.get_current_visual(need_np=False)
            visuals_list.extend([util.quantize(visuals['HR'].squeeze(0), self.opt['rgb_range']),
                                 util.quantize(visuals['SR'].squeeze(0), self.opt['rgb_range'])])
            visual_images = torch.stack(visuals_list)
            visual_images = thutil.make_grid(visual_images, nrow=2, padding=5)
            visual_images = visual_images.byte().permute(1, 2, 0).numpy()
            # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.2;
            # consider imageio.imwrite if the environment is upgraded.
            misc.imsave(os.path.join(self.visual_dir, 'epoch_%d_img_%d.png' % (epoch, iter + 1)),
                        visual_images)


    def get_current_learning_rate(self):
        """Return the learning rate currently set on the optimizer."""
        # return self.scheduler.get_lr()[-1]
        return self.optimizer.param_groups[0]['lr']


    def update_learning_rate(self, epoch):
        """Advance the LR scheduler to the given epoch."""
        self.scheduler.step(epoch)


    def get_current_log(self):
        """Return a snapshot of training progress for external logging."""
        log = OrderedDict()
        log['epoch'] = self.cur_epoch
        log['best_pred'] = self.best_pred
        log['best_epoch'] = self.best_epoch
        log['records'] = self.records
        return log


    def set_current_log(self, log):
        """Restore training progress from a snapshot produced by get_current_log()."""
        self.cur_epoch = log['epoch']
        self.best_pred = log['best_pred']
        self.best_epoch = log['best_epoch']
        self.records = log['records']


    def save_current_log(self):
        """Dump the per-epoch records to 'train_records.csv' (1-indexed epochs)."""
        data_frame = pd.DataFrame(
            data={'train_loss': self.records['train_loss']
                , 'val_loss': self.records['val_loss']
                , 'psnr': self.records['psnr']
                , 'ssim': self.records['ssim']
                , 'lr': self.records['lr']
                  },
            index=range(1, self.cur_epoch + 1)
        )
        data_frame.to_csv(os.path.join(self.records_dir, 'train_records.csv'),
                          index_label='epoch')


    def print_network(self):
        """
        print network summary including module and number of parameters
        """
        s, n = self.get_network_description(self.model)
        if isinstance(self.model, nn.DataParallel):
            net_struc_str = '{} - {}'.format(self.model.__class__.__name__,
                                                 self.model.module.__class__.__name__)
        else:
            net_struc_str = '{}'.format(self.model.__class__.__name__)

        print("==================================================")
        print("===> Network Summary\n")
        net_lines = []
        line = s + '\n'
        print(line)
        net_lines.append(line)
        line = 'Network structure: [{}], with parameters: [{:,d}]'.format(net_struc_str, n)
        print(line)
        net_lines.append(line)

        if self.is_train:
            with open(os.path.join(self.exp_root, 'network_summary.txt'), 'w') as f:
                f.writelines(net_lines)

        print("==================================================")
예제 #13
0
class TranslatorModel(BaseModel):
    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        """Add new model-specific options and rewrite default values for existing options.

        Parameters:
            parser -- the option parser
            is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options.

        Returns:
            the modified parser.
        """
        if is_train:
            opt, _ = parser.parse_known_args()
            # Strip preprocessing steps incompatible with this model and
            # force mu-law companding, which the WaveNet decoders expect.
            preprocess = opt.preprocess \
                            .replace('mel', '') \
                            .replace('normalize', '') \
                            .replace('stft', '') + ',mulaw'
        else:
            preprocess = 'mulaw'
        parser.set_defaults(preprocess=preprocess)
        parser.add_argument('--wavenet_layers',
                            type=int,
                            default=30,
                            help='wavenet layers')
        # Fixed copy-pasted help text: this flag controls blocks, not layers.
        parser.add_argument('--wavenet_blocks',
                            type=int,
                            default=10,
                            help='wavenet blocks')
        parser.add_argument('--bottleneck',
                            type=int,
                            default=64,
                            help='bottleneck')
        parser.add_argument('--dc_width',
                            type=int,
                            default=128,
                            help='dc width')
        parser.add_argument('--width', type=int, default=128, help='width')
        parser.add_argument('--pool_length',
                            type=int,
                            default=1024,
                            help='pool length')
        parser.add_argument('--dc_lambda',
                            type=float,
                            default=0.01,
                            help='dc lambda')
        # Fixed misleading help text: passing this flag *disables* the bias
        # (see ``not opt.dc_no_bias`` in __init__).
        parser.add_argument('--dc_no_bias',
                            action='store_true',
                            help='disable bias in the domain confusion network')
        return parser

    def __init__(self, opt):
        """Build the translator model: one shared temporal encoder, a
        domain-confusion classifier, and two WaveNet decoders (one per
        domain).

        Args:
            opt: parsed experiment options (see modify_commandline_options).
        """
        BaseModel.__init__(self,
                           opt)  # call the initialization method of BaseModel
        # Losses: classifier on clean inputs (right), classifier fooled by
        # the encoder (wrong), and per-domain decoder reconstruction.
        self.loss_names = [
            'C_A_right', 'C_B_right', 'C_A_wrong', 'C_B_wrong', 'D_A', 'D_B'
        ]
        if opt.isTrain:
            self.output_names = []  # ['aug_A', 'aug_B', 'rec_A', 'rec_B']
        else:
            self.output_names = ['real_A', 'real_B', 'fake_B', 'fake_A']
        self.params_names = ['params_A', 'params_B'] * 2
        self.model_names = ['E', 'C', 'D_A', 'D_B']

        # Encoder and classifier live on the first device; the two WaveNet
        # decoders are placed on the last device (model parallelism when
        # more than one device is configured).
        self.netE = TemporalEncoder(
            **{
                'width': opt.width,
                'bottleneck_width': opt.bottleneck,
                'pool_length': opt.pool_length,
            }).to(self.devices[0])
        # self.vector_length = opt.audio_length // opt.pool_length
        self.netC = DomainConfusion(3, 2, opt.bottleneck, opt.dc_width,
                                    opt.dc_lambda,
                                    not opt.dc_no_bias).to(self.devices[0])
        # opt.mu + 1 output classes: mu-law quantization bins.
        self.netD_A = WaveNet(
            opt.mu + 1, opt.wavenet_layers, opt.wavenet_blocks, opt.width, 256,
            256, opt.bottleneck, 1,
            1).to(self.devices[-1])  # opt.pool_length, opt.pool_length
        self.netD_B = WaveNet(
            opt.mu + 1, opt.wavenet_layers, opt.wavenet_blocks, opt.width, 256,
            256, opt.bottleneck, 1,
            1).to(self.devices[-1])  # opt.pool_length, opt.pool_length
        self.softmax = nn.LogSoftmax(dim=1)  # (1, 256, audio_len) -> pick 256

        if self.isTrain:
            # Constant domain labels: class 0 for domain A, class 1 for B.
            self.A_target = torch.LongTensor([0] * opt.batch_size).to(
                self.devices[0])
            self.B_target = torch.LongTensor([1] * opt.batch_size).to(
                self.devices[0])
            self.criterionDC = nn.CrossEntropyLoss(reduction='mean')
            # NLLLoss pairs with the LogSoftmax applied to decoder outputs.
            self.criterionDecode = nn.NLLLoss(reduction='mean')
            # self.optimizer_C = torch.optim.Adam(itertools.chain(self.netE.parameters(), self.netC.parameters()), lr=opt.lr)
            # self.optimizer_D = torch.optim.Adam(itertools.chain(self.netE.parameters(), self.netD_A.parameters(), self.netD_B.parameters()), lr=opt.lr)
            # optimizer_C updates only the classifier; optimizer_D jointly
            # updates the encoder and both decoders (see train()).
            self.optimizer_C = AdaBound(self.netC.parameters(),
                                        lr=opt.lr,
                                        final_lr=0.1)
            self.optimizer_D = AdaBound(itertools.chain(
                self.netE.parameters(), self.netD_A.parameters(),
                self.netD_B.parameters()),
                                        lr=opt.lr,
                                        final_lr=0.1)
            self.optimizers = [self.optimizer_C, self.optimizer_D]
        else:
            self.preprocesses = ['mulaw']
            # TODO change structure of test.py and setup() instead
            load_suffix = str(
                opt.load_iter) if opt.load_iter > 0 else opt.epoch
            self.load_networks(load_suffix)
            self.netC.eval()
            self.netD_A.eval()
            self.netD_B.eval()

            # Fast inference wrappers built from the trained decoder weights
            # (NVWaveNet is presumably the NVIDIA CUDA inference kernel —
            # see its own docs for the expected weight layout).
            self.infer_A = NVWaveNet(**(self.netD_A.export_weights()))
            self.infer_B = NVWaveNet(**(self.netD_B.export_weights()))

    def set_input(self, input):
        """Unpack one paired batch: ``input[0]`` holds domain A data and
        its params dict, ``input[1]`` the same for domain B.

        Augmented waveforms and the 'original' waveforms are moved to the
        primary device; params are decollated for later use.
        """
        (A, params_A), (B, params_B) = input
        primary = self.devices[0]

        self.aug_A = A.to(primary)
        self.aug_B = B.to(primary)
        self.real_A = params_A['original'].to(primary)
        self.real_B = params_B['original'].to(primary)

        self.params_A = self.decollate_params(params_A)
        self.params_B = self.decollate_params(params_B)

    def get_indices(self, y):
        """Map waveform values in [-1, 1] to integer quantization indices
        in [0, mu] (inverse of ``inv_indices``)."""
        scaled = (y + 1.) * .5 * self.opt.mu
        return scaled.long()

    # def to_onehot(self, y, device):
    #     y = self.get_indices(y).view(-1, 1)
    #     y = torch.zeros(y.size()[0], self.opt.mu + 1).to(device).scatter_(1, y, 1)
    #     return y.transpose(0, 1).unsqueeze(0)

    def inv_indices(self, y):
        """Map integer quantization indices in [0, mu] back to waveform
        values in [-1, 1] (inverse of ``get_indices``)."""
        return y.float().div(self.opt.mu).mul(2.).sub(1.)

    @staticmethod
    def sample(logits):
        """Draw one categorical sample per position from ``logits``.

        ``logits`` presumably has shape (batch, classes, time) — it is
        transposed so the class dimension is last, as Categorical expects.
        """
        transposed = logits.transpose(1, 2)
        distribution = torch.distributions.categorical.Categorical(
            logits=transposed)
        return distribution.sample()

    def train(self):
        """Run one adversarial training step on the current batch.

        Phase 1 (optimizer_C): train the domain classifier netC to identify
        the source domain of encodings of the *clean* inputs (real_A/real_B).
        Phase 2 (optimizer_D): train the encoder netE and decoders
        netD_A/netD_B to reconstruct the clean signal from the *augmented*
        inputs while fooling netC — the classifier loss enters with a
        negative sign scaled by dc_lambda.
        """
        self.optimizer_C.zero_grad()
        encoded_A = self.netE(
            self.real_A.unsqueeze(1))  # Input range: (-1, 1) Output: R^64
        pred_C_A = self.netC(
            encoded_A)  # (encoded_A + 1.) * self.vector_length / 2)
        self.loss_C_A_right = self.criterionDC(pred_C_A, self.A_target)
        loss = self.opt.dc_lambda * self.loss_C_A_right
        # Backward per domain keeps only one encoder graph alive at a time.
        loss.backward()

        encoded_B = self.netE(self.real_B.unsqueeze(1))
        pred_C_B = self.netC(
            encoded_B)  # (encoded_B + 1.) * self.vector_length / 2)
        self.loss_C_B_right = self.criterionDC(pred_C_B, self.B_target)
        loss = self.opt.dc_lambda * self.loss_C_B_right
        loss.backward()
        # Only netC's parameters are updated here; the gradients these
        # backward passes also deposited in netE are discarded by
        # optimizer_D.zero_grad() below, before phase 2 begins.
        self.optimizer_C.step()

        self.optimizer_D.zero_grad()
        encoded_A = self.netE(
            self.aug_A.unsqueeze(1))  # Input range: (-1, 1) Output: R^64
        pred_C_A = self.netC(encoded_A)
        self.loss_C_A_wrong = self.criterionDC(pred_C_A, self.A_target)
        # Upsample the latent to audio length and move it to the decoder's
        # device (decoders may live on a different GPU than the encoder).
        encoded_A = nn.functional.interpolate(encoded_A,
                                              size=self.opt.audio_length).to(
                                                  self.devices[-1])
        real_A = self.get_indices(self.real_A).to(self.devices[-1])
        # Teacher-forced reconstruction: decoder conditions on the latent
        # and the quantized clean signal.
        pred_D_A = self.netD_A((encoded_A, real_A))
        rec_A = self.softmax(pred_D_A)
        self.loss_D_A = self.criterionDecode(rec_A, real_A)
        # Minimizing -dc_lambda * C_wrong *maximizes* classifier confusion,
        # pushing the encoder toward a domain-invariant representation.
        loss = self.loss_D_A - self.opt.dc_lambda * self.loss_C_A_wrong
        loss.backward()

        encoded_B = self.netE(self.aug_B.unsqueeze(1))
        pred_C_B = self.netC(encoded_B)
        self.loss_C_B_wrong = self.criterionDC(pred_C_B, self.B_target)
        encoded_B = nn.functional.interpolate(encoded_B,
                                              size=self.opt.audio_length).to(
                                                  self.devices[-1])
        real_B = self.get_indices(self.real_B).to(self.devices[-1])
        pred_D_B = self.netD_B((encoded_B, real_B))
        rec_B = self.softmax(pred_D_B)
        self.loss_D_B = self.criterionDecode(rec_B, real_B)
        loss = self.loss_D_B - self.opt.dc_lambda * self.loss_C_B_wrong
        loss.backward()
        self.optimizer_D.step()

    def test(self):
        """Translate each domain into the other at inference time.

        Encodes both inputs with the shared encoder, upsamples the latents
        to audio length, then decodes domain B's latent with domain A's
        WaveNet (and vice versa) via the fast inference wrappers. Results
        are stored as ``self.fake_A`` / ``self.fake_B`` in [-1, 1].
        """
        with torch.no_grad():
            latent_A = self.netE(self.aug_A.unsqueeze(1))
            latent_B = self.netE(self.aug_B.unsqueeze(1))

            decoder_device = self.devices[-1]
            target_len = self.opt.audio_length
            latent_A = nn.functional.interpolate(
                latent_A, size=target_len).to(decoder_device)
            latent_B = nn.functional.interpolate(
                latent_B, size=target_len).to(decoder_device)

            # Cross-domain decoding: each decoder consumes the *other*
            # domain's latent, producing quantized indices.
            fake_A = self.infer_A.infer(
                self.netD_A.get_cond_input(latent_B), Impl.AUTO)
            fake_B = self.infer_B.infer(
                self.netD_B.get_cond_input(latent_A), Impl.AUTO)
            self.fake_A = self.inv_indices(fake_A)
            self.fake_B = self.inv_indices(fake_B)