Example #1
# NOTE: excerpt; DistilBertPreTrainedModel is the assumed base class (inferred
# from the super().__init__(config) call), and deepcopy/nn/get_device come
# from the surrounding module.
class DistilBertForToxic(DistilBertPreTrainedModel):
    def __init__(self,
                 config,
                 bert_hidden_states=1,
                 dropout=0.1,
                 update_bert=False):
        config = deepcopy(config)
        config.output_hidden_states = True
        config.dropout = dropout
        super(DistilBertForToxic, self).__init__(config)

        self.bert_hidden_states = bert_hidden_states
        self.num_labels = 1
        self.update_bert = update_bert
        #bert=DistilBertModel(DistilBertConfig())

        bert = DistilBertForSequenceClassification.from_pretrained(
            "distilbert-base-uncased",   # the 6-layer DistilBERT model with an uncased vocab
            num_labels=2,                # number of output labels: 2 for binary classification
                                         # (increase this for multi-class tasks)
            output_attentions=False,     # whether the model returns attention weights
            output_hidden_states=True,   # whether the model returns all hidden states
        ).distilbert

        bert.config = config
        device = get_device()
        bert = bert.to(device)
        self.bert = bert

        self.qa_outputs = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(config.hidden_size * bert_hidden_states, 1),
            nn.Sigmoid())
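
A rough usage sketch for the class above; the forward() signature is not shown in the snippet, so the call below is an assumption, and BertConfig/BertTokenizer mirror how Example #4 constructs this model:

from transformers import BertConfig, BertTokenizer
import torch

config = BertConfig()
model = DistilBertForToxic(config, bert_hidden_states=4, dropout=0.1)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
batch = tokenizer(['an example comment'], return_tensors='pt', padding=True)
with torch.no_grad():
    out = model(**batch)  # assumed forward(); the head ends in nn.Sigmoid, so out lies in [0, 1]
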
Example #2
def main_train(path_trn: str,
               path_val: str,
               crop_size: int,
               upscale_factor: int,
               num_epochs: int,
               num_workers: int,
               to_device: str = 'cuda:0',
               in_memory_trn: bool = False,
               in_memory_val: bool = False,
               batch_size: int = 64,
               step_val: int = 5):
    out_dir = path_trn + '_results_c{}_s{}'.format(crop_size, upscale_factor)
    out_dir_states = out_dir + '_states'
    out_dir_statistics = out_dir + '_statistics'
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(out_dir_states, exist_ok=True)
    os.makedirs(out_dir_statistics, exist_ok=True)
    path_results_csv = os.path.join(
        out_dir, 'statistics_x{}_train_results.csv'.format(upscale_factor))
    #
    to_device = get_device(to_device)
    train_set = DatasetExtTrn(path_idx=path_trn,
                              crop_lr=crop_size,
                              scale=upscale_factor,
                              in_memory=in_memory_trn).build()
    val_set = DatasetExtVal(path_idx=path_val,
                            crop_lr=crop_size,
                            scale=upscale_factor,
                            in_memory=in_memory_val).build()
    #
    train_loader = DataLoader(dataset=train_set,
                              num_workers=num_workers,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(dataset=val_set,
                            num_workers=num_workers,
                            batch_size=1,
                            shuffle=False)
    #
    netG = Generator(upscale_factor).to(to_device)
    print('# generator parameters:',
          sum(param.numel() for param in netG.parameters()))
    netD = Discriminator().to(to_device)
    print('# discriminator parameters:',
          sum(param.numel() for param in netD.parameters()))
    generator_criterion = GeneratorLoss().to(to_device)
    #
    optimizerG = optim.Adam(netG.parameters())
    optimizerD = optim.Adam(netD.parameters())
    # results = {'d_loss': [], 'g_loss': [], 'd_score': [], 'g_score': [], 'psnr': [], 'ssim': []}
    for epoch in range(1, num_epochs + 1):
        results_train = train_step(train_loader, netD, netG, optimizerD,
                                   optimizerG, generator_criterion, epoch,
                                   num_epochs)
        # FIXME: separate function for epoch training
        if (epoch % step_val) == 0:
            results_validation = validation_step(val_loader, netG, out_dir,
                                                 epoch, num_epochs)
            results_save = {**results_train, **results_validation}
            results_save['epoch'] = epoch
            export_results(results_save, path_results_csv)
            # export model
            # save model parameters
            path_state_G = os.path.join(
                out_dir_states,
                'netG_epoch_x{}_{:05d}.pth'.format(upscale_factor, epoch))
            path_state_D = os.path.join(
                out_dir_states,
                'netD_epoch_x{}_{:05d}.pth'.format(upscale_factor, epoch))
            t1 = time.time()
            torch.save(netG.state_dict(), path_state_G)
            torch.save(netD.state_dict(), path_state_D)
            dt = time.time() - t1
            print(
                '\t\t:: dump:generator-model to [{}], dt ~ {:0.2f} (s)'.format(
                    path_state_G, dt))
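
A hypothetical invocation with placeholder index paths (whatever DatasetExtTrn/DatasetExtVal expect at path_trn/path_val):

main_train(path_trn='data/train_idx.txt',   # placeholder paths and sizes
           path_val='data/val_idx.txt',
           crop_size=88,
           upscale_factor=4,
           num_epochs=100,
           num_workers=4,
           batch_size=64,
           step_val=5)
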
Example #3
    # NOTE: the snippet begins mid-parser; the '--threads' option name is
    # inferred from the args.threads reference below.
    parser.add_argument('--threads',
                        type=int,
                        help='#workers for parallel processing')
    parser.add_argument('--batch_size',
                        default=32,
                        type=int,
                        help='batch-size')
    parser.add_argument('--device',
                        default='cuda:0',
                        type=str,
                        help='device, default "cuda:0"')
    parser.add_argument('--in_memory_trn',
                        action='store_true',
                        help='Load train dataset into memory')
    parser.add_argument('--in_memory_val',
                        action='store_true',
                        help='Load validation dataset into memory')
    args = parser.parse_args()
    print('args:\n\t{}'.format(args))
    #
    to_device = get_device(args.device)
    main_train(path_trn=args.trn,
               path_val=args.val,
               crop_size=args.crop_size,
               upscale_factor=args.upscale_factor,
               num_epochs=args.num_epochs,
               num_workers=args.threads,
               batch_size=args.batch_size,
               to_device=to_device,
               in_memory_trn=args.in_memory_trn,
               in_memory_val=args.in_memory_val)
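
get_device() is called throughout these examples but its body is never shown; a minimal sketch of what it plausibly does, validating the requested device and falling back to CPU:

import torch

def get_device(device='cuda:0'):
    """Sketch only: the real helper in these projects is not shown."""
    if isinstance(device, torch.device):
        return device
    if str(device).startswith('cuda') and not torch.cuda.is_available():
        return torch.device('cpu')  # fall back when no GPU is available
    return torch.device(device)
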
Example #4
def run_model(pos_train_file,
              neg_train_file,
              pos_dev_file,
              neg_dev_file,
              nrows_train,
              nrows_dev,
              epochs,
              out_dir,
              dropout=0.2,
              model='bert',
              batch_size=16,
              test_file='../data/test_data_clean.csv',
              lr=2e-5,
              lmda=10.0,
              stnc_emb='last'):

    device = get_device()

    bert_hidden_states = 4

    if model == 'bert':
        config = BertConfig()
        config.output_hidden_states = True
        model = BertForToxic(
            config,
            bert_hidden_states=bert_hidden_states,
            dropout=dropout,
            update_bert=True,
        )
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                  do_lower_case=True)

    elif model == 'distilbert':
        # The original uses BertConfig/BertTokenizer here in place of the
        # commented-out DistilBert* equivalents (likely because the head reads
        # config.hidden_size, which older DistilBertConfig versions lack).
        config = BertConfig()
        config.output_hidden_states = True
        model = DistilBertForToxic(config,
                                   bert_hidden_states=bert_hidden_states,
                                   dropout=dropout,
                                   update_bert=True,
                                   lmda=lmda,
                                   stnc_emb=stnc_emb)
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                  do_lower_case=True)
    else:
        raise ValueError('unknown model: {!r}'.format(model))

    train_dataloader = get_data_loader_bal(pos_train_file,
                                           neg_train_file,
                                           batch_size=batch_size,
                                           nrows_pos=nrows_train,
                                           nrows_neg=nrows_train * 10,
                                           mode='train',
                                           tokenizer=tokenizer)
    dev_dataloader = get_data_loader_bal(pos_dev_file,
                                         neg_dev_file,
                                         batch_size=batch_size,
                                         nrows_pos=nrows_dev,
                                         nrows_neg=nrows_dev,
                                         mode='dev',
                                         tokenizer=tokenizer)

    model.to(device)

    optimizer = AdamW(
        model.parameters(),
        lr=lr,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8  # args.adam_epsilon  - default is 1e-8.
    )

    total_steps = len(train_dataloader) * epochs

    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    best_score = -np.inf

    stats_vec = []
    dev_pred_vec = []
    for epoch in range(epochs):
        stats, dev_pred = train_epoch(model, train_dataloader, dev_dataloader,
                                      optimizer, scheduler)
        print(epoch, stats)

        if stats['accuracy'] > best_score:
            best_score = stats['accuracy']
            f = out_dir + '/' + 'best_model_ch.pt'
            torch.save({
                'epoch': epoch,
                'model': model,
                'stats': stats,
            }, f)

        stats_vec.append(stats)
        dev_pred_vec.append(dev_pred)

    stats_vec = pd.DataFrame(stats_vec)
    dev_pred_vec = pd.concat(dev_pred_vec, axis=0)

    f = out_dir + '/' + 'last_model_ch.pt'
    torch.save({
        'epoch': epoch,
        'model': model,
        'stats': stats,
    }, f)

    print(stats_vec)
    stats_vec.to_csv(out_dir + '/' + 'stats.csv')

    out_file = out_dir + '/train_pred.csv'
    df = get_data_pred(
        train_dataloader,
        model,
        out_file,
    )

    out_file = out_dir + '/dev_pred.csv'
    df = get_data_pred(
        dev_dataloader,
        model,
        out_file,
    )

    test_dataloader = get_data_loader_pred(test_file, tokenizer, nrows=None)
    out_file = out_dir + '/test_pred.csv'
    df = get_data_pred(
        test_dataloader,
        model,
        out_file,
    )
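
A hypothetical call with placeholder CSV paths (the pos/neg files are whatever get_data_loader_bal expects):

run_model(pos_train_file='../data/train_pos.csv',   # placeholder paths
          neg_train_file='../data/train_neg.csv',
          pos_dev_file='../data/dev_pos.csv',
          neg_dev_file='../data/dev_neg.csv',
          nrows_train=10000,
          nrows_dev=2000,
          epochs=3,
          out_dir='runs/distilbert_toxic',
          model='distilbert')
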
Example #5
def run_model(pos_train_file, neg_train_file, pos_dev_file, neg_dev_file,
              nrows_train, nrows_dev, epochs, out_dir):
    batch_size = 16

    # (A previous in-memory loading path, using _read_data and hand-built
    #  DataLoaders with my_collate, was commented out here in favor of
    #  get_data_loader_bal below.)

    train_dataloader = get_data_loader_bal(pos_train_file,
                                           neg_train_file,
                                           batch_size=batch_size,
                                           nrows_pos=nrows_train,
                                           nrows_neg=nrows_train,
                                           mode='train')
    dev_dataloader = get_data_loader_bal(pos_dev_file,
                                         neg_dev_file,
                                         batch_size=batch_size,
                                         nrows_pos=nrows_dev,
                                         nrows_neg=nrows_dev,
                                         mode='dev')

    device = get_device()

    # NOTE: bert_hidden_states and this config are built but never used below;
    # from_pretrained loads its own config and sets output_hidden_states=False.
    bert_hidden_states = 4
    config = DistilBertConfig()
    config.output_hidden_states = True

    model = DistilBertForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",   # the 6-layer DistilBERT model with an uncased vocab
        num_labels=2,                # number of output labels: 2 for binary classification
                                     # (increase this for multi-class tasks)
        output_attentions=False,     # whether the model returns attention weights
        output_hidden_states=False,  # whether the model returns all hidden states
    )
    model = model.to(device)

    optimizer = AdamW(
        model.parameters(),
        lr=2e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8  # args.adam_epsilon  - default is 1e-8.
    )

    total_steps = len(train_dataloader) * epochs

    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    best_score = -np.inf

    stats_vec = []
    for epoch in range(epochs):
        stats = train_epoch(model, train_dataloader, dev_dataloader, optimizer,
                            scheduler)
        print(stats)

        if stats['accuracy'] > best_score:
            best_score = stats['accuracy']
            f = out_dir + '/' + 'best_model_ch.pt'
            torch.save({
                'epoch': epoch,
                'model': model,
                'stats': stats,
            }, f)

        stats_vec.append(stats)

    stats_vec = pd.DataFrame(stats_vec)

    f = out_dir + '/' + 'last_model_ch.pt'
    torch.save({
        'epoch': epoch,
        'model': model,
        'stats': stats,
    }, f)

    print(stats_vec)
    stats_vec.to_csv(out_dir + '/' + 'stats.csv')
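
train_epoch() is not shown in either example; since get_linear_schedule_with_warmup is conventionally stepped once per batch, its core loop presumably follows the standard pattern sketched below (an assumption, not the project's actual code):

import torch

def train_epoch_sketch(model, train_dataloader, optimizer, scheduler, device):
    """Standard per-batch update loop assumed to live inside train_epoch."""
    model.train()
    total_loss = 0.0
    for input_ids, attention_mask, labels in train_dataloader:  # assumed batch layout
        optimizer.zero_grad()
        loss = model(input_ids.to(device),
                     attention_mask=attention_mask.to(device),
                     labels=labels.to(device))[0]
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()  # linear-warmup schedules advance once per batch
        total_loss += float(loss)
    return {'train_loss': total_loss / len(train_dataloader)}
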
Example #6
File: train.py Project: gakarak/SRGAN
def main_train(path_trn: str, path_val: str,
               crop_size: int, upscale_factor: int, num_epochs: int,
               num_workers: int, to_device: str = 'cuda:0', batch_size: int = 64):
    to_device = get_device(to_device)
    train_set = TrainDatasetFromFolder(path_trn, crop_size=crop_size, upscale_factor=upscale_factor)
    val_set = ValDatasetFromFolder(path_val, upscale_factor=upscale_factor)
    # train_set = TrainDatasetFromFolder('data/VOC2012/train', crop_size=crop_size, upscale_factor=upscale_factor)
    # val_set = ValDatasetFromFolder('data/VOC2012/val', upscale_factor=upscale_factor)
    #
    train_loader = DataLoader(dataset=train_set, num_workers=num_workers, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(dataset=val_set, num_workers=num_workers, batch_size=1, shuffle=False)

    netG = Generator(upscale_factor)
    print('# generator parameters:', sum(param.numel() for param in netG.parameters()))
    netD = Discriminator()
    print('# discriminator parameters:', sum(param.numel() for param in netD.parameters()))

    generator_criterion = GeneratorLoss()

    if torch.cuda.is_available():
        netG.cuda()
        netD.cuda()
        generator_criterion.cuda()

    optimizerG = optim.Adam(netG.parameters())
    optimizerD = optim.Adam(netD.parameters())

    results = {'d_loss': [], 'g_loss': [], 'd_score': [], 'g_score': [], 'psnr': [], 'ssim': []}

    for epoch in range(1, num_epochs + 1):
        train_bar = tqdm(train_loader)
        running_results = {'batch_sizes': 0, 'd_loss': 0, 'g_loss': 0, 'd_score': 0, 'g_score': 0}

        netG.train()
        netD.train()
        # FIXME: separate function for epoch training
        for data, target in train_bar:
            g_update_first = True
            batch_size = data.size(0)
            #
            # (commented-out matplotlib code for visualizing the HR/LR crops removed)
            running_results['batch_sizes'] += batch_size

            ############################
            # (1) Update D network: maximize D(x)-1-D(G(z))
            ###########################
            # legacy Variable(...).cuda() transfers, replaced by .to(to_device) below
            z = data.to(to_device)
            real_img = target.to(to_device)
            fake_img = netG(z)

            netD.zero_grad()
            real_out = netD(real_img).mean()
            fake_out = netD(fake_img).mean()
            d_loss = 1 - real_out + fake_out
            d_loss.backward(retain_graph=True)
            optimizerD.step()

            ############################
            # (2) Update G network: minimize 1-D(G(z)) + Perception Loss + Image Loss + TV Loss
            ###########################
            netG.zero_grad()
            g_loss = generator_criterion(fake_out, fake_img, real_img)
            g_loss.backward()
            optimizerG.step()
            fake_img = netG(z)
            fake_out = netD(fake_img).mean()

            g_loss = generator_criterion(fake_out, fake_img, real_img)
            running_results['g_loss'] += float(g_loss) * batch_size
            d_loss = 1 - real_out + fake_out
            running_results['d_loss'] += float(d_loss) * batch_size
            running_results['d_score'] += float(real_out) * batch_size
            running_results['g_score'] += float(fake_out) * batch_size

            train_bar.set_description(desc='[%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f' % (
                epoch, num_epochs, running_results['d_loss'] / running_results['batch_sizes'],
                running_results['g_loss'] / running_results['batch_sizes'],
                running_results['d_score'] / running_results['batch_sizes'],
                running_results['g_score'] / running_results['batch_sizes']))

        netG.eval()
        # FIXME: separate function for epoch validation
        with torch.no_grad():
            out_path = 'training_results/SRF_' + str(upscale_factor) + '/'
            if not os.path.exists(out_path):
                os.makedirs(out_path)
            val_bar = tqdm(val_loader)
            valing_results = {'mse': 0, 'ssims': 0, 'psnr': 0, 'ssim': 0, 'batch_sizes': 0}
            val_images = []
            for val_lr, val_hr_restore, val_hr in val_bar:
                batch_size = val_lr.size(0)
                valing_results['batch_sizes'] += batch_size
                # legacy Variable(..., volatile=True) transfers, replaced by .to(to_device) below
                lr = val_lr.to(to_device)
                hr = val_hr.to(to_device)
                sr = netG(lr)

                batch_mse = ((sr - hr) ** 2).mean()
                valing_results['mse'] += float(batch_mse) * batch_size
                batch_ssim = float(pytorch_ssim.ssim(sr, hr))
                valing_results['ssims'] += batch_ssim * batch_size
                valing_results['psnr'] = 10 * log10(1 / (valing_results['mse'] / valing_results['batch_sizes']))
                valing_results['ssim'] = valing_results['ssims'] / valing_results['batch_sizes']
                val_bar.set_description(
                    desc='[converting LR images to SR images] PSNR: %.4f dB SSIM: %.4f' % (
                        valing_results['psnr'], valing_results['ssim']))

                val_images.extend(
                    [display_transform()(val_hr_restore.squeeze(0)), display_transform()(hr.data.cpu().squeeze(0)),
                     display_transform()(sr.data.cpu().squeeze(0))])
            val_images = torch.stack(val_images)
            # guard against validation sets smaller than 15 images (chunks must be >= 1)
            val_images = torch.chunk(val_images, max(val_images.size(0) // 15, 1))
            val_save_bar = tqdm(val_images, desc='[saving training results]')
            index = 1
            for image in val_save_bar:
                image = utils.make_grid(image, nrow=3, padding=5)
                utils.save_image(image, out_path + 'epoch_%d_index_%d.png' % (epoch, index), padding=5)
                index += 1

        # save model parameters
        torch.save(netG.state_dict(), 'epochs/netG_epoch_%d_%d.pth' % (upscale_factor, epoch))
        torch.save(netD.state_dict(), 'epochs/netD_epoch_%d_%d.pth' % (upscale_factor, epoch))
        # save loss / scores / psnr / ssim
        results['d_loss'].append(running_results['d_loss'] / running_results['batch_sizes'])
        results['g_loss'].append(running_results['g_loss'] / running_results['batch_sizes'])
        results['d_score'].append(running_results['d_score'] / running_results['batch_sizes'])
        results['g_score'].append(running_results['g_score'] / running_results['batch_sizes'])
        results['psnr'].append(valing_results['psnr'])
        results['ssim'].append(valing_results['ssim'])

        if epoch % 10 == 0 and epoch != 0:
            out_path = 'statistics/'
            data_frame = pd.DataFrame(
                data={'Loss_D': results['d_loss'], 'Loss_G': results['g_loss'], 'Score_D': results['d_score'],
                      'Score_G': results['g_score'], 'PSNR': results['psnr'], 'SSIM': results['ssim']},
                index=range(1, epoch + 1))
            data_frame.to_csv(out_path + 'srf_' + str(upscale_factor) + '_train_results.csv', index_label='Epoch')
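
One note on the in-loop PSNR above: with image tensors scaled to [0, 1] the peak signal value is 1, so PSNR reduces to 10 * log10(1 / MSE), which is exactly what the valing_results['psnr'] line computes:

from math import log10

def psnr_from_mse(mse, peak=1.0):
    """PSNR in dB for images with the given peak value (1.0 for [0, 1] tensors)."""
    return 10 * log10(peak ** 2 / mse)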