Code Example #1
def train(model, train_loader, valid_loader, config):
    model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=config.train_lr, weight_decay=config.weight_decay)

    checkpoint_path = os.path.join(config.log_path, 'checkpoint.pth')
    resume = os.path.isfile(checkpoint_path)
    if resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(checkpoint_path)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger_train = Logger(os.path.join(config.log_path, 'log_train.txt'), resume=True)
        logger_valid = Logger(os.path.join(config.log_path, 'log_valid.txt'), resume=True)
    else:
        best_acc = -1
        start_epoch = 0
        logger_train = Logger(os.path.join(config.log_path, 'log_train.txt'))
        logger_train.set_names(['Learning Rate', 'Essential Loss', 'Classfi Loss', 'Detector loss'])
        logger_valid = Logger(os.path.join(config.log_path, 'log_valid.txt'))
        logger_valid.set_names(['Valid Acc', 'Essential Loss', 'Clasfi Loss'])
    train_loader_iter = iter(train_loader)
    for step in trange(start_epoch, config.train_iter, ncols=config.tqdm_width):
        try:
            train_data = next(train_loader_iter)
        except StopIteration:
            train_loader_iter = iter(train_loader)
            train_data = next(train_loader_iter)

        train_data = tocuda(train_data)
        # run training
        cur_lr = adjust_learning_rate(optimizer, step, config) 
        loss_val = train_step(step, optimizer, model, train_data, config)
        logger_train.append([cur_lr]+list(loss_val))

        # Check if we want to write validation
        b_save = ((step + 1) % config.save_intv) == 0
        b_validate = ((step + 1) % config.val_intv) == 0
        if b_validate:
            va_res, loss1, loss2 = valid(valid_loader, step, config)
            logger_valid.append([va_res, loss1, loss2])
            if va_res > best_acc:
                print("Saving best model with va_res = {}".format(va_res))
                best_acc = va_res
                torch.save({
                    'epoch': step + 1,
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }, os.path.join(config.log_path, 'model_best.pth'))

        if b_save:
            torch.save({
                'epoch': step + 1,
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, checkpoint_path)
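
Example #1 calls an adjust_learning_rate(optimizer, step, config) helper that is not included in the snippet. A minimal sketch of what such a helper could look like, assuming hypothetical config fields lr_decay_step and lr_decay_rate for an exponential step decay (these fields are not taken from the original project):

def adjust_learning_rate(optimizer, step, config):
    # Hypothetical sketch: exponential step decay of the base learning rate.
    # config.lr_decay_step and config.lr_decay_rate are assumed fields.
    cur_lr = config.train_lr * (config.lr_decay_rate ** (step // config.lr_decay_step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = cur_lr
    return cur_lr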
Code Example #2
def train(model, train_loader, valid_loader, config):
    model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=config.train_lr, weight_decay=config.weight_decay)
    match_loss = MatchLoss(config)

    checkpoint_path = os.path.join(config.log_path, 'checkpoint.pth')
    config.resume = os.path.isfile(checkpoint_path)
    if config.resume:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(checkpoint_path)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger_train = Logger(os.path.join(config.log_path, 'log_train.txt'), title='oan', resume=True)
        logger_valid = Logger(os.path.join(config.log_path, 'log_valid.txt'), title='oan', resume=True)
    else:
        best_acc = -1
        start_epoch = 0
        logger_train = Logger(os.path.join(config.log_path, 'log_train.txt'), title='oan')
        logger_train.set_names(['Learning Rate'] + ['Geo Loss', 'Classfi Loss', 'L2 Loss']*(config.iter_num+1))
        logger_valid = Logger(os.path.join(config.log_path, 'log_valid.txt'), title='oan')
        logger_valid.set_names(['Valid Acc'] + ['Geo Loss', 'Clasfi Loss', 'L2 Loss'])
    train_loader_iter = iter(train_loader)
    for step in trange(start_epoch, config.train_iter, ncols=config.tqdm_width):
        try:
            train_data = next(train_loader_iter)
        except StopIteration:
            train_loader_iter = iter(train_loader)
            train_data = next(train_loader_iter)
        train_data = tocuda(train_data)

        # run training
        cur_lr = optimizer.param_groups[0]['lr']
        loss_vals = train_step(step, optimizer, model, match_loss, train_data)
        logger_train.append([cur_lr] + loss_vals)

        # Check if we want to write validation
        b_save = ((step + 1) % config.save_intv) == 0
        b_validate = ((step + 1) % config.val_intv) == 0
        if b_validate:
            va_res, geo_loss, cla_loss, l2_loss,  _, _, _  = valid(valid_loader, model, step, config)
            logger_valid.append([va_res, geo_loss, cla_loss, l2_loss])
            if va_res > best_acc:
                print("Saving best model with va_res = {}".format(va_res))
                best_acc = va_res
                torch.save({
                    'epoch': step + 1,
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }, os.path.join(config.log_path, 'model_best.pth'))

        if b_save:
            torch.save({
                'epoch': step + 1,
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, checkpoint_path)
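
Examples #1 and #2 both drive the DataLoader through a manual iter()/next() pair and restart it on StopIteration, so that training is measured in optimizer steps rather than epochs. The same pattern can be factored into a small generator; this is an illustrative refactoring, not code from either project:

def infinite_batches(loader):
    # Yield batches indefinitely, restarting the DataLoader each time it is
    # exhausted, so training can be driven by a fixed number of steps.
    while True:
        for batch in loader:
            yield batch

# Usage sketch:
#   batches = infinite_batches(train_loader)
#   for step in trange(start_epoch, config.train_iter, ncols=config.tqdm_width):
#       train_data = tocuda(next(batches))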
Code Example #3
		def mutation(m, mutationP, mutationFunction, mutationConfig, validate):

			def trivalMutation(m, mutationP, validate):
				mutationMatrix = np.random.rand(m.wall.shape[0], m.wall.shape[1]) < mutationP
				return m.wall ^ mutationMatrix

			if isinstance(m, frame.maze):
				newMazeList = []
				count = 0
				if m.teleported:
					newMazeList.append(m.teleported)
					count = count + 1
				while count < self.size:
					if mutationFunction is None:
						wall = trivalMutation(m, mutationP, validate)
					else:
						# TODO: other mutationFunction
						raise NotImplementedError('only the trivial mutation is implemented')
					newMaze = frame.maze(m.rows, m.cols, m.p, m.rootNum)
					newMaze.build(initFunction = frame.setWall, initConfig = {'wall': wall})
					if validate and not test.valid(newMaze):
						continue
					newMazeList.append(newMaze)
					count = count + 1
				return newMazeList
			else:
				print('E: localSearch.neighbor.__call__(), not a maze input')
				exit()
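
The trivalMutation helper above flips each wall cell independently with probability mutationP by XOR-ing the boolean wall matrix with a random mask. A self-contained sketch of the same idea on a plain NumPy array (frame.maze and test.valid are not reproduced here):

import numpy as np

def flip_walls(wall, mutation_p, rng=None):
    # wall: boolean matrix, True where a wall is present. Each cell is
    # flipped independently with probability mutation_p via XOR.
    rng = np.random.default_rng() if rng is None else rng
    mutation_mask = rng.random(wall.shape) < mutation_p
    return wall ^ mutation_mask

# e.g. flip_walls(np.zeros((5, 5), dtype=bool), 0.1)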
Code Example #4
def main(args):

    cuda = True
    cudnn.benchmark = True
    # data_root = '/home/weiyuhua/Challenge2020/Data/DG'
    data_root = '/home/yin/code/weiyuhua/Challenge2020/Data/DG'

    model_root = args.model_root
    logs = args.logs
    lr = args.lr
    batch_size = args.batch_size
    n_epoch = args.n_epoch
    unseen_index = args.unseen_index
    val_split = args.val_split

    manual_seed = random.randint(1, 10000)
    random.seed(manual_seed)
    torch.manual_seed(manual_seed)

    tb_dir = os.path.join(logs, 'tb_dir')
    if not os.path.exists(logs):
        os.makedirs(logs)
    if not os.path.exists(model_root):
        os.makedirs(model_root)
    if not os.path.exists(tb_dir):
        os.makedirs(tb_dir)

    # Tensorboard
    train_writer = SummaryWriter(tb_dir + '/train')
    val_writer = SummaryWriter(tb_dir + '/valid')
    test_writer = SummaryWriter(tb_dir + '/test')

    # get train, val and test datasets
    D = GetDataset(data_root, unseen_index, val_split)
    train_datasets, val_datasets, test_dataset = D.get_datasets()

    # get dataloaders
    train_dataloaders = []
    for train_dataset in train_datasets:
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=8)
        train_dataloaders.append(train_dataloader)

    val_dataloaders = []
    for val_dataset in val_datasets:
        val_dataloader = DataLoader(dataset=val_dataset,
                                    batch_size=batch_size,
                                    shuffle=False,
                                    num_workers=8)
        val_dataloaders.append(val_dataloader)

    test_dataloader = DataLoader(dataset=test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=8)

    # load model
    my_net = CNNModel()

    # setup optimizer

    optimizer = optim.Adam(my_net.parameters(), lr=lr)

    loss_class = torch.nn.NLLLoss()
    loss_domain = torch.nn.NLLLoss()

    if cuda:
        my_net = my_net.cuda()
        loss_class = loss_class.cuda()
        loss_domain = loss_domain.cuda()

    for p in my_net.parameters():
        p.requires_grad = True

    # training
    best_accu_val = 0.0
    for epoch in range(n_epoch):

        len_dataloader = np.min(
            np.array([
                len(train_dataloaders[i])
                for i in range(len(train_dataloaders))
            ]))

        data_train_iters = []
        for train_dataloader in train_dataloaders:
            data_train_iter = iter(train_dataloader)
            data_train_iters.append(data_train_iter)

        for i in range(len_dataloader):

            p = float(i + epoch * len_dataloader) / n_epoch / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1

            err_label_s = []
            err_domain_s = []

            # err_label_all = torch.tensor(0.0)
            # err_domain_all = torch.tensor(0.0)
            err_label_all = 0
            err_domain_all = 0

            # training model using multi-source data
            for j, data_train_iter in enumerate(data_train_iters):
                data_train = next(data_train_iter)
                s_ecg, s_label = data_train

                my_net.zero_grad()
                batch_size = len(s_label)

                domain_label = (torch.ones(batch_size) * j).long()

                if cuda:
                    s_ecg = s_ecg.cuda()
                    s_label = s_label.cuda()
                    domain_label = domain_label.cuda()

                class_output, domain_output = my_net(input_data=s_ecg,
                                                     alpha=alpha)
                err_label = loss_class(class_output, s_label)
                err_domain = loss_domain(domain_output, domain_label)

                err_label_s.append(err_label.data.cpu().numpy())
                err_domain_s.append(err_domain.data.cpu().numpy())
                err_label_all += err_label
                err_domain_all += err_domain

            # err = err_domain_all + err_label_all
            err = err_label_all
            err.backward()
            optimizer.step()

            print('\n')

            for j in range(len(train_dataloaders)):
                print('\r epoch: %d, [iter: %d / all %d], domain: %d, err_label: %f, err_domain: %f' \
                      % (epoch, i + 1, len_dataloader, j + 1, err_label_s[j], err_domain_s[j]))
                # tb training
                train_writer.add_scalar('err_label_%d' % (j), err_label_s[j])
                train_writer.add_scalar('err_domain_%d' % (j), err_domain_s[j])

            torch.save(my_net,
                       '{0}/model_epoch_current.pth'.format(model_root))

        print('\n')

        ## validation
        val_accus, best_accu_val, val_err_label_s, val_err_domain_s = valid(
            val_dataloaders, model_root, best_accu_val)

        for i in range(len(val_dataloaders)):
            print('\r epoch: %d, Validation, domain: %d, accu: %f' %
                  (epoch, i + 1, val_accus[i]))
            # tb validation
            val_writer.add_scalar('err_label_%d' % (i), val_err_label_s[i])
            val_writer.add_scalar('err_domain_%d' % (i), val_err_domain_s[i])
            val_writer.add_scalar('accu_%d' % (i), val_accus[i])

        ## test
        test_accu, test_err_label = test(test_dataloader,
                                         model_root,
                                         model_best=False)
        test_writer.add_scalar('accu', test_accu)
        test_writer.add_scalar('err_label', test_err_label)

    result_path = os.path.join(logs, 'results.txt')
    print('============ Summary ============= \n')
    for i, train_dataloader in enumerate(train_dataloaders):
        train_accu, train_err_label = test(train_dataloader, model_root)
        write_log(
            'Accuracy of the train dataset %d : %f err_label : %f' %
            (i + 1, train_accu, train_err_label), result_path)

    for i, val_dataloader in enumerate(val_dataloaders):
        val_accu, val_err_label = test(val_dataloader, model_root)
        write_log(
            'Accuracy of the val dataset %d : %f err_label : %f' %
            (i + 1, val_accu, val_err_label), result_path)

    test_accu, test_err_label = test(test_dataloader, model_root)
    write_log(
        'Accuracy of the test dataset : %f err_label : %f' %
        (test_accu, test_err_label), result_path)
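
Example #4 follows the DANN recipe: the schedule alpha = 2 / (1 + exp(-10 * p)) - 1 ramps the influence of the domain branch from 0 to 1 over training, and alpha is passed into the model. CNNModel itself is not shown; in DANN-style models the domain classifier usually sits behind a gradient reversal layer, for which a minimal PyTorch sketch (an assumption about this model's internals, not code from the repository) is:

import torch

class GradReverse(torch.autograd.Function):
    # Identity in the forward pass; scales the gradient by -alpha in the
    # backward pass, so shared features are trained to confuse the domain
    # classifier.
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg() * ctx.alpha, None

def grad_reverse(x, alpha=1.0):
    return GradReverse.apply(x, alpha)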
Code Example #5
File: train.py  Project: logdogvip/insightface
def main():
    """Create the model and start the training."""
    cycle_n = 0
    start_epoch = args.start_epoch
    writer = SummaryWriter(osp.join(args.snapshot_dir, TIMESTAMP))
    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    h, w = map(int, args.input_size.split(','))
    input_size = [h, w]
    best_f1 = 0

    torch.cuda.set_device(args.local_rank)

    try:
        world_size = int(os.environ['WORLD_SIZE'])
        distributed = world_size > 1
    except (KeyError, ValueError):
        distributed = False
        world_size = 1
    if distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method='env://')
    rank = 0 if not distributed else dist.get_rank()

    log_file = args.snapshot_dir + '/' + TIMESTAMP + 'output.log'
    logger = get_root_logger(log_file=log_file, log_level='INFO')
    logger.info(f'Distributed training: {distributed}')

    cudnn.enabled = True
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.enabled = True

    if distributed:
        model = dml_csr.DML_CSR(args.num_classes)
        schp_model = dml_csr.DML_CSR(args.num_classes)
    else:
        model = dml_csr.DML_CSR(args.num_classes, InPlaceABN)
        schp_model = dml_csr.DML_CSR(args.num_classes, InPlaceABN)

    if args.restore_from is not None:
        print('Resume training from {}'.format(args.restore_from))
        model.load_state_dict(torch.load(args.restore_from), True)
        start_epoch = int(float(
            args.restore_from.split('.')[0].split('_')[-1])) + 1
    else:
        resnet_params = torch.load(RESTORE_FROM)
        new_params = model.state_dict().copy()
        for i in resnet_params:
            i_parts = i.split('.')
            if not i_parts[0] == 'fc':
                new_params['.'.join(i_parts[0:])] = resnet_params[i]
        model.load_state_dict(new_params)
    model.cuda()

    args.schp_restore = osp.join(args.snapshot_dir, TIMESTAMP, 'best.pth')
    if os.path.exists(args.schp_restore):
        print('Resume schp checkpoint from {}'.format(args.schp_restore))
        schp_model.load_state_dict(torch.load(args.schp_restore), True)
    else:
        schp_resnet_params = torch.load(RESTORE_FROM)
        schp_new_params = schp_model.state_dict().copy()
        for i in schp_resnet_params:
            i_parts = i.split('.')
            if not i_parts[0] == 'fc':
                schp_new_params['.'.join(i_parts[0:])] = schp_resnet_params[i]
        schp_model.load_state_dict(schp_new_params)
    schp_model.cuda()

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
        schp_model = torch.nn.parallel.DistributedDataParallel(
            schp_model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
    else:
        model = SingleGPU(model)
        schp_model = SingleGPU(schp_model)

    criterion = Criterion(loss_weight=[1, 1, 1, 4, 1],
                          lambda_1=args.lambda_s,
                          lambda_2=args.lambda_e,
                          lambda_3=args.lambda_c,
                          num_classes=args.num_classes)
    criterion.cuda()

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    train_dataset = FaceDataSet(args.data_dir,
                                args.train_dataset,
                                crop_size=input_size,
                                transform=transform)
    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    trainloader = data.DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=2,
                                  pin_memory=True,
                                  drop_last=True,
                                  sampler=train_sampler)

    val_dataset = datasets[str(args.model_type)](args.data_dir,
                                                 args.valid_dataset,
                                                 crop_size=input_size,
                                                 transform=transform)
    num_samples = len(val_dataset)
    valloader = data.DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False,
                                pin_memory=True,
                                drop_last=False)

    # Optimizer Initialization
    optimizer = optim.SGD(model.parameters(),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    lr_scheduler = SGDRScheduler(optimizer,
                                 total_epoch=args.epochs,
                                 eta_min=args.learning_rate / 100,
                                 warmup_epoch=10,
                                 start_cyclical=args.schp_start,
                                 cyclical_base_lr=args.learning_rate / 2,
                                 cyclical_epoch=args.cycle_epochs)

    optimizer.zero_grad()

    total_iters = args.epochs * len(trainloader)
    start = timeit.default_timer()
    for epoch in range(start_epoch, args.epochs):
        model.train()
        if distributed:
            train_sampler.set_epoch(epoch)
        for i_iter, batch in enumerate(trainloader):
            i_iter += len(trainloader) * epoch

            if epoch < args.schp_start:
                lr = adjust_learning_rate(optimizer, i_iter, total_iters)
            else:
                lr = lr_scheduler.get_lr()[0]

            images, labels, edges, semantic_edges, _ = batch
            labels = labels.long().cuda(non_blocking=True)
            edges = edges.long().cuda(non_blocking=True)
            semantic_edges = semantic_edges.long().cuda(non_blocking=True)

            preds = model(images)

            if cycle_n >= 1:
                with torch.no_grad():
                    soft_preds, soft_edges, soft_semantic_edges = schp_model(
                        images)
            else:
                soft_preds = None
                soft_edges = None
                soft_semantic_edges = None

            loss = criterion(preds, [
                labels, edges, semantic_edges, soft_preds, soft_edges,
                soft_semantic_edges
            ], cycle_n)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            with torch.no_grad():
                loss = loss.detach() * labels.shape[0]
                count = labels.new_tensor([labels.shape[0]], dtype=torch.long)
                if dist.is_initialized():
                    dist.all_reduce(count, dist.ReduceOp.SUM)
                    dist.all_reduce(loss, dist.ReduceOp.SUM)
                loss /= count.item()

            if not dist.is_initialized() or dist.get_rank() == 0:
                if i_iter % 50 == 0:
                    writer.add_scalar('learning_rate', lr, i_iter)
                    writer.add_scalar('loss', loss.data.cpu().numpy(), i_iter)

                if i_iter % 500 == 0:
                    images_inv = inv_preprocess(images, args.save_num_images)
                    labels_colors = decode_parsing(labels,
                                                   args.save_num_images,
                                                   args.num_classes,
                                                   is_pred=False)
                    edges_colors = decode_parsing(edges,
                                                  args.save_num_images,
                                                  2,
                                                  is_pred=False)
                    semantic_edges_colors = decode_parsing(
                        semantic_edges,
                        args.save_num_images,
                        args.num_classes,
                        is_pred=False)

                    if isinstance(preds, list):
                        preds = preds[0]
                    preds_colors = decode_parsing(preds[0],
                                                  args.save_num_images,
                                                  args.num_classes,
                                                  is_pred=True)
                    pred_edges = decode_parsing(preds[1],
                                                args.save_num_images,
                                                2,
                                                is_pred=True)
                    pred_semantic_edges_colors = decode_parsing(
                        preds[2],
                        args.save_num_images,
                        args.num_classes,
                        is_pred=True)

                    img = vutils.make_grid(images_inv,
                                           normalize=False,
                                           scale_each=True)
                    lab = vutils.make_grid(labels_colors,
                                           normalize=False,
                                           scale_each=True)
                    pred = vutils.make_grid(preds_colors,
                                            normalize=False,
                                            scale_each=True)
                    edge = vutils.make_grid(edges_colors,
                                            normalize=False,
                                            scale_each=True)
                    pred_edge = vutils.make_grid(pred_edges,
                                                 normalize=False,
                                                 scale_each=True)
                    pred_semantic_edges = vutils.make_grid(
                        pred_semantic_edges_colors,
                        normalize=False,
                        scale_each=True)

                    writer.add_image('Images/', img, i_iter)
                    writer.add_image('Labels/', lab, i_iter)
                    writer.add_image('Preds/', pred, i_iter)
                    writer.add_image('Edge/', edge, i_iter)
                    writer.add_image('Pred_edge/', pred_edge, i_iter)

                cur_loss = loss.data.cpu().numpy()
                logger.info(
                    f'iter = {i_iter} of {total_iters} completed, loss = {cur_loss}, lr = {lr}'
                )

        if (epoch + 1) % (args.eval_epochs) == 0:
            parsing_preds, scales, centers = valid(model, valloader,
                                                   input_size, num_samples)
            mIoU, f1 = compute_mean_ioU(parsing_preds, scales, centers,
                                        args.num_classes, args.data_dir,
                                        input_size, args.valid_dataset, True)

            if not dist.is_initialized() or dist.get_rank() == 0:
                torch.save(
                    model.module.state_dict(),
                    osp.join(args.snapshot_dir, TIMESTAMP,
                             'checkpoint_{}.pth'.format(epoch + 1)))
                if 'Helen' in args.data_dir:
                    if f1['overall'] > best_f1:
                        torch.save(
                            model.module.state_dict(),
                            osp.join(args.snapshot_dir, TIMESTAMP, 'best.pth'))
                        best_f1 = f1['overall']
                else:
                    if f1['Mean_F1'] > best_f1:
                        torch.save(
                            model.module.state_dict(),
                            osp.join(args.snapshot_dir, TIMESTAMP, 'best.pth'))
                        best_f1 = f1['Mean_F1']

            writer.add_scalars('mIoU', mIoU, epoch)
            writer.add_scalars('f1', f1, epoch)
            logger.info(
                f'mIoU = {mIoU}, and f1 = {f1} of epoch = {epoch}, until now, best_f1 = {best_f1}'
            )

            if (epoch + 1) >= args.schp_start and (
                    epoch + 1 - args.schp_start) % args.cycle_epochs == 0:
                logger.info(f'Self-correction cycle number {cycle_n}')
                schp.moving_average(schp_model, model, 1.0 / (cycle_n + 1))
                cycle_n += 1
                schp.bn_re_estimate(trainloader, schp_model)
                parsing_preds, scales, centers = valid(schp_model, valloader,
                                                       input_size, num_samples)
                mIoU, f1 = compute_mean_ioU(parsing_preds, scales, centers,
                                            args.num_classes, args.data_dir,
                                            input_size, args.valid_dataset,
                                            True)

                if not dist.is_initialized() or dist.get_rank() == 0:
                    torch.save(
                        schp_model.module.state_dict(),
                        osp.join(args.snapshot_dir, TIMESTAMP,
                                 'schp_{}_checkpoint.pth'.format(cycle_n)))

                    if 'Helen' in args.data_dir:
                        if f1['overall'] > best_f1:
                            torch.save(
                                schp_model.module.state_dict(),
                                osp.join(args.snapshot_dir, TIMESTAMP,
                                         'best.pth'))
                            best_f1 = f1['overall']
                    else:
                        if f1['Mean_F1'] > best_f1:
                            torch.save(
                                schp_model.module.state_dict(),
                                osp.join(args.snapshot_dir, TIMESTAMP,
                                         'best.pth'))
                            best_f1 = f1['Mean_F1']
                writer.add_scalars('mIoU', mIoU, epoch)
                writer.add_scalars('f1', f1, epoch)
                logger.info(
                    f'mIoU = {mIoU}, and f1 = {f1} of epoch = {epoch}, until now, best_f1 = {best_f1}'
                )

            torch.cuda.empty_cache()
            end = timeit.default_timer()
            print('epoch = {} of {} completed using {} s'.format(
                epoch, args.epochs, (end - start) / (epoch - start_epoch + 1)))

    end = timeit.default_timer()
    print(end - start, 'seconds')
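
Example #5 implements SCHP-style self-correction: after each cycle, schp_model is blended with the freshly trained model using weight 1 / (cycle_n + 1), and its BatchNorm statistics are re-estimated. schp.moving_average is not shown; a plausible minimal sketch of such a parameter blend (an assumption about its behaviour, not the repository's implementation):

import torch

def moving_average(target_model, source_model, alpha):
    # Blend source_model into target_model:
    #   target <- (1 - alpha) * target + alpha * source
    with torch.no_grad():
        for p_t, p_s in zip(target_model.parameters(), source_model.parameters()):
            p_t.mul_(1.0 - alpha).add_(p_s, alpha=alpha)
        for b_t, b_s in zip(target_model.buffers(), source_model.buffers()):
            if b_t.dtype.is_floating_point:
                b_t.mul_(1.0 - alpha).add_(b_s, alpha=alpha)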
Code Example #6
File: train.py  Project: zfjmike/fake-news-detection
def train(train_samples,
          valid_samples,
          word2num,
          lr=0.001,
          epoch=5,
          use_cuda=False):

    print('Training...')

    # Prepare training data
    print('  Preparing training data...')
    statement_word2num = word2num[0]
    subject_word2num = word2num[1]
    speaker_word2num = word2num[2]
    speaker_pos_word2num = word2num[3]
    state_word2num = word2num[4]
    party_word2num = word2num[5]
    context_word2num = word2num[6]

    train_data = train_samples
    dataset_to_variable(train_data, use_cuda)
    valid_data = valid_samples
    dataset_to_variable(valid_data, use_cuda)

    # Construct model instance
    print('  Constructing network model...')
    model = Net(len(statement_word2num), len(subject_word2num),
                len(speaker_word2num), len(speaker_pos_word2num),
                len(state_word2num), len(party_word2num),
                len(context_word2num))
    if use_cuda: model.cuda()

    # Start training
    print('  Start training')

    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()

    step = 0
    display_interval = 2000

    for epoch_ in range(epoch):
        print('  ==> Epoch ' + str(epoch_) + ' started.')
        random.shuffle(train_data)
        total_loss = 0
        for sample in train_data:

            optimizer.zero_grad()

            prediction = model(sample)
            label = Variable(torch.LongTensor([sample.label]))
            loss = F.cross_entropy(prediction, label)
            loss.backward()
            optimizer.step()

            step += 1
            if step % display_interval == 0:
                print('    ==> Iter: ' + str(step) + ' Loss: ' + str(loss.item()))

            total_loss += loss.item()

        print('  ==> Epoch ' + str(epoch_) + ' finished. Avg Loss: ' +
              str(total_loss / len(train_data)))

        valid(valid_data, word2num, model)

    return model
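
Example #6 depends on a dataset_to_variable helper that is not shown; it presumably converts each sample's integer id sequences to tensors and optionally moves them to the GPU. A rough sketch under that assumption, using a hypothetical statement field for illustration:

import torch

def dataset_to_variable(samples, use_cuda=False):
    # Hypothetical sketch: each sample is assumed to carry list-of-int id
    # fields (here a single 'statement' field) that the model consumes as
    # LongTensors.
    for sample in samples:
        sample.statement = torch.LongTensor(sample.statement)
        if use_cuda:
            sample.statement = sample.statement.cuda()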
Code Example #7
def train(train_samples,
          valid_samples,
          word2num,
          lr = 0.001,
          epoch = 5,
          use_cuda = False):

    print('Training...')

    # Prepare training data
    print('  Preparing training data...')
    statement_word2num = word2num[0]
    subject_word2num = word2num[1]
    speaker_word2num = word2num[2]
    speaker_pos_word2num = word2num[3]
    state_word2num = word2num[4]
    party_word2num = word2num[5]
    context_word2num = word2num[6]

    train_data = train_samples
    dataset_to_variable(train_data, use_cuda)
    valid_data = valid_samples
    dataset_to_variable(valid_data, use_cuda)

    # Construct model instance
    print('  Constructing network model...')
    model = Net(len(statement_word2num),
                len(subject_word2num),
                len(speaker_word2num),
                len(speaker_pos_word2num),
                len(state_word2num),
                len(party_word2num),
                len(context_word2num))
    if use_cuda: model.cuda()

    # Start training
    print('  Start training')

    optimizer = optim.Adam(model.parameters(), lr = lr)
    model.train()

    step = 0
    display_interval = 2000

    for epoch_ in range(epoch):
        print('  ==> Epoch '+str(epoch_)+' started.')
        random.shuffle(train_data)
        total_loss = 0
        for sample in train_data:

            optimizer.zero_grad()

            prediction = model(sample)
            label = Variable(torch.LongTensor([sample.label]))
            loss = F.cross_entropy(prediction, label)
            loss.backward()
            optimizer.step()

            step += 1
            if step % display_interval == 0:
                print('    ==> Iter: '+str(step)+' Loss: '+str(loss.item()))

            total_loss += loss.item()

        print('  ==> Epoch '+str(epoch_)+' finished. Avg Loss: '+str(total_loss/len(train_data)))

        valid(valid_data, word2num, model)

    return model
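
Examples #6 and #7 both call valid(valid_data, word2num, model) without showing it. A rough sketch under the assumption that it simply reports top-1 accuracy over per-sample predictions (word2num is kept only to mirror the call signature):

import torch

def valid(valid_data, word2num, model):
    # Assumed behaviour: run the model on every validation sample and
    # report the fraction of correct top-1 predictions.
    model.eval()
    correct = 0
    with torch.no_grad():
        for sample in valid_data:
            prediction = model(sample)
            if prediction.argmax(dim=1).item() == sample.label:
                correct += 1
    model.train()
    print('  ==> Validation accuracy: ' + str(correct / len(valid_data)))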
Code Example #8
def train(train_samples,
          valid_samples,
          word2num,
          max_len_statement,
          max_len_subject,
          max_len_speaker_pos,
          max_len_context,
          lr=0.001,
          epoch=1,
          use_cuda=False,
          batch_size=20,
          batch_size_val=5,
          model_path='models'):

    print('Training...')

    # Prepare training data
    print('  Preparing training data...')
    statement_word2num = word2num[0]
    subject_word2num = word2num[1]
    speaker_word2num = word2num[2]
    speaker_pos_word2num = word2num[3]
    state_word2num = word2num[4]
    party_word2num = word2num[5]
    context_word2num = word2num[6]

    # train_data = train_samples
    train_data = CustomDataset(train_samples, max_len_statement,
                               max_len_subject, max_len_speaker_pos,
                               max_len_context)
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              collate_fn=collate_fn)

    # dataset_to_variable(train_data, use_cuda)
    valid_data = valid_samples
    valid_samples = CustomDataset(valid_samples, max_len_statement,
                                  max_len_subject, max_len_speaker_pos,
                                  max_len_context)
    valid_loader = DataLoader(valid_samples,
                              batch_size=batch_size_val,
                              collate_fn=collate_fn)

    # dataset_to_variable(valid_data, use_cuda)

    # Construct model instance
    print('  Constructing network model...')
    model = Net(len(statement_word2num), len(subject_word2num),
                len(speaker_word2num), len(speaker_pos_word2num),
                len(state_word2num), len(party_word2num),
                len(context_word2num))
    if use_cuda:
        print('using cuda')
        model.cuda()

    # Start training
    print('  Start training')

    optimizer = optim.Adam(model.parameters(), lr=lr)
    lr_scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                     mode='max',
                                     factor=0.5,
                                     patience=5)
    model.train()

    step = 0
    display_interval = 50
    optimal_val_acc = 0

    for epoch_ in range(epoch):
        print('  ==> Epoch ' + str(epoch_) + ' started.')
        # random.shuffle(train_data)
        total_loss = 0
        for (inputs_statement, inputs_subject, inputs_speaker,
             inputs_speaker_pos, inputs_state, inputs_party, inputs_context,
             target) in train_loader:

            # sample = [inputs_statement, inputs_subject, inputs_speaker, inputs_speaker_pos, inputs_state, inputs_party, inputs_context]
            optimizer.zero_grad()
            if use_cuda:
                # .cuda() is not in-place on tensors; rebind the results.
                inputs_statement = inputs_statement.cuda()
                inputs_subject = inputs_subject.cuda()
                inputs_speaker = inputs_speaker.cuda()
                inputs_speaker_pos = inputs_speaker_pos.cuda()
                inputs_state = inputs_state.cuda()
                inputs_party = inputs_party.cuda()
                inputs_context = inputs_context.cuda()
                target = target.cuda()

            prediction = model(inputs_statement, inputs_subject,
                               inputs_speaker, inputs_speaker_pos,
                               inputs_state, inputs_party, inputs_context)
            # label = Variable(torch.LongTensor([sample.label]))
            # loss = F.cross_entropy(prediction, label)
            loss = F.cross_entropy(prediction, target)
            loss.backward()
            optimizer.step()

            step += 1
            if step % display_interval == 0:
                print('    ==> Iter: ' + str(step) + ' Loss: ' + str(loss.item()))

            total_loss += loss.item() * len(inputs_statement)

        print('  ==> Epoch ' + str(epoch_) + ' finished. Avg Loss: ' +
              str(total_loss / len(train_data)))

        val_acc = valid(valid_loader, word2num, model, max_len_statement,
                        max_len_subject, max_len_speaker_pos, max_len_context,
                        use_cuda)
        lr_scheduler.step(val_acc)
        for param_group in optimizer.param_groups:
            print("The current learning rate used by the optimizer is : {}".
                  format(param_group['lr']))

        if val_acc > optimal_val_acc:
            optimal_val_acc = val_acc
            model_file = os.path.join(
                model_path,
                'model_bs_{}_lr_{}_acc_{}.pth'.format(batch_size, lr, val_acc))
            old_models = [
                os.path.join(model_path, filename)
                for filename in os.listdir(model_path)
                if filename.startswith("model_bs_{}_lr_{}".format(
                    batch_size, lr))
            ]
            for file_ in old_models:
                os.remove(file_)
            torch.save(model.state_dict(), model_file)

    return optimal_val_acc
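
Example #8 batches variable-length fields through a collate_fn that is not reproduced here. A minimal sketch of such a function, assuming each sample is a tuple of seven token-id lists plus an integer label and that zero-padding is acceptable:

import torch
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    # batch: list of (statement, subject, speaker, speaker_pos, state,
    # party, context, label) tuples, each input field a list of token ids.
    fields = list(zip(*batch))
    padded = [
        pad_sequence([torch.LongTensor(seq) for seq in field], batch_first=True)
        for field in fields[:-1]
    ]
    labels = torch.LongTensor(fields[-1])
    return (*padded, labels)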
Code Example #9
File: train.py  Project: activemodest/CMS
def train(train_samples, valid_samples, test_samples, index,
          word2vec_preweight, vocabulary_dim, process_file, jpg_file,
          save_model_file, test_label_file, para_dict):
    # print ('Traing begin')
    # print ('Prepare train data')
    train_loss_list = []
    valid_loss_list = []
    valid_acc_list = []
    best_valid_acc = 0
    best_test_acc = 0

    # train_data
    train_statement_data = [x[1] for x in train_samples]
    train_statement_data = np.array(train_statement_data)
    train_statement_data = torch.from_numpy(train_statement_data).cuda()

    train_statement_len = [x[2] for x in train_samples]
    train_statement_len = np.array(train_statement_len)
    train_statement_len = torch.from_numpy(train_statement_len).int().cuda()
    # train_statement_len = train_statement_len.unsqueeze(1)

    train_meta_data = [x[3] for x in train_samples]
    train_meta_data = np.array(train_meta_data)
    train_meta_data = torch.from_numpy(train_meta_data).cuda()

    train_history_data = [x[4] for x in train_samples]
    train_history_data = np.array(train_history_data)
    train_history_data = torch.from_numpy(train_history_data)
    train_history_data = train_history_data.float().cuda()

    train_target = [x[0] for x in train_samples]
    train_target = np.array(train_target)
    train_target = torch.from_numpy(train_target).cuda()

    # valid data
    valid_statement_data = [x[1] for x in valid_samples]
    valid_statement_data = np.array(valid_statement_data)
    valid_statement_data = torch.from_numpy(valid_statement_data).cuda()

    valid_statement_len = [x[2] for x in valid_samples]
    valid_statement_len = np.array(valid_statement_len)
    valid_statement_len = torch.from_numpy(valid_statement_len).int().cuda()
    # valid_statement_len = valid_statement_len.unsqueeze(1)

    valid_meta_data = [x[3] for x in valid_samples]
    valid_meta_data = np.array(valid_meta_data)
    valid_meta_data = torch.from_numpy(valid_meta_data).cuda()

    valid_history_data = [x[4] for x in valid_samples]
    valid_history_data = np.array(valid_history_data)
    valid_history_data = torch.from_numpy(valid_history_data)
    valid_history_data = valid_history_data.float().cuda()

    valid_target = [x[0] for x in valid_samples]
    valid_target = np.array(valid_target)

    # test data
    test_statement_data = [x[1] for x in test_samples]
    test_statement_data = np.array(test_statement_data)
    test_statement_data = torch.from_numpy(test_statement_data).cuda()

    test_statement_len = [x[2] for x in test_samples]
    test_statement_len = np.array(test_statement_len)
    test_statement_len = torch.from_numpy(test_statement_len).int().cuda()
    # test_statement_len = test_statement_len.unsqueeze(1)

    test_meta_data = [x[3] for x in test_samples]
    test_meta_data = np.array(test_meta_data)
    test_meta_data = torch.from_numpy(test_meta_data).cuda()

    test_history_data = [x[4] for x in test_samples]
    test_history_data = np.array(test_history_data)
    test_history_data = torch.from_numpy(test_history_data)
    test_history_data = test_history_data.float().cuda()

    test_target = [x[0] for x in test_samples]
    test_target = np.array(test_target)
    print('Construct network model')
    model = Net(word2vec_preweight, vocabulary_dim, index,
                para_dict['transformer_num_layers'], para_dict['num_heads'],
                para_dict['dropout'])
    #print('Model Structure',model)

    # print ('Start training......')
    train_dataset = CustomDataset(train_statement_data, train_statement_len,
                                  train_meta_data, train_history_data,
                                  train_target)
    train_loader = DataLoader(train_dataset,
                              batch_size=para_dict['batch_size'],
                              shuffle=False,
                              drop_last=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=para_dict['lr'],
                           weight_decay=para_dict['weight_decay'])
    loss_func = nn.CrossEntropyLoss()
    display_interval = 50

    model.train()
    model.cuda()

    for epoch in range(para_dict['EPOCH']):
        #print ('==>EPOCH:'+str(epoch)+' '+'started')
        process_file.write('==>EPOCH:' + str(epoch) + ' ' + 'started' + '\n')
        for step, (batch_statement, batch_statement_len, batch_meta,
                   batch_history, batch_y) in enumerate(train_loader):
            batch_statement = Variable(batch_statement).cuda()
            batch_statement_len = Variable(batch_statement_len).cuda()
            batch_meta = Variable(batch_meta).cuda()
            batch_history = Variable(batch_history).cuda()
            batch_y = Variable(batch_y).cuda()
            output = model(batch_statement, batch_statement_len, batch_meta,
                           batch_history)
            loss = loss_func(output, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % display_interval == 0:
                train_loss_list.append(loss.cpu().data.numpy())
                # print ('...==>Iter:'+str(step)+' '+'train_Loss='+str(loss.cpu().data.numpy()))
                process_file.write('...==>Epoch:' + str(epoch) + ' ' +
                                   'train_Loss=' +
                                   str(loss.data.cpu().numpy()) + '\r\n')

                valid_loss, valid_acc = valid(
                    valid_statement_data, valid_statement_len, valid_meta_data,
                    valid_history_data, valid_target, model,
                    loss_func)  # ------------------------
                valid_loss_list.append(valid_loss)
                valid_acc_list.append(valid_acc)
                if best_valid_acc < valid_acc:
                    best_valid_acc = valid_acc
                    test_acc = test(test_statement_data, test_statement_len,
                                    test_meta_data, test_history_data,
                                    test_target, test_label_file, model)
                    best_test_acc = test_acc

                # print('......==>Iter:' + str(step) + ' ' + 'valid_Loss=' + str(valid_loss)+' '+'valid_Acc='+str(valid_acc))
                process_file.write('......==>Epoch:' + str(epoch) + ' ' +
                                   'valid_Loss=' + str(valid_loss) + ' ' +
                                   'valid_Acc=' + str(valid_acc) + '\r\n')

    x = range(
        para_dict['EPOCH'] *
        (len(train_samples) // para_dict['batch_size'] // display_interval +
         1))  # number of logging points; depends on display_interval
    plt.figure(figsize=(10, 10))
    plt.subplot(211)
    plt.title('Loss vs epoch')
    plt.xlim(
        0, para_dict['EPOCH'] *
        (len(train_samples) // para_dict['batch_size'] // display_interval +
         1))
    plt.ylim(min(train_loss_list + valid_loss_list),
             max(train_loss_list + valid_loss_list))
    plt.ylabel('Loss')
    plt.xlabel('Iter')
    plt.plot(x, train_loss_list, label='train_loss')
    plt.plot(x, valid_loss_list, label='valid_loss')
    plt.legend(loc='best')
    plt.subplot(212)
    plt.title('train vs valid')
    plt.xlim(
        0, para_dict['EPOCH'] *
        (len(train_samples) // para_dict['batch_size'] // display_interval +
         1))
    plt.ylim(min(valid_acc_list), max(valid_acc_list))
    plt.ylabel('Acc')
    plt.xlabel('Iter')
    plt.plot(x, valid_acc_list, label='valid_acc')
    plt.legend(loc='best')
    #plt.show()
    plt.savefig(jpg_file)
    plt.close()

    # save model
    # torch.save(model,save_model_file)  # save the whole net

    return best_test_acc
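
Example #9 repeats the same list -> numpy -> CUDA-tensor conversion for every field of every split. A small helper that captures the pattern (an illustrative refactoring, not the repository's code) would reduce each field to one call:

import numpy as np
import torch

def field_to_cuda(samples, idx, dtype=None):
    # Stack the idx-th field of every sample and move the result to the GPU.
    t = torch.from_numpy(np.array([s[idx] for s in samples]))
    if dtype is not None:
        t = t.to(dtype)
    return t.cuda()

# Usage sketch:
#   train_statement_data = field_to_cuda(train_samples, 1)
#   train_statement_len = field_to_cuda(train_samples, 2, torch.int)
#   train_history_data = field_to_cuda(train_samples, 4, torch.float)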