Example #1
def train(train_img_path, train_gt_path, pths_path, batch_size, lr, num_workers, epoch_iter, interval, output_dir):
    # Seed the random number generators so that results are reproducible
    torch.manual_seed(970201)            # seed for the CPU
    torch.cuda.manual_seed(970201)       # seed for the current GPU
    logger = setup_logger("east_matrix", output_dir, get_rank())

    file_num = len(os.listdir(train_img_path))  # number of training images
    trainset = custom_dataset(train_img_path, train_gt_path)  # build the training set
    # DataLoader combines the dataset with a sampler and provides an iterable over it
    train_loader = data.DataLoader(trainset, batch_size=batch_size,
                                   shuffle=True, num_workers=num_workers, drop_last=True)

    criterion = Loss()  # loss function
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST()  # network model

    # Use multiple GPUs if available
    data_parallel = False
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True

    # Move the model to the GPU or CPU selected above
    model.to(device)

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Learning-rate schedule: decay to one tenth at the halfway point
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[epoch_iter//2], gamma=0.1)
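The comment on the last line above describes the schedule: the learning rate is cut to one tenth once, at the halfway milestone. A self-contained sketch of that behaviour, with assumed values epoch_iter=600 and lr=1e-3 (not taken from the snippet):

# Minimal sketch of the MultiStepLR schedule used above; epoch_iter=600 and
# lr=1e-3 are assumed example values.
import torch
from torch.optim import Adam, lr_scheduler

epoch_iter, lr = 600, 1e-3
optimizer = Adam([torch.nn.Parameter(torch.zeros(1))], lr=lr)
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[epoch_iter // 2], gamma=0.1)

for epoch in range(epoch_iter):
    optimizer.step()      # one epoch of training would normally happen here
    scheduler.step()      # lr stays 1e-3 for epochs 0-299, then drops to 1e-4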
Example #2
def train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, interval, pretrained_pth):
    file_num = len(os.listdir(train_img_path))
    trainset = custom_dataset(train_img_path, train_gt_path)
    train_loader = data.DataLoader(trainset, batch_size=batch_size, \
                                      shuffle=True, num_workers=num_workers, drop_last=True)

    criterion = Loss()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST()
    # if pretrained_pth:
    #     model.load_state_dict(torch.load(pretrained_pth))
    data_parallel = False
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[epoch_iter // 2],
                                         gamma=0.1)

    for epoch in range(epoch_iter):
        model.train()
        scheduler.step()
        epoch_loss = 0
        epoch_time = time.time()
        for i, (img, gt_score, gt_geo, ignored_map) in enumerate(train_loader):
            start_time = time.time()
            img, gt_score, gt_geo, ignored_map = img.to(device), gt_score.to(
                device), gt_geo.to(device), ignored_map.to(device)
            pred_score, pred_geo = model(img)
            loss = criterion(gt_score, pred_score, gt_geo, pred_geo,
                             ignored_map)

            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'.format(\
                       epoch+1, epoch_iter, i+1, int(file_num/batch_size), time.time()-start_time, loss.item()))

        print('epoch_loss is {:.8f}, epoch_time is {:.8f}'.format(
            epoch_loss / int(file_num / batch_size),
            time.time() - epoch_time))
        print(time.asctime(time.localtime(time.time())))
        print('=' * 50)
        if (epoch + 1) % interval == 0:
            state_dict = model.module.state_dict(
            ) if data_parallel else model.state_dict()
            torch.save(
                state_dict,
                os.path.join(pths_path,
                             'model_epoch_{}.pth'.format(epoch + 1)))
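For reference, a train() with the signature above is usually driven by a small entry point like the sketch below; every path and hyperparameter value here is an illustrative assumption, not taken from the example.

# Hypothetical entry point for the train() above; all values are assumptions.
if __name__ == '__main__':
    train_img_path = './ICDAR_2015/train_img'
    train_gt_path = './ICDAR_2015/train_gt'
    pths_path = './pths'
    batch_size = 24
    lr = 1e-3
    num_workers = 4
    epoch_iter = 600
    interval = 5
    pretrained_pth = None
    train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, interval, pretrained_pth)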
Example #3
def main():
	config = Config()

	if os.path.exists(config.SAVE_PATH):
		shutil.rmtree(config.SAVE_PATH)
	os.makedirs(config.SAVE_PATH, exist_ok=True)

	trainF = open(os.path.join(config.SAVE_PATH, "train.csv"), 'w')
	testF = open(os.path.join(config.SAVE_PATH, "test.csv"), 'w')

	train_img_path = os.path.abspath('../ICDAR_2015/train_img')
	train_gt_path  = os.path.abspath('../ICDAR_2015/train_gt')
	val_img_path = os.path.abspath('../ICDAR_2015/test_img')
	val_gt_path  = os.path.abspath('../ICDAR_2015/test_gt')

	kwargs = {'num_workers': 2, 'pin_memory': True} if torch.cuda.is_available() else {}

	train_dataset = custom_dataset(train_img_path, train_gt_path)
	train_loader = data.DataLoader(train_dataset, batch_size=config.TRAIN_BATCH*len(device_list), \
									shuffle=True, drop_last=True, **kwargs)

	val_dataset = custom_dataset(val_img_path, val_gt_path)
	val_loader = data.DataLoader(val_dataset, batch_size=config.TRAIN_BATCH*len(device_list), \
									shuffle=True, drop_last=True, **kwargs)

	net = EAST()

	if torch.cuda.is_available():
		net = net.cuda(device=device_list[0])
		net = torch.nn.DataParallel(net, device_ids=device_list)

	optimizer = torch.optim.Adam(net.parameters(), lr=config.BASE_LR, weight_decay=config.WEIGHT_DECAY)

	for epoch in range(config.EPOCHS):
		train(net, epoch, train_loader, optimizer, trainF, config)
		test(net, epoch, val_loader, testF, config)
		if epoch != 0 and epoch % config.SAVE_INTERVAL == 0:
			torch.save({'state_dict': net.state_dict()}, os.path.join(os.getcwd(), config.SAVE_PATH, "laneNet{}.pth.tar".format(epoch)))
	trainF.close()
	testF.close()
	torch.save({'state_dict': net.state_dict()}, os.path.join(os.getcwd(),  config.SAVE_PATH, "finalNet.pth.tar"))
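The main() above references a module-level device_list and a Config class that are not shown. A minimal sketch of what they might look like, limited to the attributes main() actually uses; all values are placeholder assumptions.

# Hypothetical definitions assumed by main() above; adjust to your setup.
device_list = [0]              # GPU ids passed to DataParallel

class Config:
    SAVE_PATH = './save'       # output directory for csv logs and checkpoints
    TRAIN_BATCH = 8            # per-GPU batch size
    BASE_LR = 1e-3             # initial learning rate for Adam
    WEIGHT_DECAY = 1e-5        # L2 regularisation
    EPOCHS = 600               # total training epochs
    SAVE_INTERVAL = 5          # save a checkpoint every SAVE_INTERVAL epochs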
Example #4
def train(train_ds_path,
          val_ds_path,
          pths_path,
          results_path,
          batch_size,
          lr,
          num_workers,
          train_iter,
          interval,
          opt_level=0,
          checkpoint_path=None,
          val_freq=10):
    torch.cuda.set_device(rank)

    tensorboard_dir = os.path.join(results_path, 'logs')
    checkpoints_dir = os.path.join(results_path, 'checkpoints')
    if rank == 0:
        os.makedirs(tensorboard_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
    barrier()

    try:
        logger.info('Importing AutoResume lib...')
        from userlib.auto_resume import AutoResume as auto_resume
        auto_resume.init()
        logger.info('Success!')
    except:
        logger.info('Failed!')
        auto_resume = None

    trainset = custom_dataset(
        os.path.join(train_ds_path, 'images'),
        os.path.join(train_ds_path, 'gt'),
    )

    valset = custom_dataset(os.path.join(val_ds_path, 'images'),
                            os.path.join(val_ds_path, 'gt'),
                            is_val=True)

    logger.info(f'World Size: {world_size}, Rank: {rank}')

    if world_size > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            trainset)
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            valset, shuffle=False)
    else:
        train_sampler = None
        val_sampler = None

    worker_init = LoaderWorkerProcessInit(rank, 43)
    train_loader = DataLoader(trainset,
                              batch_size=batch_size,
                              shuffle=train_sampler is None,
                              sampler=train_sampler,
                              num_workers=num_workers,
                              pin_memory=True,
                              drop_last=True,
                              worker_init_fn=worker_init)
    val_loader = DataLoader(valset,
                            batch_size=batch_size,
                            shuffle=False,
                            sampler=val_sampler,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=True,
                            worker_init_fn=worker_init)

    criterion = Loss()

    device = torch.device(
        f"cuda:{rank}" if torch.cuda.is_available() else "cpu")
    model = EAST()
    model.to(device)

    model = apex.parallel.convert_syncbn_model(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=f'O{opt_level}')

    start_iter = 0
    if auto_resume is not None:
        auto_resume_details = auto_resume.get_resume_details()
        if auto_resume_details is not None:
            logger.info(
                'Detected that this is a resumption of a previous job!')
            checkpoint_path = auto_resume_details['CHECKPOINT_PATH']

    if checkpoint_path:
        logger.info(f'Loading checkpoint at path "{checkpoint_path}"...')
        checkpoint = torch.load(checkpoint_path, map_location=f'cuda:{rank}')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        amp.load_state_dict(checkpoint['amp_state'])
        start_iter = checkpoint['iter']
        logger.info('Done')

    data_parallel = False
    main_model = model
    if torch.distributed.is_initialized():
        logger.info(
            f'DataParallel: Using {torch.cuda.device_count()} devices!')
        model = DDP(model)
        data_parallel = True

    for param_group in optimizer.param_groups:
        param_group.setdefault('initial_lr', lr)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[train_iter // 2],
                                         gamma=0.1,
                                         last_epoch=start_iter)

    # This allows us to change dataset size without affecting things such as validation frequency
    steps_per_epoch = 1000 // (world_size * batch_size)

    step = start_iter
    start_epoch = step // steps_per_epoch
    epoch_iter = int(math.ceil(train_iter / steps_per_epoch))
    if rank == 0:
        logger.info('Initializing Tensorboard')
        writer = SummaryWriter(tensorboard_dir, purge_step=step)

    loss_meters = MeterDict(reset_on_value=True)
    val_loss_meters = MeterDict(reset_on_value=True)
    time_meters = MeterDict(reset_on_value=True)

    logger.info('Training')
    model.train()

    train_start_time = time.time()

    best_loss = 100

    # Wrap the loader in a restartable iterator; use a name that does not shadow
    # the train_iter argument (the total number of training steps).
    train_batch_iter = [iter(train_loader)]

    def get_batch():
        try:
            return next(train_batch_iter[0])
        except StopIteration:
            train_batch_iter[0] = iter(train_loader)
            return get_batch()

    for epoch in range(start_epoch, epoch_iter):
        if train_sampler is not None:
            train_sampler.set_epoch(epoch)

        epoch_loss = 0
        epoch_time = time.time()
        start_time = time.time()

        model.train()

        for i in range(steps_per_epoch):
            batch = get_batch()

            optimizer.zero_grad()

            batch = [b.cuda(rank, non_blocking=True) for b in batch]

            img, gt_score, gt_geo, ignored_map = batch
            barrier()
            time_meters['batch_time'].add_sample(time.time() - start_time)

            pred_score, pred_geo = model(img)

            loss, details = criterion(gt_score, pred_score, gt_geo, pred_geo,
                                      ignored_map)

            epoch_loss += loss.detach().item()

            with amp.scale_loss(loss, optimizer) as loss_scaled:
                loss_scaled.backward()
            optimizer.step()

            barrier()
            time_meters['step_time'].add_sample(time.time() - start_time)

            details['global'] = loss.detach().item()

            for k, v in details.items():
                loss_meters[k].add_sample(v)

            if i % 10 == 0:
                logger.info(f'\tStep [{i+1}/{steps_per_epoch}]')

            start_time = time.time()
            step += 1
            scheduler.step()

            if step == train_iter:
                break

        term_requested = auto_resume is not None and auto_resume.termination_requested(
        )

        checkpoint_path = None
        if rank == 0:
            times = {k: m.value() for k, m in time_meters.items()}
            losses = {k: m.value() for k, m in loss_meters.items()}

            times['epoch'] = time.time() - epoch_time

            logger.info(
                f'Epoch is [{epoch+1}/{epoch_iter}], time consumption is {times}, batch_loss is {losses}'
            )

            for k, v in times.items():
                writer.add_scalar(f'performance/{k}', v, step)
            for k, v in losses.items():
                writer.add_scalar(f'loss/{k}', v, step)
            writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'],
                              step)

            if term_requested or (epoch + 1) % interval == 0:
                state_dict = main_model.state_dict()
                optim_state = optimizer.state_dict()

                checkpoint_path = os.path.join(
                    checkpoints_dir, 'model_epoch_{}.pth'.format(epoch + 1))
                logger.info(f'Saving checkpoint to "{checkpoint_path}"...')
                torch.save(
                    {
                        'model': state_dict,
                        'optimizer': optim_state,
                        'amp_state': amp.state_dict(),
                        'epoch': epoch + 1,
                        'iter': step
                    }, checkpoint_path)
                logger.info(f'Done')

        if (epoch + 1) % val_freq == 0 or step == train_iter:
            logger.info(f'Validating epoch {epoch+1}...')
            model.eval()
            val_loader.dataset.reset_random()
            with torch.no_grad():
                for i, batch in enumerate(val_loader):
                    batch = [b.cuda(rank, non_blocking=True) for b in batch]

                    img, gt_score, gt_geo, ignored_map = batch
                    barrier()

                    pred_score, pred_geo = model(img)

                    loss, details = criterion(gt_score, pred_score, gt_geo,
                                              pred_geo, ignored_map)
                    details['global'] = loss.detach().item()

                    barrier()

                    for k, v in details.items():
                        val_loss_meters[k].add_sample(v)

            print_dict = dict()
            for k, m in val_loss_meters.items():
                t = torch.tensor(m.value(),
                                 device=f'cuda:{rank}',
                                 dtype=torch.float32)
                if world_size > 1:
                    torch.distributed.reduce(t, 0)
                    t /= world_size
                if rank == 0:
                    writer.add_scalar(f'val/loss/{k}', t.item(), step)
                print_dict[k] = t.item()
            logger.info(f'\tLoss: {print_dict}')
            val_loss = print_dict['global']
            if rank == 0 and val_loss < best_loss:
                logger.info(
                    f'This is the best model so far. New loss: {val_loss}, previous: {best_loss}'
                )
                best_loss = val_loss
                shutil.copyfile(checkpoint_path,
                                os.path.join(checkpoints_dir, 'best.pth'))
            logger.info('Training')

        if term_requested:
            logger.warning('Termination requested! Exiting...')
            if rank == 0:
                auto_resume.request_resume(user_dict={
                    'CHECKPOINT_PATH': checkpoint_path,
                    'EPOCH': epoch
                })
            break

    logger.info(
        f'Finished training!!! Took {time.time()-train_start_time:0.3f} seconds!'
    )
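The function in Example #4 relies on module-level rank, world_size, barrier and logger and on an already-initialized process group, none of which are shown. Below is a minimal sketch of the kind of setup such a script typically performs before calling train(); the environment-variable check and the helper shapes are assumptions.

# Hypothetical distributed setup assumed by the train() in Example #4.
import os
import logging

import torch
import torch.distributed as dist

logger = logging.getLogger('east')

if 'WORLD_SIZE' in os.environ and int(os.environ['WORLD_SIZE']) > 1:
    # launched with one process per GPU (e.g. via torch.distributed.launch)
    dist.init_process_group(backend='nccl', init_method='env://')
    rank = dist.get_rank()
    world_size = dist.get_world_size()
else:
    # single-process fallback
    rank, world_size = 0, 1


def barrier():
    # synchronize all ranks; a no-op when not running distributed
    if dist.is_available() and dist.is_initialized():
        dist.barrier()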
Example #5
def train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, interval):
    file_num = len(os.listdir(train_img_path))
    trainset = custom_dataset(train_img_path, train_gt_path)
    train_loader = data.DataLoader(trainset, batch_size=batch_size, \
                                      shuffle=True, num_workers=num_workers, drop_last=True)

    criterion = Loss()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST(pretrained=False)
    model.load_state_dict(
        torch.load(
            '/home/chen-ubuntu/Desktop/checks_dataset/pths/model_epoch_mode3_14.pth'
        ))
    data_parallel = False

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    optimizer.zero_grad()
    #scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[epoch_iter//2], gamma=0.1)

    for epoch in range(epoch_iter):
        model.train()
        epoch_loss = 0
        epoch_time = time.time()

        loss_plot = []
        bx = []
        for i, (img, gt_score, gt_geo, ignored_map) in enumerate(train_loader):
            start_time = time.time()
            img, gt_score, gt_geo, ignored_map = img.to(device), gt_score.to(
                device), gt_geo.to(device), ignored_map.to(device)
            pred_score, pred_geo = model(img)
            loss = criterion(gt_score, pred_score, gt_geo, pred_geo,
                             ignored_map)

            epoch_loss += loss.item()
            loss.backward()
            # accumulate gradients over 3 mini-batches before each optimizer step
            if (i + 1) % 3 == 0:
                optimizer.step()
                optimizer.zero_grad()

            if (i + 1) % 100 == 0:
                print(
                    'Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'
                    .format(epoch + 1, epoch_iter, i + 1,
                            int(file_num / batch_size),
                            time.time() - start_time, loss.item()))

                loss_plot.append(loss.item())
                bx.append(i + epoch * int(file_num / batch_size))
                # redraw and save the loss curve only when a new point has been added
                plt.plot(bx,
                         loss_plot,
                         label='loss_mean',
                         linewidth=1,
                         color='b',
                         marker='o',
                         markerfacecolor='green',
                         markersize=2)
                plt.savefig(os.path.abspath('./labeled.jpg'))

        print('epoch_loss is {:.8f}, epoch_time is {:.8f}'.format(
            epoch_loss / int(file_num / batch_size),
            time.time() - epoch_time))
        print(time.asctime(time.localtime(time.time())))
        print('=' * 50)
        if (epoch + 1) % interval == 0:
            state_dict = model.module.state_dict(
            ) if data_parallel else model.state_dict()
            torch.save(
                state_dict,
                os.path.join(pths_path,
                             'model3_epoch_{}.pth'.format(epoch + 1 + 14)))
Example #6
               'loss': losses.avg,
               'pred': pred_meter.avg
           }, queue


if __name__ == '__main__':
    args = parse_option()
    image_size, mean, std = dataset_info(name='cifar')
    # image_size = 28
    # mean = [0.1307, ]
    # std = [0.3081, ]
    # normalize = transforms.Normalize(mean=mean, std=std)

    train_transform = get_transform(image_size, mean=mean, std=std, mode='train')
    # datasets.mnist.MNIST
    train_dataset = custom_dataset(datasets.cifar.CIFAR10)(root='./', train=True, transform=train_transform,
                                                           download=True)
    print(len(train_dataset))
    train_dataloader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True, num_workers=0,
                                  pin_memory=False, drop_last=True)  # drop the last batch due to irregular size

    model_q, model_k = get_model(config.MODEL)

    optimizer = torch.optim.SGD(model_q.parameters(), lr=0.02, momentum=0.9, nesterov=True, weight_decay=1e-5)
    per = config.ALL_EPOCHS // 6
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[per * 2, per * 4, per * 5], gamma=0.1)

    # copy parameters from model_q to model_k
    momentum_update(model_q, model_k, 0)

    criterion = torch.nn.CrossEntropyLoss()
Example #7
def train(source_img_path,
          source_gt_path,
          target_img_path,
          target_gt_path,
          valid_img_path,
          valid_gt_path,
          pths_path,
          batch_size,
          lr,
          num_workers,
          epoch_iter,
          interval,
          pretrain_model_path=None,
          scheduler_path=None,
          current_epoch_num=0):

    if not os.path.exists(pths_path):
        os.mkdir(pths_path)

    # source_train_set = IC13_dataset(source_img_path, source_gt_path)
    source_train_set = custom_dataset(source_img_path, source_gt_path)
    target_train_set = custom_dataset(target_img_path, target_gt_path)
    valid_train_set = valid_dataset(valid_img_path, valid_gt_path)

    source_train_loader = data.DataLoader(source_train_set,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=num_workers,
                                          drop_last=True)
    target_train_loader = data.DataLoader(target_train_set,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=num_workers,
                                          drop_last=True)
    valid_loader = data.DataLoader(valid_train_set,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   num_workers=num_workers,
                                   drop_last=False)

    criterion = Loss().to(device)
    loss_domain = torch.nn.CrossEntropyLoss()

    model = EAST()
    if pretrain_model_path is not None:
        model.load_state_dict(torch.load(pretrain_model_path))
    data_parallel = False
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True

    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[epoch_iter // 3, epoch_iter * 2 // 3],
        gamma=0.1)
    if scheduler_path is not None:
        scheduler.load_state_dict(torch.load(scheduler_path))
    best_loss = 1000
    best_model_wts = copy.deepcopy(model.state_dict())
    best_num = 0

    train_loss = []
    valid_loss = []

    for epoch in range(current_epoch_num, epoch_iter):
        model.train()
        target_train_iter = iter(target_train_loader)

        epoch_loss = 0
        epoch_time = time.time()
        for i, (s_img, s_gt_score, s_gt_geo,
                s_ignored_map) in enumerate(source_train_loader):
            start_time = time.time()

            try:
                t_img, t_gt_score, t_gt_geo, t_ignored_map = next(
                    target_train_iter)
            except StopIteration:
                # restart the target-domain iterator once it is exhausted
                target_train_iter = iter(target_train_loader)
                t_img, t_gt_score, t_gt_geo, t_ignored_map = next(
                    target_train_iter)

            s_img, s_gt_score, s_gt_geo, s_ignored_map = s_img.to(
                device), s_gt_score.to(device), s_gt_geo.to(
                    device), s_ignored_map.to(device)

            pred_score, pred_geo, pred_cls = model(s_img, False)

            #source label
            domain_s = Variable(torch.zeros(pred_cls.size(0)).long().cuda())
            loss_domain_s = loss_domain(pred_cls, domain_s)

            target_cls = model(t_img, True)
            # target label
            domain_t = Variable(torch.ones(pred_cls.size(0)).long().cuda())
            loss_domain_t = loss_domain(target_cls, domain_t)

            loss = criterion(s_gt_score, pred_score, s_gt_geo, pred_geo,
                             s_ignored_map) + loss_domain_s + loss_domain_t

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            # print('Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'.format( \
            #     epoch + 1, epoch_iter, i + 1, int(len(source_train_loader)), time.time() - start_time, loss.item()))

        # advance the LR schedule once per epoch (the scheduler above is otherwise never stepped)
        scheduler.step()

        epoch_loss_mean = epoch_loss / len(source_train_loader)
        train_loss.append(epoch_loss_mean)
        print('Epoch[{}], Train, epoch_loss is {:.8f}, epoch_time is {:.8f}'.
              format(epoch, epoch_loss_mean,
                     time.time() - epoch_time))

        val_epoch_loss = eval(model, valid_loader, criterion, epoch)
        val_loss_mean = val_epoch_loss / len(valid_loader)
        valid_loss.append(val_loss_mean)

        print(time.asctime(time.localtime(time.time())))
        print('=' * 50)

        if val_loss_mean < best_loss:
            best_num = epoch + 1
            best_loss = val_loss_mean
            best_model_wts = copy.deepcopy(model.state_dict())
            # save best model
            print('best model num:{}, best loss is {:.8f}'.format(
                best_num, best_loss))
            torch.save(best_model_wts,
                       os.path.join(pths_path, 'model_epoch_best.pth'))
        if (epoch + 1) % interval == 0:
            savePath = pths_path + 'lossImg' + str(epoch + 1) + '.jpg'
            drawLoss(train_loss, valid_loss, savePath)
            print(time.asctime(time.localtime(time.time())))
            state_dict = model.module.state_dict(
            ) if data_parallel else model.state_dict()
            lr_state = scheduler.state_dict()
            torch.save(
                state_dict,
                os.path.join(pths_path,
                             'model_epoch_{}.pth'.format(epoch + 1)))
            torch.save(
                lr_state,
                os.path.join(pths_path,
                             'scheduler_epoch_{}.pth'.format(epoch + 1)))
            print("save model")
            print('=' * 50)
Example #8
def test():
    cuda = True

    test_dataset = custom_dataset(split='test')
    test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

    #resnet = Resnet101().eval()
    resnet = resnet101()
    rpn = RPN()
    rcnn = RCNN()
    if cuda:
        resnet = resnet.cuda()
        rpn = rpn.cuda()
        rcnn = rcnn.cuda()

    rpn_check_point = torch.load(
        '/home/licheng/home/licheng/projects/cnet/data/cnet.model.state.19.pkl'
    )
    rpn.load_state_dict(rpn_check_point['rpn'])
    resnet.load_state_dict(rpn_check_point['resnet'])

    rcnn_check_point = torch.load(
        "/home/licheng/home/licheng/projects/cnet/data/rcnn/rcnn_epoch_19.params"
    )
    rcnn.load_state_dict(rcnn_check_point['rcnn'])
    """
    rpn_check_point = torch.load('/home/licheng/home/licheng/projects/cnet/data/rpn/rpn_epoch_19.params')
    #resnet.load_state_dict(check_point['resnet'])
    rpn.load_state_dict(rpn_check_point['rpn'])
    #resnet.load_state_dict(check_point['resnet'])
    rcnn_check_point = torch.load('/home/licheng/home/licheng/projects/cnet/data/rcnn/rcnn_epoch_16.params')
    rcnn.load_state_dict(rcnn_check_point['rcnn'])
    """
    pred_bboxes = list()
    pred_labels = list()
    pred_scores = list()

    gt_boxes = list()
    gt_labels = list()
    rcnn_target_creator = RCNNTargetCreator()
    with torch.no_grad():
        for img_batch, bndboxes_batch, labels_batch in test_data_loader:
            img, bndboxes, labels = img_batch, bndboxes_batch[0], labels_batch[
                0]
            if cuda:
                img, bndboxes, labels = img.cuda(), bndboxes.cuda(
                ), labels.cuda()
            feature = resnet(img.float())
            #if cuda:
            #    feature = feature.cuda()
            rois, anchors, rpn_loc, rpn_score = rpn(feature, feature_stride=16)
            sample_roi, gt_roi_label, gt_roi_loc = rcnn_target_creator(
                rois,
                bndboxes.cpu().numpy(), labels)

            rois = at.toTensor(rois)
            roi_cls_loc, roi_score = rcnn(rois, feature)

            look_score1 = np.array(roi_score.cpu().detach())
            pred_score = F.softmax(roi_score, dim=1)

            look_score1 = np.array(pred_score.cpu().detach())
            pred_score = pred_score.cpu().detach().numpy()

            mean = torch.Tensor(
                (0., 0., 0., 0.)).repeat(cfg.n_class)[None].cuda()
            std = torch.Tensor(
                (0.1, 0.1, 0.2, 0.2)).repeat(cfg.n_class)[None].cuda()
            roi_cls_loc = (roi_cls_loc * std + mean)

            roi_cls_loc = at.toTensor(roi_cls_loc)
            roi_cls_loc = roi_cls_loc.view(-1, cfg.n_class, 4)
            rois = rois.view(-1, 1, 4).expand_as(roi_cls_loc)

            # expand dim as loc
            #rois = rois.reshape(-1, 1, 4)[:, [int(x) for x in np.zeros(cfg.n_class).tolist()], :]

            #roi_cls_loc = at.toTensor(roi_cls_loc)
            #roi_cls_loc = roi_cls_loc.view(roi_cls_loc.shape[0], -1, 4)

            #pred_box = loc2bbox(at.toNumpy(rois).reshape(-1, 4), roi_cls_loc.view(-1, 4).cpu().detach().numpy())
            pred_box = loc2bbox(
                at.toNumpy(rois).reshape(-1, 4),
                roi_cls_loc.view(-1, 4).cpu().detach().numpy())

            # clip box
            pred_box[:, 0::2] = np.clip(pred_box[:, 0::2], 0, img.shape[3])
            pred_box[:, 1::2] = np.clip(pred_box[:, 1::2], 0, img.shape[2])

            gt_box = list(bndboxes_batch.cpu().numpy())
            gt_label = list(labels_batch.cpu().numpy())

            bbox = list()
            label = list()
            score = list()

            for class_index in range(1, cfg.n_class):
                each_bbox = pred_box.reshape(
                    (-1, cfg.n_class, 4))[:, class_index, :]
                each_score = pred_score[:, class_index]
                mask = each_score > cfg.pred_score_thresh
                each_bbox = each_bbox[mask]
                each_score = each_score[mask]
                keep = nms(each_bbox, each_score, cfg.pred_nms_thresh)
                bbox.append(each_bbox[keep])
                score.append(each_score[keep])
                label.append(class_index * np.ones((len(keep), )))
            bbox = np.concatenate(bbox, axis=0).astype(np.float32)
            score = np.concatenate(score, axis=0).astype(np.float32)
            label = np.concatenate(label, axis=0).astype(np.int32)
            print('gt_info:', gt_box, gt_label)
            print('sample roi', sample_roi[0])
            print('predict info:', bbox, score, label)

            pred_bboxes += [bbox]
            pred_scores += [score]
            pred_labels += [label]
            gt_boxes += gt_box
            gt_labels += gt_label

        result = calc_map(pred_bboxes, pred_labels, pred_scores, gt_boxes,
                          gt_labels)
        print(result)
Example #9
    '''
    def __init__(self, val_img_path, val_gt_path, val_num):
        super(evaluater, self).__init__()
        self.val_img_list = [
            os.path.join(val_img_path, img_file)
            for img_file in sorted(os.listdir(val_img_path))
        ][:val_num]
        self.val_gt_list = [
            os.path.join(val_gt_path, gt_file)
            for gt_file in sorted(os.listdir(val_gt_path))
        ][:val_num]

    def evaluate(self, model):
        for idx in range(len(self.val_img_list)):
            pass


if __name__ == "__main__":
    trainset = custom_dataset('data/val/img', 'data/val/gt')
    train_loader = data.DataLoader(trainset,
                                   batch_size=4,
                                   num_workers=8,
                                   drop_last=True)
    img, gt_score, gt_geo, ignored_map, _ = next(iter(train_loader))
    model_path = 'task1/pths/model_epoch_100.pth'
    model = EAST(pretrained=False)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    output = model(img)
    print(1)
Example #10
def train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, interval):
    # data loading and preprocessing
    #import pdb
    #pdb.set_trace()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    file_num = len(os.listdir(train_img_path))
    trainset = custom_dataset(train_img_path, train_gt_path)
    train_loader = data.DataLoader(trainset, batch_size=batch_size, \
                                      shuffle=True, num_workers=num_workers, drop_last=True)

    # build the model
    model = EAST()
    data_parallel = False
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True
    model.to(device)

    # loss
    criterion = Loss()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # learning-rate schedule
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[epoch_iter // 2],
                                         gamma=0.1)

    for epoch in range(epoch_iter):
        model.train()
        epoch_loss = 0
        epoch_time = time.time()
        # import pdb
        # pdb.set_trace()
        train_process = tqdm(train_loader)
        for i, (img, gt_score, gt_geo,
                ignored_map) in enumerate(train_process):
            start_time = time.time()
            #import pdb
            # pdb.set_trace()
            # print("start_time=%s"%(start_time))
            img, gt_score, gt_geo, ignored_map = img.to(device), gt_score.to(
                device), gt_geo.to(device), ignored_map.to(device)

            # forward pass
            pred_score, pred_geo = model(img)
            # compute the loss
            loss = criterion(gt_score, pred_score, gt_geo, pred_geo,
                             ignored_map)

            epoch_loss += loss.item()

            # back-propagate to get gradients
            optimizer.zero_grad()
            loss.backward()

            # update the weights
            optimizer.step()

            train_process.set_description_str("epoch:{}".format(epoch + 1))
            train_process.set_postfix_str("batch_loss:{:.4f}".format(
                loss.item()))
            '''
			print('Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'.format(\
              epoch+1, epoch_iter, i+1, int(file_num/batch_size), time.time()-start_time, loss.item()))
			'''

        scheduler.step()
        with open('train.csv', 'a') as f:
            f.write('epoch[{}]: epoch_loss is {:.8f}, epoch_time is {:.8f}\n'.
                    format(epoch + 1, epoch_loss / int(file_num / batch_size),
                           time.time() - epoch_time))
        # print('epoch_loss is {:.8f}, epoch_time is {:.8f}'.format(epoch_loss/int(file_num/batch_size), time.time()-epoch_time))
        # print(time.asctime(time.localtime(time.time())))
        # print('='*50)
        if (epoch + 1) % interval == 0:
            state_dict = model.module.state_dict(
            ) if data_parallel else model.state_dict()
            torch.save(
                state_dict,
                os.path.join(pths_path,
                             'model_epoch_{}.pth'.format(epoch + 1)))
Example #11
def train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, interval):
    file_num = len(os.listdir(train_img_path))
    trainset = custom_dataset(train_img_path, train_gt_path)
    train_loader = data.DataLoader(trainset, batch_size=batch_size, \
                                      shuffle=True, num_workers=num_workers, drop_last=True)

    test_img_path = os.path.abspath('../ICDAR_2015/test_img')
    test_gt_path = os.path.abspath('../ICDAR_2015/test_gt')

    file_num2 = len(os.listdir(test_img_path))
    testset = custom_dataset(test_img_path, test_gt_path)
    test_loader = data.DataLoader(testset, batch_size=batch_size, \
                                      shuffle=True, num_workers=num_workers, drop_last=True)

    criterion = Loss()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST()
    data_parallel = False
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    try:
        print("(Continue) Loading east...")
        checkpoint = torch.load('./pths/east.pth')
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch_dict = checkpoint['epoch_loss']
        test_dict = checkpoint['test_loss']
        total_epoch = checkpoint['epoch']
        best_loss = checkpoint['best_loss']
        best_acc = checkpoint['best_acc']
    except FileNotFoundError:
        print("(Initialize) Loading east_vgg16...")
        model.load_state_dict(torch.load('./pths/east_vgg16.pth'))
        epoch_dict = dict()
        test_dict = dict()
        total_epoch = 0
        best_loss = float('inf')
        best_acc = 0

    print("Continue from epoch {}".format(total_epoch))
    print("Epoch_dict", epoch_dict)
    print("Test_dict", test_dict)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[300],
                                         gamma=0.1)

    for epoch in range(epoch_iter):
        model.train()
        scheduler.step()
        epoch_loss = 0
        test_loss = 0
        epoch_time = time.time()
        for i, (img, gt_score, gt_geo, ignored_map) in enumerate(train_loader):
            start_time = time.time()
            img, gt_score, gt_geo, ignored_map = img.to(device), gt_score.to(
                device), gt_geo.to(device), ignored_map.to(device)
            pred_score, pred_geo = model(img)
            loss = criterion(gt_score, pred_score, gt_geo, pred_geo,
                             ignored_map)

            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'.format(\
                       epoch+1, epoch_iter, i+1, int(file_num/batch_size), time.time()-start_time, loss.item()))

        epoch_dict[total_epoch + epoch +
                   1] = (epoch_loss / int(file_num / batch_size), epoch_loss)
        print('epoch_loss is {:.8f}, epoch_time is {:.8f}, epoch_loss: {}'.
              format(epoch_loss / int(file_num / batch_size),
                     time.time() - epoch_time, epoch_loss))
        model_state_dict = model.module.state_dict(
        ) if data_parallel else model.state_dict()

        model.eval()
        with torch.no_grad():
            for i, (img, gt_score, gt_geo,
                    ignored_map) in enumerate(test_loader):
                start_time = time.time()
                img, gt_score, gt_geo, ignored_map = img.to(
                    device), gt_score.to(device), gt_geo.to(
                        device), ignored_map.to(device)
                pred_score, pred_geo = model(img)
                loss = criterion(gt_score, pred_score, gt_geo, pred_geo,
                                 ignored_map)

                test_loss += loss.item()
                print('Epoch (test) is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'.format(\
                            epoch+1, epoch_iter, i+1, int(file_num2/batch_size), time.time()-start_time, loss.item()))

        test_dict[total_epoch + epoch +
                  1] = (test_loss / int(file_num2 / batch_size), test_loss)
        print(
            'test_loss is {:.8f}, epoch_time is {:.8f}, test_loss: {}'.format(
                test_loss / int(file_num2 / batch_size),
                time.time() - epoch_time, test_loss))

        print(time.asctime(time.localtime(time.time())))
        print('=' * 50)
        if (epoch + 1) % interval == 0:
            torch.save(
                {
                    'epoch': total_epoch + epoch + 1,
                    'model_state_dict': model_state_dict,
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch_loss': epoch_dict,
                    'test_loss': test_dict,
                    'best_loss': best_loss,
                    'best_acc': best_acc
                }, os.path.join(pths_path, 'east.pth'))

        if (total_epoch + epoch + 1) % 10 == 0:
            torch.save(
                {
                    'epoch': total_epoch + epoch + 1,
                    'model_state_dict': model_state_dict,
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch_loss': epoch_dict,
                    'test_loss': test_dict,
                    'best_loss': best_loss,
                    'best_acc': best_acc
                },
                os.path.join(
                    pths_path,
                    'east_epoch_{}.pth'.format(total_epoch + epoch + 1)))

        if test_loss / int(file_num2 / batch_size) < best_loss:
            # remember the new best test loss so later epochs compare against it
            best_loss = test_loss / int(file_num2 / batch_size)
            torch.save(
                {
                    'epoch': total_epoch + epoch + 1,
                    'model_state_dict': model_state_dict,
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch_loss': epoch_dict,
                    'test_loss': test_dict,
                    'best_loss': best_loss,
                    'best_acc': best_acc
                }, os.path.join(pths_path, 'east_best_loss.pth'))
Example #12
def train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, interval):
    # import pdb
    # pdb.set_trace()

    # load the data
    file_num = len(os.listdir(train_img_path))
    trainset = custom_dataset(train_img_path, train_gt_path)
    train_loader = data.DataLoader(trainset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   num_workers=num_workers,
                                   drop_last=True)

    # build the model
    model = EAST()
    data_parallel = False

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # set up the loss
    criterion = Loss()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # learning-rate schedule; milestones is a list of epoch indices and must be increasing
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[epoch_iter // 2],
                                         gamma=.1)

    for epoch in range(epoch_iter):
        model.train()
        # the scheduler reduces the learning rate once the epoch count reaches epoch_iter // 2
        scheduler.step()

        epoch_loss = 0
        epoch_time = time.time()

        for i, (img, gt_score, gt_geo, ignored_map) in enumerate(train_loader):
            start_time = time.time()
            print("start_time=%s" % start_time)

            # import pdb
            # pdb.set_trace()

            img, gt_score, gt_geo, ignored_map = img.to(device), gt_score.to(device), \
                gt_geo.to(device), ignored_map.to(device)

            # forward pass
            pred_score, pred_geo = model(img)
            # compute the loss
            loss = criterion(gt_score, pred_score, gt_geo, pred_geo,
                             ignored_map)
            epoch_loss += loss.item()

            # backward pass; the optimizer gradients must be zeroed first
            optimizer.zero_grad()
            loss.backward()

            # update the model weights
            optimizer.step()

            print(
                'Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'
                .format(epoch + 1, epoch_iter, i + 1,
                        int(file_num / batch_size),
                        time.time() - start_time, loss.item()))

        print('epoch_loss is {:.8f}, epoch_time is {:.8f}'.format(
            epoch_loss / int(file_num / batch_size),
            time.time() - epoch_time))
        print(time.asctime(time.localtime(time.time())))
        print('=' * 50)

        # save the model weights every interval epochs
        if (epoch + 1) % interval == 0:
            state_dict = model.module.state_dict(
            ) if data_parallel else model.state_dict()
            torch.save(
                state_dict,
                os.path.join(pths_path,
                             'model_epoch_{}.pth'.format(epoch + 1)))
Example #13
def train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, interval, writer):
    file_num = len(os.listdir(train_img_path))
    trainset = custom_dataset(train_img_path, train_gt_path, args.min_len,
                              args.crop_size)

    train_loader = data.DataLoader(trainset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   num_workers=num_workers,
                                   drop_last=True)
    criterion = Loss()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # if args.resume is None:                 ### train from scratch
    #     model = EAST_MobileV2(args.crop_size, True)
    # else:                                   ### resume training from a checkpoint
    #     model = EAST_MobileV2(args.crop_size, False)
    #     print('Resuming training, loading {}...'.format(args.resume))
    #     model.load_state_dict(torch.load(args.resume))
    model = EAST_SENet()

    data_parallel = False
    #if torch.cuda.device_count() > 10:
    #model = nn.DataParallel(model, device_ids=[0, 1])
    #data_parallel = True
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[200],
                                         gamma=0.1)

    for epoch in range(args.start_epoch, epoch_iter):
        model.train()
        scheduler.step()
        epoch_loss = 0
        epoch_time = time.time()
        for i, (img, gt_score, gt_geo, ignored_map) in enumerate(train_loader):
            start_time = time.time()
            img, gt_score, gt_geo, ignored_map = img.to(device), gt_score.to(
                device), gt_geo.to(device), ignored_map.to(device)
            pred_score, pred_geo = model(img)
            loss = criterion(gt_score, pred_score, gt_geo, pred_geo,
                             ignored_map)

            epoch_loss += float(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            steps = epoch * len(train_loader) + i
            writer.add_scalar('loss', loss, steps)

            print('Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'.format(\
                epoch+1, epoch_iter, i+1, int(file_num/batch_size), time.time()-start_time, loss.item()))

        print('epoch_loss is {:.8f}, epoch_time is {:.8f}'.format(
            epoch_loss / int(file_num / batch_size),
            time.time() - epoch_time))
        print(time.asctime(time.localtime(time.time())))
        print('=' * 50)
        if (epoch + 1) % interval == 0:
            state_dict = model.module.state_dict(
            ) if data_parallel else model.state_dict()
            torch.save(
                state_dict,
                os.path.join(pths_path,
                             'model_SE_epoch_{}.pth'.format(epoch + 1)))

    writer.close()
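The train() in Example #13 expects a ready-made TensorBoard writer (it also reads a module-level args from argparse, which is not shown). A minimal sketch of a caller, with assumed paths and hyperparameters:

# Hypothetical entry point for the train() in Example #13; all values are assumptions.
from torch.utils.tensorboard import SummaryWriter

if __name__ == '__main__':
    writer = SummaryWriter(log_dir='./runs/east_senet')
    train(train_img_path='./data/train_img',
          train_gt_path='./data/train_gt',
          pths_path='./pths',
          batch_size=16,
          lr=1e-3,
          num_workers=4,
          epoch_iter=600,
          interval=5,
          writer=writer)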