Example #1
def main(env_name,
         num_iteration,
         gamma,
         lam,
         batch_size,
         max_step,
         kl_targ,
         hid1_mult,
         policy_logvar,
         epochs,
         animate=False):
    """ Main training loop
    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v2'
        num_iteration: number of total iterations
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        batch_size: number of samples per policy training batch
        max_step: maximum time step each episode
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        hid1_mult: hid1 size for policy and value_f (multiplier of obs dimension)
        policy_logvar: natural log of initial policy variance
        epochs: num of mini-batch iterations
        animate: boolean, True uses env.render() method to animate episode
    """
    killer = GracefulKiller()
    gymEnv = GymEnv(env_name)
    now = datetime.utcnow().strftime(
        "%b-%d_%H:%M:%S")  # create unique directories
    logger = Logger(logname="SingleProc" + env_name, now=now)
    aigym_path = os.path.join('log-files/gym', env_name, now)
    env = gymEnv.wrapper(aigym_path, force=True, video_callable=False)
    scaler = Scaler(gymEnv.obs_dim)

    policy_size = "large"
    if env_name in ['Humanoid-v2', 'HumanoidStandup-v2', 'Ant-v2']:
        policy_size = "small"
    policy = Policy(gymEnv.obs_dim, gymEnv.act_dim, kl_targ, hid1_mult,
                    policy_logvar, policy_size)
    # sampler to generate rollouts by executing the policy on the Env
    sampler = Sampler(env, policy, scaler, max_step, batch_size, animate)
    # the PPO agent alternates between updating the policy and generating rollouts
    ppo = PPO(policy, sampler, logger, killer, num_iteration, epochs, gamma,
              lam, max_step)
    # run policy learning for num_iteration iterations
    ppo.train()
    logger.close()
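A minimal sketch of how this entry point might be invoked; the hyperparameter values below are illustrative assumptions, not values taken from the original code:

if __name__ == '__main__':
    # hypothetical hyperparameters for a short Hopper-v2 run
    main(env_name='Hopper-v2',
         num_iteration=100,
         gamma=0.995,
         lam=0.98,
         batch_size=20,
         max_step=1000,
         kl_targ=0.003,
         hid1_mult=10,
         policy_logvar=-1.0,
         epochs=20,
         animate=False)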
Example #2
def main():
    use_gpu = torch.cuda.is_available()
#    use_gpu = False
    if args.use_cpu: use_gpu = False
    pin_memory = True if use_gpu else False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root, name=args.dataset, split_id=args.split_id,
    )

    print('dataset',dataset)
    # data augmentation
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        # T.Resize(size=(384,128),interpolation=3),
        T.RandomHorizontalFlip(),
        T.ColorJitter(brightness=0.1,contrast=0.1,saturation=0.1,hue=0.1),
#        T.RandomVerticalFlip(),
#        T.RandomRotation(30),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        #T.Resize(size=(384,128),interpolation=3),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances),
        batch_size=args.train_batch, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )
    #embed() 
    print('len of trainloader',len(trainloader))
    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print('len of queryloader',len(queryloader))
    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print('len of galleryloader',len(galleryloader))
    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_vids,
                              loss={'softmax', 'metric'}, aligned=args.aligned)
    print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0))
    print('Model ',model)
    print('num_classes',dataset.num_train_vids)
    if args.labelsmooth:
        criterion_class = CrossEntropyLabelSmooth(num_classes=dataset.num_train_vids, use_gpu=use_gpu)
    else:
        # criterion_class = CrossEntropyLoss(use_gpu=use_gpu)
        criterion_class = nn.CrossEntropyLoss()
    criterion_metric = TripletLossAlignedReID(margin=args.margin)
    optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)

    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return 0

    start_time = time.time()
    train_time = 0
    best_mAP = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_class, criterion_metric, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.stepsize > 0: scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                epoch + 1) == args.max_epoch or ((epoch+1)==1):
            print("==> Test")
            mAP = test(model, queryloader, galleryloader, use_gpu)
            is_best = mAP > best_mAP

            if is_best:
                best_mAP = mAP
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'mAP': mAP,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
    
    print("==> Best mAP {:.2%}, achieved at epoch {}".format(best_mAP, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
Example #3
def main():
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    pin_memory = True if use_gpu else False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    # data augmentation
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'a_softmax', 'metric'},
                              aligned=True,
                              use_gpu=use_gpu)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))
    if args.labelsmooth:
        criterion_class = CrossEntropyLabelSmooth(
            num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    else:
        criterion_class = AngleLoss()
    criterion_metric = TripletLossAlignedReID(margin=args.margin)
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)

    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model_dict = model.state_dict()
        # 1. filter out checkpoint keys that are not present in the current model
        pretrained_dict = {k: v for k, v in checkpoint['state_dict'].items() if k in model_dict}
        # 2. overwrite the matching entries in the existing state dict
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return 0

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_class, criterion_metric, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.stepsize > 0: scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
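The filtered state-dict load above is a common warm-start pattern when a checkpoint's classifier head does not match the current model; a minimal self-contained sketch, where the toy modules and sizes are hypothetical:

import torch.nn as nn

# hypothetical models: same backbone layer names, different classifier sizes
old_model = nn.Sequential(nn.Linear(8, 4), nn.Linear(4, 751))
new_model = nn.Sequential(nn.Linear(8, 4), nn.Linear(4, 100))
checkpoint = {'state_dict': old_model.state_dict(), 'epoch': 5}

model_dict = new_model.state_dict()
# 1. keep only entries whose name and shape match the new model
pretrained = {k: v for k, v in checkpoint['state_dict'].items()
              if k in model_dict and v.shape == model_dict[k].shape}
# 2. overwrite the matching entries and load the merged dict
model_dict.update(pretrained)
new_model.load_state_dict(model_dict)
print(sorted(pretrained))  # only the shared backbone weights ('0.bias', '0.weight') are restored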
Example #4
def main(opt):
    if not osp.exists(save_dir): os.makedirs(save_dir)
    if not osp.exists(vis_dir): os.makedirs(vis_dir)

    use_gpu = torch.cuda.is_available()
    pin_memory = True if use_gpu else False

    if args.mode == 'train':
        sys.stdout = Logger(osp.join(save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("GPU mode")
        cudnn.benchmark = True
        torch.cuda.manual_seed(args.seed)
    else:
        print("CPU mode")

    ### Setup dataset loader ###
    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=opt['split_id'],
        cuhk03_labeled=opt['cuhk03_labeled'],
        cuhk03_classic_split=opt['cuhk03_classic_split'])
    if args.ak_type < 0:
        trainloader = DataLoader(
            ImageDataset(dataset.train, transform=opt['transform_train']),
            sampler=RandomIdentitySampler(dataset.train,
                                          num_instances=opt['num_instances']),
            batch_size=args.train_batch,
            num_workers=opt['workers'],
            pin_memory=pin_memory,
            drop_last=True)
    elif args.ak_type > 0:
        trainloader = DataLoader(ImageDataset(
            dataset.train, transform=opt['transform_train']),
                                 sampler=AttrPool(dataset.train,
                                                  args.dataset,
                                                  attr_matrix,
                                                  attr_list,
                                                  sample_num=16),
                                 batch_size=args.train_batch,
                                 num_workers=opt['workers'],
                                 pin_memory=pin_memory,
                                 drop_last=True)
    queryloader = DataLoader(ImageDataset(dataset.query,
                                          transform=opt['transform_test']),
                             batch_size=args.test_batch,
                             shuffle=False,
                             num_workers=opt['workers'],
                             pin_memory=pin_memory,
                             drop_last=False)
    galleryloader = DataLoader(ImageDataset(dataset.gallery,
                                            transform=opt['transform_test']),
                               batch_size=args.test_batch,
                               shuffle=False,
                               num_workers=opt['workers'],
                               pin_memory=pin_memory,
                               drop_last=False)

    ### Prepare criterion ###
    if args.ak_type < 0:
        if args.loss in ['xent', 'xent_htri']:
            clf_criterion = adv_CrossEntropyLabelSmooth(
                num_classes=dataset.num_train_pids, use_gpu=use_gpu)
        else:
            clf_criterion = adv_CrossEntropyLoss(use_gpu=use_gpu)
    else:
        clf_criterion = nn.MultiLabelSoftMarginLoss()
    metric_criterion = adv_TripletLoss(margin=args.margin,
                                       ak_type=args.ak_type)
    criterionGAN = GANLoss()

    ### Prepare pretrained model ###
    target_net = models.init_model(name=args.targetmodel,
                                   pre_dir=pre_dir,
                                   num_classes=dataset.num_train_pids)
    check_freezen(target_net, need_modified=True, after_modified=False)

    ### Prepare main net ###
    G = Generator(3, 3, args.num_ker,
                  norm=args.normalization).apply(weights_init)
    if args.D == 'PatchGAN':
        D = Pat_Discriminator(input_nc=6,
                              norm=args.normalization).apply(weights_init)
    elif args.D == 'MSGAN':
        D = MS_Discriminator(input_nc=6,
                             norm=args.normalization,
                             temperature=args.temperature,
                             use_gumbel=args.usegumbel).apply(weights_init)
    check_freezen(G, need_modified=True, after_modified=True)
    check_freezen(D, need_modified=True, after_modified=True)
    print("Model size: {:.5f}M".format(
        (sum(g.numel()
             for g in G.parameters()) + sum(d.numel()
                                            for d in D.parameters())) /
        1000000.0))
    # setup optimizer
    optimizer_G = optim.Adam(G.parameters(),
                             lr=args.lr,
                             betas=(args.beta1, 0.999))
    optimizer_D = optim.Adam(D.parameters(),
                             lr=args.lr,
                             betas=(args.beta1, 0.999))

    if use_gpu:
        if args.targetmodel == 'pcb':
            test_target_net = nn.DataParallel(PCB_test(target_net)).cuda()
        else:
            test_target_net = nn.DataParallel(target_net).cuda()
        target_net = nn.DataParallel(target_net).cuda()
        G = nn.DataParallel(G).cuda()
        D = nn.DataParallel(D).cuda()

    if args.mode == 'test':
        epoch = 'test'
        test(G,
             D,
             test_target_net,
             dataset,
             queryloader,
             galleryloader,
             epoch,
             use_gpu,
             is_test=True)
        return 0

    # Ready
    start_time = time.time()
    train_time = 0
    worst_mAP, worst_rank1, worst_rank5, worst_rank10, worst_epoch = np.inf, np.inf, np.inf, np.inf, 0
    best_hit, best_epoch = -np.inf, 0
    print("==> Start training")

    for epoch in range(1, args.epoch + 1):
        start_train_time = time.time()
        train(epoch, G, D, target_net, criterionGAN, clf_criterion,
              metric_criterion, optimizer_G, optimizer_D, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if epoch % args.eval_freq == 0:
            print("==> Eval at epoch {}".format(epoch))
            if args.ak_type < 0:
                cmc, mAP = test(G,
                                D,
                                test_target_net,
                                dataset,
                                queryloader,
                                galleryloader,
                                epoch,
                                use_gpu,
                                is_test=False)
                is_worst = (cmc[0] <= worst_rank1 and cmc[1] <= worst_rank5
                            and cmc[2] <= worst_rank10 and mAP <= worst_mAP)
                if is_worst:
                    worst_mAP, worst_rank1, worst_rank5, worst_rank10, worst_epoch = \
                        mAP, cmc[0], cmc[1], cmc[2], epoch
                print(
                    "==> Worst_epoch is {}, Worst mAP {:.1%}, Worst rank-1 {:.1%}"
                    .format(worst_epoch, worst_mAP, worst_rank1))
                save_checkpoint(
                    G.state_dict(), is_worst, 'G',
                    osp.join(save_dir, 'G_ep' + str(epoch) + '.pth.tar'))
                save_checkpoint(
                    D.state_dict(), is_worst, 'D',
                    osp.join(save_dir, 'D_ep' + str(epoch) + '.pth.tar'))

            else:
                all_hits = test(G,
                                D,
                                target_net,
                                dataset,
                                queryloader,
                                galleryloader,
                                epoch,
                                use_gpu,
                                is_test=False)
                is_best = all_hits[0] >= best_hit
                if is_best:
                    best_hit, best_epoch = all_hits[0], epoch
                print("==> Best_epoch is {}, Best rank-1 {:.1%}".format(
                    best_epoch, best_hit))
                save_checkpoint(
                    G.state_dict(), is_best, 'G',
                    osp.join(save_dir, 'G_ep' + str(epoch) + '.pth.tar'))
                save_checkpoint(
                    D.state_dict(), is_best, 'D',
                    osp.join(save_dir, 'D_ep' + str(epoch) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Example #5
def main():
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    if use_gpu:
        pin_memory = True
    else:
        pin_memory = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )
    # dataloaders & augmentation: train / query / gallery
    transforms_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    transforms_test = T.Compose([
        T.Resize((args.height, args.width)),
        #T.Random2DTranslation(args.height, args.width),
        #T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transforms_train),
        batch_size=args.train_batch,
        num_workers=args.workers,
        shuffle=True,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transforms_test),
        batch_size=args.test_batch,
        num_workers=args.workers,
        shuffle=False,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transforms_test),
        batch_size=args.test_batch,
        num_workers=args.workers,
        shuffle=False,
        pin_memory=pin_memory,
        drop_last=False,
    )
    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss='softmax')
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()  # define the loss function
    #optimizer = init_optim(args.optim,model.parameters(),args.lr,args.weight_decay) # define the optimizer

    # Optimizer
    if hasattr(model, 'model'):
        base_param_ids = list(map(id, model.model.parameters()))
        base_param_ids += list(map(id, model.globe_conv5x.parameters()))
        new_params = [
            p for p in model.parameters() if id(p) not in base_param_ids
        ]
        param_groups = [{
            'params': model.model.parameters(),
            'lr_mult': 0.1
        }, {
            'params': new_params,
            'lr_mult': 1.0
        }]
    else:
        param_groups = model.parameters()
    optimizer = torch.optim.SGD(param_groups,
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    # ### custom optimizer definition
    # ignored_params = list(map(id, model.model.fc.parameters()))
    # ignored_params += (list(map(id, model.classifier0.parameters()))
    #                    + list(map(id, model.classifier1.parameters()))
    #                    + list(map(id, model.classifier2.parameters()))
    #                    + list(map(id, model.classifier3.parameters()))
    #                    # + list(map(id, model.classifier4.parameters()))
    #                    # + list(map(id, model.classifier5.parameters()))
    #                    # +list(map(id, model.classifier6.parameters() ))
    #                    # +list(map(id, model.classifier7.parameters() ))
    #                    )
    # base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
    # optimizer_ft = optim.SGD([
    #     {'params': base_params, 'lr': 0.1 * args.lr},
    #     {'params': model.model.fc.parameters(), 'lr': args.lr},
    #     {'params': model.classifier0.parameters(), 'lr': args.lr},
    #     {'params': model.classifier1.parameters(), 'lr': args.lr},
    #     {'params': model.classifier2.parameters(), 'lr': args.lr},
    #     {'params': model.classifier3.parameters(), 'lr': args.lr},
    #     # {'params': model.classifier4.parameters(), 'lr': args.lr},
    #     # {'params': model.classifier5.parameters(), 'lr': args.lr},
    #     # {'params': model.classifier6.parameters(), 'lr': 0.01},
    #     # {'params': model.classifier7.parameters(), 'lr': 0.01}
    # ], weight_decay=5e-4, momentum=0.9, nesterov=True)
    #optimizer = optimizer_ft

    # Schedule learning rate
    def adjust_lr(epoch):
        step_size = 60 if args.arch == 'inception' else args.stepsize
        lr = args.lr * (0.1**(epoch // step_size))
        for g in optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    # if args.stepsize > 0:
    #     scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()
    if args.evaluate:
        print("Evaluate only")
        test_PCB03(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        adjust_lr(epoch)
        start_train_time = time.time()
        train_PCB(epoch, model, criterion, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        # if args.stepsize > 0: scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test_PCB02(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
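The adjust_lr helper above relies on a per-group 'lr_mult' entry to give the pretrained backbone a smaller learning rate than the freshly added layers; a tiny standalone sketch of that mechanism, with hypothetical modules and values:

import torch
import torch.nn as nn

# hypothetical two-part model: a pretrained backbone and a new classifier
backbone = nn.Linear(16, 8)
classifier = nn.Linear(8, 4)
param_groups = [
    {'params': backbone.parameters(), 'lr_mult': 0.1},
    {'params': classifier.parameters(), 'lr_mult': 1.0},
]
optimizer = torch.optim.SGD(param_groups, lr=0.1, momentum=0.9)

def adjust_lr(epoch, base_lr=0.1, step_size=60):
    # staircase decay, scaled per group by its lr_mult (same idea as above)
    lr = base_lr * (0.1 ** (epoch // step_size))
    for g in optimizer.param_groups:
        g['lr'] = lr * g.get('lr_mult', 1)

adjust_lr(0)    # backbone lr ~0.01, classifier lr 0.1
adjust_lr(60)   # both groups decayed by a further factor of 10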
Example #6
import zmq, time, os, json, re
from bs4 import BeautifulSoup
from util.colors import GREEN, RED, RESET
from scrapy.http import HtmlResponse
from urllib.parse import urljoin
from multiprocessing import Process, Queue
from util.utils import parseLevel, makeUuid, LoggerFactory as Logger, noBlockREQ, discoverPeer, getSeeds, findSeeds, valid_tags, change_html
from threading import Thread, Lock as tLock, Semaphore

log = Logger(name="Dispatcher")

lockSocketReq = tLock()
counterSocketReq = Semaphore(value=0)


def writer(root, url, old, data, name, graph):
    """
    Write all the files on which <url> depends
    on the <root> folder, taking their contents
    from <data> and its name from <name>.
    The <graph> dependency tree is traversed while 
    there are dependencies that are not found in <old>
    """
    if url in old or url not in data:
        return
    old.add(url)

    url_name = name[url]
    with open(f'{root}/{url_name}', 'wb') as fd:
        fd.write(data[url])
Example #7
def main():
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    pin_memory = True if use_gpu else False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
    )

    # data augmentation
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'softmax', 'metric'},
                              aligned=True,
                              use_gpu=use_gpu)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))
    if args.labelsmooth:
        criterion_class = CrossEntropyLabelSmooth(
            num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    else:
        criterion_class = CrossEntropyLoss(use_gpu=use_gpu)
    criterion_metric = TripletLossAlignedReID(margin=args.margin)
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)

    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")
    cnt_n = 0
    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_class, criterion_metric, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)
        cnt_n = cnt_n + 1

        if args.stepsize > 0: scheduler.step()
        if (cnt_n % 40) == 0:  # save a checkpoint every 40 epochs
            print("==> Saving")
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            is_best = 0
            save_checkpoint(
                {
                    # rank1 and is_best are kept as in the original code so the
                    # saving format is not changed
                    'state_dict': state_dict,
                    'rank1': '###',
                    'epoch': epoch,
                },
                is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Example #8
def main():
    use_gpu = torch.cuda.is_available()
#    use_gpu = False
    if args.use_cpu: use_gpu = False
    pin_memory = True if use_gpu else False

    if not args.test:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root, name=args.dataset, split_id=args.split_id,
    )

    print('dataset',dataset)
    # data augmentation
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        # T.Resize(size=(384,128),interpolation=3),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        #T.Resize(size=(384,128),interpolation=3),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances),
        batch_size=args.train_batch, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )
    
    print('len of trainloader',len(trainloader))
    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test,train=False),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print('len of queryloader',len(queryloader))
    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test,train=False),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )
    #embed()
    print('len of galleryloader',len(galleryloader))
    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_vids,
                              loss={'softmax', 'metric'}, aligned=True, use_gpu=use_gpu)
    print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0))
    print('Model ',model)
    print('num_classes',dataset.num_train_vids)
    if args.labelsmooth:
        criterion_class = CrossEntropyLabelSmooth(num_classes=dataset.num_train_vids, use_gpu=use_gpu)
    else:
        # criterion_class = CrossEntropyLoss(use_gpu=use_gpu)
        criterion_class = nn.CrossEntropyLoss()
    criterion_metric = TripletLossAlignedReID(margin=args.margin)
    optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)

    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.test:
        print("test aicity dataset")
        if args.use_track_info:
            g_track_id = get_track_id(args.root)
            test(model, queryloader, galleryloader, use_gpu, dataset_q=dataset.query,
                 dataset_g=dataset.gallery, track_id_tmp=g_track_id, rank=100)
        else:
            test(model, queryloader, galleryloader, use_gpu, dataset_q=dataset.query,
                 dataset_g=dataset.gallery, rank=100)
        return 0
Example #9
def main():
    print(time.strftime("Current TIME is %Y-%m-%d %H:%M:%S", time.localtime()))
    torch.manual_seed(args.seed)
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    if use_gpu:
        pin_memory = True
    else:
        pin_memory = False

    if not args.evaluate:
        # sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))  # avoid overwriting the log file; a timestamp is appended below
        sys.stdout = Logger(
            osp.join(
                args.save_dir, "log_train_{}.txt".format(
                    time.strftime("%Y-%m-%d %H-%M", time.localtime()))))
    else:
        # sys.stdout = Logger(osp.join(args.save_dir, 'log_test_{}.txt'))
        sys.stdout = Logger(
            osp.join(
                args.save_dir, "log_test_{}.txt".format(
                    time.strftime("%Y-%m-%d %H-%M", time.localtime()))))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    # name = args.dataset
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    # dataloaders & augmentation: train / query / gallery
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    # model =models.init_model(name=args.arch, num_classes = dataset.num_train_pids, loss = 'softmax')
    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'xent'})
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_class = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids)
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    # optimizer = init_optim(args.optim, nn.Sequential([
    #     model.conv1,
    #     model.conv2,
    # ]))
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)

    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    # Parallel
    if use_gpu:
        model = nn.DataParallel(model).cuda()
        # note: after wrapping in DataParallel, parameters live under model.module

    if args.evaluate:
        print('Evaluate only!')
        test(model, queryloader, galleryloader, use_gpu)
        return 0

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0

    print('==>start training')
    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_class, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.stepsize > 0: scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                # DataParallel wraps the model, so fetch the weights via .module
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) +
                         '.pth.tar'))  # fpath=/log/checkpoint_ep().pth.tar

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Example #10
import zmq, time, queue, pickle, re, random
from util.conit import Conit
from multiprocessing import Process, Queue
from threading import Thread, Lock as tLock
from util.params import login, BROADCAST_PORT
from util.utils import parseLevel, LoggerFactory as Logger, noBlockREQ, discoverPeer, getSeeds, clock
from socket import socket, AF_INET, SOCK_DGRAM


log = Logger(name="Seed")

lockTasks = tLock()
lockSubscriber = tLock()
lockSeeds = tLock()
lockClients = tLock()


def verificator(queue, t, pushQ):
    """
    Process that manage the list of workers who should be verified. 
    """
    for address, url in iter(queue.get, "STOP"):
        ansQ = Queue()
        pQuick = Process(target=quickVerification, args=(address, url, t, ansQ))
        pQuick.start()
        ans = ansQ.get()
        pQuick.terminate()
        if not ans:
            pushQ.put(url)
            
Example #11
def main():
    # decide whether to use the GPU
    use_gpu = torch.cuda.is_available()
    # if CPU-only is requested, force use_gpu = False
    if args.use_cpu: use_gpu = False
    pin_memory = True if use_gpu else False  # avoid wasting memory

    # logging setup
    # training logs go to log_train.txt; test logs go to log_test.txt
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    # if the GPU is used, apply the related optimization settings
    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    dataset = data_manager.init_img_dataset(root=args.root,
                                            name=args.dataset,
                                            split_id=args.split_id)

    # build the dataloaders & augmentation; augment during training, not at test time
    # three dataloaders: train, query, gallery

    # transforms for training
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # normalize
    ])

    # transforms for testing
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),  # only resize, so all images share the same size
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # build the dataset loaders
    # param@drop_last: discard the leftover samples at the tail instead of training on them
    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    # param@drop_last: at test time no sample may be dropped
    # param@shuffle: do not shuffle at test time
    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    # initialize the model (ResNet-50)
    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'softmax', 'metric'},
                              aligned=True,
                              use_gpu=use_gpu)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    # choose the loss type
    criterion_class = CrossEntropyLoss(use_gpu=use_gpu)

    # optimizer
    # param@model.parameters(): update all model parameters; for one layer use model.conv1 or model.fc; for two layers use nn.Sequential([model.conv1, model.conv2])
    # param@lr: learning rate
    # param@decay: weight-decay (regularization) coefficient of the model
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)

    # learning-rate decay: gradually reduce the step size with a staircase schedule
    # param@gamma: decay factor
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)

    # set the epoch to start training from
    start_epoch = args.start_epoch

    # optionally resume the model from a checkpoint
    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    # use data parallelism if possible
    if use_gpu:
        model = nn.DataParallel(model).cuda()

    # evaluation-only mode
    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return 0

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print('start training')

    # start training
    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_class, optimizer, trainloader, use_gpu)
        # save_checkpoint receives a dict

        train_time += round(time.time() - start_train_time)

        if args.stepsize > 0: scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Example #12
def main():
    # decide whether to use the GPU and whether to save GPU memory
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    pin_memory = True if use_gpu else False

    # log file output
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("============\nArgs:{}\n=============".format(args))

    # GPU setup
    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
        cudnn.benchmark = True  # use cuDNN
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU !")

    # initialize the dataset
    dataset = data_manager.Market1501(root=args.root)

    # dataloaders (train / query / gallery) and augmentation
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(p=0.5),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        # the dataset size is usually not a multiple of the batch size;
        # drop_last=True discards the leftover images instead of training on them
        drop_last=True,
    )

    queryloader = DataLoader(ImageDataset(dataset.query,
                                          transform=transform_test),
                             batch_size=args.train_batch,
                             num_workers=args.workers,
                             shuffle=False,
                             pin_memory=pin_memory)

    galleryloader = DataLoader(ImageDataset(dataset.gallery,
                                            transform=transform_test),
                               batch_size=args.train_batch,
                               num_workers=args.workers,
                               shuffle=False,
                               pin_memory=pin_memory)

    # load the model
    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'softmax', 'metric'})
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    # losses and optimizer
    criterion_class = nn.CrossEntropyLoss()
    criterion_metric = TripletLoss(margin=args.margin)
    # optimizer = torch.optim.adam()
    # to update only a couple of layers, first run the line below to see which layers exist
    # print(*list(model.children()))
    # optimizer = init_optim(args.optim, model.parameters(nn.Sequential([
    #     *list(model.children())[:-2]
    # ])))
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)

    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)

    start_epoch = args.start_epoch

    # optionally resume the model from a checkpoint
    if args.resume:
        print("Loading checkpoint from {}".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    # data parallelism
    if use_gpu:
        model = nn.DataParallel(model).cuda()

    # evaluation-only mode
    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return 0

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0

    # training
    print("Start Training!")
    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_class, criterion_metric, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        # test and save the model
        # the learning rate only decays once scheduler.step() is called
        if args.stepsize > 0:
            scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Example #13
import zmq, logging, time, os, requests, pickle, re
from multiprocessing import Process, Lock, Queue, Value
from ctypes import c_int
from threading import Thread, Lock as tLock, Semaphore
from util.utils import parseLevel, LoggerFactory as Logger, noBlockREQ, discoverPeer, getSeeds, findSeeds

log = Logger(name="Scrapper")

availableSlaves = Value(c_int)

lockWork = tLock()
lockClients = tLock()
lockSocketPull = tLock()
lockSocketNotifier = tLock()
counterSocketPull = Semaphore(value=0)
counterSocketNotifier = Semaphore(value=0)


def slave(tasks, notifications, idx, verifyQ):
    """
    Child Process of Scrapper, responsable of downloading the urls.
    """
    while True:
        url = tasks.get()
        with availableSlaves:
            availableSlaves.value -= 1
        log.info(f"Child:{os.getpid()} of Scrapper downloading {url}",
                 f"slave {idx}")
        for i in range(5):
            try:
                response = requests.get(url)
Example #14
def main(n_sampler,
         env_name,
         num_iteration,
         gamma,
         lam,
         batch_size,
         max_step,
         kl_targ,
         hid1_mult,
         policy_logvar,
         epochs,
         animate=False):
    """ Main training loop
    Args:
        n_sampler: num of sampler processes to generate rollouts
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v2'
        num_iteration: number of total iterations
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        batch_size: number of samples per policy training batch
        max_step: maximum time step each episode
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        hid1_mult: hid1 size for policy and value_f (multiplier of obs dimension)
        policy_logvar: natural log of initial policy variance
        epochs: num of mini-batch iterations
        animate: boolean, True uses env.render() method to animate episode
    """
    gymEnv = GymEnv(env_name)
    # create unique directories
    now = datetime.utcnow().strftime("%b-%d_%H:%M:%S")
    logger = Logger(logname='MultProc' + env_name, now=now)
    policy_size = "large"
    if env_name in ['Humanoid-v2', 'HumanoidStandup-v2', 'Ant-v2']:
        policy_size = "small"
    # MLP policy network
    policy = Policy(gymEnv.obs_dim, gymEnv.act_dim, kl_targ, hid1_mult,
                    policy_logvar, policy_size)

    # queue for rollouts or agent commands
    agent_tasks = JoinableQueue()
    # queue for communicating policy weights
    agent_results = Queue()
    # PPO agent process, run asynchronously
    agent = PPO(agent_tasks, agent_results, policy, logger, num_iteration,
                epochs, gamma, lam, max_step)
    agent.start()

    # generate rollouts in parallel (by each process)
    pSampler = ParallelSampler(n_sampler, env_name, policy, max_step,
                               batch_size, animate)
    # get init policy weights
    agent_tasks.put(1)
    agent_tasks.join()
    init_weights = agent_results.get()
    pSampler.set_policy_weights(init_weights)

    total_time = 0.0
    for iter in range(num_iteration):
        print("-------- Iteration {} ----------".format(iter))
        agent.set_total_time(total_time)

        # runs a bunch of async processes that collect rollouts
        print("-------- Generate rollouts in Parallel ------")
        rollout_start = time.time()
        rollouts = pSampler.gen_rollouts()
        rollout_time = (time.time() - rollout_start) / 60.0

        # agent receive rollouts and update policy async
        learn_start = time.time()
        # send the rollouts generated by the parallel samplers to the agent
        agent_tasks.put(rollouts)
        agent_tasks.join()
        learn_time = (time.time() - learn_start) / 60.0

        # read policy weights from agent Queue
        print("-------- Get policy weights from Agent ------")
        new_policy_weights = agent_results.get()
        #TODO, save policy weights, calc totalsteps
        print("-------- Update policy weights to Samplers -----\n\n")
        pSampler.set_policy_weights(new_policy_weights)
        total_time += (time.time() - rollout_start) / 60.0
        print("Total time: {}  mins, Rollout time: {}, Learn time: {}".format(
            total_time, rollout_time, learn_time))

    logger.close()
    # exit parallel sampler
    pSampler.exit()
    #TODO, save policy weights
    # exit ppo agent
    agent.exit()
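As with the single-process variant in Example #1, a minimal invocation sketch; the sampler count and hyperparameter values below are illustrative assumptions:

if __name__ == '__main__':
    # hypothetical settings for a parallel Hopper-v2 run with four sampler processes
    main(n_sampler=4,
         env_name='Hopper-v2',
         num_iteration=100,
         gamma=0.995,
         lam=0.98,
         batch_size=20,
         max_step=1000,
         kl_targ=0.003,
         hid1_mult=10,
         policy_logvar=-1.0,
         epochs=20,
         animate=False)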