Example #1
def process_features(fname):
    imfile = join(base_path, fname)

    # compute low-level features
    ffile = join(output_path, splitext(fname)[0] + '.feat')
    if exists(ffile):
        fdict = load_data(ffile)
    else:
        fdict = compute_features(imfile)
    kp, desc = fdict['kp'], fdict['desc']

    # retrieve short list
    dist2 = distance.cdist(desc, vocabulary, metric='sqeuclidean')
    assignments = np.argmin(dist2, axis=1)
    idx, count = np.unique(assignments, return_counts=True)

    query_norm = np.linalg.norm(count)

    # score images using the (modified) dot-product with the query
    scores = dict.fromkeys(index['id2i'], 0)
    for i, idx_ in enumerate(idx):
        index_i = index['dbase'][idx_]
        for (id_, c) in index_i:
            # earlier, simpler scoring variants kept for reference:
            # scores[id_] += 1
            # scores[id_] += count[i] * c / index['norm'][id_]
            # TODO: check whether this normalization is correct
            scores[id_] += idf2[idx_] * count[i] * c / index['norm'][id_]

    # rank list
    short_list = sorted(scores.items(), key=lambda x: x[1],
                        reverse=True)[:n_short_list]

    # spatial re-ranking
    fdict1 = fdict
    scores = []
    for id_, _ in short_list:
        i = index['id2i'][id_]
        ffile2 = join(output_path, splitext(image_list[i])[0] + '.feat')
        fdict2 = load_data(ffile2)
        consistency_score = geometric_consistency(fdict1, fdict2)
        scores.append(consistency_score)

    # re-rank short list
    if np.sum(scores) > 0:
        idxs = np.argsort(-np.array(scores))
        short_list = [short_list[i] for i in idxs]

    # get index from file name
    n = int(splitext(fname)[0][-5:])

    # compute score for query + print output
    tp = 0
    print('Q: {}'.format(image_list[n]))
    for id_, s in short_list[:4]:
        i = index['id2i'][id_]
        tp += int((i // 4) == (n // 4))
        print('  {:.3f} {}'.format(s / query_norm, image_list[i]))
    print('  hits = {}'.format(tp))
    return tp
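
# The geometric_consistency helper used for spatial re-ranking above is not
# defined in this snippet. A minimal sketch of one plausible implementation,
# assuming fdict['kp'] is an (N, 2) array of keypoint coordinates,
# fdict['desc'] an (N, D) descriptor array, and OpenCV is available (the
# body, `ratio`, and `reproj_thresh` are assumptions, not the original code):
import numpy as np
import cv2

def geometric_consistency(fdict1, fdict2, ratio=0.75, reproj_thresh=5.0):
    # match descriptors with Lowe's ratio test, then count RANSAC
    # homography inliers; the inlier count is the consistency score
    matcher = cv2.BFMatcher(cv2.NORM_L2)
    knn = matcher.knnMatch(np.float32(fdict1['desc']),
                           np.float32(fdict2['desc']), k=2)
    good = [m for m, n in (p for p in knn if len(p) == 2)
            if m.distance < ratio * n.distance]
    if len(good) < 4:  # a homography needs at least 4 correspondences
        return 0
    src = np.float32([fdict1['kp'][m.queryIdx] for m in good]).reshape(-1, 1, 2)
    dst = np.float32([fdict2['kp'][m.trainIdx] for m in good]).reshape(-1, 1, 2)
    _, mask = cv2.findHomography(src, dst, cv2.RANSAC, reproj_thresh)
    return 0 if mask is None else int(mask.sum())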
Example #2
def get_feats(path, cfg):
    print("Getting Inception V3 model...")
    model = InceptionV3()
    print("...Done")
    dataset = ImageFolder(path, ext=cfg["ext"])
    data_loader = DataLoader(dataset,
                             batch_size=cfg["batch_size"],
                             num_workers=cfg["num_workers"],
                             shuffle=False)
    print("Generating features...")
    feats = compute_features(model,
                             data_loader,
                             cfg["cuda"],
                             make_chip_list=False)
    print("...Done, %d %d-dimensional features generated" %
          (feats.shape[0], feats.shape[1]))
    return feats
Example #3
def get_feats(path, cfg):
    print("Getting Inception V3 model...")
    model = InceptionV3()
    cudev = cfg["cuda"]
    if cudev >= 0:
        model.cuda(cudev)
    print("...Done")
    dataset = ImageFolder(path, max_N=cfg["max_N"], ext=cfg["ext"])
    data_loader = DataLoader(dataset,
                             batch_size=cfg["batch_size"],
                             num_workers=cfg["num_workers"],
                             shuffle=False)
    print("Generating features...")
    feats = compute_features(model,
                             data_loader,
                             cfg["cuda"],
                             make_chip_list=False)
    print("...Done")
    return feats
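
# Hypothetical usage sketch for the variant above (keys inferred from the
# function body; the folder path is a placeholder, and cuda = -1 is assumed
# to keep the model on the CPU, matching the `cudev >= 0` check):
cfg = {"max_N": 10000,      # cap on the number of images read
       "ext": ".jpg",       # image extension filter
       "batch_size": 64,
       "num_workers": 4,
       "cuda": -1}
feats = get_feats("/path/to/images", cfg)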
Example #4
        print('{} saved'.format(vocabulary_file))

    # --------------
    # DBASE INDEXING
    # --------------

    base_path = unsup_base_path
    image_list = read_image_list(unsup_image_list_file)

    # pre-compute local features
    for fname in image_list:
        imfile = join(base_path, fname)
        featfile = join(output_path, splitext(fname)[0] + '.feat')
        if exists(featfile):
            continue
        fdict = compute_features(imfile)
        save_data(fdict, featfile)
        print('{}: {} features'.format(featfile, len(fdict['desc'])))

    # compute inverted index
    index_file = join(output_path, 'index_{:d}.dat'.format(n_clusters))
    if not exists(index_file):
        vocabulary = load_data(vocabulary_file)
        n_clusters, n_dim = vocabulary.shape

        index = {
            'n': 0,  # number of documents
            'df': np.zeros(n_clusters, dtype=int),  # document frequencies
            'dbase': {k: [] for k in range(n_clusters)},  # inverted file
            'id2i': {},  # id -> index
            'norm': {}  # per-document L2 norms
        }
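
        # --- hedged continuation sketch (not the original code) ---
        # The snippet cuts off here; consistent with how Example #1 reads the
        # index back (postings of (image id, term count) per visual word,
        # document frequencies in 'df', per-image L2 norms in 'norm'), the
        # index might be populated along these lines:
        for i, fname in enumerate(image_list):
            featfile = join(output_path, splitext(fname)[0] + '.feat')
            fdict = load_data(featfile)
            d2 = distance.cdist(fdict['desc'], vocabulary, metric='sqeuclidean')
            words, counts = np.unique(np.argmin(d2, axis=1), return_counts=True)
            for w, c in zip(words, counts):
                index['dbase'][w].append((fname, c))  # posting: (id, tf)
            index['df'][words] += 1   # each word counted once per document
            index['id2i'][fname] = i
            index['norm'][fname] = np.linalg.norm(counts)
            index['n'] += 1
        save_data(index, index_file)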
Example #5
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # suppress printing if this process is not the master
    # (assumes `import builtins` at module top)
    if args.multiprocessing_distributed and args.gpu != 0:
        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = pcl.builder.MoCo(
        models.__dict__[args.arch],
        args.low_dim, args.pcl_r, args.moco_m, args.temperature, args.mlp)
    print(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_dataset, eval_dataset = create_cifar10_dataset(args)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        eval_sampler = torch.utils.data.distributed.DistributedSampler(eval_dataset, shuffle=False)
    else:
        train_sampler = None
        eval_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)

    # dataloader for center-cropped images, use larger batch size to increase speed
    eval_loader = torch.utils.data.DataLoader(
        eval_dataset, batch_size=args.batch_size * 5, shuffle=False,
        sampler=eval_sampler, num_workers=args.workers, pin_memory=True
    )

    for epoch in range(args.start_epoch, args.epochs):

        cluster_result = None
        if epoch >= args.warmup_epoch:
            # compute momentum features for center-cropped images
            features = compute_features(eval_loader, model, args)

            # placeholder for clustering result
            cluster_result = {'im2cluster': [], 'centroids': [], 'density': []}
            for num_cluster in args.num_cluster:
                cluster_result['im2cluster'].append(torch.zeros(len(eval_dataset), dtype=torch.long).cuda())
                cluster_result['centroids'].append(torch.zeros(int(num_cluster), args.low_dim).cuda())
                cluster_result['density'].append(torch.zeros(int(num_cluster)).cuda())

            if args.gpu == 0:
                # account for the few samples that are computed twice
                features[torch.norm(features, dim=1) > 1.5] /= 2
                features = features.numpy()
                cluster_result = run_kmeans(features, args)  # run kmeans clustering on master node
                # save the clustering result
                # torch.save(cluster_result,os.path.join(args.exp_dir, 'clusters_%d'%epoch))  

            dist.barrier()
            # broadcast clustering result
            for k, data_list in cluster_result.items():
                for data_tensor in data_list:
                    dist.broadcast(data_tensor, 0, async_op=False)

        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, cluster_result)

        if (epoch + 1) % 5 == 0 and (not args.multiprocessing_distributed
                                     or args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, is_best=False, filename='{}/checkpoint_{:04d}.pth.tar'.format(args.exp_dir, epoch))
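
# adjust_learning_rate is called each epoch above but not defined in this
# snippet. A plausible sketch, assuming the cosine schedule commonly used in
# this kind of contrastive training (the original schedule is not shown):
import math

def adjust_learning_rate(optimizer, epoch, args):
    # cosine decay from args.lr toward 0 over args.epochs (assumed schedule)
    lr = args.lr * 0.5 * (1.0 + math.cos(math.pi * epoch / args.epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr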
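
# For context: main_worker(gpu, ngpus_per_node, args) is normally launched
# once per GPU. A minimal launch sketch using the standard
# torch.multiprocessing.spawn pattern (the argument wiring below is an
# assumption; the surrounding argument parsing is not shown in this example):
import torch
import torch.multiprocessing as mp

ngpus_per_node = torch.cuda.device_count()
if args.multiprocessing_distributed:
    # total world size = number of nodes * GPUs per node
    args.world_size = ngpus_per_node * args.world_size
    # spawn one worker per GPU; each receives its gpu index as argument 0
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
else:
    main_worker(args.gpu, ngpus_per_node, args)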