Example #1
import torch.utils.data

import image_loader

# Reconstructed class header: this __init__ appears to belong to the PyTorch
# Dataset used as eval_utils.ImageLoaderDataset in Example #5.
class ImageLoaderDataset(torch.utils.data.Dataset):

    def __init__(self,
                 filenames,
                 imgnet,
                 cds,
                 size,
                 verbose=False,
                 transform=None):
        self.imgnet = imgnet
        self.cds = cds
        # ImageLoader fetches image data for both ImageNet and candidate images.
        self.loader = image_loader.ImageLoader(imgnet, cds)
        self.size = size
        self.verbose = verbose
        self.filenames = list(filenames)
        self.transform = transform
        # Resolve each filename to its WordNet id and 0-based ImageNet class id.
        self.wnids = [self.loader.get_wnid_of_image(x) for x in self.filenames]
        self.class_ids = [
            self.imgnet.class_info_by_wnid[x].cid for x in self.wnids
        ]
        # Sanity check: every class id must be one of the 1000 ImageNet classes.
        for x in self.class_ids:
            assert x >= 0
            assert x < 1000
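A minimal sketch of how the remaining Dataset methods might look, assuming __getitem__ loads a single image through loader.load_image_batch (the batch API seen in Example #4, which appears to return a dict keyed by filename) and returns an (image, class_id) pair; this is illustrative, not the repository's actual implementation.

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        # Hypothetical item loader: fetch one image via the batch API from
        # Example #4 and pair it with the precomputed class id.
        filename = self.filenames[index]
        img = self.loader.load_image_batch([filename],
                                           size=self.size,
                                           force_rgb=True,
                                           verbose=self.verbose)[filename]
        if self.transform is not None:
            img = self.transform(img)
        return img, self.class_ids[index]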
Example #2
def download_images(datasets, include_val):
    imgnet = imagenet.ImageNetData()
    cds = candidate_data.CandidateData(exclude_blacklisted_candidates=False)
    loader = image_loader.ImageLoader(imgnet, cds)

    all_wnids = list(sorted(list(imgnet.class_info_by_wnid.keys())))
    assert len(all_wnids) == 1000

    for dataset in datasets.split(','):
        print(f'Downloading images for dataset {dataset} ...')

        dataset_filepath = pathlib.Path(
            __file__).parent / '../data/datasets' / (dataset + '.json')
        dataset_filepath = dataset_filepath.resolve()
        assert dataset_filepath.is_file()
        with open(dataset_filepath, 'r') as f:
            data = json.load(f)

        # data['image_filenames'] is a list of (image filename, wnid) pairs; group them by class.
        dataset_by_wnid = {x: [] for x in all_wnids}
        for img, wnid in data['image_filenames']:
            dataset_by_wnid[wnid].append(img)
        for cur_wnid in tqdm.tqdm(all_wnids):
            images_to_download = dataset_by_wnid[cur_wnid]
            #if include_val:
            #    images_to_download.extend(imgnet.val_imgs_by_wnid[cur_wnid])
            loader.load_image_bytes_batch(images_to_download,
                                          size='scaled_500',
                                          verbose=False)

    if include_val:
        print('Downloading all validation images ...')
        for cur_wnid in tqdm.tqdm(all_wnids):
            images_to_download = imgnet.val_imgs_by_wnid[cur_wnid]
            loader.load_image_bytes_batch(images_to_download,
                                          size='scaled_500',
                                          verbose=False)
Example #3
import json
import pathlib

import click
import tqdm

import candidate_data
import image_loader
import imagenet

imgnet = imagenet.ImageNetData()
cds = candidate_data.CandidateData(exclude_blacklisted_candidates=False)
loader = image_loader.ImageLoader(imgnet, cds)

all_wnids = list(sorted(list(imgnet.class_info_by_wnid.keys())))
assert len(all_wnids) == 1000

print('Downloading all candidate images ...')
for cur_wnid in tqdm.tqdm(all_wnids):
    images_to_download = cds.candidates_by_wnid[cur_wnid]
    images_to_download = [x['id_ours'] for x in images_to_download]
    loader.load_image_bytes_batch(images_to_download,
                                  size='scaled_500',
                                  verbose=False)

if __name__ == "__main__":
    download_images()
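Example #2's download_images takes datasets and include_val, yet the __main__ guard above calls it with no arguments; with click imported, the function is presumably wrapped as a command whose options are parsed from the command line. A hedged sketch of that wiring (the option names and help strings are assumptions, not taken from the repository):

@click.command()
@click.option('--datasets', type=str, required=True,
              help='Comma-separated list of dataset names (json files under data/datasets).')
@click.option('--include_val', is_flag=True,
              help='Also download the ImageNet validation images.')
def download_images(datasets, include_val):
    ...  # function body as shown in Example #2


if __name__ == "__main__":
    # click fills in datasets / include_val from sys.argv.
    download_images()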
Example #4
def compute_nearest_neighbors(distance_measures, candidate_filenames,
                              reference_filenames, top_k, window_size, cache,
                              cache_root):
    # Results are cached in S3 under a hash of all arguments so repeated queries
    # with the same inputs can skip the distance computation entirely.
    cache_key = compute_hash(distance_measures, candidate_filenames,
                             reference_filenames, top_k, window_size)
    full_key = f"{cache_root}/{cache_key}"
    timing_info = {}
    if cache:
        if utils.key_exists(BUCKET, full_key):
            load_start = timer()
            ret_value = pickle.loads(
                utils.get_s3_object_bytes_with_backoff(full_key)[0])
            load_end = timer()
            compute_start = compute_end = timer()
            timing_info['load_start'] = load_start
            timing_info['load_end'] = load_end
            timing_info['compute_start'] = compute_start
            timing_info['compute_end'] = compute_end
            timing_info['cached'] = True
            return ret_value, timing_info

    imgnt = imagenet.ImageNetData(cache_on_local_disk=True,
                                  verbose=False,
                                  cache_root_path='/tmp/imagenet2_cache')
    cds = candidate_data.CandidateData(cache_on_local_disk=True,
                                       load_metadata_from_s3=True,
                                       verbose=False,
                                       cache_root_path='/tmp/imagenet2_cache')
    loader = image_loader.ImageLoader(imgnt,
                                      cds,
                                      cache_on_local_disk=True,
                                      num_tries=4,
                                      cache_root_path='/tmp/imagenet2_cache')
    load_start = timer()
    # The pixel-space measures (l2, dssim) need the decoded images, while the fc7
    # measure works on precomputed feature vectors.
    if ('l2' in distance_measures) or ('dssim' in distance_measures):
        candidate_image_dict = loader.load_image_batch(candidate_filenames,
                                                       size='scaled_256',
                                                       force_rgb=True,
                                                       verbose=False)
        reference_image_dict = loader.load_image_batch(reference_filenames,
                                                       size='scaled_256',
                                                       force_rgb=True,
                                                       verbose=False)
    if 'fc7' in distance_measures:
        candidate_feature_dict = loader.load_features_batch(
            candidate_filenames, verbose=False)
        reference_feature_dict = loader.load_features_batch(
            reference_filenames, verbose=False)
    load_end = timer()

    compute_start = timer()
    result = {}
    for distance_measure in distance_measures:
        if distance_measure == 'l2':
            result['l2'] = compute_l2_distances(candidate_image_dict,
                                                reference_image_dict, 196608)
        elif distance_measure == 'dssim':
            result['dssim'] = compute_dssim_distances(candidate_image_dict,
                                                      reference_image_dict,
                                                      window_size)
        elif distance_measure == 'fc7':
            result['fc7'] = compute_l2_distances(candidate_feature_dict,
                                                 reference_feature_dict, 4096)
        else:
            raise ValueError('Unknown distance measure')
    compute_end = timer()
    timing_info = {}
    timing_info['load_start'] = load_start
    timing_info['load_end'] = load_end
    timing_info['compute_start'] = compute_start
    timing_info['compute_end'] = compute_end
    timing_info['cached'] = False

    res = compute_top_k(result, top_k)
    if cache:
        utils.put_s3_object_bytes_with_backoff(pickle.dumps(res), full_key)

    return res, timing_info
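A hedged usage sketch for this function; the file names and parameter values below are placeholders chosen for illustration, not values from the repository:

# Hypothetical call: compare two candidate images against two reference images.
candidates = ['candidate_0001.jpg', 'candidate_0002.jpg']          # placeholder names
references = ['ILSVRC2012_val_00000001.JPEG', 'ILSVRC2012_val_00000002.JPEG']
neighbors, timing = compute_nearest_neighbors(
    distance_measures=['l2', 'fc7'],
    candidate_filenames=candidates,
    reference_filenames=references,
    top_k=5,
    window_size=35,                 # only used by the dssim measure
    cache=False,                    # skip the S3 result cache for a local run
    cache_root='nearest_neighbor_cache')
print('compute time: {:.1f}s'.format(timing['compute_end'] - timing['compute_start']))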
Example #5
def eval(dataset, models, batch_size):
    dataset_filename = dataset
    if models == 'all':
        models = all_models
    else:
        models = models.split(',')
    for model in models:
        assert model in all_models

    dataset_filepath = pathlib.Path(__file__).parent / '../data/datasets' / (
        dataset_filename + '.json')
    print('Reading dataset from {} ...'.format(dataset_filepath))
    with open(dataset_filepath, 'r') as f:
        dataset = json.load(f)
    cur_imgs = [x[0] for x in dataset['image_filenames']]

    imgnet = imagenet.ImageNetData()
    cds = candidate_data.CandidateData(load_metadata_from_s3=False,
                                       exclude_blacklisted_candidates=False)
    loader = image_loader.ImageLoader(imgnet, cds)

    pbar = tqdm(total=len(cur_imgs), desc='Dataset download')
    img_data = loader.load_image_bytes_batch(
        cur_imgs,
        size='scaled_500',
        verbose=False,
        download_callback=lambda x: pbar.update(x))
    pbar.close()

    for model in tqdm(models, desc='Model evaluations'):
        # Standard torchvision models and the "extra" pretrainedmodels models need
        # different preprocessing pipelines, so they are handled in separate branches.
        if (model not in extra_models):
            tqdm.write('Evaluating {}'.format(model))
            resize_size = 256
            center_crop_size = 224
            if model == 'inception_v3':
                resize_size = 299
                center_crop_size = 299
            data_loader = eval_utils.get_data_loader(
                cur_imgs,
                imgnet,
                cds,
                image_size='scaled_500',
                resize_size=resize_size,
                center_crop_size=center_crop_size,
                batch_size=batch_size)
            pt_model = getattr(torchvision.models, model)(pretrained=True)
            if (torch.cuda.is_available()):
                pt_model = pt_model.cuda()
            pt_model.eval()
            tqdm.write('    Number of trainable parameters: {}'.format(
                sum(p.numel() for p in pt_model.parameters()
                    if p.requires_grad)))

            predictions, top1_acc, top5_acc, total_time, num_images = eval_utils.evaluate_model(
                pt_model, data_loader, show_progress_bar=True)
            tqdm.write('    Evaluated {} images'.format(num_images))
            tqdm.write('    Top-1 accuracy: {:.2f}'.format(100.0 * top1_acc))
            tqdm.write('    Top-5 accuracy: {:.2f}'.format(100.0 * top5_acc))
            tqdm.write(
                '    Total time: {:.1f}  (average time per image: {:.2f} ms)'.
                format(total_time, 1000.0 * total_time / num_images))
            npy_out_filepath = pathlib.Path(
                __file__).parent / '../data/predictions' / dataset_filename / (
                    model + '.npy')
            npy_out_filepath = npy_out_filepath.resolve()
            directory = os.path.dirname(npy_out_filepath)
            if not os.path.exists(directory):
                os.makedirs(directory)
            if (os.path.exists(npy_out_filepath)):
                old_preds = np.load(npy_out_filepath)
                np.save(f'{npy_out_filepath}.{int(time.time())}', old_preds)
                print('checking old preds is same as new preds')
                if not np.allclose(old_preds, predictions):
                    diffs = np.round(old_preds - predictions, 4)
                    print('old preds != new preds')
                else:
                    print('old preds == new_preds!')
            np.save(npy_out_filepath, predictions)
            tqdm.write('    Saved predictions to {}'.format(npy_out_filepath))
        else:
            tqdm.write('Evaluating extra model {}'.format(model))
            if (model in {"dpn68b", "dpn92", "dpn107"}):
                pt_model = pretrainedmodels.__dict__[model](
                    num_classes=1000, pretrained='imagenet+5k')
            else:
                pt_model = pretrainedmodels.__dict__[model](
                    num_classes=1000, pretrained='imagenet')
            tf_img = pretrained_utils.TransformImage(pt_model)
            load_img = pretrained_utils.LoadImage()
            tqdm.write('    Number of trainable parameters: {}'.format(
                sum(p.numel() for p in pt_model.parameters()
                    if p.requires_grad)))

            #print(pt_model)
            #print(load_img)
            dataset = eval_utils.ImageLoaderDataset(cur_imgs,
                                                    imgnet,
                                                    cds,
                                                    'scaled_500',
                                                    transform=tf_img)

            data_loader = torch.utils.data.DataLoader(dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=0,
                                                      pin_memory=True)
            if (torch.cuda.is_available()):
                pt_model = pt_model.cuda()

            pt_model.eval()
            predictions, top1_acc, top5_acc, total_time, num_images = eval_utils.evaluate_model(
                pt_model, data_loader, show_progress_bar=True)
            tqdm.write('    Evaluated {} images'.format(num_images))
            tqdm.write('    Top-1 accuracy: {:.2f}'.format(100.0 * top1_acc))
            tqdm.write('    Top-5 accuracy: {:.2f}'.format(100.0 * top5_acc))
            tqdm.write(
                '    Total time: {:.1f}  (average time per image: {:.2f} ms)'.
                format(total_time, 1000.0 * total_time / num_images))
            npy_out_filepath = pathlib.Path(
                __file__).parent / '../data/predictions' / dataset_filename / (
                    model + '.npy')
            npy_out_filepath = npy_out_filepath.resolve()
            directory = os.path.dirname(npy_out_filepath)
            if not os.path.exists(directory):
                os.makedirs(directory)
            if (os.path.exists(npy_out_filepath)):
                old_preds = np.load(npy_out_filepath)
                np.save(f'{npy_out_filepath}.{int(time.time())}', old_preds)
                print('checking old preds is same as new preds')
                if not np.allclose(old_preds, predictions):
                    diffs = np.round(old_preds - predictions, 4)
                    print('old preds != new preds')
                else:
                    print('old preds == new_preds!')
            np.save(npy_out_filepath, predictions)
            tqdm.write('    Saved predictions to {}'.format(npy_out_filepath))
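evaluate_model's internals are not shown in this snippet; a minimal generic sketch of how top-1/top-5 accuracy can be computed from a batch of logits (an illustration, not the repository's eval_utils implementation):

import torch

def topk_accuracy(logits, targets, ks=(1, 5)):
    # logits: (batch, num_classes) scores; targets: (batch,) ground-truth class ids.
    max_k = max(ks)
    _, pred = logits.topk(max_k, dim=1)              # (batch, max_k) predicted class ids
    correct = pred.eq(targets.view(-1, 1))           # broadcast compare against targets
    return [correct[:, :k].any(dim=1).float().mean().item() for k in ks]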
Example #6
import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint

import image_loader
import nnetwork

train_labl_path = '/userhome/student/kede/colorize/deep_learning/data/train_labels.csv'
valid_labl_path = '/userhome/student/kede/colorize/deep_learning/data/valid_labels.csv'
test_labl_path = '/userhome/student/kede/colorize/deep_learning/data/test_labels.csv'
class_desc_path = '/userhome/student/kede/colorize/deep_learning/data/class_descriptions.csv'
image_id_path = '/userhome/student/kede/colorize/deep_learning/data/image_ids_and_rotation.csv'

image_root_folder = '/userhome/student/kede/colorize/deep_learning/data/images/'

#data_hl = data_collector.DataCollector()
#data_hl.load_datas(image_id_path, train_labl_path, valid_labl_path, test_labl_path, class_desc_path)

#label_names = ['City', 'Skyline', 'Cityscape', 'Boathouse', 'Landscape lighting', 'Town square', 'College town', 'Town']
#collect_labels(data_hl, image_root_folder, label_names)

# pts_hull_file is defined elsewhere in the original script.
img_loader = image_loader.ImageLoader(image_root_folder, pts_hull_file)

# separate_small_data(validation_rate, test_rate): hold out 20% of the data for
# validation and 10% for testing.
img_loader.separate_small_data(0.2, 0.1)

model = nnetwork.create_vgg_model(3)
model.compile('adam',
              loss='categorical_crossentropy',
              metrics=['accuracy', keras.metrics.categorical_accuracy])

patience = 70
early_stopping = EarlyStopping(monitor='val_acc', patience=patience, verbose=1)
checkpointer = ModelCheckpoint(filepath='weights.hdf5',
                               monitor='val_acc',
                               save_best_only=True,
                               verbose=1)
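The snippet ends before training starts; a hedged sketch of how these callbacks might be passed to the model, assuming img_loader exposes training and validation generators (the generator methods and step-count attributes below are assumptions, not part of the original code):

# Hypothetical training call; train_generator/valid_generator and the step counts
# are assumed helpers on img_loader, not APIs confirmed by the snippet above.
history = model.fit_generator(
    img_loader.train_generator(),
    steps_per_epoch=img_loader.train_steps,
    validation_data=img_loader.valid_generator(),
    validation_steps=img_loader.valid_steps,
    epochs=500,
    callbacks=[early_stopping, checkpointer])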