def __init__(self, filenames, imgnet, cds, size, verbose=False, transform=None):
    self.imgnet = imgnet
    self.cds = cds
    self.loader = image_loader.ImageLoader(imgnet, cds)
    self.size = size
    self.verbose = verbose
    self.filenames = list(filenames)
    self.transform = transform
    self.wnids = [self.loader.get_wnid_of_image(x) for x in self.filenames]
    self.class_ids = [
        self.imgnet.class_info_by_wnid[x].cid for x in self.wnids
    ]
    for x in self.class_ids:
        assert x >= 0
        assert x < 1000
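# Hedged usage sketch, assuming this __init__ belongs to the torch-style
# eval_utils.ImageLoaderDataset that the evaluation script below constructs
# with the same arguments. The wnid and transform here are illustrative
# assumptions, not values taken from the original code.
#
# import torch
# import torchvision.transforms as transforms
#
# imgnet = imagenet.ImageNetData()
# cds = candidate_data.CandidateData(exclude_blacklisted_candidates=False)
# filenames = [c['id_ours'] for c in cds.candidates_by_wnid['n01440764']]
# dataset = eval_utils.ImageLoaderDataset(filenames, imgnet, cds, 'scaled_500',
#                                         transform=transforms.ToTensor())
# data_loader = torch.utils.data.DataLoader(dataset, batch_size=32,
#                                           shuffle=False, num_workers=0)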
# Assumed click wiring: the '--datasets' and '--include_val' options supply the
# two function arguments when the module is run through the bare
# download_images() call in the __main__ guard below.
@click.command()
@click.option('--datasets', required=True,
              help='Comma-separated list of dataset names to download.')
@click.option('--include_val', is_flag=True,
              help='Also download the ImageNet validation images.')
def download_images(datasets, include_val):
    imgnet = imagenet.ImageNetData()
    cds = candidate_data.CandidateData(exclude_blacklisted_candidates=False)
    loader = image_loader.ImageLoader(imgnet, cds)
    all_wnids = list(sorted(list(imgnet.class_info_by_wnid.keys())))
    assert len(all_wnids) == 1000
    for dataset in datasets.split(','):
        print(f'Downloading images for dataset {dataset} ...')
        dataset_filepath = pathlib.Path(
            __file__).parent / '../data/datasets' / (dataset + '.json')
        dataset_filepath = dataset_filepath.resolve()
        assert dataset_filepath.is_file()
        with open(dataset_filepath, 'r') as f:
            data = json.load(f)
        dataset_by_wnid = {x: [] for x in all_wnids}
        for img, wnid in data['image_filenames']:
            dataset_by_wnid[wnid].append(img)
        for cur_wnid in tqdm.tqdm(all_wnids):
            images_to_download = dataset_by_wnid[cur_wnid]
            #if include_val:
            #    images_to_download.extend(imgnet.val_imgs_by_wnid[cur_wnid])
            loader.load_image_bytes_batch(images_to_download,
                                          size='scaled_500',
                                          verbose=False)
    if include_val:
        print('Downloading all validation images ...')
        for cur_wnid in tqdm.tqdm(all_wnids):
            images_to_download = imgnet.val_imgs_by_wnid[cur_wnid]
            loader.load_image_bytes_batch(images_to_download,
                                          size='scaled_500',
                                          verbose=False)
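# Hypothetical command-line invocation, assuming the click wiring above; the
# dataset name is a placeholder, not a name from the original code.
#
#   python download_images.py --datasets <dataset_name> --include_val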
import json
import pathlib

import click
import tqdm

import candidate_data
import image_loader
import imagenet

imgnet = imagenet.ImageNetData()
cds = candidate_data.CandidateData(exclude_blacklisted_candidates=False)
loader = image_loader.ImageLoader(imgnet, cds)
all_wnids = list(sorted(list(imgnet.class_info_by_wnid.keys())))
assert len(all_wnids) == 1000

print('Downloading all candidate images ...')
for cur_wnid in tqdm.tqdm(all_wnids):
    images_to_download = cds.candidates_by_wnid[cur_wnid]
    images_to_download = [x['id_ours'] for x in images_to_download]
    loader.load_image_bytes_batch(images_to_download,
                                  size='scaled_500',
                                  verbose=False)

if __name__ == "__main__":
    download_images()
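# Hedged sketch: the per-wnid loops above hand every image id for a class to
# load_image_bytes_batch in a single call. If that ever becomes too large, the
# same call can be made over fixed-size chunks. Only the call signature already
# used above is assumed; chunk_size is an illustrative choice.
def download_in_chunks(loader, image_ids, chunk_size=256):
    for start in range(0, len(image_ids), chunk_size):
        chunk = image_ids[start:start + chunk_size]
        loader.load_image_bytes_batch(chunk, size='scaled_500', verbose=False)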
def compute_nearest_neighbors(distance_measures, candidate_filenames,
                              reference_filenames, top_k, window_size, cache,
                              cache_root):
    cache_key = compute_hash(distance_measures, candidate_filenames,
                             reference_filenames, top_k, window_size)
    full_key = f"{cache_root}/{cache_key}"
    timing_info = {}
    if cache:
        if utils.key_exists(BUCKET, full_key):
            load_start = timer()
            ret_value = pickle.loads(
                utils.get_s3_object_bytes_with_backoff(full_key)[0])
            load_end = timer()
            compute_start = compute_end = timer()
            timing_info['load_start'] = load_start
            timing_info['load_end'] = load_end
            timing_info['compute_start'] = compute_start
            timing_info['compute_end'] = compute_end
            timing_info['cached'] = True
            return ret_value, timing_info
    imgnt = imagenet.ImageNetData(cache_on_local_disk=True,
                                  verbose=False,
                                  cache_root_path='/tmp/imagenet2_cache')
    cds = candidate_data.CandidateData(cache_on_local_disk=True,
                                       load_metadata_from_s3=True,
                                       verbose=False,
                                       cache_root_path='/tmp/imagenet2_cache')
    loader = image_loader.ImageLoader(imgnt,
                                      cds,
                                      cache_on_local_disk=True,
                                      num_tries=4,
                                      cache_root_path='/tmp/imagenet2_cache')
    load_start = timer()
    if ('l2' in distance_measures) or ('dssim' in distance_measures):
        candidate_image_dict = loader.load_image_batch(candidate_filenames,
                                                       size='scaled_256',
                                                       force_rgb=True,
                                                       verbose=False)
        reference_image_dict = loader.load_image_batch(reference_filenames,
                                                       size='scaled_256',
                                                       force_rgb=True,
                                                       verbose=False)
    if 'fc7' in distance_measures:
        candidate_feature_dict = loader.load_features_batch(
            candidate_filenames, verbose=False)
        reference_feature_dict = loader.load_features_batch(
            reference_filenames, verbose=False)
    load_end = timer()
    compute_start = timer()
    result = {}
    for distance_measure in distance_measures:
        if distance_measure == 'l2':
            result['l2'] = compute_l2_distances(candidate_image_dict,
                                                reference_image_dict, 196608)
        elif distance_measure == 'dssim':
            result['dssim'] = compute_dssim_distances(candidate_image_dict,
                                                      reference_image_dict,
                                                      window_size)
        elif distance_measure == 'fc7':
            result['fc7'] = compute_l2_distances(candidate_feature_dict,
                                                 reference_feature_dict, 4096)
        else:
            raise ValueError('Unknown distance measure')
    compute_end = timer()
    timing_info = {}
    timing_info['load_start'] = load_start
    timing_info['load_end'] = load_end
    timing_info['compute_start'] = compute_start
    timing_info['compute_end'] = compute_end
    timing_info['cached'] = False
    res = compute_top_k(result, top_k)
    if cache:
        utils.put_s3_object_bytes_with_backoff(pickle.dumps(res), full_key)
    return res, timing_info
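# Hedged usage sketch: the filenames, top_k, window_size, and cache_root values
# below are illustrative assumptions; only the parameter names and the
# supported distance measures ('l2', 'dssim', 'fc7') come from the function
# above. cache=False skips the S3 read and write paths.
example_candidates = ['candidate_0001.jpg']             # hypothetical candidate ids
example_references = ['ILSVRC2012_val_00000001.JPEG']   # hypothetical reference ids
neighbors, timing = compute_nearest_neighbors(['l2', 'fc7'],
                                              example_candidates,
                                              example_references,
                                              top_k=10,
                                              window_size=7,
                                              cache=False,
                                              cache_root='nearest_neighbor_cache')
print('compute time: {:.2f}s'.format(timing['compute_end'] -
                                     timing['compute_start']))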
def eval(dataset, models, batch_size):
    dataset_filename = dataset
    if models == 'all':
        models = all_models
    else:
        models = models.split(',')
        for model in models:
            assert model in all_models
    dataset_filepath = pathlib.Path(__file__).parent / '../data/datasets' / (
        dataset_filename + '.json')
    print('Reading dataset from {} ...'.format(dataset_filepath))
    with open(dataset_filepath, 'r') as f:
        dataset = json.load(f)
    cur_imgs = [x[0] for x in dataset['image_filenames']]

    imgnet = imagenet.ImageNetData()
    cds = candidate_data.CandidateData(load_metadata_from_s3=False,
                                       exclude_blacklisted_candidates=False)
    loader = image_loader.ImageLoader(imgnet, cds)

    pbar = tqdm(total=len(cur_imgs), desc='Dataset download')
    img_data = loader.load_image_bytes_batch(
        cur_imgs,
        size='scaled_500',
        verbose=False,
        download_callback=lambda x: pbar.update(x))
    pbar.close()

    for model in tqdm(models, desc='Model evaluations'):
        if (model not in extra_models):
            tqdm.write('Evaluating {}'.format(model))
            resize_size = 256
            center_crop_size = 224
            if model == 'inception_v3':
                resize_size = 299
                center_crop_size = 299
            data_loader = eval_utils.get_data_loader(
                cur_imgs,
                imgnet,
                cds,
                image_size='scaled_500',
                resize_size=resize_size,
                center_crop_size=center_crop_size,
                batch_size=batch_size)
            pt_model = getattr(torchvision.models, model)(pretrained=True)
            if (torch.cuda.is_available()):
                pt_model = pt_model.cuda()
            pt_model.eval()
            tqdm.write('    Number of trainable parameters: {}'.format(
                sum(p.numel() for p in pt_model.parameters()
                    if p.requires_grad)))
            predictions, top1_acc, top5_acc, total_time, num_images = eval_utils.evaluate_model(
                pt_model, data_loader, show_progress_bar=True)
            tqdm.write('    Evaluated {} images'.format(num_images))
            tqdm.write('    Top-1 accuracy: {:.2f}'.format(100.0 * top1_acc))
            tqdm.write('    Top-5 accuracy: {:.2f}'.format(100.0 * top5_acc))
            tqdm.write(
                '    Total time: {:.1f} (average time per image: {:.2f} ms)'.
                format(total_time, 1000.0 * total_time / num_images))
            npy_out_filepath = pathlib.Path(
                __file__).parent / '../data/predictions' / dataset_filename / (
                    model + '.npy')
            npy_out_filepath = npy_out_filepath.resolve()
            directory = os.path.dirname(npy_out_filepath)
            if not os.path.exists(directory):
                os.makedirs(directory)
            if (os.path.exists(npy_out_filepath)):
                old_preds = np.load(npy_out_filepath)
                np.save(f'{npy_out_filepath}.{int(time.time())}', old_preds)
                print('checking old preds is same as new preds')
                if not np.allclose(old_preds, predictions):
                    diffs = np.round(old_preds - predictions, 4)
                    print('old preds != new preds')
                else:
                    print('old preds == new preds!')
            np.save(npy_out_filepath, predictions)
            tqdm.write('    Saved predictions to {}'.format(npy_out_filepath))
        else:
            tqdm.write('Evaluating extra model {}'.format(model))
            if (model in {"dpn68b", "dpn92", "dpn107"}):
                pt_model = pretrainedmodels.__dict__[model](
                    num_classes=1000, pretrained='imagenet+5k')
            else:
                pt_model = pretrainedmodels.__dict__[model](
                    num_classes=1000, pretrained='imagenet')
            tf_img = pretrained_utils.TransformImage(pt_model)
            load_img = pretrained_utils.LoadImage()
            tqdm.write('    Number of trainable parameters: {}'.format(
                sum(p.numel() for p in pt_model.parameters()
                    if p.requires_grad)))
            #print(pt_model)
            #print(load_img)
            dataset = eval_utils.ImageLoaderDataset(cur_imgs,
                                                    imgnet,
                                                    cds,
                                                    'scaled_500',
                                                    transform=tf_img)
            data_loader = torch.utils.data.DataLoader(dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=0,
                                                      pin_memory=True)
            if (torch.cuda.is_available()):
                pt_model = pt_model.cuda()
            pt_model.eval()
            predictions, top1_acc, top5_acc, total_time, num_images = eval_utils.evaluate_model(
                pt_model, data_loader, show_progress_bar=True)
            tqdm.write('    Evaluated {} images'.format(num_images))
            tqdm.write('    Top-1 accuracy: {:.2f}'.format(100.0 * top1_acc))
            tqdm.write('    Top-5 accuracy: {:.2f}'.format(100.0 * top5_acc))
            tqdm.write(
                '    Total time: {:.1f} (average time per image: {:.2f} ms)'.
                format(total_time, 1000.0 * total_time / num_images))
            npy_out_filepath = pathlib.Path(
                __file__).parent / '../data/predictions' / dataset_filename / (
                    model + '.npy')
            npy_out_filepath = npy_out_filepath.resolve()
            directory = os.path.dirname(npy_out_filepath)
            if not os.path.exists(directory):
                os.makedirs(directory)
            if (os.path.exists(npy_out_filepath)):
                old_preds = np.load(npy_out_filepath)
                np.save(f'{npy_out_filepath}.{int(time.time())}', old_preds)
                print('checking old preds is same as new preds')
                if not np.allclose(old_preds, predictions):
                    diffs = np.round(old_preds - predictions, 4)
                    print('old preds != new preds')
                else:
                    print('old preds == new preds!')
            np.save(npy_out_filepath, predictions)
            tqdm.write('    Saved predictions to {}'.format(npy_out_filepath))
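# Hedged usage sketch: a hypothetical invocation of eval(), followed by loading
# the predictions it saves for one model. The dataset name is a placeholder;
# 'resnet50' and 'inception_v3' are standard torchvision models, but whether
# they appear in this script's all_models list is an assumption, and the layout
# of the array returned by eval_utils.evaluate_model is not specified above.
#
# eval(dataset='<dataset_name>', models='resnet50,inception_v3', batch_size=32)
# preds = np.load(pathlib.Path(__file__).parent /
#                 '../data/predictions/<dataset_name>/resnet50.npy')
# print(preds.shape)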
train_labl_path = '/userhome/student/kede/colorize/deep_learning/data/train_labels.csv'
valid_labl_path = '/userhome/student/kede/colorize/deep_learning/data/valid_labels.csv'
test_labl_path = '/userhome/student/kede/colorize/deep_learning/data/test_labels.csv'
class_desc_path = '/userhome/student/kede/colorize/deep_learning/data/class_descriptions.csv'
image_id_path = '/userhome/student/kede/colorize/deep_learning/data/image_ids_and_rotation.csv'
image_root_folder = '/userhome/student/kede/colorize/deep_learning/data/images/'

#data_hl = data_collector.DataCollector()
#data_hl.load_datas(image_id_path, train_labl_path, valid_labl_path, test_labl_path, class_desc_path)
#label_names = ['City', 'Skyline', 'Cityscape', 'Boathouse', 'Landscape lighting', 'Town square', 'College town', 'Town']
#collect_labels(data_hl, image_root_folder, label_names)

img_loader = image_loader.ImageLoader(image_root_folder, pts_hull_file)
# separate_small_data(validation_rate, test_rate)
img_loader.separate_small_data(0.2, 0.1)

model = nnetwork.create_vgg_model(3)
model.compile('adam',
              loss='categorical_crossentropy',
              metrics=['accuracy', keras.metrics.categorical_accuracy])

patience = 70
early_stopping = EarlyStopping(monitor='val_acc', patience=patience, verbose=1)
checkpointer = ModelCheckpoint(filepath='weights.hdf5',
                               monitor='val_acc',
                               save_best_only=True,
                               verbose=1)
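# Hedged sketch of how the pieces above are typically wired together: the two
# callbacks are passed to model.fit. The training/validation arrays are
# hypothetical names (x_train, y_train, x_valid, y_valid); the snippet above
# does not show how img_loader exposes the split data, and the epoch and
# batch-size values are illustrative.
#
# model.fit(x_train, y_train,
#           validation_data=(x_valid, y_valid),
#           epochs=200,
#           batch_size=32,
#           callbacks=[early_stopping, checkpointer])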