# Shared imports for the scripts below. Project-local helpers (FileStorage,
# get_queries, experiments, sample_image_paths, batches, filename, the model
# wrappers, and the Evaluating* mechanisms) come from the thesis codebase and
# are assumed importable.
import argparse
import random
from hashlib import sha256
from pathlib import Path

import numpy as np
from scipy.cluster.hierarchy import fclusterdata
from tensorflow.keras.layers import GlobalAveragePooling2D


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--original', type=str)
    parser.add_argument('--to_fix', type=str)
    parser.add_argument('--output', type=str)
    args = parser.parse_args()

    # Mean of the original features, computed over a small subset of files.
    data_original = FileStorage.load_multiple_files_multiple_keys(
        args.original, retrieve_merged=['features'], num_files_limit=2)['features']
    features_mean = np.mean(np.stack(data_original), axis=0)

    data_preprocessed = FileStorage.load_multiple_files_multiple_keys(
        args.to_fix, retrieve_merged=['features'], num_files_limit=5)
    preprocessed_features = np.stack(data_preprocessed['features'])
    preprocessed_fixed = preprocessed_features + features_mean

    # Copy all other keys through unchanged, replacing only 'features'.
    new_data = {}
    for key in data_preprocessed.keys():
        if key == 'features':
            new_data['features'] = preprocessed_fixed
        else:
            new_data[key] = data_preprocessed[key]
    FileStorage.save_data(Path(args.output, 'fixed_data'), **new_data)
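
# Toy check of the fix above, assuming the broken preprocessing stored
# mean-centered features: adding the dataset mean back restores the originals.
original = np.random.rand(10, 4).astype(np.float32)
mean = original.mean(axis=0)
centered = original - mean   # what the broken preprocessing would have stored
restored = centered + mean   # what the script above recomputes
assert np.allclose(restored, original, atol=1e-6)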

def main():
    experiment_save_dir = r"C:\Users\janul\Desktop\thesis_tmp_files\responses"
    requests = get_queries()
    exps = [experiments(i) for i in [58, 59, 60]]

    for exp in exps:
        try:
            print(repr(exp))
            if not exp:
                continue

            # Hash the experiment's repr() to get a stable output filename,
            # so re-runs of the same configuration are skipped.
            filename_hash = sha256(repr(exp).encode('utf-8')).hexdigest()
            responses_save_path = Path(experiment_save_dir, filename_hash).with_suffix(".npz")
            if responses_save_path.exists():
                print("Results already present.", responses_save_path)
                continue

            print("Output path:", responses_save_path)
            responses = exp.run(requests)
            FileStorage.save_data(responses_save_path,
                                  responses=responses,
                                  experiment=exp.__dict__,
                                  exp_repr=repr(exp),
                                  model=repr(exp.get_env().model),
                                  num_images=exp.num_images())
        except Exception:
            # Swallow failures so one broken experiment does not stop the batch.
            continue
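
# Minimal illustration of the caching scheme above: the same repr() always
# hashes to the same path, so finished experiments are detected and skipped.
# The repr string and directory here are stand-ins.
exp_repr = "SomeExperiment(model='resnet50v2', n=58)"
cache_path = Path(r"C:\tmp\responses",
                  sha256(exp_repr.encode('utf-8')).hexdigest()).with_suffix(".npz")
print(cache_path)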

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str)
    parser.add_argument('--output', type=str)
    args = parser.parse_args()

    # Per-image keys are merged across files; the remaining keys (e.g. model
    # metadata) are identical in every file and are retrieved only once.
    keys_merged = {'crops', 'paths', 'features'}
    first_file_name = str(next(Path(args.input).rglob("*.npz")))
    first_file = FileStorage.load_data_from_file(first_file_name)
    keys_available = set(first_file.keys())
    keys_once = keys_available - keys_merged

    data = FileStorage.load_multiple_files_multiple_keys(
        args.input,
        retrieve_merged=list(keys_available - keys_once),
        retrieve_once=list(keys_once))

    filename = Path(first_file_name).name.split(',')[0]
    FileStorage.save_data(Path(args.output, filename), **data)
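
# A sketch of the assumed FileStorage semantics (illustrative, not the
# project's actual implementation): merged keys are concatenated across all
# files, once-keys are taken from the first file only.
def load_merged_sketch(directory, retrieve_merged, retrieve_once):
    merged = {key: [] for key in retrieve_merged}
    once = {}
    for i, file in enumerate(sorted(Path(directory).rglob("*.npz"))):
        data = np.load(str(file), allow_pickle=True)
        for key in retrieve_merged:
            merged[key].extend(data[key])
        if i == 0:
            once = {key: data[key] for key in retrieve_once}
    return {**{key: np.array(value) for key, value in merged.items()}, **once}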

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str)
    parser.add_argument('--output', type=str)
    args = parser.parse_args()

    for file in Path(args.input).rglob("*.npz"):
        print(file.name)
        data = np.load(str(file), allow_pickle=True)
        new_data = {}
        for key in data.keys():
            if key == 'features':
                # Collapse the spatial dimensions of each feature map.
                new_data['features'] = GlobalAveragePooling2D()(data['features']).numpy()
            else:
                new_data[key] = data[key]
        FileStorage.save_data(Path(args.output, file.name), **new_data)
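
# Sanity check for the pooling step: GlobalAveragePooling2D over (N, H, W, C)
# feature maps equals a plain NumPy mean over the two spatial axes.
maps = np.random.rand(2, 7, 7, 512).astype(np.float32)
pooled = GlobalAveragePooling2D()(maps).numpy()  # shape (2, 512)
assert np.allclose(pooled, maps.mean(axis=(1, 2)), atol=1e-6)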

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str)
    parser.add_argument('--output', type=str)
    parser.add_argument('--sample_size', type=int, default=100)
    args = parser.parse_args()

    # Fixed seed so repeated runs select the same subsample; query images are
    # always included on top of the random sample.
    random.seed(42)
    requests = get_queries()
    queries_paths = [r.query_image for r in requests]
    selected_paths = sample_image_paths(args.input, args.sample_size)
    selected_paths += queries_paths

    sample_args = ['paths', 'features', 'crops']
    for file in Path(args.input).rglob("*.npz"):
        if Path(args.output, file.name).exists():
            print("skipping", file.name, "already exists")
            continue

        data = np.load(str(file), allow_pickle=True)
        idxs = np.array([i_path for i_path, path in enumerate(data['paths'])
                         if path in selected_paths])
        if len(idxs) == 0:
            continue

        # Subsample the per-image keys; copy everything else through unchanged.
        new_data = {}
        for key in data.keys():
            if key in sample_args:
                new_data[key] = data[key][idxs]
            else:
                new_data[key] = data[key]
        FileStorage.save_data(Path(args.output, file.name), **new_data)
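
# A hypothetical sketch of `sample_image_paths` (the project's actual helper
# may differ): gather every stored path and draw a seeded random sample.
def sample_image_paths_sketch(directory, sample_size):
    all_paths = []
    for file in Path(directory).rglob("*.npz"):
        data = np.load(str(file), allow_pickle=True)
        all_paths.extend(data['paths'])
    return random.sample(all_paths, min(sample_size, len(all_paths)))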

data = np.load(data_path, allow_pickle=True)
features = data['features']
print("Videos with faces",
      len(set(prefix.split("/")[0] for prefix in data['paths'])))

# Complete-linkage agglomerative clustering with a flat distance threshold.
y_pred = fclusterdata(features, t=0.6, criterion='distance', method='complete')
print(len(y_pred))
print(len(set(y_pred)))

# For each cluster, keep the member closest to the cluster centroid.
representatives = []
for cluster_id in set(y_pred):
    features_ids = np.argwhere(y_pred == cluster_id)
    cluster_items = features[features_ids]
    centroid = np.mean(cluster_items, axis=0)
    closest_to_centroid_idx = np.argmin([np.linalg.norm(x - centroid) for x in cluster_items])
    closest = features_ids[closest_to_centroid_idx]
    assert y_pred[closest] == cluster_id
    representatives.append(closest)

new_data_path = r'C:\Users\janul\Desktop\thesis_tmp_files\face_features_only_bigger_10percent_316videos_only_representatives\faces.npz'
new_data = {
    'crops': data['crops'][representatives],
    'paths': data['paths'][representatives],
    'features': data['features'][representatives],
}
FileStorage.save_data(Path(new_data_path), **new_data)
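
# Self-contained demo of the representative selection above: cluster random
# points with the same settings and keep the member closest to each centroid.
points = np.random.rand(50, 8)
labels = fclusterdata(points, t=0.6, criterion='distance', method='complete')
for cluster_id in set(labels):
    members = np.where(labels == cluster_id)[0]
    centroid = points[members].mean(axis=0)
    representative = members[np.argmin(np.linalg.norm(points[members] - centroid, axis=1))]
    assert labels[representative] == cluster_id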

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--images_dir", default=None, type=str,
                        help="Path to image directory.")
    parser.add_argument("--save_location", default="", type=str,
                        help="Path to directory where precomputed models are saved.")
    parser.add_argument("--input_size", default=96, type=int,
                        help="Input shape for model (square width).")
    parser.add_argument("--batch_size", default=128, type=int,
                        help="Batch size for processing.")
    parser.add_argument("--num_regions", default=None, type=str,
                        help='Number of regions "vertically,horizontally".')
    parser.add_argument('--feature_model', default='resnet50v2', type=str,
                        help='Feature vector model to compute (default: %(default)s)')
    args = parser.parse_args()

    input_shape = (args.input_size, args.input_size, 3)
    num_regions = tuple(map(int, args.num_regions.split(","))) if args.num_regions else None

    # Select the feature model and the evaluation mechanism (whole image,
    # fixed regions, or spatial feature maps) from the CLI arguments.
    if args.feature_model == 'resnet50v2' and num_regions:
        features_model = Resnet50V2(input_shape=input_shape)
        evaluation_mechanism = EvaluatingRegions(model=features_model, num_regions=num_regions)
    elif args.feature_model == 'resnet50v2':
        features_model = Resnet50V2(input_shape=input_shape)
        evaluation_mechanism = EvaluatingWholeImage(model=features_model)
    elif args.feature_model == 'resnet50v2antepenultimate':
        features_model = Resnet50V2Antepenultimate(input_shape=input_shape)
        evaluation_mechanism = EvaluatingSpatially(model=features_model)
    elif args.feature_model == 'mobilenetv2' and num_regions:
        features_model = MobileNetV2(input_shape=input_shape)
        evaluation_mechanism = EvaluatingRegions(model=features_model, num_regions=num_regions)
    elif args.feature_model == 'mobilenetv2':
        features_model = MobileNetV2(input_shape=input_shape)
        evaluation_mechanism = EvaluatingWholeImage(model=features_model)
    elif args.feature_model == 'mobilenetv2antepenultimate':
        features_model = MobileNetV2Antepenultimate(input_shape=input_shape)
        evaluation_mechanism = EvaluatingSpatially(model=features_model)
    elif args.feature_model == 'Resnet50_11k_classes' and num_regions:
        features_model = Resnet50_11k_classes()
        regions_size = (args.input_size, args.input_size, 3) if args.input_size else None
        evaluation_mechanism = EvaluatingRegions(model=features_model,
                                                 num_regions=num_regions,
                                                 regions_size=regions_size)
    elif args.feature_model == 'Resnet50_11k_classes':
        features_model = Resnet50_11k_classes()
        evaluation_mechanism = EvaluatingWholeImage(model=features_model)
    elif args.feature_model == 'faces':
        evaluation_mechanism = EvaluatingFaces()
    else:
        raise ValueError('Unknown `feature_model`.')

    directories = FileStorage.directories(args.images_dir) or [args.images_dir]
    print("Found %d directories." % len(directories))
    images_features = []
    for directory in directories:
        save_location = Path(args.save_location,
                             filename(args.feature_model, Path(directory).name, extension='.npz'))
        if save_location.exists():
            print("Skipping directory {}".format(directory))
            continue

        print("Processing directory {}".format(directory))
        for images_data in batches(FileStorage.load_images_continuously(directory),
                                   batch_size=args.batch_size):
            features = evaluation_mechanism.features([sample.image for sample in images_data])
            for image_features, image_data in zip(features, images_data):
                images_features.append(
                    DatabaseRecord(
                        filename=str(Path(image_data.filename).relative_to(args.images_dir).as_posix()),
                        features=image_features))

        FileStorage.save_data(Path(args.save_location,
                                   filename(args.feature_model, Path(directory).name)),
                              data=images_features,
                              src_dir=args.images_dir,
                              model=repr(evaluation_mechanism.model))
        images_features = []
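
# A minimal sketch of the assumed `batches` helper: chunk a (possibly lazy)
# stream of loaded images into fixed-size lists for the feature model.
from itertools import islice

def batches_sketch(iterable, batch_size):
    iterator = iter(iterable)
    while True:
        batch = list(islice(iterator, batch_size))
        if not batch:
            return
        yield batch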

parser = argparse.ArgumentParser()
parser.add_argument("--input", default=None, type=str)
parser.add_argument('--output', default=None, type=str)
args = parser.parse_args()

for file in Path(args.input).rglob("*.npz"):
    save_location = Path(args.output, file.name)
    if save_location.exists():
        print("Skipping {}. Already present.".format(save_location))
        continue

    data = np.load(str(file), allow_pickle=True)
    new_db_records = []
    for filepath, features in data['data']:
        image_features = []
        for regions_features in features:
            # Spatial average over the feature map (axes 0, 1); there is no batch axis.
            avg_pool_features = np.mean(regions_features.features, axis=(0, 1))
            image_features.append(
                RegionFeatures(crop=regions_features.crop, features=avg_pool_features))
        new_db_records.append(DatabaseRecord(filename=filepath, features=image_features))

    FileStorage.save_data(Path(args.output, file.name),
                          data=new_db_records,
                          src_dir=data['src_dir'],
                          model=data['model'])
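
# Assumed shapes of the record types used in the two scripts above
# (illustrative namedtuples, not necessarily the project's exact definitions).
from collections import namedtuple

DatabaseRecord = namedtuple('DatabaseRecord', ['filename', 'features'])
RegionFeatures = namedtuple('RegionFeatures', ['crop', 'features'])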