def create_subset_dataset_from_instances(data: DataSet, instances_per_rig, name):
    """Given a list of images grouped by rig instances, pick a subset of
    images and create a dataset subset with the provided name from them.

    The subset is a contiguous, time-ordered window of at most
    ``rig_calibration_subset_size`` instances centered on the middle of
    each rig's sequence.

    Returns:
        A DataSet containing a subset of images with enough rig instances.
    """
    subset_images = []
    for instances in instances_per_rig.values():
        # Sort chronologically so the slice below selects a contiguous
        # time window rather than an arbitrary set of instances.
        instances_sorted = sorted(
            instances, key=lambda x: data.load_exif(x[0][0])["capture_time"]
        )

        subset_size = data.config["rig_calibration_subset_size"]
        half_window = int(subset_size / 2)
        # Integer (floor) division here: with '/', 'middle' is a float and
        # slicing with a float index raises TypeError on Python 3.
        middle = len(instances_sorted) // 2
        instances_calibrate = instances_sorted[
            max(0, middle - half_window) : min(
                middle + half_window, len(instances_sorted) - 1
            )
        ]

        for instance in instances_calibrate:
            subset_images += [x[0] for x in instance]

    return data.subset(name, subset_images)
def propose_subset_dataset_from_instances(
    data: DataSet, rig_instances: Dict[str, TRigInstance], name: str
) -> Iterable[Tuple[DataSet, List[List[Tuple[str, str]]]]]:
    """Given a list of images grouped by rig instances, infinitely propose
    random subsets of images and create a dataset subset with the provided
    name from them.

    Instances are first clustered by GPS proximity (nearest-neighbor graph,
    keeping the biggest connected component) so each proposed subset is a
    spatially coherent, time-contiguous window of instances.

    Returns:
        Yields, infinitely, tuples of (DataSet containing a subset of images
        with enough rig instances, the list of picked instances).
    """
    per_rig_camera_group = group_instances(rig_instances)

    # A topocentric reference is needed to convert GPS to metric coordinates.
    if not data.reference_lla_exists():
        data.invent_reference_lla()
    reference = data.load_reference()

    instances_to_pick = {}
    for key, instances in per_rig_camera_group.items():
        # Build a GPS look-up tree: one averaged topocentric position
        # per instance (altitude assumed 0 for the 2D proximity test).
        gpses = []
        for i, instance in enumerate(instances):
            all_gps = []
            for image, _ in instance:
                gps = data.load_exif(image)["gps"]
                all_gps.append(
                    reference.to_topocentric(gps["latitude"], gps["longitude"], 0)
                )
            gpses.append((i, np.average(np.array(all_gps), axis=0)))
        tree = spatial.cKDTree([x[1] for x in gpses])

        # Build the nearest-neighbor graph and split by connected components.
        nn = 6
        instances_graph = nx.Graph()
        for i, gps in gpses:
            # Cap k at the number of points: cKDTree pads missing neighbors
            # with index == len(points) and distance inf, which would inject
            # a phantom node into the graph.
            distances, neighbors = tree.query(gps, k=min(nn, len(gpses)))
            for d, n in zip(np.atleast_1d(distances), np.atleast_1d(neighbors)):
                if i == n:
                    continue
                instances_graph.add_edge(i, n, weight=d)
        all_components = sorted(
            nx.algorithms.components.connected_components(instances_graph),
            key=len,
            reverse=True,
        )

        logger.info(f"Found {len(all_components)} connected components")

        # Keep only the biggest component: a spatially coherent cluster.
        biggest_component = all_components[0]
        logger.info(f"Best component has {len(biggest_component)} instances")
        instances_to_pick[key] = biggest_component

    # Fixed seed: proposals are random but reproducible across runs.
    random.seed(42)
    while True:
        total_instances = []
        subset_images = []
        for key, instances in instances_to_pick.items():
            all_instances = per_rig_camera_group[key]
            instances_sorted = sorted(
                [all_instances[i] for i in instances],
                key=lambda x: data.load_exif(x[0][0])["capture_time"],
            )
            subset_size = data.config["rig_calibration_subset_size"]
            random_index = random.randint(0, len(instances_sorted) - 1)
            # Take a contiguous time window of up to 'subset_size' instances
            # centered on the randomly picked index.
            instances_calibrate = instances_sorted[
                max([0, random_index - int(subset_size / 2)]) : min(
                    [random_index + int(subset_size / 2), len(instances_sorted) - 1]
                )
            ]

            for instance in instances_calibrate:
                subset_images += [x[0] for x in instance]
            total_instances += instances_calibrate

        # Remove any previous subset with the same name before re-creating it.
        data.io_handler.rm_if_exist(os.path.join(data.data_path, name))
        yield data.subset(name, subset_images), total_instances