Example #1
    def verify_dataset(self,
                       sequence_id: typing.Union[str, int, float],
                       repair: bool = False):
        if isinstance(sequence_id, int):
            sequence_id_int = sequence_id
        else:
            sequence_id_int = to_sequence_id(sequence_id)

        if sequence_id_int in self._full_paths:
            import_dataset_task = task_manager.get_import_dataset_task(
                module_name=kitti_loader.__name__,
                path=str(self._full_paths[sequence_id_int]),
                additional_args={'sequence_number': sequence_id_int})
            if import_dataset_task.is_finished:
                image_collection = import_dataset_task.get_result()
                return kitti_validator.verify_dataset(
                    image_collection, self._full_paths[sequence_id_int],
                    sequence_id_int, repair)
            else:
                logging.getLogger(__name__).warning(
                    f"Cannot validate sequence {sequence_id_int:06}, it is not loaded yet"
                )
                return True
        if 0 <= sequence_id_int < 11:
            raise NotADirectoryError(
                f"No root folder for sequence {sequence_id_int:06}, did you download it?")
        else:
            raise NotADirectoryError(
                f"No root folder for sequence {sequence_id}, are you sure it's a sequence?")
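The method above leans on a to_sequence_id helper to normalise string and float identifiers to an int before lookup. The helper itself is not shown in the example, so the following is only a sketch of the assumed behaviour:

    def to_sequence_id(sequence_id) -> int:
        # Assumed behaviour, inferred from the call site in Example #1:
        # callers may pass '05', '5', or 5.0; KITTI sequences are numbered 00-10.
        # The real helper in the source project may differ.
        return int(float(sequence_id))

    assert to_sequence_id('05') == to_sequence_id(5.0) == 5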
Example #2
    def get_dataset(self, sequence_id: typing.Union[str, int, float]):
        if isinstance(sequence_id, int):
            sequence_id_int = sequence_id
        else:
            sequence_id_int = to_sequence_id(sequence_id)

        if sequence_id_int in self._full_paths:
            import_dataset_task = task_manager.get_import_dataset_task(
                module_name=kitti_loader.__name__,
                path=str(self._full_paths[sequence_id_int]),
                additional_args={'sequence_number': sequence_id_int},
                num_cpus=1,
                num_gpus=0,
                memory_requirements='3GB',
                expected_duration='8:00:00',
            )
            if import_dataset_task.is_finished:
                return import_dataset_task.get_result()
            else:
                # Make sure the import dataset task gets done
                import_dataset_task.save()
                return None
        if 0 <= sequence_id_int < 11:
            raise NotADirectoryError(
                f"No root folder for sequence {sequence_id_int:06}, did you download it?")
        else:
            raise NotADirectoryError(
                f"No root folder for sequence {sequence_id}, are you sure it's a sequence?")
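A hedged usage sketch for get_dataset. The manager class and its constructor are assumptions; the None-means-pending contract comes from the example itself:

    # Hypothetical: KittiManager and its constructor are assumptions,
    # not part of the example.
    manager = KittiManager('/data/kitti/dataset')
    image_collection = manager.get_dataset('05')
    if image_collection is None:
        # The import task was saved so a job scheduler can pick it up;
        # call get_dataset again once the import has run.
        pass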
Example #3
    def verify_sequences(self,
                         environment: str = None,
                         trajectory_id: str = None,
                         quality_level: QualityLevel = None,
                         time_of_day: imeta.TimeOfDay = None) -> bool:
        """
        Verify all image sequences that match a specific set of requirements.
        Each image sequence will be read from disk, and compared to the image data in the image_manager.
        Focus is on the image pixels, does not verify trajectories or metadata.
        Errors will be logged, but the verification will continue.
        Only those sequences that have successfully imported can be validated.

        :param environment: Only validate sequences recorded in the specified environment. Member of 'ENVIRONMENTS'.
        :param trajectory_id: Only validate sequences that follow the given trajectory. Member of 'TRAJECTORIES'
        :param quality_level: Only validate sequences that are recorded at the given quality.
        :param time_of_day: Only validate sequences of the particular time of day
        :return: True if all the specified sequences pass validation, false otherwise
        """
        sequence_paths = [
            (sequence_entry.path, sequence_entry.sequence_name)
            for sequence_entry in self._sequence_data
            if (environment is None or sequence_entry.environment == environment)
            and (trajectory_id is None or sequence_entry.trajectory_id == trajectory_id)
            and (quality_level is None or sequence_entry.quality_level == quality_level)
            and (time_of_day is None or sequence_entry.time_of_day == time_of_day)
        ]

        all_valid = True  # Becomes False if any validated sequence fails
        total_validated = 0
        invalid_ids = []
        for sequence_path, sequence_name in sequence_paths:
            import_dataset_task = task_manager.get_import_dataset_task(
                module_name=ndds_loader.__name__,
                path=str(sequence_path),
                additional_args={'sequence_name': sequence_name})
            if import_dataset_task.is_finished:
                image_collection = import_dataset_task.get_result()
                is_valid = ndds_verify.verify_sequence(image_collection,
                                                       sequence_path)
                all_valid = all_valid and is_valid
                total_validated += 1
                if not is_valid:
                    invalid_ids.append(image_collection.pk)
                del image_collection  # Clear from memory before loading the next one
            else:
                logging.getLogger(__name__).debug(
                    f"Not validating {sequence_path}, not imported yet")
        logging.getLogger(__name__).info(
            f"Validated {total_validated} of {len(self._sequence_data)} sequences, "
            f"{len(invalid_ids)} were invalid")
        if len(invalid_ids) > 0:
            logging.getLogger(__name__).error(
                f"Invalid ids were: {invalid_ids}")
        return all_valid
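The filter comprehension above is repeated verbatim in Example #4 below; a small helper could factor it out. This is a sketch, not part of the original code:

    def _matching_sequence_paths(sequence_data, environment=None, trajectory_id=None,
                                 quality_level=None, time_of_day=None):
        # A criterion of None means "do not filter on this field".
        return [
            (entry.path, entry.sequence_name)
            for entry in sequence_data
            if (environment is None or entry.environment == environment)
            and (trajectory_id is None or entry.trajectory_id == trajectory_id)
            and (quality_level is None or entry.quality_level == quality_level)
            and (time_of_day is None or entry.time_of_day == time_of_day)
        ]

Both verify_sequences and get_datasets could then call this helper with their keyword arguments passed straight through.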
Example #4
    def get_datasets(
        self,
        environment: str = None,
        trajectory_id: str = None,
        quality_level: QualityLevel = None,
        time_of_day: imeta.TimeOfDay = None,
    ) -> typing.Tuple[typing.List[ObjectId], int]:
        """
        Get all image sequences, with a specific set of requirements.
        Returns all image sequences by default, specify an environment to get only those with that environment, etc.
        Specifying multiple parameters means sequences must match ALL the parameters. Call multiple times for OR.

        :param environment: Only return sequences recorded in the specified environment. Member of 'ENVIRONMENTS'.
        :param trajectory_id: Only return sequences that follow the given trajectory. Member of 'TRAJECTORIES'
        :param quality_level: Only return sequences that are recorded at the given quality.
        :param time_of_day: Only return
        :return: A list of image sequence objects,
        and a number of pending sequences that match the criteria, but are still to import.
        """
        sequence_paths = [
            (sequence_entry.path, sequence_entry.sequence_name)
            for sequence_entry in self._sequence_data
            if (environment is None or sequence_entry.environment == environment)
            and (trajectory_id is None or sequence_entry.trajectory_id == trajectory_id)
            and (quality_level is None or sequence_entry.quality_level == quality_level)
            and (time_of_day is None or sequence_entry.time_of_day == time_of_day)
        ]

        sequences = []
        num_pending = 0
        for sequence_path, sequence_name in sequence_paths:
            import_dataset_task = task_manager.get_import_dataset_task(
                module_name=ndds_loader.__name__,
                path=str(sequence_path),
                additional_args={'sequence_name': sequence_name},
                num_cpus=self.num_cpus,
                num_gpus=0,
                memory_requirements=self.memory_requirements,
                expected_duration=self.expected_duration,
            )
            if import_dataset_task.is_finished:
                sequences.append(import_dataset_task.get_result())
            else:
                # Ensure that the job has the right resource requirements
                import_dataset_task.num_cpus = self.num_cpus
                import_dataset_task.memory_requirements = self.memory_requirements
                import_dataset_task.expected_duration = self.expected_duration
                # Make sure the import dataset task gets done
                import_dataset_task.save()
                num_pending += 1
        return sequences, num_pending
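A hedged usage sketch; the manager instance and the environment value are assumptions, not part of the example:

    import logging

    # Hypothetical usage of the method above.
    sequences, num_pending = manager.get_datasets(environment='forest')
    if num_pending > 0:
        # Matching imports were (re)queued with the manager's resource
        # settings; call get_datasets again after the task queue runs.
        logging.getLogger(__name__).info(f"{num_pending} sequences still importing")
    for image_collection in sequences:
        ...  # use the imported image collections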
Example #5
 def verify_dataset(self, name: str, repair: bool = False):
     if name in self._full_paths:
         import_dataset_task = task_manager.get_import_dataset_task(
             module_name=euroc_loader.__name__,
             path=str(self._full_paths[name]),
             additional_args={'dataset_name': name})
         if import_dataset_task.is_finished:
             image_collection = import_dataset_task.get_result()
             return euroc_validator.verify_dataset(image_collection,
                                                   self._full_paths[name],
                                                   name, repair)
         else:
             logging.getLogger(__name__).warning(
                 f"Cannot validate {name}, it is not loaded yet")
             return True
     raise NotADirectoryError(
         f"No root folder for {name}, did you download it?")
Example #6
 def import_dataset(
         self,
         name: str,
         task_manager: arvet.batch_analysis.task_manager.TaskManager,
         path_manager: arvet.config.path_manager.PathManager,
         mappings: typing.List[typing.Tuple[str, dict]],
         module_name: str,
         path: str,
         additional_args: dict = None,
         num_cpus: int = 1,
         num_gpus: int = 0,
         memory_requirements: str = '3GB',
         expected_duration: str = '12:00:00') -> None:
     """
     Import a dataset at a given path, using a given module.
     Has all the arguments of get_import_dataset_task, which are passed through
     :param name: The name to store the dataset as
     :param task_manager: The task manager, for scheduling
     :param path_manager: The path manager, for checking the path
     :param mappings: List of simulator names and origins for this dataset trajectory
     :param module_name: The
     :param path:
     :param additional_args:
     :param num_cpus:
     :param num_gpus:
     :param memory_requirements:
     :param expected_duration:
     :return:
     """
     task = task_manager.get_import_dataset_task(
         module_name=module_name,
         path=path,
         additional_args=additional_args
         if additional_args is not None else {},
         num_cpus=num_cpus,
         num_gpus=num_gpus,
         memory_requirements=memory_requirements,
         expected_duration=expected_duration)
     if task.is_finished:
         if name not in self.trajectory_groups:
             trajectory_group = tg.TrajectoryGroup(name=name,
                                                   reference_id=task.result,
                                                   mappings=mappings)
             self._trajectory_groups[name] = trajectory_group
     elif path_manager.check_path(path):
         task_manager.do_task(task)
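A sketch of how import_dataset might be called. The experiment instance, managers, mappings, and paths are all assumptions; only the parameter names come from the signature above:

    # Hypothetical call; every value shown is an assumption.
    experiment.import_dataset(
        name='KITTI trajectory 00',
        task_manager=task_manager,
        path_manager=path_manager,
        mappings=[('simulator_world', {'location': [0, 0, 0]})],
        module_name='kitti_loader',
        path='datasets/KITTI/dataset',
        additional_args={'sequence_number': 0},
    )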
Example #7
 def get_dataset(self, name):
     if name in self._full_paths:
         import_dataset_task = task_manager.get_import_dataset_task(
             module_name=euroc_loader.__name__,
             path=str(self._full_paths[name]),
             additional_args={'dataset_name': name},
             num_cpus=1,
             num_gpus=0,
             memory_requirements='3GB',
             expected_duration='8:00:00',
         )
         if import_dataset_task.is_finished:
             return import_dataset_task.get_result()
         else:
             # Make sure the import dataset task gets done
             import_dataset_task.save()
             return None
     raise NotADirectoryError(
         f"No root folder for {name}, did you download it?")
Example #8
 def verify_dataset(self, name: str, repair: bool = False):
     if name in self._full_paths:
         import_dataset_task = task_manager.get_import_dataset_task(
             module_name=tum_loader.__name__,
             path=str(self._full_paths[name]),
             additional_args={'dataset_name': name})
         if import_dataset_task.is_finished:
             image_collection = import_dataset_task.get_result()
             return tum_validator.verify_dataset(image_collection,
                                                 self._full_paths[name],
                                                 name, repair)
         else:
             # Try looking for an existing tarfile
             for candidate_path in [
                     str(self._full_paths[name]) + '.tar.gz',
                     str(self._full_paths[name]) + '.tgz',
             ]:
                 import_dataset_task = task_manager.get_import_dataset_task(
                     module_name=tum_loader.__name__,
                     path=candidate_path,
                     additional_args={'dataset_name': name})
                 if import_dataset_task.is_finished:
                     break
             if import_dataset_task.is_finished:
                 if repair:
                     logging.getLogger(__name__).warning(
                         f"Removed suffix from tarball import task for {name}, it should get returned next time"
                     )
                     import_dataset_task.path = self._full_paths[name]
                     import_dataset_task.save()
                 image_collection = import_dataset_task.get_result()
                 return tum_validator.verify_dataset(
                     image_collection, self._full_paths[name], name, repair)
             else:
                 # Try looking for an existing task with the actual root from find_files as the path
                 try:
                     actual_root = tum_loader.find_files(
                         self._full_paths[name])
                 except FileNotFoundError:
                     actual_root = None
                 if actual_root is not None and len(actual_root) > 0:
                     import_dataset_task = task_manager.get_import_dataset_task(
                         module_name=tum_loader.__name__,
                         path=actual_root[0],
                         additional_args={'dataset_name': name})
                 else:
                     import_dataset_task = None
                 if import_dataset_task is not None and import_dataset_task.is_finished:
                     if repair:
                         logging.getLogger(__name__).warning(
                             f"Shortened path for {name}, it should get returned next time"
                         )
                         import_dataset_task.path = self._full_paths[name]
                         import_dataset_task.save()
                     image_collection = import_dataset_task.get_result()
                     return tum_validator.verify_dataset(
                         image_collection, self._full_paths[name], name,
                         repair)
                 else:
                     logging.getLogger(__name__).warning(
                         f"Cannot validate {name}, it is not loaded yet? "
                         f"(looking for module name \"{tum_loader.__name__}\", "
                         f"path \"{str(self._full_paths[name])}\", "
                         f"additional args \"{ {'dataset_name': name} }\")")
                     return True
     raise NotADirectoryError(
         f"No root folder for {name}, did you download it?")