Example no. 1
    def load_data(self, data: str, dataset: Optional[Any] = None) -> 'EncodedVideoDataset':
        ds = self._make_encoded_video_dataset(data)
        if self.training:
            # Recover each class name from the second-to-last path component
            # of the labeled video and expose the mapping through LabelsState.
            label_to_class_mapping = {p[1]: p[0].split("/")[-2] for p in ds._labeled_videos._paths_and_labels}
            self.set_state(LabelsState(label_to_class_mapping))
            dataset.num_classes = len(np.unique([s[1]['label'] for s in ds._labeled_videos]))
        return ds
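
The examples in this listing share label metadata between components through ``set_state``/``get_state``. A minimal sketch of those mechanics, assuming states are stored keyed by their type (the real ``ProcessState`` base class is approximated here by a plain frozen dataclass; this is an illustration, not the library's implementation):

    from dataclasses import dataclass
    from typing import Any, Mapping, Optional, Sequence, Type, Union


    @dataclass(frozen=True)
    class LabelsState:
        # Either an ordered sequence of class names or, as in Example no. 1,
        # a mapping from class index to class name.
        labels: Optional[Union[Sequence[str], Mapping[Any, str]]]


    class StateContainer:
        def __init__(self) -> None:
            self._state: dict = {}

        def set_state(self, state: Any) -> None:
            # Keyed by type, so at most one LabelsState is active at a time.
            self._state[type(state)] = state

        def get_state(self, state_type: Type) -> Optional[Any]:
            return self._state.get(state_type)


    container = StateContainer()
    container.set_state(LabelsState(["ants", "bees"]))
    assert container.get_state(LabelsState).labels == ["ants", "bees"]
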
Example no. 2
    def __init__(self,
                 labels: Optional[List[str]] = None,
                 multi_label: bool = False,
                 threshold: float = 0.5):
        super().__init__(multi_label=multi_label, threshold=threshold)
        self._labels = labels

        if labels is not None:
            self.set_state(LabelsState(labels))
Example no. 3
    def load_data(
        self,
        data: Tuple[str, Union[str, List[str]], Union[str, List[str]]],
        dataset: Optional[Any] = None,
        columns: Union[List[str],
                       Tuple[str]] = ("input_ids", "attention_mask", "labels"),
    ) -> Sequence[Mapping[str, Any]]:
        csv_file, input, target = data

        data_files = {}

        stage = self.running_stage.value
        data_files[stage] = str(csv_file)

        # FLASH_TESTING is set in the CI to run faster.
        if flash._IS_TESTING and not torch.cuda.is_available():
            try:
                dataset_dict = DatasetDict({
                    stage:
                    load_dataset(self.filetype,
                                 data_files=data_files,
                                 split=[f'{stage}[:20]'])[0]
                })
            except Exception:
                dataset_dict = load_dataset(self.filetype,
                                            data_files=data_files)
        else:
            dataset_dict = load_dataset(self.filetype, data_files=data_files)

        if self.training:
            labels = sorted(set(dataset_dict[stage][target]))
            dataset.num_classes = len(labels)
            self.set_state(LabelsState(labels))

        labels = self.get_state(LabelsState)

        # Convert string labels to integer ids.
        if labels is not None:
            labels = labels.labels
            label_to_class_mapping = {v: k for k, v in enumerate(labels)}
            dataset_dict = dataset_dict.map(
                partial(self._transform_label, label_to_class_mapping, target))

        dataset_dict = dataset_dict.map(partial(self._tokenize_fn,
                                                input=input),
                                        batched=True)

        # Hugging Face models expect target to be named ``labels``.
        if not self.predicting and target != "labels":
            dataset_dict.rename_column_(target, "labels")

        dataset_dict.set_format("torch", columns=columns)

        return dataset_dict[stage]
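
Only the call sites of ``self._transform_label`` appear above. A plausible reconstruction, inferred purely from how it is passed to ``datasets.Dataset.map`` via ``partial`` (the actual implementation may differ):

    def _transform_label(label_to_class_mapping: dict, target: str, example: dict) -> dict:
        # Unbatched ``Dataset.map`` passes one example (a dict of column
        # values) at a time; replace the string label with its integer id.
        example[target] = label_to_class_mapping[example[target]]
        return example
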
Example no. 4
    def load_data(self, data: Bunch, dataset: Any) -> Sequence[Mapping[str, Any]]:
        """Gets the ``data`` and ``target`` attributes from the ``Bunch`` and passes them to ``super().load_data``.

        Args:
            data: The scikit-learn data ``Bunch``.
            dataset: The object on which we can set attributes (such as ``num_classes``).

        Returns:
            A sequence of samples / sample metadata.
        """
        dataset.num_classes = len(data.target_names)
        self.set_state(LabelsState(data.target_names))
        return super().load_data((data.data, data.target), dataset=dataset)
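
For reference, a scikit-learn ``Bunch`` such as the one returned by ``load_iris`` exposes exactly the attributes this method reads:

    from sklearn.datasets import load_iris

    bunch = load_iris()
    print(bunch.data.shape)          # (150, 4) feature matrix
    print(bunch.target.shape)        # (150,) integer class ids
    print(list(bunch.target_names))  # ['setosa', 'versicolor', 'virginica']
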
Example no. 5
    def __init__(
        self,
        labels: Optional[List[str]] = None,
        threshold: Optional[float] = None,
        return_filepath: bool = False,
    ):
        super().__init__()
        self._labels = labels
        self.threshold = threshold
        self.return_filepath = return_filepath

        if labels is not None:
            self.set_state(LabelsState(labels))
Example no. 6
    def __init__(
        self,
        labels: Optional[List[str]] = None,
        threshold: Optional[float] = None,
        return_filepath: bool = False,
    ):
        if not _FIFTYONE_AVAILABLE:
            raise ModuleNotFoundError("Please run `pip install fiftyone`.")

        super().__init__()
        self._labels = labels
        self.threshold = threshold
        self.return_filepath = return_filepath

        if labels is not None:
            self.set_state(LabelsState(labels))
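
``_FIFTYONE_AVAILABLE`` is imported from elsewhere in the package; one common way such an availability flag is computed (an assumption here, the real definition may differ) is:

    import importlib.util

    # True when the optional dependency can be imported.
    _FIFTYONE_AVAILABLE = importlib.util.find_spec("fiftyone") is not None
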
Example no. 7
    def __init__(
        self,
        labels: Optional[List[str]] = None,
        multi_label: bool = False,
        threshold: Optional[float] = None,
        store_logits: bool = False,
        return_filepath: bool = False,
    ):
        if multi_label and threshold is None:
            threshold = 0.5

        super().__init__(multi_label=multi_label)
        self._labels = labels
        self.threshold = threshold
        self.store_logits = store_logits
        self.return_filepath = return_filepath

        if labels is not None:
            self.set_state(LabelsState(labels))
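
This constructor only defaults ``threshold`` to 0.5 in the multi-label case. A hypothetical helper (not the library's serializer logic) illustrating why: single-label predictions use ``argmax`` and never consult the threshold, while multi-label predictions keep every class whose probability clears it:

    import torch

    def classes_from_probs(probs: torch.Tensor, multi_label: bool, threshold: float) -> list:
        if multi_label:
            # Keep every class whose probability clears the threshold.
            return (probs >= threshold).nonzero(as_tuple=True)[0].tolist()
        # Single-label: the threshold is irrelevant, take the argmax.
        return [int(torch.argmax(probs).item())]

    print(classes_from_probs(torch.tensor([0.7, 0.2, 0.6]), True, 0.5))   # [0, 2]
    print(classes_from_probs(torch.tensor([0.7, 0.2, 0.6]), False, 0.5))  # [0]
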
Example no. 8
    def load_data(self,
                  data: str,
                  dataset: Optional[Any] = None) -> 'EncodedVideoDataset':
        ds: EncodedVideoDataset = labeled_encoded_video_dataset(
            pathlib.Path(data),
            self.clip_sampler,
            video_sampler=self.video_sampler,
            decode_audio=self.decode_audio,
            decoder=self.decoder,
        )
        if self.training:
            label_to_class_mapping = {
                p[1]: p[0].split("/")[-2]
                for p in ds._labeled_videos._paths_and_labels
            }
            self.set_state(LabelsState(label_to_class_mapping))
            dataset.num_classes = len(
                np.unique([s[1]['label'] for s in ds._labeled_videos]))
        return ds
Example no. 9
    def load_data(self,
                  data: Tuple[str, str],
                  dataset: Optional[Any] = None) -> Sequence[Dict[str, Any]]:
        if self.parser is None:
            raise ValueError("The parser argument must be provided.")
        if inspect.isclass(self.parser) and issubclass(self.parser, Parser):
            root, ann_file = data
            parser = self.parser(ann_file, root)
        elif isinstance(self.parser, Callable):
            parser = self.parser(data)
        else:
            raise ValueError("The parser must be a callable or an IceVision Parser type.")
        dataset.num_classes = parser.class_map.num_classes
        self.set_state(LabelsState([parser.class_map.get_by_id(i) for i in range(dataset.num_classes)]))
        records = parser.parse(data_splitter=SingleSplitSplitter())
        return [{DefaultDataKeys.INPUT: record} for record in records[0]]