def load_data(self, data: str, dataset: Optional[Any] = None) -> 'EncodedVideoDataset':
    ds = self._make_encoded_video_dataset(data)
    if self.training:
        # Map each numeric label back to its class name, taken from the video's parent directory.
        label_to_class_mapping = {p[1]: p[0].split("/")[-2] for p in ds._labeled_videos._paths_and_labels}
        self.set_state(LabelsState(label_to_class_mapping))
        dataset.num_classes = len(np.unique([s[1]['label'] for s in ds._labeled_videos]))
    return ds
def __init__(self, labels: Optional[List[str]] = None, multi_label: bool = False, threshold: float = 0.5):
    super().__init__(multi_label=multi_label, threshold=threshold)
    self._labels = labels
    if labels is not None:
        self.set_state(LabelsState(labels))
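# Hedged usage sketch for the serializer above (the class name ``Labels`` matches the
# constructor shown; the concrete label names are made up). Passing ``labels`` at
# construction publishes a ``LabelsState`` immediately, so predictions can later be
# rendered as class names rather than raw indices:
#
#     serializer = Labels(labels=["cat", "dog"], multi_label=False, threshold=0.5)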
def load_data(
    self,
    data: Tuple[str, Union[str, List[str]], Union[str, List[str]]],
    dataset: Optional[Any] = None,
    columns: Union[List[str], Tuple[str, ...]] = ("input_ids", "attention_mask", "labels"),
) -> Sequence[Mapping[str, Any]]:
    csv_file, input, target = data

    data_files = {}
    stage = self.running_stage.value
    data_files[stage] = str(csv_file)

    # FLASH_TESTING is set in the CI to run faster.
    if flash._IS_TESTING and not torch.cuda.is_available():
        try:
            # Load only the first 20 rows to keep CI runs fast.
            dataset_dict = DatasetDict({
                stage: load_dataset(self.filetype, data_files=data_files, split=[f'{stage}[:20]'])[0]
            })
        except Exception:
            dataset_dict = load_dataset(self.filetype, data_files=data_files)
    else:
        dataset_dict = load_dataset(self.filetype, data_files=data_files)

    if self.training:
        labels = sorted(set(dataset_dict[stage][target]))
        dataset.num_classes = len(labels)
        self.set_state(LabelsState(labels))

    labels = self.get_state(LabelsState)

    # Convert the raw labels to class ids.
    if labels is not None:
        labels = labels.labels
        label_to_class_mapping = {v: k for k, v in enumerate(labels)}
        dataset_dict = dataset_dict.map(partial(self._transform_label, label_to_class_mapping, target))

    dataset_dict = dataset_dict.map(partial(self._tokenize_fn, input=input), batched=True)

    # Hugging Face models expect the target column to be named ``labels``.
    if not self.predicting and target != "labels":
        dataset_dict.rename_column_(target, "labels")

    dataset_dict.set_format("torch", columns=columns)

    return dataset_dict[stage]
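# A minimal sketch of what the ``_transform_label`` helper used above might look like.
# This is an assumption for illustration (the real helper lives elsewhere in the class);
# the point is that ``datasets.map`` applies it per example, rewriting the raw label in
# the ``target`` column to its integer class id.
def _transform_label_sketch(label_to_class_mapping: dict, target: str, example: dict) -> dict:
    example[target] = label_to_class_mapping[example[target]]
    return example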
def load_data(self, data: Bunch, dataset: Any) -> Sequence[Mapping[str, Any]]:
    """Gets the ``data`` and ``target`` attributes from the ``Bunch`` and passes them to ``super().load_data``.

    Args:
        data: The scikit-learn data ``Bunch``.
        dataset: The object that we can set attributes (such as ``num_classes``) on.

    Returns:
        A sequence of samples / sample metadata.
    """
    dataset.num_classes = len(data.target_names)
    self.set_state(LabelsState(data.target_names))
    return super().load_data((data.data, data.target), dataset=dataset)
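# Grounded aside: scikit-learn's built-in loaders return exactly the kind of ``Bunch``
# this loader expects, with ``data``, ``target`` and ``target_names`` attributes. For
# example:
from sklearn.datasets import load_iris

bunch = load_iris()
assert len(bunch.target_names) == 3  # iris has three classes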
def __init__(
    self,
    labels: Optional[List[str]] = None,
    threshold: Optional[float] = None,
    return_filepath: bool = False,
):
    super().__init__()
    self._labels = labels
    self.threshold = threshold
    self.return_filepath = return_filepath
    if labels is not None:
        self.set_state(LabelsState(labels))
def __init__(
    self,
    labels: Optional[List[str]] = None,
    threshold: Optional[float] = None,
    return_filepath: bool = False,
):
    if not _FIFTYONE_AVAILABLE:
        raise ModuleNotFoundError("Please run `pip install fiftyone`.")
    super().__init__()
    self._labels = labels
    self.threshold = threshold
    self.return_filepath = return_filepath
    if labels is not None:
        self.set_state(LabelsState(labels))
def __init__(
    self,
    labels: Optional[List[str]] = None,
    multi_label: bool = False,
    threshold: Optional[float] = None,
    store_logits: bool = False,
    return_filepath: bool = False,
):
    # Multi-label outputs need a cut-off probability; default to 0.5 when none is given.
    if multi_label and threshold is None:
        threshold = 0.5
    super().__init__(multi_label=multi_label)
    self._labels = labels
    self.threshold = threshold
    self.store_logits = store_logits
    self.return_filepath = return_filepath
    if labels is not None:
        self.set_state(LabelsState(labels))
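# Illustrative only: how a 0.5 threshold turns per-class probabilities into a
# multi-label prediction. The names below are made up for the sketch; the real logic
# lives in the serializer's serialization step, which is not shown here.
probs = [0.9, 0.2, 0.7]
class_names = ["cat", "dog", "bird"]
predicted = [name for name, p in zip(class_names, probs) if p >= 0.5]
assert predicted == ["cat", "bird"]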
def load_data(self, data: str, dataset: Optional[Any] = None) -> 'EncodedVideoDataset':
    ds: EncodedVideoDataset = labeled_encoded_video_dataset(
        pathlib.Path(data),
        self.clip_sampler,
        video_sampler=self.video_sampler,
        decode_audio=self.decode_audio,
        decoder=self.decoder,
    )
    if self.training:
        # Map each numeric label back to its class name, taken from the video's parent directory.
        label_to_class_mapping = {p[1]: p[0].split("/")[-2] for p in ds._labeled_videos._paths_and_labels}
        self.set_state(LabelsState(label_to_class_mapping))
        dataset.num_classes = len(np.unique([s[1]['label'] for s in ds._labeled_videos]))
    return ds
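# Grounded aside: the ``split("/")[-2]`` above assumes the usual
# ``root/class_name/video_file`` folder layout, so the class name is simply the file's
# parent directory. The example path is made up:
path = "videos/train/archery/clip_0001.mp4"
assert path.split("/")[-2] == "archery"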
def load_data(self, data: Tuple[str, str], dataset: Optional[Any] = None) -> Sequence[Dict[str, Any]]:
    if self.parser is None:
        raise ValueError("The parser argument must be provided.")
    if inspect.isclass(self.parser) and issubclass(self.parser, Parser):
        root, ann_file = data
        parser = self.parser(ann_file, root)
    elif isinstance(self.parser, Callable):
        parser = self.parser(data)
    else:
        raise ValueError("The parser must be a callable or an IceVision Parser type.")
    dataset.num_classes = parser.class_map.num_classes
    self.set_state(LabelsState([parser.class_map.get_by_id(i) for i in range(dataset.num_classes)]))
    records = parser.parse(data_splitter=SingleSplitSplitter())
    return [{DefaultDataKeys.INPUT: record} for record in records[0]]
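# Hedged usage sketch for the two parser forms accepted above. ``MyCocoParser`` and
# ``build_parser`` are hypothetical names; the paths are made up:
#
#     # 1. An IceVision ``Parser`` subclass is instantiated as parser(ann_file, root),
#     #    with ``data`` unpacked as (root, ann_file).
#     datasource.parser = MyCocoParser
#     datasource.load_data(("/data/images", "/data/annotations.json"), dataset)
#
#     # 2. Any other callable is invoked with the raw ``data`` tuple directly.
#     datasource.parser = lambda data: build_parser(data)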