def _test_checksums(self):
  """Asserts that every URL the builder downloads has a recorded checksum.

  Skipped entirely when the builder never called `dl_manager.download`.
  Fails with a pointer to the checksums file and the `--register_checksums`
  instructions when some downloaded URL has no recorded checksum.
  """
  # If no call to `dl_manager.download`, then no need to check url presence.
  if not self._download_urls:
    return

  # Fixed typo: "developping" -> "developing" in the user-facing message.
  err_msg = ("If you are developing outside TFDS and want to opt-out, "
             "please add `SKIP_CHECKSUMS = True` to the "
             "`DatasetBuilderTestCase`")

  url_infos = self.DATASET_CLASS.url_infos
  if url_infos is None:
    # Legacy location: checksums file registered in the shared TFDS dir.
    # (Dropped the pointless single-argument `os.path.join` wrapper.)
    filepath = checksums._get_path(self.builder.name)  # pylint: disable=protected-access
    with utils.try_reraise(suffix=err_msg):
      url_infos = checksums._get_url_infos(filepath)  # pylint: disable=protected-access
  else:
    # TODO(tfds): Improve doc for dataset-as-folder (and remove
    # try_reraise above)
    filepath = str(self.DATASET_CLASS.code_path.parent / "checksums.tsv")

  missing_urls = self._download_urls - set(url_infos.keys())
  self.assertEmpty(
      missing_urls,
      "Some urls checksums are missing at: {} "
      "Did you forget to record checksums with `--register_checksums` ? "
      "See instructions at: "
      "https://www.tensorflow.org/datasets/add_dataset#2_run_download_and_prepare_locally"
      "\n{}".format(filepath, err_msg))
def from_spec(cls, spec: SplitArg) -> 'AbstractSplit':
  """Builds an `AbstractSplit` from a string split specification.

  Args:
    spec (str): split(s) + optional slice(s) to read. A slice can be
      specified, using absolute numbers (int) or percentages (int). E.g.
      `test`: test split. `test + validation`: test split + validation split.
      `test[10:]`: test split, minus its first 10 records. `test[:10%]`:
      first 10% records of test split. `test[:-5%]+train[40%:60%]`: first 95%
      of test + middle 20% of train.

  Returns:
    The split instance.
  """
  # Already-parsed instructions pass through untouched.
  if isinstance(spec, AbstractSplit):
    return spec
  # `Split` enum values are not plain strings; normalize before splitting.
  spec = str(spec)
  sub_specs = _ADDITION_SEP_RE.split(spec)
  if not sub_specs:
    raise ValueError(f'No instructions could be built out of {spec!r}')
  with utils.try_reraise(f'Error parsing split {spec!r}. See format at: '
                         'https://www.tensorflow.org/datasets/splits\n'):
    parsed = [_str_to_relative_instruction(sub) for sub in sub_specs]
  # Combine every sub-instruction into a single split via `+`.
  return functools.reduce(operator.add, parsed)
def extract_features(
    self,
    features: features_lib.FeatureConnector,
) -> features_lib.FeatureConnector:
  """Returns the subset of `features` matching this partial-decoding spec.

  Args:
    features: Full feature connector to extract the subset from.

  Returns:
    The features restricted to the keys declared in `self._feature_specs`.
  """
  with utils.try_reraise(
      'Provided PartialDecoding specs does not match actual features: '
  ):
    # Normalize the raw spec (which may contain non-feature values)
    # into proper feature connectors.
    normalized_spec = _normalize_feature_item(
        feature=features,
        expected_feature=self._feature_specs,
    )
    # Keep only the intersection of `features` and the normalized spec.
    return _extract_features(
        feature=features,
        expected_feature=features_dict.to_feature(normalized_spec),
    )
def _extract_feature_item(
    feature: features_lib.FeaturesDict,
    expected_key: str,
    expected_value: features_lib.FeatureConnector,
    fn: Callable[..., Any],
) -> features_lib.FeatureConnector:
  """Calls `_extract_features(feature[key], expected_feature=value)`."""
  assert isinstance(feature, features_lib.FeaturesDict)
  # Fail early with a clear message when the requested key is absent.
  if expected_key not in feature:
    raise ValueError(f'Missing expected feature {expected_key!r}.')
  nested_feature = feature[expected_key]
  # Prefix any downstream error with the key so nested failures are locatable.
  with utils.try_reraise(f'In {expected_key!r}: '):
    return fn(feature=nested_feature, expected_feature=expected_value)
def _test_checksums(self):
  """Asserts that every URL the builder downloads has a recorded checksum.

  Skipped entirely when the builder never called `dl_manager.download`.
  Fails with a pointer to the checksums file and the `--register_checksums`
  instructions when some downloaded URL has no recorded size/checksum.
  """
  # If no call to `dl_manager.download`, then no need to check url presence.
  if not self._download_urls:
    return

  # Fixed typo: "developping" -> "developing" in the user-facing message.
  err_msg = ("If you are developing outside TFDS and want to opt-out, "
             "please add `SKIP_CHECKSUMS = True` to the "
             "`DatasetBuilderTestCase`")

  with utils.try_reraise(suffix=err_msg):
    # Dropped the pointless single-argument `os.path.join` wrapper.
    filepath = checksums._get_path(self.builder.name)  # pylint: disable=protected-access
    sizes_checksums = checksums._get_sizes_checksums(filepath)  # pylint: disable=protected-access
    urls = sizes_checksums.keys()

  missing_urls = self._download_urls - set(urls)
  # Fixed grammar: "Did you forgot" -> "Did you forget".
  self.assertEmpty(
      missing_urls,
      "Some urls checksums are missing at: {} "
      "Did you forget to record checksums with `--register_checksums` ? "
      "See instructions at: "
      "https://www.tensorflow.org/datasets/add_dataset#2_run_download_and_prepare_locally"
      "\n{}".format(filepath, err_msg))
def serialize_single_field(k, example_data, tensor_info):
  """Serializes one feature value, prefixing any error with the feature name."""
  context_msg = "Error while serializing feature {} ({}): ".format(
      k, tensor_info)
  with utils.try_reraise(context_msg):
    return _item_to_tf_feature(example_data, tensor_info)
def build_single_spec(k, v):
  """Builds the tf.Example spec for one feature, with error context on failure."""
  context_msg = "Specification error for feature {} ({}): ".format(k, v)
  with utils.try_reraise(context_msg):
    return _to_tf_example_spec(v)
def run_with_reraise(fn, k, example_data, tensor_info):
  """Applies `fn` to one feature value, prefixing any error with its name."""
  context_msg = "Error while serializing feature {} ({}): ".format(
      k, tensor_info)
  with utils.try_reraise(context_msg):
    return fn(example_data, tensor_info)
def build_single_spec(k, v):
  """Builds the tf.Example spec for one feature, with error context on failure."""
  context_msg = f"Specification error for feature {k!r} ({v}): "
  with utils.try_reraise(context_msg):
    return _to_tf_example_spec(v)