def get_split(split_name, dataset_dir, file_pattern=None, reader=None): """Gets a dataset tuple with instructions for reading flowers. Args: split_name: A train/validation split name. dataset_dir: The base directory of the dataset sources. file_pattern: The file pattern to use when matching the dataset sources. It is assumed that the pattern contains a '%s' string so that the split name can be inserted. reader: The TensorFlow reader type. Returns: A `Dataset` namedtuple. Raises: ValueError: if `split_name` is not a valid train/validation split. """ if split_name not in SPLITS_TO_SIZES: raise ValueError('split name %s was not recognized.' % split_name) if not file_pattern: file_pattern = _FILE_PATTERN file_pattern = os.path.join(dataset_dir, file_pattern % split_name) # Allowing None in the signature so that dataset_factory can use the default. if reader is None: reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 'image/class/label': tf.FixedLenFeature([], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), } items_to_handlers = { 'image': slim.tfexample_decoder.Image(), 'label': slim.tfexample_decoder.Tensor('image/class/label'), } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(dataset_dir): labels_to_names = dataset_utils.read_label_file(dataset_dir) return slim.dataset.Dataset(data_sources=file_pattern, reader=reader, decoder=decoder, num_samples=SPLITS_TO_SIZES[split_name], items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, num_classes=_NUM_CLASSES, labels_to_names=labels_to_names)
def getSplit (self, split_name, nValidations = None): if not nValidations: if not self.nValidations: raise ValueError("nValidations is not known"); else: nValidations = self.nValidations if not self.nTrains: photo_filenames, _ = self._get_filenames_and_classes() self.nTrains = len(photo_filenames) - self.nValidations if split_name not in self.SPLITS_NAMES: raise ValueError('split name %s was not recognized.' % split_name) file_pattern = self._FILE_PATTERN file_pattern = os.path.join(self.datasetDir, file_pattern % split_name) reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'), 'image/class/label': tf.FixedLenFeature( [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), } items_to_handlers = { 'image': slim.tfexample_decoder.Image(), 'label': slim.tfexample_decoder.Tensor('image/class/label'), } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(self.datasetDir): labels_to_names = dataset_utils.read_label_file(self.datasetDir) self.labels_to_names = labels_to_names self.nClasses = len(labels_to_names) else: raise ValueError("Can't find label file in the data directory: " + self.datasetDir); return slim.dataset.Dataset( data_sources=file_pattern, reader=reader, decoder=decoder, num_samples=self.nTrains, items_to_descriptions=self._ITEMS_TO_DESCRIPTIONS, num_classes=len(labels_to_names), labels_to_names=labels_to_names)
def get_dataset(split_name, dataset_dir): """Given a dataset name and a split_name returns a Dataset. Args: split_name: A train/test split name. dataset_dir: The directory where the dataset files are stored. file_pattern: The file pattern to use for matching the dataset source files. Returns: A `Dataset` class. """ file_pattern = os.path.join(dataset_dir, split_name + '.tfrecord') reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 'image/class/label': tf.FixedLenFeature([], tf.int64, default_value=tf.zeros([], dtype=tf.int64)) } items_to_handlers = { 'image': slim.tfexample_decoder.Image(), 'label': slim.tfexample_decoder.Tensor('image/class/label') } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(dataset_dir): labels_to_names = dataset_utils.read_label_file(dataset_dir) return slim.dataset.Dataset(data_sources=file_pattern, reader=reader, decoder=decoder, num_samples=get_sample_num(split_name), items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, num_classes=_NUM_CLASSES, labels_to_names=labels_to_names)
def get_dataset(dataset_name, split_name, dataset_dir): splits_to_sizes, num_classes, example_shape, items_to_descriptions \ = dataset_config(dataset_name) if split_name not in splits_to_sizes: raise ValueError('split name %s was not recognized.' % split_name) data_file = os.path.join(dataset_dir, FILE_PATTERN % (dataset_name, split_name)) # Allowing None in the signature so that dataset_factory can use the default. reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'), 'image/class/label': tf.FixedLenFeature([], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), } items_to_handlers = { 'image': slim.tfexample_decoder.Image(shape=example_shape), 'label': slim.tfexample_decoder.Tensor('image/class/label'), } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(dataset_dir): labels_to_names = dataset_utils.read_label_file(dataset_dir) return slim.dataset.Dataset(data_sources=data_file, reader=reader, decoder=decoder, num_samples=splits_to_sizes[split_name], items_to_descriptions=items_to_descriptions, num_classes=num_classes, labels_to_names=labels_to_names)
def get_split(split_name, dataset_dir, file_pattern=None, reader=None): if split_name not in SPLITS_TO_SIZES: raise ValueError('split name %s was not recognized.' % split_name) if not file_pattern: file_pattern = _FILE_PATTERN file_pattern = os.path.join(dataset_dir, file_pattern % split_name) # Allowing None in the signature so that dataset_factory can use the default. if reader is None: reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 'image/class/label': tf.FixedLenFeature([], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), } items_to_handlers = { 'image': slim.tfexample_decoder.Image(), 'label': slim.tfexample_decoder.Tensor('image/class/label'), } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(dataset_dir): labels_to_names = dataset_utils.read_label_file(dataset_dir) return slim.dataset.Dataset(data_sources=file_pattern, reader=reader, decoder=decoder, num_samples=SPLITS_TO_SIZES[split_name], items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, num_classes=_NUM_CLASSES, labels_to_names=labels_to_names)
def get_split(split_name, dataset_dir, file_pattern, reader, split_to_sizes, items_to_descriptions, num_classes): """Gets a dataset tuple with instructions for reading Pascal VOC dataset. Args: split_name: A train/test split name. dataset_dir: The base directory of the dataset sources. file_pattern: The file pattern to use when matching the dataset sources. It is assumed that the pattern contains a '%s' string so that the split name can be inserted. reader: The TensorFlow reader type. Returns: A `Dataset` namedtuple. Raises: ValueError: if `split_name` is not a valid train/test split. """ if split_name not in split_to_sizes: raise ValueError('split name %s was not recognized.' % split_name) file_pattern = os.path.join(dataset_dir, file_pattern % split_name) # Allowing None in the signature so that dataset_factory can use the default. if reader is None: reader = tf.TFRecordReader # Features in Pascal VOC TFRecords. keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 'image/height': tf.FixedLenFeature([1], tf.int64), 'image/width': tf.FixedLenFeature([1], tf.int64), 'image/channels': tf.FixedLenFeature([1], tf.int64), 'image/shape': tf.FixedLenFeature([3], tf.int64), 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64), 'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64), 'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64), } items_to_handlers = { 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 'shape': slim.tfexample_decoder.Tensor('image/shape'), 'object/bbox': slim.tfexample_decoder.BoundingBox( ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), 'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'), 'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'), 'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'), } decoder = slim.tfexample_decoder.TFExampleDecoder( keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(dataset_dir): labels_to_names = dataset_utils.read_label_file(dataset_dir) # else: # labels_to_names = create_readable_names_for_imagenet_labels() # dataset_utils.write_label_file(labels_to_names, dataset_dir) return slim.dataset.Dataset( data_sources=file_pattern, reader=reader, decoder=decoder, num_samples=split_to_sizes[split_name], items_to_descriptions=items_to_descriptions, num_classes=num_classes, labels_to_names=labels_to_names)
def get_split(split_name, dataset_dir, labels_dir=None, file_pattern=None): """Retrieves a InputData object with the parameters for reading ImageNet data. Args: split_name: A train/test split name. dataset_dir: The base directory of the dataset sources. labels_dir: The folder where the labels file is located, and where it will be eventually written if missing. file_pattern: The file pattern to use when matching the dataset sources. It is assumed that the pattern contains a '%s' string so that the split name can be inserted. Returns: An `InputData` object. Raises: ValueError: if `split_name` is not a valid train/test split. """ if split_name not in _SPLITS_TO_SIZES: raise ValueError('split name %s was not recognized.' % split_name) if not labels_dir: labels_dir = dataset_dir if not file_pattern: file_pattern = _FILE_PATTERN file_pattern = file_pattern % split_name files = [] # Allow for filename expansion w/out using Glob(). # Example: 'train-[0,1023,05d]-of-01024' to generate: # train-00000-of-01024 # train-00001-of-01024 # ... # train-01023-of-01024 m = re.match(r'(.*)\[(\d+),(\d+),([a-zA-Z0-9]+)\](.*)', file_pattern) if m: format_string = '%' + m.group(4) for n in range(int(m.group(2)), int(m.group(3)) + 1): seqstr = format_string % n files.append( os.path.join(dataset_dir, m.group(1) + seqstr + m.group(5))) else: path = os.path.join(dataset_dir, file_pattern) # If the file_pattern ends with '.list', then the file is supposed to be a # file which lists the input files one per line. if path.endswith('.list'): with gfile.Open(path, 'r') as list_file: for fpath in list_file: fpath = fpath.strip() if fpath: files.append(fpath) elif path.find('*') < 0: # If the path does not contain any glob pattern, assume it is a single # input file. Detection for glob patters might be more complex, but all # the examples seen so far, uses '*' only. files.append(path) else: # Otherwise we assume it is a glob-able path. files = gfile.Glob(path) keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 'image/class/label': tf.FixedLenFeature([], dtype=tf.int64, default_value=-1), 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), 'image/object/class/label': tf.VarLenFeature(dtype=tf.int64), } items_to_handlers = { 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 'label': slim.tfexample_decoder.Tensor('image/class/label'), 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), 'object/bbox': slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), 'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'), } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(labels_dir): labels_to_names = dataset_utils.read_label_file(labels_dir) else: labels_to_names = create_readable_names_for_imagenet_labels() dataset_utils.write_label_file(labels_to_names, labels_dir) return InputData(data_sources=files, decoder=decoder, num_samples=_SPLITS_TO_SIZES[split_name], items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, num_classes=_NUM_CLASSES, labels_to_names=labels_to_names)
def get_split(split_name, dataset_dir, file_pattern=None, reader=None): """Gets a dataset tuple with instructions for reading ImageNet. Args: split_name: A train/test split name. dataset_dir: The base directory of the dataset sources. file_pattern: The file pattern to use when matching the dataset sources. It is assumed that the pattern contains a '%s' string so that the split name can be inserted. reader: The TensorFlow reader type. Returns: A `Dataset` namedtuple. Raises: ValueError: if `split_name` is not a valid train/test split. """ if split_name not in _SPLITS_TO_SIZES: raise ValueError('split name %s was not recognized.' % split_name) if not file_pattern: file_pattern = _FILE_PATTERN file_pattern = os.path.join(dataset_dir, file_pattern % split_name) # Allowing None in the signature so that dataset_factory can use the default. if reader is None: reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature( (), tf.string, default_value=''), 'image/format': tf.FixedLenFeature( (), tf.string, default_value='jpeg'), 'image/class/label': tf.FixedLenFeature( [], dtype=tf.int64, default_value=-1), 'image/class/text': tf.FixedLenFeature( [], dtype=tf.string, default_value=''), 'image/object/bbox/xmin': tf.VarLenFeature( dtype=tf.float32), 'image/object/bbox/ymin': tf.VarLenFeature( dtype=tf.float32), 'image/object/bbox/xmax': tf.VarLenFeature( dtype=tf.float32), 'image/object/bbox/ymax': tf.VarLenFeature( dtype=tf.float32), 'image/object/class/label': tf.VarLenFeature( dtype=tf.int64), } items_to_handlers = { 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 'label': slim.tfexample_decoder.Tensor('image/class/label'), 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), 'object/bbox': slim.tfexample_decoder.BoundingBox( ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), 'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'), } decoder = slim.tfexample_decoder.TFExampleDecoder( keys_to_features, items_to_handlers) labels_to_names = None if LOAD_READABLE_NAMES: if dataset_utils.has_labels(dataset_dir): labels_to_names = dataset_utils.read_label_file(dataset_dir) else: labels_to_names = create_readable_names_for_imagenet_labels() dataset_utils.write_label_file(labels_to_names, dataset_dir) return slim.dataset.Dataset( data_sources=file_pattern, reader=reader, decoder=decoder, num_samples=_SPLITS_TO_SIZES[split_name], items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, num_classes=_NUM_CLASSES, labels_to_names=labels_to_names)
reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 'image/class/label': tf.FixedLenFeature( [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), } items_to_handlers = { 'image': slim.tfexample_decoder.Image(), 'label': slim.tfexample_decoder.Tensor('image/class/label'), } decoder = slim.tfexample_decoder.TFExampleDecoder( keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(dataset_dir): labels_to_names = dataset_utils.read_label_file(dataset_dir) return slim.dataset.Dataset( data_sources=file_pattern, reader=reader, decoder=decoder, num_samples=SPLITS_TO_SIZES[split_name], items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, num_classes=_NUM_CLASSES, labels_to_names=labels_to_names)