def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions for reading cifar10.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the
  # default.
  if not reader:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
      'image/class/label': tf.FixedLenFeature(
          [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),
      'label': slim.tfexample_decoder.Tensor('image/class/label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names)
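
# A minimal usage sketch, not part of the original module: it shows how the
# `Dataset` returned by get_split() above is typically consumed through
# TF-Slim's queue-based DatasetDataProvider. The dataset directory and the
# num_readers value are hypothetical placeholders; the sketch assumes the
# module-level imports already used above (tf, slim).
def _example_read_cifar10(dataset_dir='/tmp/cifar10'):
  dataset = get_split('train', dataset_dir)
  provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset, num_readers=4, shuffle=True)
  # Item names match the keys of items_to_handlers in get_split().
  image, label = provider.get(['image', 'label'])
  with tf.Session() as sess:
    # slim.queues.QueueRunners starts and stops the input queue threads.
    with slim.queues.QueueRunners(sess):
      np_image, np_label = sess.run([image, label])
      print('image shape: %s, label: %d' % (np_image.shape, np_label))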
def classify_image(image_path, train_dir, label_dir):
  image_size = 299
  with tf.Graph().as_default():
    # Read the raw image bytes; 'rb' is required so that decoding also works
    # under Python 3.
    image_string = tf.gfile.FastGFile(image_path, 'rb').read()
    _, image_ext = os.path.splitext(image_path)
    if image_ext in ['.jpg', '.jpeg']:
      image = tf.image.decode_jpeg(image_string, channels=3)
    elif image_ext == '.png':
      image = tf.image.decode_png(image_string, channels=3)
    else:
      raise ValueError('image format not supported, must be jpg or png')
    processed_image = inception_preprocessing.preprocess_image(
        image, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    labels_to_names = dataset_utils.read_label_file(label_dir)

    # Create the model, use the default arg scope to configure the batch
    # norm parameters.
    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
      logits, endpoints = inception_v4.inception_v4(
          processed_images,
          num_classes=len(labels_to_names),
          is_training=False)
    probabilities = endpoints['Predictions']

    checkpoint_path = tf.train.latest_checkpoint(train_dir)
    saver = tf.train.Saver(tf.model_variables())

    with tf.Session() as sess:
      saver.restore(sess, checkpoint_path)
      probabilities = sess.run(probabilities)
      probabilities = probabilities[0, 0:]
      sorted_inds = [
          i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
      ]

    # Print the five most probable classes.
    for i in range(5):
      index = sorted_inds[i]
      print('Probability %0.2f%% => [%s]' %
            (probabilities[index] * 100, labels_to_names[index]))
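
# An illustrative invocation of classify_image() above. All three paths are
# hypothetical placeholders: the input image, the training directory holding
# the Inception V4 checkpoints, and the directory holding the label file
# written when the dataset was created.
def _example_classify_image():
  classify_image(
      image_path='/tmp/example.jpg',        # jpg or png input image
      train_dir='/tmp/inception_v4_logs',   # directory with checkpoints
      label_dir='/tmp/dataset')             # directory with the label file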
def get_split_lips(split_name,
                   tfrecord_dir,
                   file_pattern=None,
                   reader=None,
                   num_frames=12):
  """Gets a dataset tuple for reading lip movement videos.

  Args:
    split_name: A 'train'/'validation' split name.
    tfrecord_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.
    num_frames: The number of frames contained in each video sample (this is
      determined when we create the TFRecord files).

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/validation split.
  """
  if split_name not in ['train', 'validation']:
    raise ValueError(
        'The split_name %s is not recognized. Please input either train or '
        'validation as the split_name.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN

  # Compute the number of samples contained in the dataset by iterating over
  # all matching TFRecord files.
  num_samples = 0
  tfrecords_to_count = [
      os.path.join(tfrecord_dir, file)
      for file in os.listdir(tfrecord_dir)
      if fnmatch.fnmatch(file, file_pattern % split_name)
  ]
  for tfrecord_file in tfrecords_to_count:
    for record in tf.python_io.tf_record_iterator(tfrecord_file):
      num_samples += 1

  file_pattern = os.path.join(tfrecord_dir, file_pattern % split_name)

  if reader is None:
    reader = tf.TFRecordReader

  # Create the keys_to_features dictionary for the decoder.
  keys_to_features = {
      'video/data': tf.FixedLenFeature((60, 80, num_frames), tf.float32),
      'video/label': tf.FixedLenFeature(
          (), tf.int64, default_value=tf.zeros((), dtype=tf.int64)),
  }

  items_to_handlers = {
      'video': slim.tfexample_decoder.Tensor(
          'video/data', shape=(60, 80, num_frames, 1)),
      'label': slim.tfexample_decoder.Tensor('video/label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  num_classes = None
  if dataset_utils.has_labels(tfrecord_dir):
    labels_to_names = dataset_utils.read_label_file(tfrecord_dir)
    num_classes = len(labels_to_names)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=num_samples,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=num_classes,
      labels_to_names=labels_to_names)
def get_split_color_depth(split_name,
                          tfrecord_dir,
                          file_pattern=None,
                          reader=None,
                          color_channels=3,
                          depth_channels=3):
  """Gets a dataset tuple for reading color-depth image pairs.

  Args:
    split_name: A 'train'/'validation' split name.
    tfrecord_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.
    color_channels: The number of channels contained in the color images of
      the output dataset.
    depth_channels: The number of channels contained in the depth images of
      the output dataset.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/validation split.
  """
  if split_name not in ['train', 'validation']:
    raise ValueError(
        'The split_name %s is not recognized. Please input either train or '
        'validation as the split_name.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN

  # Compute the number of samples contained in the dataset by iterating over
  # all matching TFRecord files.
  num_samples = 0
  tfrecords_to_count = [
      os.path.join(tfrecord_dir, file)
      for file in os.listdir(tfrecord_dir)
      if fnmatch.fnmatch(file, file_pattern % split_name)
  ]
  for tfrecord_file in tfrecords_to_count:
    for record in tf.python_io.tf_record_iterator(tfrecord_file):
      num_samples += 1

  file_pattern = os.path.join(tfrecord_dir, file_pattern % split_name)

  if reader is None:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/color/encoded': tf.FixedLenFeature((), tf.string),
      'image/color/format': tf.FixedLenFeature((), tf.string),
      'image/depth/encoded': tf.FixedLenFeature((), tf.string),
      'image/depth/format': tf.FixedLenFeature((), tf.string),
      'image/class/label': tf.FixedLenFeature(
          (), tf.int64, default_value=tf.zeros((), dtype=tf.int64)),
  }

  items_to_handlers = {
      'image/color': slim.tfexample_decoder.Image(
          image_key='image/color/encoded',
          format_key='image/color/format',
          channels=color_channels),
      'image/depth': slim.tfexample_decoder.Image(
          image_key='image/depth/encoded',
          format_key='image/depth/format',
          channels=depth_channels),
      'label': slim.tfexample_decoder.Tensor('image/class/label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  num_classes = None
  if dataset_utils.has_labels(tfrecord_dir):
    labels_to_names = dataset_utils.read_label_file(tfrecord_dir)
    num_classes = len(labels_to_names)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=num_samples,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=num_classes,
      labels_to_names=labels_to_names)
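
# A minimal sketch, not part of the original module, of consuming the paired
# color/depth dataset above. The item names match the items_to_handlers keys
# in get_split_color_depth(); the TFRecord directory is a hypothetical path.
def _example_read_color_depth(tfrecord_dir='/tmp/color_depth'):
  dataset = get_split_color_depth('train', tfrecord_dir)
  provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
  # Both modalities of one example are decoded together, so the color and
  # depth images stay aligned.
  color, depth, label = provider.get(['image/color', 'image/depth', 'label'])
  with tf.Session() as sess:
    with slim.queues.QueueRunners(sess):
      np_color, np_depth, np_label = sess.run([color, depth, label])
      print(np_color.shape, np_depth.shape, np_label)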
def get_split(split_name, dataset_dir, file_pattern, reader,
              items_to_descriptions, num_classes):
  """Gets a dataset tuple with instructions for reading the Pascal VOC dataset.

  Args:
    split_name: A trainval/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.
    items_to_descriptions: A dictionary mapping the items returned by the
      dataset to short descriptions.
    num_classes: The number of classes in the dataset.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid trainval/test split.
  """
  if split_name not in ['trainval', 'test']:
    raise ValueError('split name %s was not recognized.' % split_name)

  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the
  # default.
  if reader is None:
    reader = tf.TFRecordReader

  # Features in Pascal VOC TFRecords.
  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'image/height': tf.FixedLenFeature([1], tf.int64),
      'image/width': tf.FixedLenFeature([1], tf.int64),
      'image/channels': tf.FixedLenFeature([1], tf.int64),
      'image/shape': tf.FixedLenFeature([3], tf.int64),
      'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
      'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
      'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
      'shape': slim.tfexample_decoder.Tensor('image/shape'),
      'object/bbox': slim.tfexample_decoder.BoundingBox(
          ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
      'object/label': slim.tfexample_decoder.Tensor(
          'image/object/bbox/label'),
      'object/difficult': slim.tfexample_decoder.Tensor(
          'image/object/bbox/difficult'),
      'object/truncated': slim.tfexample_decoder.Tensor(
          'image/object/bbox/truncated'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)
  # else:
  #   labels_to_names = create_readable_names_for_imagenet_labels()
  #   dataset_utils.write_label_file(labels_to_names, dataset_dir)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=split_to_sizes[split_name],
      items_to_descriptions=items_to_descriptions,
      num_classes=num_classes,
      labels_to_names=labels_to_names)
def get_split_mfcc_lips(split_name,
                        tfrecord_dir,
                        file_pattern=None,
                        reader=None,
                        num_frames_audio=24,
                        num_frames_video=12):
  """Gets a dataset tuple for reading mfcc features and videos.

  Args:
    split_name: One of 'train_all'/'trainAT'/'trainUZ'/'validation'/
      'validationAT'/'validationUZ'.
    tfrecord_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.
    num_frames_audio: The number of frames of the stored audios. This is
      fixed once the TFRecords are generated.
    num_frames_video: The number of frames of the stored videos. This is
      fixed once the TFRecords are generated.

  Returns:
    A `Dataset` namedtuple.
  """
  if not file_pattern:
    file_pattern = _FILE_PATTERN

  # Compute the number of samples contained in the dataset by iterating over
  # all matching TFRecord files.
  num_samples = 0
  tfrecords_to_count = [
      os.path.join(tfrecord_dir, file)
      for file in os.listdir(tfrecord_dir)
      if fnmatch.fnmatch(file, file_pattern % split_name)
  ]
  for tfrecord_file in tfrecords_to_count:
    for record in tf.python_io.tf_record_iterator(tfrecord_file):
      num_samples += 1

  file_pattern = os.path.join(tfrecord_dir, file_pattern % split_name)

  if reader is None:
    reader = tf.TFRecordReader

  keys_to_features = {
      'audio/mfcc': tf.FixedLenFeature((26, num_frames_audio), tf.float32),
      'video/data': tf.FixedLenFeature(
          (60, 80, num_frames_video), tf.float32),
      'label': tf.FixedLenFeature(
          (), tf.int64, default_value=tf.zeros((), dtype=tf.int64)),
  }

  items_to_handlers = {
      'mfcc': slim.tfexample_decoder.Tensor(
          'audio/mfcc', shape=(26, num_frames_audio, 1)),
      'video': slim.tfexample_decoder.Tensor(
          'video/data', shape=(60, 80, num_frames_video, 1)),
      'label': slim.tfexample_decoder.Tensor('label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  num_classes = None
  if dataset_utils.has_labels(tfrecord_dir):
    labels_to_names = dataset_utils.read_label_file(tfrecord_dir)
    num_classes = len(labels_to_names)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=num_samples,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=num_classes,
      labels_to_names=labels_to_names)
def get_split_avicar(split_name,
                     tfrecord_dir,
                     file_pattern=None,
                     reader=None,
                     num_frames=20):
  """Gets a dataset tuple for reading avicar audio samples.

  Args:
    split_name: The name of the split to read.
    tfrecord_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.
    num_frames: The number of mfcc frames per sample. This is fixed once the
      TFRecords are generated.

  Returns:
    A `Dataset` namedtuple.
  """
  if not file_pattern:
    file_pattern = _FILE_PATTERN

  # Compute the number of samples contained in the dataset by iterating over
  # all matching TFRecord files.
  num_samples = 0
  tfrecords_to_count = [
      os.path.join(tfrecord_dir, file)
      for file in os.listdir(tfrecord_dir)
      if fnmatch.fnmatch(file, file_pattern % split_name)
  ]
  for tfrecord_file in tfrecords_to_count:
    for record in tf.python_io.tf_record_iterator(tfrecord_file):
      num_samples += 1

  file_pattern = os.path.join(tfrecord_dir, file_pattern % split_name)

  if reader is None:
    reader = tf.TFRecordReader

  # Create the keys_to_features dictionary for the decoder.
  keys_to_features = {
      'audio/wav/data': tf.VarLenFeature(tf.float32),
      'audio/wav/length': tf.FixedLenFeature((), tf.int64),
      'audio/mfcc': tf.FixedLenFeature((26, num_frames), tf.float32),
      'audio/label': tf.FixedLenFeature(
          (), tf.int64, default_value=tf.zeros((), dtype=tf.int64)),
  }

  items_to_handlers = {
      'wav': slim.tfexample_decoder.Tensor(
          'audio/wav/data', shape_keys='audio/wav/length'),
      'mfcc': slim.tfexample_decoder.Tensor(
          'audio/mfcc', shape=(26, num_frames, 1)),
      'label': slim.tfexample_decoder.Tensor('audio/label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  num_classes = None
  if dataset_utils.has_labels(tfrecord_dir):
    labels_to_names = dataset_utils.read_label_file(tfrecord_dir)
    num_classes = len(labels_to_names)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=num_samples,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=num_classes,
      labels_to_names=labels_to_names)
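
# A short sketch, not part of the original module, showing how the
# variable-length waveform is consumed: 'audio/wav/data' is stored as a
# VarLenFeature, and the Tensor handler uses 'audio/wav/length' as a shape
# key so each decoded 'wav' tensor carries its own stored length. The
# TFRecord directory below is a hypothetical path.
def _example_read_avicar(tfrecord_dir='/tmp/avicar'):
  dataset = get_split_avicar('train', tfrecord_dir)
  provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
  wav, mfcc, label = provider.get(['wav', 'mfcc', 'label'])
  with tf.Session() as sess:
    with slim.queues.QueueRunners(sess):
      np_wav, np_mfcc, np_label = sess.run([wav, mfcc, label])
      # np_wav length varies per example; np_mfcc is (26, num_frames, 1).
      print(len(np_wav), np_mfcc.shape, np_label)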
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions for reading ImageNet.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the
  # default.
  if reader is None:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'image/class/label': tf.FixedLenFeature(
          [], dtype=tf.int64, default_value=-1),
      'image/class/text': tf.FixedLenFeature(
          [], dtype=tf.string, default_value=''),
      'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
      'image/object/class/label': tf.VarLenFeature(dtype=tf.int64),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
      'label': slim.tfexample_decoder.Tensor('image/class/label'),
      'label_text': slim.tfexample_decoder.Tensor('image/class/text'),
      'object/bbox': slim.tfexample_decoder.BoundingBox(
          ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
      'object/label': slim.tfexample_decoder.Tensor(
          'image/object/class/label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)
  else:
    labels_to_names = create_readable_names_for_imagenet_labels()
    dataset_utils.write_label_file(labels_to_names, dataset_dir)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names)