Example #1
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading cifar10.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
    if split_name not in SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if not reader:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/class/label':
        tf.FixedLenFeature([],
                           tf.int64,
                           default_value=tf.zeros([], dtype=tf.int64)),
    }

    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=SPLITS_TO_SIZES[split_name],
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=_NUM_CLASSES,
                                labels_to_names=labels_to_names)
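
The returned tuple is typically consumed through slim's DatasetDataProvider. A minimal sketch, assuming a placeholder dataset directory and that the queue runners are started by the caller (for example by slim.learning.train):

dataset = get_split('train', '/tmp/cifar10')  # placeholder directory
provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset, num_readers=4, shuffle=True)
image, label = provider.get(['image', 'label'])  # image: uint8, [32, 32, 3]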
Example #2
def classify_image(image_path, train_dir, label_dir):
    """Classifies a single image with a trained Inception V4 model.

    Loads the latest checkpoint from `train_dir`, decodes and preprocesses
    the image at `image_path`, and prints the top five predicted labels
    using the label file found in `label_dir`.
    """
    image_size = 299

    with tf.Graph().as_default():

        # Binary mode is required: the decode ops expect raw bytes.
        image_string = tf.gfile.FastGFile(image_path, 'rb').read()
        _, image_ext = os.path.splitext(image_path)

        if image_ext in ['.jpg', '.jpeg']:
            image = tf.image.decode_jpeg(image_string, channels=3)
        elif image_ext == '.png':
            image = tf.image.decode_png(image_string, channels=3)
        else:
            raise ValueError('image format not supported, must be jpg or png')

        processed_image = inception_preprocessing.preprocess_image(
            image, image_size, image_size, is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)

        labels_to_names = dataset_utils.read_label_file(label_dir)

        # Create the model, use the default arg scope to
        # configure the batch norm parameters.
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits, endpoints = inception_v4.inception_v4(
                processed_images,
                num_classes=len(labels_to_names),
                is_training=False)
        probabilities = endpoints['Predictions']

        checkpoint_path = tf.train.latest_checkpoint(train_dir)
        saver = tf.train.Saver(tf.model_variables())

        with tf.Session() as sess:
            saver.restore(sess, checkpoint_path)

            probabilities = sess.run(probabilities)
            probabilities = probabilities[0]
            sorted_inds = [
                i[0]
                for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
            ]

        for i in range(5):
            index = sorted_inds[i]
            print('Probability %0.2f%% => [%s]' %
                  (probabilities[index] * 100, labels_to_names[index]))
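
A minimal call; all three paths below are placeholders:

classify_image('/tmp/sample.png',
               train_dir='/tmp/inception_v4_train_logs',
               label_dir='/tmp/dataset_with_labels_file')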
Example #3
def get_split_lips(split_name,
                   tfrecord_dir,
                   file_pattern=None,
                   reader=None,
                   num_frames=12):
    """Gets a dataset tuple for reading lip movement videos.

    Args:
        split_name: A 'train'/'validation' split name.
        tfrecord_dir: The base directory of the dataset sources.
        file_pattern: The file pattern to use when matching the
            dataset sources. It is assumed that the pattern contains
            a '%s' string so that the split name can be inserted.
        reader: The TensorFlow reader type.
        num_frames: The number of frames contained in each video sample
            (this is determined when we create the TFRecord files).

    Returns:
        A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/validation split.
    """
    if split_name not in ['train', 'validation']:
        raise ValueError(
            'The split_name %s is not recognized. ' % split_name +
            'Please input either train or validation as the split_name.')

    if not file_pattern:
        file_pattern = _FILE_PATTERN

    # Compute the number of samples contained in the dataset.
    num_samples = 0
    tfrecords_to_count = [
        os.path.join(tfrecord_dir, file)
        for file in os.listdir(tfrecord_dir)
        if fnmatch.fnmatch(file, file_pattern % split_name)]
    for tfrecord_file in tfrecords_to_count:
        for record in tf.python_io.tf_record_iterator(tfrecord_file):
            num_samples += 1

    file_pattern = os.path.join(tfrecord_dir, file_pattern % split_name)

    if reader is None:
        reader = tf.TFRecordReader

    # Create the keys_to_features dictionary for the decoder
    keys_to_features = {
        'video/data': tf.FixedLenFeature((60, 80, num_frames), tf.float32),
        'video/label': tf.FixedLenFeature(
            (), tf.int64, default_value=tf.zeros((), dtype=tf.int64)),
    }

    items_to_handlers = {
        'video': slim.tfexample_decoder.Tensor(
            'video/data', shape=(60, 80, num_frames, 1)),
        'label': slim.tfexample_decoder.Tensor('video/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    labels_to_names = None
    num_classes = None

    if dataset_utils.has_labels(tfrecord_dir):
        labels_to_names = dataset_utils.read_label_file(tfrecord_dir)
        num_classes = len(labels_to_names)

    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=num_samples,
        items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
        num_classes=num_classes,
        labels_to_names=labels_to_names)
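
As with the image splits, the tuple is usually wrapped in a DatasetDataProvider and then batched. A sketch, assuming a placeholder TFRecord directory and that the training loop starts the queue runners:

dataset = get_split_lips('train', '/tmp/lips_tfrecords')  # placeholder
provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
video, label = provider.get(['video', 'label'])  # video: (60, 80, 12, 1)
videos, labels = tf.train.batch([video, label], batch_size=32)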
Example #4
def get_split_color_depth(split_name,
                          tfrecord_dir,
                          file_pattern=None,
                          reader=None,
                          color_channels=3,
                          depth_channels=3):
    """Gets a dataset tuple for reading color-depth image pairs.

    Args:
        split_name: A 'train'/'validation' split name.
        tfrecord_dir: The base directory of the dataset sources.
        file_pattern: The file pattern to use when matching the
            dataset sources. It is assumed that the pattern contains
            a '%s' string so that the split name can be inserted.
        reader: The TensorFlow reader type.
        color_channels: The number of channels contained in the color
            images of the output dataset.
        depth_channels: The number of channels contained in the depth
            images of the output dataset.

    Returns:
        A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/validation split.
    """
    if split_name not in ['train', 'validation']:
        raise ValueError(
            'The split_name %s is not recognized. ' % split_name +
            'Please input either train or validation as the split_name.')

    if not file_pattern:
        file_pattern = _FILE_PATTERN

    num_samples = 0
    tfrecords_to_count = [
        os.path.join(tfrecord_dir, file) for file in os.listdir(tfrecord_dir)
        if fnmatch.fnmatch(file, file_pattern % split_name)
    ]
    for tfrecord_file in tfrecords_to_count:
        for record in tf.python_io.tf_record_iterator(tfrecord_file):
            num_samples += 1

    file_pattern = os.path.join(tfrecord_dir, file_pattern % split_name)

    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/color/encoded':
        tf.FixedLenFeature((), tf.string),
        'image/color/format':
        tf.FixedLenFeature((), tf.string),
        'image/depth/encoded':
        tf.FixedLenFeature((), tf.string),
        'image/depth/format':
        tf.FixedLenFeature((), tf.string),
        'image/class/label':
        tf.FixedLenFeature((),
                           tf.int64,
                           default_value=tf.zeros((), dtype=tf.int64)),
    }

    items_to_handlers = {
        'image/color':
        slim.tfexample_decoder.Image(image_key='image/color/encoded',
                                     format_key='image/color/format',
                                     channels=color_channels),
        'image/depth':
        slim.tfexample_decoder.Image(image_key='image/depth/encoded',
                                     format_key='image/depth/format',
                                     channels=depth_channels),
        'label':
        slim.tfexample_decoder.Tensor('image/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    num_classes = None

    if dataset_utils.has_labels(tfrecord_dir):
        labels_to_names = dataset_utils.read_label_file(tfrecord_dir)
        num_classes = len(labels_to_names)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=num_samples,
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=num_classes,
                                labels_to_names=labels_to_names)
Example #5
def get_split(split_name, dataset_dir, file_pattern, reader,
              items_to_descriptions, num_classes):
    """Gets a dataset tuple with instructions for reading Pascal VOC dataset.

    Args:
      split_name: A trainval/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.
      items_to_descriptions: A dict mapping the items provided by the dataset
        to human-readable descriptions.
      num_classes: The number of classes in the dataset.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid trainval/test split.
    """
    if split_name not in ['trainval', 'test']:
        raise ValueError('split name %s was not recognized.' % split_name)
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader
    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string,
                                           default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape':
        slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult':
        slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated':
        slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    # else:
    #     labels_to_names = create_readable_names_for_imagenet_labels()
    #     dataset_utils.write_label_file(labels_to_names, dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=split_to_sizes[split_name],
                                items_to_descriptions=items_to_descriptions,
                                num_classes=num_classes,
                                labels_to_names=labels_to_names)
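
For this detection split, the variable-length box coordinates come back from the provider as a [num_boxes, 4] tensor in normalized [ymin, xmin, ymax, xmax] order, which matches what tf.image.draw_bounding_boxes expects. A sketch; the directory, the class count of 21, and the queue-runner setup are assumptions:

dataset = get_split('trainval', '/tmp/pascal_voc', _FILE_PATTERN, None,
                    _ITEMS_TO_DESCRIPTIONS, num_classes=21)
provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
image, bboxes, labels = provider.get(['image', 'object/bbox', 'object/label'])

# draw_bounding_boxes wants batched float images and batched boxes.
image_batch = tf.expand_dims(tf.to_float(image) / 255.0, 0)
annotated = tf.image.draw_bounding_boxes(image_batch,
                                         tf.expand_dims(bboxes, 0))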
Example #6
def get_split_mfcc_lips(split_name,
                        tfrecord_dir,
                        file_pattern=None,
                        reader=None,
                        num_frames_audio=24,
                        num_frames_video=12):
    """Gets a dataset tuple for reading mfcc features and videos.

    Args:
        split_name: 'train_all'/'trainAT'/'trainUZ'/'validation'/
            'validationAT'/'validationUZ'.
        tfrecord_dir: The base directory of the dataset sources.
        file_pattern: The file pattern to use when matching the
            dataset sources. It is assumed that the pattern contains
            a '%s' string so that the split name can be inserted.
        reader: The TensorFlow reader type.
        num_frames_audio: The number of frames of the stored audios.
            This is fixed once the TFRecords are generated.
        num_frames_video: The number of frames of the stored videos.
            This is fixed once the TFRecords are generated.

    Returns:
        A `Dataset` namedtuple.
    """

    if not file_pattern:
        file_pattern = _FILE_PATTERN

    num_samples = 0
    tfrecords_to_count = [
        os.path.join(tfrecord_dir, file) for file in os.listdir(tfrecord_dir)
        if fnmatch.fnmatch(file, file_pattern % split_name)
    ]
    for tfrecord_file in tfrecords_to_count:
        for record in tf.python_io.tf_record_iterator(tfrecord_file):
            num_samples += 1

    file_pattern = os.path.join(tfrecord_dir, file_pattern % split_name)

    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'audio/mfcc':
        tf.FixedLenFeature((26, num_frames_audio), tf.float32),
        'video/data':
        tf.FixedLenFeature((60, 80, num_frames_video), tf.float32),
        'label':
        tf.FixedLenFeature((),
                           tf.int64,
                           default_value=tf.zeros((), dtype=tf.int64)),
    }

    items_to_handlers = {
        'mfcc':
        slim.tfexample_decoder.Tensor('audio/mfcc',
                                      shape=(26, num_frames_audio, 1)),
        'video':
        slim.tfexample_decoder.Tensor('video/data',
                                      shape=(60, 80, num_frames_video, 1)),
        'label':
        slim.tfexample_decoder.Tensor('label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    num_classes = None

    if dataset_utils.has_labels(tfrecord_dir):
        labels_to_names = dataset_utils.read_label_file(tfrecord_dir)
        num_classes = len(labels_to_names)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=num_samples,
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=num_classes,
                                labels_to_names=labels_to_names)
Example #7
def get_split_avicar(split_name,
                     tfrecord_dir,
                     file_pattern=None,
                     reader=None,
                     num_frames=20):
    """Gets a dataset tuple for reading AVICAR audio samples.

    Args:
        split_name: The split name used to match the TFRecord file names.
        tfrecord_dir: The base directory of the dataset sources.
        file_pattern: The file pattern to use when matching the
            dataset sources. It is assumed that the pattern contains
            a '%s' string so that the split name can be inserted.
        reader: The TensorFlow reader type.
        num_frames: The number of MFCC frames per audio sample
            (this is determined when we create the TFRecord files).

    Returns:
        A `Dataset` namedtuple.
    """
    if not file_pattern:
        file_pattern = _FILE_PATTERN

    num_samples = 0
    tfrecords_to_count = [
        os.path.join(tfrecord_dir, file) for file in os.listdir(tfrecord_dir)
        if fnmatch.fnmatch(file, file_pattern % split_name)
    ]
    for tfrecord_file in tfrecords_to_count:
        for record in tf.python_io.tf_record_iterator(tfrecord_file):
            num_samples += 1

    file_pattern = os.path.join(tfrecord_dir, file_pattern % split_name)

    if reader is None:
        reader = tf.TFRecordReader

    # Create the keys_to_features dictionary for the decoder
    keys_to_features = {
        'audio/wav/data':
        tf.VarLenFeature(tf.float32),
        'audio/wav/length':
        tf.FixedLenFeature((), tf.int64),
        'audio/mfcc':
        tf.FixedLenFeature((26, num_frames), tf.float32),
        'audio/label':
        tf.FixedLenFeature((),
                           tf.int64,
                           default_value=tf.zeros((), dtype=tf.int64)),
    }

    items_to_handlers = {
        'wav':
        slim.tfexample_decoder.Tensor('audio/wav/data',
                                      shape_keys='audio/wav/length'),
        'mfcc':
        slim.tfexample_decoder.Tensor('audio/mfcc', shape=(26, num_frames, 1)),
        'label':
        slim.tfexample_decoder.Tensor('audio/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    num_classes = None

    if dataset_utils.has_labels(tfrecord_dir):
        labels_to_names = dataset_utils.read_label_file(tfrecord_dir)
        num_classes = len(labels_to_names)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=num_samples,
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=num_classes,
                                labels_to_names=labels_to_names)
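
Because 'audio/wav/data' is a VarLenFeature reshaped through the 'audio/wav/length' shape key, the provider yields a dense 1-D waveform per sample. A sketch with a placeholder directory and split name:

dataset = get_split_avicar('train', '/tmp/avicar_tfrecords')  # placeholder
provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
wav, mfcc, label = provider.get(['wav', 'mfcc', 'label'])
# wav: float32, shape [length]; mfcc: float32, shape [26, 20, 1]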
Example #8
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading ImageNet.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
        tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/object/bbox/xmin':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label':
        tf.VarLenFeature(dtype=tf.int64),
    }

    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'label':
        slim.tfexample_decoder.Tensor('image/class/label'),
        'label_text':
        slim.tfexample_decoder.Tensor('image/class/text'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    else:
        labels_to_names = create_readable_names_for_imagenet_labels()
        dataset_utils.write_label_file(labels_to_names, dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=_SPLITS_TO_SIZES[split_name],
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=_NUM_CLASSES,
                                labels_to_names=labels_to_names)
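
A usage sketch with a placeholder directory; note that the first call against a directory without a labels file also writes one via the else branch above:

dataset = get_split('train', '/tmp/imagenet')  # placeholder directory
provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
image, label, label_text = provider.get(['image', 'label', 'label_text'])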