Example #1
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  if not tf.gfile.Exists(FLAGS.dataset_dir):
    tf.gfile.MakeDirs(FLAGS.dataset_dir)

  _download_dataset(FLAGS.dataset_dir)
  photo_filenames, class_names = _get_filenames_and_classes(FLAGS.dataset_dir)
  class_names_to_ids = dict(zip(class_names, range(len(class_names))))

  # Divide into train and test:
  random.seed(_RANDOM_SEED)
  random.shuffle(photo_filenames)
  training_filenames = photo_filenames[_NUM_VALIDATION:]
  validation_filenames = photo_filenames[:_NUM_VALIDATION]

  # First, convert the training and validation sets.
  _convert_dataset('train', training_filenames, class_names_to_ids,
                   FLAGS.dataset_dir)
  _convert_dataset('validation', validation_filenames, class_names_to_ids,
                   FLAGS.dataset_dir)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(class_names)), class_names))
  dataset_utils.write_label_file(labels_to_class_names, FLAGS.dataset_dir)

  _clean_up_temporary_files(FLAGS.dataset_dir)
  print('\nFinished converting the Flowers dataset!')
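The example above assumes a few module-level constants that are not shown. As a minimal sketch, plausible values modeled on the TF-slim flowers conversion script would be the following (the exact numbers are assumptions; adjust them per dataset):

# Assumed module-level constants for the example above; the values mirror the
# TF-slim flowers script and are placeholders rather than the original source.
_NUM_VALIDATION = 350  # number of images held out for the validation split
_RANDOM_SEED = 0       # fixed seed so the shuffle (and thus the split) is reproducible
_NUM_SHARDS = 5        # number of TFRecord shards written per split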
Example #2
def run(training_data_dir, protobuf_dir):
  """Runs the conversion operation.

  Args:
    training_data_dir: The directory where the source images are stored.
    protobuf_dir: The directory where the labels file is written and where
      existing dataset files are checked for.
  """
  if not tf.gfile.Exists(protobuf_dir):
    tf.gfile.MakeDirs(protobuf_dir)

  if _dataset_exists(protobuf_dir):
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  photo_filenames, class_names = _get_filenames_and_classes(training_data_dir)
  class_names_to_ids = dict(zip(class_names, range(len(class_names))))

  # Divide into train and test:
  random.seed(_RANDOM_SEED)
  random.shuffle(photo_filenames)
  training_filenames = photo_filenames[_NUM_VALIDATION:]
  validation_filenames = photo_filenames[:_NUM_VALIDATION]

  # First, convert the training and validation sets.
  _convert_dataset('train', training_filenames, class_names_to_ids,
                   training_data_dir)
  _convert_dataset('validation', validation_filenames, class_names_to_ids,
                   training_data_dir)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(class_names)), class_names))
  dataset_utils.write_label_file(labels_to_class_names, protobuf_dir)

  # _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the dataset!')
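The run function above, like several later examples, skips work when a _dataset_exists helper reports that the output already exists. A minimal sketch of such a helper, assuming the shard naming scheme used by the TF-slim conversion scripts (the filename pattern and _NUM_SHARDS value are assumptions):

import os
import tensorflow as tf

_NUM_SHARDS = 5  # assumed number of shards per split

def _get_dataset_filename(dataset_dir, split_name, shard_id):
  # e.g. 'flowers_train_00000-of-00005.tfrecord'
  output_filename = 'flowers_%s_%05d-of-%05d.tfrecord' % (
      split_name, shard_id, _NUM_SHARDS)
  return os.path.join(dataset_dir, output_filename)

def _dataset_exists(dataset_dir):
  # Treat the dataset as present only if every shard of every split exists.
  for split_name in ['train', 'validation']:
    for shard_id in range(_NUM_SHARDS):
      filename = _get_dataset_filename(dataset_dir, split_name, shard_id)
      if not tf.gfile.Exists(filename):
        return False
  return True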
Example #3
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    if not tf.gfile.Exists(FLAGS.dataset_dir):
        tf.gfile.MakeDirs(FLAGS.dataset_dir)

    _download_dataset(FLAGS.dataset_dir)

    # First, process the training data:
    output_file = _get_output_filename('train')
    with tf.python_io.TFRecordWriter(output_file) as tfrecord_writer:
        data_filename = os.path.join(FLAGS.dataset_dir, _TRAIN_DATA_FILENAME)
        labels_filename = os.path.join(FLAGS.dataset_dir,
                                       _TRAIN_LABELS_FILENAME)
        _add_to_tfrecord(data_filename, labels_filename, 60000,
                         tfrecord_writer)

    # Next, process the testing data:
    output_file = _get_output_filename('test')
    with tf.python_io.TFRecordWriter(output_file) as tfrecord_writer:
        data_filename = os.path.join(FLAGS.dataset_dir, _TEST_DATA_FILENAME)
        labels_filename = os.path.join(FLAGS.dataset_dir,
                                       _TEST_LABELS_FILENAME)
        _add_to_tfrecord(data_filename, labels_filename, 10000,
                         tfrecord_writer)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
    dataset_utils.write_label_file(labels_to_class_names, FLAGS.dataset_dir)

    _clean_up_temporary_files(FLAGS.dataset_dir)
    print('\nFinished converting the MNIST dataset!')
Example #4
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  if not tf.gfile.Exists(FLAGS.dataset_dir):
    tf.gfile.MakeDirs(FLAGS.dataset_dir)

  _download_and_uncompress_dataset(FLAGS.dataset_dir)

  # First, process the training data:
  output_file = _get_output_filename('train')
  with tf.python_io.TFRecordWriter(output_file) as tfrecord_writer:
    offset = 0
    for i in range(_NUM_TRAIN_FILES):
      filename = os.path.join(FLAGS.dataset_dir,
                              'cifar-10-batches-py',
                              'data_batch_%d' % (i + 1))  # 1-indexed.
      offset = _add_to_tfrecord(filename, tfrecord_writer, offset)

  # Next, process the testing data:
  output_file = _get_output_filename('test')
  with tf.python_io.TFRecordWriter(output_file) as tfrecord_writer:
    filename = os.path.join(FLAGS.dataset_dir,
                            'cifar-10-batches-py',
                            'test_batch')
    _add_to_tfrecord(filename, tfrecord_writer)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
  dataset_utils.write_label_file(labels_to_class_names, FLAGS.dataset_dir)

  _clean_up_temporary_files(FLAGS.dataset_dir)
  print('\nFinished converting the Cifar10 dataset!')
Example #5
def _write_labels(label_ids, args):
    labels_name = args.output_prefix + "labels.txt"
    log.info("Writing class labels %s",
             os.path.join(args.output_dir, labels_name))
    id_to_name_map = {label_ids[name]: name for name in label_ids}
    dataset_utils.write_label_file(id_to_name_map, args.output_dir,
                                   labels_name)
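Every example on this page ends by handing an {id: class_name} mapping to dataset_utils.write_label_file. As a rough sketch of what that call produces, based on the TF-slim dataset_utils module (the default filename and exact formatting here are assumptions), each line of the labels file pairs a numeric label with its class name, e.g. 0:daisy:

import os
import tensorflow as tf

LABELS_FILENAME = 'labels.txt'  # assumed default filename

def write_label_file(labels_to_class_names, dataset_dir,
                     filename=LABELS_FILENAME):
  # Write one 'id:class_name' line per class.
  labels_filename = os.path.join(dataset_dir, filename)
  with tf.gfile.Open(labels_filename, 'w') as f:
    for label in labels_to_class_names:
      class_name = labels_to_class_names[label]
      f.write('%d:%s\n' % (label, class_name))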
Example #6
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  if not tf.gfile.Exists(FLAGS.dataset_dir):
    tf.gfile.MakeDirs(FLAGS.dataset_dir)

  _download_dataset(FLAGS.dataset_dir)

  # First, process the training data:
  output_file = _get_output_filename('train')
  with tf.python_io.TFRecordWriter(output_file) as tfrecord_writer:
    data_filename = os.path.join(FLAGS.dataset_dir, _TRAIN_DATA_FILENAME)
    labels_filename = os.path.join(FLAGS.dataset_dir, _TRAIN_LABELS_FILENAME)
    _add_to_tfrecord(data_filename, labels_filename, 60000, tfrecord_writer)

  # Next, process the testing data:
  output_file = _get_output_filename('test')
  with tf.python_io.TFRecordWriter(output_file) as tfrecord_writer:
    data_filename = os.path.join(FLAGS.dataset_dir, _TEST_DATA_FILENAME)
    labels_filename = os.path.join(FLAGS.dataset_dir, _TEST_LABELS_FILENAME)
    _add_to_tfrecord(data_filename, labels_filename, 10000, tfrecord_writer)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
  dataset_utils.write_label_file(labels_to_class_names, FLAGS.dataset_dir)

  _clean_up_temporary_files(FLAGS.dataset_dir)
  print('\nFinished converting the MNIST dataset!')
Example #7
def run(training_data_dir,
        protobuf_dir,
        fract_validation=_FRACT_VALIDATION,
        num_shards=_NUM_SHARDS):
    """
    Assembles information necessary to convert the training data files and starts the conversion of training and test set.
    The conversion needs to be wrapped in dedicated threads as a workaround to release GPU resources again after the 
    conversion is done. If tensorflow is started from the main thread, it will occupy the GPU until the main thread 
    terminates.
    """
    if not tf.gfile.Exists(protobuf_dir):
        tf.gfile.MakeDirs(protobuf_dir)

    if _dataset_exists(protobuf_dir, num_shards):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    # Parse information encoded in the structure of the training data directory
    training_filenames, validation_filenames, class_names = _get_filenames_and_classes(
        training_data_dir, fract_validation)

    # Convert class names into numerical ids for processing in tensorflow
    class_names_to_ids = dict(zip(class_names, range(len(class_names))))

    # Randomly shuffle the data sets again
    random.seed(_RANDOM_SEED)
    random.shuffle(training_filenames)
    random.shuffle(validation_filenames)

    # Workaround to release GPU resources after running tensorflow
    process = multiprocessing.Process(target=_convert_dataset,
                                      args=('train', training_filenames,
                                            class_names_to_ids, protobuf_dir,
                                            num_shards))
    process.start()
    process.join()

    process = multiprocessing.Process(target=_convert_dataset,
                                      args=('validation', validation_filenames,
                                            class_names_to_ids, protobuf_dir,
                                            num_shards))
    process.start()
    process.join()

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(class_names)), class_names))
    dataset_utils.write_label_file(labels_to_class_names, protobuf_dir)

    print('\nFinished converting the dataset!')
Example #8
def run(dataset_dir):
  """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  train_filename = _get_output_filename(dataset_dir, 'train')
  testing_filename = _get_output_filename(dataset_dir, 'test')

  if tf.gfile.Exists(train_filename) and tf.gfile.Exists(testing_filename):
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  # TODO(konstantinos): Add download and cleanup functionality

  train_validation_filenames = _get_filenames(
      os.path.join(dataset_dir, 'mnist_m', 'mnist_m_train'))
  test_filenames = _get_filenames(
      os.path.join(dataset_dir, 'mnist_m', 'mnist_m_test'))

  # Divide into train and validation:
  random.seed(_RANDOM_SEED)
  random.shuffle(train_validation_filenames)
  train_filenames = train_validation_filenames[_NUM_VALIDATION:]
  validation_filenames = train_validation_filenames[:_NUM_VALIDATION]

  train_validation_filenames_to_class_ids = _extract_labels(
      os.path.join(dataset_dir, 'mnist_m', 'mnist_m_train_labels.txt'))
  test_filenames_to_class_ids = _extract_labels(
      os.path.join(dataset_dir, 'mnist_m', 'mnist_m_test_labels.txt'))

  # Convert the train, validation, and test sets.
  _convert_dataset('train', train_filenames,
                   train_validation_filenames_to_class_ids, dataset_dir)
  _convert_dataset('valid', validation_filenames,
                   train_validation_filenames_to_class_ids, dataset_dir)
  _convert_dataset('test', test_filenames, test_filenames_to_class_ids,
                   dataset_dir)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

  print('\nFinished converting the MNIST-M dataset!')
Example #9
def run(dataset_dir):
    """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)

    train_filename = _get_output_filename(dataset_dir, 'train')
    testing_filename = _get_output_filename(dataset_dir, 'test')

    if tf.gfile.Exists(train_filename) and tf.gfile.Exists(testing_filename):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    # TODO(konstantinos): Add download and cleanup functionality

    train_validation_filenames = _get_filenames(
        os.path.join(dataset_dir, 'mnist_m', 'mnist_m_train'))
    test_filenames = _get_filenames(
        os.path.join(dataset_dir, 'mnist_m', 'mnist_m_test'))

    # Divide into train and validation:
    random.seed(_RANDOM_SEED)
    random.shuffle(train_validation_filenames)
    train_filenames = train_validation_filenames[_NUM_VALIDATION:]
    validation_filenames = train_validation_filenames[:_NUM_VALIDATION]

    train_validation_filenames_to_class_ids = _extract_labels(
        os.path.join(dataset_dir, 'mnist_m', 'mnist_m_train_labels.txt'))
    test_filenames_to_class_ids = _extract_labels(
        os.path.join(dataset_dir, 'mnist_m', 'mnist_m_test_labels.txt'))

    # Convert the train, validation, and test sets.
    _convert_dataset('train', train_filenames,
                     train_validation_filenames_to_class_ids, dataset_dir)
    _convert_dataset('valid', validation_filenames,
                     train_validation_filenames_to_class_ids, dataset_dir)
    _convert_dataset('test', test_filenames, test_filenames_to_class_ids,
                     dataset_dir)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
    dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    print('\nFinished converting the MNIST-M dataset!')
Example #10
def run(dataset_dir):
  """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  training_filename = _get_output_filename(dataset_dir, 'train')
  testing_filename = _get_output_filename(dataset_dir, 'test')

  if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename):
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)

  # First, process the training data:
  with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
    offset = 0
    for i in range(_NUM_TRAIN_FILES):
      filename = os.path.join(dataset_dir,
                              'cifar-10-batches-py',
                              'data_batch_%d' % (i + 1))  # 1-indexed.
      offset = _add_to_tfrecord(filename, tfrecord_writer, offset)

  # Next, process the testing data:
  with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
    filename = os.path.join(dataset_dir,
                            'cifar-10-batches-py',
                            'test_batch')
    _add_to_tfrecord(filename, tfrecord_writer)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the Cifar10 dataset!')
Example #11
def run_other_dir(dataset_dir, output_suffix):
    """Runs the download and conversion operation.

 Args:
   dataset_dir: The dataset directory where the dataset is stored.
 """

    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)

    if _dataset_exists(dataset_dir):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    # dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
    training_filenames, class_names = _get_filenames_and_classes(dataset_dir, FLAGS.dataset_name, '\\train')
    validation_filenames, class_names = _get_filenames_and_classes(dataset_dir, FLAGS.dataset_name, '\\validation')

    # Shuffle the training and validation sets:
    print("Now let's start converting the %s dataset!" % FLAGS.dataset_name)
    random.seed(_RANDOM_SEED)
    random.shuffle(training_filenames)
    random.shuffle(validation_filenames)

    class_names_to_ids = dict(zip(class_names, range(len(class_names))))
    # First, convert the training and validation sets.
    _convert_dataset('train', training_filenames, class_names_to_ids,
                     dataset_dir, output_suffix)
    _convert_dataset('validation', validation_filenames, class_names_to_ids,
                     dataset_dir, output_suffix)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(class_names)), class_names))
    if output_suffix:
        dataset_utils.write_label_file(labels_to_class_names, dataset_dir, 'labels_' + output_suffix + '.txt')
    else:
        dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    print("\nFinished converting the %s dataset!" % FLAGS.dataset_name)
Example #12
def run(dataset_dir, tf_record_dir):
    """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
    if not tf.gfile.Exists(tf_record_dir):
        tf.gfile.MakeDirs(tf_record_dir)

    if _dataset_exists(tf_record_dir):
        print(
            'TF Record Dataset files already exist. Exiting without re-creating them.'
        )
        return

    # dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
    photo_filenames, class_names = _get_filenames_and_classes(dataset_dir)
    class_names_to_ids = dict(zip(class_names, range(len(class_names))))
    #print("\n\n",photo_filenames,"\n\n")
    print("class_names_to_ids : ", class_names_to_ids)

    # Find the number of validation examples we need
    num_validation = int(_VALIDATION_SIZE * len(photo_filenames))

    # Divide into train and test:
    random.seed(_RANDOM_SEED)
    random.shuffle(photo_filenames)
    training_filenames = photo_filenames[num_validation:]
    validation_filenames = photo_filenames[:num_validation]

    #First, convert the training and validation sets.
    _convert_dataset('train', training_filenames, class_names_to_ids,
                     tf_record_dir)
    _convert_dataset('validation', validation_filenames, class_names_to_ids,
                     tf_record_dir)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(class_names)), class_names))
    dataset_utils.write_label_file(labels_to_class_names, tf_record_dir)
Example #13
def run(dataset_dir):
    """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)

    training_filename = _get_output_filename(dataset_dir, 'train')
    testing_filename = _get_output_filename(dataset_dir, 'test')

    if tf.gfile.Exists(training_filename) and tf.gfile.Exists(
            testing_filename):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    _download_dataset(dataset_dir)

    # First, process the training data:
    with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
        data_filename = os.path.join(dataset_dir, _TRAIN_DATA_FILENAME)
        labels_filename = os.path.join(dataset_dir, _TRAIN_LABELS_FILENAME)
        _add_to_tfrecord(data_filename, labels_filename, 60000,
                         tfrecord_writer)

    # Next, process the testing data:
    with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
        data_filename = os.path.join(dataset_dir, _TEST_DATA_FILENAME)
        labels_filename = os.path.join(dataset_dir, _TEST_LABELS_FILENAME)
        _add_to_tfrecord(data_filename, labels_filename, 10000,
                         tfrecord_writer)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
    dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    _clean_up_temporary_files(dataset_dir)
    print('\nFinished converting the MNIST dataset!')
Example #14
def run(training_data_dir,
        protobuf_dir,
        fract_validation=_FRACT_VALIDATION,
        num_shards=_NUM_SHARDS):
    """
    
    :param training_data_dir: 
    :param protobuf_dir: 
    :param fract_validation: 
    :param num_shards: 
    :return: 
    """
    if not tf.gfile.Exists(protobuf_dir):
        tf.gfile.MakeDirs(protobuf_dir)

    if _dataset_exists(protobuf_dir, num_shards):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    training_filenames, validation_filenames, class_names = _get_filenames_and_classes(
        training_data_dir, fract_validation)
    class_names_to_ids = dict(zip(class_names, range(len(class_names))))

    random.seed(_RANDOM_SEED)
    random.shuffle(training_filenames)
    random.shuffle(validation_filenames)

    _convert_dataset('train', training_filenames, class_names_to_ids,
                     protobuf_dir, num_shards)
    _convert_dataset('validation', validation_filenames, class_names_to_ids,
                     protobuf_dir, num_shards)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(class_names)), class_names))
    dataset_utils.write_label_file(labels_to_class_names, protobuf_dir)

    # _clean_up_temporary_files(dataset_dir)
    print('\nFinished converting the dataset!')
Example #15
def main(argv):
    """Converts each randomized grating image set into batches of TFRecords."""

    for ss in range(nSets):

        dataset_name = '%s_rand%d' % (dataset_root, ss + 1)

        dataset_dir = os.path.join(root, 'biasCNN/datasets/gratings/',
                                   dataset_name)
        image_dir = os.path.join(root, 'biasCNN/images/gratings/',
                                 dataset_name)

        # if the folder already exists, we'll automatically delete it and make it again.
        if tf.gfile.Exists(dataset_dir):
            print('deleting')
            #        tf.gfile.DeleteRecursively(FLAGS.log_dir)
            shutil.rmtree(dataset_dir, ignore_errors=True)
            tf.gfile.MakeDirs(dataset_dir)
        else:
            tf.gfile.MakeDirs(dataset_dir)

    #%% get the information for ALL my images (all categories, exemplars, rotations)

        all_filenames, all_labels, class_names = _get_filenames_and_classes(
            image_dir)

        # save out this list just as a double check that this original order is correct
        np.save(os.path.join(dataset_dir, 'all_filenames.npy'), all_filenames)
        np.save(os.path.join(dataset_dir, 'all_labels.npy'), all_labels)
        np.save(os.path.join(dataset_dir, 'featureMat.npy'), featureMat)

        # Save the test set as a couple "batches" of images.
        # Doing this manually makes it easy to load them and get their weights later on.
        n_total_val = np.size(all_labels)
        max_per_batch = int(90)
        num_batches = np.ceil(n_total_val / max_per_batch)

        for bb in np.arange(0, num_batches):

            bb = int(bb)
            name = 'batch' + str(bb)

            if (bb + 1) * max_per_batch > np.size(all_filenames):
                batch_filenames = all_filenames[bb * max_per_batch:-1]
                batch_labels = all_labels[bb * max_per_batch:-1]
            else:
                batch_filenames = all_filenames[bb * max_per_batch:(bb + 1) *
                                                max_per_batch]
                batch_labels = all_labels[bb * max_per_batch:(bb + 1) *
                                          max_per_batch]

                assert np.size(batch_labels) == max_per_batch

            _convert_dataset(dataset_name,
                             name,
                             batch_filenames,
                             batch_labels,
                             dataset_dir,
                             num_shards=1)

            np.save(os.path.join(dataset_dir, name + '_filenames.npy'),
                    batch_filenames)
            np.save(os.path.join(dataset_dir, name + '_labels.npy'),
                    batch_labels)

        # Finally, write the labels file:
        labels_to_class_names = dict(zip(range(len(class_names)), class_names))
        dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

        print(
            '\nFinished converting the grating dataset, with orientation labels!'
        )
Example #16
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading ImageNet.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
        tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/object/bbox/xmin':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label':
        tf.VarLenFeature(dtype=tf.int64),
    }

    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'label':
        slim.tfexample_decoder.Tensor('image/class/label'),
        'label_text':
        slim.tfexample_decoder.Tensor('image/class/text'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    else:
        labels_to_names = create_readable_names_for_imagenet_labels()
        dataset_utils.write_label_file(labels_to_names, dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=_SPLITS_TO_SIZES[split_name],
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=_NUM_CLASSES,
                                labels_to_names=labels_to_names)
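For context, a Dataset returned by a get_split like the one above is usually consumed through slim's DatasetDataProvider. A hedged usage sketch (the dataset directory is a placeholder, and slim is assumed to be tf.contrib.slim as in the example):

import tensorflow as tf
from tensorflow.contrib import slim

# Hypothetical directory containing the TFRecord shards for the 'train' split
# and a labels.txt file written by dataset_utils.write_label_file.
dataset = get_split('train', '/tmp/imagenet')
provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset, num_readers=4, shuffle=True)
image, label = provider.get(['image', 'label'])
# 'image' and 'label' are tensors that can now be preprocessed and batched,
# e.g. with tf.train.batch, before being fed to a model.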
Example #17
def main(argv):
  """Converts the grating images into train/validation TFRecords, with orientation labels."""
  
  dataset_dir = '/usr/local/serenceslab/maggie/biasCNN/datasets/datasets_Grating_Orient/'
  image_dir = '/usr/local/serenceslab/maggie/biasCNN/grating_ims/'
  
  # If the folder already exists, we'll automatically delete it and make it again.
  if tf.gfile.Exists(dataset_dir):
    print('deleting')
    # tf.gfile.DeleteRecursively(FLAGS.log_dir)
    shutil.rmtree(dataset_dir, ignore_errors=True)
    tf.gfile.MkDir(dataset_dir)
  else:
    tf.gfile.MkDir(dataset_dir)

 
  #%% Get the information for ALL my images (all categories, exemplars, rotations).
  all_filenames, all_labels, class_names, orig_angles = _get_filenames_and_classes(image_dir)

  # Save out this list just as a double check that this original order is correct.
  np.save(dataset_dir + 'all_filenames.npy', all_filenames)
  np.save(dataset_dir + 'all_labels.npy', all_labels)
  np.save(dataset_dir + 'orig_angles.npy', orig_angles)
  #%% Define my training and validation sets.
  # Random 10 percent is validation.

  random.seed(_RANDOM_SEED)

  fullseq = np.arange(0, np.size(all_labels))
  random.shuffle(fullseq)

  num_val = int(np.ceil(np.size(all_labels) * _PCT_VAL))

  valinds_num = fullseq[:num_val]
  trninds_num = fullseq[num_val:]

  training_filenames = []
  validation_filenames = []
  training_labels = []
  validation_labels = []
    
  for ii in trninds_num:
      training_filenames.append(all_filenames[ii])
      training_labels.append(all_labels[ii])
    
  for ii in valinds_num:
      validation_filenames.append(all_filenames[ii])
      validation_labels.append(all_labels[ii])
           
 

    
  # First, convert the training and validation sets. These will be automatically
  # divided into num_shards (5 sets), which speeds up the training procedure.
  _convert_dataset('train', training_filenames, training_labels, dataset_dir,
                   num_shards=_NUM_SHARDS)
  _convert_dataset('validation', validation_filenames, validation_labels, dataset_dir,
                   num_shards=_NUM_SHARDS)

  # Second, save the validation set as a couple of "batches" of images.
  # Doing this manually makes it easy to load them and get their weights later on.
  n_total_val = np.size(all_labels)
  max_per_batch = int(360)
  num_batches = np.ceil(n_total_val / max_per_batch)

  for bb in np.arange(0, num_batches):

      bb = int(bb)
      name = 'batch' + str(bb)

      if (bb + 1) * max_per_batch > np.size(all_filenames):
          batch_filenames = all_filenames[bb * max_per_batch:-1]
          batch_labels = all_labels[bb * max_per_batch:-1]
      else:
          batch_filenames = all_filenames[bb * max_per_batch:(bb + 1) * max_per_batch]
          batch_labels = all_labels[bb * max_per_batch:(bb + 1) * max_per_batch]

          assert np.size(batch_labels) == max_per_batch

      _convert_dataset(name, batch_filenames, batch_labels, dataset_dir, num_shards=1)

      np.save(dataset_dir + name + '_filenames.npy', batch_filenames)
      np.save(dataset_dir + name + '_labels.npy', batch_labels)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(class_names)), class_names))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

#  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the grating dataset, with orientation labels!')
Example #18
def run(dataset_dir, custom_binary_validation=None, custom_binary_validation_label=None,
        custom_binary_validation_ratio=None,
        output_suffix=None, is_other_dir=None):
    """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
    if is_other_dir:
        return run_other_dir(dataset_dir, output_suffix)

    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)

    if _dataset_exists(dataset_dir):
        print('Dataset files already exist. Exiting without re-creating them.')
        return
    random.seed(_RANDOM_SEED)
    # dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
    if custom_binary_validation:
        tmp_photo_filenames, class_names = _get_filenames_and_classes_by_label(dataset_dir, FLAGS.dataset_name)
        if not custom_binary_validation_ratio:
            custom_binary_validation_ratio = 0.
        if custom_binary_validation_ratio > 1:
            custom_binary_validation_ratio = 1.
        validation_filenames = []
        training_filenames = []
        for key in tmp_photo_filenames:
            if key == custom_binary_validation_label:
                ratio = custom_binary_validation_ratio
            else:
                ratio = 1. - custom_binary_validation_ratio

            random.shuffle(tmp_photo_filenames[key])
            training_filenames += tmp_photo_filenames[key][int(FLAGS.num_validation * ratio):]
            print(key, len(tmp_photo_filenames[key][:int(FLAGS.num_validation * ratio)]))
            validation_filenames += tmp_photo_filenames[key][:int(FLAGS.num_validation * ratio)]
    else:
        photo_filenames, class_names = _get_filenames_and_classes(dataset_dir, FLAGS.dataset_name)

        # Divide into train and test:
        print("Now let's start converting the Koreans dataset!")
        random.shuffle(photo_filenames)
        training_filenames = photo_filenames[FLAGS.num_validation:]
        validation_filenames = photo_filenames[:FLAGS.num_validation]

    class_names_to_ids = dict(zip(class_names, range(len(class_names))))
    # First, convert the training and validation sets.
    _convert_dataset('train', training_filenames, class_names_to_ids,
                     dataset_dir, output_suffix)
    _convert_dataset('validation', validation_filenames, class_names_to_ids,
                     dataset_dir, output_suffix)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(class_names)), class_names))
    if output_suffix:
        dataset_utils.write_label_file(labels_to_class_names, dataset_dir, 'labels_' + output_suffix + '.txt')
    else:
        dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    print('\nFinished converting the Koreans dataset!')
Example #19
def main(argv):
    """Converts the grating images into train/validation TFRecords, with orientation labels."""

    # if the folder already exists, we'll automatically delete it and make it again.
    if tf.gfile.Exists(dataset_dir):
        print('deleting')
        #        tf.gfile.DeleteRecursively(FLAGS.log_dir)
        shutil.rmtree(dataset_dir, ignore_errors=True)
        tf.gfile.MkDir(dataset_dir)
    else:
        tf.gfile.MkDir(dataset_dir)


    #%% Get the information for ALL my images (all categories, exemplars, rotations).

    all_filenames, all_labels, class_names = _get_filenames_and_classes(
        image_dir)

    # save out this list just as a double check that this original order is correct
    np.save(dataset_dir + 'all_filenames.npy', all_filenames)
    np.save(dataset_dir + 'all_labels.npy', all_labels)
    np.save(dataset_dir + 'featureMat.npy', featureMat)

    #%% Define my training and validation sets.
    # Random 10 percent is validation

    random.seed(_RANDOM_SEED)

    fullseq = np.arange(0, np.size(all_labels))
    random.shuffle(fullseq)

    num_val = int(np.ceil(np.size(all_labels) * _PCT_VAL))

    valinds_num = fullseq[:num_val]
    trninds_num = fullseq[num_val:]

    training_filenames = []
    validation_filenames = []
    training_labels = []
    validation_labels = []

    for ii in trninds_num:
        training_filenames.append(all_filenames[ii])
        training_labels.append(all_labels[ii])

    for ii in valinds_num:
        validation_filenames.append(all_filenames[ii])
        validation_labels.append(all_labels[ii])

    # First, convert the training and validation sets. these will be automatically
    # divided into num_shards (5 sets), which speeds up the training procedure.
    _convert_dataset('train',
                     training_filenames,
                     training_labels,
                     dataset_dir,
                     num_shards=_NUM_SHARDS)
    _convert_dataset('validation',
                     validation_filenames,
                     validation_labels,
                     dataset_dir,
                     num_shards=_NUM_SHARDS)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(class_names)), class_names))
    dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    #  _clean_up_temporary_files(dataset_dir)
    print(
        '\nFinished converting the grating dataset, with orientation labels!')
Example #20
def main(argv):
  """Converts the novel-object images into train/validation TFRecords, with category labels."""
  
  dataset_dir = '/usr/local/serenceslab/maggie/tensorflow/novel_objects/datasets/datasets_NovelObjects_CategoryLabels/'
  image_dir = '/usr/local/serenceslab/maggie/tensorflow/novel_objects/ims/'
  
  # If the folder already exists, we'll automatically delete it and make it again.
  if tf.gfile.Exists(dataset_dir):
    print('deleting')
    # tf.gfile.DeleteRecursively(FLAGS.log_dir)
    shutil.rmtree(dataset_dir, ignore_errors=True)
    tf.gfile.MkDir(dataset_dir)
  else:
    tf.gfile.MkDir(dataset_dir)

 
  #%% Get the information for ALL my images (all categories, exemplars, rotations).
  all_filenames, all_labels, class_names = _get_filenames_and_classes(image_dir)

  # Save out this list just as a double check that this original order is correct.
  np.save(dataset_dir + 'all_filenames.npy', all_filenames)
  np.save(dataset_dir + 'all_labels.npy', all_labels)

  #%% Define my training and validation sets.
  # To keep everything balanced, we'll leave out entire exemplar sets of images
  # (e.g. a single exemplar at each of the possible viewpoints). Leave out four
  # exemplar sets per category; this is roughly 10% of the data.

  random.seed(_RANDOM_SEED)

  # How many exemplars per category to leave out?
  n_ex_leave_out = int(np.ceil(nEx * _PCT_VAL))

  # Which indices go into the validation set?
  inds2val = np.zeros(np.shape(all_labels))
  for cc in np.arange(0, nCat, 1):
      # For each category, we're randomly drawing several exemplars to leave out.
      # All 144 images of that exemplar get left out together.
      ex2val = np.random.choice(np.arange(0, nEx) + 1, n_ex_leave_out, replace=False)
      these_inds = np.logical_and(catlist == cc + 1,
                                  np.expand_dims(np.any(exlist == ex2val, axis=1), 1))
      inds2val[these_inds] = 1

  assert np.sum(inds2val) == n_ex_leave_out * nCat * nX * nY

  trninds_num = np.where(inds2val == 0)[0]
  valinds_num = np.where(inds2val == 1)[0]

  training_filenames = []
  validation_filenames = []
  training_labels = []
  validation_labels = []
    
  for ii in trninds_num:
      training_filenames.append(all_filenames[ii])
      training_labels.append(all_labels[ii])
    
  for ii in valinds_num:
      validation_filenames.append(all_filenames[ii])
      validation_labels.append(all_labels[ii])
 
  # First, convert the training and validation sets. These will be automatically
  # divided into num_shards (5 sets), which speeds up the training procedure.
  _convert_dataset('train', training_filenames, training_labels, dataset_dir,
                   num_shards=_NUM_SHARDS)
  _convert_dataset('validation', validation_filenames, validation_labels, dataset_dir,
                   num_shards=_NUM_SHARDS)

  # Second, save the validation set as a couple of "batches" of images.
  # Doing this manually makes it easy to load them and get their weights later on.
  n_total_val = np.sum(inds2val)
  max_per_batch = int(nX * nY)
  num_batches = np.ceil(n_total_val / max_per_batch)

  for bb in np.arange(0, num_batches):

      bb = int(bb)
      name = 'batch' + str(bb)

      if (bb + 1) * max_per_batch > np.size(all_filenames):
          batch_filenames = validation_filenames[bb * max_per_batch:-1]
          batch_labels = validation_labels[bb * max_per_batch:-1]
      else:
          batch_filenames = validation_filenames[bb * max_per_batch:(bb + 1) * max_per_batch]
          batch_labels = validation_labels[bb * max_per_batch:(bb + 1) * max_per_batch]

          assert np.size(batch_labels) == max_per_batch

      _convert_dataset(name, batch_filenames, batch_labels, dataset_dir, num_shards=1)

      np.save(dataset_dir + name + '_filenames.npy', batch_filenames)
      np.save(dataset_dir + name + '_labels.npy', batch_labels)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(class_names)), class_names))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

#  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the novel object dataset, with Category labels!')