def input_fn_amoabanet(is_training, use_random_crop, data_dir, batch_size,
                       num_epochs=1, num_gpus=None, dtype=tf.float32,
                       with_drawing_bbox=False, autoaugment_type=None,
                       dataset_name=None, drop_remainder=False,
                       preprocessing_type='imagenet', return_logits=False,
                       dct_method="", train_regex='train*',
                       val_regex='validation*'):
    """Input function whose input files are selected by caller-supplied patterns.

    Resolves the file list with `data_util.get_filenames` (forwarding
    `train_regex` and `val_regex`), looks up the dataset metadata with
    `data_config.get_config(dataset_name)` and delegates to `input_fn`.

    NOTE(review): `num_images['train']` is forwarded to `input_fn` even when
    `is_training` is False -- confirm this is intended.

    Args:
      is_training: A boolean denoting whether the input is for training.
      use_random_crop: Whether to randomly crop a training image.
      data_dir: The directory containing the input data.
      batch_size: The number of samples per batch.
      num_epochs: The number of epochs to repeat the dataset.
      num_gpus: The number of gpus used for training.
      dtype: Data type to use for images/features.
      with_drawing_bbox: Forwarded to `input_fn` unchanged.
      autoaugment_type: Forwarded to `input_fn` unchanged.
      dataset_name: Key passed to `data_config.get_config`.
      drop_remainder: Forwarded to `input_fn` unchanged.
      preprocessing_type: Forwarded to `input_fn` unchanged.
      return_logits: Forwarded to `input_fn` unchanged.
      dct_method: Forwarded to `input_fn` unchanged.
      train_regex: Pattern handed to `data_util.get_filenames` as `train_regex`.
      val_regex: Pattern handed to `data_util.get_filenames` as `val_regex`.

    Returns:
      The value returned by `input_fn`.
    """
    filenames = data_util.get_filenames(
        is_training, data_dir, train_regex=train_regex, val_regex=val_regex)
    meta = data_config.get_config(dataset_name)

    # Pull the positional metadata out in the same order `input_fn` expects it.
    num_train_files = meta.num_train_files
    num_images = meta.num_images['train']
    shuffle_buffer = meta.shuffle_buffer
    num_channels = meta.num_channels

    return input_fn(
        is_training,
        filenames,
        use_random_crop,
        batch_size,
        num_train_files,
        num_images,
        shuffle_buffer,
        num_channels,
        num_epochs,
        num_gpus,
        dtype,
        autoaugment_type=autoaugment_type,
        with_drawing_bbox=with_drawing_bbox,
        drop_remainder=drop_remainder,
        preprocessing_type=preprocessing_type,
        return_logits=return_logits,
        dct_method=dct_method)
def input_fn_ir_eval(is_training, data_dir, batch_size, num_epochs=1,
                     num_gpus=0, dtype=tf.float32,
                     preprocessing_type='imagenet', dataset_name=None,
                     dct_method="", val_regex='validation-*'):
    """Input function that reads files matched by `val_regex`.

    Resolves the file list with `data_util.get_filenames`, refuses to continue
    when that list is empty, then delegates to `input_fn` with random cropping
    disabled and the `'validation'` image count from the dataset config.

    Args:
      is_training: A boolean denoting whether the input is for training.
      data_dir: The directory containing the input data.
      batch_size: The number of samples per batch.
      num_epochs: The number of epochs to repeat the dataset.
      num_gpus: The number of gpus used for training.
      dtype: Data type to use for images/features.
      preprocessing_type: Forwarded to `input_fn` unchanged.
      dataset_name: Key passed to `data_config.get_config`.
      dct_method: Forwarded to `input_fn` unchanged.
      val_regex: Pattern handed to `data_util.get_filenames` as `val_regex`.

    Returns:
      The value returned by `input_fn`.

    Raises:
      AssertionError: If no input files were found.
    """
    filenames = data_util.get_filenames(is_training, data_dir,
                                        val_regex=val_regex)
    # An `assert` statement is stripped under `python -O`, which would let an
    # empty file list through silently. Raise explicitly instead, keeping the
    # AssertionError type so existing callers see the same exception.
    if len(filenames) == 0:
        raise AssertionError(
            'No input files found: data_dir={!r}, val_regex={!r}, '
            'is_training={!r}'.format(data_dir, val_regex, is_training))

    dataset_config = data_config.get_config(dataset_name)
    return input_fn(
        is_training,
        filenames,
        False,  # use_random_crop
        batch_size,
        dataset_config.num_train_files,
        dataset_config.num_images['validation'],
        dataset_config.shuffle_buffer,
        dataset_config.num_channels,
        num_epochs,
        num_gpus,
        dtype,
        preprocessing_type=preprocessing_type,
        dct_method=dct_method)
def input_fn_cls(is_training, use_random_crop, data_dir, batch_size,
                 num_epochs=1, num_gpus=None, dtype=tf.float32,
                 with_drawing_bbox=False, autoaugment_type=None,
                 dataset_name=None, drop_remainder=False, dct_method=""):
    """Input function which provides batches for train or eval.

    NOTE(review): this file later defines another `input_fn_cls` taking
    `(is_training, use_random_crop, num_epochs, flags_obj)`; if both live at
    module level the later one replaces this one -- confirm which is intended.

    Args:
      is_training: A boolean denoting whether the input is for training.
      use_random_crop: Whether to randomly crop a training image.
      data_dir: The directory containing the input data.
      batch_size: The number of samples per batch.
      num_epochs: The number of epochs to repeat the dataset.
      num_gpus: The number of gpus used for training.
      dtype: Data type to use for images/features.
      with_drawing_bbox: If True, return the dataset including raw image
        tensor with bbox.
      autoaugment_type: Auto augmentation type. 'imagenet', 'svhn', 'cifar',
        'good'.
      dataset_name: Key passed to `data_config.get_config`.
      drop_remainder: Forwarded to `input_fn` unchanged.
      dct_method: An optional `string`. Defaults to `""`. A hint about the
        algorithm used for decompression; `""` maps to a system-specific
        default. Currently valid values are ["INTEGER_FAST",
        "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal jpeg
        library changes to a version that does not have that specific option.)

    Returns:
      A dataset that can be used for iteration.
    """
    filenames = data_util.get_filenames(is_training, data_dir)
    meta = data_config.get_config(dataset_name)

    # Positional metadata, in the order `input_fn` receives it.
    num_train_files = meta.num_train_files
    num_images = meta.num_images['train']
    shuffle_buffer = meta.shuffle_buffer
    num_channels = meta.num_channels

    return input_fn(
        is_training,
        filenames,
        use_random_crop,
        batch_size,
        num_train_files,
        num_images,
        shuffle_buffer,
        num_channels,
        num_epochs,
        num_gpus,
        dtype,
        autoaugment_type=autoaugment_type,
        with_drawing_bbox=with_drawing_bbox,
        drop_remainder=drop_remainder,
        dct_method=dct_method)
def input_fn_cls(is_training, use_random_crop, num_epochs, flags_obj):
    """Build the supervised input pipeline from a flags object.

    Args:
      is_training: A boolean denoting whether the input is for training.
      use_random_crop: Whether to randomly crop a training image.
      num_epochs: Number of epochs handed to `input_fn`; doubled when
        `flags_obj.mixup_type == 1` and `is_training` is truthy.
      flags_obj: Object exposing the attributes read here: `mixup_type`,
        `batch_size`, `data_dir`, `train_regex`, `val_regex`, `dataset_name`,
        `autoaugment_type`, `with_drawing_bbox`, `preprocessing_type`,
        `kd_temp` and `dct_method`.

    Returns:
      The dataset produced by zipping the single supervised dataset and
      mapping `flatten_input` over it, so each element is a
      `(dict of non-label features, label)` pair.
    """
    # With mixup type 1 during training both the batch size and the epoch
    # count are doubled before the per-device batch size is derived.
    double_for_mixup = flags_obj.mixup_type == 1 and is_training
    if double_for_mixup:
        global_batch_size = flags_obj.batch_size * 2
        num_epochs = num_epochs * 2
    else:
        global_batch_size = flags_obj.batch_size
    per_device_batch = distribution_utils.per_device_batch_size(
        global_batch_size, flags_core.get_num_gpus(flags_obj))

    filenames_sup = data_util.get_filenames(
        is_training,
        flags_obj.data_dir,
        train_regex=flags_obj.train_regex,
        val_regex=flags_obj.val_regex)
    tf.logging.info('The # of Supervised tfrecords: {}'.format(
        len(filenames_sup)))

    dataset_meta = data_config.get_config(flags_obj.dataset_name)
    dataset_sup = input_fn(
        is_training,
        filenames_sup,
        use_random_crop,
        per_device_batch,
        dataset_meta.num_train_files,
        dataset_meta.num_images['train'],
        dataset_meta.shuffle_buffer,
        dataset_meta.num_channels,
        num_epochs,
        flags_core.get_num_gpus(flags_obj),
        flags_core.get_tf_dtype(flags_obj),
        autoaugment_type=flags_obj.autoaugment_type,
        with_drawing_bbox=flags_obj.with_drawing_bbox,
        drop_remainder=False,
        preprocessing_type=flags_obj.preprocessing_type,
        # Logits are requested only when a positive temperature is set.
        return_logits=flags_obj.kd_temp > 0,
        dct_method=flags_obj.dct_method,
        parse_record_fn=data_util.parse_record_sup)

    def flatten_input(*features):
        """Merge the zipped feature dicts, splitting out the 'label' entry."""
        merged = {}
        for feature in features:
            for key in feature:
                if key != 'label':
                    merged[key] = feature[key]
                    continue
                label = feature[key]
        return merged, label

    # Only one dataset takes part in the zip; it is still wrapped in a tuple
    # so that `flatten_input` receives it through *features.
    zipped = tf.data.Dataset.zip((dataset_sup,))
    flattened = zipped.map(flatten_input)
    tf.logging.info('dataset = dataset.map(flatten_input)')
    tf.logging.info(flattened)
    return flattened
def input_fn_bfe_eval(is_training, data_dir, batch_size, num_epochs=1,
                      num_gpus=0, dtype=tf.float32,
                      preprocessing_type='imagenet', dataset_name=None,
                      dct_method=""):
    """Input function which provides batches for train or eval.

    File names are resolved with the fixed validation pattern
    `'validation-label*'`; random cropping is disabled and the `'validation'`
    image count from the dataset config is forwarded to `input_fn`.

    Args:
      is_training: A boolean denoting whether the input is for training.
      data_dir: The directory containing the input data.
      batch_size: The number of samples per batch.
      num_epochs: The number of epochs to repeat the dataset.
      num_gpus: The number of gpus used for training.
      dtype: Data type to use for images/features.
      preprocessing_type: Forwarded to `input_fn` unchanged.
      dataset_name: Key passed to `data_config.get_config`.
      dct_method: An optional `string`. Defaults to `""`. A hint about the
        algorithm used for decompression; `""` maps to a system-specific
        default. Currently valid values are ["INTEGER_FAST",
        "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal jpeg
        library changes to a version that does not have that specific option.)

    Returns:
      A dataset that can be used for iteration.
    """
    filenames = data_util.get_filenames(
        is_training, data_dir, val_regex='validation-label*')
    cfg = data_config.get_config(dataset_name)

    # Positional metadata, in the order `input_fn` receives it.
    num_train_files = cfg.num_train_files
    num_images = cfg.num_images['validation']
    shuffle_buffer = cfg.shuffle_buffer
    num_channels = cfg.num_channels

    return input_fn(
        is_training,
        filenames,
        False,  # use_random_crop
        batch_size,
        num_train_files,
        num_images,
        shuffle_buffer,
        num_channels,
        num_epochs,
        num_gpus,
        dtype,
        preprocessing_type=preprocessing_type,
        dct_method=dct_method)
def input_fn_npair_train(is_training, use_random_crop, data_dir, batch_size,
                         train_epochs, num_gpus=0, dtype=tf.float32,
                         with_drawing_bbox=False, autoaugment_type=None,
                         preprocessing_type='imagenet', dct_method="",
                         dataset_name=None):
    """Input function which provides batches for train or eval.

    Each file matched by `'train-label*'` becomes its own shuffled, endlessly
    repeating record stream. A selector dataset then cycles over those streams
    in index order, and the combined stream is handed to
    `data_util.process_record_dataset_ir` with `num_instances=2`.

    See https://oss.navercorp.com/VisualSearch/food-fighters/pull/49#issue-566301.

    Args:
      is_training: A boolean denoting whether the input is for training.
      use_random_crop: Whether to randomly crop a training image.
      data_dir: The directory containing the input data.
      batch_size: The number of samples per batch.
      train_epochs: Number of training epochs; multiplied by
        `int(num_images['train'] / batch_size)` to get the step count that
        bounds the selector dataset.
      num_gpus: The number of gpus used for training.
      dtype: Data type to use for images/features.
      with_drawing_bbox: If True, return the dataset including raw image
        tensor with bbox.
      autoaugment_type: Auto augmentation type. 'imagenet', 'svhn', 'cifar',
        'good'.
      preprocessing_type: Forwarded to `process_record_dataset_ir` unchanged.
      dct_method: An optional `string`. Defaults to `""`. A hint about the
        algorithm used for decompression; `""` maps to a system-specific
        default. Currently valid values are ["INTEGER_FAST",
        "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal jpeg
        library changes to a version that does not have that specific option.)
      dataset_name: Key passed to `data_config.get_config`.

    Returns:
      A dataset that can be used for iteration.
    """
    dconf = data_config.get_config(dataset_name)

    def _record_stream(filename):
        """Return a shuffled, infinitely repeating record stream for one file."""
        # The cycle_length isn't necessary now because there's only one
        # tfrecord file per stream. Use this feature when you want to increase
        # the number of file shards.
        #
        # Shuffling records by class: a larger shuffle buffer gives better
        # randomness, but a smaller one reduces startup time and uses less
        # memory.
        return (tf.data.Dataset.from_tensors(filename)
                .apply(tf.contrib.data.parallel_interleave(
                    tf.data.TFRecordDataset, cycle_length=1))
                .shuffle(buffer_size=100, seed=0)
                .repeat())

    # The file list is always resolved in training mode, whatever
    # `is_training` says.
    per_file_streams = [
        _record_stream(name)
        for name in data_util.get_filenames(
            True, data_dir, train_regex='train-label*')
    ]

    # The repeat count must be a multiple of two, which means anchor and
    # positive.
    max_train_steps = train_epochs * int(
        dconf.num_images['train'] / batch_size)
    selector = tf.data.Dataset.range(len(per_file_streams)).repeat(
        max_train_steps * 2)
    combined = tf.contrib.data.choose_from_datasets(per_file_streams, selector)

    return data_util.process_record_dataset_ir(
        dataset=combined,
        is_training=is_training,
        batch_size=batch_size,
        parse_record_fn=data_util.parse_record,
        num_classes=dconf.num_classes,
        num_channels=dconf.num_channels,
        num_gpus=num_gpus,
        use_random_crop=use_random_crop,
        dtype=dtype,
        with_drawing_bbox=with_drawing_bbox,
        autoaugment_type=autoaugment_type,
        num_instances=2,
        preprocessing_type=preprocessing_type,
        is_aggregated=False,
        dct_method=dct_method)