Example #1
    def _create_data_iterators(self,
                               tf_rec_list,
                               batch_size,
                               dtype=tf.float32):
        """ """
        def tf_parse_record(tf_record):
            """ """
            feature = tf.parse_single_example(tf_record,
                                              features=_IMAGE_TFREC_STRUCTURE)
            image = tf.decode_raw(feature['image'], tf.uint8)
            image = tf.reshape(image, self.shape)
            label = tf.cast(feature['label'], tf.int64)
            return image, label

        data_iter = None
        iter_list = []
        for rec in tf_rec_list:
            rec = DictToAttrs(rec)
            dataset = TFRecordDataset(rec.file)
            dataset = dataset.map(tf_parse_record)
            dataset = dataset.map(lambda image, label:
                                  (rec.preproc(image), label))
            if rec.shuffle:
                dataset = dataset.shuffle(buffer_size=50000)
            dataset = dataset.batch(batch_size)
            out_types = dataset.output_types
            out_shapes = dataset.output_shapes

            if data_iter is None:
                # Create a reinitializable iterator
                data_iter = Iterator.from_structure(out_types, out_shapes)
            iter_init = data_iter.make_initializer(dataset)
            iter_list.append(iter_init)

        return data_iter.get_next(), iter_list
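A minimal usage sketch for the method above (TF 1.x), assuming `loader` is an instance of the surrounding class and `records` is a list of dicts with `file`, `preproc`, and `shuffle` keys; these names are placeholders, not part of the original code:

# Hypothetical usage sketch; `loader` and `records` are assumptions.
(images, labels), init_ops = loader._create_data_iterators(records, batch_size=32)
with tf.Session() as sess:
    for init_op in init_ops:
        # Re-point the shared iterator at the next dataset in the list
        sess.run(init_op)
        while True:
            try:
                img_batch, label_batch = sess.run([images, labels])
            except tf.errors.OutOfRangeError:
                break  # this dataset is exhausted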
Example #2
def tf_create_data_iterator(
    batch_size, 
    train_set_file=None, 
    val_set_file=None, 
    shape=None, 
    dtype=tf.float32,
    mean_img=None):
    """ """
    def tf_parse_record(tf_record):
        """ """
        feature = tf.parse_single_example(tf_record, features=_IMAGE_TFREC_STRUCTURE)
        image = tf.decode_raw(feature['image'], tf.uint8)
        image = tf.reshape(image, shape)
        # image = tf.image.resize_images(image, [64, 64])
        # image = tf.subtract(image, mean_img)
        label = tf.cast(feature['label'], tf.int64)
        image = tf.cast(image, dtype)
        return image, label

    if shape is None:
        raise ValueError("shape cannot be None")
    if val_set_file is None and train_set_file is None:
        raise ValueError("Neither train_set_file nor val_set_file is specified")

    val_set       = None
    train_set     = None
    out_types     = None
    out_shapes    = None
    val_init_op   = None
    train_init_op = None

    # Create the validation dataset object
    if val_set_file is not None:
        val_set = TFRecordDataset(val_set_file)
        val_set = val_set.map(tf_parse_record)
        val_set = val_set.batch(batch_size)
        out_types = val_set.output_types
        out_shapes = val_set.output_shapes

    # Create the training dataset object
    if train_set_file is not None:
        train_set = TFRecordDataset(train_set_file)
        train_set = train_set.map(tf_parse_record)
        train_set = train_set.shuffle(buffer_size=batch_size * 1000)
        train_set = train_set.batch(batch_size)
        out_types = train_set.output_types
        out_shapes = train_set.output_shapes

    # Create a reinitializable iterator from both datasets
    iterator  = Iterator.from_structure(out_types, out_shapes)
    
    if train_set is not None:
        train_init_op   = iterator.make_initializer(train_set)
    
    if val_set is not None:
        val_init_op     = iterator.make_initializer(val_set)

    iter_op = iterator.get_next()
    return train_init_op, val_init_op, iter_op
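A sketch of how the returned ops might be consumed in a TF 1.x session, switching the single iterator between the training and validation sets; file names and shape below are placeholders:

# Hypothetical usage sketch; file names and shape are placeholders.
train_init_op, val_init_op, (images, labels) = tf_create_data_iterator(
    batch_size=32, train_set_file='train.tfrecord',
    val_set_file='val.tfrecord', shape=[64, 64, 3])
with tf.Session() as sess:
    sess.run(train_init_op)               # iterate over the training set
    img, lbl = sess.run([images, labels])
    sess.run(val_init_op)                 # re-initialize on the validation set
    img, lbl = sess.run([images, labels])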
Example #3
    def __init__(self,
                 train_tfrecord,
                 val_tfrecord,
                 batch_size,
                 max_sequence=None,
                 buffer_size=50):
        """
        Load vectors of integers from tfrecord files.

        note: pads batches w/ 0 so all sequences match the longest example
              adds 1 to all other values so that 0 is unique to padding

        :param train_tfrecord: path to tfrecord file
        :param val_tfrecord: path to tfrecord file
        :param batch_size: batch size
        :param max_sequence: maximum sequence length (None means unlimited)
        :param buffer_size: examples in buffer for shuffling
        """
        assert isinstance(train_tfrecord, str)
        assert isinstance(val_tfrecord, str)
        assert isinstance(batch_size, int)
        assert batch_size > 0
        assert buffer_size > 0

        if buffer_size < batch_size:
            buffer_size = batch_size

        def dataset(tfrecord, shuffle):
            ds = TFRecordDataset(tfrecord)

            def parse(x):
                example = tf.parse_single_example(
                    x, features={'data': tf.VarLenFeature(tf.int64)})
                example = tf.cast(example['data'].values, tf.int32) + 1
                if max_sequence is not None:
                    example = example[:max_sequence]
                return example

            ds = ds.map(parse, num_parallel_calls=8)
            if shuffle:
                ds = ds.shuffle(buffer_size)
            return ds.padded_batch(batch_size,
                                   padded_shapes=(tf.TensorShape([None])),
                                   padding_values=0)

        train_dataset = dataset(train_tfrecord, shuffle=True)
        val_dataset = dataset(val_tfrecord, shuffle=False)
        iterator = Iterator.from_structure(train_dataset.output_types,
                                           train_dataset.output_shapes)
        self.init_train = iterator.make_initializer(train_dataset,
                                                    name='init_train')
        self.init_val = iterator.make_initializer(val_dataset, name='init_val')
        self.output = iterator.get_next()
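A sketch of how this loader might be driven; `SequenceReader` is a hypothetical name for the class that owns the __init__ above, and the file names are placeholders. It also illustrates the padding convention described in the docstring:

# Hypothetical usage; `SequenceReader` is an assumed class name.
reader = SequenceReader('train.tfrecord', 'val.tfrecord', batch_size=16)
with tf.Session() as sess:
    sess.run(reader.init_train)       # bind the iterator to the training set
    batch = sess.run(reader.output)   # int32, shape [<=16, longest_sequence_in_batch]
    # 0 marks padding; real values were shifted by +1 in parse(), so they start at 1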
Example #4
def mk_batch(epochs=4, batch_size=50, buf_size=7000):
    """
    Build a subgraph that provides the input and output data tensors and the
    iterators that feed the graph with either the training or the test data.

    Six objects are returned:
     - Two tensors: one for the input batch and another for the output batch.
     - A tensor holding the size of the current mini-batch.
     - Two iterators: one for the training set and another for the test set.
     - A string-handle placeholder that selects which iterator feeds the tensors.

    The tensors are used to build the graph. The iterators, selected through the
    handle placeholder, feed those tensors with either training or test data.

    :param epochs: the number of epochs
    :param batch_size: the size of the mini-batches
    :param buf_size: the size of the shuffling buffer
    :return: the input/output batch tensors, the batch-size tensor, the train
             and test iterators, and the handle placeholder
    """

    # Training dataset, collect input and output from corresponding files and zip them together
    train_in = tf.data.FixedLengthRecordDataset(IN_TRAIN,
                                                WIDTH * HEIGHT * CHANNELS)
    train_out = tf.data.FixedLengthRecordDataset(OUT_TRAIN, 1)
    train_data = tf.data.Dataset.zip((train_in, train_out)) \
        .map(mk_parse) \
        .shuffle(buf_size) \
        .batch(batch_size) \
        .repeat(epochs)

    # Test dataset, collect input and output from corresponding files and zip them together
    test_in = tf.data.FixedLengthRecordDataset(IN_TEST,
                                               WIDTH * HEIGHT * CHANNELS)
    test_out = tf.data.FixedLengthRecordDataset(OUT_TEST, 1)
    test_data = tf.data.Dataset.zip((test_in, test_out)) \
        .map(mk_parse) \
        .shuffle(buf_size) \
        .batch(batch_size)

    # Build a feedable iterator that matches the structure of the datasets above
    handle = tf.placeholder(tf.string, shape=[])
    iterator = Iterator.from_string_handle(handle, train_data.output_types,
                                           train_data.output_shapes)
    in_batch, out_batch = iterator.get_next()

    train = train_data.make_one_shot_iterator()
    test = test_data.make_initializable_iterator()

    # The size of the mini-batch
    size = tf.shape(in_batch)[0]

    return in_batch, out_batch, size, train, test, handle
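A sketch of how the feedable handle returned by mk_batch() selects which dataset feeds the shared tensors; this is a hypothetical usage, not part of the original snippet:

# Hypothetical usage of mk_batch(); the string handle picks the active iterator.
in_batch, out_batch, size, train, test, handle = mk_batch()
with tf.Session() as sess:
    train_handle = sess.run(train.string_handle())
    test_handle = sess.run(test.string_handle())
    sess.run(test.initializer)        # the initializable test iterator must be initialized
    # Read from the training set ...
    xs, ys = sess.run([in_batch, out_batch], feed_dict={handle: train_handle})
    # ... and from the test set, without rebuilding the graph
    xs, ys = sess.run([in_batch, out_batch], feed_dict={handle: test_handle})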
Example #5
    def __init__(self, data_config):
        self.data_config = data_config

        # default batch_size from data reader config
        self._batch_size = self.data_config.reader_batch_size
        self._build_context_and_feature()
        self.training_dataset = self._get_dataset(
            self.data_config.train_tfrecord_dir)

        self.iterator = Iterator.from_structure(
            self.training_dataset.output_types,
            self.training_dataset.output_shapes)
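This __init__ only builds the shared iterator; a plausible continuation (an assumption, not shown in the original) binds it to the training dataset and pulls batches:

# Hypothetical continuation; `reader` is an instance of the class above.
train_init_op = reader.iterator.make_initializer(reader.training_dataset)
next_element = reader.iterator.get_next()
with tf.Session() as sess:
    sess.run(train_init_op)
    batch = sess.run(next_element)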
Example #6
    def __init__(self,
                 *,
                 batch_size,
                 num_classes,
                 prefetch_size=1e8,
                 shuffle_size=1000):
        super().__init__(batch_size, prefetch_size, shuffle_size)

        self._num_classes = num_classes

        (x_train, y_train), (x_test, y_test) = self._load_data()

        x_train = np.float32(x_train) / 255.
        x_test = np.float32(x_test) / 255.

        y_train = np.float32(self._to_one_hot(y_train))
        y_test = np.float32(self._to_one_hot(y_test))

        self.train_size = y_train.shape[0]
        self.test_size = y_test.shape[0]

        output_types = (x_train.dtype, y_train.dtype)
        output_shape = (x_train.shape[1:], y_train.shape[1:])

        train_dataset = Dataset.from_generator(
            lambda: self._gen_data(x_train, y_train), output_types, output_shape) \
            .shuffle(self.shuffle_size) \
            .batch(self.batch_size, drop_remainder=True) \
            .prefetch(self.prefetch_size)

        test_dataset = Dataset.from_generator(
            lambda: self._gen_data(x_test, y_test), output_types, output_shape) \
            .shuffle(self.shuffle_size) \
            .batch(self.batch_size, drop_remainder=True) \
            .prefetch(self.prefetch_size)

        iter_ = Iterator.from_structure(train_dataset.output_types,
                                        train_dataset.output_shapes)

        self.x, self.y = iter_.get_next()

        self.train_init_op = iter_.make_initializer(train_dataset)
        self.test_init_op = iter_.make_initializer(test_dataset)
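`_load_data`, `_to_one_hot`, and `_gen_data` come from an unshown base class; a plausible sketch of the generator logic, purely an assumption consistent with the per-example `output_shape` above, yields one (x, y) pair at a time:

# Hypothetical sketch of the generator logic assumed by _gen_data.
def gen_data(x, y):
    # Yield one example at a time so Dataset.from_generator sees
    # per-example shapes (x.shape[1:], y.shape[1:]).
    for xi, yi in zip(x, y):
        yield xi, yi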
Example #7
def main():
    # Initial parameter settings
    learning_rate = 1e-3
    num_epochs = 10  # number of epochs (previously 10)
    train_batch_size = 100  # previously 1024
    test_batch_size = 100
    dropout_rate = 0.5
    num_classes = 2  # number of class labels
    display_step = 2  # write loss and accuracy to TensorBoard after every display_step training batches
    # need: display_step <= train_dataset_size / train_batch_size

    filewriter_path = "./tmp/tensorboard"  # directory for TensorBoard files
    checkpoint_path = "./tmp/checkpoints"  # directory for the trained model and checkpoints

    image_format = 'jpg'  # file format of the dataset images
    file_name_of_class = ['sicken',
                          'normal']  # 'sicken' maps to label 0, 'normal' to label 1; file names contain a distinctive word such as the class name
    train_dataset_paths = [
        'G:/python/病虫害识别/dataset/train/sicken/',
        'G:/python/病虫害识别/dataset/train/normal/'
    ]  # training set paths (point these at your actual training data)
    test_dataset_paths = [
        'G:/python/病虫害识别/dataset/test/sicken/',
        'G:/python/病虫害识别/dataset/test/normal/'
    ]  # test set paths (point these at your actual test data)
    # Note: by default each sample's file name contains the name of its class label,
    # i.e. one of the names in file_name_of_class
    # End of initial parameter settings

    # Training dataset preprocessing
    train_image_paths = []
    train_labels = []
    # Open the training set directories, read all images, and build the list of image paths
    for train_dataset_path in train_dataset_paths:
        length = len(train_image_paths)
        train_image_paths[length:length] = np.array(
            glob.glob(train_dataset_path + '*.' + image_format)).tolist()
    for image_path in train_image_paths:
        image_file_name = image_path.split('/')[-1]
        print(image_file_name)
        for i in range(num_classes):
            if file_name_of_class[i] in image_file_name:
                train_labels.append(i)
                break
    print(train_labels)
    # Test dataset preprocessing
    test_image_paths = []
    test_labels = []
    # Open the test set directories, read all images, and build the list of image paths
    for test_dataset_path in test_dataset_paths:
        length = len(test_image_paths)
        test_image_paths[length:length] = np.array(
            glob.glob(test_dataset_path + '*.' + image_format)).tolist()
    for image_path in test_image_paths:
        image_file_name = image_path.split('/')[-1]
        for i in range(num_classes):
            if file_name_of_class[i] in image_file_name:
                test_labels.append(i)
                break
    print(test_labels)
    # get Datasets
    # Use the image generator to convert the training set images into 3-D arrays
    train_data = ImageDataGenerator(images=train_image_paths,
                                    labels=train_labels,
                                    batch_size=train_batch_size,
                                    num_classes=num_classes,
                                    image_format=image_format,
                                    shuffle=True)

    # Use the image generator to convert the test set images into 3-D arrays
    test_data = ImageDataGenerator(images=test_image_paths,
                                   labels=test_labels,
                                   batch_size=test_batch_size,
                                   num_classes=num_classes,
                                   image_format=image_format,
                                   shuffle=False)

    # get Iterators
    with tf.name_scope('input'):
        # Define the iterators
        train_iterator = Iterator.from_structure(train_data.data.output_types,
                                                 train_data.data.output_shapes)
        training_initalizer = train_iterator.make_initializer(train_data.data)
        test_iterator = Iterator.from_structure(test_data.data.output_types,
                                                test_data.data.output_shapes)
        testing_initalizer = test_iterator.make_initializer(test_data.data)
        # Ops that return the next batch of data
        train_next_batch = train_iterator.get_next()
        test_next_batch = test_iterator.get_next()

    x = tf.placeholder(tf.float32, [None, 227, 227, 3])
    y = tf.placeholder(tf.float32, [None, num_classes])
    keep_prob = tf.placeholder(tf.float32)

    # alexnet
    fc8 = alexnet(x, keep_prob, num_classes)

    # loss
    with tf.name_scope('loss'):
        loss_op = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=fc8, labels=y))
    # optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss_op)

    # accuracy
    with tf.name_scope("accuracy"):
        correct_pred = tf.equal(tf.argmax(fc8, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    init = tf.global_variables_initializer()

    # Tensorboard
    tf.summary.scalar('loss', loss_op)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(filewriter_path)

    # saver
    saver = tf.train.Saver()

    # Number of iterations per epoch
    train_batches_per_epoch = int(
        np.floor(train_data.data_size / train_batch_size))
    test_batches_per_epoch = int(
        np.floor(test_data.data_size / test_batch_size))

    # Start training
    with tf.Session() as sess:
        sess.run(init)
        # saver.restore(sess, "./tmp/checkpoints/model_epoch18.ckpt")
        # Tensorboard
        writer.add_graph(sess.graph)

        print("{}: Start training...".format(datetime.now()))
        print("{}: Open Tensorboard at --logdir {}".format(
            datetime.now(), filewriter_path))

        for epoch in range(num_epochs):
            sess.run(training_initalizer)
            print("{}: Epoch number: {} start".format(datetime.now(),
                                                      epoch + 1))

            # train
            for step in range(train_batches_per_epoch):
                img_batch, label_batch = sess.run(train_next_batch)
                loss, _ = sess.run([loss_op, train_op],
                                   feed_dict={
                                       x: img_batch,
                                       y: label_batch,
                                       keep_prob: dropout_rate
                                   })
                if step % display_step == 0:
                    # loss
                    print("{}: loss = {}".format(datetime.now(), loss))

                    # Tensorboard
                    s = sess.run(merged_summary,
                                 feed_dict={
                                     x: img_batch,
                                     y: label_batch,
                                     keep_prob: 1.
                                 })
                    writer.add_summary(s,
                                       epoch * train_batches_per_epoch + step)

            # accuracy
            print("{}: Start validation".format(datetime.now()))
            sess.run(testing_initalizer)
            test_acc = 0.
            test_count = 0
            for _ in range(test_batches_per_epoch):
                img_batch, label_batch = sess.run(test_next_batch)
                acc = sess.run(accuracy,
                               feed_dict={
                                   x: img_batch,
                                   y: label_batch,
                                   keep_prob: 1.0
                               })
                test_acc += acc
                test_count += 1
            try:
                test_acc /= test_count
            except ZeroDivisionError:
                print('ZeroDivisionError!')
            print("{}: Validation Accuracy = {:.4f}".format(
                datetime.now(), test_acc))

            # save model
            print("{}: Saving checkpoint of model...".format(datetime.now()))
            checkpoint_name = os.path.join(
                checkpoint_path, 'model_epoch' + str(epoch + 1) + '.ckpt')
            save_path = saver.save(sess, checkpoint_name)
            #saver.save(sess, "./tmp/checkpoints/model_epoch18.ckpt")

            # this epoch is over
            print("{}: Epoch number: {} end".format(datetime.now(), epoch + 1))
Example #8
def TestGraph():
    # Loading graph
    sess = tf.Session()
    saver = tf.train.Saver(max_to_keep=1)
    checkpoint = tf.train.latest_checkpoint(SAVE_PATH)
    if checkpoint is not None:
        print("Restoring checkpoint %s" % (checkpoint))
        saver.restore(sess, checkpoint)
        print("Model restored")
    else:
        print("Model not found")
        return
    print("")

    # Loading test set

    if (not os.path.exists(TEST_PATH)):
        print("Error reading testing directory.")
        return

    # Reading the dataset and creating testing data class
    with tf.device('/cpu:0'):
        test_data = ImageDataGenerator(TEST_PATH,
                                       batch_size=1,
                                       num_classes=NUM_CLASSES,
                                       channel=CHANNEL,
                                       shuffle=False,
                                       data_aug=False,
                                       img_size=IMAGE_SIZE,
                                       resize=TEST)

        # Create a reinitializable iterator given the dataset structure
        iterator = Iterator.from_structure(test_data.data.output_types,
                                           test_data.data.output_shapes)
        next_batch = iterator.get_next()

    # Ops for initializing the testing iterator
    testing_init_op = iterator.make_initializer(test_data.data)

    print("")
    print("Dataset Size:")
    print("Test data: " + str(test_data.data_size))
    print("")

    # Now calculate the predicted classes for the batches.
    # We will just iterate through all the batches.

    # Test the model on the entire testing set
    print("{} Start testing".format(
        datetime.now().strftime('%d-%m-%Y %H:%M:%S')))

    sess.run(testing_init_op)

    test_acc = 0.
    test_count = 0

    for _ in range(test_data.data_size):
        img_batch, label_batch = sess.run(next_batch)
        batch_acc = sess.run('accuracy:0',
                             feed_dict={
                                 x: img_batch,
                                 y_true: label_batch
                             })

        test_acc += batch_acc
        test_count += 1

    test_acc /= test_count

    print("{} Testing Accuracy = {:>6.1%}".format(
        datetime.now().strftime('%d-%m-%Y %H:%M:%S'), test_acc))
Example #9
def TrainGraph(output):

    global_step = tf.Variable(0, name="global_step")

    y_true_cls = tf.argmax(y_true, axis=1)  #returns the true class index

    # y predicted image class
    y_pred = tf.nn.softmax(output, name="y_pred")
    y_pred_cls = tf.argmax(y_pred, axis=1,
                           name="y_pred_cls")  #returns the predicted index

    # Cost function
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=output, labels=tf.stop_gradient(y_true))
    cost = tf.reduce_mean(cross_entropy)

    # Optimizer
    train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
        cost, global_step=global_step)

    # Accuracy
    correct_prediction = tf.equal(y_pred_cls, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float32"),
                              name="accuracy")

    # Creating the session to run the graph
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    #config.gpu_options.allocator_type = 'BFC'
    #config.gpu_options.per_process_gpu_memory_fraction=0.90
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Saver
    saver = tf.train.Saver(max_to_keep=1)
    if not os.path.exists(SAVE_PATH):  # if the directory doesn't exist, create it
        os.makedirs(SAVE_PATH)
    checkpoint = tf.train.latest_checkpoint(SAVE_PATH)  # restore the latest checkpoint
    if checkpoint is not None:
        print("Restoring checkpoint %s" % (checkpoint))
        saver.restore(sess, checkpoint)
        print("Model restored")
    else:
        sess.run(tf.global_variables_initializer())
        print("Initialized a new Graph")
    print("")

    # Tensorboard

    # Visualize conv1 filters and histograms
    with tf.variable_scope('visualization'):
        tf.get_variable_scope().reuse_variables()
        visualization.conv1_filters()
        visualization.histograms()

    # Create a summary to monitor cost tensor
    tf.summary.scalar("loss/step", cost)
    # Create a summary to monitor accuracy tensor
    tf.summary.scalar("accuracy/step", accuracy)
    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    # Loading training set

    if (not os.path.exists(TRAIN_PATH)):
        print("Error reading training directory")
        return

    # Reading the dataset and creating training data class
    with tf.device('/cpu:0'):
        tr_data = ImageDataGenerator(TRAIN_PATH,
                                     batch_size=BATCH_SIZE,
                                     num_classes=NUM_CLASSES,
                                     channel=CHANNEL,
                                     shuffle=True,
                                     data_aug=AUG,
                                     img_size=IMAGE_SIZE,
                                     crop=CROP,
                                     resize=RESIZE)

        # Create a reinitializable iterator given the dataset structure
        iterator = Iterator.from_structure(tr_data.data.output_types,
                                           tr_data.data.output_shapes)
        next_batch = iterator.get_next()

    # Ops for initializing the training iterator
    training_init_op = iterator.make_initializer(tr_data.data)

    # Get the number of training steps per epoch
    train_batches_per_epoch = int(np.floor(tr_data.data_size / BATCH_SIZE))

    # Loading validation set (if used)

    if (VALIDATION_EPOCH > 0):
        # Reading the dataset and creating validation data class

        if (not os.path.exists(VALIDATION_PATH)):
            print("Error reading validation directory")
            return

        with tf.device('/cpu:0'):
            val_data = ImageDataGenerator(VALIDATION_PATH,
                                          batch_size=BATCH_SIZE,
                                          num_classes=NUM_CLASSES,
                                          channel=CHANNEL,
                                          shuffle=False,
                                          data_aug=False,
                                          img_size=IMAGE_SIZE,
                                          crop=CROP,
                                          resize=RESIZE)

            # Create a reinitializable iterator given the dataset structure
            val_iterator = Iterator.from_structure(val_data.data.output_types,
                                                   val_data.data.output_shapes)
            val_next_batch = val_iterator.get_next()

        # Ops for initializing the validation iterator
        validation_init_op = val_iterator.make_initializer(val_data.data)

        # Get the number of validation steps per epoch
        val_batches_per_epoch = int(np.floor(val_data.data_size / BATCH_SIZE))

    # Print sizes
    print("")
    print("Dataset Size:")
    print("Training data: " + str(tr_data.data_size))

    if (VALIDATION_EPOCH > 0):
        print("Validation data: " + str(val_data.data_size))

    print("")

    # Write logs to Tensorboard
    if not os.path.exists(LOGS_PATH):  # if the directories don't exist, create them
        os.makedirs(LOGS_PATH)

        if (not os.path.exists(LOGS_PATH + "/train")):
            os.makedirs(LOGS_PATH + "/train")

        if (VALIDATION_EPOCH > 0):
            if (not os.path.exists(LOGS_PATH + "/val")):
                os.makedirs(LOGS_PATH + "/val")

    train_summary_writer = tf.summary.FileWriter(LOGS_PATH + "/train",
                                                 graph=tf.get_default_graph())

    if (VALIDATION_EPOCH > 0):
        val_summary_writer = tf.summary.FileWriter(LOGS_PATH + "/val")

    # Start training
    for i in range(EPOCH):
        print("{} Epoch number: {}".format(
            datetime.now().strftime('%d-%m-%Y %H:%M:%S'), i + 1))

        # Initialize iterator with the training dataset
        sess.run(training_init_op)

        train_acc = 0.
        train_loss = 0.

        for step in range(train_batches_per_epoch):

            # Get next batch of data
            img_batch, label_batch = sess.run(next_batch)

            # And run the training op
            i_global, _, summary, batch_acc, batch_loss = sess.run(
                [global_step, train_step, merged_summary_op, accuracy, cost],
                feed_dict={
                    x: img_batch,
                    y_true: label_batch
                })

            # Write Tensorboard logs at every iteration
            train_summary_writer.add_summary(summary, i_global)

            train_acc += batch_acc
            train_loss += batch_loss

        train_acc /= train_batches_per_epoch
        train_loss /= train_batches_per_epoch

        # Do validation after VALIDATION_EPOCH epochs
        if (VALIDATION_EPOCH > 0 and (i + 1) % VALIDATION_EPOCH == 0):
            # Initialize iterator with the validation dataset
            sess.run(validation_init_op)

            val_acc = 0.
            val_loss = 0.

            for step in range(val_batches_per_epoch):

                # Get next batch of data
                val_img_batch, val_label_batch = sess.run(val_next_batch)

                # Evaluate the batch; the training op is intentionally left out
                # so the validation data does not update the weights
                i_global, summary, batch_acc, batch_loss = sess.run(
                    [global_step, merged_summary_op, accuracy, cost],
                    feed_dict={
                        x: val_img_batch,
                        y_true: val_label_batch
                    })

                # Write Tensorboard logs at every iteration
                val_summary_writer.add_summary(summary, i_global)

                val_acc += batch_acc
                val_loss += batch_loss

            val_acc /= val_batches_per_epoch
            val_loss /= val_batches_per_epoch

        # Print status to screen and tensorboard every PRINT_EPOCH epochs (and the last).
        if ((i + 1) % PRINT_EPOCH == 0) or ((i + 1) == EPOCH):
            # Print status.
            print("{0} Global Step: {1}, Training Accuracy: {2:>6.1%}".format(
                datetime.now().strftime('%d-%m-%Y %H:%M:%S'), i_global,
                train_acc))
            visualization.make_summary('accuracy/average', train_acc,
                                       train_summary_writer, i_global)
            visualization.make_summary('loss/average', train_loss,
                                       train_summary_writer, i_global)

            if (VALIDATION_EPOCH > 0):
                # Print status.
                print("{0} Global Step: {1}, Validation Accuracy: {2:>6.1%}".
                      format(datetime.now().strftime('%d-%m-%Y %H:%M:%S'),
                             i_global, val_acc))
                visualization.make_summary('accuracy/average', val_acc,
                                           val_summary_writer, i_global)
                visualization.make_summary('loss/average', val_loss,
                                           val_summary_writer, i_global)

        # Save a checkpoint to disk every SAVE_EPOCH epochs (and the last).
        if ((i + 1) % SAVE_EPOCH == 0) or ((i + 1) == EPOCH):
            # Save all variables of the TensorFlow graph to a checkpoint.
            saver.save(sess,
                       SAVE_PATH + "/" + MODEL_NAME + ".ckpt",
                       global_step=global_step)
            print("{0} Checkpoint saved.".format(
                datetime.now().strftime('%d-%m-%Y %H:%M:%S')))
Example #10
# Place data loading and preprocessing on the cpu
with tf.device('/cpu:0'):
    tr_data = ImageDataGenerator(train_file,
                                 mode='training',
                                 batch_size=batch_size,
                                 num_classes=num_classes,
                                 shuffle=True)
    val_data = ImageDataGenerator(val_file,
                                  mode='inference',
                                  batch_size=batch_size,
                                  num_classes=num_classes,
                                  shuffle=False)

    # create a reinitializable iterator given the dataset structure
    iterator = Iterator.from_structure(tr_data.data.output_types,
                                       tr_data.data.output_shapes)
    next_batch = iterator.get_next()

# Ops for initializing the two different iterators
training_init_op = iterator.make_initializer(tr_data.data)
validation_init_op = iterator.make_initializer(val_data.data)

# TF placeholder for graph input and output
x = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
y = tf.placeholder(tf.float32, [batch_size, num_classes])
keep_prob = tf.placeholder(tf.float32)

# Initialize model
model = VGG19Net(x, keep_prob, num_classes, train_layers)

# Link variable to model output
Example #11
def parse_input_fn_result(train_with_eval,
                          result,
                          handler=None,
                          only_iterator=False):
    """Gets features, labels, and hooks from the result of an Estimator input_fn.

    Parameters
    ----------
    train_with_eval: bool
        train with evaluation or not
    result: output of an input_fn to an estimator, which should be one of:

        For train without eval:
            * A `tf.data.Dataset` object: Outputs of the `Dataset` object must be a
            tuple (features, labels) with the same constraints as below.
            * A tuple (features, labels): Where `features` is a `Tensor` or a
            dictionary of string feature name to `Tensor` and `labels` is a
            `Tensor` or a dictionary of string label name to `Tensor`. Both
            `features` and `labels` are consumed by `model_fn`. They should
            satisfy the expectation of `model_fn` from inputs.
        For train with eval:
            * A list of 2 `tf.data.Dataset` objects: the train Dataset and the eval Dataset
    handler: a `tf.string` placeholder fed with an iterator's string handle;
        required when train_with_eval is True
    only_iterator: bool
        Return the iterator(s) instead of features. Set to True when using a
        distribution strategy

    Returns
    -------
    Tuple of features, labels, and input_hooks, where features are as described
    above, labels are as described above or None, and input_hooks are a list
    of SessionRunHooks to be included when running.

    Raises:
        ValueError: if the result is a list or tuple of length != 2.
    """
    input_hooks = []
    with ops.name_scope("DataGenerator"):
        if not train_with_eval:
            try:
                # We can't just check whether this is a tf.data.Dataset instance here,
                # as this is plausibly a PerDeviceDataset. Try treating as a dataset first.
                iterator = result.make_initializable_iterator()
            except AttributeError:
                # Not a dataset or dataset-like-object. Move along.
                pass
            else:
                input_hooks.append(
                    estimator_util._DatasetInitializerHook(iterator))
                if only_iterator:
                    return iterator, input_hooks

                result = iterator.get_next()

            # If `result` was not a dataset it is assumed to already be a
            # (features, labels) structure, so parse it directly.
            return estimator_util.parse_iterator_result(result) + (
                input_hooks, )
        else:
            err_str = "`result` must be a list of Dataset instances when train_with_eval is True"
            if not isinstance(result, (list, tuple)):
                raise TypeError(err_str)
            if len(result) != 2:
                raise ValueError(
                    "`result` should contains 2 Dataset instances, but got {}".
                    format(len(result)))
            ds_prop = _check_dataset_structure(result[0], result[1])

            train_iterator = result[0].make_initializable_iterator()
            eval_iterator = result[1].make_initializable_iterator()
            input_hooks.extend([
                estimator_util._DatasetInitializerHook(train_iterator),
                estimator_util._DatasetInitializerHook(eval_iterator)
            ])

            iterator = Iterator.from_string_handle(handler, ds_prop.types,
                                                   ds_prop.shapes,
                                                   ds_prop.classes)
            if only_iterator:
                return iterator, train_iterator, eval_iterator, input_hooks

            result = iterator.get_next()
            return estimator_util.parse_iterator_result(result) + (
                train_iterator, eval_iterator, input_hooks)
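A sketch of the train-with-eval path, assuming `estimator_util.parse_iterator_result` returns a (features, labels) pair and that the handle placeholder is later fed with one of the iterators' string handles; the dataset names are placeholders:

# Hypothetical call for the train-with-eval path.
handle_ph = tf.placeholder(tf.string, shape=[], name="dataset_handle")
features, labels, train_it, eval_it, hooks = parse_input_fn_result(
    True, [train_dataset, eval_dataset], handler=handle_ph)
# At run time, feed handle_ph with sess.run(train_it.string_handle()) to read
# training data, or sess.run(eval_it.string_handle()) to read eval data.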