Example #1
    def validate(self, sess):
        if self._acc_ops is None:
            self._acc_ops = self._init_acc_ops()
        if self._acc_orient_ops is None:
            self._acc_orient_ops = self._init_acc_orient_ops()
        sess.run(self._acc_ops.reset)
        sess.run(self._acc_orient_ops.reset)
        num_batches_val = int(self.data.validation.num_examples/self.batch_size_val)
        if self.tf_record_prefix is not None:
            img, label, label_orient = MNIST.read_and_decode_ops(
                self.data.validation.path,
                one_hot=self.data.validation.one_hot,
                num_orientations=len(self.data.validation.orientations))
            batch_xs_op, batch_ys_op, batch_os_op = tf.train.batch(
                [img, label, label_orient],
                batch_size=self.batch_size_val,
                capacity=2000,
                num_threads=8)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            
        for _ in xrange(num_batches_val):
            if self.tf_record_prefix is None:
                batch_xs, batch_ys = self.data.validation.next_batch(self.batch_size_val,
                                                                     shuffle=False)
            else:
                batch_xs, batch_ys, batch_os = sess.run([batch_xs_op, batch_ys_op, batch_os_op])
            batch_xs_in = batch_xs
            if self.do_augment_rot:
                augment_op, batch_os2 = self.rotation_ops_multiset_val(3)
                #TODO remove hardcoded init
                rots = rotation_rad(-60, 60, 15)
                num_orients = len(rots)
                orients_dense = np.array([rots.index(o) for o in batch_os2])
                batch_os_one_hot = dense_to_one_hot(orients_dense, num_orients)
                batch_xs_in = sess.run(augment_op, feed_dict={self.x : batch_xs})
            else:
                #TODO remove hardcoded init
                rots = rotation_rad(-60, 60, 15)
                batch_os_one_hot = dense_to_one_hot(
                    np.zeros((self.batch_size_val,), dtype=int) + (len(rots) // 2),
                    len(rots))
            sess.run([self._acc_ops.metric, self._acc_ops.update,
                      self._acc_orient_ops.metric, self._acc_orient_ops.update],
                     feed_dict={self.x: batch_xs_in,
                                self.y_: batch_ys,
                                self.orient_: batch_os_one_hot})
        if self.tf_record_prefix is not None:
            coord.request_stop()
            coord.join(threads)
Example #2
def read_data_sets(validation_size=5000, one_hot=True):
    cifar_filename = "datasets/" + "cifar-10-python.tar.gz"
    # The archive extracts to this directory, holding five pickled train batches.
    cifar_dir = "datasets/cifar-10-batches-py/"
    batches = ["data_batch_%d" % i for i in range(1, 6)]

    try:
        os.makedirs("datasets")
    except OSError:
        pass

    if not os.path.isfile(cifar_dir + batches[0]):
        # Download data
        print("Downloading ckplus dataset")
        urllib.urlretrieve(
            "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz",
            cifar_filename)
        tar = tarfile.open(cifar_filename)
        tar.extractall(path="datasets")
        tar.close()
        os.remove(cifar_filename)

    # Process batches
    all_batch_images = []
    all_batch_labels = []
    for batch_name in batches:
        batch = np.load(cifar_dir + batch_name)
        batch_images = batch['data']
        all_batch_images.append(batch_images)
        batch_labels = batch['labels']
        all_batch_labels.extend(batch_labels)

    all_batch_images = np.vstack(all_batch_images).reshape(-1, 3, 32, 32)
    all_batch_images = all_batch_images.transpose([0, 2, 3, 1])
    all_batch_labels = np.array(all_batch_labels)

    train_images, validation_images, train_labels, validation_labels = train_test_split(
        all_batch_images,
        all_batch_labels,
        test_size=validation_size,
        random_state=0)

    test_batch = np.load(cifar_dir + "test_batch")
    test_images = test_batch['data'].reshape(-1, 3, 32, 32)
    test_images = test_images.transpose([0, 2, 3, 1])

    test_labels = np.array(test_batch['labels'])

    if one_hot:
        train_labels = dense_to_one_hot(train_labels, NUM_CLASSES)
        validation_labels = dense_to_one_hot(validation_labels, NUM_CLASSES)
        test_labels = dense_to_one_hot(test_labels, NUM_CLASSES)

    train = DataSet(train_images, train_labels, reshape=False)
    validation = DataSet(validation_images, validation_labels, reshape=False)
    test = DataSet(test_images, test_labels, reshape=False)

    return Datasets(train=train, validation=validation, test=test)
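
Every example on this page funnels integer labels through dense_to_one_hot. For reference, a minimal sketch consistent with the classic tensorflow.contrib.learn MNIST helper these snippets import:

import numpy as np

def dense_to_one_hot(labels_dense, num_classes):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot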
Example #3
    def __init__(self, dirname, one_hot=True):
        self._epochs_completed = 0
        self._index_in_epoch = 0
        self._one_hot = one_hot
        self._num_classes = 10

        self.maybe_download_and_extract(dirname)
        dirname = os.path.join(dirname, 'cifar-10-batches-py/')
        images = []
        labels = []
        for i in range(1, 6):
            fpath = os.path.join(dirname, 'data_batch_' + str(i))
            image, label = self.load_batch(fpath)
            if i == 1:
                images = np.array(image)
                labels = np.array(label)
            else:
                images = np.concatenate([images, image], axis=0)
                labels = np.concatenate([labels, label], axis=0)
        images = np.dstack(
            (images[:, :1024], images[:, 1024:2048], images[:, 2048:]))
        images = np.reshape(images, [-1, 32, 32, 3])
        if self._one_hot:
            labels = dense_to_one_hot(labels, self._num_classes)

        print('Cifar images size:', images.shape)
        print('Cifar labels size:', labels.shape)
        self._images = images / 255.0 - 0.5
        self._labels = labels
        self._num_examples = images.shape[0]
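
Examples #3 and #5 both rely on a load_batch helper that is not shown. A minimal sketch, assuming the standard pickled CIFAR-10 batch layout ('data' is a (10000, 3072) uint8 array, 'labels' a list of ints):

import pickle

def load_batch(fpath):
    # On Python 2 this is effectively cPickle; on Python 3 pass
    # encoding='latin1' to pickle.load for the CIFAR-10 files.
    with open(fpath, 'rb') as f:
        batch = pickle.load(f)
    return batch['data'], batch['labels']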
Example #4
 def validate(self, sess):
     if self._acc_ops is None:
         self._acc_ops = self._init_acc_ops()
     if self._acc_orient_ops is None:
         self._acc_orient_ops = self._init_acc_orient_ops()
     sess.run(self._acc_ops.reset)
     sess.run(self._acc_orient_ops.reset)
     num_batches_val = int(self.data.validation.num_examples /
                           self.batch_size_val)
     for _ in xrange(num_batches_val):
         if self.tf_record_prefix is None:
             batch_xs, batch_ys = self.data.validation.next_batch(
                 self.batch_size_val, shuffle=False)
         else:
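             # batch_xs_op / batch_ys_op / batch_os_op are assumed to come
             # from a TFRecord input pipeline built before this loop, as in
             # Example #1 above.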
             batch_xs, batch_ys, batch_os = sess.run(
                 [batch_xs_op, batch_ys_op, batch_os_op])
         batch_xs_in = batch_xs
         if self.do_augment_rot:
             augment_op, batch_os2 = self.rotation_ops_multiset_val(3)
             rots = rotation_rad(-60, 60, 15)
             num_orients = len(rots)
             orients_dense = np.array([rots.index(o) for o in batch_os2])
             batch_os_one_hot = dense_to_one_hot(orients_dense, num_orients)
             batch_xs_in = sess.run(augment_op,
                                    feed_dict={self.x: batch_xs})
         else:
             # default orientation when not augmenting, mirroring Example #1
             rots = rotation_rad(-60, 60, 15)
             batch_os_one_hot = dense_to_one_hot(
                 np.zeros((self.batch_size_val,), dtype=int) + (len(rots) // 2),
                 len(rots))
         sess.run([self._acc_ops.metric, self._acc_ops.update,
                   self._acc_orient_ops.metric, self._acc_orient_ops.update],
                  feed_dict={self.x: batch_xs_in,
                             self.y_: batch_ys,
                             self.orient_: batch_os_one_hot})
Example #5
def cifar_datasets(dirname, one_hot=True,
                   dtype=dtypes.float32,
                   reshape=False,
                   seed=None):
    maybe_download_and_extract(dirname)
    dirname = os.path.join(dirname, 'cifar-10-batches-py/')
    train_images = []
    train_labels = []
    for i in range(1, 6):
        fpath = os.path.join(dirname, 'data_batch_' + str(i))
        image, label = load_batch(fpath)
        if i == 1:
            train_images = np.array(image)
            train_labels = np.array(label)
        else:
            train_images = np.concatenate([train_images, image], axis=0)
            train_labels = np.concatenate([train_labels, label], axis=0)
    train_images = np.dstack((train_images[:, :1024], train_images[:, 1024:2048], train_images[:, 2048:]))
    train_images = np.reshape(train_images, [-1, 32, 32, 3])
    if one_hot:
        train_labels = dense_to_one_hot(train_labels, 10)
    print('Cifar train_images size:', train_images.shape)
    print('Cifar train_labels size:', train_labels.shape)
    train_images = train_images / 255.0 - 0.5

    fpath = os.path.join(dirname, "test_batch")
    image, label = load_batch(fpath)
    test_images = np.array(image)
    test_labels = np.array(label)
    test_images = np.dstack((test_images[:, :1024], test_images[:, 1024:2048], test_images[:, 2048:]))
    test_images = np.reshape(test_images, [-1, 32, 32, 3])
    if one_hot:
        test_labels = dense_to_one_hot(test_labels, 10)
    print "Cifar test_images size:", test_images.shape
    print "Cifar test_lables size:", test_labels.shape
    test_images = test_images / 255.0 - 0.5

    options = dict(dtype=dtype, reshape=reshape, seed=seed)
    train = DataSet(train_images, train_labels, **options)
    test = DataSet(test_images, test_labels, **options)
    return Datasets(train=train, test=test)
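
A hypothetical call, assuming maybe_download_and_extract places the CIFAR-10 archive under the given directory:

datasets = cifar_datasets('./data', one_hot=True)
print(datasets.train.images.shape)  # expected (50000, 32, 32, 3)
print(datasets.train.labels.shape)  # expected (50000, 10)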
Example #6
def read_datafile(data_file, one_hot=False, num_classes=10):
    mat = np.loadtxt(data_file, np.float32)
    images = mat[:, :-1]
    labels = mat[:, -1]
    labels = labels.astype(np.uint8)

    num_images = images.shape[0]
    rows = 28
    cols = 28
    assert rows * cols == images.shape[1],\
        "loaded images are not 28*28"
    images = images.reshape(num_images, rows, cols, 1)

    if one_hot:
        labels = mnist.dense_to_one_hot(labels, num_classes)
    return images, labels
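
A hypothetical usage, assuming a whitespace-separated text file in which each row holds 784 pixel values followed by a class label:

images, labels = read_datafile('mnist_digits.txt', one_hot=True)
print(images.shape, labels.shape)  # (N, 28, 28, 1) and (N, 10)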
Example #7
def loadLabelSet(filename):
	print("load label set", filename)
	binfile = open(filename, 'rb')
	buffers = binfile.read()

	head = struct.unpack_from('>II', buffers, 0)
	print("head,", head)

	offset = struct.calcsize('>II')
	print("offset,", offset)
	labels = np.frombuffer(buffers, dtype=np.uint8, offset=offset)
	binfile.close()

	labels = dense_to_one_hot(labels, 10)
	print("load label finished")
	return labels
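
The '>II' header in loadLabelSet is the big-endian idx1 preamble: a magic number (2049 for label files) followed by the item count. A stricter variant of the same read, for illustration:

def load_label_set_checked(filename):
    # Same read as above, but verifying the idx1 header first.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()
    magic, num_items = struct.unpack_from('>II', buffers, 0)
    assert magic == 2049, 'not an MNIST label file'
    labels = np.frombuffer(buffers, dtype=np.uint8,
                           offset=struct.calcsize('>II'))
    assert labels.shape[0] == num_items
    return dense_to_one_hot(labels, 10)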
def read_user_data(path, ratio):
    """

    :param path: 数据所在路径
    :param ratio: 训练集和测试集比例
    :return:
    """
    images = []
    labels = []
    dtype = dtypes.float32
    reshape = True
    seed = None

    with open(os.path.join(path, 'tag.json')) as f:
        label_json = json.load(f)

    for (root, dirs, files) in os.walk(path):
        for filename in files:
            if filename.endswith('.jpg'):
                name = os.path.join(root, filename)
                img = Image.open(name)
                img = numpy.array(img)
                img = img.reshape([img.shape[0], img.shape[1], 1])
                labels.append(int(label_json[filename]))
                images.append(img)
    images = numpy.array(images)
    labels = dense_to_one_hot(numpy.array(labels), 10)

    number = labels.shape[0]
    middle = int(number * ratio)
    train_image = images[:middle]
    train_label = labels[:middle]
    test_image = images[middle:]
    test_label = labels[middle:]
    options = dict(dtype=dtype, reshape=reshape, seed=seed)
    train = DataSet(train_image, train_label, **options)
    test = DataSet(test_image, test_label, **options)
    return base.Datasets(train=train, validation=None, test=test)
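
A hypothetical call, assuming tag.json maps each .jpg filename to a digit label:

datasets = read_user_data('data', 0.8)  # 80% train, 20% test
print(datasets.train.num_examples, datasets.test.num_examples)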
def initDataSetsClasses():
    global dataSetTrain
    global dataSetTest

    print(FLAGS.train_classes, FLAGS.test_classes)
    # Variable to read out the labels & data of the DataSet Object.
    mnistData = read_data_sets('./', one_hot=True)
    # MNIST labels & data for training.
    mnistLabelsTrain = mnistData.train.labels
    mnistDataTrain = mnistData.train.images

    # MNIST labels & data for testing.
    mnistLabelsTest = mnistData.test.labels
    mnistDataTest = mnistData.test.images
    print("LABELS", mnistLabelsTest.shape, mnistLabelsTrain.shape)

    if FLAGS.permuteTrain != -1:
        # training dataset
        np.random.seed(FLAGS.permuteTrain)
        permTr = np.random.permutation(mnistDataTrain.shape[1])
        mnistDataTrainPerm = mnistDataTrain[:, permTr]
        mnistDataTrain = mnistDataTrainPerm
        # dataSetTrain = DataSet(255. * dataSetTrainPerm,
        #                       mnistLabelsTrain, reshape=False)
    if FLAGS.permuteTest != -1:
        # testing dataset
        np.random.seed(FLAGS.permuteTest)
        permTs = np.random.permutation(mnistDataTest.shape[1])
        mnistDataTestPerm = mnistDataTest[:, permTs]
        # dataSetTest = DataSet(255. * dataSetTestPerm,
        #                      mnistLabelsTest, reshape=False)
        mnistDataTest = mnistDataTestPerm

    if True:
        print(FLAGS.train_classes, FLAGS.test_classes)
        labels_to_train = []
        labels_to_test = []
        if FLAGS.train_classes[0:]:
            labels_to_train = [int(i) for i in FLAGS.train_classes[0:]]
        if FLAGS.test_classes[0:]:
            labels_to_test = [int(i) for i in FLAGS.test_classes[0:]]

        # Filtered labels & data for training and testing.
        labels_train_classes = np.array([
            mnistLabelsTrain[i].argmax()
            for i in xrange(0, mnistLabelsTrain.shape[0])
            if mnistLabelsTrain[i].argmax() in labels_to_train
        ],
                                        dtype=np.uint8)
        data_train_classes = np.array([
            mnistDataTrain[i, :] for i in xrange(0, mnistLabelsTrain.shape[0])
            if mnistLabelsTrain[i].argmax() in labels_to_train
        ],
                                      dtype=np.float32)

        labels_test_classes = np.array([
            mnistLabelsTest[i].argmax()
            for i in xrange(0, mnistLabelsTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test
        ],
                                       dtype=np.uint8)
        data_test_classes = np.array([
            mnistDataTest[i, :] for i in xrange(0, mnistDataTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test
        ],
                                     dtype=np.float32)

        labelsTrainOnehot = dense_to_one_hot(labels_train_classes, 10)
        labelsTestOnehot = dense_to_one_hot(labels_test_classes, 10)

        dataSetTrain = DataSet(255. * data_train_classes,
                               labelsTrainOnehot,
                               reshape=False)
        dataSetTest = DataSet(255. * data_test_classes,
                              labelsTestOnehot,
                              reshape=False)
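
FLAGS.permuteTrain and FLAGS.permuteTest seed a fixed shuffling of the 784 pixel columns, i.e. the permuted-MNIST setup used in continual-learning work; the same seed always reproduces the same permutation. In isolation:

import numpy as np

np.random.seed(7)                  # e.g. FLAGS.permuteTrain == 7
perm = np.random.permutation(784)  # one fixed pixel permutation
# stand-in batch of flattened digits; real code uses mnistDataTrain
images = np.zeros((4, 784), dtype=np.float32)
images_perm = images[:, perm]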
Example #10
    def learn(self, sess):
        if self.y_ is None:
            self.logger.info("Define placeholder for ground truth. Dims: %d" % self.model.n_nodes[-1])
            self.y_ = tf.placeholder("float", [None, self.model.n_nodes[-1]])
        suffix = ''
        if self.do_finetune:
            self.logger.info("Finetuning!")
            suffix += 'finetune'
        dir_train = self.dirpath('train', suffix=suffix)
        dir_val = self.dirpath('validation', suffix=suffix)
            
        summary_writer_train = tf.summary.FileWriter(dir_train,
                                                     sess.graph)
        summary_writer_val = tf.summary.FileWriter(dir_val)
        cost, loss = self._cost_loss(self.dirname('train', suffix=suffix))
        vars_new = None
        if not self.do_finetune:
            vars_new = self.model.vars_new()  # limit optimizer vars when not finetuning
        optimizer = setup_optimizer(cost, self.learning_rate, var_list=vars_new)
        if not self.do_finetune:
            vars_new = self.model.vars_new()
            self.init_vars(sess, vars_new)
        summaries_merged_train = self._merge_summaries_scalars([cost, loss])
        
        if self._acc_ops is None:
            self._acc_ops = self._init_acc_ops()
        sess.run(self._acc_ops.reset)
        if self._acc_orient_ops is None:
            self._acc_orient_ops = self._init_acc_orient_ops()
        sess.run(self._acc_orient_ops.reset)
        summaries_merged_val = self._merge_summaries_scalars([self._acc_ops.metric,
                                                              self._acc_orient_ops.metric])
        if self.tf_record_prefix is not None:
            img, label, label_orient = MNIST.read_and_decode_ops(
                self.data.train.path,
                one_hot=self.data.train.one_hot,
                num_orientations=len(self.data.train.orientations))
            batch_xs_op, batch_ys_op, batch_os_op = tf.train.shuffle_batch(
                [img, label, label_orient],
                batch_size=self.batch_size_train,
                capacity=2000,
                min_after_dequeue=1000,
                num_threads=8)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        if self.do_augment_rot:
            #TODO remove hardcoded init
            rots = rotation_rad(-60, 60, 15)
        self._init_saver()
        itr_exp = 0
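        # NOTE: namedtuple here creates *classes*, which are then used as
        # ad-hoc mutable records by assigning attributes on the class objects.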
        result = collections.namedtuple('Result', ['max', 'last', 'name', 'history', 'epoch_last'])
        result_orient = collections.namedtuple('Result', ['max', 'last', 'name', 'history', 'epoch_last'])
        result.name = self._acc_ops.metric.name
        result.max = 0
        result.history = collections.deque(maxlen=3)
        result_orient.name = self._acc_orient_ops.metric.name
        result_orient.max = 0
        result_orient.history = collections.deque(maxlen=3)
        for epoch in xrange(self.training_epochs):
            self.logger.info("Start %s epoch %d, step %d" % (suffix, epoch, itr_exp))
            # Loop over all batches
            for itr_epoch in xrange(self.num_batches_train):
                if self.tf_record_prefix is None:
                    batch_xs, batch_ys = self.data.train.next_batch(self.batch_size_train)
                else:
                    batch_xs, batch_ys, batch_os = sess.run([batch_xs_op, batch_ys_op, batch_os_op])
                batch_xs_in = batch_xs
                if self.do_augment_rot:
                    augment_op, batch_os2 = self.rotation_ops_multiset_train(3)
                    batch_xs_in = sess.run(augment_op, feed_dict={self.x : batch_xs})
                    orients_dense = np.array([rots.index(o) for o in batch_os2])
                    batch_os_one_hot = dense_to_one_hot(orients_dense, len(rots))
                else:
                    #TODO remove hardcoded init
                    rots = rotation_rad(-60, 60, 15)
                    batch_os_one_hot = dense_to_one_hot(
                        np.zeros((self.batch_size_train,), dtype=int) + (len(rots) // 2),
                        len(rots))
                _, _, sess_summary = sess.run(
                    [optimizer, cost, summaries_merged_train],
                    feed_dict={self.x: batch_xs_in,
                               self.y_: batch_ys,
                               self.orient_: batch_os_one_hot})
                if self.is_time_to_track_train(itr_exp):
                    summary_writer_train.add_summary(sess_summary, itr_exp)
                itr_exp += 1
            self.validate(sess)
            # Run the metric ops once more; the feed_dict here is dummy data
            # and does not influence the accumulated metrics.
            acc, acc_orient, sess_summary = sess.run(
                [self._acc_ops.metric,
                 self._acc_orient_ops.metric,
                 summaries_merged_val],
                feed_dict={self.x: batch_xs,
                           self.y_: batch_ys,
                           self.orient_: batch_os_one_hot})
            if self.is_time_to_track_val(itr_exp):
                summary_writer_val.add_summary(sess_summary, itr_exp)
            self.logger.debug("validation accuracy after %s step %d: %f" % (suffix, itr_exp, acc))
            self.logger.debug("validation orientation accuracy after %s step %d: %f" % (suffix, itr_exp, acc_orient))
            fpath_save = os.path.join(dir_train, self._get_save_name())
            self.logger.debug("Save model at %s step %d to '%s'" % (suffix, itr_exp, fpath_save))
            self.saver.save(sess, fpath_save, global_step=itr_exp)
            result.last = acc
            result.epoch_last = epoch
            result.max = max(result.max, result.last)
            result.history.append(result.last)
            result_orient.last = acc_orient
            result_orient.max = max(result_orient.max, result_orient.last)
            result_orient.history.append(result_orient.last)
            result_orient.epoch_last = epoch
            if self.do_task_recognition:
                if len(result.history) == result.history.maxlen and np.absolute(np.mean(result.history)-result.last) < 1e-5:
                    self.logger.debug("Validation accuracy not changing anymore. Stop iterating.")
                    break
            elif self.do_task_orientation:
                if len(result_orient.history) == result_orient.history.maxlen and np.absolute(np.mean(result_orient.history)-result_orient.last) < 1e-5:
                    self.logger.debug("Validation orientation accuracy not changing anymore. Stop iterating.")
                    break
        if self.tf_record_prefix is not None:
            coord.request_stop()
            coord.join(threads)
        self.logger.info("Classification %s Optimization Finished!" % suffix)
        return result, result_orient
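
The stopping rule above keeps a deque of the last three validation accuracies and halts once they flat-line. The same criterion in isolation:

import collections
import numpy as np

history = collections.deque(maxlen=3)
for acc in [0.90, 0.91, 0.91, 0.91]:
    history.append(acc)
    if len(history) == history.maxlen and abs(np.mean(history) - acc) < 1e-5:
        print('validation accuracy plateaued; stop')
        break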
Example #11
def initDataSetsClasses(FLAGS):
    """
    global dataSetTrain
    global dataSetTest
    global dataSetTest2
    global dataSetTest3
    """
    print("FLAGS", FLAGS.train_classes, FLAGS.test_classes)
    # Variable to read out the labels & data of the DataSet Object.
    mnistData = read_data_sets('./', one_hot=True)
    # MNIST labels & data for training.
    mnistLabelsTrain = mnistData.train.labels
    mnistDataTrain = mnistData.train.images

    # MNIST labels & data for testing.
    mnistLabelsTest = mnistData.test.labels
    mnistDataTest = mnistData.test.images
    print("LABELS", mnistLabelsTest.shape, mnistLabelsTrain.shape)

    ## starting point:
    # TRAINSET: mnistDataTrain, mnistLabelsTrain
    # TESTSET: mnistDataTest, mnistLabelsTest

    # make a copy
    mnistDataTest2 = mnistDataTest + 0.0
    mnistLabelsTest2 = mnistLabelsTest + 0.0

    # make a copy
    mnistDataTest3 = mnistDataTest + 0.0
    mnistLabelsTest3 = mnistLabelsTest + 0.0

    if FLAGS.permuteTrain != -1:
        # training dataset
        np.random.seed(FLAGS.permuteTrain)
        permTr = np.random.permutation(mnistDataTrain.shape[1])
        mnistDataTrainPerm = mnistDataTrain[:, permTr]
        if FLAGS.mergeTrainWithPermutation:
            mnistDataTrain = np.concatenate(
                (mnistDataTrain, mnistDataTrainPerm), axis=0)
            # keep the labels aligned with the doubled training data
            mnistLabelsTrain = np.concatenate(
                (mnistLabelsTrain, mnistLabelsTrain), axis=0)
        else:
            mnistDataTrain = mnistDataTrainPerm
        # dataSetTrain = DataSet(255. * dataSetTrainPerm,
        #                       mnistLabelsTrain, reshape=False)
    if FLAGS.permuteTest != -1:
        print("Permute")
        # testing dataset
        np.random.seed(FLAGS.permuteTest)
        permTs = np.random.permutation(mnistDataTest.shape[1])
        mnistDataTestPerm = mnistDataTest[:, permTs]
        # dataSetTest = DataSet(255. * dataSetTestPerm,
        #                      mnistLabelsTest, reshape=False)
        mnistDataTest = mnistDataTestPerm
    if FLAGS.permuteTest2 != -1:
        # testing dataset
        print("Permute2")
        np.random.seed(FLAGS.permuteTest2)
        permTs = np.random.permutation(mnistDataTest.shape[1])
        mnistDataTestPerm = mnistDataTest[:, permTs]
        mnistDataTest2 = mnistDataTestPerm
    if FLAGS.permuteTest3 != -1:
        print("Permute3")
        # testing dataset
        np.random.seed(FLAGS.permuteTest3)
        permTs = np.random.permutation(mnistDataTest.shape[1])
        mnistDataTestPerm = mnistDataTest[:, permTs]
        mnistDataTest3 = mnistDataTestPerm

    print "SHAPE", mnistDataTrain.shape
    if True:
        if FLAGS.train_classes[0:]:
            labels_to_train = [int(i) for i in FLAGS.train_classes[0:]]

        if FLAGS.test_classes[0:]:
            labels_to_test = [int(i) for i in FLAGS.test_classes[0:]]

        if FLAGS.test2_classes is not None:
            labels_to_test2 = [int(i) for i in FLAGS.test2_classes[0:]]
        else:
            labels_to_test2 = []

        if FLAGS.test3_classes is not None:
            labels_to_test3 = [int(i) for i in FLAGS.test3_classes[0:]]
        else:
            labels_to_test3 = []

        # Filtered labels & data for training and testing.
        labels_train_classes = np.array([
            mnistLabelsTrain[i].argmax()
            for i in range(0, mnistLabelsTrain.shape[0])
            if mnistLabelsTrain[i].argmax() in labels_to_train
        ],
                                        dtype=np.uint8)
        data_train_classes = np.array([
            mnistDataTrain[i, :] for i in range(0, mnistLabelsTrain.shape[0])
            if mnistLabelsTrain[i].argmax() in labels_to_train
        ],
                                      dtype=np.float32)

        labels_test_classes = np.array([
            mnistLabelsTest[i].argmax()
            for i in range(0, mnistLabelsTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test
        ],
                                       dtype=np.uint8)
        labels_test2_classes = np.array([
            mnistLabelsTest[i].argmax()
            for i in range(0, mnistLabelsTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test2
        ],
                                        dtype=np.uint8)
        if FLAGS.mergeTest12 == False:
            labels_test3_classes = np.array([
                mnistLabelsTest[i].argmax()
                for i in range(0, mnistLabelsTest.shape[0])
                if mnistLabelsTest[i].argmax() in labels_to_test3
            ],
                                            dtype=np.uint8)
        data_test_classes = np.array([
            mnistDataTest[i, :] for i in range(0, mnistDataTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test
        ],
                                     dtype=np.float32)
        data_test2_classes = np.array([
            mnistDataTest[i, :] for i in range(0, mnistDataTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test2
        ],
                                      dtype=np.float32)
        if FLAGS.mergeTest12 == False:
            data_test3_classes = np.array([
                mnistDataTest[i, :] for i in range(0, mnistDataTest.shape[0])
                if mnistLabelsTest[i].argmax() in labels_to_test3
            ],
                                          dtype=np.float32)

        if FLAGS.mergeTest12:
            data_test3_classes = np.concatenate(
                (data_test_classes, data_test2_classes), axis=0)
            labels_test3_classes = np.concatenate(
                (labels_test_classes, labels_test2_classes), axis=0)
            print("CONCATMERGE", data_test_classes.shape,
                  data_test2_classes.shape, data_test3_classes.shape)

        labelsTrainOnehot = dense_to_one_hot(labels_train_classes, 10)
        labelsTestOnehot = dense_to_one_hot(labels_test_classes, 10)
        labelsTest2Onehot = dense_to_one_hot(labels_test2_classes, 10)
        labelsTest3Onehot = dense_to_one_hot(labels_test3_classes, 10)

        dataSetTrain = DataSet(255. * data_train_classes,
                               labelsTrainOnehot,
                               reshape=False)
        dataSetTest = DataSet(255. * data_test_classes,
                              labelsTestOnehot,
                              reshape=False)
        dataSetTest2 = DataSet(255. * data_test2_classes,
                               labelsTest2Onehot,
                               reshape=False)
        dataSetTest3 = DataSet(255. * data_test3_classes,
                               labelsTest3Onehot,
                               reshape=False)

        #print ("EQUAL?",np.mean((data_test3_classes==data_test_classes)).astype("float32")) ;
        print(data_test3_classes.shape, data_test2_classes.shape)
        print(FLAGS.test_classes, FLAGS.test2_classes, FLAGS.test3_classes)
        print(labels_to_test3, labels_to_test2)

        return dataSetTrain, dataSetTest, dataSetTest2, dataSetTest3
Example #12
def read_data_sets(split=True,
                   num_train_folders=90,
                   num_test_folders=24,
                   one_hot=True,
                   frames=3):
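    # emotions_path, NUM_CLASSES and read_from_folders are module-level
    # helpers assumed to be defined alongside this function.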

    all_folders = os.listdir(emotions_path)
    random.Random(0).shuffle(all_folders)

    if split:
        train_folders = all_folders[:num_train_folders]
        test_folders = all_folders[num_train_folders:num_train_folders +
                                   num_test_folders]
        validation_folders = all_folders[num_train_folders + num_test_folders:]

        train_df = pd.DataFrame(read_from_folders(train_folders, frames))
        validation_df = pd.DataFrame(
            read_from_folders(validation_folders, frames))
        test_df = pd.DataFrame(read_from_folders(test_folders, frames))
        print("{} CK+ TRAIN datapoints loaded".format(len(train_df)))
        print("{} CK+ VALIDATION datapoints loaded".format(len(validation_df)))
        print("{} CK+ TEST datapoints loaded".format(len(test_df)))
    else:
        train_df = pd.DataFrame(read_from_folders(all_folders, frames))
        validation_df = train_df.copy()
        test_df = train_df.copy()
        print("{} CK+ TRAIN datapoints loaded".format(len(train_df)))

    if one_hot:
        train_labels = dense_to_one_hot(train_df['emotion'].values,
                                        NUM_CLASSES)
        validation_labels = dense_to_one_hot(validation_df['emotion'].values,
                                             NUM_CLASSES)
        test_labels = dense_to_one_hot(test_df['emotion'].values, NUM_CLASSES)
    else:
        train_labels = train_df['emotion']
        validation_labels = validation_df['emotion']
        test_labels = test_df['emotion']
    del train_df['emotion']
    del validation_df['emotion']
    del test_df['emotion']

    train_idx = np.arange(len(train_labels))
    validation_idx = np.arange(len(validation_labels))
    test_idx = np.arange(len(test_labels))
    np.random.shuffle(train_idx)
    np.random.shuffle(validation_idx)
    np.random.shuffle(test_idx)

    train_images = train_df.values
    validation_images = validation_df.values
    test_images = test_df.values

    train = DataSet(train_images[train_idx, :],
                    train_labels[train_idx],
                    reshape=False)
    validation = DataSet(validation_images[validation_idx, :],
                         validation_labels[validation_idx],
                         reshape=False)
    test = DataSet(test_images[test_idx, :],
                   test_labels[test_idx],
                   reshape=False)

    return Datasets(train=train, validation=validation, test=test)
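
# training_file / testing_file are paths to pickled dicts with 'features' and
# 'labels' keys, and nb_classes is the number of target classes; all three are
# assumed to be defined earlier in the script.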
with open(training_file, mode='rb') as f:
    train = pickle.load(f)
with open(testing_file, mode='rb') as f:
    test = pickle.load(f)

X_train, y_train = train['features'], train['labels']
X_test, y_test = test['features'], test['labels']

# number of training examples
n_train = len(X_train)

# number of testing examples
n_test = len(X_test)

y_train = mnist.dense_to_one_hot(y_train, nb_classes)
y_test = mnist.dense_to_one_hot(y_test, nb_classes)

print("done loading")

# TODO: Split data into training and validation sets.

# TODO: Define placeholders and resize operation.
x = tf.placeholder(tf.float32, (None, 32, 32, 3))
# TODO: Resize the images so they can be fed into AlexNet.
# HINT: Use `tf.image.resize_images` to resize the images
resized = tf.image.resize_images(x, (227, 227))

# TODO: pass placeholder as first argument to `AlexNet`.
fc7 = AlexNet(resized, feature_extract=True)
# NOTE: `tf.stop_gradient` prevents the gradient from flowing backwards
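
The note about tf.stop_gradient points at the usual next step: freeze the pretrained AlexNet weights and train only a fresh classifier on top of fc7. A sketch continuing the snippet above, assuming fc7 is 4096-wide as in AlexNet:

fc7 = tf.stop_gradient(fc7)  # keep gradients out of the pretrained layers
shape = (fc7.get_shape().as_list()[-1], nb_classes)
fc8W = tf.Variable(tf.truncated_normal(shape, stddev=1e-2))
fc8b = tf.Variable(tf.zeros(nb_classes))
logits = tf.nn.xw_plus_b(fc7, fc8W, fc8b)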
Example #14
def split_data_set(wanted_digits, fraction_train, fraction_test):
    """
    Splits MNIST dataset into two parts: one containing only digits in wanted_digits
    and second containing all remaining digits. Labels in dataset with wanted_digits
    are mapped to range 0:num_labels, where num_labels - number of wanted digits.
    Each of two datasets in turn is split into train, validation and test with number 
    of samples specified by fraction_train and fraction_test
    
    Output: (label_map, remapped, remaining)
    where label_map - dictionary containing mapping from original labels to new ones;
    remapped - dataset of type base.Datasets with wanted digits and remapped labels;
    remaining - dataset of type base.Datasets with remaining digits and original labels.
    """
    # read data set
    mnist = input_data.read_data_sets("MNIST_data/")

    # concatenate train, validation and test data
    concatenated_images = np.concatenate((mnist.test.images, mnist.validation.images, mnist.train.images),axis=0)
    concatenated_labels = np.concatenate((mnist.test.labels, mnist.validation.labels, mnist.train.labels),axis=0)

    # estimate class frequencies
    unique, counts = np.unique(concatenated_labels, return_counts=True)
    digit_frequency = dict(zip(unique, counts))
    print(digit_frequency)
   
    # rows to extract from dataset
    rows_to_extract = np.logical_or.reduce([concatenated_labels == x for x in wanted_digits])

    # extract samples corresponding to desired digits
    extracted_labels = concatenated_labels[rows_to_extract]
    extracted_images = concatenated_images[rows_to_extract]

    # remaining samples
    remaining_labels = concatenated_labels[np.logical_not(rows_to_extract)]
    remaining_images = concatenated_images[np.logical_not(rows_to_extract)]
   
    num_train_extracted = int(extracted_labels.shape[0] * fraction_train)
    num_test_extracted = int(extracted_labels.shape[0] * fraction_test)

    num_train_remaining = int(remaining_labels.shape[0] * fraction_train)
    num_test_remaining = int(remaining_labels.shape[0] * fraction_test)

    num_train_all = int(concatenated_labels.shape[0] * fraction_train)
    num_test_all = int(concatenated_labels.shape[0] * fraction_test)


    # map desired labels to range (0:num_labels)
    extracted_remapped_labels = np.empty(shape=extracted_labels.shape, dtype=np.int32)
    remaining_remapped_labels = np.empty(shape=remaining_labels.shape, dtype=np.int32)
    
    extracted_label_map = {}  # dictionaries mapping original labels to new ones
    remaining_label_map = {}
                          
    # create map dictionary for extracted digits
    for i in range(len(wanted_digits)):
        extracted_label_map[wanted_digits[i]] = i

    # create map dictionary for remaining digits
    counter = 0
    for i in range(num_classes):
        if i not in wanted_digits:
            remaining_label_map[i] = counter
            counter += 1

    print("extracted_label map:" ,extracted_label_map)

    for i in range(extracted_labels.shape[0]):
        extracted_remapped_labels[i] = extracted_label_map[extracted_labels[i]]

    print("extracted_remapped_labels:" ,extracted_remapped_labels)

    print("remaining_label map:" ,remaining_label_map)

    for i in range(remaining_labels.shape[0]):
        remaining_remapped_labels[i] = remaining_label_map[remaining_labels[i]]

    print("remaining_remapped_labels:" ,remaining_remapped_labels)


    num_extracted_digits = len(wanted_digits) # number of classes in dataset with wanted_digits only

    # convert labels to one hot
    extracted_labels_one_hot = dense_to_one_hot(extracted_labels, num_classes)
    extracted_remapped_labels_one_hot = dense_to_one_hot(extracted_remapped_labels, num_extracted_digits)
    remaining_labels_one_hot = dense_to_one_hot(remaining_labels, num_classes)
    remaining_remapped_labels_one_hot = dense_to_one_hot(remaining_remapped_labels, num_classes - num_extracted_digits)
    
    concatenated_labels_one_hot = dense_to_one_hot(concatenated_labels, num_classes)
    
    print("extracted labels one hot: ",extracted_labels_one_hot)
    print("remaining labels one hot: ",remaining_labels_one_hot)
    
    print("extracted remapped labels one hot: ",extracted_remapped_labels_one_hot)
    print("remaining remapped labels one hot: ",remaining_remapped_labels_one_hot)
    
    # split datasets
    extracted_train = DataSet(extracted_images[:num_train_extracted] * MAX_INTENSITY, extracted_labels_one_hot[:num_train_extracted], 
                              dtype=dtypes.float32, reshape=False, one_hot=True)
    
    extracted_test = DataSet(extracted_images[num_train_extracted:(num_train_extracted + num_test_extracted)] * MAX_INTENSITY, 
                             extracted_labels_one_hot[num_train_extracted:(num_train_extracted + num_test_extracted)], 
                             dtype=dtypes.float32, reshape=False, one_hot=True)

    extracted_validation = DataSet(extracted_images[(num_train_extracted + num_test_extracted):] * MAX_INTENSITY, 
                             extracted_labels_one_hot[(num_train_extracted + num_test_extracted):], 
                             dtype=dtypes.float32, reshape=False, one_hot=True)

    
    extracted_remapped_train = DataSet(extracted_images[:num_train_extracted] * MAX_INTENSITY, extracted_remapped_labels_one_hot[:num_train_extracted], 
                              dtype=dtypes.float32, reshape=False, one_hot=True)

    extracted_remapped_test = DataSet(extracted_images[num_train_extracted:(num_train_extracted + num_test_extracted)] * MAX_INTENSITY, 
                             extracted_remapped_labels_one_hot[num_train_extracted:(num_train_extracted + num_test_extracted)], 
                             dtype=dtypes.float32, reshape=False, one_hot=True)

    extracted_remapped_validation = DataSet(extracted_images[(num_train_extracted + num_test_extracted):] * MAX_INTENSITY, 
                             extracted_remapped_labels_one_hot[(num_train_extracted + num_test_extracted):], 
                             dtype=dtypes.float32, reshape=False, one_hot=True)

    remaining_train = DataSet(remaining_images[:num_train_remaining] * MAX_INTENSITY, remaining_labels_one_hot[:num_train_remaining], 
                              dtype=dtypes.float32, reshape=False, one_hot=True)

    remaining_test = DataSet(remaining_images[num_train_remaining:(num_train_remaining + num_test_remaining)] * MAX_INTENSITY, 
                             remaining_labels_one_hot[num_train_remaining:(num_train_remaining + num_test_remaining)], 
                             dtype=dtypes.float32, reshape=False, one_hot=True)

    remaining_validation = DataSet(remaining_images[(num_train_remaining + num_test_remaining):] * MAX_INTENSITY, 
                             remaining_labels_one_hot[(num_train_remaining + num_test_remaining):], 
                             dtype=dtypes.float32, reshape=False, one_hot=True)
    
    remaining_remapped_train = DataSet(remaining_images[:num_train_remaining] * MAX_INTENSITY, remaining_remapped_labels_one_hot[:num_train_remaining], 
                              dtype=dtypes.float32, reshape=False, one_hot=True)

    remaining_remapped_test = DataSet(remaining_images[num_train_remaining:(num_train_remaining + num_test_remaining)] * MAX_INTENSITY, 
                             remaining_remapped_labels_one_hot[num_train_remaining:(num_train_remaining + num_test_remaining)], 
                             dtype=dtypes.float32, reshape=False, one_hot=True)

    remaining_remapped_validation = DataSet(remaining_images[(num_train_remaining + num_test_remaining):] * MAX_INTENSITY, 
                             remaining_remapped_labels_one_hot[(num_train_remaining + num_test_remaining):], 
                             dtype=dtypes.float32, reshape=False, one_hot=True)
    
    # data sets comprised of all digits
    all_train = DataSet(concatenated_images[:num_train_all] * MAX_INTENSITY, concatenated_labels_one_hot[:num_train_all], 
                              dtype=dtypes.float32, reshape=False, one_hot=True)

    all_test = DataSet(concatenated_images[num_train_all:(num_train_all + num_test_all)] * MAX_INTENSITY, concatenated_labels_one_hot[num_train_all:(num_train_all + num_test_all)], 
                              dtype=dtypes.float32, reshape=False, one_hot=True)
    
    all_validation = DataSet(concatenated_images[(num_train_all + num_test_all):] * MAX_INTENSITY, concatenated_labels_one_hot[(num_train_all + num_test_all):], 
                              dtype=dtypes.float32, reshape=False, one_hot=True)

    # combine data sets into single base.Datasets class
    extracted = base.Datasets(train=extracted_train, validation=extracted_validation, test=extracted_test)
    extracted_remapped = base.Datasets(train=extracted_remapped_train, validation=extracted_remapped_validation, test=extracted_remapped_test)
    remaining = base.Datasets(train=remaining_train, validation=remaining_validation, test=remaining_test)
    remaining_remapped = base.Datasets(train=remaining_remapped_train, validation=remaining_remapped_validation, test=remaining_remapped_test)
    
    all_digits = base.Datasets(train=all_train, validation=all_validation, test=all_test)
    
    return (extracted_label_map, remaining_label_map, extracted, extracted_remapped, \
            remaining, remaining_remapped, all_digits)
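
A hypothetical call, carving the digits 0-4 out of MNIST with an 80/10/10 train/test/validation split (validation receives the remainder):

(extracted_map, remaining_map, extracted, extracted_remapped,
 remaining, remaining_remapped, all_digits) = split_data_set(
    [0, 1, 2, 3, 4], fraction_train=0.8, fraction_test=0.1)
print(extracted_map)  # {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}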