Example #1
def get_mnist(sc, mnist_path):
    # numpy, mnist and common are module-level imports in the original
    # file; repeated here so the snippet is self-contained.
    import numpy as np
    from bigdl.dataset import mnist
    from bigdl.util import common

    # MNIST labels start from 0, but BigDL targets start from 1,
    # hence the "+ 1" on the labels below.
    (train_images, train_labels) = mnist.read_data_sets(mnist_path, "train")
    (test_images, test_labels) = mnist.read_data_sets(mnist_path, "test")
    training_mean = np.mean(train_images)
    training_std = np.std(train_images)
    rdd_train_images = sc.parallelize(train_images)
    rdd_train_labels = sc.parallelize(train_labels)
    rdd_test_images = sc.parallelize(test_images)
    rdd_test_labels = sc.parallelize(test_labels)

    rdd_train_sample = rdd_train_images.zip(rdd_train_labels).map(lambda fl:
                                        common.Sample.from_ndarray(
                                        (fl[0] - training_mean) / training_std,
                                        fl[1] + 1))
    rdd_test_sample = rdd_test_images.zip(rdd_test_labels).map(lambda fl:
                                        common.Sample.from_ndarray(
                                        (fl[0] - training_mean) / training_std,
                                        fl[1] + 1))
    return (rdd_train_sample, rdd_test_sample)
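A minimal usage sketch for this helper; the SparkContext setup, app name, and dataset path are assumptions, not part of the original file, and a BigDL-enabled Spark configuration is presumed:

from pyspark import SparkContext

sc = SparkContext(appName="mnist-demo")  # hypothetical app name
train_rdd, test_rdd = get_mnist(sc, "/tmp/mnist")  # hypothetical path
print(train_rdd.count())  # number of training Samples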
Example #2
File: utils.py  Project: ru003ar/BigDL
def get_mnist(sc, data_type="train", location="/tmp/mnist"):
    """
    Get mnist dataset and parallelize into RDDs.
    The data will be downloaded automatically if it is not present at the specified location.

    :param sc: SparkContext.
    :param data_type: "train" for training data and "test" for testing data.
    :param location: Location to store mnist dataset.
    :return: RDD of (features: ndarray, label: ndarray).
    """
    from bigdl.dataset import mnist  # module-level import in the original utils.py
    (images, labels) = mnist.read_data_sets(location, data_type)
    images = sc.parallelize(images)
    labels = sc.parallelize(labels + 1)  # Targets start from 1 in BigDL
    record = images.zip(labels)
    return record
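The RDD returned here still carries raw (features, label) pairs. A minimal sketch of wrapping them into BigDL Sample objects, reusing the normalizer and the mnist normalization constants that appear in Examples #3 and #4 (the variable names are placeholders):

from bigdl.util.common import Sample
from bigdl.dataset import mnist
from bigdl.dataset.transformer import normalizer

record = get_mnist(sc, "train", "/tmp/mnist")
train_samples = record.map(
    lambda fl: Sample.from_ndarray(
        normalizer(fl[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD), fl[1]))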
Example #3
File: mnist_cnn.py  Project: ru003ar/BigDL
def get_mnist(sc, data_type="train", location="/tmp/mnist"):
    """
    Download or load MNIST dataset to/from the specified path.
    Normalize and transform input data into an RDD of Sample
    """
    from bigdl.dataset import mnist
    from bigdl.dataset.transformer import normalizer
    (images, labels) = mnist.read_data_sets(location, data_type)
    images = images.reshape((images.shape[0], ) + input_shape)
    images = sc.parallelize(images)
    labels = sc.parallelize(labels + 1)  # Target start from 1 in BigDL
    record = images.zip(labels).map(lambda rec_tuple: (normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                    rec_tuple[1])) \
                               .map(lambda t: Sample.from_ndarray(t[0], t[1]))
    return record
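The Sample RDD this returns is what BigDL's Optimizer consumes for distributed training. A minimal training sketch under that assumption; the model, criterion, and hyperparameters are placeholders, not taken from mnist_cnn.py:

from bigdl.nn.criterion import ClassNLLCriterion
from bigdl.optim.optimizer import Optimizer, SGD, MaxEpoch

train_data = get_mnist(sc, "train", "/tmp/mnist")
optimizer = Optimizer(model=model,  # a BigDL model built elsewhere (placeholder)
                      training_rdd=train_data,
                      criterion=ClassNLLCriterion(),
                      optim_method=SGD(learningrate=0.01),
                      end_trigger=MaxEpoch(5),
                      batch_size=128)
trained_model = optimizer.optimize()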
Example #4
def get_mnist(sc, data_type="train", location="/tmp/mnist"):
    """
    Download or load MNIST dataset.
    Normalize and transform input data into an RDD of Sample
    """
    from bigdl.dataset import mnist
    from bigdl.dataset.transformer import normalizer
    (images, labels) = mnist.read_data_sets(location, data_type)
    images = images.reshape(images.shape[0], 1, 28, 28)
    images = sc.parallelize(images)
    labels = sc.parallelize(labels + 1)  # Target start from 1 in BigDL
    record = images.zip(labels).map(lambda rec_tuple: (normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                    rec_tuple[1])) \
                               .map(lambda t: Sample.from_ndarray(t[0], t[1]))
    return record
Example #5
def get_mnist(sc, data_type="train", location="/tmp/mnist"):
    """
    Get the MNIST data and parallelize it into RDDs. The data will be
    downloaded automatically if it is not present at the specified location.

    :param sc: SparkContext
    :param data_type: "train" for training data and "test" for testing data
    :param location: Location storing the mnist dataset
    :return: An RDD of (features: ndarray, label: ndarray)
    """
    from bigdl.dataset import mnist  # module-level import in the original file
    (images, labels) = mnist.read_data_sets(location, data_type)
    images = sc.parallelize(images)
    labels = sc.parallelize(labels + 1)  # Targets start from 1 in BigDL
    record = images.zip(labels)
    return record
Example #6
def main(data_num):
    # Relies on the original script's module-level imports: numpy as np,
    # tensorflow as tf, slim, lenet, mnist, normalizer, init_nncontext,
    # TFDataset and TFPredictor.
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
    image_rdd = sc.parallelize(images_data[:data_num])
    labels_rdd = sc.parallelize(labels_data[:data_num])
    rdd = image_rdd.zip(labels_rdd) \
        .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                np.array(rec_tuple[1])])

    dataset = TFDataset.from_rdd(rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], [1]],
                                 types=[tf.float32, tf.int32],
                                 batch_per_thread=20)

    # construct the model from TFDataset
    images, labels = dataset.tensors

    labels = tf.squeeze(labels)

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images,
                                         num_classes=10,
                                         is_training=False)

    predictions = tf.to_int32(tf.argmax(logits, axis=1))
    correct = tf.expand_dims(tf.to_int32(tf.equal(predictions, labels)),
                             axis=1)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "/tmp/lenet/model")

        predictor = TFPredictor(sess, [correct])

        accuracy = predictor.predict().mean()

        print("predict accuracy is %s" % accuracy)
Example #7
def main():
    # Relies on the original script's module-level imports: numpy as np,
    # tensorflow as tf, slim, lenet, mnist, normalizer, init_nncontext,
    # TFDataset, TFOptimizer, Adam, TrainSummary and MaxEpoch.
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "train")
    image_rdd = sc.parallelize(images_data)
    labels_rdd = sc.parallelize(labels_data)
    rdd = image_rdd.zip(labels_rdd) \
        .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                np.array(rec_tuple[1])])

    dataset = TFDataset.from_rdd(rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], [1]],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280)

    # construct the model from TFDataset
    images, labels = dataset.tensors

    labels = tf.squeeze(labels)

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images,
                                         num_classes=10,
                                         is_training=True)

    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    # create an optimizer
    optimizer = TFOptimizer(loss, Adam(1e-3))
    optimizer.set_train_summary(TrainSummary("/tmp/az_lenet", "lenet"))
    # kick off training
    for i in range(5):
        optimizer.optimize(end_trigger=MaxEpoch(i + 1))

    saver = tf.train.Saver()
    # Save with the "model" prefix so the checkpoint can be restored from
    # "/tmp/lenet/model", the path used by the inference examples on this page.
    saver.save(optimizer.sess, "/tmp/lenet/model")
Example #8
def main(options, data_num):
    # Relies on the original script's module-level imports: numpy as np,
    # tensorflow as tf, slim, lenet, mnist, init_nncontext, TFDataset
    # and TFPredictor.
    data_path = '/tmp/mnist' if not options.data_path else options.data_path
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (images_data, labels_data) = mnist.read_data_sets(data_path, "test")
    images_data = (images_data[:data_num] - mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    labels_data = labels_data[:data_num].astype(np.int32)
    dataset = TFDataset.from_ndarrays((images_data, labels_data),
                                      batch_per_thread=20)

    # construct the model from TFDataset
    images, labels = dataset.tensors

    labels = tf.squeeze(labels)

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images,
                                         num_classes=10,
                                         is_training=False)

    predictions = tf.to_int32(tf.argmax(logits, axis=1))
    correct = tf.expand_dims(tf.to_int32(tf.equal(predictions, labels)),
                             axis=1)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "/tmp/lenet/model")

        predictor = TFPredictor(sess, [correct])

        accuracy = predictor.predict().mean()

        print("predict accuracy is %s" % accuracy)
Example #9
def get_data(dataset):
    import numpy as np  # needed for np.int32 below
    from bigdl.dataset import mnist
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
    images_data = (images_data - mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    return (images_data, labels_data.astype(np.int32))
Example #10
def get_data_rdd(dataset):
    # sc (SparkContext) and mnist are module-level names in the original file.
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
    # Scale pixel values from [0, 255] to [-1, 1]; the labels are
    # intentionally dropped, only the image RDD is returned.
    image_rdd = sc.parallelize(images_data).map(lambda img: [((img / 255) - 0.5) * 2])
    return image_rdd
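A quick check of the scaling arithmetic used in the map above (the numpy import is repeated for self-containment):

import numpy as np

img = np.array([0.0, 127.5, 255.0])
print(((img / 255) - 0.5) * 2)  # [-1.  0.  1.]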
Example #11
# Train the model
trained_model = optimizer.optimize()


# 5. Loss visualization
# Let's plot the loss curve recorded during optimization.

# Each record read from the summary has the iteration index in column 0
# and the loss value in column 1, as used in the plot below.
loss = np.array(train_summary.read_scalar("Loss"))

plt.figure(figsize=(12, 12))
plt.plot(loss[:, 0], loss[:, 1], label='loss')
plt.xlim(0, loss.shape[0] + 10)
plt.legend()
plt.grid(True)
plt.title("loss")


# 6. Prediction on test dataset
# Now we test the autoencoder on the test set loaded earlier: compress and
# reconstruct the digit images, then compare the reconstructions with the
# original inputs, which are also the target outputs. Ten examples are
# enough to demonstrate that the trained autoencoder works.


# mnist_path and test_data were defined earlier in the notebook.
(images, labels) = mnist.read_data_sets(mnist_path, "test")
examples_to_show = 10
examples = trained_model.predict(test_data).take(examples_to_show)
f, a = plt.subplots(2, examples_to_show, figsize=(examples_to_show, 2))
for i in range(examples_to_show):
    a[0][i].imshow(np.reshape(images[i], (28, 28)))    # original input
    a[1][i].imshow(np.reshape(examples[i], (28, 28)))  # reconstruction
plt.show()