Ejemplo n.º 1
0
def get_minst(data_type="train"):
    """
    Load an MNIST split and convert it into an RDD of BigDL ``Sample``s.

    NOTE(review): relies on a module-level SparkContext ``sc`` and the
    ``mnist`` / ``Sample`` helpers imported elsewhere in this file.

    :param data_type: which split to read ("train" or "test")
    :return: an RDD of Sample; labels are shifted by +1 because BigDL
             targets are 1-based
    """
    (images, labels) = mnist.read_data_sets("/tmp/mnist/", data_type)
    images = sc.parallelize(images)
    labels = sc.parallelize(labels)
    # Target starts from 1 in BigDL. Index into the zipped pair instead of
    # tuple-unpacking in the lambda: that syntax is Python-2-only (PEP 3113)
    # and breaks under Python 3.
    record = images.zip(labels).map(
        lambda rec: Sample.from_ndarray(rec[0], rec[1] + 1))
    return record
Ejemplo n.º 2
0
def get_mnist(sc, mnist_path):
    """
    Read the MNIST train and test splits, standardize both with the
    *training* mean and std, and return them as RDDs of BigDL ``Sample``s.

    :param sc: SparkContext used to parallelize the numpy arrays
    :param mnist_path: directory holding (or receiving) the MNIST files
    :return: tuple ``(rdd_train_sample, rdd_test_sample)``; labels are
             shifted by +1 because BigDL targets are 1-based (the raw
             labels start from 0)
    """
    (train_images, train_labels) = mnist.read_data_sets(mnist_path, "train")
    (test_images, test_labels) = mnist.read_data_sets(mnist_path, "test")
    # Normalize with statistics computed on the training set only, so the
    # test set sees the exact same transform.
    training_mean = np.mean(train_images)
    training_std = np.std(train_images)
    rdd_train_images = sc.parallelize(train_images)
    rdd_train_labels = sc.parallelize(train_labels)
    rdd_test_images = sc.parallelize(test_images)
    rdd_test_labels = sc.parallelize(test_labels)

    # Index into the zipped (features, label) pair instead of tuple-unpacking
    # in the lambda: that syntax is Python-2-only (PEP 3113).
    rdd_train_sample = rdd_train_images.zip(rdd_train_labels).map(
        lambda rec: Sample.from_ndarray(
            (rec[0] - training_mean) / training_std, rec[1] + 1))
    rdd_test_sample = rdd_test_images.zip(rdd_test_labels).map(
        lambda rec: Sample.from_ndarray(
            (rec[0] - training_mean) / training_std, rec[1] + 1))
    return (rdd_train_sample, rdd_test_sample)
Ejemplo n.º 3
0
def get_minst(sc, data_type="train", location="/tmp/mnist"):
    """
    Get and normalize the mnist data. We would download it automatically
    if the data doesn't present at the specific location.
    :param sc: SparkContext
    :param data_type: training data or testing data
    :param location: Location storing the mnist
    :return: A RDD of Sample
    """
    images, labels = mnist.read_data_sets(location, data_type)
    image_rdd = sc.parallelize(images)
    label_rdd = sc.parallelize(labels)

    def to_sample(pair):
        # Target starts from 1 in BigDL, so shift the 0-based label.
        return Sample.from_ndarray(pair[0], pair[1] + 1)

    return image_rdd.zip(label_rdd).map(to_sample)
Ejemplo n.º 4
0
def get_minst(sc, data_type="train", location="/tmp/mnist"):
    """
    Get and normalize the mnist data. We would download it automatically
    if the data doesn't present at the specific location.
    :param sc: SparkContext
    :param data_type: training data or testing data
    :param location: Location storing the mnist
    :return: A RDD of Sample; labels are shifted by +1 because BigDL
             targets are 1-based
    """
    (images, labels) = mnist.read_data_sets(location, data_type)
    images = sc.parallelize(images)
    labels = sc.parallelize(labels)
    # Target starts from 1 in BigDL. Index into the zipped pair instead of
    # tuple-unpacking in the lambda: that syntax is Python-2-only (PEP 3113)
    # and is a SyntaxError under Python 3.
    record = images.zip(labels).map(
        lambda rec: Sample.from_ndarray(rec[0], rec[1] + 1))
    return record
Ejemplo n.º 5
0
                      criterion=MSECriterion(),
                      optim_method="Adagrad",
                      state=state,
                      end_trigger=MaxEpoch(2),
                      batch_size=batch_size)

# Tag this run with a timestamp so each training session gets its own
# summary directory under /tmp/bigdl_summaries.
app_name = 'autoencoder-' + dt.datetime.now().strftime("%Y%m%d-%H%M%S")
train_summary = TrainSummary(log_dir='/tmp/bigdl_summaries', app_name=app_name)
optimizer.set_train_summary(train_summary)

print "saving logs to ", app_name

# Boot training process
trained_model = optimizer.optimize()
print "Optimization Done."

# Pull the recorded scalars back as Nx2 arrays (iteration, value).
loss = np.array(train_summary.read_scalar("Loss"))
lr = np.array(train_summary.read_scalar("LearningRate"))

# Plot the training loss curve.
plt.figure(figsize=(12, 12))
plt.plot(loss[:, 0], loss[:, 1], label='loss')
plt.xlim(0, loss.shape[0] + 10)
plt.grid(True)
plt.title("loss")
# Reload the raw test images for side-by-side comparison with the
# autoencoder reconstructions.
(images, labels) = mnist.read_data_sets(mnist_path, "test")
# NOTE(review): `test_data` and `examples_to_show` are defined earlier in
# the original script, outside this excerpt — presumably the test-sample
# RDD and the number 10; verify against the full source.
examples = trained_model.predict(test_data).take(10)
# Top row: original digits; bottom row: reconstructions.
f, a = plt.subplots(2, 10, figsize=(10, 2))
for i in range(examples_to_show):
    a[0][i].imshow(np.reshape(images[i], (28, 28)))
    a[1][i].imshow(np.reshape(examples[i], (28, 28)))
Ejemplo n.º 6
0
# As always, a bit of setup

import pandas
from dataset import mnist
from util.common import *
# Initialize the BigDL engine (provided by util.common) before any
# BigDL operations run.
init_engine()
mnist_path = "datasets/mnist"
# Download (if needed) and load both MNIST splits as numpy arrays.
(train_images, train_labels) = mnist.read_data_sets(mnist_path, "train")
(test_images, test_labels) = mnist.read_data_sets(mnist_path, "test")
print train_images.shape
print train_labels.shape
print test_images.shape
print test_labels.shape
# Preview the first 10 digits side by side.
# NOTE(review): `imshow`/`axis` and `sc` come from outside this excerpt
# (likely a pylab star-import and a notebook SparkContext) — verify.
imshow(np.column_stack(train_images[0:10].reshape(10, 28, 28)), cmap='gray')
axis('off')
print "groud true labels: "
print train_labels[0:10]
rdd_train_images = sc.parallelize(train_images)
rdd_train_labels = sc.parallelize(train_labels)
rdd_test_images = sc.parallelize(test_images)
rdd_test_labels = sc.parallelize(test_labels)
# Standardize with training-set statistics only; the same transform is
# applied to the test set.
training_mean = np.mean(train_images)
training_std = np.std(train_images)
# Labels are shifted by +1: BigDL targets are 1-based.
# NOTE(review): these lambdas use Python-2-only tuple unpacking (PEP 3113);
# this script will not run under Python 3 as written.
rdd_train_sample = rdd_train_images.zip(rdd_train_labels).map(
    lambda (features, label): Sample.from_ndarray(
        (features - training_mean) / training_std, label + 1))
rdd_test_sample = rdd_test_images.zip(rdd_test_labels).map(
    lambda (features, label): Sample.from_ndarray(
        (features - training_mean) / training_std, label + 1))
print rdd_train_sample.count()
print rdd_test_sample.count()