Example 1
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
import os
from pyspark import SparkContext, SQLContext
from pyspark.sql.types import StringType
from sparkdl import KerasImageFileTransformer

# Load Spark context and SQL context
sc = SparkContext()
sqlContext = SQLContext(sc)

def loadAndPreprocessKerasInceptionV3(uri):
  # this is a typical way to load and prep images in keras
  image = img_to_array(load_img(uri, target_size=(299, 299)))  # image dimensions for InceptionV3
  image = np.expand_dims(image, axis=0)
  return preprocess_input(image)

transformer = KerasImageFileTransformer(inputCol="uri", outputCol="predictions",
                                        modelFile='model-full.h5',  # local file path for model
                                        imageLoader=loadAndPreprocessKerasInceptionV3,
                                        outputMode="vector")

files = [os.path.abspath(os.path.join("myimages/", f)) for f in os.listdir("myimages/") if f.endswith('.jpg')]
uri_df = sqlContext.createDataFrame(files, StringType()).toDF("uri")

keras_pred_df = transformer.transform(uri_df)
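
The output column from Example 1 holds one 1000-dimensional InceptionV3 probability vector per image. As a follow-up sketch (not part of the original snippet, and assuming the result set is small enough to collect on the driver), the vectors can be decoded back into human-readable ImageNet labels with keras's decode_predictions:

from keras.applications.inception_v3 import decode_predictions

# Collect the predictions and convert the Spark vectors to a numpy array
rows = keras_pred_df.select("uri", "predictions").collect()
preds = np.array([row["predictions"].toArray() for row in rows])  # shape: (n_images, 1000)
for row, top3 in zip(rows, decode_predictions(preds, top=3)):
    print(row["uri"], [(name, float(score)) for (_, name, score) in top3])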
Example 2

from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
import os
from pyspark.sql import SparkSession
from pyspark.sql.types import StringType
from sparkdl import KerasImageFileTransformer

spark = SparkSession.builder.appName('binary_classification').getOrCreate()


model = InceptionV3(weights='imagenet')
model.save('model-full.h5')

IMAGES_PATH = 'datasets/image_classifier/test/'


def preprocess_keras_inceptionV3(uri):
  image = img_to_array(load_img(uri, target_size=(299, 299)))
  image = np.expand_dims(image, axis=0)
  return preprocess_input(image)


transformer = KerasImageFileTransformer(inputCol='uri',
                                        outputCol='predictions',
                                        modelFile='model-full.h5',  # must match the file saved above
                                        imageLoader=preprocess_keras_inceptionV3)


files = [os.path.abspath(os.path.join(IMAGES_PATH, f)) for f in os.listdir(IMAGES_PATH) if f.endswith('.jpg')]
uri_df = spark.createDataFrame(files, StringType()).toDF('uri')

predictions = transformer.transform(uri_df)
predictions.select('uri', 'predictions').show()


Example 3
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
from pyspark.sql.types import StringType


def loadAndPreprocessKerasInceptionV3(uri):
    image = img_to_array(load_img(uri, target_size=(299, 299)))  # InceptionV3 input size
    image = np.expand_dims(image, axis=0)
    return preprocess_input(image)


transformer = KerasImageFileTransformer(
    inputCol="uri",
    outputCol="predictions",
    modelFile='model-full.h5',  # local file path for model
    imageLoader=loadAndPreprocessKerasInceptionV3,
    outputMode="vector")

# `fs` is the list of image file paths built earlier in the original script
uri_df = spark.createDataFrame(fs, StringType()).toDF("uri")

uri_df.show()

keras_pred_df = transformer.transform(uri_df)

keras_pred_df.select("uri", "predictions").show()

from sparkdl import KerasTransformer
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
from pyspark.sql.types import StructType, StructField, ArrayType, FloatType

# Generate random input data
num_features = 10
num_examples = 100
input_data = [{
    "features": np.random.randn(num_features).astype(float).tolist()
} for i in range(num_examples)]
schema = StructType([StructField("features", ArrayType(FloatType()), True)])
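
The snippet stops after defining the schema. A minimal sketch of how a KerasTransformer example typically continues (the layer sizes and model filename below are illustrative assumptions, not from the original code):

input_df = spark.createDataFrame(input_data, schema)

# Build and save a small Keras model (illustrative; any saved Keras model works)
model = Sequential()
model.add(Dense(units=20, input_shape=(num_features,), activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))
model.save('simple-model.h5')  # assumed filename

# KerasTransformer applies the saved model to each row of the array column
transformer = KerasTransformer(inputCol="features", outputCol="predictions",
                               modelFile='simple-model.h5')
final_df = transformer.transform(input_df)
final_df.show()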
Example 4
    # SIZE and MODEL are constants defined earlier in the original script
    # (for InceptionV3, SIZE would be (299, 299))
    def preprocess_keras_inceptionV3(uri):
        image = img_to_array(load_img(uri, target_size=SIZE))
        image = np.expand_dims(image, axis=0)
        return preprocess_input(image)

    # Define Spark Transformer
    transformer = KerasImageFileTransformer(
        inputCol="uri",
        outputCol="predictions",
        modelFile=MODEL,
        imageLoader=preprocess_keras_inceptionV3,
        outputMode="vector")

    uri_df = sqlContext.createDataFrame(url_list, StringType()).toDF("uri")  # url_list is built earlier in the original script

    # Get Output
    labels_df = transformer.transform(uri_df)

    # Show Output
    labels_df.show()

    #
    # train_df = train_df.filter(train_df.isDefault == 0)
    # test_df = test_df.filter(test_df.isDefault == 0)
    #
    # train_df.show()
    # test_df.show()
    #
    # # Under the hood, each of the partitions is fully loaded in memory, which may be expensive.
    # # This ensures that each of the partitions has a small size.
    # train_df = train_df.repartition(100)
    # test_df = test_df.repartition(100)
Example 5
# (truncated) the original script copied the saved model file to HDFS here:
#   f_content = f.read()
#   tf.gfile.FastGFile(model_hdfs_path, 'wb').write(f_content)
print("-------------OK--------------")

# model tester

from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
import os
from sparkdl import KerasImageFileTransformer


def loadAndPreprocessKerasInceptionV3(uri):
    # this is a typical way to load and prep images in keras
    image = img_to_array(load_img(uri, target_size=(299, 299)))
    image = np.expand_dims(image, axis=0)
    return preprocess_input(image)


transformer = KerasImageFileTransformer(
    inputCol="image",
    outputCol="predictions",
    modelFile=model_path,  # path to the saved .h5 model, defined earlier in the script
    imageLoader=loadAndPreprocessKerasInceptionV3,
    outputMode="vector")

print(type(transformer))
final_df = transformer.transform(test_df)
final_df.show()
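
For the transform above to work, test_df must contain a string column named "image" holding image file paths. A hedged sketch of building it (the directory name and the spark session variable are assumptions, not from the original snippet):

image_dir = "datasets/test_images"  # assumed location
paths = [os.path.abspath(os.path.join(image_dir, f))
         for f in os.listdir(image_dir) if f.endswith('.jpg')]
test_df = spark.createDataFrame(paths, "string").toDF("image")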
Example 6
# Imports and the image loader this snippet depends on (reconstructed so the
# example runs on its own; the loader matches the one used in Example 1)
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
import os
from sparkdl import KerasImageFileTransformer


def loadAndPreprocessKerasInceptionV3(uri):
    image = img_to_array(load_img(uri, target_size=(299, 299)))
    image = np.expand_dims(image, axis=0)
    return preprocess_input(image)


transformer = KerasImageFileTransformer(
    inputCol="uri",
    outputCol="predictions",
    modelFile=
    "/Users/502677522/Class/DataWeekend/2017_12_02/labs/vgg16_dogcat_transfer.h5",
    imageLoader=loadAndPreprocessKerasInceptionV3,
    outputMode="vector")
dirpath = "/Users/502677522/Class/DataWeekend/2017_12_02/data/test"

# files = [os.path.abspath(os.path.join(dirpath, f)) for f in os.listdir("/Users/502677522/Class/DataWeekend/2017_12_02/data/test") if f.endswith('11.jpg')]
# uri_df = sqlContext.createDataFrame(files, StringType()).toDF("uri")
files = [
    os.path.abspath(os.path.join(dirpath, f)) for f in os.listdir(
        "/Users/502677522/Class/DataWeekend/2017_12_02/data/test_temp")
    if f.endswith('.jpg')
]

# files = [os.path.abspath(os.path.join(dirpath, f)) for f in os.listdir("/Users/502677522/Class/DataWeekend/2017_12_02/data/test") if f == '3.jpg']
uri_df = spark.createDataFrame(files, "string").toDF("uri")
final_df = transformer.transform(uri_df)
final_df.show()
print(final_df.collect())