# Batch image classification: save a pretrained InceptionV3 locally, then
# apply it to every .jpg under IMAGES_PATH via sparkdl's transformer.
spark = SparkSession.builder.appName('binary_classification').getOrCreate()

model = InceptionV3(weights='imagenet')
model.save('model-full.h5')

IMAGES_PATH = 'datasets/image_classifier/test/'


def preprocess_keras_inceptionV3(uri):
    """Load the image at *uri* and preprocess it into a batched InceptionV3 input.

    Returns a (1, 299, 299, 3) array produced by Keras' preprocess_input.
    """
    image = img_to_array(load_img(uri, target_size=(299, 299)))
    image = np.expand_dims(image, axis=0)  # add batch dimension
    return preprocess_input(image)


# BUG FIX: the transformer previously referenced 'model-full-tmp.h5', a file
# this script never writes; point it at the model saved above.
transformer = KerasImageFileTransformer(inputCol='uri',
                                        outputCol='predictions',
                                        modelFile='model-full.h5',
                                        imageLoader=preprocess_keras_inceptionV3)

# BUG FIX: the original joined against an undefined `dirpath`; join against
# IMAGES_PATH, the directory that is actually listed.
files = [os.path.abspath(os.path.join(IMAGES_PATH, f))
         for f in os.listdir(IMAGES_PATH)
         if f.endswith('.jpg')]

uri_df = spark.createDataFrame(files, StringType()).toDF('uri')
predictions = transformer.transform(uri_df)
predictions.select('uri', 'predictions').show()
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
import os
from pyspark import SparkContext, SQLContext
from pyspark.sql.types import StringType
from sparkdl import KerasImageFileTransformer

# Spark contexts used to build the input DataFrame of image URIs.
sc = SparkContext()
sqlContext = SQLContext(sc)


def loadAndPreprocessKerasInceptionV3(uri):
    """Read the image at *uri* and turn it into a batched InceptionV3 input."""
    raw = load_img(uri, target_size=(299, 299))  # InceptionV3 input size
    batch = np.expand_dims(img_to_array(raw), axis=0)
    return preprocess_input(batch)


# Transformer that applies the serialized Keras model to each image URI.
transformer = KerasImageFileTransformer(
    inputCol="uri",
    outputCol="predictions",
    modelFile='model-full.h5',  # local file path for model
    imageLoader=loadAndPreprocessKerasInceptionV3,
    outputMode="vector",
)

# Absolute paths of every .jpg directly under myimages/.
files = []
for name in os.listdir("myimages/"):
    if name.endswith('.jpg'):
        files.append(os.path.abspath(os.path.join("myimages/", name)))

uri_df = sqlContext.createDataFrame(files, StringType()).toDF("uri")
keras_pred_df = transformer.transform(uri_df)
def fit_custom_model_distributed():
    """Build and return the KerasImageFileTransformer for distributed inference.

    BUG FIX: the original constructed the transformer and then fell off the
    end of the function, returning None — the configured transformer was
    discarded and the call was useless. It is now returned to the caller.
    """
    transform = KerasImageFileTransformer(
        inputCol="uri",
        outputCol="predictions",
        modelFile='path_to_pretrained_model',
        # NOTE(review): `load_images` is presumably defined elsewhere in this
        # file — confirm it exists before calling.
        imageLoader=load_images,
        outputMode="vector")
    return transform
import os

# Parameters
SIZE = (299, 299)                                # input size accepted by InceptionV3
IMAGES_PATH = 'datasets/image_classifier/test/'  # directory holding the test images
MODEL = '/tmp/model-full-tmp.h5'                 # local path of the serialized model


# Image Preprocessing
def preprocess_keras_inceptionV3(uri):
    """Load the image at *uri* and preprocess it into a batched model input."""
    image = img_to_array(load_img(uri, target_size=SIZE))
    image = np.expand_dims(image, axis=0)  # add batch dimension
    return preprocess_input(image)


# Define Spark Transformer
transformer = KerasImageFileTransformer(
    inputCol="uri",
    outputCol="predictions",
    modelFile=MODEL,
    imageLoader=preprocess_keras_inceptionV3,
    outputMode="vector")

# BUG FIX: `url_list` was never defined and IMAGES_PATH was never used;
# build the URI list from the configured images directory.
url_list = [os.path.abspath(os.path.join(IMAGES_PATH, f))
            for f in os.listdir(IMAGES_PATH)
            if f.endswith('.jpg')]

uri_df = sqlContext.createDataFrame(url_list, StringType()).toDF("uri")

# Get Output
labels_df = transformer.transform(uri_df)
# Show Output
labels_df.show()
import glob

# Save a pretrained InceptionV3 so the transformer workers can load it from disk.
model = InceptionV3(weights="imagenet")
model.save('model-full.h5')

from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
from pyspark.sql.types import StringType
from sparkdl import KerasImageFileTransformer


def loadAndPreprocessKerasInceptionV3(data):
    """Load the image at path *data* and preprocess it for InceptionV3."""
    image = img_to_array(load_img(data, target_size=(299, 299)))
    image = np.expand_dims(image, axis=0)  # add batch dimension
    return preprocess_input(image)


transformer = KerasImageFileTransformer(inputCol="filename",
                                        outputCol="category",
                                        modelFile='model-full.h5',
                                        imageLoader=loadAndPreprocessKerasInceptionV3,
                                        outputMode="vector")

# BUG FIX: `fs = !ls content/train/*.jpg` is IPython shell-escape syntax and a
# SyntaxError in plain Python; glob yields the same file list portably.
# (The no-op bare `transformer` expression — a notebook REPL echo — was removed.)
fs = glob.glob('content/train/*.jpg')
uri_df = spark.createDataFrame(fs, StringType()).toDF("filename")
keras_pred_df = transformer.transform(uri_df)

from keras.models import Sequential
from keras.layers import Dense
import numpy as np
from pyspark.sql.types import StructType, StructField, ArrayType, FloatType

# Random feature vectors used as toy input for a small dense model.
num_features = 10
num_examples = 100
input_data = [{"features": np.random.randn(num_features).astype(float).tolist()}
              for i in range(num_examples)]
from pyspark.sql.types import StringType
from sparkdl import KerasImageFileTransformer


def loadAndPreprocessKerasInceptionV3(uri):
    """Read one image from *uri* and prepare a batched InceptionV3 input."""
    img = load_img(uri, target_size=(299, 299))  # image dimensions for InceptionV3
    batch = np.expand_dims(img_to_array(img), axis=0)
    return preprocess_input(batch)


# Copy the model out of DBFS to a plain local path the executors can open.
dbutils.fs.cp(dbfs_model_path, 'file:/tmp/model-full-tmp.h5')

transformer = KerasImageFileTransformer(
    inputCol="uri",
    outputCol="predictions",
    modelFile='/tmp/model-full-tmp.h5',  # local file path for model
    imageLoader=loadAndPreprocessKerasInceptionV3,
    outputMode="vector",
)

# Rewrite each dbfs:/... entry as a /dbfs/... "local" path for the image loader.
files = []
for entry in dbutils.fs.ls(sample_img_dir):
    files.append("/dbfs" + str(entry.path)[5:])

uri_df = sqlContext.createDataFrame(files, StringType()).toDF("uri")
keras_pred_df = transformer.transform(uri_df)

# COMMAND ----------

display(keras_pred_df.select("uri", "predictions"))

# COMMAND ----------
# f_content = f.read()
# tf.gfile.FastGFile(model_hdfs_path, 'wb').write(f_content)
print("-------------OK--------------")

# model tester
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
import os
from sparkdl import KerasImageFileTransformer


def loadAndPreprocessKerasInceptionV3(uri):
    """Load the image at *uri* and preprocess it into an InceptionV3 batch."""
    pixels = img_to_array(load_img(uri, target_size=(299, 299)))
    batch = np.expand_dims(pixels, axis=0)
    return preprocess_input(batch)


# NOTE(review): `model_path` and `test_df` are presumably bound earlier in
# this file — confirm before running this cell standalone.
transformer = KerasImageFileTransformer(
    inputCol="image",
    outputCol="predictions",
    modelFile=model_path,
    imageLoader=loadAndPreprocessKerasInceptionV3,
    outputMode="vector",
)
print(type(transformer))

final_df = transformer.transform(test_df)
final_df.show()
def read_image(file_path):
    # Load the image at *file_path* at 150x150 and scale pixel values into [0, 1].
    img = image.load_img(file_path, grayscale=False, target_size=(150, 150))
    return image.img_to_array(img) / 255


def loadAndPreprocessKerasInceptionV3(uri):
    # Wrap one preprocessed 150x150 RGB image into a (1, 150, 150, 3) float32 batch.
    in_data = np.ndarray((1, 150, 150, 3), dtype=np.float32)
    img = read_image(uri)
    in_data[0] = img
    return in_data


# Transformer that runs the transfer-learned VGG16 dog/cat model on each URI.
transformer = KerasImageFileTransformer(
    inputCol="uri",
    outputCol="predictions",
    modelFile= "/Users/502677522/Class/DataWeekend/2017_12_02/labs/vgg16_dogcat_transfer.h5",
    imageLoader=loadAndPreprocessKerasInceptionV3,
    outputMode="vector")

dirpath = "/Users/502677522/Class/DataWeekend/2017_12_02/data/test"
# files = [os.path.abspath(os.path.join(dirpath, f)) for f in os.listdir("/Users/502677522/Class/DataWeekend/2017_12_02/data/test") if f.endswith('11.jpg')]
# >>> spark.createDataFrame(rdd, "a: string, b: int").collect()
# [Row(a=u'Alice', b=1)]
# >>> rdd = rdd.map(lambda row: row[1])
# >>> spark.createDataFrame(rdd, "int").collect()
# [Row(value=1)]
# >>> spark.createDataFrame(rdd, "boolean").collect()
# Traceback (most recent call last):
# uri_df = sqlContext.createDataFrame(files, StringType()).toDF("uri")
# NOTE(review): this statement is cut off at the end of this chunk — it
# continues past the visible portion of the file.
files = [
# NOTE(review): this chunk starts mid-function — the enclosing def (and the
# `data`, `i`, `count`, and `image` bindings) begin before the visible
# portion of the file.
data[i] = image
if i % 2500 == 0:
    print('Processed {} of {}'.format(i, count))
#print image_file
return data


def loadAndPreprocessKerasInceptionV3(uri):
    # this is a typical way to load and prep images in keras
    image = read_image(uri)  # img_to_array(load_img(uri, target_size=(299, 299)))
    image = np.expand_dims(image, axis=0)  # add batch dimension
    return preprocess_input(image)


# Transformer that applies the model saved at /tmp/model-full.h5 to each URI.
transformer = KerasImageFileTransformer(
    inputCol="uri",
    outputCol="predictions",
    modelFile="/tmp/model-full.h5",
    imageLoader=loadAndPreprocessKerasInceptionV3,
    outputMode="vector")

# Spark setup for building the input DataFrame of image paths.
sc = SparkContext(appName="PythonStreamingEventHubWordCount")
sqlContext = SQLContext(sc)

# NOTE(review): the join uses `dirpath` (not defined in this chunk) while
# listing "/data/test" — confirm the two refer to the same directory.
files = [
    os.path.abspath(os.path.join(dirpath, f))
    for f in os.listdir("/data/test")
    if f.endswith('.jpg')
]

uri_df = sqlContext.createDataFrame(files, StringType()).toDF("uri")
final_df = transformer.transform(uri_df)