def featurize_transform(dataset, context):
    """Featurize the images referenced by ``dataset`` with a pretrained DNN.

    ``dataset`` is wrapped in a ``DataFrame`` and handed to ``rx_featurize``
    with a four-step transform pipeline: ``load_image`` (mapping the
    ``image`` column to a ``feature`` column), ``resize_image`` to 224x224,
    ``extract_pixels`` and ``featurize_image`` using the model named by
    ``MICROSOFTML_MODEL_NAME``.

    Args:
        dataset: Anything the ``DataFrame`` constructor accepts; the pipeline
            reads its ``image`` column.
        context: Not used by this function.

    Returns:
        Whatever ``rx_featurize`` returns for the pipeline above.
    """
    # Imports stay local to the function, as in the original.
    from microsoftml import load_image, resize_image, extract_pixels, featurize_image, rx_featurize
    from lung_cancer.connection_settings import MICROSOFTML_MODEL_NAME

    frame = DataFrame(dataset)

    # Each step after the first rewrites the "feature" column in place.
    pipeline = [
        load_image(cols={"feature": "image"}),
        resize_image(cols="feature", width=224, height=224),
        extract_pixels(cols="feature"),
        featurize_image(cols="feature", dnn_model=MICROSOFTML_MODEL_NAME),
    ]

    return rx_featurize(data=frame, overwrite=True, ml_transforms=pipeline)
def featurize_transform(dataset, context):
    """Featurize the images in ``dataset`` and relabel the output columns.

    ``dataset`` is wrapped in a ``DataFrame`` and passed to ``rx_featurize``
    with a ``load_image`` -> ``resize_image`` (224x224) -> ``extract_pixels``
    -> ``featurize_image`` pipeline, using the model named by
    ``MICROSOFTML_MODEL_NAME`` and ``ml_transform_vars=["image", "label"]``.
    The columns of the result are then renamed to ``image``, ``class``,
    ``label`` followed by ``f0``, ``f1``, ... for every remaining column.

    Args:
        dataset: Anything the ``DataFrame`` constructor accepts; the pipeline
            reads its ``image`` column.
        context: Not used by this function.

    Returns:
        The ``rx_featurize`` output with its columns renamed as described.
    """
    # Imports stay local to the function, as in the original.
    from microsoftml import load_image, resize_image, extract_pixels, featurize_image, rx_featurize
    from lung_cancer.connection_settings import MICROSOFTML_MODEL_NAME

    frame = DataFrame(dataset)

    # Each step after the first rewrites the "feature" column in place.
    pipeline = [
        load_image(cols={"feature": "image"}),
        resize_image(cols="feature", width=224, height=224),
        extract_pixels(cols="feature"),
        featurize_image(cols="feature", dnn_model=MICROSOFTML_MODEL_NAME),
    ]
    featurized = rx_featurize(
        data=frame,
        overwrite=True,
        ml_transforms=pipeline,
        ml_transform_vars=["image", "label"],
    )

    # The first three output columns get fixed names; everything after them
    # is numbered f0, f1, ...
    # NOTE(review): assumes the output has at least three leading
    # non-feature columns in this order - confirm against rx_featurize.
    feature_count = len(featurized.columns) - 3
    feature_names = ["f{}".format(n) for n in range(feature_count)]
    featurized.columns = ["image", "class", "label"] + feature_names
    return featurized
def parallel_kernel(df, data_dir=None, download_fresh=False):
    """Download the images listed in ``df`` and featurize them with Resnet18.

    Args:
        df: a data frame with urls and file names (columns ``url`` and
            ``file_name``), as created by listFilesInAzureFolder. A
            ``local_name`` column holding each file's local path is added
            to it in place.
        data_dir: directory the images are downloaded into. Defaults to a
            ``localdata`` folder under the current working directory. It is
            created if it does not exist.
        download_fresh: when true, every file is downloaded again even if a
            local copy already exists.

    Returns:
        The output of ``rx_featurize`` with a ``url`` column copied from
        ``df``.
    """
    # get the images from blob and do them locally
    if data_dir is None:
        # The original line was an untranslated R assignment
        # (`data_dir < -file.path(...)`), which never set the default.
        data_dir = os.path.join(os.getcwd(), 'localdata')
    os.makedirs(data_dir, exist_ok=True)

    # download the assigned blob files, serially to start with
    df["local_name"] = df["file_name"].apply(
        lambda f: os.path.join(data_dir, f))
    # Walk the column values instead of labels 0..n-1 so the download also
    # works when df does not carry a default integer index.
    for url, local_name in zip(df["url"], df["local_name"]):
        if download_fresh or not os.path.exists(local_name):
            urllib.request.urlretrieve(url, local_name)

    # featurize
    image_features = rx_featurize(
        data=df,
        # declare the featurization pipeline
        ml_transforms=[
            load_image(cols=dict(
                Image="local_name")),  # will make a column named "Image"
            resize_image(
                cols=dict(Resized="Image"),  # will make "Resized" from "Image"
                width=224,
                height=224,
                resizing_option="IsoPad"),
            extract_pixels(cols=dict(Pixels="Resized")),
            featurize_image(cols=dict(Features="Pixels"), dnn_model="Resnet18")
        ],
        ml_transform_vars=["local_name"],  # transform these columns
        report_progress=1)

    # Item assignment is required here: attribute assignment
    # (`image_features.url = ...`) does not create a new DataFrame column.
    # NOTE(review): values are copied positionally, assuming rx_featurize
    # returns one row per input row in the same order - confirm.
    image_features["url"] = df["url"].values
    return image_features
img for img in os.listdir(image_location) if img.endswith(".tif") ]: images.append(image_location + "\\" + file) import pandas image_df = pandas.DataFrame(data=dict(image=images)) print(image_df) from microsoftml import rx_featurize, load_image, resize_image, extract_pixels, featurize_image image_vector = rx_featurize(data=image_df, ml_transforms=[ load_image(cols=dict(Features="image")), resize_image(cols="Features", width=227, height=227), extract_pixels(cols="Features"), featurize_image(cols="Features", dnn_model="Alexnet") ]) print(image_vector.head()) image_location_match = os.path.abspath( os.path.join("C:\\workshop\\ImageSimilarity\\", "kids_girls_shoes_match")) images_match = [] for file in [ img for img in os.listdir(image_location_match) if img.endswith(".tif") ]: images_match.append(image_location_match + "\\" + file) image_match_df = pandas.DataFrame(data=dict(image=images_match))