コード例 #1
0
def featurize_transform(dataset, context):
    """Featurize the images referenced by ``dataset`` with a pre-trained DNN.

    ``dataset`` is wrapped in a ``DataFrame`` and pushed through a four-step
    ``microsoftml`` pipeline: the "image" column is loaded into a "feature"
    column, which is then resized to 224x224, converted to pixels and
    featurized with the model named by ``MICROSOFTML_MODEL_NAME``.

    Args:
        dataset: input accepted by the ``DataFrame`` constructor; it must
            provide an "image" column.
        context: not used by this function.

    Returns:
        Whatever ``rx_featurize`` returns for the pipeline above.
    """
    from microsoftml import (
        load_image,
        resize_image,
        extract_pixels,
        featurize_image,
        rx_featurize,
    )
    from lung_cancer.connection_settings import MICROSOFTML_MODEL_NAME

    frame = DataFrame(dataset)

    # Each step rewrites the "feature" column in place.
    pipeline = [
        load_image(cols={"feature": "image"}),
        resize_image(cols="feature", width=224, height=224),
        extract_pixels(cols="feature"),
        featurize_image(cols="feature", dnn_model=MICROSOFTML_MODEL_NAME),
    ]

    return rx_featurize(data=frame, overwrite=True, ml_transforms=pipeline)
コード例 #2
0
def featurize_transform(dataset, context):
    """Featurize the images in ``dataset`` and give the result fixed column names.

    ``dataset`` is wrapped in a ``DataFrame`` and run through a ``microsoftml``
    pipeline (load "image" into "feature", resize to 224x224, extract pixels,
    featurize with the model named by ``MICROSOFTML_MODEL_NAME``), with
    "image" and "label" passed as ``ml_transform_vars``. The columns of the
    result are then renamed positionally.

    Args:
        dataset: input accepted by the ``DataFrame`` constructor; it must
            provide the "image" and "label" columns.
        context: not used by this function.

    Returns:
        The ``rx_featurize`` output whose first three columns are named
        "image", "class" and "label", and whose remaining columns are named
        "f0", "f1", ... in order.
    """
    from microsoftml import (
        load_image,
        resize_image,
        extract_pixels,
        featurize_image,
        rx_featurize,
    )
    from lung_cancer.connection_settings import MICROSOFTML_MODEL_NAME

    frame = DataFrame(dataset)

    # Each step rewrites the "feature" column in place.
    pipeline = [
        load_image(cols={"feature": "image"}),
        resize_image(cols="feature", width=224, height=224),
        extract_pixels(cols="feature"),
        featurize_image(cols="feature", dnn_model=MICROSOFTML_MODEL_NAME),
    ]
    featurized = rx_featurize(
        data=frame,
        overwrite=True,
        ml_transforms=pipeline,
        ml_transform_vars=["image", "label"],
    )

    # Every column after the first three is treated as a feature column.
    feature_count = len(featurized.columns) - 3
    feature_names = ["f{}".format(idx) for idx in range(feature_count)]
    featurized.columns = ["image", "class", "label"] + feature_names
    return featurized
コード例 #3
0
def parallel_kernel(df, data_dir=None, download_fresh=False):
    """Download the images listed in ``df`` and featurize them with Resnet18.

    Args:
        df: data frame with ``url`` and ``file_name`` columns, as created by
            listFilesInAzureFolder. A ``local_name`` column holding the local
            path of every file is added to it in place.
        data_dir: directory the images are downloaded into. Defaults to a
            ``localdata`` folder under the current working directory; it is
            created when missing.
        download_fresh: when true, every file is downloaded again even if a
            local copy already exists.

    Returns:
        The ``rx_featurize`` output with a ``url`` column taken from ``df``.
    """
    # get the images from blob and do them locally
    if data_dir is None:
        data_dir = os.path.join(os.getcwd(), 'localdata')
    # exist_ok avoids failing when another worker creates the directory
    # between a separate existence check and the makedirs call.
    os.makedirs(data_dir, exist_ok=True)

    # download the assigned blob files, serially to start with
    df["local_name"] = df["file_name"].apply(
        lambda f: os.path.join(data_dir, f))
    # Walk the columns positionally so a non-default index on df (for example
    # a slice handed to one worker) does not break label-based lookups.
    for url, local_name in zip(df["url"], df["local_name"]):
        if download_fresh or not os.path.exists(local_name):
            urllib.request.urlretrieve(url, local_name)

    # featurize
    image_features = rx_featurize(
        data=df,
        # declare the featurization pipeline
        ml_transforms=[
            load_image(cols=dict(
                Image="local_name")),  # will make a column named "Image"
            resize_image(
                cols=dict(Resized="Image"),  # will make "Resized" from "Image"
                width=224,
                height=224,
                resizing_option="IsoPad"),
            extract_pixels(cols=dict(Pixels="Resized")),
            featurize_image(cols=dict(Features="Pixels"), dnn_model="Resnet18")
        ],
        ml_transform_vars=["local_name"],  # transform these columns
        report_progress=1)

    # Item assignment always yields a real column; attribute assignment would
    # only set a plain Python attribute when "url" is not already a column.
    # NOTE(review): assumes rx_featurize returns one row per input row, in
    # input order, so the values are attached positionally -- confirm.
    image_features["url"] = df["url"].values
    return image_features
コード例 #4
0
]:
    images.append(image_location + "\\" + file)

# Build a one-column data frame ("image") from the paths collected in
# `images` (populated by the loop above this fragment) and print it.
import pandas
image_df = pandas.DataFrame(data=dict(image=images))
print(image_df)

# Featurize every image: load the "image" paths into a "Features" column,
# resize to 227x227, extract pixels, then featurize with the "Alexnet" model.
from microsoftml import rx_featurize, load_image, resize_image, extract_pixels, featurize_image
image_vector = rx_featurize(data=image_df,
                            ml_transforms=[
                                load_image(cols=dict(Features="image")),
                                resize_image(cols="Features",
                                             width=227,
                                             height=227),
                                extract_pixels(cols="Features"),
                                featurize_image(cols="Features",
                                                dnn_model="Alexnet")
                            ])

# Show the first rows of the featurized output.
print(image_vector.head())

# Collect the paths of all ".tif" files in the "kids_girls_shoes_match"
# folder; these are wrapped in a second data frame below.
# NOTE(review): the "\\" separator and the C:\ root make this Windows-only.
image_location_match = os.path.abspath(
    os.path.join("C:\\workshop\\ImageSimilarity\\", "kids_girls_shoes_match"))
images_match = []
for file in [
        img for img in os.listdir(image_location_match) if img.endswith(".tif")
]:
    images_match.append(image_location_match + "\\" + file)

# Same one-column ("image") layout as image_df above.
image_match_df = pandas.DataFrame(data=dict(image=images_match))
image_match_vectors = rx_featurize(data=image_match_df,
                                   ml_transforms=[