예제 #1
0
    def __init__(self,
                 frame: Frame2D,
                 model: KMeans,
                 fit_to: List[Frame2D.CHN] = None,
                 frame_1dmask: np.ndarray = None,
                 scaler=None):
        """ Fits the given KMeans model on the (optionally masked and scaled) frame data

        The frame is flattened with ``data_flatten_xy()`` before fitting. If ``fit_to`` is
        truthy, only those channels (selected through ``frame.data_chn``) are used.

        :param frame: The Frame2D providing the data to fit on
        :param model: KMeans Model, its ``.fit()`` result is stored in ``self.model``
        :param fit_to: The indexes to .fit() to, must be a list of the Channel Consts.
            If None (or empty), use all channels
        :param frame_1dmask: Mask used to index the flattened data and exclude certain points.
            Its shape must equal the flattened data's shape without the last axis.
            NOTE(review): presumably a 1D boolean array over the flattened XY points - confirm.
        :param scaler: The scaler to use, must be a callable(np.ndarray)
        :returns: KMeans2D Instance
        """
        # Pick the channel subset first (if any), then flatten.
        if fit_to:
            source = frame.data_chn(fit_to)
        else:
            source = frame
        flat = source.data_flatten_xy()

        if frame_1dmask is not None:
            expected_shape = flat.shape[:-1]
            assert frame_1dmask.shape == expected_shape,\
                f"The Frame 2D Mask must match the size of first 2 Dimensions of frame exactly." \
                f"(Mask Shape: {frame_1dmask.shape}, Frame 2D Shape: {flat.shape[:-1]})"
            # Keep only the points selected by the mask, all channels retained.
            flat = flat[frame_1dmask, ...]

        if scaler:
            flat = scaler(flat)

        self.model = model.fit(flat)
        self.frame = frame
        self.frame_1dmask = frame_1dmask
예제 #2
0
def kmeans_matrix(test_path: str,
                  score_path: str,
                  scale: float,
                  output_dir: str,
                  ixs_per_kmeans: int,
                  verbose=True,
                  clusters=6,
                  imgs_dir="imgs",
                  scaler=minmax_scale,
                  glcm_radius=5):
    """ Runs the KMeans Matrix generation

    For every combination of ``ixs_per_kmeans`` channel indexes (out of 22), a KMeans2D
    is fitted, its plot is saved as ``<output_dir>/<imgs_dir>/<i>_<j>_....png`` and one
    row with the indexes and the scores is appended to ``<output_dir>/results.csv``.

    :param test_path: Path to the test file
    :param score_path: Path to the score file
    :param scale: How much to scale the Frame before running the loop
    :param output_dir: The directory to output results in
    :param ixs_per_kmeans: The number of indexes to consider.
    :param verbose: Whether to output information in console
    :param clusters: The number of KMeans clusters
    :param imgs_dir: The subdir folder name of images
    :param scaler: The scaler to use to normalize data
    :param glcm_radius: Radius of GLCM
    """
    f = Frame2D.from_image(test_path, scale=scale)
    score = Frame2D.from_image(score_path, scale=scale)
    # The GLCM radius comes from the glcm_radius argument; the previous code referenced
    # an undefined name `radius` here and raised NameError.
    frame = f.get_all_chns(
        glcm=Frame2D.GLCM(verbose=verbose, radius=glcm_radius))

    # Creates output_dir and the image sub-directory; an existing directory is fine.
    os.makedirs(f"{output_dir}/{imgs_dir}", exist_ok=True)

    with open(f"{output_dir}/results.csv", "w+") as file:

        # Header: one letter column per index, followed by the score columns.
        file.write(",".join(ascii_lowercase[:ixs_per_kmeans]) +
                   ",Custom,Homogeneity,Completeness,V Measure\n")

        # NOTE(review): 22 is presumably the channel count of get_all_chns - confirm.
        combos = list(combinations(range(22), ixs_per_kmeans))
        for ixs in tqdm.tqdm(combos):
            ixs_str = [str(i) for i in ixs]

            km = KMeans2D(frame,
                          KMeans(n_clusters=clusters, verbose=verbose),
                          fit_to=ixs,
                          scaler=scaler)

            sns.set_palette(sns.color_palette("magma"), n_colors=clusters)
            km.plot()
            plt.savefig(f"{output_dir}/{imgs_dir}/" + "_".join(ixs_str) +
                        ".png")
            plt.cla()

            scores = km.score(score, glcm_radius=glcm_radius).values()
            file.write(",".join(ixs_str) + "," +
                       ",".join(str(s) for s in scores) + '\n')
            # Flush per row so partial results survive an interrupted run.
            file.flush()
예제 #3
0
 def setUp(cls) -> None:
     """ Loads the chestnut_0 JPG frame and stores the shared fixture attributes """
     loaded = Frame2D.from_image(
         f"{_RSC}/imgs/chestnut_0/frame10000ms.jpg")
     cls.frame = loaded
     # First window of the frame split with a window size of 100.
     cls.frame_window = loaded.split_xy(100)[0][0]
     cls.window = 100
     cls._RSC = _RSC
     cls.channels = 3
예제 #4
0
 def setUp(cls) -> None:
     """ Loads the chestnut_0 screenshot and stores the shared fixture attributes """
     image_path = f"{_RSC}/imgs/chestnut_0/screenshot1.png"
     loaded = Frame2D.from_image(image_path)
     cls.frame = loaded
     # Keep only the first 3 channels. REMOVE THIS if not a screenshot
     loaded.data = loaded.data[..., :3]
     # First window of the frame split with a window size of 100.
     cls.frame_window = loaded.split_xy(100)[0][0]
     cls.window = 100
     cls._RSC = _RSC
     cls.channels = 3
     cls.frame2 = loaded.get_chns(xy=True,
                                  hsv=True,
                                  mex_g=True,
                                  ex_gr=True,
                                  ndi=True)
예제 #5
0
def kmeans(f: Frame2D,
           clusters: int,
           verbose: bool,
           fit_indexes: list,
           scaler=minmax_scale,
           fig_name: str or None = "out.png"):
    """ Fits a KMeans2D on the frame and optionally saves the current figure

    :param f: The Frame2D to fit on
    :param clusters: The number of KMeans clusters
    :param verbose: Passed to the KMeans model as its verbose flag
    :param fit_indexes: The channel indexes to fit to
    :param scaler: The scaler passed on to KMeans2D
    :param fig_name: File name to save the current figure to. If falsy, nothing is saved
    :return: The fitted KMeans2D instance
    """
    estimator = KMeans(n_clusters=clusters, verbose=verbose)
    km = KMeans2D(f, model=estimator, fit_to=fit_indexes, scaler=scaler)

    sns.set_palette(sns.color_palette("magma"), n_colors=clusters)

    if not fig_name:
        return km

    # NOTE(review): nothing is plotted in this function before saving; the saved
    # figure is whatever pyplot's current figure holds - confirm this is intended.
    figure = plt.gcf()
    figure.set_size_inches(f.width() / 96 * 2, f.height() / 96 * 2)
    figure.savefig(fig_name)
    plt.cla()

    return km
예제 #6
0
def channel_analysis(image_path: str,
                     verbose:bool = True,
                     image_scale: float = 0.5,
                     plot_scale: float = 0.2,
                     exclude_xy: bool = True):
    """ Loads an image, computes all of its channels and plots them

    :param image_path: Path to image
    :param verbose: Verbose flag handed to the GLCM configuration
    :param image_scale: Scale applied when loading the image
    :param plot_scale: Scale handed to the plot's image() call
    :param exclude_xy: Whether to exclude the XY channel from the computed channels
    :return: The result of plotting all channel labels as an image
        (described by the original author as a matplotlib fig)
    """
    loaded = Frame2D.from_image(image_path, scale=image_scale)

    if exclude_xy:
        excluded = [Frame2D.CHN.XY]
    else:
        excluded = []

    all_chns = loaded.get_all_chns(exc_chns=excluded,
                                   glcm=Frame2D.GLCM(verbose=verbose))
    plotter = all_chns.plot(all_chns.labels)
    return plotter.image(scale=plot_scale)
예제 #7
0
def kmeans_score(f: Frame2D,
                 score: Frame2D or str,
                 glcm_radius: int,
                 clusters: int,
                 verbose: bool,
                 fit_indexes: list,
                 scaler=minmax_scale,
                 fig_name: str or None = "out.png"):
    """ Runs kmeans() on the frame and prints its score against a scoring frame

    :param f: The Frame2D to fit on
    :param score: The scoring Frame2D, or a path to an image to load it from
    :param glcm_radius: GLCM radius handed to the scoring call
    :param clusters: The number of KMeans clusters
    :param verbose: Passed to the KMeans model as its verbose flag
    :param fit_indexes: The channel indexes to fit to
    :param scaler: The scaler passed on to kmeans()
    :param fig_name: File name passed on to kmeans() for saving the figure
    """
    fitted = kmeans(f, clusters, verbose, fit_indexes, scaler, fig_name)

    # A string is treated as a path and loaded, anything else is used as-is.
    score_frame = Frame2D.from_image(score) if isinstance(score, str) else score

    result = fitted.score(score_frame, glcm_radius=glcm_radius)
    print(result)
예제 #8
0
    def test_box(self):
        """ KMeans on the basic box image must score 1 on every metric """
        chn = CONSTS.CHN

        box = Frame2D.from_image(self._RSC + "/imgs/basic/box.png")
        # Older keyword-style call, kept for reference:
        # frame_xy = f.get_chns(xy=True, hsv=True, mex_g=True, ex_gr=True, ndi=True)
        box_chns = box.get_chns(
            self_=False,
            chns=[chn.XY, chn.HSV, chn.MEX_G, chn.EX_GR, chn.NDI])

        model = KMeans(n_clusters=3, verbose=False)
        km = KMeans2D(box_chns,
                      model,
                      fit_to=[chn.MEX_G, chn.EX_GR, chn.NDI],
                      scaler=minmax_scale)
        score = km.as_frame().score(box)

        # Checked in the same order as the score columns.
        for metric in ('Custom', 'Homogeneity', 'Completeness', 'V Measure'):
            self.assertAlmostEqual(score[metric], 1)
예제 #9
0
 def load_frame(frame: Frame2D) -> Draw2D:
     """ Builds a Draw2D whose canvas is an image made from the frame's RGB data """
     rgb_array = frame.data_rgb().data
     canvas = Image.fromarray(rgb_array, mode=DRAW_MODE)
     drawer = ImageDraw.Draw(canvas, DRAW_MODE)
     return Draw2D(canvas, drawer)
예제 #10
0
def kmeans_scoring_12122020(test_path: str,
                            score_path: str,
                            channels: dict = None,
                            grouping: str = "PREDICT",
                            color: str = "ACTUAL",
                            img_scale: float = 0.5,
                            clusters_mnl: int = 3,
                            clusters_mnf: int = 5,
                            scatter_size: float = 1.0,
                            verbose: bool = True):
    """ Runs the KMeans model developed at 12/12/2020

    channels can be passed as a dictionary, whereby it's similar to how you would call get_chns

    E.g. for f.get_chns(xy=True, hsv=True, glcm_con=True),
    you'd pass channels=dict(xy=True, hsv=True, glcm_con=True)

    If any GLCM channel (glcm_con, glcm_cor, glcm_ent) is requested, 'glcm_radius' must
    also be present in channels.

    grouping and color only accept these following values:
    'PREDICT', 'ACTUAL', 'SELECTED' (or None)

    Some acronym pre/suffixes used in the implementation:
    MNL: Meaningless, MNF: Meaningful

    :param test_path: Path to the test image
    :param score_path: Path to the scoring image
    :param channels: The channels to get, See Description on how to pass argument.
    :param grouping: The categorical grouping of the plots. i.e. how to create subplots.
        See Description on allowable values
    :param color: The categorical color/hue.
        See Description on allowable values
    :param img_scale: The scaling of the test/score loaded in
    :param clusters_mnl: Clusters to use for Meaningless Clustering
    :param clusters_mnf: Clusters to use for Meaningful Clustering
    :param scatter_size: Scatter size of plot
    :param verbose: Whether to output into console the details
    :return: A dict with keys fig_mnl, fig_mnf (the two lmplot results), score_mnl,
        score_mnf (homogeneity/completeness/v-measure of each stage) and ix_mnl
        (the label of the cluster detected as meaningless)
    :raises ValueError: If a GLCM channel is requested without 'glcm_radius' in channels
    """
    # ---- MEANINGLESS CLASSIFICATION ----
    # Here, we start off with the meaningless classification.
    # In this part, we're concerned about removing the meaningless labels so that the
    # clustering is more meaningful in a way.

    allowed = ('PREDICT', 'ACTUAL', 'SELECTED', None)
    assert grouping in allowed, \
        "Invalid grouping, see description on allowable values."
    assert color in allowed, \
        "Invalid color, see description on allowable values."

    # We load the Frames here and run the KMeans Directly on it.
    # Note that the KMeans is being run on the RGB Channels only, we may change this later

    predict = Frame2D.from_image(test_path, scale=img_scale)
    predict_rgb = predict.data_rgb()
    actual = Frame2D.from_image(score_path, scale=img_scale)

    if channels:
        predict = predict.get_chns(**channels)

        # If there are any GLCM channels, we have to crop it.
        if any(k in ("glcm_con", "glcm_cor", "glcm_ent") for k in channels):
            if 'glcm_radius' not in channels:
                raise ValueError(
                    "glcm_radius must be explicitly specified on glcm features"
                )
            predict_rgb = predict_rgb.convolute(channels['glcm_radius'],
                                                method='average')
            actual = actual.crop_glcm(glcm_radius=channels['glcm_radius'])

    fit_indexes = list(range(predict.shape[-1]))

    # Predict using KMeans
    predict_km_mnl = KMeans2D(predict_rgb,
                              KMeans(clusters_mnl, verbose=verbose),
                              fit_to=list(range(3)),
                              scaler=scale)

    # Score the prediction
    # The labels are in 1D, we reshape it to recreate the channels
    # Reshape label prediction to PRED, ACT, COUNT
    score_mnl = Frame2D.scorer_pair(predict_km_mnl.model.labels_,
                                    actual)['labels'].reshape([-1, 3])

    # We retrieve the xy using predict or actual, then stack it onto the score
    score_mnl_xy = predict.get_xy()[0].reshape([-1, 2])
    score_mnl = np.hstack([score_mnl, score_mnl_xy])

    # Create DataFrame for lmplot
    score_mnl_df = pd.DataFrame(score_mnl,
                                columns=('PREDICT', 'ACTUAL', 'SELECTED', 'X',
                                         'Y'))

    # Call lmplot. x/y are passed by keyword: newer seaborn versions no longer
    # accept them positionally, while keywords work on older versions too.
    fig_mnl = sns.lmplot(
        x='X',
        y='Y',
        data=score_mnl_df,
        fit_reg=False,  # Don't render regression
        col=grouping,  # Subplot grouping chosen by the caller
        hue=color,
        col_wrap=3,  # Wrap around on 3 column plots
        scatter_kws={'s': scatter_size},  # Scatter Size
        legend=True,
        aspect=predict.width() / predict.height(),
        legend_out=True)

    # ---- MEANINGLESS CLASSIFICATION DETERMINANT ----
    # This is the algorithm to determine the cluster that is the least meaningful.
    # If there's too many clusters, this wouldn't work well as depicted in the paper.
    # This will only rid off the least meaningful one, hence it'll fail on >1 MNL cluster

    # Contains the meaningless cluster number as labelled by KMeans:
    # the cluster whose center has the lowest mean across its features.
    ix_mnl: int = np.mean(predict_km_mnl.model.cluster_centers_,
                          axis=1).argmin()

    # Contains the mask [XY] where you can mask against np.ndarrays
    # noinspection PyTypeChecker
    mask_mnl: np.ndarray = predict_km_mnl.model.labels_ != ix_mnl

    # ---- MEANINGFUL CLASSIFICATION ----
    # For this part, we remove the MNL Cluster and perform another KMeans on it.

    # verbose now follows the caller's flag; it was hard-coded to True before.
    predict_km_mnf =\
        KMeans2D(predict,
                 KMeans(clusters_mnf, verbose=verbose),
                 fit_to=fit_indexes,
                 frame_1dmask=mask_mnl,
                 scaler=scale)

    # Contains the Label in 1D
    score_mnf = Frame2D.scorer_pair(
        predict_km_mnf.model.labels_,
        actual.data_flatten_xy()[predict_km_mnf.frame_1dmask, 0])['labels']

    # We retrieve the xy again, but we need to mask it since we removed the MNL cluster
    score_mnf_xy = predict.get_xy()[0].reshape([-1, 2])[mask_mnl, :]
    score_mnf = np.hstack([score_mnf, score_mnf_xy])

    # Create DataFrame for lmplot
    score_mnf_df = pd.DataFrame(score_mnf,
                                columns=('PREDICT', 'ACTUAL', 'SELECTED', 'X',
                                         'Y'))

    # Call lmplot
    # NOTE(review): unlike the MNL plot, col/hue are fixed here and ignore the
    # grouping/color arguments - confirm this is intended.
    fig_mnf = sns.lmplot(
        x='X',
        y='Y',
        data=score_mnf_df,
        fit_reg=False,  # Don't render regression
        col='PREDICT',  # Group By Predict
        hue='ACTUAL',
        col_wrap=3,  # Wrap around on 3 column plots
        scatter_kws={'s': scatter_size},  # Scatter Size
        aspect=predict.width() / predict.height(),
        legend=False)

    # Return both Figures, Score and the detected MNL Cluster
    return dict(
        fig_mnl=fig_mnl,
        fig_mnf=fig_mnf,
        score_mnl=homogeneity_completeness_v_measure(score_mnl_df.ACTUAL,
                                                     score_mnl_df.PREDICT),
        score_mnf=homogeneity_completeness_v_measure(score_mnf_df.ACTUAL,
                                                     score_mnf_df.PREDICT),
        ix_mnl=ix_mnl)