def __init__(self,
             frame: Frame2D,
             model: KMeans,
             fit_to: List[Frame2D.CHN] = None,
             frame_1dmask: np.ndarray = None,
             scaler=None):
    """ Fits a KMeans model on the xy-flattened data of a frame

    The data is prepared in this order: channel selection, masking, scaling.
    The fitted model, the frame and the mask are stored on the instance.

    :param frame: The Frame2D that provides the data to fit
    :param model: KMeans Model, its .fit() is called on the prepared data
    :param fit_to: The indexes to .fit() to, must be a list of the Channel Consts.
        If None (or empty), use all channels
    :param frame_1dmask: The mask applied on the flattened data before fitting.
        Its shape must equal the shape of the flattened data without the
        last (channel) axis. Assuming a boolean mask, only the True
        points are kept for fitting
    :param scaler: The scaler to use, must be a callable(np.ndarray).
        If None, the data is not scaled
    :raises AssertionError: If frame_1dmask does not match the flattened data
    """
    if fit_to:
        data = frame.data_chn(fit_to).data_flatten_xy()
    else:
        data = frame.data_flatten_xy()

    if frame_1dmask is not None:
        # Raised explicitly (instead of an assert statement) so that the
        # check is not stripped when Python runs with -O.
        if frame_1dmask.shape != data.shape[:-1]:
            raise AssertionError(
                f"The Frame 1D Mask must match the flattened frame "
                f"(excluding the channel axis) exactly. "
                f"(Mask Shape: {frame_1dmask.shape}, "
                f"Flattened Frame Shape: {data.shape[:-1]})")
        data = data[frame_1dmask, ...]

    if scaler:
        data = scaler(data)

    self.model = model.fit(data)
    self.frame = frame
    self.frame_1dmask = frame_1dmask
def kmeans_matrix(test_path: str,
                  score_path: str,
                  scale: float,
                  output_dir: str,
                  ixs_per_kmeans: int,
                  verbose=True,
                  clusters=6,
                  imgs_dir="imgs",
                  scaler=minmax_scale,
                  glcm_radius=5):
    """ Runs the KMeans Matrix generation

    For every combination of ixs_per_kmeans channel indexes, a KMeans2D is
    fitted, its plot is saved as an image and its scores are appended as a
    row to {output_dir}/results.csv.

    :param test_path: Path to the test file
    :param score_path: Path to the score file
    :param scale: How much to scale the Frame before running the loop
    :param output_dir: The directory to output results in
    :param ixs_per_kmeans: The number of indexes to consider.
    :param verbose: Whether to output information in console
    :param clusters: The number of KMeans clusters
    :param imgs_dir: The subdir folder name of images
    :param scaler: The scaler to use to normalize data
    :param glcm_radius: Radius of GLCM
    """
    f = Frame2D.from_image(test_path, scale=scale)
    score = Frame2D.from_image(score_path, scale=scale)
    # The GLCM radius comes from glcm_radius; the previous code referenced an
    # undefined name `radius` here and failed with a NameError.
    frame = f.get_all_chns(glcm=Frame2D.GLCM(verbose=verbose, radius=glcm_radius))

    # Creates output_dir as well as the image sub directory if missing
    os.makedirs(f"{output_dir}/{imgs_dir}", exist_ok=True)

    with open(f"{output_dir}/results.csv", "w+") as file:
        # Header: one lettered column per index, followed by the score columns
        file.write(",".join(ascii_lowercase[:ixs_per_kmeans]) +
                   ",Custom,Homogeneity,Completeness,V Measure\n")

        # NOTE(review): 22 is hard-coded, presumably the number of channels
        # returned by get_all_chns -- confirm if the channel set changes.
        combos = list(combinations(range(22), ixs_per_kmeans))

        for ixs in tqdm.tqdm(combos):
            km = KMeans2D(frame,
                          KMeans(n_clusters=clusters, verbose=verbose),
                          fit_to=ixs,
                          scaler=scaler)

            sns.set_palette(sns.color_palette("magma"), n_colors=clusters)
            km.plot()
            plt.savefig(f"{output_dir}/{imgs_dir}/" +
                        "_".join(str(i) for i in ixs) + ".png")
            plt.cla()

            scores = km.score(score, glcm_radius=glcm_radius).values()
            file.write(",".join(str(i) for i in ixs) + ",")
            file.write(",".join(str(s) for s in scores) + '\n')
            # Flush per row so partial results survive an interrupted run
            file.flush()
def setUp(cls) -> None:
    """ Loads the chestnut_0 frame and prepares the shared test attributes """
    image_path = f"{_RSC}/imgs/chestnut_0/frame10000ms.jpg"
    cls.frame = Frame2D.from_image(image_path)

    # The first window of the frame split with a size of 100
    windows = cls.frame.split_xy(100)
    cls.frame_window = windows[0][0]

    cls.window = 100
    cls._RSC = _RSC
    cls.channels = 3
def setUp(cls) -> None:
    """ Loads the chestnut_0 screenshot and prepares the shared test attributes """
    screenshot_path = f"{_RSC}/imgs/chestnut_0/screenshot1.png"
    cls.frame = Frame2D.from_image(screenshot_path)

    # Keeps the first 3 channels only. REMOVE THIS if not a screenshot
    first_three = cls.frame.data[..., :3]
    cls.frame.data = first_three

    # The first window of the frame split with a size of 100
    windows = cls.frame.split_xy(100)
    cls.frame_window = windows[0][0]

    cls.window = 100
    cls._RSC = _RSC
    cls.channels = 3

    # A second frame holding the extra channels requested below
    chn_flags = dict(xy=True, hsv=True, mex_g=True, ex_gr=True, ndi=True)
    cls.frame2 = cls.frame.get_chns(**chn_flags)
def kmeans(f: Frame2D,
           clusters: int,
           verbose: bool,
           fit_indexes: list,
           scaler=minmax_scale,
           fig_name: str or None = "out.png"):
    """ Fits a KMeans2D on the frame and optionally saves its plot

    :param f: The Frame2D to fit the KMeans on
    :param clusters: The number of KMeans clusters
    :param verbose: Whether the KMeans model outputs information in console
    :param fit_indexes: The indexes to fit to, passed to KMeans2D as fit_to
    :param scaler: The scaler to use to normalize data
    :param fig_name: The file name to save the plot as.
        If None (or empty), nothing is plotted or saved
    :return: The fitted KMeans2D instance
    """
    km = KMeans2D(f,
                  model=KMeans(n_clusters=clusters, verbose=verbose),
                  fit_to=fit_indexes,
                  scaler=scaler)

    sns.set_palette(sns.color_palette("magma"), n_colors=clusters)

    if fig_name:
        # The clustering has to be drawn before saving; previously the
        # current figure was saved without the KMeans result drawn on it.
        km.plot()
        fig = plt.gcf()
        fig.set_size_inches(f.width() / 96 * 2, f.height() / 96 * 2)
        fig.savefig(fig_name)
        plt.cla()

    return km
def channel_analysis(image_path: str,
                     verbose:bool = True,
                     image_scale: float = 0.5,
                     plot_scale: float = 0.2,
                     exclude_xy: bool = True):
    """ Loads an image, gets all of its channels and plots them

    :param image_path: Path to image
    :param verbose: Passed to the GLCM options as its verbose flag
    :param image_scale: Scale the image is loaded with
    :param plot_scale: Scale the plot image is rendered with
    :param exclude_xy: Whether to exclude the XY channel from the channels
    :return: The result of plotting all channel labels as an image
    """
    source = Frame2D.from_image(image_path, scale=image_scale)

    if exclude_xy:
        excluded = [Frame2D.CHN.XY]
    else:
        excluded = []

    all_chns = source.get_all_chns(exc_chns=excluded,
                                   glcm=Frame2D.GLCM(verbose=verbose))
    plotter = all_chns.plot(all_chns.labels)
    return plotter.image(scale=plot_scale)
def kmeans_score(f: Frame2D,
                 score: Frame2D or str,
                 glcm_radius: int,
                 clusters: int,
                 verbose: bool,
                 fit_indexes: list,
                 scaler=minmax_scale,
                 fig_name: str or None = "out.png"):
    """ Runs kmeans on the frame and prints its score against a scoring frame

    :param f: The Frame2D to fit the KMeans on
    :param score: The scoring frame, or a path to load the scoring frame from
    :param glcm_radius: Radius of GLCM, passed to the scoring
    :param clusters: The number of KMeans clusters
    :param verbose: Whether the KMeans model outputs information in console
    :param fit_indexes: The indexes to fit to
    :param scaler: The scaler to use to normalize data
    :param fig_name: The file name of the plot, passed to kmeans
    :return: None, the score is only printed
    """
    fitted = kmeans(f, clusters, verbose, fit_indexes, scaler, fig_name)

    # A path is loaded into a frame first, a frame is used as it is
    score_frame = Frame2D.from_image(score) if isinstance(score, str) else score

    result = fitted.score(score_frame, glcm_radius=glcm_radius)
    print(result)
def test_box(self):
    """ Clusters the basic box image into 3 and expects every score to be 1 """
    C = CONSTS.CHN
    box = Frame2D.from_image(self._RSC + "/imgs/basic/box.png")

    # Channels are requested through the channel consts list
    requested = [C.XY, C.HSV, C.MEX_G, C.EX_GR, C.NDI]
    frame_xy = box.get_chns(self_=False, chns=requested)

    model = KMeans(n_clusters=3, verbose=False)
    km = KMeans2D(frame_xy,
                  model,
                  fit_to=[C.MEX_G, C.EX_GR, C.NDI],
                  scaler=minmax_scale)

    score = km.as_frame().score(box)

    for metric in ('Custom', 'Homogeneity', 'Completeness', 'V Measure'):
        self.assertAlmostEqual(score[metric], 1)
def load_frame(frame: Frame2D) -> Draw2D:
    """ Builds a Draw2D whose canvas is an image made from the frame's RGB data

    :param frame: The Frame2D to use as the canvas
    :return: A new Draw2D holding the canvas and a drawer bound to it
    """
    rgb = frame.data_rgb().data
    canvas = Image.fromarray(rgb, mode=DRAW_MODE)
    drawer = ImageDraw.Draw(canvas, DRAW_MODE)
    return Draw2D(canvas, drawer)
def kmeans_scoring_12122020(test_path: str,
                            score_path: str,
                            channels: dict = None,
                            grouping: str = "PREDICT",
                            color: str = "ACTUAL",
                            img_scale: float = 0.5,
                            clusters_mnl: int = 3,
                            clusters_mnf: int = 5,
                            scatter_size: float = 1.0,
                            verbose: bool = True):
    """ Runs the KMeans model developed at 12/12/2020

    channels can be passed as a dictionary, whereby it's similar to how you would call get_chns
    E.g. for f.get_chns(xy=True, hsv=True, glcm_con=True), you'd pass
    channels=dict(xy=True, hsv=True, glcm_con=True)

    grouping and color only accept these following values (or None):
    'PREDICT', 'ACTUAL', 'SELECTED'

    Some acronym pre/suffixes used in the code:
    MNL: Meaningless, MNF: Meaningful

    :param test_path: Path to the test image
    :param score_path: Path to the scoring image
    :param channels: The channels to get, See Description on how to pass argument.
        If any of glcm_con, glcm_cor, glcm_ent is a key, glcm_radius must be a key too
    :param grouping: The categorical grouping of the first plot. i.e. how to create subplots.
        See Description on allowable values
    :param color: The categorical color/hue of the first plot. See Description on allowable values
    :param img_scale: The scaling of the test/score loaded in
    :param clusters_mnl: Clusters to use for Meaningless Clustering
    :param clusters_mnf: Clusters to use for Meaningful Clustering
    :param scatter_size: Scatter size of plot
    :param verbose: Whether both KMeans models output into console the details
    :raises AssertionError: If grouping or color is not an allowable value
    :raises ValueError: If a GLCM channel is requested without glcm_radius
    :return: A dict with the keys fig_mnl, fig_mnf, score_mnl, score_mnf and ix_mnl
    """
    allowed = ('PREDICT', 'ACTUAL', 'SELECTED', None)
    assert grouping in allowed, \
        "Invalid grouping, see description on allowable values."
    assert color in allowed, \
        "Invalid color, see description on allowable values."

    # ----------------------------------------------------------------------
    # MEANINGLESS CLASSIFICATION
    #
    # Here, we start off with the meaningless classification.
    # In this part, we're concerned about removing the meaningless labels so
    # that the clustering is more meaningful in a way.
    # ----------------------------------------------------------------------

    # We load the Frames here and run the KMeans Directly on it.
    # Note that the KMeans is being run on the RGB Channels only, we may change this later
    predict = Frame2D.from_image(test_path, scale=img_scale)
    predict_rgb = predict.data_rgb()
    actual = Frame2D.from_image(score_path, scale=img_scale)

    if channels:
        predict = predict.get_chns(**channels)

        # If there are any GLCM channels, we have to crop it.
        if any(k in ("glcm_con", "glcm_cor", "glcm_ent") for k in channels):
            if 'glcm_radius' not in channels:
                raise ValueError(
                    "glcm_radius must be explicitly specified on glcm features"
                )
            predict_rgb = predict_rgb.convolute(channels['glcm_radius'],
                                                method='average')
            actual = actual.crop_glcm(glcm_radius=channels['glcm_radius'])

    # The Meaningful KMeans fits to every channel of predict
    fit_indexes = list(range(predict.shape[-1]))

    # Predict using KMeans
    predict_km_mnl = KMeans2D(predict_rgb,
                              KMeans(clusters_mnl, verbose=verbose),
                              fit_to=list(range(3)),
                              scaler=scale)

    # Score the prediction
    # The labels are in 1D, we reshape it to recreate the channels
    # Reshape label prediction to PRED, ACT, COUNT
    score_mnl = Frame2D.scorer_pair(predict_km_mnl.model.labels_,
                                    actual)['labels'].reshape([-1, 3])

    # We retrieve the xy using predict or actual, then stack it onto the score
    score_mnl_xy = predict.get_xy()[0].reshape([-1, 2])
    score_mnl = np.hstack([score_mnl, score_mnl_xy])

    # Create DataFrame for lmplot
    score_mnl_df = pd.DataFrame(
        score_mnl, columns=('PREDICT', 'ACTUAL', 'SELECTED', 'X', 'Y'))

    # Call lmplot
    # x and y are passed by keyword, newer seaborn versions do not accept
    # them positionally.
    fig_mnl = sns.lmplot(
        x='X', y='Y',
        data=score_mnl_df,
        fit_reg=False,                    # Don't render regression
        col=grouping,                     # Group By the chosen grouping
        hue=color,
        col_wrap=3,                       # Wrap around on 3 column plots
        scatter_kws={'s': scatter_size},  # Scatter Size
        legend=True,
        aspect=predict.width() / predict.height(),
        legend_out=True)

    # ----------------------------------------------------------------------
    # MEANINGLESS CLASSIFICATION DETERMINANT
    #
    # This is the algorithm to determine the cluster that is the least
    # meaningful: the cluster whose center has the lowest mean is picked.
    # If there's too many clusters, this wouldn't work well as depicted in
    # the paper.
    # This will only rid off the least meaningful one, hence it'll fail on
    # >1 MNL cluster
    # ----------------------------------------------------------------------

    # Contains the meaningless cluster number as labelled by KMeans
    ix_mnl: int = np.mean(predict_km_mnl.model.cluster_centers_,
                          axis=1).argmin()

    # Contains the mask [XY] where you can mask against np.ndarrays
    # noinspection PyTypeChecker
    mask_mnl: np.ndarray = predict_km_mnl.model.labels_ != ix_mnl

    # ----------------------------------------------------------------------
    # MEANINGFUL CLASSIFICATION
    #
    # For this part, we remove the MNL Cluster and perform another KMeans
    # on it.
    # ----------------------------------------------------------------------

    # verbose follows the argument; it was previously always True here
    predict_km_mnf = KMeans2D(predict,
                              KMeans(clusters_mnf, verbose=verbose),
                              fit_to=fit_indexes,
                              frame_1dmask=mask_mnl,
                              scaler=scale)

    # Contains the Label in 1D
    score_mnf = Frame2D.scorer_pair(
        predict_km_mnf.model.labels_,
        actual.data_flatten_xy()[predict_km_mnf.frame_1dmask, 0])['labels']

    # We retrieve the xy again, but we need to mask it since we removed the MNL cluster
    score_mnf_xy = predict.get_xy()[0].reshape([-1, 2])[mask_mnl, :]
    score_mnf = np.hstack([score_mnf, score_mnf_xy])

    # Create DataFrame for lmplot
    score_mnf_df = pd.DataFrame(
        score_mnf, columns=('PREDICT', 'ACTUAL', 'SELECTED', 'X', 'Y'))

    # Call lmplot
    fig_mnf = sns.lmplot(
        x='X', y='Y',
        data=score_mnf_df,
        fit_reg=False,                    # Don't render regression
        col='PREDICT',                    # Group By Predict
        hue='ACTUAL',
        col_wrap=3,                       # Wrap around on 3 column plots
        scatter_kws={'s': scatter_size},  # Scatter Size
        aspect=predict.width() / predict.height(),
        legend=False)

    # Return both Figures, Score and the detected MNL Cluster
    return dict(
        fig_mnl=fig_mnl,
        fig_mnf=fig_mnf,
        score_mnl=homogeneity_completeness_v_measure(score_mnl_df.ACTUAL,
                                                     score_mnl_df.PREDICT),
        score_mnf=homogeneity_completeness_v_measure(score_mnf_df.ACTUAL,
                                                     score_mnf_df.PREDICT),
        ix_mnl=ix_mnl)