def get_cm_problems(cm: npt.NDArray, labels: List[str]) -> None:
    """
    Log warnings about suspicious rows and columns of a confusion matrix.

    A row whose entries sum to zero is reported as a class that was not in
    the dataset; a row of the transposed matrix whose entries sum to zero is
    collected and reported as a class that was never predicted.

    Parameters
    ----------
    cm : ndarray
        Confusion matrix; ``cm[i]`` is summed per class and the matrix is
        transposed to inspect the other axis.
    labels : List[str]
        Class names, looked up by the same index as the rows of ``cm``.
    """
    n_classes = len(cm)

    # One warning per class whose row carries no counts at all.
    for idx, row in enumerate(cm):
        if sum(row) == 0:
            logger.warning(f"The class '{labels[idx]}' was not in the dataset.")

    # Same check on the transposed matrix; these are reported together.
    by_prediction = cm.transpose()
    unpredicted = [
        labels[idx]
        for idx in range(n_classes)
        if sum(by_prediction[idx]) == 0
    ]
    if unpredicted:
        logger.warning(
            f"The following classes were never predicted: {unpredicted}")
def generate_permutation(
    n: int, current_perm: npt.NDArray, tmp_cm: npt.NDArray
) -> Tuple[npt.NDArray, bool]:
    """
    Propose a new permutation by a random swap or a random block move.

    With probability 0.5 (and always when ``n < 3``) two distinct random
    positions are swapped via ``swap_1d`` / ``swap``. Otherwise a randomly
    chosen index range ``from_start..from_end`` is moved to a random
    position outside that range via ``move_1d`` / ``move``.

    Parameters
    ----------
    n : int
        Random indices are drawn from ``0 .. n - 1``. Presumably the number
        of classes, i.e. the length of ``current_perm`` - confirm against
        the caller.
    current_perm : npt.NDArray
        The current permutation. Only a copy is handed to the helpers.
    tmp_cm : npt.NDArray
        Confusion matrix handed to ``swap`` / ``move``. The helper's result
        is only rebound to the local name and is not returned.
        NOTE(review): callers presumably rely on the helpers modifying
        ``tmp_cm`` in place - confirm.

    Returns
    -------
    perm, make_swap : npt.NDArray, bool
        ``perm`` is whatever ``swap_1d`` / ``move_1d`` returned for the
        copied permutation; ``make_swap`` is True if a swap was performed
        and False if a block was moved.
    """
    swap_prob = 0.5
    make_swap = random.random() < swap_prob
    if n < 3:
        # In this case block-swaps don't make any sense
        make_swap = True
    if make_swap:
        # Choose what to swap: draw j until it differs from i.
        # NOTE(review): for n == 1 both draws can only return 0, so this
        # loop cannot terminate.
        i = random.randint(0, n - 1)
        j = i
        while j == i:
            j = random.randint(0, n - 1)
        # Define permutation
        perm = swap_1d(current_perm.copy(), i, j)
        # Define values after swap
        tmp_cm = swap(tmp_cm, i, j)
    else:
        # block-swap
        block_len = n
        while block_len >= n - 1:
            from_start = random.randint(0, n - 3)
            from_end = random.randint(from_start + 1, n - 2)
            # NOTE(review): from_end > from_start always holds, so this
            # difference is negative and the loop body runs exactly once.
            block_len = from_start - from_end
        # Start inside the block so that the loop below draws at least once;
        # it stops as soon as the position lies outside from_start..from_end.
        insert_pos = from_start
        while not (insert_pos < from_start or insert_pos > from_end):
            insert_pos = random.randint(0, n - 1)
        perm = move_1d(current_perm.copy(), from_start, from_end, insert_pos)
        # Define values after swap
        tmp_cm = move(tmp_cm, from_start, from_end, insert_pos)
    return perm, make_swap
def preprocess(
    features: npt.NDArray,
    targets: npt.NDArray,
    img_rows: int,
    img_cols: int,
    num_classes: int,
) -> Tuple[Any, Any]:
    """
    Reshape and rescale image features and one-hot encode the targets.

    The features are reshaped to a 4-D array with a single channel axis,
    placed according to ``K.image_data_format()``, cast to ``float32`` and
    divided by 255. The resulting shape and sample count are printed.

    Parameters
    ----------
    features : npt.NDArray
        Image data; the first axis is kept as the sample axis.
    targets : npt.NDArray
        Class vector passed to ``keras.utils.to_categorical``.
    img_rows : int
    img_cols : int
    num_classes : int

    Returns
    -------
    features, targets : Tuple[Any, Any]
        The rescaled features and the categorical targets.
    """
    channels_first = K.image_data_format() == "channels_first"
    n_samples = features.shape[0]
    if channels_first:
        new_shape = (n_samples, 1, img_rows, img_cols)
    else:
        new_shape = (n_samples, img_rows, img_cols, 1)

    scaled = features.reshape(new_shape).astype("float32")
    scaled /= 255
    print("x shape:", scaled.shape)
    print(f"{scaled.shape[0]} samples")

    # convert class vectors to binary class matrices
    one_hot = keras.utils.to_categorical(targets, num_classes)
    return scaled, one_hot
def store_permutation(cls, cm_file: str, permutation: npt.NDArray, iterations: int) -> None:
    """
    Record a permutation for a confusion matrix file in its config file.

    The config file path is obtained from
    ``cls.get_cfg_path_from_cm_path``. An existing config is read with
    ``ClanaCfg.read_clana_cfg``; otherwise a fresh one is started. The entry
    is keyed by the base name of ``cm_file`` and by ``md5(cm_file)``; its
    permutation is overwritten and its iteration counter is increased.
    The config path is printed before the file is written as YAML.

    Parameters
    ----------
    cm_file : str
        Path of the confusion matrix file; made absolute first.
    permutation : npt.NDArray
        Stored as a plain list via ``tolist()``.
    iterations : int
        Added to the iterations already stored for this entry.
    """
    cm_file = os.path.abspath(cm_file)
    cfg_file = cls.get_cfg_path_from_cm_path(cm_file)
    cfg = (
        ClanaCfg.read_clana_cfg(cfg_file)
        if os.path.isfile(cfg_file)
        else {"version": clana.__version__, "data": {}}
    )

    per_file = cfg["data"].setdefault(os.path.basename(cm_file), {})
    checksum = md5(cm_file)
    as_list = permutation.tolist()
    # A new entry starts at zero iterations; an existing one keeps its count.
    entry = per_file.setdefault(
        checksum, {"permutation": as_list, "iterations": 0}
    )
    entry["permutation"] = as_list
    entry["iterations"] += iterations

    # Write file
    print(cfg_file)
    with open(cfg_file, "w") as outfile:
        yaml.dump(cfg, outfile, default_flow_style=False, allow_unicode=True)
def write_cm(path: str, cm: npt.NDArray) -> None:
    """
    Serialize a confusion matrix as JSON and write it to ``path``.

    Parameters
    ----------
    path : str
        Destination file; opened in text write mode, so existing content is
        replaced.
    cm : npt.NDArray
        Confusion matrix; converted with ``tolist()`` before encoding.
    """
    with open(path, "w") as handle:
        rows = cm.tolist()
        handle.write(
            json.dumps(rows, separators=(",", ": "), ensure_ascii=False)
        )
def get_accuracy(cm: npt.NDArray) -> float:
    """
    Compute the accuracy from the confusion matrix cm.

    The accuracy is the sum of the diagonal entries divided by the sum of
    all entries.

    Parameters
    ----------
    cm : ndarray

    Returns
    -------
    accuracy : float

    Examples
    --------
    >>> import numpy as np
    >>> cm = np.array([[10, 20], [30, 40]])
    >>> get_accuracy(cm)
    0.5
    >>> cm = np.array([[20, 10], [30, 40]])
    >>> get_accuracy(cm)
    0.6
    """
    on_diagonal = sum(row[idx] for idx, row in enumerate(cm))
    correct = float(on_diagonal)
    total = float(cm.sum())
    return correct / total
def save_tensor_as_image(_tensor: npt.NDArray, file_path: str) -> None:
    """
    Write a tensor to ``file_path`` as an image.

    The tensor is cast to ``uint8`` and saved through ``plt.imsave`` with
    ``origin="lower"``.
    """
    pixels = _tensor.astype(np.uint8)
    plt.imsave(file_path, pixels, origin="lower")
def show_tensor_as_image(_tensor: npt.NDArray) -> None:
    """
    Display a tensor as an image.

    The tensor is cast to ``uint8``, drawn through ``plt.imshow`` with
    ``origin="lower"`` and then shown with ``plt.show()``.
    """
    pixels = _tensor.astype(np.uint8)
    plt.imshow(pixels, origin="lower")
    plt.show()
def show_scatter(rdms: RDMs, coords: NDArray,
                 rdm_descriptor: Optional[str] = None,
                 pattern_descriptor: Optional[str] = None,
                 icon_size: float = 0.1) -> Figure:
    """Draw one 2-dimensional scatter plot per RDM from the given coordinates

    The panels are laid out on a near-square grid derived from the square
    root of ``rdms.n_rdm``. Grid cells beyond the last RDM are all switched
    off, so no empty framed panels remain in the figure.

    Args:
        rdms (RDMs): The RDMs object to display
        coords (NDArray): Array of x and y coordinates, indexed as
            ``coords[rdm, pattern, 0]`` (x) and ``coords[rdm, pattern, 1]``
            (y)
        rdm_descriptor: (Optional[str]): If provided, this will be used as
            title for each individual RDM.
        pattern_descriptor (Optional[str]): If provided, the chosen pattern
            descriptor will be printed adjacent to each point in the plot
        icon_size: relative size of icons if the pattern descriptor chosen
            is of type Icon

    Returns:
        Figure: A matplotlib figure in which the plot is drawn
    """
    ## grid size: floor(sqrt) in both directions, then add a row for any
    ## fractional part and additionally a column if it exceeds one half
    frac, n = math.modf(math.sqrt(rdms.n_rdm))
    nrows, ncols = math.floor(n), math.floor(n)
    if frac > 0:
        nrows += 1
    if frac > 0.5:
        ncols += 1
    fig, axes = matplotlib.pyplot.subplots(nrows=nrows, ncols=ncols)
    axes = numpy.array(axes)  ## it's now an array even if there's only one
    ## all panels share the same limits, so compute them once
    ## NOTE(review): scaling by 0.95 moves a negative minimum inwards
    ## rather than adding a margin - confirm this is intended.
    lower = coords.min() * 0.95
    upper = coords.max() * 1.05
    for r, ax in enumerate(axes.ravel()):
        if r >= rdms.n_rdm:
            ## fewer rdms than rows x cols, hide this axes and keep going
            ## so that every remaining one is hidden as well
            ax.axis('off')
            continue
        ax.scatter(coords[r, :, 0], coords[r, :, 1])
        ax.set_xlim(lower, upper)
        ax.set_ylim(lower, upper)
        ## RDM names
        if rdm_descriptor is not None:
            ax.set_title(rdms.rdm_descriptors[rdm_descriptor][r])
        ## print labels next to dots
        if pattern_descriptor is not None:
            for p in range(coords.shape[1]):
                pat_desc = rdms.pattern_descriptors[pattern_descriptor][p]
                pat_coords = (coords[r, p, 0], coords[r, p, 1])
                if isinstance(pat_desc, Icon):
                    pat_desc.plot(pat_coords[0], pat_coords[1],
                                  ax=ax, size=icon_size)
                else:
                    label = ax.annotate(pat_desc, pat_coords)
                    label.set_alpha(.6)
        ## turn off all axis ticks and labels
        ax.tick_params(axis='both', which='both', bottom=False, top=False,
                       right=False, left=False, labelbottom=False,
                       labeltop=False, labelleft=False, labelright=False)
    return fig
def create_html_cm(cm: npt.NDArray, zero_diagonal: bool = False, labels: Optional[List[str]] = None) -> None:
    """
    Render a confusion matrix into the HTML template and write it to disk.

    The header carries one cell per label with the value
    ``cm[i][i] / column sum`` (named precision); each body row starts with a
    cell holding ``cm[i][i] / row sum`` (named recall), followed by one cell
    per matrix entry. The result is written to
    ``cfg["visualize"]["html_save_path"]``.

    Parameters
    ----------
    cm : npt.NDArray
    zero_diagonal : bool, optional (default: False)
        Not read anywhere in this function.
        NOTE(review): presumably the caller zeroes the diagonal before
        calling - confirm.
    labels : Optional[List[str]]
        If this is not given, then numbers are assigned to the classes
    """
    if labels is None:
        labels = [str(idx) for idx in range(len(cm))]

    # Second argument to get_color_code for every non-zero cell.
    el_max = 200

    template_path = resource_filename("clana", "templates/base.html")
    with open(template_path) as template_file:
        base = template_file.read()

    # Header: one cell per class, colored by its precision.
    columns = cm.transpose()
    header_cells = []
    for idx, label in enumerate(labels):
        precision = cm[idx][idx] / float(sum(columns[idx]))
        if precision < 0.2:
            header_color = "red"
        elif precision > 0.98:
            header_color = "green"
        else:
            header_color = "transparent"
        header_cells.append({
            "precision": f"{precision:0.2f}",
            "background-color": header_color,
            "label": label,
        })

    # Body: per class a recall cell followed by the cells of its row.
    body_rows = []
    for idx, (label, row) in enumerate(zip(labels, cm)):
        support = sum(row)
        recall = cm[idx][idx] / float(support)
        if recall < 0.2:
            recall_color = "red"
        elif recall >= 0.98:
            recall_color = "green"
        else:
            recall_color = "transparent"
        cells = [{
            "label": label,
            "recall": f"{recall:.2f}",
            "background-color": recall_color,
        }]

        for pred_label, value in zip(labels, row):
            text = str(value)
            if text == "0":
                # Cells whose text is exactly "0" are left blank and uncolored.
                shown, cell_color = "", "transparent"
            else:
                shown, cell_color = text, get_color_code(float(text), el_max)
            cells.append({
                "label": shown,
                "true": label,
                "pred": pred_label,
                "background-color": cell_color,
            })

        body_rows.append({"row": cells, "support": support})

    html = Template(base).render(header_cells=header_cells, body_rows=body_rows)
    with open(cfg["visualize"]["html_save_path"], "w") as out_file:
        out_file.write(html)