예제 #1
0
def get_cm_problems(cm: npt.NDArray, labels: List[str]) -> None:
    """
    Log warnings about classifier problems visible in a confusion matrix.

    Two checks are made. Every row of ``cm`` that sums to zero is reported
    on its own as a class that was not in the dataset. Every row of the
    transposed matrix that sums to zero is collected, and the collected
    labels are reported in one warning as never-predicted classes.

    Parameters
    ----------
    cm : ndarray
        Confusion matrix; index ``i`` is paired with ``labels[i]``.
    labels : List[str]
        Class names used in the warning messages.
    """
    size = len(cm)

    # An all-zero row of the matrix as given
    for idx in range(size):
        if sum(cm[idx]) == 0:
            logger.warning(f"The class '{labels[idx]}' was not in the dataset.")

    # An all-zero row of the transposed matrix
    transposed = cm.transpose()
    never_predicted = [
        labels[idx] for idx in range(size) if sum(transposed[idx]) == 0
    ]
    if never_predicted:
        logger.warning(
            f"The following classes were never predicted: {never_predicted}")
예제 #2
0
def generate_permutation(
    n: int, current_perm: npt.NDArray, tmp_cm: npt.NDArray
) -> Tuple[npt.NDArray, bool]:
    """
    Generate a new permutation by applying one random move to the current one.

    With probability 0.5 (always, when ``n < 3``) two distinct random
    positions are swapped. Otherwise a contiguous block
    ``from_start..from_end`` is handed to ``move_1d`` together with an
    insert position chosen outside that block.

    Parameters
    ----------
    n : int
        Number of positions; must be at least 2.
    current_perm : npt.NDArray
        The current permutation. A copy is passed to the swap/move helper.
    tmp_cm : npt.NDArray
        Confusion matrix passed to ``swap`` / ``move`` with the same indices.
        The helper's return value is only bound locally and is not returned;
        presumably the helpers update the array in place - verify against
        their definitions.

    Returns
    -------
    perm, make_swap : npt.NDArray, bool
        The value returned by ``swap_1d`` / ``move_1d``, and whether a plain
        swap (True) or a block move (False) was performed.

    Raises
    ------
    ValueError
        If ``n < 2``, because no two distinct positions exist.
    """
    if n < 2:
        # Without this guard n == 1 would loop forever searching for j != i
        raise ValueError(f"Need at least 2 positions to permute, got n={n}")

    swap_prob = 0.5
    make_swap = random.random() < swap_prob
    if n < 3:
        # In this case block-swaps don't make any sense
        make_swap = True
    if make_swap:
        # Choose two distinct positions to swap
        i = random.randint(0, n - 1)
        j = i
        while j == i:
            j = random.randint(0, n - 1)
        # Define permutation
        perm = swap_1d(current_perm.copy(), i, j)
        # Define values after swap
        tmp_cm = swap(tmp_cm, i, j)
    else:
        # Block move. from_end is at most n - 2, so position n - 1 is always
        # outside the block and a valid insert position exists. The old retry
        # loop around these two draws compared a wrongly-signed (always
        # negative) block length and therefore never repeated; it was dropped.
        from_start = random.randint(0, n - 3)
        from_end = random.randint(from_start + 1, n - 2)
        insert_pos = from_start
        while from_start <= insert_pos <= from_end:
            insert_pos = random.randint(0, n - 1)
        perm = move_1d(current_perm.copy(), from_start, from_end, insert_pos)

        # Define values after the move
        tmp_cm = move(tmp_cm, from_start, from_end, insert_pos)
    return perm, make_swap
예제 #3
0
def preprocess(
    features: npt.NDArray,
    targets: npt.NDArray,
    img_rows: int,
    img_cols: int,
    num_classes: int,
) -> Tuple[Any, Any]:
    """
    Reshape and scale image features and convert targets to class matrices.

    The features are reshaped to carry a single channel axis, placed
    according to ``K.image_data_format()``, cast to float32 and divided by
    255. The resulting shape and sample count are printed.

    Parameters
    ----------
    features : npt.NDArray
        Image data whose first axis indexes the samples.
    targets : npt.NDArray
        Class vector handed to ``keras.utils.to_categorical``.
    img_rows : int
    img_cols : int
    num_classes : int

    Returns
    -------
    features, targets : Tuple[Any, Any]
        The scaled features and the categorical targets.
    """
    channels_first = K.image_data_format() == "channels_first"
    n_samples = features.shape[0]
    if channels_first:
        target_shape = (n_samples, 1, img_rows, img_cols)
    else:
        target_shape = (n_samples, img_rows, img_cols, 1)

    scaled = features.reshape(target_shape).astype("float32")
    scaled /= 255
    print("x shape:", scaled.shape)
    print(f"{scaled.shape[0]} samples")

    # convert class vectors to binary class matrices
    one_hot = keras.utils.to_categorical(targets, num_classes)
    return scaled, one_hot
예제 #4
0
파일: io.py 프로젝트: MartinThoma/clana
    def store_permutation(cls, cm_file: str, permutation: npt.NDArray,
                          iterations: int) -> None:
        """
        Store a permutation in the configuration file belonging to cm_file.

        The configuration is read from the path returned by
        ``cls.get_cfg_path_from_cm_path`` if that file exists; otherwise a
        fresh one is started. The entry is keyed by the base name of
        ``cm_file`` and then by ``md5(cm_file)``. Its permutation is
        overwritten and its iteration count is increased.

        Parameters
        ----------
        cm_file : str
            Path of the confusion matrix file; made absolute before use.
        permutation : npt.NDArray
            Permutation to store (written as a list).
        iterations : int
            Number of iterations to add to the stored count.
        """
        cm_file = os.path.abspath(cm_file)
        cfg_file = cls.get_cfg_path_from_cm_path(cm_file)
        cfg = (ClanaCfg.read_clana_cfg(cfg_file)
               if os.path.isfile(cfg_file)
               else {"version": clana.__version__, "data": {}})

        # Locate (or create) the entry for this file and this content hash
        data = cfg["data"]
        base_name = os.path.basename(cm_file)
        if base_name not in data:
            data[base_name] = {}
        per_file = data[base_name]
        digest = md5(cm_file)
        if digest not in per_file:
            per_file[digest] = {
                "permutation": permutation.tolist(),
                "iterations": 0,
            }
        entry = per_file[digest]
        entry["permutation"] = permutation.tolist()
        entry["iterations"] += iterations

        # Write file
        print(cfg_file)
        with open(cfg_file, "w") as cfg_handle:
            yaml.dump(cfg,
                      cfg_handle,
                      default_flow_style=False,
                      allow_unicode=True)
예제 #5
0
파일: io.py 프로젝트: MartinThoma/clana
def write_cm(path: str, cm: npt.NDArray) -> None:
    """
    Write a confusion matrix to ``path`` as JSON.

    The matrix is converted to nested lists and serialized with ``","`` as
    item separator, without escaping non-ASCII characters.

    Parameters
    ----------
    path : str
        Destination file; it is opened for writing and overwritten.
    cm : npt.NDArray
        The confusion matrix to store.
    """
    with open(path, "w") as handle:
        serialized = json.dumps(
            cm.tolist(),
            ensure_ascii=False,
            separators=(",", ": "),
        )
        handle.write(serialized)
예제 #6
0
def get_accuracy(cm: npt.NDArray) -> float:
    """
    Get the accuracy from the confusion matrix cm.

    The accuracy is the sum of the diagonal entries divided by the sum of
    all entries.

    Parameters
    ----------
    cm : ndarray

    Returns
    -------
    accuracy : float

    Examples
    --------
    >>> import numpy as np
    >>> cm = np.array([[10, 20], [30, 40]])
    >>> get_accuracy(cm)
    0.5
    >>> cm = np.array([[20, 10], [30, 40]])
    >>> get_accuracy(cm)
    0.6
    """
    on_diagonal = 0
    for idx in range(len(cm)):
        on_diagonal += cm[idx][idx]
    correct = float(on_diagonal)
    total = float(cm.sum())
    return correct / total
예제 #7
0
def save_tensor_as_image(_tensor: npt.NDArray, file_path: str) -> None:
    """Cast a tensor to uint8 and save it to ``file_path`` as an image."""
    pixels = _tensor.astype(np.uint8)
    plt.imsave(file_path, pixels, origin="lower")
예제 #8
0
def show_tensor_as_image(_tensor: npt.NDArray) -> None:
    """Cast a tensor to uint8, draw it as an image and show the plot."""
    pixels = _tensor.astype(np.uint8)
    plt.imshow(pixels, origin="lower")
    plt.show()
예제 #9
0
def show_scatter(rdms: RDMs,
                 coords: NDArray,
                 rdm_descriptor: Optional[str] = None,
                 pattern_descriptor: Optional[str] = None,
                 icon_size: float = 0.1) -> Figure:
    """Draw a 2-dimensional scatter plot based on the provided coordinates

    One subplot is drawn per RDM, on a near-square grid. Grid cells beyond
    the number of RDMs are hidden.

    Args:
        rdms (RDMs): The RDMs object to display
        coords (NDArray): Array of x and y coordinates for each pattern of
            each RDM (rdms x patterns x 2)
        rdm_descriptor: (Optional[str]): If provided, this will be used as
            title for each individual RDM.
        pattern_descriptor (Optional[str]): If provided, the chosen pattern
            descriptor will be printed adjacent to each point in the plot
        icon_size: relative size of icons if the pattern descriptor chosen
            is of type Icon

    Returns:
        Figure: A matplotlib figure in which the plot is drawn
    """
    ## near-square grid: add a row for any fractional part of sqrt(n_rdm),
    ## and a column as well once that fraction exceeds one half
    frac, n = math.modf(math.sqrt(rdms.n_rdm))
    nrows, ncols = math.floor(n), math.floor(n)
    if frac > 0:
        nrows += 1
    if frac > 0.5:
        ncols += 1
    fig, axes = matplotlib.pyplot.subplots(nrows=nrows, ncols=ncols)
    axes = numpy.array(axes)  ## it's now an array even if there's only one

    ## all subplots share the same limits; computed once for the whole array
    ## NOTE(review): scaling by 0.95 / 1.05 only widens the range when the
    ## minimum and maximum are positive; a negative bound is moved inward
    ## and can clip points. Left as is to keep existing plots unchanged.
    lower = coords.min() * 0.95
    upper = coords.max() * 1.05

    for r, ax in enumerate(axes.ravel()):

        if r > (rdms.n_rdm - 1):
            ## fewer rdms than rows x cols, hide every remaining axis
            ## (continue, not break: there may be more than one spare cell)
            ax.axis('off')
            continue

        ax.scatter(coords[r, :, 0], coords[r, :, 1])
        ax.set_xlim(lower, upper)
        ax.set_ylim(lower, upper)

        ## RDM names
        if rdm_descriptor is not None:
            ax.set_title(rdms.rdm_descriptors[rdm_descriptor][r])

        ## print labels next to dots
        if pattern_descriptor is not None:
            for p in range(coords.shape[1]):
                pat_desc = rdms.pattern_descriptors[pattern_descriptor][p]
                pat_coords = (coords[r, p, 0], coords[r, p, 1])
                if isinstance(pat_desc, Icon):
                    pat_desc.plot(pat_coords[0],
                                  pat_coords[1],
                                  ax=ax,
                                  size=icon_size)
                else:
                    label = ax.annotate(pat_desc, pat_coords)
                    label.set_alpha(.6)

        ## turn off all axis ticks and labels
        ax.tick_params(axis='both',
                       which='both',
                       bottom=False,
                       top=False,
                       right=False,
                       left=False,
                       labelbottom=False,
                       labeltop=False,
                       labelleft=False,
                       labelright=False)
    return fig
예제 #10
0
def create_html_cm(cm: npt.NDArray,
                   zero_diagonal: bool = False,
                   labels: Optional[List[str]] = None) -> None:
    """
    Render a confusion matrix as HTML and write it to disk.

    The ``templates/base.html`` resource of the ``clana`` package is
    rendered with one header cell per label (carrying the precision) and one
    body row per label (carrying the recall, the support and one cell per
    predicted label). The result is written to
    ``cfg["visualize"]["html_save_path"]``.

    Scores below 0.2 are marked red. Precision above 0.98 and recall of at
    least 0.98 are marked green. Cells whose text is exactly "0" are left
    blank and transparent.

    Parameters
    ----------
    cm : npt.NDArray
    zero_diagonal : bool, optional (default: False)
        Documented as overwriting the diagonal with zeroes; the body of this
        function currently does not read it.
    labels : Optional[List[str]]
        If this is not given, then numbers are assigned to the classes
    """
    if labels is None:
        labels = [str(idx) for idx in range(len(cm))]

    el_max = 200

    with open(resource_filename("clana", "templates/base.html")) as template_file:
        template_source = template_file.read()

    # Header: one cell per label, scored on the transposed matrix
    transposed = cm.transpose()
    header_cells = []
    for idx, label in enumerate(labels):
        precision = cm[idx][idx] / float(sum(transposed[idx]))
        if precision < 0.2:
            precision_color = "red"
        elif precision > 0.98:
            precision_color = "green"
        else:
            precision_color = "transparent"
        header_cells.append({
            "precision": f"{precision:0.2f}",
            "background-color": precision_color,
            "label": label,
        })

    # Body: a leading recall cell followed by one cell per predicted label
    body_rows = []
    for idx, (label, row) in enumerate(zip(labels, cm)):
        support = sum(row)
        recall = cm[idx][idx] / float(support)
        if recall < 0.2:
            recall_color = "red"
        elif recall >= 0.98:
            recall_color = "green"
        else:
            recall_color = "transparent"
        cells = [{
            "label": label,
            "recall": f"{recall:.2f}",
            "background-color": recall_color,
        }]

        for pred_label, raw_value in zip(labels, row):
            text = str(raw_value)
            if text == "0":
                text = ""
                cell_color = "transparent"
            else:
                cell_color = get_color_code(float(text), el_max)
            cells.append({
                "label": text,
                "true": label,
                "pred": pred_label,
                "background-color": cell_color,
            })

        body_rows.append({"row": cells, "support": support})

    rendered = Template(template_source).render(header_cells=header_cells,
                                                body_rows=body_rows)

    with open(cfg["visualize"]["html_save_path"], "w") as html_file:
        html_file.write(rendered)