예제 #1
0
def scatter_plot(plot: plt, df: HogwartsDataDescriber, course1: str,
                 course2: str):
    """
    Draw one scatter series per house for a pair of courses.

    For every (house, color) pair taken from ``df.houses`` and ``df.colors``
    the rows whose 'Hogwarts House' value equals that house are selected and
    their marks in the two courses are plotted against each other.

    :param plot: object exposing ``scatter`` (documented by the original
        author as matplotlib.axes._subplots.AxesSubplot)
    :param df: HogwartsDataDescriber providing ``houses``, ``colors`` and
        column access by name
    :param course1: name of the course plotted on the x axis
    :param course2: name of the course plotted on the y axis
    :return: None
    """
    for house_name, house_color in zip(df.houses, df.colors):
        marks_x = df[course1]
        # Row selector shared by both courses for the current house.
        in_house = df['Hogwarts House'] == house_name
        marks_y = df[course2]

        plot.scatter(marks_x[in_house], marks_y[in_house],
                     color=house_color, alpha=0.5)
def plot_ecdf(x_: ndarray, ax: pyplot = None, figsize: tuple = FIGSIZE,
              fontsize: int = FONTSIZE, grid: bool = False,
              xlabel: str = 'x', ylabel: str = 'y',
              lw: int = 1, color: str = None, dots_marker: str = None,
              dots_color: str = 'red', dots_size: float = 10,
              zorder: float = None, plot_data_ticks: bool = True,
              plot_data_dots: bool = True) -> pyplot:
    """
    Plot the step curve, data ticks and data dots for a sample.

    The sample is first normalised through ``dataseries(x_).value``. The
    curve coordinates come from ``stairs`` and the dot coordinates from
    ``ecdf``; both helpers are defined outside this block (presumably they
    yield the empirical CDF as a staircase and as points - confirm there).

    :param x_: input sample
    :param ax: axes to draw on; when None a new figure/axes pair is created
        and only then are ``grid``, ``xlabel``, ``ylabel``, ``fontsize`` and
        the tick styling applied
    :param figsize: size of the figure created when ``ax`` is None
    :param fontsize: font size of the axis labels on a newly created axes
    :param grid: enable the grid on a newly created axes
    :param xlabel: x-axis label for a newly created axes
    :param ylabel: y-axis label for a newly created axes
    :param lw: line width of the step curve
    :param color: color of the step curve
    :param dots_marker: marker of the data dots
    :param dots_color: color of the data dots
    :param dots_size: marker size of the data dots
    :param zorder: z-order shared by the step curve and the data dots
    :param plot_data_ticks: draw a '|' tick at height 0 for every data value
    :param plot_data_dots: draw the dots returned by ``ecdf``
    :return: tuple ``(fig, ax)``; ``fig`` is an empty list when the caller
        supplied ``ax``
    """
    samples = dataseries(x_).value

    figure = []  # placeholder returned when the caller owns the axes
    if ax is None:
        figure, ax = pyplot.subplots(figsize=figsize)
        if grid:
            ax.grid()
        ax.set_xlabel(xlabel, fontsize=fontsize)
        ax.set_ylabel(ylabel, fontsize=fontsize)
        ax.tick_params(direction='out', grid_alpha=0.5, labelsize='large')

    step_x, step_y = stairs(samples)
    ax.plot(step_x, step_y, color=color, zorder=zorder, lw=lw)

    if plot_data_ticks:
        # Marker reference: https://matplotlib.org/stable/api/markers_api.html
        ax.scatter(samples, [0] * len(samples), marker='|', s=200)

    dot_x, dot_p = ecdf(samples)
    if plot_data_dots:
        ax.scatter(dot_x, dot_p, s=dots_size, color=dots_color,
                   marker=dots_marker, zorder=zorder)

    return figure, ax
예제 #3
0
def plot3D(ax: plt, sub3d: plt, X: np.ndarray, y: np.ndarray, w: np.ndarray,
           name: str) -> None:
    """
    Visualize decision boundary and data classes in 3D.

    Every sample is drawn at (feature 1, feature 2, posterior), colored by
    its posterior, with one marker per ground-truth class. On top of that,
    iso-probability lines are drawn for p in {0.1, 0.25, 0.5, 0.75, 0.9};
    the p = 0.5 line is red.

    :param ax: object whose ``legend``, ``xlabel``, ``ylabel`` and ``title``
        are called as functions (described as "matplotlib" by the original
        author, i.e. presumably the pyplot module)
    :param sub3d: fig.add_subplot(XXX, projection='3d')
    :param X: data; row 0 is the added row of 1s (bias), rows 1 and 2 are the
        two plotted features
    :param y: data labels, one per column of ``X``
    :param w: model parameters; ``w[0]`` is used as the bias term and
        ``w[1]``, ``w[2]`` as the weights of features 1 and 2
    :param name: plot name identifier, prepended to the title
    :return: None
    """
    feature1 = np.array(X[1, :])  # X[0, :] is the added row of 1s (bias)
    feature2 = np.array(X[2, :])
    posterior = np.squeeze(np.asarray(LOGREG().activationFunction(w, X)))

    # Only two markers are defined, so at most two distinct labels are
    # supported here.
    markers = ['o', '+']
    for li, class_label in enumerate(np.unique(y)):
        # One mask per class, reused for both features and the posterior.
        in_class = y[:] == class_label
        class_posterior = posterior[in_class]
        # NOTE(review): the legend text shows the position of the label in
        # np.unique(y), not the label value itself; the two coincide for
        # labels 0..k-1.
        sub3d.scatter(feature1[in_class],
                      feature2[in_class],
                      class_posterior,
                      c=class_posterior,
                      vmin=0,
                      vmax=1,
                      marker=markers[li],
                      label='ground truth label = ' + str(li))

    grid_x = np.arange(feature1.min(), feature1.max(), 0.1)
    contour_styles = [[0.1, 'k:'], [0.25, 'k--'], [0.5, 'r'], [0.75, 'k-.'],
                      [0.9, 'k-']]
    for (p, line_style) in contour_styles:
        # Solve w[0] + w[1]*x + w[2]*y = -log(1/p - 1) for y; this is the
        # line of constant posterior p assuming a logistic activation
        # (the activation itself lives in LOGREG - confirm there).
        contour_y = (-np.log((1 / p) - 1) - w[1] * grid_x - w[0]) / w[2]
        contour_y = np.squeeze(np.asarray(contour_y))
        height = np.ones(contour_y.shape) * p
        sub3d.plot(grid_x, contour_y, height, line_style,
                   label='p = ' + str(p))

    # Build the legend once, after every labelled artist has been added,
    # instead of rebuilding it after each contour line.
    ax.legend()
    ax.xlabel('feature 1')
    ax.ylabel('feature 2')
    ax.title(name + '\n Posterior for class labeled 1')
예제 #4
0
    def add_scatter_and_annotate(self,
                                 fig: plt,
                                 x_all: np.array,
                                 y_all: np.array,
                                 colour: str,
                                 idxs: np.array,
                                 annotate=False):
        """Scatter a subset of points and optionally label them.

        Despite its name, ``fig`` is used like an axes: ``scatter``, ``text``
        and ``annotate`` are called on it.

        Two independent labelling passes may run:

        * if ``self.values_to_label`` is not None, every selected point whose
          label is in that collection gets a boxed text (colors looked up in
          ``self.values_colours`` / ``self.text_colours``) and the texts are
          passed to ``adjust_text``;
        * if ``self.label_big_sig`` and ``annotate`` are both truthy, the
          points with the largest ``y`` are annotated, at most
          ``self.max_labels`` of them.

        :param fig: object to draw on (see note above)
        :param x_all: x values for all rows; indexed with ``idxs``
        :param y_all: y values for all rows; indexed with ``idxs``
        :param colour: passed to ``scatter`` as ``c``
        :param idxs: selector applied to ``x_all``, ``y_all`` and the label
            column values (presumably an index or boolean array - confirm
            against callers)
        :param annotate: enable labelling of the points with the largest y
        :return: whatever ``fig.scatter`` returns for the plotted subset
        """
        x = x_all[idxs]
        y = y_all[idxs]
        ax = fig.scatter(x, y, c=colour, alpha=self.opacity, s=20)

        # Label the explicitly requested values (e.g. gene IDs), if any.
        if self.values_to_label is not None:
            texts = []
            labels = self.df[self.label_column].values[idxs]
            for i, name in enumerate(labels):
                if name in self.values_to_label:
                    lbl_bg = self.values_colours.get(name)
                    color = self.text_colours.get(name)
                    texts.append(
                        fig.text(x[i],
                                 y[i],
                                 name,
                                 color=color,
                                 fontsize=self.label_font_size,
                                 bbox=dict(fc=lbl_bg, alpha=1.0)))
            adjust_text(texts, force_text=2.0)

        # Label the points with the largest y, capped so that we never show
        # e.g. 10000 gene names.
        if self.label_big_sig and annotate:
            # Never ask for more labels than there are points. The previous
            # len(y) - 1 fallback silently dropped one point, and a cap of 0
            # turned into the slice [0:], which labelled every point.
            n_labels = min(len(y), self.max_labels)
            if n_labels > 0:
                # argpartition puts the n_labels largest values (unordered)
                # in the last n_labels positions.
                most_sig_idxs = np.argpartition(y, -n_labels)[-n_labels:]
                labels = self.df[self.label_column].values[idxs][most_sig_idxs]
                for name, x_pos, y_pos in zip(labels, x[most_sig_idxs],
                                              y[most_sig_idxs]):
                    fig.annotate(name, (x_pos, y_pos),
                                 xytext=(0, 10),
                                 textcoords='offset points',
                                 ha='center',
                                 va='bottom',
                                 bbox=dict(boxstyle='round,pad=0.5',
                                           fc='white',
                                           alpha=0.2))
        return ax