Esempio n. 1
0
def basicLinePlot(
        y,  # [n_sigs, n_bins] array (each signal is 1 row)
        x=None,  # either [n_bins] array-like signal, or [n_signs, n_bins] signal
        title='',
        xlbl='',
        ylbl='',
        names=None,  # list of legend entries
        show_leg=True,  # whether to show leg
        plot=True):
    '''
    Plots one basic line per row of y. No frills (yet).

    y:        1D or 2D array-like; promoted to 2D so each row is one signal.
    x:        None, a single x vector shared by every signal, or a 2D array
              with one row of x values per signal.
    names:    legend entries, one per signal. Defaults to S_1, S_2, ...
    show_leg: forwarded to the layout's `showlegend`.
    plot:     forwarded to plotOut together with the figure.
    :return:  whatever plotOut(fig, plot) returns
    '''

    y = np.atleast_2d(y)
    n_sigs = y.shape[0]

    # `is None` rather than `== None`: the latter is evaluated elementwise
    # when names is a numpy array and then cannot be used as a condition.
    if names is None:
        names = ['S_%d' % (n + 1) for n in range(n_sigs)]

    # Detect the "one x row per signal" case so each trace gets its own row
    # instead of the whole 2D array.
    per_sig_x = False
    if x is not None:
        try:
            x_arr = np.asarray(x)
        except ValueError:  # ragged input: hand it to plotly untouched
            x_arr = None
        if x_arr is not None and x_arr.ndim == 2 and x_arr.shape[0] == n_sigs:
            per_sig_x = True
            x = x_arr

    traces = [
        go.Scatter(y=sig, x=x[n] if per_sig_x else x, name=names[n])
        for n, sig in enumerate(y)
    ]

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        hovermode='closest',
        showlegend=show_leg,
    )
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 2
0
def plotTable2(
    data,
    top_headers,
    width=None,
    plot=True,
    title=None,
):
    '''
    Wrapper for plotly table function
    NOTE: this is NOT compatible w/ dashboards as plotly table object doesnt have a ._data field & thus
            cant easily be jsonified

    data:        cell values, forwarded as `cells.values` of go.Table
    top_headers: header values, forwarded as `header.values` of go.Table
    width:       figure width (None leaves it unset)
    title:       figure title
    plot:        forwarded to plotOut together with the figure
    :return:     whatever plotOut(fig, plot) returns
    '''
    border = dict(color='#7D7F80')

    # A scalar alignment applies to every column; the previous
    # ['left'] * 5 only covered the first five.
    trace = go.Table(
        header=dict(
            values=top_headers,
            line=border,
            fill=dict(color='#a1c3d1'),
            font=dict(color='white', size=12),
            height=None,  # row-height
            align='left'),
        cells=dict(values=data,
                   line=border,
                   fill=dict(color='#EDFAFF'),
                   align='left'),
        hoverinfo='x+y+name')

    layout = dict(width=width, height=None, title=title)
    fig = dict(data=[trace], layout=layout)

    return plotOut(fig, plot)
Esempio n. 3
0
def plotDF(
        df,  # pandas DF
        title='',  # title of plot
        ylbl='',  # ylabel
        xlbl=None,  # if None, uses df.index.name
        linemode='lines',  # 'lines'/'markers'/'lines+markers'
        cat_col=None,  # if name, then shades BG according to the label
        opacity=.7,  # transparaency of lines. [0.0, 1.0]
        norm=None,  # None or input to norm_mat
        plot=True,  # 1/0 whether we want to plot each of the individual lines
):
    """
    Plots every column of a pandas DF as one trace against the DF index.

    Categorical columns are replaced by their integer codes. If `norm` is
    given, every column is passed through norm_mat: a string is used as the
    `method`, any other non-None value falls back to 'zscore' (the method
    that was previously always applied).
    The caller's DataFrame is never modified; all work happens on a copy.

    NOTE: see also plotly's cufflinks package which makes pandas plotting super easy!
        cf.go_offline()
        df.iplot(kind='scatter')

    :return: whatever plotOut(fig, plot) returns
    """

    # Work on a copy: the conversions below assign back into the frame.
    df = df.copy()
    ncols = df.shape[1]

    # convert cat columns to numeric columns
    for col in df.columns:
        if df[col].dtype.name == 'category':
            df[col] = df[col].cat.codes

    # make line colors. The palette lookup is clamped to 3..12 entries and
    # colors are cycled below when there are more columns than colors.
    # NOTE(review): assumes the 'Set3' scale tops out at 12 colors - confirm against colorlover.
    n_colors = min(max(3, ncols), 12)
    colors = cl.scales[str(n_colors)]['qual']['Set3']
    # 'rgb(r,g,b)' -> 'rgba(r,g,b,opacity)'
    tcols = ['rgba%s,%.2f)' % (c[3:-1], opacity) for c in colors]

    # normalize columns
    if norm is not None:
        method = norm if isinstance(norm, str) else 'zscore'
        for col in df.columns:
            df[col] = norm_mat(df[col].values, method=method)

    traces = [
        go.Scatter(x=df.index,
                   y=df[col].values,
                   name=col,
                   mode=linemode,
                   line={"color": tcols[i % len(tcols)]})
        for i, col in enumerate(df.columns)
    ]

    if xlbl is None:
        xlbl = df.index.name

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        showlegend=True,
    )

    # shade background based on label
    if cat_col is not None:
        layout.shapes = labelsShading(df[cat_col].values)

    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 4
0
def _plotSubplots(trace_array,
                  vert_spacing=.1,
                  title = '',
                  ylbl='',  # currently buggy
                  xlbl='',
                  sp_titles=None, # 2d np array of strings for subplot titles
                  plot=True
                  ):
    '''
    Internal function to make subplots based on passed traces, which are in a 2d np array

    trace_array:  2D numpy array; element [r, c] is an iterable of traces
                  that go into the subplot at row r, column c.
    vert_spacing: forwarded to make_subplots as `vertical_spacing`.
    sp_titles:    optional 2D array of subplot titles, flattened in row-major
                  order. None (the default) means no subplot titles.
    xlbl, ylbl:   accepted but currently unused (setting them on the layout
                  broke the shared x-axis).
    :return:      whatever plotOut(fig, plot) returns
    '''
    n_rows, n_cols = trace_array.shape

    # The default sp_titles=None used to crash on `.flatten()`.
    titles = None
    if sp_titles is not None:
        titles = np.asarray(sp_titles).flatten().tolist()

    fig = make_subplots(rows=n_rows,
                        cols=n_cols,
                        shared_xaxes=True,
                        vertical_spacing=vert_spacing,
                        subplot_titles=titles,
                        )

    # plotly subplot rows/cols are 1-based
    for r in range(n_rows):
        for c in range(n_cols):
            for trace in trace_array[r, c]:
                fig.append_trace(trace, r + 1, c + 1)

    fig.layout.title = title
    fig.layout.showlegend = True

    return plotOut(fig, plot)
Esempio n. 5
0
def plotTable(
    data,
    top_headers=None,  # only required if data is list/nparray, not for pandas df
    width=None,
    plot=True,
    title=None,
):
    '''
    Wrapper for plotly table function

    data:        pandas DataFrame, or list / numpy array of rows
    top_headers: column headers. Taken from the DataFrame columns when data
                 is a DataFrame; required otherwise.
    width:       figure width
    title:       figure title
    plot:        forwarded to plotOut together with the figure
    :return:     whatever plotOut(fig, plot) returns
    :raises ValueError: if data is not a DataFrame and top_headers is None
    '''
    import pandas as pd

    if isinstance(data, pd.DataFrame):
        top_headers = data.columns
        tbl_data = data.values
    else:
        # This branch used to leave tbl_data unbound (NameError).
        if top_headers is None:
            raise ValueError(
                'top_headers is required when data is not a pandas DataFrame')
        tbl_data = np.asarray(data)

    # Truncate every cell to at most 7 characters by round-tripping through
    # a fixed-width byte-string dtype.
    # TODO: this should only be done for numeric datatypes
    tbl_data = tbl_data.astype('|S7').astype(str)

    # Header row on top, data rows below
    inp_data = np.vstack((top_headers, tbl_data))

    fig = ff.create_table(inp_data, hoverinfo='skip')

    fig.layout.width = width
    fig.layout.title = title
    fig.layout.margin = {'b': 80, 'r': 80}

    return plotOut(fig, plot)
def plotMultiPR(y_true,        # list of true labels
                    y_scores,   # array of scores for each class of shape [n_samples, n_classes]
                    title = 'Multiclass PR Plot',
                    labels = None, # list of labels for each class
                    threshdot=None, # whether to plot a dot @ the threshold
                    plot=True,  # 1/0. If 0, returns plotly json object, but doesnt plot
                ):
    """
    Makes a multiclass precision-recall plot (one curve per class).

    For column i of y_scores the positives are the samples with y_true == i.
    When there is a single score column the scores are negated first, so that
    treating class 0 as the positive class does not invert the curve.

    labels:    legend names, one per class. Defaults to C1, C2, ...
    threshdot: if given, a marker is drawn on every curve at the threshold
               closest to this value (in the caller's score units).
    :return:   whatever plotOut(fig, plot) returns
    """

    y_true = np.array(y_true)
    y_scores = np.array(y_scores)
    if y_scores.ndim == 1:  # convert to [n_samples, n_classes] even if 1 class
        y_scores = np.atleast_2d(y_scores).T
    N, n_classes = y_scores.shape
    if n_classes == 1:  # needed to avoid inverting when doing binary classification
        y_scores = -1*y_scores
        # thresholds returned below live in the negated score space, so the
        # requested threshold has to be negated as well
        if threshdot is not None:
            threshdot = -1*threshdot

    # calc PR curves & area under each curve
    precision = dict()
    recall = dict()
    pr_auc = dict()
    thresh = dict()
    thresh_txt = dict()
    for i in range(n_classes):
        precision[i], recall[i], thresh[i] = sk.metrics.precision_recall_curve(y_true == i, y_scores[:, i])
        # Step-function integral of precision over recall (average precision).
        # recall is returned in decreasing order, hence the minus sign.
        pr_auc[i] = -np.sum(np.diff(recall[i]) * precision[i][:-1])
        thresh_txt[i] = ['T=%.4f' % t for t in thresh[i]]

    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes+1)]

    labels = [str(x) for x in labels]  # convert to str

    # make traces: recall on x, precision on y
    traces = [go.Scatter(y=precision[i], x=recall[i], name=labels[i] + '. AUC= %.2f' % (pr_auc[i]),
                         text=thresh_txt[i], legendgroup=str(i), line={'width': 1})
              for i in range(n_classes)]

    if threshdot is not None:
        for i in range(n_classes):
            c_indx = (np.abs(thresh[i]-threshdot)).argmin()
            traces += [go.Scatter(x=[recall[i][c_indx]]*2, y=[precision[i][c_indx]]*2, mode='markers',
                                  name='Threshold', legendgroup=str(i), showlegend=False)]

    # make layout. Axis titles follow the traces (x=recall, y=precision);
    # they used to be swapped.
    layout = go.Layout(title=title,
                       xaxis={'title': 'Recall = TPR = P(yp=1 | y=1)'},
                       yaxis={'title': 'Precision = P(y=1 | yp=1)'},
                       legend=dict(x=1),
                       hovermode='closest',
    )

    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 7
0
def scatterMatrix(df,
                  title='Scatterplot Matrix',
                  plot=True):  # if false, just returns plotly json object
    """
    Builds a lower-triangular scatterplot matrix for the columns of df.

    Diagonal cells hold the histogram of one column (via plotHist); cells
    below the diagonal hold the scatter of the row column against the
    column column (via corrPlot). Cells above the diagonal stay empty.
    Returns whatever plotOut(fig, plot) returns.
    """

    cols = df.columns
    N = len(cols)

    fig = py.tools.make_subplots(rows=N, cols=N)

    # subplot positions are 1-based; only columns up to the diagonal are filled
    for row in range(1, N + 1):
        for col in range(1, row + 1):
            if col == row:
                # diagonal: density histogram only, no extras
                cell_fig = plotHist(
                    df[cols[row - 1]],
                    maxData=500,
                    plot=False,
                    rm_outliers=True,
                    density=True,
                    boxplot=0,
                    scatter=0,
                    diff_tst=0)
            else:
                # below diagonal: plain scatter without correlation overlays
                cell_fig = corrPlot(
                    df[cols[row - 1]],
                    df[cols[col - 1]],
                    maxdata=500,
                    addCorr=False,
                    addCorrLine=False,
                    addXYline=False,
                    plot=False,
                )
            for trace in cell_fig.data:
                fig.append_trace(trace, row, col)

    fig['layout'].update(title=title)
    fig['layout'].update(showlegend=False)

    # label the first y-axis of every row with that row's column name
    for n in range(1, N + 1):
        axis_key = 'yaxis' + str((n - 1) * N + 1)
        fig['layout'][axis_key].update(title=cols[n - 1])

    return plotOut(fig, plot)
def plotMultiROC(y_true,        # list of true labels
                    y_scores,   # array of scores for each class of shape [n_samples, n_classes]
                    title = 'Multiclass ROC Plot',
                    labels = None, # list of labels for each class
                    threshdot = None,
                    plot=True,  # 1/0. If 0, returns plotly json object, but doesnt plot
                ):
    """
    Makes a multiclass ROC plot (one curve per class plus the chance diagonal).

    For column i of y_scores the positives are the samples with y_true == i.
    When there is a single score column the scores are negated first, so that
    treating class 0 as the positive class does not invert the curve.

    labels:    legend names, one per class. Defaults to C1, C2, ...
    threshdot: if given, a marker is drawn on every curve at the threshold
               closest to this value (in the caller's score units).
    :return:   whatever plotOut(fig, plot) returns
    """

    y_true = np.array(y_true)
    y_scores = np.array(y_scores)
    if y_scores.ndim == 1:  # convert to [n_samples, n_classes] even if 1 class
        y_scores = np.atleast_2d(y_scores).T
    N, n_classes = y_scores.shape
    if n_classes == 1:  # needed to avoid inverting when doing binary classification
        y_scores = -1*y_scores
        # thresholds returned below live in the negated score space, so the
        # requested threshold has to be negated as well
        if threshdot is not None:
            threshdot = -1*threshdot

    # calc ROC curves & AUC
    fpr = dict()
    tpr = dict()
    thresh = dict()
    thresh_txt = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], thresh[i] = sk.metrics.roc_curve(y_true == i, y_scores[:, i])
        roc_auc[i] = sk.metrics.auc(fpr[i], tpr[i])
        thresh_txt[i] = ['T=%.4f' % t for t in thresh[i]]

    # The default labels=None used to crash in the str conversion below.
    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes+1)]

    labels = [str(x) for x in labels]  # convert labels to str

    # make traces
    traces = [go.Scatter(y=tpr[i], x=fpr[i], name=labels[i] + '. AUC= %.2f' % (roc_auc[i]), text=thresh_txt[i],
                         legendgroup=str(i), line={'width': 1})
              for i in range(n_classes)]
    traces += [go.Scatter(y=[0, 1], x=[0, 1], name='Random classifier', line={'width': 1, 'dash': 'dot'})]

    if threshdot is not None:
        for i in range(n_classes):
            c_indx = (np.abs(thresh[i]-threshdot)).argmin()
            traces += [go.Scatter(x=[fpr[i][c_indx]]*2, y=[tpr[i][c_indx]]*2, mode='markers',
                                  name='Threshold', legendgroup=str(i), showlegend=False)]

    # make layout
    layout = go.Layout(title=title,
                       xaxis={'title': 'FPR'},
                       yaxis={'title': 'TPR'},
                       legend=dict(x=1),
                       hovermode='closest',
    )

    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 9
0
def basicHeatmap(z, x=None, y=None, title='', xlbl='', ylbl='', plot=True):
    '''
    Plots a basic heatmap.

    z:          2D values forwarded to go.Heatmap
    x, y:       optional axis coordinates. None (the default) is replaced by
                an empty list, which is what the old mutable default passed.
    title:      figure title
    xlbl, ylbl: axis titles
    plot:       forwarded to plotOut together with the figure
    :return:    whatever plotOut(fig, plot) returns
    '''
    # Fresh lists per call instead of list literals shared as defaults.
    x = [] if x is None else x
    y = [] if y is None else y

    traces = [go.Heatmap(z=z, x=x, y=y)]

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
    )
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 10
0
def basicBarPlot(
        data,  # list of #'s
        names=None,  # xtick labels. Can be numeric or str
        title='',
        ylbl='',
        xlbl='',
        text=None,  # list of txt vals or 'numb' for numbers
        orient=None,
        sort=False,  # if True, sorts from greatest to least
        line=None,  # add line perpendicular to bars (eg to show mean)
        color='rgb(158,202,225)',  # barplot internal color
        width=None,  # plot width. If None, autoscales
        plot=True):
    """
    Makes a basic bar plot where data is [n,1] list of values. No averaging/etc... For that see barPlot or propBarPlot
    EX: psp.basicBarPlot([1,2,3,2])

    text:   per-bar labels, or the string 'numb' to label each bar with its
            own value. Per-bar labels are reordered together with the bars
            when sort=True.
    orient: accepted but currently unused.
    line:   value handed to hline() to draw a reference line. Any value other
            than None/False is drawn, including 0.
    Returns whatever plotOut(fig, plot) returns.
    """

    # Only a plain string can be the 'numb' keyword (or a single shared label);
    # comparing an array to 'numb' would be evaluated elementwise.
    text_is_str = isinstance(text, str)

    if sort:
        order = np.argsort(data)[::-1]  # descending
        data = np.array(data)[order]
        if names is not None:
            names = np.array(names)[order]
        # keep per-bar labels attached to their bars
        if text is not None and not text_is_str:
            text = np.array(text)[order]

    if text_is_str and text == 'numb':
        text = [str(x) for x in data]

    traces = [
        go.Bar(x=names,
               y=data,
               text=text,
               textposition='auto',
               marker=dict(
                   color=color,
                   line=dict(color='rgb(8,48,107)', width=1.5),
               ),
               opacity=0.6)
    ]

    layout = go.Layout(
        title=title,
        yaxis={'title': ylbl},
        xaxis={'title': xlbl},
        hovermode='closest',
        width=width,
    )
    # `if line:` silently skipped a reference line at 0
    if line is not None and line is not False:
        layout.shapes = [hline(line)]

    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 11
0
def tornadoPlot(
        vals,  # in Nx3 array, where columns are[low_val, orig_val, high_val]
        names,  # parameter names (list of str)
        title,
        width=40,
        xlbl='Output node probability',
        plot=True):
    """
    Makes a tornado plot in plotly.

    Every parameter gets two thick horizontal segments on its own y category:
    a green one from orig_val to high_val and a red one from low_val to
    orig_val.

    vals:  N x 3 rows of [low_val, orig_val, high_val]
    names: N parameter names, used as y categories and trace names
    width: line width of the segments
    Returns whatever plotOut(fig, plot) returns.
    """

    def _segments(col_slice, group, color):
        # one two-point line per parameter, drawn on that parameter's row
        return [
            go.Scatter(x=row[col_slice],
                       y=[names[i]] * 2,
                       name=names[i],
                       legendgroup=group,
                       line={
                           'color': color,
                           'width': width
                       }) for i, row in enumerate(vals)
        ]

    # orig -> high (green) first, then low -> orig (red)
    traces = (_segments(slice(1, None), 'pos_change', 'green') +
              _segments(slice(None, 2), 'neg_change', 'red'))

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={
            'position': .5,
            'autorange': 'reversed'  # first parameter at the top
        },
        hovermode='closest',
        showlegend=False,
    )
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 12
0
def plotPolar(
    data,  # N-d list/numpy array
    names=None,  # names of cols in data (ex:['A', 'B']
    scatter=True,  # whether to do polar scatter plot. Only works if N=1
    maxData=1000,  # max # of points to plot above histogram (if too high, it will be slow)
    hist=True,  # 1/0 whether to plot histogram of points
    numbins=40,  # bins in histogram
    normHist=True,  # whether to normalize histogram
    title='Polar Distribution',  # title of plot
    plot=True):  # 1/0. If 0, returns plotly json object, but doesnt plot
    """
    This plots a polar plot of data in plotly

    data is either one vector, a 2D numeric array (one series per row), or a
    list of vectors of possibly different lengths. Values are expected in
    radians within [-pi, pi]; a message is printed for every series that is
    not. Angles are converted to degrees for plotting.
    Returns whatever plotOut(fig, plot) returns.

    Usage:
    x1 = np.random.uniform(-np.pi, np.pi, (100))
    x2 = np.random.uniform(-np.pi, np.pi, (200))
    plotPolar([x1,x2], names=['A', 'B'], numbins=50)
    """

    ## Basic formatting
    if not isinstance(data, np.ndarray):
        try:
            data = np.array(data)
        except ValueError:
            # newer numpy refuses ragged input; build the object array by hand
            ragged = np.empty(len(data), dtype=object)
            for k, col in enumerate(data):
                ragged[k] = np.asarray(col)
            data = ragged

    # Lx is always a plain list holding the length of each series, so the
    # code below does not depend on which branch produced it.
    if np.issubdtype(data.dtype, np.number):  #given an np array
        data = np.atleast_2d(data)
        N, n_pts = data.shape
        Lx = [n_pts] * N
    else:  #given a data array
        N = len(data)
        Lx = [len(l) for l in data]

    if names is None:
        names = [str(i + 1) for i in range(N)]

    # make sure all data in radians
    for col in data:
        if (np.min(col) < -np.pi) or (np.max(col) > np.pi):
            print('All data must be within +-pi')

    if N > 1:
        showleg = True
        # NOTE(review): assumes the 'Set1' scale exists for 3..9 colors - confirm
        # against colorlover. Colors are cycled below if N exceeds the palette.
        cols = cl.scales[str(min(max(N + 1, 3), 9))]['qual']['Set1']
    else:
        showleg = False
        cols = ['blue']

    # scale markersize: the more points get drawn, the smaller the marker
    Lxp = min(max(Lx), maxData)
    markersize = 9
    for limit, size in ((5000, 1), (2000, 2), (1000, 3), (200, 4), (80, 5),
                        (25, 7)):
        if Lxp > limit:
            markersize = size
            break

    traces = []

    ## Histogram
    if hist:
        hy, hx = zip(*[
            np.histogram(
                col, bins=numbins, density=normHist, range=[-np.pi, np.pi])
            for col in data
        ])
        hx = np.array(hx)  # bin edges, numbins + 1 per series
        hy = np.array(hy)  # bin heights, numbins per series

        # Close the circle by repeating the first height at the last edge
        # (+pi is the same angle as -pi). The edges already have one more
        # entry than the heights, so theta and radius now match in length.
        hy = np.hstack((hy, hy[:, 0:1]))

        # t=theta, r=radius
        traces += [
            go.Scatter(t=hx[n] / np.pi * 180,
                       r=hy[n],
                       name=names[n],
                       mode='lines',
                       line={
                           'width': 3,
                           'color': cols[n % len(cols)]
                       },
                       hovertext=names[n],
                       hoverinfo='name+r+t') for n in range(N)
        ]
        top = np.max(hy.flatten()) * 1.2
    else:
        top = 1

    ## Scatter
    if scatter and N == 1:
        jitter = .05
        dataToPlot = np.asarray(data[0])
        Np = Lx[0]
        # if data too large only plot a subset
        if Np > maxData:
            Np = maxData
            dataToPlot = np.random.choice(dataToPlot, Np, replace=False)
        # theta must use the (possibly subsampled) points so it has the same
        # length as the jittered radii
        traces += [
            go.Scatter(r=top + np.random.normal(size=Np) * top * jitter,
                       t=dataToPlot / np.pi * 180,
                       mode='markers',
                       name=names[0] + ' scatter',
                       marker={
                           'size': markersize,
                           'color': cols[0]
                       })
        ]

    ## make fig
    layout = go.Layout(title=title, showlegend=showleg)
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 13
0
def plotHist2D(
        x,  # 1D vector
        y,  # 1D vector
        bins=(15, 30),  # # of bins in histogram
        xlbl='',
        ylbl='',
        title='',
        log=False,  # whether to log the histogram counts
        mean=False,  # whether to overlay the mean trendline onto heatmap
        plot=True):
    """
    plots 2D heatmap. Does the binning in np as its faster than plotly 2D hist

    bins: (x_bins, y_bins). For an axis whose min or max lies more than 8
          standard deviations from its mean, the bin edges are instead spread
          over that axis' 5th..95th percentile.
    log:  plot log10 of the counts (empty bins become NaN).
    mean: overlay the count-weighted mean of y within each x bin.
    Returns whatever plotOut(fig, plot) returns.
    """
    x = np.array(x)
    y = np.array(y)
    maxstd = 8  # if max above this many stddevs from mean, it is clipped
    percclip = [5, 95]  # percentile above which it is clipped

    def _binSpec(v, n):
        # explicit edges over the clipped range when v has extreme values,
        # otherwise just the requested bin count
        mean_v, std_v = np.mean(v), np.std(v)
        extreme = (mean_v + maxstd * std_v < np.max(v)
                   or mean_v - maxstd * std_v > np.min(v))
        return np.linspace(*np.percentile(v, percclip), n) if extreme else n

    xbins = _binSpec(x, bins[0])
    ybins = _binSpec(y, bins[1])

    # `normed=False` dropped: it is deprecated and was the default anyway
    counts, xedges, yedges = np.histogram2d(x, y, bins=[xbins, ybins])
    counts = counts.T  # extremely important!!!!! rows must be y, columns x

    if log:
        H = counts.copy()
        H[H == 0] = np.nan
        H = np.log10(H)
        zlbl = 'log(Count)'
    else:
        H = counts
        zlbl = 'Count'

    hist = go.Heatmap(
        x=xedges,  # sample to be binned on the x-axis
        y=yedges,  # sample to be binned on of the y-axis
        z=H,
        name='Heatmap',
        showlegend=True,
        zsmooth='best',  # (!) apply smoothing to contours
        colorscale='Portland',  # choose a pre-defined color scale
        # plain dict instead of the deprecated go.ColorBar wrapper
        colorbar=dict(
            titleside='right',  # put title right of colorbar
            ticks='outside',  # put ticks outside colorbar
            title=zlbl,
        ))
    plots = [hist]

    # plotting trendline
    if mean:
        # Weight by the raw counts (never their log) so that every x column
        # is a proper distribution over y. Empty columns give NaN, which
        # shows up as a gap in the line.
        with np.errstate(divide='ignore', invalid='ignore'):
            weights = counts / np.sum(counts, axis=0)
        # bin centers: one value per bin, so x and y have equal length
        xcenters = (xedges[:-1] + xedges[1:]) / 2
        ycenters = (yedges[:-1] + yedges[1:]) / 2
        mean_y_given_x = ycenters @ weights
        meanLine = [
            go.Scatter(x=xcenters,
                       y=mean_y_given_x,
                       name='Trendline',
                       showlegend=True)
        ]
        plots = meanLine + plots

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        showlegend=True,
    )

    fig = go.Figure(data=plots, layout=layout)

    return plotOut(fig, plot)
Esempio n. 14
0
def plotMultiROC(
        y_true,  # list of true labels
        y_scores,  # array of scores for each class of shape [n_samples, n_classes]
        title='Multiclass ROC Plot',
        n_points=100,  # reinterpolates to have exactly N points
        labels=None,  # list of labels for each class
        threshdot=None,
        return_auc=False,
        plot=True,  # 1/0. If 0, returns plotly json object, but doesnt plot
):
    """
    Makes a multiclass ROC plot. Can also be used for binary ROC plot

    For column i of y_scores the positives are the samples with y_true == i.
    With a single score column the scores (and threshdot) are negated first,
    so that treating class 0 as the positive class does not invert the curve.

    n_points:   if not None, each curve is resampled at n_points evenly
                spaced TPR levels and the (0,0) / (1,1) endpoints are added.
    labels:     legend names, one per class. Defaults to C1, C2, ...; also
                used (with a printed warning) when the count does not match.
    threshdot:  if given, a marker is drawn on every curve at the threshold
                closest to this value.
    return_auc: if True, returns (plotOut result, {class index: AUC}),
                otherwise only the plotOut result. The AUC is computed on
                the full curve, before resampling.
    """

    y_true = np.array(y_true)
    y_scores = np.array(y_scores)
    if y_scores.ndim == 1:  # convert to [n_samples, n_classes] even if 1 class
        y_scores = np.atleast_2d(y_scores).T
    N, n_classes = y_scores.shape
    if n_classes == 1:  # needed to avoid inverting when doing binary classification
        y_scores = -1 * y_scores
        if threshdot is not None:
            threshdot = -1 * threshdot

    # calc ROC curves & AUC
    fpr = dict()
    tpr = dict()
    thresh = dict()
    thresh_txt = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], thresh[i] = sk.metrics.roc_curve(
            y_true == i, y_scores[:, i])
        roc_auc[i] = sk.metrics.auc(fpr[i], tpr[i])
        if n_points is not None:
            # tpr is non-decreasing, so searchsorted picks, for every target
            # level, the first curve point that reaches it
            x = np.linspace(0, 1, n_points)
            indxs = np.searchsorted(tpr[i], x)
            tpr[i] = tpr[i][indxs]
            fpr[i] = fpr[i][indxs]
            thresh[i] = thresh[i][indxs]
            # Add endpoints for proper AUC calcs
            tpr[i] = np.concatenate(([0], tpr[i], [1]))
            fpr[i] = np.concatenate(([0], fpr[i], [1]))
            thresh[i] = np.concatenate(([np.inf], thresh[i], [-np.inf]))
        thresh_txt[i] = ['T=%.4f' % t for t in thresh[i]]

    # The default labels=None used to crash on len(None).
    if labels is not None and len(labels) != n_classes:
        print(
            f'Warning: have {len(labels)} lables, and {n_classes} classes. Disregarding labels'
        )
        labels = None

    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes + 1)]

    labels = [str(x) for x in labels]  # convert labels to str

    # make traces
    traces = [
        go.Scatter(y=tpr[i],
                   x=fpr[i],
                   name=labels[i] + '. AUC= %.2f' % (roc_auc[i]),
                   text=thresh_txt[i],
                   legendgroup=str(i),
                   line={'width': 1}) for i in range(n_classes)
    ]
    traces += [
        go.Scatter(y=[0, 1],
                   x=[0, 1],
                   name='Random classifier',
                   line={
                       'width': 1,
                       'dash': 'dot'
                   })
    ]

    if threshdot is not None:
        for i in range(n_classes):
            c_indx = (np.abs(thresh[i] - threshdot)).argmin()
            traces += [
                go.Scatter(x=[fpr[i][c_indx]] * 2,
                           y=[tpr[i][c_indx]] * 2,
                           mode='markers',
                           name='Threshold',
                           legendgroup=str(i),
                           showlegend=False)
            ]

    # make layout
    layout = go.Layout(
        title=title,
        xaxis={'title': 'FPR'},
        yaxis={'title': 'TPR'},
        legend=dict(x=1),
        hovermode='closest',
    )

    fig = go.Figure(data=traces, layout=layout)

    out = plotOut(fig, plot)
    if return_auc:
        # previously a 1-tuple was returned and the AUCs were lost
        return out, roc_auc
    return out
Esempio n. 15
0
def plotMultiPR(
        y_true,  # list of true labels
        y_scores,  # array of scores for each class of shape [n_samples, n_classes]
        title='Multiclass PR Plot',
        n_points=100,  # reinterpolates to have exactly N points
        labels=None,  # list of labels for each class
        threshdot=None,  # whether to plot a dot @ the threshold
        plot=True,  # 1/0. If 0, returns plotly json object, but doesnt plot
):
    """
    Makes a multiclass PR plot

    For column i of y_scores the positives are the samples with y_true == i.
    With a single score column the scores (and threshdot) are negated first,
    so that treating class 0 as the positive class does not invert the curve.

    n_points:  if not None, each curve is resampled at n_points evenly spaced
               recall levels and the (recall=1, precision=0) and
               (recall=0, precision=1) endpoints are added.
    labels:    legend names, one per class. Defaults to C1, C2, ...
    threshdot: if given, a marker is drawn on every curve at the threshold
               closest to this value.
    Returns whatever plotOut(fig, plot) returns.
    """

    y_true = np.array(y_true)
    y_scores = np.array(y_scores)
    if y_scores.ndim == 1:  # convert to [n_samples, n_classes] even if 1 class
        y_scores = np.atleast_2d(y_scores).T
    N, n_classes = y_scores.shape
    if n_classes == 1:  # needed to avoid inverting when doing binary classification
        y_scores = -1 * y_scores
        # thresholds returned below live in the negated score space, so the
        # requested threshold has to be negated as well
        if threshdot is not None:
            threshdot = -1 * threshdot

    # calc PR curves & area under each curve
    precision = dict()
    recall = dict()
    pr_auc = dict()
    thresh = dict()
    thresh_txt = dict()
    for i in range(n_classes):
        precision[i], recall[i], thresh[i] = sk.metrics.precision_recall_curve(
            y_true == i, y_scores[:, i])
        # Step-function integral of precision over recall (average precision):
        # every drop in recall is weighted by the precision at the point where
        # the drop starts. recall is decreasing, hence the minus sign.
        pr_auc[i] = -np.sum(np.diff(recall[i]) * precision[i][:-1])
        if n_points is not None:
            # Resample along recall, which is monotonic; precision is not, so
            # it cannot be used as the searchsorted key.
            n_curve = recall[i].size
            targets = np.linspace(0, 1, n_points)
            pos = np.searchsorted(recall[i][::-1], targets)
            pos = np.clip(pos, 0, n_curve - 1)
            # map back to indices into the original (recall-descending) order
            indxs = (n_curve - 1 - pos)[::-1]
            precision[i] = precision[i][indxs]
            recall[i] = recall[i][indxs]
            # thresh has one entry less than precision/recall
            thresh[i] = thresh[i][np.clip(indxs, 0, thresh[i].size - 1)]
            # Add endpoints for proper AUC calcs
            precision[i] = np.concatenate(([0], precision[i], [1]))
            recall[i] = np.concatenate(([1], recall[i], [0]))
            thresh[i] = np.concatenate(([-np.inf], thresh[i], [np.inf]))
        thresh_txt[i] = ['T=%.4f' % t for t in thresh[i]]

    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes + 1)]

    labels = [str(x) for x in labels]  # convert to str

    # make traces
    traces = [
        go.Scatter(y=precision[i],
                   x=recall[i],
                   name=labels[i] + '. AUC= %.2f' % (pr_auc[i]),
                   text=thresh_txt[i],
                   legendgroup=str(i),
                   line={'width': 1}) for i in range(n_classes)
    ]

    if threshdot is not None:
        for i in range(n_classes):
            c_indx = (np.abs(thresh[i] - threshdot)).argmin()
            traces += [
                go.Scatter(x=[recall[i][c_indx]] * 2,
                           y=[precision[i][c_indx]] * 2,
                           mode='markers',
                           name='Threshold',
                           legendgroup=str(i),
                           showlegend=False)
            ]

    # make layout
    layout = go.Layout(
        title=title,
        yaxis={
            'title': 'Precision = P(y=1 | yp=1)',
            'range': [0, 1]
        },  # 'Precision = P(yp=y | yp=1)'
        xaxis={
            'title': 'Recall = TPR = P(yp=1 | y=1)',
            'range': [0, 1]
        },  # 'Recall = TPR = P(yp=y | y=1)'
        legend=dict(x=1),
        hovermode='closest',
    )

    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 16
0
def plot2Hists(
    x1,  # data of 1st histogram
    x2,  # data of 2nd histogram
    names=['A', 'B'],  # legend names of x1, x2 (ex: ['A','B']
    maxData=500,  # max # of points to plot above histogram (if too high, it will be slow)
    normHist=True,  # 1/0. if 1, norms the histogram to a PDF
    samebins=True,  # whether both hists should have same edges
    numbins=40,  # # bins in histogram
    title='Data Distribution',  # title of plot
    rm_outliers=False,  #1/0 whether to remove outliers or not
    KS=False,  # whether to do 2 sample KS test for different distributions
    MW=False,  # whether to display the Mann-Whitney/Ranksum test for difference of distributions in title
    T=False,  # as MW, but for ttest
    alt='two-sided',  # one-sided or two-sided hypothesis testing. See scipy for options
    bp=True,  # whether to add barplot above histograms
    plot=True):  # 1/0. If 0, returns plotly json object, but doesnt plot
    """
    Plots two 1D histograms using plotly.
    Does the binning w/ numpy to make it go way faster than plotly's inherent histogram function

    NaNs are dropped from both inputs first. x1 is drawn in red, x2 in blue.
    Above the overlaid histograms a jittered strip of (at most maxData) raw
    points is drawn per input and, if bp is set, a horizontal box plot.
    The p-values of the requested tests (MW, T, KS) are appended to the title.
    Returns whatever plotOut(fig, plot) returns.

    Usage:
        plot2Hists(x1, x2, names=['A', 'B'])
    """

    x1 = np.array(x1)
    x2 = np.array(x2)

    # Remove NaNs
    x1 = x1[~np.isnan(x1)]
    x2 = x2[~np.isnan(x2)]

    # remove outliers & get basic stats (only the range and stats are used)
    _, _, _, rng1, stats1 = removeOutliers(
        x1, stdbnd=6, percclip=[5, 95], rmv=rm_outliers)
    _, _, _, rng2, stats2 = removeOutliers(
        x2, stdbnd=6, percclip=[5, 95], rmv=rm_outliers)

    lo = min(rng1[0], rng2[0])
    hi = max(rng1[1], rng2[1])

    if samebins:
        bins = np.linspace(lo, hi, numbins)
    else:
        bins = numbins

    hy1, hx1 = np.histogram(x1, bins=bins, density=normHist, range=rng1)
    hy2, hx2 = np.histogram(x2, bins=bins, density=normHist, range=rng2)

    top = np.max(np.hstack((hy1, hy2))) * 1.1

    # hist plots. Bars are centered on their x value, so they are placed at
    # the bin centers (one per height) rather than at the bin edges.
    def _histBar(edges, heights, name, color):
        return go.Bar(x=(edges[:-1] + edges[1:]) / 2,
                      y=heights,
                      name=name,
                      legendgroup=name,
                      opacity=.5,
                      marker=dict(color=color,
                                  line=dict(color='black', width=2)))

    # data plots: jittered strip of raw points at the given height
    def _dataStrip(v, height, name, color):
        # if data too large only plot a subset; the jitter vector is sized
        # from the points actually drawn, after NaN removal
        if len(v) > maxData:
            v = np.random.choice(v, maxData, replace=False)
        return go.Scatter(x=v,
                          y=height + np.random.normal(size=len(v)) * top * .03,
                          mode='markers',
                          marker=dict(size=2, color=color),
                          hoverinfo='x+name',
                          name=name,
                          legendgroup=name,
                          showlegend=False)

    traces = [
        _histBar(hx1, hy1, names[0], 'red'),
        _histBar(hx2, hy2, names[1], 'blue'),
        _dataStrip(x1, top * 1.2, names[0], 'red'),
        _dataStrip(x2, top, names[1], 'blue'),
    ]

    # Boxplots
    if bp:
        bp1 = boxPlot(stats1['med'],
                      np.percentile(x1, [25, 75]),
                      rng1,
                      mean=stats1['mean'],
                      name=names[0],
                      horiz=True,
                      offset=top * 1.3,
                      legendGroup=names[0],
                      plot=False,
                      col='red')
        bp2 = boxPlot(stats2['med'],
                      np.percentile(x2, [25, 75]),
                      rng2,
                      mean=stats2['mean'],
                      name=names[1],
                      horiz=True,
                      offset=top * 1.1,
                      legendGroup=names[1],
                      plot=False,
                      col='blue')
        traces = traces + bp1 + bp2

    # Stat testing
    if MW:
        stat, p_MW = sp.stats.mannwhitneyu(x1, x2, alternative=alt)
        title += ' P_MW=%.3f' % (p_MW)
    if T:
        stat, p_T = sp.stats.ttest_ind(x1,
                                       x2,
                                       equal_var=True,
                                       nan_policy='omit')
        title += ' P_T=%.3f' % (p_T)
    if KS:
        stat, p_KS = sp.stats.ks_2samp(x1, x2)
        title += ' P_KS=%.3f' % (p_KS)

    # Pad the joint range by 10% of its span on both sides. The upper bound
    # is the larger of the two maxima, and additive padding also works when
    # the bounds are negative.
    pad = (hi - lo) * .1
    if pad == 0:
        pad = .5
    plotrng = [lo - pad, hi + pad]
    ylbl = 'Density' if normHist else 'Count'
    fig = go.Figure(data=traces,
                    layout={
                        'title': title,
                        'yaxis': {
                            'title': ylbl
                        },
                        'xaxis': {
                            'range': plotrng
                        },
                        'barmode': 'overlay',
                        'bargap': 0,
                        'hovermode': 'closest',
                    })

    return plotOut(fig, plot)
Esempio n. 17
0
def multiLine(
        data,  # [N,Lx] numpy array or list, where rows are each line
        x=None,  # optional x-data
        z=None,  # optional z (color) data
        txt=None,  # optional txt over points
        lines=True,  # 1/0 whether we want to plot each of the individual lines
        mean=False,  # True/False where want mean+std line
        names=None,  # names of each data list
        plot=True,  # if false, just returns plotly json object
        title='',  # title of plot
        ylbl='',  #
        xlbl='',  #
        norm=None):  # input to norm_mat function if want to norm the data
    """
    Plots a bunch of lines (and optionally their mean +/- std band) in plotly.

    The inputs are first passed through ``_massageData``; this function then
    reads ``info['n_sigs']``, ``info['n_bins']``, ``info['x_info']`` and
    ``info['z_info']`` from its result to decide how to draw.

    Ex: psp.multiLine(data, x=x, names=[], xlbl='', ylbl='', title='')

    :param data: [N, Lx] array or list, one line per row
    :param x: optional x-data
    :param z: optional per-point colour data (only supported for a single line)
    :param txt: optional hover text for the points
    :param lines: whether to draw each individual line
    :param mean: whether to add a mean line with a +/- 1 std band. Only drawn
                 when all lines share the same x-data
    :param names: legend names, one per line
    :param plot: forwarded to ``plotOut``
    :param title: title of plot
    :param ylbl: y-axis label
    :param xlbl: x-axis label
    :param norm: if not None, ``data`` is normalised with ``norm_mat(data, method=norm)``
    :return: the result of ``plotOut(fig, plot)``
    """

    data, x, z, names, info = _massageData(data, x=x, z=z, names=names)
    N, Lx = info['n_sigs'], info['n_bins']
    uniquex = not info['x_info']['shared']

    if norm is not None:
        data = norm_mat(data, method=norm)

    if info['z_info']['provided']:
        assert N == 1, 'So far coloring only works w/ 1 data series'
        cols = z
        showscale = True
        line_mode = 'lines+markers'
        scattertext = ['z=%d' % (i) for i in range(Lx)] if txt is None else txt
    else:
        cols = _getCols(N) if N > 1 else ['blue']
        showscale = False
        line_mode = 'lines'
        scattertext = '' if txt is None else txt

    traces = []
    if lines:
        for i in range(N):
            # every line has its own x-row only when the x-data is not shared
            x_row = x[i] if uniquex else x[0]
            traces += [
                go.Scatter(y=data[i],
                           x=x_row,
                           name=names[i],
                           line={'width': 1},
                           mode=line_mode,
                           text=scattertext,
                           marker={
                               'size': 2,
                               'color': cols[i],
                               'showscale': showscale,
                               'colorscale': 'Portland'
                           })
            ]

    if mean and not uniquex:
        # separate names so the boolean `mean` flag is not overwritten
        mean_sig = np.mean(data, axis=0)
        std_sig = np.std(data, axis=0)
        plotmean = go.Scatter(x=x[0],
                              y=mean_sig,
                              name='Mean',
                              legendgroup='mean',
                              line={'width': 6})
        ploterror_top = go.Scatter(
            x=x[0],
            y=mean_sig + std_sig,
            fill='none',
            fillcolor='rgba(0,100,80,0.2)',
            mode='lines',
            marker=dict(color='rgba(20,100,80,0)'),
            line=dict(width=0),
            showlegend=False,
            legendgroup='mean',
            name='upper bound',
            opacity=.7,
        )
        # 'tonexty' fills the area between this trace and the upper bound
        ploterror_bottom = go.Scatter(
            x=x[0],
            y=mean_sig - std_sig,
            fill='tonexty',
            fillcolor='rgba(0,100,80,0.2)',
            mode='lines',
            # was "444": a hex colour needs the leading '#'
            marker=dict(color='#444'),
            line=dict(width=0),
            showlegend=False,
            legendgroup='mean',
            name='lower bound',
            opacity=.7,
        )
        # mean traces go first in the trace list
        traces = [plotmean, ploterror_top, ploterror_bottom] + traces

    if info['x_info']['provided'] and isinstance(x[0][0], str):
        # categorical (string) x-data: show every label as a tick
        xaxis = {
            'title': xlbl,
            'showgrid': True,
            'showticklabels': True,
            'tickvals': x[0],
            'tickfont': dict(size=18)
        }
    else:
        xaxis = {'title': xlbl}

    layout = go.Layout(
        title=title,
        xaxis=xaxis,
        yaxis={'title': ylbl},
    )
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 18
0
def propBarPlot(
        data,  # list of 1D data vectors
        names=None,  # names of data vectors
        title=' ',  # title of plot
        ylbl='Proportion',  # y-label\
        plot=True):
    """
    Makes a custom plotly proportion barplot.

    Each bar is the mean of one data vector; the error bar is the standard
    error of a proportion, sqrt(p * (1 - p) / n).

    :param data: list of 1D data vectors (they may have different lengths)
    :param names: names of data vectors; defaults to '1', '2', ...
    :param title: title of plot
    :param ylbl: y-axis label
    :param plot: forwarded to ``plotOut``
    :return: the result of ``plotOut(fig, plot)``
    """
    # Convert every vector on its own: np.array() on a list of unequal-length
    # vectors raises on recent NumPy versions.
    data = [np.asarray(col) for col in data]
    N = len(data)
    Lx = [len(col) for col in data]

    if names is None:
        names = [str(i + 1) for i in range(N)]
    if N >= 3:
        cols = cl.scales[str(N)]['qual']['Set1']
    else:
        # the smallest scale looked up here has 3 colours; take the first N
        cols = cl.scales[str(3)]['qual']['Set1'][0:N]

    means = [np.mean(col) for col in data]
    std = [(means[n] * (1 - means[n]) / Lx[n])**.5 for n in range(N)]

    traces = [
        go.Bar(x=list(range(N)),
               y=means,
               marker=dict(color=cols),
               text=['N = %d' % (l) for l in Lx],
               name='BAR',
               error_y=dict(type='data', array=std, visible=True),
               showlegend=False)
    ]

    # go.layout.XAxis (as used by barPlot) instead of the deprecated go.XAxis
    xaxis = go.layout.XAxis(
        # title="",
        showgrid=True,
        showline=True,
        ticks="",
        showticklabels=True,
        linewidth=2,
        ticktext=names,
        tickvals=list(range(N)),
        tickfont=dict(size=18))

    layout = go.Layout(
        title=title,
        xaxis=xaxis,
        yaxis={'title': ylbl},
        bargap=.5,
        hovermode='closest',
        showlegend=False,
    )

    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 19
0
def barPlot(
    data,  # list of 1D data vectors
    names=None,  # names of data vectors
    maxData=500,  # max # of points to plot above histogram (if too high, it will be slow)
    title=' ',  # title of plot
    ylbl='Mean',  # y-label
    bar=True,  # 1/0. If 0, makes boxplot instead of barplot
    stats=[],  # which stat tests to run, including [ttest, MW, ANOVA, KW] (Kruskal-Wallis); never mutated here
    plot=True):  # 1/0. If 0, just returns fig object
    """
    Makes a custom plotly barplot (or boxplot) w/ the raw data scattered on the side.

    Ex: barPlot(data, names, title='Plot Title', ylbl='Metric')

    :param data: list of 1D data vectors (they may have different lengths)
    :param names: names of data vectors; defaults to '1', '2', ...
    :param maxData: vectors longer than this are randomly subsampled (without
                    replacement) for the scatter overlay
    :param title: title of plot; p-values of the requested tests are appended
    :param ylbl: y-axis label
    :param bar: if truthy draws mean bars with std error bars, otherwise boxplots
                built with ``boxPlot``
    :param stats: which tests to run. 'MW' and 'ttest' are only run when there
                  are exactly 2 vectors; 'ANOVA' and 'KW' only print a notice
    :param plot: forwarded to ``plotOut``
    :return: the result of ``plotOut(fig, plot)``
    """
    # TODO: add outlier removal

    # Convert every vector on its own: np.array() on a list of unequal-length
    # vectors raises on recent NumPy versions.
    data = [np.asarray(col) for col in data]
    N = len(data)
    Lx = [len(col) for col in data]  # lengths before NaN removal (used for marker scaling)
    # remove NaNs
    data = [removeNaN(col) for col in data]

    if names is None:
        names = [str(i + 1) for i in range(N)]

    if N < 3:
        cols = cl.scales[str(3)]['qual']['Set1'][0:N]
    elif N <= 12:
        cols = cl.scales[str(N)]['qual']['Set3']
    else:
        cols = [None] * N

    jitter = .03

    means = [np.mean(col) for col in data]
    meds = [np.median(col) for col in data]
    std = [np.std(col) for col in data]
    # Largest mean + std over all vectors. `means + std` on two Python lists
    # concatenates them, so the sum has to be taken element-wise.
    y_top = np.max(np.add(means, std))

    traces = []
    if bar:
        bars = [
            go.Bar(x=list(range(N)),
                   y=means,
                   marker=dict(color=cols),
                   text=['median= %.4f' % (m) for m in meds],
                   name='BAR',
                   error_y=dict(type='data', array=std, visible=True),
                   showlegend=False)
        ]
        traces += bars
    else:
        # boxplot: box = 25th-75th percentile, whiskers = 5th-95th percentile
        boxwidth = 50
        quartiles = np.array(
            [np.percentile(data[n], [25, 75]) for n in range(N)])
        minmax = np.array([np.percentile(data[n], [5, 95]) for n in range(N)])
        boxs = [
            boxPlot(meds[n],
                    quartiles[n],
                    minmax[n],
                    mean=means[n],
                    outliers=None,
                    name=names[n],
                    horiz=0,
                    offset=n,
                    legendGroup='boxplot',
                    showleg=False,
                    plot=False,
                    col=cols[n],
                    width=boxwidth) for n in range(N)
        ]
        # each boxPlot call returns a list of traces; flatten them
        traces += sum(boxs, [])

    # scale markersize
    Lxp = np.max(Lx)
    if Lxp > 5000:
        markersize = 1
    elif Lxp > 2000:
        markersize = 2
    elif Lxp > 1000:
        markersize = 3
    elif Lxp > 200:
        markersize = 4
    elif Lxp > 80:
        markersize = 5
    else:
        markersize = 7

    # reduce length of data for plotting
    data_to_plot = [
        np.random.choice(col, maxData, replace=False)
        if len(col) > maxData else col for col in data
    ]

    # raw points are drawn half a unit to the right of their bar, w/ x-jitter
    dataPlot = [
        go.Scatter(x=i + .5 +
                   np.random.normal(size=len(data_to_plot[i])) * jitter,
                   y=data_to_plot[i],
                   mode='markers',
                   marker=dict(size=markersize, color=cols[i]),
                   name=names[i]) for i in range(N)
    ]
    traces += dataPlot

    xaxis = go.layout.XAxis(
        # title="",
        showgrid=True,
        showline=True,
        ticks="",
        showticklabels=True,
        linewidth=2,
        ticktext=names,
        tickvals=list(range(N)),
        tickfont=dict(size=18))

    # if data has huge outliers, manually bring axes closer to look better
    auto_rng = bool(
        np.max([np.max(col) for col in data_to_plot]) < 2 * y_top)

    # stats
    statvals = []
    if 'MW' in stats and N == 2:
        try:
            stat, pval = sp.stats.mannwhitneyu(data[0],
                                               data[1],
                                               alternative='two-sided')
            statvals += [['MW', pval]]
        except Exception:
            # best effort: the plot is still produced without the MW p-value
            print('Could not process MW stats')
    if 'ttest' in stats and N == 2:
        stat, pval = sp.stats.ttest_ind(data[0], data[1])
        statvals += [['T-test', pval]]
    if 'ANOVA' in stats:
        print('ANOVA not yet implemented')
    if 'KW' in stats:
        print('Kruskal–Wallis test not yet implemented')
    if len(statvals) > 0:
        stat_str = '. '.join(['P(%s)=%.3f' % (x[0], x[1]) for x in statvals])
        title = title + '. ' + stat_str

    layout = go.Layout(
        title=title,
        xaxis=xaxis,
        yaxis={
            'title': ylbl,
            'range': [0, y_top * 2],
            'autorange': auto_rng
        },
        bargap=.5,
        hovermode='closest',
        showlegend=False,
    )

    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 20
0
def scatterHistoPlot(x,
                     y,
                     title='2D Density Plot',
                     xlbl='',
                     ylbl='',
                     plot=True):
    """
    Draws the (x, y) points over a 2D density contour, with marginal
    histograms of x and y placed on the secondary axes.

    :param x: x-values of the points
    :param y: y-values of the points
    :param title: title of plot
    :param xlbl: x-axis label
    :param ylbl: y-axis label
    :param plot: forwarded to ``plotOut``
    :return: the result of ``plotOut(fig, plot)``
    """
    dark_red = 'rgb(102,0,0)'
    # fraction of the figure taken by the scatterplot (rest goes to the histograms)
    main_frac = .85

    def _plain_axis(lo, hi, **extra):
        # axis restricted to [lo, hi] of the figure, w/o grid or zero line
        return dict(domain=[lo, hi], showgrid=False, zeroline=False, **extra)

    points = go.Scatter(
        x=x,
        y=y,
        mode='markers',
        name='points',
        marker={'color': dark_red, 'size': 2, 'opacity': 0.4},
    )
    density = go.Histogram2dcontour(
        x=x,
        y=y,
        name='density',
        ncontours=20,
        colorscale='Hot',
        reversescale=True,
        showscale=False,
    )
    # marginal histograms are attached to the secondary axes (y2 / x2)
    hist_of_x = go.Histogram(x=x,
                             name='x density',
                             marker={'color': dark_red},
                             yaxis='y2')
    hist_of_y = go.Histogram(y=y,
                             name='y density',
                             marker={'color': dark_red},
                             xaxis='x2')

    layout = go.Layout(
        title=title,
        showlegend=False,
        autosize=False,
        width=600,
        height=550,
        xaxis=_plain_axis(0, main_frac, title=xlbl),
        yaxis=_plain_axis(0, main_frac, title=ylbl),
        margin={'t': 50},
        hovermode='closest',
        bargap=0,
        xaxis2=_plain_axis(main_frac, 1),
        yaxis2=_plain_axis(main_frac, 1),
    )

    fig = go.Figure(data=[points, density, hist_of_x, hist_of_y],
                    layout=layout)

    return plotOut(fig, plot)
Esempio n. 21
0
def multiMean(data,
              x=None,
              std=True,
              names=None,
              plot=True,
              title='',
              ylbl='',
              xlbl='',
              norm=None,
              indiv=False,
              indivnames=None):
    """
    Plots means of multiple data matrices
    :param data: list of data matrices (rows are averaged); all matrices must
                 have the same number of columns
    :param x: optional x-data. Defaults to 0..Lx-1
    :param std: 1/0. If 1 plots shaded std deviation around mean
    :param names: names of data. Defaults to '#0', '#1', ...
    :param plot: if false, just returns plotly json object
    :param title: title of plot
    :param ylbl: y-axis label
    :param xlbl: x-axis label
    :param norm: input to norm_mat function if want to norm the data
    :param indiv: 1/0 whether we want to plot each of the individual lines
    :param indivnames: names of individual line traces (used as hover text)
    :return: the result of ``plotOut(fig, plot)``
    :raises ValueError: if the data matrices do not share the same length
    """
    data = [np.atleast_2d(np.array(d)) for d in data]
    N = len(data)
    Ncol, Lx = zip(*[d.shape for d in data])
    if len(np.unique(Lx)) != 1:
        raise ValueError('Input data sources must be of the same length (Lx)')
    Lx = Lx[0]

    if norm is not None:
        data = [norm_mat(d, method=norm) for d in data]
    if names is None:
        names = ['#%d' % (i) for i in range(N)]
    if x is None:
        x = np.array(range(Lx))
    x = np.atleast_2d(x)

    traces = []
    cols = cl.scales[str(max(3, N))]['qual']['Set1']
    # translucent versions of the line colours: 'rgb(r,g,b)' -> 'rgba(r,g,b,.2)'
    tcols = ['rgba' + c[3:-1] + ',.2)' for c in cols]
    for n in range(N):
        # local names so the `std` flag is not overwritten by the std array
        mean_sig = np.mean(data[n], axis=0)
        std_sig = np.std(data[n], axis=0)
        plotmean = go.Scatter(x=x[0],
                              y=mean_sig,
                              name=names[n],
                              legendgroup=names[n],
                              line={
                                  'width': 4,
                                  'color': cols[n]
                              })
        traces += [plotmean]
        if std:
            ploterror_top = go.Scatter(
                x=x[0],
                y=mean_sig + std_sig,
                fill='none',
                fillcolor=tcols[n],
                mode='lines',
                marker=dict(color=tcols[n]),
                line=dict(width=0),
                showlegend=False,
                legendgroup=names[n],
                name=names[n] + ' UB',
                opacity=.7,
            )
            # 'tonexty' fills the area between this trace and the upper bound
            ploterror_bottom = go.Scatter(
                x=x[0],
                y=mean_sig - std_sig,
                fill='tonexty',
                fillcolor=tcols[n],
                mode='lines',
                marker=dict(color=tcols[n]),
                line=dict(width=0),
                showlegend=False,
                legendgroup=names[n],
                name=names[n] + ' LB',
                opacity=.7,
            )
            traces += [ploterror_top, ploterror_bottom]
        if indiv and Ncol[n] > 1:
            inames = [''] * Ncol[n] if indivnames is None else indivnames
            # only the first individual line of each group gets a legend entry
            indivlines = [
                go.Scatter(x=x[0],
                           y=l,
                           showlegend=c == 0,
                           name=names[n] + ' |',
                           legendgroup=names[n] + ' |',
                           hovertext=inames[c],
                           hoverinfo='text',
                           opacity=.3,
                           line={
                               'width': 1,
                               'color': cols[n],
                               'dash': 'dot'
                           }) for c, l in enumerate(data[n])
            ]
            traces += indivlines

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        hovermode='closest',
    )
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
def plotConfusionMatrix(y_true, # list of true labels
                        y_pred, # list of predicted labels
                        conf_matrix = None, # optional mode to directly provide confusion matrix
                        title = 'Confusion Matrix',
                        labels = None, # list of labels for each class
                        binarized = None, # if int/str then makes 1vsAll confusion matrix of that class
                        add_totals = True, # whether to add an extra row for class totals
                        plot = True, # 1/0. If 0, returns plotly json object, but doesnt plot
                        fontsize=18,    # axis font
                        norm='rows',     # how to norm matrix colors. either 'all'/'rows'/'columns'
                ):
    """
    Plots either a full or binarized confusion matrix as an annotated heatmap.

    Rows are the true classes and columns the predicted classes. Cell text is
    the raw count; cell colour is the count normalised according to ``norm``.

    EX: plotConfusionMatrix(y_true, y_pred, labels)

    :param y_true: list of true labels (ignored for the matrix itself when ``conf_matrix`` is given)
    :param y_pred: list of predicted labels (ignored when ``conf_matrix`` is given)
    :param conf_matrix: optional precomputed confusion matrix (list or ndarray)
    :param title: NOTE(review): accepted but currently never applied to the figure
    :param labels: list of labels for each class. Defaults to 'C1', 'C2', ...
    :param binarized: class label (str) or class index (int); if given, the
                      matrix is collapsed into a 2x2 one-vs-all matrix
    :param add_totals: whether to append a totals row and column
    :param plot: forwarded to ``plotOut``
    :param fontsize: axis title font size (ticks and annotations are derived from it)
    :param norm: how to normalise the cell colours: 'all' / 'rows' / 'columns'
    :return: the result of ``plotOut(fig, plot)``
    """

    if conf_matrix is not None:
        conf_matrix = np.asarray(conf_matrix)

    # number of classes: explicit labels win, then the supplied matrix, then y_true
    if labels is not None:
        labels = list(labels)
        n_classes = len(labels)
    elif conf_matrix is not None:
        n_classes = conf_matrix.shape[0]
    else:
        n_classes = len(np.unique(y_true))

    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes+1)]

    # `is None` rather than `== None`: the latter is evaluated element-wise on
    # an ndarray and raises when used as a condition
    if conf_matrix is None:
        conf_matrix = sk.metrics.confusion_matrix(y_true, y_pred, labels=range(n_classes))

    # overall accuracy is computed on the full matrix, before any binarization
    acc = np.diag(conf_matrix).sum() / np.sum(conf_matrix) * 100

    if binarized is not None:
        # identify index of 1vsAll category
        if isinstance(binarized, str):
            bin_indx = labels.index(binarized)
        else:
            bin_indx = binarized
        # rows = true class, columns = predicted class
        tp = conf_matrix[bin_indx, bin_indx]
        fn = np.sum(np.delete(conf_matrix[bin_indx, :], bin_indx))    # true=class, pred=other
        fp = np.sum(np.delete(conf_matrix[:, bin_indx], bin_indx))    # true=other, pred=class
        tn = np.sum(np.delete(np.delete(conf_matrix, bin_indx, axis=0), bin_indx, axis=1))
        conf_matrix = np.array([[tp, fn], [fp, tn]])
        labels = ['T','F']
        n_classes = 2

    labels = [str(x) for x in labels]   # convert to str
    labels = ['['+x+']' if len(x)==1 else x for x in labels]    #needed for stupid plotly bug

    # adds an extra row for matrix totals
    conf_matrix_tots = copy.deepcopy(conf_matrix)
    if add_totals:
        pred_tots = np.sum(conf_matrix, 0)
        conf_matrix_tots = np.vstack((conf_matrix, pred_tots))
        true_tots = np.sum(conf_matrix_tots, 1, keepdims=True)
        conf_matrix_tots = np.hstack((conf_matrix_tots, true_tots ))
        labels = labels + ['TOTAL']

    # shorten labels
    labels_short = [x[:10] for x in labels]

    # numeric labels (one per displayed row/column, incl. the totals if any)
    num_labels = list(range(len(labels)))
    n_cells = len(num_labels)

    def normMatByTotal(mat, axis=0):
        ''' This normalizes a matrix by its row (axis=1) or column (axis=0) totals'''
        axis_sums = np.sum(mat, axis=axis, keepdims=True).astype('float32')
        axis_sums[axis_sums == 0] = np.nan  # this avoids divide by 0.
        mat = np.nan_to_num(mat / axis_sums)
        return mat

    # percentage hover labels
    row_percs = normMatByTotal(conf_matrix, axis=1)
    col_percs = normMatByTotal(conf_matrix, axis=0)

    # normalize matrix
    if norm != 'all':
        norm_conf_matrix = row_percs if norm=='rows' else col_percs
    else:
        norm_conf_matrix = conf_matrix
    color_mat = conf_matrix_tots.astype(float)
    # Only the class-vs-class part is overwritten. Slicing by n_classes (not
    # [:-1, :-1]) keeps this valid when there is no totals row/column.
    color_mat[:n_classes, :n_classes] = norm_conf_matrix

    # hover text
    txt_format = '<b>Pred:</b> %s <br><b>True:</b> %s <br><b>Row norm:</b> %.3f%% <br><b>Col norm:</b> %.3f%%'
    htext = np.array([[txt_format % (labels[c], labels[r], row_percs[r,c]*100, col_percs[r,c]*100)
                       for c in range(n_classes)] for r in range(n_classes)])

    # Adjust Total rows
    if add_totals:
        totals_row_shading = .0    # range 0 to 1. 0=darkest, 1=lightest
        tot_val = np.min(norm_conf_matrix) + (np.max(norm_conf_matrix) - np.min(norm_conf_matrix))*totals_row_shading
        color_mat[-1, :] = tot_val
        color_mat[:, -1] = tot_val
        pred_pct = pred_tots / np.sum(pred_tots) * 100
        # per-class true totals as a flat vector (last entry of true_tots is the grand total)
        true_class_tots = true_tots[:-1, 0]
        true_pct = true_class_tots / np.sum(true_class_tots) * 100
        pred_tot_text = np.array(['<b>%% of Predictions:</b> %.2f%%' % v for v in pred_pct])
        true_tot_text = np.array([['<b>%% of True Data:</b> %.2f%%' % v] for v in true_pct]+[['Total Samples']])
        htext = np.hstack((np.vstack((htext, pred_tot_text)), true_tot_text))

    fig = ff.create_annotated_heatmap(color_mat, x=num_labels, y=num_labels, colorscale='Greys', annotation_text=conf_matrix_tots)

    fig.layout.yaxis.title = 'True'
    fig.layout.xaxis.title = 'Predicted (Total accuracy = %.3f%%)' % acc
    fig.layout.xaxis.titlefont.size = fontsize
    fig.layout.yaxis.titlefont.size = fontsize
    fig.layout.xaxis.tickfont.size = fontsize - 2
    fig.layout.yaxis.tickfont.size = fontsize - 2
    fig.layout.showlegend = False
    # Add label text to axis values
    fig.layout.xaxis.tickmode = 'array'
    fig.layout.xaxis.range = [-.5, n_cells - .5]
    fig.layout.xaxis.tickvals = num_labels
    fig.layout.xaxis.ticktext = labels_short
    fig.data[0].hoverlabel.bgcolor = 'rgb(188,202,225)'

    # fig.layout.yaxis.autorange = 'reversed'
    # reversed range so that the first class is drawn at the top
    fig.layout.yaxis.tickmode = 'array'
    fig.layout.yaxis.range = [n_cells - .5, -.5]
    fig.layout.yaxis.tickvals = num_labels
    fig.layout.yaxis.ticktext = labels_short
    fig.layout.margin.l = 120   # adjust left margin to avoid ylbl overlaying tick str's

    fig['data'][0]['xgap'] = 1
    fig['data'][0]['ygap'] = 1
    ## Change annotation font (& text)
    for i in range(len(fig.layout.annotations)):
        fig.layout.annotations[i].font.size = fontsize-3
        #fig.layout.annotations[i].text = str(conf_matrix_tots.flatten()[i])

    # add hover text
    fig.data[0].text = htext
    fig.data[0].hoverinfo = 'text'

    ### Adjust totals fontstyle
    if add_totals:
        # get totals indxs (positions in the flattened (n+1) x (n+1) grid)
        n = n_classes
        last_column_indxs = [(n + 1) * x - 1 for x in range(1, n + 1)]
        last_row_indxs = list(range((n + 1) * (n), (n + 1) ** 2))
        totals_annot_indxs = last_row_indxs + last_column_indxs
        # adjust totals font size & color
        for i in totals_annot_indxs:
            fig['layout']['annotations'][i]['font'] = dict(size=fontsize, color='#000099')

        # Add border lines for total row/col
        data = list(fig['data'])
        data += [go.Scatter(x=[n_classes - .5, n_classes - .5], y=[-.5, n_classes + .5], showlegend=False,
                            hoverinfo='none', line=dict(color='red', width=4, dash='solid'))]
        data += [go.Scatter(y=[n_classes - .5, n_classes - .5], x=[-.5, n_classes + .5], showlegend=False,
                            hoverinfo='none', line=dict(color='red', width=4, dash='solid'))]
        fig = go.Figure(data=data, layout=fig['layout'])

    return plotOut(fig, plot)
Esempio n. 23
0
def plotHist(
    data,  # 1D list/np vector of data
    maxData=1000,  #  max # of points to plot above histogram (if too high, it will be slow)
    plot=True,  #1/0. If 0, returns plotly json object, but doesnt plot
    title='Distribution',  # plot title
    xlbl='',  # plot label
    rm_outliers=False,  #1/0 whether to remove outliers or not
    density=True,  # whether to plot PDF or count
    boxplot=True,  # 1/0 whether to do upper boxplot
    scatter=True,  # 1/0 add upper scatterplot
    diff_tst=0
):  # 1/0. If 1 assumes we checking for a signif difference from 0
    """
    Plots a 1D histogram using plotly.
    Does the binning w/ numpy to make it go way faster than plotly's inherent histogram function

    Usage:
    x = np.random.normal(0,1,(100))
    plotHist(x, title='Normal Distribution', xlbl='values', diff_tst=1)

    :param data: 1D list/np vector of data; NaN/Inf values are dropped
    :param maxData: max # of raw points scattered above the histogram
    :param plot: forwarded to ``plotOut``
    :param title: plot title; test p-values are appended when ``diff_tst`` is set
    :param xlbl: x-axis label
    :param rm_outliers: forwarded to ``removeOutliers`` as ``rmv``
    :param density: whether to plot a probability density or raw counts
    :param boxplot: whether to add a horizontal boxplot above the histogram
    :param scatter: whether to add the (jittered) raw data above the histogram
    :param diff_tst: if truthy, marks x=0 and runs a 1-sample t-test and a
                     Wilcoxon test against 0
    :return: the result of ``plotOut(fig, plot)``
    """

    data = np.array(data)

    # remove NaNs/Infs (np.isfinite is False for both)
    try:
        data = data[np.isfinite(data)]
    except TypeError:
        # raised for dtypes np.isfinite does not support (e.g. strings)
        print('Failed to do NaN removal')

    # Count AFTER the cleaning, so that the scatter x/y vectors built from N
    # have the same length as the data actually plotted
    N = len(data)

    _, _, _, rng, stats = removeOutliers(data,
                                         stdbnd=6,
                                         percclip=[5, 95],
                                         rmv=rm_outliers)

    hy, hx = np.histogram(data, bins=40, density=density, range=rng)
    # np.histogram returns bin EDGES (one more than the counts); bars are
    # centred on their x value, so plot them at the bin centres
    bin_centers = (hx[:-1] + hx[1:]) / 2
    top = np.max(hy) * 1.1
    jitter = .02

    traces = []
    hist = go.Bar(x=bin_centers,
                  y=hy,
                  name='Hist',
                  opacity=.5,
                  marker=dict(color='red', line=dict(color='black', width=2)))
    traces += [hist]

    # if data too large only plot a subset
    if scatter:
        if N > maxData:
            Np = maxData
            dataToPlot = np.random.choice(data, Np, replace=False)
        else:
            dataToPlot, Np = data, N
        # points sit just above the histogram, w/ vertical jitter
        dataPlot = go.Scatter(x=dataToPlot,
                              y=top + np.random.normal(size=Np) * top * jitter,
                              name='data',
                              mode='markers',
                              marker=dict(color='black', size=2),
                              hoverinfo='x+name')
        traces += [dataPlot]

    #boxplot
    if boxplot:
        bp = boxPlot(stats['med'],
                     np.percentile(data, [25, 75]),
                     rng,
                     mean=stats['mean'],
                     horiz=True,
                     offset=top * 1.2,
                     plot=False,
                     col='red',
                     showleg=True)
        traces += bp

    if diff_tst:
        vertline = go.Scatter(x=[0, 0],
                              y=[0, top * 1.1],
                              name='x=0',
                              showlegend=1,
                              line=dict(color='black', width=2, dash='dot'))
        traces += [vertline]
        _, Pt = sp.stats.ttest_1samp(data, 0)
        _, Pw = sp.stats.wilcoxon(data)
        title += ' P_t=%.2f. P_w=%.2f' % (Pt, Pw)

    ylbl = 'Probability Density' if density else 'Count'

    # Pad the x-range by 10% of each bound, always AWAY from the data.
    # (bound * .9 / bound * 1.1 moves a negative bound into the data.)
    x_lo = rng[0] - abs(rng[0]) * .1
    x_hi = rng[1] + abs(rng[1]) * .1

    fig = go.Figure(data=traces,
                    layout={
                        'title': title,
                        'yaxis': {
                            'title': ylbl
                        },
                        'xaxis': {
                            'title': xlbl,
                            'range': [x_lo, x_hi]
                        },
                        'bargap': 0,
                        'hovermode': 'closest',
                    })

    return plotOut(fig, plot)
Esempio n. 24
0
def corrPlot(
        x,  # 1D data vector or list of 1D data vectors
        y,  # 1D data vector or list of 1D data vectors
        z=None,  # optional colors for the points (single series only)
        names=None,  # names of x, y (ex:['A', 'B']
        maxdata=2000,  # max # of points to plot above histogram (if too high, it will be slow)
        addCorr=True,  # whether to add correlation statistics into plot (Pearson R, Spearman R, Pvals, & y=mx+b)
        addCorrLine=True,  # whether to plot correlation line
        addXYline=False,  # whether to plot y=x line
        text=None,  # whether to add additional text to each point
        plot=True,  # if false, just returns plotly json object
        title='Correlation',  # title of plot
        xlbl='',  #
        ylbl='',
        markersize=None,  # either None or #. If None, will automatically determine best
):
    """
    Plots x , y data and their trendline using plotly

    Each (x[n], y[n]) pair is one series. Points where x or y is NaN are
    dropped, and series longer than ``maxdata`` are randomly subsampled for
    the scatter (the statistics always use all remaining points).

    EX: plot diff between two series
        corrPlot(x, y, xlbl='A', ylbl='B', addCorr=False, addCorrLine=False, addXYline=True)

    :return: the result of ``plotOut(fig, plot)``
    :raises ValueError: if x and y do not contain matching series
    """
    #TODO: remove outliers

    def _as_series(v):
        ''' Splits the input into a list of 1D arrays, one per data series '''
        if not isinstance(v, np.ndarray):
            try:
                v = np.array(v)
            except ValueError:
                # unequal-length series: recent NumPy refuses to build the array
                return [np.asarray(s) for s in v]
        if np.issubdtype(v.dtype, np.number):  # given an np array
            return list(np.atleast_2d(v))
        # given a data array (e.g. object array of unequal-length series)
        return [np.asarray(s) for s in v]

    # (1) split into series & get N
    x = _as_series(x)
    y = _as_series(y)
    N = len(x)
    if len(y) != N or any(len(xs) != len(ys) for xs, ys in zip(x, y)):
        raise ValueError('All x & y vectors must be same length!!!')

    # (2) remove NaNs (a point is dropped if either coordinate is NaN)
    keep = [~(np.isnan(xs) | np.isnan(ys)) for xs, ys in zip(x, y)]
    x = [xs[k] for xs, k in zip(x, keep)]
    y = [ys[k] for ys, k in zip(y, keep)]

    # (3) get Lx. Series are kept as a list since they may now differ in length
    Lx = [len(xs) for xs in x]

    # if data has too many points, remove some for speed
    Iplot = [
        np.arange(Lx[n]) if Lx[n] < maxdata else np.random.choice(
            Lx[n], size=maxdata, replace=False) for n in range(N)
    ]

    if names is None:
        names = ['Line ' + str(i) for i in range(N)]
    if isinstance(names, str):
        names = [names]

    traces = []

    # determine scatterpoint colors
    if z is not None:
        assert N == 1, 'So far coloring only works w/ 1 data series'
        zvals = np.atleast_2d(np.asarray(z))[0]
        # keep the colours aligned w/ the points that are actually drawn
        if len(zvals) == len(keep[0]):
            zvals = zvals[keep[0]][Iplot[0]]
        cols = [zvals]
        showleg = False
        showscale = True
        line_col = ['black']
        lg = [None]
        scattertext = ['z=%d' % (i)
                       for i in range(Lx[0])] if text is None else text
    else:
        if N > 1:
            lg = names
            showleg = False
            cols = cl.scales[str(max(3, N))]['qual']['Set1']
        else:
            lg = [None]
            showleg = True
            cols = ['blue']
        line_col = cols
        showscale = False
        if text is None:
            scattertext = ''
        else:
            scattertext = text

    # scale markersize
    Lxp = np.min([max(Lx), maxdata])
    if markersize is None:
        if Lxp > 5000:
            markersize = 1
        elif Lxp > 2000:
            markersize = 2
        elif Lxp > 1000:
            markersize = 3
        elif Lxp > 200:
            markersize = 4
        elif Lxp > 80:
            markersize = 5
        elif Lxp > 25:
            markersize = 7
        else:
            markersize = 9

    scatPlot = [
        go.Scatter(x=x[n][Iplot[n]],
                   y=y[n][Iplot[n]],
                   name=names[n],
                   legendgroup=lg[n],
                   mode='markers',
                   opacity=.5,
                   text=scattertext,
                   marker={
                       'size': markersize,
                       'color': cols[n],
                       'showscale': showscale,
                       'colorscale': 'Portland'
                   }) for n in range(N)
    ]
    traces += scatPlot

    shift = .03  # shift of the drawn lines from the edges of the data range

    annots = []
    if addCorr:
        for n in range(N):
            # linregress / spearmanr return the correlation coefficient
            # itself (r / rho), not its square
            slope, intercept, r_val, p_val, std_err = sp.stats.linregress(
                x[n], y[n])
            rho, p_val_sp = sp.stats.spearmanr(x[n], y[n])
            corrtext = 'Pearson [R, P]=[%.2f,%.2f] <br> ' \
                       'Spearman [R, P]=[%.2f,%.2f] <br> ' \
                       'y=%.2fx+%.2f' \
                       % (r_val, p_val, rho, p_val_sp, slope, intercept)
            #if only 1 data record print stats on graph
            if N == 1:
                # plain dict instead of the deprecated go.Annotation(s) classes
                annots = [
                    dict(x=0.05,
                         y=0.95,
                         showarrow=False,
                         text=corrtext,
                         xref='paper',
                         yref='paper')
                ]

            if addCorrLine:
                # span the x-range of THIS series (not always the first one)
                x_rng = [np.min(x[n]), np.max(x[n])]
                dx_rng = x_rng[1] - x_rng[0]
                xc = np.array(
                    [x_rng[0] + dx_rng * shift, x_rng[1] - dx_rng * shift])
                yc = slope * xc + intercept
                corrline = [
                    go.Scatter(x=xc,
                               y=yc,
                               name=names[n] + ' corr',
                               legendgroup=lg[n],
                               showlegend=showleg,
                               mode='lines',
                               line={'color': line_col[n]},
                               hovertext=corrtext,
                               hoverinfo='name+text')
                ]
                traces += corrline

    if addXYline:
        # span the x-range of all series
        x_rng = [np.min([np.min(xs) for xs in x]),
                 np.max([np.max(xs) for xs in x])]
        dx_rng = x_rng[1] - x_rng[0]
        xc = np.array([x_rng[0] + dx_rng * shift, x_rng[1] - dx_rng * shift])
        xyline = [
            go.Scatter(x=xc,
                       y=xc,
                       name='X=Y',
                       showlegend=True,
                       mode='lines',
                       line={'color': 'black'})
        ]
        traces += xyline

    # figure-level legend: only useful when there is more than one series
    showleg = False if N == 1 else True

    layout = go.Layout(
        title=title,
        annotations=annots,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        hovermode='closest',
        showlegend=showleg,
    )
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
Esempio n. 25
0
def getSTA(
        trigger,
        signal,
        rng,
        lags=1,
        norm='zscore',  # how each STA trial is normalized in all_sta
        removeOutliers=True,  # 1/0. If 1 remove data +- 6 std devs from mean
        # plotting parameters
        plot=False,
        xtra_times=None,  # plots dots relative to on times.
        Fs=1,
        title='Stimulus Triggered Average'):
    '''
    Computes the stimulus triggered average (STA) of signal around trigger and
    builds a plotly figure: a heatmap of all trials with the rescaled mean STA
    drawn on top and a dashed vertical marker at time 0.

    The numerical work is delegated to calcSTA; this function only adds the figure.

    :param trigger:  trigger points around which to calc STA. 1D vector of bins (thus ints)
    :param signal: 1D vector
    :param rng: [lb, ub] in bins of how long to get the STA for
    :param lags: int of how much bins to skip (thus if 2, takes every other point in the STA)
    :param norm: how each STA trial is normalized in all_sta (passed through to calcSTA)
    :param removeOutliers: 1/0, passed through to calcSTA
    :param plot: 1/0. Passed to plotOut together with the figure
    :param xtra_times: optional per-trigger times; plotted as white dots at
                       (xtra_times - trigger) / Fs, one per trial row
    :param Fs: sampling frequency of data. Only relevant for plotting
    :param title: title of plot
    :return: (sta, bins, all_sta, fig) - the three outputs of calcSTA plus the plotly figure
    '''

    # calculate STA
    sta, bins, all_sta = calcSTA(trigger,
                                 signal,
                                 rng,
                                 lags=lags,
                                 norm=norm,
                                 removeOutliers=removeOutliers)

    # generate STA plot
    N = len(trigger)
    t = bins / Fs  # x-axis in seconds, shared by the heatmap and the STA line

    # Stretch the mean STA over the trial axis [.5, N + .5] so it can be drawn
    # on top of the heatmap.
    sta_min = np.min(sta)
    sta_span = np.max(sta) - sta_min
    if sta_span == 0:
        # Flat STA: rescaling would divide by zero and give a NaN/inf line,
        # so draw it at mid-height of the trial axis instead.
        sta_rescaled = np.full_like(sta, N / 2. + .5, dtype=float)
    else:
        sta_rescaled = N / sta_span * (sta - sta_min) + .5

    # good colormaps are Picnic, Rainbow
    # NOTE(review): rows are numbered by len(all_sta) while the y-range below uses
    # len(trigger) - confirm calcSTA always returns one row per trigger.
    heatmap = go.Heatmap(x=t,
                         y=np.arange(1,
                                     len(all_sta) + 1),
                         z=all_sta,
                         colorscale='Rainbow')
    line = go.Scatter(x=t,
                      y=sta_rescaled,
                      line={
                          'color': 'black',
                          'width': 3
                      },
                      name='STA')
    # dashed vertical marker at the trigger time (t = 0)
    yaxis = go.Scatter(x=[0, 0],
                       y=[.5, N + .5],
                       showlegend=False,
                       line={
                           'color': 'black',
                           'dash': 'dash',
                           'width': 1
                       })
    if xtra_times is not None:
        dots = [
            go.Scatter(
                x=(xtra_times - trigger) / Fs,
                y=np.arange(N) + 1,
                name='dots',
                mode='markers',
                marker=dict(size=4, color='white'),
            )
        ]
    else:
        dots = []

    layout = {
        'title': title,
        'xaxis': {
            'title': 'Times (s)',
            # NOTE(review): negating rng[0] presumably assumes it is given as a
            # positive number of bins before the trigger - confirm against calcSTA.
            'range': [-rng[0] / Fs, rng[1] / Fs]
        },
        'yaxis': {
            'title': 'Trial',
            'range': [.5, N + .5]
        },
    }
    fig = go.Figure(data=[heatmap, line, yaxis] + dots, layout=layout)

    plotOut(fig, plot)

    return sta, bins, all_sta, fig