Example #1
0
def display_comparison_plot_mpld3(t, arr, names, line_styles, title, xtitle,
                                  ytitle, ylim, figname):
    """Render a multi-line comparison plot as interactive mpld3 HTML.

    One line is drawn per entry of ``names``: column ``i`` of ``arr`` is
    plotted against ``t`` with ``line_styles[i]``. A ``HighlightLines``
    plugin built from the module-level ``css`` is attached to the figure.

    Args:
        t: x values shared by every line.
        arr: 2-D array-like indexed as ``arr[:, i]`` for the i-th line.
        names: line labels; its length decides how many lines are drawn.
        line_styles: matplotlib line style for each line.
        title: axes title.
        xtitle: x-axis label.
        ytitle: y-axis label.
        ylim: value forwarded to ``ax.set_ylim``.
        figname: unused (only referenced by a disabled ``save_html`` call);
            kept so existing callers keep working.

    Returns:
        The HTML string produced by ``mpld3.fig_to_html``.
    """
    f, ax = plt.subplots()
    lines = []
    for i, name in enumerate(names):
        line, = ax.plot(t,
                        arr[:, i],
                        label=name,
                        lw=3,
                        ls=line_styles[i],
                        alpha=0.2)
        lines.append(line)

    ax.set_xlabel(xtitle)
    ax.set_ylabel(ytitle)
    ax.set_title(title)
    ax.set_ylim(ylim)
    ax.grid()

    plugins.connect(f, HighlightLines(lines, names, css))

    # Render once, then release the figure. The previous code called
    # mpld3.display() and threw its result away, which rendered the figure
    # a second time merely to get it closed.
    html = mpld3.fig_to_html(f)
    plt.close(f)
    return html
Example #2
0
 def smithd3(self, x):
     """Draw a Smith chart with ``twoport`` and pass the figure to mpld3.

     NOTE(review): the parameter ``x`` is never used and the plotted value
     ``a`` is not defined inside this method -- presumably ``a`` should be
     ``x`` or is a module-level name; confirm. The object returned by
     ``mpld3.display`` is discarded, so the method returns ``None``.
     """
     import mpld3
     import twoport as tp
     # ``pl`` is not imported here; it must come from the enclosing module.
     fig, ax = pl.subplots()
     sc = tp.SmithChart(show_cursor=True, labels=True, ax=ax)
     sc.plot_s_param(a)
     mpld3.display(fig)
Example #3
0
 def after(self):
     """Attach an interactive legend and call mpld3 when ``self.draw`` is set.

     When ``self.draw`` is truthy, an ``InteractiveLegendPlugin`` built from
     ``self.s1``, ``self.labels`` and ``self.ax`` is connected to ``self.fig``
     and ``mpld3.display()`` is called (its result is discarded).
     Otherwise ``meeting.minutes`` is printed.
     """
     if self.draw:
         plugins.connect(
             self.fig, plugins.InteractiveLegendPlugin(
                 self.s1, self.labels, ax=self.ax))
         mpld3.display()
     else:
         # NOTE(review): Python 2 print statement; ``meeting`` is not defined
         # in this method, so it must exist at module level -- confirm.
         print meeting.minutes
def plot_ts(*args, **kwargs):
    """Create an interactive JavaScript T-S plot.

    All arguments are forwarded to ``nplt.plot_ts``. The keyword ``labels``
    (default: the label of every line on the axes) and ``alpha``
    (default 0.2, used as ``alpha_unsel``) also configure the interactive
    legend that is connected to the figure.

    Returns:
        The axes returned by ``nplt.plot_ts``.
    """
    ax = nplt.plot_ts(*args, **kwargs)

    line_labels = [line.get_label() for line in ax.lines]
    legend_labels = kwargs.get("labels", line_labels)
    unselected_alpha = kwargs.get("alpha", 0.2)

    legend = InteractiveLegendPlugin(ax.lines,
                                     legend_labels,
                                     alpha_unsel=unselected_alpha)
    plugins.connect(ax.figure, legend)
    mpld3.display()
    return ax
Example #5
0
def plot_ts(*args, **kwargs):
    """Create an interactive JavaScript T-S plot.

    Positional and keyword arguments go straight to ``nplt.plot_ts``.
    ``labels`` and ``alpha`` are additionally read from ``kwargs`` to set up
    the interactive legend (defaults: each line's own label, and 0.2).

    Returns:
        The axes object produced by ``nplt.plot_ts``.
    """
    axes = nplt.plot_ts(*args, **kwargs)
    drawn = axes.lines

    # The fallback labels are always computed, exactly as a dict.get default.
    own_labels = [entry.get_label() for entry in drawn]
    plugin = InteractiveLegendPlugin(
        drawn,
        kwargs.get("labels", own_labels),
        alpha_unsel=kwargs.get("alpha", 0.2))

    plugins.connect(axes.figure, plugin)
    mpld3.display()
    return axes
Example #6
0
def renderGraph(dataSet):
    """Build a labelled bar chart from ``dataSet`` and return it as HTML.

    Args:
        dataSet: indexable with at least four entries, read as
            ``(data, title, ylabel, xlabels)``.

    Returns:
        The HTML produced by ``fig_to_html`` for the bar chart.
    """
    data = dataSet[0]
    title = dataSet[1]
    ylabel = dataSet[2]
    xlabels = dataSet[3]

    ind = np.arange(len(data))  # the x locations for the groups
    width = 0.35  # the width of the bars

    # A single figure is enough. The previous code opened two extra empty
    # figures and converted one of *those* to HTML, so the returned markup
    # never contained the bars.
    fig, ax = plt.subplots()
    rects1 = ax.bar(ind, data, width, color='r')

    # add some text for labels, title and axes ticks
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(ind + width / 2)
    ax.set_xticklabels(xlabels)

    def autolabel(rects):
        """Attach a text label above each bar displaying its height."""
        for rect in rects:
            height = rect.get_height()
            ax.text(rect.get_x() + rect.get_width() / 2.,
                    1.05 * height,
                    '%d' % int(height),
                    ha='center',
                    va='bottom')

    autolabel(rects1)

    # note: mpld3 needs jinja2 installed for the HTML conversion
    try:
        return fig_to_html(fig)
    finally:
        # always release the figure so repeated calls do not accumulate them
        plt.close(fig)
Example #7
0
def plot_by_mpld3(dataframe,figsize=(12,6),marker='o',grid=True,
                           alpha_ax=0.3,alpha_plot=0.4,alpha_unsel=0.3,alpha_over=1.5,
                           title=None,xlabel=None,ylabel=None,mode="display",file=None):
    """Build an interactive D3 (mpld3) line chart from a DataFrame.

    Every column is drawn as a labelled line with markers, plus a second,
    semi-transparent line of the same colour. An interactive legend is
    attached to the labelled lines.

    Args:
        dataframe: DataFrame whose columns are plotted against their index.
        figsize: figure size passed to ``plt.subplots``.
        marker: marker used for the labelled lines.
        grid: forwarded to ``ax.grid``.
        alpha_ax: grid transparency.
        alpha_plot: transparency of the second line drawn per column.
        alpha_unsel: legend plugin ``alpha_unsel``.
        alpha_over: legend plugin ``alpha_over``.
        title: optional axes title (font size is ``len(title) + 5``).
        xlabel: optional x-axis label.
        ylabel: optional y-axis label.
        mode: ``'html'`` returns the HTML string, ``'save'`` writes to
            ``file`` (when given); anything else displays the chart.
        file: target passed to ``mpld3.save_html`` in ``'save'`` mode.

    Returns:
        The HTML string in ``'html'`` mode, ``None`` after saving, otherwise
        the object returned by ``mpld3.display``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.grid(grid, alpha=alpha_ax)

    # DataFrame.iteritems() was removed in pandas 2.0; items() is the
    # equivalent that works on both old and new versions.
    for key, val in dataframe.items():
        line, = ax.plot(val.index, val.values, label=key, marker=marker)
        ax.plot(val.index, val.values, color=line.get_color(), alpha=alpha_plot)

    # define interactive legend (only labelled lines are returned here)
    handles, labels = ax.get_legend_handles_labels()
    interactive_legend = plugins.InteractiveLegendPlugin(handles, labels,
                                                         alpha_unsel=alpha_unsel,
                                                         alpha_over=alpha_over,
                                                         start_visible=True)
    plugins.connect(fig, interactive_legend)

    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)
    if title:
        ax.set_title(title, size=len(title) + 5)

    if mode == 'html':  # return html script
        return mpld3.fig_to_html(fig)
    if mode == 'save' and file:  # save file
        mpld3.save_html(fig, file)
        return None
    # display chart; pass the figure explicitly instead of relying on the
    # pyplot "current figure"
    return mpld3.display(fig)
Example #8
0
def _scatter_plot(X, y, point_labels, scatter_params, css):
    """Draw a 2-D scatter plot and return it as an mpld3 display object.

    Args:
        X: two-column data, wrapped in a DataFrame with columns ``x``/``y``.
        y: per-point values used for colouring; when its dtype is
            ``object`` the values are factorized into integer codes first.
        point_labels: optional HTML tooltip per point; tooltips are only
            attached when this is non-empty.
        scatter_params: keyword arguments for ``ax.scatter``. An optional
            ``'figsize'`` entry sets the figure size (default ``(8, 8)``).
            The mapping is not modified.
        css: CSS passed to the tooltip plugin.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    X = pd.DataFrame(X, columns=['x', 'y'])

    # Work on a copy: the previous code deleted 'figsize' from the caller's
    # dict and also created (and leaked) a default-sized figure before
    # creating the requested one.
    params = dict(scatter_params)
    figsize = params.pop('figsize', (8, 8))
    fig, ax = plt.subplots(figsize=figsize)

    if y.dtype == 'object':
        colors, _classes = y.factorize()
    else:
        colors = y

    scatter = ax.scatter(x=X['x'], y=X['y'], c=colors, **params)

    if point_labels is not None and len(point_labels) > 0:
        tooltip = mpld3.plugins.PointHTMLTooltip(scatter,
                                                 labels=point_labels,
                                                 css=css)
        mpld3.plugins.connect(fig, tooltip)
    else:
        mpld3.plugins.connect(fig)

    return mpld3.display(fig)
Example #9
0
def zoomable_walk(adn):
    """Plot the path computed from ``adn`` and return a zoomable HTML view.

    The length of the input sequence is printed first. The coordinates come
    from ``path_x_y(adn)`` and are drawn with ``pyplot.plot``.

    Returns:
        The object returned by ``mpld3.display()``.
    """
    sequence_length = len(adn)
    print("longueur de la séquence d'entrée", sequence_length)

    xs, ys = path_x_y(adn)
    pyplot.plot(xs, ys)

    # Instead of showing the drawing with pyplot.show(), hand back an HTML
    # object that the notebook renders.
    html_view = mpld3.display()
    return html_view
Example #10
0
def plot_mle_graph(function,
                   mle_params,
                   x_start=eps, x_end=1 - eps,
                   y_start=eps, y_end=1 - eps, resolution=100,
                   x_label="x", y_label="y",
                   show_constraint=False,
                   show_optimum=False):
    """Draw a contour plot of ``function`` around its value at ``mle_params``.

    ``function`` is evaluated point by point on a ``resolution`` x
    ``resolution`` grid. Contour levels run in steps of 0.25 from 3.0 below
    ``function(*mle_params)`` up to 2.0 above it, capped at -0.1.

    Args:
        function: callable of two scalars.
        mle_params: arguments at which ``function`` gives the reference
            value; the first two entries are also the marked optimum.
        x_start, x_end, y_start, y_end: grid bounds.
        resolution: number of grid points per axis.
        x_label, y_label: axis labels.
        show_constraint: also draw the line ``y = 1 - x``.
        show_optimum: also mark ``(mle_params[0], mle_params[1])``.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    xs = np.linspace(x_start, x_end, resolution)
    ys = np.linspace(y_start, y_end, resolution)
    grid_x, grid_y = np.meshgrid(xs, ys)
    evaluate = np.vectorize(lambda a, b: function(a, b))
    surface = evaluate(grid_x, grid_y)

    reference = function(*mle_params)
    step = 0.25
    below = np.arange(reference - 3.0, reference, step)
    above = np.arange(reference, min(reference + 2.0, -0.1), step)
    contour_levels = np.concatenate([below, above])

    fig = plt.figure()
    contour = plt.contour(xs, ys, surface, levels=contour_levels)
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    if show_constraint:
        plt.plot(xs, 1 - xs)
    if show_optimum:
        opt_x, opt_y = mle_params[0], mle_params[1]
        plt.plot(opt_x, opt_y, 'ro')
    plt.clabel(contour)
    return mpld3.display(fig)
Example #11
0
def plot_mle_graph(function,
                   mle_params,
                   x_start=eps,
                   x_end=1 - eps,
                   y_start=eps,
                   y_end=1 - eps,
                   resolution=100,
                   x_label="x",
                   y_label="y",
                   show_constraint=False,
                   show_optimum=False):
    """Contour-plot a two-argument function around its value at ``mle_params``.

    The function is sampled on a regular grid, and contour levels are spaced
    0.25 apart from ``value - 3.0`` up to ``min(value + 2.0, -0.1)``, where
    ``value = function(*mle_params)``.

    Args:
        function: callable taking two scalars.
        mle_params: point used for the reference value and the optional
            optimum marker (first two entries).
        x_start, x_end, y_start, y_end: bounds of the sampled grid.
        resolution: samples per axis.
        x_label, y_label: axis labels.
        show_constraint: draw ``1 - x`` over the contour plot.
        show_optimum: draw a red dot at the first two ``mle_params``.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    x_axis = np.linspace(x_start, x_end, resolution)
    y_axis = np.linspace(y_start, y_end, resolution)
    mesh_x, mesh_y = np.meshgrid(x_axis, y_axis)
    z_values = np.vectorize(lambda u, v: function(u, v))(mesh_x, mesh_y)

    optimum_value = function(*mle_params)
    upper_bound = min(optimum_value + 2.0, -0.1)
    level_parts = [
        np.arange(optimum_value - 3.0, optimum_value, 0.25),
        np.arange(optimum_value, upper_bound, 0.25),
    ]

    fig = plt.figure()
    contour_set = plt.contour(x_axis,
                              y_axis,
                              z_values,
                              levels=np.concatenate(level_parts))
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    if show_constraint:
        plt.plot(x_axis, 1 - x_axis)
    if show_optimum:
        plt.plot(mle_params[0], mle_params[1], 'ro')
    plt.clabel(contour_set)

    rendered = mpld3.display(fig)
    return rendered
def renderGraph(dataSet):
    """Convert ``dataSet`` into a bar chart and return the chart as HTML.

    Args:
        dataSet: indexable with at least four entries, read as
            ``(data, title, ylabel, xlabels)``.

    Returns:
        The HTML produced by ``fig_to_html`` for the chart.
    """
    data = dataSet[0]
    title = dataSet[1]
    ylabel = dataSet[2]
    xlabels = dataSet[3]
    dsize = len(data)
    ind = np.arange(dsize)  # the x locations for the groups
    width = 0.35  # the width of the bars

    chart = plt.figure(1)
    chart.set_figwidth(11)
    ax = plt.subplot(autoscale_on=True)
    rects1 = ax.bar(ind, data, width, color='r')

    # add some text for labels, title and axes ticks
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(ind + width / 2)
    ax.set_xticklabels(xlabels)

    def autolabel(rects):
        """Attach a text label above each bar displaying its height."""
        for rect in rects:
            height = rect.get_height()
            ax.text(rect.get_x() + rect.get_width() / 2., 1.02 * height,
                    '%d' % int(height),
                    ha='center', va='bottom')

    autolabel(rects1)

    # note: mpld3 needs jinja2 installed for the HTML conversion
    chart.tight_layout()
    try:
        # The previous code opened a throwaway empty figure here and relied
        # on a discarded mpld3.display() call to close the chart.
        return fig_to_html(chart)
    finally:
        # figure number 1 is reused on every call, so it must be released
        plt.close(chart)
def make_mpld3_mds_plot(xs,ys,data_pd=gdata_pd):
    """Plot 2-D coordinates grouped by political leaning with HTML tooltips.

    Args:
        xs, ys: point coordinates (the original comment calls them MDS
            coordinates).
        data_pd: DataFrame providing the ``leaning`` and ``source`` columns;
            defaults to the module-level ``gdata_pd``.

    Returns:
        The HTML string produced by ``mpld3.fig_to_html``. (The previous
        version computed this string but returned ``None``.)
    """
    cluster_colors = {'conservative': '#1b9e77', 'liberal': '#d95f02'}

    # New dataframe with the coordinates plus label/source from the input.
    plot_df = pd.DataFrame(dict(x=xs, y=ys, label=data_pd.loc[:,'leaning'], source=data_pd.loc[:,'source']))
    groups = plot_df.groupby('label')

    #css formatting
    css = """
    text.mpld3-text, div.mpld3-tooltip {
    font-family:Arial, Helvetica, sans-serif;
    }

    g.mpld3-xaxis, g.mpld3-yaxis {
            display: none; }

    svg.mpld3-figure {
    margin-left: 0px;}
    """

    fig, ax = plt.subplots(figsize=(14,6))
    ax.set_aspect('auto')

    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=18, label=name, mec='none', color=cluster_colors[name])
        # Attach the tooltip per group. It used to be created once after the
        # loop, so only the last group had tooltips (and an empty grouping
        # raised NameError on ``points``).
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], list(group.source), voffset=10, hoffset=10, css=css)
        mpld3.plugins.connect(fig, tooltip)
    mpld3.plugins.connect(fig, TopToolbar())

    # No axes, qualitative plot
    ax.axes.get_xaxis().set_ticks([])
    ax.axes.get_yaxis().set_ticks([])
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.legend(numpoints=1)

    # export html, then release the figure
    html = mpld3.fig_to_html(fig)
    plt.close(fig)
    return html
Example #14
0
def plot_1D(loss, theta1, theta2, N=100, eps=0.01, loss2=None, ylim=None):
    """Plot one or two 1-D loss curves on a fresh figure via ``create_1D_plot``.

    Args:
        loss: first loss, always drawn.
        theta1, theta2: forwarded unchanged to ``create_1D_plot``.
        N: number of sample points.
        eps: margin kept from 0 and 1 when sampling ``[eps, 1 - eps]``.
        loss2: optional second loss drawn on the same figure.
        ylim: optional y limits applied before drawing.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    samples = np.linspace(eps, 1.0 - eps, N)
    fig = plt.figure()
    if ylim is not None:
        plt.ylim(ylim)

    curves = [loss] if loss2 is None else [loss, loss2]
    for curve in curves:
        create_1D_plot(samples, curve, theta1, theta2)

    return mpld3.display(fig)
Example #15
0
def display_comparison_plot_mpld3(t, arr, names, line_styles, title, xtitle, ytitle, ylim, figname):
    """Render a multi-line comparison plot as interactive mpld3 HTML.

    Column ``i`` of ``arr`` is plotted against ``t`` for every entry of
    ``names``, using ``line_styles[i]``. A ``HighlightLines`` plugin built
    from the module-level ``css`` is attached to the figure.

    Args:
        t: x values shared by every line.
        arr: 2-D array-like indexed as ``arr[:, i]`` for the i-th line.
        names: line labels; its length decides how many lines are drawn.
        line_styles: matplotlib line style for each line.
        title: axes title.
        xtitle: x-axis label.
        ytitle: y-axis label.
        ylim: value forwarded to ``ax.set_ylim``.
        figname: unused (only referenced by a disabled ``save_html`` call);
            kept so existing callers keep working.

    Returns:
        The HTML string produced by ``mpld3.fig_to_html``.
    """
    f, ax = plt.subplots()
    lines = []
    for i, name in enumerate(names):
        line, = ax.plot(t, arr[:, i], label=name, lw=3, ls=line_styles[i], alpha=0.2)
        lines.append(line)

    ax.set_xlabel(xtitle)
    ax.set_ylabel(ytitle)
    ax.set_title(title)
    ax.set_ylim(ylim)
    ax.grid()

    plugins.connect(f, HighlightLines(lines, names, css))

    # Render once and close the figure explicitly; the old discarded
    # mpld3.display() call rendered everything twice just to close it.
    html = mpld3.fig_to_html(f)
    plt.close(f)
    return html
Example #16
0
def dataVisualDynamtic(xs,ys,clusters,titles):
    """Plot clustered 2-D points as an interactive mpld3 scatter with tooltips.

    Args:
        xs, ys: point coordinates (the original comment describes them as
            the result of MDS).
        clusters: cluster id per point; used for grouping and as the key
            into the module-level ``cluster_names`` / ``cluster_colors``.
        titles: tooltip text per point.

    Returns:
        The object returned by ``mpld3.display`` for the figure, so that a
        notebook can render it. (The previous version discarded it.)
    """
    # Build a DataFrame from the coordinates, cluster ids and titles.
    df = pd.DataFrame(dict(x=xs, y=ys, label=clusters, title=titles))
    # Group by cluster.
    groups = df.groupby('label')

    # Custom css: format the font and remove the axis labels.
    css = """
    text.mpld3-text, div.mpld3-tooltip { font-family:Arial, Helvetica, sans-serif; }
    g.mpld3-xaxis, g.mpld3-yaxis { display: none; }
    svg.mpld3-figure { margin-left: -200px;}
    """

    fig, ax = plt.subplots(figsize=(14,6))  # set the figure size
    ax.margins(0.03)  # optional: add 3% padding for autoscaling
    ax.set_aspect('auto')

    # One series per cluster; name and colour come from the lookup dicts.
    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=18, label=cluster_names[name], mec='none', color=cluster_colors[name])
        # Tooltip with this group's titles and the css defined above.
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], list(group.title), voffset=10, hoffset=10, css=css)
        mpld3.plugins.connect(fig, tooltip)

    # Figure-wide settings are applied once instead of once per group.
    mpld3.plugins.connect(fig, TopToolbar())

    # hide tick marks
    ax.axes.get_xaxis().set_ticks([])
    ax.axes.get_yaxis().set_ticks([])

    # hide the axes
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)

    ax.legend(numpoints=1)  # show a single marker per legend entry
    return mpld3.display(fig)
Example #17
0
def plot_2D_function(function, x_start=eps, x_end=1 - eps,
                     y_start=eps, y_end=1 - eps, resolution=100,
                     x_label="x", y_label="y"):
    """Draw a labelled contour plot of a two-argument scalar function.

    Args:
        function: callable taking two scalars.
        x_start, x_end, y_start, y_end: bounds of the sampled grid.
        resolution: number of samples per axis.
        x_label, y_label: axis labels.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    xs = np.linspace(x_start, x_end, resolution)
    ys = np.linspace(y_start, y_end, resolution)
    grid_x, grid_y = np.meshgrid(xs, ys)
    evaluate = np.vectorize(lambda a, b: function(a, b))
    heights = evaluate(grid_x, grid_y)

    fig = plt.figure()
    contour_set = plt.contour(xs, ys, heights)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.clabel(contour_set)
    return mpld3.display(fig)
def plot_tree_mpld3(nodes: List[str],
                    edgelist: List[Tuple[str]],
                    svgs: List[Any],
                    extra_string: Optional[List] = None,
                    figsize: Optional[Tuple[int, int]] = None,
                    layout: str = 'rt',
                    figure_savefile: Optional[str] = None):
    """Draw a graph with mpld3, showing one SVG tooltip per node.

    Args:
        nodes: vertex names added to the graph.
        edgelist: edges added to the graph.
        svgs: tooltip markup, one entry per node. The list is not modified.
        extra_string: optional extra text per node; every line of it is
            wrapped in a ``<div>`` and inserted right after the SVG's XML
            declaration (or at the very start when no declaration is found).
        figsize: figure size passed to ``plt.subplots``.
        layout: layout name passed to ``compute_layout``.
        figure_savefile: when given, the HTML is also written to this path.

    Returns:
        None. The figure is handed to ``display``.
    """
    G = Graph()
    G.add_vertices(nodes)
    G.add_edges(edgelist)

    if extra_string:
        lookup_str = "encoding='iso-8859-1'?>\n"  # in the svg
        style = 'style="background-color:#ffffff"'
        # Annotate a copy so repeated calls with the same list do not stack
        # the extra text on top of earlier insertions.
        svgs = list(svgs)
        for i, (exs, svg) in enumerate(zip(extra_string, svgs)):
            found = svg.find(lookup_str)
            # find() returns -1 when the declaration is missing; the old
            # arithmetic then spliced the text into the middle of the markup.
            str_pos = found + len(lookup_str) if found != -1 else 0
            _insert = ''.join(
                [f'<div {style}>{x}</div>' for x in exs.split('\n')])
            svgs[i] = svg[:str_pos] + _insert + svg[str_pos:]

    x_edges, y_edges, x_nodes, y_nodes = compute_layout(G, layout)

    fig, ax = plt.subplots(figsize=figsize)

    for x, y in zip(x_edges, y_edges):
        ax.plot(x, y, c='gray', linestyle='--')

    points = ax.scatter(x_nodes, y_nodes, s=150, c='gray')

    tooltip = plugins.PointHTMLTooltip(points, svgs)
    plugins.connect(fig, tooltip)

    if figure_savefile is not None:
        with open(figure_savefile, 'w') as f:
            save_html(fig, f)
    display(fig)
Example #19
0
def plot_bar_graph(values, labels, rotation=0, align='center', use_mpld3=False):
    """
    Plots a bar graph on a new figure.
    Args:
        values: bar values.
        labels: bar labels, placed at x positions 0..len(values)-1.
        rotation: by which angle should the labels be rotated.
        align: how to align the bars on their x positions.
        use_mpld3: should we use mpld3 to render the graph.

    Returns: the mpld3 display object when use_mpld3 is set, else None.

    """
    positions = range(0, len(values))
    fig = plt.figure()
    tick_locations = [float(pos) for pos in positions]
    plt.xticks(tick_locations, labels, rotation=rotation)
    plt.bar(positions, values, align=align)
    if not use_mpld3:
        return None
    return mpld3.display(fig)
    def visualizeAmAll(self):
        """Load the Amazon tweets and predictions and chart the predictions.

        Reads two CSV files whose names are ``'Data/amazonAll'`` followed by
        ``self.twitterData`` and ``self.twitterPreds``, keeps column 1 of the
        first and columns 1-5 of the second, interprets the predictions with
        ``interpretPreds`` and draws them with ``barChartPreds``.

        Returns:
            The object returned by ``mpld3.display`` for ``fig``.
        """
        tweet_columns = [False, True] + [False] * 7
        tweet_frame = pd.read_csv('Data/amazonAll' + self.twitterData)
        tweets = tweet_frame.iloc[:, tweet_columns].to_numpy()

        prediction_columns = [False] + [True] * 5
        prediction_frame = pd.read_csv('Data/amazonAll' + self.twitterPreds)
        predictions = prediction_frame.iloc[:, prediction_columns].to_numpy()

        # flatten the (rows x 1) array into a 1-D array of tweets
        tweets = np.array([text for row in tweets for text in row])
        singlePred, _confidence = interpretPreds(predictions)

        # NOTE(review): plt.Figure() builds a Figure that pyplot does not
        # manage; if barChartPreds draws on the current pyplot figure, this
        # ``fig`` stays empty -- confirm.
        fig = plt.Figure()
        barChartPreds(singlePred)
        return mpld3.display(fig)
    def plot_ObsConfused(self,classes,preds,globalLim=10,globalNbCols=2,
        lim=10,limByPlots=100,elemsByRows=10,nbCols=2,mods=[],title=None,modelsNames=None,filename=None,titleFontsize=19,**plotConfMat_kwargs):
        """Show the "ObsConfused" plots of several models as one image grid.

        Each selected model's ``viz.plot_ObsConfused`` is called with
        ``show=False`` and ``returnOK=True``; the results are assembled by
        ``IMG_GRID.grid`` and the resulting figure is displayed through
        mpld3, preceded by a CSS block hiding the mpld3 axes and a download
        link for the image.

        Args:
            classes, preds: forwarded to each model's ``plot_ObsConfused``.
            globalLim: not referenced in this method.
            globalNbCols: number of columns of the overall grid.
            lim, limByPlots, elemsByRows, nbCols: forwarded to each model's
                ``plot_ObsConfused``.
            mods: optional selection of models, as names or as indices into
                ``obj.papa._models.namesModels``; empty means every entry of
                ``obj.resultats``. The default list is never mutated here.
            title: grid title; defaults to ``"ObsConfused CV <obj.ID>"``.
            modelsNames: optional names used as keys for the selected models.
            filename: download name; defaults to
                ``"obj_confused_cv_<obj.ID>.png"``.
            titleFontsize: font size of the grid title.
            **plotConfMat_kwargs: forwarded to each model's
                ``plot_ObsConfused``.
        """
        # from ..helpers import plotDigits
        obj=self.obj

        modsN=obj.papa._models.namesModels
        # presumably a dict of results keyed by model name (``.values()`` is
        # called on it below) -- confirm
        models=obj.resultats

        if len(mods)>0:
            # resolve indices to names, then rebuild a name -> result mapping
            mods_ = [i if isStr(i) else modsN[i] for i in mods]
            models= [obj.resultats[i] for i in mods_]
            modelsNames_=[i for i in mods_]
            models=dict(zip(modelsNames_,models)) if modelsNames is None else dict(zip(modelsNames,models))

        # namesY= namesEscape(namesY) if namesY is not None else namesY
        confMatM=[v.viz.plot_ObsConfused(classes,preds,lim=lim,limByPlots=limByPlots,
                                        elemsByRows=elemsByRows,returnOK=True,nbCols=nbCols,show=False,**plotConfMat_kwargs) for v in models.values()]

        title = "ObsConfused CV {}".format(obj.ID) if title is None else title
        filename="obj_confused_cv_{}.png".format(obj.ID) if filename is None else filename
        # print(confMatM)
        img=IMG_GRID.grid(confMatM,nbCols=globalNbCols,toImg=True,title=title,titleFontsize=titleFontsize)
        # img.show(figsize=img.figsize,show=True);
        # print(img.data)
        fig=img.show(returnFig=True,show=False,figsize=img.figsize)
        from IPython.display import display_html, HTML
        import mpld3
        # mpld3.enable_notebook()
        # hide the axes that mpld3 would draw around the image
        display_html(HTML("""
        <style>
        g.mpld3-xaxis, g.mpld3-yaxis {
        display: none;
        }
        </style>
        """))
        # # print(img)
        # # print(img.filename)
        # # print(img.data)
        # download link; str(...)[2:-1] presumably strips the b'...' wrapper
        # of a bytes repr -- confirm that img.data is base64 bytes
        display_html(HTML("""
            <span style='width:20px;height:20px;position: absolute;' title="Save image as png">
        <a href="data:image/png;base64,{{imgData}}" download="{{filename}}"><img width="20px" src=""></a></span>
            """.replace("{{imgData}}",str(img.data)[2:-1]).replace("{{filename}}",filename)))
        display_html(mpld3.display(fig))
        plt.close()
Example #22
0
def plot_2D_function(function,
                     x_start=eps,
                     x_end=1 - eps,
                     y_start=eps,
                     y_end=1 - eps,
                     resolution=100,
                     x_label="x",
                     y_label="y"):
    """Contour-plot ``function`` over a rectangular grid.

    Args:
        function: callable of two scalars, evaluated at every grid point.
        x_start, x_end, y_start, y_end: grid bounds.
        resolution: samples per axis.
        x_label, y_label: axis labels.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    x_axis = np.linspace(x_start, x_end, resolution)
    y_axis = np.linspace(y_start, y_end, resolution)
    mesh_x, mesh_y = np.meshgrid(x_axis, y_axis)
    z_values = np.vectorize(lambda u, v: function(u, v))(mesh_x, mesh_y)

    fig = plt.figure()
    labelled = plt.contour(x_axis, y_axis, z_values)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.clabel(labelled)

    rendered = mpld3.display(fig)
    return rendered
Example #23
0
def scatterz(x, y, data, labels, xscale='linear', yscale='linear', n=250):
    """Scatter-plot two columns of ``data`` with per-point mpld3 tooltips.

    Args:
        x: name of the column plotted on the x axis (also the axis label).
        y: name of the column plotted on the y axis (also the axis label).
        data: table supporting ``head`` and column indexing (presumably a
            pandas DataFrame -- confirm); only the first ``n`` rows are used.
        labels: name of the column whose values become the tooltip texts.
        xscale: ``'linear'`` pads the x limits by 3.5% of the data range,
            ``'log'`` switches the axis to log scale.
        yscale: same as ``xscale`` for the y axis.
        n: number of leading rows to plot.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """

    def padded_limits(values, pad=0.035):
        """Return (min, max) of ``values`` widened by ``pad`` of the range."""
        low, high = values.min(), values.max()
        margin = (high - low) * pad
        return low - margin, high + margin

    # ``axisbg`` was removed from matplotlib; ``facecolor`` is its
    # replacement. The figure used to be created at 10x10 and immediately
    # resized to 8x8, so it is created at its final size here.
    fig, ax = plt.subplots(subplot_kw=dict(facecolor='#FFFFFF'), figsize=(8, 8))
    data = data.head(n)

    scatter = ax.scatter(data[x], data[y], s=25, alpha=0.5)

    ax.grid(color='white', linestyle='solid')

    if yscale == 'linear':
        ax.set_ylim(padded_limits(data[y]))
    elif yscale == 'log':
        ax.set_yscale("log")

    if xscale == 'linear':
        ax.set_xlim(padded_limits(data[x]))
    elif xscale == 'log':
        ax.set_xscale("log")

    ax.set_xlabel(x, fontsize=18, labelpad=15, color="gray")
    ax.set_ylabel(y, fontsize=18, labelpad=15, color="gray")

    # one space of padding on each side of every tooltip text
    point_labels = [' {0} '.format(text) for text in data[labels].astype(str)]
    tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=point_labels)
    mpld3.plugins.connect(fig, tooltip)

    return mpld3.display(fig)
Example #24
0
 def display(self,
             legend=True,
             update=True,
             use_mpld3=True,
             hide_volume_labels=False,
             **kw):
     """Prepare the figure and return an object suitable for display.

     Intended for viewing figures in a Jupyter notebook.

     Arguments:

         legend   -  True adds a legend, 'custom' leaves the legend
                     untouched, anything else removes it
                     Default: True

         update   -  boolean for reapplying spines, ticks, title, labels
                     and axis limits to the graph
                     Default: True

         use_mpld3 - boolean for using mpld3 to produce interactive html;
                     when false the matplotlib figure itself is returned
                     Default: True

         hide_volume_labels - boolean for hiding x axis volume labels
                     Default: False

         **kw      - additional keyword arguments are passed to mpld3.display
     """
     # 'custom' means: keep whatever legend is currently on the figure.
     if legend == True:
         self.add_legend()
     elif legend != 'custom':
         self.remove_legend()

     if update:
         self.set_spines_and_ticks()
         self.set_title_and_labels(hide_volume_labels=hide_volume_labels)
         self.set_limits()

     if not use_mpld3:
         return self.fig
     return mpld3.display(self.fig, **kw)
Example #25
0
def plot_bar_graph(values,
                   labels,
                   rotation=0,
                   align='center',
                   use_mpld3=False):
    """
    Plots a bar graph on a new figure.
    Args:
        values: bar values.
        labels: bar labels, one per x position 0..len(values)-1.
        rotation: by which angle should the labels be rotated.
        align: how to align the bars on their x positions.
        use_mpld3: should we use mpld3 to render the graph.

    Returns: the mpld3 display object when use_mpld3 is set, else None.

    """
    bar_count = len(values)
    fig = plt.figure()
    plt.xticks([float(index) for index in range(0, bar_count)],
               labels,
               rotation=rotation)
    plt.bar(range(0, bar_count), values, align=align)
    if use_mpld3:
        rendered = mpld3.display(fig)
        return rendered
    return None
    def scatterplot(self, year):
        """Scatter invested equity against carbon reserves for one year.

        The carbon reserve of a row is the sum of its coal, oil and gas
        columns; the equity is its ``EndingMarketValue``. Each point gets the
        company name as an mpld3 tooltip.

        Args:
            year: key into ``self.data``; must be contained in ``self.years``.

        Returns:
            The object returned by ``mpld3.display()``.
        """
        assert year in self.years
        frame = self.data[year]
        fuel_columns = ["Coal(tCO2)", "Oil(tCO2)", "Gas(tCO2)"]
        carbon = frame.loc[:, fuel_columns].sum(axis=1)
        equity = frame.loc[:, ["EndingMarketValue"]]

        fig, ax = plt.subplots()
        dots = ax.scatter(equity, carbon)

        ax.set_xlabel("Equity Invested (USD)")
        ax.set_ylabel("Carbon Reserves (tCO2)")
        ax.set_title(f"Invested Fossil Fuel Companies in {year}")

        def with_thousands_separator(value, _position):
            """Format a tick value as an integer with ',' separators."""
            return format(int(value), ',')

        # each axis gets its own formatter instance
        for axis in (ax.get_xaxis(), ax.get_yaxis()):
            axis.set_major_formatter(
                matplotlib.ticker.FuncFormatter(with_thousands_separator))

        companies = frame.loc[:, "Company(Company)"].values.tolist()
        hover = mpld3.plugins.PointLabelTooltip(dots, labels=companies)
        mpld3.plugins.connect(fig, hover)

        return mpld3.display()
Example #27
0
                                             hoffset=10,
                                             css=css)
    #connect tooltip to fig
    mpld3.plugins.connect(fig, tooltip, TopToolbar())

    #set tick marks as blank
    ax.axes.get_xaxis().set_ticks([])
    ax.axes.get_yaxis().set_ticks([])

    #set axis as blank
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)

# numpoints is the number of marker points drawn per legend entry, so it has
# to be the integer 1 to get "only one dot" (it was 0.5).
ax.legend(numpoints=1)  #show legend with only one dot

mpld3.display()  #show the plot

#uncomment the below to export to html
#html = mpld3.fig_to_html(fig)
#print(html)

# In[193]:

# Plot silhouette distribution
X = tfidf_matrix
range_n_clusters = [12]

for n_clusters in range_n_clusters:
    # Create a subplot with 1 row and 2 columns
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)
Example #28
0
def plotter(df,
            title=False,
            kind='line',
            x_label=None,
            y_label=None,
            style='ggplot',
            figsize=(8, 4),
            save=False,
            legend_pos='best',
            reverse_legend='guess',
            num_to_plot=7,
            tex='try',
            colours='default',
            cumulative=False,
            pie_legend=True,
            partial_pie=False,
            show_totals=False,
            transparent=False,
            output_format='png',
            interactive=False,
            black_and_white=False,
            show_p_val=False,
            indices=False,
            transpose=False,
            rot=False,
            **kwargs):
    """Visualise corpus interrogations.
    :param title: A title for the plot
    :type title: str
    :param df: Data to be plotted
    :type df: Pandas DataFrame
    :param x_label: A label for the x axis
    :type x_label: str
    :param y_label: A label for the y axis
    :type y_label: str
    :param kind: The kind of chart to make
    :type kind: str ('line'/'bar'/'barh'/'pie'/'area')
    :param style: Visual theme of plot
    :type style: str ('ggplot'/'bmh'/'fivethirtyeight'/'seaborn-talk'/etc)
    :param figsize: Size of plot
    :type figsize: tuple (int, int)
    :param save: If bool, save with *title* as name; if str, use str as name
    :type save: bool/str
    :param legend_pos: Where to place legend
    :type legend_pos: str ('upper right'/'outside right'/etc)
    :param reverse_legend: Reverse the order of the legend
    :type reverse_legend: bool
    :param num_to_plot: How many columns to plot
    :type num_to_plot: int/'all'
    :param tex: Use TeX to draw plot text
    :type tex: bool
    :param colours: Colourmap for lines/bars/slices
    :type colours: str
    :param cumulative: Plot values cumulatively
    :type cumulative: bool
    :param pie_legend: Show a legend for pie chart
    :type pie_legend: bool
    :param partial_pie: Allow plotting of pie slices only
    :type partial_pie: bool
    :param show_totals: Print sums in plot where possible
    :type show_totals: str -- 'legend'/'plot'/'both'
    :param transparent: Transparent .png background
    :type transparent: bool
    :param output_format: File format for saved image
    :type output_format: str -- 'png'/'pdf'
    :param black_and_white: Create black and white line styles
    :type black_and_white: bool
    :param show_p_val: Attempt to print p values in legend if contained in df
    :type show_p_val: bool
    :param indices: To use when plotting "distance from root"
    :type indices: bool
    :param stacked: When making bar chart, stack bars on top of one another
    :type stacked: str
    :param filled: For area and bar charts, make every column sum to 100
    :type filled: str
    :param legend: Show a legend
    :type legend: bool
    :param rot: Rotate x axis ticks by *rot* degrees
    :type rot: int
    :param subplots: Plot each column separately
    :type subplots: bool
    :param layout: Grid shape to use when *subplots* is True
    :type layout: tuple -- (int, int)
    :param interactive: Experimental interactive options
    :type interactive: list -- [1, 2, 3]
    :returns: matplotlib figure
    """
    import corpkit
    import os
    try:
        from IPython.utils.shimmodule import ShimWarning
        import warnings
        warnings.simplefilter('ignore', ShimWarning)
    except:
        pass

    kwargs['rot'] = rot

    xtickspan = kwargs.pop('xtickspan', False)

    import matplotlib as mpl
    from matplotlib import rc

    # prefer seaborn plotting
    try:
        import seaborn as sns
    except ImportError:
        pass
    except AttributeError:
        pass

    if interactive:
        import matplotlib.pyplot as plt, mpld3
    else:
        import matplotlib.pyplot as plt

    import matplotlib.ticker as ticker

    import pandas
    from pandas import DataFrame, Series

    from time import localtime, strftime
    from process import checkstack

    if interactive:
        import mpld3
        import collections
        from mpld3 import plugins, utils
        from plugins import InteractiveLegendPlugin, HighlightLines

    have_mpldc = False
    try:
        from mpldatacursor import datacursor, HighlightingDataCursor
        have_mpldc = True
    except ImportError:
        pass

    # check what environment we're in
    tk = checkstack('tkinter')
    running_python_tex = checkstack('pythontex')
    running_spider = checkstack('spyder')

    if not title:
        title = ''

    def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
        """Return a new colourmap built from the [minval, maxval] part of *cmap*.

        Used to remove extreme values from a colourmap (no pure white).
        The new map is made from ``n`` colours sampled evenly from *cmap*.
        """
        import numpy as np
        import matplotlib.colors as colors

        label = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name,
                                                    a=minval,
                                                    b=maxval)
        sampled_colours = cmap(np.linspace(minval, maxval, n))
        return colors.LinearSegmentedColormap.from_list(label, sampled_colours)

    def get_savename(imagefolder, save=False, title=False, ext='png'):
        """Come up with the savename for the image.

        The file stem is ``save`` when it is a string, otherwise ``title``;
        it is passed through ``urlify`` and joined onto *imagefolder* with
        the extension *ext* (a leading dot is added when missing).

        :raises ValueError: if *save* is not a string and *title* is empty,
            since no name can be derived (this used to surface as an
            UnboundLocalError).
        """
        import os
        from corpkit.process import urlify

        if not ext.startswith('.'):
            ext = '.' + ext

        if isinstance(save, STRINGTYPE):
            stem = save
        elif title:
            stem = title
        else:
            raise ValueError(
                'Cannot build a save name: pass *save* as a string '
                'or provide a non-empty *title*.')
        savename = os.path.join(imagefolder, urlify(stem) + ext)

        # remove duplicated ext: strip it from the end, where the check
        # found it, rather than replacing the first occurrence in the path
        if savename.endswith('%s%s' % (ext, ext)):
            savename = savename[:-len(ext)]
        return savename

    def rename_data_with_total(dataframe,
                               was_series=False,
                               using_tex=False,
                               absolutes=True):
        """Add totals (absolute or percentage) to the entry name strings.

        :param dataframe: data whose entry names are the columns, or the
            index when *was_series* is true
        :param was_series: the data is a single column whose index holds the
            entry names
        :param using_tex: escape the percent sign for TeX (``\\%``) in
            percentage labels
        :param absolutes: label entries with ``(n=<sum>)``; when false, a
            series gets ``(<value> %)`` labels and a frame keeps its names
        :returns: the same frame with renamed columns, or, for a series, a
            new one-column frame named ``Total`` indexed by the new labels
        """
        if was_series:
            where_the_words_are = dataframe.index
        else:
            where_the_words_are = dataframe.columns

        the_labs = []
        for w in list(where_the_words_are):
            if absolutes:
                if was_series:
                    score = dataframe.T[w].sum()
                else:
                    score = dataframe[w].sum()
                # the label is the same with and without TeX
                the_labs.append('%s (n=%d)' % (w, score))
            elif was_series:
                perc = dataframe.T[w][0]
                # spelled '\\%' because '\%' is an invalid escape sequence;
                # the resulting text is unchanged
                percent_sign = '\\%' if using_tex else ' %'
                the_labs.append('%s (%.2f%s)' % (w, perc, percent_sign))
            else:
                the_labs.append(w)

        if not was_series:
            dataframe.columns = the_labs
        else:
            vals = list(dataframe[list(dataframe.columns)[0]].values)
            dataframe = pandas.DataFrame(vals, index=the_labs)
            dataframe.columns = ['Total']
        return dataframe

    def auto_explode(dataframe, tinput, was_series=False, num_to_plot=7):
        """Turn entry names and/or positions into a pie ``explode`` list.

        :param dataframe: data being plotted; entry names are read from its
                          index when ``was_series`` is True, else its columns
        :param tinput: a name, a position, or a list mixing both
        :param num_to_plot: length of the list that is returned
        :returns: list of ``num_to_plot`` offsets, 0.1 for each selected
                  entry and 0 for the rest
        """
        offsets = [0] * num_to_plot

        entry_names = list(dataframe.index if was_series else dataframe.columns)

        # a lone name or position is handled as a one-item list
        wanted = [tinput] if isinstance(tinput, (STRINGTYPE, int)) else tinput
        if not isinstance(wanted, list):
            return offsets

        for item in wanted:
            # names are looked up among the entries; anything else is
            # used directly as a position
            if isinstance(item, STRINGTYPE):
                position = entry_names.index(item)
            else:
                position = item
            offsets[position] = 0.1
        return offsets

    # get a few options from kwargs
    sbplt = kwargs.get('subplots', False)
    show_grid = kwargs.pop('grid', True)
    the_rotation = kwargs.get('rot', False)
    dragmode = kwargs.pop('draggable', False)
    leg_frame = kwargs.pop('legend_frame', True)
    leg_alpha = kwargs.pop('legend_alpha', 0.8)
    # auto set num to plot based on layout
    lo = kwargs.get('layout', None)
    if lo:
        num_to_plot = lo[0] * lo[1]

    # todo: get this dynamically instead.
    styles = [
        'dark_background', 'bmh', 'grayscale', 'ggplot', 'fivethirtyeight',
        'matplotlib', False, 'mpl-white'
    ]
    #if style not in styles:
    #raise ValueError('Style %s not found. Use %s' % (str(style), ', '.join(styles)))

    if style == 'mpl-white':
        try:
            sns.set_style("whitegrid")
        except:
            pass
        style = 'matplotlib'

    if kwargs.get('savepath'):
        mpl.rcParams['savefig.directory'] = kwargs.get('savepath')
        kwargs.pop('savepath', None)

    mpl.rcParams['savefig.bbox'] = 'tight'
    mpl.rcParams.update({'figure.autolayout': True})

    # try to use tex
    # TO DO:
    # make some font kwargs here
    using_tex = False
    mpl.rcParams['font.family'] = 'sans-serif'
    mpl.rcParams['text.latex.unicode'] = True

    if tex == 'try' or tex is True:
        try:
            rc('text', usetex=True)
            rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
            using_tex = True
        except:
            matplotlib.rc('font', family='sans-serif')
            matplotlib.rc('font', serif='Helvetica Neue')
            matplotlib.rc('text', usetex='false')
            rc('text', usetex=False)
    else:
        rc('text', usetex=False)

    if interactive:
        using_tex = False

    if show_totals is False:
        show_totals = 'none'

    # find out what kind of plot we're making, and enable
    # or disable interactive values if need be
    kwargs['kind'] = kind.lower()

    if interactive:
        if kwargs['kind'].startswith('bar'):
            interactive_types = [3]
        elif kwargs['kind'] == 'area':
            interactive_types = [2, 3]
        elif kwargs['kind'] == 'line':
            interactive_types = [2, 3]
        elif kwargs['kind'] == 'pie':
            interactive_types = None
            warnings.warn(
                'Interactive plotting not yet available for pie plots.')
        else:
            interactive_types = [None]
    if interactive is False:
        interactive_types = [None]

    # find out if pie mode, add autopct format
    piemode = False
    if kind == 'pie':
        piemode = True
        # always the best spot for pie
        #if legend_pos == 'best':
        #legend_pos = 'lower left'
        if show_totals.endswith('plot') or show_totals.endswith('both'):
            kwargs['pctdistance'] = 0.6
            if using_tex:
                kwargs['autopct'] = r'%1.1f\%%'
            else:
                kwargs['autopct'] = '%1.1f%%'

    # copy data, make series into df
    dataframe = df.copy()
    if kind == 'heatmap':
        try:
            dataframe = dataframe.T
        except:
            pass
    was_series = False
    if isinstance(dataframe, Series):
        was_series = True
        if not cumulative:
            dataframe = DataFrame(dataframe)
        else:
            dataframe = DataFrame(dataframe.cumsum())
    else:
        # don't know if this is much good.
        if transpose:
            dataframe = dataframe.T
        if cumulative:
            dataframe = DataFrame(dataframe.cumsum())
        if len(list(dataframe.columns)) == 1:
            was_series = True

    # attempt to convert x axis to ints:
    #try:
    #    dataframe.index = [int(i) for i in list(dataframe.index)]
    #except:
    #    pass

    # remove totals and tkinter order
    if not was_series:
        for name, ax in zip(['Total'] * 2 + ['tkintertable-order'] * 2,
                            [0, 1, 0, 1]):
            try:
                dataframe = dataframe.drop(name, axis=ax, errors='ignore')
            except:
                pass

    try:
        dataframe = dataframe.drop('tkintertable-order', errors='ignore')
    except:
        pass
    try:
        dataframe = dataframe.drop('tkintertable-order',
                                   axis=1,
                                   errors='ignore')
    except:
        pass

    # look at columns to see if all can be ints, in which case, set up figure
    # for depnumming
    if not was_series:
        if indices == 'guess':

            def isint(x):
                """Return True if ``x`` can be read as a whole number.

                Anything ``float()`` cannot convert (non-numeric strings,
                ``None``, tuples, ...) and values ``int()`` cannot represent
                (``nan``, ``inf``) give False instead of raising.
                """
                try:
                    a = float(x)
                    b = int(a)
                # TypeError: float() was given a type it cannot convert
                except (TypeError, ValueError, OverflowError):
                    return False
                return a == b

            if all([isint(x) is True for x in list(dataframe.columns)]):
                indices = True
            else:
                indices = False

        # if depnumming, plot all, transpose, and rename axes
        if indices is True:
            num_to_plot = 'all'
            dataframe = dataframe.T
            if y_label is None:
                y_label = 'Percentage of all matches'
            if x_label is None:
                x_label = ''

    # set backend?
    output_formats = [
        'svgz', 'ps', 'emf', 'rgba', 'raw', 'pdf', 'svg', 'eps', 'png', 'pgf'
    ]
    if output_format not in output_formats:
        raise ValueError('%s output format not recognised. Must be: %s' %
                         (output_format, ', '.join(output_formats)))

    # don't know if these are necessary
    if 'pdf' in output_format:
        plt.switch_backend(output_format)
    if 'pgf' in output_format:
        plt.switch_backend(output_format)

    if num_to_plot == 'all':
        if was_series:
            if not piemode:
                num_to_plot = len(dataframe)
            else:
                num_to_plot = len(dataframe)
        else:
            if not piemode:
                num_to_plot = len(list(dataframe.columns))
            else:
                num_to_plot = len(dataframe.index)

    # explode pie, or remove if not piemode
    if piemode and not sbplt and kwargs.get('explode'):
        kwargs['explode'] = auto_explode(dataframe,
                                         kwargs['explode'],
                                         was_series=was_series,
                                         num_to_plot=num_to_plot)
    else:
        kwargs.pop('explode', None)

    legend = kwargs.get('legend', True)

    #cut data short
    plotting_a_totals_column = False
    if was_series:
        if list(dataframe.columns)[0] != 'Total':
            try:
                can_be_ints = [int(x) for x in list(dataframe.index)]
                num_to_plot = len(dataframe)
            except:
                dataframe = dataframe[:num_to_plot]
        elif list(dataframe.columns)[0] == 'Total':
            plotting_a_totals_column = True
            if not 'legend' in kwargs:
                legend = False
            num_to_plot = len(dataframe)
    else:
        if transpose:
            dataframe = dataframe.head(num_to_plot)
        else:
            dataframe = dataframe.T.head(num_to_plot).T

    # remove stats fields, put p in entry text, etc.
    statfields = ['slope', 'intercept', 'r', 'p', 'stderr']
    try:
        dataframe = dataframe.drop(statfields, axis=1, errors='ignore')
    except:
        pass
    try:
        dataframe.ix['p']
        there_are_p_vals = True
    except:
        there_are_p_vals = False
    if show_p_val:
        if there_are_p_vals:
            newnames = []
            for col in list(dataframe.columns):
                pval = dataframe[col]['p']

                def p_string_formatter(val):
                    """Format a p-value for display inside an entry name.

                    Values below 0.001 are shown as an inequality (with the
                    less-than sign wrapped in math mode when ``using_tex``
                    is set); everything else is shown to three decimals.
                    """
                    if not val < 0.001:
                        return 'p = %s' % format(val, '.3f')
                    if using_tex:
                        return r'p $<$ 0.001'
                    return 'p < 0.001'

                pstr = p_string_formatter(pval)
                newname = '%s (%s)' % (col, pstr)
                newnames.append(newname)
            dataframe.columns = newnames
            dataframe.drop(statfields, axis=0, inplace=True, errors='ignore')
        else:
            warnings.warn(
                'No p-values calculated to show.\n\nUse keep_stats kwarg while editing to generate these values.'
            )
    else:
        if there_are_p_vals:
            dataframe.drop(statfields, axis=0, inplace=True, errors='ignore')

    # make and set y label
    absolutes = True
    if isinstance(dataframe, DataFrame):
        try:
            if not all([s.is_integer() for s in dataframe.iloc[0, :].values]):
                absolutes = False
        except:
            pass
    else:
        if not all([s.is_integer() for s in dataframe.values]):
            absolutes = False

    ##########################################
    ################ COLOURS #################
    ##########################################

    # set defaults, with nothing for heatmap yet
    if colours is True or colours == 'default' or colours == 'Default':
        if kind != 'heatmap':
            colours = 'viridis'
        else:
            colours = 'default'

    # assume it's a single color, unless string denoting map
    cmap_or_c = 'color'
    if isinstance(colours, str):
        cmap_or_c = 'colormap'
    from matplotlib.colors import LinearSegmentedColormap
    if isinstance(colours, LinearSegmentedColormap):
        cmap_or_c = 'colormap'

    # for heatmaps, it's always a colormap
    if kind == 'heatmap':
        cmap_or_c = 'cmap'
        # if it's a defaulty string, set accordingly
        if isinstance(colours, str):
            if colours.lower().startswith('diverg'):
                colours = sns.diverging_palette(10, 133, as_cmap=True)

            # if default not set, do diverge for any df with a number < 0
            elif colours.lower() == 'default':
                mn = dataframe.min()
                if isinstance(mn, Series):
                    mn = mn.min()
                if mn < 0:
                    colours = sns.diverging_palette(10, 133, as_cmap=True)
                else:
                    colours = sns.light_palette("green", as_cmap=True)

    if 'seaborn' not in style:
        kwargs[cmap_or_c] = colours
    #if not was_series:
    #    if kind in ['pie', 'line', 'area']:
    #        if colours and not plotting_a_totals_column:
    #            kwargs[cmap_or_c] = colours
    #    else:
    #        if colours:
    #            kwargs[cmap_or_c] = colours
    #if piemode:
    #    if num_to_plot > 0:
    #        kwargs[cmap_or_c] = colours
    #    else:
    #        if num_to_plot > 0:
    #            kwargs[cmap_or_c] = colours

    # multicoloured bar charts
    #if colours and cmap_or_c == 'colormap':
    #    if kind.startswith('bar'):
    #        if len(list(dataframe.columns)) == 1:
    #            if not black_and_white:
    #                import numpy as np
    #                the_range = np.linspace(0, 1, num_to_plot)
    #                middle = len(the_range) / 2
    #                try:
    #                    cmap = plt.get_cmap(colours)
    #                    kwargs[cmap_or_c] = [cmap(n) for n in the_range][middle]
    #                except ValueError:
    #                    kwargs[cmap_or_c] = colours
    #            # make a bar width ... ? ...
    #            #kwargs['width'] = (figsize[0] / float(num_to_plot)) / 1.5

    # reversing legend option
    if reverse_legend is True:
        rev_leg = True
    elif reverse_legend is False:
        rev_leg = False

    # show legend or don't, guess whether to reverse based on kind
    if kind in ['bar', 'barh', 'area', 'line', 'pie']:
        if was_series:
            legend = False
        if kind == 'pie':
            if pie_legend:
                legend = True
            else:
                legend = False
    if kind in ['barh', 'area']:
        if reverse_legend == 'guess':
            rev_leg = True
    if not 'rev_leg' in locals():
        rev_leg = False

    # the default legend placement
    if legend_pos is True:
        legend_pos = 'best'

    # cut dataframe if just_totals
    try:
        tst = dataframe['Combined total']
        dataframe = dataframe.head(num_to_plot)
    except:
        pass

    # no title for subplots because ugly,
    if title and not sbplt:
        kwargs['title'] = title

    # no interactive subplots yet:
    if sbplt and interactive:
        import warnings
        interactive = False
        warnings.warn('No interactive subplots yet, sorry.')
        return

    # not using pandas for labels or legend anymore.
    #kwargs['labels'] = None
    #kwargs['legend'] = False

    if legend:
        if num_to_plot > 6:
            if not kwargs.get('ncol'):
                kwargs['ncol'] = num_to_plot // 7
        # kwarg options go in leg_options
        leg_options = {
            'framealpha': leg_alpha,
            'shadow': kwargs.get('shadow', False),
            'ncol': kwargs.pop('ncol', 1)
        }

        # determine legend position based on this dict
        if legend_pos:
            possible = {
                'best': 0,
                'upper right': 1,
                'upper left': 2,
                'lower left': 3,
                'lower right': 4,
                'right': 5,
                'center left': 6,
                'center right': 7,
                'lower center': 8,
                'upper center': 9,
                'center': 10,
                'o r': 2,
                'outside right': 2,
                'outside upper right': 2,
                'outside center right': 'center left',
                'outside lower right': 'lower left'
            }

            if isinstance(legend_pos, int):
                the_loc = legend_pos
            elif isinstance(legend_pos, str):
                try:
                    the_loc = possible[legend_pos]
                except KeyError:
                    raise KeyError(
                        'legend_pos value must be one of:\n%s\n or an int between 0-10.'
                        % ', '.join(list(possible.keys())))
            leg_options['loc'] = the_loc
            #weirdness needed for outside plot
            if legend_pos in ['o r', 'outside right', 'outside upper right']:
                leg_options['bbox_to_anchor'] = (1.02, 1)
            if legend_pos == 'outside center right':
                leg_options['bbox_to_anchor'] = (1.02, 0.5)
            if legend_pos == 'outside lower right':
                leg_options['loc'] == 'upper right'
                leg_options['bbox_to_anchor'] = (0.5, 0.5)

        # a bit of distance between legend and plot for outside legends
        if isinstance(legend_pos, str):
            if legend_pos.startswith('o'):
                leg_options['borderaxespad'] = 1

    if not piemode:
        if show_totals.endswith('both') or show_totals.endswith('legend'):
            dataframe = rename_data_with_total(dataframe,
                                               was_series=was_series,
                                               using_tex=using_tex,
                                               absolutes=absolutes)
    else:
        if pie_legend:
            if show_totals.endswith('both') or show_totals.endswith('legend'):
                dataframe = rename_data_with_total(dataframe,
                                                   was_series=was_series,
                                                   using_tex=using_tex,
                                                   absolutes=absolutes)

    if piemode:
        if partial_pie:
            dataframe = dataframe / 100.0

    # some pie things
    if piemode:
        if not sbplt:
            kwargs['y'] = list(dataframe.columns)[0]

    def filler(df):
        """Rescale every row of ``df`` so that its values sum to 100.

        Works on a transposed copy, so the frame passed in is not modified.
        """
        by_row = df.T.copy()
        # after transposing, each column of by_row is one row of df
        for row_name in by_row.columns:
            row_values = by_row[row_name]
            row_total = row_values.sum()
            by_row[row_name] = row_values * 100.0 / row_total
        return by_row.T

    areamode = False
    if kind == 'area':
        areamode = True

    if legend is False:
        kwargs['legend'] = False

    # line highlighting option for interactive!
    if interactive:
        if 2 in interactive_types:
            if kind == 'line':
                kwargs['marker'] = ','
        if not piemode:
            kwargs['alpha'] = 0.1

    # convert dates --- works only in my current case!
    #if plotting_a_totals_column or not was_series:
    #    try:
    #        can_it_be_int = int(list(dataframe.index)[0])
    #        can_be_int = True
    #    except:
    #        can_be_int = False
    #    if can_be_int:
    #        if 1500 < int(list(dataframe.index)[0]):
    #            if 2050 > int(list(dataframe.index)[0]):
    #                n = pandas.PeriodIndex([d for d in list(dataframe.index)], freq='A')
    #                dataframe = dataframe.set_index(n)

    if kwargs.get('filled'):
        if areamode or kind.startswith('bar'):
            dataframe = filler(dataframe)
        kwargs.pop('filled', None)

    MARKERSIZE = 4
    COLORMAP = {
        0: {
            'marker': None,
            'dash': (None, None)
        },
        1: {
            'marker': None,
            'dash': [5, 5]
        },
        2: {
            'marker': "o",
            'dash': (None, None)
        },
        3: {
            'marker': None,
            'dash': [1, 3]
        },
        4: {
            'marker': "s",
            'dash': [5, 2, 5, 2, 5, 10]
        },
        5: {
            'marker': None,
            'dash': [5, 3, 1, 2, 1, 10]
        },
        6: {
            'marker': 'o',
            'dash': (None, None)
        },
        7: {
            'marker': None,
            'dash': [5, 3, 1, 3]
        },
        8: {
            'marker': "1",
            'dash': [1, 3]
        },
        9: {
            'marker': "*",
            'dash': [5, 5]
        },
        10: {
            'marker': "2",
            'dash': [5, 2, 5, 2, 5, 10]
        },
        11: {
            'marker': "s",
            'dash': (None, None)
        }
    }

    HATCHES = {
        0: {
            'color': '#dfdfdf',
            'hatch': "/"
        },
        1: {
            'color': '#6f6f6f',
            'hatch': "\\"
        },
        2: {
            'color': 'b',
            'hatch': "|"
        },
        3: {
            'color': '#dfdfdf',
            'hatch': "-"
        },
        4: {
            'color': '#6f6f6f',
            'hatch': "+"
        },
        5: {
            'color': 'b',
            'hatch': "x"
        }
    }

    if black_and_white:
        if kind == 'line':
            kwargs['linewidth'] = 1

        cmap = plt.get_cmap('Greys')
        new_cmap = truncate_colormap(cmap, 0.25, 0.95)
        if kind == 'bar':
            # darker if just one entry
            if len(dataframe.columns) == 1:
                new_cmap = truncate_colormap(cmap, 0.70, 0.90)
        kwargs[cmap_or_c] = new_cmap

    # remove things from kwargs if heatmap
    if kind == 'heatmap':
        hmargs = {
            'annot': kwargs.pop('annot', True),
            cmap_or_c: kwargs.pop(cmap_or_c, None),
            'fmt': kwargs.pop('fmt', ".2f"),
            'cbar': kwargs.pop('cbar', False)
        }

        for i in [
                'vmin', 'vmax', 'linewidths', 'linecolor', 'robust', 'center',
                'cbar_kws', 'cbar_ax', 'square', 'mask', 'norm'
        ]:
            if i in kwargs.keys():
                hmargs[i] = kwargs.pop(i, None)

    class dummy_context_mgr:
        """Do-nothing context manager, used when no style context is wanted.

        Perhaps made obsolete by the 'classic' style in newer matplotlib.
        """

        def __enter__(self):
            # nothing to set up; the ``as`` target is bound to None
            return

        def __exit__(self, one, two, three):
            # a false return value lets any exception propagate
            return False

    with plt.style.context(
        (style)) if style != 'matplotlib' else dummy_context_mgr():

        kwargs.pop('filled', None)

        if not sbplt:
            # check if negative values, no stacked if so
            if areamode:
                if not kwargs.get('ax'):
                    kwargs['legend'] = False
                if dataframe.applymap(lambda x: x < 0.0).any().any():
                    kwargs['stacked'] = False
                    rev_leg = False
            if kind != 'heatmap':
                # turn off pie labels at the last minute
                if kind == 'pie' and pie_legend:
                    kwargs['labels'] = None
                    kwargs['autopct'] = '%.2f'
                if kind == 'pie':
                    kwargs.pop('color', None)
                ax = dataframe.plot(figsize=figsize, **kwargs)
            else:
                fg = plt.figure(figsize=figsize)
                if title:
                    plt.title(title)
                ax = kwargs.get('ax', plt.axes())
                tmp = sns.heatmap(dataframe, ax=ax, **hmargs)
                ax.set_title(title)
                for item in tmp.get_yticklabels():
                    item.set_rotation(0)
                plt.close(fg)

            if areamode and not kwargs.get('ax'):
                handles, labels = plt.gca().get_legend_handles_labels()
                del handles
                del labels

            if x_label:
                ax.set_xlabel(x_label)
            if y_label:
                ax.set_ylabel(y_label)

        else:
            if not kwargs.get('layout'):
                plt.gcf().set_tight_layout(False)

            if kind != 'heatmap':
                ax = dataframe.plot(figsize=figsize, **kwargs)
            else:
                plt.figure(figsize=figsize)
                if title:
                    plt.title(title)
                ax = plt.axes()
                sns.heatmap(dataframe, ax=ax, **hmargs)
                plt.xticks(rotation=0)
                plt.yticks(rotation=0)

        def rotate_degrees(rotation, labels):
            """Work out the rotation (in degrees) for tick labels.

            :param rotation: ``None`` to decide from the label lengths,
                             ``False`` for 0, ``True`` for 45, or any other
                             value, which is returned unchanged
            :param labels: the tick label strings; only used when
                           ``rotation`` is None
            :returns: 45 if ``rotation`` is None and the longest label has
                      more than 6 characters, 0 if it is None otherwise
            """
            if rotation is None:
                # compare the *length* of the longest label, and treat an
                # empty label list as having nothing to rotate
                longest = max([len(label) for label in labels] or [0])
                if longest > 6:
                    return 45
                return 0
            elif rotation is False:
                return 0
            elif rotation is True:
                return 45
            else:
                return rotation

        if sbplt:
            if 'layout' not in kwargs:
                axes = [l for l in ax]
            else:
                axes = []
                cols = [l for l in ax]
                for col in cols:
                    for bit in col:
                        axes.append(bit)
            for index, a in enumerate(axes):
                if xtickspan is not False:
                    a.xaxis.set_major_locator(
                        ticker.MultipleLocator(xtickspan))
                labels = [item.get_text() for item in a.get_xticklabels()]
                rotation = rotate_degrees(the_rotation, labels)
                try:
                    if the_rotation == 0:
                        ax.set_xticklabels(labels,
                                           rotation=rotation,
                                           ha='center')
                    else:
                        ax.set_xticklabels(labels,
                                           rotation=rotation,
                                           ha='right')
                except AttributeError:
                    pass
        else:
            if kind == 'heatmap':
                labels = [item.get_text() for item in ax.get_xticklabels()]
                rotation = rotate_degrees(the_rotation, labels)
                if the_rotation == 0:
                    ax.set_xticklabels(labels, rotation=rotation, ha='center')
                else:
                    ax.set_xticklabels(labels, rotation=rotation, ha='right')

        if transparent:
            plt.gcf().patch.set_facecolor('white')
            plt.gcf().patch.set_alpha(0)

        if black_and_white:
            if kind == 'line':
                # white background
                # change everything to black and white with interesting dashes and markers
                c = 0
                for line in ax.get_lines():
                    line.set_color('black')
                    #line.set_width(1)
                    line.set_dashes(COLORMAP[c]['dash'])
                    line.set_marker(COLORMAP[c]['marker'])
                    line.set_markersize(MARKERSIZE)
                    c += 1
                    if c == len(list(COLORMAP.keys())):
                        c = 0

        # draw legend with proper placement etc
        if legend:
            if not piemode and not sbplt and kind != 'heatmap':
                if 3 not in interactive_types:
                    handles, labels = plt.gca().get_legend_handles_labels()
                    # area doubles the handles and labels. this removes half:
                    #if areamode:
                    #    handles = handles[-len(handles) / 2:]
                    #    labels = labels[-len(labels) / 2:]
                    if rev_leg:
                        handles = handles[::-1]
                        labels = labels[::-1]
                    if kwargs.get('ax'):
                        lgd = plt.gca().legend(handles, labels, **leg_options)
                        ax.get_legend().draw_frame(leg_frame)
                    else:
                        lgd = plt.legend(handles, labels, **leg_options)
                        lgd.draw_frame(leg_frame)

    if interactive:
        # 1 = highlight lines
        # 2 = line labels
        # 3 = legend switches
        ax = plt.gca()
        # fails for piemode
        lines = ax.lines
        handles, labels = plt.gca().get_legend_handles_labels()
        if 1 in interactive_types:
            plugins.connect(plt.gcf(), HighlightLines(lines))

        if 3 in interactive_types:
            plugins.connect(
                plt.gcf(),
                InteractiveLegendPlugin(lines, labels, alpha_unsel=0.0))

        for i, l in enumerate(lines):
            y_vals = l.get_ydata()
            x_vals = l.get_xdata()
            x_vals = [str(x) for x in x_vals]
            if absolutes:
                ls = [
                    '%s (%s: %d)' % (labels[i], x_val, y_val)
                    for x_val, y_val in zip(x_vals, y_vals)
                ]
            else:
                ls = [
                    '%s (%s: %.2f%%)' % (labels[i], x_val, y_val)
                    for x_val, y_val in zip(x_vals, y_vals)
                ]
            if 2 in interactive_types:
                #if 'kind' in kwargs and kwargs['kind'] == 'area':
                tooltip_line = mpld3.plugins.LineLabelTooltip(
                    lines[i], labels[i])
                mpld3.plugins.connect(plt.gcf(), tooltip_line)
                #else:
                if kind == 'line':
                    tooltip_point = mpld3.plugins.PointLabelTooltip(l,
                                                                    labels=ls)
                    mpld3.plugins.connect(plt.gcf(), tooltip_point)

    if piemode:
        if not sbplt:
            plt.axis('equal')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

    # add x label
    # this could be revised now!
    # if time series period, it's year for now
    if isinstance(dataframe.index, pandas.tseries.period.PeriodIndex):
        x_label = 'Year'

    y_l = False
    if not absolutes:
        y_l = 'Percentage'
    else:
        y_l = 'Absolute frequency'

    # hacky: turn legend into subplot titles :)
    if sbplt:
        # title the big plot
        #plt.gca().suptitle(title, fontsize = 16)
        #plt.subplots_adjust(top=0.9)
        # get all axes
        if 'layout' not in kwargs:
            axes = [l for index, l in enumerate(ax)]
        else:
            axes = []
            cols = [l for index, l in enumerate(ax)]
            for col in cols:
                for bit in col:
                    axes.append(bit)

        # set subplot titles
        for index, a in enumerate(axes):
            try:
                titletext = list(dataframe.columns)[index]
            except:
                pass
            a.set_title(titletext)
            try:
                a.legend_.remove()
            except:
                pass
            #try:
            #    from matplotlib.ticker import MaxNLocator
            #    from corpkit.process import is_number
            #    indx = list(dataframe.index)
            #    if all([is_number(qq) for qq in indx]):
            #        ax.get_xaxis().set_major_locator(MaxNLocator(integer=True))
            #except:
            #    pass
            # remove axis labels for pie plots
            if piemode:
                a.axes.get_xaxis().set_visible(False)
                a.axes.get_yaxis().set_visible(False)
                a.axis('equal')

            a.grid(b=show_grid)

    # add sums to bar graphs and pie graphs
    # doubled right now, no matter

    if not sbplt:

        # show grid
        ax.grid(b=show_grid)

        if kind.startswith('bar'):
            width = ax.containers[0][0].get_width()

    if was_series:
        the_y_limit = plt.ylim()[1]
        if show_totals.endswith('plot') or show_totals.endswith('both'):
            # make plot a bit higher if putting these totals on it
            plt.ylim([0, the_y_limit * 1.05])
            for i, label in enumerate(list(dataframe.index)):
                if len(dataframe.ix[label]) == 1:
                    score = dataframe.ix[label][0]
                else:
                    if absolutes:
                        score = dataframe.ix[label].sum()
                    else:
                        #import warnings
                        #warnings.warn("It's not possible to determine total percentage from individual percentages.")
                        continue
                if not absolutes:
                    plt.annotate('%.2f' % score, (i, score),
                                 ha='center',
                                 va='bottom')
                else:
                    plt.annotate(score, (i, score), ha='center', va='bottom')
    else:
        the_y_limit = plt.ylim()[1]
        if show_totals.endswith('plot') or show_totals.endswith('both'):
            for i, label in enumerate(list(dataframe.columns)):
                if len(dataframe[label]) == 1:
                    score = dataframe[label][0]
                else:
                    if absolutes:
                        score = dataframe[label].sum()
                    else:
                        #import warnings
                        #warnings.warn("It's not possible to determine total percentage from individual percentages.")
                        continue
                if not absolutes:
                    plt.annotate('%.2f' % score, (i, score),
                                 ha='center',
                                 va='bottom')
                else:
                    plt.annotate(score, (i, score), ha='center', va='bottom')

    if not kwargs.get('layout') and not sbplt and not kwargs.get('ax'):
        plt.tight_layout()
    if kwargs.get('ax'):
        try:
            plt.gcf().set_tight_layout(False)
        except:
            pass
        try:
            plt.set_tight_layout(False)
        except:
            pass

    if save:
        if running_python_tex:
            imagefolder = '../images'
        else:
            imagefolder = 'images'

        savename = get_savename(imagefolder,
                                save=save,
                                title=title,
                                ext=output_format)

        if not os.path.isdir(imagefolder):
            os.makedirs(imagefolder)

        # save image and get on with our lives
        if legend_pos.startswith('o') and not sbplt:
            plt.gcf().savefig(savename,
                              dpi=150,
                              bbox_extra_artists=(lgd, ),
                              bbox_inches='tight',
                              format=output_format)
        else:
            plt.gcf().savefig(savename, dpi=150, format=output_format)
        time = strftime("%H:%M:%S", localtime())
        if os.path.isfile(savename):
            print('\n' + time + ": " + savename + " created.")
        else:
            raise ValueError("Error making %s." % savename)

    if dragmode:
        plt.legend().draggable()

    if sbplt:
        plt.subplots_adjust(right=.8)
        plt.subplots_adjust(left=.1)

    # add DataCursor to notebook backend if possible
    if have_mpldc:
        if kind == 'line':
            HighlightingDataCursor(
                plt.gca().get_lines(),
                highlight_width=4,
                highlight_color=False,
                formatter=lambda **kwargs: '%s: %s' %
                (kwargs['label'], "{0:.3f}".format(kwargs['y'])))
        else:
            datacursor(formatter=lambda **kwargs: '%s: %s' %
                       (kwargs['label'], "{0:.3f}".format(kwargs['height'])))

    #if not interactive and not running_python_tex and not running_spider \
    #    and not tk:
    #    plt.gcf().show()
    #    return plt
    #elif running_spider or tk:
    #    return plt

    if interactive:
        plt.subplots_adjust(right=.8)
        plt.subplots_adjust(left=.1)
        try:
            ax.legend_.remove()
        except:
            pass
        return mpld3.display()
    else:
        return plt
Example #29
0
    def buildGraph2(self):
        """Plot the clustered 2-D document map with HTML tooltips.

        Calls ``self.load_tfidf()``, reads the text file names through
        ``Paths(self.folder).getTxts()`` (stored on ``self.filenames`` and
        ``self.folders``), takes the point coordinates from ``self.loadMDS()``
        and the colour/name lookups from ``self.setClusters()``. One marker
        series is drawn per cluster label and every point gets a tooltip
        showing its file name.

        Returns:
            The object produced by ``mpld3.display()`` so the caller (e.g. a
            notebook cell) can actually render the figure. The original code
            discarded it and returned ``None``.
        """
        self.load_tfidf()
        cPaths = Paths(self.folder)
        self.filenames, self.folders = cPaths.getTxts()
        xs, ys = self.loadMDS()
        # xs, ys = self.create_MDS()
        cluster_colors, cluster_names = self.setClusters()

        # One row per document: coordinates, cluster label and file name.
        df = pd.DataFrame(
            dict(x=xs, y=ys, label=self.clusters(), title=self.filenames))

        # Group by cluster so each cluster becomes its own plotted series.
        groups = df.groupby('label')

        # Custom css to format the font and to remove the axis labeling.
        css = """
        text.mpld3-text, div.mpld3-tooltip {
        font-family:Arial, Helvetica, sans-serif;
        }

        g.mpld3-xaxis, g.mpld3-yaxis {
        display: none; }

        svg.mpld3-figure {
        margin-left: -200px;}
        """

        fig, ax = plt.subplots(figsize=(14, 6))  # set plot size
        ax.margins(0.03)  # optional: 3% padding around the autoscaled data

        # Layer the plot one cluster at a time; `name` is the cluster label and
        # is the key into both cluster_names and cluster_colors.
        for name, group in groups:
            points = ax.plot(group.x,
                             group.y,
                             marker='o',
                             linestyle='',
                             ms=18,
                             label=cluster_names[name],
                             mec='none',
                             color=cluster_colors[name])
            point_titles = list(group.title)

            # Tooltip for this series: one title per plotted point.
            tooltip = mpld3.plugins.PointHTMLTooltip(points[0],
                                                     point_titles,
                                                     voffset=10,
                                                     hoffset=10,
                                                     css=css)
            mpld3.plugins.connect(fig, tooltip, TopToolbar())

        # Axis cosmetics do not depend on the group, so apply them once.
        ax.set_aspect('auto')
        ax.axes.get_xaxis().set_ticks([])
        ax.axes.get_yaxis().set_ticks([])
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)

        ax.legend(numpoints=1)  # show legend with only one dot

        return mpld3.display()
Example #30
0
def plotEssays(x,
               y,
               labels,
               titles,
               cluster_names=None,
               ms=10,
               output='notebook'):
    """Scatter-plot clustered points with per-point HTML tooltips.

    Args:
        x: x coordinates, one per point.
        y: y coordinates, one per point.
        labels: cluster label of each point; points are grouped by it.
        titles: tooltip text of each point.
        cluster_names: mapping from cluster label to legend text. When
            omitted, every distinct label is used as its own legend text.
        ms: marker size passed to ``ax.plot``.
        output: ``'notebook'`` returns the result of ``mpld3.display()``;
            ``'app'`` returns the figure as an HTML string.

    Raises:
        ValueError: if ``output`` is neither ``'notebook'`` nor ``'app'``.
    """
    figure_sizes = {'notebook': (14, 6), 'app': (14, 8)}
    if output not in figure_sizes:
        # Previously this fell through and failed later with a NameError.
        raise ValueError("output must be 'notebook' or 'app', got %r" %
                         (output, ))

    # One row per point: coordinates, cluster label and tooltip title.
    df = pd.DataFrame(dict(x=x, y=y, label=labels, title=titles))

    # Group by cluster so each cluster becomes its own plotted series.
    groups = df.groupby('label')

    # Custom css to format the font and to remove the axis labeling.
    css = """
    text.mpld3-text, div.mpld3-tooltip {
    font-family:Arial, Helvetica, sans-serif;
    }

    g.mpld3-xaxis, g.mpld3-yaxis {
    display: none; }

    svg.mpld3-figure {
    margin-left: -100px;
    margin-right: -100px}
    """

    # Default legend text: the label itself. Built from the labels actually
    # present, so it also works when they are not exactly 0..n-1.
    if cluster_names is None:
        cluster_names = {label: label for label in set(labels)}

    fig, ax = plt.subplots(figsize=figure_sizes[output])  # set plot size
    ax.margins(0.03)  # optional: 3% padding around the autoscaled data

    # Layer the plot one cluster at a time; `name` is the cluster label.
    for name, group in groups:
        points = ax.plot(group.x,
                         group.y,
                         marker='o',
                         linestyle='',
                         ms=ms,
                         label=cluster_names[name],
                         mec='none')
        point_titles = list(group.title)

        # Tooltip for this series: one title per plotted point.
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0],
                                                 point_titles,
                                                 voffset=10,
                                                 hoffset=10,
                                                 css=css)
        mpld3.plugins.connect(fig, tooltip, TopToolbar())

    # Axis cosmetics do not depend on the group, so apply them once.
    ax.set_aspect('auto')
    ax.axes.get_xaxis().set_ticks([])
    ax.axes.get_yaxis().set_ticks([])
    # The axes stay visible on the matplotlib side, as in the original code.
    # NOTE(review): the css above hides the mpld3 axis groups -- confirm
    # whether these were meant to be False.
    ax.axes.get_xaxis().set_visible(True)
    ax.axes.get_yaxis().set_visible(True)

    ax.legend(numpoints=1)  # show legend with only one dot

    if output == 'notebook':
        return mpld3.display()
    return mpld3.fig_to_html(fig)
# Plot a polygon
def plot_polygon(ax, vertices, color_index, to_fill = False):
	"""Add a polygon outlined in a tab10 colour to ``ax``.

	``vertices`` is an iterable of objects exposing ``x`` and ``y``;
	``color_index`` is handed to ``get_tab10_color_from_index``. When
	``to_fill`` is true the polygon is filled with the same colour as its edge.
	"""
	color = get_tab10_color_from_index(color_index)
	points_series = [[vertex.x, vertex.y] for vertex in vertices]

	# `fill` is a boolean switch; the fill colour itself goes in `facecolor`.
	# The original passed the colour as `fill`, which only toggled filling on.
	polygon = patches.Polygon(points_series,
	                          linewidth=1,
	                          edgecolor=color,
	                          facecolor=color,
	                          fill=bool(to_fill))
	ax.add_patch(polygon)


# Annotate text on top of the plot
def annotate(text, vertex):
	"""Write ``text`` at the (x, y) position of ``vertex`` via pyplot."""
	position = (vertex.x, vertex.y)
	plt.annotate(text, position)


# Render the plot
def plot():
	"""Return the result of ``mpld3.display()``."""
	return mpld3.display()


# Get the text of a word
def get_word_text(word):
  """Concatenate the ``text`` of every entry in ``word.symbols``."""
  pieces = (glyph.text for glyph in word.symbols)
  return ''.join(pieces)

## Plotting the words that were found

from PIL import Image

# Open the image located at image_path (image_path is defined outside this
# snippet).
image_data = Image.open(image_path)

# prepare_image_data is defined elsewhere; its return value is kept as `ax`.
ax = prepare_image_data(image_data)
# NOTE(review): the visible loop body only sets the title on every iteration;
# item_index and word are unused here, so the per-word drawing code appears to
# be missing from this snippet -- confirm against the full script.
for	item_index, word in enumerate(all_words):
  plt.title(f'{image_path} words')
Example #32
0
def pretty_draw(g):
    """Draw graph ``g`` with mpld3, showing a tooltip for every node.

    Nodes are placed with ``nx.spring_layout``. Each edge is drawn as a grey
    line with a thicker, semi-transparent stub near the destination node.
    The tooltip of a node is the HTML representation of ``g.cpd(node)``,
    falling back to ``str(g.cpd(node))`` and finally to the node name, so the
    list of tooltips always lines up with the list of points.

    Returns:
        The object produced by ``mpld3.display()``.
    """
    css = """
table
{
  border-collapse: collapse;
}
th
{
  color: #ffffff;
  background-color: #000000;
}
td
{
  background-color: #cccccc;
}
table, th, td
{
  font-family:Arial, Helvetica, sans-serif;
  border: 1px solid black;
  text-align: center;
  padding: 3px;
  font-size:11pt;
}
g.mpld3-xaxis, g.mpld3-yaxis {
    display: none;
}
    """

    fig = plt.figure(figsize=(12, 8))
    ax = plt.gca()

    edges = list(g.edges())
    layout = nx.spring_layout(g, iterations=10)

    nodes = []
    points = []
    labels = []
    for node, (x, y) in layout.items():
        nodes.append(node)
        points.append((x, y))
        # Always append exactly one label per point. The original silently
        # skipped nodes whose cpd lookup failed, which shifted every later
        # tooltip onto the wrong point.
        try:
            cpd = g.cpd(node)
            try:
                label = cpd._repr_html_()
            except Exception:
                label = str(cpd)
        except Exception:
            label = str(node)
        labels.append(label)

    if not points:
        # Nothing to draw; zip(*points) below cannot unpack an empty list.
        return mpld3.display()

    points_x, points_y = zip(*points)

    # Leave a small margin around the outermost nodes.
    ax.set_xlim(min(points_x) - 0.08, max(points_x) + 0.08)
    ax.set_ylim(min(points_y) - 0.08, max(points_y) + 0.08)

    for src, dst in edges:
        src_pos = layout[src]
        dst_pos = layout[dst]
        # Point 15% of the way back from the destination: start of the
        # thicker stub that marks the edge direction.
        arr_pos = dst_pos - 0.15*(dst_pos - src_pos)

        ax.plot(*list(zip(src_pos, dst_pos)), color='grey')
        ax.plot(*list(zip(arr_pos, dst_pos)), color='black', alpha=.5, linewidth=5)

    # Visible node markers.
    ax.plot(points_x, points_y, 'o', color='lightgray',
                     mec='k', ms=20, mew=1, alpha=1.)

    for text, x, y in zip(nodes, points_x, points_y):
        ax.text(x, y, text, horizontalalignment='center', verticalalignment='center')

    # Fully transparent, larger markers that only carry the tooltips.
    pts = ax.plot(points_x, points_y, 'o', color='lightgray',
                     mec='k', ms=40, mew=1, alpha=0.)

    tooltip = plugins.PointHTMLTooltip(pts[0], labels,
                                       voffset=10, hoffset=10, css=css)
    plugins.connect(fig, tooltip)

    return mpld3.display()
Example #33
0
def cluster_graphic_html():
    """Build the cluster scatter plot and return it as an HTML string.

    Projects ``model.dist`` to two dimensions with MDS, colours every point
    by its entry in ``model.KMmodel.labels_`` and attaches a tooltip with the
    matching entry of ``model.titles``.

    Returns:
        The figure rendered by ``mpld3.fig_to_html``.
    """
    # Two components as we're plotting points in a two-dimensional plane;
    # "precomputed" because we provide a distance matrix; `random_state` is
    # fixed so the plot is reproducible. (A stray, unused `MDS()` call that
    # preceded this line was removed.)
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)

    # Column 0 holds the x coordinates and column 1 the y coordinates.
    pos = mds.fit_transform(model.dist)

    xs, ys = pos[:, 0], pos[:, 1]
    clusters = model.KMmodel.labels_.tolist()
    # One row per point: MDS coordinates, cluster number and title.
    df = pd.DataFrame(dict(x=xs, y=ys, label=clusters, title=model.titles))

    # Group by cluster so each cluster becomes its own plotted series.
    groups = df.groupby('label')

    # Custom css to format the font and to remove the axis labeling.
    css = """
text.mpld3-text, div.mpld3-tooltip {
  font-family:Arial, Helvetica, sans-serif;
}

g.mpld3-xaxis, g.mpld3-yaxis {
display: none; }
"""

    fig, ax = plt.subplots(figsize=(14,6)) #set plot size
    ax.margins(0.03) # optional: 3% padding around the autoscaled data

    # Colour per cluster number.
    cluster_colors = {0: '#1b9e77', 1: '#d95f02', 2: '#7570b3', 3: '#e7298a', 4: '#66a61e'}

    # Legend text per cluster number.
    cluster_names = {0: 'Family, home, war',
                     1: 'Police, killed, murders',
                     2: 'Father, New York, brothers',
                     3: 'Dance, singing, love',
                     4: 'Killed, soldiers, captain'}

    # Layer the plot one cluster at a time; `name` is the cluster number and
    # is the key into both dicts above (only clusters 0-4 are defined).
    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=18, label=cluster_names[name], mec='none', color=cluster_colors[name])
        point_titles = list(group.title)

        # Tooltip for this series: one title per plotted point.
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], point_titles,
                                                 voffset=10, hoffset=10, css=css)
        mpld3.plugins.connect(fig, tooltip, TopToolbar())

    # Axis cosmetics do not depend on the group, so apply them once.
    ax.set_aspect('auto')
    ax.axes.get_xaxis().set_ticks([])
    ax.axes.get_yaxis().set_ticks([])
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)

    ax.legend(numpoints=1) #show legend with only one dot

    mpld3.display() # kept from the original flow; its return value is unused

    # Export the figure to html for the caller.
    html = mpld3.fig_to_html(fig)
    return html
# Scatter plot of query duration against timestamp, rendered through mpld3.
# `timestamp`, `duration` and `l` are not defined in this snippet.
fig, ax = plt.subplots(subplot_kw=dict(facecolor='#f2f6fc'))

# The list has a single element, so the body runs once with i bound to
# `duration`; passing it as `c` colours each point by its duration.
for i in [duration]:
    x = timestamp
    y = duration
    # NOTE(review): `l` (the legend label) is not assigned anywhere in view --
    # confirm it is defined before this runs.
    scatter = ax.scatter(x,
                         y,
                         c=i,
                         alpha=0.5,
                         edgecolors='none',
                         cmap=plt.cm.jet,
                         label=l)

# White grid lines on the light background set above.
ax.grid(color='white', linestyle='solid')
ax.set_title("MarkLogic Slow Queries", size=20)
ax.set_xlabel('Timestamp',
              fontsize=12,
              fontdict={'family': 'monospace'},
              labelpad=5)
ax.set_ylabel('Query Duration (secs)',
              fontsize=12,
              fontdict={'family': 'monospace'},
              labelpad=10)

# Attach a point-label tooltip to the scatter points of the figure.
tooltip = mpld3.plugins.PointLabelTooltip(scatter)
mpld3.plugins.connect(fig, tooltip)

mpld3.display()

#plt.show()
Example #35
0
# nom_list = ['Sessions','Flynn','Gorsuch','Trump']
# color_list = ['b','r']

# useful list -> ['Russia','Trumpcare','MuslimBan']

nom_list = ['Sessions', 'Flynn', 'Gorsuch', 'Trump']
color_list = ['b', 'r', 'g', 'k']

# One line per name: x is the date taken from each file name, y is the value
# returned by nom_mentions for that file and name.
for nom, color in zip(nom_list, color_list):
    mention_counts = []
    dates = []
    for n in files:
        # Characters 11 up to the last 5 of the file name hold the date in
        # YYYY-MM-DD form.
        date_string = n[11:-5]
        dt_string = dt.strptime(date_string, '%Y-%m-%d')
        dates.append(dt_string)
        mention_count = nom_mentions(n, nom)
        mention_counts.append(mention_count)
    plt.plot(dates, mention_counts, color)

plt.title('Phone Number Tweets by Names')
# Lines were plotted in nom_list order, so the legend entries line up.
# (The original called plt.legend twice with the same argument.)
plt.legend(nom_list)
# mpld3.display takes a Figure, not the pyplot module that was passed before.
mpld3.display(plt.gcf())
# plt.show()
Example #36
0
def plotter(title,
            df,
            x_label = None,
            y_label = None,
            style = 'ggplot',
            figsize = (8, 4),
            save = False,
            legend_pos = 'best',
            reverse_legend = 'guess',
            num_to_plot = 7,
            tex = 'try',
            colours = 'Paired',
            cumulative = False,
            pie_legend = True,
            partial_pie = False,
            show_totals = False,
            transparent = False,
            output_format = 'png',
            interactive = False,
            black_and_white = False,
            show_p_val = False,
            indices = 'guess',
            **kwargs):
    """plot interrogator() or editor() output.

    **kwargs are for pandas first, which can then send them through to matplotlib.plot():

    http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.plot.html
    http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot

    pie_legend: False to label slices rather than give legend
    show_totals: where to show percent/abs frequencies: False, 'plot', 'legend', or 'both'

    """

    import corpkit
    import os
    import matplotlib as mpl
    if interactive:
        import matplotlib.pyplot as plt, mpld3
    else:
        import matplotlib.pyplot as plt
    from matplotlib import rc
    import pandas
    import pandas as pd
    from pandas import DataFrame

    import numpy
    from time import localtime, strftime
    from corpkit.tests import check_pytex, check_spider, check_t_kinter

    if interactive:
        import mpld3
        import collections
        from mpld3 import plugins, utils
        from plugins import InteractiveLegendPlugin, HighlightLines

    tk = check_t_kinter()

    running_python_tex = check_pytex()
    # incorrect spelling of spider on purpose
    running_spider = check_spider()

    def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
        """Return a new colourmap sampled from ``cmap`` between two bounds.

        ``n`` evenly spaced positions from ``minval`` to ``maxval`` are looked
        up in ``cmap`` and turned into a ``LinearSegmentedColormap``; this is
        how the extreme values (e.g. pure white) are dropped.
        """
        import matplotlib.colors as colors
        import numpy as np
        trimmed_name = 'trunc({n},{a:.2f},{b:.2f})'.format(
            n=cmap.name, a=minval, b=maxval)
        sampled_colours = cmap(np.linspace(minval, maxval, n))
        return colors.LinearSegmentedColormap.from_list(trimmed_name,
                                                        sampled_colours)

    def get_savename(imagefolder, save = False, title = False, ext = 'png'):
        """Come up with the savename for the image.

        The file name is derived from ``save`` when it is a string, otherwise
        from ``title``. It is lower-cased, stripped of characters other than
        word characters, whitespace and hyphens, and whitespace runs become
        hyphens. ``ext`` may be given with or without the leading dot.

        Returns:
            The path ``imagefolder/<name><ext>``.

        Raises:
            ValueError: if ``save`` is not a string and ``title`` is empty, so
                no name can be derived (this used to surface as an
                UnboundLocalError).
        """
        import os
        import re

        def urlify(s):
            "Turn title into filename"
            s = s.lower()
            s = re.sub(r"[^\w\s-]", '', s)
            s = re.sub(r"\s+", '-', s)
            s = re.sub(r"-(textbf|emph|textsc|textit)", '-', s)
            return s

        if not ext.startswith('.'):
            ext = '.' + ext

        if isinstance(save, str):
            stem = save
        elif title:
            stem = title
        else:
            raise ValueError('Cannot work out a file name: pass a string as '
                             'save, or a non-empty title.')
        savename = os.path.join(imagefolder, urlify(stem) + ext)

        # remove duplicated ext: drop the trailing copy only, rather than the
        # first occurrence anywhere in the path
        if savename.endswith(ext + ext):
            savename = savename[:-len(ext)]
        return savename

    def rename_data_with_total(dataframe, was_series = False, using_tex = False, absolutes = True):
        """Append each entry's total to its name.

        The entry names are the index when ``was_series`` is true, otherwise
        the columns.

        * ``absolutes`` true: the name becomes ``"<name> (n=<sum>)"``.
        * ``absolutes`` false and ``was_series`` true: the name becomes
          ``"<name> (<value> %)"`` using the entry's first value; the percent
          sign is backslash-escaped when ``using_tex`` is true.
        * ``absolutes`` false and ``was_series`` false: names are unchanged.

        Returns:
            For ``was_series``, a new single-column frame named ``'Total'``
            indexed by the new names; otherwise the input frame with its
            columns renamed in place.
        """
        if was_series:
            where_the_words_are = dataframe.index
            # Transpose once so each entry can be looked up as a column.
            by_entry = dataframe.T
        else:
            where_the_words_are = dataframe.columns
            by_entry = dataframe

        # '\\%' spells the backslash explicitly; the old '\%' was an invalid
        # escape sequence that produced the same text.
        perc_template = '%s (%.2f\\%%)' if using_tex else '%s (%.2f %%)'

        the_labs = []
        for w in list(where_the_words_are):
            if absolutes:
                the_labs.append('%s (n=%d)' % (w, by_entry[w].sum()))
            elif was_series:
                # .iloc picks the first value by position regardless of how
                # the values are labelled.
                perc = by_entry[w].iloc[0]
                the_labs.append(perc_template % (w, perc))
            else:
                the_labs.append(w)

        if not was_series:
            dataframe.columns = the_labs
        else:
            vals = list(dataframe[list(dataframe.columns)[0]].values)
            dataframe = pd.DataFrame(vals, index = the_labs)
            dataframe.columns = ['Total']
        return dataframe

    def auto_explode(dataframe, input, was_series = False, num_to_plot = 7):
        """Build the ``explode`` list for a pie chart.

        ``input`` is an entry name, a position, or a list mixing both. The
        result has ``num_to_plot`` zeros, with 0.1 at every selected position.
        Names are resolved against the index when ``was_series`` is true,
        otherwise against the columns.
        """
        entry_names = list(dataframe.index) if was_series else list(dataframe.columns)
        offsets = [0] * num_to_plot

        # A single name or position is treated as a one-item list.
        selection = [input] if type(input) in (str, int) else input
        if type(selection) == list:
            for wanted in selection:
                position = entry_names.index(wanted) if type(wanted) == str else wanted
                offsets[position] = 0.1
        return offsets

    # are we doing subplots?
    sbplt = False
    if 'subplots' in kwargs:
        if kwargs['subplots'] is True:
            sbplt = True

    if colours is True:
        colours = 'Paired'

    styles = ['dark_background', 'bmh', 'grayscale', 'ggplot', 'fivethirtyeight']
    if style not in styles:
        raise ValueError('Style %s not found. Use %s' % (style, ', '.join(styles)))

    if 'savepath' in kwargs.keys():
        mpl.rcParams['savefig.directory'] = kwargs['savepath']
        del kwargs['savepath']

    mpl.rcParams['savefig.bbox'] = 'tight'

    # try to use tex
    # TO DO:
    # make some font kwargs here
    using_tex = False
    mpl.rcParams['font.family'] = 'sans-serif'
    mpl.rcParams['text.latex.unicode'] = True
    
    if tex == 'try' or tex is True:
        try:
            rc('text', usetex=True)
            rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
            using_tex = True
        except:
            matplotlib.rc('font', family='sans-serif') 
            matplotlib.rc('font', serif='Helvetica Neue') 
            matplotlib.rc('text', usetex='false') 
            rc('text', usetex=False)
    else:
        rc('text', usetex=False)  

    if interactive:
        using_tex = False 

    if show_totals is False:
        show_totals = 'none'

    # find out what kind of plot we're making, and enable
    # or disable interactive values if need be
    if 'kind' not in kwargs:
        kwargs['kind'] = 'line'

    if interactive:
        if kwargs['kind'].startswith('bar'):
            interactive_types = [3]
        elif kwargs['kind'] == 'area':
            interactive_types = [2, 3]
        elif kwargs['kind'] == 'line':
            interactive_types = [2, 3]
        elif kwargs['kind'] == 'pie':
            interactive_types = None
            warnings.warn('Interactive plotting not yet available for pie plots.')
        else:
            interactive_types = [None]
    if interactive is False:
        interactive_types = [None]

    # find out if pie mode, add autopct format
    piemode = False
    if 'kind' in kwargs:
        if kwargs['kind'] == 'pie':
            piemode = True
            # always the best spot for pie
            #if legend_pos == 'best':
                #legend_pos = 'lower left'
            if show_totals.endswith('plot') or show_totals.endswith('both'):
                kwargs['pctdistance'] = 0.6
                if using_tex:
                    kwargs['autopct'] = r'%1.1f\%%'
                else:
                    kwargs['autopct'] = '%1.1f%%'

    #if piemode:
        #if partial_pie:
            #kwargs['startangle'] = 180

    kwargs['subplots'] = sbplt

    # copy data, make series into df
    dataframe = df.copy()
    was_series = False
    if type(dataframe) == pandas.core.series.Series:
        was_series = True
        if not cumulative:
            dataframe = DataFrame(dataframe)
        else:
            dataframe = DataFrame(dataframe.cumsum())
    else:
        # don't know if this is much good.
        if cumulative:
            dataframe = DataFrame(dataframe.cumsum())
        if len(list(dataframe.columns)) == 1:
            was_series = True
    
    # attempt to convert x axis to ints:
    try:
        dataframe.index = [int(i) for i in list(dataframe.index)]
    except:
        pass

    # remove totals and tkinter order
    if not was_series:
        for name, ax in zip(['Total'] * 2 + ['tkintertable-order'] * 2, [0, 1, 0, 1]):
            dataframe = dataframe.drop(name, axis = ax, errors = 'ignore')
    else:
        dataframe = dataframe.drop('tkintertable-order', errors = 'ignore')
        dataframe = dataframe.drop('tkintertable-order', axis = 1, errors = 'ignore')
            
    # look at columns to see if all can be ints, in which case, set up figure
    # for depnumming
    if not was_series:
        if indices == 'guess':
            def isint(x):
                """Return True when ``x`` can be read as a whole number.

                ``x`` is converted to float and then to int; the result is
                True only when both conversions succeed and agree.
                """
                try:
                    a = float(x)
                    b = int(a)
                # `except ValueError or OverflowError` evaluated to just
                # ValueError, so int(inf) escaped as an OverflowError; a
                # tuple catches both. TypeError covers labels float() cannot
                # take at all (e.g. None).
                except (ValueError, OverflowError, TypeError):
                    return False
                else:
                    return a == b

            if all([isint(x) is True for x in list(dataframe.columns)]):
                indices = True
            else:
                indices = False

        # if depnumming, plot all, transpose, and rename axes
        if indices is True:
            num_to_plot = 'all'
            dataframe = dataframe.T
            if y_label is None:
                y_label = 'Percentage of all matches'
            if x_label is None:
                x_label = ''

    # set backend?
    output_formats = ['svgz', 'ps', 'emf', 'rgba', 'raw', 'pdf', 'svg', 'eps', 'png', 'pgf']
    if output_format not in output_formats:
        raise ValueError('%s output format not recognised. Must be: %s' % (output_format, ', '.join(output_formats)))
    
    # don't know if these are necessary
    if 'pdf' in output_format:
        plt.switch_backend(output_format) 
    if 'pgf' in output_format:
        plt.switch_backend(output_format)

    if num_to_plot == 'all':
        if was_series:
            if not piemode:
                num_to_plot = len(dataframe)
            else:
                num_to_plot = len(dataframe)
        else:
            if not piemode:
                num_to_plot = len(list(dataframe.columns))
            else:
                num_to_plot = len(dataframe.index)

    # explode pie, or remove if not piemode
    if 'explode' in kwargs:
        if not piemode:
            del kwargs['explode']
    if piemode:
        if 'explode' in kwargs:
            if not sbplt:
                kwargs['explode'] = auto_explode(dataframe, 
                                             kwargs['explode'], 
                                             was_series = was_series, 
                                             num_to_plot = num_to_plot)

    if 'legend' in kwargs:
        legend = kwargs['legend']
    else:
        legend = True

    #cut data short
    plotting_a_totals_column = False
    if was_series:
        if list(dataframe.columns)[0] != 'Total':
            try:
                can_be_ints = [int(x) for x in list(dataframe.index)]
                num_to_plot = len(dataframe)
            except:
                dataframe = dataframe[:num_to_plot]
        elif list(dataframe.columns)[0] == 'Total':
            plotting_a_totals_column = True
            if not 'legend' in kwargs:
                legend = False
            num_to_plot = len(dataframe)
    else:
        dataframe = dataframe.T.head(num_to_plot).T

    # remove stats fields, put p in entry text, etc.
    statfields = ['slope', 'intercept', 'r', 'p', 'stderr']
    try:
        dataframe = dataframe.drop(statfields, axis = 1)
    except:
        pass    
    try:
        dataframe.ix['p']
        there_are_p_vals = True
    except:
        there_are_p_vals = False
    if show_p_val:
        if there_are_p_vals:
            newnames = []
            for col in list(dataframe.columns):
                pval = dataframe[col]['p']
                newname = '%s (p=%s)' % (col, format(pval, '.5f'))
                newnames.append(newname)
            dataframe.columns = newnames
            dataframe.drop(statfields, axis = 0, inplace = True)
        else:
            warnings.warn('No p-values calculated to show.\n\nUse sort_by and keep_stats in editor() to generate these values.')
    else:
        if there_are_p_vals:
            dataframe.drop(statfields, axis = 0, inplace = True)

    # make and set y label
    absolutes = True
    if type(dataframe) == pandas.core.frame.DataFrame:
        try:
            if not all([s.is_integer() for s in dataframe.iloc[0,:].values]):
                absolutes = False
        except:
            pass
    else:
        if not all([s.is_integer() for s in dataframe.values]):        
            absolutes = False

    #  use colormap if need be:
    if num_to_plot > 0:
        if not was_series:
            if 'kind' in kwargs:
                if kwargs['kind'] in ['pie', 'line', 'area']:
                    if colours:
                        if not plotting_a_totals_column:
                            if colours == 'Default':
                                colours = 'Paired'
                            kwargs['colormap'] = colours
        #else:
            if colours:
                if colours == 'Default':
                    colours = 'Paired'
                kwargs['colormap'] = colours

    if piemode:
        if num_to_plot > 0:
            if colours == 'Default':
                colours = 'Paired'
            kwargs['colormap'] = colours
        else:
            if num_to_plot > 0:
                if colours == 'Default':
                    colours = 'Paired'
                kwargs['colormap'] = colours
        #else:
            #if len(dataframe.T.columns) < 8:
                #try:
                    #del kwargs['colormap']
                #except:
                    #pass
    
    # multicoloured bar charts
    if 'kind' in kwargs:
        if colours:
            if kwargs['kind'].startswith('bar'):
                if len(list(dataframe.columns)) == 1:
                    if not black_and_white:
                        import numpy as np
                        the_range = np.linspace(0, 1, num_to_plot)
                        cmap = plt.get_cmap(colours)
                        kwargs['colors'] = [cmap(n) for n in the_range]
                    # make a bar width ... ?
                    #kwargs['width'] = (figsize[0] / float(num_to_plot)) / 1.5


    # reversing legend option
    if reverse_legend is True:
        rev_leg = True
    elif reverse_legend is False:
        rev_leg = False

    # show legend or don't, guess whether to reverse based on kind
    if 'kind' in kwargs:
        if kwargs['kind'] in ['bar', 'barh', 'area', 'line', 'pie']:
            if was_series:
                legend = False
            if kwargs['kind'] == 'pie':
                if pie_legend:
                    legend = True
                else:
                    legend = False
        if kwargs['kind'] in ['barh', 'area']:
            if reverse_legend == 'guess':
                rev_leg = True
    if not 'rev_leg' in locals():
        rev_leg = False

    # the default legend placement
    if legend_pos is True:
        legend_pos = 'best'

    # cut dataframe if just_totals
    try:
        tst = dataframe['Combined total']
        dataframe = dataframe.head(num_to_plot)
    except:
        pass
    
    # rotate automatically
    if 'rot' not in kwargs:
        if not was_series:
            xvals = [str(i) for i in list(dataframe.index)[:num_to_plot]]
            #if 'kind' in kwargs:
                #if kwargs['kind'] in ['barh', 'area']:
                    #xvals = [str(i) for i in list(dataframe.columns)[:num_to_plot]]
        else:
            xvals = [str(i) for i in list(dataframe.columns)[:num_to_plot]]
        if len(max(xvals, key=len)) > 6:
            if not piemode:
                kwargs['rot'] = 45

    # no title for subplots because ugly,
    if sbplt:
        if 'title' in kwargs:
            del kwargs['title'] 
    else:
        kwargs['title'] = title
        
    # no interactive subplots yet:


    if sbplt and interactive:
        import warnings
        interactive = False
        warnings.warn('No interactive subplots yet, sorry.')
        return
        
    # not using pandas for labels or legend anymore.
    #kwargs['labels'] = None
    #kwargs['legend'] = False

    if legend:
        # kwarg options go in leg_options
        leg_options = {'framealpha': .8}
        if 'shadow' in kwargs:
            leg_options['shadow'] = True
        if 'ncol' in kwargs:
            leg_options['ncol'] = kwargs['ncol']
            del kwargs['ncol']
        else:
            if num_to_plot > 6:
                leg_options['ncol'] = num_to_plot / 7

        # determine legend position based on this dict
        if legend_pos:
            possible = {'best': 0, 'upper right': 1, 'upper left': 2, 'lower left': 3, 'lower right': 4, 
                        'right': 5, 'center left': 6, 'center right': 7, 'lower center': 8, 'upper center': 9, 
                        'center': 10, 'o r': 2, 'outside right': 2, 'outside upper right': 2, 
                        'outside center right': 'center left', 'outside lower right': 'lower left'}

            if type(legend_pos) == int:
                the_loc = legend_pos
            elif type(legend_pos) == str:
                try:
                    the_loc = possible[legend_pos]
                except KeyError:
                    raise KeyError('legend_pos value must be one of:\n%s\n or an int between 0-10.' %', '.join(possible.keys()))
            leg_options['loc'] = the_loc
            #weirdness needed for outside plot
            if legend_pos in ['o r', 'outside right', 'outside upper right']:
                leg_options['bbox_to_anchor'] = (1.02, 1)
            if legend_pos == 'outside center right':
                leg_options['bbox_to_anchor'] = (1.02, 0.5)
            if legend_pos == 'outside lower right':
                leg_options['loc'] == 'upper right'
                leg_options['bbox_to_anchor'] = (0.5, 0.5)
        
        # a bit of distance between legend and plot for outside legends
        if type(legend_pos) == str:
            if legend_pos.startswith('o'):
                leg_options['borderaxespad'] = 1

    if not piemode:
        if show_totals.endswith('both') or show_totals.endswith('legend'):
            dataframe = rename_data_with_total(dataframe, 
                                           was_series = was_series, 
                                           using_tex = using_tex, 
                                           absolutes = absolutes)
    else:
        if pie_legend:
            if show_totals.endswith('both') or show_totals.endswith('legend'):
                dataframe = rename_data_with_total(dataframe, 
                                           was_series = was_series, 
                                           using_tex = using_tex, 
                                           absolutes = absolutes)

    if piemode:
        if partial_pie:
            dataframe = dataframe / 100.0

    # some pie things
    if piemode:
        if not sbplt:
            kwargs['y'] = list(dataframe.columns)[0]
            if pie_legend:
                kwargs['legend'] = False
                if was_series:
                    leg_options['labels'] = list(dataframe.index)
                else:
                    leg_options['labels'] = list(dataframe.columns)
        else:
            if pie_legend:
                kwargs['legend'] = False
                if was_series:
                    leg_options['labels'] = list(dataframe.index)
                else:
                    leg_options['labels'] = list(dataframe.index)   
    
    areamode = False
    if 'kind' in kwargs:
        if kwargs['kind'] == 'area':
            areamode = True        

    if legend is False:
        kwargs['legend'] = False

    # cumulative grab first col
    if cumulative:
        kwargs['y'] = list(dataframe.columns)[0]

    # line highlighting option for interactive!
    if interactive:
        if 2 in interactive_types:
            if kwargs['kind'] == 'line':
                kwargs['marker'] = ','
        if not piemode:
            kwargs['alpha'] = 0.1
    
    # convert dates --- works only in my current case!
    if plotting_a_totals_column or not was_series:
        try:
            can_it_be_int = int(list(dataframe.index)[0])
            can_be_int = True
        except:
            can_be_int = False
        if can_be_int:
            if 1500 < int(list(dataframe.index)[0]):
                if 2050 > int(list(dataframe.index)[0]):
                    n = pd.PeriodIndex([d for d in list(dataframe.index)], freq='A')
                    dataframe = dataframe.set_index(n)

    MARKERSIZE = 4
    COLORMAP = {
            0: {'marker': None, 'dash': (None,None)},
            1: {'marker': None, 'dash': [5,5]},
            2: {'marker': "o", 'dash': (None,None)},
            3: {'marker': None, 'dash': [1,3]},
            4: {'marker': "s", 'dash': [5,2,5,2,5,10]},
            5: {'marker': None, 'dash': [5,3,1,2,1,10]},
            6: {'marker': 'o', 'dash': (None,None)},
            7: {'marker': None, 'dash': [5,3,1,3]},
            8: {'marker': "1", 'dash': [1,3]},
            9: {'marker': "*", 'dash': [5,5]},
            10: {'marker': "2", 'dash': [5,2,5,2,5,10]},
            11: {'marker': "s", 'dash': (None,None)}
            }

    HATCHES = {
            0:  {'color': '#dfdfdf', 'hatch':"/"},
            1:  {'color': '#6f6f6f', 'hatch':"\\"},
            2:  {'color': 'b', 'hatch':"|"},
            3:  {'color': '#dfdfdf', 'hatch':"-"},
            4:  {'color': '#6f6f6f', 'hatch':"+"},
            5:  {'color': 'b', 'hatch':"x"}
            }

    if black_and_white:
        if kwargs['kind'] == 'line':
            kwargs['linewidth'] = 1

        cmap = plt.get_cmap('Greys')
        new_cmap = truncate_colormap(cmap, 0.25, 0.95)
        if kwargs['kind'] == 'bar':
            # darker if just one entry
            if len(dataframe.columns) == 1:
                new_cmap = truncate_colormap(cmap, 0.70, 0.90)
        kwargs['colormap'] = new_cmap

    # use styles and plot

    with plt.style.context((style)):

        if not sbplt:
            # check if negative values, no stacked if so
            if areamode:
                if dataframe.applymap(lambda x: x < 0.0).any().any():
                    kwargs['stacked'] = False
                    rev_leg = False
            ax = dataframe.plot(figsize = figsize, **kwargs)
        else:
            if not piemode and not sbplt:
                ax = dataframe.plot(figsize = figsize, **kwargs)
            else:
                ax = dataframe.plot(figsize = figsize, **kwargs)
                handles, labels = plt.gca().get_legend_handles_labels()
                plt.legend( handles, labels, loc = leg_options['loc'], bbox_to_anchor = (0,-0.1,1,1),
                bbox_transform = plt.gcf().transFigure )
                if not tk:
                    plt.show()
                    return
        if 'rot' in kwargs:
            if kwargs['rot'] != 0 and kwargs['rot'] != 90:
                labels = [item.get_text() for item in ax.get_xticklabels()]
                ax.set_xticklabels(labels, rotation = kwargs['rot'], ha='right')

        if transparent:
            plt.gcf().patch.set_facecolor('white')
            plt.gcf().patch.set_alpha(0)

        if black_and_white:
            #plt.grid()
            plt.gca().set_axis_bgcolor('w')
            if kwargs['kind'] == 'line':
                # white background

                # change everything to black and white with interesting dashes and markers
                c = 0
                for line in ax.get_lines():
                    line.set_color('black')
                    #line.set_width(1)
                    line.set_dashes(COLORMAP[c]['dash'])
                    line.set_marker(COLORMAP[c]['marker'])
                    line.set_markersize(MARKERSIZE)
                    c += 1
                    if c == len(COLORMAP.keys()):
                        c = 0

        if legend:
            if not piemode and not sbplt:
                if 3 not in interactive_types:
                    if not rev_leg:
                        lgd = plt.legend(**leg_options)
                    else:
                        handles, labels = plt.gca().get_legend_handles_labels()
                        lgd = plt.legend(handles[::-1], labels[::-1], **leg_options)

            #if black_and_white:
                #lgd.set_facecolor('w')

        #if interactive:
            #if legend:
                #lgd.set_title("")
        #if not sbplt:
            #if 'layout' not in kwargs:
                #plt.tight_layout()

    if interactive:
        # 1 = highlight lines
        # 2 = line labels
        # 3 = legend switches
        ax = plt.gca()
        # fails for piemode
        lines = ax.lines
        handles, labels = plt.gca().get_legend_handles_labels()
        if 1 in interactive_types:
            plugins.connect(plt.gcf(), HighlightLines(lines))

        if 3 in interactive_types:
            plugins.connect(plt.gcf(), InteractiveLegendPlugin(lines, labels, alpha_unsel=0.0))

        for i, l in enumerate(lines):
            y_vals = l.get_ydata()
            x_vals = l.get_xdata()
            x_vals = [str(x) for x in x_vals]
            if absolutes:
                ls = ['%s (%s: %d)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals)]
            else:
                ls = ['%s (%s: %.2f%%)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals)]
            if 2 in interactive_types:
                #if 'kind' in kwargs and kwargs['kind'] == 'area':
                tooltip_line = mpld3.plugins.LineLabelTooltip(lines[i], labels[i])
                mpld3.plugins.connect(plt.gcf(), tooltip_line)
                #else:
                if kwargs['kind'] == 'line':
                    tooltip_point = mpld3.plugins.PointLabelTooltip(l, labels = ls)
                    mpld3.plugins.connect(plt.gcf(), tooltip_point)
        
            # works:
            #plugins.connect(plt.gcf(), plugins.LineLabelTooltip(l, labels[i]))


        #labels = ["Point {0}".format(i) for i in range(num_to_plot)]
        #tooltip = plugins.LineLabelTooltip(lines)
        #mpld3.plugins.connect(plt.gcf(), mpld3.plugins.PointLabelTooltip(lines))

    if piemode:
        if not sbplt:
            plt.axis('equal')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)


    # add x label
    # this could be revised now!
    # if time series period, it's year for now
    if type(dataframe.index) == pandas.tseries.period.PeriodIndex:
        x_label = 'Year'

    if x_label is not False:
        if type(x_label) == str:
            plt.xlabel(x_label)
        else:
            check_x_axis = list(dataframe.index)[0] # get first entry# get second entry of first entry (year, count)
            try:
                if type(dataframe.index) == pandas.tseries.period.PeriodIndex:
                    x_label = 'Year'
                check_x_axis = int(check_x_axis)
                if 1500 < check_x_axis < 2050:
                    x_label = 'Year'
                else:
                    x_label = 'Group'
            except:
                x_label = 'Group'

        if not sbplt:
            if not piemode:
                plt.xlabel(x_label)

    # no offsets for numerical x and y values
    if type(dataframe.index) != pandas.tseries.period.PeriodIndex:
        try:
            # check if x axis can be an int
            check_x_axis = list(dataframe.index)[0]
            can_it_be_int = int(check_x_axis)
            # if so, set these things
            from matplotlib.ticker import ScalarFormatter
            plt.gca().xaxis.set_major_formatter(ScalarFormatter()) 
        except:
            pass

    # same for y axis
    try:
        # check if x axis can be an int
        check_y_axis = list(dataframe.columns)[0]
        can_it_be_int = int(check_y_axis)
        # if so, set these things
        from matplotlib.ticker import ScalarFormatter
        plt.gca().yaxis.set_major_formatter(ScalarFormatter()) 
    except:
        pass

    # y labelling
    y_l = False
    if not absolutes:
        y_l = 'Percentage'
    else:
        y_l = 'Absolute frequency'
    
    if y_label is not False:
        if not sbplt:
            if not piemode:
                if type(y_label) == str:
                    plt.ylabel(y_label)
                else:
                    plt.ylabel(y_l)

    # hacky: turn legend into subplot titles :)
    if sbplt:
        # title the big plot
        #plt.suptitle(title, fontsize = 16)
        # get all axes
        if 'layout' not in kwargs:
            axes = [l for index, l in enumerate(ax)]
        else:
            axes = []
            cols = [l for index, l in enumerate(ax)]
            for col in cols:
                for bit in col:
                    axes.append(bit)
    
        # set subplot titles
    
        for index, a in enumerate(axes):
            try:
                titletext = list(dataframe.columns)[index]
            except:
                pass
            a.set_title(titletext)
            try:
                a.legend_.remove()
            except:
                pass
            # remove axis labels for pie plots
            if piemode:
                a.axes.get_xaxis().set_visible(False)
                a.axes.get_yaxis().set_visible(False)
                a.axis('equal')
    
    # add sums to bar graphs and pie graphs
    # doubled right now, no matter

    if not sbplt:
        if 'kind' in kwargs:
            if kwargs['kind'].startswith('bar'):
                width = ax.containers[0][0].get_width()

    if was_series:
        the_y_limit = plt.ylim()[1]
        if show_totals.endswith('plot') or show_totals.endswith('both'):
            # make plot a bit higher if putting these totals on it
            plt.ylim([0,the_y_limit * 1.05])
            for i, label in enumerate(list(dataframe.index)):
                if len(dataframe.ix[label]) == 1:
                    score = dataframe.ix[label][0]
                else:
                    if absolutes:
                        score = dataframe.ix[label].sum()
                    else:
                        #import warnings
                        #warnings.warn("It's not possible to determine total percentage from individual percentages.")
                        continue
                if not absolutes:
                    plt.annotate('%.2f' % score, (i, score), ha = 'center', va = 'bottom')
                else:
                    plt.annotate(score, (i, score), ha = 'center', va = 'bottom')
    else:
        the_y_limit = plt.ylim()[1]
        if show_totals.endswith('plot') or show_totals.endswith('both'):
            for i, label in enumerate(list(dataframe.columns)):
                if len(dataframe[label]) == 1:
                    score = dataframe[label][0]
                else:
                    if absolutes:
                        score = dataframe[label].sum()
                    else:
                        #import warnings
                        #warnings.warn("It's not possible to determine total percentage from individual percentages.")
                        continue
                if not absolutes:
                    plt.annotate('%.2f' % score, (i, score), ha = 'center', va = 'bottom')
                else:
                    plt.annotate(score, (i, score), ha = 'center', va = 'bottom')        

    #if not running_python_tex:
        #plt.gcf().show()

    plt.subplots_adjust(left=0.1)
    plt.subplots_adjust(bottom=0.18)
    #if 'layout' not in kwargs:
        #plt.tight_layout()



    if save:
        import os
        if running_python_tex:
            imagefolder = '../images'
        else:
            imagefolder = 'images'

        savename = get_savename(imagefolder, save = save, title = title, ext = output_format)

        if not os.path.isdir(imagefolder):
            os.makedirs(imagefolder)

        # save image and get on with our lives
        if legend_pos.startswith('o'):
            plt.gcf().savefig(savename, dpi=150, bbox_extra_artists=(lgd,), bbox_inches='tight', format = output_format)
        else:
            plt.gcf().savefig(savename, dpi=150, format = output_format)
        time = strftime("%H:%M:%S", localtime())
        if os.path.isfile(savename):
            print '\n' + time + ": " + savename + " created."
        else:
            raise ValueError("Error making %s." % savename)

    if not interactive and not running_python_tex and not running_spider and not tk:
        plt.show()
        return
    if running_spider or tk or sbplt:
        return plt

    if interactive:
        plt.subplots_adjust(right=.8)
        plt.subplots_adjust(left=.1)
        try:
            ax.legend_.remove()
        except:
            pass
        return mpld3.display()
Example #37
0
def plotter(title,
            df,
            kind = 'line',
            x_label = None,
            y_label = None,
            style = 'ggplot',
            figsize = (8, 4),
            save = False,
            legend_pos = 'best',
            reverse_legend = 'guess',
            num_to_plot = 7,
            tex = 'try',
            colours = 'Accent',
            cumulative = False,
            pie_legend = True,
            partial_pie = False,
            show_totals = False,
            transparent = False,
            output_format = 'png',
            interactive = False,
            black_and_white = False,
            show_p_val = False,
            indices = False,
            **kwargs):
    """Visualise corpus interrogations.

    :param title: A title for the plot
    :type title: str
    :param df: Data to be plotted
    :type df: pandas.core.frame.DataFrame
    :param x_label: A label for the x axis
    :type x_label: str
    :param y_label: A label for the y axis
    :type y_label: str
    :param kind: The kind of chart to make
    :type kind: str ('line'/'bar'/'barh'/'pie'/'area')
    :param style: Visual theme of plot
    :type style: str ('ggplot'/'bmh'/'fivethirtyeight'/'seaborn-talk'/etc)
    :param figsize: Size of plot
    :type figsize: tuple (int, int)
    :param save: If bool, save with *title* as name; if str, use str as name
    :type save: bool/str
    :param legend_pos: Where to place legend
    :type legend_pos: str ('upper right'/'outside right'/etc)
    :param reverse_legend: Reverse the order of the legend
    :type reverse_legend: bool
    :param num_to_plot: How many columns to plot
    :type num_to_plot: int/'all'
    :param tex: Use TeX to draw plot text
    :type tex: bool
    :param colours: Colourmap for lines/bars/slices
    :type colours: str
    :param cumulative: Plot values cumulatively
    :type cumulative: bool
    :param pie_legend: Show a legend for pie chart
    :type pie_legend: bool
    :param partial_pie: Allow plotting of pie slices only
    :type partial_pie: bool
    :param show_totals: Print sums in plot where possible
    :type show_totals: str -- 'legend'/'plot'/'both'
    :param transparent: Transparent .png background
    :type transparent: bool
    :param output_format: File format for saved image
    :type output_format: str -- 'png'/'pdf'
    :param black_and_white: Create black and white line styles
    :type black_and_white: bool
    :param show_p_val: Attempt to print p values in legend if contained in df
    :type show_p_val: bool
    :param indices: To use when plotting "distance from root"
    :type indices: bool
    :param stacked: When making bar chart, stack bars on top of one another
    :type stacked: str
    :param filled: For area and bar charts, make every column sum to 100
    :type filled: str
    :param legend: Show a legend
    :type legend: bool
    :param rot: Rotate x axis ticks by *rot* degrees
    :type rot: int
    :param subplots: Plot each column separately
    :type subplots: bool
    :param layout: Grid shape to use when *subplots* is True
    :type layout: tuple -- (int, int)
    :param interactive: Experimental interactive options
    :type interactive: list -- [1, 2, 3]
    :returns: matplotlib figure
    """
    import corpkit
    import os

    try:
        from IPython.utils.shimmodule import ShimWarning
        import warnings
        warnings.simplefilter('ignore', ShimWarning)
    except:
        pass

    import matplotlib as mpl
    from matplotlib import rc

    # prefer seaborn plotting
    try:
        import seaborn as sns
    except:
        pass   
    
    if interactive:
        import matplotlib.pyplot as plt, mpld3
    else:
        import matplotlib.pyplot as plt
    
    import pandas
    from pandas import DataFrame

    import numpy
    from time import localtime, strftime
    from tests import check_pytex, check_spider, check_t_kinter

    if interactive:
        import mpld3
        import collections
        from mpld3 import plugins, utils
        from plugins import InteractiveLegendPlugin, HighlightLines

    # check what environment we're in
    tk = check_t_kinter()
    running_python_tex = check_pytex()
    running_spider = check_spider()

    def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
        """Build a new colourmap from the [minval, maxval] slice of *cmap*.

        Used to cut the extremes off a colourmap (e.g. no pure white).

        :param cmap: the colourmap to sample from
        :param minval: lower end of the sampled range
        :param maxval: upper end of the sampled range
        :param n: number of evenly spaced samples taken from *cmap*
        :returns: a LinearSegmentedColormap built from the sampled colours
        """
        import matplotlib.colors as colors
        import numpy as np
        trimmed_name = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
        sampled_colours = cmap(np.linspace(minval, maxval, n))
        return colors.LinearSegmentedColormap.from_list(trimmed_name, sampled_colours)

    def get_savename(imagefolder, save = False, title = False, ext = 'png'):
        """Come up with the savename for the image.

        :param imagefolder: directory the image is to be written into
        :type imagefolder: str
        :param save: if a str, the base of the filename; otherwise *title* is used
        :type save: bool/str
        :param title: fallback base of the filename when *save* is not a str
        :type title: str
        :param ext: file extension, with or without the leading dot
        :type ext: str
        :returns: *imagefolder* joined with the cleaned-up filename plus *ext*
        :raises ValueError: if *save* is not a str and *title* is empty
        """
        import os
        import re

        def urlify(s):
            "Turn title into filename"
            s = s.lower()
            # drop everything that is not a word character, whitespace or hyphen
            s = re.sub(r"[^\w\s-]", '', s)
            s = re.sub(r"\s+", '-', s)
            # leftovers of TeX markup such as \textbf{...} once the slashes are gone
            s = re.sub(r"-(textbf|emph|textsc|textit)", '-', s)
            return s

        if not ext.startswith('.'):
            ext = '.' + ext

        if isinstance(save, str):
            stem = save
        elif title:
            stem = title
        else:
            raise ValueError('Cannot make a savename: need a title, or a string for save.')

        # urlify() strips dots, so an extension the caller already typed has to
        # come off first; otherwise 'plot.png' would turn into 'plotpng.png'
        while len(stem) > len(ext) and stem.lower().endswith(ext.lower()):
            stem = stem[:-len(ext)]

        return os.path.join(imagefolder, urlify(stem) + ext)

    def rename_data_with_total(dataframe, was_series = False, using_tex = False, absolutes = True):
        """Add a total (absolute count or percentage) to every entry name.

        :param dataframe: the data whose labels are to be rewritten
        :type dataframe: pandas.core.frame.DataFrame
        :param was_series: if True the entries are the rows (index) of
                           *dataframe*; otherwise they are its columns
        :type was_series: bool
        :param using_tex: escape the percent sign so TeX can render it
        :type using_tex: bool
        :param absolutes: if True, label entries with ``(n=<sum>)``; if False,
                          label rows with their first-column value as a
                          percentage (columns are left unlabelled, since a total
                          percentage cannot be derived from them)
        :type absolutes: bool
        :returns: when *was_series* is False, *dataframe* itself with its columns
                  renamed in place; when True, a new one-column DataFrame called
                  'Total' holding the first column's values under the new index
        """
        if using_tex:
            percent_template = r'%s (%.2f\%%)'
        else:
            percent_template = '%s (%.2f %%)'
        count_template = '%s (n=%d)'

        if not was_series:
            if absolutes:
                # look columns up by position so every label gets its own sum
                the_labs = [count_template % (w, dataframe.iloc[:, i].sum())
                            for i, w in enumerate(dataframe.columns)]
            else:
                the_labs = list(dataframe.columns)
            dataframe.columns = the_labs
            return dataframe

        first_column = dataframe.iloc[:, 0]
        if absolutes:
            the_labs = [count_template % (w, dataframe.iloc[i].sum())
                        for i, w in enumerate(dataframe.index)]
        else:
            # the percentage of a row is its value in the first column
            the_labs = [percent_template % (w, perc)
                        for w, perc in zip(dataframe.index, first_column)]

        vals = list(first_column.values)
        dataframe = pandas.DataFrame(vals, index = the_labs)
        dataframe.columns = ['Total']
        return dataframe

    def auto_explode(dataframe, input, was_series = False, num_to_plot = 7):
        """Turn entry names and/or positions into a pie chart *explode* list.

        :param dataframe: data being plotted; names are looked up in its index
                          when *was_series* is True, otherwise in its columns
        :param input: a name (str), a position (int), or a list mixing both
        :param was_series: whether the entries live in the index
        :param num_to_plot: length of the list that is returned
        :returns: list of *num_to_plot* offsets, 0.1 for every requested entry
                  and 0 for the rest
        """
        offsets = [0] * num_to_plot
        names = list(dataframe.index) if was_series else list(dataframe.columns)

        # a lone name or position is handled as a one-item list
        if type(input) in (str, int):
            input = [input]
        # anything that is still not a list is ignored
        if type(input) != list:
            return offsets

        for wanted in input:
            position = names.index(wanted) if type(wanted) == str else wanted
            offsets[position] = 0.1
        return offsets

    # check if we're doing subplots
    sbplt = False
    if 'subplots' in kwargs:
        if kwargs['subplots'] is True:
            sbplt = True
    kwargs['subplots'] = sbplt

    if colours is True:
        colours = 'Paired'

    # todo: get this dynamically instead.
    styles = ['dark_background', 'bmh', 'grayscale', 'ggplot', 'fivethirtyeight', 'matplotlib', False, 'mpl-white']
    #if style not in styles:
        #raise ValueError('Style %s not found. Use %s' % (str(style), ', '.join(styles)))

    if style == 'mpl-white':
        try:
            sns.set_style("whitegrid")
        except:
            pass
        style = 'matplotlib'

    if style is not False and style.startswith('seaborn'):
        colours = False

    # use 'draggable = True' to make a draggable legend
    dragmode = kwargs.get('draggable', False)
    kwargs.pop('draggable', None)

    if kwargs.get('savepath'):
        mpl.rcParams['savefig.directory'] = kwargs.get('savepath')
        kwargs.pop('savepath', None)

    mpl.rcParams['savefig.bbox'] = 'tight'
    mpl.rcParams.update({'figure.autolayout': True})

    # try to use tex
    # TO DO:
    # make some font kwargs here
    using_tex = False
    mpl.rcParams['font.family'] = 'sans-serif'
    mpl.rcParams['text.latex.unicode'] = True
    
    if tex == 'try' or tex is True:
        try:
            rc('text', usetex=True)
            rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
            using_tex = True
        except:
            matplotlib.rc('font', family='sans-serif') 
            matplotlib.rc('font', serif='Helvetica Neue') 
            matplotlib.rc('text', usetex='false') 
            rc('text', usetex=False)
    else:
        rc('text', usetex=False)  

    if interactive:
        using_tex = False 

    if show_totals is False:
        show_totals = 'none'

    # find out what kind of plot we're making, and enable
    # or disable interactive values if need be
    kwargs['kind'] = kind.lower()

    if interactive:
        if kwargs['kind'].startswith('bar'):
            interactive_types = [3]
        elif kwargs['kind'] == 'area':
            interactive_types = [2, 3]
        elif kwargs['kind'] == 'line':
            interactive_types = [2, 3]
        elif kwargs['kind'] == 'pie':
            interactive_types = None
            warnings.warn('Interactive plotting not yet available for pie plots.')
        else:
            interactive_types = [None]
    if interactive is False:
        interactive_types = [None]

    # find out if pie mode, add autopct format
    piemode = False
    if kind == 'pie':
        piemode = True
        # always the best spot for pie
        #if legend_pos == 'best':
            #legend_pos = 'lower left'
        if show_totals.endswith('plot') or show_totals.endswith('both'):
            kwargs['pctdistance'] = 0.6
            if using_tex:
                kwargs['autopct'] = r'%1.1f\%%'
            else:
                kwargs['autopct'] = '%1.1f%%'

    # copy data, make series into df
    dataframe = df.copy()
    was_series = False
    if type(dataframe) == pandas.core.series.Series:
        was_series = True
        if not cumulative:
            dataframe = DataFrame(dataframe)
        else:
            dataframe = DataFrame(dataframe.cumsum())
    else:
        # don't know if this is much good.
        if cumulative:
            dataframe = DataFrame(dataframe.cumsum())
        if len(list(dataframe.columns)) == 1:
            was_series = True
    
    # attempt to convert x axis to ints:
    try:
        dataframe.index = [int(i) for i in list(dataframe.index)]
    except:
        pass

    # remove totals and tkinter order
    if not was_series and not all(x.lower() == 'total' for x in list(dataframe.columns)):
        for name, ax in zip(['Total'] * 2 + ['tkintertable-order'] * 2, [0, 1, 0, 1]):
            try:
                dataframe = dataframe.drop(name, axis = ax, errors = 'ignore')
            except:
                pass
    else:
        dataframe = dataframe.drop('tkintertable-order', errors = 'ignore')
        dataframe = dataframe.drop('tkintertable-order', axis = 1, errors = 'ignore')
            
    # look at columns to see if all can be ints, in which case, set up figure
    # for depnumming
    if not was_series:
        if indices == 'guess':
            def isint(x):
                """Return True if *x* can be read as a whole number.

                Values that cannot be converted at all (non-numeric strings,
                None, infinities, NaN) count as not being ints.
                """
                try:
                    a = float(x)
                    b = int(a)
                # a tuple is needed here: `except A or B` only ever catches A.
                # int(inf) raises OverflowError, float(None) raises TypeError.
                except (ValueError, OverflowError, TypeError):
                    return False
                else:
                    return a == b

            if all([isint(x) is True for x in list(dataframe.columns)]):
                indices = True
            else:
                indices = False

        # if depnumming, plot all, transpose, and rename axes
        if indices is True:
            num_to_plot = 'all'
            dataframe = dataframe.T
            if y_label is None:
                y_label = 'Percentage of all matches'
            if x_label is None:
                x_label = ''

    # set backend?
    output_formats = ['svgz', 'ps', 'emf', 'rgba', 'raw', 'pdf', 'svg', 'eps', 'png', 'pgf']
    if output_format not in output_formats:
        raise ValueError('%s output format not recognised. Must be: %s' % (output_format, ', '.join(output_formats)))
    
    # don't know if these are necessary
    if 'pdf' in output_format:
        plt.switch_backend(output_format) 
    if 'pgf' in output_format:
        plt.switch_backend(output_format)

    if num_to_plot == 'all':
        if was_series:
            if not piemode:
                num_to_plot = len(dataframe)
            else:
                num_to_plot = len(dataframe)
        else:
            if not piemode:
                num_to_plot = len(list(dataframe.columns))
            else:
                num_to_plot = len(dataframe.index)

    # explode pie, or remove if not piemode
    if piemode and not sbplt and kwargs.get('explode'):
        kwargs['explode'] = auto_explode(dataframe, 
                                        kwargs['explode'], 
                                        was_series = was_series, 
                                        num_to_plot = num_to_plot)
    else:
        kwargs.pop('explode', None)

    legend = kwargs.get('legend', False)

    #cut data short
    plotting_a_totals_column = False
    if was_series:
        if list(dataframe.columns)[0] != 'Total':
            try:
                can_be_ints = [int(x) for x in list(dataframe.index)]
                num_to_plot = len(dataframe)
            except:
                dataframe = dataframe[:num_to_plot]
        elif list(dataframe.columns)[0] == 'Total':
            plotting_a_totals_column = True
            if not 'legend' in kwargs:
                legend = False
            num_to_plot = len(dataframe)
    else:
        dataframe = dataframe.T.head(num_to_plot).T

    # remove stats fields, put p in entry text, etc.
    statfields = ['slope', 'intercept', 'r', 'p', 'stderr']
    try:
        dataframe = dataframe.drop(statfields, axis = 1, errors = 'ignore')
    except:
        pass    
    try:
        dataframe.ix['p']
        there_are_p_vals = True
    except:
        there_are_p_vals = False
    if show_p_val:
        if there_are_p_vals:
            newnames = []
            for col in list(dataframe.columns):
                pval = dataframe[col]['p']

                def p_string_formatter(val):
                    """Render a p-value as legend text.

                    Values below 0.001 are shown as an inequality (with the
                    less-than sign in math mode when TeX is in use); anything
                    else is shown to three decimal places.
                    """
                    if not val < 0.001:
                        return 'p = %s' % format(val, '.3f')
                    if using_tex:
                        return r'p $<$ 0.001'
                    return 'p < 0.001'

                pstr = p_string_formatter(pval)
                newname = '%s (%s)' % (col, pstr)
                newnames.append(newname)
            dataframe.columns = newnames
            dataframe.drop(statfields, axis = 0, inplace = True, errors = 'ignore')
        else:
            warnings.warn('No p-values calculated to show.\n\nUse sort_by and keep_stats in editor() to generate these values.')
    else:
        if there_are_p_vals:
            dataframe.drop(statfields, axis = 0, inplace = True, errors = 'ignore')

    # make and set y label
    absolutes = True
    if type(dataframe) == pandas.core.frame.DataFrame:
        try:
            if not all([s.is_integer() for s in dataframe.iloc[0,:].values]):
                absolutes = False
        except:
            pass
    else:
        if not all([s.is_integer() for s in dataframe.values]):        
            absolutes = False

    #  use colormap if need be:
    if num_to_plot > 0:
        if not was_series:
            if kind in ['pie', 'line', 'area']:
                if colours:
                    if not plotting_a_totals_column:
                        if colours == 'Default':
                            colours = 'Paired'
                        kwargs['colormap'] = colours
        #else:

            if colours:
                if colours == 'Default':
                    colours = 'Paired'
                kwargs['colormap'] = colours

    if piemode:
        if num_to_plot > 0:
            if colours == 'Default':
                colours = 'Paired'
            kwargs['colormap'] = colours
        else:
            if num_to_plot > 0:
                if colours == 'Default':
                    colours = 'Paired'
                kwargs['colormap'] = colours
    
    # multicoloured bar charts
    if colours:
        if kind.startswith('bar'):
            if len(list(dataframe.columns)) == 1:
                if not black_and_white:
                    import numpy as np
                    the_range = np.linspace(0, 1, num_to_plot)
                    cmap = plt.get_cmap(colours)
                    kwargs['colors'] = [cmap(n) for n in the_range]
                # make a bar width ... ? ...
                #kwargs['width'] = (figsize[0] / float(num_to_plot)) / 1.5


    # reversing legend option
    if reverse_legend is True:
        rev_leg = True
    elif reverse_legend is False:
        rev_leg = False

    # show legend or don't, guess whether to reverse based on kind
    if kind in ['bar', 'barh', 'area', 'line', 'pie']:
        if was_series:
            legend = False
        if kind == 'pie':
            if pie_legend:
                legend = True
            else:
                legend = False
    if kind in ['barh', 'area']:
        if reverse_legend == 'guess':
            rev_leg = True
    if not 'rev_leg' in locals():
        rev_leg = False

    # the default legend placement
    if legend_pos is True:
        legend_pos = 'best'

    # cut dataframe if just_totals
    try:
        tst = dataframe['Combined total']
        dataframe = dataframe.head(num_to_plot)
    except:
        pass
    
    # rotate automatically
    if 'rot' not in kwargs:
        if not was_series:
            xvals = [str(i) for i in list(dataframe.index)[:num_to_plot]]
            #if 'kind' in kwargs:
                #if kwargs['kind'] in ['barh', 'area']:
                    #xvals = [str(i) for i in list(dataframe.columns)[:num_to_plot]]
        else:
            xvals = [str(i) for i in list(dataframe.columns)[:num_to_plot]]
        if len(max(xvals, key=len)) > 6:
            if not piemode:
                kwargs['rot'] = 45

    # no title for subplots because ugly,
    if title and not sbplt:
        kwargs['title'] = title
        
    # no interactive subplots yet:
    if sbplt and interactive:
        import warnings
        interactive = False
        warnings.warn('No interactive subplots yet, sorry.')
        return
        
    # not using pandas for labels or legend anymore.
    #kwargs['labels'] = None
    #kwargs['legend'] = False

    if legend:
        if num_to_plot > 6:
            if not kwargs.get('ncol'):
                kwargs['ncol'] = num_to_plot / 7
        # kwarg options go in leg_options
        leg_options = {'framealpha': .8,
                       'shadow': kwargs.get('shadow', False),
                       'ncol': kwargs.pop('ncol', 1)}    

        # determine legend position based on this dict
        if legend_pos:
            possible = {'best': 0, 'upper right': 1, 'upper left': 2, 'lower left': 3, 'lower right': 4, 
                        'right': 5, 'center left': 6, 'center right': 7, 'lower center': 8, 'upper center': 9, 
                        'center': 10, 'o r': 2, 'outside right': 2, 'outside upper right': 2, 
                        'outside center right': 'center left', 'outside lower right': 'lower left'}

            if type(legend_pos) == int:
                the_loc = legend_pos
            elif type(legend_pos) == str:
                try:
                    the_loc = possible[legend_pos]
                except KeyError:
                    raise KeyError('legend_pos value must be one of:\n%s\n or an int between 0-10.' %', '.join(list(possible.keys())))
            leg_options['loc'] = the_loc
            #weirdness needed for outside plot
            if legend_pos in ['o r', 'outside right', 'outside upper right']:
                leg_options['bbox_to_anchor'] = (1.02, 1)
            if legend_pos == 'outside center right':
                leg_options['bbox_to_anchor'] = (1.02, 0.5)
            if legend_pos == 'outside lower right':
                leg_options['loc'] == 'upper right'
                leg_options['bbox_to_anchor'] = (0.5, 0.5)
        
        # a bit of distance between legend and plot for outside legends
        if type(legend_pos) == str:
            if legend_pos.startswith('o'):
                leg_options['borderaxespad'] = 1

    if not piemode:
        if show_totals.endswith('both') or show_totals.endswith('legend'):
            dataframe = rename_data_with_total(dataframe, 
                                           was_series = was_series, 
                                           using_tex = using_tex, 
                                           absolutes = absolutes)
    else:
        if pie_legend:
            if show_totals.endswith('both') or show_totals.endswith('legend'):
                dataframe = rename_data_with_total(dataframe, 
                                           was_series = was_series, 
                                           using_tex = using_tex, 
                                           absolutes = absolutes)

    if piemode:
        if partial_pie:
            dataframe = dataframe / 100.0

    # some pie things
    if piemode:
        if not sbplt:
            kwargs['y'] = list(dataframe.columns)[0]
            if pie_legend:
                kwargs['legend'] = False
                if was_series:
                    leg_options['labels'] = list(dataframe.index)
                else:
                    leg_options['labels'] = list(dataframe.columns)
        else:
            if pie_legend:
                kwargs['legend'] = False
                if was_series:
                    leg_options['labels'] = list(dataframe.index)
                else:
                    leg_options['labels'] = list(dataframe.index)   
    
    def filler(df):
        """Rescale every row of ``df`` so that its values sum to 100.

        The work is done on a transposed copy, so the frame passed in is
        not modified.
        """
        shares = df.T.copy()
        for name in list(shares.columns):
            column_total = shares[name].sum()
            shares[name] = shares[name] * 100.0 / column_total
        return shares.T

    areamode = False
    if kind == 'area':
        areamode = True

    if legend is False:
        kwargs['legend'] = False

    # line highlighting option for interactive!
    if interactive:
        if 2 in interactive_types:
            if kind == 'line':
                kwargs['marker'] = ','
        if not piemode:
            kwargs['alpha'] = 0.1
    
    # convert dates --- works only in my current case!
    if plotting_a_totals_column or not was_series:
        try:
            can_it_be_int = int(list(dataframe.index)[0])
            can_be_int = True
        except:
            can_be_int = False
        if can_be_int:
            if 1500 < int(list(dataframe.index)[0]):
                if 2050 > int(list(dataframe.index)[0]):
                    n = pandas.PeriodIndex([d for d in list(dataframe.index)], freq='A')
                    dataframe = dataframe.set_index(n)

        if kwargs.get('filled'):
            if areamode or kind.startswith('bar'):
                dataframe = filler(dataframe)
            kwargs.pop('filled', None)

    MARKERSIZE = 4
    COLORMAP = {
            0: {'marker': None, 'dash': (None,None)},
            1: {'marker': None, 'dash': [5,5]},
            2: {'marker': "o", 'dash': (None,None)},
            3: {'marker': None, 'dash': [1,3]},
            4: {'marker': "s", 'dash': [5,2,5,2,5,10]},
            5: {'marker': None, 'dash': [5,3,1,2,1,10]},
            6: {'marker': 'o', 'dash': (None,None)},
            7: {'marker': None, 'dash': [5,3,1,3]},
            8: {'marker': "1", 'dash': [1,3]},
            9: {'marker': "*", 'dash': [5,5]},
            10: {'marker': "2", 'dash': [5,2,5,2,5,10]},
            11: {'marker': "s", 'dash': (None,None)}
            }

    HATCHES = {
            0:  {'color': '#dfdfdf', 'hatch':"/"},
            1:  {'color': '#6f6f6f', 'hatch':"\\"},
            2:  {'color': 'b', 'hatch':"|"},
            3:  {'color': '#dfdfdf', 'hatch':"-"},
            4:  {'color': '#6f6f6f', 'hatch':"+"},
            5:  {'color': 'b', 'hatch':"x"}
            }

    if black_and_white:
        if kind == 'line':
            kwargs['linewidth'] = 1

        cmap = plt.get_cmap('Greys')
        new_cmap = truncate_colormap(cmap, 0.25, 0.95)
        if kind == 'bar':
            # darker if just one entry
            if len(dataframe.columns) == 1:
                new_cmap = truncate_colormap(cmap, 0.70, 0.90)
        kwargs['colormap'] = new_cmap

    class dummy_context_mgr():
        """No-op context manager, used when no plotting style is applied.

        Possibly redundant now that newer matplotlib ships a 'classic' style.
        """

        def __enter__(self):
            # nothing is bound by ``with ... as``
            return None

        def __exit__(self, exc_type, exc_value, traceback):
            # a falsy return lets any exception raised in the block propagate
            return False

    with plt.style.context((style)) if style != 'matplotlib' else dummy_context_mgr():

        if not sbplt:
            # check if negative values, no stacked if so
            if areamode:
                kwargs['legend'] = False
                if dataframe.applymap(lambda x: x < 0.0).any().any():
                    kwargs['stacked'] = False
                    rev_leg = False
            ax = dataframe.plot(figsize = figsize, **kwargs)
            if areamode:
                handles, labels = plt.gca().get_legend_handles_labels()
                del handles
                del labels
        else:
            plt.gcf().set_tight_layout(False)
            if not piemode:
                ax = dataframe.plot(figsize = figsize, **kwargs)
            else:
                ax = dataframe.plot(figsize = figsize, **kwargs)
                handles, labels = plt.gca().get_legend_handles_labels()
                plt.legend( handles, labels, loc = leg_options['loc'], bbox_to_anchor = (0,-0.1,1,1),
                bbox_transform = plt.gcf().transFigure )

                # this line allows layouts with missing plots
                # i.e. layout = (5, 2) with only nine plots
                plt.gcf().set_tight_layout(False)
                
        if 'rot' in kwargs:
            if kwargs['rot'] != 0 and kwargs['rot'] != 90:
                labels = [item.get_text() for item in ax.get_xticklabels()]
                ax.set_xticklabels(labels, rotation = kwargs['rot'], ha='right')

        if transparent:
            plt.gcf().patch.set_facecolor('white')
            plt.gcf().patch.set_alpha(0)

        if black_and_white:
            if kind == 'line':
                # white background
                # change everything to black and white with interesting dashes and markers
                c = 0
                for line in ax.get_lines():
                    line.set_color('black')
                    #line.set_width(1)
                    line.set_dashes(COLORMAP[c]['dash'])
                    line.set_marker(COLORMAP[c]['marker'])
                    line.set_markersize(MARKERSIZE)
                    c += 1
                    if c == len(list(COLORMAP.keys())):
                        c = 0

        # draw legend with proper placement etc
        if legend:
            if not piemode and not sbplt:
                if 3 not in interactive_types:
                    handles, labels = plt.gca().get_legend_handles_labels()
                    # area doubles the handles and labels. this removes half:
                    if areamode:
                        handles = handles[-len(handles) / 2:]
                        labels = labels[-len(labels) / 2:]
                    if rev_leg:
                        handles = handles[::-1]
                        labels = labels[::-1]
                    lgd = plt.legend(handles, labels, **leg_options)

    if interactive:
        # 1 = highlight lines
        # 2 = line labels
        # 3 = legend switches
        ax = plt.gca()
        # fails for piemode
        lines = ax.lines
        handles, labels = plt.gca().get_legend_handles_labels()
        if 1 in interactive_types:
            plugins.connect(plt.gcf(), HighlightLines(lines))

        if 3 in interactive_types:
            plugins.connect(plt.gcf(), InteractiveLegendPlugin(lines, labels, alpha_unsel=0.0))

        for i, l in enumerate(lines):
            y_vals = l.get_ydata()
            x_vals = l.get_xdata()
            x_vals = [str(x) for x in x_vals]
            if absolutes:
                ls = ['%s (%s: %d)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals)]
            else:
                ls = ['%s (%s: %.2f%%)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals)]
            if 2 in interactive_types:
                #if 'kind' in kwargs and kwargs['kind'] == 'area':
                tooltip_line = mpld3.plugins.LineLabelTooltip(lines[i], labels[i])
                mpld3.plugins.connect(plt.gcf(), tooltip_line)
                #else:
                if kind == 'line':
                    tooltip_point = mpld3.plugins.PointLabelTooltip(l, labels = ls)
                    mpld3.plugins.connect(plt.gcf(), tooltip_point)
        
    if piemode:
        if not sbplt:
            plt.axis('equal')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

    # add x label
    # this could be revised now!
    # if time series period, it's year for now
    if type(dataframe.index) == pandas.tseries.period.PeriodIndex:
        x_label = 'Year'

    if x_label is not False:
        if type(x_label) == str:
            plt.xlabel(x_label)
        else:
            check_x_axis = list(dataframe.index)[0] # get first entry# get second entry of first entry (year, count)
            try:
                if type(dataframe.index) == pandas.tseries.period.PeriodIndex:
                    x_label = 'Year'
                check_x_axis = int(check_x_axis)
                if 1500 < check_x_axis < 2050:
                    x_label = 'Year'
                else:
                    x_label = 'Group'
            except:
                x_label = 'Group'

        if not sbplt:
            if not piemode:
                plt.xlabel(x_label)

    def is_number(s):
        """Return True if ``s`` can be converted to a float or a complex.

        Values that cannot be converted at all (``None``, tuples, ...) make
        ``float``/``complex`` raise ``TypeError`` rather than ``ValueError``;
        both are treated as "not a number" so the check never raises for
        them.
        """
        for convert in (float, complex):  # float also covers int-like input
            try:
                convert(s)
            except (TypeError, ValueError):
                continue
            return True
        return False

    # for now, always turn off sci notation
    from matplotlib.ticker import ScalarFormatter
    if type(dataframe.index) != pandas.tseries.period.PeriodIndex:
        try:
            if all(is_number(s) for s in list(dataframe.index)):
                plt.gca().xaxis.set_major_formatter(ScalarFormatter()) 
        except:
            pass
    try:
        if all(is_number(s) for s in list(dataframe.columns)):
            plt.gca().yaxis.set_major_formatter(ScalarFormatter()) 
    except:
        pass

    # y labelling
    y_l = False
    if not absolutes:
        y_l = 'Percentage'
    else:
        y_l = 'Absolute frequency'
    
    def suplabel(axis,label,label_prop=None,
                 labelpad=5,
                 ha='center',va='center'):
        """Add a figure-level x or y label to the current figure.

        The label is centred along the chosen direction and placed just
        outside the left-most (for "y") or bottom-most (for "x") axes of the
        figure, offset by ``labelpad / dpi`` in figure coordinates.

        axis       - string: "x" or "y" (case-insensitive)
        label      - string
        label_prop - keyword dictionary passed on to the text call
        labelpad   - padding from the axis (default: 5)
        ha         - horizontal alignment (default: "center")
        va         - vertical alignment (default: "center")

        Raises Exception if ``axis`` is neither "x" nor "y".
        """
        figure = plt.gcf()
        boxes = [axes.get_position() for axes in figure.axes]
        left_edge = min(box.xmin for box in boxes)
        bottom_edge = min(box.ymin for box in boxes)
        dpi = figure.dpi

        which = axis.lower()
        if which == "y":
            rotation, x, y = 90., left_edge - float(labelpad)/dpi, 0.5
        elif which == 'x':
            rotation, x, y = 0., 0.5, bottom_edge - float(labelpad)/dpi
        else:
            raise Exception("Unexpected axis: x or y")

        text_options = dict() if label_prop is None else label_prop
        figure.text(x, y, label,
                    rotation=rotation,
                    transform=figure.transFigure,
                    ha=ha, va=va,
                    **text_options)

    if y_label is not False:
        if not sbplt:
            if not piemode:
                if type(y_label) == str:
                    plt.ylabel(y_label)
                else:
                    plt.ylabel(y_l)
        else:
            if type(y_label) == str:
                the_y = y_label
            else:
                the_y = y_l
            #suplabel('y', the_y, labelpad = 1.5)
            plt.gcf().text(0.04, 0.5, the_y, va='center', rotation='vertical')
            #plt.subplots_adjust(left=0.5)
        
        #    if not piemode:
        #        if type(y_label) == str:
        #            plt.ylabel(y_label)
        #        else:
        #            plt.ylabel(y_l)


    # hacky: turn legend into subplot titles :)
    if sbplt:
        # title the big plot
        #plt.gca().suptitle(title, fontsize = 16)
        #plt.subplots_adjust(top=0.9)
        # get all axes
        if 'layout' not in kwargs:
            axes = [l for index, l in enumerate(ax)]
        else:
            axes = []
            cols = [l for index, l in enumerate(ax)]
            for col in cols:
                for bit in col:
                    axes.append(bit)
    
        # set subplot titles
        for index, a in enumerate(axes):
            try:
                titletext = list(dataframe.columns)[index]
            except:
                pass
            a.set_title(titletext)
            try:
                a.legend_.remove()
            except:
                pass
            # remove axis labels for pie plots
            if piemode:
                a.axes.get_xaxis().set_visible(False)
                a.axes.get_yaxis().set_visible(False)
                a.axis('equal')

            # show grid
            a.grid(b=kwargs.get('grid', False))
            kwargs.pop('grid', None)
    
    # add sums to bar graphs and pie graphs
    # doubled right now, no matter

    if not sbplt:
        if kind.startswith('bar'):
            width = ax.containers[0][0].get_width()

        # show grid
        ax.grid(b=kwargs.get('grid', False))
        kwargs.pop('grid', None)

    if was_series:
        the_y_limit = plt.ylim()[1]
        if show_totals.endswith('plot') or show_totals.endswith('both'):
            # make plot a bit higher if putting these totals on it
            plt.ylim([0,the_y_limit * 1.05])
            for i, label in enumerate(list(dataframe.index)):
                if len(dataframe.ix[label]) == 1:
                    score = dataframe.ix[label][0]
                else:
                    if absolutes:
                        score = dataframe.ix[label].sum()
                    else:
                        #import warnings
                        #warnings.warn("It's not possible to determine total percentage from individual percentages.")
                        continue
                if not absolutes:
                    plt.annotate('%.2f' % score, (i, score), ha = 'center', va = 'bottom')
                else:
                    plt.annotate(score, (i, score), ha = 'center', va = 'bottom')
    else:
        the_y_limit = plt.ylim()[1]
        if show_totals.endswith('plot') or show_totals.endswith('both'):
            for i, label in enumerate(list(dataframe.columns)):
                if len(dataframe[label]) == 1:
                    score = dataframe[label][0]
                else:
                    if absolutes:
                        score = dataframe[label].sum()
                    else:
                        #import warnings
                        #warnings.warn("It's not possible to determine total percentage from individual percentages.")
                        continue
                if not absolutes:
                    plt.annotate('%.2f' % score, (i, score), ha = 'center', va = 'bottom')
                else:
                    plt.annotate(score, (i, score), ha = 'center', va = 'bottom')        

    plt.subplots_adjust(left=0.1)
    plt.subplots_adjust(bottom=0.18)

    if 'layout' not in kwargs:
        if not sbplt:
            plt.tight_layout()

    if save:
        import os
        if running_python_tex:
            imagefolder = '../images'
        else:
            imagefolder = 'images'

        savename = get_savename(imagefolder, save = save, title = title, ext = output_format)

        if not os.path.isdir(imagefolder):
            os.makedirs(imagefolder)

        # save image and get on with our lives
        if legend_pos.startswith('o'):
            plt.gcf().savefig(savename, dpi=150, bbox_extra_artists=(lgd,), bbox_inches='tight', format = output_format)
        else:
            plt.gcf().savefig(savename, dpi=150, format = output_format)
        time = strftime("%H:%M:%S", localtime())
        if os.path.isfile(savename):
            print('\n' + time + ": " + savename + " created.")
        else:
            raise ValueError("Error making %s." % savename)

    if dragmode:
        plt.legend().draggable()


    if sbplt:
        plt.subplots_adjust(right=.8)
        plt.subplots_adjust(left=.1)

    if not interactive and not running_python_tex and not running_spider \
        and not tk:
        plt.gcf().show()
        return
    elif running_spider or tk:
        return plt

    if interactive:
        plt.subplots_adjust(right=.8)
        plt.subplots_adjust(left=.1)
        try:
            ax.legend_.remove()
        except:
            pass
        return mpld3.display()
Example #38
0
def topic_sim(segrated_result):
    """Plot pairwise text distances and extract LDA topics from snippets.

    The texts in ``segrated_result.snippet2`` are TF-IDF vectorised, embedded
    in two dimensions with MDS over their cosine distances, and drawn as an
    mpld3 scatter plot with per-point tooltips. An LDA model with 20 topics
    is then fitted on the same TF-IDF matrix and summarised through
    ``display_topics``.

    Args:
        segrated_result: object exposing a ``snippet2`` attribute that has a
            ``tolist()`` method (presumably a pandas DataFrame whose
            ``snippet2`` column holds the texts -- confirm against caller).

    Returns:
        A 2-tuple ``(display_result, html)``: ``display_result`` is whatever
        ``display_topics`` returns for ten top words per topic, and ``html``
        is the result of ``mpld3.display()``.
    """
    texts_list = segrated_result.snippet2.tolist()

    vectorizer = TfidfVectorizer()
    # dense array: the similarity and LDA steps below both consume it
    dtm = vectorizer.fit_transform(texts_list).toarray()
    tfidf_feature_names = vectorizer.get_feature_names()

    # cosine distance between every pair of documents
    dist = 1 - cosine_similarity(dtm)
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    pos = mds.fit_transform(dist)  # one row per text, one column per component
    xs, ys = pos[:, 0], pos[:, 1]

    # NOTE(review): this produces len(xs) - 1 names, counting down from
    # len(xs) - 1 to 1, i.e. one label fewer than there are points. Confirm
    # the intended numbering before changing it.
    names = list(reversed(range(1, len(xs), 1)))

    fig, ax = plt.subplots()
    # random colour and size for each point
    color, size = np.random.random((2, len(xs)))
    scatter = ax.scatter(xs,
                         ys,
                         c=color,
                         s=1000 * size,
                         alpha=0.3,
                         cmap=plt.cm.jet)
    ax.grid(color='lightgray', linestyle='solid', alpha=0.7)
    ax.set_title('Scatter plot of text corpuses distances', size=16)

    labels = ['Text {0}'.format(i) for i in names]
    tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=labels)
    mpld3.plugins.connect(fig, tooltip)

    no_topics = 20
    no_top_words = 10
    # NOTE(review): newer scikit-learn releases call this parameter
    # ``n_components``; confirm against the installed version.
    lda = LatentDirichletAllocation(n_topics=no_topics,
                                    max_iter=500,
                                    learning_method='online',
                                    learning_offset=50.,
                                    random_state=0).fit(dtm)
    display_result = display_topics(lda, tfidf_feature_names, no_top_words)

    return display_result, mpld3.display()
# Display names used for the row labels of each tooltip table
column_rename_map = {
    'season': 'Season',
    'number': 'Episode',
    'rating': 'Rating',
    'votes': 'Votes',
    'first_aired': 'Air date',
    'overview': 'Synopsis',
}

# One single-column HTML table per row of df, headed by that row's title
for i in range(len(df)):
    episode = df.iloc[[i]]
    label = episode[columns_for_labels].T
    label.columns = episode.title
    labels.append(label.rename(column_rename_map).to_html())

# Plot scatter points, coloured by season: season k uses palette entry k - 1
season_count = len(df.season.value_counts())
season_palette = pd.Series(list(sns.color_palette(n_colors=season_count)))
c = (df.season - 1).map(season_palette)
points = ax.scatter(df.number_abs, df.rating, alpha=.99, c=c, zorder=2)

# Axis ranges, captions and figure size
ax.set_xlim(0, main_episodes.number_abs.max() + 1)
ax.set_ylim(5, 10)
ax.set_xlabel('Episode number')
ax.set_ylabel('Trakt.tv Episode Rating')
ax.set_title(show_summary.title[0], size=20)
fig.set_size_inches(12, 6)

# Attach the HTML tables as tooltips on the scatter points
tooltip = plugins.PointHTMLTooltip(points, labels,
                                   voffset=10, hoffset=10, css=css)
plugins.connect(fig, tooltip)

# Save a standalone HTML copy named after the title, then display
html_path = str(show_summary.title[0] + ".html")
mpld3.save_html(fig, html_path)
mpld3.display()


Example #40
0
import matplotlib.pyplot as plt
import numpy as np

import mpld3
from mpld3 import plugins

fig, ax = plt.subplots()

# 20 samples on [-2, 2]; `y` is `x` as a column so the two broadcast to 20x20
x = np.linspace(-2, 2, 20)
y = x[:, None]

# One 20x20 plane per image channel
channels = [
    np.exp(-(x - 1) ** 2 - y ** 2),
    np.exp(-(x + 0.71) ** 2 - (y - 0.71) ** 2),
    np.exp(-(x + 0.71) ** 2 - (y + 0.71) ** 2),
    np.exp(-0.25 * (x ** 2 + y ** 2)),
]
X = np.zeros((20, 20, 4))
for index, channel in enumerate(channels):
    X[:, :, index] = channel

im = ax.imshow(X,
               extent=(10, 20, 10, 20),
               origin='lower',
               zorder=1,
               interpolation='nearest')
fig.colorbar(im, ax=ax)
ax.set_title('An Image', size=20)

# Show the mouse coordinates while hovering over the figure
mouse_position = plugins.MousePosition(fontsize=14)
plugins.connect(fig, mouse_position)

mpld3.display(fig)
Example #41
0
def make_figure(figANDax, sample_data):
    """Draw the reference and sample rows as a 2D and a 3D scatter plot.

    Reference rows are unpickled from the file ``tp`` in the working
    directory, stacked on top of ``sample_data`` and plotted on the supplied
    axes. Column 3 gives x, column 4 gives y, column 5 gives z (3D plot
    only) and the marker size, and column 1 gives the tooltip label.

    Args:
        figANDax: 4-sequence ``(fig, ax, fig2, ax2)``. ``ax`` receives the 2D
            plot and ``ax2`` the 3D one (it must support ``set_zlabel``).
            ``fig2`` is accepted but not used here.
        sample_data: array-like of rows with the same column layout as the
            reference rows.

    Returns:
        None. ``mpld3.display(fig)`` is called but its result is discarded.
    """
    fig, ax, _fig2, ax2 = figANDax

    # NOTE(security): pickle.load can execute arbitrary code; this is only
    # safe while 'tp' is a trusted file.
    with open('tp', 'rb') as f:
        _hdist, tst_data = pickle.load(f)

    cm = plt.cm.rainbow
    tst_data = np.array(tst_data)
    sample_data = np.array(sample_data)
    # reference rows take the low end of the colormap, sample rows the high end
    col = [cm(.01)] * tst_data.shape[0] + [cm(.9)] * sample_data.shape[0]
    data = np.vstack((tst_data, sample_data))

    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    xs = data[:, 3].astype(float)
    xs[np.isnan(xs)] = 0
    ys = data[:, 4].astype(float)
    ys[~np.isfinite(ys)] = 0
    zs = data[:, 5].astype(float)
    zs[~np.isfinite(zs)] = 0

    # marker sizes come from the raw column 5 values, not the cleaned `zs`
    sizs = [(.6 - float(x)) / .00755 for x in data[:, 5]]
    labels = list(data[:, 1])

    # --- 2D projection ---
    fig.set_size_inches([5, 4])
    ax.grid(color='white', linestyle='solid')
    ax.set_ylim(0, 0.38)
    ax.set_xlim(0, 1)
    put_patches(ax)
    sct = ax.scatter(
        xs,
        ys,
        c=col,
        s=sizs,
        alpha=1.0,
        cmap=plt.cm.rainbow)

    # proxy patches for the legend; the text shown comes from `leglabels`
    red_patch = mpatches.Patch(color=plt.cm.rainbow(.98), label='The red data')
    pur_patch = mpatches.Patch(color=plt.cm.rainbow(.02), label='The red data')
    handles, leglabels = ax.get_legend_handles_labels()
    handles += [red_patch, pur_patch]
    leglabels += ['sample', 'reference']
    ax.legend(handles, leglabels)

    ax.set_title("Dynamic landscape, 2D projection", size=20)
    plugins.connect(
        fig,
        plugins.PointLabelTooltip(sct, labels),
        ClickInfo(sct, labels))
    ax.set_xlabel('Avg Temp', size=15)
    ax.set_ylabel('mean(abs(d_Temp)) - abs(mean(d_Temp))', size=15)

    # --- 3D view ---
    ax2.scatter(
        xs,
        ys,
        zs,
        c=col,
        s=sizs,
        alpha=1.0,
        cmap=plt.cm.rainbow)
    ax2.set_title("Dynamic landscape, 3D", size=20)
    ax2.set_xlabel('Avg Temp', size=15)
    ax2.set_ylabel('mean(abs(d_Temp)) - abs(mean(d_Temp))', size=15)
    ax2.set_zlabel('Density of dominating state', size=15)
    mpld3.display(fig)
                                       voffset=10, hoffset=10, css=css)
    #connect tooltip to fig
    mpld3.plugins.connect(fig, tooltip, TopToolbar())

    #set tick marks as blank
    ax.axes.get_xaxis().set_ticks([])
    ax.axes.get_yaxis().set_ticks([])

    #set axis as blank
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)


ax.legend(numpoints=1) # show the legend with a single marker per entry

mpld3.display() # show the plot

# uncomment the lines below to export the figure as HTML instead
#html = mpld3.fig_to_html(fig)
#print(html)


# # Hierarchical document clustering

from scipy.cluster.hierarchy import ward, dendrogram

linkage_matrix = ward(dist) # define the linkage_matrix using Ward clustering on the pre-computed distances

fig, ax = plt.subplots(figsize=(15, 20)) # set size
# NOTE: `ax` is rebound below to dendrogram()'s return value and no longer
# refers to the Axes created on the previous line.
ax = dendrogram(linkage_matrix, orientation="right", labels=titles);
Example #43
0
def mouseshow():
    """Show the current figure through mpld3 with a mouse-position readout.

    Connects a ``MousePosition`` plugin (font size 14) to the figure returned
    by ``gcf()`` and returns whatever ``mpld3.display`` returns for it.
    """
    current_figure = gcf()
    position_readout = plugins.MousePosition(fontsize=14)
    plugins.connect(current_figure, position_readout)
    return mpld3.display(current_figure)
Example #44
0
                annot.set_visible(False)
                fig.canvas.draw_idle()

fig.canvas.mpl_connect("motion_notify_event", hover)

plt.show()
#%% Matplot tooltips hack
# Draw a seaborn regression scatter of random data and attach mpld3 tooltips
# that show each point's label.
import numpy as np
import matplotlib.pyplot as plt, mpld3
import seaborn as sns
import pandas as pd

N=10
data = pd.DataFrame({"x": np.random.randn(N),
                     "y": np.random.randn(N),
                     "size": np.random.randint(20,200, size=N),
                     "label": np.arange(N)
                     })


scatter_sns = sns.lmplot("x", "y",
           scatter_kws={"s": data["size"]},
           robust=False, # slow if true
           data=data, size=8)
fig = plt.gcf()

# PointLabelTooltip must be bound to the artist that holds the points (a
# Collection or Line2D), not to the Figure: given the Figure, the plugin has no
# points to attach labels to.  lmplot draws its scatter first, so it is the
# first collection on the grid's single Axes.
scatter_points = scatter_sns.ax.collections[0]
tooltip = mpld3.plugins.PointLabelTooltip(scatter_points, labels=list(data.label))
mpld3.plugins.connect(fig, tooltip)

mpld3.display(fig)
def kmeans_cluster(terms, description_tokens, tfidf_matrix, titles, bookobj_tokens_dict):
    """Kmeans algorithm used to create clusters of documents using scikitlearn.
    Datapoints plotted using matplotlibd3."""

    ######################
    ### KMeans Cluster ###
    ######################
    from sklearn.metrics.pairwise import cosine_similarity
    # # dist is defined as 1 - the cosine similarity of each document. Cosine similarity is measured against 
    # # the tf-idf matrix can be used to generate a measure of similarity between each document and the 
    # # other documents in the corpus (each synopsis among the synopses). Subtracting it from 1 provides 
    # # cosine distance which I will use for plotting on a euclidean (2-dimensional) plane.
    dist = 1 - cosine_similarity(tfidf_matrix)
    num_clusters = 8
    km = KMeans(n_clusters = num_clusters)
    km.fit(tfidf_matrix)
    clusters = km.labels_.tolist()
    print "clusters, ", clusters
    joblib.dump(km, 'doc_cluster.pkl')
    km = joblib.load('doc_cluster.pkl')
    clusters = km.labels_.tolist()
    books = {'title':titles, 'synopsis':description_tokens,'cluster':clusters}
    print "books, ", books
    frame = pd.DataFrame(books,index=[clusters],columns=['title','cluster'])
    frame['cluster'].value_counts()
    grouped = frame['title'].groupby(frame['cluster'])

    #############################
    ### Top Terms Per Cluster ###
    #############################
    print "Top terms per cluster:"
    print 
    order_centroids = km.cluster_centers_.argsort()[:, ::-1]
    print "order centroids, ", order_centroids
    totalvocab_tokenized_list = [word for token_sublist in bookobj_tokens_dict.values() for word in token_sublist]
    print "total vocab tokenized list, ", totalvocab_tokenized_list
    vocab_frame = pd.DataFrame({'words': totalvocab_tokenized_list}, index=totalvocab_tokenized_list)
    print 'there are ' + str(vocab_frame.shape[0]) + ' items in vocab_frame'
    print "num cluster, ", num_clusters
    print "values, ", frame.ix
    print order_centroids

    graph_keys = [i for i in range(num_clusters)]
    graph_values = []
    for i in range(num_clusters):
        print "Cluster %d words:" % i
        the_terms = []

        for ind in order_centroids[i, :3]:  # top 3 words that are nearest to the cluster centroid
            
            graph_terms = vocab_frame.ix[terms[ind].split(' ')].values.tolist()[0][0].encode('utf-8','ignore')
            the_terms.append(graph_terms)
            print "graph terms ", graph_terms
        print "the terms, ", the_terms
        
        graph_values.append(the_terms)
        
        print
        print
        print "Cluster %d titles:" % i
    
        df = frame.ix[i]['title']
        if type(df) is str:
            print ' %s,' % df
        else:
            for title in df.values.tolist():
                print ' %s,' % title
        print 
        print
    print "graph values ", graph_values
    
    # def multi_diminsional_scaling_for_2D_array(dist, graph_values, graph_keys):
    #################################
    ### Multi-Dimensional Scaling ###
    #################################

    # # convert the dist matrix into a 2-dimensional array using MDS
    MDS()

    # # convert two components while plotting points in 2-D plane
    # # 'precomputed' because provide a distance matrix
    # # will also specify random_state so the plot is reproducible
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    print "mds, ", mds
    pos = mds.fit_transform(dist) # shape (n_compoents, n_samples)
    print "pos, ", pos
    xs, ys = pos[:, 0], pos[:, 1]

    #################################
    ### Visualizing Book Clusters ###
    #################################

    # # set up colors per clusters using a dict
    cluster_colors = {0: '#1b9e77', 1: '#d95f02', 2: '#7570b3', 3: '#e7298a', 4: '#66a61e', 
                    5:'#F7EC45', 6:'#2ee3b6', 7:'#cd82c0'}    

    # # set up cluster names using a dict
    graph_values = [", ".join(term_list) for term_list in graph_values]
    cluster_names = dict(zip(graph_keys, graph_values))
    print "cluster names dictionary, ", cluster_names


    # # create data frame that has the result of the MDS plus the cluster numbers and titles
    df = pd.DataFrame(dict(x=xs, y=ys, label=clusters, title=titles)) 

    # # group by cluster
    groups = df.groupby('label')

    # # define custom css to format the font and to remove the axis labeling
    css = """
    text.mpld3-text, div.mpld3-tooltip {
      font-family:Arial, Helvetica, sans-serif;
    }
    g.mpld3-xaxis, g.mpld3-yaxis {
    display: none; }
    svg.mpld3-figure {
    margin-left: 0px;}
    """

    # Ploting using matplotlib
    fig, ax = plt.subplots(figsize=(14,6))  #set plot size
    ax.margins(0.03) # Optional, just adds 5% padding to the autoscaling

    # iterate through groups to layer the plot
    # note that I use the cluster_name and cluster_color dicts with the 'name' lookup 
    # to return the appropriate color/label
    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=18, 
                         label=cluster_names[name], mec='none', 
                         color=cluster_colors[name])
        ax.set_aspect('auto')
        labels = [i for i in group.title]
        
        # set tooltip using points, labels and the already defined 'css' - see above
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], labels,
                                           voffset=10, hoffset=10, css=css)
        # connect tooltip to fig
        mpld3.plugins.connect(fig, tooltip, TopToolbar())    
        
        # set tick marks as blank
        ax.axes.get_xaxis().set_ticks([])
        ax.axes.get_yaxis().set_ticks([])
        
        #set axis as blank
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)

        
    ax.legend(numpoints=1, title='')  # show legend with only one dot
 
    mpld3.display()  # show the plot

    # uncomment the below to export to html
    graph_html = mpld3.fig_to_html(fig)
    print "KMEANS CLUSTER GRAPH HTML", graph_html
    return graph_html
Example #46
0
def parallel_coordinates(data_sets, colors=None, columNames=None, alpha=None):
    """Draw a parallel-coordinates plot of ``data_sets`` and display it via mpld3.

    Every data set is min-max normalised per dimension, drawn across
    ``dims - 1`` side-by-side subplots, and the y tick labels of each subplot
    are rewritten to show the original (un-normalised) values.

    Args:
        data_sets: non-empty sequence of equally long numeric sequences, one
            per plotted line; each must have at least two dimensions.
        colors: one colour per data set; defaults to red for every line.
        columNames: one axis label per dimension; when ``None`` the axes are
            left unlabelled.
        alpha: one opacity per data set; defaults to matplotlib's own default
            for every line.

    Raises:
        ValueError: if the data sets have fewer than two dimensions.
    """
    dims = len(data_sets[0])
    if dims < 2:
        raise ValueError("parallel_coordinates needs at least two dimensions")
    x = list(range(dims))
    # squeeze=False keeps `axes` a sequence even when there is a single
    # subplot (two-dimensional data); otherwise a bare Axes comes back.
    fig, axes = plt.subplots(1, dims - 1, sharey=False, squeeze=False)
    axes = axes[0]

    # The old default 'r-' is a format string, which `c=` does not accept.
    if colors is None:
        colors = ['r'] * len(data_sets)
    # alpha=None lets matplotlib use its default opacity for each line.
    if alpha is None:
        alpha = [None] * len(data_sets)

    # Per-dimension (min, max, range); a constant dimension gets a unit range
    # centred on its value so the normalisation never divides by zero.
    min_max_range = []
    for column in zip(*data_sets):
        mn = min(column)
        mx = max(column)
        if mn == mx:
            mn -= 0.5
            mx = mn + 1.
        min_max_range.append((mn, mx, float(mx - mn)))

    # Normalize the data sets
    data_sets = [
        [(value - min_max_range[dimension][0]) / min_max_range[dimension][2]
         for dimension, value in enumerate(ds)]
        for ds in data_sets
    ]

    # Plot every data set on every subplot; each subplot only shows the
    # segment between its own pair of neighbouring dimensions.
    for i, ax in enumerate(axes):
        for dsi, d in enumerate(data_sets):
            ax.plot(x, d, c=colors[dsi], alpha=alpha[dsi])
        ax.set_xlim([x[i], x[i + 1]])

    # Set the x axis ticks and relabel the y ticks with original values
    for dimension, (axx, xx) in enumerate(zip(axes, x[:-1])):
        axx.xaxis.set_major_locator(ticker.FixedLocator([xx]))
        ticks = len(axx.get_yticklabels())
        step = min_max_range[dimension][2] / (ticks - 3)
        mn = min_max_range[dimension][0]
        labels = ['%6.2f' % (mn + i * step) for i in range(-1, ticks)]
        axx.set_yticklabels(labels)

    # Move the final axis' ticks to the right-hand side
    axx = plt.twinx(axes[-1])
    dimension = dims - 1  # the last dimension, drawn on the twin axis
    axx.xaxis.set_major_locator(ticker.FixedLocator([x[-2], x[-1]]))
    ticks = len(axx.get_yticklabels())
    step = min_max_range[dimension][2] / (ticks - 1)
    mn = min_max_range[dimension][0]
    labels = ['%6.2f' % (mn + i * step) for i in range(ticks)]
    axx.set_yticklabels(labels)

    if columNames is None:
        # Keep the legend entries below on the last subplot, as they are when
        # labels are supplied.
        plt.sca(axes[-1])
    else:
        # Every subplot but the last carries one label; the last carries the
        # remaining ones.
        leading = columNames[:-2]
        for i, col in enumerate(leading):
            plt.sca(axes[i])
            plt.xticks([i], (col, ), rotation='vertical')
        i = len(leading)
        plt.sca(axes[i])
        plt.xticks([i, i + 1], columNames[i:], rotation='vertical')

    # color labels (empty lines that only feed the legend)
    plt.plot([], [], color='r', label='Infeasible')
    plt.plot([], [], color='b', label='Feasible')
    plt.plot([], [], color='g', label='Non-dominated')

    # delete whitespace
    plt.subplots_adjust(wspace=0)

    # title
    plt.suptitle('Parallel Coordinate Plot')

    plt.legend(bbox_to_anchor=(1.6, 1), loc=2, borderaxespad=0.)
    #    fig.savefig("paralelcoordinate1.pdf",dpi=600,bbox_inches='tight')
    #fig.savefig("paralelcoordinate")
    mpld3.display(fig)
    def writeResults(fn, userID, batchID):
        """Print an HTML report of per-locus allele read counts for one user.

        Opens a connection with ``DBconnections.openDB()``, queries the
        ``tempcontent`` table for rows belonging to ``userID`` and writes, with
        ``print``: a form holding three select lists (locus, allele ratio
        filter, locus ratio filter) and, for every locus, an HTML table of its
        sequences.  After each table a bar chart built from hard-coded sample
        data is handed to ``mpld3.display``.

        Args:
            fn: not referenced anywhere in the visible body.
            userID: formatted into the WHERE clause of every query.
            batchID: concatenated into the hidden ``idnumber`` form field.

        The body also reads ``locusname``, ``filter``, ``locfilter`` and
        ``fileholder``, none of which is assigned here; they have to come from
        an enclosing scope.

        NOTE(review): every SQL statement is assembled by string formatting or
        concatenation (``userID``, ``locusname``, ``locus``).  If any of these
        can be influenced by a user this is open to SQL injection -- confirm
        and move to parameterized queries.
        """


        Con2 = DBconnections.openDB()
        cursor2 = Con2.cursor()

        #get total number of reads
        queryTotal = "SELECT SUM(seqcount) As TotalReads FROM tempcontent WHERE userID = {}".format(userID)
        #print queryTotal

        # Distinct loci of this user, used to fill the "View Locus" select list.
        qLocus = "SELECT locus FROM tempcontent WHERE userID = {} Group by locus".format(userID)

        cursor2.execute (queryTotal)
        tReads = cursor2.fetchone()
        totalReads = tReads[0]
        #print "<br/>total reads: " + str(totalReads)

        cursor2.execute (qLocus)
        locuslinks = cursor2.fetchall()
        # `links` is not used again in the visible body.
        links = 0
        # View Locus select list
        print "<hr/><br/><form action='' method='post' name='locfrm' id='locfrm' enctype='multipart/form-data' >" +\
                "<b>View Locus: </b><select id='locusname' name='locusname' onchange='javascript: getlocus(this);'>" +\
                "<option value=''"
        # The "All" option is preselected when no locus has been chosen.
        if locusname == "":
            print "selected>"
        else:
            print ">"
        print "All</option>"

        for lcs in locuslinks:
            print "<option value='" + str(lcs[0]) + "' "
            if locusname == str(lcs[0]):
                print "selected"
            print ">" + str(lcs[0]) + "</option>"
        print "</select>"

        # Allele Ratio Filter select list
        print "&nbsp;&nbsp;<b>Allele Ratio Filter: </b>" +\
        "<select id='filter' name='filter' style='width:100px;' onchange='javascript: getFilter(this);'>"
        # Twelve options, starting at 0 and growing by 0.001 per option.
        # `perctg` is not used again in the visible body.
        num = 0
        perctg = 0
        for n in range(0,12):
            print "<option value='" + str(num) + "'"
            if str(filter) == str(num):
                print "selected"
            print ">"
            if num == 0:
                print "Nothing"
            else:
                print "> " + str(num)
            print "</option>"
            num = num + 0.001
        print "</select>"

        # Locus Filter list
        print "&nbsp;&nbsp;<b>Locus Ratio Filter: </b>" +\
        "<select id='locfilter' name='locfilter' style='width:100px;' onchange='javascript: getLocFilter(this);'>"
        # Four options, starting at 0 and growing by 0.01 per option.
        num = 0
        perctg = 0
        for n in range(0,4):
            print "<option value='" + str(num) + "'"
            if str(locfilter) == str(num):
                print "selected"
            print ">"
            if num == 0:
                print "Nothing"
            else:
                print "> " + str(num)
            print "</option>"
            num = num + 0.01
        print "</select>"

        # Hidden fields carrying the batch id and file holder, then close the form.
        print "<br/><input type='hidden' name='idnumber' id='idnumber' value='" + batchID + "'>" +\
        "<input type='hidden' name='fileholder' id='fileholder' value='" + fileholder  + "'>" +\
        "</form>"

        # Read totals per locus: every locus, or only the selected one.
        if locusname == "":
            queryCount = "SELECT locus, SUM(seqcount) As TotalReads FROM tempcontent WHERE userID = {} Group by locus".format(userID) +\
            " ORDER BY locus"
        else:
            queryCount = "SELECT locus, SUM(seqcount) As TotalReads FROM tempcontent WHERE " +\
            "locus = '" + locusname + "' AND " +\
            "userID = {} Group by locus".format(userID) +\
            " ORDER BY locus"

        cursor2.execute (queryCount)
        Readresults = cursor2.fetchall()

        # One HTML table (and one chart) per locus.
        for rec in Readresults:
            locus = rec[0]
            sCount = rec[1]
            # sort and count number of reads
            #filename locus allele read_dir sequence seq_Len reads allele_Count allele_ratio total_ratio num_repeats
            query = "SELECT tempcontent.locus, allele, readdir, " +\
            "tempcontent.seq as seq, length(tempcontent.seq)  as seqLen, SUM(seqcount) as seqCounts, " +\
            "((SUM(tempcontent.seqcount))/{}.0)".format(sCount) + " As allele_ratio, ((SUM(tempcontent.seqcount))/{}.0)".format(totalReads) + " As total_ratio " +\
            "FROM tempcontent  " +\
            "WHERE tempcontent.locus = '" + locus + "' AND userID = {}".format(userID) +\
            " Group by allele, locus, readDir, tempcontent.seq "
            #if filter > 0:
                #query = query + " HAVING ((SUM(tempcontent.seqcount))/{}.0)".format(sCount) + " > " + filter
            query = query + " Order by length(tempcontent.seq), tempcontent.seq, readdir, SUM(seqcount) desc   "

            cursor2.execute (query)
            getRecords = cursor2.fetchall()


            print "<br/><table style='border-collapse:collapse; border: solid 1px black;font-size:12px;'>"
            # The *H variables hold the row waiting to be printed; `flag` is
            # True while that held row has not been written out yet.
            count = 0
            aH = ""
            lH = ""
            sH = ""
            rdH = ""
            FRRH = ""
            slH = 0
            scH = 0
            arH = 0
            trH = 0
            flag = False
            for seqRecord in getRecords:
                locus = seqRecord[0]
                allele = seqRecord[1]
                readdir = seqRecord[2]
                seq = seqRecord[3]
                seqLen = seqRecord[4]
                seqCounts = seqRecord[5]
                alleleRatio = seqRecord[6]
                totalRatio = seqRecord[7]
                # NOTE(review): under Python 2 this is integer division when
                # both operands are ints -- confirm what type the DB driver
                # returns for SUM().
                locusRatio = sCount/totalReads

                # First record of the locus: print the summary row (and the
                # column headers if the locus passes the locus filter), then
                # hold the record without printing it.
                if count == 0:
                    print "<tr style='background-color: #cccccc;'><td colspan='8' style='border-style:solid;border-width: 0px 0px 1px 0px;'><b>" +\
                    locus + "</b><br/>" +\
                    "<b>Allele total: </b>" + str(sCount) + "&nbsp;&nbsp;<b>Total Count:</b> " + str(totalReads) +\
                    "&nbsp;&nbsp;<b>Locus Ratio: </b>" + "{0:.4f}".format(locusRatio) + "</td></tr>"
                    if float(locusRatio) > float(locfilter):
                        print "<tr style='background-color:#FAEBD7;border:1px solid black;'><th style='padding-right:5px;'>Allele</th><th style='padding-right:5px;'>locus</th>" +\
                        "<th style='padding-right:8px;'>Seq</th><th style='padding-right:8px;'>Seq<br/>Length</th>" +\
                        "<th style='padding-right:8px;'>Seq<br/>Counts</th><th style='padding-right:8px;'>FWD/REV</th>" +\
                        "<th style='padding-right:8px;'>Allele<br/>Ratio</th><th style='padding-right:8px;'>Total<br/>Ratio</th></tr>"
                    aH = allele
                    lH = locus
                    sH = seq
                    slH = seqLen
                    scH = seqCounts
                    arH = alleleRatio
                    trH = totalRatio
                    flag = True

                if float(locusRatio) > float(locfilter):
                    if count > 0:
                        # A REV record with the same sequence as the held row is
                        # merged into it: counts and ratios are summed and FRRH
                        # becomes the forward/reverse share of the reads.
                        if readdir == 'REV' and seq == sH:
                            aH = allele
                            lH = locus
                            sH = seq
                            slH = seqLen
                            # NOTE(review): integer division under Python 2 if
                            # the counts are ints -- confirm.
                            FR = scH/(scH + seqCounts)
                            RR = seqCounts/(scH + seqCounts)
                            FRRH = "{0:.4f}".format(FR) + "/" + "{0:.4f}".format(RR)
                            scH = scH + seqCounts
                            arH = arH + alleleRatio
                            trH = trH + totalRatio

                            # NOTE(review): the first cell opens with <td> but
                            # closes with </th>, here and in the two similar
                            # row blocks below.
                            if float(arH) > float(filter):
                                print "<tr><td style='padding-left:5px;padding-right:5px;'>{}</th>".format(aH)
                                print "<th style='padding-right:5px;'>{}</th>".format(lH)
                                print "<th style='padding-right:5px;'>{}</th>".format(sH)
                                print "<th style='padding-right:8px;'>{}</th>".format(slH)
                                print "<th style='padding-right:8px;'>{}</th>".format(scH)
                                print "<th style='padding-right:8px;'>{}</th>".format(FRRH)
                                print "<th style='padding-right:8px;'>{}</th>".format(arH)
                                print "<th style='padding-right:8px;'>{}</th></tr>".format(trH)
                                flag = False

                        else:
                            # Different sequence (or not REV): flush the held
                            # row if it is still pending and passes the filter.
                            if flag == True :
                                if float(arH) > float(filter):
                                    print "<tr><td style='padding-left:5px;padding-right:5px;'>{}</th>".format(aH)
                                    print "<th style='padding-right:5px;'>{}</th>".format(lH)
                                    print "<th style='padding-right:5px;'>{}</th>".format(sH)
                                    print "<th style='padding-right:8px;'>{}</th>".format(slH)
                                    print "<th style='padding-right:8px;'>{}</th>".format(scH)
                                    print "<th style='padding-right:8px;'>{}</th>".format(FRRH)
                                    print "<th style='padding-right:8px;'>{}</th>".format(arH)
                                    print "<th style='padding-right:8px;'>{}</th></tr>".format(trH)
                                    flag = False

                            # The current record becomes the new held row.
                            aH = allele
                            lH = locus
                            sH = seq
                            slH = seqLen
                            scH = seqCounts
                            arH = alleleRatio
                            trH = totalRatio

                            # A FWD record is held back for a possible REV
                            # partner; any other record is printed right away.
                            if readdir == "FWD":
                                FRRH = "1/0"
                                flag = True
                            else:
                                FRRH = "0/1"
                                flag = False
                            if flag == False:
                                if float(arH) > float(filter):
                                    print "<tr><td style='padding-left:5px;padding-right:5px;'>{}</th>".format(aH)
                                    print "<th style='padding-right:5px;'>{}</th>".format(lH)
                                    print "<th style='padding-right:5px;'>{}</th>".format(sH)
                                    print "<th style='padding-right:5px;'>{}</th>".format(slH)
                                    print "<th style='padding-right:5px;'>{}</th>".format(scH)
                                    print "<th style='padding-right:5px;'>{}</th>".format(FRRH)
                                    print "<th style='padding-right:5px;'>{}</th>".format(arH)
                                    print "<th style='padding-right:5px;'>{}</th></tr>".format(trH)
                                    flag = False



                    count += 1
                else:
                    # Locus ratio does not exceed the locus filter: report
                    # "No Data" and skip the remaining records of this locus.
                    print "<tr style='background-color:#ccffff'><td colspan='8' style='border-top:1px solid black;'>No Data</td></tr>"
                    break
            # NOTE(review): nothing flushes the held row after the loop, so a
            # row still pending when the records run out is never printed --
            # confirm whether that is intended.
            print "</table><br/>"

            # Bar chart drawn once per locus from the hard-coded sample values
            # below; none of the query results above feed into it.
            N = 5
            menMeans = (20, 35, 30, 35, 27)
            menStd = (2, 3, 4, 1, 2)

            ind = np.arange(N)  # the x locations for the groups
            width = 0.35       # the width of the bars

            fig, ax = plt.subplots()
            rects1 = ax.bar(ind, menMeans, width, color='r', yerr=menStd)

            womenMeans = (25, 32, 34, 20, 25)
            womenStd = (3, 5, 2, 3, 3)
            rects2 = ax.bar(ind + width, womenMeans, width, color='y', yerr=womenStd)

            # add some text for labels, title and axes ticks
            ax.set_ylabel('Scores')
            ax.set_title('Scores by group and gender')
            ax.set_xticks(ind + width)
            ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))

            ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))


            # `autolabel` is not defined in this function.
            autolabel(rects1)
            autolabel(rects2)

            #plt.show()
            # NOTE(review): the first argument is the pyplot module rather than
            # `fig` -- confirm this is what mpld3.display expects.
            mpld3.display(plt,True,False)

        cursor2.close()
        DBconnections.closeDB(Con2)