def display_comparison_plot_mpld3(t, arr, names, line_styles, title, xtitle,
                                  ytitle, ylim, figname):
    """Render a multi-line comparison plot as interactive mpld3 HTML.

    One line is drawn per entry of ``names``: line ``i`` plots ``arr[:, i]``
    against ``t`` with ``line_styles[i]``.  All lines are drawn with
    ``alpha=0.2`` and passed, together with the module-level ``css``, to the
    ``HighlightLines`` plugin.

    Args:
        t: x values shared by every line.
        arr: Array indexed as ``arr[:, i]`` for the i-th line.
        names: Line labels; its length decides how many columns are plotted.
        line_styles: Matplotlib line style for each line.
        title: Axes title.
        xtitle: x-axis label.
        ytitle: y-axis label.
        ylim: Value forwarded to ``Axes.set_ylim``.
        figname: Unused; kept so existing callers keep working (it was only
            referenced by a disabled ``save_html`` export).

    Returns:
        The HTML produced by ``mpld3.fig_to_html`` for the figure.
    """
    fig, ax = plt.subplots()
    try:
        lines = []
        for i, name in enumerate(names):
            line, = ax.plot(t, arr[:, i], label=name, lw=3,
                            ls=line_styles[i], alpha=0.2)
            lines.append(line)

        ax.set_xlabel(xtitle)
        ax.set_ylabel(ytitle)
        ax.set_title(title)
        ax.set_ylim(ylim)
        ax.grid()

        plugins.connect(fig, HighlightLines(lines, names, css))

        # The figure is converted exactly once.  The previous implementation
        # also built (and discarded) an IPython display object, whose only
        # lasting effect was closing the figure; that is done explicitly below.
        return mpld3.fig_to_html(fig)
    finally:
        plt.close(fig)
def smithd3(self, x):
    """Draw a Smith chart and return it as an mpld3 display object.

    A new figure is created through ``pl.subplots()`` and a
    ``twoport.SmithChart`` is attached to its axes with the cursor and labels
    enabled.

    Args:
        x: Currently not used by the body (see the review note below).

    Returns:
        The object returned by ``mpld3.display`` for the new figure.
        Previously the object was created and discarded, so nothing could be
        rendered by the caller.
    """
    import mpld3
    import twoport as tp

    fig, ax = pl.subplots()
    chart = tp.SmithChart(show_cursor=True, labels=True, ax=ax)
    # NOTE(review): `a` is not defined inside this method and the parameter
    # `x` is never read.  Presumably `x` was meant here -- confirm against the
    # callers before changing it; left untouched in case `a` is a module-level
    # global.
    chart.plot_s_param(a)
    return mpld3.display(fig)
def after(self):
    """Finish the plot, or print ``meeting.minutes`` when drawing is disabled.

    When ``self.draw`` is truthy an ``InteractiveLegendPlugin`` built from
    ``self.s1`` and ``self.labels`` (bound to ``self.ax``) is connected to
    ``self.fig`` and ``mpld3.display()`` is invoked.  Otherwise the value of
    ``meeting.minutes`` is printed.
    """
    if self.draw:
        legend = plugins.InteractiveLegendPlugin(
            self.s1, self.labels, ax=self.ax)
        plugins.connect(self.fig, legend)
        mpld3.display()
    else:
        # Call form of print: valid on Python 3 and prints the same text on
        # Python 2 for a single argument.
        print(meeting.minutes)
def plot_ts(*args, **kwargs):
    """Build a T-S plot and attach an interactive mpld3 legend to it.

    All arguments are forwarded unchanged to ``nplt.plot_ts``.  Two keyword
    arguments are additionally read here:

    * ``labels`` -- legend labels; defaults to the label of every plotted line.
    * ``alpha`` -- opacity for unselected lines; defaults to ``0.2``.

    Returns:
        The axes object returned by ``nplt.plot_ts``.
    """
    axes = nplt.plot_ts(*args, **kwargs)

    drawn_lines = axes.lines
    fallback_labels = [line.get_label() for line in axes.lines]
    legend_labels = kwargs.get("labels", fallback_labels)
    unselected_alpha = kwargs.get("alpha", 0.2)

    legend = InteractiveLegendPlugin(drawn_lines, legend_labels,
                                     alpha_unsel=unselected_alpha)
    plugins.connect(axes.figure, legend)
    mpld3.display()
    return axes
def plot_ts(*args, **kwargs):
    """Create a T-S plot with a clickable legend rendered through mpld3.

    Positional and keyword arguments go straight to ``nplt.plot_ts``.  The
    optional ``labels`` keyword overrides the legend entries (default: each
    line's own label) and ``alpha`` sets the opacity of unselected lines
    (default ``0.2``).

    Returns:
        The axes produced by ``nplt.plot_ts``.
    """
    axes = nplt.plot_ts(*args, **kwargs)
    plugin_options = {"alpha_unsel": kwargs.get("alpha", 0.2)}
    legend_plugin = InteractiveLegendPlugin(
        axes.lines,
        kwargs.get("labels", [entry.get_label() for entry in axes.lines]),
        **plugin_options)
    plugins.connect(axes.figure, legend_plugin)
    mpld3.display()
    return axes
def renderGraph(dataSet):
    """Draw a red bar chart from ``dataSet`` and return it as HTML.

    Args:
        dataSet: Indexable holding, in order: the bar heights (``[0]``), the
            chart title (``[1]``), the y-axis label (``[2]``) and the x tick
            labels (``[3]``).

    Returns:
        The HTML produced by ``fig_to_html`` for the bar chart.
    """
    data = dataSet[0]
    title = dataSet[1]
    ylabel = dataSet[2]
    xlabels = dataSet[3]

    positions = np.arange(len(data))  # the x locations for the groups
    width = 0.35  # the width of the bars

    fig, ax = plt.subplots()
    try:
        bars = ax.bar(positions, data, width, color='r')

        # Labels, title and axis ticks.
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.set_xticks(positions + width / 2)
        ax.set_xticklabels(xlabels)

        # Write each bar's integer height just above it.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., 1.05 * height,
                    '%d' % int(height), ha='center', va='bottom')

        # Convert the figure that actually holds the bars.  The previous code
        # converted a freshly created, empty figure, so the returned HTML
        # never contained the chart.
        return fig_to_html(fig)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
def plot_by_mpld3(dataframe, figsize=(12, 6), marker='o', grid=True,
                  alpha_ax=0.3, alpha_plot=0.4, alpha_unsel=0.3,
                  alpha_over=1.5, title=None, xlabel=None, ylabel=None,
                  mode="display", file=None):
    """Build an interactive D3 (mpld3) line chart from a DataFrame.

    Every column is drawn twice against the frame's index: once with markers
    (this line carries the legend label) and once more in the same colour
    with opacity ``alpha_plot``.  An ``InteractiveLegendPlugin`` is attached
    to the figure.

    Args:
        dataframe: pandas DataFrame; one series is plotted per column.
        figsize: Figure size passed to ``plt.subplots``.
        marker: Marker style of the labelled line.
        grid: Forwarded to ``Axes.grid``.
        alpha_ax: Opacity of the grid.
        alpha_plot: Opacity of the second line drawn for each column.
        alpha_unsel: ``alpha_unsel`` of the interactive legend.
        alpha_over: ``alpha_over`` of the interactive legend.
        title: Optional axes title; font size is ``len(title) + 5``.
        xlabel: Optional x-axis label.
        ylabel: Optional y-axis label.
        mode: ``'html'`` returns the HTML string; ``'save'`` (with ``file``)
            writes the HTML to ``file``; anything else returns a display
            object.
        file: Target for ``mpld3.save_html`` when ``mode == 'save'``.

    Returns:
        The HTML string for ``mode == 'html'``, ``None`` after saving, and
        otherwise the object returned by ``mpld3.display``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.grid(grid, alpha=alpha_ax)

    # ``DataFrame.iteritems`` was removed in pandas 2.0; ``items`` yields the
    # same (column name, Series) pairs on old and new pandas alike.
    for key, val in dataframe.items():
        line, = ax.plot(val.index, val.values, label=key, marker=marker)
        ax.plot(val.index, val.values, color=line.get_color(),
                alpha=alpha_plot)

    # Interactive legend built from the labelled lines.
    handles, labels = ax.get_legend_handles_labels()
    interactive_legend = plugins.InteractiveLegendPlugin(
        handles, labels,
        alpha_unsel=alpha_unsel,
        alpha_over=alpha_over,
        start_visible=True)
    plugins.connect(fig, interactive_legend)

    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)
    if title:
        ax.set_title(title, size=len(title) + 5)

    if mode == 'html':
        return mpld3.fig_to_html(fig)
    if mode == 'save' and file:
        mpld3.save_html(fig, file)
        return None
    # Pass the figure explicitly rather than relying on pyplot's notion of
    # the current figure.
    return mpld3.display(fig)
def _scatter_plot(X, y, point_labels, scatter_params, css):
    """Draw a 2-D scatter plot and return it as an mpld3 display object.

    Args:
        X: Two-column data; wrapped in a DataFrame with columns ``x``/``y``.
        y: Values used to colour the points.  When ``y.dtype`` is ``object``
            the values are factorized and the integer codes are used instead.
        point_labels: Optional per-point HTML tooltips; when empty or ``None``
            no tooltip plugin is attached.
        scatter_params: Extra keyword arguments for ``Axes.scatter``.  An
            optional ``'figsize'`` entry sets the figure size (default
            ``(8, 8)``).  The mapping is copied, not modified.
        css: CSS handed to ``PointHTMLTooltip``.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    # Work on a copy so the caller's dict keeps its 'figsize' entry, and
    # create exactly one figure (the old code created a default-sized figure
    # first and abandoned it whenever 'figsize' was supplied).
    params = dict(scatter_params or {})
    figsize = params.pop('figsize', (8, 8))

    coords = pd.DataFrame(X, columns=['x', 'y'])
    fig, ax = plt.subplots(figsize=figsize)

    if y.dtype == 'object':
        colors, _classes = y.factorize()
    else:
        colors = y

    scatter = ax.scatter(x=coords['x'], y=coords['y'], c=colors, **params)

    if point_labels is not None and len(point_labels) > 0:
        tooltip = mpld3.plugins.PointHTMLTooltip(scatter,
                                                 labels=point_labels,
                                                 css=css)
        mpld3.plugins.connect(fig, tooltip)
    else:
        mpld3.plugins.connect(fig)

    return mpld3.display(fig)
def zoomable_walk(adn):
    """Plot the path computed from ``adn`` and return a zoomable HTML view.

    The length of the input sequence is printed, ``path_x_y`` turns the
    sequence into x and y coordinates, and these are drawn with pyplot.

    Returns:
        The object returned by ``mpld3.display()``.
    """
    print("longueur de la séquence d'entrée", len(adn))
    xs, ys = path_x_y(adn)
    pyplot.plot(xs, ys)
    # Rather than showing the drawing with pyplot.show(), hand back an HTML
    # object that the notebook renders.
    interactive_view = mpld3.display()
    return interactive_view
def plot_mle_graph(function, mle_params, x_start=eps, x_end=1 - eps,
                   y_start=eps, y_end=1 - eps, resolution=100, x_label="x",
                   y_label="y", show_constraint=False, show_optimum=False):
    """Contour-plot a two-argument function around its value at ``mle_params``.

    ``function`` is evaluated point by point on a ``resolution`` x
    ``resolution`` grid.  Contour levels are spaced 0.25 apart, from 3.0 below
    ``function(*mle_params)`` up to the smaller of that value plus 2.0 and
    -0.1.

    Args:
        function: Callable of two scalars.
        mle_params: Arguments at which the reference level is evaluated; the
            first two entries are also the coordinates of the optional marker.
        x_start, x_end, y_start, y_end: Grid bounds.
        resolution: Number of grid points per axis.
        x_label, y_label: Axis labels.
        show_constraint: Also draw the line ``y = 1 - x``.
        show_optimum: Also draw a red dot at ``mle_params[0], mle_params[1]``.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    xs = np.linspace(x_start, x_end, resolution)
    ys = np.linspace(y_start, y_end, resolution)
    grid_x, grid_y = np.meshgrid(xs, ys)

    elementwise = np.vectorize(lambda a, b: function(a, b))
    surface = elementwise(grid_x, grid_y)

    reference = function(*mle_params)
    upper_bound = min(reference + 2.0, -0.1)
    levels = np.concatenate([
        np.arange(reference - 3.0, reference, 0.25),
        np.arange(reference, upper_bound, 0.25),
    ])

    figure = plt.figure()
    contour_set = plt.contour(xs, ys, surface, levels=levels)
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    if show_constraint:
        plt.plot(xs, 1 - xs)
    if show_optimum:
        plt.plot(mle_params[0], mle_params[1], 'ro')

    plt.clabel(contour_set)
    return mpld3.display(figure)
def renderGraph(dataSet):
    """Convert ``dataSet`` into a bar chart and return the chart as HTML.

    The chart is drawn on pyplot figure number 1 (width 11 inches) with red
    bars, each annotated with its integer height, and is then converted to
    HTML for the website.

    Args:
        dataSet: Indexable holding, in order: the bar heights (``[0]``), the
            chart title (``[1]``), the y-axis label (``[2]``) and the x tick
            labels (``[3]``).

    Returns:
        The HTML produced by ``fig_to_html`` for the chart.
    """
    data = dataSet[0]
    title = dataSet[1]
    ylabel = dataSet[2]
    xlabels = dataSet[3]

    positions = np.arange(len(data))  # the x locations for the groups
    width = 0.35  # the width of the bars

    chart = plt.figure(1)
    try:
        chart.set_figwidth(11)
        ax = plt.subplot(autoscale_on=True)
        bars = ax.bar(positions, data, width, color='r')

        # Labels, title and axis ticks.
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.set_xticks(positions + width / 2)
        ax.set_xticklabels(xlabels)

        # Attach a text label above each bar showing its height, so readers
        # get the value without consulting the axis.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., 1.02 * height,
                    '%d' % int(height), ha='center', va='bottom')

        chart.tight_layout()
        return fig_to_html(chart)
    finally:
        # Close the chart directly.  The earlier version opened an extra
        # empty figure just to close it and built a discarded IPython display
        # object whose only lasting effect was closing ``chart``.
        plt.close(chart)
def make_mpld3_mds_plot(xs, ys, data_pd=gdata_pd):
    """Plot MDS coordinates grouped by political leaning and export as HTML.

    Points are grouped by the ``leaning`` column of ``data_pd`` and coloured
    with a fixed palette (``conservative`` / ``liberal``); each point gets an
    HTML tooltip showing its ``source`` value.  Axes are hidden because the
    plot is qualitative.

    Args:
        xs: x coordinates, aligned with the rows of ``data_pd``.
        ys: y coordinates, aligned with the rows of ``data_pd``.
        data_pd: DataFrame providing the ``leaning`` and ``source`` columns.

    Returns:
        The HTML produced by ``mpld3.fig_to_html``.  The HTML used to be
        computed and then dropped (the function returned ``None``).
    """
    cluster_colors = {'conservative': '#1b9e77', 'liberal': '#d95f02'}

    # New frame with the MDS coordinates plus the columns needed for plotting.
    plot_df = pd.DataFrame(dict(x=xs, y=ys,
                                label=data_pd.loc[:, 'leaning'],
                                source=data_pd.loc[:, 'source']))
    groups = plot_df.groupby('label')

    # CSS: tooltip font, hidden mpld3 axes, no left margin on the figure.
    css = """ text.mpld3-text, div.mpld3-tooltip { font-family:Arial, Helvetica, sans-serif; } g.mpld3-xaxis, g.mpld3-yaxis { display: none; } svg.mpld3-figure { margin-left: 0px;} """

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        for name, group in groups:
            points = ax.plot(group.x, group.y, marker='o', linestyle='',
                             ms=18, label=name, mec='none',
                             color=cluster_colors[name])
            ax.set_aspect('auto')
            labels = list(group.source)

            # One tooltip per group, bound to that group's points.
            tooltip = mpld3.plugins.PointHTMLTooltip(points[0], labels,
                                                     voffset=10, hoffset=10,
                                                     css=css)
            mpld3.plugins.connect(fig, tooltip, TopToolbar())

            # No axes: this is a qualitative plot.
            ax.axes.get_xaxis().set_ticks([])
            ax.axes.get_yaxis().set_ticks([])
            ax.axes.get_xaxis().set_visible(False)
            ax.axes.get_yaxis().set_visible(False)

        ax.legend(numpoints=1)

        return mpld3.fig_to_html(fig)
    finally:
        # Replaces the discarded ``mpld3.display()`` call, whose only lasting
        # effect was closing the figure.
        plt.close(fig)
def plot_1D(loss, theta1, theta2, N=100, eps=0.01, loss2=None, ylim=None):
    """Plot one or two 1-D loss curves on a fresh figure.

    The curves are evaluated by ``create_1D_plot`` on ``N`` evenly spaced
    points between ``eps`` and ``1 - eps``.

    Args:
        loss: First loss, always plotted.
        theta1, theta2: Forwarded to ``create_1D_plot``.
        N: Number of sample points.
        eps: Distance of the sampling interval from 0 and 1.
        loss2: Optional second loss, plotted only when not ``None``.
        ylim: Optional y limits, applied before plotting.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    grid = np.linspace(eps, 1.0 - eps, N)
    figure = plt.figure()

    if ylim is not None:
        plt.ylim(ylim)

    curves = [loss] if loss2 is None else [loss, loss2]
    for curve in curves:
        create_1D_plot(grid, curve, theta1, theta2)

    return mpld3.display(figure)
def dataVisualDynamtic(xs, ys, clusters, titles):
    """Show clustered points as an interactive mpld3 scatter plot.

    Points are grouped by cluster id; every group is drawn with the label and
    colour looked up in ``cluster_names`` / ``cluster_colors`` (names resolved
    from outside this function) and gets HTML tooltips showing the matching
    ``titles``.

    Args:
        xs: x coordinates.
        ys: y coordinates.
        clusters: Cluster id of each point.
        titles: Tooltip text of each point.

    Returns:
        The object returned by ``mpld3.display``.  It used to be discarded,
        which left callers with nothing to render.
    """
    # Combine the coordinates with the cluster ids and titles.
    df = pd.DataFrame(dict(x=xs, y=ys, label=clusters, title=titles))

    # Group by cluster.
    groups = df.groupby('label')

    # Custom CSS: format the fonts and remove the axis labels.
    css = """ text.mpld3-text, div.mpld3-tooltip { font-family:Arial, Helvetica, sans-serif; } g.mpld3-xaxis, g.mpld3-yaxis { display: none; } svg.mpld3-figure { margin-left: -200px;} """

    fig, ax = plt.subplots(figsize=(14, 6))  # set the figure size
    ax.margins(0.03)  # optional: adds 3% padding to the autoscaling

    # Plot each cluster; the group key selects its label and colour.
    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=18,
                         label=cluster_names[name], mec='none',
                         color=cluster_colors[name])
        ax.set_aspect('auto')
        labels = list(group.title)

        # Tooltip built from the points, their labels and the CSS above.
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], labels,
                                                 voffset=10, hoffset=10,
                                                 css=css)
        # Connect the tooltip to the scatter plot.
        mpld3.plugins.connect(fig, tooltip, TopToolbar())

        # Hide the tick marks.
        ax.axes.get_xaxis().set_ticks([])
        ax.axes.get_yaxis().set_ticks([])

        # Hide the axes.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)

    ax.legend(numpoints=1)  # show a single marker per legend entry

    return mpld3.display(fig)
def plot_2D_function(function, x_start=eps, x_end=1 - eps, y_start=eps,
                     y_end=1 - eps, resolution=100, x_label="x",
                     y_label="y"):
    """Contour-plot a two-argument scalar function over a rectangular grid.

    Args:
        function: Callable of two scalars, evaluated point by point.
        x_start, x_end, y_start, y_end: Grid bounds.
        resolution: Number of grid points per axis.
        x_label, y_label: Axis labels.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    xs = np.linspace(x_start, x_end, resolution)
    ys = np.linspace(y_start, y_end, resolution)
    grid_x, grid_y = np.meshgrid(xs, ys)

    elementwise = np.vectorize(lambda a, b: function(a, b))
    surface = elementwise(grid_x, grid_y)

    figure = plt.figure()
    contour_set = plt.contour(xs, ys, surface)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.clabel(contour_set)

    return mpld3.display(figure)
def plot_tree_mpld3(nodes: List[str],
                    edgelist: List[Tuple[str]],
                    svgs: List[Any],
                    extra_string: Optional[List] = None,
                    figsize: Optional[Tuple[int, int]] = None,
                    layout: str = 'rt',
                    figure_savefile: Optional[str] = None):
    """Plot a graph whose nodes show SVG tooltips, optionally saving as HTML.

    Args:
        nodes: Vertex names added to the graph.
        edgelist: Edges added to the graph.
        svgs: Tooltip markup, one entry per node.  Not modified; captions are
            applied to a copy.
        extra_string: Optional captions, paired with ``svgs`` by position.
            Every line of a caption becomes a white-background ``<div>``
            inserted right after the SVG's ``encoding='iso-8859-1'?>`` header
            line, or at the very start when that header is absent.
        figsize: Figure size passed to ``plt.subplots``.
        layout: Layout name forwarded to ``compute_layout``.
        figure_savefile: When given, the HTML is also written to this path.
    """
    G = Graph()
    G.add_vertices(nodes)
    G.add_edges(edgelist)

    if extra_string:
        header = "encoding='iso-8859-1'?>\n"  # end of the XML declaration
        style = 'style="background-color:#ffffff"'
        # Copy so the caller's list is left untouched.
        svgs = list(svgs)
        for i, (caption, svg) in enumerate(zip(extra_string, svgs)):
            header_at = svg.find(header)
            # ``find`` returns -1 when the header is missing; adding the
            # header length to that would split the SVG at an arbitrary
            # offset, so fall back to inserting at the beginning.
            insert_at = header_at + len(header) if header_at != -1 else 0
            caption_html = ''.join(
                f'<div {style}>{line}</div>' for line in caption.split('\n'))
            svgs[i] = svg[:insert_at] + caption_html + svg[insert_at:]

    x_edges, y_edges, x_nodes, y_nodes = compute_layout(G, layout)

    fig, ax = plt.subplots(figsize=figsize)
    for x, y in zip(x_edges, y_edges):
        ax.plot(x, y, c='gray', linestyle='--')
    points = ax.scatter(x_nodes, y_nodes, s=150, c='gray')

    tooltip = plugins.PointHTMLTooltip(points, svgs)
    plugins.connect(fig, tooltip)

    if figure_savefile is not None:
        with open(figure_savefile, 'w') as f:
            save_html(fig, f)

    # NOTE(review): the result of ``display`` is not returned.  If this name
    # is ``mpld3.display`` nothing is rendered unless the caller gets the
    # object back -- confirm which ``display`` is imported.
    display(fig)
def plot_bar_graph(values, labels, rotation=0, align='center',
                   use_mpld3=False):
    """Plot a bar graph on a new figure.

    Args:
        values: Bar heights.
        labels: Tick label for each bar.
        rotation: Angle by which the tick labels are rotated.
        align: Bar alignment relative to its position.
        use_mpld3: Whether to render the graph through mpld3.

    Returns:
        The object returned by ``mpld3.display`` when ``use_mpld3`` is
        truthy, otherwise ``None``.
    """
    figure = plt.figure()
    positions = range(0, len(values))

    tick_positions = [float(position) for position in positions]
    plt.xticks(tick_positions, labels, rotation=rotation)
    plt.bar(positions, values, align=align)

    if not use_mpld3:
        return None
    return mpld3.display(figure)
def visualizeAmAll(self):
    """Chart the interpreted predictions for the "amazonAll" data set.

    Two CSV files are read: ``'Data/amazonAll' + self.twitterData`` (second
    of nine columns) and ``'Data/amazonAll' + self.twitterPreds`` (columns
    two to six).  The predictions are reduced by ``interpretPreds`` and drawn
    by ``barChartPreds``.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    df = pd.read_csv('Data/amazonAll' + self.twitterData)
    tweets = df.iloc[:, [
        False, True, False, False, False, False, False, False, False
    ]].to_numpy()

    df = pd.read_csv('Data/amazonAll' + self.twitterPreds)
    predictions = df.iloc[:, [False, True, True, True, True,
                              True]].to_numpy()

    # Flattened tweet texts.  NOTE(review): neither ``tweets`` nor
    # ``confidence`` below is used afterwards -- confirm whether they can go.
    tweets = np.array(
        [elem for singleList in tweets for elem in singleList])
    singlePred, confidence = interpretPreds(predictions)

    # ``plt.figure()`` registers the figure with pyplot and makes it current.
    # ``plt.Figure()`` built a detached figure, so anything drawn through
    # pyplot never reached the figure that was displayed.
    # NOTE(review): assumes ``barChartPreds`` draws on the current pyplot
    # figure -- confirm.
    fig = plt.figure()
    barChartPreds(singlePred)
    return mpld3.display(fig)
def plot_ObsConfused(self, classes, preds, globalLim=10, globalNbCols=2,
                     lim=10, limByPlots=100, elemsByRows=10, nbCols=2,
                     mods=[], title=None, modelsNames=None, filename=None,
                     titleFontsize=19, **plotConfMat_kwargs):
    """Show the per-model "ObsConfused" plots as one interactive image grid.

    Each selected model's ``viz.plot_ObsConfused`` is called with
    ``show=False`` and ``returnOK=True``; the results are assembled by
    ``IMG_GRID.grid`` and shown in the notebook through mpld3, next to a small
    "save as png" download link.

    Args:
        classes, preds: Forwarded to every model's ``viz.plot_ObsConfused``.
        globalLim: Not used by this method.
        globalNbCols: Number of columns of the overall grid.
        lim, limByPlots, elemsByRows, nbCols: Forwarded to each model's plot.
        mods: Models to include, as names or as indices into the known model
            names; empty means all of ``self.obj.resultats``.
        title: Grid title; defaults to ``"ObsConfused CV <ID>"``.
        modelsNames: Optional display names replacing the selected names.
            Only consulted when ``mods`` is non-empty.
        filename: Download name; defaults to ``"obj_confused_cv_<ID>.png"``.
        titleFontsize: Font size of the grid title.
        **plotConfMat_kwargs: Extra arguments for each model's plot.
    """
    owner = self.obj
    known_names = owner.papa._models.namesModels
    models = owner.resultats

    if len(mods) > 0:
        picked = [m if isStr(m) else known_names[m] for m in mods]
        picked_results = [owner.resultats[name] for name in picked]
        keys = list(picked) if modelsNames is None else modelsNames
        models = dict(zip(keys, picked_results))

    per_model_plots = []
    for model in models.values():
        per_model_plots.append(
            model.viz.plot_ObsConfused(classes, preds, lim=lim,
                                       limByPlots=limByPlots,
                                       elemsByRows=elemsByRows,
                                       returnOK=True, nbCols=nbCols,
                                       show=False, **plotConfMat_kwargs))

    if title is None:
        title = "ObsConfused CV {}".format(owner.ID)
    if filename is None:
        filename = "obj_confused_cv_{}.png".format(owner.ID)

    img = IMG_GRID.grid(per_model_plots, nbCols=globalNbCols, toImg=True,
                        title=title, titleFontsize=titleFontsize)
    fig = img.show(returnFig=True, show=False, figsize=img.figsize)

    from IPython.display import display_html, HTML
    import mpld3

    # Hide the axes mpld3 would otherwise draw around the image.
    hide_axes_css = """ <style> g.mpld3-xaxis, g.mpld3-yaxis { display: none; } </style> """
    display_html(HTML(hide_axes_css))

    # Download link: the placeholders are filled with the image payload
    # (``str(img.data)`` without its first two and last characters) and the
    # file name.
    download_link = """ <span style='width:20px;height:20px;position: absolute;' title="Save image as png"> <a href="data:image/png;base64,{{imgData}}" download="{{filename}}"><img width="20px" 
src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABoAAAAaCAYAAACpSkzOAAAABmJLR0QA/wD/AP+gvaeTAAAAs0lEQVRIie2WPQ6DMAxGXzJwqIrerN3pORi7cqWwtjegQymyUlMZCIlU8UleIvt7cv4BKuAG9MCQIJ7ABfBEapTkU5xkVC087mMTk4ICskqrkWOdhGntpwJ9OvNuxtgtAMU1mt81F+iRC/S9BfdScVBtrHciAM6/Epds59UqPnW7KMUdp0nee0O8RtbzY9Xk/X9rdIAOUBlQn4ETPNCKAevzYJF8Mlp4f4ca9G/X1gijd/UCDStihJWAousAAAAASUVORK5CYII="></a></span> """
    download_link = download_link.replace("{{imgData}}", str(img.data)[2:-1])
    download_link = download_link.replace("{{filename}}", filename)
    display_html(HTML(download_link))

    display_html(mpld3.display(fig))
    plt.close()
def scatterz(x, y, data, labels, xscale='linear', yscale='linear', n=250):
    """Interactive scatter plot of two columns with per-point tooltips.

    Only the first ``n`` rows of ``data`` are plotted.  A linear axis gets
    limits padded by 3.5% of the data range on each side; a log axis is
    switched to a logarithmic scale instead.

    Args:
        x: Name of the column plotted on the x axis (also the axis label).
        y: Name of the column plotted on the y axis (also the axis label).
        data: DataFrame holding the columns.
        labels: Name of the column whose values (as strings) become the
            tooltips.
        xscale: ``'linear'`` or ``'log'``; other values leave the x axis
            untouched.
        yscale: ``'linear'`` or ``'log'``; other values leave the y axis
            untouched.
        n: Maximum number of rows to plot.

    Returns:
        The object returned by ``mpld3.display`` for the figure.
    """
    def padded_limits(series):
        # Data range widened by 3.5% on both sides.
        low, high = series.min(), series.max()
        pad = (high - low) * 0.035
        return (low - pad, high + pad)

    # ``facecolor`` replaces the ``axisbg`` keyword, which Matplotlib
    # deprecated in 2.0 and later removed.
    fig, ax = plt.subplots(subplot_kw=dict(facecolor='#FFFFFF'),
                           figsize=(10, 10))
    fig.set_size_inches(8, 8)

    data = data.head(n)
    scatter = ax.scatter(data[x], data[y], s=25, alpha=0.5)
    ax.grid(color='white', linestyle='solid')

    if yscale == 'linear':
        ax.set_ylim(padded_limits(data[y]))
    elif yscale == 'log':
        ax.set_yscale("log")

    if xscale == 'linear':
        ax.set_xlim(padded_limits(data[x]))
    elif xscale == 'log':
        ax.set_xscale("log")

    ax.set_xlabel(x, fontsize=18, labelpad=15, color="gray")
    ax.set_ylabel(y, fontsize=18, labelpad=15, color="gray")

    # Pad every tooltip with one space on each side.
    tooltip_texts = [' {0} '.format(text)
                     for text in data[labels].astype(str)]
    tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=tooltip_texts)
    mpld3.plugins.connect(fig, tooltip)

    return mpld3.display(fig)
def display(self, legend=True, update=True, use_mpld3=True,
            hide_volume_labels=False, **kw):
    """Prepare the figure and return an object suitable for display.

    Intended for viewing figures in a Jupyter notebook.

    Arguments:
        legend - ``True`` adds a legend, ``'custom'`` leaves the legend
            untouched, anything else removes it
            Default: True
        update - boolean for reapplying spines, ticks, title, labels and
            axis limits to the graph
            Default: True
        use_mpld3 - boolean for using mpld3 to produce interactive html
            Default: True
        hide_volume_labels - boolean for hiding x axis volume labels
            Default: False
        **kw - additional keyword arguments are passed to mpld3.display

    Returns:
        The result of ``mpld3.display(self.fig, **kw)`` when ``use_mpld3`` is
        truthy, otherwise ``self.fig`` itself.
    """
    # Equality (not identity) is intentional: it matches the original check.
    if legend == True:  # noqa: E712
        self.add_legend()
    elif legend != 'custom':
        self.remove_legend()

    if update:
        self.set_spines_and_ticks()
        self.set_title_and_labels(hide_volume_labels=hide_volume_labels)
        self.set_limits()

    if not use_mpld3:
        return self.fig
    return mpld3.display(self.fig, **kw)
def scatterplot(self, year):
    """Scatter invested equity against carbon reserves for one year.

    Each point is one row of ``self.data[year]``: x is the
    ``EndingMarketValue`` column, y is the row-wise sum of the coal, oil and
    gas ``(tCO2)`` columns, and the tooltip is the ``Company(Company)`` value.
    Both axes print integers with thousands separators.

    Args:
        year: Must be contained in ``self.years`` (checked with ``assert``).

    Returns:
        The object returned by ``mpld3.display()``.
    """
    assert year in self.years

    frame = self.data[year]
    fuel_columns = ["Coal(tCO2)", "Oil(tCO2)", "Gas(tCO2)"]
    reserves = frame.loc[:, fuel_columns].sum(axis=1)
    equity = frame.loc[:, ["EndingMarketValue"]]
    N = len(frame.index)  # not used below

    def with_thousands_separator(value, position):
        return format(int(value), ',')

    fig, ax = plt.subplots()
    points = ax.scatter(equity, reserves)

    ax.set_xlabel("Equity Invested (USD)")
    ax.set_ylabel("Carbon Reserves (tCO2)")
    ax.set_title(f"Invested Fossil Fuel Companies in {year}")

    # A separate formatter per axis, as in the original.
    ax.get_xaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(with_thousands_separator))
    ax.get_yaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(with_thousands_separator))

    company_names = frame.loc[:, "Company(Company)"].values.tolist()
    tooltip = mpld3.plugins.PointLabelTooltip(points, labels=company_names)
    mpld3.plugins.connect(fig, tooltip)

    return mpld3.display()
hoffset=10, css=css) #connect tooltip to fig mpld3.plugins.connect(fig, tooltip, TopToolbar()) #set tick marks as blank ax.axes.get_xaxis().set_ticks([]) ax.axes.get_yaxis().set_ticks([]) #set axis as blank ax.axes.get_xaxis().set_visible(False) ax.axes.get_yaxis().set_visible(False) ax.legend(numpoints=0.5) #show legend with only one dot mpld3.display() #show the plot #uncomment the below to export to html #html = mpld3.fig_to_html(fig) #print(html) # In[193]: # Plot silhouette distribution X = tfidf_matrix range_n_clusters = [12] for n_clusters in range_n_clusters: # Create a subplot with 1 row and 2 columns fig, (ax1, ax2) = plt.subplots(1, 2) fig.set_size_inches(18, 7)
def plotter(df, title=False, kind='line', x_label=None, y_label=None, style='ggplot', figsize=(8, 4), save=False, legend_pos='best', reverse_legend='guess', num_to_plot=7, tex='try', colours='default', cumulative=False, pie_legend=True, partial_pie=False, show_totals=False, transparent=False, output_format='png', interactive=False, black_and_white=False, show_p_val=False, indices=False, transpose=False, rot=False, **kwargs): """Visualise corpus interrogations. :param title: A title for the plot :type title: str :param df: Data to be plotted :type df: Pandas DataFrame :param x_label: A label for the x axis :type x_label: str :param y_label: A label for the y axis :type y_label: str :param kind: The kind of chart to make :type kind: str ('line'/'bar'/'barh'/'pie'/'area') :param style: Visual theme of plot :type style: str ('ggplot'/'bmh'/'fivethirtyeight'/'seaborn-talk'/etc) :param figsize: Size of plot :type figsize: tuple (int, int) :param save: If bool, save with *title* as name; if str, use str as name :type save: bool/str :param legend_pos: Where to place legend :type legend_pos: str ('upper right'/'outside right'/etc) :param reverse_legend: Reverse the order of the legend :type reverse_legend: bool :param num_to_plot: How many columns to plot :type num_to_plot: int/'all' :param tex: Use TeX to draw plot text :type tex: bool :param colours: Colourmap for lines/bars/slices :type colours: str :param cumulative: Plot values cumulatively :type cumulative: bool :param pie_legend: Show a legend for pie chart :type pie_legend: bool :param partial_pie: Allow plotting of pie slices only :type partial_pie: bool :param show_totals: Print sums in plot where possible :type show_totals: str -- 'legend'/'plot'/'both' :param transparent: Transparent .png background :type transparent: bool :param output_format: File format for saved image :type output_format: str -- 'png'/'pdf' :param black_and_white: Create black and white line styles :type black_and_white: bool :param 
show_p_val: Attempt to print p values in legend if contained in df :type show_p_val: bool :param indices: To use when plotting "distance from root" :type indices: bool :param stacked: When making bar chart, stack bars on top of one another :type stacked: str :param filled: For area and bar charts, make every column sum to 100 :type filled: str :param legend: Show a legend :type legend: bool :param rot: Rotate x axis ticks by *rot* degrees :type rot: int :param subplots: Plot each column separately :type subplots: bool :param layout: Grid shape to use when *subplots* is True :type layout: tuple -- (int, int) :param interactive: Experimental interactive options :type interactive: list -- [1, 2, 3] :returns: matplotlib figure """ import corpkit import os try: from IPython.utils.shimmodule import ShimWarning import warnings warnings.simplefilter('ignore', ShimWarning) except: pass kwargs['rot'] = rot xtickspan = kwargs.pop('xtickspan', False) import matplotlib as mpl from matplotlib import rc # prefer seaborn plotting try: import seaborn as sns except ImportError: pass except AttributeError: pass if interactive: import matplotlib.pyplot as plt, mpld3 else: import matplotlib.pyplot as plt import matplotlib.ticker as ticker import pandas from pandas import DataFrame, Series from time import localtime, strftime from process import checkstack if interactive: import mpld3 import collections from mpld3 import plugins, utils from plugins import InteractiveLegendPlugin, HighlightLines have_mpldc = False try: from mpldatacursor import datacursor, HighlightingDataCursor have_mpldc = True except ImportError: pass # check what environment we're in tk = checkstack('tkinter') running_python_tex = checkstack('pythontex') running_spider = checkstack('spyder') if not title: title = '' def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100): """remove extreme values from colourmap --- no pure white""" import matplotlib.colors as colors import numpy as np new_cmap = 
colors.LinearSegmentedColormap.from_list( 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval), cmap(np.linspace(minval, maxval, n))) return new_cmap def get_savename(imagefolder, save=False, title=False, ext='png'): """Come up with the savename for the image.""" import os from corpkit.process import urlify # name as if not ext.startswith('.'): ext = '.' + ext if isinstance(save, STRINGTYPE): savename = os.path.join(imagefolder, (urlify(save) + ext)) #this 'else' is redundant now that title is obligatory else: if title: filename = urlify(title) + ext savename = os.path.join(imagefolder, filename) # remove duplicated ext if savename.endswith('%s%s' % (ext, ext)): savename = savename.replace('%s%s' % (ext, ext), ext, 1) return savename def rename_data_with_total(dataframe, was_series=False, using_tex=False, absolutes=True): """adds totals (abs, rel, keyness) to entry name strings""" if was_series: where_the_words_are = dataframe.index else: where_the_words_are = dataframe.columns the_labs = [] for w in list(where_the_words_are): if not absolutes: if was_series: perc = dataframe.T[w][0] else: the_labs.append(w) continue if using_tex: the_labs.append('%s (%.2f\%%)' % (w, perc)) else: the_labs.append('%s (%.2f %%)' % (w, perc)) else: if was_series: score = dataframe.T[w].sum() else: score = dataframe[w].sum() if using_tex: the_labs.append('%s (n=%d)' % (w, score)) else: the_labs.append('%s (n=%d)' % (w, score)) if not was_series: dataframe.columns = the_labs else: vals = list(dataframe[list(dataframe.columns)[0]].values) dataframe = pandas.DataFrame(vals, index=the_labs) dataframe.columns = ['Total'] return dataframe def auto_explode(dataframe, tinput, was_series=False, num_to_plot=7): """give me a list of strings and i'll output explode option""" output = [0 for s in range(num_to_plot)] if was_series: l = list(dataframe.index) else: l = list(dataframe.columns) if isinstance(tinput, (STRINGTYPE, int)): tinput = [tinput] if isinstance(tinput, list): for i 
in tinput: if isinstance(i, STRINGTYPE): index = l.index(i) else: index = i output[index] = 0.1 return output # get a few options from kwargs sbplt = kwargs.get('subplots', False) show_grid = kwargs.pop('grid', True) the_rotation = kwargs.get('rot', False) dragmode = kwargs.pop('draggable', False) leg_frame = kwargs.pop('legend_frame', True) leg_alpha = kwargs.pop('legend_alpha', 0.8) # auto set num to plot based on layout lo = kwargs.get('layout', None) if lo: num_to_plot = lo[0] * lo[1] # todo: get this dynamically instead. styles = [ 'dark_background', 'bmh', 'grayscale', 'ggplot', 'fivethirtyeight', 'matplotlib', False, 'mpl-white' ] #if style not in styles: #raise ValueError('Style %s not found. Use %s' % (str(style), ', '.join(styles))) if style == 'mpl-white': try: sns.set_style("whitegrid") except: pass style = 'matplotlib' if kwargs.get('savepath'): mpl.rcParams['savefig.directory'] = kwargs.get('savepath') kwargs.pop('savepath', None) mpl.rcParams['savefig.bbox'] = 'tight' mpl.rcParams.update({'figure.autolayout': True}) # try to use tex # TO DO: # make some font kwargs here using_tex = False mpl.rcParams['font.family'] = 'sans-serif' mpl.rcParams['text.latex.unicode'] = True if tex == 'try' or tex is True: try: rc('text', usetex=True) rc('font', **{'family': 'serif', 'serif': ['Computer Modern']}) using_tex = True except: matplotlib.rc('font', family='sans-serif') matplotlib.rc('font', serif='Helvetica Neue') matplotlib.rc('text', usetex='false') rc('text', usetex=False) else: rc('text', usetex=False) if interactive: using_tex = False if show_totals is False: show_totals = 'none' # find out what kind of plot we're making, and enable # or disable interactive values if need be kwargs['kind'] = kind.lower() if interactive: if kwargs['kind'].startswith('bar'): interactive_types = [3] elif kwargs['kind'] == 'area': interactive_types = [2, 3] elif kwargs['kind'] == 'line': interactive_types = [2, 3] elif kwargs['kind'] == 'pie': interactive_types = None 
warnings.warn( 'Interactive plotting not yet available for pie plots.') else: interactive_types = [None] if interactive is False: interactive_types = [None] # find out if pie mode, add autopct format piemode = False if kind == 'pie': piemode = True # always the best spot for pie #if legend_pos == 'best': #legend_pos = 'lower left' if show_totals.endswith('plot') or show_totals.endswith('both'): kwargs['pctdistance'] = 0.6 if using_tex: kwargs['autopct'] = r'%1.1f\%%' else: kwargs['autopct'] = '%1.1f%%' # copy data, make series into df dataframe = df.copy() if kind == 'heatmap': try: dataframe = dataframe.T except: pass was_series = False if isinstance(dataframe, Series): was_series = True if not cumulative: dataframe = DataFrame(dataframe) else: dataframe = DataFrame(dataframe.cumsum()) else: # don't know if this is much good. if transpose: dataframe = dataframe.T if cumulative: dataframe = DataFrame(dataframe.cumsum()) if len(list(dataframe.columns)) == 1: was_series = True # attempt to convert x axis to ints: #try: # dataframe.index = [int(i) for i in list(dataframe.index)] #except: # pass # remove totals and tkinter order if not was_series: for name, ax in zip(['Total'] * 2 + ['tkintertable-order'] * 2, [0, 1, 0, 1]): try: dataframe = dataframe.drop(name, axis=ax, errors='ignore') except: pass try: dataframe = dataframe.drop('tkintertable-order', errors='ignore') except: pass try: dataframe = dataframe.drop('tkintertable-order', axis=1, errors='ignore') except: pass # look at columns to see if all can be ints, in which case, set up figure # for depnumming if not was_series: if indices == 'guess': def isint(x): try: a = float(x) b = int(a) except (ValueError, OverflowError): return False else: return a == b if all([isint(x) is True for x in list(dataframe.columns)]): indices = True else: indices = False # if depnumming, plot all, transpose, and rename axes if indices is True: num_to_plot = 'all' dataframe = dataframe.T if y_label is None: y_label = 'Percentage of 
all matches' if x_label is None: x_label = '' # set backend? output_formats = [ 'svgz', 'ps', 'emf', 'rgba', 'raw', 'pdf', 'svg', 'eps', 'png', 'pgf' ] if output_format not in output_formats: raise ValueError('%s output format not recognised. Must be: %s' % (output_format, ', '.join(output_formats))) # don't know if these are necessary if 'pdf' in output_format: plt.switch_backend(output_format) if 'pgf' in output_format: plt.switch_backend(output_format) if num_to_plot == 'all': if was_series: if not piemode: num_to_plot = len(dataframe) else: num_to_plot = len(dataframe) else: if not piemode: num_to_plot = len(list(dataframe.columns)) else: num_to_plot = len(dataframe.index) # explode pie, or remove if not piemode if piemode and not sbplt and kwargs.get('explode'): kwargs['explode'] = auto_explode(dataframe, kwargs['explode'], was_series=was_series, num_to_plot=num_to_plot) else: kwargs.pop('explode', None) legend = kwargs.get('legend', True) #cut data short plotting_a_totals_column = False if was_series: if list(dataframe.columns)[0] != 'Total': try: can_be_ints = [int(x) for x in list(dataframe.index)] num_to_plot = len(dataframe) except: dataframe = dataframe[:num_to_plot] elif list(dataframe.columns)[0] == 'Total': plotting_a_totals_column = True if not 'legend' in kwargs: legend = False num_to_plot = len(dataframe) else: if transpose: dataframe = dataframe.head(num_to_plot) else: dataframe = dataframe.T.head(num_to_plot).T # remove stats fields, put p in entry text, etc. 
statfields = ['slope', 'intercept', 'r', 'p', 'stderr'] try: dataframe = dataframe.drop(statfields, axis=1, errors='ignore') except: pass try: dataframe.ix['p'] there_are_p_vals = True except: there_are_p_vals = False if show_p_val: if there_are_p_vals: newnames = [] for col in list(dataframe.columns): pval = dataframe[col]['p'] def p_string_formatter(val): if val < 0.001: if not using_tex: return 'p < 0.001' else: return r'p $<$ 0.001' else: return 'p = %s' % format(val, '.3f') pstr = p_string_formatter(pval) newname = '%s (%s)' % (col, pstr) newnames.append(newname) dataframe.columns = newnames dataframe.drop(statfields, axis=0, inplace=True, errors='ignore') else: warnings.warn( 'No p-values calculated to show.\n\nUse keep_stats kwarg while editing to generate these values.' ) else: if there_are_p_vals: dataframe.drop(statfields, axis=0, inplace=True, errors='ignore') # make and set y label absolutes = True if isinstance(dataframe, DataFrame): try: if not all([s.is_integer() for s in dataframe.iloc[0, :].values]): absolutes = False except: pass else: if not all([s.is_integer() for s in dataframe.values]): absolutes = False ########################################## ################ COLOURS ################# ########################################## # set defaults, with nothing for heatmap yet if colours is True or colours == 'default' or colours == 'Default': if kind != 'heatmap': colours = 'viridis' else: colours = 'default' # assume it's a single color, unless string denoting map cmap_or_c = 'color' if isinstance(colours, str): cmap_or_c = 'colormap' from matplotlib.colors import LinearSegmentedColormap if isinstance(colours, LinearSegmentedColormap): cmap_or_c = 'colormap' # for heatmaps, it's always a colormap if kind == 'heatmap': cmap_or_c = 'cmap' # if it's a defaulty string, set accordingly if isinstance(colours, str): if colours.lower().startswith('diverg'): colours = sns.diverging_palette(10, 133, as_cmap=True) # if default not set, do diverge for any 
df with a number < 0 elif colours.lower() == 'default': mn = dataframe.min() if isinstance(mn, Series): mn = mn.min() if mn < 0: colours = sns.diverging_palette(10, 133, as_cmap=True) else: colours = sns.light_palette("green", as_cmap=True) if 'seaborn' not in style: kwargs[cmap_or_c] = colours #if not was_series: # if kind in ['pie', 'line', 'area']: # if colours and not plotting_a_totals_column: # kwargs[cmap_or_c] = colours # else: # if colours: # kwargs[cmap_or_c] = colours #if piemode: # if num_to_plot > 0: # kwargs[cmap_or_c] = colours # else: # if num_to_plot > 0: # kwargs[cmap_or_c] = colours # multicoloured bar charts #if colours and cmap_or_c == 'colormap': # if kind.startswith('bar'): # if len(list(dataframe.columns)) == 1: # if not black_and_white: # import numpy as np # the_range = np.linspace(0, 1, num_to_plot) # middle = len(the_range) / 2 # try: # cmap = plt.get_cmap(colours) # kwargs[cmap_or_c] = [cmap(n) for n in the_range][middle] # except ValueError: # kwargs[cmap_or_c] = colours # # make a bar width ... ? ... 
# #kwargs['width'] = (figsize[0] / float(num_to_plot)) / 1.5 # reversing legend option if reverse_legend is True: rev_leg = True elif reverse_legend is False: rev_leg = False # show legend or don't, guess whether to reverse based on kind if kind in ['bar', 'barh', 'area', 'line', 'pie']: if was_series: legend = False if kind == 'pie': if pie_legend: legend = True else: legend = False if kind in ['barh', 'area']: if reverse_legend == 'guess': rev_leg = True if not 'rev_leg' in locals(): rev_leg = False # the default legend placement if legend_pos is True: legend_pos = 'best' # cut dataframe if just_totals try: tst = dataframe['Combined total'] dataframe = dataframe.head(num_to_plot) except: pass # no title for subplots because ugly, if title and not sbplt: kwargs['title'] = title # no interactive subplots yet: if sbplt and interactive: import warnings interactive = False warnings.warn('No interactive subplots yet, sorry.') return # not using pandas for labels or legend anymore. #kwargs['labels'] = None #kwargs['legend'] = False if legend: if num_to_plot > 6: if not kwargs.get('ncol'): kwargs['ncol'] = num_to_plot // 7 # kwarg options go in leg_options leg_options = { 'framealpha': leg_alpha, 'shadow': kwargs.get('shadow', False), 'ncol': kwargs.pop('ncol', 1) } # determine legend position based on this dict if legend_pos: possible = { 'best': 0, 'upper right': 1, 'upper left': 2, 'lower left': 3, 'lower right': 4, 'right': 5, 'center left': 6, 'center right': 7, 'lower center': 8, 'upper center': 9, 'center': 10, 'o r': 2, 'outside right': 2, 'outside upper right': 2, 'outside center right': 'center left', 'outside lower right': 'lower left' } if isinstance(legend_pos, int): the_loc = legend_pos elif isinstance(legend_pos, str): try: the_loc = possible[legend_pos] except KeyError: raise KeyError( 'legend_pos value must be one of:\n%s\n or an int between 0-10.' 
% ', '.join(list(possible.keys()))) leg_options['loc'] = the_loc #weirdness needed for outside plot if legend_pos in ['o r', 'outside right', 'outside upper right']: leg_options['bbox_to_anchor'] = (1.02, 1) if legend_pos == 'outside center right': leg_options['bbox_to_anchor'] = (1.02, 0.5) if legend_pos == 'outside lower right': leg_options['loc'] == 'upper right' leg_options['bbox_to_anchor'] = (0.5, 0.5) # a bit of distance between legend and plot for outside legends if isinstance(legend_pos, str): if legend_pos.startswith('o'): leg_options['borderaxespad'] = 1 if not piemode: if show_totals.endswith('both') or show_totals.endswith('legend'): dataframe = rename_data_with_total(dataframe, was_series=was_series, using_tex=using_tex, absolutes=absolutes) else: if pie_legend: if show_totals.endswith('both') or show_totals.endswith('legend'): dataframe = rename_data_with_total(dataframe, was_series=was_series, using_tex=using_tex, absolutes=absolutes) if piemode: if partial_pie: dataframe = dataframe / 100.0 # some pie things if piemode: if not sbplt: kwargs['y'] = list(dataframe.columns)[0] def filler(df): pby = df.T.copy() for i in list(pby.columns): tot = pby[i].sum() pby[i] = pby[i] * 100.0 / tot return pby.T areamode = False if kind == 'area': areamode = True if legend is False: kwargs['legend'] = False # line highlighting option for interactive! if interactive: if 2 in interactive_types: if kind == 'line': kwargs['marker'] = ',' if not piemode: kwargs['alpha'] = 0.1 # convert dates --- works only in my current case! 
#if plotting_a_totals_column or not was_series: # try: # can_it_be_int = int(list(dataframe.index)[0]) # can_be_int = True # except: # can_be_int = False # if can_be_int: # if 1500 < int(list(dataframe.index)[0]): # if 2050 > int(list(dataframe.index)[0]): # n = pandas.PeriodIndex([d for d in list(dataframe.index)], freq='A') # dataframe = dataframe.set_index(n) if kwargs.get('filled'): if areamode or kind.startswith('bar'): dataframe = filler(dataframe) kwargs.pop('filled', None) MARKERSIZE = 4 COLORMAP = { 0: { 'marker': None, 'dash': (None, None) }, 1: { 'marker': None, 'dash': [5, 5] }, 2: { 'marker': "o", 'dash': (None, None) }, 3: { 'marker': None, 'dash': [1, 3] }, 4: { 'marker': "s", 'dash': [5, 2, 5, 2, 5, 10] }, 5: { 'marker': None, 'dash': [5, 3, 1, 2, 1, 10] }, 6: { 'marker': 'o', 'dash': (None, None) }, 7: { 'marker': None, 'dash': [5, 3, 1, 3] }, 8: { 'marker': "1", 'dash': [1, 3] }, 9: { 'marker': "*", 'dash': [5, 5] }, 10: { 'marker': "2", 'dash': [5, 2, 5, 2, 5, 10] }, 11: { 'marker': "s", 'dash': (None, None) } } HATCHES = { 0: { 'color': '#dfdfdf', 'hatch': "/" }, 1: { 'color': '#6f6f6f', 'hatch': "\\" }, 2: { 'color': 'b', 'hatch': "|" }, 3: { 'color': '#dfdfdf', 'hatch': "-" }, 4: { 'color': '#6f6f6f', 'hatch': "+" }, 5: { 'color': 'b', 'hatch': "x" } } if black_and_white: if kind == 'line': kwargs['linewidth'] = 1 cmap = plt.get_cmap('Greys') new_cmap = truncate_colormap(cmap, 0.25, 0.95) if kind == 'bar': # darker if just one entry if len(dataframe.columns) == 1: new_cmap = truncate_colormap(cmap, 0.70, 0.90) kwargs[cmap_or_c] = new_cmap # remove things from kwargs if heatmap if kind == 'heatmap': hmargs = { 'annot': kwargs.pop('annot', True), cmap_or_c: kwargs.pop(cmap_or_c, None), 'fmt': kwargs.pop('fmt', ".2f"), 'cbar': kwargs.pop('cbar', False) } for i in [ 'vmin', 'vmax', 'linewidths', 'linecolor', 'robust', 'center', 'cbar_kws', 'cbar_ax', 'square', 'mask', 'norm' ]: if i in kwargs.keys(): hmargs[i] = kwargs.pop(i, None) class 
dummy_context_mgr(): """a fake context for plotting without style perhaps made obsolete by 'classic' style in new mpl""" def __enter__(self): return None def __exit__(self, one, two, three): return False with plt.style.context( (style)) if style != 'matplotlib' else dummy_context_mgr(): kwargs.pop('filled', None) if not sbplt: # check if negative values, no stacked if so if areamode: if not kwargs.get('ax'): kwargs['legend'] = False if dataframe.applymap(lambda x: x < 0.0).any().any(): kwargs['stacked'] = False rev_leg = False if kind != 'heatmap': # turn off pie labels at the last minute if kind == 'pie' and pie_legend: kwargs['labels'] = None kwargs['autopct'] = '%.2f' if kind == 'pie': kwargs.pop('color', None) ax = dataframe.plot(figsize=figsize, **kwargs) else: fg = plt.figure(figsize=figsize) if title: plt.title(title) ax = kwargs.get('ax', plt.axes()) tmp = sns.heatmap(dataframe, ax=ax, **hmargs) ax.set_title(title) for item in tmp.get_yticklabels(): item.set_rotation(0) plt.close(fg) if areamode and not kwargs.get('ax'): handles, labels = plt.gca().get_legend_handles_labels() del handles del labels if x_label: ax.set_xlabel(x_label) if y_label: ax.set_ylabel(y_label) else: if not kwargs.get('layout'): plt.gcf().set_tight_layout(False) if kind != 'heatmap': ax = dataframe.plot(figsize=figsize, **kwargs) else: plt.figure(figsize=figsize) if title: plt.title(title) ax = plt.axes() sns.heatmap(dataframe, ax=ax, **hmargs) plt.xticks(rotation=0) plt.yticks(rotation=0) def rotate_degrees(rotation, labels): if rotation is None: if max(labels, key=len) > 6: return 45 else: return 0 elif rotation is False: return 0 elif rotation is True: return 45 else: return rotation if sbplt: if 'layout' not in kwargs: axes = [l for l in ax] else: axes = [] cols = [l for l in ax] for col in cols: for bit in col: axes.append(bit) for index, a in enumerate(axes): if xtickspan is not False: a.xaxis.set_major_locator( ticker.MultipleLocator(xtickspan)) labels = [item.get_text() for 
item in a.get_xticklabels()] rotation = rotate_degrees(the_rotation, labels) try: if the_rotation == 0: ax.set_xticklabels(labels, rotation=rotation, ha='center') else: ax.set_xticklabels(labels, rotation=rotation, ha='right') except AttributeError: pass else: if kind == 'heatmap': labels = [item.get_text() for item in ax.get_xticklabels()] rotation = rotate_degrees(the_rotation, labels) if the_rotation == 0: ax.set_xticklabels(labels, rotation=rotation, ha='center') else: ax.set_xticklabels(labels, rotation=rotation, ha='right') if transparent: plt.gcf().patch.set_facecolor('white') plt.gcf().patch.set_alpha(0) if black_and_white: if kind == 'line': # white background # change everything to black and white with interesting dashes and markers c = 0 for line in ax.get_lines(): line.set_color('black') #line.set_width(1) line.set_dashes(COLORMAP[c]['dash']) line.set_marker(COLORMAP[c]['marker']) line.set_markersize(MARKERSIZE) c += 1 if c == len(list(COLORMAP.keys())): c = 0 # draw legend with proper placement etc if legend: if not piemode and not sbplt and kind != 'heatmap': if 3 not in interactive_types: handles, labels = plt.gca().get_legend_handles_labels() # area doubles the handles and labels. 
this removes half: #if areamode: # handles = handles[-len(handles) / 2:] # labels = labels[-len(labels) / 2:] if rev_leg: handles = handles[::-1] labels = labels[::-1] if kwargs.get('ax'): lgd = plt.gca().legend(handles, labels, **leg_options) ax.get_legend().draw_frame(leg_frame) else: lgd = plt.legend(handles, labels, **leg_options) lgd.draw_frame(leg_frame) if interactive: # 1 = highlight lines # 2 = line labels # 3 = legend switches ax = plt.gca() # fails for piemode lines = ax.lines handles, labels = plt.gca().get_legend_handles_labels() if 1 in interactive_types: plugins.connect(plt.gcf(), HighlightLines(lines)) if 3 in interactive_types: plugins.connect( plt.gcf(), InteractiveLegendPlugin(lines, labels, alpha_unsel=0.0)) for i, l in enumerate(lines): y_vals = l.get_ydata() x_vals = l.get_xdata() x_vals = [str(x) for x in x_vals] if absolutes: ls = [ '%s (%s: %d)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals) ] else: ls = [ '%s (%s: %.2f%%)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals) ] if 2 in interactive_types: #if 'kind' in kwargs and kwargs['kind'] == 'area': tooltip_line = mpld3.plugins.LineLabelTooltip( lines[i], labels[i]) mpld3.plugins.connect(plt.gcf(), tooltip_line) #else: if kind == 'line': tooltip_point = mpld3.plugins.PointLabelTooltip(l, labels=ls) mpld3.plugins.connect(plt.gcf(), tooltip_point) if piemode: if not sbplt: plt.axis('equal') ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) # add x label # this could be revised now! 
# if time series period, it's year for now if isinstance(dataframe.index, pandas.tseries.period.PeriodIndex): x_label = 'Year' y_l = False if not absolutes: y_l = 'Percentage' else: y_l = 'Absolute frequency' # hacky: turn legend into subplot titles :) if sbplt: # title the big plot #plt.gca().suptitle(title, fontsize = 16) #plt.subplots_adjust(top=0.9) # get all axes if 'layout' not in kwargs: axes = [l for index, l in enumerate(ax)] else: axes = [] cols = [l for index, l in enumerate(ax)] for col in cols: for bit in col: axes.append(bit) # set subplot titles for index, a in enumerate(axes): try: titletext = list(dataframe.columns)[index] except: pass a.set_title(titletext) try: a.legend_.remove() except: pass #try: # from matplotlib.ticker import MaxNLocator # from corpkit.process import is_number # indx = list(dataframe.index) # if all([is_number(qq) for qq in indx]): # ax.get_xaxis().set_major_locator(MaxNLocator(integer=True)) #except: # pass # remove axis labels for pie plots if piemode: a.axes.get_xaxis().set_visible(False) a.axes.get_yaxis().set_visible(False) a.axis('equal') a.grid(b=show_grid) # add sums to bar graphs and pie graphs # doubled right now, no matter if not sbplt: # show grid ax.grid(b=show_grid) if kind.startswith('bar'): width = ax.containers[0][0].get_width() if was_series: the_y_limit = plt.ylim()[1] if show_totals.endswith('plot') or show_totals.endswith('both'): # make plot a bit higher if putting these totals on it plt.ylim([0, the_y_limit * 1.05]) for i, label in enumerate(list(dataframe.index)): if len(dataframe.ix[label]) == 1: score = dataframe.ix[label][0] else: if absolutes: score = dataframe.ix[label].sum() else: #import warnings #warnings.warn("It's not possible to determine total percentage from individual percentages.") continue if not absolutes: plt.annotate('%.2f' % score, (i, score), ha='center', va='bottom') else: plt.annotate(score, (i, score), ha='center', va='bottom') else: the_y_limit = plt.ylim()[1] if 
show_totals.endswith('plot') or show_totals.endswith('both'): for i, label in enumerate(list(dataframe.columns)): if len(dataframe[label]) == 1: score = dataframe[label][0] else: if absolutes: score = dataframe[label].sum() else: #import warnings #warnings.warn("It's not possible to determine total percentage from individual percentages.") continue if not absolutes: plt.annotate('%.2f' % score, (i, score), ha='center', va='bottom') else: plt.annotate(score, (i, score), ha='center', va='bottom') if not kwargs.get('layout') and not sbplt and not kwargs.get('ax'): plt.tight_layout() if kwargs.get('ax'): try: plt.gcf().set_tight_layout(False) except: pass try: plt.set_tight_layout(False) except: pass if save: if running_python_tex: imagefolder = '../images' else: imagefolder = 'images' savename = get_savename(imagefolder, save=save, title=title, ext=output_format) if not os.path.isdir(imagefolder): os.makedirs(imagefolder) # save image and get on with our lives if legend_pos.startswith('o') and not sbplt: plt.gcf().savefig(savename, dpi=150, bbox_extra_artists=(lgd, ), bbox_inches='tight', format=output_format) else: plt.gcf().savefig(savename, dpi=150, format=output_format) time = strftime("%H:%M:%S", localtime()) if os.path.isfile(savename): print('\n' + time + ": " + savename + " created.") else: raise ValueError("Error making %s." 
% savename) if dragmode: plt.legend().draggable() if sbplt: plt.subplots_adjust(right=.8) plt.subplots_adjust(left=.1) # add DataCursor to notebook backend if possible if have_mpldc: if kind == 'line': HighlightingDataCursor( plt.gca().get_lines(), highlight_width=4, highlight_color=False, formatter=lambda **kwargs: '%s: %s' % (kwargs['label'], "{0:.3f}".format(kwargs['y']))) else: datacursor(formatter=lambda **kwargs: '%s: %s' % (kwargs['label'], "{0:.3f}".format(kwargs['height']))) #if not interactive and not running_python_tex and not running_spider \ # and not tk: # plt.gcf().show() # return plt #elif running_spider or tk: # return plt if interactive: plt.subplots_adjust(right=.8) plt.subplots_adjust(left=.1) try: ax.legend_.remove() except: pass return mpld3.display() else: return plt
def buildGraph2(self):
    """Build the interactive cluster scatter plot and return its display.

    Calls ``self.load_tfidf()``, collects the text file names found under
    ``self.folder``, reads 2-D coordinates from ``self.loadMDS()`` and draws
    one point series per cluster label from ``self.clusters()``.  Hovering a
    point shows the corresponding file name.

    Returns:
        The object produced by ``mpld3.display`` for the figure.  It is
        returned (rather than discarded) so that a notebook cell ending in a
        call to this method actually renders the plot.
    """
    self.load_tfidf()
    cPaths = Paths(self.folder)
    self.filenames, self.folders = cPaths.getTxts()
    xs, ys = self.loadMDS()
    cluster_colors, cluster_names = self.setClusters()

    # one row per document: MDS position, cluster label and file name
    df = pd.DataFrame(
        dict(x=xs, y=ys, label=self.clusters(), title=self.filenames))
    groups = df.groupby('label')

    # custom css: tooltip font, hidden axis groups, figure shifted left
    css = """ text.mpld3-text, div.mpld3-tooltip { font-family:Arial, Helvetica, sans-serif; } g.mpld3-xaxis, g.mpld3-yaxis { display: none; } svg.mpld3-figure { margin-left: -200px;} """

    fig, ax = plt.subplots(figsize=(14, 6))
    ax.margins(0.03)  # 3% padding around the autoscaled data limits
    ax.set_aspect('auto')

    # one layer per cluster; colour and legend label are looked up by the
    # cluster id that groupby yields as `name`
    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=18,
                         label=cluster_names[name], mec='none',
                         color=cluster_colors[name])
        point_titles = list(group.title)
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], point_titles,
                                                 voffset=10, hoffset=10,
                                                 css=css)
        mpld3.plugins.connect(fig, tooltip, TopToolbar())

    # the axes carry no meaning for an MDS projection: no ticks, no axis
    for axis in (ax.axes.get_xaxis(), ax.axes.get_yaxis()):
        axis.set_ticks([])
        axis.set_visible(False)

    ax.legend(numpoints=1)  # legend entries show a single marker

    return mpld3.display(fig)
def plotEssays(x, y, labels, titles, cluster_names=None, ms=10, output='notebook'):
    """Scatter-plot documents by cluster with hover tooltips.

    Args:
        x, y: Point coordinates, one entry per document.
        labels: Cluster label of each document.
        titles: Tooltip text of each document.
        cluster_names: Optional mapping from cluster label to legend text.
            When omitted, every label is used as its own legend text.
        ms: Marker size handed to ``ax.plot``.
        output: ``'notebook'`` to get the ``mpld3.display`` object for a
            14x6 figure, or ``'app'`` to get the HTML string of a 14x8 one.

    Returns:
        The ``mpld3.display`` result or the HTML string, depending on
        ``output``.

    Raises:
        ValueError: If ``output`` is neither ``'notebook'`` nor ``'app'``.
    """
    figure_sizes = {'notebook': (14, 6), 'app': (14, 8)}
    # fail early with a clear message instead of hitting an unbound `ax`
    if output not in figure_sizes:
        raise ValueError(
            "output must be 'notebook' or 'app', got %r" % (output,))

    # one row per document: position, cluster label and tooltip title
    df = pd.DataFrame(dict(x=x, y=y, label=labels, title=titles))
    groups = df.groupby('label')

    # custom css: tooltip font, hidden axis groups, negative side margins
    css = """ text.mpld3-text, div.mpld3-tooltip { font-family:Arial, Helvetica, sans-serif; } g.mpld3-xaxis, g.mpld3-yaxis { display: none; } svg.mpld3-figure { margin-left: -100px; margin-right: -100px} """

    if cluster_names is None:
        # key by the labels that actually occur, so non-contiguous or
        # non-integer labels do not raise KeyError in the loop below
        cluster_names = {label: label for label in set(labels)}

    fig, ax = plt.subplots(figsize=figure_sizes[output])
    ax.margins(0.03)  # 3% padding around the autoscaled data limits
    ax.set_aspect('auto')

    # one layer per cluster; colours come from matplotlib's default cycle
    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=ms,
                         label=cluster_names[name], mec='none')
        point_titles = list(group.title)
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], point_titles,
                                                 voffset=10, hoffset=10,
                                                 css=css)
        mpld3.plugins.connect(fig, tooltip, TopToolbar())

    # no tick marks; the axis objects themselves stay visible
    for axis in (ax.axes.get_xaxis(), ax.axes.get_yaxis()):
        axis.set_ticks([])
        axis.set_visible(True)

    ax.legend(numpoints=1)  # legend entries show a single marker

    if output == 'notebook':
        return mpld3.display()
    return mpld3.fig_to_html(fig)
# Plot a polygon
def plot_polygon(ax, vertices, color_index, to_fill=False):
    """Add a polygon through ``vertices`` to ``ax``.

    Args:
        ax: Axes the patch is added to.
        vertices: Iterable of objects exposing ``x`` and ``y`` attributes.
        color_index: Index handed to ``get_tab10_color_from_index`` to pick
            the colour.
        to_fill: When true the polygon is filled with the same colour as
            its outline; otherwise only the outline is drawn.
    """
    color = get_tab10_color_from_index(color_index)
    points_series = [[vertex.x, vertex.y] for vertex in vertices]
    # `fill` is an on/off switch, not a colour: the face colour has to be
    # supplied separately or a filled polygon gets the default patch colour
    polygon = patches.Polygon(points_series,
                              linewidth=1,
                              edgecolor=color,
                              facecolor=color,
                              fill=bool(to_fill))
    ax.add_patch(polygon)


# Annotate text on top of the plot
def annotate(text, vertex):
    """Write ``text`` at the position of ``vertex`` (uses its x and y)."""
    plt.annotate(text, (vertex.x, vertex.y))


# Render the plot
def plot():
    """Return the mpld3 display of the current figure."""
    return mpld3.display()


# Get the text of a word
def get_word_text(word):
    """Concatenate the ``text`` of every symbol in ``word.symbols``."""
    return ''.join(symbol.text for symbol in word.symbols)


## Plotting the words that were found
from PIL import Image

image_data = Image.open(image_path)
ax = prepare_image_data(image_data)
# NOTE(review): the loop body only sets the title and uses neither
# item_index nor word -- the rest of the loop looks truncated; confirm.
for item_index, word in enumerate(all_words):
    plt.title(f'{image_path} words')
def _node_tooltip_html(g, node):
    """Return the tooltip text for ``node``: its CPD, or the node name."""
    try:
        cpd = g.cpd(node)
        try:
            return cpd._repr_html_()
        except Exception:
            # no HTML representation available: use the plain-text form
            return str(cpd)
    except Exception:
        # Best effort: always return *something* so that the label list
        # stays aligned with the list of plotted points.
        return str(node)


def pretty_draw(g):
    """Draw graph ``g`` with a spring layout and return its mpld3 display.

    Every node is drawn as a labelled circle; hovering it shows the value of
    ``g.cpd(node)`` (HTML representation when available, plain text
    otherwise).  If no CPD can be obtained for a node, the node name is
    shown instead.

    Args:
        g: Graph accepted by ``nx.spring_layout`` that also offers
            ``edges()`` and ``cpd(node)`` -- presumably a pgmpy-style
            model; confirm against callers.

    Returns:
        The object produced by ``mpld3.display`` for the figure.
    """
    css = """ table { border-collapse: collapse; } th { color: #ffffff; background-color: #000000; } td { background-color: #cccccc; } table, th, td { font-family:Arial, Helvetica, sans-serif; border: 1px solid black; text-align: center; padding: 3px; font-size:11pt; } g.mpld3-xaxis, g.mpld3-yaxis { display: none; } """

    fig = plt.figure(figsize=(12, 8))
    ax = plt.gca()

    edges = list(g.edges())
    layout = nx.spring_layout(g, iterations=10)

    nodes = []
    points = []
    labels = []
    for node, (x, y) in layout.items():
        nodes.append(node)
        points.append((x, y))
        labels.append(_node_tooltip_html(g, node))

    points_x, points_y = zip(*points)
    ax.set_xlim(min(points_x) - 0.08, max(points_x) + 0.08)
    ax.set_ylim(min(points_y) - 0.08, max(points_y) + 0.08)

    for src, dst in edges:
        src_pos = layout[src]
        dst_pos = layout[dst]
        # the last 15% of the edge is overdrawn thicker to mark its head
        arr_pos = dst_pos - 0.15 * (dst_pos - src_pos)
        ax.plot(*list(zip(src_pos, dst_pos)), color='grey')
        ax.plot(*list(zip(arr_pos, dst_pos)), color='black', alpha=.5,
                linewidth=5)

    # visible node circles with their names centred on top
    ax.plot(points_x, points_y, 'o', color='lightgray', mec='k', ms=20,
            mew=1, alpha=1.)
    for text, x, y in zip(nodes, points_x, points_y):
        ax.text(x, y, text, horizontalalignment='center',
                verticalalignment='center')

    # fully transparent, larger markers that only carry the tooltips
    pts = ax.plot(points_x, points_y, 'o', color='lightgray', mec='k',
                  ms=40, mew=1, alpha=0.)
    tooltip = plugins.PointHTMLTooltip(pts[0], labels, voffset=10,
                                       hoffset=10, css=css)
    plugins.connect(fig, tooltip)
    return mpld3.display(fig)
def cluster_graphic_html():
    """Return the clustered-titles scatter plot as an mpld3 HTML string.

    Projects the precomputed distance matrix ``model.dist`` to two
    dimensions with MDS, colours the points by the k-means labels in
    ``model.KMmodel.labels_`` and attaches each entry of ``model.titles``
    as a hover tooltip.  The figure is closed once it has been rendered.

    Returns:
        str: HTML produced by ``mpld3.fig_to_html``.
    """
    # two components as we're plotting points in a two-dimensional plane;
    # "precomputed" because we provide a distance matrix;
    # `random_state` is fixed so the plot is reproducible.
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    pos = mds.fit_transform(model.dist)  # one row per sample, one column per component
    xs, ys = pos[:, 0], pos[:, 1]

    clusters = model.KMmodel.labels_.tolist()

    # one row per document: MDS position, cluster number and title
    df = pd.DataFrame(dict(x=xs, y=ys, label=clusters, title=model.titles))
    groups = df.groupby('label')

    # custom css: tooltip font and hidden axis groups
    css = """ text.mpld3-text, div.mpld3-tooltip { font-family:Arial, Helvetica, sans-serif; } g.mpld3-xaxis, g.mpld3-yaxis { display: none; } """

    fig, ax = plt.subplots(figsize=(14, 6))
    ax.margins(0.03)  # 3% padding around the autoscaled data limits
    ax.set_aspect('auto')

    # colour and legend text per cluster id
    cluster_colors = {0: '#1b9e77', 1: '#d95f02', 2: '#7570b3',
                      3: '#e7298a', 4: '#66a61e'}
    cluster_names = {0: 'Family, home, war',
                     1: 'Police, killed, murders',
                     2: 'Father, New York, brothers',
                     3: 'Dance, singing, love',
                     4: 'Killed, soldiers, captain'}

    # one layer per cluster; `name` is the cluster id yielded by groupby
    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=18,
                         label=cluster_names[name], mec='none',
                         color=cluster_colors[name])
        point_titles = list(group.title)
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], point_titles,
                                                 voffset=10, hoffset=10,
                                                 css=css)
        mpld3.plugins.connect(fig, tooltip, TopToolbar())

    # the axes carry no meaning for an MDS projection: no ticks, no axis
    for axis in (ax.axes.get_xaxis(), ax.axes.get_yaxis()):
        axis.set_ticks([])
        axis.set_visible(False)

    ax.legend(numpoints=1)  # legend entries show a single marker

    html = mpld3.fig_to_html(fig)
    # The discarded mpld3.display() call used to close the figure as a side
    # effect; close it explicitly so repeated calls do not pile up figures.
    plt.close(fig)
    return html
# Scatter plot of query duration against timestamp, rendered through mpld3.
fig, ax = plt.subplots(subplot_kw={'facecolor': '#f2f6fc'})

# the point colours follow the durations themselves (mapped through `jet`)
x, y = timestamp, duration
i = duration
scatter = ax.scatter(
    x,
    y,
    c=i,
    alpha=0.5,
    edgecolors='none',
    cmap=plt.cm.jet,
    label=l,
)

ax.grid(color='white', linestyle='solid')
ax.set_title("MarkLogic Slow Queries", size=20)

# both axis labels share the same monospace font settings
ax.set_xlabel('Timestamp',
              fontsize=12,
              fontdict=dict(family='monospace'),
              labelpad=5)
ax.set_ylabel('Query Duration (secs)',
              fontsize=12,
              fontdict=dict(family='monospace'),
              labelpad=10)

# attach a point tooltip to the scatter collection
tooltip = mpld3.plugins.PointLabelTooltip(scatter)
mpld3.plugins.connect(fig, tooltip)

mpld3.display()
# plt.show()
# Plot, for each name, how often it is mentioned in every file.
# Another useful list of terms -> ['Russia', 'Trumpcare', 'MuslimBan']
nom_list = ['Sessions', 'Flynn', 'Gorsuch', 'Trump']
color_list = ['b', 'r', 'g', 'k']

# The date is sliced out of each file name (characters 11 up to the last
# five) and parsed as YYYY-MM-DD.  It does not depend on the name being
# counted, so it is parsed once instead of once per name.
dates = [dt.strptime(n[11:-5], '%Y-%m-%d') for n in files]

for nom, color in zip(nom_list, color_list):
    mention_counts = [nom_mentions(n, nom) for n in files]
    plt.plot(dates, mention_counts, color)

plt.title('Phone Number Tweets by Names')
plt.legend(nom_list)

# mpld3.display takes a Figure (or nothing), not the pyplot module itself
mpld3.display(plt.gcf())
# plt.show()
def plotter(title, df, x_label = None, y_label = None, style = 'ggplot', figsize = (8, 4), save = False, legend_pos = 'best', reverse_legend = 'guess', num_to_plot = 7, tex = 'try', colours = 'Paired', cumulative = False, pie_legend = True, partial_pie = False, show_totals = False, transparent = False, output_format = 'png', interactive = False, black_and_white = False, show_p_val = False, indices = 'guess', **kwargs): """plot interrogator() or editor() output. **kwargs are for pandas first, which can then send them through to matplotlib.plot(): http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.plot.html http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot pie_legend: False to label slices rather than give legend show_totals: where to show percent/abs frequencies: False, 'plot', 'legend', or 'both' """ import corpkit import os import matplotlib as mpl if interactive: import matplotlib.pyplot as plt, mpld3 else: import matplotlib.pyplot as plt from matplotlib import rc import pandas import pandas as pd from pandas import DataFrame import numpy from time import localtime, strftime from corpkit.tests import check_pytex, check_spider, check_t_kinter if interactive: import mpld3 import collections from mpld3 import plugins, utils from plugins import InteractiveLegendPlugin, HighlightLines tk = check_t_kinter() running_python_tex = check_pytex() # incorrect spelling of spider on purpose running_spider = check_spider() def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100): """remove extreme values from colourmap --- no pure white""" import matplotlib.colors as colors import numpy as np new_cmap = colors.LinearSegmentedColormap.from_list( 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval), cmap(np.linspace(minval, maxval, n))) return new_cmap def get_savename(imagefolder, save = False, title = False, ext = 'png'): """Come up with the savename for the image.""" import os def urlify(s): "Turn title into filename" import re 
s = s.lower() s = re.sub(r"[^\w\s-]", '', s) s = re.sub(r"\s+", '-', s) s = re.sub(r"-(textbf|emph|textsc|textit)", '-', s) return s # name as if not ext.startswith('.'): ext = '.' + ext if type(save) == str: savename = os.path.join(imagefolder, (urlify(save) + ext)) #this 'else' is redundant now that title is obligatory else: if title: filename = urlify(title) + ext savename = os.path.join(imagefolder, filename) # remove duplicated ext if savename.endswith('%s%s' % (ext, ext)): savename = savename.replace('%s%s' % (ext, ext), ext, 1) return savename def rename_data_with_total(dataframe, was_series = False, using_tex = False, absolutes = True): """adds totals (abs, rel, keyness) to entry name strings""" if was_series: where_the_words_are = dataframe.index else: where_the_words_are = dataframe.columns the_labs = [] for w in list(where_the_words_are): if not absolutes: if was_series: perc = dataframe.T[w][0] else: the_labs.append(w) continue if using_tex: the_labs.append('%s (%.2f\%%)' % (w, perc)) else: the_labs.append('%s (%.2f %%)' % (w, perc)) else: if was_series: score = dataframe.T[w].sum() else: score = dataframe[w].sum() if using_tex: the_labs.append('%s (n=%d)' % (w, score)) else: the_labs.append('%s (n=%d)' % (w, score)) if not was_series: dataframe.columns = the_labs else: vals = list(dataframe[list(dataframe.columns)[0]].values) dataframe = pd.DataFrame(vals, index = the_labs) dataframe.columns = ['Total'] return dataframe def auto_explode(dataframe, input, was_series = False, num_to_plot = 7): """give me a list of strings and i'll output explode option""" output = [0 for s in range(num_to_plot)] if was_series: l = list(dataframe.index) else: l = list(dataframe.columns) if type(input) == str or type(input) == int: input = [input] if type(input) == list: for i in input: if type(i) == str: index = l.index(i) else: index = i output[index] = 0.1 return output # are we doing subplots? 
sbplt = False if 'subplots' in kwargs: if kwargs['subplots'] is True: sbplt = True if colours is True: colours = 'Paired' styles = ['dark_background', 'bmh', 'grayscale', 'ggplot', 'fivethirtyeight'] if style not in styles: raise ValueError('Style %s not found. Use %s' % (style, ', '.join(styles))) if 'savepath' in kwargs.keys(): mpl.rcParams['savefig.directory'] = kwargs['savepath'] del kwargs['savepath'] mpl.rcParams['savefig.bbox'] = 'tight' # try to use tex # TO DO: # make some font kwargs here using_tex = False mpl.rcParams['font.family'] = 'sans-serif' mpl.rcParams['text.latex.unicode'] = True if tex == 'try' or tex is True: try: rc('text', usetex=True) rc('font', **{'family': 'serif', 'serif': ['Computer Modern']}) using_tex = True except: matplotlib.rc('font', family='sans-serif') matplotlib.rc('font', serif='Helvetica Neue') matplotlib.rc('text', usetex='false') rc('text', usetex=False) else: rc('text', usetex=False) if interactive: using_tex = False if show_totals is False: show_totals = 'none' # find out what kind of plot we're making, and enable # or disable interactive values if need be if 'kind' not in kwargs: kwargs['kind'] = 'line' if interactive: if kwargs['kind'].startswith('bar'): interactive_types = [3] elif kwargs['kind'] == 'area': interactive_types = [2, 3] elif kwargs['kind'] == 'line': interactive_types = [2, 3] elif kwargs['kind'] == 'pie': interactive_types = None warnings.warn('Interactive plotting not yet available for pie plots.') else: interactive_types = [None] if interactive is False: interactive_types = [None] # find out if pie mode, add autopct format piemode = False if 'kind' in kwargs: if kwargs['kind'] == 'pie': piemode = True # always the best spot for pie #if legend_pos == 'best': #legend_pos = 'lower left' if show_totals.endswith('plot') or show_totals.endswith('both'): kwargs['pctdistance'] = 0.6 if using_tex: kwargs['autopct'] = r'%1.1f\%%' else: kwargs['autopct'] = '%1.1f%%' #if piemode: #if partial_pie: 
#kwargs['startangle'] = 180 kwargs['subplots'] = sbplt # copy data, make series into df dataframe = df.copy() was_series = False if type(dataframe) == pandas.core.series.Series: was_series = True if not cumulative: dataframe = DataFrame(dataframe) else: dataframe = DataFrame(dataframe.cumsum()) else: # don't know if this is much good. if cumulative: dataframe = DataFrame(dataframe.cumsum()) if len(list(dataframe.columns)) == 1: was_series = True # attempt to convert x axis to ints: try: dataframe.index = [int(i) for i in list(dataframe.index)] except: pass # remove totals and tkinter order if not was_series: for name, ax in zip(['Total'] * 2 + ['tkintertable-order'] * 2, [0, 1, 0, 1]): dataframe = dataframe.drop(name, axis = ax, errors = 'ignore') else: dataframe = dataframe.drop('tkintertable-order', errors = 'ignore') dataframe = dataframe.drop('tkintertable-order', axis = 1, errors = 'ignore') # look at columns to see if all can be ints, in which case, set up figure # for depnumming if not was_series: if indices == 'guess': def isint(x): try: a = float(x) b = int(a) except ValueError or OverflowError: return False else: return a == b if all([isint(x) is True for x in list(dataframe.columns)]): indices = True else: indices = False # if depnumming, plot all, transpose, and rename axes if indices is True: num_to_plot = 'all' dataframe = dataframe.T if y_label is None: y_label = 'Percentage of all matches' if x_label is None: x_label = '' # set backend? output_formats = ['svgz', 'ps', 'emf', 'rgba', 'raw', 'pdf', 'svg', 'eps', 'png', 'pgf'] if output_format not in output_formats: raise ValueError('%s output format not recognised. 
Must be: %s' % (output_format, ', '.join(output_formats))) # don't know if these are necessary if 'pdf' in output_format: plt.switch_backend(output_format) if 'pgf' in output_format: plt.switch_backend(output_format) if num_to_plot == 'all': if was_series: if not piemode: num_to_plot = len(dataframe) else: num_to_plot = len(dataframe) else: if not piemode: num_to_plot = len(list(dataframe.columns)) else: num_to_plot = len(dataframe.index) # explode pie, or remove if not piemode if 'explode' in kwargs: if not piemode: del kwargs['explode'] if piemode: if 'explode' in kwargs: if not sbplt: kwargs['explode'] = auto_explode(dataframe, kwargs['explode'], was_series = was_series, num_to_plot = num_to_plot) if 'legend' in kwargs: legend = kwargs['legend'] else: legend = True #cut data short plotting_a_totals_column = False if was_series: if list(dataframe.columns)[0] != 'Total': try: can_be_ints = [int(x) for x in list(dataframe.index)] num_to_plot = len(dataframe) except: dataframe = dataframe[:num_to_plot] elif list(dataframe.columns)[0] == 'Total': plotting_a_totals_column = True if not 'legend' in kwargs: legend = False num_to_plot = len(dataframe) else: dataframe = dataframe.T.head(num_to_plot).T # remove stats fields, put p in entry text, etc. 
statfields = ['slope', 'intercept', 'r', 'p', 'stderr'] try: dataframe = dataframe.drop(statfields, axis = 1) except: pass try: dataframe.ix['p'] there_are_p_vals = True except: there_are_p_vals = False if show_p_val: if there_are_p_vals: newnames = [] for col in list(dataframe.columns): pval = dataframe[col]['p'] newname = '%s (p=%s)' % (col, format(pval, '.5f')) newnames.append(newname) dataframe.columns = newnames dataframe.drop(statfields, axis = 0, inplace = True) else: warnings.warn('No p-values calculated to show.\n\nUse sort_by and keep_stats in editor() to generate these values.') else: if there_are_p_vals: dataframe.drop(statfields, axis = 0, inplace = True) # make and set y label absolutes = True if type(dataframe) == pandas.core.frame.DataFrame: try: if not all([s.is_integer() for s in dataframe.iloc[0,:].values]): absolutes = False except: pass else: if not all([s.is_integer() for s in dataframe.values]): absolutes = False # use colormap if need be: if num_to_plot > 0: if not was_series: if 'kind' in kwargs: if kwargs['kind'] in ['pie', 'line', 'area']: if colours: if not plotting_a_totals_column: if colours == 'Default': colours = 'Paired' kwargs['colormap'] = colours #else: if colours: if colours == 'Default': colours = 'Paired' kwargs['colormap'] = colours if piemode: if num_to_plot > 0: if colours == 'Default': colours = 'Paired' kwargs['colormap'] = colours else: if num_to_plot > 0: if colours == 'Default': colours = 'Paired' kwargs['colormap'] = colours #else: #if len(dataframe.T.columns) < 8: #try: #del kwargs['colormap'] #except: #pass # multicoloured bar charts if 'kind' in kwargs: if colours: if kwargs['kind'].startswith('bar'): if len(list(dataframe.columns)) == 1: if not black_and_white: import numpy as np the_range = np.linspace(0, 1, num_to_plot) cmap = plt.get_cmap(colours) kwargs['colors'] = [cmap(n) for n in the_range] # make a bar width ... ? 
#kwargs['width'] = (figsize[0] / float(num_to_plot)) / 1.5 # reversing legend option if reverse_legend is True: rev_leg = True elif reverse_legend is False: rev_leg = False # show legend or don't, guess whether to reverse based on kind if 'kind' in kwargs: if kwargs['kind'] in ['bar', 'barh', 'area', 'line', 'pie']: if was_series: legend = False if kwargs['kind'] == 'pie': if pie_legend: legend = True else: legend = False if kwargs['kind'] in ['barh', 'area']: if reverse_legend == 'guess': rev_leg = True if not 'rev_leg' in locals(): rev_leg = False # the default legend placement if legend_pos is True: legend_pos = 'best' # cut dataframe if just_totals try: tst = dataframe['Combined total'] dataframe = dataframe.head(num_to_plot) except: pass # rotate automatically if 'rot' not in kwargs: if not was_series: xvals = [str(i) for i in list(dataframe.index)[:num_to_plot]] #if 'kind' in kwargs: #if kwargs['kind'] in ['barh', 'area']: #xvals = [str(i) for i in list(dataframe.columns)[:num_to_plot]] else: xvals = [str(i) for i in list(dataframe.columns)[:num_to_plot]] if len(max(xvals, key=len)) > 6: if not piemode: kwargs['rot'] = 45 # no title for subplots because ugly, if sbplt: if 'title' in kwargs: del kwargs['title'] else: kwargs['title'] = title # no interactive subplots yet: if sbplt and interactive: import warnings interactive = False warnings.warn('No interactive subplots yet, sorry.') return # not using pandas for labels or legend anymore. 
#kwargs['labels'] = None #kwargs['legend'] = False if legend: # kwarg options go in leg_options leg_options = {'framealpha': .8} if 'shadow' in kwargs: leg_options['shadow'] = True if 'ncol' in kwargs: leg_options['ncol'] = kwargs['ncol'] del kwargs['ncol'] else: if num_to_plot > 6: leg_options['ncol'] = num_to_plot / 7 # determine legend position based on this dict if legend_pos: possible = {'best': 0, 'upper right': 1, 'upper left': 2, 'lower left': 3, 'lower right': 4, 'right': 5, 'center left': 6, 'center right': 7, 'lower center': 8, 'upper center': 9, 'center': 10, 'o r': 2, 'outside right': 2, 'outside upper right': 2, 'outside center right': 'center left', 'outside lower right': 'lower left'} if type(legend_pos) == int: the_loc = legend_pos elif type(legend_pos) == str: try: the_loc = possible[legend_pos] except KeyError: raise KeyError('legend_pos value must be one of:\n%s\n or an int between 0-10.' %', '.join(possible.keys())) leg_options['loc'] = the_loc #weirdness needed for outside plot if legend_pos in ['o r', 'outside right', 'outside upper right']: leg_options['bbox_to_anchor'] = (1.02, 1) if legend_pos == 'outside center right': leg_options['bbox_to_anchor'] = (1.02, 0.5) if legend_pos == 'outside lower right': leg_options['loc'] == 'upper right' leg_options['bbox_to_anchor'] = (0.5, 0.5) # a bit of distance between legend and plot for outside legends if type(legend_pos) == str: if legend_pos.startswith('o'): leg_options['borderaxespad'] = 1 if not piemode: if show_totals.endswith('both') or show_totals.endswith('legend'): dataframe = rename_data_with_total(dataframe, was_series = was_series, using_tex = using_tex, absolutes = absolutes) else: if pie_legend: if show_totals.endswith('both') or show_totals.endswith('legend'): dataframe = rename_data_with_total(dataframe, was_series = was_series, using_tex = using_tex, absolutes = absolutes) if piemode: if partial_pie: dataframe = dataframe / 100.0 # some pie things if piemode: if not sbplt: 
kwargs['y'] = list(dataframe.columns)[0] if pie_legend: kwargs['legend'] = False if was_series: leg_options['labels'] = list(dataframe.index) else: leg_options['labels'] = list(dataframe.columns) else: if pie_legend: kwargs['legend'] = False if was_series: leg_options['labels'] = list(dataframe.index) else: leg_options['labels'] = list(dataframe.index) areamode = False if 'kind' in kwargs: if kwargs['kind'] == 'area': areamode = True if legend is False: kwargs['legend'] = False # cumulative grab first col if cumulative: kwargs['y'] = list(dataframe.columns)[0] # line highlighting option for interactive! if interactive: if 2 in interactive_types: if kwargs['kind'] == 'line': kwargs['marker'] = ',' if not piemode: kwargs['alpha'] = 0.1 # convert dates --- works only in my current case! if plotting_a_totals_column or not was_series: try: can_it_be_int = int(list(dataframe.index)[0]) can_be_int = True except: can_be_int = False if can_be_int: if 1500 < int(list(dataframe.index)[0]): if 2050 > int(list(dataframe.index)[0]): n = pd.PeriodIndex([d for d in list(dataframe.index)], freq='A') dataframe = dataframe.set_index(n) MARKERSIZE = 4 COLORMAP = { 0: {'marker': None, 'dash': (None,None)}, 1: {'marker': None, 'dash': [5,5]}, 2: {'marker': "o", 'dash': (None,None)}, 3: {'marker': None, 'dash': [1,3]}, 4: {'marker': "s", 'dash': [5,2,5,2,5,10]}, 5: {'marker': None, 'dash': [5,3,1,2,1,10]}, 6: {'marker': 'o', 'dash': (None,None)}, 7: {'marker': None, 'dash': [5,3,1,3]}, 8: {'marker': "1", 'dash': [1,3]}, 9: {'marker': "*", 'dash': [5,5]}, 10: {'marker': "2", 'dash': [5,2,5,2,5,10]}, 11: {'marker': "s", 'dash': (None,None)} } HATCHES = { 0: {'color': '#dfdfdf', 'hatch':"/"}, 1: {'color': '#6f6f6f', 'hatch':"\\"}, 2: {'color': 'b', 'hatch':"|"}, 3: {'color': '#dfdfdf', 'hatch':"-"}, 4: {'color': '#6f6f6f', 'hatch':"+"}, 5: {'color': 'b', 'hatch':"x"} } if black_and_white: if kwargs['kind'] == 'line': kwargs['linewidth'] = 1 cmap = plt.get_cmap('Greys') new_cmap = 
truncate_colormap(cmap, 0.25, 0.95) if kwargs['kind'] == 'bar': # darker if just one entry if len(dataframe.columns) == 1: new_cmap = truncate_colormap(cmap, 0.70, 0.90) kwargs['colormap'] = new_cmap # use styles and plot with plt.style.context((style)): if not sbplt: # check if negative values, no stacked if so if areamode: if dataframe.applymap(lambda x: x < 0.0).any().any(): kwargs['stacked'] = False rev_leg = False ax = dataframe.plot(figsize = figsize, **kwargs) else: if not piemode and not sbplt: ax = dataframe.plot(figsize = figsize, **kwargs) else: ax = dataframe.plot(figsize = figsize, **kwargs) handles, labels = plt.gca().get_legend_handles_labels() plt.legend( handles, labels, loc = leg_options['loc'], bbox_to_anchor = (0,-0.1,1,1), bbox_transform = plt.gcf().transFigure ) if not tk: plt.show() return if 'rot' in kwargs: if kwargs['rot'] != 0 and kwargs['rot'] != 90: labels = [item.get_text() for item in ax.get_xticklabels()] ax.set_xticklabels(labels, rotation = kwargs['rot'], ha='right') if transparent: plt.gcf().patch.set_facecolor('white') plt.gcf().patch.set_alpha(0) if black_and_white: #plt.grid() plt.gca().set_axis_bgcolor('w') if kwargs['kind'] == 'line': # white background # change everything to black and white with interesting dashes and markers c = 0 for line in ax.get_lines(): line.set_color('black') #line.set_width(1) line.set_dashes(COLORMAP[c]['dash']) line.set_marker(COLORMAP[c]['marker']) line.set_markersize(MARKERSIZE) c += 1 if c == len(COLORMAP.keys()): c = 0 if legend: if not piemode and not sbplt: if 3 not in interactive_types: if not rev_leg: lgd = plt.legend(**leg_options) else: handles, labels = plt.gca().get_legend_handles_labels() lgd = plt.legend(handles[::-1], labels[::-1], **leg_options) #if black_and_white: #lgd.set_facecolor('w') #if interactive: #if legend: #lgd.set_title("") #if not sbplt: #if 'layout' not in kwargs: #plt.tight_layout() if interactive: # 1 = highlight lines # 2 = line labels # 3 = legend switches ax = 
plt.gca() # fails for piemode lines = ax.lines handles, labels = plt.gca().get_legend_handles_labels() if 1 in interactive_types: plugins.connect(plt.gcf(), HighlightLines(lines)) if 3 in interactive_types: plugins.connect(plt.gcf(), InteractiveLegendPlugin(lines, labels, alpha_unsel=0.0)) for i, l in enumerate(lines): y_vals = l.get_ydata() x_vals = l.get_xdata() x_vals = [str(x) for x in x_vals] if absolutes: ls = ['%s (%s: %d)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals)] else: ls = ['%s (%s: %.2f%%)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals)] if 2 in interactive_types: #if 'kind' in kwargs and kwargs['kind'] == 'area': tooltip_line = mpld3.plugins.LineLabelTooltip(lines[i], labels[i]) mpld3.plugins.connect(plt.gcf(), tooltip_line) #else: if kwargs['kind'] == 'line': tooltip_point = mpld3.plugins.PointLabelTooltip(l, labels = ls) mpld3.plugins.connect(plt.gcf(), tooltip_point) # works: #plugins.connect(plt.gcf(), plugins.LineLabelTooltip(l, labels[i])) #labels = ["Point {0}".format(i) for i in range(num_to_plot)] #tooltip = plugins.LineLabelTooltip(lines) #mpld3.plugins.connect(plt.gcf(), mpld3.plugins.PointLabelTooltip(lines)) if piemode: if not sbplt: plt.axis('equal') ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) # add x label # this could be revised now! 
# if time series period, it's year for now if type(dataframe.index) == pandas.tseries.period.PeriodIndex: x_label = 'Year' if x_label is not False: if type(x_label) == str: plt.xlabel(x_label) else: check_x_axis = list(dataframe.index)[0] # get first entry# get second entry of first entry (year, count) try: if type(dataframe.index) == pandas.tseries.period.PeriodIndex: x_label = 'Year' check_x_axis = int(check_x_axis) if 1500 < check_x_axis < 2050: x_label = 'Year' else: x_label = 'Group' except: x_label = 'Group' if not sbplt: if not piemode: plt.xlabel(x_label) # no offsets for numerical x and y values if type(dataframe.index) != pandas.tseries.period.PeriodIndex: try: # check if x axis can be an int check_x_axis = list(dataframe.index)[0] can_it_be_int = int(check_x_axis) # if so, set these things from matplotlib.ticker import ScalarFormatter plt.gca().xaxis.set_major_formatter(ScalarFormatter()) except: pass # same for y axis try: # check if x axis can be an int check_y_axis = list(dataframe.columns)[0] can_it_be_int = int(check_y_axis) # if so, set these things from matplotlib.ticker import ScalarFormatter plt.gca().yaxis.set_major_formatter(ScalarFormatter()) except: pass # y labelling y_l = False if not absolutes: y_l = 'Percentage' else: y_l = 'Absolute frequency' if y_label is not False: if not sbplt: if not piemode: if type(y_label) == str: plt.ylabel(y_label) else: plt.ylabel(y_l) # hacky: turn legend into subplot titles :) if sbplt: # title the big plot #plt.suptitle(title, fontsize = 16) # get all axes if 'layout' not in kwargs: axes = [l for index, l in enumerate(ax)] else: axes = [] cols = [l for index, l in enumerate(ax)] for col in cols: for bit in col: axes.append(bit) # set subplot titles for index, a in enumerate(axes): try: titletext = list(dataframe.columns)[index] except: pass a.set_title(titletext) try: a.legend_.remove() except: pass # remove axis labels for pie plots if piemode: a.axes.get_xaxis().set_visible(False) 
a.axes.get_yaxis().set_visible(False) a.axis('equal') # add sums to bar graphs and pie graphs # doubled right now, no matter if not sbplt: if 'kind' in kwargs: if kwargs['kind'].startswith('bar'): width = ax.containers[0][0].get_width() if was_series: the_y_limit = plt.ylim()[1] if show_totals.endswith('plot') or show_totals.endswith('both'): # make plot a bit higher if putting these totals on it plt.ylim([0,the_y_limit * 1.05]) for i, label in enumerate(list(dataframe.index)): if len(dataframe.ix[label]) == 1: score = dataframe.ix[label][0] else: if absolutes: score = dataframe.ix[label].sum() else: #import warnings #warnings.warn("It's not possible to determine total percentage from individual percentages.") continue if not absolutes: plt.annotate('%.2f' % score, (i, score), ha = 'center', va = 'bottom') else: plt.annotate(score, (i, score), ha = 'center', va = 'bottom') else: the_y_limit = plt.ylim()[1] if show_totals.endswith('plot') or show_totals.endswith('both'): for i, label in enumerate(list(dataframe.columns)): if len(dataframe[label]) == 1: score = dataframe[label][0] else: if absolutes: score = dataframe[label].sum() else: #import warnings #warnings.warn("It's not possible to determine total percentage from individual percentages.") continue if not absolutes: plt.annotate('%.2f' % score, (i, score), ha = 'center', va = 'bottom') else: plt.annotate(score, (i, score), ha = 'center', va = 'bottom') #if not running_python_tex: #plt.gcf().show() plt.subplots_adjust(left=0.1) plt.subplots_adjust(bottom=0.18) #if 'layout' not in kwargs: #plt.tight_layout() if save: import os if running_python_tex: imagefolder = '../images' else: imagefolder = 'images' savename = get_savename(imagefolder, save = save, title = title, ext = output_format) if not os.path.isdir(imagefolder): os.makedirs(imagefolder) # save image and get on with our lives if legend_pos.startswith('o'): plt.gcf().savefig(savename, dpi=150, bbox_extra_artists=(lgd,), bbox_inches='tight', format = 
output_format) else: plt.gcf().savefig(savename, dpi=150, format = output_format) time = strftime("%H:%M:%S", localtime()) if os.path.isfile(savename): print '\n' + time + ": " + savename + " created." else: raise ValueError("Error making %s." % savename) if not interactive and not running_python_tex and not running_spider and not tk: plt.show() return if running_spider or tk or sbplt: return plt if interactive: plt.subplots_adjust(right=.8) plt.subplots_adjust(left=.1) try: ax.legend_.remove() except: pass return mpld3.display()
def plotter(title, df, kind = 'line', x_label = None, y_label = None, style = 'ggplot', figsize = (8, 4), save = False, legend_pos = 'best', reverse_legend = 'guess', num_to_plot = 7, tex = 'try', colours = 'Accent', cumulative = False, pie_legend = True, partial_pie = False, show_totals = False, transparent = False, output_format = 'png', interactive = False, black_and_white = False, show_p_val = False, indices = False, **kwargs): """Visualise corpus interrogations. :param title: A title for the plot :type title: str :param df: Data to be plotted :type df: pandas.core.frame.DataFrame :param x_label: A label for the x axis :type x_label: str :param y_label: A label for the y axis :type y_label: str :param kind: The kind of chart to make :type kind: str ('line'/'bar'/'barh'/'pie'/'area') :param style: Visual theme of plot :type style: str ('ggplot'/'bmh'/'fivethirtyeight'/'seaborn-talk'/etc) :param figsize: Size of plot :type figsize: tuple (int, int) :param save: If bool, save with *title* as name; if str, use str as name :type save: bool/str :param legend_pos: Where to place legend :type legend_pos: str ('upper right'/'outside right'/etc) :param reverse_legend: Reverse the order of the legend :type reverse_legend: bool :param num_to_plot: How many columns to plot :type num_to_plot: int/'all' :param tex: Use TeX to draw plot text :type tex: bool :param colours: Colourmap for lines/bars/slices :type colours: str :param cumulative: Plot values cumulatively :type cumulative: bool :param pie_legend: Show a legend for pie chart :type pie_legend: bool :param partial_pie: Allow plotting of pie slices only :type partial_pie: bool :param show_totals: Print sums in plot where possible :type show_totals: str -- 'legend'/'plot'/'both' :param transparent: Transparent .png background :type transparent: bool :param output_format: File format for saved image :type output_format: str -- 'png'/'pdf' :param black_and_white: Create black and white line styles :type black_and_white: 
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Build a new colourmap from the [minval, maxval] slice of *cmap*.

    Used to remove the extreme values of a colourmap (e.g. no pure white).

    :param cmap: Colourmap object to sample; it is called with an array of
                 positions and its ``name`` attribute is read
    :param minval: Lowest position of *cmap* to keep
    :type minval: float
    :param maxval: Highest position of *cmap* to keep
    :type maxval: float
    :param n: Number of evenly spaced samples taken between the two bounds
    :type n: int
    :returns: a ``LinearSegmentedColormap`` built from the sampled colours
    """
    import matplotlib.colors as colors
    import numpy as np
    new_name = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    sampled_colours = cmap(np.linspace(minval, maxval, n))
    return colors.LinearSegmentedColormap.from_list(new_name, sampled_colours)


def get_savename(imagefolder, save = False, title = False, ext = 'png'):
    """Come up with the path under which the image will be saved.

    :param imagefolder: Directory the image will be written to
    :type imagefolder: str
    :param save: If str, it is used as the file name; otherwise *title* is
    :type save: bool/str
    :param title: Plot title, used as the file name when *save* is not a str
    :type title: str
    :param ext: File extension, with or without the leading dot
    :type ext: str
    :returns: *imagefolder* joined with the slugified name plus *ext*
    :raises ValueError: if neither *save* nor *title* provides a name
    """
    import os
    import re

    def urlify(s):
        "Turn title into filename"
        s = s.lower()
        s = re.sub(r"[^\w\s-]", '', s)
        s = re.sub(r"\s+", '-', s)
        # drop the names of TeX markup commands left behind by the steps above
        s = re.sub(r"-(textbf|emph|textsc|textit)", '-', s)
        return s

    if not ext.startswith('.'):
        ext = '.' + ext

    if isinstance(save, str):
        stem = save
    elif title:
        stem = title
    else:
        # previously this fell through and died with an UnboundLocalError
        raise ValueError('Cannot work out a file name: pass a string as '
                         'save or provide a title.')

    # Remove an extension the user already typed *before* slugifying:
    # urlify() deletes the dot, so checking afterwards can never match and
    # 'plot.png' would end up as 'plotpng.png'.
    if len(stem) > len(ext) and stem.lower().endswith(ext.lower()):
        stem = stem[:-len(ext)]

    return os.path.join(imagefolder, urlify(stem) + ext)


def rename_data_with_total(dataframe, was_series = False, using_tex = False, absolutes = True):
    """Add totals to the entry names so that they show up in the legend.

    With absolute frequencies each name becomes ``name (n=<sum>)``.  With
    relative frequencies a percentage is only added when the data was a
    series; names of a multi-column frame are left as they are.

    :param dataframe: Data whose entry names will be rewritten
    :type dataframe: pandas.core.frame.DataFrame
    :param was_series: Entries are in the index (True) or the columns (False)
    :type was_series: bool
    :param using_tex: Escape the percent sign for TeX rendering
    :type using_tex: bool
    :param absolutes: Values are absolute frequencies, not percentages
    :type absolutes: bool
    :returns: a new DataFrame with renamed entries; for a series this is a
              single column called 'Total'.  The input is not modified.
    """
    if was_series:
        entries = list(dataframe.index)
        # transpose once so that every entry can be looked up as a column
        by_entry = dataframe.T
    else:
        entries = list(dataframe.columns)
        by_entry = dataframe

    # TeX needs the percent sign escaped with a literal backslash
    if using_tex:
        perc_template = '%s (%.2f\\%%)'
    else:
        perc_template = '%s (%.2f %%)'

    the_labs = []
    for entry in entries:
        if absolutes:
            the_labs.append('%s (n=%d)' % (entry, by_entry[entry].sum()))
        elif was_series:
            # explicit positional access; plain [0] is label-based on a
            # label-indexed Series in current pandas
            perc = by_entry[entry].iloc[0]
            the_labs.append(perc_template % (entry, perc))
        else:
            # a total percentage cannot be derived from single percentages
            the_labs.append(entry)

    if was_series:
        vals = list(dataframe[list(dataframe.columns)[0]].values)
        renamed = pandas.DataFrame(vals, index = the_labs)
        renamed.columns = ['Total']
    else:
        # work on a copy so that the caller's frame keeps its column names
        renamed = dataframe.copy()
        renamed.columns = the_labs
    return renamed


def auto_explode(dataframe, input, was_series = False, num_to_plot = 7):
    """Turn entry names and/or positions into an ``explode`` list for pies.

    :param dataframe: Data being plotted
    :type dataframe: pandas.core.frame.DataFrame
    :param input: Entry name, position, or list/tuple of those, to explode
    :type input: str/int/list
    :param was_series: Look names up in the index (True) or columns (False)
    :type was_series: bool
    :param num_to_plot: Number of slices, i.e. length of the returned list
    :type num_to_plot: int
    :returns: list of *num_to_plot* offsets, 0.1 for exploded slices, else 0
    :raises ValueError: if a name is not among the entries
    :raises IndexError: if a position is outside the plotted slices
    """
    output = [0] * num_to_plot
    if was_series:
        names = list(dataframe.index)
    else:
        names = list(dataframe.columns)

    # bools are ints in Python, but True/False is not a slice position
    is_scalar = isinstance(input, (str, int)) and not isinstance(input, bool)
    if is_scalar:
        wanted = [input]
    elif isinstance(input, (list, tuple)):
        wanted = input
    else:
        wanted = []

    for item in wanted:
        if isinstance(item, str):
            if item not in names:
                raise ValueError('Cannot explode %r: no entry with that name.' % (item,))
            position = names.index(item)
        else:
            position = item
        output[position] = 0.1
    return output
subplots sbplt = False if 'subplots' in kwargs: if kwargs['subplots'] is True: sbplt = True kwargs['subplots'] = sbplt if colours is True: colours = 'Paired' # todo: get this dynamically instead. styles = ['dark_background', 'bmh', 'grayscale', 'ggplot', 'fivethirtyeight', 'matplotlib', False, 'mpl-white'] #if style not in styles: #raise ValueError('Style %s not found. Use %s' % (str(style), ', '.join(styles))) if style == 'mpl-white': try: sns.set_style("whitegrid") except: pass style = 'matplotlib' if style is not False and style.startswith('seaborn'): colours = False # use 'draggable = True' to make a draggable legend dragmode = kwargs.get('draggable', False) kwargs.pop('draggable', None) if kwargs.get('savepath'): mpl.rcParams['savefig.directory'] = kwargs.get('savepath') kwargs.pop('savepath', None) mpl.rcParams['savefig.bbox'] = 'tight' mpl.rcParams.update({'figure.autolayout': True}) # try to use tex # TO DO: # make some font kwargs here using_tex = False mpl.rcParams['font.family'] = 'sans-serif' mpl.rcParams['text.latex.unicode'] = True if tex == 'try' or tex is True: try: rc('text', usetex=True) rc('font', **{'family': 'serif', 'serif': ['Computer Modern']}) using_tex = True except: matplotlib.rc('font', family='sans-serif') matplotlib.rc('font', serif='Helvetica Neue') matplotlib.rc('text', usetex='false') rc('text', usetex=False) else: rc('text', usetex=False) if interactive: using_tex = False if show_totals is False: show_totals = 'none' # find out what kind of plot we're making, and enable # or disable interactive values if need be kwargs['kind'] = kind.lower() if interactive: if kwargs['kind'].startswith('bar'): interactive_types = [3] elif kwargs['kind'] == 'area': interactive_types = [2, 3] elif kwargs['kind'] == 'line': interactive_types = [2, 3] elif kwargs['kind'] == 'pie': interactive_types = None warnings.warn('Interactive plotting not yet available for pie plots.') else: interactive_types = [None] if interactive is False: interactive_types = 
def isint(x):
    """Tell whether *x* can be read as a whole number.

    :param x: Value to check, typically a column label
    :returns: True if ``float(x)`` succeeds and has no fractional part,
              False otherwise (including non-numeric, infinite or NaN input)
    :rtype: bool
    """
    try:
        a = float(x)
        b = int(a)
    # A tuple is required here: ``except ValueError or OverflowError``
    # evaluates the ``or`` first and therefore only catches ValueError.
    # TypeError covers labels float() cannot handle at all (None, tuples).
    except (ValueError, OverflowError, TypeError):
        return False
    else:
        return a == b
output_formats = ['svgz', 'ps', 'emf', 'rgba', 'raw', 'pdf', 'svg', 'eps', 'png', 'pgf'] if output_format not in output_formats: raise ValueError('%s output format not recognised. Must be: %s' % (output_format, ', '.join(output_formats))) # don't know if these are necessary if 'pdf' in output_format: plt.switch_backend(output_format) if 'pgf' in output_format: plt.switch_backend(output_format) if num_to_plot == 'all': if was_series: if not piemode: num_to_plot = len(dataframe) else: num_to_plot = len(dataframe) else: if not piemode: num_to_plot = len(list(dataframe.columns)) else: num_to_plot = len(dataframe.index) # explode pie, or remove if not piemode if piemode and not sbplt and kwargs.get('explode'): kwargs['explode'] = auto_explode(dataframe, kwargs['explode'], was_series = was_series, num_to_plot = num_to_plot) else: kwargs.pop('explode', None) legend = kwargs.get('legend', False) #cut data short plotting_a_totals_column = False if was_series: if list(dataframe.columns)[0] != 'Total': try: can_be_ints = [int(x) for x in list(dataframe.index)] num_to_plot = len(dataframe) except: dataframe = dataframe[:num_to_plot] elif list(dataframe.columns)[0] == 'Total': plotting_a_totals_column = True if not 'legend' in kwargs: legend = False num_to_plot = len(dataframe) else: dataframe = dataframe.T.head(num_to_plot).T # remove stats fields, put p in entry text, etc. 
def p_string_formatter(val):
    """Format a p-value for display next to an entry name.

    Values below 0.001 are shown as an inequality; when ``using_tex`` (read
    from the enclosing scope) is set, the ``<`` sign is wrapped in math mode.
    Everything else is shown with three decimal places.

    :param val: The p-value
    :type val: float
    :returns: the formatted string
    :rtype: str
    """
    # keep the comparison in this direction so NaN falls through to '.3f'
    if not val < 0.001:
        return 'p = %s' % format(val, '.3f')
    if using_tex:
        return r'p $<$ 0.001'
    return 'p < 0.001'
#kwargs['width'] = (figsize[0] / float(num_to_plot)) / 1.5 # reversing legend option if reverse_legend is True: rev_leg = True elif reverse_legend is False: rev_leg = False # show legend or don't, guess whether to reverse based on kind if kind in ['bar', 'barh', 'area', 'line', 'pie']: if was_series: legend = False if kind == 'pie': if pie_legend: legend = True else: legend = False if kind in ['barh', 'area']: if reverse_legend == 'guess': rev_leg = True if not 'rev_leg' in locals(): rev_leg = False # the default legend placement if legend_pos is True: legend_pos = 'best' # cut dataframe if just_totals try: tst = dataframe['Combined total'] dataframe = dataframe.head(num_to_plot) except: pass # rotate automatically if 'rot' not in kwargs: if not was_series: xvals = [str(i) for i in list(dataframe.index)[:num_to_plot]] #if 'kind' in kwargs: #if kwargs['kind'] in ['barh', 'area']: #xvals = [str(i) for i in list(dataframe.columns)[:num_to_plot]] else: xvals = [str(i) for i in list(dataframe.columns)[:num_to_plot]] if len(max(xvals, key=len)) > 6: if not piemode: kwargs['rot'] = 45 # no title for subplots because ugly, if title and not sbplt: kwargs['title'] = title # no interactive subplots yet: if sbplt and interactive: import warnings interactive = False warnings.warn('No interactive subplots yet, sorry.') return # not using pandas for labels or legend anymore. 
#kwargs['labels'] = None #kwargs['legend'] = False if legend: if num_to_plot > 6: if not kwargs.get('ncol'): kwargs['ncol'] = num_to_plot / 7 # kwarg options go in leg_options leg_options = {'framealpha': .8, 'shadow': kwargs.get('shadow', False), 'ncol': kwargs.pop('ncol', 1)} # determine legend position based on this dict if legend_pos: possible = {'best': 0, 'upper right': 1, 'upper left': 2, 'lower left': 3, 'lower right': 4, 'right': 5, 'center left': 6, 'center right': 7, 'lower center': 8, 'upper center': 9, 'center': 10, 'o r': 2, 'outside right': 2, 'outside upper right': 2, 'outside center right': 'center left', 'outside lower right': 'lower left'} if type(legend_pos) == int: the_loc = legend_pos elif type(legend_pos) == str: try: the_loc = possible[legend_pos] except KeyError: raise KeyError('legend_pos value must be one of:\n%s\n or an int between 0-10.' %', '.join(list(possible.keys()))) leg_options['loc'] = the_loc #weirdness needed for outside plot if legend_pos in ['o r', 'outside right', 'outside upper right']: leg_options['bbox_to_anchor'] = (1.02, 1) if legend_pos == 'outside center right': leg_options['bbox_to_anchor'] = (1.02, 0.5) if legend_pos == 'outside lower right': leg_options['loc'] == 'upper right' leg_options['bbox_to_anchor'] = (0.5, 0.5) # a bit of distance between legend and plot for outside legends if type(legend_pos) == str: if legend_pos.startswith('o'): leg_options['borderaxespad'] = 1 if not piemode: if show_totals.endswith('both') or show_totals.endswith('legend'): dataframe = rename_data_with_total(dataframe, was_series = was_series, using_tex = using_tex, absolutes = absolutes) else: if pie_legend: if show_totals.endswith('both') or show_totals.endswith('legend'): dataframe = rename_data_with_total(dataframe, was_series = was_series, using_tex = using_tex, absolutes = absolutes) if piemode: if partial_pie: dataframe = dataframe / 100.0 # some pie things if piemode: if not sbplt: kwargs['y'] = list(dataframe.columns)[0] if 
pie_legend: kwargs['legend'] = False if was_series: leg_options['labels'] = list(dataframe.index) else: leg_options['labels'] = list(dataframe.columns) else: if pie_legend: kwargs['legend'] = False if was_series: leg_options['labels'] = list(dataframe.index) else: leg_options['labels'] = list(dataframe.index) def filler(df): pby = df.T.copy() for i in list(pby.columns): tot = pby[i].sum() pby[i] = pby[i] * 100.0 / tot return pby.T areamode = False if kind == 'area': areamode = True if legend is False: kwargs['legend'] = False # line highlighting option for interactive! if interactive: if 2 in interactive_types: if kind == 'line': kwargs['marker'] = ',' if not piemode: kwargs['alpha'] = 0.1 # convert dates --- works only in my current case! if plotting_a_totals_column or not was_series: try: can_it_be_int = int(list(dataframe.index)[0]) can_be_int = True except: can_be_int = False if can_be_int: if 1500 < int(list(dataframe.index)[0]): if 2050 > int(list(dataframe.index)[0]): n = pandas.PeriodIndex([d for d in list(dataframe.index)], freq='A') dataframe = dataframe.set_index(n) if kwargs.get('filled'): if areamode or kind.startswith('bar'): dataframe = filler(dataframe) kwargs.pop('filled', None) MARKERSIZE = 4 COLORMAP = { 0: {'marker': None, 'dash': (None,None)}, 1: {'marker': None, 'dash': [5,5]}, 2: {'marker': "o", 'dash': (None,None)}, 3: {'marker': None, 'dash': [1,3]}, 4: {'marker': "s", 'dash': [5,2,5,2,5,10]}, 5: {'marker': None, 'dash': [5,3,1,2,1,10]}, 6: {'marker': 'o', 'dash': (None,None)}, 7: {'marker': None, 'dash': [5,3,1,3]}, 8: {'marker': "1", 'dash': [1,3]}, 9: {'marker': "*", 'dash': [5,5]}, 10: {'marker': "2", 'dash': [5,2,5,2,5,10]}, 11: {'marker': "s", 'dash': (None,None)} } HATCHES = { 0: {'color': '#dfdfdf', 'hatch':"/"}, 1: {'color': '#6f6f6f', 'hatch':"\\"}, 2: {'color': 'b', 'hatch':"|"}, 3: {'color': '#dfdfdf', 'hatch':"-"}, 4: {'color': '#6f6f6f', 'hatch':"+"}, 5: {'color': 'b', 'hatch':"x"} } if black_and_white: if kind == 'line': 
kwargs['linewidth'] = 1 cmap = plt.get_cmap('Greys') new_cmap = truncate_colormap(cmap, 0.25, 0.95) if kind == 'bar': # darker if just one entry if len(dataframe.columns) == 1: new_cmap = truncate_colormap(cmap, 0.70, 0.90) kwargs['colormap'] = new_cmap class dummy_context_mgr(): """a fake context for plotting without style perhaps made obsolete by 'classic' style in new mpl""" def __enter__(self): return None def __exit__(self, one, two, three): return False with plt.style.context((style)) if style != 'matplotlib' else dummy_context_mgr(): if not sbplt: # check if negative values, no stacked if so if areamode: kwargs['legend'] = False if dataframe.applymap(lambda x: x < 0.0).any().any(): kwargs['stacked'] = False rev_leg = False ax = dataframe.plot(figsize = figsize, **kwargs) if areamode: handles, labels = plt.gca().get_legend_handles_labels() del handles del labels else: plt.gcf().set_tight_layout(False) if not piemode: ax = dataframe.plot(figsize = figsize, **kwargs) else: ax = dataframe.plot(figsize = figsize, **kwargs) handles, labels = plt.gca().get_legend_handles_labels() plt.legend( handles, labels, loc = leg_options['loc'], bbox_to_anchor = (0,-0.1,1,1), bbox_transform = plt.gcf().transFigure ) # this line allows layouts with missing plots # i.e. 
layout = (5, 2) with only nine plots plt.gcf().set_tight_layout(False) if 'rot' in kwargs: if kwargs['rot'] != 0 and kwargs['rot'] != 90: labels = [item.get_text() for item in ax.get_xticklabels()] ax.set_xticklabels(labels, rotation = kwargs['rot'], ha='right') if transparent: plt.gcf().patch.set_facecolor('white') plt.gcf().patch.set_alpha(0) if black_and_white: if kind == 'line': # white background # change everything to black and white with interesting dashes and markers c = 0 for line in ax.get_lines(): line.set_color('black') #line.set_width(1) line.set_dashes(COLORMAP[c]['dash']) line.set_marker(COLORMAP[c]['marker']) line.set_markersize(MARKERSIZE) c += 1 if c == len(list(COLORMAP.keys())): c = 0 # draw legend with proper placement etc if legend: if not piemode and not sbplt: if 3 not in interactive_types: handles, labels = plt.gca().get_legend_handles_labels() # area doubles the handles and labels. this removes half: if areamode: handles = handles[-len(handles) / 2:] labels = labels[-len(labels) / 2:] if rev_leg: handles = handles[::-1] labels = labels[::-1] lgd = plt.legend(handles, labels, **leg_options) if interactive: # 1 = highlight lines # 2 = line labels # 3 = legend switches ax = plt.gca() # fails for piemode lines = ax.lines handles, labels = plt.gca().get_legend_handles_labels() if 1 in interactive_types: plugins.connect(plt.gcf(), HighlightLines(lines)) if 3 in interactive_types: plugins.connect(plt.gcf(), InteractiveLegendPlugin(lines, labels, alpha_unsel=0.0)) for i, l in enumerate(lines): y_vals = l.get_ydata() x_vals = l.get_xdata() x_vals = [str(x) for x in x_vals] if absolutes: ls = ['%s (%s: %d)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals)] else: ls = ['%s (%s: %.2f%%)' % (labels[i], x_val, y_val) for x_val, y_val in zip(x_vals, y_vals)] if 2 in interactive_types: #if 'kind' in kwargs and kwargs['kind'] == 'area': tooltip_line = mpld3.plugins.LineLabelTooltip(lines[i], labels[i]) mpld3.plugins.connect(plt.gcf(), 
tooltip_line) #else: if kind == 'line': tooltip_point = mpld3.plugins.PointLabelTooltip(l, labels = ls) mpld3.plugins.connect(plt.gcf(), tooltip_point) if piemode: if not sbplt: plt.axis('equal') ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) # add x label # this could be revised now! # if time series period, it's year for now if type(dataframe.index) == pandas.tseries.period.PeriodIndex: x_label = 'Year' if x_label is not False: if type(x_label) == str: plt.xlabel(x_label) else: check_x_axis = list(dataframe.index)[0] # get first entry# get second entry of first entry (year, count) try: if type(dataframe.index) == pandas.tseries.period.PeriodIndex: x_label = 'Year' check_x_axis = int(check_x_axis) if 1500 < check_x_axis < 2050: x_label = 'Year' else: x_label = 'Group' except: x_label = 'Group' if not sbplt: if not piemode: plt.xlabel(x_label) def is_number(s): """check if str can be can be made into float/int""" try: float(s) # for int, long and float except ValueError: try: complex(s) # for complex except ValueError: return False return True # for now, always turn off sci notation from matplotlib.ticker import ScalarFormatter if type(dataframe.index) != pandas.tseries.period.PeriodIndex: try: if all(is_number(s) for s in list(dataframe.index)): plt.gca().xaxis.set_major_formatter(ScalarFormatter()) except: pass try: if all(is_number(s) for s in list(dataframe.columns)): plt.gca().yaxis.set_major_formatter(ScalarFormatter()) except: pass # y labelling y_l = False if not absolutes: y_l = 'Percentage' else: y_l = 'Absolute frequency' def suplabel(axis,label,label_prop=None, labelpad=5, ha='center',va='center'): ''' Add super ylabel or xlabel to the figure Similar to matplotlib.suptitle axis - string: "x" or "y" label - string label_prop - keyword dictionary for Text labelpad - padding from the axis (default: 5) ha - horizontal alignment (default: "center") va - vertical alignment (default: "center") ''' fig = plt.gcf() xmin = [] ymin = [] for ax 
in fig.axes: xmin.append(ax.get_position().xmin) ymin.append(ax.get_position().ymin) xmin,ymin = min(xmin),min(ymin) dpi = fig.dpi if axis.lower() == "y": rotation=90. x = xmin-float(labelpad)/dpi y = 0.5 elif axis.lower() == 'x': rotation = 0. x = 0.5 y = ymin - float(labelpad)/dpi else: raise Exception("Unexpected axis: x or y") if label_prop is None: label_prop = dict() plt.gcf().text(x,y,label,rotation=rotation, transform=fig.transFigure, ha=ha,va=va, **label_prop) if y_label is not False: if not sbplt: if not piemode: if type(y_label) == str: plt.ylabel(y_label) else: plt.ylabel(y_l) else: if type(y_label) == str: the_y = y_label else: the_y = y_l #suplabel('y', the_y, labelpad = 1.5) plt.gcf().text(0.04, 0.5, the_y, va='center', rotation='vertical') #plt.subplots_adjust(left=0.5) # if not piemode: # if type(y_label) == str: # plt.ylabel(y_label) # else: # plt.ylabel(y_l) # hacky: turn legend into subplot titles :) if sbplt: # title the big plot #plt.gca().suptitle(title, fontsize = 16) #plt.subplots_adjust(top=0.9) # get all axes if 'layout' not in kwargs: axes = [l for index, l in enumerate(ax)] else: axes = [] cols = [l for index, l in enumerate(ax)] for col in cols: for bit in col: axes.append(bit) # set subplot titles for index, a in enumerate(axes): try: titletext = list(dataframe.columns)[index] except: pass a.set_title(titletext) try: a.legend_.remove() except: pass # remove axis labels for pie plots if piemode: a.axes.get_xaxis().set_visible(False) a.axes.get_yaxis().set_visible(False) a.axis('equal') # show grid a.grid(b=kwargs.get('grid', False)) kwargs.pop('grid', None) # add sums to bar graphs and pie graphs # doubled right now, no matter if not sbplt: if kind.startswith('bar'): width = ax.containers[0][0].get_width() # show grid ax.grid(b=kwargs.get('grid', False)) kwargs.pop('grid', None) if was_series: the_y_limit = plt.ylim()[1] if show_totals.endswith('plot') or show_totals.endswith('both'): # make plot a bit higher if putting these totals on 
it plt.ylim([0,the_y_limit * 1.05]) for i, label in enumerate(list(dataframe.index)): if len(dataframe.ix[label]) == 1: score = dataframe.ix[label][0] else: if absolutes: score = dataframe.ix[label].sum() else: #import warnings #warnings.warn("It's not possible to determine total percentage from individual percentages.") continue if not absolutes: plt.annotate('%.2f' % score, (i, score), ha = 'center', va = 'bottom') else: plt.annotate(score, (i, score), ha = 'center', va = 'bottom') else: the_y_limit = plt.ylim()[1] if show_totals.endswith('plot') or show_totals.endswith('both'): for i, label in enumerate(list(dataframe.columns)): if len(dataframe[label]) == 1: score = dataframe[label][0] else: if absolutes: score = dataframe[label].sum() else: #import warnings #warnings.warn("It's not possible to determine total percentage from individual percentages.") continue if not absolutes: plt.annotate('%.2f' % score, (i, score), ha = 'center', va = 'bottom') else: plt.annotate(score, (i, score), ha = 'center', va = 'bottom') plt.subplots_adjust(left=0.1) plt.subplots_adjust(bottom=0.18) if 'layout' not in kwargs: if not sbplt: plt.tight_layout() if save: import os if running_python_tex: imagefolder = '../images' else: imagefolder = 'images' savename = get_savename(imagefolder, save = save, title = title, ext = output_format) if not os.path.isdir(imagefolder): os.makedirs(imagefolder) # save image and get on with our lives if legend_pos.startswith('o'): plt.gcf().savefig(savename, dpi=150, bbox_extra_artists=(lgd,), bbox_inches='tight', format = output_format) else: plt.gcf().savefig(savename, dpi=150, format = output_format) time = strftime("%H:%M:%S", localtime()) if os.path.isfile(savename): print('\n' + time + ": " + savename + " created.") else: raise ValueError("Error making %s." 
% savename) if dragmode: plt.legend().draggable() if sbplt: plt.subplots_adjust(right=.8) plt.subplots_adjust(left=.1) if not interactive and not running_python_tex and not running_spider \ and not tk: plt.gcf().show() return elif running_spider or tk: return plt if interactive: plt.subplots_adjust(right=.8) plt.subplots_adjust(left=.1) try: ax.legend_.remove() except: pass return mpld3.display()
def topic_sim(segrated_result, no_topics=20, no_top_words=10):
    """Plot pairwise text distances and extract LDA topics from the texts.

    The texts in ``segrated_result.snippet2`` are TF-IDF vectorised, their
    pairwise cosine distances are projected to 2D with MDS, and the result is
    drawn as an mpld3 scatter plot with one tooltip per text.  An LDA model
    is then fitted on the same matrix and summarised via ``display_topics``.

    Args:
        segrated_result: Object exposing a ``snippet2`` attribute with a
            ``tolist()`` method that yields the texts to compare.
        no_topics: Number of LDA topics to fit.
        no_top_words: Number of words per topic passed to ``display_topics``.

    Returns:
        A ``(display_result, figure_html)`` tuple: whatever
        ``display_topics`` returns, and the object returned by
        ``mpld3.display`` for the scatter figure.
    """
    texts_list = segrated_result.snippet2.tolist()

    vectorizer = TfidfVectorizer()
    dtm = vectorizer.fit_transform(texts_list).toarray()  # dense array
    tfidf_feature_names = vectorizer.get_feature_names()

    # Cosine distance between every pair of texts, embedded in the plane.
    dist = 1 - cosine_similarity(dtm)
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    pos = mds.fit_transform(dist)  # shape (n_samples, n_components)
    xs, ys = pos[:, 0], pos[:, 1]

    # One descending, 1-based label per point.  The previous
    # reversed(range(1, len(xs))) produced only len(xs) - 1 labels, leaving
    # the last point without tooltip text.
    labels = ['Text {0}'.format(i) for i in range(len(xs), 0, -1)]

    fig, ax = plt.subplots()
    # Colours and sizes are random decoration; they carry no information.
    color, size = np.random.random((2, len(xs)))
    scatter = ax.scatter(xs, ys, c=color, s=1000 * size, alpha=0.3,
                         cmap=plt.cm.jet)
    ax.grid(color='lightgray', linestyle='solid', alpha=0.7)
    ax.set_title('Scatter plot of text corpuses distances', size=16)
    mpld3.plugins.connect(
        fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))

    # NOTE(review): ``n_topics`` is kept for the scikit-learn version this
    # file targets; newer releases call the parameter ``n_components``.
    lda = LatentDirichletAllocation(n_topics=no_topics, max_iter=500,
                                    learning_method='online',
                                    learning_offset=50.,
                                    random_state=0).fit(dtm)
    display_result = display_topics(lda, tfidf_feature_names, no_top_words)

    # Pass the figure explicitly instead of relying on pyplot's current figure.
    return display_result, mpld3.display(fig)
# Display names for the rows of each per-episode tooltip table.
column_rename_map = {
    'season': 'Season',
    'number': 'Episode',
    'rating': 'Rating',
    'votes': 'Votes',
    'first_aired': 'Air date',
    'overview': 'Synopsis',
}

# Build one HTML table per episode: selected columns transposed into rows,
# with the episode title as the single column header.
for i in range(len(df)):
    episode = df.iloc[[i]]
    label = episode[columns_for_labels].T
    label.columns = episode.title
    labels.append(label.rename(column_rename_map).to_html())

# Plot scatter points, coloured through a palette indexed by (season - 1).
season_count = len(df.season.value_counts())
season_palette = pd.Series(list(sns.color_palette(n_colors=season_count)))
c = (df.season-1).map(season_palette)
points = ax.scatter(df.number_abs, df.rating, alpha=.99, c=c, zorder=2)

# Axes cosmetics.
ax.set_ylim(5, 10)
ax.set_xlim(0, main_episodes.number_abs.max() + 1)
ax.set_ylabel('Trakt.tv Episode Rating')
ax.set_xlabel('Episode number')
fig.set_size_inches(12, 6)
show_title = show_summary.title[0]
ax.set_title(show_title, size=20)

# Attach the HTML tables as tooltips, save a standalone page, then display.
tooltip = plugins.PointHTMLTooltip(points, labels,
                                   voffset=10, hoffset=10, css=css)
plugins.connect(fig, tooltip)
mpld3.save_html(fig, str(show_title + ".html"))
mpld3.display()
# mpld3 demo: a 20x20 four-channel image with a colourbar and a plugin that
# shows the mouse position.
import matplotlib.pyplot as plt
import numpy as np
import mpld3
from mpld3 import plugins

fig, ax = plt.subplots()

# x is a row of 20 samples and y the same samples as a column, so every
# expression below broadcasts to a 20x20 grid.
x = np.linspace(-2, 2, 20)
y = x[:, None]

# Three offset Gaussian bumps plus one wider bump centred on the origin,
# one per image channel.
channels = [
    np.exp(- (x - 1) ** 2 - (y) ** 2),
    np.exp(- (x + 0.71) ** 2 - (y - 0.71) ** 2),
    np.exp(- (x + 0.71) ** 2 - (y + 0.71) ** 2),
    np.exp(-0.25 * (x ** 2 + y ** 2)),
]
X = np.zeros((20, 20, 4))
for channel_index, channel in enumerate(channels):
    X[:, :, channel_index] = channel

im = ax.imshow(
    X,
    extent=(10, 20, 10, 20),
    origin='lower',
    zorder=1,
    interpolation='nearest',
)
fig.colorbar(im, ax=ax)
ax.set_title('An Image', size=20)

mouse_position = plugins.MousePosition(fontsize=14)
plugins.connect(fig, mouse_position)

mpld3.display(fig)
def make_figure(figANDax, sample_data):
    """Draw reference and sample rows as a 2D scatter and a 3D scatter.

    Reference rows are unpickled from the file ``tp`` in the current working
    directory and stacked on top of ``sample_data``.  Columns 3, 4 and 5 of
    the stacked table supply the x, y and z coordinates, column 5 also
    drives the marker size, and column 1 supplies the tooltip text.

    Args:
        figANDax: Sequence ``(fig, ax, fig2, ax2)``.  ``ax`` receives the 2D
            projection and ``ax2`` the 3D scatter, so ``ax2`` must provide
            ``set_zlabel``.  ``fig2`` is accepted but not used here.
        sample_data: Rows with the same number of columns as the reference
            rows (they are combined with ``np.vstack``).

    Returns:
        The object returned by ``mpld3.display(fig)``.  Previously the call
        result was dropped and the function returned None, so nothing was
        rendered unless the caller displayed ``fig`` again.
    """
    fig, ax, _fig2, ax2 = figANDax

    # NOTE: unpickling can execute arbitrary code; 'tp' must be a trusted file.
    with open('tp', 'rb') as f:
        # The first pickled element is not needed for these plots.
        _hdist, tst_data = pickle.load(f)

    tst_data = np.array(tst_data)
    sample_data = np.array(sample_data)
    data = np.vstack((tst_data, sample_data))

    # Reference rows at the low end of the colormap, sample rows at the
    # high end.
    cm = plt.cm.rainbow
    col = [cm(.01)] * tst_data.shape[0] + [cm(.9)] * sample_data.shape[0]

    # ``np.float`` was removed from NumPy; the builtin ``float`` is the same
    # dtype.
    xs = data[:, 3].astype(float)
    xs[np.isnan(xs)] = 0
    ys = data[:, 4].astype(float)
    ys[~np.isfinite(ys)] = 0
    zs = data[:, 5].astype(float)
    # Marker sizes come from the raw column, before non-finite values are
    # zeroed, exactly as before.
    sizs = (.6 - zs) / .00755
    zs[~np.isfinite(zs)] = 0
    labels = list(data[:, 1])

    # --- 2D projection -----------------------------------------------------
    fig.set_size_inches([5, 4])
    ax.grid(color='white', linestyle='solid')
    ax.set_ylim(0, 0.38)
    ax.set_xlim(0, 1)
    put_patches(ax)
    sct = ax.scatter(xs, ys, c=col, s=sizs, alpha=1.0, cmap=plt.cm.rainbow)

    # Proxy patches for the legend; their text comes from ``leglabels``.
    red_patch = mpatches.Patch(color=plt.cm.rainbow(.98))
    pur_patch = mpatches.Patch(color=plt.cm.rainbow(.02))
    handles, leglabels = ax.get_legend_handles_labels()
    handles += [red_patch, pur_patch]
    leglabels += ['sample', 'reference']
    ax.legend(handles, leglabels)

    ax.set_title("Dynamic landscape, 2D projection", size=20)
    plugins.connect(fig,
                    plugins.PointLabelTooltip(sct, labels),
                    ClickInfo(sct, labels))
    ax.set_xlabel('Avg Temp', size=15)
    ax.set_ylabel('mean(abs(d_Temp)) - abs(mean(d_Temp))', size=15)

    # --- 3D view -----------------------------------------------------------
    ax2.scatter(xs, ys, zs, c=col, s=sizs, alpha=1.0, cmap=plt.cm.rainbow)
    ax2.set_title("Dynamic landscape, 3D", size=20)
    ax2.set_xlabel('Avg Temp', size=15)
    ax2.set_ylabel('mean(abs(d_Temp)) - abs(mean(d_Temp))', size=15)
    ax2.set_zlabel('Density of dominating state', size=15)

    return mpld3.display(fig)
voffset=10, hoffset=10, css=css) #connect tooltip to fig mpld3.plugins.connect(fig, tooltip, TopToolbar()) #set tick marks as blank ax.axes.get_xaxis().set_ticks([]) ax.axes.get_yaxis().set_ticks([]) #set axis as blank ax.axes.get_xaxis().set_visible(False) ax.axes.get_yaxis().set_visible(False) ax.legend(numpoints=1) #show legend with only one dot mpld3.display() #show the plot #uncomment the below to export to html #html = mpld3.fig_to_html(fig) #print(html) # # Hierarchical document clusterin from scipy.cluster.hierarchy import ward, dendrogram linkage_matrix = ward(dist) #define the linkage_matrix using ward clustering pre-computed distances fig, ax = plt.subplots(figsize=(15, 20)) # set size ax = dendrogram(linkage_matrix, orientation="right", labels=titles);
def mouseshow():
    """Attach a mouse-position readout to the current figure and display it.

    Returns:
        The object returned by ``mpld3.display`` for the current figure.
    """
    current_figure = gcf()
    position_readout = plugins.MousePosition(fontsize=14)
    plugins.connect(current_figure, position_readout)
    return mpld3.display(current_figure)
annot.set_visible(False) fig.canvas.draw_idle() fig.canvas.mpl_connect("motion_notify_event", hover) plt.show() #%% Matplot tooltips hack import numpy as np import matplotlib.pyplot as plt, mpld3 import seaborn as sns import pandas as pd N=10 data = pd.DataFrame({"x": np.random.randn(N), "y": np.random.randn(N), "size": np.random.randint(20,200, size=N), "label": np.arange(N) }) scatter_sns = sns.lmplot("x", "y", scatter_kws={"s": data["size"]}, robust=False, # slow if true data=data, size=8) fig = plt.gcf() tooltip = mpld3.plugins.PointLabelTooltip(fig, labels=list(data.label)) mpld3.plugins.connect(fig, tooltip) mpld3.display(fig)
def kmeans_cluster(terms, description_tokens, tfidf_matrix, titles, bookobj_tokens_dict):
    """Cluster documents with k-means and return an interactive scatter plot as HTML.

    Fits an 8-cluster KMeans model on ``tfidf_matrix``, prints the top three
    terms and the titles of every cluster, projects the pairwise cosine
    distances onto two dimensions with MDS, and plots one point per document
    (coloured by cluster, title as tooltip) with mpld3.

    Side effects: the fitted model is written to, and immediately reloaded
    from, ``doc_cluster.pkl`` in the current directory, and a large amount of
    debug output is printed to stdout.

    Args:
        terms: Indexable by feature index; each entry is a string whose
            space-separated words are looked up in the vocabulary frame.
        description_tokens: Stored under the 'synopsis' key of ``books``;
            that key is not selected into the data frame built below.
        tfidf_matrix: Matrix passed to ``cosine_similarity`` and
            ``KMeans.fit``.
        titles: Document titles, used for the printed listing and tooltips.
        bookobj_tokens_dict: Mapping whose values are token lists; all
            tokens are flattened into the vocabulary frame.

    Returns:
        The HTML produced by ``mpld3.fig_to_html`` for the cluster figure.
    """
    ######################
    ### KMeans Cluster ###
    ######################
    from sklearn.metrics.pairwise import cosine_similarity
    # dist is 1 minus the cosine similarity between every pair of rows of the
    # tf-idf matrix, i.e. the cosine distance.  It is used further down to
    # place the documents on a 2-dimensional plane.
    dist = 1 - cosine_similarity(tfidf_matrix)
    num_clusters = 8
    km = KMeans(n_clusters = num_clusters)
    km.fit(tfidf_matrix)
    clusters = km.labels_.tolist()
    print "clusters, ", clusters
    # Persist the fitted model and load it straight back.
    joblib.dump(km, 'doc_cluster.pkl')
    km = joblib.load('doc_cluster.pkl')
    clusters = km.labels_.tolist()
    books = {'title':titles, 'synopsis':description_tokens,'cluster':clusters}
    print "books, ", books
    # Frame indexed by cluster number so frame.ix[i] selects one cluster.
    frame = pd.DataFrame(books,index=[clusters],columns=['title','cluster'])
    frame['cluster'].value_counts()  # result is discarded
    grouped = frame['title'].groupby(frame['cluster'])  # not used below
    #############################
    ### Top Terms Per Cluster ###
    #############################
    print "Top terms per cluster:"
    print
    # Feature indices of every centroid, sorted by descending weight.
    order_centroids = km.cluster_centers_.argsort()[:, ::-1]
    print "order centroids, ", order_centroids
    totalvocab_tokenized_list = [word for token_sublist in bookobj_tokens_dict.values() for word in token_sublist]
    print "total vocab tokenized list, ", totalvocab_tokenized_list
    # Vocabulary frame: every token is both the index and the 'words' value.
    vocab_frame = pd.DataFrame({'words': totalvocab_tokenized_list}, index=totalvocab_tokenized_list)
    print 'there are ' + str(vocab_frame.shape[0]) + ' items in vocab_frame'
    print "num cluster, ", num_clusters
    print "values, ", frame.ix
    print order_centroids
    graph_keys = [i for i in range(num_clusters)]
    graph_values = []
    for i in range(num_clusters):
        print "Cluster %d words:" % i
        the_terms = []
        for ind in order_centroids[i, :3]: # top 3 words that are nearest to the cluster centroid
            graph_terms = vocab_frame.ix[terms[ind].split(' ')].values.tolist()[0][0].encode('utf-8','ignore')
            the_terms.append(graph_terms)
            print "graph terms ", graph_terms
        print "the terms, ", the_terms
        graph_values.append(the_terms)
        print
        print
        print "Cluster %d titles:" % i
        df = frame.ix[i]['title']
        # A single-member cluster yields a plain string instead of a Series.
        if type(df) is str:
            print ' %s,' % df
        else:
            for title in df.values.tolist():
                print ' %s,' % title
        print
        print
    print "graph values ", graph_values
    #################################
    ### Multi-Dimensional Scaling ###
    #################################
    # Convert the dist matrix into a 2-dimensional array using MDS.
    MDS()  # this instance is discarded
    # Two components because the points are plotted in a 2-D plane;
    # 'precomputed' because a distance matrix is provided;
    # random_state is fixed so the plot is reproducible.
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    print "mds, ", mds
    pos = mds.fit_transform(dist) # shape (n_samples, n_components)
    print "pos, ", pos
    xs, ys = pos[:, 0], pos[:, 1]
    #################################
    ### Visualizing Book Clusters ###
    #################################
    # Colour per cluster number (covers the 8 clusters configured above).
    cluster_colors = {0: '#1b9e77', 1: '#d95f02', 2: '#7570b3', 3: '#e7298a', 4: '#66a61e', 5:'#F7EC45', 6:'#2ee3b6', 7:'#cd82c0'}
    # Legend name per cluster: its top terms joined with commas.
    graph_values = [", ".join(term_list) for term_list in graph_values]
    cluster_names = dict(zip(graph_keys, graph_values))
    print "cluster names dictionary, ", cluster_names
    # Data frame holding the MDS result plus the cluster numbers and titles.
    df = pd.DataFrame(dict(x=xs, y=ys, label=clusters, title=titles))
    # Group by cluster.
    groups = df.groupby('label')
    # Custom css to format the font and to remove the axis labeling.
    css = """
text.mpld3-text, div.mpld3-tooltip { font-family:Arial, Helvetica, sans-serif; } g.mpld3-xaxis, g.mpld3-yaxis { display: none; } svg.mpld3-figure { margin-left: 0px;} """
    # Plotting using matplotlib
    fig, ax = plt.subplots(figsize=(14,6)) #set plot size
    ax.margins(0.03) # Optional, just adds 3% padding to the autoscaling
    # Iterate through the groups to layer the plot; the cluster_names and
    # cluster_colors dicts are looked up with the group's 'name' to get the
    # appropriate label/color.
    for name, group in groups:
        points = ax.plot(group.x, group.y, marker='o', linestyle='', ms=18, label=cluster_names[name], mec='none', color=cluster_colors[name])
        ax.set_aspect('auto')
        labels = [i for i in group.title]
        # set tooltip using points, labels and the css defined above
        tooltip = mpld3.plugins.PointHTMLTooltip(points[0], labels, voffset=10, hoffset=10, css=css)
        # connect tooltip to fig
        mpld3.plugins.connect(fig, tooltip, TopToolbar())
        # set tick marks as blank
        ax.axes.get_xaxis().set_ticks([])
        ax.axes.get_yaxis().set_ticks([])
        # set axis as blank
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
    ax.legend(numpoints=1, title='') # show legend with only one dot
    mpld3.display() # return value is discarded here; the HTML below is what callers receive
    # export the figure to html
    graph_html = mpld3.fig_to_html(fig)
    print "KMEANS CLUSTER GRAPH HTML", graph_html
    return graph_html
def parallel_coordinates(data_sets, colors=None, columNames=None, alpha=None):
    """Draw a parallel-coordinates plot and return its mpld3 rendering.

    Each dimension is rescaled to the range [0, 1] (a constant dimension maps
    to 0.5), every data set is drawn as a poly-line across ``dims - 1``
    side-by-side subplots, and the y tick labels of each subplot are rewritten
    in that dimension's original units.

    Args:
        data_sets: Sequence of equal-length numeric sequences, one per
            poly-line.  At least two dimensions are required.
        colors: One matplotlib colour per data set.  Defaults to red for all.
        columNames: One axis label per dimension.  Defaults to the dimension
            indices as strings.
        alpha: One opacity per data set, a single number applied to all of
            them, or None for matplotlib's default opacity.

    Returns:
        The object returned by ``mpld3.display(fig)``.  Previously the call
        result was dropped and the function returned None.

    Raises:
        ValueError: If the data sets have fewer than two dimensions.
    """
    n_sets = len(data_sets)
    dims = len(data_sets[0])
    if dims < 2:
        raise ValueError('parallel_coordinates needs at least two dimensions')

    if colors is None:
        # A plain colour name: the former 'r-' default is a format string,
        # which matplotlib rejects when passed through the ``c=`` keyword.
        colors = ['r'] * n_sets
    if columNames is None:
        # The None default used to be sliced directly and raised TypeError.
        columNames = [str(d) for d in range(dims)]
    if alpha is None or isinstance(alpha, (int, float)):
        # The None default used to be indexed directly and raised TypeError.
        alpha = [alpha] * n_sets

    x = list(range(dims))
    # squeeze=False keeps the axes in an array even when there is only one
    # subplot (two dimensions); otherwise a bare Axes comes back.
    fig, grid = plt.subplots(1, dims - 1, sharey=False, squeeze=False)
    axes = grid[0]

    # Calculate the limits on the data: (min, max, span) per dimension.
    min_max_range = []
    for column in zip(*data_sets):
        mn, mx = min(column), max(column)
        if mn == mx:
            # Give constant columns a unit span to avoid dividing by zero.
            mn -= 0.5
            mx = mn + 1.
        min_max_range.append((mn, mx, float(mx - mn)))

    # Normalize the data sets.
    normalised = [
        [(value - min_max_range[dim][0]) / min_max_range[dim][2]
         for dim, value in enumerate(ds)]
        for ds in data_sets
    ]

    # Plot all data sets on every subplot; each subplot shows one segment.
    for i, ax in enumerate(axes):
        for dsi, d in enumerate(normalised):
            ax.plot(x, d, c=colors[dsi], alpha=alpha[dsi])
        ax.set_xlim([x[i], x[i + 1]])

    # Set the x axis ticks and relabel the y ticks in original units.
    for dimension, (axx, xx) in enumerate(zip(axes, x[:-1])):
        axx.xaxis.set_major_locator(ticker.FixedLocator([xx]))
        ticks = len(axx.get_yticklabels())
        mn, _, span = min_max_range[dimension]
        step = span / (ticks - 3)
        # Labels start one step below the minimum (index -1).
        axx.set_yticklabels(
            ['%6.2f' % (mn + i * step) for i in range(-1, ticks)])

    # Move the final axis' ticks to the right-hand side.
    axx = plt.twinx(axes[-1])
    axx.xaxis.set_major_locator(ticker.FixedLocator([x[-2], x[-1]]))
    ticks = len(axx.get_yticklabels())
    mn, _, span = min_max_range[dims - 1]
    step = span / (ticks - 1)
    axx.set_yticklabels(['%6.2f' % (mn + i * step) for i in range(ticks)])

    # Name the x ticks: one name per subplot, and the last subplot carries
    # the final two names.
    inner_names = columNames[:-2]
    for i, col in enumerate(inner_names):
        plt.sca(axes[i])
        plt.xticks([i], (col, ), rotation='vertical')
    last = len(inner_names)
    plt.sca(axes[last])
    plt.xticks([last, last + 1], columNames[last:], rotation='vertical')

    # Empty proxy lines that only provide the colour legend entries.
    plt.plot([], [], color='r', label='Infeasible')
    plt.plot([], [], color='b', label='Feasible')
    plt.plot([], [], color='g', label='Non-dominated')

    # Remove the whitespace between subplots.
    plt.subplots_adjust(wspace=0)
    plt.suptitle('Parallel Coordinate Plot')
    plt.legend(bbox_to_anchor=(1.6, 1), loc=2, borderaxespad=0.)

    return mpld3.display(fig)
def _print_filter_options(selected, count, step, decimals):
    """Print ``count`` <option> elements for a ratio-filter <select>.

    Option values are 0, step, 2*step, ...; each is computed directly
    (rounded to ``decimals``) instead of by repeated addition, so float
    error cannot leak into the rendered value. The option whose string form
    equals ``str(selected)`` is marked as selected.
    """
    for i in range(count):
        # The first entry stays the integer 0 so that it renders as '0'.
        num = round(i * step, decimals) if i else 0
        print("<option value='" + str(num) + "'")
        if str(selected) == str(num):
            print("selected")
        print(">")
        if num == 0:
            print("Nothing")
        else:
            print("> " + str(num))
        print("</option>")


def _print_row_if_kept(held, min_allele_ratio, wide_pad):
    """Print the held allele as a table row if its ratio exceeds the filter.

    ``wide_pad`` is the CSS right padding used for the last five cells.
    """
    if not float(held['allele_ratio']) > float(min_allele_ratio):
        return
    print("<tr><td style='padding-left:5px;padding-right:5px;'>{}</td>".format(held['allele']))
    print("<th style='padding-right:5px;'>{}</th>".format(held['locus']))
    print("<th style='padding-right:5px;'>{}</th>".format(held['seq']))
    for key in ('len', 'count', 'fwd_rev', 'allele_ratio'):
        print("<th style='padding-right:{};'>{}</th>".format(wide_pad, held[key]))
    print("<th style='padding-right:{};'>{}</th></tr>".format(wide_pad, held['total_ratio']))


def _print_locus_table(cursor, locus, locus_reads, total_reads, userID):
    """Query the sequences of one locus and print them as an HTML table.

    Consecutive records with the same sequence where the second one has read
    direction 'REV' are merged into a single row with summed counts/ratios.
    Reads the module-level ``locfilter`` and ``filter`` thresholds.
    """
    # NOTE(review): SQL is assembled by string concatenation from ``locus``
    # and ``userID``; switch to driver-side parameters once the DB driver's
    # paramstyle is confirmed.
    # Allele-ratio filtering happens in Python below (no SQL HAVING clause).
    query = ("SELECT tempcontent.locus, allele, readdir, " +
             "tempcontent.seq as seq, length(tempcontent.seq) as seqLen, SUM(seqcount) as seqCounts, " +
             "((SUM(tempcontent.seqcount))/{}.0)".format(locus_reads) +
             " As allele_ratio, ((SUM(tempcontent.seqcount))/{}.0)".format(total_reads) +
             " As total_ratio " +
             "FROM tempcontent " +
             "WHERE tempcontent.locus = '" + locus + "' AND userID = {}".format(userID) +
             " Group by allele, locus, readDir, tempcontent.seq " +
             " Order by length(tempcontent.seq), tempcontent.seq, readdir, SUM(seqcount) desc ")
    cursor.execute(query)
    records = cursor.fetchall()

    print("<br/><table style='border-collapse:collapse; border: solid 1px black;font-size:12px;'>")
    held = {'allele': "", 'locus': "", 'seq': "", 'len': 0, 'count': 0,
            'fwd_rev': "", 'allele_ratio': 0, 'total_ratio': 0}
    awaiting_rev = False  # True while ``held`` has not been printed yet
    count = 0
    for record in records:
        (rec_locus, allele, readdir, seq, seq_len, seq_count,
         allele_ratio, total_ratio) = [record[k] for k in range(8)]
        current = {'allele': allele, 'locus': rec_locus, 'seq': seq,
                   'len': seq_len, 'count': seq_count,
                   'allele_ratio': allele_ratio, 'total_ratio': total_ratio}
        # Explicit float division: plain "/" truncates to 0 under Python 2
        # when the driver returns integers.
        locus_ratio = float(locus_reads) / float(total_reads)
        shown = locus_ratio > float(locfilter)

        if count == 0:
            print("<tr style='background-color: #cccccc;'><td colspan='8' style='border-style:solid;border-width: 0px 0px 1px 0px;'><b>" +
                  rec_locus + "</b><br/>" +
                  "<b>Allele total: </b>" + str(locus_reads) + " <b>Total Count:</b> " + str(total_reads) +
                  " <b>Locus Ratio: </b>" + "{0:.4f}".format(locus_ratio) + "</td></tr>")
            if shown:
                print("<tr style='background-color:#FAEBD7;border:1px solid black;'><th style='padding-right:5px;'>Allele</th><th style='padding-right:5px;'>locus</th>" +
                      "<th style='padding-right:8px;'>Seq</th><th style='padding-right:8px;'>Seq<br/>Length</th>" +
                      "<th style='padding-right:8px;'>Seq<br/>Counts</th><th style='padding-right:8px;'>FWD/REV</th>" +
                      "<th style='padding-right:8px;'>Allele<br/>Ratio</th><th style='padding-right:8px;'>Total<br/>Ratio</th></tr>")
            # NOTE(review): the first record is held with an empty FWD/REV
            # text regardless of its read direction -- confirm intent.
            held.update(current)
            awaiting_rev = True

        if not shown:
            print("<tr style='background-color:#ccffff'><td colspan='8' style='border-top:1px solid black;'>No Data</td></tr>")
            break

        if count > 0:
            if readdir == 'REV' and seq == held['seq']:
                # Reverse partner of the held record: merge and print.
                both = float(held['count'] + seq_count)
                held['fwd_rev'] = ("{0:.4f}".format(float(held['count']) / both) + "/" +
                                   "{0:.4f}".format(float(seq_count) / both))
                held['allele'] = allele
                held['locus'] = rec_locus
                held['seq'] = seq
                held['len'] = seq_len
                held['count'] = held['count'] + seq_count
                held['allele_ratio'] = held['allele_ratio'] + allele_ratio
                held['total_ratio'] = held['total_ratio'] + total_ratio
                _print_row_if_kept(held, filter, '8px')
                awaiting_rev = False
            else:
                if awaiting_rev:
                    # The held record never got a partner; print it alone.
                    _print_row_if_kept(held, filter, '8px')
                    awaiting_rev = False
                held.update(current)
                if readdir == "FWD":
                    # Keep it back: the next record may be its REV partner.
                    held['fwd_rev'] = "1/0"
                    awaiting_rev = True
                else:
                    held['fwd_rev'] = "0/1"
                    _print_row_if_kept(held, filter, '5px')
        count += 1
    # NOTE(review): a record still held when the loop ends (awaiting_rev is
    # True) is never printed -- confirm whether it should be flushed here.
    print("</table><br/>")


def writeResults(fn, userID, batchID):
    """Print the locus/filter form and per-locus result tables as HTML.

    Output goes to stdout via ``print``. The function reads the module-level
    names ``locusname``, ``filter``, ``locfilter`` and ``fileholder`` and
    queries the ``tempcontent`` table through ``DBconnections``. After the
    tables it builds a bar chart from hard-coded sample data and passes the
    figure to ``mpld3.display``.

    Args:
        fn: Unused by this function.
        userID: Value formatted into the ``userID = ...`` SQL conditions.
        batchID: String written into the hidden ``idnumber`` form field.

    Returns:
        None.
    """
    con = DBconnections.openDB()
    try:
        cursor = con.cursor()
        try:
            # NOTE(review): queries are built by string formatting from
            # ``userID`` and ``locusname``; parameterize them once the DB
            # driver's paramstyle is confirmed.
            queryTotal = "SELECT SUM(seqcount) As TotalReads FROM tempcontent WHERE userID = {}".format(userID)
            qLocus = "SELECT locus FROM tempcontent WHERE userID = {} Group by locus".format(userID)
            cursor.execute(queryTotal)
            totalReads = cursor.fetchone()[0]
            cursor.execute(qLocus)
            locuslinks = cursor.fetchall()

            # View Locus select list
            print("<hr/><br/><form action='' method='post' name='locfrm' id='locfrm' enctype='multipart/form-data' >" +
                  "<b>View Locus: </b><select id='locusname' name='locusname' onchange='javascript: getlocus(this);'>" +
                  "<option value=''")
            if locusname == "":
                print("selected>")
            else:
                print(">")
            print("All</option>")
            for lcs in locuslinks:
                name = str(lcs[0])
                print("<option value='" + name + "' ")
                if locusname == name:
                    print("selected")
                print(">" + name + "</option>")
            print("</select>")

            # Allele Ratio Filter select list
            print(" <b>Allele Ratio Filter: </b>" +
                  "<select id='filter' name='filter' style='width:100px;' onchange='javascript: getFilter(this);'>")
            _print_filter_options(filter, 12, 0.001, 3)
            print("</select>")

            # Locus Filter list
            print(" <b>Locus Ratio Filter: </b>" +
                  "<select id='locfilter' name='locfilter' style='width:100px;' onchange='javascript: getLocFilter(this);'>")
            _print_filter_options(locfilter, 4, 0.01, 2)
            print("</select>")

            print("<br/><input type='hidden' name='idnumber' id='idnumber' value='" + batchID + "'>" +
                  "<input type='hidden' name='fileholder' id='fileholder' value='" + fileholder + "'>" +
                  "</form>")

            # Read totals per locus, optionally restricted to one locus.
            if locusname == "":
                queryCount = ("SELECT locus, SUM(seqcount) As TotalReads FROM tempcontent WHERE userID = {} Group by locus".format(userID) +
                              " ORDER BY locus")
            else:
                queryCount = ("SELECT locus, SUM(seqcount) As TotalReads FROM tempcontent WHERE " +
                              "locus = '" + locusname + "' AND " +
                              "userID = {} Group by locus".format(userID) +
                              " ORDER BY locus")
            cursor.execute(queryCount)
            for rec in cursor.fetchall():
                _print_locus_table(cursor, rec[0], rec[1], totalReads, userID)
        finally:
            # Release the cursor even when a query or print step fails.
            cursor.close()
    finally:
        DBconnections.closeDB(con)

    # Bar chart built from hard-coded sample values (not from the query
    # results above).
    N = 5
    menMeans = (20, 35, 30, 35, 27)
    menStd = (2, 3, 4, 1, 2)
    womenMeans = (25, 32, 34, 20, 25)
    womenStd = (3, 5, 2, 3, 3)

    ind = np.arange(N)  # the x locations for the groups
    width = 0.35  # the width of the bars

    fig, ax = plt.subplots()
    rects1 = ax.bar(ind, menMeans, width, color='r', yerr=menStd)
    rects2 = ax.bar(ind + width, womenMeans, width, color='y', yerr=womenStd)

    # add some text for labels, title and axes ticks
    ax.set_ylabel('Scores')
    ax.set_title('Scores by group and gender')
    ax.set_xticks(ind + width)
    ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
    ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))

    # NOTE(review): autolabel is not defined in this function; presumably a
    # module-level helper -- verify.
    autolabel(rects1)
    autolabel(rects2)

    # Pass the figure itself: the first argument of mpld3.display is the
    # figure, not the pyplot module.
    # NOTE(review): the returned HTML object is discarded; printing
    # mpld3.fig_to_html(fig) may be what is needed to embed the chart in the
    # page -- confirm.
    mpld3.display(fig, True, False)