예제 #1
0
 def createBokehChart(self):
     """Build the Bokeh histogram chart(s) for the selected value fields.

     The number of bins is read from the ``binsize`` option and falls back
     to the square root of the working DataFrame's row count.

     Returns:
         A single ``Histogram`` when exactly one value field is selected,
         otherwise a list holding one ``Histogram`` per value field (the
         list is empty when there are no value fields).
     """
     chartWidth = int(self.getPreferredOutputWidth() / 2)
     chartHeight = int(self.getPreferredOutputHeight() * 0.75)
     fallbackBins = math.sqrt(len(self.getWorkingPandasDataFrame().index))
     binCount = int(self.options.get('binsize', fallbackBins))
     fields = self.getValueFields()

     if len(fields) == 1:
         # Single field: no explicit plot size is passed, and the "color"
         # option is forwarded as-is (it may be None).
         return Histogram(
             self.getWorkingPandasDataFrame(),
             values=self.getValueFields()[0],
             bins=binCount,
             color=self.options.get("color"),
             xgrid=True,
             ygrid=True,
             ylabel='Frequency',
         )

     def buildHistogram(position, field):
         # An explicit "color" option wins; otherwise derive a color from
         # the field's position in the list.
         fill = self.options.get("color")
         if fill is None:
             fill = Colors.hexRGB(1. * position / 2)
         return Histogram(
             self.getWorkingPandasDataFrame(),
             values=field,
             plot_width=chartWidth,
             plot_height=chartHeight,
             bins=binCount,
             color=fill,
             xgrid=True,
             ygrid=True,
             ylabel='Frequency',
         )

     return [
         buildHistogram(position, field)
         for position, field in enumerate(fields)
     ]
    def matplotlibRender(self, fig, ax):
        """Draw the histogram(s) of the working DataFrame via pandas plotting.

        Bars are stacked only when more than one value field is selected and
        the ``histoChartType`` option equals ``"stacked"`` (its default).
        The bin count comes from the ``binsize`` option, falling back to the
        square root of the DataFrame's row count.

        Args:
            fig: Not used by this method.
            ax: Target axes; in subplot mode it is handed to
                ``self.getAxItem`` to pick the axes for each value field.
        """
        isStacked = (
            len(self.getValueFields()) > 1
            and self.options.get("histoChartType", "stacked") == "stacked"
        )
        useSubplots = self.isSubplots()
        fallbackBins = math.sqrt(len(self.getWorkingPandasDataFrame().index))
        binCount = int(self.options.get('binsize', fallbackBins))

        def drawHistogram(target, field=None, tint=None):
            # Without a field the whole DataFrame is plotted; with one, only
            # that column.
            frame = self.getWorkingPandasDataFrame()
            source = frame if field is None else frame[field]
            source.plot(
                kind="hist",
                stacked=isStacked,
                ax=target,
                bins=binCount,
                legend=self.showLegend(),
                x=field,
                label=field,
                color=tint,
                # The shared colormap is only used when no explicit color
                # was supplied.
                colormap=Colors.colormap if not tint else None,
            )

        if not useSubplots:
            drawHistogram(ax)
            return

        for position, field in enumerate(self.getValueFields()):
            drawHistogram(
                self.getAxItem(ax, position),
                field,
                Colors.colormap(1. * position / 2),
            )
예제 #3
0
    def matplotlibRender(self, fig, ax):
        """Plot histogram(s) of the working pandas DataFrame on ``ax``.

        Stacking is enabled only when several value fields are selected and
        the ``histoChartType`` option is ``"stacked"`` (the default value).
        The number of bins is the ``binsize`` option, or the square root of
        the DataFrame's row count when that option is absent.

        Args:
            fig: Not used by this method.
            ax: Target axes; in subplot mode ``self.getAxItem(ax, j)`` picks
                the axes for the j-th value field.
        """
        multiField = len(self.getValueFields()) > 1
        stackBars = multiField and self.options.get("histoChartType", "stacked") == "stacked"
        perFieldAxes = self.isSubplots()
        rowCountRoot = math.sqrt(len(self.getWorkingPandasDataFrame().index))
        numBins = int(self.options.get('binsize', rowCountRoot))

        def render(axis, field=None, shade=None):
            frame = self.getWorkingPandasDataFrame()
            # Plot a single column when a field is given, else the full frame.
            series = frame if field is None else frame[field]
            settings = {
                "kind": "hist",
                "stacked": stackBars,
                "ax": axis,
                "bins": numBins,
                "legend": self.showLegend(),
                "x": field,
                "label": field,
                "color": shade,
                # Fall back to the shared colormap when no color is given.
                "colormap": None if shade else Colors.colormap,
            }
            series.plot(**settings)

        if perFieldAxes:
            for idx, field in enumerate(self.getValueFields()):
                render(self.getAxItem(ax, idx), field, Colors.colormap(1. * idx / 2))
        else:
            render(ax)
예제 #4
0
    def matplotlibRender(self, fig, ax):
        """Draw a seaborn distribution plot for each selected value field.

        The ``rug`` (default ``"false"``) and ``kde`` (default ``"true"``)
        options are compared against the string ``"true"`` to toggle the rug
        and KDE layers. The bin count is the ``binsize`` option, or the
        square root of the working DataFrame's row count when it is absent.

        Args:
            fig: Not used by this method.
            ax: Target axes; with several value fields,
                ``self.getAxItem(ax, j)`` selects the axes for the j-th one.
        """
        # NOTE(review): sns.distplot is deprecated in newer seaborn
        # releases -- confirm against the seaborn version this project pins.
        showRug = self.options.get("rug", "false") == "true"
        showKde = self.options.get("kde", "true") == "true"
        rowCountRoot = math.sqrt(len(self.getWorkingPandasDataFrame().index))
        numBins = int(self.options.get('binsize', rowCountRoot))

        def drawDistribution(axis, field=None, shade=None):
            column = self.getWorkingPandasDataFrame()[field]
            kdeOptions = {"label": "{0} KDE Estim".format(field)}
            histOptions = {"label": "{0} Freq".format(field)}
            sns.distplot(
                column,
                ax=axis,
                bins=numBins,
                rug=showRug,
                kde=showKde,
                kde_kws=kdeOptions,
                hist_kws=histOptions,
                label=field,
                color=shade,
            )

        if len(self.getValueFields()) <= 1:
            # Zero or one field: draw the first field directly on ``ax``
            # with no explicit color.
            drawDistribution(ax, self.getValueFields()[0])
            return

        for idx, field in enumerate(self.getValueFields()):
            drawDistribution(self.getAxItem(ax, idx), field, Colors.colormap(1. * idx / 2))
예제 #5
0
    def createBokehChart(self):
        """Build the Bokeh line chart(s) for the current key/value selection.

        Works on a copy of the working pandas DataFrame. String values in
        the key-field columns have ``':'`` replaced by ``'.'`` before
        charting.

        * With a ``clusterby`` option and either subplot mode or at most one
          value field, the data is pivoted on the first key field and up to
          10 cluster columns are drawn: one ``Line`` chart per cluster in
          subplot mode, otherwise all clusters as lines on one figure.
        * Otherwise one ``Line`` chart per value field is produced in
          subplot mode, or a single multi-series ``Line`` chart; a warning
          message is added if ``clusterby`` was set, since it is ignored.

        Returns:
            list: The charts/figures that were created.
        """
        keyFields = self.getKeyFields()
        valueFields = self.getValueFields()
        clusterby = self.options.get("clusterby")
        subplots = self.isSubplot()
        workingPDF = self.getWorkingPandasDataFrame().copy()

        # `basestring` only exists on Python 2; pick the matching text type.
        textTypes = str if sys.version_info[0] >= 3 else basestring

        def sanitizeKey(value):
            # Only text values are rewritten; everything else passes through.
            if isinstance(value, textTypes):
                return value.replace(':', '.')
            return value

        # Column-wise replacement instead of iterrows() + .loc write-back:
        # the row-wise form is slow, overwrites every row sharing a
        # duplicated index label, and can upcast column dtypes.
        for key in keyFields:
            workingPDF[key] = workingPDF[key].map(sanitizeKey)

        charts = []
        if clusterby is not None and (subplots or len(valueFields) <= 1):
            for valueField in valueFields:
                pivot = workingPDF.pivot(index=keyFields[0],
                                         columns=clusterby,
                                         values=valueField)
                clusters = pivot.columns[:10]  # max 10

                if subplots:
                    # One standalone chart per cluster value.
                    for i, col in enumerate(clusters):
                        charts.append(
                            Line(pivot[col].values,
                                 color=Colors.hexRGB(1. * i / 2),
                                 ylabel=valueField,
                                 xlabel=keyFields[0],
                                 legend=False,
                                 title="{0} = {1}".format(clusterby, col)))
                    continue

                # Single figure holding one line per cluster value. A
                # non-numeric index is exposed as the figure's x_range and
                # the lines are plotted against 1..N positions.
                numericIndex = np.issubdtype(pivot.index.dtype, np.number)
                fig = figure(x_range=None if numericIndex
                             else self.safeList(pivot.index))
                charts.append(fig)

                if numericIndex:
                    xValues = pivot.index.values.tolist()
                else:
                    xValues = list(range(1, len(pivot.index) + 1))
                showLegend = self.showLegend()
                for i, col in enumerate(clusters):
                    fig.line(x=xValues,
                             y=pivot[col].values,
                             color=Colors.hexRGB(1. * i / 2),
                             legend=str(col) if showLegend else None)
        else:
            if subplots:
                for i, valueField in enumerate(valueFields):
                    charts.append(
                        Line(workingPDF,
                             x=keyFields[0],
                             y=valueField,
                             color=Colors.hexRGB(1. * i / 2),
                             legend=self.showLegend(),
                             plot_width=int(800 / len(valueFields))))
            else:
                charts.append(
                    Line(workingPDF,
                         x=keyFields[0],
                         y=valueFields,
                         color=valueFields,
                         legend=self.showLegend()))

            if clusterby is not None:
                self.addMessage(
                    "Warning: 'Cluster By' ignored when grouped option with multiple Value Fields is selected"
                )
        return charts
예제 #6
0
    def createBokehChart(self):
        """Build the Bokeh bar chart(s) for the current key/value selection.

        Works on a copy of the working pandas DataFrame. String values in
        the key-field columns have ``':'`` replaced by ``'.'`` before
        charting. When there is exactly one key field, datetime columns used
        as bar labels are converted to strings (using the ``dateFormat``
        option when it is set).

        * With a ``clusterby`` option and subplot mode, the data is pivoted
          on the first key field and one ``Bar`` chart is created per
          cluster column (at most 10 per value field).
        * With a ``clusterby`` option, no subplots and at most one value
          field, a single chart stacked/grouped by ``clusterby`` is created.
        * Otherwise one chart per value field (subplot mode) or one chart of
          all value fields is created; a warning message is added if
          ``clusterby`` was set, since it is ignored.

        The ``charttype`` option (default ``"grouped"``) selects stacked
        versus grouped bars.

        Returns:
            list: The ``Bar`` charts that were created.
        """
        keyFields = self.getKeyFields()
        valueFields = self.getValueFields()
        clusterby = self.options.get("clusterby")
        stacked = self.options.get("charttype", "grouped") == "stacked"
        subplots = self.isSubplot()
        workingPDF = self.getWorkingPandasDataFrame().copy()

        def convertPDFDate(df, col):
            # Bokeh doesn't support datetime as index in Bar chart. Convert to String
            if len(keyFields) != 1 or col not in df:
                return
            if not numpy.issubdtype(df[col].dtype.type, numpy.datetime64):
                return
            dateFormat = self.options.get("dateFormat", None)

            def defaultText(x):
                return str(x).replace(':', '-')

            try:
                if dateFormat is None:
                    df[col] = df[col].apply(defaultText)
                else:
                    df[col] = df[col].apply(lambda x: x.strftime(dateFormat))
            except Exception:
                # Best effort: log the failure and fall back to the default
                # string form. Catching Exception (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                self.exception("Error converting dateFormat {}".format(dateFormat))
                df[col] = df[col].apply(defaultText)

        # `basestring` only exists on Python 2; pick the matching text type.
        textTypes = str if sys.version_info[0] >= 3 else basestring

        def sanitizeKey(value):
            # Only text values are rewritten; everything else passes through.
            if isinstance(value, textTypes):
                return value.replace(':', '.')
            return value

        # Column-wise replacement instead of iterrows() + .loc write-back:
        # the row-wise form is slow, overwrites every row sharing a
        # duplicated index label, and can upcast column dtypes.
        for key in keyFields:
            workingPDF[key] = workingPDF[key].map(sanitizeKey)

        charts = []

        def goChart(label, stack_or_group, values, ylabel=None, color=None):
            convertPDFDate(workingPDF, keyFields[0])
            if ylabel is None:
                ylabel = values
            label = label if isinstance(label, (list, tuple)) else [label]
            # The two chart types differ only in the keyword that receives
            # the series column.
            seriesArg = {"stack" if stacked else "group": stack_or_group}
            charts.append(
                Bar(workingPDF,
                    label=CatAttr(columns=label, sort=False),
                    color=color,
                    values=values,
                    legend=self.showLegend(),
                    ylabel=ylabel,
                    **seriesArg))

        if clusterby is not None and (subplots or len(valueFields) <= 1):
            if subplots:
                for valueField in valueFields:
                    pivot = workingPDF.pivot(
                        index=keyFields[0], columns=clusterby, values=valueField
                    )
                    for i, col in enumerate(pivot.columns[:10]):  # max 10
                        data = pd.DataFrame({'values': pivot[col].values, 'names': pivot.index.values})
                        convertPDFDate(data, 'names')
                        charts.append(
                            Bar(data,
                                label=CatAttr(columns=['names'], sort=False),
                                color=Colors.hexRGB(1. * i / 2),
                                values='values',
                                ylabel=valueField,
                                legend=False,
                                title="{0} = {1}".format(clusterby, col)))
            else:
                goChart(keyFields[0], clusterby, valueFields[0])
        else:
            if subplots:
                for i, valueField in enumerate(valueFields):
                    goChart(keyFields[0], None, valueField, color=Colors.hexRGB(1. * i / 2))
            else:
                if len(valueFields) > 1:
                    # Blend several value columns into one series so they
                    # can be stacked/grouped on the same chart.
                    series = '_'.join(valueFields)
                    values = blend(*valueFields, name=series.replace('_', ','), labels_name=series)
                else:
                    series = False
                    values = valueFields[0]
                goChart(keyFields, series, values, ylabel=','.join(valueFields))

            if clusterby is not None:
                self.addMessage("Warning: 'Cluster By' ignored when grouped option with multiple Value Fields is selected")
        return charts