def getMenuInfo(self, entity, dataHandler):
    """Return the download menu entries available for ``entity``.

    A single "Download as File" item is offered when ``entity`` is a PySpark
    or pandas DataFrame; every other entity gets an empty menu.
    ``dataHandler`` is accepted for interface compatibility but not consulted.
    """
    is_dataframe = (
        dataFrameMisc.isPySparkDataFrame(entity)
        or dataFrameMisc.isPandasDataFrame(entity)
    )
    if not is_dataframe:
        return []

    download_item = {
        "categoryId": "Download",
        "title": "Download as File",
        "icon": "fa-download",
        "id": "downloadFile",
    }
    return [download_item]
def getDataHandler(options, entity):
    """Pick the data handler matching the type of ``entity``.

    Returns a ``PySparkDataFrameDataHandler`` for PySpark DataFrames, a
    ``PandasDataFrameDataHandler`` for pandas DataFrames, and ``None`` for
    anything else.
    """
    handler = None
    if dataFrameMisc.isPySparkDataFrame(entity):
        handler = PySparkDataFrameDataHandler(options, entity)
    elif dataFrameMisc.isPandasDataFrame(entity):
        handler = PandasDataFrameDataHandler(options, entity)
    return handler
def getMenuInfo(self, entity):
    """Return the download menu entries available for ``entity``.

    PySpark and pandas DataFrames get one "Download as File" item; any other
    entity gets an empty list.
    """
    supported = dataFrameMisc.isPySparkDataFrame(entity)
    if not supported:
        # Only probe for pandas when the PySpark check failed, mirroring the
        # short-circuit evaluation order of a plain ``or``.
        supported = dataFrameMisc.isPandasDataFrame(entity)

    menu = []
    if supported:
        menu.append({
            "categoryId": "Download",
            "title": "Download as File",
            "icon": "fa-download",
            "id": "downloadFile",
        })
    return menu
def getMenuInfo(self, entity, dataHandler):
    """Return the table menu entries available for ``entity``.

    When a ``dataHandler`` is supplied, its ``entity`` attribute replaces the
    ``entity`` argument before the type checks run.

    * PySpark / pandas DataFrame -> one "DataFrame Table" item.
    * GraphFrame (matched by fully-qualified class name) -> "Graph Vertices"
      and "Graph Edges" items.
    * Anything else -> empty list.
    """
    target = entity if dataHandler is None else dataHandler.entity

    if (dataFrameMisc.isPySparkDataFrame(target)
            or dataFrameMisc.isPandasDataFrame(target)):
        return [{
            "categoryId": "Table",
            "title": "DataFrame Table",
            "icon": "fa-table",
            "id": "dataframe",
        }]

    if dataFrameMisc.fqName(target) == "graphframes.graphframe.GraphFrame":
        vertices_item = {
            "categoryId": "Table",
            "title": "Graph Vertices",
            "icon": "fa-location-arrow",
            "id": "vertices",
        }
        edges_item = {
            "categoryId": "Table",
            "title": "Graph Edges",
            "icon": "fa-link",
            "id": "edges",
        }
        return [vertices_item, edges_item]

    return []
def getDataHandler(options, entity):
    """Pick the data handler matching the type of ``entity``.

    The checks run in this order: PySpark DataFrame, pandas DataFrame,
    ``StreamingDataAdapter`` (which supplies its own handler through
    ``getDisplayDataHandler``). ``None`` is returned when nothing matches.
    """
    if dataFrameMisc.isPySparkDataFrame(entity):
        return PySparkDataFrameDataHandler(options, entity)
    if dataFrameMisc.isPandasDataFrame(entity):
        return PandasDataFrameDataHandler(options, entity)
    if isinstance(entity, StreamingDataAdapter):
        # Streaming adapters build their own display handler.
        return entity.getDisplayDataHandler(options, entity)
    return None
def getDataHandler(options, entity):
    """Pick the data handler matching the type of ``entity``.

    The checks run in this order: PySpark DataFrame, pandas DataFrame,
    ``dict`` or array of dicts (handled as JSON), ``StreamingDataAdapter``
    (which supplies its own handler). ``None`` is returned when nothing
    matches.
    """
    if dataFrameMisc.isPySparkDataFrame(entity):
        return PySparkDataFrameDataHandler(options, entity)
    if dataFrameMisc.isPandasDataFrame(entity):
        return PandasDataFrameDataHandler(options, entity)

    looks_like_json = isinstance(entity, dict) or isArrayOfDict(entity)
    if looks_like_json:
        return JSONDataHandler(options, entity)

    if isinstance(entity, StreamingDataAdapter):
        # Streaming adapters build their own display handler.
        return entity.getDisplayDataHandler(options, entity)
    return None
def getMenuInfo(self, entity, dataHandler):
    """Return the table menu entries available for ``entity``.

    If ``dataHandler`` is not ``None``, the entity it wraps
    (``dataHandler.entity``) is inspected instead of the ``entity`` argument.
    DataFrames (PySpark or pandas) get a "DataFrame Table" item, GraphFrames
    get "Graph Vertices" and "Graph Edges" items, and everything else gets an
    empty list.
    """
    if dataHandler is not None:
        entity = dataHandler.entity

    def table_item(title, icon, item_id):
        # Every entry in this menu belongs to the "Table" category.
        return {"categoryId": "Table", "title": title, "icon": icon, "id": item_id}

    is_dataframe = (
        dataFrameMisc.isPySparkDataFrame(entity)
        or dataFrameMisc.isPandasDataFrame(entity)
    )
    if is_dataframe:
        return [table_item("DataFrame Table", "fa-table", "dataframe")]

    if dataFrameMisc.fqName(entity) == "graphframes.graphframe.GraphFrame":
        return [
            table_item("Graph Vertices", "fa-location-arrow", "vertices"),
            table_item("Graph Edges", "fa-link", "edges"),
        ]

    return []
def doRender(self, handlerId):
    """Render ``self.entity`` as an HTML table.

    For a GraphFrame, ``handlerId`` selects which DataFrame to show:
    ``"edges"`` picks the edges, any other id picks the vertices. DataFrames
    are rendered through the ``dataframeTable.html`` template; anything else
    produces an "Unable to display object" message.
    """
    target = self.entity
    if dataFrameMisc.fqName(target) == "graphframes.graphframe.GraphFrame":
        target = target.edges if handlerId == "edges" else target.vertices

    renderable = (
        dataFrameMisc.isPySparkDataFrame(target)
        or dataFrameMisc.isPandasDataFrame(target)
    )
    if not renderable:
        self._addHTML(""" <b>Unable to display object</b> """)
        return

    self._addHTMLTemplate(
        'dataframeTable.html',
        entity=PandasDataFrameAdapter(target),
    )
def doRender(self, handlerId):
    """Render ``self.entity`` as an HTML table.

    For a GraphFrame, ``handlerId`` selects which DataFrame to show:
    ``"edges"`` picks the edges, any other id picks the vertices.

    When the resulting entity is a DataFrame, the comma-separated
    ``hideColumns`` and ``showColumns`` options are parsed into
    ``self.hideColumns`` / ``self.showColumns`` (a missing or empty option is
    stored as-is) and the ``dataframeTable.html`` template is rendered.
    Any other entity produces an "Unable to display object" message.
    """
    target = self.entity
    if dataFrameMisc.fqName(target) == "graphframes.graphframe.GraphFrame":
        target = target.edges if handlerId == "edges" else target.vertices

    renderable = (
        dataFrameMisc.isPySparkDataFrame(target)
        or dataFrameMisc.isPandasDataFrame(target)
    )
    if not renderable:
        self._addHTML(""" <b>Unable to display object {}</b> """.format(target))
        return

    hide_spec = self.options.get("hideColumns")
    self.hideColumns = hide_spec and [name.strip() for name in hide_spec.split(",")]
    show_spec = self.options.get("showColumns")
    self.showColumns = show_spec and [name.strip() for name in show_spec.split(",")]

    self._addHTMLTemplate(
        'dataframeTable.html',
        entity=PandasDataFrameAdapter(target),
        table_noschema=self.options.get("table_noschema", "false"),
    )
def getMenuInfo(self, entity):
    """Return the chart menu entries available for ``entity``.

    PySpark and pandas DataFrames get the full list of chart types (bar,
    line, scatter, pie, map, histogram), in that order; any other entity
    gets an empty list.
    """
    is_dataframe = (
        dataFrameMisc.isPySparkDataFrame(entity)
        or dataFrameMisc.isPandasDataFrame(entity)
    )
    if not is_dataframe:
        return []

    # (title, icon, id) for each chart type, in menu order.
    chart_specs = (
        ("Bar Chart", "fa-bar-chart", "barChart"),
        ("Line Chart", "fa-line-chart", "lineChart"),
        ("Scatter Plot", "fa-circle", "scatterPlot"),
        ("Pie Chart", "fa-pie-chart", "pieChart"),
        ("Map", "fa-globe", "mapChart"),
        ("Histogram", "fa-table", "histogram"),
    )
    return [
        {"categoryId": "Chart", "title": title, "icon": icon, "id": chart_id}
        for title, icon, chart_id in chart_specs
    ]
def doRender(self, handlerId):
    """Render ``self.entity`` as an HTML table.

    For a GraphFrame, ``handlerId`` selects which DataFrame to show:
    ``"edges"`` picks the edges, any other id picks the vertices.

    When the resulting entity is a DataFrame, the ``dataframeTable.html``
    template is rendered with:

    * ``table_noschema`` - the option of the same name (default ``"false"``).
    * ``table_hidecols`` - a ``{column_name: 1}`` map built from the
      comma-separated ``hideColumns`` option (empty when the option is
      missing or empty).

    Any other entity produces an "Unable to display object" message.
    """
    entity = self.entity
    if dataFrameMisc.fqName(entity) == "graphframes.graphframe.GraphFrame":
        entity = entity.edges if handlerId == "edges" else entity.vertices

    if (dataFrameMisc.isPySparkDataFrame(entity)
            or dataFrameMisc.isPandasDataFrame(entity)):
        # Read the same option that is being checked. The previous code tested
        # for "hideColumns" but then split "showColumns", which crashed with
        # AttributeError when only "hideColumns" was given and otherwise hid
        # the wrong columns.
        hide_spec = self.options.get("hideColumns")
        hcmap = {}
        if hide_spec:
            # Strip blanks so "a, b" hides both "a" and "b".
            hcmap = {name.strip(): 1 for name in hide_spec.split(",")}

        self._addHTMLTemplate(
            'dataframeTable.html',
            entity=PandasDataFrameAdapter(entity),
            table_noschema=self.options.get("table_noschema", "false"),
            table_hidecols=hcmap,
        )
        return

    self._addHTML(""" <b>Unable to display object {}</b> """.format(entity))
def stats_table(self, field):
    """Build an HTML table of descriptive statistics for column ``field``.

    Populates three attributes as a side effect:

    * ``self.summary_stats`` - ``(label, text)`` pairs for count, mean, std,
      min and max.
    * ``self.quantiles`` - ``(label, text)`` pairs for the 2/9/25/50/75/91/98
      percentiles (for PySpark data only when ``Environment.sparkVersion``
      is 2).
    * ``self.frequents`` - up to five frequent values, as strings.

    For a PySpark DataFrame the values come from ``describe``,
    ``approxQuantile`` and ``stat.freqItems``. Otherwise ``self.df`` is
    replaced by ``self.data_handler.entity`` if it is not already a pandas
    DataFrame, and the values come from ``Series.describe`` and
    ``value_counts``. If the data is neither, all three lists stay empty.

    Returns:
        The HTML string. The "Quantiles" columns are included only when
        ``Environment.sparkVersion`` is 2.
    """
    self.summary_stats = []
    self.quantiles = []
    self.frequents = []

    summary_labels = ['count', 'mean', 'std', 'min', 'max']
    quantile_labels = ['2%', '9%', '25%', '50%', '75%', '91%', '98%']
    quantile_probs = [.02, .09, .25, .50, .75, .91, .98]
    max_frequents = 5
    # Evaluated once; it also decides whether the Quantiles columns are shown.
    show_quantiles = Environment.sparkVersion == 2

    if isPySparkDataFrame(self.df):
        # Collect once: every collect() call launches a Spark job, and the
        # previous code issued one per summary row.
        stat_rows = self.df.describe(field).collect()
        for pos, label in enumerate(summary_labels):
            # The count is shown as an integer, everything else with 2 decimals.
            fmt = "{:.0f}" if pos == 0 else "{:.2f}"
            self.summary_stats.append((label, fmt.format(float(stat_rows[pos][1]))))

        if show_quantiles:
            quants = self.df.approxQuantile(field, quantile_probs, 0.1)
            for pos, q in enumerate(quants):
                self.quantiles.append((quantile_labels[pos] + "ile", "{:.2f}".format(q)))

        freqlist = self.df.stat.freqItems([field], 0.1).collect()[0][field + '_freqItems']
        self.frequents.extend(str(item) for item in freqlist[:max_frequents])
    else:
        if not isPandasDataFrame(self.df):
            self.df = self.data_handler.entity
        if isPandasDataFrame(self.df):
            statsdf = self.df[field].describe(quantile_probs)
            for pos, label in enumerate(summary_labels):
                fmt = "{:.0f}" if pos == 0 else "{:.2f}"
                # Look up by label: integer keys on a label-indexed Series
                # rely on a deprecated positional fallback.
                self.summary_stats.append((label, fmt.format(statsdf[label])))
            for label in quantile_labels:
                self.quantiles.append((label + "ile", "{:.2f}".format(statsdf[label])))

            freqseries = self.df[field].value_counts()
            self.frequents.extend(str(ix) for ix in freqseries.index[:max_frequents])

    summaryname = '<br>'.join(s[0] for s in self.summary_stats)
    summaryvalue = '<br>'.join(s[1] for s in self.summary_stats)
    quantname = '<br>'.join(q[0] for q in self.quantiles)
    quantvalue = '<br>'.join(q[1] for q in self.quantiles)
    freqvalue = '<br>'.join(self.frequents)

    if show_quantiles:
        header_cells = "<th colspan='2'>Summary</th> <th colspan='2'>Quantiles</th> <th>Frequents</th>"
        body_cells = "<td>{}</td> <td>{}</td> <td>{}</td> <td>{}</td> <td>{}</td>"
        cell_values = (summaryname, summaryvalue, quantname, quantvalue, freqvalue)
    else:
        header_cells = "<th colspan='2'>Summary</th> <th>Frequents</th>"
        body_cells = "<td>{}</td> <td>{}</td> <td>{}</td>"
        cell_values = (summaryname, summaryvalue, freqvalue)

    table = """ <table class="stats-table"> <thead> <tr>"""
    table += header_cells
    table += """ </tr> </thead> <tbody> <tr>"""
    table += body_cells
    table += """ </tr> </tbody> </table>"""
    return table.format(*cell_values)
def createDataframeAdapter(entity):
    """Return an object exposing ``entity`` through the DataFrame interface.

    pandas DataFrames are wrapped in a ``PandasDataFrameAdapter``; PySpark
    DataFrames are returned unchanged.

    Raises:
        ValueError: if ``entity`` is neither a pandas nor a PySpark DataFrame.
    """
    if dataFrameMisc.isPandasDataFrame(entity):
        return PandasDataFrameAdapter(entity)
    if not dataFrameMisc.isPySparkDataFrame(entity):
        raise ValueError("Invalid argument")
    return entity