def sampleColumn(self, numerical):
    """Pick a default column name to chart (e.g. for a pie chart).

    Candidate columns are the fields of ``self.entity.schema`` whose name is
    not ``id`` (case-insensitive) and, when ``numerical`` is truthy, whose
    data type is numeric according to ``dataFrameMisc.isNumericType``.

    For each candidate, the rows of a sample of the entity (roughly 200 rows
    when the entity is larger than that) are grouped by the column. The first
    candidate whose most frequent value occurs more than 10 times is returned.

    :param numerical: when truthy, only numeric columns are considered.
    :returns: a one-element list holding the chosen column name. If no
        candidate passes the frequency test, the first candidate is returned;
        if there is no candidate at all, or Spark is not available, the list
        is ``[None]``.
    """
    targetSampleSize = 200
    minTopGroupSize = 10

    default = None
    if Environment.hasSpark:
        from pyspark.sql import functions as F

        # Built lazily on the first candidate and reused afterwards: counting
        # the entity is a full Spark job and does not depend on the column.
        sample = None
        for field in self.entity.schema.fields:
            # Ignore unique ids
            if field.name.lower() == 'id':
                continue
            if numerical and not dataFrameMisc.isNumericType(field.dataType):
                continue

            name = field.name.decode("utf-8") if PY2 else field.name
            # Remember only the FIRST candidate as the fallback. The original
            # expression `default or a if PY2 else b` parsed as
            # `(default or a) if PY2 else b`, which on Python 3 overwrote the
            # fallback with every later candidate.
            if default is None:
                default = name

            if sample is None:
                count = self.entity.count()
                if count > targetSampleSize:
                    sample = self.entity.sample(
                        False, float(targetSampleSize) / count)
                else:
                    sample = self.entity

            # Size of the largest group of identical values in this column.
            topGroup = (sample.groupBy(field.name)
                        .agg(F.count(field.name).alias("agg"))
                        .orderBy(F.desc("agg"))
                        .select("agg")
                        .take(1))
            # take(1) is empty when the sample holds no rows; treat that as
            # "not a good column" instead of raising IndexError.
            if topGroup and topGroup[0]["agg"] > minTopGroupSize:
                return [name]
    # Otherwise, return first non-id column
    return [default]
def canRenderChart(self, handlerId, aggregation, fieldNames):
    """Report whether a chart can be rendered for the wrapped entity.

    A ``"COUNT"`` aggregation is always accepted. Any other aggregation is
    accepted only if at least one field of ``self.entity.schema`` has a
    numeric data type according to ``dataFrameMisc.isNumericType``.
    ``handlerId`` and ``fieldNames`` are not consulted by this method.

    :returns: ``(True, None)`` when rendering is possible, otherwise
        ``(False, reason)`` where ``reason`` is a message string.
    """
    # Counting rows needs no numeric column, so the schema is not inspected.
    if aggregation == "COUNT":
        return (True, None)

    hasNumericColumn = any(
        dataFrameMisc.isNumericType(column.dataType)
        for column in self.entity.schema.fields
    )
    if hasNumericColumn:
        return (True, None)
    return (False, "At least one numerical column required.")
def getDefaultValueFields(self, handlerId, aggregation):
    """Return the names of the leading numeric columns to use as value fields.

    Fields of ``self.entity.schema`` are scanned in order and the names of
    those that ``dataFrameMisc.isNumericType`` accepts are collected. The scan
    stops once the number collected equals
    ``self.getPreferredDefaultValueFieldCount(handlerId)``.
    ``aggregation`` is not consulted by this method.

    :returns: list of column names; empty when no column is numeric.
    """
    numericNames = []
    for column in self.entity.schema.fields:
        if not dataFrameMisc.isNumericType(column.dataType):
            continue
        numericNames.append(column.name)
        # The limit is checked only after an append, so it is never queried
        # when the schema has no numeric column.
        if len(numericNames) == self.getPreferredDefaultValueFieldCount(handlerId):
            break
    return numericNames
def getDefaultKeyFields(self, handlerId, aggregation):
    """Return the column names to use as default key fields.

    When ``self.supportsKeyFields(handlerId)`` is falsy, no key fields are
    offered. Otherwise the fields of ``self.entity.schema`` are scanned in
    order and the names of the non-numeric columns (per
    ``dataFrameMisc.isNumericType``) not called ``id`` (case-insensitive) are
    collected, stopping once
    ``self.getPreferredDefaultKeyFieldCount(handlerId)`` names are gathered.
    If no column qualifies, the first column of the schema is used instead.
    ``aggregation`` is not consulted by this method.

    :returns: list of column names; empty when key fields are unsupported or
        the schema has no fields at all.
    """
    if not self.supportsKeyFields(handlerId):
        return []

    fields = self.entity.schema.fields
    defaultFields = []
    for field in fields:
        if dataFrameMisc.isNumericType(field.dataType):
            continue
        if field.name.lower() == "id":
            continue
        defaultFields.append(field.name)
        if len(defaultFields) == self.getPreferredDefaultKeyFieldCount(handlerId):
            break

    # Fall back to the first column, but only if there is one: indexing an
    # empty schema used to raise IndexError.
    if not defaultFields and len(fields) > 0:
        defaultFields.append(fields[0].name)
    return defaultFields
def sampleColumn(self, numerical):
    """Pick a default column name to chart (e.g. for a pie chart).

    Candidate columns are the fields of ``self.entity.schema`` whose name is
    not ``id`` (case-insensitive) and, when ``numerical`` is truthy, whose
    data type is numeric according to ``dataFrameMisc.isNumericType``.

    For each candidate, the rows of a sample of the entity (roughly 200 rows
    when the entity is larger than that) are grouped by the column. The first
    candidate whose most frequent value occurs more than 10 times is returned.

    :param numerical: when truthy, only numeric columns are considered.
    :returns: a one-element list holding the chosen column name. If no
        candidate passes the frequency test, the first candidate is returned;
        if there is no candidate at all, or Spark is not available, the list
        is ``[None]``.
    """
    targetSampleSize = 200
    minTopGroupSize = 10

    default = None
    if Environment.hasSpark:
        from pyspark.sql import functions as F

        # Built lazily on the first candidate and reused afterwards: counting
        # the entity is a full Spark job and does not depend on the column.
        sample = None
        for field in self.entity.schema.fields:
            # Ignore unique ids
            if field.name.lower() == 'id':
                continue
            if numerical and not dataFrameMisc.isNumericType(field.dataType):
                continue

            name = field.name.decode("utf-8") if PY2 else field.name
            # Remember only the FIRST candidate as the fallback. The original
            # expression `default or a if PY2 else b` parsed as
            # `(default or a) if PY2 else b`, which on Python 3 overwrote the
            # fallback with every later candidate.
            if default is None:
                default = name

            if sample is None:
                count = self.entity.count()
                if count > targetSampleSize:
                    sample = self.entity.sample(
                        False, float(targetSampleSize) / count)
                else:
                    sample = self.entity

            # Size of the largest group of identical values in this column.
            topGroup = (sample.groupBy(field.name)
                        .agg(F.count(field.name).alias("agg"))
                        .orderBy(F.desc("agg"))
                        .select("agg")
                        .take(1))
            # take(1) is empty when the sample holds no rows; treat that as
            # "not a good column" instead of raising IndexError.
            if topGroup and topGroup[0]["agg"] > minTopGroupSize:
                return [name]
    # Otherwise, return first non-id column
    return [default]
def isNumericType(self, field):
    """Tell whether ``field`` has a numeric data type.

    Thin delegate: passes ``field.dataType`` to
    ``dataFrameMisc.isNumericType`` and returns its result unchanged.
    """
    numericCheck = dataFrameMisc.isNumericType
    return numericCheck(field.dataType)
def isNumericType(self, field):
    """Return the result of the numeric-type check for ``field``.

    The check itself lives in ``dataFrameMisc.isNumericType``; this method
    only hands it the field's ``dataType`` attribute.
    """
    isNumeric = dataFrameMisc.isNumericType(
        field.dataType,
    )
    return isNumeric