Ejemplo n.º 1
0
    def sampleColumn(self, numerical):
        """Pick a default column to chart, preferring one with repeated values.

        Walks the schema fields in order, skipping any field named ``id``
        (case-insensitive) and, when ``numerical`` is truthy, any field whose
        data type is not numeric according to ``dataFrameMisc.isNumericType``.
        The first eligible field whose most frequent value occurs more than
        10 times in a sample of roughly 200 rows is returned.

        Args:
            numerical: when truthy, only numeric-typed fields are considered.

        Returns:
            A one-element list holding the chosen column name. If no eligible
            field has a value repeated more than 10 times, the list holds the
            first eligible field name. It holds ``None`` when Spark is not
            available or no field is eligible.
        """
        default = None
        if Environment.hasSpark:
            from pyspark.sql import functions as F
            # The sample does not depend on the field being inspected, so it
            # is built once (lazily, on the first eligible field) instead of
            # re-running count() for every column.
            sample = None
            for field in self.entity.schema.fields:
                # Ignore unique ids
                if field.name.lower() == 'id':
                    continue
                if numerical and not dataFrameMisc.isNumericType(
                        field.dataType):
                    continue

                name = field.name.decode("utf-8") if PY2 else field.name
                # Remember only the FIRST eligible column as the fallback.
                if default is None:
                    default = name

                if sample is None:
                    count = self.entity.count()
                    sample = self.entity.sample(
                        False,
                        (float(200) / count)) if count > 200 else self.entity

                # Find a good column to display in pie ChartDisplay
                counts = sample.groupBy(field.name).agg(
                    F.count(field.name).alias("agg"))
                topRow = counts.orderBy(F.desc("agg")).select("agg").take(1)
                # take(1) yields an empty list when the sample has no rows.
                if topRow and topRow[0]["agg"] > 10:
                    return [name]

        # Otherwise, return first non-id column
        return [default]
Ejemplo n.º 2
0
 def canRenderChart(self, handlerId, aggregation, fieldNames):
     """Report whether a chart can be rendered for the given aggregation.

     A "COUNT" aggregation is always renderable. Any other aggregation needs
     at least one schema field whose data type is numeric according to
     ``dataFrameMisc.isNumericType``.

     Returns:
         ``(True, None)`` when rendering is possible, otherwise
         ``(False, <reason message>)``.
     """
     if aggregation != "COUNT":
         hasNumericColumn = any(
             dataFrameMisc.isNumericType(column.dataType)
             for column in self.entity.schema.fields
         )
         if not hasNumericColumn:
             return (False, "At least one numerical column required.")
     return (True, None)
Ejemplo n.º 3
0
 def getDefaultValueFields(self, handlerId, aggregation):
     """Collect the names of numeric schema fields to use as default values.

     Fields are scanned in schema order. Scanning stops as soon as the number
     of collected names equals
     ``self.getPreferredDefaultValueFieldCount(handlerId)``.

     Returns:
         A list of field names (possibly empty).
     """
     numericNames = []
     for column in self.entity.schema.fields:
         if not dataFrameMisc.isNumericType(column.dataType):
             continue
         numericNames.append(column.name)
         # Checked only after an append, so the preferred count is not
         # queried when no numeric field exists.
         if len(numericNames) == self.getPreferredDefaultValueFieldCount(handlerId):
             break
     return numericNames
Ejemplo n.º 4
0
 def getDefaultKeyFields(self, handlerId, aggregation):
     """Choose default key fields: non-numeric schema fields not named ``id``.

     Returns an empty list when ``self.supportsKeyFields(handlerId)`` is
     falsy. Otherwise fields are scanned in schema order and collection stops
     once ``self.getPreferredDefaultKeyFieldCount(handlerId)`` names have been
     gathered. If no field qualifies, the first schema field is used as a
     fallback; an empty schema yields an empty list.

     Returns:
         A list of field names (possibly empty).
     """
     if not self.supportsKeyFields(handlerId):
         return []
     schemaFields = self.entity.schema.fields
     defaultFields = []
     for field in schemaFields:
         if dataFrameMisc.isNumericType(field.dataType) or field.name.lower() == "id":
             continue
         defaultFields.append(field.name)
         if len(defaultFields) == self.getPreferredDefaultKeyFieldCount(handlerId):
             break
     # Fall back to the first column, but only if the schema has one:
     # indexing [0] on an empty schema would raise IndexError.
     if not defaultFields and schemaFields:
         defaultFields.append(schemaFields[0].name)
     return defaultFields
Ejemplo n.º 5
0
 def sampleColumn(self, numerical):
     """Pick a default column to chart, preferring one with repeated values.

     Walks the schema fields in order, skipping any field named ``id``
     (case-insensitive) and, when ``numerical`` is truthy, any field whose
     data type is not numeric according to ``dataFrameMisc.isNumericType``.
     The first eligible field whose most frequent value occurs more than
     10 times in a sample of roughly 200 rows is returned.

     Args:
         numerical: when truthy, only numeric-typed fields are considered.

     Returns:
         A one-element list holding the chosen column name. If no eligible
         field has a value repeated more than 10 times, the list holds the
         first eligible field name. It holds ``None`` when Spark is not
         available or no field is eligible.
     """
     default = None
     if Environment.hasSpark:
         from pyspark.sql import functions as F
         # The sample does not depend on the field being inspected, so it is
         # built once (lazily, on the first eligible field) instead of
         # re-running count() for every column.
         sample = None
         for field in self.entity.schema.fields:
             # Ignore unique ids
             if field.name.lower() == 'id':
                 continue
             if numerical and not dataFrameMisc.isNumericType(field.dataType):
                 continue

             name = field.name.decode("utf-8") if PY2 else field.name
             # Remember only the FIRST eligible column as the fallback.
             if default is None:
                 default = name

             if sample is None:
                 count = self.entity.count()
                 sample = self.entity.sample(False, (float(200) / count)) if count > 200 else self.entity

             # Find a good column to display in pie ChartDisplay
             counts = sample.groupBy(field.name).agg(F.count(field.name).alias("agg"))
             topRow = counts.orderBy(F.desc("agg")).select("agg").take(1)
             # take(1) yields an empty list when the sample has no rows.
             if topRow and topRow[0]["agg"] > 10:
                 return [name]

     # Otherwise, return first non-id column
     return [default]
Ejemplo n.º 6
0
 def isNumericType(self, field):
     """Return whether ``field.dataType`` is numeric per ``dataFrameMisc.isNumericType``."""
     fieldType = field.dataType
     return dataFrameMisc.isNumericType(fieldType)
 def isNumericType(self, field):
     """Return whether ``field.dataType`` is numeric per ``dataFrameMisc.isNumericType``."""
     fieldType = field.dataType
     return dataFrameMisc.isNumericType(fieldType)