    def _use_aggregation(self, agg, columns=None):
        """Compute the result using the aggregation function provided.
        Each aggregated column is aliased back to its original name so the
        extra name that Spark SQL adds is stripped off."""
        if not columns:
            columns = self._columns

        aggs = [agg(column).alias(column) for column in columns]
        aggRdd = self._grouped_spark_sql.agg(*aggs)
        df = Dataframe.from_schema_rdd(aggRdd, self._by)
        return df
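    # A minimal usage sketch (illustrative only): assuming ``grouped`` is an
    # instance of this grouped-dataframe wrapper, e.g. produced by a
    # hypothetical ``df.groupby("key")`` call, an aggregation could be run as:
    #
    #     from pyspark.sql import functions as F
    #     summed = grouped._use_aggregation(F.sum)             # every column
    #     priced = grouped._use_aggregation(F.max, ["price"])  # a subset
    #
    # Each call builds one Spark SQL aggregate expression per column, aliases
    # it back to the original column name, and wraps the grouped result in a
    # distributed Dataframe keyed by the grouping columns in ``self._by``.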
    def from_pd_data_frame(self, local_df):
        """Make a distributed dataframe from a local pandas dataframe. The
        intended use is for testing. Note: dtypes are re-inferred, so they
        may not match."""
        def frame_to_rows(frame):
            """Convert a pandas DataFrame into lists of values for Spark SQL rows."""
            # TODO: Convert to Row objects directly?
            return [r.tolist() for r in frame.to_records()]
        schema = list(local_df.columns)
        index_names = list(local_df.index.names)
        index_names = _normalize_index_names(index_names)
        schema = index_names + schema
        rows = self.spark_ctx.parallelize(frame_to_rows(local_df))
        sp_df = Dataframe.from_schema_rdd(
            self.sql_ctx.createDataFrame(
                rows,
                schema=schema,
                # Look at all the rows; this should be fine since the data
                # comes from a local dataset.
                samplingRatio=1))
        sp_df._index_names = index_names
        return sp_df
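    # A minimal usage sketch (illustrative only): assuming this method lives
    # on a context wrapper exposing ``spark_ctx`` (a SparkContext) and
    # ``sql_ctx`` (a SQLContext), and that ``psc`` below is such an instance:
    #
    #     import pandas as pd
    #     local = pd.DataFrame({"name": ["a", "b"], "value": [1, 2]})
    #     distributed = psc.from_pd_data_frame(local)
    #
    # The index names become leading columns (normalized by
    # _normalize_index_names) so the distributed dataframe can rebuild a
    # matching pandas index later; with samplingRatio=1 Spark re-infers the
    # schema from every row, which is acceptable for a small local dataset.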