Example #1
0
    def GetTableData(self, query_parameters):
        if query_parameters.column_ids == ('col1', ):
            return data_source.TableData(rows=[['blue'], ['green'], ['red']])
        elif query_parameters.column_ids == ('col2', ):
            return data_source.TableData(
                rows=[['california'], ['maine'], ['oregon']])
        elif query_parameters.column_ids == ('col6', ):
            return data_source.TableData(rows=[['east'], ['west']])
        elif query_parameters.column_ids == ('col2', 'col6'):
            return data_source.TableData(
                rows=[['california', 'west'], ['maine', 'east'],
                      ['oregon', 'west']])
        else:
            data_columns = []

            for column_id in query_parameters.column_ids:
                if column_id == 'col1':
                    data_columns.append(['blue', 'blue', 'green', 'red'])
                elif column_id == 'col2':
                    data_columns.append(
                        ['california', 'california', 'maine', 'oregon'])
                elif column_id == 'col3':
                    data_columns.append(['1989', '1990', '1991', '1992'])
                elif column_id == 'col4':
                    data_columns.append(['1.2', '1.3', '1.4', '1.5'])
                elif column_id == 'col5':
                    data_columns.append(['4', '5', '6', '7'])
                elif column_id == 'col6':
                    data_columns.append(['west', 'west', 'east', 'west'])

            # Transpose rows and columns so that table is properly set up
            return data_source.TableData([list(r) for r in zip(*data_columns)])
    def GetTableData(self, query_parameters):
        """Calculate and return the requested table data.

    Uses sqlite to group and aggregate the raw data from the original CSV.

    Args:
      query_parameters: A QueryParameters object

    Returns:
      A TableData object containing the data for the requested table

    Raises:
      DataSourceError: If query against sqlite instance fails
    """
        if query_parameters.query_type == data_source.QueryParameters.CONCEPT_QUERY:
            # This request is for a concept definition table

            # Filter out total values
            where_statements = []

            for column_id in query_parameters.column_ids:
                column = self.column_bundle.GetColumnByID(column_id)

                if column.total_val:
                    where_statements.append(
                        '%s != "%s"' % (column.column_id, column.total_val))
            if where_statements:
                where_clause = 'WHERE ' + ','.join(where_statements)
            else:
                where_clause = ''

            query_str = ('SELECT DISTINCT %s FROM csv_table %s ORDER BY %s' %
                         (','.join(query_parameters.column_ids), where_clause,
                          ','.join(query_parameters.column_ids)))
        elif query_parameters.query_type == data_source.QueryParameters.SLICE_QUERY:
            # This request is for a slice table
            sql_names = []
            dimension_sql_names = []
            where_statements = []

            time_dimension_id = ''

            # Construct a SQL query that selects all parameters (with the necessary
            # aggregations), groups by non-time dimensions, and orders by all the
            # dimensions, with time last.
            for column_id in query_parameters.column_ids:
                column = self.column_bundle.GetColumnByID(column_id)

                if column.total_val:
                    where_statements.append(
                        '%s != "%s"' % (column.column_id, column.total_val))

                if column.slice_role == 'dimension':
                    sql_names.append(column_id)
                    dimension_sql_names.append(column_id)

                    if column.data_type == 'date':
                        time_dimension_id = column_id
                elif column.slice_role == 'metric':
                    sql_names.append(
                        '%s(%s) AS %s' %
                        (column.internal_parameters['aggregation'], column_id,
                         column_id))

            order_sql_names = ([
                d for d in dimension_sql_names if d != time_dimension_id
            ])

            if time_dimension_id:
                order_sql_names.append(time_dimension_id)

            # Handle total values in non-selected columns
            for column in self.column_bundle.GetColumnIterator():
                if column.column_id not in query_parameters.column_ids:
                    if column.total_val:
                        where_statements.append(
                            '%s = "%s"' % (column.column_id, column.total_val))

            if where_statements:
                where_clause = 'WHERE ' + ','.join(where_statements)
            else:
                where_clause = ''

            query_str = (
                'SELECT %s FROM csv_table %s GROUP BY %s ORDER BY %s' %
                (','.join(sql_names), where_clause,
                 ','.join(dimension_sql_names), ','.join(order_sql_names)))
        else:
            raise data_source.DataSourceError('Unknown query type: %s' %
                                              query_parameters.query_type)

        if self.verbose:
            print 'Executing query:\n%s\n' % (query_str)

        # Execute the query against the sqlite backend
        cursor = self.sqlite_connection.cursor()

        try:
            cursor.execute(query_str)
        except sqlite3.OperationalError as e:
            raise data_source.DataSourceError('Error executing query: %s\n%s' %
                                              (query_str, str(e)))

        query_results = []

        for row in cursor:
            query_results.append(list(row))

        cursor.close()

        return data_source.TableData(rows=query_results)
Example #3
0
 def testMergeValues(self):
     another_table_data = data_source.TableData([[4, 5, 6], [6, 7, 8]])
     merged_table_data = self.table_data.MergeValues(another_table_data,
                                                     num_columns=2)
     self.assertEqual(merged_table_data.rows,
                      [[1, 2, 3, 4, 5], [4, 5, 6, 6, 7]])
Example #4
0
    def GetTableData(self, query_parameters):
        """Calculate and return the requested table data.

    Use in-memory data container to group and aggregate the raw data from the
    original CSV.

    Args:
      query_parameters: A QueryParameters object

    Returns:
      A TableData object containing the data for the requested table

    Raises:
      DataSourceError: If query against sqlite instance fails
    """
        if query_parameters.query_type == data_source.QueryParameters.CONCEPT_QUERY:
            omitted_values = {}

            # Pull out total values
            for column_id in query_parameters.column_ids:
                column = self.column_bundle.GetColumnByID(column_id)

                if column.total_val:
                    omitted_values[column_id] = [column.total_val]

            query_results = self.data_container.DistinctValues(
                query_parameters.column_ids, omitted_values)
        elif query_parameters.query_type == data_source.QueryParameters.SLICE_QUERY:
            # This request is for a slice table
            all_columns = []

            dimension_columns = []
            time_dimension_id = ''
            metric_columns = []
            metric_aggregation_map = {}

            query_total_vals = {}

            # Construct a SQL query that selects all parameters (with the necessary
            # aggregations), groups by non-time dimensions, and orders by all the
            # dimensions, with time last.
            for column_id in query_parameters.column_ids:
                column = self.column_bundle.GetColumnByID(column_id)

                all_columns.append(column_id)

                if column.slice_role == 'dimension':
                    dimension_columns.append(column_id)

                    if column.data_type == 'date':
                        time_dimension_id = column_id
                elif column.slice_role == 'metric':
                    metric_columns.append(column_id)
                    metric_aggregation_map[column_id] = (
                        column.internal_parameters['aggregation'])

                if column.total_val:
                    query_total_vals[column.column_id] = column.total_val

            order_by_columns = ([
                d for d in dimension_columns if d != time_dimension_id
            ])

            if time_dimension_id:
                order_by_columns.append(time_dimension_id)

            # Calculate the rows to filter out based on totals
            aggregated_total_vals = {}

            for column in self.column_bundle.GetColumnIterator():
                if column.column_id not in query_parameters.column_ids:
                    if column.total_val:
                        aggregated_total_vals[
                            column.column_id] = column.total_val

            query_results = self.data_container.GroupedValues(
                all_columns, dimension_columns, order_by_columns,
                metric_aggregation_map, aggregated_total_vals,
                query_total_vals)
        else:
            raise data_source.DataSourceError('Unknown query type: %s' %
                                              query_parameters.query_type)

        return data_source.TableData(rows=query_results)
Example #5
0
 def setUp(self):
     self.table_data = data_source.TableData([[1, 2, 3], [4, 5, 6]])