def GetTableData(self, query_parameters): if query_parameters.column_ids == ('col1', ): return data_source.TableData(rows=[['blue'], ['green'], ['red']]) elif query_parameters.column_ids == ('col2', ): return data_source.TableData( rows=[['california'], ['maine'], ['oregon']]) elif query_parameters.column_ids == ('col6', ): return data_source.TableData(rows=[['east'], ['west']]) elif query_parameters.column_ids == ('col2', 'col6'): return data_source.TableData( rows=[['california', 'west'], ['maine', 'east'], ['oregon', 'west']]) else: data_columns = [] for column_id in query_parameters.column_ids: if column_id == 'col1': data_columns.append(['blue', 'blue', 'green', 'red']) elif column_id == 'col2': data_columns.append( ['california', 'california', 'maine', 'oregon']) elif column_id == 'col3': data_columns.append(['1989', '1990', '1991', '1992']) elif column_id == 'col4': data_columns.append(['1.2', '1.3', '1.4', '1.5']) elif column_id == 'col5': data_columns.append(['4', '5', '6', '7']) elif column_id == 'col6': data_columns.append(['west', 'west', 'east', 'west']) # Transpose rows and columns so that table is properly set up return data_source.TableData([list(r) for r in zip(*data_columns)])
def GetTableData(self, query_parameters): """Calculate and return the requested table data. Uses sqlite to group and aggregate the raw data from the original CSV. Args: query_parameters: A QueryParameters object Returns: A TableData object containing the data for the requested table Raises: DataSourceError: If query against sqlite instance fails """ if query_parameters.query_type == data_source.QueryParameters.CONCEPT_QUERY: # This request is for a concept definition table # Filter out total values where_statements = [] for column_id in query_parameters.column_ids: column = self.column_bundle.GetColumnByID(column_id) if column.total_val: where_statements.append( '%s != "%s"' % (column.column_id, column.total_val)) if where_statements: where_clause = 'WHERE ' + ','.join(where_statements) else: where_clause = '' query_str = ('SELECT DISTINCT %s FROM csv_table %s ORDER BY %s' % (','.join(query_parameters.column_ids), where_clause, ','.join(query_parameters.column_ids))) elif query_parameters.query_type == data_source.QueryParameters.SLICE_QUERY: # This request is for a slice table sql_names = [] dimension_sql_names = [] where_statements = [] time_dimension_id = '' # Construct a SQL query that selects all parameters (with the necessary # aggregations), groups by non-time dimensions, and orders by all the # dimensions, with time last. for column_id in query_parameters.column_ids: column = self.column_bundle.GetColumnByID(column_id) if column.total_val: where_statements.append( '%s != "%s"' % (column.column_id, column.total_val)) if column.slice_role == 'dimension': sql_names.append(column_id) dimension_sql_names.append(column_id) if column.data_type == 'date': time_dimension_id = column_id elif column.slice_role == 'metric': sql_names.append( '%s(%s) AS %s' % (column.internal_parameters['aggregation'], column_id, column_id)) order_sql_names = ([ d for d in dimension_sql_names if d != time_dimension_id ]) if time_dimension_id: order_sql_names.append(time_dimension_id) # Handle total values in non-selected columns for column in self.column_bundle.GetColumnIterator(): if column.column_id not in query_parameters.column_ids: if column.total_val: where_statements.append( '%s = "%s"' % (column.column_id, column.total_val)) if where_statements: where_clause = 'WHERE ' + ','.join(where_statements) else: where_clause = '' query_str = ( 'SELECT %s FROM csv_table %s GROUP BY %s ORDER BY %s' % (','.join(sql_names), where_clause, ','.join(dimension_sql_names), ','.join(order_sql_names))) else: raise data_source.DataSourceError('Unknown query type: %s' % query_parameters.query_type) if self.verbose: print 'Executing query:\n%s\n' % (query_str) # Execute the query against the sqlite backend cursor = self.sqlite_connection.cursor() try: cursor.execute(query_str) except sqlite3.OperationalError as e: raise data_source.DataSourceError('Error executing query: %s\n%s' % (query_str, str(e))) query_results = [] for row in cursor: query_results.append(list(row)) cursor.close() return data_source.TableData(rows=query_results)
def testMergeValues(self): another_table_data = data_source.TableData([[4, 5, 6], [6, 7, 8]]) merged_table_data = self.table_data.MergeValues(another_table_data, num_columns=2) self.assertEqual(merged_table_data.rows, [[1, 2, 3, 4, 5], [4, 5, 6, 6, 7]])
def GetTableData(self, query_parameters): """Calculate and return the requested table data. Use in-memory data container to group and aggregate the raw data from the original CSV. Args: query_parameters: A QueryParameters object Returns: A TableData object containing the data for the requested table Raises: DataSourceError: If query against sqlite instance fails """ if query_parameters.query_type == data_source.QueryParameters.CONCEPT_QUERY: omitted_values = {} # Pull out total values for column_id in query_parameters.column_ids: column = self.column_bundle.GetColumnByID(column_id) if column.total_val: omitted_values[column_id] = [column.total_val] query_results = self.data_container.DistinctValues( query_parameters.column_ids, omitted_values) elif query_parameters.query_type == data_source.QueryParameters.SLICE_QUERY: # This request is for a slice table all_columns = [] dimension_columns = [] time_dimension_id = '' metric_columns = [] metric_aggregation_map = {} query_total_vals = {} # Construct a SQL query that selects all parameters (with the necessary # aggregations), groups by non-time dimensions, and orders by all the # dimensions, with time last. for column_id in query_parameters.column_ids: column = self.column_bundle.GetColumnByID(column_id) all_columns.append(column_id) if column.slice_role == 'dimension': dimension_columns.append(column_id) if column.data_type == 'date': time_dimension_id = column_id elif column.slice_role == 'metric': metric_columns.append(column_id) metric_aggregation_map[column_id] = ( column.internal_parameters['aggregation']) if column.total_val: query_total_vals[column.column_id] = column.total_val order_by_columns = ([ d for d in dimension_columns if d != time_dimension_id ]) if time_dimension_id: order_by_columns.append(time_dimension_id) # Calculate the rows to filter out based on totals aggregated_total_vals = {} for column in self.column_bundle.GetColumnIterator(): if column.column_id not in query_parameters.column_ids: if column.total_val: aggregated_total_vals[ column.column_id] = column.total_val query_results = self.data_container.GroupedValues( all_columns, dimension_columns, order_by_columns, metric_aggregation_map, aggregated_total_vals, query_total_vals) else: raise data_source.DataSourceError('Unknown query type: %s' % query_parameters.query_type) return data_source.TableData(rows=query_results)
def setUp(self): self.table_data = data_source.TableData([[1, 2, 3], [4, 5, 6]])