def get_types_from_rows(column_names, rows):
    """Infer a per-column type description by scanning the result rows.

    Returns a list with one entry per column (as produced by ``get_type``),
    in column order. Scanning stops early once every column has a type.

    Raises:
        exceptions.InternalError: if ``rows`` is empty (nothing to infer from).
        exceptions.DatabaseError: if a row's width does not match
            ``column_names``, if two rows disagree on a column's type, or if
            some column is None in every row so its type cannot be inferred.
    """
    if not column_names:
        return []
    if not rows:
        # Fixed: original used an f-string with no placeholders.
        raise exceptions.InternalError(
            'Cannot infer the column types from empty rows')
    types = [None] * len(column_names)
    remaining = len(column_names)  # columns still lacking an inferred type
    for row in rows:
        if remaining <= 0:
            break  # every column already typed; no need to scan further rows
        if len(row) != len(column_names):
            raise exceptions.DatabaseError(
                f'Column names {column_names} does not match row {row}')
        for column_index, value in enumerate(row):
            if value is None:
                continue  # a None cell carries no type information
            current_type = types[column_index]
            new_tc = get_type(value)
            if current_type is None:
                types[column_index] = new_tc
                remaining -= 1
            elif new_tc is not current_type.code:
                # NOTE(review): identity comparison against ``.code`` kept
                # from the original; assumes get_type() returns singleton
                # type codes — confirm against get_type's contract.
                raise exceptions.DatabaseError(
                    f'Differing column type found for column @{column_index} {column_names[column_index]}:'
                    f'{current_type} vs {new_tc}')
    # Generator instead of the original throwaway list inside any([...]).
    if any(t is None for t in types):
        raise exceptions.DatabaseError(
            f'Couldn\'t infer all the types {types}')
    return types
def get_group_by_column_names(aggregation_results):
    """Return the group-by column list shared by all aggregation metrics.

    Metrics that report no ``groupByColumns`` (or an empty list) are skipped;
    every metric that does report columns must agree with the others,
    otherwise a DatabaseError is raised.
    """
    shared_cols = []
    for aggregation in aggregation_results:
        name = aggregation.get('function', 'noname')
        metric_cols = aggregation.get('groupByColumns', [])
        if not shared_cols:
            # First metric with a non-empty column list wins; empty lists
            # simply leave shared_cols falsy so a later metric can set it.
            shared_cols = list(metric_cols)
        elif shared_cols != metric_cols:
            raise exceptions.DatabaseError(
                f"Cols for metric {name}: {metric_cols} differ from other columns {shared_cols}"
            )
    return shared_cols
def get_metadata_from_controller(self, path):
    """GET ``path`` from the controller and return the decoded JSON body.

    Raises exceptions.DatabaseError when the response body is not valid JSON.
    """
    endpoint = parse.urljoin(self._server, path)
    response = requests.get(endpoint, headers={'Accept': 'application/json'})
    try:
        metadata = response.json()
    except ValueError as e:
        raise exceptions.DatabaseError(
            f'Got invalid json response from {self._server}:{path}: {response.text}'
        ) from e
    if self._debug:
        logger.info(
            f"metadata get on {self._server}:{path} returned {metadata}")
    return metadata
def check_sufficient_responded(self, query, queried, responded):
    """Raise DatabaseError unless enough servers responded to the query.

    ``self.acceptable_respond_fraction`` selects the threshold:
      * 0          -> checking disabled, always passes
      * <= -1      -> every queried server must respond
      * in (0, 1)  -> at least that fraction of queried servers
      * otherwise  -> treated as an absolute server count
    Negative ``queried``/``responded`` counts always fail the check.
    """
    fraction = self.acceptable_respond_fraction
    if fraction == 0:
        return
    if min(queried, responded) < 0:
        # Unknown counts: force the failure branch below.
        responded, needed = -1, -1
    elif fraction <= -1:
        needed = queried
    elif 0 < fraction < 1:
        needed = int(fraction * queried)
    else:
        needed = fraction
    if responded < 0 or responded < needed:
        raise exceptions.DatabaseError(
            f"Query\n\n{query} timed out: Out of {queried}, only"
            f" {responded} responded, while needed was {needed}")
def execute(self, operation, parameters=None):
    """Run a PQL query against the broker and populate this cursor.

    Substitutes ``parameters`` into ``operation``, POSTs the query to
    ``self.url``, validates the response, then stores the row data in
    ``self._results`` and the column metadata in ``self.description``.
    Returns ``self`` so calls can be chained.

    Raises:
        exceptions.DatabaseError: on undecodable, timed-out, partial, or
            structurally invalid responses.
        exceptions.ProgrammingError: on a non-200 HTTP status.
    """
    query = apply_parameters(operation, parameters or {})
    headers = {'Content-Type': 'application/json'}
    headers.update(self._extra_request_headers)
    payload = {'pql': query}
    if self._debug:
        logger.info(
            f'Submitting the pinot query to {self.url}:\n{query}\n{pformat(payload)}, with {headers}'
        )
    r = requests.post(self.url, headers=headers, json=payload)
    # Some servers omit the charset; default to utf-8 before decoding JSON.
    if r.encoding is None:
        r.encoding = 'utf-8'
    try:
        payload = r.json()
    except Exception as e:
        raise exceptions.DatabaseError(
            f"Error when querying {query} from {self.url}, raw response is:\n{r.text}"
        ) from e
    if self._debug:
        logger.info(
            f'Got the payload of type {type(payload)} with the status code {0 if not r else r.status_code}:\n{payload}'
        )
    # Treat a partial or missing server-count pair as a timeout.
    num_servers_responded = payload.get('numServersResponded', -1)
    num_servers_queried = payload.get('numServersQueried', -1)
    if num_servers_queried > num_servers_responded or num_servers_responded == -1 or num_servers_queried == -1:
        raise exceptions.DatabaseError(
            f"Query\n\n{query} timed out: Out of {num_servers_queried}, only"
            f" {num_servers_responded} responded")
    # raise any error messages
    if r.status_code != 200:
        msg = f"Query\n\n{query}\n\nreturned an error: {r.status_code}\nFull response is {pformat(payload)}"
        raise exceptions.ProgrammingError(msg)
    if payload.get('exceptions', []):
        msg = '\n'.join(
            pformat(exception) for exception in payload['exceptions'])
        raise exceptions.DatabaseError(msg)
    rows = []  # array of array, where inner array is array of column values
    column_names = []  # column names, such that len(column_names) == len(rows[0])
    if 'aggregationResults' in payload:
        # Aggregation query: pivot each metric's (possibly grouped) results
        # into rows of [group values..., metric values...].
        aggregation_results = payload['aggregationResults']
        gby_cols = get_group_by_column_names(aggregation_results)
        metric_names = [
            agg_result['function'] for agg_result in aggregation_results
        ]
        gby_rows = OrderedDict()  # Dict of group-by-vals to array of metrics
        total_group_vals_key = ()  # sentinel key for the global (ungrouped) row
        num_metrics = len(metric_names)
        for i, agg_result in enumerate(aggregation_results):
            if 'groupByResult' in agg_result:
                # A metric cannot be grouped if another already reported a
                # global total — the two shapes are mutually exclusive.
                if total_group_vals_key in gby_rows:
                    raise exceptions.DatabaseError(
                        f"Invalid response {pformat(aggregation_results)} since we have both total and group by results"
                    )
                for gb_result in agg_result['groupByResult']:
                    group_values = gb_result['group']
                    if len(group_values) < len(gby_cols):
                        raise exceptions.DatabaseError(
                            f"Expected {pformat(agg_result)} to contain {len(gby_cols)}, but got {len(group_values)}"
                        )
                    elif len(group_values) > len(gby_cols):
                        # This can happen because of poor escaping in the
                        # results: re-join the spilled leading fragments back
                        # into the first group value.
                        extra = len(group_values) - len(gby_cols)
                        new_group_values = group_values[extra:]
                        new_group_values[0] = ''.join(
                            group_values[0:extra]) + new_group_values[0]
                        group_values = new_group_values
                    group_values_key = tuple(group_values)
                    if group_values_key not in gby_rows:
                        gby_rows[group_values_key] = [None] * num_metrics
                    gby_rows[group_values_key][i] = gb_result['value']
            else:
                # Global aggregation result (no group-by): all metrics share
                # the single sentinel-keyed row.
                if total_group_vals_key not in gby_rows:
                    gby_rows[total_group_vals_key] = [None] * num_metrics
                if len(gby_rows) != 1:
                    raise exceptions.DatabaseError(
                        f"Invalid response {pformat(aggregation_results)} since we have both total and group by results"
                    )
                if len(gby_cols) > 0:
                    raise exceptions.DatabaseError(
                        f"Invalid response since total aggregation results are present even when non zero gby_cols:{gby_cols}, {pformat(aggregation_results)}"
                    )
                gby_rows[total_group_vals_key][i] = agg_result['value']
        rows = []
        column_names = gby_cols + metric_names
        # Flatten the pivot dict into rows, sanity-checking widths.
        for group_vals, metric_vals in gby_rows.items():
            if len(group_vals) != len(gby_cols):
                raise exceptions.DatabaseError(
                    f"Expected {len(gby_cols)} but got {len(group_vals)} for a row"
                )
            if len(metric_vals) != len(metric_names):
                raise exceptions.DatabaseError(
                    f"Expected {len(metric_names)} but got {len(metric_vals)} for a row"
                )
            rows.append(list(group_vals) + metric_vals)
    elif 'selectionResults' in payload:
        # Selection query: columns and row values come back directly.
        results = payload['selectionResults']
        column_names = results.get('columns')
        values = results.get('results')
        if column_names and values:
            rows = values
        else:
            raise exceptions.DatabaseError(
                f'Expected columns and results in selectionResults, but got {pformat(results)} instead'
            )
    logger.debug(
        f'Got the rows as a type {type(rows)} of size {len(rows)}')
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(pformat(rows))
    # Reset cursor state before filling it; empty result sets leave
    # description as None and _results as [].
    self.description = None
    self._results = []
    if rows:
        types = get_types_from_rows(column_names, rows)
        if self._debug:
            logger.info(
                f'There are {len(rows)} rows and types is {pformat(types)}, column_names are {pformat(column_names)}, first row is like {pformat(rows[0])}, and last row is like {pformat(rows[-1])}'
            )
        self._results = rows
        self.description = get_description_from_types(column_names, types)
    return self