def check_select(select: Select):  # pragma: no cover
    """Reject SELECT clauses that an aggregation query cannot honour.

    Args:
        select: the parsed SELECT statement to validate.

    Raises:
        SqlError: if the statement has an ORDER or LIMIT clause, or any
            OFFSET value.
    """
    has_ordering = select.sort_clause
    if has_ordering:
        raise SqlError('ORDER clause is not allowed in aggregation query')

    has_limit = select.limit_count
    if has_limit:
        raise SqlError('LIMIT clause is not allowed in aggregation query')

    offset = select.limit_offset
    if offset is not None:
        raise SqlError('OFFSET is not supported')
def check_select(select: Select, streaming: bool):
    """Validate a non-aggregation SELECT statement.

    Args:
        select: the parsed SELECT statement to validate.
        streaming: when true, a missing LIMIT is tolerated.

    Raises:
        SqlError: if LIMIT is missing for a non-streaming query, or if the
            statement carries an OFFSET or a GROUP clause.
    """
    # Equivalent to "not streaming and limit_count is None"; `streaming`
    # is still tested first so the short-circuit order is unchanged.
    limit_ok = streaming or select.limit_count is not None
    if not limit_ok:
        raise SqlError('LIMIT must be specified')  # pragma: no cover

    if select.limit_offset is not None:
        raise SqlError('OFFSET is not supported')  # pragma: no cover

    if select.group_clause is not None:
        raise SqlError('GROUP clause are not allowed for non-agg query')  # pragma: no cover
def __init__(self, args: List[Expression], named_args: Dict[str, Expression], agg_star: bool):
    """Set up a `histogram` aggregation from its SQL call arguments.

    Args:
        args: positional arguments; exactly one is required and is kept as
            the expression producing the values to histogram.
        named_args: optional `bins` and `range` expressions. The mapping is
            copied before it is consumed, so the caller's dict is left
            untouched.
        agg_star: not used by this aggregation.

    Raises:
        SqlError: if the positional argument count is not 1, or if any
            named argument other than `bins` / `range` is supplied.
    """
    if len(args) != 1:
        raise SqlError('function `histogram` accepts only 1 positional paramter')
    self._array = args[0]

    # Pop from a private copy: popping from `named_args` itself would strip
    # `bins` / `range` from whatever object handed us its argument dict.
    remaining = dict(named_args)
    self._bins: Optional[Expression] = remaining.pop('bins', None)
    self._range = remaining.pop('range', None)
    # self._weights: Optional[Expression] = remaining.pop('weights')

    if len(remaining) != 0:
        raise SqlError(f'Unknown named argument: {remaining.keys()} for function `histogram`')
def __init__(self, args: List[Expression], named_args: Dict[str, Expression], agg_star: bool):
    """Set up a `crossmatch` aggregation from its SQL call arguments.

    Args:
        args: positional arguments. The first three are stored as the
            catalog coordinate, the target coordinate and the radius; any
            further arguments are kept, in order, as the field list.
        named_args: must be empty; this function takes no named arguments.
        agg_star: not used by this aggregation.

    Raises:
        SqlError: if fewer than three positional arguments are given, or
            if any named argument is supplied.
    """
    if len(args) < 3:
        # The guard admits more than three arguments (extras become
        # fields), so the message states a minimum rather than "only 3".
        raise SqlError(
            'function `crossmatch` requires at least 3 positional parameters')
    self._catalog_coord = args[0]
    self._target_coord = args[1]
    self._radius = args[2]
    self._fields = args[3:]
    if len(named_args) != 0:
        raise SqlError(
            f'Unknown named argument: {named_args.keys()} for function `crossmatch`'
        )
    # Starts empty; nothing in this block populates it.
    self._tree_cache = None
def evaluate_FuncCallExpression(self, e: FuncCallExpression):
    """Evaluate a call to a registered non-aggregate function.

    Every positional and named argument expression is evaluated by calling
    it with this object, and the results are forwarded to the function
    registered under `e.name` in `nonagg_functions`.

    Raises:
        SqlError: if `e.name` is not a key of `nonagg_functions`.
    """
    if e.name not in nonagg_functions:
        raise SqlError(f'Unknown function: {e.name}')  # pragma: no cover

    func = nonagg_functions[e.name]
    positional = [arg(self) for arg in e.args]
    keyword = {key: arg(self) for key, arg in e.named_args.items()}
    return func(*positional, **keyword)
def evaluate_UnaryOperationExpression(self, e: UnaryOperationExpression):
    """Evaluate a unary `+` or `-` applied to a sub-expression.

    The operand is evaluated first (by calling `e.a` with this object),
    then negated for `-` or returned unchanged for `+`.

    Raises:
        SqlError: for any other operator name.
    """
    operand = e.a(self)
    operator_name = e.name
    if operator_name == '-':
        return -operand
    elif operator_name == '+':
        return operand
    raise SqlError(f'Unknwon unary operator: {e.name}')  # pragma: no cover
def mapper(self, context: AggContext):
    """Histogram the values of the array expression for one context.

    The bin count comes from the `bins` expression, or is 50 when none was
    given. The value range comes from the `range` expression when present
    (it must evaluate to a list), otherwise from `self._minmax.result`.

    Returns:
        Whatever `numpy.histogram` returns for the evaluated array.

    Raises:
        SqlError: if the `range` expression does not evaluate to a list.
    """
    if self._bins is None:
        bins: int = 50
    else:
        bins = self._bins.evaluate(context)

    if self._range is None:
        # No explicit range: use the result held by the min/max helper.
        value_range = self._minmax.result(context)
    else:
        value_range = self._range(context)
        if not isinstance(value_range, list):
            raise SqlError(f'range must be a list: {value_range}')

    values = self._array(context)
    return numpy.histogram(values, bins=bins, range=value_range)
def run_agg_query(select: Select, run_make_env: RunMakeEnv, shared: Dict = None, progress: ProgressCB = None, interrupt_notifiyer: SafeEvent = None):
    """Run every aggregation found in `select` and assemble the result rows.

    Steps, in order:
      1. Validate the statement with `check_select`.
      2. Walk each target expression and collect the aggregation calls to
         run (see `pick_aggs`), plus one `PickOneAggCall` for each target
         that `is_context_dependent` reports as context dependent.
      3. Execute the collected aggregations one at a time through
         `run_make_env`, storing each result in `agg_results`.
      4. For every group key of the first stored result, build one output
         row with one entry per target.

    Args:
        select: the parsed SELECT statement.
        run_make_env: callable invoked once per aggregation with the
            environment template, its variables, a progress callback and
            `interrupt_notifiyer`.
        shared: passed through to the environment variables and to
            `FinalizeContext`.
        progress: optional callback; when set it receives a `Progress`
            rescaled across all aggregations.
        interrupt_notifiyer: forwarded unchanged to `run_make_env`.

    Returns:
        An `AggQueryResult` built from the per-group rows and the column
        names (`t.name`, falling back to `col<index>`).

    Raises:
        SqlError: if no aggregation operation was found in the targets.
    """
    # Imported inside the function rather than at module level.
    from .agg_functions import agg_functions

    # Source text handed to `run_make_env`; the names it references
    # (agg, select, agg_results, shared) are the keys of `env_context` below.
    # NOTE(review): line breaks inside this template were lost in the copy
    # under review; the body is kept flush-left so it stays valid whether or
    # not `run_make_env` dedents it -- confirm against the original file.
    make_env = '''
from quickdb.sql2mapreduce.agg import agg1_env
rerun, mapper, reducer, finalizer = agg1_env(agg, select, agg_results, shared)
'''

    check_select(select)

    # (expression, aggregation) pairs in execution order. The expression is
    # None for entries appended through `walk_subaggrs`.
    aggs: List[Tuple[Optional[Expression], AggCall]] = []

    def pick_aggs(e: Expression):
        # Visitor for `Expression.walk`: instantiate an AggCall for every
        # call to a known aggregate function.
        if isinstance(e, FuncCallExpression) and e.name in agg_functions:
            cls = cast(Type[AggCall], agg_functions[e.name])  # We need `cast` due to pyright's bug
            a = cls(e.args, e.named_args, e.agg_star)
            # Entries reported by walk_subaggrs are queued before `a` itself,
            # presumably so their results exist when `a` runs -- TODO confirm.
            walk_subaggrs(a, lambda sa: aggs.append((None, sa)))
            aggs.append((e, a))

    for target in select.target_list:
        target.val.walk(pick_aggs)
        if is_context_dependent(target.val):
            aggs.append((target.val, PickOneAggCall([target.val], {})))

    if len(aggs) == 0:
        raise SqlError(f'No aggregation operation')

    # run aggregation queries
    # Results are keyed by the AggCall and, when there is one, by the
    # expression that produced it.
    agg_results: Dict[Union[Expression, AggCall], Any] = {}

    for i, (e, agg) in enumerate(aggs):
        def progress1(p1: Progress):
            # Rescale this aggregation's progress onto the whole run:
            # `i` aggregations are already complete out of len(aggs).
            if progress:
                progress(Progress(done=p1.done + i * p1.total, total=p1.total * len(aggs)))

        env_context = {'agg': agg, 'select': select, 'agg_results': agg_results, 'shared': shared}
        result = run_make_env(make_env, env_context, progress1, interrupt_notifiyer)
        agg_results[agg] = result
        # NOTE(review): truthiness test, not `is not None`; this relies on
        # Expression instances being truthy -- confirm.
        if e:
            agg_results[e] = result

    # Group keys are taken from the first stored result; every other result
    # is indexed with the same keys below.
    group_values = next(iter(agg_results.values())).keys()
    target_list = {}
    for gv in group_values:
        context = FinalizeContext(agg_results, gv, shared=shared)
        # A target that is itself a key of agg_results uses the stored value
        # for this group; any other target is evaluated with `context`.
        target_list[gv] = [
            agg_results[t.val][gv] if t.val in agg_results else t.val(context) for t in select.target_list
        ]

    return AggQueryResult(
        target_list,
        [t.name or f'col{i}' for i, t in enumerate(select.target_list)],
    )
def __init__(self, args: List[Expression], named_args: Dict[str, Expression], agg_star: bool):
    """Set up a `sum` aggregation over a single argument expression.

    Args:
        args: positional arguments; exactly one is required.
        named_args: must be empty.
        agg_star: must be false.

    Raises:
        SqlError: if any of the three conditions above is violated.
    """
    # De Morgan form of the original `or` chain; same evaluation order.
    well_formed = len(args) == 1 and len(named_args) == 0 and not agg_star
    if not well_formed:
        raise SqlError('`sum` accept only 1 argument')
    self.arg = args[0]
def __init__(self, args: List[Expression], named_args: Dict[str, Expression], star_agg: bool):
    """Set up a `sleep` call from its single positional argument.

    Args:
        args: positional arguments; exactly one is required and is stored
            as the duration expression.
        named_args: not inspected by this block.
        star_agg: not inspected by this block.

    Raises:
        SqlError: if the positional argument count is not 1.
    """
    positional_count = len(args)
    if positional_count != 1:
        raise SqlError('function `sleep` accepts only 1 positional paramter')
    duration = args[0]
    self._duration = duration
def evaluate_BinaryOperationExpression(self, e: BinaryOperationExpression):
    """Evaluate a binary operation on two sub-expressions.

    The implementation registered under `e.name` in `BINARY_OPERATIONS` is
    applied to the evaluated left (`e.a`) and right (`e.b`) operands, which
    are evaluated in that order.

    Raises:
        SqlError: if `e.name` is not a key of `BINARY_OPERATIONS`.
    """
    if e.name not in BINARY_OPERATIONS:
        raise SqlError(
            f'Unknwon binary operator: {e.name}')  # pragma: no cover

    operation = BINARY_OPERATIONS[e.name]
    left = e.a(self)
    right = e.b(self)
    return operation(left, right)
def evaluate_SharedValueRefExpression(self, e: SharedValueRefExpression):
    """Look up the shared value named by `e.name`.

    Returns:
        The entry stored under `e.name` in `self._shared`.

    Raises:
        SqlError: if `self._shared` has no such entry.
    """
    key = e.name
    if key not in self._shared:
        raise SqlError(f'No such shared value: {e.name}')  # pragma: no cover
    return self._shared[key]
def __init__(self, args: List[Expression], named_args: Dict[str, Expression], agg_star: bool):
    """Set up a `minmax` aggregation over a single argument expression.

    Args:
        args: positional arguments; exactly one is required.
        named_args: must be empty.
        agg_star: must be false.

    Raises:
        SqlError: if any of the three conditions above is violated.
    """
    # De Morgan form of the original `or` chain; same evaluation order.
    well_formed = not agg_star and len(args) == 1 and len(named_args) == 0
    if not well_formed:  # pragma: no cover
        raise SqlError('minmax accepts only 1 argument')
    self._array = args[0]
def reducer(self, a, b):
    """Combine two partial results that are required to be equal.

    Returns:
        `a`, when `a != b` is false.

    Raises:
        SqlError: when the two values differ.
    """
    values_differ = a != b
    if values_differ:  # pragma: no cover
        raise SqlError(f'Non unique values in {self.a}')
    return a
def mapper(self, context: Context):
    """Return the single value that the expression takes in `context`.

    The expression `self.a` is evaluated with `context`, and the first
    element of the result is returned.

    Raises:
        SqlError: if the evaluated values contain two or more distinct
            elements according to `numpy.unique`.
    """
    values = self.a(context)
    distinct_count = len(numpy.unique(values))
    if distinct_count >= 2:  # pragma: no cover
        raise SqlError(f'Non unique values in {self.a}')
    first = values[0]
    return first
def __init__(self, args: List[Expression], named_args: Dict[str, Expression], agg_star: bool):
    """Set up a `COUNT` aggregation, which only supports the `*` form.

    Args:
        args: not inspected by this block.
        named_args: not inspected by this block.
        agg_star: must be true.

    Raises:
        SqlError: if `agg_star` is false.
    """
    if agg_star:
        return
    raise SqlError('COUNT accepts only * for its parameter')  # pragma: no cover
def probe(e: Expression):
    """Classify one visited expression by the function it calls.

    Expressions that are not function calls are ignored. A call to a name
    in `agg_functions` is appended to `aggs`; a call to a name in
    `nonagg_functions` is accepted silently.

    Raises:
        SqlError: if the called name is in neither registry.
    """
    if not isinstance(e, FuncCallExpression):
        return
    called = e.name
    if called in agg_functions:
        aggs.append(e)
        return
    if called not in nonagg_functions:
        raise SqlError(f'No such function: {e.name}')