def processLetClause(c, table, prior_lcs):
    comp_expr = compile(c["expr"].lstrip(), "<string>", "eval")
    new_schema = None
    for t in table:
        # Extend the schema with the let-clause variables (computed once).
        if not new_schema:
            new_schema = dict(t.schema)
            for (i, v) in enumerate(c["vars"]):
                new_schema[v] = len(t.schema) + i
        lcs = dict(prior_lcs)
        lcs.update(t.getDict())
        v = eval(comp_expr, globals(), lcs)
        if len(c["vars"]) == 1:
            new_t_data = list(t.tuple)
            new_t_data.append(v)
            new_t = PQTuple(new_t_data, new_schema)
            yield new_t
        else:
            # Unpack the computed value into the individual clause variables.
            unpack_expr = "[ %s for %s in [ __v ]]" % (
                '(' + ",".join(c["vars"]) + ')', c["unpack"])
            unpacked_vals = eval(unpack_expr, globals(), {'__v': v})
            new_t_data = list(t.tuple)
            for tv in unpacked_vals[0]:
                new_t_data.append(tv)
            new_t = PQTuple(new_t_data, new_schema)
            yield new_t
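# A hypothetical usage sketch (not from the original code): drive processLetClause
# with a tiny one-column table. It assumes PQTuple(data, schema) from this module,
# where `schema` maps variable names to positions and tuples expose .tuple,
# .schema and .getDict(), as the clause processors here already rely on.
def _let_clause_example():
    table = [PQTuple([1], {"x": 0}), PQTuple([2], {"x": 0})]
    let_clause = {"expr": "x * 10", "vars": ["y"]}   # let y := x * 10
    return [t.tuple for t in processLetClause(let_clause, table, {})]
    # expected: [[1, 10], [2, 20]]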
def execute(self, query, tuple_vars, vars):
    res = self.engine.execute(query)
    schema = {}
    tuple_schemas = []
    for table_name, v in tuple_vars:
        schema[v] = len(schema)
        tuple_schema = {}
        schema_name = None
        # Split off an explicit database schema, if one is given.
        if len(table_name.split('.')) > 1:
            schema_name, table_name = table_name.split('.')
        table = Table(table_name, MetaData(), autoload=True,
                      autoload_with=self.engine, schema=schema_name)
        for c in table.columns:
            tuple_schema[c.name] = len(tuple_schema)
        tuple_schemas.append(tuple_schema)
    for v in vars:
        schema[v] = len(schema)
    for r in res:
        i = 0
        out_t = []
        # Rebuild one PQTuple per table variable from the flat result row.
        for j, (_, v) in enumerate(tuple_vars):
            t_data = []
            sc = tuple_schemas[j]
            for k in range(len(sc)):
                t_data.append(r[i])
                i += 1
            out_t.append(PQTuple(t_data, sc))
        # Remaining columns are plain variables.
        for v in vars:
            out_t.append(r[i])
            i += 1
        yield PQTuple(out_t, schema)
def processForClause(c, table, prior_lcs, prior_globs):
    new_schema = None
    comp_expr = compile(c.expr.lstrip(), "<string>", "eval")
    for t in table:
        # Extend the schema with the for-clause variables (computed once).
        if not new_schema:
            new_schema = dict(t.schema)
            for (i, v) in enumerate(c.vars):
                new_schema[v] = len(t.schema) + i
        lcs = dict(prior_lcs)
        lcs.update(t.getDict())
        vals = eval(comp_expr, prior_globs, lcs)
        if len(c.vars) == 1:
            for v in vals:
                new_t_data = list(t.tuple)
                new_t_data.append(v)
                new_t = PQTuple(new_t_data, new_schema)
                yield new_t
        else:
            for v in vals:
                # Unpack each binding into the individual clause variables.
                unpack_expr = "[ %s for %s in [ __v ]]" % (
                    '(' + ",".join(c.vars) + ')', c.unpack)
                unpacked_vals = eval(unpack_expr, prior_globs, {'__v': v})
                new_t_data = list(t.tuple)
                for tv in unpacked_vals[0]:
                    new_t_data.append(tv)
                new_t = PQTuple(new_t_data, new_schema)
                yield new_t
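# A hypothetical sketch of the unpack mechanism used by processForClause: with
# c.vars == ["a", "b"] and c.unpack == "(a, b)", the generated comprehension splits
# one element of the binding sequence into the clause variables. The literals below
# are illustrative, not taken from the original clause objects.
def _for_unpack_example():
    unpack_expr = "[ (a,b) for (a, b) in [ __v ]]"
    return eval(unpack_expr, {}, {"__v": (1, 2)})[0]   # -> (1, 2)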
def execute(self, query, tuple_vars, vars):
    res = self.engine.execute(query)
    schema = {}
    tuple_schemas = []
    for t in tuple_vars:
        v = t['tuple_var']
        schema[v] = len(schema)
        tuple_schema = {}
        final_tuple_schema = {}
        rev_tuple_schema = []
        for (c_name, _) in t['output_schema']['values']:
            tuple_schema[c_name] = len(tuple_schema)
            # The '#checkbit' column is internal and not exposed in the final schema.
            if c_name != '#checkbit':
                final_tuple_schema[c_name] = len(final_tuple_schema)
            rev_tuple_schema.append(c_name)
        tuple_schemas.append((tuple_schema, final_tuple_schema, rev_tuple_schema))
    for v in vars:
        schema[v] = len(schema)
    for r in res:
        i = 0
        out_t = []
        for j, _ in enumerate(tuple_vars):
            checkbit = True
            isTuple = True
            t_data = []
            (sc, final_sc, rev_sc) = tuple_schemas[j]
            for k in range(len(sc)):
                if rev_sc[k] == '#checkbit':
                    checkbit = r[i]
                else:
                    t_data.append(r[i])
                    if rev_sc[k] == '#value':
                        isTuple = False
                i += 1
            if isTuple:
                # A false checkbit means the outer-joined tuple is absent.
                out_t.append(PQTuple(t_data, final_sc) if checkbit else None)
            else:
                out_t.append(t_data[0])
        for v in vars:
            out_t.append(r[i])
            i += 1
        yield PQTuple(out_t, schema)
def make_pql_tuple(vals, lcs, gbs):
    t = []
    als = []
    for v in vals:
        # eval takes globals first, then locals.
        t.append(eval(v[0], gbs, lcs))
        alias = v[1] if v[1] else v[0]
        als.append(alias)
    schema = {n: i for (i, n) in enumerate(als)}
    return PQTuple(t, schema)
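# A hypothetical usage sketch for make_pql_tuple (not from the original code):
# each entry of `vals` is an (expression, alias-or-None) pair, and the alias
# defaults to the expression text. Assumes PQTuple exposes a .schema attribute.
def _make_pql_tuple_example():
    t = make_pql_tuple([("1 + 1", "two"), ("3", None)], {}, {})
    return t.schema   # expected: {'two': 0, '3': 1}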
def processCountClause(c, table, prior_lcs):
    new_schema = None
    for (i, t) in enumerate(table):
        if not new_schema:
            new_schema = dict(t.schema)
            new_schema[c["var"]] = len(t.schema)
        new_t = PQTuple(t.tuple + [i], new_schema)
        yield new_t
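# A hypothetical sketch (not from the original code): the count clause simply
# numbers the incoming tuples, binding the position to c["var"]. It assumes the
# same PQTuple interface used above.
def _count_clause_example():
    table = [PQTuple([v], {"x": 0}) for v in "abc"]
    return [t.tuple for t in processCountClause({"var": "n"}, table, {})]
    # expected: [['a', 0], ['b', 1], ['c', 2]]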
def processGroupByClause(c, table, prior_lcs):
    gby_aliases = [g if isinstance(g, str) else g[1]
                   for g in c["groupby_list"]]
    gby_exprs = [g if isinstance(g, str) else g[0] for g in c["groupby_list"]]
    comp_exprs = [compile(e, '<string>', 'eval') for e in gby_exprs]
    grp_table = {}
    schema = None

    # Group tuples in a hashtable
    for t in table:
        if not schema:
            schema = t.schema
        lcs = dict(prior_lcs)
        lcs.update(t.getDict())
        # Compute the key
        k = tuple([eval(e, globals(), lcs) for e in comp_exprs])
        if k not in grp_table:
            grp_table[k] = []
        grp_table[k].append(t)

    if not grp_table:
        return

    # Construct the new table
    # Non-key variables
    non_key_vars = [v for v in schema if v not in gby_aliases]
    new_schema = {v: i for (i, v) in enumerate(gby_aliases + non_key_vars)}
    for k in grp_table:
        t = PQTuple([None] * len(new_schema), new_schema)
        # Copy over the key
        for (i, v) in enumerate(gby_aliases):
            t[v] = k[i]
        # Every other variable (not in the group-by list) is turned into a list.
        # First create empty lists
        for v in non_key_vars:
            t[v] = []
        # Now fill in the lists:
        for part_t in grp_table[k]:
            for v in non_key_vars:
                t[v].append(part_t[v])
        yield t
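# A hypothetical sketch (not from the original code) of the group-by output shape:
# key variables keep a single value per group, every other variable is collected
# into a list. Assumes PQTuple supports name-based indexing, as the clause above uses.
def _group_by_example():
    schema = {"dept": 0, "name": 1}
    table = [PQTuple(["eng", "ann"], schema),
             PQTuple(["eng", "bob"], schema),
             PQTuple(["hr", "cal"], schema)]
    clause = {"groupby_list": ["dept"]}
    return [(t["dept"], t["name"]) for t in processGroupByClause(clause, table, {})]
    # expected: [('eng', ['ann', 'bob']), ('hr', ['cal'])]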
def processMatchClause(c, table, prior_lcs):
    clause_expr = compile(c['expr'], "<string>", "eval")

    # Fetch and compile all expressions in the
    # pattern match clause
    e_patterns = []
    patterns = list(c['pattern'])
    while patterns:
        p = patterns.pop()
        if 'expr_cond' in p:
            e_patterns.append(p)
        if 'pattern' in p:
            patterns.append(p['pattern'])
    for ep in e_patterns:
        ep['expr_cond'] = compile(ep["expr_cond"], "<string>", "eval")

    new_schema = None
    for t in table:
        # Extend the schema with the match-clause variables (computed once).
        if not new_schema:
            new_schema = dict(t.schema)
            for (i, v) in enumerate(c["vars"]):
                new_schema[v] = len(t.schema) + i
        lcs = dict(prior_lcs)
        lcs.update(t.getDict())
        vals = eval(clause_expr, globals(), lcs)
        for v in vals:
            # Only container-like values can be matched against the pattern.
            if not hasattr(v, '__contains__'):
                continue
            new_t_data = list(t.tuple) + [None] * len(c['vars'])
            new_t = PQTuple(new_t_data, new_schema)
            if match_pattern(c['pattern'], c['exact'], v, new_t, lcs):
                yield new_t
def wrap_tuples(items, schema):
    # `schema` is an iterable of (position, name) pairs.
    _schema = {n: i for (i, n) in schema}
    for item in items:
        yield PQTuple(item, _schema)
def processWindowClause(c, table, prior_lcs):
    schema = None
    new_schema = None

    # Create window variable name mapping
    var_mapping = {}
    for v in c["vars"]:
        var_mapping[v] = c["vars"][v]

    for t in table:
        if not schema:
            schema = t.schema
            # Create a new schema with window variables added
            new_schema = dict(t.schema)
            for v in c["vars"]:
                new_schema[c["vars"][v]] = len(new_schema)

        lcs = dict(prior_lcs)
        lcs.update(t.getDict())

        # Evaluate the binding sequence
        binding_seq = list(eval(c["in"], globals(), lcs))

        # Initialize the windows
        open_windows = []
        closed_windows = []

        # Iterate over the binding sequence
        for (i, v) in enumerate(binding_seq):
            # Try to open a new window.
            # In case of tumbling windows, only open a
            # window if there are no open windows.
            if not c["tumbling"] or (c["tumbling"] and not open_windows):
                vars = make_window_vars()
                fill_in_start_vars(vars, binding_seq, i)
                if check_start_condition(vars, c, dict(lcs), var_mapping):
                    open_windows.append({"window": [], "vars": vars})

            # Update all open windows, close those that are finished
            new_open_windows = []
            for w in open_windows:
                # Add current value to the window
                w["window"].append(v)
                fill_in_end_vars(w["vars"], binding_seq, i)
                if check_end_condition(w["vars"], c, dict(lcs), var_mapping):
                    closed_windows.append(w)
                else:
                    new_open_windows.append(w)
            open_windows = new_open_windows

        # Close or remove all remaining open windows.
        # If 'only' is specified, we ignore non-closed windows.
        if not c["only"]:
            closed_windows.extend(open_windows)

        # Create a new tuple by extending the tuple from previous clauses
        # with the window variables, for each closed window
        for w in closed_windows:
            new_t = PQTuple(t.tuple + [None] * (len(new_schema) - len(schema)),
                            new_schema)
            new_t[var_mapping["var"]] = w["window"]
            for v in [v for v in w["vars"].keys() if v in var_mapping]:
                new_t[var_mapping[v]] = w["vars"][v]
            yield new_t
def processJoin(c, table, prior_lcs):
    new_schema = None
    left_arg = c['left']
    right_arg = c['right']
    left_conds = c['left_conds']
    right_conds = c['right_conds']
    join_type = 'nl'
    dir = 'right'
    if 'hint' in c:
        join_type = c['hint']['join_type']
        dir = c['hint']['dir']
    if dir == 'left':
        # The hint flips which side is treated as the inner (indexed) relation,
        # so the conditions have to follow their relations as well.
        left_arg, right_arg = right_arg, left_arg
        left_conds, right_conds = right_conds, left_conds

    r_init_data = [emptyTuple([])]

    # Build an index on the right relation, if we're doing
    # an index join.
    index = None
    if join_type == 'index':
        index = {}
        r_data = r_init_data
        if isinstance(right_arg, list):
            for c2 in right_arg:
                r_data = processClause(c2, r_data, prior_lcs)
        else:
            r_data = processJoin(right_arg, r_data, prior_lcs)
        for t in r_data:
            index_tuple = []
            for rcond in right_conds:
                lcs = dict(prior_lcs)
                lcs.update(t.getDict())
                rcond_val = eval(rcond, globals(), lcs)
                index_tuple.append(rcond_val)
            index_tuple = tuple(index_tuple)
            if index_tuple not in index:
                index[index_tuple] = []
            index[index_tuple].append(t)

    # Iterate over the tuples of the left relation and
    # compute the tuple of condition vars
    if isinstance(left_arg, list):
        for c2 in left_arg:
            table = processClause(c2, table, prior_lcs)
    else:
        table = processJoin(left_arg, table, prior_lcs)

    for t in table:
        cond_tuple = []
        for lcond in left_conds:
            lcs = dict(prior_lcs)
            lcs.update(t.getDict())
            lcond_val = eval(lcond, globals(), lcs)
            cond_tuple.append(lcond_val)
        cond_tuple = tuple(cond_tuple)

        if index is not None:
            # Index join: probe the hash table built on the right relation.
            for t2 in index.get(cond_tuple, []):
                if not new_schema:
                    new_schema = dict(t.schema)
                    for i, _ in enumerate(t2):
                        v = [x for x in t2.schema.items() if x[1] == i][0][0]
                        new_schema[v] = len(t.schema) + i
                new_t_data = list(t.tuple) + list(t2.tuple)
                new_t = PQTuple(new_t_data, new_schema)
                yield new_t
        else:
            # Nested-loop join: re-evaluate the right relation for each left tuple.
            r_data = r_init_data
            if isinstance(right_arg, list):
                for c2 in right_arg:
                    r_data = processClause(c2, r_data, prior_lcs)
            else:
                r_data = processJoin(right_arg, r_data, prior_lcs)
            for t2 in r_data:
                rcond_tuple = []
                for rcond in right_conds:
                    lcs = dict(prior_lcs)
                    lcs.update(t2.getDict())
                    rcond_val = eval(rcond, globals(), lcs)
                    rcond_tuple.append(rcond_val)
                rcond_tuple = tuple(rcond_tuple)
                if cond_tuple == rcond_tuple:
                    if not new_schema:
                        new_schema = dict(t.schema)
                        for i, _ in enumerate(t2):
                            v = [x for x in t2.schema.items() if x[1] == i][0][0]
                            new_schema[v] = len(t.schema) + i
                    new_t_data = list(t.tuple) + list(t2.tuple)
                    new_t = PQTuple(new_t_data, new_schema)
                    yield new_t
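# Hypothetical shape of the join clause consumed by processJoin, inferred from the
# field accesses above; the sub-clause contents and condition expressions below are
# placeholders, not a claim about the real clause grammar.
#
#   c = {
#       "left":        <list of clauses for processClause, or a nested join dict>,
#       "right":       <list of clauses for processClause, or a nested join dict>,
#       "left_conds":  ["x"],        # expressions evaluated against left tuples
#       "right_conds": ["y"],        # expressions evaluated against right tuples
#       "hint":        {"join_type": "index", "dir": "right"},   # optional
#   }
#
# With join_type == 'index' the right side is materialized once into a hash table
# keyed by its condition values; otherwise the right side is re-evaluated for every
# left tuple (a nested-loop join).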
def emptyTuple(schema):
    return PQTuple([None] * len(schema), schema)
def wrap_df(df):
    schema = {n: i for (i, n) in enumerate(df.columns)}
    for t in df.itertuples(index=False):
        yield PQTuple(t, schema)
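# A hypothetical usage sketch for wrap_df (not from the original code), assuming
# pandas is available and PQTuple accepts the row data produced by itertuples().
def _wrap_df_example():
    import pandas as pd
    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    return [t.schema for t in wrap_df(df)]
    # expected: two tuples, each with schema {'a': 0, 'b': 1}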