def consume(self, t, src, state):
    """Emit C++ for one side of this hash join.

    For the "right" child: declares the hash map and emits the
    materialization (insert) code.  For the "left" child: emits the
    probe (lookup) code, wrapping the parent's consume output as the
    join-match body.

    NOTE: the Jinja templates are rendered with ``locals()``, so the
    local variable names in each branch are part of the template
    contract — do not rename them.

    :param t: staged tuple reference for the incoming tuple
    :param src: child operator; ``src.childtag`` selects the branch
    :param state: compiler state accumulating declarations/pipelines
    :return: generated C++ code string for this consume point
    :raises ValueError: if ``src.childtag`` is neither side
    """
    if src.childtag == "right":
        my_sch = self.scheme()

        declr_template = self._cgenv.get_template("hash_declaration.cpp")
        right_template = self._cgenv.get_template("insert_materialize.cpp")

        hashname = self._hashname
        keypos = self.right_keypos
        keyval = t.get_code(self.right_keypos)

        # the key type comes from whichever side of the condition
        # refers to the right child's attribute
        if self.rightCondIsRightAttr:
            keytype = self.language().typename(
                self.condition.right.typeof(
                    my_sch, None))
        else:
            keytype = self.language().typename(
                self.condition.left.typeof(
                    my_sch, None))

        in_tuple_type = t.getTupleTypename()
        in_tuple_name = t.name

        # declaration of hash map
        hashdeclr = declr_template.render(locals())
        state.addDeclarations([hashdeclr])

        # materialization point
        code = right_template.render(locals())

        return code

    if src.childtag == "left":
        left_template = self._cgenv.get_template("lookup.cpp")

        hashname = self._hashname
        keyname = t.name
        keytype = t.getTupleTypename()
        keypos = self.left_keypos
        keyval = t.get_code(keypos)

        right_tuple_name = gensym()

        outTuple = CStagedTupleRef(gensym(), self.scheme())
        out_tuple_type_def = outTuple.generateDefinition()
        out_tuple_type = outTuple.getTupleTypename()
        out_tuple_name = outTuple.name
        state.addDeclarations([out_tuple_type_def])

        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        code = left_template.render(locals())
        return code

    # Previously `assert False, ...`: under `python -O` asserts are
    # stripped, so a bad childtag would silently return None and
    # corrupt the generated program.  Raise unconditionally instead.
    raise ValueError(
        "src not equal to left or right: %s" % (src.childtag,))
def consume(self, t, src, state):
    """Emit Grappa code for one side of the symmetric hash join.

    Both children use the same ``hash_insert_lookup.cpp`` template
    (insert-and-probe), parameterized by ``side``.  The right child is
    consumed first and records its tuple type for the left side.

    NOTE: the template is rendered with ``locals()``; every local
    assigned below (including seemingly unused ones such as
    ``left_sch``) may be read by the template — do not rename/remove.

    :return: generated code string for this consume point
    :raises ValueError: if ``src.childtag`` is neither side
    """
    access_template = self._cgenv.get_template('hash_insert_lookup.cpp')

    hashname = self._hashname
    keyname = t.name
    side = src.childtag

    outTuple = self.outTuple
    out_tuple_type = self.outTuple.getTupleTypename()
    out_tuple_name = self.outTuple.name

    global_syncname = state.getPipelineProperty('global_syncname')

    if src.childtag == "right":
        left_sch = self.left.scheme()

        # save for later
        self.right_in_tuple_type = t.getTupleTypename()
        state.resolveSymbol(self.rightTypeRef, self.right_in_tuple_type)

        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        keyval = self.__aggregate_val__(t, self.rightcols)

        # probe side is the (not-yet-resolved) left tuple type
        other_tuple_type = self.leftTypeRef.getPlaceholder()
        left_type = other_tuple_type
        right_type = self.right_in_tuple_type
        left_name = gensym()
        right_name = keyname
        self.right_name = right_name
        valname = left_name

        code = access_template.render(locals())
        return code

    if src.childtag == "left":
        right_in_tuple_type = self.right_in_tuple_type
        left_in_tuple_type = t.getTupleTypename()
        state.resolveSymbol(self.leftTypeRef, left_in_tuple_type)

        keyval = self.__aggregate_val__(t, self.leftcols)

        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        left_type = left_in_tuple_type
        right_type = self.right_in_tuple_type
        other_tuple_type = self.right_in_tuple_type
        left_name = keyname
        right_name = gensym()
        valname = right_name

        code = access_template.render(locals())
        return code

    # Previously `assert False, ...`: stripped under `python -O`,
    # which would silently return None.  Raise unconditionally.
    raise ValueError(
        "src not equal to left or right: %s" % (src.childtag,))
def consume(self, t, src, state):
    """Emit C++ for one side of this hash join (inline-template variant).

    Right child: declare a std::unordered_map keyed on int64_t and emit
    the insert.  Left child: emit the lookup loop that combines the
    probe tuple with each match and runs the parent's consume code.

    NOTE: templates are expanded with ``% locals()`` — the local
    variable names below are part of the template contract; do not
    rename or remove any of them.
    """
    if src.childtag == "right":
        # C++ hash-map declaration; %(...)s slots filled from locals()
        declr_template = """std::unordered_map\
<int64_t, std::vector<%(in_tuple_type)s>* > %(hashname)s;
"""
        right_template = """insert(%(hashname)s, %(keyname)s, %(keypos)s);
"""

        hashname = self._hashname
        keyname = t.name
        keypos = self.right_keypos

        in_tuple_type = t.getTupleTypename()

        # declaration of hash map
        hashdeclr = declr_template % locals()
        state.addDeclarations([hashdeclr])

        # materialization point
        code = right_template % locals()

        return code

    if src.childtag == "left":
        # probe loop: for each hash match, build the combined output
        # tuple and splice in the already-compiled parent code
        left_template = """
for (auto %(right_tuple_name)s : \
lookup(%(hashname)s, %(keyname)s.get(%(keypos)s))) {
  auto %(out_tuple_name)s = \
combine<%(out_tuple_type)s> (%(keyname)s, %(right_tuple_name)s);
  %(inner_plan_compiled)s
}
"""
        hashname = self._hashname
        keyname = t.name
        keytype = t.getTupleTypename()
        keypos = self.left_keypos

        right_tuple_name = gensym()

        outTuple = CStagedTupleRef(gensym(), self.scheme())
        out_tuple_type_def = outTuple.generateDefinition()
        out_tuple_type = outTuple.getTupleTypename()
        out_tuple_name = outTuple.name
        state.addDeclarations([out_tuple_type_def])

        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        code = left_template % locals()
        return code

    # NOTE(review): stripped under `python -O`; a raise would be safer
    assert False, "src not equal to left or right"
def consume(self, t, src, state):
    """Emit Grappa code for one side of this distributed hash join.

    Right child: record this pipeline's task name (so the left/probe
    pipeline can depend on it), resolve the right tuple type symbol,
    and emit the insert/materialize code.  Left child: add the
    dependence on the right pipeline and emit the lookup code.

    NOTE: templates are rendered with ``locals()``; local names below
    are part of the template contract — do not rename them.

    :return: generated code string for this consume point
    :raises ValueError: if ``src.childtag`` is neither side
    """
    if src.childtag == "right":
        right_template = self._cgenv.get_template('insert_materialize.cpp')

        hashname = self._hashname
        keyname = t.name
        keyval = self.__aggregate_val__(t, self.rightcols)

        # remembered so the left pipeline can declare a dependence
        self.right_syncname = get_pipeline_task_name(state)

        self.rightTupleTypename = t.getTupleTypename()
        if self.rightTupleTypeRef is not None:
            state.resolveSymbol(self.rightTupleTypeRef,
                                self.rightTupleTypename)

        pipeline_sync = state.getPipelineProperty('global_syncname')

        # materialization point
        code = right_template.render(locals())

        return code

    if src.childtag == "left":
        left_template = self._cgenv.get_template('lookup.cpp')

        # add a dependence on the right pipeline
        state.addToPipelinePropertySet('dependences', self.right_syncname)

        hashname = self._hashname
        keyname = t.name
        input_tuple_type = t.getTupleTypename()
        keyval = self.__aggregate_val__(t, self.leftcols)
        pipeline_sync = state.getPipelineProperty('global_syncname')

        right_tuple_name = gensym()
        right_tuple_type = self.rightTupleTypename

        outTuple = GrappaStagedTupleRef(gensym(), self.scheme())
        out_tuple_type_def = outTuple.generateDefinition()
        out_tuple_type = outTuple.getTupleTypename()
        out_tuple_name = outTuple.name
        state.addDeclarations([out_tuple_type_def])

        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        code = left_template.render(locals())
        return code

    # Previously `assert False, ...`: stripped under `python -O`,
    # which would silently return None.  Raise unconditionally.
    raise ValueError(
        "src not equal to left or right: %s" % (src.childtag,))
def produce(self, state):
    """Emit the scan pipeline for this relation, reusing a prior scan
    of the same expression when one exists (common subexpression
    elimination via ``state.lookupExpr``).

    NOTE: ``rel_decl_template.render(locals())`` depends on the local
    variable names in this body (e.g. ``name``, ``tuple_type``).
    """
    # Common subexpression elimination
    # don't scan the same file twice
    resultsym = state.lookupExpr(self)
    _LOG.debug("lookup %s(h=%s) => %s", self, self.__hash__(), resultsym)
    if not resultsym:
        # TODO for now this will break
        # whatever relies on self.bound like reusescans
        # Scan is the only place where a relation is declared
        resultsym = gensym()

        # relation key is assumed to look like "a:b:name" — the third
        # colon-separated field is used as the relation's name
        name = str(self.relation_key).split(':')[2]
        fstemplate, fsbindings = self.__compileme__(resultsym, name)
        state.saveExpr(self, resultsym)

        stagedTuple = self.new_tuple_ref(resultsym, self.scheme())
        state.saveTupleDef(resultsym, stagedTuple)

        tuple_type_def = stagedTuple.generateDefinition()
        tuple_type = stagedTuple.getTupleTypename()

        state.addDeclarations([tuple_type_def])

        rel_decl_template = self.__get_relation_decl_template__(name)
        if rel_decl_template:
            state.addDeclarations([rel_decl_template.render(locals())])

        # now that we have the type, format this in;
        state.setPipelineProperty('type', 'scan')
        state.setPipelineProperty('source', self.__class__)
        state.addPipeline(
            fstemplate.render(fsbindings, result_type=tuple_type))

    # no return value used because parent is a new pipeline
    self.parent().consume(resultsym, self, state)
def create_pipeline_synchronization(state):
    """
    The pipeline_synchronization will sync tasks
    within a single pipeline. Adds this new object to the compiler state.

    Declares a fresh GlobalCompletionEvent plus a per-pipeline GCE
    metric, registers both as declarations, stores the sync name as a
    pipeline property, and returns the generated name.

    NOTE: both templates are expanded with ``% locals()``; the local
    names ``global_syncname`` and ``pipeline_id`` are part of the
    template contract.
    """
    global_syncname = gensym()

    # true = tracked by gce user metrics
    global_sync_decl_template = ct("""
    GlobalCompletionEvent %(global_syncname)s(true);
    """)
    global_sync_decl = global_sync_decl_template % locals()

    gce_metric_template = """
    GRAPPA_DEFINE_METRIC(CallbackMetric<int64_t>, \
app_%(pipeline_id)s_gce_incomplete, []{
    return %(global_syncname)s.incomplete();
    });
    """
    pipeline_id = state.getCurrentPipelineId()
    gce_metric_def = gce_metric_template % locals()

    state.addDeclarations([global_sync_decl, gce_metric_def])

    state.setPipelineProperty('global_syncname', global_syncname)
    return global_syncname
def expression_combine(cls, args, operator="&&"):
    """Combine compiled argument expressions with a C binary operator.

    Returns a triple ``(code, decls, inits)``: the parenthesized
    combined expression, plus any extra declarations and
    initializations this combination introduced.
    """
    codes, decls, inits = cls._extract_code_decl_init(args)

    extra_decls = []
    extra_inits = []

    # special case for integer divide. C doesn't have this syntax
    # Rely on automatic conversion from float to int
    if operator == "//":
        operator = "/"
    # special case for string LIKE, use overloaded mod operator
    elif operator == "like":
        # NOTE: LIKE probably shouldn't be implemented as
        # a "binop" because the input type != output type
        assert len(args) == 2, "LIKE only combines 2 arguments"
        operator = "%"

        # hoist pattern compilation out of the loop processing
        # Unchecked precondition: codes[1] is independent of the tuple
        regex_name = gensym()
        extra_decls.append("std::regex {var};\n".format(var=regex_name))
        extra_inits.append(cls.on_all(
            """{var} = compile_like_pattern({str});
            """.format(var=regex_name, str=codes[1])))

        # replace the string literal with the regex
        codes[1] = regex_name

    joined = (" %s " % operator).join("(%s)" % c for c in codes)
    _LOG.debug("conjunc: %s", joined)

    combined_code = "( %s )" % joined
    return combined_code, decls + extra_decls, inits + extra_inits
def produce(self, state):
    """Declare the projection's output tuple type, then drive the child.

    A projection always materializes into a freshly declared tuple.
    """
    # TODO: instead do mark used-columns?
    out_ref = self.new_tuple_ref(gensym(), self.scheme())
    self.newtuple = out_ref
    state.addDeclarations([out_ref.generateDefinition()])

    self.input.produce(state)
def get_append(out_tuple_type, type1, type1numfields,
               type2, type2numfields):
    """Generate a C++ function that concatenates two materialized tuples.

    Returns ``(append_func_name, combine_function_def)``: the generated
    (unique) function name and its definition code.

    NOTE: the template is rendered with ``locals()``, so every
    parameter and local here (``type1``, ``type1numfields``, etc.) is
    consumed by the template even though unused in this body.
    """
    append_func_name = "create_" + gensym()
    result_type = out_tuple_type
    combine_function_def = _cgenv.get_template(
        "materialized_tuple_create_two.cpp").render(locals())

    return append_func_name, combine_function_def
def produce(self, state):
    """Set up a Grappa DoubleDHT-based equi-join and drive both children.

    Declares the (symbol-unresolved) hash structure, the output tuple
    type, validates the join condition, then produces right child
    first, left second.

    NOTE: the ct() templates are expanded with ``% locals()``; local
    names below are part of the template contract.
    """
    self.symBase = self.__genBaseName__()

    if not isinstance(self.condition, expression.EQ):
        msg = "The C compiler can only handle equi-join conditions\
 of a single attribute: %s" % self.condition
        raise ValueError(msg)

    init_template = ct("""%(hashname)s.init_global_DHT( &%(hashname)s, \
cores()*16*1024 );
    """)

    declr_template = ct("""typedef DoubleDHT<int64_t, \
%(left_in_tuple_type)s, \
%(right_in_tuple_type)s, std_hash> \
DHT_%(left_in_tuple_type)s_%(right_in_tuple_type)s;
DHT_%(left_in_tuple_type)s_%(right_in_tuple_type)s %(hashname)s;
""")

    my_sch = self.scheme()
    left_sch = self.left.scheme()

    # declaration of hash map
    self._hashname = self.__getHashName__()
    hashname = self._hashname

    # tuple types are not known yet — use placeholder symbols that get
    # resolved when each child's consume runs
    self.leftTypeRef = state.createUnresolvedSymbol()
    left_in_tuple_type = self.leftTypeRef.getPlaceholder()
    self.rightTypeRef = state.createUnresolvedSymbol()
    right_in_tuple_type = self.rightTypeRef.getPlaceholder()
    hashdeclr = declr_template % locals()
    state.addDeclarationsUnresolved([hashdeclr])

    self.outTuple = GrappaStagedTupleRef(gensym(), my_sch)
    out_tuple_type_def = self.outTuple.generateDefinition()
    state.addDeclarations([out_tuple_type_def])

    # find the attribute that corresponds to the right child
    self.rightCondIsRightAttr = \
        self.condition.right.position >= len(left_sch)
    self.leftCondIsRightAttr = \
        self.condition.left.position >= len(left_sch)
    # exactly one side of the condition must refer to the right child
    assert self.rightCondIsRightAttr ^ self.leftCondIsRightAttr

    self.right.childtag = "right"
    state.addInitializers([init_template % locals()])
    self.right.produce(state)

    self.left.childtag = "left"
    self.left.produce(state)
def produce(self, state):
    """Emit the file-scan pipeline for this relation, with common
    subexpression elimination so the same file is not scanned twice.

    Differs from the plain scan produce by using
    ``new_tuple_ref_for_filescan`` and emitting auxiliary input
    declarations.

    NOTE: the relation/aux declaration templates are rendered with
    ``locals()`` (``name``, ``colnames``, ``tuple_type``, ...).
    """
    # Common subexpression elimination
    # don't scan the same file twice
    resultsym = state.lookupExpr(self)
    _LOG.debug("lookup %s(h=%s) => %s", self, self.__hash__(), resultsym)
    if not resultsym:
        # TODO for now this will break
        # whatever relies on self.bound like reusescans
        # Scan is the only place where a relation is declared
        resultsym = gensym()

        # relation key format "a:b:name" — third field is the name
        name = str(self.relation_key).split(':')[2]
        fstemplate, fsbindings = self.__compileme__(resultsym, name)
        state.saveExpr(self, resultsym)

        stagedTuple = self.new_tuple_ref_for_filescan(
            resultsym, self.scheme())
        state.saveTupleDef(resultsym, stagedTuple)

        tuple_type_def = stagedTuple.generateDefinition()
        tuple_type = stagedTuple.getTupleTypename()
        state.addDeclarations([tuple_type_def])

        colnames = self.scheme().get_names()

        rel_decl_template = self.__get_relation_decl_template__(name)
        if rel_decl_template:
            state.addDeclarations([rel_decl_template.render(locals())])

        rel_aux_decl_template = self._get_input_aux_decls_template()
        if rel_aux_decl_template:
            state.addDeclarations([rel_aux_decl_template.render(locals())])

        # now that we have the type, format this in;
        state.setPipelineProperty('type', 'scan')
        state.setPipelineProperty('source', self.__class__)
        state.addPipeline(
            fstemplate.render(fsbindings, result_type=tuple_type))

    # no return value used because parent is a new pipeline
    self.parent().consume(resultsym, self, state)
def createTupleTypeConversion(lang, state, input_tuple, result_tuple):
    """Generate code converting ``input_tuple`` into ``result_tuple``'s
    type.

    Declares a conversion function (added to ``state``) and returns the
    call-site snippet that invokes it.

    NOTE: the first template is rendered with ``locals()``; local names
    (``type1``, ``type1numfields``, ``result_type``, ...) are part of
    the template contract.
    """
    # add declaration for function to convert from one type to the other
    type1 = input_tuple.getTupleTypename()
    type1numfields = len(input_tuple.scheme)
    convert_func_name = "create_" + gensym()
    result_type = result_tuple.getTupleTypename()
    result_name = result_tuple.name
    input_tuple_name = input_tuple.name
    convert_func = lang._cgenv.get_template(
        'materialized_tuple_create_one.cpp').render(locals())
    state.addDeclarations([convert_func])

    return lang._cgenv.get_template('tuple_type_convert.cpp').render(
        result_type=result_type,
        result_name=result_name,
        convert_func_name=convert_func_name,
        input_tuple_name=input_tuple_name
    )
def consume(self, t, src, state):
    """Emit code funneling one union input into the unified tuple type.

    Declares a per-source conversion function into the union's shared
    tuple type, then renders the union template around the parent's
    compiled consume code.

    NOTE: both templates are rendered with ``locals()``; local names
    below are part of the template contract.
    """
    union_template = _cgenv.get_template('union.cpp')

    unified_tuple_typename = self.unifiedTupleType.getTupleTypename()
    unified_tuple_name = self.unifiedTupleType.name
    src_tuple_name = t.name

    # add declaration for function to convert from one type to the other
    type1 = t.getTupleTypename()
    type1numfields = len(t.scheme)
    convert_func_name = "create_" + gensym()
    result_type = unified_tuple_typename
    convert_func = _cgenv.get_template(
        'materialized_tuple_create_one.cpp').render(locals())
    state.addDeclarations([convert_func])

    inner_plan_compiled = \
        self.parent().consume(self.unifiedTupleType, self, state)

    return union_template.render(locals())
def produce(self, state):
    """Set up the Grappa hash join (template-file variant) and drive
    both children: declare the hash structure with placeholder tuple
    types, declare the output tuple type, then produce right first,
    left second.

    NOTE: templates are rendered with ``locals()``; local names below
    are part of the template contract.
    """
    self.symBase = self.__genBaseName__()

    init_template = self._cgenv.get_template('hash_init.cpp')
    declr_template = self._cgenv.get_template('hash_declaration.cpp')

    my_sch = self.scheme()
    left_sch = self.left.scheme()
    right_sch = self.right.scheme()

    # split the join condition into per-side key column lists
    self.leftcols, self.rightcols = \
        algebra.convertcondition(self.condition,
                                 len(left_sch),
                                 left_sch + right_sch)

    # declaration of hash map
    self._hashname = self.__getHashName__()
    keytype = self.__aggregate_type__(my_sch, self.rightcols)
    hashname = self._hashname

    # input tuple types unknown until children consume — use
    # unresolved placeholder symbols
    self.leftTypeRef = state.createUnresolvedSymbol()
    left_in_tuple_type = self.leftTypeRef.getPlaceholder()
    self.rightTypeRef = state.createUnresolvedSymbol()
    right_in_tuple_type = self.rightTypeRef.getPlaceholder()
    hashdeclr = declr_template.render(locals())
    state.addDeclarationsUnresolved([hashdeclr])

    self.outTuple = GrappaStagedTupleRef(gensym(), my_sch)
    out_tuple_type_def = self.outTuple.generateDefinition()
    state.addDeclarations([out_tuple_type_def])

    self.right.childtag = "right"
    state.addInitializers([init_template.render(locals())])
    self.right.produce(state)

    self.left.childtag = "left"
    self.left.produce(state)
def create_pipeline_synchronization(state):
    """
    The pipeline_synchronization will sync tasks
    within a single pipeline. Adds this new object to the compiler state.

    Template-file variant: declares the sync object and a per-pipeline
    GCE app metric, records both, sets the 'global_syncname' pipeline
    property, and returns the generated name.

    NOTE: templates are rendered with ``locals()``
    (``global_syncname``, ``pipeline_id``).
    """
    global_syncname = gensym()

    # true = tracked by gce user metrics
    global_sync_decl = GrappaLanguage.cgenv().get_template(
        'sync_declaration.cpp').render(locals())

    gce_metric_template = GrappaLanguage.cgenv().get_template(
        'gce_app_metric.cpp')
    pipeline_id = state.getCurrentPipelineId()
    gce_metric_def = gce_metric_template.render(locals())

    state.addDeclarations([global_sync_decl, gce_metric_def])

    state.setPipelineProperty('global_syncname', global_syncname)
    return global_syncname
def produce(self, state):
    """Emit Grappa code for this group-by (inline-template variant).

    Supports at most 2 grouping attributes and exactly 1 built-in
    aggregate.  Keyed aggregation uses a DHT_symmetric; the keyless
    case uses a per-core counter reduced at output time.  After the
    input pipeline runs, an output pipeline scans the aggregation
    structure and feeds each result tuple to the parent's consume code.

    NOTE: all ct() templates are expanded with ``% locals()``; local
    names below are part of the template contract.
    """
    assert len(self.grouping_list) <= 2, \
        """%s does not currently support \
"groupings of more than 2 attributes"""\
        % self.__class__.__name__
    assert len(self.aggregate_list) == 1, \
        "%s currently only supports aggregates of 1 attribute"\
        % self.__class__.__name__
    for agg_term in self.aggregate_list:
        assert isinstance(agg_term,
                          expression.BuiltinAggregateExpression), \
            """%s only supports simple aggregate expressions.
            A rule should create Apply[GroupBy]""" \
            % self.__class__.__name__

    self.useKey = len(self.grouping_list) > 0
    _LOG.debug("groupby uses keys? %s" % self.useKey)

    declr_template = None
    if self.useKey:
        if len(self.grouping_list) == 1:
            declr_template = ct("""typedef DHT_symmetric<int64_t, \
int64_t, std_hash> \
DHT_int64;
""")
        elif len(self.grouping_list) == 2:
            declr_template = ct("""typedef DHT_symmetric<\
std::pair<int64_t,int64_t>, \
int64_t, pair_hash> \
DHT_pair_int64;
""")

    self._hashname = self.__genHashName__()
    _LOG.debug("generate hashname %s for %s", self._hashname, self)

    hashname = self._hashname

    if declr_template is not None:
        hashdeclr = declr_template % locals()
        state.addDeclarationsUnresolved([hashdeclr])

    if self.useKey:
        if len(self.grouping_list) == 1:
            init_template = ct("""auto %(hashname)s = \
DHT_int64::create_DHT_symmetric( );""")
        elif len(self.grouping_list) == 2:
            init_template = ct("""auto %(hashname)s = \
DHT_pair_int64::create_DHT_symmetric( );""")
    else:
        init_template = ct("""auto %(hashname)s = counter::create();
        """)

    state.addInitializers([init_template % locals()])
    self.input.produce(state)

    # now that everything is aggregated, produce the tuples
    assert len(self.column_list()) == 1 \
        or isinstance(self.column_list()[0],
                      expression.AttributeRef), \
        """assumes first column is the key and second is aggregate result
        column_list: %s""" % self.column_list()

    if self.useKey:
        mapping_var_name = gensym()
        if len(self.grouping_list) == 1:
            produce_template = ct("""%(hashname)s->\
forall_entries<&%(pipeline_sync)s>\
([=](std::pair<const int64_t,int64_t>& %(mapping_var_name)s) {
    %(output_tuple_type)s %(output_tuple_name)s(\
{%(mapping_var_name)s.first, %(mapping_var_name)s.second});
    %(inner_code)s
    });
    """)
        elif len(self.grouping_list) == 2:
            produce_template = ct("""%(hashname)s->\
forall_entries<&%(pipeline_sync)s>\
([=](std::pair<const std::pair<int64_t,int64_t>,int64_t>& \
%(mapping_var_name)s) {
    %(output_tuple_type)s %(output_tuple_name)s(\
{%(mapping_var_name)s.first.first,\
%(mapping_var_name)s.first.second,\
%(mapping_var_name)s.second});
    %(inner_code)s
    });
    """)
    else:
        op = self.aggregate_list[0].__class__.__name__
        # translations for Grappa::reduce predefined ops
        coll_op = {'COUNT': 'COLL_ADD',
                   'SUM': 'COLL_ADD',
                   'MAX': 'COLL_MAX',
                   'MIN': 'COLL_MIN'}[op]

        produce_template = ct("""auto %(output_tuple_name)s_tmp = \
reduce<int64_t, \
counter, \
%(coll_op)s, \
&get_count>\
(%(hashname)s);

    %(output_tuple_type)s %(output_tuple_name)s;
    %(output_tuple_name)s.set(0, %(output_tuple_name)s_tmp);
    %(inner_code)s
    """)

    pipeline_sync = create_pipeline_synchronization(state)
    get_pipeline_task_name(state)

    # add a dependence on the input aggregation pipeline
    state.addToPipelinePropertySet('dependences', self.input_syncname)

    output_tuple = GrappaStagedTupleRef(gensym(), self.scheme())
    output_tuple_name = output_tuple.name
    output_tuple_type = output_tuple.getTupleTypename()
    state.addDeclarations([output_tuple.generateDefinition()])

    inner_code = self.parent().consume(output_tuple, self, state)

    code = produce_template % locals()

    state.setPipelineProperty("type", "in_memory")
    state.addPipeline(code)
def produce(self, state):
    """Declare the unified output tuple type shared by both union
    inputs, then drive the children (right before left)."""
    unified = self.new_tuple_ref(gensym(), self.scheme())
    self.unifiedTupleType = unified
    state.addDeclarations([unified.generateDefinition()])

    self.right.produce(state)
    self.left.produce(state)
def consume(self, t, src, state):
    """Emit Grappa code for one side of this join (inline ct templates).

    Right child: async hash insert; remembers this pipeline's task name
    and resolves the right tuple type symbol.  Left child: adds a
    dependence on the right pipeline and emits the lookup-iterate code
    around the parent's compiled consume code.

    NOTE: templates are expanded with ``% locals()``; local names below
    are part of the template contract.
    """
    if src.childtag == "right":
        right_template = ct("""
        %(hashname)s.insert_async<&%(pipeline_sync)s>(\
%(keyname)s.get(%(keypos)s), %(keyname)s);
        """)

        hashname = self._hashname
        keyname = t.name
        keypos = self.right_keypos

        # remembered so the probe pipeline can depend on this one
        self.right_syncname = get_pipeline_task_name(state)

        self.rightTupleTypename = t.getTupleTypename()
        if self.rightTupleTypeRef is not None:
            state.resolveSymbol(self.rightTupleTypeRef,
                                self.rightTupleTypename)

        pipeline_sync = state.getPipelineProperty('global_syncname')

        # materialization point
        code = right_template % locals()

        return code

    if src.childtag == "left":
        left_template = ct("""
        %(hashname)s.lookup_iter<&%(pipeline_sync)s>( \
%(keyname)s.get(%(keypos)s), \
[=](%(right_tuple_type)s& %(right_tuple_name)s) {
          join_coarse_result_count++;
          %(out_tuple_type)s %(out_tuple_name)s = \
combine<%(out_tuple_type)s, \
%(keytype)s, \
%(right_tuple_type)s> \
(%(keyname)s, %(right_tuple_name)s);
          %(inner_plan_compiled)s
        });
        """)

        # add a dependence on the right pipeline
        state.addToPipelinePropertySet('dependences', self.right_syncname)

        hashname = self._hashname
        keyname = t.name
        keytype = t.getTupleTypename()
        pipeline_sync = state.getPipelineProperty('global_syncname')

        keypos = self.left_keypos

        right_tuple_name = gensym()
        right_tuple_type = self.rightTupleTypename

        outTuple = GrappaStagedTupleRef(gensym(), self.scheme())
        out_tuple_type_def = outTuple.generateDefinition()
        out_tuple_type = outTuple.getTupleTypename()
        out_tuple_name = outTuple.name
        state.addDeclarations([out_tuple_type_def])

        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        code = left_template % locals()
        return code

    # NOTE(review): stripped under `python -O`; a raise would be safer
    assert False, "src not equal to left or right"
def produce(self, state):
    """Emit C++ for this group-by (template-file variant).

    Supports at most 2 grouping attributes and exactly 1 built-in
    aggregate.  Declares a hash structure keyed by the grouping
    attributes (or a plain accumulator when keyless), runs the input
    pipeline, then emits a scan over the aggregation structure feeding
    the parent's consume code.

    NOTE: all templates are rendered with ``locals()``; local names
    below (``keytype``, ``keytypes``, ``valtype``, ``hashname``, ...)
    are part of the template contract.
    """
    assert len(self.grouping_list) <= 2, \
        "%s does not currently support groupings of \
more than 2 attributes" % self.__class__.__name__
    assert len(self.aggregate_list) == 1, \
        """%s currently only supports aggregates of 1 attribute
        (aggregate_list=%s)""" \
        % (self.__class__.__name__, self.aggregate_list)
    for agg_term in self.aggregate_list:
        assert isinstance(agg_term,
                          expression.BuiltinAggregateExpression), \
            """%s only supports simple aggregate expressions.
            A rule should create Apply[GroupBy]""" \
            % self.__class__.__name__

    inp_sch = self.input.scheme()

    self.useMap = len(self.grouping_list) > 0

    if self.useMap:
        if len(self.grouping_list) == 1:
            declr_template = self._cgenv.get_template(
                '1key_declaration.cpp')
            keytype = self.language().typename(
                self.grouping_list[0].typeof(
                    inp_sch, None))
        elif len(self.grouping_list) == 2:
            declr_template = self._cgenv.get_template(
                '2key_declaration.cpp')
            keytypes = ','.join(
                [self.language().typename(g.typeof(inp_sch, None))
                 for g in self.grouping_list])
    else:
        initial_value = self.__get_initial_value__(
            0, cached_inp_sch=inp_sch)
        declr_template = self._cgenv.get_template('0key_declaration.cpp')
        valtype = self.language().typename(
            self.aggregate_list[0].typeof(
                inp_sch, None))

    self.hashname = CGroupBy.__genHashName__()
    hashname = self.hashname

    hash_declr = declr_template.render(locals())
    state.addDeclarations([hash_declr])

    my_sch = self.scheme()

    _LOG.debug("aggregates: %s", self.aggregate_list)
    _LOG.debug("columns: %s", self.column_list())
    _LOG.debug("groupings: %s", self.grouping_list)
    _LOG.debug("groupby scheme: %s", my_sch)
    _LOG.debug("groupby scheme[0] type: %s", type(my_sch[0]))

    self.input.produce(state)

    # now that everything is aggregated, produce the tuples
    assert (not self.useMap) \
        or isinstance(self.column_list()[0], expression.AttributeRef), \
        "assumes first column is the key and " \
        "second is aggregate result: %s" % (self.column_list()[0])

    if self.useMap:
        if len(self.grouping_list) == 1:
            produce_template = self._cgenv.get_template('1key_scan.cpp')
        elif len(self.grouping_list) == 2:
            produce_template = self._cgenv.get_template('2key_scan.cpp')
    else:
        produce_template = self._cgenv.get_template('0key_scan.cpp')

    output_tuple = CStagedTupleRef(gensym(), my_sch)
    output_tuple_name = output_tuple.name
    output_tuple_type = output_tuple.getTupleTypename()
    state.addDeclarations([output_tuple.generateDefinition()])

    inner_code = self.parent().consume(output_tuple, self, state)

    code = produce_template.render(locals())
    state.setPipelineProperty("type", "in_memory")
    state.addPipeline(code)
def consume(self, t, src, state):
    """Emit Grappa code for one side of the symmetric hash join
    (inline ct template variant): a single insert-and-probe template
    parameterized by ``side``.

    NOTE: the template is expanded with ``% locals()``; every local
    below (including ``left_sch``) is part of the template contract.
    """
    access_template = ct("""
    %(hashname)s.insert_lookup_iter_%(side)s<&%(global_syncname)s>(\
%(keyname)s.get(%(keypos)s), %(keyname)s, \
[=](%(other_tuple_type)s %(valname)s) {
      join_coarse_result_count++;
      %(out_tuple_type)s %(out_tuple_name)s = \
combine<%(out_tuple_type)s, \
%(left_type)s, \
%(right_type)s> (%(left_name)s, \
%(right_name)s);
      %(inner_plan_compiled)s
      });
    """)

    hashname = self._hashname
    keyname = t.name
    side = src.childtag

    outTuple = self.outTuple
    out_tuple_type = self.outTuple.getTupleTypename()
    out_tuple_name = self.outTuple.name

    global_syncname = state.getPipelineProperty('global_syncname')

    if src.childtag == "right":
        left_sch = self.left.scheme()

        # save for later
        self.right_in_tuple_type = t.getTupleTypename()
        state.resolveSymbol(self.rightTypeRef, self.right_in_tuple_type)

        # condition positions index the combined scheme; subtract the
        # left arity to get the position within the right tuple
        if self.rightCondIsRightAttr:
            keypos = self.condition.right.position \
                - len(left_sch)
        else:
            keypos = self.condition.left.position \
                - len(left_sch)

        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        other_tuple_type = self.leftTypeRef.getPlaceholder()
        left_type = other_tuple_type
        right_type = self.right_in_tuple_type
        left_name = gensym()
        right_name = keyname
        self.right_name = right_name
        valname = left_name

        code = access_template % locals()
        return code

    if src.childtag == "left":
        right_in_tuple_type = self.right_in_tuple_type
        left_in_tuple_type = t.getTupleTypename()
        state.resolveSymbol(self.leftTypeRef, left_in_tuple_type)

        # whichever condition side does NOT refer to the right child
        # holds the left key position
        if self.rightCondIsRightAttr:
            keypos = self.condition.left.position
        else:
            keypos = self.condition.right.position

        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        left_type = left_in_tuple_type
        right_type = self.right_in_tuple_type
        other_tuple_type = self.right_in_tuple_type
        left_name = keyname
        right_name = gensym()
        valname = right_name

        code = access_template % locals()
        return code

    # NOTE(review): stripped under `python -O`; a raise would be safer
    assert False, "src not equal to left or right"
def produce(self, state):
    """Emit C++ for this group-by (inline-template variant).

    Supports at most 2 grouping attributes and exactly 1 built-in
    aggregate.  Keyed aggregation uses std::unordered_map; the keyless
    case uses a single int64_t accumulator.  After the input pipeline
    runs, a scan over the map feeds each result tuple to the parent's
    consume code.

    NOTE: templates are expanded with ``% locals()``; local names below
    are part of the template contract.
    """
    assert len(self.grouping_list) <= 2, \
        "%s does not currently support groupings of \
more than 2 attributes" % self.__class__.__name__
    assert len(self.aggregate_list) == 1, \
        """%s currently only supports aggregates of 1 attribute
        (aggregate_list=%s)""" \
        % (self.__class__.__name__, self.aggregate_list)
    for agg_term in self.aggregate_list:
        assert isinstance(agg_term,
                          expression.BuiltinAggregateExpression), \
            """%s only supports simple aggregate expressions.
            A rule should create Apply[GroupBy]""" \
            % self.__class__.__name__

    self.useMap = len(self.grouping_list) > 0

    if self.useMap:
        if len(self.grouping_list) == 1:
            declr_template = """std::unordered_map<int64_t, int64_t> \
%(hashname)s;
"""
        elif len(self.grouping_list) == 2:
            declr_template = """std::unordered_map<\
std::pair<int64_t, int64_t>, int64_t, pairhash> \
%(hashname)s;
"""
    else:
        declr_template = """int64_t %(hashname)s;
"""

    self.hashname = self.__genHashName__()
    hashname = self.hashname

    hash_declr = declr_template % locals()
    state.addDeclarations([hash_declr])

    my_sch = self.scheme()

    _LOG.debug("aggregates: %s", self.aggregate_list)
    _LOG.debug("columns: %s", self.column_list())
    _LOG.debug("groupings: %s", self.grouping_list)
    _LOG.debug("groupby scheme: %s", my_sch)
    _LOG.debug("groupby scheme[0] type: %s", type(my_sch[0]))

    self.input.produce(state)

    # now that everything is aggregated, produce the tuples
    assert (not self.useMap) \
        or isinstance(self.column_list()[0], expression.AttributeRef), \
        "assumes first column is the key and " \
        "second is aggregate result: %s" % (self.column_list()[0])

    if self.useMap:
        if len(self.grouping_list) == 1:
            produce_template = """for (auto it=%(hashname)s.begin(); \
it!=%(hashname)s.end(); it++) {
  %(output_tuple_type)s %(output_tuple_name)s(\
{it->first, it->second});
  %(inner_code)s
}
"""
        elif len(self.grouping_list) == 2:
            produce_template = """for (auto it=%(hashname)s.begin(); \
it!=%(hashname)s.end(); it++) {
  %(output_tuple_type)s %(output_tuple_name)s(\
{it->first.first, it->first.second, it->second});
  %(inner_code)s
}
"""
    else:
        produce_template = """{
  %(output_tuple_type)s %(output_tuple_name)s({ %(hashname)s });
  %(inner_code)s
}
"""

    output_tuple = CStagedTupleRef(gensym(), my_sch)
    output_tuple_name = output_tuple.name
    output_tuple_type = output_tuple.getTupleTypename()
    state.addDeclarations([output_tuple.generateDefinition()])

    inner_code = self.parent().consume(output_tuple, self, state)

    code = produce_template % locals()
    state.setPipelineProperty("type", "in_memory")
    state.addPipeline(code)
def produce(self, state):
    """Emit Grappa code for this group-by supporting two aggregate
    modes: a single built-in aggregate (``_ONE_BUILT_IN``) or a set of
    UDA aggregates (``_MULTI_UDA``).

    Chooses declaration/init/scan templates based on the mode and on
    whether grouping keys are present, runs the input pipeline, then
    emits a dependent output pipeline that scans the aggregation state
    and feeds the parent's consume code.

    NOTE: templates are rendered with ``locals()``; local names below
    are part of the template contract.
    """
    self._agg_mode = None
    if len(self.aggregate_list) == 1 \
            and isinstance(self.aggregate_list[0],
                           expression.BuiltinAggregateExpression):
        self._agg_mode = self._ONE_BUILT_IN
    elif all([isinstance(a, expression.UdaAggregateExpression)
              for a in self.aggregate_list]):
        self._agg_mode = self._MULTI_UDA

    assert self._agg_mode is not None, \
        "unsupported aggregates {0}".format(self.aggregate_list)
    _LOG.debug("%s _agg_mode was set to %s", self, self._agg_mode)

    self.useKey = len(self.grouping_list) > 0
    _LOG.debug("groupby uses keys? %s" % self.useKey)

    inp_sch = self.input.scheme()

    if self._agg_mode == self._ONE_BUILT_IN:
        state_type = self.language().typename(
            self.aggregate_list[0].input.typeof(inp_sch, None))
        op = self.aggregate_list[0].__class__.__name__
        self.update_func = "Aggregates::{op}<{type}, {type}>".format(
            op=op, type=state_type)
    elif self._agg_mode == self._MULTI_UDA:
        # for now just name the aggregate after the first state variable
        self.func_name = self.updaters[0][0]
        self.state_tuple = GrappaStagedTupleRef(gensym(),
                                                self.state_scheme)
        state.addDeclarations([self.state_tuple.generateDefinition()])
        state_type = self.state_tuple.getTupleTypename()
        self.update_func = "{name}_update".format(name=self.func_name)

    update_func = self.update_func

    if self.useKey:
        numkeys = len(self.grouping_list)
        keytype = "std::tuple<{types}>".format(
            types=','.join([self.language().typename(
                g.typeof(inp_sch, None)) for g in self.grouping_list]))

    self._hashname = self.__genHashName__()
    _LOG.debug("generate hashname %s for %s", self._hashname, self)

    hashname = self._hashname

    if self.useKey:
        init_template = self._cgenv.get_template('withkey_init.cpp')
        valtype = state_type
    else:
        if self._agg_mode == self._ONE_BUILT_IN:
            initial_value = \
                self.__get_initial_value__(0, cached_inp_sch=inp_sch)
            no_key_state_initializer = \
                "counter<{state_type}>::create({valinit})".format(
                    state_type=state_type, valinit=initial_value)
        elif self._agg_mode == self._MULTI_UDA:
            no_key_state_initializer = \
                "symmetric_global_alloc<{state_tuple_type}>()".format(
                    state_tuple_type=self.state_tuple.getTupleTypename())

        init_template = self._cgenv.get_template('withoutkey_init.cpp')
        initializer = no_key_state_initializer

    state.addInitializers([init_template.render(locals())])

    self.input.produce(state)

    # now that everything is aggregated, produce the tuples
    # assert len(self.column_list()) == 1 \
    #     or isinstance(self.column_list()[0],
    #                   expression.AttributeRef), \
    #     """assumes first column is the key and second is aggregate
    #     result column_list: %s""" % self.column_list()

    if self.useKey:
        mapping_var_name = gensym()

        if self._agg_mode == self._ONE_BUILT_IN:
            emit_type = self.language().typename(
                self.aggregate_list[0].input.typeof(
                    self.input.scheme(), None))
        elif self._agg_mode == self._MULTI_UDA:
            emit_type = self.state_tuple.getTupleTypename()

        if self._agg_mode == self._ONE_BUILT_IN:
            # need to force type in make_tuple
            produce_template = self._cgenv.get_template(
                'one_built_in_scan.cpp')
        elif self._agg_mode == self._MULTI_UDA:
            # pass in attribute values individually
            produce_template = self._cgenv.get_template(
                'multi_uda_scan.cpp')
    else:
        if self._agg_mode == self._ONE_BUILT_IN:
            produce_template = self._cgenv.get_template(
                'one_built_in_0key_output.cpp')
        elif self._agg_mode == self._MULTI_UDA:
            produce_template = self._cgenv.get_template(
                'multi_uda_0key_output.cpp')

    pipeline_sync = create_pipeline_synchronization(state)
    get_pipeline_task_name(state)

    # add a dependence on the input aggregation pipeline
    state.addToPipelinePropertySet('dependences', self.input_syncname)

    output_tuple = GrappaStagedTupleRef(gensym(), self.scheme())
    output_tuple_name = output_tuple.name
    output_tuple_type = output_tuple.getTupleTypename()
    output_tuple_set_func = output_tuple.set_func_code(0)
    state.addDeclarations([output_tuple.generateDefinition()])

    inner_code = self.parent().consume(output_tuple, self, state)
    code = produce_template.render(locals())
    state.setPipelineProperty("type", "in_memory")
    state.addPipeline(code)
def produce(self, state):
    """Generate the aggregation pipeline for a (C-backend) GroupBy.

    Declares a hash table keyed by up to two grouping attributes (or a
    single scalar accumulator when there is no grouping), drives the
    input pipeline to fill it, then emits a scan pipeline that feeds the
    aggregated tuples to the parent operator.

    NOTE: several locals (hashname, keytype, keytypes, valtype,
    initial_value, output_tuple_*, inner_code, ...) are consumed by the
    Jinja templates via ``render(locals())`` — renaming them would break
    code generation.
    """
    # This operator is deliberately limited: at most 2 grouping columns
    # and exactly 1 simple built-in aggregate.
    assert len(self.grouping_list) <= 2, \
        "%s does not currently support groupings of \
        more than 2 attributes" % self.__class__.__name__
    assert len(self.aggregate_list) == 1, \
        """%s currently only supports aggregates of 1 attribute
        (aggregate_list=%s)""" \
        % (self.__class__.__name__, self.aggregate_list)
    for agg_term in self.aggregate_list:
        assert isinstance(agg_term,
                          expression.BuiltinAggregateExpression), \
            """%s only supports simple aggregate expressions.
            A rule should create Apply[GroupBy]""" \
            % self.__class__.__name__

    inp_sch = self.input.scheme()

    # useMap: keyed aggregation (hash map) vs. single global accumulator
    self.useMap = len(self.grouping_list) > 0

    if self.useMap:
        if len(self.grouping_list) == 1:
            declr_template = self._cgenv.get_template(
                '1key_declaration.cpp')
            # C type of the single grouping key
            keytype = self.language().typename(
                self.grouping_list[0].typeof(inp_sch, None))
        elif len(self.grouping_list) == 2:
            declr_template = self._cgenv.get_template(
                '2key_declaration.cpp')
            # comma-separated C types of the two grouping keys
            keytypes = ','.join([
                self.language().typename(g.typeof(inp_sch, None))
                for g in self.grouping_list])
    else:
        # no grouping: a single accumulator seeded with the aggregate's
        # identity value (e.g. 0 for SUM/COUNT)
        initial_value = self.__get_initial_value__(
            0, cached_inp_sch=inp_sch)
        declr_template = self._cgenv.get_template('0key_declaration.cpp')

    # C type of the aggregated value; referenced by the declaration
    # templates through locals()
    valtype = self.language().typename(
        self.aggregate_list[0].typeof(inp_sch, None))

    self.hashname = CGroupBy.__genHashName__()
    hashname = self.hashname

    # declaration of the hash map / accumulator
    hash_declr = declr_template.render(locals())
    state.addDeclarations([hash_declr])

    my_sch = self.scheme()

    _LOG.debug("aggregates: %s", self.aggregate_list)
    _LOG.debug("columns: %s", self.column_list())
    _LOG.debug("groupings: %s", self.grouping_list)
    _LOG.debug("groupby scheme: %s", my_sch)
    _LOG.debug("groupby scheme[0] type: %s", type(my_sch[0]))

    # drive the child pipeline; its consume() fills the hash table
    self.input.produce(state)

    # now that everything is aggregated, produce the tuples
    assert (not self.useMap) \
        or isinstance(self.column_list()[0],
                      expression.AttributeRef), \
        "assumes first column is the key and " \
        "second is aggregate result: %s" % (self.column_list()[0])

    if self.useMap:
        if len(self.grouping_list) == 1:
            produce_template = self._cgenv.get_template('1key_scan.cpp')
        elif len(self.grouping_list) == 2:
            produce_template = self._cgenv.get_template('2key_scan.cpp')
    else:
        produce_template = self._cgenv.get_template('0key_scan.cpp')

    # output tuple handed to the parent's consume()
    output_tuple = CStagedTupleRef(gensym(), my_sch)
    output_tuple_name = output_tuple.name
    output_tuple_type = output_tuple.getTupleTypename()
    state.addDeclarations([output_tuple.generateDefinition()])

    # compile the downstream code first so the scan template can embed it
    inner_code = self.parent().consume(output_tuple, self, state)
    code = produce_template.render(locals())
    state.setPipelineProperty("type", "in_memory")
    state.addPipeline(code)
def produce(self, state):
    """Declare the unified output tuple type, then drive every child.

    The tuple ref is stashed on ``self`` so that consume() can map each
    child's tuples into the single shared output type.
    """
    unified = self.new_tuple_ref(gensym(), self.scheme())
    self.unifiedTupleType = unified
    state.addDeclarations([unified.generateDefinition()])
    for child in self.args:
        child.produce(state)
def consume(self, t, src, state):
    """Emit join code for one side of a hash join (C backend, with CSE).

    ``src.childtag`` selects the role: "right" builds/materializes the
    hash table (and registers it with state.saveExpr so an identical
    right subtree can reuse it), "left" probes it and emits the combined
    output tuple to the parent.

    NOTE: the Jinja templates read their variables via
    ``render(locals())``, so the local names below are part of the
    template contract — do not rename them.
    """
    if src.childtag == "right":
        my_sch = self.scheme()

        declr_template = self._cgenv.get_template("hash_declaration.cpp")
        right_template = self._cgenv.get_template("insert_materialize.cpp")

        hashname = self._hashname
        keypos = self.right_keypos
        keyval = t.get_code(self.right_keypos)

        # the join key's C type comes from whichever side of the
        # condition refers to the right child's attribute
        if self.rightCondIsRightAttr:
            keytype = self.language().typename(
                self.condition.right.typeof(my_sch, None))
        else:
            keytype = self.language().typename(
                self.condition.left.typeof(my_sch, None))

        in_tuple_type = t.getTupleTypename()
        in_tuple_name = t.name

        # remember the materialized tuple type and publish the hash
        # table for common-subexpression reuse (keyed on right child +
        # key position)
        self.right_type = in_tuple_type
        state.saveExpr((self.right, self.right_keypos),
                       (self._hashname, self.right_type))

        # declaration of hash map
        hashdeclr = declr_template.render(locals())
        state.addDeclarations([hashdeclr])

        # materialization point
        code = right_template.render(locals())
        return code

    if src.childtag == "left":
        left_template = self._cgenv.get_template("lookup.cpp")

        hashname = self._hashname
        keyname = t.name
        # NOTE(review): here keytype is the probing tuple's type, not
        # the key's scalar type as in the right branch — presumably what
        # lookup.cpp expects; confirm against the template.
        keytype = t.getTupleTypename()
        keypos = self.left_keypos
        keyval = t.get_code(keypos)

        right_tuple_name = gensym()

        # output tuple = left tuple fields ++ right tuple fields
        outTuple = CStagedTupleRef(gensym(), self.scheme())
        out_tuple_type_def = outTuple.generateDefinition()
        out_tuple_type = outTuple.getTupleTypename()
        out_tuple_name = outTuple.name

        # generate the append/combine function that concatenates a
        # left tuple and a matched right tuple into the output tuple
        type1 = keytype
        type1numfields = len(t.scheme)
        type2 = self.right_type
        type2numfields = len(self.right.scheme())
        append_func_name, combine_function_def = \
            CStagedTupleRef.get_append(
                out_tuple_type,
                type1, type1numfields,
                type2, type2numfields)

        state.addDeclarations([out_tuple_type_def, combine_function_def])

        # compile downstream code first so the lookup template can
        # embed it at the probe's match site
        inner_plan_compiled = self.parent().consume(outTuple, self, state)

        code = left_template.render(locals())
        return code

    assert False, "src not equal to left or right"
def produce(self, state):
    """Generate the Grappa hash-join pipelines.

    Computes key positions from the join condition, builds (or reuses
    via CSE) the distributed hash table over the right child, drives
    both children's pipelines, and finally emits the result-scan
    pipeline that iterates the join output into the parent's consume().

    BUG FIX: the result-scan code was produced with
    ``iterate_template % locals()`` — but ``iterate_template`` is a
    Jinja template object (``get_template('result_scan.cpp')``), which
    does not support the ``%`` operator and would raise ``TypeError``
    at code-generation time. Every other template in this file is
    expanded with ``.render(locals())``; this one now is too.

    NOTE: many locals (hashname, left_type/right_type, out_tuple_*,
    pipeline_sync, inner_code_compiled, ...) are read by the templates
    via ``render(locals())`` — do not rename them.
    """
    left_sch = self.left.scheme()

    self.syncnames = []
    self.symBase = self.__genBaseName__()

    self.right.childtag = "right"
    self.rightTupleTypeRef = None  # may remain None if CSE succeeds
    self.leftTupleTypeRef = None  # may remain None if CSE succeeds

    # find the attribute that corresponds to the right child:
    # positions >= len(left_sch) belong to the right side
    self.rightCondIsRightAttr = \
        self.condition.right.position >= len(left_sch)
    self.leftCondIsRightAttr = \
        self.condition.left.position >= len(left_sch)
    # exactly one side of the condition must refer to the right child
    assert self.rightCondIsRightAttr ^ self.leftCondIsRightAttr

    # find right key position (relative to the right child's scheme)
    if self.rightCondIsRightAttr:
        self.right_keypos = self.condition.right.position \
            - len(left_sch)
    else:
        self.right_keypos = self.condition.left.position \
            - len(left_sch)

    # find left key position
    if self.rightCondIsRightAttr:
        self.left_keypos = self.condition.left.position
    else:
        self.left_keypos = self.condition.right.position

    # define output tuple
    outTuple = GrappaStagedTupleRef(gensym(), self.scheme())
    out_tuple_type_def = outTuple.generateDefinition()
    out_tuple_type = outTuple.getTupleTypename()
    out_tuple_name = outTuple.name

    # common index is defined by same right side and same key
    # TODO: probably want also left side
    hashtableInfo = state.lookupExpr((self.right, self.right_keypos))
    if not hashtableInfo:
        # if right child never bound then store hashtable symbol and
        # call right child produce
        self._hashname = self.__getHashName__()
        _LOG.debug("generate hashname %s for %s", self._hashname, self)

        hashname = self._hashname

        # declaration of hash map; tuple types are unresolved symbols,
        # filled in later by the children's consume() calls
        self.rightTupleTypeRef = state.createUnresolvedSymbol()
        self.leftTupleTypeRef = state.createUnresolvedSymbol()
        self.outTupleTypeRef = state.createUnresolvedSymbol()
        right_type = self.rightTupleTypeRef.getPlaceholder()
        left_type = self.leftTupleTypeRef.getPlaceholder()

        # TODO: really want this addInitializers to be addPreCode
        # TODO: *for all pipelines that use this hashname*
        init_template = self._cgenv.get_template('hash_init.cpp')
        state.addInitializers([init_template.render(locals())])

        self.right.produce(state)

        self.left.childtag = "left"
        self.left.produce(state)

        # publish for common-subexpression reuse by identical joins
        state.saveExpr((self.right, self.right_keypos),
                       (self._hashname, right_type, left_type,
                        self.right_syncname, self.left_syncname))
    else:
        # if found a common subexpression on right child then
        # use the same hashtable
        self._hashname, right_type, left_type,\
            self.right_syncname, self.left_syncname = hashtableInfo
        _LOG.debug("reuse hash %s for %s", self._hashname, self)

    # now that Relation is produced, produce its contents by iterating
    # over the join result
    iterate_template = self._cgenv.get_template('result_scan.cpp')

    hashname = self._hashname

    state.addDeclarations([out_tuple_type_def])

    pipeline_sync = create_pipeline_synchronization(state)
    get_pipeline_task_name(state)

    # add dependences on left and right inputs
    state.addToPipelinePropertySet('dependences', self.right_syncname)
    state.addToPipelinePropertySet('dependences', self.left_syncname)

    # reduce is a single self contained pipeline.
    # future hashjoin implementations may pipeline out of it
    # by passing a continuation to reduceExecute
    reduce_template = self._cgenv.get_template('reduce.cpp')
    state.addPreCode(reduce_template.render(locals()))

    delete_template = self._cgenv.get_template('delete.cpp')
    state.addPostCode(delete_template.render(locals()))

    # compile downstream code before rendering so locals() contains it
    inner_code_compiled = self.parent().consume(outTuple, self, state)

    # FIX: render the Jinja template (was `iterate_template % locals()`,
    # which raises TypeError on a Template object)
    code = iterate_template.render(locals())

    state.setPipelineProperty('type', 'in_memory')
    state.setPipelineProperty('source', self.__class__)
    state.addPipeline(code)