def generateJoinEstimateKernel(f, query, estimator, stats):
    print >> f, "__kernel void estimate("
    icols = Utils.generateInvariantColumns(query)
    jcols = Utils.generateJoinColumns(query)
    graph = constructJoinGraph(query)
    tids = graph.collectTableIDs()
    pairs = graph.collectJoinPairs()
    _, dvals = stats

    for x, t in enumerate(tids):
        for jc in jcols[t]:
            print >> f, " __global unsigned int* t%s_c%s," % (t, jc),
        if x > 0:
            print >> f, " unsigned int n_t%s," % (t)
    print >> f, " __global unsigned long *contributions, unsigned int ss){"
    print >> f

    #We start off with table 1.
    print >> f, " unsigned long sum = 0;"
    print >> f, " for(unsigned int offset = 0; offset < ss; offset += get_global_size(0)){"
    print >> f, " if (offset + get_global_id(0) < ss){"
    graph.generateJoinEstimateKernelBottomUp(f, query, estimator)
    print >> f, " sum++;"
    graph.generateJoinEstimateKernelTopDown(f, query)
    print >> f, " }"
    print >> f, " }"
    print >> f, " if (get_global_id(0) < ss) contributions[get_global_id(0)] = sum;"
    print >> f, "}"
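
# A sketch for orientation only (not emitted anywhere): the counting variant above
# produces a kernel body of roughly this shape; the two graph.* calls fill in
# query-specific matching code that is not reproduced here.
#
#   unsigned long sum = 0;
#   for(unsigned int offset = 0; offset < ss; offset += get_global_size(0)){
#       if (offset + get_global_id(0) < ss){
#           /* emitted by graph.generateJoinEstimateKernelBottomUp(...) (query-specific) */
#           sum++;
#           /* emitted by graph.generateJoinEstimateKernelTopDown(...) (query-specific) */
#       }
#   }
#   if (get_global_id(0) < ss) contributions[get_global_id(0)] = sum;
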
def generateJoinEstimateKernel(f, query, estimator, stats):
    print >> f, "__kernel void estimate("
    icols = Utils.generateInvariantColumns(query)
    jcols = Utils.generateJoinColumns(query)
    _, dvals = stats

    graph = constructJoinGraph(query)
    t1, c1 = graph.left_col
    t2, c2 = graph.right_col
    tids = graph.collectTableIDs()
    pairs = graph.collectJoinPairs()

    if estimator.join_kernel == "Cont":
        kde = ContKDEKernel()
    elif estimator.join_kernel == "Rect":
        kde = RectKDEKernel()
    elif estimator.join_kernel == "Cat":
        kde = CatKDEKernel()

    for x, t in enumerate(tids):
        for jc in jcols[t]:
            print >> f, " __global unsigned int* t%s_c%s," % (t, jc),
            print >> f, " double bw_t%s_c%s," % (t, jc)
        if icols[t]:
            print >> f, " __global double* inv_t%s," % (t)
        if x > 0:
            print >> f, " unsigned int n_t%s," % (t)

    #Here we go.
    for t1, c1, t2, c2 in pairs:
        print >> f, " double limit_t%s_c%s_t%s_c%s," % (t1, c1, t2, c2)
    if estimator.join_kernel == "Cat":
        print >> f, " double omega,"
    print >> f, " __global double* contributions, unsigned int ss){"
    print >> f

    #We start off with table 1.
    kde.generatePreamble(f, query)
    print >> f, " for(unsigned int offset = 0; offset < ss; offset += get_global_size(0)){"
    print >> f, " if (offset + get_global_id(0) < ss){"
    graph.generateJoinEstimateKernelBottomUp(f, query, estimator)
    kde.generateEstimateCode(f, query, graph.jid + 1, graph, stats)
    graph.generateJoinEstimateKernelTopDown(f, query)

    if estimator.join_kernel == "Cat":
        print >> f, " T jnone = (1.0-bw_t%s_c%s) * bw_t%s_c%s / (%f-1.0) + (1.0-bw_t%s_c%s) * bw_t%s_c%s / (%f-1.0) + bw_t%s_c%s*bw_t%s_c%s * (%f-2.0) / ((%f-1.0)*(%f-1.0));" % (
            t1, c1, t2, c2, dvals[t2][c2],
            t2, c2, t1, c1, dvals[t1][c1],
            t1, c1, t2, c2, min(dvals[t1][c1], dvals[t2][c2]),
            dvals[t1][c1], dvals[t2][c2])
        t1, c1 = graph.left_col
        print >> f, " sum += c_t%s * jnone * (omega-osum);" % (t1)
    print >> f, " }"
    print >> f, " }"
    print >> f, " if (get_global_id(0) < ss) contributions[get_global_id(0)] = sum;"
    print >> f, "}"
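
# For orientation: under the assumption of a hypothetical two-table equi-join
# (tids = [0, 1], one join column per table, one invariant column on table 0, and a
# non-"Cat" join kernel), the KDE variant above would emit a kernel signature roughly
# like the following; exact spacing and the trailing KDE code depend on the query.
#
#   __kernel void estimate(
#    __global unsigned int* t0_c0,  double bw_t0_c0,
#    __global double* inv_t0,
#    __global unsigned int* t1_c0,  double bw_t1_c0,
#    unsigned int n_t1,
#    double limit_t0_c0_t1_c0,
#    __global double* contributions, unsigned int ss){
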
def generateGPUSampleCode(i, query, estimator, stats, cu_factor):
    ts, dv = stats
    graph = constructJoinGraph(query)
    tids = graph.collectTableIDs()

    #Generate Kernels
    with open("./%s_kernels.cl" % i, 'w') as cf:
        generatePreamble(cf)
        print >> cf, "//"
        graph.generateTableEstimateKernel(cf, query, estimator, stats)
        generateBinarySearchCode(cf)
        generateJoinEstimateKernel(cf, query, estimator, stats)
        print >> cf, "//"

    with open("./%s_GPUS.cpp" % i, 'w') as cf:
        generateCIncludes(cf)
        generateGPUSampleParameterArray(cf, query, estimator)
        Utils.generateGPUVectorConverterFunction(cf)
        Utils.generateUintFileReaderFunction(cf)
        Utils.generateScottBWFunction(cf)
        generateGPUSampleEstimateFunction(cf, graph, query, estimator,
                                          prod(ts.values())**-1.0, stats, cu_factor)
        #There is no reason why we shouldn't use the estimate function from GPUJKDE.
        generateGPUJKDETestWrapper(cf, query, estimator)

        cols = Utils.generateInvariantColumns(query)
        jcols = Utils.generateJoinColumns(query)

        print >> cf, """
int main( int argc, const char* argv[] ){
    parameters p;

    compute::device device = compute::system::default_device();
    p.ctx = compute::context(device);
    p.queue=compute::command_queue(p.ctx, device);
"""
        print >> cf, """
    std::ifstream t("./%s_kernels.cl");
    t.exceptions ( std::ifstream::failbit | std::ifstream::badbit );
    std::string str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
""" % i

        #Read the table sizes, read the sample columns into memory, and transfer them to the GPU.
        print >> cf, " std::stringstream iteration_stream;"
        print >> cf, " p.iteration = (unsigned int) atoi(argv[%s]);" % (len(query.tables) + 1)
        print >> cf, " iteration_stream << \"./iteration\" << std::setw(2) << std::setfill('0') << argv[%s];" % (len(query.tables) + 1)
        for j, t in enumerate(query.tables):
            print >> cf, " p.ss%s = atoi(argv[%s]);" % (j, j + 1)
            print >> cf, " p.ts%s= %s;" % (j, ts[j])
            for k, c in enumerate(t.columns):
                print >> cf, " std::stringstream s_t%s_c%s_stream ;" % (j, k)
                print >> cf, " s_t%s_c%s_stream << iteration_stream.str() << \"/sample_\" << atoi(argv[%s]) << \"_%s_%s.dump\";" % (j, k, j + 1, t.tid, c.cid)
                print >> cf, " std::string s_t%s_c%s_string = s_t%s_c%s_stream.str();" % (j, k, j, k)
                print >> cf, " unsigned int* s_t%s_c%s = readUArrayFromFile(s_t%s_c%s_string.c_str());" % (j, k, j, k)
                print >> cf, " p.s_t%s_c%s = toGPUVector(s_t%s_c%s, p.ss%s, p.ctx, p.queue);" % (j, k, j, k, j)
        print >> cf

        for t, cs in enumerate(jcols):
            if cols[t]:
                for c in cs:
                    print >> cf, " p.sr_t%s_c%s = compute::vector<unsigned int>(p.ss%s, p.ctx);" % (t, c, t)
        print >> cf, " p.final_contributions = compute::vector<unsigned long>(p.ss%s, p.ctx);" % tids[0]

        print >> cf, """
    compute::program pr = compute::program::create_with_source(str,p.ctx);
    try{
        std::ostringstream oss;
        pr.build(oss.str());
    } catch(const std::exception& ex){
        std::cout << pr.build_log() << std::endl;
    }
"""
        for j, t in enumerate(query.tables):
            if len(cols[j]) > 0:
                print >> cf, " p.invk%s = pr.create_kernel(\"invk_t%s\");" % (j, j)
                print >> cf, " p.inv_t%s = compute::vector<double>(p.ss%s, p.ctx);" % (j, j)
                print >> cf, " p.invr_t%s = compute::vector<double>(p.ss%s, p.ctx);" % (j, j)
        print >> cf, " p.estimate = pr.create_kernel(\"estimate\");"
        print >> cf

        for t, tab in enumerate(query.tables):
            print >> cf, " p.map_t%s = compute::vector<unsigned int >(p.ss%s+1, p.ctx);" % (t, t)
            print >> cf, " p.count_t%s = compute::vector<int >(p.ss%s+1, p.ctx);" % (t, t)
            print >> cf, " p.count_t%s[0] = -1;" % t

        print >> cf, " std::string test_cardinality_string = iteration_stream.str() + \"/test_join_true.dump\";"
        print >> cf, " p.j_test_cardinality = readUArrayFromFile(test_cardinality_string.c_str());"

        for i, indices in enumerate(cols):
            if len(indices) != 0:
                for j in indices:
                    if query.tables[i].columns[j].type == "range":
                        print >> cf, " std::string j_l_t%s_c%s_string = iteration_stream.str() + \"/test_join_l_%s_%s.dump\";" % (i, j, query.tables[i].tid, query.tables[i].columns[j].cid)
                        print >> cf, " p.j_l_t%s_c%s= readUArrayFromFile(j_l_t%s_c%s_string.c_str());" % (i, j, i, j)
                        print >> cf, " std::string j_u_t%s_c%s_string = iteration_stream.str() + \"/test_join_u_%s_%s.dump\";" % (i, j, query.tables[i].tid, query.tables[i].columns[j].cid)
                        print >> cf, " p.j_u_t%s_c%s = readUArrayFromFile(j_u_t%s_c%s_string.c_str());" % (i, j, i, j)
                    elif query.tables[i].columns[j].type == "point":
                        print >> cf, " std::string j_p_t%s_c%s_string = iteration_stream.str() + \"/test_join_p_%s_%s.dump\";" % (i, j, query.tables[i].tid, query.tables[i].columns[j].cid)
                        print >> cf, " p.j_p_t%s_c%s = readUArrayFromFile(j_p_t%s_c%s_string.c_str());" % (i, j, i, j)
                    else:
                        raise Exception("Unsupported ctype.")
        print >> cf

        print >> cf, " join_test(&p);"
        print >> cf, "}"
def generateGPUJKDECode(i, query, estimator, stats, cu_factor):
    ts, dv = stats
    graph = constructJoinGraph(query)
    tids = graph.collectTableIDs()

    #Generate Kernels
    with open("./%s_kernels.cl" % i, 'w') as cf:
        generatePreamble(cf)

        gk = GaussKernel()
        gk.pointEstimateFunction(cf)
        gk.pointGradientFunction(cf)
        gk.rangeEstimateFunction(cf)
        gk.rangeGradientFunction(cf)

        ck = CategoricalKernel()
        ck.pointEstimateFunction(cf)
        ck.pointGradientFunction(cf)

        print >> cf, "//"
        graph.generateTableEstimateKernel(cf, query, estimator, stats)
        generateBinarySearchCode(cf)
        generateJoinEstimateKernel(cf, query, estimator, stats)
        print >> cf, "//"

        #Do we need table level estimation kernels?
        if estimator.bw_optimization == "local":
            for j, kernels in enumerate(estimator.kernels):
                generateTableGradientContributionKernel(cf, "grad_t%s" % j, kernels, dv[j])

    with open("./%s_AGPUJKDE.cpp" % i, 'w') as cf:
        generateCIncludes(cf)
        generateRectKDELimit(cf)
        generateContKDELimit(cf)
        generateGPUJKDEParameterArray(cf, query, estimator)
        Utils.generateGPUVectorConverterFunction(cf)
        Utils.generateUintFileReaderFunction(cf)
        Utils.generateDoubleFileReaderFunction(cf)
        Utils.generateFileCheckFunction(cf)
        Utils.generateScottBWFunction(cf)
        Utils.generateDoubleDumper(cf)
        generateGPUJKDEEstimateFunction(cf, graph, query, estimator,
                                        prod(ts.values())**-1.0, stats, cu_factor)
        generateGPUJKDETestWrapper(cf, query, estimator)

        if estimator.bw_optimization == "local":
            for tid, table in enumerate(query.tables):
                generateTableEstGrad(cf, tid, query, estimator)
                generateTableObjectiveGrad(cf, tid, query, estimator)
        elif estimator.bw_optimization == "join":
            generateGPUJKDEObjective(cf, query, estimator)

        cols = Utils.generateInvariantColumns(query)
        jcols = Utils.generateJoinColumns(query)

        print >> cf, """
int main( int argc, const char* argv[] ){
    parameters p;

    compute::device device = compute::system::default_device();
    p.ctx = compute::context(device);
    p.queue=compute::command_queue(p.ctx, device);
"""
        print >> cf, """
    std::ifstream t("./%s_kernels.cl");
    t.exceptions ( std::ifstream::failbit | std::ifstream::badbit );
    std::string str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
""" % i

        #Read the table sizes, read the sample columns into memory, and transfer them to the GPU.
        print >> cf, " std::stringstream iteration_stream;"
        print >> cf, " p.iteration = (unsigned int) atoi(argv[%s]);" % (len(query.tables) + 1)
        print >> cf, " iteration_stream << \"./iteration\" << std::setw(2) << std::setfill('0') << argv[%s];" % (len(query.tables) + 1)
        for j, t in enumerate(query.tables):
            print >> cf, " p.ss%s = atoi(argv[%s]);" % (j, j + 1)
            print >> cf, " p.ts%s= %s;" % (j, ts[j])
            for k, c in enumerate(t.columns):
                print >> cf, " std::stringstream s_t%s_c%s_stream ;" % (j, k)
                print >> cf, " s_t%s_c%s_stream << iteration_stream.str() << \"/sample_\" << atoi(argv[%s]) << \"_%s_%s.dump\";" % (j, k, j + 1, t.tid, c.cid)
                print >> cf, " std::string s_t%s_c%s_string = s_t%s_c%s_stream.str();" % (j, k, j, k)
                print >> cf, " unsigned int* s_t%s_c%s = readUArrayFromFile(s_t%s_c%s_string.c_str());" % (j, k, j, k)
                print >> cf, " p.s_t%s_c%s = toGPUVector(s_t%s_c%s, p.ss%s, p.ctx, p.queue);" % (j, k, j, k, j)
                if estimator.kernels[j][k] == "GaussPoint" or estimator.kernels[j][k] == "GaussRange":
                    print >> cf, " p.bw_t%s_c%s = scott_bw(s_t%s_c%s, p.ss%s, %s);" % (j, k, j, k, j, len(query.tables))
                    print >> cf, " if(p.bw_t%s_c%s < 0.2) p.bw_t%s_c%s = 0.2;" % (j, k, j, k)
                else:
                    print >> cf, " p.bw_t%s_c%s = 1.0/(1.0+1.0/%f);" % (j, k, dv[j][k] - 1)
        print >> cf

        for t, cs in enumerate(jcols):
            if cols[t]:
                for c in cs:
                    print >> cf, " p.sr_t%s_c%s = compute::vector<unsigned int>(p.ss%s, p.ctx);" % (t, c, t)
        print >> cf, " p.final_contributions = compute::vector<double>(p.ss%s, p.ctx);" % (tids[0])

        print >> cf, """
    compute::program pr = compute::program::create_with_source(str,p.ctx);
    try{
        std::ostringstream oss;
        pr.build(oss.str());
    } catch(const std::exception& ex){
        std::cout << pr.build_log() << std::endl;
    }
"""
        for j, t in enumerate(query.tables):
            if len(cols[j]) > 0:
                print >> cf, " p.invk%s = pr.create_kernel(\"invk_t%s\");" % (j, j)
                print >> cf, " p.inv_t%s = compute::vector<double>(p.ss%s, p.ctx);" % (j, j)
                print >> cf, " p.invr_t%s = compute::vector<double>(p.ss%s, p.ctx);" % (j, j)
        print >> cf, " p.estimate = pr.create_kernel(\"estimate\");"
        print >> cf

        for t, tab in enumerate(query.tables):
            print >> cf, " p.map_t%s = compute::vector<unsigned int >(p.ss%s+1, p.ctx);" % (t, t)
            print >> cf, " p.count_t%s = compute::vector<int >(p.ss%s+1, p.ctx);" % (t, t)
            print >> cf, " p.count_t%s[0] = -1;" % t

        #Prepare training
        if estimator.bw_optimization == "local":
            generateGPUJKDELocalTraining(cf, query, estimator, cu_factor)
        elif estimator.bw_optimization == "join":
            if estimator.join_kernel == "Rect":
                raise Exception("This is not how optimization on join works.")
            elif estimator.join_kernel == "Cont":
                generateGPUJKDEGlobalTraining(cf, query, estimator)
            elif estimator.join_kernel == "Cat":
                generateGPUJKDEGlobalTraining(cf, query, estimator)
            else:
                raise Exception("I don't know this join kernel.")
        else:
            raise Exception("I don't know this type of join optimization.")

        print >> cf, " std::string test_cardinality_string = iteration_stream.str() + \"/test_join_true.dump\";"
        print >> cf, " p.j_test_cardinality = readUArrayFromFile(test_cardinality_string.c_str());"

        for i, indices in enumerate(cols):
            if len(indices) != 0:
                for j in indices:
                    if estimator.kernels[i][j] == "GaussRange":
                        print >> cf, " std::string j_l_t%s_c%s_string = iteration_stream.str() + \"/test_join_l_%s_%s.dump\";" % (i, j, query.tables[i].tid, query.tables[i].columns[j].cid)
                        print >> cf, " p.j_l_t%s_c%s= readUArrayFromFile(j_l_t%s_c%s_string.c_str());" % (i, j, i, j)
                        print >> cf, " std::string j_u_t%s_c%s_string = iteration_stream.str() + \"/test_join_u_%s_%s.dump\";" % (i, j, query.tables[i].tid, query.tables[i].columns[j].cid)
                        print >> cf, " p.j_u_t%s_c%s = readUArrayFromFile(j_u_t%s_c%s_string.c_str());" % (i, j, i, j)
                    else:
                        print >> cf, " std::string j_p_t%s_c%s_string = iteration_stream.str() + \"/test_join_p_%s_%s.dump\";" % (i, j, query.tables[i].tid, query.tables[i].columns[j].cid)
                        print >> cf, " p.j_p_t%s_c%s = readUArrayFromFile(j_p_t%s_c%s_string.c_str());" % (i, j, i, j)
        print >> cf

        print >> cf, " join_test(&p);"
        print >> cf, "}"
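
# Usage sketch (assumptions: a two-table query and that the generated %s_AGPUJKDE.cpp
# has been compiled to a binary named `query0_AGPUJKDE`; the binary name is
# hypothetical). The generated main() expects one sample size per table followed by
# the iteration id, and loads its inputs relative to the working directory, e.g.
#
#   ./query0_AGPUJKDE 1024 1024 1
#
# reads ./iteration01/sample_1024_<tid>_<cid>.dump for every sample column plus the
# ./iteration01/test_join_*.dump files, and finally calls join_test(&p).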