def plan_from_optimizer(query_str, sources):
    """Parse *query_str* and build a plan with a default left-deep optimizer.

    All LDFF features (PBJ, decomposer, pruning) are disabled and the eddy
    count is fixed at 2.

    NOTE(review): a later definition in this file re-uses the name
    ``plan_from_optimizer`` and therefore shadows this one at import time —
    confirm which version callers are meant to get.
    """
    ldff = LDFF_Optimizer(
        sources=sources,
        eddies=2,
        pbj=False,
        decomposer=False,
        pruning=False,
    )
    return ldff.create_plan(parse(query_str))
def plan_from_optimizer(query_str, sources, optimizer_dct=None):
    """Parse *query_str* and build a plan with a configurable optimizer.

    :param query_str: SPARQL query string; handed to ``parse``.
    :param sources: data sources handed to the selected optimizer.
    :param optimizer_dct: optional configuration mapping. Recognized keys:
        ``"name"`` ("left-deep" (default), "nLDE", "CROP"), ``"eddies"``,
        ``"pbj"``, ``"decomposer"``, ``"pruning"``, and — for CROP —
        ``"k"``, ``"top_t"``, ``"adaptive_k"``, ``"robust_threshold"``,
        ``"cost_threshold"``.
    :returns: the plan produced by the selected optimizer.
    :raises ValueError: if ``optimizer_dct["name"]`` is not recognized.
    """
    # FIX: the original used a mutable default argument (optimizer_dct={}),
    # which is shared across calls; use the None-sentinel idiom instead.
    if optimizer_dct is None:
        optimizer_dct = {}
    backend_logger.info("Optimizer Config: {}".format(optimizer_dct))

    optimizer_name = optimizer_dct.get("name", "left-deep")
    eddies = optimizer_dct.get("eddies", 2)
    pbj_enabled = optimizer_dct.get("pbj", False)
    decomposer_enabled = optimizer_dct.get("decomposer", False)
    pruning_enabled = optimizer_dct.get("pruning", False)

    if optimizer_name == "left-deep":
        optimizer = LDFF_Optimizer(
            sources=sources,
            eddies=eddies,
            pbj=pbj_enabled,
            decomposer=decomposer_enabled,
            pruning=pruning_enabled,
        )
    elif optimizer_name == "nLDE":
        optimizer = nLDE_Optimizer(sources=sources, eddies=eddies)
    elif optimizer_name == "CROP":
        # Cost Model, Robust Model
        # NOTE(review): both models are CropCostModel instances — confirm
        # whether a dedicated robustness-model class was intended here.
        cost_model = CropCostModel()
        robust_model = CropCostModel()
        # IDP Optimizer Setup
        k = optimizer_dct.get("k", 4)
        top_t = optimizer_dct.get("top_t", 5)
        adaptive_k = optimizer_dct.get("adaptive_k", True)
        enable_robustplan = True
        robustness_threshold = optimizer_dct.get("robust_threshold", 0.05)
        cost_threshold = optimizer_dct.get("cost_threshold", 0.3)
        optimizer = IDP_Optimizer(
            eddies=eddies,
            sources=sources,
            cost_model=cost_model,
            robust_model=robust_model,
            k=k,
            top_t=top_t,
            adaptive_k=adaptive_k,
            enable_robustplan=enable_robustplan,
            robustness_threshold=robustness_threshold,
            cost_threshold=cost_threshold,
        )
    else:
        # FIX: previously an unrecognized name fell through every branch and
        # the call below raised an opaque NameError; fail fast instead.
        raise ValueError("Unknown optimizer name: {}".format(optimizer_name))

    query_parsed = parse(query_str)
    plan = optimizer.create_plan(query_parsed)
    return plan
def __get_query_plan(self):
    """Parse ``self.query``, time plan creation, and return the plan.

    Side effects: stores the query's triple-pattern count in
    ``self.triple_pattern_cnt`` and the plan-creation wall time (seconds)
    in ``self.optimization_time``.
    """
    # Parse the SPARQL query first — parsing is excluded from the timing.
    parsed = parse(self.query)
    self.triple_pattern_cnt = parsed.triple_pattern_count

    # Time only the optimizer's plan-creation step.
    t_start = time()
    plan = self.optimizer.create_plan(parsed)
    self.optimization_time = time() - t_start

    logger.debug(plan)
    return plan
def execute_plan(query_id, sources_json, plan_json, query_json, mongodb_url):
    """Execute a physical query plan and stream its results into MongoDB.

    Builds a ``PhysicalPlan`` from *plan_json*, runs it on an ``EddyNetwork``
    under a SIGALRM timeout, and pushes result bindings in batches to the
    ``querydb.queries`` document identified by *query_id*.

    :param query_id: MongoDB ``_id`` of the query document to update.
    :param sources_json: data sources handed to plan construction.
    :param plan_json: JSON string describing the logical plan.
    :param query_json: SPARQL query string; handed to ``parse``.
    :param mongodb_url: connection URL for ``MongoClient``.
    :returns: a short summary string with the result count, or a JSON error
        payload when DB connection or plan creation fails.
    """
    # Results are aggregated & written to DB every X results
    write_to_db_every = 10
    # Maximum time in seconds before query is stopped
    maximum_time_per_query = 60

    task_id = query_id
    t0 = time()
    sources = sources_json
    plan_dict = json.loads(plan_json)
    # FIX: the original used the Python 2 print *statement* here — a
    # SyntaxError on Python 3 and inconsistent with the print() calls used
    # everywhere else in this file.
    print("Got Plan Dict:", plan_dict)

    # Connect to Database
    try:
        client = MongoClient(mongodb_url)
    except Exception:
        return json.dumps({
            'title': 'Could not connect to database.',
            'msg': 'Authentication failed.'
        })
    db = client.querydb
    queries = db.queries
    queries.update_one({'_id': task_id}, {'$set': {'status': 'pending'}})

    # Physical Plan creation error - i. e. due to manipulated plan request
    try:
        lplan = dict_to_logical(plan_dict, sources)
        parsedQuery = parse(query_json)
        plan = PhysicalPlan(sources, 2, lplan, poly_operator=False,
                            query=parsedQuery)
        variables = plan.tree.vars
        if plan.query.projection:
            try:
                args = [
                    str(arg.get_variable()) for arg in plan.query.projection
                ]
                variables = args
            except Exception as e:
                print(e)
        queries.update_one(
            {'_id': task_id},
            {'$set': {
                'sparql_results.head.vars': list(variables)
            }})
    except Exception as e:
        # FIX: surface the failure reason instead of swallowing it silently.
        print(e)
        queries.update_one({'_id': task_id}, {'$set': {'status': 'failed'}})
        return json.dumps({
            'title': 'Physical plan creation error',
            'msg': 'Could not create a physical plan for the query execution from the plan provided in the request.'
        })

    print(plan)
    En = EddyNetwork()
    result_count = 0
    counter = 0
    aggregated_solutions = []
    status = 'done'
    print("Vor der Eddie Schleife")  # (German: "before the eddy loop")

    # Set Timout for query execution
    signal.signal(signal.SIGALRM, En.stop_execution)
    signal.alarm(maximum_time_per_query)
    try:
        t0 = time()
        for result in En.execute_standalone(plan):
            t_elasped = time() - t0
            logger.info(result)
            solution_dict = {}
            counter += 1
            result_count += 1
            # Check if URI or Literal
            for key, value in result.data.items():
                if str(value).startswith(('http://', 'https://')):
                    val_type = 'uri'
                else:
                    val_type = 'Literal'
                solution_dict[key] = {'value': value, 'type': val_type}
            # Per-solution trace: elapsed time and running result count.
            solution_dict['_trace_'] = {
                'value': str(t_elasped),
                'type': 'Literal',
                'count': str(result_count)
            }
            aggregated_solutions.append(solution_dict)
            # Write aggregated results to MongoDB every X results
            if counter % write_to_db_every == 0:
                t_now = time()
                queries.update_one({'_id': task_id}, {
                    '$push': {
                        'sparql_results.results.bindings': {
                            '$each': aggregated_solutions
                        }
                    },
                    '$set': {
                        'result_count': result_count,
                        't_delta': t_now - t0,
                        'sparql_results.head.vars': list(variables)
                    }
                })
                counter = 0
                aggregated_solutions = []
    except Exception as e:
        print(e)
        # FIX: bare raise preserves the original traceback ('raise e' resets it).
        raise
    finally:
        # FIX: cancel any still-pending alarm so it cannot fire later in the
        # process once the execution loop has ended.
        signal.alarm(0)

    updated_plan_dict = plan.json_dict
    query_hash = get_query_hash(parsedQuery)
    plan_hash = calc_query_plan_hash(updated_plan_dict)
    tend = time()
    if tend - t0 > maximum_time_per_query:
        status = "timeout"

    # Write remaining results to MongoDB
    queries.update_one({'_id': task_id}, {
        '$push': {
            'sparql_results.results.bindings': {
                '$each': aggregated_solutions
            }
        },
        '$set': {
            'plan': updated_plan_dict,
            'query_hash': query_hash,
            'plan_hash': plan_hash,
            'requests': plan.total_requests,
            'status': status,
            't_end': tend,
            't_delta': (tend - t0),
            'result_count': result_count,
            'sparql_results.head.vars': list(variables)
        }
    })
    return ' Count: ' + str(result_count)