Example #1
0
def plan_from_optimizer(query_str, sources):
    """Create a query plan for ``query_str`` with a fixed LDFF_Optimizer setup.

    The optimizer is always built with two eddies and with the ``pbj``,
    ``decomposer`` and ``pruning`` options switched off.

    :param query_str: query text handed to ``parse``.
    :param sources: sources forwarded unchanged to ``LDFF_Optimizer``.
    :return: whatever ``LDFF_Optimizer.create_plan`` returns for the parsed
        query.
    """
    # Fixed configuration, kept in one place so it is easy to scan.
    fixed_settings = {
        "sources": sources,
        "eddies": 2,
        "pbj": False,
        "decomposer": False,
        "pruning": False,
    }
    ldff = LDFF_Optimizer(**fixed_settings)

    parsed = parse(query_str)
    return ldff.create_plan(parsed)
Example #2
0
def plan_from_optimizer(query_str, sources, optimizer_dct=None):
    """Create a query plan using the optimizer selected by ``optimizer_dct``.

    :param query_str: query text handed to ``parse``.
    :param sources: sources forwarded unchanged to the chosen optimizer.
    :param optimizer_dct: optional configuration dict. Recognised keys:

        * ``name`` -- ``"left-deep"`` (default), ``"nLDE"`` or ``"CROP"``.
        * ``eddies`` -- forwarded to every optimizer (default ``2``).
        * ``pbj``, ``decomposer``, ``pruning`` -- only used by
          ``"left-deep"`` (all default to ``False``).
        * ``k``, ``top_t``, ``adaptive_k``, ``robust_threshold``,
          ``cost_threshold`` -- only used by ``"CROP"`` (defaults ``4``,
          ``5``, ``True``, ``0.05`` and ``0.3``).

        ``None`` (the default) is treated as an empty configuration.
    :return: whatever the selected optimizer's ``create_plan`` returns for
        the parsed query.
    :raises ValueError: if ``name`` is not one of the supported optimizers.
    """
    # A ``{}`` default would be one dict object shared by every call, so the
    # default is ``None`` and a fresh dict is created per call instead.
    if optimizer_dct is None:
        optimizer_dct = {}

    backend_logger.info("Optimizer Config: {}".format(optimizer_dct))
    optimizer_name = optimizer_dct.get("name", "left-deep")

    eddies = optimizer_dct.get("eddies", 2)

    pbj_enabled = optimizer_dct.get("pbj", False)
    decomposer_enabled = optimizer_dct.get("decomposer", False)
    pruning_enabled = optimizer_dct.get("pruning", False)

    if optimizer_name == "left-deep":
        optimizer = LDFF_Optimizer(sources=sources,
                                   eddies=eddies,
                                   pbj=pbj_enabled,
                                   decomposer=decomposer_enabled,
                                   pruning=pruning_enabled)

    elif optimizer_name == "nLDE":
        optimizer = nLDE_Optimizer(sources=sources, eddies=eddies)

    elif optimizer_name == "CROP":
        # Cost Model, Robust Model
        # NOTE(review): both models are CropCostModel instances -- confirm
        # that the robust model is not meant to be a different class.
        cost_model = CropCostModel()
        robust_model = CropCostModel()

        # IDP Optimizer Setup
        k = optimizer_dct.get("k", 4)
        top_t = optimizer_dct.get("top_t", 5)
        adaptive_k = optimizer_dct.get("adaptive_k", True)

        # Robust plan selection is always on; only its thresholds are
        # configurable.
        enable_robustplan = True
        robustness_threshold = optimizer_dct.get("robust_threshold", 0.05)
        cost_threshold = optimizer_dct.get("cost_threshold", 0.3)
        optimizer = IDP_Optimizer(eddies=eddies,
                                  sources=sources,
                                  cost_model=cost_model,
                                  robust_model=robust_model,
                                  k=k,
                                  top_t=top_t,
                                  adaptive_k=adaptive_k,
                                  enable_robustplan=enable_robustplan,
                                  robustness_threshold=robustness_threshold,
                                  cost_threshold=cost_threshold)

    else:
        # Without this branch ``optimizer`` would be unbound below and the
        # caller would only see an UnboundLocalError.
        raise ValueError(
            "Unknown optimizer name: {!r} (expected 'left-deep', 'nLDE' or "
            "'CROP')".format(optimizer_name))

    query_parsed = parse(query_str)
    plan = optimizer.create_plan(query_parsed)
    return plan
Example #3
0
    def __get_query_plan(self):
        """Parse ``self.query``, build its plan and record optimization time.

        Side effects on ``self``:

        * ``triple_pattern_cnt`` is set from the parsed query's
          ``triple_pattern_count``.
        * ``optimization_time`` is set to the wall-clock seconds spent inside
          ``self.optimizer.create_plan`` (parsing is not included).

        :return: the plan produced by ``self.optimizer.create_plan``.
        """
        parsed_query = parse(self.query)
        self.triple_pattern_cnt = parsed_query.triple_pattern_count

        # Only plan creation is timed; the parse above is outside the window.
        started_at = time()
        query_plan = self.optimizer.create_plan(parsed_query)
        self.optimization_time = time() - started_at

        logger.debug(query_plan)
        return query_plan
Example #4
0
def execute_plan(query_id, sources_json, plan_json, query_json, mongodb_url):
    """Execute a query plan and stream its results into MongoDB.

    Results are written to the document with ``_id == query_id`` in the
    ``queries`` collection of the ``querydb`` database.

    :param query_id: ``_id`` of the query document to update.
    :param sources_json: sources, passed on unchanged (not decoded here) to
        ``dict_to_logical`` and ``PhysicalPlan``.
    :param plan_json: JSON string describing the plan; decoded with
        ``json.loads``.
    :param query_json: query passed to ``parse``.
    :param mongodb_url: connection URL handed to ``MongoClient``.
    :return: ``' Count: <n>'`` on success, or a JSON string with ``title``
        and ``msg`` keys if the database client or the physical plan could
        not be created.
    :raises Exception: any exception raised while executing the plan is
        printed and re-raised.
    """
    plan_dict = json.loads(plan_json)
    # Single-argument print call: same output under Python 2 and Python 3.
    print("Got Plan Dict: {}".format(plan_dict))

    # Connect to Database
    # NOTE(review): MongoClient may connect lazily, in which case this
    # handler rarely triggers -- confirm against the PyMongo version in use.
    try:
        client = MongoClient(mongodb_url)
    except Exception:
        return json.dumps({
            'title': 'Could not connect to database.',
            'msg': 'Authentication failed.'
        })

    try:
        return _run_plan_and_store(client.querydb.queries, query_id,
                                   sources_json, plan_dict, query_json)
    finally:
        # One client is created per call, so release it on every exit path.
        client.close()


def _sparql_binding(value):
    """Wrap a result value as a ``{'value': ..., 'type': ...}`` binding."""
    # Check if URI or Literal
    is_uri = str(value).startswith(('http://', 'https://'))
    return {'value': value, 'type': 'uri' if is_uri else 'Literal'}


def _run_plan_and_store(queries, task_id, sources, plan_dict, query_json,
                        write_to_db_every=10, maximum_time_per_query=60):
    """Build the physical plan, run it and write the results to ``queries``.

    :param queries: MongoDB collection holding the query document.
    :param task_id: ``_id`` of the query document to update.
    :param sources: sources for ``dict_to_logical`` and ``PhysicalPlan``.
    :param plan_dict: decoded plan dictionary.
    :param query_json: query passed to ``parse``.
    :param write_to_db_every: results are aggregated and pushed to the
        database every this many results.
    :param maximum_time_per_query: seconds before SIGALRM is delivered to
        ``EddyNetwork.stop_execution``.
    :return: same contract as ``execute_plan``.
    """
    queries.update_one({'_id': task_id}, {'$set': {'status': 'pending'}})

    # Physical Plan creation error - i. e. due to manipulated plan request
    try:
        lplan = dict_to_logical(plan_dict, sources)
        parsedQuery = parse(query_json)
        plan = PhysicalPlan(sources,
                            2,
                            lplan,
                            poly_operator=False,
                            query=parsedQuery)
        variables = plan.tree.vars

        if plan.query.projection:
            # Best effort: on failure keep the plan tree's variables.
            try:
                variables = [
                    str(arg.get_variable()) for arg in plan.query.projection
                ]
            except Exception as e:
                print(e)

        queries.update_one(
            {'_id': task_id},
            {'$set': {
                'sparql_results.head.vars': list(variables)
            }})

    except Exception:
        queries.update_one({'_id': task_id}, {'$set': {'status': 'failed'}})
        return json.dumps({
            'title':
            'Physical plan creation error',
            'msg':
            'Could not create a physical plan for the query execution from the plan provided in the request.'
        })

    print(plan)

    En = EddyNetwork()
    result_count = 0
    aggregated_solutions = []
    status = 'done'
    print("Vor der Eddie Schleife")

    # Set timeout for query execution
    signal.signal(signal.SIGALRM, En.stop_execution)
    signal.alarm(maximum_time_per_query)

    try:
        t0 = time()
        for result in En.execute_standalone(plan):
            t_elapsed = time() - t0
            logger.info(result)

            result_count += 1

            solution_dict = {}
            for key, value in result.data.items():
                solution_dict[key] = _sparql_binding(value)
            solution_dict['_trace_'] = {
                'value': str(t_elapsed),
                'type': 'Literal',
                'count': str(result_count)
            }
            aggregated_solutions.append(solution_dict)

            # Write aggregated results to MongoDB every X results
            if len(aggregated_solutions) >= write_to_db_every:
                t_now = time()
                queries.update_one({'_id': task_id}, {
                    '$push': {
                        'sparql_results.results.bindings': {
                            '$each': aggregated_solutions
                        }
                    },
                    '$set': {
                        'result_count': result_count,
                        't_delta': t_now - t0,
                        'sparql_results.head.vars': list(variables)
                    }
                })

                aggregated_solutions = []
    except Exception as e:
        print(e)
        # Bare raise keeps the original traceback (``raise e`` drops it on
        # Python 2).
        raise
    finally:
        # Cancel the pending alarm so it cannot fire after this query is
        # over and call stop_execution at an unrelated time.
        signal.alarm(0)

    updated_plan_dict = plan.json_dict
    query_hash = get_query_hash(parsedQuery)
    plan_hash = calc_query_plan_hash(updated_plan_dict)
    tend = time()

    # No explicit timeout flag is read back from the eddy network; a run
    # longer than the limit is reported as a timeout.
    if tend - t0 > maximum_time_per_query:
        status = "timeout"

    # Write remaining results to MongoDB
    queries.update_one({'_id': task_id}, {
        '$push': {
            'sparql_results.results.bindings': {
                '$each': aggregated_solutions
            }
        },
        '$set': {
            'plan': updated_plan_dict,
            'query_hash': query_hash,
            'plan_hash': plan_hash,
            'requests': plan.total_requests,
            'status': status,
            't_end': tend,
            't_delta': (tend - t0),
            'result_count': result_count,
            'sparql_results.head.vars': list(variables)
        }
    })

    return ' Count: ' + str(result_count)