Exemple #1
0
def contact_source_direct(server, query, queue, vars=None, **kwargs):
    """Contact one source in the calling process and close the stream.

    The interface is chosen from a marker contained in ``server``:
    ``sparql@`` selects the SPARQL endpoint client, ``brtpf@`` the brTPF
    client and ``tpf@`` (or no marker at all) the TPF client.  The marker is
    removed from the address before it is handed to the client.

    The client is always called with ``put_eof=False``; this function puts a
    single ``EOF()`` on ``queue`` itself once the client has returned.

    :param server: source address, optionally carrying an interface marker.
    :param query: query object forwarded untouched to the client.
    :param queue: output queue forwarded to the client; receives the EOF.
    :param vars: forwarded untouched to the client.
    :param kwargs: ``sparql_limit`` (default 500) is passed as ``limit`` to
        the SPARQL endpoint client.
    """
    sparql_limit = kwargs.get("sparql_limit", 500)

    # "brtpf@" must be tested before "tpf@": the latter is a substring of
    # the former.
    if "sparql@" in server:
        contact_single_endpoint_server(server.replace("sparql@", ""),
                                       query,
                                       queue,
                                       vars,
                                       put_eof=False,
                                       limit=sparql_limit)
    elif "brtpf@" in server:
        contact_single_brtpf_server(server.replace("brtpf@", ""),
                                    query,
                                    queue,
                                    vars,
                                    put_eof=False)
    else:
        # Explicit "tpf@" marker and unmarked servers share the TPF client;
        # only the former needs its marker stripped.
        tpf_url = server.replace("tpf@", "") if "tpf@" in server else server
        contact_single_tpf_server(tpf_url,
                                  query,
                                  queue,
                                  vars,
                                  put_eof=False)

    queue.put(EOF())
Exemple #2
0
def contact_source_bindings(servers,
                            query,
                            queue,
                            bindings,
                            vars=None,
                            **kwargs):
    """Send ``bindings`` to every relevant server, one server after another.

    Only servers that are keys of ``query.sources`` are contacted.  The
    interface is chosen from a marker contained in the server string
    (``sparql@``, ``brtpf@``, ``tpf@``; no marker means TPF) and the marker
    is stripped from the address.  The values returned by the per-server
    clients are summed and put on ``queue`` as ``EOF(requests=<sum>)``.

    :param servers: iterable of server strings.
    :param query: query object; its ``sources`` keys select the servers.
    :param queue: output queue shared with the per-server clients.
    :param bindings: bindings forwarded untouched to the clients.
    :param vars: forwarded untouched to the clients.
    :param kwargs: ``sparql_limit`` (default 500) is passed as ``limit`` to
        the SPARQL endpoint client.
    """
    sparql_limit = kwargs.get("sparql_limit", 500)
    issued_per_server = []

    for server in servers:
        # Servers that are not a source of this query are skipped.
        if server not in query.sources.keys():
            continue

        # "brtpf@" is tested before the TPF fallback because "tpf@" is a
        # substring of it.
        if "sparql@" in server:
            issued = contact_single_endpoint_server_bindings(
                server.replace("sparql@", ""),
                query,
                queue,
                bindings,
                vars,
                limit=sparql_limit)
        elif "brtpf@" in server:
            issued = contact_single_brtpf_server_binding(
                server.replace("brtpf@", ""), query, queue, bindings, vars)
        else:
            tpf_url = server.replace("tpf@", "") if "tpf@" in server else server
            issued = contact_single_tpf_server_binding(
                tpf_url, query, queue, bindings, vars)

        issued_per_server.append(issued)

    queue.put(EOF(requests=sum(issued_per_server)))
Exemple #3
0
def contact_single_tpf_server(server,
                              query,
                              queue,
                              vars=None,
                              binding=None,
                              **kwargs):
    """Evaluate one triple pattern against a TPF server, page by page.

    The subject, predicate and object of ``query`` are turned into request
    parameters when they are constants and into result variables when they
    are variables.  Pages are requested with an increasing ``page``
    parameter for as long as the response body mentions ``nextPage`` or
    ``hydra:next``; each page is handed to ``parse_response`` together with
    ``queue``.

    :param server: URL of the TPF server.
    :param query: triple pattern exposing ``subject``, ``predicate`` and
        ``object`` terms.
    :param queue: output queue handed to ``parse_response``.
    :param vars: variables used in place of the pattern's own variables
        when the pattern contains none.
    :param binding: passed through to ``parse_response``; a fresh empty
        dict is used when omitted.
    :param kwargs: ``put_eof`` (default False) - when true,
        ``EOF(requests=<count>)`` is put on ``queue`` at the end.
    :return: number of HTTP requests issued, or ``None`` when the pattern
        has a literal in subject or predicate position.
    """
    logger = logging.getLogger("nlde_logger")

    # A mutable default would be one dict shared by every call, so the
    # default is created per call instead.
    if binding is None:
        binding = {}

    # Bitmask of the variable positions: 4 = subject, 2 = predicate,
    # 1 = object.
    template = 0
    qvars = []
    param_dict = {}

    # Extract subject.
    subject = query.subject
    if subject.isvariable():
        template = template | 4
        qvars.append(subject.get_variable())
    else:
        param_dict['subject'] = subject.value

    # Extract predicate.
    predicate = query.predicate
    if predicate.isvariable():
        template = template | 2
        qvars.append(predicate.get_variable())
    else:
        param_dict['predicate'] = predicate.value

    # Extract object (value).
    value = query.object
    if value.isvariable():
        template = template | 1
        qvars.append(value.get_variable())
    else:
        if value.isfloat():
            param_dict['object'] = '"{}"^^{}'.format(
                value, "<http://www.w3.org/2001/XMLSchema#double>")
        elif value.isint():
            param_dict['object'] = '"{}"^^{}'.format(
                value, "<http://www.w3.org/2001/XMLSchema#integer>")
        else:
            # Quote the value only when it is neither URI-like nor already
            # quoted.
            if not value.value.startswith("http") and not value.isuri(
            ) and value.value[0] != '"' and value.value[-1] != '"':
                param_dict['object'] = '"{}"'.format(value.value)
            else:
                param_dict['object'] = value.value

    # Literal in subject or predicate position: NOT a valid triple pattern.
    # NOTE(review): this puts the plain string "EOF" regardless of
    # ``put_eof`` and returns None instead of a request count - confirm
    # that consumers expect this.
    if subject.isliteral() or predicate.isliteral():
        queue.put("EOF")
        return

    # When there are no variables in the triple pattern, use the ones
    # provided by the caller.
    if len(qvars) == 0 and vars:
        qvars = vars

    # Pagination settings.
    page = 1
    next_page = True
    total = 0
    count_requests = 0
    card_sum = 0
    elapsed = 0

    while next_page:
        count_requests = count_requests + 1
        # Establish connection and get response from server.
        param_dict['page'] = page
        response = requests.get(server, params=param_dict, headers=headers)
        elapsed += response.elapsed.total_seconds()
        next_page = False

        # Successfully contacted the server.
        if response.status_code == 200:
            res = response.content
            if page == 1:
                # Get total solutions in fragment; give up when the count
                # cannot be found exactly once.
                matches = findall(triples_regex, res)
                if len(matches) == 1:
                    total = int(matches[0])
                else:
                    break

            # Get solution mappings from fragment.
            if total > 0:
                # SECURITY: eval() executes whatever the server returned.
                # Against an untrusted server this is arbitrary code
                # execution; a real parser should replace it.
                myres = eval(res)
                card = parse_response(template, myres["@graph"], qvars, queue,
                                      server, 0, myres["@context"], binding)
                card_sum += card

            # Prepare next request.
            if "nextPage" in res:
                next_page = True
            elif "hydra:next" in res:
                next_page = True
            page = page + 1
        else:
            # Single-argument print calls behave the same on Python 2 and 3.
            print("Could not contact TPF server {}. Status code: {}".format(
                server, response.status_code))
            print(response.headers)

    if logger:
        mgs = {
            "requests": count_requests,
            "tuples": card_sum,
            "querypath": response.url,
            "timestamp": str(datetime.datetime.now()),
            "elapsed": elapsed,
            "interface": "tpf"
        }
        logger.info(str(mgs))

    if kwargs.get("put_eof", False):
        eof = EOF(requests=count_requests)
        queue.put(eof)

    return count_requests
Exemple #4
0
def contact_single_endpoint_server(server, query, queue, vars=None, **kwargs):
    """Evaluate triple pattern(s) against a SPARQL endpoint, page by page.

    A SELECT query projecting every variable of the pattern(s) is built from
    ``select_tmplt``.  With a positive ``limit`` the query is repeated with
    ``LIMIT``/``OFFSET`` until a page comes back empty or shorter than
    ``limit``; with a non-positive ``limit`` it is sent once, unpaginated.
    Each non-empty page is handed to ``parse_response`` together with
    ``queue``.

    :param server: URL of the SPARQL endpoint.
    :param query: one triple pattern or a list of them.
    :param queue: output queue handed to ``parse_response``.
    :param vars: not used by this function.
    :param kwargs: ``limit`` (default ``PAGE_SIZE``) - page size;
        ``put_eof`` (default False) - when true, ``EOF(requests=<count>)``
        is put on ``queue`` at the end.
    :return: number of HTTP requests issued.
    """
    logger = logging.getLogger("nlde_logger")
    logger_debug = logging.getLogger("nlde_debug")

    if not isinstance(query, list):
        query = [query]

    query_str = "\n".join([str(tp) for tp in query])
    projection_vars = set()
    for tp in query:
        projection_vars.update(tp.variables)
    base_query = select_tmplt.format(
        " ".join(["?{}".format(var) for var in projection_vars]), query_str)

    # Pagination settings.
    limit = kwargs.get("limit", PAGE_SIZE)
    offset = 0
    count_requests = 0
    card_sum = 0
    elapsed = 0
    while True:
        count_requests = count_requests + 1
        if limit > 0:
            select_query = "{} LIMIT {} OFFSET {}".format(
                base_query, limit, offset * limit)
        else:
            select_query = base_query

        params = {"query": select_query, "format": "json", "timeout": "30000"}
        response = http.get(server, params=params, headers=headers)
        elapsed += response.elapsed.total_seconds()

        if response.status_code != 200:
            # Single-argument print calls behave the same on Python 2 and 3.
            print("Could not contact SPARQL endpoint {}. Status code: {}".format(
                server, response.status_code))
            print(response.url)
            break

        # Successfully contacted the server.
        page_bindings = response.json()['results']['bindings']
        if len(page_bindings) == 0:
            break

        results = parse_response(queue, page_bindings)
        card_sum += results

        # Stop after an unpaginated request or a short (last) page.  The
        # test is ``<= 0`` rather than ``== 0`` because a negative limit
        # also sends the unpaginated query and would otherwise repeat it
        # forever.
        if limit <= 0 or results < limit:
            break

        offset += 1

    if logger:
        mgs = {
            "requests": count_requests,
            "tuples": card_sum,
            "timestamp": str(datetime.datetime.now()),
            "elapsed": elapsed,
            "interface": "sparql"
        }
        logger.info(str(mgs))
        mgs = {"querypath": response.url}
        logger_debug.info(str(mgs))

    if kwargs.get("put_eof", False):
        eof = EOF(requests=count_requests)
        queue.put(eof)

    # Returned for parity with the other single-server clients in this
    # file; callers that ignored the previous implicit None are unaffected.
    return count_requests
Exemple #5
0
def contact_source(servers, query, queue, vars=None, **kwargs):
    """Contact every relevant server in its own process and merge the output.

    For each server that is a key of ``query.sources`` a ``Process`` is
    started running the client selected by the marker in the server string
    (``sparql@``, ``brtpf@``, ``tpf@``; no marker means TPF).  All workers
    write to one auxiliary queue and are asked to finish with an EOF
    (``put_eof=True``).  Items read from that queue are forwarded to
    ``queue``; an item comparing equal to ``"EOF"`` marks one worker as
    finished and contributes its ``"requests"`` entry to the total.  Once
    every worker has finished, ``EOF(requests=<total>)`` is put on ``queue``.

    When no process was started, the string ``"EOF"`` is put on ``queue``
    and ``None`` is returned.

    :param servers: iterable of server strings.
    :param query: query object; its ``sources`` keys select the servers.
    :param queue: output queue receiving the merged results.
    :param vars: forwarded untouched to the workers.
    :param kwargs: ``sparql_limit`` (default 500) is passed as ``limit`` to
        the SPARQL endpoint client.
    """
    sparql_limit = kwargs.get("sparql_limit", 500)
    aux_queue = Queue()
    worker_pids = []

    for server in servers:
        if server not in query.sources.keys():
            continue

        # Pick client, cleaned address and client options.  "brtpf@" is
        # tested before the TPF fallback because "tpf@" is a substring of it.
        if "sparql@" in server:
            client = contact_single_endpoint_server
            server_url = server.replace("sparql@", "")
            client_kwargs = {"limit": sparql_limit, "put_eof": True}
        elif "brtpf@" in server:
            client = contact_single_brtpf_server
            server_url = server.replace("brtpf@", "")
            client_kwargs = {"put_eof": True}
        else:
            client = contact_single_tpf_server
            server_url = server.replace("tpf@", "") if "tpf@" in server else server
            client_kwargs = {"put_eof": True}

        worker = Process(target=client,
                         args=(server_url, query, aux_queue, vars),
                         kwargs=client_kwargs)
        worker.start()
        worker_pids.append(worker.pid)

    if len(worker_pids) == 0:
        queue.put("EOF")
        return None

    # Drain the auxiliary queue until every worker has reported its EOF.
    total_requests = 0
    finished_workers = 0
    while finished_workers != len(worker_pids):
        item = aux_queue.get()

        if item != "EOF":
            queue.put(item)
        else:
            total_requests += item.get("requests", 0)
            finished_workers += 1

    queue.put(EOF(requests=total_requests))
Exemple #6
0
def contact_single_endpoint_server_bindings(server,
                                            query,
                                            queue,
                                            bindings,
                                            vars=None,
                                            **kwargs):
    """Evaluate triple pattern(s) restricted to ``bindings`` on a SPARQL endpoint.

    The bindings are rendered as rows of a VALUES-style block (values
    starting with ``http://`` or ``https://`` as ``<...>``, everything else
    escaped with ``ESCAPE_CHARS`` and double-quoted; duplicate rows are
    emitted once) and inserted into ``values_tmplt`` together with the
    pattern(s).  The variable list of the block is taken from the keys of
    the first binding.  The query is paginated like the plain endpoint
    client: with a positive ``limit`` it is repeated with ``LIMIT``/``OFFSET``
    until a page is empty or shorter than ``limit``; otherwise it is sent
    once.

    :param server: URL of the SPARQL endpoint.
    :param query: one triple pattern or a list of them.
    :param queue: output queue handed to ``parse_response``.
    :param bindings: sequence of dicts mapping variable name to string value.
    :param vars: not used by this function.
    :param kwargs: ``limit`` (default ``PAGE_SIZE``) - page size.
    :return: number of HTTP requests issued (0 when ``bindings`` is empty).
    """
    logger = logging.getLogger("nlde_logger")
    logger_debug = logging.getLogger("nlde_debug")

    if len(bindings) == 0:
        # Nothing to ask.  Previously execution continued past this point
        # and failed with IndexError on ``bindings[0]``.
        # NOTE(review): contact_source_bindings puts its own EOF after this
        # call, so the consumer may see two EOFs - confirm that is fine.
        queue.put(EOF())
        return 0

    if not isinstance(query, list):
        query = [query]

    values_vars = " ".join(["?{}".format(var) for var in bindings[0].keys()])
    query_str = "\n".join([str(tp) for tp in query])

    rows = []
    seen_rows = set()
    for binding in bindings:
        terms = []
        for value in binding.values():
            if value.startswith("http://") or value.startswith("https://"):
                terms.append("<{}> ".format(value))
            else:
                # Escape characters before quoting the value.
                for to_replace, replace_with in ESCAPE_CHARS.items():
                    value = value.replace(to_replace, replace_with)
                terms.append('"{}" '.format(value))
        inner_str = "".join(terms)

        # Emit every distinct row only once.
        if inner_str not in seen_rows:
            rows.append("( {} )\n".format(inner_str))
            seen_rows.add(inner_str)
    bindings_str = "".join(rows)

    projection_vars = set()
    for tp in query:
        projection_vars.update(tp.variables)
    base_query = values_tmplt.format(
        " ".join(["?{}".format(var) for var in projection_vars]), query_str,
        "( {} )".format(values_vars), bindings_str)

    # Pagination settings.
    limit = kwargs.get("limit", PAGE_SIZE)
    offset = 0
    count_requests = 0
    card_sum = 0
    elapsed = 0
    while True:
        count_requests = count_requests + 1
        if limit > 0:
            values_query = "{} LIMIT {} OFFSET {}".format(
                base_query, limit, offset * limit)
        else:
            values_query = base_query

        params = {"query": values_query, "format": "json", "timeout": "30000"}
        response = http.get(server, params=params, headers=headers)
        elapsed += response.elapsed.total_seconds()

        if response.status_code != 200:
            # Single-argument print calls behave the same on Python 2 and 3.
            print("Could not contact SPARQL endpoint {}. Status code: {}".format(
                server, response.status_code))
            print(response.url)
            break

        # Successfully contacted the server.
        page_bindings = response.json()['results']['bindings']
        if len(page_bindings) == 0:
            break

        results = parse_response(queue, page_bindings)
        card_sum += results

        # Stop after an unpaginated request or a short (last) page.  The
        # test is ``<= 0`` rather than ``== 0`` because a negative limit
        # also sends the unpaginated query and would otherwise repeat it
        # forever.
        if limit <= 0 or results < limit:
            break

        offset += 1

    if logger:
        mgs = {
            "requests": count_requests,
            "tuples": card_sum,
            "timestamp": str(datetime.datetime.now()),
            "elapsed": elapsed,
            "interface": "sparql"
        }
        logger.info(str(mgs))
        mgs = {"querypath": response.url}
        logger_debug.info(str(mgs))

    return count_requests