def load_config(config_file="config.yaml"):
    """Load a YAML config file and build the RDF graphs it declares.

    The file is parsed with ``load``; the resulting mapping is completed
    in place with default values and then used to build one ``Graph`` per
    entry of its "datasets" list.

    Example config file:
    # config.yaml
    name: My LDF server
    maintainer: chuck Norris <*****@*****.**>

    datasets:
    -
        name: DBpedia-2016-04
        description: DBpedia dataset, version 2016-04
        backend: hdt-file
        file: /home/chuck-norris/dbpedia-2016-04.hdt
    -
        name: Chuck-Norris-facts
        description: Best Chuck Norris facts ever
        backend: rdf-file
        format: nt
        file: /home/chuck-norris/facts.nt

    Args:
      * config_file: Path to the configuration file to load.

    Returns:
      A tuple ``(config, graphs, backends)`` where ``config`` is the parsed
      configuration (with defaults filled in), ``graphs`` maps each dataset
      name to its ``Graph`` and ``backends`` maps backend names to the
      objects produced by the backend factories.

    Raises:
      SyntaxError: if a custom backend lacks one of the properties
        "name", "path", "connector" or "required".
      KeyError: if the configuration has no "datasets" entry.
    """
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(config_file) as config_stream:
        # NOTE(review): `load` is called without an explicit Loader. If this is
        # PyYAML's `yaml.load`, that form is unsafe on untrusted input and is
        # rejected by recent PyYAML releases -- confirm and consider a safe loader.
        config = load(config_stream)

    # available backends (populated with sage's native backends)
    backends = {
        'hdt-file': hdt_backend(),
        'postgres': postgres_backend(),
        'postgres-catalog': postgres_catalog_backend(),
        'sqlite': sqlite_backend(),
        'sqlite-catalog': sqlite_catalog_backend()
    }

    # build custom backends (if there are some); an absent or empty
    # "backends" entry simply yields nothing to register
    required_properties = ('name', 'path', 'connector', 'required')
    for b in config.get('backends') or []:
        if any(prop not in b for prop in required_properties):
            raise SyntaxError(
                'Invalid backend declared. Each custom backend must be declared with properties "name", "path", "connector" and "required"'
            )
        backends[b['name']] = import_backend(b['name'], b['path'],
                                             b['connector'], b['required'])

    # server-wide defaults: quota falls back to 75, max_results to infinity
    quota = config.get('quota', 75)
    max_results = config.get('max_results', inf)
    # the effective quota is written back into the returned configuration
    config['quota'] = quota

    # complete each dataset declaration with the server-wide defaults
    for c in config["datasets"]:
        c.setdefault('quota', quota)
        c.setdefault('max_results', max_results)
        c.setdefault('publish', False)
        c.setdefault('queries', [])

    # build RDF graphs, indexed by dataset name
    graphs = {c["name"]: Graph(c, backends) for c in config["datasets"]}
    return (config, graphs, backends)
Esempio n. 2
0
def load_config(config_file: str) -> Dataset:
    """Parse a SaGe configuration file written in YAML and load the corresponding RDF dataset.

    Graphs whose backend is missing or unknown are not loaded: an error is
    logged and the graph is skipped, so the returned dataset may contain
    fewer graphs than the configuration file declares.

    Args:
      * config_file: Path to the SaGe configuration file (in YAML format) to load.

    Returns:
      A RDF dataset built according to the input configuration file.

    Raises:
      SyntaxError: if a custom backend lacks one of the properties "name",
        "path", "connector" or "required", if the configuration declares no
        "graphs" entry, or if a graph has no "uri".
      KeyError: if the configuration has no top-level "name".
    """
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(config_file) as config_stream:
        # NOTE(review): FullLoader is not meant for untrusted input; if the
        # configuration can come from an untrusted source, consider a safe loader.
        config = load(config_stream, Loader=FullLoader)

    # available backends (populated with sage's native backends)
    backends = builtin_backends()
    # build custom backend (if there is some)
    if 'backends' in config and len(config['backends']) > 0:
        for b in config['backends']:
            if 'name' not in b or 'path' not in b or 'connector' not in b or 'required' not in b:
                raise SyntaxError(
                    'Invalid backend declared. Each custom backend must be declared with properties "name", "path", "connector" and "required"'
                )
            backends[b['name']] = import_backend(b['name'], b['path'],
                                                 b['connector'], b['required'])

    # load dataset basic informations
    dataset_name = config["name"]
    public_url = config.get("public_url")
    default_query = config.get("default_query")
    analytics = config.get("google_analytics")
    if "long_description" in config:
        # "long_description" is a path to a file holding the description text
        with open(config["long_description"], "r") as file:
            dataset_description = file.read()
    else:
        dataset_description = "A RDF dataset hosted by a SaGe server"

    # load the mode of the server: stateless (default) or statefull
    is_stateless = config.get('stateless', True)

    # if statefull, load the saved plan storage backend to use
    statefull_manager = None
    if not is_stateless:
        # TODO allow use of custom backend for saved plans
        # same kind of usage than custom DB backends
        statefull_manager = HashMapManager()

    # get default time quantum & maximum number of results per page
    quantum = config.get('quota', 75)
    if quantum == 'inf':
        logging.warning(
            "You are using SaGe with an infinite time quantum. Be sure to configure the Worker timeout of Gunicorn accordingly, otherwise long-running queries might be terminated."
        )
        quantum = inf
    if 'max_results' in config and config['max_results'] != 'inf':
        max_results = config['max_results']
    else:
        logging.warning(
            "You are using SaGe without limitations on the number of results sent per page. This is fine, but be carefull as very large page of results can have unexpected serialization time."
        )
        max_results = inf

    # build all RDF graphs found in the configuration file
    graphs = dict()
    if "graphs" not in config:
        raise SyntaxError(
            "No RDF graphs found in the configuration file. Please refer to the documentation to see how to declare RDF graphs in a SaGe YAML configuration file."
        )
    for g_config in config["graphs"]:
        if "uri" not in g_config:
            raise SyntaxError(
                f"Error: the RDF Graph {g_config} has no URI declared!")
        # load basic information about the graph
        g_uri = g_config["uri"]
        # graphs without a name get a random UUID as identifier
        g_name = g_config["name"] if "name" in g_config else str(uuid4())
        g_description = g_config.get(
            "description", f"Unnamed RDF graph with id {g_name}")
        # per-graph settings override the server-wide defaults
        # NOTE(review): unlike the server-wide values, a per-graph 'inf' string
        # is passed through as-is here -- confirm Graph handles it.
        g_quantum = g_config.get("quota", quantum)
        g_max_results = g_config.get("max_results", max_results)
        g_queries = g_config["queries"] if "queries" in g_config else list()

        # load the graph connector using available backends
        # .get() keeps the error path below from raising KeyError when the
        # graph declares no "backend" at all
        g_backend = g_config.get("backend")
        if g_backend is None or g_backend not in backends:
            logging.error(
                f"Impossible to find the backend with name {g_backend}, declared for the RDF Graph {g_name}"
            )
            continue
        g_connector = backends[g_backend](g_config)

        # build the graph and register it using its URI
        graphs[g_uri] = Graph(g_uri,
                              g_name,
                              g_description,
                              g_connector,
                              quantum=g_quantum,
                              max_results=g_max_results,
                              default_queries=g_queries)
        logging.info(
            f"RDF Graph '{g_name}' (backend: {g_backend}) successfully loaded"
        )

    return Dataset(dataset_name,
                   dataset_description,
                   graphs,
                   public_url=public_url,
                   default_query=default_query,
                   analytics=analytics,
                   stateless=is_stateless,
                   statefull_manager=statefull_manager)
def test_import_backend():
    """A custom backend factory builds a connector fed by the given config."""
    params = {'foo': 'hello world!'}
    # 'foo' is the only required parameter, and it is present in params
    build_connector = import_backend(
        'silly',
        'tests.database.custom_backend',
        'SillyConnector',
        ['foo'],
    )
    silly = build_connector(params)
    assert silly.foo() == params['foo']
def test_import_backend_missing_param():
    """A SyntaxError is raised when a required parameter ('bar') is absent."""
    params = {'foo': 'hello world!'}
    required = ['foo', 'bar']
    # Both the import and the factory call stay inside the guarded block, so
    # a SyntaxError raised by either one satisfies the expectation.
    with pytest.raises(SyntaxError):
        build_connector = import_backend(
            'silly',
            'tests.database.custom_backend',
            'SillyConnector',
            required,
        )
        build_connector(params)