def align_graphs_from_files(ontology_path_1, ontology_path_2,
                            param_path, format=None):
    """align ontology graphs from RDF files in 'ontology_path_1'
    and 'ontology_path_2' with the help of parameters
    from the file in 'param_path'
    """

    # get graphs from files
    if not format:
        ontology_1 = Graph()
        ontology_1.parse(ontology_path_1,
                         format=guess_format(ontology_path_1))
        ontology_2 = Graph()
        ontology_2.parse(ontology_path_2,
                         format=guess_format(ontology_path_2))
    else:
        # Graph() does not accept a source path; parse with the explicit format
        ontology_1 = Graph()
        ontology_1.parse(ontology_path_1, format=format)
        ontology_2 = Graph()
        ontology_2.parse(ontology_path_2, format=format)

    # get parameters for alignment from file
    parameters = dict()
    with open(param_path, "rb") as f:
        exec(compile(f.read(), param_path, 'exec'), parameters)

    # align graphs with parameters
    yield from align_graphs(ontology_1, ontology_2, parameters)
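
A minimal invocation sketch (the paths are placeholders, and align_graphs, which is not shown here, determines what each yielded item looks like):

# Hypothetical usage: params.py must define whatever names align_graphs expects.
for match in align_graphs_from_files("onto_a.ttl", "onto_b.ttl", "params.py"):
    print(match)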
Example #2
def main(argv):

    parser = ArgumentParser(
        description='This program contains various NIDM-Experiment utilities')
    sub = parser.add_subparsers(dest='command')
    concat = sub.add_parser(
        'concat',
        description=
        "This command will simply concatenate the supplied NIDM files into a single output"
    )
    visualize = sub.add_parser(
        'visualize',
        description=
        "This command will produce a visualization(png) of the supplied NIDM files"
    )

    for arg in [concat, visualize]:
        arg.add_argument(
            '-nl',
            '--nl',
            dest="nidm_files",
            nargs="+",
            required=True,
            help="A comma separated list of NIDM files with full path")

    concat.add_argument('-o',
                        '--o',
                        dest='output_file',
                        required=True,
                        help="Merged NIDM output file name + path")
    visualize.add_argument('-o',
                           '--o',
                           dest='output_file',
                           required=True,
                           help="Output file name+path of dot graph")

    args = parser.parse_args()

    #concatenate nidm files
    if args.command == 'concat':

        #create empty graph
        graph = Graph()
        for nidm_file in args.nidm_files:
            tmp = Graph()
            graph = graph + tmp.parse(nidm_file,
                                      format=util.guess_format(nidm_file))

        graph.serialize(args.output_file, format='turtle')

    elif args.command == 'visualize':
        #create empty graph
        graph = Graph()
        for nidm_file in args.nidm_files:
            tmp = Graph()
            graph = graph + tmp.parse(nidm_file,
                                      format=util.guess_format(nidm_file))

        # wrap the serialized turtle in a file-like object for read_nidm
        project = read_nidm(StringIO(graph.serialize(format='turtle')))
        project.save_DotGraph(filename=args.output_file + '.png', format='png')
Example #3
def run_task_oa2_1(filename_a: str, filename_b: str, filename_c: str,
                   task: str) -> None:
    TARGET_NAMESPACE_STR: str = "http://www.city.ac.uk/ds/inm713/feiphoon#"
    TARGET_NAMESPACE: rdflib.Namespace = Namespace(TARGET_NAMESPACE_STR)
    TARGET_PREFIX: str = "fp"

    CANDIDATE_NAMESPACE_STR: str = "http://www.co-ode.org/ontologies/pizza/pizza.owl#"
    CANDIDATE_NAMESPACE: rdflib.Namespace = Namespace(CANDIDATE_NAMESPACE_STR)
    CANDIDATE_PREFIX: str = "pizza"

    OWL_PREFIX: str = "owl"

    graph: rdflib.Graph = Graph()
    graph.bind(prefix=TARGET_PREFIX, namespace=TARGET_NAMESPACE)
    graph.bind(prefix=CANDIDATE_PREFIX, namespace=CANDIDATE_NAMESPACE)
    graph.bind(prefix=OWL_PREFIX, namespace=OWL)

    graph.load(source=filename_a, format=guess_format(filename_a))
    graph.load(source=filename_b, format=guess_format(filename_b))
    graph.load(source=filename_c, format=guess_format(filename_c))

    _perform_reasoning(graph)

    _save_graph(graph=graph,
                output_file=f"all_files_with_reasoning_{task}.ttl")
Example #4
def convert(nidm_file_list, type):
    """
    This function converts NIDM files to various RDF formats, naming the
    outputs after the input file and writing them to the same location.
    """

    for nidm_file in nidm_file_list.split(','):
        # WIP: for now we use pynidm for jsonld exports, to make them more
        # human-readable, and rdflib for everything else.
        if type == 'jsonld':
            # read in nidm file
            project = read_nidm(nidm_file)
            #write jsonld file with same name
            with open(splitext(nidm_file)[0] + ".json", 'w') as f:
                f.write(project.serializeJSONLD())
        elif type == 'turtle':
            graph = Graph()
            graph.parse(nidm_file, format=util.guess_format(nidm_file))
            graph.serialize(splitext(nidm_file)[0] + ".ttl", format='turtle')
        elif type == 'xml-rdf':
            graph = Graph()
            graph.parse(nidm_file, format=util.guess_format(nidm_file))
            graph.serialize(splitext(nidm_file)[0] + ".xml",
                            format='pretty-xml')
        elif type == 'n3':
            graph = Graph()
            graph.parse(nidm_file, format=util.guess_format(nidm_file))
            graph.serialize(splitext(nidm_file)[0] + ".n3", format='n3')
        elif type == 'trig':
            # read in nidm file
            project = read_nidm(nidm_file)
            with open(splitext(nidm_file)[0] + ".trig", 'w') as f:
                f.write(project.serializeTrig())
        else:
            print("Error, type is not supported at this time")
Example #5
def main():
    argparser = argparse.ArgumentParser(description=__doc__,
                                        fromfile_prefix_chars='@')

    # argparser.add_argument("task", help="Task to perform", choices=['link_people', 'all'], default='link_people')
    argparser.add_argument("input_bibale", help="Input Bibale RDF file")
    argparser.add_argument("input_bodley", help="Input Bodley RDF file")
    argparser.add_argument("input_sdbm", help="Input SDBM RDF file")
    argparser.add_argument(
        "--loglevel",
        default='DEBUG',
        help="Logging level",
        choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
    argparser.add_argument("--logfile", default='tasks.log', help="Logfile")

    args = argparser.parse_args()

    log = logging.getLogger()  # Get root logger
    log_handler = logging.FileHandler(args.logfile)
    log_handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    log.addHandler(log_handler)
    log.setLevel(args.loglevel)

    log.info('Reading input graphs.')

    bibale = Graph()
    bibale.parse(args.input_bibale, format=guess_format(args.input_bibale))
    bodley = Graph()
    bodley.parse(args.input_bodley, format=guess_format(args.input_bodley))
    sdbm = Graph()
    sdbm.parse(args.input_sdbm, format=guess_format(args.input_sdbm))

    # if args.task in ['link_people', 'all']:
    log.info('Linking people of three graphs')
    p = PersonLinker(sdbm, bodley, bibale)

    p.link()

    if p.links:
        bibale, bodley, sdbm = p.datasets()

        log.info('Serializing output files...')

        filename_suffix = '_people.ttl'  # '_' + args.task + '.ttl'
        bind_namespaces(bibale).serialize(args.input_bibale.split('.')[0] +
                                          filename_suffix,
                                          format='turtle')
        bind_namespaces(bodley).serialize(args.input_bodley.split('.')[0] +
                                          filename_suffix,
                                          format='turtle')
        bind_namespaces(sdbm).serialize(args.input_sdbm.split('.')[0] +
                                        filename_suffix,
                                        format='turtle')
    else:
        log.warning('No links found')

    log.info('Task finished.')
Example #6
 def test_guess_format(self) -> None:
     self.assertEqual(guess_format("example.trix"), "trix")
     self.assertEqual(guess_format("local-file.jsonld"), "json-ld")
     self.assertEqual(guess_format("local-file.json-ld"), "json-ld")
     self.assertEqual(guess_format("/some/place/on/disk/example.json"),
                      "json-ld")
     self.assertEqual(
         guess_format("../../relative/place/on/disk/example.json"),
         "json-ld")
Example #7
    def _read(self, paths=None):
        graph = Graph()
        for path in paths or []:
            assert is_readable(path)
            if not is_gzip(path):
                graph.parse(path, format=guess_format(path))
            else:
                self.logger.debug("Input recognized as gzip file")
                with gzip.open(path, 'rb') as f:
                    graph.parse(f, format=guess_format(path[:-3]))

        return graph
Example #8
def get_example():
    ontology_1 = Graph()
    ontology_1.parse(ontology_path_1, format=guess_format(ontology_path_1))

    ontology_2 = Graph()
    ontology_2.parse(ontology_path_2, format=guess_format(ontology_path_2))

    parameters = dict()
    with open(param_path, "rb") as f:
        exec(compile(f.read(), param_path, 'exec'), parameters)

    return ontology_1, ontology_2, parameters
Example #9
def load_ontologies():
    """Add ontologies into twks-server"""
    files = Path(current_app.config['ONTOLOGY_PATH']).glob('*')
    for f in files:
        path = f.as_posix()
        pub = Nanopublication.parse_assertions(source=path,
                                               format=guess_format(path))
        current_app.store.put_nanopublication(pub)

    for ontology in remote_ontologies:
        pub = Nanopublication.parse_assertions(source=ontology,
                                               format=guess_format(ontology))
        current_app.store.put_nanopublication(pub)
Example #10
File: rdf_diff.py, Project: SD2E/paml
def main():
    values = ap.parse_args()
    format1 = guess_format(values.file1)
    format2 = guess_format(values.file2)
    g1: Graph = Graph().parse(values.file1, format=format1)
    g2: Graph = Graph().parse(values.file2, format=format2)
    iso1: IsomorphicGraph = to_isomorphic(g1)
    iso2: IsomorphicGraph = to_isomorphic(g2)
    _in_both, in_first, in_second = graph_diff(iso1, iso2)
    print(f"Only in {values.file1}")
    dump_nt_sorted(in_first)

    print(f"Only in {values.file2}")
    dump_nt_sorted(in_second)
Example #11
def main():
    argparser = argparse.ArgumentParser(description="Casualty linking tasks", fromfile_prefix_chars='@')

    argparser.add_argument("task", help="Linking task to perform",
                           choices=["ranks", "persons", "municipalities", "units", "occupations"])
    argparser.add_argument("input", help="Input RDF file")
    argparser.add_argument("output", help="Output file location")
    argparser.add_argument("--loglevel", default='INFO', help="Logging level, default is INFO.",
                           choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
    argparser.add_argument("--logfile", default='tasks.log', help="Logfile")
    argparser.add_argument("--endpoint", default='http://ldf.fi/warsa/sparql', help="SPARQL Endpoint")
    argparser.add_argument("--munics", default='output/municipalities.ttl', help="Municipalities RDF file")
    argparser.add_argument("--arpa", type=str, help="ARPA instance URL for linking")

    args = argparser.parse_args()

    log = logging.getLogger()  # Get root logger
    log_handler = logging.FileHandler(args.logfile)
    log_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    log.addHandler(log_handler)
    log.setLevel(args.loglevel)

    input_graph = Graph()
    input_graph.parse(args.input, format=guess_format(args.input))

    if args.task == 'ranks':
        log.info('Linking ranks')
        bind_namespaces(link_ranks(input_graph, args.endpoint, CASUALTY_MAPPING['SOTARVO']['uri'], SCHEMA_CAS.rank,
                                   SCHEMA_WARSA.DeathRecord)).serialize(args.output, format=guess_format(args.output))

    elif args.task == 'persons':
        log.info('Linking persons')
        bind_namespaces(link_casualties(input_graph, args.endpoint, args.munics)) \
            .serialize(args.output, format=guess_format(args.output))

    elif args.task == 'municipalities':
        log.info('Linking municipalities')
        bind_namespaces(link_municipalities(input_graph, args.endpoint, args.arpa)) \
            .serialize(args.output, format=guess_format(args.output))

    elif args.task == 'units':
        log.info('Linking units')
        bind_namespaces(link_units(input_graph, args.endpoint, args.arpa)) \
            .serialize(args.output, format=guess_format(args.output))

    elif args.task == 'occupations':
        log.info('Linking occupations')
        bind_namespaces(link_occupations(input_graph, args.endpoint, CASUALTY_MAPPING['AMMATTI']['uri'],
                                         BIOC.has_occupation, SCHEMA_WARSA.DeathRecord)) \
            .serialize(args.output, format=guess_format(args.output))
Example #12
def load_data(data_url: str, old_graph: Optional[PPGraph] = None) -> PPGraph:
    """Create new PPGraph or add triples to the provided one.

    Args:
        data_url: path to RDF file or url address of SPARQL endpoint,
                    passing an url will invalidate old_graph
        old_graph: existing graph, will add triples to it

    Returns:
        Graph with triples loaded from data_url (lazy loaded in case of SPARQL endpoint)
    """
    if old_graph:
        graph = old_graph
    else:
        graph = PPGraph(ConjunctiveGraph())

    if isfile(data_url):
        L.info('Loading triples from file `%s`', data_url)
        data_format = guess_format(data_url)
        graph.parse(data_url, format=data_format)

    elif isdir(data_url):
        L.info('Loading triples from files in directory `%s`', data_url)
        for extension in TRIPLE_FILE_EXTENSIONS:
            triples_files = glob(f'{data_url}/*.{extension}')
            if len(triples_files) > 0:
                L.info('Found %d `.%s` files', len(triples_files), extension)

            for i, triples_file in enumerate(triples_files):
                data_format = guess_format(triples_file)
                L.debug('%d / %d (`%s`), data format: %s', i,
                        len(triples_files), triples_file, data_format)
                graph.parse(triples_file, format=data_format)

    else:
        L.info('Using remote graph from SPARQL endpoint `%s`', data_url)
        graph = PPGraph(SPARQLStore(data_url))

        # early fail
        try:
            graph.query('''SELECT DISTINCT ?s 
                   WHERE { 
                      ?s rdf:type foaf:Person
                   } LIMIT 1''')
        except Exception as e:
            L.error("Can't load data from remote endpoint")
            raise e

    return graph
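
A usage sketch under the module's own assumptions (PPGraph and the logger L come from the surrounding module; the paths and URL are placeholders):

# Load one local file, then layer a second file onto the same graph.
graph = load_data("dump1.ttl")
graph = load_data("dump2.nt", old_graph=graph)

# Or point at a SPARQL endpoint instead; per the docstring this ignores
# old_graph and returns a lazily evaluated remote graph.
remote = load_data("http://example.org/sparql")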
Example #13
def query(context, data_dict):
    # Get the resource and query from the form
    TTL_Resource = data_dict["TTL_Resource"]
    query = data_dict["query"]
    try:
        # Try to create the graph to analyze the vocabulary
        g = Graph()
        result = g.parse(TTL_Resource["url"],
                         format=guess_format("ttl"),
                         publicID=TTL_Resource["name"])
        # Query the dataset
        qres = g.query(query)

        # Save the result of the query
        result = list()
        for row in qres:
            rowRes = list()
            for res in row:
                if (res):
                    rowRes.append(res.toPython())
            result.append(rowRes)
        # Return the result of the query
        return result
    except Exception as e:
        # Return the exception
        return [["Exception: " + str(e)]]
Example #14
def parse_args(args):
    """
    Parse command line arguments. See [Usage](#usage) (or the source code) for details.

    `args` is the list of command line arguments.
    """

    argparser = argparse.ArgumentParser(description="Link resources to an RDF graph with ARPA.",
            fromfile_prefix_chars="@")
    argparser.add_argument("input", help="Input rdf file")
    argparser.add_argument("output", help="Output file")
    argparser.add_argument("tprop", metavar="target_property", help="Target property for the matches")
    argparser.add_argument("arpa", help="ARPA service URL")
    argparser.add_argument("--fi", metavar="INPUT_FORMAT",
        help="Input file format (rdflib parser). Will be guessed if omitted.")
    argparser.add_argument("--fo", metavar="OUTPUT_FORMAT",
        help="Output file format (rdflib serializer). Default is turtle.", default="turtle")
    argparser.add_argument("-n", "--new_graph", action="store_true",
        help="""Add the ARPA results to a new graph instead of the original. The output file
        contains all the triples of the original graph by default. With this argument set
        the output file will contain only the results.""")
    argparser.add_argument("--rdf_class", metavar="CLASS",
        help="Process only subjects of the given type (goes through all subjects by default).")
    argparser.add_argument("--prop", metavar="PROPERTY",
        help="Property that's value is to be used in matching. Default is skos:prefLabel.")
    argparser.add_argument("--ignore", nargs="*", metavar="TERM",
        help="Terms that should be ignored even if matched")
    argparser.add_argument("--min_ngram", default=1, metavar="N", type=int,
        help="The minimum ngram length that is considered a match. Default is 1.")
    argparser.add_argument("--no_duplicates", nargs="*", default=False, metavar="TYPE",
        help="""Remove duplicate matches based on the 'label' returned by the ARPA service.
        Here 'duplicate' means a subject with the same label as another subject in
        the same result set.
        A list of types can be given with this argument. If given, prioritize matches
        based on it - the first given type will get the highest priority and so on.
        Note that the response from the service has to include a 'type' variable
        for this to work.""")
    argparser.add_argument("-r", "--retries", default=0, metavar="N", type=int,
        help="The amount of retries per query if a HTTP error is received. Default is 0.")
    argparser.add_argument("--log_level", default="INFO",
        choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Logging level, default is INFO. The log file is arpa_linker.log.")

    args = argparser.parse_args(args)

    if not args.fi:
        args.fi = guess_format(args.input)

    if args.prop:
        args.prop = URIRef(args.prop)

    if args.rdf_class:
        args.rdf_class = URIRef(args.rdf_class)

    args.tprop = URIRef(args.tprop)

    if args.no_duplicates == []:
        args.no_duplicates = True

    return args
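
A hedged example of calling parse_args with an explicit argument list (the URLs are placeholders):

args = parse_args([
    "input.ttl", "output.ttl",
    "http://example.org/targetProperty",  # tprop
    "http://arpa.example.org/service",    # arpa service URL
    "--log_level", "DEBUG",
])
# args.fi was omitted, so it is guessed from "input.ttl" as "turtle".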
Example #15
def getGraph(file_path):

    g = Graph()
    # print(guess_format(file_path))
    g.parse(file_path, format=guess_format(file_path))
    # g.parse(file_path,format="turtle")
    return g
Example #16
def parse_and_serialize(input_files,
                        input_format,
                        guess,
                        outfile,
                        output_format,
                        ns_bindings,
                        store_conn="",
                        store_type=None):

    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    for prefix, uri in list(ns_bindings.items()):
        graph.namespace_manager.bind(prefix, uri, override=False)

    for fpath in input_files:
        use_format, kws = _format_and_kws(input_format)
        if fpath == '-':
            fpath = sys.stdin
        elif not input_format and guess:
            use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
        graph.parse(fpath, format=use_format, **kws)

    if outfile:
        output_format, kws = _format_and_kws(output_format)
        kws.setdefault('base', None)
        graph.serialize(destination=outfile, format=output_format, **kws)

    if store:
        store.rollback()
Example #17
 def __load_from_file(self, file, format=None):
     """
         Load the data structure from an RDF file.
         If no format is provided, rdflib is used to guess the format.
     """
     if not os.path.isfile(file):
         raise Exception("Cannot find RDF file to load: {}".format(file))
     if format is None:
         format = guess_format(file)
     # use a temporary graph to load from a RDF file
     g = Graph()
     g.parse(file, format=format)
     for s, p, o in g.triples((None, None, None)):
         # load RDF triples in the dictionary, then index it
         triple = self._dictionary.insert_triple(strip_uri(s.n3()),
                                                 strip_uri(p.n3()),
                                                 strip_uri(o.n3()))
         self._indexes["spo"].insert(triple, len(self._triples))
         self._indexes["sop"].insert((triple[0], triple[2], triple[1]),
                                     len(self._triples))
         self._indexes["osp"].insert((triple[2], triple[0], triple[1]),
                                     len(self._triples))
         self._indexes["ops"].insert((triple[2], triple[1], triple[0]),
                                     len(self._triples))
         self._indexes["pso"].insert((triple[1], triple[0], triple[2]),
                                     len(self._triples))
         self._indexes["pos"].insert((triple[1], triple[2], triple[0]),
                                     len(self._triples))
         self._triples.append(triple)
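
The six permutations mirror the classic triple-store indexing scheme: whichever slots of a query pattern are bound select the index whose key prefix covers them. The class's query path is not shown above, so the helper below is only an illustrative sketch of that selection rule:

# Hypothetical helper: map bound pattern slots to one of the six indexes.
def pick_index(s_bound: bool, p_bound: bool, o_bound: bool) -> str:
    if s_bound and p_bound:
        return "spo"  # (s, p, o) and (s, p, ?)
    if s_bound and o_bound:
        return "sop"  # (s, ?, o)
    if s_bound:
        return "spo"  # (s, ?, ?)
    if p_bound and o_bound:
        return "pos"  # (?, p, o)
    if p_bound:
        return "pso"  # (?, p, ?)
    if o_bound:
        return "osp"  # (?, ?, o)
    return "spo"      # (?, ?, ?) full scan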
Example #18
def parse(dir, dirName, file, df, originDir):
    root = os.path.join(dir, dirName)
    # Create a graph to analyze the n3 file
    g = Graph()
    try:
        fileObj = open(os.path.join(root, file), "r", encoding="utf8")
        result = g.parse(file=fileObj, format=guess_format(file))
        fileObj.close()
        log("Parsed " + file + "\n")
    except Exception as e:
        log("Error trying to parse " + file + "\n")
        log(str(e) + "\n")

    # For each statement present in the graph obtained
    for subject, predicate, object_ in g:
        # Save the statement to the ExcelFile
        domain = file.replace("_", ".").split(".")[0]
        df = df.append(
            {
                'Subject': subject,
                'Predicate': predicate,
                'Object': object_,
                'Domain': domain
            },
            ignore_index=True)
    return df
Example #19
 def __init__(self, shape):
     self.g = Graph()
     if type(shape) is Graph:
         self.g = shape
     else:
         # 'shape' is a file-like object here; guess the format from its name
         self.g.parse(shape, format=guess_format(shape.name))
         shape.close()
Example #20
def parse_and_serialize(input_files,
                        input_format,
                        guess,
                        outfile,
                        output_format,
                        ns_bindings,
                        store_conn=STORE_CONNECTION,
                        store_type=STORE_TYPE):

    store = plugin.get(store_type, Store)()
    store.open(store_conn)
    graph = Graph(store)

    for prefix, uri in ns_bindings.items():
        graph.namespace_manager.bind(prefix, uri, override=False)

    for fpath in input_files:
        use_format, kws = _format_and_kws(input_format)
        if fpath == '-':
            fpath = sys.stdin
        elif not input_format and guess:
            use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
        graph.parse(fpath, format=use_format, **kws)

    if outfile:
        output_format, kws = _format_and_kws(output_format)
        graph.serialize(destination=outfile,
                        format=output_format,
                        base=None,
                        **kws)
    store.rollback()
Example #21
File: views.py, Project: upenn-libraries/DM
def import_old_data(request):
    everything_graph = Graph()
    bind_namespaces(everything_graph)

    # Either gather post data (must be one project/user graph at a time)
    if request.method == 'POST':
        logger.debug('!!!!!!!!!!!!!!! views.py - import_old_data')
        parse_request_into_graph(request, everything_graph)

        add_all_users(everything_graph)

        # Create each user's default project
        # Due to the structure of the data when exported from the old system, this also
        #  add each annotation to the project as an aggregated resource
        create_project(everything_graph)

    # or serialize from a folder, where each file is one project/user graph
    else:
        i = 0
        for file_name in listdir("output/"):
            if file_name.startswith('.'):
                continue

            try:
                everything_graph.parse("output/" + file_name, format=guess_format(file_name) or 'turtle')
            except Exception as e:
                print "Failed to decode file '%s' with error message '%s'"%(file_name, e.args[-1])
            else:
                add_all_users(everything_graph)
                create_project(everything_graph)
        

    return HttpResponse("I finished migrating data without errors.")
Example #22
def main():
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        '--input', '-i', type=str, required=True,
        help='Location of input ttl file')

    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')

    models = graph.subject_objects(model_of)
    model_len = len(list(models))

    if model_len < EXPECTED_PAIRS:
        logger.error("Not enough model_of predicates in graph:"
                     " {} expected {} check omia log for"
                     " warnings".format(model_len, EXPECTED_PAIRS))
        exit(1)
    else:
        logger.info("PASSED")
Example #23
    def load(self, url):
        src = VOCAB_SOURCE_MAP.get(str(url), url)
        if os.path.isfile(url):
            context_id = create_input_source(url).getPublicId()
            last_vocab_mtime = self.mtime_map.get(url)
            vocab_mtime = os.stat(url).st_mtime
            if not last_vocab_mtime or last_vocab_mtime < vocab_mtime:
                logger.debug("Parse file: '%s'", url)
                self.mtime_map[url] = vocab_mtime
                # use CG as workaround for json-ld always loading as dataset
                graph = ConjunctiveGraph()
                graph.parse(src, format=guess_format(src))
                self.graph.remove_context(context_id)
                for s, p, o in graph:
                    self.graph.add((s, p, o, context_id))
                return graph
        else:
            context_id = url

        if any(self.graph.triples((None, None, None), context=context_id)):
            logger.debug("Using context <%s>" % context_id)
            return self.graph.get_context(context_id)

        cache_path = self.get_fs_path(url)
        if os.path.exists(cache_path):
            logger.debug("Load local copy of <%s> from '%s'", context_id, cache_path)
            return self.graph.parse(cache_path, format='turtle', publicID=context_id)
        else:
            logger.debug("Fetching <%s> to '%s'", context_id, cache_path)
            graph = self.graph.parse(src,
                    format='rdfa' if url.endswith('html') else None)
            with open(cache_path, 'w') as f:
                graph.serialize(f, format='turtle')
            return graph
Example #24
 def load(self):
     """
     Indexes the AppEnsemble-Directory for files with the AppEnsemble-Extension and ADDs them to the AppEnsemblePool.
     :return:None
     """
     try:
         files = os.listdir(self.get_ae_folder_path())
         for file in files:
             if file.endswith(AppEnsemble.ae_extension):
                 identifier = file.replace(AppEnsemble.ae_extension, '')
                 ae_tmp = AppEnsemble(identifier)
                 self.pool[identifier] = ae_tmp
                 filepath = os.path.join(self.get_ae_folder_path(), file)
                 with ZipFile(filepath, "r") as ae_pkg:
                     for name in ae_pkg.namelist():
                         if fnmatch.fnmatch(name, AppEnsemble.ae_filename):
                             ae_model = ae_pkg.read(AppEnsemble.ae_filename).decode()
                             self.parse(data=ae_model, format=util.guess_format(name))
                 # the with-block closes ae_pkg; no explicit close() is needed
         return None
     except FileNotFoundError as detail:
         if self._ae_folder_path != self._ae_folder_path_backup:
             self.log.error('AppEnsemble-Path "{}" was not found in the system! Try to use the standard path!'.format(self.get_ae_folder_path()))
             self.set_ae_folder_path(self._ae_folder_path_backup)
         else:
             self.log.error('AppEnsemble-Path "{}" was not found in the system!'.format(self.get_ae_folder_path()))
         return None
Example #25
def link_casualties(input_graph, endpoint, munics):
    data_fields = [
        {'field': 'given', 'type': 'String'},
        {'field': 'family', 'type': 'String'},
        # Birth place is linked, can have multiple values
        {'field': 'birth_place', 'type': 'Custom', 'comparator': intersection_comparator, 'has missing': True},
        {'field': 'birth_begin', 'type': 'DateTime', 'has missing': True, 'fuzzy': False},
        {'field': 'birth_end', 'type': 'DateTime', 'has missing': True, 'fuzzy': False},
        {'field': 'death_begin', 'type': 'DateTime', 'has missing': True, 'fuzzy': False},
        {'field': 'death_end', 'type': 'DateTime', 'has missing': True, 'fuzzy': False},
        {'field': 'activity_end', 'type': 'Custom', 'comparator': activity_comparator, 'has missing': True},
        {'field': 'rank', 'type': 'Exact', 'has missing': True},
        {'field': 'rank_level', 'type': 'Price', 'has missing': True},
        {'field': 'unit', 'type': 'Custom', 'comparator': intersection_comparator, 'has missing': True},
    ]

    ranks = r.read_graph_from_sparql(endpoint, "http://ldf.fi/warsa/ranks")
    munics = Graph().parse(munics, format=guess_format(munics))

    random.seed(42)  # Initialize randomization to create deterministic results
    np.random.seed(42)

    training_links = read_person_links('input/person_links.json')

    person_links = link_persons(endpoint, _generate_casualties_dict(input_graph, ranks, munics),
                                data_fields, training_links, sample_size=500000,  threshold_ratio=0.5)

    return person_links
Example #26
def main():
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        '--input', '-i', type=str, required=True,
        help='Location of input ttl file')

    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')

    models = graph.subject_objects(model_of)
    model_len = len(list(models))

    if model_len < EXPECTED_PAIRS:
        logger.error("Not enough model_of predicates in graph:"
                     " {} expected {} check omia log for"
                     " warnings".format(model_len, EXPECTED_PAIRS))
        exit(1)

    omim_diseases = graph.objects(
        subject=URIRef('https://monarchinitiative.org/model/OMIA-breed:18'),
        predicate=model_of
    )

    if list(omim_diseases) != [URIRef('http://purl.obolibrary.org/obo/OMIM_275220')]:
        logger.error("Missing breed to omim triple for {}".format('OMIA-breed:18'))
        exit(1)
    
    logger.info("PASSED")
Example #27
    def configure_database(self):
        """
        Database configuration should be set here
        """
        self.NS = NamespaceContainer()
        self.NS.RDFS = rdflib.RDFS
        self.NS.RDF = rdflib.RDF
        self.NS.OWL = rdflib.OWL
        self.NS.xsd   = rdflib.Namespace("http://www.w3.org/2001/XMLSchema#")
        self.NS.dcterms    = rdflib.Namespace("http://purl.org/dc/terms/")
        self.NS.prov  = rdflib.Namespace("http://www.w3.org/ns/prov#")
        self.NS.skos = rdflib.Namespace("http://www.w3.org/2004/02/skos/core#")
        self.NS.dcat = rdflib.Namespace("http://www.w3.org/ns/dcat#")
        self.NS.oa = rdflib.Namespace("http://www.w3.org/ns/oa#")
        self.NS.dataset = rdflib.Namespace("https://cn.dataone.org/cn/v2/object/")
        self.NS.local = rdflib.Namespace(self.config['lod_prefix']+'/')
        self.NS.oboe = Namespace('http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#')
        self.NS.csvw = Namespace('http://www.w3.org/ns/csvw#')
        self.urn = rdflib.Namespace("urn:")

        self.nt_file = self.config['target_ontology']
        self.target_graph = ConjunctiveGraph()
        self.target_graph.load(self.nt_file, format=guess_format(self.nt_file))

        target_classes, idf = vectorize_ontology(self.target_graph)
        self.target_classes = target_classes
        self.idf = idf
        self.target_subtree = set(self.target_graph.transitive_subjects(self.NS.RDFS.subClassOf, self.NS.oboe.MeasurementType))
        self.target_class_subtree = [x for x in self.target_classes
                                     if x.identifier in self.target_subtree and x.identifier != self.NS.oboe.MeasurementType]
        self.targets = dict([(x.identifier, x) for x in self.target_class_subtree])
Example #28
 def _loadgraph(filename):
     g = rdflib.Graph()
     # we must read the data ourself, providing a non-ascii
     # filename to Graph.parse fails deep in rdflib internals
     g.parse(data=util.readfile(filename, "rb"),
             format=guess_format(filename))
     return g
Example #29
def add_file_to_graph(graph: Graph,
                      file: str,
                      imports_map=None,
                      imported: List[str] = None) -> Graph:
    """Returns a graph with the loaded rdf file

    Args:
        :param graph: graph to be used to parse the file
        :param file: Rdf file to load
        :param imports_map: a uri to file map
        :param imported: collected imports from previous parsed files
    Return:
        :return: Graph
    """

    if imports_map is None:
        imports_map = {}
    if imported is None:
        imported = []
    file_type = guess_format(file)
    if file_type is None:
        file_type = "json-ld"
    graph.parse(file, format=file_type)
    for obj in graph.objects(None, OWL.imports):
        try:
            local = imports_map[str(obj)]
            if local not in imported:
                imported.append(local)
                add_file_to_graph(graph, local, imports_map, imported)
        except KeyError:
            LOGGER.error("%s not in map", str(obj))

    return graph
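
A short call sketch; the mapping keys are ontology IRIs and the values local file copies (all names hypothetical):

from rdflib import Graph

imports_map = {"http://example.org/ontology/core": "local/core.ttl"}
g = add_file_to_graph(Graph(), "local/app.ttl", imports_map=imports_map)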
Example #30
    def load(cls, file_or_filename, format=None):
        """
        Materialize ontology into Python class hierarchy from a given
        file-like object or a filename.

        :param file_or_filename - file-like object or local filesystem path to file
            containing ontology definition in one of the supported formats.
        :param format - the format ontology is serialized in.
            For list of currently supported formats (based on RDFlib which is used under the hood)
            see: http://rdflib.readthedocs.io/en/565/plugin_parsers.html
        :returns instance of the `Ontology` object which encompasses the ontology namespace
            for all created objects and types.

        """
        graph = Graph()
        if isinstance(file_or_filename, string_types):
            # Load from given filename
            if not format:
                format = guess_format(file_or_filename)
            graph.parse(file_or_filename, format=format)
        else:
            # Load from file-like buffer
            if not format:
                raise RuntimeError(
                    "Must supply format argument when not loading from a filename"
                )
            graph.parse(file_or_filename, format=format)

        builder = OntologyBuilder(graph)
        namespace = builder.build_namespace()

        return cls(namespace, graph=graph, base_uri=builder.base_uri)
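
Both loading paths sketched, assuming the enclosing class is named Ontology as the docstring suggests ("pizza.owl" is a placeholder):

onto = Ontology.load("pizza.owl")   # format guessed from the file name

with open("pizza.owl", "rb") as f:  # file-like object: format is mandatory
    onto = Ontology.load(f, format="xml")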
Example #31
    def post(self, proms_report_lodging_uri, report):
        """
        POSTS an RDF-serialised Report object to a PROMS server instance

        :param proms_report_lodging_uri: the URI of the PROMS server instance's Report lodgement endpoint.
        Typically something like {PROMS_URI}/function/lodge-report.
        :param report: a pyproms Report class object, an rdflib Graph of a Report, a Report file path or a string
        containing RDF of a Report in turtle

        :return: a requests module Response class
        """

        if isinstance(report, PromsReport):
            report_str = report.serialize_graph().decode('utf-8')
        elif isinstance(report, Graph):
            report_str = report.serialize(format='turtle')
        elif isinstance(report, str):
            if os.path.exists(report):
                g = Graph()
                g.parse(report, format=util.guess_format(report))
                report_str = g.serialize(format='turtle')
            else:  # assume it's an RDF string in turtle
                report_str = report
        else:  # don't allow anything else
            raise ValueError(
                'Only PromsReport objects, rdflib Graph objects, path strings to RDF files or a string of '
                'RDF in turtle format are allowed for \'report\'')

        # POST the Report to PROMS
        headers = {'Content-type': 'text/turtle'}
        r = requests.post(proms_report_lodging_uri,
                          data=report_str,
                          headers=headers)

        return r
Example #32
def OpenGraph(file):
    '''
    Returns a parsed RDFLib Graph object for the given file
    The file will be hashed and if a pickled copy is found in the TMP dir, that will be used
    Otherwise the graph will be computed and then saved in the TMP dir as a pickle file
    We also use functools.lru_cache to cache results in memory during a run

    :param file: filename
    :return: Graph
    '''
    # if someone passed me a RDF graph rather than a file, just send it back
    if isinstance(file, rdflib.graph.Graph):
        return file

    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(file, 'rb') as afile:
        buf = afile.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(BLOCKSIZE)
    hash = hasher.hexdigest()

    pickle_file = '{}/rdf_graph.{}.pickle'.format(tempfile.gettempdir(), hash)
    if path.isfile(pickle_file):
        return pickle.load(open(pickle_file, "rb"))

    rdf_graph = Graph()
    rdf_graph.parse(file, format=util.guess_format(file))
    pickle.dump(rdf_graph, open(pickle_file, 'wb'))

    # new graph, so to be safe clear out all cached entries
    memory.clear(warn=False)

    return rdf_graph
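
A usage sketch of the caching behavior (the path is a placeholder):

g1 = OpenGraph("dataset.ttl")  # first call: parses the file and pickles the graph
g2 = OpenGraph("dataset.ttl")  # same content hash: loaded from the pickle (or lru_cache)
g3 = OpenGraph(g1)             # an existing Graph is passed straight through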
Example #33
 def convert(self, form_input, map_filename):
     self.form_input = form_input.form
     # Get map and result RDF graphs ready
     self.rdf_map = Graph()
     self.rdf_map.parse(map_filename, format=guess_format(map_filename))
     self.rdf_result = Graph()
     self.rdf_result.namespace_manager = self.rdf_map.namespace_manager
     # Find node class
     for possible_root_node_class in self.rdf_map.objects(
             Literal('placeholder node_uri'), URIRef(RDF.type)):
         if 'placeholder' not in possible_root_node_class:
             self.root_node_class = possible_root_node_class
     if self.root_node_class is None:
         raise Exception('No root node class specified in ' + map_filename)
     # Use provided URI or generate unique URI of the new node
     if not self.root_node:
         self.root_node = URIRef(self.base_uri + str(uuid.uuid4()))
     self.rdf_result.add((self.root_node, RDF.type, self.root_node_class))
     # Go through each property and search for entries submitted in the form
     for (subject, property_predicate, property_obj) in self.rdf_map:
         if str(
                 subject
         ) == 'placeholder node_uri' and 'placeholder' in property_obj:
             self.add_entries_for_property(self.root_node,
                                           property_predicate, property_obj)
     # Also get any custom properties submitted in the form
     self.add_custom_property_entries(self.root_node)
     return self.rdf_result
Example #34
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn="", store_type=None):

    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    for prefix, uri in list(ns_bindings.items()):
        graph.namespace_manager.bind(prefix, uri, override=False)

    for fpath in input_files:
        use_format, kws = _format_and_kws(input_format)
        if fpath == '-':
            fpath = sys.stdin
        elif not input_format and guess:
            use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
        graph.parse(fpath, format=use_format, **kws)

    if outfile:
        output_format, kws = _format_and_kws(output_format)
        kws.setdefault('base', None)
        graph.serialize(destination=outfile, format=output_format, **kws)

    if store:
        store.rollback()
Example #35
def fusion(ontologies, output):
	global mode

	# Definition of namespaces
	# Uncomment if needed
	# NS_owl =  Namespace("http://www.w3.org/2002/07/owl#")
	# NS_rdfs =  Namespace("http://www.w3.org/2000/01/rdf-schema#")
	# NS_xsd =  Namespace("http://www.w3.org/2001/XMLSchema#")
	# NS_rdf =  Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
	# NS_mcf =  Namespace("http://www.mycorporisfabrica.org/ontology/mcf.owl#")

	# Final graph creation
	gMerge = ConjunctiveGraph()

	myPrint("Beginning additions...\n\n")
	for ontology in ontologies:
		gAdd = ConjunctiveGraph()
		if mode == 2 or mode == 3:
			myPrint("\tParsing ontology "+ontology+"...\n")
		gAdd.parse(ontology, format=guess_format(ontology))
		if mode == 2 or mode == 3:
			myPrint("\tAdding ontology "+ontology+", "+str(len(gAdd))+ " triples...\n")
		gMerge = gMerge + gAdd
		if mode == 2 or mode == 3:
			myPrint("\tOntology "+ontology+" added !\n")
			myPrint("\tNew size of merged ontology : "+str(len(gMerge))+" triples\n\n")

	myPrint("Additions complete !\n")
	myPrint("Final size of merged ontology : "+str(len(gMerge))+" triples\n\n")

	myPrint("Saving the ontology in turtle format...\n")
	# Saving the merged ontology in turtle
	gMerge.serialize(output, format="turtle")
	myPrint("Saving done !\n\n")
Example #36
def parse(name, link, list_):
    # Create a graph to analyze the n3 file
    g = Graph()
    try:
        format_ = link.split(".")[-1]
        if (format_ == "txt"):
            format_ = link.split(".")[-2]
        format_ = format_.split("?")[0]
        log("Parsing: " + name + format_ + "\n")
        result = g.parse(link, format=guess_format(name + "." + "n3"))
        log("Parsed : " + name + "\n")
    except Exception as e:
        log("Error trying to parse " + name + "\n")
        log(str(e) + "\n")
        return list_, 0
    index = 0
    # For each statement present in the graph obtained
    for subject, predicate, object_ in g:
        # Save the statement to the ExcelFile
        predicateTerm = predicate.replace("/", "#").split("#")
        predicateTerm = predicateTerm[len(predicateTerm) - 1]
        objectTerm = object_.replace("/", "#").split("#")
        objectTerm = objectTerm[len(objectTerm) - 1]
        domain = name.replace("_", ".").split(".")[0]
        list_.insert(
            index, {
                "Subject": subject,
                "Predicate": predicateTerm,
                "Object": objectTerm,
                "Domain": domain
            })
        index += 1
    return list_, index
Example #37
    def handle(self, *args, **options):
        f = options['file'][0]
        g = options['graph'][0]

        graph = settings.GRAPH

        rdf_format = guess_format(f)

        graph.parse(source=f, format=rdf_format, publicID=g)

        graph.commit()

        # need to copy a unique set of resources to the Resource table
        for ts in TypeStatement.objects.values_list('member', flat=True):
            Resource.objects.update_or_create(subject=ts)

        # and unique class names and predicates to their own tables
        for a in AssertedStatement.objects.values_list('predicate', flat=True):
            Predicate.objects.update_or_create(value=a)

        for l in LiteralStatement.objects.values_list('predicate', flat=True):
            Predicate.objects.update_or_create(value=l)

        for q in QuotedStatement.objects.values_list('predicate', flat=True):
            Predicate.objects.update_or_create(value=q)

        for k in TypeStatement.objects.values_list('klass', flat=True):
            Klass.objects.update_or_create(value=k)

        for c in TypeStatement.objects.values_list('context', flat=True):
            Context.objects.update_or_create(value=c)

        graph.close()
Example #38
def parse(name, link, file, df):
    # Create a graph to analyze the n3 file
    g = Graph()
    try:
        log("Parsing: " + name + "\n")
        result = g.parse(link, format=guess_format(name.split("/")[-1]))
        log("Parsed : " + name + "\n")
    except Exception as e:
        log("Error trying to parse " + name + "\n")
        log(str(e) + "\n")
        return df

    # For each statement present in the graph obtained
    for subject, predicate, object_ in g:
        # Save the statement to the ExcelFile
        predicateTerm = predicate.replace("/", "#").split("#")
        predicateTerm = predicateTerm[len(predicateTerm) - 1]
        objectTerm = object_.replace("/", "#").split("#")
        objectTerm = objectTerm[len(objectTerm) - 1]
        domain = name.replace("_", ".").split(".")[0]
        df = df.append(
            {
                "Subject": subject,
                "Predicate": predicateTerm,
                "Object": objectTerm,
                "Domain": domain
            },
            ignore_index=True)
    return df
Example #39
def rm_main():
    # Get the name of the file to serialize
    fileName = ""

    # Try to create the graph to analyze the vocabulary
    try:
        g = Graph()
        format_ = fileName.split(".")[-1]
        if (format_ == "txt"):
            format_ = fileName.split(".")[-2]
        format_ = format_.split("?")[0]
        result = g.parse(fileName, format=guess_format(format_))
    except Exception as e:
        # In case of an error during the graph's initiation, print the error
        print(str(e) + "\n")

    # Get the formats that will be used for serialization
    strFormats = ""
    dest = fileName.split(".")[0]

    # Serialize the vocabulary in multiple formats
    if ("n3" in strFormats.split()):
        g.serialize(destination=dest + ".n3", format="n3")
    if ("nt" in strFormats.split()):
        g.serialize(destination=dest + ".nt", format="nt")
    if ("rdf" in strFormats.split()):
        g.serialize(destination=dest + ".rdf", format="pretty-xml")
    if ("ttl" in strFormats.split()):
        g.serialize(destination=dest + ".ttl", format="turtle")
    if ("json" in strFormats.split()):
        g.serialize(destination=dest + ".json-ld", format="json-ld")
Example #40
def read_graph(location, result, g=None):
    if g is None:
        g = ConjunctiveGraph()
    graph = ConjunctiveGraph(store=g.store, identifier=result.identifier)
    if len(graph) == 0:
        data = get_content(location).read()
        f = guess_format(location)
        for fmt in [f] + _rdf_formats_to_guess:
            try:
                graph.parse(data=data, format=fmt)
                break
            except Exception as e:
                #print(e)
                pass
        if len(graph) == 0:
            print("Could not parse graph: ", location)
        if result[RDF.type:OWL.Ontology]:
            for ontology in graph.subjects(RDF.type, OWL.Ontology):
                imports = [
                    graph.resource(x)
                    for x in graph.objects(ontology, OWL.imports)
                ]
                for i in imports:
                    read_graph(i.identifier, i, g=g)
    return g
Example #41
  def handle(self, *args, **options):
    f = options['file'][0]
    g = options['graph'][0]

    graph = settings.GRAPH

    rdf_format = guess_format(f)

    graph.parse(source=f, format=rdf_format, publicID=g)

    graph.commit()

    # need to copy a unique set of resources to the Resource table
    for ts in TypeStatement.objects.values_list('member',flat=True):
      Resource.objects.update_or_create(subject=ts)

    # and unique class names and predicates to their own tables
    for a in AssertedStatement.objects.values_list('predicate',flat=True):
      Predicate.objects.update_or_create(value=a)

    for l in LiteralStatement.objects.values_list('predicate',flat=True):
      Predicate.objects.update_or_create(value=l)

    for q in QuotedStatement.objects.values_list('predicate',flat=True):
      Predicate.objects.update_or_create(value=q)

    for k in TypeStatement.objects.values_list('klass',flat=True):
      Klass.objects.update_or_create(value=k)

    for c in TypeStatement.objects.values_list('context',flat=True):
      Context.objects.update_or_create(value=c)

    graph.close()
Example #42
def import_old_data(request):
    everything_graph = Graph()
    bind_namespaces(everything_graph)

    # Either gather post data (must be one project/user graph at a time)
    if request.method == 'POST':
        logger.debug('!!!!!!!!!!!!!!! views.py - import_old_data')
        parse_request_into_graph(request, everything_graph)

        add_all_users(everything_graph)

        # Create each user's default project
        # Due to the structure of the data when exported from the old system, this also
        #  add each annotation to the project as an aggregated resource
        create_project(everything_graph)

    # or serialize from a folder, where each file is one project/user graph
    else:
        i = 0
        for file_name in listdir("output/"):
            if file_name.startswith('.'):
                continue

            try:
                everything_graph.parse("output/" + file_name,
                                       format=guess_format(file_name)
                                       or 'turtle')
            except Exception as e:
                print "Failed to decode file '%s' with error message '%s'" % (
                    file_name, e.args[-1])
            else:
                add_all_users(everything_graph)
                create_project(everything_graph)

    return HttpResponse("I finished migrating data without errors.")
Example #43
def file_to_rdf_provider(input_file):
    """
    Create RDF provider from the input file
    """
    input_name, input_ext = os.path.splitext(os.path.basename(input_file))
    graph = Graph()
    graph.parse(input_file, format=guess_format(input_ext))
    return RDFProvider({"id": input_name.upper()}, graph)
Example #44
def get_format(filename):
    return guess_format(filename, {
        'xml': 'trix',
        'ttl': 'turtle',
        'nq': 'nquads',
        'nt': 'nt',
        'rdf': 'xml'
    })
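
Note that a custom map passed to guess_format replaces rdflib's default SUFFIX_FORMAT_MAP rather than extending it, so only these five extensions are recognized, and '.xml' is deliberately remapped to TriX:

assert get_format("dump.xml") == "trix"   # the default map would say "xml"
assert get_format("dump.nq") == "nquads"
assert get_format("dump.jsonld") is None  # absent from the custom map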
Example #45
    def handle(self, directory, *args, **kwargs):
        store = rdfstore()

        for filename in os.listdir(directory):
            full_path = os.path.join(directory, filename)
            if os.path.isfile(full_path) and not filename.startswith('.'):
                context = URIRef(urllib.unquote(filename[:filename.rfind('.')]))
                graph = Graph(store, context)
                graph.parse(full_path, format=guess_format(filename))
Example #46
	def run(self):
		ontologies = self.ontologies

		# Definition of namespaces
		# Uncomment if needed
		NS_owl =  Namespace("http://www.w3.org/2002/07/owl#")
		NS_rdfs =  Namespace("http://www.w3.org/2000/01/rdf-schema#")
		NS_xsd =  Namespace("http://www.w3.org/2001/XMLSchema#")
		NS_rdf =  Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
		NS_mcf =  Namespace("http://www.mycorporisfabrica.org/ontology/mcf.owl#")

		g1 = ConjunctiveGraph()
		g2 = ConjunctiveGraph()
		g1.parse(ontologies[0], format=guess_format(ontologies[0]))
		g2.parse(ontologies[1], format=guess_format(ontologies[1]))
		listDiff = ConjunctiveGraph()
		listDiff = g1 ^ g2

		global listNames, listSizes

		for s,p,o in g1.triples((None, None, None)):
			item = ""
			#item += "[[ "+str(s)+" ]]\t[[ "+str(p)+" ]]\t[[ "+str(o)+" ]]"
			item +=str(s)+" || "+str(p)+" || "+str(o)
			self.emit(SIGNAL('addListItem(QString)'), item)

		ontologySplit = ontologies[0].split('/')
		ontologyName=ontologySplit[len(ontologySplit)-1]
		listNames.append(ontologyName)
		listSizes.append(str(len(g1)))
		tab["Ontology"] = listNames
		tab["Size"] = listSizes

		self.emit(SIGNAL('update_table(PyQt_PyObject)'), tab)

		ontologySplit = ontologies[1].split('/')
		ontologyName=ontologySplit[len(ontologySplit)-1]
		listNames.append(ontologyName)
		listSizes.append(str(len(g2)))
		tab["Ontology"] = listNames
		tab["Size"] = listSizes

		self.emit(SIGNAL('update_table(PyQt_PyObject)'), tab)
Example #47
    def __init__(self, file_names):

        logger.info("Reading the data with RdfLib ...")

        for file_name in file_names:
            try:
                name, extension = os.path.splitext(file_name)
                self.ont.parse(file_name, format=util.guess_format(file_name))
            except Exception:
                logger.exception("Error reading file "+file_name+". Parser for "+extension[1:]+" needed.")
Example #48
        def _loadgraph(filename):
            g = rdflib.Graph()
            # we must read the data ourself, providing a non-ascii
            # filename to Graph.parse fails deep in rdflib internals
            format = guess_format(filename)
            if format == "nt":
                data = util.readfile(filename, "r", encoding="utf-8")
            else:
                data = util.readfile(filename, "rb")

            g.parse(data=data, format=format)
            return g
Example #49
def main():
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        '--input', '-i', type=str, required=True, help='Location of input ttl file')

    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    # "is model of": "RO:0003301"
    # is_model_of = URIRef('OBO:RO_0003301')
    is_model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')

    # if we curie_map & globaltt here we could ...
    # (pfx lcl) = globaltt["is model of"].split(':')
    # iri = curie_map[pfx] + '_'.join((pfx, lcl))
    # is_model_of = URIRef(iri)

    models = graph.subject_objects(is_model_of)
    model_len = len(set(list(models)))

    if model_len < EXPECTED_PAIRS:
        LOG.error(
            "Not enough <RO:is model of> predicates in graph: found {}, "
            "expected {} check omia log for warnings".format(
                model_len, EXPECTED_PAIRS))
        exit(1)
    # else:
    #    LOG.info(
    #        "Found {} model_of predicates in graph, expected at least: {}".format(
    #            model_len, EXPECTED_PAIRS))

    breed = 'https://monarchinitiative.org/model/OMIA-breed:758'
    disease = 'http://omim.org/entry/305100'

    omim_diseases = graph.objects(
        subject=URIRef(breed),
        predicate=is_model_of
    )

    if list(omim_diseases) != [URIRef(disease)]:
        LOG.error("Missing breed to omim triple for %s", breed)
        LOG.error(list(omim_diseases))
        exit(1)

    LOG.info("PASSED")
Example #50
def main(target, _help=_help, options="", stdin=True):
    """
    A main function for tools that read RDF from files given on commandline
    or from STDIN (if stdin parameter is true)
    """

    args, files = getopt.getopt(sys.argv[1:], "hf:o:" + options)
    dargs = dict(args)

    if "-h" in dargs:
        _help()
        sys.exit(-1)

    g = rdflib.Graph()

    if "-f" in dargs:
        f = dargs["-f"]
    else:
        f = None

    if "-o" in dargs:
        sys.stderr.write("Output to %s\n" % dargs["-o"])
        out = codecs.open(dargs["-o"], "w", "utf-8")
    else:
        out = sys.stdout

    start = time.time()
    if len(files) == 0 and stdin:
        sys.stderr.write("Reading from stdin as %s..." % f)
        g.load(sys.stdin, format=f)
        sys.stderr.write("[done]\n")
    else:
        size = 0
        for x in files:
            if f is None:
                f = guess_format(x)
            start1 = time.time()
            sys.stderr.write("Loading %s as %s... " % (x, f))
            g.load(x, format=f)
            sys.stderr.write("done.\t(%d triples\t%.2f seconds)\n" %
                             (len(g) - size, time.time() - start1))
            size = len(g)

    sys.stderr.write("Loaded a total of %d triples in %.2f seconds.\n" %
                     (len(g), time.time() - start))

    target(g, out, args)
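A minimal sketch of a tool built on this entry point; count_types and its output format are illustrative, not part of the original module:

from rdflib.namespace import RDF

def count_types(graph, out, args):
    # Write each distinct rdf:type object on its own line.
    for t in sorted(set(graph.objects(None, RDF.type))):
        out.write("%s\n" % t)

if __name__ == "__main__":
    main(count_types)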
Example #51
def process(triple_file, action, email, password, url, named_graph):
    console("\n{}\n".format('-' * 25))
    console("VIVO url: {}".format(url))

    # Handle named graph
    if named_graph != KB2:
        named_graph = URIRef(named_graph)

    fmt = guess_format(triple_file)
    graph = Graph()
    graph.parse(source=triple_file, format=fmt)

    console("Read {} triples and will {} to <{}>".format(len(graph), action, named_graph))
    # Do the update.
    do_update(email, password, url, graph, named_graph, action)
    # Finish
    console("\n{}\n".format('-' * 25))
Example #52
File: conf.py Project: AKSW/QuitStore
    def __init_graph_conf_from_configuration(self, configfileId, known_blobs):
        """Init graphs with setting from config.ttl."""
        try:
            configfile = self.repository.get(configfileId)
        except Exception as e:
            raise InvalidConfigurationError(
                "Blob for configfile with id {} not found in repository {}".format(configfileId, e))

        content = configfile.read_raw()

        try:
            self.graphconf.parse(data=content, format='turtle')
        except Exception as e:
            raise InvalidConfigurationError(
                "Configfile could not be parsed {} {}".format(configfileId, e)
            )
        nsQuit = 'http://quit.aksw.org/vocab/'
        query = 'SELECT DISTINCT ?graphuri ?filename ?format WHERE { '
        query += '  ?graph a <' + nsQuit + 'Graph> . '
        query += '  ?graph <' + nsQuit + 'graphUri> ?graphuri . '
        query += '  ?graph <' + nsQuit + 'graphFile> ?filename . '
        query += '  OPTIONAL { ?graph <' + nsQuit + 'hasFormat> ?format .} '
        query += '}'
        result = self.graphconf.query(query)

        for row in result:
            filename = str(row['filename'])
            if row['format'] is None:
                format = guess_format(filename)
            else:
                format = str(row['format'])
            if format != 'nt':
                continue
            if filename not in known_blobs:
                continue

            graphuri = URIRef(str(row['graphuri']))

            # we store which named graph is serialized in which file
            self.graphs[graphuri] = filename
            self.files[filename] = {
                'serialization': format, 'graph': graphuri, 'oid': known_blobs[filename]}
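A sketch of a config entry the query above would match, built with rdflib; the vocabulary is inferred from the query itself, and the graph URI and filename are made up:

from rdflib import Graph, Namespace, BNode, URIRef, Literal
from rdflib.namespace import RDF

QUIT = Namespace('http://quit.aksw.org/vocab/')

conf = Graph()
entry = BNode()
conf.add((entry, RDF.type, QUIT.Graph))
conf.add((entry, QUIT.graphUri, URIRef('http://example.org/graph/')))
conf.add((entry, QUIT.graphFile, Literal('example.nt')))
conf.add((entry, QUIT.hasFormat, Literal('nt')))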
Example #53
def read_graph(location, result, g = None):
    if g is None:
        g = ConjunctiveGraph()
    graph = ConjunctiveGraph(store=g.store, identifier=result.identifier)
    if len(graph) == 0:
        data = get_content(location).read()
        f = guess_format(location)
        for fmt in [f] + _rdf_formats_to_guess:
            try:
                graph.parse(data=data, format=fmt)
                break
            except Exception:
                # this format failed to parse; try the next guess
                pass
        if len(graph) == 0:
            print("Could not parse graph: %s" % location)
        if result[RDF.type:OWL.Ontology]:
            for ontology in graph.subjects(RDF.type, OWL.Ontology):
                imports = [graph.resource(x) for x in graph.objects(ontology, OWL.imports)]
                for i in imports:
                    read_graph(i.identifier, i, g = g)
    return g
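A hypothetical use, pulling an ontology and its owl:imports closure into one store; the URL is illustrative, and get_content plus _rdf_formats_to_guess come from the surrounding module:

from rdflib import ConjunctiveGraph, URIRef

g = ConjunctiveGraph()
onto_uri = 'http://example.org/onto.owl'
onto = g.resource(URIRef(onto_uri))
read_graph(onto_uri, onto, g=g)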
Example #54
File: conf.py Project: AKSW/QuitStore
    def get_blobs_from_repository(self, rev):
        """Analyze all blobs of a revision.

        Returns
        -------
            A triple (dictionary, list, dictionary):
            dict: names of RDF files mapped to their format and the oid of
                the matching graph file.
            list: blob ids of the config files.
            dict: names of RDF files mapped to their (oid, format).

        """
        config_files = []
        graph_files = {}
        graph_file_blobs = {}
        rdf_file_blobs = {}
        try:
            commit = self.repository.revparse_single(rev)
        except Exception:
            return graph_files, config_files, rdf_file_blobs

        # Collect graph files, rdf files and config files
        for entry in commit.tree:
            if entry.type == 'blob':
                format = guess_format(entry.name)
                if format is None and entry.name.endswith('.graph'):
                    graph_file_blobs[entry.name] = entry.id
                elif format is not None and format == 'nt':
                    rdf_file_blobs[entry.name] = (entry.id, format)
                elif format is not None and entry.name == 'config.ttl':
                    config_files.append(str(entry.id))

        # collect pairs of rdf files and graph files
        for filename in rdf_file_blobs:
            if filename + '.graph' in graph_file_blobs:
                graph_file_blob_id = graph_file_blobs[filename + '.graph']
                graph_files[filename] = (rdf_file_blobs[filename][1], str(graph_file_blob_id))

        return graph_files, config_files, rdf_file_blobs
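The pairing step in isolation, with made-up blob ids: an RDF blob 'data.nt' is linked to the oid recorded in its sidecar 'data.nt.graph':

rdf_file_blobs = {'data.nt': ('abc123', 'nt')}
graph_file_blobs = {'data.nt.graph': 'def456'}

graph_files = {}
for filename in rdf_file_blobs:
    if filename + '.graph' in graph_file_blobs:
        oid = graph_file_blobs[filename + '.graph']
        graph_files[filename] = (rdf_file_blobs[filename][1], str(oid))

assert graph_files == {'data.nt': ('nt', 'def456')}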
Example #55
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn=STORE_CONNECTION, store_type=STORE_TYPE):

    store = plugin.get(store_type, Store)()
    store.open(store_conn)
    graph = Graph(store)

    for prefix, uri in ns_bindings.items():
        graph.namespace_manager.bind(prefix, uri, override=False)

    for fpath in input_files:
        use_format, kws = _format_and_kws(input_format)
        if fpath == '-':
            fpath = sys.stdin
        elif not input_format and guess:
            use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
        graph.parse(fpath, format=use_format, **kws)

    if outfile:
        output_format, kws = _format_and_kws(output_format)
        graph.serialize(
            destination=outfile, format=output_format, base=None, **kws)
    store.rollback()
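A hypothetical call; the file names and namespace binding are placeholders, and STORE_CONNECTION and STORE_TYPE default from the surrounding module:

parse_and_serialize(
    input_files=['a.ttl', 'b.nt'],
    input_format=None, guess=True,
    outfile='merged.xml', output_format='xml',
    ns_bindings={'ex': 'http://example.org/'})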
Example #56
import sys
from rdflib import Graph, Namespace, RDF
from rdflib.util import guess_format

# namespaces
TERO = Namespace("http://www.yso.fi/onto/tero/")
YSO = Namespace("http://www.yso.fi/onto/yso/")
TEROYSO = Namespace("http://www.yso.fi/onto/tero/p")
TEROMETA = Namespace("http://www.yso.fi/onto/tero-meta/")
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")

# input graph  
g = Graph()
for fn in sys.argv[1:]:
  g.parse(fn, format=guess_format(fn))
  
g.namespace_manager.bind('tero', TERO)
g.namespace_manager.bind('terometa', TEROMETA)

out = Graph()
for prefix, ns in g.namespace_manager.namespaces():
  out.namespace_manager.bind(prefix, ns)

def switch(res):
  if res.startswith(TEROYSO):
    return YSO[res.replace(TEROYSO, 'p')]
  return res

for s,p,o in g:
  out.add((switch(s), p, switch(o)))
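A quick illustration of the rewrite with a made-up concept number: tero/p-prefixed URIs move to the yso namespace, anything else passes through unchanged:

assert switch(TEROYSO['12345']) == YSO['p12345']
assert switch(TERO['other']) == TERO['other']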
Example #57
	def run(self):
		global log
		today = datetime.now()
		log = open("./logs/fusionAdv_"+str(today.month)+"_"+str(today.day)+"_"+str(today.year)+"_"+str(today.hour)+"_"+str(today.minute)+"_"+str(today.second)+".log", "w")
		ontologies = self.ontologies
		output = self.output
		tps0 = time.clock()

		self.myPrint("Fusion process begins...\n...\n")

		owl = Namespace("http://www.w3.org/2002/07/owl#")
		rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
		xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
		rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
		mcf = Namespace("http://www.mycorporisfabrica.org/ontology/mcf.owl#")

		self.myPrint("Parsing the first graph...\n")
		g1 = ConjunctiveGraph()
		g1.parse(ontologies[0], format=guess_format(ontologies[0]))
		diff = ConjunctiveGraph()
		toDel = ConjunctiveGraph()
		self.myPrint("Parsing done !\n\n")

		self.myPrint("Parsing differences list...\n")
		listDiff = self.diff
		for item in listDiff:
			s, p, o = item.split(" || ")

			msg = "Adding triple : " + s + " || " + p + " || " + o + "\n"
			self.myPrint(msg)
			diff.add((URIRef(s), URIRef(p), URIRef(o)))
		self.myPrint("Parsing done !\n\n")

		self.myPrint("Parsing triples to remove from the final graph...\n")
		listToDel = self.toDel
		for item in listToDel:
			s, p, o = item.split(" || ")

			msg = "Removing triple : " + s + " || " + p + " || " + o + "\n"
			self.myPrint(msg)
			toDel.add((URIRef(s), URIRef(p), URIRef(o)))
		self.myPrint("Parsing done !\n\n")

		self.myPrint("Final merge processing...\n")
		gMerge = (g1 + diff) - toDel
		self.myPrint("Merge process complete !\n\n")

		global listNames, listSizes
		ontologySplit = output.split('/')
		ontologyName = ontologySplit[-1]
		listNames.append(ontologyName)
		listSizes.append(str(len(gMerge)))
		tab["Ontology"] = listNames
		tab["Size"] = listSizes

		self.emit(SIGNAL('update_table(PyQt_PyObject)'), tab)

		self.myPrint("Saving the ontology...\n")
		# Saving the merged ontology
		extension = output[-4:]
		if extension == ".ttl":
			f = "turtle"
		elif extension == ".rdf":
			f = "xml"
		else:
			f = "xml"
			output = output + ".rdf"
		gMerge.serialize(output, format=f)

		self.myPrint("Saving done !\n\n")

		tps1 = time.clock()

		self.myPrint("Fusion advanced complete.\n")
		execTime = self.prettyTime(tps1-tps0)
		self.myPrint("\nFusion advanced executing time : "+execTime+"\n")

		log.close()
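The manual extension check above could lean on rdflib's guess_format, as the other snippets in this collection do; a sketch of that variant (pick_serialization is an illustrative name, not part of the original class):

from rdflib.util import guess_format

def pick_serialization(output):
    # Mirror the branches above: turtle and xml pass through, anything
    # unrecognised falls back to RDF/XML with ".rdf" appended to the path.
    fmt = guess_format(output)
    if fmt in ("turtle", "xml"):
        return output, fmt
    return output + ".rdf", "xml"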
Example #58
def rdf_inspect(file_names, verbose=1):
    logger.info("Reading the data with RdfLib ...")

    memg = rdflib.Graph()
    for file_name in file_names:
        name, extension = os.path.splitext(file_name)
        memg.parse(file_name, format=util.guess_format(file_name))
    print("Graph has %s statements." % len(memg))


    pred_set = set()
    for pred in memg.predicates(None, None):
        pred_set.add(pred)
    print("Graph has %s distinct predicates." % len(pred_set))

    types = []
    properties = []
    relations = []
    names = []

    for pred in pred_set:
        if pred in [URIRef(u'http://www.w3.org/2004/02/skos/core#prefLabel'),
                    URIRef(u'http://www.w3.org/2000/01/rdf-schema#label'),
                    URIRef(u'http://www.geonames.org/ontology#name'),
                    URIRef(u'http://xmlns.com/foaf/0.1/name'),
                    URIRef(u'http://purl.org/dc/elements/1.1/title'),
                    URIRef(u'http://dbpedia.org/ontology/personName'),
                    URIRef(u'http://reegle.info/schema#projectTitle')]:
            names.append(pred)
            properties.append(pred)
        elif pred == URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'):
            types.append(pred)
        else:
            (s, p, o) = next(memg.triples((None, pred, None)))
            o_uri = ("%s" % o).lower()
            if isinstance(o, rdflib.term.Literal) \
                    or o_uri.endswith("jpg") \
                    or o_uri.endswith("png") \
                    or o_uri.endswith("pdf") \
                    or o_uri.endswith("doc") \
                    or p == URIRef(u'http://xmlns.com/foaf/0.1/homepage'):
                properties.append(p)
            else:
                relations.append(p)
    lod2graph_mapping(file_out, types, properties, relations, names)

    if verbose:
        logger.info("Types of RDF relations:")
        logger.info("Properties: %d" % len(properties))
        for pred in properties:
            (s, p, o) = next(memg.triples((None, pred, None)))
            logger.info("%s \t %s  %s  %s" % (pred, s, p, o))

        logger.info("Relations: %d" % len(relations))
        for pred in relations:
            (s, p, o) = next(memg.triples((None, pred, None)))
            logger.info("%s \t %s  %s  %s" % (pred, s, p, o))

        logger.info("Types: %d" % len(types))
        for pred in types:
            (s, p, o) = next(memg.triples((None, pred, None)))
            logger.info("%s \t %s  %s  %s" % (pred, s, p, o))

        logger.info("Names: %d" % len(names))
        for pred in names:
            (s, p, o) = next(memg.triples((None, pred, None)))
            logger.info("%s \t %s  %s  %s" % (pred, s, p, o))
Example #59
"""
Combine the RDF files into a graph.
"""

import glob

from rdflib import Graph
from rdflib.util import guess_format

from utils import ns_mgr

g = Graph()
g.namespace_manager = ns_mgr

for item in glob.glob('data/rdf/*'):
    # skip the combined output file itself
    if item.endswith('all.ttl'):
        continue
    format = guess_format(item)
    g.parse(item, format=format)

print(g.serialize(format='turtle'))