def __load_entity_abstracts(self, filename):
    prefix = URIPrefix()
    t = Triple()
    p = NTriplesParser(t)
    lines_counter = 0
    PLOGGER.info("Loading entity abstracts from {}".format(filename))
    for line in FileUtils.read_file_as_list(filename):
        # basic line parsing
        line = line.decode("utf-8") if isinstance(line, bytes) else line
        try:
            p.parsestring(line)
        except ParseError:  # skip lines that couldn't be parsed
            continue
        if t.subject() is None:  # only if parsed as a triple
            continue

        # Subject and object identification
        subj = prefix.get_prefixed(t.subject())
        obj = ""
        if type(t.object()) is URIRef:
            # PLOGGER.error("Error: it is URIRef the parsed obj")
            pass
        else:
            obj = t.object().encode("utf-8")
        if len(obj) == 0:
            continue  # skip empty objects

        self.__entity_abstracts[subj] = obj

        lines_counter += 1
        if lines_counter % 10000 == 0:
            PLOGGER.info("\t{}K lines processed".format(lines_counter // 1000))

    PLOGGER.info("\n### Loading entity abstracts... Done.")
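
# Note: Triple, URIPrefix, FileUtils and PLOGGER above are project-specific helpers that
# are not shown in these snippets. Based on how they are used (rdflib's NTriplesParser
# calls sink.triple(s, p, o) for every parsed statement, and the surrounding code then
# reads t.subject() / t.predicate() / t.object()), a minimal sink could look like the
# illustrative sketch below; this is an assumption, not the original implementation.
class TripleSinkSketch:
    """Keeps only the most recently parsed triple."""

    def __init__(self):
        self._s = self._p = self._o = None

    def triple(self, s, p, o):
        # invoked by NTriplesParser for each successfully parsed statement
        self._s, self._p, self._o = s, p, o

    def subject(self):
        return self._s

    def predicate(self):
        return self._p

    def object(self):
        return self._o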
def __sample_file(self, dir, file):
    """Creates a local sample of a specific file in a given directory.

    :param dir: directory (relative to path_to_dbpedia)
    :param file: file name
    """
    t = Triple()
    p = NTriplesParser(t)
    infile = os.path.join(self.path_to_dbpedia, dir, file)
    outfile = os.path.join(self.output_dir, dir, file)
    print("Processing file " + file + " ...")
    i = 0
    with FileUtils.open_file_by_type(infile) as fin:
        fout = FileUtils.open_file_by_type(outfile, mode="w")  # output file will be of the same type as the input
        for line in fin:
            try:
                p.parsestring(line.decode("utf-8"))
            except ParseError:  # skip lines that couldn't be parsed
                continue
            if t.subject() is None:  # only if parsed as a triple
                continue
            subj = self.prefix.get_prefixed(t.subject())  # prefixing subject
            if subj in self.sample_entities:
                fout.write(line)
            i += 1
            if i % 100000 == 0:
                print(str(i // 1000) + "K lines processed")
        fout.close()
def parse(self, filename: str = None, input_format: str = None,
          provided_by: str = None, predicates: Set[URIRef] = None) -> None:
    """
    Parse an N-Triples file into a networkx.MultiDiGraph

    The file must be a *.nt formatted file.

    Parameters
    ----------
    filename : str
        File to read from.
    input_format : str
        The input file format. Must be one of ``['nt', 'nt.gz']``
    provided_by : str
        Define the source providing the input file.

    """
    p = NTriplesParser(self)
    self.start = current_time_in_millis()
    if input_format == INPUT_FORMATS[0]:
        p.parse(open(filename, 'rb'))
    elif input_format == INPUT_FORMATS[1]:
        p.parse(gzip.open(filename, 'rb'))
    else:
        raise NameError(f"input_format: {input_format} not supported. Must be one of {INPUT_FORMATS}")
    print("Done parsing NT file")
    self.dereify(self.assocs)
def add_file(self, filename, reverse_triple=False, predicate_prefix=None):
    """Adds contents from an NTriples file to MongoDB.

    :param filename: NTriples file.
    :param reverse_triple: if set to True, the subject and object values are swapped.
    :param predicate_prefix: prefix to be added to predicates.
    """
    print("Processing " + filename + "...")

    t = Triple()
    p = NTriplesParser(t)
    self.__m_id = None  # document id for MongoDB -- subj
    self.__m_contents = None  # document contents for MongoDB -- pred, obj
    i = 0

    with FileUtils.open_file_by_type(filename) as f:
        for line in f:
            try:
                p.parsestring(line.decode("utf-8"))
            except ParseError:  # skip lines that couldn't be parsed
                continue
            if t.subject() is None:  # only if parsed as a triple
                continue

            # subject prefixing
            subj = self.__prefix.get_prefixed(t.subject())

            # predicate prefixing
            pred = self.__prefix.get_prefixed(t.predicate())
            if predicate_prefix is not None:
                pred = predicate_prefix + pred

            # object prefixing
            if type(t.object()) is URIRef:
                obj = self.__prefix.get_prefixed(t.object())
            else:
                obj = t.object()
                if len(obj) == 0:
                    continue  # skip empty objects

            # write or append
            if reverse_triple:  # reverse subj and obj
                self._next_triple(obj, pred, subj)
            else:  # normal mode
                self._next_triple(subj, pred, obj)

            i += 1
            if i % 100000 == 0:
                print(str(i // 1000) + "K lines processed from " + filename)

    # process last triple
    self._write_to_mongo()
def axioms(filename):
    stream = Stream()
    parser = NTriplesParser(stream)
    with open(filename, "rb") as data:
        parser.parse(data)
    graph = stream.graph()

    properties = set()
    for triple in graph:
        properties.add(triple[1])
    print(properties)

    dic_fun = functionality(graph, properties)
    print('1')
    dic_ref = reflexivity(graph, properties)
    print('2')
    dic_sym = symmetry(graph, properties)
    print('3')
    dic_inv = inverse(graph, properties)
    print('4')
    dic_tra = transivity(graph, properties)
    print('5')

    csvname = filename + '.csv'
    out = open(csvname, 'a', newline='')
    csv_writer = csv.writer(out, dialect='excel')
    for pi in properties:
        l1 = [pi]
        if dic_fun[pi] > 0:
            l1.append('functionality')
            l1.append(dic_fun[pi])
        if dic_ref[pi] == 1:
            l1.append('reflexivity')
        if dic_sym[pi] == 1:
            l1.append('symmetry')
        if len(dic_inv[pi]) != 0:
            l1.append('inverse')
        if dic_tra[pi] == 1:
            l1.append('transivity')
        print(l1)
        csv_writer.writerow(l1)
    out.close()
    print('over')
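
# Stream above is another project-specific sink that is not included in these snippets.
# From the way it is used here and below (parser.parse(data) feeds it; stream.graph()
# then yields an iterable of (s, p, o) tuples), a minimal stand-in could look like this
# sketch; it is an assumption about the interface, not the original class.
class StreamSketch:
    """Accumulates every parsed triple into an in-memory set."""

    def __init__(self):
        self._triples = set()

    def triple(self, s, p, o):
        # called by rdflib's NTriplesParser once per statement
        self._triples.add((s, p, o))

    def graph(self):
        return self._triples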
def __init__(self, config):
    self.__elastic = None
    self.__config = config
    self.__index_name = config["index_name"]
    self.__dbpedia_path = config["dbpedia_files_path"]

    # For triple parsing
    self.__prefix = URIPrefix()
    self.__triple = Triple()
    self.__ntparser = NTriplesParser(self.__triple)

    # Entity abstract and type assignments kept in memory
    self.__entity_abstracts = {}
    self.__load_entity_abstracts()
    self.__types_entities = defaultdict(list)
    self.__load_entity_types()
def parse(self, source, sink, **kwargs):
    '''
    Parse the NT format

    :type source: `rdflib.parser.InputSource`
    :param source: the source of NT-formatted data
    :type sink: `rdflib.graph.Graph`
    :param sink: where to send parsed triples
    :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
    '''
    f = source.getByteStream()  # TODO getCharacterStream?
    parser = NTriplesParser(NTSink(sink))
    parser.parse(f, **kwargs)
    f.close()
def process_file(infile, sink):
    bad_lines = defaultdict(int)
    for line in infile:
        s = BytesIO()
        s.write(line)
        s.seek(0)
        parser = NTriplesParser(sink)
        try:
            parser.parse(s)
        except (ParseError, ElementStrError) as e:
            bad_lines[line] += 1
    print('read {} lines from {}'.format(sink.nlines, infile.name))
    print('bad lines and their frequencies:')
    for line, count in bad_lines.items():
        print('  {:>10} : {}'.format(count, line))
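
# process_file above accepts any sink object; the final print suggests it at least keeps
# an nlines counter. A minimal counting sink that would satisfy that usage is sketched
# below (illustrative only; the original sink class is not shown, and nlines here counts
# parsed statements, which for N-Triples is one per line).
class CountingSink:
    def __init__(self):
        self.nlines = 0

    def triple(self, s, p, o):
        # one callback per successfully parsed N-Triples statement
        self.nlines += 1

# Hypothetical usage; the file must be opened in binary mode because each line is fed
# into a BytesIO buffer:
#   with open('data.nt', 'rb') as infile:
#       process_file(infile, CountingSink())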
def load(cls, filepath):
    """Return array of FAST dict. Main method."""
    if zipfile.is_zipfile(filepath):
        with ZipFile(filepath) as zf:
            nt_filename = next((n for n in zf.namelist() if n.endswith('.nt')))
            # defaults to equivalent of 'rb'
            nt_file = zf.open(nt_filename)
    else:
        nt_file = open(filepath, 'rb')
    instance = cls()
    parser = NTriplesParser(instance)
    parser.parse(nt_file)
    nt_file.close()
    return instance.terms
def triples(self, triple_pattern, context=None):
    """Generator over the triple store

    Returns triples that match the given triple pattern. If the triple pattern
    does not provide a context, all contexts will be searched.
    """
    (s, p, o) = triple_pattern
    url = self._statement_encode((s, p, o), context)
    req = Request(url)
    req.add_header('Accept', 'text/plain')  # N-Triples is best for generator (one line per triple)
    log.debug("Request: %s" % req.get_full_url())
    dumper = DumpSink()
    parser = NTriplesParser(dumper)
    for l in urlopen(req):
        log.debug('line: %s' % l)
        parser.parsestring(l)
        yield dumper.get_triple()
def transform_part(
    input_path,
    global_id_marker,
    part_name,
    left,
    right,
    prefixer=None,
):
    print(f'starting {part_name}: {left} -- {right}')
    with open(input_path, 'rb') as in_file:
        in_file.seek(left)
        part_bytes = in_file.read(right - left)
        part_str = part_bytes.decode('utf8')  # wasteful

    with PropertyGraphSink(global_id_marker, part_name, prefixer) as sink:
        ntp = NTriplesParser(sink=sink)
        ntp.parsestring(part_str)

    triple_count = sum(sink.predicate_count.values())
    print(f'finished {part_name}: {triple_count} triples')
    return part_name, dict(sink.predicate_count)
def parse_file(self, filename, triplehandler):
    """Parses the file and calls the callback function with each parsed triple."""
    PLOGGER.info("Processing " + filename + "...")

    prefix = URIPrefix()
    t = Triple(prefix)
    p = NTriplesParser(t)
    i = 0

    with open(filename) as f:
        for line in f:
            p.parsestring(line)
            if t.subject() is None:  # only if parsed as a triple
                continue

            # call the handler object with the parsed triple
            triplehandler.triple_parsed(t)

            i += 1
            if i % 10000 == 0:
                PLOGGER.info(str(i // 1000) + "K lines processed")
def read_fb2dbp_file(self, is_39=False):
    """Reads the file and generates an initial mapping of Freebase to DBpedia IDs.

    Only proper DBpedia entities are considered; i.e. redirect and disambiguation
    pages are ignored.
    """
    fb2dbp_file = self.__fb2dbp_file_39 if is_39 else self.__fb2dbp_file
    print("Processing " + fb2dbp_file + "...")

    t = Triple()
    p = NTriplesParser(t)
    i = 0
    fb2dbp_mapping = defaultdict(set)

    with FileUtils.open_file_by_type(fb2dbp_file) as f:
        for line in f:
            try:
                p.parsestring(line.decode("utf-8"))
            except ParseError:  # skip lines that couldn't be parsed
                continue
            if t.subject() is None:  # only if parsed as a triple
                continue

            # prefixing
            dbp_id = self.__prefix.get_prefixed(t.subject())
            fb_id = self.__prefix.get_prefixed(t.object())

            # if reading 3.9 file, converts ID to 2015-10 version
            if is_39:
                dbp_id = EntityUtils.convert_39_to_201510(dbp_id)
                fb2dbp_mapping[fb_id].add(dbp_id)
            # if reading 2015-10 file, keeps only the proper DBpedia entities
            else:
                entity_utils = EntityUtils(self.__mongo_dbpedia.find_by_id(dbp_id))
                if entity_utils.is_entity():
                    fb2dbp_mapping[fb_id].add(dbp_id)

            i += 1
            if i % 1000 == 0:
                print(str(i // 1000) + "K lines are processed!")

    return fb2dbp_mapping
def classPart(filename):
    stream = Stream()
    parser = NTriplesParser(stream)
    with open(filename, "rb") as data:
        parser.parse(data)
    graph = stream.graph()  # graph (set) is the dataset <s1,p1,o1> <s2,p2,o2>...
    print('success load')

    classes = {}
    c = set()
    p = set()
    for triple in graph:
        c.add(triple[2])
        p.add(triple[0])
        if triple[2] not in classes:
            classes[triple[2]] = set()
        classes[triple[2]].add(triple[0])

    print('the number of classes: ', end='')
    print(len(c))
    print('the number of instances: ', end='')
    print(len(p))
    return classes
def _parse(s):
    n = NTriplesParser()
    n.line = s
    return n.object()
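
# _parse above leans on an implementation detail of rdflib's legacy ntriples module:
# NTriplesParser keeps the text it is currently consuming in self.line, and object()
# reads a single term from it. A hedged usage sketch (newer rdflib releases expose this
# class as W3CNTriplesParser, so the import may need adjusting there):
if __name__ == '__main__':
    print(_parse('<http://example.org/thing>'))  # expected: a URIRef
    print(_parse('"hello"@en'))                  # expected: a language-tagged Literal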
        if data:
            data = set(json.loads(data))
        for w in v:
            data.add(w)
        if data:
            self.db.put(k, json.dumps(list(data)))
        logging.info('categories: {0} => {1}'.format(unquote(k), list(data)[:5]))


if __name__ == '__main__':
    labels = NTriplesParser(sink=Label('./labels'))
    categories = NTriplesParser(sink=Category('./categories', labels=labels.sink.db))

    def process_labels(line):
        labels.parsestring(line)

    for filename in [
        './labels_en.nt',
        './labels_en_uris_id.nt',
        './category_labels_en.nt',
        './category_labels_en_uris_id.nt',
    ]:
        logging.info('labels: processing: {0}'.format(filename))
        Parallel(n_jobs=N_JOBS)(delayed(process_labels)(line) for line in open(filename))

    def process_categories(line):
    try:
        c.execute("INSERT INTO entities (entity) VALUES (?)", [(s)])
        s_id = c.lastrowid
    except sqlite3.IntegrityError:
        c.execute("SELECT id FROM entities WHERE entity = ?", [(s)])
        s_id = c.fetchone()[0]
    try:
        c.execute("INSERT INTO properties (property) VALUES (?)", [(p)])
        p_id = c.lastrowid
    except sqlite3.IntegrityError:
        c.execute("SELECT id FROM properties WHERE property = ?", [(p)])
        p_id = c.fetchone()[0]
    try:
        c.execute("INSERT INTO entities (entity) VALUES (?)", [(o)])
        o_id = c.lastrowid
    except sqlite3.IntegrityError:
        c.execute("SELECT id FROM entities WHERE entity = ?", [(o)])
        o_id = c.fetchone()[0]
    # print("{} {} {}".format(s_id, p_id, o_id))


if __name__ == "__main__":
    # Create a new parser and try to parse the NT file.
    sk = StreamSink()
    n = NTriplesParser(sk)
    with open(sys.argv[1], "r") as anons:
        n.parse(anons)
    conn.commit()
    conn.close()
    print("triples = {}".format(sk.length))
        dic_dis[triple[2]].add(triple[0])
    return dic_dis


# input the filepath
datasets = Filelist('E:/python/ttldata')
print(datasets)
dic1 = {}
for filename in datasets:
    stream = Stream()
    parser = NTriplesParser(stream)
    with open(filename, "rb") as data:
        parser.parse(data)
    graph = stream.graph()
    ChoiceType(graph, dic1)
print(dic1)

for i in dic1:
    for j in dic1:
        if i == j:
            continue
        if dic1[i] & dic1[j]:
            print(i, ' and ', j, ' aren\'t disjunction')
        else:
        if not self.labels:
            raise Exception('Labels not set.')

    def triple(self, s, p, o):
        k = o.encode('utf-8')
        s = s.encode('utf-8')
        for v in self.labels.get(s, default=set()):
            self.store[k].add(v)
        logging.info('categories: {0} => {1}'.format(unquote(k), list(self.store.get(k))[:5]))


if __name__ == '__main__':
    labels = NTriplesParser(sink=Label())

    def process_labels(line):
        labels.parsestring(line)

    for filename in [
        './labels_en.nt',
        './labels_en_uris_id.nt',
        './category_labels_en.nt',
        './category_labels_en_uris_id.nt',
    ]:
        logging.info('labels: processing: {0}'.format(filename))
        Parallel(n_jobs=N_JOBS)(delayed(process_labels)(line) for line in open(filename))

    pickle.dump(labels.sink.store, open('labels.p', 'wb'))
    categories = NTriplesParser(sink=Category(labels=labels.sink.store))
def load(
        dump_file: 'url of the Freebase RDF dump',
        mid_textid_file: 'url of the part of the Freebase RDF dump containing type.object.id relations'
):
    engine = create_engine(get_db_url(), pool_recycle=3600)
    Base.metadata.create_all(engine)

    def execute_select(statement, **args):
        db = engine.connect()
        try:
            for row in db.execute(statement, **args):
                yield row
        except OperationalError:
            db.close()
            db = engine.connect()
            for row in db.execute(statement, **args):
                yield row
        finally:
            db.close()

    def execute_edit(statement, **args):
        db = engine.connect()
        try:
            return db.execute(statement, **args)
        except OperationalError:
            db.close()
            db = engine.connect()
            return db.execute(statement, **args)
        finally:
            db.close()

    @lru_cache(maxsize=4096)
    def get_topic_id_from_url(url: str) -> Optional[int]:
        input_id = url.replace('http://rdf.freebase.com/ns', '').replace('.', '/')
        if input_id.startswith('/m/') or input_id.startswith('/g/'):
            for topic in execute_select(Topic.__table__.select(Topic.mid == input_id)):
                return topic[0]
            return execute_edit(insert_query(Topic), mid=input_id).inserted_primary_key[0]
        else:
            if len(input_id) > MAX_VARCHAR_SIZE:
                return None
            for topic in execute_select(Topic.__table__.select(Topic.textid == input_id)):
                return topic[0]
            try:
                return execute_edit(insert_query(Topic), textid=input_id).inserted_primary_key[0]
            except IntegrityError as e:
                logger.error(e)
                return None

    def add_to_language_column(table, s, label, max_size):
        s_topic_id = get_topic_id_from_url(s)
        if s_topic_id is None:
            logger.warning('Not able to get mid for label subject {}'.format(s))
            return
        if len(label) >= max_size:
            logger.error('Not able to add too long label: {}'.format(label))
            return
        try:
            execute_edit(insert_query(table), topic_id=s_topic_id,
                         language=label.language, value=label.value)
        except IntegrityError:
            pass  # We do not care about duplicates

    def add_type(s, o, notable):
        s_topic_id = get_topic_id_from_url(s)
        if s_topic_id is None:
            logger.warning('Not able to get mid for type subject {}'.format(s))
            return
        o_topic_id = get_topic_id_from_url(o)
        if o_topic_id is None:
            logger.warning('Not able to get mid for type object {}'.format(o))
            return
        try:
            execute_edit(insert_query(Type), topic_id=s_topic_id,
                         type_id=o_topic_id, notable=notable)
        except IntegrityError:
            if notable:  # We add notability
                execute_edit(Type.__table__.update()
                             .where(Type.topic_id == s_topic_id)
                             .where(Type.type_id == o_topic_id)
                             .values(notable=notable))

    def add_key(s, key):
        if not is_interesting_key(key):
            return False
        s_topic_id = get_topic_id_from_url(s)
        if s_topic_id is None:
            logger.warning('Not able to get mid for key {}'.format(s))
            return
        key = decode_key(key)
        if len(key) >= MAX_VARCHAR_SIZE:
            logger.error('Not able to add too long key: {}'.format(key))
            return
        try:
            execute_edit(insert_query(Key), topic_id=s_topic_id, key=decode_key(key))
        except IntegrityError:
            pass

    def add_property_topic_id_field(field_name, s, o):
        s_topic_id = get_topic_id_from_url(s)
        if s_topic_id is None:
            logger.warning('Not able to get mid for key {}'.format(s))
            return
        o_topic_id = get_topic_id_from_url(o)
        if o_topic_id is None:
            logger.warning('Not able to get mid for key {}'.format(s))
            return
        try:
            execute_edit(insert_query(Property), topic_id=s_topic_id,
                         **{field_name: o_topic_id})
        except IntegrityError:
            execute_edit(update_query(Property)
                         .values(**{field_name: o_topic_id})
                         .where(Property.topic_id == s_topic_id))

    def add_unique(s, o):
        s_topic_id = get_topic_id_from_url(s)
        if s_topic_id is None:
            logger.warning('Not able to get mid for key {}'.format(s))
            return
        try:
            execute_edit(insert_query(Property), unique=to_bool(o), topic_id=s_topic_id)
        except IntegrityError:
            execute_edit(update_query(Property)
                         .values(unique=to_bool(o))
                         .where(Property.topic_id == s_topic_id))

    def add_edge(s, p, o):
        s_topic_id = get_topic_id_from_url(s)
        if s_topic_id is None:
            logger.warning('Not able to get mid for key {}'.format(s))
            return
        p_topic_id = get_topic_id_from_url(p)
        if p_topic_id is None:
            logger.warning('Not able to get mid for key {}'.format(p))
            return
        o_topic_id = get_topic_id_from_url(o)
        if o_topic_id is None:
            logger.warning('Not able to get mid for key {}'.format(o))
            return
        try:
            execute_edit(insert_query(Edge), subject_id=s_topic_id,
                         predicate_id=p_topic_id, object_id=o_topic_id)
        except IntegrityError:
            pass

    def to_bool(s):
        s = str(s)
        if s == 'true':
            return True
        elif s == 'false':
            return False
        else:
            raise ValueError("Unexpected value: '{}'".format(s))

    class TextIdSink:
        def triple(self, s, p, o):
            if p == type_object_id:
                s = s.replace('http://rdf.freebase.com/ns', '').replace('.', '/')
                o = o.replace('http://rdf.freebase.com/ns', '').replace('.', '/')
                try:
                    execute_edit(insert_query(Topic), mid=s, textid=o)
                except IntegrityError:
                    pass
            else:
                logger.info('Unexpected triple: {} {} {}'.format(s, p, o))

    class TripleSink:
        def __init__(self, start_cursor=0):
            self.cursor = start_cursor

        def triple(self, s, p, o):
            self.cursor += 1
            if self.cursor % 1000000 == 0:
                print(self.cursor)
                with open('progress.txt', 'wt') as pfp:
                    pfp.write(str(self.cursor))
            try:
                """
                if p == type_object_name:
                    add_to_language_column(Label, s, o, MAX_VARCHAR_SIZE)
                elif p == common_topic_description:
                    add_to_language_column(Description, s, o, sys.maxsize)
                elif p == common_topic_alias:
                    add_to_language_column(Alias, s, o, MAX_VARCHAR_SIZE)
                elif p == type_object_type:
                    add_type(s, o, False)
                elif p == common_topic_notable_types:
                    add_type(s, o, True)
                elif p == type_object_key:
                    add_key(s, o.value)
                if p == type_property_schema:
                    add_property_topic_id_field('schema_id', s, o)
                elif p == type_property_expected_type:
                    add_property_topic_id_field('expected_type_id', s, o)
                elif p == type_property_unique:
                    add_unique(s, o)
                elif p == type_property_master_property:
                    add_property_topic_id_field('master_id', s, o)
                elif p == type_property_reverse_property:
                    add_property_topic_id_field('reverse_id', s, o)
                elif p == type_property_unit:
                    add_property_topic_id_field('unit_id', s, o)
                elif p == type_property_delegated:
                    add_property_topic_id_field('delegated_id', s, o)
                """
                if isinstance(o, URIRef) and o.startswith('http://rdf.freebase.com/') and \
                        p.startswith('http://rdf.freebase.com/ns/') and \
                        not any(b in p for b in edge_blacklist):
                    add_edge(s, p, o)
            except ValueError:
                pass

    with gzip.open(mid_textid_file) as fp:
        NTriplesParser(sink=TextIdSink()).parse(fp)

    with gzip.open(dump_file) as fp:
        cursor = 0
        progress = Path('progress.txt')
        if progress.is_file():
            with progress.open('rt') as fpc:
                cursor = int(fpc.read().strip())
            logger.info('Skipping the first {} lines'.format(cursor))
            for _ in range(cursor):
                fp.readline()
        NTriplesParser(sink=TripleSink(cursor)).parse(fp)
def triples(self, (s, p, o), context=None):
    """Generator over the triple store

    Returns triples that match the given triple pattern. If triple pattern
    does not provide a context, all contexts will be searched.
    """
    ctx = context or self.context
    url = self._statement_encode((s, p, o), ctx)
    req = Request(url)
    req.add_header('Accept', 'text/plain')  # N-Triples is best for generator (one line per triple)
    log.debug("Request: %s" % req.get_full_url())
    dumper = DumpSink()
    parser = NTriplesParser(dumper)
    for l in urlopen(req):
        #log.debug('line: %s'%l)
        parser.parsestring(l)
        yield dumper.get_triple()

def __len__(self):
    """Returns the number of triples in the graph

    calls http://{self.url}/size, very fast
    """
    return int(urlopen(self.url + "/size").read())

def set(self, (subject, predicate, object)):
    """Convenience method to update the value of object
    try:
        p_id = props[p][0]
    except KeyError:
        props[p] = (i, open("matrices/" + str(i), "w+"))
        props[p][1].write("%%MatrixMarket matrix coordinate integer general\n%\nnum_ents num_ents num_nonZeros\n")
        p_id = i
        i += 1
    c.execute("SELECT id FROM entities WHERE entity = ?", [(s)])
    s_id = c.fetchone()[0]
    c.execute("SELECT id FROM entities WHERE entity = ?", [(o)])
    o_id = c.fetchone()[0]
    props[p][1].write(("{} {} 1" + "\n").format(s_id, o_id))


TSink = TensorSink()
g = NTriplesParser(TSink)
f = open("test.ttl", 'rb')
g.parse(f)
f.close()
conn.commit()

c.execute("SELECT count(*) FROM entities")
num_ents = c.fetchone()[0]

# close all writers and the database connection
for key, value in props.items():
    value[1].close()
conn.close()

# create .mtx for all properties with proper head fields
for key, value in props.items():
    id_p = str(value[0])
def parse(self, source, sink, baseURI=None):
    f = source.getByteStream()  # TODO getCharacterStream?
    parser = NTriplesParser(NTSink(sink))
    parser.parse(f)
    f.close()
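
# The parser-plugin parse() methods above are normally not called directly; rdflib
# dispatches to them when an N-Triples format is requested. A minimal usage sketch,
# assuming a standard rdflib installation:
from rdflib import Graph

g = Graph()
g.parse(data='<http://example.org/s> <http://example.org/p> "o" .\n', format='nt')
print(len(g))  # expected: 1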