import csv

def axioms(filename):
    stream = Stream()
    parser = NTriplesParser(stream)
    with open(filename, "rb") as data:
        parser.parse(data)
    graph = stream.graph()
    properties = set()
    for triple in graph:
        properties.add(triple[1])
    print(properties)
    # The axiom checkers (functionality, reflexivity, symmetry, inverse,
    # transivity) are helpers defined elsewhere in the project.
    dic_fun = functionality(graph, properties)
    print('1')
    dic_ref = reflexivity(graph, properties)
    print('2')
    dic_sym = symmetry(graph, properties)
    print('3')
    dic_inv = inverse(graph, properties)
    print('4')
    dic_tra = transivity(graph, properties)
    print('5')
    csvname = filename + '.csv'
    with open(csvname, 'a', newline='') as out:
        csv_writer = csv.writer(out, dialect='excel')
        for pi in properties:
            l1 = [pi]
            if dic_fun[pi] > 0:
                l1.append('functionality')
                l1.append(dic_fun[pi])
            if dic_ref[pi] == 1:
                l1.append('reflexivity')
            if dic_sym[pi] == 1:
                l1.append('symmetry')
            if len(dic_inv[pi]) != 0:
                l1.append('inverse')
            if dic_tra[pi] == 1:
                l1.append('transitivity')
            print(l1)
            csv_writer.writerow(l1)
    print('over')
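# Several snippets in this file (axioms above, classPart and the
# disjointness check below) rely on a Stream sink that buffers parsed
# triples. A minimal sketch, assuming triples are kept in a set and
# exposed via graph(); both conventions belong to these snippets, not
# to rdflib itself.
from rdflib.plugins.parsers.ntriples import Sink

class Stream(Sink):
    def __init__(self):
        super().__init__()
        self._triples = set()

    def triple(self, s, p, o):
        # Called by NTriplesParser once per parsed triple.
        self._triples.add((s, p, o))

    def graph(self):
        return self._triples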
def parse(self, source, sink, **kwargs):
    '''
    Parse the NT format

    :type source: `rdflib.parser.InputSource`
    :param source: the source of NT-formatted data
    :type sink: `rdflib.graph.Graph`
    :param sink: where to send parsed triples
    :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
    '''
    f = source.getByteStream()  # TODO getCharacterStream?
    parser = NTriplesParser(NTSink(sink))
    parser.parse(f, **kwargs)
    f.close()
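# NTSink, used above and in the parse() variants below, adapts an
# rdflib Graph to the parser's sink interface. A minimal version
# consistent with how it is used here (a sketch, not necessarily the
# exact class from rdflib's nt plugin):
class NTSink(object):
    def __init__(self, graph):
        self.graph = graph

    def triple(self, s, p, o):
        # Forward each parsed triple into the target Graph.
        self.graph.add((s, p, o))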
from collections import defaultdict
from io import BytesIO

from rdflib.plugins.parsers.ntriples import NTriplesParser, ParseError

# ElementStrError is assumed to be defined elsewhere in the project.

def process_file(infile, sink):
    bad_lines = defaultdict(int)
    for line in infile:
        # Parse one line at a time so a bad line cannot abort the run.
        s = BytesIO()
        s.write(line)
        s.seek(0)
        parser = NTriplesParser(sink)
        try:
            parser.parse(s)
        except (ParseError, ElementStrError) as e:
            bad_lines[line] += 1
    print('read {} lines from {}'.format(sink.nlines, infile.name))
    print('bad lines and their frequencies:')
    for line, count in bad_lines.items():
        print(' {:>10} : {}'.format(count, line))
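# A hypothetical driver for process_file. The sink must expose the
# nlines attribute read by the print call above; LineCountSink and
# 'data.nt' are invented for this sketch.
from rdflib.plugins.parsers.ntriples import Sink

class LineCountSink(Sink):
    def __init__(self):
        super().__init__()
        self.nlines = 0

    def triple(self, s, p, o):
        self.nlines += 1

with open('data.nt', 'rb') as infile:
    process_file(infile, LineCountSink())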
import zipfile
from zipfile import ZipFile

@classmethod
def load(cls, filepath):
    """Return array of FAST dict. Main method."""
    instance = cls()
    parser = NTriplesParser(instance)
    if zipfile.is_zipfile(filepath):
        with ZipFile(filepath) as zf:
            nt_filename = next(
                (n for n in zf.namelist() if n.endswith('.nt')))
            # zf.open() defaults to the equivalent of 'rb'; parse inside
            # the with block so the archive is still open while reading
            with zf.open(nt_filename) as nt_file:
                parser.parse(nt_file)
    else:
        with open(filepath, 'rb') as nt_file:
            parser.parse(nt_file)
    return instance.terms
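# Hypothetical usage of load(), assuming it is a classmethod on a Sink
# subclass whose triple() callback fills self.terms during parsing.
# FastSink and the file name are invented for this example.
terms = FastSink.load('fast_subjects.nt.zip')
print(len(terms), 'terms loaded')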
def classPart(filename):
    stream = Stream()
    parser = NTriplesParser(stream)
    with open(filename, "rb") as data:
        parser.parse(data)
    graph = stream.graph()  # graph (a set) is the dataset: <s1,p1,o1> <s2,p2,o2> ...
    print('loaded successfully')
    classes = {}
    c = set()
    p = set()
    for triple in graph:
        c.add(triple[2])
        p.add(triple[0])
        if triple[2] not in classes:
            classes[triple[2]] = set()
        classes[triple[2]].add(triple[0])
    print('the number of classes: ', end='')
    print(len(c))
    print('the number of instances: ', end='')
    print(len(p))
    return classes
# Get-or-insert each term of a parsed (s, p, o) triple, falling back to
# a SELECT when the UNIQUE constraint rejects the INSERT.
try:
    c.execute("INSERT INTO entities (entity) VALUES (?)", [s])
    s_id = c.lastrowid
except sqlite3.IntegrityError:
    c.execute("SELECT id FROM entities WHERE entity = ?", [s])
    s_id = c.fetchone()[0]
try:
    c.execute("INSERT INTO properties (property) VALUES (?)", [p])
    p_id = c.lastrowid
except sqlite3.IntegrityError:
    c.execute("SELECT id FROM properties WHERE property = ?", [p])
    p_id = c.fetchone()[0]
try:
    c.execute("INSERT INTO entities (entity) VALUES (?)", [o])
    o_id = c.lastrowid
except sqlite3.IntegrityError:
    c.execute("SELECT id FROM entities WHERE entity = ?", [o])
    o_id = c.fetchone()[0]
# print("{} {} {}".format(s_id, p_id, o_id))

if __name__ == "__main__":
    # Create a new parser and try to parse the NT file.
    sk = StreamSink()
    n = NTriplesParser(sk)
    with open(sys.argv[1], "rb") as anons:
        n.parse(anons)
    conn.commit()
    conn.close()
    print("triples = {}".format(sk.length))
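# The get-or-insert pattern above only works if the entity and property
# columns carry UNIQUE constraints, so a duplicate INSERT raises
# sqlite3.IntegrityError and the code falls back to SELECTing the
# existing row id. A minimal schema sketch under that assumption:
import sqlite3

conn = sqlite3.connect("triples.db")
c = conn.cursor()
c.execute("CREATE TABLE IF NOT EXISTS entities ("
          "id INTEGER PRIMARY KEY, entity TEXT UNIQUE)")
c.execute("CREATE TABLE IF NOT EXISTS properties ("
          "id INTEGER PRIMARY KEY, property TEXT UNIQUE)")
conn.commit()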
    return dic_dis

# input the file path; Filelist and ChoiceType are project helpers
datasets = Filelist('E:/python/ttldata')
print(datasets)
dic1 = {}
for filename in datasets:
    stream = Stream()
    parser = NTriplesParser(stream)
    with open(filename, "rb") as data:
        parser.parse(data)
    graph = stream.graph()
    ChoiceType(graph, dic1)
print(dic1)
# two classes are disjoint if their instance sets do not intersect
for i in dic1:
    for j in dic1:
        if i == j:
            continue
        if dic1[i] & dic1[j]:
            print(i, 'and', j, "aren't disjoint")
        else:
            print(i, 'and', j, 'are disjoint')
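# Filelist and ChoiceType are not shown in this snippet. Minimal
# sketches under stated assumptions: Filelist returns the full paths of
# every file in a directory, and ChoiceType groups subjects by their
# rdf:type object so the loop above compares class extents. Both are
# reconstructions, not the original helpers.
import os

def Filelist(directory):
    return [os.path.join(directory, name) for name in os.listdir(directory)]

def ChoiceType(graph, dic):
    RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
    for s, p, o in graph:
        if str(p) == RDF_TYPE:
            dic.setdefault(o, set()).add(s)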
import sys

from rdflib.plugins.parsers.ntriples import NTriplesParser, Sink


class StreamSink(Sink):
    def triple(self, s, p, o):
        self.length += 1  # the rdflib Sink base class initialises length to 0
        print("Stream of triples s={s}, p={p}, o={o}".format(s=s, p=p, o=o))


if __name__ == "__main__":
    # Create a new parser and try to parse the NT file.
    sk = StreamSink()
    n = NTriplesParser(sk)
    with open(sys.argv[1], "rb") as anons:
        n.parse(anons)
    print("triples = {}".format(sk.length))
def parse(self, source, sink, baseURI=None):
    f = source.getByteStream()  # TODO getCharacterStream?
    parser = NTriplesParser(NTSink(sink))
    parser.parse(f)
    f.close()
        except UnicodeEncodeError:
            print("Unicode error, skipping triple...")
            self.i += 1

# set logging to basic
logging.basicConfig()

pathToFile = sys.argv[1]
targetDir = "graph"

# First pass: count resources (CountSink is defined earlier in the file).
csk = CountSink()
ntp = NTriplesParser(csk)
with open(pathToFile, "rb") as anons:
    print("Counting resources in {}...".format(pathToFile))
    ntp.parse(anons)

with open(targetDir + '/resources.tsv', 'w') as f:
    for r in csk.res:
        f.write(re.sub(r"\n", " ", re.sub(r"\r", " ", r.n3()[1:-1])) + "\n")

# Second pass: extract relationships into the tensor.
sk = RDFToTensorSink()
sk.set_filedict(csk.filedict)
sk.tensor_size = len(csk.res)
n = NTriplesParser(sk)
with open(pathToFile, "rb") as anons:
    print("Extracting relationships from {}...".format(pathToFile))
    n.parse(anons)
print("triples = {}, errors = {}".format(sk.length, sk.i))
print("Done!")
        except KeyError:
            # First occurrence of property p: assign it an id and open a
            # MatrixMarket file with a placeholder size line.
            props[p] = (i, open("matrices/" + str(i), "w+"))
            props[p][1].write(
                "%%MatrixMarket matrix coordinate integer general\n"
                "%\n"
                "num_ents num_ents num_nonZeros\n")
            p_id = i
            i += 1
        c.execute("SELECT id FROM entities WHERE entity = ?", [s])
        s_id = c.fetchone()[0]
        c.execute("SELECT id FROM entities WHERE entity = ?", [o])
        o_id = c.fetchone()[0]
        props[p][1].write("{} {} 1\n".format(s_id, o_id))

TSink = TensorSink()
g = NTriplesParser(TSink)
with open("test.ttl", 'rb') as f:  # despite the .ttl extension, NTriplesParser expects N-Triples
    g.parse(f)

conn.commit()
c.execute("SELECT count(*) FROM entities")
num_ents = c.fetchone()[0]

# close all writers and the database connection
for key, value in props.items():
    value[1].close()
conn.close()

# create .mtx files for all properties with proper header fields
for key, value in props.items():
    id_p = str(value[0])
    with open("matrices/" + id_p, "r") as f:
        num_nonZeros = sum(1 for line in f) - 3
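# The snippet stops after counting data lines. A plausible continuation
# (an assumption, not part of the original) patches the placeholder
# size line written earlier with the real dimensions and non-zero count:
for key, value in props.items():
    id_p = str(value[0])
    path = "matrices/" + id_p
    with open(path, "r") as f:
        lines = f.readlines()
    num_nonZeros = len(lines) - 3  # two comment/header lines plus the size line
    lines[2] = "{} {} {}\n".format(num_ents, num_ents, num_nonZeros)
    with open(path + ".mtx", "w") as f:
        f.writelines(lines)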