Example #1
def axioms(filename):
    # Parse the N-Triples file through the Stream collector sink
    # (a sketch of Stream follows this example).
    stream = Stream()
    parser = NTriplesParser(stream)

    with open(filename, "rb") as data:
        parser.parse(data)

    graph = stream.graph()
    properties = set()

    # Collect every distinct predicate in the graph.
    for triple in graph:
        properties.add(triple[1])

    print(properties)

    # functionality/reflexivity/symmetry/inverse/transivity are helper checks
    # defined elsewhere in the project.
    dic_fun = functionality(graph, properties)
    print('1')
    dic_ref = reflexivity(graph, properties)
    print('2')
    dic_sym = symmetry(graph, properties)
    print('3')
    dic_inv = inverse(graph, properties)
    print('4')
    dic_tra = transivity(graph, properties)
    print('5')

    # Write one CSV row per property, listing the axiom types it satisfies.
    csvname = filename + '.csv'
    with open(csvname, 'a', newline='') as out:
        csv_writer = csv.writer(out, dialect='excel')

        for pi in properties:
            l1 = [pi]
            if dic_fun[pi] > 0:
                l1.append('functionality')
                l1.append(dic_fun[pi])
            if dic_ref[pi] == 1:
                l1.append('reflexivity')
            if dic_sym[pi] == 1:
                l1.append('symmetry')
            if len(dic_inv[pi]) != 0:
                l1.append('inverse')
            if dic_tra[pi] == 1:
                l1.append('transitivity')

            print(l1)
            csv_writer.writerow(l1)

    print('over')
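The Stream class used above (and again in Examples #5 and #7) is a project-specific sink that is not shown on this page. A minimal sketch of what it plausibly looks like, assuming it only collects the parsed triples and returns them from graph():

from rdflib.plugins.parsers.ntriples import Sink


class Stream(Sink):
    def __init__(self):
        super().__init__()
        self.triples = set()

    def triple(self, s, p, o):
        # Called by NTriplesParser once per parsed triple.
        self.triples.add((s, p, o))

    def graph(self):
        return self.triples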
Example #2
    def parse(self, source, sink, **kwargs):
        '''
        Parse the NT format

        :type source: `rdflib.parser.InputSource`
        :param source: the source of NT-formatted data
        :type sink: `rdflib.graph.Graph`
        :param sink: where to send parsed triples
        :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
        '''
        f = source.getByteStream()  # TODO getCharacterStream?
        parser = NTriplesParser(NTSink(sink))
        parser.parse(f, **kwargs)
        f.close()
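For context, a hedged usage sketch: a parser plugin with a parse(source, sink, **kwargs) method like the one above is normally invoked indirectly, through rdflib's Graph.parse; the file name below is a placeholder.

from rdflib import Graph

g = Graph()
g.parse("data.nt", format="nt")  # rdflib dispatches to the registered NT parser
print(len(g))                    # number of triples loaded into the graph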
Example #3
def process_file(infile, sink):
    bad_lines = defaultdict(int)
    for line in infile:
        s = BytesIO()
        s.write(line)
        s.seek(0)
        parser = NTriplesParser(sink)
        try:
            parser.parse(s)
        except (ParseError, ElementStrError) as e:
            bad_lines[line] += 1

    print('read {} lines from {}'.format(sink.nlines, infile.name))
    print('bad lines and their frequencies:')
    for line, count in bad_lines.items():
        print('  {:>10} : {}'.format(count, line))
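process_file above assumes a sink object that exposes an nlines counter. A minimal sketch of such a sink, under that assumption (the project's real sink may track more):

from rdflib.plugins.parsers.ntriples import Sink


class LineCountSink(Sink):
    def __init__(self):
        super().__init__()
        self.nlines = 0

    def triple(self, s, p, o):
        # Count each successfully parsed line/triple.
        self.nlines += 1


# Hypothetical usage:
# with open("data.nt", "rb") as infile:
#     process_file(infile, LineCountSink())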
Example #4
    def load(cls, filepath):
        """Return array of FAST dict. Main method."""
        if zipfile.is_zipfile(filepath):
            with ZipFile(filepath) as zf:
                nt_filename = next(
                    (n for n in zf.namelist() if n.endswith('.nt')))
                # defaults to equivalent of 'rb'
                nt_file = zf.open(nt_filename)
        else:
            nt_file = open(filepath, 'rb')

        instance = cls()
        parser = NTriplesParser(instance)
        parser.parse(nt_file)

        nt_file.close()

        return instance.terms
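The class that defines load() is used as its own parser sink (NTriplesParser(instance)), so it must provide a triple() callback and collect results in self.terms. A minimal sketch of that side of the class; the class name and the body of triple() are assumptions:

from rdflib.plugins.parsers.ntriples import Sink


class FastLoader(Sink):  # hypothetical name for the class that owns load()
    def __init__(self):
        super().__init__()
        self.terms = []

    def triple(self, s, p, o):
        # The real project builds FAST dicts here; this is only a placeholder.
        self.terms.append((s, p, o))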
Example #5
def classPart(filename):
    stream = Stream()
    parser = NTriplesParser(stream)
    with open(filename, "rb") as data:
        parser.parse(data)
    graph = stream.graph()  # graph (a set) holds the dataset: <s1,p1,o1> <s2,p2,o2> ...
    print('successfully loaded')

    # classes maps each class (triple object) to the set of its instances
    # (triple subjects); c collects the distinct classes, p the distinct instances.
    classes = {}
    c = set()
    p = set()
    for triple in graph:
        c.add(triple[2])
        p.add(triple[0])
        if triple[2] not in classes:
            classes[triple[2]] = set()
        classes[triple[2]].add(triple[0])
    print('the number of classes: ', end='')
    print(len(c))
    print('the number of instances: ', end='')
    print(len(p))

    return classes
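A hypothetical call of classPart, assuming the input file holds (instance, property, class) style triples; the file name is a placeholder:

classes = classPart("instance_types.nt")
for cls, instances in classes.items():
    print(cls, len(instances))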
Example #6
        # Insert the subject, predicate and object into the lookup tables,
        # reusing the existing row id when the INSERT hits a uniqueness constraint.
        try:
            c.execute("INSERT INTO entities (entity) VALUES (?)", [s])
            s_id = c.lastrowid
        except sqlite3.IntegrityError:
            c.execute("SELECT id FROM entities WHERE entity = ?", [s])
            s_id = c.fetchone()[0]
        try:
            c.execute("INSERT INTO properties (property) VALUES (?)", [p])
            p_id = c.lastrowid
        except sqlite3.IntegrityError:
            c.execute("SELECT id FROM properties WHERE property = ?", [p])
            p_id = c.fetchone()[0]
        try:
            c.execute("INSERT INTO entities (entity) VALUES (?)", [o])
            o_id = c.lastrowid
        except sqlite3.IntegrityError:
            c.execute("SELECT id FROM entities WHERE entity = ?", [o])
            o_id = c.fetchone()[0]

        # print("{} {} {}".format(s_id, p_id, o_id))


if __name__ == "__main__":
    # Create a new parser and try to parse the NT file.
    sk = StreamSink()
    n = NTriplesParser(sk)
    with open(sys.argv[1], "rb") as anons:
        n.parse(anons)
    conn.commit()
    conn.close()
    print("triples = {}".format(sk.length))
Example #7
    return dic_dis


# directory with the input datasets (a sketch of Filelist follows this example)
datasets = Filelist('E:/python/ttldata')

print(datasets)

dic1 = {}

for filename in datasets:
    stream = Stream()
    parser = NTriplesParser(stream)

    with open(filename, "rb") as data:
        parser.parse(data)
    graph = stream.graph()
    ChoiceType(graph, dic1)

print(dic1)

# Compare every pair of keys in dic1: overlapping value sets mean the two
# are not disjoint.
for i in dic1:
    for j in dic1:
        if i == j:
            continue
        if dic1[i] & dic1[j]:
            print(i, 'and', j, 'are not disjoint')
        else:
            print(i, 'and', j, 'are disjoint')
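Filelist and ChoiceType are project helpers that are not shown here. ChoiceType is too project-specific to reconstruct, but Filelist can be sketched under the assumption that it simply returns the paths of the files in the given directory:

import os


def Filelist(directory):
    # Return the full path of every regular file in the directory.
    return [os.path.join(directory, name)
            for name in sorted(os.listdir(directory))
            if os.path.isfile(os.path.join(directory, name))]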
Example #8
from rdflib.plugins.parsers.ntriples import NTriplesParser, Sink
import sys


class StreamSink(Sink):

    def triple(self, s, p, o):
        # The base Sink initialises self.length; count and echo each triple.
        self.length += 1
        print("Stream of triples s={s}, p={p}, o={o}".format(s=s, p=p, o=o))


if __name__ == "__main__":
    # Create a new parser and try to parse the NT file.
    sk = StreamSink()
    n = NTriplesParser(sk)
    with open(sys.argv[1], "rb") as anons:
        n.parse(anons)
    print("triples = {}".format(sk.length))
Example #10
    def parse(self, source, sink, baseURI=None):
        f = source.getByteStream()  # TODO getCharacterStream?
        parser = NTriplesParser(NTSink(sink))
        parser.parse(f)
        f.close()
Example #11
        except UnicodeEncodeError:
            print("Unicode error, skipping triple...")
            self.i += 1


# set logging to basic
logging.basicConfig()

pathToFile = sys.argv[1]
targetDir = "graph"  # assumes this directory already exists

# First pass: count the resources in the file.
csk = CountSink()
ntp = NTriplesParser(csk)
with open(pathToFile, "rb") as anons:
    print("Counting resources in {}...".format(pathToFile))
    ntp.parse(anons)

# Write one resource per line, replacing embedded line breaks with spaces.
with open(targetDir + '/resources.tsv', 'w') as f:
    for r in csk.res:
        f.write(re.sub(r"\n", " ", re.sub(r"\r", " ", r.n3()[1:-1])) + "\n")

# Second pass: extract the relationships into the tensor sink.
sk = RDFToTensorSink()
sk.set_filedict(csk.filedict)
sk.tensor_size = len(csk.res)
n = NTriplesParser(sk)
with open(pathToFile, "rb") as anons:
    print("Extracting relationships from {}...".format(pathToFile))
    n.parse(anons)
print("triples = {}, errors = {}".format(sk.length, sk.i))

print("Done!")
Example #12
        except KeyError:
            # First occurrence of this property: assign it an id and start a
            # MatrixMarket file whose header fields are filled in later.
            props[p] = (i, open("matrices/" + str(i), "w+"))
            props[p][1].write("%%MatrixMarket matrix coordinate integer general\n%\nnum_ents num_ents num_nonZeros\n")
            p_id = i
            i += 1
        c.execute("SELECT id FROM entities WHERE entity = ?", [s])
        s_id = c.fetchone()[0]
        c.execute("SELECT id FROM entities WHERE entity = ?", [o])
        o_id = c.fetchone()[0]
        props[p][1].write("{} {} 1\n".format(s_id, o_id))


TSink = TensorSink()
g = NTriplesParser(TSink)
with open("test.ttl", 'rb') as f:
    g.parse(f)
conn.commit()
c.execute("SELECT count(*) FROM entities")
num_ents = c.fetchone()[0]

# close all matrix writers and the database connection
for key, value in props.items():
    value[1].close()
conn.close()

# create a .mtx file for every property with the proper header fields
for key, value in props.items():
    id_p = str(value[0])
    with open("matrices/" + id_p, "r") as f:
        num_nonZeros = sum(1 for line in f) - 3  # skip the three header lines