Ejemplo n.º 1
0
    def process_organization(self):
        """Load the per-conference organization CSV into the graph.

        Reads ``<home>/data/manual/<tag>-organization.csv``, treating the
        first row as the header. Rows with fewer cells than the header, or
        with an empty ``name``, are ignored. For every other row a named
        entity is created in the organization namespace, its ``homepage``
        and ``logo`` columns are passed to ``create_triple_complex``, and
        ``create_role_to_event`` is called with the row's ``role_event``,
        ``role_type`` and ``role_label``.
        """
        csv_path = "{0}/data/manual/{1}-organization.csv".format(
            self.global_config["home"], self.local_config["tag"]
        )

        with open(csv_path) as handle:
            reader = UnicodeReader(handle)
            columns = next(reader)
            for cells in reader:
                # A row missing trailing cells cannot fill every column.
                if len(cells) < len(columns):
                    continue

                record = dict(zip(columns, cells))
                if not record["name"]:
                    # Nothing to name the entity after.
                    continue

                namespace = self.get_namespace(DataIswc.PREFIX_ORG)
                organization = self.create_named_entity(namespace, record["name"])

                # Object properties.
                self.create_triple_complex(organization, ["homepage", "logo"], record)

                # Role of the organization in an event.
                self.create_role_to_event(
                    record["role_event"],
                    record["role_type"],
                    record["role_label"],
                    organization,
                )
Ejemplo n.º 2
0
    def load_metadata(self):
        """Load organisation name mappings from the manual entity CSV.

        Reads ``<home>/data/entity/organisation.csv`` when it exists; a
        missing file is not an error. The first row is the header. Every
        following complete row is stored in ``self.map_name_name`` under its
        ``altLabel``, with the row's ``title`` as ``prefLabel`` and its
        ``uri`` as ``dbpediaUri``. The number of mappings held is printed
        afterwards.
        """
        filename_manual = "{0}/data/entity/organisation.csv".format(self.global_config["home"])
        if os.path.exists(filename_manual):
            with open(filename_manual) as f:
                csvreader = UnicodeReader(f)
                headers = csvreader.next()
                for row in csvreader:
                    # A blank or truncated line leaves keys out of the zipped
                    # dict and would raise KeyError below, so skip it.
                    if len(row) < len(headers):
                        continue

                    entry = dict(zip(headers, row))

                    self.map_name_name[entry["altLabel"]] = {
                        "prefLabel": entry["title"],
                        "dbpediaUri": entry["uri"],
                    }

        print("{0} name mappings loaded".format(len(self.map_name_name)))
Ejemplo n.º 3
0
    def process_person(self):
        """Load the per-conference person CSV into the graph.

        Reads ``<home>/data/manual/<tag>-person.csv``, treating the first row
        as the header. Rows whose cell count differs from the header, or
        whose ``name`` is empty, are ignored. For every other row this:

        * creates a named entity in the person namespace,
        * passes the ``homepage`` and ``alt-name`` columns to
          ``create_triple_complex``,
        * calls ``create_role_to_event`` with the row's role columns,
        * links the person to each ``;``-separated organization (when the
          column exists) via ``FOAF.member`` and ``SWRC.affiliation``,
        * adds a ``FOAF.mbox_sha1sum`` literal computed from the ``mailto:``
          form of the e-mail address, when one is given.
        """
        filename = "{0}/data/manual/{1}-person.csv".format(self.global_config["home"], self.local_config["tag"])

        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:

                # Skip rows that do not line up with the header.
                if len(row) != len(headers):
                    continue

                entry = dict(zip(headers, row))

                # Skip rows without a person name.
                if len(entry["name"]) == 0:
                    continue

                res_person = self.create_named_entity(self.get_namespace(DataIswc.PREFIX_PERSON), entry["name"])

                # object properties
                self.create_triple_complex(res_person, ["homepage"], entry)

                # role
                self.create_role_to_event(entry["role_event"], entry["role_type"], entry["role_label"], res_person)

                # organization (optional column, ';'-separated names)
                if "organization" in entry:
                    for org in entry["organization"].split(";"):
                        if len(org) == 0:
                            continue

                        res_organization = self.create_named_entity(self.get_namespace(DataIswc.PREFIX_ORG), org)
                        self.graph.add((res_organization, FOAF.member, res_person))
                        # inverse property
                        self.graph.add((res_person, SWRC.affiliation, res_organization))

                # alt-name
                self.create_triple_complex(res_person, ["alt-name"], entry)

                # email
                if len(entry["email"]) > 0:
                    if not entry["email"].startswith("mailto:"):
                        mbox = "mailto:%s" % entry["email"]
                    else:
                        mbox = entry["email"]

                    # hashlib hashes bytes: text containing non-ASCII
                    # characters raises if handed over directly, so encode it
                    # explicitly. ASCII addresses produce the same digest as
                    # before.
                    if isinstance(mbox, bytes):
                        mbox_bytes = mbox
                    else:
                        mbox_bytes = mbox.encode("utf-8")

                    mbox_sha1sum = hashlib.sha1(mbox_bytes).hexdigest()
                    # Only the hash is published, not the address itself.
                    self.graph.add((res_person, FOAF.mbox_sha1sum, Literal(mbox_sha1sum)))
"swws-2001",
"iswc-2002",
"iswc-2003",
"iswc-2004",
"iswc-2005",
"iswc-2013",
]

for tag in list_tag:
	# Walk the organization table of each conference tag and report every
	# named organization found in it.
	filename = "{0}/data/manual/{1}-organization.csv".format(
		global_config["home"],
		tag)

	with open(filename) as f:
		csvreader = UnicodeReader(f)
		headers = csvreader.next()
		for row in csvreader:
			# Rows shorter than the header cannot supply every column.
			if len(row) < len(headers):
				continue

			entry = dict(zip(headers, row))

			org = entry["name"]

			# Skip rows without an organization name. The previous test
			# (len(org) > 0) was inverted: it skipped every named row and
			# let only the empty ones through.
			if len(org) == 0:
				continue

			print(u"processing [{0}] in [{1}] ".format(org, tag))
	params["filename_manual_csv"] = "{0}/data/manual/{1}.csv".format(global_config['home'], filename_query)
	params["filename_temp_csv"] = "{0}/local/output/{1}.csv".format(global_config['home'], filename_query)
	with open(params["filename_query"]) as f:
		query = f.read()
	print query
		
	command= "curl -H \"Accept: application/sparql-results+json\" \"http://data.semanticweb.org/sparql?query={0}\" > {1}".format(urllib.quote(query), params["filename_result"])
	print command
	#call(command, shell=True)
	
	#load manual mapping
	#name,uri
	mem_name_uri_mapping ={}
	if os.path.exists(params["filename_manual_csv"]):
		with open(params["filename_manual_csv"]) as f:
			csvreader = UnicodeReader(f)
			csvreader.next()
			for row in csvreader:
				if len(row)<2:
					continue
						
				name = row[0]
				uri = row[1]
				mem_name_uri_mapping[name]= uri
			
	#write temp csv
	with open(params["filename_result"]) as f:
		json_data = json.load(f)
	
		counter_name_uri = MyCounterKeyValue()
		counter_uri_name = MyCounterKeyValue()
Ejemplo n.º 6
0
    def process_proceedings(self):
        """Add the proceedings volumes of the configured year to the graph.

        Reads ``<home>/data/manual/iswc-publication-proceedings.csv``,
        treating the first row as the header. A row is skipped when its cell
        count differs from the header, when its ``year`` differs from
        ``self.local_config["year"]``, or when ``title`` or
        ``proceedings_uri`` is empty (the reason is printed, except for the
        year mismatch).

        Each remaining row is typed ``SWRC.Proceedings``, linked to its event
        in both directions, given its editors (the raw string, one triple set
        per editor, and an ``RDF.Seq`` container at
        ``<proceedings uri>/editor_list``) and finally its literal columns
        via ``create_triple_complex``.
        """
        filename = "{0}/data/manual/iswc-publication-proceedings.csv".format(self.global_config["home"])

        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:

                if len(row) != len(headers):
                    print("skipping mismatch row %s" % row)
                    continue

                entry = dict(zip(headers, row))

                if entry["year"] != self.local_config["year"]:
                    # skip mismatched year
                    continue

                if len(entry["title"]) == 0:
                    print("skipping empty title row %s" % entry)
                    continue

                if len(entry["proceedings_uri"]) == 0:
                    print("skipping empty proceedings_uri row %s" % entry)
                    continue

                uri_proceedings = self.expand_uri(entry["proceedings_uri"])
                uri_proceedings_editor_list = "%s/editor_list" % (uri_proceedings)
                uri_event = self.expand_uri(entry["event_uri"])

                res_proceedings = URIRef(uri_proceedings)
                res_event = URIRef(uri_event)

                self.graph.add((res_proceedings, RDF.type, SWRC.Proceedings))

                # relation to event
                self.graph.add((res_proceedings, SWC.relatedToEvent, res_event))
                self.graph.add((res_event, SWRC.hasRelatedDocument, res_proceedings))

                # editor
                if len(entry["editor"]) > 0:
                    self.graph.add((res_proceedings, SWRC.listEditor, Literal(entry["editor"])))
                    list_res_editor = []
                    for editor in entry["editor"].split(","):
                        # A stray or trailing comma yields an empty name; do
                        # not create an entity for it.
                        if len(editor) == 0:
                            continue

                        res_editor = self.create_named_entity(self.get_namespace(DataIswc.PREFIX_PERSON), editor)
                        list_res_editor.append(res_editor)
                        self.graph.add((res_proceedings, SWRC.editor, res_editor))
                        self.graph.add((res_proceedings, FOAF.maker, res_editor))
                        self.graph.add((res_editor, FOAF.made, res_proceedings))

                    # Only build the ordered editor container when at least
                    # one real editor was found.
                    if list_res_editor:
                        res_proceedings_editor_list = self.create_container(
                            list_res_editor, RDF.Seq, uri_proceedings_editor_list
                        )
                        self.graph.add((res_proceedings, SWC.editorList, res_proceedings_editor_list))

                # simple properties
                self.create_triple_complex(
                    res_proceedings,
                    [
                        "title",
                        "subtitle",
                        "abstract",
                        "keywords",
                        "year",
                        "pages",
                        "publisher",
                        "series",
                        "volume",
                        "link_open_access",
                        "link_publisher",
                        "depiction",
                    ],
                    entry,
                )
Ejemplo n.º 7
0
    def process_paper(self):
        """Add the papers of the configured year to the graph.

        Reads ``<home>/data/manual/iswc-publication-paper.csv``, treating the
        first row as the header. A row is skipped when its cell count differs
        from the header, when its ``year`` differs from
        ``self.local_config["year"]``, or when ``title`` or
        ``proceedings_uri`` is empty (the last two are reported on stdout).

        Papers are numbered per proceedings in file order and get the URI
        ``<proceedings_uri>/paper-NN``. Each paper is typed
        ``SWRC.InProceedings``, linked to its proceedings in both directions,
        given its authors (raw string, per-author triples and an ``RDF.Seq``
        container at ``<paper uri>/author_list``) and its literal columns.
        The paper resource is cached in ``self.map_name_res`` under its title.
        """
        csv_path = "{0}/data/manual/iswc-publication-paper.csv".format(self.global_config["home"])

        counter_paper = MyCounter()
        with open(csv_path) as handle:
            reader = UnicodeReader(handle)
            columns = next(reader)
            for cells in reader:
                if len(cells) != len(columns):
                    continue

                record = dict(zip(columns, cells))

                # Only papers of the year being processed are of interest.
                if record["year"] != self.local_config["year"]:
                    continue

                if not record["title"]:
                    print("skipping empty title row %s" % record)
                    continue

                # NOTE(review): unlike process_proceedings, the proceedings
                # URI is used as-is here, without expand_uri - confirm that
                # this is intended.
                proceedings_uri = record["proceedings_uri"]
                if not proceedings_uri:
                    print("skipping empty proceedings row %s" % record)
                    continue

                # Running number of this paper within its proceedings.
                counter_paper.inc(proceedings_uri)
                paper_number = counter_paper.data[proceedings_uri]
                paper_uri = "%s/paper-%02d" % (proceedings_uri, paper_number)
                author_list_uri = paper_uri + "/author_list"

                proceedings = URIRef(proceedings_uri)
                paper = URIRef(paper_uri)

                self.graph.add((paper, RDF.type, SWRC.InProceedings))

                # Part-of relation, both directions.
                self.graph.add((paper, SWC.isPartOf, proceedings))
                self.graph.add((proceedings, SWC.hasPart, paper))

                # Authors: raw string first, then one entity per name.
                author_names = record["author"]
                self.graph.add((paper, SWRC.listAuthor, Literal(author_names)))
                authors = []
                for author_name in author_names.split(","):
                    person_namespace = self.get_namespace(DataIswc.PREFIX_PERSON)
                    author = self.create_named_entity(person_namespace, author_name)
                    self.graph.add((author, RDF.type, FOAF.Person))

                    authors.append(author)
                    self.graph.add((paper, SWRC.author, author))
                    self.graph.add((paper, FOAF.maker, author))
                    self.graph.add((author, FOAF.made, paper))

                # Ordered author list.
                author_container = self.create_container(authors, RDF.Seq, author_list_uri)
                self.graph.add((paper, BIBO.authorList, author_container))

                # Literal columns copied onto the paper.
                literal_columns = [
                    "abstract",
                    "keywords",
                    "year",
                    "pages",
                    "title",
                    "category",
                    "link_open_access",
                    "link_publisher",
                ]
                self.create_triple_complex(paper, literal_columns, record)

                # Cache by title for later lookups.
                self.map_name_res[record["title"]] = paper
0
    def process_event(self):
        """Add the events of the configured conference to the graph.

        Reads ``<home>/data/manual/<tag>-event.csv``, treating the first row
        as the header. A row is skipped when its cell count differs from the
        header, when ``label`` or ``event_type`` is empty, or when
        ``event_uri`` starts with ``#`` (commented out).

        For each remaining row:

        * an empty ``super_event_uri`` defaults to ``"[ME]"`` before being
          passed to ``expand_uri`` (presumably a placeholder that
          ``expand_uri`` resolves - confirm);
        * an empty ``event_uri`` is generated as
          ``<super event uri>/event-NN``, numbered per super event;
        * the event is typed ``SWC[event_type]``, linked to its super event
          in both directions, and given its literal columns;
        * a ``TalkEvent`` is linked to the paper cached in
          ``self.map_name_res`` under the event's label;
        * every name in ``chair_person`` / ``presenter_person`` gets a
          ``swc:Chair`` / ``swc:Presenter`` role on the event.

        Raises:
            SystemExit: with status 1 when a ``TalkEvent`` label has no
                cached paper.
        """
        filename = "{0}/data/manual/{1}-event.csv".format(self.global_config["home"], self.local_config["tag"])

        counter_event = MyCounter()

        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:

                # Skip rows that do not line up with the header.
                if len(row) != len(headers):
                    continue

                entry = dict(zip(headers, row))

                # Skip rows without a label.
                if len(entry["label"]) == 0:
                    continue

                # Skip rows without an event type.
                if len(entry["event_type"]) == 0:
                    continue

                # Skip rows commented out with a leading '#'.
                if entry["event_uri"].startswith("#"):
                    continue

                # set default super event
                if len(entry["super_event_uri"]) == 0:
                    entry["super_event_uri"] = "[ME]"

                uri_super_event = self.expand_uri(entry["super_event_uri"])
                res_super_event = URIRef(uri_super_event)

                # Generate a URI for events that do not declare one.
                if len(entry["event_uri"]) == 0:
                    counter_event.inc(uri_super_event)
                    entry["event_uri"] = "%s/event-%02d" % (uri_super_event, counter_event.data[uri_super_event])

                uri_event = self.expand_uri(entry["event_uri"])
                res_event = URIRef(uri_event)

                # event type
                self.graph.add((res_event, RDF.type, SWC[entry["event_type"]]))

                # super event
                self.graph.add((res_event, SWC.isSubEventOf, res_super_event))
                self.graph.add((res_super_event, SWC.isSuperEventOf, res_event))

                # simple properties
                self.create_triple_complex(
                    res_event,
                    [
                        "label",
                        "acronym",
                        "abstract",
                        "order_in_super_event",
                        "start",
                        "end",
                        "tzid",
                        "room",
                        "address",
                        "homepage",
                        "link_document",
                        "logo",
                    ],
                    entry,
                )

                # linking paper event
                if "TalkEvent" == entry["event_type"]:
                    if entry["label"] in self.map_name_res:
                        res_paper = self.map_name_res[entry["label"]]
                        self.graph.add((res_event, SWC.hasRelatedDocument, res_paper))
                        self.graph.add((res_paper, SWC.relatedToEvent, res_event))
                    else:
                        print("missing paper link " + entry["label"])
                        # Abort with a failure status: exiting with 0 would
                        # report success although the data is inconsistent.
                        sys.exit(1)

                # roles: chair and presenter
                for role in ["Chair", "Presenter"]:
                    role_lower = role.lower()
                    if len(entry[role_lower + "_person"]) > 0:
                        for name in entry[role_lower + "_person"].split(","):
                            if len(name) == 0:
                                continue

                            res_person = self.create_named_entity(self.get_namespace(DataIswc.PREFIX_PERSON), name)

                            self.create_role_to_event(
                                uri_event, "swc:" + role, entry[role_lower + "_label"], res_person
                            )