def init_map_person_name(self):
    if hasattr(self, "map_name"):
        return

    # load global entity name mappings
    filename = "{0}/data/entity/person.csv".format(
        self.global_config["home"])
    map_name = {}       # other name -> canonical name
    map_name_info = {}  # canonical name -> {"other_names": [...]}
    with open(filename) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) != len(headers):
                #print "skipping mismatch row %s" % row
                continue
            entry = dict(zip(headers, row))
            if entry["name"]:
                name = entry["name"].strip()
                if entry["other_names"]:
                    map_name_info[name] = {
                        "other_names": [x.strip()
                                        for x in entry["other_names"].split(";")]}
                    for other_name in map_name_info[name]["other_names"]:
                        map_name[other_name] = name
    self.map_name = map_name
    self.map_name_info = map_name_info
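
# A minimal sketch of the alias map built above, assuming a person.csv with
# "name" and "other_names" (semicolon-separated) columns; the names here are
# invented:
#
#   name       other_names
#   Jie Bao    Baojie; J. Bao
#
# yields:
#   map_name      == {"Baojie": "Jie Bao", "J. Bao": "Jie Bao"}
#   map_name_info == {"Jie Bao": {"other_names": ["Baojie", "J. Bao"]}}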
def process_organization(self):
    filename = "{0}/data/source/{1}-organization.csv".format(
        self.global_config["home"], self.local_config["id"])
    with open(filename) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) < len(headers):
                #print "skipping row %s" % row
                continue
            entry = dict(zip(headers, row))
            if len(entry["name"]) == 0:
                #print "skipping empty name row %s" % entry
                continue
            for res_organization in self.create_list_named_entity(
                    DataIswc.get_namespace(DataIswc.PREFIX_ORG),
                    entry["name"]).values():
                #object properties
                self.create_triple_complex(
                    res_organization, ["homepage", "logo"], entry)
                #role
                self.create_role_to_event(
                    entry["role_event"],
                    entry["role_type"],
                    entry["role_label"],
                    res_organization)
def load_csv(filename, non_empty_field):
    ret = []
    with open(filename) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) < len(headers):
                #print "skipping row %s" % row
                continue
            entry = dict(zip(headers, row))
            if not entry[non_empty_field]:
                continue
            ret.append(entry)
            print entry
    return ret
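
# Usage sketch for load_csv (the file name and column are invented): returns
# a list of {header: value} dicts, dropping short rows and rows whose
# non_empty_field column is empty.
#
#   people = load_csv("data/source/iswc-2014-person.csv", "name")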
def update_index_data(id_data, global_config, data, data_local):
    """
    {"person": { "person_uri": [entry1, entry2] } }
    """
    for entity_type in ["person", "organization"]:
        # read data
        id_query = "index-{}".format(entity_type)
        filename_csv_index = "%s/data/www/%s-%s.csv" % (
            global_config["home"], id_data, id_query)
        with open(filename_csv_index) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:
                if len(row) < len(headers):
                    #print "skipping row %s" % row
                    continue
                entry = dict(zip(headers, row))
                entry["year"] = entry["conf_uri"].split("/")[-1]
                UtilJson.add_init_list(
                    data, [entity_type], entry["uri"], entry)
                UtilJson.add_init_list(
                    data_local, [entity_type], entry["uri"], entry)
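
# A runnable sketch of what UtilJson.add_init_list is assumed to do here,
# inferred from the docstring above (an illustration, not the real helper):
def _add_init_list_sketch(data, path, key, value):
    node = data
    for p in path:
        node = node.setdefault(p, {})
    node.setdefault(key, []).append(value)

# _add_init_list_sketch(data, ["person"], "person_uri", entry)
# leaves data == {"person": {"person_uri": [entry]}}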
def load_people_csv():
    with open("config.json") as f:
        global_config = json.load(f)
    print global_config

    list_input = [
        {"filename": "people.csv", },
    ]
    list_field = [
        "Paper",
        "Author",
        "Email",
        "Country",
        "Affiliation",
    ]
    list_item = []
    counter = collections.Counter()
    map_name_author = {}
    set_organization = set()
    list_people_role = []
    for input in list_input:
        filename = os.path.join(
            global_config["home"], "data", input["filename"])
        print filename
        with open(filename, 'r') as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            track = None
            for row in csvreader:
                if not row:
                    continue
                if row[0].startswith("##"):
                    # a "##" row marks the start of a new track
                    track = row[0][2:]
                    print track, "---------------------------"
                    continue
                elif row[0].startswith("#"):
                    continue
                entry = dict(zip(headers, row))
                entry["Track"] = track
                entry["Role"] = "author"
                print entry

                #author
                author = entry["Author"]
                entry["Name"] = author
                if author in ["Xi Chen"]:
                    # disambiguate distinct authors sharing this name by email
                    author = "{} {}".format(author, entry["Email"])
                if author in map_name_author:
                    author_info = map_name_author[author]
                else:
                    author_info = {}
                    author_info.update(entry)
                    map_name_author[author] = author_info
                    author_info["track_list"] = set([track])
                if entry["Email"] != author_info["Email"]:
                    print author_info["Email"], author_info["Track"]
                    print entry["Email"], entry["Track"]
                if entry["Affiliation"] != author_info["Affiliation"]:
                    print author_info["Affiliation"], author_info["Track"]
                    print entry["Affiliation"], entry["Track"]
                author_info.update(entry)
                author_info["track_list"].add(track)

                #affiliation
                organization = entry["Affiliation"]
                set_organization.add(organization)

                list_people_role.append(entry)

    print sorted(list(set_organization))

    filename = "person.json"
    filename = os.path.join(global_config["home"], "data", filename)
    print filename
    # with codecs.open(filename, "w", "utf-8") as f:
    #     f.write(lib_data.json2text(list_item))

    print_list_people_role("person_author.csv", list_people_role)
def main():
    filename = "config.json"
    filename = os.path.join(os.path.dirname(__file__), filename)
    with open(filename) as f:
        global_config = json.load(f)
    print global_config

    list_input = [
        {"filename": "8796CorrespondingAuthors.csv",
         #TODO
         #"link_publisher": "tba",
         "proceedings_uri": "http://data.semanticweb.org/conference/iswc/2014/proceedings-1",
        },
        {"filename": "8797CorrespondingAuthors.csv",
         #"link_publisher": "tba",
         "proceedings_uri": "http://data.semanticweb.org/conference/iswc/2014/proceedings-2",
        },
    ]
    list_field = [
        "author",
        "title",
        "pages",
        "year",
        "link_open_access",
        "link_publisher",
        "proceedings_uri",
        "paper_uri",
        "source_uri",
        "keywords",
        "abstract",
        "uri_me",
        "category",
        "source",
        "start_page",
        "paper_id",
        "EOL",
    ]
    map_key = {
        "Title": "title",
        "Authors": "author",
        "Start Page": "start_page",
        "Folder Index": "paper_id",
        "Paper no.": "paper_no",
    }
    list_key = {
        "link_publisher",
        "proceedings_uri",
    }
    list_item = []
    counter = collections.Counter()
    for input in list_input:
        filename = os.path.join(
            global_config["home"], "data", input["filename"])
        print filename
        with open(filename, 'r') as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            prev_item = None
            for row in csvreader:
                entry = dict(zip(headers, row))
                print entry
                item = {
                    "year": 2014,
                    "uri_me": "http://data.semanticweb.org/conference/iswc/2014",
                    #"EOL": "EOL",
                }
                for k, v in map_key.items():
                    item[v] = entry[k].strip()
                for k in list_key:
                    if k in input:
                        item[k] = input[k]

                temp = entry["Paper no."]
                if temp.startswith("DC"):
                    counter["DC"] += 1
                    category = "Doctoral Consortium Paper"
                else:
                    counter[temp[0]] += 1
                    map_category = {
                        "R": "Research Track Paper",
                        "D": "Replication, Benchmark, Data and Software Track Paper",
                        "I": "Semantic Web In Use Track Paper",
                    }
                    category = map_category[temp[0]]
                item["category"] = category

                list_item.append(item)
                if prev_item:
                    prev_item["pages"] = "{}-{}".format(
                        prev_item["start_page"], int(item["start_page"]) - 1)
                prev_item = item
            prev_item["pages"] = "{}-".format(prev_item["start_page"])

    #update: paper uri
    for item in list_item:
        #paper_name = re.sub("\W+", "-", item[u"title"]).lower()
        paper_name = slugify.slugify(item[u"title"])
        print item[u"title"]
        print paper_name
        item["link_open_access"] = "https://github.com/lidingpku/iswc2014/raw/master/paper/{}-{}.pdf".format(
            item['paper_id'], paper_name)
        print item["link_open_access"]

    print counter.most_common()
    print len(list_item)

    #create file
    filename = "paper-excel.csv"
    filename = os.path.join(global_config["home"], "output", filename)
    print filename
    with open(filename, "w") as f:
        csvwriter = UnicodeWriter(f)
        csvwriter.writerow(list_field)
        for item in list_item:
            row = UtilString.json2list(item, list_field)
            csvwriter.writerow(row)

    filename = "paper-excel.json"
    filename = os.path.join(global_config["home"], "output", filename)
    print filename
    with codecs.open(filename, "w", "utf-8") as f:
        f.write(lib_data.json2text(list_item))
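
# Page ranges above are derived from consecutive start pages within one
# proceedings file; with invented start pages [1, 17, 33] the papers get
# pages "1-16", "17-32", "33-" (the last end page is unknown, hence open).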
def load_paper_json():
    global_config = load_gloabl_config()

    filename = "paper-excel.json"
    filename = os.path.join(global_config["home"], "output", filename)
    with open(filename, 'r') as f:
        content = f.read()
        list_paper_excel = lib_data.text2json(content)
        print len(list_paper_excel)
    map_paper_excel = {}
    map_paper_excel_no = {}
    for paper in list_paper_excel:
        map_paper_excel[str(paper["paper_id"])] = paper
        map_paper_excel_no[str(paper["paper_no"])] = paper

    map_name_session = {}

    filename = "paper-industry.json"
    filename = os.path.join(global_config["home"], "output", filename)
    with open(filename, 'r') as f:
        content = f.read()
        list_paper_industry = lib_data.text2json(content)
    set_session_name = set()
    for paper in list_paper_industry:
        paper_id = str(paper["paper_id"])
        map_paper_excel[paper_id] = paper

        # group industry papers by session name
        session_name = paper["session_name"]
        set_session_name.add(session_name)
        default_session_id = 100 + len(set_session_name)
        default_entry = {
            "session_time": paper["session_time"],
            "session_name": session_name,
            "session_id": default_session_id,
            "session_index": default_session_id,
        }
        entry = map_name_session.get(session_name, default_entry)
        map_name_session[session_name] = entry
        paper_list = entry.get("paper_list", [])
        lib_data.list_append_unique(paper_list, paper_id)
        entry["paper_list"] = paper_list
        entry["paper_count"] = len(entry["paper_list"])

    filename = "paper-pdf.json"
    filename = os.path.join(global_config["home"], "output", filename)
    with open(filename, 'r') as f:
        content = f.read()
        list_paper_pdf = lib_data.text2json(content)
        print len(list_paper_pdf)
    map_paper_pdf = {}
    for paper in list_paper_pdf:
        map_paper_pdf[str(paper["paper_id"])] = paper

    filename = "session.csv"
    filename = os.path.join(global_config["home"], "data", filename)
    map_paper_session = {}
    with open(filename, 'r') as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        session_no = None
        session_name = None
        session_index = 1
        for row in csvreader:
            entry = dict(zip(headers, row))
            if entry.get("Paper no."):
                entry["session_no"] = session_no
                entry["session_id"] = int(session_no.split(" ")[-1])
                entry["session_name"] = session_name
                entry["session_index"] = session_index
                session_index += 1
                map_paper_session[entry["Paper no."]] = entry
                map_name_session[session_name] = entry
            else:
                # a row without a paper number starts a new session
                session_no = entry["Session no"]
                session_name = entry["Title"].strip()
                session_index = 1
    print len(map_paper_session)

    filename = "event.csv"
    filename = os.path.join(global_config["home"], "data", filename)
    map_event_session = {}
    with open(filename, 'r') as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if row[0].startswith("#"):
                continue
            entry = dict(zip(headers, row))
            print entry
            event_start, event_end = entry["Time"].split("-")
            event_day = entry["day"]
            for k, v in entry.items():
                if k in ["Time", "day"]:
                    continue
                if v:
                    event_id = len(map_event_session) + 1
                    event = {
                        "day": event_day,
                        "start": event_start.strip(),
                        "end": event_end.strip(),
                        "name": v.strip(),
                        "location": k,
                        "id": event_id,
                    }
                    if "Session" in v or "Industry Track:" in v:
                        session_name = v.replace("Session:", "")
                        session_name = session_name.replace("Industry Track:", "")
                        session_name = re.sub("\([^\)]+\)", "", session_name)
                        session_name = session_name.strip()
                        if session_name not in map_name_session:
                            print session_name
                        assert session_name in map_name_session
                        event["session_name"] = session_name
                    map_event_session[event_id] = event
    print len(map_paper_session)

    return (map_paper_excel, map_paper_excel_no, map_paper_pdf,
            map_paper_session, map_name_session, map_event_session)
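
# event.csv above is assumed to be a grid: one row per time slot with "Time"
# ("09:00-10:30") and "day" columns, plus one column per room whose cell
# holds the event name (the column name becomes the event's "location");
# cells containing "Session:" or "Industry Track:" are linked back to
# map_name_session. Column names other than Time/day are invented here.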
def csv2html(id_data, global_config):
    #create json_conf data
    json_conf = {}

    ######################
    #conf-paper
    filename_csv_conf_paper = "%s/data/www/%s-%s.csv" % (
        global_config["home"], id_data, "conf-paper")
    indexed_proceedings = {}
    list_title = []
    with open(filename_csv_conf_paper) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        while len(headers) <= 1:
            print "skipping header row {0}".format(headers)
            headers = csvreader.next()
        for row in csvreader:
            if len(row) < len(headers):
                print "skipping row {0}".format(row)
                continue
            entry = dict(zip(headers, row))
            #print entry
            if entry["subtitle_proceedings"]:
                proceeding_title = "{} -- {}".format(
                    entry["label_proceedings"], entry["subtitle_proceedings"])
                if proceeding_title not in list_title:
                    list_title.insert(0, proceeding_title)
            else:
                proceeding_title = "{}".format(entry["label_proceedings"])
                if proceeding_title not in list_title:
                    list_title.append(proceeding_title)
            UtilJson.add_init_list(
                indexed_proceedings, [proceeding_title],
                entry["category"], entry)

    #update json_conf
    for proceedings in list_title:
        #print proceedings
        json_proceedings = {}
        json_proceedings["title"] = proceedings
        UtilJson.add_init_list(json_conf, [], "proceedings", json_proceedings)
        for category in sorted(indexed_proceedings[proceedings].keys()):
            #print category
            json_category = {}
            if len(indexed_proceedings[proceedings].keys()) > 1:
                json_category["title"] = category
            UtilJson.add_init_list(json_proceedings, [], "categories", json_category)
            json_category["papers"] = indexed_proceedings[proceedings][category]

    ######################
    #conf-person
    filename_csv_conf_person = "%s/data/www/%s-%s.csv" % (
        global_config["home"], id_data, "conf-person")
    indexed_persons = {}
    with open(filename_csv_conf_person) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) < len(headers):
                #print "skipping row %s" % row
                continue
            entry = dict(zip(headers, row))
            #print entry
            name = entry["name"]
            name = name.strip()
            name = re.sub("\s+", " ", name)
            cnt_paper = int(entry["cnt_paper"])
            if cnt_paper > 0:
                index_1 = entry["proceedings_label"]
                if len(entry["proceedings_label"]) == 0:
                    index_1 = "All"
                index_1 = "[Proceedings] {}".format(index_1)
                index_2 = "Authors"
                UtilJson.add_init_dict(
                    indexed_persons, [index_1, index_2], name, entry)

            #consolidate affiliation
            organization = entry["organization"]
            if len(entry["organization"]) > 0:
                entry["organization"] = organization.split(";")[0]

            #only keep direct conference roles
            ALLOWED_EVENT_TYPE = [
                "http://data.semanticweb.org/ns/swc/ontology#ConferenceEvent",
                # "http://data.semanticweb.org/ns/swc/ontology#WorkshopEvent",
            ]
            if entry["role_event_type"] not in ALLOWED_EVENT_TYPE:
                continue
            if entry["role_type"].endswith("Chair") and entry["role_event_type"].endswith("ConferenceEvent"):
                entry["role_event_label"] = " {} (Organization Committee)".format(
                    entry["role_event_label"])
            UtilJson.add_init_dict(
                indexed_persons,
                [entry["role_event_label"], entry["role_label"]],
                name, entry)

    #update json_conf
    for role_event_label in sorted(indexed_persons.keys()):
        #print role_event_label
        json_role_event = {}
        json_role_event["title"] = role_event_label
        UtilJson.add_init_list(json_conf, [], "events", json_role_event)
        list_role = []
        for role_label in sorted(indexed_persons[role_event_label].keys()):
            # list chairs and webmasters first
            if "Chair" in role_label or "Webmaster" in role_label:
                list_role.insert(0, role_label)
            else:
                list_role.append(role_label)
        for role_label in list_role:
            #print role_label
            json_role_label = {}
            json_role_label["title"] = role_label
            UtilJson.add_init_list(json_role_event, [], "roles", json_role_label)
            json_role_label["persons"] = sorted(
                indexed_persons[role_event_label][role_label].values())

    ######################
    # write xyz-proceedings
    id_html = "proceedings"
    filename_html = "%s/data/www/%s-%s.html" % (
        global_config["home"], id_data, id_html)
    json_template = resource_string('resources.files', '{}.jsont'.format(id_html))
    content = jsontemplate.expand(json_template, json_conf)
    with codecs.open(filename_html, "w", "utf-8") as f:
        f.write(u'\ufeff')
        f.write(content)

    ######################
    # write xyz-people
    id_html = "people"
    filename_html = "%s/data/www/%s-%s.html" % (
        global_config["home"], id_data, id_html)
    json_template = resource_string('resources.files', '{}.jsont'.format(id_html))
    content = jsontemplate.expand(json_template, json_conf)
    with codecs.open(filename_html, "w", "utf-8") as f:
        f.write(u'\ufeff')
        f.write(content)

    ######################
    #conf-event
    filename_csv_conf_event = "%s/data/www/%s-%s.csv" % (
        global_config["home"], id_data, "conf-event")
    dict_events = {}
    list_events = []
    conf_event_name = ""
    with open(filename_csv_conf_event) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) < len(headers):
                #print "skipping row %s" % row
                continue
            entry = dict(zip(headers, row))
            #print entry
            dict_events[entry["event_uri"]] = entry
            list_events.append(entry)
            event_type = entry["event_type"].split('#')[-1]
            if event_type in ['ConferenceEvent']:
                conf_event_name = entry["label"]
            elif event_type in ['InvitedTalkEvent', 'PanelEvent']:
                entry['category'] = event_type.replace('Event', '')

    indexed_events = {}
    map_events = {}
    for entry in list_events:
        temp = entry["event_type"].split('#')[-1]
        temp = temp.replace("Event", "")
        if temp not in ["Tutorial", "Talk", "Special", "Break"]:
            entry["event_type_label"] = temp

        UtilJson.add_init_list(
            map_events, [], entry["super_event_uri"], entry["event_uri"], True)

        super_event_name = conf_event_name
        if entry["super_event_uri"] and entry["super_event_uri"] in dict_events:
            super_event_type = dict_events[entry["super_event_uri"]]["event_type"].split('#')[-1].replace("Event", "")
            if super_event_type in ['Workshop', 'Tutorial']:
                super_event_name = dict_events[entry["super_event_uri"]]["label"]
                if "doctoral consortium" not in super_event_name.lower():
                    if not super_event_name.startswith(super_event_type):
                        super_event_name = "{}: {}".format(super_event_type, super_event_name)

        entry['start_x'] = entry['start']
        entry['end_x'] = entry['end']
        if len(entry['start']) > 0:
            #skip talk event
            if len(entry['order_in_super_event']) > 0:
                continue
            date = entry['start'][0:10]
            entry['start_x'] = entry['start'][11:-3]
            date_end = date
            if len(entry['end']) > 0:
                date_end = entry['end'][0:10]
                entry['end_x'] = entry['end'][11:-3]
            #only keep same day events
            if date_end == date:
                UtilJson.add_init_list(
                    indexed_events, [super_event_name], date, entry)
    #print json.dumps(map_events, indent=4)

    #update json_conf
    list_event_name = []
    for event_name in sorted(indexed_events.keys()):
        if conf_event_name == event_name:
            list_event_name.insert(0, event_name)
        else:
            list_event_name.append(event_name)
    for event_name in list_event_name:
        top_events_in_program = indexed_events[event_name]
        json_program = {'title': event_name}
        UtilJson.add_init_list(json_conf, [], "top_programs", json_program)
        for date in sorted(top_events_in_program.keys()):
            events_in_program_date = top_events_in_program[date]
            json_date_program = {}
            if len(top_events_in_program) > 1:
                json_date_program["title"] = datetime.datetime(
                    *time.strptime(date, "%Y-%m-%d")[0:5]).strftime("%Y-%m-%d (%A)")
            json_date_program["events"] = events_in_program_date
            UtilJson.add_init_list(json_program, [], "date_programs", json_date_program)
            # sorted(events_in_program_date, key=lambda item: item['start'])
            for entry in events_in_program_date:
                entry["super_event_type"] = dict_events[entry["super_event_uri"]]["event_type"]
                if entry["super_event_type"] == "http://data.semanticweb.org/ns/swc/ontology#TrackEvent":
                    entry["track"] = dict_events[entry["super_event_uri"]]["label"]
                else:
                    entry["track"] = ""
                #if entry["event_type"] == "http://data.semanticweb.org/ns/swc/ontology#SessionEvent":
                if entry["event_uri"] in map_events:
                    for sub_event_uri in map_events[entry["event_uri"]]:
                        UtilJson.add_init_list(entry, [], "talks", dict_events[sub_event_uri])

    ######################
    # write json-data
    #print json.dumps(json_conf, indent=4)
    filename_json = "%s/data/www/%s-conf.json" % (
        global_config["home"], id_data)
    with codecs.open(filename_json, "w", "utf-8") as f:
        json.dump(json_conf, f, indent=4)

    ######################
    # write xyz-program
    id_html = "program"
    filename_html = "%s/data/www/%s-%s.html" % (
        global_config["home"], id_data, id_html)
    json_template = resource_string('resources.files', '{}.jsont'.format(id_html))
    content = jsontemplate.expand(json_template, json_conf)
    with codecs.open(filename_html, "w", "utf-8") as f:
        f.write(u'\ufeff')
        f.write(content)

    ######################
    # write icalendar
    id_html = "program"
    filename_ics_prefix = "%s/data/www/%s-%s" % (
        global_config["home"], id_data, id_html)
    ConfData.json_conf2ics(json_conf, filename_ics_prefix)
def process_proceedings(self):
    filename = "{0}/data/source/iswc-all-proceedings.csv".format(
        self.global_config["home"])
    counter_paper = MyCounter()
    with open(filename) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) != len(headers):
                print "skipping mismatch row %s" % row
                continue
            entry = dict(zip(headers, row))
            if entry["year"] != self.local_config["year"]:
                #skip mismatched year
                continue
            if len(entry["title"]) == 0:
                print "skipping empty title row %s" % entry
                continue
            if len(entry["proceedings_uri"]) == 0:
                print "skipping empty proceedings_uri row %s" % entry
                continue

            expand_entry(entry)
            uri_proceedings = self.expand_uri(entry["proceedings_uri"])
            uri_proceedings_editor_list = "%s/editor_list" % uri_proceedings
            uri_event = self.expand_uri(entry["event_uri"])
            #print json.dumps(entry, indent=4)
            #print uri_proceedings

            res_proceedings = URIRef(uri_proceedings)
            res_event = URIRef(uri_event)
            self.graph.add((res_proceedings, RDF.type, SWRC.Proceedings))

            #relation to event
            self.graph.add((res_proceedings, SWC.relatedToEvent, res_event))
            self.graph.add((res_event, SWRC.hasRelatedDocument, res_proceedings))

            #editor
            if len(entry["editor"]) > 0:
                self.graph.add((res_proceedings, SWRC.listEditor, Literal(entry["editor"])))
                list_res_editor = []
                for editor in entry["editor"].split(","):
                    editor = self.get_final_name(editor)
                    for res_editor in self.create_list_named_entity(
                            DataIswc.get_namespace(DataIswc.PREFIX_PERSON),
                            editor).values():
                        list_res_editor.append(res_editor)
                        self.graph.add((res_proceedings, SWRC.editor, res_editor))
                        self.graph.add((res_proceedings, FOAF.maker, res_editor))
                        self.graph.add((res_editor, FOAF.made, res_proceedings))
                res_proceedings_editor_list = self.create_container(
                    list_res_editor, RDF.Seq, uri_proceedings_editor_list)
                self.graph.add((res_proceedings, SWC.editorList, res_proceedings_editor_list))

            #simple properties
            self.create_triple_complex(
                res_proceedings,
                ["title", "subtitle", "abstract", "keywords", "year",
                 "pages", "publisher", "series", "volume",
                 "link_open_access", "link_publisher", "depiction"],
                entry)
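
# For one proceedings row the triples above amount to roughly (Turtle
# sketch; the URIs are invented):
#
#   <.../proceedings-1> rdf:type swrc:Proceedings ;
#       swc:relatedToEvent <.../event> ;
#       swrc:editor <.../person/jane-doe> ;
#       swc:editorList <.../proceedings-1/editor_list> .  # an rdf:Seq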
def process_paper(self):
    filename = "{0}/data/source/iswc-all-papers.csv".format(
        self.global_config["home"])
    if self.local_config["id"] in ["iswc-2013", "iswc-2014"]:
        filename = "{}/data/source/{}-paper.csv".format(
            self.global_config["home"], self.local_config["id"])
    counter_paper = MyCounter()
    with open(filename) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) != len(headers):
                #print "skipping mismatch row %s" % row
                continue
            entry = dict(zip(headers, row))
            if entry["year"] != self.local_config["year"]:
                #skip mismatched year
                continue
            if len(entry["title"]) == 0:
                print "skipping empty title row %s" % entry
                continue
            if len(entry["proceedings_uri"]) == 0:
                print "skipping empty proceedings row %s" % entry
                continue

            expand_entry(entry)
            counter_paper.inc(entry["proceedings_uri"])
            id_paper = counter_paper.data[entry["proceedings_uri"]]
            uri_paper = "%s/paper-%02d" % (entry["proceedings_uri"], id_paper)
            uri_paper_author_list = "%s/paper-%02d/author_list" % (
                entry["proceedings_uri"], id_paper)
            #print json.dumps(entry, indent=4)
            #print uri_paper

            res_proceedings = URIRef(entry["proceedings_uri"])
            res_paper = URIRef(uri_paper)
            self.graph.add((res_paper, RDF.type, SWRC.InProceedings))

            #part-of proceedings
            self.graph.add((res_paper, SWC.isPartOf, res_proceedings))
            self.graph.add((res_proceedings, SWC.hasPart, res_paper))

            #author
            author_data = DataIswc.parse_person_list(entry["author"])
            self.graph.add((res_paper, SWRC.listAuthor, Literal(author_data["text"])))
            list_res_author = []
            for author in author_data["list"]:
                author = self.get_final_name(author)
                for res_author in self.create_list_named_entity(
                        DataIswc.get_namespace(DataIswc.PREFIX_PERSON),
                        author).values():
                    self.graph.add((res_author, RDF.type, FOAF.Person))
                    list_res_author.append(res_author)
                    self.graph.add((res_paper, SWRC.author, res_author))
                    self.graph.add((res_paper, FOAF.maker, res_author))
                    self.graph.add((res_author, FOAF.made, res_paper))
            res_paper_author_list = self.create_container(
                list_res_author, RDF.Seq, uri_paper_author_list)
            self.graph.add((res_paper, BIBO.authorList, res_paper_author_list))

            #simple properties
            self.create_triple_complex(
                res_paper,
                ["abstract", "keywords", "year", "pages", "title",
                 "category", "link_open_access", "link_publisher"],
                entry)

            #cache
            self.map_name_res[entry["title"]] = res_paper
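
# Paper URIs are numbered per proceedings via counter_paper; e.g. the third
# paper filed under ".../proceedings-1" (URI invented) becomes:
#
#   .../proceedings-1/paper-03
#   .../proceedings-1/paper-03/author_list   (an rdf:Seq of the author resources)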
def process_event(self):
    filename = "{0}/data/source/{1}-event.csv".format(
        self.global_config["home"], self.local_config["id"])
    counter_event = MyCounter()
    with open(filename) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) != len(headers):
                #print "skipping mismatch row %s" % row
                continue
            entry = dict(zip(headers, row))
            if len(entry["label"].strip()) == 0:
                #print "skipping empty label row %s" % entry
                continue
            if len(entry["event_type"].strip()) == 0:
                #print "skipping empty event_type row %s" % entry
                continue
            if entry["event_uri"].startswith("#"):
                #print "skipping commented row %s" % entry
                continue

            #set default super event
            if len(entry["super_event_uri"]) == 0:
                entry["super_event_uri"] = "[ME]"

            expand_entry(entry)
            uri_super_event = self.expand_uri(entry["super_event_uri"])
            res_super_event = URIRef(uri_super_event)
            if len(entry["event_uri"]) == 0:
                counter_event.inc(uri_super_event)
                entry["event_uri"] = "%s/event-%02d" % (
                    uri_super_event, counter_event.data[uri_super_event])
            uri_event = self.expand_uri(entry["event_uri"])
            res_event = URIRef(uri_event)

            #event type
            self.graph.add((res_event, RDF.type, SWC[entry["event_type"]]))

            #super event
            self.graph.add((res_event, SWC.isSubEventOf, res_super_event))
            self.graph.add((res_super_event, SWC.isSuperEventOf, res_event))

            #simple properties
            self.create_triple_complex(
                res_event,
                ["label", "acronym", "abstract", "order_in_super_event",
                 "start", "end", "tzid", "room", "address", "homepage",
                 "link_document", "logo"],
                entry)

            #linking paper event
            if "TalkEvent" == entry["event_type"]:
                if entry["label"] in self.map_name_res:
                    res_paper = self.map_name_res[entry["label"]]
                    self.graph.add((res_event, SWC.hasRelatedDocument, res_paper))
                    self.graph.add((res_paper, SWC.relatedToEvent, res_event))
                else:
                    print "missing paper link [{}]".format(entry["label"])
                    #print json.dumps(self.map_name_res, indent=4, sort_keys=True)
                    sys.exit(0)

            #role - chair and presenter
            for role in ["Chair", "Presenter"]:
                role_lower = role.lower()
                if len(entry[role_lower + "_person"]) > 0:
                    person_data = DataIswc.parse_person_list(entry[role_lower + "_person"])
                    for name in person_data["list"]:
                        if len(name) == 0:
                            continue
                        name = self.get_final_name(name)
                        for res_person in self.create_list_named_entity(
                                DataIswc.get_namespace(DataIswc.PREFIX_PERSON),
                                name).values():
                            role_label_x = entry[role_lower + "_label"]
                            event_type_x = entry["event_type"].split("#")[-1].replace("Event", "")
                            if event_type_x in ["Workshop", "Tutorial"]:
                                # e.g. a Chair of a WorkshopEvent becomes "Workshop Chair"
                                role_label_x = u"{} {}".format(event_type_x, role_label_x)
                            assert len(role.strip()) > 0
                            self.create_role_to_event(
                                uri_event, "swc:" + role, role_label_x, res_person)
def process_person(self):
    #load person
    filename = "{0}/data/source/{1}-person.csv".format(
        self.global_config["home"], self.local_config["id"])
    with open(filename) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) != len(headers):
                #print "skipping mismatch row %s" % row
                continue
            entry = dict(zip(headers, row))
            if len(entry["name"]) == 0:
                #print "skipping empty name row %s" % entry
                continue

            name = entry["name"].strip()
            name = self.get_final_name(name)
            for res_person in self.create_list_named_entity(
                    DataIswc.get_namespace(DataIswc.PREFIX_PERSON),
                    name).values():
                #map other names
                for other_name in entry["other_names"].split(","):
                    self.cache_map_name_res(other_name, res_person)
                if name in self.map_name_info:
                    for other_name in self.map_name_info[name]["other_names"]:
                        self.cache_map_name_res(other_name, res_person)

                #object properties
                self.create_triple_complex(res_person, ["homepage"], entry)

                #role
                self.create_role_to_event(
                    entry["role_event"],
                    entry["role_type"],
                    entry["role_label"],
                    res_person)

                #organization
                if "organization" in entry:
                    for org in entry["organization"].split(";"):
                        if len(org) == 0:
                            continue
                        for res_organization in self.create_list_named_entity(
                                DataIswc.get_namespace(DataIswc.PREFIX_ORG),
                                org).values():
                            self.graph.add((res_organization, FOAF.member, res_person))
                            #inverse property
                            self.graph.add((res_person, SWRC.affiliation, res_organization))

                #alt-name
                self.create_triple_complex(res_person, ["other_names"], entry)

                #email
                if len(entry["email"]) > 0:
                    if not entry["email"].startswith("mailto:"):
                        mbox = "mailto:%s" % entry["email"]
                    else:
                        mbox = entry["email"]
                    mbox_sha1sum = hashlib.sha1(mbox).hexdigest()
                    #self.graph.add( (res_person, FOAF.mbox, URIRef(mbox)) )
                    self.graph.add((res_person, FOAF.mbox_sha1sum, Literal(mbox_sha1sum)))
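
# foaf:mbox_sha1sum is the SHA1 hex digest of the full "mailto:" URI, so the
# address itself never appears in the output graph; a standalone sketch with
# an invented address:
#
#   import hashlib
#   hashlib.sha1("mailto:alice@example.org").hexdigest()  # 40-char hex string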