def init_map_person_name(self):
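        """Build name-mapping tables from the global person entity file.

        Populates self.map_name (other_name -> canonical name) and
        self.map_name_info (canonical name -> {"other_names": [...]}).
        """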
        if hasattr(self, "map_name"):
            return

        # load global entity name mappings
        filename = "{0}/data/entity/person.csv".format(
            self.global_config["home"])

        map_name = {}       # other_name -> canonical name
        map_name_info = {}  # canonical name -> {"other_names": [...]}

        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:
                if len(row) != len(headers):
                    #print "skipping mismatch row %s" % row
                    continue

                entry = dict(zip(headers, row))

                if entry["name"]:
                    name = entry["name"].strip()
                    if ["other_names"]:
                        #real_name = entry["name"]
                        #if "real_name" in entry:
                        #    real_name = entry["real_name"]

                        map_name_info[name] = {"other_names": [x.strip() for x in entry["other_names"].split(";")]}
                        for other_name in map_name_info[name]["other_names"]:
                            map_name[other_name] = name

        self.map_name = map_name
        self.map_name_info = map_name_info
    def process_organization(self):
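        """Load <home>/data/source/<id>-organization.csv and emit triples
        (homepage, logo) plus event roles for each named organization."""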
        filename = "{0}/data/source/{1}-organization.csv".format(
            self.global_config["home"],
            self.local_config["id"])

        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:
                if len(row) < len(headers):
                    #print "skipping row %s" % row 
                    continue

                entry = dict(zip(headers, row))

                if len(entry["name"]) == 0:
                    #print "skipping empty name row %s" % entry
                    continue

                for res_organization in self.create_list_named_entity(DataIswc.get_namespace(DataIswc.PREFIX_ORG), entry["name"]).values():

                    #object properties
                    self.create_triple_complex(res_organization, ["homepage", "logo"], entry)

                    #role
                    self.create_role_to_event(
                        entry["role_event"],
                        entry["role_type"],
                        entry["role_label"],
                        res_organization)
def load_csv(filename, non_empty_field):
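    """Read a CSV file into a list of dicts keyed by the header row,
    skipping short rows and rows whose `non_empty_field` column is empty."""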
    ret = []
    with open(filename) as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if len(row) < len(headers):
                #print "skipping row %s" % row
                continue

            entry = dict(zip(headers, row))

            if not entry[non_empty_field]:
                continue

            ret.append(entry)
            print entry

    return ret
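
# Example usage (hypothetical file path):
#   people = load_csv("data/source/iswc-2014-person.csv", "name")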
    def update_index_data(id_data, global_config, data, data_local):
        """
        {"person":
            { "person_uri":[entry1, entry2]
            }
        }
        """

        for entity_type in ["person","organization"]:
            # read data
            id_query = "index-{}".format(entity_type)
            filename_csv_index = "%s/data/www/%s-%s.csv" % (
                global_config["home"], 
                id_data, 
                id_query)
                    

            with open(filename_csv_index) as f:
                csvreader = UnicodeReader(f)
                headers =  csvreader.next()
                for row in csvreader:
                    if len(row)<len(headers):
                        #print "skipping row %s" % row 
                        continue

                    entry = dict(zip(headers, row))
                    entry["year"] = entry["conf_uri"].split("/")[-1]

                    UtilJson.add_init_list(
                        data, 
                        [entity_type],
                        entry["uri"],
                        entry)               

                    UtilJson.add_init_list(
                        data_local,
                        [entity_type],
                        entry["uri"],
                        entry)
def load_people_csv():
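    """Read the track-sectioned people.csv ("##<track>" rows start a new
    track section, "#" rows are comments), consolidate per-author entries,
    and write the author roles out via print_list_people_role."""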
    with open("config.json") as f:
        global_config = json.load(f)
    print global_config

    list_input = [
        {"filename": "people.csv",
         },

    ]

    list_field=[
        "Paper",
        "Author",
        "Email",
        "Country",
        "Affiliation",
    ]

    list_item = []
    counter = collections.Counter()

    map_name_author = {}
    set_organization = set()
    list_people_role = []

    for input_item in list_input:
        filename = os.path.join(global_config["home"], "data", input_item["filename"])
        print filename
        with open(filename,'r') as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()

            track = None
            for row in csvreader:
                if not row:
                    continue

                if row[0].startswith("##"):
                    track = row[0][2:]
                    print track , "---------------------------"
                    continue
                elif row[0].startswith("#"):
                    continue

                entry = dict(zip(headers, row))
                entry["Track"] = track
                entry["Role"] = "author"
                print entry

                #author
                author = entry["Author"]
                entry["Name"] = author


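                # disambiguate authors who share the same name by
                # appending the email address to the lookup key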
                if author in ["Xi Chen"]:
                    author = "{} {}".format(author,  entry["Email"] )

                if author in map_name_author:
                    author_info = map_name_author[author]
                else:
                    author_info = {}
                    author_info.update(entry)
                    map_name_author[author] = author_info
                    author_info["track_list"]= set(track)


                if entry["Email"] != author_info["Email"]:
                    print  author_info["Email"], author_info["Track"]
                    print  entry["Email"], entry["Track"]

                if entry["Affiliation"] != author_info["Affiliation"]:
                    print author_info["Affiliation"], author_info["Track"]
                    print entry["Affiliation"], entry["Track"]

                author_info.update(entry)
                author_info["track_list"].add(track)

                #affiliation
                organization = entry["Affiliation"]
                set_organization.add(organization)


                list_people_role.append(entry)


    print sorted(list(set_organization))

    filename = "person.json"
    filename = os.path.join(global_config["home"],"data", filename)
    print filename
#    with codecs.open(filename, "w","utf-8") as f:
#        f.write(lib_data.json2text(list_item))


    print_list_people_role("person_author.csv", list_people_role)
def main():
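    """Build ISWC 2014 paper metadata from the CorrespondingAuthors CSV
    exports and write paper-excel.csv and paper-excel.json."""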
    filename = "config.json"
    filename = os.path.join(os.path.dirname(__file__), filename)
    with open(filename) as f:
        global_config = json.load(f)
    print global_config


    list_input = [
        {"filename": "8796CorrespondingAuthors.csv",
         #TODO
         #"link_publisher":"tba",
         "proceedings_uri": "http://data.semanticweb.org/conference/iswc/2014/proceedings-1",
         },
        {"filename": "8797CorrespondingAuthors.csv",
         #"link_publisher":"tba",
         "proceedings_uri": "http://data.semanticweb.org/conference/iswc/2014/proceedings-2",
         },
    ]

    list_field=[
        "author",
        "title",
        "pages",
        "year",
        "link_open_access",
        "link_publisher",
        "proceedings_uri",
        "paper_uri",
        "source_uri",
        "keywords",
        "abstract",
        "uri_me",
        "category",
        "source",
        "start_page",
        "paper_id",
        "EOL",
    ]
    map_key = {
        "Title":"title",
        "Authors":"author",
        "Start Page":"start_page",
        "Folder Index":"paper_id",
        "Paper no.":"paper_no",
    }

    list_key = {
        "link_publisher",
        "proceedings_uri",
    }

    list_item = []
    counter = collections.Counter()

    for input_item in list_input:
        filename = os.path.join(global_config["home"], "data", input_item["filename"])
        print filename
        with open(filename,'r') as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()

            prev_item = None
            for row in csvreader:
                entry = dict(zip(headers, row))

                print entry

                item = {
                    "year":2014,
                    "uri_me":"http://data.semanticweb.org/conference/iswc/2014",
                    #"EOL":"EOL",
                }
                for k,v in map_key.items():
                    item[v] = entry[k].strip()

                for k in list_key:
                    if k in input_item:
                        item[k] = input_item[k]

                temp = entry["Paper no."]
                if temp.startswith("DC"):
                    counter["DC"] += 1
                    category = "Doctoral Consortium Paper"
                else:
                    counter[temp[0]] += 1
                    map_category = {
                        "R": "Research Track Paper",
                        "D": "Replication, Benchmark, Data and Software Track Paper",
                        "I": "Semantic Web In Use Track Paper",
                    }
                    category = map_category[temp[0]]

                item["category"]= category

                list_item.append(item)

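                # derive each paper's page range from consecutive start
                # pages (assumes input rows are ordered by start page)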
                if prev_item:
                    prev_item["pages"]= "{}-{}".format(prev_item["start_page"], int(item["start_page"]) - 1)

                prev_item = item

            prev_item["pages"]= "{}-".format(prev_item["start_page"])

    #update: paper uri
    for item in list_item:

        #paper_name = re.sub("\W+", "-", item[u"title"]).lower()
        paper_name = slugify.slugify(item[u"title"])
        print item[u"title"]
        print paper_name

        item["link_open_access"] = "https://github.com/lidingpku/iswc2014/raw/master/paper/{}-{}.pdf".format(item['paper_id'],paper_name)
        print item["link_open_access"]


    print counter.most_common()
    print len(list_item)

    #create file
    filename = "paper-excel.csv"
    filename = os.path.join(global_config["home"],"output", filename)
    print filename
    with open(filename, "w") as f:
        csvwriter = UnicodeWriter(f)
        csvwriter.writerow(list_field)

        for item in list_item:
            row = UtilString.json2list(item, list_field)
            csvwriter.writerow(row)

    filename = "paper-excel.json"
    filename = os.path.join(global_config["home"],"output", filename)
    print filename
    with codecs.open(filename, "w","utf-8") as f:
        f.write(lib_data.json2text(list_item))
def load_paper_json():
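    """Load the intermediate paper/session/event files and return lookup
    maps keyed by paper id, paper no., session name and event id."""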
    global_config = load_global_config()



    filename = "paper-excel.json"
    filename  = os.path.join(global_config["home"],"output", filename)
    with open(filename, 'r') as f:
        content = f.read()
        list_paper_excel = lib_data.text2json(content)
    print len(list_paper_excel)
    map_paper_excel = {}
    map_paper_excel_no = {}
    for paper  in list_paper_excel:
        map_paper_excel[str(paper["paper_id"])] = paper

        map_paper_excel_no[str(paper["paper_no"])] = paper


    map_name_session = {}



    filename = "paper-industry.json"
    filename = os.path.join(global_config["home"],"output", filename)
    with open(filename, 'r') as f:
        content = f.read()
        list_paper_industry = lib_data.text2json(content)

    set_session_name = set()
    for paper in list_paper_industry:
        paper_id = str(paper["paper_id"])
        map_paper_excel[paper_id] = paper

        session_name = paper["session_name"]

        set_session_name.add(session_name)

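        # industry-track sessions are absent from session.csv; give them
        # synthetic ids (101, 102, ...) in order of first appearance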
        default_session_id = 100 + len(set_session_name)
        default_entry = {
            "session_time": paper["session_time"],
            "session_name": session_name,
            "session_id" : default_session_id,
            "session_index" : default_session_id,

        }

        entry = map_name_session.get(session_name, default_entry)

        map_name_session[session_name]=entry

        paper_list = entry.get("paper_list",[])
        lib_data.list_append_unique(paper_list, paper_id)
        entry["paper_list"] =paper_list
        entry["paper_count"]= len(entry["paper_list"])


    filename = "paper-pdf.json"
    filename  = os.path.join(global_config["home"],"output", filename)
    with open(filename, 'r') as f:
        content = f.read()
        list_paper_pdf = lib_data.text2json(content)

    print len(list_paper_pdf)
    map_paper_pdf = {}
    for paper in list_paper_pdf:
        map_paper_pdf[str(paper["paper_id"])] = paper


    filename = "session.csv"
    filename  = os.path.join(global_config["home"],"data", filename)
    map_paper_session = {}

    with open(filename,'r') as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        session_no = None
        session_name = None
        session_index = 1
        for row in csvreader:
            entry = dict(zip(headers, row))

            if entry.get("Paper no."):
                entry["session_no"] = session_no
                entry["session_id"] = int(session_no.split(" ")[-1])
                entry["session_name"] = session_name
                entry["session_index"] = session_index
                session_index+=1
                map_paper_session[entry["Paper no."]]=entry

                map_name_session[session_name]=entry
            else:
                session_no = entry["Session no"]
                session_name = entry["Title"].strip()
                session_index = 1


    print len(map_paper_session)

    filename = "event.csv"
    filename  = os.path.join(global_config["home"],"data", filename)
    map_event_session  = {}

    with open(filename,'r') as f:
        csvreader = UnicodeReader(f)
        headers = csvreader.next()
        for row in csvreader:
            if row[0].startswith("#"):
                continue

            entry = dict(zip(headers, row))

            print entry

            event_start, event_end = entry["Time"].split("-")
            event_day = entry["day"]

            for k,v in entry.items():
                if k in ["Time","day"]:
                    continue
                if v:
                    event_id = (len(map_event_session)+1)

                    event = {
                        "day":event_day,
                        "start":event_start.strip(),
                        "end": event_end.strip(),
                        "name": v.strip(),
                        "location": k,
                        "id": event_id,
                    }

                    if "Session" in v or "Industry Track:" in v:
                        session_name = v.replace("Session:","")
                        session_name = session_name.replace("Industry Track:","")

                        session_name = re.sub(r"\([^)]+\)", "", session_name)
                        session_name = session_name.strip()

                        if session_name not in map_name_session:
                            print session_name

                        assert session_name in map_name_session

                        event["session_name"] = session_name

                    map_event_session[event_id] = event

    print len(map_paper_session)



    return map_paper_excel, map_paper_excel_no, map_paper_pdf, map_paper_session, map_name_session, map_event_session
    def csv2html(id_data, global_config):
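        """Render the <id_data>-conf-paper/-person/-event CSV exports under
        data/www into HTML pages (proceedings, people, program), a JSON
        dump and an iCalendar file."""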
        #create json_conf data    
        json_conf ={}

        ######################
        #conf-paper
        filename_csv_conf_paper = "%s/data/www/%s-%s.csv" % (
            global_config["home"], 
            id_data, 
            "conf-paper")
                    
        indexed_proceedings ={}
        list_title = []
        with open(filename_csv_conf_paper) as f:
            csvreader = UnicodeReader(f)
            headers =  csvreader.next()
            while len(headers)<=1:
                print "skipping header row {0}".format( headers )
                headers =  csvreader.next()

            for row in csvreader:
                if len(row)<len(headers):
                    print "skipping row {0}".format( row )
                    continue

                entry = dict(zip(headers, row))
                
#                print entry
                if entry["subtitle_proceedings"]:
                    proceeding_title = "{} -- {}".format(entry["label_proceedings"], entry["subtitle_proceedings"])
                    if proceeding_title not in list_title:
                        list_title.insert(0, proceeding_title)
                else:
                    proceeding_title = "{}".format(entry["label_proceedings"])
                    if proceeding_title not in list_title:
                        list_title.append(proceeding_title)

                UtilJson.add_init_list(
                    indexed_proceedings, 
                    [proceeding_title],
                    entry["category"],
                    entry)
                
        
        #update json_conf
        for proceedings in list_title:
            #print proceedings
            json_proceedings ={}
            json_proceedings["title"] =proceedings
            UtilJson.add_init_list(json_conf, [], "proceedings", json_proceedings)
            for category in sorted(indexed_proceedings[proceedings].keys()):
                #print category
                json_category = {}
                if len(indexed_proceedings[proceedings].keys()) > 1:
                    json_category["title"] =category
                UtilJson.add_init_list(json_proceedings, [], "categories", json_category)
                json_category["papers"] =indexed_proceedings[proceedings][category]
        
        ######################
        #conf-person

        filename_csv_conf_person = "%s/data/www/%s-%s.csv" % (
            global_config["home"], 
            id_data, 
            "conf-person")
                    
        indexed_persons ={}
        with open(filename_csv_conf_person) as f:
            csvreader = UnicodeReader(f)
            headers =  csvreader.next()
            for row in csvreader:
                if len(row)<len(headers):
                    #print "skipping row %s" % row 
                    continue


                entry = dict(zip(headers, row))

                #print entry
                name = entry["name"]
                name = name.strip()
                name = re.sub(r"\s+", " ", name)

                
                cnt_paper = int(entry["cnt_paper"])
                if cnt_paper >0:
                    index_1 = entry["proceedings_label"]
                    if len(entry["proceedings_label"])==0:
                        index_1 = "All"
                        
                    index_1 = "[Proceedings] {}".format(index_1)
                    index_2 = "Authors"

                    UtilJson.add_init_dict(
                        indexed_persons, 
                        [index_1,index_2],
                        name,
                        entry)

                #consolidate affiliation
                organization = entry["organization"]
                if len(entry["organization"])>0:
                    entry["organization"] = organization.split(";")[0]

                #only keep direct conference role
                ALLOWED_EVENT_TYPE= []
                ALLOWED_EVENT_TYPE.append("http://data.semanticweb.org/ns/swc/ontology#ConferenceEvent")
#                ALLOWED_EVENT_TYPE.append("http://data.semanticweb.org/ns/swc/ontology#WorkshopEvent")
                if entry["role_event_type"] not in ALLOWED_EVENT_TYPE:
                    continue
                

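                # the leading space makes these chair entries sort first
                # when the event groups are sorted below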
                if entry["role_type"].endswith("Chair") and entry["role_event_type"].endswith("ConferenceEvent"):
                   entry["role_event_label"] = " {} (organization Committee)".format(entry["role_event_label"])


                UtilJson.add_init_dict(
                    indexed_persons, 
                    [entry["role_event_label"],entry["role_label"]],
                    name,
                    entry)

                
        #update json_conf
        for role_event_label in sorted(indexed_persons.keys()):
            #print role_event_label
            json_role_event = {}
            json_role_event["title"] = role_event_label
            UtilJson.add_init_list(json_conf, [], "events", json_role_event)


            list_role = []
            for role_label in sorted(indexed_persons[role_event_label].keys()):
                if "Chair" in role_label or "Webmaster" in role_label:
                    list_role.insert(0, role_label)
                else:
                    list_role.append(role_label)

            for role_label in list_role:
                #print role_label
                json_role_label = {}
                json_role_label["title"] =role_label
                UtilJson.add_init_list(json_role_event, [], "roles", json_role_label)
                json_role_label["persons"] = sorted( indexed_persons[role_event_label][role_label].values())
    
        
                    

        ######################
        # write xyz-proceedings
        id_html = "proceedings"


        filename_html = "%s/data/www/%s-%s.html" % (
            global_config["home"], 
            id_data, 
            id_html)        
        
        json_template = resource_string('resources.files', '{}.jsont'.format(id_html))
        content= jsontemplate.expand(json_template, json_conf)
        with codecs.open(filename_html,"w","utf-8") as f:
            f.write(u'\ufeff')
            f.write(content)


        ######################
        # write xyz-people
        id_html = "people"
        filename_html = "%s/data/www/%s-%s.html" % (
            global_config["home"], 
            id_data, 
            id_html)        
        
        json_template = resource_string('resources.files', '{}.jsont'.format(id_html))
        content= jsontemplate.expand(json_template, json_conf)
        with codecs.open(filename_html,"w","utf-8") as f:
            f.write(u'\ufeff')
            f.write(content)


    
        ######################
        #conf-event

        filename_csv_conf_event = "%s/data/www/%s-%s.csv" % (
            global_config["home"], 
            id_data, 
            "conf-event")
                    
        dict_events ={}
        list_events = []
        conf_event_name  =""
        with open(filename_csv_conf_event) as f:
            csvreader = UnicodeReader(f)
            headers =  csvreader.next()
            for row in csvreader:
                if len(row)<len(headers):
                    #print "skipping row %s" % row 
                    continue

                entry = dict(zip(headers, row))
                
                #print entry
                
                dict_events[entry["event_uri"]] = entry
                list_events.append(entry)

                event_type = entry["event_type"].split('#')[-1]
                if event_type in ['ConferenceEvent']:
                    conf_event_name = entry["label"]
                elif event_type in ['InvitedTalkEvent', 'PanelEvent']:
                    entry['category'] = event_type.replace('Event', '')

        indexed_events ={}
        map_events ={}
        for entry in list_events:

            temp = entry["event_type"].split('#')[-1]
            temp = temp.replace("Event","")
            if temp not in ["Tutorial","Talk","Special","Break"]:
                entry["event_type_label"] = temp

            UtilJson.add_init_list(
                map_events,
                [],
                entry["super_event_uri"],
                entry["event_uri"],
                True)

            super_event_name = conf_event_name
            if entry["super_event_uri"] and entry["super_event_uri"] in dict_events:
                super_event_type = dict_events[entry["super_event_uri"]]["event_type"].split('#')[-1].replace("Event","")
                if super_event_type in ['Workshop', 'Tutorial'] :
                    super_event_name = dict_events[entry["super_event_uri"]]["label"]
                    if "doctoral consortium" not in super_event_name.lower():
                        if not super_event_name.startswith(super_event_type):
                            super_event_name = "{}: {}".format(super_event_type, super_event_name)

            entry['start_x'] = entry['start']
            entry['end_x'] = entry['end']
            if len(entry['start'])>0:
                #skip talk event
                if len(entry['order_in_super_event'])>0:
                    continue

                date = entry['start'][0:10]
                entry['start_x'] = entry['start'][11:-3]
                date_end = date
                if len(entry['end'])>0:
                    date_end = entry['end'][0:10]
                    entry['end_x'] = entry['end'][11:-3]
                #only keep same day events
                if date_end == date:
                    UtilJson.add_init_list(
                        indexed_events,
                        [super_event_name],
                        date,
                        entry)
        #print json.dumps(map_events, indent=4)
            
        #update json_conf
        list_event_name = []
        for event_name in sorted(indexed_events.keys()):
            if conf_event_name == event_name:
                list_event_name.insert(0, event_name)
            else:
                list_event_name.append(event_name)

        for event_name in list_event_name:
            top_events_in_program = indexed_events[event_name]
            json_program = {
                'title': event_name
            }
            UtilJson.add_init_list(json_conf, [], "top_programs", json_program)

            for date in sorted(top_events_in_program.keys()):
                events_in_program_date = top_events_in_program[date]
                json_date_program ={}
                if len(top_events_in_program) >1:
                    json_date_program["title"] = datetime.datetime(*time.strptime(date,"%Y-%m-%d")[0:5]).strftime("%Y-%m-%d (%A)")
                json_date_program["events"] = events_in_program_date
                UtilJson.add_init_list(json_program, [], "date_programs", json_date_program)

                #    sorted(events_in_program_date, key=lambda item: item['start'])
                for entry in events_in_program_date:

                    entry["super_event_type"] = dict_events[entry["super_event_uri"]]["event_type"]
                    if entry["super_event_type"] == "http://data.semanticweb.org/ns/swc/ontology#TrackEvent":
                        entry["track"] = dict_events[entry["super_event_uri"]]["label"]
                    else:
                        entry["track"] = ""

                    #if entry["event_type"] == "http://data.semanticweb.org/ns/swc/ontology#SessionEvent":
                    if entry["event_uri"] in map_events:
                        for sub_event_uri in map_events[entry["event_uri"]]:
                            UtilJson.add_init_list(entry, [], "talks", dict_events[sub_event_uri])

        ######################
        # write json-data
        #print json.dumps(json_conf, indent=4)
        filename_json = "%s/data/www/%s-conf.json" % (
            global_config["home"],
            id_data)
        with codecs.open(filename_json,"w","utf-8") as f:
            json.dump(json_conf, f, indent=4)

        ######################
        # write xyz-program
        id_html = "program"
        filename_html = "%s/data/www/%s-%s.html" % (
            global_config["home"], 
            id_data, 
            id_html)        
        
        json_template = resource_string('resources.files', '{}.jsont'.format(id_html))
        content= jsontemplate.expand(json_template, json_conf)
        with codecs.open(filename_html,"w","utf-8") as f:
            f.write(u'\ufeff')
            f.write(content)    

        ######################
        # write icalendar
        id_html = "program"
        filename_ics_prefix = "%s/data/www/%s-%s" % (
            global_config["home"],
            id_data,
            id_html)

        ConfData.json_conf2ics(json_conf, filename_ics_prefix)
    def process_proceedings(self):
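        """Load iswc-all-proceedings.csv and, for rows matching the
        configured year, emit proceedings triples: type, related event,
        editors (with an rdf:Seq editor list) and literal properties."""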
        filename = "{0}/data/source/iswc-all-proceedings.csv".format(
            self.global_config["home"])

        counter_paper = MyCounter()
        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:

                if len(row) != len(headers):
                    print "skipping mismatch row %s" % row
                    continue

                entry = dict(zip(headers, row))

                if entry["year"] != self.local_config["year"]:
                    #skip mismatched year
                    continue

                if len(entry["title"]) == 0:
                    print "skipping empty title row %s" % entry
                    continue

                if len(entry["proceedings_uri"]) == 0:
                    print "skipping empty proceedings_uri row %s" % entry
                    continue

                expand_entry(entry)

                uri_proceedings = self.expand_uri(entry["proceedings_uri"])
                uri_proceedings_editor_list = "%s/editor_list" % (uri_proceedings)
                uri_event = self.expand_uri(entry["event_uri"])

                #print json.dumps(entry, indent=4)
                #print uri_proceedings
                res_proceedings = URIRef(uri_proceedings)
                res_event = URIRef(uri_event)

                self.graph.add((res_proceedings, RDF.type, SWRC.Proceedings ))

                #relation to event
                self.graph.add((res_proceedings, SWC.relatedToEvent, res_event))
                self.graph.add((res_event, SWRC.hasRelatedDocument, res_proceedings))

                #editor
                if len(entry["editor"]) > 0:
                    self.graph.add((res_proceedings, SWRC.listEditor, Literal(entry["editor"])))
                    list_res_editor = []
                    for editor in entry["editor"].split(","):
                        editor = self.get_final_name(editor)

                        for res_editor in self.create_list_named_entity(DataIswc.get_namespace(DataIswc.PREFIX_PERSON), editor).values():
                            list_res_editor.append(res_editor)
                            self.graph.add((res_proceedings, SWRC.editor, res_editor))
                            self.graph.add((res_proceedings, FOAF.maker, res_editor))
                            self.graph.add((res_editor, FOAF.made, res_proceedings))

                    res_proceedings_editor_list = self.create_container(list_res_editor, RDF.Seq,
                                                                        uri_proceedings_editor_list)
                    self.graph.add((res_proceedings, SWC.editorList, res_proceedings_editor_list))


                #simple properties
                self.create_triple_complex(
                    res_proceedings,
                    ["title", "subtitle", "abstract", "keywords", "year", "pages", "publisher", "series", "volume",
                     "link_open_access", "link_publisher", "depiction"],
                    entry)
    def process_paper(self):
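        """Load the paper CSV for the configured year and emit paper
        triples: proceedings membership, authors (with an rdf:Seq author
        list) and literal properties; caches title -> paper resource."""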
        filename = "{0}/data/source/iswc-all-papers.csv".format(
            self.global_config["home"])

        if self.local_config["id"] in ["iswc-2013","iswc-2014"]:
            filename = "{}/data/source/{}-paper.csv".format(
                self.global_config["home"],
                self.local_config["id"])

        counter_paper = MyCounter()
        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:

                if len(row) != len(headers):
                    #print "skipping mismatch row %s" % row 
                    continue

                entry = dict(zip(headers, row))

                if entry["year"] != self.local_config["year"]:
                    #skip mismatched year
                    continue

                if len(entry["title"]) == 0:
                    print "skipping empty title row %s" % entry
                    continue

                if len(entry["proceedings_uri"]) == 0:
                    print "skipping empty proceedings row %s" % entry
                    continue

                expand_entry(entry)

                counter_paper.inc(entry["proceedings_uri"])
                id_paper = counter_paper.data[entry["proceedings_uri"]]
                uri_paper = "%s/paper-%02d" % (entry["proceedings_uri"], id_paper)
                uri_paper_author_list = "%s/paper-%02d/author_list" % (entry["proceedings_uri"], id_paper)
                #print json.dumps(entry, indent=4)
                #print uri_paper
                res_proceedings = URIRef(entry["proceedings_uri"])
                res_paper = URIRef(uri_paper)

                self.graph.add((res_paper, RDF.type, SWRC.InProceedings ))

                #part-of proceedings
                self.graph.add((res_paper, SWC.isPartOf, res_proceedings))
                self.graph.add((res_proceedings, SWC.hasPart, res_paper))

                #author
                author_data = DataIswc.parse_person_list(entry["author"])

                # if author_x_and != entry["author"]:
                #     print "--------------"
                #     print entry["author"]
                #     print author_x_and

                # author_x_and_y = re.sub("\s+"," ",author_x_and)
                # if author_x_and != author_x_and_y:
                #     print "????"
                #     print author_x_and
                #     print author_x_and_y

                self.graph.add((res_paper, SWRC.listAuthor, Literal(author_data["text"])))
                list_res_author = []
                for author in author_data["list"]:

                    author = self.get_final_name(author)

                    for res_author in self.create_list_named_entity(DataIswc.get_namespace(DataIswc.PREFIX_PERSON), author).values():
                        self.graph.add((res_author, RDF.type, FOAF.Person))

                        list_res_author.append(res_author)
                        self.graph.add((res_paper, SWRC.author, res_author))
                        self.graph.add((res_paper, FOAF.maker, res_author))
                        self.graph.add((res_author, FOAF.made, res_paper))

                res_paper_author_list = self.create_container(list_res_author, RDF.Seq, uri_paper_author_list)
                self.graph.add((res_paper, BIBO.authorList, res_paper_author_list))

                #simple properties
                self.create_triple_complex(
                    res_paper,
                    ["abstract", "keywords", "year", "pages", "title", "category",
                     "link_open_access", "link_publisher"],
                    entry)

                #cache
                self.map_name_res[entry["title"]] = res_paper
    def process_event(self):
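        """Load <id>-event.csv and emit event triples: type, super event,
        literal properties, paper links for TalkEvents, and
        Chair/Presenter roles."""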
        filename = "{0}/data/source/{1}-event.csv".format(
            self.global_config["home"],
            self.local_config["id"])

        counter_event = MyCounter()

        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:

                if len(row) != len(headers):
                    #print "skipping mismatch row %s" % row 
                    continue

                entry = dict(zip(headers, row))

                if len(entry["label"].strip()) == 0:
                    #print "skipping empty label row %s" % entry
                    continue

                if len(entry["event_type"].strip()) == 0:
                    #print "skipping empty event_type row %s" % entry
                    continue

                if entry["event_uri"].startswith("#"):
                    #print "skipping empty commented row %s" % entry
                    continue

                #set default super event
                if len(entry["super_event_uri"]) == 0:
                    entry["super_event_uri"] = "[ME]"

                expand_entry(entry)

                uri_super_event = self.expand_uri(entry["super_event_uri"])
                res_super_event = URIRef(uri_super_event)

                if len(entry["event_uri"]) == 0:
                    counter_event.inc(uri_super_event)
                    entry["event_uri"] = "%s/event-%02d" % (
                        uri_super_event,
                        counter_event.data[uri_super_event])

                uri_event = self.expand_uri(entry["event_uri"])
                res_event = URIRef(uri_event)

                #event type
                self.graph.add((res_event, RDF.type, SWC[entry["event_type"]]))

                #super event
                self.graph.add((res_event, SWC.isSubEventOf, res_super_event))
                self.graph.add((res_super_event, SWC.isSuperEventOf, res_event))

                #simple properties
                self.create_triple_complex(
                    res_event,
                    ["label", "acronym", "abstract",
                     "order_in_super_event",
                     "start", "end", "tzid",
                     "room", "address",
                     "homepage", "link_document", "logo"],
                    entry)

                #linking paper event
                if "TalkEvent" == entry["event_type"]:
                    if entry["label"] in self.map_name_res:
                        res_paper = self.map_name_res[entry["label"]]
                        self.graph.add(( res_event, SWC.hasRelatedDocument, res_paper))
                        self.graph.add(( res_paper, SWC.relatedToEvent, res_event))
                    else:
                        print "missing paper link [{}]".format(entry["label"])
                        #print json.dumps(self.map_name_res, indent=4, sort_keys=True)
                        sys.exit(1)

                #role -chair
                for role in ["Chair", "Presenter"]:

                    role_lower = role.lower()
                    if len(entry[role_lower + "_person"]) > 0:
                        person_data = DataIswc.parse_person_list(entry[role_lower + "_person"])
                        for name in person_data["list"]:
                            if len(name) == 0:
                                continue

                            name = self.get_final_name(name)

                            for res_person in self.create_list_named_entity(DataIswc.get_namespace(DataIswc.PREFIX_PERSON),name).values():

                                role_label_x = entry[role_lower + "_label"]
                                event_type_x = entry["event_type"].split("#")[-1].replace("Event", "")
                                if event_type_x in ["Workshop", "Tutorial"]:
                                    role_label_x = u"{} {}".format(event_type_x, role_label_x)

                                assert (len(role.strip())>0)

                                self.create_role_to_event(
                                    uri_event,
                                    "swc:" + role,
                                    role_label_x,
                                    res_person)
    def process_person(self):
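        """Load <id>-person.csv and emit person triples: alternate names,
        homepage, roles, affiliations and a sha1-hashed mailbox."""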
        #load person
        filename = "{0}/data/source/{1}-person.csv".format(
            self.global_config["home"],
            self.local_config["id"])

        with open(filename) as f:
            csvreader = UnicodeReader(f)
            headers = csvreader.next()
            for row in csvreader:

                if len(row) != len(headers):
                    #print "skipping mismatch row %s" % row 
                    continue

                entry = dict(zip(headers, row))

                if len(entry["name"]) == 0:
                    #print "skipping empty name row %s" % entry
                    continue

                name = entry["name"].strip()

                name = self.get_final_name(name)

                for res_person in self.create_list_named_entity(DataIswc.get_namespace(DataIswc.PREFIX_PERSON), name).values():
                    #map other names
                    for other_name in entry["other_names"].split(","):
                        self.cache_map_name_res(other_name, res_person)

                    if name in self.map_name_info:
                        for other_name in self.map_name_info[name]["other_names"]:
                            self.cache_map_name_res(other_name, res_person)

                    #object properties
                    self.create_triple_complex(res_person, ["homepage"], entry)

                    #role
                    self.create_role_to_event(
                        entry["role_event"],
                        entry["role_type"],
                        entry["role_label"],
                        res_person)

                    #organization
                    if "organization" in entry:
                        for org in entry["organization"].split(";"):
                            if len(org) == 0:
                                continue

                            for res_organization in self.create_list_named_entity(DataIswc.get_namespace(DataIswc.PREFIX_ORG), org).values():
                                self.graph.add((res_organization, FOAF.member, res_person))
                                #inverse property
                                self.graph.add((res_person, SWRC.affiliation, res_organization))

                    #alt-name
                    self.create_triple_complex(res_person, ["other_names"], entry)



                    #email
                    if len(entry["email"]) > 0:
                        if not entry["email"].startswith("mailto:"):
                            mbox = "mailto:%s" % entry["email"]
                        else:
                            mbox = entry["email"]

                        mbox_sha1sum = hashlib.sha1(mbox).hexdigest()
                        #self.graph.add( (res_person, FOAF.mbox, URIRef(mbox)) )
                        self.graph.add((res_person, FOAF.mbox_sha1sum, Literal(mbox_sha1sum)))