예제 #1
0
    def __init__(self):
        """Build the address manager from the graph database.

        Creates one address object per ``Addr_codes`` node, links every address
        to its parent through the ``Addr_belongs_data`` relation, and finally
        stores a numeric ``x``/``y`` pair on each address: recorded values are
        rescaled, missing ones are looked up by name through ``self._getXY``.
        """
        self.id2addr = {}
        self.addr_id_set = set()
        self.addr_array = []

        self.place2xy = self.xyParser()
        self.place2xy_set = set(self.place2xy.keys())

        # Load every address stored in CBDB.
        for row in graph.run('MATCH (n:Addr_codes) RETURN id(n), n').data():
            self.createAddr(row['n'])

        # Load the tree structure (child -> parent) between addresses.
        belong_rows = graph.run(
            'MATCH (n1:Addr_codes)-->(:Addr_belongs_data)-->(n2:Addr_codes) RETURN n1.c_addr_id as son_id, n2.c_addr_id as parent_id'
        ).data()
        for row in belong_rows:
            son_id, parent_id = row['son_id'], row['parent_id']
            child = self.getAddr(son_id)
            parent = self.getAddr(parent_id)

            parent.addSon(child)
            child.addParent(parent)

        def rescale(value, upper):
            # Positive values are divided by 10 while above `upper`, then
            # multiplied by 10 while below 6; other values pass through.
            if value > 0:
                while value > upper:
                    value /= 10
                while value < 6:
                    value *= 10
            return value

        for addr in self.addr_array:
            if addr.x is None or addr.x == 'None':
                # No stored coordinate: fall back to the lookup by place name.
                xy = self._getXY(addr.name)
            else:
                # A coordinate is already recorded: scale the raw integers.
                raw = [int(value) / 1000000 for value in (addr.x, addr.y)]
                xy = [rescale(raw[0], 180), rescale(raw[1], 90)]

            addr.x = xy[0]
            addr.y = xy[1]

        print('加载地址管理器')
예제 #2
0
    def loadTextEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of writing ("文学事件") events and their roles.

        Args:
            LIMIT: maximum number of rows fetched by the main query.
            SKIP: number of rows skipped by the main query.
            person_id: when given, only events linked to the ``Biog_main``
                node with this ``c_personid`` are loaded.

        Returns:
            True when ``LIMIT`` rows were read (a further page may exist),
            False when no row was found or the page was shorter than LIMIT.
        """
        print('开始加载文学事件')
        if person_id is None:
            query = 'MATCH (person:Biog_main)--(event:Text_data)--(text:Text_codes) RETURN person,event,id(event),text SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main{{c_personid:"{}"}})--(event:Text_data)--(text:Text_codes) RETURN person,event,id(event),text SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))
        results = graph.run(query).data()
        id_list = []
        id2person = {}
        for node_data in results:
            event_id = str(node_data['id(event)'])
            event_node = node_data['event']
            # node_data['text'] also carries a time but is not used here.

            event = self.createEvents(event_id)
            event.type = '文学事件'
            field = 'c_year'
            year = event_node[field]
            if year is not None and year != 0 and year != '0' and year != 'None':
                year = int(year)
                range_field = graph.getTableRange('text_data', field)
                year_range = event_node[range_field]
                event.addTimeAndRange(year, year_range)

            id_list.append(event_id)
            # Remember which person belongs to which event for the role pass.
            id2person[event_id] = personManager.createPerson(
                node_data['person'])

        if len(id_list) == 0:
            return False

        id_list_str = ','.join(id_list)
        # Load the roles.
        results = graph.run(
            'START event=node({}) MATCH (role:Text_role_codes)--(event) RETURN role.c_role_desc_chn AS role, id(event)'
            .format(id_list_str))
        results = results.data()
        for result in results:
            role = result['role']
            event_id = str(result['id(event)'])
            # Use the event and person that belong to this row, not whatever
            # the first loop happened to leave behind in its loop variables.
            event = self.createEvents(event_id)
            event.addPerson(id2person[event_id], role)
            event.setTrigger('文学作品' + role)
        print('加载文学事件角色')

        if len(id_list) < LIMIT:
            return False
        return True
예제 #3
0
 def getPerson(self, person_id):
     """Return the person for ``person_id``, or None when it cannot be found.

     The id is normalised to a string. A person already listed in
     ``self.id_set`` is served from ``self.id2person``; otherwise the
     ``Biog_main`` node is queried and passed to ``self.createPerson``.
     """
     key = str(person_id)
     if key in self.id_set:
         return self.id2person[key]

     # Not cached yet: ask the graph database.
     print('run')
     rows = graph.run(
         'MATCH (n:Biog_main{{c_personid:"{}"}}) RETURN n'.format(
             key)).data()
     if len(rows) == 0:
         return None
     return self.createPerson(rows[0]['n'])
예제 #4
0
    def __init__(self):
        """Load every reign title (``Nian_hao`` node) into ``self.nian_hao``.

        Each entry is keyed by ``c_nianhao_id`` and holds the Chinese name,
        the id and a ``[first_year, last_year]`` list under ``time_range``.
        A bound that is missing or cannot be read as an integer keeps its
        open-ended default (-9999 for the start, 9999 for the end).
        """
        self.nian_hao = {}

        def parse_year(value):
            # Return the year as an int, or None when absent or unparsable.
            if value is None or value == 'None':
                return None
            if not re.match('[-]*[0-9]+', str(value)):
                return None
            try:
                return int(value)
            except (TypeError, ValueError):
                # The pattern is only a prefix check, so strings such as
                # '12abc' or '--5' get this far without being integers.
                return None

        for row in graph.run('MATCH (n:Nian_hao) RETURN n').data():
            node = row['n']
            nianhao_id = node['c_nianhao_id']
            time_range = [-9999, 9999]

            first_year = parse_year(node['c_firstyear'])
            if first_year is not None:
                time_range[0] = first_year
            last_year = parse_year(node['c_lastyear'])
            if last_year is not None:
                time_range[1] = last_year

            self.nian_hao[nianhao_id] = {
                'name': node['c_nianhao_chn'],
                'id': nianhao_id,
                'time_range': time_range,
            }
예제 #5
0
    def loadRelationEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of association ("关系事件") events from the graph.

        The main query creates the events and fills in their sequence and
        year; three follow-up queries over the same node ids attach the
        people with their roles, the address and the trigger word.

        Args:
            LIMIT: maximum number of rows fetched by the main query.
            SKIP: number of rows skipped by the main query.
            person_id: when given, restricts the main query to the
                ``Biog_main`` node with this ``c_personid``.

        Returns:
            True when ``LIMIT`` rows were read, False when no row was found
            or the page was shorter than LIMIT.
        """
        print('开始加载关系事件')
        # Fetch the events through neo4j.
        if person_id is not None:
            query = 'MATCH (person:Biog_main{{c_personid:"{}"}})--(n:Assoc_data) RETURN n, id(n) SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main)--(n:Assoc_data) RETURN n, id(n) SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))

        event_ids = []
        for record in graph.run(query).data():
            node_id = record['id(n)']
            node = record['n']

            event = self.createEvents(node_id)
            event.type = '关系事件'

            if 'c_sequence' in node.keys():
                sequence = node['c_sequence']
                if not (sequence == 'None' or sequence is None):
                    event.sequence = int(sequence)

            # Add the time when a usable year is recorded.
            assoc_year = node['c_assoc_year']
            if assoc_year not in (None, 0, '0', 'None'):
                assoc_year = int(assoc_year)
                range_field = graph.getTableRange('assoc_data', 'c_assoc_year')
                event.addTimeAndRange(assoc_year, node[range_field])

            event_ids.append(node_id)

        if len(event_ids) == 0:
            return False

        id_list_str = ','.join(str(node_id) for node_id in event_ids)

        # Load the participants and their roles.
        role_rows = graph.run(
            'START n=node({}) MATCH (person:Biog_main)-[r]-(n) RETURN r, person, id(n)'
            .format(id_list_str)).data()
        for row in role_rows:
            person = personManager.createPerson(row['person'])
            event_id = row['id(n)']
            role = row['r']['RELATION_TYPE']
            if role == '关系':
                role = '主角'
            self.createEvents(event_id).addPerson(person, role)
        print('加载关系事件角色')

        # Load the addresses.
        addr_rows = graph.run(
            'START n=node({}) MATCH (addr:Addr_codes)-[r]-(n) RETURN addr.c_addr_id as addr_id, id(n)'
            .format(id_list_str)).data()
        for row in addr_rows:
            addr_id = row['addr_id']
            event_id = row['id(n)']
            self.createEvents(event_id).setAddr(addrManager.getAddr(addr_id))
        print('加载关系数据地址')

        # Load the trigger words.
        trigger_rows = graph.run(
            'START n=node({}) MATCH (assoc:Assoc_codes)<-[r]-(n) RETURN assoc.c_assoc_desc_chn, id(n)'
            .format(id_list_str)).data()
        for row in trigger_rows:
            trigger_name = row['assoc.c_assoc_desc_chn']
            event_id = row['id(n)']
            self.createEvents(event_id).setTrigger(trigger_name)
        print('加载事件触发词')

        return len(event_ids) >= LIMIT
예제 #6
0
    def loadEntryEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of entry-into-office ("入仕事件") events.

        Each ``Entry_data`` node becomes an event with the linked person as
        '主角'; a follow-up query stores the entry method as the event detail
        and sets the trigger to '入仕'.

        Args:
            LIMIT: maximum number of rows fetched by the main query.
            SKIP: number of rows skipped by the main query.
            person_id: when given, restricts the main query to the
                ``Biog_main`` node with this ``c_personid``.

        Returns:
            True when ``LIMIT`` rows were read, False when no row was found
            or the page was shorter than LIMIT.
        """
        print('开始加载入仕数据')
        if person_id is None:
            query = 'MATCH (person:Biog_main)-[:参与人]->(event:Entry_data) RETURN person,event,id(event) SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main{{ c_personid:"{}" }})-[:参与人]->(event:Entry_data) RETURN person,event,id(event) SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))

        results = graph.run(query).data()
        id_list = []
        for node_data in results:
            event_id = str(node_data['id(event)'])
            person = node_data['person']
            event_node = node_data['event']

            event = self.createEvents(event_id)
            event.type = '入仕事件'
            field = 'c_year'
            year = event_node[field]

            if year is not None and year != 0 and year != '0' and year != 'None':
                year = int(year)
                range_field = graph.getTableRange('entry_data', field)
                year_range = event_node[range_field]
                event.addTimeAndRange(year, year_range)

            # The sequence is a property of the event node. The result row
            # only has the keys person / event / id(event), so checking the
            # row itself would never find 'c_sequence'.
            if 'c_sequence' in event_node.keys():
                sequence = event_node['c_sequence']
                if sequence != 'None' and sequence is not None:
                    event.sequence = int(sequence)

            id_list.append(event_id)

            person = personManager.createPerson(person)
            event.addPerson(person, '主角')

        if len(id_list) == 0:
            return False

        id_list_str = ','.join(id_list)

        # Entry method.
        results = graph.run(
            'START event=node({}) MATCH (method:Entry_codes)--(event) RETURN method.c_entry_desc_chn AS method, id(event)'
            .format(id_list_str))
        results = results.data()
        for result in results:
            method = result['method']
            event_id = str(result['id(event)'])
            event = self.createEvents(event_id)
            event.setTrigger('入仕')
            event.detail = str(method)
        print('加载入仕方式')

        if len(id_list) < LIMIT:
            return False
        return True
예제 #7
0
    def loadPostOfficeEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of office-holding ("官职事件") events.

        Every ``Posted_to_office_data`` node yields two events keyed by
        '担任' + id and '卸任' + id (taking and leaving the post). Follow-up
        queries attach the address, the office name and the appointment type.

        Args:
            LIMIT: maximum number of rows fetched by the main query.
            SKIP: number of rows skipped by the main query.
            person_id: when given, restricts the main query to the
                ``Biog_main`` node with this ``c_personid``.

        Returns:
            True when ``LIMIT`` rows were read, False when no row was found
            or the page was shorter than LIMIT.
        """
        print('开始加载仕途事件')
        # Extracted through neo4j. Going through Posting_data may be lossy:
        # some records are not linked together.
        if person_id is not None:
            query = 'MATCH (person:Biog_main{{ c_personid:"{}" }})--(n1:Posted_to_office_data)--(n2:Posting_data)--(n3:Posted_to_addr_data) RETURN person,n1,n2,n3,id(n1),id(n2),id(n3) SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main)--(n1:Posted_to_office_data)--(n2:Posting_data)--(n3:Posted_to_addr_data) RETURN person,n1,n2,n3,id(n1),id(n2),id(n3) SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))

        def apply_year(event, node, field):
            # Add the year stored under `field` to `event` when it is usable.
            year = node[field]
            if year is None or year == 0 or year == '0' or year == 'None':
                return
            year = int(year)
            range_field = graph.getTableRange('posted_to_office_data', field)
            event.addTimeAndRange(year, node[range_field])

        office_ids = []
        addr_ids = []
        # Maps the id of each of the three matched nodes to the office node id
        # that names the event pair.
        id2id = {}
        for record in graph.run(query).data():
            office_id = str(record['id(n1)'])
            posting_id = str(record['id(n2)'])
            addr_node_id = str(record['id(n3)'])
            office_node = record['n1']

            took_post = self.createEvents('担任' + office_id)
            left_post = self.createEvents('卸任' + office_id)
            took_post.type = '官职事件'
            left_post.type = '官职事件'

            apply_year(took_post, office_node, 'c_firstyear')
            # Leaving the post happens after the post was taken.
            left_post.addTimeAndRange(took_post.time_range[0], '之后')
            apply_year(left_post, office_node, 'c_lastyear')

            if 'c_sequence' in office_node.keys():
                sequence = office_node['c_sequence']
                if not (sequence == 'None' or sequence is None):
                    took_post.sequence = int(sequence)
                    left_post.sequence = int(sequence)

            office_ids.append(office_id)
            addr_ids.append(addr_node_id)

            id2id[office_id] = office_id
            id2id[posting_id] = office_id
            id2id[addr_node_id] = office_id

            person = personManager.createPerson(record['person'])
            took_post.addPerson(person, '主角')
            left_post.addPerson(person, '主角')

            took_post.setTrigger('担任')
            left_post.setTrigger('卸任')

        if len(office_ids) == 0:
            return False

        office_id_str = ','.join(office_ids)
        addr_id_str = ','.join(addr_ids)

        # Load the addresses.
        addr_rows = graph.run(
            'START n=node({}) MATCH (addr:Addr_codes)-[r]-(n) RETURN addr.c_addr_id as addr_id, id(n)'
            .format(addr_id_str)).data()
        for row in addr_rows:
            addr_id = row['addr_id']
            owner = id2id[str(row['id(n)'])]
            for prefix in ('担任', '卸任'):
                self.createEvents(prefix + owner).setAddr(
                    addrManager.getAddr(addr_id))
        print('加载仕途数据地址')

        # Load the offices.
        office_rows = graph.run(
            'START n=node({}) MATCH (office:Office_codes)-[r]-(n) RETURN office, id(n)'
            .format(office_id_str)).data()
        for row in office_rows:
            office = row['office']
            owner = id2id[str(row['id(n)'])]
            took_post = self.createEvents('担任' + owner)
            left_post = self.createEvents('卸任' + owner)
            took_post.detail = '担任' + str(office['c_office_chn'])
            left_post.detail = '卸任' + str(office['c_office_chn'])
        print('加载仕途官职')

        # Load how each office was granted.
        appointment_rows = graph.run(
            'START n=node({}) MATCH (office:Appointment_type_codes)-[r]-(n) RETURN office, id(n)'
            .format(office_id_str)).data()
        for row in appointment_rows:
            appointment_type = row['office']['c_appt_type_desc_chn']
            owner = id2id[str(row['id(n)'])]
            took_post = self.createEvents('担任' + owner)
            took_post.detail += '授予方式' + str(appointment_type)
        print('加载仕途官职')

        return len(office_ids) >= LIMIT
예제 #8
0
    def loadAddrEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of migration events (arriving at / leaving a place).

        Every ``Biog_addr_data`` node yields two events: an arrival ('前往')
        keyed by the node id as a string, and a departure ('离开') keyed by
        '离开' + that id. Follow-up queries attach the address to both events
        and the reason to the arrival event.

        Args:
            LIMIT: maximum number of rows fetched by the main query.
            SKIP: number of rows skipped by the main query.
            person_id: when given, restricts the main query to the
                ``Biog_main`` node with this ``c_personid``.

        Returns:
            True when ``LIMIT`` rows were read, False when no row was found
            or the page was shorter than LIMIT.
        """
        print('开始加载迁移事件')
        if person_id is None:
            query = 'MATCH (person:Biog_main)--(event:Biog_addr_data) RETURN person, event, id(event) SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main{{c_personid:"{}"}})-->(event:Biog_addr_data) RETURN person, event, id(event) SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))

        results = graph.run(query).data()
        id_list = []
        for record in results:
            # Use the string form of the id everywhere: the follow-up loops
            # below look the arrival event up with str(id(n)), so creating it
            # with the raw id would address it under a different key type.
            node_id = str(record['id(event)'])
            person = record['person']
            node = record['event']

            arrival = self.createEvents(node_id)
            arrival.addPerson(personManager.createPerson(person), '主角')
            arrival.setTrigger('前往')
            arrival.type = '前往'
            arrival.detail = '前往某地'

            field = 'c_firstyear'
            year = node[field]
            if year is not None and year != 0 and year != '0' and year != 'None':
                year = int(year)
                range_field = graph.getTableRange('Biog_addr_data', field)
                year_range = node[range_field]
                arrival.addTimeAndRange(year, year_range)

            departure = self.createEvents('离开' + node_id)
            departure.addPerson(personManager.createPerson(person), '主角')
            departure.setTrigger('离开')
            departure.type = '离开'
            departure.detail = '离开某地'

            field = 'c_lastyear'
            if field in node.keys():
                year = node[field]
                if year is not None and year != 0 and year != '0' and year != 'None':
                    year = int(year)
                    range_field = graph.getTableRange('Biog_addr_data', field)
                    year_range = node[range_field]
                    departure.addTimeAndRange(year, year_range)

            id_list.append(node_id)

        if len(id_list) == 0:
            return False
        id_list_str = ','.join(id_list)

        # Load the addresses.
        results = graph.run(
            'START n=node({}) MATCH (addr:Addr_codes)--(n) RETURN addr.c_addr_id as addr_id, id(n)'
            .format(id_list_str))
        results = results.data()
        for result in results:
            addr_id = result['addr_id']
            event_id = str(result['id(n)'])
            addr = addrManager.getAddr(addr_id)

            arrival = self.createEvents(event_id)
            arrival.setAddr(addr)
            arrival.detail = '前往' + str(addr.name)

            departure = self.createEvents('离开' + event_id)
            departure.setAddr(addr)
            departure.detail = '离开' + str(addr.name)
        print('加载迁移数据地址')

        # Load the reasons.
        results = graph.run(
            'START n=node({}) MATCH (addr:Biog_addr_codes)--(n) RETURN addr.c_addr_desc_chn as desc, id(n)'
            .format(id_list_str))
        results = results.data()
        for result in results:
            desc = str(result['desc'])
            event_id = str(result['id(n)'])

            arrival = self.createEvents(event_id)
            arrival.setTrigger(desc)
            arrival.detail = arrival.detail + '由于' + desc
        print('加载迁移原因')

        if len(id_list) < LIMIT:
            return False
        return True