Beispiel #1
0
    def loadTextEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of text (literary work) events and their roles.

        A first Cypher query walks ``Biog_main``--``Text_data``--``Text_codes``
        paths and creates/updates one event per ``Text_data`` node, remembering
        which person was returned with it. A second query fetches the
        ``Text_role_codes`` description for those events and attaches the
        remembered person to the matching event under that role.

        Args:
            LIMIT: Maximum number of rows requested from the first query.
            SKIP: Number of rows skipped by the first query.
            person_id: When given, only the ``Biog_main`` node whose
                ``c_personid`` equals this value is matched.

        Returns:
            True when the first query produced at least ``LIMIT`` rows,
            False when it produced fewer (including none).
        """
        print('开始加载文学事件')
        # NOTE(review): person_id is interpolated directly into the Cypher
        # text; confirm that callers only pass trusted identifiers.
        if person_id is None:
            query = 'MATCH (person:Biog_main)--(event:Text_data)--(text:Text_codes) RETURN person,event,id(event),text SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main{{c_personid:"{}"}})--(event:Text_data)--(text:Text_codes) RETURN person,event,id(event),text SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))
        results = graph.run(query).data()
        id_list = []
        id2person = {}
        for node_data in results:
            event_id = str(node_data['id(event)'])
            event_node = node_data['event']

            event = self.createEvents(event_id)
            event.type = '文学事件'
            field = 'c_year'
            year = event_node[field]
            # Skip missing / zero years, including their string spellings.
            if year is not None and year != 0 and year != '0' and year != 'None':
                year = int(year)
                range_field = graph.getTableRange('text_data', field)
                year_range = event_node[range_field]
                event.addTimeAndRange(year, year_range)

            id_list.append(event_id)
            # Remember the person of each event for the role pass below.
            id2person[event_id] = personManager.createPerson(
                node_data['person'])

        if not id_list:
            return False

        id_list_str = ','.join(id_list)
        # Load the roles.
        results = graph.run(
            'START event=node({}) MATCH (role:Text_role_codes)--(event) RETURN role.c_role_desc_chn AS role, id(event)'
            .format(id_list_str))
        results = results.data()
        for result in results:
            role = result['role']
            event_id = str(result['id(event)'])
            # Use the event and person that belong to this row's id instead
            # of whatever the first loop happened to leave behind.
            role_event = self.createEvents(event_id)
            role_event.addPerson(id2person[event_id], role)
            role_event.setTrigger('文学作品' + role)
        print('加载文学事件角色')

        if len(id_list) < LIMIT:
            return False
        return True
Beispiel #2
0
    def loadRelationEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of association ("relation") events.

        Creates/updates one event per ``Assoc_data`` node returned by the
        first query, then runs three follow-up queries over the same node ids
        to attach participants, an address and a trigger word.

        Args:
            LIMIT: Maximum number of rows requested from the first query.
            SKIP: Number of rows skipped by the first query.
            person_id: When given, only the ``Biog_main`` node whose
                ``c_personid`` equals this value is matched.

        Returns:
            True when the first query produced at least ``LIMIT`` rows,
            False when it produced fewer (including none).
        """
        print('开始加载关系事件')
        # Fetch the event nodes from neo4j.
        if person_id is not None:
            query = 'MATCH (person:Biog_main{{c_personid:"{}"}})--(n:Assoc_data) RETURN n, id(n) SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main)--(n:Assoc_data) RETURN n, id(n) SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))

        node_ids = []
        for row in graph.run(query).data():
            node_id = row['id(n)']
            node = row['n']

            event = self.createEvents(node_id)
            event.type = '关系事件'

            if 'c_sequence' in node.keys():
                sequence = node['c_sequence']
                if sequence is not None and sequence != 'None':
                    event.sequence = int(sequence)

            # Attach the association year unless it is missing or zero.
            assoc_year = node['c_assoc_year']
            if assoc_year not in (None, 0, '0', 'None'):
                assoc_year = int(assoc_year)
                range_field = graph.getTableRange('assoc_data', 'c_assoc_year')
                event.addTimeAndRange(assoc_year, node[range_field])

            # NOTE: reign-period codes (c_assoc_nh_code) are not applied here.
            node_ids.append(node_id)

        if not node_ids:
            return False

        id_list_str = ','.join(str(node_id) for node_id in node_ids)

        # Participants and their roles.
        role_rows = graph.run(
            'START n=node({}) MATCH (person:Biog_main)-[r]-(n) RETURN r, person, id(n)'
            .format(id_list_str)).data()
        for record in role_rows:
            person = personManager.createPerson(record['person'])
            event_id = record['id(n)']
            role = record['r']['RELATION_TYPE']
            # The generic relation type is recorded as the protagonist role.
            if role == '关系':
                role = '主角'
            self.createEvents(event_id).addPerson(person, role)
        print('加载关系事件角色')

        # Addresses.
        addr_rows = graph.run(
            'START n=node({}) MATCH (addr:Addr_codes)-[r]-(n) RETURN addr.c_addr_id as addr_id, id(n)'
            .format(id_list_str)).data()
        for record in addr_rows:
            addr_id = record['addr_id']
            target = self.createEvents(record['id(n)'])
            target.setAddr(addrManager.getAddr(addr_id))
        print('加载关系数据地址')

        # Trigger words.
        trigger_rows = graph.run(
            'START n=node({}) MATCH (assoc:Assoc_codes)<-[r]-(n) RETURN assoc.c_assoc_desc_chn, id(n)'
            .format(id_list_str)).data()
        for record in trigger_rows:
            trigger_name = record['assoc.c_assoc_desc_chn']
            self.createEvents(record['id(n)']).setTrigger(trigger_name)
        print('加载事件触发词')

        # A short page means there is nothing left to fetch.
        return not len(node_ids) < LIMIT
Beispiel #3
0
    def loadEntryEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of entry-into-office events and their entry method.

        Creates/updates one event per ``Entry_data`` node reached from a
        ``Biog_main`` node through a ``参与人`` relationship, adds that person
        as the protagonist, and then stores the linked ``Entry_codes``
        description as the event detail.

        Args:
            LIMIT: Maximum number of rows requested from the first query.
            SKIP: Number of rows skipped by the first query.
            person_id: When given, only the ``Biog_main`` node whose
                ``c_personid`` equals this value is matched.

        Returns:
            True when the first query produced at least ``LIMIT`` rows,
            False when it produced fewer (including none).
        """
        print('开始加载入仕数据')
        # NOTE(review): person_id is interpolated directly into the Cypher
        # text; confirm that callers only pass trusted identifiers.
        if person_id is None:
            query = 'MATCH (person:Biog_main)-[:参与人]->(event:Entry_data) RETURN person,event,id(event) SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main{{ c_personid:"{}" }})-[:参与人]->(event:Entry_data) RETURN person,event,id(event) SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))

        results = graph.run(query).data()
        id_list = []
        for node_data in results:
            event_id = str(node_data['id(event)'])
            person = node_data['person']
            event_node = node_data['event']

            event = self.createEvents(event_id)
            event.type = '入仕事件'
            field = 'c_year'
            year = event_node[field]

            # Skip missing / zero years, including their string spellings.
            if year is not None and year != 0 and year != '0' and year != 'None':
                year = int(year)
                range_field = graph.getTableRange('entry_data', field)
                year_range = event_node[range_field]
                event.addTimeAndRange(year, year_range)

            # The sequence is a property of the event node; the result row
            # itself only has the keys person / event / id(event).
            if 'c_sequence' in event_node.keys():
                sequence = event_node['c_sequence']
                if sequence != 'None' and sequence is not None:
                    event.sequence = int(sequence)

            id_list.append(event_id)

            person = personManager.createPerson(person)
            event.addPerson(person, '主角')

        if not id_list:
            return False

        id_list_str = ','.join(id_list)

        # Entry method.
        results = graph.run(
            'START event=node({}) MATCH (method:Entry_codes)--(event) RETURN method.c_entry_desc_chn AS method, id(event)'
            .format(id_list_str))
        results = results.data()
        for result in results:
            method = result['method']
            event_id = str(result['id(event)'])
            entry_event = self.createEvents(event_id)
            entry_event.setTrigger('入仕')
            entry_event.detail = str(method)
        print('加载入仕方式')

        if len(id_list) < LIMIT:
            return False
        return True
Beispiel #4
0
    def loadPostOfficeEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of office-posting events.

        Every ``Posted_to_office_data`` node yields two events, keyed
        ``'担任' + id`` (taking office) and ``'卸任' + id`` (leaving office).
        Follow-up queries attach the address, the office title and the
        appointment type to those events.

        Args:
            LIMIT: Maximum number of rows requested from the first query.
            SKIP: Number of rows skipped by the first query.
            person_id: When given, only the ``Biog_main`` node whose
                ``c_personid`` equals this value is matched.

        Returns:
            True when the first query produced at least ``LIMIT`` rows,
            False when it produced fewer (including none).
        """
        print('开始加载仕途事件')
        # Extracted through neo4j.
        # Going through Posting_data may be lossy: some records are not linked.
        if person_id is not None:
            query = 'MATCH (person:Biog_main{{ c_personid:"{}" }})--(n1:Posted_to_office_data)--(n2:Posting_data)--(n3:Posted_to_addr_data) RETURN person,n1,n2,n3,id(n1),id(n2),id(n3) SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main)--(n1:Posted_to_office_data)--(n2:Posting_data)--(n3:Posted_to_addr_data) RETURN person,n1,n2,n3,id(n1),id(n2),id(n3) SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))

        office_ids = []
        addr_ids = []
        # Maps every node id of a row back to the office node id that the
        # two events of that row are keyed by.
        id2id = {}
        for row in graph.run(query).data():
            office_id = str(row['id(n1)'])
            posting_id = str(row['id(n2)'])
            addr_node_id = str(row['id(n3)'])
            office_node = row['n1']

            start_event = self.createEvents('担任' + office_id)  # taking office
            end_event = self.createEvents('卸任' + office_id)  # leaving office
            start_event.type = '官职事件'
            end_event.type = '官职事件'

            first_year = office_node['c_firstyear']
            if first_year not in (None, 0, '0', 'None'):
                first_year = int(first_year)
                range_field = graph.getTableRange('posted_to_office_data',
                                                  'c_firstyear')
                start_event.addTimeAndRange(first_year,
                                            office_node[range_field])

            # Leaving office is bounded below by the start of the posting.
            end_event.addTimeAndRange(start_event.time_range[0], '之后')
            last_year = office_node['c_lastyear']
            if last_year not in (None, 0, '0', 'None'):
                last_year = int(last_year)
                range_field = graph.getTableRange('posted_to_office_data',
                                                  'c_lastyear')
                end_event.addTimeAndRange(last_year, office_node[range_field])

            if 'c_sequence' in office_node.keys():
                sequence = office_node['c_sequence']
                if sequence is not None and sequence != 'None':
                    start_event.sequence = int(sequence)
                    end_event.sequence = int(sequence)

            office_ids.append(office_id)
            addr_ids.append(addr_node_id)

            for related_id in (office_id, posting_id, addr_node_id):
                id2id[related_id] = office_id

            person = personManager.createPerson(row['person'])
            start_event.addPerson(person, '主角')
            end_event.addPerson(person, '主角')

            start_event.setTrigger('担任')
            end_event.setTrigger('卸任')

        if not office_ids:
            return False

        office_id_str = ','.join(office_ids)
        addr_id_str = ','.join(addr_ids)

        # Addresses.
        addr_rows = graph.run(
            'START n=node({}) MATCH (addr:Addr_codes)-[r]-(n) RETURN addr.c_addr_id as addr_id, id(n)'
            .format(addr_id_str)).data()
        for record in addr_rows:
            addr_id = record['addr_id']
            owner_id = id2id[str(record['id(n)'])]
            for prefix in ('担任', '卸任'):
                self.createEvents(prefix + owner_id).setAddr(
                    addrManager.getAddr(addr_id))
        print('加载仕途数据地址')

        # Office titles.
        office_rows = graph.run(
            'START n=node({}) MATCH (office:Office_codes)-[r]-(n) RETURN office, id(n)'
            .format(office_id_str)).data()
        for record in office_rows:
            office_code = record['office']
            owner_id = id2id[str(record['id(n)'])]
            start_event = self.createEvents('担任' + owner_id)
            end_event = self.createEvents('卸任' + owner_id)
            start_event.detail = '担任' + str(office_code['c_office_chn'])
            end_event.detail = '卸任' + str(office_code['c_office_chn'])
        print('加载仕途官职')

        # How the office was granted; only the taking-office event gets it.
        appt_rows = graph.run(
            'START n=node({}) MATCH (office:Appointment_type_codes)-[r]-(n) RETURN office, id(n)'
            .format(office_id_str)).data()
        for record in appt_rows:
            appt_desc = record['office']['c_appt_type_desc_chn']
            owner_id = id2id[str(record['id(n)'])]
            start_event = self.createEvents('担任' + owner_id)
            start_event.detail += '授予方式' + str(appt_desc)
        print('加载仕途官职')

        # A short page means there is nothing left to fetch.
        return not len(office_ids) < LIMIT
Beispiel #5
0
    def loadAddrEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
        """Load one page of migration (address) events.

        Every ``Biog_addr_data`` node yields two events: an arrival event
        keyed by the node id and a departure event keyed by
        ``'离开' + id``. Follow-up queries attach the address to both events
        and the reason description to the arrival event.

        The node id is converted to ``str`` once, up front, so the arrival
        event is created and later looked up under the same key.

        Args:
            LIMIT: Maximum number of rows requested from the first query.
            SKIP: Number of rows skipped by the first query.
            person_id: When given, only the ``Biog_main`` node whose
                ``c_personid`` equals this value is matched.

        Returns:
            True when the first query produced at least ``LIMIT`` rows,
            False when it produced fewer (including none).
        """
        print('开始加载迁移事件')
        # NOTE(review): person_id is interpolated directly into the Cypher
        # text; confirm that callers only pass trusted identifiers.
        if person_id is None:
            query = 'MATCH (person:Biog_main)--(event:Biog_addr_data) RETURN person, event, id(event) SKIP {} LIMIT {} '.format(
                str(SKIP), str(LIMIT))
        else:
            query = 'MATCH (person:Biog_main{{c_personid:"{}"}})-->(event:Biog_addr_data) RETURN person, event, id(event) SKIP {} LIMIT {} '.format(
                str(person_id), str(SKIP), str(LIMIT))

        results = graph.run(query).data()
        id_list = []
        for row in results:
            # Same string key that the follow-up loops use for lookups.
            node_id = str(row['id(event)'])
            person = row['person']
            event_node = row['event']

            # Arrival event.
            event = self.createEvents(node_id)
            event.addPerson(personManager.createPerson(person), '主角')
            event.setTrigger('前往')
            event.type = '前往'
            event.detail = '前往某地'

            field = 'c_firstyear'
            year = event_node[field]
            if year is not None and year != 0 and year != '0' and year != 'None':
                year = int(year)
                range_field = graph.getTableRange('Biog_addr_data', field)
                year_range = event_node[range_field]
                event.addTimeAndRange(year, year_range)

            # Departure event.
            event = self.createEvents('离开' + node_id)
            event.addPerson(personManager.createPerson(person), '主角')
            event.setTrigger('离开')
            event.type = '离开'
            event.detail = '离开某地'

            field = 'c_lastyear'
            if field in event_node.keys():
                year = event_node[field]
                if year is not None and year != 0 and year != '0' and year != 'None':
                    year = int(year)
                    range_field = graph.getTableRange('Biog_addr_data', field)
                    year_range = event_node[range_field]
                    event.addTimeAndRange(year, year_range)

            id_list.append(node_id)

        if not id_list:
            return False
        id_list_str = ','.join(id_list)

        # Addresses: refine the placeholder detail with the place name.
        results = graph.run(
            'START n=node({}) MATCH (addr:Addr_codes)--(n) RETURN addr.c_addr_id as addr_id, id(n)'
            .format(id_list_str))
        results = results.data()
        for result in results:
            addr_id = result['addr_id']
            event_id = str(result['id(n)'])
            addr = addrManager.getAddr(addr_id)

            event = self.createEvents(event_id)
            event.setAddr(addr)
            event.detail = '前往' + str(addr.name)

            event = self.createEvents('离开' + event_id)
            event.setAddr(addr)
            event.detail = '离开' + str(addr.name)

        print('加载迁移数据地址')

        # Reasons: only the arrival event receives the description.
        results = graph.run(
            'START n=node({}) MATCH (addr:Biog_addr_codes)--(n) RETURN addr.c_addr_desc_chn as desc, id(n)'
            .format(id_list_str))
        results = results.data()
        for result in results:
            desc = str(result['desc'])
            event_id = str(result['id(n)'])

            event = self.createEvents(event_id)
            event.setTrigger(desc)
            event.detail = event.detail + '由于' + desc
        print('加载迁移原因')

        if len(id_list) < LIMIT:
            return False
        return True