def __init__(self):
    """Load every address node from the graph and resolve its coordinates.

    Steps, in order:
      1. initialise the lookup containers and the place-name -> xy table
         returned by ``self.xyParser()``;
      2. register every ``Addr_codes`` node through ``self.createAddr``;
      3. link child and parent addresses via ``Addr_belongs_data`` nodes;
      4. overwrite ``addr.x`` / ``addr.y`` of every entry in
         ``self.addr_array`` with either the rescaled stored value or the
         result of ``self._getXY(addr.name)``.
    """
    self.id2addr = {}
    self.addr_id_set = set()
    self.addr_array = []
    self.place2xy = self.xyParser()
    self.place2xy_set = set(self.place2xy.keys())

    # Load all addresses recorded in CBDB.
    for record in graph.run('MATCH (n:Addr_codes) RETURN id(n), n').data():
        self.createAddr(record['n'])

    # Wire up the parent/child relations between addresses.
    belong_query = 'MATCH (n1:Addr_codes)-->(:Addr_belongs_data)-->(n2:Addr_codes) RETURN n1.c_addr_id as son_id, n2.c_addr_id as parent_id'
    for link in graph.run(belong_query).data():
        child = self.getAddr(link['son_id'])
        parent = self.getAddr(link['parent_id'])
        parent.addSon(child)
        child.addParent(parent)

    def rescale(value, upper):
        # Positive values are shifted by powers of ten until they are at
        # most `upper` and at least 6; non-positive values are left as is.
        if value > 0:
            while value > upper:
                value /= 10
            while value < 6:
                value *= 10
        return value

    for addr in self.addr_array:
        if addr.x is not None and addr.x != 'None':
            # A coordinate is already recorded: stored values are divided
            # by 1e6 (presumably micro-degrees -- confirm) and rescaled.
            first, second = [int(raw) / 1000000 for raw in (addr.x, addr.y)]
            first = rescale(first, 180)
            second = rescale(second, 90)
        else:
            # No stored coordinate: resolve it from the address name.
            resolved = self._getXY(addr.name)
            first, second = resolved[0], resolved[1]
        addr.x = first
        addr.y = second

    print('加载地址管理器')
def loadTextEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
    """Load one page of text (literary work) events and their roles.

    Args:
        LIMIT: maximum number of rows fetched by the main query.
        SKIP: number of rows skipped by the main query.
        person_id: when given, only events linked to the ``Biog_main``
            node with this ``c_personid`` are loaded.

    Returns:
        ``False`` when the page produced no rows or fewer than ``LIMIT``
        rows, ``True`` otherwise (presumably "another page may exist" --
        confirm against the caller).
    """
    print('开始加载文学事件')
    if person_id is None:
        query = 'MATCH (person:Biog_main)--(event:Text_data)--(text:Text_codes) RETURN person,event,id(event),text SKIP {} LIMIT {} '.format(
            str(SKIP), str(LIMIT))
    else:
        query = 'MATCH (person:Biog_main{{c_personid:"{}"}})--(event:Text_data)--(text:Text_codes) RETURN person,event,id(event),text SKIP {} LIMIT {} '.format(
            str(person_id), str(SKIP), str(LIMIT))
    results = graph.run(query).data()

    id_list = []
    id2person = {}
    for node_data in results:
        event_id = str(node_data['id(event)'])
        event_node = node_data['event']

        event = self.createEvents(event_id)
        event.type = '文学事件'

        field = 'c_year'
        year = event_node[field]
        if year is not None and year != 0 and year != '0' and year != 'None':
            year = int(year)
            range_field = graph.getTableRange('text_data', field)
            year_range = event_node[range_field]
            event.addTimeAndRange(year, year_range)

        id_list.append(event_id)
        # Remember which person belongs to which event so the role query
        # below can attach the right one.
        id2person[event_id] = personManager.createPerson(node_data['person'])

    if not id_list:
        return False
    id_list_str = ','.join(id_list)

    # Load the role each person plays in the event.
    results = graph.run(
        'START event=node({}) MATCH (role:Text_role_codes)--(event) RETURN role.c_role_desc_chn AS role, id(event)'
        .format(id_list_str)).data()
    for result in results:
        role = result['role']
        event_id = str(result['id(event)'])
        # Resolve both the event and its person from the row's own id;
        # the previous code reused the last `person` / `event` left over
        # from the first loop for every role row.
        event = self.createEvents(event_id)
        event.addPerson(id2person[event_id], role)
        event.setTrigger('文学作品' + role)
    print('加载文学事件角色')

    if len(id_list) < LIMIT:
        return False
    return True
def getPerson(self, person_id):
    """Return the person registered under *person_id*.

    The id is compared as a string. When it is present in ``self.id_set``
    the cached entry of ``self.id2person`` is returned; otherwise the
    ``Biog_main`` node with that ``c_personid`` is queried and, if found,
    handed to ``self.createPerson``. Returns ``None`` when the query
    yields no row.
    """
    key = str(person_id)
    if key in self.id_set:
        return self.id2person[key]

    # Not cached yet: fetch the node from the graph.
    print('run')
    cypher = 'MATCH (n:Biog_main{{c_personid:"{}"}}) RETURN n'.format(key)
    rows = graph.run(cypher).data()
    if len(rows) == 0:
        return None
    return self.createPerson(rows[0]['n'])
def __init__(self):
    """Load every ``Nian_hao`` node into ``self.nian_hao``.

    Each entry is keyed by ``c_nianhao_id`` and holds the keys ``name``,
    ``id`` and ``time_range``. ``time_range`` is ``[first, last]``; a
    bound that is missing or not parseable as an integer stays at the
    open default (-9999 for the start, 9999 for the end).
    """
    self.nian_hao = {}

    def parse_year(value, fallback):
        # Same acceptance rule as before (not None, not 'None', starts
        # with optional dashes followed by digits), but a value that then
        # fails int() -- e.g. '12abc' or '--5', which the prefix-only
        # match lets through -- yields the fallback instead of raising.
        if value is None or value == 'None':
            return fallback
        if not re.match(r'[-]*[0-9]+', str(value)):
            return fallback
        try:
            return int(value)
        except (TypeError, ValueError):
            return fallback

    for record in graph.run('MATCH (n:Nian_hao) RETURN n').data():
        node = record['n']
        nian_hao_id = node['c_nianhao_id']
        time_range = [
            parse_year(node['c_firstyear'], -9999),
            parse_year(node['c_lastyear'], 9999),
        ]
        self.nian_hao[nian_hao_id] = {
            'name': node['c_nianhao_chn'],
            'id': nian_hao_id,
            'time_range': time_range,
        }
def loadRelationEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
    """Load one page of association events with people, places and triggers.

    Args:
        LIMIT: maximum number of rows fetched by the main query.
        SKIP: number of rows skipped by the main query.
        person_id: when given, restricts the query to the ``Biog_main``
            node with this ``c_personid``.

    Returns:
        ``False`` when the page is empty or shorter than ``LIMIT``,
        ``True`` otherwise.
    """
    print('开始加载关系事件')

    # Fetch the event nodes from neo4j.
    if person_id is not None:
        query = 'MATCH (person:Biog_main{{c_personid:"{}"}})--(n:Assoc_data) RETURN n, id(n) SKIP {} LIMIT {} '.format(
            str(person_id), str(SKIP), str(LIMIT))
    else:
        query = 'MATCH (person:Biog_main)--(n:Assoc_data) RETURN n, id(n) SKIP {} LIMIT {} '.format(
            str(SKIP), str(LIMIT))

    loaded_ids = []
    for row in graph.run(query).data():
        node_id = row['id(n)']
        node = row['n']

        event = self.createEvents(node_id)
        event.type = '关系事件'

        if 'c_sequence' in node.keys():
            sequence = node['c_sequence']
            if sequence is not None and sequence != 'None':
                event.sequence = int(sequence)

        # Attach the year only when a usable value is recorded.
        assoc_year = node['c_assoc_year']
        if assoc_year not in (None, 0, '0', 'None'):
            assoc_year = int(assoc_year)
            range_field = graph.getTableRange('assoc_data', 'c_assoc_year')
            event.addTimeAndRange(assoc_year, node[range_field])

        loaded_ids.append(node_id)

    if len(loaded_ids) == 0:
        return False
    id_list_str = ','.join(str(node_id) for node_id in loaded_ids)

    # Participants and their roles.
    role_rows = graph.run(
        'START n=node({}) MATCH (person:Biog_main)-[r]-(n) RETURN r, person, id(n)'
        .format(id_list_str)).data()
    for row in role_rows:
        person = personManager.createPerson(row['person'])
        event_id = row['id(n)']
        role = row['r']['RELATION_TYPE']
        # The generic relation label is reported as the protagonist role.
        role = '主角' if role == '关系' else role
        self.createEvents(event_id).addPerson(person, role)
    print('加载关系事件角色')

    # Places.
    addr_rows = graph.run(
        'START n=node({}) MATCH (addr:Addr_codes)-[r]-(n) RETURN addr.c_addr_id as addr_id, id(n)'
        .format(id_list_str)).data()
    for row in addr_rows:
        addr_id = row['addr_id']
        event = self.createEvents(row['id(n)'])
        event.setAddr(addrManager.getAddr(addr_id))
    print('加载关系数据地址')

    # Trigger words.
    trigger_rows = graph.run(
        'START n=node({}) MATCH (assoc:Assoc_codes)<-[r]-(n) RETURN assoc.c_assoc_desc_chn, id(n)'
        .format(id_list_str)).data()
    for row in trigger_rows:
        trigger_name = row['assoc.c_assoc_desc_chn']
        self.createEvents(row['id(n)']).setTrigger(trigger_name)
    print('加载事件触发词')

    return not len(loaded_ids) < LIMIT
def loadEntryEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
    """Load one page of entry-into-office events and their entry method.

    Args:
        LIMIT: maximum number of rows fetched by the main query.
        SKIP: number of rows skipped by the main query.
        person_id: when given, restricts the query to the ``Biog_main``
            node with this ``c_personid``.

    Returns:
        ``False`` when the page is empty or shorter than ``LIMIT``,
        ``True`` otherwise.
    """
    print('开始加载入仕数据')
    if person_id is None:
        query = 'MATCH (person:Biog_main)-[:参与人]->(event:Entry_data) RETURN person,event,id(event) SKIP {} LIMIT {} '.format(
            str(SKIP), str(LIMIT))
    else:
        query = 'MATCH (person:Biog_main{{ c_personid:"{}" }})-[:参与人]->(event:Entry_data) RETURN person,event,id(event) SKIP {} LIMIT {} '.format(
            str(person_id), str(SKIP), str(LIMIT))
    results = graph.run(query).data()

    id_list = []
    for node_data in results:
        event_id = str(node_data['id(event)'])
        event_node = node_data['event']

        event = self.createEvents(event_id)
        event.type = '入仕事件'

        field = 'c_year'
        year = event_node[field]
        if year is not None and year != 0 and year != '0' and year != 'None':
            year = int(year)
            range_field = graph.getTableRange('entry_data', field)
            year_range = event_node[range_field]
            event.addTimeAndRange(year, year_range)

        # The sequence is a property of the event node. The previous code
        # looked for it on the result row (keys: person, event, id(event)),
        # so it was never found and event.sequence was never set.
        if 'c_sequence' in event_node.keys():
            sequence = event_node['c_sequence']
            if sequence != 'None' and sequence is not None:
                event.sequence = int(sequence)

        id_list.append(event_id)
        person = personManager.createPerson(node_data['person'])
        event.addPerson(person, '主角')

    if not id_list:
        return False
    id_list_str = ','.join(id_list)

    # Entry method: stored as the event detail, with a fixed trigger word.
    results = graph.run(
        'START event=node({}) MATCH (method:Entry_codes)--(event) RETURN method.c_entry_desc_chn AS method, id(event)'
        .format(id_list_str)).data()
    for result in results:
        method = result['method']
        event_id = str(result['id(event)'])
        event = self.createEvents(event_id)
        event.setTrigger('入仕')
        event.detail = str(method)
    print('加载入仕方式')

    if len(id_list) < LIMIT:
        return False
    return True
def loadPostOfficeEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
    """Load one page of office postings as paired take-office / leave-office events.

    Every ``Posted_to_office_data`` node yields two events, keyed
    ``'担任' + id`` and ``'卸任' + id``. Places, office names and the
    appointment type are attached by follow-up queries.

    Args:
        LIMIT: maximum number of rows fetched by the main query.
        SKIP: number of rows skipped by the main query.
        person_id: when given, restricts the query to the ``Biog_main``
            node with this ``c_personid``.

    Returns:
        ``False`` when the page is empty or shorter than ``LIMIT``,
        ``True`` otherwise.
    """
    print('开始加载仕途事件')

    # NOTE (from the original author): going through Posting_data may be
    # lossy, as some records are not linked up.
    if person_id is not None:
        query = 'MATCH (person:Biog_main{{ c_personid:"{}" }})--(n1:Posted_to_office_data)--(n2:Posting_data)--(n3:Posted_to_addr_data) RETURN person,n1,n2,n3,id(n1),id(n2),id(n3) SKIP {} LIMIT {} '.format(
            str(person_id), str(SKIP), str(LIMIT))
    else:
        query = 'MATCH (person:Biog_main)--(n1:Posted_to_office_data)--(n2:Posting_data)--(n3:Posted_to_addr_data) RETURN person,n1,n2,n3,id(n1),id(n2),id(n3) SKIP {} LIMIT {} '.format(
            str(SKIP), str(LIMIT))

    def attach_year(event, node, field):
        # Add the year stored in `field`, with its range qualifier, when a
        # usable value is present.
        year = node[field]
        if year not in (None, 0, '0', 'None'):
            year = int(year)
            range_field = graph.getTableRange('posted_to_office_data', field)
            event.addTimeAndRange(year, node[range_field])

    office_ids = []
    addr_data_ids = []
    # Maps the id of any of the three matched nodes to the id of the
    # Posted_to_office_data node, which keys both events.
    id2id = {}
    for row in graph.run(query).data():
        office_id = str(row['id(n1)'])
        posting_id = str(row['id(n2)'])
        addr_data_id = str(row['id(n3)'])
        office_node = row['n1']

        taking = self.createEvents('担任' + office_id)   # taking office
        leaving = self.createEvents('卸任' + office_id)  # leaving office
        taking.type = '官职事件'
        leaving.type = '官职事件'

        attach_year(taking, office_node, 'c_firstyear')
        # Leaving is always bounded below by the start of the posting.
        leaving.addTimeAndRange(taking.time_range[0], '之后')
        attach_year(leaving, office_node, 'c_lastyear')

        if 'c_sequence' in office_node.keys():
            sequence = office_node['c_sequence']
            if sequence is not None and sequence != 'None':
                taking.sequence = int(sequence)
                leaving.sequence = int(sequence)

        office_ids.append(office_id)
        addr_data_ids.append(addr_data_id)
        for related_id in (office_id, posting_id, addr_data_id):
            id2id[related_id] = office_id

        person = personManager.createPerson(row['person'])
        taking.addPerson(person, '主角')
        leaving.addPerson(person, '主角')
        taking.setTrigger('担任')
        leaving.setTrigger('卸任')

    if len(office_ids) == 0:
        return False
    office_ids_str = ','.join(office_ids)
    addr_data_ids_str = ','.join(addr_data_ids)

    # Places.
    addr_rows = graph.run(
        'START n=node({}) MATCH (addr:Addr_codes)-[r]-(n) RETURN addr.c_addr_id as addr_id, id(n)'
        .format(addr_data_ids_str)).data()
    for row in addr_rows:
        addr_id = row['addr_id']
        owner_id = str(row['id(n)'])
        for prefix in ('担任', '卸任'):
            self.createEvents(prefix + id2id[owner_id]).setAddr(
                addrManager.getAddr(addr_id))
    print('加载仕途数据地址')

    # Office names.
    office_rows = graph.run(
        'START n=node({}) MATCH (office:Office_codes)-[r]-(n) RETURN office, id(n)'
        .format(office_ids_str)).data()
    for row in office_rows:
        office = row['office']
        owner_id = str(row['id(n)'])
        taking = self.createEvents('担任' + id2id[owner_id])
        leaving = self.createEvents('卸任' + id2id[owner_id])
        taking.detail = '担任' + str(office['c_office_chn'])
        leaving.detail = '卸任' + str(office['c_office_chn'])
    print('加载仕途官职')

    # How the office was conferred; appended to the take-office detail.
    appointment_rows = graph.run(
        'START n=node({}) MATCH (office:Appointment_type_codes)-[r]-(n) RETURN office, id(n)'
        .format(office_ids_str)).data()
    for row in appointment_rows:
        appointment = row['office']['c_appt_type_desc_chn']
        owner_id = str(row['id(n)'])
        self.createEvents(
            '担任' + id2id[owner_id]).detail += '授予方式' + str(appointment)
    print('加载仕途官职')

    return not len(office_ids) < LIMIT
def loadAddrEvents(self, LIMIT=1000000, SKIP=0, person_id=None):
    """Load one page of migration records as paired arrive / leave events.

    Every ``Biog_addr_data`` node yields an arrival event keyed by the
    node id and a departure event keyed ``'离开' + id``. The place and the
    reason are attached by follow-up queries.

    Args:
        LIMIT: maximum number of rows fetched by the main query.
        SKIP: number of rows skipped by the main query.
        person_id: when given, restricts the query to the ``Biog_main``
            node with this ``c_personid``.

    Returns:
        ``False`` when the page is empty or shorter than ``LIMIT``,
        ``True`` otherwise.
    """
    print('开始加载迁移事件')
    if person_id is None:
        query = 'MATCH (person:Biog_main)--(event:Biog_addr_data) RETURN person, event, id(event) SKIP {} LIMIT {} '.format(
            str(SKIP), str(LIMIT))
    else:
        query = 'MATCH (person:Biog_main{{c_personid:"{}"}})-->(event:Biog_addr_data) RETURN person, event, id(event) SKIP {} LIMIT {} '.format(
            str(person_id), str(SKIP), str(LIMIT))
    results = graph.run(query).data()

    def attach_year(event, node, field):
        # Add the year stored in `field`, with its range qualifier, when a
        # usable value is present.
        year = node[field]
        if year is not None and year != 0 and year != '0' and year != 'None':
            year = int(year)
            range_field = graph.getTableRange('Biog_addr_data', field)
            event.addTimeAndRange(year, node[range_field])

    id_list = []
    for node_data in results:
        # Use the string form of the id everywhere. The arrival event used
        # to be created under the raw id while the address and reason
        # loops below look it up under str(id); NOTE(review): if
        # createEvents does not normalise its key, those were two
        # different events.
        node_id = str(node_data['id(event)'])
        person = personManager.createPerson(node_data['person'])
        event_node = node_data['event']

        arrive = self.createEvents(node_id)
        arrive.addPerson(person, '主角')
        arrive.setTrigger('前往')
        arrive.type = '前往'
        arrive.detail = '前往某地'
        attach_year(arrive, event_node, 'c_firstyear')

        leave = self.createEvents('离开' + node_id)
        leave.addPerson(person, '主角')
        leave.setTrigger('离开')
        leave.type = '离开'
        leave.detail = '离开某地'
        # The end year is only read when the property exists on the node.
        if 'c_lastyear' in event_node.keys():
            attach_year(leave, event_node, 'c_lastyear')

        id_list.append(node_id)

    if not id_list:
        return False
    id_list_str = ','.join(id_list)

    # Places: replace the placeholder detail with the actual place name.
    results = graph.run(
        'START n=node({}) MATCH (addr:Addr_codes)--(n) RETURN addr.c_addr_id as addr_id, id(n)'
        .format(id_list_str)).data()
    for result in results:
        addr_id = result['addr_id']
        event_id = str(result['id(n)'])

        event = self.createEvents(event_id)
        addr = addrManager.getAddr(addr_id)
        event.setAddr(addr)
        event.detail = '前往' + str(addr.name)

        event = self.createEvents('离开' + event_id)
        addr = addrManager.getAddr(addr_id)
        event.setAddr(addr)
        event.detail = '离开' + str(addr.name)
    print('加载迁移数据地址')

    # Reasons: used as the arrival trigger and appended to its detail.
    results = graph.run(
        'START n=node({}) MATCH (addr:Biog_addr_codes)--(n) RETURN addr.c_addr_desc_chn as desc, id(n)'
        .format(id_list_str)).data()
    for result in results:
        desc = str(result['desc'])
        event_id = str(result['id(n)'])
        event = self.createEvents(event_id)
        event.setTrigger(desc)
        event.detail = event.detail + '由于' + desc
    print('加载迁移原因')

    if len(id_list) < LIMIT:
        return False
    return True