def graphs(name=None):
    """Return graph builder objects keyed by their class name.

    :param name: optional registry key (e.g. ``'EtpGraph'``). When given,
        only that graph class is instantiated and returned.
    :return: the requested graph object, or a dict mapping every registered
        name to a freshly constructed graph object when ``name`` is None.
    :raises KeyError: if ``name`` is not a registered graph name.
    """
    factories = {
        'BaseGraph': BaseGraph,
        'EtpGraph': EtpGraph,
        'OptGraph': OptGraph,
        'OptRiskGraph': OptRiskGraph,
        'DvpGraph': DvpGraph,
        'RightsGraph': RightsGraph,
        'JusGraph': JusGraph,
        'NewsGraph': NewsGraph,
    }
    if name is not None:
        # Construct only the graph that was asked for; previously all eight
        # graph objects were built just to hand back one of them.
        return factories[name]()
    return {key: factory() for key, factory in factories.items()}
def runEtpGraph():
    """Run the EtpGraph node/relationship collection step.

    Builds an ``EtpGraph`` whose log path is ``log_save_dir`` followed by
    ``'EtpGraph_log.txt'`` and calls ``get_all_nodes_and_relationships`` on
    it with ``import_path`` and ``mode='a'``. The returned pair is not used.

    :return: None
    """
    log_path = log_save_dir + 'EtpGraph_log.txt'
    graph = EtpGraph(log_save_path=log_path)
    # NOTE(review): mode='a' presumably means "append to existing output";
    # confirm against get_all_nodes_and_relationships.
    graph.get_all_nodes_and_relationships(import_path, mode='a')
def create_all_relationship(self):
    """Create enterprise -[Have]-> news relationships.

    Iterates the '公司新闻' records returned by ``self.base.query`` (the
    query skips the first 2020 records). For each record the company node is
    matched by name; if it is missing, it is created from the company's
    '基本信息' record (together with its base-info relationships) or, when
    no such record exists, as a ``Related`` node holding name and link.
    Each news item under '新闻舆情' is stored and linked to the company.
    Relationships are merged into the graph whenever more than 1000 are
    pending, and once more after the loop.

    :return: None
    """
    records = self.base.query(
        sql={'metaModel': '公司新闻'},
        skip=2020,
        no_cursor_timeout=True,
    )
    rounds, handled = 0, 0
    base_graph = EtpGraph()
    total = records.count()
    pending = []
    lap_start = time.time()

    def merge_pending():
        # Write the batch, then create indexes/constraints if still missing.
        self.graph_merge_relationships(pending)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()

    for record in records:
        handled += 1
        company_name = record['name']
        # TODO(leung): outside the base-info module the url cannot identify
        # the company, so the lookup is done by name only.
        company = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(company_name))
        if company is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': company_name})
            if base_info is None:
                # No base information at all: store an incomplete company.
                company = self.get_neo_node(
                    Related(**{'名称': company_name, '链接': record['url']}))
            else:
                company = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise still needs its base
                # relationships, which this graph would not add otherwise.
                pending += base_graph.create_relationship_from_enterprise_baseinfo(
                    base_info)
            if company is None:
                continue

        content = record['content']
        if '新闻舆情' in content:
            news_items = News.create_from_dict(
                self.get_format_dict(content['新闻舆情']))
            for props in news_items:
                news_node = self.get_neo_node(props.pop('news'))
                if news_node is None:
                    continue
                pending.append(
                    Have(company, news_node, **props).get_relationship())

        if len(pending) > 1000:
            rounds += 1
            # Elapsed time is taken before the merge, as in the batch report.
            elapsed = int(time.time() - lap_start)
            lap_start = time.time()
            merge_pending()
            print(SuccessMessage(
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise and spend {} '
                'seconds,and merge {} relationships.'.format(
                    dt.datetime.now(), rounds, handled, total, elapsed,
                    len(pending))))
            pending.clear()

    if pending:
        rounds += 1
        merge_pending()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), rounds, handled, total, len(pending))))
        pending.clear()
def create_all_relationship(self):
    """Create the relationships derived from '经营状况' records.

    At most 1000 records are read from ``self.base.query``. For each record
    the company node is matched by name or created (from its '基本信息'
    record, or as a bare ``Related`` node with name and link). The sections
    of the record's ``content`` are then turned into relationships:

    * '行政许可' ('工商局' / '信用中国')  -> Have(company, license)
    * '招投标信息'                        -> TakePartIn(company, bidding)
    * '抽查检查', '双随机抽查', '税务信用', '进出口信用' -> Have(company, x)
    * '招聘'                              -> Recruit(company, position)
    * '客户' / '供应商'                   -> SellTo / BuyFrom(company, other)
    * '信用评级'                          -> Appraise(rater, company)
    * '土地转让'                          -> Sell(old owner, plot), Buy(new owner, plot)

    Pending relationships are merged whenever more than 1000 have
    accumulated and once more after the loop.

    :return: None
    """
    url_or_name = '_.URL = "{}" OR _.NAME = "{}"'

    ops = self.base.query(
        sql={'metaModel': '经营状况'},
        limit=1000,
        no_cursor_timeout=True)
    i, k = 0, 0
    eg = EtpGraph()
    etp_count = ops.count()
    relationships = []

    def parsed(content, key, entity_cls):
        """Return the entities built from section ``key`` (empty if absent)."""
        if key not in content:
            return ()
        return entity_cls.create_from_dict(self.get_format_dict(content[key]))

    def stored(items, key):
        """Yield (node, props) for each item whose ``key`` entity was stored."""
        for props in items:
            node = self.get_neo_node(props.pop(key))
            if node is not None:
                yield node, props

    def legal_or_new(entity):
        """Match ``entity`` among legal subjects by URL/NAME, else store it."""
        node = self.match_node(
            *legal, cypher=url_or_name.format(entity['URL'], entity['NAME']))
        if node is None:
            node = self.get_neo_node(entity)
        return node

    def land_party(party, record, company):
        """Resolve one side of a land transfer to a node (may be None)."""
        if party['名称'] == record['name'] or party['链接'] == record['url']:
            return company
        # The party may also be a natural person; only the URL is matched.
        node = self.match_node(
            *legal, cypher='_.URL = "{}"'.format(party['链接']))
        if node is None:
            node = self.get_neo_node(Related(**party))
        return node

    def flush(round_no, handled):
        """Merge the pending relationships, report progress, empty the list."""
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), round_no, handled, etp_count,
                len(relationships))))
        relationships.clear()

    for o in ops:
        k += 1
        # TODO(leung): outside the base-info module the url cannot identify
        # the company, so the lookup is done by name only.
        etp_n = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(o['name']))
        if etp_n is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': o['name']})
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise still needs its base
                # relationships.
                relationships.extend(
                    eg.create_relationship_from_enterprise_baseinfo(base_info))
            else:
                # A company stored with only NAME and URL is one that has no
                # '基本信息' record.
                stub = Related()
                stub['NAME'] = o['name']
                stub['URL'] = o['url']
                etp_n = self.get_neo_node(stub)
            if etp_n is None:
                # Without a company node nothing below can be linked.
                continue

        # The sections live on the record itself. The earlier code read them
        # from the entity object ``etp``, which is unbound when the company
        # node already existed and is not the record in any case.
        content = o['content']

        # '产权交易' is present in the data but not modelled yet.

        if '行政许可' in content:
            licences = content['行政许可']
            for source in ('工商局', '信用中国'):
                if source not in licences:
                    continue
                found = License.create_from_dict(
                    self.get_format_dict(licences[source]), source)
                for node, props in stored(found, 'license'):
                    relationships.append(
                        Have(etp_n, node, **props).get_relationship())

        # Published bidding info is usually a result, so co-bidders can only
        # be found when they won jointly.
        for node, props in stored(parsed(content, '招投标信息', Bidding),
                                  'bidding'):
            # TODO(leung): the project category is used as the bidding result.
            relationships.append(
                TakePartIn(etp_n, node,
                           **dict(props, RESULT=node['TYPE'])).get_relationship())

        for node, props in stored(parsed(content, '抽查检查', Check), 'check'):
            relationships.append(
                Have(etp_n, node,
                     **dict(props, RESULT=node['RESULT'])).get_relationship())

        # TODO(leung): random checks carry no result.
        for node, props in stored(parsed(content, '双随机抽查', RandomCheck),
                                  'check'):
            relationships.append(Have(etp_n, node, **props).get_relationship())

        for node, props in stored(parsed(content, '税务信用', TaxCredit),
                                  'TaxCredit'):
            # TODO(leung): the tax credit grade is used as the rating result.
            relationships.append(
                Have(etp_n, node,
                     **dict(RESULT=node['GRADE'], **props)).get_relationship())

        for node, props in stored(parsed(content, '进出口信用', IAE), 'iae'):
            relationships.append(Have(etp_n, node, **props).get_relationship())

        for node, props in stored(parsed(content, '招聘', Position),
                                  'position'):
            relationships.append(
                Recruit(etp_n, node, **props).get_relationship())

        for props in parsed(content, '客户', Client):
            node = legal_or_new(props.pop('client'))
            if node is not None:
                relationships.append(
                    SellTo(etp_n, node, **props).get_relationship())

        for props in parsed(content, '供应商', Supplier):
            node = legal_or_new(props.pop('supplier'))
            if node is not None:
                relationships.append(
                    BuyFrom(etp_n, node, **props).get_relationship())

        if '信用评级' in content:
            for d in self.get_format_dict(content['信用评级']):
                rater = d.pop('评级公司')
                node = self.match_node(
                    *legal,
                    cypher=url_or_name.format(rater['链接'], rater['名称']))
                if node is None:
                    stub = Related()
                    stub['NAME'] = rater['名称']
                    stub['URL'] = rater['链接']
                    node = self.get_neo_node(stub)
                if node is None:
                    continue
                detail = d.pop('内容')
                d['评级内容'] = detail['内容']
                d['评级链接'] = detail['链接']
                relationships.append(
                    Appraise(node, etp_n, **d).get_relationship())

        if '土地转让' in content:
            for d in self.get_format_dict(content['土地转让']):
                seller = d.pop('原土地使用权人')
                buyer = d.pop('现有土地使用权人')
                plot_n = self.get_neo_node(Plot(**d))
                if plot_n is None:
                    continue
                for party, relation in ((seller, Sell), (buyer, Buy)):
                    node = land_party(party, o, etp_n)
                    if node is not None:
                        relationships.append(
                            relation(node, plot_n).get_relationship())

        if len(relationships) > 1000:
            i += 1
            flush(i, k)

    if relationships:
        i += 1
        flush(i, k)
def create_all_relationship(self):
    """Create enterprise -[Compete]-> enterprise relationships.

    At most 1000 '企业发展' records are read (only ``name``, ``url`` and
    ``content.竞品信息`` are requested). For each record the company node is
    matched by name or created, then every entry of '竞品信息' whose related
    company has a name longer than one character is linked to it. The
    competitor is matched among legal subjects by its parsed URL and, when
    missing, created as a ``Related`` node from the entry's own fields.

    Pending relationships are merged whenever more than 1000 have
    accumulated and once more after the loop.

    :return: None
    """
    ops = self.base.query(
        sql={'metaModel': '企业发展'},
        field={'name': 1, 'url': 1, 'content.竞品信息': 1},
        limit=1000,
        no_cursor_timeout=True)
    i, k = 0, 0
    eg = EtpGraph()
    etp_count = ops.count()
    relationships = []
    # Dedicated instance used only for parser_url, so URL normalisation does
    # not depend on whichever entity object was created last in the loop.
    url_parser = Enterprise()

    def flush(round_no, handled):
        """Merge the pending relationships, report progress, empty the list."""
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), round_no, handled, etp_count,
                len(relationships))))
        relationships.clear()

    for o in ops:
        k += 1
        # TODO(leung): outside the base-info module the url cannot identify
        # the company, so the lookup is done by name only.
        etp_n = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(o['name']))
        if etp_n is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': o['name']})
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise still needs its base
                # relationships.
                relationships.extend(
                    eg.create_relationship_from_enterprise_baseinfo(base_info))
            else:
                # No information about this company: store an incomplete one.
                stub = Related()
                stub['NAME'] = o['name']
                stub['URL'] = o['url']
                etp_n = self.get_neo_node(stub)
            if etp_n is None:
                # Without a company node there is nothing to link.
                continue

        if '竞品信息' in o['content']:
            for d in self.get_format_dict(o['content']['竞品信息']):
                rival = d.pop('关联企业')
                rival_name = rival['名称']
                if rival_name is None or len(rival_name) <= 1:
                    continue
                rival['链接'] = url_parser.parser_url(rival['链接'])
                rival_n = self.match_node(
                    *legal, cypher='_.URL = "{}"'.format(rival['链接']))
                if rival_n is None:
                    # The name was already validated above. The former extra
                    # test ``rival['名称'] > 1`` compared str with int and
                    # raised TypeError on Python 3.
                    rival_n = self.get_neo_node(Related(**{
                        'URL': rival['链接'],
                        'NAME': rival_name,
                        '简介': d.pop('产品介绍'),
                        '成立日期': d.pop('成立日期'),
                        '融资信息': d.pop('融资信息'),
                        '所属地': d.pop('所属地'),
                    }))
                if rival_n is None:
                    continue
                relationships.append(
                    Compete(etp_n, rival_n, **d).get_relationship())

        if len(relationships) > 1000:
            i += 1
            flush(i, k)

    if relationships:
        i += 1
        flush(i, k)
def create_all_relationship(self):
    """Create the relationships derived from '经营风险' records.

    At most 1000 records are read. For each record the company node is
    matched by name or created, then these ``content`` sections are handled:

    * '动产抵押'  -> Have(party, debt) with 角色 = 抵押权人 / 债务人 /
      所有权或使用权人
    * '公示催告'  -> Have(party, banknote) with 角色 = 申请人 / 持票人, plus
      Have(company, banknote) with 角色 = 出票人
    * '行政处罚'  -> Have(company, punishment) per source
      ('工商局', '税务局', '信用中国', '其他')
    * '环保处罚'  -> Have(company, punishment) with source '环保局'
    * '股权出质'  -> Guaranty(pledgor, target), Have(pledgee, target)
    * '破产重组'  -> Relationship(applicant, '申请破产', respondent)
    * '土地抵押'  -> Guaranty(mortgagor, plot), Have(mortgagee, plot)

    Pending relationships are merged whenever more than 1000 have
    accumulated and once more after the loop.

    :return: None
    """
    url_or_name = '_.URL = "{}" OR _.NAME = "{}"'
    name_or_url = '_.NAME = "{}" OR _.URL = "{}"'
    url_only = '_.URL = "{}"'

    ors = self.base.query(
        sql={'metaModel': '经营风险'},
        limit=1000,
        no_cursor_timeout=True)
    i, k = 0, 0
    eg = EtpGraph()
    etp_count = ors.count()
    relationships = []
    # Dedicated instance used only for parser_url, so URL normalisation does
    # not depend on whichever entity object was created last in the loop.
    url_parser = Enterprise()

    def resolve(party, is_current, company, lookups, factory, need_name=False):
        """Return the node for ``party`` or None.

        ``is_current`` short-circuits to the company being processed.
        Otherwise each ``(labels, cypher)`` pair in ``lookups`` is tried in
        order; if none matches, a ``factory(**party)`` entity is stored,
        unless ``need_name`` is set and the party name is one character or
        shorter.
        """
        if is_current:
            return company
        for labels, cypher in lookups:
            found = self.match_node(*labels, cypher=cypher)
            if found is not None:
                return found
        if need_name and len(party['名称']) <= 1:
            return None
        return self.get_neo_node(factory(**party))

    def add_punishments(raw, source, company):
        """Link ``company`` to every punishment parsed from ``raw``."""
        found = Punishment.create_from_dict(self.get_format_dict(raw), source)
        for props in found:
            node = self.get_neo_node(props.pop('punishment'))
            if node is not None:
                relationships.append(
                    Have(company, node, **props).get_relationship())

    def flush(round_no, handled):
        """Merge the pending relationships, report progress, empty the list."""
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), round_no, handled, etp_count,
                len(relationships))))
        relationships.clear()

    for j in ors:
        # Everything listed under a company's risk record belongs to it.
        k += 1
        # TODO(leung): outside the base-info module the url cannot identify
        # the company, so the lookup is done by name only.
        etp_n = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(j['name']))
        if etp_n is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': j['name']})
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise still needs its base
                # relationships.
                relationships.extend(
                    eg.create_relationship_from_enterprise_baseinfo(base_info))
            else:
                # No information about this company: treat it as a plain
                # related party carrying only name and link.
                stub = Related()
                stub['NAME'] = j['name']
                stub['URL'] = j['url']
                etp_n = self.get_neo_node(stub)
            if etp_n is None:
                # etp_n['URL'] is read below; skip records with no node.
                continue

        content = j['content']

        if '动产抵押' in content:
            for d in self.get_format_dict(content['动产抵押']):
                amount = d.pop('被担保主债权数额')
                debt_n = self.get_neo_node(Debt(**{
                    '债务(金额)': amount['金额'],
                    '债务(单位)': amount['单位'],
                    '履行期限': d.pop('债务人履行债务的期限'),
                }))
                # Pop every party first so none of them leaks into the
                # relationship properties.
                parties = (
                    ('抵押权人', d.pop('抵押权人')),
                    ('债务人', d.pop('债务人')),
                    ('所有权或使用权人', d.pop('所有权或使用权归属')),
                )
                if debt_n is None:
                    continue
                for role, party in parties:
                    node = resolve(
                        party,
                        party['名称'] == j['name'] or party['链接'] == j['url'],
                        etp_n,
                        [(legal, url_or_name.format(party['链接'],
                                                    party['名称']))],
                        Related,
                        need_name=True)
                    if node is not None:
                        relationships.append(
                            Have(node, debt_n,
                                 **dict(角色=role, **d)).get_relationship())

        if '公示催告' in content:
            for d in self.get_format_dict(content['公示催告']):
                face = d.pop('票面金额')
                bn_n = self.get_neo_node(Banknote(**{
                    '票据号': d.pop('票据号'),
                    '票据类型': d.pop('票据类型'),
                    '票面金额(金额)': face['金额'],
                    '票面金额(单位)': face['单位'],
                }))
                parties = (
                    ('申请人', d.pop('申请人')),
                    ('持票人', d.pop('持票人')),
                )
                if bn_n is None:
                    continue
                for role, party in parties:
                    node = resolve(
                        party,
                        party['名称'] == j['name'] or party['链接'] == j['url'],
                        etp_n,
                        [(legal, url_or_name.format(party['链接'],
                                                    party['名称']))],
                        Related)
                    if node is not None:
                        relationships.append(
                            Have(node, bn_n,
                                 **dict(角色=role, **d)).get_relationship())
                # The company owning the record is linked as the drawer.
                relationships.append(
                    Have(etp_n, bn_n,
                         **dict(角色='出票人', **d)).get_relationship())

        if '行政处罚' in content:
            by_source = content['行政处罚']
            for source in ('工商局', '税务局', '信用中国', '其他'):
                # A record need not carry every source; skip missing ones
                # instead of failing with KeyError.
                if source in by_source:
                    add_punishments(by_source[source], source, etp_n)

        if '环保处罚' in content:
            add_punishments(content['环保处罚'], '环保局', etp_n)

        if '股权出质' in content:
            for sh in self.get_format_dict(content['股权出质']):
                amount = self.get_format_amount('出质数额', sh.pop('出质数额'))
                sh = dict(sh, **amount)
                nodes = {}
                for key, factory in (('出质人', Involveder),
                                     ('质权人', Involveder),
                                     ('标的企业', Possession)):
                    party = sh.pop(key)
                    party['链接'] = url_parser.parser_url(party['链接'])
                    # Legal subjects are searched first (by name or URL);
                    # persons only by URL because names collide too often.
                    # TODO(leung): searching all entities is expensive.
                    nodes[key] = resolve(
                        party,
                        j['name'] == party['名称']
                        or party['链接'] == etp_n['URL'],
                        etp_n,
                        [(legal, name_or_url.format(party['名称'],
                                                    party['链接'])),
                         (['Person'], url_only.format(party['链接']))],
                        factory,
                        need_name=True)
                target_n = nodes['标的企业']
                # 1. pledge
                if nodes['出质人'] is not None and target_n is not None:
                    relationships.append(
                        Guaranty(nodes['出质人'], target_n,
                                 **sh).get_relationship())
                # 2. pledgee's right
                if nodes['质权人'] is not None and target_n is not None:
                    relationships.append(
                        Have(nodes['质权人'], target_n,
                             **sh).get_relationship())

        if '破产重组' in content:
            for d in self.get_format_dict(content['破产重组']):
                sides = []
                for key in ('申请人', '被申请人'):
                    party = d.pop(key)
                    # The label was spelled 'person' here while every other
                    # lookup uses 'Person'.
                    # NOTE(review): assumes graph labels are case-sensitive.
                    sides.append(resolve(
                        party,
                        party['名称'] == j['name']
                        or party['链接'] == etp_n['URL'],
                        etp_n,
                        [(['Person'] + legal, url_only.format(party['链接']))],
                        Involveder))
                applicant_n, respondent_n = sides
                if applicant_n is not None and respondent_n is not None:
                    relationships.append(
                        Relationship(applicant_n, '申请破产', respondent_n, **d))

        if '土地抵押' in content:
            for d in self.get_format_dict(content['土地抵押']):
                area = d.pop('抵押面积')
                p_n = self.get_neo_node(Plot(**{
                    '位置': d.pop('位置'),
                    '面积(数量)': area['数额'],
                    '面积(单位)': area['单位'],
                }))
                amount = self.get_format_amount('抵押金额', d.pop('抵押金额'))
                d = dict(d, **amount)
                mortgagor = d.pop('抵押人')
                mortgagee = d.pop('抵押权人')
                if p_n is None:
                    continue
                for party, relation in ((mortgagor, Guaranty),
                                        (mortgagee, Have)):
                    node = resolve(
                        party,
                        party['名称'] == j['name']
                        or party['链接'] == etp_n['URL'],
                        etp_n,
                        [(legal, url_or_name.format(party['链接'],
                                                    party['名称']))],
                        Related)
                    if node is not None:
                        relationships.append(
                            relation(node, p_n, **d).get_relationship())

        if len(relationships) > 1000:
            i += 1
            flush(i, k)

    if relationships:
        i += 1
        flush(i, k)
def create_all_relationship(self):
    """Create InvolveCase relationships from '法律诉讼' records.

    At most 100 records are read. For each record the company node is
    matched by name or created (from its '基本信息' record, or as an
    ``Involveder`` with name and link); records whose node cannot be
    obtained are skipped. These ``content`` sections are handled:

    * '被执行人' / '失信被执行人' -> InvolveCase(company, case)
    * '裁判文书'   -> InvolveCase(party, ruling) with the party's 案件身份
    * '限制高消费' -> InvolveCase(applicant, order), InvolveCase(order, target),
      InvolveCase(order, related)
    * '股权冻结'   -> InvolveCase(freeze, target company),
      InvolveCase(freeze, executed party)

    Pending relationships are merged whenever more than 1000 have
    accumulated and once more after the loop.

    :return: None
    """
    records = self.base.query(
        sql={'metaModel': '法律诉讼'},
        limit=100,
        no_cursor_timeout=True,
    )
    rounds, handled = 0, 0
    base_graph = EtpGraph()
    total = records.count()
    pending = []

    def involved_node(party, is_current, company, include_person=True):
        """Return the node for ``party``: the current company, a node matched
        by URL, or a newly stored ``Involveder`` (may be None)."""
        if is_current:
            return company
        labels = ['Person'] + legal if include_person else legal
        node = self.match_node(
            *labels, cypher='_.URL = "{}"'.format(party['链接']))
        if node is None:
            node = self.get_neo_node(Involveder(**party))
        return node

    def link_cases(items, key, company):
        """Link ``company`` to each case entity popped from ``items``."""
        for props in items:
            case_node = self.get_neo_node(props.pop(key))
            if case_node is not None:
                pending.append(
                    InvolveCase(company, case_node, **props).get_relationship())

    def merge_and_report():
        """Merge the pending relationships, report progress, empty the list."""
        self.graph_merge_relationships(pending)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), rounds, handled, total, len(pending))))
        pending.clear()

    for record in records:
        # Cases listed under a company's litigation record involve it.
        handled += 1
        # TODO(leung): the url in the litigation module cannot identify the
        # company, so the lookup is done by name only.
        company = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(record['name']))
        if company is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': record['name']})
            if base_info is None:
                # No information about this company: treat it as a plain
                # case participant.
                company = self.get_neo_node(Involveder(
                    **{'名称': record['name'], '链接': record['url']}))
            else:
                company = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise still needs its base
                # relationships.
                pending += base_graph.create_relationship_from_enterprise_baseinfo(
                    base_info)
            if company is None:
                continue

        content = record['content']

        if '被执行人' in content:
            link_cases(
                Enforcement.create_from_dict(
                    self.get_format_dict(content['被执行人'])),
                'executed', company)

        if '裁判文书' in content:
            # create_from_dict yields (ruling, involved parties) pairs.
            rulings = Judgment.create_from_dict(
                self.get_format_dict(content['裁判文书']))
            for ruling, involved in rulings:
                ruling_node = self.get_neo_node(ruling)
                if ruling_node is None:
                    continue
                for inv in involved:
                    # inv is indexed as (case role, name, url).
                    if record['name'] == inv[1] or record['url'] == inv[2]:
                        party_node = company
                    else:
                        # Look among persons and legal subjects first; create
                        # a case participant only when nothing matches.
                        party_node = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(inv[2]))
                        if party_node is None:
                            participant = Involveder()
                            participant['NAME'] = inv[1]
                            participant['URL'] = inv[2]
                            party_node = self.get_neo_node(participant)
                    if party_node is None:
                        continue
                    pending.append(
                        InvolveCase(party_node, ruling_node,
                                    **{'案件身份': inv[0]}).get_relationship())

        if '失信被执行人' in content:
            link_cases(
                SXEnforcement.create_from_dict(
                    self.get_format_dict(content['失信被执行人'])),
                'sxexecuted', company)

        if '限制高消费' in content:
            for d in self.get_format_dict(content['限制高消费']):
                applicant = d.pop('申请人')
                target = d.pop('限消令对象')
                related = d.pop('关联对象')
                case_no = d.pop('案号')
                order_node = self.get_neo_node(LimitOrder(**dict(
                    案号=case_no['名称'], 案号链接=case_no['链接'], **d)))
                if order_node is None:
                    continue
                # NOTE(review): only the applicant edge points party -> order;
                # the other two point order -> party. Kept as is; confirm the
                # intended direction.
                for party, role, party_first in (
                        (applicant, '申请人', True),
                        (target, '限制对象', False),
                        (related, '关联对象', False)):
                    node = involved_node(
                        party,
                        party['名称'] == record['name']
                        or party['链接'] == company['URL'],
                        company)
                    if node is None:
                        continue
                    ends = (node, order_node) if party_first else (order_node, node)
                    pending.append(
                        InvolveCase(*ends,
                                    **{'案件身份': role}).get_relationship())

        if '股权冻结' in content:
            for d in self.get_format_dict(content['股权冻结']):
                target = d.pop('标的企业')
                executed = d.pop('被执行人')
                shares = d.pop('股权数额')
                kind_state = d.pop('类型|状态').split('|')
                freeze_node = self.get_neo_node(StockFreeze(**dict(
                    冻结数额=shares['金额'],
                    金额单位=shares['单位'],
                    类型=kind_state[0],
                    状态=kind_state[1] if len(kind_state) > 1 else None,
                    **d)))
                if freeze_node is None:
                    continue
                # The target company is looked up among legal subjects only;
                # the executed party may also be a person.
                for party, role, include_person in (
                        (target, '标的企业', False),
                        (executed, '被执行人', True)):
                    node = involved_node(
                        party,
                        party['名称'] == record['name']
                        or party['链接'] == company['URL'],
                        company,
                        include_person=include_person)
                    if node is not None:
                        pending.append(
                            InvolveCase(freeze_node, node,
                                        **{'案件身份': role}).get_relationship())

        if len(pending) > 1000:
            rounds += 1
            merge_and_report()

    if pending:
        rounds += 1
        merge_and_report()
def create_all_relationship(self):
    """Create enterprise -[Have]-> intellectual-property relationships.

    Iterates the '知识产权' records returned by ``self.base.query`` (the
    query skips the first 79175 + 7909 records). For each record the company
    node is matched by name or created; records whose node cannot be
    obtained are skipped. Every section listed in ``sections`` below is
    parsed with its entity class, each entity is stored, and a ``Have``
    relationship from the company to it is queued.

    Pending relationships are merged whenever more than 1000 have
    accumulated and once more after the loop.

    :return: None
    """
    # (content section, entity class, key under which create_from_dict
    # returns the entity inside each item)
    sections = (
        ('网站信息', Website, 'website'),
        ('证书信息', Certificate, 'certificate'),
        ('专利信息', Patent, 'patent'),
        ('商标信息', Trademark, 'trademark'),
        ('软件著作权', SoftCopyRight, 'softcopyright'),
        ('作品著作权', WorkCopyRight, 'workcopyright'),
        ('微博', Weibo, 'weibo'),
        ('微信公众号', OfficialAccount, 'WeChat'),
        ('小程序', Applets, 'applets'),
        ('APP', App, 'app'),
    )

    rts = self.base.query(
        sql={'metaModel': '知识产权'},
        skip=79175 + 7909,
        no_cursor_timeout=True)
    i, k = 0, 0
    eg = EtpGraph()
    etp_count = rts.count()
    relationships = []
    s_t = time.time()

    def flush(round_no, handled, elapsed=None):
        """Merge the pending relationships, report progress, empty the list.

        ``elapsed`` (seconds) selects the progress message that includes the
        time spent on the batch.
        """
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        if elapsed is None:
            message = (
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise,and'
                ' merge {} relationships.'.format(
                    dt.datetime.now(), round_no, handled, etp_count,
                    len(relationships)))
        else:
            message = (
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise and spend {} '
                'seconds,and merge {} relationships.'.format(
                    dt.datetime.now(), round_no, handled, etp_count,
                    elapsed, len(relationships)))
        print(SuccessMessage(message))
        relationships.clear()

    for r in rts:
        k += 1
        # TODO(leung): outside the base-info module the url cannot identify
        # the company, so the lookup is done by name only.
        etp_n = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(r['name']))
        if etp_n is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': r['name']})
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise still needs its base
                # relationships.
                relationships.extend(
                    eg.create_relationship_from_enterprise_baseinfo(base_info))
            else:
                # No information about this company: store an incomplete one.
                etp_n = self.get_neo_node(
                    Related(**{'名称': r['name'], '链接': r['url']}))
            if etp_n is None:
                # Same guard as the news and litigation graphs: without a
                # company node there is nothing to attach the assets to.
                continue

        content = r['content']
        for section, entity_cls, entity_key in sections:
            if section not in content:
                continue
            items = entity_cls.create_from_dict(
                self.get_format_dict(content[section]))
            for props in items:
                node = self.get_neo_node(props.pop(entity_key))
                if node is not None:
                    relationships.append(
                        Have(etp_n, node, **props).get_relationship())

        if len(relationships) > 1000:
            i += 1
            # Elapsed time is taken before the merge, then the lap restarts.
            sp = int(time.time() - s_t)
            s_t = time.time()
            flush(i, k, elapsed=sp)

    if relationships:
        i += 1
        flush(i, k)