def create_all_relationship(self):
    """Merge ruling -[have]-> ruling_text relationships into the graph.

    Every document returned by the '裁判文书' query is turned into a
    ``RulingText``.  The ruling node carrying the same ``CASE_NUM`` is
    looked up and, when one exists, linked to the text node.  Pending
    relationships are merged whenever more than 1000 have piled up, and
    once more after the cursor is exhausted.

    :return: None
    """
    cursor = self.base.query(
        sql={'metaModel': '裁判文书'},
        no_cursor_timeout=True,
    )
    rounds = 0
    handled = 0
    total = cursor.count()
    batch = []
    ruling = Ruling()

    def flush():
        """Merge the pending batch, print a progress line, empty the batch."""
        nonlocal rounds
        rounds += 1
        self.graph_merge_relationships(batch)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), rounds, handled, total, len(batch)
            )))
        batch.clear()

    for handled, doc in enumerate(cursor, start=1):
        text = RulingText.create_from_original_text(
            doc['content'], **{'链接': doc['url']}
        )
        # Only the case number takes part in the lookup; matching on the
        # URL as well is currently switched off.
        condition = '_.CASE_NUM="{}"'.format(text.BaseAttributes['CASE_NUM'])
        ruling_node = self.NodeMatcher.match(ruling.label).where(
            condition).first()
        if ruling_node is None:
            continue
        text_node = text.get_neo_node(primarykey=text.primarykey)
        batch.append(Have(ruling_node, text_node).get_relationship())
        if len(batch) > 1000:
            flush()

    if batch:
        flush()
# rtg = JusRulingTextGraph()
# rtg.create_all_relationship()
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """Collect the news nodes that belong to one enterprise document.

    :param etp: mapping read through its 'url', 'name' and 'content'
        entries; the '新闻舆情' entry of 'content' is used when present.
    :return: ``(nodes, relationships)``.  Both lists are empty when no
        graph node could be obtained for the enterprise itself; otherwise
        ``nodes`` starts with the enterprise node.
    """
    company_node = self.get_neo_node(
        Enterprise(URL=etp['url'], NAME=etp['name']))
    if company_node is None:
        return [], []

    nodes = [company_node]
    relationships = []
    content = etp['content']
    if '新闻舆情' not in content:
        return nodes, relationships

    formatted = self.get_format_dict(content['新闻舆情'])
    for item in News.create_from_dict(formatted):
        news_node = self.get_neo_node(item.pop('news'))
        if news_node is None:
            continue
        nodes.append(news_node)
        # Whatever remains in ``item`` once 'news' is removed is passed on
        # as the relationship's keyword arguments.
        relationships.append(Have(company_node, news_node, **item))
    return nodes, relationships
def create_all_relationship(self):
    """Merge enterprise -[have]-> news relationships into the graph.

    Walks the '公司新闻' documents.  The owning enterprise node is matched
    by name; when it is missing it is created from the '基本信息' document
    of the same name (together with its basic-info relationships) or,
    failing that, as a ``Related`` node built from the name and link.
    Pending relationships are merged whenever more than 1000 have piled
    up, and once more after the cursor is exhausted.

    :return: None
    """
    cursor = self.base.query(
        sql={'metaModel': '公司新闻'},
        no_cursor_timeout=True,
    )
    rounds = 0
    handled = 0
    base_graph = EtpGraph()
    total = cursor.count()
    batch = []
    started = time.time()

    def merge_batch():
        """Write the pending batch and make sure indexes/constraints exist."""
        self.graph_merge_relationships(batch)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()

    for handled, doc in enumerate(cursor, start=1):
        # TODO(leung): outside the basic-info module the url cannot
        # identify a company, hence the lookup by name only.
        company_node = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(doc['name'])
        )
        if company_node is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': doc['name']}
            )
            if base_info is None:
                # Nothing known about this company: create an incomplete
                # placeholder from the name and link only.
                placeholder = Related(
                    **{'名称': doc['name'], '链接': doc['url']})
                company_node = self.get_neo_node(placeholder)
            else:
                company_node = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise has no basic relationships
                # yet, so queue them alongside the news links.
                batch.extend(
                    base_graph.create_relationship_from_enterprise_baseinfo(
                        base_info))
            if company_node is None:
                continue

        content = doc['content']
        if '新闻舆情' in content:
            formatted = self.get_format_dict(content['新闻舆情'])
            for item in News.create_from_dict(formatted):
                news_node = self.get_neo_node(item.pop('news'))
                if news_node is None:
                    continue
                batch.append(
                    Have(company_node, news_node, **item).get_relationship())

        if len(batch) > 1000:
            rounds += 1
            elapsed = int(time.time() - started)
            started = time.time()
            merge_batch()
            print(SuccessMessage(
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise and spend {} '
                'seconds,and merge {} relationships.'.format(
                    dt.datetime.now(), rounds, handled, total, elapsed,
                    len(batch)
                )))
            batch.clear()

    if batch:
        rounds += 1
        merge_batch()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), rounds, handled, total, len(batch)
            )))
        batch.clear()
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """Collect business-operation nodes and relationships of one enterprise.

    Sections of ``etp['content']`` handled, in this order: 行政许可,
    招投标信息, 抽查检查, 双随机抽查, 税务信用, 进出口信用, 招聘, 客户,
    供应商, 信用评级 and 土地转让.  The 产权交易 section is recognised
    but not processed.

    :param etp: mapping read through its 'url', 'name' and 'content'
        entries.
    :return: ``(nodes, relationships)``.  Both lists are empty when no
        graph node could be obtained for the enterprise itself.
        ``nodes`` may contain the same node more than once.
    """
    etp_n = self.get_neo_node(Enterprise(URL=etp['url'], NAME=etp['name']))
    if etp_n is None:
        return [], []
    nodes, relationships = [etp_n], []
    content = etp['content']

    def attach(items, key, rel_cls, props_for=None):
        """Link the enterprise to the entity stored under *key* of each item."""
        for props in items:
            node = self.get_neo_node(props.pop(key))
            if node is None:
                continue
            nodes.append(node)
            if props_for is not None:
                props = props_for(props, node)
            relationships.append(rel_cls(etp_n, node, **props))

    def attach_partners(items, key, rel_cls):
        """Link the enterprise to clients/suppliers, reusing known nodes."""
        for props in items:
            partner = props.pop(key)
            node = self.match_node(
                *legal,
                cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                    partner['URL'], partner['NAME']))
            if node is None:
                if partner.isEnterprise():
                    partner = Enterprise(**partner.to_dict(with_label=False))
                node = self.get_neo_node(partner)
            if node is None:
                continue
            nodes.append(node)
            relationships.append(rel_cls(etp_n, node, **props))

    def land_party_node(party):
        """Return the node of a land-transfer party (it may be a person)."""
        party['链接'] = Enterprise.parser_url(party['链接'])
        if party['名称'] == etp['name'] or party['链接'] == etp['url']:
            return etp_n
        node = self.match_node(
            *legal, cypher='_.URL = "{}"'.format(party['链接']))
        if node is None:
            # Try the most specific model first and fall back step by step.
            candidate = Enterprise(**party)
            if not candidate.isEnterprise():
                candidate = Person(**party)
                if not candidate.isPerson():
                    candidate = Related(**party)
            node = self.get_neo_node(candidate)
        return node

    if '行政许可' in content:
        licences = content['行政许可']
        for source in ('工商局', '信用中国'):
            if source in licences:
                attach(
                    License.create_from_dict(
                        self.get_format_dict(licences[source]), source),
                    'license', Have)

    # (section, model, key holding the entity, relationship, extra props)
    simple_sections = (
        # Published bidding information is usually the outcome only.
        # TODO(leung): the project category is used as the bidding result.
        ('招投标信息', Bidding, 'bidding', TakePartIn,
         lambda props, node: dict(props, **{'RESULT': node['TYPE']})),
        ('抽查检查', Check, 'check', Have,
         lambda props, node: dict(props, **{'RESULT': node['RESULT']})),
        # TODO(leung): random checks carry no result.
        ('双随机抽查', RandomCheck, 'check', Have, None),
        # TODO(leung): the tax credit grade is used as the rating result.
        ('税务信用', TaxCredit, 'TaxCredit', Have,
         lambda props, node: dict(RESULT=node['GRADE'], **props)),
        ('进出口信用', IAE, 'iae', Have, None),
        ('招聘', Position, 'position', Recruit, None),
    )
    for section, model, key, rel_cls, props_for in simple_sections:
        if section in content:
            attach(
                model.create_from_dict(
                    self.get_format_dict(content[section])),
                key, rel_cls, props_for)

    if '客户' in content:
        attach_partners(
            Client.create_from_dict(self.get_format_dict(content['客户'])),
            'client', SellTo)

    if '供应商' in content:
        attach_partners(
            Supplier.create_from_dict(
                self.get_format_dict(content['供应商'])),
            'supplier', BuyFrom)

    if '信用评级' in content:
        for rating in self.get_format_dict(content['信用评级']):
            rater = rating.pop('评级公司')
            rater['链接'] = Enterprise.parser_url(rater['链接'])
            rater_n = self.match_node(
                *legal,
                cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                    rater['链接'], rater['名称']))
            if rater_n is None:
                rater_n = self.get_neo_node(Enterprise(**rater))
            if rater_n is None:
                continue
            body = rating.pop('内容')
            rating['评级内容'] = body['内容']
            rating['评级链接'] = body['链接']
            nodes.append(rater_n)
            # The rater appraises the enterprise, not the other way round.
            relationships.append(Appraise(rater_n, etp_n, **rating))

    if '土地转让' in content:
        for deal in self.get_format_dict(content['土地转让']):
            seller = deal.pop('原土地使用权人')
            buyer = deal.pop('现有土地使用权人')
            plot_n = self.get_neo_node(Plot(**deal))
            if plot_n is None:
                continue
            for party, rel_cls in ((seller, Sell), (buyer, Buy)):
                party_n = land_party_node(party)
                if party_n is not None:
                    nodes.append(party_n)
                    nodes.append(plot_n)
                    relationships.append(rel_cls(party_n, plot_n))

    return nodes, relationships
def create_all_relationship(self):
    """Merge the business-operation relationships of every enterprise.

    Reads the '经营状况' documents (the query is capped by ``limit=1000``),
    resolves the enterprise node each document belongs to and links it to
    licences, biddings, checks, tax credit, import/export credit, job
    positions, clients, suppliers, credit raters and land transfers.
    Pending relationships are merged whenever more than 1000 have piled
    up, and once more after the cursor is exhausted.

    Section data is always read from the queried document itself; the
    enterprise model object created while resolving the node has no
    'content' and is not even bound when the node is matched directly.
    Documents whose enterprise node cannot be obtained are skipped.

    :return: None
    """
    ops = self.base.query(
        sql={'metaModel': '经营状况'},
        limit=1000,
        no_cursor_timeout=True,
    )
    eg = EtpGraph()
    etp_count = ops.count()
    relationships = []
    rounds = 0
    handled = 0

    def flush():
        """Merge the pending batch, print a progress line, empty the batch."""
        nonlocal rounds
        rounds += 1
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), rounds, handled, etp_count,
                len(relationships))))
        relationships.clear()

    def add(rel_cls, start, end, props):
        """Queue one relationship of type *rel_cls* from *start* to *end*."""
        relationships.append(rel_cls(start, end, **props).get_relationship())

    def find_legal(url, name):
        """Look an existing legal entity up by url or by name."""
        return self.match_node(
            *legal,
            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(url, name))

    def attach(etp_n, items, key, rel_cls, props_for=None):
        """Link the enterprise to the entity stored under *key* of each item."""
        for props in items:
            node = self.get_neo_node(props.pop(key))
            if node is None:
                continue
            if props_for is not None:
                props = props_for(props, node)
            add(rel_cls, etp_n, node, props)

    def attach_partners(etp_n, items, key, rel_cls):
        """Link the enterprise to clients/suppliers, reusing known nodes."""
        for props in items:
            partner = props.pop(key)
            node = find_legal(partner['URL'], partner['NAME'])
            if node is None:
                node = self.get_neo_node(partner)
            if node is None:
                continue
            add(rel_cls, etp_n, node, props)

    def land_party_node(party, doc, etp_n):
        """Return the node of a land-transfer party (it may be a person)."""
        if party['名称'] == doc['name'] or party['链接'] == doc['url']:
            return etp_n
        node = self.match_node(
            *legal, cypher='_.URL = "{}"'.format(party['链接']))
        if node is None:
            node = self.get_neo_node(Related(**party))
        return node

    # (section, model, key holding the entity, relationship, extra props)
    simple_sections = (
        # Published bidding information is usually the outcome only.
        # TODO(leung): the project category is used as the bidding result.
        ('招投标信息', Bidding, 'bidding', TakePartIn,
         lambda props, node: dict(props, **{'RESULT': node['TYPE']})),
        ('抽查检查', Check, 'check', Have,
         lambda props, node: dict(props, **{'RESULT': node['RESULT']})),
        # TODO(leung): random checks carry no result.
        ('双随机抽查', RandomCheck, 'check', Have, None),
        # TODO(leung): the tax credit grade is used as the rating result.
        ('税务信用', TaxCredit, 'TaxCredit', Have,
         lambda props, node: dict(RESULT=node['GRADE'], **props)),
        ('进出口信用', IAE, 'iae', Have, None),
        ('招聘', Position, 'position', Recruit, None),
    )

    def collect(doc, etp_n):
        """Queue every relationship found in one '经营状况' document."""
        content = doc['content']
        # The '产权交易' section is recognised but not processed yet.

        if '行政许可' in content:
            licences = content['行政许可']
            for source in ('工商局', '信用中国'):
                if source in licences:
                    attach(
                        etp_n,
                        License.create_from_dict(
                            self.get_format_dict(licences[source]), source),
                        'license', Have)

        for section, model, key, rel_cls, props_for in simple_sections:
            if section in content:
                attach(
                    etp_n,
                    model.create_from_dict(
                        self.get_format_dict(content[section])),
                    key, rel_cls, props_for)

        if '客户' in content:
            attach_partners(
                etp_n,
                Client.create_from_dict(
                    self.get_format_dict(content['客户'])),
                'client', SellTo)

        if '供应商' in content:
            attach_partners(
                etp_n,
                Supplier.create_from_dict(
                    self.get_format_dict(content['供应商'])),
                'supplier', BuyFrom)

        if '信用评级' in content:
            for rating in self.get_format_dict(content['信用评级']):
                rater = rating.pop('评级公司')
                rater_n = find_legal(rater['链接'], rater['名称'])
                if rater_n is None:
                    unknown = Related()
                    unknown['NAME'] = rater['名称']
                    unknown['URL'] = rater['链接']
                    rater_n = self.get_neo_node(unknown)
                if rater_n is None:
                    continue
                body = rating.pop('内容')
                rating['评级内容'] = body['内容']
                rating['评级链接'] = body['链接']
                # The rater appraises the enterprise.
                add(Appraise, rater_n, etp_n, rating)

        if '土地转让' in content:
            for deal in self.get_format_dict(content['土地转让']):
                seller = deal.pop('原土地使用权人')
                buyer = deal.pop('现有土地使用权人')
                plot_n = self.get_neo_node(Plot(**deal))
                if plot_n is None:
                    continue
                for party, rel_cls in ((seller, Sell), (buyer, Buy)):
                    party_n = land_party_node(party, doc, etp_n)
                    if party_n is not None:
                        add(rel_cls, party_n, plot_n, {})

    for handled, o in enumerate(ops, start=1):
        # TODO(leung): outside the basic-info module the url cannot
        # identify a company, hence the lookup by name only.
        etp_n = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(o['name']))
        if etp_n is None:
            base_info = self.base.query_one(sql={
                'metaModel': '基本信息',
                'name': o['name']
            })
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise has no basic relationships
                # yet, so queue them as well.
                relationships += \
                    eg.create_relationship_from_enterprise_baseinfo(base_info)
            else:
                # No basic info: create an incomplete company.  A company
                # stored with only NAME and URL therefore means it has no
                # '基本信息' document.
                placeholder = Related()
                placeholder['NAME'] = o['name']
                placeholder['URL'] = o['url']
                etp_n = self.get_neo_node(placeholder)
            if etp_n is None:
                # Without an enterprise node nothing can be linked.
                continue

        collect(o, etp_n)

        if len(relationships) > 1000:
            flush()

    if relationships:
        flush()
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """Collect the nodes and relationships visible in an enterprise's basic info.

    Relationship objects built, in this order:

    1. ``LegalRep(legal representative, enterprise)``
    2. ``BeInOffice(manager, enterprise)``
    3. ``Located(enterprise, address)``
    4. ``Share(enterprise, share holder)``
    5. ``Have(enterprise, telephone)`` and ``Have(enterprise, email)``
    6. ``Investing(enterprise, invested company)``
    7. ``BranchAgency(enterprise, branch)`` / ``SuperiorAgency(enterprise,
       head company)``, each with ``Principal(principal, agency)``
    8. ``Have(enterprise, construction project)`` and
       ``Have(construction agency, construction project)``
    9. ``Have(enterprise, construction certificate)``

    Every section is best-effort: an exception raised while handling one
    section is logged at error level and the remaining sections still run.

    :param etp: enterprise model object; it is indexed with 'NAME' for log
        messages and queried through its ``get_*`` accessors.
    :return: ``(nodes, rps)``; both empty when no graph node could be
        obtained for the enterprise itself.
    """
    # Merging a relationship also creates any end node that does not exist
    # in the database yet, which is why nodes are created so liberally here.
    nodes, rps = [], []
    etp_n = self.get_neo_node(etp)
    if etp_n is None:
        self.logger.debug('{} filed initialize enterprise '
                          'Neo node'.format(etp['NAME']))
        return nodes, rps
    nodes.append(etp_n)

    def report_missing(what):
        """Log that no graph node could be built for *what*."""
        self.logger.debug('{} filed initialize {} '
                          'Neo node'.format(etp['NAME'], what))

    def legal_node(entity):
        """Find *entity* among the legal entities by url/name, else create it."""
        node = self.match_node(
            *legal,
            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                entity['URL'], entity['NAME'])
        )
        if node is None:
            node = self.get_neo_node(entity)
        return node

    def add_legal_representative():
        lr = etp.get_legal_representative()
        # The representative may be a person or any kind of legal entity.
        lr_n = self.match_node(
            *['Person'] + legal,
            cypher='_.URL = "{}"'.format(lr['URL'])
        )
        if lr_n is None:
            lr_n = self.get_neo_node(lr)
        if lr_n is None:
            report_missing('legal representative')
        else:
            nodes.append(lr_n)
            rps.append(LegalRep(lr_n, etp_n))

    def add_managers():
        for m in etp.get_manager():
            # Major managers are always persons.
            m_n = self.get_neo_node(m.pop('person'))
            if m_n is None:
                report_missing('major manager')
            else:
                nodes.append(m_n)
                rps.append(BeInOffice(m_n, etp_n, **m))

    def add_address():
        dz_n = self.get_neo_node(etp.get_address())
        if dz_n is None:
            report_missing('address')
        else:
            nodes.append(dz_n)
            rps.append(Located(etp_n, dz_n))

    def add_share_holders():
        for s in etp.get_share_holder():
            s_ = s.pop('share_holder')
            # A share holder may be a person (matched by url only) or a
            # legal entity; an unknown one is created on the spot.
            sh_n = self.match_node(
                'Person', cypher='_.URL = "{}"'.format(s_['URL'])
            )
            if sh_n is None:
                sh_n = legal_node(s_)
            if sh_n is None:
                report_missing('unexpected share holder')
                continue
            nodes.append(sh_n)
            rps.append(Share(etp_n, sh_n, **s))

    def add_contact(value, what):
        """Attach a telephone/email node; a missing value is not an error."""
        if value is None:
            return
        node = self.get_neo_node(value)
        if node is None:
            report_missing(what)
        else:
            nodes.append(node)
            rps.append(Have(etp_n, node))

    def add_invested():
        for iv in etp.get_invest_outer():
            iv_n = legal_node(iv.pop('invested'))
            if iv_n is None:
                report_missing('unexpected invested')
                continue
            nodes.append(iv_n)
            rps.append(Investing(etp_n, iv_n, **iv))

    def add_agencies(items, key, what, rel_cls):
        """Shared handling of branches and head companies."""
        for props in items:
            agency_n = legal_node(props.pop(key))
            if agency_n is None:
                report_missing(what)
                continue
            # 'principal' must not leak into the relationship properties.
            p_n = self.get_neo_node(props.pop('principal'))
            if p_n is not None:
                nodes.append(p_n)
                rps.append(Principal(p_n, agency_n))
            nodes.append(agency_n)
            rps.append(rel_cls(etp_n, agency_n, **props))

    def add_construction_projects():
        for c in etp.get_construction_project():
            c_n = self.get_neo_node(c.pop('project'))
            if c_n is None:
                report_missing('unexpected construction project')
                continue
            # Reuse the construction agency when it already exists.
            j_n = legal_node(c.pop('jsdw'))
            if j_n is None:
                report_missing('unexpected construction agency')
                continue
            # TODO(lj): consider separate relationship types for the
            # contractor and the construction agency.
            nodes.append(c_n)
            rps.append(Have(etp_n, c_n, **dict(角色='承建单位', **c)))
            nodes.append(j_n)
            rps.append(Have(j_n, c_n, **dict(角色='建设单位', **c)))

    def add_construction_certificates():
        for c in etp.get_construction_certificate():
            c_n = self.get_neo_node(c.pop('ctf'))
            if c_n is None:
                report_missing('unexpected construction certificate')
                continue
            nodes.append(c_n)
            rps.append(Have(etp_n, c_n, **c))

    # (label used in the error message, step, also call ExceptionInfo)
    steps = (
        ('legal representative', add_legal_representative, True),
        ('major managers', add_managers, False),
        ('address', add_address, False),
        ('share holder', add_share_holders, False),
        ('telephone number',
         lambda: add_contact(etp.get_telephone_number(), 'telephone'),
         False),
        ('email', lambda: add_contact(etp.get_email(), 'email'), False),
        ('invest', add_invested, False),
        ('branch',
         lambda: add_agencies(etp.get_branch(), 'branch',
                              'unexpected branch', BranchAgency),
         False),
        ('head company',
         lambda: add_agencies(etp.get_head_company(), 'head',
                              'unexpected head company', SuperiorAgency),
         False),
        ('construction project', add_construction_projects, False),
        ('construction certificate', add_construction_certificates, True),
    )
    for what, step, notify in steps:
        try:
            step()
        except Exception as e:
            # Deliberately broad: one malformed section must not discard
            # what the other sections produced.
            if notify:
                ExceptionInfo(e)
            self.logger.error('{} deal {} raise ({})'.format(
                etp['NAME'], what, e), exc_info=True)
    return nodes, rps
def create_all_relationship(self):
    """Merge the business-risk relationships of every enterprise.

    Reads the '经营风险' documents (the query is capped by ``limit=1000``),
    resolves the enterprise node each document belongs to and handles the
    sections 动产抵押, 公示催告, 行政处罚, 环保处罚, 股权出质, 破产重组
    and 土地抵押.  Pending relationships are merged whenever more than
    1000 have piled up, and once more after the cursor is exhausted.

    Documents whose enterprise node cannot be obtained are skipped, and so
    are records whose central node (debt, banknote, plot) cannot be built.
    Punishment sources that are absent from a document are ignored.

    :return: None
    """
    ors = self.base.query(
        sql={'metaModel': '经营风险'},
        limit=1000,
        no_cursor_timeout=True,
    )
    eg = EtpGraph()
    etp_count = ors.count()
    relationships = []
    rounds = 0
    handled = 0

    def flush():
        """Merge the pending batch, print a progress line, empty the batch."""
        nonlocal rounds
        rounds += 1
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.'.format(
                dt.datetime.now(), rounds, handled, etp_count,
                len(relationships)
            )))
        relationships.clear()

    def add(rel_cls, start, end, props):
        """Queue one relationship of type *rel_cls* from *start* to *end*."""
        relationships.append(rel_cls(start, end, **props).get_relationship())

    def resolve(party, owner, fallback, labels=None, by_name=True,
                try_person=False, need_name=False):
        """Return the graph node of *party*, creating it when unknown.

        :param party: mapping with '名称' and '链接'.
        :param owner: ``(name, url, node)`` of the current enterprise; a
            party equal to it on name or url resolves to ``node``.
        :param fallback: model class used to create an unknown party.
        :param labels: labels to search; the legal-entity labels by default.
        :param by_name: also match on the name, not only on the url.
        :param try_person: additionally search persons by url.  Names are
            not used for persons because namesakes are common.
        :param need_name: only create the party when its name is longer
            than one character.
        """
        name, url = party['名称'], party['链接']
        own_name, own_url, own_node = owner
        if name == own_name or url == own_url:
            return own_node
        if by_name:
            cypher = '_.URL = "{}" OR _.NAME = "{}"'.format(url, name)
        else:
            cypher = '_.URL = "{}"'.format(url)
        node = self.match_node(
            *(legal if labels is None else labels), cypher=cypher)
        if node is None and try_person:
            # TODO(leung): searching further entity types is expensive.
            node = self.match_node(
                'Person', cypher='_.URL = "{}"'.format(url))
        if node is None and (not need_name or len(name) > 1):
            node = self.get_neo_node(fallback(**party))
        return node

    def add_punishments(etp_n, raw, source):
        """Link the enterprise to every punishment issued by *source*."""
        for p in Punishment.create_from_dict(
                self.get_format_dict(raw), source):
            n = self.get_neo_node(p.pop('punishment'))
            if n is not None:
                add(Have, etp_n, n, p)

    def collect(j, etp_n):
        """Queue every relationship found in one '经营风险' document."""
        content = j['content']

        if '动产抵押' in content:
            owner = (j['name'], j['url'], etp_n)
            for d in self.get_format_dict(content['动产抵押']):
                amount = d.pop('被担保主债权数额')
                debt_n = self.get_neo_node(Debt(**{
                    '债务(金额)': amount['金额'],
                    '债务(单位)': amount['单位'],
                    '履行期限': d.pop('债务人履行债务的期限'),
                }))
                # Pop every party before linking so that none of them ends
                # up in the relationship properties.
                parties = (
                    ('抵押权人', d.pop('抵押权人')),
                    ('债务人', d.pop('债务人')),
                    ('所有权或使用权人', d.pop('所有权或使用权归属')),
                )
                if debt_n is None:
                    continue
                for role, party in parties:
                    party_n = resolve(party, owner, Related, need_name=True)
                    if party_n is not None:
                        add(Have, party_n, debt_n, dict(角色=role, **d))

        if '公示催告' in content:
            owner = (j['name'], j['url'], etp_n)
            for d in self.get_format_dict(content['公示催告']):
                face = d.pop('票面金额')
                bn_n = self.get_neo_node(Banknote(**{
                    '票据号': d.pop('票据号'),
                    '票据类型': d.pop('票据类型'),
                    '票面金额(金额)': face['金额'],
                    '票面金额(单位)': face['单位'],
                }))
                parties = (
                    ('申请人', d.pop('申请人')),
                    ('持票人', d.pop('持票人')),
                )
                if bn_n is None:
                    continue
                for role, party in parties:
                    party_n = resolve(party, owner, Related)
                    if party_n is not None:
                        add(Have, party_n, bn_n, dict(角色=role, **d))
                # The enterprise owning this document is the drawer.
                add(Have, etp_n, bn_n, dict(角色='出票人', **d))

        if '行政处罚' in content:
            data = content['行政处罚']
            for source in ('工商局', '税务局', '信用中国', '其他'):
                if source in data:
                    add_punishments(etp_n, data[source], source)

        if '环保处罚' in content:
            add_punishments(etp_n, content['环保处罚'], '环保局')

        if '股权出质' in content:
            owner = (j['name'], etp_n['URL'], etp_n)
            for sh in self.get_format_dict(content['股权出质']):
                amount = self.get_format_amount(
                    '出质数额', sh.pop('出质数额'))
                sh = dict(sh, **amount)
                found = {}
                # pledgor, pledgee, pledged enterprise
                for key, fallback in (('出质人', Involveder),
                                      ('质权人', Involveder),
                                      ('标的企业', Possession)):
                    party = sh.pop(key)
                    party['链接'] = Enterprise.parser_url(party['链接'])
                    found[key] = resolve(
                        party, owner, fallback,
                        try_person=True, need_name=True)
                cz_n, zq_n, bd_n = (
                    found['出质人'], found['质权人'], found['标的企业'])
                # 1. the pledgor pledges the target
                if cz_n is not None and bd_n is not None:
                    add(Guaranty, cz_n, bd_n, sh)
                # 2. the pledgee holds the pledge
                if zq_n is not None and bd_n is not None:
                    add(Have, zq_n, bd_n, sh)

        if '破产重组' in content:
            owner = (j['name'], etp_n['URL'], etp_n)
            # Labels are case-sensitive: use 'Person' as everywhere else.
            candidates = ['Person'] + legal
            for d in self.get_format_dict(content['破产重组']):
                sq_n = resolve(d.pop('申请人'), owner, Involveder,
                               labels=candidates, by_name=False)
                # The respondent of a bankruptcy petition is usually a
                # legal entity.
                bsq_n = resolve(d.pop('被申请人'), owner, Involveder,
                                labels=candidates, by_name=False)
                if sq_n is not None and bsq_n is not None:
                    # Queued as a plain Relationship; no project wrapper
                    # class is used for this relationship type.
                    relationships.append(
                        Relationship(sq_n, '申请破产', bsq_n, **d)
                    )

        if '土地抵押' in content:
            owner = (j['name'], etp_n['URL'], etp_n)
            for d in self.get_format_dict(content['土地抵押']):
                area = d.pop('抵押面积')
                p_n = self.get_neo_node(Plot(**{
                    '位置': d.pop('位置'),
                    '面积(数量)': area['数额'],
                    '面积(单位)': area['单位'],
                }))
                amount = self.get_format_amount(
                    '抵押金额', d.pop('抵押金额'))
                d = dict(d, **amount)
                dy = d.pop('抵押人')
                dyq = d.pop('抵押权人')
                if p_n is None:
                    continue
                dy_n = resolve(dy, owner, Related)
                if dy_n is not None:
                    add(Guaranty, dy_n, p_n, d)
                dyq_n = resolve(dyq, owner, Related)
                if dyq_n is not None:
                    add(Have, dyq_n, p_n, d)

    for handled, j in enumerate(ors, start=1):
        # Everything listed under a company's business risk belongs to
        # that company.
        # TODO(leung): outside the basic-info module the url cannot
        # identify a company, hence the lookup by name only.
        etp_n = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(j['name'])
        )
        if etp_n is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': j['name']}
            )
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # A freshly created enterprise has no basic relationships
                # yet, so queue them as well.
                relationships += \
                    eg.create_relationship_from_enterprise_baseinfo(base_info)
            else:
                # Nothing known about the company: treat it as a plain
                # party involved in the risk records.
                placeholder = Related()
                placeholder['NAME'] = j['name']
                placeholder['URL'] = j['url']
                etp_n = self.get_neo_node(placeholder)
            if etp_n is None:
                # Without an enterprise node nothing can be linked.
                continue

        collect(j, etp_n)

        if len(relationships) > 1000:
            flush()

    if relationships:
        flush()
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """
    Collect the operating-risk nodes and relationships of one enterprise.

    The following sections of ``etp['content']`` are processed when
    present: '动产抵押', '公示催告', '行政处罚', '环保处罚', '股权出质',
    '破产重组' and '土地抵押'.

    The records returned by ``get_format_dict`` are modified in place:
    fields are popped off them and party links are rewritten with
    ``Enterprise.parser_url``.

    :param etp: mapping with the keys 'url', 'name' and 'content'.
    :return: ``(nodes, relationships)``. Both lists are empty when no
        node can be obtained for the enterprise itself.
    """
    etp_n = self.get_neo_node(Enterprise(URL=etp['url'], NAME=etp['name']))
    if etp_n is None:
        return [], []
    nodes, relationships = [etp_n], []
    content = etp['content']

    # WHERE-clause templates used to look a party up in the graph.
    # NOTE(review): values are interpolated unescaped, so a name or link
    # containing a double quote produces a malformed clause.
    url_or_name = '_.URL = "{url}" OR _.NAME = "{name}"'
    name_or_url = '_.NAME = "{name}" OR _.URL = "{url}"'
    url_only = '_.URL = "{url}"'

    def build_party(info):
        """Create a node for an unknown party: Enterprise, else Person,
        else Related."""
        party = Enterprise(**info)
        if not party.isEnterprise():
            party = Person(**info)
            if not party.isPerson():
                party = Related(**info)
        return self.get_neo_node(party)

    def build_target(info):
        """Create a node for an unknown pledge target: Enterprise, else
        Possession (a pledge target is never a person)."""
        target = Enterprise(**info)
        if not target.isEnterprise():
            target = Possession(**info)
        return self.get_neo_node(target)

    def resolve(info, cypher, *, node_url=True, with_person_label=False,
                person_by_url=False, need_name=False, build=build_party):
        """Return the graph node for the party described by ``info``.

        :param info: dict with '名称' (name) and '链接' (link).
        :param cypher: one of the WHERE-clause templates above.
        :param node_url: compare the link with ``etp_n['URL']`` when
            True, with ``etp['url']`` when False.
        :param with_person_label: also search the 'person' label in the
            first lookup.
        :param person_by_url: if the first lookup fails, search the
            'Person' label by URL only (names of natural persons
            collide too often to match on).
        :param need_name: only create a new node when the name is
            longer than one character.
        :param build: factory used to create a node nobody matched.
        :return: a node, or None when nothing matched and nothing was
            created.
        """
        name, link = info['名称'], info['链接']
        if name == etp['name'] or link == (
                etp_n['URL'] if node_url else etp['url']):
            return etp_n
        # NOTE(review): the label is spelled 'person' here but 'Person'
        # in the fallback lookup below - confirm which one is intended.
        labels = ['person'] + legal if with_person_label else legal
        found = self.match_node(
            *labels, cypher=cypher.format(name=name, url=link)
        )
        if found is None and person_by_url:
            found = self.match_node(
                'Person', cypher=url_only.format(url=link)
            )
        if found is None and (not need_name or len(name) > 1):
            found = build(info)
        return found

    def link_role(party_n, target_n, role, props):
        """Record ``party -[have {角色: role}]-> target`` if the party
        could be resolved."""
        if party_n is not None:
            nodes.append(party_n)
            relationships.append(Have(
                party_n, target_n, **dict(角色=role, **props)
            ))

    def attach_punishments(raw, source):
        """Link the enterprise to every punishment issued by ``source``."""
        records = Punishment.create_from_dict(
            self.get_format_dict(raw), source
        )
        for record in records:
            p_n = self.get_neo_node(record.pop('punishment'))
            if p_n is not None:
                nodes.append(p_n)
                relationships.append(Have(etp_n, p_n, **record))

    if '动产抵押' in content:
        for d in self.get_format_dict(content['动产抵押']):
            amount = d.pop('被担保主债权数额')
            debt_n = self.get_neo_node(Debt(**{
                '债务(金额)': amount['金额'],
                '债务(单位)': amount['单位'],
                '履行期限': d.pop('债务人履行债务的期限'),
            }))
            if debt_n is None:
                # Without a debt node there is nothing to attach the
                # parties to; never put None into the result lists.
                continue
            nodes.append(debt_n)
            dy = d.pop('抵押权人')
            zw = d.pop('债务人')
            sy = d.pop('所有权或使用权归属')
            for party in (dy, zw, sy):
                party['链接'] = Enterprise.parser_url(party['链接'])
            link_role(
                resolve(dy, url_or_name, node_url=False),
                debt_n, '抵押权人', d)
            link_role(
                resolve(zw, url_or_name, node_url=False, need_name=True),
                debt_n, '债务人', d)
            link_role(
                resolve(sy, url_or_name, node_url=False, need_name=True),
                debt_n, '所有权或使用权人', d)

    if '公示催告' in content:
        for d in self.get_format_dict(content['公示催告']):
            face = d.pop('票面金额')
            bn_n = self.get_neo_node(Banknote(**{
                '票据号': d.pop('票据号'),
                '票据类型': d.pop('票据类型'),
                '票面金额(金额)': face['金额'],
                '票面金额(单位)': face['单位'],
            }))
            if bn_n is None:
                continue
            nodes.append(bn_n)
            sq = d.pop('申请人')
            cp = d.pop('持票人')
            sq['链接'] = Enterprise.parser_url(sq['链接'])
            cp['链接'] = Enterprise.parser_url(cp['链接'])
            link_role(
                resolve(sq, url_or_name, node_url=False),
                bn_n, '申请人', d)
            link_role(
                resolve(cp, url_or_name, node_url=False),
                bn_n, '持票人', d)
            # The enterprise being processed is recorded as the drawer.
            # NOTE(review): assumed to apply to every record regardless
            # of whether the holder was resolved - confirm.
            relationships.append(Have(
                etp_n, bn_n, **dict(角色='出票人', **d)
            ))

    if '行政处罚' in content:
        data = content['行政处罚']
        for source in ('工商局', '税务局', '信用中国', '其他'):
            attach_punishments(data[source], source)

    if '环保处罚' in content:
        attach_punishments(content['环保处罚'], '环保局')

    if '股权出质' in content:
        for sh in self.get_format_dict(content['股权出质']):
            sh = dict(sh, **self.get_format_amount(
                '出质数额', sh.pop('出质数额')
            ))
            # Pledgor: legal entities first, then persons by URL.
            cz = sh.pop('出质人')
            cz['链接'] = Enterprise.parser_url(cz['链接'])
            cz_n = resolve(
                cz, name_or_url, person_by_url=True, need_name=True)
            # Pledgee: same lookup strategy as the pledgor.
            # TODO(leung): searching across entities is expensive.
            zq = sh.pop('质权人')
            zq['链接'] = Enterprise.parser_url(zq['链接'])
            zq_n = resolve(
                zq, name_or_url, person_by_url=True, need_name=True)
            # Pledged enterprise: only legal entities are searched.
            bd = sh.pop('标的企业')
            bd['链接'] = Enterprise.parser_url(bd['链接'])
            bd_n = resolve(
                bd, name_or_url, need_name=True, build=build_target)
            if bd_n is None:
                continue
            nodes.append(bd_n)
            # 1. the pledgor guarantees with the target
            if cz_n is not None:
                nodes.append(cz_n)
                relationships.append(Guaranty(cz_n, bd_n, **sh))
            # 2. the pledgee holds the pledge right over the target
            if zq_n is not None:
                nodes.append(zq_n)
                relationships.append(Have(zq_n, bd_n, **sh))

    if '破产重组' in content:
        for d in self.get_format_dict(content['破产重组']):
            sq = d.pop('申请人')
            sq['链接'] = Enterprise.parser_url(sq['链接'])
            sq_n = resolve(sq, url_only, with_person_label=True)
            # The respondent of a bankruptcy petition is usually a
            # legal entity.
            bsq = d.pop('被申请人')
            bsq['链接'] = Enterprise.parser_url(bsq['链接'])
            bsq_n = resolve(bsq, url_only, with_person_label=True)
            if sq_n is not None and bsq_n is not None:
                nodes += [sq_n, bsq_n]
                relationships.append(ApplyBankrupt(sq_n, bsq_n, **d))

    if '土地抵押' in content:
        for d in self.get_format_dict(content['土地抵押']):
            area = d.pop('抵押面积')
            p_n = self.get_neo_node(Plot(**{
                '位置': d.pop('位置'),
                '面积(数量)': area['数额'],
                '面积(单位)': area['单位'],
            }))
            if p_n is None:
                continue
            nodes.append(p_n)
            d = dict(d, **self.get_format_amount(
                '抵押金额', d.pop('抵押金额')
            ))
            dy = d.pop('抵押人')
            dyq = d.pop('抵押权人')
            dy['链接'] = Enterprise.parser_url(dy['链接'])
            dyq['链接'] = Enterprise.parser_url(dyq['链接'])
            dy_n = resolve(dy, url_or_name)
            if dy_n is not None:
                nodes.append(dy_n)
                relationships.append(Guaranty(dy_n, p_n, **d))
            dyq_n = resolve(dyq, url_or_name)
            if dyq_n is not None:
                nodes.append(dyq_n)
                relationships.append(Have(dyq_n, p_n, **d))

    return nodes, relationships
def create_all_relationship(self):
    """
    Merge enterprise -[have]-> intellectual-property relationships.

    Every record whose ``metaModel`` is '知识产权' is read from
    ``self.base``. The owning enterprise node is matched by name; if it
    is not in the graph yet it is built from its '基本信息' record
    (together with its base-info relationships) or, failing that, from
    a ``Related`` placeholder. The enterprise is then linked to each
    item of the sections listed in ``sections`` below.

    Pending relationships are merged into the graph whenever more than
    1000 have accumulated, and once more after the last record.

    :return: None
    """
    # (content key, model class, key under which the model instance is
    # stored in each dict returned by ``create_from_dict``)
    sections = (
        ('网站信息', Website, 'website'),
        ('证书信息', Certificate, 'certificate'),
        ('专利信息', Patent, 'patent'),
        ('商标信息', Trademark, 'trademark'),
        ('软件著作权', SoftCopyRight, 'softcopyright'),
        ('作品著作权', WorkCopyRight, 'workcopyright'),
        ('微博', Weibo, 'weibo'),
        ('微信公众号', OfficialAccount, 'WeChat'),
        ('小程序', Applets, 'applets'),
        ('APP', App, 'app'),
    )
    relationships = []

    def flush(round_no, handled, total, spent=None):
        """Merge the pending relationships, report progress, reset."""
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        if spent is None:
            text = ('{}:success merge relationships to database '
                    'round {} and deal {}/{} enterprise,and'
                    ' merge {} relationships.'.format(
                        dt.datetime.now(), round_no, handled, total,
                        len(relationships)))
        else:
            text = ('{}:success merge relationships to database '
                    'round {} and deal {}/{} enterprise and spend {} '
                    'seconds,and merge {} relationships.'.format(
                        dt.datetime.now(), round_no, handled, total,
                        spent, len(relationships)))
        print(SuccessMessage(text))
        relationships.clear()

    rts = self.base.query(
        sql={'metaModel': '知识产权'},
        no_cursor_timeout=True)
    try:
        i, k = 0, 0
        eg = EtpGraph()
        # NOTE(review): if ``rts`` is a PyMongo cursor, ``Cursor.count``
        # no longer exists in PyMongo 4 - confirm the driver version.
        etp_count = rts.count()
        s_t = time.time()
        for r in rts:
            k += 1
            # TODO(leung): the url found in modules other than the base
            # info cannot identify the company, so match on name only.
            etp_n = self.match_node(
                *legal, cypher='_.NAME = "{}"'.format(r['name']))
            if etp_n is None:
                # The company is not in the graph yet: create it.
                base_info = self.base.query_one(sql={
                    'metaModel': '基本信息',
                    'name': r['name']
                })
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # Other builders create missing enterprises without
                    # their base relationships, so add those here.
                    relationships.extend(
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info))
                else:
                    # Nothing is known about the company: create an
                    # incomplete placeholder for it.
                    etp_n = self.get_neo_node(
                        Related(**{'名称': r['name'], '链接': r['url']}))

            # A relationship cannot start at a missing node, so the
            # sections are skipped when no enterprise node was obtained.
            if etp_n is not None:
                for section, model, key in sections:
                    if section not in r['content']:
                        continue
                    data = self.get_format_dict(r['content'][section])
                    for item in model.create_from_dict(data):
                        item_n = self.get_neo_node(item.pop(key))
                        if item_n is not None:
                            relationships.append(
                                Have(etp_n, item_n,
                                     **item).get_relationship())

            if len(relationships) > 1000:
                i += 1
                sp = int(time.time() - s_t)
                s_t = time.time()
                flush(i, k, etp_count, sp)

        if len(relationships):
            i += 1
            flush(i, k, etp_count)
    finally:
        # The query disables the server-side cursor timeout, so the
        # cursor has to be released explicitly, including on errors.
        close = getattr(rts, 'close', None)
        if callable(close):
            close()
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """
    Collect the intellectual-property nodes and relationships of one
    enterprise.

    For every section of ``etp['content']`` listed in ``catalogue`` the
    section is formatted with ``get_format_dict``, turned into model
    dicts by the model's ``create_from_dict``, and each resulting node
    is linked to the enterprise with a ``Have`` relationship carrying
    the remaining keys of its dict as properties.

    :param etp: mapping with the keys 'url', 'name' and 'content'.
    :return: ``(nodes, relationships)``. Both lists are empty when no
        node can be obtained for the enterprise itself.
    """
    owner = self.get_neo_node(
        Enterprise(URL=etp['url'], NAME=etp['name'])
    )
    if owner is None:
        return [], []

    found_nodes = [owner]
    links = []
    content = etp['content']

    # (content key, model class, key under which the model instance is
    # stored in each dict returned by ``create_from_dict``)
    catalogue = (
        ('网站信息', Website, 'website'),
        ('证书信息', Certificate, 'certificate'),
        ('专利信息', Patent, 'patent'),
        ('商标信息', Trademark, 'trademark'),
        ('软件著作权', SoftCopyRight, 'softcopyright'),
        ('作品著作权', WorkCopyRight, 'workcopyright'),
        ('微博', Weibo, 'weibo'),
        ('微信公众号', OfficialAccount, 'WeChat'),
        ('小程序', Applets, 'applets'),
        ('APP', App, 'app'),
    )

    for section, model, handle in catalogue:
        if section not in content:
            continue
        formatted = self.get_format_dict(content[section])
        for entry in model.create_from_dict(formatted):
            item_n = self.get_neo_node(entry.pop(handle))
            if item_n is None:
                continue
            found_nodes.append(item_n)
            # What is left in ``entry`` becomes the relationship's
            # properties.
            links.append(Have(owner, item_n, **entry))

    return found_nodes, links