def get_all_nodes_and_relationships_from_api(self, etp):
    """
    Build the industry nodes of one enterprise and the chain linking them.

    The industry data is passed in from outside (``etp['IndustryV3']``)
    because industry may be treated as a relatively independent research
    domain and may not fully match the industry recorded in the
    enterprise's basic information in the database.

    :param etp: mapping read for 'url', 'name' and 'IndustryV3'
    :return: ``(nodes, relationships)``; two empty lists when no enterprise
        node can be matched or created
    """
    # NOTE(review): the query text is assembled with str.format, so a name
    # containing a double quote would change the query -- confirm inputs.
    enterprise_node = self.match_node(
        'Enterprise',
        cypher='_.URL = "{}" OR _.NAME = "{}"'
               ''.format(Enterprise.parser_url(etp['url']), etp['name']))
    if enterprise_node is None:
        enterprise_node = self.get_neo_node(
            Enterprise(URL=etp['url'], NAME=etp['name']))
        if enterprise_node is None:
            return [], []

    nodes, relationships = [enterprise_node], []
    industry = etp['IndustryV3']
    if industry is None:
        return nodes, relationships

    # (name key, code key, level tag), from the broadest level to the finest.
    levels = (
        ('Industry', 'IndustryCode', '一级'),
        ('SubIndustry', 'SubIndustryCode', '二级'),
        ('MiddleCategory', 'MiddleCategoryCode', '三级'),
        ('SmallCategory', 'SmallCategoryCode', '四级'),
    )
    created = [
        self.get_neo_node(Industry(**{
            'name': industry[name_key],
            'code': industry[code_key],
            '类别': level,
        }))
        for name_key, code_key, level in levels
    ]

    # Finest level first; levels whose node could not be built are skipped.
    chain = [node for node in reversed(created) if node is not None]
    nodes.extend(chain)
    if chain:
        # The enterprise belongs to the finest available level, and every
        # level belongs to the next broader one.
        relationships.append(Belong(enterprise_node, chain[0]))
        relationships.extend(
            Belong(child, parent) for child, parent in zip(chain, chain[1:]))
    return nodes, relationships
def get_all_relationships(self):
    """
    Collect the relationships of every "基本信息" record, grouped by label.

    Each relationship returned by ``get_all_relationships_from_enterprise``
    is converted with ``to_dict()`` and stored under its ``'label'`` value.
    A progress message is printed every 1000 enterprises.

    :return: dict mapping label -> list of relationship dicts
    """
    enterprises = self.base.query(
        sql={'metaModel': '基本信息'},
        limit=10000,
        no_cursor_timeout=True)
    total = enterprises.count()
    rounds = 0
    grouped = {}
    for handled, record in enumerate(enterprises, start=1):
        enterprise = Enterprise(record)
        for rel in self.get_all_relationships_from_enterprise(enterprise):
            rel_dict = rel.to_dict()
            grouped.setdefault(rel_dict['label'], []).append(rel_dict)
        if handled % 1000 == 0:
            rounds += 1
            print(SuccessMessage(
                '{}:success merge nodes to database '
                'round {} and deal {}/{} enterprise'
                ''.format(dt.datetime.now(), rounds, handled, total)
            ))
    return grouped
def get_all_nodes(self):
    """
    Collect the nodes of every "基本信息" record, grouped by label.

    Each non-None node returned by ``get_all_nodes_from_enterprise`` is
    converted with ``to_dict()`` and stored under its ``'label'`` value.
    A progress message is printed every 1000 enterprises.

    :return: dict mapping label -> list of node dicts
    """
    enterprises = self.base.query(
        sql={'metaModel': '基本信息'},
        limit=10000,
        no_cursor_timeout=True)
    # NOTE(review): the total shown in the progress message is a fixed 1000
    # while the query limit is 10000 -- confirm whether this is intended.
    total = 1000
    rounds = 0
    grouped = dict()
    for handled, record in enumerate(enterprises, start=1):
        enterprise = Enterprise(record)
        for node in self.get_all_nodes_from_enterprise(enterprise):
            if node is None:
                continue
            node_dict = node.to_dict()
            grouped.setdefault(node_dict['label'], []).append(node_dict)
        if handled % 1000 == 0:
            rounds += 1
            print(SuccessMessage(
                '{}:success merge nodes to database '
                'round {} and deal {}/{} enterprise'
                ''.format(dt.datetime.now(), rounds, handled, total)
            ))
    return grouped
def create_nodes_from_enterprise_baseinfo(self, eb):
    """
    Create every node derived from one enterprise basic-information record:

    1. the enterprise
    2. its legal representative
    3. its managers
    4. its address

    The basic information yields further entities as well, but those are
    created later together with their relationships.

    :param eb: basic-information record; ``eb['name']`` is used in log entries
    :return: list of created nodes, or None when the enterprise node itself
        cannot be built
    """
    nodes = []

    def report(message, level):
        # Every log entry is tagged with the enterprise name.
        self.to_logs(message, level, eb['name'])

    def collect(entity, failure_message):
        # Build the entity's node; keep it, or log why it is missing.
        node = entity.get_neo_node(primarykey=entity.primarykey)
        if node is None:
            report(failure_message, 'ERROR')
        else:
            nodes.append(node)

    enterprise = Enterprise(eb)
    enterprise_node = enterprise.get_neo_node(
        primarykey=enterprise.primarykey)
    if enterprise_node is None:
        report('filed initialize enterprise Neo node', 'ERROR')
        return None
    nodes.append(enterprise_node)

    # Each derived entity is best-effort: a failure is logged and the
    # remaining entities are still processed.
    try:
        collect(enterprise.get_legal_representative(),
                'filed initialize legal representative '
                'Neo node')
    except Exception as e:
        report('deal legal representative raise ({})'
               ''.format(e), 'EXCEPTION')

    try:
        managers = enterprise.get_manager()
        if len(managers):
            for manager in managers:
                collect(manager['person'],
                        'filed initialize major manager '
                        'Neo node')
    except Exception as e:
        report('deal major managers raise ({})'.format(e), 'EXCEPTION')

    try:
        collect(enterprise.get_address(),
                'filed initialize address Neo node')
    except Exception as e:
        report('deal address raise ({})'.format(e), 'EXCEPTION')

    return nodes
def create_all_relationship(self):
    """
    Create the relationships visible in the enterprise basic information:

    1. person-[lr]->enterprise
    2. person-[be_in_office]->enterprise
    3. enterprise-[located]->address
    4. person|enterprise-[holding]->enterprise

    Relationships are buffered and merged into the graph whenever more than
    1000 are pending, and once more after the last record.

    :return: None
    """
    enterprises = self.base.query(
        sql={'metaModel': '基本信息'},
        limit=1000,
        no_cursor_timeout=True)
    total = enterprises.count()
    pending = []
    rounds = 0
    handled = 0

    def merge_pending():
        # Merge the buffer, make sure indexes/constraints exist, report
        # progress and empty the buffer.
        self.graph_merge_relationships(pending)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage('{}:success merge relationships to database '
                             'round {} and deal {}/{} enterprise,and'
                             ' merge {} relationships.'.format(
                                 dt.datetime.now(), rounds, handled, total,
                                 len(pending))))
        pending.clear()

    for record in enterprises:
        handled += 1
        enterprise = Enterprise(record)
        pending.extend(self.get_relationship_from_enterprise(enterprise))
        if len(pending) > 1000:
            rounds += 1
            merge_pending()

    # Merge whatever is left after the last full batch.
    if pending:
        rounds += 1
        merge_pending()
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """
    Collect the news nodes of one enterprise and the links to them.

    :param etp: enterprise record read for 'url', 'name' and 'content'
    :return: ``(nodes, relationships)``; two empty lists when the enterprise
        node cannot be built
    """
    enterprise_node = self.get_neo_node(
        Enterprise(URL=etp['url'], NAME=etp['name']))
    if enterprise_node is None:
        return [], []

    nodes, relationships = [enterprise_node], []
    content = etp['content']
    if '新闻舆情' not in content:
        return nodes, relationships

    formatted = self.get_format_dict(content['新闻舆情'])
    for item in News.create_from_dict(formatted):
        # What remains in `item` after popping the entity becomes the
        # properties of the relationship.
        news_node = self.get_neo_node(item.pop('news'))
        if news_node is None:
            continue
        nodes.append(news_node)
        relationships.append(Have(enterprise_node, news_node, **item))
    return nodes, relationships
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """
    Collect competitor-product nodes and relationships of one enterprise.

    For every entry under ``etp['content']['竞品信息']``:

    * enterprise -[Compete]-> product
    * related enterprise -[Produce]-> product, when the entry names a
      related enterprise (name longer than one character)

    Changes from the previous version:

    * a related enterprise whose node cannot be built is now skipped instead
      of being appended to ``nodes`` as None and passed to ``Produce``;
    * an unused dict that popped '产品介绍', '成立日期', '融资信息' and '所属地'
      from the entry (and could raise KeyError for no benefit) is gone.

    :param etp: enterprise record read for 'url', 'name' and 'content'
    :return: ``(nodes, relationships)``; two empty lists when the enterprise
        node cannot be built
    """
    etp_n = self.get_neo_node(Enterprise(URL=etp['url'], NAME=etp['name']))
    if etp_n is None:
        return [], []

    nodes, relationships = [etp_n], []
    content = etp['content']
    if '竞品信息' not in content:
        return nodes, relationships

    data = self.get_format_dict(content['竞品信息'])
    for d in Product.create_from_dict(data):
        p_n = self.get_neo_node(d.pop('product'))
        if p_n is None:
            continue
        nodes.append(p_n)
        relationships.append(Compete(etp_n, p_n))

        etp_2 = d.pop('关联企业')
        etp_2['链接'] = Enterprise.parser_url(etp_2['链接'])
        name = etp_2['名称']
        if name is None or len(name) <= 1:
            # No usable related enterprise for this product.
            continue

        etp_n_2 = self.match_node(
            *legal, cypher='_.URL = "{}"'.format(etp_2['链接']))
        if etp_n_2 is None:
            # Not in the graph yet: prefer an Enterprise entity and fall
            # back to a Related one built from link and name only.
            candidate = Enterprise(**etp_2)
            if not candidate.isEnterprise():
                candidate = Related(**{
                    '链接': etp_2['链接'],
                    '名称': etp_2['名称'],
                })
            etp_n_2 = self.get_neo_node(candidate)
        if etp_n_2 is None:
            # The node could not be built; do not emit a None node or a
            # relationship with a None start node.
            continue
        nodes.append(etp_n_2)
        relationships.append(Produce(etp_n_2, p_n))
    return nodes, relationships
def create_all_relationship(self):
    """
    Merge the relationships implied by every "经营状况" record into the graph.

    Relationship shape: enterprise -[have or x]-> x.

    For each record the enterprise node is matched by name. When it is not
    in the graph it is created from its "基本信息" record (together with its
    basic relationships), or, when no such record exists, as a ``Related``
    entity holding only NAME and URL. The sections of the record's content
    are then turned into relationships, which are buffered and merged
    whenever more than 1000 are pending and once more at the end.

    Fix: the sections are now read from the current record ``o['content']``.
    The previous code read ``etp['content']``, but ``etp`` is only assigned
    when the enterprise node had to be created, so it was unbound (or left
    over from an earlier record) whenever the node was matched directly.

    :return: None
    """
    ops = self.base.query(
        sql={'metaModel': '经营状况'},
        limit=1000,
        no_cursor_timeout=True)
    rounds, handled = 0, 0
    eg = EtpGraph()
    etp_count = ops.count()
    relationships = []

    def flush():
        # Merge the buffer, make sure indexes/constraints exist, report
        # progress and empty the buffer.
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(
            SuccessMessage(
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise,and'
                ' merge {} relationships.'.format(
                    dt.datetime.now(), rounds, handled, etp_count,
                    len(relationships))))
        relationships.clear()

    def attach(etp_n, rel_cls, items, pop_key, props_of=None):
        # Link etp_n to the node of each item's entity; the rest of the item
        # (optionally reshaped by props_of) becomes the relationship
        # properties.
        for item in items:
            node = self.get_neo_node(item.pop(pop_key))
            if node is None:
                continue
            props = item if props_of is None else props_of(item, node)
            relationships.append(
                rel_cls(etp_n, node, **props).get_relationship())

    def attach_legal(etp_n, rel_cls, items, pop_key):
        # Like attach(), but an existing legal entity with the same URL or
        # NAME is reused before a new node is built.
        for item in items:
            entity = item.pop(pop_key)
            node = self.match_node(
                *legal,
                cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                    entity['URL'], entity['NAME']))
            if node is None:
                node = self.get_neo_node(entity)
            if node is None:
                continue
            relationships.append(
                rel_cls(etp_n, node, **item).get_relationship())

    def land_party(party, record, etp_n):
        # Resolve one side of a land transfer: the current enterprise, an
        # existing legal entity matched by URL, or a new Related node.
        if party['名称'] == record['name'] or party['链接'] == record['url']:
            return etp_n
        # The party may also be a natural person.
        node = self.match_node(
            *legal, cypher='_.URL = "{}"'.format(party['链接']))
        if node is None:
            node = self.get_neo_node(Related(**party))
        return node

    for o in ops:
        handled += 1
        # TODO(leung): the url in modules other than the basic information
        # cannot identify the company, so the match is by name only.
        etp_n = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(o['name']))
        if etp_n is None:
            # The company is not in the graph yet, so create it.
            base_info = self.base.query_one(sql={
                'metaModel': '基本信息',
                'name': o['name']
            })
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # Enterprises missing from the database may be created
                # elsewhere without their basic relationships, so add those
                # here.
                relationships.extend(
                    eg.create_relationship_from_enterprise_baseinfo(
                        base_info))
            else:
                # No basic information: create an incomplete company. A
                # node holding only NAME and URL therefore means the
                # company has no "基本信息" record.
                related = Related()
                related['NAME'] = o['name']
                related['URL'] = o['url']
                etp_n = self.get_neo_node(related)

        content = o['content']

        # '产权交易' is not turned into relationships yet.

        if '行政许可' in content:
            data = content['行政许可']
            for source in ('工商局', '信用中国'):
                if source in data:
                    licenses = License.create_from_dict(
                        self.get_format_dict(data[source]), source)
                    attach(etp_n, Have, licenses, 'license')

        if '招投标信息' in content:
            # Published bidding information is usually the result, so joint
            # bidders are normally not discoverable unless they won jointly.
            data = self.get_format_dict(content['招投标信息'])
            # TODO(leung): the project type is used as the bidding result.
            attach(etp_n, TakePartIn, Bidding.create_from_dict(data),
                   'bidding',
                   lambda item, node: dict(item, **{'RESULT': node['TYPE']}))

        if '抽查检查' in content:
            data = self.get_format_dict(content['抽查检查'])
            attach(etp_n, Have, Check.create_from_dict(data), 'check',
                   lambda item, node: dict(item,
                                           **{'RESULT': node['RESULT']}))

        if '双随机抽查' in content:
            data = self.get_format_dict(content['双随机抽查'])
            # TODO(leung): random checks carry no result.
            attach(etp_n, Have, RandomCheck.create_from_dict(data), 'check')

        if '税务信用' in content:
            data = self.get_format_dict(content['税务信用'])
            # TODO(leung): the tax credit grade is used as the rating result.
            attach(etp_n, Have, TaxCredit.create_from_dict(data),
                   'TaxCredit',
                   lambda item, node: dict(RESULT=node['GRADE'], **item))

        if '进出口信用' in content:
            data = self.get_format_dict(content['进出口信用'])
            attach(etp_n, Have, IAE.create_from_dict(data), 'iae')

        if '招聘' in content:
            data = self.get_format_dict(content['招聘'])
            attach(etp_n, Recruit, Position.create_from_dict(data),
                   'position')

        if '客户' in content:
            data = self.get_format_dict(content['客户'])
            attach_legal(etp_n, SellTo, Client.create_from_dict(data),
                         'client')

        if '供应商' in content:
            data = self.get_format_dict(content['供应商'])
            attach_legal(etp_n, BuyFrom, Supplier.create_from_dict(data),
                         'supplier')

        if '信用评级' in content:
            data = self.get_format_dict(content['信用评级'])
            for d in data:
                agency = d.pop('评级公司')
                n = self.match_node(
                    *legal,
                    cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                        agency['链接'], agency['名称']))
                if n is None:
                    related = Related()
                    related['NAME'] = agency['名称']
                    related['URL'] = agency['链接']
                    n = self.get_neo_node(related)
                if n is None:
                    continue
                rating = d.pop('内容')
                d['评级内容'] = rating['内容']
                d['评级链接'] = rating['链接']
                # The rating agency appraises the enterprise.
                relationships.append(
                    Appraise(n, etp_n, **d).get_relationship())

        if '土地转让' in content:
            data = self.get_format_dict(content['土地转让'])
            for d in data:
                e1 = d.pop('原土地使用权人')
                e2 = d.pop('现有土地使用权人')
                p_n = self.get_neo_node(Plot(**d))
                if p_n is None:
                    continue
                # The former holder sells the plot, the current one buys it.
                n1 = land_party(e1, o, etp_n)
                if n1 is not None:
                    relationships.append(Sell(n1, p_n).get_relationship())
                n2 = land_party(e2, o, etp_n)
                if n2 is not None:
                    relationships.append(Buy(n2, p_n).get_relationship())

        if len(relationships) > 1000:
            rounds += 1
            flush()

    # Merge whatever is left after the last full batch.
    if relationships:
        rounds += 1
        flush()
def get_all_nodes_and_relationships(
        self, save_folder=None, **kwargs):
    """
    Collect nodes and relationships of every "基本信息" record, grouped by
    label, optionally saving them in batches.

    Records that raise are logged and skipped. When ``save_folder`` is
    given, the grouped data is handed to ``self.save_graph`` every 10000
    records and once more at the end, and the in-memory groups are emptied
    after each save.

    :param save_folder: passed to ``self.save_graph``; None disables saving
    :param kwargs: forwarded to ``self.save_graph``
    :return: ``(nodes, relationships)`` dicts mapping label -> list of dicts
        (both empty after a final save)
    """
    enterprises = self.base.query(
        sql={'metaModel': '基本信息'},
        no_cursor_timeout=True)
    total = enterprises.count()
    rounds = 0
    saved_nodes, saved_rels = 0, 0
    nodes, relationships = {}, {}
    started = time.time()

    for handled, record in enumerate(enterprises, start=1):
        try:
            enterprise = Enterprise(record)
            found_nodes, found_rels = \
                self.get_all_nodes_and_relationships_from_enterprise(
                    enterprise)
            for node in found_nodes:
                if node is None:
                    continue
                # The first label of the node is used as its group key.
                label = list(node.labels)[0]
                node_dict = dict(label=label, **node)
                nodes.setdefault(node_dict['label'], []).append(node_dict)
            for rel in found_rels:
                rel_dict = rel.to_dict()
                relationships.setdefault(
                    rel_dict['label'], []).append(rel_dict)
        except Exception as e:
            self.logger.error('{} {}'.format(e, record['name']),
                              exc_info=True)
            continue

        if handled % 10000 == 0:
            rounds += 1
            if save_folder is not None:
                batch_nodes, batch_rels = self.save_graph(
                    save_folder, nodes, relationships, **kwargs)
                saved_nodes += batch_nodes
                saved_rels += batch_rels
                nodes.clear()
                relationships.clear()
            self.logger.info(SuccessMessage(
                'success trans data to csv round {} and '
                'deal {}/{} enterprise spend {} seconds.'
                ''.format(rounds, handled, total,
                          int(time.time() - started))
            ))
            started = time.time()

    if save_folder is not None:
        batch_nodes, batch_rels = self.save_graph(
            save_folder, nodes, relationships, **kwargs)
        saved_nodes += batch_nodes
        saved_rels += batch_rels
        nodes.clear()
        relationships.clear()
        self.logger.info('Summary:')
        self.logger.info(' save graph data:')
        self.logger.info(' {} nodes'.format(saved_nodes))
        self.logger.info(' {} relationships'.format(saved_rels))
    return nodes, relationships
def create_all_relationship(self):
    """
    Merge the relationships implied by every "知识产权" record into the graph.

    Relationship shape: enterprise -[have]-> x.

    For each record the enterprise node is matched by name, or created from
    its "基本信息" record (plus basic relationships), or created as a
    ``Related`` entity when no such record exists. Every known content
    section is then turned into ``Have`` relationships, which are buffered
    and merged whenever more than 1000 are pending and once more at the end.

    :return: None
    """
    # NOTE(review): the skip below is a fixed offset into the result set; it
    # looks like a resume position from an earlier run -- confirm.
    rts = self.base.query(
        sql={'metaModel': '知识产权'},
        skip=79175 + 7909,
        no_cursor_timeout=True)
    rounds, handled = 0, 0
    eg = EtpGraph()
    total = rts.count()
    pending = []
    batch_started = time.time()

    # (content key, entity factory, key of the entity inside each item)
    sections = (
        ('网站信息', Website, 'website'),
        ('证书信息', Certificate, 'certificate'),
        ('专利信息', Patent, 'patent'),
        ('商标信息', Trademark, 'trademark'),
        ('软件著作权', SoftCopyRight, 'softcopyright'),
        ('作品著作权', WorkCopyRight, 'workcopyright'),
        ('微博', Weibo, 'weibo'),
        ('微信公众号', OfficialAccount, 'WeChat'),
        ('小程序', Applets, 'applets'),
        ('APP', App, 'app'),
    )

    for record in rts:
        handled += 1
        # TODO(leung): the url in modules other than the basic information
        # cannot identify the company, so the match is by name only.
        enterprise_node = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(record['name']))
        if enterprise_node is None:
            # The company is not in the graph yet, so create it.
            base_info = self.base.query_one(sql={
                'metaModel': '基本信息',
                'name': record['name']
            })
            if base_info is None:
                # No information about this company: create an incomplete
                # one.
                enterprise_node = self.get_neo_node(
                    Related(**{'名称': record['name'],
                               '链接': record['url']}))
            else:
                enterprise_node = self.get_neo_node(Enterprise(base_info))
                # Enterprises missing from the database may be created
                # elsewhere without their basic relationships, so add those
                # here.
                pending.extend(
                    eg.create_relationship_from_enterprise_baseinfo(
                        base_info))

        content = record['content']
        for key, factory, entity_key in sections:
            if key not in content:
                continue
            formatted = self.get_format_dict(content[key])
            for item in factory.create_from_dict(formatted):
                node = self.get_neo_node(item.pop(entity_key))
                if node is not None:
                    pending.append(
                        Have(enterprise_node, node,
                             **item).get_relationship())

        if len(pending) > 1000:
            rounds += 1
            # Time spent collecting this batch, measured before the merge.
            spent = int(time.time() - batch_started)
            batch_started = time.time()
            self.graph_merge_relationships(pending)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(
                SuccessMessage(
                    '{}:success merge relationships to database '
                    'round {} and deal {}/{} enterprise and spend {} '
                    'seconds,and merge {} relationships.'.format(
                        dt.datetime.now(), rounds, handled, total, spent,
                        len(pending))))
            pending.clear()

    # Merge whatever is left after the last full batch.
    if pending:
        rounds += 1
        self.graph_merge_relationships(pending)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(
            SuccessMessage('{}:success merge relationships to database '
                           'round {} and deal {}/{} enterprise,and'
                           ' merge {} relationships.'.format(
                               dt.datetime.now(), rounds, handled, total,
                               len(pending))))
        pending.clear()
def create_all_relationship(self):
    """
    Merge the relationships implied by every "经营风险" (operating risk)
    record into the graph.

    1.enterprise -[have]->punishment

    Sections handled, in order: 动产抵押, 公示催告, 行政处罚, 环保处罚,
    股权出质, 破产重组, 土地抵押. Relationships are buffered in a list and
    merged through ``self.graph_merge_relationships`` whenever more than
    1000 are pending, and once more after the last record.

    :return: None
    """
    ors = self.base.query(
        sql={
            'metaModel': '经营风险',
            # 'name': '重庆铭悦机械设备有限公司'
        },
        limit=1000,
        # skip=2000,
        no_cursor_timeout=True)
    # i counts merge rounds, k counts processed records.
    i, k = 0, 0
    eg = EtpGraph()
    etp_count = ors.count()
    relationships = []
    # prs = Person()
    # Used below for etp.parser_url(...).
    # NOTE(review): etp is rebound inside the loop (to Enterprise(_) or to
    # Related()); confirm Related also provides parser_url.
    etp = Enterprise()
    for j in ors:
        # Everything listed under a company's operating-risk record belongs
        # to that company.
        k += 1
        # if k < 43500:
        #     continue
        # TODO(leung): note that the url in modules other than the basic
        # information cannot identify the company.
        etp_n = self.match_node(
            *legal,
            cypher='_.NAME = "{}"'.format(j['name'])
        )
        if etp_n is None:
            # If the company is not in the database yet, it should be created.
            _ = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': j['name']}
            )
            if _ is not None:
                etp = Enterprise(_)
                etp_n = self.get_neo_node(etp)
                # Enterprises missing from the database are created while
                # building judicial relationships, but their basic
                # relationships are not, so they are added here.
                relationships += eg.create_relationship_from_enterprise_baseinfo(_)
                pass
            else:
                # No information about this company: simply treat it as an
                # involved party, i.e. the company is regarded as a party
                # involved in a risk notice.
                # etp = Related(**{'名称': j['name'], '链接': j['url']})
                etp = Related()
                etp['NAME'] = j['name']
                etp['URL'] = j['url']
                etp_n = self.get_neo_node(etp)
                pass
        # --- 动产抵押: mortgagee / debtor / owner-or-user each get a Have
        # relationship to the debt node, tagged with their role.
        if '动产抵押' in j['content'].keys():
            data = self.get_format_dict(j['content']['动产抵押'])
            for d in data:
                _ = d.pop('被担保主债权数额')
                debt = Debt(**{'债务(金额)': _['金额'],
                               '债务(单位)': _['单位'],
                               '履行期限': d.pop('债务人履行债务的期限')
                               })
                # NOTE(review): debt_n is not checked for None before use.
                debt_n = self.get_neo_node(debt)
                dy = d.pop('抵押权人')
                zw = d.pop('债务人')
                sy = d.pop('所有权或使用权归属')
                # A party equal to the current record (by name or url) reuses
                # the enterprise node; otherwise it is matched among legal
                # entities and created as Related when its name is longer
                # than one character.
                if dy['名称'] == j['name'] or dy['链接'] == j['url']:
                    dy_n = etp_n
                else:
                    dy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dy['链接'], dy['名称'])
                    )
                    if dy_n is None and len(dy['名称']) > 1:
                        dy_n = Related(**dy)
                        dy_n = self.get_neo_node(dy_n)
                if dy_n is not None:
                    relationships.append(Have(
                        dy_n, debt_n, **dict(角色='抵押权人', **d)
                    ).get_relationship())
                if zw['名称'] == j['name'] or zw['链接'] == j['url']:
                    zw_n = etp_n
                else:
                    zw_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            zw['链接'], zw['名称'])
                    )
                    if zw_n is None and len(zw['名称']) > 1:
                        zw_n = Related(**zw)
                        zw_n = self.get_neo_node(zw_n)
                if zw_n is not None:
                    relationships.append(Have(
                        zw_n, debt_n, **dict(角色='债务人', **d)
                    ).get_relationship())
                if sy['名称'] == j['name'] or sy['链接'] == j['url']:
                    sy_n = etp_n
                else:
                    sy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            sy['链接'], sy['名称'])
                    )
                    if sy_n is None and len(sy['名称']) > 1:
                        sy_n = Related(**sy)
                        sy_n = self.get_neo_node(sy_n)
                if sy_n is not None:
                    relationships.append(Have(
                        sy_n, debt_n, **dict(角色='所有权或使用权人', **d)
                    ).get_relationship())
            pass
        # --- 公示催告: applicant and holder are linked to the banknote
        # node; the current enterprise is always linked with role '出票人'.
        if '公示催告' in j['content'].keys():
            data = self.get_format_dict(j['content']['公示催告'])
            for d in data:
                _ = d.pop('票面金额')
                bn = Banknote(**{'票据号': d.pop('票据号'),
                                 '票据类型': d.pop('票据类型'),
                                 '票面金额(金额)': _['金额'],
                                 '票面金额(单位)': _['单位']
                                 })
                bn_n = self.get_neo_node(bn)
                sq = d.pop('申请人')
                cp = d.pop('持票人')
                # Unlike 动产抵押, there is no name-length check before a
                # Related party is created here.
                if sq['名称'] == j['name'] or sq['链接'] == j['url']:
                    sq_n = etp_n
                else:
                    sq_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            sq['链接'], sq['名称'])
                    )
                    if sq_n is None:
                        sq_n = Related(**sq)
                        sq_n = self.get_neo_node(sq_n)
                if sq_n is not None:
                    relationships.append(Have(
                        sq_n, bn_n, **dict(角色='申请人', **d)
                    ).get_relationship())
                if cp['名称'] == j['name'] or cp['链接'] == j['url']:
                    cp_n = etp_n
                else:
                    cp_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            cp['链接'], cp['名称'])
                    )
                    if cp_n is None:
                        cp_n = Related(**cp)
                        cp_n = self.get_neo_node(cp_n)
                if cp_n is not None:
                    relationships.append(Have(
                        cp_n, bn_n, **dict(角色='持票人', **d)
                    ).get_relationship())
                relationships.append(Have(
                    etp_n, bn_n, **dict(角色='出票人', **d)
                ).get_relationship())
            pass
        # --- 行政处罚: one batch of punishments per issuing source.
        # NOTE(review): the four source keys are indexed directly, so a
        # missing key raises KeyError -- confirm they are always present.
        if '行政处罚' in j['content'].keys():
            data = j['content']['行政处罚']
            d1 = self.get_format_dict(data['工商局'])
            ps = Punishment.create_from_dict(d1, '工商局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    relationships.append(
                        Have(
                            etp_n, n, **p
                        ).get_relationship()
                    )
            d2 = self.get_format_dict(data['税务局'])
            ps = Punishment.create_from_dict(d2, '税务局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    relationships.append(
                        Have(
                            etp_n, n, **p
                        ).get_relationship()
                    )
            d3 = self.get_format_dict(data['信用中国'])
            ps = Punishment.create_from_dict(d3, '信用中国')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    relationships.append(
                        Have(
                            etp_n, n, **p
                        ).get_relationship()
                    )
            d4 = self.get_format_dict(data['其他'])
            ps = Punishment.create_from_dict(d4, '其他')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    relationships.append(
                        Have(
                            etp_n, n, **p
                        ).get_relationship()
                    )
            pass
        # --- 环保处罚: punishments created with source '环保局'.
        if '环保处罚' in j['content'].keys():
            data = self.get_format_dict(j['content']['环保处罚'])
            ps = Punishment.create_from_dict(data, '环保局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    relationships.append(
                        Have(
                            etp_n, n, **p
                        ).get_relationship()
                    )
        # --- 股权出质: pledgor -[Guaranty]-> target, pledgee -[Have]-> target.
        if '股权出质' in j['content'].keys():
            sh_info = j['content']['股权出质']
            sh_info = self.get_format_dict(sh_info)
            for sh in sh_info:
                # The raw '出质数额' entry is popped and replaced by whatever
                # get_format_amount returns for it.
                sh = dict(sh, **self.get_format_amount(
                    '出质数额', sh.pop('出质数额')
                ))
                # Determine the pledgor.
                cz = sh.pop('出质人')
                cz['链接'] = etp.parser_url(cz['链接'])
                # Check whether the pledgor is the current company.
                if j['name'] == cz['名称'] or cz['链接'] == etp_n['URL']:
                    cz_n = etp_n
                else:
                    # Resolve the pledgor: search among legal entities first.
                    cz_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            cz['名称'], cz['链接']
                        )
                    )
                    if cz_n is None:
                        # Not found among legal entities: search natural
                        # persons by url. Better not to search by name here;
                        # outside of companies, duplicate names are very
                        # likely.
                        # TODO(leung): searching across all entities is
                        # expensive; take care.
                        cz_n = self.match_node(
                            'Person',
                            cypher='_.URL = "{}"'.format(cz['链接'])
                        )
                        if cz_n is None:
                            # Create this equity pledgor.
                            if len(cz['名称']) > 1:
                                cz_n = Involveder(**cz)
                                cz_n = self.get_neo_node(cz_n)
                            pass
                # Determine the pledgee.
                zq = sh.pop('质权人')
                zq['链接'] = etp.parser_url(zq['链接'])
                # Check whether the pledgee is the current company.
                if j['name'] == zq['名称'] or zq['链接'] == etp_n['URL']:
                    zq_n = etp_n
                else:
                    # Resolve the pledgee: search among enterprises first.
                    zq_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            zq['名称'], zq['链接']
                        )
                    )
                    if zq_n is None:
                        # Not found among enterprises: search by url (the
                        # code below only looks at 'Person'). Better not to
                        # search by name here; outside of companies,
                        # duplicate names are very likely.
                        # TODO(leung): searching across all entities is
                        # expensive; take care.
                        zq_n = self.match_node(
                            'Person',
                            cypher='_.URL = "{}"'.format(zq['链接'])
                        )
                        if zq_n is None:
                            # Create this pledgee (the original comment says
                            # "pledgor").
                            if len(zq['名称']) > 1:
                                zq_n = Involveder(**zq)
                                zq_n = self.get_neo_node(zq_n)
                            pass
                # Determine the pledged target enterprise.
                bd = sh.pop('标的企业')
                bd['链接'] = etp.parser_url(bd['链接'])
                # Check whether the pledged target is the current company.
                if j['name'] == bd['名称'] or bd['链接'] == etp_n['URL']:
                    bd_n = etp_n
                else:
                    # Resolve the pledged target: search among enterprises
                    # first.
                    bd_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            bd['名称'], bd['链接']
                        )
                    )
                    if bd_n is None:
                        # Not found among enterprises: search by url (the
                        # code below only looks at 'Person'). Better not to
                        # search by name here; outside of companies,
                        # duplicate names are very likely.
                        # TODO(leung): searching across all entities is
                        # expensive; take care.
                        bd_n = self.match_node(
                            'Person',
                            cypher='_.URL = "{}"'.format(bd['链接'])
                        )
                        if bd_n is None:
                            # Create this pledged target.
                            if len(bd['名称']) > 1:
                                bd_n = Possession(**bd)
                                bd_n = self.get_neo_node(bd_n)
                            pass
                # Create the relationships.
                # 1. Guaranty (pledgor -> target)
                if cz_n is not None and bd_n is not None:
                    relationships.append(
                        Guaranty(cz_n, bd_n, **sh).get_relationship()
                    )
                # 2. Pledge right (pledgee -> target)
                if zq_n is not None and bd_n is not None:
                    relationships.append(
                        Have(zq_n, bd_n, **sh).get_relationship()
                    )
        # --- 破产重组: applicant -['申请破产']-> respondent.
        if '破产重组' in j['content'].keys():
            data = self.get_format_dict(j['content']['破产重组'])
            for d in data:
                sq = d.pop('申请人')
                if sq['名称'] == j['name'] or sq['链接'] == etp_n['URL']:
                    sq_n = etp_n
                else:
                    # NOTE(review): the label here is lowercase 'person'
                    # while 'Person' is used elsewhere in this method --
                    # confirm which one the graph uses.
                    sq_n = self.match_node(
                        *['person'] + legal,
                        cypher='_.URL = "{}"'.format(sq['链接'])
                    )
                    if sq_n is None:
                        sq_n = Involveder(**sq)
                        sq_n = self.get_neo_node(sq_n)
                bsq = d.pop('被申请人')
                if bsq['名称'] == j['name'] or bsq['链接'] == etp_n['URL']:
                    bsq_n = etp_n
                else:
                    # The party against whom bankruptcy is filed is
                    # generally a legal person.
                    bsq_n = self.match_node(
                        *['person'] + legal,
                        cypher='_.URL = "{}"'.format(bsq['链接'])
                    )
                    if bsq_n is None:
                        bsq_n = Involveder(**bsq)
                        bsq_n = self.get_neo_node(bsq_n)
                if sq_n is not None and bsq_n is not None:
                    # Appended as a Relationship object directly, without
                    # the .get_relationship() call used by other sections.
                    relationships.append(
                        Relationship(sq_n, '申请破产', bsq_n, **d)
                    )
            pass
        # --- 土地抵押: mortgagor -[Guaranty]-> plot, mortgagee -[Have]-> plot.
        if '土地抵押' in j['content'].keys():
            data = self.get_format_dict(j['content']['土地抵押'])
            for d in data:
                _ = d.pop('抵押面积')
                p = Plot(**{'位置': d.pop('位置'),
                            '面积(数量)': _['数额'],
                            '面积(单位)': _['单位'],
                            })
                p_n = self.get_neo_node(p)
                # The raw '抵押金额' entry is popped and replaced by whatever
                # get_format_amount returns for it.
                d = dict(d, **self.get_format_amount(
                    '抵押金额', d.pop('抵押金额')
                ))
                dy = d.pop('抵押人')
                dyq = d.pop('抵押权人')
                if dy['名称'] == j['name'] or dy['链接'] == etp_n['URL']:
                    dy_n = etp_n
                else:
                    dy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dy['链接'], dy['名称'])
                    )
                    if dy_n is None:
                        dy_n = Related(**dy)
                        dy_n = self.get_neo_node(dy_n)
                if dy_n is not None:
                    relationships.append(
                        Guaranty(dy_n, p_n, **d).get_relationship()
                    )
                if dyq['名称'] == j['name'] or dyq['链接'] == etp_n['URL']:
                    dyq_n = etp_n
                else:
                    dyq_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dyq['链接'], dyq['名称'])
                    )
                    if dyq_n is None:
                        dyq_n = Related(**dyq)
                        dyq_n = self.get_neo_node(dyq_n)
                if dyq_n is not None:
                    relationships.append(
                        Have(dyq_n, p_n, **d).get_relationship()
                    )
            pass
        # Merge in batches so the pending list stays bounded.
        if len(relationships) > 1000:
            i += 1
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(SuccessMessage('{}:success merge relationships to database '
                                 'round {} and deal {}/{} enterprise,and'
                                 ' merge {} relationships.'.format(
                                     dt.datetime.now(), i, k, etp_count,
                                     len(relationships)
                                 )))
            relationships.clear()
            # return
        pass
    # Merge whatever is left after the last full batch.
    if len(relationships):
        i += 1
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage('{}:success merge relationships to database '
                             'round {} and deal {}/{} enterprise,and'
                             ' merge {} relationships.'.format(
                                 dt.datetime.now(), i, k, etp_count,
                                 len(relationships)
                             )))
        relationships.clear()
    pass
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """Collect the nodes and relationships of one enterprise's risk data.

    Reads the sections 动产抵押 (chattel mortgage), 公示催告 (public
    summons), 行政处罚 (administrative penalty), 环保处罚 (environmental
    penalty), 股权出质 (equity pledge), 破产重组 (bankruptcy) and
    土地抵押 (land mortgage) from ``etp['content']``.

    :param etp: mapping with the keys ``url``, ``name`` and ``content``.
    :return: ``(nodes, relationships)``; both are empty lists when the
        enterprise node itself cannot be created. ``nodes`` may contain
        the same node more than once.
    """
    etp_n = self.get_neo_node(Enterprise(URL=etp['url'], NAME=etp['name']))
    if etp_n is None:
        return [], []
    nodes, relationships = [etp_n], []
    content = etp['content']
    # Neo4j labels are case-sensitive; every other lookup in this file
    # uses 'Person', so the bankruptcy section must not use 'person'.
    person_or_legal = ['Person'] + legal

    def build_party(info):
        """Build the most specific entity: Enterprise, Person or Related."""
        entity = Enterprise(**info)
        if not entity.isEnterprise():
            entity = Person(**info)
            if not entity.isPerson():
                entity = Related(**info)
        return entity

    def build_target(info):
        """Build a pledge target: an Enterprise, otherwise a Possession."""
        entity = Enterprise(**info)
        if not entity.isEnterprise():
            entity = Possession(**info)
        return entity

    def resolve(info, own_url, labels, by_name, person_by_url=False,
                need_name=False, build=build_party):
        """Return the graph node for a party dict (keys 名称 / 链接).

        The current enterprise is reused when name or url match; else
        the node is looked up and, if still unknown, created.
        """
        info['链接'] = Enterprise.parser_url(info['链接'])
        name, url = info['名称'], info['链接']
        if name == etp['name'] or url == own_url:
            return etp_n
        # NOTE(review): values are interpolated into the Cypher text
        # unescaped, as everywhere else in this file.
        if by_name:
            cypher = '_.URL = "{}" OR _.NAME = "{}"'.format(url, name)
        else:
            cypher = '_.URL = "{}"'.format(url)
        node = self.match_node(*labels, cypher=cypher)
        if node is None and person_by_url:
            # Natural persons are only matched by url: equal names are
            # far too common outside of companies.
            node = self.match_node(
                'Person', cypher='_.URL = "{}"'.format(url))
        if node is None:
            if need_name and (name is None or len(name) <= 1):
                return None
            node = self.get_neo_node(build(info))
        return node

    def add_punishments(records, source):
        """Attach every punishment of one issuing source to the enterprise."""
        for p in Punishment.create_from_dict(records, source):
            n = self.get_neo_node(p.pop('punishment'))
            if n is not None:
                nodes.append(n)
                relationships.append(Have(etp_n, n, **p))

    if '动产抵押' in content:
        for d in self.get_format_dict(content['动产抵押']):
            amount = d.pop('被担保主债权数额')
            debt_n = self.get_neo_node(Debt(**{
                '债务(金额)': amount['金额'],
                '债务(单位)': amount['单位'],
                '履行期限': d.pop('债务人履行债务的期限'),
            }))
            # Pop all parties first so ``d`` only keeps edge properties.
            parties = [
                ('抵押权人', d.pop('抵押权人'), False),
                ('债务人', d.pop('债务人'), True),
                ('所有权或使用权人', d.pop('所有权或使用权归属'), True),
            ]
            if debt_n is None:
                continue
            nodes.append(debt_n)
            for role, info, need_name in parties:
                party_n = resolve(info, etp['url'], legal, by_name=True,
                                  need_name=need_name)
                if party_n is not None:
                    nodes.append(party_n)
                    relationships.append(
                        Have(party_n, debt_n, **dict(角色=role, **d)))

    if '公示催告' in content:
        for d in self.get_format_dict(content['公示催告']):
            face = d.pop('票面金额')
            bn_n = self.get_neo_node(Banknote(**{
                '票据号': d.pop('票据号'),
                '票据类型': d.pop('票据类型'),
                '票面金额(金额)': face['金额'],
                '票面金额(单位)': face['单位'],
            }))
            parties = [('申请人', d.pop('申请人')),
                       ('持票人', d.pop('持票人'))]
            if bn_n is None:
                continue
            nodes.append(bn_n)
            for role, info in parties:
                party_n = resolve(info, etp['url'], legal, by_name=True)
                if party_n is not None:
                    nodes.append(party_n)
                    relationships.append(
                        Have(party_n, bn_n, **dict(角色=role, **d)))
            # The current enterprise is recorded as the drawer.
            relationships.append(
                Have(etp_n, bn_n, **dict(角色='出票人', **d)))

    if '行政处罚' in content:
        data = content['行政处罚']
        for source in ('工商局', '税务局', '信用中国', '其他'):
            # Skip issuers that are absent instead of raising KeyError.
            if source in data:
                add_punishments(self.get_format_dict(data[source]), source)

    if '环保处罚' in content:
        add_punishments(self.get_format_dict(content['环保处罚']), '环保局')

    if '股权出质' in content:
        for sh in self.get_format_dict(content['股权出质']):
            sh = dict(sh, **self.get_format_amount(
                '出质数额', sh.pop('出质数额')))
            # Pledgor and pledgee: legal entities first, then persons.
            cz_n = resolve(sh.pop('出质人'), etp_n['URL'], legal,
                           by_name=True, person_by_url=True, need_name=True)
            zq_n = resolve(sh.pop('质权人'), etp_n['URL'], legal,
                           by_name=True, person_by_url=True, need_name=True)
            # The pledged target is never a person.
            bd_n = resolve(sh.pop('标的企业'), etp_n['URL'], legal,
                           by_name=True, need_name=True, build=build_target)
            if bd_n is None:
                continue
            nodes.append(bd_n)
            if cz_n is not None:
                nodes.append(cz_n)
                relationships.append(Guaranty(cz_n, bd_n, **sh))
            if zq_n is not None:
                nodes.append(zq_n)
                relationships.append(Have(zq_n, bd_n, **sh))

    if '破产重组' in content:
        for d in self.get_format_dict(content['破产重组']):
            sq_n = resolve(d.pop('申请人'), etp_n['URL'],
                           person_or_legal, by_name=False)
            bsq_n = resolve(d.pop('被申请人'), etp_n['URL'],
                            person_or_legal, by_name=False)
            if sq_n is not None and bsq_n is not None:
                nodes += [sq_n, bsq_n]
                relationships.append(ApplyBankrupt(sq_n, bsq_n, **d))

    if '土地抵押' in content:
        for d in self.get_format_dict(content['土地抵押']):
            area = d.pop('抵押面积')
            p_n = self.get_neo_node(Plot(**{
                '位置': d.pop('位置'),
                '面积(数量)': area['数额'],
                '面积(单位)': area['单位'],
            }))
            if p_n is None:
                continue
            nodes.append(p_n)
            d = dict(d, **self.get_format_amount(
                '抵押金额', d.pop('抵押金额')))
            dy = d.pop('抵押人')
            dyq = d.pop('抵押权人')
            dy_n = resolve(dy, etp_n['URL'], legal, by_name=True)
            if dy_n is not None:
                nodes.append(dy_n)
                relationships.append(Guaranty(dy_n, p_n, **d))
            dyq_n = resolve(dyq, etp_n['URL'], legal, by_name=True)
            if dyq_n is not None:
                nodes.append(dyq_n)
                relationships.append(Have(dyq_n, p_n, **d))

    return nodes, relationships
def create_all_relationship(self):
    """Merge the legal-proceeding relationships of every enterprise.

    1.enterprise or person -[involve_case]->case

    Iterates the 法律诉讼 documents, builds ``InvolveCase`` relationships
    and merges them into the graph in batches of more than 1000.

    :return: None
    """
    justices = self.base.query(
        sql={'metaModel': '法律诉讼'},
        limit=100, no_cursor_timeout=True)
    i, k = 0, 0
    eg = EtpGraph()
    etp_count = justices.count()
    relationships = []
    person_or_legal = ['Person'] + legal

    def party_node(info, labels, own_name, own_node):
        """Resolve a party dict to a node, creating an Involveder if new."""
        if info['名称'] == own_name or info['链接'] == own_node['URL']:
            return own_node
        found = self.match_node(
            *labels, cypher='_.URL = "{}"'.format(info['链接']))
        if found is None:
            found = self.get_neo_node(Involveder(**info))
        return found

    def case_edge(start, end, role):
        """Build an InvolveCase relationship carrying the role in the case."""
        return InvolveCase(start, end, **{'案件身份': role}).get_relationship()

    def merge_batch(round_no, handled):
        """Write the pending relationships to the graph and report it."""
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage('{}:success merge relationships to database '
                             'round {} and deal {}/{} enterprise,and'
                             ' merge {} relationships.'.format(
                                 dt.datetime.now(), round_no, handled,
                                 etp_count, len(relationships))))
        relationships.clear()

    for j in justices:
        k += 1
        # TODO(leung): the url stored in this module cannot identify the
        # company, so only the name is used for matching.
        etp_n = self.match_node(
            *legal, cypher='_.NAME = "{}"'.format(j['name']))
        if etp_n is None:
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': j['name']})
            if base_info is not None:
                # The company is not in the graph yet: create it together
                # with its basic relationships.
                etp_n = self.get_neo_node(Enterprise(base_info))
                relationships += \
                    eg.create_relationship_from_enterprise_baseinfo(base_info)
            else:
                # Nothing known about the company: treat it as a plain
                # party involved in the case.
                etp_n = self.get_neo_node(
                    Involveder(**{'名称': j['name'], '链接': j['url']}))
            if etp_n is None:
                continue
        sections = j['content']

        if '被执行人' in sections:
            records = self.get_format_dict(sections['被执行人'])
            for ep in Enforcement.create_from_dict(records):
                e_n = self.get_neo_node(ep.pop('executed'))
                if e_n is not None:
                    relationships.append(
                        InvolveCase(etp_n, e_n, **ep).get_relationship())

        if '裁判文书' in sections:
            records = self.get_format_dict(sections['裁判文书'])
            # create_from_dict yields [ruling, involved parties] pairs.
            for ruling, involved in Judgment.create_from_dict(records):
                rul_n = self.get_neo_node(ruling)
                if rul_n is None:
                    continue
                for inv in involved:
                    if j['name'] == inv[1] or j['url'] == inv[2]:
                        # The party is the current enterprise.
                        inv_n = etp_n
                    else:
                        inv_n = self.match_node(
                            *person_or_legal,
                            cypher='_.URL = "{}"'.format(inv[2]))
                        if inv_n is None:
                            # Unknown party: create a case participant.
                            ivl = Involveder()
                            ivl['NAME'] = inv[1]
                            ivl['URL'] = inv[2]
                            inv_n = self.get_neo_node(ivl)
                    if inv_n is not None:
                        relationships.append(case_edge(inv_n, rul_n, inv[0]))

        if '失信被执行人' in sections:
            records = self.get_format_dict(sections['失信被执行人'])
            for ep in SXEnforcement.create_from_dict(records):
                e_n = self.get_neo_node(ep.pop('sxexecuted'))
                if e_n is not None:
                    relationships.append(
                        InvolveCase(etp_n, e_n, **ep).get_relationship())

        if '限制高消费' in sections:
            for d in self.get_format_dict(sections['限制高消费']):
                sq = d.pop('申请人')
                lh = d.pop('限消令对象')
                xg = d.pop('关联对象')
                case_no = d.pop('案号')
                lo_n = self.get_neo_node(LimitOrder(**dict(
                    案号=case_no['名称'], 案号链接=case_no['链接'], **d)))
                if lo_n is None:
                    continue
                sq_n = party_node(sq, person_or_legal, j['name'], etp_n)
                if sq_n is not None:
                    relationships.append(case_edge(sq_n, lo_n, '申请人'))
                lh_n = party_node(lh, person_or_legal, j['name'], etp_n)
                if lh_n is not None:
                    relationships.append(case_edge(lo_n, lh_n, '限制对象'))
                xg_n = party_node(xg, person_or_legal, j['name'], etp_n)
                if xg_n is not None:
                    relationships.append(case_edge(lo_n, xg_n, '关联对象'))

        if '股权冻结' in sections:
            for d in self.get_format_dict(sections['股权冻结']):
                bd = d.pop('标的企业')
                zx = d.pop('被执行人')
                amount = d.pop('股权数额')
                kind_state = d.pop('类型|状态').split('|')
                sf_n = self.get_neo_node(StockFreeze(**dict(
                    冻结数额=amount['金额'],
                    金额单位=amount['单位'],
                    类型=kind_state[0],
                    状态=kind_state[1] if len(kind_state) > 1 else None,
                    **d)))
                if sf_n is None:
                    continue
                bd_n = party_node(bd, legal, j['name'], etp_n)
                if bd_n is not None:
                    relationships.append(case_edge(sf_n, bd_n, '标的企业'))
                zx_n = party_node(zx, person_or_legal, j['name'], etp_n)
                if zx_n is not None:
                    relationships.append(case_edge(sf_n, zx_n, '被执行人'))

        if len(relationships) > 1000:
            i += 1
            merge_batch(i, k)

    if len(relationships):
        i += 1
        merge_batch(i, k)
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """Collect nodes and relationships of one enterprise's legal data.

    Handles the sections 法院公告, 开庭公告, 送达公告, 立案信息,
    终本案件, 裁判文书, 被执行人, 失信被执行人, 限制高消费 and 股权冻结
    found in ``etp['content']``.

    :param etp: mapping with the keys ``url``, ``name`` and ``content``.
    :return: ``(nodes, relationships)``; two empty lists when the
        enterprise node cannot be created.
    """
    etp_n = self.get_neo_node(Enterprise(URL=etp['url'], NAME=etp['name']))
    if etp_n is None:
        return [], []
    nodes = [etp_n]
    relationships = []
    content = etp['content']
    person_or_legal = ['Person'] + legal

    def make_entity(info):
        """Build the most specific entity: Enterprise, Person or Related."""
        entity = Enterprise(**info)
        if not entity.isEnterprise():
            entity = Person(**info)
            if not entity.isPerson():
                entity = Related(**info)
        return entity

    def locate(info, labels):
        """Resolve a party dict (名称 / 链接) to a node, creating it if new."""
        if info['名称'] == etp['name'] or info['链接'] == etp_n['URL']:
            return etp_n
        hit = self.match_node(
            *labels, cypher='_.URL = "{}"'.format(info['链接']))
        if hit is None:
            hit = self.get_neo_node(make_entity(info))
        return hit

    def link(start, end, role):
        """Record an InvolveCase edge labelled with the role in the case."""
        relationships.append(InvolveCase(start, end, **{'案件身份': role}))

    def link_litigants(record, case_n):
        """Attach the defendants, then the plaintiffs, of a record."""
        for key, role in (('defendant', '被告'), ('plaintiff', '原告')):
            for who in record.pop(key):
                who['链接'] = Enterprise.parser_url(who['链接'])
                who_n = locate(who, person_or_legal)
                if who_n is not None:
                    nodes.append(who_n)
                    link(who_n, case_n, role)

    # The four announcement-like sections share one layout.
    litigation_sections = (
        ('法院公告', CourtAnnounce, 'announce'),
        ('开庭公告', OpenAnnounce, 'announce'),
        ('送达公告', DeliveryAnnounce, 'announce'),
        ('立案信息', RegisterCase, 'case'),
    )
    for section, factory, key in litigation_sections:
        if section not in content:
            continue
        records = self.get_format_dict(content[section])
        for ca in factory.create_from_dict(records):
            case_n = self.get_neo_node(ca.pop(key))
            if case_n is None:
                continue
            nodes.append(case_n)
            link_litigants(ca, case_n)

    if '终本案件' in content:
        records = self.get_format_dict(content['终本案件'])
        for ca in FinalCase.create_from_dict(records):
            c_n = self.get_neo_node(ca.pop('case'))
            if c_n is None:
                continue
            nodes.append(c_n)
            relationships.append(InvolveCase(etp_n, c_n))

    if '裁判文书' in content:
        records = self.get_format_dict(content['裁判文书'])
        # create_from_dict yields [ruling, involved parties] pairs.
        for ruling, involved in Judgment.create_from_dict(records):
            rul_n = self.get_neo_node(ruling)
            if rul_n is None:
                continue
            nodes.append(rul_n)
            for inv in involved:
                inv[2] = Enterprise.parser_url(inv[2])
                if etp['name'] == inv[1] or etp['url'] == inv[2]:
                    # The party is the current enterprise.
                    inv_n = etp_n
                else:
                    inv_n = self.match_node(
                        *person_or_legal,
                        cypher='_.URL = "{}"'.format(inv[2]))
                    if inv_n is None:
                        # Unknown party: create the case participant.
                        inv_n = self.get_neo_node(
                            make_entity({'名称': inv[1], '链接': inv[2]}))
                if inv_n is not None:
                    nodes.append(inv_n)
                    link(inv_n, rul_n, inv[0])

    for section, factory, key in (('被执行人', Enforcement, 'executed'),
                                  ('失信被执行人', SXEnforcement,
                                   'sxexecuted')):
        if section not in content:
            continue
        records = self.get_format_dict(content[section])
        for ep in factory.create_from_dict(records):
            e_n = self.get_neo_node(ep.pop(key))
            if e_n is not None:
                nodes.append(e_n)
                relationships.append(InvolveCase(etp_n, e_n, **ep))

    if '限制高消费' in content:
        for d in self.get_format_dict(content['限制高消费']):
            sq = d.pop('申请人')
            lh = d.pop('限消令对象')
            xg = d.pop('关联对象')
            for info in (sq, lh, xg):
                info['链接'] = Enterprise.parser_url(info['链接'])
            case_no = d.pop('案号')
            lo_n = self.get_neo_node(LimitOrder(**dict(
                案号=case_no['名称'], 案号链接=case_no['链接'], **d)))
            if lo_n is None:
                continue
            nodes.append(lo_n)
            sq_n = locate(sq, person_or_legal)
            if sq_n is not None:
                nodes.append(sq_n)
                link(sq_n, lo_n, '申请人')
            lh_n = locate(lh, person_or_legal)
            if lh_n is not None:
                nodes.append(lh_n)
                link(lo_n, lh_n, '限制对象')
            xg_n = locate(xg, person_or_legal)
            if xg_n is not None:
                nodes.append(xg_n)
                link(lo_n, xg_n, '关联对象')

    if '股权冻结' in content:
        for d in self.get_format_dict(content['股权冻结']):
            bd = d.pop('标的企业')
            zx = d.pop('被执行人')
            bd['链接'] = Enterprise.parser_url(bd['链接'])
            zx['链接'] = Enterprise.parser_url(zx['链接'])
            amount = d.pop('股权数额')
            kind_state = d.pop('类型|状态').split('|')
            sf_n = self.get_neo_node(StockFreeze(**dict(
                冻结数额=amount['金额'],
                金额单位=amount['单位'],
                类型=kind_state[0],
                状态=kind_state[1] if len(kind_state) > 1 else None,
                **d)))
            if sf_n is None:
                continue
            nodes.append(sf_n)
            bd_n = locate(bd, legal)
            if bd_n is not None:
                nodes.append(bd_n)
                link(sf_n, bd_n, '标的企业')
            zx_n = locate(zx, person_or_legal)
            if zx_n is not None:
                nodes.append(zx_n)
                link(sf_n, zx_n, '被执行人')

    return nodes, relationships
def get_all_nodes_from_enterprise(self, etp):
    """Build the (not yet persisted) entities of one enterprise.

    Reads the operating-information sections of ``etp['content']`` and
    returns one entity object per record, preceded by the enterprise
    itself. No graph lookup is done here.

    :param etp: mapping with the keys ``url``, ``name`` and ``content``.
    :return: list of entity objects; every element is an entity built by
        a project class, never a raw dict.
    """
    nodes = [Enterprise(URL=etp['url'], NAME=etp['name'])]
    content = etp['content']

    # The section 产权交易 (property transactions) is not handled yet.

    if '行政许可' in content:
        data = content['行政许可']
        for source in ('工商局', '信用中国'):
            if source in data:
                records = self.get_format_dict(data[source])
                nodes.extend(
                    item.pop('license')
                    for item in License.create_from_dict(records, source))

    # (section, factory class, key that holds the entity in each record)
    # 招投标信息: published bids are usually results only, so co-bidders
    # cannot be found unless they won together.
    # TODO(leung): 双随机抽查 (random checks) carry no result.
    simple_sections = (
        ('招投标信息', Bidding, 'bidding'),
        ('抽查检查', Check, 'check'),
        ('双随机抽查', RandomCheck, 'check'),
        ('税务信用', TaxCredit, 'TaxCredit'),
        ('进出口信用', IAE, 'iae'),
        ('招聘', Position, 'position'),
        ('客户', Client, 'client'),
        ('供应商', Supplier, 'supplier'),
    )
    for section, factory, key in simple_sections:
        if section in content:
            records = self.get_format_dict(content[section])
            nodes.extend(
                item.pop(key) for item in factory.create_from_dict(records))

    if '信用评级' in content:
        for d in self.get_format_dict(content['信用评级']):
            # The rating company arrives as a plain dict; wrap it in an
            # Enterprise so callers can treat every node uniformly
            # (they call ``to_dict()`` on each element).
            rater = d.pop('评级公司')
            rater['链接'] = Enterprise.parser_url(rater['链接'])
            nodes.append(Enterprise(**rater))

    if '土地转让' in content:
        for d in self.get_format_dict(content['土地转让']):
            # Former and current holders are not attributes of the plot.
            d.pop('原土地使用权人')
            d.pop('现有土地使用权人')
            nodes.append(Plot(**d))

    return nodes
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """Collect nodes and relationships of one enterprise's operating data.

    Handles the sections 行政许可, 招投标信息, 抽查检查, 双随机抽查,
    税务信用, 进出口信用, 招聘, 客户, 供应商, 信用评级 and 土地转让 of
    ``etp['content']``.

    :param etp: mapping with the keys ``url``, ``name`` and ``content``.
    :return: ``(nodes, relationships)``; two empty lists when the
        enterprise node cannot be created.
    """
    etp_n = self.get_neo_node(Enterprise(URL=etp['url'], NAME=etp['name']))
    if etp_n is None:
        return [], []
    nodes = [etp_n]
    relationships = []
    content = etp['content']

    def make_entity(info):
        """Build the most specific entity: Enterprise, Person or Related."""
        entity = Enterprise(**info)
        if not entity.isEnterprise():
            entity = Person(**info)
            if not entity.isPerson():
                entity = Related(**info)
        return entity

    def trade_partner(entity):
        """Match a client/supplier among legal entities or create it."""
        found = self.match_node(
            *legal,
            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                entity['URL'], entity['NAME']))
        if found is None:
            if entity.isEnterprise():
                entity = Enterprise(**entity.to_dict(with_label=False))
            found = self.get_neo_node(entity)
        return found

    # The section 产权交易 (property transactions) is not handled yet.

    if '行政许可' in content:
        data = content['行政许可']
        for source in ('工商局', '信用中国'):
            if source not in data:
                continue
            records = self.get_format_dict(data[source])
            for lic in License.create_from_dict(records, source):
                l_n = self.get_neo_node(lic.pop('license'))
                if l_n is None:
                    continue
                nodes.append(l_n)
                relationships.append(Have(etp_n, l_n, **lic))

    if '招投标信息' in content:
        # Published bids are usually results only, so co-bidders cannot
        # be found unless they won together.
        records = self.get_format_dict(content['招投标信息'])
        for b in Bidding.create_from_dict(records):
            b_n = self.get_neo_node(b.pop('bidding'))
            if b_n is None:
                continue
            # TODO(leung): the project type is used as the bid result.
            nodes.append(b_n)
            relationships.append(TakePartIn(
                etp_n, b_n, **dict(b, **{'RESULT': b_n['TYPE']})))

    if '抽查检查' in content:
        records = self.get_format_dict(content['抽查检查'])
        for c in Check.create_from_dict(records):
            n = self.get_neo_node(c.pop('check'))
            if n is None:
                continue
            nodes.append(n)
            relationships.append(
                Have(etp_n, n, **dict(c, **{'RESULT': n['RESULT']})))

    if '双随机抽查' in content:
        records = self.get_format_dict(content['双随机抽查'])
        for rc in RandomCheck.create_from_dict(records):
            # TODO(leung): random checks carry no result.
            n = self.get_neo_node(rc.pop('check'))
            if n is None:
                continue
            nodes.append(n)
            relationships.append(Have(etp_n, n, **rc))

    if '税务信用' in content:
        records = self.get_format_dict(content['税务信用'])
        for t in TaxCredit.create_from_dict(records):
            n = self.get_neo_node(t.pop('TaxCredit'))
            if n is None:
                continue
            # TODO(leung): the tax credit grade is used as the result.
            nodes.append(n)
            relationships.append(
                Have(etp_n, n, **dict(RESULT=n['GRADE'], **t)))

    if '进出口信用' in content:
        records = self.get_format_dict(content['进出口信用'])
        for ie in IAE.create_from_dict(records):
            n = self.get_neo_node(ie.pop('iae'))
            if n is None:
                continue
            nodes.append(n)
            relationships.append(Have(etp_n, n, **ie))

    if '招聘' in content:
        records = self.get_format_dict(content['招聘'])
        for r in Position.create_from_dict(records):
            n = self.get_neo_node(r.pop('position'))
            if n is None:
                continue
            nodes.append(n)
            relationships.append(Recruit(etp_n, n, **r))

    if '客户' in content:
        records = self.get_format_dict(content['客户'])
        for c in Client.create_from_dict(records):
            cli_n = trade_partner(c.pop('client'))
            if cli_n is None:
                continue
            nodes.append(cli_n)
            relationships.append(SellTo(etp_n, cli_n, **c))

    if '供应商' in content:
        records = self.get_format_dict(content['供应商'])
        for s in Supplier.create_from_dict(records):
            sup_n = trade_partner(s.pop('supplier'))
            if sup_n is None:
                continue
            nodes.append(sup_n)
            relationships.append(BuyFrom(etp_n, sup_n, **s))

    if '信用评级' in content:
        for d in self.get_format_dict(content['信用评级']):
            rater = d.pop('评级公司')
            rater['链接'] = Enterprise.parser_url(rater['链接'])
            n = self.match_node(
                *legal,
                cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                    rater['链接'], rater['名称']))
            if n is None:
                n = self.get_neo_node(Enterprise(**rater))
            if n is None:
                continue
            detail = d.pop('内容')
            d['评级内容'] = detail['内容']
            d['评级链接'] = detail['链接']
            nodes.append(n)
            relationships.append(Appraise(n, etp_n, **d))

    if '土地转让' in content:
        for d in self.get_format_dict(content['土地转让']):
            former = d.pop('原土地使用权人')
            current = d.pop('现有土地使用权人')
            p_n = self.get_neo_node(Plot(**d))
            if p_n is None:
                continue
            # The former holder sells the plot, the current one buys it.
            for holder, relation in ((former, Sell), (current, Buy)):
                holder['链接'] = Enterprise.parser_url(holder['链接'])
                if holder['名称'] == etp['name'] \
                        or holder['链接'] == etp['url']:
                    h_n = etp_n
                else:
                    # The holder may also be a natural person.
                    h_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}"'.format(holder['链接']))
                    if h_n is None:
                        h_n = self.get_neo_node(make_entity(holder))
                if h_n is not None:
                    nodes.append(h_n)
                    nodes.append(p_n)
                    relationships.append(relation(h_n, p_n))

    return nodes, relationships
def create_all_relationship(self):
    """Merge ``enterprise -[Compete]-> enterprise`` relationships.

    Iterates the documents returned by ``self.base.query`` for
    ``metaModel == '企业发展'``. For each document the company node is
    looked up by name (and created when it is missing); every entry under
    ``content.竞品信息`` is then linked to its competitor node through a
    ``Compete`` relationship. Pending relationships are merged into the
    graph whenever more than 1000 have accumulated, and once more at the
    end for the remainder.

    :return: None
    """

    def cypher_str(value):
        # Escape backslashes and double quotes so the value cannot
        # terminate the double-quoted Cypher literal it is formatted into.
        return str(value).replace('\\', '\\\\').replace('"', '\\"')

    def merge_batch():
        # Merge the pending relationships, call create_index_and_constraint
        # while the index_and_constraint_statue flag is falsy, report
        # progress and empty the buffer.
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(
            SuccessMessage(
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise,and'
                ' merge {} relationships.'.format(
                    dt.datetime.now(), i, k, etp_count,
                    len(relationships))))
        relationships.clear()

    ops = self.base.query(sql={'metaModel': '企业发展'},
                          field={
                              'name': 1,
                              'url': 1,
                              'content.竞品信息': 1
                          },
                          limit=1000,
                          no_cursor_timeout=True)
    i, k = 0, 0  # i: merge rounds performed, k: documents visited
    eg = EtpGraph()
    etp_count = ops.count()
    relationships = []
    for o in ops:
        k += 1
        # TODO(leung): the url stored in modules other than the basic-info
        #  one cannot identify a company, so the lookup is by name only.
        etp_n = self.match_node(
            *legal,
            cypher='_.NAME = "{}"'.format(cypher_str(o['name'])))
        if etp_n is None:
            # The company is not in the graph yet, so it has to be created.
            base_info = self.base.query_one(sql={
                'metaModel': '基本信息',
                'name': o['name']
            })
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # Enterprises created while building judicial relationships
                # do not get their basic relationships, so add them here.
                relationships += (
                    eg.create_relationship_from_enterprise_baseinfo(
                        base_info))
            else:
                # Nothing is known about this company: create a node that
                # only carries its name and url.
                placeholder = Related()
                placeholder['NAME'] = o['name']
                placeholder['URL'] = o['url']
                etp_n = self.get_neo_node(placeholder)

        # Without a source node there is nothing to attach competitors to.
        if etp_n is not None and '竞品信息' in o['content']:
            data = self.get_format_dict(o['content']['竞品信息'])
            for d in data:
                etp_2 = d.pop('关联企业')
                name_2 = etp_2['名称']
                # Skip competitors without a usable name.
                if name_2 is None or len(name_2) <= 1:
                    continue
                etp_2['链接'] = Enterprise.parser_url(etp_2['链接'])
                etp_n_2 = self.match_node(
                    *legal,
                    cypher='_.URL = "{}"'.format(cypher_str(etp_2['链接'])))
                if etp_n_2 is None:
                    # Unknown competitor: the descriptive fields become
                    # node properties and are removed from the
                    # relationship properties.
                    props = {
                        'URL': etp_2['链接'],
                        'NAME': name_2,
                        '简介': d.pop('产品介绍'),
                        '成立日期': d.pop('成立日期'),
                        '融资信息': d.pop('融资信息'),
                        '所属地': d.pop('所属地'),
                    }
                    etp_n_2 = self.get_neo_node(Related(**props))
                if etp_n_2 is None:
                    continue
                relationships.append(
                    Compete(etp_n, etp_n_2, **d).get_relationship())

        if len(relationships) > 1000:
            i += 1
            merge_batch()

    # Merge whatever is left after the last full batch.
    if relationships:
        i += 1
        merge_batch()
def create_all_relationship(self):
    """Merge ``enterprise -[Have]-> news`` relationships into the graph.

    Iterates the documents returned by ``self.base.query`` for
    ``metaModel == '公司新闻'``. For each document the company node is
    looked up by name (and created when it is missing); every item built
    by ``News.create_from_dict`` from ``content.新闻舆情`` is then linked
    to the company through a ``Have`` relationship. Pending relationships
    are merged into the graph whenever more than 1000 have accumulated,
    and once more at the end for the remainder.

    :return: None
    """

    def cypher_str(value):
        # Escape backslashes and double quotes so the value cannot
        # terminate the double-quoted Cypher literal it is formatted into.
        return str(value).replace('\\', '\\\\').replace('"', '\\"')

    def merge_batch(template, *progress):
        # Merge the pending relationships, call create_index_and_constraint
        # while the index_and_constraint_statue flag is falsy, report
        # progress and empty the buffer.
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage(template.format(
            dt.datetime.now(), *progress, len(relationships)
        )))
        relationships.clear()

    ops = self.base.query(
        sql={'metaModel': '公司新闻'},
        # NOTE(review): the debugging window (limit=10 / skip=2020) is kept
        #  disabled so that the whole collection is processed.
        no_cursor_timeout=True)
    i, k = 0, 0  # i: merge rounds performed, k: documents visited
    eg = EtpGraph()
    etp_count = ops.count()
    relationships = []
    s_t = time.time()
    for o in ops:
        k += 1
        # TODO(leung): the url stored in modules other than the basic-info
        #  one cannot identify a company, so the lookup is by name only.
        etp_n = self.match_node(
            *legal,
            cypher='_.NAME = "{}"'.format(cypher_str(o['name']))
        )
        if etp_n is None:
            # The company is not in the graph yet, so it has to be created.
            base_info = self.base.query_one(
                sql={'metaModel': '基本信息', 'name': o['name']}
            )
            if base_info is not None:
                etp_n = self.get_neo_node(Enterprise(base_info))
                # Enterprises created while building judicial relationships
                # do not get their basic relationships, so add them here.
                relationships += (
                    eg.create_relationship_from_enterprise_baseinfo(
                        base_info))
            else:
                # Nothing is known about this company: create a node that
                # only carries its name and url.
                etp_n = self.get_neo_node(
                    Related(**{'名称': o['name'], '链接': o['url']}))

        # Without an enterprise node there is nothing to attach news to;
        # the batch check below still runs for this iteration.
        if etp_n is not None and '新闻舆情' in o['content']:
            data = self.get_format_dict(o['content']['新闻舆情'])
            for n in News.create_from_dict(data):
                n_ = n.pop('news')
                n_n = self.get_neo_node(n_)
                if n_n is not None:
                    relationships.append(
                        Have(etp_n, n_n, **n).get_relationship()
                    )

        if len(relationships) > 1000:
            i += 1
            # Seconds spent collecting this batch (taken before the merge).
            sp = int(time.time() - s_t)
            s_t = time.time()
            merge_batch(
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise and spend {} '
                'seconds,and merge {} relationships.',
                i, k, etp_count, sp)

    # Merge whatever is left after the last full batch.
    if relationships:
        i += 1
        merge_batch(
            '{}:success merge relationships to database '
            'round {} and deal {}/{} enterprise,and'
            ' merge {} relationships.',
            i, k, etp_count)
def get_all_nodes_and_relationships_from_enterprise(self, etp):
    """Collect the asset nodes of one enterprise and their ``Have`` links.

    An ``Enterprise`` node is built from ``etp['url']`` / ``etp['name']``
    and resolved through ``self.get_neo_node``. Each known section of
    ``etp['content']`` is then converted with the matching entity class'
    ``create_from_dict``; every entity that resolves to a node is added to
    the node list together with a ``Have(enterprise, entity)``
    relationship carrying the remaining record fields as properties.

    :param etp: mapping-like enterprise record exposing ``url``, ``name``
        and ``content``.
    :return: ``(nodes, relationships)``; two empty lists when the
        enterprise node itself cannot be resolved.
    """
    enterprise_node = self.get_neo_node(
        Enterprise(URL=etp['url'], NAME=etp['name']))
    if enterprise_node is None:
        return [], []

    nodes = [enterprise_node]
    relationships = []

    # (section key in etp['content'], entity class, key under which each
    # record produced by create_from_dict holds the entity itself)
    sections = (
        ('网站信息', Website, 'website'),
        ('证书信息', Certificate, 'certificate'),
        ('专利信息', Patent, 'patent'),
        ('商标信息', Trademark, 'trademark'),
        ('软件著作权', SoftCopyRight, 'softcopyright'),
        ('作品著作权', WorkCopyRight, 'workcopyright'),
        ('微博', Weibo, 'weibo'),
        ('微信公众号', OfficialAccount, 'WeChat'),
        ('小程序', Applets, 'applets'),
        ('APP', App, 'app'),
    )

    content = etp['content']
    for section_key, entity_cls, entity_key in sections:
        if section_key not in content:
            continue
        formatted = self.get_format_dict(content[section_key])
        for record in entity_cls.create_from_dict(formatted):
            entity = record.pop(entity_key)
            entity_node = self.get_neo_node(entity)
            if entity_node is None:
                continue
            nodes.append(entity_node)
            # Whatever is left in the record becomes relationship
            # properties.
            relationships.append(
                Have(enterprise_node, entity_node, **record))

    return nodes, relationships