예제 #1
0
    def create_all_relationship(self):
        """
        Link stored rulings to their ruling texts and merge the links in
        batches.

        1.ruling -[have]->ruling_text

        Every '裁判文书' document returned by ``self.base.query`` is parsed
        into a ``RulingText``. The ruling node whose ``CASE_NUM`` equals the
        one parsed from the text is looked up through ``self.NodeMatcher``;
        when it exists, a ``Have`` relationship from the ruling to the text
        node is queued. The queue is merged each time it holds more than
        1000 relationships and once more after the last document.

        :return: None
        """
        documents = self.base.query(
            sql={'metaModel': '裁判文书'},
            no_cursor_timeout=True)
        rounds, handled = 0, 0
        total = documents.count()
        pending = []
        ruling = Ruling()

        def flush(round_no, handled_count):
            # Merge the queued relationships, make sure the index and
            # constraint set-up has run, report progress, empty the queue.
            self.graph_merge_relationships(pending)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            report = ('{}:success merge relationships to database '
                      'round {} and deal {}/{} enterprise,and'
                      ' merge {} relationships.').format(
                dt.datetime.now(), round_no, handled_count, total,
                len(pending))
            print(SuccessMessage(report))
            pending.clear()

        for document in documents:
            handled += 1
            text = RulingText.create_from_original_text(
                document['content'], **{'链接': document['url']})
            candidates = self.NodeMatcher.match(ruling.label)
            ruling_node = candidates.where(
                '_.CASE_NUM="{}"'.format(text.BaseAttributes['CASE_NUM'])
            ).first()
            if ruling_node is None:
                # No ruling node carries this case number: nothing to link.
                continue
            text_node = text.get_neo_node(primarykey=text.primarykey)
            pending.append(Have(ruling_node, text_node).get_relationship())
            if len(pending) > 1000:
                rounds += 1
                flush(rounds, handled)

        # Merge whatever is left over from the last partial batch.
        if pending:
            rounds += 1
            flush(rounds, handled)


# rtg = JusRulingTextGraph()
# rtg.create_all_relationship()
예제 #2
0
    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        """
        Collect the enterprise node, its news nodes and the ``Have``
        relationships that connect them.

        :param etp: record providing ``'url'``, ``'name'`` and ``'content'``;
            news items are read from ``etp['content']['新闻舆情']`` when
            that section is present.
        :return: ``(nodes, relationships)``; both lists are empty when the
            enterprise node cannot be obtained.
        """
        enterprise_node = self.get_neo_node(
            Enterprise(URL=etp['url'], NAME=etp['name']))
        if enterprise_node is None:
            return [], []

        nodes = [enterprise_node]
        relationships = []
        if '新闻舆情' not in etp['content'].keys():
            return nodes, relationships

        formatted = self.get_format_dict(etp['content']['新闻舆情'])
        for item in News.create_from_dict(formatted):
            news_node = self.get_neo_node(item.pop('news'))
            if news_node is None:
                continue
            nodes.append(news_node)
            # Whatever remains in the item after popping 'news' is passed
            # to the relationship as keyword arguments.
            relationships.append(Have(enterprise_node, news_node, **item))
        return nodes, relationships
예제 #3
0
    def create_all_relationship(self):
        """
        Merge enterprise -[have]-> news relationships for the '公司新闻'
        records.

        1.enterprise -[have or x]->x

        For each record the enterprise node is matched by name among the
        ``legal`` labels. When no node matches, it is created from the
        enterprise's '基本信息' record (whose base-info relationships are
        queued as well) or, if that record is missing too, as a ``Related``
        placeholder holding only the name and link. Records for which no
        enterprise node can be obtained are skipped, so a relationship is
        never built with a ``None`` endpoint. Queued relationships are
        merged whenever more than 1000 have accumulated and once more after
        the last record.

        :return: None
        """
        ops = self.base.query(
            sql={'metaModel': '公司新闻'},
            # NOTE(review): the first 2020 records are skipped on every run;
            # this looks like a leftover from resuming an interrupted
            # import - confirm before relying on full coverage.
            skip=2020,
            no_cursor_timeout=True)
        i, k = 0, 0
        eg = EtpGraph()
        etp_count = ops.count()
        relationships = []
        s_t = time.time()
        for o in ops:
            k += 1
            # TODO(leung): outside the base-info module the url cannot
            # identify the enterprise, hence the match by name.
            etp_n = self.match_node(
                *legal,
                cypher='_.NAME = "{}"'.format(o['name'])
            )
            if etp_n is None:
                # The enterprise is not in the graph yet, so create it.
                base_info = self.base.query_one(
                    sql={'metaModel': '基本信息', 'name': o['name']}
                )
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # Creating the enterprise here does not create its
                    # base-info relationships, so queue them explicitly.
                    relationships += \
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info)
                else:
                    # No base info at all: fall back to an incomplete
                    # placeholder carrying only the name and link.
                    etp_n = self.get_neo_node(
                        Related(**{'名称': o['name'], '链接': o['url']}))
            if etp_n is None:
                # Neither creation path produced a node; the original only
                # guarded the placeholder path.
                continue

            if '新闻舆情' in o['content'].keys():
                data = self.get_format_dict(o['content']['新闻舆情'])
                for n in News.create_from_dict(data):
                    n_n = self.get_neo_node(n.pop('news'))
                    if n_n is not None:
                        relationships.append(
                            Have(etp_n, n_n, **n).get_relationship()
                        )
            if len(relationships) > 1000:
                i += 1
                # Seconds elapsed since the previous batch was reported.
                sp = int(time.time() - s_t)
                s_t = time.time()
                self.graph_merge_relationships(relationships)
                if not self.index_and_constraint_statue:
                    self.create_index_and_constraint()
                print(SuccessMessage('{}:success merge relationships to database '
                                     'round {} and deal {}/{} enterprise and spend {} '
                                     'seconds,and merge {} relationships.'.format(
                    dt.datetime.now(), i, k, etp_count, sp, len(relationships)
                )))
                relationships.clear()
        # Merge whatever is left over from the last partial batch.
        if relationships:
            i += 1
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(SuccessMessage('{}:success merge relationships to database '
                                 'round {} and deal {}/{} enterprise,and'
                                 ' merge {} relationships.'.format(
                dt.datetime.now(), i, k, etp_count, len(relationships)
            )))
            relationships.clear()
예제 #4
0
 def get_all_nodes_and_relationships_from_enterprise(self, etp):
     """
     Collect the nodes and relationships described by one enterprise's
     operating-status record.

     The sections of ``etp['content']`` handled here are: '行政许可'
     (licences from '工商局' and '信用中国'), '招投标信息', '抽查检查',
     '双随机抽查', '税务信用', '进出口信用', '招聘', '客户', '供应商',
     '信用评级' and '土地转让'. A '产权交易' section is accepted but
     nothing is extracted from it.

     :param etp: record providing ``'url'``, ``'name'`` and ``'content'``.
     :return: ``(nodes, relationships)``; both lists are empty when the
         enterprise node cannot be obtained.
     """
     enterprise_node = self.get_neo_node(
         Enterprise(URL=etp['url'], NAME=etp['name']))
     if enterprise_node is None:
         return [], []
     nodes = [enterprise_node]
     relationships = []

     def linked(items, key):
         # Pop the entity stored under key from each item and turn it into
         # a node; items without a node are skipped. The node is recorded
         # in nodes before (node, remaining item) is handed back.
         for item in items:
             node = self.get_neo_node(item.pop(key))
             if node is None:
                 continue
             nodes.append(node)
             yield node, item

     def trading_partner(entity):
         # Match a client/supplier by URL or name among the legal labels;
         # otherwise create it, as an Enterprise when it reports being one.
         found = self.match_node(
             *legal,
             cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                 entity['URL'], entity['NAME']))
         if found is not None:
             return found
         if entity.isEnterprise():
             entity = Enterprise(**entity.to_dict(with_label=False))
         return self.get_neo_node(entity)

     def land_party(party):
         # Resolve one side of a land transfer: this enterprise itself, an
         # existing node matched by URL, or a newly created node.
         party['链接'] = Enterprise.parser_url(party['链接'])
         if party['名称'] == etp['name'] or party['链接'] == etp['url']:
             return enterprise_node
         # The party may also be a person.
         found = self.match_node(
             *legal, cypher='_.URL = "{}"'.format(party['链接']))
         if found is not None:
             return found
         candidate = Enterprise(**party)
         if not candidate.isEnterprise():
             candidate = Person(**party)
             if not candidate.isPerson():
                 candidate = Related(**party)
         return self.get_neo_node(candidate)

     content = etp['content']
     sections = content.keys()
     # '产权交易' (property-rights transactions) may be present, but no
     # nodes or relationships are built from it yet.

     if '行政许可' in sections:
         permits = content['行政许可']
         for issuer in ('工商局', '信用中国'):
             if issuer not in permits.keys():
                 continue
             formatted = self.get_format_dict(permits[issuer])
             licences = License.create_from_dict(formatted, issuer)
             for node, props in linked(licences, 'license'):
                 relationships.append(Have(enterprise_node, node, **props))

     if '招投标信息' in sections:
         # Published bidding information is usually the outcome, so joint
         # bidders can normally not be found unless they won together.
         formatted = self.get_format_dict(content['招投标信息'])
         biddings = Bidding.create_from_dict(formatted)
         for node, props in linked(biddings, 'bidding'):
             # TODO(leung): the project category is used as bidding result
             relationships.append(
                 TakePartIn(enterprise_node, node,
                            **dict(props, **{'RESULT': node['TYPE']})))

     if '抽查检查' in sections:
         formatted = self.get_format_dict(content['抽查检查'])
         checks = Check.create_from_dict(formatted)
         for node, props in linked(checks, 'check'):
             relationships.append(
                 Have(enterprise_node, node,
                      **dict(props, **{'RESULT': node['RESULT']})))

     if '双随机抽查' in sections:
         formatted = self.get_format_dict(content['双随机抽查'])
         random_checks = RandomCheck.create_from_dict(formatted)
         # TODO(leung): random checks carry no result
         for node, props in linked(random_checks, 'check'):
             relationships.append(Have(enterprise_node, node, **props))

     if '税务信用' in sections:
         formatted = self.get_format_dict(content['税务信用'])
         tax_credits = TaxCredit.create_from_dict(formatted)
         for node, props in linked(tax_credits, 'TaxCredit'):
             # TODO(leung): the tax credit grade is used as rating result
             relationships.append(
                 Have(enterprise_node, node,
                      **dict(RESULT=node['GRADE'], **props)))

     if '进出口信用' in sections:
         formatted = self.get_format_dict(content['进出口信用'])
         trade_credits = IAE.create_from_dict(formatted)
         for node, props in linked(trade_credits, 'iae'):
             relationships.append(Have(enterprise_node, node, **props))

     if '招聘' in sections:
         formatted = self.get_format_dict(content['招聘'])
         positions = Position.create_from_dict(formatted)
         for node, props in linked(positions, 'position'):
             relationships.append(Recruit(enterprise_node, node, **props))

     if '客户' in sections:
         formatted = self.get_format_dict(content['客户'])
         for sale in Client.create_from_dict(formatted):
             partner_node = trading_partner(sale.pop('client'))
             if partner_node is None:
                 continue
             nodes.append(partner_node)
             relationships.append(
                 SellTo(enterprise_node, partner_node, **sale))

     if '供应商' in sections:
         formatted = self.get_format_dict(content['供应商'])
         for purchase in Supplier.create_from_dict(formatted):
             partner_node = trading_partner(purchase.pop('supplier'))
             if partner_node is None:
                 continue
             nodes.append(partner_node)
             relationships.append(
                 BuyFrom(enterprise_node, partner_node, **purchase))

     if '信用评级' in sections:
         formatted = self.get_format_dict(content['信用评级'])
         for rating in formatted:
             agency = rating.pop('评级公司')
             agency['链接'] = Enterprise.parser_url(agency['链接'])
             agency_node = self.match_node(
                 *legal,
                 cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                     agency['链接'], agency['名称']))
             if agency_node is None:
                 agency_node = self.get_neo_node(Enterprise(**agency))
                 if agency_node is None:
                     continue
             # Flatten the nested rating content into relationship fields.
             detail = rating.pop('内容')
             rating['评级内容'] = detail['内容']
             rating['评级链接'] = detail['链接']
             nodes.append(agency_node)
             relationships.append(
                 Appraise(agency_node, enterprise_node, **rating))

     if '土地转让' in sections:
         formatted = self.get_format_dict(content['土地转让'])
         for transfer in formatted:
             seller = transfer.pop('原土地使用权人')
             buyer = transfer.pop('现有土地使用权人')
             plot_node = self.get_neo_node(Plot(**transfer))
             if plot_node is None:
                 continue
             # Previous holder sells the plot, current holder buys it.
             for party, relation in ((seller, Sell), (buyer, Buy)):
                 party_node = land_party(party)
                 if party_node is not None:
                     nodes.append(party_node)
                     nodes.append(plot_node)
                     relationships.append(relation(party_node, plot_node))
     return nodes, relationships
예제 #5
0
    def create_all_relationship(self):
        """
        Merge the relationships described by the '经营状况'
        (operating-status) records.

        1.enterprise -[have or x]->x

        For each record the enterprise node is matched by name among the
        ``legal`` labels. When no node matches, it is created from the
        enterprise's '基本信息' record (whose base-info relationships are
        queued as well) or, if that record is missing too, as a ``Related``
        placeholder holding only name and URL. Records for which no
        enterprise node can be obtained are skipped.

        The sections are read from the query record itself
        (``o['content']``). The previous code read them from ``etp``, which
        is unbound when the enterprise already exists in the graph and is
        otherwise a node object rather than the record.

        Queued relationships are merged whenever more than 1000 have
        accumulated and once more after the last record.

        :return: None
        """
        ops = self.base.query(
            sql={
                'metaModel': '经营状况',
            },
            # NOTE(review): only the first 1000 records are processed -
            # confirm this cap is intended outside of trial runs.
            limit=1000,
            no_cursor_timeout=True)
        rounds, handled = 0, 0
        eg = EtpGraph()
        etp_count = ops.count()
        relationships = []

        def linked(items, key):
            # Pop the entity stored under key from each item and turn it
            # into a node; yield (node, remaining item), skipping items
            # whose node cannot be obtained.
            for item in items:
                node = self.get_neo_node(item.pop(key))
                if node is not None:
                    yield node, item

        def find_or_create(entity):
            # Match a client/supplier by URL or name among the legal
            # labels, creating the node when nothing matches.
            node = self.match_node(
                *legal,
                cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                    entity['URL'], entity['NAME']))
            if node is None:
                node = self.get_neo_node(entity)
            return node

        def land_party(party, record, enterprise_node):
            # Resolve one side of a land transfer: the enterprise itself,
            # an existing node matched by URL, or a new Related node
            # (the party may be a person).
            if party['名称'] == record['name'] \
                    or party['链接'] == record['url']:
                return enterprise_node
            node = self.match_node(
                *legal, cypher='_.URL = "{}"'.format(party['链接']))
            if node is None:
                node = self.get_neo_node(Related(**party))
            return node

        def flush(round_no, handled_count):
            # Merge the queued relationships, make sure the index and
            # constraint set-up has run, report progress, empty the queue.
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(
                SuccessMessage(
                    '{}:success merge relationships to database '
                    'round {} and deal {}/{} enterprise,and'
                    ' merge {} relationships.'.format(
                        dt.datetime.now(), round_no, handled_count,
                        etp_count, len(relationships))))
            relationships.clear()

        for o in ops:
            handled += 1
            # TODO(leung): outside the base-info module the url cannot
            # identify the enterprise, hence the match by name.
            etp_n = self.match_node(*legal,
                                    cypher='_.NAME = "{}"'.format(o['name']))
            if etp_n is None:
                # The enterprise is not in the graph yet, so create it.
                base_info = self.base.query_one(sql={
                    'metaModel': '基本信息',
                    'name': o['name']
                })
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # Creating the enterprise here does not create its
                    # base-info relationships, so queue them explicitly.
                    relationships += \
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info)
                else:
                    # No base info: create an incomplete enterprise. A node
                    # holding only name and url therefore means the
                    # enterprise has no '基本信息' record.
                    placeholder = Related()
                    placeholder['NAME'] = o['name']
                    placeholder['URL'] = o['url']
                    etp_n = self.get_neo_node(placeholder)
            if etp_n is None:
                # No node could be obtained; never build relationships
                # with a None endpoint.
                continue

            content = o['content']
            sections = content.keys()
            # '产权交易' (property-rights transactions) may be present, but
            # nothing is built from it yet.

            if '行政许可' in sections:
                permits = content['行政许可']
                for issuer in ('工商局', '信用中国'):
                    if issuer not in permits.keys():
                        continue
                    formatted = self.get_format_dict(permits[issuer])
                    licences = License.create_from_dict(formatted, issuer)
                    for node, props in linked(licences, 'license'):
                        relationships.append(
                            Have(etp_n, node, **props).get_relationship())

            if '招投标信息' in sections:
                # Published bidding information is usually the outcome, so
                # joint bidders can normally not be found unless they won
                # together.
                formatted = self.get_format_dict(content['招投标信息'])
                biddings = Bidding.create_from_dict(formatted)
                for node, props in linked(biddings, 'bidding'):
                    # TODO(leung): the project category is used as the
                    # bidding result
                    relationships.append(
                        TakePartIn(
                            etp_n, node,
                            **dict(props, **{'RESULT': node['TYPE']})
                        ).get_relationship())

            if '抽查检查' in sections:
                formatted = self.get_format_dict(content['抽查检查'])
                checks = Check.create_from_dict(formatted)
                for node, props in linked(checks, 'check'):
                    relationships.append(
                        Have(
                            etp_n, node,
                            **dict(props, **{'RESULT': node['RESULT']})
                        ).get_relationship())

            if '双随机抽查' in sections:
                formatted = self.get_format_dict(content['双随机抽查'])
                random_checks = RandomCheck.create_from_dict(formatted)
                # TODO(leung): random checks carry no result
                for node, props in linked(random_checks, 'check'):
                    relationships.append(
                        Have(etp_n, node, **props).get_relationship())

            if '税务信用' in sections:
                formatted = self.get_format_dict(content['税务信用'])
                tax_credits = TaxCredit.create_from_dict(formatted)
                for node, props in linked(tax_credits, 'TaxCredit'):
                    # TODO(leung): the tax credit grade is used as the
                    # rating result
                    relationships.append(
                        Have(etp_n, node,
                             **dict(RESULT=node['GRADE'], **props)
                             ).get_relationship())

            if '进出口信用' in sections:
                formatted = self.get_format_dict(content['进出口信用'])
                trade_credits = IAE.create_from_dict(formatted)
                for node, props in linked(trade_credits, 'iae'):
                    relationships.append(
                        Have(etp_n, node, **props).get_relationship())

            if '招聘' in sections:
                formatted = self.get_format_dict(content['招聘'])
                positions = Position.create_from_dict(formatted)
                for node, props in linked(positions, 'position'):
                    relationships.append(
                        Recruit(etp_n, node, **props).get_relationship())

            if '客户' in sections:
                formatted = self.get_format_dict(content['客户'])
                for sale in Client.create_from_dict(formatted):
                    partner_node = find_or_create(sale.pop('client'))
                    if partner_node is None:
                        continue
                    relationships.append(
                        SellTo(etp_n, partner_node,
                               **sale).get_relationship())

            if '供应商' in sections:
                formatted = self.get_format_dict(content['供应商'])
                for purchase in Supplier.create_from_dict(formatted):
                    partner_node = find_or_create(purchase.pop('supplier'))
                    if partner_node is None:
                        continue
                    relationships.append(
                        BuyFrom(etp_n, partner_node,
                                **purchase).get_relationship())

            if '信用评级' in sections:
                formatted = self.get_format_dict(content['信用评级'])
                for rating in formatted:
                    agency = rating.pop('评级公司')
                    agency_node = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            agency['链接'], agency['名称']))
                    if agency_node is None:
                        placeholder = Related()
                        placeholder['NAME'] = agency['名称']
                        placeholder['URL'] = agency['链接']
                        agency_node = self.get_neo_node(placeholder)
                        if agency_node is None:
                            continue
                    # Flatten the nested rating content into relationship
                    # fields.
                    detail = rating.pop('内容')
                    rating['评级内容'] = detail['内容']
                    rating['评级链接'] = detail['链接']
                    relationships.append(
                        Appraise(agency_node, etp_n,
                                 **rating).get_relationship())

            if '土地转让' in sections:
                formatted = self.get_format_dict(content['土地转让'])
                for transfer in formatted:
                    seller = transfer.pop('原土地使用权人')
                    buyer = transfer.pop('现有土地使用权人')
                    plot_node = self.get_neo_node(Plot(**transfer))
                    if plot_node is None:
                        continue
                    seller_node = land_party(seller, o, etp_n)
                    if seller_node is not None:
                        relationships.append(
                            Sell(seller_node, plot_node).get_relationship())
                    buyer_node = land_party(buyer, o, etp_n)
                    if buyer_node is not None:
                        relationships.append(
                            Buy(buyer_node, plot_node).get_relationship())

            if len(relationships) > 1000:
                rounds += 1
                flush(rounds, handled)

        # Merge whatever is left over from the last partial batch.
        if relationships:
            rounds += 1
            flush(rounds, handled)
예제 #6
0
    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        """
        创建从公司基本信息可以看出的关系:
        1.person-[lr]->enterprise
        2.person-[be_in_office]->enterprise
        3.enterprise-[located]->address
        4.person|enterprise-[holding]->enterprise
        5.enterprise-[have]->telephone
        6.enterprise-[have]->email
        :param :
        :return:
        """
        # 如果关系上的节点不存在,数据库同样会补充创建节点,这一点很重要
        nodes, rps = [], []
        etp_n = self.get_neo_node(etp)
        if etp_n is None:
            self.logger.debug('{} filed initialize enterprise '
                              'Neo node'.format(etp['NAME']))
            return nodes, rps
        nodes.append(etp_n)
        try:
            lr = etp.get_legal_representative()
            # 法定代表人有可能会是以下这些对象
            lr_n = self.match_node(
                *['Person'] + legal,
                cypher='_.URL = "{}"'.format(lr['URL'])
            )
            if lr_n is None:
                lr_n = self.get_neo_node(lr)
            if lr_n is None:
                self.logger.debug('{} filed initialize legal representative '
                                  'Neo node'.format(etp['NAME']))
            else:
                nodes.append(lr_n)
                rps.append(LegalRep(lr_n, etp_n))
        except Exception as e:
            ExceptionInfo(e)
            self.logger.error('{} deal legal representative raise '
                              '({})'.format(etp['NAME'], e),
                              exc_info=True)
        try:
            ms = etp.get_manager()
            if len(ms):
                for m in ms:
                    # 主要人员 下面必然是人
                    m_n = m.pop('person')
                    m_n = self.get_neo_node(m_n)
                    if m_n is None:
                        self.logger.debug('{} filed initialize major manager '
                                          'Neo node'.format(etp['NAME']))
                    else:
                        nodes.append(m_n)
                        rps.append(BeInOffice(m_n, etp_n, **m))
        except Exception as e:
            self.logger.error('{} deal major managers raise '
                              '({})'.format(etp['NAME'], e),
                              exc_info=True)
        try:
            dz = etp.get_address()
            dz_n = self.get_neo_node(dz)
            if dz_n is None:
                self.logger.debug('{} filed initialize address '
                                  'Neo node'.format(etp['NAME']))
            else:
                nodes.append(dz_n)
                rps.append(Located(etp_n, dz_n))
        except Exception as e:
            self.logger.error('{} deal address raise '
                              '({})'.format(etp['NAME'], e),
                              exc_info=True)

        try:
            sh = etp.get_share_holder()
            if len(sh):
                for s in sh:
                    s_ = s.pop('share_holder')
                    # 股东有可能会是以下这些对象
                    sh_n = self.match_node(
                        'Person',
                        cypher='_.URL = "{}"'.format(s_['URL'])
                    )
                    if sh_n is None:
                        sh_n = self.match_node(
                            *legal,
                            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                                s_['URL'], s_['NAME'])
                        )
                    if sh_n is None:  # 在以有的对象里面没找到这个股东
                        # 创建这个意外的股东
                        sh_n = self.get_neo_node(s_)
                        if sh_n is None:
                            self.logger.debug('{} filed initialize unexpected share '
                                              'holder Neo node'.format(etp['NAME']))
                    if sh_n is not None:
                        nodes.append(sh_n)
                        rps.append(Share(etp_n, sh_n, **s))
        except Exception as e:
            self.logger.error('{} deal share holder raise '
                              '({})'.format(etp['NAME'], e),
                              exc_info=True)

        try:
            tel = etp.get_telephone_number()
            if tel is None:
                # self.to_logs('there is not valid telephone for'
                #              ' this enterprise.', 'ERROR', eb['name'])
                pass
            else:
                tel_n = self.get_neo_node(tel)
                if tel_n is None:
                    self.logger.debug('{} filed initialize telephone '
                                      'Neo node'.format(etp['NAME']))
                else:
                    nodes.append(tel_n)
                    rps.append(Have(etp_n, tel_n))
            pass
        except Exception as e:
            self.logger.error('{} deal telephone number raise '
                              '({})'.format(etp['NAME'], e),
                              exc_info=True)

        try:
            eml = etp.get_email()
            if eml is None:
                # self.to_logs('there is not valid email for'
                #              ' this enterprise.', 'ERROR', eb['name'])
                pass
            else:
                eml_n = self.get_neo_node(eml)
                if eml_n is None:
                    self.logger.debug('{} filed initialize email '
                                      'Neo node'.format(etp['NAME']))
                else:
                    nodes.append(eml_n)
                    rps.append(Have(etp_n, eml_n))
            pass
        except Exception as e:
            self.logger.debug('{} deal email raise ({})'
                              ''.format(etp['NAME'], e),
                              exc_info=True)
        try:
            ivs = etp.get_invest_outer()
            if len(ivs):
                for iv in ivs:
                    iv_ = iv.pop('invested')
                    # 被投资企业可能是下面这些对象
                    iv_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            iv_['URL'], iv_['NAME'])
                    )
                    if iv_n is None:
                        iv_n = self.get_neo_node(iv_)
                        if iv_n is None:
                            self.logger.debug('{} filed initialize unexpected invested '
                                              'Neo node'.format(etp['NAME']))
                            continue
                    nodes.append(iv_n)
                    rps.append(Investing(etp_n, iv_n, **iv))
        except Exception as e:
            self.logger.error('{} deal invest raise ({})'
                              ''.format(etp['NAME'], e),
                              exc_info=True)
        try:
            brs = etp.get_branch()
            if len(brs):
                for b in brs:
                    b_ = b.pop('branch')
                    # 分支机构可能是下面这些对象
                    b_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            b_['URL'], b_['NAME'])
                    )
                    if b_n is None:
                        b_n = self.get_neo_node(b_)
                        if b_n is None:
                            self.logger.debug('{} filed initialize unexpected branch '
                                              'Neo node'.format(etp['NAME']))
                            continue
                        p_ = b['principal']
                        p_n = self.get_neo_node(p_)
                        if p_n is not None:
                            nodes.append(p_n)
                            rps.append(Principal(p_n, b_n))
                    b.pop('principal')
                    nodes.append(b_n)
                    rps.append(BranchAgency(
                        etp_n, b_n, **b
                    ))
        except Exception as e:
            self.logger.error('{} deal branch raise ({})'
                              ''.format(etp['NAME'], e),
                              exc_info=True)
        try:
            hcs = etp.get_head_company()
            if len(hcs):
                for h in hcs:
                    h_ = h.pop('head')
                    # 总公司可能是下面这些对象
                    h_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            h_['URL'], h_['NAME'])
                    )
                    if h_n is None:
                        h_n = self.get_neo_node(h_)
                        if h_n is None:
                            self.logger.debug('filed initialize unexpected head '
                                              'company Neo node'.format(etp['NAME']))
                            continue
                        p_ = h['principal']
                        p_n = self.get_neo_node(p_)
                        if p_n is not None:
                            nodes.append(p_n)
                            rps.append(Principal(p_n, h_n))
                    h.pop('principal')
                    nodes.append(h_n)
                    rps.append(SuperiorAgency(
                        etp_n, h_n, **h
                    ))
        except Exception as e:
            self.logger.error('{} deal head company raise ({})'
                              ''.format(etp['NAME'], e),
                              exc_info=True)
        try:
            cps = etp.get_construction_project()
            if len(cps):
                for c in cps:
                    c_ = c.pop('project')
                    c_n = self.get_neo_node(c_)
                    if c_n is None:
                        self.logger.debug('filed initialize unexpected construction '
                                          'project Neo node'.format(etp['NAME']))
                        continue
                    jsdw = c.pop('jsdw')
                    # 查询这个建设单位是否已经存在
                    j_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            jsdw['URL'], jsdw['NAME'])
                    )
                    if j_n is None:
                        j_n = self.get_neo_node(jsdw)
                        if j_n is None:
                            self.logger.debug('filed initialize unexpected construction '
                                              'agency Neo node'.format(etp['NAME']))
                            continue
                    # TODO(lj):需要考虑是否将承建、建设单独列为一种关系
                    nodes.append(c_n)
                    rps.append(Have(
                        etp_n, c_n, **dict(角色='承建单位', **c)
                    ))
                    nodes.append(j_n)
                    rps.append(Have(
                        j_n, c_n, **dict(角色='建设单位', **c)
                    ))
        except Exception as e:
            self.logger.error('{} deal construction project raise ({})'
                              ''.format(etp['NAME'], e),
                              exc_info=True)
        try:
            ccs = etp.get_construction_certificate()
            if len(ccs):
                for c in ccs:
                    c_ = c.pop('ctf')
                    c_n = self.get_neo_node(c_)
                    if c_n is None:
                        self.logger.debug('filed initialize unexpected construction '
                                          'certificate Neo node'.format(etp['NAME']))
                        continue
                    nodes.append(c_n)
                    rps.append(Have(etp_n, c_n, **c))
        except Exception as e:
            ExceptionInfo(e)
            self.logger.error('deal construction certificate raise ({})'
                              ''.format(etp['NAME'], e),
                              exc_info=True)
        return nodes, rps
예제 #7
0
    def create_all_relationship(self):
        """
        Create the relationships described by the business-risk ('经营风险')
        documents and merge them into the graph in batches.

        Sections handled per document (keys of ``content``):
        1. 动产抵押: party -[Have]-> debt
        2. 公示催告: party -[Have]-> banknote
        3. 行政处罚 / 环保处罚: enterprise -[Have]-> punishment
        4. 股权出质: pledgor -[Guaranty]-> target, pledgee -[Have]-> target
        5. 破产重组: applicant -[申请破产]-> respondent
        6. 土地抵押: mortgagor -[Guaranty]-> plot, mortgagee -[Have]-> plot

        Pending relationships are merged whenever more than 1000 of them
        have piled up, and once more after the last document.
        :return:
        """
        ors = self.base.query(
            sql={'metaModel': '经营风险'},
            # NOTE(review): presumably caps the run at 1000 documents; looks
            # like a debugging leftover - confirm before a full import.
            limit=1000,
            no_cursor_timeout=True)
        rounds, handled = 0, 0
        eg = EtpGraph()
        etp_count = ors.count()
        relationships = []
        # Dedicated instance used only for parser_url(). The loop must not
        # rebind it: a Related object is not known to offer parser_url().
        url_parser = Enterprise()
        # Equity pledge parties: legal entities by name or url first, then
        # persons by url only (names of persons collide too often).
        pledge_lookups = (
            (legal, '_.NAME = "{name}" OR _.URL = "{url}"'),
            (('Person',), '_.URL = "{url}"'),
        )
        # NOTE(review): lower-case 'person' while the pledge lookup uses
        # 'Person'; kept as it was - confirm which label is stored.
        bankruptcy_lookups = (
            (['person'] + list(legal), '_.URL = "{url}"'),
        )
        for j in ors:
            # Everything listed under a company's business risk belongs to
            # that company.
            handled += 1
            # TODO(leung): the url found in modules other than the basic
            # info cannot identify the company, so only the name is used.
            etp_n = self.match_node(
                *legal,
                cypher='_.NAME = "{}"'.format(j['name'])
            )
            if etp_n is None:
                # The company is not in the graph yet, so create it.
                base_info = self.base.query_one(
                    sql={'metaModel': '基本信息', 'name': j['name']}
                )
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # A company created on the fly has none of its basic
                    # relationships yet, so add them as well.
                    relationships += \
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info)
                else:
                    # No basic info at all: treat the company as a plain
                    # party related to the risk record.
                    owner = Related()
                    owner['NAME'] = j['name']
                    owner['URL'] = j['url']
                    etp_n = self.get_neo_node(owner)
            if etp_n is None:
                # Without an owner node the sections below would fail on
                # etp_n['URL'] and abort the whole run; skip this record.
                print('{}:skip {} because its Neo node can not be '
                      'initialized.'.format(dt.datetime.now(), j['name']))
                continue

            content = j['content']

            if '动产抵押' in content:
                for d in self.get_format_dict(content['动产抵押']):
                    amount = d.pop('被担保主债权数额')
                    debt_n = self.get_neo_node(Debt(**{
                        '债务(金额)': amount['金额'],
                        '债务(单位)': amount['单位'],
                        '履行期限': d.pop('债务人履行债务的期限')
                    }))
                    # All parties are popped first so that none of them
                    # leaks into the relationship properties (**d).
                    parties = (
                        (d.pop('抵押权人'), '抵押权人'),
                        (d.pop('债务人'), '债务人'),
                        (d.pop('所有权或使用权归属'), '所有权或使用权人'),
                    )
                    self._append_role_relationships(
                        relationships, j, etp_n, debt_n, parties, d,
                        require_name=True)

            if '公示催告' in content:
                for d in self.get_format_dict(content['公示催告']):
                    face = d.pop('票面金额')
                    bn_n = self.get_neo_node(Banknote(**{
                        '票据号': d.pop('票据号'),
                        '票据类型': d.pop('票据类型'),
                        '票面金额(金额)': face['金额'],
                        '票面金额(单位)': face['单位']
                    }))
                    parties = (
                        (d.pop('申请人'), '申请人'),
                        (d.pop('持票人'), '持票人'),
                    )
                    self._append_role_relationships(
                        relationships, j, etp_n, bn_n, parties, d,
                        require_name=False)
                    # The current company is always linked as the drawer.
                    relationships.append(Have(
                        etp_n, bn_n, **dict(角色='出票人', **d)
                    ).get_relationship())

            if '行政处罚' in content:
                data = content['行政处罚']
                for source in ('工商局', '税务局', '信用中国', '其他'):
                    self._append_punishments(
                        relationships, etp_n, data[source], source)

            if '环保处罚' in content:
                self._append_punishments(
                    relationships, etp_n, content['环保处罚'], '环保局')

            if '股权出质' in content:
                for sh in self.get_format_dict(content['股权出质']):
                    sh = dict(sh, **self.get_format_amount(
                        '出质数额', sh.pop('出质数额')
                    ))
                    resolved = []
                    # pledgor, pledgee, pledged target - in this order
                    for key, factory in (('出质人', Involveder),
                                         ('质权人', Involveder),
                                         ('标的企业', Possession)):
                        party = sh.pop(key)
                        party['链接'] = url_parser.parser_url(party['链接'])
                        if (j['name'] == party['名称']
                                or party['链接'] == etp_n['URL']):
                            resolved.append(etp_n)
                        else:
                            # TODO(leung): searching several entity kinds
                            # is expensive, keep an eye on it.
                            resolved.append(self._find_or_create_party(
                                party, factory, lookups=pledge_lookups,
                                require_name=True))
                    cz_n, zq_n, bd_n = resolved
                    # 1. pledge: pledgor -> target
                    if cz_n is not None and bd_n is not None:
                        relationships.append(
                            Guaranty(cz_n, bd_n, **sh).get_relationship()
                        )
                    # 2. pledge right: pledgee -> target
                    if zq_n is not None and bd_n is not None:
                        relationships.append(
                            Have(zq_n, bd_n, **sh).get_relationship()
                        )

            if '破产重组' in content:
                for d in self.get_format_dict(content['破产重组']):
                    resolved = []
                    # applicant first, then the respondent
                    for key in ('申请人', '被申请人'):
                        party = d.pop(key)
                        if (party['名称'] == j['name']
                                or party['链接'] == etp_n['URL']):
                            resolved.append(etp_n)
                        else:
                            resolved.append(self._find_or_create_party(
                                party, Involveder,
                                lookups=bankruptcy_lookups))
                    sq_n, bsq_n = resolved
                    if sq_n is not None and bsq_n is not None:
                        relationships.append(
                            Relationship(sq_n, '申请破产', bsq_n, **d)
                        )

            if '土地抵押' in content:
                for d in self.get_format_dict(content['土地抵押']):
                    area = d.pop('抵押面积')
                    p_n = self.get_neo_node(Plot(**{
                        '位置': d.pop('位置'),
                        '面积(数量)': area['数额'],
                        '面积(单位)': area['单位'],
                    }))
                    d = dict(d, **self.get_format_amount(
                        '抵押金额', d.pop('抵押金额')
                    ))
                    # mortgagor -[Guaranty]-> plot, mortgagee -[Have]-> plot
                    parties = (
                        (d.pop('抵押人'), Guaranty),
                        (d.pop('抵押权人'), Have),
                    )
                    for party, relation in parties:
                        if (party['名称'] == j['name']
                                or party['链接'] == etp_n['URL']):
                            node = etp_n
                        else:
                            node = self._find_or_create_party(party, Related)
                        if node is not None:
                            relationships.append(
                                relation(node, p_n, **d).get_relationship()
                            )

            if len(relationships) > 1000:
                rounds += 1
                self._merge_relationship_batch(
                    relationships, rounds, handled, etp_count)
        if relationships:
            rounds += 1
            self._merge_relationship_batch(
                relationships, rounds, handled, etp_count)

    def _find_or_create_party(self, party, factory, lookups=None,
                              require_name=False):
        """
        Resolve a party dict to a Neo node, creating one when none matches.

        Every ``(labels, template)`` pair of ``lookups`` is tried in order
        through ``self.match_node``; the first hit is returned. Without a
        hit a new node is built by ``self.get_neo_node(factory(**party))``.

        :param party: dict holding the keys '名称' (name) and '链接' (link)
        :param factory: class wrapping ``party`` when nothing matches
        :param lookups: iterable of ``(labels, cypher_template)``; templates
            may use the fields ``{url}`` and ``{name}``. Defaults to one
            lookup over ``legal`` by url or name.
        :param require_name: when True no node is created for a name of
            one character or less
        :return: the matched or created node, possibly None
        """
        if lookups is None:
            lookups = ((legal, '_.URL = "{url}" OR _.NAME = "{name}"'),)
        name, url = party['名称'], party['链接']
        # NOTE(review): the values are pasted into the Cypher text as they
        # are, so a name containing a double quote breaks the condition;
        # query parameters would need support in match_node - TODO confirm.
        for labels, template in lookups:
            node = self.match_node(
                *labels, cypher=template.format(url=url, name=name))
            if node is not None:
                return node
        if require_name and len(name) <= 1:
            return None
        return self.get_neo_node(factory(**party))

    def _append_role_relationships(self, relationships, j, etp_n, target_n,
                                   parties, props, require_name):
        """
        Link every ``(party, role)`` pair to ``target_n`` with a Have
        relationship carrying ``props`` plus the role ('角色').

        A party whose name or link equals the one of document ``j`` is the
        current company ``etp_n``; any other party is looked up among the
        legal entities and created as Related when missing.
        """
        for party, role in parties:
            if party['名称'] == j['name'] or party['链接'] == j['url']:
                node = etp_n
            else:
                node = self._find_or_create_party(
                    party, Related, require_name=require_name)
            if node is not None:
                relationships.append(Have(
                    node, target_n, **dict(角色=role, **props)
                ).get_relationship())

    def _append_punishments(self, relationships, etp_n, raw, source):
        """
        Append one Have relationship from ``etp_n`` to each punishment that
        ``Punishment.create_from_dict`` builds from ``raw`` for ``source``.
        """
        records = self.get_format_dict(raw)
        for p in Punishment.create_from_dict(records, source):
            n = self.get_neo_node(p.pop('punishment'))
            if n is not None:
                relationships.append(
                    Have(etp_n, n, **p).get_relationship()
                )

    def _merge_relationship_batch(self, relationships, round_no, handled,
                                  total):
        """
        Merge the pending relationships, create indexes and constraints if
        that has not happened yet, print the progress and empty the list.
        """
        self.graph_merge_relationships(relationships)
        if not self.index_and_constraint_statue:
            self.create_index_and_constraint()
        print(SuccessMessage('{}:success merge relationships to database '
                             'round {} and deal {}/{} enterprise,and'
                             ' merge {} relationships.'.format(
            dt.datetime.now(), round_no, handled, total, len(relationships)
        )))
        relationships.clear()
예제 #8
0
    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        etp_n = Enterprise(URL=etp['url'], NAME=etp['name'])
        etp_n = self.get_neo_node(etp_n)
        if etp_n is None:
            return [], []
        nodes, relationships = [], []
        nodes.append(etp_n)
        if '动产抵押' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['动产抵押'])
            for d in data:
                _ = d.pop('被担保主债权数额')
                debt = Debt(**{'债务(金额)': _['金额'],
                               '债务(单位)': _['单位'],
                               '履行期限': d.pop('债务人履行债务的期限')
                               })
                debt_n = self.get_neo_node(debt)
                nodes.append(debt_n)
                dy = d.pop('抵押权人')
                zw = d.pop('债务人')
                sy = d.pop('所有权或使用权归属')
                dy['链接'] = Enterprise.parser_url(dy['链接'])
                zw['链接'] = Enterprise.parser_url(zw['链接'])
                sy['链接'] = Enterprise.parser_url(sy['链接'])
                if dy['名称'] == etp['name'] or dy['链接'] == etp['url']:
                    dy_n = etp_n
                else:
                    dy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dy['链接'], dy['名称'])
                    )
                    if dy_n is None:
                        # dy_n = Related(**dy)
                        dy_n = Enterprise(**dy)
                        if not dy_n.isEnterprise():
                            dy_n = Person(**dy)
                            if not dy_n.isPerson():
                                dy_n = Related(**dy)
                        dy_n = self.get_neo_node(dy_n)
                if dy_n is not None:
                    nodes.append(dy_n)
                    relationships.append(Have(
                        dy_n, debt_n, **dict(角色='抵押权人', **d)
                    ))

                if zw['名称'] == etp['name'] or zw['链接'] == etp['url']:
                    zw_n = etp_n
                else:
                    zw_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            zw['链接'], zw['名称'])
                    )
                    if zw_n is None and len(zw['名称']) > 1:
                        # zw_n = Related(**zw)
                        zw_n = Enterprise(**zw)
                        if not zw_n.isEnterprise():
                            zw_n = Person(**zw)
                            if not zw_n.isPerson():
                                zw_n = Related(**zw)
                        zw_n = self.get_neo_node(zw_n)
                if zw_n is not None:
                    nodes.append(zw_n)
                    relationships.append(Have(
                        zw_n, debt_n, **dict(角色='债务人', **d)
                    ))

                if sy['名称'] == etp['name'] or sy['链接'] == etp['url']:
                    sy_n = etp_n
                else:
                    sy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            sy['链接'], sy['名称'])
                    )
                    if sy_n is None and len(sy['名称']) > 1:
                        # sy_n = Related(**sy)
                        sy_n = Enterprise(**sy)
                        if not sy_n.isEnterprise():
                            sy_n = Person(**sy)
                            if not sy_n.isPerson():
                                sy_n = Related(**sy)
                        sy_n = self.get_neo_node(sy_n)
                if sy_n is not None:
                    nodes.append(sy_n)
                    relationships.append(Have(
                        sy_n, debt_n, **dict(角色='所有权或使用权人', **d)
                    ))
                pass

        if '公示催告' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['公示催告'])
            for d in data:
                _ = d.pop('票面金额')
                bn = Banknote(**{'票据号': d.pop('票据号'),
                                 '票据类型': d.pop('票据类型'),
                                 '票面金额(金额)': _['金额'],
                                 '票面金额(单位)': _['单位']
                                 })
                bn_n = self.get_neo_node(bn)
                nodes.append(bn_n)
                sq = d.pop('申请人')
                cp = d.pop('持票人')
                sq['链接'] = Enterprise.parser_url(sq['链接'])
                cp['链接'] = Enterprise.parser_url(cp['链接'])
                if sq['名称'] == etp['name'] or sq['链接'] == etp['url']:
                    sq_n = etp_n
                else:
                    sq_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            sq['链接'], sq['名称'])
                    )
                    if sq_n is None:
                        # sq_n = Related(**sq)
                        sq_n = Enterprise(**sq)
                        if not sq_n.isEnterprise():
                            sq_n = Person(**sq)
                            if not sq_n.isPerson():
                                sq_n = Related(**sq)
                        sq_n = self.get_neo_node(sq_n)
                if sq_n is not None:
                    nodes.append(sq_n)
                    relationships.append(Have(
                        sq_n, bn_n, **dict(角色='申请人', **d)
                    ))

                if cp['名称'] == etp['name'] or cp['链接'] == etp['url']:
                    cp_n = etp_n
                else:
                    cp_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            cp['链接'], cp['名称'])
                    )
                    if cp_n is None:
                        # cp_n = Related(**cp)
                        cp_n = Enterprise(**cp)
                        if not cp_n.isEnterprise():
                            cp_n = Person(**cp)
                            if not cp_n.isPerson():
                                cp_n = Related(**cp)
                        cp_n = self.get_neo_node(cp_n)
                if cp_n is not None:
                    nodes.append(cp_n)
                    relationships.append(Have(
                        cp_n, bn_n, **dict(角色='持票人', **d)
                    ))
                relationships.append(Have(
                    etp_n, bn_n, **dict(角色='出票人', **d)
                ))
                pass

        if '行政处罚' in etp['content'].keys():
            data = etp['content']['行政处罚']
            d1 = self.get_format_dict(data['工商局'])
            ps = Punishment.create_from_dict(d1, '工商局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )

            d2 = self.get_format_dict(data['税务局'])
            ps = Punishment.create_from_dict(d2, '税务局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )

            d3 = self.get_format_dict(data['信用中国'])
            ps = Punishment.create_from_dict(d3, '信用中国')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )

            d4 = self.get_format_dict(data['其他'])
            ps = Punishment.create_from_dict(d4, '其他')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )
            pass

        if '环保处罚' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['环保处罚'])
            ps = Punishment.create_from_dict(data, '环保局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )

        if '股权出质' in etp['content'].keys():
            sh_info = etp['content']['股权出质']
            sh_info = self.get_format_dict(sh_info)
            for sh in sh_info:
                sh = dict(sh, **self.get_format_amount(
                    '出质数额', sh.pop('出质数额')
                ))
                # 确定出质人
                cz = sh.pop('出质人')
                cz['链接'] = Enterprise.parser_url(cz['链接'])
                # 判断出质人是不是当前公司
                if etp['name'] == cz['名称'] or cz['链接'] == etp_n['URL']:
                    cz_n = etp_n
                else:
                    # 确定出质人,先在法人主体中找
                    cz_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            cz['名称'], cz['链接']
                        )
                    )
                    if cz_n is None:
                        # 在法人中没找到,就通过url在自然人中找
                        # 这里最好不要通过名称找了,除公司以外出现
                        # 同名的几率很大
                        # TODO(leung):在所有实体中去找开销很大,需要注意
                        cz_n = self.match_node(
                            'Person',
                            cypher='_.URL = "{}"'.format(cz['链接'])
                        )
                        if cz_n is None:
                            # 创建这个股权出质人
                            if len(cz['名称']) > 1:
                                # cz_n = Involveder(**cz)
                                cz_n = Enterprise(**cz)
                                if not cz_n.isEnterprise():
                                    cz_n = Person(**cz)
                                    if not cz_n.isPerson():
                                        cz_n = Related(**cz)
                                cz_n = self.get_neo_node(cz_n)
                    pass
                # 确定质权人
                zq = sh.pop('质权人')
                zq['链接'] = Enterprise.parser_url(zq['链接'])
                # 判断质权人是不是当前公司
                if etp['name'] == zq['名称'] or zq['链接'] == etp_n['URL']:
                    zq_n = etp_n
                else:
                    # 确定质权人,先在企业中找
                    zq_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            zq['名称'], zq['链接']
                        )
                    )
                    if zq_n is None:
                        # 在企业中没找到,就通过url在所有对象中找
                        # 这里最好不要通过名称找了,除公司以外出现
                        # 同名的几率很大
                        # TODO(leung):在所有实体中去找开销很大,需要注意
                        zq_n = self.match_node(
                            'Person',
                            cypher='_.URL = "{}"'.format(zq['链接'])
                        )
                        if zq_n is None:
                            # 创建这个股权出质人
                            if len(zq['名称']) > 1:
                                # zq_n = Involveder(**zq)
                                zq_n = Enterprise(**zq)
                                if not zq_n.isEnterprise():
                                    zq_n = Person(**zq)
                                    if not zq_n.isPerson():
                                        zq_n = Related(**zq)
                                zq_n = self.get_neo_node(zq_n)
                    pass
                # 确定出质标的企业
                bd = sh.pop('标的企业')
                bd['链接'] = Enterprise.parser_url(bd['链接'])
                # 判断出质标的是不是当前公司
                if etp['name'] == bd['名称'] or bd['链接'] == etp_n['URL']:
                    bd_n = etp_n
                else:
                    # 确定出质标的,先在企业中找,不会是人
                    bd_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            bd['名称'], bd['链接']
                        )
                    )
                    if bd_n is None:
                        # 创建这个出质标的
                        if len(bd['名称']) > 1:
                            bd_n = Enterprise(**bd)
                            if not bd_n.isEnterprise():
                                bd_n = Possession(**bd)
                            bd_n = self.get_neo_node(bd_n)
                    pass
                # 创建关系
                if bd_n is None:
                    continue
                nodes.append(bd_n)
                # 1. 抵押
                if cz_n is not None:
                    nodes.append(cz_n)
                    relationships.append(
                        Guaranty(cz_n, bd_n, **sh)
                    )
                # 2. 质权
                if zq_n is not None:
                    nodes.append(zq_n)
                    relationships.append(
                        Have(zq_n, bd_n, **sh)
                    )

        if '破产重组' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['破产重组'])
            for d in data:
                sq = d.pop('申请人')
                sq['链接'] = Enterprise.parser_url(sq['链接'])
                if sq['名称'] == etp['name'] or sq['链接'] == etp_n['URL']:
                    sq_n = etp_n
                else:
                    sq_n = self.match_node(
                        *['person'] + legal,
                        cypher='_.URL = "{}"'.format(sq['链接'])
                    )
                    if sq_n is None:
                        # sq_n = Involveder(**sq)
                        sq_n = Enterprise(**sq)
                        if not sq_n.isEnterprise():
                            sq_n = Person(**sq)
                            if not sq_n.isPerson():
                                sq_n = Related(**sq)
                        sq_n = self.get_neo_node(sq_n)
                bsq = d.pop('被申请人')
                bsq['链接'] = Enterprise.parser_url(bsq['链接'])
                if bsq['名称'] == etp['name'] or bsq['链接'] == etp_n['URL']:
                    bsq_n = etp_n
                else:
                    # 被申请破产的一般是法人
                    bsq_n = self.match_node(
                        *['person'] + legal,
                        cypher='_.URL = "{}"'.format(bsq['链接'])
                    )
                    if bsq_n is None:
                        # bsq_n = Involveder(**bsq)
                        bsq_n = Enterprise(**bsq)
                        if not bsq_n.isEnterprise():
                            bsq_n = Person(**bsq)
                            if not bsq_n.isPerson():
                                bsq_n = Related(**bsq)
                        bsq_n = self.get_neo_node(bsq_n)
                if sq_n is not None and bsq_n is not None:
                    nodes += [sq_n, bsq_n]
                    relationships.append(
                        ApplyBankrupt(sq_n, bsq_n, **d)
                    )
            pass

        if '土地抵押' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['土地抵押'])
            for d in data:
                _ = d.pop('抵押面积')
                p = Plot(**{'位置': d.pop('位置'),
                            '面积(数量)': _['数额'],
                            '面积(单位)': _['单位'],
                            })
                p_n = self.get_neo_node(p)
                nodes.append(p_n)
                d = dict(d, **self.get_format_amount(
                    '抵押金额', d.pop('抵押金额')
                ))
                dy = d.pop('抵押人')
                dyq = d.pop('抵押权人')
                dy['链接'] = Enterprise.parser_url(dy['链接'])
                dyq['链接'] = Enterprise.parser_url(dyq['链接'])
                if dy['名称'] == etp['name'] or dy['链接'] == etp_n['URL']:
                    dy_n = etp_n
                else:
                    dy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dy['链接'], dy['名称'])
                    )
                    if dy_n is None:
                        # dy_n = Related(**dy)
                        dy_n = Enterprise(**dy)
                        if not dy_n.isEnterprise():
                            dy_n = Person(**dy)
                            if not dy_n.isPerson():
                                dy_n = Related(**dy)
                        dy_n = self.get_neo_node(dy_n)
                if dy_n is not None:
                    nodes.append(dy_n)
                    relationships.append(
                        Guaranty(dy_n, p_n, **d)
                    )
                if dyq['名称'] == etp['name'] or dyq['链接'] == etp_n['URL']:
                    dyq_n = etp_n
                else:
                    dyq_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dyq['链接'], dyq['名称'])
                    )
                    if dyq_n is None:
                        # dyq_n = Related(**dyq)
                        dyq_n = Enterprise(**dyq)
                        if not dyq_n.isEnterprise():
                            dyq_n = Person(**dyq)
                            if not dyq_n.isPerson():
                                dyq_n = Related(**dyq)
                        dyq_n = self.get_neo_node(dyq_n)
                if dyq_n is not None:
                    nodes.append(dyq_n)
                    relationships.append(
                        Have(dyq_n, p_n, **d)
                    )
            pass

        return nodes, relationships
예제 #9
0
    def create_all_relationship(self, skip=79175 + 7909):
        """Merge ``enterprise -[have]-> x`` relationships into the graph.

        Iterates the records returned by ``self.base.query`` for
        ``metaModel == '知识产权'``, resolves (or creates) the node of the
        enterprise each record belongs to, and links it with ``Have`` to
        every object parsed out of the record's content sections listed
        in ``sections`` below.  Relationships are merged through
        ``self.graph_merge_relationships`` whenever more than 1000 have
        accumulated, and once more after the last record.

        :param skip: forwarded as the ``skip`` argument of
            ``self.base.query``.  The default is the offset that used to
            be hard-coded in this method, so calling it without
            arguments behaves as before.
        :return: None
        """
        # (content key, class providing ``create_from_dict``, key under
        # which each parsed item carries the object to turn into a node),
        # in processing order.
        sections = (
            ('网站信息', Website, 'website'),
            ('证书信息', Certificate, 'certificate'),
            ('专利信息', Patent, 'patent'),
            ('商标信息', Trademark, 'trademark'),
            ('软件著作权', SoftCopyRight, 'softcopyright'),
            ('作品著作权', WorkCopyRight, 'workcopyright'),
            ('微博', Weibo, 'weibo'),
            ('微信公众号', OfficialAccount, 'WeChat'),
            ('小程序', Applets, 'applets'),
            ('APP', App, 'app'),
        )
        rts = self.base.query(
            sql={'metaModel': '知识产权'},
            skip=skip,
            no_cursor_timeout=True)
        i, k = 0, 0
        eg = EtpGraph()
        etp_count = rts.count()
        relationships = []
        s_t = time.time()
        for r in rts:
            k += 1
            # TODO(leung): the url found in modules other than the basic
            # info cannot identify the company, so match on the name only.
            # The name is embedded in a double-quoted Cypher literal:
            # escape backslashes and quotes so such names cannot break
            # (or alter) the predicate.
            quoted_name = str(r['name']).replace(
                '\\', '\\\\').replace('"', '\\"')
            etp_n = self.match_node(
                *legal, cypher='_.NAME = "{}"'.format(quoted_name))
            if etp_n is None:
                # The company is not in the graph yet, so create it.
                base_info = self.base.query_one(sql={
                    'metaModel': '基本信息',
                    'name': r['name']
                })
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # Other builders may create a missing enterprise but
                    # not its basic relationships, so add those here.
                    relationships += (
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info))
                else:
                    # Nothing is known about this company: create a
                    # placeholder carrying only its name and link.
                    etp_n = self.get_neo_node(
                        Related(**{'名称': r['name'], '链接': r['url']}))

            # get_neo_node may yield None; without a start node no
            # relationship can be built for this record.
            if etp_n is not None:
                content = r['content']
                for key, factory, node_key in sections:
                    if key not in content:
                        continue
                    data = self.get_format_dict(content[key])
                    for item in factory.create_from_dict(data):
                        node = self.get_neo_node(item.pop(node_key))
                        if node is not None:
                            # What is left in ``item`` after the pop
                            # becomes the relationship's properties.
                            relationships.append(
                                Have(etp_n, node, **item).get_relationship())

            if len(relationships) > 1000:
                i += 1
                sp = int(time.time() - s_t)
                s_t = time.time()
                self.graph_merge_relationships(relationships)
                if not self.index_and_constraint_statue:
                    self.create_index_and_constraint()
                print(
                    SuccessMessage(
                        '{}:success merge relationships to database '
                        'round {} and deal {}/{} enterprise and spend {} '
                        'seconds,and merge {} relationships.'.format(
                            dt.datetime.now(), i, k, etp_count, sp,
                            len(relationships))))
                relationships.clear()
        if relationships:
            # Merge whatever is left over from the last partial batch.
            i += 1
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(
                SuccessMessage('{}:success merge relationships to database '
                               'round {} and deal {}/{} enterprise,and'
                               ' merge {} relationships.'.format(
                                   dt.datetime.now(), i, k, etp_count,
                                   len(relationships))))
            relationships.clear()
예제 #10
0
    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        """Collect an enterprise's nodes and ``Have`` relationships.

        A node is obtained for the enterprise itself, then every content
        section listed in ``layout`` that is present in ``etp['content']``
        is parsed and each resulting object is linked to the enterprise.

        :param etp: mapping read through ``etp['url']``, ``etp['name']``
            and ``etp['content']``.
        :return: ``(nodes, relationships)``.  Both lists are empty when
            no node can be obtained for the enterprise; otherwise
            ``nodes`` starts with the enterprise node.
        """
        owner = self.get_neo_node(
            Enterprise(URL=etp['url'], NAME=etp['name']))
        if owner is None:
            return [], []

        nodes, relationships = [owner], []

        # One row per content section, handled top to bottom:
        # (section key, class providing ``create_from_dict``, key under
        # which each parsed item holds the object to turn into a node).
        layout = (
            ('网站信息', Website, 'website'),
            ('证书信息', Certificate, 'certificate'),
            ('专利信息', Patent, 'patent'),
            ('商标信息', Trademark, 'trademark'),
            ('软件著作权', SoftCopyRight, 'softcopyright'),
            ('作品著作权', WorkCopyRight, 'workcopyright'),
            ('微博', Weibo, 'weibo'),
            ('微信公众号', OfficialAccount, 'WeChat'),
            ('小程序', Applets, 'applets'),
            ('APP', App, 'app'),
        )

        content = etp['content']
        for section, factory, asset_key in layout:
            if section not in content.keys():
                continue
            parsed = factory.create_from_dict(
                self.get_format_dict(content[section]))
            for attrs in parsed:
                asset_n = self.get_neo_node(attrs.pop(asset_key))
                if asset_n is None:
                    continue
                nodes.append(asset_n)
                # The entries remaining in ``attrs`` after the pop are
                # passed on as the relationship's properties.
                relationships.append(Have(owner, asset_n, **attrs))
        return nodes, relationships