Esempio n. 1
0
class NewsGraph(BaseGraph):
    """Graph builder that links enterprises to their news items.

    Source documents are read through ``self.base`` with metaModel
    '公司新闻'; the news entries are taken from ``content['新闻舆情']``.
    """

    def __init__(self, **kwargs):
        """Initialise the base graph and bind the source data model.

        :param kwargs: forwarded unchanged to ``BaseGraph.__init__``.
        """
        BaseGraph.__init__(self, **kwargs)
        self.base = BaseModel(
            tn='cq_all',
            # tn='qcc.1.1',
            # location='gcxy',
            # dbname='data'
        )

    def create_index_and_constraint(self):
        """Create uniqueness constraints and indexes for the entities used.

        A unique key is indexed automatically, so it does not need a
        separate index.

        :return: None
        """
        # TODO(leung): keep the labels below accurate at all times
        used_entity = [
            'News',
        ]
        constraint = {}
        index = {}
        for label in used_entity:
            entity = entities(label)
            constraint[label] = [entity.primarykey]
            idx = entity.index
            if len(idx):
                index[label] = idx
        self.add_index_and_constraint(index, constraint)

    def create_all_relationship(self):
        """Merge enterprise -[Have]-> news relationships into the graph.

        1. enterprise -[have or x]-> x

        Relationships are accumulated and merged in batches once more than
        1000 are pending; whatever is left is merged after the loop.

        :return: None
        """
        # NOTE(review): skip=2020 is hard-coded and looks like a leftover
        # from resuming an interrupted run - confirm before a full import.
        ops = self.base.query(
            sql={'metaModel': '公司新闻'},
            # limit=10,
            skip=2020,
            no_cursor_timeout=True)
        i, k = 0, 0  # i: merge rounds done, k: documents processed
        eg = EtpGraph()
        etp_count = ops.count()
        relationships = []
        s_t = time.time()
        for o in ops:
            k += 1
            # TODO(leung): outside the basic-info module the url alone
            # cannot identify the company, so the lookup is done by name.
            # NOTE(review): the name is interpolated into the cypher text
            # unescaped; a name containing '"' would break the query.
            etp_n = self.match_node(
                *legal,
                cypher='_.NAME = "{}"'.format(o['name'])
            )
            if etp_n is None:
                # The company is not in the graph yet, so create it.
                base_info = self.base.query_one(
                    sql={'metaModel': '基本信息', 'name': o['name']}
                )
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # Creating an enterprise on the fly does not create its
                    # basic relationships, so add them explicitly here.
                    relationships += \
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info)
                else:
                    # No basic info is available: create an incomplete
                    # company from the name and url only.
                    etp = Related(**{'名称': o['name'], '链接': o['url']})
                    etp_n = self.get_neo_node(etp)

            # Both creation paths may fail to yield a node; never build a
            # relationship whose start node is None.
            if etp_n is not None and '新闻舆情' in o['content']:
                data = self.get_format_dict(o['content']['新闻舆情'])
                for n in News.create_from_dict(data):
                    n_n = self.get_neo_node(n.pop('news'))
                    if n_n is not None:
                        relationships.append(
                            Have(etp_n, n_n, **n).get_relationship()
                        )

            if len(relationships) > 1000:
                i += 1
                sp = int(time.time() - s_t)
                s_t = time.time()
                self.graph_merge_relationships(relationships)
                if not self.index_and_constraint_statue:
                    self.create_index_and_constraint()
                print(SuccessMessage(
                    '{}:success merge relationships to database '
                    'round {} and deal {}/{} enterprise and spend {} '
                    'seconds,and merge {} relationships.'.format(
                        dt.datetime.now(), i, k, etp_count, sp,
                        len(relationships)
                    )))
                relationships.clear()
        if len(relationships):
            i += 1
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(SuccessMessage(
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise,and'
                ' merge {} relationships.'.format(
                    dt.datetime.now(), i, k, etp_count, len(relationships)
                )))
            relationships.clear()

    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        """Collect the nodes and relationships for one enterprise document.

        :param etp: document providing 'url', 'name' and a 'content'
            mapping.
        :return: ``(nodes, relationships)``; both lists are empty when no
            node could be obtained for the enterprise itself.
        """
        etp_n = self.get_neo_node(
            Enterprise(URL=etp['url'], NAME=etp['name']))
        if etp_n is None:
            return [], []
        nodes, relationships = [etp_n], []

        if '新闻舆情' in etp['content']:
            data = self.get_format_dict(etp['content']['新闻舆情'])
            for n in News.create_from_dict(data):
                n_n = self.get_neo_node(n.pop('news'))
                if n_n is not None:
                    nodes.append(n_n)
                    # Whatever is left in ``n`` after popping 'news' becomes
                    # the relationship properties.
                    relationships.append(Have(etp_n, n_n, **n))
        return nodes, relationships

    def get_all_nodes_and_relationships(
            self, save_folder=None, **kwargs):
        """Group the nodes and relationships of all documents by label.

        :param save_folder: when given, the grouped data is handed to
            ``self.save_graph`` every 10000 documents and once more at the
            end, and the in-memory groups are cleared after each save.
        :param kwargs: forwarded unchanged to ``self.save_graph``.
        :return: ``(nodes, relationships)`` dicts keyed by label; both are
            empty when ``save_folder`` was given, because they are cleared
            after the final save.
        """
        enterprises = self.base.query(
            sql={
                'metaModel': '公司新闻',
                # 'name': '重庆轩烽建材有限公司'
            },
            # limit=10000,
            # skip=100000,
            no_cursor_timeout=True)
        i, j = 0, 0  # i: documents processed, j: save rounds
        nc, rc = 0, 0  # running totals returned by save_graph
        etp_count = enterprises.count()
        nodes, relationships = {}, {}
        # The 32 word characters following 'unique=' in the url.
        unique_code_pattern = re.compile(r'(?<=unique=)\w{32}')

        _st_ = time.time()
        for ep in enterprises:
            i += 1
            uc = unique_code_pattern.search(ep['url'])
            if uc is None:
                self.logger.info('{}:mismatch url'.format(ep['name']))
                continue
            ep['url'] = '/firm_' + uc.group(0) + '.html'
            nds, rps = self.get_all_nodes_and_relationships_from_enterprise(ep)
            for _nds_ in nds:
                if _nds_ is None:
                    continue
                # The first label of the node is used as its group key.
                label = list(_nds_.labels)[0]
                nodes.setdefault(label, []).append(
                    dict(label=label, **_nds_))
            for _rps_ in rps:
                _rps_ = _rps_.to_dict()
                relationships.setdefault(_rps_['label'], []).append(_rps_)
            if i % 10000 == 0:
                j += 1
                if save_folder is not None:
                    _nc_, _rc_ = self.save_graph(
                        save_folder, nodes,
                        relationships, **kwargs)
                    nc += _nc_
                    rc += _rc_
                    nodes.clear()
                    relationships.clear()
                # Elapsed time is now minus start (the reverse is negative).
                self.logger.info(SuccessMessage(
                    'success trans data to csv round {} and '
                    'deal {}/{} enterprise spend {} seconds.'
                    ''.format(j, i, etp_count, int(time.time() - _st_))
                ))
                _st_ = time.time()
        if save_folder is not None:
            _nc_, _rc_ = self.save_graph(
                save_folder, nodes,
                relationships, **kwargs)
            nc += _nc_
            rc += _rc_
            nodes.clear()
            relationships.clear()
            self.logger.info('Summary:')
            self.logger.info(' save graph data:')
            self.logger.info('   {} nodes'.format(nc))
            self.logger.info('   {} relationships'.format(rc))
        return nodes, relationships
Esempio n. 2
0
class DvpGraph(BaseGraph):
    """Graph builder for the competitor data of enterprises.

    Source documents are read through ``self.base`` with metaModel
    '企业发展'; the competitor entries are taken from
    ``content['竞品信息']``.
    """

    def __init__(self, **kwargs):
        """Initialise the base graph and bind the source data model.

        :param kwargs: forwarded unchanged to ``BaseGraph.__init__``.
        """
        BaseGraph.__init__(self, **kwargs)
        self.base = BaseModel(
            tn='cq_all',
            # tn='qcc.1.1',
            # location='gcxy',
            # dbname='data'
        )

    def create_index_and_constraint(self):
        """Create uniqueness constraints and indexes for the entities used.

        A unique key is indexed automatically, so it does not need a
        separate index. Both mappings are currently empty.

        :return: None
        """
        # TODO(leung): keep the labels below accurate at all times
        constraint = {
            # 'News': [News.primarykey],
            # 'Possession': [Possession.primarykey],
            # 'Involveder': ['HASH_ID'],
        }
        index = {
            # 'Enterprise': [('NAME',)]
        }
        self.add_index_and_constraint(index, constraint)

    def create_all_relationship(self):
        """Merge enterprise -[Compete]-> enterprise relationships.

        1. enterprise -[compete]-> enterprise

        Relationships are accumulated and merged in batches once more than
        1000 are pending; whatever is left is merged after the loop.

        :return: None
        """
        # NOTE(review): limit=1000 is hard-coded and looks like a debugging
        # leftover - confirm before a full import.
        ops = self.base.query(sql={'metaModel': '企业发展'},
                              field={
                                  'name': 1,
                                  'url': 1,
                                  'content.竞品信息': 1
                              },
                              limit=1000,
                              no_cursor_timeout=True)
        i, k = 0, 0  # i: merge rounds done, k: documents processed
        eg = EtpGraph()
        etp_count = ops.count()
        relationships = []
        for o in ops:
            k += 1
            # TODO(leung): outside the basic-info module the url alone
            # cannot identify the company, so the lookup is done by name.
            # NOTE(review): values are interpolated into the cypher text
            # unescaped; a value containing '"' would break the query.
            etp_n = self.match_node(*legal,
                                    cypher='_.NAME = "{}"'.format(o['name']))
            if etp_n is None:
                # The company is not in the graph yet, so create it.
                base_info = self.base.query_one(sql={
                    'metaModel': '基本信息',
                    'name': o['name']
                })
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # Creating an enterprise on the fly does not create its
                    # basic relationships, so add them explicitly here.
                    relationships += \
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info)
                else:
                    # No basic info is available: create an incomplete
                    # company from the name and url only.
                    etp = Related()
                    etp['NAME'] = o['name']
                    etp['URL'] = o['url']
                    etp_n = self.get_neo_node(etp)

            # Never build a relationship whose start node is None.
            if etp_n is not None and '竞品信息' in o['content']:
                data = self.get_format_dict(o['content']['竞品信息'])
                for d in data:
                    etp_2 = d.pop('关联企业')
                    name_2 = etp_2['名称']
                    # Entries without a usable competitor name are ignored.
                    if name_2 is None or len(name_2) <= 1:
                        continue
                    # parser_url is called on the class, as the sibling
                    # method does; the loop-level entity object may be a
                    # Related instance at this point.
                    etp_2['链接'] = Enterprise.parser_url(etp_2['链接'])
                    etp_n_2 = self.match_node(*legal,
                                              cypher='_.URL = "{}"'.format(
                                                  etp_2['链接']))
                    if etp_n_2 is None:
                        # Unknown competitor: move its descriptive fields
                        # from the relationship data onto a new node.
                        props = {
                            'URL': etp_2['链接'],
                            'NAME': name_2,
                            '简介': d.pop('产品介绍'),
                            '成立日期': d.pop('成立日期'),
                            '融资信息': d.pop('融资信息'),
                            '所属地': d.pop('所属地'),
                        }
                        etp_n_2 = self.get_neo_node(Related(**props))
                    if etp_n_2 is None:
                        continue
                    relationships.append(
                        Compete(etp_n, etp_n_2, **d).get_relationship())

            if len(relationships) > 1000:
                i += 1
                self.graph_merge_relationships(relationships)
                if not self.index_and_constraint_statue:
                    self.create_index_and_constraint()
                print(
                    SuccessMessage(
                        '{}:success merge relationships to database '
                        'round {} and deal {}/{} enterprise,and'
                        ' merge {} relationships.'.format(
                            dt.datetime.now(), i, k, etp_count,
                            len(relationships))))
                relationships.clear()
        if len(relationships):
            i += 1
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(
                SuccessMessage('{}:success merge relationships to database '
                               'round {} and deal {}/{} enterprise,and'
                               ' merge {} relationships.'.format(
                                   dt.datetime.now(), i, k, etp_count,
                                   len(relationships))))
            relationships.clear()

    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        """Collect the nodes and relationships for one enterprise document.

        For every competing product a ``Compete`` relationship from the
        enterprise to the product is produced and, when the producing
        company has a usable name, a ``Produce`` relationship from that
        company to the product.

        :param etp: document providing 'url', 'name' and a 'content'
            mapping.
        :return: ``(nodes, relationships)``; both lists are empty when no
            node could be obtained for the enterprise itself.
        """
        etp_n = self.get_neo_node(
            Enterprise(URL=etp['url'], NAME=etp['name']))
        if etp_n is None:
            return [], []
        nodes, relationships = [etp_n], []
        if '竞品信息' in etp['content']:
            data = self.get_format_dict(etp['content']['竞品信息'])
            for d in Product.create_from_dict(data):
                p_n = self.get_neo_node(d.pop('product'))
                if p_n is None:
                    continue
                nodes.append(p_n)
                relationships.append(Compete(etp_n, p_n))
                etp_2 = d.pop('关联企业')
                etp_2['链接'] = Enterprise.parser_url(etp_2['链接'])
                name_2 = etp_2['名称']
                if name_2 is None or len(name_2) <= 1:
                    continue
                etp_n_2 = self.match_node(*legal,
                                          cypher='_.URL = "{}"'.format(
                                              etp_2['链接']))
                if etp_n_2 is None:
                    producer = Enterprise(**etp_2)
                    if not producer.isEnterprise():
                        # Not recognisable as an enterprise: keep it as a
                        # generic related party with name and link only.
                        producer = Related(**{
                            '链接': etp_2['链接'],
                            '名称': name_2
                        })
                    etp_n_2 = self.get_neo_node(producer)
                if etp_n_2 is None:
                    # No node could be obtained; skip rather than emit a
                    # relationship with a missing start node.
                    continue
                nodes.append(etp_n_2)
                relationships.append(Produce(etp_n_2, p_n))
        return nodes, relationships

    def get_all_nodes_and_relationships(self, save_folder=None, **kwargs):
        """Group the nodes and relationships of all documents by label.

        :param save_folder: when given, the grouped data is handed to
            ``self.save_graph`` every 10000 documents and once more at the
            end, and the in-memory groups are cleared after each save.
        :param kwargs: forwarded unchanged to ``self.save_graph``.
        :return: ``(nodes, relationships)`` dicts keyed by label; both are
            empty when ``save_folder`` was given, because they are cleared
            after the final save.
        """
        enterprises = self.base.query(
            sql={'metaModel': '企业发展'},
            field={
                'name': 1,
                'url': 1,
                'content.竞品信息': 1
            },
            # limit=100000,
            # skip=2000,
            no_cursor_timeout=True)
        i, j = 0, 0  # i: documents processed, j: save rounds
        nc, rc = 0, 0  # running totals returned by save_graph
        etp_count = enterprises.count()
        nodes, relationships = {}, {}
        # The 32 word characters following 'unique=' in the url.
        unique_code_pattern = re.compile(r'(?<=unique=)\w{32}')

        _st_ = time.time()
        for ep in enterprises:
            i += 1
            uc = unique_code_pattern.search(ep['url'])
            if uc is None:
                self.logger.info('{}:mismatch url'.format(ep['name']))
                continue
            ep['url'] = '/firm_' + uc.group(0) + '.html'
            nds, rps = self.get_all_nodes_and_relationships_from_enterprise(ep)
            for _nds_ in nds:
                if _nds_ is None:
                    continue
                # The first label of the node is used as its group key.
                label = list(_nds_.labels)[0]
                nodes.setdefault(label, []).append(
                    dict(label=label, **_nds_))
            for _rps_ in rps:
                _rps_ = _rps_.to_dict()
                relationships.setdefault(_rps_['label'], []).append(_rps_)
            if i % 10000 == 0:
                j += 1
                if save_folder is not None:
                    _nc_, _rc_ = self.save_graph(save_folder, nodes,
                                                 relationships, **kwargs)
                    nc += _nc_
                    rc += _rc_
                    nodes.clear()
                    relationships.clear()
                self.logger.info(
                    SuccessMessage('success trans data to csv round {} and '
                                   'deal {}/{} enterprise spend {} seconds.'
                                   ''.format(j, i, etp_count,
                                             int(time.time() - _st_))))
                _st_ = time.time()
        if save_folder is not None:
            _nc_, _rc_ = self.save_graph(save_folder, nodes, relationships,
                                         **kwargs)
            nc += _nc_
            rc += _rc_
            nodes.clear()
            relationships.clear()
            self.logger.info('Summary:')
            self.logger.info(' save graph data:')
            self.logger.info('   {} nodes'.format(nc))
            self.logger.info('   {} relationships'.format(rc))
        return nodes, relationships
Esempio n. 3
0
class OptGraph(BaseGraph):
    def __init__(self, **kwargs):
        """Set up the base graph, then attach the source data model.

        :param kwargs: passed straight through to ``BaseGraph.__init__``.
        """
        BaseGraph.__init__(self, **kwargs)
        # Other settings tried previously: tn='qcc.1.1', location='gcxy',
        # dbname='data'.
        source_table = 'cq_all'
        self.base = BaseModel(tn=source_table)

    def create_index_and_constraint(self):
        """Register uniqueness constraints and indexes for the used entities.

        A unique key already carries an index, so no extra index is created
        for it.

        :return: None
        """
        # TODO(leung): keep these labels accurate at all times
        # Entity labels handled by this graph; 'Client', 'Supplier' and
        # 'Possession' are deliberately left out.
        labels = (
            'License',
            'Bidding',
            'Check',
            'RandomCheck',
            'TaxCredit',
            'IAE',
            'Position',
            'Plot',
        )
        constraint, index = {}, {}
        for label in labels:
            constraint[label] = [entities(label).primarykey]
            label_index = entities(label).index
            if len(label_index):
                index[label] = label_index
        self.add_index_and_constraint(index, constraint)

    def get_all_nodes_from_enterprise(self, etp):
        """Collect the entity objects described by one enterprise document.

        The result starts with the enterprise itself, followed by the
        entities of each operating-status section present in
        ``etp['content']``, in a fixed section order.

        :param etp: document providing 'url', 'name' and a 'content'
            mapping.
        :return: list of entity objects (not yet graph nodes).
        """
        nodes = [Enterprise(URL=etp['url'], NAME=etp['name'])]
        content = etp['content']

        # The '产权交易' section is not handled yet.

        if '行政许可' in content:
            licenses = content['行政许可']
            # The same license structure is published by two sources.
            for source in ('工商局', '信用中国'):
                if source in licenses:
                    data = self.get_format_dict(licenses[source])
                    for item in License.create_from_dict(data, source):
                        nodes.append(item.pop('license'))

        # (section key, entity class, key under which create_from_dict
        # stores the entity in each item). Published bidding notices are
        # usually results, so co-bidders generally cannot be found unless
        # they won the bid jointly.
        # TODO(leung): random checks ('双随机抽查') carry no result
        sections = (
            ('招投标信息', Bidding, 'bidding'),
            ('抽查检查', Check, 'check'),
            ('双随机抽查', RandomCheck, 'check'),
            ('税务信用', TaxCredit, 'TaxCredit'),
            ('进出口信用', IAE, 'iae'),
            ('招聘', Position, 'position'),
            ('客户', Client, 'client'),
            ('供应商', Supplier, 'supplier'),
        )
        for section, entity_cls, item_key in sections:
            if section in content:
                data = self.get_format_dict(content[section])
                for item in entity_cls.create_from_dict(data):
                    nodes.append(item.pop(item_key))

        if '信用评级' in content:
            for d in self.get_format_dict(content['信用评级']):
                # '评级公司' is a plain mapping; wrap it in an Enterprise
                # (as get_all_relationships_from_enterprise does) so that
                # callers can invoke to_dict() on every returned node.
                nodes.append(Enterprise(**d.pop('评级公司')))

        if '土地转让' in content:
            for d in self.get_format_dict(content['土地转让']):
                # The previous and current holders are not plot properties;
                # remove them before building the Plot.
                d.pop('原土地使用权人')
                d.pop('现有土地使用权人')
                nodes.append(Plot(**d))
        return nodes

    def get_all_nodes(self):
        """Group the entities of all '经营状况' documents by label.

        Documents whose url carries no 32-character unique code are
        skipped. Progress is printed every 1000 processed documents.

        :return: dict mapping a label to the list of node dicts (as
            produced by each entity's ``to_dict()``) with that label.
        """
        # NOTE(review): limit=1000 is hard-coded and looks like a debugging
        # leftover - confirm before a full export.
        enterprises = self.base.query(
            sql={
                'metaModel': '经营状况',
                # 'name': '重庆轩烽建材有限公司'
            },
            limit=1000,
            # skip=2000,
            no_cursor_timeout=True)
        i, j = 0, 0  # i: documents processed, j: progress rounds
        etp_count = enterprises.count()
        nodes = {}
        # The 32 word characters following 'unique=' in the url.
        unique_code_pattern = re.compile(r'(?<=unique=)\w{32}')

        for ep in enterprises:
            i += 1
            uc = unique_code_pattern.search(ep['url'])
            if uc is None:
                continue
            ep['url'] = '/firm_' + uc.group(0) + '.html'
            for _nds_ in self.get_all_nodes_from_enterprise(ep):
                if _nds_ is None:
                    continue
                _nds_ = _nds_.to_dict()
                nodes.setdefault(_nds_['label'], []).append(_nds_)
            if i % 1000 == 0:
                j += 1
                # The message reads "round {} and deal {}/{}": the round
                # counter comes first, then the processed count.
                print(
                    SuccessMessage('{}:success merge nodes to database '
                                   'round {} and deal {}/{} enterprise'
                                   ''.format(dt.datetime.now(), j, i,
                                             etp_count)))
        return nodes

    def get_all_relationships_from_enterprise(self, etp):
        """Build the relationship objects for one enterprise document.

        Each section present in ``etp['content']`` is turned into
        relationships between the enterprise node and the section's
        entities. Entities for which ``get_neo_node`` yields ``None`` are
        skipped.

        :param etp: document providing 'url', 'name' and a 'content'
            mapping.
        :return: list of relationship objects; empty when no node could be
            obtained for the enterprise itself.
        """
        etp_n = Enterprise(URL=etp['url'], NAME=etp['name'])
        etp_n = self.get_neo_node(etp_n)
        if etp_n is None:
            return []
        relationships = []
        if '产权交易' in etp['content'].keys():
            # Not handled yet; the draft below was never finished.
            # data = self.get_format_dict(etp['content']['产权交易'])
            # for d in data:
            #     bd = d.pop('标的')
            #     bd_n =
            pass

        if '行政许可' in etp['content'].keys():
            data = etp['content']['行政许可']
            # The same license structure is published by two sources.
            if '工商局' in data.keys():
                d1 = self.get_format_dict(data['工商局'])
                ls = License.create_from_dict(d1, '工商局')
                for l in ls:
                    l_ = l.pop('license')
                    l_n = self.get_neo_node(l_)
                    if l_n is None:
                        continue
                    # What is left in l after the pop is passed as the
                    # relationship's keyword arguments.
                    relationships.append(Have(etp_n, l_n, **l))
                pass
            if '信用中国' in data.keys():
                d2 = self.get_format_dict(data['信用中国'])
                ls = License.create_from_dict(d2, '信用中国')
                for l in ls:
                    l_ = l.pop('license')
                    l_n = self.get_neo_node(l_)
                    if l_n is None:
                        continue
                    relationships.append(Have(etp_n, l_n, **l))
                pass
            pass
        if '招投标信息' in etp['content'].keys():
            # Published bidding notices are usually results, so co-bidders
            # generally cannot be found unless they won the bid jointly.
            data = self.get_format_dict(etp['content']['招投标信息'])
            bs = Bidding.create_from_dict(data)
            for b in bs:
                _ = b.pop('bidding')
                b_n = self.get_neo_node(_)
                if b_n is None:
                    continue
                # TODO(leung): the project category is used as the bidding result
                relationships.append(
                    TakePartIn(etp_n, b_n, **dict(b, **{'RESULT':
                                                        b_n['TYPE']})))
            pass
        if '抽查检查' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['抽查检查'])
            cs = Check.create_from_dict(data)
            for c in cs:
                _ = c.pop('check')
                n = self.get_neo_node(_)
                if n is None:
                    continue
                # The node's RESULT value is copied onto the relationship.
                relationships.append(
                    Have(etp_n, n, **dict(c, **{'RESULT': n['RESULT']})))
            pass
        if '双随机抽查' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['双随机抽查'])
            rcs = RandomCheck.create_from_dict(data)
            # rcs_n = self.get_neo_node(rcs)
            for rc in rcs:
                # TODO(leung): random checks carry no result
                _ = rc.pop('check')
                n = self.get_neo_node(_)
                if n is None:
                    continue
                relationships.append(Have(etp_n, n, **rc))
            pass
        if '税务信用' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['税务信用'])
            ts = TaxCredit.create_from_dict(data)
            # ts_n = self.get_neo_node(ts)
            for t in ts:
                _ = t.pop('TaxCredit')
                n = self.get_neo_node(_)
                if n is None:
                    continue
                # TODO(leung): the tax credit grade is used as the rating result
                relationships.append(
                    Have(etp_n, n, **dict(RESULT=n['GRADE'], **t)))
            pass
        if '进出口信用' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['进出口信用'])
            ies = IAE.create_from_dict(data)
            # ies_n = self.get_neo_node(ies)
            for ie in ies:
                _ = ie.pop('iae')
                n = self.get_neo_node(_)
                if n is None:
                    continue
                relationships.append(Have(etp_n, n, **ie))
            pass
        if '招聘' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['招聘'])
            rs = Position.create_from_dict(data)
            for r in rs:
                _ = r.pop('position')
                n = self.get_neo_node(_)
                if n is None:
                    continue
                relationships.append(Recruit(etp_n, n, **r))
            pass
        if '客户' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['客户'])
            cs = Client.create_from_dict(data)
            for c in cs:
                _ = c.pop('client')
                # Look for an existing node by url or name before falling
                # back to get_neo_node.
                # NOTE(review): values are interpolated into the cypher text
                # unescaped; a value containing '"' would break the query.
                n = self.match_node(
                    *legal,
                    cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                        _['URL'], _['NAME']))
                if n is None:
                    n = self.get_neo_node(_)
                    if n is None:
                        continue
                relationships.append(SellTo(etp_n, n, **c))
            pass
        if '供应商' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['供应商'])
            ss = Supplier.create_from_dict(data)
            for s in ss:
                _ = s.pop('supplier')
                n = self.match_node(
                    *legal,
                    cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                        _['URL'], _['NAME']))
                if n is None:
                    n = self.get_neo_node(_)
                    if n is None:
                        continue
                relationships.append(BuyFrom(etp_n, n, **s))
            pass
        if '信用评级' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['信用评级'])
            for d in data:
                # The rating company is a raw mapping with '链接' and '名称'.
                _ = d.pop('评级公司')
                n = self.match_node(
                    *legal,
                    cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                        _['链接'], _['名称']))
                if n is None:
                    n = Enterprise(**_)
                    n = self.get_neo_node(n)
                    if n is None:
                        continue
                # Flatten the nested '内容' mapping into two plain
                # relationship properties.
                __ = d.pop('内容')
                d['评级内容'] = __['内容']
                d['评级链接'] = __['链接']
                # The rating company is the start node of the relationship.
                relationships.append(Appraise(n, etp_n, **d))
            pass
        if '土地转让' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['土地转让'])
            for d in data:
                # e1: previous holder of the land-use right, e2: current one.
                e1 = d.pop('原土地使用权人')
                e2 = d.pop('现有土地使用权人')
                p = Plot(**d)
                p_n = self.get_neo_node(p)
                if p_n is None:
                    continue
                if e1['名称'] == etp['name'] or e1['链接'] == etp['url']:
                    n1 = etp_n
                else:
                    # The holder may be a person rather than a company.
                    n1 = self.match_node(*legal,
                                         cypher='_.URL = "{}"'.format(
                                             e1['链接']))
                    if n1 is None:
                        # Try Enterprise, then Person, then fall back to a
                        # generic Related entity.
                        n1 = Enterprise(**e1)
                        if not n1.isEnterprise():
                            n1 = Person(**e1)
                            if not n1.isPerson():
                                n1 = Related(**e1)
                        n1 = self.get_neo_node(n1)
                if n1 is not None:
                    relationships.append(Sell(n1, p_n))
                if e2['名称'] == etp['name'] or e2['链接'] == etp['url']:
                    n2 = etp_n
                else:
                    n2 = self.match_node(*legal,
                                         cypher='_.URL = "{}"'.format(
                                             e2['链接']))
                    if n2 is None:
                        # Same fallback chain as for the previous holder.
                        n2 = Enterprise(**e2)
                        if not n2.isEnterprise():
                            n2 = Person(**e2)
                            if not n2.isPerson():
                                n2 = Related(**e2)
                        n2 = self.get_neo_node(n2)
                if n2 is not None:
                    relationships.append(Buy(n2, p_n))
            pass
        return relationships

    def get_all_relationships(self):
        """
        Collect the relationships of every '经营状况' record, grouped by label.

        Records are queried from ``self.base`` (at most 1000). Each record's
        ``url`` is rewritten to ``/firm_<code>.html`` using the 32-character
        code that follows ``unique=``; records without such a code are
        skipped. The remaining records are handed to
        ``get_all_relationships_from_enterprise``.

        :return: dict mapping a relationship label to the list of
            ``to_dict()`` results that carry this label
        """
        enterprises = self.base.query(
            sql={'metaModel': '经营状况'},
            limit=1000,
            no_cursor_timeout=True)
        processed, rounds = 0, 0
        etp_count = enterprises.count()
        relationships = {}
        # Raw string: '\w' inside a plain literal is an invalid escape.
        unique_code_pattern = re.compile(r'(?<=unique=)\w{32}')

        for ep in enterprises:
            processed += 1
            found = unique_code_pattern.search(ep['url'])
            if found is None:
                continue
            ep['url'] = '/firm_' + found.group(0) + '.html'
            for rel in self.get_all_relationships_from_enterprise(ep):
                rel = rel.to_dict()
                relationships.setdefault(rel['label'], []).append(rel)
            if processed % 1000 == 0:
                rounds += 1
                # The message reads "round R and deal P/T": the round
                # counter comes first, then the processed-record counter.
                print(
                    SuccessMessage('{}:success merge relationship to database '
                                   'round {} and deal {}/{} enterprise'
                                   ''.format(dt.datetime.now(), rounds,
                                             processed, etp_count)))
        return relationships

    def create_all_relationship(self):
        """
        Build the relationships of every '经营状况' record and merge them.

        Shapes produced: enterprise -[Have / TakePartIn / Recruit / SellTo /
        BuyFrom]-> x, rating company -[Appraise]-> enterprise and land user
        -[Sell / Buy]-> plot.

        The enterprise node of a record is matched by name. When it is not
        in the graph it is created from the '基本信息' record (together with
        its base relationships) or, when no such record exists, as a
        ``Related`` node holding only NAME and URL. Pending relationships
        are merged whenever more than 1000 have accumulated and once more
        after the last record.

        :return: None
        """
        ops = self.base.query(
            sql={'metaModel': '经营状况'},
            limit=1000,
            no_cursor_timeout=True)
        i, k = 0, 0
        eg = EtpGraph()
        etp_count = ops.count()
        relationships = []

        def flush(round_no, handled):
            """Merge the pending relationships and report the progress."""
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(
                SuccessMessage(
                    '{}:success merge relationships to database '
                    'round {} and deal {}/{} enterprise,and'
                    ' merge {} relationships.'.format(
                        dt.datetime.now(), round_no, handled, etp_count,
                        len(relationships))))
            relationships.clear()

        def match_or_create(entity):
            """Return the node matching URL or NAME, creating it if absent."""
            node = self.match_node(
                *legal,
                cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                    entity['URL'], entity['NAME']))
            if node is None:
                node = self.get_neo_node(entity)
            return node

        def land_user_node(user, record, record_n):
            """Return the node of a land user named in a transfer row."""
            if user['名称'] == record['name'] or user['链接'] == record['url']:
                return record_n
            # The user may also be a natural person.
            node = self.match_node(*legal,
                                   cypher='_.URL = "{}"'.format(user['链接']))
            if node is None:
                node = self.get_neo_node(Related(**user))
            return node

        for o in ops:
            k += 1
            # TODO(leung): outside the basic-info module the url cannot
            #  identify the company, hence the match by name only.
            # NOTE(review): the name is interpolated into the cypher text
            #  unescaped; a name containing '"' would break the query.
            etp_n = self.match_node(*legal,
                                    cypher='_.NAME = "{}"'.format(o['name']))
            if etp_n is None:
                # The company is not in the graph yet, so create it.
                base_info = self.base.query_one(sql={
                    'metaModel': '基本信息',
                    'name': o['name']
                })
                if base_info is not None:
                    etp = Enterprise(base_info)
                    etp_n = self.get_neo_node(etp)
                    # Other graph builders create missing enterprises
                    # without their base relationships, so add them here.
                    relationships.extend(
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info))
                else:
                    # No basic info available: create an incomplete company.
                    # A node with only NAME and URL therefore means the
                    # company has no '基本信息' record.
                    etp = Related()
                    etp['NAME'] = o['name']
                    etp['URL'] = o['url']
                    etp_n = self.get_neo_node(etp)
            if etp_n is None:
                # Without a start node no relationship can be built.
                continue

            # The sections live on the queried record itself. The entity
            # built above is unbound whenever the node already existed.
            content = o['content']

            # '产权交易' (property transactions) is not handled yet.

            if '行政许可' in content:
                permits = content['行政许可']
                for source in ('工商局', '信用中国'):
                    if source not in permits:
                        continue
                    rows = self.get_format_dict(permits[source])
                    for item in License.create_from_dict(rows, source):
                        lic_n = self.get_neo_node(item.pop('license'))
                        if lic_n is None:
                            continue
                        relationships.append(
                            Have(etp_n, lic_n, **item).get_relationship())
            if '招投标信息' in content:
                # Published bidding information is usually the outcome, so
                # co-bidders can rarely be found unless they won jointly.
                rows = self.get_format_dict(content['招投标信息'])
                for item in Bidding.create_from_dict(rows):
                    bid_n = self.get_neo_node(item.pop('bidding'))
                    if bid_n is None:
                        continue
                    # TODO(leung): the project type doubles as the result
                    props = dict(item, **{'RESULT': bid_n['TYPE']})
                    relationships.append(
                        TakePartIn(etp_n, bid_n, **props).get_relationship())
            if '抽查检查' in content:
                rows = self.get_format_dict(content['抽查检查'])
                for item in Check.create_from_dict(rows):
                    n = self.get_neo_node(item.pop('check'))
                    if n is None:
                        continue
                    props = dict(item, **{'RESULT': n['RESULT']})
                    relationships.append(
                        Have(etp_n, n, **props).get_relationship())
            if '双随机抽查' in content:
                rows = self.get_format_dict(content['双随机抽查'])
                for item in RandomCheck.create_from_dict(rows):
                    # TODO(leung): random checks carry no result
                    n = self.get_neo_node(item.pop('check'))
                    if n is None:
                        continue
                    relationships.append(
                        Have(etp_n, n, **item).get_relationship())
            if '税务信用' in content:
                rows = self.get_format_dict(content['税务信用'])
                for item in TaxCredit.create_from_dict(rows):
                    n = self.get_neo_node(item.pop('TaxCredit'))
                    if n is None:
                        continue
                    # TODO(leung): the tax credit grade serves as the result
                    props = dict(RESULT=n['GRADE'], **item)
                    relationships.append(
                        Have(etp_n, n, **props).get_relationship())
            if '进出口信用' in content:
                rows = self.get_format_dict(content['进出口信用'])
                for item in IAE.create_from_dict(rows):
                    n = self.get_neo_node(item.pop('iae'))
                    if n is None:
                        continue
                    relationships.append(
                        Have(etp_n, n, **item).get_relationship())
            if '招聘' in content:
                rows = self.get_format_dict(content['招聘'])
                for item in Position.create_from_dict(rows):
                    n = self.get_neo_node(item.pop('position'))
                    if n is None:
                        continue
                    relationships.append(
                        Recruit(etp_n, n, **item).get_relationship())
            if '客户' in content:
                rows = self.get_format_dict(content['客户'])
                for item in Client.create_from_dict(rows):
                    n = match_or_create(item.pop('client'))
                    if n is None:
                        continue
                    relationships.append(
                        SellTo(etp_n, n, **item).get_relationship())
            if '供应商' in content:
                rows = self.get_format_dict(content['供应商'])
                for item in Supplier.create_from_dict(rows):
                    n = match_or_create(item.pop('supplier'))
                    if n is None:
                        continue
                    relationships.append(
                        BuyFrom(etp_n, n, **item).get_relationship())
            if '信用评级' in content:
                for d in self.get_format_dict(content['信用评级']):
                    rater = d.pop('评级公司')
                    n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            rater['链接'], rater['名称']))
                    if n is None:
                        n = Related()
                        n['NAME'] = rater['名称']
                        n['URL'] = rater['链接']
                        n = self.get_neo_node(n)
                        if n is None:
                            continue
                    detail = d.pop('内容')
                    d['评级内容'] = detail['内容']
                    d['评级链接'] = detail['链接']
                    # The rating company appraises the enterprise.
                    relationships.append(
                        Appraise(n, etp_n, **d).get_relationship())
            if '土地转让' in content:
                for d in self.get_format_dict(content['土地转让']):
                    seller = d.pop('原土地使用权人')
                    buyer = d.pop('现有土地使用权人')
                    p_n = self.get_neo_node(Plot(**d))
                    if p_n is None:
                        continue
                    n1 = land_user_node(seller, o, etp_n)
                    if n1 is not None:
                        relationships.append(Sell(n1, p_n).get_relationship())
                    n2 = land_user_node(buyer, o, etp_n)
                    if n2 is not None:
                        relationships.append(Buy(n2, p_n).get_relationship())

            if len(relationships) > 1000:
                i += 1
                flush(i, k)
        if relationships:
            i += 1
            flush(i, k)

    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        """
        Build the nodes and relationships described by one enterprise record.

        :param etp: record providing 'url', 'name' and a 'content' mapping
            keyed by section name
        :return: ``(nodes, relationships)`` lists; both are empty when the
            enterprise node itself cannot be obtained
        """
        owner_n = self.get_neo_node(
            Enterprise(URL=etp['url'], NAME=etp['name']))
        if owner_n is None:
            return [], []
        nodes, relationships = [owner_n], []
        content = etp['content']

        def attach(node, relation):
            """Record a node together with the relationship that uses it."""
            nodes.append(node)
            relationships.append(relation)

        def partner_node(entity):
            """Match a client/supplier by URL or NAME, creating it if new."""
            found = self.match_node(
                *legal,
                cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                    entity['URL'], entity['NAME']))
            if found is not None:
                return found
            if entity.isEnterprise():
                entity = Enterprise(**entity.to_dict(with_label=False))
            return self.get_neo_node(entity)

        def land_user_node(user):
            """Resolve a land user to a node; the link is normalised first."""
            user['链接'] = Enterprise.parser_url(user['链接'])
            if user['名称'] == etp['name'] or user['链接'] == etp['url']:
                return owner_n
            # The user may be a person rather than a company.
            found = self.match_node(*legal,
                                    cypher='_.URL = "{}"'.format(
                                        user['链接']))
            if found is not None:
                return found
            candidate = Enterprise(**user)
            if not candidate.isEnterprise():
                candidate = Person(**user)
                if not candidate.isPerson():
                    candidate = Related(**user)
            return self.get_neo_node(candidate)

        # The '产权交易' section is recognised but produces nothing yet.

        if '行政许可' in content:
            permits = content['行政许可']
            for source in ('工商局', '信用中国'):
                if source not in permits:
                    continue
                rows = self.get_format_dict(permits[source])
                for item in License.create_from_dict(rows, source):
                    lic_n = self.get_neo_node(item.pop('license'))
                    if lic_n is not None:
                        attach(lic_n, Have(owner_n, lic_n, **item))
        if '招投标信息' in content:
            # Published bidding data is normally the outcome, so co-bidders
            # are rarely visible unless the bid was won jointly.
            rows = self.get_format_dict(content['招投标信息'])
            for item in Bidding.create_from_dict(rows):
                bid_n = self.get_neo_node(item.pop('bidding'))
                if bid_n is None:
                    continue
                # TODO(leung): the project type is used as the bidding result
                props = dict(item, **{'RESULT': bid_n['TYPE']})
                attach(bid_n, TakePartIn(owner_n, bid_n, **props))
        if '抽查检查' in content:
            rows = self.get_format_dict(content['抽查检查'])
            for item in Check.create_from_dict(rows):
                chk_n = self.get_neo_node(item.pop('check'))
                if chk_n is None:
                    continue
                props = dict(item, **{'RESULT': chk_n['RESULT']})
                attach(chk_n, Have(owner_n, chk_n, **props))
        if '双随机抽查' in content:
            rows = self.get_format_dict(content['双随机抽查'])
            for item in RandomCheck.create_from_dict(rows):
                # TODO(leung): random checks have no result
                chk_n = self.get_neo_node(item.pop('check'))
                if chk_n is not None:
                    attach(chk_n, Have(owner_n, chk_n, **item))
        if '税务信用' in content:
            rows = self.get_format_dict(content['税务信用'])
            for item in TaxCredit.create_from_dict(rows):
                tax_n = self.get_neo_node(item.pop('TaxCredit'))
                if tax_n is None:
                    continue
                # TODO(leung): the tax credit grade is used as the result
                props = dict(RESULT=tax_n['GRADE'], **item)
                attach(tax_n, Have(owner_n, tax_n, **props))
        if '进出口信用' in content:
            rows = self.get_format_dict(content['进出口信用'])
            for item in IAE.create_from_dict(rows):
                iae_n = self.get_neo_node(item.pop('iae'))
                if iae_n is not None:
                    attach(iae_n, Have(owner_n, iae_n, **item))
        if '招聘' in content:
            rows = self.get_format_dict(content['招聘'])
            for item in Position.create_from_dict(rows):
                pos_n = self.get_neo_node(item.pop('position'))
                if pos_n is not None:
                    attach(pos_n, Recruit(owner_n, pos_n, **item))
        if '客户' in content:
            rows = self.get_format_dict(content['客户'])
            for item in Client.create_from_dict(rows):
                cli_n = partner_node(item.pop('client'))
                if cli_n is not None:
                    attach(cli_n, SellTo(owner_n, cli_n, **item))
        if '供应商' in content:
            rows = self.get_format_dict(content['供应商'])
            for item in Supplier.create_from_dict(rows):
                sup_n = partner_node(item.pop('supplier'))
                if sup_n is not None:
                    attach(sup_n, BuyFrom(owner_n, sup_n, **item))
        if '信用评级' in content:
            for row in self.get_format_dict(content['信用评级']):
                rater = row.pop('评级公司')
                rater['链接'] = Enterprise.parser_url(rater['链接'])
                rater_n = self.match_node(
                    *legal,
                    cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                        rater['链接'], rater['名称']))
                if rater_n is None:
                    rater_n = self.get_neo_node(Enterprise(**rater))
                    if rater_n is None:
                        continue
                detail = row.pop('内容')
                row['评级内容'] = detail['内容']
                row['评级链接'] = detail['链接']
                # Direction: the rating company appraises the enterprise.
                attach(rater_n, Appraise(rater_n, owner_n, **row))
        if '土地转让' in content:
            for row in self.get_format_dict(content['土地转让']):
                seller = row.pop('原土地使用权人')
                buyer = row.pop('现有土地使用权人')
                plot_n = self.get_neo_node(Plot(**row))
                if plot_n is None:
                    continue
                seller_n = land_user_node(seller)
                if seller_n is not None:
                    nodes.extend((seller_n, plot_n))
                    relationships.append(Sell(seller_n, plot_n))
                buyer_n = land_user_node(buyer)
                if buyer_n is not None:
                    nodes.extend((buyer_n, plot_n))
                    relationships.append(Buy(buyer_n, plot_n))
        return nodes, relationships

    def get_all_nodes_and_relationships(self, save_folder=None, **kwargs):
        """
        Collect nodes and relationships of all '经营状况' records by label.

        Each record's ``url`` is rewritten to ``/firm_<code>.html`` using the
        32-character code that follows ``unique=``; records without such a
        code are logged and skipped. When ``save_folder`` is given, the
        accumulated data is handed to ``self.save_graph`` every 10000
        records and once more at the end, and the accumulators are cleared
        after each save.

        :param save_folder: target passed to ``self.save_graph``; nothing is
            saved when it is None
        :param kwargs: forwarded unchanged to ``self.save_graph``
        :return: ``(nodes, relationships)`` dicts keyed by label; both are
            empty when ``save_folder`` is given, because they are cleared
            after the final save
        """
        enterprises = self.base.query(
            sql={'metaModel': '经营状况'},
            no_cursor_timeout=True)
        i, j = 0, 0
        nc, rc = 0, 0
        etp_count = enterprises.count()
        nodes, relationships = {}, {}
        # Raw string: '\w' inside a plain literal is an invalid escape.
        unique_code_pattern = re.compile(r'(?<=unique=)\w{32}')

        _st_ = time.time()
        for ep in enterprises:
            i += 1
            found = unique_code_pattern.search(ep['url'])
            if found is None:
                self.logger.info('{}:mismatch url'.format(ep['name']))
                continue
            ep['url'] = '/firm_' + found.group(0) + '.html'
            nds, rps = self.get_all_nodes_and_relationships_from_enterprise(ep)
            for node in nds:
                if node is None:
                    continue
                # The first label of the node is used as its group key.
                label = list(node.labels)[0]
                nodes.setdefault(label, []).append(dict(label=label, **node))
            for rel in rps:
                rel = rel.to_dict()
                relationships.setdefault(rel['label'], []).append(rel)
            if i % 10000 == 0:
                j += 1
                if save_folder is not None:
                    _nc_, _rc_ = self.save_graph(save_folder, nodes,
                                                 relationships, **kwargs)
                    nc += _nc_
                    rc += _rc_
                    nodes.clear()
                    relationships.clear()
                self.logger.info(
                    SuccessMessage('success trans data to csv round {} and '
                                   'deal {}/{} enterprise spend {} seconds.'
                                   ''.format(j, i, etp_count,
                                             int(time.time() - _st_))))
                _st_ = time.time()
        if save_folder is not None:
            _nc_, _rc_ = self.save_graph(save_folder, nodes, relationships,
                                         **kwargs)
            nc += _nc_
            rc += _rc_
            nodes.clear()
            relationships.clear()
            self.logger.info('Summary:')
            self.logger.info(' save graph data:')
            self.logger.info('   {} nodes'.format(nc))
            self.logger.info('   {} relationships'.format(rc))
        return nodes, relationships
# Example no. 4
# 0
class IndGraph(BaseGraph):
    """
    Graph builder that links enterprises to their industry hierarchy.

    Records are read through a ``BaseModel`` opened with ``tn='cq_api'``.
    From each record only ``value.Result.Name``, ``value.Result.KeyNo`` and
    ``value.Result.IndustryV3`` are used.
    """

    def __init__(self, **kwargs):
        """
        Initialise the graph and open the record source.

        :param kwargs: forwarded unchanged to ``BaseGraph.__init__``
        """
        BaseGraph.__init__(self, **kwargs)
        self.base = BaseModel(tn='cq_api')

    @staticmethod
    def _api_fields():
        """Return a fresh projection of the fields read from each record."""
        return {
            '_id': 0,
            'value.Result.Name': 1,
            'value.Result.KeyNo': 1,
            'value.Result.IndustryV3': 1
        }

    def _unpack_api_record(self, doc):
        """
        Reduce a raw document to its ``Result`` mapping with name/url set.

        :param doc: document holding the payload under ``value.Result``
        :return: the ``Result`` mapping with 'name' and 'url' added, or None
            (after logging) when the record has no KeyNo
        """
        ep = doc['value']['Result']
        # A projected field that is absent is simply missing from the
        # document, so look it up without raising.
        uc = ep.get('KeyNo')
        ep['name'] = ep.pop('Name')
        if uc is None:
            self.logger.info('{}:mismatch url'.format(ep['name']))
            return None
        ep['url'] = '/firm_' + uc + '.html'
        return ep

    def get_all_nodes_and_relationships_from_api(self, etp):
        """
        Build the enterprise node, its industry nodes and the Belong chain.

        The industry entities are created from the passed-in record rather
        than from the enterprise basic info stored in the database: industry
        may be studied as a fairly independent domain and may not match the
        stored one exactly.

        :param etp: mapping with 'url', 'name' and optionally 'IndustryV3'
        :return: ``(nodes, relationships)`` lists; both empty when the
            enterprise node cannot be obtained
        """
        etp_n = self.match_node(
            'Enterprise',
            cypher='_.URL = "{}" OR _.NAME = "{}"'
                   ''.format(Enterprise.parser_url(etp['url']),
                             etp['name']))
        if etp_n is None:
            etp_n = self.get_neo_node(
                Enterprise(URL=etp['url'], NAME=etp['name']))
        if etp_n is None:
            return [], []
        nodes, relationships = [etp_n], []
        # Treat a missing industry block like an explicit None.
        ind = etp.get('IndustryV3')
        if ind is None:
            return nodes, relationships
        # (name key, level tag), from the broadest level to the narrowest;
        # the code key is always the name key followed by 'Code'.
        levels = (
            ('Industry', '一级'),
            ('SubIndustry', '二级'),
            ('MiddleCategory', '三级'),
            ('SmallCategory', '四级'),
        )
        created = [
            self.get_neo_node(Industry(**{
                'name': ind[key],
                'code': ind[key + 'Code'],
                '类别': grade
            }))
            for key, grade in levels
        ]
        # Narrowest level first, skipping the levels that yielded no node.
        ids = [n for n in reversed(created) if n is not None]
        nodes.extend(ids)
        if ids:
            # The enterprise belongs to the narrowest known level, and each
            # level belongs to the next broader one.
            relationships.append(Belong(etp_n, ids[0]))
            relationships.extend(
                Belong(narrow, broad) for narrow, broad in zip(ids, ids[1:]))
        return nodes, relationships

    def merge_all_nodes_and_relationships(self):
        """
        Run the industry extraction over up to 10000 records.

        NOTE(review): the nodes and relationships returned for each record
        are discarded here; only whatever
        ``get_all_nodes_and_relationships_from_api`` does while building
        them takes effect. Confirm whether a merge step is missing.

        :return: None
        """
        enterprises = self.base.query(
            field=self._api_fields(),
            limit=10000,
            no_cursor_timeout=True)
        for doc in enterprises:
            ep = self._unpack_api_record(doc)
            if ep is None:
                continue
            self.get_all_nodes_and_relationships_from_api(ep)

    def get_all_nodes_and_relationships(
            self, save_folder=None, enterprises=None, **kwargs):
        """
        Collect industry nodes and relationships grouped by label.

        :param save_folder: target passed to ``self.save_graph``; nothing is
            saved when it is None
        :param enterprises: optional sized iterable of mappings with a
            'name' key; each name is looked up in the record source and
            names without a record are skipped. When None, up to 10000
            records are read from the source directly.
        :param kwargs: forwarded unchanged to ``self.save_graph``
        :return: ``(nodes, relationships)`` dicts keyed by label; both are
            empty when ``save_folder`` is given, because they are cleared
            after the final save
        """
        if enterprises is None:
            enterprises_data = self.base.query(
                field=self._api_fields(),
                limit=10000,
                no_cursor_timeout=True)
            # The query is capped at 10000, which is reported as the total.
            etp_count = 10000
        else:
            enterprises_data = enterprises
            etp_count = len(enterprises)
        i, j = 0, 0
        nc, rc = 0, 0
        nodes, relationships = {}, {}

        _st_ = time.time()
        for ep in enterprises_data:
            i += 1
            if enterprises is not None:
                ep = self.base.query_one(
                    sql={'value.Result.Name': ep['name']},
                    field=self._api_fields(),
                )
                if ep is None:
                    continue

            ep = self._unpack_api_record(ep)
            if ep is None:
                continue
            nds, rps = self.get_all_nodes_and_relationships_from_api(ep)
            for node in nds:
                if node is None:
                    continue
                # The first label of the node is used as its group key.
                label = list(node.labels)[0]
                nodes.setdefault(label, []).append(dict(label=label, **node))
            for rel in rps:
                rel = rel.to_dict()
                relationships.setdefault(rel['label'], []).append(rel)
            if i % 10000 == 0:
                j += 1
                if save_folder is not None:
                    _nc_, _rc_ = self.save_graph(
                        save_folder, nodes,
                        relationships, **kwargs)
                    nc += _nc_
                    rc += _rc_
                    nodes.clear()
                    relationships.clear()
                self.logger.info(SuccessMessage(
                    'success trans data to csv round {} and '
                    'deal {}/{} enterprise spend {} seconds.'
                    ''.format(j, i, etp_count, int(time.time() - _st_))
                ))
                _st_ = time.time()
        if save_folder is not None:
            _nc_, _rc_ = self.save_graph(
                save_folder, nodes,
                relationships, **kwargs)
            nc += _nc_
            rc += _rc_
            nodes.clear()
            relationships.clear()
            self.logger.info('Summary:')
            self.logger.info(' save graph data:')
            self.logger.info('   {} nodes'.format(nc))
            self.logger.info('   {} relationships'.format(rc))
        return nodes, relationships
# Example no. 5
# 0
class OptRiskGraph(BaseGraph):

    def __init__(self, **kwargs):
        """Initialise the graph base class and attach the backing data model.

        :param kwargs: forwarded unchanged to ``BaseGraph.__init__``.
        """
        BaseGraph.__init__(self, **kwargs)
        # Only ``tn`` is configured. Settings tried earlier and left disabled:
        # tn='qcc.1.1', location='gcxy', dbname='data'.
        model_options = {'tn': 'cq_all'}
        self.base = BaseModel(**model_options)

    def create_index_and_constraint(self):
        """Collect constraints and indexes for the entity labels used here.

        For every label a constraint on the entity's ``primarykey`` is
        recorded; labels whose ``index`` is non-empty additionally get an
        index entry. Both mappings are then handed to
        ``self.add_index_and_constraint``.

        Per the original author's note: a unique key is indexed
        automatically, so it needs no separate index.

        :return: None
        """
        # TODO(leung): keep these labels accurate at all times
        labels = ('Punishment', 'Possession')
        constraint, index = {}, {}
        for label in labels:
            constraint[label] = [entities(label).primarykey]
            extra_index = entities(label).index
            if len(extra_index) > 0:
                index[label] = extra_index
        self.add_index_and_constraint(index, constraint)

    def create_all_relationship(self):
        """
        Build operating-risk relationships for enterprises and merge them
        into the graph database in batches.

        Documents with ``metaModel == '经营风险'`` (operating risk) are read
        from ``self.base``. For each document ``j`` the enterprise node is
        resolved first:

        * looked up by ``NAME`` via ``self.match_node`` over the ``legal``
          labels;
        * if missing, created from the company's ``'基本信息'`` (basic
          information) document, together with its base-info relationships
          from ``EtpGraph``;
        * if that document is missing too, created as a ``Related`` node
          carrying only ``NAME`` and ``URL``.

        Then every section present in ``j['content']`` is converted:

        * ``'动产抵押'`` (chattel mortgage): a ``Debt`` node, linked with
          ``Have`` to the mortgagee, the debtor and the owner/user, each
          tagged with a ``角色`` (role) property.
        * ``'公示催告'`` (public summons): a ``Banknote`` node, linked with
          ``Have`` to the applicant, the holder and the current enterprise
          (role ``'出票人'``, drawer).
        * ``'行政处罚'`` (administrative penalty): ``Punishment`` entries for
          the sub-keys ``'工商局'``, ``'税务局'``, ``'信用中国'`` and
          ``'其他'``, each linked enterprise -[Have]-> punishment.
        * ``'环保处罚'`` (environmental penalty): same, with source
          ``'环保局'``.
        * ``'股权出质'`` (equity pledge): pledgor -[Guaranty]-> target and
          pledgee -[Have]-> target.
        * ``'破产重组'`` (bankruptcy reorganisation): applicant
          -['申请破产']-> respondent.
        * ``'土地抵押'`` (land mortgage): a ``Plot`` node, with mortgagor
          -[Guaranty]-> plot and mortgagee -[Have]-> plot.

        After a document is processed, if more than 1000 relationships are
        pending they are merged with ``self.graph_merge_relationships``, the
        index/constraint setup is run if
        ``self.index_and_constraint_statue`` is falsy, a progress message is
        printed and the buffer is cleared. Whatever remains after the loop
        is merged the same way.

        :return: None
        """
        ors = self.base.query(
            sql={
                'metaModel': '经营风险',
                # Debug aid: add a 'name' entry here to process one company.
            },
            # NOTE(review): presumably caps the run at 1000 documents; this
            # looks like a debugging leftover -- confirm before a full import.
            limit=1000,
            # skip=2000,
            no_cursor_timeout=True)
        # i counts merge rounds, k counts processed documents.
        i, k = 0, 0
        eg = EtpGraph()
        etp_count = ors.count()
        relationships = []
        # prs = Person()
        etp = Enterprise()
        for j in ors:
            # Everything listed under a company's operating-risk record
            # belongs to that company.
            k += 1
            # if k < 43500:
            #     continue
            # TODO(leung): note that the url found in modules other than the
            # basic information cannot identify the company.
            # NOTE(review): the name is interpolated into the Cypher fragment
            # unescaped (here and in every match_node call below); a value
            # containing a double quote yields unbalanced quotes -- confirm
            # how match_node copes with that.
            etp_n = self.match_node(
                *legal,
                cypher='_.NAME = "{}"'.format(j['name'])
            )
            if etp_n is None:
                # The company is not in the database yet, so create it.
                _ = self.base.query_one(
                    sql={'metaModel': '基本信息', 'name': j['name']}
                )
                if _ is not None:
                    etp = Enterprise(_)
                    etp_n = self.get_neo_node(etp)
                    # Creating judicial relationships also creates enterprises
                    # missing from the database, but not their basic
                    # relationships, so those are added here.
                    relationships += eg.create_relationship_from_enterprise_baseinfo(_)
                    pass
                else:
                    # No information about this company: simply treat it as
                    # an involved party, i.e. the company is recorded as a
                    # party related to a risk notice.
                    # (An earlier version built Related directly from the
                    # name/link pair.)
                    # NOTE(review): this rebinds `etp`, which the equity
                    # pledge section below uses for `etp.parser_url(...)` --
                    # confirm that Related provides parser_url.
                    etp = Related()
                    etp['NAME'] = j['name']
                    etp['URL'] = j['url']
                    etp_n = self.get_neo_node(etp)
                    pass

            # NOTE(review): etp_n is used below without a None check
            # (e.g. etp_n['URL']); confirm get_neo_node cannot return None
            # on the two creation paths above.

            # --- Chattel mortgage: Debt node plus the three parties ---
            # NOTE(review): unlike the equity pledge section, the party links
            # here are compared with j['url'] without parser_url
            # normalisation -- confirm this is intended.
            if '动产抵押' in j['content'].keys():
                data = self.get_format_dict(j['content']['动产抵押'])
                for d in data:
                    # Secured principal claim, read as {amount, unit}.
                    _ = d.pop('被担保主债权数额')
                    debt = Debt(**{'债务(金额)': _['金额'],
                                   '债务(单位)': _['单位'],
                                   '履行期限': d.pop('债务人履行债务的期限')
                                   })
                    debt_n = self.get_neo_node(debt)
                    # Parties are popped so that the remaining keys of d can
                    # be spread as relationship properties.
                    dy = d.pop('抵押权人')
                    zw = d.pop('债务人')
                    sy = d.pop('所有权或使用权归属')
                    # Mortgagee: the current company, an existing legal
                    # entity, or a newly created Related node.
                    if dy['名称'] == j['name'] or dy['链接'] == j['url']:
                        dy_n = etp_n
                    else:
                        dy_n = self.match_node(
                            *legal,
                            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                                dy['链接'], dy['名称'])
                        )
                        # Names of at most one character are not turned into
                        # new nodes.
                        if dy_n is None and len(dy['名称']) > 1:
                            dy_n = Related(**dy)
                            dy_n = self.get_neo_node(dy_n)
                    if dy_n is not None:
                        relationships.append(Have(
                            dy_n, debt_n, **dict(角色='抵押权人', **d)
                        ).get_relationship())

                    # Debtor: resolved the same way as the mortgagee.
                    if zw['名称'] == j['name'] or zw['链接'] == j['url']:
                        zw_n = etp_n
                    else:
                        zw_n = self.match_node(
                            *legal,
                            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                                zw['链接'], zw['名称'])
                        )
                        if zw_n is None and len(zw['名称']) > 1:
                            zw_n = Related(**zw)
                            zw_n = self.get_neo_node(zw_n)
                    if zw_n is not None:
                        relationships.append(Have(
                            zw_n, debt_n, **dict(角色='债务人', **d)
                        ).get_relationship())

                    # Holder of the ownership or usage right: same resolution.
                    if sy['名称'] == j['name'] or sy['链接'] == j['url']:
                        sy_n = etp_n
                    else:
                        sy_n = self.match_node(
                            *legal,
                            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                                sy['链接'], sy['名称'])
                        )
                        if sy_n is None and len(sy['名称']) > 1:
                            sy_n = Related(**sy)
                            sy_n = self.get_neo_node(sy_n)
                    if sy_n is not None:
                        relationships.append(Have(
                            sy_n, debt_n, **dict(角色='所有权或使用权人', **d)
                        ).get_relationship())
                    pass

            # --- Public summons: Banknote node, applicant, holder, drawer ---
            if '公示催告' in j['content'].keys():
                data = self.get_format_dict(j['content']['公示催告'])
                for d in data:
                    # Face value, read as {amount, unit}.
                    _ = d.pop('票面金额')
                    bn = Banknote(**{'票据号': d.pop('票据号'),
                                     '票据类型': d.pop('票据类型'),
                                     '票面金额(金额)': _['金额'],
                                     '票面金额(单位)': _['单位']
                                     })
                    bn_n = self.get_neo_node(bn)
                    sq = d.pop('申请人')
                    cp = d.pop('持票人')
                    # Applicant. Unlike the chattel mortgage section there is
                    # no name-length guard before creating a Related node.
                    if sq['名称'] == j['name'] or sq['链接'] == j['url']:
                        sq_n = etp_n
                    else:
                        sq_n = self.match_node(
                            *legal,
                            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                                sq['链接'], sq['名称'])
                        )
                        if sq_n is None:
                            sq_n = Related(**sq)
                            sq_n = self.get_neo_node(sq_n)
                    if sq_n is not None:
                        relationships.append(Have(
                            sq_n, bn_n, **dict(角色='申请人', **d)
                        ).get_relationship())

                    # Bill holder: resolved like the applicant.
                    if cp['名称'] == j['name'] or cp['链接'] == j['url']:
                        cp_n = etp_n
                    else:
                        cp_n = self.match_node(
                            *legal,
                            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                                cp['链接'], cp['名称'])
                        )
                        if cp_n is None:
                            cp_n = Related(**cp)
                            cp_n = self.get_neo_node(cp_n)
                    if cp_n is not None:
                        relationships.append(Have(
                            cp_n, bn_n, **dict(角色='持票人', **d)
                        ).get_relationship())
                    # The current enterprise is always linked as the drawer.
                    # NOTE(review): etp_n is not None-checked here, unlike
                    # sq_n and cp_n above.
                    relationships.append(Have(
                        etp_n, bn_n, **dict(角色='出票人', **d)
                    ).get_relationship())
                    pass

            # --- Administrative penalties, one pass per issuing source ---
            # NOTE(review): all four sub-keys are indexed directly; a record
            # lacking one of them raises KeyError -- confirm the schema
            # guarantees them.
            if '行政处罚' in j['content'].keys():
                data = j['content']['行政处罚']
                # Source: industry and commerce bureau.
                d1 = self.get_format_dict(data['工商局'])
                ps = Punishment.create_from_dict(d1, '工商局')
                for p in ps:
                    # Each entry carries the entity under 'punishment'; the
                    # remaining keys become relationship properties.
                    _ = p.pop('punishment')
                    n = self.get_neo_node(_)
                    if n is not None:
                        relationships.append(
                            Have(
                                etp_n, n, **p
                            ).get_relationship()
                        )

                # Source: tax bureau.
                d2 = self.get_format_dict(data['税务局'])
                ps = Punishment.create_from_dict(d2, '税务局')
                for p in ps:
                    _ = p.pop('punishment')
                    n = self.get_neo_node(_)
                    if n is not None:
                        relationships.append(
                            Have(
                                etp_n, n, **p
                            ).get_relationship()
                        )

                # Source: Credit China.
                d3 = self.get_format_dict(data['信用中国'])
                ps = Punishment.create_from_dict(d3, '信用中国')
                for p in ps:
                    _ = p.pop('punishment')
                    n = self.get_neo_node(_)
                    if n is not None:
                        relationships.append(
                            Have(
                                etp_n, n, **p
                            ).get_relationship()
                        )

                # Source: other.
                d4 = self.get_format_dict(data['其他'])
                ps = Punishment.create_from_dict(d4, '其他')
                for p in ps:
                    _ = p.pop('punishment')
                    n = self.get_neo_node(_)
                    if n is not None:
                        relationships.append(
                            Have(
                                etp_n, n, **p
                            ).get_relationship()
                        )
                pass

            # --- Environmental penalties (source: environmental bureau) ---
            if '环保处罚' in j['content'].keys():
                data = self.get_format_dict(j['content']['环保处罚'])
                ps = Punishment.create_from_dict(data, '环保局')
                for p in ps:
                    _ = p.pop('punishment')
                    n = self.get_neo_node(_)
                    if n is not None:
                        relationships.append(
                            Have(
                                etp_n, n, **p
                            ).get_relationship()
                        )

            # --- Equity pledge: pledgor, pledgee and target enterprise ---
            if '股权出质' in j['content'].keys():
                sh_info = j['content']['股权出质']
                sh_info = self.get_format_dict(sh_info)
                for sh in sh_info:
                    # Replace the raw pledged amount with the fields produced
                    # by get_format_amount.
                    sh = dict(sh, **self.get_format_amount(
                        '出质数额', sh.pop('出质数额')
                    ))
                    # Resolve the pledgor.
                    cz = sh.pop('出质人')
                    cz['链接'] = etp.parser_url(cz['链接'])
                    # Check whether the pledgor is the current company.
                    if j['name'] == cz['名称'] or cz['链接'] == etp_n['URL']:
                        cz_n = etp_n
                    else:
                        # Resolve the pledgor: look among legal entities first.
                        cz_n = self.match_node(
                            *legal,
                            cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                                cz['名称'], cz['链接']
                            )
                        )
                        if cz_n is None:
                            # Not found among legal entities: look among
                            # natural persons by url. Better not to match by
                            # name here, since name collisions are very likely
                            # for anything other than companies.
                            # TODO(leung): searching across all entities is
                            # expensive, take care.
                            cz_n = self.match_node(
                                'Person',
                                cypher='_.URL = "{}"'.format(cz['链接'])
                            )
                            if cz_n is None:
                                # Create the pledgor node.
                                if len(cz['名称']) > 1:
                                    cz_n = Involveder(**cz)
                                    cz_n = self.get_neo_node(cz_n)
                        pass
                    # Resolve the pledgee.
                    zq = sh.pop('质权人')
                    zq['链接'] = etp.parser_url(zq['链接'])
                    # Check whether the pledgee is the current company.
                    if j['name'] == zq['名称'] or zq['链接'] == etp_n['URL']:
                        zq_n = etp_n
                    else:
                        # Resolve the pledgee: look among enterprises first.
                        zq_n = self.match_node(
                            *legal,
                            cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                                zq['名称'], zq['链接']
                            )
                        )
                        if zq_n is None:
                            # Not found among enterprises: fall back to a
                            # lookup by url only (this call searches the
                            # 'Person' label). Better not to match by name
                            # here, since name collisions are very likely for
                            # anything other than companies.
                            # TODO(leung): searching across all entities is
                            # expensive, take care.
                            zq_n = self.match_node(
                                'Person',
                                cypher='_.URL = "{}"'.format(zq['链接'])
                            )
                            if zq_n is None:
                                # Create the pledgee node.
                                if len(zq['名称']) > 1:
                                    zq_n = Involveder(**zq)
                                    zq_n = self.get_neo_node(zq_n)
                        pass
                    # Resolve the pledged target enterprise.
                    bd = sh.pop('标的企业')
                    bd['链接'] = etp.parser_url(bd['链接'])
                    # Check whether the pledge target is the current company.
                    if j['name'] == bd['名称'] or bd['链接'] == etp_n['URL']:
                        bd_n = etp_n
                    else:
                        # Resolve the pledge target: look among enterprises
                        # first.
                        bd_n = self.match_node(
                            *legal,
                            cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                                bd['名称'], bd['链接']
                            )
                        )
                        if bd_n is None:
                            # Not found among enterprises: fall back to a
                            # lookup by url only (this call searches the
                            # 'Person' label). Better not to match by name
                            # here, since name collisions are very likely for
                            # anything other than companies.
                            # TODO(leung): searching across all entities is
                            # expensive, take care.
                            bd_n = self.match_node(
                                'Person',
                                cypher='_.URL = "{}"'.format(bd['链接'])
                            )
                            if bd_n is None:
                                # Create the pledge target node.
                                if len(bd['名称']) > 1:
                                    bd_n = Possession(**bd)
                                    bd_n = self.get_neo_node(bd_n)
                        pass
                    # Create the relationships.
                    # 1. Guaranty: pledgor -> target
                    if cz_n is not None and bd_n is not None:
                        relationships.append(
                            Guaranty(cz_n, bd_n, **sh).get_relationship()
                        )
                    # 2. Pledge right: pledgee -> target
                    if zq_n is not None and bd_n is not None:
                        relationships.append(
                            Have(zq_n, bd_n, **sh).get_relationship()
                        )

            # --- Bankruptcy reorganisation: applicant -> respondent ---
            # NOTE(review): links are not passed through parser_url here, and
            # the label is lowercase 'person' whereas the equity pledge
            # section uses 'Person' -- confirm both against match_node.
            if '破产重组' in j['content'].keys():
                data = self.get_format_dict(j['content']['破产重组'])
                for d in data:
                    sq = d.pop('申请人')
                    if sq['名称'] == j['name'] or sq['链接'] == etp_n['URL']:
                        sq_n = etp_n
                    else:
                        # Matched by url only, across persons and legal
                        # entities.
                        sq_n = self.match_node(
                            *['person'] + legal,
                            cypher='_.URL = "{}"'.format(sq['链接'])
                        )
                        if sq_n is None:
                            sq_n = Involveder(**sq)
                            sq_n = self.get_neo_node(sq_n)
                    bsq = d.pop('被申请人')
                    if bsq['名称'] == j['name'] or bsq['链接'] == etp_n['URL']:
                        bsq_n = etp_n
                    else:
                        # The party filed against for bankruptcy is usually a
                        # legal entity.
                        bsq_n = self.match_node(
                            *['person'] + legal,
                            cypher='_.URL = "{}"'.format(bsq['链接'])
                        )
                        if bsq_n is None:
                            bsq_n = Involveder(**bsq)
                            bsq_n = self.get_neo_node(bsq_n)
                    if sq_n is not None and bsq_n is not None:
                        # Appended directly, without the get_relationship()
                        # call used for Have/Guaranty elsewhere.
                        relationships.append(
                            Relationship(sq_n, '申请破产', bsq_n, **d)
                        )
                pass

            # --- Land mortgage: Plot node, mortgagor and mortgagee ---
            if '土地抵押' in j['content'].keys():
                data = self.get_format_dict(j['content']['土地抵押'])
                for d in data:
                    # Mortgaged area, read as {quantity, unit}.
                    _ = d.pop('抵押面积')
                    p = Plot(**{'位置': d.pop('位置'),
                                '面积(数量)': _['数额'],
                                '面积(单位)': _['单位'],
                                })
                    p_n = self.get_neo_node(p)
                    # Replace the raw mortgage amount with the fields
                    # produced by get_format_amount.
                    d = dict(d, **self.get_format_amount(
                        '抵押金额', d.pop('抵押金额')
                    ))
                    dy = d.pop('抵押人')
                    dyq = d.pop('抵押权人')

                    # Mortgagor -[Guaranty]-> plot
                    if dy['名称'] == j['name'] or dy['链接'] == etp_n['URL']:
                        dy_n = etp_n
                    else:
                        dy_n = self.match_node(
                            *legal,
                            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                                dy['链接'], dy['名称'])
                        )
                        if dy_n is None:
                            dy_n = Related(**dy)
                            dy_n = self.get_neo_node(dy_n)
                    if dy_n is not None:
                        relationships.append(
                            Guaranty(dy_n, p_n, **d).get_relationship()
                        )
                    # Mortgagee -[Have]-> plot
                    if dyq['名称'] == j['name'] or dyq['链接'] == etp_n['URL']:
                        dyq_n = etp_n
                    else:
                        dyq_n = self.match_node(
                            *legal,
                            cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                                dyq['链接'], dyq['名称'])
                        )
                        if dyq_n is None:
                            dyq_n = Related(**dyq)
                            dyq_n = self.get_neo_node(dyq_n)
                    if dyq_n is not None:
                        relationships.append(
                            Have(dyq_n, p_n, **d).get_relationship()
                        )
                pass

            # Flush the buffer once more than 1000 relationships are pending.
            if len(relationships) > 1000:
                i += 1
                self.graph_merge_relationships(relationships)
                if not self.index_and_constraint_statue:
                    self.create_index_and_constraint()
                print(SuccessMessage('{}:success merge relationships to database '
                                     'round {} and deal {}/{} enterprise,and'
                                     ' merge {} relationships.'.format(
                    dt.datetime.now(), i, k, etp_count, len(relationships)
                )))
                relationships.clear()
                # return
                pass
        # Merge whatever is left after the last document.
        if len(relationships):
            i += 1
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(SuccessMessage('{}:success merge relationships to database '
                                 'round {} and deal {}/{} enterprise,and'
                                 ' merge {} relationships.'.format(
                dt.datetime.now(), i, k, etp_count, len(relationships)
            )))
            relationships.clear()
            pass

    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        etp_n = Enterprise(URL=etp['url'], NAME=etp['name'])
        etp_n = self.get_neo_node(etp_n)
        if etp_n is None:
            return [], []
        nodes, relationships = [], []
        nodes.append(etp_n)
        if '动产抵押' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['动产抵押'])
            for d in data:
                _ = d.pop('被担保主债权数额')
                debt = Debt(**{'债务(金额)': _['金额'],
                               '债务(单位)': _['单位'],
                               '履行期限': d.pop('债务人履行债务的期限')
                               })
                debt_n = self.get_neo_node(debt)
                nodes.append(debt_n)
                dy = d.pop('抵押权人')
                zw = d.pop('债务人')
                sy = d.pop('所有权或使用权归属')
                dy['链接'] = Enterprise.parser_url(dy['链接'])
                zw['链接'] = Enterprise.parser_url(zw['链接'])
                sy['链接'] = Enterprise.parser_url(sy['链接'])
                if dy['名称'] == etp['name'] or dy['链接'] == etp['url']:
                    dy_n = etp_n
                else:
                    dy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dy['链接'], dy['名称'])
                    )
                    if dy_n is None:
                        # dy_n = Related(**dy)
                        dy_n = Enterprise(**dy)
                        if not dy_n.isEnterprise():
                            dy_n = Person(**dy)
                            if not dy_n.isPerson():
                                dy_n = Related(**dy)
                        dy_n = self.get_neo_node(dy_n)
                if dy_n is not None:
                    nodes.append(dy_n)
                    relationships.append(Have(
                        dy_n, debt_n, **dict(角色='抵押权人', **d)
                    ))

                if zw['名称'] == etp['name'] or zw['链接'] == etp['url']:
                    zw_n = etp_n
                else:
                    zw_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            zw['链接'], zw['名称'])
                    )
                    if zw_n is None and len(zw['名称']) > 1:
                        # zw_n = Related(**zw)
                        zw_n = Enterprise(**zw)
                        if not zw_n.isEnterprise():
                            zw_n = Person(**zw)
                            if not zw_n.isPerson():
                                zw_n = Related(**zw)
                        zw_n = self.get_neo_node(zw_n)
                if zw_n is not None:
                    nodes.append(zw_n)
                    relationships.append(Have(
                        zw_n, debt_n, **dict(角色='债务人', **d)
                    ))

                if sy['名称'] == etp['name'] or sy['链接'] == etp['url']:
                    sy_n = etp_n
                else:
                    sy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            sy['链接'], sy['名称'])
                    )
                    if sy_n is None and len(sy['名称']) > 1:
                        # sy_n = Related(**sy)
                        sy_n = Enterprise(**sy)
                        if not sy_n.isEnterprise():
                            sy_n = Person(**sy)
                            if not sy_n.isPerson():
                                sy_n = Related(**sy)
                        sy_n = self.get_neo_node(sy_n)
                if sy_n is not None:
                    nodes.append(sy_n)
                    relationships.append(Have(
                        sy_n, debt_n, **dict(角色='所有权或使用权人', **d)
                    ))
                pass

        if '公示催告' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['公示催告'])
            for d in data:
                _ = d.pop('票面金额')
                bn = Banknote(**{'票据号': d.pop('票据号'),
                                 '票据类型': d.pop('票据类型'),
                                 '票面金额(金额)': _['金额'],
                                 '票面金额(单位)': _['单位']
                                 })
                bn_n = self.get_neo_node(bn)
                nodes.append(bn_n)
                sq = d.pop('申请人')
                cp = d.pop('持票人')
                sq['链接'] = Enterprise.parser_url(sq['链接'])
                cp['链接'] = Enterprise.parser_url(cp['链接'])
                if sq['名称'] == etp['name'] or sq['链接'] == etp['url']:
                    sq_n = etp_n
                else:
                    sq_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            sq['链接'], sq['名称'])
                    )
                    if sq_n is None:
                        # sq_n = Related(**sq)
                        sq_n = Enterprise(**sq)
                        if not sq_n.isEnterprise():
                            sq_n = Person(**sq)
                            if not sq_n.isPerson():
                                sq_n = Related(**sq)
                        sq_n = self.get_neo_node(sq_n)
                if sq_n is not None:
                    nodes.append(sq_n)
                    relationships.append(Have(
                        sq_n, bn_n, **dict(角色='申请人', **d)
                    ))

                if cp['名称'] == etp['name'] or cp['链接'] == etp['url']:
                    cp_n = etp_n
                else:
                    cp_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            cp['链接'], cp['名称'])
                    )
                    if cp_n is None:
                        # cp_n = Related(**cp)
                        cp_n = Enterprise(**cp)
                        if not cp_n.isEnterprise():
                            cp_n = Person(**cp)
                            if not cp_n.isPerson():
                                cp_n = Related(**cp)
                        cp_n = self.get_neo_node(cp_n)
                if cp_n is not None:
                    nodes.append(cp_n)
                    relationships.append(Have(
                        cp_n, bn_n, **dict(角色='持票人', **d)
                    ))
                relationships.append(Have(
                    etp_n, bn_n, **dict(角色='出票人', **d)
                ))
                pass

        if '行政处罚' in etp['content'].keys():
            data = etp['content']['行政处罚']
            d1 = self.get_format_dict(data['工商局'])
            ps = Punishment.create_from_dict(d1, '工商局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )

            d2 = self.get_format_dict(data['税务局'])
            ps = Punishment.create_from_dict(d2, '税务局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )

            d3 = self.get_format_dict(data['信用中国'])
            ps = Punishment.create_from_dict(d3, '信用中国')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )

            d4 = self.get_format_dict(data['其他'])
            ps = Punishment.create_from_dict(d4, '其他')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )
            pass

        if '环保处罚' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['环保处罚'])
            ps = Punishment.create_from_dict(data, '环保局')
            for p in ps:
                _ = p.pop('punishment')
                n = self.get_neo_node(_)
                if n is not None:
                    nodes.append(n)
                    relationships.append(
                        Have(etp_n, n, **p)
                    )

        if '股权出质' in etp['content'].keys():
            sh_info = etp['content']['股权出质']
            sh_info = self.get_format_dict(sh_info)
            for sh in sh_info:
                sh = dict(sh, **self.get_format_amount(
                    '出质数额', sh.pop('出质数额')
                ))
                # 确定出质人
                cz = sh.pop('出质人')
                cz['链接'] = Enterprise.parser_url(cz['链接'])
                # 判断出质人是不是当前公司
                if etp['name'] == cz['名称'] or cz['链接'] == etp_n['URL']:
                    cz_n = etp_n
                else:
                    # 确定出质人,先在法人主体中找
                    cz_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            cz['名称'], cz['链接']
                        )
                    )
                    if cz_n is None:
                        # 在法人中没找到,就通过url在自然人中找
                        # 这里最好不要通过名称找了,除公司以外出现
                        # 同名的几率很大
                        # TODO(leung):在所有实体中去找开销很大,需要注意
                        cz_n = self.match_node(
                            'Person',
                            cypher='_.URL = "{}"'.format(cz['链接'])
                        )
                        if cz_n is None:
                            # 创建这个股权出质人
                            if len(cz['名称']) > 1:
                                # cz_n = Involveder(**cz)
                                cz_n = Enterprise(**cz)
                                if not cz_n.isEnterprise():
                                    cz_n = Person(**cz)
                                    if not cz_n.isPerson():
                                        cz_n = Related(**cz)
                                cz_n = self.get_neo_node(cz_n)
                    pass
                # 确定质权人
                zq = sh.pop('质权人')
                zq['链接'] = Enterprise.parser_url(zq['链接'])
                # 判断质权人是不是当前公司
                if etp['name'] == zq['名称'] or zq['链接'] == etp_n['URL']:
                    zq_n = etp_n
                else:
                    # 确定质权人,先在企业中找
                    zq_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            zq['名称'], zq['链接']
                        )
                    )
                    if zq_n is None:
                        # 在企业中没找到,就通过url在所有对象中找
                        # 这里最好不要通过名称找了,除公司以外出现
                        # 同名的几率很大
                        # TODO(leung):在所有实体中去找开销很大,需要注意
                        zq_n = self.match_node(
                            'Person',
                            cypher='_.URL = "{}"'.format(zq['链接'])
                        )
                        if zq_n is None:
                            # 创建这个股权出质人
                            if len(zq['名称']) > 1:
                                # zq_n = Involveder(**zq)
                                zq_n = Enterprise(**zq)
                                if not zq_n.isEnterprise():
                                    zq_n = Person(**zq)
                                    if not zq_n.isPerson():
                                        zq_n = Related(**zq)
                                zq_n = self.get_neo_node(zq_n)
                    pass
                # 确定出质标的企业
                bd = sh.pop('标的企业')
                bd['链接'] = Enterprise.parser_url(bd['链接'])
                # 判断出质标的是不是当前公司
                if etp['name'] == bd['名称'] or bd['链接'] == etp_n['URL']:
                    bd_n = etp_n
                else:
                    # 确定出质标的,先在企业中找,不会是人
                    bd_n = self.match_node(
                        *legal,
                        cypher='_.NAME = "{}" OR _.URL = "{}"'.format(
                            bd['名称'], bd['链接']
                        )
                    )
                    if bd_n is None:
                        # 创建这个出质标的
                        if len(bd['名称']) > 1:
                            bd_n = Enterprise(**bd)
                            if not bd_n.isEnterprise():
                                bd_n = Possession(**bd)
                            bd_n = self.get_neo_node(bd_n)
                    pass
                # 创建关系
                if bd_n is None:
                    continue
                nodes.append(bd_n)
                # 1. 抵押
                if cz_n is not None:
                    nodes.append(cz_n)
                    relationships.append(
                        Guaranty(cz_n, bd_n, **sh)
                    )
                # 2. 质权
                if zq_n is not None:
                    nodes.append(zq_n)
                    relationships.append(
                        Have(zq_n, bd_n, **sh)
                    )

        if '破产重组' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['破产重组'])
            for d in data:
                sq = d.pop('申请人')
                sq['链接'] = Enterprise.parser_url(sq['链接'])
                if sq['名称'] == etp['name'] or sq['链接'] == etp_n['URL']:
                    sq_n = etp_n
                else:
                    sq_n = self.match_node(
                        *['person'] + legal,
                        cypher='_.URL = "{}"'.format(sq['链接'])
                    )
                    if sq_n is None:
                        # sq_n = Involveder(**sq)
                        sq_n = Enterprise(**sq)
                        if not sq_n.isEnterprise():
                            sq_n = Person(**sq)
                            if not sq_n.isPerson():
                                sq_n = Related(**sq)
                        sq_n = self.get_neo_node(sq_n)
                bsq = d.pop('被申请人')
                bsq['链接'] = Enterprise.parser_url(bsq['链接'])
                if bsq['名称'] == etp['name'] or bsq['链接'] == etp_n['URL']:
                    bsq_n = etp_n
                else:
                    # 被申请破产的一般是法人
                    bsq_n = self.match_node(
                        *['person'] + legal,
                        cypher='_.URL = "{}"'.format(bsq['链接'])
                    )
                    if bsq_n is None:
                        # bsq_n = Involveder(**bsq)
                        bsq_n = Enterprise(**bsq)
                        if not bsq_n.isEnterprise():
                            bsq_n = Person(**bsq)
                            if not bsq_n.isPerson():
                                bsq_n = Related(**bsq)
                        bsq_n = self.get_neo_node(bsq_n)
                if sq_n is not None and bsq_n is not None:
                    nodes += [sq_n, bsq_n]
                    relationships.append(
                        ApplyBankrupt(sq_n, bsq_n, **d)
                    )
            pass

        if '土地抵押' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['土地抵押'])
            for d in data:
                _ = d.pop('抵押面积')
                p = Plot(**{'位置': d.pop('位置'),
                            '面积(数量)': _['数额'],
                            '面积(单位)': _['单位'],
                            })
                p_n = self.get_neo_node(p)
                nodes.append(p_n)
                d = dict(d, **self.get_format_amount(
                    '抵押金额', d.pop('抵押金额')
                ))
                dy = d.pop('抵押人')
                dyq = d.pop('抵押权人')
                dy['链接'] = Enterprise.parser_url(dy['链接'])
                dyq['链接'] = Enterprise.parser_url(dyq['链接'])
                if dy['名称'] == etp['name'] or dy['链接'] == etp_n['URL']:
                    dy_n = etp_n
                else:
                    dy_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dy['链接'], dy['名称'])
                    )
                    if dy_n is None:
                        # dy_n = Related(**dy)
                        dy_n = Enterprise(**dy)
                        if not dy_n.isEnterprise():
                            dy_n = Person(**dy)
                            if not dy_n.isPerson():
                                dy_n = Related(**dy)
                        dy_n = self.get_neo_node(dy_n)
                if dy_n is not None:
                    nodes.append(dy_n)
                    relationships.append(
                        Guaranty(dy_n, p_n, **d)
                    )
                if dyq['名称'] == etp['name'] or dyq['链接'] == etp_n['URL']:
                    dyq_n = etp_n
                else:
                    dyq_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}" OR _.NAME = "{}"'.format(
                            dyq['链接'], dyq['名称'])
                    )
                    if dyq_n is None:
                        # dyq_n = Related(**dyq)
                        dyq_n = Enterprise(**dyq)
                        if not dyq_n.isEnterprise():
                            dyq_n = Person(**dyq)
                            if not dyq_n.isPerson():
                                dyq_n = Related(**dyq)
                        dyq_n = self.get_neo_node(dyq_n)
                if dyq_n is not None:
                    nodes.append(dyq_n)
                    relationships.append(
                        Have(dyq_n, p_n, **d)
                    )
            pass

        return nodes, relationships

    def get_all_nodes_and_relationships(
            self, save_folder=None, **kwargs):
        """
        Collect the nodes and relationships of every '经营风险' record,
        grouped by label.

        Each record's url is normalised to ``/firm_<unique code>.html``
        (records whose url carries no 32-character ``unique=`` code are
        logged and skipped) and handed to
        ``get_all_nodes_and_relationships_from_enterprise``.

        :param save_folder: when not None, the grouped data is passed to
            ``self.save_graph`` every 10000 records and once more after the
            last record, and the in-memory buffers are cleared afterwards.
        :param kwargs: forwarded unchanged to ``self.save_graph``.
        :return: ``(nodes, relationships)``, two dicts mapping a label to a
            list of plain dicts. Both are empty when ``save_folder`` was
            given, because the buffers are cleared after saving.
        """
        enterprises = self.base.query(
            sql={
                'metaModel': '经营风险',
                # 'name': '重庆轩烽建材有限公司'
            },
            # limit=100000,
            # skip=90000,
            no_cursor_timeout=True)
        i, j = 0, 0
        nc, rc = 0, 0
        etp_count = enterprises.count()
        nodes, relationships = {}, {}
        # Raw string: '\w' inside a normal literal is an invalid escape
        # sequence and triggers a warning on modern interpreters.
        unique_code_pattern = re.compile(r'(?<=unique=)\w{32}')

        def getUniqueCode(url):
            """Return the 32-character code following 'unique=' or None."""
            _uc_ = unique_code_pattern.search(url)
            return None if _uc_ is None else _uc_.group(0)

        _st_ = time.time()
        for ep in enterprises:
            i += 1
            uc = getUniqueCode(ep['url'])
            if uc is None:
                self.logger.info('{}:mismatch url'.format(ep['name']))
                continue
            ep['url'] = '/firm_' + uc + '.html'
            nds, rps = self.get_all_nodes_and_relationships_from_enterprise(ep)
            for _nds_ in nds:
                if _nds_ is None:
                    continue
                # Flatten the node into a plain dict tagged with its first
                # label, then group it under that label.
                label = list(_nds_.labels)[0]
                _nds_ = dict(label=label, **_nds_)
                nodes.setdefault(_nds_['label'], []).append(_nds_)
            for _rps_ in rps:
                _rps_ = _rps_.to_dict()
                relationships.setdefault(_rps_['label'], []).append(_rps_)
            if i % 10000 == 0:
                # End of a round: flush to disk (if requested) and report.
                j += 1
                if save_folder is not None:
                    _nc_, _rc_ = self.save_graph(
                        save_folder, nodes,
                        relationships, **kwargs)
                    nc += _nc_
                    rc += _rc_
                    nodes.clear()
                    relationships.clear()
                self.logger.info(SuccessMessage(
                    'success trans data to csv round {} and '
                    'deal {}/{} enterprise spend {} seconds.'
                    ''.format(j, i, etp_count, int(time.time() - _st_))
                ))
                _st_ = time.time()
        if save_folder is not None:
            # Flush whatever is left after the last full round.
            _nc_, _rc_ = self.save_graph(
                save_folder, nodes,
                relationships, **kwargs)
            nc += _nc_
            rc += _rc_
            nodes.clear()
            relationships.clear()
            self.logger.info('Summary:')
            self.logger.info(' save graph data:')
            self.logger.info('   {} nodes'.format(nc))
            self.logger.info('   {} relationships'.format(rc))
        return nodes, relationships
Esempio n. 6
0
class JusGraph(BaseGraph):

    def __init__(self, **kwargs):
        """
        Initialise the base graph and attach a ``BaseModel`` as ``self.base``.

        :param kwargs: forwarded unchanged to ``BaseGraph.__init__``.
        """
        BaseGraph.__init__(self, **kwargs)
        # Alternative BaseModel settings kept for reference:
        #   tn='qcc.1.1', location='gcxy', dbname='data'
        model_options = {'tn': 'cq_all'}
        self.base = BaseModel(**model_options)

    def create_index_and_constraint(self):
        """
        Create uniqueness constraints and indexes for the entities used here.

        A unique key is indexed automatically, so it does not need a
        separate index of its own.

        :return: None
        """
        # TODO(leung): always keep these labels accurate
        # Entity labels this graph works with.
        used_entity = [
            # 'JusticeCase',
            'Ruling',
            'Involveder',
            'Executed',
            'SXExecuted',
            'LimitOrder',
            'StockFreeze'
        ]
        index, constraint = {}, {}
        for label in used_entity:
            # Every entity gets a uniqueness constraint on its primary key.
            constraint[label] = [entities(label).primarykey]
            # Extra indexes are registered only when the entity declares any.
            label_index = entities(label).index
            if len(label_index) > 0:
                index[label] = label_index
        self.add_index_and_constraint(index, constraint)

    def create_relationship_from_justice_case(
            self, suspect, justice_case, **kwargs):
        """
        Build ``InvolveCase`` relationships from a suspect to justice cases.

        enterprise or person -[involve_case]-> justice case

        :param suspect: start node of every relationship.
        :param justice_case: iterable of case objects; each must expose
            ``CASE_IDENTITY``, ``primarykey`` and ``get_neo_node``.
        :param kwargs: extra relationship properties; the key '案件身份' is
            (re)set from each case's ``CASE_IDENTITY``.
        :return: list of the objects returned by ``get_relationship()``;
            cases whose node could not be created are logged and left out.
        """
        relationships = []
        for case in justice_case:
            # The identity of the current case replaces any earlier value.
            kwargs = {**kwargs, '案件身份': case.CASE_IDENTITY}
            case_node = case.get_neo_node(primarykey=case.primarykey)
            if case_node is not None:
                link = InvolveCase(suspect, case_node, **kwargs)
                relationships.append(link.get_relationship())
                continue
            self.to_logs('filed initialize justice case Neo node',
                         'ERROR')
        return relationships

    def create_all_relationship(self):
        """
        Merge the judicial relationships of every '法律诉讼' record.

        1. enterprise or person -[involve_case]-> case

        For each record the owning company node is matched by name, or
        created when it is missing. The record's '被执行人', '裁判文书',
        '失信被执行人', '限制高消费' and '股权冻结' sections are then turned
        into ``InvolveCase`` relationships. Relationships are merged into
        the database in batches: whenever more than 1000 have accumulated
        at the end of a record, and once more after the last record.

        Records for which no company node can be obtained are skipped.

        :return: None
        """
        justices = self.base.query(
            sql={
                'metaModel': '法律诉讼',
                # 'name': '重庆思途科技有限公司'
            },
            limit=100,
            no_cursor_timeout=True)
        i, k = 0, 0
        eg = EtpGraph()
        etp_count = justices.count()
        relationships = []

        def resolve_party(party, labels, record, company_node):
            """Return the node for a {'名称', '链接'} party, creating an
            Involveder when it cannot be matched by URL; may be None."""
            if (party['名称'] == record['name']
                    or party['链接'] == company_node['URL']):
                # The party is the company that owns this record.
                return company_node
            node = self.match_node(
                *labels,
                cypher='_.URL = "{}"'.format(party['链接'])
            )
            if node is None:
                node = self.get_neo_node(Involveder(**party))
            return node

        def involve(start, end, identity):
            """Build an InvolveCase relationship tagged with a case role."""
            return InvolveCase(
                start, end, **{'案件身份': identity}
            ).get_relationship()

        def merge_batch():
            """Merge the buffered relationships, report, and clear them."""
            nonlocal i
            i += 1
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            print(SuccessMessage('{}:success merge relationships to database '
                                 'round {} and deal {}/{} enterprise,and'
                                 ' merge {} relationships.'.format(
                dt.datetime.now(), i, k, etp_count, len(relationships)
            )))
            relationships.clear()

        for j in justices:
            # Everything listed under a company's '法律诉讼' record is
            # related to that company.
            k += 1
            # TODO(leung): note that the url of a '法律诉讼' record cannot
            # identify the company, hence the match by name.
            etp_n = self.match_node(
                *legal,
                cypher='_.NAME = "{}"'.format(j['name'])
            )
            if etp_n is None:
                # The company is not in the graph yet, so create it.
                base_info = self.base.query_one(
                    sql={'metaModel': '基本信息', 'name': j['name']}
                )
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # Creating judicial relationships does not create the
                    # company's basic relationships, so add them here.
                    relationships += (
                        eg.create_relationship_from_enterprise_baseinfo(
                            base_info))
                else:
                    # No basic information available: treat the company
                    # simply as a party involved in cases.
                    etp_n = self.get_neo_node(
                        Involveder(**{'名称': j['name'], '链接': j['url']})
                    )
            if etp_n is None:
                # Applies to both creation paths: without a company node the
                # sections below would fail on etp_n['URL'] or link to None.
                continue

            content = j['content']

            if '被执行人' in content:
                data = self.get_format_dict(content['被执行人'])
                for ep in Enforcement.create_from_dict(data):
                    e_n = self.get_neo_node(ep.pop('executed'))
                    if e_n is not None:
                        relationships.append(
                            InvolveCase(etp_n, e_n, **ep).get_relationship()
                        )

            # NOTE: the '司法案件' section is currently not processed; its
            # former handling through create_relationship_from_justice_case
            # was disabled.

            if '裁判文书' in content:
                data = self.get_format_dict(content['裁判文书'])
                # create_from_dict returns [[Ruling, related parties], ...]
                for ruling, involve_parties in Judgment.create_from_dict(data):
                    rul_n = self.get_neo_node(ruling)
                    if rul_n is None:
                        continue
                    for inv in involve_parties:
                        # inv is read as (case identity, name, url).
                        if j['name'] == inv[1] or j['url'] == inv[2]:
                            # The party is the current company itself.
                            inv_n = etp_n
                        else:
                            # Match among persons and legal entities first.
                            inv_n = self.match_node(
                                *['Person'] + legal,
                                cypher='_.URL = "{}"'.format(
                                    inv[2])
                            )
                            if inv_n is None:
                                # Not matched: create the party. Other
                                # entity types could be searched too, but
                                # those are likely outside the data set.
                                ivl = Involveder()
                                ivl['NAME'] = inv[1]
                                ivl['URL'] = inv[2]
                                inv_n = self.get_neo_node(ivl)
                        if inv_n is not None:
                            relationships.append(
                                involve(inv_n, rul_n, inv[0]))

            if '失信被执行人' in content:
                data = self.get_format_dict(content['失信被执行人'])
                for ep in SXEnforcement.create_from_dict(data):
                    e_n = self.get_neo_node(ep.pop('sxexecuted'))
                    if e_n is not None:
                        relationships.append(
                            InvolveCase(etp_n, e_n, **ep).get_relationship()
                        )

            if '限制高消费' in content:
                data = self.get_format_dict(content['限制高消费'])
                for d in data:
                    sq = d.pop('申请人')
                    lh = d.pop('限消令对象')
                    xg = d.pop('关联对象')
                    case_no = d.pop('案号')
                    lo = LimitOrder(**dict(
                        案号=case_no['名称'], 案号链接=case_no['链接'], **d))
                    lo_n = self.get_neo_node(lo)
                    if lo_n is None:
                        continue
                    # Applicant -> order; order -> restricted / related party.
                    sq_n = resolve_party(sq, ['Person'] + legal, j, etp_n)
                    if sq_n is not None:
                        relationships.append(involve(sq_n, lo_n, '申请人'))
                    lh_n = resolve_party(lh, ['Person'] + legal, j, etp_n)
                    if lh_n is not None:
                        relationships.append(involve(lo_n, lh_n, '限制对象'))
                    xg_n = resolve_party(xg, ['Person'] + legal, j, etp_n)
                    if xg_n is not None:
                        relationships.append(involve(lo_n, xg_n, '关联对象'))

            if '股权冻结' in content:
                data = self.get_format_dict(content['股权冻结'])
                for d in data:
                    bd = d.pop('标的企业')
                    zx = d.pop('被执行人')
                    amount = d.pop('股权数额')
                    # '类型|状态' holds "type|status"; status may be absent.
                    kind_state = d.pop('类型|状态').split('|')
                    sf = StockFreeze(**dict(
                        冻结数额=amount['金额'], 金额单位=amount['单位'],
                        类型=kind_state[0],
                        状态=kind_state[1] if len(kind_state) > 1 else None,
                        **d
                    ))
                    sf_n = self.get_neo_node(sf)
                    if sf_n is None:
                        continue
                    # The target company is only looked up among legal
                    # entities; the executed party may also be a person.
                    bd_n = resolve_party(bd, legal, j, etp_n)
                    if bd_n is not None:
                        relationships.append(involve(sf_n, bd_n, '标的企业'))
                    zx_n = resolve_party(zx, ['Person'] + legal, j, etp_n)
                    if zx_n is not None:
                        relationships.append(involve(sf_n, zx_n, '被执行人'))

            if len(relationships) > 1000:
                merge_batch()
        if len(relationships):
            # Merge whatever is left after the last record.
            merge_batch()

    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        etp_n = Enterprise(URL=etp['url'], NAME=etp['name'])
        etp_n = self.get_neo_node(etp_n)
        if etp_n is None:
            return [], []
        nodes, relationships = [], []
        nodes.append(etp_n)

        if '法院公告' in etp['content'].keys():
            data = self.get_format_dict(
                etp['content']['法院公告']
            )
            cas = CourtAnnounce.create_from_dict(data)
            for ca in cas:
                a = ca.pop('announce')
                a_n = self.get_neo_node(a)
                if a_n is None:
                    continue
                nodes.append(a_n)
                bgs = ca.pop('defendant')
                for bg in bgs:
                    bg['链接'] = Enterprise.parser_url(bg['链接'])
                    if bg['名称'] == etp['name'] or bg['链接'] == etp_n['URL']:
                        bg_n = etp_n
                    else:
                        bg_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                bg['链接'])
                        )
                        if bg_n is None:
                            # 创建这个对象
                            # sq_n = Involveder(**sq)
                            bg_n = Enterprise(**bg)
                            if not bg_n.isEnterprise():
                                bg_n = Person(**bg)
                                if not bg_n.isPerson():
                                    bg_n = Related(**bg)
                            bg_n = self.get_neo_node(bg_n)
                    if bg_n is not None:
                        nodes.append(bg_n)
                        relationships.append(
                            InvolveCase(bg_n, a_n, **{'案件身份': '被告'})
                        )
                ygs = ca.pop('plaintiff')
                for yg in ygs:
                    yg['链接'] = Enterprise.parser_url(yg['链接'])
                    if yg['名称'] == etp['name'] or yg['链接'] == etp_n['URL']:
                        yg_n = etp_n
                    else:
                        yg_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                yg['链接'])
                        )
                        if yg_n is None:
                            # 创建这个对象
                            # lh_n = Involveder(**lh)
                            yg_n = Enterprise(**yg)
                            if not yg_n.isEnterprise():
                                yg_n = Person(**yg)
                                if not yg_n.isPerson():
                                    yg_n = Related(**yg)
                            yg_n = self.get_neo_node(yg_n)
                    if yg_n is not None:
                        nodes.append(yg_n)
                        relationships.append(
                            InvolveCase(yg_n, a_n, **{'案件身份': '原告'})
                        )
            pass

        if '开庭公告' in etp['content'].keys():
            data = self.get_format_dict(
                etp['content']['开庭公告']
            )
            cas = OpenAnnounce.create_from_dict(data)
            for ca in cas:
                a = ca.pop('announce')
                a_n = self.get_neo_node(a)
                if a_n is None:
                    continue
                nodes.append(a_n)
                bgs = ca.pop('defendant')
                for bg in bgs:
                    bg['链接'] = Enterprise.parser_url(bg['链接'])
                    if bg['名称'] == etp['name'] or bg['链接'] == etp_n['URL']:
                        bg_n = etp_n
                    else:
                        bg_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                bg['链接'])
                        )
                        if bg_n is None:
                            # 创建这个对象
                            # sq_n = Involveder(**sq)
                            bg_n = Enterprise(**bg)
                            if not bg_n.isEnterprise():
                                bg_n = Person(**bg)
                                if not bg_n.isPerson():
                                    bg_n = Related(**bg)
                            bg_n = self.get_neo_node(bg_n)
                    if bg_n is not None:
                        nodes.append(bg_n)
                        relationships.append(
                            InvolveCase(bg_n, a_n, **{'案件身份': '被告'})
                        )
                ygs = ca.pop('plaintiff')
                for yg in ygs:
                    yg['链接'] = Enterprise.parser_url(yg['链接'])
                    if yg['名称'] == etp['name'] or yg['链接'] == etp_n['URL']:
                        yg_n = etp_n
                    else:
                        yg_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                yg['链接'])
                        )
                        if yg_n is None:
                            # 创建这个对象
                            # lh_n = Involveder(**lh)
                            yg_n = Enterprise(**yg)
                            if not yg_n.isEnterprise():
                                yg_n = Person(**yg)
                                if not yg_n.isPerson():
                                    yg_n = Related(**yg)
                            yg_n = self.get_neo_node(yg_n)
                    if yg_n is not None:
                        nodes.append(yg_n)
                        relationships.append(
                            InvolveCase(yg_n, a_n, **{'案件身份': '原告'})
                        )
            pass

        if '送达公告' in etp['content'].keys():
            data = self.get_format_dict(
                etp['content']['送达公告']
            )
            cas = DeliveryAnnounce.create_from_dict(data)
            for ca in cas:
                a = ca.pop('announce')
                a_n = self.get_neo_node(a)
                if a_n is None:
                    continue
                nodes.append(a_n)
                bgs = ca.pop('defendant')
                for bg in bgs:
                    bg['链接'] = Enterprise.parser_url(bg['链接'])
                    if bg['名称'] == etp['name'] or bg['链接'] == etp_n['URL']:
                        bg_n = etp_n
                    else:
                        bg_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                bg['链接'])
                        )
                        if bg_n is None:
                            # 创建这个对象
                            # sq_n = Involveder(**sq)
                            bg_n = Enterprise(**bg)
                            if not bg_n.isEnterprise():
                                bg_n = Person(**bg)
                                if not bg_n.isPerson():
                                    bg_n = Related(**bg)
                            bg_n = self.get_neo_node(bg_n)
                    if bg_n is not None:
                        nodes.append(bg_n)
                        relationships.append(
                            InvolveCase(bg_n, a_n, **{'案件身份': '被告'})
                        )
                ygs = ca.pop('plaintiff')
                for yg in ygs:
                    yg['链接'] = Enterprise.parser_url(yg['链接'])
                    if yg['名称'] == etp['name'] or yg['链接'] == etp_n['URL']:
                        yg_n = etp_n
                    else:
                        yg_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                yg['链接'])
                        )
                        if yg_n is None:
                            # 创建这个对象
                            # lh_n = Involveder(**lh)
                            yg_n = Enterprise(**yg)
                            if not yg_n.isEnterprise():
                                yg_n = Person(**yg)
                                if not yg_n.isPerson():
                                    yg_n = Related(**yg)
                            yg_n = self.get_neo_node(yg_n)
                    if yg_n is not None:
                        nodes.append(yg_n)
                        relationships.append(
                            InvolveCase(yg_n, a_n, **{'案件身份': '原告'})
                        )
            pass

        if '立案信息' in etp['content'].keys():
            data = self.get_format_dict(
                etp['content']['立案信息']
            )
            cas = RegisterCase.create_from_dict(data)
            for ca in cas:
                c = ca.pop('case')
                c_n = self.get_neo_node(c)
                if c_n is None:
                    continue
                nodes.append(c_n)
                bgs = ca.pop('defendant')
                for bg in bgs:
                    bg['链接'] = Enterprise.parser_url(bg['链接'])
                    if bg['名称'] == etp['name'] or bg['链接'] == etp_n['URL']:
                        bg_n = etp_n
                    else:
                        bg_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                bg['链接'])
                        )
                        if bg_n is None:
                            # 创建这个对象
                            # sq_n = Involveder(**sq)
                            bg_n = Enterprise(**bg)
                            if not bg_n.isEnterprise():
                                bg_n = Person(**bg)
                                if not bg_n.isPerson():
                                    bg_n = Related(**bg)
                            bg_n = self.get_neo_node(bg_n)
                    if bg_n is not None:
                        nodes.append(bg_n)
                        relationships.append(
                            InvolveCase(bg_n, c_n, **{'案件身份': '被告'})
                        )
                ygs = ca.pop('plaintiff')
                for yg in ygs:
                    yg['链接'] = Enterprise.parser_url(yg['链接'])
                    if yg['名称'] == etp['name'] or yg['链接'] == etp_n['URL']:
                        yg_n = etp_n
                    else:
                        yg_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                yg['链接'])
                        )
                        if yg_n is None:
                            # 创建这个对象
                            # lh_n = Involveder(**lh)
                            yg_n = Enterprise(**yg)
                            if not yg_n.isEnterprise():
                                yg_n = Person(**yg)
                                if not yg_n.isPerson():
                                    yg_n = Related(**yg)
                            yg_n = self.get_neo_node(yg_n)
                    if yg_n is not None:
                        nodes.append(yg_n)
                        relationships.append(
                            InvolveCase(yg_n, c_n, **{'案件身份': '原告'})
                        )
            pass

        if '终本案件' in etp['content'].keys():
            data = self.get_format_dict(
                etp['content']['终本案件']
            )
            cas = FinalCase.create_from_dict(data)
            for ca in cas:
                c = ca.pop('case')
                c_n = self.get_neo_node(c)
                if c_n is None:
                    continue
                nodes.append(c_n)
                relationships.append(
                    InvolveCase(etp_n, c_n)
                )

        if '裁判文书' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['裁判文书'])
            # 返回的是[[Ruling, 相关对象],[]...]
            rls = Judgment.create_from_dict(data)
            for ruling, involve in rls:
                rul_n = self.get_neo_node(ruling)
                if rul_n is None:
                    continue
                nodes.append(rul_n)
                for inv in involve:
                    # 案件相关主体
                    # 先判断是不是当前的企业
                    inv[2] = Enterprise.parser_url(inv[2])
                    if etp['name'] == inv[1] or etp['url'] == inv[2]:
                        # 如果是,直接关联起来
                        inv_n = etp_n
                    else:
                        # 1.先在企业中匹配
                        # 2.匹配自然人
                        inv_n = self.match_node(
                            *['Person'] + legal,
                            cypher='_.URL = "{}"'.format(
                                inv[2])
                        )
                        if inv_n is None:
                            # ivl = Involveder()
                            _ivl_ = {'名称': inv[1], '链接': inv[2]}
                            ivl = Enterprise(**_ivl_)
                            if not ivl.isEnterprise():
                                ivl = Person(**_ivl_)
                                if not ivl.isPerson():
                                    ivl = Related(**_ivl_)
                            inv_n = self.get_neo_node(ivl)
                    # 3.以上两者都没匹配到的时候,创建这个案件参与者
                    # 实际上还可以到其他实体中去匹配,但那些可能是数据
                    # 集之外的对象了,可以先不去管他们

                    if inv_n is not None:
                        nodes.append(inv_n)
                        relationships.append(
                            InvolveCase(
                                inv_n, rul_n, **{'案件身份': inv[0]}
                            )
                        )
            pass

        if '被执行人' in etp['content'].keys():
            data = self.get_format_dict(etp['content']['被执行人'])
            eps = Enforcement.create_from_dict(data)
            for ep in eps:
                e = ep.pop('executed')
                e_n = self.get_neo_node(e)
                if e_n is not None:
                    nodes.append(e_n)
                    relationships.append(
                        InvolveCase(etp_n, e_n, **ep)
                    )
            pass

        if '失信被执行人' in etp['content'].keys():
            data = self.get_format_dict(
                etp['content']['失信被执行人']
            )
            eps = SXEnforcement.create_from_dict(data)
            for ep in eps:
                e = ep.pop('sxexecuted')
                e_n = self.get_neo_node(e)
                if e_n is not None:
                    nodes.append(e_n)
                    relationships.append(
                        InvolveCase(etp_n, e_n, **ep)
                    )
            pass

        if '限制高消费' in etp['content'].keys():
            data = self.get_format_dict(
                etp['content']['限制高消费']
            )
            for d in data:
                sq = d.pop('申请人')
                lh = d.pop('限消令对象')
                xg = d.pop('关联对象')
                sq['链接'] = Enterprise.parser_url(sq['链接'])
                lh['链接'] = Enterprise.parser_url(lh['链接'])
                xg['链接'] = Enterprise.parser_url(xg['链接'])
                _ = d.pop('案号')
                lo = dict(案号=_['名称'], 案号链接=_['链接'], **d)
                lo = LimitOrder(**lo)
                lo_n = self.get_neo_node(lo)
                if lo_n is None:
                    continue
                nodes.append(lo_n)
                if sq['名称'] == etp['name'] or sq['链接'] == etp_n['URL']:
                    sq_n = etp_n
                else:
                    sq_n = self.match_node(
                        *['Person'] + legal,
                        cypher='_.URL = "{}"'.format(
                            sq['链接'])
                    )
                    if sq_n is None:
                        # 创建这个对象
                        # sq_n = Involveder(**sq)
                        sq_n = Enterprise(**sq)
                        if not sq_n.isEnterprise():
                            sq_n = Person(**sq)
                            if not sq_n.isPerson():
                                sq_n = Related(**sq)
                        sq_n = self.get_neo_node(sq_n)
                if sq_n is not None:
                    nodes.append(sq_n)
                    relationships.append(
                        InvolveCase(sq_n, lo_n, **{'案件身份': '申请人'})
                    )
                if lh['名称'] == etp['name'] or lh['链接'] == etp_n['URL']:
                    lh_n = etp_n
                else:
                    lh_n = self.match_node(
                        *['Person'] + legal,
                        cypher='_.URL = "{}"'.format(
                            lh['链接'])
                    )
                    if lh_n is None:
                        # 创建这个对象
                        # lh_n = Involveder(**lh)
                        lh_n = Enterprise(**lh)
                        if not lh_n.isEnterprise():
                            lh_n = Person(**lh)
                            if not lh_n.isPerson():
                                lh_n = Related(**lh)
                        lh_n = self.get_neo_node(lh_n)
                if lh_n is not None:
                    nodes.append(lh_n)
                    relationships.append(
                        InvolveCase(lo_n, lh_n, **{'案件身份': '限制对象'})
                    )
                if xg['名称'] == etp['name'] or xg['链接'] == etp_n['URL']:
                    xg_n = etp_n
                else:
                    xg_n = self.match_node(
                        *['Person'] + legal,
                        cypher='_.URL = "{}"'.format(
                            xg['链接'])
                    )
                    if xg_n is None:
                        # 创建这个对象
                        # xg_n = Involveder(**xg)
                        xg_n = Enterprise(**xg)
                        if not xg_n.isEnterprise():
                            xg_n = Person(**xg)
                            if not xg_n.isPerson():
                                xg_n = Related(**xg)
                        xg_n = self.get_neo_node(xg_n)
                if xg_n is not None:
                    nodes.append(xg_n)
                    relationships.append(
                        InvolveCase(lo_n, xg_n, **{'案件身份': '关联对象'})
                    )
            pass

        if '股权冻结' in etp['content'].keys():
            data = self.get_format_dict(
                etp['content']['股权冻结']
            )
            for d in data:
                bd = d.pop('标的企业')
                zx = d.pop('被执行人')
                bd['链接'] = Enterprise.parser_url(bd['链接'])
                zx['链接'] = Enterprise.parser_url(zx['链接'])
                _1 = d.pop('股权数额')
                _2 = d.pop('类型|状态').split('|')
                sf = dict(冻结数额=_1['金额'], 金额单位=_1['单位'],
                          类型=_2[0], 状态=_2[1] if len(_2) > 1 else None, **d
                          )
                sf = StockFreeze(**sf)
                sf_n = self.get_neo_node(sf)
                if sf_n is None:
                    continue
                nodes.append(sf_n)
                if bd['名称'] == etp['name'] or bd['链接'] == etp_n['URL']:
                    bd_n = etp_n
                else:
                    bd_n = self.match_node(
                        *legal,
                        cypher='_.URL = "{}"'.format(
                            bd['链接'])
                    )
                    if bd_n is None:
                        # bd_n = Involveder(**bd)
                        bd_n = Enterprise(**bd)
                        if not bd_n.isEnterprise():
                            bd_n = Person(**bd)
                            if not bd_n.isPerson():
                                bd_n = Related(**bd)
                        bd_n = self.get_neo_node(bd_n)
                if bd_n is not None:
                    nodes.append(bd_n)
                    relationships.append(
                        InvolveCase(sf_n, bd_n, **{'案件身份': '标的企业'})
                    )
                if zx['名称'] == etp['name'] or zx['链接'] == etp_n['URL']:
                    zx_n = etp_n
                else:
                    zx_n = self.match_node(
                        *['Person'] + legal,
                        cypher='_.URL = "{}"'.format(
                            zx['链接'])
                    )
                    if zx_n is None:
                        # zx_n = Involveder(**zx)
                        zx_n = Enterprise(**zx)
                        if not zx_n.isEnterprise():
                            zx_n = Person(**zx)
                            if not zx_n.isPerson():
                                zx_n = Related(**zx)
                        zx_n = self.get_neo_node(zx_n)
                if zx_n is not None:
                    nodes.append(zx_n)
                    relationships.append(
                        InvolveCase(sf_n, zx_n, **{'案件身份': '被执行人'})
                    )
            pass

        return nodes, relationships

    def get_all_nodes_and_relationships(
            self, save_folder=None, **kwargs):
        """
        Collect the nodes and relationships of every '法律诉讼' record.

        Each record returned by ``self.base.query`` gets its ``url``
        rewritten to ``/firm_<code>.html`` (``<code>`` being the 32 word
        characters that follow ``unique=`` in the original url) and is then
        passed to ``get_all_nodes_and_relationships_from_enterprise``.
        The resulting nodes and relationships are grouped by label.
        Records whose url carries no such code are logged and skipped.

        :param save_folder: when not None, the grouped data is handed to
            ``self.save_graph`` each time the record counter hits a
            multiple of 10000 and once more after the loop; the buffers
            are emptied after every save.
        :param kwargs: forwarded unchanged to ``self.save_graph``.
        :return: ``(nodes, relationships)``, two dicts mapping a label to
            a list of dicts. Both are empty when ``save_folder`` was given,
            because they are cleared after the final save.
        """
        enterprises = self.base.query(
            sql={
                'metaModel': '法律诉讼',
            },
            no_cursor_timeout=True)
        processed, rounds = 0, 0
        saved_nodes, saved_rels = 0, 0
        etp_count = enterprises.count()
        nodes, relationships = {}, {}
        # Raw string: in a plain literal '\w' is an invalid escape sequence.
        unique_code_pattern = re.compile(r'(?<=unique=)\w{32}')

        def getUniqueCode(url):
            """Return the 32-character code after 'unique=' or None."""
            found = unique_code_pattern.search(url)
            return None if found is None else found.group(0)

        round_start = time.time()
        for ep in enterprises:
            processed += 1
            uc = getUniqueCode(ep['url'])
            if uc is None:
                self.logger.info('{}:mismatch url'.format(ep['name']))
                continue
            ep['url'] = '/firm_' + uc + '.html'
            nds, rps = self.get_all_nodes_and_relationships_from_enterprise(ep)
            for node in nds:
                if node is None:
                    continue
                # the first label of the node decides its group
                label = list(node.labels)[0]
                nodes.setdefault(label, []).append(dict(label=label, **node))
            for rel in rps:
                rel = rel.to_dict()
                relationships.setdefault(rel['label'], []).append(rel)
            # NOTE: a skipped record never reaches this periodic check.
            if processed % 10000 == 0:
                rounds += 1
                if save_folder is not None:
                    _nc_, _rc_ = self.save_graph(
                        save_folder, nodes,
                        relationships, **kwargs)
                    saved_nodes += _nc_
                    saved_rels += _rc_
                    nodes.clear()
                    relationships.clear()
                # elapsed = now - start (the reverse order gave a negative
                # duration in the log line)
                self.logger.info(SuccessMessage(
                    'success trans data to csv round {} and '
                    'deal {}/{} enterprise spend {} seconds.'
                    ''.format(rounds, processed, etp_count,
                              int(time.time() - round_start))
                ))
                round_start = time.time()
        if save_folder is not None:
            _nc_, _rc_ = self.save_graph(
                save_folder, nodes,
                relationships, **kwargs)
            saved_nodes += _nc_
            saved_rels += _rc_
            nodes.clear()
            relationships.clear()
            self.logger.info('Summary:')
            self.logger.info(' save graph data:')
            self.logger.info('   {} nodes'.format(saved_nodes))
            self.logger.info('   {} relationships'.format(saved_rels))
        return nodes, relationships
Esempio n. 7
0
class Timeline:
    def __init__(self, name):
        """
        Build the timeline of the company called ``name``.

        The events are gathered by ``getTimeline`` and afterwards ordered
        in place, ascending, by their first field (the date).

        :param name: company name used by the ``f1`` .. ``f7`` queries.
        """
        self.bm = BaseModel(tn='qcc', location='gcxy', dbname='data')
        self.name = name
        self.timeline = []
        self.getTimeline()
        # ascending order is the default of list.sort
        self.timeline.sort(key=lambda event: event[0])

    def getTimeline(self):
        self.f1()
        self.f2()
        self.f3()
        self.f4()
        self.f5()
        self.f6()
        self.f7()
        tl = []
        for t in self.timeline:
            if t[0] is not None:
                tl.append(t)
        self.timeline = tl
        pass

    def to_excel(self, path):
        """
        Write the timeline into the '公司历程' sheet of the template workbook
        and save the result to ``path``.

        The company name goes into cell A1; the three fields of each event
        fill columns 1 to 3, the first event on row 3.

        :param path: destination handed to the workbook's ``save``.
        """
        template = project_dir + '\\xxx公司发展历程.xlsx'
        workbook = load_workbook(template)
        sheet = workbook['公司历程']
        sheet['A1'] = self.name
        for row, event in enumerate(self.timeline, start=3):
            sheet.cell(row, 1, event[0])
            sheet.cell(row, 2, event[1])
            sheet.cell(row, 3, event[2])
        workbook.save(path)

    def f1(self):
        meta = '基本信息'
        d = self.bm.query_one(sql={'name': self.name, 'metaModel': meta})
        if d is None:
            return
        d = d['content']
        __ = d['工商信息']['#1']
        self.timeline.append([
            __['成立日期'], meta, '公司注册成立,注册资本{}{}'.format(__['注册资本']['金额'],
                                                       __['注册资本']['单位'])
        ])
        __ = d['变更记录']
        for b in __.values():
            self.timeline.append([
                b['变更日期'], meta, "公司发生{}:\n变更前:{}\n变更后:{}".format(
                    b['变更项目'],
                    b['变更前']['内容'],
                    b['变更后']['内容'],
                )
            ])
        __ = d['股东信息']
        for _ in __.values():
            if '认缴出资日期' in _.keys():
                self.timeline.append([
                    _['认缴出资日期'], meta,
                    '股东{}认缴出资{}{}'.format(_['股东']['名称'], _['认缴出资额']['金额'],
                                          _['认缴出资额']['单位'])
                ])
            if '实缴出资日期' in _.keys():
                self.timeline.append([
                    _['实缴出资日期'], meta,
                    '股东{}实缴出资{}{}'.format(_['股东']['名称'], _['实缴出资额']['金额'],
                                          _['实缴出资额']['单位'])
                ])
        __ = d['对外投资']
        for _ in __.values():
            if '融资日期' in _.keys():
                self.timeline.append([
                    _['融资日期'], meta,
                    '投资{}{}{},所占比例{}'.format(_['被投资企业']['名称'], _['投资数额']['金额'],
                                             _['投资数额']['单位'], _['投资比例'])
                ])
        __ = d['建筑资质资格']
        for _ in __.values():
            self.timeline.append(
                [_['发证日期'], meta, '获得建筑资质资格,证书名称{}'.format(_['资质名称'])])
        __ = d['股权变更']
        for _ in __.values():
            self.timeline.append([
                _['公示日期'], meta,
                '股东,股权比例由{}变更为{}'.format(_['变更前股权比例'], _['变更后股权比例'])
            ])
        pass

    def f2(self):
        meta = '经营状况'
        d = self.bm.query_one(sql={'name': self.name, 'metaModel': meta})
        if d is None:
            return
        d = d['content']
        __ = d['产权交易']
        for _ in __.values():
            self.timeline.append([
                _['交易日期'], meta,
                '以{}{}转让{}给{}'.format(_['转让价格']['金额'], _['转让价格']['单位'],
                                      _['标的企业']['名称'], _['转让方']['名称'])
            ])
        __ = d['抽查检查']
        for _ in __.values():
            self.timeline.append([
                _['日期'], meta,
                '{}对公司进行{}检查,检查结果为{}'.format(_['实施机关'], _['类型'], _['结果'])
            ])
        __ = d['购地信息']
        for _ in __.values():
            self.timeline.append([
                _['合同签订日期'], meta, '购入位于{},共{}{}土地,土地用途为{},供地方式为{}'.format(
                    _['项目位置']['位置'], _['面积']['数量'], _['面积']['单位'], _['土地用途'],
                    _['供地方式'])
            ])
        __ = d['行政许可']
        for v in __.values():
            if '工商局' in v.keys():
                _ = v['工商局']
                self.timeline.append([
                    _['有效期自'], meta,
                    '由{}颁布行政许可"{}"'.format(_['许可机关'], _['许可内容'])
                ])
            if '信用中国' in v.keys():
                _ = v['信用中国']
                self.timeline.append([
                    _['决定日期'], meta,
                    '由{}颁布行政许可"{}"'.format(_['许可机关'], _['决定文书号'])
                ])
        __ = d['进出口信用']
        for _ in __.values():
            self.timeline.append(
                [_['注册日期'], meta, '在{}注册{}进出口信息'.format(_['注册海关'], _['经营类别'])])
        __ = d['双随机抽查']
        for _ in __.values():
            self.timeline.append(
                [_['完成日期'], meta, '{}完成对公司的“{}”'.format(_['抽查机关'], _['任务名称'])])
        __ = d['招聘']
        for _ in __.values():
            self.timeline.append([
                _['发布日期'], meta,
                '发布招聘职位:{},薪资:{}'.format(_['职位']['职位'], _['月薪'])
            ])
        __ = d['招投标信息']
        for _ in __.values():
            self.timeline.append(
                [_['发布日期'], meta, '{}:{}'.format(_['项目分类'], _['描述']['描述'])])
        __ = d['信用评级']
        for _ in __.values():
            self.timeline.append([
                _['评级日期'], meta, '公司被{}信用评级为{}'.format(_['评级公司']['名称'],
                                                       _['主体评级'])
            ])
        pass

    def f3(self):
        meta = '经营风险'
        d = self.bm.query_one(sql={'name': self.name, 'metaModel': meta})
        if d is None:
            return
        d = d['content']
        __ = d['动产抵押']
        for _ in __.values():
            self.timeline.append([
                _['登记日期'], meta,
                '涉及动产抵押,抵押权人:{},债务人:{},所有权或使用权归属{},涉及金额{}{}'.format(
                    _['抵押权人']['名称'], _['债务人']['名称'], _['所有权或使用权归属']['名称'],
                    _['被担保主债权数额']['金额'], _['被担保主债权数额']['单位'])
            ])
        __ = d['公示催告']
        for _ in __.values():
            self.timeline.append([
                _['公告日期'], meta, '{}申请{}票据承兑,票面金额{}{},持票人:{}'.format(
                    _['申请人']['名称'],
                    _['票据类型'],
                    _['票面金额']['金额'],
                    _['票面金额']['单位'],
                    _['持票人']['名称'],
                )
            ])
        __ = d['股权出质']
        for _ in __.values():
            self.timeline.append([
                _['登记日期'], meta, '{}将{}出质给{},涉及金额{}{}'.format(
                    _['出质人']['名称'],
                    _['标的企业']['名称'],
                    _['质权人']['名称'],
                    _['出质数额']['金额'],
                    _['出质数额']['单位'],
                )
            ])
        __ = d['行政处罚']['工商局']
        for _ in __.values():
            self.timeline.append([
                _['公示日期'] if _['公示日期'] is not None else _['决定日期'], meta,
                '公司因{},{}对公司实施{}'.format(
                    _['违法行为类型'],
                    _['决定机关'],
                    _['处罚内容'],
                )
            ])
        __ = d['行政处罚']['税务局']
        for _ in __.values():
            self.timeline.append(
                [_['处罚决定日期'], meta, '公司因{},税务局对公司实施行政处罚'.format(_['处罚事由'], )])
        __ = d['行政处罚']['其他']
        for _ in __.values():
            self.timeline.append([
                _['处罚日期'], meta, '公司因{},{}对公司实施行政处罚'.format(
                    _['处罚事由'],
                    _['处罚单位'],
                )
            ])
        __ = d['行政处罚']['信用中国']
        for _ in __.values():
            self.timeline.append([
                _['处罚日期'], meta, '公司因{},{}对公司实施行政处罚'.format(
                    _['处罚事由'],
                    _['处罚机关'],
                )
            ])
        __ = d['环保处罚']
        for _ in __.values():
            self.timeline.append([
                _['处罚日期'], meta, '公司因{},{}对公司实施行政处罚'.format(
                    _['违法类型'],
                    _['处罚单位'],
                )
            ])
        __ = d['简易注销']
        for _ in __.values():
            self.timeline.append([_['公告申请日期'], meta, '申请简易注销'])
        __ = d['经营异常']
        for _ in __.values():
            self.timeline.append(
                [_['列入日期'], meta, '因{},被列入经营异常名单'.format(_['列入原因'])])
            if '移出日期' in _.keys():
                self.timeline.append(
                    [_['移出日期'], meta, '因{},被移出经营异常名单'.format(_['移出原因'])])
        __ = d['破产重组']
        for _ in __.values():
            self.timeline.append([
                _['公开日期'], meta,
                '{}申请对{}进行破产重组'.format(_['申请人']['名称'], _['被申请人']['名称'])
            ])
        __ = d['欠税公告']
        for _ in __.values():
            self.timeline.append([
                _['发布日期'], meta, '公司涉及{}{}的欠税'.format(_['欠税余额']['金额'],
                                                      _['欠税余额']['单位'])
            ])
        __ = d['税收违法']
        for _ in __.values():
            self.timeline.append(
                [_['发布日期'], meta, '公司涉税收违法,案件性质:{}'.format(_['案件性质'], )])
        __ = d['司法拍卖']
        for _ in __.values():
            self.timeline.append([_['拍卖时间'], meta, '{}'.format(_['标题'], )])
        __ = d['土地抵押']
        for _ in __.values():
            self.timeline.append([
                _['抵押起止日期'].split('至')[0].strip().replace('\n', ''), meta,
                '{}将{}{}土地抵押给{},抵押金额{}{},土地坐落于{}'.format(
                    _['抵押人']['名称'], _['抵押面积']['数额'], _['抵押面积']['单位'],
                    _['抵押权人']['名称'], _['抵押金额']['金额'], _['抵押金额']['单位'], _['位置'])
            ])
        __ = d['询价评估']
        for _ in __.values():
            self.timeline.append([
                _['发布日期'], meta, '公司就{}发布询价评估,询价结果{}{}'.format(
                    _['标的物']['名称'],
                    _['询价结果']['金额'],
                    _['询价结果']['单位'],
                )
            ])
        __ = d['严重违法']
        for _ in __.values():
            self.timeline.append(
                [_['列入日期'], meta, '因{},被列入严重违法名单'.format(_['列入原因'])])
            if '移出日期' in _.keys():
                self.timeline.append(
                    [_['移出日期'], meta, '因{},被移出严重违法名单'.format(_['移出原因'])])
        __ = d['注销备案']
        if '清算组备案信息' in __.keys():
            _ = __['清算组备案信息']
            self.timeline.append(
                [_['清算组备案日期'], meta, '公司因{}成立清算组'.format(_['注销原因'], )])
        pass

    def f4(self):
        meta = '企业发展'
        d = self.bm.query_one(sql={'name': self.name, 'metaModel': meta})
        if d is None:
            return
        d = d['content']
        __ = d['企业业务']
        for _ in __.values():
            self.timeline.append(
                [_['成立日期'], meta, '公司开发或生产{}产品'.format(_['产品名']['名称'], )])
        __ = d['融资信息']
        for _ in __.values():
            self.timeline.append([
                _['日期'], meta, '公司向{}融资{}{}'.format(
                    _['投资方']['名称'],
                    _['金额']['金额'],
                    _['金额']['单位'],
                )
            ])
        pass

    def f5(self):
        meta = '知识产权'
        d = self.bm.query_one(sql={'name': self.name, 'metaModel': meta})
        if d is None:
            return
        d = d['content']
        __ = d['软件著作权']
        for _ in __.values():
            self.timeline.append([
                _['发布日期'] if _['发布日期'] is not None else _['登记批准日期'], meta,
                '公司申请了{}的软件著作权'.format(_['软件名称'], )
            ])
        __ = d['商标信息']
        for _ in __.values():
            self.timeline.append(
                [_['申请日期'], meta, '公司申请了商标:{}'.format(_['商标']['名称'], )])
        __ = d['网站信息']
        for _ in __.values():
            self.timeline.append(
                [_['审核日期'], meta, '公司申请了网站备案:{}'.format(_['名称'], )])
        __ = d['证书信息']
        for _ in __.values():
            self.timeline.append(
                [_['发证日期'], meta, '公司获得了{}证书'.format(_['证书']['名称'], )])
        __ = d['专利信息']
        for _ in __.values():
            self.timeline.append(
                [_['公开日期'], meta, '公司申请了{}专利'.format(_['专利']['名称'], )])
        __ = d['作品著作权']
        for _ in __.values():
            self.timeline.append(
                [_['首次发表日期'], meta, '公司申请了{}的作品著作权'.format(_['作品名称'], )])
        pass

    def f6(self):
        meta = '法律诉讼'
        d = self.bm.query_one(sql={'name': self.name, 'metaModel': meta})
        if d is None:
            return
        d = d['content']
        try:
            __ = d['被执行人']
            for _ in __.values():
                self.timeline.append([
                    _['立案日期'], meta, '公司被列为“被执行人”,执行金额{}{}'.format(
                        _['执行标的']['金额'],
                        _['执行标的']['单位'],
                    )
                ])
        except Exception as e:
            ExceptionInfo(e)
        __ = d['裁判文书']
        for _ in __.values():
            self.timeline.append([
                _['发布日期'], meta, '公司涉及的“{}”案件:{},裁决结果:{}'.format(
                    _['案由'],
                    _['案号'],
                    _['裁判文书']['标题'],
                )
            ])
        __ = d['法院公告']
        for _ in __.values():
            self.timeline.append([
                _['刊登日期'], meta,
                '法院公告:{}与{}的“{}”'.format(_['被告人/被告/被上诉人/被申请人']['名称'],
                                         _['公诉人/原告/上诉人/申请人']['名称'], _['案由'])
            ])
        # __ = d['股权冻结']
        # for _ in __.values():
        #     self.timeline.append([
        #         _['刊登日期'],
        #         '{}的{}被执行股权冻结,冻结股权数额{}{}'.format(
        #             _['被执行人']['名称'],
        #             _['标的企业']['名称'],
        #             _['股权数额']['金额'], _['股权数额']['单位']
        #         )
        #     ])
        __ = d['开庭公告']
        for _ in __.values():
            self.timeline.append([
                _['开庭时间'], meta,
                '开庭公告:{}与{}的“{}”'.format(_['被告人/被告/被上诉人/被申请人']['名称'],
                                         _['公诉人/原告/上诉人/申请人']['名称'], _['案由'])
            ])
        __ = d['立案信息']
        for _ in __.values():
            self.timeline.append([
                _['立案日期'], meta,
                '立案信息:{}与{}的“{}”'.format(_['被告人/被告/被上诉人/被申请人']['名称'],
                                         _['公诉人/原告/上诉人/申请人']['名称'], _['案由'])
            ])
        __ = d['失信被执行人']
        for _ in __.values():
            self.timeline.append(
                [_['发布日期'], meta, '公司被列为失信被执行人,履行情况:{}'.format(_['履行情况'])])
        __ = d['送达公告']
        for _ in __.values():
            self.timeline.append([
                _['发布日期'], meta,
                '送达公告:{}与{}的“{}”'.format(_['被告人/被告/被上诉人/被申请人']['名称'],
                                         _['公诉人/原告/上诉人/申请人']['名称'], _['案由'])
            ])
        __ = d['限制高消费']
        for _ in __.values():
            self.timeline.append([
                _['发布日期'], meta, '因“{}”,{}申请对{}实施限制高消费'.format(
                    _['案号']['名称'],
                    _['申请人']['名称'],
                    _['限消令对象']['名称'],
                )
            ])
        __ = d['终本案件']
        for _ in __.values():
            self.timeline.append([
                _['终本日期'], meta, '终本案件:执行标的{}{},未履行{}{}'.format(
                    _['执行标的']['金额'],
                    _['执行标的']['单位'],
                    _['未履行金额']['金额'],
                    _['未履行金额']['单位'],
                )
            ])
        pass

    def f7(self):
        meta = '公司新闻'
        d = self.bm.query_one(sql={'name': self.name, 'metaModel': meta})
        if d is None:
            return
        d = d['content']
        __ = d['企业公告']
        for _ in __.values():
            self.timeline.append(
                [_['日期'], meta, '发布企业公告:{}'.format(_['标题'], )])
        __ = d['相关公告']
        for _ in __.values():
            self.timeline.append([_['日期'], meta, '相关公告:{}'.format(_['标题'], )])
        __ = d['新闻舆情']
        for _ in __.values():
            self.timeline.append(
                [_['发布时间'], meta, '公司新闻:{}'.format(_['新闻标题'], )])
        pass
Esempio n. 8
0
class RightsGraph(BaseGraph):
    def __init__(self, **kwargs):
        """Initialise the graph and attach the 'cq_all' data model.

        :param kwargs: forwarded unchanged to ``BaseGraph.__init__``.
        """
        BaseGraph.__init__(self, **kwargs)
        # Other BaseModel settings that were tried and left disabled:
        #   tn='qcc.1.1', location='gcxy', dbname='data'
        self.base = BaseModel(tn='cq_all')

    def create_index_and_constraint(self):
        """
        Register uniqueness constraints and indexes for the entity labels
        used by this graph.

        For every label the entity's ``primarykey`` becomes its constraint
        and its ``index`` (when non-empty) is registered as well. Per the
        original author's note, a unique key already carries an index, so
        no separate index is needed for it.
        :return:
        """
        # Entity labels written by this graph.
        labels = (
            'Website',
            'Certificate',
            'Patent',
            'Trademark',
            'App',
            'WorkCopyRight',
            'SoftCopyRight',
            'Weibo',
            'OfficialAccount',
            'Applets',
        )
        constraint, index = {}, {}
        for label in labels:
            constraint[label] = [entities(label).primarykey]
            label_index = entities(label).index
            if len(label_index) > 0:
                index[label] = label_index
        self.add_index_and_constraint(index, constraint)

    def create_all_relationship(self):
        """
        Build ``enterprise -[have]-> x`` relationships from every '知识产权'
        record and merge them into the graph in batches.

        For each record the enterprise node is matched by name; when it is
        missing it is created from the '基本信息' record (together with its
        base-info relationships) or, failing that, as a ``Related`` node
        holding only name and url. Pending relationships are merged whenever
        more than 1000 have accumulated, and once more after the loop.
        :return:
        """
        records = self.base.query(
            sql={'metaModel': '知识产权'},
            # NOTE(review): hard-coded offset, presumably left over from
            # resuming an interrupted run - confirm before reuse.
            skip=79175 + 7909,
            no_cursor_timeout=True)
        batch_no, handled = 0, 0
        etp_graph = EtpGraph()
        total = records.count()
        relationships = []
        started = time.time()
        # (content section, entity class, key under which create_from_dict
        # stores the entity); sections are handled in this order.
        sections = (
            ('网站信息', Website, 'website'),
            ('证书信息', Certificate, 'certificate'),
            ('专利信息', Patent, 'patent'),
            ('商标信息', Trademark, 'trademark'),
            ('软件著作权', SoftCopyRight, 'softcopyright'),
            ('作品著作权', WorkCopyRight, 'workcopyright'),
            ('微博', Weibo, 'weibo'),
            ('微信公众号', OfficialAccount, 'WeChat'),
            ('小程序', Applets, 'applets'),
            ('APP', App, 'app'),
        )
        for record in records:
            handled += 1
            # TODO(leung): note that the url found in modules other than
            # the basic info cannot identify the company.
            etp_n = self.match_node(
                *legal, cypher='_.NAME = "{}"'.format(record['name']))
            if etp_n is None:
                # The company is not in the graph yet, so create it.
                base_info = self.base.query_one(
                    sql={'metaModel': '基本信息', 'name': record['name']})
                if base_info is not None:
                    etp_n = self.get_neo_node(Enterprise(base_info))
                    # Creating judicial relationships may create missing
                    # enterprises but not their basic relationships, so
                    # those are added here.
                    relationships += (
                        etp_graph.create_relationship_from_enterprise_baseinfo(
                            base_info))
                else:
                    # No base info available: create an incomplete company
                    # node from name and url only.
                    etp_n = self.get_neo_node(
                        Related(**{'名称': record['name'],
                                   '链接': record['url']}))

            for section, entity_cls, entity_key in sections:
                if section not in record['content'].keys():
                    continue
                data = self.get_format_dict(record['content'][section])
                for props in entity_cls.create_from_dict(data):
                    target = self.get_neo_node(props.pop(entity_key))
                    if target is not None:
                        # Remaining entries become relationship properties.
                        relationships.append(
                            Have(etp_n, target, **props).get_relationship())

            if len(relationships) > 1000:
                batch_no += 1
                spent = int(time.time() - started)
                started = time.time()
                self.graph_merge_relationships(relationships)
                if not self.index_and_constraint_statue:
                    self.create_index_and_constraint()
                message = (
                    '{}:success merge relationships to database '
                    'round {} and deal {}/{} enterprise and spend {} '
                    'seconds,and merge {} relationships.'
                ).format(dt.datetime.now(), batch_no, handled, total, spent,
                         len(relationships))
                print(SuccessMessage(message))
                relationships.clear()
        if len(relationships):
            # Flush whatever is left after the last full batch.
            batch_no += 1
            self.graph_merge_relationships(relationships)
            if not self.index_and_constraint_statue:
                self.create_index_and_constraint()
            message = (
                '{}:success merge relationships to database '
                'round {} and deal {}/{} enterprise,and'
                ' merge {} relationships.'
            ).format(dt.datetime.now(), batch_no, handled, total,
                     len(relationships))
            print(SuccessMessage(message))
            relationships.clear()

    def get_all_nodes_and_relationships_from_enterprise(self, etp):
        """
        Collect the enterprise node, its intellectual-property nodes and the
        ``Have`` relationships linking them for one record.

        :param etp: record providing 'url', 'name' and a 'content' mapping
            whose sections are read when present.
        :return: ``(nodes, relationships)``; ``([], [])`` when the enterprise
            node cannot be resolved. ``nodes`` starts with the enterprise
            node and ``relationships`` holds ``Have`` objects.
        """
        etp_n = self.get_neo_node(
            Enterprise(URL=etp['url'], NAME=etp['name']))
        if etp_n is None:
            return [], []
        nodes, relationships = [etp_n], []
        # (content section, entity class, key under which create_from_dict
        # stores the entity); sections are handled in this order.
        sections = (
            ('网站信息', Website, 'website'),
            ('证书信息', Certificate, 'certificate'),
            ('专利信息', Patent, 'patent'),
            ('商标信息', Trademark, 'trademark'),
            ('软件著作权', SoftCopyRight, 'softcopyright'),
            ('作品著作权', WorkCopyRight, 'workcopyright'),
            ('微博', Weibo, 'weibo'),
            ('微信公众号', OfficialAccount, 'WeChat'),
            ('小程序', Applets, 'applets'),
            ('APP', App, 'app'),
        )
        for section, entity_cls, entity_key in sections:
            if section not in etp['content'].keys():
                continue
            data = self.get_format_dict(etp['content'][section])
            for props in entity_cls.create_from_dict(data):
                target = self.get_neo_node(props.pop(entity_key))
                if target is None:
                    continue
                nodes.append(target)
                # Remaining entries become relationship properties.
                relationships.append(Have(etp_n, target, **props))
        return nodes, relationships

    def get_all_nodes_and_relationships(self, save_folder=None, **kwargs):
        """
        Collect the nodes and relationships of every '知识产权' record,
        grouped by label, optionally flushing them through
        ``self.save_graph``.

        Each record's ``url`` is rewritten to ``/firm_<code>.html`` using the
        32-character code that follows ``unique=``. Records whose url holds
        no such code are reported with ``print`` and skipped.

        :param save_folder: when not None, the grouped data is handed to
            ``self.save_graph`` every 10000 records and once more at the
            end, and the in-memory groups are cleared after each call.
        :param kwargs: forwarded unchanged to ``self.save_graph``.
        :return: ``(nodes, relationships)`` dicts keyed by label. Both are
            empty when ``save_folder`` is given, because they are cleared
            after the final save.
        """
        enterprises = self.base.query(
            sql={'metaModel': '知识产权'},
            no_cursor_timeout=True)
        i, j = 0, 0
        nc, rc = 0, 0
        etp_count = enterprises.count()
        nodes, relationships = {}, {}
        # Raw string: '\w' in a plain literal is an invalid escape sequence.
        unique_code_pattern = re.compile(r'(?<=unique=)\w{32}')

        def getUniqueCode(url):
            # Return the 32-character code after 'unique=', or None.
            _uc_ = unique_code_pattern.search(url)
            return None if _uc_ is None else _uc_.group(0)

        _st_ = time.time()
        for ep in enterprises:
            i += 1
            uc = getUniqueCode(ep['url'])
            if uc is None:
                print('{}:mismatch url'.format(ep['name']))
                continue
            ep['url'] = '/firm_' + uc + '.html'
            nds, rps = self.get_all_nodes_and_relationships_from_enterprise(ep)
            for _nds_ in nds:
                if _nds_ is None:
                    continue
                # Group each node's properties under its first label.
                label = list(_nds_.labels)[0]
                _nds_ = dict(label=label, **_nds_)
                nodes.setdefault(label, []).append(_nds_)
            for _rps_ in rps:
                _rps_ = _rps_.to_dict()
                relationships.setdefault(_rps_['label'], []).append(_rps_)
            if i % 10000 == 0:
                j += 1
                if save_folder is not None:
                    _nc_, _rc_ = self.save_graph(save_folder, nodes,
                                                 relationships, **kwargs)
                    nc += _nc_
                    rc += _rc_
                    nodes.clear()
                    relationships.clear()
                # Elapsed time is "now - start"; the reversed subtraction
                # used previously always logged a negative duration.
                self.logger.info(
                    SuccessMessage('success trans data to csv round {} and '
                                   'deal {}/{} enterprise spend {} seconds.'
                                   ''.format(j, i, etp_count,
                                             int(time.time() - _st_))))
                _st_ = time.time()
        if save_folder is not None:
            # Final flush of whatever accumulated since the last round.
            _nc_, _rc_ = self.save_graph(save_folder, nodes, relationships,
                                         **kwargs)
            nc += _nc_
            rc += _rc_
            nodes.clear()
            relationships.clear()
            self.logger.info('Summary:')
            self.logger.info(' save graph data:')
            self.logger.info('   {} nodes'.format(nc))
            self.logger.info('   {} relationships'.format(rc))
        return nodes, relationships