Exemple #1
0
class get_json(object):
    """Helpers that move company names around and scrape their details.

    ``transfer`` copies ``_id``/``company_name`` pairs from MongoDB into
    Redis; ``get_companydetails`` searches xin.baidu.com for one company and
    stores the registration details found on its detail page.

    The class relies on names defined elsewhere in the module: ``MongoDB``,
    ``REDIS``, the ``Redis*`` settings, the fetcher ``s`` and the handle
    ``db1``.
    """

    # (item key, xpath) pairs read from the detail page, in storage order.
    _DETAIL_XPATHS = (
        # Unified social credit code
        ('credit_code',
         '//td[contains(text(),"统一社会信用代码")]/following-sibling::td[1]/text()'),
        # Date the company was established
        ('register_time',
         '//*[@class="zx-detail-basic-table"]//td[contains(text(),"成立日期")]/following-sibling::td[1]/text()'),
        # Registered capital
        ('register_money',
         '//td[contains(text(),"注册资本")]/following-sibling::td[1]/text()'),
        # Industry
        ('industry',
         '//td[contains(text(),"所属行业")]/following-sibling::td[1]/text()'),
        # Business state (e.g. active / deregistered)
        ('business_state',
         '//td[contains(text(),"经营状态")]/following-sibling::td[1]/text()'),
        # Organization code
        ('organization_code',
         '//td[contains(text(),"组织机构代码")]/following-sibling::td[1]/text()'),
        # Business registration number
        ('register_num',
         '//td[contains(text(),"工商注册号")]/following-sibling::td[1]/text()'),
        # Legal representative
        ('legal_man',
         '//td[contains(text(),"法定代表人")]/following-sibling::td[1]/text()'),
        # Registration authority
        ('regist_organ',
         '//td[contains(text(),"登记机关")]/following-sibling::td[1]/text()'),
        # Approval / annual inspection date
        ('confirmtime',
         '//*[@class="zx-detail-basic-table"]//td[contains(text(),"审核/年检日期")]/following-sibling::td[1]/text()'),
        # Business term
        ('business_timeout',
         '//*[@class="zx-detail-basic-table"]//td[contains(text(),"营业期限")]/following-sibling::td[1]/text()'),
        # NOTE(review): the key says "address" but the label queried is the
        # enterprise type; the key is kept so the stored schema is unchanged.
        ('register_address',
         '//*[@class="zx-detail-basic-table"]//td[contains(text(),"企业类型")]/following-sibling::td[1]/text()'),
        # Registered address
        ('registerAddress',
         '//*[@class="zx-detail-basic-table"]//td[contains(text(),"注册地址")]/following-sibling::td[1]/text()'),
        # Business scope (read from the data-content attribute)
        ('business_scope',
         '//td[contains(text(),"经营范围")]/following-sibling::td[1]//@data-content'),
        # Former name
        ('usedName',
         '//td[contains(text(),"曾用名")]/following-sibling::td[1]/text()'),
    )

    def __init__(self):
        """Open the MongoDB collection and the Redis connection."""
        self.db = MongoDB('172.16.74.249:27017', 'db_reptile_company',
                          'company_name')
        self.redis = REDIS(host=RedisHost,
                           port=RedisPort,
                           password=RedisPassword,
                           db=RedisDB)
        # Most recently scraped detail record (see get_companydetails).
        self.item = {}

    # MongoDB -> Redis
    def transfer(self):
        """Copy every document's ``_id`` and ``company_name`` into Redis.

        Each pair is JSON-encoded and passed to ``self.redis.add`` under the
        key ``'coampanylidt'``; the returned value is printed with the item.
        """
        for doc in self.db.mongo_find({}):
            item = {'_id': doc['_id'], 'company_name': doc['company_name']}
            # NOTE(review): the key spelling is kept as-is; presumably other
            # code reads the same key - confirm before renaming it.
            added = self.redis.add('coampanylidt', json.dumps(item))
            print('存入成功', added, item)

    @staticmethod
    def _find_pids(text):
        """Return every pid that is directly followed by an ``entName`` key.

        The capture stops at the closing quote. A greedy ``\\S+`` would run
        across several results whenever the text contains no whitespace and
        return one bogus pid spanning all of them.
        """
        return re.findall(r'\{"pid":"([^"]+)","entName":', text)

    # Baidu enterprise credit: basic information
    def get_companydetails(self, company_name):
        """Scrape and store the registration details of ``company_name``.

        Searches xin.baidu.com, follows the first result to its detail page
        and builds ``self.item`` from the fields found there.

        Args:
            company_name: Name to search for; its MD5 hex digest is the
                record ``_id``.

        Returns:
            A status message: the record was inserted, the listed name did
            not match ``company_name``, or no search result was found. In the
            last two cases the source record is flagged through ``self.db``.
        """
        search_page = s.fetch('https://xin.baidu.com/s?q={}&t=0'.format(
            parse.quote(company_name)))
        record_id = hashlib.md5(
            company_name.encode(encoding='utf-8')).hexdigest()

        pids = self._find_pids(search_page.text)
        if not pids:
            self.db.mong_find_one_update({"_id": record_id},
                                         {"flag": "未找到匹配的公司名"})
            return '未找到匹配的公司名---%s' % company_name

        details_href = 'https://xin.baidu.com//detail//compinfo?pid=' + pids[0]
        print(details_href)

        # Name shown for the first search result. ``first=True`` yields None
        # instead of raising when the anchor is absent; None never equals
        # ``company_name``, so an unreadable name is flagged, not inserted.
        # NOTE(review): confirm this xpath still matches the search page.
        listed_name = search_page.html.xpath(
            '//a[@class="zx-list-item-url"]/@title', first=True)

        detail_page = s.fetch(details_href)

        # Build a fresh dict per call so the object handed to the database
        # layer is never shared with an earlier record.
        item = {
            key: detail_page.html.xpath(xpath, first=True)
            for key, xpath in self._DETAIL_XPATHS
        }
        # Source website
        item['web_source'] = 'https://xin.baidu.com/'
        # Company name as requested by the caller
        item['company_name'] = company_name
        # Source URL
        item['company_url'] = details_href
        item['_id'] = record_id
        item['web_update_time'] = time.strftime("%Y-%m-%d")
        self.item = item

        # The length test always holds here (20 keys are set above); it is
        # kept as written.
        if company_name != listed_name and len(self.item) > 4:
            self.db.mong_find_one_update({"_id": self.item['_id']},
                                         {"flag": "公司名有问题"})
            return '公司名有问题 --- %s' % self.item['company_name']

        # NOTE(review): ``db1`` is not defined in this class; presumably a
        # module-level handle for the details collection - confirm.
        db1.mongo_add(self.item)
        return '%s 插入成功 !!!!' % self.item['company_name']
Exemple #2
0
        if down in rec_list1:
            down_translate = '撤销复审决定书'
        elif down in rec_list2:
            down_translate = '关于撤销连续三年未使用商标的决定'
        elif down in rec_list3:
            down_translate = '商标撤销复审答辩通知书'
        elif down in rec_list4:
            down_translate = '商标无效宣告答辩通知书'
        elif down in rec_list5:
            down_translate = '连续三年不使用撤销申请的结案通知'
        elif down in rec_list6:
            down_translate = '商标异议答辩通知书'
        same_pic_data = db.find_many('pic_url', pic_url)
        try:
            if same_pic_data[0]['id'] < same_pic_data[1]['id']:
                db.mong_find_one_update({'id': same_pic_data[0]['id']},
                                        {"type": up_translate})
                db.mong_find_one_update({'id': same_pic_data[1]['id']},
                                        {"type": down_translate})

                os.remove(r'G:\after\up.jpg')
                os.remove(r'G:\after\down.jpg')
                os.remove(r'G:\before\target.jpg')
            else:
                db.mong_find_one_update({'id': same_pic_data[0]['id']},
                                        {"type": down_translate})

                db.mong_find_one_update({'id': same_pic_data[1]['id']},
                                        {"type": up_translate})

                os.remove(r'G:\after\up.jpg')
                os.remove(r'G:\after\down.jpg')