Esempio n. 1
0
    def parse(self, response):
        """Yield one item describing the shareholder entries of a JSON response.

        ``response.text`` is decoded as JSON and every entry under
        ``data -> result`` is reduced to a dict holding ``id`` and ``name``,
        plus ``tags`` (tag names joined with commas) when ``tagList`` is
        non-empty and ``amomon``/``percent`` when ``capital`` is non-empty.
        The stringified list of those dicts is stored in ``holderInfo`` and
        ``response.meta['id']`` in ``companyId``.

        Nothing is yielded when ``data`` or ``result`` is missing or empty.
        Any exception raised while processing is caught and its ``args`` are
        printed, in which case nothing is yielded either.
        """
        item = QiyemuluItem()
        try:
            payload = json.loads(response.text)
            alldata = payload.get("data", "")
            if alldata:
                # Shareholder information.
                result = alldata.get("result", "")
                if result:
                    holders = []
                    for info in result:
                        holder = {
                            'id': info.get("id", ''),
                            'name': info.get("name", ''),
                        }
                        tag_list = info.get("tagList", '')
                        if tag_list:
                            # Collect every tag name first and join once,
                            # rather than rebuilding the string per tag.
                            holder['tags'] = ','.join(
                                [tag.get("name", "0") for tag in tag_list])
                        capital = info.get("capital", '')
                        if capital:
                            # Each entry overwrites the same two keys, so the
                            # values of the last capital entry are kept.
                            for cap in capital:
                                holder['amomon'] = cap.get("amomon", "")
                                holder['percent'] = cap.get("percent", "")
                        holders.append(holder)
                    item['companyId'] = response.meta['id']
                    item["holderInfo"] = str(holders)
                    yield item

        except Exception as e:
            print(e.args)
Esempio n. 2
0
    def content_parse(self, response):
        """Add the page's container text to the carried-over info and yield it.

        The dict passed in ``response.meta['infoDict']`` gains a ``container``
        key: all text nodes selected by ``div.container ::text``, concatenated
        with every space character removed. The dict is then stringified into
        ``allInfo`` and ``response.meta['id']`` is stored as ``companyId``.
        """
        item = QiyemuluItem()
        carried = response.meta['infoDict']

        # Gather the text fragments, glue them together, then drop spaces.
        fragments = response.css("div.container ::text").extract()
        joined = ''.join(fragments)
        carried['container'] = joined.replace(" ", "")

        item['companyId'] = response.meta['id']
        item['allInfo'] = str(carried)
        yield item
Esempio n. 3
0
    def parse(self, response):
        """Yield one item summarising the entries under ``data -> resultList``.

        Every entry becomes a dict. ``plaintiff`` and ``defendant`` are only
        present when the entry has a non-empty list for them, and hold the
        parties' ``name`` values joined with commas. The remaining keys are
        copied as they are, defaulting to an empty string. The stringified
        list of dicts goes into ``allInfo`` and ``response.meta['id']`` into
        ``companyId``.

        Nothing is yielded when ``data`` or ``resultList`` is missing or
        empty. Any exception is caught and its ``args`` are printed.
        """
        # Keys copied straight from each entry, in output order.
        plain_keys = (
            'caseReason', 'caseNo', 'startDate', 'litigant',
            'contractors', 'judge', 'court', 'courtroom',
        )
        item = QiyemuluItem()
        try:
            alldata = json.loads(response.text).get("data", "")
            if not alldata:
                return
            result = alldata.get("resultList", "")
            if not result:
                return

            cases = []
            for entry in result:
                case = {}
                plaintiffs = entry.get("plaintiff", "")
                defendants = entry.get("defendant", "")
                if plaintiffs:
                    case['plaintiff'] = ','.join(
                        [party.get('name', '') for party in plaintiffs])
                if defendants:
                    case['defendant'] = ','.join(
                        [party.get('name', '') for party in defendants])
                for key in plain_keys:
                    case[key] = entry.get(key, '')
                cases.append(case)

            item['companyId'] = response.meta['id']
            item["allInfo"] = str(cases)
            yield item
        except Exception as e:
            print(e.args)
Esempio n. 4
0
 def parse(self, response):
     """Yield one item listing the entries found under ``data -> result``.

     Each entry is reduced to its ``id``, its ``name`` and ``typeJoin`` (the
     elements of the entry's ``typeJoin`` value joined with commas). The
     stringified list is stored in ``allInfo`` and ``response.meta['id']``
     in ``companyId``.

     Nothing is yielded when ``data`` or ``result`` is missing or empty. Any
     exception is caught and its ``args`` are printed.
     """
     item = QiyemuluItem()
     try:
         alldata = json.loads(response.text).get("data", "")
         if not alldata:
             return
         '''高管信息'''  # no-op label string; it reads "executive information"
         result = alldata.get("result", "")
         if not result:
             return
         executives = [
             {
                 'id': info.get("id", ''),
                 'name': info.get("name", ''),
                 'typeJoin': ','.join(info.get("typeJoin", "")),
             }
             for info in result
         ]
         item['companyId'] = response.meta['id']
         item["allInfo"] = str(executives)
         yield item
     except Exception as e:
         print(e.args)
Esempio n. 5
0
 def parse(self, response):
     """Yield one item listing the change records under ``data -> result``.

     For every record ``changeItem``, ``changeTime`` and ``createTime`` are
     copied (defaulting to an empty string), while ``contentBefore`` and
     ``contentAfter`` are passed through ``remove_tags`` first. The
     stringified list is stored in ``allInfo`` and ``response.meta['id']``
     in ``companyId``.

     Nothing is yielded when ``data`` or ``result`` is missing or empty. Any
     exception is caught and its ``args`` are printed.
     """
     item = QiyemuluItem()
     try:
         alldata = json.loads(response.text).get("data", "")
         result = alldata.get("result", "") if alldata else None
         if result:
             changes = []
             for record in result:
                 changes.append({
                     'changeItem': record.get("changeItem", ''),
                     'changeTime': record.get("changeTime", ''),
                     'createTime': record.get("createTime", ''),
                     'contentBefore': remove_tags(
                         record.get("contentBefore", '')),
                     'contentAfter': remove_tags(
                         record.get("contentAfter", '')),
                 })
             item['companyId'] = response.meta['id']
             item["allInfo"] = str(changes)
             yield item
     except Exception as e:
         print(e.args)
Esempio n. 6
0
    def parse(self, response):
        """Yield one company item per search hit in a JSON response.

        Responses whose status is not 200 are ignored. Otherwise
        ``response.text`` is decoded as JSON. When its ``state`` is ``"ok"``,
        every element of ``data`` except the last one is turned into an item;
        for any other state the searched name (``response.meta['name']``) is
        printed together with a "no data" message.

        Every hit gets its own ``QiyemuluItem`` instance, so an item that was
        already yielded is never overwritten by a later hit. Exceptions raised
        while handling the decoded payload are caught and their ``args`` are
        printed; a JSON decoding error is not caught here.
        """
        if response.status != 200:
            return

        json_data = json.loads(response.text)
        state = json_data.get("state", '')
        # Fields stored under the same name in the item and in the hit.
        copied_fields = (
            'alias', 'legalPersonName', 'estiblishTime', 'regCapital',
            'regStatus', 'companyOrgType', 'regLocation', 'logo',
            'businessScope', 'base', 'district', 'historyNames',
            # Fields added at a later stage.
            'intro', 'regNumber', 'actualCapital', 'taxQualification',
            'staffNumRange', 'socialStaffNum', 'operatingPeriod',
            'approvedTime', 'regInstitute', 'updatetime',
        )
        try:
            if state != "ok":
                # ``response.meta`` was previously misspelled, which raised an
                # AttributeError instead of printing this message.
                print(response.meta['name'], "没有数据")
                return

            data = json_data.get('data', '')
            if not data:
                return

            # The last element of ``data`` is skipped.
            # NOTE(review): the reason is not visible here; confirm that the
            # final element is not a regular hit.
            for info in data[:-1]:
                # A fresh item per hit: re-yielding one shared instance would
                # let later hits overwrite data still held by consumers.
                item = QiyemuluItem()
                item['cname'] = response.meta['name']
                item['companyId'] = info.get('id', "")
                item['companyName'] = remove_tags(info.get('name', ""))
                item['phoneList'] = info.get('phone', "")
                item['emailList'] = info.get('emails', "")
                item['websiteList'] = info.get('websites', "")
                item['industry'] = info.get('categoryStr', "")
                item['englishName'] = info.get('property3', "")

                credit_code = info.get('creditCode', "")
                item['creditCode'] = credit_code
                # The tax number is taken from the credit code as well.
                item['taxNumber'] = credit_code
                # Characters 9-17 (1-based) of the 18-character unified social
                # credit code are the organisation code (GB 32100-2015), hence
                # the [8:17] slice. ``or ""`` keeps a null code from raising.
                item['orgNumber'] = (credit_code or "")[8:17]

                for field in copied_fields:
                    item[field] = info.get(field, "")
                yield item
        except Exception as e:
            print(e.args)
Esempio n. 7
0
 def parse(self, response):
     """Yield a single company item built from the ``data`` object of a response.

     ``response.text`` is decoded as JSON. When ``data`` is non-empty its
     values are copied into a new ``QiyemuluItem``; keys that are absent
     default to an empty string. ``phoneList``, ``emailList`` and
     ``district`` are always set to an empty string, ``websiteList`` is
     stringified, and ``operatingPeriod`` is ``fromTime`` and ``toTime``
     separated by a slash.

     Nothing is yielded when ``data`` is missing or empty. Any exception is
     caught and its ``args`` are printed.
     """
     # Each pair is (item field, key read from ``data``). The three groups
     # are applied in order, with the special-cased fields set in between.
     head_fields = (
         ('companyId', "id"),                      # company id
         ('companyName', "name"),                  # company name
         ('alias', "alias"),                       # alias
         ('historyNames', "historyNames"),         # former names
         ('legalPersonName', "legalPersonName"),   # legal representative
     )
     middle_fields = (
         ('estiblishTime', "estiblishTime"),       # date of establishment
         ('regNumber', "regNumber"),               # business registration number
         ('regCapital', "regCapital"),             # registered capital
         ('actualCapital', "actualCapital"),       # paid-in capital
         ('creditCode', "creditCode"),             # unified credit code
         ('taxNumber', "taxNumber"),               # taxpayer identification number
         ('orgNumber', "orgNumber"),               # organisation code
         ('taxQualification', "taxQualification"), # taxpayer qualification
         ('englishName', "property3"),             # English name
         ('regStatus', "regStatus"),               # operating status
         ('staffNumRange', "staffNumRange"),       # staff size
         ('socialStaffNum', "socialStaffNum"),     # number of insured staff
         ('companyOrgType', "companyOrgType"),     # company type
         ('industry', "industry"),                 # industry
     )
     tail_fields = (
         ('regLocation', "regLocation"),           # registered address
         ('approvedTime', "approvedTime"),         # approval date
         ('regInstitute', "regInstitute"),         # registration authority
         ('logo', "logo"),                         # company logo address
         ('updatetime', "updatetime"),             # latest update time
         ('businessScope', "businessScope"),       # business scope
         ('intro', "baseInfo"),                    # introduction
         ('base', "base"),                         # region abbreviation
     )
     try:
         alldata = json.loads(response.text).get("data", "")
         if not alldata:
             return

         item = QiyemuluItem()
         for field, key in head_fields:
             item[field] = alldata.get(key, "")

         # Contact list and email list are left blank here.
         item['phoneList'] = ""
         item['emailList'] = ""
         # Website list, stored as its string representation.
         item['websiteList'] = str(alldata.get("websiteList", ""))

         for field, key in middle_fields:
             item[field] = alldata.get(key, "")

         # Operating period: start and end joined by a slash.
         period_start = alldata.get("fromTime", "")
         period_end = alldata.get("toTime", "")
         item['operatingPeriod'] = "%s/%s" % (period_start, period_end)

         for field, key in tail_fields:
             item[field] = alldata.get(key, "")

         # District abbreviation is left blank here.
         item['district'] = ""
         yield item
     except Exception as e:
         print(e.args)
Esempio n. 8
0
 def parse(self,response):
     print(response.text)
     item = QiyemuluItem()