コード例 #1
0
 def __init__(self, userId, password):
     """Create the client, an empty detail-URL list and load the text data.

     Args:
         userId: Forwarded unchanged to ``QianzhanClient``.
         password: Forwarded unchanged to ``QianzhanClient``.
     """
     self._qianzhan_client = QianzhanClient(userId, password)
     self._company_detail_url_list = []
     self._txt = get_1000_txt()
     # Hand the length to logging as an argument so the message is only
     # interpolated when a handler actually emits the record.
     logging.info("txt len:->%d", len(self._txt))
コード例 #2
0
 def __init__(self, userId, password):
     """Build a ``QianzhanClient`` from the arguments and keep it on self.

     Args:
         userId: Forwarded unchanged to ``QianzhanClient``.
         password: Forwarded unchanged to ``QianzhanClient``.
     """
     client = QianzhanClient(userId, password)
     self._qianzhan_client = client
コード例 #3
0
class Spider(object):
    """Fetch a company page via ``QianzhanClient`` and parse it into a dict."""

    # Keys assigned, in page order, to the first ten ``span.info`` elements
    # found under ``ul.art-basic``.
    _BASIC_INFO_KEYS = (
        'organization_registration_code',
        'registration_number',
        'legal_representative',
        'business_status',
        'registered_capital',
        'business_type',
        'register_date',
        'operating_period',
        'business_address',
        'business_scope',
    )

    def __init__(self, userId, password):
        """Create the client that performs every request.

        Args:
            userId: Forwarded unchanged to ``QianzhanClient``.
            password: Forwarded unchanged to ``QianzhanClient``.
        """
        self._qianzhan_client = QianzhanClient(userId, password)

    def _get_company(self, url):
        """Download the company page at ``url`` and extract its fields.

        Args:
            url: Passed to ``QianzhanClient.get_company``.

        Returns:
            dict: Always contains ``company_name``, ``item_update_time``
            (today's local date as ``YYYY-MM-DD``), the ten basic-info
            fields, ``province``, ``registration_authority``,
            ``hdencryptCode``, ``hdoc_area`` and ``searchitemnbinfo``.
            ``url`` is present only when the page has an ``a.url`` element,
            and ``searchitemnb`` only when ``searchitemnbinfo`` is non-empty.

        Raises:
            IndexError: If the page has fewer ``span.info`` elements than
                the fixed positions read below.
            AttributeError, TypeError: If a required element (the name
                heading or one of the hidden inputs) is missing, because
                ``select_one`` then yields ``None``.
        """
        response = self._qianzhan_client.get_company(url)
        soup = BeautifulSoup(response.text, 'lxml')

        company = {
            'company_name':
            soup.select_one('h1[class="ct_name"]').contents[0]
        }

        # The website link is optional: skip it when the anchor is absent
        # instead of swallowing every exception.
        url_tag = soup.select_one('a[class="url"]')
        if url_tag is not None:
            company['url'] = url_tag.text

        # strftime() without a time tuple formats the current local time.
        company['item_update_time'] = time.strftime('%Y-%m-%d')

        basic_spans = soup.select(
            'ul[class="art-basic"] li span[class="info"]')
        for position, key in enumerate(self._BASIC_INFO_KEYS):
            company[key] = basic_spans[position].text

        org_spans = soup.select('ul[class="art-org"] li span[class="info"]')
        company['province'] = org_spans[1].text
        company['registration_authority'] = org_spans[4].text

        # Hidden form inputs whose values are sent back to the client calls
        # below.
        encrypt_code = soup.select_one('input[id="hdencryptCode"]')['value']
        oc_area = soup.select_one('input[id="hdoc_area"]')['value']
        company['hdencryptCode'] = encrypt_code
        company['hdoc_area'] = oc_area

        nb_info = self._qianzhan_client.post_searchitemnbinfo(
            encrypt_code, oc_area)
        company['searchitemnbinfo'] = nb_info
        if nb_info:
            # Only the first entry's year is used for the follow-up request.
            company['searchitemnb'] = self._qianzhan_client.post_searchitemnb(
                encrypt_code, oc_area, nb_info[0].get('year'))

        return company