Ejemplo n.º 1
0
def _shareholder_entries(content):
    """Split the first ``viewBox`` <dl> of a page into per-entry HTML chunks.

    The serialized <dl> is split on the empty ``<dt>`` separator tag and the
    chunk preceding the first separator is discarded.

    :param content: HTML text of the page.
    :return: list of HTML chunks; an empty list when the page has no
        matching <dl> or no separator-delimited chunks.
    """
    tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    matches = tree.xpath("//div[@class='viewBox']//dl")
    if not matches:
        # A 200 response without the expected container (for example an
        # error page) is treated as "no entries" instead of raising
        # IndexError.
        return []
    entries = etree.tostring(matches[0]).split(
        '<dt style="color:#333;margin-bottom:10px;"/>')
    # Everything before the first separator is not an entry.
    del entries[0]
    return entries


def name(url):
    """Fetch a paginated listing starting at ``url`` and collect its entries.

    The first page is downloaded with ``config.headers_detail``. Its entries
    are handed to ``deal_single_info`` together with the ``info`` dict
    (presumably filled in place -- confirm against ``deal_single_info``).
    When the page advertises more than one page, pages 2..N are requested
    through ``share_url`` and processed the same way. Follow-up pages that
    fail to download or contain no entries are skipped.

    :param url: address of the first page; the ``entid`` and ``cid`` values
        for the follow-up pages are extracted from it.
    :return: tuple ``(info, flag)``. ``info`` is the result of
        ``deal_html_code.remove_repeat``; ``flag`` is 1 when the first page
        answered with HTTP 200 and 100000004 otherwise.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code == 200:
        flag = 1
        datalist = _shareholder_entries(content)
        if datalist:
            # The page count is read from the pager text; anything other
            # than exactly one match means "unknown", handled as 0.
            number = re.findall(re.compile(".*共(.*?)页.*"), content)
            totalpage = int(number[0]) if len(number) == 1 else 0

            deal_single_info(datalist, info, 0)
            if totalpage != 1:
                entid = deal_html_code.match_entid(url)
                cid = deal_html_code.match_cid(url)
                for k in xrange(2, totalpage + 1):
                    href = share_url.format(entid, cid, k)
                    page_content, page_status = Send_Request().send_request(
                        href, headers)
                    if page_status != 200:
                        continue
                    # NOTE(review): another variant of this scraper in this
                    # file uses (k - 1) * 5 + 1 -- confirm which offset
                    # deal_single_info expects.
                    start = k * 5 + 1
                    page_entries = _shareholder_entries(page_content)
                    if page_entries:
                        deal_single_info(page_entries, info, start)
        else:
            logging.info("无股东及出资信息")
    else:
        flag = 100000004

    info = deal_html_code.remove_repeat(info)
    return info, flag
Ejemplo n.º 2
0
def _split_invest_entries(content, dl_xpath):
    """Split the first <dl> matched by ``dl_xpath`` into per-entry chunks.

    The serialized <dl> is split on the styled ``<dd>`` opening tag and the
    trailing chunk is discarded.

    :param content: HTML text of the page.
    :param dl_xpath: XPath expression selecting the <dl> element(s).
    :return: list of HTML chunks, or ``None`` when ``dl_xpath`` matches
        nothing on the page.
    """
    tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    matches = tree.xpath(dl_xpath)
    if not matches:
        return None
    entries = etree.tostring(matches[0]).split(
        '<dd style="border-bottom:1px solid #AE0000;padding-bottom:10px;">'
    )
    # Delete by position: list.remove(value) would drop the first chunk that
    # merely compares equal to the last one.
    del entries[-1]
    return entries


def name(url):
    """Fetch a paginated listing starting at ``url`` and collect its entries.

    The first page is downloaded with ``config.headers_detail``. It is only
    processed when it answers with HTTP 200, contains the marker text and
    holds the expected <dl>. Its entries are handed to ``deal_single_info``
    together with the ``info`` dict (presumably filled in place -- confirm
    against ``deal_single_info``). Pages 2..N are requested through
    ``out_invest_url``; pages that fail to download or lack the <dl> are
    skipped.

    :param url: address of the first page; the ``entid`` and ``cid`` values
        for the follow-up pages are extracted from it.
    :return: tuple ``(info, flag)``. On success ``flag`` is 1 and ``info``
        is the result of ``deal_html_code.remove_repeat``; otherwise
        ``flag`` is 100000004 and ``info`` is an empty dict.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    # Bound up front so the final return also works on the failure paths.
    info = {}
    flag = 100000004
    if status_code == 200 and "企业名称" in content:
        datallist = _split_invest_entries(
            content, "//div[@class='viewBox']//dl")
        if datallist is not None:
            flag = 1
            # The page count is read from the pager text; anything other
            # than exactly one match means "unknown", handled as 0.
            number = re.findall(re.compile(u".*共(.*?)页.*"), content)
            totalpage = int(number[0]) if len(number) == 1 else 0

            deal_single_info(datallist, info, 0)
            if totalpage != 1:
                entid = deal_html_code.match_entid(url)
                cid = deal_html_code.match_cid(url)
                for k in xrange(2, totalpage + 1):
                    # The page number must be part of every request URL,
                    # otherwise the same page is downloaded repeatedly.
                    href = out_invest_url.format(entid, cid, k)
                    page_content, page_status = Send_Request().send_request(
                        href, headers)
                    if page_status != 200:
                        continue
                    # NOTE(review): another variant of this scraper in this
                    # file uses (k - 1) * 5 + 1 -- confirm which offset
                    # deal_single_info expects.
                    start = k * 5 + 1
                    datalist = _split_invest_entries(
                        page_content, "//div[@class='viewBox']/dl")
                    if datalist is not None:
                        deal_single_info(datalist, info, start)

    if flag == 1:
        info = deal_html_code.remove_repeat(info)
    return info, flag
Ejemplo n.º 3
0
def _first_invest_dl(content):
    """Return the first <dl> directly under the ``viewBox`` div, or None.

    :param content: HTML text of the page.
    :return: the first matching lxml element, or ``None`` when the page
        contains no such element.
    """
    tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    matches = tree.xpath("//div[@class='viewBox']/dl")
    return matches[0] if matches else None


def name(url):
    """Fetch a paginated listing starting at ``url`` and collect its entries.

    The first page is downloaded with ``config.headers_detail``. It is only
    processed when it answers with HTTP 200, contains the marker text and
    holds the expected <dl>. The <dl> element is handed to
    ``deal_single_info`` together with the ``info`` dict (presumably filled
    in place -- confirm against ``deal_single_info``). Pages 2..N are
    requested through ``out_invest_url``; pages that fail to download or
    lack the <dl> are skipped.

    :param url: address of the first page; the ``entid`` and ``cid`` values
        for the follow-up pages are extracted from it.
    :return: tuple ``(info, flag)``. On success ``flag`` is 1 and ``info``
        is the result of ``deal_html_code.remove_repeat``; otherwise
        ``flag`` is 100000004 and ``info`` is an empty dict.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    # Bound up front so the final return also works on the failure paths.
    info = {}
    flag = 100000004
    if status_code == 200 and "企业名称" in content:
        dl = _first_invest_dl(content)
        if dl is not None:
            flag = 1
            # The page count is read from the pager text; anything other
            # than exactly one match means "unknown", handled as 0.
            number = re.findall(re.compile(".*共(.*?)页.*"), content)
            totalpage = int(number[0]) if len(number) == 1 else 0

            deal_single_info(dl, info, 0)
            if totalpage != 1:
                entid = deal_html_code.match_entid(url)
                cid = deal_html_code.match_cid(url)
                for k in xrange(2, totalpage + 1):
                    href = out_invest_url.format(entid, cid, k)
                    page_content, page_status = Send_Request().send_request(
                        href, headers)
                    if page_status != 200:
                        continue
                    start = (k - 1) * 5 + 1
                    page_dl = _first_invest_dl(page_content)
                    if page_dl is not None:
                        deal_single_info(page_dl, info, start)

    if flag == 1:
        info = deal_html_code.remove_repeat(info)
    return info, flag