Python remove_repeat Exemples, PublicCode.deal_html_code.remove_repeat Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : BJ_report_schange.py Projet : cash2one/BussinessQG

def name(url):
    """Fetch ``url`` and build one row per ``<dd>``-separated chunk of the page.

    The first ``<dl>`` directly under ``div.viewBox`` is serialised and split on
    the red-bordered ``<dd>`` separator; each chunk is re-parsed and four
    labelled values are read from it through ``deal_dd_content``.

    :param url: address of the page to request.
    :returns: ``(info, flag)``. ``info`` maps the chunk index to
        ``[name, percent_pre, percent_after, dates]``; ``flag`` is ``1`` when
        the request returned HTTP 200 and ``100000004`` otherwise (in which
        case ``info`` is empty).
    :raises IndexError: if the page contains no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code != 200:
        return info, 100000004

    flag = 1
    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    # lxml elements expose ``xpath``; there is no ``path`` method.
    dl = result.xpath("//div[@class= 'viewBox']/dl")[0]
    chunks = etree.tostring(dl).split(
        '<dd style="border-bottom:1px solid #AE0000;padding-bottom:10px;">'
    )
    # Discard whatever follows the final separator. ``del`` is positional,
    # whereas ``remove(chunks[-1])`` would delete the first equal chunk.
    del chunks[-1]

    labels = (u"股东", u"变更前", u"变更后", u"变更日期")
    for i, chunk in enumerate(chunks):
        # Parse the individual chunk (not the whole page) so each row is
        # extracted from its own fragment.
        node = etree.HTML(chunk, parser=etree.HTMLParser(encoding='utf-8'))
        info[i] = [deal_dd_content(label, node) for label in labels]

    # Keep the returned mapping, as the sibling scrapers do.
    info = deal_html_code.remove_repeat(info)
    return info, flag

Exemple #2

0

Afficher le fichier

Fichier : BJ_report_share.py Projet : cash2one/BussinessQG

def name(url):
    """Fetch ``url`` plus its follow-up pages and collect the parsed rows.

    The first ``<dl>`` under ``div.viewBox`` is serialised and split on the
    ``<dt>`` separator; the resulting chunks are handed to
    ``deal_single_info``. The page count is read from the page text, and pages
    ``2..totalpage`` are requested through ``share_url``.

    :param url: address of the first page; also used to derive the ``entid``
        and ``cid`` values for the follow-up URLs.
    :returns: ``(info, flag)``. ``info`` is the result of
        ``deal_html_code.remove_repeat`` applied to the collected rows;
        ``flag`` is ``1`` when the first request returned HTTP 200 and
        ``100000004`` otherwise.
    :raises IndexError: if a successfully fetched page has no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    separator = '<dt style="color:#333;margin-bottom:10px;"/>'

    if status_code == 200:
        flag = 1
        result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
        dl = result.xpath("//div[@class='viewBox']//dl")[0]
        datalist = etree.tostring(dl).split(separator)
        # The text before the first separator is not a record.
        del datalist[0]

        if datalist:
            number = re.findall(re.compile(".*共(.*?)页.*"), content)
            # Exactly one match is trusted; anything else means "no paging".
            totalpage = int(number[0]) if len(number) == 1 else 0

            deal_single_info(datalist, info, 0)
            if totalpage != 1:
                entid = deal_html_code.match_entid(url)
                cid = deal_html_code.match_cid(url)
                for k in xrange(2, totalpage + 1):
                    href = share_url.format(entid, cid, k)
                    page_content, page_status = Send_Request().send_request(
                        href, headers)
                    if page_status != 200:
                        # Best effort: skip the page, but leave a trace
                        # instead of dropping it silently.
                        logging.warning(
                            "page %s skipped, status %s: %s",
                            k, page_status, href)
                        continue
                    page = etree.HTML(
                        page_content,
                        parser=etree.HTMLParser(encoding='utf-8'))
                    page_dl = page.xpath("//div[@class='viewBox']//dl")[0]
                    page_chunks = etree.tostring(page_dl).split(separator)
                    del page_chunks[0]
                    if page_chunks:
                        deal_single_info(page_chunks, info, k * 5 + 1)
        else:
            logging.info("无股东及出资信息")
    else:
        flag = 100000004

    info = deal_html_code.remove_repeat(info)
    return info, flag

Exemple #3

0

Afficher le fichier

def name(url):
    """Fetch ``url`` plus its follow-up pages and collect the parsed rows.

    The first ``<dl>`` under ``div.viewBox`` is serialised and split on the
    red-bordered ``<dd>`` separator; the chunks are handed to
    ``deal_single_info``. The page count is read from the page text, and pages
    ``2..totalpage`` are requested through ``out_invest_url``.

    :param url: address of the first page; also used to derive the ``entid``
        and ``cid`` values for the follow-up URLs.
    :returns: ``(info, flag)``. ``flag`` is ``1`` on success, in which case
        ``info`` is the result of ``deal_html_code.remove_repeat``. ``flag`` is
        ``100000004`` and ``info`` is an empty dict when the request does not
        return HTTP 200 or the expected marker text is absent from the page.
    :raises IndexError: if a successfully fetched page has no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    # Bound up front so the failure paths can return it.
    info = {}
    if status_code != 200:
        return info, 100000004

    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl = result.xpath("//div[@class='viewBox']//dl")[0]
    if "企业名称" not in content:
        return info, 100000004

    separator = (
        '<dd style="border-bottom:1px solid #AE0000;padding-bottom:10px;">'
    )
    chunks = etree.tostring(dl).split(separator)
    # Discard whatever follows the final separator (positional delete;
    # ``remove(chunks[-1])`` would drop the first equal chunk instead).
    del chunks[-1]

    # Byte-string pattern, matching the byte-string membership test above.
    number = re.findall(re.compile(".*共(.*?)页.*"), content)
    # Exactly one match is trusted; anything else means "no paging".
    totalpage = int(number[0]) if len(number) == 1 else 0

    deal_single_info(chunks, info, 0)
    if totalpage != 1:
        entid = deal_html_code.match_entid(url)
        cid = deal_html_code.match_cid(url)
        for k in xrange(2, totalpage + 1):
            # Build the URL per page and send the same headers as the first
            # request; otherwise every iteration fetches the same document.
            href = out_invest_url.format(entid, cid, k)
            page_content, page_status = Send_Request().send_request(
                href, headers)
            if page_status != 200:
                # Best effort: a failed follow-up page is skipped.
                continue
            page = etree.HTML(
                page_content, parser=etree.HTMLParser(encoding='utf-8'))
            page_dl = page.xpath("//div[@class='viewBox']/dl")[0]
            page_chunks = etree.tostring(page_dl).split(separator)
            del page_chunks[-1]
            deal_single_info(page_chunks, info, k * 5 + 1)

    info = deal_html_code.remove_repeat(info)
    return info, 1

Exemple #4

0

Afficher le fichier

def name(url):
    """Fetch ``url`` and build one row per ``<br/>``-separated chunk of the page.

    The first ``<dl>`` under ``div.viewBox`` is serialised and split on
    ``<br/>``; each chunk is re-parsed and two labelled values are read from it
    through ``deal_dd_content``.

    :param url: address of the page to request.
    :returns: ``(info, flag)``. ``info`` maps the chunk index to
        ``[types, valto, uuid]`` with ``uuid`` always the empty string;
        ``flag`` is ``1`` when the request returned HTTP 200 and ``100000004``
        otherwise (in which case ``info`` is empty).
    :raises IndexError: if the page contains no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code != 200:
        return info, 100000004

    flag = 1
    result = etree.HTML(content, parser=etree.HTMLParser(encoding="utf-8"))
    dl = result.xpath("//div[@class= 'viewBox']//dl")[0]
    chunks = etree.tostring(dl).split('<br/>')
    # Discard whatever follows the final ``<br/>`` (positional delete;
    # ``remove(chunks[-1])`` would drop the first equal chunk instead).
    del chunks[-1]

    for i, chunk in enumerate(chunks):
        node = etree.HTML(chunk, parser=etree.HTMLParser(encoding="utf-8"))
        types = deal_dd_content(u"许可文件名称", node)
        valto = deal_dd_content(u"有效期至", node)
        info[i] = [types, valto, '']

    # Keep the returned mapping, as the sibling scrapers do.
    info = deal_html_code.remove_repeat(info)
    return info, flag

Exemple #5

0

Afficher le fichier

def name(url):
    """Fetch ``url`` and group the ``<dd>`` cells of the page into rows of four.

    The direct ``<dd>`` children of the first ``<dl>`` under ``div.viewBox``
    are read in document order. Every four consecutive cells form one row
    (name, value before, value after, date), to which an empty ``uuid`` string
    is appended once the fourth cell has been read.

    :param url: address of the page to request.
    :returns: ``(info, flag)``. ``info`` maps the row number (starting at 0)
        to the list of cell values; ``flag`` is ``1`` when the request
        returned HTTP 200 and ``100000004`` otherwise (in which case ``info``
        is empty).
    :raises IndexError: if the page contains no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code != 200:
        return info, 100000004

    flag = 1
    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl = result.xpath("//div[@class= 'viewBox']//dl")[0]

    for i, cell in enumerate(dl.xpath('./dd')):
        texts = cell.xpath("./text()")
        # A <dd> without a text node yields an empty value instead of
        # raising IndexError.
        value = deal_html_code.remove_space(texts[0]) if texts else ''
        row = info.setdefault(i // 4, [])
        row.append(value)
        if i % 4 == 3:
            # Row complete: add the (always empty) uuid column.
            row.append('')

    # Keep the returned mapping, as the sibling scrapers do.
    info = deal_html_code.remove_repeat(info)
    return info, flag

Exemple #6

0

Afficher le fichier

 def name(self, url):
     """Fetch ``url`` and build one row per ``<br/>``-separated chunk of the page.

     The first ``<dl>`` under ``div.viewBox`` is serialised and split on
     ``<br/>``. From each chunk the text of the styled ``<dt>`` and seven
     labelled values (via ``self.deal_dd_content``) are collected; the two
     date values fall back to ``'0000-00-00'`` when they come back empty.

     :param url: address of the page to request.
     :returns: ``(info, flag)``. ``info`` maps the chunk index to
         ``[name, types, reg_amount, ra_ways, ra_date, true_amount, ta_ways,
         ta_date]`` and is passed through ``deal_html_code.remove_repeat``
         when non-empty; ``flag`` is ``1`` when the request returned HTTP 200
         and ``100000004`` otherwise.
     :raises IndexError: if the page has no matching ``<dl>`` or a chunk has
         no matching ``<dt>`` text.
     """
     headers = config.headers_detail
     content, status_code = Send_Request().send_request(url, headers)
     info = {}
     # (label, is_date) pairs in output order; dates get a placeholder
     # when the lookup returns an empty string.
     fields = (
         (u"投资人类型", False),
         (u"认缴出资金额", False),
         (u"认缴出资方式", False),
         (u"认缴出资时间", True),
         (u"实缴出资金额", False),
         (u"实缴出资方式", False),
         (u"实缴出资时间", True),
     )
     if status_code == 200:
         flag = 1
         result = etree.HTML(content,
                             parser=etree.HTMLParser(encoding='utf-8'))
         dl = result.xpath("//div[@class='viewBox']//dl")[0]
         chunks = etree.tostring(dl).split('<br/>')
         # Discard whatever follows the final ``<br/>`` (positional delete;
         # ``remove(chunks[-1])`` would drop the first equal chunk instead).
         del chunks[-1]
         for i, chunk in enumerate(chunks):
             node = etree.HTML(chunk,
                               parser=etree.HTMLParser(encoding='utf-8'))
             title = node.xpath(
                 "//dt[@style='color:#333;margin-bottom:10px;']/text()")[0]
             row = [title]
             for label, is_date in fields:
                 value = self.deal_dd_content(label, node)
                 if is_date and value == '':
                     value = '0000-00-00'
                 row.append(value)
             info[i] = row
     else:
         flag = 100000004
     if info:
         info = deal_html_code.remove_repeat(info)
     return info, flag

Exemple #7

0

Afficher le fichier

def name(url):
    """Fetch ``url`` plus its follow-up pages and collect the parsed rows.

    The first ``<dl>`` directly under ``div.viewBox`` of each page is handed
    to ``deal_single_info``. The page count is read from the page text, and
    pages ``2..totalpage`` are requested through ``out_invest_url``.

    :param url: address of the first page; also used to derive the ``entid``
        and ``cid`` values for the follow-up URLs.
    :returns: ``(info, flag)``. ``flag`` is ``1`` on success, in which case
        ``info`` is the result of ``deal_html_code.remove_repeat``. ``flag`` is
        ``100000004`` and ``info`` is an empty dict when the request does not
        return HTTP 200 or the expected marker text is absent from the page.
    :raises IndexError: if a successfully fetched page has no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    # Bound up front so the failure paths can return it.
    info = {}
    if status_code != 200:
        return info, 100000004

    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl = result.xpath("//div[@class='viewBox']/dl")[0]
    if "企业名称" not in content:
        return info, 100000004

    number = re.findall(re.compile(".*共(.*?)页.*"), content)
    # Exactly one match is trusted; anything else means "no paging".
    totalpage = int(number[0]) if len(number) == 1 else 0

    deal_single_info(dl, info, 0)
    if totalpage != 1:
        entid = deal_html_code.match_entid(url)
        cid = deal_html_code.match_cid(url)
        for k in xrange(2, totalpage + 1):
            href = out_invest_url.format(entid, cid, k)
            page_content, page_status = Send_Request().send_request(
                href, headers)
            if page_status != 200:
                # Best effort: a failed follow-up page is skipped.
                continue
            page = etree.HTML(
                page_content, parser=etree.HTMLParser(encoding='utf-8'))
            page_dl = page.xpath("//div[@class='viewBox']/dl")[0]
            deal_single_info(page_dl, info, (k - 1) * 5 + 1)

    info = deal_html_code.remove_repeat(info)
    return info, 1