Exemple #1
0
    def name(self, data):
        """Turn a sequence of record mappings into blacklist rows.

        Returns a dict keyed by each record's position in ``data``; every
        value is ``[types, in_reason, in_date, out_reason, out_date,
        gov_dept]``. A key missing from a record yields ``''`` for the text
        fields and ``'0000-00-00'`` for the date fields.
        """
        information = {}
        for index, record in enumerate(data):
            types = '黑名单'

            in_reason = ''
            if "bulletinListed" in record:
                in_reason = deal_html_code.remove_symbol(
                    record["bulletinListed"])

            in_date = '0000-00-00'
            if "abnTime" in record:
                in_date = change_chinese_date(record["abnTime"])

            out_reason = ''
            if "bulletinRemoved" in record:
                out_reason = deal_html_code.remove_symbol(
                    record["bulletinRemoved"])

            out_date = '0000-00-00'
            if "remTime" in record:
                out_date = change_chinese_date(record["remTime"])

            # The department is stored as-is, without remove_symbol.
            gov_dept = ''
            if "remOrganInterpreted" in record:
                gov_dept = record["remOrganInterpreted"]

            information[index] = [
                types, in_reason, in_date, out_reason, out_date, gov_dept
            ]
        return information
Exemple #2
0
def deal_single_info(result):
	"""Parse one HTML fragment into a flat dict.

	The returned dict holds:
	  * ``"href"``: ``config.host`` plus the first ``moreInfo`` link,
	  * ``"code"``: the single digit run found in the red ``<dd>``, or
	    ``None`` when there is not exactly one such node / digit run,
	  * one entry per ``<dt>``: its text (run through ``remove_symbol``)
	    mapped to the text of the element that immediately follows it.

	Raises IndexError when the ``moreInfo`` link is missing or a ``<dt>``
	has no following sibling.
	"""
	# Parse with an explicit UTF-8 parser so the text is not garbled.
	content = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
	infolist = {
		"href": config.host + content.xpath("//*[@class = 'moreInfo']/a/@href")[0]
	}
	dt_nodes = content.xpath("//dl/dt")

	code = None
	red_nodes = content.xpath("//dd[@style='color:red;']")
	if len(red_nodes) == 1:
		digit_runs = re.findall(r"\d+", red_nodes[0].xpath('string(.)'))
		if len(digit_runs) == 1:
			code = digit_runs[0]
	infolist['code'] = code

	for dt_node in dt_nodes:
		key = deal_html_code.remove_symbol(dt_node.xpath("string(.)"))
		value_node = dt_node.xpath("./following-sibling::*[1]")[0]
		infolist[key] = deal_html_code.remove_symbol(value_node.xpath("string(.)"))
	return infolist
Exemple #3
0
def get_need_info(result):
    """Extract link, company name and former names for every search hit.

    Args:
        result: parsed search page exposing ``find``/``find_all``
            (presumably a BeautifulSoup node -- confirm against the caller).

    Returns:
        A tuple ``(url, company, history_name)`` of dicts keyed by the hit's
        position: ``url_first`` + href, the company name run through
        ``remove_symbol``, and the former names re-joined with ';' after
        dropping empty fragments (or whatever ``remove_symbol(None)`` yields
        when the hit has no former-name block).
    """
    url, company, history_name = {}, {}, {}
    # Fix: the attribute filter must be a dict. The original passed the set
    # literal {"class", "main-layout fw f14"}.
    container = result.find('div', {"class": "main-layout fw f14"})
    a_list = container.find_all("a", {"class": "search_list_item db"})
    for i, item in enumerate(a_list):
        url[i] = url_first + item["href"]
        company[i] = remove_symbol(
            item.find("h1", {"class": "f20"}).text.strip())

        history_box = item.find("div", {"class": "div-info-circle3"})
        if history_box is not None:
            history = history_box.find("span", {"class": "g3"}).text.strip()
        else:
            history = None
        # remove_symbol is still called with None, exactly as before.
        history = remove_symbol(history)
        if history is not None:
            history = history.replace(u'*', '')
            # Drop the empty pieces produced by leading/trailing/double ';'.
            fragments = [
                part for part in re.split(';', str(history)) if part != u''
            ]
            history = ';'.join(fragments)
        history_name[i] = history

    return url, company, history_name
Exemple #4
0
    def get_info(self, data):
        """Read the ``dcdy`` rows of ``table_dcdy`` and their detail pages.

        For every row that has cells, the result holds a dict with
        ``code``, ``dates``, ``dept``, ``amount``, ``status`` (cells 1-5) and
        ``person_info`` / ``goods_info`` returned by ``get_detail_info`` for
        the detail URL built from the row's ``onclick`` attribute. Rows
        without cells are skipped. The result is keyed by row position.
        """
        info = {}
        rows = data.xpath(".//table[@id='table_dcdy']//tr[@name = 'dcdy']")
        for i, row in enumerate(rows):
            cells = row.xpath("./td")
            if not cells:
                continue

            clean = deal_html_code.remove_symbol
            record = {"code": clean(cells[1].xpath("string(.)"))}
            record["dates"] = deal_html_code.change_date_style(
                clean(cells[2].xpath("string(.)")))
            for key, position in (("dept", 3), ("amount", 4), ("status", 5)):
                record[key] = clean(cells[position].xpath("string(.)"))

            # The first value matched in the onclick handler goes into the
            # detail URL template.
            onclick = cells[6].xpath("./a/@onclick")[0]
            xh = deal_html_code.match_key_content(str(onclick))[0]
            detail_url = self._url.format(self._pripid, xh)
            record["person_info"], record["goods_info"] = \
                self.get_detail_info(detail_url, record)
            info[i] = record
        return info
 def get_detail(self, string, data, json_data, flag):
     """Store the contribution way/date found after a heading in ``json_data``.

     The first element whose text contains ``string`` is located, and the
     ``<td>`` cells of the first following-sibling match are read.

     Args:
         string: text that identifies the heading element.
         data: node exposing ``xpath`` (the whole document is searched).
         json_data: dict updated in place.
         flag: ``'rj'`` writes ``ra_ways``/``ra_date``; ``'sj'`` writes
             ``ta_ways``/``ta_date``; any other value writes nothing.

     With fewer than three cells the way is ``''`` and the date is
     ``'0000-00-00'``. Returns None.
     """
     # Fix: the axis was misspelled "following-sibline", which is not an
     # XPath axis.
     # NOTE(review): sibling lookups elsewhere in this code base start with
     # "./" rather than ".//" -- confirm which one is intended here.
     heading = data.xpath("//*[contains(.,'%s')]" % string)[0]
     table = heading.xpath(".//following-sibling::*[1]")
     td = table[0].xpath(".//td")

     # flag -> (way key, date key, message logged when the cells are missing)
     layouts = {
         'rj': ("ra_ways", "ra_date", "该条数据无认缴信息!"),
         'sj': ("ta_ways", "ta_date", "该条数据无实缴信息!"),
     }
     if flag not in layouts:
         return
     ways_key, date_key, missing_message = layouts[flag]

     if len(td) < 3:
         logging.info(missing_message)
         json_data[ways_key] = ''
         json_data[date_key] = '0000-00-00'
         return

     ways = deal_html_code.remove_symbol(td[0].xpath("string(.)"))
     date = deal_html_code.remove_symbol(td[2].xpath("string(.)"))
     date = deal_html_code.change_chinese_date(date)
     json_data[ways_key] = ways
     json_data[date_key] = date
Exemple #6
0
 def get_single_info(self, items):
     """Collect the fields of one search-result item into a dict.

     Args:
         items: node exposing ``xpath`` for a single result entry.

     Returns:
         dict with ``prirpid`` and ``types`` (first and fourth argument of
         the ``openView(...)`` call in ``@onclick``), ``company``,
         ``status``, ``code``, ``reg_date``, plus whatever
         ``self.judge_position`` adds for the legal person.

     Raises:
         IndexError: when an expected node or the ``openView`` call is
             missing.
     """
     info = {}
     onclick = items.xpath("./@onclick")
     # Raw string: "\(" is not a valid escape in a plain string literal.
     pattern = re.compile(r"openView\('(.*?)','(.*?)','(.*?)','(.*?)'\)")
     view_args = re.findall(pattern, str(onclick))[0]
     info["prirpid"] = view_args[0]
     info["types"] = view_args[3]

     info["company"] = items.xpath(".//span[@id = 'mySpan']/@title")[0]

     status = items.xpath(".//span[@id = 'mySpan']/following-sibling::*[1]")
     info["status"] = deal_html_code.remove_symbol(
         status[0].xpath("string(.)"))

     # Fix: ".//" keeps the lookup inside this item. The original "//span"
     # is evaluated from the document root, so every item received the code
     # of the first entry on the page.
     code = items.xpath(".//span[@class = 'shxydm']")[0]
     info["code"] = code.xpath("string(.)").split(":")[1]

     legal_person = items.xpath(".//span[@class = 'fddbr']")[0].xpath(
         "string(.)")
     legal_person = deal_html_code.remove_symbol(legal_person)
     # Work out the position held by the person in charge of the company.
     self.judge_position(legal_person, info)

     reg_date = items.xpath(".//span[@class= 'clrq']")[0].xpath("string(.)")
     reg_date = reg_date.split(":")[1]
     info["reg_date"] = deal_html_code.change_chinese_date(reg_date)
     return info
Exemple #7
0
    def deal_single_info(self, href, i, info, cookies):
        """Fetch one change-record page and store its fields in ``info[i]``.

        The stored value is ``[types, change_date, item, before, after]``.
        Nothing is stored when the response status is not 200, or when more
        than one paragraph containing the change-time label is found.
        Returns None.
        """
        response = requests.get(href,
                                headers=headers,
                                cookies=cookies,
                                timeout=5)
        if response.status_code != 200:
            return

        page = etree.HTML(response.content,
                          parser=etree.HTMLParser(encoding='utf-8'))
        types = "变更"
        # Change records come in two layouts: with a table and without one.
        string = u'变更时间'
        marker_count = len(page.xpath(".//p[contains(.,'%s')]" % string))

        if marker_count == 0:
            # Layout without a table: the fields are the first four <dd> texts.
            dd_texts = page.xpath(".//dl/dd/text()")
            fields = [
                deal_html_code.remove_symbol(dd_texts[position])
                for position in range(4)
            ]
            info[i] = [types] + fields
        elif marker_count == 1:
            change_date, item, before, after = self.deal_table_info(page)
            info[i] = [types, change_date, item, before, after]
Exemple #8
0
 def name(self, url):
     """Download ``url`` and read the rows of table ``tableIdStyle``.

     Returns:
         ``(info, flag)``. ``flag`` is 1 when the request returned status
         200 and 100000004 otherwise. ``info`` maps the row position to
         ``[name, code, gov_dept]`` (cells 1, 2 and 5, each run through
         ``remove_symbol``) and is empty on a non-200 status.
     """
     info = {}
     headers = config.headers_detail
     content, status_code = Send_Request().send_request(url, headers)
     if status_code != 200:
         return info, 100000004

     flag = 1
     result = etree.HTML(content,
                         parser=etree.HTMLParser(encoding='utf-8'))
     trlist = result.xpath("//table[@id = 'tableIdStyle']//tr")
     for i, single in enumerate(trlist):
         tdlist = single.xpath("./td")
         # Fix: cell 5 is read below, so a row needs at least six cells.
         # The original only required four and raised IndexError on rows
         # with four or five cells.
         if len(tdlist) < 6:
             continue
         name = deal_html_code.remove_symbol(tdlist[1].xpath("string(.)"))
         code = deal_html_code.remove_symbol(tdlist[2].xpath("string(.)"))
         gov_dept = deal_html_code.remove_symbol(
             tdlist[5].xpath("string(.)"))
         info[i] = [name, code, gov_dept]
     return info, flag
Exemple #9
0
 def get_info(self, data):
     """Read the ``sfxz`` rows of ``table_sfxz`` into a dict of records.

     Each row with cells becomes a dict with ``exceutor``, ``stock_amount``,
     ``court``, ``notice_no``, ``enforce_no`` (same value as ``notice_no``)
     and ``status``, stored under the row position. Rows without cells are
     skipped. ``get_deatail_info`` is called once per kept row.
     """
     info = {}
     rows = data.xpath(".//table[@id='table_sfxz']//tr[@name = 'sfxz']")
     for i, row in enumerate(rows):
         cells = row.xpath("./td")
         if not cells:
             continue

         clean = deal_html_code.remove_symbol
         record = {}
         record["exceutor"] = clean(cells[1].xpath("string(.)"))
         record["stock_amount"] = clean(cells[2].xpath("string(.)"))
         record["court"] = clean(cells[3].xpath("string(.)"))
         record["notice_no"] = clean(cells[4].xpath("string(.)"))
         record["enforce_no"] = record["notice_no"]
         record["status"] = clean(cells[5].xpath("string(.)"))

         # The first two values matched in the onclick handler feed the
         # detail URL template (second value first).
         onclick = cells[6].xpath("./a/@onclick")[0]
         matched = deal_html_code.match_key_content(str(onclick))
         xh, lx = matched[0], matched[1]
         detail_url = self._url.format(self._pripid, lx, xh)
         # NOTE(review): the outer ``info`` dict, not this row's record, is
         # handed to the detail call -- confirm that this is intended.
         self.get_deatail_info(detail_url, info)
         info[i] = record
     return info
Exemple #10
0
 def name(self, data):
     """Turn a sequence of record mappings into branch rows.

     Returns a dict keyed by record position whose values are
     ``[name, code, gov_dept, ccode]`` read from the keys ``brName``,
     ``regNo``, ``regOrganName`` and ``uniScid``. Missing keys yield ``''``;
     for ``uniScid`` the ``''`` default is also passed through
     ``remove_symbol``.
     """
     info = {}
     if len(data) == 0:
         return info

     for i, single in enumerate(data):
         clean = deal_html_code.remove_symbol
         code = clean(single["regNo"]) if "regNo" in single else ''
         # ccode is run through remove_symbol even when the key is absent.
         ccode = clean(single["uniScid"] if "uniScid" in single else '')
         br_name = clean(single["brName"]) if "brName" in single else ''
         if "regOrganName" in single:
             gov_dept = clean(single["regOrganName"])
         else:
             gov_dept = ''
         info[i] = [br_name, code, gov_dept, ccode]
     return info
 def name(self, url):
     """Download ``url`` and read the entries of the first ``viewBox`` list.

     The first ``<dl>`` inside ``div.viewBox`` is serialised and split on
     ``<br/>``; every fragment except the last is re-parsed and read as one
     entry.

     Returns:
         ``(info, flag)``. ``flag`` is 1 when the request returned status
         200 and 100000004 otherwise. ``info`` maps the fragment position to
         ``[name, types, license_type, license_code]`` (the ``<dt>`` text
         and the first three ``<dd>`` texts) and is empty on a non-200
         status.
     """
     info = {}
     content, status_code = Send_Request().send_request(url)
     if status_code != 200:
         return info, 100000004

     flag = 1
     result = etree.HTML(content,
                         parser=etree.HTMLParser(encoding="utf-8"))
     dlinfo = result.xpath("//div[@class ='viewBox']//dl")[0]
     dl = etree.tostring(dlinfo).split("<br/>")
     # Drop the useless trailing fragment.
     # Fix: pop() removes by position. The original dl.remove(dl[-1])
     # removed the first fragment *equal* to the last one, which is a
     # different element whenever an earlier fragment has the same text.
     dl.pop()
     for i, single in enumerate(dl):
         single = etree.HTML(single,
                             parser=etree.HTMLParser(encoding="utf-8"))
         name = deal_html_code.remove_symbol(
             single.xpath(".//dt")[0].xpath("string(.)"))
         templist = single.xpath('.//dd')
         types = deal_html_code.remove_symbol(
             templist[0].xpath("string(.)"))
         license_type = deal_html_code.remove_symbol(
             templist[1].xpath('string(.)'))
         license_code = deal_html_code.remove_symbol(
             templist[2].xpath('string(.)'))
         info[i] = [name, types, license_type, license_code]
     return info, flag
Exemple #12
0
 def get_info(self, data):
     """Convert an iterable of table rows into a dict of pledge records.

     Each row with cells becomes a dict holding the text of cells 1-9 under
     the keys ``equityNo``, ``pledgor``, ``pledBLicNo``, ``impAm``,
     ``impOrg``, ``impOrgBLicNo``, ``equPleDate``, ``type`` and
     ``publicDate``; the two date fields are additionally passed through
     ``change_chinese_date``. Rows without cells are skipped. The result is
     keyed by row position.
     """
     # (result key, cell index) for the columns copied without conversion.
     plain_columns = (
         ("equityNo", 1),
         ("pledgor", 2),
         ("pledBLicNo", 3),
         ("impAm", 4),
         ("impOrg", 5),
         ("impOrgBLicNo", 6),
     )
     info = {}
     for i, row in enumerate(data):
         cells = row.xpath("./td")
         if not cells:
             continue

         clean = deal_html_code.remove_symbol
         record = {}
         for key, position in plain_columns:
             record[key] = clean(cells[position].xpath("string(.)"))

         pledge_date = clean(cells[7].xpath("string(.)"))
         record["equPleDate"] = deal_html_code.change_chinese_date(
             pledge_date)
         public_date = clean(cells[9].xpath("string(.)"))
         record["type"] = clean(cells[8].xpath("string(.)"))
         record["publicDate"] = deal_html_code.change_chinese_date(
             public_date)
         info[i] = record
     return info
Exemple #13
0
	def get_info(self, data):
		"""Convert an iterable of table rows into a dict of inspection records.

		Each row with at least five cells becomes a dict with ``gov_dept``,
		``types``, ``check_date`` and ``result`` (cells 1-4, each run through
		``remove_symbol``), stored under the row position. Shorter rows are
		skipped.

		NOTE(review): ``data`` is presumably the result of
		``//table[@id = 'table_ccjc']//tr[@name = 'ccjc']`` -- confirm
		against the caller.
		"""
		info = {}
		for i, singledata in enumerate(data):
			# Fix: "./td" reads this row's own cells. The original "//td" is
			# evaluated from the document root, so every row produced the
			# cells of the first row on the page.
			td_list = singledata.xpath("./td")
			# Cells 1-4 are read below; skip rows that do not have them.
			if len(td_list) < 5:
				continue
			temp = {}
			temp["gov_dept"] = deal_html_code.remove_symbol(td_list[1].xpath("string(.)"))
			temp["types"] = deal_html_code.remove_symbol(td_list[2].xpath("string(.)"))
			temp["check_date"] = deal_html_code.remove_symbol(td_list[3].xpath("string(.)"))
			temp["result"] = deal_html_code.remove_symbol(td_list[4].xpath("string(.)"))
			info[i] = temp
		return info
Exemple #14
0
	def get_info(self, data):
		"""Read every ``<tr>`` under ``data`` into ``{"types", "valto"}`` records.

		Rows with zero or one cell are skipped. ``types`` is cell 1 and
		``valto`` is cell 2 passed through ``change_chinese_date``. The result
		is keyed by row position.
		"""
		rows = data.xpath(".//tr")
		info = {}
		for i, row in enumerate(rows):
			cells = row.xpath("./td")
			if len(cells) <= 1:
				continue
			clean = deal_html_code.remove_symbol
			kind = clean(cells[1].xpath("string(.)"))
			valid_to = clean(cells[2].xpath("string(.)"))
			info[i] = {
				"types": kind,
				"valto": deal_html_code.change_chinese_date(valid_to),
			}
		return info
	def get_info(self, data):
		"""Read every ``<tr>`` under ``data`` into share-change records.

		Rows with zero or one cell are skipped. Each kept row yields a dict
		with ``name``, ``percent_pre``, ``percent_after`` (cells 1-3) and
		``dates`` (cell 4 passed through ``change_chinese_date``), stored under
		the row position.
		"""
		info = {}
		for i, row in enumerate(data.xpath(".//tr")):
			cells = row.xpath("./td")
			if len(cells) <= 1:
				continue
			clean = deal_html_code.remove_symbol
			record = {}
			for key, position in (("name", 1), ("percent_pre", 2), ("percent_after", 3)):
				record[key] = clean(cells[position].xpath("string(.)"))
			raw_date = clean(cells[4].xpath("string(.)"))
			record["dates"] = deal_html_code.change_chinese_date(raw_date)
			info[i] = record
		return info
Exemple #16
0
	def deal_tr_content(self, result, string):
		"""Flatten the table containing ``string`` into one ``"||"``-joined text.

		The first table whose text contains ``string`` is used. Its first two
		rows are discarded; each remaining row contributes the text of its
		first two cells separated by a space. Returns ``''`` when no rows
		remain. Raises IndexError when no table matches or it has fewer than
		two rows.
		"""
		table = result.xpath(".//table[contains(.,'%s')]" % string)[0]
		rows = table.xpath("./tr")
		# Discard the first two rows.
		rows.pop(0)
		rows.pop(0)
		pieces = []
		for row in rows:
			cells = row.xpath("./td")
			left = deal_html_code.remove_symbol(cells[0].xpath("string(.)"))
			right = deal_html_code.remove_symbol(cells[1].xpath("string(.)"))
			pieces.append(left + " " + right)
		return "||".join(pieces)
Exemple #17
0
 def get_person_info(self, data):
     """Read the ``dydj`` rows under ``data`` into person records.

     Every row yields ``{"name", "cert", "number"}`` from cells 1-3, each
     run through ``remove_symbol``, stored under the row position. Rows are
     not checked for length, so a short row raises IndexError.
     """
     info = {}
     for i, row in enumerate(data.xpath(".//tr[@name = 'dydj']")):
         cells = row.xpath("./td")
         record = {}
         for key, position in (("name", 1), ("cert", 2), ("number", 3)):
             record[key] = deal_html_code.remove_symbol(
                 cells[position].xpath("string(.)"))
         info[i] = record
     return info
Exemple #18
0
 def name(self, data):
     """Turn a sequence of record mappings into change rows.

     Returns a dict keyed by record position whose values are
     ``[content_before, content_after, change_date, item]`` read from the
     keys ``altBe``, ``altAf``, ``altDate`` and ``altItem``. Missing text
     keys yield ``''`` and a missing date yields ``'0000-00-00'``. Only
     ``altAf`` and ``altDate`` are post-processed.
     """
     info = {}
     if len(data) <= 0:
         return info

     for i, single in enumerate(data):
         content_before = single["altBe"] if "altBe" in single else ''

         content_after = ''
         if "altAf" in single:
             content_after = deal_html_code.remove_symbol(single["altAf"])

         change_date = '0000-00-00'
         if "altDate" in single:
             change_date = deal_html_code.change_chinese_date(
                 single["altDate"])

         item = single["altItem"] if "altItem" in single else ""
         info[i] = [content_before, content_after, change_date, item]
     return info
Exemple #19
0
def get_all_info(gs_basic_id, gs_search_id, info_list):
    """Process every section of ``info_list`` for one company.

    Steps, in order:
      1. the ``"basic"`` section is parsed and written via
         ``class_dict["basic"]``;
      2. every other section except ``"report"``/``"report1"`` is handed to
         ``Judge(...).update_info``;
      3. the third cell of each ``"report"`` row is converted to a date and
         passed, together with ``info_list["report1"]``, to
         ``SHX_report.main``.

    Progress is printed as ``basic:<flag>`` and ``report:<count or -1>``.
    """
    Log().found_log(gs_basic_id, gs_search_id)
    info = class_dict["basic"]().get_info(info_list["basic"])
    flag = class_dict["basic"]().update_to_db(info, gs_basic_id)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("basic:%s" % flag)
    # NOTE(review): pripid is hard-coded here -- confirm this is intended.
    pripid = '28890'
    name = info["name"]

    for key, value in info_list.items():
        # These sections are special and are handled separately.
        # Fix: the former "key not in info_list.keys()" check was removed;
        # it could never be true for a key taken from info_list itself.
        if key in ("basic", "report", "report1"):
            continue
        Judge(pripid, name,
              config.dict_url[key]).update_info(key, class_dict[key], value,
                                                gs_basic_id)

    # The annual reports are handled on their own.
    tr_list = info_list["report"].xpath(".//tr")
    fill_data = {}
    for i, singledata in enumerate(tr_list):
        td_list = singledata.xpath(".//td")
        if len(td_list) == 0:
            continue
        fill_data[i] = deal_html_code.change_chinese_date(
            deal_html_code.remove_symbol(td_list[2].xpath("string(.)")))
    if len(fill_data) == 0:
        print("report:-1")
    else:
        print("report:%s" % len(fill_data))
    SHX_report.main(info_list["report1"], fill_data, gs_basic_id)
Exemple #20
0
    def name(self, data):
        """Turn a sequence of record mappings into abnormal-operation rows.

        Returns a dict keyed by each record's position in ``data``; every
        value is ``[types, in_reason, in_date, out_reason, out_date,
        gov_dept]``. A key missing from a record yields ``''`` for the text
        fields and ``'0000-00-00'`` for the date fields.
        """
        information = {}
        # enumerate() replaces the former xrange(len(data)) index loop: it
        # matches the sibling parsers and does not depend on the Python 2
        # only ``xrange`` builtin.
        for i, singledata in enumerate(data):
            types = '经营异常'

            if "speCauseInterpreted" in singledata:
                in_reason = singledata["speCauseInterpreted"]
            else:
                in_reason = ''

            if 'abnTime' in singledata:
                in_date = deal_html_code.change_chinese_date(
                    singledata["abnTime"])
            else:
                in_date = '0000-00-00'

            if "remExcpResInterpreted" in singledata:
                out_reason = deal_html_code.remove_symbol(
                    singledata["remExcpResInterpreted"])
            else:
                out_reason = ''

            if 'remDate' in singledata:
                out_date = deal_html_code.change_chinese_date(
                    singledata["remDate"])
            else:
                out_date = '0000-00-00'

            if "decOrgInterpreted" in singledata:
                gov_dept = singledata["decOrgInterpreted"]
            else:
                gov_dept = ''

            information[i] = [
                types, in_reason, in_date, out_reason, out_date, gov_dept
            ]
        return information
Exemple #21
0
 def name(self, data):
     """Turn a sequence of trademark record mappings into rows.

     Returns a dict keyed by record position whose values are
     ``[ia_zch, ia_flh, ia_zcgg, ia_servicelist, ia_zyqqx, ia_zcdate,
     nodeNum, tmImage]``. ``ia_zyqqx`` is ``"<begin>至<end>"`` built from
     the two property dates, or ``''`` when both dates equal
     ``'0000-00-00'``.

     Raises:
         KeyError: when a record lacks one of the keys read here.
     """
     information = {}
     # enumerate() replaces the former xrange(len(data)) index loop.
     for i, singledata in enumerate(data):
         nodeNum = singledata["nodeNum"]
         ia_zch = singledata["regNum"]
         ia_flh = singledata["intCls"]
         ia_zcgg = singledata["regAnncIssue"]
         ia_servicelist = remove_symbol(singledata["goodsCnName"])
         begin = change_date_style(singledata["propertyBgnDate"])
         end = change_date_style(singledata["propertyEndDate"])
         # Fix: both sides are compared with the same '0000-00-00'
         # placeholder (presumably what change_date_style yields for a
         # missing date -- confirm). The original tested ``begin`` against
         # the malformed '0000-0000-00'.
         if begin == '0000-00-00' and end == '0000-00-00':
             ia_zyqqx = ''
         else:
             ia_zyqqx = begin + '至' + end
         ia_zcdate = change_date_style(singledata["regAnncDate"])
         tmImage = singledata["tmImage"]
         information[i] = [
             ia_zch, ia_flh, ia_zcgg, ia_servicelist, ia_zyqqx, ia_zcdate,
             nodeNum, tmImage
         ]
     return information
Exemple #22
0
    def deal_single_info(self, i, single, cookies, item):
        """Collect the fields of one patent search hit into ``item[i]``.

        Values come from three places: hidden ``<input>`` elements of
        ``single``, labelled fields read through ``self.deal_info``, and
        follow-up lookups (``get_remark``, ``get_law_info``,
        ``get_cognation_info``). The stored list is ``[name, code, app_date,
        applicant, address, inventor, main_cate, sub_cate, pub_code,
        pub_date, priority, remark, agent, agency, source, law_search_info,
        same_info]``. Returns None.
        """
        def hidden_value(query):
            # @value of the first <input> matched by ``query``.
            return single.xpath(query)[0].xpath('./@value')[0]

        def field(label):
            return self.deal_info(label, single)

        # Document identifier.
        nrdAn = hidden_value(".//input[@name = 'nrdAnHidden']")
        # Unique document identifier; it is used for both sid and cid.
        cid = hidden_value(".//input[@name='idHidden']")
        sid = cid
        nrdPn = hidden_value(".//input[@name ='nrdPnHidden']")

        agency = field(u'代理机构')
        agent = field(u'代理人')
        # Keep only what follows the last 'CN' in the application number.
        code = field(u'申请号').split('CN')[-1]
        app_date = deal_html_code.change_date(field(u'申请日'))
        applicant = field(u'申请(专利权)人')
        address = hidden_value(".//input[@name ='appAddrHidden']")
        inventor = field(u'发明人')
        # Both category fields are read with the same label.
        main_cate = field(u'IPC分类号')
        sub_cate = field(u'IPC分类号')
        pub_code = field(u'公开(公告)号')
        pub_date = deal_html_code.change_date(field(u'公开(公告)日'))
        priority = field(u'优先权日') + ' ' + field(u'优先权号')
        name = deal_html_code.remove_symbol(
            hidden_value(".//input[@name ='titleHidden']"))

        remark = self.get_remark(nrdPn, sid, cid, cookies)
        source = 'pss-system'
        law_search_info = self.get_law_info(nrdAn, nrdPn, cookies)

        # The link text ends with a count after the last ':'; related
        # records are fetched only when that count is non-zero.
        string = u'同族'
        link_text = single.xpath(".//a[contains(.,'%s')]" %
                                 string)[0].xpath("string(.)")
        finger = link_text.split(":")[-1]
        if int(finger) == 0:
            same_info = {}
        else:
            same_info = self.get_cognation_info(nrdPn, cookies)

        item[i] = [
            name, code, app_date, applicant, address, inventor, main_cate,
            sub_cate, pub_code, pub_date, priority, remark, agent, agency,
            source, law_search_info, same_info
        ]
Exemple #23
0
def deal_dd_content(string, result):
    """Return the text of the element that follows the matching ``<dt>``.

    The first ``<dt>`` under ``result`` whose text contains ``string`` is
    located; the text of its first following sibling is passed through
    ``deal_html_code.remove_symbol`` and returned. Raises IndexError when
    either node is missing.
    """
    term = result.xpath(".//dt[contains(.,'%s')]" % string)[0]
    definition = term.xpath("./following-sibling::*[1]")[0]
    return deal_html_code.remove_symbol(definition.xpath("string(.)"))
Exemple #24
0
def main():
    """Run the search/update task for every row returned by ``select_info``.

    For each ``(gs_new_id, name, province)`` row:
      * rows from the listed provinces and rows whose name is empty after
        ``remove_symbol`` are only reported;
      * otherwise ``GetUrl.main(name)`` is called, and on ``flag == 1`` with
        a non-empty result ``update_info`` is invoked; any other outcome
        runs ``update_status1`` and commits.

    Any exception is logged and swallowed. The cursor and connection are
    closed before returning.
    """
    Bulid_Log.Log().found_log()
    connect = cursor = None
    try:
        connect, cursor = Connect_to_DB().ConnectDB(
            config.HOST, config.USER, config.PASSWD, config.DB, config.PORT)
        count = cursor.execute(select_info)
        if count == 0:
            print("there is no task need to do!")
            return

        skipped_provinces = ('SHH', "HEB", "SCH", "YUN", "JSU")
        for gs_new_id, name, province in cursor.fetchall():
            print("now the gs_new_id is %s" % gs_new_id)
            logging.info("now the gs_new id is %s" % gs_new_id)
            name = deal_html_code.remove_symbol(name)
            if province in skipped_provinces:
                print("the province is out of range")
                logging.info("the province is out of range")
            elif name == '':
                print("this is an useless information!")
                logging.info("this is an useless information!")
            else:
                info, flag = GetUrl.main(name)
                if flag == 1 and len(info) > 0:
                    update_info(cursor, connect, info, gs_new_id, province)
                else:
                    logging.info(
                        "get cookies failed or there is no search information ,the status is %s"
                        % flag)
                    cursor.execute(update_status1, (gs_new_id, gs_new_id))
                    connect.commit()
    except Exception as e:
        # logging.exception keeps the traceback; the original logged the
        # failure at info level with the message only.
        logging.exception("unknown error:%s" % e)
    finally:
        # Fix: the cursor and connection were never closed.
        if cursor is not None:
            cursor.close()
        if connect is not None:
            connect.close()
Exemple #25
0
    def get_info(self, data):
        """Read every ``<tr>`` under ``data`` into ``{"name", "position"}``.

        Rows without any ``<td>`` descendant are skipped. ``name`` is cell 1
        and ``position`` is cell 2, each run through ``remove_symbol``. The
        result is keyed by row position.
        """
        info = {}
        for i, row in enumerate(data.xpath(".//tr")):
            cells = row.xpath(".//td")
            if not cells:
                continue
            clean = deal_html_code.remove_symbol
            person = clean(cells[1].xpath("string(.)"))
            role = clean(cells[2].xpath("string(.)"))
            info[i] = {"name": person, "position": role}
        return info
Exemple #26
0
def get_info(code, ccode):
    """Search by registration code, falling back to the company name.

    ``ccode`` is used as the search string only when it starts with ``9``
    and ``code`` does not; in every other case ``code`` is used. When
    ``get_list`` answers with flag 100000003, the company name is read from
    the database and the search is repeated with that name.

    Returns:
        The ``(info, flag)`` pair of the last ``get_list`` call.
    """
    pattern = re.compile(r'^9.*')
    # Same selection as the original three-way test, but without the
    # ``else: pass`` branch that would have left ``string`` unbound.
    if not pattern.match(code) and pattern.match(ccode):
        string = ccode
    else:
        string = code

    info, flag = get_list(string)
    if flag == 100000003:
        connect, cursor = Connect_to_DB().ConnectDB(
            config.HOST, config.USER, config.PASSWD, config.DB, config.PORT)
        try:
            # NOTE(review): gs_basic_id is not a parameter of this function;
            # it must exist at module level -- confirm. The statement is
            # also built with "%", so the id must come from a trusted source.
            cursor.execute(select_name % gs_basic_id)
            name = cursor.fetchall()[0][0]
        finally:
            # Fix: close the handles even when the query or the row lookup
            # fails; the original leaked them on any exception.
            cursor.close()
            connect.close()
        name = deal_html_code.remove_symbol(name)
        info, flag = get_list(name)
    return info, flag
 def name(self, data):
     """Turn a sequence of investor record mappings into rows.

     Returns a dict keyed by record position whose values are ``[name,
     types, license_code, license_type, reg_amount, true_amount, ta_date,
     ta_ways, country, address, encrypted, cetfType]``. Optional keys
     default to ``''`` (``'0000-00-00'`` for the date).

     Raises:
         KeyError: when ``blicTypeInterpreted`` or ``encrypted`` is absent,
             or when ``cetfTypeInterpreted``, ``bLicNo`` or ``cetfId`` is
             absent on the branch that reads it.
     """
     info = {}
     if len(data) == 0:
         return info

     for i, single in enumerate(data):
         investor = single.get("invName", '')
         types = single.get("invTypeInterpreted", '')

         # The business licence is preferred over the certificate; both
         # values are empty when neither type is filled in.
         if single["blicTypeInterpreted"] != '':
             license_type = single["blicTypeInterpreted"]
             license_code = single["bLicNo"]
         elif single["cetfTypeInterpreted"] != '':
             license_type = single["cetfTypeInterpreted"]
             license_code = single["cetfId"]
         else:
             license_code = ''
             license_type = ''
         license_code = deal_html_code.remove_symbol(license_code)

         reg_amount = single.get("subconAm", '')
         true_amount = single.get("acconAm", '')

         ta_date = '0000-00-00'
         if "conDate" in single:
             ta_date = deal_html_code.change_chinese_date(single["conDate"])

         ta_ways = single.get("conForm", '')
         if ta_ways == '1':
             ta_ways = '货币'

         country = single.get("countryInterpreted", '')
         address = single.get("dom", '')
         encrypted = single["encrypted"]

         cetfType = ''
         for type_key in ("cetfType", "bLicType"):
             if type_key in single:
                 cetfType = single[type_key]
                 break

         info[i] = [investor, types, license_code, license_type, reg_amount,
                    true_amount, ta_date, ta_ways, country, address,
                    encrypted, cetfType]
     return info
Exemple #28
0
	def deal_table_info(self, result):
		"""Read the date, item and before/after text of a table-style change page.

		The change date is the part after the last ':' of the first paragraph
		containing the change-time label; the item is the part after the last
		':' of the element that follows that paragraph. Items mentioning the
		investor or the subscribed/paid-in contribution are all replaced by the
		same investor label. The before/after texts come from
		``deal_tr_content``.

		Returns ``(change_date, item, change_before, change_after)``.
		"""
		string = u'变更时间'
		time_node = result.xpath(".//p[contains(.,'%s')]" % string)[0]
		item_node = time_node.xpath("./following-sibling::*[1]")[0]
		clean = deal_html_code.remove_symbol
		change_date = clean(time_node.xpath("string(.)")).split(u":")[-1]
		item = clean(item_node.xpath("string(.)")).split(u":")[-1]

		investor_markers = (u"投资人", u"认缴的出资额", u"实缴的出资额")
		if any(marker in item for marker in investor_markers):
			item = "投资人"

		change_before = self.deal_tr_content(result, u'变更前')
		change_after = self.deal_tr_content(result, u"变更后")
		return change_date, item, change_before, change_after
Exemple #29
0
	def deal_single_info(self, items):
		"""Collect the fields of one search-result item.

		Returns a one-entry dict mapping the item's code to
		``[url, company, status, code, legal_person, dates]``. ``code``,
		``legal_person`` and ``dates`` are the first three ``dataTextStyle``
		spans of the ``textStyle`` table; ``dates`` goes through
		``change_chinese_date`` only. Raises IndexError when a node is missing.
		"""
		url = items.xpath(".//a[@class='font16']/@href")[0]
		clean = deal_html_code.remove_symbol
		company = clean(items.xpath(".//span[@class= 'rsfont']")[0].xpath("string(.)"))
		status_node = items.xpath(".//span[@class= 'rsfont']/following-sibling::*[1]")[0]
		status = clean(status_node.xpath("string(.)"))

		spans = items.xpath(".//table[@class = 'textStyle']//span[@class = 'dataTextStyle']")
		code = clean(spans[0].xpath("string(.)"))
		legal_person = clean(spans[1].xpath("string(.)"))
		dates = deal_html_code.change_chinese_date(spans[2].xpath("string(.)"))

		return {code: [url, company, status, code, legal_person, dates]}
 def get_info(self, data):
     """Read every ``<tr>`` under ``data`` into contribution records.

     Rows with zero or one cell are skipped. Each kept row yields a dict
     with ``name``, ``reg_amount``, ``reg_date``, ``reg_way``,
     ``ac_amount``, ``ac_date`` and ``ac_way`` taken from cells 1-7. The
     amounts go through ``match_float`` and the dates through
     ``change_chinese_date``. The result is keyed by row position.
     """
     rows = data.xpath(".//tr")
     info = {}
     for i, row in enumerate(rows):
         cells = row.xpath("./td")
         # Header rows and rows that carry no data are skipped.
         if len(cells) <= 1:
             continue

         clean = deal_html_code.remove_symbol
         to_float = deal_html_code.match_float
         to_date = deal_html_code.change_chinese_date

         record = {"name": clean(cells[1].xpath("string(.)"))}
         record["reg_amount"] = to_float(clean(cells[2].xpath("string(.)")))
         record["reg_date"] = to_date(clean(cells[3].xpath("string(.)")))
         record["reg_way"] = clean(cells[4].xpath("string(.)"))
         record["ac_amount"] = to_float(clean(cells[5].xpath("string(.)")))
         record["ac_date"] = to_date(clean(cells[6].xpath("string(.)")))
         record["ac_way"] = clean(cells[7].xpath("string(.)"))
         info[i] = record
     return info