def running(self):
    """Crawl stockholder data company by company until none are pending.

    A fresh ``StockHolder`` is created and asked (``verify_cond``) how many
    companies still need processing. While that number is positive, the
    next company is fetched, its stockholder count is estimated, every
    result page is requested and parsed, and the company's status row is
    updated through ``gm().upd_status``. The pending count is re-read and
    reported after each company.
    """
    banner = '\n{2}\n{1}剩余{0}家企业股东数据待采集!{1}\n{2}\n'
    short_rule = '*' * 20
    long_rule = '*' * 63

    def report(remaining):
        # Progress banner: number of companies still waiting to be crawled.
        print(banner.format(remaining, short_rule, long_rule))

    crawler = StockHolder()
    pending = crawler.verify_cond()
    report(pending)

    while pending > 0:
        print('Loading......\n')
        time.sleep(3)
        print('开始新一轮采集')

        record = crawler.get_com_id()
        com_id, com_name = record[0], record[1]

        holder_total = crawler.count_sh_judge(com_id)
        last_page = crawler.sh_page_judge(holder_total)
        for page_no in range(1, last_page + 1):
            page_tree = crawler.get_page_req(com_id, com_name, page_no)
            crawler.parse_info(page_tree, com_id, com_name, page_no, last_page)

        # Record the outcome once all pages of this company were handled.
        gm().upd_status(
            com_id, 'status_stockholder', 'count_stockholder', holder_total)

        pending = crawler.verify_cond()
        report(pending)

    print('\n数据采集完成!')
def __init__(self):
    """Create the helper objects used by this crawler and set its base URL.

    Each helper is built by calling the project-level factories ``db``,
    ``dk``, ``gh``, ``tm`` and ``gm`` with no arguments; what they do is
    defined elsewhere in the project.
    """
    self.db = db()
    self.dk = dk()
    self.gh = gh()  # provides ``header()`` for HTTP request headers
    self.tm = tm()
    self.gm = gm()  # provides ``verify()`` / ``upd_status()``
    # Site root (no trailing slash) that company page URLs are built from.
    self.index_url = 'https://www.qcc.com'
def count_sh_judge(self, com_id):
    """Estimate a company's stockholder count from its landing page.

    The number is read from the "stockholder info" navigation entry of the
    company page. It is only a rough figure (the site caps the display at
    ``999+``) and is meant to be refined by a later, more precise check.

    Args:
        com_id: Company identifier used in the ``firm_<id>.html`` URL, or
            ``None`` when there is no company to look up.

    Returns:
        int: The parsed count; ``999`` when the page shows ``999+``; ``0``
        when ``com_id`` is ``None`` or the count cannot be extracted.

    Side effects:
        Sleeps 3-5 seconds before the HTTP request and writes the result
        to the status table via ``gm().upd_status``.
    """
    count_sh = 0
    if com_id is not None:
        # Use this instance's helpers instead of building a whole new
        # StockHolder (and all of its helpers) on every call.
        header = self.gh.header()
        com_url = f'{self.index_url}/firm_{com_id}.html'
        time.sleep(random.randint(3, 5))  # randomized delay between requests
        res = requests.get(com_url, headers=header).text
        tree = self.gm.verify(res)
        try:
            raw_count = tree.xpath(
                '//div[@class="company-nav-items"]'
                '/span[contains(text(),"股东信息")]/span/text()'
                '|//div[@class="company-nav-items"]'
                '/a[@data-pos="partnerslist"]/span/text()'
            )[0]
            count_sh = 999 if raw_count == '999+' else int(raw_count)
        except Exception:
            # Best effort: a missing or unparsable count is treated as 0.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate so the crawler can still be stopped.
            count_sh = 0

    # NOTE(review): the status row is written for every call, including
    # com_id None — confirm against the original layout of this method.
    gm().upd_status(com_id, 'status_stockholder', 'count_stockholder', count_sh)
    return count_sh
def running(self):
    """Entry point: crawl main-member data until no companies are pending.

    A fresh ``MainMember`` is created and asked (``verify_cond``) how many
    companies still need processing. While that number is positive, the
    next company id is fetched, its member count and page tree are obtained
    from ``count_cm_judge``, the tree is parsed, and the company's status
    row is updated through ``gm().upd_status``. The pending count is
    re-read and reported after each company.
    """
    banner = '\n{2}\n{1}剩余{0}家企业主要人员数据待采集!{1}\n{2}\n'
    short_rule = '*' * 20
    long_rule = '*' * 63

    def report(remaining):
        # Progress banner: number of companies still waiting to be crawled.
        print(banner.format(remaining, short_rule, long_rule))

    crawler = MainMember()
    pending = crawler.verify_cond()
    report(pending)

    while pending > 0:
        print('Loading......\n')
        time.sleep(3)
        print('开始新一轮采集')

        com_id = crawler.get_com_id()[0]
        judged = crawler.count_cm_judge(com_id)
        member_total, page_tree = judged[0], judged[1]
        crawler.parse_info(com_id, page_tree)

        gm().upd_status(
            com_id, 'status_main_member', 'count_main_member', member_total)

        pending = crawler.verify_cond()
        report(pending)

    print('\n数据采集完成!')
def get_count_rc(self, count_rc, key, count, com_id):
    """Turn a rough recruitment count into the exact one and record it.

    When the rough ``count_rc`` is positive, the company's "run" tab is
    requested and the exact number of job postings is read from the job
    list link. Otherwise no request is made and the count is taken as 0.
    In both cases the status table is updated via ``gm().upd_status``.

    Args:
        count_rc: Rough posting count from an earlier check.
        key: Company name, sent as the ``companyname`` query parameter.
        count: Running counter; only printed for progress output.
        com_id: Company identifier used in the request URL and referer.

    Returns:
        tuple: ``(exact_count, res)`` where ``res`` is the response text of
        the detail request, or ``0`` when no request was made.
    """
    exact_total = 0
    res = 0

    if count_rc > 0:
        info_url = f'https://www.qichacha.com/company_getinfos?unique={com_id}&companyname={key}&tab=run'
        request_headers = self.gh.header()
        request_headers.update(
            {'Referer': f'https://www.qichacha.com/firm_{com_id}.html'})

        time.sleep(random.randint(3, 5))  # randomized delay between requests
        res = requests.get(info_url, headers=request_headers).text
        page_tree = self.gm.verify(res)

        # The link text holds the number after the word "招聘".
        link_text = page_tree.xpath('//a[contains(@onclick,"#joblist")]/text()')[0]
        number_part = link_text.split('招聘')[1]
        exact_total = int(number_part.strip())

        print(tm().get_localtime())  # current local time
        print(f'计数器:{count}\n公司ID:{com_id}\n招聘岗位数:{exact_total}')

    # Table column names for the status flag and the stored count.
    gm().upd_status(com_id, 'status_recruit', 'count_recruit', exact_total)
    return exact_total, res
def __init__(self):
    """Create the helper objects used by this crawler and set its base URL.

    Each helper is built by calling ``RecruitInfo``, ``db``, ``gh`` and
    ``gm`` with no arguments; what they do is defined elsewhere in the
    project.
    """
    # NOTE(review): if this constructor belongs to RecruitInfo itself, the
    # next line recurses without end — confirm which class owns it.
    self.rc = RecruitInfo()
    self.db = db()
    self.gh = gh()
    self.gm = gm()
    # Site root (no trailing slash) that request URLs are built from.
    self.index_url = 'https://www.qichacha.com'
def __init__(self):
    """Create the helper objects used by this class.

    Each helper is built by calling the project-level factories ``db``,
    ``dk``, ``gh``, ``gm`` and ``tm`` with no arguments; what they do is
    defined elsewhere in the project.
    """
    self.db = db()
    self.dk = dk()
    self.gh = gh()
    self.gm = gm()
    self.tm = tm()
def __init__(self):
    """Create the helper objects used by this class and set its base URL.

    Each helper is built by calling the project-level factories ``db``,
    ``gh`` and ``gm`` with no arguments; what they do is defined elsewhere
    in the project.
    """
    self.db = db()
    self.gh = gh()
    self.gm = gm()
    # Site root; this value ends with a trailing slash.
    self.index_url = 'https://www.qichacha.com/'