def login_and_get_data(self, ui):
    lu = LoginZYZS(ui).run_login()
    if not lu.get('succ'):
        return lu
    self.cookies = lu.get('cookie')
    self._headers = lu.get('headers')
    mths, days = Util().make_days(ys=None, ms=None, ye=None, me=None)
    empty_day = []
    for day in days:
        logger.info('crawler day ----- %s' % day)
        res = self.get_data_process(day)
        if not res.get('succ'):
            return res
        if res.get('msg') == 'no data':
            empty_day.append(day)
    try:
        self.init_chrome(ui)
        for sd in days:
            if sd in empty_day:  # skip days that returned no data
                continue
            self.get_img(sd)
    except Exception as es:
        logger.error(es, exc_info=1)
    finally:
        self.d.quit()
    return {'succ': True}
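# Sketch: the try/finally around self.d.quit() recurs in these
# login_and_get_data variants. A minimal, hypothetical contextmanager version
# of the same cleanup (quit_on_exit is an illustrative name, not in this codebase):
from contextlib import contextmanager

@contextmanager
def quit_on_exit(driver):
    try:
        yield driver     # run the screenshot phase
    finally:
        driver.quit()    # always release the browser, even on exceptions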
def login_and_get_data(self, ui):
    # login
    lu = LoginUC(ui).run_login()
    if not lu.get('succ'):
        return lu
    self.d = lu.pop('driver')
    self.wait = WebDriverWait(self.d, 20)
    self.cookies = '; '.join(
        ['%s=%s' % (e.get('name'), e.get('value')) for e in lu.get('cookie')])
    # fetch the data for each date range
    data = []
    mths, dates = Util().make_dates(ys=None, ms=None, ye=None, me=None)
    for sd, ed in dates:
        res = self.get_data_process(sd, ed)
        if not res.get('succ'):
            return res
        data.append([sd, ed, res.get('pids')])
    # take the screenshots
    try:
        for sd, ed, pids in data:
            self.change_date(sd, ed)
            for pid in pids:
                self.get_img(sd, ed, pid)
    finally:
        self.d.quit()
    return {'succ': True}
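# Sketch: the '; '.join above flattens Selenium cookie dicts into a raw Cookie
# header string. An equivalent way to reuse the same login is to load them into
# a requests.Session (requests is already used elsewhere in this project);
# session_from_driver_cookies is an illustrative helper, not part of this code.
import requests

def session_from_driver_cookies(cookies):
    # cookies: list of Selenium cookie dicts ({'name': ..., 'value': ...})
    s = requests.Session()
    for c in cookies:
        s.cookies.set(c['name'], c['value'], domain=c.get('domain'))
    return s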
def init_logger(spider):
    global logger
    log_path = os.path.abspath('./logs/ccbn')
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    spider = 'ccbn_crawler' if not spider else spider
    logger = Util().record_log(log_path, spider) if not logger else logging.getLogger(spider)
def __init__(self, user_info, logger_name):
    global logger
    self.d = None
    self.u = Util()
    self.user_info = user_info
    self.line_path = None
    self.cookies = {}
    self.gtk = None
    logger = getLogger(logger_name)
def get_data_process(self):
    app_ids = self.get_account_type()
    content = []
    mths, dates = Util().make_dates(ys=2016, ms=1, ye=2017, me=12)
    for sd, ed in dates:
        logger.info('date range ---- %s~%s' % (sd, ed))
        for i in app_ids:
            content.extend(self.get_data(i, sd, ed))
    return content
def deal_vc(self):
    ele = self.wait.until(EC.visibility_of_element_located((By.ID, 'checkpic')))
    img_path = join(IMG_PATH, 'vc.png')
    Util().cutimg_by_driver(self.d, ele, img_path)
    # with open(img_path, 'br') as i:
    #     im = i.read()
    vc = self.ch_img(img_path)
    if not vc:
        ele.click()
        return self.deal_vc()
    return vc
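# Sketch: deal_vc retries by recursing with no depth limit, so a captcha that
# never resolves will eventually hit Python's recursion limit. A hypothetical
# iterative rewrite with a bounded attempt count (max_tries is assumed, not in
# the original):
def deal_vc(self, max_tries=10):
    for _ in range(max_tries):
        ele = self.wait.until(EC.visibility_of_element_located((By.ID, 'checkpic')))
        img_path = join(IMG_PATH, 'vc.png')
        Util().cutimg_by_driver(self.d, ele, img_path)
        vc = self.ch_img(img_path)
        if vc:
            return vc
        ele.click()  # refresh the captcha and try again
    return None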
def crawler_and_save(self):
    # crawl the window [5 days ago, today]
    year, this_month, today = time.strftime('%Y-%m-%d').split('-')
    if int(today) < 5:
        # wrap into the previous month (the January rollover is not handled here);
        # the sign matches crawl_page_list below: day_len - 5 + today
        day_len = Util().mDays(int(year), int(this_month) - 1)
        day = str(day_len - 5 + int(today))
    else:
        day = str(int(today) - 5)
    month = str(int(this_month) - 1) if int(today) < 5 else this_month
    first_date = '%s-%s-%s' % (year, month, day)
    last_date = '%s-%s-%s' % (year, this_month, today)
    return self.crawler(first_date, last_date)
def ebank_run(cookie=None, spider=None):
    global logger
    cookie = "com.bocom.cebs.base.resolver.CEBSSmartLocaleResolver.LOCALE=zh_CN; JSESSIONID=0000aDGgxUDXj-141Az5eHtwaGc:-1" if not cookie else cookie
    log_path = os.path.abspath('.')
    spider = 'ebank_crawler' if not spider else spider
    logger = Util().record_log(log_path, spider)
    headers = {
        'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
        'Accept': "application/json, text/javascript, */*; q=0.01",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        'Host': "ebank.95559.com.cn",
        'Cookie': cookie
    }
    cc = EbankCrawler(cookie=cookie, headers=headers, spider=spider)
    return cc.crawl_page_list()
def crawl_page_list(self):
    year, this_month, today = time.strftime('%Y-%m-%d').split('-')
    if int(today) < 25:
        # wrap into the previous month
        day_len = Util().mDays(int(year), int(this_month) - 1)
        day = str(day_len - 25 + int(today))
    else:
        day = str(int(today) - 25)
    month = str(int(this_month) - 1) if int(today) < 25 else this_month
    z = lambda x: x if len(str(x)) == 2 else '0%s' % x  # zero-pad to two digits
    first_date = '%s%s%s' % (year, z(month), z(day))
    last_date = '%s%s%s' % (year, z(this_month), z(today))
    data = self.crawl(first_date, last_date)
    if isinstance(data, list):
        self.save(data)
        return True
    else:
        return False
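# Sketch: crawler_and_save and crawl_page_list both hand-roll "N days ago" with
# a month-length lookup, wrap arithmetic, and a zero-padding lambda. The
# standard library covers all three, including month and year boundaries;
# date_window is a hypothetical helper, not part of this module.
from datetime import date, timedelta

def date_window(days_back):
    # Return (first_date, last_date) as zero-padded YYYYMMDD strings.
    last = date.today()
    first = last - timedelta(days=days_back)
    return first.strftime('%Y%m%d'), last.strftime('%Y%m%d')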
def login_and_get_data(self, ui):
    lu = LoginSYL(ui).run_login()
    if not lu.get('succ'):
        return lu
    self.cookies = lu.get('cookie')
    # split on the first '=' only: cookie values may themselves contain '='
    self.cookie_jar = [{'name': e.split('=', 1)[0], 'value': e.split('=', 1)[1]}
                       for e in self.cookies.split('; ')]
    self._headers = lu.get('headers')
    ys, ms, ye, me = ui.get('date') if ui.get('date') else (None, None, None, None)
    mths, dates = Util().make_dates(ys=ys, ms=ms, ye=ye, me=me)
    pages_list = []
    data_list = []
    for sd, ed in dates:
        res = self.get_data_process(sd, ed)
        if not res.get('succ'):
            return res
        if res.get('msg') == 'no data':
            continue
        data_list.append(1)
        pages_list.append((sd, ed, res.get('pages')))
        logger.info('crawled month range ----- %s ~ %s' % (sd, ed))
    if len(data_list) == 0:
        return {'succ': True, 'msg': 'no data'}
    url = 'http://www.etjg.com/member/'
    # Selenium only accepts add_cookie() for the currently loaded domain,
    # hence loading the page once before and once after injecting the cookies
    self.init_driver()
    self.driver_get(url)
    for c in self.cookie_jar:
        self.d.add_cookie(c)
    self.driver_get(url)
    for sd, ed, p in pages_list:
        if not p:
            continue
        self.get_img_process(sd, ed)
    self.d.quit()
    return {'succ': True}
def icbc_run(spider, icbc_sid=None, icbc_cks=None):
    global logger, log_name
    # log_name = '%s.icbc' % spider
    log_path = os.path.abspath('.')
    logger = Util().record_log(log_path, spider)
    # icbc_sid = "EKCYDJDUGREDIDJVCOEJENISJGFGHMDMDWHLJIHC"
    # icbc_cks = "ar_stat_ss=4936397698_7_1540807848_9999; ar_stat_uv=31490953308686463371|9999; SRV_EBANKC_PUJI=rs8|W9ZsK|W9ZmF"
    headers = {
        "Accept": "text/html, application/xhtml+xml, image/jxr, */*",
        "Referer": "https://corporbank-simp.icbc.com.cn/ebankc/newnormalbank/include/leftframe.jsp?dse_sessionId="
                   + icbc_sid + "&chain=E19%7C%E8%B4%A6%E6%88%B7%E7%AE%A1%E7%90%86",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Host": "corporbank-simp.icbc.com.cn",
        "Cookie": None,
    }
    c = IcbcCrawlers(icbc_sid, icbc_cks, base_headers=headers)
    c.crawler_and_save()
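# Sketch: the escaped chain parameter in the Referer above is just the UTF-8
# percent-encoding of 'E19|账户管理' ("account management"). It could be built
# with urllib instead of hard-coding the escapes:
from urllib.parse import quote

chain = quote('E19|账户管理', safe='')
# chain == 'E19%7C%E8%B4%A6%E6%88%B7%E7%AE%A1%E7%90%86'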
def cmb_run(sid=None, list_url=None, spider=None):
    global logger
    sid = "JSESSIONID=00008W4youy7X-Ms0bvZ9QEaQaQ:1883m3ce3" if not sid else sid
    list_url = list_url if list_url else 'https://ubank.cmbchina.com/html/--QmJXWHFLeTQ3M0w0Zm9Ddlo-Q1oycS49aSZxWG4zcT1P.--'
    log_path = os.path.abspath('.')
    spider = 'cmb_crawler' if not spider else spider
    logger = Util().record_log(log_path, spider)
    headers = {
        'Host': "ubank.cmbchina.com",
        'Origin': "https://ubank.cmbchina.com",
        'Content-Type': "application/x-www-form-urlencoded",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3569.0 Safari/537.36",
        'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        'Referer': "https://ubank.cmbchina.com/html/accmgr/inputDate.jsp",
        'Cookie': sid,
    }
    cc = CmbCrawler(session_id=sid, list_url=list_url, headers=headers, spider=spider)
    cc.crawl_page_list()
    post_data()
cpa  http://cp.chaohuida.com:9097/manage/user/login.html  zly
'''
from platform_crawler.utils.utils import Util
import requests
from platform_crawler.spiders.pylib.cut_img import cut_img
from platform_crawler.spiders.pylib.task_process import TaskProcess
from platform_crawler import settings
import json
from selenium.webdriver.common.by import By
import time
import os
import re
from html.parser import HTMLParser

u = Util()
logger = None
gHost = 'http://cp.chaohuida.com:9097'


# parse the HTML document
class hp(HTMLParser):
    a_text = False
    index = 0

    def __init__(self):
        self.urlArr = []
        self.resArr = []
        super(hp, self).__init__()

    def handle_starttag(self, tag, attr):
from platform_crawler.spiders.pylib.get_pwd import get_pwd
from platform_crawler.utils.utils import Util
import time
import os
import json
# import xlsxwriter
import xlrd
import xlwt
from xlutils.copy import copy  # supports read/write access to an existing workbook
import requests

ask_sql_url = 'http://erp.btomorrow.cn/adminjson/adminjson/ERP_GetCrawlerTaskStatus'  # useless
post_res_url = 'http://erp.btomorrow.cn/adminjson/ERP_ReportPythonCrawlerTask'
fscapture = r'D:\fscapture\FSCapture.exe'
u = Util()
log_path = os.path.abspath('./logs/AliosExcel')
if not os.path.exists(log_path):
    os.makedirs(log_path)
logger = u.record_log(log_path, __name__)
real_ip = '139.224.116.116'
serv_parm = {
    'ip': real_ip,
    'user': '******',
    'pwd': 'hhmt@pwd@123',
    'dst_path': ''
}


class AliyunExcelSpider:
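# Sketch: xlutils.copy is what allows appending to an .xls file that already
# exists (xlwt alone only creates new workbooks). A minimal read-copy-write
# cycle; append_row is an illustrative helper, not part of this module.
def append_row(path, row_values):
    rb = xlrd.open_workbook(path, formatting_info=True)  # re-open existing workbook
    wb = copy(rb)                                        # writable copy (xlutils)
    sheet = wb.get_sheet(0)
    next_row = rb.sheet_by_index(0).nrows                # first empty row
    for col, value in enumerate(row_values):
        sheet.write(next_row, col, value)
    wb.save(path)                                        # save in place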
    pag.hotkey('enter')
    time.sleep(3)
    pag.screenshot(after_enter_login_btn)
    # check whether a captcha appeared (90, 135)
    res = handle_login_res(loginid)
    if not res:
        return False
    pag.hotkey('enter')
    time.sleep(4)
    a = win32gui.FindWindow(None, "TIM")  # get the window handle; arg 1: class name, arg 2: window title
    loginid = win32gui.GetWindowPlacement(a)  # [4] is the window rect (left, top, right, bottom)
    pag.click(loginid[4][2] - 68, loginid[4][1] + 29)
    # print(68, 29)
    return True


def login_cli(acc, pwd, util):
    global u, pag, logger, ACC
    u = util
    ACC = acc
    pag = util.pag
    logger = logging.getLogger('%s.login_with_tim' % GlobalVal.CUR_MAIN_LOG_NAME)
    kill_qq()
    return QQ(acc, pwd)


if __name__ == '__main__':
    from platform_crawler.utils.utils import Util
    login_cli('2823259680', 'Hhmt123456', Util())
""" from time import sleep, time from threading import Thread import json import os from platform_crawler.utils.post_get import post from platform_crawler.utils.utils import Util from platform_crawler.spiders.pylib.kill_sth import stop_thread, kill_chrome_fscapture # , clean_desk spider_type = {} from platform_crawler.spiders.CPA.qq_finacial_spider import QQFinancialSpider get_task_url = 'http://erp.btomorrow.cn/adminjson/ERP_PubishCrawlerTask' u = Util() sd_path = os.path.abspath('./save_data') log_path = os.path.abspath('./logs') logger = u.record_log(log_path, 'YYBHLCPD') # record the process id pid = os.getpid() with open('cm_main.pid', 'w') as pd: pd.write(str(pid)) # Run task process with a thread so that it could be strongly killed when it was running timeout def run_process(task_name, args=None): args = args if args else () task_object = task_name() # 创建任务对象 task_func = task_object.run_task # 指定要执行的函数入口
from selenium.webdriver.common.by import By
import time
import pyautogui as pag
import random
import os
import logging
# from pwd import pkey
from platform_crawler.utils.utils import Util
# from apis.rk import RClient
# from apis.rk_v2 import APIClient

# initialize module-level objects
logger = logging.getLogger('ccbn')
util = Util()

# initialize global variables
# driver_imgs_path = os.path.abspath('./spiders')
# passwd_img_path = driver_imgs_path + 'pwd.png'
# wrong_pwd_img_path = driver_imgs_path + 'wrong_pwd.png'
# login_result = driver_imgs_path + 'last.png'
base_path = os.path.abspath('./spiders/ccbn')
verify_code_img_path = os.path.join(base_path, 'verify.png')
passwd_img_path = os.path.join(base_path, 'pwd.png')
wrong_verify_code = os.path.join(base_path, 'vc_error.png')
login_succ = os.path.join(base_path, 'login_success.png')

# captcha-solving platform client
# code_pwd = pkey['ruokuai']['pw'].encode('utf-8')
# rc = RClient(pkey['ruokuai']['un'], code_pwd, '1', 'b40ffbee5c1cf4e38028c197eb2fc751')