def login_and_get_data(self, ui):
    lu = LoginZYZS(ui).run_login()
    if not lu.get('succ'):
        return lu
    self.cookies = lu.get('cookie')
    self._headers = lu.get('headers')
    mths, days = Util().make_days(ys=None, ms=None, ye=None, me=None)
    empty_day = []
    for day in days:
        logger.info('crawler day ----- %s' % day)
        res = self.get_data_process(day)
        if not res.get('succ'):
            return res
        if res.get('msg') == 'no data':
            empty_day.append(day)
    try:
        self.init_chrome(ui)
        for sd in days:
            if sd in empty_day:  # skip days that returned no data
                continue
            self.get_img(sd)
    except Exception as es:
        logger.error(es, exc_info=1)
    finally:
        self.d.quit()
    return {'succ': True}
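# Sketch: the try/finally around self.d.quit() recurs in these
# login_and_get_data variants. A minimal, hypothetical contextmanager version
# of the same cleanup (quit_on_exit is an illustrative name, not in this codebase):
from contextlib import contextmanager

@contextmanager
def quit_on_exit(driver):
    try:
        yield driver     # run the screenshot phase
    finally:
        driver.quit()    # always release the browser, even on exceptions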
def login_and_get_data(self, ui):
    # login
    lu = LoginUC(ui).run_login()
    if not lu.get('succ'):
        return lu
    self.d = lu.pop('driver')
    self.wait = WebDriverWait(self.d, 20)
    self.cookies = '; '.join(
        ['%s=%s' % (e.get('name'), e.get('value')) for e in lu.get('cookie')])
    # fetch the data for each date range
    data = []
    mths, dates = Util().make_dates(ys=None, ms=None, ye=None, me=None)
    for sd, ed in dates:
        res = self.get_data_process(sd, ed)
        if not res.get('succ'):
            return res
        data.append([sd, ed, res.get('pids')])
    # take the screenshots
    try:
        for sd, ed, pids in data:
            self.change_date(sd, ed)
            for pid in pids:
                self.get_img(sd, ed, pid)
    finally:
        self.d.quit()
    return {'succ': True}
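# Sketch: the '; '.join above flattens Selenium cookie dicts into a raw Cookie
# header string. An equivalent way to reuse the same login is to load them into
# a requests.Session (requests is already used elsewhere in this project);
# session_from_driver_cookies is an illustrative helper, not part of this code.
import requests

def session_from_driver_cookies(cookies):
    # cookies: list of Selenium cookie dicts ({'name': ..., 'value': ...})
    s = requests.Session()
    for c in cookies:
        s.cookies.set(c['name'], c['value'], domain=c.get('domain'))
    return s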
def init_logger(spider):
    global logger
    log_path = os.path.abspath('./logs/ccbn')
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    spider = 'ccbn_crawler' if not spider else spider
    logger = Util().record_log(log_path, spider) if not logger else logging.getLogger(spider)
def __init__(self, user_info, logger_name):
    global logger
    self.d = None
    self.u = Util()
    self.user_info = user_info
    self.line_path = None
    self.cookies = {}
    self.gtk = None
    logger = getLogger(logger_name)
def get_data_process(self):
    app_ids = self.get_account_type()
    content = []
    mths, dates = Util().make_dates(ys=2016, ms=1, ye=2017, me=12)
    for sd, ed in dates:
        logger.info('date range ---- %s~%s' % (sd, ed))
        for i in app_ids:
            content.extend(self.get_data(i, sd, ed))
    return content
def deal_vc(self):
    ele = self.wait.until(EC.visibility_of_element_located((By.ID, 'checkpic')))
    img_path = join(IMG_PATH, 'vc.png')
    Util().cutimg_by_driver(self.d, ele, img_path)
    # with open(img_path, 'br') as i:
    #     im = i.read()
    vc = self.ch_img(img_path)
    if not vc:
        ele.click()
        return self.deal_vc()
    return vc
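# Sketch: deal_vc retries by recursing with no depth limit, so a captcha that
# never resolves will eventually hit Python's recursion limit. A hypothetical
# iterative rewrite with a bounded attempt count (max_tries is assumed, not in
# the original):
def deal_vc(self, max_tries=10):
    for _ in range(max_tries):
        ele = self.wait.until(EC.visibility_of_element_located((By.ID, 'checkpic')))
        img_path = join(IMG_PATH, 'vc.png')
        Util().cutimg_by_driver(self.d, ele, img_path)
        vc = self.ch_img(img_path)
        if vc:
            return vc
        ele.click()  # refresh the captcha and try again
    return None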
def crawler_and_save(self):
    # crawl the window [5 days ago, today]
    year, this_month, today = time.strftime('%Y-%m-%d').split('-')
    if int(today) < 5:
        # wrap into the previous month (the January rollover is not handled here);
        # the sign matches crawl_page_list below: day_len - 5 + today
        day_len = Util().mDays(int(year), int(this_month) - 1)
        day = str(day_len - 5 + int(today))
    else:
        day = str(int(today) - 5)
    month = str(int(this_month) - 1) if int(today) < 5 else this_month
    first_date = '%s-%s-%s' % (year, month, day)
    last_date = '%s-%s-%s' % (year, this_month, today)
    return self.crawler(first_date, last_date)
def ebank_run(cookie=None, spider=None):
    global logger
    cookie = "com.bocom.cebs.base.resolver.CEBSSmartLocaleResolver.LOCALE=zh_CN; JSESSIONID=0000aDGgxUDXj-141Az5eHtwaGc:-1" if not cookie else cookie
    log_path = os.path.abspath('.')
    spider = 'ebank_crawler' if not spider else spider
    logger = Util().record_log(log_path, spider)
    headers = {
        'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
        'Accept': "application/json, text/javascript, */*; q=0.01",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        'Host': "ebank.95559.com.cn",
        'Cookie': cookie
    }
    cc = EbankCrawler(cookie=cookie, headers=headers, spider=spider)
    return cc.crawl_page_list()
def crawl_page_list(self):
    year, this_month, today = time.strftime('%Y-%m-%d').split('-')
    if int(today) < 25:
        # wrap into the previous month
        day_len = Util().mDays(int(year), int(this_month) - 1)
        day = str(day_len - 25 + int(today))
    else:
        day = str(int(today) - 25)
    month = str(int(this_month) - 1) if int(today) < 25 else this_month
    z = lambda x: x if len(str(x)) == 2 else '0%s' % x  # zero-pad to two digits
    first_date = '%s%s%s' % (year, z(month), z(day))
    last_date = '%s%s%s' % (year, z(this_month), z(today))
    data = self.crawl(first_date, last_date)
    if isinstance(data, list):
        self.save(data)
        return True
    else:
        return False
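# Sketch: crawler_and_save and crawl_page_list both hand-roll "N days ago" with
# a month-length lookup, wrap arithmetic, and a zero-padding lambda. The
# standard library covers all three, including month and year boundaries;
# date_window is a hypothetical helper, not part of this module.
from datetime import date, timedelta

def date_window(days_back):
    # Return (first_date, last_date) as zero-padded YYYYMMDD strings.
    last = date.today()
    first = last - timedelta(days=days_back)
    return first.strftime('%Y%m%d'), last.strftime('%Y%m%d')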
def login_and_get_data(self, ui):
    lu = LoginSYL(ui).run_login()
    if not lu.get('succ'):
        return lu
    self.cookies = lu.get('cookie')
    # split on the first '=' only: cookie values may themselves contain '='
    self.cookie_jar = [{'name': e.split('=', 1)[0], 'value': e.split('=', 1)[1]}
                       for e in self.cookies.split('; ')]
    self._headers = lu.get('headers')
    ys, ms, ye, me = ui.get('date') if ui.get('date') else (None, None, None, None)
    mths, dates = Util().make_dates(ys=ys, ms=ms, ye=ye, me=me)
    pages_list = []
    data_list = []
    for sd, ed in dates:
        res = self.get_data_process(sd, ed)
        if not res.get('succ'):
            return res
        if res.get('msg') == 'no data':
            continue
        data_list.append(1)
        pages_list.append((sd, ed, res.get('pages')))
        logger.info('crawled month range ----- %s ~ %s' % (sd, ed))
    if len(data_list) == 0:
        return {'succ': True, 'msg': 'no data'}
    url = 'http://www.etjg.com/member/'
    # Selenium only accepts add_cookie() for the currently loaded domain,
    # hence loading the page once before and once after injecting the cookies
    self.init_driver()
    self.driver_get(url)
    for c in self.cookie_jar:
        self.d.add_cookie(c)
    self.driver_get(url)
    for sd, ed, p in pages_list:
        if not p:
            continue
        self.get_img_process(sd, ed)
    self.d.quit()
    return {'succ': True}
def icbc_run(spider, icbc_sid=None, icbc_cks=None):
    global logger, log_name
    # log_name = '%s.icbc' % spider
    log_path = os.path.abspath('.')
    logger = Util().record_log(log_path, spider)
    # icbc_sid = "EKCYDJDUGREDIDJVCOEJENISJGFGHMDMDWHLJIHC"
    # icbc_cks = "ar_stat_ss=4936397698_7_1540807848_9999; ar_stat_uv=31490953308686463371|9999; SRV_EBANKC_PUJI=rs8|W9ZsK|W9ZmF"
    headers = {
        "Accept": "text/html, application/xhtml+xml, image/jxr, */*",
        "Referer": "https://corporbank-simp.icbc.com.cn/ebankc/newnormalbank/include/leftframe.jsp?dse_sessionId="
                   + icbc_sid + "&chain=E19%7C%E8%B4%A6%E6%88%B7%E7%AE%A1%E7%90%86",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Host": "corporbank-simp.icbc.com.cn",
        "Cookie": None,
    }
    c = IcbcCrawlers(icbc_sid, icbc_cks, base_headers=headers)
    c.crawler_and_save()
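# Sketch: the escaped chain parameter in the Referer above is just the UTF-8
# percent-encoding of 'E19|账户管理' ("account management"). It could be built
# with urllib instead of hard-coding the escapes:
from urllib.parse import quote

chain = quote('E19|账户管理', safe='')
# chain == 'E19%7C%E8%B4%A6%E6%88%B7%E7%AE%A1%E7%90%86'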
def cmb_run(sid=None, list_url=None, spider=None):
    global logger
    sid = "JSESSIONID=00008W4youy7X-Ms0bvZ9QEaQaQ:1883m3ce3" if not sid else sid
    list_url = list_url if list_url else 'https://ubank.cmbchina.com/html/--QmJXWHFLeTQ3M0w0Zm9Ddlo-Q1oycS49aSZxWG4zcT1P.--'
    log_path = os.path.abspath('.')
    spider = 'cmb_crawler' if not spider else spider
    logger = Util().record_log(log_path, spider)
    headers = {
        'Host': "ubank.cmbchina.com",
        'Origin': "https://ubank.cmbchina.com",
        'Content-Type': "application/x-www-form-urlencoded",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3569.0 Safari/537.36",
        'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        'Referer': "https://ubank.cmbchina.com/html/accmgr/inputDate.jsp",
        'Cookie': sid,
    }
    cc = CmbCrawler(session_id=sid, list_url=list_url, headers=headers, spider=spider)
    cc.crawl_page_list()
    post_data()
cpa  http://cp.chaohuida.com:9097/manage/user/login.html  zly
'''
from platform_crawler.utils.utils import Util
import requests
from platform_crawler.spiders.pylib.cut_img import cut_img
from platform_crawler.spiders.pylib.task_process import TaskProcess
from platform_crawler import settings
import json
from selenium.webdriver.common.by import By
import time
import os
import re
from html.parser import HTMLParser

u = Util()
logger = None
gHost = 'http://cp.chaohuida.com:9097'


# parse the HTML document
class hp(HTMLParser):
    a_text = False
    index = 0

    def __init__(self):
        self.urlArr = []
        self.resArr = []
        super(hp, self).__init__()

    def handle_starttag(self, tag, attr):
from platform_crawler.spiders.pylib.get_pwd import get_pwd
from platform_crawler.utils.utils import Util
import time
import os
import json
# import xlsxwriter
import xlrd
import xlwt
from xlutils.copy import copy  # supports read/write access to an existing workbook
import requests

ask_sql_url = 'http://erp.btomorrow.cn/adminjson/adminjson/ERP_GetCrawlerTaskStatus'  # useless
post_res_url = 'http://erp.btomorrow.cn/adminjson/ERP_ReportPythonCrawlerTask'
fscapture = r'D:\fscapture\FSCapture.exe'
u = Util()
log_path = os.path.abspath('./logs/AliosExcel')
if not os.path.exists(log_path):
    os.makedirs(log_path)
logger = u.record_log(log_path, __name__)
real_ip = '139.224.116.116'
serv_parm = {
    'ip': real_ip,
    'user': '******',
    'pwd': 'hhmt@pwd@123',
    'dst_path': ''
}


class AliyunExcelSpider:
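# Sketch: xlutils.copy is what allows appending to an .xls file that already
# exists (xlwt alone only creates new workbooks). A minimal read-copy-write
# cycle; append_row is an illustrative helper, not part of this module.
def append_row(path, row_values):
    rb = xlrd.open_workbook(path, formatting_info=True)  # re-open existing workbook
    wb = copy(rb)                                        # writable copy (xlutils)
    sheet = wb.get_sheet(0)
    next_row = rb.sheet_by_index(0).nrows                # first empty row
    for col, value in enumerate(row_values):
        sheet.write(next_row, col, value)
    wb.save(path)                                        # save in place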
    pag.hotkey('enter')
    time.sleep(3)
    pag.screenshot(after_enter_login_btn)
    # check whether a captcha appeared (90, 135)
    res = handle_login_res(loginid)
    if not res:
        return False
    pag.hotkey('enter')
    time.sleep(4)
    a = win32gui.FindWindow(None, "TIM")  # get the window handle; arg 1: class name, arg 2: window title
    loginid = win32gui.GetWindowPlacement(a)  # [4] is the window rect (left, top, right, bottom)
    pag.click(loginid[4][2] - 68, loginid[4][1] + 29)
    # print(68, 29)
    return True


def login_cli(acc, pwd, util):
    global u, pag, logger, ACC
    u = util
    ACC = acc
    pag = util.pag
    logger = logging.getLogger('%s.login_with_tim' % GlobalVal.CUR_MAIN_LOG_NAME)
    kill_qq()
    return QQ(acc, pwd)


if __name__ == '__main__':
    from platform_crawler.utils.utils import Util
    login_cli('2823259680', 'Hhmt123456', Util())
""" from time import sleep, time from threading import Thread import json import os from platform_crawler.utils.post_get import post from platform_crawler.utils.utils import Util from platform_crawler.spiders.pylib.kill_sth import stop_thread, kill_chrome_fscapture # , clean_desk spider_type = {} from platform_crawler.spiders.CPA.qq_finacial_spider import QQFinancialSpider get_task_url = 'http://erp.btomorrow.cn/adminjson/ERP_PubishCrawlerTask' u = Util() sd_path = os.path.abspath('./save_data') log_path = os.path.abspath('./logs') logger = u.record_log(log_path, 'YYBHLCPD') # record the process id pid = os.getpid() with open('cm_main.pid', 'w') as pd: pd.write(str(pid)) # Run task process with a thread so that it could be strongly killed when it was running timeout def run_process(task_name, args=None): args = args if args else () task_object = task_name() # 创建任务对象 task_func = task_object.run_task # 指定要执行的函数入口
from selenium.webdriver.common.by import By
import time
import pyautogui as pag
import random
import os
import logging
# from pwd import pkey
from platform_crawler.utils.utils import Util
# from apis.rk import RClient
# from apis.rk_v2 import APIClient

# initialize module-level objects
logger = logging.getLogger('ccbn')
util = Util()

# initialize global variables
# driver_imgs_path = os.path.abspath('./spiders')
# passwd_img_path = driver_imgs_path + 'pwd.png'
# wrong_pwd_img_path = driver_imgs_path + 'wrong_pwd.png'
# login_result = driver_imgs_path + 'last.png'
base_path = os.path.abspath('./spiders/ccbn')
verify_code_img_path = os.path.join(base_path, 'verify.png')
passwd_img_path = os.path.join(base_path, 'pwd.png')
wrong_verify_code = os.path.join(base_path, 'vc_error.png')
login_succ = os.path.join(base_path, 'login_success.png')

# captcha-solving platform client
# code_pwd = pkey['ruokuai']['pw'].encode('utf-8')
# rc = RClient(pkey['ruokuai']['un'], code_pwd, '1', 'b40ffbee5c1cf4e38028c197eb2fc751')