Exemple #1
0
 def login(self):
     """Type the account and password into the login form and submit it.

     The account and password inputs are located via ``self.acc_xpath`` and
     ``self.pwd_xpath``. When ``self.cap_xpath`` is truthy, the text returned
     by ``self.get_captcha_text()`` is typed into the captcha input as well.
     Submission is delegated to ``self.enter()``; if that returns a falsy
     value, ENTER is sent to the last input that was filled in.
     """
     acc_el = self.wait_el_presence_by_xpath(self.acc_xpath)
     try:
         acc_el.clear()  # remove any leftover account text
     except Exception:
         # Clearing is best effort; ``Exception`` (not a bare ``except``) is
         # used so KeyboardInterrupt/SystemExit are not swallowed.
         warning('acc clear error')
     acc_el.send_keys(self.acc)
     time.sleep(1.5)

     pwd_el = self.wait_el_presence_by_xpath(self.pwd_xpath)
     try:
         pwd_el.clear()  # remove any leftover password text
     except Exception:
         warning('pwd clear error')
     pwd_el.send_keys(self.pwd)
     end_el = pwd_el
     time.sleep(1.5)

     # Alphanumeric captcha, only when a captcha xpath is configured.
     if self.cap_xpath:
         cap_el = self.wait_el_presence_by_xpath(self.cap_xpath)
         cap_text = self.get_captcha_text()
         cap_el.send_keys(cap_text)
         end_el = cap_el

     # Per the original note, self.enter() deals with the slider captcha;
     # fall back to pressing ENTER in the last filled input otherwise.
     if not self.enter():
         end_el.send_keys(Keys.ENTER)
Exemple #2
0
    def login(self):
        """Type the account and password, submit, then handle a late captcha.

        The account and password inputs are located via ``self.acc_xpath``
        and ``self.pwd_xpath`` and ENTER is sent to the password input. After
        that, when ``self.cap_xpath`` is truthy, the captcha input is looked
        up; a selenium ``TimeoutException`` during that step is treated as
        "no captcha shown". Finally ``self.enter()`` is called and, if it
        returns a falsy value, ENTER is sent to the last input filled in.
        """
        acc_el = self.wait_el_presence_by_xpath(self.acc_xpath)
        try:
            acc_el.clear()  # remove any leftover account text
        except Exception:
            # Clearing is best effort; ``Exception`` (not a bare ``except``)
            # is used so KeyboardInterrupt/SystemExit are not swallowed.
            warning('acc clear error')
        acc_el.send_keys(self.acc)
        time.sleep(1.5)

        pwd_el = self.wait_el_presence_by_xpath(self.pwd_xpath)
        try:
            pwd_el.clear()  # remove any leftover password text
        except Exception:
            warning('pwd clear error')
        pwd_el.send_keys(self.pwd)
        end_el = pwd_el
        time.sleep(1.5)

        # First submission, before any captcha handling.
        end_el.send_keys(Keys.ENTER)
        time.sleep(2)

        # Alphanumeric captcha, only when a captcha xpath is configured.
        if self.cap_xpath:
            try:
                cap_el = self.wait_el_presence_by_xpath(self.cap_xpath)
                cap_text = self.get_captcha_text()
                cap_el.send_keys(cap_text)
                end_el = cap_el
            except selenium.common.exceptions.TimeoutException:
                # The captcha element did not show up in time.
                print('---暂无验证码')

        # Per the original note, self.enter() deals with the slider captcha;
        # fall back to pressing ENTER in the last filled input otherwise.
        if not self.enter():
            end_el.send_keys(Keys.ENTER)
Exemple #3
0
 def get_data(self, url, headers, timeout=30, data=None):
     """Fetch ``url`` and return the response body as text.

     A POST carrying ``data`` is issued when ``data`` is truthy, otherwise a
     GET. Certificate verification is disabled for both. Any exception is
     logged together with ``self.plat`` and ``False`` is returned instead of
     the body.
     """
     try:
         if data:
             response = requests.post(
                 url, headers=headers, verify=False, timeout=timeout,
                 data=data)
         else:
             response = requests.get(
                 url, headers=headers, verify=False, timeout=timeout)
     except Exception as err:
         warning('访问报错 plat: {} || {}'.format(self.plat, err))
         return False
     return response.text
Exemple #4
0
 def login_task(self, task):
     """Run the login flow for one task and always release its platform object.

     ``task`` is a mapping with at least a ``'plat'`` key. A copy of it is
     expanded as keyword arguments into the class registered for that
     platform in ``LOGIN_MAPPING``. Exceptions from construction, ``run()``
     or ``close()`` are logged and not propagated.
     """
     plat = task['plat']
     plat_obj = None
     try:
         plat_obj = LOGIN_MAPPING[plat](**copy(task))
         plat_obj.run()
     except Exception as e:
         warning('plat: {} || login_error || {}'.format(plat, e))
     finally:
         # close() used to be skipped whenever run() raised, leaving the
         # platform object (and whatever it holds open) behind.
         if plat_obj is not None:
             try:
                 plat_obj.close()
             except Exception as e:
                 warning('plat: {} || login_error || {}'.format(plat, e))
Exemple #5
0
 def after_login(self):
     """Click the post-login button if it is present, then wait.

     Any exception while locating or clicking the button is logged and
     ignored. Fixed pauses of 2s before and 3s after are kept. Returns None.
     """
     time.sleep(2)
     button_xpath = '//*[@id="app"]/div/div/div[2]/div/div/div/div[3]/button'
     try:
         button = self.wait_el_presence_by_xpath(button_xpath)
         actions = ActionChains(self.browser)
         actions.move_to_element(button).click().perform()
     except Exception:
         warning('bilibili miss click_login_redirct')
     time.sleep(3)
Exemple #6
0
    def crawl_task(self, task):
        """Crawl one task with the extractor registered for its platform.

        ``task`` is expanded as keyword arguments into the class found in
        ``EXTRACT_MAPPING`` under ``task['plat']``. An exception raised by
        ``start_crawl()`` is logged with the platform and ``task['acc']``;
        ``close()`` is called afterwards either way.
        """
        plat = task['plat']
        extractor = EXTRACT_MAPPING[plat](**task)

        try:
            extractor.start_crawl()
        except Exception as err:
            template = 'plat: {} || crawl_data_error || {} || reason: {}'
            warning(template.format(plat, task['acc'], err))

        extractor.close()
Exemple #7
0
 def get_captcha_text(self):
     """Screenshot the page, crop the captcha image and return its decoded text.

     The captcha element is located by its ``img-captcha`` id. A full
     screenshot is saved under ``DIR_``, cropped to the element's bounding
     box, saved as ``baitong.png`` and handed to ``YDMHttp().run()``, whose
     result is logged and returned.
     """
     element = self.wait_el_presence_by_xpath('//*[@id="img-captcha"]')

     # Read location and size once so the crop box comes from a single
     # snapshot of the element's geometry.
     location = element.location
     size = element.size
     left = location['x']
     top = location['y']
     right = left + size['width']
     bottom = top + size['height']

     screenshot_path = DIR_ + 'screenshot.png'
     captcha_path = DIR_ + 'baitong.png'
     self.browser.save_screenshot(screenshot_path)

     # The with-block closes the screenshot file handle, which was previously
     # left open. NOTE(review): assumes Image is PIL's Image module - confirm.
     with Image.open(screenshot_path) as screenshot:
         screenshot.crop((left, top, right, bottom)).save(captcha_path)

     captcha = YDMHttp().run(captcha_path)
     warning('baitong || captcha: {}'.format(captcha))
     return captcha
Exemple #8
0
 def start_crawl(self):
     """Run the crawl: set headers, fetch the first page, parse, finish.

     Headers are built from ``self.get_cookie()``. The first page is fetched
     with ``self.get_next_page_data(self.first_page)``; a falsy result is
     logged and ends the crawl early. Otherwise the payload is decoded as
     JSON, ``self.total_nums`` is set from it, and the data is passed to
     ``self.parse()`` followed by ``self.deal_end()``.

     Raises whatever ``json.loads`` raises for a payload that is not valid
     JSON.
     """
     self.set_headers(self.get_cookie())
     data = self.get_next_page_data(self.first_page)
     if not data:
         warning(
             'plat: {}|| classes->extract_data->start_crawl || miss start_crawl_data'
             .format(self.plat))
         return
     # json.loads() no longer accepts ``encoding`` (removed in Python 3.9),
     # so passing it raised TypeError there; it was ignored before that.
     data = json.loads(data)
     self.total_nums = self.get_total_nums(data)
     self.parse(data, self.first_page, self.headers)
     self.deal_end()
Exemple #9
0
    def get_data(self, url, headers, timeout=30, data=None):
        """Fetch ``url`` through a proxy, falling back to a direct request.

        A POST carrying ``data`` is issued when ``data`` is truthy, otherwise
        a GET; certificate verification is disabled. The first attempt uses
        the proxy from ``self.get_proxy()``. If that attempt raises or does
        not answer with status 200, the request is repeated without a proxy.

        Returns the response body as text, or ``False`` when the direct
        retry raises too (the error is logged with ``self.plat``).
        """
        try:
            proxy = self.get_proxy()
            if data:
                res = requests.post(url, headers=headers, verify=False,
                                    data=data, timeout=timeout, proxies=proxy)
            else:
                res = requests.get(url, headers=headers, verify=False,
                                   timeout=timeout, proxies=proxy)

            if res.status_code != 200:
                raise Exception('proxy error')
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit do not trigger a silent retry.
            try:
                if data:
                    res = requests.post(url, headers=headers, verify=False,
                                        timeout=timeout, data=data)
                else:
                    res = requests.get(url, headers=headers, verify=False,
                                       timeout=timeout)
            except Exception as e:
                warning('访问报错 plat: {} || {}'.format(self.plat, e))
                return False
        return res.text
Exemple #10
0
 def to_login(self):
     """Run one login attempt and retry while ``after_login()`` returns False.

     Each attempt clears the browser cookies and then calls
     ``before_login()``, ``login()`` and ``after_login()`` with 2s pauses in
     between. ``self.mark`` counts the retries already used; once it reaches
     ``self.login_retry_times`` a failed attempt is logged instead of
     retried. Returns None.
     """
     print('---plat:{}, user:{}, mark:{}, login_retry_time:{}'.format(
         self.plat, self.acc, self.mark, self.login_retry_times))
     self.browser.delete_all_cookies()
     self.before_login()
     time.sleep(2)
     self.login()
     time.sleep(2)
     res = self.after_login()

     # Only an explicit False counts as failure. Previously the failure
     # warning was also emitted after a successful attempt whenever the retry
     # budget happened to be used up (mark >= login_retry_times).
     if res is not False:
         return
     if self.mark < self.login_retry_times:
         self.mark += 1
         self.to_login()
     else:
         warning('>>>>login faile, plat:{}, user:{}'.format(
             self.plat, self.acc))
Exemple #11
0
    def __init__(self,
                 first_page=1,
                 channel_category_id=None,
                 channel_id=None,
                 agent_id=None,
                 plat='',
                 acc=''):
        """Store the crawl parameters and validate the channel identifiers.

        Args:
            first_page: stored as ``self.first_page``.
            channel_category_id: required; must be truthy.
            channel_id: required; must be truthy.
            agent_id: required; must be truthy.
            plat: platform name; also the single key of ``self.ad_data``.
            acc: account name, stored as ``self.acc``.

        Raises:
            AssertionError: if any of the three identifiers is falsy. The
                error is logged before being raised.
        """
        self.first_page = first_page
        self.plat = plat
        self.acc = acc
        self.total_nums = 0
        self.headers = {}
        # Creation time of this instance, formatted as a local-time string.
        self.time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        self.ad_data = {self.plat: []}

        self.channel_category_id = channel_category_id
        self.channel_id = channel_id
        self.agent_id = agent_id
        # Explicit check instead of ``assert`` so the validation still runs
        # under ``python -O``; the exception type and message are unchanged.
        if not (channel_category_id and channel_id and agent_id):
            error = AssertionError(
                'classes->extract_data->__init__: channel_id')
            warning(error)
            raise error
Exemple #12
0
    Tasks(process_nums=num, type='extra').run()


def run_plats_to_login(num=PROCESS_NUMS):
    """Create a ``Tasks`` runner with ``process_nums=num`` and run it.

    No task type is passed, so ``Tasks`` uses its own default.
    """
    runner = Tasks(process_nums=num)
    runner.run()


if __name__ == '__main__':
    # Script entry point: ``login`` runs the login tasks and ``extract`` runs
    # the crawl tasks; any other first argument is only logged as invalid.
    import sys
    if len(sys.argv) < 2:
        print('####------set argv----------####')
        sys.exit()
    try:
        command = sys.argv[1]
        if command == 'login':
            run_plats_to_login()
            # NOTE: leftover chrome / phantomjs processes used to be killed
            # here with ``kill -9``; that cleanup is currently disabled.
        elif command == 'extract':
            crawl_plats_data()
        else:
            warning('sys.argv[1] not in [login, extract], invalid argument')
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit raised inside the tasks still stop the script.
        content = 'location:{}, error:{}'.format(sys.argv[0],
                                                 traceback.format_exc())
        warning(content)