Example #1
    def test_cf_recaptcha_15_04_2019(self, **kwargs):
        scraper = cfscrape.CloudflareScraper(**kwargs)
        message = re.compile(r'captcha challenge presented')
        scraper.get.when.called_with(url) \
            .should.have.raised(cfscrape.CloudflareCaptchaError, message)

        # Temporarily spoof an ancient 0.9.x OpenSSL build to hit the version check
        v = ssl.OPENSSL_VERSION_NUMBER
        ssl.OPENSSL_VERSION_NUMBER = 0x0090581f
        try:
            scraper = cfscrape.CloudflareScraper(**kwargs)
            message = re.compile(r'OpenSSL version is lower than 1.1.1')
            scraper.get.when.called_with(url) \
                .should.have.raised(cfscrape.CloudflareCaptchaError, message)
        finally:
            ssl.OPENSSL_VERSION_NUMBER = v
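For reference, these tests exercise the ordinary cfscrape entry point; a minimal sketch of the documented usage (the URL is a placeholder):

import cfscrape

# create_scraper() returns a CloudflareScraper that solves the IUAM
# JavaScript challenge transparently before returning the page
scraper = cfscrape.create_scraper()
print(scraper.get("https://somesite.example").content)  # placeholder URL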
Example #2
    def __init__(self):

        # Full names are matched against the page text; tickers are exposed to callers
        coins = ['Bitcoin', 'Ethereum', 'Cash', 'Litecoin', 'Omise', 'TRON']
        self.coins = ['BTC', 'ETH', 'BCH', 'LTC', 'OMG', 'TRX']

        scraper = cfscrape.CloudflareScraper()  # Coinspot uses cloudflare

        page = scraper.get("https://www.coinspot.com.au/tradecoins")
        soup = BeautifulSoup(page.content, 'html.parser')
        pricelist = soup.find_all('tr', class_="tradeitem showrow")

        foundcoin = [0] * len(coins)
        buyPrice = [0] * len(coins)
        sellPrice = [0] * len(coins)
        for i, element in enumerate(pricelist):
            for j, coin in enumerate(coins):
                if coin in element.get_text() and foundcoin[j] == 0:
                    print('\nFOUND: ' + coin)
                    table_elements = element.find_all('td')
                    buyPrice[j] = float(table_elements[1].attrs['data-value'])
                    sellPrice[j] = float(table_elements[2].attrs['data-value'])
                    foundcoin[j] = 1
        self.buyPrice = buyPrice
        self.sellPrice = sellPrice
        print(self.buyPrice)
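A hypothetical usage sketch for the constructor above, assuming the enclosing class is named CoinspotPrices (the snippet omits the class statement):

prices = CoinspotPrices()  # hypothetical name; the original class is not shown
for ticker, buy, sell in zip(prices.coins, prices.buyPrice, prices.sellPrice):
    print(ticker, buy, sell)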
Example #3
        def test(self, **kwargs):
            __Popen = subprocess.Popen

            # Temporarily mock subprocess.Popen to return a non-zero exit code
            def mock(*args, **kwargs):
                def node():
                    pass

                node.communicate = lambda: ('stdout',
                                            'Outdated Node.js detected')
                node.returncode = 1
                return node

            subprocess.Popen = mock

            try:
                scraper = cfscrape.CloudflareScraper(**kwargs)
                message = re.compile(r'non-zero exit status')
                scraper.get.when.called_with(url) \
                    .should.have.raised(subprocess.CalledProcessError, message)
                caplog.text.should_not.match(
                    re.compile(r'Error executing Cloudflare IUAM Javascript'))
                caplog.text.should.match(
                    re.compile(r'Outdated Node.js detected'))
            finally:
                subprocess.Popen = __Popen
Example #4
 def __init__(self, proxy, loop, localsession, executor, id=0):
     self.proxy = proxy
     self.loop = loop
     self.session = cfscrape.CloudflareScraper(headers=genHeaders())
     self.localsession = localsession
     self.executor = executor
     self.id = id  # marks which vote attempt this is
     self.fingerprint = md5(
         (proxy + 'Hecate2' + str(time.time())).encode()).hexdigest()
Example #5
    def login(self):
        """Login user if needed"""
        LOGGER.info('"%s": start login, method: "%s"', self.username,
                    self.login_method)
        cookie = self.get_cookie(self.username)
        if cookie is None:
            LOGGER.info('"%s": no cookie, new login, method "%s"',
                        self.username, self.login_method)

            method_dict = {
                'g': self.login_google,
                'google': self.login_google,
                'v': self.login_vk,
                'vk': self.login_vk,
                'f': self.login_facebook,
                'facebook': self.login_facebook,
            }

            if not isinstance(self.login_method, str) or \
                    self.login_method.lower() not in method_dict:
                raise RRClientException(
                    f"{self.login_method} is not a valid login method.")

            auth_text = requests.get("https://rivalregions.com").text
            browser = Browser(showWindow=self.show_window)

            browser = method_dict[self.login_method.lower()](browser,
                                                             auth_text)

            LOGGER.info('"%s": Get cookie', self.username)
            phpsessid = browser.get_cookie('PHPSESSID')
            if phpsessid:
                cookie = self.create_cookie(phpsessid.get('expiry', None),
                                            phpsessid.get('value', None))
                self.write_cookie(self.username, cookie)
            else:
                raise NoPHPsessidException()
            LOGGER.debug('"%s": closing login tab', self.username)
            browser.close_current_tab()

        self.session = cfscrape.CloudflareScraper()
        self.cookie = cookie
        self.session.cookies.set(**cookie)

        LOGGER.debug('"%s": set the var_c', self.username)
        response = self.session.get('https://rivalregions.com/#overview')
        lines = response.text.split("\n")
        for line in lines:
            if re.match("(.*)var c_html(.*)", line):
                var_c = line.split("'")[-2]
                LOGGER.debug('"%s": got var_c: %s', self.username, var_c)
                self.var_c = var_c
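Splitting on single quotes to pull out var_c is brittle; a sketch of a tighter alternative with one regular expression, assuming the page embeds var c_html = '<value>':

                match = re.search(r"var c_html\s*=\s*'([^']+)'", response.text)
                if match:
                    self.var_c = match.group(1)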
Example #6
    def new_session(self):
        """
        Launch a new session
        """

        self.session = cfscrape.CloudflareScraper()

        retries = Retry(total=5, backoff_factor=1)
        self.session.mount('https://', HTTPAdapter(max_retries=retries))

        self.pass_cloudflare()
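The same retry policy can be attached to any scraper; a self-contained sketch using the urllib3 and requests APIs the snippet already relies on:

import cfscrape
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

scraper = cfscrape.CloudflareScraper()
# up to 5 retries with increasing backoff on https:// requests
retries = Retry(total=5, backoff_factor=1)
scraper.mount('https://', HTTPAdapter(max_retries=retries))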
Example #7
 def test_js_challenge_environment_error(self, **kwargs):
     __path = os.environ['PATH']
     # Temporarily unset PATH to hide Node.js
     os.environ['PATH'] = ''
     try:
         scraper = cfscrape.CloudflareScraper(**kwargs)
         message = re.compile(r'Missing Node.js runtime')
         scraper.get.when.called_with(url) \
             .should.have.raised(EnvironmentError, message)
     finally:
         os.environ['PATH'] = __path
Example #8
    def login(self):
        """Login user if needed"""
        cookie = self.get_cookie(self.username)
        if cookie is None:
            LOGGER.info('Client login "%s" username "%s"', self.login_method,
                        self.username)
            if self.login_method not in [
                    "g", "google", "v", "vk", "f", "facebook"
            ]:
                raise RRClientException("Not a valid login method.")

            auth_text = requests.get("https://rivalregions.com").text
            web = Browser(showWindow=self.show_window)

            method_dict = {
                'g': self.login_google,
                'google': self.login_google,
                'v': self.login_vk,
                'vk': self.login_vk,
                'f': self.login_facebook,
                'facebook': self.login_facebook,
            }

            if self.login_method in method_dict:
                web = method_dict[self.login_method](web, auth_text)
            else:
                LOGGER.info('Invalid login method "%s"', self.login_method)
                sys.exit()

            LOGGER.debug('Get cookie')
            phpsessid = web.get_cookie('PHPSESSID')
            if phpsessid:
                cookie = self.create_cookie(phpsessid.get('expiry', None),
                                            phpsessid.get('value', None))
                self.write_cookie(self.username, cookie)
            else:
                raise NoPHPsessidException()
            LOGGER.debug('closing login tab')
            web.close_current_tab()

        self.session = cfscrape.CloudflareScraper()
        self.cookie = cookie
        self.session.cookies.set(**cookie)

        LOGGER.debug('set the var_c')
        response = self.session.get('https://rivalregions.com/#overview')
        lines = response.text.split("\n")
        for line in lines:
            if re.match("(.*)var c_html(.*)", line):
                var_c = line.split("'")[-2]
                LOGGER.debug('var_c: %s', var_c)
                self.var_c = var_c
Example #9
        def test(self, **kwargs):
            __Popen = subprocess.Popen

            # Temporarily disable this method to generate an exception
            subprocess.Popen = None

            try:
                scraper = cfscrape.CloudflareScraper(**kwargs)
                scraper.get.when.called_with(url) \
                    .should.have.raised(TypeError)
                caplog.text.should.match(
                    re.compile(r'Error executing Cloudflare IUAM Javascript'))
            finally:
                subprocess.Popen = __Popen
Example #10
def grab_password(email):
    # No docs (there is no public API); found by analyzing the network traffic and reported to the admin :D
    url = "https://ghostproject.fr/search.php"
    scraper = cfscrape.CloudflareScraper()

    cookie = {"cookieconsent_status": "dismiss"}
    data = {"param": email}

    req = scraper.post(url, cookies=cookie, data=data).text
    result = req.split("\\n")
    if "Error" in req or len(result) == 2:
        return False
    else:
        return result[1:-1]
Example #11
 def get_tags(self):
     """Get tags."""
     page = self.page
     url = self.url
     result = list(self.parse_page(page))
     if not result:
         h1_tag_text = page.select_one('h1').text
         if h1_tag_text != '503 Service Temporarily Unavailable':
             log.error('Unexpected H1-tag text', text=h1_tag_text)
         if self.scraper is None:
             self.scraper = cfscrape.CloudflareScraper()
         resp = self.scraper.get(url, timeout=10)
         html_soup = bs4.BeautifulSoup(resp.text, 'lxml')
         return self.parse_page(html_soup)
     return result
Example #12
        def test(self, **kwargs):
            __Popen = subprocess.Popen

            # Temporarily mock subprocess.Popen to raise an OSError
            def mock(*args, **kwargs):
                raise OSError('System Error')

            subprocess.Popen = mock

            try:
                scraper = cfscrape.CloudflareScraper(**kwargs)
                scraper.get.when.called_with(url) \
                    .should.have.raised(OSError, re.compile(r'System Error'))
                caplog.text.should.equal('')
            finally:
                subprocess.Popen = __Popen
Example #13
    def get_tags(self):
        """Get tags."""
        classname_to_namespace_dict = {
            'tag-type-artist': 'creator',
            'tag-type-character': 'character',
            'tag-type-copyright': 'series',
            'tag-type-species': 'species',
            'tag-type-general': ''
        }
        scraper = cfscrape.CloudflareScraper()
        resp = scraper.get(self.url, timeout=10)
        page = bs4.BeautifulSoup(resp.text, 'lxml')

        for key, namespace in classname_to_namespace_dict.items():
            for item in page.select('li.{}'.format(key)):
                name = (item.text.rsplit(' ', 1)[0]
                        .strip().split('? ', 1)[1].strip())
                yield (namespace, name)
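get_tags is a generator of (namespace, name) pairs; a hypothetical usage sketch, assuming an instance called plugin with its url attribute set:

for namespace, name in plugin.get_tags():  # plugin is a hypothetical instance
    print('{}:{}'.format(namespace, name) if namespace else name)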
Example #14
    def open(self, url):
        count = 0
        maxcount = 5

        scraper = cfscrape.CloudflareScraper()

        # Retry
        while count < maxcount:
            try:
                response = scraper.get(url)

                return response

            except Exception:
                print("Could not open '{}', retrying... ({})".format(
                    url, count))

                count += 1
                time.sleep(1)

                if count >= maxcount:
                    raise
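A sketch of the same retry loop with exponential backoff instead of a fixed one-second delay (same scraper, url, and maxcount as above):

        for attempt in range(maxcount):
            try:
                return scraper.get(url)
            except Exception:
                if attempt == maxcount - 1:
                    raise
                time.sleep(2 ** attempt)  # 1s, 2s, 4s, ... between attempts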
Example #15
    def scraper(self, url: str):
        """
        Request a URL through the Cloudflare scraper and return the response

        :param url: str = "hxxps://URL"
        :type url: str
        :return: A response object.
        """
        cloudflare_scraper = cfscrape.CloudflareScraper()
        # Browser-like headers are sent with the request itself
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "en-US,en;q=0.9",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:68.0) "
                          "Gecko/20100101 Firefox/89.0",
        }
        resp = cloudflare_scraper.get(url, headers=headers, stream=True,
                                      timeout=10)
        try:
            resp.raise_for_status()
        except requests.HTTPError as err:
            status = err.response.status_code
            if status not in (403, 429):
                self.log.error(str(err))
        except requests.exceptions.RequestException as err:
            # covers ReadTimeout and other transport errors, which carry no response
            self.log.error(str(err))
        else:
            return resp

        return None
Example #16
def init_cfscraper():
    s = cfs.CloudflareScraper()
    print("CFScraper initialized")
    return s
Example #17
                            floor=comment["floor"],
                            time=datetime.strptime(comment["updatedAt"],
                                                   "%Y-%m-%dT%H:%M:%S.%fZ"),
                            content=comment["content"])
                    else:
                        new_comt = dcard_comments(
                            id=comment["postId"],
                            owner="這則回應已被本人刪除",
                            gender=0,
                            floor=comment["floor"],
                            time=datetime.strptime(comment["updatedAt"],
                                                   "%Y-%m-%dT%H:%M:%S.%fZ"),
                            content="已經刪除的內容就像 Dcard 一樣,錯過是無法再相見的!")
                    db.session.add(new_comt)
                except Exception as e:
                    local_var_key = locals()
                    logging.error(traceback.format_exc())
                    logging.error({
                        key: local_var_key[key]
                        for key in local_var_key if key != "article_comments"
                    })
                    logging.error("")
            db.session.commit()


crawler = Dcard_crawler(req_module=cfscrape.CloudflareScraper())
article_list_gen = crawler.get_article_list(cate="dressup", limit=100)
list_gen_factory = generators_factory(article_list_gen)
article_gen = crawler.get_article(list_generator=list_gen_factory())
comt_gen = crawler.get_comments(list_generator=list_gen_factory(), limit=100)
crawler.save_data(article_gen, comt_gen)
Example #18
import cfscrape
import fileinput
import random
import time
import os
import array as arr
from bs4 import BeautifulSoup

LinkFile = "link.txt"  #file chua link tap hop chuong truyen
filenameTXT = "Ketqua.txt"
chapter = 0
f = open(LinkFile, "r")
for x in f:
    if "http" in x:
        StrippedContent = ""
        ChapterURL = x
        scraper = cfscrape.CloudflareScraper()
        response = scraper.get(ChapterURL)
        filenameHTML = str(chapter) + ".html"
        bCheckLink = 0
        open(filenameHTML, 'wb').write(response.content)
        with open(filenameHTML, encoding="utf-8") as fp:
            soup = BeautifulSoup(fp, "lxml")
            try:
                # Save the title
                downloaded = soup.title.string
                StrippedContent = "\n" + downloaded + "\n"

                # Get the chapter content
                div = soup.find(id="bookContentBody")
                for elem in div.find_all("p"):
                    elem.replace_with(elem.text + "\n\n")
Example #19
def main(prog_input=None,
         resize=False,
         size=None,
         db_path=None,
         place=DEFAULT_PLACE,
         match_filter='default',
         write_tags=False,
         input_mode='default',
         verbose=False,
         debug=False,
         abort_on_error=False):
    """Get similar image from iqdb."""
    assert prog_input is not None, "Input is not a valid path"

    # logging
    log_level = None
    if verbose:
        log_level = logging.INFO
    if debug:
        log_level = logging.DEBUG
    if log_level:
        logging.basicConfig(
            handlers=[
                logging.FileHandler(
                    os.path.join(user_data_dir, 'output.log'), 'w', 'utf-8')
            ],
            level=log_level)

    init_program(db_path)
    br = mechanicalsoup.StatefulBrowser(soup_config={'features': 'lxml'})
    br.raise_on_404 = True
    scraper = cfscrape.CloudflareScraper()

    # variables used in both input modes
    error_set = []
    if input_mode == 'folder':
        assert os.path.isdir(prog_input), 'Input is not valid folder'
        files = [os.path.join(prog_input, x) for x in os.listdir(prog_input)]
        if not files:
            print('No files found.')
            return
        sorted_files = sorted(files, key=lambda x: os.path.splitext(x)[1])
        for idx, ff in enumerate(sorted_files):
            log.debug('file',
                      f=os.path.basename(ff),
                      idx=idx,
                      total=len(files))
            result = {}
            try:
                result = run_program_for_single_img(ff,
                                                    resize,
                                                    size,
                                                    place,
                                                    match_filter,
                                                    write_tags,
                                                    browser=br,
                                                    scraper=scraper,
                                                    disable_tag_print=True)
            except Exception as e:  # pylint:disable=broad-except
                if abort_on_error:
                    raise e
                error_set.append((ff, e))
            if result is not None and result.get('error'):
                error_set.extend([(ff, x) for x in result['error']])
    else:
        image = prog_input
        result = run_program_for_single_img(image,
                                            resize,
                                            size,
                                            place,
                                            match_filter,
                                            write_tags,
                                            browser=br,
                                            scraper=scraper)
        if result is not None and result.get('error'):
            error_set.extend([(image, x) for x in result['error']])

    if error_set:
        log.error('Found error(s)')
        for x in error_set:
            log.error('path: ' + x[0] + '\nerror: ' + str(x[1]))
Example #20
    def login(self):
        """Login user if needed"""
        LOGGER.info('"%s": start login, method: "%s"', self.username,
                    self.login_method)
        cookies = CookieHandler.get_cookies(self.username)
        if not cookies:
            cookies = []
            LOGGER.info('"%s": no cookie, new login, method "%s"',
                        self.username, self.login_method)

            login_method_dict = {
                'g': login_methods.login_google,
                'google': login_methods.login_google,
                'v': login_methods.login_vk,
                'vk': login_methods.login_vk,
                'f': login_methods.login_facebook,
                'facebook': login_methods.login_facebook,
            }

            auth_text = requests.get("https://rivalregions.com").text
            browser = Browser(showWindow=self.show_window)

            if self.login_method in login_method_dict:
                browser = login_method_dict[self.login_method](browser,
                                                               auth_text,
                                                               self.username,
                                                               self.password)
            else:
                LOGGER.info('"%s": Invalid login method "%s"', self.username,
                            self.login_method)
                sys.exit()

            LOGGER.info('"%s": Get PHPSESSID', self.username)
            browser_cookie = browser.get_cookie('PHPSESSID')
            if browser_cookie:
                expiry = browser_cookie.get('expiry', None)
                value = browser_cookie.get('value', None)
                LOGGER.info('"%s": "value": %s, "expiry": %s', self.username,
                            value, expiry)
                cookie = CookieHandler.create_cookie('PHPSESSID', expiry,
                                                     value)
                cookies.append(cookie)
            else:
                raise NoCookieException()

            cookie_names = ['rr_f']
            for cookie_name in cookie_names:
                browser_cookie = browser.get_cookie(cookie_name)
                if browser_cookie:
                    LOGGER.info('"%s": Get %s', self.username, cookie_name)
                    expiry = browser_cookie.get('expiry', None)
                    value = browser_cookie.get('value', None)
                    cookies.append(
                        CookieHandler.create_cookie(cookie_name, expiry,
                                                    value))
                    LOGGER.info('"%s": "value": %s, "expiry": %s',
                                self.username, value, expiry)
                else:
                    raise NoCookieException()

            CookieHandler.write_cookies(self.username, cookies)
            LOGGER.debug('"%s": closing login tab', self.username)
            browser.close_current_tab()
        else:
            LOGGER.info('"%s": Cookies found', self.username)

        self.session = cfscrape.CloudflareScraper()
        for cookie in cookies:
            self.session.cookies.set(**cookie)

        LOGGER.debug('"%s": set the var_c', self.username)
        response = self.session.get('https://rivalregions.com/#overview')
        lines = response.text.split("\n")
        for line in lines:
            if re.match("(.*)var c_html(.*)", line):
                var_c = line.split("'")[-2]
                LOGGER.debug('"%s": got var_c: %s', self.username, var_c)
                self.var_c = var_c
Example #21
 def test_js_challenge_21_05_2015(self, **kwargs):
     scraper = cfscrape.CloudflareScraper(**kwargs)
     expect(scraper.get(url).content).to.equal(requested_page)
Example #22
 def check_resp(self, u, **kwargs):
     scraper = cfscrape.CloudflareScraper(**kwargs)
     resp = scraper.get(u)
     self.assertEqual(resp and resp.content, requested_page)
Example #23
import cfscrape
import requests
import os
import re
from time import time
from selenium import webdriver
from recaptcha import *
from bs4 import BeautifulSoup

# Requests wrapper
#url = 'https://www.acgnx.se/'
url = 'https://www.acgnx.se/show-8A7C71BCBEB854DDF0880AF26FB4504A47F50B2D.html'
session = requests.session()
scraper = cfscrape.create_scraper(sess=session)  # layer Cloudflare handling onto the session
req = scraper.get(url).content
#print req

### Save request as HTML named 'Result.html'
f_name = '\\Result.html'
with open(os.getcwd() + f_name, 'wb') as f:
    f.write(req)  # req holds response.content, i.e. bytes

### Execute JavaScript file
start = time()
driver = webdriver.Chrome(
    os.getcwd() + "\\chromedriver.exe"
)  # optional argument; if omitted, chromedriver is searched for on PATH
driver.get(os.getcwd() + f_name)
#print driver.page_source
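Besides wrapping an existing requests session, cfscrape also documents token helpers for handing the clearance cookies to other tools; a minimal sketch (URLs are placeholders):

import cfscrape

# both helpers return the solved Cloudflare tokens plus the User-Agent used
tokens, user_agent = cfscrape.get_tokens("http://somesite.example")
cookie_arg, user_agent = cfscrape.get_cookie_string("http://somesite.example")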
Example #24
    headers = {
        'user-agent': uaGen.random,
        'accept':
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        'referer': "https://www.internationalsaimoe.com/voting/",
        'accept-encoding': "gzip, deflate",
        'accept-language': acceptLanguage[random.randint(0, indexLanguage)],
        'cache-control': "no-cache",
        'connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
        'Upgrade-Insecure-Requests': '1',
    }
    return headers


localsession = cfscrape.CloudflareScraper(headers=genHeaders())

import numpy as np
from skimage.filters import gaussian
from skimage.exposure import equalize_hist
from skimage.morphology import opening, label
from skimage.measure import regionprops


def judge(img):  # decide whether the captcha image is recognizable
    img = img[97:, :]
    img = gaussian(img, sigma=0.85)
    img = equalize_hist(img)
    img = (img > 0.7) * 1.0
    img = opening(img, selem=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]))
    image_region = img * 0