def open_welcome_page(self):
    """Return the parsed case-search welcome page.

    If the first response contains the text 'By clicking Accept', the
    captcha at 'captchaVerification.do' is solved through this object's
    opener and the welcome page is requested a second time.
    """
    welcome_url = self.url('caseSearch.do?welcomePage=welcomePage')
    html = self.opener.open(welcome_url).read()
    parsed = BeautifulSoup(html)

    # No acceptance/captcha interstitial: the first response is the page.
    if 'By clicking Accept' not in html:
        return parsed

    captcha.solve(self.opener, self.url('captchaVerification.do'))

    # Request the welcome page again now that the captcha step has run.
    return BeautifulSoup(self.opener.open(welcome_url).read())
def test_captcha_solver(samples=None, **params):
    """Solve every image in the ``samples`` directory and check the answer.

    The expected answer for a sample is its file name without the
    extension. Extra keyword arguments are forwarded to both the image
    filter and the solver. Nothing happens when ``samples`` is empty/None.
    """
    if samples:
        for filename in os.listdir(samples):
            expected, _ext = os.path.splitext(filename)
            filtered = captcha.image_filter(
                os.path.join(samples, filename), **params)
            solved = captcha.solve(filtered, **params)
            answer = ''.join(str(symbol) for symbol in solved)
            assert answer == expected, samples
def test_captcha_solver(samples=None, **params):
    """Run the captcha solver over a directory of labelled sample images.

    Each file's base name (extension stripped) is the expected solution.
    ``params`` are passed as keyword arguments to ``captcha.image_filter``
    and ``captcha.solve``. A falsy ``samples`` skips the check entirely.
    """
    if not samples:
        return

    for entry in os.listdir(samples):
        sample_path = os.path.join(samples, entry)
        prepared = captcha.image_filter(sample_path, **params)
        pieces = [str(part) for part in captcha.solve(prepared, **params)]
        assert ''.join(pieces) == os.path.splitext(entry)[0], samples
def test_get_image_by_url(params):
    """Fetch and solve the captcha at ``params['url']`` three times.

    Every solution must have ``params['length']`` symbols (4 when the key
    is absent) and must not contain ``None``. ``params`` is passed
    positionally to the image filter and the solver.
    """
    attempts_left = 3
    while attempts_left:
        attempts_left -= 1
        raw = captcha.fetch(params['url'])
        filtered = captcha.image_filter(raw, params)
        solved = captcha.solve(filtered, params)
        assert len(solved) == params.get('length', 4)
        assert not any(symbol is None for symbol in solved)
def test_captcha_solver(params):
    """Check the solver against a directory of labelled sample images.

    Each file in ``params['samples']`` is filtered and solved; the solved
    symbols, joined into one string, must equal the file name with its last
    extension removed. Does nothing when ``params`` has no ``samples`` entry.
    ``params`` is passed positionally to the image filter and the solver.
    """
    path = params.get('samples')
    if path is None:
        return
    for filename in os.listdir(path):
        # os.path.join supplies the separator when ``path`` has no trailing
        # one; plain concatenation produced a non-existent path in that case.
        image = captcha.image_filter(os.path.join(path, filename), params)
        result = captcha.solve(image, params)
        assert ''.join(map(str, result)) == filename.rsplit('.', 1)[0]
def test_get_image_by_url(url=None, length=4, **params):
    """Fetch the captcha at ``url`` three times and sanity-check each answer.

    An answer passes when it has exactly ``length`` symbols and none of
    them is ``None``. Remaining keyword arguments go to the image filter
    and the solver. A falsy ``url`` skips the check.
    """
    if url:
        for _round in (0, 1, 2):
            downloaded = captcha.fetch(url)
            cleaned = captcha.image_filter(downloaded, **params)
            answer = captcha.solve(cleaned, **params)
            assert len(answer) == length, url
            assert not any(symbol is None for symbol in answer), url
def test_get_image_by_url(url=None, length=4, **params):
    """Download, filter and solve the captcha at ``url`` three times.

    Each solution must contain ``length`` symbols, all of them not
    ``None``. ``params`` are forwarded as keyword arguments to
    ``captcha.image_filter`` and ``captcha.solve``. Returns immediately
    when ``url`` is falsy.
    """
    if not url:
        return

    remaining = 3
    while remaining > 0:
        remaining -= 1
        prepared = captcha.image_filter(captcha.fetch(url), **params)
        solution = captcha.solve(prepared, **params)
        assert len(solution) == length, url
        assert all(item is not None for item in solution), url
Exemple #8
0
def parse_first_webpage(thread_index, ind, lastname, firstname, checkmiss):
    """Run the first search step for one name and hand off to the second.

    Loads ``first_url``, posts the search form to ``second_url`` and, when
    the response shows a captcha image, downloads and solves it before
    continuing with ``parse_second_webpage``.

    Args:
        thread_index: Worker index; used for the log file name, the log
            prefix and the per-worker captcha image file name.
        ind: Index of this name (only logged).
        lastname: Last name submitted in the search form.
        firstname: First name submitted in the search form.
        checkmiss: Passed through unchanged to ``parse_second_webpage``.

    Returns:
        ``config.ERROR_NO_PROXY`` when either page load fails the proxy
        check; the result of ``parse_second_webpage`` when a captcha image
        was present; ``None`` when the search response has no captcha image.
    """
    # Single-argument parenthesised print: same output on Python 2 and 3.
    print("+++++++++++++++++++++++++++++++++++++++++++++++++++START+++++++++++++++++++++++++++++++++++++++++++++++++")

    tz = pytz.timezone('America/Los_Angeles')

    s = Scraper(use_cache=False,
                retries=3,
                timeout=30,
                proxy_file=proxy_file_name,
                one_proxy=True,
                log_file='logs/log{}.txt'.format(thread_index))

    logger = s.logger
    logger.info("{} Loading First URL -> {}".format(
        prefix_letter(thread_index), first_url))

    doc = s.load(first_url)
    if check_proxy_status(doc) == False:
        logger.info("{} Proxy Error".format(prefix_letter(thread_index)))
        return config.ERROR_NO_PROXY

    # Take one timestamp so the date and the time always describe the same
    # instant (two separate now() calls could straddle a minute/day change).
    now = datetime.now(tz)
    currentdate = now.strftime('%Y-%m-%d')
    currenttime = now.strftime('%H:%M')
    logger.info("{} Current Date & Time: {} , {}".format(
        prefix_letter(thread_index), currentdate, currenttime))

    formdata = {
        'method': 'post',
        'currentdate': currentdate,
        'currenttime': currenttime,
        'startdate': '03/05/2014',
        'starttime': '08 : 00',
        'enddate': '03/05/2014',
        'endtime': '12 : 00',
        'last_name': str(lastname),
        'first_name': str(firstname),
        'middle_name': '',
        'dob': '',
        'search': 'Search'
    }

    logger.info("{} Name Index: {},  Last Name: {}, First Name: {}".format(
        prefix_letter(thread_index), ind, lastname, firstname))

    doc = s.load(second_url, post=formdata)
    if check_proxy_status(doc) is False:
        logger.info("{} Proxy Error".format(prefix_letter(thread_index)))
        return config.ERROR_NO_PROXY

    img_url = doc.x('//img[@alt="Captcha image"]/@src')
    if img_url == "":
        # No captcha on the response: nothing more is done in this step.
        return None

    imagefilepath = s.join_path(
        'images/captcha_{}.jpg'.format(thread_index))
    if os.path.exists(imagefilepath):
        os.remove(imagefilepath)

    u = s.client.opener.open(img_url)
    block_sz = 8192
    # The context manager closes the image file even if the download
    # raises part-way through.
    with open(imagefilepath, 'wb') as f:
        while True:
            buf = u.read(block_sz)
            if not buf:
                break
            f.write(buf)

    first_captcha_code = captcha.solve(imagefilepath)
    if str(first_captcha_code) == "1":
        # NOTE: a result of "1" is only logged here; the flow continues and
        # the value is still handed to the second step.
        logger.info("Captcha Service Error")

    logger.info('{} 1st_captcha2 -> {}'.format(prefix_letter(thread_index),
                                               first_captcha_code))

    wait()
    return parse_second_webpage(s, thread_index, first_captcha_code,
                                currentdate, currenttime, lastname,
                                firstname, checkmiss)
Exemple #9
0
def parse_detail_page(sc_obj, thread_index, form_action_url, formdata,
                      old_captcha_code, currentdate, currenttime, lastname,
                      first_letter):
    """Submit the detail form, clear its captcha and store the booking record.

    Posts ``formdata`` to ``form_action_url``. If the response shows a captcha
    image, the image is downloaded and solved, the answer is posted back and
    ``captcha_retry`` is given the response to deal with further captchas.
    The resulting page is then parsed with regular expressions and the
    extracted fields are written to the database.

    Args:
        sc_obj: Scraper object providing ``logger``, ``load``, ``join_path``
            and ``client.opener``.
        thread_index: Worker index; used in log prefixes and in the captcha
            image file name.
        form_action_url: URL the forms are posted to (also sent as Referer).
        formdata: Form fields for the initial post.
        old_captcha_code: Sent as ``ckey`` together with the captcha answer.
        currentdate: Stored as the record's captured date.
        currenttime: Stored as the record's captured time.
        lastname: Combined with ``first_letter`` to form the
            ``list_to_be_scraped`` key; also passed to ``save_names``.
        first_letter: See ``lastname``.

    Returns:
        ``config.ERROR_NO_PROXY`` if a proxy check fails,
        ``config.ERROR_NO_CAPTCHA`` if the solver reports an error or
        ``captcha_retry`` gives up, otherwise ``config.ERROR_NO_NONE`` --
        including when the first response has no captcha image (nothing is
        parsed in that case) and when parsing or saving raised (the
        exception is only logged).
    """
    logger = sc_obj.logger

    doc = sc_obj.load(form_action_url, post=formdata)
    if check_proxy_status(doc) == False:
        logger.info("{} Proxy Error of function parse_detail_page".format(
            prefix_letter(thread_index)))
        return config.ERROR_NO_PROXY

    img_url = doc.x('//img[@alt="Captcha image"]/@src')

    # Everything below (captcha, parsing, saving) only runs when the response
    # contains a captcha image.
    if img_url != "":
        imagefilepath = sc_obj.join_path(
            'images/captcha_{}.jpg'.format(thread_index))

        if os.path.exists(imagefilepath):
            os.remove(imagefilepath)

        # Download the captcha image in 8 KiB chunks to the per-worker file.
        u = sc_obj.client.opener.open(img_url)
        f = open(imagefilepath, 'wb')
        block_sz = 8192
        while True:
            buf = u.read(block_sz)
            if not buf:
                break
            f.write(buf)
        f.close()

        #captcha = CaptchaUpload(config.captcha_api_key)
        second_captcha_code = captcha.solve(imagefilepath)
        # A result of "1" is treated as a solver/service failure.
        if str(second_captcha_code) == "1":
            logger.info("Captcha Service Error")
            return config.ERROR_NO_CAPTCHA

        logger.info('{} 2nd_captcha2_code -> {}'.format(
            prefix_letter(thread_index), second_captcha_code))

        # From here on ``formdata`` is the captcha answer form, not the
        # caller's original form fields.
        formdata = {
            "ckey": old_captcha_code,
            "key": second_captcha_code,
            "submit": "Submit"
        }

        headers = {
            "Host": "app4.lasd.org",
            "User-Agent":
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "Referer": form_action_url,
            "Upgrade-Insecure-Requests": "1"
        }

        doc = sc_obj.load(form_action_url,
                          post=formdata,
                          headers=headers,
                          merge_headers=True)

        # If the response still shows a captcha, captcha_retry keeps solving
        # and resubmitting; it returns the final document and a status code.
        doc, captcha_error = captcha_retry(doc, sc_obj, thread_index,
                                           currentdate, currenttime, lastname,
                                           first_letter, form_action_url, True,
                                           old_captcha_code)
        if captcha_error == config.ERROR_NO_CAPTCHA:
            print "Captcha Error"
            return config.ERROR_NO_CAPTCHA

        if check_proxy_status(doc) == False:
            logger.info("{} Proxy Error of function parse_detail_page".format(
                prefix_letter(thread_index)))
            return config.ERROR_NO_PROXY

        logger.info('{} Captcha Code: Old -> {}, New -> {}'.format(
            prefix_letter(thread_index), old_captcha_code,
            second_captcha_code))
        logger.info('{} form_action_url -> {}'.format(
            prefix_letter(thread_index), form_action_url))

        # Any exception raised while parsing or saving is caught by the
        # handler at the end of this block, logged, and otherwise ignored.
        try:
            # Personal details: concatenated HTML of the cells in rows with
            # class 'Row2'. Each field is the text following its label, up to
            # the next <strong>/</strong> tag; a missing label makes
            # re.search return None and the .group call raise.
            booking_no_cointainer = doc.q("//tr[@class='Row2']/td")
            booking_no_cointainer = ''.join(
                [item.html() for item in booking_no_cointainer])

            BookingNo = re.search(
                'Booking No.: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()
            LastName = re.search(
                'Last Name: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()
            FirstName = re.search(
                'First Name: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()
            MiddleName = re.search(
                'Middle Name: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()
            Birthday = re.search(
                'Date Of Birth: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()

            Age = re.search('Age: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                            str(booking_no_cointainer),
                            re.M | re.I | re.S).group(2).strip()
            Sex = re.search('Sex: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                            str(booking_no_cointainer),
                            re.M | re.I | re.S).group(2).strip()
            Race = re.search(
                'Race: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()
            Hair = re.search(
                'Hair: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()
            Eyes = re.search(
                'Eyes: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()
            Height = re.search(
                'Height: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()
            Weight = re.search(
                'Weight: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(booking_no_cointainer),
                re.M | re.I | re.S).group(2).strip()

            # Arrest/booking/housing details: concatenated HTML of the
            # centered cells in rows with class 'Caption2'.
            contents = doc.q("//tr[@class='Caption2']/td[@align='center']")
            contents = ''.join([item.html() for item in contents])

            # Diagnostic only: on failure this prints context and continues.
            # The same lookup is repeated unguarded right after, so a failure
            # here is raised again and ends up in the outer handler.
            try:
                ArrestDateStr = re.search(
                    'Arrest Date: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                    contents, re.M | re.I | re.S).group(2).strip()
            except Exception as e:
                print '*******************************'
                print e
                print contents
                print BookingNo
                print "LEN =", len(doc.q("//tr[@class='Caption2']"))
                print '*******************************'

            ArrestDateStr = re.search(
                'Arrest Date: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                contents, re.M | re.I | re.S).group(2).strip()
            # Reorder the '/'-separated parts: the last becomes the year, the
            # one before it the day, the one before that the month, giving
            # 'year-month-day' (assumes the page shows month/day/year --
            # TODO confirm).
            ArrestDateStr = ArrestDateStr.split('/')
            y = ArrestDateStr.pop()
            d = ArrestDateStr.pop()
            m = ArrestDateStr.pop()
            ArrestDate = y + "-" + m + "-" + d
            ArrestTimeStr = re.search(
                'Arrest Time: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            # Insert ':' after the first two characters of the time string.
            ArrestTime = ArrestTimeStr[:2] + ":" + ArrestTimeStr[2:]
            CapturedDate = currentdate
            CapturedTime = currenttime
            ArrestAgency = re.search(
                'Arrest Agency: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            AgencyDescription = re.search(
                'Agency Description: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            DateBooked = re.search(
                'Date Booked: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            TimeBooked = re.search(
                'Time Booked: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            # DateBooked=currentdate
            # TimeBooked=currenttime
            BookingLocation = re.search(
                'Booking Location: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            LocationDescription = re.search(
                'Location Description: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            # The three amount fields use a different value pattern from the
            # other fields: dots, commas, word characters and whitespace.
            TotalBailAmount = re.search(
                'Total Bail Amount: (\<strong\>)?([.\,\w\s]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            TotalHoldBailAmount = re.search(
                'Total Hold Bail Amount: (\<strong\>)?([.\,\w\s]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            GrandTotal = re.search(
                'Grand Total: (\<strong\>)?([.\,\w\s]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            HousingLocation = re.search(
                'Housing Location: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            PermanentHousingAssignedDate = re.search(
                'Permanent Housing Assigned Date: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            AssignedTime = re.search(
                'Assigned Time: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            # PermanentHousingAssignedDate=currentdate
            # AssignedTime=currenttime

            VisitorStatus = re.search(
                'Visitor Status: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            Facility = re.search(
                'Facility: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            Address = re.search(
                'Address: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()
            City = re.search(
                'City: (\<strong\>)?([^\<.*]*)\<[\/]?strong[\/]?\>',
                str(contents), re.M | re.I | re.S).group(2).strip()

            # Today's and yesterday's dates in Los Angeles time, formatted
            # like ArrestDate so they can be compared as strings.
            la_tz = pytz.timezone('America/Los_Angeles')

            today = datetime.now(la_tz).strftime('%Y-%m-%d')
            yesterday = (datetime.now(la_tz) +
                         timedelta(days=-1)).strftime('%Y-%m-%d')

            print "************************************************************"
            print today, yesterday
            print "Arrest = ", ArrestDate
            print "************************************************************"

            # A missing key raises KeyError, which the outer handler catches,
            # so nothing below is saved for that name.
            obj_key = lastname + "," + first_letter
            obj_booking_no = list_to_be_scraped[obj_key]

            # config.PREFIX_VINE is appended (not prepended) when the
            # scrape-list entry for this name is non-empty.
            booking_no = BookingNo
            if (obj_booking_no != ""):
                booking_no += config.PREFIX_VINE

            # Only arrests dated today or yesterday are stored as bookings.
            if (today == ArrestDate) or (yesterday == ArrestDate):
                # if True:
                booking = DashboardLasd(
                    s_bookingno=booking_no,
                    s_lastname=LastName,
                    s_firstname=FirstName,
                    s_middlename=MiddleName,
                    s_birthday=Birthday,
                    s_age=Age,
                    s_sex=Sex,
                    s_race=Race,
                    s_hair=Hair,
                    s_eyes=Eyes,
                    s_height=Height,
                    s_weight=Weight,
                    s_arrestdate=ArrestDate,
                    s_arresttime=ArrestTime,
                    s_captureddate=CapturedDate,
                    s_capturedtime=CapturedTime,
                    s_arrestagency=ArrestAgency,
                    s_agencydescription=AgencyDescription,
                    s_datebooked=DateBooked,
                    s_timebooked=TimeBooked,
                    s_bookinglocation=BookingLocation,
                    s_locationdescription=LocationDescription,
                    s_totalbailamount=TotalBailAmount,
                    s_totalholdbailamount=TotalHoldBailAmount,
                    s_grandtotal=GrandTotal,
                    s_housinglocation=HousingLocation,
                    s_permanenthousingassigneddate=PermanentHousingAssignedDate,
                    s_assignedtime=AssignedTime,
                    s_visitorstatus=VisitorStatus,
                    s_facility=Facility,
                    s_address=Address,
                    s_city=City,
                    s_jail=1)

                # Database and notification errors are logged, not raised;
                # the success log and save_names below run either way.
                try:
                    db.session.add(booking)
                    db.session.commit()

                    # Notify by email when the grand total (thousands
                    # separators removed) is at least 20000.
                    if float(GrandTotal.replace(",", "")) >= 20000:
                        send_notify_email(booking)

                except Exception as e:
                    logger.info(e)

                logger.info('+++++++{} Data was saved {}, {}, {}, {}'.format(
                    prefix_letter(thread_index), BookingNo, ArrestDate,
                    LastName, FirstName))
                save_names(lastname, first_letter, success_filename, "",
                           booking_no, ArrestDate)
            else:
                logger.info('{} Arrest Date is {}, {}'.format(
                    prefix_letter(thread_index), ArrestDate, booking_no))
                save_names(lastname, first_letter, outofdate_filename, "",
                           booking_no, ArrestDate)
                save_names(lastname, first_letter, success_filename,
                           "Out Of Date", booking_no, ArrestDate)

            # A history row is added for every parsed page, whatever the
            # arrest date; it uses the booking number as parsed (BookingNo),
            # without the appended config.PREFIX_VINE.
            booking_history = DashboardJailHistory(s_bookingno=BookingNo,
                                                   s_lastname=LastName,
                                                   s_firstname=FirstName,
                                                   s_captureddate=CapturedDate,
                                                   s_capturedtime=CapturedTime,
                                                   s_duplication=0,
                                                   s_arrestdate=ArrestDate)

            try:
                db.session.add(booking_history)
                db.session.commit()
            except Exception as e:
                logger.info(e)

            # Delete the DashboardVineName rows whose BookingNo equals the
            # scrape-list entry for this name; failures are only printed.
            try:
                obj_key = lastname + "," + first_letter
                obj_booking_no = list_to_be_scraped[obj_key]

                print "Delete VINE Booking No for ", obj_booking_no
                db.session.query(DashboardVineName).filter(
                    DashboardVineName.BookingNo == obj_booking_no).delete()
                db.session.commit()
            except Exception as e:
                print e

        except Exception as e:
            logger.info(e)
            show_exception_detail(e)
            pass

    return config.ERROR_NO_NONE
Exemple #10
0
def captcha_retry(doc, s, thread_index, currentdate, currenttime, lastname,
                  firstname, url, merge_headers, old_captcha_code):
    """Keep solving and submitting captchas until the response shows none.

    While ``doc`` contains a captcha image, the image is downloaded and
    solved, the answer is posted, and the response becomes the new ``doc``.

    Args:
        doc: Most recently loaded page; checked for a captcha image.
        s: Scraper object used for downloads, form posts and logging.
        thread_index: Worker index; used in log prefixes and in the captcha
            image file name.
        currentdate: Not used by this function.
        currenttime: Not used by this function.
        lastname: Not used by this function.
        firstname: Not used by this function.
        url: Form URL for second-step submissions (also sent as Referer).
        merge_headers: When equal to ``True`` the second-step flow is used:
            the answer is posted to ``url`` with browser-like headers and
            ``ckey`` set to the previous code. Otherwise the answer is
            posted to ``second_url`` with ``key`` and ``ckey`` both set to it.
        old_captcha_code: ``ckey`` value for the first second-step
            submission; later submissions send the previous answer.

    Returns:
        A ``(doc, status)`` tuple. ``status`` is ``config.ERROR_NO_CAPTCHA``
        when a captcha is still shown after more than
        ``config.captcha_max_tries`` attempts (the bound is checked before
        the counter is incremented, so up to ``captcha_max_tries + 1``
        submissions are made), otherwise ``config.ERROR_NO_NONE``.
    """
    logger = s.logger
    img_url = doc.x('//img[@alt="Captcha image"]/@src')

    # Keep the original comparison: only a value equal to True selects the
    # second-step flow.
    second_step = merge_headers == True
    old_code = old_captcha_code

    attempts = 0
    while img_url != "":
        if attempts > config.captcha_max_tries:
            print("--------------------Captcha tries reached as max value---------------")
            return doc, config.ERROR_NO_CAPTCHA

        attempts += 1
        print("*****************************************")

        if second_step:
            step_label = "Try to solve captcha in 2nd step:"
        else:
            step_label = "Try to solve captcha in 1st step:"
        # Single-argument print: same "label url" output on Python 2 and 3.
        print("%s %s" % (step_label, img_url))

        imagefilepath = s.join_path(
            'images/captcha_{}.jpg'.format(thread_index))

        if os.path.exists(imagefilepath):
            os.remove(imagefilepath)

        u = s.client.opener.open(img_url)
        block_sz = 8192
        # The context manager closes the image file even if the download
        # raises part-way through.
        with open(imagefilepath, 'wb') as f:
            while True:
                buf = u.read(block_sz)
                if not buf:
                    break
                f.write(buf)

        first_captcha_code = captcha.solve(imagefilepath)
        if str(first_captcha_code) == "1":
            # NOTE: a result of "1" is only logged; it is still submitted
            # below and the loop decides whether to try again.
            logger.info("Captcha Service Error")

        if second_step:
            logger.info('{} 2nd_captcha2_code -> {}'.format(
                prefix_letter(thread_index), first_captcha_code))
        else:
            logger.info('{} 1st_captcha2_code -> {}'.format(
                prefix_letter(thread_index), first_captcha_code))

        wait()

        if second_step:
            formdata = {
                "ckey": old_code,
                "key": first_captcha_code,
                "submit": "Submit"
            }

            headers = {
                "Host": "app4.lasd.org",
                "User-Agent":
                "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0",
                "Accept":
                "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.5",
                "Accept-Encoding": "gzip, deflate, br",
                "Referer": url,
                "Upgrade-Insecure-Requests": "1"
            }
            doc = s.load(url,
                         post=formdata,
                         headers=headers,
                         merge_headers=True)
        else:
            formdata = {
                "key": first_captcha_code,
                "ckey": first_captcha_code,
                "submit": "Submit"
            }
            doc = s.load(second_url, post=formdata)

        img_url = doc.x('//img[@alt="Captcha image"]/@src')

        # The answer just submitted becomes the ``ckey`` of the next
        # second-step attempt.
        old_code = first_captcha_code

    if second_step:
        print("********2nd Step : Solved Captcha Successfully**********")
    else:
        print("********1st Step : Solved Captcha Successfully**********")
    return doc, config.ERROR_NO_NONE
Exemple #11
0
    def apiCall(self, method, params, retry=False):
        """Call a VK API method and return the ``response`` part of the reply.

        The request is made while holding ``self.api_lock`` and is delayed so
        that it starts at least ``CALL_INTERVAL`` seconds after the previous
        one. Network errors, captcha demands (error code 14) and auth errors
        (error code 5) are handled by calling this method again.

        NOTE(review): the method recurses while still holding
        ``self.api_lock``, so that lock presumably is reentrant -- confirm.

        Args:
            method: API method name, appended to
                ``https://api.vk.com/method/``.
            params: Dict of query parameters. It is modified in place: ``v``
                is always set, and ``captcha_sid`` / ``captcha_key`` are
                added when a captcha answer is submitted.
            retry: Truthy when this call is already a retry; a further
                non-network failure is then reported instead of retried.

        Returns:
            The value stored under ``response`` on success, or ``None`` when
            the reply is not valid JSON, an error is ignored or unhandled,
            or a retried request fails again.
        """
        with self.api_lock:
            params['v'] = self.api_version
            url = 'https://api.vk.com/method/' + method + '?' + urllib.parse.urlencode(params) + '&access_token=' + self.getToken()
            now = time.time()
            if now - self.last_call < CALL_INTERVAL:
                time.sleep(CALL_INTERVAL - now + self.last_call)
                # Re-read the clock after sleeping: recording the pre-sleep
                # time would make the next call think more time had passed
                # than really has, letting calls run closer together than
                # CALL_INTERVAL.
                now = time.time()
            self.last_call = now
            try:
                # The context manager closes the HTTP response after reading.
                with urllib.request.urlopen(url, timeout=self.timeout) as response:
                    json_string = response.read()
            except OSError as e:
                # Network-level failure: wait a second and start over.
                log.warning(method + ' failed ({})'.format(e))
                time.sleep(1)
                return self.apiCall(method, params)
            except Exception as e:
                if retry:
                    log.error('({}) {}: {}'.format(method, e.__class__.__name__, str(e)), True)
                    return None
                else:
                    time.sleep(1)
                    log.warning('({}) {}: {}, retrying'.format(method, e.__class__.__name__, str(e)))
                    return self.apiCall(method, params, 1)

            try:
                data_array = json.loads(json_string.decode('utf-8'))
            except json.decoder.JSONDecodeError:
                log.error('Invalid JSON')
                data_array = None
            if self.logging:
                with open('inf.log', 'a') as f:
                    print('[{}]\nmethod: {}, params: {}\nresponse: {}\n'.format(time.strftime(log.datetime_format, time.localtime()), method, json.dumps(params), json.dumps(data_array)), file=f)
            duration = time.time() - now
            if duration > self.timeout:
                log.warning('{} timeout'.format(method))

            if data_array is None:
                return None
            if 'response' in data_array:
                if self.captcha_delayed or self.externalCaptcha:
                    self.captcha_delayed = 0
                    self.externalCaptcha = False
                    log.info('Captcha no longer needed')
                    self.captcha_sid = ''
                captcha.delete()
                return data_array['response']

            elif 'error' in data_array:
                if data_array['error']['error_code'] == 14: #Captcha needed
                    self.externalCaptcha = False
                    if self.captcha_delayed == 0:
                        # First captcha demand: record the sid in captcha.txt
                        # and hand the image URL to the captcha module.
                        log.warning('Captcha needed')
                        self.captcha_sid = data_array['error']['captcha_sid']
                        with open(accounts.getFile('captcha.txt'), 'w') as f:
                            f.write('sid ' + self.captcha_sid)
                        captcha.receive(data_array['error']['captcha_img'])
                    elif self.captcha_sid:
                        # Look for an answer written into captcha.txt as
                        # "key <answer>".
                        with open(accounts.getFile('captcha.txt')) as f:
                            key = f.read()
                        key_parts = key.split()
                        # Require the answer token too; a file holding only
                        # "key" used to raise IndexError here.
                        if key.startswith('key') and len(key_parts) > 1:
                            log.info('Trying a key from captcha.txt')
                            params['captcha_sid'] = self.captcha_sid
                            params['captcha_key'] = key_parts[1]
                            self.captcha_sid = ''
                            captcha.delete()
                            self.captcha_delayed = 0
                            self.externalCaptcha = True
                            return self.apiCall(method, params)
                    if self.captcha_delayed == self.checks_before_antigate:
                        log.info('Using antigate')
                        ans = captcha.solve()
                        if ans is None:
                            self.captchaError = True
                            time.sleep(5)
                        elif not ans:
                            captcha.receive(data_array['error']['captcha_img'])
                            self.captcha_sid = data_array['error']['captcha_sid']
                            return self.apiCall(method, params)
                        else:
                            params['captcha_sid'] = self.captcha_sid
                            params['captcha_key'] = ans
                            self.captcha_delayed = 0
                    else:
                        # Not yet time for the solver: wait and count the
                        # check.
                        time.sleep(self.captcha_check_interval)
                        self.captcha_delayed += 1
                    return self.apiCall(method, params)
                elif data_array['error']['error_code'] == 5: #Auth error
                    self.login()
                    return self.apiCall(method, params)

                elif (data_array['error']['error_code'], method) in self.ignored_errors:
                    # handler is falsy (ignore silently) or an indexable
                    # (message, retry-once flag) pair.
                    handler = self.ignored_errors[(data_array['error']['error_code'], method)]
                    if not handler:
                        return None
                    if retry or not handler[1]:
                        log.warning(handler[0])
                        return None
                    else:
                        log.warning(handler[0] + ', retrying')
                        time.sleep(3)
                        return self.apiCall(method, params, True)

                else:
                    log.error('{}, params {}\ncode {}: {}'.format(method, json.dumps(params), data_array['error']['error_code'], data_array['error'].get('error_msg')))
                    return None
            else:
                # Reply has neither 'response' nor 'error': ask again.
                return self.apiCall(method, params)
Exemple #12
0
            print "found the success page, do the jump"
            jump_link = browser.find_by_xpath('/html/body/table/tbody/tr[2]/td/div/a')
            jump_link.click()
    elif entrance_no == 5:
        try_time = 0
        try_time_max = 50
        while True:
            captcha.do_delete()
            os.mkdir(captcha_folder)
            print "try to recognize the captcha.."
            captcha_img = browser.find_by_xpath('//*[@id="regimg"]')
            captcha_img = captcha_img[0]

            get_captcha(browser.driver, captcha_img, captcha_folder)
            captcha.preprocess('captcha.bmp', 'captcha_output.bmp')
            captcha_word = captcha.solve('captcha_output.bmp')
            if captcha_word == '':
                if try_time < try_time_max:
                    print "download_from_niuniu::captcha.solve() failed, try_time = " + str(try_time)
                    try_time += 1

                    print "refresh the captcha"
                    # refresh_captcha_link = browser.find_by_xpath('//*[@id="loginHtml"]/a[1]')
                    # '//*[@id="loginHtml"]/a[1]'
                    # refresh_captcha_link.click()
                    browser.reload()
                    continue
                else:
                    print "download_from_niuniu::captcha.solve() failed, too many attempts, abort."
                    browser.quit()
                    return ''
Exemple #13
0
browser = webdriver.Firefox()

# Open the front page, then navigate to the href of its 'login-required'
# element.
browser.get('http://reddit.com')
login_link = browser.find_element_by_class_name('login-required')
browser.get(login_link.get_attribute('href'))

# Click each registration field, then type its value into it.
for field_id, text in (('user_reg', username),
                       ('passwd_reg', password),
                       ('passwd2_reg', password)):
    browser.find_element_by_id(field_id).click()
    browser.find_element_by_id(field_id).send_keys(text)

captcha_image = browser.find_element_by_class_name('capimage')
captcha_url = captcha_image.get_attribute('src')

# Single-argument parenthesised prints: same output on Python 2 and 3.
print("solving captcha: " + captcha_url)
print("be patient ...")
solved_captcha = captcha.solve(captcha_url)
print("done.")

browser.find_element_by_id('captcha_').click()
browser.find_element_by_id('captcha_').send_keys(solved_captcha)

# Click the first element of class 'button' labelled 'create account'.
for btn in browser.find_elements_by_class_name('button'):
    if btn.text != 'create account':
        continue
    btn.click()
    break