Exemple #1
0
class CaptchaForm(HTMLParser):
	"""Submit a captcha form and extract the link from the response page.

	Constructing an instance does all the work: the page at ``url`` is
	fetched and handed to CaptchaParser, the captcha image is read with
	Tesseract, the guess is posted back to ``url`` and the response is fed
	through this parser. The extracted address ends up in ``self.link``.
	"""
	def __init__(self, url):
		"""Keep submitting captcha guesses to ``url`` until a link is found.

		NOTE: the loop has no retry limit and no delay between attempts; it
		only terminates once ``self.link`` has been set by handle_starttag.
		"""
		HTMLParser.__init__(self)
		self.link = None
		self.located = False
		while not self.link:
			p = CaptchaParser(URLOpen().open(url).read())
			if not p.captcha:
				continue
			handle = URLOpen().open(p.captcha)
			# Only GIF responses are passed on to the OCR step.
			if handle.info()["Content-Type"] != "image/gif":
				continue
			self.tess = Tesseract(handle.read())
			captcha = self.get_captcha()
			if not captcha:
				continue
			form = urllib.urlencode([(CAPTCHACODE, p.captchacode), (MEGAVAR, p.megavar), ("captcha", captcha)])
			handle = URLOpen().open(url, form)
			# Restart the parser so every response is parsed from scratch.
			self.reset()
			self.feed(handle.read())
			self.close()
			logger.info("Captcha %s: %s" % (p.captcha, captcha))

	def handle_starttag(self, tag, attrs):
		"""Track the "downloadlink" div and grab the anchor that follows it.

		A ``div`` whose second attribute value is "downloadlink" arms the
		parser; the next ``a`` tag whose first attribute is ``href`` supplies
		``self.link`` and disarms it again.
		"""
		if tag == "a":
			# attrs is an empty list for a bare <a>, so check it before
			# indexing; otherwise an attribute-less anchor raises IndexError.
			if self.located and attrs and attrs[0][0] == "href":
				self.located = False
				self.link = attrs[0][1]
		elif tag == "div":
			if len(attrs) > 1 and attrs[1][1] == "downloadlink":
				self.located = True

	def get_captcha(self):
		"""Return the OCR result if it is exactly four characters, else None."""
		result = self.tess.get_captcha()
		if len(result) == 4:
			return result
		return None
Exemple #2
0
class CaptchaForm(HTMLParser):
    """Submit a captcha form and extract the link from the response page.

    Constructing an instance does all the work: the page at ``url`` is
    fetched and handed to CaptchaParser, the captcha image is read with
    Tesseract, the guess is posted back to ``url`` and the response is fed
    through this parser. The extracted address ends up in ``self.link``.
    """
    def __init__(self, url):
        """Keep submitting captcha guesses to ``url`` until a link is found.

        NOTE: the loop has no retry limit and no delay between attempts; it
        only terminates once ``self.link`` has been set by handle_starttag.
        """
        HTMLParser.__init__(self)
        self.link = None
        self.located = False
        while not self.link:
            p = CaptchaParser(URLOpen().open(url).read())
            if not p.captcha:
                continue
            handle = URLOpen().open(p.captcha)
            # Only GIF responses are passed on to the OCR step.
            if handle.info()["Content-Type"] != "image/gif":
                continue
            self.tess = Tesseract(handle.read())
            captcha = self.get_captcha()
            if not captcha:
                continue
            form = urllib.urlencode([(CAPTCHACODE, p.captchacode),
                                     (MEGAVAR, p.megavar),
                                     ("captcha", captcha)])
            handle = URLOpen().open(url, form)
            # Restart the parser so every response is parsed from scratch.
            self.reset()
            self.feed(handle.read())
            self.close()
            logger.info("Captcha %s: %s" % (p.captcha, captcha))

    def handle_starttag(self, tag, attrs):
        """Track the "downloadlink" div and grab the anchor that follows it.

        A ``div`` whose second attribute value is "downloadlink" arms the
        parser; the next ``a`` tag whose first attribute is ``href`` supplies
        ``self.link`` and disarms it again.
        """
        if tag == "a":
            # attrs is an empty list for a bare <a>, so check it before
            # indexing; otherwise an attribute-less anchor raises IndexError.
            if self.located and attrs and attrs[0][0] == "href":
                self.located = False
                self.link = attrs[0][1]
        elif tag == "div":
            if len(attrs) > 1 and attrs[1][1] == "downloadlink":
                self.located = True

    def get_captcha(self):
        """Return the OCR result if it is exactly four characters, else None."""
        result = self.tess.get_captcha()
        if len(result) == 4:
            return result
        return None
Exemple #3
0
	def build_candidates(self, characters4_pixels_list, uncertain_pixels):
		"""Yield OCR word candidates for every merge of uncertain pixel groups.

		For each entry of ``characters4_pixels_list`` and for each combination
		(of every size, including none) of the ``uncertain_pixels`` groups, the
		character pixel collections are copied, every chosen group is added to
		the pair of characters selected by ``get_pair_inclussion``, and the
		resulting characters are rotated, joined, smoothed and read with
		Tesseract. Only readings accepted by ``filter_word`` are yielded.
		"""
		for list_number, base_characters in enumerate(characters4_pixels_list, 1):
			logging.debug("Generating words (%d) %d/%d" % (2**len(uncertain_pixels), list_number, len(characters4_pixels_list)))
			for subset_size in range(len(uncertain_pixels)+1):
				for chosen_groups in combinations_no_repetition(uncertain_pixels, subset_size):
					# Work on copies so each combination starts from the untouched characters.
					trial_characters = [character.copy() for character in base_characters]
					for group in chosen_groups:
						# First center-of-mass component of the group (presumably its x position).
						group_position = center_of_mass(group)[0]
						neighbours = get_pair_inclussion(trial_characters, group_position, pred=lambda x: center_of_mass(x)[0])
						if neighbours:
							# The uncertain group is added to both characters of the pair.
							first_char, second_char = neighbours
							first_char.update(group)
							second_char.update(group)

					rotated_images = [self.rotate_character(char_pixels, char_index) for char_index, char_pixels in enumerate(trial_characters)]
					joined_image = join_images_horizontal(rotated_images)
					clean_image = smooth(joined_image, 0)

					# The callable handed to Tesseract ignores its argument and returns the prepared image.
					reader = Tesseract(self.data, lambda x: clean_image)
					candidate = filter_word(reader.get_captcha().strip())
					if candidate:
						yield candidate
Exemple #4
0
                self.form_action = attrs[0][1]


# Manual run: fetch a gigasize.com page, OCR its captcha image, submit the
# login form and wait before continuing with the parsed form action.
if __name__ == "__main__":
    # Install a global opener with a cookie jar so later urlopen calls
    # share the cookies received by earlier ones.
    urllib2.install_opener(
        urllib2.build_opener(urllib2.HTTPCookieProcessor(
            cookielib.CookieJar())))
    # The response is discarded; presumably this request is only made to
    # obtain the session cookies -- TODO confirm.
    urllib2.urlopen(
        urllib2.Request(
            "http://www.gigasize.com/get.php/3196987695/p3x03sp.avi"))

    # Download the captcha image and hand its bytes to Tesseract.
    # NOTE(review): the meaning of the True flag and of the 3 passed to
    # get_captcha is not visible here -- check the Tesseract class.
    tes = Tesseract(
        urllib2.urlopen(
            urllib2.Request("http://www.gigasize.com/randomImage.php")).read(),
        True)
    captcha = tes.get_captcha(3)

    # Form fields posted together with the captcha guess.
    data = urllib.urlencode({
        "txtNumber": captcha,
        "btnLogin.x": "124",
        "btnLogin.y": "12",
        "btnLogin": "******"
    })
    # Passing data to urlopen makes this a POST request.
    handle = urllib2.urlopen(
        urllib2.Request("http://www.gigasize.com/formdownload.php"), data)
    f = FormParser(handle.read())
    handle.close()
    if f.form_action:
        timer = 60
        # NOTE(review): as shown, timer is never decremented, so this loop
        # would not end; the snippet looks truncated at this point.
        while timer > 0:
            time.sleep(1)
Exemple #5
0
		self.feed(data)
		self.close()
		print self.form_action

	def handle_starttag(self, tag, attrs):
		"""Store the download form's action when its start tag is parsed.

		Only a ``form`` tag with exactly three attributes whose third
		attribute value is "formDownload" is accepted; the value of its
		first attribute is saved in ``self.form_action``.
		"""
		if tag != "form" or len(attrs) != 3:
			return
		if attrs[2][1] == "formDownload":
			self.form_action = attrs[0][1]

# Manual run: fetch a gigasize.com page, OCR its captcha image, submit the
# login form, wait 60 seconds and then request the download.
if __name__ == "__main__":
	# Install a global opener with a cookie jar so later urlopen calls
	# share the cookies received by earlier ones.
	urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar())))
	# The response is discarded; presumably this request is only made to
	# obtain the session cookies -- TODO confirm.
	urllib2.urlopen(urllib2.Request("http://www.gigasize.com/get.php/3196987695/p3x03sp.avi"))

	# Download the captcha image and hand its bytes to Tesseract.
	# NOTE(review): the meaning of the True flag and of the 3 passed to
	# get_captcha is not visible here -- check the Tesseract class.
	tes = Tesseract(urllib2.urlopen(urllib2.Request("http://www.gigasize.com/randomImage.php")).read(), True)
	captcha = tes.get_captcha(3)

	# Passing data to urlopen makes this a POST request carrying the guess.
	data = urllib.urlencode({"txtNumber": captcha, "btnLogin.x": "124", "btnLogin.y": "12", "btnLogin": "******"})
	handle = urllib2.urlopen(urllib2.Request("http://www.gigasize.com/formdownload.php"), data)
	f = FormParser(handle.read())
	handle.close()
	if f.form_action:
		# Count down 60 seconds, printing the remaining time each second.
		timer = 60
		while timer > 0:
			time.sleep(1)
			timer -= 1
			print timer
		data = urllib.urlencode({"dlb": "Download"})
		handle = urllib2.urlopen(urllib2.Request("http://www.gigasize.com" + f.form_action), data)
		# Read the response in 1024-byte chunks until an empty read; the
		# chunks are not stored anywhere in the code shown here.
		# NOTE(review): the snippet may continue past this point.
		while len(data) > 0:
			data = handle.read(1024)