def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if 'File Name:' in line:
                name = it.next().split('>')[1].split('<')[0]
            if 'File Size:' in line:
                tmp = line.split('>')[3].split('<')[0]
                if "KB" in tmp:
                    size = int(round(float(tmp.split("KB")[0])))
                    unit = "KB"
                elif "MB" in tmp:
                    size = float(tmp.split("MB")[0])
                    if int(round(size)) > 0:
                        size = int(round(size))
                        unit = "MB"
                    else:
                        size = int(round(1024 * size))
                        unit = "KB"
                elif "GB" in tmp:
                    size = int(round(float(tmp.split("GB")[0])))
                    unit = "GB"
    except Exception, e:
        name = None
        size = -1
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

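#A sketch, not part of the original plugin API: the KB/MB/GB
#normalization above recurs in almost every check_links below and
#could be factored into a standalone helper (the name parse_size is
#hypothetical):
def parse_size(text):
    """Normalize a size string such as '4.3MB' into an (int_size, unit)
    pair, mirroring the inline logic above: MB sizes that would round
    to 0 are converted to KB so a zero size is never returned."""
    text = text.replace(" ", "")
    for unit in ("KB", "MB", "GB"):
        if unit in text:
            size = float(text.split(unit)[0])
            if unit == "MB" and int(round(size)) == 0:
                return int(round(1024 * size)), "KB"
            return int(round(size)), unit
    return -1, None  #unknown format: same sentinel as check_links

#e.g. parse_size("4.3MB") == (4, "MB"); parse_size("0.4MB") == (410, "KB")
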
def parse_wait(self, url):
    """"""
    link = None
    form = None
    wait = 0
    found = False
    try:
        tmp_form = []
        opener = URLOpen()
        for line in opener.open(url):
            if "download_file" in line:
                found = True
            elif found:
                if "method=post " in line:
                    link = "%s%s" % (BASE_URL, line.split('action="')[1].split('" ')[0])
                elif "name=action " in line:
                    tmp_form.append(("action", line.split("value=")[1].split(">")[0]))
                elif "name=tm " in line:
                    tmp_form.append(("tm", line.split("value=")[1].split(">")[0]))
                elif "name=tmhash " in line:
                    tmp_form.append(("tmhash", line.split("value=")[1].split(">")[0]))
                elif "name=wait " in line:
                    wait = int(line.split("value=")[1].split(">")[0])
                    tmp_form.append(("wait", wait))
                elif "name=waithash " in line:
                    tmp_form.append(("waithash", line.split("value=")[1].split(">")[0]))
                elif "name=upidhash " in line:
                    tmp_form.append(("upidhash", line.split("value=")[1].split(">")[0]))
                    #upidhash is the last hidden field, so stop scanning
                    found = False
                    form = urllib.urlencode(tmp_form)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
    return link, form, wait

def get_cookie(self, user, password, url=None):
    """"""
    opener = URLOpen()
    data = urllib.urlencode([("sub", "getaccountdetails_v1"),
                             ("type", "prem"),
                             ("login", user),
                             ("password", password),
                             ("withcookie", 1)])
    for line in opener.open(API_URL, data).readlines():
        if "ERROR" in line:
            return
        elif "cookie" in line:
            tmp_cookie = cookielib.Cookie(version=0, name='enc',
                    value=line.split("=")[1].strip(), port=None,
                    port_specified=False, domain='.rapidshare.com',
                    domain_specified=False, domain_initial_dot=True,
                    path='/', path_specified=True, secure=False,
                    expires=None, discard=True, comment=None,
                    comment_url=None, rest={'HttpOnly': None},
                    rfc2109=False)
            cookie = cookielib.CookieJar()
            cookie.set_cookie(tmp_cookie)
            return cookie

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if '<span class="txtorange">' in line:
                tmp = it.next()
                name = tmp.split("<")[0].strip()
                tmp = tmp.split(">(")[1].split(")")[0]
                if "KB" in tmp:
                    size = int(round(float(tmp.split("KB")[0])))
                    unit = "KB"
                elif "MB" in tmp:
                    size = float(tmp.split("MB")[0])
                    if int(round(size)) > 0:
                        size = int(round(size))
                        unit = "MB"
                    else:
                        size = int(round(1024 * size))
                        unit = "KB"
                elif "GB" in tmp:
                    size = int(round(float(tmp.split("GB")[0])))
                    unit = "GB"
    except urllib2.HTTPError:
        pass
    except Exception, e:
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if 'fileInfo filename' in line:
                name = line.split('<strong>')[1].split('</strong>')[0]
            elif 'fileInfo filesize' in line:
                it.next()
                tmp = it.next().split('class="size">')[1].split("<")[0]
                if "KB" in tmp:
                    size = int(round(float(tmp.split("KB")[0])))
                    unit = "KB"
                elif "MB" in tmp:
                    size = float(tmp.split("MB")[0])
                    if int(round(size)) > 0:
                        size = int(round(size))
                        unit = "MB"
                    else:
                        size = int(round(1024 * size))
                        unit = "KB"
                elif "GB" in tmp:
                    size = int(round(float(tmp.split("GB")[0])))
                    unit = "GB"
    except Exception, e:
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if '/img/manager/mime/' in line:
                if ("generic" in line) or ("audio" in line) or ("archive" in line):
                    tmp = line.split('/>')[1].split("</h1>")[0]
                if "video" in line:
                    tmp = line.split('</a>')[1].split("<")[0]
                tmp = tmp.replace(" ", "")
                tmp = tmp.replace("​", "")
                name = tmp.replace("​", "")
            elif '<div id="info" class="metadata">' in line:
                tmp = it.next()
                tmp = tmp.split("<span>")[1].split("file")[0].strip()
                size = int(round(float(tmp.split(" ")[0])))
                unit = tmp.split(" ")[1].upper()
            elif 'Retry Download' in line:
                name = line.split('href="')[1].split('"')[0].split("/").pop()
    except Exception, e:
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if 'Filename:' in line:
                name = line.split(">")[1].split("<")[0]
                line = it.next()
                size_and_units = line.split(":")[1].split("<")[0].strip().split(" ")
                size = float(size_and_units[0])
                unit = size_and_units[1].upper()
                if 'B' == unit:
                    size = size / 1024
                    unit = "KB"
                break
    #Oron responds to unknown files with an HTTP 404 followed by a redirect
    except urllib2.HTTPError as http_error:
        if http_error.code != 404:
            logger.warning("Oron::check_links: Received unexpected HTTP error code: %s"
                           % http_error.code)
        return None, -1, None
    except Exception, e:
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        cookie = self.get_cookie()
        if not wait_func():
            return
        opener = URLOpen(cookie)
        handler = opener.open(url, None, content_range)
        if not wait_func():
            return
        if "text/html" in handler.info()["Content-Type"]:
            cookie_value = cookie._cookies[".rapidshare.com"]["/"]["enc"].value
            tmp = url.split("/")
            form = urllib.urlencode([("sub", "download_v1"),
                                     ("cookie", cookie_value),
                                     ("fileid", tmp[4]),
                                     ("filename", tmp[5])])
            for line in opener.open("http://api.rapidshare.com%s" % API_URL,
                                    form, content_range):
                if "DL:" in line:
                    tmp_url = "http://%s%s" % (line.split("DL:")[1].split(",")[0], API_URL)
                    return opener.open(tmp_url, form, content_range)
        else:
            return handler
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    link = None
    wait = 0
    try:
        tmp = url.split("/")
        opener = URLOpen()
        url = "%s&fileid=%s" % (API_URL, tmp[4])
        url = "%s&filename=%s" % (url, tmp[5])
        for line in opener.open("http://%s%s" % ("api.rapidshare.com", url)):
            if "DL:" in line:
                tmp = line.split("DL:")[1].split(",")
                link = "http://%s%s&dlauth=%s" % (tmp[0], url, tmp[1])
                wait = int(tmp[2])
        if not wait_func(wait):
            return
        if link:
            return URLOpen().open(link, None, content_range)
        else:
            return self.set_limit_exceeded()
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    #Remove the filename from the url
    tmp = url.split("/file/")[1].split("/")[0]
    url = "%s/file/%s" % (BASE_URL, tmp)
    link = None
    retry = 3
    wait = WAIT
    try:
        opener = URLOpen()
        for line in opener.open(url):
            if 'check:' in line:
                check = line.split("check:'")[1].replace("'", "").strip()
            elif "Recaptcha.create" in line:
                tmp = line.split('("')[1].split('"')[0]
                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                while not link and retry:
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Filefactory performs a check on its server by doing an
                        #Ajax request sending the following data
                        form = urllib.urlencode([("recaptcha_challenge_field", challenge),
                                                 ("recaptcha_response_field", response),
                                                 ("recaptcha_shortencode_field", "undefined"),
                                                 ("check", check)])
                        url = "%s/file/checkCaptcha.php" % BASE_URL
                        #Getting the result back, status:{"ok"|"fail"}
                        for line in opener.open(url, form):
                            if 'status:"ok"' in line:
                                tmp = line.split('path:"')[1].strip('"')
                                tmp_link = "%s%s" % (BASE_URL, tmp)
                                for line in opener.open(tmp_link):
                                    if '<span class="countdown">' in line:
                                        #Try to get the wait time from the page
                                        try:
                                            tmp = line.split('"countdown">')[1].split("</span")[0]
                                            tmp = int(tmp)
                                        except ValueError:
                                            pass
                                        else:
                                            if tmp > 0:
                                                wait = tmp
                                    if "Download with FileFactory Basic" in line:
                                        link = line.split('<a href="')[1].split('"')[0]
                                        break
                    retry -= 1
                break
        if link:
            if not wait_func(wait):
                return
            return opener.open(link, None, content_range, True)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

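#A sketch of the checkCaptcha.php reply handling above: the reply is a
#small JS-object-like blob with a status and a path. The sample string
#below is illustrative only, not captured from FileFactory:
def parse_check_captcha(reply):
    """Extract the download path from a status:{"ok"|"fail"} reply,
    using the same split-based style as link_parser above."""
    if 'status:"ok"' in reply:
        return reply.split('path:"')[1].split('"')[0]
    return None

assert parse_check_captcha('{status:"ok", path:"/dlf/f/abc123"}') == "/dlf/f/abc123"
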
def link_parser(self, url, wait_func, content_range=None):
    """
    See the comment in anonymous_download.py for how Oron links
    generally look. Premium accounts still have to post the random
    value found at the bottom of the HTML page, and there is also a
    download page that has to be parsed to figure out the actual
    direct download link.
    """
    file_id = url.split("/")[3]
    file_name = self.check_links(url)[0]
    try:
        cookie = self.get_cookie()
        if not wait_func():
            return
        opener = URLOpen(cookie)
        web_page = opener.open(url, None, content_range)
        if not wait_func():
            return
        rand_value = None
        for line in web_page:
            if '<input type="hidden" name="rand" value="' in line:
                rand_value = line.split('value="')[1].split('"')[0]
                break
        if not rand_value:
            logger.error("Oron.premium_download: could not find random value in "
                         "download page. Premium format changed?")
        form = urllib.urlencode({"op": "download2",
                                 "id": file_id,
                                 "rand": rand_value,
                                 "referer": "",
                                 "method_free": "",
                                 "method_premium": "1",
                                 "down_direct": "1"})
        download_page = opener.open(url, form, content_range)
        direct_link = None
        for line in download_page:
            if 'Download File</a></td>' in line:
                direct_link = line.split('a href="')[1].split('" class="')[0]
        if not direct_link:
            return
        return opener.open(direct_link)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

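#Both the premium flow above and the free flow later in this file
#scrape the hidden rand input with the same split chain; in isolation,
#with a made-up sample line in Oron's template style (hidden_rand is a
#hypothetical helper name):
def hidden_rand(line):
    """Pull the value out of a hidden <input name="rand"> tag."""
    return line.split('value="')[1].split('"')[0]

assert hidden_rand('<input type="hidden" name="rand" value="abc42def">') == "abc42def"
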
def link_parser(self, url, wait_func, content_range=None):
    """"""
    auth_string = self.get_cookie()
    if not wait_func():
        return
    encoded_link = 'http://api.hotfile.com/?action=getdirectdownloadlink&link=' + url + auth_string
    logger.info("Encoded link %s" % (encoded_link))
    opener = URLOpen()
    handler = opener.open(encoded_link)
    #strip the trailing newline so the link can be opened directly
    actual_link = handler.readline().strip()
    return opener.open(actual_link)

def parse(self, path):
    """"""
    tmp = URLOpen().open(API_URL).read()
    if tmp:
        uploadid = "%s%i" % (str(int(time.time()))[-5:],
                             random.randint(10000, 1000000))
        server = tmp.split('"')[1].split('"')[0]
        url = "http://rs%sl3.rapidshare.com/cgi-bin/upload.cgi?rsuploadid=%s" % (server, uploadid)
        form = {"rsapi_v1": "1",
                "realfolder": "0",
                "filecontent": open(path, "rb")}
        #rapidshare's boundary handler has a bug, so generate our own boundary
        boundary = "--%s" % uuid.uuid4().hex
        return MultipartEncoder(url, form, boundary)

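#As the comment above notes, rapidshare's boundary handler has a bug,
#which is why the boundary is generated client-side instead of letting
#the multipart encoder pick one. A minimal illustration of that choice:
import uuid

boundary = "--%s" % uuid.uuid4().hex
#e.g. '--3f2b0c6e9d474a5f8e1c2b7a6d5e4f3a' (value made up here): 34
#characters of hex, so a collision with the file contents is
#vanishingly unlikely
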
def get_cookie(self, user, password, url=None):
    """"""
    cookie = cookielib.CookieJar()
    opener = URLOpen(cookie)
    opener.open("http://www.megaupload.com/?c=login",
                urllib.urlencode({"login": "******",
                                  "redir": "1",
                                  "username": user,
                                  "password": password}))
    if len(cookie) > 0:
        return cookie

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        url = url.split("&")[0]
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if not wait_func():
            return
        retry = 5
        while retry:
            it = opener.open(url)
            img_url = None
            for line in it:
                if "<iframe src='" in line:
                    img_url = line.split("'")[1].split("'")[0]
                elif 'name="fileId"' in line:
                    file_id = line.split('value="')[1].split('"')[0]
            if not img_url:
                return self.set_limit_exceeded()
            it = opener.open(img_url)
            for line in it:
                if 'AdsCaptcha Challenge' in line:
                    img_url = line.split('src="')[1].split('"')[0]
                elif 'class="code">' in line:
                    code = line.split('">')[1].split("<")[0]
            tes = Tesseract(opener.open(img_url).read())
            captcha = tes.get_captcha()
            captcha = "".join([c for c in captcha if c.isdigit()])  #keep only the numbers
            data = urllib.urlencode([("fileId", file_id),
                                     ("adscaptcha_response_field", captcha),
                                     ("adscaptcha_challenge_field", code),
                                     ("adUnder", "")])
            it = opener.open("%s/getoken" % BASE_URL, data)
            captcha = False
            for line in it:
                if '"status":1' in line:
                    captcha = True  #captcha is valid
            if captcha:
                if not wait_func(WAIT):
                    return
                it = opener.open("%s/formtoken" % BASE_URL)
                for line in it:
                    token = line
                rnd = "".join([str(random.randint(1, 9)) for i in range(16)])
                data = urllib.urlencode([("fileId", file_id),
                                         ("token", token),
                                         ("rnd", rnd)])
                it = opener.open("%s/getoken" % BASE_URL, data)
                for line in it:
                    if '"status":1' in line:
                        link = line.split('":"')[1].split('"')[0].replace("\\", "")
                        return opener.open(link)
            retry -= 1
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if '"panel file_download"' in line:
                it.next()
                name = it.next().split(">")[1].split("<")[0]
                it.next()
                tmp = it.next().split("<strong>")[1].split("<")[0]
                unit = tmp[-2:]
                #Fix me: GB bug
                if unit == "GB":
                    size = int(1024 * float(tmp[:-2]))
                    unit = "MB"
                else:
                    size = int(round(float(tmp[:-2])))
                if size > 1024:
                    if unit == "KB":
                        size = size / 1024
                        unit = "MB"
                break
    except Exception, e:
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        #Remove the filename from the url
        tmp = url.split("/file/")[1].split("/")[0]
        url = "%s/file/%s" % (BASE_URL, tmp)
        file_id = url.split("/")[-1].strip("/")
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        form = urllib.urlencode([("checkDownload", "check")])
        #If the limit is exceeded
        if '"fail":"timeLimit"' in opener.open(url, form).read():
            return self.set_limit_exceeded()
        it = opener.open(url)
        for line in it:
            if 'reCAPTCHA_publickey=' in line:
                tmp = line.split("'")[1].split("'")[0]
                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                for retry in range(3):
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([("recaptcha_challenge_field", challenge),
                                                 ("recaptcha_response_field", response),
                                                 ("recaptcha_shortencode_field", file_id)])
                        recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL
                        #Captcha is good
                        if "success" in opener.open(recaptcha_url, form).read():
                            form = urllib.urlencode([("downloadLink", "wait")])
                            wait = int(opener.open(url, form).read()[-2:])
                            if not wait_func(wait):
                                return
                            form = urllib.urlencode([("downloadLink", "show")])
                            opener.open(url, form).read()
                            form = urllib.urlencode([("download", "normal")])
                            return opener.open(url, form)  #,content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    link = None
    retry = 3
    try:
        if "?" in url:
            url = url.split("?")[0]
        tmp_link, tmp_form, wait = self.parse_wait(url)
        if not tmp_link or not tmp_form:
            return self.set_limit_exceeded()
        elif not wait_func(wait):
            return
        else:
            opener = URLOpen(cookielib.CookieJar())
            it = opener.open(tmp_link, tmp_form)
            for line in it:
                if "function starthtimer(){" in line:
                    it.next()
                    try:
                        tmp = int(it.next().split("+")[1].split(";")[0])
                        return self.set_limit_exceeded(int(tmp / 1000))
                    except Exception, e:
                        logger.exception("%s: %s" % (url, e))
                        return
                elif "click_download" in line:
                    link = line.split('href="')[1].split('"')[0]
                    break
                elif "http://api.recaptcha.net/challenge" in line:
                    recaptcha_link = line.split('src="')[1].split('"')[0]
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    while not link and retry:
                        challenge, response = c.solve_captcha()
                        if response:
                            if not wait_func():
                                return
                            form = urllib.urlencode([("action", "checkcaptcha"),
                                                     ("recaptcha_challenge_field", challenge),
                                                     ("recaptcha_response_field", response)])
                            for line in opener.open(tmp_link, form):
                                if "click_download" in line:
                                    link = line.split('href="')[1].split('"')[0]
                                    break
                        retry -= 1
                    break
        if link:
            return opener.open(link, None, content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        cookie = self.get_cookie()
        if not wait_func():
            return
        opener = URLOpen(cookie)
        handler = opener.open(url, None, content_range)
        if not wait_func():
            return
        else:
            return handler
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        link = None
        opener = URLOpen()
        form = urllib.urlencode([('download', ' REGULAR DOWNLOAD ')])
        for line in opener.open(url, form):
            if '<span id="spn_download_link">' in line:
                link = line.split('href="')[1].split('"')[0]
        if not link:
            return
        if not wait_func():
            return
        return opener.open(link, None, content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if 'download_file_title" style="margin:20px 0;">' in line:
                name = line.split('download_file_title" style="margin:20px 0;">')[1].split('<')[0].strip()
                tmp = line.split('color:#777;">')[1].split('<')[0].strip("()")
                unit = tmp[-2:]
                size = int(round(float(tmp[:-2])))
                if size > 1024:
                    if unit == "KB":
                        size = size / 1024
                        unit = "MB"
                break
    except Exception, e:
        name = None
        size = -1
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    size_found = False
    try:
        it = URLOpen().open(url)
        for line in it:
            if '<span id="fileNameTextSpan">' in line:
                name = line.split('<span id="fileNameTextSpan">')[1].split('</span>')[0].strip()
                break
            elif '<div class="small lgrey" style="margin-bottom:5px">' in line:
                size_found = True
            elif size_found:
                size_found = False
                tmp = line.split("<b>")[1].split("</b>")[0].split()
                unit = tmp[1]
                if "," in tmp[0]:
                    size = int(tmp[0].replace(",", ""))
                else:
                    size = int(tmp[0])
                if size > 1024:
                    if unit == "KB":
                        size = size / 1024
                        unit = "MB"
    except Exception, e:
        name = None
        size = -1
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if '<div class="finfo">' in line:
                name = line.split('>')[1].split('<')[0].strip()
            if '<div class="ffileinfo">' in line:
                tmp = line.split(":")[2].split("<")[0]
                unit = tmp[-2:]
                size = int(round(float(tmp[:-2].strip())))
                if size > 1024:
                    if unit == "KB":
                        size = size / 1024
                        unit = "MB"
                break
    except Exception, e:
        name = None
        size = -1
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        for line in URLOpen().open(url):
            if '<b>Name:</b>' in line:
                name = line.split('<b>Name:</b>')[1].split('<br>')[0].strip()
                tmp = line.split('<b>Size:</b> ')[1].split(' ')[0].strip()
                unit = tmp[-2:]
                size = int(round(float(tmp[:-2])))
                if size > 1024:
                    if unit == "KB":
                        size = size / 1024
                        unit = "MB"
                break
    except Exception, e:
        name = None
        size = -1
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        page = URLOpen().open(url)
        for lines in page:
            if 'xmlURL=http://mp3.zing.vn/xml/song-xml/' in lines:
                songxml = lines.split('xmlURL=http://mp3.zing.vn/xml/song-xml/')[1].split('&skin=http://static.mp3.zing.vn/skins')[0].strip()
                xml = URLOpen().open('http://mp3.zing.vn/xml/song-xml/' + songxml)
                for line in xml:
                    if '<title><![CDATA[' in line:
                        name = line.split('<title><![CDATA[')[1].split(']]></title>')[0].strip()
                    if '<source><![CDATA[' in line:
                        mp3link = line.split('<source><![CDATA[')[1].split(']]></source>')[0].strip()
                        #get the file size before the download
                        site = urllib.urlopen(mp3link)
                        meta = site.info()
                        size = int(meta.getheaders("Content-Length")[0]) / 1024
                        if size > 1024:
                            unit = "KB"
                        else:
                            name = None
                            size = -1
                            unit = None
                            break
                    if '<performer><![CDATA[' in line:
                        name += ' - ' + line.split('<performer><![CDATA[')[1].split(']]></performer>')[0].strip()
                        name += '.mp3'
    except Exception, e:
        name = None
        size = -1
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def get_cookie(self, user, password, url=None):
    """"""
    if user is None or password is None:
        return None
    cookie = cookielib.CookieJar()
    opener = URLOpen(cookie)
    encoded_str = urllib.urlencode({"password": password,
                                    "login": user,
                                    "rand": "",
                                    "redirect": "",
                                    "op": "login"})
    opener.open("http://www.oron.com/login", encoded_str)
    if len(cookie) > 0:
        return cookie

def parse(self, path):
    """"""
    tmp = URLOpen().open(API_URL)
    if tmp:
        url = None
        for line in tmp:
            if 'multipart/form-data' in line:
                url = line.split('action="')[1].split('"')[0]
        if url:
            form = {"uploads[]": open(path, "rb")}
            return MultipartEncoder(url, form, None)

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        page = URLOpen().open(url)
        for lines in page:
            if '<param value="flashid=flash-player&defaultindex=0&autostart=true&file=http://www.nhaccuatui.com/api/playerv7.ashx?key2=' in lines:
                songxml = lines.split('<param value="flashid=flash-player&defaultindex=0&autostart=true&file=http://www.nhaccuatui.com/api/playerv7.ashx?key2=')[1].split('" name="flashvars" />')[0].strip()
                xml = URLOpen().open('http://www.nhaccuatui.com/api/playerv7.ashx?key2=' + songxml)
                for line in xml:
                    name = line.split('<title><![CDATA[')[1].split(']]></title>')[0].strip()
                    name += ' - ' + line.split('<creator><![CDATA[')[1].split(']]></creator>')[0].strip()
                    name += '.mp3'
                    mp3link = line.split('<location><![CDATA[')[1].split(']]></location>')[0].strip()
                    #get the file size before the download
                    site = urllib.urlopen(mp3link)
                    meta = site.info()
                    size = int(meta.getheaders("Content-Length")[0]) / 1024
                    if size > 1024:
                        unit = "KB"
                    else:
                        name = None
                        size = -1
                        unit = None
                    break
    except Exception, e:
        name = None
        size = -1
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        wait = WAIT
        link = None
        opener = URLOpen()
        #Transform the url into an english one
        url = "%s%s" % (BASE_URL, url.split("/files/")[1].split("/")[0])
        form = urllib.urlencode([('gateway_result', '1')])
        for line in opener.open(url, form):
            #Try to get the wait time from the page
            if 'download_waiter_remain' in line:
                try:
                    tmp = line.split(">")[2].split("<")[0]
                    tmp = int(tmp)
                except Exception, e:
                    pass
                else:
                    if tmp > 0:
                        wait = tmp
            elif "$('#download_container').load('" in line:
                try:
                    tmp = line.split("load('")[1].split("'")[0]
                    url = "%s%s" % ("http://depositfiles.com", tmp)
                except Exception, e:
                    pass
        if not wait_func(wait + 1):
            return
        #Due to a bug in DepositFiles, sometimes it returns "Invalid params".
        #If that is the case, retry up to 10 times, then set limit exceeded
        for attempt in range(10):
            for line in opener.open(url):
                if "Invalid" in line:
                    if not wait_func():
                        return
                    break
                elif "action" in line:
                    link = line.split('"')[1].split('"')[0]
                    break
            if link:
                break
        if link:
            return opener.open(link, None, content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        cookie = self.get_cookie()
        if not wait_func():
            return
        opener = URLOpen(cookie)
        handler = opener.open(url, None, content_range)
        if not wait_func():
            return
        if "text/html" in handler.info()["Content-Type"]:
            for line in handler:
                if 'class="down_ad_butt1">' in line:
                    return opener.open(line.split('href="')[1].split('"')[0],
                                       None, content_range)
        else:
            return handler
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        mp3link = None
        page = URLOpen().open(url)
        for lines in page:
            if 'xmlURL=http://mp3.zing.vn/xml/song-xml/' in lines:
                songxml = lines.split('xmlURL=http://mp3.zing.vn/xml/song-xml/')[1].split('&skin=http://static.mp3.zing.vn/skins')[0].strip()
                xml = URLOpen().open('http://mp3.zing.vn/xml/song-xml/' + songxml)
                for line in xml:
                    if '<source><![CDATA[' in line:
                        mp3link = line.split('<source><![CDATA[')[1].split(']]></source>')[0].strip()
        if not mp3link:
            return
        return URLOpen().open(mp3link, None, content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        mp3link = None
        xml = URLOpen().open('http://nhacso.net/flash/song/xnl/1/id/' + url[-13:-5])
        for line in xml:
            if '<mp3link><![CDATA[' in line:
                mp3link = line.split('<mp3link><![CDATA[')[1].split(']]></mp3link>')[0].strip()
        if not mp3link:
            return
        return URLOpen().open(mp3link, None, content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def get_cookie(self, user, password, url=None):
    """"""
    if user is None or password is None:
        return None
    cookie = cookielib.CookieJar()
    opener = URLOpen(cookie)
    encoded_str = urllib.urlencode({"loginUserName": user,
                                    "loginUserPassword": password,
                                    "autoLogin": "******",
                                    "recaptcha_response_field": "",
                                    "recaptcha_challenge_field": "",
                                    "recaptcha_shortencode_field": "",
                                    "loginFormSubmit": "Login"})
    #logger.warning("Submitting this post: %s" % encoded_str)
    opener.open("http://www.fileserve.com/login.php", encoded_str)
    if len(cookie) > 0:
        return cookie

def check(self, url):
    """"""
    if url is None:
        return None
    name = None
    size = -1
    unit = None
    status = -1
    #Hotfile urls are always of this form:
    #  http://hotfile.com/dl/ID/KEY/filename.html
    #so splitting on '/' must yield 7 parts, with the id and key at
    #the (0-based) 4th and 5th entries
    split_str = url.split('/')
    if len(split_str) != 7:
        return None
    link_id = split_str[4]
    link_key = split_str[5]
    del split_str
    check_link_url = ("http://api.hotfile.com/?action=checklinks&ids=" + link_id +
                      "&keys=" + link_key + "&fields=name,size,status")
    try:
        link_name_size_status = URLOpen().open(check_link_url).readline()
        link_name_size_status_list = link_name_size_status.split(',')
        name = link_name_size_status_list[0]
        #Hotfile glitch: sometimes removed files do not have size information
        if len(link_name_size_status_list[1]) != 0:
            size = int(link_name_size_status_list[1]) / 1024
            status = int(link_name_size_status_list[2])
            unit = "KB"
    except Exception, e:
        logger.exception("%s :%s" % (url, e))
    return name, size, unit, status

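#A sketch against an illustrative (not captured) response line: the
#checklinks API answers with one comma-separated line per id, carrying
#exactly the fields requested in the query string above:
def parse_checklinks(line):
    """Split a 'name,size,status' line the way check() does; size may
    be empty for removed files (the Hotfile glitch noted above)."""
    fields = line.strip().split(',')
    name = fields[0]
    size = int(fields[1]) / 1024 if fields[1] else -1
    status = int(fields[2]) if fields[1] else -1
    return name, size, "KB" if fields[1] else None, status

assert parse_checklinks("video.avi,734003200,1") == ("video.avi", 716800, "KB", 1)
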
def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        link = None
        wait = WAIT
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if "/video/" in url:
            url = url.replace("/video/", "/download/")
        elif "/audio/" in url:
            url = url.replace("/audio/", "/download/")
        elif "/image/" in url:
            url = url.replace("/image/", "/download/")
        try:
            form = urllib.urlencode([("download", 1)])
            for line in opener.open(url, form):
                if 'link_enc=new Array' in line:
                    tmp = line.strip().split("var link_enc=new Array(")[1].split(");")[0]
                    link = tmp.replace("','", "").replace("'", "")
                #Try to get the wait time from the page
                if 'document|important' in line:
                    try:
                        tmp = line.split("here|")[1].split("|class")[0]
                        tmp = int(tmp)
                    except ValueError:
                        pass
                    else:
                        if tmp > 0:
                            wait = tmp
                    break
        except Exception, e:
            logger.exception("%s :%s" % (url, e))
        if not link:
            return
        if not wait_func(wait):
            return
        return opener.open(link, None, content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def check_links(self, url):
    """"""
    name = None
    size = -1
    unit = None
    try:
        it = URLOpen().open(url)
        for line in it:
            if '<div class="info">' in line:
                name = it.next().split('="')[1].split('">')[0].strip()
                tmp = it.next().split('>')[2].split('<')[0].strip()
                unit = tmp[-2:]
                size = int(round(float(tmp[:-2].replace(" ", ""))))
                if size > 1024:
                    if unit == "KB":
                        size = size / 1024
                        unit = "MB"
                break
    except Exception, e:
        name = None
        size = -1
        logger.exception("%s :%s" % (url, e))
    return name, size, unit

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        tmp_link = None
        link = None
        wait = WAIT
        opener = URLOpen(cookielib.CookieJar())
        it = opener.open(url)
        for line in it:
            if "dbtn" in line:
                tmp_link = line.split('href="')[1].split('"')[0]
        if tmp_link:
            it = opener.open(tmp_link)
            for line in it:
                if "id='divDLStart'" in line:
                    link = it.next().split("<a href='")[1].split("'")[0]
                elif '<div class="sec">' in line:
                    wait = int(line.split(">")[1].split("<")[0])
        if not link:
            return
        elif not wait_func(wait):
            return
        return opener.open(link, None, content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

class PremiumCookie:
    """"""
    def __init__(self):
        """"""
        self.digestURL = URLOpen()

    def get_cookie(self, user, password, url=None):
        """"""
        if user is None or password is None:
            return None
        #retrieve the MD5 digest challenge
        DigestURLHandler = self.digestURL.open('http://api.hotfile.com/?action=getdigest')
        md5Digest = DigestURLHandler.readline().strip()
        md5pw = hashlib.md5(password).hexdigest()
        md5pw = hashlib.md5(md5pw + md5Digest).hexdigest()
        return '&username=' + user + '&passwordmd5dig=' + md5pw + '&digest=' + md5Digest

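#The digest scheme above is two chained MD5s: hash the password, then
#hash the hex digest concatenated with the server-supplied challenge.
#A self-contained illustration (the challenge value here is made up;
#the real one comes from action=getdigest):
import hashlib

def digest_password(password, challenge):
    """md5(md5(password).hexdigest() + challenge), as in get_cookie()."""
    first = hashlib.md5(password).hexdigest()
    return hashlib.md5(first + challenge).hexdigest()

print digest_password("secret", "1a2b3c4d")
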
def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        pkr = None
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        res = ""
        #Open the first page
        page = opener.open(url).readlines()
        for line in page:
            #Get pKr
            if "pKr='" in line:
                pkr = line.split("'")[1].split("'")[0]
            #Get the last block to unescape
            if "unescape" in line:
                tmp = line.split("break;}")[-1]
                tmp = tmp.split("var cb")[0]
                tmp = self.split_eval(tmp)
                #Eval the block until it's plain text
                res = self.decrypt(tmp)
        #Name of the function containing the id referring to the div
        #that contains the real link
        id_func = res.split("(")[0]
        pk1 = res.split("'")[3].split("'")[0]
        #Public ID of the file
        qk = res.split("'")[1].split("'")[0]
        it = iter(page)
        for line in it:
            #Line containing the function to parse
            if id_func in line:
                #Try to get the crypted block
                tmp = line.split(id_func)[1].split("setTimeout")[0].split('"none";')[1]
                tmp = self.split_eval(tmp)
                #Eval until it's plain text
                res = self.decrypt(tmp)
                div_id = res.split('getElementById("')[1].split('"')[0]
        data = urllib.urlencode([("qk", qk), ("pk1", pk1), ("r", pkr)])
        form_action = "http://www.mediafire.com/dynamic/download.php?%s" % data
        #Parse the GET
        res = opener.open(form_action, data)
        line = " ".join(res)  #Long line containing the js
        if "var" in line:
            #Decrypt the table containing the final dl var
            tmp = line.split("function dz()")[0].split(";")[2:-1]
            tmp = ";".join(tmp)
            tmp = self.split_eval(tmp)
            table = self.decrypt(tmp)
        #Result is plain text
        if "http://download" in line:
            #Get all the dl links (even the fake ones)
            var = line.split('mediafire.com/" +')
            #Get the number of the server
            serv = line.split("http://download")[1].split(".")[0]
            #Get the name of the file
            name = var[1].split('+')[1].split("/")[2].split('"')[0].strip("\\")
            it = iter(var)
            #Find the real link among the fake ones
            for tmp in it:
                #Real link
                if div_id in tmp:
                    tmp = it.next()
                    tmp = tmp.split('+')[0]
                    #Get the final dl var in the table
                    dl = table.split(tmp + "=")[1].split(";")[0].strip("'")
        #Result is encrypted
        else:
            tmp = line.split("case 15:")[1]
            tmp = tmp.split("break;")[0]
            tmp = tmp.split("eval(")
            #Decrypt until the real link is found
            for t in tmp:
                if "unescape" in t:
                    t = self.split_eval(t)
                    res = self.decrypt(t, div_id)
                    if len(res) == 3:
                        serv = res[0]
                        var = res[1]
                        name = res[2]
                        break
            dl = table.split(var + "=")[1].split(";")[0].strip("'")
        url = "http://download%s.mediafire.com/%sg/%s/%s" % (serv, dl, qk, name)
        try:
            handle = opener.open(url, None, content_range)
        except Exception, e:
            return self.set_limit_exceeded()
        else:
            return handle
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        link = []  #One link at the end is in two parts
        captcha_url = None
        wait = WAIT
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if not wait_func():
            return
        #Get the captcha url
        data = urllib.urlencode([("rs", "refreshImage"),
                                 ("rst", ""),
                                 ("rsrnd", int(time.time()))])
        tmp = opener.open(url, data).read().split("+:var res = '")[1].split("'; res;")[0].replace('\\"', '"')
        form_action = tmp.split('action="')[1].split('"')[0]
        cap_id = tmp.split('name=cap_id value=')[1].split('>')[0]
        cap_secret = tmp.split('name=cap_secret value=')[1].split('>')[0]
        captcha_url = "%s%s" % (BASE_URL, tmp.split('img src="')[1].split('"')[0])
        if captcha_url:
            solved = False
            cont = 0
            while (not solved) and cont < 4:
                tes = Tesseract(opener.open(captcha_url).read(), self.filter_image)
                captcha = tes.get_captcha()
                #Crack trick to optimize the OCR
                if len(captcha) == 4 and captcha.isalnum():
                    if not captcha.isalpha():
                        for i, j in [("0", "O"), ("1", "I"), ("2", "Z"),
                                     ("3", "B"), ("4", "A"), ("5", "S"),
                                     ("6", "G"), ("7", "T"), ("8", "B"),
                                     ("9", "B")]:
                            captcha = captcha.replace(i, j)
                    captcha = captcha.upper()
                #Captcha: 4 letters
                if len(captcha) == 4 and captcha.isalpha():
                    if not wait_func():
                        return
                    logger.info("Captcha: %s" % captcha)
                    data = urllib.urlencode([("user_code", captcha),
                                             ("cap_id", cap_id),
                                             ("cap_secret", cap_secret)])
                    it = opener.open(form_action, data)
                    z = None
                    h = None
                    for line in it:
                        if "'z':'I!" in line:
                            z = line.split("'z':'")[1].split("'")[0]
                            h = line.split("'h':'")[1].split("'")[0]
                        elif 'window.location.href = dlUrl' in line:
                            it.next()
                            link.append(it.next().split('"')[1].split('"')[0])
                            solved = True  #If this line is there, the captcha is good
                            break
                cont += 1
            #If the captcha is good
            if solved and z and h:
                logger.info("Good captcha")
                if not wait_func():
                    return
                data = urllib.urlencode([("id", form_action.split("/")[-1]),
                                         ("type", "file"), ("ext", ""),
                                         ("f", "download:init"),
                                         ("z", "zvar"), ("h", "hvar")])
                data = data.replace("zvar", z).replace("hvar", h)
                #The referer needs to be specified
                res = opener.open("%s%s" % (BASE_URL, JS_URL), data, None, True, form_action)
                t = None
                wait = None
                z = None
                h = None
                for line in res:
                    if "'z'" in line:
                        z = line.split("'z': '")[1].split("'")[0]
                    elif "'h'" in line:
                        h = line.split("'h': '")[1].split("'")[0]
                    elif "'t'" in line:
                        t = line.split("'t': '")[1].split("'")[0]
                    elif "check_n" in line:
                        wait = int(line.split('[\'check_n\'] = "')[1].split('"')[0])
                if not wait:
                    wait = WAIT
                if not wait_func(wait):
                    return
                data = urllib.urlencode([("id", form_action.split("/")[-1]),
                                         ("type", "file"), ("ext", ""),
                                         ("f", "download:check"),
                                         ("z", "zvar"), ("h", "hvar"), ("t", t)])
                data = data.replace("zvar", z).replace("hvar", h)
                res = opener.open("%s%s" % (BASE_URL, JS_URL), data, None, True, form_action)
                t = None
                z = None
                h = None
                #Sometimes it sends another check_n
                while True:
                    if not wait_func():
                        return
                    res = opener.open("%s%s" % (BASE_URL, JS_URL), data, None, True, form_action)
                    wait = None
                    for line in res:
                        if "check_n" in line:
                            wait = int(line.split("=")[1].split(";")[0])
                            break
                        elif "'z'" in line:
                            z = line.split("'z': '")[1].split("'")[0]
                        elif "'h'" in line:
                            h = line.split("'h': '")[1].split("'")[0]
                        elif "'t'" in line:
                            t = line.split("'t': '")[1].split("'")[0]
                    if not wait:
                        break
                    else:
                        if not wait_func(wait):
                            return
                if not wait_func():
                    return
                data = urllib.urlencode([("rs", "getFileLink"), ("rst", ""),
                                         ("rsrnd", int(time.time())),
                                         ("rsargs[]", "0"), ("rsargs[]", "yellow"),
                                         ("rsargs[]", "zvar"), ("rsargs[]", "hvar"),
                                         ("rsargs[]", t), ("rsargs[]", "file"),
                                         ("rsargs[]", form_action.split("/")[-1]),
                                         ("rsargs[]", "")])
                data = data.replace("zvar", z).replace("hvar", h)
                #This cookie needs to be added manually
                gflcur = cookielib.Cookie(version=0, name='_gflCur', value='0',
                        port=None, port_specified=False,
                        domain='www.badongo.com', domain_specified=False,
                        domain_initial_dot=False, path='/', path_specified=True,
                        secure=False, expires=None, discard=True, comment=None,
                        comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
                cookie.set_cookie(gflcur)
                res = opener.open(form_action, data, None, True, form_action).readlines()
                tmp = res[0].split('onclick')[2].split('(')[1].split("')")[0].replace('\\', '').strip("'")
                link.append(tmp)
                if not wait_func():
                    return
                url = "%s%s?zenc=" % (link[1], link[0])
                res = opener.open(url, data, None, True, form_action)
                for line in res:
                    if "window.location.href = '" in line:
                        final_url = line.split("window.location.href = '")[1].split("'")[0]
                        break
                return opener.open("%s%s" % (BASE_URL, final_url), data, content_range, True, url)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """
    Oron links usually look like this:
    http://www.oron.com/file_id/file_name.foo.html
    However, by testing, it seems that the server resolves the file
    name from the file_id, which is some sort of hash, so the same
    file can also be accessed as:
    http://www.oron.com/file_id/file_name.foo.html.html
    and
    http://www.oron.com/file_id/file_name.foo.html.html(.html)*
    So we use check_links to get the file name from the HTML page;
    it's slower, but more accurate, as we cannot rely on the url
    passed here.
    """
    file_id = url.split("/")[3]
    file_name = self.check_links(url)[0]
    encoded_str = urllib.urlencode({"op": "download1",
                                    "usr_login": "",
                                    "id": file_id,
                                    "name": file_name,
                                    "referer": "",
                                    "method_free": "+Regular+Download+"})
    opener = URLOpen()
    #The url we are currently trying to open is the origin (referring)
    #URL preceding the post
    web_page = opener.open(url, encoded_str, False, url)
    for retry in range(3):
        if not wait_func():
            return
        rand_value = None
        for line in web_page:
            if '<input type="hidden" name="rand" value="' in line:
                rand_value = line.split('value="')[1].split('"')[0]
                break
        if not rand_value:
            logger.warning("Oron Plugin: No random value in download page - template changed?")
            return self.set_limit_exceeded()
        for line in web_page:
            if '<span id="countdown">' in line:
                wait_length = line.split('<span id="countdown">')[1].split('<')[0]
                if not wait_func(int(wait_length)):
                    return
            #Check for longer limits
            if '<p class="err"' in line:
                parse_line = line.split('>')[1].split('<')[0]
                seconds = 0
                minutes = 0
                hours = 0
                prev_word = ''
                for word in parse_line.split(' '):
                    if word == 'hour,' or word == 'hours,':
                        hours = int(prev_word)
                    elif word == 'minute,' or word == 'minutes,':
                        minutes = int(prev_word)
                    elif word == 'second' or word == 'seconds':
                        seconds = int(prev_word)
                        break
                    else:
                        prev_word = word
                seconds = seconds + (minutes * 60) + (hours * 3600)
                return self.set_limit_exceeded(seconds)
            if 'http://api.recaptcha.net/challenge?' in line:
                recaptcha_link = line.split('src="')[1].split('"')[0]
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                challenge, response = c.solve_captcha()
                if response:
                    if not wait_func():
                        return
                    #Submit the input to the recaptcha system
                    form = urllib.urlencode({"op": "download2",
                                             "id": file_id,
                                             "rand": rand_value,
                                             "referer": url,
                                             "method_free": "+Regular+Download+",
                                             "method_premium": "",
                                             "recaptcha_challenge_field": challenge,
                                             "recaptcha_response_field": response,
                                             "down_direct": 1})
                    download_page = opener.open(url, form, None, False, url)
                    #Get the link and return it
                    for line in download_page:
                        if 'Download File' in line:
                            return opener.open(line.split('href="')[1].split('"')[0])
    return

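#The limit message parsed above has the shape
#'... X hour(s), Y minute(s), Z second(s)'. The word-walking converter
#as a standalone sketch, fed an illustrative message:
def parse_wait_message(message):
    """Convert an 'N hour, N minutes, N seconds' message into seconds,
    tracking the previous word exactly as link_parser does."""
    seconds = minutes = hours = 0
    prev_word = ''
    for word in message.split(' '):
        if word in ('hour,', 'hours,'):
            hours = int(prev_word)
        elif word in ('minute,', 'minutes,'):
            minutes = int(prev_word)
        elif word in ('second', 'seconds'):
            seconds = int(prev_word)
            break
        else:
            prev_word = word
    return seconds + minutes * 60 + hours * 3600

assert parse_wait_message("You have to wait 1 hour, 30 minutes, 10 seconds") == 5410
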
def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        wait = WAIT
        opener = URLOpen()
        it = opener.open(url)
        first_wait = False
        #Check for a first wait
        for line in it:
            if 'var wf =' in line:
                try:
                    wait = int(line.split("=")[1].split(";")[0].strip())
                    first_wait = True
                except Exception, e:
                    logger.exception("%s: %s" % (url, e))
                    return
                break
        #Necessary to loop to reload the page, due to the wait
        for loop in range(3):
            if not wait_func():
                return
            #First wait
            if first_wait:
                if not wait_func(wait):
                    return
                data = urllib.urlencode([("free", "Regular Download")])
                url = "%sbilling?%s" % (url, data)
                it = opener.open(url, data)
            #No first wait
            else:
                it = opener.open(url)
            for line in it:
                if 'name="id"' in line:
                    file_id = line.split('value="')[1].split('"')[0]
                elif 'id="dwait"' in line:
                    it.next()
                    it.next()
                    tmp = it.next()
                    #The download is possible
                    if "form" in tmp:
                        form_action = tmp.split('action="')[1].split('"')[0]
                    #Necessary to wait
                    else:
                        it.next()
                        it.next()
                        wait = int(it.next().split("'")[1].split("'")[0])
                        if wait < 60:
                            if not wait_func(wait):
                                return
                            #Next loop, reload the page
                            break
                        else:
                            return self.set_limit_exceeded(wait)
                elif 'Recaptcha.create("' in line:
                    tmp = line.split('"')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([("recaptcha_challenge_field", challenge),
                                                 ("recaptcha_response_field", response),
                                                 ("recaptcha_shortencode_field", "undefined")])
                        handle = opener.open(form_action, form, content_range)
                        if not handle.info().getheader("Content-Type") == "text/html":
                            #Captcha is good
                            return handle
    except Exception, e:
        logger.exception("%s: %s" % (url, e))

def link_parser(self, url, wait_func, content_range=None):
    """"""
    try:
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        file_id = url.split("/")[-2]
        form_action = "%s?start=1" % (url)
        if not wait_func():
            return
        it = opener.open(form_action)
        form_action = "%s?start=1" % it.geturl()  #Get the redirect url
        end = form_action.split(".")[2].split("/")[0]  #Get the .com replacement
        form_action2 = "%s/%s/%s?start=1" % (BASE_URL, file_id, file_id)
        form_action2 = form_action2.replace(".com", ".%s" % end)
        form = urllib.urlencode([("foo", "foo")])  #Force urllib2 to do a POST
        #FIXME: urlopen should be able to set custom headers
        headers = {"User-Agent": cons.USER_AGENT,
                   "X-Requested-With": "XMLHttpRequest"}
        it = opener.opener.open(urllib2.Request(form_action2, None, headers), form)
        it_tmp = None
        #Loop until we get the captcha
        for loop in range(3):
            if not wait_func():
                return
            #it_tmp is set after a wait
            if it_tmp:
                it = it_tmp
            for line in it:
                if 'Recaptcha.create("' in line:
                    tmp = line.split('"')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    for retry in range(3):
                        challenge, response = c.solve_captcha()
                        if response:
                            if not wait_func():
                                return
                            #Submit the input to the recaptcha system
                            form = urllib.urlencode([("recaptcha_challenge_field", challenge),
                                                     ("recaptcha_response_field", response)])
                            it = opener.open(form_action, form)
                            #Get the link
                            for line in it:
                                if 'downloadLink' in line:
                                    it.next()
                                    return opener.open(it.next().split('href="')[1].split('"')[0])
                #Link already there
                elif 'downloadLink' in line:
                    it.next()
                    return opener.open(it.next().split('href="')[1].split('"')[0])
                #Need to wait
                elif "name='tm'" in line:
                    tm = line.split("value='")[1].split("'")[0]
                    tm_hash = it.next().split("value='")[1].split("'")[0]
                    form = urllib.urlencode([("tm", tm), ("tm_hash", tm_hash)])
                #Need to wait
                elif "countDownDelay =" in line:
                    wait = int(line.split("=")[1].split(";")[0])
                    if wait < 60:
                        if not wait_func(wait):
                            return
                        it_tmp = opener.open(form_action, form)  #fetch the page
                        #Next loop, reload the page
                        break
                    else:
                        return self.set_limit_exceeded(wait)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))