Example No. 1
	def check_links(self, url):
		""""""
		name = None
		size = -1
		unit = None
		size_found = 0
		try:
			it = URLOpen().open(url)
			for line in it:
				if 'File Name:' in line:
					name = it.next().split('>')[1].split('<')[0]
				if 'File Size:' in line:
					tmp = line.split('>')[3].split('<')[0]
					if "KB" in tmp:
						size = int(round(float(tmp.split("KB")[0])))
						unit = "KB"
					elif "MB" in tmp:
						size = float(tmp.split("MB")[0])
						if int(round(size)) > 0:
							size = int(round(size))
							unit = "MB"
						else:
							size = int(round(1024 * size))
							unit = "KB"
					elif "GB" in tmp:
						size = int(round(float(tmp.split("GB")[0])))
						unit = "GB"
		except Exception, e:
			name = None
			size = -1
			logger.exception("%s :%s" % (url, e))
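The KB/MB/GB branch above recurs almost verbatim in several of the examples below. As a sketch, the same normalization could be factored into one helper; the name and placement are assumptions, not part of the original plugin:

	def normalize_size(tmp):
		#Hypothetical helper: parse a "<number><unit>" string into
		#(size, unit), demoting sub-1 MB values to KB exactly as the
		#branch above does.
		for unit in ("KB", "MB", "GB"):
			if unit in tmp:
				size = float(tmp.split(unit)[0])
				if unit == "MB" and int(round(size)) == 0:
					return int(round(1024 * size)), "KB"
				return int(round(size)), unit
		return -1, None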
Example No. 2
 def check_links(self, url):
     """"""
     name = None
     size = -1
     unit = None
     size_found = 0
     try:
         it = URLOpen().open(url)
         for line in it:
             if 'File Name:' in line:
                 name = it.next().split('>')[1].split('<')[0]
             if 'File Size:' in line:
                 tmp = line.split('>')[3].split('<')[0]
                 if "KB" in tmp:
                     size = int(round(float(tmp.split("KB")[0])))
                     unit = "KB"
                 elif "MB" in tmp:
                     size = float(tmp.split("MB")[0])
                     if int(round(size)) > 0:
                         size = int(round(size))
                         unit = "MB"
                     else:
                         size = int(round(1024 * size))
                         unit = "KB"
                 elif "GB" in tmp:
                     size = int(round(float(tmp.split("GB")[0])))
                     unit = "GB"
     except Exception, e:
         name = None
         size = -1
         logger.exception("%s :%s" % (url, e))
Example No. 3
	def parse_wait(self, url):
		""""""
		link = None
		form = None
		wait = 0
		found = False
		try:
			tmp_form = []
			opener = URLOpen()
			for line in opener.open(url):
				if "download_file" in line:
					found = True
				elif found:
					if "method=post " in line:
						link = "%s%s" % (BASE_URL, line.split('action="')[1].split('" ')[0])
					elif "name=action " in line:
						tmp_form.append(("action", line.split("value=")[1].split(">")[0]))
					elif "name=tm " in line:
						tmp_form.append(("tm", line.split("value=")[1].split(">")[0]))
					elif "name=tmhash " in line:
						tmp_form.append(("tmhash", line.split("value=")[1].split(">")[0]))
					elif "name=wait " in line:
						wait = int(line.split("value=")[1].split(">")[0])
						tmp_form.append(("wait", wait))
					elif "name=waithash " in line:
						tmp_form.append(("waithash", line.split("value=")[1].split(">")[0]))
					elif "name=upidhash " in line:
						tmp_form.append(("upidhash", line.split("value=")[1].split(">")[0]))
						found = False
			form = urllib.urlencode(tmp_form)
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
		return link, form, wait
Example No. 4
 def get_cookie(self, user, password, url=None):
     """"""
     opener = URLOpen()
     data = urllib.urlencode([("sub", "getaccountdetails_v1"),
                              ("type", "prem"), ("login", user),
                              ("password", password), ("withcookie", 1)])
     for line in opener.open(API_URL, data).readlines():
         if "ERROR" in line:
             return
         elif "cookie" in line:
             tmp_cookie = cookielib.Cookie(version=0,
                                           name='enc',
                                           value=line.split("=")[1].strip(),
                                           port=None,
                                           port_specified=False,
                                           domain='.rapidshare.com',
                                           domain_specified=False,
                                           domain_initial_dot=True,
                                           path='/',
                                           path_specified=True,
                                           secure=False,
                                           expires=None,
                                           discard=True,
                                           comment=None,
                                           comment_url=None,
                                           rest={'HttpOnly': None},
                                           rfc2109=False)
             cookie = cookielib.CookieJar()
             cookie.set_cookie(tmp_cookie)
             return cookie
Example No. 5
	def check_links(self, url):
		""""""
		name = None
		size = -1
		unit = None
		try:
			it = URLOpen().open(url)
			for line in it:
				if '<span class="txtorange">' in line:
					tmp = it.next()
					name = tmp.split("<")[0].strip()
					tmp = tmp.split(">(")[1].split(")")[0]
					if "KB" in tmp:
						size = int(round(float(tmp.split("KB")[0])))
						unit = "KB"
					elif "MB" in tmp:
						size = float(tmp.split("MB")[0])
						if int(round(size)) > 0:
							size = int(round(size))
							unit = "MB"
						else:
							size = int(round(1024 * size))
							unit = "KB"
					elif "GB" in tmp:
						size = int(round(float(tmp.split("GB")[0])))
						unit = "GB"
		except urllib2.HTTPError:
			pass
		except Exception, e:
			logger.exception("%s :%s" % (url, e))
Example No. 6
 def check_links(self, url):
     """"""
     name = None
     size = -1
     unit = None
     try:
         it = URLOpen().open(url)
         for line in it:
             if 'fileInfo filename' in line:
                 name = line.split('<strong>')[1].split('</strong>')[0]
             elif 'fileInfo filesize' in line:
                 it.next()
                 tmp = it.next().split('class="size">')[1].split("<")[0]
                 if "KB" in tmp:
                     size = int(round(float(tmp.split("KB")[0])))
                     unit = "KB"
                 elif "MB" in tmp:
                     size = float(tmp.split("MB")[0])
                     if int(round(size)) > 0:
                         size = int(round(size))
                         unit = "MB"
                     else:
                         size = int(round(1024 * size))
                         unit = "KB"
                 elif "GB" in tmp:
                     size = int(round(float(tmp.split("GB")[0])))
                     unit = "GB"
     except Exception, e:
         logger.exception("%s :%s" % (url, e))
Example No. 7
	def check_links(self, url):
		""""""
		name = None
		size = -1
		unit = None
		try:
			it = URLOpen().open(url)
			for line in it:
				if '/img/manager/mime/' in line:
					if ("generic" in line) or ("audio" in line) or ("archive" in line):
						tmp = line.split('/>')[1].split("</h1>")[0]
					if "video" in line:
						tmp = line.split('</a>')[1].split("<")[0]
						
					tmp = tmp.replace("&nbsp;","")
					tmp = tmp.replace("&#8203;","")
					name = tmp.replace("&#8203","")
					
				elif '<div id="info" class="metadata">' in line:
					tmp = it.next()
					tmp = tmp.split("<span>")[1].split("file")[0].strip()
					size = int(round(float(tmp.split(" ")[0])))
					unit = tmp.split(" ")[1].upper()
				elif 'Retry Download' in line:
					name = line.split('href="')[1].split('"')[0].split("/").pop()
		except Exception, e:
			logger.exception("%s :%s" % (url, e))
Example No. 8
    def check_links(self, url):
        """"""
        name = None
        size = -1
        unit = None
        try:
            it = URLOpen().open(url)
            for line in it:
                if 'Filename:' in line:
                    name = line.split(">")[1].split("<")[0]
                    line = it.next()
                    size_and_units = []
                    size_and_units = line.split(":")[1].split(
                        "<")[0].lstrip().rstrip().split(" ")
                    size = float(size_and_units[0])
                    unit = size_and_units[1].upper()
                    if 'B' == unit:
                        size = size / 1024
                        unit = "KB"
                    break
        # Oron responds to unknown files with an HTTP 404 followed by a redirect
        except urllib2.HTTPError as http_error:
            if http_error.code != 404:
                logger.warning(
                    "Oron::check_links: Received unexpected HTTP error code: %s"
                    % http_error.code)
            return None, -1, None

        except Exception, e:
            logger.exception("%s :%s" % (url, e))
Example No. 9
 def link_parser(self, url, wait_func, content_range=None):
     """"""
     try:
         cookie = self.get_cookie()
         if not wait_func():
             return
         opener = URLOpen(cookie)
         handler = opener.open(url, None, content_range)
         if not wait_func():
             return
         if "text/html" in handler.info()["Content-Type"]:
             cookie_value = cookie._cookies[".rapidshare.com"]["/"][
                 "enc"].value
             tmp = url.split("/")
             form = urllib.urlencode([("sub", "download_v1"),
                                      ("cookie", cookie_value),
                                      ("fileid", tmp[4]),
                                      ("filename", tmp[5])])
             for line in opener.open(
                     "http://api.rapidshare.com%s" % API_URL, form,
                     content_range):
                 if "DL:" in line:
                     tmp_url = "http://%s%s" % (
                         line.split("DL:")[1].split(",")[0], API_URL)
                     return opener.open(tmp_url, form, content_range)
         else:
             return handler
     except Exception, e:
         logger.exception("%s: %s" % (url, e))
Example No. 10
    def check_links(self, url):
        """"""
        name = None
        size = -1
        unit = None
        try:
            it = URLOpen().open(url)
            for line in it:
                if '/img/manager/mime/' in line:
                    if ("generic" in line) or ("audio" in line) or ("archive"
                                                                    in line):
                        tmp = line.split('/>')[1].split("</h1>")[0]
                    if "video" in line:
                        tmp = line.split('</a>')[1].split("<")[0]

                    tmp = tmp.replace("&nbsp;", "")
                    tmp = tmp.replace("&#8203;", "")
                    name = tmp.replace("&#8203", "")

                elif '<div id="info" class="metadata">' in line:
                    tmp = it.next()
                    tmp = tmp.split("<span>")[1].split("file")[0].strip()
                    size = int(round(float(tmp.split(" ")[0])))
                    unit = tmp.split(" ")[1].upper()
                elif 'Retry Download' in line:
                    name = line.split('href="')[1].split('"')[0].split(
                        "/").pop()
        except Exception, e:
            logger.exception("%s :%s" % (url, e))
Example No. 11
 def check_links(self, url):
     """"""
     name = None
     size = -1
     unit = None
     try:
         it = URLOpen().open(url)
         for line in it:
             if '<span class="txtorange">' in line:
                 tmp = it.next()
                 name = tmp.split("<")[0].strip()
                 tmp = tmp.split(">(")[1].split(")")[0]
                 if "KB" in tmp:
                     size = int(round(float(tmp.split("KB")[0])))
                     unit = "KB"
                 elif "MB" in tmp:
                     size = float(tmp.split("MB")[0])
                     if int(round(size)) > 0:
                         size = int(round(size))
                         unit = "MB"
                     else:
                         size = int(round(1024 * size))
                         unit = "KB"
                 elif "GB" in tmp:
                     size = int(round(float(tmp.split("GB")[0])))
                     unit = "GB"
     except urllib2.HTTPError:
         pass
     except Exception, e:
         logger.exception("%s :%s" % (url, e))
Example No. 12
 def link_parser(self, url, wait_func, content_range=None):
     """"""
     link = None
     wait = 0
     try:
         tmp = url.split("/")
         opener = URLOpen()
         url = "%s&fileid=%s" % (API_URL, tmp[4])
         url = "%s&filename=%s" % (url, tmp[5])
         for line in opener.open("http://%s%s" %
                                 ("api.rapidshare.com", url)):
             print line
             if "DL:" in line:
                 tmp = line.split("DL:")[1].split(",")
                 link = "http://%s%s&dlauth=%s" % (tmp[0], url, tmp[1])
                 wait = int(tmp[2])
                 print link
         if not wait_func(wait):
             return
         if link:
             return URLOpen().open(link, content_range)
         else:
             return self.set_limit_exceeded()
     except Exception, e:
         logger.exception("%s: %s" % (url, e))
Example No. 13
	def check_links(self, url):
		""""""
		name = None
		size = -1
		unit = None
		try:
			it = URLOpen().open(url)
			for line in it:
				if 'Filename:' in line:
					name = line.split(">")[1].split("<")[0]
					line = it.next()
					size_and_units = []
					size_and_units = line.split(":")[1].split("<")[0].lstrip().rstrip().split(" ")
					size = float(size_and_units[0])
					unit = size_and_units[1].upper()
					if 'B' == unit:
						size = size / 1024
						unit = "KB"
					break
		# Oron responds to unknown files with an HTTP 404 followed by a redirect
		except urllib2.HTTPError as http_error:
			if http_error.code != 404:
				logger.warning("Oron::check_links: Received unexpected HTTP error code: %s" % http_error.code)
			return None, -1, None

		except Exception, e:
			logger.exception("%s :%s" % (url, e))
Example No. 14
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		#Remove the filename from the url
		tmp = url.split("/file/")[1].split("/")[0]
		url = "%s/file/%s" % (BASE_URL,tmp)

		link = None
		retry = 3
		wait = WAIT
		try:
			opener = URLOpen()
			for line in opener.open(url):
				if 'check:' in line:
					check = line.split("check:'")[1].replace("'","").strip()
				elif "Recaptcha.create" in line:
					tmp = line.split('("')[1].split('"')[0]
					recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp 
					if not wait_func():
						return
					c = Recaptcha(BASE_URL, recaptcha_link)
					while not link and retry:
						challenge, response = c.solve_captcha()
						if response:
							if not wait_func():
								return

							#Filefactory performs a check on its server by doing an
							#Ajax request sending the following data
							form = urllib.urlencode([("recaptcha_challenge_field", challenge), ("recaptcha_response_field", response), ("recaptcha_shortencode_field", "undefined"),("check", check)])
							url = "%s/file/checkCaptcha.php" % BASE_URL

							#Getting the result back, status:{"ok"|"fail"}
							for line in opener.open(url, form):
								if 'status:"ok"' in line:
									tmp = line.split('path:"')[1].strip('"')
									tmp_link = "%s%s" %(BASE_URL,tmp)
									for line in opener.open(tmp_link):
										if '<span class="countdown">' in line:
											#Try to get WAIT from the page
											try:
												tmp = line.split('"countdown">')[1].split("</span")[0]
												tmp = int(tmp)
											except ValueError:
												pass
											else:
												if tmp > 0:
													wait = tmp
										if "Download with FileFactory Basic" in line:
											link = line.split('<a href="')[1].split('"')[0]
											break
						retry -= 1
					break
			if link:
				if not wait_func(wait):
					return
				return opener.open(link, None, content_range, True)
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 15
    def link_parser(self, url, wait_func, content_range=None):
        """
		See comment in anonymous_download.py for how oron links generally look
		like. Premimum accounts still have to suffer by having to post the random
		value at the bottom of the html page. There is also a download page that 
		one has to parse to figure out what the actual direct download link is
		"""
        file_id = url.split("/")[3]
        file_name = self.check_links(url)[0]

        try:
            cookie = self.get_cookie()
            if not wait_func():
                return
            opener = URLOpen(cookie)
            web_page = opener.open(url, None, content_range)

            if not wait_func():
                return

            rand_value = None
            for line in web_page:
                if '<input type="hidden" name="rand" value="' in line:
                    rand_value = line.split('value="')[1].split('"')[0]
                    break
            if not rand_value:
                logger.error("Oron.premium_download: could not find random value in " \
                             "download page. Premium format changed?")

            form = urllib.urlencode({
                "op": "download2",
                "id": file_id,
                "rand": rand_value,
                "referer": "",
                "method_free": "",
                "method_premium": "1",
                "down_direct": "1"
            })

            download_page = opener.open(url, form, content_range)
            direct_link = None
            for line in download_page:
                if 'Download File</a></td>' in line:
                    direct_link = line.split('a href="')[1].split(
                        '" class="')[0]

            if not direct_link:
                return

            return opener.open(direct_link)

        except Exception, e:
            logger.exception("%s: %s" % (url, e))
Example No. 16
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		auth_string = self.get_cookie()
		if not wait_func():
			return

		encoded_link = 'http://api.hotfile.com/?action=getdirectdownloadlink&link=' + url + auth_string
		logger.info("Encoded link %s" % (encoded_link))
		opener = URLOpen()
		handler = opener.open(encoded_link)
		actual_link = handler.readline()
		return opener.open(actual_link)	
Example No. 17
	def get_cookie(self, user, password, url=None):
		""""""
		opener = URLOpen()
		data = urllib.urlencode([("sub", "getaccountdetails_v1"), ("type", "prem"), ("login", user), ("password", password), ("withcookie", 1)])
		for line in opener.open(API_URL, data).readlines():
			if "ERROR" in line:
				return
			elif "cookie" in line:
				tmp_cookie = cookielib.Cookie(version=0, name='enc', value=line.split("=")[1].strip(), port=None, port_specified=False, domain='.rapidshare.com', domain_specified=False, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
				cookie = cookielib.CookieJar()
				cookie.set_cookie(tmp_cookie)
				return cookie
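A minimal usage sketch, assuming the plugin wiring seen elsewhere in this listing: the CookieJar returned here is the object that Examples No. 9 and 46 hand to URLOpen so the download request carries the premium "enc" cookie.

	#Hypothetical call site; "plugin" stands in for the RapidShare plugin
	#instance and the credentials and url are placeholders.
	cookie = plugin.get_cookie("user", "password")
	if cookie:
		handler = URLOpen(cookie).open(url, None, content_range)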
Example No. 18
    def link_parser(self, url, wait_func, content_range=None):
        """"""
        auth_string = self.get_cookie()
        if not wait_func():
            return

        encoded_link = 'http://api.hotfile.com/?action=getdirectdownloadlink&link=' + url + auth_string
        logger.info("Encoded link %s" % (encoded_link))
        opener = URLOpen()
        handler = opener.open(encoded_link)
        actual_link = handler.readline()
        return opener.open(actual_link)
Example No. 19
	def parse(self, path):
		""""""
		tmp = URLOpen().open(API_URL).read()
		if tmp:
			#uploadid = str(int(time.time()))[-5:] + str(int(round(random.random()*1000000)))
			uploadid = "%s%i" % (str(int(time.time()))[-5:], random.randint(10000, 1000000))
			server = tmp.split('"')[1].split('"')[0]
			url = "http://rs%sl3.rapidshare.com/cgi-bin/upload.cgi?rsuploadid=%s" % (server,uploadid)
			form = {"rsapi_v1" : "1", "realfolder" : "0" , "filecontent": open(path, "rb")}
			#rapidshare boundary handler has a bug
			boundary = "--%s" % uuid.uuid4().hex
			return MultipartEncoder(url, form, boundary)
Example No. 20
 def get_cookie(self, user, password, url=None):
     """"""
     cookie = cookielib.CookieJar()
     opener = URLOpen(cookie)
     opener.open(
         "http://www.megaupload.com/?c=login",
         urllib.urlencode({
             "login": "******",
             "redir": "1",
             "username": user,
             "password": password
         }))
     if len(cookie) > 0:
         return cookie
Example No. 21
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			url = url.split("&")[0]
			cookie = cookielib.CookieJar()
			opener = URLOpen(cookie)
			
			if not wait_func():
				return
			
			retry = 5
			while retry:
				it = opener.open(url)
				img_url = None
				for line in it:
					if "<iframe src='" in line:
						img_url = line.split("'")[1].split("'")[0]
					elif 'name="fileId"' in line:
						file_id = line.split('value="')[1].split('"')[0]
				if not img_url:
					return self.set_limit_exceeded()
				it = opener.open(img_url)
				for line in it:
					if 'AdsCaptcha Challenge' in line:
						img_url = line.split('src="')[1].split('"')[0]
					elif 'class="code">' in line:
						code = line.split('">')[1].split("<")[0]

				tes = Tesseract(opener.open(img_url).read())
				captcha = tes.get_captcha()
				captcha = "".join([c for c in captcha if c.isdigit()]) #keep only the numbers
	
				data = urllib.urlencode([("fileId", file_id),("adscaptcha_response_field", captcha),("adscaptcha_challenge_field", code), ("adUnder", "")])
				it = opener.open("%s/getoken" % BASE_URL, data)
				captcha = False
				for line in it:
					if '"status":1' in line:
						captcha = True
				#captcha is valid
				if captcha:
					if not wait_func(WAIT):
						return
					it = opener.open("%s/formtoken" % BASE_URL)
					for line in it:
						token = line
					rnd = "".join([str(random.randint(1,9)) for i in range(16)])
					data = urllib.urlencode([("fileId", file_id),("token", token),("rnd", rnd)])
					it = opener.open("%s/getoken" % BASE_URL, data)
					for line in it:
						if '"status":1' in line:
							link = line.split('":"')[1].split('"')[0].replace("\\","")
					return opener.open(link)
				retry -= 1
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 22
    def check_links(self, url):
        """"""
        name = None
        size = -1
        unit = None
        try:
            it = URLOpen().open(url)
            for line in it:
                if '"panel file_download"' in line:
                    it.next()
                    name = it.next().split(">")[1].split("<")[0]
                    it.next()
                    tmp = it.next().split("<strong>")[1].split("<")[0]
                    unit = tmp[-2:]
                    #FIXME: GB bug
                    if unit == "GB":
                        size = int(1024 * float(tmp[:-2]))
                        unit = "MB"
                    else:
                        size = int(round(float(tmp[:-2])))

                    if size > 1024:
                        if unit == "KB":
                            size = size / 1024
                            unit = "MB"
                    break
        except Exception, e:
            logger.exception("%s :%s" % (url, e))
Example No. 23
    def link_parser(self, url, wait_func, content_range=None):
        """"""
        try:
            #Remove the filename from the url
            tmp = url.split("/file/")[1].split("/")[0]
            url = "%s/file/%s" % (BASE_URL, tmp)

            file_id = url.split("/")[-1].strip("/")
            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)

            form = urllib.urlencode([("checkDownload", "check")])
            #If the limit is exceeded
            if '"fail":"timeLimit"' in opener.open(url, form).read():
                return self.set_limit_exceeded()

            it = opener.open(url)
            for line in it:
                if 'reCAPTCHA_publickey=' in line:
                    tmp = line.split("'")[1].split("'")[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    for retry in range(3):
                        challenge, response = c.solve_captcha()
                        if response:
                            if not wait_func():
                                return

                            #Submit the input to the recaptcha system
                            form = urllib.urlencode([
                                ("recaptcha_challenge_field", challenge),
                                ("recaptcha_response_field", response),
                                ("recaptcha_shortencode_field", file_id)
                            ])
                            recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL

                            #Captcha is good
                            if "success" in opener.open(recaptcha_url,
                                                        form).read():
                                form = urllib.urlencode([("downloadLink",
                                                          "wait")])
                                wait = int(opener.open(url, form).read()[-2:])
                                if not wait_func(wait):
                                    return
                                form = urllib.urlencode([("downloadLink",
                                                          "show")])
                                opener.open(url, form).read()
                                form = urllib.urlencode([("download", "normal")
                                                         ])
                                return opener.open(url, form)  #,content_range)
        except Exception, e:
            logger.exception("%s: %s" % (url, e))
Example No. 24
 def link_parser(self, url, wait_func, content_range=None):
     """"""
     link = None
     retry = 3
     try:
         if "?" in url:
             url = url.split("?")[0]
         tmp_link, tmp_form, wait = self.parse_wait(url)
         if not tmp_link or not tmp_form:
             return self.set_limit_exceeded()
         elif not wait_func(wait):
             return
         else:
             opener = URLOpen(cookielib.CookieJar())
             it = opener.open(tmp_link, tmp_form)
             for line in it:
                 if "function starthtimer(){" in line:
                     it.next()
                     try:
                         tmp = int(it.next().split("+")[1].split(";")[0])
                         return self.set_limit_exceeded(int(tmp / 1000))
                     except Exception, e:
                         logger.exception("%s: %s" % (url, e))
                         return
                 elif "click_download" in line:
                     link = line.split('href="')[1].split('"')[0]
                     break
                 elif "http://api.recaptcha.net/challenge" in line:
                     recaptcha_link = line.split('src="')[1].split('"')[0]
                     if not wait_func():
                         return
                     c = Recaptcha(BASE_URL, recaptcha_link)
                     while not link and retry:
                         challenge, response = c.solve_captcha()
                         if response:
                             if not wait_func():
                                 return
                             form = urllib.urlencode([
                                 ("action", "checkcaptcha"),
                                 ("recaptcha_challenge_field", challenge),
                                 ("recaptcha_response_field", response)
                             ])
                             for line in opener.open(tmp_link, form):
                                 if "click_download" in line:
                                     link = line.split('href="')[1].split(
                                         '"')[0]
                                     break
                         retry -= 1
                     break
Example No. 25
 def link_parser(self, url, wait_func, content_range=None):
     """"""
     found = False
     try:
         cookie = self.get_cookie()
         if not wait_func():
             return
         opener = URLOpen(cookie)
         handler = opener.open(url, None, content_range)
         if not wait_func():
             return
         else:
             return handler
     except Exception, e:
         logger.exception("%s: %s" % (url, e))
Example No. 26
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			link = None
			opener = URLOpen()
			form =  urllib.urlencode([('download','&nbsp;REGULAR DOWNLOAD&nbsp;')])
			for line in opener.open(url,form):
				if '<span id="spn_download_link">' in line:
					link = line.split('href="')[1].split('"')[0]
			if not link:
				return
			if not wait_func():
				return
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 27
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		found = False
		try:
			cookie = self.get_cookie()
			if not wait_func():
				return
			opener = URLOpen(cookie)
			handler = opener.open(url, None, content_range)
			if not wait_func():
				return
			else:
				return handler
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 28
    def check_links(self, url):
        """"""
        name = None
        size = -1
        unit = None
        size_found = 0
        try:
            it = URLOpen().open(url)
            for line in it:
                if 'download_file_title" style="margin:20px 0;">' in line:
                    name = line.split(
                        'download_file_title" style="margin:20px 0;">'
                    )[1].split('<')[0].strip()
                    tmp = line.split('color:#777;">')[1].split('<')[0].strip(
                        "()")
                    unit = tmp[-2:]
                    size = int(round(float(tmp[:-2])))

                    if size > 1024:
                        if unit == "KB":
                            size = size / 1024
                            unit = "MB"
                    break
        except Exception, e:
            name = None
            size = -1
            logger.exception("%s :%s" % (url, e))
Example No. 29
	def check_links(self, url):
		""""""
		name = None
		size = -1
		unit = None
		size_found = False
		try:
			it = URLOpen().open(url)
			for line in it:
				if '<span id="fileNameTextSpan">' in line:
					name = line.split('<span id="fileNameTextSpan">')[1].split('</span>')[0].strip()
					break
				elif '<div class="small lgrey" style="margin-bottom:5px">' in line:
					size_found = True
				elif size_found:
					size_found = False
					tmp = line.split("<b>")[1].split("</b>")[0].split()
					unit = tmp[1]
					if "," in tmp[0]:
						size = int(tmp[0].replace(",", ""))
					else:
						size = int(tmp[0])
					if size > 1024:
						if unit == "KB":
							size = size / 1024
							unit = "MB"
		except Exception, e:
			name = None
			size = -1
			logger.exception("%s :%s" % (url, e))
Example No. 30
 def link_parser(self, url, wait_func, content_range=None):
     """"""
     try:
         link = None
         opener = URLOpen()
         form = urllib.urlencode([('download',
                                   '&nbsp;REGULAR DOWNLOAD&nbsp;')])
         for line in opener.open(url, form):
             if '<span id="spn_download_link">' in line:
                 link = line.split('href="')[1].split('"')[0]
         if not link:
             return
         if not wait_func():
             return
     except Exception, e:
         logger.exception("%s: %s" % (url, e))
Example No. 31
    def check_links(self, url):
        """"""
        name = None
        size = -1
        unit = None
        size_found = 0
        try:
            it = URLOpen().open(url)
            for line in it:
                if '<div class="finfo">' in line:
                    name = line.split('>')[1].split('<')[0].strip()
                if '<div class="ffileinfo">' in line:
                    tmp = line.split(":")[2].split("<")[0]
                    unit = tmp[-2:]
                    size = int(round(float(tmp[:-2].strip())))

                    if size > 1024:
                        if unit == "KB":
                            size = size / 1024
                            unit = "MB"
                    break
        except Exception, e:
            name = None
            size = -1
            logger.exception("%s :%s" % (url, e))
Example No. 32
    def check_links(self, url):
        """"""
        name = None
        size = -1
        unit = None
        size_found = 0
        try:
            for line in URLOpen().open(url):
                if '<b>Name:</b>' in line:
                    name = line.split('<b>Name:</b>')[1].split(
                        '<br>')[0].strip()
                    tmp = line.split('<b>Size:</b> ')[1].split(
                        '   ')[0].strip()
                    unit = tmp[-2:]
                    size = int(round(float(tmp[:-2])))

                    if size > 1024:
                        if unit == "KB":
                            size = size / 1024
                            unit = "MB"
                    break
        except Exception, e:
            name = None
            size = -1
            logger.exception("%s :%s" % (url, e))
Example No. 33
    def check_links(self, url):
        """"""
        name = None
        size = -1
        unit = None
        size_found = 0
        try:
            # 'xmlURL=http://mp3.zing.vn/xml/song-xml/'
            page = URLOpen().open(url)
            for lines in page:
                if 'xmlURL=http://mp3.zing.vn/xml/song-xml/' in lines:
                    songxml = lines.split(
                        'xmlURL=http://mp3.zing.vn/xml/song-xml/')[1].split(
                            '&skin=http://static.mp3.zing.vn/skins')[0].strip(
                            )
                    xml = URLOpen().open('http://mp3.zing.vn/xml/song-xml/' +
                                         songxml)
                    for line in xml:
                        if '<title><![CDATA[' in line:
                            name = line.split('<title><![CDATA[')[1].split(
                                ']]></title>')[0].strip()
                        if '<source><![CDATA[' in line:
                            mp3link = line.split('<source><![CDATA[')[1].split(
                                ']]></source>')[0].strip()
                            #get file size before download
                            site = urllib.urlopen(mp3link)
                            meta = site.info()
                            size = int(
                                meta.getheaders("Content-Length")[0]) / 1024
                            if size > 1024:
                                unit = "KB"
                            else:
                                size_found = 0
                                name = None
                                size = -1
                                unit = None
                                break
                        if '<performer><![CDATA[' in line:
                            name += ' - ' + line.split('<performer><![CDATA[')[
                                1].split(']]></performer>')[0].strip()
                            name += '.mp3'

        except Exception, e:
            name = None
            size = -1
            logger.exception("%s :%s" % (url, e))
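The example above (like Example No. 39 below) takes the file size from the HTTP response headers rather than from the page. A sketch of that step in isolation, with an assumed helper name:

	import urllib

	def remote_size_kb(link):
		#Read Content-Length from the response headers, as the zing and
		#nhaccuatui examples do, and convert bytes to KB (Python 2's
		#header message object exposes getheaders()).
		meta = urllib.urlopen(link).info()
		return int(meta.getheaders("Content-Length")[0]) / 1024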
Example No. 34
	def check_links(self, url):
		""""""
		name = None
		size = -1
		unit = None
		try:
			it = URLOpen().open(url)
			for line in it:
				if '"panel file_download"' in line:
					it.next()
					name = it.next().split(">")[1].split("<")[0]
					it.next()
					tmp = it.next().split("<strong>")[1].split("<")[0]
					unit = tmp[-2:]
					#FIXME: GB bug
					if unit == "GB":
						size = int(1024*float(tmp[:-2]))
						unit = "MB"
					else:
						size = int(round(float(tmp[:-2])))
					
					if size > 1024:
						if unit == "KB":
							size = size / 1024
							unit = "MB"
					break
		except Exception, e:
			logger.exception("%s :%s" % (url, e))
Example No. 35
    def get_cookie(self, user, password, url=None):
        """"""
        if user == None or password == None:
            return None

        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        encoded_str = urllib.urlencode({
            "password": password,
            "login": user,
            "rand": "",
            "redirect": "",
            "op": "login"
        })

        opener.open("http://www.oron.com/login", encoded_str)
        if len(cookie) > 0:
            return cookie
Example No. 36
	def get_cookie(self, user, password, url=None):
		""""""
		if user == None or password == None:
			return None

		cookie = cookielib.CookieJar()
		opener = URLOpen(cookie)
		encoded_str = urllib.urlencode({
				"password": password,
				"login"   : user,
				"rand"    : "", 
				"redirect": "",
				"op"      : "login"
				})

		opener.open("http://www.oron.com/login", encoded_str)
		if len(cookie) > 0:
			return cookie
Example No. 37
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		link = None
		retry = 3
		try:
			if "?" in url:
				url = url.split("?")[0]
			tmp_link, tmp_form, wait = self.parse_wait(url)
			if not tmp_link or not tmp_form:
				return self.set_limit_exceeded()
			elif not wait_func(wait):
				return
			else:
				opener = URLOpen(cookielib.CookieJar())
				it = opener.open(tmp_link, tmp_form)
				for line in it:
					if "function starthtimer(){" in line:
						it.next()
						try:
							tmp = int(it.next().split("+")[1].split(";")[0])
							return self.set_limit_exceeded(int(tmp/1000))
						except Exception, e:
							logger.exception("%s: %s" % (url, e))
							return
					elif "click_download" in line:
						link = line.split('href="')[1].split('"')[0]
						break
					elif "http://api.recaptcha.net/challenge" in line:
						recaptcha_link = line.split('src="')[1].split('"')[0]
						if not wait_func():
							return
						c = Recaptcha(BASE_URL, recaptcha_link)
						while not link and retry:
							challenge, response = c.solve_captcha()
							if response:
								if not wait_func():
									return
								form = urllib.urlencode([("action", "checkcaptcha"), ("recaptcha_challenge_field", challenge), ("recaptcha_response_field", response)])
								for line in opener.open(tmp_link, form):
									if "click_download" in line:
										link = line.split('href="')[1].split('"')[0]
										break
							retry -= 1
						break
Example No. 38
 def parse(self, path):
     """"""
     tmp = URLOpen().open(API_URL)
     if tmp:
         url = None
         for line in tmp:
             if 'multipart/form-data' in line:
                 url = line.split('action="')[1].split('"')[0]
         if url:
             form = {"uploads[]": open(path, "rb")}
             return MultipartEncoder(url, form, None)
Example No. 39
    def check_links(self, url):
        """"""
        name = None
        size = -1
        unit = None
        size_found = 0
        try:
            page = URLOpen().open(url)
            for lines in page:
                if '<param value="flashid=flash-player&defaultindex=0&autostart=true&file=http://www.nhaccuatui.com/api/playerv7.ashx?key2=' in lines:
                    songxml = lines.split(
                        '<param value="flashid=flash-player&defaultindex=0&autostart=true&file=http://www.nhaccuatui.com/api/playerv7.ashx?key2='
                    )[1].split('" name="flashvars" />')[0].strip()
                    xml = URLOpen().open(
                        'http://www.nhaccuatui.com/api/playerv7.ashx?key2=' +
                        songxml)
                    for line in xml:
                        name = line.split('<title><![CDATA[')[1].split(
                            ']]></title>')[0].strip()
                        name += ' - ' + line.split('<creator><![CDATA[')[
                            1].split(']]></creator>')[0].strip()
                        name += '.mp3'
                        mp3link = line.split('<location><![CDATA[')[1].split(
                            ']]></location>')[0].strip()
                        #get file size before download
                        site = urllib.urlopen(mp3link)
                        meta = site.info()
                        size = int(meta.getheaders("Content-Length")[0]) / 1024
                        if size > 1024:
                            unit = "KB"
                        else:
                            size_found = 0
                            name = None
                            size = -1
                            unit = None
                            break

        except Exception, e:
            name = None
            size = -1
            logger.exception("%s :%s" % (url, e))
Example No. 40
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			wait = WAIT
			link = None
			opener = URLOpen()
			#Transform the url into an English one
			url = "%s%s" % (BASE_URL, url.split("/files/")[1].split("/")[0])
			form =  urllib.urlencode([('gateway_result','1')])
			for line in opener.open(url,form):
				#Try to get WAIT from the page
				if 'download_waiter_remain' in line:
					try:
						tmp = line.split(">")[2].split("<")[0]
						tmp = int(tmp)
					except Exception, e:
						pass
					else:
						if tmp > 0:
							wait = tmp
				elif "$('#download_container').load('" in line:
					try:
						tmp = line.split("load('")[1].split("'")[0]
						url = "%s%s" % ("http://depositfiles.com", tmp)
					except Exception, e:
						pass
					if not wait_func(wait + 1):
						return
					#Due to a bug in DepositFiles, it sometimes returns "Invalid params"
					#If that's the case, retry up to 10 times and set the limit exceeded
					for attempt in range(10):
						for line in opener.open(url):
							if "Invalid" in line:
								if not wait_func():
									return
								break
							elif "action" in line:
								link = line.split('"')[1].split('"')[0]
								break
						if link:
							break
Example No. 41
 def link_parser(self, url, wait_func, content_range=None):
     """"""
     try:
         wait = WAIT
         link = None
         opener = URLOpen()
         #Transform the url into an English one
         url = "%s%s" % (BASE_URL, url.split("/files/")[1].split("/")[0])
         form = urllib.urlencode([('gateway_result', '1')])
         for line in opener.open(url, form):
             #Try to get WAIT from the page
             if 'download_waiter_remain' in line:
                 try:
                     tmp = line.split(">")[2].split("<")[0]
                     tmp = int(tmp)
                 except Exception, e:
                     pass
                 else:
                     if tmp > 0:
                         wait = tmp
             elif "$('#download_container').load('" in line:
                 try:
                     tmp = line.split("load('")[1].split("'")[0]
                     url = "%s%s" % ("http://depositfiles.com", tmp)
                 except Exception, e:
                     pass
                 if not wait_func(wait + 1):
                     return
                 #Due to a bug in DepositFiles, it sometimes returns "Invalid params"
                 #If that's the case, retry up to 10 times and set the limit exceeded
                 for attempt in range(10):
                     for line in opener.open(url):
                         if "Invalid" in line:
                             if not wait_func():
                                 return
                             break
                         elif "action" in line:
                             link = line.split('"')[1].split('"')[0]
                             break
                     if link:
                         break
Example No. 42
 def link_parser(self, url, wait_func, content_range=None):
     """"""
     found = False
     try:
         cookie = self.get_cookie()
         if not wait_func():
             return
         opener = URLOpen(cookie)
         handler = opener.open(url, None, content_range)
         if not wait_func():
             return
         if "text/html" in handler.info()["Content-Type"]:
             for line in handler:
                 if 'class="down_ad_butt1">' in line:
                     return opener.open(
                         line.split('href="')[1].split('"')[0], None,
                         content_range)
         else:
             return handler
     except Exception, e:
         logger.exception("%s: %s" % (url, e))
Example No. 43
 def link_parser(self, url, wait_func, content_range=None):
     """"""
     link = None
     try:
         page = URLOpen().open(url)
         for lines in page:
             if 'xmlURL=http://mp3.zing.vn/xml/song-xml/' in lines:
                 songxml = lines.split(
                     'xmlURL=http://mp3.zing.vn/xml/song-xml/')[1].split(
                         '&skin=http://static.mp3.zing.vn/skins')[0].strip(
                         )
                 xml = URLOpen().open('http://mp3.zing.vn/xml/song-xml/' +
                                      songxml)
                 for line in xml:
                     if '<source><![CDATA[' in line:
                         mp3link = line.split('<source><![CDATA[')[1].split(
                             ']]></source>')[0].strip()
                 if not mp3link:
                     return
     except Exception, e:
         logger.exception("%s: %s" % (url, e))
Example No. 44
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		link = None
		try:
			xml = URLOpen().open('http://nhacso.net/flash/song/xnl/1/id/'+url[-13:-5])
			for line in xml:
				if '<mp3link><![CDATA[' in line:
					mp3link = line.split('<mp3link><![CDATA[')[1].split(']]></mp3link>')[0].strip()
			if not mp3link:
				return
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 45
	def get_cookie(self, user, password, url=None):
		""""""
		if user == None or password == None:
			return None

		cookie = cookielib.CookieJar()
		opener = URLOpen(cookie)
		encoded_str = urllib.urlencode({
				"loginUserName":user, 
				"loginUserPassword":password,
				"autoLogin":"******",
				"recaptcha_response_field":"",
				"recaptcha_challenge_field":"",
				"recaptcha_shortencode_field":"",
				"loginFormSubmit":"Login"})

		#logger.warning("Submitting this post: %s" % encoded_str)

		opener.open("http://www.fileserve.com/login.php", encoded_str)
		if len(cookie) > 0:
			return cookie
Example No. 46
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			cookie = self.get_cookie()
			if not wait_func():
				return
			opener = URLOpen(cookie)
			handler = opener.open(url, None, content_range)
			if not wait_func():
				return
			if "text/html" in handler.info()["Content-Type"]:
				cookie_value = cookie._cookies[".rapidshare.com"]["/"]["enc"].value
				tmp = url.split("/")
				form =  urllib.urlencode([("sub", "download_v1"), ("cookie", cookie_value), ("fileid", tmp[4]), ("filename", tmp[5])])
				for line in opener.open("http://api.rapidshare.com%s" % API_URL, form, content_range):
					if "DL:" in line:
						tmp_url = "http://%s%s" % (line.split("DL:")[1].split(",")[0], API_URL)
						return opener.open(tmp_url, form, content_range)
			else:
				return handler
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 47
	def check(self, url):
		""""""
		if url is None:
			return None

		name   = None
		size   = -1
		unit   = None
		status = -1
		
		"""
		Split the string by '/':
		Hotfile urls are always of this form:
			http://hotfile.com/dl/ID/KEY/filename.html
		Thus we should get the (0 based) 4th & 5th entry in the returned list
		"""
		split_str = url.split('/')
		if len(split_str) != 7:
			return None

		link_id  = split_str[4]
		link_key = split_str[5]
		del split_str
		check_link_url = ("http://api.hotfile.com/?action=checklinks&ids=" + link_id + 
						  "&keys=" + link_key + "&fields=name,size,status")
		""" print ("Check link url: {0}".format(check_link_url))  """
		try: 
			link_name_size_status = URLOpen().open(check_link_url).readline()
			link_name_size_status_list = link_name_size_status.split(',')
			name   = link_name_size_status_list[0]
			""" Hotfile glitch: sometimes removed files do not have size information """
			if ( len(link_name_size_status_list[1]) != 0):
				size   = int(link_name_size_status_list[1]) / 1024

			status = int(link_name_size_status_list[2])
			unit = "KB"
		except Exception, e:
			logger.exception("%s :%s" % (url, e))
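For reference, a sketch of the parse above against a hypothetical response line; the exact wire format is an assumption inferred from the fields=name,size,status parameter:

	#Hypothetical checklinks response body: "name,size,status"
	sample = "somefile.zip,1048576,1"
	name, size_str, status_str = sample.split(',')
	size = int(size_str) / 1024  #bytes -> KB, as in check() above
	status = int(status_str)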
Example No. 48
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			link = None
			wait = WAIT
			cookie = cookielib.CookieJar()
			opener = URLOpen(cookie)
			if "/video/" in url:
				url = url.replace("/video/", "/download/")
			elif "/audio/" in url:
				url = url.replace("/audio/", "/download/")
			elif "/image/" in url:
				url = url.replace("/image/", "/download/")
			try:
				form = urllib.urlencode([("download", 1)])
				for line in opener.open(url,form):
					if 'link_enc=new Array' in line:
						tmp = line.strip().split("var link_enc=new Array(")[1].split(");")[0]
						link = tmp.replace("','","").replace("'","")
					#Try to get WAIT from the page
					if 'document|important' in line:
						try:
							tmp = line.split("here|")[1].split("|class")[0]
							tmp = int(tmp)
						except ValueError:
							pass
						else:
							if tmp > 0:
								wait = tmp
						break
			except Exception, e:
				logger.exception("%s :%s" % (url, e))
				
			if not link:
				return
			if not wait_func(wait):
				return
Example No. 49
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			#Remove the filename from the url
			tmp = url.split("/file/")[1].split("/")[0]
			url = "%s/file/%s" % (BASE_URL,tmp)
			
			file_id = url.split("/")[-1].strip("/")
			cookie = cookielib.CookieJar()
			opener = URLOpen(cookie)
			
			form = urllib.urlencode([("checkDownload", "check")])
			#If the limit is exceeded
			if '"fail":"timeLimit"' in opener.open(url,form).read():
				return self.set_limit_exceeded()
				
			it = opener.open(url)
			for line in it:
				if 'reCAPTCHA_publickey=' in line:
					tmp = line.split("'")[1].split("'")[0]
					recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
					if not wait_func():
						return
					c = Recaptcha(BASE_URL, recaptcha_link)
					for retry in range(3):
						challenge, response = c.solve_captcha()
						if response:
							if not wait_func():
								return
							
							#Submit the input to the recaptcha system
							form = urllib.urlencode([("recaptcha_challenge_field", challenge), ("recaptcha_response_field", response), ("recaptcha_shortencode_field",file_id)])
							recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL
							
							#Captcha is good
							if "success" in opener.open(recaptcha_url,form).read():
								form = urllib.urlencode([("downloadLink", "wait")])
								wait = int(opener.open(url,form).read()[-2:])
								if not wait_func(wait):
									return
								form = urllib.urlencode([("downloadLink", "show")])
								opener.open(url,form).read()
								form = urllib.urlencode([("download", "normal")])
								return opener.open(url,form)#,content_range)
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 50
	def check_links(self, url):
		""""""
		name = None
		size = -1
		unit = None
		size_found = 0
		try:
			it = URLOpen().open(url)
			for line in it:
				if '<div class="info">' in line:
					name = it.next().split('="')[1].split('">')[0].strip()
					tmp = it.next().split('>')[2].split('<')[0].strip()
					unit = tmp[-2:]
					size = int(round(float(tmp[:-2].replace("&nbsp;",""))))
					
					if size > 1024:
						if unit == "KB":
							size = size / 1024
							unit = "MB"
					break
		except Exception, e:
			name = None
			size = -1
			logger.exception("%s :%s" % (url, e))
Example No. 51
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			tmp_link = None
			link = None
			wait = WAIT
			opener = URLOpen(cookielib.CookieJar())
			it = opener.open(url)
			for line in it:
				if "dbtn" in line:
					tmp_link = line.split('href="')[1].split('"')[0]
			if tmp_link:
				it = opener.open(tmp_link)
				for line in it:
					if "id='divDLStart'" in line:
						link = it.next().split("<a href='")[1].split("'")[0]
					elif '<div class="sec">' in line:
						wait = int(line.split(">")[1].split("<")[0])
			if not link:
				return
			elif not wait_func(wait):
				return
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 52
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		link = None
		wait = 0
		try:
			tmp = url.split("/")
			opener = URLOpen()
			url = "%s&fileid=%s" % (API_URL,tmp[4])
			url = "%s&filename=%s" % (url,tmp[5])
			for line in opener.open("http://%s%s" % ("api.rapidshare.com",url)):
				print line
				if "DL:" in line:
					tmp = line.split("DL:")[1].split(",")
					link = "http://%s%s&dlauth=%s" % (tmp[0],url,tmp[1])
					wait = int(tmp[2])
					print link
			if not wait_func(wait):
				return
			if link:
				return URLOpen().open(link, content_range)
			else:
				return self.set_limit_exceeded()
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Example No. 53
class PremiumCookie:
	""""""
	def __init__(self):
		""""""
		self.digestURL = URLOpen()

	def get_cookie(self, user, password, url=None):
		""""""
		if user == None or password == None:
			return None

		DigestURLHandler = self.digestURL.open('http://api.hotfile.com/?action=getdigest')
		
		# retrieve MD5 digest
		md5Digest = DigestURLHandler.readline()
		md5pw = hashlib.md5(password).hexdigest()
		md5pw = hashlib.md5(md5pw+md5Digest).hexdigest()
		return '&username='******'&passwordmd5dig='+md5pw+'&digest='+md5Digest
Example No. 54
	def __init__(self):
		""""""
		self.digestURL = URLOpen()
Example No. 55
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			pkr = None
			cookie = cookielib.CookieJar()
			opener = URLOpen(cookie)
			res = ""
			#Open the first page
			page = opener.open(url).readlines()
			for line in page:
				#Get pKr
				if "pKr='" in line:
					pkr = line.split("'")[1].split("'")[0]
				#Get the last block to unescape
				if "unescape" in line:
					tmp = line.split("break;}")[-1]
					tmp = tmp.split("var cb")[0]
					tmp = self.split_eval(tmp)
				
					#Eval the block until it's plain text
					res = self.decrypt(tmp)

			id_func = res.split("(")[0] #Name of the function containing the id referring to the div that contains the real link

			pk1 = res.split("'")[3].split("'")[0]
			qk = res.split("'")[1].split("'")[0] #Public ID of the file

			it = iter(page)
			for line in it:
				#Line containing the function to parse
				if id_func in line:
					#Try to get the crypted block
					tmp = line.split(id_func)[1].split("setTimeout")[0].split('"none";')[1]
					tmp = self.split_eval(tmp)

					#Eval until it's plain text
					res = self.decrypt(tmp)


			div_id = res.split('getElementById("')[1].split('"')[0]

			data = urllib.urlencode([("qk",qk), ("pk1", pk1), ("r", pkr),])

			form_action = "http://www.mediafire.com/dynamic/download.php?%s" %data

			#Parse the GET
			res = opener.open(form_action, data)
			line = " ".join(res)
			#Long line containing the js
			if "var" in line:
				#Decrypt the table containig the final dl var
				tmp = line.split("function dz()")[0].split(";")[2:-1]
				tmp = ";".join(tmp)
				tmp = self.split_eval(tmp)
				table = self.decrypt(tmp)
				#Result is plain text
				if "http://download" in line:
					#Get all the dl links (even the fake ones)
					var = line.split('mediafire.com/" +')
					#Get the number of the server
					serv = line.split("http://download")[1].split(".")[0]
					#Get the name of the file
					name = var[1].split('+')[1].split("/")[2].split('"')[0].strip("\\")
					
					it = iter(var)
					#Find the real link among the fake ones
					for tmp in it:
						#Real link
						if div_id in tmp:
							tmp = it.next()
							tmp = tmp.split('+')[0]
							#Get the final dl var in the table
							dl = table.split(tmp+"=")[1].split(";")[0].strip("'")
				#Result is encrypted
				else:
					tmp = line.split("case 15:")[1]
					tmp = tmp.split("break;")[0]
					tmp = tmp.split("eval(")
					#Decrypt until the real link is found
					for t in tmp:
						if "unescape" in t:
							t = self.split_eval(t)
							res = self.decrypt(t,div_id)
							if len(res) == 3:
								serv = res[0]
								var = res[1]
								name = res[2]
								break
					dl = table.split(var+"=")[1].split(";")[0].strip("'")
			url = "http://download%s.mediafire.com/%sg/%s/%s" % (serv,dl,qk,name)
			try:
				handle = opener.open(url, None, content_range)
			except Exception, e:
				return self.set_limit_exceeded()
			else:
				return handle
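split_eval and decrypt are not shown in this snippet; the comments describe a loop that keeps evaluating obfuscated JavaScript until plain text remains. A minimal sketch of that idea, assuming the payloads are plain percent-encodings of the kind unescape() handles (the block below is made up):

import urllib

def decrypt_sketch(block):
	"""Keep decoding unescape('%XX...') payloads until none remain."""
	text = block
	while "unescape" in text:
		payload = text.split("unescape('")[1].split("')")[0]
		text = urllib.unquote(payload)
	return text

#Hypothetical obfuscated block, for illustration only
print decrypt_sketch("eval(unescape('%64%6C%28%27%71%6B%27%29'))")  #prints dl('qk')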
Ejemplo n.º 56
0
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			link = [] #The final link comes in two parts
			captcha_url = None
			wait = WAIT
			cookie = cookielib.CookieJar()
			opener = URLOpen(cookie)
			if not wait_func():
				return
			#Get the captcha url
			data = urllib.urlencode([("rs", "refreshImage"), ("rst", ""), ("rsrnd", int(time.time()))])
			tmp = opener.open(url, data).read().split("+:var res = '")[1].split("'; res;")[0].replace('\\"', '"')
			form_action = tmp.split('action="')[1].split('"')[0]
			cap_id = tmp.split('name=cap_id value=')[1].split('>')[0]
			cap_secret = tmp.split('name=cap_secret value=')[1].split('>')[0]
			captcha_url = "%s%s" % (BASE_URL, tmp.split('img src="')[1].split('"')[0])

			if captcha_url:
				solved = False
				cont = 0
				while (not solved) and cont < 4:
					tes = Tesseract(opener.open(captcha_url).read(), self.filter_image)
					captcha = tes.get_captcha()
					#Hack to improve OCR accuracy: map commonly misread digits to letters
					if len(captcha) == 4 and captcha.isalnum():
						if not captcha.isalpha():
							for i, j in [("0", "O"), ("1", "I"), ("2", "Z"), ("3", "B"), ("4", "A"), ("5", "S"), ("6", "G"), ("7", "T"), ("8", "B"), ("9", "B")]:
								captcha = captcha.replace(i, j)
					captcha = captcha.upper()
					#Captcha : 4 letters
					if len(captcha) == 4 and captcha.isalpha():
						if not wait_func():
							return
						logger.info("Captcha: %s" % captcha)
						
						data = urllib.urlencode([("user_code", captcha), ("cap_id",cap_id), ("cap_secret",cap_secret)])
						
						it = opener.open(form_action, data)
						z = None
						h = None
						for line in it:
							if "'z':'I!" in line:
								z = line.split("'z':'")[1].split("'")[0]
								h = line.split("'h':'")[1].split("'")[0]
							elif 'window.location.href = dlUrl' in line:
								it.next()
								link.append(it.next().split('"')[1].split('"')[0])
								solved = True #If there is this line, the captcha is good
								break

						cont += 1
						
						#If the captcha is good
						if solved and z and h:
							logger.info("Good captcha")
							if not wait_func():
								return
							data = urllib.urlencode([("id",form_action.split("/")[-1]), ("type","file"), ("ext",""),("f","download:init"),("z","zvar"),("h","hvar")])
							data = data.replace("zvar",z).replace("hvar",h)
							#The referer needs to be specify
							res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
							t = None
							wait = None
							z = None
							h = None
							for line in res:
								if "'z'" in line:
									z = line.split("'z': '")[1].split("'")[0]
								elif "'h'" in line:
									h = line.split("'h': '")[1].split("'")[0]
								elif "'t'" in line:
									t = line.split("'t': '")[1].split("'")[0]
								elif "check_n" in line:
									wait = int(line.split('[\'check_n\'] = "')[1].split('"')[0])

							if not wait:
								wait = WAIT
						
							if not wait_func(wait):
								return
							
							data = urllib.urlencode([("id",form_action.split("/")[-1]), ("type","file"), ("ext",""),("f","download:check"),("z","zvar"),("h","hvar"),("t",t)])
							data = data.replace("zvar",z).replace("hvar",h)
						
							res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
						
							t = None
							z = None
							h = None
							#Sometimes it sends another check_n
							while True:
								if not wait_func():
									return
								res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
								wait = None
								for line in res:
									if "check_n" in line:
										wait = int(line.split("=")[1].split(";")[0])
										break
									elif "'z'" in line:
										z = line.split("'z': '")[1].split("'")[0]
									elif "'h'" in line:
										h = line.split("'h': '")[1].split("'")[0]
									elif "'t'" in line:
										t = line.split("'t': '")[1].split("'")[0]
								if not wait:
									break
								else:
									if not wait_func(wait):
										return
										
							if not wait_func():
								return
							
							data = urllib.urlencode([("rs","getFileLink"),("rst",""),("rsrnd",int(time.time())),("rsargs[]","0"),("rsargs[]","yellow"),("rsargs[]","zvar"),("rsargs[]","hvar"),("rsargs[]",t),("rsargs[]","file"),("rsargs[]",form_action.split("/")[-1]),("rsargs[]","")])
							data = data.replace("zvar",z).replace("hvar",h)
							
							#This cookie needs to be added manually
							gflcur = cookielib.Cookie(version=0, name='_gflCur', value='0', port=None,
										port_specified=False, domain='www.badongo.com',
										domain_specified=False, domain_initial_dot=False, path='/',
										path_specified=True, secure=False, expires=None, discard=True,
										comment=None, comment_url=None, rest={'HttpOnly': None},
										rfc2109=False)
							cookie.set_cookie(gflcur)
						
							res = opener.open(form_action, data,None,True,form_action).readlines()
							tmp = res[0].split('onclick')[2].split('(')[1].split("')")[0].replace('\\','').strip("'")
							link.append(tmp)
							
							if not wait_func():
								return
								
							url = "%s%s?zenc=" %(link[1],link[0])
							res = opener.open(url, data,None,True,form_action)
						
							for line in res:
								if "window.location.href = '" in line:
									final_url = line.split("window.location.href = '")[1].split("'")[0]
									break
							return opener.open("%s%s" % (BASE_URL,final_url), data,content_range,True,url)
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
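The digit-to-letter table above exists because these captchas are always four letters, so any digit Tesseract returns must be a misread letter. A quick worked example with a made-up OCR result:

captcha = "8AD0"  #Hypothetical raw Tesseract output
for i, j in [("0", "O"), ("1", "I"), ("2", "Z"), ("3", "B"), ("4", "A"),
		("5", "S"), ("6", "G"), ("7", "T"), ("8", "B"), ("9", "B")]:
	captcha = captcha.replace(i, j)
print captcha.upper()  #prints BADO, which now passes the isalpha() check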
Ejemplo n.º 57
0
	def link_parser(self, url, wait_func, content_range=None):
		"""
		Oron links usually look like this:
			http://www.oron.com/file_id/file_name.foo.html
		However, by testing it seems that the server pulls the file name out by
		using the file_id, which is some sort of hash. 
		So the same file can aswell be accessed by: 
			http://www.oron.com/file_id/file_name.foo.html.html
		and 
			http://www.oron.com/file_id/file_name.foo.html.html(.html)*
		So we use check_links to get the file name form the HTML page, its 
		slower, but more accurate as we cannot rely on the url passed here
		"""
		file_id   = url.split("/")[3]
		file_name = self.check_links(url)[0]
		encoded_str = urllib.urlencode({
			"op"          : "download1",
			"usr_login"   : "",
			"id"          : file_id,
			"name"        : file_name,
			"referer"     : "",
			"method_free" : "+Regular+Download+"})
		opener = URLOpen()

		"""
		The url we are currently trying to open is the origin (referring) URL 
		preceding the post
		"""
		web_page = opener.open(url, encoded_str, False, url)


		rand_value = None
		for retry in range(3):
			if not wait_func():
				return

			for line in web_page:
				if '<input type="hidden" name="rand" value="' in line:
					rand_value = line.split('value="')[1].split('"')[0]
					break

			if not rand_value:
				logger.warning("Oron Plugin: No random value in download page - template changed?")
				return self.set_limit_exceeded()

			for line in web_page:
				if '<span id="countdown">' in line:
					wait_length  = line.split('<span id="countdown">')[1].split('<')[0]
					if not wait_func(int(wait_length)):
						return

				"""
				Check for longer limits
				"""
				if '<p class="err"' in line:
					parse_line = line.split('>')[1].split('<')[0]
					seconds = 0
					minutes = 0
					hours = 0
					prev_word = ''

					for word in parse_line.split(' '):
						if word == 'hour,' or word == 'hours,':
							hours = int(prev_word)
						elif word == 'minute,' or word == 'minutes,':
							minutes = int(prev_word)
						elif word == 'second' or word == 'seconds':
							seconds = int(prev_word)
							break
						else:
							prev_word = word

					seconds = seconds + (minutes * 60) + (hours * 3600)
					return self.set_limit_exceeded(seconds)
				
				if 'http://api.recaptcha.net/challenge?' in line:
					recaptcha_link = line.split('src="')[1].split('"')[0]
					if not wait_func():
						return
					c = Recaptcha(BASE_URL, recaptcha_link)
					challenge, response = c.solve_captcha()
					if response:
						if not wait_func():
							return

						#Submit the input to the recaptcha system
						form = urllib.urlencode({
								"op"                        : "download2",
								"id"                        : file_id,
								"rand"                      : rand_value,
								"referer"                   : url,
								"method_free"               : "+Regular+Download+",
								"method_premium"            : "",
								"recaptcha_challenge_field" : challenge,
								"recaptcha_response_field"  : response,
								"down_direct"               : 1			
								})
						download_page = opener.open(url, form, None, False, url)
						#Get the link and return it
						for line in download_page:
							if 'Download File' in line:
								return opener.open(line.split('href="')[1].split('"')[0])

		return
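The limit parser above walks the error sentence word by word, remembering the previous token so the number in front of each time unit can be read. A quick check against a made-up message:

parse_line = "You have to wait 1 hour, 25 minutes, 10 seconds"  #Hypothetical err text
seconds = minutes = hours = 0
prev_word = ''
for word in parse_line.split(' '):
	if word in ('hour,', 'hours,'):
		hours = int(prev_word)
	elif word in ('minute,', 'minutes,'):
		minutes = int(prev_word)
	elif word in ('second', 'seconds'):
		seconds = int(prev_word)
		break
	else:
		prev_word = word
print seconds + minutes * 60 + hours * 3600  #prints 5110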
Ejemplo n.º 58
0
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			wait = WAIT
			opener = URLOpen()
			it = opener.open(url)
			first_wait = False
			#Check the first line of the page for an initial wait
			for line in it:
				if 'var wf =' in line:
					try:
						wait = int(line.split("=")[1].split(";")[0].strip())
						first_wait = True
					except Exception, e:
						logger.exception("%s: %s" % (url, e))
						return
				break
			#Loop so the page can be reloaded after each wait
			for loop in range(3):
				if not wait_func():
					return
				#First wait
				if first_wait:
					if not wait_func(wait):
						return
					data = urllib.urlencode([("free", "Regular Download")])
					url = "%sbilling?%s" % (url,data)
					it = opener.open(url,data)
				#No first wait
				else:
					it = opener.open(url)
				for line in it:
					if 'name="id"' in line:
						file_id = line.split('value="')[1].split('"')[0]
					elif 'id="dwait"' in line:
						it.next()
						it.next()
						tmp = it.next()
						#The download is possible
						if "form" in tmp:
							form_action = tmp.split('action="')[1].split('"')[0]
						#Necessary to wait
						else:
							it.next()
							it.next()
							wait = int(it.next().split("'")[1].split("'")[0])
							if wait < 60:
								if not wait_func(wait):
									return
								#Next loop, reload the page
								break
							else:
								return self.set_limit_exceeded(wait)
					elif 'Recaptcha.create("' in line:
						tmp = line.split('"')[1].split('"')[0]
						recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
						if not wait_func():
							return
						c = Recaptcha(BASE_URL, recaptcha_link)
						challenge, response = c.solve_captcha()
						if response:
							if not wait_func():
								return
						
							#Submit the input to the recaptcha system
							form = urllib.urlencode([("recaptcha_challenge_field", challenge), ("recaptcha_response_field", response), ("recaptcha_shortencode_field", "undefined")])
							handle = opener.open(form_action, form, content_range)
							if handle.info().getheader("Content-Type") != "text/html":
								#Captcha is good
								return handle
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
Ejemplo n.º 59
0
	def link_parser(self, url, wait_func, content_range=None):
		""""""
		try:
			cookie = cookielib.CookieJar()
			opener = URLOpen(cookie)
			file_id = url.split("/")[-2]
			form_action = "%s?start=1" % (url)
			
			if not wait_func():
				return
			
			it = opener.open(form_action)
			form_action = "%s?start=1" % it.geturl() #Get the redirect url
			end = form_action.split(".")[2].split("/")[0] #Get the .com replacement
			form_action2 = "%s/%s/%s?start=1" % (BASE_URL,file_id,file_id)
			form_action2 = form_action2.replace(".com",".%s" % end)
			form = urllib.urlencode([("foo","foo")]) #Force urllib2 to do a POST
			#FIXME : urlopen should be able to set custom headers
			headers = {"User-Agent": cons.USER_AGENT, "X-Requested-With": "XMLHttpRequest"}
			it = opener.opener.open(urllib2.Request(form_action2, None, headers), form)
			it_tmp = None

			#Loop until we get the captcha
			for loop in range(3):
				if not wait_func():
					return
				#it_tmp is set after a wait
				if it_tmp:
					it = it_tmp
				for line in it:
					if 'Recaptcha.create("' in line:
						tmp = line.split('"')[1].split('"')[0]
						recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
						if not wait_func():
							return
						c = Recaptcha(BASE_URL, recaptcha_link)
						for retry in range(3):
							challenge, response = c.solve_captcha()
							if response:
								if not wait_func():
									return
							
								#Submit the input to the recaptcha system
								form = urllib.urlencode([("recaptcha_challenge_field", challenge), ("recaptcha_response_field", response)])
								it = opener.open(form_action, form)
								#Get the link
								for line in it:
									if 'downloadLink' in line:
										it.next()
										return opener.open(it.next().split('href="')[1].split('"')[0])
				
					#Link already there
					elif 'downloadLink' in line:
						it.next()
						return opener.open(it.next().split('href="')[1].split('"')[0])
					
					#Need to wait
					elif "name='tm'" in line:
						tm = line.split("value='")[1].split("'")[0]
						tm_hash = it.next().split("value='")[1].split("'")[0]
						form = urllib.urlencode([("tm", tm), ("tm_hash", tm_hash)])
				
					#Need to wait
					elif "countDownDelay =" in line:
						wait = int(line.split("=")[1].split(";")[0])
						if wait < 60:
							if not wait_func(wait):
								return
							it_tmp = opener.open(form_action, form) #fetch the page
							#Next loop, reload the page
							break
						else:
							return self.set_limit_exceeded(wait)
		except Exception, e:
			logger.exception("%s: %s" % (url, e))
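The FIXME above notes that URLOpen cannot set custom headers, which is why the code reaches into opener.opener directly. A minimal sketch of the same workaround using plain urllib2 (the helper name is made up; only urllib2 is standard):

import urllib2

def open_with_headers(url, data=None, headers=None):
	"""Build a urllib2.Request carrying custom headers such as X-Requested-With."""
	req = urllib2.Request(url, data, headers or {})
	return urllib2.urlopen(req)

#Hypothetical usage mirroring the call above
#handle = open_with_headers(form_action2, form,
#		{"User-Agent": "Mozilla/5.0", "X-Requested-With": "XMLHttpRequest"})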