コード例 #1
0
	def _find_link(self, contents):
		"""Extract the download URL and title from a fetched page.

		Looks for a ``kNO = "...";`` assignment in ``contents`` (used as the
		file URL) and for the ``<title>`` element (used as the result title),
		then stores a single-file result in ``self.results``.

		Args:
			contents: Page source to scan.

		Returns:
			``self``, with ``state`` set to ``"finished"`` and ``results``
			populated with the title and one file entry.

		Raises:
			ResolverError: ``contents`` holds no ``kNO`` assignment.
			TechnicalError: The title is missing or could not be unescaped.
		"""
		url_match = re.search('kNO = "([^"]+)";', contents)

		if url_match is None:
			self.state = "failed"
			# Diagnostic dump of the page that could not be parsed.
			# NOTE(review): looks like leftover debug output - consider
			# routing it through logging instead of stdout.
			print(contents)
			raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.")

		file_url = url_match.group(1)

		title_match = re.search(r'<title>([^<]+)<\/title>', contents)

		if title_match is None:
			self.state = "failed"
			raise TechnicalError("Could not find the download title.")

		try:
			file_title = unescape(title_match.group(1))
		except Exception:
			# Deliberately not a bare ``except:`` so that KeyboardInterrupt
			# and SystemExit propagate instead of being reported as a
			# missing title.
			self.state = "failed"
			raise TechnicalError("Could not find the download title.")

		file_dict = {
			'url': file_url,
			'method': "GET",
			'priority': 1,
			'format': "unknown"
		}

		self.results = {
			'title': file_title,
			'files': [file_dict]
		}

		self.state = "finished"
		return self
コード例 #2
0
class PastebinTask(Task):
    """Resolver task that turns a Pastebin URL into a text download entry."""

    result_type = "text"

    name = "Pastebin"
    author = "Sven Slootweg"
    author_url = "http://cryto.net/~joepie91"

    def run(self):
        """Resolve ``self.url`` and store the outcome in ``self.results``.

        The paste id is taken from the URL itself; the paste page is fetched
        only to read its ``<h1>`` heading, which becomes the result title.

        Returns:
            ``self``, with ``state`` set to ``"finished"``.

        Raises:
            ResolverError: The URL does not look like a Pastebin URL
                (``state`` becomes ``"invalid"``), the page could not be
                fetched (``"failed"``), or the page has no ``<h1>`` heading
                (``"invalid"``).
        """
        url_pattern = "https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)"
        url_match = re.search(url_pattern, self.url)

        if not url_match:
            self.state = "invalid"
            raise ResolverError(
                "The provided URL is not a valid Pastebin URL.")

        try:
            page_source = self.fetch_page(self.url)
        except urllib2.URLError:
            self.state = "failed"
            raise ResolverError(
                "Could not retrieve the specified URL. The paste may not exist."
            )

        heading_pattern = "<h1>([^<]+)</h1>"
        heading_match = re.search(heading_pattern, page_source)

        if not heading_match:
            self.state = "invalid"
            raise ResolverError("The provided URL is not a valid paste.")

        # Group 2 of the URL match is the paste id (group 1 is the optional
        # "www." prefix).
        self.results = {
            'title': unescape(heading_match.group(1)),
            'files': [{
                'url': "http://pastebin.com/download.php?i=%s" % url_match.group(2),
                'method': "GET",
                'priority': 1,
                'format': "text"
            }]
        }

        self.state = "finished"
        return self
コード例 #3
0
class SockshareTask(Task):
    """Resolver task for SockShare video links.

    ``run`` opens the embed page for the video id found in the URL, submits
    the first form on it, follows the playlist reference found in the
    resulting page and extracts the FLV stream URL and the video title.
    """

    result_type = "video"

    name = "SockShare"
    author = "Sven Slootweg"
    author_url = "http://cryto.net/~joepie91"

    def run(self):
        """Resolve ``self.url`` and store the outcome in ``self.results``.

        Returns:
            ``self``, with ``state`` set to ``"finished"`` and ``results``
            holding the title and a single video entry.

        Raises:
            TechnicalError: ``mechanize`` is not installed, the embed page or
                the playlist could not be opened, or the video title could
                not be extracted (``state`` becomes ``"failed"``).
            ResolverError: The URL is not a SockShare file/embed URL
                (``"invalid"``), submitting the page's form failed
                (``"nonexistent"``), or no playlist / video URL was found
                (``"failed"``).
        """
        # Imported lazily so that a missing module is reported as a
        # TechnicalError from this task only.
        try:
            import mechanize
        except ImportError:
            self.state = "failed"
            raise TechnicalError(
                "The Python mechanize module is required to resolve Sockshare URLs."
            )

        matches = re.search(
            r"https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)",
            self.url)

        if matches is None:
            self.state = "invalid"
            raise ResolverError(
                "The provided URL is not a valid Sockshare URL.")

        video_id = matches.group(3)

        # ``except Exception`` (rather than a bare ``except:``) is used
        # throughout so KeyboardInterrupt/SystemExit are never re-labelled
        # as resolver failures.
        try:
            browser = mechanize.Browser()
            browser.set_handle_robots(False)
            browser.open("http://sockshare.com/embed/%s" % video_id)
        except Exception:
            self.state = "failed"
            raise TechnicalError("The Sockshare site could not be reached.")

        # Submit the first form on the embed page and read the page it
        # leads to; any failure here is reported as a removed/incorrect file.
        try:
            browser.select_form(nr=0)
            result = browser.submit()
            page = result.read()
        except Exception:
            self.state = "nonexistent"
            raise ResolverError(
                "The file was removed, or the URL is incorrect.")

        matches = re.search("playlist: '([^']+)'", page)

        if matches is None:
            self.state = "failed"
            raise ResolverError(
                "No playlist was found on the given URL; the Sockshare server for this file may be in maintenance mode, or the given URL may not be a video file. The Sockshare resolver currently only supports video links."
            )

        playlist = matches.group(1)

        try:
            browser.open("http://www.sockshare.com%s" % playlist)
        except Exception:
            self.state = "failed"
            raise TechnicalError(
                "The playlist file for the given URL could not be loaded.")

        matches = re.search(r'url="([^"]+)" type="video\/x-flv"',
                            browser.response().read())

        if matches is None:
            self.state = "failed"
            raise ResolverError(
                "The playlist file does not contain any video URLs. The Sockshare resolver currently only supports video links."
            )

        video_file = matches.group(1)

        # The title is read from the page returned by the form submission,
        # not from the playlist.
        title_match = re.search(
            r'<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>',
            page)

        if title_match is None:
            self.state = "failed"
            raise TechnicalError("Could not find the video title.")

        try:
            video_title = unescape(title_match.group(1))
        except Exception:
            self.state = "failed"
            raise TechnicalError("Could not find the video title.")

        stream_dict = {
            'url': video_file,
            'method': "GET",
            'quality': "unknown",
            'priority': 1,
            'format': "unknown"
        }

        self.results = {'title': video_title, 'videos': [stream_dict]}

        self.state = "finished"
        return self
コード例 #4
0
class YoutubeTask(Task):
	"""Resolver task that lists the streams offered by a YouTube video page.

	``run`` fetches the page, cuts the ``url_encoded_fmt_stream_map`` value
	out of it, and turns every comma-separated entry into a stream
	description (URL, quality, format, priority).
	"""

	result_type = "video"

	name = "YouTube"
	author = "Sven Slootweg"
	author_url = "http://cryto.net/~joepie91"

	# Request headers declared for this task.
	# NOTE(review): presumably picked up by the page-fetching code outside
	# this class - confirm against the base class.
	extra_headers = {
		'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
		'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
		'Accept-Language': 'en-us,en;q=0.5'
	}

	def run(self):
		"""Resolve ``self.url`` and store the outcome in ``self.results``.

		Each stream gets a priority where a lower number means a higher
		resolution (1080p is 1, 240p is 5, unrecognised qualities are 10).

		Returns:
			``self``, with ``state`` set to ``"finished"`` and ``results``
			holding the title and the list of streams.

		Raises:
			TechnicalError: The page could not be fetched, the stream map
				could not be located or decoded, a stream entry is missing
				fields, or the title is missing (``state`` becomes
				``"failed"``).
			ResolverError: The page carries no ``og:video:type`` meta tag
				(``state`` becomes ``"invalid"``).
		"""
		try:
			contents = self.fetch_page(self.url)
		except urllib2.URLError:
			self.state = "failed"
			raise TechnicalError("Could not retrieve the specified URL.")

		if '<meta property="og:video:type"' not in contents:
			self.state = "invalid"
			raise ResolverError("The specified URL is not a valid YouTube video.")

		map_start = "url_encoded_fmt_stream_map="
		map_end = "\\u0026amp;"

		try:
			pos_start = contents.index(map_start) + len(map_start)
			snippet = contents[pos_start:]
		except ValueError:
			self.state = "failed"
			raise TechnicalError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?")

		try:
			pos_end = snippet.index(map_end)
			stream_map = snippet[:pos_end]
		except ValueError:
			self.state = "failed"
			raise TechnicalError("The ending position for the YouTube player configuration could not be found.")

		# ``except Exception`` (rather than a bare ``except:``) keeps
		# KeyboardInterrupt/SystemExit from being reported as a corrupted
		# configuration.
		try:
			stream_map = urllib.unquote(stream_map)
			streams = stream_map.split(',')
		except Exception:
			self.state = "failed"
			raise TechnicalError("The YouTube player configuration is corrupted.")

		# Lookup tables, built once instead of being re-tested per stream.
		required_fields = ('sig', 'url', 'quality', 'fallback_host', 'type', 'itag')
		format_prefixes = (
			("video/mp4", "mp4"),
			("video/x-flv", "flv"),
			("video/3gpp", "3gp"),
			("video/webm", "webm"),
		)
		quality_map = {
			"small": ("240p", 5),
			"medium": ("360p", 4),
			"large": ("480p", 3),
			"hd720": ("720p", 2),
			"hd1080": ("1080p", 1),
		}

		stream_pool = []

		for stream in streams:
			fields = urlparse.parse_qs(stream)

			if len(fields) < 6:
				self.state = "failed"
				raise TechnicalError("The amount of fields in the YouTube player configuration is incorrect.")

			# Six or more fields does not guarantee the six that are read
			# below; report a missing one as a resolver failure instead of
			# letting a raw KeyError escape with ``state`` left unset.
			if any(key not in fields for key in required_fields):
				self.state = "failed"
				raise TechnicalError("The YouTube player configuration is missing required fields.")

			signature = fields['sig'][0]
			video_url = "%s&signature=%s" % (fields['url'][0], signature)
			quality = fields['quality'][0]
			fallback_host = fields['fallback_host'][0]
			mimetype = fields['type'][0]
			itag = fields['itag'][0]

			# The mimetype may carry extra parameters after the type, hence
			# the prefix match.
			video_format = "unknown"
			for prefix, label in format_prefixes:
				if mimetype.startswith(prefix):
					video_format = label
					break

			if quality in quality_map:
				video_quality, video_priority = quality_map[quality]
			else:
				video_quality = "unknown"
				video_priority = 10
				# Surfaces quality labels this resolver does not know yet.
				print("UNKNOWN: %s" % quality)

			stream_dict = {
				'url': video_url,
				'method': "GET",
				'quality': video_quality,
				'priority': video_priority,
				'format': video_format,
				'extra': {
					'itag': itag,
					'mimetype': mimetype,
					'fallback_host': fallback_host
				}
			}

			stream_pool.append(stream_dict)

		title_match = re.search('<meta property="og:title" content="([^"]*)">', contents)

		if title_match is None:
			self.state = "failed"
			raise TechnicalError("Could not find the video title.")

		try:
			video_title = unescape(title_match.group(1))
		except Exception:
			self.state = "failed"
			raise TechnicalError("Could not find the video title.")

		self.results = {
			'title': video_title,
			'videos': stream_pool
		}

		self.state = "finished"
		return self