def _find_link(self, contents):
    """Extract the download URL and title from an already-fetched page.

    Scans ``contents`` for a ``kNO = "...";`` assignment and uses the quoted
    value as the file URL, then reads the ``<title>`` element as the file
    title. On success ``self.results`` is populated and ``self.state`` is
    set to "finished"; on failure ``self.state`` is set to "failed" before
    the exception is raised.

    Args:
        contents: Page source to scan.

    Returns:
        ``self``.

    Raises:
        ResolverError: No ``kNO`` assignment is present in ``contents``.
        TechnicalError: The page title could not be extracted.
    """
    # Function-scope import so this block does not rely on the file's
    # top-level imports (which are not visible from here).
    import logging

    link_match = re.search('kNO = "([^"]+)";', contents)
    if link_match is None:
        self.state = "failed"
        # Keep the offending page available for diagnosis, but do not dump
        # it to stdout from library code.
        logging.getLogger(__name__).debug(
            "No download link found in page: %s", contents)
        raise ResolverError(
            "No download was found on the given URL; the server for this file may be "
            "in maintenance mode, or the given URL may not be valid. It is also possible "
            "that you have been blocked - CAPTCHA support is not yet present.")

    file_url = link_match.group(1)

    try:
        # A missing <title> makes re.search return None, so .group() raises
        # AttributeError; any failure inside unescape is treated the same.
        file_title = unescape(
            re.search(r'<title>([^<]+)<\/title>', contents).group(1))
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit are not turned into a TechnicalError.
        self.state = "failed"
        raise TechnicalError("Could not find the download title.")

    file_dict = {
        'url': file_url,
        'method': "GET",
        'priority': 1,
        'format': "unknown",
    }

    self.results = {
        'title': file_title,
        'files': [file_dict],
    }
    self.state = "finished"
    return self
class PastebinTask(Task):
    """Task that resolves a Pastebin URL to the paste's text download link."""

    result_type = "text"
    name = "Pastebin"
    author = "Sven Slootweg"
    author_url = "http://cryto.net/~joepie91"

    def run(self):
        """Resolve ``self.url`` and store the outcome in ``self.results``.

        The paste ID is taken from the URL, the page is fetched through
        ``self.fetch_page`` and its first ``<h1>`` is used as the title.

        Returns:
            ``self``, with ``state`` set to "finished".

        Raises:
            ResolverError: The URL does not look like a Pastebin URL
                (state "invalid"), the page could not be fetched (state
                "failed"), or the page has no ``<h1>`` title (state
                "invalid").
        """
        url_match = re.search(
            r"https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", self.url)
        if url_match is None:
            self.state = "invalid"
            raise ResolverError("The provided URL is not a valid Pastebin URL.")

        try:
            page = self.fetch_page(self.url)
        except urllib2.URLError:
            self.state = "failed"
            raise ResolverError(
                "Could not retrieve the specified URL. The paste may not exist.")

        heading = re.search("<h1>([^<]+)</h1>", page)
        if heading is None:
            self.state = "invalid"
            raise ResolverError("The provided URL is not a valid paste.")

        # Group 2 of the URL pattern is the paste ID; group 1 is the optional "www.".
        download = {
            'url': "http://pastebin.com/download.php?i=%s" % url_match.group(2),
            'method': "GET",
            'priority': 1,
            'format': "text",
        }

        self.results = {
            'title': unescape(heading.group(1)),
            'files': [download],
        }
        self.state = "finished"
        return self
class SockshareTask(Task):
    """Task that resolves a SockShare file/embed URL to its FLV stream URL."""

    result_type = "video"
    name = "SockShare"
    author = "Sven Slootweg"
    author_url = "http://cryto.net/~joepie91"

    def run(self):
        """Resolve ``self.url`` and store the outcome in ``self.results``.

        Steps: open the embed page for the video ID, submit the first form
        on that page, read the playlist path from the resulting page, load
        the playlist and pick the first ``video/x-flv`` URL from it.

        Returns:
            ``self``, with ``state`` set to "finished".

        Raises:
            TechnicalError: mechanize is not installed, the site or the
                playlist could not be loaded, or the video title is missing
                (state "failed").
            ResolverError: The URL is not a Sockshare URL (state "invalid"),
                the form submission failed (state "nonexistent"), or no
                playlist / no video URL was found (state "failed").
        """
        # mechanize is imported lazily so that it is only required when a
        # Sockshare URL is actually being resolved.
        try:
            import mechanize
        except ImportError:
            self.state = "failed"
            raise TechnicalError(
                "The Python mechanize module is required to resolve Sockshare URLs.")

        matches = re.search(
            r"https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)",
            self.url)
        if matches is None:
            self.state = "invalid"
            raise ResolverError("The provided URL is not a valid Sockshare URL.")

        video_id = matches.group(3)

        # Throughout this method `except Exception:` replaces the former bare
        # `except:` so that KeyboardInterrupt and SystemExit propagate instead
        # of being reported as resolver failures.
        try:
            browser = mechanize.Browser()
            browser.set_handle_robots(False)
            browser.open("http://sockshare.com/embed/%s" % video_id)
        except Exception:
            self.state = "failed"
            raise TechnicalError("The Sockshare site could not be reached.")

        try:
            # Submit the first form on the embed page and read the page it
            # leads to (presumably a "continue to video" confirmation --
            # TODO confirm against the live site).
            browser.select_form(nr=0)
            result = browser.submit()
            page = result.read()
        except Exception:
            self.state = "nonexistent"
            raise ResolverError("The file was removed, or the URL is incorrect.")

        matches = re.search("playlist: '([^']+)'", page)
        if matches is None:
            self.state = "failed"
            raise ResolverError(
                "No playlist was found on the given URL; the Sockshare server for this "
                "file may be in maintenance mode, or the given URL may not be a video "
                "file. The Sockshare resolver currently only supports video links.")

        playlist = matches.group(1)

        try:
            browser.open("http://www.sockshare.com%s" % playlist)
        except Exception:
            self.state = "failed"
            raise TechnicalError(
                "The playlist file for the given URL could not be loaded.")

        matches = re.search(
            r'url="([^"]+)" type="video\/x-flv"', browser.response().read())
        if matches is None:
            self.state = "failed"
            raise ResolverError(
                "The playlist file does not contain any video URLs. The Sockshare "
                "resolver currently only supports video links.")

        video_file = matches.group(1)

        try:
            # The title is read from the page returned by the form
            # submission, not from the playlist. A missing link makes
            # re.search return None and .group() raise AttributeError.
            video_title = unescape(
                re.search(
                    r'<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>',
                    page).group(1))
        except Exception:
            self.state = "failed"
            raise TechnicalError("Could not find the video title.")

        stream_dict = {
            'url': video_file,
            'method': "GET",
            'quality': "unknown",
            'priority': 1,
            'format': "unknown",
        }

        self.results = {
            'title': video_title,
            'videos': [stream_dict],
        }
        self.state = "finished"
        return self
class YoutubeTask(Task):
    """Task that resolves a YouTube page URL to its list of video streams."""

    result_type = "video"
    name = "YouTube"
    author = "Sven Slootweg"
    author_url = "http://cryto.net/~joepie91"
    extra_headers = {
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5'
    }

    # (mimetype prefix, format name) pairs, tested in order with startswith().
    _FORMAT_PREFIXES = (
        ("video/mp4", "mp4"),
        ("video/x-flv", "flv"),
        ("video/3gpp", "3gp"),
        ("video/webm", "webm"),
    )

    # Quality label from the stream map -> (reported quality, priority).
    # hd1080 gets priority 1 and small gets 5; unknown labels get 10.
    # NOTE(review): presumably a lower number means "preferred" -- confirm
    # against the code that consumes `priority`.
    _QUALITIES = {
        "small": ("240p", 5),
        "medium": ("360p", 4),
        "large": ("480p", 3),
        "hd720": ("720p", 2),
        "hd1080": ("1080p", 1),
    }

    def run(self):
        """Resolve ``self.url`` and store the outcome in ``self.results``.

        Fetches the page through ``self.fetch_page``, extracts the
        ``url_encoded_fmt_stream_map`` value, converts every entry into a
        stream dict and reads the title from the ``og:title`` meta tag.

        Returns:
            ``self``, with ``state`` set to "finished".

        Raises:
            TechnicalError: The page could not be fetched, the stream map
                could not be located or parsed, a stream entry is
                incomplete, or the title is missing (state "failed").
            ResolverError: The page carries no ``og:video:type`` meta tag
                (state "invalid").
        """
        try:
            contents = self.fetch_page(self.url)
        except urllib2.URLError:
            self.state = "failed"
            raise TechnicalError("Could not retrieve the specified URL.")

        if '<meta property="og:video:type"' not in contents:
            self.state = "invalid"
            raise ResolverError("The specified URL is not a valid YouTube video.")

        streams = self._extract_streams(contents)
        stream_pool = [self._parse_stream(stream) for stream in streams]

        try:
            # A missing tag makes re.search return None and .group() raise
            # AttributeError; `Exception` (not a bare except) keeps
            # KeyboardInterrupt/SystemExit from being swallowed.
            video_title = unescape(
                re.search('<meta property="og:title" content="([^"]*)">',
                          contents).group(1))
        except Exception:
            self.state = "failed"
            raise TechnicalError("Could not find the video title.")

        self.results = {
            'title': video_title,
            'videos': stream_pool,
        }
        self.state = "finished"
        return self

    def _extract_streams(self, contents):
        """Return the raw, still query-encoded stream entries found in ``contents``."""
        map_start = "url_encoded_fmt_stream_map="
        map_end = "\\u0026amp;"

        try:
            pos_start = contents.index(map_start) + len(map_start)
            snippet = contents[pos_start:]
        except ValueError:
            self.state = "failed"
            raise TechnicalError(
                "The starting position for the YouTube player configuration could "
                "not be found. Is the URL really a valid video page?")

        try:
            pos_end = snippet.index(map_end)
            stream_map = snippet[:pos_end]
        except ValueError:
            self.state = "failed"
            raise TechnicalError(
                "The ending position for the YouTube player configuration could not be found.")

        try:
            return urllib.unquote(stream_map).split(',')
        except Exception:
            self.state = "failed"
            raise TechnicalError("The YouTube player configuration is corrupted.")

    def _parse_stream(self, stream):
        """Convert one query-encoded stream entry into a stream dict."""
        # Function-scope import so this block does not rely on the file's
        # top-level imports (which are not visible from here).
        import logging

        fields = urlparse.parse_qs(stream)

        if len(fields) < 6:
            self.state = "failed"
            raise TechnicalError(
                "The amount of fields in the YouTube player configuration is incorrect.")

        try:
            signature = fields['sig'][0]
            video_url = "%s&signature=%s" % (fields['url'][0], signature)
            quality = fields['quality'][0]
            fallback_host = fields['fallback_host'][0]
            mimetype = fields['type'][0]
            itag = fields['itag'][0]
        except KeyError:
            # Six or more fields does not guarantee the six needed here are
            # among them; report that like any other malformed configuration
            # instead of leaking a bare KeyError with `state` left untouched.
            self.state = "failed"
            raise TechnicalError(
                "A required field is missing from the YouTube player configuration.")

        video_format = "unknown"
        for prefix, format_name in self._FORMAT_PREFIXES:
            if mimetype.startswith(prefix):
                video_format = format_name
                break

        if quality in self._QUALITIES:
            video_quality, video_priority = self._QUALITIES[quality]
        else:
            video_quality, video_priority = "unknown", 10
            # Record unrecognised labels for diagnosis without printing to
            # stdout from library code.
            logging.getLogger(__name__).debug("UNKNOWN: %s", quality)

        return {
            'url': video_url,
            'method': "GET",
            'quality': video_quality,
            'priority': video_priority,
            'format': video_format,
            'extra': {
                'itag': itag,
                'mimetype': mimetype,
                'fallback_host': fallback_host,
            },
        }