def __ReadTorrentPageInternal(self, logger, releaseInfo, description):
    """Extract the release name, IMDb id and size from a Digital Hive torrent page.

    Args:
        logger: Logger that receives a warning when the size can't be found.
        releaseInfo: Release being processed. ``ImdbId`` is filled in when the
            release has neither an IMDb id nor a PTP id yet, and ``Size`` is
            filled in when the size row is found.
        description: Text of the torrent page that is searched.

    Returns:
        The release name from the page title, passed through
        ``DecodeHtmlEntities``.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the title doesn't contain a release name.
    """
    # Get release name.
    # The quotes around the name are accepted both as the &quot; entity and as
    # literal characters. (A bare double quote inside a double-quoted literal
    # terminated the pattern string, hence the single-quoted raw string.)
    matches = re.search(
        r'<title>Digital Hive :: Details for torrent (?:&quot;|")(.+)(?:&quot;|")</title>',
        description,
    )
    if matches is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Release name can't be found on torrent page.",
        )

    releaseName = DecodeHtmlEntities(matches.group(1))

    # Get IMDb id. The page text is tried first, the NFO page is the fallback.
    if (not releaseInfo.HasImdbId()) and (not releaseInfo.HasPtpId()):
        releaseInfo.ImdbId = NfoParser.GetImdbId(description)
        if not releaseInfo.HasImdbId():
            self.__TryGettingImdbIdFromNfoPage(logger, releaseInfo)

    # Get size.
    # Sample rows kept from the original comment:
    # <tr><td class="heading" valign="top" align="right">Size</td><td valign="top" align="left">4.47 GB (4,799,041,437bytes )</td></tr>
    # <tr><td class='heading' valign='top' align='right'>Size</td><td valign='top' align='left'>4.47 GB (4,799,041,437bytes )</td></tr>
    # NOTE(review): the samples use <td> cells while the pattern below expects
    # <th> cells -- confirm against the current layout of the site.
    matches = re.search(
        r"""<tr><th><b>Size</b></th><th>.+ \((.+bytes) ?\)</th></tr>""",
        description,
    )
    if matches is None:
        # A missing size is not fatal.
        logger.warning("Size not found on torrent page.")
    else:
        size = matches.group(1)
        releaseInfo.Size = GetSizeFromText(size)

    return releaseName
def GetIdFromAutodlIrssiUrl(self, url):
    """Return the torrent id embedded in an autodl-irssi download URL.

    Example of a recognized URL:
    https://hd-torrents.org//download.php?id=808b75cd4c5517d5a3001becb3b7c6ce5274ca62&f=Brief%20Encounter%201945%20720p%20BluRay%20FLAC%20x264-HDB.torrent

    Args:
        url: Download URL to inspect.

    Returns:
        The value of the ``id`` parameter passed through
        ``DecodeHtmlEntities``, or an empty string when the URL doesn't have
        the expected form.
    """
    idMatch = re.match(r".*hd-torrents\.org\/\/download\.php\?id=(\w+)&f", url)
    if idMatch is not None:
        return DecodeHtmlEntities(idMatch.group(1))

    return ""
def __GetReleaseNameAndSize(self, logger, releaseInfo):
    """Download the TorrentLeech torrent page and read the release name and size.

    Args:
        logger: Logger used for the progress message and the missing-size
            warning.
        releaseInfo: Release whose ``AnnouncementId`` identifies the page.

    Returns:
        A ``(releaseName, size)`` tuple. ``size`` is 0 when the size row can't
        be found on the page.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the page title doesn't contain a release name.
    """
    pageUrl = "https://www.torrentleech.org/torrent/%s" % releaseInfo.AnnouncementId
    logger.info("Downloading release name and size from page '%s'." % pageUrl)

    reply = MyGlobals.session.get(pageUrl)
    reply.raise_for_status()
    pageHtml = reply.text
    self.__CheckIfLoggedInFromResponse(pageHtml)

    # The release name is taken from the page title.
    titleMatch = re.search(
        "<title>Torrent Details for (.+) :: TorrentLeech.org</title>", pageHtml
    )
    if titleMatch is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Release name can't be found on torrent page.",
        )

    releaseName = self.__RestoreReleaseName(DecodeHtmlEntities(titleMatch.group(1)))

    # The size row looks like this:
    # <td class="label">Size</td><td>5.47 GB</td></tr>
    sizeMatch = re.search(
        r"""<td class="label">Size</td><td>(.+)</td></tr>""", pageHtml
    )
    if sizeMatch is not None:
        return releaseName, GetSizeFromText(sizeMatch.group(1))

    # A missing size is not fatal; 0 is reported instead.
    logger.warning("Size not found on torrent page.")
    return releaseName, 0
def __ReadTorrentPage(self, logger, releaseInfo):
    """Download a FunFile torrent page and extract the release details from it.

    Args:
        logger: Logger used for the progress message and the missing-size
            warning.
        releaseInfo: Release whose ``AnnouncementId`` identifies the page.
            ``ImdbId`` (only when the release has neither an IMDb id nor a
            PTP id), ``Size`` and ``SceneAccessDownloadUrl`` are filled in.

    Returns:
        The release name from the page title, passed through
        ``DecodeHtmlEntities``.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the description, the release name or the download link is missing.
    """
    pageUrl = "https://www.funfile.org/details.php?id=%s&filelist=1" % releaseInfo.AnnouncementId
    logger.info("Downloading NFO from page '%s'." % pageUrl)

    pageHtml = MakeRetryingHttpGetRequestWithRequests(pageUrl).text
    self.__CheckIfLoggedInFromResponse(pageHtml)

    # Everything from the comments marker onwards is dropped, so user comments
    # can't influence the parsed values.
    commentsStart = pageHtml.find("""<p><a name="startcomments"></a></p>""")
    if commentsStart == -1:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Description can't found. Probably the layout of the site has changed.",
        )

    description = pageHtml[:commentsStart]

    # Release name.
    nameMatch = re.search(r"""Details for torrent "(.+)"</title>""", description)
    if nameMatch is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Release name can't be found on torrent page.",
        )

    releaseName = DecodeHtmlEntities(nameMatch.group(1))

    # IMDb id.
    needsImdbId = not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId())
    if needsImdbId:
        releaseInfo.ImdbId = NfoParser.GetImdbId(description)

    # Size. Sample row kept from the original comment (its markup differs from
    # the pattern below):
    # <tr><td class="td_head">Size</td><td class="td_col">699.98 MB (733,983,002 bytes)</td></tr>
    sizeMatch = re.search(
        r"""<tr><td class="rowhead" >Size<span id="filelist"></span></td><td class="row1" >.+ \((.+bytes)\)""",
        description,
    )
    if sizeMatch is not None:
        releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
    else:
        logger.warning("Size not found on torrent page.")

    # Download URL. Sample link kept from the original comment (its markup
    # differs from the pattern below):
    # <td class="td_head">Download</td><td class="td_col"><a href="download/442572/AAAA/Winnie.the.Pooh.RERIP.DVDRip.XviD-NeDiVx.torrent">
    linkMatch = re.search(
        r"""<tr><td class="rowhead">Action</td><td class="row1"><span style="float:left"><a class="index" href="download.php/(.+?)">""",
        description,
    )
    if linkMatch is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Download link can't be found on torrent page.",
        )

    releaseInfo.SceneAccessDownloadUrl = "https://www.funfile.org/download.php/" + linkMatch.group(1)

    return releaseName
def __ReadTorrentPage(self, logger, releaseInfo):
    """Download an HD-Torrents torrent page and extract the release details.

    Args:
        logger: Logger used for the progress message and the missing-size
            warning.
        releaseInfo: Release whose ``AnnouncementId`` identifies the page.
            ``ImdbId`` (only when the release has neither an IMDb id nor a
            PTP id), ``Size`` and ``SceneAccessDownloadUrl`` are filled in.

    Returns:
        The release name from the page title, passed through
        ``DecodeHtmlEntities``.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the description, the release name or the download link is missing.
    """
    pageUrl = "http://hd-torrents.org/details.php?id=%s" % releaseInfo.AnnouncementId
    logger.info("Downloading NFO from page '%s'." % pageUrl)

    reply = MyGlobals.session.get(pageUrl)
    reply.raise_for_status()
    pageHtml = reply.text
    self.__CheckIfLoggedInFromResponse(pageHtml)

    # Only the part before the comments anchor is parsed, so user comments
    # can't influence the extracted values.
    commentsStart = pageHtml.find("""<a name="comments" />""")
    if commentsStart == -1:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Description can't be found. Probably the layout of the site has changed.",
        )

    description = pageHtml[:commentsStart]

    # Release name.
    nameMatch = re.search(r"""<title>HD-Torrents.org - (.+?)</title>""", description)
    if nameMatch is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Release name can't be found on torrent page.",
        )

    releaseName = DecodeHtmlEntities(nameMatch.group(1))

    # IMDb id.
    needsImdbId = not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId())
    if needsImdbId:
        releaseInfo.ImdbId = NfoParser.GetImdbId(description)

    # Size.
    # <tr><td align=right class="detailsleft"> Size:</td><td class="detailsright" align="left">4.26 GiB</td></tr>
    sizeMatch = re.search(
        r"""Size:</td><td class="detailsright" align="left">(.*)</td>""",
        description,
    )
    if sizeMatch is not None:
        releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
    else:
        logger.warning("Size not found on torrent page.")

    # Download URL.
    # <a href="download.php?id=c787dc1e59f6245c159a02f4402a089141933f4d&f=Hand+Of+God+S01E01+Pilot+720p+WEBRip+x264-W4F+.torrent">
    linkMatch = re.search(r"""<a href="download.php\?(.+?)">""", description)
    if linkMatch is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Download link can't be found on torrent page.",
        )

    releaseInfo.SceneAccessDownloadUrl = "http://hd-torrents.org/download.php?" + linkMatch.group(1)

    return releaseName
def __ReadTorrentPage(self, logger, releaseInfo):
    """Download a TorrentShack torrent page and extract the release details.

    Args:
        logger: Logger used for the progress message and the missing-size
            warning.
        releaseInfo: Release whose ``AnnouncementId`` identifies the page.
            ``ImdbId`` (only when the release has neither an IMDb id nor a
            PTP id), ``Size`` and ``SceneAccessDownloadUrl`` are filled in.

    Returns:
        The release name from the page title, passed through
        ``DecodeHtmlEntities``.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the description, the release name or the download link is missing.
    """
    url = "http://torrentshack.me/torrents.php?torrentid=%s" % releaseInfo.AnnouncementId
    logger.info("Downloading NFO from page '%s'." % url)

    result = MakeRetryingHttpGetRequestWithRequests(url)
    response = result.text
    self.__CheckIfLoggedInFromResponse(response)

    # Make sure we only get information from the description and not from the comments.
    descriptionEndIndex = response.find("""<a name="comments">""")
    if descriptionEndIndex == -1:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Description can't found. Probably the layout of the site has changed.",
        )

    description = response[:descriptionEndIndex]

    # Get release name.
    matches = re.search(r"""<title>(.+) :: TorrentShack.me</title>""", description)
    if matches is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Release name can't be found on torrent page.",
        )

    releaseName = DecodeHtmlEntities(matches.group(1))

    # Get IMDb id.
    if (not releaseInfo.HasImdbId()) and (not releaseInfo.HasPtpId()):
        releaseInfo.ImdbId = NfoParser.GetImdbId(description)

    # Get size.
    matches = re.search(r"""<td class="nobr">(.+)</td>""", description)
    if matches is None:
        # A missing size is not fatal.
        logger.warning("Size not found on torrent page.")
    else:
        size = matches.group(1)
        releaseInfo.Size = GetSizeFromText(size)

    # Store the download URL.
    # <a href="torrents.php?action=download&amp;id=562922&amp;authkey=XXXXXXXXXXXXX&amp;torrent_pass=XXXXXXXXXXX" title="Download">DL</a>
    # The "?" in the pattern is not escaped, so it only makes the preceding "p"
    # optional and the captured group starts with the literal "?". That is why
    # the base URL below doesn't end with one.
    matches = re.search(
        r"""<a href="torrents.php?(.+?)" title="Download">DL</a>""",
        description,
    )
    if matches is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Download link can't be found on torrent page.",
        )

    # The link is taken from HTML, so the parameter separators are entity
    # encoded: we have to change "&amp;" to "&" to get a usable URL.
    downloadQuery = matches.group(1).replace("&amp;", "&")
    releaseInfo.SceneAccessDownloadUrl = "http://torrentshack.me/torrents.php" + downloadQuery

    return releaseName
def __ReadTorrentPage(self, logger, releaseInfo):
    """Download an AlphaRatio torrent page and extract the release details.

    Args:
        logger: Logger used for the progress message and the missing-size
            warning.
        releaseInfo: Release whose ``AnnouncementId`` identifies the page.
            ``ImdbId`` (only when the release has neither an IMDb id nor a
            PTP id) and ``Size`` are filled in.

    Returns:
        The release name from the page title, passed through
        ``DecodeHtmlEntities``.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the description or the release name is missing.
    """
    pageUrl = "https://alpharatio.cc/torrents.php?torrentid=%s" % releaseInfo.AnnouncementId
    logger.info("Downloading NFO from page '%s'." % pageUrl)

    pageHtml = MakeRetryingHttpGetRequestWithRequests(pageUrl).text
    self.__CheckIfLoggedInFromResponse(pageHtml)

    # Only the part before the comments anchor is parsed, so user comments
    # can't influence the extracted values.
    commentsStart = pageHtml.find("""<a name="comments">""")
    if commentsStart == -1:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Description can't found. Probably the layout of the site has changed.",
        )

    description = pageHtml[:commentsStart]

    # Release name.
    nameMatch = re.search(r"""<title>(.+) :: AlphaRatio</title>""", description)
    if nameMatch is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Release name can't be found on torrent page.",
        )

    releaseName = DecodeHtmlEntities(nameMatch.group(1))

    # IMDb id.
    needsImdbId = not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId())
    if needsImdbId:
        releaseInfo.ImdbId = NfoParser.GetImdbId(description)

    # Size. A missing size only produces a warning.
    sizeMatch = re.search(r"""<td class="nobr">(.+)</td>""", description)
    if sizeMatch is not None:
        releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
    else:
        logger.warning("Size not found on torrent page.")

    return releaseName
def __ReadTorrentPageInternal(self, logger, releaseInfo, description):
    """Extract the release name, IMDb id and size from a GFT torrent page.

    Args:
        logger: Logger that receives a warning when the size can't be found.
        releaseInfo: Release being processed. ``ImdbId`` is filled in when the
            release has neither an IMDb id nor a PTP id yet, and ``Size`` is
            filled in when the size row is found.
        description: Text of the torrent page that is searched.

    Returns:
        The release name from the page title, passed through
        ``DecodeHtmlEntities``.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the title doesn't contain a release name.
    """
    # Get release name.
    # The quotes around the name are accepted both as the &quot; entity and as
    # literal characters. (A bare double quote inside a double-quoted literal
    # terminated the pattern string, hence the single-quoted raw string.)
    matches = re.search(
        r'<title>GFT \d+ :: Details for torrent (?:&quot;|")(.+)(?:&quot;|")</title>',
        description,
    )
    if matches is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Release name can't be found on torrent page.",
        )

    releaseName = DecodeHtmlEntities(matches.group(1))

    # Get IMDb id. The page text is tried first, the NFO page is the fallback.
    if (not releaseInfo.HasImdbId()) and (not releaseInfo.HasPtpId()):
        releaseInfo.ImdbId = NfoParser.GetImdbId(description)
        if not releaseInfo.HasImdbId():
            self.__TryGettingImdbIdFromNfoPage(logger, releaseInfo)

    # Check if pretime presents.
    # TODO: this is unreliable as the uploaders on GFT set this
    #if description.find( """<td><img src='/pic/scene.jpg' alt='Scene' /></td>""" ) != -1:
    #    releaseInfo.SetSceneRelease()

    # Get size.
    # Two possible formats:
    # <tr><td class="heading" valign="top" align="right">Size</td><td valign="top" align="left">4.47 GB (4,799,041,437bytes )</td></tr>
    # <tr><td class='heading' valign='top' align='right'>Size</td><td valign='top' align='left'>4.47 GB (4,799,041,437bytes )</td></tr>
    # The "." around the attribute values matches either kind of quote.
    matches = re.search(
        r"""<tr><td class=.heading. valign=.top. align=.right.>Size</td><td valign=.top. align=.left.>.+ \((.+bytes) ?\)</td></tr>""",
        description,
    )
    if matches is None:
        # A missing size is not fatal.
        logger.warning("Size not found on torrent page.")
    else:
        size = matches.group(1)
        releaseInfo.Size = GetSizeFromText(size)

    return releaseName
def __ParsePage(self, logger, releaseInfo, html, parseForExternalCreateJob=False):
    """Parse a torrent details page and fill in the release information.

    Args:
        logger: Logger that receives a warning when the size can't be found.
        releaseInfo: Release being processed. Depending on what is already
            set, ``ReleaseName``, ``ResolutionType``, ``ImdbId`` and ``Size``
            are filled in; the source and format are mapped through
            ``__MapSourceAndFormatToPtp``.
        html: Full text of the torrent page.
        parseForExternalCreateJob: When true, the release name is not read
            from the download link and the title is matched with literal
            quotes only.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the description, release name, source/format or IMDb id is
            missing, ``Ignored_Forbidden`` when the torrent is marked as XXX,
            and ``Ignored_NotSupported`` for a wrongly categorized DVDR.
    """
    # Make sure we only get information from the description and not from the comments.
    descriptionEndIndex = html.find('<p><a name="startcomments"></a></p>')
    if descriptionEndIndex == -1:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Description can't found on torrent page. Probably the layout of the site has changed.",
        )

    description = html[:descriptionEndIndex]

    # We will use the torrent's name as release name.
    if not parseForExternalCreateJob:
        # The parameter separator in the link is accepted both as a plain "&"
        # and as the "&amp;" entity.
        matches = re.search(
            r'href="download.php\?id=(\d+)&(?:amp;)?name=.+">(.+)\.torrent</a>',
            description,
        )
        if matches is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Can't get release name from torrent page.",
            )

        releaseInfo.ReleaseName = DecodeHtmlEntities(matches.group(2))

    # Get source and format type
    sourceType = ""
    formatType = ""
    if (not releaseInfo.IsSourceSet()) or (not releaseInfo.IsCodecSet()):
        if parseForExternalCreateJob:
            matches = re.search(
                r'torrent details for "(.+) \[(\d+)/(.+)/(.+)\]"', description
            )
        else:
            # Here the quotes around the title are accepted both as the &quot;
            # entity and as literal characters. (A bare double quote inside a
            # double-quoted literal terminated the pattern string.)
            matches = re.search(
                r'torrent details for (?:&quot;|")(.+) \[(\d+)/(.+)/(.+)\](?:&quot;|")',
                description,
            )

        if matches is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Can't get release source and format type from torrent page.",
            )

        sourceType = matches.group(3)
        formatType = matches.group(4)

        # The source part may carry a resolution after a slash.
        if "/" in sourceType and not releaseInfo.ResolutionType:
            sourceType, _, resolutionType = sourceType.partition("/")
            # str.strip returns a new string; the result has to be assigned,
            # otherwise "720p"/"1080p" never match the list below.
            # NOTE(review): this stores the value without the "p" suffix --
            # confirm that is what the rest of the code expects.
            resolutionType = resolutionType.strip("p")
            if resolutionType in ["720", "1080"]:
                releaseInfo.ResolutionType = resolutionType

    # Get IMDb id.
    if (not releaseInfo.HasImdbId()) and (not releaseInfo.HasPtpId()):
        matches = re.search(r"imdb\.com/title/tt(\d+)", description)
        if matches is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "IMDb id can't be found on torrent page.",
            )

        releaseInfo.ImdbId = matches.group(1)

    # Get size.
    # Two possible formats:
    # <tr><td class="rowhead" valign="top" align="right">Size</td><td valign="top" align="left">1.46 GB (1,570,628,119 bytes)</td></tr>
    # <tr><td class="rowhead" valign="top" align="right">Size</td><td valign="top" align=left>1.46 GB (1,570,628,119 bytes)</td></tr>
    matches = re.search(
        r"""<tr><td class="rowhead" valign="top" align="right">Size</td><td valign="top" align="?left"?>.+ \((.+ bytes)\)</td></tr>""",
        description,
    )
    if matches is None:
        # A missing size is not fatal.
        logger.warning("Size not found on torrent page.")
    else:
        size = matches.group(1)
        releaseInfo.Size = GetSizeFromText(size)

    # Ignore XXX releases.
    if description.find('>Type</td><td valign="top" align=left>XXX<') != -1:
        raise PtpUploaderException(JobRunningState.Ignored_Forbidden, "Marked as XXX.")

    self.__MapSourceAndFormatToPtp(releaseInfo, sourceType, formatType, html)

    # Make sure that this is not a wrongly categorized DVDR.
    if (not releaseInfo.IsDvdImage()) and (
        re.search(r"\.vob</td>", description, re.IGNORECASE)
        or re.search(r"\.iso</td>", description, re.IGNORECASE)
    ):
        raise PtpUploaderException(
            JobRunningState.Ignored_NotSupported, "Wrongly categorized DVDR."
        )
def __ParsePage(self, logger, releaseInfo, html, parseForExternalCreateJob=False):
    """Parse a torrent details page and fill in the release information.

    Args:
        logger: Logger that receives a warning when the size can't be found.
        releaseInfo: Release being processed. ``ReleaseName`` (when unset or
            "none"), ``ImdbId`` and ``Size`` are filled in, then the source,
            format, resolution and subtitle parsers are run on the
            description.
        html: Full text of the torrent page.
        parseForExternalCreateJob: When true, the release name is not read
            from the download link.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the description, release name or IMDb id is missing, and
            ``Ignored_NotSupported`` when the type is not movie or the torrent
            is a wrongly categorized DVDR.
    """
    # Only the part before the comments marker is parsed, so user comments
    # can't influence the extracted values.
    commentsStart = html.find('<p><a name="startcomments"></a></p>')
    if commentsStart == -1:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Description can't found on torrent page. Probably the layout of the site has changed.",
        )

    description = html[:commentsStart]

    # The torrent's file name serves as the release name.
    if not parseForExternalCreateJob:
        linkMatch = re.search(
            r'href="/down.php/(\d+)/.+?">(.+?)\.torrent</a>', description
        )
        if linkMatch is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Can't get release name from torrent page.",
            )

        parsedName = DecodeHtmlEntities(linkMatch.group(2))

        # Single file releases carry the container's extension in their name;
        # the extensions are stripped one after the other, in this order.
        for extension in ("avi", "mkv", "mp4"):
            suffixPattern = re.compile(r"\.%s$" % extension, re.IGNORECASE)
            parsedName = suffixPattern.sub("", parsedName)

        # "none" can come from FlexGet from the announcement directory.
        if (not releaseInfo.IsReleaseNameSet()) or releaseInfo.ReleaseName == "none":
            releaseInfo.ReleaseName = parsedName

    # Only torrents under the movie category are accepted.
    # <tr><td class="heading" align="right" valign="top">Type</td><td colspan="2" align="left" valign="top"><a href="browse.php?cat=1">Movie</a></td></tr>
    typeMatch = re.search(
        r"""<tr><td.*?>Type</td><td.*?><a href="browse.php\?cat=1">Movie</a></td></tr>""",
        description,
    )
    if typeMatch is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_NotSupported, "Type is not movie."
        )

    # IMDb id.
    needsImdbId = not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId())
    if needsImdbId:
        imdbMatch = re.search(r"imdb\.com/title/tt(\d+)", description)
        if imdbMatch is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "IMDb id can't be found on torrent page.",
            )

        releaseInfo.ImdbId = imdbMatch.group(1)

    # Size. A missing size only produces a warning.
    # <tr><td class="heading" align="right" valign="top">Size</td><td colspan="2" align="left" valign="top">1.37GB (1,476,374,914 bytes)</td></tr>
    sizeMatch = re.search(
        r"""<tr><td.*?>Size</td><td.*?>.+ \((.+ bytes)\)</td></tr>""",
        description,
    )
    if sizeMatch is not None:
        releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
    else:
        logger.warning("Size not found on torrent page.")

    self.__DownloadNfoParseSourceType(releaseInfo, description)
    self.__DownloadNfoParseFormatType(releaseInfo, description)
    self.__DownloadNfoParseResolution(releaseInfo, description)
    self.__DownloadNfoParseSubtitles(releaseInfo, description)

    # A VOB or ISO in the file list of something that isn't a DVD image means
    # a wrongly categorized DVDR.
    if not releaseInfo.IsDvdImage():
        for dvdFilePattern in (r"<td>.+?\.vob</td>", r"<td>.+?\.iso</td>"):
            if re.search(dvdFilePattern, description, re.IGNORECASE):
                raise PtpUploaderException(
                    JobRunningState.Ignored_NotSupported,
                    "Wrongly categorized DVDR.",
                )
def __DownloadNfo(self, logger, releaseInfo):
    """Download a Cinematik torrent page and extract the release details.

    Args:
        logger: Logger used for the progress message and the missing-size
            warning.
        releaseInfo: Release whose ``AnnouncementId`` identifies the page.
            ``ReleaseName``, ``ImdbId`` (only when the release has neither an
            IMDb id nor a PTP id) and ``Size`` are filled in.

    Returns:
        A ``(resolutionType, codec, container)`` tuple taken from the last
        three space separated parts of the page title.

    Raises:
        PtpUploaderException: With ``JobRunningState.Ignored_MissingInfo`` when
            the description, the title parts or the IMDb id is missing.
    """
    pageUrl = "http://cinematik.net/details.php?id=%s&filelist=1" % releaseInfo.AnnouncementId
    logger.info("Collecting info from torrent page '%s'." % pageUrl)

    reply = MyGlobals.session.get(pageUrl)
    reply.raise_for_status()
    pageHtml = reply.text
    self.__CheckIfLoggedInFromResponse(pageHtml)

    # Only the part before the comments marker is parsed, so user comments
    # can't influence the extracted values.
    commentsStart = pageHtml.find('<p><a name="startcomments"></a></p>')
    if commentsStart == -1:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Description can't found on torrent page. Probably the layout of the site has changed.",
        )

    description = pageHtml[:commentsStart]

    # The title holds the movie title, year, resolution, codec and container:
    # <title>Cinematik :: Behind the Mask: The Rise of Leslie Vernon (2006) NTSC DVD9 VIDEO_TS</title>
    titleMatch = re.search(
        r"<title>Cinematik :: (.+?) \((\d+)\) (.+?) (.+?) (.+?)</title>",
        description,
    )
    if titleMatch is None:
        raise PtpUploaderException(
            JobRunningState.Ignored_MissingInfo,
            "Can't get resolution type, codec and container from torrent page.",
        )

    movieTitle, movieYear, resolution, videoCodec, fileContainer = [
        DecodeHtmlEntities(titleMatch.group(groupIndex)).strip()
        for groupIndex in range(1, 6)
    ]

    # The container is returned to the caller but is not part of the name.
    composedName = "%s (%s) %s %s" % (movieTitle, movieYear, resolution, videoCodec)
    releaseInfo.ReleaseName = RemoveDisallowedCharactersFromPath(composedName)

    # IMDb id.
    needsImdbId = not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId())
    if needsImdbId:
        imdbMatch = re.search(r"imdb\.com/title/tt(\d+)", description)
        if imdbMatch is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "IMDb id can't be found on torrent page.",
            )

        releaseInfo.ImdbId = imdbMatch.group(1)

    # Size. The two attributes of the cells can come in either order:
    # <td class="heading" align="right" valign="top">Size</td><td align="left" valign="top">6.81 GB (7,313,989,632 bytes)</td>
    # <td class="heading" valign="top" align="right">Size</td><td valign="top" align="left">4.38 GB (4,699,117,568 bytes)</td>
    sizeMatch = re.search(
        r"""<td class="heading" v?align=".+?" v?align=".+?">Size</td><td v?align=".+?" v?align=".+?">.+\((.+ bytes)\)</td>""",
        description,
    )
    if sizeMatch is not None:
        releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
    else:
        logger.warning("Size not found on torrent page.")

    return resolution, videoCodec, fileContainer