Python DecodeHtmlEntitiesの例、Helper.DecodeHtmlEntities Pythonの例

コード例 #1

0

ファイルを表示

	def __ReadTorrentPageInternal( self, logger, releaseInfo, description ):
		"""Extract the release name, IMDb id and size from a torrent details page.

		The release name is read from the page's <title> element and returned
		with its HTML entities decoded. releaseInfo.ImdbId and releaseInfo.Size
		are filled in as side effects when they can be determined.

		Raises PtpUploaderException (Ignored_MissingInfo) when the title does
		not have the expected form.
		"""
		# Release name: quoted (as &quot;) inside the page title.
		titleMatch = re.search( r"<title>Digital Hive :: Details for torrent &quot;(.+)&quot;</title>", description )
		if titleMatch is None:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Release name can't be found on torrent page." )

		decodedName = DecodeHtmlEntities( titleMatch.group( 1 ) )

		# IMDb id: only looked up when the release has neither an IMDb nor a PTP id yet.
		if not ( releaseInfo.HasImdbId() or releaseInfo.HasPtpId() ):
			releaseInfo.ImdbId = NfoParser.GetImdbId( description )
			if not releaseInfo.HasImdbId():
				# The description yielded no id, so hand over to the NFO page helper.
				self.__TryGettingImdbIdFromNfoPage( logger, releaseInfo )

		# Size: the pattern expects a table row of the form
		# <tr><th><b>Size</b></th><th>4.47 GB (4,799,041,437bytes )</th></tr>
		# and captures the part inside the parentheses up to and including "bytes".
		sizeMatch = re.search( r"""<tr><th><b>Size</b></th><th>.+ \((.+bytes) ?\)</th></tr>""", description )
		if sizeMatch is not None:
			releaseInfo.Size = GetSizeFromText( sizeMatch.group( 1 ) )
		else:
			# A missing size is not fatal; it is only logged.
			logger.warning( "Size not found on torrent page." )

		return decodedName

コード例 #2

0

ファイルを表示

	def GetIdFromAutodlIrssiUrl( self, url ):
		"""Return the torrent id embedded in an autodl-irssi download URL.

		Expected URL form (note the double slash before download.php):
		https://hd-torrents.org//download.php?id=808b75cd4c5517d5a3001becb3b7c6ce5274ca62&f=Name.torrent

		Returns an empty string when the URL does not have that form.
		"""
		idMatch = re.match( r".*hd-torrents\.org\/\/download\.php\?id=(\w+)&f", url )
		return "" if idMatch is None else DecodeHtmlEntities( idMatch.group( 1 ) )

コード例 #3

0

ファイルを表示

ファイル: TorrentLeech.py プロジェクト: Aniverse/p1

    def __GetReleaseNameAndSize(self, logger, releaseInfo):
        """Download the TorrentLeech torrent page and read the release name and size.

        Returns a (releaseName, size) tuple. The size is 0 when it can't be
        found on the page (a warning is logged in that case).

        Raises PtpUploaderException (Ignored_MissingInfo) when the page title
        does not contain the release name; HTTP errors are raised by
        raise_for_status().
        """
        pageUrl = "https://www.torrentleech.org/torrent/%s" % releaseInfo.AnnouncementId
        logger.info("Downloading release name and size from page '%s'." % pageUrl)

        httpResult = MyGlobals.session.get(pageUrl)
        httpResult.raise_for_status()
        pageHtml = httpResult.text
        self.__CheckIfLoggedInFromResponse(pageHtml)

        # The release name is embedded in the page title.
        titleMatch = re.search(
            "<title>Torrent Details for (.+) :: TorrentLeech.org</title>",
            pageHtml)
        if titleMatch is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Release name can't be found on torrent page.")
        releaseName = self.__RestoreReleaseName(
            DecodeHtmlEntities(titleMatch.group(1)))

        # The size is shown in a table cell, e.g.:
        # <td class="label">Size</td><td>5.47 GB</td></tr>
        sizeMatch = re.search(
            r"""<td class="label">Size</td><td>(.+)</td></tr>""", pageHtml)
        if sizeMatch is None:
            logger.warning("Size not found on torrent page.")
            size = 0
        else:
            size = GetSizeFromText(sizeMatch.group(1))

        return releaseName, size

コード例 #4

0

ファイルを表示

	def __ReadTorrentPage( self, logger, releaseInfo ):
		"""Download the FunFile details page and extract release information.

		Side effects on releaseInfo: ImdbId is set when neither an IMDb nor a
		PTP id is known yet, Size is set when it is found on the page, and
		SceneAccessDownloadUrl is set to the torrent download URL.

		Returns the release name with HTML entities decoded.

		Raises PtpUploaderException (Ignored_MissingInfo) when the comments
		marker, the release name or the download link cannot be found.
		"""
		pageUrl = "https://www.funfile.org/details.php?id=%s&filelist=1" % releaseInfo.AnnouncementId
		logger.info( "Downloading NFO from page '%s'." % pageUrl )

		pageHtml = MakeRetryingHttpGetRequestWithRequests( pageUrl ).text
		self.__CheckIfLoggedInFromResponse( pageHtml )

		# Cut the page at the comments anchor so that nothing is parsed out of user comments.
		commentsStart = pageHtml.find( """<p><a name="startcomments"></a></p>""" )
		if commentsStart == -1:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Description can't found. Probably the layout of the site has changed." )

		description = pageHtml[ :commentsStart ]

		# Release name: quoted (as &quot;) at the end of the page title.
		titleMatch = re.search( r"""Details for torrent &quot;(.+)&quot;</title>""", description )
		if titleMatch is None:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Release name can't be found on torrent page." )

		decodedName = DecodeHtmlEntities( titleMatch.group( 1 ) )

		# IMDb id: only looked up when the release has neither an IMDb nor a PTP id yet.
		if not ( releaseInfo.HasImdbId() or releaseInfo.HasPtpId() ):
			releaseInfo.ImdbId = NfoParser.GetImdbId( description )

		# Size: the pattern captures the exact byte count shown in parentheses,
		# e.g. "699.98 MB (733,983,002 bytes)" inside the "rowhead"/"row1" table cells.
		sizeMatch = re.search( r"""<tr><td class="rowhead" >Size<span id="filelist"></span></td><td class="row1" >.+ \((.+bytes)\)""", description )
		if sizeMatch is not None:
			releaseInfo.Size = GetSizeFromText( sizeMatch.group( 1 ) )
		else:
			logger.warning( "Size not found on torrent page." )

		# Download URL: the part of the link after "download.php/" is captured
		# and appended to the absolute download address.
		linkMatch = re.search( r"""<tr><td class="rowhead">Action</td><td class="row1"><span style="float:left"><a class="index" href="download.php/(.+?)">""", description )
		if linkMatch is None:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Download link can't be found on torrent page." )

		releaseInfo.SceneAccessDownloadUrl = "https://www.funfile.org/download.php/" + linkMatch.group( 1 )

		return decodedName

コード例 #5

0

ファイルを表示

ファイル: HDTorrents.py プロジェクト: Aniverse/p1

	def __ReadTorrentPage( self, logger, releaseInfo ):
		"""Download the HD-Torrents details page and extract release information.

		Side effects on releaseInfo: ImdbId is set when neither an IMDb nor a
		PTP id is known yet, Size is set when it is found on the page, and
		SceneAccessDownloadUrl is set to the torrent download URL.

		Returns the release name with HTML entities decoded.

		Raises PtpUploaderException (Ignored_MissingInfo) when the comments
		marker, the release name or the download link cannot be found; HTTP
		errors are raised by raise_for_status().
		"""
		pageUrl = "http://hd-torrents.org/details.php?id=%s" % releaseInfo.AnnouncementId
		logger.info( "Downloading NFO from page '%s'." % pageUrl )

		httpResult = MyGlobals.session.get( pageUrl )
		httpResult.raise_for_status()
		pageHtml = httpResult.text
		self.__CheckIfLoggedInFromResponse( pageHtml )

		# Cut the page at the comments anchor so that nothing is parsed out of user comments.
		commentsStart = pageHtml.find( """<a name="comments" />""" )
		if commentsStart == -1:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Description can't be found. Probably the layout of the site has changed." )

		description = pageHtml[ :commentsStart ]

		# Release name: everything after the site prefix in the page title.
		titleMatch = re.search( r"""<title>HD-Torrents.org - (.+?)</title>""", description )
		if titleMatch is None:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Release name can't be found on torrent page." )

		decodedName = DecodeHtmlEntities( titleMatch.group( 1 ) )

		# IMDb id: only looked up when the release has neither an IMDb nor a PTP id yet.
		if not ( releaseInfo.HasImdbId() or releaseInfo.HasPtpId() ):
			releaseInfo.ImdbId = NfoParser.GetImdbId( description )

		# Size, e.g.:
		# <tr><td align=right class="detailsleft"> Size:</td><td class="detailsright" align="left">4.26 GiB</td></tr>
		sizeMatch = re.search( r"""Size:</td><td class="detailsright" align="left">(.*)</td>""", description )
		if sizeMatch is not None:
			releaseInfo.Size = GetSizeFromText( sizeMatch.group( 1 ) )
		else:
			logger.warning( "Size not found on torrent page." )

		# Download URL: the query string of the download link is captured, e.g.:
		# <a href="download.php?id=c787dc1e59f6245c159a02f4402a089141933f4d&f=Hand+Of+God+S01E01+Pilot+720p+WEBRip+x264-W4F+.torrent">
		linkMatch = re.search( r"""<a href="download.php\?(.+?)">""", description )
		if linkMatch is None:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Download link can't be found on torrent page." )

		releaseInfo.SceneAccessDownloadUrl = "http://hd-torrents.org/download.php?" + linkMatch.group( 1 )
		return decodedName

コード例 #6

0

ファイルを表示

	def __ReadTorrentPage( self, logger, releaseInfo ):
		"""Download the TorrentShack torrent page and extract release information.

		Side effects on releaseInfo: ImdbId is set when neither an IMDb nor a
		PTP id is known yet, Size is set when it is found on the page, and
		SceneAccessDownloadUrl is set to the torrent download URL.

		Returns the release name with HTML entities decoded.

		Raises PtpUploaderException (Ignored_MissingInfo) when the comments
		marker, the release name or the download link cannot be found.
		"""
		pageUrl = "http://torrentshack.me/torrents.php?torrentid=%s" % releaseInfo.AnnouncementId
		logger.info( "Downloading NFO from page '%s'." % pageUrl )

		pageHtml = MakeRetryingHttpGetRequestWithRequests( pageUrl ).text
		self.__CheckIfLoggedInFromResponse( pageHtml )

		# Cut the page at the comments anchor so that nothing is parsed out of user comments.
		commentsStart = pageHtml.find( """<a name="comments">""" )
		if commentsStart == -1:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Description can't found. Probably the layout of the site has changed." )

		description = pageHtml[ :commentsStart ]

		# Release name: everything before the site suffix in the page title.
		titleMatch = re.search( r"""<title>(.+) :: TorrentShack.me</title>""", description )
		if titleMatch is None:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Release name can't be found on torrent page." )

		decodedName = DecodeHtmlEntities( titleMatch.group( 1 ) )

		# IMDb id: only looked up when the release has neither an IMDb nor a PTP id yet.
		if not ( releaseInfo.HasImdbId() or releaseInfo.HasPtpId() ):
			releaseInfo.ImdbId = NfoParser.GetImdbId( description )

		# Size: taken from the first "nobr" table cell that the search finds.
		sizeMatch = re.search( r"""<td class="nobr">(.+)</td>""", description )
		if sizeMatch is not None:
			releaseInfo.Size = GetSizeFromText( sizeMatch.group( 1 ) )
		else:
			logger.warning( "Size not found on torrent page." )

		# Download URL, e.g.:
		# <a href="torrents.php?action=download&amp;id=562922&amp;authkey=XXXXXXXXXXXXX&amp;torrent_pass=XXXXXXXXXXX" title="Download">DL</a>
		# NOTE: the "?" in the pattern is not escaped, so it makes the preceding
		# "p" optional and the captured group itself starts with the literal "?".
		# That is why the URL prefix below carries no "?" of its own.
		linkMatch = re.search( r"""<a href="torrents.php?(.+?)" title="Download">DL</a>""", description )
		if linkMatch is None:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Download link can't be found on torrent page." )

		# The link is HTML-escaped in the page; turn "&amp;" back into "&".
		queryPart = linkMatch.group( 1 ).replace("&amp;", "&")
		releaseInfo.SceneAccessDownloadUrl = "http://torrentshack.me/torrents.php" + queryPart
		return decodedName

コード例 #7

0

ファイルを表示

ファイル: AlphaRatio.py プロジェクト: waldens/PtpUploader

    def __ReadTorrentPage(self, logger, releaseInfo):
        """Download the AlphaRatio torrent page and extract release information.

        Side effects on releaseInfo: ImdbId is set when neither an IMDb nor a
        PTP id is known yet, and Size is set when it is found on the page.

        Returns the release name with HTML entities decoded.

        Raises PtpUploaderException (Ignored_MissingInfo) when the comments
        marker or the release name cannot be found.
        """
        pageUrl = "https://alpharatio.cc/torrents.php?torrentid=%s" % releaseInfo.AnnouncementId
        logger.info("Downloading NFO from page '%s'." % pageUrl)

        pageHtml = MakeRetryingHttpGetRequestWithRequests(pageUrl).text
        self.__CheckIfLoggedInFromResponse(pageHtml)

        # Cut the page at the comments anchor so that nothing is parsed out of user comments.
        commentsStart = pageHtml.find("""<a name="comments">""")
        if commentsStart == -1:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Description can't found. Probably the layout of the site has changed."
            )

        description = pageHtml[:commentsStart]

        # Release name: everything before the site suffix in the page title.
        titleMatch = re.search(r"""<title>(.+) :: AlphaRatio</title>""",
                               description)
        if titleMatch is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Release name can't be found on torrent page.")

        decodedName = DecodeHtmlEntities(titleMatch.group(1))

        # IMDb id: only looked up when the release has neither an IMDb nor a PTP id yet.
        if not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId()):
            releaseInfo.ImdbId = NfoParser.GetImdbId(description)

        # Size: taken from the first "nobr" table cell that the search finds.
        sizeMatch = re.search(r"""<td class="nobr">(.+)</td>""", description)
        if sizeMatch is not None:
            releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
        else:
            logger.warning("Size not found on torrent page.")

        return decodedName

コード例 #8

0

ファイルを表示

ファイル: Gft.py プロジェクト: Aniverse/p1

    def __ReadTorrentPageInternal(self, logger, releaseInfo, description):
        """Extract the release name, IMDb id and size from a GFT details page.

        The release name is read from the page's <title> element and returned
        with its HTML entities decoded. releaseInfo.ImdbId and releaseInfo.Size
        are filled in as side effects when they can be determined.

        Raises PtpUploaderException (Ignored_MissingInfo) when the title does
        not have the expected form.
        """
        # Release name: quoted (as &quot;) inside the page title.
        titleMatch = re.search(
            r"<title>GFT \d+ :: Details for torrent &quot;(.+)&quot;</title>",
            description)
        if titleMatch is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Release name can't be found on torrent page.")

        decodedName = DecodeHtmlEntities(titleMatch.group(1))

        # IMDb id: only looked up when the release has neither an IMDb nor a PTP id yet.
        if not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId()):
            releaseInfo.ImdbId = NfoParser.GetImdbId(description)
            if not releaseInfo.HasImdbId():
                # The description yielded no id, so hand over to the NFO page helper.
                self.__TryGettingImdbIdFromNfoPage(logger, releaseInfo)

        # Pretime detection is intentionally disabled.
        # TODO: this is unreliable as the uploaders on GFT set this
        #if description.find( """<td><img src='/pic/scene.jpg' alt='Scene' /></td>""" ) != -1:
        #	releaseInfo.SetSceneRelease()

        # Size. The site uses either double or single quotes around attribute
        # values, which is why the pattern has "." where the quotes would be:
        # <tr><td class="heading" valign="top" align="right">Size</td><td valign="top" align="left">4.47 GB (4,799,041,437bytes )</td></tr>
        # <tr><td class='heading' valign='top' align='right'>Size</td><td valign='top' align='left'>4.47 GB (4,799,041,437bytes )</td></tr>
        sizeMatch = re.search(
            r"""<tr><td class=.heading. valign=.top. align=.right.>Size</td><td valign=.top. align=.left.>.+ \((.+bytes) ?\)</td></tr>""",
            description)
        if sizeMatch is not None:
            releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
        else:
            logger.warning("Size not found on torrent page.")

        return decodedName

コード例 #9

0

ファイルを表示

ファイル: Cinemageddon.py プロジェクト: Aniverse/p1

	def __ParsePage( self, logger, releaseInfo, html, parseForExternalCreateJob = False ):
		"""Parse a Cinemageddon torrent page and fill in releaseInfo.

		Parameters:
		logger -- logger used for the non-fatal "size not found" warning.
		releaseInfo -- release being processed; ReleaseName, ResolutionType,
		ImdbId and Size are set on it when they can be determined.
		html -- the complete HTML of the torrent page.
		parseForExternalCreateJob -- when true, the release name is not taken
		from the page and the title is expected to use literal double quotes
		instead of &quot; entities.

		Raises PtpUploaderException with Ignored_MissingInfo when required
		information is absent, Ignored_Forbidden for releases marked as XXX
		and Ignored_NotSupported for a wrongly categorized DVDR.
		"""
		# Make sure we only get information from the description and not from the comments.
		descriptionEndIndex = html.find( '<p><a name="startcomments"></a></p>' )
		if descriptionEndIndex == -1:
			raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Description can't found on torrent page. Probably the layout of the site has changed." )

		description = html[ :descriptionEndIndex ]

		# We will use the torrent's name as release name.
		if not parseForExternalCreateJob:
			matches = re.search( r'href="download.php\?id=(\d+)&name=.+">(.+)\.torrent</a>', description )
			if matches is None:
				raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Can't get release name from torrent page." )

			releaseInfo.ReleaseName = DecodeHtmlEntities( matches.group( 2 ) )

		# Get source and format type from the "[year/source/format]" part of the title.
		sourceType = ""
		formatType = ""
		if ( not releaseInfo.IsSourceSet() ) or ( not releaseInfo.IsCodecSet() ):
			matches = None
			if parseForExternalCreateJob:
				matches = re.search( r'torrent details for "(.+) \[(\d+)/(.+)/(.+)\]"', description )
			else:
				matches = re.search( r"torrent details for &quot;(.+) \[(\d+)/(.+)/(.+)\]&quot;", description )

			if matches is None:
				raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "Can't get release source and format type from torrent page." )

			sourceType = matches.group( 3 )
			formatType = matches.group( 4 )

			# The source may carry a resolution after a slash ("source/720p").
			if '/' in sourceType and not releaseInfo.ResolutionType:
				sourceType, _, resolutionType = sourceType.partition( '/' )
				# str.strip returns a new string, so the result has to be assigned;
				# otherwise "720p" and "1080p" could never match the list below.
				# NOTE(review): the value is stored without the "p" suffix, same as
				# for a bare "720"/"1080" — confirm this is what consumers expect.
				resolutionType = resolutionType.strip( 'p' )
				if resolutionType in ["720", "1080"]:
					releaseInfo.ResolutionType = resolutionType

		# Get IMDb id.
		if ( not releaseInfo.HasImdbId() ) and ( not releaseInfo.HasPtpId() ):
			matches = re.search( r'imdb\.com/title/tt(\d+)', description )
			if matches is None:
				raise PtpUploaderException( JobRunningState.Ignored_MissingInfo, "IMDb id can't be found on torrent page." )

			releaseInfo.ImdbId = matches.group( 1 )

		# Get size.
		# Two possible formats:
		# <tr><td class="rowhead" valign="top" align="right">Size</td><td valign="top" align="left">1.46 GB (1,570,628,119 bytes)</td></tr>
		# <tr><td class="rowhead" valign="top" align="right">Size</td><td valign="top" align=left>1.46 GB (1,570,628,119 bytes)</td></tr>
		matches = re.search( r"""<tr><td class="rowhead" valign="top" align="right">Size</td><td valign="top" align="?left"?>.+ \((.+ bytes)\)</td></tr>""", description )
		if matches is None:
			# A missing size is not fatal; it is only logged.
			logger.warning( "Size not found on torrent page." )
		else:
			size = matches.group( 1 )
			releaseInfo.Size = GetSizeFromText( size )

		# Ignore XXX releases.
		if description.find( '>Type</td><td valign="top" align=left>XXX<' ) != -1:
			raise PtpUploaderException( JobRunningState.Ignored_Forbidden, "Marked as XXX." )

		# Note that the complete page (not only the description) is passed on here.
		self.__MapSourceAndFormatToPtp( releaseInfo, sourceType, formatType, html )

		# Make sure that this is not a wrongly categorized DVDR.
		if ( not releaseInfo.IsDvdImage() ) and ( re.search( r"\.vob</td>", description, re.IGNORECASE ) or re.search( r"\.iso</td>", description, re.IGNORECASE ) ):
			raise PtpUploaderException( JobRunningState.Ignored_NotSupported, "Wrongly categorized DVDR." )

コード例 #10

0

ファイルを表示

    def __ParsePage(self,
                    logger,
                    releaseInfo,
                    html,
                    parseForExternalCreateJob=False):
        """Parse a torrent page and fill in releaseInfo.

        Sets releaseInfo.ReleaseName (unless parseForExternalCreateJob is true
        or a usable name is already set), releaseInfo.ImdbId and
        releaseInfo.Size, then delegates source, format, resolution and
        subtitle detection to the __DownloadNfoParse* helpers.

        Raises PtpUploaderException with Ignored_MissingInfo when required
        information is absent and Ignored_NotSupported when the torrent is
        not in the movie category or is a wrongly categorized DVDR.
        """
        # Cut the page at the comments anchor so that nothing is parsed out of user comments.
        commentsStart = html.find('<p><a name="startcomments"></a></p>')
        if commentsStart == -1:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Description can't found on torrent page. Probably the layout of the site has changed."
            )

        description = html[:commentsStart]

        # The name of the torrent file doubles as the release name.
        if not parseForExternalCreateJob:
            nameMatch = re.search(
                r'href="/down.php/(\d+)/.+?">(.+?)\.torrent</a>', description)
            if nameMatch is None:
                raise PtpUploaderException(
                    JobRunningState.Ignored_MissingInfo,
                    "Can't get release name from torrent page.")

            releaseName = DecodeHtmlEntities(nameMatch.group(2))

            # Single file releases carry the container extension in their name; drop it.
            # The patterns are applied one after the other, in this order.
            # (compile().sub() is used because the flags parameter of re.sub was
            # only introduced in Python v2.7.)
            for extensionPattern in (r"\.avi$", r"\.mkv$", r"\.mp4$"):
                releaseName = re.compile(extensionPattern,
                                         re.IGNORECASE).sub("", releaseName)

            # "none" can come from FlexGet from the announcement directory.
            if (not releaseInfo.IsReleaseNameSet()
                ) or releaseInfo.ReleaseName == "none":
                releaseInfo.ReleaseName = releaseName

        # The torrent has to be in the movie category, e.g.:
        # <tr><td class="heading" align="right" valign="top">Type</td><td colspan="2" align="left" valign="top"><a href="browse.php?cat=1">Movie</a></td></tr>
        categoryMatch = re.search(
            r"""<tr><td.*?>Type</td><td.*?><a href="browse.php\?cat=1">Movie</a></td></tr>""",
            description)
        if categoryMatch is None:
            raise PtpUploaderException(JobRunningState.Ignored_NotSupported,
                                       "Type is not movie.")

        # IMDb id: only looked up when the release has neither an IMDb nor a PTP id yet.
        if not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId()):
            imdbMatch = re.search(r'imdb\.com/title/tt(\d+)', description)
            if imdbMatch is None:
                raise PtpUploaderException(
                    JobRunningState.Ignored_MissingInfo,
                    "IMDb id can't be found on torrent page.")

            releaseInfo.ImdbId = imdbMatch.group(1)

        # Size, e.g.:
        # <tr><td class="heading" align="right" valign="top">Size</td><td colspan="2" align="left" valign="top">1.37GB (1,476,374,914 bytes)</td></tr>
        sizeMatch = re.search(
            r"""<tr><td.*?>Size</td><td.*?>.+ \((.+ bytes)\)</td></tr>""",
            description)
        if sizeMatch is not None:
            releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
        else:
            logger.warning("Size not found on torrent page.")

        self.__DownloadNfoParseSourceType(releaseInfo, description)
        self.__DownloadNfoParseFormatType(releaseInfo, description)
        self.__DownloadNfoParseResolution(releaseInfo, description)
        self.__DownloadNfoParseSubtitles(releaseInfo, description)

        # A .vob or .iso file in the listing of a release that is not a DVD
        # image means that it was categorized wrongly.
        if not releaseInfo.IsDvdImage():
            dvdFileMatch = re.search(
                r"<td>.+?\.vob</td>", description, re.IGNORECASE) or re.search(
                    r"<td>.+?\.iso</td>", description, re.IGNORECASE)
            if dvdFileMatch:
                raise PtpUploaderException(
                    JobRunningState.Ignored_NotSupported,
                    "Wrongly categorized DVDR.")

コード例 #11

0

ファイルを表示

    def __DownloadNfo(self, logger, releaseInfo):
        """Download the Cinematik details page and collect release information.

        Side effects on releaseInfo: ReleaseName is built from the page title,
        ImdbId is set when neither an IMDb nor a PTP id is known yet, and Size
        is set when it is found on the page.

        Returns a (resolutionType, codec, container) tuple taken from the
        page title.

        Raises PtpUploaderException (Ignored_MissingInfo) when the comments
        marker, the expected title layout or the IMDb id cannot be found;
        HTTP errors are raised by raise_for_status().
        """
        pageUrl = "http://cinematik.net/details.php?id=%s&filelist=1" % releaseInfo.AnnouncementId
        logger.info("Collecting info from torrent page '%s'." % pageUrl)

        httpResult = MyGlobals.session.get(pageUrl)
        httpResult.raise_for_status()
        pageHtml = httpResult.text
        self.__CheckIfLoggedInFromResponse(pageHtml)

        # Cut the page at the comments anchor so that nothing is parsed out of user comments.
        commentsStart = pageHtml.find('<p><a name="startcomments"></a></p>')
        if commentsStart == -1:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Description can't found on torrent page. Probably the layout of the site has changed."
            )

        description = pageHtml[:commentsStart]

        # The title carries the name, year and the three format fields, e.g.:
        # <title>Cinematik :: Behind the Mask: The Rise of Leslie Vernon (2006) NTSC DVD9 VIDEO_TS</title>
        titleMatch = re.search(
            r"<title>Cinematik :: (.+?) \((\d+)\) (.+?) (.+?) (.+?)</title>",
            description)
        if titleMatch is None:
            raise PtpUploaderException(
                JobRunningState.Ignored_MissingInfo,
                "Can't get resolution type, codec and container from torrent page."
            )

        # Decode and trim the five captured groups in order.
        title, year, resolutionType, codec, container = [
            DecodeHtmlEntities(field).strip() for field in titleMatch.groups()
        ]

        # The container is not part of the release name.
        rawReleaseName = "%s (%s) %s %s" % (title, year, resolutionType, codec)
        releaseInfo.ReleaseName = RemoveDisallowedCharactersFromPath(
            rawReleaseName)

        # IMDb id: only looked up when the release has neither an IMDb nor a PTP id yet.
        if not (releaseInfo.HasImdbId() or releaseInfo.HasPtpId()):
            imdbMatch = re.search(r"imdb\.com/title/tt(\d+)", description)
            if imdbMatch is None:
                raise PtpUploaderException(
                    JobRunningState.Ignored_MissingInfo,
                    "IMDb id can't be found on torrent page.")

            releaseInfo.ImdbId = imdbMatch.group(1)

        # Size. The order of the align/valign attributes varies, so the pattern accepts both:
        # <td class="heading" align="right" valign="top">Size</td><td align="left" valign="top">6.81 GB &nbsp;&nbsp;&nbsp;(7,313,989,632 bytes)</td>
        # <td class="heading" valign="top" align="right">Size</td><td valign="top" align="left">4.38 GB    (4,699,117,568 bytes)</td>
        sizeMatch = re.search(
            r"""<td class="heading" v?align=".+?" v?align=".+?">Size</td><td v?align=".+?" v?align=".+?">.+\((.+ bytes)\)</td>""",
            description)
        if sizeMatch is not None:
            releaseInfo.Size = GetSizeFromText(sizeMatch.group(1))
        else:
            logger.warning("Size not found on torrent page.")

        return resolutionType, codec, container