Ejemplo n.º 1
0
    def validateRSS(self):
        """Check that the configured RSS feed yields a usable torrent link.

        Every return statement visible here yields a ``(success, message)``
        tuple.

        NOTE(review): the ``except`` clause that closes the outer ``try`` is
        not visible in this excerpt.
        """

        try:
            # Cookies, when configured, must look like "name=value" pairs
            # joined by ';'.
            if self.cookies:
                cookie_validator = re.compile("^(\w+=\w+)(;\w+=\w+)*$")
                if not cookie_validator.match(self.cookies):
                    return (False, 'Cookie is not correctly formatted: ' + self.cookies)

            data = self.cache._getRSSData()['entries']
            if not data:
                return (False, 'No items found in the RSS feed ' + self.url)

            # Only the first feed entry is inspected.
            (title, url) = self._get_title_and_url(data[0])

            if not title:
                return (False, 'Unable to get title from first item')

            if not url:
                return (False, 'Unable to get torrent url from first item')

            # A magnet link carrying a 32-40 character btih needs no download.
            if url.startswith('magnet:') and re.search('urn:btih:([\w]{32,40})', url):
                return (True, 'RSS feed Parsed correctly')
            else:
                # Otherwise fetch the link (with the configured cookies added
                # to the session) and make sure the payload bdecodes.
                if self.cookies:
                    requests.utils.add_dict_to_cookiejar(self.session.cookies,
                                                         dict(x.rsplit('=', 1) for x in (self.cookies.split(';'))))
                torrent_file = self.getURL(url)
                try:
                    bdecode(torrent_file)
                except Exception, e:
                    # Dump what was downloaded before reporting the failure.
                    self.dumpHTML(torrent_file)
                    return (False, 'Torrent link is not a valid torrent file: ' + ex(e))

            return (True, 'RSS feed Parsed correctly')
Ejemplo n.º 2
0
    def validateRSS(self):
        """Check that the RSS feed returns data whose first item is usable.

        Every return statement visible here yields a ``(success, message)``
        tuple.

        NOTE(review): the ``except`` clause that closes the outer ``try`` is
        not visible in this excerpt.
        """

        try:

            data = self.cache._getRSSData()
            if not data:
                return (False, 'No data returned from url: ' + self.url)

            items = data.entries
            if not len(items) > 0:
                return (False, 'No items found in the RSS feed ' + self.url)

            # Only the first feed entry is inspected.
            (title, url) = self._get_title_and_url(items[0])

            if not title:
                return (False, 'Unable to get title from first item')

            if not url:
                return (False, 'Unable to get torrent url from first item')

            # A magnet link carrying a 32-40 character btih needs no download.
            if url.startswith('magnet:') and re.search('urn:btih:([\w]{32,40})', url):
                return (True, 'RSS feed Parsed correctly')
            else:
                # Otherwise fetch the link and make sure the payload bdecodes.
                torrent_file = self.getURL(url)
                try:
                    bdecode(torrent_file)
                except Exception, e:
                    # Dump what was downloaded before reporting the failure.
                    self.dumpHTML(torrent_file)
                    return (False, 'Torrent link is not a valid torrent file: ' + ex(e))

            return (True, 'RSS feed Parsed correctly')
Ejemplo n.º 3
0
    def validate_feed(self):
        """Validate the feed by finding one item with a usable torrent link.

        Items are tried in order until one has either a magnet link with a
        well-formed ``urn:btih:`` info-hash or a download URL whose payload
        bdecodes.

        Returns:
            ``(True, None)`` when an item validates, otherwise
            ``(False, message)``. A failed cookie check returns whatever
            ``self._check_cookie()`` reported.
        """

        success, err_msg = self._check_cookie()
        if not success:
            return success, err_msg

        try:
            items = self.cache_data()

            for item in items:
                title, url = self._title_and_url(item)
                if not (title and url):
                    continue
                if url.startswith('magnet:'):
                    # Info-hashes appear in the wild as hex in either case or
                    # as 32-character base32, so match case-insensitively and
                    # accept both encodings.
                    if re.search(r'urn:btih:(?:[0-9a-f]{32,40}|[a-z2-7]{32})', url, re.I):
                        break
                else:
                    torrent_file = self.get_url(url)
                    try:
                        bdecode(torrent_file)
                        break
                    except Exception:
                        # Best effort: an undecodable payload only
                        # disqualifies this item; try the next one.
                        pass
            else:
                # The loop ended without a break: either there were no items
                # or none of them validated.
                reason = 'No items found in the' if 0 == len(items) else 'Fail to validate'
                return False, '%s fetched RSS feed data: %s' % (reason, self.url)

            return True, None

        except Exception as e:
            return False, 'Error when trying to load RSS: ' + ex(e)
Ejemplo n.º 4
0
    def validateRSS(self):
        """Validate the cookie setting and the first item of the RSS feed.

        Each visible return statement produces a ``(success, message)`` tuple.

        NOTE(review): the outer ``try`` has no ``except`` clause in this
        excerpt; the handler is presumably cut off.
        """

        try:
            # The cookie string must be ';'-separated "name=value" pairs.
            if self.cookies:
                cookie_validator = re.compile("^(\w+=\w+)(;\w+=\w+)*$")
                if not cookie_validator.match(self.cookies):
                    return (False, 'Cookie is not correctly formatted: ' + self.cookies)

            data = self.cache._getRSSData()['entries']
            if not data:
                return (False, 'No items found in the RSS feed ' + self.url)

            # Validation looks at the first entry only.
            (title, url) = self._get_title_and_url(data[0])

            if not title:
                return (False, 'Unable to get title from first item')

            if not url:
                return (False, 'Unable to get torrent url from first item')

            # Magnet links are accepted on the strength of their btih alone.
            if url.startswith('magnet:') and re.search('urn:btih:([\w]{32,40})', url):
                return (True, 'RSS feed Parsed correctly')
            else:
                # Non-magnet links are downloaded (cookies are loaded into the
                # session first) and must bdecode.
                if self.cookies:
                    requests.utils.add_dict_to_cookiejar(self.session.cookies,
                                                         dict(x.rsplit('=', 1) for x in (self.cookies.split(';'))))
                torrent_file = self.getURL(url)
                try:
                    bdecode(torrent_file)
                except Exception, e:
                    # Keep a dump of the bad payload, then report the failure.
                    self.dumpHTML(torrent_file)
                    return (False, 'Torrent link is not a valid torrent file: ' + ex(e))

            return (True, 'RSS feed Parsed correctly')
Ejemplo n.º 5
0
    def _get_torrent_hash(self, result):

        if result.url.startswith('magnet'):
            result.hash = re.findall('urn:btih:([\w]{32,40})', result.url)[0]
            if len(result.hash) == 32:
                result.hash = b16encode(b32decode(result.hash)).lower()
        else:
            if not result.content:
                logger.log('Torrent without content', logger.ERROR)
                raise Exception('Torrent without content')

            try:
                torrent_bdecode = bdecode(result.content)
            except BTFailure as e:
                logger.log('Unable to bdecode torrent', logger.ERROR)
                logger.log('Torrent bencoded data: {0}'.format(str(result.content)), logger.DEBUG)
                raise
            try:
                info = torrent_bdecode["info"]
            except Exception as e:
                logger.log('Unable to find info field in torrent', logger.ERROR)
                raise
            result.hash = sha1(bencode(info)).hexdigest()

        return result
Ejemplo n.º 6
0
    def _get_torrent_hash(self, result):

        if result.url.startswith('magnet'):
            result.hash = re.findall('urn:btih:([\w]{32,40})', result.url)[0]
            if len(result.hash) == 32:
                result.hash = b16encode(b32decode(result.hash)).lower()
        else:
            if not result.content:
                logger.log('Torrent without content', logger.ERROR)
                raise Exception('Torrent without content')

            try:
                torrent_bdecode = bdecode(result.content)
            except BTFailure as e:
                logger.log('Unable to bdecode torrent', logger.ERROR)
                logger.log(
                    'Torrent bencoded data: {0}'.format(str(result.content)),
                    logger.DEBUG)
                raise
            try:
                info = torrent_bdecode["info"]
            except Exception as e:
                logger.log('Unable to find info field in torrent',
                           logger.ERROR)
                raise
            result.hash = sha1(bencode(info)).hexdigest()

        return result
Ejemplo n.º 7
0
    def get_torrent(self, url, savelocation=None):
        """Download the .torrent referenced by *url* and write it to disk.

        The torrent id is read from the ``t`` query parameter of *url*, a
        ``bb_dl`` cookie carrying that id is placed in the cookie jar, and the
        file is fetched from dl.rutracker.org. It is saved as
        ``<torrent_id>.torrent`` inside *savelocation*, or inside a fresh
        temporary directory when *savelocation* is falsy. When
        ``headphones.TORRENT_DOWNLOADER == 2`` the saved file is also passed
        to ``self.utorrent_add_file``.

        Returns:
            ``False`` when any step raises. No other return statement is
            visible in this excerpt.
        """

        # maxsplit=1 keeps query values that themselves contain '=' intact.
        torrent_id = dict([part.split('=', 1) for part in urlparse(url)[4].split('&')])['t']
        self.cookiejar.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False, domain='.rutracker.org', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False))
        downloadurl = 'http://dl.rutracker.org/forum/dl.php?t=' + torrent_id
        torrent_name = torrent_id + '.torrent'

        try:
            prev = os.umask(headphones.UMASK)
            try:
                page = self.opener.open(downloadurl)
                torrent = page.read()
                decoded = bdecode(torrent)
                metainfo = decoded['info']
                # NOTE(review): tor_hash is not used in the visible part of
                # this function; kept in case truncated code relies on it.
                tor_hash = sha1(bencode(metainfo)).hexdigest()
                if savelocation:
                    download_path = os.path.join(savelocation, torrent_name)
                else:
                    tempdir = mkdtemp(suffix='_rutracker_torrents')
                    download_path = os.path.join(tempdir, torrent_name)
                # The context manager closes the file even if the write fails.
                with open(download_path, 'wb') as fp:
                    fp.write(torrent)
            finally:
                # Always restore the process umask, including on failure.
                os.umask(prev)

            # Add file to utorrent
            if headphones.TORRENT_DOWNLOADER == 2:
                self.utorrent_add_file(download_path)

        except Exception as e:
            logger.error('Error getting torrent: %s' % e)
            return False
Ejemplo n.º 8
0
    def _get_torrent_hash(self, result):

        if result.url.startswith('magnet'):
            torrent_hash = re.findall('urn:btih:([\w]{32,40})', result.url)[0]
        else:
            info = bdecode(result.content)["info"]
            torrent_hash = sha1(bencode(info)).hexdigest()

        return torrent_hash
Ejemplo n.º 9
0
    def _get_torrent_hash(self, result):
        
        if result.url.startswith('magnet'):
            torrent_hash = re.findall('urn:btih:([\w]{32,40})', result.url)[0]
        else:
            info = bdecode(result.content)["info"]
            torrent_hash = sha1(bencode(info)).hexdigest()

        return torrent_hash
Ejemplo n.º 10
0
def _TorrentHash(url=None,torrent=None):
    hash=None
    if url.startswith('magnet'):
        hash = re.search('urn:btih:([\w]{32,40})', url).group(1)
        if len(hash) == 32:
            hash = b16encode(b32decode(hash)).upper()
    else:
        info = bdecode(torrent)["info"]
        hash = sha1(bencode(info)).hexdigest().upper()
    return hash
Ejemplo n.º 11
0
 def run(self):
     """Rejoin the DHT, then receive and dispatch messages forever.

     Each datagram read from ``self.ufd`` is bdecoded and handed to
     ``self.on_message`` together with the sender address. Any exception
     raised while handling one datagram is discarded and the loop goes on.
     """
     self.rejoin_dht()
     while True:
         # noinspection PyBroadException
         try:
             packet, sender = self.ufd.recvfrom(65536)
             decoded = bdecode(packet)
             self.on_message(decoded, sender)
         except Exception:
             continue
Ejemplo n.º 12
0
    def validate_feed(self):
        """Validate the feed by finding one item with a usable torrent link.

        Items returned by ``self._search_provider({'Validate': ['']})`` are
        tried in order until one has a magnet link with a hex info-hash
        (base32 hashes are converted first) or a download URL whose payload
        bdecodes.

        Returns:
            ``(True, None)`` when an item validates, or when
            ``self.should_skip()`` turns true after a download; otherwise
            ``(False, message)``. A failed cookie check returns whatever
            ``self._check_cookie()`` reported.
        """

        success, err_msg = self._check_cookie()
        if not success:
            return success, err_msg

        try:
            items = self._search_provider({'Validate': ['']})

            for item in items:
                title, url = self._title_and_url(item)
                if not (title and url):
                    continue
                if url.startswith('magnet:'):
                    btih = None
                    try:
                        btih = re.findall(r'urn:btih:([\w]{32,40})', url)[0]
                        if 32 == len(btih):
                            from base64 import b16encode, b32decode
                            btih = b16encode(b32decode(btih))
                    except Exception:
                        # StandardError is itself an Exception subclass, so
                        # catching Exception alone covers the same cases.
                        pass
                    # btih stays None when the link has no urn:btih part.
                    # Skip the item instead of passing None to re.search,
                    # which would raise and abort the whole validation.
                    if btih and re.search('(?i)[0-9a-f]{32,40}', btih):
                        break
                else:
                    torrent_file = self.get_url(url)
                    if self.should_skip():
                        break

                    try:
                        bdecode(torrent_file)
                        break
                    except Exception:
                        # Best effort: try the next item.
                        pass
            else:
                return False, '%s fetched RSS feed data: %s' % \
                              (('Fail to validate', 'No items found in the')[0 == len(items)], self.url)

            return True, None

        except Exception as e:
            return False, 'Error when trying to load RSS: ' + ex(e)
Ejemplo n.º 13
0
    def validate_feed(self):
        """Check that at least one feed item carries a usable torrent link.

        The items come from ``self._search_provider({'Validate': ['']})``. A
        magnet item validates when its info-hash (base32 is converted to hex
        first) matches a hex pattern; any other item validates when the data
        fetched from its URL bdecodes.

        Returns:
            ``(True, None)`` on success, or when ``self.should_skip()`` turns
            true after a download; ``(False, message)`` otherwise, including
            the result of a failed ``self._check_cookie()``.
        """

        success, err_msg = self._check_cookie()
        if not success:
            return success, err_msg

        try:
            items = self._search_provider({'Validate': ['']})

            for item in items:
                title, url = self._title_and_url(item)
                if not (title and url):
                    continue
                if url.startswith('magnet:'):
                    btih = None
                    try:
                        btih = re.findall(r'urn:btih:([\w]{32,40})', url)[0]
                        if 32 == len(btih):
                            from base64 import b16encode, b32decode
                            btih = b16encode(b32decode(btih))
                    except Exception:
                        # Exception already covers StandardError.
                        pass
                    # Without this guard a magnet lacking urn:btih leaves
                    # btih as None, re.search raises TypeError, and the outer
                    # handler reports a load error instead of moving on.
                    if btih and re.search('(?i)[0-9a-f]{32,40}', btih):
                        break
                else:
                    torrent_file = self.get_url(url)
                    if self.should_skip():
                        break

                    try:
                        bdecode(torrent_file)
                        break
                    except Exception:
                        # An undecodable payload only rules out this item.
                        pass
            else:
                return False, '%s fetched RSS feed data: %s' % \
                              (('Fail to validate', 'No items found in the')[0 == len(items)], self.url)

            return True, None

        except Exception as e:
            return False, 'Error when trying to load RSS: ' + ex(e)
Ejemplo n.º 14
0
def CalcTorrentHash(torrent):
    """Return the info-hash of a magnet link or of bencoded torrent data.

    Args:
        torrent: either a magnet URI (anything starting with ``magnet``) or
            the raw bencoded contents of a .torrent file.

    Returns:
        The hash as lower-case hex when it is computed or converted from
        base32; a 40-character hash taken from a magnet link is returned as
        written.

    Raises:
        IndexError: when a magnet link has no ``urn:btih:`` component.
    """

    if torrent.startswith('magnet'):
        torrent_hash = re.findall(r'urn:btih:([\w]{32,40})', torrent)[0]
        if len(torrent_hash) == 32:
            # casefold=True: lower-case base32 is otherwise rejected.
            torrent_hash = b16encode(b32decode(torrent_hash, casefold=True)).lower()
    else:
        info = bdecode(torrent)["info"]
        torrent_hash = sha1(bencode(info)).hexdigest()
    # %-formatting instead of '+' so the log call cannot fail on the type of
    # the converted hash.
    logger.debug('Torrent Hash: %s' % torrent_hash)
    return torrent_hash
Ejemplo n.º 15
0
    def _get_torrent_hash(self, result):

        if result.url.startswith('magnet'):
            result.hash = re.findall('urn:btih:([\w]{32,40})', result.url)[0]
            if len(result.hash) == 32:
                result.hash = b16encode(b32decode(result.hash)).lower()
        else:
            info = bdecode(result.content)['info']
            result.hash = sha1(bencode(info)).hexdigest()

        return result
Ejemplo n.º 16
0
    def _get_torrent_hash(self, result):

        if result.url.startswith("magnet"):
            result.hash = re.findall("urn:btih:([\w]{32,40})", result.url)[0]
            if len(result.hash) == 32:
                result.hash = b16encode(b32decode(result.hash)).lower()
        else:
            result.content = result.provider.getURL(result.url)
            info = bdecode(result.content)["info"]
            result.hash = sha1(bencode(info)).hexdigest()

        return result
Ejemplo n.º 17
0
 def _get_torrent_hash(self, result):
     
     if result.url.startswith('magnet'):
         torrent_hash = re.findall('urn:btih:([\w]{32,40})', result.url)[0]
     else:
         if hasattr(result , 'extraInfo') and len(result.extraInfo)>0:
             torrent_hash = result.extraInfo[0]
         elif hasattr(result,'content') :
             info = bdecode(result.content)["info"]
             torrent_hash = sha1(bencode(info)).hexdigest()
         else:
             torrent_hash = result.url
     
     return torrent_hash
def from_torrent_url(url):
    """Build a magnet URI from the .torrent file located at *url*.

    The file is fetched with ``url_get``, bdecoded, and its ``info``
    dictionary is re-bencoded and SHA-1 hashed; the base32 form of that
    digest becomes the ``xt=urn:btih:`` part. The torrent name is added as
    ``dn`` and, when the torrent has one, the announce URL as ``tr``.
    """
    import base64
    from lib import bencode
    import hashlib

    # Parenthesised form prints the same text and also parses on Python 3.
    print("#### url: %s" % (url))
    torrent_data = url_get(url)
    metadata = bencode.bdecode(torrent_data)
    hashcontents = bencode.bencode(metadata["info"])
    digest = hashlib.sha1(hashcontents).digest()
    b32hash = base64.b32encode(digest)
    params = {"dn": metadata["info"]["name"]}
    # Torrents without an "announce" key used to raise KeyError here; leave
    # the tracker parameter out instead.
    if "announce" in metadata:
        params["tr"] = metadata["announce"]
    logger.info(params)
    paramstr = urllib.urlencode(params)
    return "magnet:?%s&%s" % ("xt=urn:btih:%s" % b32hash, paramstr)
Ejemplo n.º 19
0
def from_torrent_url(url):
    """Turn the .torrent file found at *url* into a magnet URI.

    The info-hash is the SHA-1 of the re-bencoded ``info`` dictionary,
    written in base32. The torrent name (``dn``) and announce URL (``tr``)
    are appended as URL-encoded parameters.
    """
    import base64
    import hashlib
    from lib import bencode

    print("#### url: %s" % (url))
    metadata = bencode.bdecode(url_get(url))
    info_bytes = bencode.bencode(metadata['info'])
    b32hash = base64.b32encode(hashlib.sha1(info_bytes).digest())
    params = {'dn': metadata['info']['name'], 'tr': metadata['announce']}
    logger.info(params)
    xt_part = 'xt=urn:btih:%s' % b32hash
    return 'magnet:?%s&%s' % (xt_part, urllib.urlencode(params))
Ejemplo n.º 20
0
def calculate_torrent_hash(link, data=None):
    """
    Calculate the torrent hash from a magnet link or data. Returns empty string
    when it cannot create a torrent hash given the input data.

    Args:
        link: magnet URI, or any other link when *data* is supplied.
        data: bencoded torrent contents, used when *link* is not a magnet.

    Returns:
        The info-hash (hex), or ``''`` when the magnet link has no usable
        ``urn:btih:`` value, the data cannot be decoded, or neither input is
        usable.
    """

    if link.startswith("magnet:"):
        match = re.search(r"urn:btih:([\w]{32,40})", link)
        if not match:
            # Honour the documented contract instead of raising IndexError.
            logger.error("Cannot find a torrent hash in magnet link")
            return ''
        torrent_hash = match.group(1)
        if len(torrent_hash) == 32:
            try:
                # casefold=True: lower-case base32 is otherwise rejected.
                torrent_hash = b16encode(b32decode(torrent_hash, casefold=True)).lower()
            except (TypeError, ValueError) as e:
                logger.error("Error calculating hash: %s" % e)
                return ''
    elif data:
        try:
            # noinspection PyUnresolvedReferences
            info = bdecode(data)["info"]
            torrent_hash = sha1(bencode(info)).hexdigest()
        except Exception as e:
            logger.error("Error calculating hash: %s" % e)
            return ''
    else:
        logger.error("Cannot calculate torrent hash without magnet link or data")
        return ''

    logger.debug('Torrent Hash: %s' % torrent_hash)
    return torrent_hash
Ejemplo n.º 21
0
def main():
    """Decode ``test/blah.torrent`` under the working directory and print it."""
    # os.path.join gives the same path on Windows and a usable one elsewhere.
    path = os.path.join(os.getcwd(), 'test', 'blah.torrent')
    # Torrent files are binary; text mode can alter the bytes while reading.
    with open(path, 'rb') as torrent_file:
        data = torrent_file.read()
    torrent = bdecode(data)
    print(torrent)
Ejemplo n.º 22
0
    def search(self, searchurl, maxsize, minseeders, albumid):
        """
        Parse the search results and return valid torrent list

        Scrapes titles, download URLs, seeder counts and sizes from the page
        at ``searchurl``, then keeps entries whose size is at most ``maxsize``
        and whose seeder count is at least ``minseeders``. Accepted entries
        are collected in ``rulist`` as ``(title, size, topicurl)`` tuples.

        ``albumid`` is only used by the track-count comparison, which is
        switched off through ``check_track_count = False``.

        Returns False when the page yields no results.
        NOTE(review): no return statement after the final loop is visible in
        this excerpt.
        """

        titles = []
        urls = []
        seeders = []
        sizes = []
        torrentlist = []
        rulist = []

        try:

            page = self.opener.open(searchurl, timeout=60)
            soup = BeautifulSoup(page.read())

            # Debug
            #logger.debug (soup.prettify())

            # Title
            for link in soup.find_all('a', attrs={'class' : 'med tLink hl-tags bold'}):
                title = link.get_text()
                titles.append(title)

            # Download URL
            for link in soup.find_all('a', attrs={'class' : 'small tr-dl dl-stub'}):
                url = link.get('href')
                urls.append(url)

            # Seeders
            for link in soup.find_all('b', attrs={'class' : 'seedmed'}):
                seeder = link.get_text()
                seeders.append(seeder)

            # Size
            for link in soup.find_all('td', attrs={'class' : 'row4 small nowrap tor-size'}):
                size = link.u.string
                sizes.append(size)

        # Any failure while fetching or parsing is swallowed; whatever was
        # collected up to that point is used below.
        except :
            pass

        # Combine lists
        torrentlist = zip(titles, urls, seeders, sizes)

        # return if nothing found
        if not torrentlist:
            return False

        # don't bother checking track counts anymore, let searcher filter instead
        # leave code in just in case
        check_track_count = False

        if check_track_count:

            # get headphones track count for album, return if not found
            myDB = db.DBConnection()
            tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid])
            hptrackcount = len(tracks)

            if not hptrackcount:
                logger.info('headphones track info not found, cannot compare to torrent')
                return False

            # Return all valid entries, ignored, required words now checked in searcher.py

            #unwantedlist = ['promo', 'vinyl', '[lp]', 'songbook', 'tvrip', 'hdtv', 'dvd']

            formatlist = ['ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif']
            deluxelist = ['deluxe', 'edition', 'japanese', 'exclusive']

        for torrent in torrentlist:

            # Each entry is a (title, url, seeders, size) tuple from the zip
            # above; note that `seeders` is rebound from the list to one value.
            returntitle = torrent[0].encode('utf-8')
            url = torrent[1]
            seeders = torrent[2]
            size = torrent[3]

            if int(size) <= maxsize and int(seeders) >= minseeders:

                #Torrent topic page
                torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t']
                topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id

                # add to list
                if not check_track_count:
                    valid = True
                else:

                    # Check torrent info
                    self.cookiejar.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False, domain='.rutracker.org', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False))

                    # Debug
                    #for cookie in self.cookiejar:
                    #    logger.debug ('Cookie: %s' % cookie)

                    try:
                        page = self.opener.open(url)
                        torrent = page.read()
                        # metainfo is only assigned when the download is
                        # non-empty.
                        if torrent:
                            decoded = bdecode(torrent)
                            metainfo = decoded['info']
                        page.close ()
                    except Exception, e:
                        logger.error('Error getting torrent: %s' % e)
                        return False

                    # get torrent track count and check for cue
                    trackcount = 0
                    cuecount = 0

                    if 'files' in metainfo: # multi
                        for pathfile in metainfo['files']:
                            path = pathfile['path']
                            for file in path:
                                if any(file.lower().endswith('.' + x.lower()) for x in formatlist):
                                    trackcount += 1
                                if '.cue' in file:
                                    cuecount += 1

                    title = returntitle.lower()
                    logger.debug ('torrent title: %s' % title)
                    logger.debug ('headphones trackcount: %s' % hptrackcount)
                    logger.debug ('rutracker trackcount: %s' % trackcount)

                    # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s)
                    # This is for the case where we have a single .flac/.wav which can be split by cue
                    # Not great, but shouldn't be doing this too often
                    totallogcount = 0
                    if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount:
                        page = self.opener.open(topicurl, timeout=60)
                        soup = BeautifulSoup(page.read())
                        findtoc = soup.find_all(text='TOC of the extracted CD')
                        if not findtoc:
                            findtoc = soup.find_all(text='TOC извлечённого CD')
                        for toc in findtoc:
                            # Count consecutive numeric first columns that
                            # follow a "1"/"01" entry; stop at the first
                            # non-numeric one.
                            logcount = 0
                            for toccontent in toc.find_all_next(text=True):
                                cut_string = toccontent.split('|')
                                new_string = cut_string[0].lstrip().rstrip()
                                if new_string == '1' or new_string == '01':
                                    logcount = 1
                                elif logcount > 0:
                                    if new_string.isdigit():
                                        logcount += 1
                                    else:
                                        break
                            totallogcount = totallogcount + logcount

                    if totallogcount > 0:
                        trackcount = totallogcount
                        logger.debug ('rutracker logtrackcount: %s' % totallogcount)

                    # If torrent track count = hp track count then return torrent,
                    # if greater, check for deluxe/special/foreign editions
                    # if less, then allow if it's a single track with a cue
                    valid = False

                    if trackcount == hptrackcount:
                        valid = True
                    elif trackcount > hptrackcount:
                        if any(deluxe in title for deluxe in deluxelist):
                            valid = True

                # Add to list
                if valid:
                    rulist.append((returntitle, size, topicurl))
                else:
                    if topicurl:
                        logger.info(u'<a href="%s">Torrent</a> found with %s tracks but the selected headphones release has %s tracks, skipping for rutracker.org' % (topicurl, trackcount, hptrackcount))
            else:
                logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (returntitle, int(size), int(seeders)))
Ejemplo n.º 23
0
def main():
    """Read ``test/blah.torrent`` from the working directory, bdecode and print it."""
    # Build the path portably rather than with hard-coded backslashes.
    path = os.path.join(os.getcwd(), 'test', 'blah.torrent')
    # Bencoded data is binary, so read it in 'rb' mode to get the exact bytes.
    with open(path, 'rb') as torrent_file:
        data = torrent_file.read()
    torrent = bdecode(data)
    print(torrent)
Ejemplo n.º 24
0
    def search(self, searchurl, maxsize, minseeders, albumid, bitrate):
        """
        Parse the search results and return the first valid torrent

        Scrapes titles, download URLs, seeder counts and sizes from the page
        at ``searchurl``. Each candidate that passes the unwanted-word, size
        and seeder filters is downloaded, and its audio-file count is compared
        with the number of tracks stored for ``albumid``.

        Returns:
            ``False`` when nothing was found, when no track info exists for
            the album, or when a torrent download fails. When ``bitrate`` is
            falsy, the list ``[(title, size, topicurl)]`` holding the first
            valid torrent. When ``bitrate`` is truthy, valid torrents are
            accumulated instead.
            NOTE(review): no return after the loop is visible in this excerpt.
        """

        titles = []
        urls = []
        seeders = []
        sizes = []
        rulist = []

        try:

            page = self.opener.open(searchurl, timeout=60)
            soup = BeautifulSoup(page.read())

            # Debug
            # logger.debug (soup.prettify())

            # Title
            for link in soup.find_all("a", attrs={"class": "med tLink bold"}):
                titles.append(link.get_text())

            # Download URL
            for link in soup.find_all("a", attrs={"class": "small tr-dl dl-stub"}):
                urls.append(link.get("href"))

            # Seeders
            for link in soup.find_all("td", attrs={"class": "row4 seedmed"}):
                seeders.append(link.get_text())

            # Size
            for link in soup.find_all("td", attrs={"class": "row4 small nowrap tor-size"}):
                sizes.append(link.u.string)

        except Exception as e:
            # Best effort: keep whatever was scraped before the failure, but
            # leave a trace instead of hiding the error completely.
            logger.error("Error parsing rutracker search results: %s" % e)

        # Combine lists (materialised so the emptiness test below is reliable)
        torrentlist = list(zip(titles, urls, seeders, sizes))

        # return if nothing found
        if not torrentlist:
            return False

        # get headphones track count for album, return if not found
        myDB = db.DBConnection()
        tracks = myDB.select("SELECT TrackTitle from tracks WHERE AlbumID=?", [albumid])
        hptrackcount = len(tracks)

        if not hptrackcount:
            logger.info("headphones track info not found, cannot compare to torrent")
            return False

        unwanted_words = ("promo", "vinyl", "songbook", "tvrip", "hdtv", "dvd")
        audio_extensions = (".ape", ".flac", ".ogg", ".m4a", ".aac", ".mp3", ".wav", ".aif")
        edition_words = ("deluxe", "edition", "japanese", "exclusive")

        # Return the first valid torrent, unless we want a preferred bitrate then we want all valid entries
        for torrent in torrentlist:

            returntitle = torrent[0].encode("utf-8")
            url = torrent[1]
            seeder_count = torrent[2]
            size = torrent[3]

            # Attempt to filter out unwanted
            title = returntitle.lower()

            if any(word in title for word in unwanted_words):
                continue
            if int(size) > maxsize or int(seeder_count) < minseeders:
                continue

            # Check torrent info
            torrent_id = dict([part.split("=") for part in urlparse(url)[4].split("&")])["t"]
            self.cookiejar.set_cookie(
                cookielib.Cookie(
                    version=0,
                    name="bb_dl",
                    value=torrent_id,
                    port=None,
                    port_specified=False,
                    domain=".rutracker.org",
                    domain_specified=False,
                    domain_initial_dot=False,
                    path="/",
                    path_specified=True,
                    secure=False,
                    expires=None,
                    discard=True,
                    comment=None,
                    comment_url=None,
                    rest={"HttpOnly": None},
                    rfc2109=False,
                )
            )

            # Debug
            # for cookie in self.cookiejar:
            #    logger.debug ('Cookie: %s' % cookie)

            # Reset per torrent: an empty download must not reuse the previous
            # torrent's metainfo (or hit an unbound name on the first pass).
            metainfo = {}
            try:
                page = self.opener.open(url)
                try:
                    torrent_data = page.read()
                    if torrent_data:
                        metainfo = bencode.bdecode(torrent_data)["info"]
                finally:
                    # Close the response even when reading or decoding fails.
                    page.close()
            except Exception as e:
                logger.error("Error getting torrent: %s" % e)
                return False

            # get torrent track count and check for cue
            trackcount = 0
            cuecount = 0

            if "files" in metainfo:  # multi
                for pathfile in metainfo["files"]:
                    for file in pathfile["path"]:
                        if any(ext in file for ext in audio_extensions):
                            trackcount += 1
                        if ".cue" in file:
                            cuecount += 1

            # Torrent topic page
            topicurl = "http://rutracker.org/forum/viewtopic.php?t=" + torrent_id
            logger.debug("torrent title: %s" % title)
            logger.debug("headphones trackcount: %s" % hptrackcount)
            logger.debug("rutracker trackcount: %s" % trackcount)

            # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s)
            # This is for the case where we have a single .flac/.wav which can be split by cue
            # Not great, but shouldn't be doing this too often
            totallogcount = 0
            if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount:
                page = self.opener.open(topicurl, timeout=60)
                soup = BeautifulSoup(page.read())
                findtoc = soup.find_all(text="TOC of the extracted CD")
                if not findtoc:
                    findtoc = soup.find_all(text="TOC извлечённого CD")
                for toc in findtoc:
                    # Count consecutive numeric first columns that follow a
                    # "1"/"01" entry; stop at the first non-numeric one.
                    logcount = 0
                    for toccontent in toc.find_all_next(text=True):
                        cut_string = toccontent.split("|")
                        new_string = cut_string[0].lstrip().rstrip()
                        if new_string == "1" or new_string == "01":
                            logcount = 1
                        elif logcount > 0:
                            if new_string.isdigit():
                                logcount += 1
                            else:
                                break
                    totallogcount = totallogcount + logcount

            if totallogcount > 0:
                trackcount = totallogcount
                logger.debug("rutracker logtrackcount: %s" % totallogcount)

            # If torrent track count = hp track count then return torrent,
            # if greater, check for deluxe/special/foreign editions
            valid = False

            if trackcount == hptrackcount:
                valid = True
            elif trackcount > hptrackcount:
                # Every keyword is tested against the title. The previous
                # `... or "japanese" or ...` chain contained a bare string
                # and was therefore always true.
                if any(word in title for word in edition_words):
                    valid = True

            # return 1st valid torrent if not checking by bitrate, else add to list and return at end
            if valid:
                rulist.append((returntitle, size, topicurl))
                if not bitrate:
                    return rulist
Ejemplo n.º 25
0
                elif headphones.TORRENTBLACKHOLE_DIR != "":
                
                    # Get torrent name from .torrent, this is usually used by the torrent client as the folder name


                    torrent_name = torrent_folder_name + '.torrent'
                    download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name)
                    try:
			#Write the torrent file to a path derived from the TORRENTBLACKHOLE_DIR and file name.
			torrent_file = open(download_path, 'wb')
			torrent_file.write(data)
			torrent_file.close()
			#Open the fresh torrent file again so we can extract the proper torrent name
			#Used later in post-processing.
			torrent_file = open(download_path, 'rb')
                        torrent_info = bencode.bdecode(torrent_file.read())
			torrent_file.close()
                        torrent_folder_name = torrent_info['info'].get('name','').decode('utf-8')
                        logger.info('Torrent folder name: %s' % torrent_folder_name)
                    except Exception, e:
                        logger.error('Couldn\'t get name from Torrent file: %s' % e)
                        break
                        
                myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
                myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name])

def preprocesstorrent(resultlist):
    """Walk ``resultlist`` while tracking the current choice in ``selresult``.

    The empty string marks "nothing selected yet"; the first entry seen
    becomes the initial choice.

    NOTE(review): only the beginning of this function is visible in this
    excerpt -- how later entries are compared and what is returned should be
    confirmed against the full source.
    """
    selresult = ""
    for result in resultlist:
        if selresult == "":
            # Nothing chosen yet: seed the selection with this entry.
            selresult = result
Ejemplo n.º 26
0
                if headphones.TORRENTBLACKHOLE_DIR == "sendtracker":

                    torrent = classes.TorrentDataSearchResult()
                    torrent.extraInfo.append(data)
                    torrent.name = torrent_folder_name
                    sab.sendTorrent(torrent)

                elif headphones.TORRENTBLACKHOLE_DIR != "":
                
                    # Get torrent name from .torrent, this is usually used by the torrent client as the folder name
                    
                    torrent_name = torrent_folder_name + '.torrent'
                    download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name)
                    try:
                        torrent_file = open(download_path, 'rb').read()
                        torrent_info = bencode.bdecode(torrent_file)
                        torrent_folder_name = torrent_info['info'].get('name','')
                        logger.info('Torrent folder name: %s' % torrent_folder_name)
                    except Exception, e:
                        logger.error('Couldn\'t get name from Torrent file: %s' % e)
                        break
                        
                myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
                myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name])

def preprocesstorrent(resultlist):
    selresult = ""
    for result in resultlist:
        if selresult == "":
            selresult = result
        elif int(selresult[1]) < int(result[1]): # if size is lower than new result replace previous selected result (bigger size = better quality?)
Ejemplo n.º 27
0
    def search(self, searchurl, maxsize, minseeders, albumid, bitrate):
        """
        Parse the search results and return valid torrent list

        Scrapes the ``torrentlist`` table of the page at ``searchurl`` and,
        for every torrent whose size is at most ``maxsize`` and which has at
        least ``minseeders`` seeders, downloads the .torrent in order to count
        the audio files it contains.

        Returns a list of ``(title, size, url)`` tuples, or False when no
        torrents were scraped, when headphones has no tracks stored for
        ``albumid``, or when a torrent cannot be downloaded/decoded.

        ``bitrate`` is not used by this method.
        """

        rulist = []
        torrentlist = []

        page = self.opener.open(searchurl, timeout=60)
        try:
            html = BeautifulSoup(page.read())
        finally:
            page.close()

        torrent_table = html.find('table', attrs={'class': 'torrentlist'})
        if torrent_table:
            # The first <tr> is skipped (presumably the table header row).
            for row in torrent_table.find_all('tr')[1:]:
                cells = row.find_all('td')
                name = cells[1].find('a').find('b').string
                download_url = self.urls['base_url'] + cells[2].find('a')['href']
                seed_count = int(cells[8].find('b').string)
                byte_size = int(float(cells[6].find('br').previous))

                torrentlist.append((name, download_url, seed_count, byte_size))
                logger.info('got stuff from deildu torrent %s ' % name)

        # return if nothing found

        if not torrentlist:
            logger.info('nothing in torrent list ....')
            return False

        # get headphones track count for album, return if not found

        myDB = db.DBConnection()
        tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid])
        hptrackcount = len(tracks)

        if not hptrackcount:
            logger.info('headphones track info not found, cannot compare to torrent')
            return False

        # Return all valid entries, ignored, required words now checked in searcher.py

        formatlist = ['ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif']
        audio_suffixes = tuple('.' + ext for ext in formatlist)

        for raw_title, url, seed_count, size in torrentlist:

            returntitle = raw_title.encode('utf-8')
            title = returntitle.lower()

            if int(size) > maxsize or int(seed_count) < minseeders:
                logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (returntitle, int(size), int(seed_count)))
                continue

            # Download and decode the .torrent. Any failure aborts the whole
            # search, as before.
            try:
                page = self.opener.open(url)
                try:
                    torrent_data = page.read()
                finally:
                    page.close()
                if torrent_data:
                    metainfo = bencode.bdecode(torrent_data)['info']
                else:
                    # An empty download used to leave ``metainfo`` unbound
                    # (or stale from the previous torrent); treat it as a
                    # torrent without a file list instead.
                    metainfo = {}
                    logger.debug('Empty torrent file returned for %s' % url)
            except Exception as e:
                logger.error('Error getting torrent: %s' % e)
                return False

            # get torrent track count and check for cue

            trackcount = 0
            cuecount = 0

            if 'files' in metainfo:  # multi
                for pathfile in metainfo['files']:
                    for filename in pathfile['path']:
                        if filename.lower().endswith(audio_suffixes):
                            trackcount += 1
                        if '.cue' in filename:
                            cuecount += 1

            # The download url doubles as the topic page for this provider.
            topicurl = url
            logger.debug ('torrent title: %s' % title)
            logger.debug ('headphones trackcount: %s' % hptrackcount)
            logger.debug ('deildu trackcount: %s' % trackcount)

            # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s)
            # This is for the case where we have a single .flac/.wav which can be split by cue
            # Not great, but shouldn't be doing this too often

            totallogcount = 0
            if trackcount < hptrackcount and 0 < cuecount < hptrackcount:
                toc_page = self.opener.open(topicurl, timeout=60)
                try:
                    soup = BeautifulSoup(toc_page.read())
                finally:
                    toc_page.close()
                findtoc = soup.find_all(text='TOC of the extracted CD')
                if not findtoc:
                    findtoc = soup.find_all(text='TOC .... CD')
                for toc in findtoc:
                    # Count TOC rows: start at the row whose first column is
                    # track 1, then keep counting while the first column stays
                    # numeric.
                    logcount = 0
                    for toccontent in toc.find_all_next(text=True):
                        first_field = toccontent.split('|')[0].strip()
                        if first_field in ('1', '01'):
                            logcount = 1
                        elif logcount > 0:
                            if first_field.isdigit():
                                logcount += 1
                            else:
                                break
                    totallogcount += logcount

            if totallogcount > 0:
                trackcount = totallogcount
                logger.debug ('deildu logtrackcount: %s' % totallogcount)

            # Track-count validation is currently disabled for this provider:
            # every torrent that passes the size/seeders filter is accepted and
            # the counts above are only logged.
            rulist.append((returntitle, size, topicurl))

        return rulist
Ejemplo n.º 28
0
    def search(self, searchurl, maxsize, minseeders, albumid, bitrate):
        """
        Parse the search results and collect the valid torrents

        Scrapes titles, download urls, seeder counts and sizes from the page
        at ``searchurl``. Every torrent whose size is at most ``maxsize`` and
        which has at least ``minseeders`` seeders is downloaded, and its
        audio-file count is compared with the number of tracks headphones has
        stored for ``albumid``. Matching torrents are appended to ``rulist``
        as ``(title, size, topic_url)`` tuples.

        Returns False when no results were scraped, when headphones has no
        tracks for the album, or when a torrent cannot be downloaded/decoded.

        ``bitrate`` is not used in the code below.
        """

        titles = []
        urls = []
        seeders = []
        sizes = []
        torrentlist = []
        rulist = []

        try:

            page = self.opener.open(searchurl, timeout=60)
            try:
                soup = BeautifulSoup(page.read())
            finally:
                page.close()

            # Each column is scraped independently; the lists are filled
            # incrementally so that results gathered before a failure survive.

            # Title

            for title_link in soup.find_all("a", attrs={"class": "med tLink hl-tags bold"}):
                titles.append(title_link.get_text())

            # Download URL

            for dl_link in soup.find_all("a", attrs={"class": "small tr-dl dl-stub"}):
                urls.append(dl_link.get("href"))

            # Seeders

            for seed_tag in soup.find_all("b", attrs={"class": "seedmed"}):
                seeders.append(seed_tag.get_text())

            # Size

            for size_cell in soup.find_all("td", attrs={"class": "row4 small nowrap tor-size"}):
                sizes.append(size_cell.u.string)

        except Exception as e:
            # Still best effort (whatever was scraped so far is used), but the
            # failure is logged instead of being swallowed silently.
            logger.error("Error parsing rutracker search results: %s" % e)

        # Combine lists (zip stops at the shortest column)

        torrentlist = list(zip(titles, urls, seeders, sizes))

        # return if nothing found

        if not torrentlist:
            return False

        # get headphones track count for album, return if not found

        myDB = db.DBConnection()
        tracks = myDB.select("SELECT * from tracks WHERE AlbumID=?", [albumid])
        hptrackcount = len(tracks)

        if not hptrackcount:
            logger.info("headphones track info not found, cannot compare to torrent")
            return False

        # Return all valid entries, ignored, required words now checked in searcher.py

        formatlist = ["ape", "flac", "ogg", "m4a", "aac", "mp3", "wav", "aif"]
        audio_suffixes = tuple("." + ext for ext in formatlist)
        deluxelist = ["deluxe", "edition", "japanese", "exclusive"]

        for raw_title, url, seed_count, size in torrentlist:

            returntitle = raw_title.encode("utf-8")
            title = returntitle.lower()

            if int(size) <= maxsize and int(seed_count) >= minseeders:

                # Check torrent info

                # The topic id is the "t" parameter of the download url's
                # query string.
                query = urlparse(url)[4]
                torrent_id = dict([part.split("=") for part in query.split("&")])["t"]

                # Set a "bb_dl" cookie carrying the topic id before requesting
                # the .torrent (presumably required by the download endpoint).
                self.cookiejar.set_cookie(
                    cookielib.Cookie(
                        version=0,
                        name="bb_dl",
                        value=torrent_id,
                        port=None,
                        port_specified=False,
                        domain=".rutracker.org",
                        domain_specified=False,
                        domain_initial_dot=False,
                        path="/",
                        path_specified=True,
                        secure=False,
                        expires=None,
                        discard=True,
                        comment=None,
                        comment_url=None,
                        rest={"HttpOnly": None},
                        rfc2109=False,
                    )
                )

                # Download and decode the .torrent. Any failure aborts the
                # whole search, as before.
                try:
                    page = self.opener.open(url)
                    try:
                        torrent_data = page.read()
                    finally:
                        page.close()
                    if torrent_data:
                        metainfo = bencode.bdecode(torrent_data)["info"]
                    else:
                        # An empty download used to leave ``metainfo`` unbound
                        # (or stale from the previous torrent); treat it as a
                        # torrent without a file list instead.
                        metainfo = {}
                        logger.debug("Empty torrent file returned for %s" % url)
                except Exception as e:
                    logger.error("Error getting torrent: %s" % e)
                    return False

                # get torrent track count and check for cue

                trackcount = 0
                cuecount = 0

                if "files" in metainfo:  # multi
                    for pathfile in metainfo["files"]:
                        for filename in pathfile["path"]:
                            if filename.lower().endswith(audio_suffixes):
                                trackcount += 1
                            if ".cue" in filename:
                                cuecount += 1

                # Torrent topic page

                topicurl = "http://rutracker.org/forum/viewtopic.php?t=" + torrent_id
                logger.debug("torrent title: %s" % title)
                logger.debug("headphones trackcount: %s" % hptrackcount)
                logger.debug("rutracker trackcount: %s" % trackcount)

                # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s)
                # This is for the case where we have a single .flac/.wav which can be split by cue
                # Not great, but shouldn't be doing this too often

                totallogcount = 0
                if trackcount < hptrackcount and 0 < cuecount < hptrackcount:
                    toc_page = self.opener.open(topicurl, timeout=60)
                    try:
                        soup = BeautifulSoup(toc_page.read())
                    finally:
                        toc_page.close()
                    findtoc = soup.find_all(text="TOC of the extracted CD")
                    if not findtoc:
                        findtoc = soup.find_all(text="TOC извлечённого CD")
                    for toc in findtoc:
                        # Count TOC rows: start at the row whose first column
                        # is track 1, then keep counting while the first
                        # column stays numeric.
                        logcount = 0
                        for toccontent in toc.find_all_next(text=True):
                            first_field = toccontent.split("|")[0].strip()
                            if first_field in ("1", "01"):
                                logcount = 1
                            elif logcount > 0:
                                if first_field.isdigit():
                                    logcount += 1
                                else:
                                    break
                        totallogcount += logcount

                if totallogcount > 0:
                    trackcount = totallogcount
                    logger.debug("rutracker logtrackcount: %s" % totallogcount)

                # If torrent track count = hp track count then return torrent,
                # if greater, check for deluxe/special/foreign editions
                # if less, then allow if it's a single track with a cue

                valid = trackcount == hptrackcount or (
                    trackcount > hptrackcount
                    and any(deluxe in title for deluxe in deluxelist)
                )

                # Add to list

                if valid:
                    rulist.append((returntitle, size, topicurl))
                else:
                    if topicurl:
                        logger.info(
                            u'<a href="%s">Torrent</a> found with %s tracks but the selected headphones release has %s tracks, skipping for rutracker.org'
                            % (topicurl, trackcount, hptrackcount)
                        )

            else:
                logger.info(
                    "%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)"
                    % (returntitle, int(size), int(seed_count))
                )