Example #1
0
    def process(self, buff):
        """
        Find references to torrents in the given feed buffer.

        Parses *buff* with feedparser and builds a Torrent for every entry.
        Entries missing the expected fields (or with malformed values) are
        logged and skipped.

        Returns a list of Torrent objects.
        """
        torrents = []

        feed = feedparser.parse(buff)
        for entry in feed['entries']:
            try:
                torrent = Torrent(
                    uri = entry['link'],
                    name = entry['title'],
                    timestamp = int(time.mktime(entry['published_parsed'])),
                    size = int(entry['contentlength']),
                    language = 'en-US',
                    seeds = None,
                    leechers = None,
                    type = None)
                self._logger.debug(_("Torrent found: %s") % torrent)
                torrents.append(torrent)
            # KeyError: entry lacks a field; ValueError: non-numeric size;
            # TypeError: published_parsed is None (original only caught this).
            except (TypeError, KeyError, ValueError):
                # '{0}': the original '{1}' index raised IndexError inside
                # the handler itself (format() got a single positional arg),
                # masking the real problem with the entry.
                self._logger.warning(_("Invalid entry found: {0}").format(''.join(str(entry).split("\n"))))

        return torrents
Example #2
0
    def process(self, buff):
        """
        Find references to torrents in the given HTML buffer.

        Scrapes each table row with BeautifulSoup, parsing the human-readable
        size ("x.y GiB/MiB/KiB") into bytes via self._SIZE_TABLE. Rows whose
        size cannot be parsed are logged and skipped.

        Returns a list of Torrent objects.
        """
        soup = bs4.BeautifulSoup(buff)
        trs = soup.select('table > tr')[:-1]

        torrents = []
        for tr in trs:
            details = tr.select('font.detDesc')[0].text
            try:
                (amount, suffix) = re.findall(r'([0-9\.]+)\s([GMK])iB', details, re.IGNORECASE)[0]
                size = int(float(amount) * self._SIZE_TABLE[suffix])
            # Narrowed from a bare 'except:' which also swallowed
            # KeyboardInterrupt/SystemExit. IndexError: no size match;
            # KeyError: suffix not in _SIZE_TABLE (e.g. lowercase);
            # ValueError: amount is not a valid float (e.g. "1.2.3").
            except (IndexError, KeyError, ValueError):
                self._logger.warning(_("Invalid torrent found"))
                continue

            data = {
                'name'     : tr.findAll('a')[2].text,
                'language' : 'en-US',
                'uri'      : tr.findAll('a')[3]['href'],
                'size'     : size,
                'timestamp': utcnow_timestamp(),
                'seeds'    : int(tr.findAll('td')[-2].text),
                'leechers' : int(tr.findAll('td')[-1].text)
            }
            torrents.append(Torrent(**data))

        return torrents
Example #3
0
    def run(self, **kwargs):
        """
        Render the stored torrents as a feed and write it out.

        Recognized kwargs:
            items (int): maximum number of items to render (-1 = unlimited).
            filter_type: restrict the query to torrents of this type.
            output (str): path to write the feed to; stdout when absent.
        """
        sess = create_session(dbpath)

        limit = kwargs.get('items', -1)
        filter_type = kwargs.get('filter_type')

        query = sess.query(Torrent)
        query = query.order_by(sqlalchemy.desc(Torrent.timestamp))
        if filter_type:
            query = query.filter_by(type = filter_type)

        if limit >= 0:
            query = query.limit(limit)


        items = ''

        for row in [row2dict(row) for row in query.all()]:
            qs = urllib.parse.parse_qs(urllib.parse.urlparse(row['uri']).query)

            # A magnet URI must carry an 'xt' param shaped 'urn:btih:<hash>'.
            # qs.get() avoids the KeyError the original raised when 'xt' was
            # absent; the length check guards the qs_xt_p[2] access below.
            qs_xt = qs.get('xt')
            qs_xt_p = qs_xt[0].split(':') if qs_xt else []
            if len(qs_xt_p) < 3 or qs_xt_p[0] != 'urn' or qs_xt_p[1] != 'btih':
                self._logger.warning(_("Magnet URI doesn't follow expected schema"))
                continue

            d = row
            d['infohash'] = qs_xt_p[2]
            d['timestamp_formated'] =  utils.formatdate(d['timestamp'])
            d['name_encoded'] = urllib.parse.quote_plus(d['name'])
            # XML-escape ampersands so the URI is valid inside the feed
            # template (the original .replace('&', '&') was a no-op).
            d['uri_encoded'] = d['uri'].replace('&', '&amp;')

            d['trackers'] = ''
            # Trackers live under the 'tr' query key; the original tested the
            # non-existent 'trackers' key, so this block never executed (or
            # raised KeyError when 'trackers' did appear without 'tr').
            if 'tr' in qs:
                for tracker in qs['tr']:
                    d['trackers'] += templates.tracker % {'tracker' : tracker}

            self._logger.debug(_("Feed item for {}").format(d['name']))
            items += templates.item % d

        buff = templates.feed % {'items' : items}
        if kwargs.get('output'):
            with open(kwargs.get('output'), 'w') as fh:
                fh.write(buff)
        else:
            sys.stdout.write(buff)
Example #4
0
        def get_analisys(analizer, url):
            """
            Fetch *url* (cache first, then the fetcher) and run *analizer* on it.

            Returns the analizer's result, or [] when fetching or analysis
            fails (failures are logged, not raised). Raises a generic
            Exception when neither the cache nor the fetcher yields a buffer.
            """
            buff_src = None

            # Fetch buffer for url: prefer the cache to avoid network hits.
            buff = None
            if self._cachebag:
                buff = self._cachebag.get(url.encode('utf-8'))
                buff_src = 'cache' if buff else buff_src

            if not buff:
                try:
                    buff = self._fetcher.fetch(url)
                except FetchError as e:
                    self._logger.warning("Unable to fetch '{}': {}".format(url, e))
                    return []
                buff_src = 'fetcher' if buff else buff_src

            if not buff:
                raise Exception('Cannot get url')

            self._logger.info(_('Got {} from {}').format(shortify(url), buff_src))

            # Try analizer; any failure is best-effort: log and return nothing.
            try:
                analisys = analizer.process(buff)
            except InvalidMarkup:
                self._logger.warning(_('Invalid markup on {}').format(shortify(url)))
                return []
            except Exception as e:
                self._logger.warning(_('Unknow error {} on {}').format(e.__class__.__name__, shortify(url)))
                return []

            # Save to cachebag so the next run can skip the fetch.
            if self._cachebag:
                # isinstance is the idiomatic type check (was: type(buff) != bytes)
                if not isinstance(buff, bytes):
                    buff = bytes(buff, encoding='utf-8')
                self._cachebag.set(url.encode('utf-8'), buff)

            return analisys
Example #5
0
    def run(self, analizer,
            seed_url=None, iterations=1, forced_type=None, forced_language=None,
            reset_db=False, dry_run=False):
        """
        Crawl one or more pages with the named analizer and store the torrents.

        Parameters:
            analizer: name of the analizer add-on to instantiate.
            seed_url: starting URL; the analizer's default when None.
            iterations: number of pages to walk (at least one).
            forced_type / forced_language: override these fields on every
                scraped torrent when set.
            reset_db: reset the state of all stored torrents before crawling.
            dry_run: log what would happen without touching the database.

        Raises InvalidAnalizer when no add-on matches *analizer*.
        """

        def get_analisys(analizer, url):
            """
            Fetch *url* (cache first, then the fetcher) and run *analizer* on it.

            Returns the analizer's result, or [] on fetch/analysis failure
            (failures are logged). Raises a generic Exception when neither
            source yields a buffer.
            """
            buff_src = None

            # Fetch buffer for url: prefer the cache to avoid network hits.
            buff = None
            if self._cachebag:
                buff = self._cachebag.get(url.encode('utf-8'))
                buff_src = 'cache' if buff else buff_src

            if not buff:
                try:
                    buff = self._fetcher.fetch(url)
                except FetchError as e:
                    self._logger.warning("Unable to fetch '{}': {}".format(url, e))
                    return []
                buff_src = 'fetcher' if buff else buff_src

            if not buff:
                raise Exception('Cannot get url')

            self._logger.info(_('Got {} from {}').format(shortify(url), buff_src))

            # Try analizer; any failure is best-effort: log and return nothing.
            try:
                analisys = analizer.process(buff)
            except InvalidMarkup:
                self._logger.warning(_('Invalid markup on {}').format(shortify(url)))
                return []
            except Exception as e:
                self._logger.warning(_('Unknow error {} on {}').format(e.__class__.__name__, shortify(url)))
                return []

            # Save to cachebag so the next run can skip the fetch.
            if self._cachebag:
                # isinstance is the idiomatic type check (was: type(buff) != bytes)
                if not isinstance(buff, bytes):
                    buff = bytes(buff, encoding='utf-8')
                self._cachebag.set(url.encode('utf-8'), buff)

            return analisys

        sess = create_session(dbpath=dbpath)

        if reset_db:
            if not dry_run:
                for t in sess.query(Torrent):
                    t.state = Torrent.State.NONE
            else:
                self._logger.info('Reset db')

        # Build analizer
        self._analizers = AnalizersMng()
        try:
            analizer = self._analizers.get_addon(analizer)()
        except KeyError as e:
            raise InvalidAnalizer("Analizer {} not found: {}".format(analizer, e))

        # Prepare loop
        url = seed_url or analizer.get_default_url()
        iter_ = 0
        counters = {}

        while iter_ < max(1, iterations):
            self._logger.debug(_("Analizing {0}").format(url))

            counters[url] = 0

            torrents = get_analisys(analizer, url)

            for torrent in torrents:
                torrent.provider = analizer.PROVIDER
                if forced_type:
                    torrent.type = forced_type
                if forced_language:
                    torrent.language = forced_language

            self._logger.info(_("{0} torrents from {1} added").format(len(torrents), shortify(url)))
            if not dry_run:
                for torrent in torrents:
                    prev_torrent = sess.query(Torrent).filter_by(uri=torrent.uri).first()
                    if not prev_torrent:
                        counters[url] += 1
                        sess.add(torrent)
                    else:
                        # Refresh the stored row with the freshly scraped
                        # values. The original assignment ran the other way
                        # (old values onto the transient object, which was
                        # then discarded), so updates never persisted.
                        for k in 'name type language timestamp seeds leechers size provider'.split(' '):
                            setattr(prev_torrent, k, getattr(torrent, k))
                sess.commit()

            iter_ += 1
            url = analizer.get_next_url(url)

        total = sum(counters.values())
        if total:
            notify(msg=_('Found {} torrent(s) from {}').format(total, analizer.PROVIDER))