def process(self, buff): """ Finds referentes to torrents in buffer. Returns a list with torrent infos """ torrents = [] feed = feedparser.parse(buff) for entry in feed['entries']: try: torrent = Torrent( uri = entry['link'], name = entry['title'], timestamp = int(time.mktime(entry['published_parsed'])), size = int(entry['contentlength']), language = 'en-US', seeds = None, leechers = None, type = None) self._logger.debug(_("Torrent found: %s") % torrent) torrents.append(torrent) except TypeError: self._logger.warning(_("Invalid entry found: {1}").format(''.join(str(entry).split("\n")))) return torrents
def process(self, buff): """ Finds referentes to torrents in buffer. Returns a list with torrent infos """ soup = bs4.BeautifulSoup(buff) trs = soup.select('table > tr')[:-1] torrents = [] for tr in trs: details = tr.select('font.detDesc')[0].text try: (amount, suffix) = re.findall(r'([0-9\.]+)\s([GMK])iB', details, re.IGNORECASE)[0] size = int(float(amount) * self._SIZE_TABLE[suffix]) except: self._logger.warning(_("Invalid torrent found")) continue data = { 'name' : tr.findAll('a')[2].text, 'language' : 'en-US', 'uri' : tr.findAll('a')[3]['href'], 'size' : size, 'timestamp': utcnow_timestamp(), 'seeds' : int(tr.findAll('td')[-2].text), 'leechers' : int(tr.findAll('td')[-1].text) } torrents.append(Torrent(**data)) return torrents
def run(self, **kwargs):
    sess = create_session(dbpath)

    limit = kwargs.get('items', -1)
    filter_type = kwargs.get('filter_type')

    query = sess.query(Torrent)
    query = query.order_by(sqlalchemy.desc(Torrent.timestamp))
    if filter_type:
        query = query.filter_by(type=filter_type)
    if limit >= 0:
        query = query.limit(limit)

    items = ''
    for row in [row2dict(row) for row in query.all()]:
        qs = urllib.parse.parse_qs(urllib.parse.urlparse(row['uri']).query)
        qs_xt_p = qs['xt'][0].split(':')
        if qs_xt_p[0] != 'urn' or qs_xt_p[1] != 'btih':
            self._logger.warning(_("Magnet URI doesn't follow expected schema"))
            continue

        d = row
        d['infohash'] = qs_xt_p[2]
        d['timestamp_formated'] = utils.formatdate(d['timestamp'])
        d['name_encoded'] = urllib.parse.quote_plus(d['name'])
        d['uri_encoded'] = d['uri'].replace('&', '&amp;')
        d['trackers'] = ''
        if 'tr' in qs:
            for tracker in qs['tr']:
                d['trackers'] += templates.tracker % {'tracker': tracker}

        self._logger.debug(_("Feed item for {}").format(d['name']))
        items += templates.item % d

    buff = templates.feed % {'items': items}
    if kwargs.get('output'):
        with open(kwargs.get('output'), 'w') as fh:
            fh.write(buff)
    else:
        sys.stdout.write(buff)
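# The templates module used above is not shown. A rough sketch of the kind of
# %-style strings it might contain, assuming an RSS-like feed and using only
# keys that run() puts into each item dict; element names are illustrative,
# not the project's actual templates.
feed = """<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
%(items)s
</channel>
</rss>"""

item = """<item>
<title>%(name)s</title>
<link>%(uri_encoded)s</link>
<guid>%(infohash)s</guid>
<pubDate>%(timestamp_formated)s</pubDate>
%(trackers)s
</item>"""

tracker = "<tracker>%(tracker)s</tracker>"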
def run(self, analizer, seed_url=None, iterations=1, forced_type=None,
        forced_language=None, reset_db=False, dry_run=False):

    def get_analisys(analizer, url):
        buff_src = None

        # Fetch buffer for url
        buff = None
        if self._cachebag:
            buff = self._cachebag.get(url.encode('utf-8'))
            buff_src = 'cache' if buff else buff_src

        if not buff:
            try:
                buff = self._fetcher.fetch(url)
            except FetchError as e:
                self._logger.warning(
                    "Unable to fetch '{}': {}".format(url, e))
                return []
            buff_src = 'fetcher' if buff else buff_src

        if not buff:
            raise Exception('Cannot get url')

        self._logger.info(_('Got {} from {}').format(shortify(url), buff_src))

        # Try analizer
        try:
            analisys = analizer.process(buff)
        except InvalidMarkup:
            self._logger.warning(
                _('Invalid markup on {}').format(shortify(url)))
            return []
        except Exception as e:
            self._logger.warning(
                _('Unknown error {} on {}').format(
                    e.__class__.__name__, shortify(url)))
            return []

        # Save to cachebag
        if self._cachebag:
            if type(buff) != bytes:
                buff = bytes(buff, encoding='utf-8')
            self._cachebag.set(url.encode('utf-8'), buff)

        return analisys

    sess = create_session(dbpath=dbpath)

    if reset_db:
        if not dry_run:
            for t in sess.query(Torrent):
                t.state = Torrent.State.NONE
        else:
            self._logger.info('Reset db')

    # Build analizer
    self._analizers = AnalizersMng()
    try:
        analizer = self._analizers.get_addon(analizer)()
    except KeyError as e:
        raise InvalidAnalizer("Analizer {} not found: {}".format(analizer, e))

    # Prepare loop
    url = seed_url or analizer.get_default_url()
    iter_ = 0
    counters = {}

    while iter_ < max(1, iterations):
        self._logger.debug(_("Analyzing {0}").format(url))
        counters[url] = 0

        torrents = get_analisys(analizer, url)
        for torrent in torrents:
            torrent.provider = analizer.PROVIDER
            if forced_type:
                torrent.type = forced_type
            if forced_language:
                torrent.language = forced_language

        self._logger.info(_("{0} torrents from {1} added").format(
            len(torrents), shortify(url)))

        if not dry_run:
            for torrent in torrents:
                prev_torrent = sess.query(Torrent).filter_by(
                    uri=torrent.uri).first()
                if not prev_torrent:
                    counters[url] += 1
                    sess.add(torrent)
                else:
                    # Torrent already in the database: copy the stored
                    # attributes onto the freshly parsed object instead of
                    # adding a duplicate row.
                    for k in 'name type language timestamp seeds leechers size provider'.split(' '):
                        setattr(torrent, k, getattr(prev_torrent, k))
            sess.commit()

        iter_ += 1
        url = analizer.get_next_url(url)

    total = sum(counters.values())
    if total:
        notify(msg=_('Found {} torrent(s) from {}').format(
            total, analizer.PROVIDER))
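# A hedged usage sketch for the crawler above. CrawlCommand is a hypothetical
# class name for the object that owns run(), and 'thepiratebay' is only a
# guess at a registered analizer addon name.
if __name__ == '__main__':
    cmd = CrawlCommand()
    cmd.run('thepiratebay', iterations=2, forced_language='en-US', dry_run=True)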