def get(self):
    """Render the index page.

    Collects three datasets into ``self.template_values`` and renders the
    index template: new 720p YIFY movies, new series (first episodes), and
    new episodes of any series the current user is watching.
    """
    # new movies: 720p YIFY uploads in category 207, newest first
    self.template_values['movies'] = Torrent.query(
        Torrent.category_code == 207,
        Torrent.uploader == 'YIFY',
        Torrent.resolution == 720,
    ).order(-Torrent.uploaded_at).fetch(30)

    # new series: first episodes in category 205, newest first
    self.template_values['series_new'] = Torrent.query(
        Torrent.category_code == 205,
        Torrent.series_episode == 1,
    ).order(-Torrent.uploaded_at).fetch(15)

    episodes_new = []
    series_watching = []

    # watching series: every series title the current user has a
    # UserTorrent for in category 205
    uts = UserTorrent.query(
        UserTorrent.user == users.get_current_user(),
        UserTorrent.category_code == 205,
    ).fetch()
    if uts:
        series_watching = set()
        for ut in uts:
            # FIX: the original resolved ut.torrent.get() twice per entry
            # (once in a filter comprehension, once to read the title) —
            # each .get() is a datastore fetch; resolve the key once.
            torrent = ut.torrent.get()
            if torrent.series_title:
                series_watching.add(torrent.series_title)
        logging.info('{0} series being watched by user'.format(len(uts)))

    # new episodes: uploads from the last 14 days for watched series
    if series_watching:
        cutoff = arrow.utcnow().replace(days=-14).datetime
        episodes_new = Torrent.query(
            Torrent.series_title.IN(series_watching),
            Torrent.uploaded_at > cutoff,
            Torrent.category_code == 205,
        ).order(-Torrent.uploaded_at).fetch()
        logging.info('{0} episodes fetched for watched series'.format(len(episodes_new)))

    self.template_values['series_watching'] = series_watching
    self.template_values['episodes_new'] = episodes_new

    # logging.info('{0}'.format(self.template_values))
    template = JINJA_ENVIRONMENT.get_template('main/templates/index.html')
    self.response.write(template.render(self.template_values))
def clean(self):
    """Remove week-old torrents and orphaned user-torrents, then mail a
    summary listing everything that was deleted."""
    logging.info('Kickass: cleaning...')
    results = []

    # torrents not updated within the last 7 days are considered stale
    cutoff = arrow.utcnow().replace(days=-7).datetime
    stale = Torrent.query(Torrent.updated_at < cutoff).order(Torrent.updated_at).fetch()
    logging.info('{0} torrents found that is older than {1}'.format(len(stale), cutoff))
    for entity in stale:
        results.append(entity.title)
        entity.key.delete()
        logging.info('Deleted T {0}'.format(entity.title.encode('utf-8')))

    # user-torrents whose torrent no longer resolves are orphans
    user_torrents = UserTorrent.query().fetch()
    logging.info('{0} usertorrents found that is invalid'.format(len(user_torrents)))
    for entry in user_torrents:
        if entry.get_torrent():
            continue
        results.append(str(entry.key.id()))
        entry.key.delete()
        logging.info('Deleted UT {0}'.format(entry.key.id()))

    # report what was removed
    mail.send_mail(
        sender='*****@*****.**',
        to='*****@*****.**',
        subject="Torrents cleaned",
        body='\n'.join(results),
    )
    logging.info('Kickass: cleaned')
def clean(self):
    """Delete torrents older than 28 days together with their associated
    UserTorrent entries, then email a summary of everything removed.

    NOTE(review): reconstructed from collapsed source — the UserTorrent
    query references ``torrent``, so the UT cleanup is assumed to be
    nested inside the torrent loop; confirm against the original layout.
    """
    logging.info('Cleaner: cleaning...')
    results = []  # deleted titles / UT ids, reported in the summary mail
    # fetch old torrents
    cutoff = arrow.utcnow().replace(days=-28).datetime
    torrents = Torrent.query(Torrent.updated_at < cutoff).order(Torrent.updated_at).fetch()
    logging.info('{0} torrents found that is older than {1}'.format(len(torrents), cutoff))
    # delete old torrents
    for torrent in torrents:
        results.append(torrent.title)
        torrent.key.delete()
        logging.info('Deleted T {0}'.format(torrent.title.encode('utf-8')))
        # cleaning associated user torrents
        uts = UserTorrent.query(UserTorrent.torrent == torrent.key).fetch()
        logging.info('{0} usertorrents found that is invalid'.format(len(uts)))
        for ut in uts:
            results.append(str(ut.key.id()))
            ut.key.delete()
            logging.info('Deleted UT {0}'.format(ut.key.id()))
    mail.send_mail(
        sender='*****@*****.**',
        to='*****@*****.**',
        subject="Torrents cleaned",
        body='\n'.join(results),
    )
    logging.info('Cleaner: cleaned')
def get(self, cat):
    """Render the category listing page for category code *cat*."""
    logging.info('cat {0}'.format(cat))
    category = int(cat)
    self.template_values['cat'] = category

    # every torrent in the category, newest upload first
    listing = Torrent.query(Torrent.category_code == category).order(-Torrent.uploaded_at).fetch()
    self.template_values['torrents'] = listing
    logging.info('torrents {0}'.format(len(listing)))

    template = JINJA_ENVIRONMENT.get_template('main/templates/category.html')
    self.response.write(template.render(self.template_values))
def saveList(self, list):
    """Upsert each scraped item (keyed by its 'url') as a Torrent entity.

    NOTE(review): the parameter shadows the builtin ``list``; kept as-is
    so keyword callers don't break — consider renaming to ``items``.
    """
    logging.info('list: saving...')
    for item in list:
        torrent = Torrent.query(Torrent.url == item['url']).get()
        if not torrent:
            # FIX: the original built Torrent(**item) and then called
            # populate(**item) again, assigning every field twice; create
            # an empty entity and let the shared populate() fill it.
            torrent = Torrent()
        torrent.populate(**item)
        torrent.put()
        logging.info('list: saved {0}'.format(torrent))
    logging.info('list: saved')
def extract(self):
    """Parse series title / season / episode out of recent TV torrent
    titles (category 205) and persist them on the Torrent entities.

    Tries three title shapes in order (dots normalised to spaces first):
      1. "<title> sNN eNN"  — season + episode
      2. "<title> eNNN"     — episode only (season left as None)
      3. "<title> NNxNN"    — alternate season x episode form
    Torrents matching none of the shapes are reported with a [404] marker;
    per-torrent status lines are collected and passed to self.notify().
    """
    logging.info('series: extraction running...')
    # only torrents from the last 3 days that have no parsed title yet
    cutoff = arrow.utcnow().replace(days=-3).datetime
    torrents = Torrent.query(Torrent.category_code == 205, Torrent.series_title == None, Torrent.uploaded_at > cutoff).fetch()
    logging.info('{0} torrents fetched'.format(len(torrents)))
    results = []
    for torrent in torrents:
        logging.info('parsing {0}...'.format(torrent.title.encode('utf-8')))
        # shape 1: "<title> sNN eNN"
        title_groups = re.match(r'(.*)\s(s\d{1,2})(e\d{1,2})\s', torrent.title.replace('.', ' ').strip(), re.I)
        if title_groups is not None:
            logging.info('series and episode found {0}'.format(title_groups.groups()))
            torrent.series_title = title_groups.group(1)
            torrent.series_season = int(title_groups.group(2)[1:])   # drop leading 's'
            torrent.series_episode = int(title_groups.group(3)[1:])  # drop leading 'e'
            torrent.put()
            # pprint(torrent)
            msg = '[200] {0} S{1} E{2}'.format(torrent.series_title, torrent.series_season, torrent.series_episode)
        else:
            logging.info('series and episode not found')
            # shape 2: "<title> eNNN" — episode only, no season
            title_groups = re.match(r'(.*)\s(e\d{1,3})\s', torrent.title.replace('.', ' ').strip(), re.I)
            if title_groups is not None:
                logging.info('only episode found')
                torrent.series_title = title_groups.group(1).replace('.', ' ').strip()
                torrent.series_season = None
                torrent.series_episode = int(title_groups.group(2)[1:])
                torrent.put()
                # pprint(torrent)
                msg = '[200] {0} E{1}'.format(torrent.series_title, torrent.series_episode)
            else:
                logging.info('only episode not found')
                # shape 3: "<title> NNxNN"
                title_groups = re.match(r'(.*)(\d{1,2})x(\d{1,2})\s', torrent.title.replace('.', ' ').strip(), re.I)
                if title_groups is not None:
                    logging.info('series x episode found')
                    torrent.series_title = title_groups.group(1).replace('.', ' ').strip()
                    torrent.series_season = int(title_groups.group(2))
                    torrent.series_episode = int(title_groups.group(3))
                    torrent.put()
                    # pprint(torrent)
                    msg = '[200] <= {0} S{1} E{2}'.format(torrent.series_title, torrent.series_season, torrent.series_episode)
                else:
                    logging.info('absolutely not found')
                    # NOTE(review): the source shows a literal line break
                    # inside this string; preserved as \n — confirm intended.
                    msg = '[404] <= \n{0}'.format(torrent.title.encode('utf-8'))
        results.append(msg)
        logging.info(msg)
    self.notify(results)
def runMovies(self):
    """Look up IMDB ratings for recent unrated movie torrents.

    For each unrated movie (category 207) uploaded in the last 3 days:
    extract the title + release year, search IMDB via self.searchTitle /
    self.searchTitleRanking, and store rating, rated title and detected
    resolution back on the entity. Per-title outcomes ('MM%' = no
    title/year match, 'PP%' = IMDB lookup failed, 'NN%' = rating saved)
    are passed to self.notify().
    """
    logging.info('IMDB: movies running...')
    cutoff = arrow.utcnow().replace(days=-3).datetime
    torrents = Torrent.query(Torrent.category_code == 207, Torrent.rating == None, Torrent.uploaded_at > cutoff).fetch()
    logging.info('{0} torrents fetched'.format(len(torrents)))
    results = {}
    for torrent in torrents:
        # find year
        # FIX: the original pattern (19[5-9]\d|20[0-1]\d) stopped matching
        # at 2019; 20\d\d accepts any 20xx release year (superset, so all
        # previously-matching titles still match).
        matches = re.match(r'(.*)\(?(19[5-9]\d|20\d\d)', torrent.title)
        if matches is None:
            results[torrent.title] = 'MM%'
            logging.info('No match for {0}'.format(torrent.title.encode('utf-8')))
        else:
            # remove brackets in title
            title = matches.group(1).replace('(', '') + matches.group(2)
            # get imdb search results
            links = self.searchTitle(title.replace(' ', '+'))
            rating, header = self.searchTitleRanking(links)
            if not rating or not header:
                results[torrent.title] = 'PP%'
                logging.info('IMDB Title links not found {0}'.format(torrent.title.encode('utf-8')))
                continue
            logging.info('IMDB Title found {0}'.format(title.encode('utf-8')))
            # opportunistically tag the resolution from the title
            if r'1080p' in torrent.title.lower():
                torrent.resolution = 1080
            elif r'720p' in torrent.title.lower():
                torrent.resolution = 720
            torrent.title_rating = header
            torrent.rating = rating
            # stored naive (tzinfo stripped) to match the datastore fields
            torrent.rated_at = arrow.utcnow().datetime.replace(tzinfo=None)
            logging.info('Saved {0}'.format(torrent))
            torrent.put()
            results[torrent.title] = '{0}%'.format(rating)
    self.notify(results)
    logging.info('IMDB: movies ran')
def scrapePage(self, group, category, p):
    """Scrape one PirateBay browse page (page *p* of *category*) and
    upsert each result row as a Torrent keyed by the id segment of its url.

    Python 2 code (xrange, u'' literals). Network fetch is retried up to
    3 times; on total failure the method simply does nothing.
    """
    logging.info('PirateBay: scrapePage: {0} {1} {2}'.format(
        group['name'], category['name'], p))
    # one shared dict mutated per row; group/category fields stay constant
    item = {
        'group_code': group['code'],
        'group_name': group['name'],
        'category_code': category['code'],
        'category_name': category['name'],
    }
    # 3 tries to scrape page
    rows = None
    for n in xrange(3):
        try:
            url = 'http://thepiratebay.se/browse/{0}/{1}/7/0'.format(
                category['code'], p)
            logging.info('PirateBay: scrapePage: url {0}'.format(url))
            res = urlfetch.fetch(url)
            # logging.info('res {0}'.format(res.content))
            html = BeautifulSoup(res.content)
            # first row is the table header, last is pagination — skip both
            rows = html.find('table', id='searchResult').find_all('tr')[1:-1]
            break
        except:
            # NOTE(review): bare except also swallows SystemExit /
            # KeyboardInterrupt — consider narrowing to Exception.
            logging.error('Could not scrape with try {0}'.format(n))
            sleep(1)
    if rows:
        for row in rows:
            # logging.info('row html {0}'.format(row))
            row_top = row.find('div', class_='detName')
            # title
            item['title'] = row_top.find('a').text
            # url
            item['url'] = row_top.find('a')['href']
            # magnet
            item['magnet'] = row.find(
                'a', title='Download this torrent using magnet')['href']
            # detDesc text looks like "Uploaded <date>, Size <n> <unit>, ULed by <name>"
            details = row.find('font', class_='detDesc').text
            details_date, details_size, details_uploader = details.split(
                ',')
            # date — drop the "Uploaded" prefix and normalise non-breaking spaces
            details_date_val = details_date.split(' ', 1)[1].replace(
                u"\xa0", u" ")
            if 'Y-day' in details_date_val:
                # "Y-day HH:MM" — yesterday at the given time
                details_datetime = datetime.utcnow().replace(
                    hour=int(details_date_val[-5:-3]),
                    minute=int(details_date_val[-2:])) + timedelta(days=-1)
            elif 'Today' in details_date_val:
                # "Today HH:MM"
                details_datetime = datetime.utcnow().replace(
                    hour=int(details_date_val[-5:-3]),
                    minute=int(details_date_val[-2:]))
            elif 'mins ago' in details_date_val:
                # NOTE(review): this sets the *minute* field to the "N mins
                # ago" value rather than subtracting N minutes — looks
                # wrong, but preserved as-is.
                details_datetime = datetime.utcnow().replace(
                    minute=int(details_date_val.split(' ')[0]))
            elif ':' in details_date:
                # "MM-DD HH:MM" — current year implied
                details_datetime = datetime.strptime(
                    details_date_val, '%m-%d %H:%M')
                details_datetime = details_datetime.replace(
                    year=datetime.utcnow().year)
            else:
                # "MM-DD YYYY"
                details_datetime = datetime.strptime(
                    details_date_val, '%m-%d %Y')
            # stored naive (tzinfo stripped) to match the datastore fields
            item['uploaded_at'] = details_datetime.replace(tzinfo=None)
            # logging.info('Date extracted {0} from {1}'.format(item['uploaded_at'], details_date.encode('utf-8')))
            # size — decimal multiplier: GiB -> 10^9, MiB -> 10^6, else 10^3
            # NOTE(review): treats binary units as decimal powers (GiB as
            # 10^9, not 2^30) — confirm this is the intended convention.
            details_size_split = details_size.replace(
                u"\xa0", u" ").strip().split(' ')
            details_size_mul = 9 if 'GiB' in details_size_split[2] else (
                6 if 'MiB' in details_size_split[2] else 3)
            item['size'] = int(
                (float(details_size_split[1])) * 10**details_size_mul)
            # uploader
            item['uploader'] = details_uploader.split(' ')[-1]
            # seeders
            item['seeders'] = int(row.find_all('td')[2].text)
            # leechers
            item['leechers'] = int(row.find_all('td')[3].text)
            # logging.info('item {0}'.format(item))
            # save — entity key is the numeric id segment of the torrent url
            url_split = item['url'].split('/')
            item_key = ndb.Key('Torrent', url_split[2])
            torrent = item_key.get()
            if not torrent:
                torrent = Torrent(key=item_key)
            torrent.populate(**item)
            torrent.put()
            logging.info('Torrent {0}'.format(torrent))