def cache(self, url):
    try:
        result = client.request(url)
        result = re.sub(r'[^\x00-\x7F]+', ' ', result)
        result = zip(client.parseDOM(result, 'series', ret='srsid'),
                     client.parseDOM(result, 'series'))
        result = [(i[0], cleantitle.get(i[1])) for i in result]
        return result
    except:
        pass

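# cache() above is written to be called through the add-on's cache helper
# rather than directly, so the xsubs series index is only re-fetched once the
# cached copy expires; the xsubs get() below uses it as:
#
#   srsid = cache.get(self.cache, 48, url)
#
# (48 is the cache lifetime -- hours, assuming the usual lambda-style
# cache.get(function, timeout, *args) signature.)
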
def item_list_11(self):
    try:
        result = client.request(self.tvshows_link)
        filter = client.parseDOM(result, 'div', attrs={'class': 'panel-row row-.+?'})[0]
        filter = client.parseDOM(filter, 'div', attrs={'class': 'views.+?limit-'})
        filter = client.parseDOM(filter, 'a', ret='href')
        # de-duplicate while preserving order
        filter = [x for y, x in enumerate(filter) if x not in filter[:y]]

        # fetch the seven paginated JSON chunks in parallel; each worker
        # writes its response into the matching self.data slot
        threads = []
        for i in range(0, 7):
            threads.append(workers.Thread(self.thread, i, self.tvshows_link_2,
                                          self.tvshows_link_3 % str(i)))
            self.data.append('')
        [i.start() for i in threads]
        [i.join() for i in threads]

        items = ''
        for i in self.data:
            items += json.loads(i)[1]['data']
        items = client.parseDOM(items, 'li')
    except:
        return

    for item in items:
        try:
            title = client.parseDOM(item, 'div', attrs={'class': 'views-field-title'})[0]
            title = client.parseDOM(title, 'a')[0]
            title = client.replaceHTMLCodes(title)
            title = title.encode('utf-8')

            url = client.parseDOM(item, 'a', ret='href')[0]
            # mark items whose link appears in the filtered block
            flt = url in filter
            url = urlparse.urljoin(self.base_link, url)
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')

            image = client.parseDOM(item, 'img', ret='src')[0]
            image = urlparse.urljoin(self.base_link, image)
            image = client.replaceHTMLCodes(image)
            image = image.encode('utf-8')

            self.list.append({'title': title, 'url': url, 'image': image, 'filter': flt})
        except:
            pass

    return self.list

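# self.thread is not shown in this section. A minimal sketch consistent with
# how it is used above and in item_list_2 below (each slot of self.data is
# pre-filled with '' and then overwritten by the worker for that index) --
# an assumption, not the add-on's actual implementation:
#
#   def thread(self, i, url, post):
#       try:
#           result = client.request(url, post=post)
#           self.data[i] = result
#       except:
#           pass
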
def item_list(self, url):
    try:
        url = urlparse.urljoin(self.base_link, url)
        result = client.request(url)
        items = client.parseDOM(result, 'item')
    except:
        return

    for item in items:
        try:
            title = client.parseDOM(item, 'title')[0]
            title = client.replaceHTMLCodes(title)
            title = title.encode('utf-8')

            image = client.parseDOM(item, 'media:thumbnail', ret='url')[0]
            image = image.split('?')[0]
            image = client.replaceHTMLCodes(image)
            image = image.encode('utf-8')

            url = client.parseDOM(item, 'enclosure', ret='url')
            # items without an enclosure are flagged for removal below
            check = len(url) == 0
            url = url[0] if len(url) > 0 else '0'
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')

            link = client.parseDOM(item, 'link')[0]
            # keep only the path portion of the link
            link = re.sub('.+?//.+?/', '/', link)
            link = client.replaceHTMLCodes(link)
            link = link.encode('utf-8')

            self.list.append({'title': title, 'url': url, 'image': image,
                              'link': link, 'check': check})
        except:
            pass

    # first worker pass may update each item's 'check' flag;
    # items still flagged afterwards are dropped
    threads = []
    for i in range(0, len(self.list)):
        threads.append(workers.Thread(self.item_list_worker, i))
    [i.start() for i in threads]
    [i.join() for i in threads]

    self.list = [i for i in self.list if not i['check']]

    threads = []
    for i in range(0, len(self.list)):
        threads.append(workers.Thread(self.item_list_worker_2, i))
    [i.start() for i in threads]
    [i.join() for i in threads]

    return self.list

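# item_list_worker / item_list_worker_2 are not shown in this section. From
# the flow above, the first pass apparently validates each item (setting
# 'check' to flag dead entries for the filter step) and the second enriches
# the survivors. A commented sketch under those assumptions only:
#
#   def item_list_worker(self, i):
#       try:
#           # probe the stream URL; flag the item if the request fails
#           result = client.request(self.list[i]['url'])
#           if result is None:
#               self.list[i]['check'] = True
#       except:
#           self.list[i]['check'] = True
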
def cookie(self):
    try:
        login = '******'
        # fetch the login form to obtain Django's CSRF token
        result = client.request(login)
        token = client.parseDOM(result, 'input', ret='value',
                                attrs={'name': 'csrfmiddlewaretoken'})[0]
        headers = {'Cookie': 'csrftoken=%s' % token}
        post = {'username': self.user, 'password': self.password,
                'csrfmiddlewaretoken': token, 'next': ''}
        post = urllib.urlencode(post)
        # post the credentials and return the session cookie
        c = client.request(login, post=post, headers=headers, output='cookie')
        return c
    except:
        pass

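# Usage sketch: the cookie string returned by cookie() would typically be
# replayed on later authenticated requests. Assuming the client helper
# accepts a 'cookie' keyword (the counterpart of output='cookie' used above;
# check the add-on's client module for the exact parameter):
#
#   c = self.cookie()
#   result = client.request(members_url, cookie=c)
#
# ('members_url' is a placeholder, not a URL from this add-on.)
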
def get(self, query):
    try:
        # uploaders whose releases are skipped; the page is served as
        # ISO-8859-7 and parsed undecoded below, so the Greek team name is
        # matched as ISO-8859-7 bytes (assuming this source file is UTF-8)
        filter = ['freeprojectx', 'subs4series',
                  'Εργαστήρι Υποτίτλων'.decode('utf-8').encode('iso-8859-7')]

        # strip punctuation (any percent-encoded character) and collapse whitespace
        query = ' '.join(urllib.unquote_plus(re.sub('%\w\w', ' ', urllib.quote_plus(query))).split())

        url = 'http://www.subtitles.gr/search.php?name=%s&sort=downloads+desc' % urllib.quote_plus(query)

        result = client.request(url)
        #result = result.decode('iso-8859-7').encode('utf-8')

        items = client.parseDOM(result, 'tr', attrs={'on.+?': '.+?'})
    except:
        return

    for item in items:
        try:
            # Greek subtitles only
            if 'flags/el.gif' not in item:
                raise Exception()

            try:
                uploader = client.parseDOM(item, 'a', attrs={'class': 'link_from'})[0].strip()
            except:
                uploader = 'other'
            if uploader in filter:
                raise Exception()
            if uploader == '':
                uploader = 'other'

            try:
                downloads = client.parseDOM(item, 'td', attrs={'class': 'latest_downloads'})[0]
            except:
                downloads = '0'
            downloads = re.sub('[^0-9]', '', downloads)

            name = client.parseDOM(item, 'a', attrs={'onclick': 'runme.+?'})[0]
            name = ' '.join(re.sub('<.+?>', '', name).split())
            name = '[%s] %s [%s DLs]' % (uploader, name, downloads)
            name = client.replaceHTMLCodes(name)
            name = name.encode('utf-8')

            url = client.parseDOM(item, 'a', ret='href', attrs={'onclick': 'runme.+?'})[0]
            url = url.split('"')[0].split('\'')[0].split(' ')[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')

            rating = self._rating(downloads)

            self.list.append({'name': name, 'url': url, 'source': 'subtitlesgr', 'rating': rating})
        except:
            pass

    return self.list

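# self._rating is not shown in this section; from the call above it turns a
# download count into a score used for ranking. A hypothetical sketch only --
# the thresholds are illustrative, not the add-on's actual values:
#
#   def _rating(self, downloads):
#       try:
#           return min(5, int(downloads) // 100 + 1)
#       except:
#           return 0
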
def get(self, query):
    try:
        title, season, episode = re.findall('(.+?) S(\d+)E(\d+)$', query)[0]
        season, episode = '%01d' % int(season), '%02d' % int(episode)
        title = re.sub('^THE\s+|^A\s+', '', title.strip().upper())
        title = cleantitle.get(title)

        url = 'http://www.xsubs.tv/series/all.xml'
        srsid = cache.get(self.cache, 48, url)
        srsid = [i[0] for i in srsid if title == i[1]][0]

        url = 'http://www.xsubs.tv/series/%s/main.xml' % srsid
        result = client.request(url)
        ssnid = client.parseDOM(result, 'series_group', ret='ssnid',
                                attrs={'ssnnum': season})[0]

        url = 'http://www.xsubs.tv/series/%s/%s.xml' % (srsid, ssnid)
        result = client.request(url)

        items = client.parseDOM(result, 'subg')
        items = [(client.parseDOM(i, 'etitle', ret='number'), i) for i in items]
        items = [i[1] for i in items if len(i[0]) > 0 and i[0][0] == episode][0]
        items = re.findall('(<sr .+?</sr>)', items)
    except:
        return

    for item in items:
        try:
            p = client.parseDOM(item, 'sr', ret='published_on')[0]
            if p == '':
                raise Exception()

            name = client.parseDOM(item, 'sr')[0]
            name = name.rsplit('<hits>', 1)[0]
            name = re.sub('</.+?><.+?>|<.+?>', ' ', name).strip()
            name = '%s %s' % (query, name)
            name = client.replaceHTMLCodes(name)
            name = name.encode('utf-8')

            url = client.parseDOM(item, 'sr', ret='rlsid')[0]
            url = 'http://www.xsubs.tv/xthru/getsub/%s' % url
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')

            self.list.append({'name': name, 'url': url, 'source': 'xsubstv', 'rating': 5})
        except:
            pass

    return self.list

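# Expected query format for this scraper, per the regex at the top of get()
# above: 'Title SxxExx', e.g. get('Westworld S02E05') (illustrative title).
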
def item_list_2(self, url):
    try:
        base_link = re.findall('(http(?:s|)://.+?)/', url)
        if base_link:
            base_link = base_link[0]
        else:
            base_link = self.base_link

        if '/webtv/' not in url:
            result = client.request(url + '/webtv/')
            result = re.findall('"actual_args"\s*:\s*\["(.+?)"', result)[0]
        else:
            url, result = url.split('/webtv/')

        url = '%s/webtv/%s?page=%s' % (url, result.lower(), '%s')

        self.data.append('')
        self.thread(0, url % '0', None)

        try:
            result = client.parseDOM(self.data[0], 'div', attrs={'role': 'main'})[0]
            result = client.parseDOM(result, 'div', attrs={'class': 'view.+?'})[0]
            num = client.parseDOM(result, 'li',
                                  attrs={'class': 'pager__item pager__item--last'})[0]
            # read the last page number and cap pagination at pages 0-9
            num = int(re.findall('page=(\d+)', num)[0])
            if num > 9:
                num = 9
            num += 1

            threads = []
            for i in range(1, num):
                self.data.append('')
                threads.append(workers.Thread(self.thread, i, url % str(i), None))
            [i.start() for i in threads]
            [i.join() for i in threads]
        except:
            pass

        items = ''
        for i in self.data:
            items += i

        items = client.parseDOM(items, 'div', attrs={'role': 'main'})
        items = [client.parseDOM(i, 'div', attrs={'class': 'view.+?'}) for i in items]
        items = [i[0] for i in items if len(i) > 0]
        items = client.parseDOM(items, 'article')
    except:
        return

    for item in items:
        try:
            t = client.parseDOM(item, 'div', attrs={'class': 'itemtitle'})[0]
            title = client.parseDOM(t, 'span')
            title = title[0] if title else t
            if title == '' or 'sneak preview' in title.lower():
                raise Exception()
            title = client.replaceHTMLCodes(title)
            title = title.encode('utf-8')

            tvshowtitle = client.parseDOM(item, 'div', attrs={'class': 'showtitle'})
            tvshowtitle = tvshowtitle[0] if tvshowtitle else title
            tvshowtitle = client.replaceHTMLCodes(tvshowtitle)
            tvshowtitle = tvshowtitle.encode('utf-8')

            url = client.parseDOM(item, 'a', ret='href')[0]
            url = urlparse.urljoin(base_link, url)
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            # skip duplicates
            if url in [i['url'] for i in self.list]:
                raise Exception()

            image = client.parseDOM(item, 'img', ret='src')[0]
            image = urlparse.urljoin(base_link, image)
            image = client.replaceHTMLCodes(image)
            image = image.encode('utf-8')

            self.list.append({'title': title, 'url': url, 'image': image,
                              'tvshowtitle': tvshowtitle})
        except:
            pass

    return self.list

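# item_list_2 above accepts either a bare site URL (it then discovers the
# webtv section name via the 'actual_args' JSON on /webtv/) or a URL that
# already contains '/webtv/', per the branch at the top of the method.
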
def get(self, query):
    try:
        match = re.findall('(.+?) \((\d{4})\)$', query)

        if len(match) > 0:
            # movie query: 'Title (Year)'
            title, year = match[0][0], match[0][1]

            query = ' '.join(urllib.unquote_plus(re.sub('%\w\w', ' ', urllib.quote_plus(title))).split())

            url = 'http://subztv.gr/search?q=%s' % urllib.quote_plus(query)

            result = client.request(url)
            result = re.sub(r'[^\x00-\x7F]+', ' ', result)

            url = client.parseDOM(result, 'div', attrs={'id': 'movies'})[0]
            url = re.findall('(/movies/\d+)', url)
            # de-duplicate while preserving order, keep the first three hits
            url = [x for y, x in enumerate(url) if x not in url[:y]]
            url = [urlparse.urljoin('http://subztv.gr', i) for i in url]
            url = url[:3]

            for i in url:
                c = cache.get(self.cache, 2200, i)
                if c is not None and cleantitle.get(c[0]) == cleantitle.get(title) and c[1] == year:
                    # reuse the page body if self.cache kept it in self.r,
                    # otherwise fetch it again
                    try:
                        item = self.r
                    except:
                        item = client.request(i)
                    break
        else:
            # episode query: 'Title SxxExx'
            title, season, episode = re.findall('(.+?) S(\d+)E(\d+)$', query)[0]
            season, episode = '%01d' % int(season), '%01d' % int(episode)

            query = ' '.join(urllib.unquote_plus(re.sub('%\w\w', ' ', urllib.quote_plus(title))).split())

            url = 'http://subztv.gr/search?q=%s' % urllib.quote_plus(query)

            result = client.request(url)
            result = re.sub(r'[^\x00-\x7F]+', ' ', result)

            url = client.parseDOM(result, 'div', attrs={'id': 'series'})[0]
            url = re.findall('(/series/\d+)', url)
            url = [x for y, x in enumerate(url) if x not in url[:y]]
            url = [urlparse.urljoin('http://subztv.gr', i) for i in url]
            url = url[:3]

            for i in url:
                c = cache.get(self.cache, 2200, i)
                if c is not None and cleantitle.get(c[0]) == cleantitle.get(title):
                    item = i
                    break

            item = '%s/seasons/%s/episodes/%s' % (item, season, episode)
            item = client.request(item)

        item = re.sub(r'[^\x00-\x7F]+', ' ', item)
        items = client.parseDOM(item, 'tr', attrs={'data-id': '\d+'})
    except:
        return

    for item in items:
        try:
            # Greek subtitles only
            if 'img/el.png' not in item:
                raise Exception()

            name = client.parseDOM(item, 'td', attrs={'class': '.+?'})[-1]
            name = name.split('>')[-1].strip()
            name = re.sub('\s\s+', ' ', name)
            name = client.replaceHTMLCodes(name)
            name = name.encode('utf-8')

            url = re.findall('\'(http(?:s|)\://.+?)\'', item)[-1]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')

            self.list.append({'name': name, 'url': url, 'source': 'subztvgr', 'rating': 5})
        except:
            pass

    return self.list

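# get() above handles both query forms distinguished at the top:
# get('Inception (2010)') takes the movies branch and get('Westworld S02E05')
# the series branch (example titles are illustrative only).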