Example #1
    def __search(self, titles, type, year, season=0, episode=False):
        try:
            years = [str(year), str(int(year) + 1), str(int(year) - 1)]
            years = ['&veroeffentlichung[]=%s' % i for i in years]

            query = self.search_link % (type, urllib.quote_plus(cleantitle.query(titles[0])))
            query += ''.join(years)
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = self.__proceed_search(query)
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and int(i[2]) == int(season)][0]

            url = source_utils.strip_domain(r)
            if episode:
                r = client.request(urlparse.urljoin(self.base_link, url))
                r = dom_parser.parse_dom(r, 'div', attrs={'class': 'season-list'})
                r = dom_parser.parse_dom(r, 'li')
                r = dom_parser.parse_dom(r, 'a', req='href')
                r = [i.attrs['href'] for i in r if i and int(i.content) == int(episode)][0]

                url = source_utils.strip_domain(r)
            return url
        except:
            return
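All of these examples rely on the same small helper contract. As a hedged sketch of the assumed behaviour (not the add-on's actual cleantitle/source_utils code): cleantitle.get normalises a title for fuzzy comparison, and source_utils.strip_domain reduces an absolute result URL to the site-relative part that is later re-joined with base_link.

import re

def get(title):
    # Assumed normalisation: lower-case, keep only letters and digits.
    return re.sub('[^a-z0-9]', '', title.lower()) if title else title

def strip_domain(url):
    # Assumed behaviour: keep only the path/query part of an absolute URL.
    match = re.findall('(?://.+?|)(/.+)', url)
    return match[0] if match else url

print get('The Big Sick (2017)')                          # thebigsick2017
print strip_domain('http://example.org/stream/123.html')  # /stream/123.html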
Example #2
    def __search(self, titles, year):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0])))

            query = urlparse.urljoin(self.base_link, query)

            t = cleantitle.get(titles[0])
            scraper = cfscrape.create_scraper()
            data = scraper.get(query).content
            #data = client.request(query, referer=self.base_link)
            data = client.parseDOM(data, 'div', attrs={'class': 'result-item'})
            r = dom_parser.parse_dom(data, 'div', attrs={'class': 'title'})
            r = zip(dom_parser.parse_dom(r, 'a'), dom_parser.parse_dom(data, 'span', attrs={'class': 'year'}))

            url = []
            for i in range(len(r)):
                title = cleantitle.get(r[i][0][1])
                title = re.sub('(\d+p|4k|3d|hd|season\d+)','',title)
                y = r[i][1][1]
                link = r[i][0][0]['href']
                if 'season' in title: continue
                if t == title and y == year:
                    if 'season' in link:
                        url.append(source_utils.strip_domain(link))
                        return url[0]
                    else: url.append(source_utils.strip_domain(link))

            return url
        except:
            return
Example #3
    def __search(self, titles, year, content):
        try:

            query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0])))

            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i][0]

            r = client.request(query)

            r = client.parseDOM(r, 'div', attrs={'class': 'tab-content clearfix'})

            if content == 'movies':
                r = client.parseDOM(r, 'div', attrs={'id': 'movies'})
            else:
                r = client.parseDOM(r, 'div', attrs={'id': 'series'})

            data = dom_parser.parse_dom(r, 'figcaption')

            for i in data:
                title = i[0]['title']
                title = cleantitle.get(title)
                if title in t:
                    x = dom_parser.parse_dom(i, 'a', req='href')
                    return source_utils.strip_domain(x[0][0]['href'])
                else:
                    url = dom_parser.parse_dom(i, 'a', req='href')
                    data = client.request(url[0][0]['href'])
                    data = re.findall('<h1><a.+?">(.+?)\((\d{4})\).*?</a></h1>', data, re.DOTALL)[0]
                    if titles[0] in data[0] and year == data[1]: return source_utils.strip_domain(url[0][0]['href'])

            return
        except:
            return
Example #4
    def __search(self, titles, year, season=0, episode=False):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'container'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'ml-item-content'})

            f = []
            for i in r:
                _url = dom_parser.parse_dom(i, 'a', attrs={'class': 'ml-image'}, req='href')[0].attrs['href']

                _title = re.sub('<.+?>|</.+?>', '', dom_parser.parse_dom(i, 'h6')[0].content).strip()
                try: _title = re.search('(.*?)\s(?:staf+el|s)\s*(\d+)', _title, re.I).group(1)
                except: pass

                _season = '0'

                _year = re.findall('calendar.+?>.+?(\d{4})', ''.join([x.content for x in dom_parser.parse_dom(i, 'ul', attrs={'class': 'item-params'})]))
                _year = _year[0] if len(_year) > 0 else '0'

                if season > 0:
                    s = dom_parser.parse_dom(i, 'span', attrs={'class': 'season-label'})
                    s = dom_parser.parse_dom(s, 'span', attrs={'class': 'el-num'})
                    if s: _season = s[0].content.strip()

                if cleantitle.get(_title) in t and _year in y and int(_season) == int(season):
                    f.append((_url, _year))
            r = f
            r = sorted(r, key=lambda i: int(i[1]), reverse=True)  # with year > no year
            r = [i[0] for i in r if i[0]][0]

            url = source_utils.strip_domain(r)
            if episode:
                r = client.request(urlparse.urljoin(self.base_link, url))
                r = dom_parser.parse_dom(r, 'div', attrs={'class': 'season-list'})
                r = dom_parser.parse_dom(r, 'li')
                r = dom_parser.parse_dom(r, 'a', req='href')
                r = [(i.attrs['href'], i.content) for i in r]
                r = [i[0] for i in r if i[1] and int(i[1]) == int(episode)][0]
                url = source_utils.strip_domain(r)
            return url
        except:
            return
Example #5
    def __search(self, titles, year):
        try:
            url = urlparse.urljoin(self.base_link, self.search_link)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            post = {'story': titles[0], 'years_ot': str(int(year) - 1), 'years_do': str(int(year) + 1)}
            r = client.request(url, post=post, XHR=True)

            if len(r) < 1000:
                url = urlparse.urljoin(self.base_link, self.search_old % urllib.quote_plus(titles[0]))
                r = client.request(url)

            r = r.decode('cp1251').encode('utf-8')

            r = dom_parser.parse_dom(r, 'article')
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'full'})
            r = [(dom_parser.parse_dom(i, 'a', attrs={'itemprop': 'url'}, req='href'),
                  dom_parser.parse_dom(i, 'h3', attrs={'class': 'name'}, req='content'),
                  dom_parser.parse_dom(i, 'div', attrs={'class': 'origin-name'}, req='content'),
                  dom_parser.parse_dom(i, 'div', attrs={'class': 'year'})) for i in r]
            r = [(i[0][0].attrs['href'], i[1][0].attrs['content'], i[2][0].attrs['content'], dom_parser.parse_dom(i[3], 'a', attrs={'itemprop': 'copyrightYear'})) for i in r if i[0] and i[1] and i[2]]
            r = [(i[0], i[1], i[2], i[3][0].content) for i in r if i[3]]
            r = [i[0] for i in r if (cleantitle.get(i[1]) in t or cleantitle.get(i[2]) in t) and i[3] in y][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #6
    def __search(self, titles, year, season='0'):
        try:
            url = urlparse.urljoin(self.base_link, self.search_link)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            post = {'story': utils.uni2cp(titles[0]), 'titleonly': 3, 'do': 'search', 'subaction': 'search', 'search_start': 1, 'full_search': 0, 'result_from': 1}
            r = client.request(url, post=post)

            r = r.decode('cp1251').encode('utf-8')

            r = dom_parser.parse_dom(r, 'table', attrs={'class': 'eBlock'})
            r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'eTitle'}), dom_parser.parse_dom(i[1], 'a', attrs={'href': re.compile('.*\d+_goda/')})) for i in r]
            r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), [x.content for x in i[1] if re.match('\d{4}', x.content)][0] if i[1] else '0') for i in r if i[0]]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1]) for i in r if i[0]]
            r = [(i[0], i[1], i[2], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r if i]
            r = [(i[0], i[3][0][0] if i[3] else i[1], i[2]) for i in r]
            r = [(i[0], i[1], i[2], re.findall(u'(.+?)\s+(\d+)\s+(?:сезон)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            return source_utils.strip_domain(r)
        except:
            return
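Examples #5 and #6 above talk to sites that serve Windows-1251, which is why the response body is re-encoded before any DOM parsing. A minimal sketch of that codec round-trip, assuming utils.uni2cp simply encodes a unicode title to cp1251:

# -*- coding: utf-8 -*-
title = u'\u0441\u0435\u0437\u043e\u043d'    # u'сезон', an illustrative query
post_story = title.encode('cp1251')          # what utils.uni2cp presumably does
response = post_story                        # stand-in for a cp1251 HTML body
text = response.decode('cp1251').encode('utf-8')
assert text.decode('utf-8') == title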
Example #7
    def __search(self, search_link, imdb, titles):
        try:
            query = search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'big-list'})
            r = dom_parser.parse_dom(r, 'table', attrs={'class': 'row'})
            r = dom_parser.parse_dom(r, 'td', attrs={'class': 'list-name'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [i.attrs['href'] for i in r if i and cleantitle.get(i.content) in t][0]

            url = source_utils.strip_domain(r)

            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/tt\d+.*')}, req='href')
            r = [re.findall('.+?(tt\d+).*?', i.attrs['href']) for i in r]
            r = [i[0] for i in r if i]

            return url if imdb in r else None
        except:
            return
Example #8
    def __search(self, titles, year, season='0'):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            post = {'story': utils.uni2cp(titles[0]), 'titleonly': 3, 'do': 'search', 'subaction': 'search', 'search_start': 1, 'full_search': 0, 'result_from': 1}
            html = client.request(self.base_link, post=post)

            html = html.decode('cp1251').encode('utf-8')

            r = dom_parser.parse_dom(html, 'div', attrs={'id': re.compile('news-id-\d+')})
            r = [(i.attrs['id'], dom_parser.parse_dom(i, 'a', req='href')) for i in r]
            r = [(re.sub('[^\d]+', '', i[0]), dom_parser.parse_dom(i[1], 'img', req='title')) for i in r]
            r = [(i[0], i[1][0].attrs['title'], '') for i in r if i[1]]
            r = [(i[0], i[1], i[2], re.findall(u'(.+?)\s+(\d+)\s+(?:сезон)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1]), i[3]) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0', i[3]) for i in r]
            r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]
            r = dom_parser.parse_dom(html, 'a', attrs={'href': re.compile('.*/%s-' % r)}, req='href')[0].attrs['href']

            return source_utils.strip_domain(r)
        except:
            return
Example #9
    def __search(self, titles, year):
        try:

            query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0]+' '+year)))

            query = urlparse.urljoin(self.base_link, query)

            t = cleantitle.get(titles[0])

            r = client.request(query)

            r = client.parseDOM(r, 'div', attrs={'class': 'card'})

            r = client.parseDOM(r, 'h3')

            for i in r:
                data = re.findall('<span.*?>(.+?)</span>.+?date">\s*\((\d{4}).*?</span>', i, re.DOTALL)
                for title, y in data:
                    title = cleantitle.get(title)
                    if title in t and y == year:
                        url = client.parseDOM(i, 'a', ret='href')[0]
                        return source_utils.strip_domain(url)

            return
        except:
            return
Example #10
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            tvshowtitle = data['tvshowtitle']
            localtvshowtitle = data['localtvshowtitle']
            aliases = source_utils.aliases_to_array(eval(data['aliases']))

            url = self.__search([localtvshowtitle] + aliases, data['year'], season)
            if not url and tvshowtitle != localtvshowtitle: url = self.__search([tvshowtitle] + aliases, data['year'], season)
            if not url: return

            r = client.request(urlparse.urljoin(self.base_link, url))

            r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['list-inline', 'list-film']})
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content) for i in r if i]
            r = [(i[0], i[1] if re.compile("^(\d+)$").match(i[1]) else '0') for i in r]
            r = [i[0] for i in r if int(i[1]) == int(episode)][0]

            return source_utils.strip_domain(r)
        except:
            return
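By convention in these scrapers, the url handed to episode() is a urlencoded query string packed by the matching tvshow() call. An illustrative round-trip of that convention (the field values are made up):

import urllib
import urlparse

# tvshow() side: pack the show metadata into a query string.
url = urllib.urlencode({'tvshowtitle': 'Show', 'localtvshowtitle': 'Serie',
                        'aliases': '[]', 'year': '2017'})

# episode() side: the unpacking idiom used in Example #10 above.
data = urlparse.parse_qs(url)
data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
assert data['year'] == '2017'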
Example #11
    def __search(self, titles, year, season='0'):
        try:
            query = self.search_link % (urllib.quote_plus(titles[0]))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'list_movies'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item_movie'})
            r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'tit'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content.lower()) for i in r if i]
            r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:\s*-?\s*(?:season|s))\s*(\d+)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            return source_utils.strip_domain(r)
        except:
            return
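The comprehension chain in Example #11 is the recurring parse idiom of this section: peel off a year, then a season or episode marker, falling back to '0' whenever a regex finds nothing. Unrolled on a single made-up entry:

import re

name = 'some show - season 2 (2016)'
ty = re.findall('(.+?) \(*(\d{4})', name)
title = ty[0][0] if ty else name             # 'some show - season 2'
year = ty[0][1] if ty else '0'               # '2016'

ts = re.findall('(.+?)\s+(?:\s*-?\s*(?:season|s))\s*(\d+)', title)
title = ts[0][0] if ts else title            # 'some show'
season = ts[0][1] if ts else '0'             # '2'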
Example #12
    def __search(self, titles, year, season='0'):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['products', 'row']})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': ['box-product', 'clearfix']})
            if int(season) > 0:
                r = [i for i in r if dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
            else:
                r = [i for i in r if not dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
            r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title-product'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content.lower()) for i in r if i]
            r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:staf+el|s)\s+(\d+)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            url = source_utils.strip_domain(r)
            url = url.replace('-info', '-stream')
            return url
        except:
            return
Example #13
    def __search(self, titles):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'nag'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item-video'})
            r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'entry-title'})
            r = dom_parser.parse_dom(r, 'a', req='href')

            for i in r:
                title = i[1]
                if re.search('\*(?:.*?)\*', title) is not None:
                    title = re.sub('\*(?:.*?)\*', '', title)
                title = cleantitle.get(title)
                if title in t:
                    return source_utils.strip_domain(i[0]['href'])

            return
        except:
            return
Example #14
    def __search(self, titles, year):
        try:
            query = self.search_link % (cleantitle.getsearch(titles[0].replace(' ','%20')))

            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i][0]

            r = client.request(query)

            r = client.parseDOM(r, 'li', attrs={'class': 'item everyone-item over_online haveTooltip'})

            for i in r:
                title = client.parseDOM(i, 'a', ret='title')[0]
                url = client.parseDOM(i, 'a', ret='href')[0]
                data = client.request(url)
                y = re.findall('<p><span>Año:</span>(\d{4})',data)[0]
                original_t = re.findall('movie-text">.+?h2.+?">\((.+?)\)</h2>',data, re.DOTALL)[0]
                original_t, title = cleantitle.get(original_t), cleantitle.get(title)

                if (t in title or t in original_t) and y == year :
                    x = dom_parser.parse_dom(i, 'a', req='href')
                    return source_utils.strip_domain(x[0][0]['href'])

            return
        except:
            return
Example #15
    def __search(self, titles, year, content):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]

            c = client.request(urlparse.urljoin(self.base_link, self.year_link % int(year)), output='cookie')

            p = urllib.urlencode({'search': cleantitle.query(titles[0])})
            c = client.request(urlparse.urljoin(self.base_link, self.search_link), cookie=c, post=p, output='cookie')
            r = client.request(urlparse.urljoin(self.base_link, self.type_link % content), cookie=c, post=p)

            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'content'})
            r = dom_parser.parse_dom(r, 'tr')
            r = [dom_parser.parse_dom(i, 'td') for i in r]
            r = [dom_parser.parse_dom(i, 'a', req='href') for i in r]

            r = [(i[0].attrs['href'], i[0].content, i[1].content) for i in r if i]
            x = []
            for i in r:
                if re.search('(?<=<i>\().*$', i[1]):
                    x.append((i[0], re.search('(.*?)(?=\s<)', i[1]).group(), re.search('(?<=<i>\().*$', i[1]).group(), i[2]))
                else:
                    x.append((i[0], i[1], i[1], i[2]))
            r = [i[0] for i in x if (cleantitle.get(i[1]) in t or cleantitle.get(i[2]) in t) and i[3] == year][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #16
    def __search(self, titles, year, imdb):
        try:
            query = self.search_link % (urllib.quote_plus(titles[0]))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'movie_cell'})
            r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'bottom'}), dom_parser.parse_dom(i, 'div', attrs={'class': 'year'})) for i in r]
            r = [(dom_parser.parse_dom(i[0], 'a', req=['href', 'title']), re.findall('[(](\d{4})[)]', i[1][0].content)) for i in r if i[0] and i[1]]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0]) for i in r if i[0] and i[1]]
            r = [(i[0], i[1].lower(), i[2]) for i in r if i[2] in y]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t]

            if len(r) > 1:
                for i in r:
                    data = client.request(urlparse.urljoin(self.base_link, i))
                    data = dom_parser.parse_dom(data, 'a', attrs={'name': re.compile('.*/tt\d+.*')}, req='name')
                    data = [re.findall('.+?(tt\d+).*?', d.attrs['name']) for d in data]
                    data = [d[0] for d in data if len(d) > 0 and d[0] == imdb]

                    if len(data) >= 1:
                        url = i
            else:
                url = r[0]

            if url:
                return source_utils.strip_domain(url)
        except:
            return
Example #17
    def __search(self, titles, year):
        try:

            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])+' '+year))

            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i][0]

            r = client.request(query)

            r = client.parseDOM(r, 'div', attrs={'class': 'karatula'})

            for i in r:
                title = client.parseDOM(i, 'a', ret='title')[0]
                y = re.findall('(\d{4})',title)[0]
                title = cleantitle.get_simple(title)

                if t in title and y == year:
                    x = dom_parser.parse_dom(i, 'a', req='href')
                    return source_utils.strip_domain(x[0][0]['href'])

            return
        except:
            return
Example #18
    def __search(self, titles, year):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0]+' '+year)))

            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i][0]

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'v_pict'})

            for i in r:
                title = re.findall('alt="(.+?)"', i[1], re.DOTALL)[0]
                y = re.findall('(\d{4})', title, re.DOTALL)[0]
                title = re.sub('<\w+>|</\w+>', '', title)
                title = cleantitle.get(title)
                title = re.findall('(\w+)', title)[0]

                if title in t and year == y:
                    url = re.findall('href="(.+?)"', i[1], re.DOTALL)[0]
                    return source_utils.strip_domain(url)
            return
        except:
            return
Example #19
    def __search(self, titles, year, episode='0'):
        try:
            title = titles[0]
            if int(episode) > 0: title += ' episode %s' % episode

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(urlparse.urljoin(self.base_link, self.search_link) % urllib.quote_plus(cleantitle.query(title)))

            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'entries'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'post'})
            r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content.lower()) for i in r if i]
            r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:episode)\s+(\d+)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(episode) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(episode)][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #20
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            url = '%s%01d-sezon-%01d-bolum.html' % (url.replace('.html', ''), int(season), int(episode))
            return source_utils.strip_domain(url)
        except:
            return []
Example #21
    def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
        try:
            t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
            t = [cleantitle.get(i) for i in set(t) if i]

            url = [i[0] for i in cache.get(self.sezonlukdizi_tvcache, 120) if cleantitle.get(i[1]) in t][0]

            return source_utils.strip_domain(url)
        except:
            return
Example #22
    def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
        try:
            t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
            t = [cleantitle.get(i) for i in set(t) if i]

            url = [i[0] for i in cache.get(self.sezonlukdizi_tvcache, 120) if cleantitle.get(i[1]) in t][0]

            return source_utils.strip_domain(url)
        except:
            failure = traceback.format_exc()
            log_utils.log('SezonlukDizi - Exception: \n' + str(failure))
            return
Example #23
    def __search(self, title):
        try:
            t = cleantitle.get(title)

            r = client.request(urlparse.urljoin(self.base_link, self.search_link), post={'suchbegriff': title})
            r = dom_parser.parse_dom(r, 'a', attrs={'class': 'ausgabe_1'}, req='href')
            r = [(i.attrs['href'], i.content) for i in r]
            r = [i[0] for i in r if cleantitle.get(i[1]) == t][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #24
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            url = urlparse.urljoin(self.base_link, url)
            r = client.request(url)
            r = dom_parser.parse_dom(r, 'a', attrs={'class': 'episode-block', 'href': re.compile('.*/saison-%s/episode-%s/.*' % (season, episode))}, req='href')
            r = [i.attrs['href'] for i in r][0]  # maybe also get the VF/VOSTFR to get the VF first

            return source_utils.strip_domain(r)
        except:
            return
Example #25
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            r = client.request(urlparse.urljoin(self.base_link, url))

            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'episodes'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'panel-body'})
            r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/episode-%s.*' % episode)}, req='href')[0].attrs['href']

            return source_utils.strip_domain(r)
        except:
            return
Example #26
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'article', {'class': 'SeasonList'})
            r = dom_parser.parse_dom(r, 'ul')
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('[^"]+-season-%s-episode-%s(?!\d)[^"]*' % (season, episode))}, req='href')[0].attrs['href']

            return source_utils.strip_domain(r)
        except:
            return
Example #27
    def __search(self, titles, year):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(urlparse.urljoin(self.base_link, self.search_link), post=urllib.urlencode({'val': cleantitle.query(titles[0])}), XHR=True)
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content, re.findall('\((\d{4})', i.content)) for i in r]
            r = [(i[0], i[1], i[2][0] if i[2] else '0') for i in r]
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #28
    def __search_movie(self, imdb, year, type='filme'):
        try:
            years = [str(year), str(int(year) + 1), str(int(year) - 1)]
            years = ['&veroeffentlichung[]=%s' % i for i in years]

            query = self.search_link % (type, imdb)
            query += ''.join(years)
            query = urlparse.urljoin(self.base_link, query)

            r = self.__proceed_search(query)

            if len(r) == 1:
                return source_utils.strip_domain(r[0][0])
        except:
            return
Example #29
    def __search(self, titles):
        try:
            query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = client.request(query, XHR=True)
            r = json.loads(r)

            r = [(i.get('url'), i.get('name')) for i in r]
            r = [i[0] for i in r if cleantitle.get(i[1]) in t][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #30
    def __get_episode_link(self, url, episode='1'):
        try:
            if not url:
                return

            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

            r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'all-episode'})
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*-episode-%s\.\w+.*?' % episode)}, req='href')[0].attrs['href']

            return source_utils.strip_domain(r)
        except:
            return
Example #31
    def movie(self, imdb, title, localtitle, aliases, year):
        try:
            query = self.search_link % (urllib.quote_plus(title))
            query = urlparse.urljoin(self.base_link, query)

            t = cleantitle.get(title)
            y = [
                '%s' % str(year),
                '%s' % str(int(year) + 1),
                '%s' % str(int(year) - 1), '0'
            ]

            r = client.request(query)
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item'})
            r = [(dom_parser.parse_dom(i,
                                       'a',
                                       attrs={'class': 'cluetip'},
                                       req='href'),
                  dom_parser.parse_dom(i,
                                       'div',
                                       attrs={'class': 'description'}))
                 for i in r]
            r = [(i[0][0].attrs['href'],
                  dom_parser.parse_dom(i[1],
                                       'h3',
                                       attrs={'class': 'text-nowrap'}),
                  dom_parser.parse_dom(i[1], 'div', attrs={'class': 'meta'}))
                 for i in r if i[0] and i[1]]
            r = [(i[0], i[1][0].content,
                  dom_parser.parse_dom(i[2],
                                       'span',
                                       attrs={'class': 'pull-left'}))
                 for i in r if i[0] and i[1] and i[2]]
            r = [(i[0], i[1], re.sub('[^\d]+', '', i[2][0].content)) for i in r
                 if i[2]]
            r = sorted(r, key=lambda i: int(i[2]),
                       reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) == t and i[2] in y][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #32
    def __search(self, titles, year):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]

            for title in titles:
                query = self.search_link
                try:
                    title.encode('UTF-8')
                    query %= urllib.quote_plus(title)
                except:
                    query %= title.decode('UTF-8').encode('Windows-1252')

                query = urlparse.urljoin(self.base_link, query)

                r = client.request(query)

                links = dom_parser.parse_dom(r,
                                             'div',
                                             attrs={'class': 'search_frame'})
                links = [dom_parser.parse_dom(i, 'a') for i in links]
                links = [(i[1], i[2]) for i in links]
                links = [(i[0].attrs['href'],
                          re.findall('>(.*?)<', i[0].content)[0], i[1].content)
                         for i in links]
                links = sorted(links, key=lambda i: int(i[2]),
                               reverse=True)  # with year > no year
                links = [
                    i[0] for i in links
                    if cleantitle.get(i[1]) in t and year == i[2]
                ]

                if len(links) > 0:
                    return source_utils.strip_domain(links[0])

            return
        except:
            try:
                source_faultlog.logFault(__name__, source_faultlog.tagSearch,
                                         titles[0])
            except:
                return
            return
Example #33
    def __search(self, titles, imdb, year):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query, XHR=True)
            r = json.loads(r)

            r = [(i.get('title'), i.get('custom_fields', {})) for i in r.get('posts', [])]
            r = [(i[0], i[1]) for i in r if i[0] and i[1]]
            r = [(i[0], i[1].get('Streaming', ['']), i[1].get('Jahr', ['0']), i[1].get('IMDb-Link', [''])) for i in r if i]
            r = [(i[0], i[1][0], i[2][0], re.findall('.+?(tt\d+).*?', i[3][0])) for i in r if i[0] and i[1] and i[2] and i[3]]
            r = [i[1] for i in r if imdb in i[3] or (cleantitle.get(i[0]) in t and i[2] in y)][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #34
    def __search(self, titles, year):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0] + ' ' + year)))
            query = urlparse.urljoin(self.base_link, query)
            t = [cleantitle.get(i) for i in set(titles) if i]
            r = self.scraper.get(query).content
            r = dom_parser.parse_dom(r, 'figure', attrs={'class': 'pretty-figure'})
            r = dom_parser.parse_dom(r, 'figcaption')
            for i in r:
                title = client.replaceHTMLCodes(i[0]['title'])
                title = cleantitle.get(title)
                if title in t:
                    x = dom_parser.parse_dom(i, 'a', req='href')
                    return source_utils.strip_domain(x[0][0]['href'])
            return
        except:
            return
Example #35
    def __search(self, titles):
        try:
            query = self.search_link % (urllib.quote_plus(urllib.quote_plus(cleantitle.query(titles[0]))))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            post = urllib.urlencode({'movlang_de': '1', 'movlang': ''})

            r = client.request(query, post=post)

            r = dom_parser.parse_dom(r, 'table', attrs={'class': 'table'})
            r = dom_parser.parse_dom(r, 'a', attrs={'class': 'PreviewImage'})

            for x in r:
                title = cleantitle.get(x[1])
                if title in t:
                    return source_utils.strip_domain(x[0]['href'])
            return
        except:
            return
Example #36
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return
            url = urlparse.urljoin(self.base_link, url)
            url = client.request(url, output='geturl')
            if int(season) == 1 and int(episode) == 1:
                season = episode = ''
            r = client.request(url)
            r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'episodios'})
            r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('[^\'"]*%s' % ('-%sx%s' % (season, episode)))})[0].attrs['href']
            return source_utils.strip_domain(r)
        except:
            return
Example #37
    def __search(self, titles):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'MovieList'})
            r = dom_parser.parse_dom(r, 'li', attrs={'class': 'TPostMv'})
            r = dom_parser.parse_dom(r, 'a')

            for i in r:
                title = dom_parser.parse_dom(i, 'h2', attrs={'class': 'Title'})
                title = cleantitle.get(title[0][1])
                if title in t:
                    return source_utils.strip_domain(i[0]['href'])
        except:
            return
Example #38
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return
            r = self.scraper.get(url).content

            if int(season) == 1 and int(episode) == 1:
                season = episode = ''

            r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'episodios'})
            r = dom_parser.parse_dom(
                r,
                'a',
                attrs={
                    'href':
                    re.compile('[^\'"]*%s' % ('-%sx%s' % (season, episode)))
                })[0].attrs['href']
            return source_utils.strip_domain(r)
        except:
            return ""
Example #39
    def __search(self, titles, year):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'article')
            r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'title'}), dom_parser.parse_dom(i, 'span', attrs={'class': 'year'})) for i in r]
            r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), i[1][0].content) for i in r if i[0] and i[1]]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1]) for i in r if i[0]]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #40
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            tvshowtitle = data['tvshowtitle']
            aliases = source_utils.aliases_to_array(eval(data['aliases']))
            aliases.append(data['localtvshowtitle'])

            url = self.__search([tvshowtitle] + aliases, data['year'], season)
            if not url: return

            urlWithEpisode = url + "?episode=" + str(
                episode) + "?season=" + str(season)
            return source_utils.strip_domain(urlWithEpisode)
        except:
            return
Example #41
    def __get_episode_link(self, url, episode='1'):
        try:
            if not url:
                return

            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

            r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'all-episode'})
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(
                r,
                'a',
                attrs={'href': re.compile('.*-episode-%s\.\w+.*?' % episode)},
                req='href')[0].attrs['href']

            return source_utils.strip_domain(r)
        except:
            return
Example #42
    def __search(self, title, year):
        try:
            r = urllib.urlencode({'keyword': title})
            r = client.request(urlparse.urljoin(self.base_link, self.search_link), XHR=True, post=r)

            t = cleantitle.get(title)
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = json.loads(r)
            r = [(i['link'], re.sub('<.+?>|</.+?>', '', i['title'])) for i in r if 'title' in i and 'link' in i]
            r = [(i[0], i[1], re.findall('(.+?)\s*Movie \d+:.+?$', i[1], re.DOTALL)) for i in r]
            r = [(i[0], i[2][0] if len(i[2]) > 0 else i[1]) for i in r]
            r = [(i[0], i[1], re.findall('(.+?) \((\d{4})\)?', i[1])) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if t == cleantitle.get(i[1]) and i[2] in y][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #43
    def __search(self, titles, year):
        try:
            r = cache.get(client.request, 48, urlparse.urljoin(self.base_link, self.search_link))

            t = [cleantitle.get(i) for i in set(titles) if i]

            links = dom_parser.parse_dom(r, "div", attrs={"class" : "genre"})
            links = dom_parser.parse_dom(links, "a")
            links = [(i.attrs["href"], i.content) for i in links]

            links = [i for i in links if cleantitle.get(i[1]) in t or any([a in cleantitle.get(i[1]) for a in t])]

            if len(links) > 1:
                links = [i for i in links if cleantitle.get(i[1]) in t]
            if len(links) > 0:
                return source_utils.strip_domain(links[0][0])
            raise Exception()
        except:

            return
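cache.get(fn, hours, *args), used above and in several later examples, is the add-on's memoiser: it calls fn(*args) at most once per the given number of hours and replays the stored result otherwise. A toy sketch of that assumed contract, reduced to an in-memory dict (the real implementation presumably persists across runs):

import time

_store = {}

def cache_get(fn, hours, *args):
    # Serve a stored result while it is younger than `hours`, else refresh.
    key = (fn.__name__,) + args
    hit = _store.get(key)
    if hit and time.time() - hit[0] < hours * 3600:
        return hit[1]
    result = fn(*args)
    _store[key] = (time.time(), result)
    return result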
Example #44
    def __search(self, titles, year):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0] + ' ' + year)))
            query = urlparse.urljoin(self.base_link, query)
            t = [cleantitle.get(i) for i in set(titles) if i][0]
            r = client.request(query)
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'v_pict'})
            for i in r:
                title = re.findall('alt="(.+?)"', i[1], re.DOTALL)[0]
                y = re.findall('(\d{4})', title, re.DOTALL)[0]
                title = re.sub('<\w+>|</\w+>', '', title)
                title = cleantitle.get(title)
                title = re.findall('(\w+)', title)[0]
                if title in t and year == y:
                    url = re.findall('href="(.+?)"', i[1], re.DOTALL)[0]
                    return source_utils.strip_domain(url)
            return
        except:
            return
Example #45
    def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
        try:
            clean_title = cleantitle.geturl(tvshowtitle)
            search_url = urlparse.urljoin(
                self.base_link,
                self.search_link % clean_title.replace('-', '+'))
            r = client.request(search_url)
            r = client.parseDOM(r, 'div', {'class': 'result-item'})
            r = [(dom_parser2.parse_dom(i, 'a', req='href')[0],
                  client.parseDOM(i, 'img', ret='alt')[0],
                  dom_parser2.parse_dom(i, 'span', attrs={'class': 'year'}))
                 for i in r]
            r = [(i[0].attrs['href'], i[1], i[2][0].content) for i in r
                 if (cleantitle.get(i[1]) == cleantitle.get(tvshowtitle)
                     and i[2][0].content == year)]
            url = source_utils.strip_domain(r[0][0])

            return url
        except:
            return
Example #46
    def __search(self, imdb):
        try:
            r = client.request(urlparse.urljoin(self.base_link, self.search_link % imdb))
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [i.attrs['href'] for i in r if i]

            if len(r) > 1:
                for i in r:
                    data = client.request(urlparse.urljoin(self.base_link, i))
                    data = re.compile('(imdbid\s*[=|:]\s*"%s"\s*,)' % imdb, re.DOTALL).findall(data)

                    if len(data) >= 1:
                        url = i
            else:
                url = r[0]

            if url:
                return source_utils.strip_domain(url)
        except:
            return
Example #47
    def __search(self, titles, year, episode='0'):
        try:
            title = titles[0]
            if int(episode) > 0: title += ' episode %s' % episode

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = [
                '%s' % str(year),
                '%s' % str(int(year) + 1),
                '%s' % str(int(year) - 1), '0'
            ]

            r = client.request(
                urlparse.urljoin(self.base_link, self.search_link) %
                urllib.quote_plus(cleantitle.query(title)))

            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'entries'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'post'})
            r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content.lower()) for i in r if i]
            r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1],
                  i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2],
                  re.findall('(.+?)\s+(?:episode)\s+(\d+)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2],
                  i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1].replace(' hd', ''), i[2],
                  '1' if int(episode) > 0 and i[3] == '0' else i[3])
                 for i in r]
            r = sorted(r, key=lambda i: int(i[2]),
                       reverse=True)  # with year > no year
            r = [
                i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y
                and int(i[3]) == int(episode)
            ][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #48
    def __search(self, titles, year):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]
            y = [
                '%s' % str(year),
                '%s' % str(int(year) + 1),
                '%s' % str(int(year) - 1), '0'
            ]

            r = cache.get(client.request,
                          4,
                          urlparse.urljoin(self.base_link, self.search_link),
                          post={'query': cleantitle.query(titles[0])})

            r = dom_parser.parse_dom(r, 'li', attrs={'class': 'entTd'})
            r = dom_parser.parse_dom(r,
                                     'div',
                                     attrs={'class': 've-screen'},
                                     req='title')
            r = [(dom_parser.parse_dom(i, 'a', req='href'),
                  i.attrs['title'].split(' - ')[0]) for i in r]
            r = [(i[0][0].attrs['href'], i[1],
                  re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1],
                  i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
            r = sorted(r, key=lambda i: int(i[2]),
                       reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y]
            if len(r) > 0:
                r = r[0]
            else:
                return

            return source_utils.strip_domain(r)
        except:
            try:
                source_faultlog.logFault(__name__, source_faultlog.tagSearch,
                                         titles[0])
            except:
                return
            return
Example #49
    def __search(self, isSerieSearch, titles, isTitleClean):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]
            if isTitleClean:
                t = [cleantitle.get(self.titleclean(i)) for i in set(titles) if i]
            for title in titles:
                if isTitleClean:
                    title = self.titleclean(title)
                query = self.search_link % (urllib.quote_plus(title))
                query = urlparse.urljoin(self.base_link, query)

                oRequest = cRequestHandler(query)
                oRequest.removeBreakLines(False)
                oRequest.removeNewLines(False)
                r = oRequest.request()
                r = dom_parser.parse_dom(r, 'article')
                r = dom_parser.parse_dom(r, 'a', attrs={'class': 'rb'}, req='href')
                r = [(i.attrs['href'], i.content) for i in r]

                if len(r) > 0:

                    if isSerieSearch:
                        r = [i[0] for i in r if re.findall('(.*?)S\d', i[1]) and cleantitle.get(re.findall('(.*?)S\d', i[1])[0]) in t]
                    else:
                        r = [i[0] for i in r if cleantitle.get(i[1]) in t]

                    if len(r) > 0:
                        url = source_utils.strip_domain(r[0])
                        return url

            return
        except:
            try:
                source_faultlog.logFault(
                    __name__, source_faultlog.tagSearch, titles[0])
            except:
                return
            return
Example #50
    def __search(self, isSerieSearch, titles, year):
        try:
            q = self.search_link % titles[0]
            q = urlparse.urljoin(self.base_link, q)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = cache.get(self.scraper.get, 4, q).content

            links = dom_parser.parse_dom(
                r, 'tr', attrs={'id': re.compile('coverPreview.+?')})
            tds = [dom_parser.parse_dom(i, 'td') for i in links]
            tuples = [(dom_parser.parse_dom(i[0], 'a')[0],
                       re.findall('>(\d{4})', i[1].content)) for i in tds
                      if 'ger' in i[4].content]

            tuplesSortByYear = [(i[0].attrs['href'], i[0].content)
                                for i in tuples if year in i[1]]

            if len(tuplesSortByYear) > 0 and not isSerieSearch:
                tuples = tuplesSortByYear
            elif isSerieSearch:
                tuples = [(i[0].attrs['href'], i[0].content) for i in tuples
                          if "serie" in i[0].content.lower()]
            else:
                tuples = [(i[0].attrs['href'], i[0].content) for i in tuples]

            urls = [i[0] for i in tuples if cleantitle.get(i[1]) in t]

            if len(urls) == 0:
                urls = [i[0] for i in tuples if 'untertitel' not in i[1]]

            if len(urls) > 0:
                return source_utils.strip_domain(urls[0])
        except:
            try:
                source_faultlog.logFault(__name__, source_faultlog.tagSearch,
                                         titles[0])
            except:
                return
            return
Example #51
    def __search(self, titles):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]

            for title in titles:
                params = {
                    'do': 'search',
                    'subaction': 'search',
                    'story': title
                }

                result = cache.get(client.request,
                                   4,
                                   self.base_link,
                                   post=params,
                                   headers={
                                       'Content-Type':
                                       'application/x-www-form-urlencoded'
                                   },
                                   error=True)

                links = dom_parser.parse_dom(result,
                                             'div',
                                             attrs={'class': 'shortstory-in'})
                links = [dom_parser.parse_dom(i, 'a')[0] for i in links]
                links = [(i.attrs['href'], i.attrs['title']) for i in links]
                links = [
                    i[0] for i in links
                    if any(a in cleantitle.get(i[1]) for a in t)
                ]

                if len(links) > 0:
                    return source_utils.strip_domain(links[0])
            return
        except:
            try:
                source_faultlog.logFault(__name__, source_faultlog.tagSearch,
                                         titles[0])
            except:
                return
            return
Example #52
    def __search(self, titles, year):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = client.request(query)

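            # Narrow the page to the result panel, then pair each title
            # heading with its year link ('0' when the year is missing).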
            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'main'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'panel-body'})
            r = [(dom_parser.parse_dom(i.content, 'h4', attrs={'class': 'title-list'}), dom_parser.parse_dom(i.content, 'a', attrs={'href': re.compile('.*/year/.*')})) for i in r]
            r = [(dom_parser.parse_dom(i[0][0].content, 'a', req='href'), i[1][0].content if i[1] else '0') for i in r if i[0]]
            r = [(i[0][0].attrs['href'], i[0][0].content, re.sub('<.+?>|</.+?>', '', i[1])) for i in r if i[0] and i[1]]
            r = [(i[0], i[1], i[2].strip()) for i in r if i[2]]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # entries with a year sort above the year-less '0' entries
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] == year][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #53
0
    def __search(self, titles):
        try:
            query = urlparse.urljoin(self.base_link, self.search_link)
            post = urllib.urlencode({'keyword': titles[0]})

            t = [cleantitle.get(i) for i in set(titles) if i]

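            # The endpoint answers with JSON; the suggestion list lives in
            # its 'content' field as HTML <li> items.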
            r = client.request(query, post=post)
            r = json.loads(r)
            r = r['content']

            r = dom_parser.parse_dom(r, 'li')

            for i in r:
                title = dom_parser.parse_dom(i[1],
                                             'a',
                                             attrs={'class': 'ss-title'})
                if cleantitle.get(title[0][1]) in t:
                    return source_utils.strip_domain(title[0][0]['href'])
        except:
            return
Example #54
0
    def __search(self, titles, year):
        try:
            r = cache.get(client.request, 4, urlparse.urljoin(self.base_link, self.search_link))

            t = [cleantitle.get(i) for i in set(titles) if i]

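            # The listing page is fetched without a query term; matching
            # happens client-side against the genre links' texts.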
            links = dom_parser.parse_dom(r, "div", attrs={"class": "genre"})
            links = dom_parser.parse_dom(links, "a")
            links = [(i.attrs["href"], i.content) for i in links]

            links = [i[0] for i in links if cleantitle.get(i[1]) in t]

            if len(links) > 0:
                return source_utils.strip_domain(links[0])
            return ""
        except:
            try:
                source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
            except:
                return
            return
Example #55
0
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            url = urlparse.urljoin(self.base_link, url)
            scraper = cfscrape.create_scraper()
            data = scraper.get(url).content
            data = client.parseDOM(data, 'ul', attrs={'class': 'episodios'})
            links = client.parseDOM(data, 'div', attrs={'class': 'episodiotitle'})
            sp = zip(client.parseDOM(data, 'div', attrs={'class': 'numerando'}), client.parseDOM(links, 'a', ret='href'))

            # Match the '<season>x<episode>' label (e.g. '2x5') in the
            # numbering column.
            sea_epi = '%dx%d' % (int(season), int(episode))
            for i in sp:
                if i[0] == sea_epi:
                    url = source_utils.strip_domain(i[1])
                    break

            return url
        except:
            return
Example #56
0
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'table', attrs={'class': 'episodes'})
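            # Each episode row carries both the 'episode' and 'season_<n>'
            # classes, so the season filter happens in the DOM query itself.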
            r = dom_parser.parse_dom(
                r, 'tr', attrs={'class': ['episode',
                                          'season_%s' % season]})
            r = dom_parser.parse_dom(r, 'span', attrs={'class': 'normal'})
            r = [(dom_parser.parse_dom(i, 'a', req='href'),
                  dom_parser.parse_dom(i, 'b')) for i in r]
            r = [(i[0][0].attrs['href'], i[1][0].content) for i in r
                 if i[0] and i[1]]
            r = [i[0] for i in r if i[1].upper() == 'E%s' % episode]

            if len(r) >= 1:
                return source_utils.strip_domain(r[0])
        except:
            return
Example #57
0
    def __get_episode_link(self, url, episode='1'):
        try:
            if not url:
                return

            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

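            # 'espisode' mirrors the misspelled class names in the site's own
            # markup, so the selectors must misspell it too.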
            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'list-espisode'})
            r = dom_parser.parse_dom(r,
                                     'div',
                                     attrs={'class': 'movie-item-espisode'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content) for i in r]
            r = [(i[0], re.findall(r'EP\s*(\d+)$', i[1])) for i in r]
            r = [i[0] for i in r if i[1] and int(i[1][0]) == int(episode)][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #58
0
    def __search(self, isSerieSearch, titles):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]
            url = self.search % titles[0]

            sHtmlContent = cache.get(self.scraper.get, 4, url).content
            search_results = dom_parser.parse_dom(sHtmlContent, 'div', attrs={'class': 'title'})
            search_results = dom_parser.parse_dom(search_results, 'a')
            search_results = [(i.attrs['href'], i.content) for i in search_results]
            # Split 'Title (Year...' entries into (title, year).
            search_results = [(i[0], re.findall(r'(.*?)\((\d+)', i[1])[0]) for i in search_results]
            # Movie searches must match a normalized candidate title outright;
            # serie searches strip the 'Staffel' (season) suffix first. The
            # findall guard skips serie entries that lack the suffix instead
            # of raising IndexError.
            search_results = [i[0] for i in search_results
                              if (not isSerieSearch and cleantitle.get(i[1][0]) in t)
                              or (isSerieSearch and re.findall('(.*?)Staffel', i[1][0])
                                  and cleantitle.get(re.findall('(.*?)Staffel', i[1][0])[0]) in t)]

            if len(search_results) > 0:
                return source_utils.strip_domain(search_results[0])
            return
        except:
            try:
                source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
            except:
                return
            return
Example #59
0
    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return
            url = urlparse.urljoin(self.base_link, url)

            r = cache.get(client.request, 4, url)

            # Season blocks are rendered newest-first, so index from the end
            # to reach the requested season.
            seasons = dom_parser.parse_dom(r, "div", attrs={"class": "section-watch-season"})
            seasons = seasons[len(seasons) - int(season)]
            episodes = dom_parser.parse_dom(seasons, "tr")
            # Episode rows navigate via an onclick handler; the target URL is
            # its quoted first argument.
            episodes = [(dom_parser.parse_dom(i, "th")[0].content, i.attrs["onclick"]) for i in episodes if "onclick" in i.attrs]
            episodes = [re.findall("'(.*?)'", i[1])[0] for i in episodes if i[0] == episode][0]

            return source_utils.strip_domain(episodes)
        except:
            try:
                source_faultlog.logFault(__name__, source_faultlog.tagSearch, title)
            except:
                return
            return ""
Example #60
0
    def __search(self, imdb):
        try:
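            # This source is searched by IMDb id rather than by title.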
            r = cache.get(self.scraper.get, 4, urlparse.urljoin(self.base_link, self.search_link % imdb)).content
            r = dom_parser.parse_dom(r, 'table', attrs={'id': 'RsltTableStatic'})
            r = dom_parser.parse_dom(r, 'tr')
            r = [(dom_parser.parse_dom(i, 'a', req='href'), dom_parser.parse_dom(i, 'img', attrs={'alt': 'language'}, req='src')) for i in r]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0].attrs['src']) for i in r if i[0] and i[1]]
            # The language is encoded as a number in the flag image's file name.
            r = [(i[0], i[1], re.findall(r'.+?(\d+)\.', i[2])) for i in r]
            r = [(i[0], i[1], i[2][0] if len(i[2]) > 0 else '0') for i in r]
            r = sorted(r, key=lambda i: int(i[2]))  # German ('1') sorts before German-subbed ('15')
            r = [i[0] for i in r if i[2] in ['1', '15']]

            if len(r) > 0:
                return source_utils.strip_domain(r[0])
            return ""
        except:
            try:
                source_faultlog.logFault(__name__, source_faultlog.tagSearch, imdb)
            except:
                return
            return