def getItems(self, data):
    """Parse the 'chart' table in `data` into a list of movie dicts.

    Each dict may carry 'id', 'name' and 'year' keys; rows that yield
    nothing are skipped. Returns [] (and logs) when no table is found.
    """
    results = []
    soup = BeautifulSoup(data)
    table = soup.find("table", { "class" : "chart" })
    try:
        for row in table.findAll("tr"):
            entry = {}
            for cell in row.findAll('td'):
                # Title and id come from the plain link cell (skip image links).
                if cell.a and not cell.a.img:
                    entry['id'] = int(cell.a['href'].split('/')[-1])
                    entry['name'] = str(cell.a.contents[0])
                # A cell with no heading and no link may hold the year.
                if not cell.h3 and not cell.a and len(cell.contents) == 1:
                    for candidate in cell.contents:
                        try:
                            entry['year'] = int(candidate)
                        except ValueError:
                            pass
            if entry:
                results.append(entry)
    except AttributeError:
        log.error('No search results.')
    return results
def findByProvider(self, data, provider):
    """Scrape trailer links for `provider` from an Apple-style bottomTable.

    Returns a dict mapping '480p'/'720p'/'1080p' to lists of urls
    (newest first). When nothing parses, sets results['404'] = True.
    """
    results = {'480p':[], '720p':[], '1080p':[]}
    try:
        tables = SoupStrainer('table')
        html = BeautifulSoup(data, parseOnlyThese = tables)
        resultTable = html.find('table', attrs = {'class':'bottomTable'})
        for tr in resultTable.findAll('tr'):
            trtext = str(tr).lower()
            # Rows below the "clips" header are not trailers: stop scanning.
            if 'clips' in trtext:
                break
            if 'trailer' in trtext and not 'clip' in trtext and provider in trtext:
                resolutions = tr.findAll('td', attrs = {'class':'bottomTableResolution'})
                # Dead code removed: an unused `nr` counter and commented-out
                # file-size scraping used to live here.
                for res in resolutions:
                    resolution = str(res.a.contents[0])
                    # Guard against resolutions we don't track; previously an
                    # unexpected label raised an uncaught KeyError.
                    if resolution in results:
                        results[resolution].insert(0, res.a['href'])
        return results
    except AttributeError:
        log.debug('No trailers found in provider %s.' % provider)
        results['404'] = True
        return results
def checkForUpdateWindows(self):
    """Check the github downloads page for a newer Windows build.

    Returns the download url of a newer release, or False when already
    up to date or when any fetch/parse step fails.
    """
    try:
        data = urllib2.urlopen(self.downloads, timeout = self.timeout).read()
    except (IOError, URLError):
        log.error('Failed to open %s.' % self.downloads)
        return False
    try:
        strainer = SoupStrainer('table')
        html = BeautifulSoup(data, parseOnlyThese = strainer)
        downloadTable = html.find('table', attrs = {'id':'s3_downloads'})
        # First link in the table is the most recent build.
        latestUrl = 'http://github.com' + downloadTable.find('a')['href'].replace(' ', '%20')
        try:
            latest = urllib2.urlopen(latestUrl, timeout = self.timeout)
        except (IOError, URLError):
            log.error('Failed to open %s.' % latestUrl)
            return False
        downloadUrl = latest.geturl()
        # Current version already in the url: nothing to update.
        if 'r' + str(version.windows) in downloadUrl:
            return False
        return downloadUrl
    except AttributeError:
        log.debug('Nothing found.')
        return False
def find(self, movie, quality, type):
    """Search the filled-requests table for `movie` at `quality`.

    Returns a list of matching feed items; always a list, never None.
    """
    results = []
    if not self.enabled() or not self.isAvailable(self.searchUrl):
        return results
    url = self.searchUrl % quote_plus(self.toSearchString(movie.name + ' ' + quality))
    log.info('Searching: %s' % url)
    data = urllib.urlopen(url)
    try:
        tables = SoupStrainer('table')
        html = BeautifulSoup(data, parseOnlyThese = tables)
        resultTable = html.find('table', attrs = {'class':'requests'})
        for result in resultTable.findAll('tr', attrs = {'class':'req_filled'}):
            new = self.feedItem()
            id = result.find('td', attrs = {'class':'reqid'})
            new.id = id.contents[0]
            name = result.find('td', attrs = {'class':'release'})
            new.name = self.toSaveString(name.contents[0])
            new.size = 9999
            new.content = 'x264'
            new.type = 'nzb'
            new.url = self.downloadUrl % (new.id)
            new.date = time.time()
            new.score = self.calcScore(new, movie)
            if self.isCorrectMovie(new, movie, type):
                results.append(new)
                log.info('Found: %s' % new.name)
        return results
    except AttributeError:
        log.debug('No search results found.')
        # Bug fix: this path previously fell off the end and returned None;
        # callers iterate the result, so always return the list.
        return results
def getItems(self, data):
    """Parse the film subtitle list table into a list of subtitle dicts."""
    results = []
    soup = BeautifulSoup(data)
    table = soup.find("table", {"class": "filmSubtitleList"})
    try:
        for row in table.findAll("tr"):
            item = {}
            for cell in row.findAll('td'):
                if cell.a:
                    spans = cell.a.findAll('span')
                    if len(spans) == 2:
                        # Second span carries id/name, first carries rating/language.
                        item['id'] = int(spans[1].get('id').replace('r', ''))
                        item['name'] = str(spans[1].contents[0]).strip()
                        item['rating'] = int(spans[0].get('class', '0').replace('r', ''))
                        # Language
                        lang = str(spans[0].contents[0]).strip()
                        item['language'] = self.languages.get(lang, lang)
                if cell.div:
                    # Ear icon marks hearing-impaired subtitles.
                    item['hi'] = cell.div.get('id') == 'imgEar'
            if item.get('name'):
                results.append(item)
    except AttributeError:
        log.error('No search results.')
    return results
def getItems(self, data):
    """Extract subtitle entries from the film subtitle list table."""
    entries = []
    soup = BeautifulSoup(data)
    subtitleTable = soup.find("table", { "class" : "filmSubtitleList" })
    try:
        for row in subtitleTable.findAll("tr"):
            entry = {}
            for cell in row.findAll('td'):
                anchor = cell.a
                if anchor:
                    spans = anchor.findAll('span')
                    if len(spans) == 2:
                        ratingSpan = spans[0]
                        nameSpan = spans[1]
                        entry['id'] = int(nameSpan.get('id').replace('r', ''))
                        entry['name'] = str(nameSpan.contents[0]).strip()
                        entry['rating'] = int(ratingSpan.get('class', '0').replace('r', ''))
                        # Map the site's language label onto ours; fall back to the raw label.
                        label = str(ratingSpan.contents[0]).strip()
                        entry['language'] = self.languages.get(label, label)
                if cell.div:
                    # An ear icon marks hearing-impaired subtitles.
                    entry['hi'] = cell.div.get('id') == 'imgEar'
            if entry.get('name'):
                entries.append(entry)
    except AttributeError:
        log.error('No search results.')
    return entries
def getInfo(self, url):
    """Fetch `url` and return its 'nfo' <div> as a unicode string.

    Returns '' when the url cannot be opened.
    """
    log.debug('Getting info: %s' % url)
    try:
        data = urllib2.urlopen(url, timeout = self.timeout).read()
        # Removed a stray dead `pass` statement that followed this line.
    except IOError:
        log.error('Failed to open %s.' % url)
        return ''
    # Restrict parsing to <div> tags; only the nfo block is needed.
    div = SoupStrainer('div')
    html = BeautifulSoup(data, parseOnlyThese = div)
    html = html.find('div', attrs = {'class':'nfo'})
    return str(html).decode("utf-8", "replace")
def getInfo(self, url):
    """Fetch `url` and return its 'i_info' <div> as a unicode string.

    Returns '' when the url cannot be opened.
    """
    log.debug('Getting info: %s' % url)
    try:
        data = urllib2.urlopen(url, timeout = self.timeout).read()
        # Removed a stray dead `pass` statement that followed this line.
    except IOError:
        log.error('Failed to open %s.' % url)
        return ''
    # Removed an unused `tables = SoupStrainer('table')`: it was never passed
    # to BeautifulSoup, and straining to tables would have hidden the <div>
    # searched for below.
    html = BeautifulSoup(data)
    movieInformation = html.find('div', attrs = {'class':'i_info'})
    return str(movieInformation).decode("utf-8", "replace")
def find(self, movie, quality, type):
    """Search mysterbin for `movie` at `quality`; return matching feed items."""
    results = []
    if not self.enabled() or not self.isAvailable(self.searchUrl):
        return results
    url = self.searchUrl % quote_plus(
        self.toSearchString(movie.name + ' ' + quality))
    log.info('Searching: %s' % url)
    try:
        data = urllib2.urlopen(url, timeout=self.timeout).read()
    except (IOError, URLError):
        log.error('Failed to open %s.' % url)
        return results
    try:
        tables = SoupStrainer('table')
        html = BeautifulSoup(data, parseOnlyThese=tables)
        resultable = html.find('table', attrs={'class': 't'})
        for result in resultable.findAll('span', attrs={'class': 'cname'}):
            new = self.feedItem()
            a = result.find('a')
            # The collection id is embedded in the detail link.
            id = re.search('(?<=detail\?c\=)\w+', a['href'])
            new.id = id.group(0)
            # Join every text node of the link into the release name.
            # Fixes a dead `text = a.findAll(text=True)` assignment that was
            # immediately shadowed, and replaces quadratic += concatenation.
            new.name = ''.join(unicode(text).encode('utf-8')
                               for text in a.findAll(text=True))
            new.size = 9999
            new.content = 'mysterbin'
            new.type = 'nzb'
            new.url = self.downloadUrl % (new.id)
            new.date = time.time()
            new.score = self.calcScore(new, movie)
            if self.isCorrectMovie(new, movie, type):
                results.append(new)
                log.info('Found: %s' % new.name)
        return results
    except AttributeError:
        log.debug('No search results found.')
        return results
def find(self, movie, quality, type):
    """Search mysterbin for `movie` at `quality`; return matching feed items."""
    results = []
    if not self.enabled() or not self.isAvailable(self.searchUrl):
        return results
    url = self.searchUrl % quote_plus(self.toSearchString(movie.name + ' ' + quality))
    log.info('Searching: %s' % url)
    try:
        data = urllib2.urlopen(url, timeout = self.timeout).read()
    except (IOError, URLError):
        log.error('Failed to open %s.' % url)
        return results
    try:
        tables = SoupStrainer('table')
        html = BeautifulSoup(data, parseOnlyThese = tables)
        resultable = html.find('table', attrs = {'class':'t'})
        for result in resultable.findAll('span', attrs = {'class':'cname'}):
            new = self.feedItem()
            a = result.find('a')
            # The collection id lives in the detail link's query string.
            id = re.search('(?<=detail\?c\=)\w+', a['href'])
            new.id = id.group(0)
            # Removed a dead `text = a.findAll(text=True)` assignment that the
            # loop variable immediately shadowed; build the name with join
            # instead of repeated string concatenation.
            words = ''.join([unicode(t).encode('utf-8') for t in a.findAll(text = True)])
            new.name = words
            new.size = 9999
            new.content = 'mysterbin'
            new.type = 'nzb'
            new.url = self.downloadUrl % (new.id)
            new.date = time.time()
            new.score = self.calcScore(new, movie)
            if self.isCorrectMovie(new, movie, type):
                results.append(new)
                log.info('Found: %s' % new.name)
        return results
    except AttributeError:
        log.debug('No search results found.')
        return results
def find(self, movie, quality, type):
    """Search the filled-requests table for `movie` at `quality`.

    Returns a list of matching feed items (empty on any failure).
    """
    matches = []
    if not self.enabled() or not self.isAvailable(self.searchUrl):
        return matches
    url = self.searchUrl % quote_plus(self.toSearchString(movie.name + ' ' + quality))
    log.info('Searching: %s' % url)
    try:
        data = urllib2.urlopen(url, timeout=self.timeout).read()
    except (IOError, URLError):
        log.error('Failed to open %s.' % url)
        return matches
    try:
        html = BeautifulSoup(data, parseOnlyThese=SoupStrainer('table'))
        requestTable = html.find('table', attrs={'class': 'requests'})
        for row in requestTable.findAll('tr', attrs={'class': 'req_filled'}):
            entry = self.feedItem()
            idCell = row.find('td', attrs={'class': 'reqid'})
            entry.id = idCell.contents[0]
            releaseCell = row.find('td', attrs={'class': 'release'})
            entry.name = self.toSaveString(releaseCell.contents[0])
            entry.size = 9999
            entry.content = 'x264'
            entry.type = 'nzb'
            entry.url = self.downloadUrl % (entry.id)
            entry.date = time.time()
            entry.score = self.calcScore(entry, movie)
            if self.isCorrectMovie(entry, movie, type):
                matches.append(entry)
                log.info('Found: %s' % entry.name)
        return matches
    except AttributeError:
        log.debug('No search results found.')
        return matches
def find(self, movie, quality, type):
    """Search a pirate-bay style torrent index for `movie` at `quality`.

    Scrapes the 'searchResult' table, extracting id, name, upload date,
    size and seeder/leecher counts per row; only torrents that are old
    enough (conf 'wait'), seeded, and at least the quality's minSize are
    scored and returned. Returns a list of feed items, or [] on a parse
    failure.
    """
    results = []
    if not self.enabled() or not self.isAvailable(self.apiUrl):
        return results
    # Category id and an ignore string are folded into the search query.
    url = self.apiUrl % (quote_plus(self.toSearchString(movie.name + ' ' + quality) + self.makeIgnoreString(type)), self.getCatId(type))
    log.info('Searching: %s' % url)
    try:
        data = urllib2.urlopen(url, timeout = self.timeout).read()
    except (IOError, URLError):
        log.error('Failed to open %s.' % url)
        return results
    try:
        tables = SoupStrainer('table')
        html = BeautifulSoup(data, parseOnlyThese = tables)
        resultTable = html.find('table', attrs = {'id':'searchResult'})
        for result in resultTable.findAll('tr'):
            details = result.find('a', attrs = {'class':'detLink'})
            # Header/spacer rows have no detail link; skip them.
            if details:
                href = re.search('/(?P<id>\d+)/', details['href'])
                id = href.group('id')
                name = self.toSaveString(details.contents[0])
                # Description cell holds comma-separated "Uploaded ..." and
                # "Size ..." fragments.
                desc = result.find('font', attrs = {'class':'detDesc'}).contents[0].split(',')
                date = ''
                size = 0
                for item in desc:
                    # Weird date stuff: the site renders relative dates
                    # ("Today", "Y-day") that must be normalized first.
                    if 'uploaded' in item.lower():
                        date = item.replace('Uploaded', '')
                        date = date.replace('Today', '')
                        # Do something with yesterday: parse as today and
                        # subtract a day's worth of seconds afterwards.
                        yesterdayMinus = 0
                        if 'Y-day' in date:
                            date = date.replace('Y-day', '')
                            yesterdayMinus = 86400
                        # NOTE(review): presumably strips a non-breaking
                        # space from the raw text — confirm the first
                        # argument is U+00A0 and not a plain space.
                        datestring = date.replace(' ', ' ').strip()
                        date = int(time.mktime(parse(datestring).timetuple())) - yesterdayMinus
                    # size
                    elif 'size' in item.lower():
                        size = item.replace('Size', '')
                # Seeders/leechers are the only two integer-valued cells.
                seedleech = []
                for td in result.findAll('td'):
                    try:
                        seedleech.append(int(td.contents[0]))
                    except ValueError:
                        pass
                seeders = 0
                leechers = 0
                if len(seedleech) == 2 and seedleech[0] > 0 and seedleech[1] > 0:
                    seeders = seedleech[0]
                    leechers = seedleech[1]
                # to item
                new = self.feedItem()
                new.id = id
                new.type = 'torrent'
                new.name = name
                new.date = date
                new.size = self.parseSize(size)
                new.seeders = seeders
                new.leechers = leechers
                new.url = self.downloadLink(id, name)
                # Score bonus: trusted uploader plus a tenth of the seeders.
                new.score = self.calcScore(new, movie) + self.uploader(result) + (seeders / 10)
                # Only accept seeded torrents that have aged past the
                # configured wait window and meet the quality's minimum size.
                if seeders > 0 and (new.date + (int(self.conf('wait')) * 60 * 60) < time.time()) and Qualities.types.get(type).get('minSize') <= new.size:
                    new.detailUrl = self.detailLink(id)
                    new.content = self.getInfo(new.detailUrl)
                    if self.isCorrectMovie(new, movie, type):
                        results.append(new)
                        log.info('Found: %s' % new.name)
        return results
    except AttributeError:
        log.debug('No search results found.')
        return []
def download(self, subtitle):
    """Download one subtitle via the site's ASP.NET postback form.

    Fetches the subtitle page, replays the __VIEWSTATE form post to get
    the file, caches it under cherrypy's cachePath, and unzips it when
    needed. Returns a list of extracted subtitle file paths, or False
    on any failure (rar archives are not supported).
    """
    # Work on the first queued subtitle candidate.
    subtitle = subtitle['subtitles'].pop()
    url = self.downloadUrl % subtitle['id']
    try:
        data = self.urlopen(url, timeout=self.timeout).read()
    except (IOError, URLError):
        log.error('Failed to open %s.' % url)
        return False
    soup = BeautifulSoup(data)
    # The real download url is embedded inside the link's href attribute,
    # quoted; the split('"')[-2] pulls it out.
    postUrl = self.siteUrl + soup.find("a", { 'id': 's_lc_bcr_downloadLink' }).get('href').split('"')[-2]
    # typeId doubles as the archive/file extension ('zip', 'rar', ...).
    typeId = soup.find("input", {"name": "typeId"}).get('value')
    # Rebuild the ASP.NET postback payload from the page's hidden inputs.
    params = urllib.urlencode({ '__EVENTTARGET': 's$lc$bcr$downloadLink', '__EVENTARGUMENT': '', '__VIEWSTATE': soup.find("input", { "id": "__VIEWSTATE" }).get('value'), '__PREVIOUSPAGE': soup.find("input", { "id": "__PREVIOUSPAGE" }).get('value'), 'subtitleId': soup.find("input", { "id": "subtitleId" }).get('value'), 'typeId': typeId, 'filmId': soup.find("input", { "name": "filmId" }).get('value') })
    # No unrarring yet
    if 'rar' in typeId:
        log.error('Unrar not supported yet.')
        return False
    # Referer and a browser User-Agent are required for the post to succeed.
    req = urllib2.Request(
        postUrl,
        headers={
            'Referer': url,
            'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8'
        })
    subtitleFiles = []
    try:
        # Rate-limit against the site before posting.
        self.wait()
        data = urllib2.urlopen(req, params)
        self.lastUse = time.time()
        # Cache filename is derived from the url so repeats overwrite.
        hash = hashlib.md5(url).hexdigest()
        tempdir = cherrypy.config.get('cachePath')
        tempSubtitleFile = os.path.join(tempdir, hash + '.' + typeId)
        # Remove the old
        if os.path.isfile(tempSubtitleFile):
            os.remove(tempSubtitleFile)
        with open(tempSubtitleFile, 'wb') as f:
            f.write(data.read())
        if 'zip' in typeId:
            # Extract only members matching a known subtitle extension.
            zip = ZipFile(tempSubtitleFile)
            extract = []
            for name in zip.namelist():
                for ext in self.extensions:
                    if ext.replace('*', '') in name:
                        subtitleFiles.append(os.path.join(tempdir, name))
                        extract.append(name)
            zip.extractall(tempdir, extract)
            os.remove(tempSubtitleFile)
        else:
            subtitleFiles.append(tempSubtitleFile)
        log.info('Subtitle download "%s" finished. %dKB.' % (subtitle['name'], int(data.info().getheaders("Content-Length")[0]) / 1024))
        return subtitleFiles
    except:
        # Broad catch: any failure during post/write/unzip aborts the download.
        log.error('Subtitle download %s failed.' % subtitle['name'])
        return False
def download(self, subtitle):
    """Fetch a subtitle file by replaying the site's postback form.

    Pops one candidate from subtitle['subtitles'], scrapes the hidden
    ASP.NET form fields from its page, posts them to obtain the file,
    stores it in the cachePath, and unpacks zip archives. Returns the
    list of resulting subtitle file paths, or False on failure; rar
    archives are rejected up front.
    """
    subtitle = subtitle['subtitles'].pop()
    url = self.downloadUrl % subtitle['id']
    try:
        data = self.urlopen(url, timeout = self.timeout).read()
    except (IOError, URLError):
        log.error('Failed to open %s.' % url)
        return False
    soup = BeautifulSoup(data)
    # The target url sits quoted inside the anchor's href; split('"')[-2]
    # recovers it.
    postUrl = self.siteUrl + soup.find("a", {'id' : 's_lc_bcr_downloadLink' }).get('href').split('"')[-2]
    # typeId also names the downloaded file's extension ('zip', 'rar', ...).
    typeId = soup.find("input", {"name" : "typeId" }).get('value')
    # Recreate the ASP.NET viewstate postback from the page's hidden inputs.
    params = urllib.urlencode({ '__EVENTTARGET': 's$lc$bcr$downloadLink', '__EVENTARGUMENT': '', '__VIEWSTATE': soup.find("input", {"id" : "__VIEWSTATE" }).get('value'), '__PREVIOUSPAGE': soup.find("input", { "id" : "__PREVIOUSPAGE" }).get('value'), 'subtitleId': soup.find("input", {"id" : "subtitleId" }).get('value'), 'typeId': typeId, 'filmId': soup.find("input", {"name" : "filmId" }).get('value') })
    # No unrarring yet
    if 'rar' in typeId:
        log.error('Unrar not supported yet.')
        return False
    # The site requires a Referer and a browser-like User-Agent.
    req = urllib2.Request(postUrl, headers = { 'Referer' : url, 'User-Agent' : 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8' })
    subtitleFiles = []
    try:
        # Respect the provider's rate limit before posting.
        self.wait()
        data = urllib2.urlopen(req, params)
        self.lastUse = time.time()
        # Deterministic cache name so re-downloads replace the old file.
        hash = hashlib.md5(url).hexdigest()
        tempdir = cherrypy.config.get('cachePath')
        tempSubtitleFile = os.path.join(tempdir, hash + '.' + typeId)
        # Remove the old
        if os.path.isfile(tempSubtitleFile):
            os.remove(tempSubtitleFile)
        with open(tempSubtitleFile, 'wb') as f:
            f.write(data.read())
        if 'zip' in typeId:
            # Pull out only members whose names match a subtitle extension.
            zip = ZipFile(tempSubtitleFile)
            extract = []
            for name in zip.namelist():
                for ext in self.extensions:
                    if ext.replace('*', '') in name:
                        subtitleFiles.append(os.path.join(tempdir, name))
                        extract.append(name)
            zip.extractall(tempdir, extract)
            os.remove(tempSubtitleFile)
        else:
            subtitleFiles.append(tempSubtitleFile)
        log.info('Subtitle download "%s" finished. %dKB.' % (subtitle['name'], int(data.info().getheaders("Content-Length")[0]) / 1024))
        return subtitleFiles
    except:
        # Broad catch keeps one bad download from killing the caller.
        log.error('Subtitle download %s failed.' % subtitle['name'])
        return False