Code example #1
    def sqlite2cookie(self, filename):
        from cStringIO import StringIO
        try:
            from pysqlite2 import dbapi2 as sqlite
        except ImportError:
            try:
                from sqlite3 import dbapi2 as sqlite  # try the 2.5+ stdlib
            except ImportError:
                raise PluginWarning('Unable to use sqlite3 or pysqlite2', log)

        log.debug('connecting: %s' % filename)
        try:
            con = sqlite.connect(filename)
        except Exception:
            raise PluginError('Unable to open cookies sqlite database')

        cur = con.cursor()
        try:
            cur.execute(
                'select host, path, isSecure, expiry, name, value from moz_cookies'
            )
        except Exception:
            raise PluginError(
                '%s does not appear to be a valid Firefox 3 cookies file' %
                filename, log)

        ftstr = ['FALSE', 'TRUE']

        s = StringIO()
        s.write("""\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file!  Do not edit.
""")
        count = 0
        failed = 0

        log.debug('fetching all cookies')

        def notabs(val):
            if isinstance(val, basestring):
                return val.replace('\t', '')
            return val

        while True:
            try:
                item = cur.next()
                # remove \t from item (#582)
                item = [notabs(field) for field in item]
                try:
                    s.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                            (item[0], ftstr[item[0].startswith('.')], item[1],
                             ftstr[item[2]], item[3], item[4], item[5]))

                    log.trace('Adding cookie for %s. key: %s value: %s' %
                              (item[0], item[4], item[5]))
                    count += 1
                except Exception:
                    to_hex = lambda x: ''.join(
                        [hex(ord(c))[2:].zfill(2) for c in x])
                    i = 0
                    for val in item:
                        if isinstance(val, basestring):
                            log.debug('item[%s]: %s' % (i, to_hex(val)))
                        else:
                            log.debug('item[%s]: %s' % (i, val))
                        i += 1
                    failed += 1

            except UnicodeDecodeError:
                # for some god awful reason the sqlite module can throw UnicodeDecodeError ...
                log.debug('got UnicodeDecodeError from sqlite, ignored')
                failed += 1
            except StopIteration:
                break

        log.debug('Added %s cookies to jar. %s failed (non-ascii)' %
                  (count, failed))

        s.seek(0)
        con.close()

        cookie_jar = cookielib.MozillaCookieJar()
        cookie_jar._really_load(s, '', True, True)
        return cookie_jar
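A minimal usage sketch for sqlite2cookie above; the Firefox profile path is a made-up example and "self" stands for the plugin instance that defines the method. The returned jar is a plain cookielib.MozillaCookieJar, so its cookies can be inspected directly:

    # hypothetical call; the profile path below is illustrative only
    jar = self.sqlite2cookie('/home/user/.mozilla/firefox/abc.default/cookies.sqlite')
    for cookie in jar:
        log.debug('cookie %s=%s for %s' % (cookie.name, cookie.value, cookie.domain))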
Code example #2
 def _request(self, method, url, **kwargs):
     try:
         return requests.request(method, url, **kwargs)
     except Exception as e:
         raise PluginError(str(e))
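A usage sketch for the wrapper above; the call site and URL are illustrative assumptions. The point of the wrapper is that any exception raised by requests surfaces as a PluginError:

     # hypothetical call site inside the same plugin class
     try:
         response = self._request('GET', 'http://example.com/feed', timeout=30)
     except PluginError as err:
         log.error('request failed: %s' % err)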
Code example #3
 def check_auth():
     if task.requests.post(
             'http://api.trakt.tv/account/test/' + config['api_key'],
             data=json.dumps(auth),
             raise_status=False).status_code != 200:
         raise PluginError('Authentication to trakt failed.')
Code example #4
            except NetrcParseError, e:
                log.error('netrc: %s, file: %s, line: %s' %
                          (e.msg, e.filename, e.lineno))
        else:
            if 'username' in config:
                user = config['username']
            if 'password' in config:
                password = config['password']

        try:
            cli = transmissionrpc.Client(config['host'], config['port'], user,
                                         password)
        except TransmissionError, e:
            if isinstance(e.original, HTTPHandlerError):
                if e.original.code == 111:
                    raise PluginError(
                        "Cannot connect to transmission. Is it running?")
                elif e.original.code == 401:
                    raise PluginError(
                        "Username/password for transmission is incorrect. Cannot connect."
                    )
                elif e.original.code == 110:
                    raise PluginError(
                        "Cannot connect to transmission: Connection timed out."
                    )
                else:
                    raise PluginError("Error connecting to transmission: %s" %
                                      e.original.message)
            else:
                raise PluginError("Error connecting to transmission: %s" %
                                  e.message)
        return cli
Code example #5
File: lostfilm.py Project: Winter/Flexget
    def on_task_input(self, task, config):
        config = self.build_config(config)
        logger.trace('Config is {}', config)
        if not config['enabled']:
            return
        if config.get('lf_session') is not None:
            task.requests.cookies.set('lf_session', config['lf_session'])
            logger.debug('lf_session is set')
        task.requests.headers.update({
            'Cache-Control': 'no-cache',
            'Pragma': 'no-cache'
        })
        prefilter_list = set()
        if config['prefilter']:
            prefilter_list = self._get_series(task)
            if prefilter_list:
                logger.verbose('Generated pre-filter list with {} entries',
                               len(prefilter_list))
            else:
                logger.warning(
                    'Pre-filter list is empty. No series names are configured?'
                )

        proxy_handler = None
        if task.requests.proxies is not None:
            proxy_handler = ProxyHandler(task.requests.proxies)

        site_urls = config['site_urls']
        tried_urls = []

        while site_urls:
            rss_url = site_urls[0] + "rss.xml"  # If RSS url changes, update it here
            logger.trace('Trying to get and parse the RSS feed: {}', rss_url)
            try:
                rss = feedparser.parse(rss_url,
                                       handlers=[proxy_handler],
                                       request_headers={
                                           'Cache-Control': 'no-cache',
                                           'Pragma': 'no-cache'
                                       })
                status = rss.get('status')
                if status == 200:
                    logger.verbose('Received RSS feed from {}', rss_url)
                    break
                logger.info(
                    'Received {} status instead of 200 (OK) when trying to download the RSS feed {}',
                    status, rss_url)
            except Exception as e:
                logger.info('Cannot get or parse the RSS feed {}. Error: {}',
                            rss_url, e)
            rss = None
            tried_urls.append(site_urls.pop(0))

        if not rss:
            raise PluginError('Cannot get the RSS feed')
        # Use failed site locations as the last resort option for the redirect page
        site_urls.extend(tried_urls)

        entries = []
        for idx, item in enumerate(rss.entries, 1):
            series_name_rus = series_name_org = None
            episode_name_rus = episode_name_org = None
            season_num = episode_num = None
            perfect_match = False

            if item.get('title') is None:
                logger.warning('RSS item doesn\'t have a title')
            else:
                logger.trace('Got RSS item title: {}', item['title'])
                title_match = RSS_TITLE_REGEXP.fullmatch(item['title'])
                if title_match is not None:
                    if title_match['sr_org'] is not None:
                        series_name_org = title_match['sr_org']
                        series_name_rus = title_match['sr_rus']
                        if title_match['ep_rus'] is not None:
                            perfect_match = True
                    else:
                        series_name_org = title_match['sr_rus']
                        series_name_rus = None
                    season_num = int(title_match['season'])
                    episode_num = int(title_match['episode'])
                    episode_name_rus = title_match['ep_rus']
                else:
                    logger.warning('Cannot parse RSS item title: {}',
                                   item['title'])

            # Skip series names that are not configured.
            # Do not filter out the current item if it was not matched perfectly.
            # It's better to process an extra item that will be filtered out later
            # by the series plugin than to drop an actually needed item because an
            # incorrectly matched name was not found in the pre-filter list.
            if prefilter_list:
                if perfect_match:
                    try:
                        folded_name = self._simplify_name(series_name_org)
                    except TextProcessingError as e:
                        logger.warning(
                            'RSS item series name "{}" could be wrong',
                            series_name_org)
                        folded_name = None
                    if folded_name and folded_name not in prefilter_list:
                        if idx != len(
                                rss.entries) or entries or task.no_entries_ok:
                            logger.debug(
                                'Skipping "{}" as "{}" was not found in the list of configured series',
                                item['title'], series_name_org)
                            continue
                        else:
                            logger.debug(
                                'Force adding the last RSS item to the result to avoid warning of empty output'
                            )
                    else:
                        logger.trace(
                            '"{}" was found in the list of configured series',
                            series_name_org)
                else:
                    logger.debug(
                        'Not skipping RSS item as series names may be detected incorrectly'
                    )

            if item.get('description') is None:
                logger.warning(
                    'RSS item doesn\'t have a description, skipping')
                continue
            lostfilm_id_match = RSS_LF_ID_REGEXP.search(item['description'])
            if lostfilm_id_match is None or lostfilm_id_match['id'] is None:
                logger.warning(
                    'RSS item doesn\'t have lostfilm id in the description: {}, skipping',
                    item['description'])
                continue
            lostfilm_id = int(lostfilm_id_match['id'])

            if not series_name_org or season_num is None or episode_num is None:
                if item.get('link') is None:
                    logger.warning('RSS item doesn\'t have a link, skipping')
                    continue
                link_match = RSS_LINK_REGEXP.fullmatch(item['link'])
                if link_match is None:
                    logger.warning('Cannot parse RSS item link, skipping: {}',
                                   item['link'])
                    continue
                series_name_org = link_match['sr_org2'].replace('_', ' ')
                season_num = int(link_match['season'])
                episode_num = int(link_match['episode'])
                logger.verbose(
                    'Using imprecise information from RSS item \'link\'')

            logger.trace((
                'Processing RSS entry: names: series "{}", series ru "{}", episode ru "{}"; '
                'numbers: season "{}", episode "{}", lostfilm id "{}"; perfect detect: {}'
            ), series_name_org, series_name_rus, episode_name_rus, season_num,
                         episode_num, lostfilm_id, perfect_match)
            params = {'c': lostfilm_id, 's': season_num, 'e': episode_num}

            tried_urls = []
            while site_urls:
                redirect_url = site_urls[0] + 'v_search.php'
                logger.trace('Trying to get the redirect page: {}',
                             redirect_url)
                try:
                    response = task.requests.get(redirect_url, params=params)
                    if response.status_code == 200:
                        logger.debug('The redirect page is downloaded from {}',
                                     redirect_url)
                        break
                    logger.verbose(
                        'Got status {} while retrieving the redirect page {}',
                        response.status_code, redirect_url)
                except RequestException as e:
                    logger.verbose(
                        'Failed to get the redirect page from {}. Error: {}',
                        redirect_url, e)
                except cf_exceptions as e:
                    logger.verbose(
                        'Cannot bypass CF page protection to get the redirect page {}. Error: {}',
                        redirect_url, e)
                except Exception as e:
                    # Catch other errors related to download to avoid crash
                    logger.warning(
                        'Got unexpected exception when trying to get the redirect page {}. Error: {}',
                        redirect_url, e)
                response = None
                tried_urls.append(site_urls.pop(0))

            # Use failed site locations as the last resort option for the next attempts
            site_urls.extend(tried_urls)

            if not response:
                if config.get('lf_session') is not None:
                    logger.error('Failed to get the redirect page. ' \
                                 'Check whether "lf_session" parameter is correct.')
                else:
                    logger.error('Failed to get the redirect page. ' \
                                 'Specify your "lf_session" cookie value in plugin parameters.')
                continue

            page = get_soup(response.content)

            download_page_url = None
            find_item = page.find('html', recursive=False)
            if find_item is not None:
                find_item = find_item.find('head', recursive=False)
                if find_item is not None:
                    find_item = find_item.find('meta',
                                               attrs={'http-equiv': "refresh"},
                                               recursive=False)
                    if find_item is not None and find_item.has_attr(
                            'content') and find_item['content'].startswith(
                                '0; url=http'):
                        download_page_url = find_item['content'][7:]
            if not download_page_url:
                if config.get('lf_session') is not None:
                    logger.error('Links were not found on lostfilm.tv torrent download page. ' \
                                 'Check whether "lf_session" parameter is correct.')
                else:
                    logger.error('Links were not found on lostfilm.tv torrent download page. ' \
                                 'Specify your "lf_session" cookie value in plugin parameters.')
                continue

            try:
                response = task.requests.get(download_page_url)
            except RequestException as e:
                logger.error('Failed to get the download page {}. Error: {}',
                             download_page_url, e)
                continue
            except cf_exceptions as e:
                logger.error(
                    'Cannot bypass CF page protection to get the download page {}. Error: {}',
                    download_page_url, e)
                continue
            except Exception as e:
                # Catch other errors related to download to avoid crash
                logger.error(
                    'Got unexpected exception when trying to get the download page {}. Error: {}',
                    download_page_url, e)
                continue

            page = get_soup(response.content)

            if not perfect_match:
                logger.trace(
                    'Trying to find series names in the final torrents download page'
                )
                find_item = page.find('div', class_='inner-box--subtitle')
                if find_item is not None:
                    title_org_div = find_item.text.strip()
                    if title_org_div.endswith(
                            ', сериал') and len(title_org_div) != 8:
                        series_name_org = title_org_div[:-8]
                    else:
                        logger.info(
                            'Cannot parse text on the final download page for original series name'
                        )
                else:
                    logger.info(
                        'Cannot parse the final download page for original series name'
                    )

                find_item = page.find('div', class_='inner-box--title')
                if find_item is not None and \
                   find_item.text.strip():
                    series_name_rus = find_item.text.strip()
                else:
                    logger.info(
                        'Cannot parse the final download page for russian series name'
                    )

            find_item = page.find('div', class_='inner-box--text')
            if find_item is not None:
                info_match = PAGE_TEXT_REGEXP.fullmatch(find_item.text.strip())
                if info_match is not None:
                    if int(info_match['season']) != season_num or int(
                            info_match['episode']) != episode_num:
                        logger.warning((
                            'Using season number ({}) and episode number ({}) from download page instead of '
                            'season number ({}) and episode number ({}) in RSS item'
                        ), int(info_match['season']),
                                       int(info_match['episode']), season_num,
                                       episode_num)
                        season_num = int(info_match['season'])
                        episode_num = int(info_match['episode'])
                    if info_match['ep_org'] is not None:
                        episode_name_org = info_match['ep_org'].strip()
                    if not perfect_match and info_match['ep_rus'] is not None and \
                      info_match['ep_rus'].strip():
                        episode_name_rus = info_match['ep_rus'].strip()
                else:
                    logger.info(
                        'Cannot parse text on the final download page for episode names'
                    )
            else:
                logger.info(
                    'Cannot parse the final download page for episode names')

            r_type = ''
            find_item = page.find('div', class_='inner-box--link main')
            if find_item:
                find_item = find_item.find('a')
                if find_item:
                    info_match = PAGE_LINKMAIN_REGEXP.search(find_item.text)
                    if info_match:
                        r_type = info_match['tp']
                        logger.debug('Found rip type "{}"', r_type)

            if not series_name_org:
                find_item = item.get('title')
                if find_item:
                    logger.warning((
                        'Unable to detect series name. Full RSS item title will be used in hope '
                        'that series parser will be able to detect something: {}'
                    ), find_item)
                    series_name_org = None
                else:
                    logger.error(
                        'Unable to detect series name. Skipping RSS item.')
                    continue

            d_items = page.find_all('div', class_='inner-box--item')
            if not d_items:
                logger.error(
                    'No download links were found on the download page')
                continue

            episode_id = 'S{:02d}E{:02d}'.format(season_num, episode_num)
            for d_item in d_items:
                # .find() returns None when the div is missing; chaining .a['href']
                # directly would raise before the guard below could run
                link_div = d_item.find('div', class_='inner-box--link sub')
                if link_div is None or link_div.a is None:
                    logger.warning('Download item does not have a link')
                    continue
                torrent_link = link_div.a['href']

                find_item = d_item.find('div', class_='inner-box--label')
                if not find_item:
                    logger.warning(
                        'Download item does not have quality indicator')
                    continue
                lf_quality = find_item.text.strip()

                if quality_map.get(lf_quality):
                    quality = quality_map.get(lf_quality)
                else:
                    logger.info(
                        'Download item has unknown quality indicator: {}',
                        lf_quality)
                    quality = lf_quality
                if series_name_org:
                    # filter out empty parts (r_type may be '') to avoid '..' in the title
                    new_title = '.'.join(part for part in [
                        series_name_org, episode_id, quality, r_type,
                        'LostFilm.TV'
                    ] if part)
                else:
                    new_title = '{} {}'.format(item['title'], quality).strip()
                new_entry = Entry()
                new_entry['title'] = new_title
                new_entry['url'] = torrent_link
                if series_name_org:
                    new_entry['series_name'] = series_name_org
                    new_entry['series_name_org'] = series_name_org
                if perfect_match:
                    new_entry['series_exact'] = True
                new_entry['series_id'] = episode_id
                new_entry['series_id_type'] = 'ep'
                new_entry['series_season'] = season_num
                new_entry['series_episode'] = episode_num
                new_entry['series_episodes'] = 1
                new_entry['season_pack'] = None
                new_entry['proper'] = False
                new_entry['proper_count'] = 0
                new_entry['special'] = False
                new_entry['release_group'] = 'LostFilm.TV'
                if quality_map.get(lf_quality):
                    if r_type:
                        new_entry['quality'] = qualities.Quality('.'.join(
                            [quality, r_type]))
                    else:
                        new_entry['quality'] = qualities.Quality(quality)
                if series_name_rus:
                    new_entry['series_name_rus'] = series_name_rus
                if episode_name_rus:
                    new_entry['episode_name_rus'] = episode_name_rus
                if episode_name_org:
                    new_entry['episode_name_org'] = episode_name_org
                new_entry['lostfilm_id'] = lostfilm_id
                entries.append(new_entry)
                logger.trace((
                    'Added new entry: names: series "{}", series ru "{}", episode "{}", episode ru "{}"; '
                    'numbers: season "{}", episode "{}", lostfilm id "{}"; quality: "{}", perfect detect: {}'
                ), series_name_org, series_name_rus, episode_name_org,
                             episode_name_rus, season_num, episode_num,
                             lostfilm_id, quality, perfect_match)

        return entries
Code example #6
 def prepare_config(self, config):
     if "username" not in config:
         raise PluginError("username is expected in PluginXunleiLixian")
     if "password" not in config:
         raise PluginError("password is expected in PluginXunleiLixian")
     return config
Code example #7
    def create_entries(self, page_url, soup, config):

        queue = []
        duplicates = {}
        duplicate_limit = 4

        def title_exists(title):
            """Helper method. Return True if title is already added to entries"""
            for entry in queue:
                if entry['title'] == title:
                    return True

        for link in soup.find_all('a'):
            # not a valid link
            if not link.has_attr('href'):
                continue
            # no content in the link
            if not link.contents:
                continue

            url = link['href']
            log_link = url
            log_link = log_link.replace('\n', '')
            log_link = log_link.replace('\r', '')

            # fix broken urls
            if url.startswith('//'):
                url = 'http:' + url
            elif not url.startswith(('http://', 'https://')):
                url = urlparse.urljoin(page_url, url)

            # get only links matching regexp
            regexps = config.get('links_re', None)
            if regexps:
                accept = False
                for regexp in regexps:
                    if re.search(regexp, url):
                        accept = True
                if not accept:
                    continue

            title_from = config.get('title_from', 'auto')
            if title_from == 'url':
                title = self._title_from_url(url)
                log.debug('title from url: %s' % title)
            elif title_from == 'title':
                if not link.has_attr('title'):
                    log.warning(
                        'Link `%s` doesn\'t have title attribute, ignored.' %
                        log_link)
                    continue
                title = link['title']
                log.debug('title from title: %s' % title)
            elif title_from == 'auto':
                title = self._title_from_link(link, log_link)
                if title is None:
                    continue
                # automatic mode, check if title is unique
                # if there are too many duplicate titles, switch to title_from: url
                if title_exists(title):
                    # ignore index links as a counter
                    if 'index' in title and len(title) < 10:
                        log.debug('ignored index title %s' % title)
                        continue
                    duplicates.setdefault(title, 0)
                    duplicates[title] += 1
                    if duplicates[title] > duplicate_limit:
                        # if from url seems to be bad choice use title
                        from_url = self._title_from_url(url)
                        switch_to = 'url'
                        for ext in ('.html', '.php'):
                            if from_url.endswith(ext):
                                switch_to = 'title'
                        log.info(
                            'Link names seem to be useless, auto-configuring \'title_from: %s\'. '
                            'This may not work well, you might need to configure it yourself.'
                            % switch_to)
                        config['title_from'] = switch_to
                        # start from the beginning  ...
                        return self.create_entries(page_url, soup, config)
            elif title_from == 'link' or title_from == 'contents':
                # link from link name
                title = self._title_from_link(link, log_link)
                if title is None:
                    continue
                log.debug('title from link: %s' % title)
            else:
                raise PluginError('Unknown title_from value %s' % title_from)

            if not title:
                log.debug('title could not be determined for %s' % log_link)
                continue

            # strip unicode white spaces
            title = title.replace(u'\u200B', u'').strip()

            # in case the title contains xxxxxxx.torrent - foooo.torrent clean it a bit (get up to first .torrent)
            # TODO: hack
            if title.lower().find('.torrent') > 0:
                title = title[:title.lower().find('.torrent')]

            if title_exists(title):
                # title link should be unique, add CRC32 to end if it's not
                hash = zlib.crc32(url.encode("utf-8"))
                crc32 = '%08X' % (hash & 0xFFFFFFFF)
                title = '%s [%s]' % (title, crc32)
                # truly duplicate, title + url crc already exists in queue
                if title_exists(title):
                    continue
                log.debug('uniqued title to %s' % title)

            entry = Entry()
            entry['url'] = url
            entry['title'] = title

            queue.append(entry)

        # add from queue to task
        return queue
Code example #8
File: imdb_lookup.py Project: drbashar315/Flexget
    def lookup(self, entry, search_allowed=True):
        """
        Perform imdb lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        if entry.get('imdb_url', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('imdb_id', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('title', eval_lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

        session = Session()

        try:
            # entry sanity checks
            for field in ['imdb_votes', 'imdb_score']:
                if entry.get(field, eval_lazy=False):
                    value = entry[field]
                    if not isinstance(value, (int, float)):
                        raise PluginError('Entry field %s should be a number!' % field)

            # if imdb_id is included, build the url.
            if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
                entry['imdb_url'] = make_url(entry['imdb_id'])

            # make sure imdb url is valid
            if entry.get('imdb_url', eval_lazy=False):
                imdb_id = extract_id(entry['imdb_url'])
                if imdb_id:
                    entry['imdb_url'] = make_url(imdb_id)
                else:
                    log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                    del(entry['imdb_url'])

            # no imdb_url, check if there is cached result for it or if the
            # search is known to fail
            if not entry.get('imdb_url', eval_lazy=False):
                result = session.query(SearchResult).\
                    filter(SearchResult.title == entry['title']).first()
                if result:
                    if result.fails and not manager.options.retry:
                        # this movie cannot be found, not worth trying again ...
                        log.debug('%s will fail lookup' % entry['title'])
                        raise PluginError('Title `%s` lookup fails' % entry['title'])
                    else:
                        if result.url:
                            log.trace('Setting imdb url for %s from db' % entry['title'])
                            entry['imdb_url'] = result.url

            # no imdb url, but information required, try searching
            if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
                log.verbose('Searching from imdb `%s`' % entry['title'])

                search = ImdbSearch()
                search_result = search.smart_match(entry['title'])
                if search_result:
                    entry['imdb_url'] = search_result['url']
                    # store url for this movie, so we don't have to search on
                    # every run
                    result = SearchResult(entry['title'], entry['imdb_url'])
                    session.add(result)
                    log.verbose('Found %s' % (entry['imdb_url']))
                else:
                    log_once('Imdb lookup failed for %s' % entry['title'], log)
                    # store FAIL for this title
                    result = SearchResult(entry['title'])
                    result.fails = True
                    session.add(result)
                    raise PluginError('Title `%s` lookup failed' % entry['title'])

            # check if this imdb page has been parsed & cached
            movie = session.query(Movie).\
                options(joinedload_all(Movie.genres),
                    joinedload_all(Movie.languages),
                    joinedload_all(Movie.actors),
                    joinedload_all(Movie.directors)).\
                filter(Movie.url == entry['imdb_url']).first()

            # determine whether or not movie details needs to be parsed
            req_parse = False
            if not movie:
                req_parse = True
            elif movie.expired:
                req_parse = True

            if req_parse:
                if movie is not None:
                    if movie.expired:
                        log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
                    # Remove the old movie, we'll store another one later.
                    session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
                    session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

                # search and store to cache
                if 'title' in entry:
                    log.verbose('Parsing imdb for `%s`' % entry['title'])
                else:
                    log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
                try:
                    movie = self._parse_new_movie(entry['imdb_url'], session)
                except UnicodeDecodeError:
                    log.error('Unable to determine encoding for %s. Installing chardet library may help.' %
                              entry['imdb_url'])
                    # store cache so this will not be tried again
                    movie = Movie()
                    movie.url = entry['imdb_url']
                    session.add(movie)
                    raise PluginError('UnicodeDecodeError')
                except ValueError as e:
                    # TODO: might be a little too broad catch, what was this for anyway? ;P
                    if manager.options.debug:
                        log.exception(e)
                    raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']:
                log.trace('movie.%s: %s' % (att, getattr(movie, att)))

            # store to entry
            entry.update_using_map(self.field_map, movie)
        finally:
            log.trace('committing session')
            session.commit()
Code example #9
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed; the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                imdb_alt_id = movie.alternate_ids and filter(
                    lambda alt_id: alt_id.name in ['imdb', 'flexget_imdb'], movie.alternate_ids)[0].id
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            # TODO: extract to method
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    mismatch = []
                    # title may be None for an imdb_id-only lookup; only compare when given
                    if title:
                        min_match = difflib.SequenceMatcher(
                            lambda x: x == ' ',
                            re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                            title.lower()).ratio() < MIN_MATCH
                        if min_match:
                            mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = _set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropriate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        movie = _set_movie_details(movie, session, result)
                        if imdb_id and not filter(
                            lambda alt_id: alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t'),
                                movie.alternate_ids):  # TODO: get rid of these confusing lambdas
                            log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                            movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                 imdb_id.lstrip('t')))
                        session.add(movie)
                        session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
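A hypothetical call of lookup_movie() based only on the signature and docstring above; the title and year are made-up values and the import location of PluginError is assumed:

    from flexget.plugin import PluginError  # assumed import location

    try:
        movie = lookup_movie(title='example movie', year=2010)
        log.info('found: %s (%s)' % (movie.title, movie.year))
    except PluginError as err:
        log.warning('rotten tomatoes lookup failed: %s' % err)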
Code example #10
File: lostfilm.py Project: sirtyface/Flexget-1
    def on_task_input(self, task, config):
        config = self.build_config(config)
        if config is False:
            return
        try:
            rss = feedparser.parse(config['url'])
        except Exception:
            raise PluginError('Cannot parse rss feed')
        status = rss.get('status')
        if status != 200:
            raise PluginError('Received %s status instead of 200 (OK)' % status)
        entries = []
        for item in rss.entries:
            if item.get('link') is None:
                logger.debug('Item doesn\'t have a link')
                continue
            if item.get('description') is None:
                logger.debug('Item doesn\'t have a description')
                continue
            try:
                lostfilm_num = LOSTFILM_ID_REGEXP.search(item['description']).groups()
            except Exception:
                logger.debug('Item doesn\'t have lostfilm id in description')
                continue
            try:
                season_num, episode_num = [
                    int(x) for x in EPISODE_REGEXP.search(item['link']).groups()
                ]
            except Exception:
                logger.debug('Item doesn\'t have episode id in link')
                continue
            params = {'c': lostfilm_num, 's': season_num, 'e': episode_num}
            redirect_url = 'https://www.lostfilm.tv/v_search.php'
            try:
                response = task.requests.get(redirect_url, params=params)
            except RequestException as e:
                logger.error('Could not connect to redirect url: {}', e)
                continue

            page = get_soup(response.content)
            try:
                redirect_url = page.head.meta['content'].split('url=')[1]
            except Exception:
                logger.error('Missing redirect')
                continue

            try:
                response = task.requests.get(redirect_url)
            except RequestException as e:
                logger.error('Could not connect to redirect url2: {}', e)
                continue

            page = get_soup(response.content)

            episode_name_rus = episode_name_eng = series_name_rus = None
            series_name_eng = None
            try:
                series_name_rus = page.find('div', 'inner-box--title').text.strip()
                title_eng_div = page.find('div', 'inner-box--subtitle').text.strip() or None
                series_name_eng = (
                    (title_eng_div.endswith(', сериал')) and title_eng_div[:-8] or None
                )
                text_div = page.find('div', 'inner-box--text').text.strip() or None
                episode_name_rus, episode_name_eng = TEXT_REGEXP.findall(text_div).pop()
                episode_name_rus = episode_name_rus.strip()
                episode_name_eng = episode_name_eng.strip()
            except Exception:
                logger.debug('Cannot parse head info')
                continue

            episode_id = 'S{:02d}E{:02d}'.format(season_num, episode_num)

            # Use a distinct loop name so the soup div does not shadow the RSS
            # `item`, which the fallback title below still reads.
            for div_item in page.findAll('div', 'inner-box--item'):
                torrent_link = quality = None
                try:
                    torrent_link = div_item.find('div', 'inner-box--link sub').a['href']
                    quality = div_item.find('div', 'inner-box--label').text.strip()
                except Exception:
                    logger.debug('Item doesn\'t have a link or quality')
                    continue
                if torrent_link is None or quality is None:
                    logger.debug('Item doesn\'t have a link or quality')
                    continue
                if quality_map.get(quality):
                    quality, file_ext = quality_map.get(quality)
                else:
                    file_ext = 'avi'
                if series_name_eng:
                    new_title = '.'.join(
                        [
                            series_name_eng,
                            episode_id,
                            quality,
                            'rus.LostFilm.TV',
                            file_ext,
                            'torrent',
                        ]
                    ).replace(' ', '.')
                else:
                    if item.get('title') is not None:
                        new_title = '{} {}'.format(item['title'], quality)
                    else:
                        logger.debug('Item doesn\'t have a title')
                        continue
                new_entry = Entry()
                new_entry['url'] = torrent_link
                new_entry['title'] = new_title.strip()
                if series_name_rus:
                    new_entry['series_name_rus'] = series_name_rus
                if episode_name_rus:
                    new_entry['episode_name_rus'] = episode_name_rus
                if series_name_eng:
                    new_entry['series_name_eng'] = series_name_eng
                if episode_name_eng:
                    new_entry['episode_name_eng'] = episode_name_eng
                entries.append(new_entry)

        return entries
Code example #11
File: imdb.py Project: moogar0880/Flexget
    def parse(self, imdb_id, soup=None):
        self.imdb_id = extract_id(imdb_id)
        url = make_url(self.imdb_id)
        self.url = url

        if not soup:
            page = requests.get(url)
            soup = get_soup(page.text)

        title_wrapper = soup.find('div', attrs={'class': 'title_wrapper'})

        data = json.loads(
            soup.find('script', {
                'type': 'application/ld+json'
            }).text)

        if not data:
            raise PluginError(
                'IMDB parser needs updating, imdb format changed. Please report on Github.'
            )

        # Parse stuff from the title-overview section
        name_elem = data['name']
        if name_elem:
            self.name = name_elem.strip()
        else:
            log.error(
                'IMDB parser possibly needs updating. Please report on Github.'
            )
            raise PluginError('Unable to set imdb_name for %s from %s' %
                              (self.imdb_id, self.url))

        year = soup.find('span', attrs={'id': 'titleYear'})
        if year:
            m = re.search(r'([0-9]{4})', year.text)
            if m:
                self.year = int(m.group(1))

        if not self.year:
            log.debug('No year found for %s', self.imdb_id)

        mpaa_rating_elem = data.get('contentRating')
        if mpaa_rating_elem:
            self.mpaa_rating = mpaa_rating_elem
        else:
            log.debug('No rating found for %s', self.imdb_id)

        photo_elem = data.get('image')
        if photo_elem:
            self.photo = photo_elem
        else:
            log.debug('No photo found for %s', self.imdb_id)

        original_name_elem = title_wrapper.find('div',
                                                {'class': 'originalTitle'})
        if original_name_elem:
            self.name = title_wrapper.find('h1').contents[0].strip()
            self.original_name = original_name_elem.contents[0].strip().strip(
                '"')
        else:
            log.debug('No original title found for %s', self.imdb_id)

        votes_elem = data.get('aggregateRating', {}).get('ratingCount')
        if votes_elem:
            self.votes = str_to_int(votes_elem) if not isinstance(
                votes_elem, int) else votes_elem
        else:
            log.debug('No votes found for %s', self.imdb_id)

        score_elem = data.get('aggregateRating', {}).get('ratingValue')
        if score_elem:
            self.score = float(score_elem)
        else:
            log.debug('No score found for %s', self.imdb_id)

        meta_score_elem = soup.find(attrs={'class': 'metacriticScore'})
        if meta_score_elem:
            self.meta_score = str_to_int(meta_score_elem.text)
        else:
            log.debug('No Metacritic score found for %s', self.imdb_id)

        # get director(s)
        directors = data.get('director', [])
        if not isinstance(directors, list):
            directors = [directors]

        for director in directors:
            if director['@type'] != 'Person':
                continue
            director_id = extract_id(director['url'])
            director_name = director['name']
            self.directors[director_id] = director_name

        # get writer(s)
        writers = data.get('creator', [])
        if not isinstance(writers, list):
            writers = [writers]

        for writer in writers:
            if writer['@type'] != 'Person':
                continue
            writer_id = extract_id(writer['url'])
            writer_name = writer['name']
            self.writers[writer_id] = writer_name

        # Details section
        title_details = soup.find('div', attrs={'id': 'titleDetails'})
        if title_details:
            # get languages
            for link in title_details.find_all(
                    'a',
                    href=re.compile(r'^/search/title\?title_type=feature'
                                    r'&primary_language=')):
                lang = link.text.strip().lower()
                if lang not in self.languages:
                    self.languages.append(lang.strip())

        # Storyline section
        storyline = soup.find('div', attrs={'id': 'titleStoryLine'})
        if storyline:
            plot_elem = storyline.find('p')
            if plot_elem:
                # Remove the "Written By" part.
                if plot_elem.em:
                    plot_elem.em.replace_with('')
                self.plot_outline = plot_elem.text.strip()
            else:
                log.debug('No storyline found for %s', self.imdb_id)

        genres = data.get('genre', [])
        if not isinstance(genres, list):
            genres = [genres]

        self.genres = [g.strip().lower() for g in genres]

        # Cast section
        cast = soup.find('table', attrs={'class': 'cast_list'})
        if cast:
            for actor in cast.select('tr > td:nth-of-type(2) > a'):
                actor_id = extract_id(actor['href'])
                actor_name = actor.text.strip()
                # tag instead of name
                if isinstance(actor_name, Tag):
                    actor_name = None
                self.actors[actor_id] = actor_name
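
A minimal usage sketch for the parser above. The harness is hypothetical (the
ImdbParser class name is an assumption), but the attributes printed are the
ones parse() actually sets:

    parser = ImdbParser()      # assumed holder of the parse() method above
    parser.parse('tt0133093')  # parse() normalizes the id via extract_id()
    print(parser.name, parser.year, parser.score, parser.genres)
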
Code example #12
0
    def on_task_input(self, task, config):

        # Let details plugin know that it is ok if this task doesn't produce any entries
        task.no_entries_ok = True

        filename = os.path.expanduser(config['file'])
        encoding = config.get('encoding', None)
        file = open(filename, 'r')

        last_pos = task.simple_persistence.setdefault(filename, 0)
        if os.path.getsize(filename) < last_pos:
            log.info('File size is smaller than in previous execution, resetting to beginning of the file')
            last_pos = 0

        file.seek(last_pos)

        log.debug('continuing from last position %s' % last_pos)

        entry_config = config.get('entry')
        format_config = config.get('format', {})

        # keep track what fields have been found
        used = {}
        entries = []
        entry = Entry()

        # now parse text

        while True:
            line = file.readline()
            if encoding:
                try:
                    line = line.decode(encoding)
                except UnicodeError:
                    raise PluginError('Failed to decode file using %s. Check encoding.' % encoding)

            if not line:
                task.simple_persistence[filename] = file.tell()
                break

            for field, regexp in entry_config.iteritems():
                #log.debug('search field: %s regexp: %s' % (field, regexp))
                match = re.search(regexp, line)
                if match:
                    # check if used field detected, in such case start with new entry
                    if used.has_key(field):
                        if entry.isvalid():
                            log.info('Found field %s again before entry was completed. '
                                     'Adding current incomplete, but valid entry and moving to next.' % field)
                            self.format_entry(entry, format_config)
                            entries.append(entry)
                        else:
                            log.info('Invalid data, entry field %s is already found once. Ignoring entry.' % field)
                        # start new entry
                        entry = Entry()
                        used = {}

                    # add field to entry
                    entry[field] = match.group(1)
                    used[field] = True
                    log.debug('found field: %s value: %s' % (field, entry[field]))

                # if all fields have been found
                if len(used) == len(entry_config):
                    # check that entry has at least title and url
                    if not entry.isvalid():
                        log.info('Invalid data, constructed entry is missing mandatory fields (title or url)')
                    else:
                        self.format_entry(entry, format_config)
                        entries.append(entry)
                        log.debug('Added entry %s' % entry)
                        # start new entry
                        entry = Entry()
                        used = {}
        return entries
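
To illustrate the config structure the loop above consumes, here is a
hypothetical config dict (the keys are the ones the code reads; the values
are invented): each 'entry' key maps a field name to a regexp with one
capture group.

    config = {
        'file': '~/downloads/log.txt',
        'encoding': 'utf-8',          # optional; lines are decoded with it
        'entry': {
            'title': r'TITLE: (.+)',  # match.group(1) becomes entry['title']
            'url': r'URL: (.+)',
        },
        'format': {},                 # optional, passed to format_entry()
    }
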
Code example #13
0
    def on_task_filter(self, task, config):
        log.debug('check for enforcing')

        # parse config
        if isinstance(config, bool):
            # configured a boolean false, disable plugin
            if not config:
                return
            # configured a boolean true, disable timeframe
            timeframe = None
        else:
            # parse time window
            log.debug('interval: %s' % config)
            try:
                timeframe = parse_timedelta(config)
            except ValueError:
                raise PluginError('Invalid time format', log)

        # throws DependencyError if not present aborting task
        imdb_lookup = get_plugin_by_name('imdb_lookup').instance

        for entry in task.entries:

            parser = MovieParser()
            parser.data = entry['title']
            parser.parse()

            # if we have imdb_id already evaluated
            if entry.get('imdb_id', None, eval_lazy=False) is None:
                try:
                    # TODO: fix imdb_id_lookup, cumbersome that it returns None and or throws exception
                    # Also it's crappy name!
                    imdb_id = imdb_lookup.imdb_id_lookup(
                        movie_title=parser.name, raw_title=entry['title'])
                    if imdb_id is None:
                        continue
                    entry['imdb_id'] = imdb_id
                except PluginError as pe:
                    log_once(pe.value)
                    continue

            quality = parser.quality.name

            log.debug('quality: %s' % quality)
            log.debug('imdb_id: %s' % entry['imdb_id'])
            log.debug('current proper count: %s' % parser.proper_count)

            proper_movie = task.session.query(ProperMovie).\
                filter(ProperMovie.imdb_id == entry['imdb_id']).\
                filter(ProperMovie.quality == quality).\
                order_by(desc(ProperMovie.proper_count)).first()

            if not proper_movie:
                log.debug('no previous download recorded for %s' %
                          entry['imdb_id'])
                continue

            highest_proper_count = proper_movie.proper_count
            log.debug('highest_proper_count: %i' % highest_proper_count)

            accept_proper = False
            if parser.proper_count > highest_proper_count:
                log.debug('proper detected: %s ' % proper_movie)

                if timeframe is None:
                    accept_proper = True
                else:
                    expires = proper_movie.added + timeframe
                    log.debug('propers timeframe: %s' % timeframe)
                    log.debug('added: %s' % proper_movie.added)
                    log.debug('propers ignore after: %s' % str(expires))
                    if datetime.now() < expires:
                        accept_proper = True
                    else:
                        log.verbose(
                            'Proper `%s` has passed its expiration time' %
                            entry['title'])

            if accept_proper:
                log.info(
                    'Accepting proper version of previously downloaded movie `%s`'
                    % entry['title'])
                # TODO: does this need to be called?
                # fire_event('forget', entry['imdb_url'])
                fire_event('forget', entry['imdb_id'])
                entry.accept('proper version of previously downloaded movie')
Code example #14
0
def lazy_a(entry):
    if 'fail' in entry:
        raise PluginError('oh no!')
    for f in ['a_field', 'ab_field', 'a_fail']:
        entry[f] = 'a'
Code example #15
0
    def items(self):
        if self._items is None:
            log.debug('fetching items from IMDB')
            try:
                r = self.session.get(
                    'https://www.imdb.com/list/export?list_id=%s&author_id=%s'
                    % (self.list_id, self.user_id),
                    cookies=self.cookies,
                )
                lines = list(r.iter_lines(decode_unicode=True))
            except RequestException as e:
                raise PluginError(e.args[0])
            # Normalize headers to lowercase
            lines[0] = lines[0].lower()
            self._items = []
            for row in csv_dictreader(lines):
                log.debug('parsing line from csv: %s', row)

                try:
                    item_type = row['title type'].lower()
                    name = row['title']
                    year = int(row['year']) if row['year'] != '????' else None
                    created = (datetime.strptime(row['created'], '%Y-%m-%d')
                               if row.get('created') else None)
                    modified = (datetime.strptime(row['modified'], '%Y-%m-%d')
                                if row.get('modified') else None)
                    entry = Entry({
                        'title':
                        '%s (%s)' % (name, year) if year else name,
                        'url':
                        row['url'],
                        'imdb_id':
                        row['const'],
                        'imdb_url':
                        row['url'],
                        'imdb_list_position':
                        int(row['position']) if 'position' in row else None,
                        'imdb_list_created':
                        created,
                        'imdb_list_modified':
                        modified,
                        'imdb_list_description':
                        row.get('description'),
                        'imdb_name':
                        name,
                        'imdb_year':
                        year,
                        'imdb_user_score':
                        float(row['imdb rating'])
                        if row['imdb rating'] else None,
                        'imdb_votes':
                        int(row['num votes']) if row['num votes'] else None,
                        'imdb_genres':
                        [genre.strip() for genre in row['genres'].split(',')],
                    })

                except ValueError as e:
                    log.debug(
                        'no movie row detected, skipping. %s. Exception: %s',
                        row, e)
                    continue

                if item_type in MOVIE_TYPES:
                    entry['movie_name'] = name
                    entry['movie_year'] = year
                elif item_type in SERIES_TYPES:
                    entry['series_name'] = name
                    entry['series_year'] = year
                elif item_type in OTHER_TYPES:
                    entry['title'] = name
                else:
                    log.verbose(
                        'Unknown IMDB type entry received: %s. Skipping',
                        item_type)
                    continue
                self._items.append(entry)
        return self._items
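
For reference, these are the lower-cased CSV columns the loop above actually
reads, collected from the code itself (not from any IMDB export
documentation):

    USED_COLUMNS = [
        'title type', 'title', 'year', 'created', 'modified', 'position',
        'url', 'const', 'description', 'imdb rating', 'num votes', 'genres',
    ]
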
Code example #16
0
File: api_rottentomatoes.py Project: ksurl/Flexget
def lookup_movie(
    title: Optional[str] = None,
    year: Optional[int] = None,
    rottentomatoes_id: Optional[int] = None,
    smart_match: Optional[bool] = None,
    only_cached: bool = False,
    session: Optional[Session] = None,
    api_key: Optional[str] = None,
) -> RottenTomatoesMovie:
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param string title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :param api_key: optionally specify an API key to use
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """
    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = plugin.get('parsing', 'api_rottentomatoes').parse_movie(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or title):
            raise PluginError('Failed to parse name from %s' % smart_match)

    search_string = ""
    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not rottentomatoes_id:
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str() -> str:
        return f'<title={title},year={year},rottentomatoes_id={rottentomatoes_id}>'

    logger.debug('Looking up rotten tomatoes information for {}', id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = (
            session.query(RottenTomatoesMovie)
            .filter(RottenTomatoesMovie.id == rottentomatoes_id)
            .first()
        )
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(
            func.lower(RottenTomatoesMovie.title) == title.lower()
        )
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            logger.debug('No matches in movie cache found, checking search cache.')
            found = (
                session.query(RottenTomatoesSearchResult)
                .filter(func.lower(RottenTomatoesSearchResult.search) == search_string)
                .first()
            )
            if found and found.movie:
                logger.debug('Movie found in search cache.')
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            logger.debug(
                'Cache has expired for {}, attempting to refresh from Rotten Tomatoes.', id_str()
            )
            try:
                result = movies_info(movie.id, api_key)
                movie = _set_movie_details(movie, session, result, api_key)
                session.merge(movie)
            except URLError:
                logger.error(
                    'Error refreshing movie details from Rotten Tomatoes, cached info being used.'
                )
        else:
            logger.debug('Movie {} information restored from cache.', id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        logger.debug('Movie {} not found in cache, looking up from rotten tomatoes.', id_str())
        try:
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id, api_key)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result, api_key)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                logger.verbose('Searching from rt `{}`', search_string)
                results = movies_search(search_string, api_key=api_key)
                if results:
                    results = results.get('movies')
                    if results:
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower()
                            )
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        logger.debug('Checking year against theater release date')
                                        release_year = time.strptime(
                                            movie_res['release_dates'].get('theater'), '%Y-%m-%d'
                                        ).tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        logger.debug('Checking year against dvd release date')
                                        release_year = time.strptime(
                                            movie_res['release_dates'].get('dvd'), '%Y-%m-%d'
                                        ).tm_year
                                    if not (release_year and release_year == year):
                                        logger.debug(
                                            'removing {} - {} (wrong year: {})',
                                            movie_res['title'],
                                            movie_res['id'],
                                            str(release_year or movie_res['year']),
                                        )
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                logger.debug('removing {} (min_match)', movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropriate results')

                        if len(results) == 1:
                            logger.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                logger.debug(
                                    'unable to determine correct movie, min_diff too small (`{} ({}) - {}` <-?-> `{} ({}) - {}`)',
                                    results[0]['title'],
                                    results[0]['year'],
                                    results[0]['id'],
                                    results[1]['title'],
                                    results[1]['year'],
                                    results[1]['id'],
                                )
                                for r in results:
                                    logger.debug(
                                        'remain: {} (match: {}) {}',
                                        r['title'],
                                        r['match'],
                                        r['id'],
                                    )
                                raise PluginError('min_diff')

                        result = movies_info(results[0].get('id'), api_key)

                        if not result:
                            result = results[0]

                        movie = (
                            session.query(RottenTomatoesMovie)
                            .filter(RottenTomatoesMovie.id == result['id'])
                            .first()
                        )

                        if not movie:
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result, api_key)
                            session.add(movie)
                            session.commit()

                        if title.lower() != movie.title.lower():
                            logger.debug("Saving search result for '{}'", search_string)
                            session.add(
                                RottenTomatoesSearchResult(search=search_string, movie=movie)
                            )
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in [
            'alternate_ids',
            'cast',
            'directors',
            'genres',
            'links',
            'posters',
            'release_dates',
        ]:
            getattr(movie, attr)
        session.commit()
        return movie
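
A hedged call sketch for lookup_movie(); `session` and `API_KEY` are
assumptions here (in Flexget these are normally injected by the plugin
framework):

    # session: a live SQLAlchemy session; API_KEY: a Rotten Tomatoes API key
    movie = lookup_movie(title='Inception', year=2010,
                         session=session, api_key=API_KEY)
    print(movie.title, movie.year)
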
Code example #17
0
File: torrentday.py Project: newrain7803/Flexget
    def search(self, task, entry, config=None):
        """
        Search for name from torrentday.
        """

        categories = config.get('category', 'all')
        # Make sure categories is a list
        if not isinstance(categories, list):
            categories = [categories]
        # If there are any text categories, turn them into their id number
        categories = [
            c if isinstance(c, int) else CATEGORIES[c] for c in categories
        ]
        params = {'cata': 'yes', 'clear-new': 1}
        params.update({str(c): 1 for c in categories})

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):

            url = 'https://www.torrentday.com/t'
            params['q'] = normalize_unicode(search_string).replace(':', '')
            cookies = {
                'uid': config['uid'],
                'pass': config['passkey'],
                '__cfduid': config['cfduid'],
            }

            try:
                page = requests.get(url, params=params,
                                    cookies=cookies).content
            except RequestException as e:
                raise PluginError(
                    'Could not connect to torrentday: {}'.format(e))

            # the following should avoid table being None due to a malformed
            # html in td search results
            soup = get_soup(
                page).contents[1].contents[1].contents[1].next.nextSibling
            table = soup.find('table', {'id': 'torrentTable'})
            if table is None:
                raise PluginError(
                    'Search returned by torrentday appears to be empty or malformed.'
                )

            # the first row is the header so skip it
            for tr in table.find_all('tr')[1:]:
                entry = Entry()
                # find the torrent names
                td = tr.find('td', {'class': 'torrentNameInfo'})
                if not td:
                    logger.warning(
                        'Could not find entry torrentNameInfo for {}.',
                        search_string)
                    continue
                title = td.find('a')
                if not title:
                    logger.warning('Could not determine title for {}.',
                                   search_string)
                    continue
                entry['title'] = title.contents[0]
                logger.debug('title: {}', title.contents[0])

                # find download link
                torrent_url = tr.find('td', {'class': 'ac'})
                if not torrent_url:
                    logger.warning('Could not determine download link for {}.',
                                   search_string)
                    continue
                torrent_url = torrent_url.find('a').get('href')

                # construct download URL
                torrent_url = ('https://www.torrentday.com/' + torrent_url +
                               '?torrent_pass=' + config['rss_key'])
                logger.debug('RSS-ified download link: {}', torrent_url)
                entry['url'] = torrent_url

                # use tr object for seeders/leechers
                seeders = tr.find('td', {'class': 'ac seedersInfo'})
                leechers = tr.find('td', {'class': 'ac leechersInfo'})
                entry['torrent_seeds'] = int(seeders.contents[0].replace(
                    ',', ''))
                entry['torrent_leeches'] = int(leechers.contents[0].replace(
                    ',', ''))
                entry['torrent_availability'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])

                # use tr object for size
                size = tr.find(
                    'td',
                    text=re.compile(r'([\.\d]+) ([TGMKk]?)B')).contents[0]
                size = re.search(r'([\.\d]+) ([TGMKk]?)B', str(size))

                entry['content_size'] = parse_filesize(size.group(0))

                entries.add(entry)

        return sorted(entries,
                      reverse=True,
                      key=lambda x: x.get('torrent_availability'))
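
The search above expects these config keys, reconstructed from the cookie and
URL handling in the code (all values below are placeholders):

    EXAMPLE_CONFIG = {
        'uid': '123456',                   # sent as the 'uid' cookie
        'passkey': '<pass cookie value>',  # sent as the 'pass' cookie
        'cfduid': '<cloudflare cookie>',   # sent as '__cfduid'
        'rss_key': '<rss key>',            # appended as torrent_pass to links
        'category': ['all'],               # ints or names resolved via CATEGORIES
    }
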
Code example #18
0
File: api_rottentomatoes.py Project: ksurl/Flexget
def _set_movie_details(
    movie: RottenTomatoesMovie,
    session: Session,
    movie_data: Optional[Dict[str, Any]] = None,
    api_key: Optional[str] = None,
) -> Any:
    """
    Populate ``movie`` object from given data

    :param movie: movie object to update
    :param session: session to use, returned Movie will be live in that session
    :param api_key: optionally specify an API key to use
    :param movie_data: data to copy into the ``movie`` object
    """

    if not movie_data:
        if not movie.id:
            raise PluginError('Cannot get rotten tomatoes details without rotten tomatoes id')
        movie_data = movies_info(movie.id, api_key)
    if movie_data:
        if movie.id:
            logger.debug(
                "Updating movie info (actually just deleting the old info and adding the new)"
            )
            del movie.release_dates[:]
            del movie.posters[:]
            del movie.alternate_ids[:]
            del movie.links[:]
        movie.update_from_dict(movie_data)
        movie.update_from_dict(movie_data.get('ratings'))
        genres = movie_data.get('genres')
        if genres:
            for name in genres:
                genre = (
                    session.query(RottenTomatoesGenre)
                    .filter(func.lower(RottenTomatoesGenre.name) == name.lower())
                    .first()
                )
                if not genre:
                    genre = RottenTomatoesGenre(name)
                movie.genres.append(genre)
        release_dates = movie_data.get('release_dates')
        if release_dates:
            for name, date in list(release_dates.items()):
                movie.release_dates.append(ReleaseDate(name, date))
        posters = movie_data.get('posters')
        if posters:
            for name, url in list(posters.items()):
                movie.posters.append(RottenTomatoesPoster(name, url))
        cast = movie_data.get('abridged_cast')
        if cast:
            for res_actor in cast:
                actor = (
                    session.query(RottenTomatoesActor)
                    .filter(func.lower(RottenTomatoesActor.rt_id) == res_actor['id'])
                    .first()
                )
                if not actor:
                    actor = RottenTomatoesActor(res_actor['name'], res_actor['id'])
                movie.cast.append(actor)
        directors = movie_data.get('abridged_directors')
        if directors:
            for res_director in directors:
                director = (
                    session.query(RottenTomatoesDirector)
                    .filter(
                        func.lower(RottenTomatoesDirector.name) == res_director['name'].lower()
                    )
                    .first()
                )
                if not director:
                    director = RottenTomatoesDirector(res_director['name'])
                movie.directors.append(director)
        alternate_ids = movie_data.get('alternate_ids')
        if alternate_ids:
            for name, id in list(alternate_ids.items()):
                movie.alternate_ids.append(RottenTomatoesAlternateId(name, id))
        links = movie_data.get('links')
        if links:
            for name, url in list(links.items()):
                movie.links.append(RottenTomatoesLink(name, url))
        movie.updated = datetime.now()
    else:
        raise PluginError('No movie_data for rottentomatoes_id %s' % movie.id)

    return movie
Code example #19
0
File: plugin_deluge.py Project: Donavan/Flexget
    def on_connect_fail(self, result):
        """Pauses the reactor, returns PluginError. Gets called when connection to deluge daemon fails."""
        log.debug('Connect to deluge daemon failed, result: %s' % result)
        reactor.callLater(
            0, reactor.pause,
            PluginError('Could not connect to deluge daemon', log))
Code example #20
0

def replace_from_entry(field, entry, field_name, logger, default=''):
    """This is a helper function to do string replacement from an entry dict.
    It catches exceptions from the string replacement and prints errors to the given log.
    field_name is the description to use when printing the error.
    Returns the result of the replacement, or default if there is an error."""
    try:
        result = field % entry
    except KeyError, e:
        logger("Could not set %s for %s: does not contain the field '%s'." %
               (field_name, entry['title'], e))
        result = default
    except ValueError, e:
        from flexget.plugin import PluginError
        raise PluginError("%s has invalid string replacement: %s: %s" %
                          (field_name, e, field))
    return result
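
Usage sketch for the helper above; the entry contents and the logger callable
are illustrative:

    entry = {'title': 'Some.Show.S01E01'}
    # The missing key is reported through the passed logger and '' is returned:
    name = replace_from_entry('%(series_name)s.torrent', entry,
                              'filename', log.error)
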


class ReList(list):
    """
    A list that stores regexps.

    You can add compiled or uncompiled regexps to the list.
    It will always return the compiled version.
    It will compile the text regexps on demand when first accessed.
    """

    # Set the default flags
    flags = re.IGNORECASE | re.UNICODE
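
The docstring promises on-demand compilation, but that part of the class is
not shown in this listing. A minimal sketch of the idea (an illustration, not
the original implementation):

    import re

    class LazyReList(list):
        """Stores patterns; compiles text patterns on first access."""
        flags = re.IGNORECASE | re.UNICODE

        def __getitem__(self, k):
            item = list.__getitem__(self, k)
            if isinstance(item, str):
                # Compile lazily and cache the compiled pattern in place.
                item = re.compile(item, self.flags)
                list.__setitem__(self, k, item)
            return item

        def __iter__(self):
            # Route iteration through __getitem__ so for-loops also
            # receive compiled patterns.
            return (self[i] for i in range(len(self)))
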
Code example #21
0
    def output(self, task, entry, config):
        """Moves temp-file into final destination

        Raises:
            PluginError if operation fails
        """

        if 'file' not in entry and not task.manager.options.test:
            log.debug('file missing, entry: %s' % entry)
            raise PluginError('Entry `%s` has no temp file associated with' %
                              entry['title'])

        try:
            # use path from entry if has one, otherwise use from download definition parameter
            path = entry.get('path', config.get('path'))
            if not isinstance(path, basestring):
                raise PluginError('Invalid `path` in entry `%s`' %
                                  entry['title'])

            # override path from command line parameter
            if task.manager.options.dl_path:
                path = task.manager.options.dl_path

            # expand variables in path
            try:
                path = os.path.expanduser(entry.render(path))
            except RenderError, e:
                task.fail(
                    entry,
                    'Could not set path. Error during string replacement: %s' %
                    e)
                return

            # Clean illegal characters from path name
            path = pathscrub(path)

            # If we are in test mode, report and return
            if task.manager.options.test:
                log.info('Would write `%s` to `%s`' % (entry['title'], path))
                # Set a fake location, so the exec plugin can do string replacement during --test #1015
                entry['output'] = os.path.join(path, 'TEST_MODE_NO_OUTPUT')
                return

            # make path
            if not os.path.isdir(path):
                log.debug('Creating directory %s' % path)
                try:
                    os.makedirs(path)
                except:
                    raise PluginError('Cannot create path %s' % path, log)

            # check that temp file is present
            if not os.path.exists(entry['file']):
                tmp_path = os.path.join(task.manager.config_base, 'temp')
                log.debug('entry: %s' % entry)
                log.debug('temp: %s' % ', '.join(os.listdir(tmp_path)))
                raise PluginWarning(
                    'Downloaded temp file `%s` doesn\'t exist!?' %
                    entry['file'])

            # if we still don't have a filename, try making one from title (last resort)
            if not entry.get('filename'):
                entry['filename'] = entry['title']
                log.debug('set filename from title %s' % entry['filename'])
                if 'mime-type' not in entry:
                    log.warning(
                        'Unable to figure proper filename for %s. Using title.'
                        % entry['title'])
                else:
                    guess = mimetypes.guess_extension(entry['mime-type'])
                    if not guess:
                        log.warning(
                            'Unable to guess extension with mime-type %s' %
                            entry['mime-type'])
                    else:
                        self.filename_ext_from_mime(entry)

            name = entry.get('filename', entry['title'])
            # Remove illegal characters from filename #325, #353
            name = pathscrub(name)
            # Remove directory separators from filename #208
            name = name.replace('/', ' ')
            if sys.platform.startswith('win'):
                name = name.replace('\\', ' ')
            # remove duplicate spaces
            name = ' '.join(name.split())
            # combine to full path + filename
            destfile = os.path.join(path, name)
            log.debug('destfile: %s' % destfile)

            if os.path.exists(destfile):
                import filecmp
                if filecmp.cmp(entry['file'], destfile):
                    log.debug("Identical destination file '%s' already exists",
                              destfile)
                elif config.get('overwrite'):
                    log.debug("Overwriting already existing file %s" %
                              destfile)
                else:
                    log.info(
                        'File `%s` already exists and is not identical, download failed.'
                        % destfile)
                    task.fail(
                        entry,
                        'File `%s` already exists and is not identical.' %
                        destfile)
                    return
            else:
                # move temp file
                log.debug('moving %s to %s' % (entry['file'], destfile))

                try:
                    shutil.move(entry['file'], destfile)
                except OSError, err:
                    # ignore permission errors, see ticket #555
                    import errno
                    if not os.path.exists(destfile):
                        raise PluginError('Unable to write %s' % destfile)
                    if err.errno != errno.EPERM:
                        raise
Code example #22
0
    def search(self, task, entry, config=None):
        """
        Search for name from torrentleech.
        """
        request_headers = {'User-Agent': 'curl/7.54.0'}
        rss_key = config['rss_key']

        # build the form request:
        data = {'username': config['username'], 'password': config['password']}
        # POST the login form:
        try:
            login = task.requests.post(
                'https://www.torrentleech.org/user/account/login/',
                data=data,
                headers=request_headers,
                allow_redirects=True)
        except RequestException as e:
            raise PluginError('Could not connect to torrentleech: %s' % str(e))

        if not isinstance(config, dict):
            config = {}
            # sort = SORT.get(config.get('sort_by', 'seeds'))
            # if config.get('sort_reverse'):
            # sort += 1
        categories = config.get('category', 'all')
        # Make sure categories is a list
        if not isinstance(categories, list):
            categories = [categories]
        # If there are any text categories, turn them into their id number
        categories = [
            c if isinstance(c, int) else CATEGORIES[c] for c in categories
        ]
        filter_url = '/categories/{}'.format(','.join(
            str(c) for c in categories))
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string).replace(":", "")
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand

            url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
                   quote(query.encode('utf-8')) + filter_url)
            log.debug('Using %s as torrentleech search url', url)

            results = task.requests.get(url,
                                        headers=request_headers,
                                        cookies=login.cookies).json()

            for torrent in results['torrentList']:
                entry = Entry()
                entry['download_headers'] = request_headers
                entry['title'] = torrent['name']

                # construct download URL
                torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                    torrent['fid'], rss_key, torrent['filename'])
                log.debug('RSS-ified download link: %s', torrent_url)
                entry['url'] = torrent_url

                # seeders/leechers
                entry['torrent_seeds'] = torrent['seeders']
                entry['torrent_leeches'] = torrent['leechers']
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = parse_filesize(
                    str(torrent['size']) + ' b')
                entries.add(entry)

        return sorted(entries,
                      reverse=True,
                      key=lambda x: x.get('search_sort'))
Code example #23
0
File: fuzer.py Project: itsamenathan/Flexget
    def search(self, task, entry, config=None):
        """
        Search for name from fuzer.
        """
        self.rss_key = config['rss_key']
        username = config['username']
        password = hashlib.md5(config['password'].encode('utf-8')).hexdigest()

        # build the form request:
        data = {
            'cookieuser': '1',  # assumption: typical vBulletin login value
            'do': 'login',
            's': '',
            'securitytoken': 'guest',
            'vb_login_username': username,
            'vb_login_password': '',
            'vb_login_md5password': password,
            'vb_login_md5password_utf': password
        }
        # POST the login form:
        try:
            login = requests.post('https://www.fuzer.me/login.php?do=login',
                                  data=data)
        except RequestException as e:
            raise PluginError('Could not connect to fuzer: %s' % str(e))

        login_check_phrases = ['ההתחברות נכשלה', 'banned']
        if any(phrase in login.text for phrase in login_check_phrases):
            raise PluginError('Login to Fuzer failed, check credentials')

        self.user_id = requests.cookies.get('fzr2userid')
        category = config.get('category', [0])
        # Make sure categories is a list
        if not isinstance(category, list):
            category = [category]

        # If there are any text categories, turn them into their id number
        categories = [
            c if isinstance(c, int) else CATEGORIES[c] for c in category
        ]

        c_list = []
        for c in categories:
            c_list.append('c{}={}'.format(quote_plus('[]'), c))

        entries = []
        if entry.get('imdb_id'):
            log.debug('imdb_id {} detected, using in search.'.format(
                entry['imdb_id']))
            soup = self.get_fuzer_soup(entry['imdb_id'], c_list)
            entries = self.extract_entry_from_soup(soup)
            if entries:
                for e in list(entries):
                    e['imdb_id'] = entry.get('imdb_id')
        else:
            for search_string in entry.get('search_strings', [entry['title']]):
                query = normalize_unicode(search_string).replace(":", "")
                text = quote_plus(query.encode('windows-1255'))
                soup = self.get_fuzer_soup(text, c_list)
                entries += self.extract_entry_from_soup(soup)
        return sorted(entries,
                      reverse=True,
                      key=lambda x: x.get('search_sort')) if entries else []
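
The windows-1255 step above matters for Hebrew queries; a quick illustration
(Python 3 names assumed):

    from urllib.parse import quote_plus
    quote_plus('שלום'.encode('windows-1255'))  # -> '%F9%EC%E5%ED'
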
Code example #24
0
    def items(self):
        if self._items is None:
            log.debug('fetching items from IMDB')
            try:
                r = self.session.get(
                    'http://www.imdb.com/list/export?list_id=%s&author_id=%s' %
                    (self.list_id, self.user_id),
                    cookies=self.cookies)

            except RequestException as e:
                raise PluginError(e.args[0])
            lines = r.iter_lines(decode_unicode=True)
            # Throw away first line with headers
            next(lines)
            self._items = []
            for row in csv_reader(lines):
                log.debug('parsing line from csv: %s', ', '.join(row))
                if not len(row) == 16:
                    log.debug('no movie row detected, skipping. %s',
                              ', '.join(row))
                    continue
                entry = Entry({
                    'title':
                    '%s (%s)' %
                    (row[5], row[11]) if row[11] != '????' else '%s' % row[5],
                    'url':
                    row[15],
                    'imdb_id':
                    row[1],
                    'imdb_url':
                    row[15],
                    'imdb_list_position':
                    int(row[0]),
                    'imdb_list_created':
                    datetime.strptime(row[2], '%a %b %d %H:%M:%S %Y')
                    if row[2] else None,
                    'imdb_list_modified':
                    datetime.strptime(row[3], '%a %b %d %H:%M:%S %Y')
                    if row[3] else None,
                    'imdb_list_description':
                    row[4],
                    'imdb_name':
                    row[5],
                    'imdb_year':
                    int(row[11]) if row[11] != '????' else None,
                    'imdb_score':
                    float(row[9]) if row[9] else None,
                    'imdb_user_score':
                    float(row[8]) if row[8] else None,
                    'imdb_votes':
                    int(row[13]) if row[13] else None,
                    'imdb_genres':
                    [genre.strip() for genre in row[12].split(',')]
                })
                item_type = row[6].lower()
                name = row[5]
                year = int(row[11]) if row[11] != '????' else None
                if item_type in MOVIE_TYPES:
                    entry['movie_name'] = name
                    entry['movie_year'] = year
                elif item_type in SERIES_TYPES:
                    entry['series_name'] = name
                    entry['series_year'] = year
                elif item_type in OTHER_TYPES:
                    entry['title'] = name
                else:
                    log.verbose(
                        'Unknown IMDB type entry received: %s. Skipping',
                        item_type)
                    continue
                self._items.append(entry)
        return self._items
Code example #25
0
File: delay.py Project: gazpachoking/Flexget
    def get_delay(self, config):
        log.debug('delay: %s' % config)
        try:
            return parse_timedelta(config)
        except ValueError:
            raise PluginError('Invalid time format', log)
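
parse_timedelta() accepts human-readable intervals, so a call sketch for the
method above looks like this (plugin instance and values illustrative):

    delay = delay_plugin.get_delay('5 hours')  # -> timedelta(hours=5)
    delay_plugin.get_delay('not a time')       # -> raises PluginError
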
Code example #26
0
File: imdb.py Project: itsamenathan/Flexget
    def parse(self, imdb_id):
        self.imdb_id = extract_id(imdb_id)
        url = make_url(self.imdb_id)
        self.url = url
        page = requests.get(url)
        soup = get_soup(page.text)

        title_overview = soup.find('div', attrs={'class': 'title-overview'})
        if not title_overview:
            raise PluginError(
                'IMDB parser needs updating, imdb format changed. Please report on Github.'
            )

        # Parse stuff from the title-overview section
        name_elem = title_overview.find('h1', attrs={'itemprop': 'name'})
        if name_elem:
            self.name = name_elem.contents[0].strip()
        else:
            log.error(
                'IMDB parser possibly needs updating. Please report on Github.'
            )
            raise PluginError('Unable to set imdb_name for %s from %s' %
                              (self.imdb_id, self.url))

        year = title_overview.find('span', attrs={'id': 'titleYear'})
        if year:
            m = re.search(r'([0-9]{4})', year.text)
            if m:
                self.year = int(m.group(1))

        if not self.year:
            log.debug('No year found for %s' % self.imdb_id)

        mpaa_rating_elem = title_overview.find(itemprop='contentRating')
        if mpaa_rating_elem:
            self.mpaa_rating = mpaa_rating_elem['content']
        else:
            log.debug('No rating found for %s' % self.imdb_id)

        photo_elem = title_overview.find(itemprop='image')
        if photo_elem:
            self.photo = photo_elem['src']
        else:
            log.debug('No photo found for %s' % self.imdb_id)

        original_name_elem = title_overview.find(
            attrs={'class': 'originalTitle'})
        if original_name_elem:
            self.original_name = original_name_elem.contents[0].strip().strip(
                '"')
        else:
            log.debug('No original title found for %s' % self.imdb_id)

        votes_elem = title_overview.find(itemprop='ratingCount')
        if votes_elem:
            self.votes = str_to_int(votes_elem.text)
        else:
            log.debug('No votes found for %s' % self.imdb_id)

        score_elem = title_overview.find(itemprop='ratingValue')
        if score_elem:
            self.score = float(score_elem.text)
        else:
            log.debug('No score found for %s' % self.imdb_id)

        # get director(s)
        for director in title_overview.select('[itemprop="director"] > a'):
            director_id = extract_id(director['href'])
            director_name = director.text
            # tag instead of name
            if isinstance(director_name, Tag):
                director_name = None
            self.directors[director_id] = director_name

        # get writer(s)
        for writer in title_overview.select('[itemprop="creator"] > a'):
            writer_id = extract_id(writer['href'])
            writer_name = writer.text
            # tag instead of name
            if isinstance(writer_name, Tag):
                writer_name = None
            self.writers[writer_id] = writer_name

        # Details section
        title_details = soup.find('div', attrs={'id': 'titleDetails'})
        if title_details:
            # get languages
            for link in title_details.find_all(
                    'a',
                    href=re.compile(
                        r'^/search/title\?title_type=feature&languages=')):
                lang = link.text.strip().lower()
                if lang not in self.languages:
                    self.languages.append(lang.strip())

        # Storyline section
        storyline = soup.find('div', attrs={'id': 'titleStoryLine'})
        if storyline:
            plot_elem = storyline.find('p')
            if plot_elem:
                # Remove the "Written By" part.
                if plot_elem.em:
                    plot_elem.em.replace_with('')
                self.plot_outline = plot_elem.text.strip()
            else:
                log.debug('No storyline found for %s' % self.imdb_id)
            self.genres = [
                i.text.strip().lower()
                for i in storyline.select('[itemprop="genre"] > a')
            ]

        # Cast section
        cast = soup.find('div', attrs={'id': 'titleCast'})
        if cast:
            for actor in cast.select('[itemprop="actor"] > a'):
                actor_id = extract_id(actor['href'])
                actor_name = actor.text.strip()
                # tag instead of name
                if isinstance(actor_name, Tag):
                    actor_name = None
                self.actors[actor_id] = actor_name
Code example #27
0
File: task.py Project: vxcamiloxv/Flexget
    def merge_config(self, new_config):
        try:
            merge_dict_from_to(new_config, self.config)
        except MergeException as e:
            raise PluginError('Failed to merge configs for task %s: %s' %
                              (self.name, e))
Code example #28
0
    def authenticate(self):
        """Authenticates a session with IMDB, and grabs any IDs needed for getting/modifying list."""
        cached_credentials = False
        with Session() as session:
            user = (session.query(IMDBListUser).filter(
                IMDBListUser.user_name == self.config.get(
                    'login')).one_or_none())
            if user and user.cookies and user.user_id:
                log.debug('login credentials found in cache, testing')
                self.user_id = user.user_id
                if not self.get_user_id_and_hidden_value(cookies=user.cookies):
                    log.debug('cache credentials expired')
                    user.cookies = None
                    self._session.cookies.clear()
                else:
                    self.cookies = user.cookies
                    cached_credentials = True
            if not cached_credentials:
                log.debug(
                    'user credentials not found in cache or outdated, fetching from IMDB'
                )
                url_credentials = (
                    'https://www.imdb.com/ap/signin?openid.return_to=https%3A%2F%2Fwww.imdb.com%2Fap-signin-'
                    'handler&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&'
                    'openid.assoc_handle=imdb_mobile_us&openid.mode=checkid_setup&openid.claimed_id=http%3A%'
                    '2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.ope'
                    'nid.net%2Fauth%2F2.0')
                try:
                    # we need to get some cookies first
                    self._session.get('https://www.imdb.com')
                    r = self._session.get(url_credentials)
                except RequestException as e:
                    raise PluginError(e.args[0])
                soup = get_soup(r.content)
                form = soup.find('form', attrs={'name': 'signIn'})
                inputs = form.select('input')
                data = dict((i['name'], i.get('value')) for i in inputs
                            if i.get('name'))
                data['email'] = self.config['login']
                data['password'] = self.config['password']
                action = form.get('action')
                log.debug('email=%s, password=%s', data['email'],
                          data['password'])
                self._session.headers.update({'Referer': url_credentials})
                self._session.post(action, data=data)
                self._session.headers.update(
                    {'Referer': 'https://www.imdb.com/'})

                self.user_id = self.get_user_id_and_hidden_value()
                if not self.user_id:
                    raise plugin.PluginError(
                        'Login to IMDB failed. Check your credentials.')
                self.cookies = self._session.cookies.get_dict(
                    domain='.imdb.com')
                # Get list ID
            if user:
                for list in user.lists:
                    if self.config['list'] == list.list_name:
                        log.debug(
                            'found list ID %s matching list name %s in cache',
                            list.list_id,
                            list.list_name,
                        )
                        self.list_id = list.list_id
            if not self.list_id:
                log.debug(
                    'could not find list ID in cache, fetching from IMDB')
                if self.config['list'] == 'watchlist':
                    data = {
                        'consts[]': 'tt0133093',
                        'tracking_tag': 'watchlistRibbon'
                    }
                    wl_data = self._session.post(
                        'https://www.imdb.com/list/_ajax/watchlist_has',
                        data=data,
                        cookies=self.cookies,
                    ).json()
                    try:
                        self.list_id = wl_data['list_id']
                    except KeyError:
                        raise PluginError(
                            'No list ID could be received. Please initialize list by '
                            'manually adding an item to it and try again')
                elif self.config['list'] in IMMUTABLE_LISTS or self.config[
                        'list'].startswith('ls'):
                    self.list_id = self.config['list']
                else:
                    data = {'tconst': 'tt0133093'}
                    list_data = self._session.post(
                        'https://www.imdb.com/list/_ajax/wlb_dropdown',
                        data=data,
                        cookies=self.cookies,
                    ).json()
                    for li in list_data['items']:
                        if li['wlb_text'] == self.config['list']:
                            self.list_id = li['data_list_id']
                            break
                    else:
                        raise plugin.PluginError('Could not find list %s' %
                                                 self.config['list'])

            user = IMDBListUser(self.config['login'], self.user_id,
                                self.cookies)
            list = IMDBListList(self.list_id, self.config['list'],
                                self.user_id)
            user.lists.append(list)
            session.merge(user)

        self._authenticated = True
Code example #29
0
class TraktList(object):
    """Creates an entry for each item in your trakt list.

    Syntax:

    trakt_list:
      username: <value>
      api_key: <value>
      strip_dates: <yes|no>
      movies: <all|loved|hated|collection|watchlist>
      series: <all|loved|hated|collection|watchlist|watched>
      custom: <value>

    Options username and api_key are required.
    """

    movie_map = {
        'title': 'title',
        'url': 'url',
        'imdb_id': 'imdb_id',
        'tmdb_id': 'tmdb_id',
        # Generic fields filled by all movie lookup plugins:
        'movie_name': 'title',
        'movie_year': 'year'
    }

    series_map = {
        'title': 'title',
        'url': 'url',
        'imdb_id': 'imdb_id',
        'thetvdb_id': 'tvdb_id',
        'tvrage_id': 'tvrage_id'
    }

    def validator(self):
        from flexget import validator
        root = validator.factory('dict')
        root.accept('text', key='username', required=True)
        root.accept('text', key='api_key', required=True)
        root.accept('text', key='password')
        root.accept('choice', key='movies').accept_choices(
            ['all', 'loved', 'hated', 'collection', 'watchlist'])
        root.accept('choice', key='series').accept_choices(
            ['all', 'loved', 'hated', 'collection', 'watched', 'watchlist'])
        root.accept('text', key='custom')
        root.accept('boolean', key='strip_dates')
        return root

    @cached('trakt_list', persist='2 hours')
    def on_task_input(self, task, config):
        # Don't edit the config, or it won't pass validation on rerun
        url_params = config.copy()
        if 'movies' in config and 'series' in config:
            raise PluginError(
                'Cannot use both series list and movies list in the same task.'
            )
        if 'movies' in config:
            url_params['data_type'] = 'movies'
            url_params['list_type'] = config['movies']
            field_map = self.movie_map
        elif 'series' in config:
            url_params['data_type'] = 'shows'
            url_params['list_type'] = config['series']
            field_map = self.series_map
        elif 'custom' in config:
            url_params['data_type'] = 'custom'
            # Do some translation from visible list name to prepare for use in url
            list_name = config['custom'].lower()
            # These characters are just stripped in the url
            for char in '!@#$%^*()[]{}/=?+\\|-_':
                list_name = list_name.replace(char, '')
            # These characters get replaced
            list_name = list_name.replace('&', 'and')
            list_name = list_name.replace(' ', '-')
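            # e.g. 'My & Best Movies!' becomes 'my-and-best-movies'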
            url_params['list_type'] = list_name
            # Map type is per item in custom lists
        else:
            raise PluginError(
                'Must define a movies, series or custom list to retrieve from trakt.')

        url = 'http://api.trakt.tv/user/'
        auth = None
        if url_params['data_type'] == 'custom':
            url += 'list.json/%(api_key)s/%(username)s/%(list_type)s'
        elif url_params['list_type'] == 'watchlist':
            url += 'watchlist/%(data_type)s.json/%(api_key)s/%(username)s'
        else:
            url += 'library/%(data_type)s/%(list_type)s.json/%(api_key)s/%(username)s'
        url = url % url_params

        if 'password' in config:
            auth = {
                'username': config['username'],
                'password': hashlib.sha1(config['password']).hexdigest()
            }

        entries = []
        log.verbose('Retrieving list %s %s...' %
                    (url_params['data_type'], url_params['list_type']))

        try:
            data = task.requests.post(url, data=json.dumps(auth)).json
        except RequestException, e:
            raise PluginError('Could not retrieve list from trakt (%s)' %
                              e.message)

        def check_auth():
            if task.requests.post(
                    'http://api.trakt.tv/account/test/' + config['api_key'],
                    data=json.dumps(auth),
                    raise_status=False).status_code != 200:
                raise PluginError('Authentication to trakt failed.')

        if 'error' in data:
            check_auth()
            raise PluginError('Error getting trakt list: %s' % data['error'])
        if not data:
            check_auth()
            log.warning('No data returned from trakt.')
            return
        if url_params['data_type'] == 'custom':
            if not isinstance(data['items'], list):
                raise PluginError('Faulty custom items in response: %s' %
                                  data['items'])
            data = data['items']
        for item in data:
            if url_params['data_type'] == 'custom':
                if item['type'] == 'movie':
                    field_map = self.movie_map
                    item = item['movie']
                else:
                    field_map = self.series_map
                    item = item['show']
            entry = Entry()
            entry.update_using_map(field_map, item)
            if entry.isvalid():
                if config.get('strip_dates'):
                    # Remove year from end of name if present
                    entry['title'] = re.sub(r'\s+\(\d{4}\)$', '',
                                            entry['title'])
                entries.append(entry)

        return entries
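
For reference, a minimal task configuration exercising this plugin might look like the following sketch; the username and api_key values are placeholders, and the option names come from the docstring and validator above:

trakt_list:
  username: someuser
  api_key: 1234567890abcdef
  series: watchlist
  strip_dates: yes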
Code example #30
    def on_task_input(self, task, config):
        sess = requests.Session()
        if config.get('username') and config.get('password'):

            log.verbose('Logging in ...')

            # Log in to imdb with our handler
            params = {
                'login': config['username'],
                'password': config['password']
            }
            try:
                # First get the login page so we can get the hidden input value
                soup = get_soup(
                    sess.get(
                        'https://secure.imdb.com/register-imdb/login').content)

                tag = soup.find('input', attrs={'name': '49e6c'})
                if tag:
                    params['49e6c'] = tag['value']
                else:
                    log.warning(
                        'Unable to find required info for imdb login, maybe their login method has changed.'
                    )
                # Now we do the actual login with appropriate parameters
                r = sess.post('https://secure.imdb.com/register-imdb/login',
                              data=params,
                              raise_status=False)
            except requests.RequestException as e:
                raise PluginError('Unable to login to imdb: %s' % e.message)

            # IMDb redirects us upon a successful login.
            # removed - doesn't happen always?
            # if r.status_code != 302:
            #     log.warning('It appears logging in to IMDb was unsuccessful.')

            # try to automatically figure out user_id from watchlist redirect url
            if 'user_id' not in config:
                log.verbose('Getting user_id ...')
                response = sess.get('http://www.imdb.com/list/watchlist')
                log.debug('redirected to %s' % response.url)
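                # e.g. http://www.imdb.com/list/watchlist redirects to
                # /user/ur1234567/watchlist, so the second-to-last path
                # segment is the user id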
                user_id = response.url.split('/')[-2]
                if re.match(USER_ID_RE, user_id):
                    config['user_id'] = user_id
                else:
                    raise PluginError(
                        'Couldn\'t figure out user_id, please configure it manually.'
                    )

        if 'user_id' not in config:
            raise PluginError('Configuration option `user_id` required.')

        log.verbose('Retrieving list %s ...' % config['list'])

        # Get the imdb list in csv format
        try:
            url = 'http://www.imdb.com/list/export'
            params = {
                'list_id': config['list'],
                'author_id': config['user_id']
            }
            log.debug('Requesting %s' % url)
            response = sess.get(url, params=params)
            mime_type = parse_header(response.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise PluginError(
                    'Did not get a CSV export as response. The specified list `%s` probably does not exist.'
                    % config['list'])
            csv_rows = csv.reader(response.iter_lines())
        except requests.RequestException as e:
            raise PluginError('Unable to get imdb list: %s' % e.message)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                title = decode_html(row[5]).decode('utf-8')
                entries.append(
                    Entry(title=title,
                          url=make_url(row[1]),
                          imdb_id=row[1],
                          imdb_name=title))
            except IndexError:
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries
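
As a rough standalone sketch of the CSV export flow above (the export URL, parameters, and column layout are taken from this example; login, the mime-type check, and the FlexGet plumbing are omitted), fetching titles from a public list could look like this — the list and user IDs passed in at the bottom are hypothetical placeholders:

import csv

import requests


def fetch_imdb_list_titles(list_id, user_id):
    # Request the list as CSV from the same export endpoint the plugin uses
    response = requests.get('http://www.imdb.com/list/export',
                            params={'list_id': list_id, 'author_id': user_id})
    titles = []
    for row in csv.reader(response.iter_lines()):
        # Skip blank rows and the heading row, as the plugin does
        if not row or row[0] == 'position':
            continue
        titles.append(row[5])  # column 5 held the title in this export format
    return titles


print(fetch_imdb_list_titles('ls000000000', 'ur00000000'))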