Beispiel #1
0
            def add_entry(ea):
                """Populate ``ea`` from the enclosing scope's ``entry`` and append it to ``entries``.

                Sets the title, copies every rss field named in ``fields`` (a mapping
                of rss field name to entry field name) that ``entry`` provides, then
                adds the publication date and the basic auth credentials when they
                are available.

                :param ea: dict-like entry under construction; modified in place.
                """
                ea['title'] = entry.title

                # fields dict may be modified during this loop, so loop over a copy (fields.items())
                for rss_field, flexget_field in list(fields.items()):
                    if rss_field in entry:
                        if rss_field == 'content':
                            # `content` is iterated part by part; parts that fail to
                            # decode are skipped (with a warning) and the rest are kept
                            decoded_parts = []
                            for content in entry[rss_field]:
                                try:
                                    decoded_parts.append(decode_html(content.value))
                                except UnicodeDecodeError:
                                    log.warning(
                                        'Failed to decode entry `%s` field `%s`',
                                        ea['title'],
                                        rss_field,
                                    )
                            ea[flexget_field] = ''.join(decoded_parts)
                            # The value was stored under `flexget_field`, which may
                            # differ from `rss_field`, so read it back from there
                            log.debug(
                                'Field `%s` set to `%s` for `%s`',
                                rss_field,
                                ea[flexget_field],
                                ea['title'],
                            )
                            continue
                        if not isinstance(getattr(entry, rss_field), str):
                            # Error if this field is not a string
                            log.error('Cannot grab non text field `%s` from rss.', rss_field)
                            # Remove field from list of fields to avoid repeated error
                            del fields[rss_field]
                            continue
                        if not getattr(entry, rss_field):
                            log.debug(
                                'Not grabbing blank field %s from rss for %s.',
                                rss_field,
                                ea['title'],
                            )
                            continue
                        try:
                            ea[flexget_field] = decode_html(entry[rss_field])
                            if rss_field in config.get('other_fields', []):
                                # Print a debug message for custom added fields
                                log.debug(
                                    'Field `%s` set to `%s` for `%s`',
                                    rss_field,
                                    ea[flexget_field],
                                    ea['title'],
                                )
                        except UnicodeDecodeError:
                            log.warning(
                                'Failed to decode entry `%s` field `%s`', ea['title'], rss_field
                            )
                # Also grab pubdate if available
                if hasattr(entry, 'published_parsed') and entry.published_parsed:
                    # Only the first six items (year .. second) are passed to datetime
                    ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
                # store basic auth info
                if 'username' in config and 'password' in config:
                    ea['download_auth'] = (config['username'], config['password'])
                entries.append(ea)
Beispiel #2
0
            def add_entry(ea):
                """Populate ``ea`` from the enclosing scope's ``entry`` and append it to ``entries``.

                Sets the title, copies every rss field named in ``fields`` (a mapping
                of rss field name to entry field name) that ``entry`` provides, then
                adds the publication date and the basic auth credentials when they
                are available.

                :param ea: dict-like entry under construction; modified in place.
                """
                ea['title'] = entry.title

                # fields dict may be modified during this loop, so loop over a copy (fields.items())
                for rss_field, flexget_field in list(fields.items()):
                    if rss_field in entry:
                        if rss_field == 'content':
                            # `content` is iterated part by part; parts that fail to
                            # decode are skipped (with a warning) and the rest are kept
                            decoded_parts = []
                            for content in entry[rss_field]:
                                try:
                                    decoded_parts.append(
                                        decode_html(content.value))
                                except UnicodeDecodeError:
                                    log.warning(
                                        'Failed to decode entry `%s` field `%s`',
                                        ea['title'], rss_field)
                            ea[flexget_field] = ''.join(decoded_parts)
                            # The value was stored under `flexget_field`, which may
                            # differ from `rss_field`, so read it back from there
                            log.debug('Field `%s` set to `%s` for `%s`',
                                      rss_field, ea[flexget_field], ea['title'])
                            continue
                        if not isinstance(getattr(entry, rss_field), str):
                            # Error if this field is not a string
                            log.error(
                                'Cannot grab non text field `%s` from rss.',
                                rss_field)
                            # Remove field from list of fields to avoid repeated error
                            del fields[rss_field]
                            continue
                        if not getattr(entry, rss_field):
                            log.debug(
                                'Not grabbing blank field %s from rss for %s.',
                                rss_field, ea['title'])
                            continue
                        try:
                            ea[flexget_field] = decode_html(entry[rss_field])
                            if rss_field in config.get('other_fields', []):
                                # Print a debug message for custom added fields
                                log.debug('Field `%s` set to `%s` for `%s`',
                                          rss_field, ea[flexget_field],
                                          ea['title'])
                        except UnicodeDecodeError:
                            log.warning(
                                'Failed to decode entry `%s` field `%s`',
                                ea['title'], rss_field)
                # Also grab pubdate if available
                if hasattr(entry,
                           'published_parsed') and entry.published_parsed:
                    # Only the first six items (year .. second) are passed to datetime
                    ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
                # store basic auth info
                if 'username' in config and 'password' in config:
                    ea['download_auth'] = (config['username'],
                                           config['password'])
                entries.append(ea)
Beispiel #3
0
    def filename_from_headers(self, entry, response):
        """Checks entry filename if it's found from content-disposition

        The stringified ``response.info()`` is parsed as an email message; when it
        yields a filename, the entity-decoded name is written to
        ``entry['filename']``, replacing any existing value.

        :param entry: mapping that receives the ``filename`` key.
        :param response: object whose ``info()`` result stringifies to the headers.
        :return: None. Parsing failures other than AttributeError are logged and
            leave ``entry`` untouched.
        """
        data = str(response.info())
        # try to decode/encode, afaik this is against the specs but some servers do it anyway
        try:
            data = data.decode('utf-8')
            log.debug('response info UTF-8 decoded')
        except UnicodeError:
            try:
                data = unicode(data)
                log.debug('response info unicoded')
            except UnicodeError:
                pass

        # now we should have unicode string, let's convert into proper format where non-ascii
        # chars are entities
        data = encode_html(data)
        try:
            filename = email.message_from_string(data).get_filename(failobj=False)
        except AttributeError:
            # Deliberately not swallowed by the catch-all below
            raise
        except Exception:
            # `Exception` rather than a bare except, so SystemExit, KeyboardInterrupt
            # and other BaseException subclasses keep propagating on their own
            log.error('Failed to decode filename from response: %r', data)
            return
        if filename:
            filename = decode_html(filename)
            log.debug('Found filename from headers: %s', filename)
            if 'filename' in entry:
                log.debug(
                    'Overriding filename %s with %s from content-disposition',
                    entry['filename'],
                    filename,
                )
            entry['filename'] = filename
Beispiel #4
0
            def add_entry(ea):
                """Populate ``ea`` from the enclosing scope's ``entry`` and append it to ``entries``.

                Sets the title, copies every rss field named in ``fields`` (a mapping
                of rss field name to entry field name) that ``entry`` provides, then
                adds the publication date and the basic auth credentials when they
                are available.

                :param ea: dict-like entry under construction; modified in place.
                """
                ea['title'] = entry.title

                # Loop over a snapshot: a non text field is dropped from `fields` below
                for rss_field, flexget_field in list(fields.items()):
                    if rss_field in entry:
                        if not isinstance(getattr(entry, rss_field), basestring):
                            # Error if this field is not a string
                            log.error('Cannot grab non text field `%s` from rss.' % rss_field)
                            # Remove field from list of fields to avoid repeated error.
                            # `fields` is what this loop iterates, so the field has to
                            # leave it; the config list is only touched when it really
                            # contains the field, otherwise remove() would raise.
                            del fields[rss_field]
                            other_fields = config.get('other_fields', [])
                            if rss_field in other_fields:
                                other_fields.remove(rss_field)
                            continue
                        if not getattr(entry, rss_field):
                            log.debug('Not grabbing blank field %s from rss for %s.' % (rss_field, ea['title']))
                            continue
                        try:
                            ea[flexget_field] = decode_html(entry[rss_field])
                            if rss_field in config.get('other_fields', []):
                                # Print a debug message for custom added fields.
                                # The value is stored under `flexget_field`, which may
                                # differ from `rss_field`.
                                log.debug('Field `%s` set to `%s` for `%s`' % (rss_field, ea[flexget_field], ea['title']))
                        except UnicodeDecodeError:
                            log.warning('Failed to decode entry `%s` field `%s`' % (ea['title'], rss_field))
                # Also grab pubdate if available
                if hasattr(entry, 'published_parsed') and entry.published_parsed:
                    # Only the first six items (year .. second) are passed to datetime
                    ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
                # store basic auth info
                if 'username' in config and 'password' in config:
                    ea['basic_auth_username'] = config['username']
                    ea['basic_auth_password'] = config['password']
                entries.append(ea)
Beispiel #5
0
            def add_entry(ea):
                """Populate ``ea`` from the enclosing scope's ``entry`` and append it to ``entries``.

                Sets the title, copies every rss field named in ``fields`` (a mapping
                of rss field name to entry field name) that ``entry`` provides, then
                adds the publication date and the basic auth credentials when they
                are available.

                :param ea: dict-like entry under construction; modified in place.
                """
                ea["title"] = entry.title

                # Loop over a snapshot: a non text field is dropped from `fields` below
                for rss_field, flexget_field in list(fields.items()):
                    if rss_field in entry:
                        if not isinstance(getattr(entry, rss_field), basestring):
                            # Error if this field is not a string
                            log.error("Cannot grab non text field `%s` from rss.", rss_field)
                            # Remove field from list of fields to avoid repeated error.
                            # `fields` is what this loop iterates, so the field has to
                            # leave it; the config list is only touched when it really
                            # contains the field, otherwise remove() would raise.
                            del fields[rss_field]
                            other_fields = config.get("other_fields", [])
                            if rss_field in other_fields:
                                other_fields.remove(rss_field)
                            continue
                        if not getattr(entry, rss_field):
                            log.debug("Not grabbing blank field %s from rss for %s.", rss_field, ea["title"])
                            continue
                        try:
                            ea[flexget_field] = decode_html(entry[rss_field])
                            if rss_field in config.get("other_fields", []):
                                # Print a debug message for custom added fields.
                                # The value is stored under `flexget_field`, which may
                                # differ from `rss_field`.
                                log.debug("Field `%s` set to `%s` for `%s`", rss_field, ea[flexget_field], ea["title"])
                        except UnicodeDecodeError:
                            log.warning("Failed to decode entry `%s` field `%s`", ea["title"], rss_field)
                # Also grab pubdate if available
                if hasattr(entry, "published_parsed") and entry.published_parsed:
                    # Only the first six items (year .. second) are passed to datetime
                    ea["rss_pubdate"] = datetime(*entry.published_parsed[:6])
                # store basic auth info
                if "username" in config and "password" in config:
                    ea["basic_auth_username"] = config["username"]
                    ea["basic_auth_password"] = config["password"]
                entries.append(ea)
Beispiel #6
0
            def add_entry(ea):
                """Populate ``ea`` from the enclosing scope's ``entry`` and append it to ``entries``.

                Sets the title, copies every rss field named in ``fields`` (a mapping
                of rss field name to entry field name) that ``entry`` provides, then
                adds the publication date and the basic auth credentials when they
                are available.

                :param ea: dict-like entry under construction; modified in place.
                """
                ea['title'] = entry.title

                # Loop over a snapshot: a non text field is dropped from `fields` below
                for rss_field, flexget_field in list(fields.items()):
                    if rss_field in entry:
                        if not isinstance(getattr(entry, rss_field), basestring):
                            # Error if this field is not a string
                            log.error('Cannot grab non text field `%s` from rss.' % rss_field)
                            # Remove field from list of fields to avoid repeated error.
                            # `fields` is what this loop iterates, so the field has to
                            # leave it; the config list is only touched when it really
                            # contains the field, otherwise remove() would raise.
                            del fields[rss_field]
                            other_fields = config.get('other_fields', [])
                            if rss_field in other_fields:
                                other_fields.remove(rss_field)
                            continue
                        if not getattr(entry, rss_field):
                            log.debug('Not grabbing blank field %s from rss for %s.' % (rss_field, ea['title']))
                            continue
                        try:
                            ea[flexget_field] = decode_html(entry[rss_field])
                            if rss_field in config.get('other_fields', []):
                                # Print a debug message for custom added fields.
                                # The value is stored under `flexget_field`, which may
                                # differ from `rss_field`.
                                log.debug('Field `%s` set to `%s` for `%s`' % (rss_field, ea[flexget_field], ea['title']))
                        except UnicodeDecodeError:
                            log.warning('Failed to decode entry `%s` field `%s`' % (ea['title'], rss_field))
                # Also grab pubdate if available
                if hasattr(entry, 'published_parsed') and entry.published_parsed:
                    # Only the first six items (year .. second) are passed to datetime
                    ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
                # store basic auth info
                if 'username' in config and 'password' in config:
                    ea['basic_auth_username'] = config['username']
                    ea['basic_auth_password'] = config['password']
                entries.append(ea)
Beispiel #7
0
    def filename_from_headers(self, entry, response):
        """Store the filename announced by the content-disposition header, if any.

        Returns without touching ``entry`` when the header is missing or empty,
        or when it carries no ``filename`` parameter. Otherwise the decoded
        filename is written to ``entry['filename']``, replacing any previous
        value.

        :param entry: mapping that receives the ``filename`` key.
        :param response: object exposing the response headers as ``headers``.
        """
        disposition = response.headers.get('content-disposition')
        if not disposition:
            # No content disposition header, nothing we can do
            return

        filename = parse_header(disposition)[1].get('filename')
        if not filename:
            return

        # try to decode to unicode, specs allow latin1, some may do utf-8 anyway
        attempts = (
            ('latin1', 'filename header latin1 decoded'),
            ('utf-8', 'filename header UTF-8 decoded'),
        )
        for encoding, success_message in attempts:
            try:
                filename = native_str_to_text(filename, encoding=encoding)
                log.debug(success_message)
            except UnicodeError:
                # Fall through to the next encoding; give up silently after the last
                continue
            break

        filename = decode_html(filename)
        log.debug('Found filename from headers: %s', filename)
        had_filename = 'filename' in entry
        if had_filename:
            log.debug(
                'Overriding filename %s with %s from content-disposition',
                entry['filename'],
                filename,
            )
        entry['filename'] = filename
Beispiel #8
0
    def filename_from_headers(self, entry, response):
        """Store the filename announced by the content-disposition header, if any.

        Returns without touching ``entry`` when the header is missing or empty,
        or when it carries no ``filename`` parameter. Otherwise the decoded
        filename is written to ``entry['filename']``, replacing any previous
        value.

        :param entry: mapping that receives the ``filename`` key.
        :param response: object exposing the response headers as ``headers``.
        """
        header_value = response.headers.get('content-disposition')
        if not header_value:
            # No content disposition header, nothing we can do
            return

        header_params = parse_header(header_value)[1]
        filename = header_params.get('filename')
        if not filename:
            return

        # try to decode to unicode, specs allow latin1, some may do utf-8 anyway
        decoders = (
            ('latin1', 'filename header latin1 decoded'),
            ('utf-8', 'filename header UTF-8 decoded'),
        )
        for codec, done_message in decoders:
            try:
                filename = native_str_to_text(filename, encoding=codec)
                log.debug(done_message)
            except UnicodeError:
                # Try the next codec; nothing happens if the last one fails too
                continue
            break

        filename = decode_html(filename)
        log.debug('Found filename from headers: %s', filename)
        if 'filename' in entry:
            previous = entry['filename']
            log.debug(
                'Overriding filename %s with %s from content-disposition',
                previous, filename)
        entry['filename'] = filename
Beispiel #9
0
 def update_from_bss(self, update_bss):
     """Copy simple (string or number) column values out of ``update_bss``.

     For every column of ``self.__table__`` the element named after the column
     is looked up with ``update_bss.find``. Columns without a matching element,
     or whose element has no ``string``, keep their current value. The object
     is flagged as not expired afterwards.
     """
     for column in self.__table__.columns:
         element = update_bss.find(column.name)
         if not (element and element.string):
             continue
         if isinstance(column.type, Integer):
             converted = int(element.string)
         elif isinstance(column.type, Float):
             converted = float(element.string)
         else:
             # BeautifulSoup used to take care of the html entities... but seems to have stopped.
             converted = decode_html(element.string)
         setattr(self, column.name, converted)
     self.expired = False
Beispiel #10
0
 def update_from_bss(self, update_bss):
     """Copy simple (string or number) column values out of ``update_bss``.

     For every column of ``self.__table__`` the element named after the column
     is looked up with ``update_bss.find``. Columns without a matching element,
     or whose element has no ``string``, keep their current value. The object
     is flagged as not expired afterwards.
     """
     for table_column in self.__table__.columns:
         found = update_bss.find(table_column.name)
         if not found or not found.string:
             # Nothing usable for this column
             continue
         if isinstance(table_column.type, Integer):
             new_value = int(found.string)
         elif isinstance(table_column.type, Float):
             new_value = float(found.string)
         else:
             # BeautifulSoup used to take care of the html entities... but seems to have stopped.
             new_value = decode_html(found.string)
         setattr(self, table_column.name, new_value)
     self.expired = False
Beispiel #11
0
    def update_from_xml(self, update_xml):
        """Copy simple (string or number) column values out of ``update_xml``.

        Each node yielded by ``update_xml`` that has both text and a tag is
        matched case-insensitively against the column names of
        ``self.__table__``. Matching columns receive the node text, converted
        according to the column type. The object is flagged as not expired
        afterwards.
        """
        for node in update_xml:
            if not (node.text and node.tag):
                continue

            # Have to iterate to get around the inability to do a case-insensitive find
            for column in self.__table__.columns:
                if node.tag.lower() != column.name.lower():
                    continue
                if isinstance(column.type, Integer):
                    converted = int(node.text)
                elif isinstance(column.type, Float):
                    converted = float(node.text)
                else:
                    converted = decode_html(node.text)
                setattr(self, column.name, converted)
        self.expired = False
Beispiel #12
0
    def update_from_xml(self, update_xml):
        """Copy simple (string or number) column values out of ``update_xml``.

        Each node yielded by ``update_xml`` that has both text and a tag is
        matched case-insensitively against the column names of
        ``self.__table__``. Matching columns receive the node text, converted
        according to the column type. The object is flagged as not expired
        afterwards.
        """
        for xml_node in update_xml:
            usable = xml_node.text and xml_node.tag
            if not usable:
                continue

            # Have to iterate to get around the inability to do a case-insensitive find
            for table_column in self.__table__.columns:
                same_name = xml_node.tag.lower() == table_column.name.lower()
                if not same_name:
                    continue
                if isinstance(table_column.type, Integer):
                    new_value = int(xml_node.text)
                elif isinstance(table_column.type, Float):
                    new_value = float(xml_node.text)
                else:
                    new_value = decode_html(xml_node.text)
                setattr(self, table_column.name, new_value)
        self.expired = False
Beispiel #13
0
    def filename_from_headers(self, entry, response):
        """Store the filename announced by the content-disposition header, if any.

        Returns without touching ``entry`` when the header is missing or empty,
        or when it carries no ``filename`` parameter. Otherwise the filename,
        passed through ``decode_html``, is written to ``entry['filename']``,
        replacing any previous value.

        :param entry: mapping that receives the ``filename`` key.
        :param response: object exposing the response headers as ``headers``.
        """
        disposition = response.headers.get('content-disposition')
        if not disposition:
            # No content disposition header, nothing we can do
            return

        raw_name = parse_header(disposition)[1].get('filename')
        if not raw_name:
            return

        filename = decode_html(raw_name)
        logger.debug('Found filename from headers: {}', filename)
        if 'filename' in entry:
            previous = entry['filename']
            logger.debug(
                'Overriding filename {} with {} from content-disposition',
                previous,
                filename,
            )
        entry['filename'] = filename
Beispiel #14
0
            opener = sess.get(url, params=params)
            mime_type = parse_header(opener.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise PluginError(
                    'Didn\'t get CSV export as response. Probably specified list `%s` does not exist.'
                    % config['list'])
            csv_rows = csv.reader(opener.iter_lines())
        except requests.RequestException, e:
            raise PluginError('Unable to get imdb list: %s' % e.message)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                title = decode_html(row[5]).decode('utf-8')
                entries.append(
                    Entry(title=title,
                          url=make_url(row[1]),
                          imdb_id=row[1],
                          imdb_name=title))
            except IndexError:
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries


register_plugin(ImdbList, 'imdb_list', api_ver=2)
Beispiel #15
0
 def test_decode_html(self):
     """utils decode_html: named, decimal and hexadecimal entities are decoded"""
     from flexget.utils.tools import decode_html
     # (encoded input, expected decoded text)
     expectations = (
         ('&lt;&#51;', u'<3'),
         ('&#x2500;', u'\u2500'),
     )
     for encoded, expected in expectations:
         assert decode_html(encoded) == expected
Beispiel #16
0
        # Get the imdb list in csv format
        try:
            url = 'http://www.imdb.com/list/export?list_id=%s&author_id=%s' % (config['list'], config['user_id'])
            log.debug('Requesting %s' % url)
            opener = urlopener(url)
            mime_type = opener.headers.gettype()
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise PluginError('Didn\'t get CSV export as response. Probably specified list `%s` does not exists.'
                    % config['list'])
            csv_rows = csv.reader(opener)
        except urllib2.URLError, e:
            raise PluginError('Unable to get imdb list: %s' % e.message)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                title = decode_html(row[5])
                entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
            except IndexError:
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries


register_plugin(ImdbList, 'imdb_list', api_ver=2)
Beispiel #17
0
    def on_task_input(self, task, config):
        """Return one Entry per movie of the configured IMDb list.

        When both ``username`` and ``password`` are configured, a login is
        attempted first and, if ``user_id`` is not configured, it is derived from
        the URL the watchlist request ends up on. The list is then fetched as a
        CSV export and turned into entries.

        :param task: the task this input runs for (not used here).
        :param config: plugin configuration; reads ``list`` and optionally
            ``username``, ``password`` and ``user_id``. ``user_id`` is written
            back when it is detected automatically.
        :return: list of Entry objects.
        :raises plugin.PluginError: when a login or export request fails, when no
            user_id is configured or detectable, or when the export is not CSV.
        """
        sess = requests.Session()
        if config.get('username') and config.get('password'):

            log.verbose('Logging in ...')

            # Log in to imdb with our handler
            params = {'login': config['username'], 'password': config['password']}
            try:
                # First get the login page so we can get the hidden input value
                soup = get_soup(sess.get('https://secure.imdb.com/register-imdb/login').content)

                # Fix for bs4 bug. see #2313 and github#118
                # The hidden input is searched in the element two siblings after
                # div#nb20. Every step may be missing if the page layout changed;
                # in that case fall through to the warning below instead of
                # crashing with AttributeError.
                tag = None
                anchor = soup.find('div', id='nb20')
                sibling = anchor.next_sibling if anchor is not None else None
                auxsoup = sibling.next_sibling if sibling is not None else None
                if auxsoup is not None:
                    tag = auxsoup.find('input', attrs={'name': '49e6c'})
                if tag:
                    params['49e6c'] = tag['value']
                else:
                    log.warning('Unable to find required info for imdb login, maybe their login method has changed.')
                # Now we do the actual login with appropriate parameters.
                # The response is deliberately not inspected: a redirect (302) was
                # once treated as the success signal, but that check was removed
                # because it apparently does not always happen.
                sess.post('https://secure.imdb.com/register-imdb/login', data=params, raise_status=False)
            except requests.RequestException as e:
                raise plugin.PluginError('Unable to login to imdb: %s' % e.message)

            # try to automatically figure out user_id from watchlist redirect url
            if 'user_id' not in config:
                log.verbose('Getting user_id ...')
                try:
                    response = sess.get('http://www.imdb.com/list/watchlist')
                except requests.RequestException as e:
                    log.error('Error retrieving user ID from imdb: %s' % e.message)
                    user_id = ''
                else:
                    log.debug('redirected to %s' % response.url)
                    # The id is taken from the second to last path segment
                    user_id = response.url.split('/')[-2]
                if re.match(USER_ID_RE, user_id):
                    config['user_id'] = user_id
                else:
                    raise plugin.PluginError('Couldn\'t figure out user_id, please configure it manually.')

        if 'user_id' not in config:
            raise plugin.PluginError('Configuration option `user_id` required.')

        log.verbose('Retrieving list %s ...' % config['list'])

        # Get the imdb list in csv format
        try:
            url = 'http://www.imdb.com/list/export'
            params = {'list_id': config['list'], 'author_id': config['user_id']}
            log.debug('Requesting %s' % url)
            opener = sess.get(url, params=params)
            mime_type = parse_header(opener.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise plugin.PluginError('Didn\'t get CSV export as response. Probably specified list `%s` '
                                         'does not exist.' % config['list'])
            csv_rows = csv.reader(opener.iter_lines())
        except requests.RequestException as e:
            raise plugin.PluginError('Unable to get imdb list: %s' % e.message)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                # Column 5 is used as the title and column 1 as the imdb id
                title = decode_html(row[5]).decode('utf-8')
                entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
            except IndexError:
                # Row is shorter than expected; report it and keep going
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries
Beispiel #18
0
        # Get the imdb list in csv format
        try:
            url = 'http://www.imdb.com/list/export'
            params = {'list_id': config['list'], 'author_id': config['user_id']}
            log.debug('Requesting %s' % url)
            opener = sess.get(url, params=params)
            mime_type = parse_header(opener.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise PluginError('Didn\'t get CSV export as response. Probably specified list `%s` does not exists.'
                    % config['list'])
            csv_rows = csv.reader(opener.iter_lines())
        except requests.RequestException, e:
            raise PluginError('Unable to get imdb list: %s' % e.message)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                title = decode_html(row[5]).decode('utf-8')
                entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
            except IndexError:
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries


register_plugin(ImdbList, 'imdb_list', api_ver=2)
Beispiel #19
0
 def test_decode_html(self):
     """utils decode_html: named, decimal and hexadecimal entities are decoded"""
     from flexget.utils.tools import decode_html
     # Maps each encoded input to the text it must decode to
     cases = [
         ('&lt;&#51;', u'<3'),
         ('&#x2500;', u'\u2500'),
     ]
     for source, wanted in cases:
         assert decode_html(source) == wanted
Beispiel #20
0
    def on_task_input(self, task, config):
        """Return one Entry per movie of the configured IMDb list.

        When both ``username`` and ``password`` are configured, a login is
        attempted first and, if ``user_id`` is not configured, it is derived from
        the URL the watchlist request ends up on. The list is then fetched as a
        CSV export and turned into entries.

        :param task: the task this input runs for (not used here).
        :param config: plugin configuration; reads ``list`` and optionally
            ``username``, ``password`` and ``user_id``. ``user_id`` is written
            back when it is detected automatically.
        :return: list of Entry objects.
        :raises plugin.PluginError: when a login or export request fails, when no
            user_id is configured or detectable, or when the export is not CSV.
        """
        sess = requests.Session()
        if config.get('username') and config.get('password'):

            log.verbose('Logging in ...')

            # Log in to imdb with our handler
            params = {
                'login': config['username'],
                'password': config['password']
            }
            try:
                # First get the login page so we can get the hidden input value
                soup = get_soup(
                    sess.get(
                        'https://secure.imdb.com/register-imdb/login').content)

                # Fix for bs4 bug. see #2313 and github#118
                # The hidden input is searched in the element two siblings after
                # div#nb20. Every step may be missing if the page layout changed;
                # in that case fall through to the warning below instead of
                # crashing with AttributeError.
                tag = None
                anchor = soup.find('div', id='nb20')
                sibling = anchor.next_sibling if anchor is not None else None
                auxsoup = sibling.next_sibling if sibling is not None else None
                if auxsoup is not None:
                    tag = auxsoup.find('input', attrs={'name': '49e6c'})
                if tag:
                    params['49e6c'] = tag['value']
                else:
                    log.warning(
                        'Unable to find required info for imdb login, maybe their login method has changed.'
                    )
                # Now we do the actual login with appropriate parameters.
                # The response is deliberately not inspected: a redirect (302) was
                # once treated as the success signal, but that check was removed
                # because it apparently does not always happen.
                sess.post('https://secure.imdb.com/register-imdb/login',
                          data=params,
                          raise_status=False)
            except requests.RequestException as e:
                raise plugin.PluginError('Unable to login to imdb: %s' %
                                         e.message)

            # try to automatically figure out user_id from watchlist redirect url
            if 'user_id' not in config:
                log.verbose('Getting user_id ...')
                try:
                    response = sess.get('http://www.imdb.com/list/watchlist')
                except requests.RequestException as e:
                    log.error('Error retrieving user ID from imdb: %s' %
                              e.message)
                    user_id = ''
                else:
                    log.debug('redirected to %s' % response.url)
                    # The id is taken from the second to last path segment
                    user_id = response.url.split('/')[-2]
                if re.match(USER_ID_RE, user_id):
                    config['user_id'] = user_id
                else:
                    raise plugin.PluginError(
                        'Couldn\'t figure out user_id, please configure it manually.'
                    )

        if 'user_id' not in config:
            raise plugin.PluginError(
                'Configuration option `user_id` required.')

        log.verbose('Retrieving list %s ...' % config['list'])

        # Get the imdb list in csv format
        try:
            url = 'http://www.imdb.com/list/export'
            params = {
                'list_id': config['list'],
                'author_id': config['user_id']
            }
            log.debug('Requesting %s' % url)
            opener = sess.get(url, params=params)
            mime_type = parse_header(opener.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise plugin.PluginError(
                    'Didn\'t get CSV export as response. Probably specified list `%s` '
                    'does not exist.' % config['list'])
            csv_rows = csv.reader(opener.iter_lines())
        except requests.RequestException as e:
            raise plugin.PluginError('Unable to get imdb list: %s' % e.message)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                # Column 5 is used as the title and column 1 as the imdb id
                title = decode_html(row[5]).decode('utf-8')
                entries.append(
                    Entry(title=title,
                          url=make_url(row[1]),
                          imdb_id=row[1],
                          imdb_name=title))
            except IndexError:
                # Row is shorter than expected; report it and keep going
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries