def add_entry(ea):
    """Copy configured rss fields from the current feed `entry` into `ea`
    and append the finished entry to `entries`.

    NOTE(review): closure — relies on `entry`, `fields`, `config`, `log`
    and `entries` from the enclosing scope; confirm against the full file.
    """
    ea['title'] = entry.title
    # Iterate over a snapshot: the `fields` mapping may shrink mid-loop.
    for rss_field, flexget_field in list(fields.items()):
        if rss_field not in entry:
            continue
        if rss_field == 'content':
            # 'content' is a list of pieces; decode and concatenate them all.
            pieces = []
            for piece in entry[rss_field]:
                try:
                    pieces.append(decode_html(piece.value))
                except UnicodeDecodeError:
                    log.warning('Failed to decode entry `%s` field `%s`', ea['title'], rss_field)
            ea[flexget_field] = ''.join(pieces)
            # NOTE(review): logs ea[rss_field] right after writing ea[flexget_field];
            # assumes the mapped key equals rss_field here — confirm the fields mapping.
            log.debug('Field `%s` set to `%s` for `%s`', rss_field, ea[rss_field], ea['title'])
            continue
        value = getattr(entry, rss_field)
        if not isinstance(value, str):
            # Error if this field is not a string
            log.error('Cannot grab non text field `%s` from rss.', rss_field)
            # Drop the field so the error is not repeated for every item.
            del fields[rss_field]
            continue
        if not value:
            log.debug('Not grabbing blank field %s from rss for %s.', rss_field, ea['title'])
            continue
        try:
            ea[flexget_field] = decode_html(entry[rss_field])
            if rss_field in config.get('other_fields', []):
                # Print a debug message for custom added fields
                log.debug('Field `%s` set to `%s` for `%s`', rss_field, ea[rss_field], ea['title'])
        except UnicodeDecodeError:
            log.warning('Failed to decode entry `%s` field `%s`', ea['title'], rss_field)
    # Also grab pubdate if available
    if hasattr(entry, 'published_parsed') and entry.published_parsed:
        ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
    # Store basic auth credentials for the download phase.
    if 'username' in config and 'password' in config:
        ea['download_auth'] = (config['username'], config['password'])
    entries.append(ea)
def add_entry(ea): ea['title'] = entry.title # fields dict may be modified during this loop, so loop over a copy (fields.items()) for rss_field, flexget_field in list(fields.items()): if rss_field in entry: if rss_field == 'content': content_str = '' for content in entry[rss_field]: try: content_str += decode_html(content.value) except UnicodeDecodeError: log.warning( 'Failed to decode entry `%s` field `%s`', ea['title'], rss_field) ea[flexget_field] = content_str log.debug('Field `%s` set to `%s` for `%s`', rss_field, ea[rss_field], ea['title']) continue if not isinstance(getattr(entry, rss_field), str): # Error if this field is not a string log.error( 'Cannot grab non text field `%s` from rss.', rss_field) # Remove field from list of fields to avoid repeated error del fields[rss_field] continue if not getattr(entry, rss_field): log.debug( 'Not grabbing blank field %s from rss for %s.', rss_field, ea['title']) continue try: ea[flexget_field] = decode_html(entry[rss_field]) if rss_field in config.get('other_fields', []): # Print a debug message for custom added fields log.debug('Field `%s` set to `%s` for `%s`', rss_field, ea[rss_field], ea['title']) except UnicodeDecodeError: log.warning( 'Failed to decode entry `%s` field `%s`', ea['title'], rss_field) # Also grab pubdate if available if hasattr(entry, 'published_parsed') and entry.published_parsed: ea['rss_pubdate'] = datetime(*entry.published_parsed[:6]) # store basic auth info if 'username' in config and 'password' in config: ea['download_auth'] = (config['username'], config['password']) entries.append(ea)
def filename_from_headers(self, entry, response):
    """Checks entry filename if it's found from content-disposition

    Parses the response headers as an email message so the standard library
    can extract the content-disposition filename, then stores it on `entry`
    (overriding any existing 'filename' field). Python 2 code: uses
    `str.decode` / `unicode`.
    """
    data = str(response.info())
    # try to decode/encode, afaik this is against the specs but some servers do it anyway
    try:
        data = data.decode('utf-8')
        log.debug('response info UTF-8 decoded')
    except UnicodeError:
        try:
            data = unicode(data)
            log.debug('response info unicoded')
        except UnicodeError:
            pass
    # now we should have unicode string, let's convert into proper format where non-ascii
    # chars are entities
    data = encode_html(data)
    try:
        filename = email.message_from_string(data).get_filename(failobj=False)
    except (AttributeError, SystemExit, KeyboardInterrupt):
        # at least rethrow the most common stuff before catch-all
        raise
    except Exception:
        # BUGFIX: was a bare `except:` — that also swallowed BaseExceptions
        # (e.g. GeneratorExit); `except Exception` keeps the best-effort
        # behaviour without hiding interpreter-level exits.
        log.error('Failed to decode filename from response: %r' % data)
        return
    if filename:
        filename = decode_html(filename)
        log.debug('Found filename from headers: %s' % filename)
        if 'filename' in entry:
            log.debug('Overriding filename %s with %s from content-disposition' %
                      (entry['filename'], filename))
        entry['filename'] = filename
def add_entry(ea):
    """Populate `ea` from the rss `entry` in scope and append it to `entries`.

    NOTE(review): Python 2 closure (iteritems/basestring) over `entry`,
    `fields`, `config`, `log`, `entries` from the enclosing scope.
    """
    ea['title'] = entry.title
    for rss_field, flexget_field in fields.iteritems():
        if rss_field in entry:
            if not isinstance(getattr(entry, rss_field), basestring):
                # Error if this field is not a string
                log.error('Cannot grab non text field `%s` from rss.' % rss_field)
                # Remove field from list of fields to avoid repeated error.
                # BUGFIX: only user-configured fields live in config['other_fields'];
                # an unconditional .remove() raised KeyError when the key was absent
                # and ValueError when the field was a built-in mapping entry.
                if rss_field in config.get('other_fields', []):
                    config['other_fields'].remove(rss_field)
                continue
            if not getattr(entry, rss_field):
                # Skip empty values entirely rather than storing blanks.
                log.debug('Not grabbing blank field %s from rss for %s.' % (rss_field, ea['title']))
                continue
            try:
                ea[flexget_field] = decode_html(entry[rss_field])
                if rss_field in config.get('other_fields', []):
                    # Print a debug message for custom added fields
                    log.debug('Field `%s` set to `%s` for `%s`' % (rss_field, ea[rss_field], ea['title']))
            except UnicodeDecodeError:
                log.warning('Failed to decode entry `%s` field `%s`' % (ea['title'], rss_field))
    # Also grab pubdate if available
    if hasattr(entry, 'published_parsed') and entry.published_parsed:
        ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
    # store basic auth info
    if 'username' in config and 'password' in config:
        ea['basic_auth_username'] = config['username']
        ea['basic_auth_password'] = config['password']
    entries.append(ea)
def add_entry(ea):
    """Populate entry `ea` from the rss `entry` and append it to `entries`.

    NOTE(review): Python 2 closure (iteritems/basestring) over `entry`,
    `fields`, `config`, `log`, `entries` from the enclosing scope.
    """
    ea["title"] = entry.title
    for rss_field, flexget_field in fields.iteritems():
        if rss_field in entry:
            if not isinstance(getattr(entry, rss_field), basestring):
                # Error if this field is not a string
                log.error("Cannot grab non text field `%s` from rss.", rss_field)
                # Remove field from list of fields to avoid repeated error
                # NOTE(review): raises ValueError if rss_field is not listed in
                # other_fields (e.g. a built-in mapping entry) and KeyError if
                # the key is absent — verify against callers.
                config["other_fields"].remove(rss_field)
                continue
            if not getattr(entry, rss_field):
                # Skip empty values entirely rather than storing blanks.
                log.debug("Not grabbing blank field %s from rss for %s.", rss_field, ea["title"])
                continue
            try:
                ea[flexget_field] = decode_html(entry[rss_field])
                if rss_field in config.get("other_fields", []):
                    # Print a debug message for custom added fields
                    log.debug("Field `%s` set to `%s` for `%s`", rss_field, ea[rss_field], ea["title"])
            except UnicodeDecodeError:
                log.warning("Failed to decode entry `%s` field `%s`", ea["title"], rss_field)
    # Also grab pubdate if available
    if hasattr(entry, "published_parsed") and entry.published_parsed:
        ea["rss_pubdate"] = datetime(*entry.published_parsed[:6])
    # store basic auth info
    if "username" in config and "password" in config:
        ea["basic_auth_username"] = config["username"]
        ea["basic_auth_password"] = config["password"]
    entries.append(ea)
def filename_from_headers(self, entry, response):
    """Pull a filename out of the content-disposition header into the entry."""
    disposition = response.headers.get('content-disposition')
    if not disposition:
        # No content disposition header, nothing we can do
        return
    fname = parse_header(disposition)[1].get('filename')
    if not fname:
        return
    # Specs allow latin1 for the header, but some servers send utf-8 anyway;
    # try them in that order and keep the raw value if both decodes fail.
    for encoding, message in (('latin1', 'filename header latin1 decoded'),
                              ('utf-8', 'filename header UTF-8 decoded')):
        try:
            fname = native_str_to_text(fname, encoding=encoding)
            log.debug(message)
            break
        except UnicodeError:
            continue
    fname = decode_html(fname)
    log.debug('Found filename from headers: %s', fname)
    if 'filename' in entry:
        log.debug(
            'Overriding filename %s with %s from content-disposition',
            entry['filename'], fname)
    entry['filename'] = fname
def filename_from_headers(self, entry, response):
    """Checks entry filename if it's found from content-disposition"""
    if not response.headers.get('content-disposition'):
        # No content disposition header, nothing we can do
        return
    filename = parse_header(
        response.headers['content-disposition'])[1].get('filename')
    if filename:
        # try to decode to unicode, specs allow latin1, some may do utf-8 anyway
        try:
            filename = native_str_to_text(filename, encoding='latin1')
            log.debug('filename header latin1 decoded')
        except UnicodeError:
            try:
                filename = native_str_to_text(filename, encoding='utf-8')
                log.debug('filename header UTF-8 decoded')
            except UnicodeError:
                # Both decodes failed: fall through with the raw header value.
                pass
        # Resolve any HTML entities in the decoded name.
        filename = decode_html(filename)
        log.debug('Found filename from headers: %s', filename)
        if 'filename' in entry:
            log.debug(
                'Overriding filename %s with %s from content-disposition',
                entry['filename'], filename)
        entry['filename'] = filename
def update_from_bss(self, update_bss):
    """Populate simple (string or number) columns from a parsed BSS document.

    NOTE(review): assumes `self.__table__` is a SQLAlchemy table and
    `update_bss` supports `.find(name)` returning a node with `.string`.
    """
    for col in self.__table__.columns:
        node = update_bss.find(col.name)
        if not (node and node.string):
            continue
        raw = node.string
        if isinstance(col.type, Integer):
            converted = int(raw)
        elif isinstance(col.type, Float):
            converted = float(raw)
        else:
            # BeautifulSoup used to take care of the html entities... but seems to have stopped.
            converted = decode_html(raw)
        setattr(self, col.name, converted)
    self.expired = False
def update_from_xml(self, update_xml):
    """Populate simple (string or number) columns from an XML element's children.

    NOTE(review): assumes `self.__table__` is a SQLAlchemy table; matching is
    case-insensitive on tag name because XML tags can't be looked up that way.
    """
    for node in update_xml:
        if not node.text or not node.tag:
            continue
        tag_lower = node.tag.lower()
        # Have to iterate to get around the inability to do a case-insensitive find
        for col in self.__table__.columns:
            if tag_lower != col.name.lower():
                continue
            if isinstance(col.type, Integer):
                val = int(node.text)
            elif isinstance(col.type, Float):
                val = float(node.text)
            else:
                val = decode_html(node.text)
            setattr(self, col.name, val)
    self.expired = False
def filename_from_headers(self, entry, response):
    """Store the content-disposition filename (if any) on the entry."""
    disposition = response.headers.get('content-disposition')
    if not disposition:
        # No content disposition header, nothing we can do
        return
    fname = parse_header(disposition)[1].get('filename')
    if not fname:
        return
    fname = decode_html(fname)
    logger.debug('Found filename from headers: {}', fname)
    if 'filename' in entry:
        logger.debug(
            'Overriding filename {} with {} from content-disposition',
            entry['filename'], fname)
    entry['filename'] = fname
# NOTE(review): fragment — the matching `try:` for the `except` below is opened
# before this excerpt begins, and the bare `return` belongs to the enclosing
# method; Python 2 syntax (`except X, e`). Kept verbatim.
opener = sess.get(url, params=params)
mime_type = parse_header(opener.headers['content-type'])[0]
log.debug('mime_type: %s' % mime_type)
if mime_type != 'text/csv':
    # Anything but CSV means the list id was wrong or access was denied.
    raise PluginError(
        'Didn\'t get CSV export as response. Probably specified list `%s` does not exist.'
        % config['list'])
csv_rows = csv.reader(opener.iter_lines())
except requests.RequestException, e:
    raise PluginError('Unable to get imdb list: %s' % e.message)
# Create an Entry for each movie in the list
entries = []
for row in csv_rows:
    if not row or row[0] == 'position':
        # Don't use blank rows or the headings row
        continue
    try:
        # Column 5 is the title, column 1 the IMDb id.
        title = decode_html(row[5]).decode('utf-8')
        entries.append(
            Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
    except IndexError:
        log.critical('IndexError! Unable to handle row: %s' % row)
return entries

# Module-level plugin registration (old FlexGet API).
register_plugin(ImdbList, 'imdb_list', api_ver=2)
def test_decode_html(self):
    """Verify flexget.utils.tools.decode_html resolves HTML entities."""
    from flexget.utils.tools import decode_html
    cases = {'&lt;3': u'<3', '&#x2500;': u'\u2500'}
    for raw, expected in cases.items():
        assert decode_html(raw) == expected
# Get the imdb list in csv format try: url = 'http://www.imdb.com/list/export?list_id=%s&author_id=%s' % (config['list'], config['user_id']) log.debug('Requesting %s' % url) opener = urlopener(url) mime_type = opener.headers.gettype() log.debug('mime_type: %s' % mime_type) if mime_type != 'text/csv': raise PluginError('Didn\'t get CSV export as response. Probably specified list `%s` does not exists.' % config['list']) csv_rows = csv.reader(opener) except urllib2.URLError, e: raise PluginError('Unable to get imdb list: %s' % e.message) # Create an Entry for each movie in the list entries = [] for row in csv_rows: if not row or row[0] == 'position': # Don't use blank rows or the headings row continue try: title = decode_html(row[5]) entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title)) except IndexError: log.critical('IndexError! Unable to handle row: %s' % row) return entries register_plugin(ImdbList, 'imdb_list', api_ver=2)
def on_task_input(self, task, config):
    """Log in to IMDb (if credentials given), resolve the user id, fetch the
    configured list as CSV and return it as FlexGet entries.

    NOTE(review): nesting reconstructed from a collapsed source line — the
    user_id detection is formatted at method level; confirm against upstream.
    """
    sess = requests.Session()
    if config.get('username') and config.get('password'):
        log.verbose('Logging in ...')
        # Log in to imdb with our handler
        params = {'login': config['username'], 'password': config['password']}
        try:
            # First get the login page so we can get the hidden input value
            soup = get_soup(sess.get('https://secure.imdb.com/register-imdb/login').content)
            # Fix for bs4 bug. see #2313 and github#118
            auxsoup = soup.find('div', id='nb20').next_sibling.next_sibling
            tag = auxsoup.find('input', attrs={'name': '49e6c'})
            if tag:
                params['49e6c'] = tag['value']
            else:
                log.warning('Unable to find required info for imdb login, maybe their login method has changed.')
            # Now we do the actual login with appropriate parameters
            r = sess.post('https://secure.imdb.com/register-imdb/login', data=params, raise_status=False)
        except requests.RequestException as e:
            raise plugin.PluginError('Unable to login to imdb: %s' % e.message)
        # IMDb redirects us upon a successful login.
        # removed - doesn't happen always?
        # if r.status_code != 302:
        #     log.warning('It appears logging in to IMDb was unsuccessful.')
    # try to automatically figure out user_id from watchlist redirect url
    if not 'user_id' in config:
        log.verbose('Getting user_id ...')
        try:
            response = sess.get('http://www.imdb.com/list/watchlist')
        except requests.RequestException as e:
            log.error('Error retrieving user ID from imdb: %s' % e.message)
            user_id = ''
        else:
            # The watchlist URL we are redirected to embeds the user id.
            log.debug('redirected to %s' % response.url)
            user_id = response.url.split('/')[-2]
        if re.match(USER_ID_RE, user_id):
            config['user_id'] = user_id
        else:
            raise plugin.PluginError('Couldn\'t figure out user_id, please configure it manually.')
    if not 'user_id' in config:
        raise plugin.PluginError('Configuration option `user_id` required.')
    log.verbose('Retrieving list %s ...' % config['list'])
    # Get the imdb list in csv format
    try:
        url = 'http://www.imdb.com/list/export'
        params = {'list_id': config['list'], 'author_id': config['user_id']}
        log.debug('Requesting %s' % url)
        opener = sess.get(url, params=params)
        mime_type = parse_header(opener.headers['content-type'])[0]
        log.debug('mime_type: %s' % mime_type)
        if mime_type != 'text/csv':
            # Anything but CSV means the list id was wrong or access was denied.
            raise plugin.PluginError('Didn\'t get CSV export as response. Probably specified list `%s` '
                                     'does not exist.' % config['list'])
        csv_rows = csv.reader(opener.iter_lines())
    except requests.RequestException as e:
        raise plugin.PluginError('Unable to get imdb list: %s' % e.message)
    # Create an Entry for each movie in the list
    entries = []
    for row in csv_rows:
        if not row or row[0] == 'position':
            # Don't use blank rows or the headings row
            continue
        try:
            # Column 5 is the title, column 1 the IMDb id.
            title = decode_html(row[5]).decode('utf-8')
            entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
        except IndexError:
            log.critical('IndexError! Unable to handle row: %s' % row)
    return entries
# Get the imdb list in csv format try: url = 'http://www.imdb.com/list/export' params = {'list_id': config['list'], 'author_id': config['user_id']} log.debug('Requesting %s' % url) opener = sess.get(url, params=params) mime_type = parse_header(opener.headers['content-type'])[0] log.debug('mime_type: %s' % mime_type) if mime_type != 'text/csv': raise PluginError('Didn\'t get CSV export as response. Probably specified list `%s` does not exists.' % config['list']) csv_rows = csv.reader(opener.iter_lines()) except requests.RequestException, e: raise PluginError('Unable to get imdb list: %s' % e.message) # Create an Entry for each movie in the list entries = [] for row in csv_rows: if not row or row[0] == 'position': # Don't use blank rows or the headings row continue try: title = decode_html(row[5]).decode('utf-8') entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title)) except IndexError: log.critical('IndexError! Unable to handle row: %s' % row) return entries register_plugin(ImdbList, 'imdb_list', api_ver=2)
def on_task_input(self, task, config):
    """Log in to IMDb when credentials are configured, work out the user id
    from the watchlist redirect, then download the list CSV and yield entries.

    NOTE(review): indentation reconstructed from a collapsed source line —
    confirm the user_id block sits at method level in the original file.
    """
    sess = requests.Session()
    if config.get('username') and config.get('password'):
        log.verbose('Logging in ...')
        # Log in to imdb with our handler
        params = {'login': config['username'], 'password': config['password']}
        try:
            # First get the login page so we can get the hidden input value
            soup = get_soup(
                sess.get('https://secure.imdb.com/register-imdb/login').content)
            # Fix for bs4 bug. see #2313 and github#118
            auxsoup = soup.find('div', id='nb20').next_sibling.next_sibling
            tag = auxsoup.find('input', attrs={'name': '49e6c'})
            if tag:
                params['49e6c'] = tag['value']
            else:
                log.warning(
                    'Unable to find required info for imdb login, maybe their login method has changed.')
            # Now we do the actual login with appropriate parameters
            r = sess.post('https://secure.imdb.com/register-imdb/login',
                          data=params, raise_status=False)
        except requests.RequestException as e:
            raise plugin.PluginError('Unable to login to imdb: %s' % e.message)
        # IMDb redirects us upon a successful login.
        # removed - doesn't happen always?
        # if r.status_code != 302:
        #     log.warning('It appears logging in to IMDb was unsuccessful.')
    # try to automatically figure out user_id from watchlist redirect url
    if not 'user_id' in config:
        log.verbose('Getting user_id ...')
        try:
            response = sess.get('http://www.imdb.com/list/watchlist')
        except requests.RequestException as e:
            log.error('Error retrieving user ID from imdb: %s' % e.message)
            user_id = ''
        else:
            # The redirect target URL embeds the user id.
            log.debug('redirected to %s' % response.url)
            user_id = response.url.split('/')[-2]
        if re.match(USER_ID_RE, user_id):
            config['user_id'] = user_id
        else:
            raise plugin.PluginError(
                'Couldn\'t figure out user_id, please configure it manually.')
    if not 'user_id' in config:
        raise plugin.PluginError(
            'Configuration option `user_id` required.')
    log.verbose('Retrieving list %s ...' % config['list'])
    # Get the imdb list in csv format
    try:
        url = 'http://www.imdb.com/list/export'
        params = {'list_id': config['list'], 'author_id': config['user_id']}
        log.debug('Requesting %s' % url)
        opener = sess.get(url, params=params)
        mime_type = parse_header(opener.headers['content-type'])[0]
        log.debug('mime_type: %s' % mime_type)
        if mime_type != 'text/csv':
            # Anything but CSV means the list id was wrong or access was denied.
            raise plugin.PluginError(
                'Didn\'t get CSV export as response. Probably specified list `%s` '
                'does not exist.' % config['list'])
        csv_rows = csv.reader(opener.iter_lines())
    except requests.RequestException as e:
        raise plugin.PluginError('Unable to get imdb list: %s' % e.message)
    # Create an Entry for each movie in the list
    entries = []
    for row in csv_rows:
        if not row or row[0] == 'position':
            # Don't use blank rows or the headings row
            continue
        try:
            # Column 5 is the title, column 1 the IMDb id.
            title = decode_html(row[5]).decode('utf-8')
            entries.append(
                Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
        except IndexError:
            log.critical('IndexError! Unable to handle row: %s' % row)
    return entries