class TumblrFile:
    """
    This is the base container class for all downloadable resources associated with Tumblr posts.
    """

    CATEGORY = 'misc'

    def __init__(self, data, container):
        """
        Args:
            data(dict): API response data
            container(TumblrPost): Parent container
        """
        self.log = logging.getLogger('tumdlr.containers.file')

        self._data = data
        self.container = container
        # Use the resource's own 'url' entry; fall back to 'post_url' when it is absent
        self.url = URL(self._data.get('url', self._data.get('post_url')))

    def download(self, context, **kwargs):
        """
        Download this resource to the location reported by ``filepath``.

        Args:
            context(tumdlr.main.Context): CLI request context
            kwargs(dict): Additional arguments to send with the download request

        Returns:
            str: Path to the saved file

        Raises:
            TumdlrDownloadError: If resolving the save path or performing the download fails
        """
        try:
            source_url = self.url.as_string()
            save_path = str(self.filepath(context, kwargs))
            download(source_url, save_path, **kwargs)
        except Exception as e:
            # Logger.warn is a deprecated alias (removed in Python 3.13); use warning()
            self.log.warning('Post download failed: %r', self, exc_info=e)
            # Chain the original exception so the root cause stays attached to the error
            raise TumdlrDownloadError(error_message=str(e), download_url=self.url.as_string()) from e

        return save_path

    def filepath(self, context, request_data):
        """
        Build the directory this resource should be saved under, based on the
        save path and categorization settings in the request context.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request
                (not used by this base implementation)

        Returns:
            Path
        """
        # Construct the save basedir
        basedir = Path(context.config['Tumdlr']['SavePath'])

        # Are we categorizing by user?
        if context.config['Categorization']['User']:
            self.log.debug('Categorizing by user: %s', self.container.blog.name)
            basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))

        # Are we categorizing by post type?
        if context.config['Categorization']['PostType']:
            self.log.debug('Categorizing by type: %s', self.CATEGORY)
            basedir = basedir.joinpath(self.CATEGORY)

        self.log.debug('Basedir constructed: %s', basedir)

        return basedir
def test_stress_authority(self):
    # The authority is the most ambiguous part of a URL. An invalid host can
    # contain ':' and '@' (whereas, for example, a path cannot contain '?'
    # and a query cannot contain '#'). The host '//no:99:' will be parsed
    # as 'no:99' and on the next recomposition it can be written as
    # '//no:99'. But parsing '//no:99:' and '//no:99' gives different
    # results.
    #
    # Case generation:
    #   from re import sub
    #   from itertools import permutations
    #   cases = set(sub(r'\d+', '7', ''.join(case))
    #               for case in set(permutations('::@@77777')))
    raw_cases = """7:7:7@7@ 7:7:7@7@7 7:7:7@@ 7:7:7@@7 7:7:@7@ 7:7:@7@7 7:7:@@ 7:7:@@7
        7:7@7:7@ 7:7@7:7@7 7:7@7:@ 7:7@7:@7 7:7@7@7: 7:7@7@7:7 7:7@7@: 7:7@7@:7
        7:7@:7@ 7:7@:7@7 7:7@:@ 7:7@:@7 7:7@@7: 7:7@@7:7 7:7@@: 7:7@@:7
        7::7@7@ 7::7@7@7 7::7@@ 7::7@@7 7::@7@ 7::@7@7 7::@@ 7::@@7
        7:@7:7@ 7:@7:7@7 7:@7:@ 7:@7:@7 7:@7@7: 7:@7@7:7 7:@7@: 7:@7@:7
        7:@:7@ 7:@:7@7 7:@:@ 7:@:@7 7:@@7: 7:@@7:7 7:@@: 7:@@:7
        7@7:7:7@ 7@7:7:7@7 7@7:7:@ 7@7:7:@7 7@7:7@7: 7@7:7@7:7 7@7:7@: 7@7:7@:7
        7@7::7@ 7@7::7@7 7@7::@ 7@7::@7 7@7:@7: 7@7:@7:7 7@7:@: 7@7:@:7
        7@7@7:7: 7@7@7:7:7 7@7@7:: 7@7@7::7 7@7@:7: 7@7@:7:7 7@7@:: 7@7@::7
        7@:7:7@ 7@:7:7@7 7@:7:@ 7@:7:@7 7@:7@7: 7@:7@7:7 7@:7@: 7@:7@:7
        7@::7@ 7@::7@7 7@::@ 7@::@7 7@:@7: 7@:@7:7 7@:@: 7@:@:7
        7@@7:7: 7@@7:7:7 7@@7:: 7@@7::7 7@@:7: 7@@:7:7 7@@:: 7@@::7
        :7:7@7@ :7:7@7@7 :7:7@@ :7:7@@7 :7:@7@ :7:@7@7 :7:@@ :7:@@7
        :7@7:7@ :7@7:7@7 :7@7:@ :7@7:@7 :7@7@7: :7@7@7:7 :7@7@: :7@7@:7
        :7@:7@ :7@:7@7 :7@:@ :7@:@7 :7@@7: :7@@7:7 :7@@: :7@@:7
        ::7@7@ ::7@7@7 ::7@@ ::7@@7 ::@7@ ::@7@7 ::@@7
        :@7:7@ :@7:7@7 :@7:@ :@7:@7 :@7@7: :@7@7:7 :@7@: :@7@:7
        :@:7@ :@:7@7 :@:@7 :@@7: :@@7:7 :@@:7
        @7:7:7@ @7:7:7@7 @7:7:@ @7:7:@7 @7:7@7: @7:7@7:7 @7:7@: @7:7@:7
        @7::7@ @7::7@7 @7::@ @7::@7 @7:@7: @7:@7:7 @7:@: @7:@:7
        @7@7:7: @7@7:7:7 @7@7:: @7@7::7 @7@:7: @7@:7:7 @7@:: @7@::7
        @:7:7@ @:7:7@7 @:7:@ @:7:@7 @:7@7: @:7@7:7 @:7@: @:7@:7
        @::7@ @::7@7 @::@7 @:@7: @:@7:7 @:@:7
        @@7:7: @@7:7:7 @@7:: @@7::7 @@:7: @@:7:7 @@::7"""

    # Whitespace (including the line wrapping above) only separates cases
    for authority in raw_cases.split():
        parsed = URL('//' + authority)

        # Check that a parsed URL survives a round trip through its string
        # form, and through its authority component alone
        from_string = URL(parsed.as_string())
        self.assertEqual(parsed, from_string)

        from_authority = URL('//' + parsed.authority)
        self.assertEqual(parsed, from_authority)
class TumblrPost:
    """
    This is the base container class for all Tumblr post types. It contains data that is always available with any
    type of post.

    Additional supported post types may extend this class to provide additional metadata parsing
    """

    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.TumblrBlog): Parent blog

        Raises:
            TumdlrParserError: If the post data could not be parsed
        """
        self._post = post
        self.blog = blog
        self.log = logging.getLogger('tumdlr.containers.post')

        # Defaults are assigned before parsing so that __repr__ (used in the
        # failure log message below) works even when parsing aborts midway
        self.id = None  # type: int
        self.type = None  # type: str
        self.url = None  # type: URL
        self.tags = set()
        self.post_date = None  # type: str
        self.note_count = None  # type: int

        self.files = []

        try:
            self._parse_post()
        except Exception as e:
            # Logger.warn is a deprecated alias (removed in Python 3.13); use warning()
            self.log.warning('Failed to parse post data: %r', self, exc_info=e)
            # Chain the original exception so the root cause stays attached to the error
            raise TumdlrParserError(post_data=post) from e

    @property
    def is_text(self):
        """
        Returns:
            bool: True when the post type is 'text'
        """
        return self.type == 'text'

    @property
    def is_photo(self):
        """
        Returns:
            bool: True when the post type is 'photo' or 'link'
        """
        return self.type in ['photo', 'link']

    @property
    def is_video(self):
        """
        Returns:
            bool: True when the post type is 'video'
        """
        return self.type == 'video'

    def _parse_post(self):
        """
        Populate the post attributes from the raw API response.

        'id', 'type' and 'date' are required (a missing key raises KeyError);
        'post_url', 'tags' and 'note_count' are optional.
        """
        self.id = self._post['id']
        self.type = self._post['type']
        self.url = URL(self._post['post_url']) if 'post_url' in self._post else None
        self.tags = set(self._post.get('tags', []))
        self.note_count = self._post.get('note_count')
        self.post_date = self._post['date']

    def __repr__(self):
        return "<TumblrPost id='{id}' type='{type}' url='{url}'>"\
            .format(id=self.id, type=self.type, url=self.url)

    def __str__(self):
        # An empty string is returned for posts without a URL
        return self.url.as_string() if self.url else ''