def get_file(
    self,
    name: str,
    default: Union[object, bytes] = MISSING,
    use_video_downloader: bool = False,
    allow_tokens: bool = True,
) -> bytes:
    """
    Return the content of the file called ``name``.

    Sources are tried in this order:

    1. a truthy entry in ``self._files``;
    2. the ``<name>Url`` parameter, fetched with ``net.download``;
    3. the ``<name>Token`` parameter, resolved with ``file_uploads.get``
       (only when ``allow_tokens`` is true).

    :param name: base name of the file; also the prefix of the ``Url`` and
        ``Token`` parameter keys.
    :param default: value returned when no source yields content. Leaving
        it at ``MISSING`` marks the file as required.
    :param use_video_downloader: forwarded unchanged to ``net.download``.
    :param allow_tokens: whether the ``<name>Token`` parameter is consulted.
    :raises errors.MissingOrExpiredRequiredFileError: the file is required
        and a token was supplied, but the token resolved to nothing.
    :raises errors.MissingRequiredFileError: the file is required and no
        source produced any content.
    """
    if name in self._files and self._files[name]:
        return self._files[name]

    if name + "Url" in self._params:
        return net.download(
            self._params[name + "Url"],
            use_video_downloader=use_video_downloader,
        )

    if allow_tokens and name + "Token" in self._params:
        ret = file_uploads.get(self._params[name + "Token"])
        if ret:
            return ret
        # Only a required file (one without a default) turns an unresolved
        # token into an error; optional files fall through to the default.
        if default is MISSING:
            raise errors.MissingOrExpiredRequiredFileError(
                "Required file %r is missing or has expired." % name
            )

    if default is not MISSING:
        return cast(bytes, default)
    raise errors.MissingRequiredFileError(
        "Required file %r is missing." % name
    )
def get_file(self, name, required=False):
    """
    Look up the file called ``name``.

    An entry in ``self.files`` wins; otherwise the ``<name>Url`` entry of
    ``self.input`` is fetched with ``net.download``.

    :param name: base name of the file.
    :param required: when true, a file that cannot be found is an error
        instead of yielding ``None``.
    :raises errors.MissingRequiredFileError: ``required`` is true and
        neither source has the file.
    """
    uploaded = self.files
    if name in uploaded:
        return uploaded[name]

    params = self.input
    url_key = name + 'Url'
    if url_key in params:
        return net.download(params[url_key])

    if required:
        raise errors.MissingRequiredFileError(
            'Required file %r is missing.' % name)
    return None
def get_file(self, name, required=False):
    """
    Return the file called ``name`` from the uploaded files or by URL.

    ``self.files`` is checked first. If the name is absent there and
    ``self.input`` holds a ``<name>Url`` key, that URL is passed to
    ``net.download`` and the result returned.

    :param name: base name of the file.
    :param required: whether a missing file raises instead of giving
        ``None``.
    :raises errors.MissingRequiredFileError: the file is required but was
        found in neither place.
    """
    if name in self.files:
        return self.files[name]

    url_param = name + 'Url'
    if url_param in self.input:
        source_url = self.input[url_param]
        return net.download(source_url)

    if required:
        message = 'Required file %r is missing.' % name
        raise errors.MissingRequiredFileError(message)
    return None
def get_file(self, name, required=False, allow_tokens=True):
    """
    Return the content of the file called ``name``, or ``None``.

    Exactly one source is consulted, the first that applies: an entry in
    ``self._files``, the ``<name>Url`` parameter (via ``net.download``), or
    the ``<name>Token`` parameter (via ``file_uploads.get``) when
    ``allow_tokens`` is true.

    :param name: base name of the file.
    :param required: when true, a falsy result raises instead of being
        returned.
    :param allow_tokens: whether the ``<name>Token`` parameter is consulted.
    :raises errors.MissingOrExpiredRequiredFileError: the file is required
        and the token lookup produced nothing.
    :raises errors.MissingRequiredFileError: the file is required and the
        chosen source (or no source at all) produced nothing.
    """
    if name in self._files:
        content = self._files[name]
    elif name + 'Url' in self._params:
        content = net.download(self._params[name + 'Url'])
    elif allow_tokens and name + 'Token' in self._params:
        content = file_uploads.get(self._params[name + 'Token'])
        # A token that resolves to nothing gets its own, more specific error.
        if required and not content:
            raise errors.MissingOrExpiredRequiredFileError(
                'Required file %r is missing or has expired.' % name)
    else:
        content = None

    if content or not required:
        return content
    raise errors.MissingRequiredFileError(
        'Required file %r is missing.' % name)
def test_download(config_injector):
    """
    Check that ``net.download`` returns the exact bytes of a known page.

    The configuration is injected with ``user_agent`` set to ``None`` before
    the page is fetched from info.cern.ch.
    """
    config_injector({'user_agent': None})
    # Adjacent bytes literals are joined by the parser into a single value.
    expected_page = (
        b'<HEADER>\n<TITLE>The World Wide Web project</TITLE>\n<NEXTID N="'
        b'55">\n</HEADER>\n<BODY>\n<H1>World Wide Web</H1>The WorldWideWeb'
        b' (W3) is a wide-area<A\nNAME=0 HREF="WhatIs.html">\nhypermedia</'
        b'A> information retrieval\ninitiative aiming to give universal\na'
        b'ccess to a large universe of documents.<P>\nEverything there is '
        b'online about\nW3 is linked directly or indirectly\nto this docum'
        b'ent, including an <A\nNAME=24 HREF="Summary.html">executive\nsum'
        b'mary</A> of the project, <A\nNAME=29 HREF="Administration/Mailin'
        b'g/Overview.html">Mailing lists</A>\n, <A\nNAME=30 HREF="Policy.h'
        b'tml">Policy</A> , November\'s <A\nNAME=34 HREF="News/9211.html"'
        b'>W3 news</A> ,\n<A\nNAME=41 HREF="FAQ/List.html">Frequently Ask'
        b'ed Questions</A> .\n<DL>\n<DT><A\nNAME=44 HREF="../DataSources/T'
        b'op.html">What\'s out there?</A>\n<DD> Pointers to the\nworld\'s '
        b'online information,<A\nNAME=45 HREF="../DataSources/bySubject/Ov'
        b'erview.html"> subjects</A>\n, <A\nNAME=z54 HREF="../DataSources/'
        b'WWW/Servers.html">W3 servers</A>, etc.\n<DT><A\nNAME=46 HREF="He'
        b'lp.html">Help</A>\n<DD> on the browser you are using\n<DT><A\nNA'
        b'ME=13 HREF="Status.html">Software Products</A>\n<DD> A list of W'
        b'3 project\ncomponents and their current state.\n(e.g. <A\nNAME=2'
        b'7 HREF="LineMode/Browser.html">Line Mode</A> ,X11 <A\nNAME=35 HR'
        b'EF="Status.html#35">Viola</A> , <A\nNAME=26 HREF="NeXT/WorldWid'
        b'eWeb.html">NeXTStep</A>\n, <A\nNAME=25 HREF="Daemon/Overview.htm'
        b'l">Servers</A> , <A\nNAME=51 HREF="Tools/Overview.html">Tools</A'
        b'> ,<A\nNAME=53 HREF="MailRobot/Overview.html"> Mail robot</A> ,<'
        b'A\nNAME=52 HREF="Status.html#57">\nLibrary</A> )\n<DT><A\nNAME=4'
        b'7 HREF="Technical.html">Technical</A>\n<DD> Details of protocols'
        b', formats,\nprogram internals etc\n<DT><A\nNAME=40 HREF="Bibliog'
        b'raphy.html">Bibliography</A>\n<DD> Paper documentation\non W3 a'
        b'nd references.\n<DT><A\nNAME=14 HREF="People.html">People</A>\n<'
        b'DD> A list of some people involved\nin the project.\n<DT><A\nNAM'
        b'E=15 HREF="History.html">History</A>\n<DD> A summary of the hist'
        b'ory\nof the project.\n<DT><A\nNAME=37 HREF="Helping.html">How ca'
        b'n I help</A> ?\n<DD> If you would like\nto support the web..\n<D'
        b'T><A\nNAME=48 HREF="../README.html">Getting code</A>\n<DD> Getti'
        b'ng the code by<A\nNAME=49 HREF="LineMode/Defaults/Distribution.h'
        b'tml">\nanonymous FTP</A> , etc.</A>\n</DL>\n</BODY>\n'
    )
    page_url = 'http://info.cern.ch/hypertext/WWW/TheProject.html'
    assert net.download(page_url) == expected_page
def test_download(config_injector):
    """
    Fetch a fixed page with ``net.download`` and compare it byte for byte.

    ``config_injector`` is called first with ``user_agent`` set to ``None``;
    the page comes from info.cern.ch.
    """
    config_injector({'user_agent': None})
    # The expected body is kept as fixed-width pieces and joined once.
    body_pieces = (
        b'<HEADER>\n<TITLE>The World Wide Web project</TITLE>\n<NEXTID N="',
        b'55">\n</HEADER>\n<BODY>\n<H1>World Wide Web</H1>The WorldWideWeb',
        b' (W3) is a wide-area<A\nNAME=0 HREF="WhatIs.html">\nhypermedia</',
        b'A> information retrieval\ninitiative aiming to give universal\na',
        b'ccess to a large universe of documents.<P>\nEverything there is ',
        b'online about\nW3 is linked directly or indirectly\nto this docum',
        b'ent, including an <A\nNAME=24 HREF="Summary.html">executive\nsum',
        b'mary</A> of the project, <A\nNAME=29 HREF="Administration/Mailin',
        b'g/Overview.html">Mailing lists</A>\n, <A\nNAME=30 HREF="Policy.h',
        b'tml">Policy</A> , November\'s <A\nNAME=34 HREF="News/9211.html"',
        b'>W3 news</A> ,\n<A\nNAME=41 HREF="FAQ/List.html">Frequently Ask',
        b'ed Questions</A> .\n<DL>\n<DT><A\nNAME=44 HREF="../DataSources/T',
        b'op.html">What\'s out there?</A>\n<DD> Pointers to the\nworld\'s ',
        b'online information,<A\nNAME=45 HREF="../DataSources/bySubject/Ov',
        b'erview.html"> subjects</A>\n, <A\nNAME=z54 HREF="../DataSources/',
        b'WWW/Servers.html">W3 servers</A>, etc.\n<DT><A\nNAME=46 HREF="He',
        b'lp.html">Help</A>\n<DD> on the browser you are using\n<DT><A\nNA',
        b'ME=13 HREF="Status.html">Software Products</A>\n<DD> A list of W',
        b'3 project\ncomponents and their current state.\n(e.g. <A\nNAME=2',
        b'7 HREF="LineMode/Browser.html">Line Mode</A> ,X11 <A\nNAME=35 HR',
        b'EF="Status.html#35">Viola</A> , <A\nNAME=26 HREF="NeXT/WorldWid',
        b'eWeb.html">NeXTStep</A>\n, <A\nNAME=25 HREF="Daemon/Overview.htm',
        b'l">Servers</A> , <A\nNAME=51 HREF="Tools/Overview.html">Tools</A',
        b'> ,<A\nNAME=53 HREF="MailRobot/Overview.html"> Mail robot</A> ,<',
        b'A\nNAME=52 HREF="Status.html#57">\nLibrary</A> )\n<DT><A\nNAME=4',
        b'7 HREF="Technical.html">Technical</A>\n<DD> Details of protocols',
        b', formats,\nprogram internals etc\n<DT><A\nNAME=40 HREF="Bibliog',
        b'raphy.html">Bibliography</A>\n<DD> Paper documentation\non W3 a',
        b'nd references.\n<DT><A\nNAME=14 HREF="People.html">People</A>\n<',
        b'DD> A list of some people involved\nin the project.\n<DT><A\nNAM',
        b'E=15 HREF="History.html">History</A>\n<DD> A summary of the hist',
        b'ory\nof the project.\n<DT><A\nNAME=37 HREF="Helping.html">How ca',
        b'n I help</A> ?\n<DD> If you would like\nto support the web..\n<D',
        b'T><A\nNAME=48 HREF="../README.html">Getting code</A>\n<DD> Getti',
        b'ng the code by<A\nNAME=49 HREF="LineMode/Defaults/Distribution.h',
        b'tml">\nanonymous FTP</A> , etc.</A>\n</DL>\n</BODY>\n',
    )
    expected_body = b''.join(body_pieces)
    downloaded = net.download(
        'http://info.cern.ch/hypertext/WWW/TheProject.html')
    assert downloaded == expected_body
def get_file(
        self,
        name: str,
        default: Union[object, bytes] = MISSING,
        allow_tokens: bool = True) -> bytes:
    """
    Return the content of the file called ``name``.

    Sources are tried in this order:

    1. a truthy entry in ``self._files``;
    2. the ``<name>Url`` parameter, fetched with ``net.download``;
    3. the ``<name>Token`` parameter, resolved with ``file_uploads.get``
       (only when ``allow_tokens`` is true).

    :param name: base name of the file; also the prefix of the ``Url`` and
        ``Token`` parameter keys.
    :param default: value returned when no source yields content. Leaving
        it at ``MISSING`` marks the file as required.
    :param allow_tokens: whether the ``<name>Token`` parameter is consulted.
    :raises errors.MissingOrExpiredRequiredFileError: the file is required
        and a token was supplied, but the token resolved to nothing.
    :raises errors.MissingRequiredFileError: the file is required and no
        source produced any content.
    """
    if name in self._files and self._files[name]:
        return self._files[name]

    if name + 'Url' in self._params:
        return net.download(self._params[name + 'Url'])

    if allow_tokens and name + 'Token' in self._params:
        ret = file_uploads.get(self._params[name + 'Token'])
        if ret:
            return ret
        # Only a required file (one without a default) turns an unresolved
        # token into an error; optional files fall through to the default.
        if default is MISSING:
            raise errors.MissingOrExpiredRequiredFileError(
                'Required file %r is missing or has expired.' % name)

    if default is not MISSING:
        return cast(bytes, default)
    raise errors.MissingRequiredFileError(
        'Required file %r is missing.' % name)
def get_file(
        self,
        name: str,
        default: Union[object, bytes] = MISSING,
        allow_tokens: bool = True) -> bytes:
    """
    Fetch the file called ``name`` from the first source that has it.

    The lookup order is: a truthy entry in ``self._files``, then the
    ``<name>Url`` parameter (downloaded with ``net.download``), then the
    ``<name>Token`` parameter (resolved with ``file_uploads.get``, and only
    when ``allow_tokens`` is true).

    :param name: base name of the file; also the prefix of the ``Url`` and
        ``Token`` parameter keys.
    :param default: fallback returned when nothing is found. When left at
        ``MISSING`` the file is treated as required.
    :param allow_tokens: whether the ``<name>Token`` parameter is consulted.
    :raises errors.MissingOrExpiredRequiredFileError: a token was supplied
        for a required file but resolved to nothing.
    :raises errors.MissingRequiredFileError: a required file was found in
        no source.
    """
    if name in self._files and self._files[name]:
        return self._files[name]

    if name + 'Url' in self._params:
        return net.download(self._params[name + 'Url'])

    if allow_tokens and name + 'Token' in self._params:
        ret = file_uploads.get(self._params[name + 'Token'])
        if ret:
            return ret
        # An unresolved token is an error only for a required file (no
        # default); an optional file falls back to its default below.
        if default is MISSING:
            raise errors.MissingOrExpiredRequiredFileError(
                'Required file %r is missing or has expired.' % name)

    if default is not MISSING:
        return cast(bytes, default)
    raise errors.MissingRequiredFileError(
        'Required file %r is missing.' % name)
def test_failed_video_download(url):
    """
    Expect ``net.download`` to raise ``errors.ThirdPartyError`` for ``url``
    when the video downloader is requested.
    """
    download_options = dict(use_video_downloader=True)
    expected_error = errors.ThirdPartyError
    with pytest.raises(expected_error):
        net.download(url, **download_options)
def test_video_download(url, expected_sha1):
    """
    Download ``url`` through the video downloader and check that the SHA-1
    of the result, as computed by ``get_sha1``, equals ``expected_sha1``.
    """
    digest = get_sha1(net.download(url, use_video_downloader=True))
    assert digest == expected_sha1
def test_too_large_download(url):
    """
    Placeholder for the download-size limit: ``net.download(url)`` is meant
    to raise ``errors.ProcessingError``.

    The test is marked as an expected failure before anything is fetched.
    """
    # pytest.xfail() stops the test right here, so the check below stays
    # dormant until this call is removed.
    pytest.xfail("Download limit not implemented yet")
    expected_error = errors.ProcessingError
    with pytest.raises(expected_error):
        net.download(url)
def test_bad_content_downlaod():
    """
    Expect ``errors.ThirdPartyError`` when the video downloader is pointed
    at a fixed info.cern.ch page URL.
    """
    with pytest.raises(errors.ThirdPartyError):
        net.download(
            "http://info.cern.ch/hypertext/WWW/TheProject.html",
            use_video_downloader=True,
        )
def test_too_large_download(url):
    """
    Expect ``net.download`` to raise ``net.DownloadTooLargeError`` for
    ``url`` when the video downloader is requested.
    """
    download_options = dict(use_video_downloader=True)
    expected_error = net.DownloadTooLargeError
    with pytest.raises(expected_error):
        net.download(url, **download_options)