def test_progress_ftp(self):
    '''Drive a ProgressPrinter through one simulated FTP transfer.

    This is a smoke test: it makes no assertions and only checks that
    the begin-request, begin-response, data and end-response callbacks
    can be invoked in sequence without raising.
    '''
    printer = ProgressPrinter(stream=sys.stdout)

    ftp_request = FTPRequest('ftp://example.com/example.txt')

    ftp_response = FTPResponse()
    ftp_response.reply = FTPReply(226, 'Closing data connection')
    ftp_response.file_transfer_size = 2048
    ftp_response.restart_value = 10

    printer.update_from_begin_request(ftp_request)
    printer.update_from_begin_response(ftp_response)

    # Feed 100 small chunks to exercise repeated progress updates.
    for _ in range(100):
        printer.update_with_data(b'abc')

    printer.update_from_end_response(ftp_response)
def test_to_dict(self):
    '''Check the dict form of a Request and of a Response wrapping it.

    The request dict must expose the URL and protocol. The response
    dict must expose the nested request, the protocol, and the reply
    code/text both under ``reply`` and under the flat
    ``response_code`` / ``response_message`` keys.
    '''
    url = 'ftp://foofle.com'

    request = Request(url)
    request_info = request.to_dict()

    self.assertEqual(url, request_info['url'])
    self.assertEqual('ftp', request_info['protocol'])

    response = Response()
    response.request = request
    response.reply = Reply(code=200, text='Success')
    response_info = response.to_dict()

    self.assertEqual(url, response_info['request']['url'])
    self.assertEqual('ftp', response_info['protocol'])

    # The reply is reported twice: nested and as flat convenience keys.
    self.assertEqual(200, response_info['reply']['code'])
    self.assertEqual(200, response_info['response_code'])
    self.assertEqual('Success', response_info['reply']['text'])
    self.assertEqual('Success', response_info['response_message'])
def test_to_dict_body(self):
    '''Check how ``Response.to_dict`` reports the ``body`` entry.

    A response holding a real ``Body`` must yield a truthy ``body``
    value; a response whose body is ``NotImplemented`` must yield a
    falsy one.
    '''
    with_body = Response()
    with_body.body = Body()

    self.assertTrue(with_body.to_dict()['body'])

    # Release the body once the first check is done.
    with_body.body.close()

    without_body = Response()
    without_body.body = NotImplemented

    self.assertFalse(without_body.to_dict()['body'])
def start(self, request: Request) -> Response:
    '''Begin retrieving the file named by the request.

    The fetch is prepared, the transfer size is looked up, an optional
    restart offset is negotiated, the data stream is opened and a
    ``RETR`` command for ``request.file_path`` is issued.

    Args:
        request: Request.

    Returns:
        A Response populated with the initial data connection reply.

    Raises:
        RuntimeError: If the session is not in the ready state.

    Once the response is received, call :meth:`download`.

    Coroutine.
    '''
    if self._session_state != SessionState.ready:
        raise RuntimeError('Session not ready')

    response = Response()

    yield from self._prepare_fetch(request, response)
    response.file_transfer_size = yield from self._fetch_size(request)

    restart_offset = request.restart_value

    if restart_offset:
        try:
            yield from self._commander.restart(restart_offset)
            response.restart_value = restart_offset
        except FTPServerError:
            # Best effort: when the restart is refused the transfer
            # carries on and the response keeps no restart value.
            _logger.debug('Could not restart file.', exc_info=1)

    yield from self._open_data_stream()
    yield from self._begin_stream(Command('RETR', request.file_path))

    self._session_state = SessionState.file_request_sent

    return response
def start(self, request: Request) -> Response:
    """Begin retrieving the file named by the request.

    Steps performed, in order: prepare the fetch, look up the transfer
    size, try to apply the requested restart offset (if any), open the
    data stream, and send ``RETR`` for ``request.file_path``.

    Args:
        request: Request.

    Returns:
        A Response populated with the initial data connection reply.

    Raises:
        RuntimeError: If the session is not in the ready state.

    Once the response is received, call :meth:`download`.

    Coroutine.
    """
    if self._session_state != SessionState.ready:
        raise RuntimeError("Session not ready")

    response = Response()

    yield from self._prepare_fetch(request, response)

    transfer_size = yield from self._fetch_size(request)
    response.file_transfer_size = transfer_size

    resume_from = request.restart_value

    if resume_from:
        try:
            yield from self._commander.restart(resume_from)
            response.restart_value = resume_from
        except FTPServerError:
            # A refused restart is not fatal; it is only logged and the
            # response is left without a restart value.
            _logger.debug("Could not restart file.", exc_info=1)

    yield from self._open_data_stream()

    retrieve_command = Command("RETR", request.file_path)
    yield from self._begin_stream(retrieve_command)

    self._session_state = SessionState.file_request_sent

    return response
def test_warc_recorder_ftp(self):
    '''Record a simulated FTP session to WARC and inspect the output.

    An uncompressed WARC file is written for one control conversation
    and one data transfer. The test then checks that the expected
    record headers, software identifiers, payload and control log
    lines are present, that the file validates, and that validation
    fails after one byte of the file is overwritten.
    '''
    prefix = 'asdf'
    warc_path = 'asdf.warc'

    recorder = WARCRecorder(
        prefix, params=WARCRecorderParams(compress=False)
    )

    ftp_request = FTPRequest('ftp://example.com/example.txt')
    ftp_request.address = ('0.0.0.0', 80)

    ftp_response = FTPResponse()
    ftp_response.reply = FTPReply(200, 'OK')
    ftp_response.body = Body()
    ftp_response.data_address = ('0.0.0.0', 12345)

    with wpull.util.reset_file_offset(ftp_response.body):
        ftp_response.body.write(b'KITTEH DOGE')

    # Replay the control conversation and the data transfer.
    session = recorder.new_ftp_recorder_session()
    session.begin_control(ftp_request)
    session.control_send_data(b'GIMMEH example.txt')
    session.control_receive_data(b'200 OK, no need to yell.')
    session.begin_transfer(ftp_response)
    session.transfer_receive_data(b'KITTEH DOGE')
    session.end_transfer(ftp_response)
    session.end_control(ftp_response)
    session.close()

    recorder.close()

    with open(warc_path, 'rb') as warc_file:
        content = warc_file.read()

    self.assertTrue(content.startswith(b'WARC/1.0'))

    for expected in (
        b'WARC-Type: warcinfo\r\n',
        b'Content-Type: application/warc-fields',
        b'WARC-Date: ',
        b'WARC-Record-ID: <urn:uuid:',
        b'WARC-Block-Digest: sha1:',
    ):
        self.assertIn(expected, content)

    self.assertNotIn(b'WARC-Payload-Digest: sha1:', content)

    for expected in (
        b'WARC-Type: resource\r\n',
        b'WARC-Target-URI: ftp://',
        b'Content-Type: application/octet-stream',
        b'WARC-Type: metadata',
        b'WARC-Concurrent-To: <urn:uuid:',
        b'Content-Type: text/x-ftp-control-conversation',
    ):
        self.assertIn(expected, content)

    wpull_tag = 'Wpull/{0}'.format(wpull.version.__version__)
    self.assertIn(wpull_tag.encode('utf-8'), content)

    python_tag = 'Python/{0}'.format(wpull.util.python_version())
    self.assertIn(python_tag.encode('utf-8'), content)

    for expected in (
        b'KITTEH DOGE',
        b'* Opening control connection to',
        b'* Kept control connection to',
        b'* Opened data connection to ',
        b'* Closed data connection to ',
        b'> GIMMEH example.txt',
        b'< 200 OK, no need to yell.',
    ):
        self.assertIn(expected, content)

    # Ignore Concurrent Record ID not seen yet
    self.validate_warc(warc_path, ignore_minor_error=True)

    # Intentionally modify the contents
    with open(warc_path, 'r+b') as warc_file:
        warc_file.seek(355)
        warc_file.write(b'f')

    # Sanity check that it actually raises error on bad digest
    with self.assertRaises(Exception):
        self.validate_warc(warc_path, ignore_minor_error=True)
def test_warc_recorder_ftp(self):
    '''Write an FTP exchange to a WARC file and verify what was stored.

    The recorder is fed one control conversation and one data transfer
    with compression disabled. The resulting file is read back and
    checked for the expected WARC headers, version strings, payload
    bytes and control-connection log lines. It must pass validation
    as written and fail validation once a byte has been altered.
    '''
    file_prefix = 'asdf'
    warc_filename = 'asdf.warc'

    recorder_params = WARCRecorderParams(compress=False)
    warc_recorder = WARCRecorder(file_prefix, params=recorder_params)

    request = FTPRequest('ftp://example.com/example.txt')
    request.address = ('0.0.0.0', 80)

    response = FTPResponse()
    response.reply = FTPReply(200, 'OK')
    response.body = Body()
    response.data_address = ('0.0.0.0', 12345)

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'KITTEH DOGE')

    # Control channel first, then the data transfer nested inside it.
    ftp_session = warc_recorder.new_ftp_recorder_session()
    ftp_session.begin_control(request)
    ftp_session.control_send_data(b'GIMMEH example.txt')
    ftp_session.control_receive_data(b'200 OK, no need to yell.')
    ftp_session.begin_transfer(response)
    ftp_session.transfer_receive_data(b'KITTEH DOGE')
    ftp_session.end_transfer(response)
    ftp_session.end_control(response)
    ftp_session.close()

    warc_recorder.close()

    with open(warc_filename, 'rb') as reader:
        warc_bytes = reader.read()

    self.assertTrue(warc_bytes.startswith(b'WARC/1.0'))

    headers_before_digest_check = (
        b'WARC-Type: warcinfo\r\n',
        b'Content-Type: application/warc-fields',
        b'WARC-Date: ',
        b'WARC-Record-ID: <urn:uuid:',
        b'WARC-Block-Digest: sha1:',
    )
    for fragment in headers_before_digest_check:
        self.assertIn(fragment, warc_bytes)

    self.assertNotIn(b'WARC-Payload-Digest: sha1:', warc_bytes)

    record_headers = (
        b'WARC-Type: resource\r\n',
        b'WARC-Target-URI: ftp://',
        b'Content-Type: application/octet-stream',
        b'WARC-Type: metadata',
        b'WARC-Concurrent-To: <urn:uuid:',
        b'Content-Type: text/x-ftp-control-conversation',
    )
    for fragment in record_headers:
        self.assertIn(fragment, warc_bytes)

    self.assertIn(
        'Wpull/{0}'.format(wpull.version.__version__).encode('utf-8'),
        warc_bytes
    )
    self.assertIn(
        'Python/{0}'.format(wpull.util.python_version()).encode('utf-8'),
        warc_bytes
    )

    payload_and_log_lines = (
        b'KITTEH DOGE',
        b'* Opening control connection to',
        b'* Kept control connection to',
        b'* Opened data connection to ',
        b'* Closed data connection to ',
        b'> GIMMEH example.txt',
        b'< 200 OK, no need to yell.',
    )
    for fragment in payload_and_log_lines:
        self.assertIn(fragment, warc_bytes)

    # Ignore Concurrent Record ID not seen yet
    self.validate_warc(warc_filename, ignore_minor_error=True)

    # Intentionally modify the contents
    with open(warc_filename, 'r+b') as writer:
        writer.seek(355)
        writer.write(b'f')

    # Sanity check that it actually raises error on bad digest
    with self.assertRaises(Exception):
        self.validate_warc(warc_filename, ignore_minor_error=True)