def test_request__chunked_encoding_retry(self, mock_session):
    session_instance = mock_session.return_value

    # Handle auth
    response_dict = {"access_token": "", "expires_in": 99, "scope": ""}
    session_instance.request.return_value = Mock(
        headers={}, json=lambda: response_dict, status_code=200
    )
    requestor = prawcore.Requestor("prawcore:test (by /u/bboe)")
    authorizer = readonly_authorizer(requestor=requestor)
    session_instance.request.reset_mock()

    # Fail on subsequent requests
    exception = ChunkedEncodingError()
    session_instance.request.side_effect = exception

    expected = (
        "prawcore",
        "WARNING",
        "Retrying due to ChunkedEncodingError() status: GET "
        "https://oauth.reddit.com/",
    )
    with LogCapture(level=logging.WARNING) as log_capture:
        with self.assertRaises(RequestException) as context_manager:
            prawcore.Session(authorizer).request("GET", "/")
        log_capture.check(expected, expected)
    self.assertIsInstance(context_manager.exception, RequestException)
    self.assertIs(exception, context_manager.exception.original_exception)
    self.assertEqual(3, session_instance.request.call_count)
def generate():
    # Special case for Google App Engine.
    if hasattr(self.raw, 'stream'):
        try:
            if isinstance(self.raw._original_response._method, int):
                while True:
                    chunk = self.raw.read(chunk_size, decode_content=True)
                    if not chunk:
                        break
                    yield chunk
        except ProtocolError as e:
            raise ChunkedEncodingError(e)
        except DecodeError as e:
            raise ContentDecodingError(e)
        except ReadTimeoutError as e:
            raise ConnectionError(e)
    else:
        # Standard file-like object.
        while True:
            chunk = self.raw.read(chunk_size)
            if not chunk:
                break
            yield chunk

    self._content_consumed = True
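For context, a minimal consumer-side sketch (URL and destination are placeholders) of how failures in the generator above surface to callers: a connection dropped mid-body escapes `iter_content` as `ChunkedEncodingError`, so streaming callers usually wrap the read loop.

# Hedged sketch: consuming a streamed requests response; url/dest are placeholders.
import requests
from requests.exceptions import ChunkedEncodingError

def save_stream(url, dest):
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        try:
            with open(dest, "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    f.write(chunk)
        except ChunkedEncodingError:
            # The peer closed the connection mid-body; treat dest as incomplete.
            raise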
def response_cb(req):
    nonlocal encoding_throw
    # Every request fails with a ChunkedEncodingError twice but succeeds on the third attempt.
    if encoding_throw < 2:
        encoding_throw += 1
        raise ChunkedEncodingError()
    encoding_throw = 0
    days = get_range_days_from_request(req)
    ranges.append(days)
    time_mock.tick(delta=datetime.timedelta(minutes=days / days_per_minute_rate))
    return (200, {}, json.dumps({"createdAt": "2020"}))
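A sketch of how such a callback is typically registered with the `responses` mocking library; the wrapper function here is hypothetical and exists only so the failure counter can live in an enclosing scope, which is what the `nonlocal` above requires.

import json
import responses
from requests.exceptions import ChunkedEncodingError

def install_flaky_callback(url, fail_times=2):
    # Hypothetical helper: a self-contained variant of response_cb above.
    encoding_throw = 0

    def response_cb(req):
        nonlocal encoding_throw
        if encoding_throw < fail_times:
            encoding_throw += 1
            raise ChunkedEncodingError()
        encoding_throw = 0
        return (200, {}, json.dumps({"createdAt": "2020"}))

    responses.add_callback(responses.GET, url, callback=response_cb)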
async def generate():
    async with self:
        async with finalize(self.raw.stream(chunk_size)) as gen:
            try:
                async for chunk in gen:
                    yield chunk
            except ProtocolError as e:
                raise ChunkedEncodingError(e)
            except DecodeError as e:
                raise ContentDecodingError(e)
            except ReadTimeoutError as e:
                raise ConnectionError(e)

    self._content_consumed = True
def requests_check_incomplete_response(response: Response):
    """
    Check that we have read all the data, as the requests library does not
    currently enforce this.
    https://blog.petrzemek.net/2018/04/22/on-incomplete-http-reads-and-the-requests-library-in-python/
    """
    expected_length = response.headers.get('Content-Length')
    if expected_length is not None:
        actual_length = response.raw.tell()
        expected_length = int(expected_length)
        if actual_length < expected_length:
            msg = 'incomplete response ({} bytes read, {} more expected)'.format(
                actual_length, expected_length - actual_length)
            raise ChunkedEncodingError(msg, response=response)
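A usage sketch (placeholder URL): the check relies on `response.raw.tell()`, which counts bytes read off the wire, so it is meaningful once the body has been fully consumed.

import requests

resp = requests.get("https://example.com/data.bin")  # placeholder URL
_ = resp.content  # ensure the body has been read so raw.tell() is final
requests_check_incomplete_response(resp)  # raises ChunkedEncodingError if short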
async def generate():
    async with self:
        # async with finalize(self.raw.stream(chunk_size)) as gen:
        gen = self.raw.stream(chunk_size)
        logger.debug(f'Iterate response body stream: {self}')
        try:
            async for chunk in gen:
                yield chunk
        except ProtocolError as e:
            raise ChunkedEncodingError(e)
        except DecodeError as e:
            raise ContentDecodingError(e)
        except ReadTimeoutError as e:
            raise ConnectionError(e)

    self._content_consumed = True
def resp_read(chunk_size):
    try:
        # Special case for urllib3.
        try:
            for chunk in resp.raw.stream(chunk_size, decode_content=False):
                yield chunk
        except IncompleteRead as e:
            raise ChunkedEncodingError(e)
    except AttributeError:
        # Standard file-like object.
        while True:
            chunk = resp.raw.read(chunk_size)
            if not chunk:
                break
            yield chunk
def test_email_stream_chunked_encoding_exception(catalog, time_mock):
    TEST_START_DATE = "2020"
    DAYS_DURATION = 100

    time_mock.move_to(
        pendulum.parse(TEST_START_DATE) + pendulum.Duration(days=DAYS_DURATION)
    )

    responses.add(
        "GET",
        "https://api.iterable.com/api/export/data.json",
        body=ChunkedEncodingError(),
    )

    # The match string mirrors the source's error message verbatim,
    # including its spelling of "retires".
    with pytest.raises(
        Exception, match="ChunkedEncodingError: Reached maximum number of retires: 3"
    ):
        read_from_source(catalog)
    assert len(responses.calls) == 3
def test_ncbi_fails(self):
    exceptions = [ChunkedEncodingError(), ConnectionError(), ReadTimeout(),
                  ExpatError(), RuntimeError('bad record')]
    for code in [400, 429]:
        http_exception = HTTPError()
        http_exception.response = Response()
        http_exception.response.status_code = code
        exceptions.append(http_exception)
    for exception in exceptions:
        self.ncbi_exception = exception
        with self.assertLogs(level='DEBUG') as log:
            seq, tax = self.get_ncbi_data(query='MT345279.1')
            tax = tax.view(DataFrame)
            self.assertEqual(
                tax['Taxon']['MT345279.1'],
                'k__Fungi; p__Basidiomycota; c__Agaricomycetes; '
                'o__Boletales; f__Boletaceae; g__Boletus; s__edulis'
            )
        self.assertTrue('Retrying' in log.output[0])
def generate():
    # Special case for urllib3.
    if hasattr(self.raw, 'stream'):
        try:
            for chunk in self.raw.stream(chunk_size, decode_content=False):
                yield chunk
        except ProtocolError as e:
            raise ChunkedEncodingError(e)
        except DecodeError as e:
            raise ContentDecodingError(e)
        except ReadTimeoutError as e:
            raise ConnectionError(e)
    else:
        # Standard file-like object.
        while True:
            chunk = self.raw.read(chunk_size)
            if not chunk:
                break
            yield chunk

    self._content_consumed = True
def raise_exception(*args, **kwargs):
    if " obb " not in kwargs["description"].lower():
        return original(*args, **kwargs)
    else:
        raise ChunkedEncodingError()
class NotubizDocumentScraper():
    """Downloads Notubiz documents, extracts their text content, and updates search vectors."""

    def __init__(self):
        BASE_DIR = os.path.dirname(os.path.realpath(__name__))
        self.current = os.path.abspath(BASE_DIR)
        self.xls_dir = BASE_DIR + "/generics/"

    def start(self):
        # Get events, get the docs and update the vectors all at once
        self.get_document_content()

    def get_document_content(self):
        file_name = ''
        for event_doc_obj in sorted(
                Document.objects.filter(document_content_scanned=False),
                key=lambda x: x.id, reverse=True):
            try:
                file_name = self.downloadFile(event_doc_obj)
                document_content_text = self.getFileContent(
                    file_name, event_doc_obj.id)
                document_content = DocumentContent.objects.create(
                    content=document_content_text)
                event_doc_obj.document_content_scanned = True
                event_doc_obj.file_path = file_name
                event_doc_obj.doc_content = document_content
                event_doc_obj.save()
                self.remove_folder_contents()
                if CombinedItem.objects.filter(item_id=event_doc_obj.id,
                                               item_type='document').exists():
                    ci = CombinedItem.objects.get(item_id=event_doc_obj.id,
                                                  item_type='document')
                    ci.doc_content = document_content
                    ci.save()
                self.add_content_text(
                    ReceivedDocument.objects.filter(
                        document__url=event_doc_obj.url),
                    'received_document', document_content_text)
                self.add_content_text(
                    CouncilAddress.objects.filter(
                        question_document__url=event_doc_obj.url),
                    'council_address', document_content_text)
                self.add_content_text(
                    WrittenQuestion.objects.filter(
                        question_document__url=event_doc_obj.url),
                    'written_question', document_content_text)
                self.add_content_text(
                    PublicDocument.objects.filter(
                        document__url=event_doc_obj.url),
                    'format', document_content_text)
                self.add_content_text(
                    PolicyDocument.objects.filter(
                        document__url=event_doc_obj.url),
                    'policy_document', document_content_text)
                self.add_content_text(
                    ManagementDocument.objects.filter(
                        document__url=event_doc_obj.url),
                    'management_document', document_content_text)
                self.add_content_text(
                    Motion.objects.filter(document__url=event_doc_obj.url),
                    'motion', document_content_text)
                self.add_content_text(
                    Commitment.objects.filter(
                        new_document__url=event_doc_obj.url),
                    'commitment', document_content_text)
            except Exception:
                self.remove_folder_contents()

        vector = SearchVector('content', config='dutch')
        DocumentContent.objects.update(vector=vector)

    # Adds document content text to other types of objects whose url points to this document
    def add_content_text(self, query_set, typez, doc_cont_text):
        for typ in query_set:
            ci = CombinedItem.objects.get(item_id=typ.id, item_type=typez)
            ci.document_content_text = doc_cont_text
            ci.save()

    def downloadFile(self, event_doc_obj):
        if local_settings.RIS_MUNICIPALITY != 'Utrecht':
            file_name = str(event_doc_obj.notubiz_id) + '.pdf'
        else:
            file_name = str(event_doc_obj.ibabs_id) + '.pdf'

        path = os.path.abspath(self.current + '/files/' + file_name)
        if os.path.isfile(path):
            return file_name

        # The Utrecht servers are unreliable: sometimes we don't receive the
        # full document content in the request, which results in a
        # ChunkedEncodingError. So we redo the download up to 5 times, and if
        # there is still an issue we can investigate it more thoroughly.
        times_tried = 0
        while times_tried < 5:
            try:
                try:
                    r = requests.get(event_doc_obj.url, stream=True)
                except SSLError:
                    return None
                except InvalidURL:
                    return None
                if (r.status_code == 200
                        and 'text/html' not in r.headers['Content-Type']
                        and 'content-disposition' in r.headers):
                    path = os.path.abspath(self.current + '/files/' + file_name)
                    with open(path, 'wb') as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)
                    return file_name
                else:
                    break
            except ChunkedEncodingError as e:
                times_tried += 1
                print(e)
                print("File wasn't downloaded successfully, so we redo the download")
                print('File downloaded {number} times'.format(number=times_tried))
                if times_tried >= 5:
                    raise ChunkedEncodingError(
                        "Redownloading the file 5 times didn't work; maybe there's some other issue?"
                    )
        return None
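The retry loop above distills to a small generic helper. A hedged sketch with illustrative names, not part of the scraper itself:

import time
import requests
from requests.exceptions import ChunkedEncodingError

def download_with_retries(url, dest, attempts=5, backoff=1.0):
    # Illustrative: retry a streamed download when the body arrives truncated.
    for attempt in range(1, attempts + 1):
        try:
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                with open(dest, "wb") as f:
                    for chunk in r.iter_content(1024):
                        f.write(chunk)
            return dest
        except ChunkedEncodingError:
            if attempt == attempts:
                raise
            time.sleep(backoff * attempt)  # simple linear backoff between attempts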
def request(self, method, url,
            params=None, data=None, headers=None, cookies=None, files=None,
            auth=None, timeout=None, allow_redirects=True, proxies=None,
            hooks=None, stream=None, verify=None, cert=None, json=None,
            **kwargs):
    """Constructs a :class:`Request <Request>`, prepares it and sends it.
    Returns :class:`Response <Response>` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query
        string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send
        in the body of the :class:`Request`.
    :param json: (optional) json to send in the body of the
        :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the
        :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the
        :class:`Request`.
    :param files: (optional) Dictionary of ``'filename': file-like-objects``
        for multipart encoding upload.
    :param auth: (optional) Auth tuple or callable to enable
        Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send
        data before giving up, as a float, or a :ref:`(connect timeout,
        read timeout) <timeouts>` tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Set to True by default.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol or protocol and
        hostname to the URL of the proxy.
    :param stream: (optional) whether to immediately download the response
        content. Defaults to ``False``.
    :param verify: (optional) whether the SSL cert will be verified.
        A CA_BUNDLE path can also be provided. Defaults to ``True``.
    :param cert: (optional) if String, path to ssl client cert file (.pem).
        If Tuple, ('cert', 'key') pair.
    :rtype: requests.Response
    """
    # ==============================================================================================================
    # add by mz
    error_type = kwargs.get("error_type")
    if error_type:
        from requests.exceptions import InvalidURL, URLRequired, ConnectTimeout, ConnectionError, SSLError, ReadTimeout
        from requests.exceptions import InvalidSchema, MissingSchema, ChunkedEncodingError, ContentDecodingError
        from requests.exceptions import RequestException, HTTPError, ProxyError, Timeout, RetryError, StreamConsumedError
        from requests.exceptions import TooManyRedirects

        get_error = {
            "InvalidURL": InvalidURL(),
            "URLRequired": URLRequired(),
            "ConnectTimeout": ConnectTimeout(),
            "ConnectionError": ConnectionError(),
            "SSLError": SSLError(),
            "ReadTimeout": ReadTimeout(),
            "InvalidSchema": InvalidSchema(),
            "MissingSchema": MissingSchema(),
            "ChunkedEncodingError": ChunkedEncodingError(),
            "ContentDecodingError": ContentDecodingError(),
            "StreamConsumedError": StreamConsumedError(),
            "TooManyRedirects": TooManyRedirects(),
            "RequestException": RequestException(),
            "HTTPError": HTTPError(),
            "ProxyError": ProxyError(),
            "Timeout": Timeout(),
            "RetryError": RetryError(),
        }
        error_ = get_error[error_type]
        raise error_
    # ==============================================================================================================
    # Create the Request.
    req = Request(
        method=method.upper(),
        url=url,
        headers=headers,
        files=files,
        data=data or {},
        json=json,
        params=params or {},
        auth=auth,
        cookies=cookies,
        hooks=hooks,
    )
    prep = self.prepare_request(req)

    proxies = proxies or {}

    settings = self.merge_environment_settings(
        prep.url, proxies, stream, verify, cert
    )

    # Send the request.
    send_kwargs = {
        'timeout': timeout,
        'allow_redirects': allow_redirects,
    }
    send_kwargs.update(settings)
    resp = self.send(prep, **send_kwargs)

    return resp
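A usage sketch for the fault-injection hook above (the "add by mz" block): passing `error_type` short-circuits the request and raises the named exception before anything is sent, which makes it easy to exercise caller-side error handling. This assumes the patched class is used as a drop-in for `requests.Session`.

from requests.exceptions import ChunkedEncodingError

session = Session()  # the patched Session class above
try:
    session.request("GET", "https://example.com/", error_type="ChunkedEncodingError")
except ChunkedEncodingError:
    print("injected ChunkedEncodingError caught")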
def test_monitor_sends_exception_data_and_hb_on_expected_exceptions(
        self, mock_get_data) -> None:
    json_decode_error = json.JSONDecodeError(msg='test error', doc='test', pos=2)
    errors_exceptions_dict = {
        ReqConnectionError('test'): CannotAccessGitHubPageException(
            self.repo_config.releases_page),
        ReadTimeout('test'): CannotAccessGitHubPageException(
            self.repo_config.releases_page),
        IncompleteRead('test'): DataReadingException(
            self.monitor_name, self.repo_config.releases_page),
        ChunkedEncodingError('test'): DataReadingException(
            self.monitor_name, self.repo_config.releases_page),
        ProtocolError('test'): DataReadingException(
            self.monitor_name, self.repo_config.releases_page),
        json_decode_error: JSONDecodeException(json_decode_error)
    }
    try:
        self.test_monitor._initialise_rabbitmq()
        for error, data_ret_exception in errors_exceptions_dict.items():
            mock_get_data.side_effect = error
            expected_output_data = {
                'error': {
                    'meta_data': {
                        'monitor_name': self.test_monitor.monitor_name,
                        'repo_name': self.test_monitor.repo_config.repo_name,
                        'repo_id': self.test_monitor.repo_config.repo_id,
                        'repo_parent_id': self.test_monitor.repo_config.parent_id,
                        'time': datetime(2012, 1, 1).timestamp()
                    },
                    'message': data_ret_exception.message,
                    'code': data_ret_exception.code,
                }
            }
            expected_output_hb = {
                'component_name': self.test_monitor.monitor_name,
                'is_alive': True,
                'timestamp': datetime(2012, 1, 1).timestamp()
            }
            # Delete the queue beforehand to avoid leftover messages in the
            # queue on error.
            self.test_monitor.rabbitmq.queue_delete(self.test_queue_name)

            res = self.test_monitor.rabbitmq.queue_declare(
                queue=self.test_queue_name, durable=True, exclusive=False,
                auto_delete=False, passive=False)
            self.assertEqual(0, res.method.message_count)
            self.test_monitor.rabbitmq.queue_bind(
                queue=self.test_queue_name, exchange=RAW_DATA_EXCHANGE,
                routing_key='github')
            self.test_monitor.rabbitmq.queue_bind(
                queue=self.test_queue_name, exchange=HEALTH_CHECK_EXCHANGE,
                routing_key='heartbeat.worker')
            self.test_monitor._monitor()

            # By re-declaring the queue again we can get the number of
            # messages in the queue.
            res = self.test_monitor.rabbitmq.queue_declare(
                queue=self.test_queue_name, durable=True, exclusive=False,
                auto_delete=False, passive=True)
            # There must be 2 messages in the queue: the heartbeat and the
            # processed data.
            self.assertEqual(2, res.method.message_count)

            # Check that the message received is actually the processed data
            _, _, body = self.test_monitor.rabbitmq.basic_get(
                self.test_queue_name)
            self.assertEqual(expected_output_data, json.loads(body))

            # Check that the message received is actually the HB
            _, _, body = self.test_monitor.rabbitmq.basic_get(
                self.test_queue_name)
            self.assertEqual(expected_output_hb, json.loads(body))
    except Exception as e:
        self.fail("Test failed: {}".format(e))
class ImportContentTestCase(TestCase):
    """
    Test case for the importcontent management command.
    """

    fixtures = ["content_test.json"]
    the_channel_id = "6199dde695db4ee4ab392222d5af1e5c"

    def setUp(self):
        LocalFile.objects.update(available=False)

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=True,
    )
    def test_remote_cancel_immediately(
        self, is_cancelled_mock, cancel_mock, FileDownloadMock, annotation_mock
    ):
        # Check behaviour if cancellation is called before any file download starts
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        call_command("importcontent", "network", self.the_channel_id)
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileDownloadMock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_remote_url"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True, True],
    )
    def test_remote_cancel_during_transfer(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        local_path_mock,
        remote_path_mock,
        annotation_mock,
    ):
        # If transfer is cancelled during transfer of first file
        local_path = tempfile.mkstemp()[1]
        local_path_mock.return_value = local_path
        remote_path_mock.return_value = "notest"
        # Mock this __iter__ so that the filetransfer can be looped over
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        FileDownloadMock.return_value.total_size = 1
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "network", self.the_channel_id)
        # is_cancelled should be called thrice.
        is_cancelled_mock.assert_has_calls([call(), call(), call()])
        # Should be set to the local path we mocked
        FileDownloadMock.assert_called_with("notest", local_path, session=Any(Session))
        # Check that it was cancelled when the command was cancelled; this ensures cleanup
        FileDownloadMock.assert_has_calls([call().cancel()])
        # Check that the command itself was also cancelled.
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.import_export_content.compare_checksums",
        return_value=True,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_remote_url"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, False, False, False, True, True, True],
    )
    def test_remote_cancel_after_file_copy_file_not_deleted(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        local_path_mock,
        remote_path_mock,
        checksum_mock,
        annotation_mock,
    ):
        # If transfer is cancelled after transfer of first file
        local_path_1 = tempfile.mkstemp()[1]
        local_path_2 = tempfile.mkstemp()[1]
        with open(local_path_1, "w") as f:
            f.write("a")
        local_path_mock.side_effect = [local_path_1, local_path_2]
        remote_path_mock.return_value = "notest"
        # Mock this __iter__ so that the filetransfer can be looped over
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        FileDownloadMock.return_value.total_size = 1
        FileDownloadMock.return_value.dest = local_path_1
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "network", self.the_channel_id)
        # Check that the command itself was also cancelled.
        cancel_mock.assert_called_with()
        # Check that the temp file we created, where the first file was being downloaded to, has not been deleted
        self.assertTrue(os.path.exists(local_path_1))
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.transfer.FileCopy")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=True,
    )
    def test_local_cancel_immediately(
        self, is_cancelled_mock, cancel_mock, FileCopyMock, annotation_mock
    ):
        # Local version of test above
        FileCopyMock.return_value.__iter__.return_value = ["one", "two", "three"]
        call_command("importcontent", "disk", self.the_channel_id, tempfile.mkdtemp())
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileCopyMock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.transfer.FileCopy")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True, True],
    )
    def test_local_cancel_during_transfer(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileCopyMock,
        local_path_mock,
        annotation_mock,
    ):
        # Local version of test above
        local_dest_path = tempfile.mkstemp()[1]
        local_src_path = tempfile.mkstemp()[1]
        local_path_mock.side_effect = [local_dest_path, local_src_path]
        FileCopyMock.return_value.__iter__.return_value = ["one", "two", "three"]
        FileCopyMock.return_value.total_size = 1
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "disk", self.the_channel_id, tempfile.mkdtemp())
        is_cancelled_mock.assert_has_calls([call(), call(), call()])
        FileCopyMock.assert_called_with(local_src_path, local_dest_path)
        FileCopyMock.assert_has_calls([call().cancel()])
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.len")
    @patch(
        "kolibri.core.content.utils.transfer.Transfer.next",
        side_effect=ConnectionError("connection error"),
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    def test_remote_cancel_during_connect_error(
        self, is_cancelled_mock, cancel_mock, next_mock, len_mock, annotation_mock
    ):
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        len_mock.assert_not_called()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.error")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    def test_remote_import_httperror_404(
        self, path_mock, logger_mock, annotation_mock
    ):
        local_dest_path_1 = tempfile.mkstemp()[1]
        local_dest_path_2 = tempfile.mkstemp()[1]
        local_dest_path_3 = tempfile.mkstemp()[1]
        path_mock.side_effect = [
            local_dest_path_1,
            local_dest_path_2,
            local_dest_path_3,
        ]
        LocalFile.objects.filter(
            files__contentnode__pk="2b6926ed22025518a8b9da91745b51d3"
        ).update(file_size=1)
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["2b6926ed22025518a8b9da91745b51d3"],
            renderable_only=False,
        )
        self.assertTrue(logger_mock.call_count == 3)
        self.assertTrue("404" in logger_mock.call_args_list[0][0][0])

    @patch("kolibri.core.content.management.commands.importcontent.sleep")
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test",
    )
    def test_remote_import_httperror_502(
        self,
        content_storage_file_path_mock,
        is_cancelled_mock,
        cancel_mock,
        file_download_mock,
        sleep_mock,
        annotation_mock,
    ):
        response = Response()
        response.status_code = 502
        file_download_mock.return_value.__enter__.side_effect = HTTPError(
            response=response
        )
        file_download_mock.return_value.dest = "test"
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "network", self.the_channel_id)
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()
        sleep_mock.assert_called_once()

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test",
    )
    def test_remote_import_httperror_500(
        self, content_storage_file_path_mock, file_download_mock, annotation_mock
    ):
        response = Response()
        response.status_code = 500
        file_download_mock.return_value.__enter__.side_effect = HTTPError(
            response=response
        )
        file_download_mock.return_value.dest = "test"
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        with self.assertRaises(HTTPError):
            call_command("importcontent", "network", self.the_channel_id)
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.len")
    @patch(
        "kolibri.core.content.utils.transfer.Transfer.next",
        side_effect=ChunkedEncodingError("Chunked Encoding Error"),
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    def test_remote_import_chunkedencodingerror(
        self, is_cancelled_mock, cancel_mock, error_mock, len_mock, annotation_mock
    ):
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        len_mock.assert_not_called()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.error")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    def test_local_import_oserror_dne(
        self, is_cancelled_mock, cancel_mock, path_mock, logger_mock, annotation_mock
    ):
        dest_path = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path, "/test/dne"]
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "disk", self.the_channel_id, "destination")
        self.assertTrue(
            "No such file or directory" in logger_mock.call_args_list[0][0][0]
        )
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.error")
    @patch("kolibri.core.content.utils.transfer.os.path.getsize")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    def test_local_import_oserror_permission_denied(
        self, path_mock, getsize_mock, logger_mock, annotation_mock
    ):
        dest_path = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path, "/test/dne"]
        getsize_mock.side_effect = ["1", OSError("Permission denied")]
        with self.assertRaises(OSError):
            call_command("importcontent", "disk", self.the_channel_id, "destination")
        self.assertTrue("Permission denied" in logger_mock.call_args_list[0][0][0])
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.os.remove")
    @patch(
        "kolibri.core.content.management.commands.importcontent.os.path.isfile",
        return_value=False,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, True, True],
    )
    def test_local_import_source_corrupted(
        self,
        is_cancelled_mock,
        cancel_mock,
        path_mock,
        isfile_mock,
        remove_mock,
        annotation_mock,
    ):
        local_src_path = tempfile.mkstemp()[1]
        local_dest_path = tempfile.mkstemp()[1]
        LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).update(file_size=1)
        path_mock.side_effect = [local_dest_path, local_src_path]
        call_command(
            "importcontent",
            "disk",
            self.the_channel_id,
            "destination",
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        remove_mock.assert_called_with(local_dest_path)

    @patch(
        "kolibri.core.content.management.commands.importcontent.os.path.isfile",
        return_value=False,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_local_import_source_corrupted_full_progress(
        self, is_cancelled_mock, cancel_mock, path_mock, isfile_mock, annotation_mock
    ):
        """
        Ensure that when an imported file does not match the file size in the
        database, the overall progress tracking for the content import process
        is properly updated to reflect the size of the file in the database,
        not the file on disk.
        This is important, as the total progress for the overall process is
        measured against the total file size recorded in the database for all
        files, not the transferred file size.
        """
        local_src_path = tempfile.mkstemp()[1]
        with open(local_src_path, "w") as f:
            f.write("This is just a test")
        src_file_size = os.path.getsize(local_src_path)
        expected_file_size = 10000
        local_dest_path = tempfile.mkstemp()[1]
        os.remove(local_dest_path)
        # Delete all but one file associated with ContentNode to reduce need for mocking
        files = ContentNode.objects.get(
            id="32a941fb77c2576e8f6b294cde4c3b0c"
        ).files.all()
        first_file = files.first()
        files.exclude(id=first_file.id).delete()
        LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).update(file_size=expected_file_size)
        path_mock.side_effect = [local_dest_path, local_src_path]
        mock_overall_progress = MagicMock()
        mock_file_progress = MagicMock()
        with patch(
            "kolibri.core.tasks.management.commands.base.ProgressTracker"
        ) as progress_mock:
            progress_mock.return_value.__enter__.side_effect = [
                mock_overall_progress,
                mock_file_progress,
            ]
            call_command(
                "importcontent",
                "disk",
                self.the_channel_id,
                "destination",
                node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
            )
        mock_overall_progress.assert_called_with(expected_file_size - src_file_size)

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload.finalize"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_remote_import_source_corrupted(
        self, is_cancelled_mock, path_mock, finalize_dest_mock, annotation_mock
    ):
        dest_path_1 = tempfile.mkstemp()[1]
        dest_path_2 = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path_1, dest_path_2]
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id,
            [],
            exclude_node_ids=[],
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
def raise_exception(*args, **kwargs):
    raise ChunkedEncodingError()
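A sketch of wiring such a stub up with pytest's `monkeypatch`; the `client` fixture and `fetch` attribute are placeholders for whatever your code actually calls.

import pytest
from requests.exceptions import ChunkedEncodingError

def test_handles_chunked_encoding_error(monkeypatch, client):
    # Placeholder target: point this at the callable your code invokes.
    monkeypatch.setattr(client, "fetch", raise_exception)
    with pytest.raises(ChunkedEncodingError):
        client.fetch()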
def iter_content(response, chunk_size=1024*10):
    """Iterates over the response object containing audio.

    It is roughly identical to :meth:`requests.Response.iter_content` except
    that it is aware of the ICY 'pseudo-HTTP' protocol, which may include, but
    is not limited to, the track title, author, etc.

    When metadata is found it is inserted into the global `METADATA`
    dictionary with the stream URL as the key.

    chunk_size must be of type int. *Note: Should not normally need to
    be set explicitly.*

    ICY:
    1) HTTP request to stream with the `Icy-Metadata` header
    2) Response header `icy-metaint` will tell how often the metadata is
       sent in the stream. Specifically, how many audio data bytes there are
       between metadata blocks.
    3) Read the number of bytes `icy-metaint` told us to read. This is the
       audio data.
    4) Next up, read 1 byte to get the metadata 'length specifier'
    5) Multiply that byte by 16 to get the size of the plaintext metadata
       string. (Max byte size = 255, so metadata max length = 4080.)
    6) Parse metadata, set global variable and repeat.
    """
    global METADATA

    if hasattr(response.raw, 'stream'):
        has_icy = False
        bufsize_metadata = -1
        bufsize_audio = chunk_size

        if response.headers.get("icy-metaint"):
            _metaint = response.headers.get("icy-metaint")
            if _metaint.isdigit() and int(_metaint) > 0:
                bufsize_audio = int(_metaint)
                has_icy = True

        try:
            #: 0: audio, 1: length specifier, 2: metadata
            state = 0
            while True:
                if state == 0:
                    # no gzip/deflate - audio already compressed
                    chunk = next(response.raw.stream(bufsize_audio,
                                                     decode_content=False))
                    if has_icy:
                        state += 1
                    yield chunk
                elif state == 1:
                    chunk = response.raw.read(1)
                    bufsize_metadata = ord(chunk) * 16
                    state += 1
                elif state == 2:
                    chunk = response.raw.read(bufsize_metadata)
                    if any(s in chunk for s in ["StreamTitle", "=", ";"]):
                        if len(chunk) >= 16:
                            metadata = icy_parse(chunk)
                            METADATA[response.url]["info"] = metadata
                    state = 0
        except ProtocolError as e:
            raise ChunkedEncodingError(e)
        except ReadTimeoutError as e:
            raise ConnectionError(e)
    else:
        raise StreamConsumedError()
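A consumption sketch for the ICY-aware iterator above (stream URL and capture filename are placeholders). Per the docstring, the request should send the ICY metadata header so the server interleaves metadata with the audio, and the entry in `METADATA` is keyed by the stream URL.

import requests

url = "http://streams.example.com/radio"  # placeholder stream URL
resp = requests.get(url, headers={"Icy-MetaData": "1"}, stream=True)
METADATA.setdefault(resp.url, {})  # iter_content() stores parsed titles here

with open("capture.mp3", "wb") as f:
    for chunk in iter_content(resp):
        f.write(chunk)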
def request(self, method, url,
            params=None, data=None, headers=None, cookies=None, files=None,
            auth=None, timeout=None, allow_redirects=True, proxies=None,
            hooks=None, stream=None, verify=None, cert=None, **kwargs):
    """Constructs a :class:`Request <Request>`, prepares it and sends it.
    Returns :class:`Response <Response>` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query
        string for the :class:`Request`.
    :param data: (optional) Dictionary or bytes to send in the body of the
        :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the
        :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the
        :class:`Request`.
    :param files: (optional) Dictionary of 'filename': file-like-objects
        for multipart encoding upload.
    :param auth: (optional) Auth tuple or callable to enable
        Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) Float describing the timeout of the request.
    :param allow_redirects: (optional) Boolean. Set to True by default.
    :param proxies: (optional) Dictionary mapping protocol to the URL of
        the proxy.
    :param stream: (optional) whether to immediately download the response
        content. Defaults to ``False``.
    :param verify: (optional) if ``True``, the SSL cert will be verified.
        A CA_BUNDLE path can also be provided.
    :param cert: (optional) if String, path to ssl client cert file (.pem).
        If Tuple, ('cert', 'key') pair.
    """
    # ==============================================================================================================
    # add by mz
    error_type = kwargs.get("error_type")
    if error_type:
        from requests.exceptions import InvalidURL, URLRequired, ConnectTimeout, ConnectionError, SSLError, ReadTimeout
        from requests.exceptions import InvalidSchema, MissingSchema, ChunkedEncodingError, ContentDecodingError
        from requests.exceptions import RequestException, HTTPError, ProxyError, Timeout, RetryError, StreamConsumedError
        from requests.exceptions import TooManyRedirects

        get_error = {
            "InvalidURL": InvalidURL(),
            "URLRequired": URLRequired(),
            "ConnectTimeout": ConnectTimeout(),
            "ConnectionError": ConnectionError(),
            "SSLError": SSLError(),
            "ReadTimeout": ReadTimeout(),
            "InvalidSchema": InvalidSchema(),
            "MissingSchema": MissingSchema(),
            "ChunkedEncodingError": ChunkedEncodingError(),
            "ContentDecodingError": ContentDecodingError(),
            "StreamConsumedError": StreamConsumedError(),
            "TooManyRedirects": TooManyRedirects(),
            "RequestException": RequestException(),
            "HTTPError": HTTPError(),
            "ProxyError": ProxyError(),
            "Timeout": Timeout(),
            "RetryError": RetryError(),
        }
        error_ = get_error[error_type]
        raise error_
    # ==============================================================================================================
    method = builtin_str(method)

    # Create the Request.
    req = Request(
        method=method.upper(),
        url=url,
        headers=headers,
        files=files,
        data=data or {},
        params=params or {},
        auth=auth,
        cookies=cookies,
        hooks=hooks,
    )
    prep = self.prepare_request(req)

    proxies = proxies or {}

    # Gather clues from the surrounding environment.
    if self.trust_env:
        # Set environment's proxies.
        env_proxies = get_environ_proxies(url) or {}
        for (k, v) in env_proxies.items():
            proxies.setdefault(k, v)

        # Look for configuration.
        if not verify and verify is not False:
            verify = os.environ.get('REQUESTS_CA_BUNDLE')

        # Curl compatibility.
        if not verify and verify is not False:
            verify = os.environ.get('CURL_CA_BUNDLE')

    # Merge all the kwargs.
    proxies = merge_setting(proxies, self.proxies)
    stream = merge_setting(stream, self.stream)
    verify = merge_setting(verify, self.verify)
    cert = merge_setting(cert, self.cert)

    # Send the request.
    send_kwargs = {
        'stream': stream,
        'timeout': timeout,
        'verify': verify,
        'cert': cert,
        'proxies': proxies,
        'allow_redirects': allow_redirects,
    }
    resp = self.send(prep, **send_kwargs)

    return resp
class ImportContentTestCase(TestCase):
    """
    Test case for the importcontent management command.
    """

    fixtures = ["content_test.json"]
    the_channel_id = "6199dde695db4ee4ab392222d5af1e5c"

    def setUp(self):
        LocalFile.objects.update(available=False)

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=True,
    )
    def test_remote_cancel_immediately(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # Check behaviour if cancellation is called before any file download starts
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        call_command("importcontent", "network", self.the_channel_id)
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileDownloadMock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_remote_url"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, False, True, True, True],
    )
    def test_remote_cancel_during_transfer(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        local_path_mock,
        remote_path_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # If transfer is cancelled during transfer of first file
        local_path = tempfile.mkstemp()[1]
        local_path_mock.return_value = local_path
        remote_path_mock.return_value = "notest"
        # Mock this __iter__ so that the filetransfer can be looped over
        FileDownloadMock.return_value.__iter__.side_effect = TransferCanceled()
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        call_command("importcontent", "network", self.the_channel_id)
        is_cancelled_mock.assert_has_calls([call(), call()])
        # Should be set to the local path we mocked
        FileDownloadMock.assert_called_with(
            "notest", local_path, session=Any(Session), cancel_check=is_cancelled_mock
        )
        # Check that the command itself was also cancelled.
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.compare_checksums",
        return_value=True,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_remote_url"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True],
    )
    def test_remote_cancel_after_file_copy_file_not_deleted(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        local_path_mock,
        remote_path_mock,
        checksum_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # If transfer is cancelled after transfer of first file
        local_path_1 = tempfile.mkstemp()[1]
        local_path_2 = tempfile.mkstemp()[1]
        with open(local_path_1, "w") as f:
            f.write("a")
        local_path_mock.side_effect = [local_path_1, local_path_2]
        remote_path_mock.return_value = "notest"
        # Mock this __iter__ so that the filetransfer can be looped over
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        FileDownloadMock.return_value.total_size = 1
        FileDownloadMock.return_value.dest = local_path_1
        LocalFile.objects.update(file_size=1)
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()[:3]), 10)
        call_command("importcontent", "network", self.the_channel_id)
        # Check that the command itself was also cancelled.
        cancel_mock.assert_called_with()
        # Check that the temp file we created, where the first file was being downloaded to, has not been deleted
        self.assertTrue(os.path.exists(local_path_1))
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.transfer.FileCopy")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=True,
    )
    def test_local_cancel_immediately(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileCopyMock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # Local version of test above
        FileCopyMock.return_value.__iter__.return_value = ["one", "two", "three"]
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        call_command("importcontent", "disk", self.the_channel_id, tempfile.mkdtemp())
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileCopyMock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.transfer.FileCopy")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True],
    )
    def test_local_cancel_during_transfer(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileCopyMock,
        local_path_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # Local version of test above
        local_dest_path = tempfile.mkstemp()[1]
        local_src_path = tempfile.mkstemp()[1]
        local_path_mock.side_effect = [local_dest_path, local_src_path]
        FileCopyMock.return_value.__iter__.side_effect = TransferCanceled()
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        call_command("importcontent", "disk", self.the_channel_id, tempfile.mkdtemp())
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileCopyMock.assert_called_with(
            local_src_path, local_dest_path, cancel_check=is_cancelled_mock
        )
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch(
        "kolibri.core.content.utils.transfer.Transfer.next",
        side_effect=ConnectionError("connection error"),
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    def test_remote_cancel_during_connect_error(
        self,
        is_cancelled_mock,
        cancel_mock,
        next_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    pk__in=[
                        "6bdfea4a01830fdd4a585181c0b8068c",
                        "211523265f53825b82f70ba19218a02e",
                    ]
                )
            ),
            10,
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.warning")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    def test_remote_import_httperror_404(
        self,
        path_mock,
        logger_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        local_dest_path_1 = tempfile.mkstemp()[1]
        local_dest_path_2 = tempfile.mkstemp()[1]
        local_dest_path_3 = tempfile.mkstemp()[1]
        path_mock.side_effect = [
            local_dest_path_1,
            local_dest_path_2,
            local_dest_path_3,
        ]
        ContentNode.objects.filter(pk="2b6926ed22025518a8b9da91745b51d3").update(
            available=False
        )
        LocalFile.objects.filter(
            files__contentnode__pk="2b6926ed22025518a8b9da91745b51d3"
        ).update(file_size=1, available=False)
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    files__contentnode__pk="2b6926ed22025518a8b9da91745b51d3"
                )
            ),
            10,
        )
        node_id = ["2b6926ed22025518a8b9da91745b51d3"]
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=node_id,
            renderable_only=False,
        )
        logger_mock.assert_called_once()
        self.assertTrue("3 files are skipped" in logger_mock.call_args_list[0][0][0])
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id,
            [],
            node_ids=node_id,
            exclude_node_ids=None,
            public=False,
        )

    @patch("kolibri.core.content.utils.transfer.Transfer.next")
    @patch("kolibri.core.content.utils.transfer.sleep")
    @patch("kolibri.core.content.utils.transfer.requests.Session.get")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, True, True, True, True],
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test/test",
    )
    def test_remote_import_httperror_502(
        self,
        content_storage_file_path_mock,
        is_cancelled_mock,
        cancel_mock,
        requests_get_mock,
        sleep_mock,
        transfer_next_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        response_mock = MagicMock()
        response_mock.status_code = 502
        exception_502 = HTTPError("Bad Gateway", response=response_mock)
        requests_get_mock.return_value.raise_for_status.side_effect = exception_502
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        get_import_export_mock.return_value = (1, [LocalFile.objects.first()], 10)
        call_command("importcontent", "network", self.the_channel_id)
        sleep_mock.assert_called_once()
        transfer_next_mock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.utils.transfer.requests.Session.get")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test/test",
    )
    def test_remote_import_httperror_500(
        self,
        content_storage_file_path_mock,
        requests_get_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        response_mock = MagicMock()
        response_mock.status_code = 500
        exception_500 = HTTPError("Internal Server Error", response=response_mock)
        requests_get_mock.return_value.raise_for_status.side_effect = exception_500
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        with self.assertRaises(HTTPError):
            call_command("importcontent", "network", self.the_channel_id)
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id, [], node_ids=None, exclude_node_ids=None, public=False
        )

    @patch("kolibri.core.content.utils.transfer.sleep")
    @patch(
        "kolibri.core.content.utils.transfer.Transfer.next",
        side_effect=ChunkedEncodingError("Chunked Encoding Error"),
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, False, True, True, True, True],
    )
    def test_remote_import_chunkedencodingerror(
        self,
        is_cancelled_mock,
        cancel_mock,
        error_mock,
        sleep_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    pk__in=[
                        "6bdfea4a01830fdd4a585181c0b8068c",
                        "211523265f53825b82f70ba19218a02e",
                    ]
                )
            ),
            10,
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.warning")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True],
    )
    def test_local_import_oserror_dne(
        self,
        is_cancelled_mock,
        cancel_mock,
        path_mock,
        logger_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        dest_path = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path, "/test/dne"]
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        get_import_export_mock.return_value = (1, [LocalFile.objects.first()], 10)
        call_command("importcontent", "disk", self.the_channel_id, "destination")
        self.assertTrue("1 files are skipped" in logger_mock.call_args_list[0][0][0])
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.error")
    @patch("kolibri.core.content.utils.transfer.os.path.getsize")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    def test_local_import_oserror_permission_denied(
        self,
        path_mock,
        getsize_mock,
        logger_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        dest_path = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path, "/test/dne"]
        getsize_mock.side_effect = ["1", OSError("Permission denied")]
        get_import_export_mock.return_value = (1, [LocalFile.objects.first()], 10)
        with self.assertRaises(OSError):
            call_command("importcontent", "disk", self.the_channel_id, "destination")
        self.assertTrue("Permission denied" in logger_mock.call_args_list[0][0][0])
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.os.remove")
    @patch(
        "kolibri.core.content.management.commands.importcontent.os.path.isfile",
        return_value=False,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, True, True, True],
    )
    def test_local_import_source_corrupted(
        self,
        is_cancelled_mock,
        cancel_mock,
        path_mock,
        isfile_mock,
        remove_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        local_src_path = tempfile.mkstemp()[1]
        local_dest_path = tempfile.mkstemp()[1]
        LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).update(file_size=1)
        path_mock.side_effect = [local_dest_path, local_src_path]
        get_import_export_mock.return_value = (
            1,
            [
                LocalFile.objects.filter(
                    files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
                ).first()
            ],
            10,
        )
        call_command(
            "importcontent",
            "disk",
            self.the_channel_id,
            "destination",
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        remove_mock.assert_any_call(local_dest_path)

    @patch(
        "kolibri.core.content.management.commands.importcontent.os.path.isfile",
        return_value=False,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_local_import_source_corrupted_full_progress(
        self,
        is_cancelled_mock,
        cancel_mock,
        path_mock,
        isfile_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """
        Ensure that when an imported file does not match the file size in the
        database, the overall progress tracking for the content import process
        is properly updated to reflect the size of the file in the database,
        not the file on disk.
        This is important, as the total progress for the overall process is
        measured against the total file size recorded in the database for all
        files, not the transferred file size.
        """
        local_src_path = tempfile.mkstemp()[1]
        with open(local_src_path, "w") as f:
            f.write("This is just a test")
        expected_file_size = 10000
        local_dest_path = tempfile.mkstemp()[1]
        os.remove(local_dest_path)
        # Delete all but one file associated with ContentNode to reduce need for mocking
        files = ContentNode.objects.get(
            id="32a941fb77c2576e8f6b294cde4c3b0c"
        ).files.all()
        first_file = files.first()
        files.exclude(id=first_file.id).delete()
        LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).update(file_size=expected_file_size)
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
                )
            ),
            10,
        )
        path_mock.side_effect = [local_dest_path, local_src_path]
        mock_overall_progress = MagicMock()
        mock_file_progress = MagicMock()
        with patch(
            "kolibri.core.tasks.management.commands.base.ProgressTracker"
        ) as progress_mock:
            progress_mock.return_value.__enter__.side_effect = [
                mock_overall_progress,
                mock_file_progress,
            ]
            call_command(
                "importcontent",
                "disk",
                self.the_channel_id,
                "destination",
                node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
            )
        mock_overall_progress.assert_any_call(expected_file_size)

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload.finalize"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_remote_import_source_corrupted(
        self,
        is_cancelled_mock,
        path_mock,
        finalize_dest_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        dest_path_1 = tempfile.mkstemp()[1]
        dest_path_2 = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path_1, dest_path_2]
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    pk__in=[
                        "6bdfea4a01830fdd4a585181c0b8068c",
                        "211523265f53825b82f70ba19218a02e",
                    ]
                )
            ),
            10,
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id,
            [],
            exclude_node_ids=None,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
            public=False,
        )

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload.finalize"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_remote_import_full_import(
        self,
        is_cancelled_mock,
        path_mock,
        finalize_dest_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        dest_path_1 = tempfile.mkstemp()[1]
        dest_path_2 = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path_1, dest_path_2]
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    pk__in=[
                        "6bdfea4a01830fdd4a585181c0b8068c",
                        "211523265f53825b82f70ba19218a02e",
                    ]
                )
            ),
            10,
        )
        call_command("importcontent", "network", self.the_channel_id)
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id,
            [],
            exclude_node_ids=None,
            node_ids=None,
            public=False,
        )

    @patch("kolibri.core.content.utils.transfer.sleep")
    @patch("kolibri.core.content.utils.transfer.Transfer.next")
    @patch("kolibri.core.content.utils.transfer.requests.Session.get")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test/test",
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        # We have to return False for 30 1-second checks to ensure we actually retry.
        side_effect=[False] * 32 + [True] * 5,
    )
    def test_remote_import_file_compressed_on_gcs(
        self,
        is_cancelled_mock,
        cancel_mock,
        content_storage_file_path_mock,
        requests_get_mock,
        transfer_next_mock,
        sleep_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        response_mock = MagicMock()
        response_mock.status_code = 503
        exception_503 = HTTPError("Service Unavailable", response=response_mock)
        transfer_next_mock.side_effect = exception_503
        requests_get_mock.return_value.headers = {"X-Goog-Stored-Content-Length": "1"}
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        get_import_export_mock.return_value = (1, [LocalFile.objects.first()], 10)

        m = mock_open()
        with patch("kolibri.core.content.utils.transfer.open", m) as open_mock:
            call_command("importcontent", "network", self.the_channel_id)
            # Check if truncate() is called since byte-range file resuming is not supported
            open_mock.assert_called_with("test/test.transfer", "wb")
            open_mock.return_value.truncate.assert_called_once()
        sleep_mock.assert_called()
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id,
            [],
            node_ids=None,
            exclude_node_ids=None,
            public=False,
        )