Exemple #1
0
    def test_request__chunked_encoding_retry(self, mock_session):
        """Verify a session retries on ChunkedEncodingError, logs a warning
        per retry, and re-raises as RequestException after 3 attempts."""
        session_instance = mock_session.return_value

        # Handle Auth
        response_dict = {"access_token": "", "expires_in": 99, "scope": ""}
        session_instance.request.return_value = Mock(
            headers={}, json=lambda: response_dict, status_code=200
        )
        requestor = prawcore.Requestor("prawcore:test (by /u/bboe)")
        authorizer = readonly_authorizer(requestor=requestor)
        # Drop the auth call from the mock history so only the retried
        # request is counted below.
        session_instance.request.reset_mock()

        # Fail on subsequent request
        exception = ChunkedEncodingError()
        session_instance.request.side_effect = exception

        # Two retries are expected, so the warning appears twice (checked
        # via ``log_capture.check(expected, expected)`` below).
        expected = (
            "prawcore",
            "WARNING",
            "Retrying due to ChunkedEncodingError() status: GET "
            "https://oauth.reddit.com/",
        )

        with LogCapture(level=logging.WARNING) as log_capture:
            with self.assertRaises(RequestException) as context_manager:
                prawcore.Session(authorizer).request("GET", "/")
            log_capture.check(expected, expected)
        self.assertIsInstance(context_manager.exception, RequestException)
        # The original ChunkedEncodingError must be preserved on the wrapper.
        self.assertIs(exception, context_manager.exception.original_exception)
        # Initial attempt + 2 retries = 3 calls.
        self.assertEqual(3, session_instance.request.call_count)
Exemple #2
0
    def generate():
        """Yield the response body in ``chunk_size`` pieces, mapping
        urllib3-level errors to requests-level exceptions, then mark the
        content as consumed."""
        # Special case for google app engine.
        if hasattr(self.raw, 'stream'):
            try:
                # NOTE(review): relies on the private
                # ``_original_response._method`` attribute being an int on
                # the GAE code path — confirm against the targeted urllib3.
                if isinstance(self.raw._original_response._method, int):
                    while True:
                        chunk = self.raw.read(chunk_size, decode_content=True)
                        if not chunk:
                            break
                        yield chunk
            except ProtocolError as e:
                raise ChunkedEncodingError(e)
            except DecodeError as e:
                raise ContentDecodingError(e)
            except ReadTimeoutError as e:
                raise ConnectionError(e)
        else:
            # Standard file-like object.
            while True:
                chunk = self.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk

        self._content_consumed = True
Exemple #3
0
 def response_cb(req):
     """responses-library callback: fail twice with ChunkedEncodingError,
     then answer 200 and advance the mocked clock in proportion to the
     requested day range."""
     nonlocal encoding_throw
     # Every request fails with 2 ChunkedEncodingError exception but works well on third time.
     if encoding_throw < 2:
         encoding_throw += 1
         raise ChunkedEncodingError()
     # Reset the failure counter so the *next* request fails twice again.
     encoding_throw = 0
     days = get_range_days_from_request(req)
     ranges.append(days)
     # Simulate the wall-clock time the export of ``days`` would take.
     time_mock.tick(delta=datetime.timedelta(minutes=days /
                                             days_per_minute_rate))
     return (200, {}, json.dumps({"createdAt": "2020"}))
Exemple #4
0
 async def generate():
     """Asynchronously yield response-body chunks, translating low-level
     stream errors into the requests-style exception hierarchy."""
     async with self:
         async with finalize(self.raw.stream(chunk_size)) as gen:
             try:
                 async for trunk in gen:
                     yield trunk
             except ProtocolError as e:
                 raise ChunkedEncodingError(e)
             except DecodeError as e:
                 raise ContentDecodingError(e)
             except ReadTimeoutError as e:
                 raise ConnectionError(e)
             # Only reached when iteration finished without raising.
             self._content_consumed = True
Exemple #5
0
def requests_check_incomplete_response(response: Response):
    """
    Verify the full response body was read.

    The requests library does not currently enforce Content-Length itself,
    so a truncated body can go unnoticed; raise ChunkedEncodingError when
    fewer bytes were read than the server declared.
    https://blog.petrzemek.net/2018/04/22/on-incomplete-http-reads-and-the-requests-library-in-python/
    """
    declared = response.headers.get('Content-Length')
    if declared is None:
        # No declared length — nothing to verify.
        return
    declared = int(declared)
    read_so_far = response.raw.tell()
    if read_so_far < declared:
        msg = 'incomplete response ({} bytes read, {} more expected)'.format(
            read_so_far, declared - read_so_far)
        raise ChunkedEncodingError(msg, response=response)
Exemple #6
0
 async def generate():
     """Asynchronously yield response-body chunks from the raw stream,
     mapping low-level errors to requests-style exceptions."""
     async with self:
         # async with finalize(self.raw.stream(chunk_size)) as gen:
         gen = self.raw.stream(chunk_size)
         logger.debug(f'Iterate response body stream: {self}')
         try:
             async for trunk in gen:
                 yield trunk
         except ProtocolError as e:
             raise ChunkedEncodingError(e)
         except DecodeError as e:
             raise ContentDecodingError(e)
         except ReadTimeoutError as e:
             raise ConnectionError(e)
         # Only reached when the stream was drained without error.
         self._content_consumed = True
Exemple #7
0
 def resp_read(chunk_size):
     """Yield the body of ``resp`` in ``chunk_size`` pieces, preferring the
     urllib3 ``stream`` API and falling back to plain ``read`` calls."""
     try:
         # Special case for urllib3.
         try:
             for chunk in resp.raw.stream(chunk_size,
                                          decode_content=False):
                 yield chunk
         except IncompleteRead as e:
             raise ChunkedEncodingError(e)
     except AttributeError:
         # Standard file-like object.
         # NOTE(review): this fallback also triggers if AttributeError is
         # raised mid-iteration, after chunks were already yielded —
         # confirm ``resp.raw`` lacking ``stream`` is the only case.
         while True:
             chunk = resp.raw.read(chunk_size)
             if not chunk:
                 break
             yield chunk
Exemple #8
0
def test_email_stream_chunked_encoding_exception(catalog, time_mock):
    """The email stream should give up after 3 ChunkedEncodingError
    attempts and surface the retry-limit error."""
    TEST_START_DATE = "2020"
    DAYS_DURATION = 100

    time_mock.move_to(
        pendulum.parse(TEST_START_DATE) +
        pendulum.Duration(days=DAYS_DURATION))

    # Every call to the export endpoint raises ChunkedEncodingError.
    responses.add(
        "GET",
        "https://api.iterable.com/api/export/data.json",
        body=ChunkedEncodingError(),
    )

    with pytest.raises(
            Exception,
            match="ChunkedEncodingError: Reached maximum number of retires: 3"
    ):
        read_from_source(catalog)
    # One HTTP call per attempt — exactly the retry budget.
    assert len(responses.calls) == 3
Exemple #9
0
    def test_ncbi_fails(self):
        """Each transient failure should be retried (and logged as
        'Retrying'), with the lookup still succeeding afterwards."""
        exceptions = [ChunkedEncodingError(), ConnectionError(), ReadTimeout(),
                      ExpatError(), RuntimeError('bad record')]
        # HTTP 400 and 429 responses are also treated as retryable.
        for code in [400, 429]:
            http_exception = HTTPError()
            http_exception.response = Response()
            http_exception.response.status_code = code
            exceptions.append(http_exception)

        for exception in exceptions:
            # The fixture raises this exception on the first NCBI call.
            self.ncbi_exception = exception
            with self.assertLogs(level='DEBUG') as log:
                seq, tax = self.get_ncbi_data(query='MT345279.1')
                tax = tax.view(DataFrame)
                self.assertEqual(
                    tax['Taxon']['MT345279.1'],
                    'k__Fungi; p__Basidiomycota; c__Agaricomycetes; '
                    'o__Boletales; f__Boletaceae; g__Boletus; s__edulis'
                )
            self.assertTrue('Retrying' in log.output[0])
Exemple #10
0
    def generate():
        """Yield the response body in ``chunk_size`` pieces, then mark the
        content as consumed."""
        # Plain file-like objects lack ``stream``; read them directly.
        if not hasattr(self.raw, 'stream'):
            # Standard file-like object.
            while True:
                piece = self.raw.read(chunk_size)
                if not piece:
                    break
                yield piece
        else:
            # Special case for urllib3: translate its errors into the
            # requests-level exception hierarchy.
            try:
                yield from self.raw.stream(chunk_size, decode_content=False)
            except ProtocolError as err:
                raise ChunkedEncodingError(err)
            except DecodeError as err:
                raise ContentDecodingError(err)
            except ReadTimeoutError as err:
                raise ConnectionError(err)

        self._content_consumed = True
 def raise_exception(*args, **kwargs):
     """Simulate a network failure for descriptions containing " obb ";
     delegate every other call to the original implementation."""
     if " obb " in kwargs["description"].lower():
         raise ChunkedEncodingError()
     return original(*args, **kwargs)
class NotubizDocumentScraper():
    """Download Notubiz/iBabs documents, extract their text content, and
    attach that content to every related model instance."""

    def __init__(self):
        # NOTE(review): ``os.path.realpath(__name__)`` resolves the module
        # *name* relative to the CWD; ``__file__`` would be more usual —
        # confirm this is intentional before changing it.
        BASE_DIR = os.path.dirname(os.path.realpath(__name__))
        self.current = os.path.abspath(BASE_DIR)
        self.xls_dir = BASE_DIR + "/generics/"

    def start(self):
        # post get events, get th docs and update the vectors all at once
        self.get_document_content()

    def get_document_content(self):
        """Scan every unscanned Document: download it, store the extracted
        text as a DocumentContent, and propagate the text to every object
        whose url points at the document. Finishes by rebuilding the Dutch
        full-text search vector."""
        file_name = ''
        for event_doc_obj in sorted(
                Document.objects.filter(document_content_scanned=False),
                key=lambda x: x.id,
                reverse=True):
            try:
                file_name = self.downloadFile(event_doc_obj)

                document_content_text = self.getFileContent(
                    file_name, event_doc_obj.id)
                document_content = DocumentContent.objects.create(
                    content=document_content_text)

                event_doc_obj.document_content_scanned = True
                event_doc_obj.file_path = file_name
                event_doc_obj.doc_content = document_content
                event_doc_obj.save()

                self.remove_folder_contents()

                if CombinedItem.objects.filter(item_id=event_doc_obj.id,
                                               item_type='document').exists():
                    ci = CombinedItem.objects.get(item_id=event_doc_obj.id,
                                                  item_type='document')
                    ci.doc_content = document_content
                    ci.save()

                self.add_content_text(
                    ReceivedDocument.objects.filter(
                        document__url=event_doc_obj.url), 'received_document',
                    document_content_text)
                self.add_content_text(
                    CouncilAddress.objects.filter(
                        question_document__url=event_doc_obj.url),
                    'council_address', document_content_text)
                self.add_content_text(
                    WrittenQuestion.objects.filter(
                        question_document__url=event_doc_obj.url),
                    'written_question', document_content_text)
                self.add_content_text(
                    PublicDocument.objects.filter(
                        document__url=event_doc_obj.url), 'format',
                    document_content_text)
                self.add_content_text(
                    PolicyDocument.objects.filter(
                        document__url=event_doc_obj.url), 'policy_document',
                    document_content_text)
                self.add_content_text(
                    ManagementDocument.objects.filter(
                        document__url=event_doc_obj.url),
                    'management_document', document_content_text)
                self.add_content_text(
                    Motion.objects.filter(document__url=event_doc_obj.url),
                    'motion', document_content_text)
                self.add_content_text(
                    Commitment.objects.filter(
                        new_document__url=event_doc_obj.url), 'commitment',
                    document_content_text)
            except Exception:
                # Best-effort: a failure for one document must not stop the
                # scan; just clean up any partially downloaded files.
                self.remove_folder_contents()

        # Rebuild the Dutch full-text search vector over all stored content.
        vector = SearchVector('content', config='dutch')
        DocumentContent.objects.update(vector=vector)

    # Adds document content text to other types of objects where their url points to this document
    def add_content_text(self, query_set, typez, doc_cont_text):
        """Copy ``doc_cont_text`` onto the CombinedItem mirroring each
        object in ``query_set`` (matched by id and item type)."""
        for typ in query_set:
            ci = CombinedItem.objects.get(item_id=typ.id, item_type=typez)
            ci.document_content_text = doc_cont_text
            ci.save()

    def downloadFile(self, event_doc_obj):
        """Download the document behind ``event_doc_obj.url`` into the local
        files/ folder, retrying up to 5 times on ChunkedEncodingError.

        Returns the local file name, or None when the URL is unusable or
        the response is not a downloadable document. Raises
        ChunkedEncodingError when all 5 attempts fail.
        """
        if local_settings.RIS_MUNICIPALITY != 'Utrecht':
            file_name = str(event_doc_obj.notubiz_id) + '.pdf'
        else:
            file_name = str(event_doc_obj.ibabs_id) + '.pdf'

        path = os.path.abspath(self.current + '/files/' + file_name)

        # Already downloaded earlier; reuse the existing file.
        if os.path.isfile(path):
            return file_name

        # So because utrecht servers are faulty, sometimes we wouldn't get the full document content
        # in the request thus resulting in a ChunkedEncodingError
        # So we redo the download up to 5 times and if there will still be an issue
        # we can investigate it more thouroughly
        times_tried = 0
        while times_tried < 5:
            try:
                try:
                    r = requests.get(event_doc_obj.url, stream=True)
                except SSLError:
                    return None
                except InvalidURL:
                    return None

                if r.status_code == 200 and 'text/html' not in r.headers[
                        'Content-Type'] and 'content-disposition' in r.headers:

                    path = os.path.abspath(self.current + '/files/' +
                                           file_name)

                    with open(path, 'wb') as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)

                    return file_name
                else:
                    # Not a downloadable document; give up without retrying.
                    break
            # Fixed: Python 3 ``except ... as`` syntax (the old
            # ``except ChunkedEncodingError, e`` form is a SyntaxError on
            # Python 3) and print the exception itself instead of the
            # removed ``.message`` attribute.
            except ChunkedEncodingError as e:
                times_tried = times_tried + 1
                print(e)
                print(
                    'File wasnt downloaded succesfully so we redo the download'
                )
                print('File downloaded {number} times'.format(
                    number=times_tried))

        if times_tried >= 5:
            raise ChunkedEncodingError(
                'So redownloading the file 5 times didn\'t work, maybe there\'s some other issue? '
            )

        return None
Exemple #13
0
    def request(self,
                method,
                url,
                params=None,
                data=None,
                headers=None,
                cookies=None,
                files=None,
                auth=None,
                timeout=None,
                allow_redirects=True,
                proxies=None,
                hooks=None,
                stream=None,
                verify=None,
                cert=None,
                json=None,
                **kwargs):
        """Constructs a :class:`Request <Request>`, prepares it and sends it.
        Returns :class:`Response <Response>` object.

        :param method: method for the new :class:`Request` object.
        :param url: URL for the new :class:`Request` object.
        :param params: (optional) Dictionary or bytes to be sent in the query
            string for the :class:`Request`.
        :param data: (optional) Dictionary, bytes, or file-like object to send
            in the body of the :class:`Request`.
        :param json: (optional) json to send in the body of the
            :class:`Request`.
        :param headers: (optional) Dictionary of HTTP Headers to send with the
            :class:`Request`.
        :param cookies: (optional) Dict or CookieJar object to send with the
            :class:`Request`.
        :param files: (optional) Dictionary of ``'filename': file-like-objects``
            for multipart encoding upload.
        :param auth: (optional) Auth tuple or callable to enable
            Basic/Digest/Custom HTTP Auth.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple
        :param allow_redirects: (optional) Set to True by default.
        :type allow_redirects: bool
        :param proxies: (optional) Dictionary mapping protocol or protocol and
            hostname to the URL of the proxy.
        :param stream: (optional) whether to immediately download the response
            content. Defaults to ``False``.
        :param verify: (optional) whether the SSL cert will be verified.
            A CA_BUNDLE path can also be provided. Defaults to ``True``.
        :param cert: (optional) if String, path to ssl client cert file (.pem).
            If Tuple, ('cert', 'key') pair.
        :rtype: requests.Response
        """
        #===============================================================================================================
        # add by mz
        # Fault-injection hook: when kwargs carry ``error_type`` naming a
        # requests exception, raise a fresh instance instead of sending.
        error_type = kwargs.get("error_type")
        if error_type:
            from requests.exceptions import InvalidURL, URLRequired, ConnectTimeout, ConnectionError, SSLError, ReadTimeout
            from requests.exceptions import InvalidSchema, MissingSchema, ChunkedEncodingError, ContentDecodingError
            from requests.exceptions import RequestException, HTTPError, ProxyError, Timeout, RetryError, StreamConsumedError
            from requests.exceptions import TooManyRedirects

            get_error = {
                "InvalidURL": InvalidURL(),
                "URLRequired": URLRequired(),
                "ConnectTimeout": ConnectTimeout(),
                "ConnectionError": ConnectionError(),
                "SSLError": SSLError(),
                "ReadTimeout": ReadTimeout(),
                "InvalidSchema": InvalidSchema(),
                "MissingSchema": MissingSchema(),
                "ChunkedEncodingError": ChunkedEncodingError(),
                "ContentDecodingError": ContentDecodingError(),
                "StreamConsumedError": StreamConsumedError(),
                "TooManyRedirects": TooManyRedirects(),
                "RequestException": RequestException(),
                "HTTPError": HTTPError(),
                "ProxyError": ProxyError(),
                "Timeout": Timeout(),
                # Fixed: instantiate like every other entry (was the bare
                # class ``RetryError``, inconsistent with the rest).
                "RetryError": RetryError()
            }

            error_ = get_error[error_type]
            raise error_
        #===============================================================================================================

        # Create the Request
        req = Request(
            method=method.upper(),
            url=url,
            headers=headers,
            files=files,
            data=data or {},
            json=json,
            params=params or {},
            auth=auth,
            cookies=cookies,
            hooks=hooks,
        )
        prep = self.prepare_request(req)

        proxies = proxies or {}

        # Environment settings (proxies, verify, cert) may override the
        # per-call values.
        settings = self.merge_environment_settings(prep.url, proxies, stream,
                                                   verify, cert)

        # Send the request.
        send_kwargs = {
            'timeout': timeout,
            'allow_redirects': allow_redirects,
        }
        send_kwargs.update(settings)
        resp = self.send(prep, **send_kwargs)
        return resp
Exemple #14
0
    def test_monitor_sends_exception_data_and_hb_on_expected_exceptions(
            self, mock_get_data) -> None:
        """For each expected fetch error, the monitor must publish exactly
        one error message and one heartbeat to RabbitMQ."""
        json_decode_error = json.JSONDecodeError(msg='test error',
                                                 doc='test',
                                                 pos=2)
        # Maps the raised fetch error to the domain exception the monitor
        # is expected to report.
        errors_exceptions_dict = {
            ReqConnectionError('test'):
            CannotAccessGitHubPageException(self.repo_config.releases_page),
            ReadTimeout('test'):
            CannotAccessGitHubPageException(self.repo_config.releases_page),
            IncompleteRead('test'):
            DataReadingException(self.monitor_name,
                                 self.repo_config.releases_page),
            ChunkedEncodingError('test'):
            DataReadingException(self.monitor_name,
                                 self.repo_config.releases_page),
            ProtocolError('test'):
            DataReadingException(self.monitor_name,
                                 self.repo_config.releases_page),
            json_decode_error:
            JSONDecodeException(json_decode_error)
        }
        try:
            self.test_monitor._initialise_rabbitmq()
            for error, data_ret_exception in errors_exceptions_dict.items():
                mock_get_data.side_effect = error
                expected_output_data = {
                    'error': {
                        'meta_data': {
                            'monitor_name': self.test_monitor.monitor_name,
                            'repo_name':
                            self.test_monitor.repo_config.repo_name,
                            'repo_id': self.test_monitor.repo_config.repo_id,
                            'repo_parent_id':
                            self.test_monitor.repo_config.parent_id,
                            'time': datetime(2012, 1, 1).timestamp()
                        },
                        'message': data_ret_exception.message,
                        'code': data_ret_exception.code,
                    }
                }
                expected_output_hb = {
                    'component_name': self.test_monitor.monitor_name,
                    'is_alive': True,
                    'timestamp': datetime(2012, 1, 1).timestamp()
                }
                # Delete the queue before to avoid messages in the queue on
                # error.
                self.test_monitor.rabbitmq.queue_delete(self.test_queue_name)

                res = self.test_monitor.rabbitmq.queue_declare(
                    queue=self.test_queue_name,
                    durable=True,
                    exclusive=False,
                    auto_delete=False,
                    passive=False)
                self.assertEqual(0, res.method.message_count)
                self.test_monitor.rabbitmq.queue_bind(
                    queue=self.test_queue_name,
                    exchange=RAW_DATA_EXCHANGE,
                    routing_key='github')
                self.test_monitor.rabbitmq.queue_bind(
                    queue=self.test_queue_name,
                    exchange=HEALTH_CHECK_EXCHANGE,
                    routing_key='heartbeat.worker')

                self.test_monitor._monitor()

                # By re-declaring the queue again we can get the number of
                # messages in the queue.
                res = self.test_monitor.rabbitmq.queue_declare(
                    queue=self.test_queue_name,
                    durable=True,
                    exclusive=False,
                    auto_delete=False,
                    passive=True)
                # There must be 2 messages in the queue, the heartbeat and the
                # processed data
                self.assertEqual(2, res.method.message_count)

                # Check that the message received is actually the processed data
                _, _, body = self.test_monitor.rabbitmq.basic_get(
                    self.test_queue_name)
                self.assertEqual(expected_output_data, json.loads(body))

                # Check that the message received is actually the HB
                _, _, body = self.test_monitor.rabbitmq.basic_get(
                    self.test_queue_name)
                self.assertEqual(expected_output_hb, json.loads(body))
        except Exception as e:
            self.fail("Test failed: {}".format(e))
Exemple #15
0
class ImportContentTestCase(TestCase):
    """
    Test case for the importcontent management command.
    """

    fixtures = ["content_test.json"]
    # Channel id used by the fixture data throughout these tests.
    the_channel_id = "6199dde695db4ee4ab392222d5af1e5c"

    def setUp(self):
        # Start each test with no files marked as locally available.
        LocalFile.objects.update(available=False)

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=True,
    )
    def test_remote_cancel_immediately(
        self, is_cancelled_mock, cancel_mock, FileDownloadMock, annotation_mock
    ):
        """Cancelling before any download starts must skip all transfers
        and leave availability annotations untouched."""
        # Check behaviour if cancellation is called before any file download starts
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        call_command("importcontent", "network", self.the_channel_id)
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileDownloadMock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_remote_url"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True, True],
    )
    def test_remote_cancel_during_transfer(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        local_path_mock,
        remote_path_mock,
        annotation_mock,
    ):
        """Cancelling mid-transfer must cancel the file download itself and
        the command, without annotating availability."""
        # If transfer is cancelled during transfer of first file
        local_path = tempfile.mkstemp()[1]
        local_path_mock.return_value = local_path
        remote_path_mock.return_value = "notest"
        # Mock this __iter__ so that the filetransfer can be looped over
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        FileDownloadMock.return_value.total_size = 1
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "network", self.the_channel_id)
        # is_cancelled should be called thrice.
        is_cancelled_mock.assert_has_calls([call(), call(), call()])
        # Should be set to the local path we mocked
        FileDownloadMock.assert_called_with("notest", local_path, session=Any(Session))
        # Check that it was cancelled when the command was cancelled, this ensures cleanup
        FileDownloadMock.assert_has_calls([call().cancel()])
        # Check that the command itself was also cancelled.
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.import_export_content.compare_checksums",
        return_value=True,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_remote_url"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, False, False, False, True, True, True],
    )
    def test_remote_cancel_after_file_copy_file_not_deleted(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        local_path_mock,
        remote_path_mock,
        checksum_mock,
        annotation_mock,
    ):
        """A file fully transferred before cancellation must be kept on
        disk and content visibility still updated."""
        # If transfer is cancelled after transfer of first file
        local_path_1 = tempfile.mkstemp()[1]
        local_path_2 = tempfile.mkstemp()[1]
        with open(local_path_1, "w") as f:
            f.write("a")
        local_path_mock.side_effect = [local_path_1, local_path_2]
        remote_path_mock.return_value = "notest"
        # Mock this __iter__ so that the filetransfer can be looped over
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        FileDownloadMock.return_value.total_size = 1
        FileDownloadMock.return_value.dest = local_path_1
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "network", self.the_channel_id)
        # Check that the command itself was also cancelled.
        cancel_mock.assert_called_with()
        # Check that the temp file we created where the first file was being downloaded to has not been deleted
        self.assertTrue(os.path.exists(local_path_1))
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.transfer.FileCopy")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=True,
    )
    def test_local_cancel_immediately(
        self, is_cancelled_mock, cancel_mock, FileCopyMock, annotation_mock
    ):
        """Disk-import variant: cancelling before any copy starts must skip
        all copies and leave annotations untouched."""
        # Local version of test above
        FileCopyMock.return_value.__iter__.return_value = ["one", "two", "three"]
        call_command("importcontent", "disk", self.the_channel_id, tempfile.mkdtemp())
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileCopyMock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.transfer.FileCopy")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True, True],
    )
    def test_local_cancel_during_transfer(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileCopyMock,
        local_path_mock,
        annotation_mock,
    ):
        """Disk-import variant: cancelling mid-copy must cancel the copy
        and the command while still updating content visibility."""
        # Local version of test above
        local_dest_path = tempfile.mkstemp()[1]
        local_src_path = tempfile.mkstemp()[1]
        local_path_mock.side_effect = [local_dest_path, local_src_path]
        FileCopyMock.return_value.__iter__.return_value = ["one", "two", "three"]
        FileCopyMock.return_value.total_size = 1
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "disk", self.the_channel_id, tempfile.mkdtemp())
        is_cancelled_mock.assert_has_calls([call(), call(), call()])
        FileCopyMock.assert_called_with(local_src_path, local_dest_path)
        FileCopyMock.assert_has_calls([call().cancel()])
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.len")
    @patch(
        "kolibri.core.content.utils.transfer.Transfer.next",
        side_effect=ConnectionError("connection error"),
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    def test_remote_cancel_during_connect_error(
        self, is_cancelled_mock, cancel_mock, next_mock, len_mock, annotation_mock
    ):
        """A connection error during transfer followed by cancellation must
        cancel the command without post-processing the downloaded files."""
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        # ``len`` is patched so this shows no file list was processed.
        len_mock.assert_not_called()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.error")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    def test_remote_import_httperror_404(self, path_mock, logger_mock, annotation_mock):
        local_dest_path_1 = tempfile.mkstemp()[1]
        local_dest_path_2 = tempfile.mkstemp()[1]
        local_dest_path_3 = tempfile.mkstemp()[1]
        path_mock.side_effect = [
            local_dest_path_1,
            local_dest_path_2,
            local_dest_path_3,
        ]
        LocalFile.objects.filter(
            files__contentnode__pk="2b6926ed22025518a8b9da91745b51d3"
        ).update(file_size=1)
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["2b6926ed22025518a8b9da91745b51d3"],
            renderable_only=False,
        )
        self.assertTrue(logger_mock.call_count == 3)
        self.assertTrue("404" in logger_mock.call_args_list[0][0][0])

    @patch("kolibri.core.content.management.commands.importcontent.sleep")
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test",
    )
    def test_remote_import_httperror_502(
        self,
        content_storage_file_path_mock,
        is_cancelled_mock,
        cancel_mock,
        file_download_mock,
        sleep_mock,
        annotation_mock,
    ):
        response = Response()
        response.status_code = 502
        file_download_mock.return_value.__enter__.side_effect = HTTPError(
            response=response
        )
        file_download_mock.return_value.dest = "test"
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "network", self.the_channel_id)
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()
        sleep_mock.assert_called_once()

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test",
    )
    def test_remote_import_httperror_500(
        self, content_storage_file_path_mock, file_download_mock, annotation_mock
    ):
        response = Response()
        response.status_code = 500
        file_download_mock.return_value.__enter__.side_effect = HTTPError(
            response=response
        )
        file_download_mock.return_value.dest = "test"
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        with self.assertRaises(HTTPError):
            call_command("importcontent", "network", self.the_channel_id)
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.len")
    @patch(
        "kolibri.core.content.utils.transfer.Transfer.next",
        side_effect=ChunkedEncodingError("Chunked Encoding Error"),
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    def test_remote_import_chunkedencodingerror(
        self, is_cancelled_mock, cancel_mock, error_mock, len_mock, annotation_mock
    ):
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        len_mock.assert_not_called()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.error")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    def test_local_import_oserror_dne(
        self, is_cancelled_mock, cancel_mock, path_mock, logger_mock, annotation_mock
    ):
        dest_path = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path, "/test/dne"]
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        call_command("importcontent", "disk", self.the_channel_id, "destination")
        self.assertTrue(
            "No such file or directory" in logger_mock.call_args_list[0][0][0]
        )
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.error")
    @patch("kolibri.core.content.utils.transfer.os.path.getsize")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    def test_local_import_oserror_permission_denied(
        self, path_mock, getsize_mock, logger_mock, annotation_mock
    ):
        dest_path = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path, "/test/dne"]
        getsize_mock.side_effect = ["1", OSError("Permission denied")]
        with self.assertRaises(OSError):
            call_command("importcontent", "disk", self.the_channel_id, "destination")
            self.assertTrue("Permission denied" in logger_mock.call_args_list[0][0][0])
            annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.os.remove")
    @patch(
        "kolibri.core.content.management.commands.importcontent.os.path.isfile",
        return_value=False,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, True, True],
    )
    def test_local_import_source_corrupted(
        self,
        is_cancelled_mock,
        cancel_mock,
        path_mock,
        isfile_mock,
        remove_mock,
        annotation_mock,
    ):
        local_src_path = tempfile.mkstemp()[1]
        local_dest_path = tempfile.mkstemp()[1]
        LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).update(file_size=1)
        path_mock.side_effect = [local_dest_path, local_src_path]
        call_command(
            "importcontent",
            "disk",
            self.the_channel_id,
            "destination",
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        remove_mock.assert_called_with(local_dest_path)

    @patch(
        "kolibri.core.content.management.commands.importcontent.os.path.isfile",
        return_value=False,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_local_import_source_corrupted_full_progress(
        self, is_cancelled_mock, cancel_mock, path_mock, isfile_mock, annotation_mock
    ):
        """
        Ensure that when a file is imported that does not match the file size in the database
        that the overall progress tracking for the content import process is properly updated
        to reflect the size of the file in the database, not the file on disk.
        This is important, as the total progress for the overall process is measured against
        the total file size recorded in the database for all files, not for the the
        transferred file size.
        """
        local_src_path = tempfile.mkstemp()[1]
        with open(local_src_path, "w") as f:
            f.write("This is just a test")
        src_file_size = os.path.getsize(local_src_path)
        expected_file_size = 10000
        local_dest_path = tempfile.mkstemp()[1]
        os.remove(local_dest_path)
        # Delete all but one file associated with ContentNode to reduce need for mocking
        files = ContentNode.objects.get(
            id="32a941fb77c2576e8f6b294cde4c3b0c"
        ).files.all()
        first_file = files.first()
        files.exclude(id=first_file.id).delete()
        LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).update(file_size=expected_file_size)
        path_mock.side_effect = [local_dest_path, local_src_path]
        mock_overall_progress = MagicMock()
        mock_file_progress = MagicMock()
        with patch(
            "kolibri.core.tasks.management.commands.base.ProgressTracker"
        ) as progress_mock:
            progress_mock.return_value.__enter__.side_effect = [
                mock_overall_progress,
                mock_file_progress,
            ]
            call_command(
                "importcontent",
                "disk",
                self.the_channel_id,
                "destination",
                node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
            )
            mock_overall_progress.assert_called_with(expected_file_size - src_file_size)

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload.finalize"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_remote_import_source_corrupted(
        self, is_cancelled_mock, path_mock, finalize_dest_mock, annotation_mock
    ):
        dest_path_1 = tempfile.mkstemp()[1]
        dest_path_2 = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path_1, dest_path_2]
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id,
            [],
            exclude_node_ids=[],
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
 # Test helper: unconditionally raises ChunkedEncodingError; intended for use
 # as a mock side_effect to simulate a failing chunked HTTP transfer.
 def raise_exception(*args, **kwargs):
     raise ChunkedEncodingError()
Example #17
0
def iter_content(response, chunk_size=1024*10):
    """Iterates over the response object containing audio.
    It is roughly identical to :meth:`requests.Response.iter_content`
    except that it is aware of the ICY 'pseudo-HTTP' protocol,
    which may include, but is not limited to the track title,
    author, etc.

    When metadata is found it is inserted into the global
    `METADATA` dictionary with the stream URL as the key.

    chunk_size must be of type int. *Note: Should not
    normally need to be set explicitly.*

    ICY:
    1) HTTP request to stream with the `Icy-Metadata' header
    2) Response header `icy-metaint` will tell how often
    the metadata is sent in the stream. Specifically,
    how many audio data bytes there are between metadata blocks.
    3) Read the number of bytes `icy-metaint` told us to read.
    this is the audio data.
    4) Next up, read 1 byte to get the metadata 'length specifier'
    5) Multiply that byte by 16 to get the size of the
    plaintext metadata string. (Max byte size = 255 so
    metadata max length = 4080).
    6) Parse metadata, set global variable and repeat.
    """
    global METADATA
    if hasattr(response.raw, 'stream'):
        has_icy = False
        bufsize_metadata = -1
        bufsize_audio = chunk_size
        # A positive icy-metaint header switches on ICY mode: audio is read in
        # blocks of exactly icy-metaint bytes, each followed by a metadata block.
        if response.headers.get("icy-metaint"):
            _metaint = response.headers.get("icy-metaint")
            if _metaint.isdigit() and int(_metaint) > 0:
                bufsize_audio = int(_metaint)
                has_icy = True
        try:
            #: 0: audio, 1: length specifier, 2: metadata
            state = 0
            while True:
                if state == 0:
                    #  no gzip/deflate - audio already compressed
                    chunk = next(response.raw.stream(bufsize_audio, decode_content=False))
                    if has_icy:
                        state += 1
                    yield chunk
                elif state == 1:
                    # Single length-specifier byte; metadata size is byte * 16.
                    chunk = response.raw.read(1)
                    bufsize_metadata = ord(chunk)*16
                    state += 1
                elif state == 2:
                    chunk = response.raw.read(bufsize_metadata)
                    # NOTE(review): on Python 3 `raw.read` returns bytes, so
                    # testing str items against it would raise TypeError —
                    # this looks written for Python 2; confirm target version.
                    if any(s in chunk for s in ["StreamTitle", "=", ";"]):
                        if len(chunk) >= 16:
                            metadata = icy_parse(chunk)
                            # NOTE(review): assumes METADATA[response.url] was
                            # pre-populated by the caller — verify, else KeyError.
                            METADATA[response.url]["info"] = metadata
                    state = 0
        except ProtocolError as e:
            raise ChunkedEncodingError(e)
        except ReadTimeoutError as e:
            raise ConnectionError(e)
    # Reached only when the raw object cannot stream (e.g. already consumed).
    raise StreamConsumedError()
Example #18
0
    def request(self,
                method,
                url,
                params=None,
                data=None,
                headers=None,
                cookies=None,
                files=None,
                auth=None,
                timeout=None,
                allow_redirects=True,
                proxies=None,
                hooks=None,
                stream=None,
                verify=None,
                cert=None,
                **kwargs):
        """Constructs a :class:`Request <Request>`, prepares it and sends it.
        Returns :class:`Response <Response>` object.

        :param method: method for the new :class:`Request` object.
        :param url: URL for the new :class:`Request` object.
        :param params: (optional) Dictionary or bytes to be sent in the query
            string for the :class:`Request`.
        :param data: (optional) Dictionary or bytes to send in the body of the
            :class:`Request`.
        :param headers: (optional) Dictionary of HTTP Headers to send with the
            :class:`Request`.
        :param cookies: (optional) Dict or CookieJar object to send with the
            :class:`Request`.
        :param files: (optional) Dictionary of 'filename': file-like-objects
            for multipart encoding upload.
        :param auth: (optional) Auth tuple or callable to enable
            Basic/Digest/Custom HTTP Auth.
        :param timeout: (optional) Float describing the timeout of the
            request.
        :param allow_redirects: (optional) Boolean. Set to True by default.
        :param proxies: (optional) Dictionary mapping protocol to the URL of
            the proxy.
        :param stream: (optional) whether to immediately download the response
            content. Defaults to ``False``.
        :param verify: (optional) if ``True``, the SSL cert will be verified.
            A CA_BUNDLE path can also be provided.
        :param cert: (optional) if String, path to ssl client cert file (.pem).
            If Tuple, ('cert', 'key') pair.

        Additionally accepts ``error_type`` in ``**kwargs``: the name of a
        requests exception class. If supplied, that exception is raised
        immediately instead of sending the request (fault-injection hook).
        """
        #===============================================================================================================
        # add by mz
        error_type = kwargs.get("error_type")
        if error_type:
            from requests.exceptions import InvalidURL, URLRequired, ConnectTimeout, ConnectionError, SSLError, ReadTimeout
            from requests.exceptions import InvalidSchema, MissingSchema, ChunkedEncodingError, ContentDecodingError
            from requests.exceptions import RequestException, HTTPError, ProxyError, Timeout, RetryError, StreamConsumedError
            # BUG FIX: TooManyRedirects was referenced in the mapping below but
            # never imported, causing a NameError whenever error_type was set.
            from requests.exceptions import TooManyRedirects

            get_error = {
                "InvalidURL": InvalidURL(),
                "URLRequired": URLRequired(),
                "ConnectTimeout": ConnectTimeout(),
                "ConnectionError": ConnectionError(),
                "SSLError": SSLError(),
                "ReadTimeout": ReadTimeout(),
                "InvalidSchema": InvalidSchema(),
                "MissingSchema": MissingSchema(),
                "ChunkedEncodingError": ChunkedEncodingError(),
                "ContentDecodingError": ContentDecodingError(),
                "StreamConsumedError": StreamConsumedError(),
                "TooManyRedirects": TooManyRedirects(),
                "RequestException": RequestException(),
                "HTTPError": HTTPError(),
                "ProxyError": ProxyError(),
                "Timeout": Timeout(),
                # BUG FIX: was the bare class (raised the type, not an
                # instance) — instantiate for consistency with the rest.
                "RetryError": RetryError(),
            }

            # An unknown error_type raises KeyError here, which is itself a
            # loud signal of a bad test parameter.
            error_ = get_error[error_type]
            raise error_
        #===============================================================================================================

        method = builtin_str(method)

        # Create the Request.
        req = Request(
            method=method.upper(),
            url=url,
            headers=headers,
            files=files,
            data=data or {},
            params=params or {},
            auth=auth,
            cookies=cookies,
            hooks=hooks,
        )
        prep = self.prepare_request(req)

        proxies = proxies or {}

        # Gather clues from the surrounding environment.
        if self.trust_env:
            # Set environment's proxies.
            env_proxies = get_environ_proxies(url) or {}
            for (k, v) in env_proxies.items():
                proxies.setdefault(k, v)

            # Look for configuration.
            if not verify and verify is not False:
                verify = os.environ.get('REQUESTS_CA_BUNDLE')

            # Curl compatibility.
            if not verify and verify is not False:
                verify = os.environ.get('CURL_CA_BUNDLE')

        # Merge all the kwargs.
        proxies = merge_setting(proxies, self.proxies)
        stream = merge_setting(stream, self.stream)
        verify = merge_setting(verify, self.verify)
        cert = merge_setting(cert, self.cert)

        # Send the request.
        send_kwargs = {
            'stream': stream,
            'timeout': timeout,
            'verify': verify,
            'cert': cert,
            'proxies': proxies,
            'allow_redirects': allow_redirects,
        }
        resp = self.send(prep, **send_kwargs)

        return resp
Example #19
0
class ImportContentTestCase(TestCase):
    """
    Test case for the importcontent management command.
    """

    fixtures = ["content_test.json"]
    the_channel_id = "6199dde695db4ee4ab392222d5af1e5c"

    def setUp(self):
        """Reset availability so each test starts with no importable content present."""
        LocalFile.objects.update(available=False)

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=True,
    )
    def test_remote_cancel_immediately(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # Check behaviour if cancellation is called before any file download starts
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        call_command("importcontent", "network", self.the_channel_id)
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileDownloadMock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_remote_url"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, False, True, True, True],
    )
    def test_remote_cancel_during_transfer(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        local_path_mock,
        remote_path_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # If transfer is cancelled during transfer of first file
        local_path = tempfile.mkstemp()[1]
        local_path_mock.return_value = local_path
        remote_path_mock.return_value = "notest"
        # Mock this __iter__ so that the filetransfer can be looped over
        FileDownloadMock.return_value.__iter__.side_effect = TransferCanceled()
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        call_command("importcontent", "network", self.the_channel_id)
        # is_cancelled should be called thrice.
        is_cancelled_mock.assert_has_calls([call(), call()])
        # Should be set to the local path we mocked
        FileDownloadMock.assert_called_with(
            "notest", local_path, session=Any(Session), cancel_check=is_cancelled_mock
        )
        # Check that the command itself was also cancelled.
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.compare_checksums",
        return_value=True,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_remote_url"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True],
    )
    def test_remote_cancel_after_file_copy_file_not_deleted(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileDownloadMock,
        local_path_mock,
        remote_path_mock,
        checksum_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # If transfer is cancelled after transfer of first file
        local_path_1 = tempfile.mkstemp()[1]
        local_path_2 = tempfile.mkstemp()[1]
        with open(local_path_1, "w") as f:
            f.write("a")
        local_path_mock.side_effect = [local_path_1, local_path_2]
        remote_path_mock.return_value = "notest"
        # Mock this __iter__ so that the filetransfer can be looped over
        FileDownloadMock.return_value.__iter__.return_value = ["one", "two", "three"]
        FileDownloadMock.return_value.total_size = 1
        FileDownloadMock.return_value.dest = local_path_1
        LocalFile.objects.update(file_size=1)
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()[:3]), 10)
        call_command("importcontent", "network", self.the_channel_id)
        # Check that the command itself was also cancelled.
        cancel_mock.assert_called_with()
        # Check that the temp file we created where the first file was being downloaded to has not been deleted
        self.assertTrue(os.path.exists(local_path_1))
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.transfer.FileCopy")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=True,
    )
    def test_local_cancel_immediately(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileCopyMock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # Local version of test above
        FileCopyMock.return_value.__iter__.return_value = ["one", "two", "three"]
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        call_command("importcontent", "disk", self.the_channel_id, tempfile.mkdtemp())
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileCopyMock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.mark_local_files_as_available.assert_not_called()
        annotation_mock.set_leaf_node_availability_from_local_file_availability.assert_not_called()
        annotation_mock.recurse_annotation_up_tree.assert_not_called()

    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.transfer.FileCopy")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True],
    )
    def test_local_cancel_during_transfer(
        self,
        is_cancelled_mock,
        cancel_mock,
        FileCopyMock,
        local_path_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        # Local version of test above
        local_dest_path = tempfile.mkstemp()[1]
        local_src_path = tempfile.mkstemp()[1]
        local_path_mock.side_effect = [local_dest_path, local_src_path]
        FileCopyMock.return_value.__iter__.side_effect = TransferCanceled()
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)
        call_command("importcontent", "disk", self.the_channel_id, tempfile.mkdtemp())
        is_cancelled_mock.assert_has_calls([call(), call()])
        FileCopyMock.assert_called_with(
            local_src_path, local_dest_path, cancel_check=is_cancelled_mock
        )
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch(
        "kolibri.core.content.utils.transfer.Transfer.next",
        side_effect=ConnectionError("connection error"),
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True, True, True],
    )
    def test_remote_cancel_during_connect_error(
        self,
        is_cancelled_mock,
        cancel_mock,
        next_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        LocalFile.objects.filter(pk="6bdfea4a01830fdd4a585181c0b8068c").update(
            file_size=2201062
        )
        LocalFile.objects.filter(pk="211523265f53825b82f70ba19218a02e").update(
            file_size=336974
        )
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    pk__in=[
                        "6bdfea4a01830fdd4a585181c0b8068c",
                        "211523265f53825b82f70ba19218a02e",
                    ]
                )
            ),
            10,
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.warning")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    def test_remote_import_httperror_404(
        self,
        path_mock,
        logger_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        local_dest_path_1 = tempfile.mkstemp()[1]
        local_dest_path_2 = tempfile.mkstemp()[1]
        local_dest_path_3 = tempfile.mkstemp()[1]
        path_mock.side_effect = [
            local_dest_path_1,
            local_dest_path_2,
            local_dest_path_3,
        ]
        ContentNode.objects.filter(pk="2b6926ed22025518a8b9da91745b51d3").update(
            available=False
        )
        LocalFile.objects.filter(
            files__contentnode__pk="2b6926ed22025518a8b9da91745b51d3"
        ).update(file_size=1, available=False)
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    files__contentnode__pk="2b6926ed22025518a8b9da91745b51d3"
                )
            ),
            10,
        )

        node_id = ["2b6926ed22025518a8b9da91745b51d3"]
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=node_id,
            renderable_only=False,
        )
        logger_mock.assert_called_once()
        self.assertTrue("3 files are skipped" in logger_mock.call_args_list[0][0][0])
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id,
            [],
            node_ids=node_id,
            exclude_node_ids=None,
            public=False,
        )

    @patch("kolibri.core.content.utils.transfer.Transfer.next")
    @patch("kolibri.core.content.utils.transfer.sleep")
    @patch("kolibri.core.content.utils.transfer.requests.Session.get")
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, True, True, True, True],
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test/test",
    )
    def test_remote_import_httperror_502(
        self,
        content_storage_file_path_mock,
        is_cancelled_mock,
        cancel_mock,
        requests_get_mock,
        sleep_mock,
        transfer_next_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """A 502 from the remote backs off (sleeps) without reading any data,
        and the command can then be cancelled while still annotating visibility."""
        # Every GET raises HTTPError(502) when raise_for_status is checked.
        bad_gateway_response = MagicMock()
        bad_gateway_response.status_code = 502
        requests_get_mock.return_value.raise_for_status.side_effect = HTTPError(
            "Bad Gateway", response=bad_gateway_response
        )
        # Give the channel's files a nonzero size so there is work to attempt.
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        get_import_export_mock.return_value = (1, [LocalFile.objects.first()], 10)

        call_command("importcontent", "network", self.the_channel_id)

        # Backed off exactly once, never streamed content, was cancelled,
        # and visibility annotation still ran.
        sleep_mock.assert_called_once()
        transfer_next_mock.assert_not_called()
        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.utils.transfer.requests.Session.get")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test/test",
    )
    def test_remote_import_httperror_500(
        self,
        content_storage_file_path_mock,
        requests_get_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """A 500 from the remote is not retried: the HTTPError propagates out
        of the command and no files are marked available."""
        server_error_response = MagicMock()
        server_error_response.status_code = 500
        requests_get_mock.return_value.raise_for_status.side_effect = HTTPError(
            "Internal Server Error", response=server_error_response
        )
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        get_import_export_mock.return_value = (1, list(LocalFile.objects.all()), 10)

        with self.assertRaises(HTTPError):
            call_command("importcontent", "network", self.the_channel_id)

        # Visibility annotation still runs, but with an empty available list.
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id, [], node_ids=None, exclude_node_ids=None, public=False
        )

    @patch("kolibri.core.content.utils.transfer.sleep")
    @patch(
        "kolibri.core.content.utils.transfer.Transfer.next",
        side_effect=ChunkedEncodingError("Chunked Encoding Error"),
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, False, True, True, True, True],
    )
    def test_remote_import_chunkedencodingerror(
        self,
        is_cancelled_mock,
        cancel_mock,
        error_mock,
        sleep_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """A ChunkedEncodingError mid-transfer is survivable: the command can
        be cancelled cleanly and still annotates content visibility."""
        # Record realistic file sizes for the two files being imported.
        file_sizes = {
            "6bdfea4a01830fdd4a585181c0b8068c": 2201062,
            "211523265f53825b82f70ba19218a02e": 336974,
        }
        for pk, size in file_sizes.items():
            LocalFile.objects.filter(pk=pk).update(file_size=size)
        get_import_export_mock.return_value = (
            1,
            list(LocalFile.objects.filter(pk__in=list(file_sizes))),
            10,
        )

        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )

        cancel_mock.assert_called_with()
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.warning")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, True],
    )
    def test_local_import_oserror_dne(
        self,
        is_cancelled_mock,
        cancel_mock,
        path_mock,
        logger_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """A disk import whose source path does not exist logs a skipped-files
        warning but still annotates content visibility."""
        writable_dest = tempfile.mkstemp()[1]
        # First path lookup yields the destination; second a nonexistent source.
        path_mock.side_effect = [writable_dest, "/test/dne"]
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        get_import_export_mock.return_value = (1, [LocalFile.objects.first()], 10)

        call_command("importcontent", "disk", self.the_channel_id, "destination")

        self.assertTrue("1 files are skipped" in logger_mock.call_args_list[0][0][0])
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.logger.error")
    @patch("kolibri.core.content.utils.transfer.os.path.getsize")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    def test_local_import_oserror_permission_denied(
        self,
        path_mock,
        getsize_mock,
        logger_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """An OSError other than "file not found" (e.g. permission denied)
        during a disk import is logged as an error and re-raised."""
        dest_path = tempfile.mkstemp()[1]
        path_mock.side_effect = [dest_path, "/test/dne"]
        # First size probe succeeds; the second raises the permission error.
        getsize_mock.side_effect = ["1", OSError("Permission denied")]
        get_import_export_mock.return_value = (1, [LocalFile.objects.first()], 10)
        with self.assertRaises(OSError):
            call_command("importcontent", "disk", self.the_channel_id, "destination")
        # BUG FIX: these assertions previously sat inside the assertRaises
        # block *after* the raising call, so they never executed. They must
        # run after the exception has been caught by the context manager.
        self.assertTrue("Permission denied" in logger_mock.call_args_list[0][0][0])
        annotation_mock.set_content_visibility.assert_called()

    @patch("kolibri.core.content.management.commands.importcontent.os.remove")
    @patch(
        "kolibri.core.content.management.commands.importcontent.os.path.isfile",
        return_value=False,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        side_effect=[False, False, True, True, True],
    )
    def test_local_import_source_corrupted(
        self,
        is_cancelled_mock,
        cancel_mock,
        path_mock,
        isfile_mock,
        remove_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """A disk import whose copied file fails its integrity check removes
        the corrupted destination file."""
        src = tempfile.mkstemp()[1]
        dest = tempfile.mkstemp()[1]
        LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).update(file_size=1)
        # First path lookup yields the destination, second the source.
        path_mock.side_effect = [dest, src]
        first_local_file = LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).first()
        get_import_export_mock.return_value = (1, [first_local_file], 10)
        call_command(
            "importcontent",
            "disk",
            self.the_channel_id,
            "destination",
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        cancel_mock.assert_called_with()
        # The corrupted destination file must have been cleaned up.
        remove_mock.assert_any_call(dest)

    @patch(
        "kolibri.core.content.management.commands.importcontent.os.path.isfile",
        return_value=False,
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_local_import_source_corrupted_full_progress(
        self,
        is_cancelled_mock,
        cancel_mock,
        path_mock,
        isfile_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """
        When the size of an imported file disagrees with the size recorded in
        the database, overall progress must still advance by the database
        value: total progress for the whole import is measured against the
        sum of database file sizes, not bytes actually transferred.
        """
        expected_file_size = 10000
        src = tempfile.mkstemp()[1]
        with open(src, "w") as f:
            f.write("This is just a test")
        dest = tempfile.mkstemp()[1]
        os.remove(dest)
        # Keep a single file on the ContentNode to reduce the mocking needed.
        node_files = ContentNode.objects.get(
            id="32a941fb77c2576e8f6b294cde4c3b0c"
        ).files.all()
        keep = node_files.first()
        node_files.exclude(id=keep.id).delete()
        LocalFile.objects.filter(
            files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
        ).update(file_size=expected_file_size)
        get_import_export_mock.return_value = (
            1,
            list(
                LocalFile.objects.filter(
                    files__contentnode="32a941fb77c2576e8f6b294cde4c3b0c"
                )
            ),
            10,
        )
        path_mock.side_effect = [dest, src]
        overall_progress = MagicMock()
        file_progress = MagicMock()
        with patch(
            "kolibri.core.tasks.management.commands.base.ProgressTracker"
        ) as progress_mock:
            # The first tracker entered is overall progress, the second per-file.
            progress_mock.return_value.__enter__.side_effect = [
                overall_progress,
                file_progress,
            ]
            call_command(
                "importcontent",
                "disk",
                self.the_channel_id,
                "destination",
                node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
            )

            overall_progress.assert_any_call(expected_file_size)

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload.finalize"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_remote_import_source_corrupted(
        self,
        is_cancelled_mock,
        path_mock,
        finalize_dest_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """A network import whose downloaded files are never finalized ends by
        annotating the channel with no available files."""
        path_mock.side_effect = [tempfile.mkstemp()[1], tempfile.mkstemp()[1]]
        sizes = {
            "6bdfea4a01830fdd4a585181c0b8068c": 2201062,
            "211523265f53825b82f70ba19218a02e": 336974,
        }
        for pk, size in sizes.items():
            LocalFile.objects.filter(pk=pk).update(file_size=size)
        get_import_export_mock.return_value = (
            1,
            list(LocalFile.objects.filter(pk__in=list(sizes))),
            10,
        )
        call_command(
            "importcontent",
            "network",
            self.the_channel_id,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
        )
        # No files were made available, but the annotation step still ran.
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id,
            [],
            exclude_node_ids=None,
            node_ids=["32a941fb77c2576e8f6b294cde4c3b0c"],
            public=False,
        )

    @patch(
        "kolibri.core.content.management.commands.importcontent.transfer.FileDownload.finalize"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path"
    )
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        return_value=False,
    )
    def test_remote_import_full_import(
        self,
        is_cancelled_mock,
        path_mock,
        finalize_dest_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """A full network import (no node filtering) completes and annotates
        visibility for the whole channel."""
        path_mock.side_effect = [tempfile.mkstemp()[1], tempfile.mkstemp()[1]]
        sizes = {
            "6bdfea4a01830fdd4a585181c0b8068c": 2201062,
            "211523265f53825b82f70ba19218a02e": 336974,
        }
        for pk, size in sizes.items():
            LocalFile.objects.filter(pk=pk).update(file_size=size)
        get_import_export_mock.return_value = (
            1,
            list(LocalFile.objects.filter(pk__in=list(sizes))),
            10,
        )
        call_command("importcontent", "network", self.the_channel_id)
        annotation_mock.set_content_visibility.assert_called_with(
            self.the_channel_id, [], exclude_node_ids=None, node_ids=None, public=False
        )

    @patch("kolibri.core.content.utils.transfer.sleep")
    @patch("kolibri.core.content.utils.transfer.Transfer.next")
    @patch("kolibri.core.content.utils.transfer.requests.Session.get")
    @patch(
        "kolibri.core.content.management.commands.importcontent.paths.get_content_storage_file_path",
        return_value="test/test",
    )
    @patch("kolibri.core.content.management.commands.importcontent.AsyncCommand.cancel")
    @patch(
        "kolibri.core.content.management.commands.importcontent.AsyncCommand.is_cancelled",
        # We have to return False for 30 1-second checks to ensure we actually retry.
        side_effect=[False] * 32 + [True] * 5,
    )
    def test_remote_import_file_compressed_on_gcs(
        self,
        is_cancelled_mock,
        cancel_mock,
        content_storage_file_path_mock,
        requests_get_mock,
        transfer_next_mock,
        sleep_mock,
        annotation_mock,
        get_import_export_mock,
        channel_list_status_mock,
    ):
        """When the remote file is served compressed (signalled by the
        X-Goog-Stored-Content-Length header) and a transfer fails with a 503,
        the retry must restart the download from scratch — byte-range
        resuming is not supported for compressed GCS objects — so the
        partially written transfer file is truncated before retrying."""
        # Every attempt to read transfer data raises HTTPError(503).
        response_mock = MagicMock()
        response_mock.status_code = 503
        exception_503 = HTTPError("Service Unavailable", response=response_mock)
        transfer_next_mock.side_effect = exception_503
        # Header marking the object as stored compressed on GCS.
        requests_get_mock.return_value.headers = {"X-Goog-Stored-Content-Length": "1"}
        LocalFile.objects.filter(
            files__contentnode__channel_id=self.the_channel_id
        ).update(file_size=1)
        get_import_export_mock.return_value = (1, [LocalFile.objects.first()], 10)

        # Mock out file writes so we can inspect how the transfer file is opened.
        m = mock_open()
        with patch("kolibri.core.content.utils.transfer.open", m) as open_mock:
            call_command("importcontent", "network", self.the_channel_id)
            # Check if truncate() is called since byte-range file resuming is not supported
            open_mock.assert_called_with("test/test.transfer", "wb")
            open_mock.return_value.truncate.assert_called_once()
            sleep_mock.assert_called()
            annotation_mock.set_content_visibility.assert_called_with(
                self.the_channel_id,
                [],
                node_ids=None,
                exclude_node_ids=None,
                public=False,
            )