def test_object_data_split_bincontent(self):
    # Explicitly test handling of a binary content tag split over chunk
    # boundaries: object_data() is run on the sync1_export fixture with
    # several read block sizes and the resulting FOXML is checked each time.
    response = self.session.get('file://%s' % FIXTURES['sync1_export'])
    mockapi = Mock()

    def mock_upload(data, *args, **kwargs):
        # consume the generator so datastream processing happens
        list(data)
        return 'uploaded://1'

    mockapi.upload = mock_upload
    mockapi.export.return_value = response
    self.obj.api = self.repo.api = mockapi

    def check_block_size(block_size):
        # Build a new exporter, read the export using the requested block
        # size, and verify the generated object data.  Called once per
        # scenario (rather than looped) so a failure's traceback points at
        # the block size that triggered it.
        self.archex = ArchiveExport(self.obj, self.repo)
        self.archex.read_block_size = block_size
        foxml = self.archex.object_data().getvalue()
        self.assertIsNotNone(
            etree.XML(foxml),
            'object data should be valid xml')
        self.assertNotIn(
            b'foxml:binaryContent', foxml,
            'object data for ingest should not include binaryContent tags')

    # use a block size that will split the fixture in the middle of
    # the first binary content tag
    check_block_size(2688)

    # this blocksize ends with just the < in foxml:binaryContent
    check_block_size(2680)

    # this blocksize ends with an unrelated close tag </
    check_block_size(1526)
def setUp(self):
    # Shared fixture state for every test: a mocked repository, a mocked
    # object with a known pid, an exporter wired to both, and a requests
    # session that understands file:// urls.

    # todo: use mocks?
    repo = Mock(spec=Repository)
    obj = Mock()  # spec=DigitalObject)
    obj.pid = 'synctest:1'
    self.repo = repo
    self.obj = obj
    self.archex = ArchiveExport(obj, repo)

    # set up a request session that can load file uris, so
    # fixtures can be used as export data
    file_session = requests.session()
    file_session.mount('file://', LocalFileAdapter())
    self.session = file_session
def test_object_data(self):
    # Check the FOXML produced by object_data(): it must parse as xml,
    # must not contain binaryContent tags, and must reference the uploaded
    # content by location.  Exercised with the exporter from setUp, with a
    # small read block size, and with a second fixture.

    # mock api to read export data from a local fixture file
    response = self.session.get('file://%s' % FIXTURES['sync1_export'])
    mockapi = Mock()

    def mock_upload(data, *args, **kwargs):
        # consume the generator so datastream processing happens
        list(data)
        return 'uploaded://1'

    mockapi.upload = mock_upload
    mockapi.export.return_value = response
    mockapi.base_url = 'http://fedora.example.co/fedora'
    self.obj.api = self.repo.api = mockapi

    def check_object_data():
        # Run the current self.archex and verify its output.  Kept as a
        # helper invoked once per scenario so the traceback of a failure
        # identifies which scenario broke.
        foxml = self.archex.object_data().getvalue()
        self.assertIsNotNone(
            etree.XML(foxml),
            'object data should be valid xml')
        self.assertNotIn(
            b'foxml:binaryContent', foxml,
            'object data for ingest should not include binaryContent tags')
        self.assertIn(
            b'<foxml:contentLocation REF="uploaded://1" TYPE="URL"/>', foxml,
            'object data for ingest should include upload id as content location')

    # exporter created in setUp, read_block_size not overridden
    check_object_data()

    # other tests?

    # set read block size artificially low to test chunked handling
    self.archex = ArchiveExport(self.obj, self.repo)
    self.archex.read_block_size = 1024
    check_object_data()

    # test with second fixture - multiple small encoded datastreams
    self.archex = ArchiveExport(self.obj, self.repo)
    self.archex.read_block_size = 1024
    response = self.session.get('file://%s' % FIXTURES['sync2_export'])
    mockapi.export.return_value = response
    check_object_data()