def test_upload_file_with_lifetime(self):
    """Check that ``upload_file`` forwards ``lifetime`` to the upload API.

    No content type or metadata is passed by the caller; the test expects
    the API to receive ``application/json`` for the ``.json`` temp file and
    the file's base name under the ``x-abeja-meta-filename`` metadata key.
    """
    lifetime = "1day"
    api = Mock()
    api.post_channel_file_upload.return_value = {
        "uploaded_at": None,
        "metadata": {},
        "content_type": "image/jpeg",
        "lifetime": lifetime,
        "file_id": "20180515T180605-f4acc798-9afa-40a1-b500-ebce42a4fa3f",
    }
    channel = Channel(api, ORGANIZATION_ID, CHANNEL_ID)

    payload = json.dumps({'data': 'dummy'}).encode('utf-8')
    with tempfile.NamedTemporaryFile(suffix='.json') as tmp:
        tmp.write(payload)
        tmp.seek(0)
        filename = os.path.basename(tmp.name)
        uploaded = channel.upload_file(tmp.name, lifetime=lifetime)

    self.assertIsInstance(uploaded, DatalakeFile)

    upload_mock = api.post_channel_file_upload
    self.assertEqual(upload_mock.call_count, 1)

    # call_args unpacks into (positional arguments, keyword arguments).
    positional, keywords = upload_mock.call_args
    self.assertEqual(positional[0], CHANNEL_ID)
    self.assertEqual(positional[2], 'application/json')
    self.assertDictEqual(
        keywords,
        {
            'lifetime': lifetime,
            'conflict_target': None,
            'metadata': {'x-abeja-meta-filename': filename},
        })
def test_files_below_items_per_page(self):
    """List files when the API answers with a single page holding one file."""
    only_page = {
        'next_page_token': None,
        'files': [{
            'url_expires_on': '2018-06-04T05:04:46+00:00',
            'uploaded_at': '2018-06-01T05:22:44+00:00',
            'metadata': {
                'x-abeja-meta-filename': 'DcZzLGkV4AA8FQc.jpg'
            },
            'file_id': '20180601T052244-250482c0-d361-4c5b-a0f9-e796af1a5f0d',
            'download_uri': 'http://example/dummy/donwload_url',
            'content_type': 'image/jpeg'
        }]
    }
    api = Mock()
    api.list_channel_files.side_effect = [only_page]

    channel = Channel(api, ORGANIZATION_ID, CHANNEL_ID)
    self.assertIsInstance(channel, Channel)

    listed = list(channel.list_files())

    for entry in listed:
        self.assertIsInstance(entry, DatalakeFile)

    # A single API call, addressed to this channel, yielding one file.
    list_mock = api.list_channel_files
    self.assertEqual(list_mock.call_count, 1)
    self.assertEqual(list_mock.call_args[0][0], CHANNEL_ID)
    self.assertEqual(len(listed), 1)
def test_upload(self):
    """Upload a file-like object and verify the exact arguments sent to the API."""
    api = Mock()
    api.post_channel_file_upload.return_value = {
        "uploaded_at": None,
        "metadata": {},
        "content_type": "image/jpeg",
        "lifetime": None,
        "file_id": "20180515T180605-f4acc798-9afa-40a1-b500-ebce42a4fa3f",
    }
    channel = Channel(api, ORGANIZATION_ID, CHANNEL_ID)

    content_type = 'application/json'
    stream = BytesIO(json.dumps({'data': 'dummy'}).encode('utf-8'))

    uploaded = channel.upload(
        stream,
        content_type=content_type,
        metadata={'label': 'dummy label'})

    self.assertIsInstance(uploaded, DatalakeFile)

    # The user-supplied 'label' key is expected to reach the API with the
    # 'x-abeja-meta-' prefix applied.
    api.post_channel_file_upload.assert_called_once_with(
        CHANNEL_ID,
        stream,
        content_type,
        metadata={'x-abeja-meta-label': 'dummy label'},
        lifetime=None,
        conflict_target=None)
def test_files_with_empty_items(self):
    """List files when the API returns one page that contains no files."""
    empty_page = {'next_page_token': None, 'files': []}
    api = Mock()
    api.list_channel_files.side_effect = [empty_page]

    channel = Channel(api, ORGANIZATION_ID, CHANNEL_ID)
    self.assertIsInstance(channel, Channel)

    listed = list(channel.list_files())
    self.assertEqual(len(listed), 0)
def test_files_with_both_items_per_page_and_next_page_token(self):
    """List files across two pages and check the query arguments of each call.

    The first call is expected to carry ``items_per_page``; the follow-up
    call is expected to carry only the ``next_page_token`` from page one.
    """
    def one_file_page(next_token):
        # Build a fresh single-file response page with the given token.
        return {
            'next_page_token': next_token,
            'files': [{
                'url_expires_on': '2018-06-04T05:04:46+00:00',
                'uploaded_at': '2018-06-01T05:22:44+00:00',
                'metadata': {
                    'x-abeja-meta-filename': 'DcZzLGkV4AA8FQc.jpg'
                },
                'file_id': '20180601T052244-250482c0-d361-4c5b-a0f9-e796af1a5f0d',
                'download_uri': 'http://example/dummy/donwload_url',
                'content_type': 'image/jpeg'
            }]
        }

    api = Mock()
    api.list_channel_files.side_effect = [
        one_file_page('dummy'),
        one_file_page(None),
    ]

    channel = Channel(api, ORGANIZATION_ID, CHANNEL_ID)
    self.assertIsInstance(channel, Channel)

    listed = list(channel.list_files(limit=1))
    for entry in listed:
        self.assertIsInstance(entry, DatalakeFile)

    list_mock = api.list_channel_files
    self.assertEqual(list_mock.call_count, 2)

    first_args, first_kwargs = list_mock.call_args_list[0]
    self.assertTupleEqual(first_args, (CHANNEL_ID, ))
    self.assertDictEqual(first_kwargs, {'items_per_page': 1})

    second_args, second_kwargs = list_mock.call_args_list[1]
    self.assertTupleEqual(second_args, (CHANNEL_ID, ))
    # items_per_page should not be passed as query parameter
    self.assertDictEqual(second_kwargs, {'next_page_token': 'dummy'})

    self.assertEqual(len(listed), 2)
def test_upload_dir_without_thread(self, mock_generate_path_iter):
    """Upload a directory with ``use_thread=False`` and count the API calls.

    ``mock_generate_path_iter`` is made to return two temp file paths, so
    the test expects one upload API call per path.
    """
    def upload_response():
        # Build a fresh API response for one uploaded file.
        return {
            "url_expires_on": "2018-05-15T19:06:05+00:00",
            "upload_url": "http://example.com/dummy_upload_url",
            "uploaded_at": None,
            "metadata": {},
            "content_type": "image/jpeg",
            "file_id": "20180515T180605-f4acc798-9afa-40a1-b500-ebce42a4fa3f",
        }

    api = Mock()
    api.post_channel_file_upload.side_effect = [
        upload_response(),
        upload_response(),
    ]

    with tempfile.NamedTemporaryFile() as first_tmp:
        first_tmp.write(b'dummy1')
        first_tmp.seek(0)
        with tempfile.NamedTemporaryFile() as second_tmp:
            second_tmp.write(b'dummy2')
            second_tmp.seek(0)
            mock_generate_path_iter.return_value = [
                first_tmp.name,
                second_tmp.name,
            ]

            channel = Channel(api, ORGANIZATION_ID, CHANNEL_ID)
            # Materialise the result while both temp files still exist.
            uploaded = list(channel.upload_dir(
                'dummy_path',
                metadata={'dummy': 'data'},
                content_type='image/jpeg',
                use_thread=False))

            self.assertIsInstance(uploaded[0], DatalakeFile)
            self.assertEqual(api.post_channel_file_upload.call_count, 2)
def test_upload_with_dir(self):
    """Check that ``upload_file`` raises ``IsADirectoryError`` for a directory path."""
    mock_api = Mock()
    mock_api.get_channel_file_upload.return_value = {
        "url_expires_on": "2018-05-15T19:06:05+00:00",
        "upload_url": "http://example.com/dummy_upload_url",
        "uploaded_at": None,
        "metadata": {},
        "content_type": "image/jpeg",
        "file_id": "20180515T180605-f4acc798-9afa-40a1-b500-ebce42a4fa3f"
    }
    channel = Channel(mock_api, ORGANIZATION_ID, CHANNEL_ID)

    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(json.dumps({'data': 'dummy'}).encode('utf-8'))
        tmp.seek(0)
        # Directory that holds the temp file. os.path.dirname copes with
        # the platform's path separator and with root-level files, where
        # splitting on '/' by hand would produce an empty string.
        base_dir = os.path.dirname(tmp.name)
        with self.assertRaises(IsADirectoryError):
            channel.upload_file(
                base_dir,
                metadata={'x-abeja-meta-filename': 'dummy'},
                content_type='application/json')
def test_upload_file(self):
    """Upload a file by path and verify what is forwarded to the upload API.

    Checks the channel id, the explicitly supplied content type, and that
    the user metadata keys arrive with the ``x-abeja-meta-`` prefix.
    """
    mock_api = Mock()
    mock_api.post_channel_file_upload.return_value = {
        "url_expires_on": "2018-05-15T19:06:05+00:00",
        "uploaded_at": None,
        "metadata": {},
        "content_type": "image/jpeg",
        "lifetime": None,
        "file_id": "20180515T180605-f4acc798-9afa-40a1-b500-ebce42a4fa3f"
    }
    channel = Channel(mock_api, ORGANIZATION_ID, CHANNEL_ID)

    content_type = 'application/json'
    metadata = {'filename': 'dummy', 'label': 'dummy label'}
    dummy_file_data = json.dumps({'data': 'dummy'}).encode('utf-8')
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(dummy_file_data)
        tmp.seek(0)
        file = channel.upload_file(tmp.name,
                                   metadata=metadata,
                                   content_type=content_type)
    self.assertIsInstance(file, DatalakeFile)

    expected_metadata = {
        'x-abeja-meta-filename': 'dummy',
        'x-abeja-meta-label': 'dummy label'
    }
    self.assertEqual(mock_api.post_channel_file_upload.call_count, 1)
    call_args = mock_api.post_channel_file_upload.call_args[0]
    call_kwargs = mock_api.post_channel_file_upload.call_args[1]
    self.assertEqual(call_args[0], CHANNEL_ID)
    # The content type is the third positional argument of the API call,
    # as the sibling upload tests also assert; make sure the explicitly
    # supplied value is the one that gets through.
    self.assertEqual(call_args[2], content_type)
    self.assertDictEqual(
        call_kwargs,
        {
            'lifetime': None,
            'conflict_target': None,
            'metadata': expected_metadata
        })
def test_get_file(self):
    """Fetch one file by id and check the attributes of the returned object.

    The API response stores the file name under ``x-abeja-meta-filename``;
    the test expects it to be readable as ``metadata['filename']``.
    """
    file_id = "20180101T000000-00000000-1111-2222-3333-999999999999"
    content_type = "image/jpeg"
    expires_on = "2018-01-01T00:00:00+00:00"

    api = Mock()
    api.get_channel_file_download.return_value = {
        "url_expires_on": expires_on,
        "download_uri": "http://example.com/dummy_upload_url",
        "uploaded_at": None,
        "metadata": {'x-abeja-meta-filename': 'test_filename'},
        "content_type": content_type,
        "file_id": file_id,
    }

    fetched = Channel(api, ORGANIZATION_ID, CHANNEL_ID).get_file(file_id)

    self.assertIsInstance(fetched, DatalakeFile)
    expected_attributes = (
        ('organization_id', ORGANIZATION_ID),
        ('channel_id', CHANNEL_ID),
        ('file_id', file_id),
        ('content_type', content_type),
        ('url_expires_on', expires_on),
    )
    for attribute, expected in expected_attributes:
        self.assertEqual(getattr(fetched, attribute), expected)
    self.assertEqual(fetched.metadata['filename'], 'test_filename')
def upload_datum(channel: Channel, dataset_items: DatasetItems,
                 datum: PetData, is_train: bool, attribute_type: str):
    """Upload one datum's image to the channel and register it as a dataset item.

    Args:
        channel: Channel the image file is uploaded to.
        dataset_items: Collection on which the new item is created.
        datum: Record whose ``image_path`` names the file to upload.
        is_train: Selects the ``type`` metadata value, ``'trainval'`` when
            true and ``'test'`` otherwise.
        attribute_type: Passed through to ``build_attributes``.

    Returns:
        Whatever ``dataset_items.create`` returns for the new item.
    """
    # Step 1: send the image file to the channel, tagged with its split.
    split_label = 'trainval' if is_train else 'test'
    file_metadata = {
        'filename': PurePath(datum.image_path).name,
        'type': split_label,
    }
    uploaded = channel.upload_file(datum.image_path, metadata=file_metadata)

    # Step 2: create the dataset item that points at the uploaded file.
    attributes = build_attributes(datum, attribute_type)
    source_data = [{
        "data_type": uploaded.content_type,
        "data_uri": uploaded.uri,
    }]
    return dataset_items.create(source_data=source_data, attributes=attributes)
def upload_image_to_datalake(channel: Channel, filepath: Path) -> DatalakeFile:
    """Upload a JPEG image file to the given channel.

    Args:
        channel: Channel the file is uploaded to.
        filepath: Path of the image; its suffix must be ``.jpg`` or
            ``.jpeg`` in any letter case.

    Returns:
        The value returned by ``channel.upload_file``.

    Raises:
        ValueError: If the file suffix is not a JPEG suffix.
    """
    # Compare case-insensitively so '.JPG', '.JPEG' and mixed-case
    # spellings are all accepted.
    if filepath.suffix.lower() not in ('.jpg', '.jpeg'):
        raise ValueError(f'invalid file format: {filepath}, it is not jpeg file')

    content_type = 'image/jpeg'
    metadata = {'filename': filepath.name}
    return channel.upload_file(str(filepath), metadata=metadata, content_type=content_type)