コード例 #1
0
    def test_can_parse_upload_package_with_some_duplicates_and_duplicated_tsv_entries(
            self):
        """Parse a baseline package, then a second one with repeated names.

        The first upload must finish with nothing flagged. The second must
        finish with four entries in ``duplicate_filenames`` (two names, each
        reported twice) and must still change the number of stored items.
        """
        storage = self.app.config['storage_client']
        count_at_start = self.db.items.count_documents({})

        # Baseline upload: 17 images, no duplicates, no broken records.
        with tempfile.TemporaryDirectory() as workdir:
            fixture_dir = os.path.join(DATA_PATH, '5p0xMAG_small')
            archive = os.path.join(workdir, 'package.tar.bz2')
            self.create_package_from_directory(fixture_dir, archive)
            first_id = self.upload_package(archive)
            parse_upload_package(first_id, self.db, storage)

            first = upload.get(
                self.db, first_id, with_default_projection=False)
            self.assertEqual('finished', first.state)
            self.assertEqual(17, first.image_count)
            self.assertEqual(0, first.duplicate_image_count)
            self.assertEqual(0, first.broken_record_count)
            self.assertCountEqual([], first.duplicate_filenames)
            self.assertCountEqual([], first.broken_records)

            self.assertNotEqual(
                count_at_start, self.db.items.count_documents({}))

        items_in_between = self.db.items.count_documents({})

        # Second upload: 9 images, 4 of them reported as duplicates.
        with tempfile.TemporaryDirectory() as workdir:
            fixture_dir = os.path.join(
                DATA_PATH, '5p0xMAG_small_2_duplicates_and_tsv_duplicates')
            archive = os.path.join(workdir, 'package.tar.bz2')
            self.create_package_from_directory(fixture_dir, archive)
            second_id = self.upload_package(archive)
            parse_upload_package(second_id, self.db, storage)

            second = upload.get(
                self.db, second_id, with_default_projection=False)
            self.assertEqual('finished', second.state)
            self.assertEqual(9, second.image_count)
            self.assertEqual(4, second.duplicate_image_count)
            self.assertEqual(0, second.broken_record_count)

            # Each of these two names is expected twice in the report.
            repeated_names = (
                'SPC-EAWAG-5P0X-1543968111037290-9650556340265-000309-002-3712-0-52-40.jpeg',
                'SPC-EAWAG-5P0X-1543968114038057-9650559340515-000339-001-3536-32-68-92.jpeg',
            )
            self.assertCountEqual(
                [name for name in repeated_names for _ in range(2)],
                second.duplicate_filenames)
            self.assertCountEqual([], second.broken_records)

            count_at_end = self.db.items.count_documents({})
            self.assertNotEqual(count_at_start, count_at_end)
            self.assertNotEqual(items_in_between, count_at_end)
コード例 #2
0
    def test_can_parse_upload_package_with_duplicates_only(self):
        """Parse a baseline package, then one whose images are all duplicates.

        The second upload must finish, report all three of its images as
        duplicates, and leave the number of stored items unchanged.
        """
        storage = self.app.config['storage_client']
        count_at_start = self.db.items.count_documents({})

        # Baseline upload: 17 images, no duplicates, no broken records.
        with tempfile.TemporaryDirectory() as workdir:
            fixture_dir = os.path.join(DATA_PATH, '5p0xMAG_small')
            archive = os.path.join(workdir, 'package.tar.bz2')
            self.create_package_from_directory(fixture_dir, archive)
            first_id = self.upload_package(archive)
            parse_upload_package(first_id, self.db, storage)

            first = upload.get(
                self.db, first_id, with_default_projection=False)
            self.assertEqual('finished', first.state)
            self.assertEqual(17, first.image_count)
            self.assertEqual(0, first.duplicate_image_count)
            self.assertEqual(0, first.broken_record_count)
            self.assertCountEqual([], first.duplicate_filenames)
            self.assertCountEqual([], first.broken_records)

            self.assertNotEqual(
                count_at_start, self.db.items.count_documents({}))

        items_in_between = self.db.items.count_documents({})

        # Second upload: three images, every one reported as a duplicate.
        with tempfile.TemporaryDirectory() as workdir:
            fixture_dir = os.path.join(DATA_PATH, '5p0xMAG_3_entries')
            archive = os.path.join(workdir, 'package.tar.bz2')
            self.create_package_from_directory(fixture_dir, archive)
            second_id = self.upload_package(archive)
            parse_upload_package(second_id, self.db, storage)

            second = upload.get(
                self.db, second_id, with_default_projection=False)
            self.assertEqual('finished', second.state)
            self.assertEqual(3, second.image_count)
            self.assertEqual(3, second.duplicate_image_count)
            self.assertEqual(0, second.broken_record_count)

            expected_duplicates = [
                'SPC-EAWAG-5P0X-1543968085030435-9650530338104-000049-002-2838-1090-48-32.jpeg',
                'SPC-EAWAG-5P0X-1543968169050193-9650614345087-000889-004-2636-0-100-128.jpeg',
                'SPC-EAWAG-5P0X-1543968172024020-9650617345336-000919-002-1364-290-64-72.jpeg',
            ]
            self.assertCountEqual(
                expected_duplicates, second.duplicate_filenames)
            self.assertCountEqual([], second.broken_records)

            # Nothing new may be stored by the duplicates-only upload.
            count_at_end = self.db.items.count_documents({})
            self.assertNotEqual(count_at_start, count_at_end)
            self.assertEqual(items_in_between, count_at_end)
コード例 #3
0
    def test_can_parse_upload_package_with_valid_spc_native_tar_format(self):
        """Parse a package built as a plain ``.tar`` (``compression=''``).

        The upload must finish with 8 images, nothing flagged, and a changed
        item count.
        """
        storage = self.app.config['storage_client']
        count_at_start = self.db.items.count_documents({})

        with tempfile.TemporaryDirectory() as workdir:
            fixture_dir = os.path.join(
                DATA_PATH, '25_feb_upload_example_small')
            archive = os.path.join(workdir, 'package.tar')
            self.create_package_from_directory(
                fixture_dir, archive, compression='')
            package_id = self.upload_package(archive)
            parse_upload_package(package_id, self.db, storage)

            parsed = upload.get(
                self.db, package_id, with_default_projection=False)
            self.assertEqual('finished', parsed.state)
            self.assertEqual(8, parsed.image_count)
            self.assertEqual(0, parsed.duplicate_image_count)
            self.assertEqual(0, parsed.broken_record_count)
            self.assertCountEqual([], parsed.duplicate_filenames)
            self.assertCountEqual([], parsed.broken_records)

            self.assertNotEqual(
                count_at_start, self.db.items.count_documents({}))
コード例 #4
0
    def test_can_parse_upload_package_with_some_fields_as_infs_or_nans(self):
        """Parse the ``5p0xMAG_small_with_infs_and_nans`` fixture package.

        The upload must finish with 17 images, no duplicates, and exactly two
        filenames listed in ``broken_records``; the item count must change.
        """
        storage = self.app.config['storage_client']
        count_at_start = self.db.items.count_documents({})

        with tempfile.TemporaryDirectory() as workdir:
            fixture_dir = os.path.join(
                DATA_PATH, '5p0xMAG_small_with_infs_and_nans')
            archive = os.path.join(workdir, 'package.tar.bz2')
            self.create_package_from_directory(fixture_dir, archive)
            package_id = self.upload_package(archive)
            parse_upload_package(package_id, self.db, storage)

            parsed = upload.get(
                self.db, package_id, with_default_projection=False)
            self.assertEqual('finished', parsed.state)
            self.assertEqual(17, parsed.image_count)
            self.assertEqual(0, parsed.duplicate_image_count)
            self.assertEqual(2, parsed.broken_record_count)
            self.assertCountEqual([], parsed.duplicate_filenames)

            expected_broken = [
                'SPC-EAWAG-5P0X-1543968157067352-9650602344089-000769-002-3546-2354-48-48.jpeg',
                'SPC-EAWAG-5P0X-1543968114038057-9650559340515-000339-001-3536-32-68-92.jpeg',
            ]
            self.assertCountEqual(expected_broken, parsed.broken_records)

            self.assertNotEqual(
                count_at_start, self.db.items.count_documents({}))
コード例 #5
0
    def test_can_parse_upload_package_with_duplicated_fields_filenames_in_tsv(
            self):
        """Parse the ``5p0xMAG_small_with_tsv_duplicated_filenames`` fixture.

        The upload must finish with 19 images of which 2 are duplicates, and
        the number of newly stored items must equal
        ``image_count - duplicate_image_count``.
        """
        storage = self.app.config['storage_client']
        count_at_start = self.db.items.count_documents({})

        with tempfile.TemporaryDirectory() as workdir:
            fixture_dir = os.path.join(
                DATA_PATH, '5p0xMAG_small_with_tsv_duplicated_filenames')
            archive = os.path.join(workdir, 'package.tar.bz2')
            self.create_package_from_directory(fixture_dir, archive)
            package_id = self.upload_package(archive)
            parse_upload_package(package_id, self.db, storage)

            parsed = upload.get(
                self.db, package_id, with_default_projection=False)
            self.assertEqual('finished', parsed.state)
            self.assertEqual(19, parsed.image_count)
            self.assertEqual(2, parsed.duplicate_image_count)
            self.assertEqual(0, parsed.broken_record_count)

            expected_duplicates = [
                'SPC-EAWAG-5P0X-1543968141051783-9650586342759-000609-002-0-2088-32-84.jpeg',
                'SPC-EAWAG-5P0X-1543968092032969-9650537338686-000119-003-2132-1914-48-48.jpeg',
            ]
            self.assertCountEqual(
                expected_duplicates, parsed.duplicate_filenames)
            self.assertCountEqual([], parsed.broken_records)

            count_at_end = self.db.items.count_documents({})
            self.assertNotEqual(count_at_start, count_at_end)

            # Only the non-duplicate images may have been stored.
            newly_stored = count_at_end - count_at_start
            unique_images = parsed.image_count - parsed.duplicate_image_count
            self.assertEqual(newly_stored, unique_images)
コード例 #6
0
    def test_cant_parse_upload_package_with_package_with_empty_tsv_file_and_no_images(
            self):
        """Parse the ``5p0xMAG_small_empty_tsv_no_images`` fixture package.

        The upload must end in the ``'failed'`` state, expose none of the
        result attributes, and leave the item count unchanged.
        """
        storage = self.app.config['storage_client']
        count_at_start = self.db.items.count_documents({})

        with tempfile.TemporaryDirectory() as workdir:
            fixture_dir = os.path.join(
                DATA_PATH, '5p0xMAG_small_empty_tsv_no_images')
            archive = os.path.join(workdir, 'package.tar.bz2')
            self.create_package_from_directory(fixture_dir, archive)
            package_id = self.upload_package(archive)
            parse_upload_package(package_id, self.db, storage)

            failed = upload.get(
                self.db, package_id, with_default_projection=False)
            self.assertEqual('failed', failed.state)

            # Reading any result attribute must raise AttributeError.
            result_fields = (
                'image_count',
                'duplicate_image_count',
                'broken_record_count',
                'duplicate_filenames',
                'broken_records',
            )
            for field in result_fields:
                with self.assertRaises(AttributeError):
                    getattr(failed, field)

            self.assertEqual(
                count_at_start, self.db.items.count_documents({}))
コード例 #7
0
    def test_cant_parse_upload_package_with_package_that_is_just_a_file(self):
        """Upload a bare ``features.tsv`` instead of an archive and parse it.

        The upload must end in the ``'failed'`` state, expose none of the
        result attributes, and leave the item count unchanged.
        """
        storage = self.app.config['storage_client']
        count_at_start = self.db.items.count_documents({})

        # The TSV file itself is uploaded; no package is built around it.
        lone_file = os.path.join(DATA_PATH, '5p0xMAG_small', 'features.tsv')
        package_id = self.upload_package(lone_file)
        parse_upload_package(package_id, self.db, storage)

        failed = upload.get(
            self.db, package_id, with_default_projection=False)
        self.assertEqual('failed', failed.state)

        # Reading any result attribute must raise AttributeError.
        result_fields = (
            'image_count',
            'duplicate_image_count',
            'broken_record_count',
            'duplicate_filenames',
            'broken_records',
        )
        for field in result_fields:
            with self.assertRaises(AttributeError):
                getattr(failed, field)

        self.assertEqual(count_at_start, self.db.items.count_documents({}))
コード例 #8
0
    def get(self, upload_id):
        """Return the shallow serialisable form of one upload.

        Looks the upload up in the database held in ``app.config['db']``
        without the default projection. ``invalid_request()`` is returned
        when the lookup raises ``InvalidId`` or yields a falsy document.
        """
        database = app.config['db']
        try:
            found = upload.get(database,
                               upload_id,
                               with_default_projection=False)
        except InvalidId:
            return invalid_request()

        if not found:
            return invalid_request()
        return found.serializable(shallow=True)
コード例 #9
0
    def test_api_can_post_empty_tags_list(self):
        """POSTing an empty tag list must return 204 and store no tags."""
        with self.app.app_context():
            target = copy.deepcopy(DUMMY_UPLOADS[0])
            empty_tags = []
            payload = json.dumps({'tags': empty_tags})
            tags_url = f'/upload/{target._id!s}/tags'

            response = self.client().post(tags_url,
                                          data=payload,
                                          headers=self.headers)
            self.assertEqual(response.status_code, 204)

            # Re-read the upload to check what was actually stored.
            stored = upload.get(self.app.config['db'],
                                target._id,
                                with_default_projection=False)
            self.assertCountEqual(stored.tags, empty_tags)
コード例 #10
0
    def test_api_cant_post_invalid_tags_list(self):
        """Each malformed ``tags`` payload must be rejected with 400.

        After every rejected request the stored upload is re-read and its
        tags must still match those of the original dummy upload.
        """
        with self.app.app_context():
            upload_doc = copy.deepcopy(DUMMY_UPLOADS[3])
            invalid_tags = [[4], ['valid', 4], 'invalid', [False]]

            # Loop-invariant values are computed once up front.
            db = self.app.config['db']
            tags_url = f'/upload/{upload_doc._id!s}/tags'

            for tags_list in invalid_tags:
                # subTest labels a failure with the offending payload and
                # lets the remaining payloads still be checked.
                with self.subTest(tags=tags_list):
                    request_data = json.dumps({'tags': tags_list})
                    res = self.client().post(tags_url,
                                             data=request_data,
                                             headers=self.headers)
                    self.assertEqual(res.status_code, 400)

                    upload_after = upload.get(db,
                                              upload_doc._id,
                                              with_default_projection=False)
                    self.assertCountEqual(upload_after.tags, upload_doc.tags)