class DBFileUnitTestSuite(unittest.TestCase):
    """spdxLicenseManager unit test suite for file data in DB.

    Every test runs against a fresh in-memory database that setUp() fills
    with three categories, four licenses, three subprojects, four scans and
    four files. All four sample files belong to scan 1; scans 2-4 have no
    files.
    """

    def setUp(self):
        # create and initialize an in-memory database
        self.db = ProjectDB()
        self.db.createDB(":memory:")
        self.db.initializeDBTables()

        # insert sample data
        self.insertSampleCategoryData()
        self.insertSampleLicenseData()
        self.insertSampleSubprojectData()
        self.insertSampleScanData()
        self.insertSampleFileData()

    def tearDown(self):
        self.db.closeDB()
        self.db = None

    ##### Sample data helpers

    def _saveAll(self, rows):
        """Bulk-save the given ORM objects through the DB session and commit."""
        self.db.session.bulk_save_objects(rows)
        self.db.session.commit()

    def _sampleBulkFiles(self):
        """Return a fresh list of tuples in the shape passed to addBulkFiles.

        Tuple fields appear to be (path, license_id, sha1, md5, sha256);
        the hash order is inferred from the keyword order used with
        addFile() in this suite -- confirm against ProjectDB.addBulkFiles.
        """
        return [
            ("/file17.py", 3, None, None, None),
            ("/file13.py", 2, None, None, None),
            ("/dir5/file128.py", 4, "123456", "789012", "345678"),
        ]

    def insertSampleCategoryData(self):
        """Insert three sample categories (ids 1-3)."""
        self._saveAll([
            Category(_id=1, name="a category", order=3),
            Category(_id=2, name="cat", order=2),
            Category(_id=3, name="blah category", order=1),
        ])

    def insertSampleLicenseData(self):
        """Insert four sample licenses (ids 1-4) spread over the categories."""
        self._saveAll([
            License(_id=1, name="DoAnything", category_id=1),
            License(_id=2, name="HarshEULA", category_id=2),
            License(_id=3, name="293PageEULA", category_id=3),
            License(_id=4, name="DoAnythingNoncommercial", category_id=1),
        ])

    def insertSampleSubprojectData(self):
        """Insert three sample subprojects (ids 1-3)."""
        self._saveAll([
            Subproject(_id=1, name="sub1", desc="subproject 1"),
            Subproject(_id=2, name="subX", desc="subproject XYZ"),
            Subproject(_id=3, name="subC", desc="subproject B"),
        ])

    def insertSampleScanData(self):
        """Insert four sample scans (ids 1-4)."""
        self._saveAll([
            Scan(_id=1, subproject_id=2, scan_dt=datetime.date(2017, 1, 10),
                 desc="XYZ initial scan"),
            Scan(_id=2, subproject_id=1, scan_dt=datetime.date(2017, 1, 3),
                 desc="1 initial scan"),
            Scan(_id=3, subproject_id=2, scan_dt=datetime.date(2017, 2, 10),
                 desc="XYZ 2017-02 monthly scan"),
            Scan(_id=4, subproject_id=2, scan_dt=datetime.date(2017, 2, 17),
                 desc="XYZ 2017-02 rescan"),
        ])

    def insertSampleFileData(self):
        """Insert four sample files (ids 1-4), all attached to scan 1."""
        self._saveAll([
            File(_id=1, scan_id=1, path="/fileC.c", license_id=1,
                 sha1="aabbcc", md5="ddeeff", sha256="aaccee"),
            File(_id=2, scan_id=1, path="/fileA.c", license_id=1,
                 sha1="112233", md5="445566", sha256="778899"),
            File(_id=3, scan_id=1, path="/fileB.c", license_id=2,
                 sha1=None, md5=None, sha256=None),
            File(_id=4, scan_id=1, path="/dir/fileA.c", license_id=4,
                 sha1="123456", md5="789012", sha256="345678"),
        ])

    ##### Test cases below

    def test_can_retrieve_files_in_one_scan(self):
        files = self.db.getFiles(scan_id=1)
        self.assertIsInstance(files, list)
        self.assertEqual(len(files), 4)
        # will sort by file path
        self.assertEqual(files[0]._id, 4)
        self.assertEqual(files[0].path, "/dir/fileA.c")
        self.assertEqual(files[1]._id, 2)
        self.assertEqual(files[1].path, "/fileA.c")
        self.assertEqual(files[2]._id, 3)
        self.assertEqual(files[2].path, "/fileB.c")
        self.assertEqual(files[3]._id, 1)
        self.assertEqual(files[3].path, "/fileC.c")

    def test_cannot_retrieve_files_in_scan_that_does_not_exist(self):
        with self.assertRaises(ProjectDBQueryError):
            self.db.getFiles(scan_id=17)

    def test_returns_empty_list_if_no_files_in_known_scan(self):
        files = self.db.getFiles(scan_id=4)
        self.assertEqual(files, [])

    def test_can_get_file_by_id(self):
        found = self.db.getFile(_id=3)
        self.assertEqual(found.path, "/fileB.c")
        self.assertEqual(found.license.name, "HarshEULA")

    def test_can_get_file_by_scan_and_path(self):
        found = self.db.getFile(scan_id=1, path="/fileB.c")
        self.assertEqual(found._id, 3)
        self.assertEqual(found.license.name, "HarshEULA")

    def test_cannot_get_file_by_id_with_scan_or_path(self):
        with self.assertRaises(ProjectDBQueryError):
            self.db.getFile(_id=3, scan_id=1)
        with self.assertRaises(ProjectDBQueryError):
            self.db.getFile(_id=3, path="/fileB.c")

    def test_cannot_get_file_with_no_id_or_scan_or_path(self):
        with self.assertRaises(ProjectDBQueryError):
            self.db.getFile()

    def test_cannot_get_file_with_only_one_of_scan_or_path(self):
        with self.assertRaises(ProjectDBQueryError):
            self.db.getFile(scan_id=1)
        with self.assertRaises(ProjectDBQueryError):
            self.db.getFile(path="/fileB.c")

    def test_returns_none_if_file_not_found_by_id(self):
        found = self.db.getFile(_id=17)
        self.assertIsNone(found)

    def test_returns_none_if_file_not_found_by_scan_plus_path(self):
        # known scan, unknown path
        found = self.db.getFile(scan_id=1, path="/nope")
        self.assertIsNone(found)
        # known path, unknown scan
        found = self.db.getFile(scan_id=6, path="/fileB.c")
        self.assertIsNone(found)

    def test_can_add_and_retrieve_files(self):
        self.db.addFile(scan_id=1, path="/file17.py", license_id=3,
                        sha1=None, md5=None, sha256=None)
        self.db.addFile(scan_id=1, path="/file13.py", license_id=2,
                        sha1=None, md5=None, sha256=None)
        file_id = self.db.addFile(scan_id=1, path="/dir5/file128.py",
                                  license_id=4, sha1="123456", md5="789012",
                                  sha256="345678")

        # confirm that we now have seven files in this scan
        files = self.db.getFiles(scan_id=1)
        self.assertEqual(len(files), 7)
        self.assertEqual(file_id, 7)

        # and confirm that we can retrieve this one by id
        found = self.db.getFile(_id=7)
        self.assertEqual(found.path, "/dir5/file128.py")
        self.assertEqual(found.license.name, "DoAnythingNoncommercial")

    def test_can_start_adding_but_rollback_file(self):
        file_id = self.db.addFile(scan_id=1, path="/will_rollback",
                                  license_id=3, sha1=None, md5=None,
                                  sha256=None, commit=False)
        self.db.rollback()

        # confirm that we still only have four files
        files = self.db.getFiles(scan_id=1)
        self.assertEqual(len(files), 4)

        # and confirm that this file ID doesn't exist in database
        found = self.db.getFile(_id=file_id)
        self.assertIsNone(found)

    def test_can_start_adding_and_then_commit_files(self):
        self.db.addFile(scan_id=1, path="/f1", license_id=1,
                        sha1=None, md5=None, sha256=None, commit=False)
        self.db.addFile(scan_id=1, path="/f2", license_id=1,
                        sha1=None, md5=None, sha256=None, commit=False)
        self.db.commit()

        # confirm that we now have six files
        files = self.db.getFiles(scan_id=1)
        self.assertEqual(len(files), 6)

        # and confirm that both deferred files are among them; a bare count
        # would not notice the wrong rows being stored
        paths = [f.path for f in files]
        self.assertIn("/f1", paths)
        self.assertIn("/f2", paths)

    def test_can_bulk_add_and_retrieve_files(self):
        self.db.addBulkFiles(scan_id=1, file_tuples=self._sampleBulkFiles())

        # confirm that we now have seven files in this scan
        files = self.db.getFiles(scan_id=1)
        self.assertEqual(len(files), 7)

        # and confirm that we can retrieve last one by id
        found = self.db.getFile(_id=7)
        self.assertEqual(found.path, "/dir5/file128.py")
        self.assertEqual(found.license.name, "DoAnythingNoncommercial")

    def test_can_start_bulk_adding_files_but_rollback(self):
        self.db.addBulkFiles(scan_id=1, file_tuples=self._sampleBulkFiles(),
                             commit=False)
        self.db.rollback()

        # confirm that we still only have four files
        files = self.db.getFiles(scan_id=1)
        self.assertEqual(len(files), 4)

        # and confirm that this file ID doesn't exist in database
        found = self.db.getFile(_id=7)
        self.assertIsNone(found)

    def test_can_start_bulk_adding_and_then_commit_files(self):
        self.db.addBulkFiles(scan_id=1, file_tuples=self._sampleBulkFiles(),
                             commit=False)
        self.db.commit()

        # confirm that we now have seven files
        files = self.db.getFiles(scan_id=1)
        self.assertEqual(len(files), 7)
class TVImporterTestSuite(unittest.TestCase):
    """spdxLicenseManager SPDX tag-value importer unit test suite.

    Every test gets a new TVImporter plus a fresh in-memory database that
    setUp() fills with sample categories, licenses, conversions, one
    subproject and one scan (self.scan_id). self.fdList holds four file-data
    records whose licenses all exist in the sample data; fd5, fd6 and
    fdConvert are built but only added to the list by the tests that need
    them.

    Tests that call the importer's private helpers carry a
    "license_checker" / "path_checker" name prefix so that they do not
    collide with the same-named tests of the public checkFileDataList():
    a second "def" with an identical name silently replaces the first one
    in the class body, and the replaced test never runs.
    """

    def setUp(self):
        # create importer object
        self.importer = TVImporter()

        # create and initialize an in-memory database
        self.db = ProjectDB()
        self.db.createDB(":memory:")
        self.db.initializeDBTables()

        # insert sample data
        self.insertSampleCategoryData()
        self.insertSampleLicenseData()
        self.insertSampleConversionData()
        self.insertSampleSubprojectData()
        self.insertSampleScanData()

        # build sample file data list
        self.fd1 = createFD("/tmp/f1", "DoAnything", md5="abcdef")
        self.fd2 = createFD("/tmp/f2", "DoAnythingNoncommercial",
                            sha256="abcdef")
        self.fd3 = createFD("/tmp/f3", "HarshEULA", sha1="abcdef")
        self.fd4 = createFD("/tmp/f4", "HarshEULA")
        self.fdList = [self.fd1, self.fd2, self.fd3, self.fd4]

        # not in fdList by default
        self.fd5 = createFD("/tmp/badLicense", "UnknownLicense")
        self.fd6 = createFD("/tmp/badLic2", "SecondUnknownLic")
        self.fdConvert = createFD("/tmp/needsConvert", "293")

    def tearDown(self):
        # release the in-memory database opened in setUp, the same way the
        # DB file test suite does
        self.db.closeDB()
        self.db = None

    ##### Sample data and fixture helpers

    def _saveAll(self, rows):
        """Bulk-save the given ORM objects through the DB session and commit."""
        self.db.session.bulk_save_objects(rows)
        self.db.session.commit()

    def _useRawLicensesAsFinal(self):
        """Copy each record's license into finalLicense for all of fdList.

        Needed by tests that call _checkFileDataListForLicenses directly,
        since they skip _applyConversions. Call this AFTER appending any
        extra records to self.fdList so those records are covered too.
        """
        for fd in self.fdList:
            fd.finalLicense = fd.license

    def _appendDuplicateOfFd2(self):
        """Append a second record with the same path as fd2 to fdList."""
        fdup = createFD("/tmp/f2", "DoAnythingNoncommercial", sha256="abcdef")
        self.fdList.append(fdup)

    def insertSampleCategoryData(self):
        """Insert three sample categories (ids 1-3)."""
        self._saveAll([
            Category(_id=1, name="a category", order=3),
            Category(_id=2, name="cat", order=2),
            Category(_id=3, name="blah category", order=1),
        ])

    def insertSampleLicenseData(self):
        """Insert four sample licenses (ids 1-4) spread over the categories."""
        self._saveAll([
            License(_id=1, name="DoAnything", category_id=1),
            License(_id=2, name="HarshEULA", category_id=2),
            License(_id=3, name="293PageEULA", category_id=3),
            License(_id=4, name="DoAnythingNoncommercial", category_id=1),
        ])

    def insertSampleConversionData(self):
        """Insert four sample license-text conversions (ids 1-4)."""
        self._saveAll([
            Conversion(_id=1, old_text="293", new_license_id=3),
            Conversion(_id=2, old_text="NC", new_license_id=4),
            Conversion(_id=3, old_text="anything", new_license_id=1),
            Conversion(_id=4, old_text="Anything", new_license_id=1),
        ])

    def insertSampleSubprojectData(self):
        """Insert the single sample subproject (id 1)."""
        self._saveAll([
            Subproject(_id=1, name="sub1", desc="subproject 1"),
        ])

    def insertSampleScanData(self):
        """Insert the single sample scan (id 1) and remember it as scan_id."""
        self._saveAll([
            Scan(_id=1, subproject_id=1, scan_dt=datetime.date(2017, 1, 10),
                 desc="new scan"),
        ])
        self.scan_id = 1

    ##### Test cases below

    def test_new_importer_is_in_expected_reset_state(self):
        self.assertEqual(self.importer.scanChecked, False)
        self.assertEqual(self.importer.licensesAll, [])
        self.assertEqual(self.importer.licensesUnknown, [])
        self.assertEqual(self.importer.licensesMapping, {})
        self.assertEqual(self.importer.pathDuplicates, [])
        self.assertEqual(self.importer.importedCount, 0)

    def test_import_fails_if_scan_not_checked_first(self):
        with self.assertRaises(ProjectDBInsertError):
            self.importer.importFileDataList(fdList=self.fdList, db=self.db,
                                             scan_id=self.scan_id)

    def test_cannot_check_without_providing_valid_fdList(self):
        with self.assertRaises(ProjectDBInsertError):
            self.importer.checkFileDataList(db=self.db)

    def test_cannot_check_without_providing_database(self):
        with self.assertRaises(ProjectDBInsertError):
            self.importer.checkFileDataList(fdList=self.fdList)

    def test_checking_valid_fdList_returns_true(self):
        retval = self.importer.checkFileDataList(fdList=self.fdList,
                                                 db=self.db)
        self.assertEqual(True, retval)

    def test_checker_returns_false_if_any_licenses_are_unknown(self):
        self.fdList.append(self.fd5)
        retval = self.importer.checkFileDataList(fdList=self.fdList,
                                                 db=self.db)
        self.assertEqual(False, retval)

    def test_can_get_license_list_if_any_are_unknown(self):
        self.fdList.append(self.fd5)
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        unknowns = self.importer.getUnknowns()
        self.assertIn("UnknownLicense", unknowns)
        self.assertNotIn("DoAnything", unknowns)
        self.assertNotIn("HarshEULA", unknowns)

    def test_license_list_is_sorted_if_multiple_are_unknown(self):
        self.fdList.append(self.fd5)
        self.fdList.append(self.fd6)
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        unknowns = self.importer.getUnknowns()
        self.assertEqual("SecondUnknownLic", unknowns[0])
        self.assertEqual("UnknownLicense", unknowns[1])

    def test_checker_returns_true_if_all_paths_are_unique(self):
        retval = self.importer.checkFileDataList(fdList=self.fdList,
                                                 db=self.db)
        self.assertEqual(True, retval)

    def test_checker_returns_false_if_any_paths_are_duplicates(self):
        self._appendDuplicateOfFd2()
        retval = self.importer.checkFileDataList(fdList=self.fdList,
                                                 db=self.db)
        self.assertEqual(False, retval)

    def test_duplicates_list_is_empty_if_all_paths_are_unique(self):
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        dups = self.importer.getDuplicatePaths()
        self.assertEqual([], dups)

    def test_duplicates_list_has_paths_if_any_paths_are_duplicates(self):
        self._appendDuplicateOfFd2()
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        dups = self.importer.getDuplicatePaths()
        self.assertEqual(["/tmp/f2"], dups)

    def test_can_get_duplicate_paths_after_checker_if_any(self):
        self._appendDuplicateOfFd2()
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        dups = self.importer.getDuplicatePaths()
        self.assertIn("/tmp/f2", dups)
        self.assertNotIn("/tmp/f1", dups)

    def test_checker_returns_true_if_all_is_good(self):
        retval = self.importer.checkFileDataList(fdList=self.fdList,
                                                 db=self.db)
        self.assertEqual(True, retval)

    def test_reads_licenses_into_licensesAll(self):
        # fill in finalLicense, since we are skipping _applyConversions
        self._useRawLicensesAsFinal()
        self.importer._checkFileDataListForLicenses(fdList=self.fdList,
                                                    db=self.db)
        self.assertIn("DoAnything", self.importer.licensesAll)

    def test_reads_only_unknown_licenses_into_licensesUnknown(self):
        self.fdList.append(self.fd5)
        # fill in finalLicense, since we are skipping _applyConversions
        self._useRawLicensesAsFinal()
        self.importer._checkFileDataListForLicenses(fdList=self.fdList,
                                                    db=self.db)
        self.assertIn("UnknownLicense", self.importer.licensesUnknown)
        self.assertNotIn("DoAnything", self.importer.licensesUnknown)

    def test_reads_only_known_licenses_into_licensesMapping(self):
        self.fdList.append(self.fd5)
        # fill in finalLicense, since we are skipping _applyConversions
        self._useRawLicensesAsFinal()
        self.importer._checkFileDataListForLicenses(fdList=self.fdList,
                                                    db=self.db)
        mapping = self.importer.licensesMapping
        self.assertEqual(1, mapping.get("DoAnything", None))
        self.assertEqual(2, mapping.get("HarshEULA", None))
        self.assertIsNone(mapping.get("UnknownLicense", None))

    def test_checker_returns_true_if_all_licenses_are_known(self):
        # fill in finalLicense, since we are skipping _applyConversions
        self._useRawLicensesAsFinal()
        retval = self.importer._checkFileDataListForLicenses(
            fdList=self.fdList, db=self.db)
        self.assertEqual(True, retval)

    def test_license_checker_returns_false_if_any_licenses_are_unknown(self):
        # append BEFORE filling in finalLicense, so that fd5 is checked
        # under its real (unknown) license name, as in the sibling tests
        self.fdList.append(self.fd5)
        # fill in finalLicense, since we are skipping _applyConversions
        self._useRawLicensesAsFinal()
        retval = self.importer._checkFileDataListForLicenses(
            fdList=self.fdList, db=self.db)
        self.assertEqual(False, retval)

    def test_cannot_import_without_providing_valid_fdList(self):
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        with self.assertRaises(ProjectDBInsertError):
            self.importer.importFileDataList(db=self.db,
                                             scan_id=self.scan_id)

    def test_cannot_import_without_providing_database(self):
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        with self.assertRaises(ProjectDBInsertError):
            self.importer.importFileDataList(fdList=self.fdList,
                                             scan_id=self.scan_id)

    def test_cannot_import_without_providing_scan_id(self):
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        with self.assertRaises(ProjectDBInsertError):
            self.importer.importFileDataList(fdList=self.fdList, db=self.db)

    def test_cannot_import_with_positional_args(self):
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        with self.assertRaises(TypeError):
            self.importer.importFileDataList(self.fdList)
        with self.assertRaises(TypeError):
            self.importer.importFileDataList(self.fdList, self.db)
        with self.assertRaises(TypeError):
            self.importer.importFileDataList(self.fdList, self.db,
                                             self.scan_id)

    def test_path_checker_returns_true_if_all_paths_are_unique(self):
        retval = self.importer._checkFileDataListForDuplicatePaths(
            fdList=self.fdList)
        self.assertEqual(True, retval)

    def test_path_checker_returns_false_if_any_paths_are_duplicates(self):
        self._appendDuplicateOfFd2()
        retval = self.importer._checkFileDataListForDuplicatePaths(
            fdList=self.fdList)
        self.assertEqual(False, retval)

    def test_files_are_imported_if_all_is_good(self):
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        retval = self.importer.importFileDataList(fdList=self.fdList,
                                                  db=self.db,
                                                  scan_id=self.scan_id)
        self.assertEqual(True, retval)

        f1 = self.db.getFile(scan_id=self.scan_id, path="/tmp/f1")
        self.assertEqual("/tmp/f1", f1.path)
        self.assertEqual("abcdef", f1.md5)
        self.assertEqual("DoAnything", f1.license.name)

        f4 = self.db.getFile(scan_id=self.scan_id, path="/tmp/f4")
        self.assertEqual("/tmp/f4", f4.path)
        self.assertIsNone(f4.md5)
        self.assertEqual("HarshEULA", f4.license.name)

    def test_can_get_count_of_imported_files_if_all_licenses_are_known(self):
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        self.importer.importFileDataList(fdList=self.fdList, db=self.db,
                                         scan_id=self.scan_id)
        count = self.importer.getImportedCount()
        self.assertEqual(4, count)

    def test_files_are_not_imported_if_any_licenses_are_unknown(self):
        self.fdList.append(self.fd5)
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        with self.assertRaises(ProjectDBInsertError):
            self.importer.importFileDataList(fdList=self.fdList, db=self.db,
                                             scan_id=self.scan_id)
        # nothing from the list may have been stored, not even valid files
        f1 = self.db.getFile(scan_id=self.scan_id, path="/tmp/f1")
        self.assertIsNone(f1)

    def test_checker_applies_conversions(self):
        self.fdList.append(self.fdConvert)
        retval = self.importer.checkFileDataList(fdList=self.fdList,
                                                 db=self.db)
        self.assertTrue(retval)
        self.assertEqual("293PageEULA", self.fdConvert.finalLicense)

    def test_path_prefixes_are_stripped_if_config_is_yes(self):
        self.db.setConfigValue("import-strip-path-prefixes", "Yes")
        prefix = self.importer._applyPathPrefixStrip(fdList=self.fdList,
                                                     db=self.db)
        self.assertEqual("/tmp", prefix)
        # the original path is left untouched; only finalPath is stripped
        self.assertEqual("/tmp/f1", self.fd1.path)
        self.assertEqual("/f1", self.fd1.finalPath)

    def test_path_prefixes_are_not_stripped_if_config_is_no(self):
        self.db.setConfigValue("import-strip-path-prefixes", "no")
        prefix = self.importer._applyPathPrefixStrip(fdList=self.fdList,
                                                     db=self.db)
        self.assertEqual("", prefix)
        self.assertEqual("/tmp/f1", self.fd1.path)
        self.assertEqual("/tmp/f1", self.fd1.finalPath)

    def test_path_prefixes_are_not_stripped_if_config_is_not_set(self):
        prefix = self.importer._applyPathPrefixStrip(fdList=self.fdList,
                                                     db=self.db)
        self.assertEqual("", prefix)
        self.assertEqual("/tmp/f1", self.fd1.path)
        self.assertEqual("/tmp/f1", self.fd1.finalPath)

    def test_path_prefixes_are_not_stripped_if_mixing_abs_and_rel_paths(self):
        self.db.setConfigValue("import-strip-path-prefixes", "yes")
        # a path without a leading slash, mixed in with the "/tmp/..." ones
        fdDifferentPath = createFD("absolutePath", "293PageEULA")
        self.fdList.append(fdDifferentPath)
        prefix = self.importer._applyPathPrefixStrip(fdList=self.fdList,
                                                     db=self.db)
        self.assertEqual("", prefix)
        self.assertEqual("/tmp/f1", self.fd1.path)
        self.assertEqual("/tmp/f1", self.fd1.finalPath)

    def test_path_prefixes_are_not_stripped_if_no_common_prefix(self):
        self.db.setConfigValue("import-strip-path-prefixes", "yes")
        fdDifferentPath1 = createFD("somewhereElse", "293PageEULA")
        fdDifferentPath2 = createFD("something", "293PageEULA")
        tmpFDList = [fdDifferentPath1, fdDifferentPath2]
        prefix = self.importer._applyPathPrefixStrip(fdList=tmpFDList,
                                                     db=self.db)
        self.assertEqual("", prefix)
        self.assertEqual("somewhereElse", fdDifferentPath1.finalPath)
        self.assertEqual("something", fdDifferentPath2.finalPath)

    def test_file_path_prefixes_are_stripped_on_import_if_configured(self):
        self.db.setConfigValue("import-strip-path-prefixes", "yes")
        self.importer.checkFileDataList(fdList=self.fdList, db=self.db)
        retval = self.importer.importFileDataList(fdList=self.fdList,
                                                  db=self.db,
                                                  scan_id=self.scan_id)
        self.assertEqual(True, retval)

        f1 = self.db.getFile(scan_id=self.scan_id, path="/f1")
        self.assertEqual("/f1", f1.path)
        self.assertEqual("DoAnything", f1.license.name)

        f4 = self.db.getFile(scan_id=self.scan_id, path="/f4")
        self.assertEqual("/f4", f4.path)
        self.assertEqual("HarshEULA", f4.license.name)

        # the unstripped path must not have been stored
        wrongPathFile = self.db.getFile(scan_id=self.scan_id, path="/tmp/f1")
        self.assertIsNone(wrongPathFile)