def test_compare_hashes_failed(self, mocker):
     """Ensure we get consistent output when the checksum comparison fails."""
     hash_file = "metadata/checksum.sha256"
     job = Job("stub", "stub", ["", ""])
     hashsum = self.setup_hashsum(hash_file, job)
     toolname = "sha256sum"
     objects_dir = "objects"
     output_string = (
         b"objects/file1.bin: OK\n"
         b"objects/file2.bin: FAILED\n"
         b"objects/nested/\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab"
         b"3.bin: FAILED\n"
         b"objects/readonly.file: FAILED open or read")
     exception_string = (
         "sha256: comparison exited with status: 1. Please check the formatting of the checksums or integrity of the files.\n"
         "sha256: objects/file2.bin: FAILED\n"
         "sha256: objects/nested/ファイル3.bin: FAILED\n"
         "sha256: objects/readonly.file: FAILED open or read")
     mock = mocker.patch.object(hashsum,
                                "_call",
                                return_value=output_string)
     mocker.patch.object(hashsum,
                         "count_and_compare_lines",
                         return_value=True)
     mock.side_effect = subprocess.CalledProcessError(returncode=1,
                                                      cmd=toolname,
                                                      output=output_string)
     ret = hashsum.compare_hashes("")
     mock.assert_called_once_with("-c",
                                  "--strict",
                                  hash_file,
                                  transfer_dir=objects_dir)
     assert ret == 1, self.assert_return_value.format(ret)
     assert (job.get_stderr().strip() == exception_string
             ), self.assert_exception_string
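
# Note how the test above sets both return_value and side_effect: when a
# side_effect exception instance is set, calling the mock raises it and the
# return_value is ignored. A minimal self-contained illustration with
# unittest.mock (which pytest-mock's mocker wraps):
import subprocess
from unittest import mock

failing_call = mock.Mock(return_value=b"objects/file1.bin: OK\n")
failing_call.side_effect = subprocess.CalledProcessError(
    returncode=1, cmd="sha256sum", output=b"objects/file2.bin: FAILED\n")
try:
    failing_call("-c", "--strict", "metadata/checksum.sha256")
except subprocess.CalledProcessError as err:
    # The captured output travels on the exception for callers to reformat.
    assert err.returncode == 1
    assert b"FAILED" in err.output
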
Example 2
def test_job_encoding():
    job = Job(name="somejob", uuid=str(uuid4()), args=["a", "b"])

    job.pyprint(UNICODE)
    stdout = job.get_stdout()
    expected_stdout = f"{UNICODE}\n"
    expected_output = f"{UNICODE}\n"
    assert job.output == expected_output
    assert stdout == expected_stdout
    assert isinstance(job.output, str)
    assert isinstance(stdout, str)

    job.print_error(NON_ASCII)
    stderr = job.get_stderr()
    expected_stderr = f"{NON_ASCII}\n"
    expected_error = f"{NON_ASCII}\n"
    assert job.error == expected_error
    assert stderr == expected_stderr
    assert isinstance(job.error, str)
    assert isinstance(stderr, str)

    job_dump = job.dump()
    assert job.UUID in job_dump
    assert stderr in job_dump
    assert stdout in job_dump
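
# The string-type assertions above rely on Job buffering its output as text.
# A hedged, minimal stand-in (not a3m's actual Job class) that reproduces
# that behavior:
import io

class FakeJob:
    """Illustrative only: buffers stdout/stderr as str, like the real Job."""

    def __init__(self):
        self.output = ""
        self.error = ""

    def pyprint(self, *objects):
        buf = io.StringIO()
        print(*objects, file=buf)
        self.output += buf.getvalue()

    def print_error(self, *objects):
        buf = io.StringIO()
        print(*objects, file=buf)
        self.error += buf.getvalue()

    def get_stdout(self):
        return self.output

    def get_stderr(self):
        return self.error
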
 def test_compare_hashes_with_bad_files(self, mocker):
     """Ensure that the formatting of errors is consistent if improperly
     formatted files are provided to hashsum.
     """
     hash_file = "metadata/checksum.sha1"
     job = Job("stub", "stub", ["", ""])
     hashsum = self.setup_hashsum(hash_file, job)
     toolname = "sha1sum"
     objects_dir = "objects"
     no_proper_output = (
         b"sha1sum: metadata/checksum.sha1: no properly formatted SHA1 "
         b"checksum lines found")
     except_string_no_proper_out = (
         "sha1: comparison exited with status: 1. Please check the formatting of the checksums or integrity of the files.\n"
         "sha1: sha1sum: metadata/checksum.sha1: no properly formatted "
         "SHA1 checksum lines found")
     improper_formatting = b"sha1sum: WARNING: 1 line is improperly formatted"
     except_string_improper_format = (
         "sha1: comparison exited with status: 1. Please check the formatting of the checksums or integrity of the files.\n"
         "sha1: sha1sum: WARNING: 1 line is improperly formatted")
     mock = mocker.patch.object(hashsum,
                                "_call",
                                return_value=no_proper_output)
     mocker.patch.object(hashsum,
                         "count_and_compare_lines",
                         return_value=True)
     mock.side_effect = subprocess.CalledProcessError(
         returncode=1, cmd=toolname, output=no_proper_output)
     ret = hashsum.compare_hashes("")
     mock.assert_called_once_with("-c",
                                  "--strict",
                                  hash_file,
                                  transfer_dir=objects_dir)
     assert (job.get_stderr().strip() == except_string_no_proper_out
             ), self.assert_exception_string
     assert ret == 1, self.assert_return_value.format(ret)
     # Flush job.error as it isn't flushed automatically.
     job.error = ""
     mock = mocker.patch.object(hashsum,
                                "_call",
                                return_value=improper_formatting)
     mock.side_effect = subprocess.CalledProcessError(
         returncode=1, cmd="sha1sum", output=improper_formatting)
     ret = hashsum.compare_hashes("")
     assert (job.get_stderr().strip() == except_string_improper_format
             ), self.assert_exception_string
     mock.assert_called_once_with("-c",
                                  "--strict",
                                  hash_file,
                                  transfer_dir=objects_dir)
     assert ret == 1, self.assert_return_value.format(ret)
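
# Both expected strings above follow one convention: a summary line, then the
# tool's own output re-emitted line by line under a short algorithm prefix.
# A sketch of that formatting (a hypothetical helper, not hashsum's code):
def format_comparison_error(prefix, returncode, output):
    lines = ["{}: comparison exited with status: {}. Please check the "
             "formatting of the checksums or integrity of the files.".format(
                 prefix, returncode)]
    lines.extend(
        "{}: {}".format(prefix, line)
        for line in output.decode("utf8").splitlines())
    return "\n".join(lines)
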
def test_json_csv_keys_vals_vary(tmpdir):
    """Test that a JSON array of objects with varying keys and varying value
    types works as expected, i.e.:

    - the headers of the CSV are the union of all attributes of all objects in
      the JSON array: ``[{'a': ...}, {'b': ...}]`` yields ``'a,b\r\n'``;
    - the order of the JSON object attributes does not matter;
    - a JSON object value may be an array or a string:
      ``[{'a': ['x', 'y']}, {'a': 'z'}]`` yields ``'a,a\r\nx,y\r\nz,\r\n'``.
    """
    json_array = json.loads(JSON_KEYS_VALS_VARY)
    headers = json_metadata_to_csv.fetch_keys(json_array)
    assert headers == KEYS_VALS_VARY_HEADERS
    first_json_obj, second_json_obj = json_array
    first_row = json_metadata_to_csv.object_to_row(
        json_metadata_to_csv.fix_encoding(first_json_obj), headers
    )
    assert first_row == KEYS_VALS_VARY_FIRST_ROW
    second_row = json_metadata_to_csv.object_to_row(
        json_metadata_to_csv.fix_encoding(second_json_obj), headers
    )
    assert second_row == KEYS_VALS_VARY_SECOND_ROW
    json_path = os.path.join(str(tmpdir), "metadata.json")
    csv_path = os.path.join(str(tmpdir), "metadata.csv")
    with open(json_path, "w") as jsonfile:
        jsonfile.write(JSON_KEYS_VALS_VARY)
    job = Job("stub", "stub", ["", json_path])
    json_metadata_to_csv.call([job])
    with open(csv_path, newline="") as csvfile:
        csvdata = csvfile.read()
    assert CSV_KEYS_VALS_VARY == csvdata
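
# A toy equivalent of the conversion asserted above (the real logic lives in
# json_metadata_to_csv; this only illustrates the header/row widening rule):
import csv
import io

def to_csv(json_array):
    # Each key occupies as many columns as its longest list value.
    widths = {}
    for obj in json_array:
        for key, val in obj.items():
            n = len(val) if isinstance(val, list) else 1
            widths[key] = max(widths.get(key, 0), n)
    buf = io.StringIO()
    writer = csv.writer(buf)
    writer.writerow([key for key, n in widths.items() for _ in range(n)])
    for obj in json_array:
        row = []
        for key, n in widths.items():
            vals = obj.get(key, "")
            vals = vals if isinstance(vals, list) else [vals]
            row.extend((vals + [""] * n)[:n])
        writer.writerow(row)
    return buf.getvalue()

assert to_csv([{"a": ["x", "y"]}, {"a": "z"}]) == "a,a\r\nx,y\r\nz,\r\n"
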
 def test_provenance_string(self, mocker):
     """Test to ensure that the string output to the PREMIS event for this
     microservice Job is consistent with what we're expecting. Provenance
     string includes the command called, plus the utility's version string.
     """
     hash_file = "metadata/checksum.md5"
     hashsum = self.setup_hashsum(hash_file, Job("stub", "stub", ["", ""]))
     version_string = [
         "md5sum (GNU coreutils) 8.28",
         "Copyright (C) 2017 Free Software Foundation, Inc.",
     ]
     mock = mocker.patch.object(hashsum,
                                "_call",
                                return_value=version_string)
     assert (hashsum.version() == "md5sum (GNU coreutils) 8.28"
             ), "Hashsum version retrieved is incorrect"
     mock.assert_called_once_with("--version")
     mocker.patch.object(
         hashsum,
         "command_called",
         (hashsum.COMMAND, ) + ("-c", "--strict", hash_file),
     )
     expected_provenance = 'program="md5sum -c --strict metadata/checksum.md5"; version="md5sum (GNU coreutils) 8.28"'
     provenance_output = hashsum.get_command_detail()
     assert (provenance_output == expected_provenance
             ), f"Provenance output is incorrect: {provenance_output}"
Example 6
def test_sanitize_transfer_with_multiple_files(monkeypatch, tmp_path, transfer,
                                               subdir_path,
                                               multiple_transfer_file_objs):
    monkeypatch.setattr(sanitize_object_names.NameSanitizer, "BATCH_SIZE", 10)

    sanitizer = sanitize_object_names.NameSanitizer(
        Job("stub", "stub", []),
        subdir_path.as_posix(),
        transfer.uuid,
        "2017-01-04 19:35:22",
        "%transferDirectory%",
        "transfer_id",
        os.path.join(tmp_path.as_posix(), ""),
    )
    sanitizer.sanitize_objects()

    assert multiple_transfer_file_objs, "File objects structure is empty"
    for file_obj in multiple_transfer_file_objs:
        original_location = file_obj.currentlocation
        file_obj.refresh_from_db()

        assert file_obj.currentlocation != original_location
        assert subdir_path.as_posix() not in file_obj.currentlocation
        assert "bulk-file" in file_obj.currentlocation
        # Test the event details were written correctly for our object.
        event = Event.objects.get(file_uuid=file_obj.uuid,
                                  event_type="name cleanup")
        verify_event_details(event)
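
# Hypothetical sketch of the kind of cleanup NameSanitizer applies (the real
# rules live in sanitize_object_names): decompose accents, drop non-ASCII,
# and replace characters that are unsafe in paths.
import re
import unicodedata

def sanitize_name(basename):
    ascii_name = (unicodedata.normalize("NFKD", basename)
                  .encode("ascii", "ignore").decode("ascii"))
    return re.sub(r"[^A-Za-z0-9_.-]", "_", ascii_name)

assert sanitize_name("página de prueba.jpg") == "pagina_de_prueba.jpg"
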
Example 7
 def _fixup_fileid_state(self):
     """For items on-disk we have to mimic the filename cleanup process."""
      for key in list(self.state.fileNameToFileID):
          fixed_key = create_mets_v2._fixup_path_input_by_user(
              Job("stub", "stub", []), key)
          self.state.fileNameToFileID[fixed_key] = (
              self.state.fileNameToFileID.pop(key))
Example 8
    def test_parse_metadata_csv_repeated_columns(self):
        """It should put repeated elements into a list of values."""
        # Create metadata.csv
        data = [
            ["Filename", "dc.title", "dc.type", "dc.type", "dc.type"],
            ["objects/foo.jpg", "Foo", "Photograph", "Still image", "Picture"],
        ]
        with self.metadata_file.open("w") as f:
            writer = csv.writer(f)
            for row in data:
                writer.writerow(row)

        # Run test
        dc = archivematicaCreateMETSMetadataCSV.parseMetadataCSV(
            Job("stub", "stub", []), str(self.metadata_file))
        # Verify
        assert dc
        assert "objects/foo.jpg" in dc
        assert "dc.title" in dc["objects/foo.jpg"]
        assert dc["objects/foo.jpg"]["dc.title"] == ["Foo"]
        assert "dc.type" in dc["objects/foo.jpg"]
        assert dc["objects/foo.jpg"]["dc.type"] == [
            "Photograph",
            "Still image",
            "Picture",
        ]
        assert list(dc["objects/foo.jpg"].keys()) == ["dc.title", "dc.type"]
Example 9
    def test_dmdsec_from_csv_parsed_metadata_both(self):
        """It should create a dmdSec for DC and Other parsed metadata."""
        data = collections.OrderedDict([
            ("dc.title", ["Yamani Weapons"]),
            ("dc.contributor", ["雪 ユキ"]),
            ("Title", ["Yamani Weapons"]),
            ("Contributor", ["雪 ユキ"]),
            (
                "Long Description",
                ["This is about how glaives are used in the Yamani Islands"],
            ),
        ])
        # Test
        state = create_mets_v2.MetsState()
        ret = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
            Job("stub", "stub", []), data, state)
        # Verify
        assert ret
        assert len(ret) == 2
        # Return can be DC or OTHER first, but in this case DC should be first
        dc_dmdsec = ret[0]
        assert dc_dmdsec.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in dc_dmdsec.attrib
        mdwrap = dc_dmdsec[0]
        assert mdwrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        assert "MDTYPE" in mdwrap.attrib
        assert mdwrap.attrib["MDTYPE"] == "DC"
        xmldata = mdwrap[0]
        assert xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
        dc_elem = xmldata[0]
        # Elements are children of dublincore tag
        assert dc_elem.tag == "{http://purl.org/dc/terms/}dublincore"
        assert len(dc_elem) == 2
        assert dc_elem[0].tag == "{http://purl.org/dc/elements/1.1/}title"
        assert dc_elem[0].text == "Yamani Weapons"
        assert dc_elem[1].tag == "{http://purl.org/dc/elements/1.1/}contributor"
        assert dc_elem[1].text == "雪 ユキ"

        other_dmdsec = ret[1]
        assert other_dmdsec.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in other_dmdsec.attrib
        mdwrap = other_dmdsec[0]
        assert mdwrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        assert "MDTYPE" in mdwrap.attrib
        assert mdwrap.attrib["MDTYPE"] == "OTHER"
        assert "OTHERMDTYPE" in mdwrap.attrib
        assert mdwrap.attrib["OTHERMDTYPE"] == "CUSTOM"
        xmldata = mdwrap[0]
        assert xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
        # Elements are direct children of xmlData
        assert len(xmldata) == 3
        assert xmldata[0].tag == "title"
        assert xmldata[0].text == "Yamani Weapons"
        assert xmldata[1].tag == "contributor"
        assert xmldata[1].text == "雪 ユキ"
        assert xmldata[2].tag == "long_description"
        assert (xmldata[2].text ==
                "This is about how glaives are used in the Yamani Islands")
Example 10
 def test_dmdsec_from_csv_parsed_metadata_no_data(self):
     """It should not create dmdSecs with no parsed metadata."""
     data = {}
     # Test
     state = create_mets_v2.MetsState()
     ret = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
         Job("stub", "stub", []), data, state)
     # Verify
     assert ret == []
Example 11
 def test_create_dc_dmdsec_no_dc_no_transfers_dir(self):
     """It should not fail if no transfers directory exists."""
     badsipuuid = "dnednedn-5bd2-4249-84a1-2f00f725b981"
     state = create_mets_v2.MetsState()
     dmdsec_elem = create_mets_v2.createDublincoreDMDSecFromDBData(
         Job("stub", "stub", []), self.siptypeuuid, badsipuuid, THIS_DIR,
         state)
     # Expect no element
     assert dmdsec_elem is None
Example 12
    def test_rows_processed_and_database_content_with_unicode_filepath(self):
        """Test CSV import using the RightsReader class when file paths have unicode characters in them.

        It should process all rows of the CSV file even if file paths have unicode characters in them.
        It should populate the rights-related models using data from the CSV file.
        """
        models.File.objects.get(pk="47813453-6872-442b-9d65-6515be3c5aa1")

        rights_csv_filepath = os.path.join(
            THIS_DIR, "fixtures/rights-unicode-filepath.csv")
        parser = rights_from_csv.RightCsvReader(
            Job("stub", "stub", []), self.transfer_uuid, rights_csv_filepath)
        rows_processed = parser.parse()

        assert rows_processed == 1

        # Test row 1
        row_1_rights_statement = models.RightsStatement.objects.order_by(
            "pk")[0]
        assert (row_1_rights_statement.metadataappliestotype ==
                self.get_metadata_applies_to_type_for_file())
        assert row_1_rights_statement.metadataappliestoidentifier == self.file_1_uuid
        assert row_1_rights_statement.status == "ORIGINAL"
        assert row_1_rights_statement.rightsbasis == "Copyright"

        row_1_copyright_info = models.RightsStatementCopyright.objects.order_by(
            "pk")[0]
        assert row_1_copyright_info.rightsstatement == row_1_rights_statement
        assert row_1_copyright_info.copyrightstatus == "cop status"
        assert row_1_copyright_info.copyrightjurisdiction == "cop juris"
        assert row_1_copyright_info.copyrightstatusdeterminationdate == "2001-01-01"
        assert row_1_copyright_info.copyrightapplicablestartdate == "2002-02-02"
        assert row_1_copyright_info.copyrightenddateopen is False
        assert row_1_copyright_info.copyrightapplicableenddate == "2003-03-03"

        row_1_copyright_identifier = (
            models.RightsStatementCopyrightDocumentationIdentifier.objects.
            order_by("pk")[0])
        assert (row_1_copyright_identifier.copyrightdocumentationidentifiertype
                == "cop type")
        assert (row_1_copyright_identifier.copyrightdocumentationidentifierrole
                == "cop role")

        row_1_copyright_note = models.RightsStatementCopyrightNote.objects.order_by(
            "pk")[0]
        assert row_1_copyright_note.rightscopyright == row_1_copyright_info
        assert row_1_copyright_note.copyrightnote == "cop note"

        row_1_grant = models.RightsStatementRightsGranted.objects.order_by(
            "pk")[0]
        assert row_1_grant.rightsstatement == row_1_rights_statement
        assert row_1_grant.act == "cop act"
        assert row_1_grant.startdate == "2004-04-04"
        assert row_1_grant.enddateopen is False
        assert row_1_grant.enddate == "2005-05-05"
Example 13
def test_json_csv_conversion_with_int_val(tmpdir):
    json_path = os.path.join(str(tmpdir), "metadata.json")
    csv_path = os.path.join(str(tmpdir), "metadata.csv")
    with open(json_path, "w") as jsonfile:
        jsonfile.write(JSON_INT_VAL)
    job = Job("stub", "stub", ["", json_path])
    json_metadata_to_csv.call([job])
    with open(csv_path, newline="") as csvfile:
        csvdata = csvfile.read()
    assert csvdata == CSV_INT_VAL
Example 14
def test_json_csv_with_nested_null_data(tmpdir):
    json_path = os.path.join(str(tmpdir), "metadata.json")
    csv_path = os.path.join(str(tmpdir), "metadata.csv")
    with open(json_path, "w") as jsonfile:
        jsonfile.write(JSON_NESTED_NULL)
    job = Job("stub", "stub", ["", json_path])
    json_metadata_to_csv.call([job])
    with open(csv_path, newline="") as csvfile:
        csvdata = csvfile.read()

    assert csvdata == CSV_NESTED_NULL
Example 15
 def test_provenance_string_no_command(self):
     """When nothing has happened, e.g. the checksums haven't been validated
     then it should be practically impossible to write to the database and
     generate some form of false-positive.
     """
     hash_file = "metadata/checksum.sha1"
     hashsum = self.setup_hashsum(hash_file, Job("stub", "stub", ["", ""]))
      # get_command_detail must raise when no command has been recorded.
      with pytest.raises(PREMISFailure):
          hashsum.get_command_detail()
Example 16
    def test_dmdsec_from_csv_parsed_metadata_repeats(self):
        """It should create multiple elements for repeated input."""
        data = collections.OrderedDict([("dc.contributor", ["Yuki", "雪 ユキ"]),
                                        ("Contributor", ["Yuki", "雪 ユキ"])])
        # Test
        state = create_mets_v2.MetsState()
        ret = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
            Job("stub", "stub", []), data, state)
        # Verify
        assert ret
        assert len(ret) == 2
        # Return can be DC or OTHER first, but in this case DC should be first
        dc_dmdsec = ret[0]
        assert dc_dmdsec.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in dc_dmdsec.attrib
        mdwrap = dc_dmdsec[0]
        assert mdwrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        assert "MDTYPE" in mdwrap.attrib
        assert mdwrap.attrib["MDTYPE"] == "DC"
        xmldata = mdwrap[0]
        assert xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
        dc_elem = xmldata[0]
        # Elements are children of dublincore tag
        assert dc_elem.tag == "{http://purl.org/dc/terms/}dublincore"
        assert len(dc_elem) == 2
        assert dc_elem[0].tag == "{http://purl.org/dc/elements/1.1/}contributor"
        assert dc_elem[0].text == "Yuki"
        assert dc_elem[1].tag == "{http://purl.org/dc/elements/1.1/}contributor"
        assert dc_elem[1].text == "雪 ユキ"

        other_dmdsec = ret[1]
        assert other_dmdsec.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in other_dmdsec.attrib
        mdwrap = other_dmdsec[0]
        assert mdwrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        assert "MDTYPE" in mdwrap.attrib
        assert mdwrap.attrib["MDTYPE"] == "OTHER"
        assert "OTHERMDTYPE" in mdwrap.attrib
        assert mdwrap.attrib["OTHERMDTYPE"] == "CUSTOM"
        xmldata = mdwrap[0]
        assert xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
        # Elements are direct children of xmlData
        assert len(xmldata) == 2
        assert xmldata[0].tag == "contributor"
        assert xmldata[0].text == "Yuki"
        assert xmldata[1].tag == "contributor"
        assert xmldata[1].text == "雪 ユキ"
Example 17
    def generate_aip_mets_v2_state(self):
        """Generate fileSec state

        State will be generated that will help us to test the units involved
        in creating a custom structmap in the AIP METS.
        """
        arbitrary_max_structmaps = 10
        self.transfer_dir = os.path.join(
            THIS_DIR,
            "fixtures",
            "custom_structmaps",
            "custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929",
            "",
        )
        self.objects_dir = os.path.join(self.transfer_dir, "objects")
        structMap = etree.Element(
            ns.metsBNS + "structMap",
            TYPE="physical",
            ID="structMap_1",
            LABEL="Archivematica default",
        )
        # Input to create_file_sec:
        #
        # <ns0:div xmlns:ns0="http://www.loc.gov/METS/"
        #          LABEL="3-031927e0-63bb-430c-8b37-fc799c132ca9"
        #          TYPE="Directory"
        #          DMDID="dmdSec_1"
        # />
        #
        sip_dir_name = os.path.basename(self.objects_dir.rstrip(os.path.sep))
        structMapDiv = etree.SubElement(structMap,
                                        ns.metsBNS + "div",
                                        TYPE="Directory",
                                        LABEL=sip_dir_name)
        self.state = create_mets_v2.MetsState()
        self.state.globalStructMapCounter = random.randrange(
            arbitrary_max_structmaps)
        self.structmap_div_element = create_mets_v2.createFileSec(
            job=Job("stub", "stub", []),
            directoryPath=self.objects_dir,
            parentDiv=structMapDiv,
            baseDirectoryPath=self.transfer_dir,
            baseDirectoryName="%SIPDirectory%",
            fileGroupIdentifier="3a915449-d1bb-4920-b274-c917c7bb5929",
            fileGroupType="sip_id",
            directories={},
            state=self.state,
            includeAmdSec=True,
        )
Example 18
    def test_parse_metadata_csv(self):
        """It should parse the metadata.csv into a dict."""
        # Create metadata.csv
        data = [
            ["Filename", "dc.title", "dc.date", "Other metadata"],
            ["objects/foo.jpg", "Foo", "2000", "Taken on a sunny day"],
            ["objects/bar/", "Bar", "2000", "All taken on a rainy day"],
        ]
        with self.metadata_file.open("w") as f:
            writer = csv.writer(f)
            for row in data:
                writer.writerow(row)

        # Run test
        dc = archivematicaCreateMETSMetadataCSV.parseMetadataCSV(
            Job("stub", "stub", []), str(self.metadata_file))
        # Verify
        assert dc
        assert "objects/foo.jpg" in dc
        assert "dc.title" in dc["objects/foo.jpg"]
        assert dc["objects/foo.jpg"]["dc.title"] == ["Foo"]
        assert "dc.date" in dc["objects/foo.jpg"]
        assert dc["objects/foo.jpg"]["dc.date"] == ["2000"]
        assert "Other metadata" in dc["objects/foo.jpg"]
        assert dc["objects/foo.jpg"]["Other metadata"] == [
            "Taken on a sunny day"
        ]
        assert list(dc["objects/foo.jpg"].keys()) == [
            "dc.title",
            "dc.date",
            "Other metadata",
        ]

        assert "objects/bar" in dc
        assert "dc.title" in dc["objects/bar"]
        assert dc["objects/bar"]["dc.title"] == ["Bar"]
        assert "dc.date" in dc["objects/bar"]
        assert dc["objects/bar"]["dc.date"] == ["2000"]
        assert "Other metadata" in dc["objects/bar"]
        assert dc["objects/bar"]["Other metadata"] == [
            "All taken on a rainy day"
        ]
        assert list(dc["objects/bar"].keys()) == [
            "dc.title",
            "dc.date",
            "Other metadata",
        ]
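
# The assertions above imply that a trailing slash on a directory row is
# stripped before the path becomes a key; a toy normalization to match:
def normalize_key(filename):
    return filename.rstrip("/")

assert normalize_key("objects/bar/") == "objects/bar"
assert normalize_key("objects/foo.jpg") == "objects/foo.jpg"
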
Example 19
    def test_parse_metadata_csv_non_ascii(self):
        """It should parse unicode."""
        # Create metadata.csv
        data = [["Filename", "dc.title"], ["objects/foo.jpg", "元気です"]]
        with self.metadata_file.open("w") as f:
            writer = csv.writer(f)
            for row in data:
                writer.writerow(row)

        # Run test
        dc = archivematicaCreateMETSMetadataCSV.parseMetadataCSV(
            Job("stub", "stub", []), str(self.metadata_file))
        # Verify
        assert dc
        assert "objects/foo.jpg" in dc
        assert "dc.title" in dc["objects/foo.jpg"]
        assert dc["objects/foo.jpg"]["dc.title"] == ["元気です"]
Example 20
 def test_line_comparison_fail(self, mocker):
     """If the checksum line and object comparison function fails then
     we want to return early and _call shouldn't be called.
     """
     hash_file = "metadata/checksum.sha1"
     hashsum = self.setup_hashsum(hash_file, Job("stub", "stub", ["", ""]))
     toolname = "sha1sum"
     mock = mocker.patch.object(hashsum, "_call", return_value=None)
     mocker.patch.object(hashsum,
                         "count_and_compare_lines",
                         return_value=False)
     mock.side_effect = subprocess.CalledProcessError(returncode=1,
                                                      cmd=toolname,
                                                      output=None)
     ret = hashsum.compare_hashes("")
     mock.assert_not_called()
     assert ret == 1, self.assert_return_value.format(ret)
Example 21
def test_sanitize_transfer_with_directory_uuids(tmp_path, transfer,
                                                subdir_path, transfer_dir_obj):
    sanitizer = sanitize_object_names.NameSanitizer(
        Job("stub", "stub", []),
        os.path.join(tmp_path.as_posix(), ""),
        transfer.uuid,
        "2017-01-04 19:35:22",
        "%transferDirectory%",
        "transfer_id",
        os.path.join(tmp_path.as_posix(), ""),
    )
    sanitizer.sanitize_objects()

    original_location = transfer_dir_obj.currentlocation
    transfer_dir_obj.refresh_from_db()

    assert transfer_dir_obj.currentlocation != original_location
    assert subdir_path.as_posix() not in transfer_dir_obj.currentlocation
Example 22
 def test_create_dc_dmdsec_dc_exists(self):
     """It should create a dmdSec if DC information exists."""
     # Generate dmdSec if DC exists
     state = create_mets_v2.MetsState()
     dmdsec_elem, dmdid = create_mets_v2.createDublincoreDMDSecFromDBData(
         Job("stub", "stub", []), self.siptypeuuid, self.sipuuid, THIS_DIR,
         state)
     # Verify created correctly
     assert dmdsec_elem is not None
     assert dmdsec_elem.tag == "{http://www.loc.gov/METS/}dmdSec"
     assert dmdsec_elem.attrib["ID"] == dmdid
     assert len(dmdsec_elem) == 1
     mdwrap = dmdsec_elem[0]
     assert mdwrap.tag == "{http://www.loc.gov/METS/}mdWrap"
     assert mdwrap.attrib["MDTYPE"] == "DC"
     assert len(mdwrap) == 1
     xmldata = mdwrap[0]
     assert xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
     assert len(xmldata) == 1
     assert xmldata[0].tag == "{http://purl.org/dc/terms/}dublincore"
Example 23
 def test_create_dc_dmdsec_no_dc_no_transfers(self):
     """It should not fail if no dublincore.xml exists from transfers."""
     badsipuuid = "dnednedn-5bd2-4249-84a1-2f00f725b981"
     sip_dir = Path(tempfile.mkdtemp()) / "emptysip"
     try:
         shutil.copytree(os.path.join(THIS_DIR, "fixtures", "emptysip"),
                         str(sip_dir))
         # Make sure directory is empty
         (sip_dir / "objects/metadata/transfers/.gitignore").unlink()
         state = create_mets_v2.MetsState()
         dmdsec_elem = create_mets_v2.createDublincoreDMDSecFromDBData(
             Job("stub", "stub", []),
             self.siptypeuuid,
             badsipuuid,
             str(sip_dir),
             state,
         )
         assert dmdsec_elem is None
     finally:
         shutil.rmtree(str(sip_dir.parent))
Example 24
    def test_parse_metadata_csv_blank_rows(self):
        """It should skip blank rows."""
        # Create metadata.csv
        data = [
            ["Filename", "dc.title", "dc.type", "dc.type", "dc.type"],
            ["objects/foo.jpg", "Foo", "Photograph", "Still image", "Picture"],
            [],
        ]
        with self.metadata_file.open("w") as f:
            writer = csv.writer(f)
            for row in data:
                writer.writerow(row)

        # Run test
        dc = archivematicaCreateMETSMetadataCSV.parseMetadataCSV(
            Job("stub", "stub", []), str(self.metadata_file))
        # Verify
        assert dc
        assert len(dc) == 1
        assert "objects/foo.jpg" in dc
Example 25
 def test_create_rights_granted(self):
     # Setup
     elem = etree.Element(
         "{http://www.loc.gov/premis/v3}rightsStatement",
         nsmap={"premis": NSMAP["premis"]},
     )
     statement = RightsStatement.objects.get(id=1)
     # Test
     state = create_mets_v2.MetsState()
     archivematicaCreateMETSRights.getrightsGranted(Job("stub", "stub", []),
                                                    statement, elem, state)
     # Verify
     assert len(elem) == 1
     rightsgranted = elem[0]
     assert rightsgranted.tag == "{http://www.loc.gov/premis/v3}rightsGranted"
     assert len(rightsgranted.attrib) == 0
     assert len(rightsgranted) == 4
     assert rightsgranted[0].tag == "{http://www.loc.gov/premis/v3}act"
     assert rightsgranted[0].text == "Disseminate"
     assert len(rightsgranted[0].attrib) == 0
     assert len(rightsgranted[0]) == 0
      assert rightsgranted[1].tag == "{http://www.loc.gov/premis/v3}restriction"
      assert rightsgranted[1].text == "Allow"
      assert len(rightsgranted[1].attrib) == 0
      assert len(rightsgranted[1]) == 0
      assert rightsgranted[2].tag == "{http://www.loc.gov/premis/v3}termOfGrant"
      assert len(rightsgranted[2].attrib) == 0
      assert len(rightsgranted[2]) == 2
      assert rightsgranted[2][0].tag == "{http://www.loc.gov/premis/v3}startDate"
      assert rightsgranted[2][0].text == "2000"
      assert rightsgranted[2][1].tag == "{http://www.loc.gov/premis/v3}endDate"
      assert rightsgranted[2][1].text == "OPEN"
      assert rightsgranted[3].tag == (
          "{http://www.loc.gov/premis/v3}rightsGrantedNote")
      assert rightsgranted[3].text == "Attribution required"
      assert len(rightsgranted[3].attrib) == 0
      assert len(rightsgranted[3]) == 0
Example 26
def handle_batch_task(task_name, batch_payload):
    tasks = batch_payload["tasks"]

    utc_date = getUTCDate()
    jobs = []
    for task_uuid in tasks:
        task_data = tasks[task_uuid]
        arguments = task_data["arguments"]

        replacements = list(replacement_dict.items()) + list(
            {
                r"%date%": utc_date.isoformat(),
                r"%taskUUID%": task_uuid,
                r"%jobCreatedDate%": task_data["createdDate"],
            }.items())

        for var, val in replacements:
            arguments = arguments.replace(var, val)

        job = Job(
            task_name,
            task_data["uuid"],
            _parse_command_line(arguments),
            caller_wants_output=task_data["wants_output"],
        )
        jobs.append(job)

    # Set their start times.  If we collide with the MCP Server inserting new
    # Tasks (which can happen under heavy concurrent load), retry as needed.
    def set_start_times():
        Task.objects.filter(taskuuid__in=[item.UUID for item in jobs]).update(
            starttime=utc_date)

    retryOnFailure("Set task start times", set_start_times)

    # All tasks in a batch execute the same client script, so the last
    # task_data from the loop above identifies the module to import.
    module = importlib.import_module("a3m.client.clientScripts." +
                                     task_data["execute"])
    module.call(jobs)

    return jobs
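
# Small self-contained illustration of the placeholder substitution performed
# in the loop above; the %...% markers are replaced literally per task:
arguments = 'copy "%taskUUID%" created on %date%'
for var, val in {"%date%": "2017-01-04", "%taskUUID%": "stub-uuid"}.items():
    arguments = arguments.replace(var, val)
assert arguments == 'copy "stub-uuid" created on 2017-01-04'
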
Example 27
 def test_get_included_structmap_incomplete_mets(self):
     """Test the output of custom structmaps in create_mets_v2 where the
     structMap is incomplete.
     """
     self.generate_aip_mets_v2_state()
     self._fixup_fileid_state()
     default_structmap = "mets_structmap.xml"
     Result = collections.namedtuple("Result",
                                     "structmap_name structmap_id")
     results = [
         Result("no-contentids.xml", "custom_structmap"),
         Result("file_does_not_exist.xml", "custom_structmap"),
         Result("empty_filenames.xml", "custom_structmap"),
         Result("missing_contentid.xml", "custom_structmap"),
     ]
     for res in results:
         self.state = create_mets_v2.MetsState()
         structmap_path = os.path.join(
             self.objects_dir,
             "metadata",
             "transfers",
             "custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51",
              res.structmap_name or default_structmap,
         )
         assert os.path.isfile(structmap_path)
         assert os.path.isfile(self.mets_xsd_path)
         self.validate_mets(self.mets_xsd_path, structmap_path)
         custom_structmap = create_mets_v2.include_custom_structmap(
             job=Job("stub", "stub", []),
             baseDirectoryPath=self.transfer_dir,
             state=self.state,
             custom_structmap=res.structmap_name,
         )
         assert (
             custom_structmap == []
         ), "Return from include_custom_structmap should be an empty array: {}".format(
             custom_structmap)
         assert (self.state.error_accumulator.error_count == 1
                 ), "error counter should be incremented on error"
Example 28
 def test_dmdsec_from_csv_parsed_metadata_other_only(self):
     """It should only create an Other dmdSec from parsed metadata."""
     data = collections.OrderedDict([
         ("Title", ["Yamani Weapons"]),
         ("Contributor", ["雪 ユキ"]),
         (
             "Long Description",
             ["This is about how glaives are used in the Yamani Islands"],
         ),
     ])
     # Test
     state = create_mets_v2.MetsState()
     ret = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
         Job("stub", "stub", []), data, state)
     # Verify
     assert ret
     assert len(ret) == 1
     dmdsec = ret[0]
     assert dmdsec.tag == "{http://www.loc.gov/METS/}dmdSec"
     assert "ID" in dmdsec.attrib
     mdwrap = dmdsec[0]
     assert mdwrap.tag == "{http://www.loc.gov/METS/}mdWrap"
     assert "MDTYPE" in mdwrap.attrib
     assert mdwrap.attrib["MDTYPE"] == "OTHER"
     assert "OTHERMDTYPE" in mdwrap.attrib
     assert mdwrap.attrib["OTHERMDTYPE"] == "CUSTOM"
     xmldata = mdwrap[0]
     assert xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
     # Elements are direct children of xmlData
     assert len(xmldata) == 3
     assert xmldata[0].tag == "title"
     assert xmldata[0].text == "Yamani Weapons"
     assert xmldata[1].tag == "contributor"
     assert xmldata[1].text == "雪 ユキ"
     assert xmldata[2].tag == "long_description"
     assert (xmldata[2].text ==
             "This is about how glaives are used in the Yamani Islands")
Example 29
def test_sanitize_sip(tmp_path, sip, subdir_path, sip_dir_obj, sip_file_obj):
    sanitizer = sanitize_object_names.NameSanitizer(
        Job("stub", "stub", []),
        os.path.join(tmp_path.as_posix(), ""),
        sip.uuid,
        "2017-01-04 19:35:22",
        r"%SIPDirectory%",
        "sip_id",
        os.path.join(tmp_path.as_posix(), ""),
    )
    sanitizer.sanitize_objects()

    original_dir_location = sip_dir_obj.currentlocation
    sip_dir_obj.refresh_from_db()

    assert sip_dir_obj.currentlocation != original_dir_location
    assert subdir_path.as_posix() not in sip_dir_obj.currentlocation

    original_file_location = sip_file_obj.currentlocation
    sip_file_obj.refresh_from_db()

    assert sip_file_obj.currentlocation != original_file_location
    assert subdir_path.as_posix() not in sip_file_obj.currentlocation
    assert "file" in sip_file_obj.currentlocation
Example 30
 def test_get_included_structmap_valid_mets(self):
     """Test the valid output of custom structmaps in create_mets_v2."""
     self.generate_aip_mets_v2_state()
     self._fixup_fileid_state()
     default_structmap = "mets_structmap.xml"
     Result = collections.namedtuple(
         "Result", "structmap_name files replaced_count structmap_id")
     results = [
         Result(None, ["objects/test_file.flac"], 1, None),
         Result(
             "simple_book_structmap.xml",
             ["objects/test_file.jpg", "objects/test_file.png"],
             2,
             None,
         ),
         Result("mets_area_structmap.xml", ["test_file.mp3"], 6, None),
         Result(
             "unicode_simple_book_structmap.xml",
             [
                 "objects/página_de_prueba.jpg",
                 "objects/página_de_prueba.png"
             ],
             2,
             "custom_structmap",
         ),
         Result(
             "nested_file_structmap.xml",
             ["objects/nested_dir/nested_file.rdata"],
             6,
             None,
         ),
         Result(
             "complex_book_structmap.xml",
             [
                 "objects/nested_dir/duplicate_file_name.png",
                 "objects/duplicate_file_name.png",
             ],
             2,
             None,
         ),
         Result(
             "path_with_spaces_structmap.xml",
             ["objects/dir-with-dashes/file with spaces.bin"],
             1,
             None,
         ),
     ]
     for res in results:
         structmap_path = os.path.join(
             self.objects_dir,
             "metadata",
             "transfers",
             "custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51",
              res.structmap_name or default_structmap,
         )
         assert os.path.isfile(structmap_path)
         assert os.path.isfile(self.mets_xsd_path)
         self.validate_mets(self.mets_xsd_path, structmap_path)
         # Ensure that we test default behavior.
         if not res.structmap_name:
             custom_structmap = create_mets_v2.include_custom_structmap(
                 job=Job("stub", "stub", []),
                 baseDirectoryPath=self.transfer_dir,
                 state=self.state,
             )[0]
         else:
             # Expand the scope of testing to all our sample structmaps.
             custom_structmap = create_mets_v2.include_custom_structmap(
                 job=Job("stub", "stub", []),
                 baseDirectoryPath=self.transfer_dir,
                 state=self.state,
                 custom_structmap=res.structmap_name,
             )[0]
         # All custom structmaps that are used and return from this function
         # should remain valid.
         self.validate_mets(self.mets_xsd_path, custom_structmap)
         assert custom_structmap.tag == f"{{{ns.metsNS}}}structMap"
          if not res.structmap_id:
              expected_id = f"structmap_{self.state.globalStructMapCounter}"
              assert (custom_structmap.attrib["ID"].lower() == expected_id
                      ), "structmap id is incorrect"
          else:
              assert (custom_structmap.attrib["ID"].lower() ==
                      res.structmap_id), "structmap id hasn't been maintained"
          fids = custom_structmap.xpath("//*[@FILEID]",
                                        namespaces={"mets": ns.metsNS})
          assert len(fids) == res.replaced_count, "Count of FILEIDs is incorrect"
          assert len({fid.attrib["FILEID"] for fid in fids}) == len(
              res.files), "Uneven replacement of IDs for files in structmap"
          for fileid in (fid.attrib["FILEID"] for fid in fids):
              assert (fileid in self.state.fileNameToFileID.values()
                      ), "Expected FILEID not in returned structmap"