def test_mets_dnx_with_json_structmap_IDs():
    """Check that no two fptr elements in the built METS share a FILEID.

    Regression test: a bug was found where every fptr element in the
    structMap received the file ID of the first file.
    """
    dc_record = {"dc:title": "test title"}
    pres_master = (
        '[ {"fileOriginalName": "img1.jpg", '
        '"fileOriginalPath": "path/to/files/img1.jpg", '
        '"MD5": "aff64bf1391ac627edb3234a422f9a77", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image One", '
        '"note": "This is a note for image 1", '
        '"fileSizeBytes": "119191"}, '
        '{"fileOriginalName": "img2.jpg", '
        '"fileOriginalPath": "path/to/files/img2.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Two", '
        '"note": "This is a note for image 2", '
        '"fileSizeBytes": "119192"} ]'
    )
    batch_dir = os.path.join(CURRENT_DIR, 'data', 'test_batch_2')
    mets = mdf.build_mets_from_json(
        ie_dmd_dict=dc_record,
        pres_master_json=pres_master,
        input_dir=batch_dir,
        digital_original=True)

    # Gather the FILEID of every fptr found anywhere below the root.
    seen_ids = [
        pointer.attrib['FILEID']
        for pointer in mets.findall('.//{http://www.loc.gov/METS/}fptr')
    ]
    # A set drops duplicates, so equal lengths mean every ID is distinct.
    assert len(seen_ids) == len(set(seen_ids))
def test_mets_dnx_with_json_supply_filesizebytes():
    """Check the fileSizeBytes values of a METS built from JSON.

    The JSON file descriptions supply explicit "fileSizeBytes" values.
    Every ``key`` element with ``id="fileSizeBytes"`` found in the result
    must carry one of those supplied values. The loop passes without
    checking anything if no such element is found.
    """
    dc_record = {"dc:title": "test title"}
    pres_master = (
        '[ {"fileOriginalName": "img1.jpg", '
        '"fileOriginalPath": "path/to/files/img1.jpg", '
        '"MD5": "aff64bf1391ac627edb3234a422f9a77", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image One", '
        '"note": "This is a note for image 1", '
        '"fileSizeBytes": "119191"}, '
        '{"fileOriginalName": "img2.jpg", '
        '"fileOriginalPath": "path/to/files/img2.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Two", '
        '"note": "This is a note for image 2", '
        '"fileSizeBytes": "119192"} ]'
    )
    batch_dir = os.path.join(CURRENT_DIR, 'data', 'test_batch_2')
    mets = mdf.build_mets_from_json(
        ie_dmd_dict=dc_record,
        pres_master_json=pres_master,
        input_dir=batch_dir,
        digital_original=True)
    # Dump the document so it shows up in the captured output on failure.
    print(ET.tounicode(mets, pretty_print=True))

    supplied_sizes = ['119191', '119192']
    for size_key in mets.findall('.//key[@id="fileSizeBytes"]'):
        assert size_key.text in supplied_sizes
def test_mm_and_ad_for_json_mets():
    """Check that a METS built from PM, MM and AD JSON has three structMaps.

    The METS is built from preservation-master, modified-master and
    access-derivative JSON. The test then verifies that structMap elements
    with the IDs "rep1-1", "rep2-1" and "rep3-1" exist directly under the
    root (presumably one per representation - confirm against mdf).
    """
    ie_dc_dict = {"dc:title": "test title"}
    pm_json = (
        '[ {"fileOriginalName": "presmaster.jpg", '
        '"fileOriginalPath": "pm/presmaster.jpg", '
        '"MD5": "aff64bf1391ac627edb3234a422f9a77", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image One", '
        '"note": "This is a note for image 1", '
        '"fileSizeBytes": "119191"}]'
    )
    mm_json = (
        '[{"fileOriginalName": "modifiedmaster.jpg", '
        '"fileOriginalPath": "mm/modifiedmaster.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Two", '
        '"note": "This is a note for image 2", '
        '"fileSizeBytes": "119192"}]'
    )
    ad_json = (
        '[{"fileOriginalName": "deriv.jpg", '
        '"fileOriginalPath": "ad/deriv.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Three", '
        '"note": "This is a note for image 3", '
        '"fileSizeBytes": "119192"}, '
        '{"fileOriginalName": "deriv_2.jpg", '
        '"fileOriginalPath": "ad/deriv_2.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Three", '
        '"note": "This is a note for image 3", '
        '"fileSizeBytes": "119192"} ]'
    )
    mets = mdf.build_mets_from_json(
        ie_dmd_dict=ie_dc_dict,
        pres_master_json=pm_json,
        modified_master_json=mm_json,
        access_derivative_json=ad_json,
        input_dir=os.path.join(CURRENT_DIR, 'data', 'test_batch_3'),
        digital_original=True)

    # Identity comparison is the correct way to test for None; "!= None"
    # relies on the element type's equality implementation.
    for structmap_id in ("rep1-1", "rep2-1", "rep3-1"):
        structmap = mets.find(
            './{http://www.loc.gov/METS/}structMap[@ID="%s"]' % structmap_id)
        assert structmap is not None, (
            "missing structMap with ID %s" % structmap_id)
    print(ET.tounicode(mets, pretty_print=True))
def test_mets_dnx_with_json_structmap_IDs():
    """Make sure files are in the correct order in the StructMap.

    Three files are described in the JSON, in the order img1, img2, img3.
    The LABEL attributes of the div elements nested below the second div
    level must come back in that same order.
    """
    # NOTE(review): an earlier test in this module is defined under this same
    # name. This later definition rebinds the name, so only this test is
    # collected and the earlier one never runs. One of the two should be
    # renamed (e.g. to reflect that this one checks ordering).
    ie_dc_dict = {"dc:title": "test title"}
    pm_json = (
        '[ {"fileOriginalName": "img1.jpg", '
        '"fileOriginalPath": "img1.jpg", '
        '"MD5": "aff64bf1391ac627edb3234a422f9a77", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image One", '
        '"note": "This is a note for image 1", '
        '"fileSizeBytes": "119191"}, '
        '{"fileOriginalName": "img2.jpg", '
        '"fileOriginalPath": "img2.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Two", '
        '"note": "This is a note for image 2", '
        '"fileSizeBytes": "119192"}, '
        '{"fileOriginalName": "img3.jpg", '
        '"fileOriginalPath": "img3.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Three", '
        '"note": "This is a note for image 3", '
        '"fileSizeBytes": "119192"} ]'
    )
    mets = mdf.build_mets_from_json(
        ie_dmd_dict=ie_dc_dict,
        pres_master_json=pm_json,
        input_dir=os.path.join(CURRENT_DIR, 'data', 'test_batch_3'),
        digital_original=True)
    div_list = mets.findall(
        './/{http://www.loc.gov/METS/}div'
        '/{http://www.loc.gov/METS/}div'
        '//{http://www.loc.gov/METS/}div')
    # Serialise through ET.tounicode, as the sibling tests in this module do.
    # The previous mets.tounicode(...) call assumed a method on the element
    # itself, which the other tests never rely on.
    print(ET.tounicode(mets, pretty_print=True))
    div_labels = [div.attrib['LABEL'] for div in div_list]
    assert div_labels == ["Image One", "Image Two", "Image Three"]
def test_structmap_has_table_of_contents_div_for_json():
    """Make sure the structMap has a 'Table of Contents' div.

    The first div found two levels below a structMap element must have
    the LABEL "Table of Contents".
    """
    ie_dc_dict = {"dc:title": "test title"}
    pm_json = (
        '[ {"fileOriginalName": "img1.jpg", '
        '"fileOriginalPath": "img1.jpg", '
        '"MD5": "aff64bf1391ac627edb3234a422f9a77", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image One", '
        '"note": "This is a note for image 1", '
        '"fileSizeBytes": "119191"}, '
        '{"fileOriginalName": "img2.jpg", '
        '"fileOriginalPath": "img2.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Two", '
        '"note": "This is a note for image 2", '
        '"fileSizeBytes": "119192"}, '
        '{"fileOriginalName": "img3.jpg", '
        '"fileOriginalPath": "img3.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Three", '
        '"note": "This is a note for image 3", '
        '"fileSizeBytes": "119192"} ]'
    )
    mets = mdf.build_mets_from_json(
        ie_dmd_dict=ie_dc_dict,
        pres_master_json=pm_json,
        input_dir=os.path.join(CURRENT_DIR, 'data', 'test_batch_3'),
        digital_original=True)
    toc_div = mets.find(
        './/{http://www.loc.gov/METS/}structMap'
        '/{http://www.loc.gov/METS/}div'
        '/{http://www.loc.gov/METS/}div')
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the attribute lookup below.
    assert toc_div is not None, "no second-level div found in any structMap"
    toc_label = toc_div.attrib['LABEL']
    print(toc_label)
    assert toc_label == 'Table of Contents'
def test_mets_dnx_with_json_for_admid_in_filesec_files():
    """Check the ADMID / amdSec ID attributes of a METS built from JSON.

    Every file element under fileSec/fileGrp must have an ADMID attribute
    ending in "-amd", and every amdSec element must have an ID attribute
    ending in "-amd". The JSON in this test supplies no fileSizeBytes.
    """
    dc_record = {"dc:title": "test title"}
    pres_master = (
        '[ {"fileOriginalName": "img1.jpg", '
        '"fileOriginalPath": "path/to/files/img1.jpg", '
        '"MD5": "aff64bf1391ac627edb3234a422f9a77", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image One", '
        '"note": "This is a note for image 1"}, '
        '{"fileOriginalName": "img2.jpg", '
        '"fileOriginalPath": "path/to/files/img2.jpg", '
        '"MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9", '
        '"fileCreationDate": "1st of January, 1601", '
        '"fileModificationDate": "1st of January, 1601", '
        '"label": "Image Two", '
        '"note": "This is a note for image 2"} ]'
    )
    batch_dir = os.path.join(CURRENT_DIR, 'data', 'test_batch_2')
    mets = mdf.build_mets_from_json(
        ie_dmd_dict=dc_record,
        pres_master_json=pres_master,
        input_dir=batch_dir,
        digital_original=True)
    # Dump the document so it shows up in the captured output on failure.
    print(ET.tounicode(mets, pretty_print=True))

    file_xpath = (
        ".//{http://www.loc.gov/METS/}fileSec"
        "/{http://www.loc.gov/METS/}fileGrp"
        "/{http://www.loc.gov/METS/}file")
    for file_el in mets.findall(file_xpath):
        assert "ADMID" in file_el.attrib
        assert file_el.attrib['ADMID'].endswith('-amd')

    for amd_el in mets.findall(".//{http://www.loc.gov/METS/}amdSec"):
        assert "ID" in amd_el.attrib
        assert amd_el.attrib["ID"].endswith("-amd")