def create_structmap(workspace, divs, structmap, filegrp, path=''):
    """Populate ``structmap`` recursively from a directory tree.

    Every key of ``divs`` is handled in one of two ways:

    * A key ending with ``-techmd.xml`` is treated as a file. The suffix
      is removed, a file element is added to ``filegrp`` through
      ``add_file_to_filesec`` and an fptr pointing to it is collected.
    * Any other key is treated as a directory. A div element is created
      for it and this function recurses into ``divs[key]`` with the new
      div as the parent element.

    :workspace: workspace path passed on to the metadata lookup helpers
    :divs: mapping describing the directory structure
    :structmap: parent element that receives the created children
    :filegrp: element that receives the created file elements
    :path: encoded path of the directory being processed
    :returns: None, ``structmap`` and ``filegrp`` are modified in place
    """
    techmd_suffix = '-techmd.xml'
    file_pointers = []
    child_divs = []

    for name in divs.keys():
        if not name.endswith(techmd_suffix):
            # Directory entry: build a div and descend into its subtree
            dir_path = encode_path(os.path.join(decode_path(path), name))
            amdids = get_links_event_agent(workspace, dir_path)
            _, dmdsec_id = ids_for_files(workspace, dir_path, 'dmdsec.xml')
            div_el = mets.div(type_attr=name,
                              dmdid=dmdsec_id,
                              admid=amdids)
            child_divs.append(div_el)
            create_structmap(workspace, divs[name], div_el, filegrp,
                             dir_path)
            continue

        # File entry: strip the suffix, then create the file + fptr pair
        file_name = name[:-len(techmd_suffix)]
        file_path = encode_path(os.path.join(decode_path(path), file_name))
        amdids = get_links_event_agent(workspace, file_path)
        fileid = add_file_to_filesec(workspace, file_path, filegrp, amdids)
        file_pointers.append(mets.fptr(fileid))

    # The fptr elements are appended before the div elements
    for child in file_pointers + child_divs:
        structmap.append(child)
def test_decode_path():
    """Check ``utils.decode_path`` with three kinds of input.

    Covered cases: a plain URL encoded path, a path with a suffix given
    in the ``suffix`` argument, and a path with percent-encoded
    non-ASCII characters.
    """
    # Plain percent-encoded path separator
    plain = utils.decode_path('tests%2Ftestpath')
    assert plain == 'tests/testpath'

    # The given suffix is not part of the decoded result
    without_suffix = utils.decode_path('tests%2Ftestpath-testsuffix',
                                       suffix='-testsuffix')
    assert without_suffix == 'tests/testpath'

    # Percent-encoded non-ASCII characters
    non_ascii = utils.decode_path('t%C3%A4sts%2Ft%C3%B8stpath')
    assert non_ascii == 't\u00e4sts/t\u00f8stpath'
def div_structure(workspace):
    """Build the div tree used for a directory-based structmap.

    The names of the entries in ``workspace`` that contain the string
    ``techmd`` are decoded with ``decode_path`` and split on ``/``. Each
    list of path components is then inserted into a tree with ``add``.

    :workspace: directory that is scanned for techmd files
    :returns: the tree holding the path components
    """
    # Collect all matching names before the tree is touched
    techmd_names = [entry.name
                    for entry in scandir.scandir(workspace)
                    if 'techmd' in entry.name]

    divs = tree()
    for techmd_name in techmd_names:
        components = decode_path(techmd_name).split('/')
        add(divs, components)
    return divs
def copy_files(workspace, data_dir):
    """Copy the digital objects from ``data_dir`` into ``workspace``.

    For each regular file in ``workspace`` whose name ends with
    ``-techmd.xml``, the name is decoded into a relative path. The file
    at that path under ``data_dir`` is copied to the same relative path
    under ``workspace``. A missing target directory is created first.

    :workspace: directory containing the techmd files, also the target
    :data_dir: directory the digital objects are copied from
    :returns: None
    """
    techmd_suffix = '-techmd.xml'
    for entry in scandir(workspace):
        # Skip everything that is not a techmd file
        if not (entry.name.endswith(techmd_suffix) and entry.is_file()):
            continue

        source = decode_path(entry.name, techmd_suffix)
        target = os.path.join(workspace, source)

        target_dir = os.path.dirname(target)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        copyfile(os.path.join(data_dir, source), target)
def test_create_addml_with_flatfile(is_header):
    """Check that ``create_addml_metadata`` writes a flatFile element
    when the optional ``flatfile_name`` parameter is given.
    """
    expected_name = "path/to/test"
    addml_etree = create_addml.create_addml_metadata(
        CSV_FILE,
        DELIMITER,
        is_header,
        CHARSET,
        RECORDSEPARATOR,
        QUOTINGCHAR,
        flatfile_name="path/to/test"
    )

    # The name attribute of flatFile must decode back to the given path
    flatfile = addml_etree.find(ADDML_NS + "flatFile")
    stored_name = flatfile.get("name")
    assert decode_path(stored_name) == expected_name
def test_create_addml_with_flatfile(is_header):
    """Check that ``create_addml_metadata`` writes a flatFile element
    when the optional ``flatfile_name`` parameter is given.

    All arguments are passed to ``create_addml_metadata`` by keyword.
    """
    expected_name = "path/to/test"
    addml_etree = create_addml.create_addml_metadata(
        csv_file=CSV_FILE,
        delimiter=DELIMITER,
        isheader=is_header,
        charset=CHARSET,
        record_separator=RECORDSEPARATOR,
        quoting_char=QUOTINGCHAR,
        flatfile_name="path/to/test"
    )

    # The name attribute of flatFile must decode back to the given path
    flatfile = addml_etree.find(ADDML_NS + "flatFile")
    stored_name = flatfile.get("name")
    assert decode_path(stored_name) == expected_name
def add_file_to_filesec(workspace, path, filegrp, amdids):
    """Create a file element for ``path`` and append it to ``filegrp``.

    The element gets a new identifier, an underscore followed by a
    random UUID. Its administrative metadata references are the techmd
    IDs found for ``path``, followed by ``amdids``, followed by the IDs
    gathered with ``read_temp_othermdfile`` for each supported metadata
    type.

    :workspace: workspace path passed on to the metadata lookup helpers
    :path: encoded path of the file
    :filegrp: element that receives the created file element
    :amdids: administrative metadata IDs linked to the file
    :returns: identifier of the created file element
    """
    techmd_files, techmd_ids = ids_for_files(workspace, path, 'techmd.xml')
    fileid = '_' + str(uuid4())

    # The file path comes from the name of the first techmd file
    filepath = decode_path(techmd_files[0], '-techmd.xml')

    # Each call receives the IDs collected so far and returns the new list
    othermd_ids = []
    for mdtype in ('addml', 'mix', 'videomd', 'audiomd', 'textmd'):
        othermd_ids = read_temp_othermdfile(workspace, mdtype, filepath,
                                            othermd_ids)

    admids = techmd_ids + amdids + othermd_ids
    file_el = mets.file_elem(
        fileid,
        admid_elements=admids,
        loctype='URL',
        xlink_href='file://%s' % filepath,
        xlink_type='simple',
        groupid=None
    )
    filegrp.append(file_el)

    return fileid
def test_create_addml_creator(testpath, isheader, exp_amd_files, exp_fields):
    """Check that ``create_addml`` produces the ADDML files and the
    md-reference file without unnecessary duplication.

    For each expected ADDML file the test verifies the number of
    children in flatFiles, the names of the first flatFiles children and
    the names of the field definitions.
    """
    _create_addml(testpath, isheader)

    # Expectations shared by all parametrizations, indexed by the
    # position of the ADDML file in ``exp_amd_files``
    exp_flatfiles_child_count = [4, 3]
    exp_csvs = [
        ['tests/data/simple_csv.csv', 'tests/data/simple_csv_2.csv'],
        ['tests/data/csvfile.csv']
    ]

    # The md-reference file has to exist
    reference_file = os.path.join(testpath,
                                  'create-addml-md-references.jsonl')
    assert os.path.isfile(reference_file)

    for file_no, amd_name in enumerate(exp_amd_files):
        # The ADDML file has to exist before it can be parsed
        amd_path = os.path.join(testpath, amd_name)
        assert os.path.isfile(amd_path)

        addml = ET.parse(amd_path)
        flat_files = addml.find(ADDML_NS + "flatFiles")

        # Number of child elements in flatFiles
        assert len(flat_files) == exp_flatfiles_child_count[file_no]

        # The right CSV files are listed in flatFiles
        for position, exp_csv in enumerate(exp_csvs[file_no]):
            stored_name = flat_files[position].get('name')
            assert decode_path(stored_name) == exp_csv

        # Field names within fieldDefinitions
        field_definitions = addml.find(ADDML_NS + "fieldDefinitions")
        for position, field in enumerate(field_definitions):
            assert field.get('name') == exp_fields[file_no][position]