Ejemplo n.º 1
0
 def test_collect_mdsec_elements(self):
     """Collected metadata sections list every dmdSec ahead of the amdSec."""
     # First entry carries both an amdSec and a dmdSec.
     entry_with_both = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
     entry_with_both.amdsecs.append(metsrw.AMDSec())
     entry_with_both.dmdsecs.append(metsrw.SubSection('dmdSec', None))
     # Second entry carries a dmdSec only.
     entry_dmd_only = metsrw.FSEntry('file2.txt', file_uuid=str(uuid.uuid4()))
     entry_dmd_only.dmdsecs.append(metsrw.SubSection('dmdSec', None))

     document = metsrw.METSDocument()
     collected = document._collect_mdsec_elements(
         [entry_with_both, entry_dmd_only])

     assert isinstance(collected, list)
     assert len(collected) == 3
     # Ordering check: the two dmdSecs come first, the amdSec comes last.
     for position in (0, 1):
         assert isinstance(collected[position], metsrw.SubSection)
         assert collected[position].subsection == 'dmdSec'
     assert isinstance(collected[2], metsrw.AMDSec)
Ejemplo n.º 2
0
def add_new_files(mets, sip_uuid, sip_dir):
    """
    Register files found on disk but missing from the METS.

    Every file under ``<sip_dir>/objects`` is looked up in ``mets`` by its
    path relative to the SIP directory. Files that are not found (other than
    the previous METS file itself) are fetched from the database, given an
    amdSec and attached to the METS structure under the 'objects' directory
    entry, creating directory entries where needed.

    This supports adding new metadata or preservation files. If one of the
    new files is objects/metadata/metadata.csv, it is handed to
    ``update_metadata_csv`` so dmdSecs can be created from it.

    :param mets: METS document to look files up in and add entries to.
    :param sip_uuid: UUID of the SIP the files belong to.
    :param sip_dir: Absolute path of the SIP directory on disk.
    :return: ``mets`` itself when nothing new was found; otherwise the
        updated document (the return value of ``update_metadata_csv`` when a
        new metadata.csv was present).
    """
    # Open questions carried over from the original implementation:
    # - How tell new file from old with same name? Check hash?
    # - Should the metadata.csv be parsed and only updated if different even
    #   if one already existed?
    previous_mets_path = _get_old_mets_rel_path(sip_uuid)
    discovered = []
    csv_file = None

    # Pass 1: walk the objects directory and collect unknown files.
    for root, _, names in os.walk(os.path.join(sip_dir, 'objects')):
        for name in names:
            placeholder_path = os.path.join(root, name).replace(
                sip_dir, '%SIPDirectory%', 1)
            rel_path = placeholder_path.replace('%SIPDirectory%', '', 1)
            print('Looking for', rel_path, 'in METS')
            if mets.get_file(path=rel_path) is not None:
                print(rel_path, 'found in METS, no further work needed')
                continue
            if rel_path == previous_mets_path:
                # The old METS file is expected to be absent; skip silently.
                continue
            print(rel_path, 'not found in METS, must be new file')
            db_file = models.File.objects.get(
                currentlocation=placeholder_path, sip_id=sip_uuid)
            discovered.append(db_file)
            if rel_path == 'objects/metadata/metadata.csv':
                csv_file = db_file

    if not discovered:
        return mets

    def count_nodes(expression):
        # Number of nodes in the METS tree matching the XPath expression.
        return int(mets.tree.xpath(expression, namespaces=ns.NSMAP))

    # Set global counters so getAMDSec will work
    createmets2.globalAmdSecCounter = count_nodes('count(mets:amdSec)')
    createmets2.globalTechMDCounter = count_nodes(
        'count(mets:amdSec/mets:techMD)')
    createmets2.globalDigiprovMDCounter = count_nodes(
        'count(mets:amdSec/mets:digiprovMD)')

    objects_root = mets.get_file(label='objects', type='Directory')

    # Pass 2: build an amdSec and an FSEntry for each new file.
    for db_file in discovered:
        print('Adding amdSec for', db_file.currentlocation,
              '(', db_file.uuid, ')')
        amdsec_tree, amdsec_id = createmets2.getAMDSec(
            fileUUID=db_file.uuid,
            filePath=None,  # Only needed if use=original
            use=db_file.filegrpuse,
            type=None,  # Not used
            sip_uuid=sip_uuid,
            transferUUID=None,  # Only needed if use=original
            itemdirectoryPath=None,  # Only needed if use=original
            typeOfTransfer=None,  # Only needed if use=original
            baseDirectoryPath=sip_dir,
        )
        print(db_file.uuid, 'has amdSec with ID', amdsec_id)

        # Descend through the file's directories below objects/, creating
        # directory entries that the METS does not have yet.
        # NOTE(review): the lookup is given only the label, not the current
        # parent - confirm same-named directories cannot be confused.
        below_objects = db_file.currentlocation.replace(
            '%SIPDirectory%objects/', '', 1)
        container = objects_root
        for label in os.path.dirname(below_objects).split('/'):
            if not label:
                continue
            directory = mets.get_file(type='Directory', label=label)
            if directory is None:
                directory = metsrw.FSEntry(
                    path=None, type='Directory', label=label)
                container.add_child(directory)
            container = directory

        # Link a derivative back to the entry of the file it came from.
        source_entry = None
        if db_file.original_file_set.exists():
            source_uuid = db_file.original_file_set.get().source_file.uuid
            source_entry = mets.get_file(file_uuid=source_uuid)

        file_entry = metsrw.FSEntry(
            path=db_file.currentlocation.replace('%SIPDirectory%', '', 1),
            use=db_file.filegrpuse,
            type='Item',
            file_uuid=db_file.uuid,
            derived_from=source_entry,
        )
        file_entry.amdsecs.append(
            metsrw.AMDSec(tree=amdsec_tree, section_id=amdsec_id))
        container.add_child(file_entry)

    # Parse metadata.csv and add dmdSecs
    if csv_file:
        mets = update_metadata_csv(mets, csv_file, sip_uuid, sip_dir)

    return mets
Ejemplo n.º 3
0
 def test_identifier(self):
     """A newly created AMDSec reports a non-empty identifier string."""
     # The identifier should be in the format 'amdSec_1'; only its
     # truthiness is asserted here.
     identifier = metsrw.AMDSec().id_string()
     assert identifier
Ejemplo n.º 4
0
 def test_tree_overwrites_serialize(self):
     """serialize() returns the element that was supplied as ``tree``."""
     supplied_tree = etree.Element('temp')
     section = metsrw.AMDSec(tree=supplied_tree, section_id='id1')
     serialized = section.serialize()
     assert serialized == supplied_tree
Ejemplo n.º 5
0
 def test_tree_no_id(self):
     """Supplying ``tree`` without ``section_id`` raises a ValueError."""
     tree_without_id = etree.Element('amdSec')
     with pytest.raises(ValueError) as caught:
         metsrw.AMDSec(tree=tree_without_id)
     # The error message must name the missing argument.
     message = str(caught.value)
     assert 'section_id' in message
Ejemplo n.º 6
0
def add_new_files(job, mets, sip_uuid, sip_dir):
    """
    Register files found on disk but missing from the METS.

    Every file under ``<sip_dir>/objects`` is looked up in ``mets`` by its
    path relative to the SIP directory. Files that are not found (other than
    the previous METS file itself) are fetched from the database, given an
    amdSec and attached to the METS structure under the "objects" directory
    entry, creating directory entries where needed.

    This supports adding new metadata or preservation files. If one of the
    new files is objects/metadata/metadata.csv, it is handed to
    ``update_metadata_csv`` so dmdSecs can be created from it.

    :param job: Job object; progress messages are written via ``pyprint``.
    :param mets: METS document to look files up in and add entries to.
    :param sip_uuid: UUID of the SIP the files belong to.
    :param sip_dir: Absolute path of the SIP directory on disk.
    :return: ``mets`` itself when nothing new was found; otherwise the
        updated document (the return value of ``update_metadata_csv`` when a
        new metadata.csv was present).
    """
    # Open questions carried over from the original implementation:
    # - How tell new file from old with same name? Check hash?
    # - Should the metadata.csv be parsed and only updated if different even
    #   if one already existed?
    previous_mets_path = _get_old_mets_rel_path(sip_uuid)
    discovered = []
    csv_file = None

    # Pass 1: walk the objects directory and collect unknown files.
    for root, _, names in os.walk(os.path.join(sip_dir, "objects")):
        for name in names:
            placeholder_path = os.path.join(root, name).replace(
                sip_dir, "%SIPDirectory%", 1
            )
            rel_path = placeholder_path.replace("%SIPDirectory%", "", 1)
            job.pyprint("Looking for", rel_path, "in METS")
            if mets.get_file(path=rel_path) is not None:
                job.pyprint(rel_path, "found in METS, no further work needed")
                continue
            if rel_path == previous_mets_path:
                # The old METS file is expected to be absent; skip silently.
                continue
            job.pyprint(rel_path, "not found in METS, must be new file")
            db_file = models.File.objects.get(
                currentlocation=placeholder_path, sip_id=sip_uuid
            )
            discovered.append(db_file)
            if rel_path == "objects/metadata/metadata.csv":
                csv_file = db_file

    if not discovered:
        return mets

    # Seed the counters from metsrw's current ID counts so getAMDSec will work
    amdsec_count = metsrw.AMDSec.get_current_id_count()
    techmd_count = metsrw.SubSection.get_current_id_count("techMD")
    digiprovmd_count = metsrw.SubSection.get_current_id_count("digiprovMD")
    state = createmets2.MetsState(
        globalAmdSecCounter=amdsec_count,
        globalTechMDCounter=techmd_count,
        globalDigiprovMDCounter=digiprovmd_count,
    )

    objects_root = mets.get_file(label="objects", type="Directory")

    # Pass 2: build an amdSec and an FSEntry for each new file.
    for db_file in discovered:
        job.pyprint(
            "Adding amdSec for", db_file.currentlocation, "(", db_file.uuid, ")"
        )
        amdsec_tree, amdsec_id = createmets2.getAMDSec(
            job,
            fileUUID=db_file.uuid,
            filePath=None,  # Only needed if use=original
            use=db_file.filegrpuse,
            sip_uuid=sip_uuid,
            transferUUID=None,  # Only needed if use=original
            itemdirectoryPath=None,  # Only needed if use=original
            typeOfTransfer=None,  # Only needed if use=original
            baseDirectoryPath=sip_dir,
            state=state,
        )
        job.pyprint(db_file.uuid, "has amdSec with ID", amdsec_id)

        # Descend through the file's directories below objects/, creating
        # directory entries that the METS does not have yet.
        # NOTE(review): the lookup is given only the label, not the current
        # parent - confirm same-named directories cannot be confused.
        below_objects = db_file.currentlocation.replace(
            "%SIPDirectory%objects/", "", 1
        )
        container = objects_root
        for label in os.path.dirname(below_objects).split("/"):
            if not label:
                continue
            directory = mets.get_file(type="Directory", label=label)
            if directory is None:
                directory = metsrw.FSEntry(path=None, type="Directory", label=label)
                container.add_child(directory)
            container = directory

        # Link a derivative back to the entry of the file it came from.
        source_entry = None
        if db_file.original_file_set.exists():
            source_uuid = db_file.original_file_set.get().source_file.uuid
            source_entry = mets.get_file(file_uuid=source_uuid)

        file_entry = metsrw.FSEntry(
            path=db_file.currentlocation.replace("%SIPDirectory%", "", 1),
            use=db_file.filegrpuse,
            type="Item",
            file_uuid=db_file.uuid,
            derived_from=source_entry,
        )
        file_entry.amdsecs.append(
            metsrw.AMDSec(tree=amdsec_tree, section_id=amdsec_id)
        )
        container.add_child(file_entry)

    # Parse metadata.csv and add dmdSecs
    if csv_file:
        mets = update_metadata_csv(job, mets, csv_file, sip_uuid, sip_dir, state)

    return mets