Exemple #1
0
    def _delete_files(self):
        """
        Delete AIP local files once stored in LOCKSS from disk and pointer file.

        Helper to update_package_status.

        If ``self.keep_local`` is true, only the local copies of the LOCKSS
        chunks are deleted.  Otherwise every FLocat with LOCTYPE='OTHER' and
        OTHERLOCTYPE='SYSTEM' is deleted, a 'deletion' PREMIS event is added
        and, if the AIP was split into LOCKSS chunks, the fileGrp and structMap
        div describing the local copy are removed as well.

        Only the in-memory ``self.pointer_root`` is modified; this method does
        not write the pointer file itself.

        :return: None
        """
        # Select the FLocat elements whose files should be deleted
        if self.keep_local:
            # Only the local paths of the LOCKSS chunks
            flocat_xpath = ".//mets:fileGrp[@USE='LOCKSS chunk']/*/mets:FLocat[@LOCTYPE='OTHER' and @OTHERLOCTYPE='SYSTEM']"
        else:
            # All local paths
            flocat_xpath = ".//mets:FLocat[@LOCTYPE='OTHER' and @OTHERLOCTYPE='SYSTEM']"
        delete_elements = self.pointer_root.xpath(
            flocat_xpath, namespaces=utils.NSMAP)
        LOGGER.debug('delete_elements: %s', delete_elements)

        # Delete paths from delete_elements from disk, and remove from METS
        href_attr = utils.PREFIX_NS['xlink'] + 'href'
        for element in delete_elements:
            path = element.get(href_attr)
            LOGGER.debug('path to delete: %s', path)
            if path is None:
                # os.remove(None) would raise TypeError, which the OSError
                # handler below does not catch; nothing to delete on disk.
                LOGGER.warning('FLocat has no xlink:href; nothing to delete from disk')
            else:
                try:
                    os.remove(path)
                except OSError as e:
                    # A file that is already gone is fine; anything else is
                    # logged but does not abort the remaining deletions.
                    if e.errno != errno.ENOENT:
                        LOGGER.exception('Could not delete %s', path)
            element.getparent().remove(element)

        # Update pointer file
        # If delete_elements is empty, then this function has probably already
        # been run, and we don't want to add another delete event
        if self.keep_local or not delete_elements:
            return None

        amdsec = self.pointer_root.find('mets:amdSec', namespaces=utils.NSMAP)
        # Add 'deletion' PREMIS:EVENT
        utils.mets_add_event(
            amdsec,
            event_type='deletion',
            event_outcome_detail_note='AIP deleted from local storage',
        )

        # Nothing more to clean up unless the file was split
        was_split = self.pointer_root.find(
            ".//mets:fileGrp[@USE='LOCKSS chunk']",
            namespaces=utils.NSMAP) is not None
        if not was_split:
            return None

        # Remove the entries describing the (now deleted) local copy:
        # fileGrp USE="AIP" first, then structMap div TYPE='Local copy'.
        obsolete_xpaths = (
            ".//mets:fileGrp[@USE='Archival Information Package']",
            ".//mets:structMap/*/mets:div[@TYPE='Local copy']",
        )
        for obsolete_xpath in obsolete_xpaths:
            del_elem = self.pointer_root.find(
                obsolete_xpath, namespaces=utils.NSMAP)
            # Either element may be absent (e.g. no 'Local copy' div was ever
            # created), so only remove what is actually there.
            if del_elem is not None:
                del_elem.getparent().remove(del_elem)
        return None
Exemple #2
0
    def _split_package(self, package):
        """
        Splits the package into chunks of size self.au_size. Returns list of paths to the chunks.

        If the package has already been split (and a 'division' event is in the
        pointer file), returns the list of chunk file identifiers recorded in
        the pointer file instead of splitting again.

        Otherwise updates the pointer file with the new LOCKSS chunks, adds a
        'division' event, and writes the pointer file back to disk.

        :param package: Object providing ``full_path`` (the file to split) and
            ``full_pointer_file_path`` (the METS pointer file to read/rewrite).
        :return: List of chunk paths; a single-element list containing
            ``package.full_path`` when no split is needed.
        :raises Exception: Whatever ``subprocess.check_call`` raises when the
            tar split command fails (logged, then re-raised).
        """
        # Parse pointer file
        # Compare against None explicitly: truth-testing an lxml element
        # reflects whether it has children, not whether it is set.
        if self.pointer_root is None:
            self.pointer_root = etree.parse(package.full_pointer_file_path)

        # Check if file is already split, and if so just return split files
        if self.pointer_root.xpath('.//premis:eventType[text()="division"]', namespaces=utils.NSMAP):
            chunks = self.pointer_root.findall(".//mets:div[@TYPE='Archival Information Package']/mets:div[@TYPE='LOCKSS chunk']", namespaces=utils.NSMAP)
            # NOTE(review): FILEID is written below as the chunk's basename, so
            # this branch returns basenames while a fresh split returns full
            # paths -- confirm callers handle both.
            output_files = [c.find('mets:fptr', namespaces=utils.NSMAP).get('FILEID') for c in chunks]
            return output_files

        file_path = package.full_path
        # Force float division so the ceiling is not lost to integer division
        expected_num_files = math.ceil(os.path.getsize(file_path) / float(self.au_size))
        LOGGER.debug('expected_num_files: %s', expected_num_files)

        # No split needed - just return the file path
        if expected_num_files <= 1:
            LOGGER.debug('Only one file expected, not splitting')
            output_files = [file_path]
            # No events or structMap changes needed
            LOGGER.info('LOCKSS: after splitting: %s', output_files)
            return output_files

        # Split file
        # Strip extension, add .tar-1 ('-1' to make rename script happy)
        output_path = os.path.splitext(file_path)[0] + '.tar-1'
        command = [
            'tar', '--create', '--multi-volume',
            '--tape-length', str(self.au_size),
            '--new-volume-script', 'common/tar_new_volume.sh',
            '-f', output_path, file_path,
        ]
        # TODO reserve space in quota for extra files
        LOGGER.info('LOCKSS split command: %s', command)
        try:
            subprocess.check_call(command)
        except Exception:
            LOGGER.exception("Split of %s failed with command %s", file_path, command)
            raise
        output_path = output_path[:-2]  # Remove '-1'
        dirname, basename = os.path.split(output_path)

        def _volume_sort_key(entry):
            # Order names ending in '-<digits>' by that number so that '-10'
            # sorts after '-2'; other names keep plain string ordering.
            # Presumably the volume script names chunks '<basename>-<N>'.
            head, sep, tail = entry.rpartition('-')
            if sep and tail.isdigit():
                return (head + sep, int(tail))
            return (entry, -1)

        chunk_names = [entry for entry in os.listdir(dirname) if entry.startswith(basename)]
        chunk_names.sort(key=_volume_sort_key)
        output_files = [os.path.join(dirname, name) for name in chunk_names]

        # Update pointer file
        amdsec = self.pointer_root.find('mets:amdSec', namespaces=utils.NSMAP)

        # Add 'division' PREMIS:EVENT
        try:
            event_detail = subprocess.check_output(['tar', '--version'])
        except subprocess.CalledProcessError as e:
            event_detail = e.output or _('Error: getting tool info; probably GNU tar')
        utils.mets_add_event(
            amdsec,
            event_type='division',
            event_detail=event_detail,
            event_outcome_detail_note='{} LOCKSS chunks created'.format(len(output_files)),
        )

        # Update structMap & fileSec
        self.pointer_root.find('mets:structMap', namespaces=utils.NSMAP).set('TYPE', 'logical')
        aip_div = self.pointer_root.find("mets:structMap/mets:div[@TYPE='Archival Information Package']", namespaces=utils.NSMAP)
        filesec = self.pointer_root.find('mets:fileSec', namespaces=utils.NSMAP)
        filegrp = etree.SubElement(filesec, utils.PREFIX_NS['mets'] + 'fileGrp', USE='LOCKSS chunk')

        # Move fptr to Local copy div
        local_fptr = aip_div.find('mets:fptr', namespaces=utils.NSMAP)
        if local_fptr is not None:
            div = etree.SubElement(aip_div, utils.PREFIX_NS['mets'] + 'div', TYPE='Local copy')
            div.append(local_fptr)  # This moves local_fptr

        # Add each split chunk to structMap & fileSec
        for idx, out_path in enumerate(output_files):
            chunk_id = os.path.basename(out_path)
            # Add div to structMap
            div = etree.SubElement(aip_div, utils.PREFIX_NS['mets'] + 'div', TYPE='LOCKSS chunk', ORDER=str(idx + 1))
            etree.SubElement(div, utils.PREFIX_NS['mets'] + 'fptr', FILEID=chunk_id)
            # Get checksum and size for fileSec
            try:
                checksum = utils.generate_checksum(out_path, self.checksum_type)
            except ValueError:  # Invalid checksum type
                checksum = utils.generate_checksum(out_path, 'md5')
            checksum_name = checksum.name.upper().replace('SHA', 'SHA-')
            size = os.path.getsize(out_path)
            # Add file & FLocat to fileSec
            file_e = etree.SubElement(
                filegrp, utils.PREFIX_NS['mets'] + 'file',
                ID=chunk_id, SIZE=str(size),
                CHECKSUM=checksum.hexdigest(), CHECKSUMTYPE=checksum_name)
            flocat = etree.SubElement(file_e, utils.PREFIX_NS['mets'] + 'FLocat', OTHERLOCTYPE="SYSTEM", LOCTYPE="OTHER")
            # Use the same qualified attribute name (PREFIX_NS, as for the
            # mets tags above) that _delete_files reads the href back with.
            flocat.set(utils.PREFIX_NS['xlink'] + 'href', out_path)

        # Write out pointer file again
        # etree.tostring() with an encoding returns bytes, so write in binary mode
        with open(package.full_pointer_file_path, 'wb') as f:
            f.write(etree.tostring(self.pointer_root, pretty_print=True, xml_declaration=True, encoding='utf-8'))

        return output_files