Example #1
0
    def replace(self, *strings):
        """
        Apply every key -> value substitution in self to each given string.

        Accepts any number of strings and always returns a list, e.g.:

        >>> rd = ReplacementDict({"$foo": "bar"})
        >>> rd.replace('The value of the foo variable is: $foo')
        ['The value of the foo variable is: bar']

        IMPORTANT NOTE: Any unicode strings present as dictionary values will
        be converted into bytestrings. All returned strings will also be
        bytestrings, regardless of the type of the original strings.
        Returned strings may or may not be valid Unicode, depending on the
        contents of data fetched from the database. (%originalLocation%,
        for instance, may contain arbitrary non-Unicode characters of
        nonspecific encoding.)

        Note that, within, Archivematica, the only value that typically
        contains Unicode characters is "%originalLocation%", and Archivematica
        does not use this variable in any place where precise fidelity of the
        original string is required.

        None values are passed through unchanged.
        """
        results = []
        for text in strings:
            if text is None:
                results.append(text)
                continue
            text = unicodeToStr(text)
            for pattern, substitution in self.iteritems():
                text = text.replace(pattern, unicodeToStr(substitution))
            results.append(text)
        return results
Example #2
0
 def start(self):
     """Poll self.directory for added/removed entries until stopped.

     Based on polling example: http://timgolden.me.uk/python/win32_how_do_i/watch_directory_for_changes.html

     Every self.interval seconds the directory listing is compared with the
     previous snapshot; self.event(...) is fired once per added entry and
     once per removed entry. Setting self.run to False stops the loop after
     the current sleep.
     """
     self.run = True
     LOGGER.info('Watching directory %s (Files: %s)', self.directory,
                 self.alertOnFiles)
     # Snapshot of the directory contents; values are unused (the dict is
     # effectively a set of names).
     before = dict([(f, None) for f in os.listdir(self.directory)])
     while self.run:
         time.sleep(self.interval)
         after = dict([(f, None) for f in os.listdir(self.directory)])
         added = [f for f in after if f not in before]
         removed = [f for f in before if f not in after]
         if added:
             LOGGER.debug('Added %s', added)
             for i in added:
                 # unicodeToStr presumably normalizes both parts to
                 # bytestrings before joining -- TODO confirm.
                 i = unicodeToStr(i)
                 directory = unicodeToStr(self.directory)
                 self.event(os.path.join(directory, i), self.variablesAdded,
                            self.callBackFunctionAdded)
         if removed:
             LOGGER.debug('Removed %s', removed)
             for i in removed:
                 i = unicodeToStr(i)
                 directory = unicodeToStr(self.directory)
                 self.event(os.path.join(directory,
                                         i), self.variablesRemoved,
                            self.callBackFunctionRemoved)
         before = after
Example #3
0
def updateFileLocation(src, dst, eventType, eventDateTime, eventDetail, eventIdentifierUUID=None, fileUUID="None", sipUUID=None, transferUUID=None, eventOutcomeDetailNote=""):
    """Update a file's currentLocation in the database and record an event.

    If fileUUID is not provided (or is "None"), the sip/transfer UUID and
    the old path (src) are used to look up the file UUID. Exits the process
    with status 6 if no file UUID can be determined.
    """
    # The old default, eventIdentifierUUID=uuid.uuid4().__str__(), was
    # evaluated only once at import time, so every defaulted call shared a
    # single event UUID. Generate one per call instead.
    if eventIdentifierUUID is None:
        eventIdentifierUUID = uuid.uuid4().__str__()
    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)
    if not fileUUID or fileUUID == "None":
        sql = "Need to define transferUUID or sipUUID"
        # Escape every interpolated value (not only src) so unusual
        # identifiers cannot break the query or inject SQL.
        if sipUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.sipUUID = '" + MySQLdb.escape_string(sipUUID) + "';"
        elif transferUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.transferUUID = '" + MySQLdb.escape_string(transferUUID) + "';"
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            fileUUID = unicodeToStr(row[0])
            row = c.fetchone()
        sqlLock.release()

    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = "Original name=\"%s\"; cleaned up name=\"%s\"" %(src, dst)
    #CREATE THE EVENT
    if not fileUUID:
        print >>sys.stderr, "Unable to find file uuid for: ", src, " -> ", dst
        exit(6)
    insertIntoEvents(fileUUID=fileUUID, eventIdentifierUUID=eventIdentifierUUID, eventType=eventType, eventDateTime=eventDateTime, eventDetail=eventDetail, eventOutcome="", eventOutcomeDetailNote=eventOutcomeDetailNote)

    #UPDATE THE CURRENT FILE PATH
    sql =  """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(dst), MySQLdb.escape_string(fileUUID))
    databaseInterface.runSQL(sql)
Example #4
0
 def start(self):
     """Poll self.directory for added and removed entries until stopped.

     Based on polling example: http://timgolden.me.uk/python/win32_how_do_i/watch_directory_for_changes.html

     Compares successive directory listings every self.interval seconds and
     fires self.event(...) for each added or removed entry. Setting
     self.run to False stops the loop after the current sleep.
     """
     self.run = True
     if DEBUG:
         print "watching directory: ", self.directory
     # Snapshot of directory contents; values are unused (dict acts as a set).
     before = dict([(f, None) for f in os.listdir(self.directory)])
     while self.run:
         time.sleep(self.interval)
         after = dict([(f, None) for f in os.listdir(self.directory)])
         added = [f for f in after if not f in before]
         removed = [f for f in before if not f in after]
         if added:
             if DEBUG:
                 print "Added: ", ", ".join(added)
             for i in added:
                 # unicodeToStr presumably normalizes names to bytestrings
                 # before path joining -- TODO confirm.
                 i = unicodeToStr(i)
                 directory = unicodeToStr(self.directory)
                 self.event(os.path.join(directory, i), self.variablesAdded,
                            self.callBackFunctionAdded)
         if removed:
             if DEBUG:
                 print "Removed: ", ", ".join(removed)
             for i in removed:
                 i = unicodeToStr(i)
                 directory = unicodeToStr(self.directory)
                 self.event(os.path.join(directory,
                                         i), self.variablesRemoved,
                            self.callBackFunctionRemoved)
         before = after
Example #5
0
 def start(self):
     """Poll self.directory for added and removed entries until stopped.

     Based on polling example: http://timgolden.me.uk/python/win32_how_do_i/watch_directory_for_changes.html

     Compares successive directory listings every self.interval seconds and
     fires self.event(...) for each added or removed entry. Setting
     self.run to False stops the loop after the current sleep.
     """
     self.run = True
     if DEBUG:
         print "watching directory: ", self.directory
     # Snapshot of directory contents; values are unused (dict acts as a set).
     before = dict ([(f, None) for f in os.listdir (self.directory)])
     while self.run:
         time.sleep (self.interval)
         after = dict ([(f, None) for f in os.listdir (self.directory)])
         added = [f for f in after if not f in before]
         removed = [f for f in before if not f in after]
         if added: 
             if DEBUG:
                 print "Added: ", ", ".join (added)
             for i in added:
                 # unicodeToStr presumably normalizes names to bytestrings
                 # before path joining -- TODO confirm.
                 i = unicodeToStr(i)
                 directory = unicodeToStr(self.directory)
                 self.event(os.path.join(directory, i), self.variablesAdded, self.callBackFunctionAdded)
         if removed:
             if DEBUG: 
                 print "Removed: ", ", ".join (removed)
             for i in removed:
                 i = unicodeToStr(i)
                 directory = unicodeToStr(self.directory)
                 self.event(os.path.join(directory, i), self.variablesRemoved, self.callBackFunctionRemoved)
         before = after
Example #6
0
def directory_children(request, basePath=False):
    """Return the non-hidden entries (and which of them are readable
    directories) under basePath + GET base_path + GET path, as JSON."""
    path = ''
    if basePath:
        path = path + basePath
    path = path + request.GET.get('base_path', '')
    path = path + request.GET.get('path', '')

    entries = []
    directories = []

    for name in sorted_directory_list(path):
        name = archivematicaFunctions.strToUnicode(name)
        # Skip dotfiles.
        if unicode(name)[0] != '.':
            entries.append(name)
            child = archivematicaFunctions.unicodeToStr(
                os.path.join(path, name))
            if os.path.isdir(child) and os.access(child, os.R_OK):
                directories.append(name)

    response = {'entries': entries, 'directories': directories}

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json')
Example #7
0
def start_transfer_logged_in(request):
    """
    Endpoint for starting a transfer if logged in and calling from the dashboard.
    """
    if request.method != 'POST':
        return django.http.HttpResponseNotAllowed(['POST'])

    post = request.POST
    transfer_name = archivematicaFunctions.unicodeToStr(post.get('name', ''))
    transfer_type = archivematicaFunctions.unicodeToStr(post.get('type', ''))
    accession = archivematicaFunctions.unicodeToStr(post.get('accession', ''))
    # Note that the path may contain arbitrary, non-unicode characters,
    # and hence is POSTed to the server base64-encoded
    paths = [base64.b64decode(p) for p in post.getlist('paths[]', [])]
    row_ids = post.getlist('row_ids[]', [])

    try:
        response = start_transfer(transfer_name, transfer_type, accession,
                                  paths, row_ids)
    except ValueError as e:
        return helpers.json_response(
            {'error': True, 'message': str(e)}, status_code=400)
    except storage_service.StorageServiceError as e:
        return helpers.json_response(
            {'error': True, 'message': str(e)}, status_code=500)
    return helpers.json_response(response)
Example #8
0
def copy_transfer_component(request):
    """Copy a transfer component into the destination directory.

    POST params: name (transfer name), path (source), destination.
    A path containing '.zip' is treated as a zipped bag and copied as a
    single file; anything else is copied entry-by-entry into
    destination/name. Responds with a JSON message/error payload.
    """
    transfer_name = archivematicaFunctions.unicodeToStr(request.POST.get('name', ''))
    path = archivematicaFunctions.unicodeToStr(request.POST.get('path', ''))
    destination = archivematicaFunctions.unicodeToStr(request.POST.get('destination', ''))

    error = None
    paths_copied = 0

    if transfer_name == '':
        error = 'No transfer name provided.'
    elif path == '':
        error = 'No path provided.'
    elif '.zip' in path.lower():
        # If the transfer component path leads to a ZIP file, treat it as a
        # zipped bag. (The original used str.index + a bare except as
        # control flow, which also silently masked copy failures.)
        shutil.copy(path, destination)
        paths_copied = 1
    else:
        transfer_dir = os.path.join(destination, transfer_name)

        # Create directory before it is used, otherwise shutil.copy()
        # would use that location to store a file
        if not os.path.isdir(transfer_dir):
            os.mkdir(transfer_dir)

        # cycle through each path copying files/dirs inside it to transfer dir
        for entry in sorted_directory_list(path):
            entry_path = os.path.join(path, entry)
            if os.path.isdir(entry_path):
                destination_dir = os.path.join(transfer_dir, entry)
                try:
                    shutil.copytree(
                        entry_path,
                        destination_dir
                    )
                except Exception:
                    error = 'Error copying from ' + entry_path + ' to ' + destination_dir + '. (' + str(sys.exc_info()[0]) + ')'
            else:
                shutil.copy(entry_path, transfer_dir)

            paths_copied = paths_copied + 1

    response = {}

    if error is not None:
        response['message'] = error
        response['error'] = True
    else:
        response['message'] = 'Copied ' + str(paths_copied) + ' entries.'

    return HttpResponse(
        simplejson.JSONEncoder().encode(response),
        mimetype='application/json'
    )
Example #9
0
def updateFileLocation(
    src,
    dst,
    eventType="",
    eventDateTime="",
    eventDetail="",
    eventIdentifierUUID=None,
    fileUUID="None",
    sipUUID=None,
    transferUUID=None,
    eventOutcomeDetailNote="",
    createEvent=True,
):
    """
    Updates file location in the database, and optionally writes an event for the sanitization to the database.
    Note that this does not actually move a file on disk.
    If the file uuid is not provided, will use the SIP uuid and the old path to find the file uuid.
    To suppress creation of an event, pass the createEvent keyword argument (for example, if the file moved due to the renaming of a parent directory and not the file itself).

    :raises ValueError: if none of fileUUID, sipUUID, transferUUID is given.
    """
    # The old default, eventIdentifierUUID=uuid.uuid4().__str__(), was
    # evaluated only once at import time, so every defaulted call shared a
    # single UUID. Generate one per call instead. The parameter is currently
    # not forwarded to insertIntoEvents (presumably it creates its own
    # identifier -- TODO confirm) but is kept for interface compatibility.
    if eventIdentifierUUID is None:
        eventIdentifierUUID = str(uuid.uuid4())

    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)
    if not fileUUID or fileUUID == "None":
        kwargs = {"removedtime__isnull": True, "currentlocation": src}

        if sipUUID:
            kwargs["sip_id"] = sipUUID
        elif transferUUID:
            kwargs["transfer_id"] = transferUUID
        else:
            raise ValueError(
                "One of fileUUID, sipUUID, or transferUUID must be provided")

        f = File.objects.get(**kwargs)
    else:
        f = File.objects.get(uuid=fileUUID)

    # UPDATE THE CURRENT FILE PATH
    f.currentlocation = dst
    f.save()

    if not createEvent:
        return

    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = 'Original name="%s"; cleaned up name="%s"' % (
            src, dst)
    # CREATE THE EVENT
    insertIntoEvents(
        fileUUID=f.uuid,
        eventType=eventType,
        eventDateTime=eventDateTime,
        eventDetail=eventDetail,
        eventOutcome="",
        eventOutcomeDetailNote=eventOutcomeDetailNote,
    )
Example #10
0
def createUnitAndJobChain(path, config, terminate=False):
    path = unicodeToStr(path)
    if os.path.isdir(path):
            path = path + "/"
    print "createUnitAndJobChain", path, config
    unit = None
    if os.path.isdir(path):
        if config[3] == "SIP":
            UUID = findOrCreateSipInDB(path)
            unit = unitSIP(path, UUID)
        elif config[3] == "DIP":
            UUID = findOrCreateSipInDB(path)
            unit = unitDIP(path, UUID)
        elif config[3] == "Transfer":
            #UUID = findOrCreateSipInDB(path)
            unit = unitTransfer(path)
    elif os.path.isfile(path):
        if config[3] == "Transfer":
            unit = unitTransfer(path)
        else:
            return
            UUID = uuid.uuid4()
            unit = unitFile(path, UUID)
    else:
        return
    jobChain(unit, config[1])
    if terminate:
        exit(0)
Example #11
0
def directory_children(request, basePath=False):
    """List non-hidden entries below the requested path as JSON, noting
    which entries are readable directories."""
    path = ''
    if basePath:
        path += basePath
    path += request.GET.get('base_path', '')
    path += request.GET.get('path', '')

    entries = []
    directories = []

    for child in sorted_directory_list(path):
        child = archivematicaFunctions.strToUnicode(child)
        # Hidden entries (leading dot) are excluded entirely.
        if unicode(child)[0] == '.':
            continue
        entries.append(child)
        child_path = archivematicaFunctions.unicodeToStr(
            os.path.join(path, child))
        if os.path.isdir(child_path) and os.access(child_path, os.R_OK):
            directories.append(child)

    response = {
      'entries': entries,
      'directories': directories
    }

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json'
    )
Example #12
0
def approve_transfer(request):
    """Approve a named transfer via the MCP.

    Example: curl --data \
      "username=mike&api_key=<API key>&directory=MyTransfer" \
      http://127.0.0.1/api/transfer/approve
    """
    if request.method != 'POST':
        return django.http.HttpResponseNotAllowed(permitted_methods=['POST'])

    response = {}
    auth_error = authenticate_request(request)
    if auth_error is not None:
        response['message'] = auth_error
        response['error'] = True
        return helpers.json_response(response, status_code=403)

    directory = archivematicaFunctions.unicodeToStr(
        request.POST.get('directory', ''))
    transfer_type = request.POST.get('type', 'standard')
    error, unit_uuid = approve_transfer_via_mcp(
        directory, transfer_type, request.user.id)

    if error is not None:
        response['message'] = error
        response['error'] = True
        return helpers.json_response(response, status_code=500)

    response['message'] = 'Approval successful.'
    response['uuid'] = unit_uuid
    return helpers.json_response(response)
Example #13
0
    def reloadFileList(self):
        """Match files to their UUID's via their location and the File table's currentLocation.

        Rebuilds self.fileList by walking the unit's directory on disk
        (keys are %-placeholder-style paths), then overlays UUID and
        fileGrpUse from the unit's File rows in the database. Database rows
        with no matching file on disk are logged as warnings. Any error
        terminates the process with exit status 1.
        """
        self.fileList = {}
        # currentPath must be a string to return all filenames as bytestrings,
        # and to safely concatenate with other bytestrings
        currentPath = os.path.join(self.currentPath.replace("%sharedPath%", django_settings.SHARED_DIRECTORY, 1), "").encode('utf-8')
        try:
            for directory, subDirectories, files in os.walk(currentPath):
                # Re-key each walked directory relative to the unit's
                # placeholder path string.
                directory = directory.replace(currentPath, self.pathString, 1)
                for file_ in files:
                    if self.pathString != directory:
                        filePath = os.path.join(directory, file_)
                    else:
                        # Top level: plain concatenation, presumably because
                        # pathString's trailing-separator handling differs
                        # from os.path.join here -- TODO confirm.
                        filePath = directory + file_
                    self.fileList[filePath] = unitFile(filePath, owningUnit=self)

            if self.unitType == "Transfer":
                files = File.objects.filter(transfer_id=self.UUID)
            else:
                files = File.objects.filter(sip_id=self.UUID)
            for f in files:
                currentlocation = archivematicaFunctions.unicodeToStr(f.currentlocation)
                if currentlocation in self.fileList:
                    self.fileList[currentlocation].UUID = f.uuid
                    self.fileList[currentlocation].fileGrpUse = f.filegrpuse
                else:
                    LOGGER.warning('%s %s has file (%s) %s in the database, but file does not exist in the file system',
                                   self.unitType, self.UUID, f.uuid, f.currentlocation)
        except Exception:
            LOGGER.exception('Error reloading file list for %s', currentPath)
            exit(1)
Example #14
0
def createUnitAndJobChain(path, config, terminate=False):
    """Create the appropriate unit for path and run its job chain.

    config[3] selects the unit type ("SIP", "DIP" or "Transfer");
    config[1] selects the chain. Single files are only processed as
    Transfers; other single files and non-existent paths are ignored.
    If terminate is True, the process exits after the chain starts.
    """
    path = unicodeToStr(path)
    if os.path.isdir(path):
        path = path + "/"
    logger.debug('Creating unit and job chain for %s with %s', path, config)
    unit = None
    if os.path.isdir(path):
        if config[3] == "SIP":
            UUID = findOrCreateSipInDB(path)
            unit = unitSIP(path, UUID)
        elif config[3] == "DIP":
            UUID = findOrCreateSipInDB(path, unit_type='DIP')
            unit = unitDIP(path, UUID)
        elif config[3] == "Transfer":
            unit = unitTransfer(path)
    elif os.path.isfile(path):
        if config[3] == "Transfer":
            unit = unitTransfer(path)
        else:
            # Non-Transfer single files are ignored. (The old unitFile
            # construction after this return was unreachable dead code.)
            return
    else:
        return
    jobChain(unit, config[1])

    if terminate:
        exit(0)
Example #15
0
def createUnitAndJobChain(path, config, terminate=False):
    path = unicodeToStr(path)
    if os.path.isdir(path):
        path = path + "/"
    print "createUnitAndJobChain", path, config
    unit = None
    if os.path.isdir(path):
        if config[3] == "SIP":
            UUID = findOrCreateSipInDB(path)
            unit = unitSIP(path, UUID)
        elif config[3] == "DIP":
            UUID = findOrCreateSipInDB(path)
            unit = unitDIP(path, UUID)
        elif config[3] == "Transfer":
            #UUID = findOrCreateSipInDB(path)
            unit = unitTransfer(path)
    elif os.path.isfile(path):
        if config[3] == "Transfer":
            unit = unitTransfer(path)
        else:
            return
            UUID = uuid.uuid4()
            unit = unitFile(path, UUID)
    else:
        return
    jobChain(unit, config[1])
    if terminate:
        exit(0)
Example #16
0
def start_transfer(transfer_name, transfer_type, accession, access_id, paths,
                   row_ids):
    """
    Start a new transfer.

    :param str transfer_name: Name of new transfer.
    :param str transfer_type: Type of new transfer. From TRANSFER_TYPE_DIRECTORIES.
    :param str accession: Accession number of new transfer.
    :param str access_id: Access system identifier for the new transfer.
    :param list paths: List of <location_uuid>:<relative_path> to be copied into the new transfer. Location UUIDs should be associated with this pipeline, and relative path should be relative to the location.
    :param list row_ids: ID of the associated TransferMetadataSet for disk image ingest.
    :returns: Dict with {'message': <message>, ['error': True, 'path': <path>]}.  Error is a boolean, present and True if there is an error.  Message describes the success or failure. Path is populated if there is no error.
    :raises ValueError: if transfer_name or paths is empty.
    :raises Exception: if copying any component into place fails.
    """
    if not transfer_name:
        raise ValueError('No transfer name provided.')
    if not paths:
        raise ValueError('No path provided.')

    # Create temp directory that everything will be copied into
    temp_base_dir = os.path.join(SHARED_DIRECTORY_ROOT, 'tmp')
    temp_dir = tempfile.mkdtemp(dir=temp_base_dir)
    os.chmod(temp_dir, 0o770)  # Needs to be writeable by the SS

    for i, path in enumerate(paths):
        index = i + 1  # so transfers start from 1, not 0
        # Don't suffix the first transfer component, only subsequent ones
        if index > 1:
            target = transfer_name + '_' + str(index)
        else:
            target = transfer_name
        # NOTE(review): assumes row_ids is at least as long as paths --
        # an IndexError is raised otherwise.
        row_id = row_ids[i]

        if helpers.file_is_an_archive(path):
            # Archives are copied as single files directly into temp_dir.
            transfer_dir = temp_dir
            p = path.split(':', 1)[1]
            logger.debug('found a zip file, splitting path ' + p)
            filepath = os.path.join(temp_dir, os.path.basename(p))
        else:
            path = os.path.join(path, '.')  # Copy contents of dir but not dir
            transfer_dir = os.path.join(temp_dir, target)
            filepath = os.path.join(temp_dir, target)

        transfer_relative = transfer_dir.replace(SHARED_DIRECTORY_ROOT, '', 1)
        copy_from_transfer_sources([path], transfer_relative)
        filepath = archivematicaFunctions.unicodeToStr(filepath)
        try:
            destination = copy_to_start_transfer(
                filepath=filepath,
                type=transfer_type,
                accession=accession,
                access_id=access_id,
                transfer_metadata_set_row_uuid=row_id)
        except Exception as e:
            logger.exception('Error starting transfer {}: {}'.format(
                filepath, e))
            raise Exception('Error starting transfer {}: {}'.format(
                filepath, e))

    shutil.rmtree(temp_dir)
    # NOTE: only the destination of the LAST component is returned.
    return {'message': _('Copy successful.'), 'path': destination}
    def checkForPreconfiguredXML(self):
        """Return a preconfigured chain pk for this link, or None.

        Reads the unit's processing XML file (if present) and looks for a
        preconfiguredChoice whose appliesTo text matches this job chain
        link's description; the matching MicroServiceChains pk is returned.
        A <delay> element may instead schedule proceedWithChoice() on a
        daemon timer, in which case None is returned immediately.
        XML/parsing errors are printed to stderr and swallowed.
        """
        ret = None
        xmlFilePath = os.path.join( \
                                        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
                                        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
                                    )
        xmlFilePath = unicodeToStr(xmlFilePath)
        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                # NOTE(review): shell command built by string concatenation;
                # xmlFilePath is not shell-escaped.
                command = "sudo chmod 774 \"" + xmlFilePath + "\""
                if isinstance(command, unicode):
                    command = command.encode("utf-8")
                exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False)
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.find("preconfiguredChoices"):
                    #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk:
                    if preconfiguredChoice.find("appliesTo").text == self.jobChainLink.description:
                        desiredChoice = preconfiguredChoice.find("goToChain").text
                        sql = """SELECT MicroServiceChains.pk FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk WHERE MicroServiceChains.description = '%s' AND MicroServiceChainChoice.choiceAvailableAtLink = %s;""" % (desiredChoice, self.jobChainLink.pk.__str__())
                        c, sqlLock = databaseInterface.querySQL(sql)
                        row = c.fetchone()
                        while row != None:
                            ret = row[0]
                            row = c.fetchone()
                        sqlLock.release()
                        try:
                            #<delay unitAtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            # NOTE(review): the variable says "Atime" but the
                            # attribute read is "unitCtime", and getmtime is
                            # used below -- confirm which time is intended.
                            unitAtimeXML = delayXML.get("unitCtime")
                            if unitAtimeXML != None and unitAtimeXML.lower() != "no":
                                delaySeconds=int(delayXML.text)
                                unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                               archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                                nowTime=time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                print "time to go:", timeToGo
                                #print "that will be: ", (nowTime + timeToGo)
                                self.jobChainLink.setExitMessage("Waiting till: " + datetime.datetime.fromtimestamp((nowTime + timeToGo)).ctime())

                                # Daemon timer so a pending delay cannot keep
                                # the process alive on shutdown.
                                t = threading.Timer(timeToGo, self.proceedWithChoice, args=[ret], kwargs={"delayTimerStart":True})
                                t.daemon = True
                                self.delayTimer = t
                                t.start()
                                return None

                        except Exception as inst:
                            print >>sys.stderr, "Error parsing xml:"
                            print >>sys.stderr, type(inst)
                            print >>sys.stderr, inst.args

            except Exception as inst:
                print >>sys.stderr, "Error parsing xml:"
                print >>sys.stderr, type(inst)
                print >>sys.stderr, inst.args
        return ret
Example #18
0
def copy_to_start_transfer(request):
    """Copy a filesystem path into the watched transfer-start directory.

    POST params: filepath (relative to /), type (transfer type keyword).
    A path containing '.zip' is copied as a single file (zipped bag);
    anything else is copied as a directory tree, with the destination
    padded if it already exists. Responds with a JSON message/error payload.
    """
    filepath = archivematicaFunctions.unicodeToStr(request.POST.get('filepath', ''))
    # Renamed from `type` to avoid shadowing the builtin.
    transfer_type = request.POST.get('type', '')

    error = check_filepath_exists('/' + filepath)

    if error is None:
        # confine destination to subdir of originals
        filepath = os.path.join('/', filepath)
        basename = os.path.basename(filepath)

        # default to standard transfer
        type_paths = {
          'standard':     'standardTransfer',
          'unzipped bag': 'baggitDirectory',
          'zipped bag':   'baggitZippedDirectory',
          'dspace':       'Dspace',
          'maildir':      'maildir'
        }

        try:
            type_subdir = type_paths[transfer_type]
            destination = os.path.join(ACTIVE_TRANSFER_DIR, type_subdir)
        except KeyError:
            destination = os.path.join(STANDARD_TRANSFER_DIR)

        # If the transfer component path leads to a ZIP file, treat it as a
        # zipped bag. (The original used str.index + bare except as control
        # flow, which also hid real shutil.copy failures.)
        if '.zip' in filepath.lower():
            shutil.copy(filepath, destination)
        else:
            destination = os.path.join(destination, basename)
            destination = pad_destination_filepath_if_it_already_exists(destination)
            try:
                shutil.copytree(
                    filepath,
                    destination
                )
            except Exception:
                error = 'Error copying from ' + filepath + ' to ' + destination + '. (' + str(sys.exc_info()[0]) + ')'

    response = {}

    if error is not None:
        response['message'] = error
        response['error'] = True
    else:
        response['message'] = 'Copy successful.'

    return HttpResponse(
        simplejson.JSONEncoder().encode(response),
        mimetype='application/json'
    )
Example #19
0
def _determine_transfer_paths(name, path, tmpdir):
    """Work out (shared-relative transfer dir, destination filepath, source
    path) for a new transfer component."""
    if _file_is_an_archive(path):
        # Archives are copied as a single file straight into tmpdir.
        transfer_dir = tmpdir
        filepath = os.path.join(tmpdir, os.path.basename(Path(path).path))
    else:
        # Copy the directory's contents, not the directory itself.
        path = os.path.join(path, '.')
        transfer_dir = filepath = os.path.join(tmpdir, name)
    transfer_relative = transfer_dir.replace(
        django_settings.SHARED_DIRECTORY, '', 1)
    return (transfer_relative, unicodeToStr(filepath), path)
Example #20
0
def sanitizeRecursively(job, path):
    """Sanitize path and everything below it.

    Returns a dict mapping each NFC-normalized original path to its
    sanitized replacement (paths that needed no change are omitted).
    """
    path = os.path.abspath(path)
    renames = {}

    cleaned = sanitizePath(job, path)
    if cleaned != path:
        original_key = unicodeToStr(
            unicodedata.normalize('NFC', path.decode('utf8')))
        renames[original_key] = cleaned
    if os.path.isdir(cleaned):
        for child in os.listdir(cleaned):
            renames.update(
                sanitizeRecursively(job, os.path.join(cleaned, child)))

    return renames
Example #21
0
def approve_transfer(request):
    """Approve a transfer.

    The user may find the Package API a better option when the ID of the
    unit is known in advance.

    The errors returned use the 500 status code for backward-compatibility
    reasons.

    Example::

        $ curl --data "directory=MyTransfer" \
               --header "Authorization: ApiKey: user:token" \
               http://127.0.0.1/api/transfer/approve
    """
    directory = request.POST.get("directory")
    if not directory:
        return _error_response(
            "Please specify a transfer directory.", status_code=500)
    directory = archivematicaFunctions.unicodeToStr(directory)

    transfer_type = request.POST.get("type", "standard")
    if not transfer_type:
        return _error_response(
            "Please specify a transfer type.", status_code=500)

    modified_transfer_path = get_modified_standard_transfer_path(transfer_type)
    if modified_transfer_path is None:
        return _error_response("Invalid transfer type.", status_code=500)

    watched_path = os.path.join(modified_transfer_path, directory)
    transfer_file = watched_path.replace(
        SHARED_PATH_TEMPLATE_VAL, SHARED_DIRECTORY_ROOT)
    file_transfer = (transfer_type in ("zipped bag", "dspace")
                     and os.path.isfile(transfer_file))
    if file_transfer:
        db_transfer_path = watched_path
    else:
        # Append a slash to complete the directory path.
        db_transfer_path = os.path.join(watched_path, "")

    try:
        unit_uuid = MCPClient(request.user).approve_transfer_by_path(
            db_transfer_path, transfer_type)
    except Exception as err:
        msg = "Unable to start the transfer."
        LOGGER.error("%s %s (db_transfer_path=%s)",
                     msg, err, db_transfer_path)
        return _error_response(msg, status_code=500)
    return _ok_response("Approval successful.", uuid=unit_uuid)
Example #22
0
def approve_transfer(request):
    # Example: curl --data \
    #   "username=mike&api_key=<API key>&directory=MyTransfer" \
    #   http://127.0.0.1/api/transfer/approve
    directory = archivematicaFunctions.unicodeToStr(
        request.POST.get('directory', ''))
    transfer_type = request.POST.get('type', 'standard')
    error, unit_uuid = approve_transfer_via_mcp(directory, transfer_type,
                                                request.user.id)

    if error is not None:
        # Errors keep the legacy 500 status code.
        return helpers.json_response(
            {'message': error, 'error': True}, status_code=500)
    return helpers.json_response(
        {'message': 'Approval successful.', 'uuid': unit_uuid})
Example #23
0
def createUnitAndJobChain(path, watched_dir, workflow):
    """Create the unit model for ``path`` and launch its job chain.

    The unit class is chosen from the watched directory's ``unit_type``
    ("SIP", "DIP" or "Transfer"). Paths that have vanished, or that are
    neither files nor directories, are ignored.
    """
    path = unicodeToStr(path)
    if os.path.isdir(path):
        path += "/"
    logger.debug('Starting chain for %s', path)
    if not os.path.exists(path):
        return
    unit = None
    unit_type = watched_dir["unit_type"]
    if os.path.isdir(path):
        if unit_type == "SIP":
            unit = unitSIP(path, findOrCreateSipInDB(path))
        elif unit_type == "DIP":
            unit = unitDIP(path, findOrCreateSipInDB(path, unit_type='DIP'))
        elif unit_type == "Transfer":
            unit = unitTransfer(path)
    elif os.path.isfile(path):
        if unit_type == "Transfer":
            unit = unitTransfer(path)
    else:
        return
    jobChain(unit, watched_dir.chain, workflow)
Example #24
0
def sorted_directory_list(path):
    """Return the entries of ``path`` as bytestrings, locale-collated.

    The ``cmp=`` keyword of ``sorted()`` only exists on Python 2; wrapping
    ``locale.strcoll`` with ``functools.cmp_to_key`` preserves the exact
    ordering while working on both Python 2.7 and Python 3.
    """
    from functools import cmp_to_key  # local import: file-level imports untouched

    entries = os.listdir(archivematicaFunctions.unicodeToStr(path))
    cleaned = [archivematicaFunctions.unicodeToStr(entry) for entry in entries]
    return sorted(cleaned, key=cmp_to_key(locale.strcoll))
def handle_job(job):
    """Extract email attachments from a transfer's Maildir.

    Walks every maildir folder under ``objects/Maildir``, writes each
    attachment to ``objects/attachments/<folder>/<subdir>/<uuid>_<name>``,
    registers the new file in the database via ``addFile``, and records the
    run in ``logs/attachmentExtraction.xml``.

    Job arguments: ``args[1]`` transfer directory, ``args[2]`` transfer
    UUID, ``args[3]`` event date. The job status is set to the number of
    per-message/per-attachment errors, or 246 when a maildir folder cannot
    be read at all.
    """
    # http://www.doughellmann.com/PyMOTW/mailbox/
    sharedVariablesAcrossModules.errorCounter = 0
    transferDir = job.args[1]
    transferUUID = job.args[2]
    date = job.args[3]
    maildir = os.path.join(transferDir, "objects", "Maildir")
    outXML = os.path.join(transferDir, "logs", "attachmentExtraction.xml")
    mirrorDir = os.path.join(transferDir, "objects", "attachments")
    try:
        os.makedirs(mirrorDir)
    except os.error:
        # Best-effort creation: the directory may already exist.
        pass
    # print "Extracting attachments from: " + maildir
    root = etree.Element("ArchivematicaMaildirAttachmentExtractionRecord")
    root.set("directory", maildir)
    # Each immediate subdirectory of the Maildir is treated as one maildir
    # folder.
    for maildirsub in (d for d in os.listdir(maildir)
                       if os.path.isdir(os.path.join(maildir, d))):
        maildirsub_full_path = os.path.join(maildir, maildirsub)
        job.pyprint("Extracting attachments from: " + maildirsub_full_path)
        md = mailbox.Maildir(maildirsub_full_path, None)
        directory = etree.SubElement(root, "subDir")
        directory.set("dir", maildirsub)
        try:
            for item in md.keys():
                try:
                    subDir = md.get_message(item).get_subdir()
                    sourceFilePath2 = os.path.join(maildir, maildirsub, subDir,
                                                   item)
                    # Record the source path relative to the transfer using
                    # the %transferDirectory% placeholder.
                    sourceFilePath = sourceFilePath2.replace(
                        transferDir, "%transferDirectory%", 1)
                    sourceFileUUID = getFileUUIDofSourceFile(
                        transferUUID, sourceFilePath)
                    sharedVariablesAcrossModules.sourceFileUUID = sourceFileUUID
                    sharedVariablesAcrossModules.sourceFilePath = sourceFilePath
                    fil = md.get_file(item)
                    out = parse(fil)
                    job.pyprint('Email Subject:', out.get('subject'))
                    if out['attachments']:
                        msg = etree.SubElement(directory, "msg")
                        # [1:-1] strips the surrounding angle brackets from
                        # the Message-ID header value.
                        etree.SubElement(
                            msg, "Message-ID"
                        ).text = out['msgobj']['Message-ID'][1:-1]
                        etree.SubElement(msg, "Extracted-from").text = item
                        if isinstance(out["subject"], six.binary_type):
                            etree.SubElement(
                                msg, "Subject").text = out["subject"].decode(
                                    'utf-8')
                        else:
                            etree.SubElement(msg,
                                             "Subject").text = out["subject"]
                        etree.SubElement(msg,
                                         "Date").text = out['msgobj']['date']
                        etree.SubElement(msg, "To").text = out["to"]
                        etree.SubElement(msg, "From").text = out["from"]
                        for attachment in out['attachments']:
                            job.pyprint('\tAttachment name:', attachment.name)
                            try:
                                if attachment.name is None:
                                    continue
                                # these are versions of the body of the email - I think
                                if attachment.name == 'rtf-body.rtf':
                                    continue
                                attachedFileUUID = uuid.uuid4().__str__()
                                # attachment = StringIO(file_data) TODO LOG TO FILE
                                attch = etree.SubElement(msg, "attachment")
                                etree.SubElement(attch,
                                                 "name").text = attachment.name
                                etree.SubElement(
                                    attch, "content_type"
                                ).text = attachment.content_type
                                etree.SubElement(attch, "size").text = str(
                                    attachment.size)
                                # print attachment.create_date
                                # FIXME Dates don't appear to be working. Disabling for the moment
                                # etree.SubElement(attch, "create_date").text = attachment.create_date
                                # etree.SubElement(attch, "mod_date").text = attachment.mod_date
                                # etree.SubElement(attch, "read_date").text = attachment.read_date
                                # Prefix the attachment name with a fresh UUID
                                # to avoid on-disk name collisions.
                                filePath = os.path.join(
                                    transferDir, "objects", "attachments",
                                    maildirsub, subDir, "%s_%s" %
                                    (attachedFileUUID, attachment.name))
                                job.pyprint('\tAttachment path:', filePath)
                                filePath = unicodeToStr(filePath)
                                writeFile(filePath, attachment)
                                eventDetail = "Unpacked from: {%s}%s" % (
                                    sourceFileUUID, sourceFilePath)
                                addFile(filePath,
                                        transferDir,
                                        transferUUID,
                                        date,
                                        eventDetail=eventDetail,
                                        fileUUID=attachedFileUUID)
                            except Exception as inst:
                                # Per-attachment failure: log and keep going.
                                job.pyprint(sourceFilePath, file=sys.stderr)
                                job.print_error(traceback.format_exc())
                                job.pyprint(
                                    type(inst),
                                    file=sys.stderr)  # the exception instance
                                job.pyprint(inst.args, file=sys.stderr)
                                job.pyprint(etree.tostring(msg),
                                            file=sys.stderr)
                                job.pyprint(file=sys.stderr)
                                sharedVariablesAcrossModules.errorCounter += 1
                except Exception as inst:
                    # Per-message failure: log and keep going.
                    job.pyprint(sourceFilePath, file=sys.stderr)
                    job.print_error(traceback.format_exc())
                    job.pyprint(type(inst),
                                file=sys.stderr)  # the exception instance
                    job.pyprint(inst.args, file=sys.stderr)
                    job.pyprint(file=sys.stderr)
                    sharedVariablesAcrossModules.errorCounter += 1
        except Exception as inst:
            # The whole folder could not be iterated as a maildir.
            job.pyprint("INVALID MAILDIR FORMAT", file=sys.stderr)
            job.pyprint(type(inst), file=sys.stderr)
            job.pyprint(inst.args, file=sys.stderr)
            job.set_status(246)  # Was -10, but exit codes are unsigned
            return

        mirrorDir = os.path.join(transferDir, "objects/attachments",
                                 maildirsub)
        try:
            os.makedirs(mirrorDir)
        except:
            pass
        # Add a placeholder file so the maildir folder itself can be picked
        # up for normalization later.
        eventDetail = "added for normalization purposes"
        fileUUID = uuid.uuid4().__str__()
        addKeyFileToNormalizeMaildirOffOf(os.path.join(
            maildir, maildirsub).replace(transferDir, "%transferDirectory%",
                                         1),
                                          mirrorDir,
                                          transferDir,
                                          transferUUID,
                                          date,
                                          eventDetail=eventDetail,
                                          fileUUID=fileUUID)
    tree = etree.ElementTree(root)
    tree.write(outXML, pretty_print=True, xml_declaration=True)

    # Job status reflects the number of errors encountered above.
    job.set_status(sharedVariablesAcrossModules.errorCounter)
def escapeForDB(str):
    """Return the value coerced to a bytestring and SQL-escaped."""
    # NOTE(review): the parameter shadows the builtin ``str``; the name is
    # kept unchanged for compatibility with existing callers.
    return MySQLdb.escape_string(unicodeToStr(str))
Example #27
0
def create_package(name,
                   type_,
                   accession,
                   access_system_id,
                   path,
                   metadata_set_id,
                   auto_approve=True,
                   wait_until_complete=False,
                   processing_config=None):
    """Launch transfer and return its object immediately.

    ``auto_approve`` changes significantly the way that the transfer is
    initiated. See ``_start_package_transfer_with_auto_approval`` and
    ``_start_package_transfer`` for more details.

    Raises ``ValueError`` on a missing name or path, an unknown package
    type, or a non-boolean ``auto_approve``. With
    ``wait_until_complete=True`` the transfer start runs synchronously;
    otherwise it is dispatched in the background and the new ``Transfer``
    model object is returned right away.
    """
    # Validate inputs before touching the database.
    if not name:
        raise ValueError('No transfer name provided.')
    name = unicodeToStr(name)
    # 'disk image' (and an unset type) is processed as a standard transfer.
    if type_ is None or type_ == 'disk image':
        type_ = 'standard'
    if type_ not in PACKAGE_TYPE_STARTING_POINTS:
        raise ValueError(
            "Unexpected type of package provided '{}'".format(type_))
    if not path:
        raise ValueError('No path provided.')
    if isinstance(auto_approve, bool) is False:
        raise ValueError('Unexpected value in auto_approve parameter')

    # Create Transfer object.
    kwargs = {'uuid': str(uuid4())}
    if accession is not None:
        kwargs['accessionid'] = unicodeToStr(accession)
    if access_system_id is not None:
        kwargs['access_system_id'] = unicodeToStr(access_system_id)
    if metadata_set_id is not None:
        try:
            kwargs['transfermetadatasetrow'] = \
                TransferMetadataSet.objects.get(id=metadata_set_id)
        except TransferMetadataSet.DoesNotExist:
            # An unknown metadata set id is silently ignored.
            pass
    transfer = Transfer.objects.create(**kwargs)
    logger.debug('Transfer object created: %s', transfer.pk)

    @auto_close_db
    def _start(transfer, name, type_, path):
        # Stage the package in a shared tmp directory before starting.
        # TODO: use tempfile.TemporaryDirectory as a context manager in Py3.
        tmpdir = mkdtemp(
            dir=os.path.join(django_settings.SHARED_DIRECTORY, 'tmp'))
        starting_point = PACKAGE_TYPE_STARTING_POINTS.get(type_)
        logger.debug('Package %s: starting transfer (%s)', transfer.pk,
                     (name, type_, path, tmpdir))
        try:
            params = (transfer, name, path, tmpdir, starting_point,
                      processing_config)
            if auto_approve:
                _start_package_transfer_with_auto_approval(*params)
            else:
                _start_package_transfer(*params)
        finally:
            os.chmod(tmpdir, 0o770)  # Needs to be writeable by the SS.

    # ``apply`` blocks until ``_start`` finishes; ``apply_async`` returns
    # immediately.
    getattr(
        Executor,
        'apply' if wait_until_complete else 'apply_async',
    )(_start, (transfer, name, type_, path))

    return transfer
Example #28
0
                                          relativeReplacement, 1) + "/"
                newfile = newfile.replace(objectsDirectory,
                                          relativeReplacement, 1) + "/"
                directoryContents = []

                sql = "SELECT fileUUID, currentLocation FROM Files WHERE Files.removedTime = 0 AND Files.currentLocation LIKE '" + MySQLdb.escape_string(
                    oldfile.replace("\\", "\\\\")).replace(
                        "%",
                        "\%") + "%' AND " + groupSQL + " = '" + groupID + "';"

                c, sqlLock = databaseInterface.querySQL(sql)
                row = c.fetchone()
                while row != None:
                    fileUUID = row[0]
                    oldPath = row[1]
                    newPath = unicodeToStr(oldPath).replace(
                        oldfile, newfile, 1)
                    directoryContents.append((fileUUID, oldPath, newPath))
                    row = c.fetchone()
                sqlLock.release()

                print oldfile, " -> ", newfile

                for fileUUID, oldPath, newPath in directoryContents:
                    updateFileLocation(oldPath,
                                       newPath,
                                       "name cleanup",
                                       date,
                                       "prohibited characters removed:" +
                                       eventDetail,
                                       fileUUID=fileUUID)
Example #29
0
def sorted_directory_list(path):
    """Return the entries of ``path`` as bytestrings in natural sort order."""
    entries = os.listdir(archivematicaFunctions.unicodeToStr(path))
    # Build the coerced list in one pass instead of append-in-a-loop,
    # matching the other sorted_directory_list implementations.
    cleaned = [archivematicaFunctions.unicodeToStr(entry) for entry in entries]
    return sorted(cleaned, key=helpers.keynat)
Example #30
0
def sorted_directory_list(path):
    """Return the entries of ``path`` as bytestrings in natural sort order."""
    # The original initialized ``cleaned = []`` and then immediately
    # rebound it to the comprehension; the dead assignment is removed.
    entries = os.listdir(unicodeToStr(path))
    cleaned = [unicodeToStr(entry) for entry in entries]
    return sorted(cleaned, key=helpers.keynat)
Example #31
0
    def checkForPreconfiguredXML(self):
        """Look up a pre-configured chain choice for this job chain link.

        Reads the unit's processing XML file (path built from the MCPServer
        config). When a ``preconfiguredChoice`` entry's ``appliesTo``
        matches this link's description, returns the pk of the matching
        ``MicroServiceChains`` row; otherwise returns ``None``. An optional
        ``<delay>`` element schedules ``proceedWithChoice`` on a timer
        instead, in which case ``None`` is returned as well.
        """
        ret = None
        xmlFilePath = os.path.join( \
                                        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
                                        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
                                    )
        xmlFilePath = unicodeToStr(xmlFilePath)
        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                # Make the processing file group-writeable before parsing it.
                command = "sudo chmod 774 \"" + xmlFilePath + "\""
                if isinstance(command, unicode):
                    command = command.encode("utf-8")
                exitCode, stdOut, stdError = executeOrRun("command",
                                                          command,
                                                          "",
                                                          printing=False)
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.find("preconfiguredChoices"):
                    #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk:
                    if preconfiguredChoice.find(
                            "appliesTo").text == self.jobChainLink.description:
                        desiredChoice = preconfiguredChoice.find(
                            "goToChain").text
                        # NOTE(review): SQL built by string interpolation;
                        # ``desiredChoice`` comes from the XML file, so this
                        # is an injection risk if that file is untrusted.
                        sql = """SELECT MicroServiceChains.pk FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk WHERE MicroServiceChains.description = '%s' AND MicroServiceChainChoice.choiceAvailableAtLink = '%s';""" % (
                            desiredChoice, self.jobChainLink.pk.__str__())
                        c, sqlLock = databaseInterface.querySQL(sql)
                        row = c.fetchone()
                        # Keep the last matching row's pk.
                        while row != None:
                            ret = row[0]
                            row = c.fetchone()
                        sqlLock.release()
                        try:
                            #<delay unitAtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            unitAtimeXML = delayXML.get("unitCtime")
                            if unitAtimeXML != None and unitAtimeXML.lower(
                            ) != "no":
                                # Delay is measured from the unit's mtime.
                                delaySeconds = int(delayXML.text)
                                unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                               archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                                nowTime = time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                print "time to go:", timeToGo
                                #print "that will be: ", (nowTime + timeToGo)
                                self.jobChainLink.setExitMessage(
                                    "Waiting till: " +
                                    datetime.datetime.fromtimestamp(
                                        (nowTime + timeToGo)).ctime())

                                # Defer the choice; the timer fires
                                # proceedWithChoice later.
                                t = threading.Timer(
                                    timeToGo,
                                    self.proceedWithChoice,
                                    args=[ret, None],
                                    kwargs={"delayTimerStart": True})
                                t.daemon = True
                                self.delayTimer = t
                                t.start()
                                return None

                        except Exception as inst:
                            print >> sys.stderr, "Error parsing xml:"
                            print >> sys.stderr, type(inst)
                            print >> sys.stderr, inst.args

            except Exception as inst:
                print >> sys.stderr, "Error parsing xml:"
                print >> sys.stderr, type(inst)
                print >> sys.stderr, inst.args
        return ret
Example #32
0
def sorted_directory_list(path):
    """Return the entries of ``path`` as bytestrings in natural sort order."""
    # The original initialized ``cleaned = []`` and then immediately
    # rebound it to the comprehension; the dead assignment is removed.
    entries = os.listdir(archivematicaFunctions.unicodeToStr(path))
    cleaned = [archivematicaFunctions.unicodeToStr(entry) for entry in entries]
    return sorted(cleaned, key=helpers.keynat)
Example #33
0
def generate_project_client_package(job, output_dir, package_type, structmap, dmdsecs, dipuuid):
    """
    Generates a simple.txt or compound.txt from the METS of a DIP

    :param job: Job instance used for printed output
    :param output_dir: Path to directory for simple/compound.txt
    :param package_type: package type string; 'compound' selects
        compound.txt, 'dirs' rows have no file UUID and lead with the
        directory name
    :param structmap: structMap element from the METS (Preparse somehow?)
    :param dmdsecs: Dict of {<DMDID>: OrderedDict{column name: value} or <dmdSec element>? }
    :param dipuuid: UUID of the DIP
    :returns: 0 on success, 1 when rows produce differing CSV headers
    """
    job.pyprint('DIP UUID:', dipuuid)

    if 'compound' in package_type:
        csv_path = os.path.join(output_dir, 'compound.txt')
    else:
        csv_path = os.path.join(output_dir, 'simple.txt')

    job.pyprint('Package type:', package_type)
    job.pyprint('Path to the output tabfile', csv_path)

    divs_with_dmdsecs = structmap.findall('.//mets:div[@DMDID]', namespaces=ns.NSMAP)
    with open(csv_path, "wb") as csv_file:
        # Tab-delimited output ("tabfile").
        writer = csv.writer(csv_file, delimiter='\t')

        # Iterate through every div and create a row for each
        csv_header_ref = None
        for div in divs_with_dmdsecs:
            # Find associated dmdSecs
            dmdids = div.get('DMDID').split()
            # Take nonDC dmdSec, fallback to DC dmdSec
            dmdsecpair = splitDmdSecs(job, [dmdsecs[dmdid] for dmdid in dmdids])
            dmdsecpair['dc'] = addAipUuidToDcMetadata(dipuuid, dmdsecpair['dc'])
            metadata = dmdsecpair['nonDc'] or dmdsecpair['dc']
            # Create csv_header and csv_values from the dmdSec metadata
            csv_header = []
            csv_values = []
            for header, value in metadata.items():
                csv_header.append(header)
                # Multi-valued fields are joined with '; '; newlines would
                # break the tabfile rows.
                value = '; '.join(value).replace('\r', '').replace('\n', '')
                csv_values.append(archivematicaFunctions.unicodeToStr(value))

            # Add AIP UUID
            csv_header.append('AIP UUID')
            csv_values.append(dipuuid)

            # Add file UUID
            csv_header.append('file UUID')
            if 'dirs' in package_type:
                # Directories have no file UUID
                csv_values.append('')
            else:
                file_uuid = ''
                fptr = div.find('mets:fptr', namespaces=ns.NSMAP)
                # Only files have fptrs as direct children
                if fptr is not None:
                    # File UUID is last 36 characters of FILEID
                    file_uuid = fptr.get('FILEID')[-36:]
                csv_values.append(file_uuid)

            # Add file or directory name
            name = div.attrib['LABEL']  # Fallback if LABEL doesn't exist?
            if 'dirs' in package_type:
                csv_header.insert(0, 'Directory name')
                csv_values.insert(0, name)
            else:
                csv_header.append('Filename')
                csv_values.append(name)

            # Compare csv_header, if diff ERROR (first time set, write to file)
            if csv_header_ref and csv_header_ref != csv_header:
                job.pyprint('ERROR headers differ,', csv_path, 'almost certainly invalid', file=sys.stderr)
                job.pyprint('Reference header:', csv_header_ref, file=sys.stderr)
                job.pyprint('Differing header:', csv_header, file=sys.stderr)
                return 1
            # If first time through, write out header
            if not csv_header_ref:
                csv_header_ref = csv_header
                writer.writerow(csv_header_ref)
                job.pyprint('Tabfile header:', csv_header)
            # Write csv_row
            writer.writerow(csv_values)
            job.pyprint('Values:', csv_values)
    return 0
Example #34
0
    def checkForPreconfiguredXML(self):
        """Return the pre-configured chain choice for this link, if any.

        Reads the unit's processing XML file (path built from the MCPServer
        config) and scans every ``preconfiguredChoice`` whose ``appliesTo``
        matches this link's pk. Returns the ``goToChain`` text of the last
        match, or ``None``. An optional ``<delay>`` element schedules
        ``proceedWithChoice`` on a timer instead, returning ``None``.
        """
        desiredChoice = None
        xmlFilePath = os.path.join( \
                                        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
                                        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
                                    )
        xmlFilePath = unicodeToStr(xmlFilePath)
        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                # Make the processing file group-writeable before parsing it.
                command = "sudo chmod 774 \"" + xmlFilePath + "\""
                if isinstance(command, unicode):
                    command = command.encode("utf-8")
                exitCode, stdOut, stdError = executeOrRun("command",
                                                          command,
                                                          "",
                                                          printing=False)
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.findall(
                        ".//preconfiguredChoice"):
                    if preconfiguredChoice.find(
                            "appliesTo").text == self.jobChainLink.pk:
                        desiredChoice = preconfiguredChoice.find(
                            "goToChain").text
                        try:
                            #<delay unitAtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            if delayXML is not None:
                                unitAtimeXML = delayXML.get("unitCtime")
                            else:
                                unitAtimeXML = None
                            if unitAtimeXML is not None and unitAtimeXML.lower(
                            ) != "no":
                                # Delay is measured from the unit's mtime.
                                delaySeconds = int(delayXML.text)
                                unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                               archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                                nowTime = time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                LOGGER.info('Time to go: %s', timeToGo)
                                self.jobChainLink.setExitMessage(
                                    "Waiting till: " +
                                    datetime.datetime.fromtimestamp(
                                        (nowTime + timeToGo)).ctime())

                                # Defer the choice; the timer fires
                                # proceedWithChoice later.
                                t = threading.Timer(
                                    timeToGo,
                                    self.proceedWithChoice,
                                    args=[desiredChoice, None],
                                    kwargs={"delayTimerStart": True})
                                t.daemon = True
                                self.delayTimer = t
                                t.start()
                                return None

                        except Exception:
                            LOGGER.info('Error parsing XML', exc_info=True)
            except Exception:
                LOGGER.warning(
                    'Error parsing xml at %s for pre-configured choice',
                    xmlFilePath,
                    exc_info=True)
        LOGGER.info('Using preconfigured choice %s for %s', desiredChoice,
                    self.jobChainLink.pk)
        return desiredChoice
Example #35
0
def create_package(
    package_queue,
    executor,
    name,
    type_,
    accession,
    access_system_id,
    path,
    metadata_set_id,
    user_id,
    workflow,
    auto_approve=True,
    processing_config=None,
):
    """Launch transfer and return its object immediately.

    ``auto_approve`` changes significantly the way that the transfer is
    initiated. See ``_start_package_transfer_with_auto_approval`` and
    ``_start_package_transfer`` for more details.

    Raises ``ValueError`` on a missing name or path, an unknown package
    type, a non-boolean ``auto_approve``, or a ``user_id`` that is not an
    integer. The transfer start is submitted to ``executor`` and the new
    ``Transfer`` model object is returned right away.
    """
    # Validate inputs before touching the database.
    if not name:
        raise ValueError("No transfer name provided.")
    # 'disk image' (and an unset type) is processed as a standard transfer.
    if type_ is None or type_ == "disk image":
        type_ = "standard"
    if type_ not in PACKAGE_TYPE_STARTING_POINTS:
        raise ValueError("Unexpected type of package provided '{}'".format(type_))
    if not path:
        raise ValueError("No path provided.")
    if isinstance(auto_approve, bool) is False:
        raise ValueError("Unexpected value in auto_approve parameter")
    try:
        int(user_id)
    except (TypeError, ValueError):
        raise ValueError("Unexpected value in user_id parameter")

    # Create Transfer object.
    kwargs = {"uuid": str(uuid4())}
    if accession is not None:
        kwargs["accessionid"] = unicodeToStr(accession)
    if access_system_id is not None:
        kwargs["access_system_id"] = unicodeToStr(access_system_id)
    if metadata_set_id is not None:
        try:
            kwargs["transfermetadatasetrow"] = models.TransferMetadataSet.objects.get(
                id=metadata_set_id
            )
        except models.TransferMetadataSet.DoesNotExist:
            # An unknown metadata set id is silently ignored.
            pass
    transfer = models.Transfer.objects.create(**kwargs)
    # Fall back to the default processing configuration when the requested
    # one does not exist on disk.
    if not processing_configuration_file_exists(processing_config):
        processing_config = "default"
    transfer.set_processing_configuration(processing_config)
    transfer.update_active_agent(user_id)
    logger.debug("Transfer object created: %s", transfer.pk)

    # Stage the package in a shared tmp directory before starting.
    # TODO: use tempfile.TemporaryDirectory as a context manager in Py3.
    tmpdir = mkdtemp(dir=os.path.join(_get_setting("SHARED_DIRECTORY"), "tmp"))
    starting_point = PACKAGE_TYPE_STARTING_POINTS.get(type_)
    logger.debug(
        "Package %s: starting transfer (%s)", transfer.pk, (name, type_, path, tmpdir)
    )
    params = (transfer, name, path, tmpdir, starting_point)
    if auto_approve:
        params = params + (workflow, package_queue)
        result = executor.submit(_start_package_transfer_with_auto_approval, *params)
    else:
        result = executor.submit(_start_package_transfer, *params)

    # Make the tmpdir writeable by the Storage Service once the transfer
    # start has finished (whether it succeeded or failed).
    result.add_done_callback(lambda f: os.chmod(tmpdir, 0o770))

    return transfer
def sanitize_object_names(job, objectsDirectory, sipUUID, date, groupType, groupSQL, sipPath):
    """Sanitize object names in a Transfer/SIP.

    Renames files and directories under ``objectsDirectory`` that contain
    prohibited characters (delegated to ``sanitize_names.sanitizeRecursively``)
    and then updates the matching ``File``/``Directory`` database rows so
    their ``currentlocation`` values reflect the new on-disk names, recording
    a 'name cleanup' event for each renamed file.

    :param job: MCPClient job object; used for console output via ``pyprint``.
    :param objectsDirectory: absolute path to the unit's objects/ directory.
    :param sipUUID: UUID of the Transfer or SIP being processed.
    :param date: timestamp recorded on the generated 'name cleanup' events.
    :param groupType: DB location placeholder for the unit root, either
        "%SIPDirectory%" or "%transferDirectory%".
    :param groupSQL: ``File`` model field used to scope the query, e.g.
        'transfer_id' or 'sip_id'.
    :param sipPath: absolute path of the unit corresponding to ``groupType``.
    :return: 0 on success, 3 if ``groupType`` is not recognized.
    """
    # Placeholder-relative form of objectsDirectory, used when writing
    # locations back to the DB.
    relativeReplacement = objectsDirectory.replace(sipPath, groupType, 1)  # "%SIPDirectory%objects/"

    # Get any ``Directory`` instances created for this transfer (if such exist)
    directory_mdls = []
    if groupSQL == 'transfer_id':
        transfer_mdl = Transfer.objects.get(uuid=sipUUID)
        if transfer_mdl.diruuids:
            directory_mdls = Directory.objects.filter(
                transfer=transfer_mdl).all()

    # Sanitize objects on disk; ``sanitizations`` maps old absolute path ->
    # new absolute path for everything that was renamed.
    sanitizations = sanitize_names.sanitizeRecursively(job, objectsDirectory)
    for oldfile, newfile in sanitizations.items():
        logger.info('sanitizations: %s -> %s', oldfile, newfile)

    eventDetail = 'program="sanitize_names"; version="' + sanitize_names.VERSION + '"'

    # Update files in DB: only live (not removed) rows belonging to this unit.
    kwargs = {
        groupSQL: sipUUID,
        "removedtime__isnull": True,
    }
    file_mdls = File.objects.filter(**kwargs)
    # Iterate over ``File`` and ``Directory``
    for model in chain(file_mdls, directory_mdls):
        # Check all files to see if any parent directory had a sanitization event
        # NFC-normalize before comparing — presumably the on-disk sanitized
        # names are NFC-normalized too; TODO confirm in sanitize_names.
        current_location = unicodeToStr(
            unicodedata.normalize('NFC', model.currentlocation)).replace(
                groupType, sipPath)
        sanitized_location = unicodeToStr(current_location)
        logger.info('Checking %s', current_location)

        # Check parent directories
        # Since directory keys are a mix of sanitized and unsanitized, this is
        # a little complicated
        # Directories keys are in the form sanitized/sanitized/unsanitized
        # When a match is found (eg 'unsanitized' -> 'sanitized') reset the
        # search.
        # This will find 'sanitized/unsanitized2' -> 'sanitized/sanitized2' on
        # the next pass
        # TODO This should be checked for a more efficient solution
        dirpath = sanitized_location
        while objectsDirectory in dirpath:  # Stay within unit
            if dirpath in sanitizations:  # Make replacement
                sanitized_location = sanitized_location.replace(
                    dirpath, sanitizations[dirpath])
                dirpath = sanitized_location  # Reset search
            else:  # Check next level up
                dirpath = os.path.dirname(dirpath)

        # Only touch the DB when a rename actually affected this path.
        if current_location != sanitized_location:
            # Convert both paths back to their DB placeholder-relative form.
            old_location = current_location.replace(
                objectsDirectory, relativeReplacement, 1)
            new_location = sanitized_location.replace(
                objectsDirectory, relativeReplacement, 1)
            kwargs = {
                'src': old_location,
                'dst': new_location,
                'eventType': 'name cleanup',
                'eventDateTime': date,
                'eventDetail': "prohibited characters removed:" + eventDetail,
                'fileUUID': None,
            }
            if groupType == "%SIPDirectory%":
                kwargs['sipUUID'] = sipUUID
            elif groupType == "%transferDirectory%":
                kwargs['transferUUID'] = sipUUID
            else:
                job.pyprint("bad group type", groupType, file=sys.stderr)
                return 3
            logger.info('Sanitized name: %s -> %s', old_location, new_location)
            job.pyprint('Sanitized name:', old_location, " -> ", new_location)
            # Files get a full location update (with event); directories are
            # updated in place since updateFileLocation is File-specific.
            if isinstance(model, File):
                updateFileLocation(**kwargs)
            else:
                model.currentlocation = new_location
                model.save()
        else:
            logger.info('No sanitization for %s', current_location)
            job.pyprint('No sanitization found for', current_location)

    return 0
Example #37
0
def copy_to_start_transfer(request):
    """Move a selected path into the active-transfer watched directory.

    POST parameters:
        filepath:  source path (relative to filesystem root).
        type:      transfer type; selects the destination subdirectory,
                   defaulting to the standard transfer directory.
        accession: optional accession ID, relayed to MCPClient via a
                   Transfer DB row keyed on the destination path.

    Returns a JSON ``HttpResponse`` with a 'message' key and, on failure,
    an 'error' key set to True.
    """
    filepath = archivematicaFunctions.unicodeToStr(request.POST.get('filepath', ''))
    # 'transfer_type' rather than 'type' to avoid shadowing the builtin.
    transfer_type = request.POST.get('type', '')
    accession = request.POST.get('accession', '')

    error = check_filepath_exists('/' + filepath)

    if error is None:
        # confine destination to subdir of originals
        filepath = os.path.join('/', filepath)
        basename = os.path.basename(filepath)

        # Map transfer type to its watched-directory subdirectory; unknown
        # types default to the standard transfer directory.
        type_paths = {
            'standard':     'standardTransfer',
            'unzipped bag': 'baggitDirectory',
            'zipped bag':   'baggitZippedDirectory',
            'dspace':       'Dspace',
            'maildir':      'maildir',
            'TRIM':         'TRIM',
        }
        try:
            type_subdir = type_paths[transfer_type]
            destination = os.path.join(ACTIVE_TRANSFER_DIR, type_subdir)
        except KeyError:
            destination = os.path.join(STANDARD_TRANSFER_DIR)

        # If the transfer component path leads to a ZIP file, treat as a
        # zipped bag and move it as-is; otherwise nest under its basename
        # and de-duplicate the destination path if it already exists.
        # (Replaces the original try/.index()/bare-except control flow.)
        if '.zip' not in filepath.lower():
            destination = os.path.join(destination, basename)
            destination = pad_destination_filepath_if_it_already_exists(destination)

        # relay accession via DB row that MCPClient scripts will use to get
        # supplementary info from
        if accession != '':
            temp_uuid = str(uuid.uuid4())
            mcp_destination = destination.replace(SHARED_DIRECTORY_ROOT + '/', '%sharedPath%') + '/'
            # objects.create() already saves; no extra save() needed.
            models.Transfer.objects.create(
                uuid=temp_uuid,
                accessionid=accession,
                currentlocation=mcp_destination
            )

        try:
            shutil.move(filepath, destination)
        except Exception:
            # Best-effort: report the failure to the caller instead of 500ing.
            error = 'Error copying from ' + filepath + ' to ' + destination + '. (' + str(sys.exc_info()[0]) + ')'

    response = {}

    if error is not None:
        response['message'] = error
        response['error'] = True
    else:
        response['message'] = 'Copy successful.'

    return HttpResponse(
        simplejson.JSONEncoder().encode(response),
        mimetype='application/json'
    )
                                etree.SubElement(
                                    attch, "content_type"
                                ).text = attachment.content_type
                                etree.SubElement(attch, "size").text = str(
                                    attachment.size)
                                #print attachment.create_date
                                # Dates don't appear to be working. Disabling for the moment - Todo
                                #etree.SubElement(attch, "create_date").text = attachment.create_date
                                #etree.SubElement(attch, "mod_date").text = attachment.mod_date
                                #etree.SubElement(attch, "read_date").text = attachment.read_date

                                filePath = os.path.join(
                                    transferDir, "objects/attachments",
                                    maildirsub2, subDir, "%s_%s" %
                                    (attachedFileUUID, attachment.name))
                                filePath = unicodeToStr(filePath)
                                writeFile(filePath, attachment)
                                eventDetail = "Unpacked from: {%s}%s" % (
                                    sourceFileUUID, sourceFilePath)
                                addFile(filePath,
                                        transferDir,
                                        transferUUID,
                                        date,
                                        eventDetail=eventDetail,
                                        fileUUID=attachedFileUUID)
                            except Exception as inst:
                                print >> sys.stderr, sourceFilePath
                                traceback.print_exc(file=sys.stderr)
                                print >> sys.stderr, type(
                                    inst)  # the exception instance
                                print >> sys.stderr, inst.args
                if groupType == "%SIPDirectory%":
                    updateFileLocation(oldfile, newfile, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=None, sipUUID=sipUUID)
                elif groupType == "%transferDirectory%":
                    updateFileLocation(oldfile, newfile, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=None, transferUUID=sipUUID)
                else:
                    print >>sys.stderr, "bad group type", groupType
                    exit(3)

            elif os.path.isdir(newfile):
                oldfile = oldfile.replace(objectsDirectory, relativeReplacement, 1) + "/"
                newfile = newfile.replace(objectsDirectory, relativeReplacement, 1) + "/"
                directoryContents = []

                sql = "SELECT fileUUID, currentLocation FROM Files WHERE Files.removedTime = 0 AND Files.currentLocation LIKE '" + MySQLdb.escape_string(oldfile.replace("\\", "\\\\")).replace("%","\%") + "%' AND " + groupSQL + " = '" + groupID + "';"

                c, sqlLock = databaseInterface.querySQL(sql)
                row = c.fetchone()
                while row != None:
                    fileUUID = row[0]
                    oldPath = row[1]
                    newPath = unicodeToStr(oldPath).replace(oldfile, newfile, 1)
                    directoryContents.append((fileUUID, oldPath, newPath))
                    row = c.fetchone()
                sqlLock.release()

                print oldfile, " -> ", newfile

                for fileUUID, oldPath, newPath in directoryContents:
                    updateFileLocation(oldPath, newPath, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=fileUUID)

    def checkForPreconfiguredXML(self):
        """Return the chain preconfigured for this choice point, if any.

        Reads the unit's processing configuration XML
        (``django_settings.PROCESSING_XML_FILE`` inside the unit's current
        path) and scans its ``<preconfiguredChoice>`` elements for one whose
        ``<appliesTo>`` matches this job chain link's pk. If the matching
        choice carries an enabled ``<delay>``, a daemon ``threading.Timer``
        is scheduled to proceed with the choice later and ``None`` is
        returned immediately; otherwise the ``<goToChain>`` text (or ``None``
        when no file/match exists) is returned. Parse errors are logged and
        treated as "no preconfigured choice".
        """
        desiredChoice = None
        # Resolve %sharedPath% to the real shared directory to locate the
        # processing config file inside the unit.
        xmlFilePath = os.path.join(
            self.unit.currentPath.replace("%sharedPath%",
                                          django_settings.SHARED_DIRECTORY, 1),
            django_settings.PROCESSING_XML_FILE,
        )
        xmlFilePath = unicodeToStr(xmlFilePath)
        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.findall(
                        ".//preconfiguredChoice"):
                    if (preconfiguredChoice.find("appliesTo").text ==
                            self.jobChainLink.pk):
                        desiredChoice = preconfiguredChoice.find(
                            "goToChain").text
                        try:
                            # <delay unitAtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            if delayXML is not None:
                                # NOTE(review): reads attribute "unitCtime"
                                # although the example above and the variable
                                # name say "unitAtime" — confirm which
                                # attribute real workflow XML uses before
                                # changing this.
                                unitAtimeXML = delayXML.get("unitCtime")
                            else:
                                unitAtimeXML = None
                            if (unitAtimeXML is not None
                                    and unitAtimeXML.lower() != "no"):
                                # Delay is measured from the unit's mtime, so
                                # elapsed time already counts against it.
                                delaySeconds = int(delayXML.text)
                                unitTime = os.path.getmtime(
                                    self.unit.currentPath.replace(
                                        "%sharedPath%",
                                        django_settings.SHARED_DIRECTORY,
                                        1,
                                    ))
                                nowTime = time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                LOGGER.info("Time to go: %s", timeToGo)
                                self.jobChainLink.setExitMessage(
                                    "Waiting till: " +
                                    datetime.datetime.fromtimestamp(
                                        (nowTime + timeToGo)).ctime())

                                # Fire the choice later; daemon so it never
                                # blocks interpreter shutdown.
                                t = threading.Timer(
                                    timeToGo,
                                    self.proceedWithChoice,
                                    args=[desiredChoice, None],
                                    kwargs={"delayTimerStart": True},
                                )
                                t.daemon = True
                                self.delayTimer = t
                                t.start()
                                # Timer owns the choice now; report nothing.
                                return None

                        except Exception:
                            LOGGER.info("Error parsing XML", exc_info=True)
            except Exception:
                LOGGER.warning(
                    "Error parsing xml at %s for pre-configured choice",
                    xmlFilePath,
                    exc_info=True,
                )
        # Logged even when desiredChoice is None (i.e. no preconfiguration).
        LOGGER.info("Using preconfigured choice %s for %s", desiredChoice,
                    self.jobChainLink.pk)
        return desiredChoice
Example #41
0
        elif os.path.isdir(newfile):
            oldfile = oldfile.replace(objectsDirectory, relativeReplacement,
                                      1) + "/"
            newfile = newfile.replace(objectsDirectory, relativeReplacement,
                                      1) + "/"
            directoryContents = []

            kwargs = {
                "removedtime__isnull": True,
                "currentlocation__startswith": oldfile,
                groupSQL: groupID
            }
            files = File.objects.filter(**kwargs)

            print oldfile, " -> ", newfile

            for f in files:
                new_path = unicodeToStr(f.currentlocation).replace(
                    oldfile, newfile, 1)
                updateFileLocation(
                    f.currentlocation,
                    new_path,
                    fileUUID=f.uuid,
                    # Don't create sanitization events for each
                    # file, since it's only a parent directory
                    # somewhere up that changed.
                    # Otherwise, extra amdSecs will be generated
                    # from the resulting METS.
                    createEvent=False)
                     continue
                 attachedFileUUID = uuid.uuid4().__str__()
                 #attachment = StringIO(file_data) TODO LOG TO FILE
                 attch = etree.SubElement(msg, "attachment")
                 #attachment.name = attachment.name[1:-1]
                 etree.SubElement(attch, "name").text = attachment.name
                 etree.SubElement(attch, "content_type").text = attachment.content_type
                 etree.SubElement(attch, "size").text = str(attachment.size)
                 #print attachment.create_date
                 # Dates don't appear to be working. Disabling for the moment - Todo
                 #etree.SubElement(attch, "create_date").text = attachment.create_date
                 #etree.SubElement(attch, "mod_date").text = attachment.mod_date
                 #etree.SubElement(attch, "read_date").text = attachment.read_date
                 
                 filePath = os.path.join(transferDir, "objects/attachments", maildirsub2, subDir, "%s_%s" % (attachedFileUUID, attachment.name))
                 filePath = unicodeToStr(filePath)
                 writeFile(filePath, attachment)
                 eventDetail="Unpacked from: {%s}%s" % (sourceFileUUID, sourceFilePath) 
                 addFile(filePath, transferDir, transferUUID, date, eventDetail=eventDetail, fileUUID=attachedFileUUID)
             except Exception as inst:
                 print >>sys.stderr, sourceFilePath
                 traceback.print_exc(file=sys.stderr)
                 print >>sys.stderr, type(inst)     # the exception instance
                 print >>sys.stderr, inst.args
                 print >>sys.stderr, etree.tostring(msg) 
                 print >>sys.stderr
                 sharedVariablesAcrossModules.errorCounter += 1
 except Exception as inst:
     print >>sys.stderr, sourceFilePath
     traceback.print_exc(file=sys.stderr)
     print >>sys.stderr, type(inst)     # the exception instance