Example #1
def main(opts):
    """ Find and execute normalization commands on input file. """
    # TODO fix for maildir working only on attachments

    # If no explicit return happens earlier, this returns `status`.
    # This allows default rules to define a non-zero exit status.
    status = SUCCESS

    # Find the file and its FormatVersion (file identification)
    try:
        file_ = File.objects.get(uuid=opts.file_uuid)
    except File.DoesNotExist:
        print('File with uuid',
              opts.file_uuid,
              'does not exist in database.',
              file=sys.stderr)
        return NO_RULE_FOUND
    print('File found:', file_.uuid, file_.currentlocation)

    # Unless normalization file group use is submissionDocumentation, skip the
    # submissionDocumentation directory
    if opts.normalize_file_grp_use != "submissionDocumentation" and file_.currentlocation.startswith(
            '%SIPDirectory%objects/submissionDocumentation'):
        print('File', os.path.basename(opts.file_path),
              'in objects/submissionDocumentation, skipping')
        return SUCCESS

    # Only normalize files where the file's group use and normalize group use match
    if file_.filegrpuse != opts.normalize_file_grp_use:
        print(os.path.basename(opts.file_path), 'is file group usage',
              file_.filegrpuse, 'instead of', opts.normalize_file_grp_use,
              '- skipping')
        return SUCCESS

    # For re-ingest: clean up old derivations
    # If the file already has a Derivation with the same purpose, remove it and mark the derived file as deleted
    derivatives = Derivation.objects.filter(
        source_file=file_, derived_file__filegrpuse=opts.purpose)
    for derivative in derivatives:
        print(opts.purpose, 'derivative', derivative.derived_file_id,
              'already exists, marking as deleted')
        File.objects.filter(uuid=derivative.derived_file_id).update(
            filegrpuse='deleted')
        # Don't create events for thumbnail files
        if opts.purpose != 'thumbnail':
            databaseFunctions.insertIntoEvents(
                fileUUID=derivative.derived_file_id,
                eventType='deletion',
            )
    derivatives.delete()

    # If a file has been manually normalized for this purpose, skip it
    manually_normalized_file = check_manual_normalization(opts)
    if manually_normalized_file:
        print(os.path.basename(opts.file_path),
              'was already manually normalized into',
              manually_normalized_file.currentlocation)
        if 'preservation' in opts.purpose:
            # Add derivation link and associated event
            insert_derivation_event(
                original_uuid=opts.file_uuid,
                output_uuid=manually_normalized_file.uuid,
                derivation_uuid=str(uuid.uuid4()),
                event_detail_output="manual normalization",
                outcome_detail_note=None,
            )
        return SUCCESS

    format_id = get_object_or_None(FileFormatVersion, file_uuid=opts.file_uuid)

    # Look up the normalization command in the FPR
    if format_id is None:
        rule = get_default_rule(opts.purpose)
        print(os.path.basename(file_.currentlocation),
              "not identified - falling back to default", opts.purpose, "rule")
    else:
        print('File format:', format_id.format_version)
        try:
            rule = FPRule.active.get(format=format_id.format_version,
                                     purpose=opts.purpose)
        except FPRule.DoesNotExist:
            try:
                rule = get_default_rule(opts.purpose)
                print("No rule for", os.path.basename(file_.currentlocation),
                      "falling back to default", opts.purpose, "rule")
                status = NO_RULE_FOUND
            except FPRule.DoesNotExist:
                print('Not normalizing',
                      os.path.basename(file_.currentlocation),
                      '- No rule or default rule found to normalize for',
                      opts.purpose,
                      file=sys.stderr)
                return NO_RULE_FOUND
    print('Format Policy Rule:', rule)
    command = rule.command
    print('Format Policy Command', command.description)

    replacement_dict = get_replacement_dict(opts)
    cl = transcoder.CommandLinker(rule, command, replacement_dict, opts,
                                  once_normalized)
    exitstatus = cl.execute()

    # Store thumbnails locally for use during AIP searches
    # TODO is this still needed, with the storage service?
    if 'thumbnail' in opts.purpose:
        thumbnail_filepath = cl.commandObject.output_location
        clientConfigFilePath = '/etc/archivematica/MCPClient/clientConfig.conf'
        config = ConfigParser.SafeConfigParser()
        config.read(clientConfigFilePath)
        try:
            shared_path = config.get('MCPClient', 'sharedDirectoryMounted')
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
            # Fall back to the default shared directory location
            shared_path = '/var/archivematica/sharedDirectory/'
        thumbnail_storage_dir = os.path.join(
            shared_path,
            'www',
            'thumbnails',
            opts.sip_uuid,
        )
        try:
            os.makedirs(thumbnail_storage_dir)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(
                    thumbnail_storage_dir):
                pass
            else:
                raise
        thumbnail_basename, thumbnail_extension = os.path.splitext(
            thumbnail_filepath)
        thumbnail_storage_file = os.path.join(
            thumbnail_storage_dir,
            opts.file_uuid + thumbnail_extension,
        )

        shutil.copyfile(thumbnail_filepath, thumbnail_storage_file)

    if exitstatus != 0:
        print('Command', command.description, 'failed!', file=sys.stderr)
        return RULE_FAILED
    else:
        print('Successfully normalized', os.path.basename(opts.file_path),
              'for', opts.purpose)
        return status
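
A minimal invocation sketch (not part of the original module): the attribute names below mirror what main() reads (file_uuid, file_path, sip_uuid, purpose, normalize_file_grp_use), but the flag names and defaults are assumptions and the real CLI wiring may differ.

if __name__ == '__main__':
    import argparse

    # Hypothetical argument parser; option names are illustrative only.
    parser = argparse.ArgumentParser(
        description='Normalize a file using FPR rules (usage sketch)')
    parser.add_argument('--file-uuid', dest='file_uuid', required=True)
    parser.add_argument('--file-path', dest='file_path', required=True)
    parser.add_argument('--sip-uuid', dest='sip_uuid', required=True)
    parser.add_argument('--purpose', dest='purpose', default='preservation',
                        choices=['preservation', 'access', 'thumbnail'])
    parser.add_argument('--normalize-file-grp-use',
                        dest='normalize_file_grp_use', default='original')
    sys.exit(main(parser.parse_args()))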
def identifyCommands(fileName):
    """Identify normalization commands applicable to the file's type(s)."""
    ret = []
    premisFile = opts.logsDirectory + "fileMeta/" + opts.fileUUID + ".xml"
    try:
        for pronomID in getPronomsFromPremis(premisFile):
            sql = """SELECT CR.pk, CR.command, CR.GroupMember
            FROM CommandRelationships AS CR
            JOIN FileIDs ON CR.fileID=FileIDs.pk
            JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk
            JOIN FileIDsByPronom AS FIBP  ON FileIDs.pk = FIBP.FileIDs
            WHERE FIBP.FileID = '""" + pronomID.__str__() + """'
            AND CommandClassifications.classification = '""" + opts.commandClassifications + """';"""
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row is not None:
                ret.append(row)
                row = c.fetchone()
            sqlLock.release()
    except Exception:
        print >> sys.stderr, "Failed to retrieve pronomIDs."
        ret = []

    if transcoder.fileExtension:
        sql = """SELECT CR.pk, CR.command, CR.GroupMember
        FROM CommandRelationships AS CR
        JOIN FileIDs ON CR.fileID=FileIDs.pk
        JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk
        JOIN FileIDsByExtension AS FIBE  ON FileIDs.pk = FIBE.FileIDs
        WHERE FIBE.Extension = '""" + transcoder.fileExtension.__str__() + """'
        AND CommandClassifications.classification = '""" + opts.commandClassifications + """';"""
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row is not None:
            ret.append(row)
            row = c.fetchone()
        sqlLock.release()

    if not ret:
        if opts.commandClassifications == "preservation":
            if inPreservationFormat():
                print "Already in preservation format."
            else:
                print >> sys.stderr, "Unable to verify archival readiness."
                #Issue 528: related to exit code
                exit(0)

        elif opts.commandClassifications == "access":
            sql = """SELECT CR.pk, CR.command, CR.GroupMember
            FROM CommandRelationships AS CR
            JOIN Commands AS C ON CR.command = C.pk
            WHERE C.description = 'Copying file to access directory.';"""
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                cl = transcoder.CommandLinker(row)
                copyExitCode = cl.execute()
                if copyExitCode:
                    exit(copyExitCode)
            if inAccessFormat():
                print "Already in access format."
                exit(0)
            else:
                print >> sys.stderr, "Unable to verify access readiness."
                #Issue 528: related to exit code
                exit(0)

        elif opts.commandClassifications == "thumbnail":
            #use default thumbnail
            print "Using default thumbnail"
            sql = """SELECT CR.pk, CR.command, CR.GroupMember
            FROM CommandRelationships AS CR
            JOIN Commands AS C ON CR.command = C.pk
            WHERE C.description = 'Using default thumbnail.';"""
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                cl = transcoder.CommandLinker(row)
                copyExitCode = cl.execute()
                exit(copyExitCode)

    return ret
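
The queries above interpolate values directly into the SQL string. As a general pattern, parameter binding is safer; below is a sketch that assumes a DB-API 2.0 style cursor is available (the querySQL helper used above may not accept parameters directly, so this is illustrative only).

# Sketch only: assumes a DB-API 2.0 cursor, not the querySQL helper above.
PRONOM_RULE_SQL = """
    SELECT CR.pk, CR.command, CR.GroupMember
    FROM CommandRelationships AS CR
    JOIN FileIDs ON CR.fileID = FileIDs.pk
    JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk
    JOIN FileIDsByPronom AS FIBP ON FileIDs.pk = FIBP.FileIDs
    WHERE FIBP.FileID = %s
      AND CommandClassifications.classification = %s;
"""


def fetch_pronom_rules(cursor, pronom_id, classification):
    """Return matching command relationships using parameter binding."""
    cursor.execute(PRONOM_RULE_SQL, (pronom_id, classification))
    return cursor.fetchall()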
def main(opts):
    """ Find and execute normalization commands on input file. """
    # TODO fix for maildir working only on attachments

    setup_dicts(mcpclient_settings)

    # Find the file and its FormatVersion (file identification)
    try:
        file_ = File.objects.get(uuid=opts.file_uuid)
    except File.DoesNotExist:
        print('File with uuid',
              opts.file_uuid,
              'does not exist in database.',
              file=sys.stderr)
        return NO_RULE_FOUND
    print('File found:', file_.uuid, file_.currentlocation)

    # Unless normalization file group use is submissionDocumentation, skip the
    # submissionDocumentation directory
    if opts.normalize_file_grp_use != "submissionDocumentation" and file_.currentlocation.startswith(
            '%SIPDirectory%objects/submissionDocumentation'):
        print('File', os.path.basename(opts.file_path),
              'in objects/submissionDocumentation, skipping')
        return SUCCESS

    # Only normalize files where the file's group use and normalize group use match
    if file_.filegrpuse != opts.normalize_file_grp_use:
        print(os.path.basename(opts.file_path), 'is file group usage',
              file_.filegrpuse, 'instead of', opts.normalize_file_grp_use,
              '- skipping')
        return SUCCESS

    # For re-ingest: clean up old derivations
    # If the file already has a Derivation with the same purpose, remove it and mark the derived file as deleted
    derivatives = Derivation.objects.filter(
        source_file=file_, derived_file__filegrpuse=opts.purpose)
    for derivative in derivatives:
        print(opts.purpose, 'derivative', derivative.derived_file_id,
              'already exists, marking as deleted')
        File.objects.filter(uuid=derivative.derived_file_id).update(
            filegrpuse='deleted')
        # Don't create events for thumbnail files
        if opts.purpose != 'thumbnail':
            databaseFunctions.insertIntoEvents(
                fileUUID=derivative.derived_file_id,
                eventType='deletion',
            )
    derivatives.delete()

    # If a file has been manually normalized for this purpose, skip it
    manually_normalized_file = check_manual_normalization(opts)
    if manually_normalized_file:
        print(os.path.basename(opts.file_path),
              'was already manually normalized into',
              manually_normalized_file.currentlocation)
        if 'preservation' in opts.purpose:
            # Add derivation link and associated event
            insert_derivation_event(
                original_uuid=opts.file_uuid,
                output_uuid=manually_normalized_file.uuid,
                derivation_uuid=str(uuid.uuid4()),
                event_detail_output="manual normalization",
                outcome_detail_note=None,
            )
        return SUCCESS

    do_fallback = False
    format_id = get_object_or_None(FileFormatVersion, file_uuid=opts.file_uuid)

    # Look up the normalization command in the FPR
    if format_id:
        print('File format:', format_id.format_version)
        try:
            rule = FPRule.active.get(format=format_id.format_version,
                                     purpose=opts.purpose)
        except FPRule.DoesNotExist:
            do_fallback = True

    # Try with default rule if no format_id or rule was found
    if format_id is None or do_fallback:
        try:
            rule = get_default_rule(opts.purpose)
            print(os.path.basename(file_.currentlocation),
                  "not identified or no rule found",
                  "- falling back to default", opts.purpose, "rule")
        except FPRule.DoesNotExist:
            print('Not normalizing', os.path.basename(file_.currentlocation),
                  '- No rule or default rule found to normalize for',
                  opts.purpose)
            return NO_RULE_FOUND

    print('Format Policy Rule:', rule)
    command = rule.command
    print('Format Policy Command', command.description)

    replacement_dict = get_replacement_dict(opts)
    cl = transcoder.CommandLinker(rule, command, replacement_dict, opts,
                                  once_normalized)
    exitstatus = cl.execute()

    # If the access/thumbnail normalization command has errored AND a
    # derivative was NOT created, then we run the default access/thumbnail
    # rule. Note that we DO need to check if the derivative file exists. Even
    # when a verification command exists for the normalization command, the
    # transcoder.py::Command.execute method will only run the verification
    # command if the normalization command returns a 0 exit code.
    # Errored thumbnail normalization also needs to result in default thumbnail
    # normalization; if not, then a transfer with a single file that failed
    # thumbnail normalization will result in a failed SIP at "Prepare DIP: Copy
    # thumbnails to DIP directory"
    if (exitstatus != 0 and opts.purpose in ('access', 'thumbnail')
            and cl.commandObject.output_location
            and (not os.path.isfile(cl.commandObject.output_location))):
        # Fall back to default rule
        try:
            fallback_rule = get_default_rule(opts.purpose)
            print(opts.purpose,
                  'normalization failed, falling back to default',
                  opts.purpose, 'rule')
        except FPRule.DoesNotExist:
            print('Not retrying normalization for',
                  os.path.basename(file_.currentlocation),
                  '- No default rule found to normalize for', opts.purpose)
            fallback_rule = None
        # Don't re-run the same command
        if fallback_rule and fallback_rule.command != command:
            print('Fallback Format Policy Rule:', fallback_rule)
            command = fallback_rule.command
            print('Fallback Format Policy Command', command.description)

            # Use existing replacement dict
            cl = transcoder.CommandLinker(fallback_rule, command,
                                          replacement_dict, opts,
                                          once_normalized)
            exitstatus = cl.execute()

    # Store thumbnails locally for use during AIP searches
    # TODO is this still needed, with the storage service?
    if 'thumbnail' in opts.purpose:
        thumbnail_filepath = cl.commandObject.output_location
        thumbnail_storage_dir = os.path.join(
            mcpclient_settings.SHARED_DIRECTORY,
            'www',
            'thumbnails',
            opts.sip_uuid,
        )
        try:
            os.makedirs(thumbnail_storage_dir)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(
                    thumbnail_storage_dir):
                pass
            else:
                raise
        thumbnail_basename, thumbnail_extension = os.path.splitext(
            thumbnail_filepath)
        thumbnail_storage_file = os.path.join(
            thumbnail_storage_dir,
            opts.file_uuid + thumbnail_extension,
        )

        shutil.copyfile(thumbnail_filepath, thumbnail_storage_file)

    if exitstatus != 0:
        print('Command', command.description, 'failed!', file=sys.stderr)
        return RULE_FAILED
    else:
        print('Successfully normalized', os.path.basename(opts.file_path),
              'for', opts.purpose)
        return SUCCESS
def executeCommandReleationship(gearman_worker, gearman_job):
    """Execute the command relationship for a Gearman job and return the pickled result."""
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseInterface.getUTCDate()
        opts = data["arguments"]  #.encode("utf-8")
        #if isinstance(arguments, unicode):
        #    arguments = arguments.encode("utf-8")
        #if isinstance(arguments, str):
        #    arguments = unicode(arguments)

        sInput = ""
        clientID = gearman_worker.worker_client_id

        opts["date"] = utcDate
        opts["accessDirectory"] = os.path.join(opts['sipPath'],
                                               "DIP/objects") + "/"
        opts["thumbnailDirectory"] = os.path.join(opts['sipPath'],
                                                  "thumbnails") + "/"
        print opts
        for key, value in archivematicaClient.replacementDic.iteritems():
            for key2 in opts:
                opts[key2] = opts[key2].replace(key, value)
        replacementDic = getReplacementDic(opts)
        opts["prependStdOut"] = (
            """Operating on file: {%s}%s \r\nUsing  %s  command classifications"""
            % (opts["fileUUID"], replacementDic["%fileName%"],
               opts["commandClassification"]))
        opts["prependStdError"] = "\r\nSTDError:"
        #    print clientID, execute, data
        archivematicaClient.logTaskAssignedSQL(gearman_job.unique.__str__(),
                                               clientID, utcDate)
        cl = transcoder.CommandLinker(opts["CommandRelationship"],
                                      replacementDic, opts, onceNormalized)
        cl.execute()

        co = cl.commandObject
        exitCode = co.exitCode
        stdOut = "%s \r\n%s" % (opts["prependStdOut"], co.stdOut)
        if not co.stdError or co.stdError.isspace():
            stdError = ""
        else:
            stdError = "%s \r\n%s" % (opts["prependStdError"], co.stdError)

        #TODO add date to ops

        #Replace replacement strings
        #archivematicaClient.printOutputLock.acquire()
        #print >>sys.stderr, "<processingCommand>{" + gearman_job.unique + "}" + command.__str__() + "</processingCommand>"
        #archivematicaClient.printOutputLock.release()
        #exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False)
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": stdOut,
            "stdError": stdError
        })
    #catch OS errors
    except OSError, ose:
        archivematicaClient.printOutputLock.acquire()
        traceback.print_exc(file=sys.stdout)
        print >> sys.stderr, "Execution failed:", ose
        archivematicaClient.printOutputLock.release()
        output = ["Config Error!", ose.__str__()]
        exitCode = 1
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": output[0],
            "stdError": output[1]
        })
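
For context, a worker callback like the one above is typically registered with a Gearman worker process. A minimal sketch using the python-gearman 2.x API follows; the server address and task name are placeholders, not values taken from the original client configuration.

import gearman

# Placeholder server address and task name; the real worker registration
# lives in the Archivematica MCP client, not in this snippet.
worker = gearman.GearmanWorker(['127.0.0.1:4730'])
worker.set_client_id('transcoder-worker')
worker.register_task('transcoder', executeCommandReleationship)
worker.work()  # blocks, polling the Gearman server for jobs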