def newChild(parent, tag, text=None, tailText=None, sets=()):
    """Create a new ``tag`` element, append it to ``parent`` and return it.

    :param parent: element the new child is appended to.
    :param tag: tag name for the new element.
    :param text: optional text content (converted to unicode).
    :param tailText: optional tail text placed after the closing tag.
    :param sets: iterable of ``(key, value)`` attribute pairs.
    :return: the newly created child element.
    """
    # An immutable default replaces the previous mutable ``sets=[]``,
    # which is shared across calls (classic Python pitfall).
    child = etree.Element(tag)
    parent.append(child)
    child.text = strToUnicode(text)
    if tailText:
        child.tail = strToUnicode(tailText)
    # Unpack pairs directly; the old loop bound a variable named ``set``,
    # shadowing the builtin.
    for key, value in sets:
        child.set(key, value)
    return child
# Example #2
def newChild(parent, tag, text=None, tailText=None, sets=()):
    """Append a new ``tag`` element to ``parent`` and return it.

    :param parent: element to receive the new child.
    :param tag: tag name of the element to create.
    :param text: optional text content (converted to unicode).
    :param tailText: optional tail text after the element.
    :param sets: iterable of ``(key, value)`` attribute pairs.
    :return: the created child element.
    """
    # ``sets=()`` fixes the mutable-default-argument defect of ``sets=[]``.
    child = etree.Element(tag)
    parent.append(child)
    child.text = strToUnicode(text)
    if tailText:
        child.tail = strToUnicode(tailText)
    # Tuple-unpack in the for header instead of binding ``set``, which
    # shadowed the builtin of the same name.
    for key, value in sets:
        child.set(key, value)
    return child
def newChild(parent, tag, text=None, tailText=None, sets=()):
    """Create ``tag`` as a subelement of ``parent`` and return it.

    :param parent: element to receive the new child.
    :param tag: tag name of the element to create.
    :param text: optional text content (converted to unicode).
    :param tailText: optional tail text after the element.
    :param sets: iterable of ``(key, value)`` attribute pairs.
    :return: the created child element.
    """
    # Resolves the old "TODO convert sets to a dict": the attribute pairs
    # are passed as SubElement's attrib dict (last duplicate key wins, as
    # with the former sequential child.set() calls). ``sets=()`` also fixes
    # the mutable-default-argument defect of ``sets=[]``.
    child = etree.SubElement(parent, tag, dict(sets))
    child.text = strToUnicode(text)
    if tailText:
        child.tail = strToUnicode(tailText)
    return child
# Example #4
def newChild(parent, tag, text=None, tailText=None, sets=()):
    """Create ``tag`` as a subelement of ``parent`` and return it.

    :param parent: element to receive the new child.
    :param tag: tag name of the element to create.
    :param text: optional text content (converted to unicode).
    :param tailText: optional tail text after the element.
    :param sets: iterable of ``(key, value)`` attribute pairs.
    :return: the created child element.
    """
    # ``sets=()`` fixes the mutable-default-argument defect of ``sets=[]``.
    child = etree.SubElement(parent, tag)
    child.text = strToUnicode(text)
    if tailText:
        child.tail = strToUnicode(tailText)
    # Unpack pairs directly rather than binding ``set`` (shadowed builtin).
    for key, value in sets:
        child.set(key, value)
    return child
# Example #5
def _move_to_internal_shared_dir(filepath, dest, transfer):
    """Move package to an internal Archivematica directory.

    The side effect of this function is to update the transfer object with the
    final location. This is important so other components can continue the
    processing. When relying on watched directories to start a transfer (see
    _start_package_transfer), this also matters because Transfer is going
    to look up the object in the database based on the location.

    :param filepath: source path of the package.
    :param dest: destination directory; the final path is padded if it
        already exists.
    :param transfer: Transfer model instance updated with the final location.
    :raises Exception: if the source path is missing or the rename fails.
    """
    error = _check_filepath_exists(filepath)
    if error:
        raise Exception(error)

    filepath = Path(filepath)
    dest = Path(dest)

    # Confine destination to subdir of originals.
    basename = filepath.name
    dest = _pad_destination_filepath_if_it_already_exists(dest / basename)

    try:
        filepath.rename(dest)
    except OSError as e:
        # The original passed the format arguments logging-style
        # (Exception("...%s...", a, b, c)), which left the message
        # unformatted; interpolate them into the message instead.
        raise Exception("Error moving from %s to %s (%s)" % (filepath, dest, e))
    else:
        transfer.currentlocation = strToUnicode(dest.as_posix()).replace(
            _get_setting("SHARED_DIRECTORY"), r"%sharedPath%", 1
        )
        transfer.save()
# Example #6
def directory_children(request, basePath=False):
    """Return a JSON listing of a directory's visible entries.

    The path to list is built by concatenating the optional ``basePath``
    prefix with the ``base_path`` and ``path`` GET parameters. Entries whose
    name starts with a dot are excluded. The JSON response has two keys:
    ``entries`` (all visible names) and ``directories`` (the subset that are
    readable directories).

    NOTE(review): relies on the Python 2-only ``unicode()`` builtin and the
    legacy ``mimetype=`` HttpResponse argument — presumably Python 2 / old
    Django code; confirm before reuse.
    """
    path = ''
    if (basePath):
        path = path + basePath
    path = path + request.GET.get('base_path', '')
    path = path + request.GET.get('path', '')

    response    = {}
    entries     = []
    directories = []

    for entry in sorted_directory_list(path):
        entry = archivematicaFunctions.strToUnicode(entry)
        # Skip hidden entries (names beginning with a dot).
        if unicode(entry)[0] != '.':
            entries.append(entry)
            entry_path = os.path.join(path, entry)
            # Only advertise subdirectories the server can actually read.
            if os.path.isdir(archivematicaFunctions.unicodeToStr(entry_path)) and os.access(archivematicaFunctions.unicodeToStr(entry_path), os.R_OK):
                directories.append(entry)

    response = {
      'entries': entries,
      'directories': directories
    }

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json'
    )
# Example #7
def directory_children(request, basePath=False):
    """Return a JSON listing of a directory's visible entries.

    The path to list is built by concatenating the optional ``basePath``
    prefix with the ``base_path`` and ``path`` GET parameters. Entries whose
    name starts with a dot are excluded. The JSON response has two keys:
    ``entries`` (all visible names) and ``directories`` (the subset that are
    readable directories).
    """
    path = ''
    if (basePath):
        path = path + basePath
    path = path + request.GET.get('base_path', '')
    path = path + request.GET.get('path', '')

    response = {}
    entries = []
    directories = []

    for entry in sorted_directory_list(path):
        entry = archivematicaFunctions.strToUnicode(entry)
        # startswith() is safe on empty names (the old ``unicode(entry)[0]``
        # raised IndexError) and drops the Python 2-only ``unicode()``
        # builtin; ``entry`` is already unicode after strToUnicode above.
        if not entry.startswith('.'):
            entries.append(entry)
            entry_path = os.path.join(path, entry)
            # Only advertise subdirectories the server can actually read.
            if os.path.isdir(archivematicaFunctions.unicodeToStr(
                    entry_path)) and os.access(
                        archivematicaFunctions.unicodeToStr(entry_path),
                        os.R_OK):
                directories.append(entry)

    response = {'entries': entries, 'directories': directories}

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json')
# Example #8
def logTaskCompletedSQL(task):
    """
    Fetches execution data from the completed task and logs it to the database.
    Updates the entry in the Tasks table with data in the provided task.
    Saves the following fields: exitCode, stdOut, stdError

    :param task: completed in-memory task whose results are persisted to the
        matching ``Task`` row (looked up by UUID).
    """
    print("Logging task output to db", task.UUID)
    # str() is the idiomatic spelling; the old code called __str__() directly.
    taskUUID = str(task.UUID)
    exitCode = str(task.results["exitCode"])
    stdOut = task.results["stdOut"]
    stdError = task.results["stdError"]

    task = Task.objects.get(taskuuid=taskUUID)
    task.endtime = getUTCDate()
    task.exitcode = exitCode
    # ``strToUnicode`` here prevents the MCP server from crashing when, e.g.,
    # stderr contains Latin-1-encoded chars such as \xa9, i.e., the copyright
    # symbol, cf. #9967.
    task.stdout = strToUnicode(stdOut, obstinate=True)
    task.stderror = strToUnicode(stdError, obstinate=True)
    task.save()
def sanitize_name(basename):
    """Return a transliterated, ASCII-safe version of ``basename``.

    :param basename: original file or directory name; must be non-empty.
    :returns: ``basename`` transliterated by unidecode, with any remaining
        disallowed characters replaced by ``REPLACEMENT_CHAR``.
    :raises ValueError: if ``basename`` is empty.
    """
    if basename == "":
        # Fixed typo in the message ("recieved" -> "received").
        raise ValueError("sanitize_name received an empty filename.")
    unicode_basename = strToUnicode(basename)
    unicode_name = unidecode(unicode_basename)
    # We can't return an empty string here because it will become the new
    # filename. However, in some cases unidecode strips out all chars (e.g.
    # unidecode(u"🚀") == ""), so if that happens we fall back to the original
    # name and replace its invalid chars with REPLACEMENT_CHAR. This results
    # in a filename of one or more underscores, which isn't great, but allows
    # processing to continue.
    if unicode_name == "":
        unicode_name = unicode_basename

    return ALLOWED_CHARS.sub(REPLACEMENT_CHAR, unicode_name)
def call(jobs):
    """Primary entry point for this script.

    For each job, validate the package's custom structmap against the METS
    XSD asset; validation errors propagate as exceptions.
    """
    for job in jobs:
        with job.JobContext():
            transfer_path = strToUnicode(job.args[1])
            mets_structmap = os.path.join(
                transfer_path, "metadata", "mets_structmap.xml")
            mets_xsd = job.args[2]
            if not os.path.isfile(mets_structmap):
                job.pyprint("Custom structmap not supplied with package")
                return
            if not os.path.isfile(mets_xsd):
                raise VerifyMETSException("METS asset is unavailable")
            schema = etree.XMLSchema(etree.parse(mets_xsd))
            # assertValid raises (e.g. etree.DocumentInvalid) when the
            # structmap does not conform; otherwise it returns quietly.
            schema.assertValid(etree.parse(mets_structmap))
            job.pyprint("Custom structmap validated correctly")
def run_hashsum_commands(job):
    """Run hashsum commands and generate a cumulative return code.

    For each known checksum file type found in the transfer's ``metadata``
    directory, compare the recorded checksums against the transfer's files
    and, on success, record a PREMIS event per file.

    :param job: job object providing ``args`` (index 1: transfer directory,
        index 2: transfer UUID) and ``pyprint`` for output.
    :returns: 0 when every comparison succeeded (or there was nothing to
        do); 1 when the expected arguments are missing; otherwise the sum
        of the non-zero comparison results.
    """
    transfer_dir = None
    transfer_uuid = None
    try:
        transfer_dir = strToUnicode(job.args[1])
        transfer_uuid = job.args[2]
    except IndexError:
        logger.error("Cannot access expected module arguments: %s", job.args)
        return 1
    ret = 0
    # Create a query-set once so we don't need to generate per each checksum
    # file type.
    file_queryset = get_file_queryset(transfer_uuid)
    for hashfile in Hashsum.HASHFILES_COMMANDS:
        hashsum = None
        hashfilepath = os.path.join(transfer_dir, "metadata", hashfile)
        if os.path.exists(hashfilepath):
            try:
                hashsum = Hashsum(hashfilepath, job)
            except NoHashCommandAvailable:
                # No tool available for this checksum type; skip it without
                # affecting the cumulative return code.
                job.pyprint(
                    "Nothing to do for {}. No command available.".format(
                        Hashsum.get_ext(hashfilepath)))
                continue
        if hashsum:
            job.pyprint(
                "Comparing transfer checksums with the supplied {} file".
                format(Hashsum.get_ext(hashfilepath)),
                file=sys.stderr,
            )
            result = hashsum.compare_hashes(transfer_dir=transfer_dir)
            # Add to PREMIS on success only.
            if result == 0:
                job.pyprint("{}: Comparison was OK".format(
                    Hashsum.get_ext(hashfile)))
                write_premis_event_per_file(
                    file_uuids=file_queryset,
                    transfer_uuid=transfer_uuid,
                    event_detail=hashsum.get_command_detail(),
                )
                continue
            # Accumulate failures so any single bad file type fails the job.
            ret += result
    return ret
# Example #12
def _pad_destination_filepath_if_it_already_exists(filepath,
                                                   original=None,
                                                   attempt=0):
    """
    Return a version of the filepath that does not yet exist, padding with numbers
    as necessary and reattempting until a non-existent filepath is found

    :param filepath: `Path` or string of the desired destination filepath
    :param original: `Path` or string of the original filepath (before padding attempts)
    :param attempt: Number

    :returns: `Path` object, padded as necessary
    """
    if original is None:
        original = filepath
    filepath = Path(filepath)
    original = Path(original)

    attempt = attempt + 1
    if not filepath.exists():
        return filepath
    if filepath.is_dir():
        return _pad_destination_filepath_if_it_already_exists(
            "{}_{}".format(strToUnicode(original.as_posix()), attempt),
            original,
            attempt,
        )

    # need to work out basename
    basedirectory = original.parent
    basename = original.name

    # do more complex padding to preserve file extension
    period_position = basename.index(".")
    non_extension = basename[0:period_position]
    extension = basename[period_position:]
    new_basename = "{}_{}{}".format(non_extension, attempt, extension)
    new_filepath = basedirectory / new_basename
    return _pad_destination_filepath_if_it_already_exists(
        new_filepath, original, attempt)
    def _package_create_handler(self, worker, job, payload):
        """Create a new package.

        [config]
        name = packageCreate
        raise_exc = True
        """
        # Optional keyword arguments first: processing_config is forwarded
        # only when the client supplied one.
        create_kwargs = {"auto_approve": payload.get("auto_approve")}
        processing_config = payload.get("processing_config")
        if processing_config is not None:
            create_kwargs["processing_config"] = processing_config
        # Positional arguments in the order expected by create_package.
        package = create_package(
            self.package_queue,
            self.executor,
            payload.get("name"),
            payload.get("type"),
            payload.get("accession"),
            payload.get("access_system_id"),
            strToUnicode(payload.get("path")),
            payload.get("metadata_set_id"),
            payload.get("user_id"),
            self.workflow,
            **create_kwargs
        )
        return package.pk
# Example #14
def _get_setting(name):
    """Look up Django setting ``name`` and return it as a unicode string."""
    value = getattr(settings, name)
    return strToUnicode(value)