Ejemplo n.º 1
0
def create_premis_agent_file(workspace,
                             event_type,
                             agent_name,
                             agent_type,
                             agent_identifier,
                             event_target=None):
    """Write a METS XML file containing a PREMIS agent element.

    The file is named ``<event_type>-agent-amd.xml``. When ``event_target``
    is given, it is prepended to the name as ``<event_target>-``. The full
    name is passed through ``encode_path`` before use, and the ID handed to
    ``_create_mets`` is produced from that encoded name by ``encode_id``.

    :param workspace: path to directory where file is created
    :param event_type: event type (used only for the filename)
    :param agent_name: PREMIS agentName
    :param agent_type: PREMIS agentType
    :param agent_identifier: PREMIS agentIdentifierValue
    :param event_target: optional event target file (filename prefix)
    :returns: tuple of output file path and METS XML element object
    """
    basename = '%s-agent-amd.xml' % (event_type)
    if event_target:
        basename = '%s-%s' % (event_target, basename)
    encoded_name = encode_path(basename)

    # The section ID is derived from the encoded filename.
    section_id = encode_id(encoded_name)

    agent_elem = create_premis_agent(agent_name, agent_type,
                                     agent_identifier)
    agent_mets = _create_mets(agent_elem, section_id, 'PREMIS:AGENT')

    _write_mets(agent_mets, os.path.join(workspace, encoded_name))

    return (os.path.join(workspace, encoded_name), agent_mets)
Ejemplo n.º 2
0
def main(arguments=None):
    """The main method for import_description.

    Reads the XML document at ``args.dmdsec_location``, wraps it (or, when
    ``args.desc_root == 'remove'``, the direct children of its root) in
    METS xmlData / mdWrap / dmdSec elements and writes the serialized METS
    document to ``<args.workspace>/<encoded target>-dmdsec.xml`` (or
    ``dmdsec.xml`` when no target is given).

    :param arguments: argument list forwarded to ``parse_arguments``
    :returns: 0 on success
    :raises TypeError: if the namespace of the first wrapped element is not
                       a key of ``METS_MDTYPES``
    """
    args = parse_arguments(arguments)

    if args.dmdsec_target:
        url_t_path = encode_path(args.dmdsec_target, suffix='-dmdsec.xml')
    else:
        url_t_path = 'dmdsec.xml'

    with open(args.dmdsec_location, 'r') as content_file:
        content = content_file.read()

    _mets = mets.mets()

    tree = lxml.etree.fromstring(content)

    if args.desc_root == 'remove':
        childs = tree.findall('*')
    else:
        childs = [tree]
    xmldata_e = mets.xmldata(child_elements=childs)
    # NOTE(review): with desc_root == 'remove' and a root element that has
    # no child elements, childs is empty and the next line raises
    # IndexError -- confirm whether that input can occur.
    ns = h.get_namespace(childs[0])

    if ns not in METS_MDTYPES:
        raise TypeError("Invalid namespace: %s" % ns)

    md_info = METS_MDTYPES[ns]
    mdt = md_info['mdtype']
    # 'othermdtype' is optional in the mapping; default to None.
    mdo = md_info['othermdtype'] if 'othermdtype' in md_info else None
    mdv = md_info['version']

    mdwrap_e = mets.mdwrap(mdtype=mdt,
                           othermdtype=mdo,
                           mdtypeversion=mdv,
                           child_elements=[xmldata_e])
    dmdsec_e = mets.dmdsec(encode_id(url_t_path), child_elements=[mdwrap_e])

    _mets.append(dmdsec_e)

    # Serialize once and reuse the result for both stdout and the file.
    serialized = h.serialize(_mets)

    if args.stdout:
        print(serialized)

    output_file = os.path.join(args.workspace, url_t_path)
    output_dir = os.path.dirname(output_file)
    # dirname is '' when the workspace is empty; os.makedirs('') would
    # raise, so only create a directory when there is one to create.
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_file, 'w+') as outfile:
        outfile.write(serialized)

    print("import_description created file: %s" % output_file)

    return 0
Ejemplo n.º 3
0
def ids_for_files(workspace, path, idtype, dash_count=0):
    """Collect metadata file names in ``workspace`` and their IDs.

    Only directory entries whose name contains ``idtype`` are considered.
    If ``path`` is truthy, a name is kept when it contains ``path`` but
    does not contain ``path + '%2F'``. Otherwise a name is kept when it
    has exactly ``dash_count`` ``'-'`` characters.

    :param workspace: directory to scan
    :param path: substring to look for in file names, or a falsy value
    :param idtype: substring that marks the wanted kind of metadata file
    :param dash_count: required number of dashes when ``path`` is falsy
    :returns: tuple of two lists: the matching file names and the result
              of ``encode_id`` for each of them, in the same order
    """
    candidates = []
    for entry in scandir.scandir(workspace):
        if idtype in entry.name:
            candidates.append(entry.name)

    files_result = []
    for name in candidates:
        if path:
            keep = path in name and (path + '%2F') not in name
        else:
            keep = name.count('-') == dash_count
        if keep:
            files_result.append(name)

    id_result = []
    for name in files_result:
        id_result.append(encode_id(name))

    return files_result, id_result
Ejemplo n.º 4
0
def create_premis_event_file(workspace,
                             event_type,
                             event_datetime,
                             event_detail,
                             event_outcome,
                             event_outcome_detail,
                             event_target=None,
                             agent_identifier=None):
    """Write a METS XML file containing a PREMIS event element.

    The file is named ``<event_type>-event-amd.xml``. When ``event_target``
    is given, it is prepended to the name as ``<event_target>-``. The full
    name is passed through ``encode_path`` before use, and the ID handed to
    ``_create_mets`` is produced from that encoded name by ``encode_id``.

    :param workspace: path to directory where file is created
    :param event_type: PREMIS eventType (also used for the filename)
    :param event_datetime: PREMIS eventDateTime
    :param event_detail: PREMIS eventDetail
    :param event_outcome: PREMIS eventOutcome
    :param event_outcome_detail: PREMIS eventOutcomeDetail
    :param event_target: optional event target file (filename prefix)
    :param agent_identifier: PREMIS linkingAgentIdentifierValue
    :returns: tuple of output file path and METS XML element object
    """
    basename = '%s-event-amd.xml' % event_type
    if event_target:
        basename = '%s-%s' % (event_target, basename)
    encoded_name = encode_path(basename)

    # The section ID is derived from the encoded filename.
    section_id = encode_id(encoded_name)

    event_elem = create_premis_event(event_type, event_datetime,
                                     event_detail, event_outcome,
                                     event_outcome_detail,
                                     agent_identifier)
    event_mets = _create_mets(event_elem, section_id, 'PREMIS:EVENT')

    _write_mets(event_mets, os.path.join(workspace, encoded_name))

    return (os.path.join(workspace, encoded_name), event_mets)
Ejemplo n.º 5
0
def main(arguments=None):
    """The main method for argparser.

    For every file returned by ``collect_filepaths`` a METS document with
    one amdSec/techMD/mdWrap/xmlData chain is built and written to
    ``<args.workspace>/<encoded relative path>-techmd.xml``. The techMD ID
    is derived from that same encoded filename with ``encode_id``.

    :param arguments: argument list forwarded to ``parse_arguments``
    :returns: 0 on success
    """
    args = parse_arguments(arguments)

    # Loop files and create premis objects
    for filepath in collect_filepaths(dirs=args.files, base=args.base_path):
        if args.base_path != '':
            filerel = os.path.relpath(filepath, args.base_path)
        else:
            filerel = filepath

        xmldata = mets.xmldata()
        # The return value is not used here; the call presumably fills
        # xmldata in place -- TODO confirm against create_premis_object.
        create_premis_object(
            xmldata, filepath, args.skip_inspection, args.format_name,
            args.format_version, args.digest_algorithm, args.message_digest,
            args.date_created, args.charset)

        # One encoded name serves as both the techMD ID source and the
        # output filename, so compute it once.
        techmd_filename = encode_path(filerel, suffix="-techmd.xml")

        mdwrap = mets.mdwrap('PREMIS:OBJECT', '2.3', child_elements=[xmldata])
        techmd = mets.techmd(encode_id(techmd_filename),
                             child_elements=[mdwrap])
        amdsec = mets.amdsec(child_elements=[techmd])
        _mets = mets.mets(child_elements=[amdsec])

        # Serialize once and reuse the result for both stdout and the file.
        serialized = h.serialize(_mets)

        if args.stdout:
            print(serialized)

        # An empty workspace means "current directory"; os.makedirs('')
        # would raise, so skip directory creation in that case.
        if args.workspace and not os.path.exists(args.workspace):
            os.makedirs(args.workspace)

        output_path = os.path.join(args.workspace, techmd_filename)
        with open(output_path, 'w+') as outfile:
            outfile.write(serialized)
            print("Wrote METS technical metadata to file %s" % outfile.name)

    return 0
Ejemplo n.º 6
0
def _premis_output_name(args, kind):
    """Return the encoded ``[<target>-]<event_type>-<kind>.xml`` filename."""
    if args.event_target:
        raw_name = '%s-%s-%s.xml' % (args.event_target, args.event_type, kind)
    else:
        raw_name = '%s-%s.xml' % (args.event_type, kind)
    return encode_path(raw_name)


def _write_premis_mets(mets_root, output_file, to_stdout):
    """Serialize ``mets_root``, optionally echo it, and write it to disk."""
    serialized = h.serialize(mets_root)

    if to_stdout:
        print(serialized)

    output_dir = os.path.dirname(output_file)
    # dirname is '' when the workspace is empty; os.makedirs('') would
    # raise, so only create a directory when there is one to create.
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_file, 'w+') as outfile:
        outfile.write(serialized)

    print("premis_event created file: %s" % output_file)


def main(arguments=None):
    """The main method for premis_event.

    When ``args.agent_name`` is set, a METS document with a PREMIS agent
    is created first and written to ``[<target>-]<event_type>-agent.xml``
    in ``args.workspace``; the value returned by ``create_premis_agent`` is
    then passed on to the event. A METS document with the PREMIS event is
    always written to ``[<target>-]<event_type>-event.xml``. Section IDs
    are derived from the encoded filenames with ``encode_id``.

    :param arguments: argument list forwarded to ``parse_arguments``
    :returns: 0 on success
    """
    args = parse_arguments(arguments)

    linking_agent_identifier = None

    if args.agent_name:
        _mets = mets.mets()
        amdsec = mets.amdsec()
        _mets.append(amdsec)

        agent_filename = _premis_output_name(args, 'agent')
        agent_id = encode_id(agent_filename)
        output_file = os.path.join(args.workspace, agent_filename)

        linking_agent_identifier = create_premis_agent(amdsec, agent_id,
                                                       args.agent_name,
                                                       args.agent_type)

        _write_premis_mets(_mets, output_file, args.stdout)

    # Create event
    _mets = mets.mets()
    amdsec = mets.amdsec()
    _mets.append(amdsec)

    event_filename = _premis_output_name(args, 'event')
    event_id = encode_id(event_filename)
    output_file = os.path.join(args.workspace, event_filename)

    create_premis_event(amdsec, args.event_type, args.event_datetime,
                        args.event_detail, args.event_outcome,
                        args.event_outcome_detail, linking_agent_identifier,
                        event_id)

    _write_premis_mets(_mets, output_file, args.stdout)

    return 0