Beispiel #1
0
def add_file_metadata(manifest,
                      source_url=None,
                      local_path=None,
                      media_type=None,
                      retrieved_on=None,
                      retrieved_by=None,
                      created_on=None,
                      created_by=None,
                      authored_on=None,
                      authored_by=None,
                      conforms_to=None,
                      bundled_as=None,
                      update_existing=False):
    """Add an aggregate entry plus provenance for one file to an RO manifest.

    The aggregate is created with ``add_aggregate`` and the provenance
    fields are then attached to its result with ``add_provenance``.

    Args:
        manifest: The manifest to update; validated with ``check_input``.
        source_url: URL of the file. Used as the aggregate URI when no
            ``local_path`` is given; otherwise recorded as ``retrievedFrom``.
        local_path: Path of the file. When given, it takes precedence over
            ``source_url`` as the aggregate URI (after being passed through
            ``ensure_payload_path_prefix`` and ``escape_uri``).
        media_type: Media type of the file. Guessed with ``guess_mime_type``
            when not supplied.
        retrieved_on, retrieved_by, created_on, created_by, authored_on,
            authored_by: Passed through unchanged to ``add_provenance``.
        conforms_to: Passed to ``add_aggregate``. When not supplied it is
            looked up in ``FILETYPE_ONTOLOGY_MAP`` by file extension
            (``None`` if the extension is missing or unknown).
        bundled_as: Optional mapping passed to ``add_aggregate``. When no
            ``local_path`` is given, its ``"filename"`` entry (if any) is
            used for extension and media-type guessing.
        update_existing: Passed through unchanged to ``add_aggregate``.

    Raises:
        ValueError: If neither ``source_url`` nor ``local_path`` is given.
    """
    check_input(manifest)

    if not source_url and not local_path:
        raise ValueError(
            "Error while adding file metadata to RO manifest. "
            'At least one of the parameters "source_url" or "local_path" must be specified'
        )

    # Choose the name that drives extension / media-type guessing.
    if local_path:
        path = local_path
    elif bundled_as:
        # "filename" may be absent from bundled_as; fall back to the URL
        # (guaranteed truthy here) instead of guessing from None.
        path = bundled_as.get("filename") or source_url
    else:
        path = source_url

    if not conforms_to:
        # splitext yields "" or ".ext"; dropping the first character leaves
        # the bare extension, and an empty result is looked up as None.
        file_ext = os.path.splitext(path)[1][1:] or None
        conforms_to = FILETYPE_ONTOLOGY_MAP.get(file_ext, None)

    if not media_type:
        media_type = guess_mime_type(path)

    # NOTE(review): escape_uri is also called when source_url is None
    # (local_path only) -- presumably it passes None through; confirm.
    uri = source_url = escape_uri(source_url)
    retrieved_from = None

    if local_path:
        uri = escape_uri(ensure_payload_path_prefix(local_path))
        if source_url:
            # The file is identified by its local path; keep its origin
            # as provenance.
            retrieved_from = dict(retrievedFrom=source_url)

    aggregate = add_aggregate(manifest,
                              uri=uri,
                              mediatype=media_type,
                              conforms_to=conforms_to,
                              bundled_as=bundled_as,
                              update_existing=update_existing)
    add_provenance(aggregate,
                   retrieved_from=retrieved_from,
                   retrieved_on=retrieved_on,
                   retrieved_by=retrieved_by,
                   created_on=created_on,
                   created_by=created_by,
                   authored_on=authored_on,
                   authored_by=authored_by)
Beispiel #2
0
def _make_fetch_file(path, remote_entries):
    """Write, or remove, the ``fetch.txt`` file inside ``path``.

    With a non-empty ``remote_entries`` mapping, ``fetch.txt`` is
    (re)written with one tab-separated line per entry, in sorted key order:
    escaped URL, length, escaped denormalized filename. Each value must
    provide ``'url'`` and ``'length'`` items.

    With an empty or ``None`` ``remote_entries``, any existing ``fetch.txt``
    is deleted and nothing is written.
    """
    target = os.path.join(path, "fetch.txt")

    if remote_entries:
        LOGGER.info('Writing fetch.txt')

        with open_text_file(target, 'w') as out:
            for name in sorted(remote_entries):
                entry = remote_entries[name]
                url = escape_uri(entry['url'])
                length = entry['length']
                payload_name = escape_uri(_denormalize_filename(name))
                out.write("%s\t%s\t%s\n" % (url, length, payload_name))
        return

    # Nothing to fetch: make sure no stale fetch.txt is left behind.
    if os.path.isfile(target):
        os.remove(target)