예제 #1
0
def internalize_rels(pid, dsid, source, cursor=None):
    """
    Internalize rels given a ds_db_id.

    Args:
        pid: Raw PID of the object owning the datastream.
        dsid: Datastream ID; only 'DC', 'RELS-EXT' and 'RELS-INT' are
            processed — anything else is a no-op.
        source: Source identifier passed through to the RELS-EXT/RELS-INT
            internalization helpers.
        cursor: Optional DB cursor to reuse.

    Returns:
        The cursor.
    """
    cursor = check_cursor(cursor)
    if dsid not in ['DC', 'RELS-EXT', 'RELS-INT']:
        # Nothing to internalize for other datastreams.
        return cursor
    object_reader.object_id_from_raw(pid, cursor=cursor)
    object_id = cursor.fetchone()['id']
    datastream_reader.datastream({
        'object': object_id,
        'dsid': dsid
    },
                                 cursor=cursor)
    ds_info = cursor.fetchone()
    if ds_info is None or ds_info['resource'] is None:
        # No stored content: pass None through, as the DC and RELS-EXT
        # branches do. (Previously this branch called
        # etree.parse(None), which raises before the helper ever sees
        # the missing-datastream case.)
        if dsid == 'DC':
            internalize_rels_dc(None, object_id, cursor=cursor)
        elif dsid == 'RELS-INT':
            internalize_rels_int(None,
                                 object_id,
                                 source,
                                 cursor=cursor)
        elif dsid == 'RELS-EXT':
            internalize_rels_ext(None, object_id, source, cursor=cursor)
        return cursor
    else:
        datastream_reader.resource(ds_info['resource'], cursor=cursor)
        resource_info = cursor.fetchone()
        resource_path = filestore.resolve_uri(resource_info['uri'])

    with open(resource_path, 'rb') as relations_file:
        if dsid == 'DC':
            internalize_rels_dc(relations_file, object_id, cursor=cursor)
        elif dsid == 'RELS-INT':
            # RELS-INT expects a parsed document rather than a file.
            internalize_rels_int(etree.parse(relations_file),
                                 object_id,
                                 source,
                                 cursor=cursor)
        elif dsid == 'RELS-EXT':
            internalize_rels_ext(relations_file,
                                 object_id,
                                 source,
                                 cursor=cursor)

    return cursor
예제 #2
0
def update_checksums(resource, checksums, cursor=None):
    """
    Bring a resource's checksums up to date.

    Args:
        resource: DB ID of the resource whose checksums are updated.
        checksums: Iterable of dicts with 'type' and 'checksum' keys, or
            None to do nothing. 'DEFAULT' resolves to the configured
            algorithm; 'DISABLED' deletes all stored checksums.
        cursor: Optional DB cursor to reuse.

    Raises:
        ValueError: On checksum mismatch.
    """
    # Fedora hash types mapped to the names that hashlib uses.
    hash_type_map = {
        'MD5': 'md5',
        'SHA-1': 'sha1',
        'SHA-256': 'sha256',
        'SHA-384': 'sha384',
        'SHA-512': 'sha512'
    }

    if checksums is None:
        return

    old_checksums = datastream_reader.checksums(resource,
                                                cursor=cursor).fetchall()
    # The resource's file path is loop-invariant; resolve it at most once
    # instead of re-querying per checksum that needs updating.
    file_path = None
    for checksum in checksums:
        # Resolve default checksum.
        if checksum['type'] == 'DEFAULT':
            checksum['type'] = _config['default_hash_algorithm']

        # Checksums can be disabled.
        if checksum['type'] == 'DISABLED':
            for old_checksum in old_checksums:
                datastream_purger.delete_checksum(old_checksum['id'],
                                                  cursor=cursor)
            continue

        # Only set or validate checksums if they have changed.
        update_checksum = True
        for old_checksum in old_checksums:
            # If we get checksums with no type it is the old.
            if not checksum['type']:
                checksum['type'] = old_checksum['type']
            if (old_checksum['type'] == checksum['type']
                    and old_checksum['checksum'] == checksum['checksum']):
                update_checksum = False

        if update_checksum:
            checksum['resource'] = resource
            if file_path is None:
                file_path = resolve_uri(
                    datastream_reader.resource(
                        resource, cursor=cursor).fetchone()['uri'])
            checksum_value = checksum_file(file_path,
                                           hash_type_map[checksum['type']])

            if not checksum['checksum']:
                # Set checksum.
                checksum['checksum'] = checksum_value
            elif checksum_value != checksum['checksum']:
                raise ValueError('Checksum mismatch.')

            datastream_writer.upsert_checksum(checksum, cursor=cursor)
            # Consume the upsert's returned row so the cursor is reusable.
            cursor.fetchone()
예제 #3
0
def datastream_to_profile(ds_info, cursor, version=0):
    """
    Get a datastream profile dict from a DB DS dict.
    """
    # Defaults used when the datastream has no backing resource.
    mime = None
    size = None
    location = None
    location_type = 'INTERNAL_ID'
    checksum = 'none'
    checksum_type = 'DISABLED'

    resource_id = ds_info['resource']
    if resource_id is not None:
        ds_reader.resource(resource_id, cursor=cursor)
        resource_info = cursor.fetchone()
        if resource_info is not None:
            location = resource_info['uri']
            if ds_info['control_group'] == 'R':
                # Redirect datastreams point at an external URL.
                location_type = 'URL'
            else:
                size = filestore.uri_size(resource_info['uri'])

            ds_reader.mime(resource_info['mime'], cursor=cursor)
            mime = cursor.fetchone()['mime']

            ds_reader.checksums(resource_id, cursor=cursor)
            checksum_info = cursor.fetchone()
            if checksum_info is not None:
                checksum = checksum_info['checksum']
                checksum_type = checksum_info['type']
                # Drain any remaining checksum rows from the cursor.
                cursor.fetchall()

    return {
        'dsLabel': ds_info['label'],
        'dsCreateDate': format_date(ds_info['modified']),
        'dsState': ds_info['state'],
        'dsMIME': mime,
        'dsControlGroup': ds_info['control_group'],
        'dsVersionable': 'true' if ds_info['versioned'] else 'false',
        'dsVersionID': '{}.{}'.format(ds_info['dsid'], version),
        'dsChecksumType': checksum_type,
        'dsChecksum': checksum,
        'dsSize': size,
        'dsLocation': location,
        'dsLocationType': location_type,
    }
예제 #4
0
    def _get_ds_dissemination(self, req, pid, dsid):
        """
        Provide datastream content.

        Returns:
            A dict which may contain 'mime', plus either 'location' (for
            redirect datastreams) or 'stream' (an open binary file
            handle); or None if the datastream has no backing resource.

        Raises:
            ObjectDoesNotExistError: No object exists for the PID.
            DatastreamDoesNotExistError: No (matching) datastream exists,
                possibly at the requested asOfDateTime.
        """
        with get_connection() as conn, conn.cursor() as cursor:
            object_info = object_reader.object_id_from_raw(
                pid, cursor=cursor).fetchone()
            if object_info is None:
                raise ObjectDoesNotExistError(pid)

            time = utils.iso8601_to_datetime(req.get_param('asOfDateTime'))
            ds_info = ds_reader.datastream(
                {
                    'object': object_info['id'],
                    'dsid': dsid
                }, cursor=cursor).fetchone()
            if ds_info is None:
                raise DatastreamDoesNotExistError(pid, dsid)
            if time is not None:
                ds_info = ds_reader.datastream_as_of_time(
                    ds_info['id'], time, cursor)
                if ds_info is None:
                    raise DatastreamDoesNotExistError(pid, dsid, time)

            # Pass the cursor explicitly so this lookup runs on the same
            # connection/transaction as the rest of the method (it was
            # previously omitted, unlike every other reader call here).
            resource_info = ds_reader.resource(ds_info['resource'],
                                               cursor=cursor).fetchone()
            if resource_info is None:
                return None

            info = {}
            mime_info = ds_reader.mime_from_resource(resource_info['id'],
                                                     cursor=cursor).fetchone()
            if mime_info:
                info['mime'] = mime_info['mime']
            # Redirect if we are a redirect DS.
            if ds_info['control_group'] == 'R':
                info['location'] = resource_info['uri']
            else:
                # Send data if we are not a redirect DS.
                file_path = filestore.resolve_uri(resource_info['uri'])
                info['stream'] = open(file_path, 'rb')

            return info
예제 #5
0
    def _get_info(self, pid, dsid):
        """
        Get the MIME-type and URI of the given datastream.

        Returns:
            A three-tuple comprising:
            - the datastream control group
            - the URI of the resource the datastream represents
            - the MIME type of the datastream's resource
        Raises:
            DatastreamDoesNotExistError: The datastream doesn't exist.
        """
        with get_connection() as conn, conn.cursor() as cursor:
            ds_info = ds_reader.datastream_from_raw(
                pid, dsid, cursor=cursor).fetchone()
            if ds_info is None:
                raise DatastreamDoesNotExistError(pid, dsid)

            resource_row = ds_reader.resource(ds_info['resource'],
                                              cursor=cursor).fetchone()
            mime_row = ds_reader.mime(resource_row['mime'],
                                      cursor=cursor).fetchone()

            return (ds_info['control_group'], resource_row['uri'],
                    mime_row['mime'])
예제 #6
0
def write_ds(ds, old=False, cursor=None):
    """
    Create a datastream on the current object.

    Args:
        ds: Datastream dict; reads 'data', 'data_ref', 'control_group',
            'mimetype', 'checksums' and (for metadata-only updates)
            'resource'. May set ds['resource'] for external datastreams.
        old: Passed through to the filestore helpers when writing
            content for an old version.
        cursor: Optional DB cursor to reuse.

    Returns:
        The cursor.
    """
    cursor = check_cursor(cursor, ISOLATION_LEVEL_READ_COMMITTED)

    if ds['data'] is not None:
        # We already have data.
        filestore.create_datastream_from_data(ds,
                                              ds['data'],
                                              mime=ds['mimetype'],
                                              checksums=ds['checksums'],
                                              old=old,
                                              cursor=cursor)
    elif ds['data_ref'] is not None:
        # There is data but not in the request.
        if ds['control_group'] == 'R':
            # Data will remain external.
            ds_writer.upsert_mime(ds['mimetype'], cursor=cursor)
            ds_writer.upsert_resource(
                {
                    'uri': ds['data_ref']['REF'],
                    'mime': cursor.fetchone()['id'],
                },
                cursor=cursor)
            ds['resource'] = cursor.fetchone()['id']
            ds_writer.upsert_datastream(ds, cursor=cursor)
        elif ds['data_ref']['REF'].startswith(filestore.UPLOAD_SCHEME):
            # Data has been uploaded.
            filestore.create_datastream_from_upload(ds,
                                                    ds['data_ref']['REF'],
                                                    mime=ds['mimetype'],
                                                    checksums=ds['checksums'],
                                                    old=old,
                                                    cursor=cursor)
        else:
            # We need to fetch data.
            # @XXX: we should be able to avoid creating this file by
            # wrapping the raw attribute on the response to decode on read.
            ds_file = utils.SpooledTemporaryFile()
            # Close the response when done so the pooled HTTP connection
            # is released (previously the response was never closed).
            with requests.get(ds['data_ref']['REF'],
                              stream=True) as ds_resp:
                for chunk in ds_resp.iter_content(
                        _config['download_chunk_size']):
                    ds_file.write(chunk)
            ds_file.seek(0)

            filestore.create_datastream_from_data(ds,
                                                  ds_file,
                                                  mime=ds['mimetype'],
                                                  checksums=ds['checksums'],
                                                  old=old,
                                                  cursor=cursor)
    else:
        # There is no data change; refresh the mime/resource records and
        # bring the checksums up to date.
        mime = ds_writer.upsert_mime(ds['mimetype'],
                                     cursor=cursor).fetchone()['id']
        uri = ds_reader.resource(ds['resource'],
                                 cursor=cursor).fetchone()['uri']
        ds_writer.upsert_resource({'uri': uri, 'mime': mime}, cursor=cursor)
        filestore.update_checksums(ds['resource'],
                                   ds['checksums'],
                                   cursor=cursor)
        ds_writer.upsert_datastream(ds, cursor=cursor)

    return cursor
예제 #7
0
def populate_foxml_datastream(foxml,
                              pid,
                              datastream,
                              base_url='http://localhost:8080/fedora',
                              archival=False,
                              inline_to_managed=False,
                              cursor=None):
    """
    Add a FOXML datastream into an lxml etree.

    Args:
        foxml: An lxml incremental writer the datastream element is
            written into.
        pid: The object's PID, used when building content-location URLs.
        datastream: The current datastream dict from the DB; older
            versions are looked up and exported before it.
        base_url: Base URL used for INTERNAL_ID content locations.
        archival: If True, embed managed/inline ('M'/'X') content as
            base64 binaryContent instead of a content location.
        inline_to_managed: If True, inline XML ('X') datastreams are not
            embedded as xmlContent and fall through to the location/
            archival handling.
        cursor: DB cursor used for the per-version reader calls.
    """
    datastream_attributes = {
        'ID': datastream['dsid'],
        'STATE': datastream['state'],
        'CONTROL_GROUP': datastream['control_group'],
        'VERSIONABLE': str(datastream['versioned']).lower(),
    }
    with foxml.element('{{{0}}}datastream'.format(FOXML_NAMESPACE),
                       datastream_attributes):
        # Export old versions first; the current datastream goes last.
        # NOTE(review): unlike the other reader calls below, this one
        # does not pass cursor=cursor — confirm old_datastreams manages
        # its own cursor.
        versions = list(datastream_reader.old_datastreams(datastream['id']))
        versions.append(datastream)

        for index, version in enumerate(versions):
            datastream_reader.resource(version['resource'], cursor=cursor)
            resource_info = cursor.fetchone()
            datastream_reader.mime(resource_info['mime'], cursor=cursor)
            mime_info = cursor.fetchone()
            # Fall back to the datastream's creation time when the
            # version has no 'committed' key.
            try:
                created = format_date(version['committed'])
            except KeyError:
                created = format_date(datastream['created'])

            version_attributes = {
                'ID': '{}.{}'.format(datastream['dsid'], index),
                'LABEL': version['label'] if version['label'] else '',
                'CREATED': created,
                'MIMETYPE': mime_info['mime'],
            }
            # Redirect ('R') datastreams have no local file to size.
            if datastream['control_group'] != 'R':
                size = filestore.uri_size(resource_info['uri'])
                version_attributes['SIZE'] = str(size)

            with foxml.element(
                    '{{{0}}}datastreamVersion'.format(FOXML_NAMESPACE),
                    version_attributes):

                # Emit one datastreamDigest element per stored checksum.
                datastream_reader.checksums(version['resource'], cursor=cursor)
                checksums = cursor.fetchall()
                for checksum in checksums:
                    foxml.write(
                        etree.Element(
                            '{{{0}}}datastreamDigest'.format(FOXML_NAMESPACE),
                            {
                                'TYPE': checksum['type'],
                                'DIGEST': checksum['checksum']
                            }))

                if datastream['control_group'] == 'X' and (
                        not inline_to_managed):
                    # Inline XML: embed the parsed document directly.
                    content_element = etree.Element(
                        '{{{0}}}xmlContent'.format(FOXML_NAMESPACE))
                    uri = filestore.resolve_uri(resource_info['uri'])
                    xml_etree = etree.parse(uri)
                    content_element.append(xml_etree.getroot())
                    foxml.write(content_element)
                elif datastream['control_group'] in ['M', 'X'] and archival:
                    # Archival export: embed the raw bytes as base64.
                    uri = filestore.resolve_uri(resource_info['uri'])
                    with open(uri, 'rb') as ds_file:
                        with foxml.element('{{{0}}}binaryContent'.format(
                                FOXML_NAMESPACE)):
                            # NOTE(review): base64.encode was removed in
                            # Python 3.9 (base64.encodebytes replaces
                            # it) — confirm the supported runtime.
                            base64.encode(ds_file, foxml)
                else:
                    # Otherwise reference the content by location.
                    if datastream['control_group'] == 'R':
                        content_attributes = {
                            'TYPE': 'URL',
                            'REF': resource_info['uri'],
                        }
                    else:
                        content_attributes = {
                            'TYPE':
                            'INTERNAL_ID',
                            'REF': ('{}/objects/{}/datastreams/{}/'
                                    'content?asOfDateTime={}').format(
                                        base_url, pid, datastream['dsid'],
                                        created),
                        }

                    foxml.write(
                        etree.Element(
                            '{{{0}}}contentLocation'.format(FOXML_NAMESPACE),
                            content_attributes))