Exemple #1
0
    def create_sips(cls, dump, deposit, files, recid):
        """Build the submission information packages described by a dump.

        Nothing is created when ``recid`` is falsy or its status is
        ``PIDStatus.RESERVED``. Otherwise one ``SIP`` is created for every
        entry of ``dump.sips`` and linked to ``recid`` with a ``RecordSIP``.
        ``SIPFile`` rows for ``files`` are attached to the first package only.

        :param dump: object whose ``sips`` attribute lists the packages.
        :param deposit: not used by this method.
        :param files: iterable of ``(meta, file)`` pairs; ``meta['key']`` is
            stored as the file path and ``file.id`` as the file id.
        :param recid: persistent identifier the packages are linked to.
        """
        if not recid:
            return
        if recid.status == PIDStatus.RESERVED:
            return

        for position, entry in enumerate(dump.sips):
            package = SIP.create(
                entry['format'],
                entry['content'],
                user_id=entry['user_id'],
                agent=entry['agent'],
                id_=entry['id'],
            )
            package.created = entry['timestamp']

            # Files are registered once, on the first package of the dump.
            if position == 0:
                for meta, file_instance in files:
                    sip_file = SIPFile(sip_id=package.id,
                                       filepath=meta['key'],
                                       file_id=file_instance.id)
                    db.session.add(sip_file)

            # Link the package to the record identifier.
            link = RecordSIP(sip_id=package.id, pid_id=recid.id)
            db.session.add(link)
Exemple #2
0
def reconstruct_sipfiles_t(recid=None, pid=None):
    """Recreate missing SIPFile rows from the files stored in a record.

    The earliest ``RecordSIP`` (ordered by ``created``) of the identifier is
    looked up, and the first record revision containing a ``_files`` key
    provides the list of files. A ``SIPFile`` is added for every listed
    object version that has no matching row yet; the session is then
    committed.

    :param recid: value of the 'recid' identifier, used only when ``pid``
        is not given.
    :param pid: persistent identifier of the record.
    :raises Exception: if no ``RecordSIP`` exists for the identifier or no
        revision carries a ``_files`` key.
    """
    if not pid:
        pid = PersistentIdentifier.get('recid', recid)

    earliest_link = (
        RecordSIP.query
        .filter_by(pid_id=pid.id)
        .order_by(RecordSIP.created)
        .first()
    )
    if earliest_link is None:
        raise Exception("RecordSIP does not exist.")

    package = earliest_link.sip
    record = Record.get_record(earliest_link.pid.object_uuid)

    # Find the first revision that carries file information.
    for revision in record.revisions:
        if '_files' in revision:
            file_entries = revision['_files']
            break
    else:
        raise Exception("Files information not found in SIPMetadata nor"
                        " in Record revision")

    # Resolve every object version before touching the session.
    versions = []
    for entry in file_entries:
        lookup = ObjectVersion.query.filter_by(version_id=entry['version_id'])
        versions.append(lookup.first())

    for version in versions:
        identity = dict(sip_id=package.id,
                        filepath=version.key,
                        file_id=version.file_id)
        if SIPFile.query.filter_by(**identity).count():
            continue
        db.session.add(SIPFile(created=package.created, **identity))
    db.session.commit()
def test_sip_file_model(app, db, sips):
    """Check that SIPFile rejects a file path longer than the limit."""
    app.config['SIPSTORE_FILEPATH_MAX_LEN'] = 15
    first_sip = sips[0]
    with pytest.raises(ValueError) as raised:
        SIPFile(
            sip_id=first_sip.id,
            filepath="way too long file name.zip",
            file_id=first_sip.files[0].file_id,
        )
    message = str(raised.value)
    assert 'Filepath too long' in message
def test_sip_file_model(db):
    """Check that a SIPFile linking a SIP and a file can be stored."""
    package = SIP.create('json', '{}')
    instance = FileInstance.create()
    stored = SIPFile(
        sip_id=package.id,
        filepath="foobar.zip",
        file_id=instance.id,
    )
    db.session.add(stored)
    db.session.commit()

    # Exactly one row of each model must exist after the commit.
    for model in (SIP, SIPFile):
        assert model.query.count() == 1
    def attach_file(self, file):
        """Attach a file to this SIP.

        A ``SIPFile`` is built from ``self.id``, ``file.key`` and
        ``file.file_id`` and added to the database session; nothing is
        committed here.

        :param file: the file to attach. It must provide a ``key`` and a
            ``file_id``. See
            :py:class:`invenio_files_rest.models.ObjectVersion`.
        :returns: the created SIPFile
        :rtype: :py:class:`invenio_sipstore.models.SIPFile`
        """
        fields = dict(sip_id=self.id, filepath=file.key, file_id=file.file_id)
        sip_file = SIPFile(**fields)
        db.session.add(sip_file)
        return sip_file
Exemple #6
0
def load_sipfile(file_id, filepath, sip_id, created):
    """Store one SIPFile in the database unless it already exists.

    :param file_id: id of the file the SIPFile points to.
    :param filepath: path of the file inside the SIP.
    :param sip_id: id of the SIP the file belongs to.
    :param created: creation time, parsed with ``arrow.get``; the timezone
        information is dropped before it is stored.
    """
    identity = dict(sip_id=sip_id, filepath=filepath, file_id=file_id)
    existing = SIPFile.query.filter_by(**identity)
    naive_created = arrow.get(created).datetime.replace(tzinfo=None)
    if existing.count():
        # A matching row is already present; nothing to do.
        return
    db.session.add(SIPFile(created=naive_created, **identity))
    db.session.commit()
Exemple #7
0
def test_transfer_rsync(app, db, location):
    """Check that factories.transfer_rsync exports a SIP into a folder."""
    # Archiver configuration used by the export.
    app.config['SIPSTORE_ARCHIVER_DIRECTORY_BUILDER'] = (
        'helpers:archive_directory_builder')
    app.config['SIPSTORE_ARCHIVER_METADATA_TYPES'] = ['test']

    # One SIP carrying a single JSON metadata entry and a single file.
    package = SIP.create()
    metadata_type = SIPMetadataType(title='Test', name='test', format='json')
    db.session.add(metadata_type)
    expected_metadata = {'title': 'title', 'author': 'me'}
    metadata = SIPMetadata(sip=package,
                           type=metadata_type,
                           content=json.dumps(expected_metadata))
    db.session.add(metadata)
    instance = FileInstance.create()
    expected_bytes = b'weighted companion cube\n'
    instance.set_contents(BytesIO(expected_bytes),
                          default_location=location.uri)
    db.session.add(SIPFile(sip=package, file=instance, filepath='portal.txt'))
    db.session.commit()

    # Export the package.
    folder = path.join(location.uri, 'lulz')
    params = dict(server='', user='', destination=folder, args='-az')
    factories.transfer_rsync(package.id, params)

    # Verify the exported layout and contents.
    assert not path.exists(path.join(location.uri, 'test'))
    assert path.isdir(folder)
    exported = (('files', 'portal.txt'), ('metadata', 'test.json'))
    for directory, filename in exported:
        assert path.isdir(path.join(folder, directory))
        assert path.isfile(path.join(folder, directory, filename))
    with open(path.join(folder, 'files', 'portal.txt'), 'rb') as exported_fp:
        assert exported_fp.read() == expected_bytes
    with open(path.join(folder, 'metadata', 'test.json'), 'r') as exported_fp:
        assert json.load(exported_fp) == expected_metadata
Exemple #8
0
    def create(cls,
               pid,
               record,
               create_sip_files=True,
               user_id=None,
               agent=None):
        """Create a Zenodo SIP from a PID and its Record.

        Inside a nested database transaction this creates the SIP (the JSON
        dump of ``record``), a ``RecordSIP`` linking the SIP to ``pid`` and,
        optionally, one ``SIPFile`` per file of the record. Only the SIP is
        returned; the other objects can be fetched through the SIP
        relationships 'record_sips' and 'sip_files'.

        :param pid: PID of the published record ('recid').
        :type pid: `invenio_pidstore.models.PersistentIdentifier`
        :param record: Record for which the SIP should be created.
        :type record: `invenio_records.api.Record`
        :param create_sip_files: If True the SIPFiles will be created.
        :type create_sip_files: bool
        :param user_id: Id of the user owning the SIP. When falsy, the id of
            ``current_user`` is used (``None`` for an anonymous user).
        :param agent: Agent information stored on the SIP. When falsy, it is
            built with ``cls._build_agent_info()``.
        :returns: A Zenodo-specific SIP object.
        :rtype: ``invenio_sipstore.models.SIP``
        """
        if not user_id:
            if current_user.is_anonymous:
                user_id = None
            else:
                user_id = current_user.get_id()
        agent = agent or cls._build_agent_info()

        with db.session.begin_nested():
            payload = json.dumps(record.dumps())
            sip = SIP.create('json', payload, user_id=user_id, agent=agent)
            db.session.add(RecordSIP(sip_id=sip.id, pid_id=pid.id))
            if record.files and create_sip_files:
                for record_file in record.files:
                    sip_file = SIPFile(sip_id=sip.id,
                                       filepath=record_file.key,
                                       file_id=record_file.file_id)
                    db.session.add(sip_file)
        return sip
Exemple #9
0
def sips(db, locations, sip_metadata_types):
    """Fixture providing five SIPs, some of which share file instances.

    The packages are laid out as follows:
    SIP-1: File1 (the SIP is created with agent info)
    SIP-2: File1, File2
    SIP-3: File2 (renamed on the SIPFile, same FileInstance), File3
    SIP-4: File4, File5, File6 (unusual file paths)
    SIP-5: metadata only, no files

    :returns: list with the ``SIPApi`` wrapper of each SIP, in the order
        above.
    """
    def stored_file(payload):
        # Create a FileInstance holding ``payload`` in the default location.
        instance = FileInstance.create()
        instance.set_contents(BytesIO(payload),
                              default_location=locations['default'].uri)
        return instance

    def sip_file(package, filepath, instance):
        # Build (without adding to the session) the SIP-to-file link.
        return SIPFile(sip_id=package.id,
                       filepath=filepath,
                       file_id=instance.id)

    # A SIP with agent info
    agent_info = {
        'email': '*****@*****.**',
        'orcid': '1111-1111-1111-1111',
        'ip_address': '1.1.1.1'
    }
    sip1 = SIP.create(agent=agent_info)
    sip1api = SIPApi(sip1)
    # Metadata 'txt-test', although attached should not be archived
    # (see conftest configuration)
    for type_name, content in (('marcxml-test', '<p>XML 1</p>'),
                               ('json-test', '{"title": "JSON 1"}'),
                               ('txt-test', 'Title: TXT 1')):
        sip1api.attach_metadata(type_name, content)
    file1 = stored_file(b('test'))
    db_.session.add(sip_file(sip1, "foobar.txt", file1))

    sip2 = SIP.create()
    sip2api = SIPApi(sip2)
    for type_name, content in (('marcxml-test', '<p>XML 2</p>'),
                               ('json-test', '{"title": "JSON 2"}')):
        sip2api.attach_metadata(type_name, content)
    file2 = stored_file(b'test-second')
    db_.session.add_all([
        sip_file(sip2, "foobar.txt", file1),
        sip_file(sip2, "foobar2.txt", file2),
    ])

    sip3 = SIP.create()
    sip3api = SIPApi(sip3)
    for type_name, content in (('marcxml-test', '<p>XML 3</p>'),
                               ('json-test', '{"title": "JSON 3"}')):
        sip3api.attach_metadata(type_name, content)
    file3 = stored_file(b'test-third')
    db_.session.add_all([
        sip_file(sip3, "foobar2-renamed.txt", file2),
        sip_file(sip3, "foobar3.txt", file3),
    ])

    # A SIP with naughty filenames
    sip4 = SIP.create()
    sip4api = SIPApi(sip4)
    for type_name, content in (('marcxml-test', '<p>XML 4 żółć</p>'),
                               ('json-test', '{"title": "JSON 4 żółć"}')):
        sip4api.attach_metadata(type_name, content)
    file4 = stored_file('test-fourth żółć'.encode('utf-8'))
    file5 = stored_file('test-fifth ąęćźə'.encode('utf-8'))
    file6 = stored_file('test-sixth π'.encode('utf-8'))
    db_.session.add_all([
        sip_file(sip4, "../../foobar.txt", file4),
        sip_file(sip4, "http://maliciouswebsite.com/hack.js", file5),
        sip_file(sip4, "łóżźćąę.dat", file6),
    ])

    # A SIP with metadata-only changes
    sip5 = SIP.create()
    sip5api = SIPApi(sip5)
    sip5api.attach_metadata('marcxml-test', '<p>XML 5 Meta Only</p>')

    db_.session.commit()
    return [sip1api, sip2api, sip3api, sip4api, sip5api]
Exemple #10
0
def create_files_and_sip(deposit, dep_pid):
    """Create deposit Bucket, Files and SIPs.

    The function works on the legacy ``files`` and ``sips`` entries of
    ``deposit``:

    * a new bucket is created and linked to the deposit;
    * for every ``files`` entry a ``FileInstance`` and an ``ObjectVersion``
      are created and the file metadata is appended to ``deposit['_files']``;
    * for every ``sips`` entry a 'marcxml' ``SIP`` is created and, when a
      recid is known, linked to it with a ``RecordSIP``;
    * the ``SIPFile`` rows are attached to the first SIP only.

    Finally the deposit and the database session are committed.

    :param deposit: deposit being migrated (dict-like, with an ``id``).
    :param dep_pid: persistent identifier of the deposit; its ``pid_value``
        is used in log messages and raised errors.
    :returns: the committed deposit.
    :raises DepositMultipleRecids: if the SIPs refer to different recids.
    :raises DepositSIPUserDoesNotExist: if a SIP refers to an unknown user.
    :raises DepositRecidDoesNotExist: if the recid of the SIPs has no PID.
    """
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PersistentIdentifier
    from invenio_sipstore.errors import SIPUserDoesNotExist
    from invenio_sipstore.models import SIP, RecordSIP, SIPFile
    from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_db import db
    buc = Bucket.create()
    recbuc = RecordsBuckets(record_id=deposit.id, bucket_id=buc.id)
    db.session.add(recbuc)
    deposit.setdefault('_deposit', dict())
    deposit.setdefault('_files', list())
    files = deposit.get('files', [])
    sips = deposit.get('sips', [])
    recid = None

    if sips:
        recids = [int(sip_data['metadata']['recid']) for sip_data in sips]
        unique_recids = set(recids)
        if len(unique_recids) > 1:
            logger.error('Multiple recids ({recids}) found in deposit '
                         '{depid}.'.format(recids=recids,
                                           depid=dep_pid.pid_value))
            raise DepositMultipleRecids(dep_pid.pid_value,
                                        list(unique_recids))
        # All SIPs agree on a single recid.
        recid = recids[0]

    # Store the path -> FileInstance mappings for SIPFile creation later
    dep_file_instances = list()

    for file_ in files:
        fi = FileInstance.create()
        fi.set_uri(file_['path'], file_['size'], file_['checksum'])
        ov = ObjectVersion.create(buc, file_['name'], _file_id=fi.id)
        file_meta = dict(
            bucket=str(buc.id),
            key=file_['name'],
            checksum=file_['checksum'],
            size=file_['size'],
            version_id=str(ov.version_id),
        )
        deposit['_files'].append(file_meta)
        dep_file_instances.append((file_['path'], fi))

    # ``sip_data`` is the legacy description, ``sip`` the created model; the
    # two are kept under separate names so neither shadows the other.
    for idx, sip_data in enumerate(sips):
        agent = None
        user_id = None
        if sip_data['agents']:
            first_agent = sip_data['agents'][0]
            agent = dict(
                ip_address=first_agent.get('ip_address', ""),
                email=first_agent.get('email_address', ""),
            )
            user_id = first_agent['user_id']
        content = sip_data['package']
        sip_format = 'marcxml'
        try:
            sip = SIP.create(sip_format,
                             content,
                             user_id=user_id,
                             agent=agent)
        except SIPUserDoesNotExist:
            logger.exception('User ID {user_id} referred in deposit {depid} '
                             'does not exists.'.format(
                                 user_id=user_id, depid=dep_pid.pid_value))
            raise DepositSIPUserDoesNotExist(dep_pid.pid_value, user_id)

        # If recid was found, attach it to SIP
        # TODO: This always uses the first recid, as we quit if multiple
        # recids are found in the sips information
        if recid:
            try:
                pid = PersistentIdentifier.get(pid_type='recid',
                                               pid_value=recid)
            except PIDDoesNotExistError:
                logger.exception('Record {recid} referred in '
                                 'Deposit {depid} does not exists.'.format(
                                     recid=recid, depid=dep_pid.pid_value))
                raise DepositRecidDoesNotExist(dep_pid.pid_value, recid)
            else:
                record_sip = RecordSIP(sip_id=sip.id, pid_id=pid.id)
                db.session.add(record_sip)
        # Files are attached to the first SIP only.
        if idx == 0:
            for fp, fi in dep_file_instances:
                sipf = SIPFile(sip_id=sip.id, filepath=fp, file_id=fi.id)
                db.session.add(sipf)
    deposit.commit()
    db.session.commit()
    return deposit
Exemple #11
0
def create_files_and_sip(deposit, dep_pid):
    """Create deposit Bucket, Files and SIPs.

    The function works on the legacy ``drafts``, ``files`` and ``sips``
    entries of ``deposit``:

    * a new bucket is created, linked to the deposit and recorded under
      ``deposit['_buckets']``;
    * if the deposit has exactly one draft holding ``prereserve_doi``
      information, the 'recid' and 'doi' identifiers are reserved when they
      do not exist yet, and the recid is inserted into ``RecordIdentifier``
      when missing;
    * for every ``files`` entry a ``FileInstance`` and an ``ObjectVersion``
      are created and the file metadata is appended to ``deposit['_files']``;
    * for every ``sips`` entry a 'marcxml' ``SIP`` is created (without user
      when the referred user does not exist) and linked to the recid with a
      ``RecordSIP`` when the recid PID exists; otherwise the recid is
      reserved;
    * the ``SIPFile`` rows are attached to the first SIP only.

    Finally the deposit is committed.

    :param deposit: deposit being migrated (dict-like, with an ``id``).
    :param dep_pid: persistent identifier of the deposit; its ``pid_value``
        is used in log messages and raised errors.
    :returns: the committed deposit.
    :raises DepositMultipleRecids: if the SIPs refer to different recids.
    """
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus
    from invenio_sipstore.errors import SIPUserDoesNotExist
    from invenio_sipstore.models import SIP, RecordSIP, SIPFile
    from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_db import db
    buc = Bucket.create()
    recbuc = RecordsBuckets(record_id=deposit.id, bucket_id=buc.id)
    db.session.add(recbuc)
    deposit.setdefault('_deposit', dict())
    deposit.setdefault('_buckets', dict(deposit=str(buc.id)))
    deposit.setdefault('_files', list())
    files = deposit.get('files', [])
    sips = deposit.get('sips', [])

    # Look for prereserved DOI (and recid)
    if 'drafts' in deposit:
        drafts = list(deposit['drafts'].items())
        if len(drafts) != 1:
            # No exception is being handled here, so ``logger.error`` is
            # used rather than ``logger.exception``.
            logger.error('Deposit {dep_pid} has multiple drafts'.format(
                dep_pid=dep_pid))
        else:
            _, draft = drafts[0]
            draft_v = draft['values']
            if 'prereserve_doi' in draft_v:
                pre_recid = str(draft_v['prereserve_doi']['recid'])
                pre_doi = str(draft_v['prereserve_doi']['doi'])

                # If pre-reserve info available, try to reserve 'recid'
                try:
                    PersistentIdentifier.get(pid_type='recid',
                                             pid_value=pre_recid)
                except PIDDoesNotExistError:
                    # Reserve recid
                    PersistentIdentifier.create(
                        pid_type='recid',
                        pid_value=pre_recid,
                        object_type='rec',
                        status=PIDStatus.RESERVED)

                # If pre-reserve info available, try to reserve 'doi'
                try:
                    PersistentIdentifier.get(pid_type='doi',
                                             pid_value=pre_doi)
                except PIDDoesNotExistError:
                    # Reserve DOI
                    PersistentIdentifier.create(
                        pid_type='doi',
                        pid_value=pre_doi,
                        object_type='rec',
                        status=PIDStatus.RESERVED)

                if RecordIdentifier.query.get(int(pre_recid)) is None:
                    RecordIdentifier.insert(int(pre_recid))

    # Store the path -> FileInstance mappings for SIPFile creation later
    dep_file_instances = list()

    for file_ in files:
        size = file_['size']
        key = file_['name']
        # Warning: Assumes all checksums are MD5!
        checksum = 'md5:{0}'.format(file_['checksum'])
        fi = FileInstance.create()
        fi.set_uri(file_['path'], size, checksum)
        ov = ObjectVersion.create(buc, key, _file_id=fi.id)
        # File type is the lower-cased extension without the leading dot.
        ext = splitext(ov.key)[1].lower()
        if ext.startswith('.'):
            ext = ext[1:]
        file_meta = dict(
            bucket=str(ov.bucket.id),
            key=ov.key,
            checksum=ov.file.checksum,
            size=ov.file.size,
            version_id=str(ov.version_id),
            type=ext,
        )
        deposit['_files'].append(file_meta)
        dep_file_instances.append((file_['path'], fi))

    # Get a recid from SIP information
    recid = None
    if sips:
        recids = [int(sip_data['metadata']['recid']) for sip_data in sips]
        unique_recids = set(recids)
        if len(unique_recids) > 1:
            logger.error('Multiple recids ({recids}) found in deposit '
                         '{depid}.'.format(recids=recids,
                                           depid=dep_pid.pid_value))
            raise DepositMultipleRecids(dep_pid.pid_value,
                                        list(unique_recids))
        # All SIPs agree on a single recid.
        recid = recids[0]

    # ``sip_data`` is the legacy description, ``sip`` the created model; the
    # two are kept under separate names so neither shadows the other.
    for idx, sip_data in enumerate(sips):
        agent = None
        user_id = None
        if sip_data['agents']:
            first_agent = sip_data['agents'][0]
            agent = dict(
                ip_address=empty_str_if_none(
                    first_agent.get('ip_address', "")),
                email=empty_str_if_none(
                    first_agent.get('email_address', "")),
            )
            user_id = first_agent['user_id']
        # A user id of 0 is treated as "no user".
        if user_id == 0:
            user_id = None
        content = sip_data['package']
        sip_format = 'marcxml'
        try:
            sip = SIP.create(sip_format, content, user_id=user_id, agent=agent)
        except SIPUserDoesNotExist:
            logger.exception('User ID {user_id} referred in deposit {depid} '
                             'does not exists.'.format(
                                 user_id=user_id, depid=dep_pid.pid_value))
            # Fall back to a SIP without user.
            sip = SIP.create(sip_format, content, agent=agent)

        # Attach recid to SIP
        if recid:
            try:
                pid = PersistentIdentifier.get(pid_type='recid',
                                               pid_value=str(recid))
            except PIDDoesNotExistError:
                logger.exception('Record {recid} referred in '
                                 'Deposit {depid} does not exists.'.format(
                                     recid=recid, depid=dep_pid.pid_value))
                if deposit['_p']['submitted']:
                    logger.exception('Pair {recid}/{depid} was submitted,'
                                     ' (should it be unpublished?).'.format(
                                         recid=recid, depid=dep_pid.pid_value))
                else:
                    logger.exception(
                        'Pair {recid}/{depid} was not submitted.'.format(
                            recid=recid, depid=dep_pid.pid_value))

                # Reserve recid
                # NOTE(review): no RecordSIP is created for the SIP that
                # triggers the reservation, while later SIPs of the same
                # deposit will find the PID and get one - confirm this is
                # intended.
                PersistentIdentifier.create(pid_type='recid',
                                            pid_value=str(recid),
                                            object_type='rec',
                                            status=PIDStatus.RESERVED)

                if RecordIdentifier.query.get(int(recid)) is None:
                    RecordIdentifier.insert(int(recid))
            else:
                record_sip = RecordSIP(sip_id=sip.id, pid_id=pid.id)
                db.session.add(record_sip)
        # Files are attached to the first SIP only.
        if idx == 0:
            for fp, fi in dep_file_instances:
                sipf = SIPFile(sip_id=sip.id, filepath=fp, file_id=fi.id)
                db.session.add(sipf)
    deposit.commit()
    return deposit