Example #1
0
def test_sip_model(db):
    """Exercise creation and validation of the SIP model."""
    owner = create_test_user('*****@*****.**')

    # Agent payload that is expected to validate.
    valid_agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # Agent payload whose field values have the wrong JSON types.
    malformed_agent = {
        'email': ['should', 'not', 'be', 'a', 'list'],
        'ip_address': {'definitely': 'not', 'a': 'dict'},
    }
    # Agent payload that declares an incorrect ``$schema`` URL.
    unknown_schema_agent = {
        'email': '*****@*****.**',
        'ip_address': '1.1.1.1',
        '$schema': 'http://incorrect/agent/schema.json',
    }

    first_sip = SIP.create(user_id=owner.id, agent=valid_agent)
    assert first_sip.user == owner

    # Two more SIPs: one with all defaults, one repeating the valid call.
    SIP.create()
    SIP.create(user_id=owner.id, agent=valid_agent)
    assert SIP.query.count() == 3

    # Each invalid call has to raise its dedicated exception.
    with pytest.raises(ValidationError):
        SIP.create(agent=malformed_agent)
    with pytest.raises(SIPUserDoesNotExist):
        SIP.create(user_id=5)
    with pytest.raises(JSONSchemaNotFound):
        SIP.create(agent=unknown_schema_agent)
    db.session.commit()
def test_sip_model(db):
    """Exercise creation and validation of the SIP model."""
    owner = create_test_user('*****@*****.**')

    # Agent payload that is expected to validate.
    valid_agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # Agent payload whose field values have the wrong JSON types.
    malformed_agent = {
        'email': ['should', 'not', 'be', 'a', 'list'],
        'ip_address': {'definitely': 'not', 'a': 'dict'},
    }
    # Agent payload that declares an incorrect ``$schema`` URL.
    unknown_schema_agent = {
        'email': '*****@*****.**',
        'ip_address': '1.1.1.1',
        '$schema': 'http://incorrect/agent/schema.json',
    }

    first_sip = SIP.create(user_id=owner.id, agent=valid_agent)
    assert first_sip.user == owner

    # Two more SIPs: one with all defaults, one repeating the valid call.
    SIP.create()
    SIP.create(user_id=owner.id, agent=valid_agent)
    assert SIP.query.count() == 3

    # Each invalid call has to raise its dedicated exception.
    with pytest.raises(ValidationError):
        SIP.create(agent=malformed_agent)
    with pytest.raises(SIPUserDoesNotExist):
        SIP.create(user_id=5)
    with pytest.raises(JSONSchemaNotFound):
        SIP.create(agent=unknown_schema_agent)
    db.session.commit()
def test_SIP_files(db):
    """Test the files methods of API SIP.

    A fresh SIP must expose no files; after attaching one object version
    the file has to be visible both through the API wrapper and through
    the underlying model.
    """
    # we create a SIP model
    sip = SIP_.create()
    db.session.commit()
    # We create an API SIP on top of it
    api_sip = SIP(sip)
    assert len(api_sip.files) == 0
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))
        db.session.commit()
        # we create a file
        content = b'test lol\n'
        bucket = Bucket.create()
        obj = ObjectVersion.create(bucket, 'test.txt',
                                   stream=BytesIO(content))
        db.session.commit()
        # we attach it to the SIP
        api_sip.attach_file(obj)
        db.session.commit()
        assert len(api_sip.files) == 1
        assert api_sip.files[0].filepath == 'test.txt'
        assert sip.sip_files[0].filepath == 'test.txt'
    finally:
        # finalization: remove the temporary storage even when a step
        # above raised, so failing runs do not leave directories behind
        rmtree(tmppath)
Example #4
0
def test_Archive_get_realstatus_transfer(db, client, oauth2):
    """Test the Archive's get method with transfer processing."""
    sip = SIP.create()
    archive = Archive.create(sip=sip,
                             accession_id='id',
                             archivematica_id=uuid.uuid4())
    archive.status = ArchiveStatus.WAITING
    db.session.commit()

    # Canned reply returned in place of the patched ``requests.get``.
    fake_reply = Response()
    fake_reply.status_code = 200
    fake_reply._content = json.dumps(
        {'status': 'SIP_PROCESSING'}).encode('utf-8')

    with patch('requests.get', return_value=fake_reply):
        endpoint = url_for('invenio_archivematica_api.archive_api',
                           accession_id=archive.accession_id,
                           access_token=oauth2.token)
        response = client.get(endpoint,
                              data=json.dumps({'realStatus': True}),
                              content_type='application/json')

    assert response.status_code == 200
    result = json.loads(response.data.decode('utf-8'))
    expected_fields = (
        ('sip_id', str(sip.id)),
        ('status', 'PROCESSING_TRANSFER'),
        ('accession_id', 'id'),
        ('archivematica_id', str(archive.archivematica_id)),
    )
    for key, value in expected_fields:
        assert key in result and result[key] == value
Example #5
0
    def create(cls, pid, record, create_sip_files=True, user_id=None,
               agent=None):
        """Create a Zenodo SIP from the PID and the Record.

        Besides the SIP itself, a ``RecordSIP`` is created for the
        SIP-PID relationship, and (optionally) one ``SIPFile`` for each of
        the files in the record. Those objects are not returned by this
        function.

        :param pid: PID of the published record ('recid').
        :type pid: `invenio_pidstore.models.PersistentIdentifier`
        :param record: Record for which the SIP should be created.
        :type record: `invenio_records.api.Record`
        :param create_sip_files: If True the SIPFiles will be created.
        :type create_sip_files: bool
        :param user_id: ID of the submitting user. When falsy, it is taken
            from ``current_user`` (``None`` for anonymous users).
        :param agent: Agent information. When falsy, it is built with
            ``cls._build_agent_info()``.
        :returns: A Zenodo-specific SIP object.
        :rtype: ``invenio_sipstore.models.SIP``
        """
        if not user_id:
            if current_user.is_anonymous:
                user_id = None
            else:
                user_id = current_user.get_id()
        agent = agent or cls._build_agent_info()

        with db.session.begin_nested():
            serialized = json.dumps(record.dumps())
            sip = SIP.create('json', serialized, user_id=user_id,
                             agent=agent)
            db.session.add(RecordSIP(sip_id=sip.id, pid_id=pid.id))
            if record.files and create_sip_files:
                for record_file in record.files:
                    db.session.add(SIPFile(sip_id=sip.id,
                                           filepath=record_file.key,
                                           file_id=record_file.file_id))
        return sip
Example #6
0
    def create_sips(cls, dump, deposit, files, recid):
        """Create submission information packages.

        Nothing is created when ``recid`` is falsy or still has the
        RESERVED status. Otherwise one SIP is created per entry of
        ``dump.sips``; the given files are attached to the first SIP only,
        and every SIP is linked to ``recid`` through a ``RecordSIP``.
        """
        if not recid or recid.status == PIDStatus.RESERVED:
            return
        for position, package in enumerate(dump.sips):
            sip = SIP.create(
                package['format'],
                package['content'],
                user_id=package['user_id'],
                agent=package['agent'],
                id_=package['id'],
            )
            sip.created = package['timestamp']

            # Only the first package gets the SIP files.
            if position == 0:
                for meta, file_obj in files:
                    sip_file = SIPFile(sip_id=sip.id, filepath=meta['key'],
                                       file_id=file_obj.id)
                    db.session.add(sip_file)

            # PID - SIP relationship
            db.session.add(RecordSIP(sip_id=sip.id, pid_id=recid.id))
Example #7
0
    def create_sips(cls, dump, deposit, files, recid):
        """Create submission information packages.

        Nothing is created when ``recid`` is falsy or still has the
        RESERVED status. Otherwise one SIP is created per entry of
        ``dump.sips``; the given files are attached to the first SIP only,
        and every SIP is linked to ``recid`` through a ``RecordSIP``.
        """
        if not recid or recid.status == PIDStatus.RESERVED:
            return
        for position, package in enumerate(dump.sips):
            sip = SIP.create(
                package['format'],
                package['content'],
                user_id=package['user_id'],
                agent=package['agent'],
                id_=package['id'],
            )
            sip.created = package['timestamp']

            # Only the first package gets the SIP files.
            if position == 0:
                for meta, file_obj in files:
                    sip_file = SIPFile(sip_id=sip.id, filepath=meta['key'],
                                       file_id=file_obj.id)
                    db.session.add(sip_file)

            # PID - SIP relationship
            db.session.add(RecordSIP(sip_id=sip.id, pid_id=recid.id))
Example #8
0
def test_SIP_files(db):
    """Test the files methods of API SIP.

    A fresh SIP must expose no files; after attaching one object version
    the file has to be visible both through the API wrapper and through
    the underlying model.
    """
    # we create a SIP model
    sip = SIP_.create()
    db.session.commit()
    # We create an API SIP on top of it
    api_sip = SIP(sip)
    assert len(api_sip.files) == 0
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))
        db.session.commit()
        # we create a file
        content = b'test lol\n'
        bucket = Bucket.create()
        obj = ObjectVersion.create(bucket, 'test.txt',
                                   stream=BytesIO(content))
        db.session.commit()
        # we attach it to the SIP
        api_sip.attach_file(obj)
        db.session.commit()
        assert len(api_sip.files) == 1
        assert api_sip.files[0].filepath == 'test.txt'
        assert sip.sip_files[0].filepath == 'test.txt'
    finally:
        # finalization: remove the temporary storage even when a step
        # above raised, so failing runs do not leave directories behind
        rmtree(tmppath)
Example #9
0
def test_create_accessioned_id(db):
    """Test the ``factories.create_accession_id`` function."""
    # An archive needs a SIP to hang on to.
    sip = SIP.create()
    archive = Archive.create(sip)
    db.session.commit()
    accession_id = factories.create_accession_id(archive)
    # The accession id is the fixed 'CERN-' prefix followed by the SIP id.
    assert accession_id == 'CERN-{0}'.format(sip.id)
Example #10
0
def test_record_sip_model(db):
    """Check that a RecordSIP linking a SIP and a PID can be stored."""
    sip = SIP.create()
    db.session.commit()
    pid = PersistentIdentifier.create('recid', '12345')

    # Persist the SIP <-> PID relationship and make sure it is the only one.
    db.session.add(RecordSIP(sip_id=sip.id, pid_id=pid.id))
    db.session.commit()
    assert RecordSIP.query.count() == 1
def test_record_sip_model(db):
    """Check that a RecordSIP linking a SIP and a PID can be stored."""
    sip = SIP.create()
    db.session.commit()
    pid = PersistentIdentifier.create('recid', '12345')

    # Persist the SIP <-> PID relationship and make sure it is the only one.
    db.session.add(RecordSIP(sip_id=sip.id, pid_id=pid.id))
    db.session.commit()
    assert RecordSIP.query.count() == 1
Example #12
0
def test_create_accessioned_id(db):
    """Test the ``factories.create_accession_id`` function."""
    # An archive needs a SIP to hang on to.
    sip = SIP.create()
    archive = Archive.create(sip)
    db.session.commit()
    accession_id = factories.create_accession_id(archive)
    # Expected form: <configured organization name>-<SIP id>.
    organization = current_app.config['ARCHIVEMATICA_ORGANIZATION_NAME']
    assert accession_id == organization + '-' + str(sip.id)
def test_sip_file_model(db):
    """Check that a SIPFile tying a file instance to a SIP can be stored."""
    sip = SIP.create('json', '{}')
    file_instance = FileInstance.create()

    db.session.add(SIPFile(sip_id=sip.id,
                           filepath="foobar.zip",
                           file_id=file_instance.id))
    db.session.commit()
    # Exactly one SIP and one SIPFile exist afterwards.
    assert SIP.query.count() == 1
    assert SIPFile.query.count() == 1
Example #14
0
def test_ArchiveDownload_get_412(db, client, oauth2):
    """Test the Download's get method with no archivematica_id."""
    # The archive is deliberately created without an archivematica_id.
    archive = Archive.create(sip=SIP.create(), accession_id='id')
    db.session.commit()

    download_url = url_for('invenio_archivematica_api.download_api',
                           accession_id=archive.accession_id,
                           access_token=oauth2.token)
    response = client.get(download_url)
    assert response.status_code == 412
Example #15
0
def test_oais_fail_transfer(db):
    """Test the oais_fail_transfer function."""
    # Set up a SIP with its archive.
    sip = SIP.create()
    Archive.create(sip)
    db.session.commit()

    # Failing the transfer keeps the single archive and marks it FAILED.
    oais_fail_transfer(sip.id)
    assert Archive.query.count() == 1
    archive = Archive.get_from_sip(sip.id)
    assert archive.status == ArchiveStatus.FAILED
def test_sip_file_model(db):
    """Check that a SIPFile tying a file instance to a SIP can be stored."""
    sip = SIP.create('json', '{}')
    file_instance = FileInstance.create()

    db.session.add(SIPFile(sip_id=sip.id,
                           filepath="foobar.zip",
                           file_id=file_instance.id))
    db.session.commit()
    # Exactly one SIP and one SIPFile exist afterwards.
    assert SIP.query.count() == 1
    assert SIPFile.query.count() == 1
Example #17
0
def test_oais_process_transfer(db):
    """Test the oais_process_transfer function."""
    # Set up a SIP with its archive.
    sip = SIP.create()
    Archive.create(sip)
    db.session.commit()

    archivematica_id = uuid.uuid4()
    oais_process_transfer(sip.id, archivematica_id=archivematica_id)

    # Still a single archive, now processing and carrying the given id.
    assert Archive.query.count() == 1
    archive = Archive.get_from_sip(sip.id)
    assert archive.status == ArchiveStatus.PROCESSING_TRANSFER
    assert archive.archivematica_id == archivematica_id
Example #18
0
def test_ArchiveDownload_get_520(db, client, oauth2):
    """Test the Download's get method with no storage server running."""
    archive = Archive.create(sip=SIP.create(),
                             accession_id='id',
                             archivematica_id=uuid.uuid4())
    archive.status = ArchiveStatus.REGISTERED
    db.session.commit()

    # ``requests.get`` is not patched here, unlike the mocked tests.
    download_url = url_for('invenio_archivematica_api.download_api',
                           accession_id=archive.accession_id,
                           access_token=oauth2.token)
    response = client.get(download_url)
    assert response.status_code == 520
Example #19
0
def test_oais_finish_transfer(db):
    """Test the oais_finish_transfer function."""
    # Set up a SIP with its archive.
    archivematica_id = uuid.uuid4()
    sip = SIP.create()
    Archive.create(sip)
    db.session.commit()

    # Finishing the transfer registers the archive and flags the SIP.
    oais_finish_transfer(sip.id, archivematica_id=archivematica_id)
    assert Archive.query.count() == 1
    archive = Archive.get_from_sip(sip.id)
    assert archive.status == ArchiveStatus.REGISTERED
    assert archive.archivematica_id == archivematica_id
    assert archive.sip.archived is True
Example #20
0
def test_archive_new_sips(db, location):
    """Test the archive_new_sips function."""
    accession_factory = 'invenio_archivematica.factories.create_accession_id'

    # Two SIPs created three seconds apart, each with its archive.
    sip1 = SIP.create()
    Archive.create(sip1)
    db.session.commit()
    time.sleep(3)
    sip2 = SIP.create()
    Archive.create(sip2)
    db.session.commit()

    # First pass: only SIPs older than 2 seconds are archived.
    archive_new_sips(accession_factory, days=0, seconds=2, delay=False)
    archives = Archive.query.all()
    assert len(archives) == 2
    for archive in archives:
        if archive.sip_id != sip1.id:
            assert archive.status == ArchiveStatus.NEW
            continue
        assert archive.status == ArchiveStatus.WAITING
        # Flag the archive of sip1 so that the next pass skips it.
        archive.status = ArchiveStatus.IGNORED
        db.session.commit()

    # Second pass: everything is archived, except the archive of sip1
    # which stays untouched because it is flagged as IGNORED.
    archive_new_sips(accession_factory, days=0, delay=False)
    archives = Archive.query.all()
    assert len(archives) == 2
    for archive in archives:
        expected = (ArchiveStatus.IGNORED if archive.sip_id == sip1.id
                    else ArchiveStatus.WAITING)
        assert archive.status == expected
    def create(cls,
               archivable,
               files=None,
               metadata=None,
               user_id=None,
               agent=None):
        """Create a SIP together with its files and metadata.

        Apart from the SIP itself, it also creates ``SIPFile`` objects for
        each of the given files, along with ``SIPMetadata`` for the
        metadata.
        Those objects are not returned by this function but can be fetched by
        the corresponding SIP attributes 'files' and 'metadata'.
        The created model is stored in the attribute 'model'.
        The ``sipstore_created`` signal is sent with the new SIP after the
        nested transaction block has been left.

        :param bool archivable: tells if the SIP should be archived or not.
            Useful if ``Invenio-Archivematica`` is installed.
        :param files: The list of files to associate with the SIP. See
            :py:func:`invenio_sipstore.api.SIP.attach_file`
        :param dict metadata: A dictionary of metadata. The keys are the
            type (valid :py:class:`invenio_sipstore.models.SIPMetadataType`
            name) and the values are the content (string)
        :param user_id: the ID of the user. If not given, automatically
            computed from ``current_user`` (``None`` when there is no
            user or the user is anonymous)
        :param agent: If not given, automatically computed by the factory
            configured under ``SIPSTORE_AGENT_FACTORY``
        :returns: API SIP object.
        :rtype: :py:class:`invenio_sipstore.api.SIP`
        """
        if not user_id:
            # ``current_user`` can be falsy when no user is bound
            # (presumably outside of a request -- TODO confirm); guard
            # before touching ``is_anonymous`` instead of crashing.
            user_id = (None if not current_user or current_user.is_anonymous
                       else current_user.get_id())
        if not agent:
            agent_factory = import_string(
                current_app.config['SIPSTORE_AGENT_FACTORY'])
            agent = agent_factory()
        files = [] if not files else files
        metadata = {} if not metadata else metadata

        with db.session.begin_nested():
            sip = cls(
                SIP_.create(user_id=user_id,
                            agent=agent,
                            archivable=archivable))
            for f in files:
                sip.attach_file(f)
            # ``metadata_type`` rather than ``type``: do not shadow the
            # builtin.
            for metadata_type, content in metadata.items():
                sip.attach_metadata(metadata_type, content)
        sipstore_created.send(sip)
        return sip
Example #22
0
def test_ArchiveDownload_get_status_code(db, client, oauth2):
    """Test the API request for Download's get method."""
    archive = Archive.create(sip=SIP.create(),
                             accession_id='id',
                             archivematica_id=uuid.uuid4())
    archive.status = ArchiveStatus.REGISTERED
    db.session.commit()

    # The patched ``requests.get`` answers with a 404; the endpoint is
    # expected to answer with the very same status code.
    fake_reply = Response()
    fake_reply.status_code = 404
    with patch('requests.get', return_value=fake_reply):
        download_url = url_for('invenio_archivematica_api.download_api',
                               accession_id=archive.accession_id,
                               access_token=oauth2.token)
        response = client.get(download_url)
    assert response.status_code == fake_reply.status_code
Example #23
0
def test_Archive_get_status_code(db, client, oauth2):
    """Test the Archive's get method with error on Archivematica."""
    archive = Archive.create(sip=SIP.create(),
                             accession_id='id',
                             archivematica_id=uuid.uuid4())
    archive.status = ArchiveStatus.WAITING
    db.session.commit()

    # The patched ``requests.get`` answers with a 404; the endpoint is
    # expected to answer with the very same status code.
    fake_reply = Response()
    fake_reply.status_code = 404
    with patch('requests.get', return_value=fake_reply):
        archive_url = url_for('invenio_archivematica_api.archive_api',
                              accession_id=archive.accession_id,
                              access_token=oauth2.token)
        response = client.get(archive_url,
                              data=json.dumps({'realStatus': True}),
                              content_type='application/json')
    assert response.status_code == fake_reply.status_code
def test_sip_metadata_model(db):
    """Check that SIPMetadata can be stored and read back with its links."""
    sip = SIP.create()
    json_type = SIPMetadataType(title='JSON Test',
                                name='json-test',
                                format='json',
                                schema='url')
    db.session.add(json_type)

    raw_content = '{"title": "great book"}'
    db.session.add(
        SIPMetadata(sip_id=sip.id, content=raw_content, type=json_type))
    db.session.commit()

    # One row of each kind has been written.
    assert SIP.query.count() == 1
    assert SIPMetadataType.query.count() == 1
    assert SIPMetadata.query.count() == 1

    # The stored row gives back the content, the type and the SIP.
    stored = SIPMetadata.query.one()
    assert stored.content == raw_content
    assert stored.type.format == 'json'
    assert stored.sip.id == sip.id
Example #25
0
def test_sip_metadata_model(db):
    """Check that SIPMetadata can be stored and read back with its links."""
    sip = SIP.create()
    json_type = SIPMetadataType(title='JSON Test',
                                name='json-test',
                                format='json',
                                schema='url')
    db.session.add(json_type)

    raw_content = '{"title": "great book"}'
    db.session.add(
        SIPMetadata(sip_id=sip.id, content=raw_content, type=json_type))
    db.session.commit()

    # One row of each kind has been written.
    assert SIP.query.count() == 1
    assert SIPMetadataType.query.count() == 1
    assert SIPMetadata.query.count() == 1

    # The stored row gives back the content, the type and the SIP.
    stored = SIPMetadata.query.one()
    assert stored.content == raw_content
    assert stored.type.format == 'json'
    assert stored.sip.id == sip.id
Example #26
0
def test_transfer_rsync(app, db, location):
    """Test factories.transfer_rsync function."""
    # Config overrides used by this test.
    app.config['SIPSTORE_ARCHIVER_DIRECTORY_BUILDER'] = \
        'helpers:archive_directory_builder'
    app.config['SIPSTORE_ARCHIVER_METADATA_TYPES'] = ['test']

    # A SIP carrying one JSON metadata entry ...
    sip = SIP.create()
    metadata_type = SIPMetadataType(title='Test', name='test',
                                    format='json')
    db.session.add(metadata_type)
    expected_metadata = {'title': 'title', 'author': 'me'}
    db.session.add(SIPMetadata(sip=sip,
                               type=metadata_type,
                               content=json.dumps(expected_metadata)))
    # ... and one file.
    file_instance = FileInstance.create()
    expected_bytes = b'weighted companion cube\n'
    file_instance.set_contents(BytesIO(expected_bytes),
                               default_location=location.uri)
    db.session.add(
        SIPFile(sip=sip, file=file_instance, filepath='portal.txt'))
    db.session.commit()

    # Export into a folder located below the storage location.
    folder = path.join(location.uri, 'lulz')
    factories.transfer_rsync(sip.id, {
        'server': '',
        'user': '',
        'destination': folder,
        'args': '-az',
    })

    # Check the resulting layout and the exported contents.
    files_dir = path.join(folder, 'files')
    metadata_dir = path.join(folder, 'metadata')
    exported_file = path.join(files_dir, 'portal.txt')
    exported_metadata = path.join(metadata_dir, 'test.json')
    assert not path.exists(path.join(location.uri, 'test'))
    assert path.isdir(folder)
    assert path.isdir(files_dir)
    assert path.isfile(exported_file)
    assert path.isdir(metadata_dir)
    assert path.isfile(exported_metadata)
    with open(exported_file, 'rb') as fp:
        assert fp.read() == expected_bytes
    with open(exported_metadata, 'r') as fp:
        assert json.load(fp) == expected_metadata
Example #27
0
def test_Archive_get_200(db, client, oauth2):
    """Test the Archive's get method on an existing archive."""
    sip = SIP.create()
    archive = Archive.create(sip=sip,
                             accession_id='id',
                             archivematica_id=uuid.uuid4())
    db.session.commit()

    archive_url = url_for('invenio_archivematica_api.archive_api',
                          accession_id=archive.accession_id,
                          access_token=oauth2.token)
    response = client.get(archive_url)
    assert response.status_code == 200

    # The payload describes the archive that was just created.
    result = json.loads(response.data.decode('utf-8'))
    expected_fields = (
        ('sip_id', str(sip.id)),
        ('status', 'NEW'),
        ('accession_id', 'id'),
        ('archivematica_id', str(archive.archivematica_id)),
    )
    for key, value in expected_fields:
        assert key in result and result[key] == value
Example #28
0
    def create(cls, archivable, files=None, metadata=None, user_id=None,
               agent=None):
        """Create a SIP together with its files and metadata.

        A ``SIPFile`` is attached for every entry of ``files`` and a
        ``SIPMetadata`` for every item of ``metadata``. Those objects are
        not returned by this function but can be fetched by the
        corresponding SIP attributes 'files' and 'metadata'.
        The created model is stored in the attribute 'model'.
        The ``sipstore_created`` signal is sent with the new SIP after the
        nested transaction block has been left.

        :param bool archivable: tells if the SIP should be archived or not.
            Useful if ``Invenio-Archivematica`` is installed.
        :param files: The list of files to associate with the SIP. See
            :py:func:`invenio_sipstore.api.SIP.attach_file`
        :param dict metadata: A dictionary of metadata. The keys are the
            type (valid :py:class:`invenio_sipstore.models.SIPMetadataType`
            name) and the values are the content (string)
        :param user_id: the ID of the user. If not given, automatically
            computed from ``current_user`` (``None`` when there is no
            user or the user is anonymous)
        :param agent: If not given, automatically computed by the factory
            configured under ``SIPSTORE_AGENT_FACTORY``
        :returns: API SIP object.
        :rtype: :py:class:`invenio_sipstore.api.SIP`
        """
        if not user_id:
            if not current_user or current_user.is_anonymous:
                user_id = None
            else:
                user_id = current_user.get_id()
        if not agent:
            factory_path = current_app.config['SIPSTORE_AGENT_FACTORY']
            agent = import_string(factory_path)()
        files = files or []
        metadata = metadata or {}

        with db.session.begin_nested():
            model = SIP_.create(user_id=user_id, agent=agent,
                                archivable=archivable)
            sip = cls(model)
            for attached_file in files:
                sip.attach_file(attached_file)
            for metadata_type, content in metadata.items():
                sip.attach_metadata(metadata_type, content)
        sipstore_created.send(sip)
        return sip
Example #29
0
    def create(cls,
               pid,
               record,
               create_sip_files=True,
               user_id=None,
               agent=None):
        """Create a Zenodo SIP from the PID and the Record.

        Besides the SIP itself, a ``RecordSIP`` is created for the
        SIP-PID relationship, and (optionally) one ``SIPFile`` for each of
        the files in the record. Those objects are not returned by this
        function.

        :param pid: PID of the published record ('recid').
        :type pid: `invenio_pidstore.models.PersistentIdentifier`
        :param record: Record for which the SIP should be created.
        :type record: `invenio_records.api.Record`
        :param create_sip_files: If True the SIPFiles will be created.
        :type create_sip_files: bool
        :param user_id: ID of the submitting user. When falsy, it is taken
            from ``current_user`` (``None`` for anonymous users).
        :param agent: Agent information. When falsy, it is built with
            ``cls._build_agent_info()``.
        :returns: A Zenodo-specific SIP object.
        :rtype: ``invenio_sipstore.models.SIP``
        """
        if not user_id:
            if current_user.is_anonymous:
                user_id = None
            else:
                user_id = current_user.get_id()
        agent = agent or cls._build_agent_info()

        with db.session.begin_nested():
            serialized = json.dumps(record.dumps())
            sip = SIP.create('json', serialized, user_id=user_id,
                             agent=agent)
            db.session.add(RecordSIP(sip_id=sip.id, pid_id=pid.id))
            if record.files and create_sip_files:
                for record_file in record.files:
                    db.session.add(SIPFile(sip_id=sip.id,
                                           filepath=record_file.key,
                                           file_id=record_file.file_id))
        return sip
def test_SIP_metadata(db):
    """Test the metadata methods of API SIP."""
    # A SIP model plus the metadata type used below.
    model = SIP_.create()
    db.session.add(SIPMetadataType(title='JSON Test',
                                   name='json-test',
                                   format='json',
                                   schema='url'))
    db.session.commit()

    # The API wrapper starts without any metadata.
    api_sip = SIP(model)
    assert len(api_sip.metadata) == 0

    # Attach a dummy JSON document under the 'json-test' type.
    payload = json.dumps({'this': 'is', 'not': 'sparta'})
    api_sip.attach_metadata('json-test', payload)
    db.session.commit()

    # It is visible through the wrapper and through the model.
    assert len(api_sip.metadata) == 1
    attached = api_sip.metadata[0]
    assert attached.type.format == 'json'
    assert api_sip.metadata[0].content == payload
    assert model.sip_metadata[0].content == payload
Example #31
0
def test_SIP(db):
    """Test SIP API class."""
    owner = create_test_user('*****@*****.**')
    agent_info = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # The underlying SIP model.
    model = SIP_.create(user_id=owner.id, agent=agent_info)
    db.session.commit()

    # The API object exposes the attributes of the wrapped model.
    api_sip = SIP(model)
    assert api_sip.model is model
    assert api_sip.id == model.id
    assert api_sip.user is owner
    assert api_sip.agent == agent_info
    assert api_sip.archivable is True
    assert api_sip.archived is False

    # Setting ``archived`` on the API object is reflected on the model.
    api_sip.archived = True
    db.session.commit()
    assert api_sip.archived is True
    assert model.archived is True

    # ``get_sip`` gives back a wrapper with the same id.
    fetched = SIP.get_sip(model.id)
    assert fetched.id == api_sip.id
def test_SIP(db):
    """Test SIP API class."""
    owner = create_test_user('*****@*****.**')
    agent_info = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # The underlying SIP model.
    model = SIP_.create(user_id=owner.id, agent=agent_info)
    db.session.commit()

    # The API object exposes the attributes of the wrapped model.
    api_sip = SIP(model)
    assert api_sip.model is model
    assert api_sip.id == model.id
    assert api_sip.user is owner
    assert api_sip.agent == agent_info
    assert api_sip.archivable is True
    assert api_sip.archived is False

    # Setting ``archived`` on the API object is reflected on the model.
    api_sip.archived = True
    db.session.commit()
    assert api_sip.archived is True
    assert model.archived is True

    # ``get_sip`` gives back a wrapper with the same id.
    fetched = SIP.get_sip(model.id)
    assert fetched.id == api_sip.id
Example #33
0
def test_SIP_metadata(db):
    """Test the metadata methods of API SIP."""
    # A SIP model plus the metadata type used below.
    model = SIP_.create()
    db.session.add(SIPMetadataType(title='JSON Test',
                                   name='json-test',
                                   format='json',
                                   schema='url'))
    db.session.commit()

    # The API wrapper starts without any metadata.
    api_sip = SIP(model)
    assert len(api_sip.metadata) == 0

    # Attach a dummy JSON document under the 'json-test' type.
    payload = json.dumps({'this': 'is', 'not': 'sparta'})
    api_sip.attach_metadata('json-test', payload)
    db.session.commit()

    # It is visible through the wrapper and through the model.
    assert len(api_sip.metadata) == 1
    attached = api_sip.metadata[0]
    assert attached.type.format == 'json'
    assert api_sip.metadata[0].content == payload
    assert model.sip_metadata[0].content == payload
Example #34
0
def test_Archive_patch_200(db, client, oauth2):
    """Test the Archive's patch method on an archive being completed."""
    sip = SIP.create()
    archive = Archive.create(sip=sip, accession_id='id')
    db.session.commit()

    params = {'archivematica_id': str(uuid.uuid4()), 'status': 'COMPLETE'}
    archive_url = url_for('invenio_archivematica_api.archive_api',
                          accession_id=archive.accession_id,
                          access_token=oauth2.token)
    response = client.patch(archive_url,
                            data=json.dumps(params),
                            content_type='application/json')
    assert response.status_code == 200

    # The reply reports the archive as REGISTERED with the id we sent.
    result = json.loads(response.data.decode('utf-8'))
    expected_fields = (
        ('sip_id', str(sip.id)),
        ('status', 'REGISTERED'),
        ('accession_id', 'id'),
        ('archivematica_id', params['archivematica_id']),
    )
    for key, value in expected_fields:
        assert key in result and result[key] == value

    # The same change is stored in the database.
    stored = Archive.query.one()
    assert stored.status == ArchiveStatus.REGISTERED
    assert str(stored.archivematica_id) == params['archivematica_id']
Example #35
0
def test_oais_start_transfer(app, db, location):
    """Test the oais_start_transfer function."""
    accession_id = '1991'
    assert Archive.query.count() == 0

    # Set up a SIP with its archive.
    sip = SIP.create()
    Archive.create(sip)
    db.session.commit()
    assert Archive.query.count() == 1

    # Starting the transfer puts the archive in WAITING.
    oais_start_transfer(sip.id, accession_id)
    archive = Archive.get_from_sip(sip.id)
    assert archive.status == ArchiveStatus.WAITING
    assert archive.accession_id == '1991'

    # Second scenario: no archive exists and the transfer factory is
    # switched to the failing helper.
    db.session.delete(archive)
    db.session.commit()
    app.config['ARCHIVEMATICA_TRANSFER_FACTORY'] = 'helpers:transfer_fail'
    assert Archive.query.count() == 0
    oais_start_transfer(sip.id, accession_id)
    archive = Archive.get_from_sip(sip.id)
    assert Archive.query.count() == 1
    assert archive.status == ArchiveStatus.FAILED
    assert archive.accession_id == '1991'
    assert archive.sip.archived is False
def sips(db, locations, sip_metadata_types):
    """Create five SIPs, several of which share the same file instances.

    Files attached to each SIP:
    SIP-1: File1
    SIP-2: File1, File2
    SIP-3: File2 (other path on its SIPFile, same FileInstance), File3
    SIP-4: File4, File5, File6 (awkward file paths, non-ASCII contents)
    SIP-5: no files, one metadata entry only

    Rows are added through ``db_.session``; the ``db`` argument is not
    referenced directly in the body.

    :returns: List of the five ``SIPApi`` wrappers, in creation order.
    """
    def _stored_file(payload):
        # New FileInstance whose contents are written using the URI of
        # the 'default' entry of ``locations``.
        instance = FileInstance.create()
        instance.set_contents(BytesIO(payload),
                              default_location=locations['default'].uri)
        return instance

    def _attach_all(api, entries):
        # Attach every (metadata type name, content) pair, in order.
        for type_name, content in entries:
            api.attach_metadata(type_name, content)

    # SIP-1: the only one created with agent information.
    agent_info = {
        'email': '*****@*****.**',
        'orcid': '1111-1111-1111-1111',
        'ip_address': '1.1.1.1'
    }
    first_sip = SIP.create(agent=agent_info)
    first_api = SIPApi(first_sip)
    _attach_all(first_api, [
        ('marcxml-test', '<p>XML 1</p>'),
        ('json-test', '{"title": "JSON 1"}'),
        # 'txt-test' is attached as well, although it should not be
        # archived (see conftest configuration).
        ('txt-test', 'Title: TXT 1'),
    ])
    shared_file1 = _stored_file(b('test'))
    first_link = SIPFile(sip_id=first_sip.id, filepath="foobar.txt",
                         file_id=shared_file1.id)
    db_.session.add(first_link)

    # SIP-2: reuses File1 and introduces File2.
    second_sip = SIP.create()
    second_api = SIPApi(second_sip)
    _attach_all(second_api, [
        ('marcxml-test', '<p>XML 2</p>'),
        ('json-test', '{"title": "JSON 2"}'),
    ])
    shared_file2 = _stored_file(b'test-second')
    second_links = [
        SIPFile(sip_id=second_sip.id, filepath="foobar.txt",
                file_id=shared_file1.id),
        SIPFile(sip_id=second_sip.id, filepath="foobar2.txt",
                file_id=shared_file2.id),
    ]
    for link in second_links:
        db_.session.add(link)

    # SIP-3: File2 again under another path, plus File3.
    third_sip = SIP.create()
    third_api = SIPApi(third_sip)
    _attach_all(third_api, [
        ('marcxml-test', '<p>XML 3</p>'),
        ('json-test', '{"title": "JSON 3"}'),
    ])
    third_file = _stored_file(b'test-third')
    third_links = [
        SIPFile(sip_id=third_sip.id, filepath="foobar2-renamed.txt",
                file_id=shared_file2.id),
        SIPFile(sip_id=third_sip.id, filepath="foobar3.txt",
                file_id=third_file.id),
    ]
    for link in third_links:
        db_.session.add(link)

    # SIP-4: relative-path, URL-like and non-ASCII file paths.
    fourth_sip = SIP.create()
    fourth_api = SIPApi(fourth_sip)
    _attach_all(fourth_api, [
        ('marcxml-test', '<p>XML 4 żółć</p>'),
        ('json-test', '{"title": "JSON 4 żółć"}'),
    ])
    fourth_file = _stored_file('test-fourth żółć'.encode('utf-8'))
    fifth_file = _stored_file('test-fifth ąęćźə'.encode('utf-8'))
    sixth_file = _stored_file('test-sixth π'.encode('utf-8'))
    fourth_links = [
        SIPFile(sip_id=fourth_sip.id, filepath="../../foobar.txt",
                file_id=fourth_file.id),
        SIPFile(sip_id=fourth_sip.id,
                filepath="http://maliciouswebsite.com/hack.js",
                file_id=fifth_file.id),
        SIPFile(sip_id=fourth_sip.id, filepath="łóżźćąę.dat",
                file_id=sixth_file.id),
    ]
    for link in fourth_links:
        db_.session.add(link)

    # SIP-5: metadata only, no files.
    fifth_sip = SIP.create()
    fifth_api = SIPApi(fifth_sip)
    _attach_all(fifth_api, [('marcxml-test', '<p>XML 5 Meta Only</p>')])

    db_.session.commit()
    return [first_api, second_api, third_api, fourth_api, fifth_api]
def test_default_archive_directory_builder(app, db):
    """Check the directory parts the default builder returns for a SIP."""
    fixed_uuid = UUID('abcd0000-1111-2222-3333-444455556666')
    # Expected parts for the UUID above.
    expected_parts = ['ab', 'cd', '0000-1111-2222-3333-444455556666']
    created = SIP.create(id_=fixed_uuid)
    assert default_archive_directory_builder(created) == expected_parts
Example #38
0
def test_is_archivable_none(db):
    """Check ``is_archivable_none`` is falsy for both ``archivable`` flags."""
    flagged_sip = SIP.create(archivable=True)
    unflagged_sip = SIP.create(archivable=False)
    # The result must be falsy whatever the value of the flag.
    for candidate in (flagged_sip, unflagged_sip):
        assert not factories.is_archivable_none(candidate)
Example #39
0
def create_files_and_sip(deposit, dep_pid):
    """Create the deposit's Bucket, file objects and SIPs.

    A new ``Bucket`` is created and linked to the deposit through a
    ``RecordsBuckets`` row. Every entry of ``deposit['files']`` becomes a
    ``FileInstance`` plus an ``ObjectVersion`` in that bucket, and its
    metadata is appended to ``deposit['_files']``. One ``SIP`` is then
    created per entry of ``deposit['sips']``; the files are attached (as
    ``SIPFile`` rows) to the first SIP only. When the SIP entries carry a
    recid, each created SIP is linked to the matching ``recid`` PID with a
    ``RecordSIP`` row.

    :param deposit: Deposit record. Its ``files`` and ``sips`` keys are
        read, ``_deposit`` and ``_files`` are filled in, and both the
        record and the database session are committed before returning.
    :param dep_pid: Deposit persistent identifier; only its ``pid_value``
        is used, in log messages and raised errors.
    :returns: The ``deposit`` that was passed in.
    :raises DepositMultipleRecids: If the SIP entries refer to more than
        one distinct recid.
    :raises DepositSIPUserDoesNotExist: If ``SIP.create`` raises
        ``SIPUserDoesNotExist`` for the user id of a SIP entry.
    :raises DepositRecidDoesNotExist: If no ``recid`` PID exists for the
        recid found in the SIP entries.
    """
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PersistentIdentifier
    from invenio_sipstore.errors import SIPUserDoesNotExist
    from invenio_sipstore.models import SIP, RecordSIP, SIPFile
    from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_db import db
    buc = Bucket.create()
    recbuc = RecordsBuckets(record_id=deposit.id, bucket_id=buc.id)
    db.session.add(recbuc)
    deposit.setdefault('_deposit', {})
    deposit.setdefault('_files', [])
    files = deposit.get('files', [])
    sips = deposit.get('sips', [])
    recid = None

    if sips:
        recids = [int(sip_data['metadata']['recid']) for sip_data in sips]
        distinct_recids = set(recids)
        if len(distinct_recids) > 1:
            # The message used to end with a stray "does not exists."
            # fragment copied from the other log calls in this function.
            logger.error('Multiple recids ({recids}) found in deposit '
                         '{depid}.'.format(recids=recids,
                                           depid=dep_pid.pid_value))
            raise DepositMultipleRecids(dep_pid.pid_value,
                                        list(distinct_recids))
        elif recids:  # If only one recid
            recid = recids[0]

    # Store the path -> FileInstance mappings for SIPFile creation later
    dep_file_instances = []

    for file_ in files:
        fi = FileInstance.create()
        fi.set_uri(file_['path'], file_['size'], file_['checksum'])
        ov = ObjectVersion.create(buc, file_['name'], _file_id=fi.id)
        file_meta = dict(
            bucket=str(buc.id),
            key=file_['name'],
            checksum=file_['checksum'],
            size=file_['size'],
            version_id=str(ov.version_id),
        )
        deposit['_files'].append(file_meta)
        dep_file_instances.append((file_['path'], fi))

    for idx, sip_data in enumerate(sips):
        agent = None
        user_id = None
        if sip_data['agents']:
            # Only the first agent of the entry is taken into account.
            first_agent = sip_data['agents'][0]
            agent = dict(
                ip_address=first_agent.get('ip_address', ""),
                email=first_agent.get('email_address', ""),
            )
            user_id = first_agent['user_id']
        content = sip_data['package']
        sip_format = 'marcxml'
        try:
            sip = SIP.create(sip_format,
                             content,
                             user_id=user_id,
                             agent=agent)
        except SIPUserDoesNotExist:
            logger.exception('User ID {user_id} referred in deposit {depid} '
                             'does not exists.'.format(
                                 user_id=user_id, depid=dep_pid.pid_value))
            raise DepositSIPUserDoesNotExist(dep_pid.pid_value, user_id)

        # If recid was found, attach it to SIP
        # TODO: This always uses the first recid, as we quit if multiple
        # recids are found in the sips information
        if recid:
            try:
                pid = PersistentIdentifier.get(pid_type='recid',
                                               pid_value=recid)
                record_sip = RecordSIP(sip_id=sip.id, pid_id=pid.id)
                db.session.add(record_sip)
            except PIDDoesNotExistError:
                logger.exception('Record {recid} referred in '
                                 'Deposit {depid} does not exists.'.format(
                                     recid=recid, depid=dep_pid.pid_value))
                raise DepositRecidDoesNotExist(dep_pid.pid_value, recid)
        if idx == 0:
            # All deposit files go to the first SIP only.
            for fp, fi in dep_file_instances:
                sipf = SIPFile(sip_id=sip.id, filepath=fp, file_id=fi.id)
                db.session.add(sipf)
    deposit.commit()
    db.session.commit()
    return deposit
Example #40
0
def create_files_and_sip(deposit, dep_pid):
    """Create the deposit's Bucket, file objects and SIPs.

    Steps, in order:

    * A new ``Bucket`` is created, linked to the deposit through a
      ``RecordsBuckets`` row and recorded under ``deposit['_buckets']``
      (only if that key is not set yet).
    * If the deposit has exactly one draft and that draft's values hold a
      ``prereserve_doi`` entry, the prereserved ``recid`` and ``doi`` PIDs
      are created in ``RESERVED`` status when they do not exist, and the
      recid is inserted into ``RecordIdentifier`` when missing.
    * Every entry of ``deposit['files']`` becomes a ``FileInstance`` plus
      an ``ObjectVersion``; its metadata is appended to
      ``deposit['_files']``.
    * One ``SIP`` is created per entry of ``deposit['sips']``. If the
      referenced user does not exist, the SIP is created without a user.
      The files are attached (as ``SIPFile`` rows) to the first SIP only.
    * When the SIP entries carry a recid, each SIP is linked to the
      matching ``recid`` PID through ``RecordSIP``; if that PID does not
      exist it is logged and the PID is reserved instead.

    Only ``deposit.commit()`` is called at the end; this function does not
    call ``db.session.commit()`` itself.

    :param deposit: Deposit record, read and updated in place.
    :param dep_pid: Deposit persistent identifier, used in log messages
        and in the raised error.
    :returns: The ``deposit`` that was passed in.
    :raises DepositMultipleRecids: If the SIP entries refer to more than
        one distinct recid.
    """
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus
    from invenio_sipstore.errors import SIPUserDoesNotExist
    from invenio_sipstore.models import SIP, RecordSIP, SIPFile
    from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_db import db
    buc = Bucket.create()
    recbuc = RecordsBuckets(record_id=deposit.id, bucket_id=buc.id)
    db.session.add(recbuc)
    deposit.setdefault('_deposit', {})
    deposit.setdefault('_buckets', dict(deposit=str(buc.id)))
    deposit.setdefault('_files', [])
    files = deposit.get('files', [])
    sips = deposit.get('sips', [])

    # Look for prereserved DOI (and recid)
    if 'drafts' in deposit:
        drafts = list(deposit['drafts'].items())
        if len(drafts) != 1:
            # No exception is being handled here, so ``logger.error`` is
            # used: ``logger.exception`` would append an empty traceback.
            # The count is reported because this branch also covers the
            # case of zero drafts.
            logger.error(
                'Deposit {dep_pid} has {count} drafts, expected exactly '
                'one'.format(dep_pid=dep_pid, count=len(drafts)))
        else:
            draft = drafts[0][1]
            draft_v = draft['values']
            if 'prereserve_doi' in draft_v:
                pre_recid = str(draft_v['prereserve_doi']['recid'])
                pre_doi = str(draft_v['prereserve_doi']['doi'])

                # If pre-reserve info available, try to reserve 'recid'
                try:
                    PersistentIdentifier.get(pid_type='recid',
                                             pid_value=pre_recid)
                except PIDDoesNotExistError:
                    # Reserve recid
                    PersistentIdentifier.create(
                        pid_type='recid',
                        pid_value=pre_recid,
                        object_type='rec',
                        status=PIDStatus.RESERVED)

                # If pre-reserve info available, try to reserve 'doi'
                try:
                    PersistentIdentifier.get(pid_type='doi',
                                             pid_value=pre_doi)
                except PIDDoesNotExistError:
                    # Reserve DOI
                    PersistentIdentifier.create(
                        pid_type='doi',
                        pid_value=pre_doi,
                        object_type='rec',
                        status=PIDStatus.RESERVED)

                if RecordIdentifier.query.get(int(pre_recid)) is None:
                    RecordIdentifier.insert(int(pre_recid))

    # Store the path -> FileInstance mappings for SIPFile creation later
    dep_file_instances = []

    for file_ in files:
        size = file_['size']
        key = file_['name']
        # Warning: Assumes all checksums are MD5!
        checksum = 'md5:{0}'.format(file_['checksum'])
        fi = FileInstance.create()
        fi.set_uri(file_['path'], size, checksum)
        ov = ObjectVersion.create(buc, key, _file_id=fi.id)
        # File type is the lower-cased extension without its leading dot.
        ext = splitext(ov.key)[1].lower()
        if ext.startswith('.'):
            ext = ext[1:]
        file_meta = dict(
            bucket=str(ov.bucket.id),
            key=ov.key,
            checksum=ov.file.checksum,
            size=ov.file.size,
            version_id=str(ov.version_id),
            type=ext,
        )
        deposit['_files'].append(file_meta)
        dep_file_instances.append((file_['path'], fi))

    # Get a recid from SIP information
    recid = None
    if sips:
        recids = [int(sip_data['metadata']['recid']) for sip_data in sips]
        distinct_recids = set(recids)
        if len(distinct_recids) > 1:
            # The message used to end with a stray "does not exists."
            # fragment copied from the other log calls in this function.
            logger.error('Multiple recids ({recids}) found in deposit '
                         '{depid}.'.format(recids=recids,
                                           depid=dep_pid.pid_value))
            raise DepositMultipleRecids(dep_pid.pid_value,
                                        list(distinct_recids))
        elif recids:  # If only one recid
            recid = recids[0]

    for idx, sip_data in enumerate(sips):
        agent = None
        user_id = None
        if sip_data['agents']:
            # Only the first agent of the entry is taken into account.
            first_agent = sip_data['agents'][0]
            agent = dict(
                ip_address=empty_str_if_none(
                    first_agent.get('ip_address', "")),
                email=empty_str_if_none(
                    first_agent.get('email_address', "")),
            )
            user_id = first_agent['user_id']
        # A user id of 0 is treated as "no user".
        if user_id == 0:
            user_id = None
        content = sip_data['package']
        sip_format = 'marcxml'
        try:
            sip = SIP.create(sip_format, content, user_id=user_id, agent=agent)
        except SIPUserDoesNotExist:
            logger.exception('User ID {user_id} referred in deposit {depid} '
                             'does not exists.'.format(
                                 user_id=user_id, depid=dep_pid.pid_value))
            # Fall back to a SIP that is not tied to any user.
            sip = SIP.create(sip_format, content, agent=agent)

        # Attach recid to SIP
        if recid:
            try:
                pid = PersistentIdentifier.get(pid_type='recid',
                                               pid_value=str(recid))
                record_sip = RecordSIP(sip_id=sip.id, pid_id=pid.id)
                db.session.add(record_sip)
            except PIDDoesNotExistError:
                logger.exception('Record {recid} referred in '
                                 'Deposit {depid} does not exists.'.format(
                                     recid=recid, depid=dep_pid.pid_value))
                # NOTE(review): explicit ``== True`` kept on purpose so
                # the branch taken for non-bool values does not change.
                if deposit['_p']['submitted'] == True:
                    logger.exception('Pair {recid}/{depid} was submitted,'
                                     ' (should it be unpublished?).'.format(
                                         recid=recid, depid=dep_pid.pid_value))
                else:
                    logger.exception(
                        'Pair {recid}/{depid} was not submitted.'.format(
                            recid=recid, depid=dep_pid.pid_value))

                # Reserve recid
                # NOTE(review): no RecordSIP row is added on this path, so
                # the SIP stays unlinked from the newly reserved PID --
                # confirm this is intended.
                pid = PersistentIdentifier.create(pid_type='recid',
                                                  pid_value=str(recid),
                                                  object_type='rec',
                                                  status=PIDStatus.RESERVED)

                if RecordIdentifier.query.get(int(recid)) is None:
                    RecordIdentifier.insert(int(recid))
        if idx == 0:
            # All deposit files go to the first SIP only.
            for fp, fi in dep_file_instances:
                sipf = SIPFile(sip_id=sip.id, filepath=fp, file_id=fi.id)
                db.session.add(sipf)
    deposit.commit()
    return deposit
Example #41
0
def test_default_archive_directory_builder(app, db):
    """Check the directory parts the default builder returns for a SIP."""
    known_id = UUID('abcd0000-1111-2222-3333-444455556666')
    # Expected parts for the UUID above.
    wanted = ['ab', 'cd', '0000-1111-2222-3333-444455556666']
    sip_with_known_id = SIP.create(id_=known_id)
    assert default_archive_directory_builder(sip_with_known_id) == wanted
Example #42
0
def sips(db, locations, sip_metadata_types):
    """Create five SIPs, several of which share the same file instances.

    Files attached to each SIP:
    SIP-1: File1
    SIP-2: File1, File2
    SIP-3: File2 (other path on its SIPFile, same FileInstance), File3
    SIP-4: File4, File5, File6 (awkward file paths, non-ASCII contents)
    SIP-5: no files, one metadata entry only

    Rows are added through ``db_.session``; the ``db`` argument is not
    referenced directly in the body.

    :returns: List of the five ``SIPApi`` wrappers, in creation order.
    """
    def _stored_file(payload):
        # New FileInstance whose contents are written using the URI of
        # the 'default' entry of ``locations``.
        instance = FileInstance.create()
        instance.set_contents(BytesIO(payload),
                              default_location=locations['default'].uri)
        return instance

    def _attach_all(api, entries):
        # Attach every (metadata type name, content) pair, in order.
        for type_name, content in entries:
            api.attach_metadata(type_name, content)

    # SIP-1: the only one created with agent information.
    agent_info = {
        'email': '*****@*****.**',
        'orcid': '1111-1111-1111-1111',
        'ip_address': '1.1.1.1'
    }
    first_sip = SIP.create(agent=agent_info)
    first_api = SIPApi(first_sip)
    _attach_all(first_api, [
        ('marcxml-test', '<p>XML 1</p>'),
        ('json-test', '{"title": "JSON 1"}'),
        # 'txt-test' is attached as well, although it should not be
        # archived (see conftest configuration).
        ('txt-test', 'Title: TXT 1'),
    ])
    shared_file1 = _stored_file(b('test'))
    first_link = SIPFile(sip_id=first_sip.id,
                         filepath="foobar.txt",
                         file_id=shared_file1.id)
    db_.session.add(first_link)

    # SIP-2: reuses File1 and introduces File2.
    second_sip = SIP.create()
    second_api = SIPApi(second_sip)
    _attach_all(second_api, [
        ('marcxml-test', '<p>XML 2</p>'),
        ('json-test', '{"title": "JSON 2"}'),
    ])
    shared_file2 = _stored_file(b'test-second')
    second_links = [
        SIPFile(sip_id=second_sip.id,
                filepath="foobar.txt",
                file_id=shared_file1.id),
        SIPFile(sip_id=second_sip.id,
                filepath="foobar2.txt",
                file_id=shared_file2.id),
    ]
    for link in second_links:
        db_.session.add(link)

    # SIP-3: File2 again under another path, plus File3.
    third_sip = SIP.create()
    third_api = SIPApi(third_sip)
    _attach_all(third_api, [
        ('marcxml-test', '<p>XML 3</p>'),
        ('json-test', '{"title": "JSON 3"}'),
    ])
    third_file = _stored_file(b'test-third')
    third_links = [
        SIPFile(sip_id=third_sip.id,
                filepath="foobar2-renamed.txt",
                file_id=shared_file2.id),
        SIPFile(sip_id=third_sip.id,
                filepath="foobar3.txt",
                file_id=third_file.id),
    ]
    for link in third_links:
        db_.session.add(link)

    # SIP-4: relative-path, URL-like and non-ASCII file paths.
    fourth_sip = SIP.create()
    fourth_api = SIPApi(fourth_sip)
    _attach_all(fourth_api, [
        ('marcxml-test', '<p>XML 4 żółć</p>'),
        ('json-test', '{"title": "JSON 4 żółć"}'),
    ])
    fourth_file = _stored_file('test-fourth żółć'.encode('utf-8'))
    fifth_file = _stored_file('test-fifth ąęćźə'.encode('utf-8'))
    sixth_file = _stored_file('test-sixth π'.encode('utf-8'))
    fourth_links = [
        SIPFile(sip_id=fourth_sip.id,
                filepath="../../foobar.txt",
                file_id=fourth_file.id),
        SIPFile(sip_id=fourth_sip.id,
                filepath="http://maliciouswebsite.com/hack.js",
                file_id=fifth_file.id),
        SIPFile(sip_id=fourth_sip.id,
                filepath="łóżźćąę.dat",
                file_id=sixth_file.id),
    ]
    for link in fourth_links:
        db_.session.add(link)

    # SIP-5: metadata only, no files.
    fifth_sip = SIP.create()
    fifth_api = SIPApi(fifth_sip)
    _attach_all(fifth_api, [('marcxml-test', '<p>XML 5 Meta Only</p>')])

    db_.session.commit()
    return [first_api, second_api, third_api, fourth_api, fifth_api]