Example 1
def test_basic_search(app, db, es):
    """Test basic search functionality."""
    # The index should be empty
    assert len(ItemSearch().execute()) == 0

    # Create item1, search for everything
    item1 = Item.create({})
    item1.commit()

    record_indexer = RecordIndexer()
    record_indexer.index(item1)

    current_search.flush_and_refresh('_all')

    assert len(ItemSearch().execute()) == 1

    # Create item2, search for everything again
    item2 = Item.create({'foo': 'bar'})
    item2.commit()
    record_indexer.index(item2)

    current_search.flush_and_refresh('_all')

    assert len(ItemSearch().execute()) == 2

    # Search for item2
    assert len(ItemSearch().query('match', foo='bar').execute()) == 1

    # Search for nonsense
    assert len(ItemSearch().query('match', foo='banana').execute()) == 0
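For context, ItemSearch is presumably a RecordsSearch subclass bound to the items index. A minimal sketch of such a class, assuming the standard invenio-search API (the index name is a guess):

from invenio_search.api import RecordsSearch

class ItemSearch(RecordsSearch):
    """Search class for items (assumed definition)."""

    class Meta:
        index = 'items'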
Example 2
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from Invenio-Records
    because, despite the name, at that point we are not yet sure whether the record
    has been really committed to the DB.
    """
    indexer = RecordIndexer()
    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update') and not model_instance.json.get("deleted"):
                if hasattr(model_instance, '_enhanced_record'):
                    record = model_instance._enhanced_record
                else:
                    record = model_instance.json
                indexer.index(InspireRecord(record, model_instance))
            else:
                try:
                    indexer.delete(InspireRecord(
                        model_instance.json, model_instance))
                except NotFoundError:
                    # Record not found in ES
                    LOGGER.debug('Record %s not found in ES',
                                 model_instance.json.get("id"))

            pid_type = get_pid_type_from_schema(model_instance.json['$schema'])
            pid_value = model_instance.json['control_number']
            db_version = model_instance.version_id

            index_modified_citations_from_record.delay(pid_type, pid_value, db_version)
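The docstring above turns on signal timing, so the receiver has to be hooked to a signal that fires only after a real commit. A minimal sketch of the assumed wiring via Flask-SQLAlchemy's models_committed signal:

from flask_sqlalchemy import models_committed

# ``index_after_commit(sender, changes)`` matches the signal's signature,
# so it can be connected directly (hypothetical wiring, not from the source).
models_committed.connect(index_after_commit)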
Example 3
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                'invenio_records', filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict['$schema'] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = Record.create(item_dict, id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response['hits']['total'] >= len(records):
                break
            current_search.flush_and_refresh('_all')

    return records
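A hypothetical invocation of the helper above (the filename and schema URL are placeholders, not values from the original project):

ids = load_records(app, 'data/marc21/bibliographic.xml',
                   'http://localhost:5000/schemas/marc21/bibliographic/bd-v1.0.0.json')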
Example 4
def records():
    """Load records."""
    import pkg_resources
    import uuid
    from flask_login import login_user, logout_user
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_accounts.models import User
    from invenio_deposit.api import Deposit

    users = User.query.all()

    # locate the demo data via pkg_resources
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        with current_app.test_request_context():
            indexer = RecordIndexer()
            with db.session.begin_nested():
                for index, data in enumerate(split_blob(source.read()),
                                             start=1):
                    login_user(users[index % len(users)])
                    # do translate
                    record = marc21.do(create_record(data))
                    # create record
                    indexer.index(Deposit.create(record))
                    logout_user()
            db.session.commit()
Example 5
def record_not_yet_deleted(app):
    snippet = (
        '<record>'
        '  <controlfield tag="001">333</controlfield>'
        '  <controlfield tag="005">20160913214552.0</controlfield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '</record>'
    )

    with app.app_context():
        json_record = hep.do(create_record(snippet))
        json_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            record = record_upsert(json_record)
            if record:
                ri = RecordIndexer()
                ri.index(record)

        db.session.commit()

    yield

    with app.app_context():
        _delete_record_from_everywhere('literature', 333)
Example 6
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:

        click.echo('Loading glossary-terms from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example 7
def records():
    """Load records."""
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # locate the demo data via pkg_resources
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for index, data in enumerate(split_blob(source.read()), start=1):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid_minter'](
                    rec_uuid, record
                )
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        db.session.commit()
Example 8
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example 9
def receive_after_model_commit(sender, changes):
    """Perform actions after models committed to database."""
    indexer = RecordIndexer()
    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update'):
                indexer.index(InspireRecord(model_instance.json, model_instance))
            else:
                indexer.delete(InspireRecord(model_instance.json, model_instance))
Example 10
def closed_access_record(db, es, record_with_files_creation):
    """Creation of a full record with closed access right."""
    _, record, record_url = record_with_files_creation
    record['access_right'] = AccessRight.CLOSED
    record.commit()
    db.session.commit()
    indexer = RecordIndexer()
    indexer.index(record)
    current_search.flush_and_refresh(index='records')
    return record
Example 12
def oaiserver(sets, records):
    """Initialize OAI-PMH server."""
    from invenio_db import db
    from invenio_oaiserver.models import OAISet
    from invenio_records.api import Record

    # create OAI sets
    with db.session.begin_nested():
        for i in range(sets):
            db.session.add(
                OAISet(
                    spec='test{0}'.format(i),
                    name='Test{0}'.format(i),
                    description='test desc {0}'.format(i),
                    search_pattern='title_statement.title:Test{0}'.format(i),
                ))

    # create a record
    schema = {
        'type': 'object',
        'properties': {
            'title_statement': {
                'type': 'object',
                'properties': {
                    'title': {
                        'type': 'string',
                    },
                },
            },
            'field': {
                'type': 'boolean'
            },
        },
    }

    search.client.indices.delete_alias('_all', '_all', ignore=[400, 404])
    search.client.indices.delete('*')

    with app.app_context():
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for i in range(records):
                record_id = uuid.uuid4()
                data = {
                    'title_statement': {
                        'title': 'Test{0}'.format(i)
                    },
                    '$schema': schema,
                }
                recid_minter(record_id, data)
                oaiid_minter(record_id, data)
                record = Record.create(data, id_=record_id)
                indexer.index(record)

        db.session.commit()
Example 13
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))

                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example 14
def create_record(app, item_dict, mint_oaiid=True):
    """Create test record."""
    indexer = RecordIndexer()
    with app.test_request_context():
        record_id = uuid.uuid4()
        recid_minter(record_id, item_dict)
        if mint_oaiid:
            oaiid_minter(record_id, item_dict)
        record = Record.create(item_dict, id_=record_id)
        indexer.index(record)
        return record
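A hypothetical call of this helper (the payload is a placeholder):

record = create_record(app, {'title_statement': {'title': 'A title'}})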
Example 16
def receive_after_model_commit(sender, changes):
    """Perform actions after models committed to database."""
    indexer = RecordIndexer()
    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update'):
                indexer.index(
                    InspireRecord(model_instance.json, model_instance))
            else:
                indexer.delete(
                    InspireRecord(model_instance.json, model_instance))
Example 17
def _records_create_and_index(db, objs, cls, pid_type):
    """Create records and index."""
    indexer = RecordIndexer()
    recs = []
    for obj in objs:
        record = cls.create(obj)
        mint_record_pid(pid_type, "pid", record)
        record.commit()
        recs.append(record)
    db.session.commit()
    for rec in recs:
        indexer.index(rec)
Example 18
def indexed_loans(es, test_loans):
    """Index test loans and remove them from the index afterwards."""
    indexer = RecordIndexer()
    for pid, loan in test_loans:
        indexer.index(loan)
    current_search.flush_and_refresh(index="loans")

    yield test_loans

    for pid, loan in test_loans:
        indexer.delete_by_id(loan.id)
    current_search.flush_and_refresh(index="loans")
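A hypothetical test consuming this fixture (the test name and search class are placeholders):

def test_loans_are_searchable(indexed_loans):
    from invenio_search.api import RecordsSearch
    results = RecordsSearch(index='loans').execute()
    assert results.hits.total >= len(indexed_loans)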
Example 20
def testdata(app, db, es_clear, system_user):
    """Create, index and return test data."""
    indexer = RecordIndexer()

    locations = load_json_from_datadir("locations.json")
    for location in locations:
        record = Location.create(location)
        mint_record_pid(LOCATION_PID_TYPE, "pid", record)
        record.commit()
        db.session.commit()
        indexer.index(record)

    internal_locations = load_json_from_datadir("internal_locations.json")
    for internal_location in internal_locations:
        record = InternalLocation.create(internal_location)
        mint_record_pid(INTERNAL_LOCATION_PID_TYPE, "pid", record)
        record.commit()
        db.session.commit()
        indexer.index(record)

    documents = load_json_from_datadir("documents.json")
    for doc in documents:
        record = Document.create(doc)
        mint_record_pid(DOCUMENT_PID_TYPE, "pid", record)
        record.commit()
        db.session.commit()
        indexer.index(record)

    items = load_json_from_datadir("items.json")
    for item in items:
        record = Item.create(item)
        mint_record_pid(ITEM_PID_TYPE, "pid", record)
        record.commit()
        db.session.commit()
        indexer.index(record)

    loans = load_json_from_datadir("loans.json")
    for loan in loans:
        record = Loan.create(loan)
        mint_record_pid(CIRCULATION_LOAN_PID_TYPE, "pid", record)
        record.commit()
        db.session.commit()
        indexer.index(record)

    # flush all indices after indexing, otherwise ES won't be ready for tests
    current_search.flush_and_refresh(index='*')
    return {
        "locations": locations,
        "documents": documents,
        "items": items,
        "loans": loans,
    }
Example 21
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example 22
def create_deposits(app, test_records_data, creator):
    """Create test deposits."""
    DepositInfo = namedtuple('DepositInfo', ['id', 'data', 'deposit'])
    indexer = RecordIndexer()

    with authenticated_user(creator):
        deposits = [Deposit.create(data=data)
                    for data in deepcopy(test_records_data)]
    for deposit in deposits:
        indexer.index(deposit)
        deposit.commit()
    return [DepositInfo(dep.id, dep.dumps(), dep) for dep in deposits]
Example 23
def _create_and_index_record(record):
    record = Record.create(record)
    inspire_recid_minter(record.id, record)
    # invenio-collections will populate _collections field in record upon
    # commit
    db.session.commit()

    # Record needs to be indexed since views fetch records from ES
    r = RecordIndexer()
    r.index(record)
    es.indices.refresh('records-hep')

    return record
Example 24
def test_record_can_be_deleted(app, record_not_yet_deleted):
    with app.test_client() as client:
        assert client.get('/api/literature/333').status_code == 200

    record = get_db_record('literature', 333)
    record['deleted'] = True
    record.commit()
    if record:
        ri = RecordIndexer()
        ri.index(record)
    db.session.commit()

    with app.test_client() as client:
        assert client.get('/api/literature/333').status_code == 410
Example 25
def oaiserver(sets, records):
    """Initialize OAI-PMH server."""
    from invenio_db import db
    from invenio_oaiserver.models import OAISet
    from invenio_records.api import Record

    # create OAI sets
    with db.session.begin_nested():
        for i in range(sets):
            db.session.add(OAISet(
                spec='test{0}'.format(i),
                name='Test{0}'.format(i),
                description='test desc {0}'.format(i),
                search_pattern='title_statement.title:Test{0}'.format(i),
            ))

    # create a record
    schema = {
        'type': 'object',
        'properties': {
            'title_statement': {
                'type': 'object',
                'properties': {
                    'title': {
                        'type': 'string',
                    },
                },
            },
            'field': {'type': 'boolean'},
        },
    }

    search.client.indices.delete_alias('_all', '_all', ignore=[400, 404])
    search.client.indices.delete('*')

    with app.app_context():
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for i in range(records):
                record_id = uuid.uuid4()
                data = {
                    'title_statement': {'title': 'Test{0}'.format(i)},
                    '$schema': schema,
                }
                recid_minter(record_id, data)
                oaiid_minter(record_id, data)
                record = Record.create(data, id_=record_id)
                indexer.index(record)

        db.session.commit()
Example 26
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from Invenio-Records
    because, despite the name, at that point we are not yet sure whether the record
    has been really committed to the DB.
    """
    indexer = RecordIndexer()

    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update'):
                indexer.index(Record(model_instance.json, model_instance))
            else:
                indexer.delete(Record(model_instance.json, model_instance))
Example 27
def data(datafile):
    """Insert demo data."""
    click.secho("Importing demo data from {}".format(datafile), fg="yellow")

    indexer = RecordIndexer()
    holder = Holder()

    loader = DataLoader(holder)
    loader.load(datafile)
    rec_items = loader.persist()
    for rec in rec_items:
        # TODO: bulk index when we have the queue in k8s deployment
        indexer.index(rec)

    current_search.flush_and_refresh(index="*")
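The TODO above refers to RecordIndexer's bulk API; a sketch of the queued variant, assuming a Celery worker is consuming the default indexer queue:

from invenio_indexer.tasks import process_bulk_queue

indexer = RecordIndexer()
indexer.bulk_index(rec.id for rec in rec_items)  # only queues the record ids
process_bulk_queue.delay()  # a worker then indexes them in bulk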
Example 28
def make_sample_record(db, title, community_id, state='filling', secondary=None):
    rec = {
        'title': title,
        'oarepo:primaryCommunity': community_id,
        'oarepo:recordStatus': state,
        'oarepo:secondaryCommunities': secondary,
        'oarepo:ownedBy': 1
    }
    record_uuid = uuid.uuid4()
    pid = recid_minter(record_uuid, rec)
    rec = TestRecord.create(rec, id_=record_uuid)
    db.session.commit()
    indexer = RecordIndexer()
    indexer.index(rec)
    return PIDRecord(pid, rec)
Example 30
def index_after_commit(sender, changes):
    """Index records automatically after each modification."""

    indexer = RecordIndexer()
    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update') and model_instance.json:
                indexer.index(Record(model_instance.json, model_instance))
            else:
                try:
                    indexer.delete(Record(model_instance.json, model_instance))
                except NotFoundError:
                    # Record not found in ES
                    current_app.logger.warning(
                        'Record with id "%s" not found in ElasticSearch' %
                        model_instance.json.get('control_number'))
Example 31
def test_publication_date_mapping(db, es, minimal_record):
    """Tests publication_date related fields are indexed properly.

    - Tests jsonschema validates correctly
    - Tests that the retrieved record document is correct.

    NOTE:
        - es fixture depends on appctx fixture, so we are in app context
        - this test requires a running ES instance
    """
    # Interval
    minimal_record['publication_date'] = '1939/1945'
    minimal_record['_publication_date_search'] = '1939-01-01'

    record_id = uuid.uuid4()
    current_pidstore.minters['recid_v2'](record_id, minimal_record)
    record = Record.create(minimal_record, id_=record_id)
    db.session.commit()
    indexer = RecordIndexer()

    index_result = indexer.index(record)

    _index = index_result['_index']
    _doc = index_result['_type']
    _id = index_result['_id']
    es_doc = es.get(index=_index, doc_type=_doc, id=_id)
    source = es_doc['_source']
    assert source['publication_date'] == '1939/1945'
    assert source['_publication_date_search'] == '1939-01-01'
Example 32
def testdata(app, db, es_clear, patrons):
    """Create, index and return test data."""
    data = load_json_from_datadir("locations.json")
    locations = _create_records(db, data, Location, LOCATION_PID_TYPE)

    data = load_json_from_datadir("internal_locations.json")
    int_locs = _create_records(db, data, InternalLocation,
                               INTERNAL_LOCATION_PID_TYPE)

    data = load_json_from_datadir("documents.json")
    documents = _create_records(db, data, Document, DOCUMENT_PID_TYPE)

    data = load_json_from_datadir("series.json")
    series = _create_records(db, data, Series, SERIES_PID_TYPE)

    data = load_json_from_datadir("items.json")
    items = _create_records(db, data, Item, ITEM_PID_TYPE)

    data = load_json_from_datadir("eitems.json")
    eitems = _create_records(db, data, EItem, EITEM_PID_TYPE)

    data = load_json_from_datadir("ill_libraries.json")
    ill_libraries = _create_records(db, data, Provider, PROVIDER_PID_TYPE)

    data = load_json_from_datadir("ill_borrowing_requests.json")
    ill_brw_reqs = _create_records(db, data, BorrowingRequest,
                                   BORROWING_REQUEST_PID_TYPE)

    data = load_json_from_datadir("loans.json")
    loans = _create_records(db, data, Loan, CIRCULATION_LOAN_PID_TYPE)

    # index
    ri = RecordIndexer()
    for rec in (locations + int_locs + series + documents + items + eitems +
                loans + ill_libraries + ill_brw_reqs):
        ri.index(rec)

    current_search.flush_and_refresh(index="*")
    return {
        "documents": documents,
        "eitems": eitems,
        "internal_locations": int_locs,
        "items": items,
        "loans": loans,
        "locations": locations,
        "series": series,
    }
Example 33
def test_records_can_be_merged(app, records_not_merged_in_marcxml):
    with app.test_client() as client:
        assert client.get('/api/literature/111').status_code == 200
        assert client.get('/api/literature/222').status_code == 200

    record = get_db_record('literature', 222)
    record['deleted'] = True
    record['new_record'] = {'$ref': 'http://localhost:5000/api/record/111'}
    record.commit()
    if record:
        ri = RecordIndexer()
        ri.index(record)
    db.session.commit()

    with app.test_client() as client:
        assert client.get('/api/literature/111').status_code == 200
        assert client.get('/api/literature/222').status_code == 301
Example 34
def test_before_deposit_index_hook_doesnt_create_new_buckets(
        create_record, db, es):

    deposit = create_record(published=False)
    bucket = Bucket.get(deposit['_buckets']['deposit'])
    obj = ObjectVersion.create(bucket, 'foo.txt')
    stream = BytesIO(b'Hello world!')
    obj.set_contents(stream,
                     size=len(stream.getvalue()),
                     size_limit=bucket.size_limit)
    db.session.commit()
    number_buckets_preindex = len(Bucket.query.all())
    indexer = RecordIndexer()

    indexer.index(deposit)

    assert len(Bucket.query.all()) == number_buckets_preindex
Example 35
def docs():
    """Load demo article records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.docid import \
        cernopendata_docid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/docs-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/docs')

    articles_json = get_jsons_from_dir(data)

    for filename in articles_json:
        name = filename.split('/')[-1]
        if name.startswith('opera'):
            click.echo('Skipping opera records ...')
            continue
        with open(filename, 'rb') as source:
            for data in json.load(source):

                # Replace body with responding content
                assert data["body"]["content"]
                content_filename = os.path.join(*([
                    "/",
                ] + filename.split('/')[:-1] + [
                    data["body"]["content"],
                ]))

                with open(content_filename) as body_field:
                    data["body"]["content"] = body_field.read()
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                id = uuid.uuid4()
                cernopendata_docid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example 36
def software():
    """Load demo software records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/software-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/software')
    software_json = glob.glob(os.path.join(data, '*.json'))

    for filename in software_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', None)

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example 37
def testdata_most_loaned(db, testdata):
    """Create, index and return test data for most loans tests."""
    most_loaned = load_json_from_datadir("loans_most_loaned.json")
    recs = _create_records(db, most_loaned, Loan, CIRCULATION_LOAN_PID_TYPE)

    ri = RecordIndexer()
    for rec in recs:
        ri.index(rec)

    current_search.flush_and_refresh(index="loans")

    return {
        "locations": testdata["locations"],
        "internal_locations": testdata["internal_locations"],
        "documents": testdata["documents"],
        "items": testdata["items"],
        "loans": most_loaned,
        "series": testdata["series"],
    }
Example 38
def importer_test_data(app, db, es_clear):
    """Provide test data for importer test suite."""
    data = load_json_from_datadir(
        "existing_documents.json", relpath="importer"
    )
    Document = current_app_ils.document_record_cls
    documents = _create_records(db, data, Document, DOCUMENT_PID_TYPE)

    data = load_json_from_datadir("existing_eitems.json", relpath="importer")
    eitems = _create_records(db, data, EItem, EITEM_PID_TYPE)

    # index
    ri = RecordIndexer()
    for rec in documents + eitems:
        ri.index(rec)

    current_search.flush_and_refresh(index="*")

    return {"documents": documents, "eitems": eitems}
Example 39
def test_before_deposit_index_hook_sets_files(create_record, db, es):
    deposit = create_record(published=False)
    # Reproduce file upload: add file to bucket associated with deposit
    bucket = Bucket.get(deposit['_buckets']['deposit'])
    obj = ObjectVersion.create(bucket, 'foo.txt')
    stream = BytesIO(b'Hello world!')
    obj.set_contents(stream,
                     size=len(stream.getvalue()),
                     size_limit=bucket.size_limit)
    db.session.commit()
    indexer = RecordIndexer()

    indexer.index(deposit)

    # Get the raw indexed document
    index, doc_type = indexer.record_to_index(deposit)
    es_deposit = es.get(index=index, doc_type=doc_type, id=deposit.id)
    assert '_files' in es_deposit['_source']
    assert es_deposit['_source']['_files'][0]['type'] == 'txt'
Example 40
def test_access_permissions(
    client, json_headers, testdata, users, with_access
):
    """Test GET documents with `_access` ignoring `restricted`."""
    # set the documents to have read access only by patron2. `_access` should
    # be taken into account and take precedence over `restricted`.
    indexer = RecordIndexer()
    doc1 = Document.get_record_by_pid("docid-open-access")
    doc2 = Document.get_record_by_pid("docid-closed-access")
    for doc in [doc1, doc2]:
        doc.update(dict(_access=dict(read=[users["patron2"].id])))
        doc.commit()
        db.session.commit()
        indexer.index(doc)
    current_search.flush_and_refresh(index="documents")

    test_data = [
        ("anonymous", "docid-open-access", 401, 0),
        ("patron1", "docid-open-access", 403, 0),
        ("patron2", "docid-open-access", 200, 1),  # should have access
        ("librarian", "docid-open-access", 200, 1),
        ("admin", "docid-open-access", 200, 1),
        ("anonymous", "docid-closed-access", 401, 0),
        ("patron1", "docid-closed-access", 403, 0),
        ("patron2", "docid-closed-access", 200, 1),  # should have access
        ("librarian", "docid-closed-access", 200, 1),
        ("admin", "docid-closed-access", 200, 1),
    ]
    for user, pid, status_code, n_hits in test_data:
        # item endpoint
        user_login(client, user, users)
        url = url_for("invenio_records_rest.docid_item", pid_value=pid)
        res = client.get(url, headers=json_headers)
        assert res.status_code == status_code

        # list endpoint
        user_login(client, user, users)
        url = url_for(
            "invenio_records_rest.docid_list", q="pid:{}".format(pid)
        )
        res = client.get(url, headers=json_headers)
        hits = json.loads(res.data.decode("utf-8"))
        assert hits["hits"]["total"] == n_hits
Example 41
def demo_records(app):
    """Create demo records."""
    data_path = pkg_resources.resource_filename('cds.modules.fixtures',
                                                'data/records.xml')

    with open(data_path) as source:
        indexer = RecordIndexer()
        with _db.session.begin_nested():
            for index, data in enumerate(split_blob(source.read()), start=1):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid'](rec_uuid, record)
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        _db.session.commit()
    return data_path
Example 42
def items():
    """Create circulation items."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer

    from invenio_circulation.api import Item
    from invenio_circulation.minters import circulation_item_minter

    for x in range(10):
        item = Item.create({
            'foo': 'bar{0}'.format(x),
            'title_statement': {'title': 'title{0}'.format(x)},
            'record': {'id': 1}
        })
        circulation_item_minter(item.id, item)
        item.commit()
        record_indexer = RecordIndexer()
        record_indexer.index(item)

    db.session.commit()
Example 43
def test_crud_read(app, db, es):
    """Test REST API get functionality."""
    item = Item.create({'foo': 'bar'})
    circulation_item_minter(item.id, item)
    item.commit()
    db.session.commit()

    record_indexer = RecordIndexer()
    record_indexer.index(item)

    current_search.flush_and_refresh('_all')

    with app.test_request_context():
        with app.test_client() as client:
            url = url_for('circulation_rest.crcitm_item',
                          pid_value=item['control_number'])
            res = client.get(url)
            fetched_item = json.loads(res.data.decode('utf-8'))['metadata']

            assert fetched_item['control_number'] == item['control_number']
Example 45
def test_rest_search(app, db, es, url_addition, count):
    """Test REST API search functionality."""
    item = Item.create({'foo': 'bar'})
    circulation_item_minter(item.id, item)
    item.commit()
    db.session.commit()

    record_indexer = RecordIndexer()
    record_indexer.index(item)

    current_search.flush_and_refresh('_all')

    with app.test_request_context():
        with app.test_client() as client:
            base_url = url_for('circulation_rest.crcitm_list')
            url = base_url + url_addition

            res = client.get(url)
            hits = json.loads(res.data.decode('utf-8'))['hits']['hits']
            assert len(hits) == count
Example 46
def testdata(in_cluster_app):
    """Create, index and return test data."""
    indexer = RecordIndexer()

    filenames = ("records.json", "authors.json")
    with mock.patch('invenio_records.api.Record.validate', return_value=None):
        records = load_json_from_datadir('records.json')
        for record in records:
            record = Record.create(record)
            record_minter(record.id, record)
            record.commit()
            db.session.commit()
            indexer.index(record)
        authors = load_json_from_datadir('authors.json')
        for record in authors:
            record = Record.create(record)
            author_minter(record.id, record)
            record.commit()
            db.session.commit()
            indexer.index(record)
Example 48
def _create_records(path, verbose):
    """Create demo records."""
    indexer = RecordIndexer(
        record_to_index=lambda record: ('records', 'record')
    )
    if verbose > 0:
        click.secho('Creating records', fg='yellow', bold=True)
    with db.session.begin_nested():
        records_dir = os.path.join(path, 'records')
        nb_records = 0
        for root, dirs, files in os.walk(records_dir):
            for filename in files:
                split_filename = os.path.splitext(filename)
                if split_filename[1] == '.json':
                    rec_uuid = UUID(split_filename[0])
                    with open(os.path.join(records_dir, root,
                                           filename)) as record_file:
                        record_str = record_file.read()
                    record_str = resolve_community_id(record_str)
                    record_str = resolve_block_schema_id(record_str)
                    deposit = Deposit.create(json.loads(record_str),
                                             id_=rec_uuid)
                    ObjectVersion.create(deposit.files.bucket, 'myfile',
                        stream=BytesIO(b'mycontent'))
                    deposit.publish()
                    pid, record = deposit.fetch_published()
                    # index the record
                    indexer.index(record)
                    if verbose > 1:
                        click.secho('CREATED RECORD {0}:\n {1}'.format(
                            str(rec_uuid), json.dumps(record,
                                                  indent=4)
                        ))
                        click.secho('CREATED DEPOSIT {0}:\n {1}'.format(
                            str(rec_uuid), json.dumps(deposit,
                                                  indent=4)
                        ))
                    nb_records += 1
    if verbose > 0:
        click.secho('Created {} records!'.format(nb_records), fg='green')
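For reference, the record_to_index callable passed to RecordIndexer above must return an (index, doc_type) pair; the lambda pins every record to a single index. A sketch of a schema-driven variant (the naming scheme is an assumption):

def schema_to_index(record):
    """Derive (index, doc_type) from the record's $schema (hypothetical)."""
    name = record['$schema'].rsplit('/', 1)[-1].replace('.json', '')
    return 'records-{0}'.format(name), name

indexer = RecordIndexer(record_to_index=schema_to_index)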
Example 49
    def prepare_data():
        """Prepare data."""
        days = current_app.config[
            "ILS_CIRCULATION_MAIL_OVERDUE_REMINDER_INTERVAL"
        ]
        loans = testdata["loans"]

        recs = []
        now = arrow.utcnow()

        def new_end_date(loan, date):
            loan["end_date"] = date.date().isoformat()
            loan["state"] = "ITEM_ON_LOAN"
            loan.commit()
            recs.append(loan)

        # overdue loans
        date = now - timedelta(days=days)
        new_end_date(loans[0], date)

        date = now - timedelta(days=days * 2)
        new_end_date(loans[1], date)

        # not overdue
        date = now - timedelta(days=-1)
        new_end_date(loans[2], date)

        # not overdue or overdue but not to be notified
        remaining_not_overdue = loans[3:]
        for loan in remaining_not_overdue:
            days = random.choice([-1, 0, 1])
            date = now - timedelta(days=days)
            new_end_date(loan, date)
        db.session.commit()

        indexer = RecordIndexer()
        for rec in recs:
            indexer.index(rec)

        current_search.flush_and_refresh(index="*")
Example 50
File: utils.py Project: N03/invenio
def create_record(data):
    """Create a record.

    :param dict data: The record data.
    """
    indexer = RecordIndexer()
    with db.session.begin_nested():
        # create uuid
        rec_uuid = uuid.uuid4()
        # add the schema
        host = current_app.config.get('JSONSCHEMAS_HOST')
        data["$schema"] = \
            current_app.extensions['invenio-jsonschemas'].path_to_url(
            'custom_record/custom-record-v1.0.0.json')
        # create PID
        current_pidstore.minters['custid'](
            rec_uuid, data, pid_value='custom_pid_{}'.format(rec_uuid))
        # create record
        created_record = Record.create(data, id_=rec_uuid)
        # index the record
        indexer.index(created_record)
    db.session.commit()
Example 51
def test_oai_set_result_count(mocker, audit_records, db, es, communities,
                              oai_sources, issues):
    db_records, es_records, oai2d_records = oai_sources

    for recid in db_records:
        _, record = record_resolver.resolve(recid)
        record['_oai']['sets'] = ['user-c1']
        record.commit()
    db.session.commit()

    indexer = RecordIndexer()
    for recid in es_records:
        _, record = record_resolver.resolve(recid)
        record['_oai']['sets'] = ['user-c1']
        indexer.index(record)
    current_search.flush_and_refresh(index='records')

    # '/oai2d' needs straightforward cheating... There's no way to be sure
    # why the endpoint sometimes fails to report the correct results. It could
    # be a Resumption Token issue, or even an indexing issue on Elasticsearch.
    # Either way, we have to be able to replicate when running on production
    # this behavior and report it as an issue.
    oai2d_ids_mock = MagicMock()
    oai2d_ids_mock.return_value = set(oai2d_records)
    oai2d_ids_mock = mocker.patch(
        'zenodo.modules.auditor.oai.OAISetResultCheck'
        '._oai2d_endpoint_identifiers', new=oai2d_ids_mock)

    audit = OAIAudit('testAudit', logging.getLogger('auditorTesting'), [])
    check = OAISetResultCheck(audit, Community.get('c1'))
    check.perform()
    audit.clear_db_oai_set_cache()

    result_issues = check.issues.get('missing_ids', {})
    db_issues, es_issues, api_issues = issues
    assert set(result_issues.get('db', [])) == set(db_issues)
    assert set(result_issues.get('es', [])) == set(es_issues)
    assert set(result_issues.get('oai2d', [])) == set(api_issues)
Example 52
def load_records(es_app, filename, schema):
    """Try to index records."""
    indexer = RecordIndexer()
    with es_app.test_request_context():
        data_filename = pkg_resources.resource_filename("invenio_records", filename)
        records_data = load(data_filename)
        records = []
        for item in records_data:
            item_dict = dict(marc21.do(item))
            item_dict["$schema"] = schema
            record = Record.create(item_dict)
            records.append(record)
        db.session.commit()

        es_records = []
        for record in records:
            es_records.append(indexer.index(record))

        for record in es_records:
            search.client.get(index=record["_index"], doc_type=record["_type"], id=record["_id"])
Example 53
def test_bibliographic_data(es_app):
    """Test indexation using bibliographic data."""
    search = InvenioSearch(es_app)
    search.create()
    indexer = RecordIndexer()
    with es_app.test_request_context():
        data_filename = pkg_resources.resource_filename(
            'invenio_records', 'data/marc21/bibliographic.xml')
        records_data = load(data_filename)
        records = []
        for item in records_data:
            record = Record.create(item)
            record['$schema'] = "mappings/marc21_holdings.json"
            es_record = indexer.index(record)
            records.append(es_record)

    for record in records:
        search.client.get(index=record['_index'],
                          doc_type=record['_type'],
                          id=record['_id'])
    search.delete()
Example 54
def load_records(es_app, filename, schema):
    """Try to index records."""
    indexer = RecordIndexer()
    with es_app.test_request_context():
        data_filename = pkg_resources.resource_filename(
            'invenio_records', filename)
        records_data = load(data_filename)
        records = []
        for item in records_data:
            item_dict = dict(marc21.do(item))
            item_dict['$schema'] = schema
            record = Record.create(item_dict)
            records.append(record)
        db.session.commit()

        es_records = []
        for record in records:
            es_records.append(indexer.index(record))

        from invenio_search import current_search
        for record in es_records:
            current_search.client.get(index=record['_index'],
                                      doc_type=record['_type'],
                                      id=record['_id'])
Example 55
def records_not_merged_in_marcxml(app):
    snippet_merged = (
        '<record>'
        '  <controlfield tag="001">111</controlfield>'
        '  <controlfield tag="005">20160922232729.0</controlfield>'
        '  <datafield tag="024" ind1="7" ind2=" ">'
        '    <subfield code="2">DOI</subfield>'
        '    <subfield code="a">10.11588/heidok.00021652</subfield>'
        '  </datafield>'
        '  <datafield tag="100" ind1=" " ind2=" ">'
        '    <subfield code="a">Humbert, Pascal</subfield>'
        '    <subfield code="u">Inst. Appl. Math., Heidelberg</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">THESIS</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">CORE</subfield>'
        '  </datafield>'
        '  <datafield tag="981" ind1=" " ind2=" ">'
        '    <subfield code="a">222</subfield>'
        '  </datafield>'
        '</record>'
    )

    snippet_deleted = (
        '<record>'
        '  <controlfield tag="001">222</controlfield>'
        '  <controlfield tag="005">20160922232729.0</controlfield>'
        '  <datafield tag="024" ind1="7" ind2=" ">'
        '    <subfield code="2">DOI</subfield>'
        '    <subfield code="a">10.11588/heidok.00021652</subfield>'
        '  </datafield>'
        '  <datafield tag="100" ind1=" " ind2=" ">'
        '    <subfield code="a">Humbert, Pascal</subfield>'
        '    <subfield code="u">Inst. Appl. Math., Heidelberg</subfield>'
        '  </datafield>'
        '  <datafield tag="701" ind1=" " ind2=" ">'
        '    <subfield code="a">Lindner, Manfred</subfield>'
        '  </datafield>'
        '  <datafield tag="856" ind1="4" ind2=" ">'
        '    <subfield code="u">http://www.ub.uni-heidelberg.de/archiv/21652</subfield>'
        '    <subfield code="y">U. Heidelberg</subfield>'
        '  </datafield>'
        '  <datafield tag="909" ind1="C" ind2="O">'
        '    <subfield code="o">oai:inspirehep.net:222</subfield>'
        '    <subfield code="p">INSPIRE:HEP</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '  <datafield tag="981" ind1=" " ind2=" ">'
        '    <subfield code="a">222</subfield>'
        '  </datafield>'
        '</record>'
    )

    with app.app_context():
        json_record_merged = hep.do(create_record(snippet_merged))
        json_record_merged['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        json_record_deleted = hep.do(create_record(snippet_deleted))
        json_record_deleted['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            record_merged = record_upsert(json_record_merged)
            record_deleted = record_upsert(json_record_deleted)
            if record_deleted:
                if record_merged:
                    r = RecordIndexer()
                    r.index(record_merged)
                    r.index(record_deleted)
                    es.indices.refresh('records-hep')
        db.session.commit()

    yield

    with app.app_context():
        _delete_merged_records_from_everywhere('literature', 111, 222)
Example 56
def records_already_merged_in_marcxml(app):
    snippet_merged = (
        '<record>'
        '  <controlfield tag="001">111</controlfield>'
        '  <controlfield tag="005">20160922232729.0</controlfield>'
        '  <datafield tag="024" ind1="7" ind2=" ">'
        '    <subfield code="2">DOI</subfield>'
        '    <subfield code="a">10.11588/heidok.00021652</subfield>'
        '  </datafield>'
        '  <datafield tag="100" ind1=" " ind2=" ">'
        '    <subfield code="a">Humbert, Pascal</subfield>'
        '    <subfield code="u">Inst. Appl. Math., Heidelberg</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">THESIS</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">CORE</subfield>'
        '  </datafield>'
        '  <datafield tag="981" ind1=" " ind2=" ">'
        '    <subfield code="a">222</subfield>'
        '  </datafield>'
        '</record>'
    )

    snippet_deleted = (
        '<record>'
        '  <controlfield tag="001">222</controlfield>'
        '  <controlfield tag="005">20160914115512.0</controlfield>'
        '  <datafield tag="100" ind1=" " ind2=" ">'
        '    <subfield code="a">Humbert, Pascal</subfield>'
        '  </datafield>'
        '  <datafield tag="970" ind1=" " ind2=" ">'
        '    <subfield code="d">111</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">THESIS</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">CORE</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="c">DELETED</subfield>'
        '  </datafield>'
        '</record>'
    )

    with app.app_context():
        json_record_merged = hep.do(create_record(snippet_merged))
        json_record_merged['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        json_record_deleted = hep.do(create_record(snippet_deleted))
        json_record_deleted['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            record_merged = record_upsert(json_record_merged)
            record_deleted = record_upsert(json_record_deleted)
            if record_merged and record_deleted:
                indexer = RecordIndexer()
                indexer.index(record_merged)
                indexer.index(record_deleted)
                es.indices.refresh('records-hep')
        db.session.commit()

    yield

    with app.app_context():
        _delete_merged_records_from_everywhere('literature', 111, 222)
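These snippets are shown without their decorators; as a pytest fixture, the generator above would be registered and consumed roughly as below (the fixture registration, the test body and the es client import are assumptions added for illustration):

import pytest
from invenio_search import current_search_client as es

records_already_merged_in_marcxml = pytest.fixture(
    records_already_merged_in_marcxml)


def test_merged_records_are_indexed(app, records_already_merged_in_marcxml):
    # The fixture indexed records 111 and 222 and refreshed 'records-hep',
    # so both control numbers should be searchable.
    with app.app_context():
        for control_number in (111, 222):
            result = es.search(
                index='records-hep',
                body={'query': {'match': {'control_number': control_number}}})
            assert result['hits']['total'] > 0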
Esempio n. 57
def records(skip_files, files, profile, mode):
    """Load all records."""
    if profile:
        import cProfile
        import pstats
        import StringIO
        pr = cProfile.Profile()
        pr.enable()

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/record-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/records')
    action = None

    if files:
        record_json = files
    else:
        record_json = glob.glob(os.path.join(data, '*.json'))

    for filename in record_json:
        # name = filename.split('/')[-1]
        # if name.startswith('opera'):
        #     click.echo('Skipping opera records ...')
        #     continue
        click.echo('Loading records from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):

                if not data:
                    click.echo('IGNORING a possibly broken or corrupted '
                               'record entry in file {0} ...'.format(filename))
                    continue

                # Note: this rebinds ``files``, shadowing the CLI argument
                # of the same name (already consumed above).
                files = data.get('files', [])

                if mode == 'insert-or-replace':
                    try:
                        pid = PersistentIdentifier.get('recid', data['recid'])
                        if pid:
                            record = update_record(
                                pid, schema, data, files, skip_files)
                            action = 'updated'
                    except PIDDoesNotExistError:
                        record = create_record(schema, data, files, skip_files)
                        action = 'inserted'
                elif mode == 'insert':
                    try:
                        pid = PersistentIdentifier.get('recid', data['recid'])
                        if pid:
                            click.echo(
                                'Record recid {} exists already;'
                                ' cannot insert it.'.format(
                                    data.get('recid')), err=True)
                            return
                    except PIDDoesNotExistError:
                        record = create_record(schema, data, files, skip_files)
                        action = 'inserted'
                else:
                    try:
                        pid = PersistentIdentifier.get('recid', data['recid'])
                    except PIDDoesNotExistError:
                        click.echo(
                            'Record recid {} does not exist; '
                            'cannot replace it.'.format(
                                data.get('recid')), err=True)
                        return
                    record = update_record(
                        pid, schema, data, files, skip_files)
                    action = 'updated'

                if not skip_files:
                    record.files.flush()
                record.commit()
                db.session.commit()
                click.echo(
                    'Record recid {0} {1}.'.format(
                        data.get('recid'), action))
                indexer.index(record)
                db.session.expunge_all()

    if profile:
        pr.disable()
        s = StringIO.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
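create_record and update_record come from the same fixtures module and are not shown here. A sketch of what the create path could look like, mirroring the pattern used by the datasets loader further down (all names and the file handling are assumptions):

def create_record(schema, data, files, skip_files):
    """Sketch: mint a recid, create the record and attach its files."""
    record_uuid = uuid.uuid4()
    cernopendata_recid_minter(record_uuid, data)
    data['$schema'] = schema
    record = Record.create(data, id_=record_uuid)
    bucket = Bucket.create()
    RecordsBuckets.create(record=record.model, bucket=bucket)
    if not skip_files:
        for file_ in files:
            f = FileInstance.create()
            f.set_uri(file_['uri'], file_['size'], file_['checksum'])
            ObjectVersion.create(
                bucket, file_['uri'].split('/')[-1], _file_id=f.id)
    return record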
Esempio n. 58
def docs(files, mode):
    """Load demo article records."""
    from slugify import slugify

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/docs-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/docs')

    if files:
        articles_json = files
    else:
        articles_json = get_jsons_from_dir(data)

    for filename in articles_json:
        # name = filename.split('/')[-1]
        # if name.startswith('opera'):
        #     click.echo('Skipping opera records ...')
        #     continue

        click.echo('Loading docs from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):

                # Replace body with responding content
                assert data["body"]["content"]
                content_filename = os.path.join(
                    *(
                        ["/", ] +
                        filename.split('/')[:-1] +
                        [data["body"]["content"], ]
                    )
                )

                with open(content_filename) as body_field:
                    data["body"]["content"] = body_field.read()
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                # Compute the docid once: the slug field, or the slugified
                # title when no slug is present.
                docid = str(slugify(data.get('slug', data['title'])))

                if mode == 'insert-or-replace':
                    try:
                        pid = PersistentIdentifier.get('docid', docid)
                        if pid:
                            record = update_doc(pid, data)
                            action = 'updated'
                    except PIDDoesNotExistError:
                        record = create_doc(data, schema)
                        action = 'inserted'
                elif mode == 'insert':
                    try:
                        pid = PersistentIdentifier.get('docid', docid)
                        if pid:
                            click.echo(
                                'Record docid {} exists already;'
                                ' cannot insert it.'.format(docid), err=True)
                            return
                    except PIDDoesNotExistError:
                        record = create_doc(data, schema)
                        action = 'inserted'
                else:
                    try:
                        pid = PersistentIdentifier.get('docid', docid)
                    except PIDDoesNotExistError:
                        click.echo(
                            'Record docid {} does not exist;'
                            ' cannot replace it.'.format(docid), err=True)
                        return
                    record = update_doc(pid, data)
                    action = 'updated'
                record.commit()
                db.session.commit()
                click.echo('Record docid {0} {1}.'.format(docid, action))
                indexer.index(record)
                db.session.expunge_all()
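For reference, the docid value used as the PID above is just the slugified slug (or title) of the record; python-slugify normalizes strings like so:

from slugify import slugify

slugify('About CMS')           # -> 'about-cms'
slugify('Getting Started!')    # -> 'getting-started'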
Esempio n. 59
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # Use a name that does not shadow the ``id`` builtin.
                record_uuid = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi'):
                    cernopendata_datasetid_minter(record_uuid, data)
                else:
                    cernopendata_recid_minter(record_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                if not skip_files:
                    for file_ in files:  # ``file_`` avoids the builtin
                        assert 'uri' in file_
                        assert 'size' in file_
                        assert 'checksum' in file_

                        f = FileInstance.create()
                        # Keep the outer ``filename`` loop variable intact.
                        object_name = file_['uri'].split('/')[-1]
                        f.set_uri(file_['uri'], file_['size'],
                                  file_['checksum'])

                        ObjectVersion.create(
                            bucket,
                            object_name,
                            _file_id=f.id
                        )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
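All of the loaders above index records one at a time inside the loop. For large fixture sets, invenio-indexer also provides a bulk API that batches the Elasticsearch requests; a sketch, where record_uuids is assumed to be a list of the UUIDs created in the loop:

from invenio_indexer.api import RecordIndexer

indexer = RecordIndexer()
indexer.bulk_index(record_uuids)   # queue the record UUIDs for indexing
indexer.process_bulk_queue()       # perform the actual bulk requests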