Example 1
def records():
    """Load test data fixture."""
    import uuid
    from time import sleep

    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus
    from invenio_records.api import Record

    # create_test_user() is a project-local helper assumed to be in scope
    create_test_user()

    indexer = RecordIndexer()

    # Record 1 - Live record
    with db.session.begin_nested():
        rec_uuid = uuid.uuid4()
        pid1 = PersistentIdentifier.create('recid',
                                           '1',
                                           object_type='rec',
                                           object_uuid=rec_uuid,
                                           status=PIDStatus.REGISTERED)
        Record.create(
            {
                'title': 'Registered',
                'description': 'This is an awesome description',
                'control_number': '1',
                'access_right': 'restricted',
                'access_conditions': 'fuu',
                'owners': [1, 2],
                'recid': 1
            },
            id_=rec_uuid)
        indexer.index_by_id(pid1.object_uuid)

    db.session.commit()

    # give the search engine a moment to make the record searchable
    sleep(3)
Example 2
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                'invenio_records', filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict['$schema'] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = Record.create(item_dict, id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response['hits']['total'] >= len(records):
                break
            current_search.flush_and_refresh('_all')

    return records
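
A hypothetical call site for the helper above (the schema URL is made up; the
filename is the demo file that ships with invenio_records):

record_ids = load_records(
    app,
    filename='data/marc21/bibliographic.xml',
    schema='http://localhost/schemas/marc21/bibliographic/bd-v1.0.0.json',
)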
Example 3
def test_indexer_bulk_index(app, queue):
    """Test delay indexing."""
    with app.app_context():
        with establish_connection() as c:
            indexer = RecordIndexer()
            id1 = uuid.uuid4()
            id2 = uuid.uuid4()
            indexer.bulk_index([id1, id2])
            indexer.bulk_delete([id1, id2])

            consumer = Consumer(connection=c,
                                queue=indexer.mq_queue.name,
                                exchange=indexer.mq_exchange.name,
                                routing_key=indexer.mq_routing_key)

            messages = list(consumer.iterqueue())
            for m in messages:
                m.ack()

            assert len(messages) == 4
            data0 = messages[0].decode()
            assert data0['id'] == str(id1)
            assert data0['op'] == 'index'
            data2 = messages[2].decode()
            assert data2['id'] == str(id1)
            assert data2['op'] == 'delete'
Example 4
def load(source, verbose, cache, files, skip, max=None):
    """Load records attach files and index them."""
    data = json.load(source)
    if isinstance(data, dict):
        data = [data]

    # directory used to stage remote fulltext files before upload
    upload_dir = os.path.join(current_app.instance_path, 'uploads')
    try:
        os.makedirs(upload_dir)
    except FileExistsError:
        pass

    # initialize file location if needed
    if not Location.get_default():
        data_dir = os.path.join(current_app.instance_path, 'files')
        db.session.add(
            Location(name='default', uri='file://' + data_dir, default=True))
        db.session.commit()

    # create records and index them
    click.secho('Creating records...', fg='green')
    rec_uuids = load_records_with_files(data, upload_dir, max, verbose, files,
                                        cache, skip)
    click.secho('Put %d records for indexing...' % len(rec_uuids), fg='green')
    RecordIndexer().bulk_index(rec_uuids)
    click.secho('Execute "run" command to process the queue!', fg='yellow')
Example 5
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    """Migrate a chunk of legacy MARC records and index them."""
    from invenio_indexer.api import RecordIndexer

    from ..pidstore.minters import inspire_recid_minter

    indexer = RecordIndexer()

    index_queue = []
    for raw_record in chunk:
        record = marc_create_record(raw_record, keep_singletons=False)
        json_record = create_record(record)
        if '$schema' in json_record:
            json_record['$schema'] = url_for(
                'invenio_jsonschemas.get_schema',
                schema_path="records/{0}".format(json_record['$schema'])
            )
        rec_uuid = str(Record.create(json_record, id_=None).id)

        # Create persistent identifier.
        pid = inspire_recid_minter(rec_uuid, json_record)

        index_queue.append(pid.object_uuid)

        db.session.commit()

    # Request record indexing
    for i in index_queue:
        indexer.index_by_id(i)

    # Return the UUID of the last record so the caller can send the task
    # that migrates its files.
    return rec_uuid
Example 6
def test_index_action(app):
    """Test index action."""
    with app.app_context():
        record = Record.create({'title': 'Test'})
        db.session.commit()

        def receiver(sender, json=None, record=None, arguments=None, **kwargs):
            json['extra'] = 'extra'
            arguments['pipeline'] = 'foobar'

        with before_record_index.connected_to(receiver):
            action = RecordIndexer()._index_action(
                dict(
                    id=str(record.id),
                    op='index',
                ))
            assert action['_op_type'] == 'index'
            assert action['_index'] == app.config['INDEXER_DEFAULT_INDEX']
            assert action['_id'] == str(record.id)
            if lt_es7:
                assert action['_type'] == \
                    app.config['INDEXER_DEFAULT_DOC_TYPE']
                assert action['_version'] == record.revision_id
                assert action['_version_type'] == 'external_gte'
            else:
                assert action['_type'] == '_doc'
                assert action['version'] == record.revision_id
                assert action['version_type'] == 'external_gte'
            assert action['pipeline'] == 'foobar'
            assert 'title' in action['_source']
            assert 'extra' in action['_source']
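
For reference, the same receiver can also be attached for the whole application
lifetime instead of the context manager used in the test; a minimal sketch using
the blinker API that invenio-indexer's signal exposes:

from invenio_indexer.signals import before_record_index

def enrich(sender, json=None, record=None, arguments=None, **kwargs):
    # runs before every record is sent to the search engine
    json['extra'] = 'extra'

before_record_index.connect(enrich)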
Example 7
def remove(community_id, record_id):
    """Remove a record from community."""
    c = Community.get(community_id)
    assert c is not None
    c.remove_record(record_id)
    db.session.commit()
    RecordIndexer().index_by_id(record_id)
Example 8
def add_oai_information(obj, eng):
    """Add OAI information, such as the identifier, to an existing record."""

    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            existing_record['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % recid,
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if not existing_record['_oai'].get('sets'):
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime(
        '%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
    indexer = RecordIndexer()
    indexer.index_by_id(pid.object_uuid)
Example 9
def _create_records(path, verbose):
    """Create demo records."""
    indexer = RecordIndexer(
        record_to_index=lambda record: ('records', 'record'))
    if verbose > 0:
        click.secho('Creating records', fg='yellow', bold=True)
    with db.session.begin_nested():
        records_dir = os.path.join(path, 'records')
        nb_records = 0
        for root, dirs, files in os.walk(records_dir):
            for filename in files:
                split_filename = os.path.splitext(filename)
                if split_filename[1] == '.json':
                    rec_uuid = UUID(split_filename[0])
                    # ``root`` already includes ``records_dir``; also avoid
                    # shadowing the ``path`` argument
                    filepath = os.path.join(root, filename)
                    record, deposit = _create_record_from_filepath(
                        filepath, rec_uuid, indexer, nb_records, verbose)
                    if verbose > 1:
                        click.secho('CREATED RECORD {0}:\n {1}'.format(
                            str(rec_uuid), json.dumps(record, indent=4)))
                        click.secho('CREATED DEPOSIT {0}:\n {1}'.format(
                            str(rec_uuid), json.dumps(deposit, indent=4)))
                    nb_records += 1
    if verbose > 0:
        click.secho('Created {} records!'.format(nb_records), fg='green')
Example 10
def keywords_harvesting(self, max_retries=5, countdown=5):
    """Harvest all keywords."""
    try:
        # load from remote API the up-to-date list of keywords
        keywords_api = _get_keywords_from_api(
            url=current_app.config['CDS_KEYWORDS_HARVESTER_URL'])

        # load the list of keywords in the database
        keywords_db = query_to_objects(
            query=KeywordSearch().params(version=True), cls=Keyword)

        # indexer used to update and delete keyword records
        indexer = RecordIndexer()

        _update_existing_keywords(
            indexer=indexer,
            keywords_api=keywords_api,
            keywords_db=keywords_db)
        _delete_not_existing_keywords(
            indexer=indexer,
            keywords_api=keywords_api,
            keywords_db=keywords_db)

        db.session.commit()
    except RequestException as exc:
        raise self.retry(max_retries=max_retries, countdown=countdown, exc=exc)
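
Since the body calls self.retry, this is presumably a bound Celery task; the
decorator is not part of the snippet, so the declaration sketched below is an
assumption:

from celery import shared_task

@shared_task(bind=True)
def example_retrying_task(self, max_retries=5, countdown=5):
    """Hypothetical task skeleton retrying on transient errors."""
    try:
        pass  # harvest and index, as keywords_harvesting does above
    except Exception as exc:
        raise self.retry(max_retries=max_retries, countdown=countdown, exc=exc)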
Example 11
def store_record(obj, eng):
    """Store the record in the database and index it."""
    set_springer_source_if_needed(obj)

    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        pid = scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

        # Index record
        indexer = RecordIndexer()
        indexer.index_by_id(pid.object_uuid)

    except ValidationError as err:
        __halt_and_notify("Validation error: %s. Skipping..." % (err, ), obj,
                          eng)

    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", obj, eng)
Example 12
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                   not isinstance(data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                # set the schema before creating so it is persisted with the
                # record; avoid shadowing the ``id`` builtin
                data['$schema'] = schema
                rec_uuid = uuid.uuid4()
                cernopendata_termid_minter(rec_uuid, data)
                record = Record.create(data, id_=rec_uuid)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example 13
    def update_item_sort_custom_es(cls, index_path, sort_json=None):
        """Set custom sort.

        :param index_path: Selected index path.
        :param sort_json: Custom item sort settings.
        """
        sort_json = sort_json or []
        try:
            # match all items whose path tree contains the selected index
            query_q = {"query": {"match": {"path.tree": index_path}}}
            es_index = current_app.config['SEARCH_UI_SEARCH_INDEX']
            es_doc_type = current_app.config['INDEXER_DEFAULT_DOCTYPE']
            indexer = RecordIndexer()
            res = indexer.client.search(index=es_index, body=query_q)

            for d in sort_json:
                for h in res.get("hits").get("hits"):
                    if int(h.get('_source').get('control_number')) == int(
                            d.get("id")):
                        body = {
                            'doc': {
                                'custom_sort': d.get('custom_sort'),
                            }
                        }
                        indexer.client.update(index=es_index,
                                              doc_type=es_doc_type,
                                              id=h.get("_id"),
                                              body=body)
                        break

        except Exception as ex:
            current_app.logger.debug(ex)
        return
Example 14
def test_cli_full_reindex(app, db, es, capsys, es_acl_prepare, test_users):
    pid, record = create_record(
        {
            '$schema': RECORD_SCHEMA,
            'keywords': ['blah']
        },
        clz=SchemaEnforcingRecord)
    RecordIndexer().index(record)
    current_search_client.indices.flush()
    with db.session.begin_nested():
        acl = ElasticsearchACL(name='test',
                               schemas=[RECORD_SCHEMA],
                               priority=0,
                               operation='get',
                               originator=test_users.u1,
                               record_selector={'term': {
                                   'keywords': 'blah'
                               }})
        db.session.add(acl)
        u = UserActor(name='test',
                      acl=acl,
                      originator=test_users.u1,
                      users=[test_users.u1])
        db.session.add(u)

    # at this point the record is indexed, but the ACL has not yet been applied to it; check it ...
    retrieved = RecordsSearch(
        index=schema_to_index(RECORD_SCHEMA)[0]).get_record(
            record.id).execute().hits[0].to_dict()
    assert '_invenio_explicit_acls' not in retrieved

    # just a precaution test
    assert current_explicit_acls.enabled_schemas == {RECORD_SCHEMA}

    # and run the reindex - should reindex one record
    from invenio_explicit_acls.cli import full_reindex_impl
    full_reindex_impl(verbose=True, records=True, in_bulk=False)

    captured = capsys.readouterr()
    assert captured.out.strip() == """
Reindexing ACLs
Updating ACL representation for "test" (%s) on schemas ['records/record-v1.0.0.json']
Getting records for schema records/record-v1.0.0.json
   ... collected 1 records
Adding 1 records to indexing queue""".strip() % (acl.id)

    current_search_client.indices.flush()

    retrieved = RecordsSearch(
        index=schema_to_index(RECORD_SCHEMA)[0]).get_record(
            record.id).execute().hits[0].to_dict()
    assert clear_timestamp(retrieved['_invenio_explicit_acls']) == [{
        'id': str(acl.id),
        'operation': 'get',
        'timestamp': 'cleared',
        'user': [1]
    }]
Example 15
    def publish(self):
        """Publish GitHub release as record."""
        id_ = uuid.uuid4()
        deposit = None
        try:
            db.session.begin_nested()
            deposit = self.deposit_class.create(self.metadata, id_=id_)
            deposit['_deposit']['created_by'] = self.event.user_id
            deposit['_deposit']['owners'] = [self.event.user_id]

            # Fetch the deposit files
            for key, url in self.files:
                # Make a HEAD request to get GitHub to compute the
                # Content-Length.
                res = self.gh.api.session.head(url, allow_redirects=True)
                # Now, download the file
                res = self.gh.api.session.get(url, stream=True)
                if res.status_code != 200:
                    raise Exception(
                        "Could not retrieve archive from GitHub: {url}".format(
                            url=url))

                size = int(res.headers.get('Content-Length', 0))
                ObjectVersion.create(
                    bucket=deposit.files.bucket,
                    key=key,
                    stream=res.raw,
                    size=size or None,
                    mimetype=res.headers.get('Content-Type'),
                )

            # GitHub-specific SIP store agent
            sip_agent = {
                '$schema': current_jsonschemas.path_to_url(
                    current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
                'user_id': self.event.user_id,
                'github_id': self.release['author']['id'],
                'email': self.gh.account.user.email,
            }
            deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
            self.model.recordmetadata = deposit.model
            db.session.commit()

            # Send Datacite DOI registration task
            recid_pid, record = deposit.fetch_published()
            datacite_register.delay(recid_pid.pid_value, str(record.id))
        except Exception:
            db.session.rollback()
            # Remove deposit from index since it was not committed.
            if deposit and deposit.id:
                try:
                    RecordIndexer().delete(deposit)
                except Exception:
                    current_app.logger.exception(
                        "Failed to remove uncommitted deposit from index.")
            raise
Example 16
    def prepare_data():
        """Prepare data."""
        days = current_app.config[
            "ILS_CIRCULATION_MAIL_OVERDUE_REMINDER_INTERVAL"]
        loans = testdata["loans"]

        recs = []
        now = arrow.utcnow()

        def new_end_date(loan, date):
            loan["end_date"] = date.date().isoformat()
            loan["state"] = "ITEM_ON_LOAN"
            loan.commit()
            recs.append(loan)

        # overdue loans
        date = now - timedelta(days=days)
        new_end_date(loans[0], date)

        date = now - timedelta(days=days * 2)
        new_end_date(loans[1], date)

        # not overdue or overdue but not to be notified
        remaining_not_overdue = loans[2:]
        for loan in remaining_not_overdue:
            offset = random.choice([-1, 0, 1])  # avoid shadowing ``days``
            date = now - timedelta(days=offset)
            new_end_date(loan, date)
        db.session.commit()

        indexer = RecordIndexer()
        for rec in recs:
            indexer.index(rec)

        current_search.flush_and_refresh(index="*")
Example 17
def oaiset_update_records(minimal_record, db, es):
    """Fixture with records for query-based OAISet updating tests."""
    rec_ok = {
        'title': 'extra',
        '_oai': {
            'id': '12345',
            'sets': ['extra', 'user-foobar'],
            'updated': datetime(1970, 1, 1).isoformat(),
        }
    }
    # Record which needs removal of 'extra' from oai sets
    rec_remove = deepcopy(rec_ok)
    rec_remove['title'] = 'other'

    # Record which needs addition of 'extra' to oai sets
    rec_add = deepcopy(rec_ok)
    rec_add['_oai']['sets'] = [
        'user-foobar',
    ]
    records = [
        rec_ok,
        rec_remove,
        rec_add,
    ]

    rec_uuids = []
    for record_meta in records:
        rec = RecordMetadata()
        rec.json = deepcopy(record_meta)
        db.session.add(rec)
        db.session.commit()
        RecordIndexer().index_by_id(rec.id)
        rec_uuids.append(rec.id)
    current_search.flush_and_refresh('records')
    return rec_uuids
Example 18
def continuous_migration():
    """Task to continuously migrate what is pushed up by Legacy."""
    indexer = RecordIndexer()
    redis_url = current_app.config.get('CACHE_REDIS_URL')
    r = StrictRedis.from_url(redis_url)

    try:
        while r.llen('legacy_records'):
            raw_record = r.lpop('legacy_records')
            if raw_record:
                # FIXME use migrate_and_insert_record(raw_record)
                # The record might be None, in case a parallel
                # continuous_migration task has already consumed the queue.
                raw_record = zlib.decompress(raw_record)
                record = marc_create_record(raw_record, keep_singletons=False)
                recid = int(record['001'][0])
                prod_record = InspireProdRecords(recid=recid)
                prod_record.marcxml = raw_record
                json_record = create_record(record)
                with db.session.begin_nested():
                    try:
                        record = record_upsert(json_record)
                    except ValidationError as e:
                        # Invalid record, will not get indexed
                        errors = "ValidationError: Record {0}: {1}".format(
                            recid, e
                        )
                        prod_record.valid = False
                        prod_record.errors = errors
                        db.session.merge(prod_record)
                        continue
                indexer.index_by_id(record.id)
    finally:
        db.session.commit()
        db.session.close()
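
For context, a hypothetical producer for the queue drained above: Legacy is
expected to push zlib-compressed MARCXML blobs onto the same Redis list (the
list name comes from the snippet; the broker URL is an assumption):

import zlib

from redis import StrictRedis

r = StrictRedis.from_url('redis://localhost:6379/0')  # assumed broker URL
r.rpush('legacy_records', zlib.compress(b'<record>...</record>'))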
Example 19
def create_fake_record(bulk_size, fake):
    """Create records for demo purposes."""
    records_bulk = []
    start = timeit.default_timer()
    for _ in range(bulk_size):
        # Create fake record metadata
        record_data = {
            "contributors": [{
                "name": fake.name()
            }],
            "description": fake.bs(),
            "title": fake.company() + "'s dataset",
        }

        # Create record in DB
        rec_uuid = uuid.uuid4()
        current_pidstore.minters["recid"](rec_uuid, record_data)
        Record.create(record_data, id_=rec_uuid)

        # Add record for bulk indexing
        records_bulk.append(rec_uuid)

    # Flush to index and database
    db.session.commit()
    click.secho(f"Writing {bulk_size} records to the database", fg="green")

    # Bulk index records
    ri = RecordIndexer()
    ri.bulk_index(records_bulk)
    current_search.flush_and_refresh(index="records")
    click.secho(f"Sending {bulk_size} records to be indexed", fg="green")
    stop = timeit.default_timer()
    click.secho(f"Creating {bulk_size} records took {stop - start}.",
                fg="green")
Example 20
def get_record_sets(record):
    """Find matching sets."""
    # yield the sets that have no search_pattern (manually managed) but are
    # already listed inside the record
    record_sets = set(record.get('_oai', {}).get('sets', []))
    for spec in _build_cache():
        if spec in record_sets:
            yield spec

    # get list of sets that match using percolator
    index, doc_type = RecordIndexer().record_to_index(record)
    document = record.dumps()

    percolator_doc_type = _get_percolator_doc_type(index)
    _create_percolator_mapping(index, percolator_doc_type)
    results = _percolate_query(index, doc_type, percolator_doc_type, document)
    prefix = 'oaiset-'
    prefix_len = len(prefix)
    for match in results:
        set_name = match['_id']
        if set_name.startswith(prefix):
            name = set_name[prefix_len:]
            yield name

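
A small usage sketch (the record variable is hypothetical): get_record_sets is
a generator, so materialize it when a full list is needed:

matching_sets = list(get_record_sets(record))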
Example 21
    def delete_record(self, fileinstance_id, record_uuid):
        """Delete a record.

        :param fileinstance_id: The file instance id.
        :param record_uuid: The record's uuid.
        """
        # get the FileInstance object
        file_instance = FileInstance.get(fileinstance_id)
        # get the uri of the file for the directory of the folder
        uri = file_instance.uri
        # building the path to delete by storing the index of the folder data
        i = uri.find('data')

        # removing the record indexing, the record and the file instance
        recind = RecordIndexer()
        recind.delete_by_id(record_uuid=record_uuid)
        self.delete_bucket()
        FileInstance.query.filter_by(id=fileinstance_id).delete()
        PersistentIdentifier.query.filter_by(object_uuid=record_uuid).delete()
        db.session.commit()

        # removing the file on disk and the folder containing it.
        # The full path looks like /home/<user>/.local/share/virtualenvs/
        # fare-platform-<code>/var/instance/data/<f1>/<f2>/<bucketid>/<filename>.
        # Having stored the index of the "data" folder (which holds all the
        # records), the path is trimmed at <f1>, a folder whose name is two
        # characters long: 8 is added to the index "i" because that is the
        # number of characters needed to complete the path up to "<f1>/".
        shutil.rmtree(uri[:i + 8])

        current_app.logger.info("Deleted file= " + self['title'] +
                                ", by user= " + current_user.email)
Example 22
def test_get_record_no_acls_anonymous(app, db, es, es_acl_prepare, test_users):

    with db.session.begin_nested():
        # create an empty ACL in order to get the _invenio_explicit_acls filled
        acl = DefaultACL(name='test',
                         schemas=[RECORD_SCHEMA],
                         priority=0,
                         operation='get',
                         originator=test_users.u1)
        db.session.add(acl)
        actor = UserActor(name='test',
                          acl=acl,
                          users=[],
                          originator=test_users.u1)
        db.session.add(actor)

    pid, record = create_record({}, clz=SchemaEnforcingRecord)
    RecordIndexer().index(record)

    # make sure it is flushed
    current_search_client.indices.refresh()
    current_search_client.indices.flush()

    # try to get it ...
    with app.test_client() as client:
        res = client.get(record_url(pid))
        assert res.status_code == 401  # unauthorized

    # get it directly from ES
    res = get_from_es(pid)['_source']
    assert res['control_number'] == pid.pid_value
    assert res['$schema'] == 'https://localhost/schemas/' + RECORD_SCHEMA
    assert '_invenio_explicit_acls' in res
Example 23
    def run(self, event):
        """Process the circulation event.

        This method builds the frame, fetching the item and calling *_run*
        in a nested transaction.
        """
        resolver = Resolver(pid_type='crcitm',
                            object_type='rec',
                            getter=Item.get_record)
        _, item = resolver.resolve(event.payload['item_id'])

        self.circulation_event_schema.context['item'] = item

        data, errors = self.circulation_event_schema.load(event.payload)
        if errors:
            event.response_code = 400
            event.response = {'message': errors}
            return

        if data.get('dry_run'):
            event.response_code = 204
            return

        with db.session.begin_nested():
            data, _ = self.circulation_event_schema.dump(data)
            self._run(item, data)
            item.commit()
            RecordIndexer().index(item)
Example 24
def test_citation_formatter_citeproc_get(api, api_client, es, db, full_record,
                                         users):
    """Test records REST citeproc get."""
    r = Record.create(full_record)
    pid = PersistentIdentifier.create('recid',
                                      '12345',
                                      object_type='rec',
                                      object_uuid=r.id,
                                      status=PIDStatus.REGISTERED)
    db.session.commit()
    db.session.refresh(pid)

    RecordIndexer().index_by_id(r.id)
    current_search.flush_and_refresh(index='records')
    login_user_via_session(api_client, email=users[2]['email'])

    with api.test_request_context():
        records_url = url_for('invenio_records_rest.recid_item',
                              pid_value=pid.pid_value)

    res = api_client.get(records_url,
                         query_string={'style': 'apa'},
                         headers={'Accept': 'text/x-bibliography'})
    assert res.status_code == 200
    assert 'Doe, J.' in res.get_data(as_text=True)
    assert 'Test title (Version 1.2.5).' in res.get_data(as_text=True)
    assert '(2014).' in res.get_data(as_text=True)
Example 25
    def prepare_data():
        """Prepare data."""
        days = current_app.config["ILS_CIRCULATION_LOAN_WILL_EXPIRE_DAYS"]
        loans = testdata["loans"]

        recs = []
        now = arrow.utcnow()

        def new_end_date(loan, date):
            loan["end_date"] = date.date().isoformat()
            loan["state"] = "ITEM_ON_LOAN"
            loan.commit()
            recs.append(loan)

        # expiring loans
        date = now + timedelta(days=days)
        new_end_date(loans[0], date)
        new_end_date(loans[1], date)
        new_end_date(loans[2], date)

        # not expiring
        remaining_not_expiring = loans[3:]
        for loan in remaining_not_expiring:
            offset = random.choice([-2, -1, 0, 1, 2])
            date = now + timedelta(days=offset)
            new_end_date(loan, date)
        db.session.commit()

        indexer = RecordIndexer()
        for rec in recs:
            indexer.index(rec)

        current_search.flush_and_refresh(index="*")
Example 26
def cleanup_indexed_deposits():
    """Delete indexed deposits that do not exist in the database.

    .. note:: This task exists because of deposit REST API calls sometimes
        failing after the deposit has already been sent for indexing to ES,
        leaving an inconsistent state of a deposit existing in ES and not in
        the database. It should be removed once a proper signal mechanism has
        been implemented in the ``invenio-records-rest`` and
        ``invenio-deposit`` modules.
    """
    search = RecordsSearch(index='deposits')
    q = (search.query('term', **{
        '_deposit.status': 'draft'
    }).fields(['_deposit.id']))
    res = q.scan()
    es_depids_info = [(d.to_dict().get('_deposit.id', [None])[0], d.meta.id)
                      for d in res]
    es_depids = {p for p, _ in es_depids_info}
    db_depids_query = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'depid',
        PersistentIdentifier.pid_value.in_(es_depids))
    db_depids = {d.pid_value for d in db_depids_query}
    missing_db_depids = filter(lambda d: d[0] not in db_depids, es_depids_info)

    indexer = RecordIndexer()
    deposit_index = 'deposits-records-record-v1.0.0'
    deposit_doc_type = 'deposit-record-v1.0.0'
    for _, deposit_id in missing_db_depids:
        indexer.client.delete(id=str(deposit_id),
                              index=deposit_index,
                              doc_type=deposit_doc_type)
Example 27
def records():
    """Load records."""
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # locate the demo data shipped with invenio_records
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')
    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for index, data in enumerate(split_blob(source.read()), start=1):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid'](rec_uuid, record)
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        db.session.commit()
Example 28
def test_basic_search(app, db, es):
    """Test basic search functionality."""
    # The index should be empty
    assert len(ItemSearch().execute()) == 0

    # Create item1, search for everything
    item1 = Item.create({})
    item1.commit()

    record_indexer = RecordIndexer()
    record_indexer.index(item1)

    current_search.flush_and_refresh('_all')

    assert len(ItemSearch().execute()) == 1

    # Create item2, search for everything again
    item2 = Item.create({'foo': 'bar'})
    item2.commit()
    record_indexer.index(item2)

    current_search.flush_and_refresh('_all')

    assert len(ItemSearch().execute()) == 2

    # Search for item2
    assert len(ItemSearch().query('match', foo='bar').execute()) == 1

    # Search for nonsense
    assert len(ItemSearch().query('match', foo='banana').execute()) == 0
Example 29
def demo_init():
    """Initialize demo site."""
    from flask import current_app
    records = []
    # Import bibliographic records
    click.secho('Importing bibliographic records', fg='green')
    records += import_records(
        marc21,
        current_app.extensions['invenio-jsonschemas'].path_to_url(
            'marc21/bibliographic/bd-v1.0.2.json'),
        pkg_resources.resource_filename('invenio_records',
                                        'data/marc21/bibliographic.xml'),
    )
    # FIXME add support for authority records.
    # Import authority records
    # click.secho('Importing authority records', fg='green')
    # records += import_records(
    #     marc21_authority,
    #     current_app.extensions['invenio-jsonschemas'].path_to_url(
    #         'marc21/authority/ad-v1.0.2.json'),
    #     pkg_resources.resource_filename(
    #         'invenio_records', 'data/marc21/authority.xml'),
    # )
    db.session.commit()
    # Index all records
    click.secho('Indexing records', fg='green')
    indexer = RecordIndexer()
    indexer.bulk_index(records)
    indexer.process_bulk_queue()
Example 30
def test_reindex(app, script_info):
    """Test reindex."""
    # load records
    with app.test_request_context():
        runner = CliRunner()
        rec_uuid = uuid.uuid4()
        data = {'title': 'Test0'}
        record = Record.create(data, id_=rec_uuid)
        db.session.commit()

        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == res.exit_code

        res = runner.invoke(cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code

        # give the bulk indexer worker time to finish
        sleep(5)
        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(record)
        res = current_search_client.get(index=index,
                                        doc_type=doc_type,
                                        id=rec_uuid)
        assert res['found']

        # Destroy queue
        res = runner.invoke(cli.queue, ['delete'], obj=script_info)
        assert 0 == res.exit_code