コード例 #1
0
ファイル: tasks.py プロジェクト: gitter-badger/inspire-next
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    """Create a record for every item of ``chunk`` and bulk-index the result.

    The record-modification and citation receivers are disconnected while the
    chunk is processed and are reconnected in the ``finally`` clause, so they
    are restored even when the commit or the bulk request fails.

    :param chunk: iterable of raw records, each handed to ``create_record``.
    :param broken_output: optional path; records whose processing raised are
        appended to this file.
    :param dry_run: forwarded unchanged to ``create_record``.
    """
    from flask_sqlalchemy import models_committed
    from invenio_records.receivers import record_modification
    from invenio_records.tasks.index import get_record_index
    from invenio.base.globals import cfg
    from elasticsearch.helpers import bulk as es_bulk
    from inspirehep.modules.citations.receivers import (
        catch_citations_insert,
        add_citation_count_on_insert_or_update,
        catch_citations_update
    )
    from invenio_records.signals import before_record_index, after_record_insert
    models_committed.disconnect(record_modification)
    after_record_insert.disconnect(catch_citations_insert)
    before_record_index.disconnect(add_citation_count_on_insert_or_update)
    before_record_index.disconnect(catch_citations_update)

    records_to_index = []
    try:
        for record in chunk:
            # Reset per record so the error log never shows stale values.
            recid = json = None
            try:
                recid, json = create_record(record,
                                            force=True, dry_run=dry_run)
                index = get_record_index(json) or \
                    cfg['SEARCH_ELASTIC_DEFAULT_INDEX']
                before_record_index.send(recid, json=json, index=index)
                # Bulk-action metadata is stored alongside the document body.
                json.update({'_index': index, '_type': 'record', '_id': recid, 'citation_count': 0})
                records_to_index.append(json)
            except Exception as err:
                # A single bad record must not abort the whole chunk.
                logger.error("ERROR with record {} and json {}".format(recid, json))
                logger.exception(err)
                if broken_output:
                    # The context manager closes the handle after every write;
                    # previously one descriptor leaked per failed record.
                    with open(broken_output, "a") as broken_output_fd:
                        print(record, file=broken_output_fd)

        logger.info("Committing chunk")
        db.session.commit()
        logger.info("Sending chunk to elasticsearch")
        # NOTE(review): the bulk request is sent even when dry_run is set --
        # confirm this is intended.
        es_bulk(es, records_to_index, request_timeout=60)
    finally:
        models_committed.connect(record_modification)
        after_record_insert.connect(catch_citations_insert)
        before_record_index.connect(add_citation_count_on_insert_or_update)
        before_record_index.connect(catch_citations_update)
        db.session.close()
コード例 #2
0
ファイル: receivers.py プロジェクト: mihaibivol/inspire-next
def index_holdingpen_record(sender, **kwargs):
    """Index a Holding Pen record.

    Builds a ``Record`` from the attributes of ``sender``, enriches it with
    the data returned by the workflow's ``get_record`` and ``get_sort_data``
    (failures of either are logged and otherwise ignored), and stores it in
    the Holding Pen Elasticsearch index.

    :param sender: the workflow object being indexed.
    :param kwargs: accepted but not used.
    """
    from invenio_ext.es import es
    from invenio_records.api import Record
    from invenio_records.signals import before_record_index
    from invenio_records.recordext.functions.get_record_collections import (
        get_record_collections,
    )
    from invenio_records.tasks.index import get_record_index

    from invenio_workflows.registry import workflows

    # Skip objects without a registered workflow and initial versions.
    if not sender.workflow or sender.version == ObjectVersion.INITIAL:
        return

    workflow_name = sender.workflow.name
    workflow = workflows.get(workflow_name)
    if not workflow:
        message = "Workflow {0} not found for sender: {1}".format(
            workflow_name, sender.id
        )
        current_app.logger.info(message)
        return

    # Load the payloads only when they are not already on the object.
    if not hasattr(sender, 'data'):
        sender.data = sender.get_data()
    if not hasattr(sender, 'extra_data'):
        sender.extra_data = sender.get_extra_data()

    record = Record({})
    record["version"] = ObjectVersion.name_from_version(sender.version)
    record["type"] = sender.data_type
    record["status"] = sender.status
    record["created"] = sender.created.isoformat()
    record["modified"] = sender.modified.isoformat()
    record["uri"] = sender.uri
    record["id_workflow"] = sender.id_workflow
    record["id_user"] = sender.id_user
    record["id_parent"] = sender.id_parent
    record["workflow"] = sender.workflow.name

    # Each enrichment step is independent: an error in one is logged and
    # the next one still runs.
    for provider_name in ("get_record", "get_sort_data"):
        try:
            record.update(getattr(workflow, provider_name)(sender))
        except Exception as err:
            current_app.logger.exception(err)

    # The collections must be set before the index is resolved, because the
    # index lookup depends on them.
    record["_collections"] = get_record_collections(record)
    record_index = get_record_index(record) or current_app.config["SEARCH_ELASTIC_DEFAULT_INDEX"]

    # Give before_record_index receivers a chance to amend the record.
    before_record_index.send(sender.id, json=record, index=record_index)

    if not record_index:
        return

    config = current_app.config
    es.index(
        index=config['WORKFLOWS_HOLDING_PEN_ES_PREFIX'] + record_index,
        doc_type=config["WORKFLOWS_HOLDING_PEN_DOC_TYPE"],
        body=dict(record),
        id=sender.id
    )
コード例 #3
0
ファイル: views.py プロジェクト: inspirehep/invenio-records
    def decorated(recid, *args, **kwargs):
        """Resolve ``recid`` to a record and prepare the request before ``f``.

        Aborts with 404 when the record is missing from the database or the
        Elasticsearch lookup raises ``TransportError``. Stores the record and
        its first matching collection on ``g``, checks that the current user
        may view the record, redirects merged records to their master record,
        registers a template context processor and sends
        ``pre_template_render`` before delegating to the wrapped view.

        :param recid: record identifier; converted to ``int``.
        :returns: the wrapped view's result, or a redirect for merged records.
        """
        from invenio_collections.models import Collection

        from .api import get_record
        from .access import check_user_can_view_record
        from invenio_records.tasks.index import get_record_index
        from invenio_records.api import Record
        from invenio.base.globals import cfg
        from invenio_ext.es import es
        from elasticsearch import TransportError

        # ensure recid to be integer
        recid = int(recid)

        # get record from db and the one from es
        db_record = get_record(recid)
        if db_record is None:
            abort(404)

        index = get_record_index(db_record) or \
            cfg['SEARCH_ELASTIC_DEFAULT_INDEX']

        try:
            es_record = es.get(index=index, doc_type='record', id=recid)
        except TransportError:
            abort(404)

        g.record = record = Record(data=es_record['_source'])

        g.collection = collection = Collection.query.filter(
            Collection.name.in_(record['_collections'])).first()

        (auth_code, auth_msg) = check_user_can_view_record(
            current_user, record)

        # only superadmins can use verbose parameter for obtaining debug
        # information
        if not current_user.is_super_admin and 'verbose' in kwargs:
            kwargs['verbose'] = 0

        if auth_code:
            flash(auth_msg, 'error')
            abort(apache.HTTP_UNAUTHORIZED)

        if Query(cfg['RECORDS_DELETED_FIELD_QUERY']).match(record):
            # Record is deleted. Check for referred master recid if merged or 404
            master_recid = record.get(cfg['RECORDS_MERGED_MASTER_RECID_KEY'])
            if master_recid:
                # ``__name__`` works on Python 2 and 3 (``func_name`` exists
                # only on Python 2) and matches the signal name built below.
                return redirect(url_for('.' + f.__name__, recid=master_recid))
            abort(404)

        title = record.get(cfg.get('RECORDS_BREADCRUMB_TITLE_KEY'), '')
        tabs = []

        def _format_record(record, of='hd', user_info=current_user, *args,
                           **kwargs):
            # Imported lazily, only when a template actually formats a record.
            from invenio_formatter import format_record
            return format_record(record, of, user_info=user_info, *args,
                                 **kwargs)

        @register_template_context_processor
        def record_context():
            # from invenio.modules.comments.api import get_mini_reviews
            return dict(recid=recid,
                        record=record,
                        tabs=tabs,
                        title=title,
                        get_mini_reviews=lambda *args, **kwargs: '',
                        # FIXME get_mini_reviews,
                        collection=collection,
                        format_record=_format_record
                        )

        pre_template_render.send(
            "%s.%s" % (blueprint.name, f.__name__),
            recid=recid,
        )
        return f(recid, *args, **kwargs)
コード例 #4
0
    def decorated(recid, *args, **kwargs):
        """Resolve ``recid`` to a record and prepare the request before ``f``.

        Aborts with 404 when the record is missing from the database or the
        Elasticsearch lookup raises ``TransportError``. Publishes the record
        and its first matching collection on ``g``, checks view permission for
        the current user, registers a template context processor and sends
        ``pre_template_render`` before calling the wrapped view.

        :param recid: record identifier; converted to ``int``.
        :returns: whatever the wrapped view returns.
        """
        from invenio_collections.models import Collection

        from .api import get_record
        from .access import check_user_can_view_record
        from invenio_records.tasks.index import get_record_index
        from invenio_records.api import Record
        from invenio.base.globals import cfg
        from invenio_ext.es import es
        from elasticsearch import TransportError

        # Normalise the identifier to an integer.
        recid = int(recid)

        # The database copy confirms the record exists and selects the index;
        # the data that is served comes from Elasticsearch.
        stored = get_record(recid)
        if stored is None:
            abort(404)

        es_index = get_record_index(stored)
        if not es_index:
            es_index = cfg['SEARCH_ELASTIC_DEFAULT_INDEX']

        try:
            hit = es.get(index=es_index, doc_type='record', id=recid)
        except TransportError:
            abort(404)

        record = Record(data=hit['_source'])
        g.record = record

        collection_query = Collection.query.filter(
            Collection.name.in_(record['_collections'])
        )
        collection = collection_query.first()
        g.collection = collection

        auth_code, auth_msg = check_user_can_view_record(current_user, record)

        # The verbose debugging parameter is reserved for superadmins; for
        # everybody else it is forced to 0.
        if not current_user.is_super_admin and 'verbose' in kwargs:
            kwargs['verbose'] = 0

        if auth_code:
            flash(auth_msg, 'error')
            abort(apache.HTTP_UNAUTHORIZED)

        # TODO check record status (exists, merged, deleted)

        title_key = cfg.get('RECORDS_BREADCRUMB_TITLE_KEY')
        title = record.get(title_key, '')
        tabs = []

        def _format_record(record, of='hd', user_info=current_user, *args,
                           **kwargs):
            # Imported lazily, only when a template actually formats a record.
            from invenio_formatter import format_record
            return format_record(
                record, of, user_info=user_info, *args, **kwargs)

        @register_template_context_processor
        def record_context():
            # from invenio.modules.comments.api import get_mini_reviews
            return {
                'recid': recid,
                'record': record,
                'tabs': tabs,
                'title': title,
                # FIXME get_mini_reviews,
                'get_mini_reviews': lambda *args, **kwargs: '',
                'collection': collection,
                'format_record': _format_record,
            }

        signal_sender = "%s.%s" % (blueprint.name, f.__name__)
        pre_template_render.send(signal_sender, recid=recid)
        return f(recid, *args, **kwargs)
コード例 #5
0
ファイル: tasks.py プロジェクト: jochenklein/inspire-next
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    """Migrate a chunk of raw MARC records and bulk-index the valid ones.

    Each raw record is parsed with ``marc_create_record`` and created inside
    a nested transaction. Unless ``dry_run`` is set, an ``InspireProdRecords``
    row recording the raw record, validation errors and success flag is
    merged into the session. The record-modification and citation receivers
    are disconnected while the chunk runs and are reconnected in ``finally``.

    :param chunk: iterable of raw records.
    :param broken_output: accepted but not used by this implementation.
    :param dry_run: when true, no ``InspireProdRecords`` rows are written and
        nothing is sent to Elasticsearch.
    """
    from flask_sqlalchemy import models_committed
    from invenio_records.receivers import record_modification
    from invenio_records.tasks.index import get_record_index
    from invenio.base.globals import cfg
    from elasticsearch.helpers import bulk as es_bulk
    from inspirehep.modules.citations.receivers import (
        catch_citations_insert,
        add_citation_count_on_insert_or_update,
        catch_citations_update
    )
    from invenio_records.signals import before_record_index, after_record_insert

    # (signal, receiver) pairs that are switched off while the chunk runs.
    muted_receivers = (
        (models_committed, record_modification),
        (after_record_insert, catch_citations_insert),
        (before_record_index, add_citation_count_on_insert_or_update),
        (before_record_index, catch_citations_update),
    )
    for signal, receiver in muted_receivers:
        signal.disconnect(receiver)

    es_actions = []
    try:
        for raw_record in chunk:
            record_json = None
            marc_record = marc_create_record(raw_record, keep_singletons=False)
            recid = int(marc_record['001'])
            if not dry_run:
                prod_record = InspireProdRecords(recid=recid)
                prod_record.marcxml = raw_record
            try:
                # The nested transaction keeps one failing record from
                # affecting the rest of the chunk.
                with db.session.begin_nested():
                    errors, recid, record_json = create_record(
                        recid, marc_record, force=True,
                        dry_run=dry_run, validation=True
                    )
                    if dry_run:
                        continue
                    prod_record.valid = not errors
                    prod_record.errors = errors
                    target_index = get_record_index(record_json)
                    if not target_index:
                        target_index = cfg['SEARCH_ELASTIC_DEFAULT_INDEX']
                    before_record_index.send(
                        recid, json=record_json, index=target_index)
                    # Bulk-action metadata is stored alongside the document.
                    bulk_metadata = {
                        '_index': target_index,
                        '_type': 'record',
                        '_id': recid,
                        'citation_count': 0,
                    }
                    record_json.update(bulk_metadata)
                    es_actions.append(record_json)
                    prod_record.successful = True
                    db.session.merge(prod_record)
            except Exception as err:
                logger.error(
                    "ERROR with record {} and json {}".format(
                        recid, record_json))
                logger.exception(err)
                if not dry_run:
                    prod_record.successful = False
                    db.session.merge(prod_record)
        logger.info("Committing chunk")
        db.session.commit()
        logger.info("Sending chunk to elasticsearch")
        if not dry_run:
            es_bulk(es, es_actions, request_timeout=60)
    finally:
        for signal, receiver in muted_receivers:
            signal.connect(receiver)
        db.session.close()