Example 1
def test_cv_search_cached(inspire_app):
    headers = {"Accept": "text/vnd+inspire.html+html"}
    data = {
        "control_number": 637275232,
        "titles": [{"title": "Yet another title"}],
    }
    record = create_record("lit", data=data)

    models_committed.disconnect(index_after_commit)

    data = dict(record)
    data["titles"] = [{"title": "Modified title"}]

    record.update(data)

    expected_status_code = 200
    expected_result = '<!DOCTYPE html><html><body>  <p><b>    <a href="https://localhost:5000/literature/637275232">      Yet another title    </a>  </b></p>          <br></body></html>'
    with inspire_app.test_client() as client:
        response = client.get("/literature", headers=headers)

    response_status_code = response.status_code
    response_data = response.get_data(as_text=True).replace("\n", "")
    assert expected_status_code == response_status_code
    assert expected_result == response_data

    models_committed.connect(index_after_commit)
Example 2
def test_index_record(inspire_app, celery_app_with_context,
                      celery_session_worker):
    models_committed.disconnect(index_after_commit)

    records = [
        create_record_async("lit"),
        create_record_async("aut"),
        create_record_async("job"),
        create_record_async("jou"),
        create_record_async("exp"),
        create_record_async("con"),
        create_record_async("dat"),
        create_record_async("ins"),
    ]

    uuids = [record.id for record in records]
    task = index_records.delay(uuids)

    results = task.get(timeout=5)

    uuids = [str(uuid) for uuid in uuids]
    assert results == uuids

    for record in records:
        result = InspireSearch.get_record_data_from_es(record)
        assert record["control_number"] == result["control_number"]
    models_committed.connect(index_after_commit)
Example 3
 def init_app(self, app):
     self._indexs = {}
     self.index_name = app.config.get('MSEARCH_INDEX_NAME', 'msearch')
     self._client = Elasticsearch(**app.config.get('ELASTICSEARCH', {}))
     if app.config.get('MSEARCH_ENABLE', True):
         models_committed.connect(self._index_signal)
     super(ElasticSearch, self).init_app(app)
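The _index_signal receiver connected here is not shown in the excerpt; it is what turns each committed change into an index operation. A generic sketch of such a receiver (an assumption about its shape based on flask-msearch's __searchable__ convention, not the library's actual code; index_one and delete_one are hypothetical helpers):

def _index_signal(self, sender, changes):
    for instance, operation in changes:
        if not hasattr(instance, '__searchable__'):
            continue  # only index models that opt in
        if operation in ('insert', 'update'):
            self.index_one(instance)   # hypothetical helper
        elif operation == 'delete':
            self.delete_one(instance)  # hypothetical helper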
Example 4
def test_process_references_in_records_reindexes_conferences_when_pub_info_changes(
        inspire_app, celery_app_with_context, celery_session_worker):
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)
    conference_data = faker.record("con", with_control_number=True)
    conference_record = InspireRecord.create(conference_data)
    conference_control_number = conference_record["control_number"]
    conf_ref = f"http://localhost:8000/api/conferences/{conference_control_number}"
    data = faker.record("lit", with_control_number=True)
    data["publication_info"] = [{"conference_record": {"$ref": conf_ref}}]
    data["document_type"] = ["conference paper"]
    record = InspireRecord.create(data)
    db.session.commit()

    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)

    uuids = [record.id]

    task = process_references_in_records.delay(uuids)

    result = task.get(timeout=5)

    conference_record_es = InspireSearch.get_record_data_from_es(
        conference_record)
    expected_number_of_contributions = 1

    assert (expected_number_of_contributions ==
            conference_record_es["number_of_contributions"])
Example 5
def configure_db(app):
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True

    if app.config['TESTING']:
        # tests do not actually use sqlite but run a postgres instance and
        # reconfigure flask-sqlalchemy to use that database.  by setting
        # a dummy uri explicitly instead of letting flask-sqlalchemy do
        # the exact same thing we avoid a warning when running tests.
        app.config.setdefault('SQLALCHEMY_DATABASE_URI', 'sqlite:///:memory:')
    else:
        if config.SQLALCHEMY_DATABASE_URI is None:
            raise Exception("No proper SQLAlchemy store has been configured. Please edit your indico.conf")

        app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
        app.config['SQLALCHEMY_RECORD_QUERIES'] = False
        app.config['SQLALCHEMY_POOL_SIZE'] = config.SQLALCHEMY_POOL_SIZE
        app.config['SQLALCHEMY_POOL_TIMEOUT'] = config.SQLALCHEMY_POOL_TIMEOUT
        app.config['SQLALCHEMY_POOL_RECYCLE'] = config.SQLALCHEMY_POOL_RECYCLE
        app.config['SQLALCHEMY_MAX_OVERFLOW'] = config.SQLALCHEMY_MAX_OVERFLOW

    import_all_models()
    db.init_app(app)
    if not app.config['TESTING']:
        apply_db_loggers(app)

    plugins_loaded.connect(lambda sender: configure_mappers(), app, weak=False)
    models_committed.connect(on_models_committed, app)
Example 6
def test_process_references_in_records_reindexes_experiments_when_linked_experiments_change(
        app, celery_app_with_context, celery_session_worker):
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)

    experiment_data = faker.record("exp", with_control_number=True)
    experiment = InspireRecord.create(experiment_data)
    db.session.commit()

    experiment_control_number = experiment["control_number"]
    exp_ref = f"http://localhost:8000/api/experiments/{experiment_control_number}"

    data = faker.record("lit", with_control_number=True)

    data["accelerator_experiments"] = [{
        "legacy_name": "LIGO",
        "record": {
            "$ref": exp_ref
        }
    }]

    record = InspireRecord.create(data)
    db.session.commit()

    models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([record.id])
    task.get(timeout=5)

    experiment_record_es = InspireSearch.get_record_data_from_es(experiment)
    expected_number_of_paper = 1

    assert expected_number_of_paper == experiment_record_es["number_of_papers"]
Example 7
def test_model_signals(db, Todo):
    recorded = []

    def committed(sender, changes):
        assert isinstance(changes, list)
        recorded.extend(changes)

    models_committed.connect(committed)
    todo = Todo("Awesome", "the text")
    db.session.add(todo)
    assert len(recorded) == 0
    db.session.commit()
    assert len(recorded) == 1
    assert recorded[0][0] == todo
    assert recorded[0][1] == "insert"
    del recorded[:]
    todo.text = "aha"
    db.session.commit()
    assert len(recorded) == 1
    assert recorded[0][0] == todo
    assert recorded[0][1] == "update"
    del recorded[:]
    db.session.delete(todo)
    db.session.commit()
    assert len(recorded) == 1
    assert recorded[0][0] == todo
    assert recorded[0][1] == "delete"
    models_committed.disconnect(committed)
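Example 7 shows the payload shape: changes is a list of (instance, operation) tuples, where the operation is one of 'insert', 'update' or 'delete'. Below is a minimal, self-contained sketch of the same mechanics (assuming Flask-SQLAlchemy 2.x, where the signal is importable directly from flask_sqlalchemy; in 3.x it lives in flask_sqlalchemy.track_modifications):

from flask import Flask
from flask_sqlalchemy import SQLAlchemy, models_committed

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///:memory:'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True  # the signal only fires with tracking enabled

db = SQLAlchemy(app)


class Todo(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(80))


def log_changes(sender, changes):
    # changes is a list of (instance, operation) tuples
    for instance, operation in changes:
        print(operation, instance)


models_committed.connect(log_changes, sender=app)

with app.app_context():
    db.create_all()
    db.session.add(Todo(title='write docs'))
    db.session.commit()  # prints e.g.: insert <Todo 1>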
Example 8
def migrate_chunk(chunk):
    models_committed.disconnect(receive_after_model_commit)
    current_collections.unregister_signals()

    index_queue = []

    try:
        for raw_record in chunk:
            with db.session.begin_nested():
                record = migrate_and_insert_record(raw_record)
                if record:
                    index_queue.append(create_index_op(record))
        db.session.commit()
    finally:
        db.session.close()

    req_timeout = current_app.config['INDEXER_BULK_REQUEST_TIMEOUT']
    es_bulk(
        es,
        index_queue,
        stats_only=True,
        request_timeout=req_timeout,
    )

    models_committed.connect(receive_after_model_commit)
    current_collections.register_signals()
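A caveat on the pattern above: the signal is only reconnected if es_bulk returns normally, so an exception raised there leaves models_committed disconnected for the rest of the process. A small context manager (a hypothetical helper, not part of the project quoted above) makes the disconnect/reconnect pairing exception-safe:

from contextlib import contextmanager

from flask_sqlalchemy import models_committed


@contextmanager
def signal_paused(receiver):
    """Temporarily disconnect a models_committed receiver, reconnecting even on error."""
    models_committed.disconnect(receiver)
    try:
        yield
    finally:
        models_committed.connect(receiver)


# usage:
#     with signal_paused(receive_after_model_commit):
#         ...  # migrate and bulk-index without triggering per-record indexing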
Example 9
def create_records_from_mirror_recids(recids):
    """Task which migrates records
    Args:
        recids: records uuids to remigrate
    Returns:
         set: set of properly processed records uuids
    """
    models_committed.disconnect(index_after_commit)
    processed_records = set()
    for recid in recids:
        try:
            LOGGER.info("Migrate record from mirror", recid=recid)
            with db.session.begin_nested():
                record = migrate_record_from_mirror(
                    LegacyRecordsMirror.query.get(recid))
        except Exception:
            LOGGER.exception("Cannot migrate record", recid=recid)
            continue

        if record:
            processed_records.add(str(record.id))
        else:
            LOGGER.warning("Record is empty", recid=recid)
    db.session.commit()
    models_committed.connect(index_after_commit)

    return list(processed_records)
Example 10
def test_index_record_manually(app, celery_app_with_context,
                               celery_session_worker, retry_until_matched):
    data = faker.record("lit")
    rec = LiteratureRecord.create(data)
    models_committed.disconnect(index_after_commit)
    db.session.commit()
    models_committed.connect(index_after_commit)
    es.indices.refresh("records-hep")
    result = es.search("records-hep")
    assert result["hits"]["total"] == 0

    rec.index()
    steps = [
        {
            "step": es.indices.refresh,
            "args": ["records-hep"]
        },
        {
            "step": es.search,
            "args": ["records-hep"],
            "expected_result": {
                "expected_key": "hits.total",
                "expected_result": 1
            },
        },
    ]
    retry_until_matched(steps)
Example 11
def configure_db(app):
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True

    if not app.config['TESTING']:
        cfg = Config.getInstance()
        db_uri = cfg.getSQLAlchemyDatabaseURI()

        if db_uri is None:
            raise Exception(
                "No proper SQLAlchemy store has been configured. Please edit your indico.conf"
            )

        app.config['SQLALCHEMY_DATABASE_URI'] = db_uri
        app.config['SQLALCHEMY_RECORD_QUERIES'] = False
        app.config['SQLALCHEMY_POOL_SIZE'] = cfg.getSQLAlchemyPoolSize()
        app.config['SQLALCHEMY_POOL_TIMEOUT'] = cfg.getSQLAlchemyPoolTimeout()
        app.config['SQLALCHEMY_POOL_RECYCLE'] = cfg.getSQLAlchemyPoolRecycle()
        app.config['SQLALCHEMY_MAX_OVERFLOW'] = cfg.getSQLAlchemyMaxOverflow()

    import_all_models()
    db.init_app(app)
    if not app.config['TESTING']:
        apply_db_loggers(app)

    plugins_loaded.connect(lambda sender: configure_mappers(), app, weak=False)
    models_committed.connect(on_models_committed, app)
Example 12
def create_app(config_name):
    app = Flask(__name__)
    app.config.from_object(config[config_name])

    db.init_app(app)
    csrf.init_app(app)
    bcrypt.init_app(app)
    mail.init_app(app)
    login_manager.init_app(app)
    redis_store.init_app(app)
    celery.conf.update(app.config)

    models_committed.connect(receive_change, app)

    sign_in_manage = {}
    sign_in_func = 'account.sign_in'
    for path in blueprints:
        bp = import_string('sayit.views.' + path[0])
        app.register_blueprint(bp, url_prefix=path[1])
        sign_in_manage[bp.name] = sign_in_func

    for path in jinja_filters:
        flt = import_string('sayit.filters:' + path[0])
        app.jinja_env.filters[path[1]] = flt

    login_manager.blueprint_login_views = sign_in_manage

    return app
Example 13
def configure_db(app):
    if not app.config['TESTING']:
        cfg = Config.getInstance()
        db_uri = cfg.getSQLAlchemyDatabaseURI()

        if db_uri is None:
            raise Exception("No proper SQLAlchemy store has been configured. Please edit your indico.conf")

        app.config['SQLALCHEMY_DATABASE_URI'] = db_uri

        # DB options
        app.config['SQLALCHEMY_ECHO'] = cfg.getSQLAlchemyEcho()
        app.config['SQLALCHEMY_RECORD_QUERIES'] = cfg.getSQLAlchemyRecordQueries()
        app.config['SQLALCHEMY_POOL_SIZE'] = cfg.getSQLAlchemyPoolSize()
        app.config['SQLALCHEMY_POOL_TIMEOUT'] = cfg.getSQLAlchemyPoolTimeout()
        app.config['SQLALCHEMY_POOL_RECYCLE'] = cfg.getSQLAlchemyPoolRecycle()
        app.config['SQLALCHEMY_MAX_OVERFLOW'] = cfg.getSQLAlchemyMaxOverflow()

    import_all_models()
    db.init_app(app)
    if not app.config['TESTING']:
        apply_db_loggers(app)

    plugins_loaded.connect(lambda sender: configure_mappers(), app, weak=False)
    models_committed.connect(on_models_committed, app)
Example 14
def migrate_chunk(chunk, skip_files=False):
    models_committed.disconnect(index_after_commit)

    index_queue = []

    try:
        for raw_record in chunk:
            with db.session.begin_nested():
                record = migrate_and_insert_record(
                    raw_record,
                    skip_files=skip_files,
                )
                if record:
                    index_queue.append(create_index_op(record))
        db.session.commit()
    finally:
        db.session.close()

    req_timeout = current_app.config['INDEXER_BULK_REQUEST_TIMEOUT']
    es_bulk(
        es,
        index_queue,
        stats_only=True,
        request_timeout=req_timeout,
    )

    models_committed.connect(index_after_commit)
Example 15
def configure_db(app):
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True

    if app.config['TESTING']:
        # tests do not actually use sqlite but run a postgres instance and
        # reconfigure flask-sqlalchemy to use that database.  by setting
        # a dummy uri explicitly instead of letting flask-sqlalchemy do
        # the exact same thing we avoid a warning when running tests.
        app.config.setdefault('SQLALCHEMY_DATABASE_URI', 'sqlite:///:memory:')
    else:
        if config.SQLALCHEMY_DATABASE_URI is None:
            raise Exception(
                "No proper SQLAlchemy store has been configured. Please edit your indico.conf"
            )

        app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
        app.config['SQLALCHEMY_RECORD_QUERIES'] = False
        app.config['SQLALCHEMY_POOL_SIZE'] = config.SQLALCHEMY_POOL_SIZE
        app.config['SQLALCHEMY_POOL_TIMEOUT'] = config.SQLALCHEMY_POOL_TIMEOUT
        app.config['SQLALCHEMY_POOL_RECYCLE'] = config.SQLALCHEMY_POOL_RECYCLE
        app.config['SQLALCHEMY_MAX_OVERFLOW'] = config.SQLALCHEMY_MAX_OVERFLOW

    import_all_models()
    db.init_app(app)
    if not app.config['TESTING']:
        apply_db_loggers(app)

    plugins_loaded.connect(lambda sender: configure_mappers(), app, weak=False)
    models_committed.connect(on_models_committed, app)
Example 16
 def init_app(self, app):
     self._setdefault(app)
     self._client = Elasticsearch(**app.config.get('ELASTICSEARCH', {}))
     self.pk = app.config["MSEARCH_PRIMARY_KEY"]
     self.index_name = app.config["MSEARCH_INDEX_NAME"]
     if app.config["MSEARCH_ENABLE"]:
         models_committed.connect(self._index_signal)
     super(ElasticSearch, self).init_app(app)
Example 17
 def _signal_connect(self, app):
     if app.config["MSEARCH_ENABLE"]:
         signal = app.config["MSEARCH_INDEX_SIGNAL"]
         if isinstance(signal, str):
             self._signal = import_string(signal)
         else:
             self._signal = signal
         models_committed.connect(self.index_signal)
Example 18
 def init_app(self, app):
     self._indexs = {}
     if self.analyzer is None:
         self.analyzer = DEFAULT_ANALYZER
     self.index_name = app.config.get('MSEARCH_INDEX_NAME',
                                      DEFAULT_WHOOSH_INDEX_NAME)
     if app.config.get('MSEARCH_ENABLE', True):
         models_committed.connect(self._index_signal)
     super(WhooshSearch, self).init_app(app)
Example 19
 def init_app(self, app):
     self._setdefault(app)
     if self.analyzer is None:
         self.analyzer = app.config["MSEARCH_ANALYZER"] or DEFAULT_ANALYZER
     self.pk = app.config["MSEARCH_PRIMARY_KEY"]
     self.index_name = app.config["MSEARCH_INDEX_NAME"]
     if app.config["MSEARCH_ENABLE"]:
         models_committed.connect(self._index_signal)
     super(WhooshSearch, self).init_app(app)
Example 20
 def init_app(self, app):
     self._indexs = {}
     es_setting = app.config.get('ELASTICSEARCH', {})
     self.index_name = app.config.get('MSEARCH_INDEX_NAME', 'msearch')
     self._client = Elasticsearch(**es_setting)
     if not self._client.indices.exists(index=self.index_name):
         self._client.indices.create(index=self.index_name, ignore=400)
     if app.config.get('MSEARCH_ENABLE', True):
         models_committed.connect(self._index_signal)
     super(ElasticSearch, self).init_app(app)
Example 21
def test_index_record_manually(inspire_app, clean_celery_session):
    data = faker.record("lit")
    rec = LiteratureRecord.create(data)
    models_committed.disconnect(index_after_commit)
    db.session.commit()
    models_committed.connect(index_after_commit)

    assert_es_hits_count(0)

    rec.index()

    assert_es_hits_count(1)
Example 22
def test_index_record_manually(inspire_app, clean_celery_session):
    data = faker.record("lit")
    rec = LiteratureRecord.create(data)
    models_committed.disconnect(index_after_commit)
    db.session.commit()
    models_committed.connect(index_after_commit)

    assert_record_not_in_es(rec["control_number"])

    rec.index()

    assert_record_in_es(rec["control_number"])
Example 23
def test_it_will_send_event_if_email_address_is_updated(mock_publisher: MagicMock, profile: Profile,
                                                        session: scoped_session,
                                                        commit: Callable[[], None]):
    event_publisher = send_update_events(publisher=mock_publisher)
    models_committed.connect(receiver=event_publisher)

    profile.add_email_address('*****@*****.**')
    session.add(profile)

    commit()

    assert mock_publisher.publish.call_count == 1
    assert mock_publisher.publish.call_args[0][0] == {'id': '12345678', 'type': 'profile'}
Example 24
 def init_app(self, app):
     self._indexs = {}
     self.whoosh_path = DEFAULT_WHOOSH_INDEX_NAME
     if self.analyzer is None:
         self.analyzer = DEFAULT_ANALYZER
     whoosh_path = app.config.get('MSEARCH_INDEX_NAME')
     if whoosh_path is not None:
         self.whoosh_path = whoosh_path
     if not os.path.exists(self.whoosh_path):
         os.mkdir(self.whoosh_path)
     if app.config.get('MSEARCH_ENABLE', True):
         models_committed.connect(self._index_signal)
     super(WhooshSearch, self).init_app(app)
Example 25
def signalling(app, changes, **kwargs):
    for instance, operation in changes:
        if instance.__tablename__ in [i.__tablename__ for i in [User]]:
            models_committed.disconnect(signalling)
            session = db.create_scoped_session()
            user = session.query(User).first()
            if user and user.username == 'signalling_test':
                user.username = '******'
                session.merge(user)
                session.commit()
            session.remove()
            models_committed.connect(signalling)
            break
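The disconnect/connect dance in Example 25 exists because committing from inside the receiver would fire models_committed again and recurse. A re-entrancy guard (a hypothetical alternative, not taken from any project quoted here; handle_change stands in for whatever work the receiver does) avoids touching the signal's connection state:

import threading

_guard = threading.local()


def signalling(app, changes):
    if getattr(_guard, 'active', False):
        return  # commit issued by this receiver itself; ignore the nested signal
    _guard.active = True
    try:
        for instance, operation in changes:
            handle_change(instance, operation)  # hypothetical handler, may commit
    finally:
        _guard.active = False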
Example 26
def test_process_references_in_records_with_different_type_of_records_doesnt_throw_an_exception(
        inspire_app, celery_app_with_context, celery_session_worker):
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)

    cited_record_1 = LiteratureRecord.create(faker.record("lit"))
    cited_record_2 = LiteratureRecord.create(faker.record("lit"))

    data_citing_record_1 = faker.record(
        "lit", literature_citations=[cited_record_1["control_number"]])
    citing_record_1 = LiteratureRecord.create(data_citing_record_1)
    data_citing_record_2 = faker.record(
        "lit", literature_citations=[cited_record_2["control_number"]])
    citing_record_2 = LiteratureRecord.create(data_citing_record_2)

    db.session.commit()

    records = [
        create_record_async("aut"),
        create_record_async("job"),
        create_record_async("jou"),
        create_record_async("exp"),
        create_record_async("con"),
        create_record_async("dat"),
        create_record_async("ins"),
    ]

    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)
    uuids = [record.id
             for record in records] + [citing_record_1.id, citing_record_2.id]

    task = process_references_in_records.delay(uuids)
    results = task.get(timeout=5)

    uuids = [str(uuid) for uuid in uuids]
    assert results == uuids

    result_cited_record_1 = InspireSearch.get_record_data_from_es(
        cited_record_1)
    expected_result_cited_record_1_citation_count = 1

    assert (expected_result_cited_record_1_citation_count ==
            result_cited_record_1["citation_count"])

    result_cited_record_2 = InspireSearch.get_record_data_from_es(
        cited_record_2)
    expected_result_cited_record_2_citation_count = 1
    assert (expected_result_cited_record_2_citation_count ==
            result_cited_record_2["citation_count"])
Example 27
def test_it_ignores_other_models_being_committed(orcid_token: OrcidToken,
                                                 orcid_config: Dict[str, str],
                                                 mock_orcid_client: MagicMock,
                                                 session: scoped_session,
                                                 url_safe_serializer: URLSafeSerializer):
    webhook_maintainer = maintain_orcid_webhook(orcid_config, mock_orcid_client,
                                                url_safe_serializer)
    models_committed.connect(receiver=webhook_maintainer)

    session.add(orcid_token)
    session.commit()

    assert mock_orcid_client.set_webhook.call_count == 0
    assert mock_orcid_client.remove_webhook.call_count == 0
Example 28
def test_gracefully_handle_records_updating_in_wrong_order(
        inspire_app, clean_celery_session):
    # We want to run indexing in weird order, so disable auto indexing
    models_committed.disconnect(index_after_commit)

    cited_record = LiteratureRecord.create(data=faker.record("lit"))
    record_data = faker.record(
        "lit", literature_citations=[cited_record.control_number])
    record = LiteratureRecord.create(data=record_data)
    db.session.commit()

    record = LiteratureRecord.get_record_by_pid_value(record.control_number)

    index_record(record.id, record.model.versions[-1].version_id)
    assert LiteratureSearch().get_source(
        cited_record.id)["citation_count"] == 1

    data = dict(record)
    del data["references"]

    record.update(data)
    db.session.commit()
    record = LiteratureRecord.get_record_by_pid_value(record.control_number)
    data = dict(record)
    data["titles"][0] = {"title": "New Title"}
    record.update(data)
    db.session.commit()

    record = LiteratureRecord.get_record_by_pid_value(record.control_number)

    index_record(record.id, record.model.versions[-1].version_id)

    record = LiteratureRecord.get_record_by_pid_value(record.control_number)

    assert LiteratureSearch().get_source(
        cited_record.id)["citation_count"] == 1
    assert LiteratureSearch().get_source(record.id)["titles"] == [{"title": "New Title"}]

    index_record(record.id, record.model.versions[-2].version_id)

    assert LiteratureSearch().get_source(
        cited_record.id)["citation_count"] == 0
    assert LiteratureSearch().get_source(record.id)["titles"] == [{"title": "New Title"}]
    models_committed.connect(index_after_commit)
Example 29
def create_app(config: Config, clients: Clients) -> Flask:
    app = Flask(__name__)
    app.config['TRAP_HTTP_EXCEPTIONS'] = True  # Flask reads this from app.config, not from an attribute
    app.config.from_object(config)

    db.app = app
    db.init_app(app)

    Migrate(app, db)

    orcid_client = OrcidClient(config.orcid['api_uri'])
    app.orcid_client = orcid_client

    orcid_tokens = SQLAlchemyOrcidTokens(db)
    profiles = SQLAlchemyProfiles(db)

    uri_signer = URLSafeSerializer(config.orcid['webhook_key'],
                                   signer_kwargs={'key_derivation': 'hmac',
                                                  'digest_method': hashlib.sha512})

    config_bus = dict(config.bus)
    config_bus['env'] = config.name
    publisher = get_publisher(config=config_bus)
    app.commands = [
        ClearCommand(orcid_tokens, profiles),
        CreateProfileCommand(profiles),
        ReadConfiguration(config),
        SetOrcidWebhooksCommand(profiles, config.orcid, orcid_client, uri_signer)
    ]

    app.register_blueprint(api.create_blueprint(profiles))
    app.register_blueprint(oauth2.create_blueprint(config.orcid, clients, profiles, orcid_client,
                                                   orcid_tokens), url_prefix='/oauth2')
    app.register_blueprint(ping.create_blueprint())
    app.register_blueprint(webhook.create_blueprint(profiles, config.orcid, orcid_client,
                                                    orcid_tokens, uri_signer))

    from werkzeug.exceptions import default_exceptions
    for code in default_exceptions:
        app.errorhandler(code)(errors.http_error_handler)

    app.register_error_handler(Exception, errors.error_handler)
    app.register_error_handler(ClientError, errors.client_error_handler)
    app.register_error_handler(OAuth2Error, errors.oauth2_error_handler)

    models_committed.connect(maintain_orcid_webhook(config.orcid, orcid_client, uri_signer),
                             weak=False)
    models_committed.connect(send_update_events(publisher=publisher), weak=False)

    return app
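Example 29 passes weak=False because maintain_orcid_webhook(...) and send_update_events(...) return closures, and blinker holds receivers by weak reference by default: without weak=False the closures could be garbage-collected and the subscriptions silently dropped. A sketch of the difference:

def make_receiver(tag):
    def receiver(sender, changes):
        print(tag, len(changes))
    return receiver


models_committed.connect(make_receiver('audit'))
# No other reference to the closure exists, so the default weak reference
# lets it be garbage-collected; the receiver may never fire.

models_committed.connect(make_receiver('audit'), weak=False)
# The signal now keeps a strong reference, so the receiver stays alive.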
Example 30
def test_it_will_send_event_for_affiliation_insert(mock_publisher: MagicMock, profile: Profile,
                                                   session: scoped_session,
                                                   commit: Callable[[], None]) -> None:
    event_publisher = send_update_events(publisher=mock_publisher)
    models_committed.connect(receiver=event_publisher)

    affiliation = Affiliation('1', Address(countries.get('gb'), 'City'), 'Organisation', Date(2017))

    profile.add_affiliation(affiliation)
    session.add(profile)

    commit()

    assert mock_publisher.publish.call_count == 1
    assert mock_publisher.publish.call_args[0][0] == {'id': '12345678', 'type': 'profile'}
Example 31
def test_process_references_in_records_process_author_records(
        mock_batch_index, inspire_app, clean_celery_session):
    author_record = AuthorsRecord.create(faker.record("aut"))
    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [{
                    "full_name": author_record["name"]["value"],
                    "record": author_record["self"],
                }]
            },
        ))
    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [{
                    "full_name": author_record["name"]["value"],
                    "record": author_record["self"],
                }]
            },
        ))

    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(
            lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(
            author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)

    models_committed.disconnect(index_after_commit)
    author_record["name"]["value"] = "Another Name"
    author_record.update(dict(author_record))
    db.session.commit()
    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)
    task = process_references_in_records.delay([author_record.id])

    task.get(timeout=5)

    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [str(lit_record.id), str(lit_record_2.id)])
Example 32
def test_index_record_fulltext_manually(inspire_app, clean_celery_session,
                                        override_config, s3, datadir):
    metadata = {"foo": "bar"}
    pdf_path = os.path.join(datadir, "2206.04407.pdf")
    create_s3_bucket(KEY)
    create_s3_file(
        current_s3_instance.get_bucket_for_file_key(KEY),
        KEY,
        pdf_path,
        metadata,
        **{"ContentType": "application/pdf"},
    )

    with override_config(FEATURE_FLAG_ENABLE_FULLTEXT=True,
                         FEATURE_FLAG_ENABLE_FILES=False):
        data = faker.record("lit")
        data.update({
            "documents": [{
                "source": "arxiv",
                "fulltext": True,
                "filename": "new_doc.pdf",
                "key": KEY,
                "url": "http://www.africau.edu/images/default/sample.pdf",
            }]
        })
        rec = LiteratureRecord.create(data)
        models_committed.disconnect(index_after_commit)
        db.session.commit()
        models_committed.connect(index_after_commit)

        assert_record_not_in_es(rec["control_number"])

        rec.index_fulltext()

        def assert_record_in_es():
            current_search.flush_and_refresh("*")
            record_lit_es = (LiteratureSearch().get_record(str(
                rec.id)).execute().hits.hits[0])
            document = record_lit_es._source["documents"][0]
            assert "attachment" in document
            assert "text" not in document  # pipeline should remove it

        retry_until_pass(assert_record_in_es, timeout=90, retry_interval=5)
Example 33
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    from flask_sqlalchemy import models_committed
    from invenio_records.receivers import record_modification
    from invenio_records.tasks.index import get_record_index
    from invenio.base.globals import cfg
    from elasticsearch.helpers import bulk as es_bulk
    from inspirehep.modules.citations.receivers import (
        catch_citations_insert,
        add_citation_count_on_insert_or_update,
        catch_citations_update
    )
    from invenio_records.signals import before_record_index, after_record_insert
    models_committed.disconnect(record_modification)
    after_record_insert.disconnect(catch_citations_insert)
    before_record_index.disconnect(add_citation_count_on_insert_or_update)
    before_record_index.disconnect(catch_citations_update)

    records_to_index = []
    try:
        for record in chunk:
            recid = json = None
            try:
                recid, json = create_record(record,
                                            force=True, dry_run=dry_run)
                index = get_record_index(json) or \
                    cfg['SEARCH_ELASTIC_DEFAULT_INDEX']
                before_record_index.send(recid, json=json, index=index)
                json.update({'_index': index, '_type': 'record', '_id': recid, 'citation_count': 0})
                records_to_index.append(json)
            except Exception as err:
                logger.error("ERROR with record {} and json {}".format(recid, json))
                logger.exception(err)
                if broken_output:
                    broken_output_fd = open(broken_output, "a")
                    print(record, file=broken_output_fd)

        logger.info("Committing chunk")
        db.session.commit()
        logger.info("Sending chunk to elasticsearch")
        es_bulk(es, records_to_index, request_timeout=60)
    finally:
        models_committed.connect(record_modification)
        after_record_insert.connect(catch_citations_insert)
        before_record_index.connect(add_citation_count_on_insert_or_update)
        before_record_index.connect(catch_citations_update)
        db.session.close()
Example 34
def test_exception_is_handled_by_catch_exception_decorator(mock_publisher: MagicMock,
                                                           profile: Profile,
                                                           session: scoped_session,
                                                           commit: Callable[[], None]) -> None:
    mock_publisher.publish.side_effect = Exception('Some Exception')

    event_publisher = send_update_events(publisher=mock_publisher)
    models_committed.connect(receiver=event_publisher)

    affiliation = Affiliation('1', Address(countries.get('gb'), 'City'), 'Organisation', Date(2017))

    profile.add_affiliation(affiliation)
    session.add(profile)

    commit()

    assert mock_publisher.publish.call_count == 1
Example 35
def test_it_sets_a_webhook_when_a_profile_is_inserted(profile: Profile,
                                                      orcid_config: Dict[str, str],
                                                      mock_orcid_client: MagicMock,
                                                      session: scoped_session,
                                                      url_safe_serializer: URLSafeSerializer,
                                                      commit: Callable[[], None]):
    webhook_maintainer = maintain_orcid_webhook(orcid_config, mock_orcid_client,
                                                url_safe_serializer)
    models_committed.connect(receiver=webhook_maintainer)

    session.add(profile)

    commit()

    assert mock_orcid_client.set_webhook.call_count == 1
    assert mock_orcid_client.set_webhook.call_args[0][0] == '0000-0002-1825-0097'
    assert mock_orcid_client.set_webhook.call_args[0][1] == 'http://localhost/orcid-webhook/{}' \
        .format(url_safe_serializer.dumps('0000-0002-1825-0097'))
Example 36
def migrate_recids_from_mirror(prod_recids, skip_files=False):
    models_committed.disconnect(index_after_commit)

    index_queue = []

    for recid in prod_recids:
        with db.session.begin_nested():
            record = migrate_record_from_mirror(
                LegacyRecordsMirror.query.get(recid),
                skip_files=skip_files,
            )
            if record and not record.get('deleted'):
                index_queue.append(create_index_op(record))
    db.session.commit()

    req_timeout = current_app.config['INDEXER_BULK_REQUEST_TIMEOUT']
    es_bulk(
        es,
        index_queue,
        stats_only=True,
        request_timeout=req_timeout,
    )

    models_committed.connect(index_after_commit)
Example 37
from extensions.socketio import socketio
from flask_sqlalchemy import models_committed
from .schemas import PostSchema
from .models import Post

namespace = '/blog/io'
post_schema = PostSchema()


def post_after_commit(sender, changes):
    for model, change in changes:
        if isinstance(model, Post) and change in ('insert',):
            emit_new_posts()
            break

models_committed.connect(post_after_commit)


def emit_new_posts():
    socketio.emit('new posts', namespace=namespace)
Example 38
from flask_sqlalchemy import models_committed, before_models_committed
def record_oplogs(app, changes):
    if request.headers.environ.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest':
        return
    from system.models import OperationLog, SecurityLog
    for change in changes:
        if isinstance(change[0], OperationLog) or isinstance(change[0], SecurityLog):
            continue
        oplog = OperationLog()
        oplog.user = current_user
        oplog.module = unicode(change[0])
        oplog.action = change[1]
        oplog.terminal_ip = request.remote_addr
        oplog.summary = str(change[0].__dict__)
        db.session.add(oplog)
models_committed.connect(record_oplogs)
# NOTE: connecting the same receiver to both signals logs every change twice;
# before_models_committed alone would let the oplog rows join the same commit.
before_models_committed.connect(record_oplogs)

@login_mgr.user_loader
def load_user(id):
    return User.query.get(int(id))
    #user = cache.get("user-"+id)
    #if user is None:
    #   user = User.query.get(int(id))
    #   cache.set("user-"+id, user)
    #return user

from tango.login import user_logged_in, user_logged_out
def record_login(app, user):
    #cache.set("user-"+str(user.id), user)
    from system.models import SecurityLog
Example 39
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    from flask_sqlalchemy import models_committed
    from invenio_records.receivers import record_modification
    from invenio_records.tasks.index import get_record_index
    from invenio.base.globals import cfg
    from elasticsearch.helpers import bulk as es_bulk
    from inspirehep.modules.citations.receivers import (
        catch_citations_insert,
        add_citation_count_on_insert_or_update,
        catch_citations_update
    )
    from invenio_records.signals import before_record_index, after_record_insert
    models_committed.disconnect(record_modification)
    after_record_insert.disconnect(catch_citations_insert)
    before_record_index.disconnect(add_citation_count_on_insert_or_update)
    before_record_index.disconnect(catch_citations_update)

    records_to_index = []
    try:
        for raw_record in chunk:
            json = None
            record = marc_create_record(raw_record, keep_singletons=False)
            recid = int(record['001'])
            if not dry_run:
                prod_record = InspireProdRecords(recid=recid)
                prod_record.marcxml = raw_record
            try:
                with db.session.begin_nested():
                    errors, recid, json = create_record(
                        recid, record, force=True,
                        dry_run=dry_run, validation=True
                    )
                    if dry_run:
                        continue
                    prod_record.valid = not errors
                    prod_record.errors = errors
                    index = get_record_index(json) or \
                        cfg['SEARCH_ELASTIC_DEFAULT_INDEX']
                    before_record_index.send(recid, json=json, index=index)
                    json.update({'_index': index, '_type': 'record',
                                 '_id': recid, 'citation_count': 0})
                    records_to_index.append(json)
                    prod_record.successful = True
                    db.session.merge(prod_record)
            except Exception as err:
                logger.error("ERROR with record {} and json {}".format(recid, json))
                logger.exception(err)
                if not dry_run:
                    prod_record.successful = False
                    db.session.merge(prod_record)
        logger.info("Committing chunk")
        db.session.commit()
        logger.info("Sending chunk to elasticsearch")
        if not dry_run:
            es_bulk(es, records_to_index, request_timeout=60)
    finally:
        models_committed.connect(record_modification)
        after_record_insert.connect(catch_citations_insert)
        before_record_index.connect(add_citation_count_on_insert_or_update)
        before_record_index.connect(catch_citations_update)
        db.session.close()
Example 40
def prepare_indexing(app):
    """Prepare indexing."""
    before_models_committed.connect(register_record_modification)
    models_committed.connect(index_record_modification)
Example 41
from datetime import datetime, timedelta
from cbbpoll import db, app
from cbbpoll.message import send_reddit_pm
from itsdangerous import TimedJSONWebSignatureSerializer as Serializer
from sqlalchemy import select, desc, UniqueConstraint
from sqlalchemy.ext.hybrid import hybrid_property, hybrid_method
from flask_sqlalchemy import models_committed
from flask_login import AnonymousUserMixin


def on_models_committed(_, changes):
    for obj, change in changes:
        if change == 'insert' and hasattr(obj, '__commit_insert__'):
            obj.__commit_insert__()

models_committed.connect(on_models_committed, sender=app)


class User(db.Model):
    __tablename__ = 'user'
    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True)

    nickname = db.Column(db.String(20), index=True)
    email = db.Column(db.String(120), index=True)
    emailConfirmed = db.Column(db.Boolean, default=False)
    role = db.Column(db.Enum('u', 'a'), default='u')
    accessToken = db.Column(db.String(30))
    refreshToken = db.Column(db.String(30))
    refreshAfter = db.Column(db.DateTime)
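The dispatcher at the top of Example 41 turns the signal into a per-model hook: any model defining __commit_insert__ has it called once its INSERT is committed. A model opting in could look like this (a hypothetical sketch, not the quoted project's actual model):

class Poll(db.Model):
    id = db.Column(db.Integer, primary_key=True)

    def __commit_insert__(self):
        # Called by on_models_committed after this row's insert is committed.
        print('poll {} committed'.format(self.id))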
Example 42
from flask_sqlalchemy import models_committed

from calepin.app import app
from calepin.models import Blog

models_committed.connect(Blog.on_committed, sender=app)
Example 43
def test_record_indexing(app, queue):
    """Run record autoindexer."""
    @before_record_index.connect_via(app)
    def remove_schema(sender, json=None, record=None):
        if '$schema' in json:
            del json['$schema']

    models_committed.connect(process_models_committed_signal, sender=app)

    with app.app_context():

        current_search_client.indices.delete_alias('_all', '_all',
                                                   ignore=[400, 404])
        current_search_client.indices.delete('*')
        aliases = current_search_client.indices.get_aliases()
        assert 0 == len(aliases)

    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        result = runner.invoke(cmd, ['destroy', '--yes-i-know'],
                               obj=script_info)
        result = runner.invoke(cmd, ['init'],
                               obj=script_info)
        assert 0 == result.exit_code

    with app.app_context():
        from invenio_records.models import RecordMetadata
        with db.session.begin_nested():
            record1 = RecordMetadata(json={
                '$schema': ('http://example.com/schemas/'  # external site
                            'records/default-v1.0.0.json'),
                'title': 'Test1',
            })
            db.session.add(record1)
            record2 = RecordMetadata(json={
                '$schema': {
                    '$ref': ('http://example.com/schemas/'  # external site
                             'records/default-v1.0.0.json')
                },
                'title': 'Test2',
            })
            db.session.add(record2)
        db.session.commit()

        record_indexer = RecordIndexer(queue=queue)
        result = record_indexer.process_bulk_queue()
        assert 2 == len(list(result))

        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record1.id,
        )
        assert str(record1.id) == response['_id']

        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record2.id,
        )
        assert str(record2.id) == response['_id']

        db.session.delete(record1)
        db.session.commit()

        record_indexer.process_bulk_queue()

        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record1.id,
            ignore=404,
        )
        assert not response['found']

    # Clean-up:
    with app.app_context():
        result = runner.invoke(cmd, ['destroy', '--yes-i-know'],
                               obj=script_info)
        assert 0 == result.exit_code
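Example 43 connects process_models_committed_signal but, unlike Example 7, never disconnects it, so the receiver leaks into whatever runs next in the same process. A pytest fixture (a hypothetical sketch, assuming the same flask_sqlalchemy import as the examples above) keeps the connect/disconnect pairing automatic:

import pytest

from flask_sqlalchemy import models_committed


@pytest.fixture
def committed_changes():
    """Collect the (instance, operation) tuples committed during a test."""
    recorded = []

    def receiver(sender, changes):
        recorded.extend(changes)

    models_committed.connect(receiver)
    yield recorded
    models_committed.disconnect(receiver)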