def test_cv_search_cached(inspire_app):
    headers = {"Accept": "text/vnd+inspire.html+html"}
    data = {
        "control_number": 637275232,
        "titles": [{"title": "Yet another title"}],
    }
    record = create_record("lit", data=data)
    models_committed.disconnect(index_after_commit)
    data = dict(record)
    data["titles"] = [{"title": "Modified title"}]
    record.update(data)
    expected_status_code = 200
    expected_result = '<!DOCTYPE html><html><body> <p><b> <a href="https://localhost:5000/literature/637275232"> Yet another title </a> </b></p> <br></body></html>'
    with inspire_app.test_client() as client:
        response = client.get("/literature", headers=headers)
    response_status_code = response.status_code
    response_data = response.get_data(as_text=True).replace("\n", "")
    assert expected_status_code == response_status_code
    assert expected_result == response_data
    models_committed.connect(index_after_commit)
def test_index_record(inspire_app, celery_app_with_context, celery_session_worker):
    models_committed.disconnect(index_after_commit)
    records = [
        create_record_async("lit"),
        create_record_async("aut"),
        create_record_async("job"),
        create_record_async("jou"),
        create_record_async("exp"),
        create_record_async("con"),
        create_record_async("dat"),
        create_record_async("ins"),
    ]
    uuids = [record.id for record in records]
    task = index_records.delay(uuids)
    results = task.get(timeout=5)
    uuids = [str(uuid) for uuid in uuids]
    assert results == uuids
    for record in records:
        result = InspireSearch.get_record_data_from_es(record)
        assert record["control_number"] == result["control_number"]
    models_committed.connect(index_after_commit)
def init_app(self, app):
    self._indexs = {}
    self.index_name = app.config.get('MSEARCH_INDEX_NAME', 'msearch')
    self._client = Elasticsearch(**app.config.get('ELASTICSEARCH', {}))
    if app.config.get('MSEARCH_ENABLE', True):
        models_committed.connect(self._index_signal)
    super(ElasticSearch, self).init_app(app)
def test_process_references_in_records_reindexes_conferences_when_pub_info_changes(
    inspire_app, celery_app_with_context, celery_session_worker
):
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)
    conference_data = faker.record("con", with_control_number=True)
    conference_record = InspireRecord.create(conference_data)
    conference_control_number = conference_record["control_number"]
    conf_ref = f"http://localhost:8000/api/conferences/{conference_control_number}"
    data = faker.record("lit", with_control_number=True)
    data["publication_info"] = [{"conference_record": {"$ref": conf_ref}}]
    data["document_type"] = ["conference paper"]
    record = InspireRecord.create(data)
    db.session.commit()
    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)
    uuids = [record.id]
    task = process_references_in_records.delay(uuids)
    result = task.get(timeout=5)
    conference_record_es = InspireSearch.get_record_data_from_es(conference_record)
    expected_number_of_contributions = 1
    assert expected_number_of_contributions == conference_record_es["number_of_contributions"]
def configure_db(app):
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
    if app.config['TESTING']:
        # tests do not actually use sqlite but run a postgres instance and
        # reconfigure flask-sqlalchemy to use that database. by setting
        # a dummy uri explicitly instead of letting flask-sqlalchemy do
        # the exact same thing we avoid a warning when running tests.
        app.config.setdefault('SQLALCHEMY_DATABASE_URI', 'sqlite:///:memory:')
    else:
        if config.SQLALCHEMY_DATABASE_URI is None:
            raise Exception("No proper SQLAlchemy store has been configured. Please edit your indico.conf")
        app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
    app.config['SQLALCHEMY_RECORD_QUERIES'] = False
    app.config['SQLALCHEMY_POOL_SIZE'] = config.SQLALCHEMY_POOL_SIZE
    app.config['SQLALCHEMY_POOL_TIMEOUT'] = config.SQLALCHEMY_POOL_TIMEOUT
    app.config['SQLALCHEMY_POOL_RECYCLE'] = config.SQLALCHEMY_POOL_RECYCLE
    app.config['SQLALCHEMY_MAX_OVERFLOW'] = config.SQLALCHEMY_MAX_OVERFLOW
    import_all_models()
    db.init_app(app)
    if not app.config['TESTING']:
        apply_db_loggers(app)
    plugins_loaded.connect(lambda sender: configure_mappers(), app, weak=False)
    models_committed.connect(on_models_committed, app)
def test_process_references_in_records_reindexes_experiments_when_linked_experiments_change(
    app, celery_app_with_context, celery_session_worker
):
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)
    experiment_data = faker.record("exp", with_control_number=True)
    experiment = InspireRecord.create(experiment_data)
    db.session.commit()
    experiment_control_number = experiment["control_number"]
    exp_ref = f"http://localhost:8000/api/experiments/{experiment_control_number}"
    data = faker.record("lit", with_control_number=True)
    data["accelerator_experiments"] = [{"legacy_name": "LIGO", "record": {"$ref": exp_ref}}]
    record = InspireRecord.create(data)
    db.session.commit()
    models_committed.connect(index_after_commit)
    task = process_references_in_records.delay([record.id])
    task.get(timeout=5)
    experiment_record_es = InspireSearch.get_record_data_from_es(experiment)
    expected_number_of_paper = 1
    assert expected_number_of_paper == experiment_record_es["number_of_papers"]
def test_model_signals(db, Todo):
    recorded = []

    def committed(sender, changes):
        assert isinstance(changes, list)
        recorded.extend(changes)

    models_committed.connect(committed)
    todo = Todo("Awesome", "the text")
    db.session.add(todo)
    assert len(recorded) == 0
    db.session.commit()
    assert len(recorded) == 1
    assert recorded[0][0] == todo
    assert recorded[0][1] == "insert"
    del recorded[:]
    todo.text = "aha"
    db.session.commit()
    assert len(recorded) == 1
    assert recorded[0][0] == todo
    assert recorded[0][1] == "update"
    del recorded[:]
    db.session.delete(todo)
    db.session.commit()
    assert len(recorded) == 1
    assert recorded[0][0] == todo
    assert recorded[0][1] == "delete"
    models_committed.disconnect(committed)
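# A scoped variant of the test above, sketched on the assumption that
# models_committed is a standard blinker Signal (as it is in flask_sqlalchemy):
# Signal.connected_to() is a context manager that connects a receiver and
# guarantees it is disconnected on exit. The test above leaks its receiver if
# any assert raises before the final disconnect call; this form cannot.
def test_model_signals_scoped(db, Todo):
    recorded = []

    def committed(sender, changes):
        recorded.extend(changes)

    # receiver is connected only for the duration of the block
    with models_committed.connected_to(committed):
        todo = Todo("Awesome", "the text")
        db.session.add(todo)
        db.session.commit()

    # changes arrive as (instance, operation) pairs, per the test above
    assert recorded == [(todo, "insert")]
    # the receiver is already disconnected here; later commits go unrecorded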
def migrate_chunk(chunk):
    models_committed.disconnect(receive_after_model_commit)
    current_collections.unregister_signals()
    index_queue = []
    try:
        for raw_record in chunk:
            with db.session.begin_nested():
                record = migrate_and_insert_record(raw_record)
                if record:
                    index_queue.append(create_index_op(record))
        db.session.commit()
    finally:
        db.session.close()
    req_timeout = current_app.config['INDEXER_BULK_REQUEST_TIMEOUT']
    es_bulk(
        es,
        index_queue,
        stats_only=True,
        request_timeout=req_timeout,
    )
    models_committed.connect(receive_after_model_commit)
    current_collections.register_signals()
def create_records_from_mirror_recids(recids):
    """Task which migrates records.

    Args:
        recids: uuids of the mirror records to remigrate.
    Returns:
        list: uuids of the successfully processed records.
    """
    models_committed.disconnect(index_after_commit)
    processed_records = set()
    for recid in recids:
        try:
            LOGGER.info("Migrate record from mirror", recid=recid)
            with db.session.begin_nested():
                record = migrate_record_from_mirror(LegacyRecordsMirror.query.get(recid))
        except Exception:
            LOGGER.exception("Cannot migrate record", recid=recid)
            continue
        if record:
            processed_records.add(str(record.id))
        else:
            LOGGER.warning("Record is empty", recid=recid)
    db.session.commit()
    models_committed.connect(index_after_commit)
    return list(processed_records)
def test_index_record_manually(app, celery_app_with_context, celery_session_worker, retry_until_matched):
    data = faker.record("lit")
    rec = LiteratureRecord.create(data)
    models_committed.disconnect(index_after_commit)
    db.session.commit()
    models_committed.connect(index_after_commit)
    es.indices.refresh("records-hep")
    result = es.search("records-hep")
    assert result["hits"]["total"] == 0
    rec.index()
    steps = [
        {"step": es.indices.refresh, "args": ["records-hep"]},
        {
            "step": es.search,
            "args": ["records-hep"],
            "expected_result": {"expected_key": "hits.total", "expected_result": 1},
        },
    ]
    retry_until_matched(steps)
def configure_db(app):
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
    if not app.config['TESTING']:
        cfg = Config.getInstance()
        db_uri = cfg.getSQLAlchemyDatabaseURI()
        if db_uri is None:
            raise Exception("No proper SQLAlchemy store has been configured. Please edit your indico.conf")
        app.config['SQLALCHEMY_DATABASE_URI'] = db_uri
        app.config['SQLALCHEMY_RECORD_QUERIES'] = False
        app.config['SQLALCHEMY_POOL_SIZE'] = cfg.getSQLAlchemyPoolSize()
        app.config['SQLALCHEMY_POOL_TIMEOUT'] = cfg.getSQLAlchemyPoolTimeout()
        app.config['SQLALCHEMY_POOL_RECYCLE'] = cfg.getSQLAlchemyPoolRecycle()
        app.config['SQLALCHEMY_MAX_OVERFLOW'] = cfg.getSQLAlchemyMaxOverflow()
    import_all_models()
    db.init_app(app)
    if not app.config['TESTING']:
        apply_db_loggers(app)
    plugins_loaded.connect(lambda sender: configure_mappers(), app, weak=False)
    models_committed.connect(on_models_committed, app)
def create_app(config_name):
    app = Flask(__name__)
    app.config.from_object(config[config_name])
    db.init_app(app)
    csrf.init_app(app)
    bcrypt.init_app(app)
    mail.init_app(app)
    login_manager.init_app(app)
    redis_store.init_app(app)
    celery.conf.update(app.config)
    models_committed.connect(receive_change, app)
    sign_in_manage = {}
    sign_in_func = 'account.sign_in'
    for path in blueprints:
        bp = import_string('sayit.views.' + path[0])
        app.register_blueprint(bp, url_prefix=path[1])
        sign_in_manage[bp.name] = sign_in_func
    for path in jinja_filters:
        flt = import_string('sayit.filters:' + path[0])
        app.jinja_env.filters[path[1]] = flt
    login_manager.blueprint_login_views = sign_in_manage
    return app
def configure_db(app):
    if not app.config['TESTING']:
        cfg = Config.getInstance()
        db_uri = cfg.getSQLAlchemyDatabaseURI()
        if db_uri is None:
            raise Exception("No proper SQLAlchemy store has been configured. Please edit your indico.conf")
        app.config['SQLALCHEMY_DATABASE_URI'] = db_uri
        # DB options
        app.config['SQLALCHEMY_ECHO'] = cfg.getSQLAlchemyEcho()
        app.config['SQLALCHEMY_RECORD_QUERIES'] = cfg.getSQLAlchemyRecordQueries()
        app.config['SQLALCHEMY_POOL_SIZE'] = cfg.getSQLAlchemyPoolSize()
        app.config['SQLALCHEMY_POOL_TIMEOUT'] = cfg.getSQLAlchemyPoolTimeout()
        app.config['SQLALCHEMY_POOL_RECYCLE'] = cfg.getSQLAlchemyPoolRecycle()
        app.config['SQLALCHEMY_MAX_OVERFLOW'] = cfg.getSQLAlchemyMaxOverflow()
    import_all_models()
    db.init_app(app)
    if not app.config['TESTING']:
        apply_db_loggers(app)
    plugins_loaded.connect(lambda sender: configure_mappers(), app, weak=False)
    models_committed.connect(on_models_committed, app)
def migrate_chunk(chunk, skip_files=False):
    models_committed.disconnect(index_after_commit)
    index_queue = []
    try:
        for raw_record in chunk:
            with db.session.begin_nested():
                record = migrate_and_insert_record(
                    raw_record,
                    skip_files=skip_files,
                )
                if record:
                    index_queue.append(create_index_op(record))
        db.session.commit()
    finally:
        db.session.close()
    req_timeout = current_app.config['INDEXER_BULK_REQUEST_TIMEOUT']
    es_bulk(
        es,
        index_queue,
        stats_only=True,
        request_timeout=req_timeout,
    )
    models_committed.connect(index_after_commit)
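# The disconnect/reconnect bracketing repeated across the migration tasks in
# this section could be factored into a small helper. This is a hypothetical
# sketch, not part of any codebase quoted here; it assumes only the blinker
# signal API that flask_sqlalchemy's models_committed already exposes.
from contextlib import contextmanager

from flask_sqlalchemy import models_committed


@contextmanager
def signal_disconnected(receiver):
    """Temporarily disconnect `receiver` from models_committed.

    Reconnects in a finally block, so the receiver is restored even when the
    wrapped code raises. Note that several snippets above call connect() only
    on the success path, which permanently disables auto-indexing on error.
    """
    models_committed.disconnect(receiver)
    try:
        yield
    finally:
        models_committed.connect(receiver)


# usage, mirroring migrate_chunk above:
# with signal_disconnected(index_after_commit):
#     ...migrate and bulk-index records without triggering auto-indexing...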
def configure_db(app):
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
    if app.config['TESTING']:
        # tests do not actually use sqlite but run a postgres instance and
        # reconfigure flask-sqlalchemy to use that database. by setting
        # a dummy uri explicitly instead of letting flask-sqlalchemy do
        # the exact same thing we avoid a warning when running tests.
        app.config.setdefault('SQLALCHEMY_DATABASE_URI', 'sqlite:///:memory:')
    else:
        if config.SQLALCHEMY_DATABASE_URI is None:
            raise Exception("No proper SQLAlchemy store has been configured. Please edit your indico.conf")
        app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
    app.config['SQLALCHEMY_RECORD_QUERIES'] = False
    app.config['SQLALCHEMY_POOL_SIZE'] = config.SQLALCHEMY_POOL_SIZE
    app.config['SQLALCHEMY_POOL_TIMEOUT'] = config.SQLALCHEMY_POOL_TIMEOUT
    app.config['SQLALCHEMY_POOL_RECYCLE'] = config.SQLALCHEMY_POOL_RECYCLE
    app.config['SQLALCHEMY_MAX_OVERFLOW'] = config.SQLALCHEMY_MAX_OVERFLOW
    import_all_models()
    db.init_app(app)
    if not app.config['TESTING']:
        apply_db_loggers(app)
    plugins_loaded.connect(lambda sender: configure_mappers(), app, weak=False)
    models_committed.connect(on_models_committed, app)
def init_app(self, app):
    self._setdefault(app)
    self._client = Elasticsearch(**app.config.get('ELASTICSEARCH', {}))
    self.pk = app.config["MSEARCH_PRIMARY_KEY"]
    self.index_name = app.config["MSEARCH_INDEX_NAME"]
    if app.config["MSEARCH_ENABLE"]:
        models_committed.connect(self._index_signal)
    super(ElasticSearch, self).init_app(app)
def _signal_connect(self, app):
    if app.config["MSEARCH_ENABLE"]:
        signal = app.config["MSEARCH_INDEX_SIGNAL"]
        if isinstance(signal, str):
            self._signal = import_string(signal)
        else:
            self._signal = signal
        models_committed.connect(self.index_signal)
def init_app(self, app):
    self._indexs = {}
    if self.analyzer is None:
        self.analyzer = DEFAULT_ANALYZER
    self.index_name = app.config.get('MSEARCH_INDEX_NAME', DEFAULT_WHOOSH_INDEX_NAME)
    if app.config.get('MSEARCH_ENABLE', True):
        models_committed.connect(self._index_signal)
    super(WhooshSearch, self).init_app(app)
def init_app(self, app):
    self._setdefault(app)
    if self.analyzer is None:
        self.analyzer = app.config["MSEARCH_ANALYZER"] or DEFAULT_ANALYZER
    self.pk = app.config["MSEARCH_PRIMARY_KEY"]
    self.index_name = app.config["MSEARCH_INDEX_NAME"]
    if app.config["MSEARCH_ENABLE"]:
        models_committed.connect(self._index_signal)
    super(WhooshSearch, self).init_app(app)
def init_app(self, app):
    self._indexs = {}
    es_setting = app.config.get('ELASTICSEARCH', {})
    self.index_name = app.config.get('MSEARCH_INDEX_NAME', 'msearch')
    self._client = Elasticsearch(**es_setting)
    if not self._client.indices.exists(index=self.index_name):
        self._client.indices.create(index=self.index_name, ignore=400)
    if app.config.get('MSEARCH_ENABLE', True):
        models_committed.connect(self._index_signal)
    super(ElasticSearch, self).init_app(app)
def test_index_record_manually(inspire_app, clean_celery_session): data = faker.record("lit") rec = LiteratureRecord.create(data) models_committed.disconnect(index_after_commit) db.session.commit() models_committed.connect(index_after_commit) assert_es_hits_count(0) rec.index() assert_es_hits_count(1)
def test_index_record_manually(inspire_app, clean_celery_session): data = faker.record("lit") rec = LiteratureRecord.create(data) models_committed.disconnect(index_after_commit) db.session.commit() models_committed.connect(index_after_commit) assert_record_not_in_es(rec["control_number"]) rec.index() assert_record_in_es(rec["control_number"])
def test_it_will_send_event_if_email_address_is_updated(mock_publisher: MagicMock,
                                                        profile: Profile,
                                                        session: scoped_session,
                                                        commit: Callable[[], None]):
    event_publisher = send_update_events(publisher=mock_publisher)
    models_committed.connect(receiver=event_publisher)

    profile.add_email_address('*****@*****.**')
    session.add(profile)
    commit()

    assert mock_publisher.publish.call_count == 1
    assert mock_publisher.publish.call_args[0][0] == {'id': '12345678', 'type': 'profile'}
def init_app(self, app):
    self._indexs = {}
    self.whoosh_path = DEFAULT_WHOOSH_INDEX_NAME
    if self.analyzer is None:
        self.analyzer = DEFAULT_ANALYZER
    whoosh_path = app.config.get('MSEARCH_INDEX_NAME')
    if whoosh_path is not None:
        self.whoosh_path = whoosh_path
    if not os.path.exists(self.whoosh_path):
        os.mkdir(self.whoosh_path)
    if app.config.get('MSEARCH_ENABLE', True):
        models_committed.connect(self._index_signal)
    super(WhooshSearch, self).init_app(app)
def signalling(app, changes, **kwargs):
    for instance, operation in changes:
        if instance.__tablename__ in [i.__tablename__ for i in [User]]:
            models_committed.disconnect(signalling)
            session = db.create_scoped_session()
            user = session.query(User).first()
            if user and user.username == 'signalling_test':
                user.username = '******'
                session.merge(user)
                session.commit()
            session.remove()
            models_committed.connect(signalling)
            break
def test_process_references_in_records_with_different_type_of_records_doesnt_throw_an_exception(
    inspire_app, celery_app_with_context, celery_session_worker
):
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)
    cited_record_1 = LiteratureRecord.create(faker.record("lit"))
    cited_record_2 = LiteratureRecord.create(faker.record("lit"))
    data_citing_record_1 = faker.record(
        "lit", literature_citations=[cited_record_1["control_number"]]
    )
    citing_record_1 = LiteratureRecord.create(data_citing_record_1)
    data_citing_record_2 = faker.record(
        "lit", literature_citations=[cited_record_2["control_number"]]
    )
    citing_record_2 = LiteratureRecord.create(data_citing_record_2)
    db.session.commit()
    records = [
        create_record_async("aut"),
        create_record_async("job"),
        create_record_async("jou"),
        create_record_async("exp"),
        create_record_async("con"),
        create_record_async("dat"),
        create_record_async("ins"),
    ]
    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)
    uuids = [record.id for record in records] + [citing_record_1.id, citing_record_2.id]
    task = process_references_in_records.delay(uuids)
    results = task.get(timeout=5)
    uuids = [str(uuid) for uuid in uuids]
    assert results == uuids
    result_cited_record_1 = InspireSearch.get_record_data_from_es(cited_record_1)
    expected_result_cited_record_1_citation_count = 1
    assert (
        expected_result_cited_record_1_citation_count
        == result_cited_record_1["citation_count"]
    )
    result_cited_record_2 = InspireSearch.get_record_data_from_es(cited_record_2)
    expected_result_cited_record_2_citation_count = 1
    assert (
        expected_result_cited_record_2_citation_count
        == result_cited_record_2["citation_count"]
    )
def test_it_ignores_other_models_being_committed(orcid_token: OrcidToken,
                                                 orcid_config: Dict[str, str],
                                                 mock_orcid_client: MagicMock,
                                                 session: scoped_session,
                                                 url_safe_serializer: URLSafeSerializer):
    webhook_maintainer = maintain_orcid_webhook(orcid_config, mock_orcid_client,
                                                url_safe_serializer)
    models_committed.connect(receiver=webhook_maintainer)

    session.add(orcid_token)
    session.commit()

    assert mock_orcid_client.set_webhook.call_count == 0
    assert mock_orcid_client.remove_webhook.call_count == 0
def test_gracefully_handle_records_updating_in_wrong_order(inspire_app, clean_celery_session):
    # We want to run indexing in weird order, so disable auto indexing
    models_committed.disconnect(index_after_commit)
    cited_record = LiteratureRecord.create(data=faker.record("lit"))
    record_data = faker.record("lit", literature_citations=[cited_record.control_number])
    record = LiteratureRecord.create(data=record_data)
    db.session.commit()
    record = LiteratureRecord.get_record_by_pid_value(record.control_number)
    index_record(record.id, record.model.versions[-1].version_id)
    assert LiteratureSearch().get_source(cited_record.id)["citation_count"] == 1

    data = dict(record)
    del data["references"]
    record.update(data)
    db.session.commit()
    record = LiteratureRecord.get_record_by_pid_value(record.control_number)

    data = dict(record)
    data["titles"][0] = {"title": "New Title"}
    record.update(data)
    db.session.commit()
    record = LiteratureRecord.get_record_by_pid_value(record.control_number)
    index_record(record.id, record.model.versions[-1].version_id)
    record = LiteratureRecord.get_record_by_pid_value(record.control_number)

    assert LiteratureSearch().get_source(cited_record.id)["citation_count"] == 1
    assert LiteratureSearch().get_source(record.id)["titles"] == [{"title": "New Title"}]

    index_record(record.id, record.model.versions[-2].version_id)

    assert LiteratureSearch().get_source(cited_record.id)["citation_count"] == 0
    assert LiteratureSearch().get_source(record.id)["titles"] == [{"title": "New Title"}]
    models_committed.connect(index_after_commit)
def create_app(config: Config, clients: Clients) -> Flask:
    app = Flask(__name__)
    app.TRAP_HTTP_EXCEPTIONS = True
    app.config.from_object(config)

    db.app = app
    db.init_app(app)
    Migrate(app, db)

    orcid_client = OrcidClient(config.orcid['api_uri'])
    app.orcid_client = orcid_client
    orcid_tokens = SQLAlchemyOrcidTokens(db)
    profiles = SQLAlchemyProfiles(db)
    uri_signer = URLSafeSerializer(config.orcid['webhook_key'],
                                   signer_kwargs={'key_derivation': 'hmac',
                                                  'digest_method': hashlib.sha512})

    config_bus = dict(config.bus)
    config_bus['env'] = config.name
    publisher = get_publisher(config=config_bus)

    app.commands = [
        ClearCommand(orcid_tokens, profiles),
        CreateProfileCommand(profiles),
        ReadConfiguration(config),
        SetOrcidWebhooksCommand(profiles, config.orcid, orcid_client, uri_signer),
    ]

    app.register_blueprint(api.create_blueprint(profiles))
    app.register_blueprint(oauth2.create_blueprint(config.orcid, clients, profiles,
                                                   orcid_client, orcid_tokens),
                           url_prefix='/oauth2')
    app.register_blueprint(ping.create_blueprint())
    app.register_blueprint(webhook.create_blueprint(profiles, config.orcid, orcid_client,
                                                    orcid_tokens, uri_signer))

    from werkzeug.exceptions import default_exceptions
    for code in default_exceptions:
        app.errorhandler(code)(errors.http_error_handler)
    app.register_error_handler(Exception, errors.error_handler)
    app.register_error_handler(ClientError, errors.client_error_handler)
    app.register_error_handler(OAuth2Error, errors.oauth2_error_handler)

    models_committed.connect(maintain_orcid_webhook(config.orcid, orcid_client, uri_signer),
                             weak=False)
    models_committed.connect(send_update_events(publisher=publisher), weak=False)

    return app
def test_it_will_send_event_for_affiliation_insert(mock_publisher: MagicMock,
                                                   profile: Profile,
                                                   session: scoped_session,
                                                   commit: Callable[[], None]) -> None:
    event_publisher = send_update_events(publisher=mock_publisher)
    models_committed.connect(receiver=event_publisher)

    affiliation = Affiliation('1', Address(countries.get('gb'), 'City'),
                              'Organisation', Date(2017))
    profile.add_affiliation(affiliation)
    session.add(profile)
    commit()

    assert mock_publisher.publish.call_count == 1
    assert mock_publisher.publish.call_args[0][0] == {'id': '12345678', 'type': 'profile'}
def test_process_references_in_records_process_author_records(
    mock_batch_index, inspire_app, clean_celery_session
):
    author_record = AuthorsRecord.create(faker.record("aut"))
    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [
                    {
                        "full_name": author_record["name"]["value"],
                        "record": author_record["self"],
                    }
                ]
            },
        )
    )
    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [
                    {
                        "full_name": author_record["name"]["value"],
                        "record": author_record["self"],
                    }
                ]
            },
        )
    )
    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)

    models_committed.disconnect(index_after_commit)
    author_record["name"]["value"] = "Another Name"
    author_record.update(dict(author_record))
    db.session.commit()
    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([author_record.id])
    task.get(timeout=5)

    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [str(lit_record.id), str(lit_record_2.id)]
    )
def test_index_record_fulltext_manually(inspire_app, clean_celery_session,
                                        override_config, s3, datadir):
    metadata = {"foo": "bar"}
    pdf_path = os.path.join(datadir, "2206.04407.pdf")
    create_s3_bucket(KEY)
    create_s3_file(
        current_s3_instance.get_bucket_for_file_key(KEY),
        KEY,
        pdf_path,
        metadata,
        **{"ContentType": "application/pdf"},
    )

    with override_config(FEATURE_FLAG_ENABLE_FULLTEXT=True, FEATURE_FLAG_ENABLE_FILES=False):
        data = faker.record("lit")
        data.update(
            {
                "documents": [
                    {
                        "source": "arxiv",
                        "fulltext": True,
                        "filename": "new_doc.pdf",
                        "key": KEY,
                        "url": "http://www.africau.edu/images/default/sample.pdf",
                    }
                ]
            }
        )
        rec = LiteratureRecord.create(data)
        models_committed.disconnect(index_after_commit)
        db.session.commit()
        models_committed.connect(index_after_commit)
        assert_record_not_in_es(rec["control_number"])
        rec.index_fulltext()

        def assert_record_in_es():
            current_search.flush_and_refresh("*")
            record_lit_es = LiteratureSearch().get_record(str(rec.id)).execute().hits.hits[0]
            document = record_lit_es._source["documents"][0]
            assert "attachment" in document
            assert "text" not in document  # pipeline should remove it

        retry_until_pass(assert_record_in_es, timeout=90, retry_interval=5)
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    from flask_sqlalchemy import models_committed
    from invenio_records.receivers import record_modification
    from invenio_records.tasks.index import get_record_index
    from invenio.base.globals import cfg
    from elasticsearch.helpers import bulk as es_bulk
    from inspirehep.modules.citations.receivers import (
        catch_citations_insert,
        add_citation_count_on_insert_or_update,
        catch_citations_update
    )
    from invenio_records.signals import before_record_index, after_record_insert

    models_committed.disconnect(record_modification)
    after_record_insert.disconnect(catch_citations_insert)
    before_record_index.disconnect(add_citation_count_on_insert_or_update)
    before_record_index.disconnect(catch_citations_update)

    records_to_index = []
    try:
        for record in chunk:
            recid = json = None
            try:
                recid, json = create_record(record, force=True, dry_run=dry_run)
                index = get_record_index(json) or cfg['SEARCH_ELASTIC_DEFAULT_INDEX']
                before_record_index.send(recid, json=json, index=index)
                json.update({'_index': index, '_type': 'record',
                             '_id': recid, 'citation_count': 0})
                records_to_index.append(json)
            except Exception as err:
                logger.error("ERROR with record {} and json {}".format(recid, json))
                logger.exception(err)
                if broken_output:
                    broken_output_fd = open(broken_output, "a")
                    print(record, file=broken_output_fd)
        logger.info("Committing chunk")
        db.session.commit()
        logger.info("Sending chunk to elasticsearch")
        es_bulk(es, records_to_index, request_timeout=60)
    finally:
        models_committed.connect(record_modification)
        after_record_insert.connect(catch_citations_insert)
        before_record_index.connect(add_citation_count_on_insert_or_update)
        before_record_index.connect(catch_citations_update)
        db.session.close()
def test_exception_is_handled_by_catch_exception_decorator(mock_publisher: MagicMock,
                                                           profile: Profile,
                                                           session: scoped_session,
                                                           commit: Callable[[], None]) -> None:
    mock_publisher.publish.side_effect = Exception('Some Exception')
    event_publisher = send_update_events(publisher=mock_publisher)
    models_committed.connect(receiver=event_publisher)

    affiliation = Affiliation('1', Address(countries.get('gb'), 'City'),
                              'Organisation', Date(2017))
    profile.add_affiliation(affiliation)
    session.add(profile)
    commit()

    assert mock_publisher.publish.call_count == 1
def test_it_sets_a_webhook_when_a_profile_is_inserted(profile: Profile,
                                                      orcid_config: Dict[str, str],
                                                      mock_orcid_client: MagicMock,
                                                      session: scoped_session,
                                                      url_safe_serializer: URLSafeSerializer,
                                                      commit: Callable[[], None]):
    webhook_maintainer = maintain_orcid_webhook(orcid_config, mock_orcid_client,
                                                url_safe_serializer)
    models_committed.connect(receiver=webhook_maintainer)

    session.add(profile)
    commit()

    assert mock_orcid_client.set_webhook.call_count == 1
    assert mock_orcid_client.set_webhook.call_args[0][0] == '0000-0002-1825-0097'
    assert mock_orcid_client.set_webhook.call_args[0][1] == 'http://localhost/orcid-webhook/{}' \
        .format(url_safe_serializer.dumps('0000-0002-1825-0097'))
def migrate_recids_from_mirror(prod_recids, skip_files=False):
    models_committed.disconnect(index_after_commit)

    index_queue = []
    for recid in prod_recids:
        with db.session.begin_nested():
            record = migrate_record_from_mirror(
                LegacyRecordsMirror.query.get(recid),
                skip_files=skip_files,
            )
            if record and not record.get('deleted'):
                index_queue.append(create_index_op(record))
    db.session.commit()

    req_timeout = current_app.config['INDEXER_BULK_REQUEST_TIMEOUT']
    es_bulk(
        es,
        index_queue,
        stats_only=True,
        request_timeout=req_timeout,
    )

    models_committed.connect(index_after_commit)
from extensions.socketio import socketio
from flask_sqlalchemy import models_committed

from .schemas import PostSchema
from .models import Post

namespace = '/blog/io'
post_schema = PostSchema()


def post_after_commit(sender, changes):
    for model, change in changes:
        if isinstance(model, Post) and change in ('insert',):
            emit_new_posts()
            break


models_committed.connect(post_after_commit)


def emit_new_posts():
    socketio.emit('new posts', namespace=namespace)
from flask_sqlalchemy import models_committed, before_models_committed


def record_oplogs(app, changes):
    if request.headers.environ.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest':
        return
    from system.models import OperationLog, SecurityLog
    for change in changes:
        if isinstance(change[0], OperationLog) or isinstance(change[0], SecurityLog):
            continue
        oplog = OperationLog()
        oplog.user = current_user
        oplog.module = unicode(change[0])
        oplog.action = change[1]
        oplog.terminal_ip = request.remote_addr
        oplog.summary = str(change[0].__dict__)
        db.session.add(oplog)


models_committed.connect(record_oplogs)
before_models_committed.connect(record_oplogs)


@login_mgr.user_loader
def load_user(id):
    return User.query.get(int(id))
    #user = cache.get("user-"+id)
    #if user is None:
    #    user = User.query.get(int(id))
    #    cache.set("user-"+id, user)
    #return user


from tango.login import user_logged_in, user_logged_out


def record_login(app, user):
    #cache.set("user-"+str(user.id), user)
    from system.models import SecurityLog
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    from flask_sqlalchemy import models_committed
    from invenio_records.receivers import record_modification
    from invenio_records.tasks.index import get_record_index
    from invenio.base.globals import cfg
    from elasticsearch.helpers import bulk as es_bulk
    from inspirehep.modules.citations.receivers import (
        catch_citations_insert,
        add_citation_count_on_insert_or_update,
        catch_citations_update
    )
    from invenio_records.signals import before_record_index, after_record_insert

    models_committed.disconnect(record_modification)
    after_record_insert.disconnect(catch_citations_insert)
    before_record_index.disconnect(add_citation_count_on_insert_or_update)
    before_record_index.disconnect(catch_citations_update)

    records_to_index = []
    try:
        for raw_record in chunk:
            json = None
            record = marc_create_record(raw_record, keep_singletons=False)
            recid = int(record['001'])
            if not dry_run:
                prod_record = InspireProdRecords(recid=recid)
                prod_record.marcxml = raw_record
            try:
                with db.session.begin_nested():
                    errors, recid, json = create_record(
                        recid, record, force=True,
                        dry_run=dry_run, validation=True
                    )
                    if dry_run:
                        continue
                    prod_record.valid = not errors
                    prod_record.errors = errors
                    index = get_record_index(json) or cfg['SEARCH_ELASTIC_DEFAULT_INDEX']
                    before_record_index.send(recid, json=json, index=index)
                    json.update({'_index': index, '_type': 'record',
                                 '_id': recid, 'citation_count': 0})
                    records_to_index.append(json)
                    prod_record.successful = True
                    db.session.merge(prod_record)
            except Exception as err:
                logger.error("ERROR with record {} and json {}".format(recid, json))
                logger.exception(err)
                if not dry_run:
                    prod_record.successful = False
                    db.session.merge(prod_record)
        logger.info("Committing chunk")
        db.session.commit()
        logger.info("Sending chunk to elasticsearch")
        if not dry_run:
            es_bulk(es, records_to_index, request_timeout=60)
    finally:
        models_committed.connect(record_modification)
        after_record_insert.connect(catch_citations_insert)
        before_record_index.connect(add_citation_count_on_insert_or_update)
        before_record_index.connect(catch_citations_update)
        db.session.close()
def prepare_indexing(app):
    """Prepare indexing."""
    before_models_committed.connect(register_record_modification)
    models_committed.connect(index_record_modification)
from datetime import datetime, timedelta

from cbbpoll import db, app
from cbbpoll.message import send_reddit_pm
from itsdangerous import TimedJSONWebSignatureSerializer as Serializer
from sqlalchemy import select, desc, UniqueConstraint
from sqlalchemy.ext.hybrid import hybrid_property, hybrid_method
from flask_sqlalchemy import models_committed
from flask_login import AnonymousUserMixin


def on_models_committed(_, changes):
    for obj, change in changes:
        if change == 'insert' and hasattr(obj, '__commit_insert__'):
            obj.__commit_insert__()

models_committed.connect(on_models_committed, sender=app)


class User(db.Model):
    __tablename__ = 'user'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    nickname = db.Column(db.String(20), index=True)
    email = db.Column(db.String(120), index=True)
    emailConfirmed = db.Column(db.Boolean, default=False)
    role = db.Column(db.Enum('u', 'a'), default='u')
    accessToken = db.Column(db.String(30))
    refreshToken = db.Column(db.String(30))
    refreshAfter = db.Column(db.DateTime)
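# The on_models_committed dispatcher above calls obj.__commit_insert__() on
# every freshly inserted model instance that defines it. A model opting into
# that hook might look like the following; the Notification class and its
# columns are a hypothetical illustration, not part of the cbbpoll codebase.
class Notification(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    message = db.Column(db.String(255))

    def __commit_insert__(self):
        # runs once per instance, after the INSERT has been committed
        print('notification {} created'.format(self.id))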
from flask_sqlalchemy import models_committed

from calepin.app import app
from calepin.models import Blog

models_committed.connect(Blog.on_committed, sender=app)
def test_record_indexing(app, queue):
    """Run record autoindexer."""
    @before_record_index.connect_via(app)
    def remove_schema(sender, json=None, record=None):
        if '$schema' in json:
            del json['$schema']

    models_committed.connect(process_models_committed_signal, sender=app)

    with app.app_context():
        current_search_client.indices.delete_alias('_all', '_all', ignore=[400, 404])
        current_search_client.indices.delete('*')
        aliases = current_search_client.indices.get_aliases()
        assert 0 == len(aliases)

    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        result = runner.invoke(cmd, ['destroy', '--yes-i-know'], obj=script_info)
        result = runner.invoke(cmd, ['init'], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            from invenio_records.models import RecordMetadata
            with db.session.begin_nested():
                record1 = RecordMetadata(json={
                    '$schema': ('http://example.com/schemas/'  # external site
                                'records/default-v1.0.0.json'),
                    'title': 'Test1',
                })
                db.session.add(record1)
                record2 = RecordMetadata(json={
                    '$schema': {
                        '$ref': ('http://example.com/schemas/'  # external site
                                 'records/default-v1.0.0.json')
                    },
                    'title': 'Test2',
                })
                db.session.add(record2)
            db.session.commit()

            record_indexer = RecordIndexer(queue=queue)
            result = record_indexer.process_bulk_queue()
            assert 2 == len(list(result))

            response = current_search_client.get(
                index='records-default-v1.0.0',
                id=record1.id,
            )
            assert str(record1.id) == response['_id']
            response = current_search_client.get(
                index='records-default-v1.0.0',
                id=record2.id,
            )
            assert str(record2.id) == response['_id']

            db.session.delete(record1)
            db.session.commit()

            record_indexer.process_bulk_queue()
            response = current_search_client.get(
                index='records-default-v1.0.0',
                id=record1.id,
                ignore=404,
            )
            assert not response['found']

    # Clean-up:
    with app.app_context():
        result = runner.invoke(cmd, ['destroy', '--yes-i-know'], obj=script_info)
        assert 0 == result.exit_code