def setup_and_teardown(app):
    """
    Run create mapping and purge queue before tests and clear out the
    DB tables after the test
    """
    import transaction
    from sqlalchemy import MetaData
    from zope.sqlalchemy import mark_changed
    # BEFORE THE TEST - just run CM for the TEST_TYPE by default
    create_mapping.run(app, collections=[TEST_TYPE], skip_indexing=True)
    app.registry[INDEXER_QUEUE].clear_queue()

    yield  # run the test

    # AFTER THE TEST
    session = app.registry[DBSESSION]
    connection = session.connection().connect()
    meta = MetaData(bind=session.connection(), reflect=True)
    for table in meta.sorted_tables:
        print('Clear table %s' % table)
        print('Count before -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)))
        connection.execute(table.delete())
        print('Count after -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)), '\n')
    session.flush()
    mark_changed(session())
    transaction.commit()
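# A minimal sketch of how a generator like setup_and_teardown above is typically
# wired up as an autouse pytest fixture; the decorator and fixture name here are
# assumptions for illustration, not part of the original snippet.
import pytest

@pytest.fixture(autouse=True)
def autouse_setup_and_teardown(app):
    # delegate to the generator so every test in the module gets the same
    # create_mapping/queue-purge setup and DB-table cleanup afterwards
    yield from setup_and_teardown(app)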
def test_create_mapping_index_diff(app, testapp, indexer_testapp):
    from snovault.elasticsearch import create_mapping
    es = app.registry[ELASTIC_SEARCH]
    # post a couple items, index, then remove one
    res = testapp.post_json(TEST_COLL, {'required': ''})
    test_uuid = res.json['@graph'][0]['uuid']
    testapp.post_json(TEST_COLL, {'required': ''})  # second item
    create_mapping.run(app, collections=[TEST_TYPE], purge_queue=True)
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(4)
    initial_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert initial_count == 2
    # remove one item
    es.delete(index=TEST_TYPE, doc_type=TEST_TYPE, id=test_uuid)
    time.sleep(8)
    second_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert second_count == 1
    # patch the item to increment version
    res = testapp.patch_json(TEST_COLL + test_uuid, {'required': 'meh'})
    # index with index_diff to ensure the item is reindexed
    create_mapping.run(app, collections=[TEST_TYPE], index_diff=True)
    res = indexer_testapp.post_json('/index', {'record': True})
    time.sleep(4)
    third_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert third_count == initial_count
def test_es_indices(app, elasticsearch):
    """
    Test overall create_mapping functionality using app.
    Do this by checking es directly before and after running mapping.
    Delete an index directly, run again to see if it recovers.
    """
    es = app.registry[ELASTIC_SEARCH]
    item_types = app.registry[TYPES].by_item_type
    test_collections = [TEST_TYPE]
    # run create mapping for all types, but no need to index
    run(app, collections=test_collections, skip_indexing=True)
    # check that mappings and settings are in index
    for item_type in test_collections:
        item_mapping = type_mapping(app.registry[TYPES], item_type)
        try:
            item_index = es.indices.get(index=item_type)
        except Exception:
            assert False
        found_index_mapping = item_index.get(item_type, {}).get('mappings').get(item_type, {}).get('properties', {}).get('embedded')
        found_index_settings = item_index.get(item_type, {}).get('settings')
        assert found_index_mapping
        assert found_index_settings
        # get the item record from meta and compare that
        full_mapping = create_mapping_by_type(item_type, app.registry)
        item_record = build_index_record(full_mapping, item_type)
        try:
            item_meta = es.get(index='meta', doc_type='meta', id=item_type)
        except Exception:
            assert False
        meta_record = item_meta.get('_source', None)
        assert meta_record
        assert item_record == meta_record
def setup_and_teardown(app):
    """
    Run create mapping and purge queue before tests and clear out the
    DB tables after the test
    """
    # BEFORE THE TEST - run create mapping for test types and clear queues
    create_mapping.run(app, collections=TEST_COLLECTIONS, skip_indexing=True)
    app.registry[INDEXER_QUEUE].clear_queue()

    yield  # run the test

    # AFTER THE TEST
    session = app.registry[DBSESSION]
    connection = session.connection().connect()
    meta = MetaData(bind=session.connection())
    meta.reflect()
    for table in meta.sorted_tables:
        print('Clear table %s' % table)
        print('Count before -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)))
        connection.execute(table.delete())
        print('Count after -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)), '\n')
    session.flush()
    mark_changed(session())
    transaction.commit()
def test_sync_and_queue_indexing(app, testapp, indexer_testapp):
    es = app.registry[ELASTIC_SEARCH]
    indexer_queue = app.registry[INDEXER_QUEUE]
    # clear queue before starting this one
    indexer_queue.clear_queue()
    # queued on post - total of one item queued
    res = testapp.post_json(TEST_COLL, {'required': ''})
    # synchronously index
    create_mapping.run(app, collections=[TEST_TYPE], sync_index=True)
    # time.sleep(6)
    doc_count = tries = 0
    while tries < 6:
        doc_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
        if doc_count != 0:
            break
        time.sleep(1)
        tries += 1
    assert doc_count == 1
    # post second item to database but do not index (don't load into es)
    # queued on post - total of two items queued
    res = testapp.post_json(TEST_COLL, {'required': ''})
    # time.sleep(2)
    doc_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    # doc_count has not yet updated
    assert doc_count == 1
    # clear the queue by indexing and then run create mapping to queue all items
    res = indexer_testapp.post_json('/index', {'record': True})
    assert res.json['indexing_count'] == 2
    create_mapping.run(app, collections=[TEST_TYPE])
    res = indexer_testapp.post_json('/index', {'record': True})
    assert res.json['indexing_count'] == 2
    time.sleep(4)
    doc_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert doc_count == 2
def es_app(es_app_settings, **kwargs):
    """
    App that uses both Postgres and ES - pass this as "app" argument to TestApp.
    Pass all kwargs onto create_mapping
    """
    app = main({}, **es_app_settings)
    create_mapping.run(app, **kwargs)
    return app
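# A minimal usage sketch for es_app above, assuming webtest is available and that
# es_app_settings comes from a fixture elsewhere in the suite; the extra_environ
# keys shown here are illustrative assumptions, not part of the original snippet.
import webtest

def make_es_testapp(es_app_settings):
    # build the ES-backed app without indexing, then wrap it for HTTP-style tests
    app = es_app(es_app_settings, skip_indexing=True)
    environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'}
    return webtest.TestApp(app, extra_environ=environ)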
def test_indexing_info(app, testapp, indexer_testapp):
    """
    Test the information on indexing-info for a given uuid and make sure that
    it updates properly following indexing
    """
    # first, run create mapping with the indices we will use
    create_mapping.run(
        app,
        collections=['testing_link_target_sno', 'testing_link_source_sno'],
        skip_indexing=True
    )
    target1 = {'name': 't_one', 'uuid': str(uuid.uuid4())}
    target2 = {'name': 't_two', 'uuid': str(uuid.uuid4())}
    source = {
        'name': 'idx_source',
        'target': target1['uuid'],
        'uuid': str(uuid.uuid4()),
        'status': 'current',
    }
    testapp.post_json('/testing-link-targets-sno/', target1, status=201)
    testapp.post_json('/testing-link-targets-sno/', target2, status=201)
    testapp.post_json('/testing-link-sources-sno/', source, status=201)
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    # indexing-info fails without uuid query param
    idx_info_err = testapp.get('/indexing-info')
    assert idx_info_err.json['status'] == 'error'
    src_idx_info = testapp.get('/indexing-info?uuid=%s' % source['uuid'])
    assert src_idx_info.json['status'] == 'success'
    # up to date
    assert src_idx_info.json['sid_es'] == src_idx_info.json['sid_db']
    assert set(src_idx_info.json['uuids_invalidated']) == set([target1['uuid'], source['uuid']])
    # update without indexing; view should capture the changes but sid_es will not change
    testapp.patch_json('/testing-link-sources-sno/' + source['uuid'], {'target': target2['uuid']})
    src_idx_info2 = testapp.get('/indexing-info?uuid=%s' % source['uuid'])
    assert src_idx_info2.json['status'] == 'success'
    # es is now out of date, since not indexed yet
    assert src_idx_info2.json['sid_es'] < src_idx_info2.json['sid_db']
    # target1 will still be in invalidated uuids, since es has not updated
    assert set(src_idx_info2.json['uuids_invalidated']) == set([target1['uuid'], target2['uuid'], source['uuid']])
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    # after indexing, make sure sid_es is updated
    src_idx_info3 = testapp.get('/indexing-info?uuid=%s' % source['uuid'])
    assert src_idx_info3.json['status'] == 'success'
    assert src_idx_info3.json['sid_es'] == src_idx_info3.json['sid_db']
    # target1 has now been updated and removed from invalidated uuids
    assert set(src_idx_info3.json['uuids_invalidated']) == set([target2['uuid'], source['uuid']])
    # try the view without calculated embedded view
    src_idx_info4 = testapp.get('/indexing-info?uuid=%s&run=False' % source['uuid'])
    assert src_idx_info4.json['status'] == 'success'
    assert 'uuids_invalidated' not in src_idx_info4.json
    assert 'embedded_seconds' not in src_idx_info4.json
def app(app_settings):
    from snowflakes import main
    from snovault.elasticsearch import create_mapping
    app = main({}, **app_settings)
    create_mapping.run(app, skip_indexing=True)

    yield app

    from snovault import DBSESSION
    DBSession = app.registry[DBSESSION]
    # Dispose connections so postgres can tear down.
    DBSession.bind.pool.dispose()
def app(app_settings, **kwargs):
    """ Pass all kwargs onto create_mapping """
    from encoded import main
    from snovault.elasticsearch import create_mapping
    app = main({}, **app_settings)
    create_mapping.run(app, **kwargs)

    yield app

    from snovault import DBSESSION
    DBSession = app.registry[DBSESSION]
    # Dispose connections so postgres can tear down.
    DBSession.bind.pool.dispose()
def test_indexing_invalid_sid_linked_items(app, testapp, indexer_testapp):
    """
    Make sure that items sent to the deferred queue do not trigger
    indexing of secondary items
    """
    indexer_queue = app.registry[INDEXER_QUEUE]
    es = app.registry[ELASTIC_SEARCH]
    create_mapping.run(
        app,
        collections=['testing_link_target_sno', 'testing_link_source_sno'],
        skip_indexing=True
    )
    target1 = {'name': 't_one', 'uuid': str(uuid.uuid4())}
    source = {
        'name': 'idx_source',
        'target': target1['uuid'],
        'uuid': str(uuid.uuid4()),
        'status': 'current',
    }
    testapp.post_json('/testing-link-targets-sno/', target1, status=201)
    testapp.post_json('/testing-link-sources-sno/', source, status=201)
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    es_item = es.get(index='testing_link_target_sno', doc_type='testing_link_target_sno',
                     id=target1['uuid'])
    initial_version = es_item['_version']
    # now try to manually bump an invalid version for the queued item
    # expect it to be sent to the deferred queue.
    to_queue = {
        'uuid': target1['uuid'],
        'sid': initial_version + 2,
        'strict': False,
        'timestamp': datetime.utcnow().isoformat()
    }
    indexer_queue.send_messages([to_queue], target_queue='primary')
    # make sure nothing is in secondary queue before calling /index
    received_secondary = indexer_queue.receive_messages(target_queue='secondary')
    assert len(received_secondary) == 0
    res = indexer_testapp.post_json('/index', {'record': True})
    time.sleep(4)
    assert res.json['indexing_count'] == 0
    # make sure nothing is in secondary queue after calling /index
    received_secondary = indexer_queue.receive_messages(target_queue='secondary')
    assert len(received_secondary) == 0
    received_deferred = indexer_queue.receive_messages(target_queue='deferred')
    assert len(received_deferred) == 1
    indexer_queue.delete_messages(received_deferred, target_queue='deferred')
def app(app_settings):
    from snowflakes import main
    from snovault.elasticsearch import create_mapping
    app = main({}, **app_settings)
    create_mapping.run(app)

    yield app

    # Shutdown multiprocessing pool to close db conns.
    from snovault.elasticsearch import INDEXER
    app.registry[INDEXER].shutdown()

    from snovault import DBSESSION
    DBSession = app.registry[DBSESSION]
    # Dispose connections so postgres can tear down.
    DBSession.bind.pool.dispose()
def test_index_data_workbook(app, workbook, testapp, indexer_testapp, htmltestapp):
    from snovault.elasticsearch import create_mapping
    es = app.registry['elasticsearch']
    # we need to reindex the collections to make sure numbers are correct
    # TODO: NAMESPACE - here, passed in list to create_mapping
    # turn off logging for a bit
    create_mapping.run(app, sync_index=True)
    # check counts and ensure they're equal
    testapp_counts = testapp.get('/counts')
    total_counts = testapp_counts.json['db_es_total']
    split_counts = total_counts.split()
    # 2nd item is db counts, 4th is es
    assert int(split_counts[1]) == int(split_counts[3])
    for item_type in TYPE_LENGTH.keys():
        tries = 0
        item_len = None
        while item_len is None or (item_len != TYPE_LENGTH[item_type] and tries < 3):
            if item_len is not None:
                create_mapping.run(app, collections=[item_type], strict=True, sync_index=True)
            es.indices.refresh(index=item_type)
            item_len = es.count(index=item_type, doc_type=item_type).get('count')
            print('... ES COUNT: %s' % item_len)
            print('... TYPE COUNT: %s' % TYPE_LENGTH[item_type])
            tries += 1
        assert item_len == TYPE_LENGTH[item_type]
        if item_len > 0:
            res = testapp.get('/%s?limit=all' % item_type, status=[200, 301, 404])
            res = res.follow()
            for item_res in res.json.get('@graph', []):
                index_view_res = es.get(index=item_type, doc_type=item_type,
                                        id=item_res['uuid'])['_source']
                # make sure that the linked_uuids match the embedded data
                assert 'linked_uuids_embedded' in index_view_res
                assert 'embedded' in index_view_res
                found_uuids = recursively_find_uuids(index_view_res['embedded'], set())
                # all found uuids must be within the linked_uuids
                assert found_uuids <= set([link['uuid'] for link in index_view_res['linked_uuids_embedded']])
                # if uuids_rev_linking to me, make sure they show up in @@links
                if len(index_view_res.get('uuids_rev_linked_to_me', [])) > 0:
                    links_res = testapp.get('/' + item_res['uuid'] + '/@@links', status=200)
                    link_uuids = [lnk['uuid'] for lnk in links_res.json.get('uuids_linking_to')]
                    assert set(index_view_res['uuids_rev_linked_to_me']) <= set(link_uuids)
                # previously test_html_pages
                try:
                    html_res = htmltestapp.get(item_res['@id'])
                    assert html_res.body.startswith(b'<!DOCTYPE html>')
                except Exception as e:
                    pass
def teardown(app, use_collections=TEST_COLLECTIONS):
    import transaction
    from sqlalchemy import MetaData
    from zope.sqlalchemy import mark_changed
    from snovault import DBSESSION
    from snovault.elasticsearch import create_mapping
    from .conftest import indexer_testapp
    # index and then run create mapping to clear things out
    indexer_testapp(app).post_json('/index', {'record': True})
    create_mapping.run(app, collections=use_collections, skip_indexing=True)
    session = app.registry[DBSESSION]
    connection = session.connection().connect()
    meta = MetaData(bind=session.connection(), reflect=True)
    for table in meta.sorted_tables:
        print('Clear table %s' % table)
        print('Count before -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)))
        connection.execute(table.delete())
        print('Count after -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)), '\n')
    session.flush()
    mark_changed(session())
    transaction.commit()
def test_es_purge_uuid(app, testapp, indexer_testapp, session):
    indexer_queue = app.registry[INDEXER_QUEUE]
    es = app.registry[ELASTIC_SEARCH]
    ## Adding new test resource to DB
    storage = app.registry[STORAGE]
    test_body = {'required': '', 'simple1': 'foo', 'simple2': 'bar'}
    res = testapp.post_json(TEST_COLL, test_body)
    test_uuid = res.json['@graph'][0]['uuid']
    check = storage.get_by_uuid(test_uuid)
    assert str(check.uuid) == test_uuid
    # Then index it:
    create_mapping.run(app, collections=[TEST_TYPE], sync_index=True, purge_queue=True)
    time.sleep(4)
    ## Now ensure that we do have it in ES:
    try:
        es_item = es.get(index=TEST_TYPE, doc_type=TEST_TYPE, id=test_uuid)
    except Exception:
        assert False
    item_uuid = es_item.get('_source', {}).get('uuid')
    assert item_uuid == test_uuid
    check_post_from_rdb = storage.write.get_by_uuid(test_uuid)
    assert check_post_from_rdb is not None
    assert es_item['_source']['embedded']['simple1'] == test_body['simple1']
    assert es_item['_source']['embedded']['simple2'] == test_body['simple2']
    # The actual delete
    storage.purge_uuid(test_uuid)  # We can optionally pass in TEST_TYPE as well for better performance.
    check_post_from_rdb_2 = storage.write.get_by_uuid(test_uuid)
    assert check_post_from_rdb_2 is None
    time.sleep(5)  # Allow time for ES API to send network request to ES server to perform delete.
    check_post_from_es_2 = es.get(index=TEST_TYPE, doc_type=TEST_TYPE, id=test_uuid, ignore=[404])
    assert check_post_from_es_2['found'] == False
def test_create_mapping_check_first(app, testapp, indexer_testapp):
    # ensure create mapping has been run
    from snovault.elasticsearch import create_mapping
    es = app.registry[ELASTIC_SEARCH]
    # post an item and then index it
    testapp.post_json(TEST_COLL, {'required': ''})
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(4)
    initial_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    # make sure the meta entry is created
    assert es.get(index='meta', doc_type='meta', id=TEST_TYPE)
    # run with check_first but skip indexing. counts should still match because
    # the index wasn't removed
    create_mapping.run(app, check_first=True, collections=[TEST_TYPE], skip_indexing=True)
    time.sleep(4)
    assert es.get(index='meta', doc_type='meta', id=TEST_TYPE)
    second_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    counter = 0
    while second_count != initial_count and counter < 10:
        time.sleep(2)
        second_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
        counter += 1
    assert second_count == initial_count
    # make sure the meta entry is still there
    assert es.get(index='meta', doc_type='meta', id=TEST_TYPE)
    # remove the index manually and do not index
    # should cause create_mapping w/ check_first to recreate
    es.delete(index='meta', doc_type='meta', id=TEST_TYPE)
    es.indices.delete(index=TEST_TYPE)
    create_mapping.run(app, collections=[TEST_TYPE], check_first=True, skip_indexing=True)
    third_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert third_count == 0
    # but the meta entry should be there
    assert es.get(index='meta', doc_type='meta', id=TEST_TYPE)
def app(app_settings):
    from .. import test_indexing
    from snovault.elasticsearch import create_mapping
    for app in test_indexing.app(app_settings):
        create_mapping.run(app)
        yield app
def test_aggregated_items(app, testapp, indexer_testapp):
    """
    Test that the item aggregation works, which only occurs when indexing
    is actually run. This test does the following:
    - Post a TestingLinkAggregateSno, which links to 2 TestingLinkSourceSno
    - Check aggregated-items view for the item; should be empty before indexing
    - Index and retrieve the TestingLinkAggregateSno from ES
    - Check that the aggregations worked correctly
    - Patch the TestingLinkAggregateSno to only 1 TestingLinkSourceSno, index
    - Ensure that the aggregated_items changed, checking ES
    - Ensure that duplicate aggregated_items are deduplicated
    - Check aggregated-items view; should now match ES results
    """
    import webtest
    es = app.registry[ELASTIC_SEARCH]
    indexer_queue = app.registry[INDEXER_QUEUE]
    # first, run create mapping with the indices we will use
    create_mapping.run(
        app,
        collections=['testing_link_target_sno', 'testing_link_aggregate_sno'],
        skip_indexing=True
    )
    # generate a uuid for the aggregate item
    agg_res_uuid = str(uuid.uuid4())
    target1 = {'name': 'one', 'uuid': '775795d3-4410-4114-836b-8eeecf1d0c2f'}
    target2 = {'name': 'two', 'uuid': '775795d3-4410-4114-836b-8eeecf1daabc'}
    aggregated = {
        'name': 'A',
        'targets': [
            {
                'test_description': 'target one',
                'target': '775795d3-4410-4114-836b-8eeecf1d0c2f'
            },
            {
                'test_description': 'target two',
                'target': '775795d3-4410-4114-836b-8eeecf1daabc'
            }
        ],
        'uuid': agg_res_uuid,
        'status': 'current'
    }
    # you can do stuff like this and it will take effect
    # app.registry['types']['testing_link_aggregate_sno'].aggregated_items['targets'] = ['target.name', 'test_description']
    target1_res = testapp.post_json('/testing-link-targets-sno/', target1, status=201)
    target2_res = testapp.post_json('/testing-link-targets-sno/', target2, status=201)
    agg_res = testapp.post_json('/testing-link-aggregates-sno/', aggregated, status=201)
    agg_res_atid = agg_res.json['@graph'][0]['@id']
    # ensure that aggregated-items view shows nothing before indexing
    pre_agg_view = testapp.get(agg_res_atid + '@@aggregated-items', status=200).json
    assert pre_agg_view['@id'] == agg_res_atid
    assert pre_agg_view['aggregated_items'] == {}
    # wait for the items to index
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    # wait for test-link-aggregated item to index
    doc_count = es.count(index='testing_link_aggregate_sno',
                         doc_type='testing_link_aggregate_sno').get('count')
    tries = 0
    while doc_count < 1 and tries < 5:
        time.sleep(2)
        doc_count = es.count(index='testing_link_aggregate_sno',
                             doc_type='testing_link_aggregate_sno').get('count')
        tries += 1
    assert doc_count == 1
    es_agg_res = es.get(index='testing_link_aggregate_sno',
                        doc_type='testing_link_aggregate_sno', id=agg_res_uuid)
    assert 'aggregated_items' in es_agg_res['_source']
    es_agg_items = es_agg_res['_source']['aggregated_items']
    assert 'targets' in es_agg_items
    assert len(es_agg_items['targets']) == 2
    for idx, target_agg in enumerate(es_agg_items['targets']):
        # order of targets should be maintained
        assert target_agg['parent'] == agg_res.json['@graph'][0]['@id']
        assert target_agg['embedded_path'] == 'targets'
        if idx == 0:
            assert target_agg['item']['test_description'] == 'target one'
            assert target_agg['item']['target']['uuid'] == target1['uuid']
        else:
            assert target_agg['item']['test_description'] == 'target two'
            assert target_agg['item']['target']['uuid'] == target2['uuid']
    # now make sure they get updated on a patch
    # use duplicate items, which should be deduplicated
    testapp.patch_json(
        '/testing-link-aggregates-sno/' + aggregated['uuid'],
        {'targets': [
            {'test_description': 'target one revised',
             'target': '775795d3-4410-4114-836b-8eeecf1d0c2f'},
            {'test_description': 'target one revised',
             'target': '775795d3-4410-4114-836b-8eeecf1d0c2f'},
        ]}
    )
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(10)  # be lazy and just wait a bit
    es_agg_res = es.get(index='testing_link_aggregate_sno',
                        doc_type='testing_link_aggregate_sno', id=agg_res_uuid)
    assert 'aggregated_items' in es_agg_res['_source']
    es_agg_items = es_agg_res['_source']['aggregated_items']
    assert 'targets' in es_agg_items
    assert len(es_agg_items['targets']) == 1
    assert es_agg_items['targets'][0]['item']['test_description'] == 'target one revised'
    # check that the aggregated-items view now works
    post_agg_view = testapp.get(agg_res_atid + '@@aggregated-items', status=200).json
    assert post_agg_view['@id'] == agg_res_atid
    assert post_agg_view['aggregated_items'] == es_agg_res['_source']['aggregated_items']
    # clean up the test items
    testapp.patch_json('/testing-link-aggregates-sno/' + aggregated['uuid'],
                       {'targets': []})
    indexer_testapp.post_json('/index', {'record': True})
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="Run development servers",
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--app-name', help="Pyramid app name in configfile")
    parser.add_argument('config_uri', help="path to configfile")
    parser.add_argument('--clear', action="store_true", help="Clear existing data")
    parser.add_argument('--init', action="store_true", help="Init database")
    parser.add_argument('--load', action="store_true", help="Load test set")
    parser.add_argument('--datadir', default='/tmp/snovault', help="path to datadir")
    parser.add_argument('--access-key', default=None,
                        help="store local or copy to s3, will generate and store access key for admin user")
    args = parser.parse_args()

    logging.basicConfig(format='')
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.DEBUG)

    # get the config and see if we want to connect to non-local servers
    config = get_appsettings(args.config_uri, args.app_name)

    from snovault.tests import elasticsearch_fixture, postgresql_fixture
    from snovault.elasticsearch import create_mapping
    datadir = os.path.abspath(args.datadir)
    pgdata = os.path.join(datadir, 'pgdata')
    esdata = os.path.join(datadir, 'esdata')
    ### comment out from HERE...
    if args.clear:
        for dirname in [pgdata, esdata]:
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
    if args.init:
        postgresql_fixture.initdb(pgdata, echo=True)
    ### ... to HERE to disable recreation of test db
    ### may have to `rm /tmp/snovault/pgdata/postmaster.pid`

    postgres = postgresql_fixture.server_process(pgdata, echo=True)
    elasticsearch = elasticsearch_fixture.server_process(esdata, echo=True)
    nginx = nginx_server_process(echo=True)
    processes = [postgres, elasticsearch, nginx]

    @atexit.register
    def cleanup_process():
        for process in processes:
            if process.poll() is None:
                process.terminate()
        for process in processes:
            try:
                for line in process.stdout:
                    sys.stdout.write(line.decode('utf-8'))
            except IOError:
                pass
            process.wait()

    app = get_app(args.config_uri, args.app_name)

    if args.load:
        from pyramid.path import DottedNameResolver
        load_test_data = app.registry.settings.get('snovault.load_test_data')
        load_test_data = DottedNameResolver().resolve(load_test_data)
        load_res = load_test_data(app, args.access_key)
        if load_res:  # None if successful
            raise load_res

    if args.init:
        create_mapping.run(app, check_first=False, purge_queue=True)

    print('Started. ^C to exit.')

    stdouts = [p.stdout for p in processes]

    # Ugly; should probably use threads instead
    while True:
        readable, writable, err = select.select(stdouts, [], stdouts, 5)
        for stdout in readable:
            for line in iter(stdout.readline, b''):
                sys.stdout.write(line.decode('utf-8'))
        if err:
            for stdout in err:
                for line in iter(stdout.readline, b''):
                    sys.stdout.write(line.decode('utf-8'))
            break
def app(app_settings):
    from encoded.tests.test_indexing import _app
    from snovault.elasticsearch import create_mapping
    for app in _app(app_settings):
        create_mapping.run(app)
        yield app
def teardown(app, dbapi_conn):
    from snovault.elasticsearch import create_mapping
    create_mapping.run(app)
    cursor = dbapi_conn.cursor()
    cursor.execute("""TRUNCATE resources, transactions CASCADE;""")
    cursor.close()
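# A minimal sketch of how the teardown above might be driven from a pytest fixture.
# Obtaining a raw DBAPI connection from the session's bind (as shown here) is an
# assumption for illustration; the original snippet does not specify where
# dbapi_conn comes from.
import pytest
from snovault import DBSESSION

@pytest.fixture
def indexing_cleanup(app):
    yield
    # after the test: re-run create_mapping and truncate the resource tables
    dbapi_conn = app.registry[DBSESSION].bind.raw_connection()
    try:
        teardown(app, dbapi_conn)
        dbapi_conn.commit()
    finally:
        dbapi_conn.close()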
def test_queue_indexing_with_linked(app, testapp, indexer_testapp, dummy_request):
    """
    Test a whole bunch of things here:
    - posting/patching invalidates rev linked items
    - check linked_uuids/rev_link_names/rev_linked_to_me fields in ES
    - test indexer_utils.find_uuids_for_indexing fxn
    - test check_es_and_cache_linked_sids & validate_es_content
    - test purge functionality before and after removing links to an item
    """
    import webtest
    from snovault import util
    from pyramid.traversal import traverse
    from snovault.tests.testing_views import TestingLinkSourceSno
    es = app.registry[ELASTIC_SEARCH]
    indexer_queue = app.registry[INDEXER_QUEUE]
    # first, run create mapping with the indices we will use
    create_mapping.run(
        app,
        collections=['testing_link_target_sno', 'testing_link_source_sno'],
        skip_indexing=True
    )
    ppp_res = testapp.post_json(TEST_COLL, {'required': ''})
    ppp_uuid = ppp_res.json['@graph'][0]['uuid']
    target = {'name': 'one', 'uuid': '775795d3-4410-4114-836b-8eeecf1d0c2f'}
    source = {
        'name': 'A',
        'target': '775795d3-4410-4114-836b-8eeecf1d0c2f',
        'ppp': ppp_uuid,
        'uuid': '16157204-8c8f-4672-a1a4-14f4b8021fcd',
        'status': 'current',
    }
    target_res = testapp.post_json('/testing-link-targets-sno/', target, status=201)
    res = indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    # wait for the first item to index
    doc_count_target = es.count(index='testing_link_target_sno',
                                doc_type='testing_link_target_sno').get('count')
    doc_count_ppp = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    tries = 0
    while (doc_count_target < 1 or doc_count_ppp < 1) and tries < 5:
        time.sleep(4)
        doc_count_target = es.count(index='testing_link_target_sno',
                                    doc_type='testing_link_target_sno').get('count')
        doc_count_ppp = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
        tries += 1
    assert doc_count_target == 1
    assert doc_count_ppp == 1
    # indexing the source will also reindex the target, since it has a reverse link
    source_res = testapp.post_json('/testing-link-sources-sno/', source, status=201)
    source_uuid = source_res.json['@graph'][0]['uuid']
    time.sleep(2)
    res = indexer_testapp.post_json('/index', {'record': True})
    assert res.json['indexing_count'] == 2
    time.sleep(2)
    # wait for them to index
    doc_count = es.count(index='testing_link_source_sno',
                         doc_type='testing_link_source_sno').get('count')
    tries = 0
    while doc_count < 1 and tries < 5:
        time.sleep(4)
        doc_count = es.count(index='testing_link_source_sno',
                             doc_type='testing_link_source_sno').get('count')
    assert doc_count == 1
    # patching json will not queue the embedded ppp
    # the target will be indexed though, since it has a linkTo back to the source
    testapp.patch_json('/testing-link-sources-sno/' + source_uuid, {'name': 'ABC'})
    time.sleep(2)
    res = indexer_testapp.post_json('/index', {'record': True})
    assert res.json['indexing_count'] == 2
    time.sleep(3)
    # check some stuff on the es results for source and target
    es_source = es.get(index='testing_link_source_sno',
                       doc_type='testing_link_source_sno', id=source['uuid'])
    uuids_linked_emb = [link['uuid'] for link in es_source['_source']['linked_uuids_embedded']]
    uuids_linked_obj = [link['uuid'] for link in es_source['_source']['linked_uuids_object']]
    assert set(uuids_linked_emb) == {target['uuid'], source['uuid'], ppp_uuid}
    assert uuids_linked_obj == [source['uuid']]
    assert es_source['_source']['rev_link_names'] == {}
    assert es_source['_source']['rev_linked_to_me'] == [target['uuid']]
    es_target = es.get(index='testing_link_target_sno',
                       doc_type='testing_link_target_sno', id=target['uuid'])
    # just the source uuid itself in the linked uuids for the object view
    uuids_linked_emb2 = [link['uuid'] for link in es_target['_source']['linked_uuids_embedded']]
    uuids_linked_obj2 = [link['uuid'] for link in es_target['_source']['linked_uuids_object']]
    assert set(uuids_linked_emb2) == {target['uuid'], source['uuid']}
    assert uuids_linked_obj2 == [target['uuid']]
    assert es_target['_source']['rev_link_names'] == {'reverse': [source['uuid']]}
    assert es_target['_source']['rev_linked_to_me'] == []
    # test find_uuids_for_indexing
    to_index = indexer_utils.find_uuids_for_indexing(app.registry, {target['uuid']})
    assert to_index == {target['uuid'], source['uuid']}
    to_index = indexer_utils.find_uuids_for_indexing(app.registry, {ppp_uuid})
    assert to_index == {ppp_uuid, source['uuid']}
    # this will return the target uuid, since it has an indexed rev link
    to_index = indexer_utils.find_uuids_for_indexing(app.registry, {source['uuid']})
    assert to_index == {target['uuid'], source['uuid']}
    # now use a made-up uuid; only result should be itself
    fake_uuid = str(uuid.uuid4())
    to_index = indexer_utils.find_uuids_for_indexing(app.registry, {fake_uuid})
    assert to_index == {fake_uuid}
    # test @@links functionality
    source_links_res = testapp.get('/' + source['uuid'] + '/@@links', status=200)
    linking_uuids = source_links_res.json.get('uuids_linking_to')
    assert linking_uuids and len(linking_uuids) == 1
    assert linking_uuids[0]['uuid'] == target['uuid']  # rev_link from target
    # test check_es_and_cache_linked_sids and validate_es_content
    # must get the context object through request traversal
    dummy_request.datastore = 'database'
    assert dummy_request._sid_cache == {}
    source_ctxt = traverse(dummy_request.root, source_res.json['@graph'][0]['@id'])['context']
    target_ctxt = traverse(dummy_request.root, target_res.json['@graph'][0]['@id'])['context']
    # first check frame=object for target
    tar_es_res_obj = util.check_es_and_cache_linked_sids(target_ctxt, dummy_request, 'object')
    assert tar_es_res_obj['uuid'] == target['uuid']
    assert set(uuids_linked_obj2) == set(dummy_request._sid_cache)
    # frame=embedded for source
    src_es_res_emb = util.check_es_and_cache_linked_sids(source_ctxt, dummy_request, 'embedded')
    assert src_es_res_emb['uuid'] == source['uuid']
    assert set(uuids_linked_emb) == set(dummy_request._sid_cache)
    # make sure everything in _sid_cache is present and up to date
    for rid in dummy_request._sid_cache:
        found_sid = dummy_request.registry[STORAGE].write.get_by_uuid(rid).sid
        assert dummy_request._sid_cache.get(rid) == found_sid
    # test validate_es_content with the correct sids and then an incorrect one
    valid = util.validate_es_content(source_ctxt, dummy_request, src_es_res_emb, 'embedded')
    assert valid is True
    # lastly, test purge_uuid and delete functionality
    with pytest.raises(webtest.AppError) as excinfo:
        del_res0 = testapp.delete_json('/' + source['uuid'] + '/?purge=True')
    assert 'Item status must equal deleted before purging' in str(excinfo.value)
    del_res1 = testapp.delete_json('/' + source['uuid'])
    assert del_res1.json['status'] == 'success'
    # this item will still have items linking to it until indexing occurs
    with pytest.raises(webtest.AppError) as excinfo:
        del_res2 = testapp.delete_json('/' + source['uuid'] + '/?purge=True')
    assert 'Cannot purge item as other items still link to it' in str(excinfo.value)
    # the source should fail due to outdated sids
    # must manually update _sid_cache on dummy_request for source
    src_sid = dummy_request.registry[STORAGE].write.get_by_uuid(source['uuid']).sid
    dummy_request._sid_cache[source['uuid']] = src_sid
    valid2 = util.validate_es_content(source_ctxt, dummy_request, src_es_res_emb, 'embedded')
    assert valid2 is False
    # the target should fail due to outdated rev_links (at least frame=object)
    # need to get the target context again, otherwise we get a sqlalchemy error
    target_ctxt2 = traverse(dummy_request.root, target_res.json['@graph'][0]['@id'])['context']
    valid3 = util.validate_es_content(target_ctxt2, dummy_request, tar_es_res_obj, 'object')
    assert valid3 is False
    res = indexer_testapp.post_json('/index', {'record': True})
    del_res3 = testapp.delete_json('/' + source['uuid'] + '/?purge=True')
    assert del_res3.json['status'] == 'success'
    assert del_res3.json['notification'] == 'Permanently deleted ' + source['uuid']
    time.sleep(3)
    # make sure everything has updated on ES
    check_es_source = es.get(index='testing_link_source_sno',
                             doc_type='testing_link_source_sno', id=source['uuid'], ignore=[404])
    assert check_es_source['found'] == False
    # source uuid removed from the target uuid
    check_es_target = es.get(index='testing_link_target_sno',
                             doc_type='testing_link_target_sno', id=target['uuid'])
    uuids_linked_emb2 = [link['uuid'] for link in check_es_target['_source']['linked_uuids_embedded']]
    assert source['uuid'] not in uuids_linked_emb2
    # the source is now purged
    testapp.get('/' + source['uuid'], status=404)
    # make sure check_es_and_cache_linked_sids fails for the purged item
    es_res_emb2 = util.check_es_and_cache_linked_sids(source_ctxt, dummy_request, 'embedded')
    assert es_res_emb2 is None
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="Run development servers",
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--app-name', help="Pyramid app name in configfile")
    parser.add_argument('config_uri', help="path to configfile")
    parser.add_argument('--clear', action="store_true", help="Clear existing data")
    parser.add_argument('--init', action="store_true", help="Init database")
    parser.add_argument('--load', action="store_true", help="Load test set")
    parser.add_argument('--datadir', default='/tmp/snovault', help="path to datadir")
    args = parser.parse_args()

    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.INFO)

    from snovault.tests import elasticsearch_fixture, postgresql_fixture
    from snovault.elasticsearch import create_mapping
    datadir = os.path.abspath(args.datadir)
    pgdata = os.path.join(datadir, 'pgdata')
    esdata = os.path.join(datadir, 'esdata')
    if args.clear:
        for dirname in [pgdata, esdata]:
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
    if args.init:
        postgresql_fixture.initdb(pgdata, echo=True)

    postgres = postgresql_fixture.server_process(pgdata, echo=True)
    elasticsearch = elasticsearch_fixture.server_process(esdata, echo=True)
    nginx = nginx_server_process(echo=True)
    processes = [postgres, elasticsearch, nginx]
    print_processes = []

    @atexit.register
    def cleanup_process():
        for process in processes:
            if process.poll() is None:
                process.terminate()
        for process in processes:
            try:
                for line in process.stdout:
                    sys.stdout.write(line.decode('utf-8'))
            except IOError:
                pass
            process.wait()
        for p in print_processes:
            p.terminate()

    if args.init:
        app = get_app(args.config_uri, args.app_name)
        create_mapping.run(app)

    if args.load:
        from pyramid.path import DottedNameResolver
        load_test_data = app.registry.settings.get('snovault.load_test_data')
        load_test_data = DottedNameResolver().resolve(load_test_data)
        load_test_data(app)

    print('Started. ^C to exit.')

    stdouts = [p.stdout for p in processes]

    def print_to_terminal(stdout):
        while True:
            for line in iter(stdout.readline, b''):
                sys.stdout.write(line.decode('utf-8'))

    readable, writable, err = select.select(stdouts, [], stdouts, 5)
    for stdout in readable:
        print_processes.append(Process(target=print_to_terminal, args=(stdout,)))
    for stdout in err:
        print_processes.append(Process(target=print_to_terminal, args=(stdout,)))
    for p in print_processes:
        p.start()