def es_index():
    """
    Fixture for a properly initialized ES index
    """
    # Build the index before the test body runs...
    recreate_index()
    yield
    # ...and drop all indices afterwards so tests stay isolated
    delete_indices()
def test_update_during_recreate_index(self):
    """
    If an indexing action happens during a recreate_index it should update all
    active indices
    """
    conn = get_conn(verify=False)
    recreate_index()

    # Point a temporary (reindexing) alias at a fresh backing index for each
    # enrollment index type, so we can check writes reach them too.
    reindexing_aliases = {}
    for idx_type in (PRIVATE_ENROLLMENT_INDEX_TYPE, PUBLIC_ENROLLMENT_INDEX_TYPE):
        backing = make_backing_index_name()
        alias = make_alias_name(index_type=idx_type, is_reindexing=True)
        clear_and_create_index(backing, index_type=idx_type)
        conn.indices.put_alias(index=backing, name=alias)
        reindexing_aliases[idx_type] = alias

    # Run transaction.on_commit callbacks immediately so the indexing
    # signal fires inside the test transaction.
    with patch('search.signals.transaction.on_commit', side_effect=lambda callback: callback()):
        program_enrollment = ProgramEnrollmentFactory.create()

    for idx_type, alias in reindexing_aliases.items():
        assert_search(es.search(idx_type), [program_enrollment], index_type=idx_type)
        # Temp alias should get updated as well
        refresh_index(alias)
        hits = conn.search(index=alias)['hits']
        assert_search(hits, [program_enrollment], index_type=idx_type)
def test_update_during_recreate_index(self):
    """
    If an indexing action happens during a recreate_index it should update all
    active indices
    """
    conn = get_conn(verify=False)
    recreate_index()
    index_types = [PRIVATE_ENROLLMENT_INDEX_TYPE, PUBLIC_ENROLLMENT_INDEX_TYPE]

    def create_temp_alias(idx_type):
        # create a temporary backing index and attach the reindexing alias to it
        backing_name = make_backing_index_name()
        alias_name = make_alias_name(index_type=idx_type, is_reindexing=True)
        clear_and_create_index(backing_name, index_type=idx_type)
        conn.indices.put_alias(index=backing_name, name=alias_name)
        return alias_name

    temp_aliases = {idx_type: create_temp_alias(idx_type) for idx_type in index_types}

    # Execute on_commit hooks right away so signal-driven indexing happens now
    with patch('search.signals.transaction.on_commit', side_effect=lambda callback: callback()):
        program_enrollment = ProgramEnrollmentFactory.create()

    for idx_type in index_types:
        assert_search(es.search(idx_type), [program_enrollment], index_type=idx_type)
        # Temp alias should get updated
        alias_name = temp_aliases[idx_type]
        refresh_index(alias_name)
        assert_search(conn.search(index=alias_name)['hits'], [program_enrollment], index_type=idx_type)
def handle(self, *args, **options):
    """
    Seed the database with realistic fake programs and users loaded from JSON
    files, rebuild the search index, and optionally enroll a staff user.
    """
    program_data_list = load_json_from_file(PROGRAM_DATA_PATH)
    user_data_list = load_json_from_file(USER_DATA_PATH)
    # Count fake records already present so seeding is idempotent
    existing_fake_user_count = User.objects.filter(
        username__startswith=FAKE_USER_USERNAME_PREFIX).count()
    existing_fake_program_count = Program.objects.filter(
        description__startswith=FAKE_PROGRAM_DESC_PREFIX).count()
    if len(user_data_list) == existing_fake_user_count and len(
            program_data_list) == existing_fake_program_count:
        # Everything from the data files appears to be loaded already
        fake_programs = Program.objects.filter(
            description__startswith=FAKE_PROGRAM_DESC_PREFIX).all()
        self.stdout.write(
            "Realistic users and programs appear to exist already.")
    else:
        fake_programs = deserialize_program_data_list(program_data_list)
        fake_course_runs = CourseRun.objects.filter(
            course__program__description__contains=FAKE_PROGRAM_DESC_PREFIX
        ).all()
        fake_user_count = deserialize_user_data_list(
            user_data_list, fake_course_runs)
        # Index the newly created data in bulk
        recreate_index()
        self.stdout.write("Created {} new programs from '{}'.".format(
            len(fake_programs), PROGRAM_DATA_PATH))
        self.stdout.write("Created {} new users from '{}'.".format(
            fake_user_count, USER_DATA_PATH))
    # Regardless of whether data was just created, optionally give the named
    # staff user an enrollment and 'staff' role in every fake program
    if fake_programs and options.get('staff_user'):
        self.assign_staff_user_to_programs(options['staff_user'],
                                           fake_programs)
        self.stdout.write(
            "Added enrollment and 'staff' role for user '{}' to {} programs"
            .format(options['staff_user'], len(fake_programs)))
def test_country_limit(self, browser, base_test_data):
    """
    There should be more than 20 countries in current country and birth country
    facets
    """
    with open("profiles/data/countries.csv") as csv_file:
        country_codes = [row['code'] for row in csv.DictReader(csv_file)]
    create_enrolled_user_batch(len(country_codes), program=base_test_data.program, is_staff=False)
    # Skip per-profile ES updates; reindex everything in bulk afterwards
    with mute_signals(post_save):
        for position, profile in enumerate(Profile.objects.all()):
            country = country_codes[position % len(country_codes)]
            profile.birth_country = country
            profile.country = country
            profile.save()
    recreate_index()

    browser.get("/learners")
    browser.wait_until_loaded(By.CLASS_NAME, 'menu-icon')
    selector = '.filter--country .sk-hierarchical-menu-list__item'
    rendered_count = browser.driver.execute_script(
        "return document.querySelectorAll('{}').length".format(selector)
    )
    assert rendered_count == len(country_codes)
def test_country_limit(self, browser, base_test_data):
    """
    There should be more than 20 countries in current country and birth country
    facets
    """
    with open("profiles/data/countries.csv") as countries_file:
        country_codes = []
        for row in csv.DictReader(countries_file):
            country_codes.append(row['code'])
    total = len(country_codes)
    create_enrolled_user_batch(total, program=base_test_data.program, is_staff=False)
    # Don't update elasticsearch for each profile, do that in bulk after
    with mute_signals(post_save):
        for idx, profile in enumerate(Profile.objects.all()):
            assigned_code = country_codes[idx % total]
            profile.birth_country = assigned_code
            profile.country = assigned_code
            profile.save()
    recreate_index()
    browser.get("/learners")
    browser.wait_until_loaded(By.CLASS_NAME, 'menu-icon')
    item_selector = '.filter--country .sk-hierarchical-menu-list__item'
    script = "return document.querySelectorAll('{}').length".format(item_selector)
    assert browser.driver.execute_script(script) == total
def test_keep_alias(self, existing_temp_alias, index_type):
    """
    Test that recreate_index will point an existing alias at a new backing index
    """
    conn = get_conn(verify=False)
    default_alias = make_alias_name(index_type, is_reindexing=False)
    temp_alias = make_alias_name(index_type, is_reindexing=True)
    assert conn.indices.exists_alias(name=temp_alias) is False

    if existing_temp_alias:
        # A pre-existing temp alias should not affect the outcome
        backing_index = "{}_backing".format(temp_alias)
        conn.indices.create(backing_index)
        conn.indices.put_alias(name=temp_alias, index=backing_index)

    indexes_before = list(conn.indices.get_alias(name=default_alias).keys())
    assert len(indexes_before) == 1
    recreate_index()
    indexes_after = list(conn.indices.get_alias(name=default_alias).keys())
    assert len(indexes_after) == 1
    # Backing index should have changed
    assert indexes_before != indexes_after
    # Temp alias should have been deleted
    assert conn.indices.exists_alias(name=temp_alias) is False
def setUp(self):
    # Make sure index exists when signals are run.
    # We want to run recreate_index instead of clear_index
    # because the test data is contained in a transaction
    # which is reverted after each test runs, so signals don't get run
    # that keep ES up to date.
    recreate_index()
    super().setUp()
def test_no_index_not_default(self):
    """
    Test that an error is raised if we don't have an index
    """
    # Start from a known-good default index, then drop everything
    recreate_index()
    missing = "other"
    delete_indices()
    with self.assertRaises(ReindexException) as ex:
        get_conn(verify_indices=[missing])
    assert str(ex.exception) == "Unable to find index {}".format(missing)
def handle(self, *args, **options):  # pylint: disable=too-many-locals
    """
    Seed the database with fake programs/users from JSON files, create
    financial-aid tiers, rebuild the search index, and optionally attach a
    staff user to every fake program.
    """
    program_data_list = load_json_from_file(PROGRAM_DATA_PATH)
    user_data_list = load_json_from_file(USER_DATA_PATH)
    # Compare existing fake-record counts against the data files so the
    # command is idempotent
    existing_fake_user_count = User.objects.filter(username__startswith=FAKE_USER_USERNAME_PREFIX).count()
    existing_fake_program_count = fake_programs_query().count()
    if len(user_data_list) == existing_fake_user_count and len(program_data_list) == existing_fake_program_count:
        fake_programs = fake_programs_query().all()
        self.stdout.write("Seed data appears to already exist.")
    else:
        recreate_index()
        # Mute post_save to prevent updates to Elasticsearch on a per program or user basis.
        # recreate_index() is run afterwards to do this indexing in bulk.
        with mute_signals(post_save):
            fake_programs = deserialize_program_data_list(program_data_list)
            fake_user_count = deserialize_user_data_list(user_data_list, fake_programs)
            # Handle FA programs
            fake_financial_aid_programs = filter(lambda program: program.financial_aid_availability, fake_programs)
            tiered_program_count, tiers_created = (
                create_tiers(fake_financial_aid_programs, int(options["tiers"]))
            )
        # Bulk-index everything created above
        recreate_index()
        program_msg = (
            "Created {num} new programs from '{path}'."
        ).format(
            num=len(fake_programs),
            path=PROGRAM_DATA_PATH
        )
        if tiers_created:
            program_msg = "{}\nCreated {} tiers for {} FA-enabled programs".format(
                program_msg, tiers_created, tiered_program_count
            )
        user_msg = (
            "Created {num} new users from '{path}'."
        ).format(
            num=fake_user_count,
            path=USER_DATA_PATH,
        )
        self.stdout.write(program_msg)
        self.stdout.write(user_msg)
    # Runs whether or not data was just created
    if fake_programs and options.get('staff_user'):
        self.assign_staff_user_to_programs(options['staff_user'], fake_programs)
        msg = (
            "Added enrollment and 'staff' role for user '{user}' to {num} programs"
        ).format(
            user=options['staff_user'],
            num=len(fake_programs),
        )
        self.stdout.write(msg)
def test_update_index(self, index_type):
    """
    Test that recreate_index will clear old data and index all profiles
    """
    # Run transaction.on_commit hooks immediately so the signal indexes the user
    with patch('search.signals.transaction.on_commit', side_effect=lambda callback: callback()):
        enrollment = ProgramEnrollmentFactory.create()
    assert_search(es.search(index_type), [enrollment], index_type=index_type)

    remove_program_enrolled_user(enrollment.id)
    assert_search(es.search(index_type), [], index_type=index_type)

    # recreate_index should index the program-enrolled user again
    recreate_index()
    assert_search(es.search(index_type), [enrollment], index_type=index_type)
def test_update_index(self):  # pylint: disable=no-self-use
    """
    Test that recreate_index will clear old data and index all profiles
    """
    recreate_index()
    enrollment = ProgramEnrollmentFactory.create()
    assert_search(es.search(), [enrollment])

    remove_program_enrolled_user(enrollment)
    assert_search(es.search(), [])

    # Rebuilding the index should pick the program-enrolled user back up
    recreate_index()
    assert_search(es.search(), [enrollment])
def test_learners_states(browser, override_allowed_hosts, seeded_database_loader, django_db_blocker, test_data):
    """Iterate through all possible dashboard states and save screenshots/API results of each one"""
    output_directory = DASHBOARD_STATES_OPTIONS.get('output_directory')
    os.makedirs(output_directory, exist_ok=True)
    use_mobile = DASHBOARD_STATES_OPTIONS.get('mobile')
    if use_mobile:
        # Emulate a phone-sized viewport for the mobile screenshot run
        browser.driver.set_window_size(480, 854)
    learners_states = LearnersStates(test_data['user'])
    learners_state_iter = enumerate(learners_states)
    match = DASHBOARD_STATES_OPTIONS.get('match')
    if match is not None:
        # Only run scenarios whose generated filename contains the match string
        learners_state_iter = filter(
            lambda scenario: match in make_filename(scenario[0], scenario[1][1]),
            learners_state_iter
        )
    LoginPage(browser).log_in_via_admin(learners_states.user, DEFAULT_PASSWORD)
    recreate_index()
    # warm the cache
    browser.get("/learners")
    for num, (run_scenario, name) in learners_state_iter:
        skip_screenshot = False
        with django_db_blocker.unblock():
            learners_states.user.refresh_from_db()
            filename = make_filename(num, name, output_directory=output_directory, use_mobile=use_mobile)
            new_url = run_scenario()
        if not skip_screenshot:
            browser.get(new_url)
            browser.wait_until_loaded(By.CSS_SELECTOR, '.sk-hits,.no-hits')
            browser.wait_until_loaded(By.CLASS_NAME, 'micromasters-title')
            try:
                browser.click_when_loaded(
                    By.CSS_SELECTOR,
                    '.filter--company_name .Select-arrow-zone',
                    retries=0,
                )
            except ElementNotVisibleException:
                # We are trying to make the work history visible, but if it doesn't exist
                # there's nothing to do
                pass
            # sometimes the browser scrolls down for some reason after clicking
            browser.driver.execute_script("window.scrollTo(0, 0)")
            browser.take_screenshot(filename=filename)
    # Restore the seeded database once every scenario has run
    with django_db_blocker.unblock():
        terminate_db_connections()
    seeded_database_loader.load_backup()
def test_no_index_not_default(self):
    """
    Test that an error is raised if we don't have an index
    """
    # Ensure a clean default index exists first, then remove every index
    recreate_index()
    other_index = "other"
    delete_indices()
    expected_message = "Unable to find index {}".format(other_index)
    with self.assertRaises(ReindexException) as ex:
        get_conn(verify_indices=[other_index])
    assert str(ex.exception) == expected_message
def handle(self, *args, **options):  # pylint: disable=too-many-locals
    """
    Seed the database with fake programs/users from JSON files, create
    financial-aid tiers, rebuild the search index, and optionally attach a
    staff user to every fake program.
    """
    program_data_list = load_json_from_file(PROGRAM_DATA_PATH)
    user_data_list = load_json_from_file(USER_DATA_PATH)
    # Compare existing fake-record counts against the data files so the
    # command is idempotent
    existing_fake_user_count = User.objects.filter(
        username__startswith=FAKE_USER_USERNAME_PREFIX).count()
    existing_fake_program_count = fake_programs_query().count()
    if len(user_data_list) == existing_fake_user_count and len(
            program_data_list) == existing_fake_program_count:
        fake_programs = fake_programs_query().all()
        self.stdout.write("Seed data appears to already exist.")
    else:
        recreate_index()
        # Mute post_save to prevent updates to Elasticsearch on a per program or user basis.
        # recreate_index() is run afterwards to do this indexing in bulk.
        with mute_signals(post_save):
            fake_programs = deserialize_program_data_list(
                program_data_list)
            fake_user_count = deserialize_user_data_list(
                user_data_list, fake_programs)
            # Handle FA programs
            fake_financial_aid_programs = filter(
                lambda program: program.financial_aid_availability,
                fake_programs)
            tiered_program_count, tiers_created = (create_tiers(
                fake_financial_aid_programs, int(options["tiers"])))
        # Bulk-index everything created above
        recreate_index()
        program_msg = ("Created {num} new programs from '{path}'.").format(
            num=len(fake_programs), path=PROGRAM_DATA_PATH)
        if tiers_created:
            program_msg = "{}\nCreated {} tiers for {} FA-enabled programs".format(
                program_msg, tiers_created, tiered_program_count)
        user_msg = ("Created {num} new users from '{path}'.").format(
            num=fake_user_count,
            path=USER_DATA_PATH,
        )
        self.stdout.write(program_msg)
        self.stdout.write(user_msg)
    # Runs whether or not data was just created
    if fake_programs and options.get('staff_user'):
        self.assign_staff_user_to_programs(options['staff_user'],
                                           fake_programs)
        msg = (
            "Added enrollment and 'staff' role for user '{user}' to {num} programs"
        ).format(
            user=options['staff_user'],
            num=len(fake_programs),
        )
        self.stdout.write(msg)
def handle(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Recreates the index
    """
    # Mirror indexing-API log output onto this command's stderr
    log = logging.getLogger(indexing_api_name)
    console = logging.StreamHandler(self.stderr)
    console.setLevel(logging.DEBUG)
    log.addHandler(console)
    log.level = logging.INFO

    if not kwargs['profile']:
        recreate_index()
        return

    # Profiled run: dump cProfile stats to a uniquely named file
    import cProfile
    import uuid
    profiler = cProfile.Profile()
    profiler.enable()
    recreate_index()
    profiler.disable()
    stats_filename = 'recreate_index_{}.profile'.format(uuid.uuid4())
    profiler.dump_stats(stats_filename)
    self.stdout.write('Output profiling data to: {}'.format(stats_filename))
def test_keep_alias(self, existing_temp_alias, index_type):
    """
    Test that recreate_index will point an existing alias at a new backing index
    """
    conn = get_conn(verify=False)
    default_alias = make_alias_name(index_type, is_reindexing=False)
    temp_alias = make_alias_name(index_type, is_reindexing=True)
    assert conn.indices.exists_alias(name=temp_alias) is False

    if existing_temp_alias:
        # Point the temp alias at a throwaway backing index up front; this
        # must not change the outcome of recreate_index below.
        preexisting_backing = "{}_backing".format(temp_alias)
        conn.indices.create(preexisting_backing)
        conn.indices.put_alias(name=temp_alias, index=preexisting_backing)

    def backing_indexes():
        return list(conn.indices.get_alias(name=default_alias).keys())

    before = backing_indexes()
    assert len(before) == 1
    recreate_index()
    after = backing_indexes()
    assert len(after) == 1
    # The default alias must now point at a brand-new backing index
    assert before != after
    # ...and the temporary alias must be gone
    assert conn.indices.exists_alias(name=temp_alias) is False
def test_create_index(self):  # pylint: disable=no-self-use
    """
    Test that recreate_index will create an index and let search successfully
    """
    recreate_index()
    # A freshly created index should contain zero documents
    assert es.search()['total'] == 0
def handle(self, *args, **options):
    """
    Remove seed data, then rebuild the search index so Elasticsearch no
    longer references the deleted records.
    """
    unseed_db()
    recreate_index()
def handle(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Recreates the index
    """
    recreate_index()
def test_fix_field_error(self):
    """recreate_index should not cause any error with this percolate query"""
    # Regression fixture: a stored percolate query that previously broke
    # recreate_index. The exact structure matters; do not simplify it.
    query = {
        "query": {
            "bool": {
                "filter": [
                    {
                        "bool": {
                            "must": [
                                {"term": {"program.is_learner": True}},
                                {"term": {"profile.email_optin": True}}
                            ],
                            "should": [
                                {"term": {"program.id": 1}},
                                {"term": {"program.id": 2}},
                                {"term": {"program.id": 13}}
                            ],
                            "minimum_should_match": 1
                        }
                    },
                    {"term": {"profile.filled_out": True}},
                    {
                        "bool": {
                            "must": [
                                {
                                    "nested": {
                                        "path": "program.courses",
                                        "query": {
                                            "bool": {
                                                "must": [
                                                    {"term": {"program.courses.course_title": "Supply Chain Fundamentals (SC1x)"}},
                                                    {"term": {"program.courses.payment_status": "Auditing"}}
                                                ]
                                            }
                                        }
                                    }
                                },
                                {
                                    'nested': {
                                        'path': "program.course_runs",
                                        'query': {
                                            'term': {'program.course_runs.semester': "2016 - Summer"}
                                        }
                                    }
                                },
                                {"term": {"profile.birth_country": "DE"}},
                                {"term": {"profile.country": "US"}},
                                {"term": {"program.id": 1}}
                            ]
                        }
                    }
                ]
            }
        }
    }
    PercolateQuery.objects.create(query=query, original_query=query)
    # Should complete without raising
    recreate_index()
def setUp(self):
    super(ESTestCase, self).setUp()
    # Rebuild the index so each test starts from a known-empty ES state
    recreate_index()
def setUpClass(cls):
    # Make sure index exists when signals are run.
    recreate_index()
    super().setUpClass()
def test_fix_field_error(self):
    """recreate_index should not cause any error with this percolate query"""
    # Regression fixture: a stored percolate query that previously broke
    # recreate_index. The exact structure matters; do not simplify it.
    query = {
        "query": {
            "bool": {
                "filter": [{
                    "bool": {
                        "must": [{
                            "term": {"program.is_learner": True}
                        }, {
                            "term": {"profile.email_optin": True}
                        }],
                        "should": [{
                            "term": {"program.id": 1}
                        }, {
                            "term": {"program.id": 2}
                        }, {
                            "term": {"program.id": 13}
                        }],
                        "minimum_should_match": 1
                    }
                }, {
                    "term": {"profile.filled_out": True}
                }, {
                    "bool": {
                        "must": [{
                            "nested": {
                                "path": "program.courses",
                                "query": {
                                    "bool": {
                                        "must": [{
                                            "term": {"program.courses.course_title": "Supply Chain Fundamentals (SC1x)"}
                                        }, {
                                            "term": {"program.courses.payment_status": "Auditing"}
                                        }]
                                    }
                                }
                            }
                        }, {
                            'nested': {
                                'path': "program.course_runs",
                                'query': {
                                    'term': {'program.course_runs.semester': "2016 - Summer"}
                                }
                            }
                        }, {
                            "term": {"profile.birth_country": "DE"}
                        }, {
                            "term": {"profile.country": "US"}
                        }, {
                            "term": {"program.id": 1}
                        }]
                    }
                }]
            }
        }
    }
    PercolateQuery.objects.create(query=query, original_query=query)
    # Should complete without raising
    recreate_index()