def test_url_shared_between_two_users_isnt_unique_for_either(self):
    # Create visits where two participants visited the same page
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        user_id=4,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    # Run a routine to compute whether each URL is unique for each participant
    compute_unique_urls(page_type_lookup=PAGE_TYPE_LOOKUP)
    unique_urls = UniqueUrl.select()

    # Two unique URL records should have been created: one for each participant for each URL
    self.assertEqual(unique_urls.count(), 2)
    records = [(u.user_id, u.url, u.unique) for u in unique_urls]
    self.assertIn((3, "page1", False), records)
    self.assertIn((4, "page1", False), records)

def test_include_all_concerns_if_no_concern_index_provided(self):
    # Visits from both concerns should be included in the graph
    create_location_visit(
        concern_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        concern_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )
    create_location_visit(
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 7, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 8, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)
    self.assertEqual(NavigationEdge.select().count(), 6)

def test_filter_to_only_one_concern_if_concern_index_provided(self):
    # These visits should be ignored
    create_location_visit(
        concern_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        concern_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    # These visits should be captured
    create_location_visit(
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )
    create_location_visit(
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 7, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 8, 0),
    )

    compute_navigation_graph(concern_index=1, page_type_lookup=PAGE_TYPE_LOOKUP)
    self.assertEqual(NavigationEdge.select().count(), 3)
    edges = NavigationEdge.select()
    transition_list = [
        (e.source_vertex.page_type, e.target_vertex.page_type) for e in edges
    ]
    self.assertIn(("page_type_2", "page_type_2"), transition_list)

def test_edge_occurrences_counts_number_of_transitions_between_page_types(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 7, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 8, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)
    edges = NavigationEdge.select()
    edge_dict = {
        (edge.source_vertex.page_type, edge.target_vertex.page_type): edge
        for edge in edges
    }
    self.assertEqual(edge_dict[('Start', 'page_type_1')].occurrences, 1)
    self.assertEqual(edge_dict[('page_type_1', 'page_type_1')].occurrences, 2)
    self.assertEqual(edge_dict[('page_type_1', 'page_type_2')].occurrences, 1)
    self.assertEqual(edge_dict[('page_type_2', 'End')].occurrences, 1)

def test_graph_skips_redirects(self):
    # Because redirects typically don't show any content but are just a gateway to
    # another page, we leave them out of the navigation graph.  It's more meaningful
    # to connect the page visited before the redirect to the page the redirect
    # points to.
    create_location_visit(
        url="redirect",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    # There should only be one edge: from "Start" to "End"
    self.assertEqual(NavigationEdge.select().count(), 1)

def test_edge_transition_probabilities_normalize_occurrences(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 7, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 8, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 9, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 10, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)
    edges = NavigationEdge.select()
    edge_dict = {
        (edge.source_vertex.page_type, edge.target_vertex.page_type): edge
        for edge in edges
    }
    self.assertAlmostEqual(
        edge_dict[('page_type_1', 'page_type_1')].probability, float(1) / 2)
    self.assertAlmostEqual(
        edge_dict[('page_type_1', 'page_type_2')].probability, float(1) / 4)
    self.assertAlmostEqual(
        edge_dict[('page_type_1', 'End')].probability, float(1) / 4)
    self.assertAlmostEqual(
        edge_dict[('page_type_2', 'page_type_1')].probability, 1)

def test_edge_not_added_between_concerns_for_the_same_participant(self):
    create_location_visit(
        concern_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    # 4 edges should have been created: for each concern, one from the "Start"
    # vertex to the visited URL and one from that URL to the "End" vertex.  No edge
    # should link the two concerns' visits to each other.
    self.assertEqual(NavigationEdge.select().count(), 4)

def test_skip_pages_that_appear_to_be_redirects(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    # In the page lookup dictionary, this entry will have a "redirect" flag that's
    # set to true.  It should be skipped in the ngrams.
    create_location_visit(
        url="redirect",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    compute_navigation_ngrams(length=2, page_type_lookup=PAGE_TYPE_LOOKUP)
    ngram_models = NavigationNgram.select()
    self.assertEqual(ngram_models.count(), 1)
    ngram = ngram_models.first()
    self.assertEqual(ngram.ngram, "page_type_1, page_type_2")

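# A minimal sketch, not the project's actual fixture: the tests in this module
# assume a PAGE_TYPE_LOOKUP (defined elsewhere) that resolves each visited URL to
# a page type and to a flag marking redirects, which the graph and n-gram routines
# skip.  The variable and key names below ("EXAMPLE_PAGE_TYPE_LOOKUP", "main_type",
# "redirect") are assumptions for illustration only and may differ from the real
# lookup's structure.
EXAMPLE_PAGE_TYPE_LOOKUP = {
    "page1": {"main_type": "page_type_1", "redirect": False},
    "page2": {"main_type": "page_type_2", "redirect": False},
    "redirect": {"main_type": "redirect_page", "redirect": True},
}
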
def test_vertex_mean_time_averages_time_of_visits(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),  # 1 second
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),  # + 2 seconds = 3 seconds (avg: 1.5s)
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),  # 1 second (avg: 1s)
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)
    vertices = NavigationVertex.select()
    page_type_1_vertex = vertices.where(
        NavigationVertex.page_type == "page_type_1").first()
    page_type_2_vertex = vertices.where(
        NavigationVertex.page_type == "page_type_2").first()
    self.assertEqual(page_type_1_vertex.mean_time, 1.5)
    self.assertEqual(page_type_2_vertex.mean_time, 1)

def test_graph_computation_uses_only_latest_computed_visits(self):
    # Only visits from the latest compute_index (1) should be included in the
    # graph; the earlier visit to "page1" (compute_index 0) should be ignored.
    create_location_visit(
        compute_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        compute_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        compute_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)
    self.assertEqual(NavigationEdge.select().count(), 3)
    edges = NavigationEdge.select()
    transition_list = [
        (e.source_vertex.page_type, e.target_vertex.page_type) for e in edges
    ]
    self.assertIn(("page_type_2", "page_type_2"), transition_list)

def test_url_that_no_one_else_has_visited_is_unique(self):
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    # In this visit, participant 3 goes to a URL that participant 4 never visited.
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        user_id=4,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    compute_unique_urls(page_type_lookup=PAGE_TYPE_LOOKUP)
    unique_urls = UniqueUrl.select()
    self.assertEqual(unique_urls.count(), 3)
    records = [(u.user_id, u.url, u.unique) for u in unique_urls]
    self.assertIn((3, "page1", False), records)
    self.assertIn((3, "page2", True), records)
    self.assertIn((4, "page1", False), records)

def test_compute_ngrams_within_participant(self):
    create_location_visit(
        user_id=3,
        concern_index=1,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        user_id=3,
        concern_index=1,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    # This third visit is by another participant, and shouldn't be connected
    # to the past visits as an n-gram
    create_location_visit(
        user_id=4,
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    compute_navigation_ngrams(length=2, page_type_lookup=PAGE_TYPE_LOOKUP)
    ngram_models = NavigationNgram.select()
    self.assertEqual(ngram_models.count(), 1)

def test_ignore_participants_with_excluded_user_ids(self):
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        user_id=4,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    # With participants 2 and 3 excluded, only participant 4's visits are
    # considered, so "page1" should be reported as unique to participant 4.
    compute_unique_urls(page_type_lookup=PAGE_TYPE_LOOKUP, exclude_users=[2, 3])
    unique_urls = UniqueUrl.select()
    self.assertEqual(unique_urls.count(), 1)
    records = [(u.user_id, u.url, u.unique) for u in unique_urls]
    self.assertIn((4, "page1", True), records)

def test_vertex_total_time_counts_time_of_all_visits(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),  # 1 second
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),  # + 3 seconds = 4 seconds
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),  # 1 second
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)
    vertices = NavigationVertex.select()
    page_type_1_vertex = vertices.where(
        NavigationVertex.page_type == "page_type_1").first()
    page_type_2_vertex = vertices.where(
        NavigationVertex.page_type == "page_type_2").first()
    self.assertEqual(page_type_1_vertex.total_time, 4)
    self.assertEqual(page_type_2_vertex.total_time, 1)

def test_edge_added_between_all_consecutive_visits(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )
    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)
    edges = NavigationEdge.select()

    # There will be 4 edges:
    # * 2 for the transitions between the 3 URLs above
    # * 1 for the transition from "Start" to the first URL
    # * 1 for the transition from the last URL to "End"
    self.assertEqual(edges.count(), 4)
    edge_page_type_pairs = [
        (edge.source_vertex.page_type, edge.target_vertex.page_type)
        for edge in edges
    ]
    self.assertIn(("Start", "page_type_1"), edge_page_type_pairs)
    self.assertIn(("page_type_1", "page_type_1"), edge_page_type_pairs)
    self.assertIn(("page_type_1", "page_type_2"), edge_page_type_pairs)
    self.assertIn(("page_type_2", "End"), edge_page_type_pairs)

def test_only_one_record_appears_per_participant_and_url(self):
    # Create visits where one participant visited the same URL twice
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    compute_unique_urls(page_type_lookup=PAGE_TYPE_LOOKUP)
    unique_urls = UniqueUrl.select()
    self.assertEqual(unique_urls.count(), 1)
    records = [(u.user_id, u.url, u.unique) for u in unique_urls]
    self.assertIn((3, "page1", True), records)

def test_vertex_occurrences_count_visits_to_page_type(self):
    # Create a set of visits that a participant made to a few pages
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    # Compute a navigation graph by inspecting the location visits one by one.
    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)
    vertices = NavigationVertex.select()

    # Four vertices have been created:
    # * two, for the two distinct URLs above
    # * one for the "Start" vertex
    # * one for the "End" vertex
    self.assertEqual(vertices.count(), 4)

    page_type_1_vertex = vertices.where(
        NavigationVertex.page_type == "page_type_1").first()
    page_type_2_vertex = vertices.where(
        NavigationVertex.page_type == "page_type_2").first()
    self.assertEqual(page_type_1_vertex.occurrences, 2)
    self.assertEqual(page_type_2_vertex.occurrences, 1)

    # Start and end vertices should have 1 occurrence by default
    start_vertex = vertices.where(
        NavigationVertex.page_type == "Start").first()
    end_vertex = vertices.where(
        NavigationVertex.page_type == "End").first()
    self.assertEqual(start_vertex.occurrences, 1)
    self.assertEqual(end_vertex.occurrences, 1)

def test_compute_bigrams_from_location_sequence(self):
    # Create a set of visits that a participant made to a few pages
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        user_id=3,
        concern_index=2,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    # Run a routine to compute all n-grams of page visits of a given length
    compute_navigation_ngrams(length=2, page_type_lookup=PAGE_TYPE_LOOKUP)
    ngram_models = NavigationNgram.select()

    # Two ngrams should have been created from a sequence of 3 visits
    self.assertEqual(ngram_models.count(), 2)

    # Make sure that all of the left-to-right subsequences can be found
    ngrams = [n.ngram for n in ngram_models]
    self.assertIn("page_type_1, page_type_1", ngrams)
    self.assertIn("page_type_1, page_type_2", ngrams)

    # Make sure the participant's ID, ngram length, and task's concern index are stored
    ngram_model = ngram_models.first()
    self.assertEqual(ngram_model.length, 2)
    self.assertEqual(ngram_model.user_id, 3)
    self.assertEqual(ngram_model.concern_index, 2)
