def test_match_with_log_date_not_visit_date(self):
    # Expectation under test: a rating event is placed in a task period
    # according to its log date; its visit date plays no part in matching.
    in_period = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    far_future = datetime.datetime(9000, 1, 1, 1, 0, 0, 0)

    # Two rating events with swapped dates: the first is *logged* inside
    # the task period, the second is only *visited* inside it.
    for address, logged_at, visited_at in [
        ("http://url1.com", in_period, far_future),
        ("http://url2.com", far_future, in_period),
    ]:
        create_location_event(
            log_date=logged_at,
            visit_date=visited_at,
            event_type="Rating: 1",
            url=address,
            user_id=0,
        )

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        task_index=3,
        concern_index=5,
        user_id=0,
    )

    compute_location_ratings()

    # Only the event logged during the period should produce a rating.
    extracted = LocationRating.select()
    self.assertEqual(extracted.count(), 1)
    self.assertEqual(extracted.first().url, "http://url1.com")
def test_return_ratings_that_couldnt_be_classified(self):
    # Expectation under test: `compute_location_ratings` hands back an entry
    # for every rating event that it could not place in a task period.
    common = {"user_id": 0}

    # Logged an hour before the only task period begins.
    too_early = create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 11, 0, 0, 0),
        event_type="Rating: 0",
        url="http://url1.com",
        **common
    )
    # Logged inside the task period.
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        event_type="Rating: 1",
        url="http://url2.com",
        **common
    )
    # Logged almost an hour after the task period ends.
    too_late = create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 13, 0, 0, 0),
        event_type="Rating: 2",
        url="http://url2.com",
        **common
    )
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        **common
    )

    leftovers = compute_location_ratings()

    for stray in (too_early, too_late):
        self.assertIn({'user_id': 0, 'event_id': stray.id}, leftovers)
def test_if_task_compute_index_specified_only_match_tasks_with_that_index(self):
    # Expectation under test: passing `task_compute_index` limits rating
    # matching to task periods that carry that compute index.
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        event_type="Rating: 0",
        url="http://url1.com",
        user_id=0,
    )

    period_start = datetime.datetime(2000, 1, 1, 12, 0, 0, 0)
    period_end = datetime.datetime(2000, 1, 1, 12, 2, 0, 0)
    # (compute_index, task_index) pairs. All periods span the same time, so
    # only the compute index can tell them apart.
    for compute_idx, task_idx in [(0, 1), (2, 2), (1, 3)]:
        create_task_period(
            compute_index=compute_idx,
            start=period_start,
            end=period_end,
            task_index=task_idx,
        )

    compute_location_ratings(task_compute_index=1)

    first_rating = LocationRating.select()[0]
    self.assertEqual(first_rating.task_index, 3)
def test_if_task_compute_index_specified_only_match_tasks_with_that_index(self):
    # Three task periods cover the same two minutes and differ only in their
    # compute index; the rating is expected to land in the one requested.
    window = dict(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    create_location_event(
        user_id=0,
        url="http://url1.com",
        event_type="Rating: 0",
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
    )
    create_task_period(compute_index=0, task_index=1, **window)
    create_task_period(compute_index=2, task_index=2, **window)
    create_task_period(compute_index=1, task_index=3, **window)

    # Ask for compute index 1, which belongs to the period with task index 3.
    compute_location_ratings(task_compute_index=1)

    matched = LocationRating.select()[0]
    self.assertEqual(matched.task_index, 3)
def test_acceptable_activating_location_events(self):
    # Expectation under test: both "Tab activated" and "Window activated"
    # can open a visit, which the following "Window deactivated" closes.
    first_moment = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    one_second = datetime.timedelta(seconds=1)

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    # Each activation is followed one second later by a deactivation, and
    # the next activation comes one second after that.
    activation_kinds = ["Tab activated", "Window activated"]
    for position, kind in enumerate(activation_kinds):
        opened_at = first_moment + (2 * position) * one_second
        closed_at = opened_at + one_second
        create_location_event(
            log_date=opened_at,
            event_type=kind,
            tab_id='1',
        )
        create_location_event(
            log_date=closed_at,
            event_type="Window deactivated",
            tab_id='1',
        )

    compute_location_visits()

    self.assertEqual(LocationVisit.select().count(), 2)
def test_by_default_associate_rating_with_latest_computed_task_periods(self):
    # Expectation under test: with no compute index given, a rating is
    # matched to the task period with the highest compute index.
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        event_type="Rating: 0",
        url="http://url1.com",
        user_id=0,
    )

    period_start = datetime.datetime(2000, 1, 1, 12, 0, 0, 0)
    period_end = datetime.datetime(2000, 1, 1, 12, 2, 0, 0)
    # (compute_index, task_index) pairs. All periods match the event's time;
    # the second has the latest compute index (2).
    for compute_idx, task_idx in [(0, 1), (2, 2), (1, 3)]:
        create_task_period(
            compute_index=compute_idx,
            start=period_start,
            end=period_end,
            task_index=task_idx,
        )

    compute_location_ratings()

    first_rating = LocationRating.select()[0]
    self.assertEqual(first_rating.task_index, 2)
def test_associate_location_visit_with_task_period_it_occured_within(self):
    # Expectation under test: a visit takes the index of the task period it
    # falls inside, not that of the periods just before or after.
    activations = [
        ('1', "http://url1.com", datetime.datetime(2000, 1, 1, 12, 0, 1, 0)),
        ('2', "http://url2.com", datetime.datetime(2000, 1, 1, 12, 0, 6, 0)),
    ]
    for tab, address, moment in activations:
        # Log date and visit date coincide for these events.
        create_location_event(
            log_date=moment,
            visit_date=moment,
            event_type="Tab activated",
            tab_id=tab,
            url=address,
        )

    # (task_index, start, end): one period before, one around, and one
    # after the two activations above.
    periods = [
        (1,
         datetime.datetime(2000, 1, 1, 11, 0, 0, 0),
         datetime.datetime(2000, 1, 1, 11, 59, 0, 0)),
        (2,
         datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
         datetime.datetime(2000, 1, 1, 12, 2, 0, 0)),
        (3,
         datetime.datetime(2000, 1, 1, 12, 3, 0, 0),
         datetime.datetime(2000, 1, 1, 12, 4, 0, 0)),
    ]
    for index, begins, finishes in periods:
        create_task_period(start=begins, end=finishes, task_index=index)

    compute_location_visits()

    first_visit = LocationVisit.select()[0]
    self.assertEqual(first_visit.task_index, 2)
def test_acceptable_activating_location_events(self):
    # Two activation/deactivation pairs are logged back to back, one opened
    # by a tab activation and one by a window activation. Each pair is
    # expected to yield one visit.
    tick = datetime.timedelta(seconds=1)
    clock = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    # Interleave each activating event with a closing "Window deactivated",
    # advancing the clock by one second after every event.
    sequence = []
    for opener in ("Tab activated", "Window activated"):
        sequence.append(opener)
        sequence.append("Window deactivated")

    for kind in sequence:
        create_location_event(log_date=clock, event_type=kind, tab_id='1')
        clock = clock + tick

    compute_location_visits()

    visit_total = LocationVisit.select().count()
    self.assertEqual(visit_total, 2)
def test_visit_times_based_on_visit_dates_not_log_dates(self):
    # Browser clocks are rarely in sync with the server clock. Visits are
    # matched to tasks by the server-side log date, but the stored start and
    # end of a visit are expected to be the times the browser reported.
    #
    # (tab_id, url, log_date, visit_date): the browser runs two hours
    # behind the server in this scenario.
    events = [
        ('1', "http://url1.com",
         datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
         datetime.datetime(2000, 1, 1, 10, 0, 1, 0)),
        ('2', "http://url2.com",
         datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
         datetime.datetime(2000, 1, 1, 10, 0, 2, 0)),
    ]
    for tab, address, server_time, browser_time in events:
        create_location_event(
            log_date=server_time,
            visit_date=browser_time,
            event_type="Tab activated",
            tab_id=tab,
            url=address,
        )

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        task_index=3,
        concern_index=5,
    )

    compute_location_visits()

    recorded = LocationVisit.select()
    first_visit = recorded[0]
    self.assertEqual(first_visit.start, datetime.datetime(2000, 1, 1, 10, 0, 1, 0))
    self.assertEqual(first_visit.end, datetime.datetime(2000, 1, 1, 10, 0, 2, 0))
def test_make_location_visit_associated_with_tasks_of_same_user(self):
    # Expectation under test: location events of one user must not produce
    # visits for a task period that belongs to a different user.

    # Two tab activations by user 0...
    create_location_event(
        user_id=0,
        log_date=datetime.datetime(2000, 1, 1, 11, 0, 1, 0),
        visit_date=datetime.datetime(2000, 1, 1, 11, 0, 1, 0),
        event_type="Tab activated",
        tab_id='1',
        url="http://url1.com"
    )
    create_location_event(
        user_id=0,
        log_date=datetime.datetime(2000, 1, 1, 11, 0, 2, 0),
        visit_date=datetime.datetime(2000, 1, 1, 11, 0, 2, 0),
        event_type="Tab activated",
        tab_id='2',
        url="http://url1.com"
    )
    # ...and a task period that belongs to user 1.
    # NOTE(review): the events above are logged at 11:00, which is also
    # outside this period's time window, so the scenario does not isolate
    # the user mismatch on its own -- confirm whether that is intended.
    create_task_period(
        user_id=1,
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    # Run the computation under test; without it the assertion below would
    # pass vacuously because nothing would ever create a visit.
    compute_location_visits()

    self.assertEqual(LocationVisit.select().count(), 0)
def test_by_default_associate_rating_with_latest_computed_task_periods(self):
    # Three task periods share the same time span and differ only in their
    # compute index. With no index requested, the rating is expected to go
    # to the period computed last (compute_index=2, task_index=2).
    window = dict(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    create_location_event(
        user_id=0,
        url="http://url1.com",
        event_type="Rating: 0",
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
    )
    create_task_period(compute_index=0, task_index=1, **window)
    create_task_period(compute_index=2, task_index=2, **window)
    create_task_period(compute_index=1, task_index=3, **window)

    compute_location_ratings()

    matched = LocationRating.select()[0]
    self.assertEqual(matched.task_index, 2)
def test_order_events_by_visit_date(self):
    # The server may receive browser events in a different order than the
    # browser produced them. The events found for a task are expected to be
    # ordered in "browser order" (by visit date), so the log order below is
    # deliberately jumbled.
    #
    # (tab_id, url, log_date, visit_date)
    events = [
        # Logged first, visited second.
        ('1', "http://url1.com",
         datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
         datetime.datetime(2000, 1, 1, 10, 0, 2, 0)),
        # Logged second, visited first.
        ('2', "http://url2.com",
         datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
         datetime.datetime(2000, 1, 1, 10, 0, 1, 0)),
    ]
    for tab, address, server_time, browser_time in events:
        create_location_event(
            log_date=server_time,
            visit_date=browser_time,
            event_type="Tab activated",
            tab_id=tab,
            url=address,
        )

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        task_index=3,
        concern_index=5,
    )

    compute_location_visits()

    # The first visit should be to the URL the browser reached first.
    recorded = LocationVisit.select()
    first_visit = recorded[0]
    self.assertEqual(first_visit.url, "http://url2.com")
def test_visit_times_based_on_visit_dates_not_log_dates(self):
    # Server and browser clocks can disagree. Task matching uses the log
    # date (server side), while the start and end stored on a visit are
    # expected to come from the visit dates (browser side).
    task_kwargs = dict(task_index=3, concern_index=5)
    expected_start = datetime.datetime(2000, 1, 1, 10, 0, 1, 0)
    expected_end = datetime.datetime(2000, 1, 1, 10, 0, 2, 0)

    # Both events are logged shortly after noon on the server, but the
    # browser reported them as happening shortly after ten.
    create_location_event(
        tab_id='1',
        url="http://url1.com",
        event_type="Tab activated",
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        visit_date=expected_start,
    )
    create_location_event(
        tab_id='2',
        url="http://url2.com",
        event_type="Tab activated",
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
        visit_date=expected_end,
    )
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        **task_kwargs
    )

    compute_location_visits()

    all_visits = LocationVisit.select()
    earliest = all_visits[0]
    self.assertEqual(earliest.start, expected_start)
    self.assertEqual(earliest.end, expected_end)
def test_associate_location_visit_with_task_period_it_occured_within(self):
    # Three consecutive task periods exist; the two tab activations happen
    # during the middle one, so the visit is expected to carry task index 2.
    entered = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    left = datetime.datetime(2000, 1, 1, 12, 0, 6, 0)

    create_location_event(
        tab_id='1',
        url="http://url1.com",
        event_type="Tab activated",
        log_date=entered,
        visit_date=entered,
    )
    create_location_event(
        tab_id='2',
        url="http://url2.com",
        event_type="Tab activated",
        log_date=left,
        visit_date=left,
    )

    # Period that ends before the visit starts.
    create_task_period(
        task_index=1,
        start=datetime.datetime(2000, 1, 1, 11, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 11, 59, 0, 0),
    )
    # Period that contains the visit.
    create_task_period(
        task_index=2,
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )
    # Period that starts after the visit ends.
    create_task_period(
        task_index=3,
        start=datetime.datetime(2000, 1, 1, 12, 3, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 4, 0, 0),
    )

    compute_location_visits()

    earliest = LocationVisit.select()[0]
    self.assertEqual(earliest.task_index, 2)
def test_match_with_log_date_not_visit_date(self):
    # A rating is expected to be attributed to a task by when it was logged,
    # regardless of the visit date stored with it.
    within_task = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    outside_task = datetime.datetime(9000, 1, 1, 1, 0, 0, 0)
    shared = dict(event_type="Rating: 1", user_id=0)

    # Logged during the task, visit date far outside it.
    create_location_event(
        url="http://url1.com",
        log_date=within_task,
        visit_date=outside_task,
        **shared
    )
    # Logged far outside the task, visit date during it.
    create_location_event(
        url="http://url2.com",
        log_date=outside_task,
        visit_date=within_task,
        **shared
    )
    create_task_period(
        user_id=0,
        task_index=3,
        concern_index=5,
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    compute_location_ratings()

    # Exactly one rating, and it is the one for the first URL.
    found = LocationRating.select()
    self.assertEqual(found.count(), 1)
    self.assertEqual(found.first().url, "http://url1.com")
def test_make_no_location_visit_if_it_doesnt_start_after_or_end_before_end_of_task(
        self):
    # Expectation under test: no visit is created unless it both starts
    # after the task period begins and ends before the task period ends.
    # The task period below runs from 12:00:00 to 12:02:00.

    # Activated before the period starts (11:59:01)...
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 11, 59, 1, 0),
        visit_date=datetime.datetime(2000, 1, 1, 11, 59, 1, 0),
        event_type="Tab activated",
        tab_id='1',
        url="http://url1.com")
    # ...with the next activation inside the period (12:01:00)...
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 1, 0, 0),
        visit_date=datetime.datetime(2000, 1, 1, 12, 1, 0, 0),
        event_type="Tab activated",
        tab_id='2',
        url="http://url1.com")
    # ...and the following two after the period has ended.
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 3, 0, 0),
        visit_date=datetime.datetime(2000, 1, 1, 12, 3, 0, 0),
        event_type="Tab activated",
        tab_id='1',
        url="http://url2.com")
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 5, 0, 0),
        visit_date=datetime.datetime(2000, 1, 1, 12, 5, 0, 0),
        event_type="Tab activated",
        tab_id='2',
        url="http://url2.com")
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    # Run the computation under test; without it the assertion below would
    # pass vacuously because nothing would ever create a visit.
    compute_location_visits()

    self.assertEqual(LocationVisit.select().count(), 0)
def test_dont_make_rating_for_non_rating_event(self):
    # Expectation under test: an event whose type is not a rating produces
    # no rating, even when it is logged inside a task period.
    non_rating_kind = "Tab opened"
    logged_at = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)

    create_location_event(
        log_date=logged_at,
        event_type=non_rating_kind,
        url="http://url1.com",
    )
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        task_index=3,
        concern_index=5,
    )

    compute_location_ratings()

    rating_total = LocationRating.select().count()
    self.assertEqual(rating_total, 0)
def test_hand_label_rating_events(self):
    # The rating event below is logged a minute after the task period ends,
    # so timing alone cannot align the two. The hand label passed to
    # `compute_location_ratings` is expected to associate them anyway.
    labeled_user = 2
    labeled_task = 4

    late_rating = create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 3, 0, 0),
        event_type="Rating: 1",
        url="http://url2.com",
        user_id=labeled_user,
    )
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        user_id=labeled_user,
        task_index=labeled_task,
    )

    hand_label = {
        'user_id': labeled_user,
        'task_index': labeled_task,
        'event_id': late_rating.id,
    }
    compute_location_ratings(labels=[hand_label])

    self.assertEqual(LocationRating.select().count(), 1)
    stored = LocationRating.select().first()
    self.assertEqual(stored.task_index, 4)
    # The rating should be flagged as aligned by hand.
    self.assertEqual(stored.hand_aligned, True)
def test_dont_make_rating_for_non_rating_event(self):
    # A "Tab opened" event is logged inside the task period. It is not a
    # rating, so no rating is expected to be stored for it.
    event_fields = dict(
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        event_type="Tab opened",
        url="http://url1.com",
    )
    period_fields = dict(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        task_index=3,
        concern_index=5,
    )

    create_location_event(**event_fields)
    create_task_period(**period_fields)

    compute_location_ratings()

    self.assertEqual(LocationRating.select().count(), 0)
def test_hand_label_rating_events(self):
    # A rating logged after the task period has ended is expected to be
    # tied to that task when a matching hand label is supplied.
    period_end = datetime.datetime(2000, 1, 1, 12, 2, 0, 0)
    period_start = datetime.datetime(2000, 1, 1, 12, 0, 0, 0)

    # Logged at 12:03, one minute after the period ends.
    unaligned_event = create_location_event(
        user_id=2,
        url="http://url2.com",
        event_type="Rating: 1",
        log_date=datetime.datetime(2000, 1, 1, 12, 3, 0, 0),
    )
    create_task_period(
        user_id=2,
        task_index=4,
        start=period_start,
        end=period_end,
    )

    labels = []
    labels.append({
        'user_id': 2,
        'task_index': 4,
        'event_id': unaligned_event.id,
    })
    compute_location_ratings(labels=labels)

    self.assertEqual(LocationRating.select().count(), 1)
    only_rating = LocationRating.select().first()
    self.assertEqual(only_rating.task_index, 4)
    self.assertEqual(only_rating.hand_aligned, True)
def test_ignore_content_loaded_in_other_tabs(self):
    # Expectation under test: content loading in a tab other than the
    # activated one does not end (and so does not create) a visit.
    #
    # (tab_id, event_type, url, time); log and visit dates coincide.
    events = [
        ('1', "Tab activated", "http://url1.com",
         datetime.datetime(2000, 1, 1, 12, 0, 1, 0)),
        ('2', "Tab content loaded (pageshow)", "http://url2.com",
         datetime.datetime(2000, 1, 1, 12, 0, 2, 0)),
    ]
    for tab, kind, address, moment in events:
        create_location_event(
            log_date=moment,
            visit_date=moment,
            event_type=kind,
            url=address,
            tab_id=tab,
        )

    compute_location_visits()

    recorded = LocationVisit.select()
    self.assertEqual(recorded.count(), 0)
def test_ignore_content_loaded_in_other_tabs(self):
    # Tab 1 is activated, then a page load is reported for tab 2. No visit
    # is expected to come out of this sequence.
    activated_at = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    loaded_at = datetime.datetime(2000, 1, 1, 12, 0, 2, 0)

    create_location_event(
        tab_id='1',
        url="http://url1.com",
        event_type="Tab activated",
        log_date=activated_at,
        visit_date=activated_at,
    )
    # The load happens in a different tab than the one activated above.
    create_location_event(
        tab_id='2',
        url="http://url2.com",
        event_type="Tab content loaded (pageshow)",
        log_date=loaded_at,
        visit_date=loaded_at,
    )

    compute_location_visits()

    all_visits = LocationVisit.select()
    self.assertEqual(all_visits.count(), 0)
def test_by_default_associate_visit_with_latest_computed_task_periods(self):
    # Expectation under test: with no compute index given, a visit is
    # matched to the task period with the highest compute index.
    activations = [
        ('1', "http://url1.com", datetime.datetime(2000, 1, 1, 12, 0, 1, 0)),
        ('2', "http://url2.com", datetime.datetime(2000, 1, 1, 12, 0, 6, 0)),
    ]
    for tab, address, moment in activations:
        create_location_event(
            log_date=moment,
            visit_date=moment,
            event_type="Tab activated",
            tab_id=tab,
            url=address,
        )

    period_start = datetime.datetime(2000, 1, 1, 12, 0, 0, 0)
    period_end = datetime.datetime(2000, 1, 1, 12, 2, 0, 0)
    # (compute_index, task_index) pairs. All periods cover the same span;
    # the second has the latest compute index (2).
    for compute_idx, task_idx in [(0, 1), (2, 2), (1, 3)]:
        create_task_period(
            compute_index=compute_idx,
            start=period_start,
            end=period_end,
            task_index=task_idx,
        )

    compute_location_visits()

    first_visit = LocationVisit.select()[0]
    self.assertEqual(first_visit.task_index, 2)
def test_chain_multiple_location_visits_by_activations(self):
    # Expectation under test: each tab activation ends the previous visit
    # and starts a new one, so three activations give two finished visits.
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    # Tabs 1 to 3 are activated one second apart, each on its own URL;
    # log and visit dates coincide.
    for number in (1, 2, 3):
        moment = datetime.datetime(2000, 1, 1, 12, 0, number, 0)
        create_location_event(
            log_date=moment,
            visit_date=moment,
            event_type="Tab activated",
            url="http://url%d.com" % number,
            tab_id=str(number),
        )

    compute_location_visits()

    recorded = LocationVisit.select()
    self.assertEqual(recorded.count(), 2)

    # The last activation has nothing after it to close its visit.
    visited_urls = [entry.url for entry in LocationVisit.select()]
    self.assertIn("http://url1.com", visited_urls)
    self.assertIn("http://url2.com", visited_urls)
def test_ignore_consecutive_page_loads_of_same_url(self):
    # Expectation under test: two load events in a row for the same URL in
    # the same tab do not produce a visit between them.
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    # (event_type, url, time), all in tab '1'. The two load events report
    # the same URL.
    events = [
        ("Tab activated", "http://url1.com",
         datetime.datetime(2000, 1, 1, 12, 0, 1, 0)),
        ("Tab content loaded (pageshow)", "http://url2.com",
         datetime.datetime(2000, 1, 1, 12, 0, 2, 0)),
        ("Tab content loaded (ready)", "http://url2.com",
         datetime.datetime(2000, 1, 1, 12, 0, 3, 0)),
    ]
    for kind, address, moment in events:
        create_location_event(
            log_date=moment,
            visit_date=moment,
            event_type=kind,
            url=address,
            tab_id='1',
        )

    compute_location_visits()

    recorded = LocationVisit.select()
    self.assertEqual(recorded.count(), 1)
    visited_urls = [entry.url for entry in LocationVisit.select()]
    self.assertIn("http://url1.com", visited_urls)
def test_by_default_associate_visit_with_latest_computed_task_periods(self):
    # Three task periods share the same time span and differ only in their
    # compute index. With no index requested, the visit is expected to go
    # to the period computed last (compute_index=2, task_index=2).
    window = dict(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )
    entered = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    left = datetime.datetime(2000, 1, 1, 12, 0, 6, 0)

    create_location_event(
        tab_id='1',
        url="http://url1.com",
        event_type="Tab activated",
        log_date=entered,
        visit_date=entered,
    )
    create_location_event(
        tab_id='2',
        url="http://url2.com",
        event_type="Tab activated",
        log_date=left,
        visit_date=left,
    )

    create_task_period(compute_index=0, task_index=1, **window)
    create_task_period(compute_index=2, task_index=2, **window)
    create_task_period(compute_index=1, task_index=3, **window)

    compute_location_visits()

    earliest = LocationVisit.select()[0]
    self.assertEqual(earliest.task_index, 2)
def test_ignore_consecutive_page_loads_of_same_url(self):
    # A tab is activated on one URL, then two load events arrive for a
    # second URL in that tab. Only the visit to the first URL is expected.
    same_tab = dict(tab_id='1')
    activated_at = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    pageshow_at = datetime.datetime(2000, 1, 1, 12, 0, 2, 0)
    ready_at = datetime.datetime(2000, 1, 1, 12, 0, 3, 0)

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )
    create_location_event(
        url="http://url1.com",
        event_type="Tab activated",
        log_date=activated_at,
        visit_date=activated_at,
        **same_tab
    )
    # First load of the new page...
    create_location_event(
        url="http://url2.com",
        event_type="Tab content loaded (pageshow)",
        log_date=pageshow_at,
        visit_date=pageshow_at,
        **same_tab
    )
    # ...and a second load event for the same page.
    create_location_event(
        url="http://url2.com",
        event_type="Tab content loaded (ready)",
        log_date=ready_at,
        visit_date=ready_at,
        **same_tab
    )

    compute_location_visits()

    all_visits = LocationVisit.select()
    self.assertEqual(all_visits.count(), 1)
    seen = [item.url for item in LocationVisit.select()]
    self.assertIn("http://url1.com", seen)
def test_chain_multiple_location_visits_by_activations(self):
    # Three tabs are activated in sequence. The first two activations are
    # each followed by another, so two visits are expected in total.
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    # (tab_id, url, time); log and visit dates coincide.
    activations = [
        ('1', "http://url1.com", datetime.datetime(2000, 1, 1, 12, 0, 1, 0)),
        ('2', "http://url2.com", datetime.datetime(2000, 1, 1, 12, 0, 2, 0)),
        ('3', "http://url3.com", datetime.datetime(2000, 1, 1, 12, 0, 3, 0)),
    ]
    for tab, address, moment in activations:
        create_location_event(
            tab_id=tab,
            url=address,
            event_type="Tab activated",
            log_date=moment,
            visit_date=moment,
        )

    compute_location_visits()

    all_visits = LocationVisit.select()
    self.assertEqual(all_visits.count(), 2)

    seen = [item.url for item in LocationVisit.select()]
    for expected_url in ("http://url1.com", "http://url2.com"):
        self.assertIn(expected_url, seen)
def test_window_deactivated_flushes_old_location(self):
    # Expectation under test: a "Window deactivated" event closes the visit
    # in progress, and the following activation does not extend it.
    activated_at = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    deactivated_at = activated_at + datetime.timedelta(seconds=1)
    reactivated_at = activated_at + datetime.timedelta(seconds=2)

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 3, 0, 0),
    )

    # Activate, deactivate the window, then activate again, one second
    # apart; log and visit dates coincide.
    for kind, moment in [
        ("Tab activated", activated_at),
        ("Window deactivated", deactivated_at),
        ("Tab activated", reactivated_at),
    ]:
        create_location_event(
            log_date=moment,
            visit_date=moment,
            event_type=kind,
        )

    compute_location_visits()

    # Exactly one visit: the one closed by the window deactivation.
    self.assertEqual(LocationVisit.select().count(), 1)

    # That visit must end at the deactivation time (12:00:02), not at the
    # later activation.
    recorded = LocationVisit.select()
    only_visit = recorded[0]
    self.assertEqual(only_visit.end, datetime.datetime(2000, 1, 1, 12, 0, 2, 0))
def test_skip_ratings_that_couldnt_be_classified(self):
    # Expectation under test: rating events logged outside every task
    # period are skipped, and only the one inside a period is stored.
    #
    # (log_date, event_type, url): the first is logged before the task
    # period, the second inside it, and the third after it.
    rating_events = [
        (datetime.datetime(2000, 1, 1, 11, 0, 0, 0), "Rating: 0", "http://url1.com"),
        (datetime.datetime(2000, 1, 1, 12, 0, 1, 0), "Rating: 1", "http://url2.com"),
        (datetime.datetime(2000, 1, 1, 13, 0, 0, 0), "Rating: 2", "http://url2.com"),
    ]
    for logged_at, kind, address in rating_events:
        create_location_event(
            log_date=logged_at,
            event_type=kind,
            url=address,
            user_id=0,
        )

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        user_id=0,
    )

    compute_location_ratings()

    extracted = LocationRating.select()
    self.assertEqual(extracted.count(), 1)
    self.assertEqual(extracted.first().rating, 1)
def test_window_deactivated_flushes_old_location(self):
    # A tab is activated, the window is deactivated a second later, and a
    # tab is activated again a second after that. One visit is expected,
    # ending when the window was deactivated.
    one_second = datetime.timedelta(seconds=1)
    first_activation = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    window_lost = first_activation + one_second
    second_activation = first_activation + datetime.timedelta(seconds=2)

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 3, 0, 0),
    )
    create_location_event(
        event_type="Tab activated",
        log_date=first_activation,
        visit_date=first_activation,
    )
    create_location_event(
        event_type="Window deactivated",
        log_date=window_lost,
        visit_date=window_lost,
    )
    create_location_event(
        event_type="Tab activated",
        log_date=second_activation,
        visit_date=second_activation,
    )

    compute_location_visits()

    # Only a single visit should have been stored.
    visit_total = LocationVisit.select().count()
    self.assertEqual(visit_total, 1)

    # Its end is the moment of deactivation, not of the next activation.
    all_visits = LocationVisit.select()
    flushed = all_visits[0]
    self.assertEqual(flushed.end, datetime.datetime(2000, 1, 1, 12, 0, 2, 0))
def test_skip_ratings_that_couldnt_be_classified(self):
    # Three ratings are logged for user 0, but only the middle one falls
    # inside the task period. A single stored rating with value 1 is
    # expected.
    for_user = dict(user_id=0)

    # Before the period.
    create_location_event(
        url="http://url1.com",
        event_type="Rating: 0",
        log_date=datetime.datetime(2000, 1, 1, 11, 0, 0, 0),
        **for_user
    )
    # Inside the period.
    create_location_event(
        url="http://url2.com",
        event_type="Rating: 1",
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        **for_user
    )
    # After the period.
    create_location_event(
        url="http://url2.com",
        event_type="Rating: 2",
        log_date=datetime.datetime(2000, 1, 1, 13, 0, 0, 0),
        **for_user
    )
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        **for_user
    )

    compute_location_ratings()

    stored = LocationRating.select()
    self.assertEqual(stored.count(), 1)
    self.assertEqual(stored.first().rating, 1)
def test_if_task_compute_index_specified_only_match_tasks_with_that_index(self):
    # Expectation under test: passing `task_compute_index` limits visit
    # matching to task periods that carry that compute index.
    activations = [
        ('1', "http://url1.com", datetime.datetime(2000, 1, 1, 12, 0, 1, 0)),
        ('2', "http://url2.com", datetime.datetime(2000, 1, 1, 12, 0, 6, 0)),
    ]
    for tab, address, moment in activations:
        create_location_event(
            log_date=moment,
            visit_date=moment,
            event_type="Tab activated",
            tab_id=tab,
            url=address,
        )

    period_start = datetime.datetime(2000, 1, 1, 12, 0, 0, 0)
    period_end = datetime.datetime(2000, 1, 1, 12, 2, 0, 0)
    # (compute_index, task_index) pairs over one shared time span.
    for compute_idx, task_idx in [(0, 1), (2, 2), (1, 3)]:
        create_task_period(
            compute_index=compute_idx,
            start=period_start,
            end=period_end,
            task_index=task_idx,
        )

    # Requesting compute index 0 should restrict the match to the period
    # created with that index (task_index=1).
    compute_location_visits(task_compute_index=0)

    first_visit = LocationVisit.select()[0]
    self.assertEqual(first_visit.task_index, 1)
def test_location_event_must_match_task_period_and_user_id(self):
    # Expectation under test: a rating is matched only to a task period
    # that has both the right timing and the right user.
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        event_type="Rating: 0",
        url="http://url1.com",
        user_id=0,
    )

    # (start, end, task_index, user_id). `task_index` only serves to tell
    # the periods apart once the rating has been extracted.
    candidate_periods = [
        # Right timing, wrong user.
        (datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
         datetime.datetime(2000, 1, 1, 12, 2, 0, 0), 2, 1),
        # Right timing, right user: the expected match.
        (datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
         datetime.datetime(2000, 1, 1, 12, 2, 0, 0), 3, 0),
        # Wrong timing, right user.
        (datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
         datetime.datetime(2000, 1, 1, 12, 4, 0, 0), 4, 0),
    ]
    for begins, finishes, index, owner in candidate_periods:
        create_task_period(
            start=begins,
            end=finishes,
            task_index=index,
            user_id=owner,
        )

    compute_location_ratings()

    extracted = LocationRating.select()
    self.assertEqual(extracted.count(), 1)
    only_rating = extracted[0]
    self.assertEqual(only_rating.user_id, 0)
    self.assertEqual(only_rating.task_index, 3)
def test_if_task_compute_index_specified_only_match_tasks_with_that_index(self):
    # Three task periods cover the same two minutes and differ only in their
    # compute index; the visit is expected to land in the one requested.
    window = dict(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )
    entered = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    left = datetime.datetime(2000, 1, 1, 12, 0, 6, 0)

    create_location_event(
        tab_id='1',
        url="http://url1.com",
        event_type="Tab activated",
        log_date=entered,
        visit_date=entered,
    )
    create_location_event(
        tab_id='2',
        url="http://url2.com",
        event_type="Tab activated",
        log_date=left,
        visit_date=left,
    )

    create_task_period(compute_index=0, task_index=1, **window)
    create_task_period(compute_index=2, task_index=2, **window)
    create_task_period(compute_index=1, task_index=3, **window)

    # Ask for compute index 0, which belongs to the period with task index 1.
    compute_location_visits(task_compute_index=0)

    earliest = LocationVisit.select()[0]
    self.assertEqual(earliest.task_index, 1)
def test_location_event_must_match_task_period_and_user_id(self):
    # One rating by user 0 is logged at 12:00:01. Of the three task periods
    # below, only one fits both the time and the user, and the rating is
    # expected to be tied to it. The task indexes exist only to identify
    # which period was chosen.
    noon = datetime.datetime(2000, 1, 1, 12, 0, 0, 0)
    two_past = datetime.datetime(2000, 1, 1, 12, 2, 0, 0)
    four_past = datetime.datetime(2000, 1, 1, 12, 4, 0, 0)

    create_location_event(
        user_id=0,
        url="http://url1.com",
        event_type="Rating: 0",
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
    )

    # Mismatch: covers the event's time but belongs to another user.
    create_task_period(start=noon, end=two_past, task_index=2, user_id=1)
    # Match: covers the event's time and belongs to the same user.
    create_task_period(start=noon, end=two_past, task_index=3, user_id=0)
    # Mismatch: same user, but the period starts after the event.
    create_task_period(start=two_past, end=four_past, task_index=4, user_id=0)

    compute_location_ratings()

    stored = LocationRating.select()
    self.assertEqual(stored.count(), 1)
    matched = stored[0]
    self.assertEqual(matched.user_id, 0)
    self.assertEqual(matched.task_index, 3)
def test_make_location_visit_associated_with_tasks_of_same_user(self):
    # Events logged by user 0 must not produce visits for a task period that
    # belongs to user 1.
    create_location_event(
        user_id=0,
        log_date=datetime.datetime(2000, 1, 1, 11, 0, 1, 0),
        visit_date=datetime.datetime(2000, 1, 1, 11, 0, 1, 0),
        event_type="Tab activated",
        tab_id='1',
        url="http://url1.com",
    )
    create_location_event(
        user_id=0,
        log_date=datetime.datetime(2000, 1, 1, 11, 0, 2, 0),
        visit_date=datetime.datetime(2000, 1, 1, 11, 0, 2, 0),
        event_type="Tab activated",
        tab_id='2',
        url="http://url1.com",
    )
    create_task_period(
        user_id=1,
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )
    # NOTE(review): both events are stamped 11:00, outside the 12:00-12:02
    # task period, so this would presumably also pass if user matching were
    # ignored. Consider moving the events inside the period -- TODO confirm.

    # Run the computation under test; without this call the assertion below
    # holds trivially because nothing has had a chance to create visits.
    compute_location_visits()
    self.assertEqual(LocationVisit.select().count(), 0)
def test_return_ratings_that_couldnt_be_classified(self):
    # `compute_location_ratings` should report the rating events that fall
    # outside every task period. Here the earliest and latest events are
    # outside the single period; the middle one is inside it.
    too_early = create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 11, 0, 0, 0),
        event_type="Rating: 0",
        url="http://url1.com",
        user_id=0,
    )
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        event_type="Rating: 1",
        url="http://url2.com",
        user_id=0,
    )
    too_late = create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 13, 0, 0, 0),
        event_type="Rating: 2",
        url="http://url2.com",
        user_id=0,
    )
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        user_id=0,
    )

    leftovers = compute_location_ratings()

    # Each unmatched event is expected as a {user_id, event_id} record.
    for stray_event in (too_early, too_late):
        expected_record = {'user_id': 0, 'event_id': stray_event.id}
        self.assertIn(expected_record, leftovers)
def test_create_location_visit(self):
    # Setup: two consecutive tab activations bound a single visit to url1,
    # inside a task period with task index 3 and concern index 5.
    entered_at = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
    left_at = datetime.datetime(2000, 1, 1, 12, 0, 2, 0)

    create_location_event(
        log_date=entered_at,
        visit_date=entered_at,
        event_type="Tab activated",
        tab_id='1',
        url="http://url1.com",
    )
    create_location_event(
        log_date=left_at,
        visit_date=left_at,
        event_type="Tab activated",
        tab_id='2',
        url="http://url2.com",
    )
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        task_index=3,
        concern_index=5,
    )

    # Test: exactly one visit is produced for the URL that was entered and
    # then left. It takes its time bounds from the two events and its task
    # and concern indexes from the task period active at that time.
    compute_location_visits()

    all_visits = LocationVisit.select()
    self.assertEqual(all_visits.count(), 1)

    created = all_visits[0]
    self.assertEqual(created.user_id, 0)
    self.assertEqual(created.task_index, 3)
    self.assertEqual(created.concern_index, 5)
    self.assertEqual(created.start, datetime.datetime(2000, 1, 1, 12, 0, 1, 0))
    self.assertEqual(created.end, datetime.datetime(2000, 1, 1, 12, 0, 2, 0))
    self.assertEqual(created.url, "http://url1.com")
def test_order_events_by_visit_date(self):
    # The server may receive browser events in a different order than the
    # browser produced them. Events found for a task must therefore be
    # ordered by when the browser saw them ("browser order"), not by when
    # the server logged them. The fixtures below have a server log order
    # that is the reverse of the browser order.
    jumbled_events = (
        # (log_date, visit_date, tab_id, url)
        # Logged first, visited second.
        (datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
         datetime.datetime(2000, 1, 1, 10, 0, 2, 0), '1', "http://url1.com"),
        # Logged second, visited first.
        (datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
         datetime.datetime(2000, 1, 1, 10, 0, 1, 0), '2', "http://url2.com"),
    )
    for logged, visited, tab, address in jumbled_events:
        create_location_event(
            log_date=logged,
            visit_date=visited,
            event_type="Tab activated",
            tab_id=tab,
            url=address,
        )

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        task_index=3,
        concern_index=5,
    )

    compute_location_visits()

    # The first visit must belong to the URL the browser visited first.
    computed = LocationVisit.select()
    earliest = computed[0]
    self.assertEqual(earliest.url, "http://url2.com")
def test_create_location_visit(self):
    # Setup: create two location events bounding a single visit.
    activation = {'event_type': "Tab activated"}
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        visit_date=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        tab_id='1',
        url="http://url1.com",
        **activation
    )
    create_location_event(
        log_date=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
        visit_date=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
        tab_id='2',
        url="http://url2.com",
        **activation
    )

    # The task period that is active while both events occur.
    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
        task_index=3,
        concern_index=5,
    )

    # Test: a single 'visit' should exist for the URL that was visited and
    # then left. Its time bounds come from entering and exiting the URL, and
    # its task and concern indexes come from the surrounding task period.
    compute_location_visits()
    visits = LocationVisit.select()
    self.assertEqual(visits.count(), 1)

    visit = visits[0]
    expectations = (
        ('user_id', 0),
        ('task_index', 3),
        ('concern_index', 5),
        ('start', datetime.datetime(2000, 1, 1, 12, 0, 1, 0)),
        ('end', datetime.datetime(2000, 1, 1, 12, 0, 2, 0)),
        ('url', "http://url1.com"),
    )
    for field_name, wanted in expectations:
        self.assertEqual(getattr(visit, field_name), wanted)
def test_make_no_location_visit_if_it_doesnt_start_after_or_end_before_end_of_task(self):
    # No visit should be created unless it both starts and ends inside the
    # task period. The task period below runs 12:00-12:02; of the four tab
    # activations, only the 12:01 one falls inside it, so no pair of events
    # bounds a visit that lies entirely within the task.
    activations = (
        # (timestamp used for both log and visit date, tab_id, url)
        (datetime.datetime(2000, 1, 1, 11, 59, 1, 0), '1', "http://url1.com"),
        (datetime.datetime(2000, 1, 1, 12, 1, 0, 0), '2', "http://url1.com"),
        (datetime.datetime(2000, 1, 1, 12, 3, 0, 0), '1', "http://url2.com"),
        (datetime.datetime(2000, 1, 1, 12, 5, 0, 0), '2', "http://url2.com"),
    )
    for moment, tab, address in activations:
        create_location_event(
            log_date=moment,
            visit_date=moment,
            event_type="Tab activated",
            tab_id=tab,
            url=address,
        )

    create_task_period(
        start=datetime.datetime(2000, 1, 1, 12, 0, 0, 0),
        end=datetime.datetime(2000, 1, 1, 12, 2, 0, 0),
    )

    # Run the computation under test; without this call the assertion below
    # holds trivially because nothing has had a chance to create visits.
    compute_location_visits()
    self.assertEqual(LocationVisit.select().count(), 0)