コード例 #1
0
    def test_make_tasks_with_events_interleaved_between_users(self):

        # Setup: users 0 and 1 each start and finish a task, with their events
        # alternating in time at one-second intervals.
        first_event_time = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        chronological_events = (
            (0, 'get task'),
            (1, 'get task'),
            (0, 'post task'),
            (1, 'post task'),
        )
        for seconds_elapsed, (user, kind) in enumerate(chronological_events):
            create_question_event(
                user_id=user,
                time=first_event_time + datetime.timedelta(seconds=seconds_elapsed),
                event_type=kind,
            )

        compute_task_periods(extra_periods=())

        # Test: two periods are stored, and both users appear among them.
        self.assertEqual(TaskPeriod.select().count(), 2)
        stored_user_ids = [period.user_id for period in TaskPeriod.select()]
        for expected_user in (0, 1):
            self.assertIn(expected_user, stored_user_ids)
コード例 #2
0
    def test_add_periods_from_extras_even_if_it_matches_discard_pattern(self):

        # Setup: user 3 produces a complete get/post pair of events for question 4.
        period_start = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        for seconds_elapsed, kind in enumerate(('get task', 'post task')):
            create_question_event(
                user_id=3,
                question_index=4,
                time=period_start + datetime.timedelta(seconds=seconds_elapsed),
                event_type=kind,
            )

        # The same user / task pair is both listed for discarding and supplied
        # as an extra period.
        discarded = ({'user_id': 3, 'task_index': 4},)
        extras = ({
            'user_id': 3,
            'task_index': 4,
            'start': datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
            'end': datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
        },)
        compute_task_periods(discard_periods=discarded, extra_periods=extras)

        # Test: exactly one period ends up stored.
        self.assertEqual(TaskPeriod.select().count(), 1)
コード例 #3
0
    def test_add_periods_from_extras_even_if_it_matches_discard_pattern(self):

        # Setup: a get/post pair of events from user 3 on question 4, one second apart.
        base_time = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        shared_fields = {'user_id': 3, 'question_index': 4}
        create_question_event(time=base_time, event_type='get task', **shared_fields)
        create_question_event(
            time=base_time + datetime.timedelta(seconds=1),
            event_type='post task',
            **shared_fields
        )

        # The extra period names the same user and task as the discard pattern.
        extra_period = {
            'user_id': 3,
            'task_index': 4,
            'start': datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
            'end': datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
        }
        compute_task_periods(
            discard_periods=({'user_id': 3, 'task_index': 4},),
            extra_periods=(extra_period,),
        )

        # Test: exactly one period ends up stored.
        stored_periods = TaskPeriod.select()
        self.assertEqual(stored_periods.count(), 1)
コード例 #4
0
def create_location_rating(compute_index, task_compute_index, event, rating, labels):
    '''
    Save a `LocationRating` for a rating event if a task period can be found for it.

    The task period is looked up among the periods with compute index `task_compute_index`
    that belong to the event's user.  If `labels` contains an entry whose 'event_id' equals
    `event.id`, the 'task_index' of the first such entry selects the period and the rating
    is flagged as hand-aligned.  Otherwise the period must strictly contain `event.log_date`.

    Returns True if this rating could be matched to an existing task, False otherwise.
    '''

    # Check for hand-written task index labels for this event.  A list is built
    # explicitly because `filter` returns a lazy iterator on Python 3, which
    # supports neither `len()` nor indexing.
    matching_labels = [label for label in labels if label['event_id'] == event.id]
    if matching_labels:
        task_index = matching_labels[0]['task_index']
        task_periods = (
            TaskPeriod.select()
            .where(
                TaskPeriod.compute_index == task_compute_index,
                TaskPeriod.task_index == task_index,
                TaskPeriod.user_id == event.user_id,
            )
        )
        hand_aligned = True
    # If a hand-written label wasn't found, search for a task that this rating could have
    # occurred within.  If we successfully find a task, then save the rating event.
    else:
        task_periods = (
            TaskPeriod.select()
            .where(
                TaskPeriod.compute_index == task_compute_index,
                TaskPeriod.user_id == event.user_id,
                TaskPeriod.start < event.log_date,
                TaskPeriod.end > event.log_date,
            )
        )
        hand_aligned = False

    # Count the candidate periods once; the result both decides whether a rating
    # is saved and is what gets reported back to the caller.
    task_found = task_periods.count() > 0

    # If a matching task has been found, then save the rating alongside that task.
    if task_found:
        task_period = task_periods[0]
        LocationRating.create(
            compute_index=compute_index,
            user_id=event.user_id,
            task_index=task_period.task_index,
            concern_index=task_period.concern_index,
            url=event.url,
            rating=rating,
            title=event.title,
            visit_date=event.visit_date,
            hand_aligned=hand_aligned,
        )

    return task_found
コード例 #5
0
def create_location_rating(compute_index, task_compute_index, event, rating,
                           labels):
    '''
    Save a `LocationRating` for a rating event if a task period can be found for it.

    Candidate task periods are those with compute index `task_compute_index` for the
    event's user.  When `labels` has an entry whose 'event_id' equals `event.id`, the
    'task_index' of the first such entry picks the period and the saved rating is marked
    hand-aligned.  Without a label, the period must strictly contain `event.log_date`.

    Returns True if this rating could be matched to an existing task, False otherwise.
    '''

    # Check for hand-written task index labels for this event.  `filter` would
    # return a lazy iterator on Python 3 (no `len()`, no indexing), so the
    # matches are collected into a list instead.
    matching_labels = [label for label in labels if label['event_id'] == event.id]
    if matching_labels:
        task_index = matching_labels[0]['task_index']
        task_periods = (TaskPeriod.select().where(
            TaskPeriod.compute_index == task_compute_index,
            TaskPeriod.task_index == task_index,
            TaskPeriod.user_id == event.user_id,
        ))
        hand_aligned = True
    # If a hand-written label wasn't found, search for a task that this rating could have
    # occurred within.  If we successfully find a task, then save the rating event.
    else:
        task_periods = (TaskPeriod.select().where(
            TaskPeriod.compute_index == task_compute_index,
            TaskPeriod.user_id == event.user_id,
            TaskPeriod.start < event.log_date,
            TaskPeriod.end > event.log_date,
        ))
        hand_aligned = False

    # Run the count query a single time and reuse the answer for the return value.
    task_found = task_periods.count() > 0

    # If a matching task has been found, then save the rating alongside that task.
    if task_found:
        task_period = task_periods[0]
        LocationRating.create(
            compute_index=compute_index,
            user_id=event.user_id,
            task_index=task_period.task_index,
            concern_index=task_period.concern_index,
            url=event.url,
            rating=rating,
            title=event.title,
            visit_date=event.visit_date,
            hand_aligned=hand_aligned,
        )

    return task_found
コード例 #6
0
    def test_skip_task_period_with_unmatching_task_indexes(self):

        # Setup: a 'get task' event for question 0 followed one second later
        # by a 'post task' event for question 1.
        first_event_time = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        for position, kind in enumerate(('get task', 'post task')):
            create_question_event(
                question_index=position,
                time=first_event_time + datetime.timedelta(seconds=position),
                event_type=kind,
            )

        compute_task_periods(extra_periods=())

        # Test: no period is stored for the mismatched pair.
        self.assertEqual(TaskPeriod.select().count(), 0)
コード例 #7
0
    def test_make_task_only_if_both_events_from_one_user(self):

        # Setup: user 0 loads a task, and one second later user 1 posts one.
        first_event_time = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        for user, kind in enumerate(('get task', 'post task')):
            create_question_event(
                user_id=user,
                time=first_event_time + datetime.timedelta(seconds=user),
                event_type=kind,
            )

        compute_task_periods(extra_periods=())

        # Test: events from different users are not paired into a period.
        self.assertEqual(TaskPeriod.select().count(), 0)
コード例 #8
0
    def test_make_task_only_if_both_events_from_one_user(self):

        # Setup: the opening event belongs to user 0, the closing one to user 1.
        opened_at = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        closed_at = opened_at + datetime.timedelta(seconds=1)
        create_question_event(user_id=0, time=opened_at, event_type='get task')
        create_question_event(user_id=1, time=closed_at, event_type='post task')

        compute_task_periods(extra_periods=())

        # Test: nothing is stored, as neither user has a complete pair of events.
        stored_periods = TaskPeriod.select()
        self.assertEqual(stored_periods.count(), 0)
コード例 #9
0
    def test_skip_task_period_with_unmatching_task_indexes(self):

        # Setup: the opening event is for question 0 but the closing event,
        # one second later, is for question 1.
        opened_at = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        closed_at = opened_at + datetime.timedelta(seconds=1)
        create_question_event(question_index=0, time=opened_at, event_type='get task')
        create_question_event(question_index=1, time=closed_at, event_type='post task')

        compute_task_periods(extra_periods=())

        # Test: the mismatched pair does not produce a period.
        stored_periods = TaskPeriod.select()
        self.assertEqual(stored_periods.count(), 0)
コード例 #10
0
    def test_make_tasks_with_events_interleaved_between_users(self):

        # Setup: both users load a task before either of them posts it, so the
        # events of user 0 and user 1 alternate, one second apart.
        base_time = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        seconds_elapsed = 0
        for kind in ('get task', 'post task'):
            for user in (0, 1):
                create_question_event(
                    user_id=user,
                    time=base_time + datetime.timedelta(seconds=seconds_elapsed),
                    event_type=kind,
                )
                seconds_elapsed += 1

        compute_task_periods(extra_periods=())

        # Test: two periods are stored and both users are represented.
        self.assertEqual(TaskPeriod.select().count(), 2)
        owners = [stored.user_id for stored in TaskPeriod.select()]
        self.assertIn(0, owners)
        self.assertIn(1, owners)
コード例 #11
0
    def test_add_periods_with_extras_specification(self):

        # Setup: no question events exist; a single extra period is supplied.
        extra_period = {
            'user_id': 3,
            'task_index': 4,
            'start': datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
            'end': datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
        }
        compute_task_periods(extra_periods=(extra_period,))

        # Test: the one stored period carries every field of the specification.
        stored_periods = TaskPeriod.select()
        self.assertEqual(stored_periods.count(), 1)
        only_period = stored_periods[0]
        for field_name in ('user_id', 'task_index', 'start', 'end'):
            self.assertEqual(getattr(only_period, field_name), extra_period[field_name])
コード例 #12
0
def compute_location_ratings(labels=HAND_LABELED_EVENTS,
                             task_compute_index=None):
    '''
    Create location ratings from every location event whose type reads "Rating: <digits>".

    `labels` is handed on unchanged to `create_location_rating`.  `task_compute_index`
    picks which computation of task periods the ratings are matched against; when it is
    None, the largest compute index stored in `TaskPeriod` is used.

    Returns a list of dictionaries holding the 'user_id' and 'event_id' of each rating
    event for which `create_location_rating` reported that no rating was created.
    '''

    # Create a new index for this computation
    last_compute_index = LocationRating.select(
        fn.Max(LocationRating.compute_index)).scalar() or 0
    compute_index = last_compute_index + 1

    # Determine what will be the compute index of the task periods that ratings are matched to.
    # This will become the latest compute index if it hasn't been specified.
    if task_compute_index is None:
        task_compute_index = TaskPeriod.select(fn.Max(
            TaskPeriod.compute_index)).scalar()

    # Create a list to hold all ratings that couldn't be matched to a task period.
    # At the end, we want to return these, in case it's important for the caller to know
    # which events we couldn't create rating records for.
    unmatched_ratings = []

    # The quantifier sits inside the group so that the whole number is captured.
    # With `(\d)+`, the group only kept the final digit of a multi-digit rating.
    rating_pattern = re.compile(r"^Rating: (\d+)$")

    for event in LocationEvent.select():

        # Check to see whether this is a rating event
        rating_match = rating_pattern.match(event.event_type)
        if rating_match:

            # If this is a rating event, extract the rating
            rating = int(rating_match.group(1))
            rating_created = create_location_rating(
                compute_index=compute_index,
                task_compute_index=task_compute_index,
                event=event,
                rating=rating,
                labels=labels,
            )

            # If a rating wasn't created, this probably couldn't be matched to a task.
            # Save a record of which event failed to be matched to a task and which user
            # this event happened for.
            if not rating_created:
                unmatched_ratings.append({
                    'user_id': event.user_id,
                    'event_id': event.id,
                })

    return unmatched_ratings
コード例 #13
0
    def test_add_periods_with_extras_specification(self):

        # Setup: the only input is one extra period for user 3 and task 4.
        expected_start = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        expected_end = datetime.datetime(2000, 1, 1, 12, 0, 2, 0)
        compute_task_periods(extra_periods=(
            dict(user_id=3, task_index=4, start=expected_start, end=expected_end),
        ))

        # Test: a single period is stored, matching the specification field by field.
        stored_periods = TaskPeriod.select()
        self.assertEqual(stored_periods.count(), 1)
        saved = stored_periods[0]
        self.assertEqual(saved.user_id, 3)
        self.assertEqual(saved.task_index, 4)
        self.assertEqual(saved.start, expected_start)
        self.assertEqual(saved.end, expected_end)
コード例 #14
0
    def test_make_task_period(self):

        # Setup: create two events bounding a single task, one second apart
        task_opened = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        task_closed = task_opened + datetime.timedelta(seconds=1)
        for moment, kind in ((task_opened, 'get task'), (task_closed, 'post task')):
            create_question_event(time=moment, event_type=kind)

        compute_task_periods(extra_periods=())

        # Test: make sure a single task has been created for user 0, with the
        # time bounds of the events that started and ended it.
        stored_periods = TaskPeriod.select()
        self.assertEqual(stored_periods.count(), 1)
        saved = stored_periods[0]
        self.assertEqual(saved.user_id, 0)
        self.assertEqual(saved.start, datetime.datetime(2000, 1, 1, 12, 0, 1, 0))
        self.assertEqual(saved.end, datetime.datetime(2000, 1, 1, 12, 0, 2, 0))
コード例 #15
0
    def test_make_task_period(self):

        # Setup: a 'get task' event and, one second later, a 'post task' event
        first_event_time = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        for seconds_elapsed, kind in enumerate(('get task', 'post task')):
            create_question_event(
                time=first_event_time + datetime.timedelta(seconds=seconds_elapsed),
                event_type=kind,
            )

        compute_task_periods(extra_periods=())

        # Test: exactly one period exists, it belongs to user 0, and it runs
        # from the time of the first event to the time of the second.
        stored_periods = TaskPeriod.select()
        self.assertEqual(stored_periods.count(), 1)
        only_period = stored_periods[0]
        expected_fields = (
            ('user_id', 0),
            ('start', datetime.datetime(2000, 1, 1, 12, 0, 1, 0)),
            ('end', datetime.datetime(2000, 1, 1, 12, 0, 2, 0)),
        )
        for field_name, expected_value in expected_fields:
            self.assertEqual(getattr(only_period, field_name), expected_value)
コード例 #16
0
def compute_location_ratings(labels=HAND_LABELED_EVENTS, task_compute_index=None):
    '''
    Create location ratings from every location event whose type reads "Rating: <digits>".

    `labels` is forwarded as-is to `create_location_rating`.  `task_compute_index` chooses
    the computation of task periods to match ratings against; if it is None, the largest
    compute index stored in `TaskPeriod` is used instead.

    Returns a list of dictionaries with the 'user_id' and 'event_id' of each rating event
    for which `create_location_rating` reported that no rating was created.
    '''

    # Create a new index for this computation
    last_compute_index = LocationRating.select(fn.Max(LocationRating.compute_index)).scalar() or 0
    compute_index = last_compute_index + 1

    # Determine what will be the compute index of the task periods that ratings are matched to.
    # This will become the latest compute index if it hasn't been specified.
    if task_compute_index is None:
        task_compute_index = TaskPeriod.select(fn.Max(TaskPeriod.compute_index)).scalar()

    # Create a list to hold all ratings that couldn't be matched to a task period.
    # At the end, we want to return these, in case it's important for the caller to know
    # which events we couldn't create rating records for.
    unmatched_ratings = []

    # Capture all of the digits: `(\d)+` repeats the group itself, which leaves
    # only the last digit in group 1 when the rating has more than one digit.
    rating_pattern = re.compile(r"^Rating: (\d+)$")

    for event in LocationEvent.select():

        # Check to see whether this is a rating event
        rating_match = rating_pattern.match(event.event_type)
        if rating_match:

            # If this is a rating event, extract the rating
            rating = int(rating_match.group(1))
            rating_created = create_location_rating(
                compute_index=compute_index,
                task_compute_index=task_compute_index,
                event=event,
                rating=rating,
                labels=labels,
            )

            # If a rating wasn't created, this probably couldn't be matched to a task.
            # Save a record of which event failed to be matched to a task and which user
            # this event happened for.
            if not rating_created:
                unmatched_ratings.append({
                    'user_id': event.user_id,
                    'event_id': event.id,
                })

    return unmatched_ratings
コード例 #17
0
    def test_dont_discard_periods_that_dont_match_discard_pattern(self):

        # Setup: user 3 produces a complete get/post pair of events for question 5.
        START_TIME = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        create_question_event(user_id=3,
                              question_index=5,
                              time=START_TIME,
                              event_type='get task')
        create_question_event(user_id=3,
                              question_index=5,
                              time=START_TIME + datetime.timedelta(seconds=1),
                              event_type='post task')

        # The discard pattern names the same user but a different task (4, not 5).
        # It is keyed by 'task_index', the key that discard specifications are
        # compared on; a pattern keyed by 'question_index' could never match any
        # period, so the test would pass without checking the task index at all.
        compute_task_periods(
            discard_periods=({
                'user_id': 3,
                'task_index': 4
            }, ),
            extra_periods=(),
        )

        # Test: the period for task 5 survives.
        task_periods = TaskPeriod.select()
        self.assertEqual(task_periods.count(), 1)
コード例 #18
0
    def test_dont_discard_periods_that_dont_match_discard_pattern(self):

        # Setup: user 3 produces a complete get/post pair of events for question 5.
        START_TIME = datetime.datetime(2000, 1, 1, 12, 0, 1, 0)
        create_question_event(
            user_id=3,
            question_index=5,
            time=START_TIME,
            event_type='get task'
        )
        create_question_event(
            user_id=3,
            question_index=5,
            time=START_TIME + datetime.timedelta(seconds=1),
            event_type='post task'
        )

        # The discard pattern targets the same user but task 4 rather than task 5.
        # It must be keyed by 'task_index' to be comparable with the discard
        # specifications; keyed by 'question_index' it could never match any
        # period, which made this test pass vacuously.
        compute_task_periods(
            discard_periods=({'user_id': 3, 'task_index': 4},),
            extra_periods=(),
        )

        # Test: the period for task 5 survives.
        task_periods = TaskPeriod.select()
        self.assertEqual(task_periods.count(), 1)
コード例 #19
0
def compute_task_periods(discard_periods=DISCARD_TASK_PERIODS, extra_periods=EXTRA_TASK_PERIODS):
    '''
    Store a `TaskPeriod` for each 'get task' / 'post task' pair of question events.

    Every record written by one call shares a new compute index, one greater than the
    largest index already stored.  A pair is only saved when both events have the same
    question index and its user / task index combination is not in `discard_periods`.
    Each entry of `extra_periods` is saved as given, without consulting the discard list.
    '''

    # This computation gets the next unused compute index
    compute_index = (TaskPeriod.select(fn.Max(TaskPeriod.compute_index)).scalar() or 0) + 1

    # User IDs are walked from 0 up to the largest one found among the question events
    highest_user_id = QuestionEvent.select(fn.Max(QuestionEvent.user_id)).scalar() or 0

    for user_id in range(0, highest_user_id + 1):

        events_in_order = (
            QuestionEvent
            .select()
            .where(QuestionEvent.user_id == user_id)
            .order_by(QuestionEvent.time.asc())
        )

        # The latest 'get task' event that no 'post task' event has followed yet
        pending_start = None

        for event in events_in_order:

            # Loading the 'task' page (re)starts the period being tracked.
            if event.event_type == 'get task':
                pending_start = event
                continue

            # Events other than task loads and task submissions are ignored.
            if event.event_type != 'post task':
                continue

            # A 'post task' event always clears the pending start, whether or
            # not a period ends up being saved for it.
            opening_event, pending_start = pending_start, None

            # There has to be a start event, and it must be for the same question.
            if opening_event is None:
                continue
            if event.question_index != opening_event.question_index:
                continue

            # Periods whose user and task index are on the discard list are skipped.
            discard_key = {
                'user_id': user_id,
                'task_index': event.question_index,
            }
            if discard_key in discard_periods:
                continue

            TaskPeriod.create(
                compute_index=compute_index,
                user_id=user_id,
                task_index=event.question_index,
                concern_index=_get_concern_index(user_id, event.question_index),
                start=opening_event.time,
                end=event.time,
            )

    # Finally, append the caller's hand-specified periods to the computed ones.
    for extra in extra_periods:
        extra_user, extra_task = extra['user_id'], extra['task_index']
        TaskPeriod.create(
            compute_index=compute_index,
            user_id=extra_user,
            task_index=extra_task,
            concern_index=_get_concern_index(extra_user, extra_task),
            start=extra['start'],
            end=extra['end'],
        )
コード例 #20
0
def compute_task_periods(discard_periods=DISCARD_TASK_PERIODS,
                         extra_periods=EXTRA_TASK_PERIODS):
    '''
    Store a `TaskPeriod` for each 'get task' / 'post task' pair of question events.

    All records written by one call share a fresh compute index (the largest stored
    index plus one).  A pair of events becomes a period only if both events carry the
    same question index and the user / task index combination is absent from
    `discard_periods`.  Entries of `extra_periods` are stored unconditionally.
    '''

    # Allocate the compute index for this run
    previous_index = TaskPeriod.select(fn.Max(TaskPeriod.compute_index)).scalar()
    compute_index = (previous_index or 0) + 1

    # The largest user ID among the question events bounds the loop below
    final_user_id = QuestionEvent.select(fn.Max(QuestionEvent.user_id)).scalar() or 0

    for user_id in range(0, final_user_id + 1):

        user_events = QuestionEvent.select().where(QuestionEvent.user_id == user_id)
        user_events = user_events.order_by(QuestionEvent.time.asc())

        # The event that opened the task currently in progress, if there is one
        open_task_event = None

        for event in user_events:

            if event.event_type == 'post task':

                # A period is recognized when a task is open and this event
                # closes the very same question that was opened.
                closes_open_task = (
                    open_task_event is not None
                    and event.question_index == open_task_event.question_index
                )
                if closes_open_task:
                    # Honor the discard list before writing anything.
                    candidate = {
                        'user_id': user_id,
                        'task_index': event.question_index,
                    }
                    if candidate not in discard_periods:
                        TaskPeriod.create(
                            compute_index=compute_index,
                            user_id=user_id,
                            task_index=event.question_index,
                            concern_index=_get_concern_index(
                                user_id, event.question_index),
                            start=open_task_event.time,
                            end=event.time,
                        )

                # Whatever happened above, seeing the end of a task means that
                # no task is open any longer.
                open_task_event = None

            elif event.event_type == 'get task':
                # The 'task' page has been loaded: remember the event that did it.
                open_task_event = event

    # Add the caller-supplied periods on top of the computed ones, one at a time.
    for spec in extra_periods:
        TaskPeriod.create(
            compute_index=compute_index,
            user_id=spec['user_id'],
            task_index=spec['task_index'],
            concern_index=_get_concern_index(spec['user_id'], spec['task_index']),
            start=spec['start'],
            end=spec['end'],
        )
コード例 #21
0
def compute_location_visits(task_compute_index=None):
    '''
    Assemble location visits from the location events logged during each task period.

    For every user and every task in `TASK_RANGE`, the first stored task period for that
    pair is looked up, the user's location events logged within it are walked in order of
    visit date, and `create_location_visit` is called each time a visit is found to end.

    `task_compute_index` selects which computation of task periods to use; when it is
    None, the largest compute index stored in `TaskPeriod` is used.
    '''

    # Create a new index for this computation
    last_compute_index = LocationVisit.select(fn.Max(LocationVisit.compute_index)).scalar() or 0
    compute_index = last_compute_index + 1

    # Determine what will be the compute index of the task periods that these visits are matched to.
    # This will become the latest compute index if it hasn't been specified.
    if task_compute_index is None:
        task_compute_index = TaskPeriod.select(fn.Max(TaskPeriod.compute_index)).scalar()

    # Compute the ID of the last user to complete the study.  The aggregate yields
    # None when there are no location events at all; fall back to 0 so that the
    # `range` below doesn't fail on `None + 1`.
    max_user_id = LocationEvent.select(fn.Max(LocationEvent.user_id)).scalar() or 0

    # Compute the time that each user spends in each question
    for user_id in range(0, max_user_id + 1):

        # Visit all tasks for each user
        for task_index in TASK_RANGE:

            # Fetch the period of time for this task
            task_periods = (
                TaskPeriod.select()
                .where(
                    TaskPeriod.compute_index == task_compute_index,
                    TaskPeriod.task_index == task_index,
                    TaskPeriod.user_id == user_id,
                )
            )
            # Users without a period for this task have nothing to assemble
            if task_periods.count() < 1:
                continue
            task_period = task_periods[0]

            # Fetch the events for all locations the user has visited during this task
            location_events = (
                LocationEvent
                .select()
                .where(
                    LocationEvent.user_id == user_id,
                    LocationEvent.log_date >= task_period.start,
                    LocationEvent.log_date <= task_period.end,
                )
                # While we inspect the "log date" when the server received notice of
                # the event, we use the "visit date" when the browser experienced the
                # events to sort them, as we think this will preserve the original
                # ordering much better.  See the notes in the `create_location_visit`
                # method for more details.
                .order_by(LocationEvent.visit_date.asc())
            )

            # In the space below, we assemble "visits" from sequences of events.
            # Two pieces of state are tracked: the ID of the tab that is currently
            # active (None when no tab is), and the most recent event that
            # determined the URL of the visit in progress in that tab.
            active_tab_id = None
            active_tab_latest_url_event = None

            for event in location_events:

                # When a new page is loaded in the current tab, this is the end of the
                # last event and the start of a new one (that will be in the same tab).
                # Page loads that leave the URL unchanged don't split the visit.
                if event.event_type in NEW_PAGE_EVENTS:
                    if active_tab_id is not None and event.tab_id == active_tab_id:
                        if event.url != active_tab_latest_url_event.url:
                            create_location_visit(
                                compute_index=compute_index,
                                task_period=task_period,
                                user_id=user_id,
                                activating_event=active_tab_latest_url_event,
                                deactivating_event=event,
                            )
                            active_tab_latest_url_event = event

                # If the window has been deactivated, then end the visit in the current tab
                if event.event_type in DEACTIVATING_EVENTS:
                    if active_tab_id is not None:
                        create_location_visit(
                            compute_index=compute_index,
                            task_period=task_period,
                            user_id=user_id,
                            activating_event=active_tab_latest_url_event,
                            deactivating_event=event,
                        )
                        active_tab_id = None
                        active_tab_latest_url_event = None

                # If a tab or window has been activated, that tab is now active.
                if event.event_type in ACTIVATING_EVENTS:

                    # End any visit that is still in progress
                    if active_tab_id is not None:
                        create_location_visit(
                            compute_index=compute_index,
                            task_period=task_period,
                            user_id=user_id,
                            activating_event=active_tab_latest_url_event,
                            deactivating_event=event,
                        )

                    # Set the new active tab
                    active_tab_id = event.tab_id
                    active_tab_latest_url_event = event