Exemple #1
0
def process_segments(mongodb, log_entries):
    """
    For a list of log entries, parse them into a format that makes it easy to construct segments.
    Indexed by username, each entry in the resulting data structure includes the following:
    - segments: all segments for this user
    - entries: all raw log entries
    """
    collection = mongodb[VIDEOS_COL]
    current_videos = list(collection.find({}, {"video_id": 1}).distinct("video_id"))
    videos = []
    for video in current_videos:
        videos.append(video)
    data = {}
    index = 0
    for entry in log_entries:
        index += 1
        if index % 1000 == 0:
            print ".",
        # print entry
        username = get_prop(entry, "USERNAME")
        # ignore ones that are already processed
        #if entry["processed"] == 1:
        #    continue
        # ignore if username is empty
        if username == "":
            continue
        video_id = get_prop(entry, "VIDEO_ID")
        # non-video player events
        if video_id == "":
            # TODO: use this to more accurately capture sessions
            # and also display before/after destinations
            continue
        # video player events
        else:
            # if this video is not in the video database, add it
            if video_id not in videos:
                print "adding video", video_id
                register_new_video(mongodb, video_id, entry)
                videos.append(video_id)
            if video_id not in data:
                data[video_id] = {}
            if username not in data[video_id]:
                data[video_id][username] = {}
                data[video_id][username]["segments"] = []
                data[video_id][username]["entries"] = []

        #TODO: unindent?
            data[video_id][username]["entries"].append(entry)

    #TODO: not rely on data?
    for video_id in data:
        for username in data[video_id]:
            #print video_id, username
            #for entry in data[video_id][username]["entries"]:
            #    print "    ", get_prop(entry, "TYPE_EVENT")
            data[video_id][username]["segments"] = \
                construct_segments(data[video_id][username]["entries"])
            # print video_id, username, len(data[video_id][username]["segments"]), len(data[video_id][username]["entries"])
            del data[video_id][username]["entries"]
    return data
def video_interaction_event(mongodb, events):
    """
    Store all video-related events from the tracking log
    into the database. There are three collections:
    1) video_events: raw event information
    2) video_segments: watching segments recovered from events
    3) video_heatmap: view counts for each second of a video

    To send events, refer to send_event.py
    """
    valid_events = 0
    # Store raw event information
    for event in events:
        entry = {}
        for key in event.keys():
            entry[key] = event[key]
            # flag indicating whether this item has been processed.
            entry["processed"] = 0
        collection = mongodb[EVENTS_COL]
        # get a list of event types to keep:
        # everything that starts with EVT defined in common.py
        temp_list = [CONF[key] for key in CONF if key.startswith("EVT")]
        events_type_list = list(chain(*temp_list))
        if get_prop(event, "TYPE_EVENT") in events_type_list:
            collection.insert(entry)
            valid_events += 1
    print "=========== INCOMING EVENTS", len(events), "total,", valid_events, "valid. ============="
Exemple #3
0
def register_new_video(mongodb, video_id, entry):
    """
    Insert a record for a previously unseen video into the videos collection.

    The record holds the video ID, the configured video host, the duration
    looked up via get_video_duration() for that ID and host, and the video
    name read from the given log entry.
    """
    videos = mongodb[VIDEOS_COL]
    host = CONF["VIDEO_HOST"]
    videos.insert({
        "video_id": video_id,
        "host": host,
        # the duration lookup needs the host the video is served from
        "duration": get_video_duration(video_id, host),
        "video_name": get_prop(entry, "VIDEO_NAME"),
    })
def process_segments(mongodb, log_entries):
    """
    For a list of log entries, parse them into a format that makes it easy to construct segments.
    Indexed by username, each entry in the resulting data structure includes the following:
    - segments: all segments for this user
    - entries: all raw log entries
    """
    collection = mongodb['videos']
    current_videos = list(collection.find({}, {"video_id": 1}).distinct("video_id"))
    videos = []
    for video in current_videos:
        videos.append(video)
    data = {}
    for entry in log_entries:
        username = get_prop(entry, "USERNAME")
        video_id = get_prop(entry, "VIDEO_ID")
        # if this video is not in the video database, add it
        if video_id not in videos:
            register_new_video(mongodb, video_id, entry)
            videos.append(video_id)

        if video_id not in data:
            data[video_id] = {}
        if username not in data[video_id]:
            data[video_id][username] = {}
            data[video_id][username]["segments"] = []
            data[video_id][username]["entries"] = []

        data[video_id][username]["entries"].append(entry)

    for video_id in data:
        for username in data[video_id]:
            data[video_id][username]["segments"] = \
                construct_segments(data[video_id][username]["entries"])
            # print video_id, username, len(data[video_id][username]["segments"]), len(data[video_id][username]["entries"])
            print video_id, username
            for entry in data[video_id][username]["entries"]:
                print "    ", get_prop(entry, "TYPE_EVENT")
            del data[video_id][username]["entries"]
    return data
def test(mongodb):
    """
    Test property retrieval
    """
    collection = mongodb['video_events']
    # For incremental updates, retrieve only the events not processed yet.
    entries = list(collection.find({"processed": 0}))
    print "RESULT:", get_prop(entries[0], "TIMESTAMP")
    print "RESULT:", get_prop(entries[0], "VIDEO_ID")
    print "RESULT:", get_prop(entries[0], "VIDEO_TIME")
    print "RESULT:", get_prop(entries[0], "VIDEO_SPEED")
    print "RESULT:", get_prop(entries[0], "TIXXMESTAMP")
    return "RESULT:", get_prop(entries[0], "TIMESTAMP")
Exemple #6
0
def _parse_timestamp(raw):
    """
    Parse a log timestamp into a datetime, trying each accepted format in turn.
    Returns None when the value matches none of them or is not a string.
    """
    for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
        try:
            return datetime.strptime(raw, fmt)
        except (TypeError, ValueError):
            continue
    return None


def construct_segments(log_entries):
    """
    Construct video-watching segments from a list of video player log entries for a single video.
    A segment indicates a block of time a student watched a part of a video clip.
    It is used to create various visualizations of students' interaction with video content.

    Each pair of consecutive entries is examined. A segment is produced only when
    the first entry is a play event and the second is a pause or a play event;
    every other pair is skipped, as are pairs with an unparseable timestamp
    or a malformed VIDEO_TIME.

    A segment includes
        time_start: when does this segment start? (in sec)
        time_end: when does this segment end? (in sec)
        date_start: when did this watching start? (timestamp)
        date_end: when did this watching end? (timestamp)
        speed: VIDEO_SPEED of the first entry of the pair

    Arguments:
        log_entries: list of log entries readable with get_prop().

    Returns the list of segments, in entry order. Empty for fewer than two entries.
    """
    # TODO: do not assume that entries are time-ordered.
    segments = []
    # two neighbouring items are compared at a time
    for entry1, entry2 in zip(log_entries, log_entries[1:]):
        e1_time = _parse_timestamp(get_prop(entry1, "TIMESTAMP"))
        e2_time = _parse_timestamp(get_prop(entry2, "TIMESTAMP"))
        if e1_time is None or e2_time is None:
            print("time format error. moving on")
            continue
        try:
            if get_prop(entry1, "TYPE_EVENT") not in CONF["EVT_VIDEO_PLAY"]:
                continue
            segment = {}
            entry2_type = get_prop(entry2, "TYPE_EVENT")
            # case 1. play-pause: watch for a while and pause
            if entry2_type in CONF["EVT_VIDEO_PAUSE"]:
                # 1) compute time elapsed between play and pause
                # 2) subtract from the final position to get the starting position
                # 3) avoid negative time with max(x, 0)
                time_diff = e2_time - e1_time
                time_diff_secs = time_diff.days * 60 * 60 * 24 + time_diff.seconds
                try:
                    time_end = float(get_prop(entry2, "VIDEO_TIME"))
                except TypeError:
                    print("malformatted field. skipping")
                    continue
                segment["time_start"] = max(time_end - time_diff_secs, 0)
                segment["time_end"] = time_end
            # case 2. play-play: watch for a while, access another part of the clip
            elif entry2_type in CONF["EVT_VIDEO_PLAY"]:
                try:
                    segment["time_start"] = float(get_prop(entry1, "VIDEO_TIME"))
                    segment["time_end"] = float(get_prop(entry2, "VIDEO_TIME"))
                except TypeError:
                    print("malformatted field. skipping")
                    continue
            else:
                # play followed by some other event: there is no position
                # information to build a segment from, so do not emit one
                # that lacks time_start / time_end.
                continue

            segment["date_start"] = get_prop(entry1, "TIMESTAMP")
            segment["date_end"] = get_prop(entry2, "TIMESTAMP")
            segment["speed"] = get_prop(entry1, "VIDEO_SPEED")
            segments.append(segment)
        except ValueError:
            # corrupt data missing certain fields: skip this pair
            continue
    return segments