def process_segments(mongodb, log_entries):
    """
    For a list of log entries, parse them into a format that makes it easy to construct segments.
    Indexed by username, each entry in the resulting data structure includes the following:
    - segments: all segments for this user
    - entries: all raw log entries
    """
    collection = mongodb[VIDEOS_COL]
    current_videos = list(collection.find({}, {"video_id": 1}).distinct("video_id"))
    videos = []
    for video in current_videos:
        videos.append(video)
    data = {}
    index = 0
    for entry in log_entries:
        index += 1
        if index % 1000 == 0:
            print ".",
        # print entry
        username = get_prop(entry, "USERNAME")
        # ignore ones that are already processed
        #if entry["processed"] == 1:
        #    continue
        # ignore if username is empty
        if username == "":
            continue
        video_id = get_prop(entry, "VIDEO_ID")
        # non-video player events
        if video_id == "":
            # TODO: use this to more accurately capture sessions
            # and also display before/after destinations
            continue
        # video player events
        else:
            # if this video is not in the video database, add it
            if video_id not in videos:
                print "adding video", video_id
                register_new_video(mongodb, video_id, entry)
                videos.append(video_id)
            if video_id not in data:
                data[video_id] = {}
            if username not in data[video_id]:
                data[video_id][username] = {}
                data[video_id][username]["segments"] = []
                data[video_id][username]["entries"] = []

        #TODO: unindent?
            data[video_id][username]["entries"].append(entry)

    #TODO: not rely on data?
    for video_id in data:
        for username in data[video_id]:
            #print video_id, username
            #for entry in data[video_id][username]["entries"]:
            #    print "    ", get_prop(entry, "TYPE_EVENT")
            data[video_id][username]["segments"] = \
                construct_segments(data[video_id][username]["entries"])
            # print video_id, username, len(data[video_id][username]["segments"]), len(data[video_id][username]["entries"])
            del data[video_id][username]["entries"]
    return data
def send_events_local():
    global results
    client = MongoClient()
    mongodb = client[MONGODB_NAME] 

    start_time = time.time()
    valid_events = 0
    # Store raw event information
    for event in results:
        #entry = {}
        #for key in event.keys():
        #    entry[key] = event[key]
            # flag indicating whether this item has been processed.
        #    entry["processed"] = 0
        event["processed"] = 0
        collection = mongodb[EVENTS_COL]
        # get a list of event types to keep:
        # everything that starts with EVT defined in common.py
        temp_list = [CONF[key] for key in CONF if key.startswith("EVT")]
        events_type_list = list(chain(*temp_list))
        if get_prop(event, "TYPE_EVENT") in events_type_list:
            collection.insert(event)
            valid_events += 1
    print "=========== INCOMING EVENTS", len(results), "total,", valid_events, "valid. ============="
    print sys._getframe().f_code.co_name, "COMPLETED", (time.time() - start_time), "seconds"
Exemple #3
0
def video_interaction_event(mongodb, events):
    """
    Store all video-related events from the tracking log
    into the database. There are three collections:
    1) video_events: raw event information
    2) video_segments: watching segments recovered from events
    3) video_heatmap: view counts for each second of a video

    To send events, refer to send_event.py
    """
    valid_events = 0
    # Store raw event information
    for event in events:
        entry = {}
        for key in event.keys():
            entry[key] = event[key]
            # flag indicating whether this item has been processed.
            entry["processed"] = 0
        collection = mongodb[EVENTS_COL]
        # get a list of event types to keep:
        # everything that starts with EVT defined in common.py
        temp_list = [CONF[key] for key in CONF if key.startswith("EVT")]
        events_type_list = list(chain(*temp_list))
        if get_prop(event, "TYPE_EVENT") in events_type_list:
            collection.insert(entry)
            valid_events += 1
    print "=========== INCOMING EVENTS", len(events), "total,", valid_events, "valid. ============="
def register_new_video(mongodb, video_id, entry):
    """
    Insert a record for the given video into the videos collection.

    The record holds the video id, the host configured in CONF, the
    duration obtained from get_video_duration() for that id and host, and
    the video name read from the log entry.
    """
    videos = mongodb[VIDEOS_COL]
    host = CONF["VIDEO_HOST"]
    # the duration lookup needs both the id and the host
    duration = get_video_duration(video_id, host)
    record = {
        "video_id": video_id,
        "host": host,
        "duration": duration,
        "video_name": get_prop(entry, "VIDEO_NAME"),
    }
    videos.insert(record)
def construct_segments(log_entries):
    """
    Construct a video-watching segment from a list of video player log entries for a single video.
    A segment indicates a block of time a student watched a part of a video clip.
    It is used to create various visualizations of students' interaction with video content.
    A segment includes
        time_start: when does this segment start? (in sec)
        time_end: when does this segment end? (in sec)
        date_start: when did this watching start? (timestamp)
        date_end: when did this watching end? (timestamp)
    """
    # TODO: do not assume that entries are time-ordered.
    # make sure it's sorted by time
    #sorted_entries = sorted(log_entries, key=lambda e: e["time"])
    segments = []
    # two items are compared, so start from index 1
    for i in range(1, len(log_entries)):
        entry1 = log_entries[i-1]
        entry2 = log_entries[i]
        # print get_prop(entry1, "TYPE_EVENT"), get_prop(entry2, "TYPE_EVENT")
        try:
            e1_time = datetime.strptime(get_prop(entry1, "TIMESTAMP"), "%Y-%m-%d %H:%M:%S.%f")
        except ValueError:
            try:
                e1_time = datetime.strptime(get_prop(entry1, "TIMESTAMP"), "%Y-%m-%dT%H:%M:%S")
            except ValueError, v:
                if len(v.args) > 0 and v.args[0].startswith('unconverted data remains: '):
                    new_time_string = get_prop(entry1, "TIMESTAMP")[:-(len(v.args[0])-26)]
                    e1_time = datetime.strptime(new_time_string, "%Y-%m-%dT%H:%M:%S")   
                else:
                    e1_time = datetime.strptime(get_prop(entry1, "TIMESTAMP"), "%Y-%m-%d %H:%M:%S")
                    # print "e1_time", e1_time     
            except:
                print "time format error. moving on"
                continue
        try:
            e2_time = datetime.strptime(get_prop(entry2, "TIMESTAMP"), "%Y-%m-%d %H:%M:%S.%f")
        except ValueError:
            try:
                e2_time = datetime.strptime(get_prop(entry2, "TIMESTAMP"), "%Y-%m-%dT%H:%M:%S")
            except ValueError, v:
                if len(v.args) > 0 and v.args[0].startswith('unconverted data remains: '):
                    new_time_string = get_prop(entry2, "TIMESTAMP")[:-(len(v.args[0])-26)]
                    e2_time = datetime.strptime(new_time_string, "%Y-%m-%dT%H:%M:%S")  
                else:          
                    e2_time = datetime.strptime(get_prop(entry2, "TIMESTAMP"), "%Y-%m-%d %H:%M:%S")
                    # print "e2_time", e2_time
            except:
                print "time format error. moving on"
                continue
         continue
 try:
     e2_time = datetime.strptime(get_prop(entry2, "TIMESTAMP"), "%Y-%m-%d %H:%M:%S.%f")
 except ValueError:
     try:
         e2_time = datetime.strptime(get_prop(entry2, "TIMESTAMP"), "%Y-%m-%dT%H:%M:%S")
     except ValueError, v:
         if len(v.args) > 0 and v.args[0].startswith('unconverted data remains: '):
             new_time_string = get_prop(entry2, "TIMESTAMP")[:-(len(v.args[0])-26)]
             e2_time = datetime.strptime(new_time_string, "%Y-%m-%dT%H:%M:%S")            
     except:
         print "time format error. moving on"
         continue
 try:
     segment = {}
     if get_prop(entry1, "TYPE_EVENT") not in CONF["EVT_VIDEO_PLAY"]:
         continue
     # case 1. play-pause: watch for a while and pause
     if get_prop(entry2, "TYPE_EVENT") in CONF["EVT_VIDEO_PAUSE"]:
         # 1) compute time elapsed between play and pause
         # 2) subtract from the final position to get the starting position
         # 3) avoid negative time with max(x, 0)
         # time_diff = time.mktime(e2_time) - time.mktime(e1_time)
         time_diff = e2_time - e1_time
         time_diff_secs = time_diff.days * 60 * 60 * 24 + time_diff.seconds
         try:
             elapsed_time = float(get_prop(entry2, "VIDEO_TIME")) - time_diff_secs
             segment["time_start"] = max(elapsed_time, 0)
             segment["time_end"] = float(get_prop(entry2, "VIDEO_TIME"))
         except TypeError:
             print "malformatted field. skipping"