def record_heatmaps(mongodb):
    """
    Record heatmap bins for each video, based on segments
    for a single video?
    """
    start_time = time.time()

    # TODO: handle cut segments (i.e., start event exists but end event missing)
    # TODO: only remove the corresponding entries in the database: (video, user)
    collection = mongodb['video_segments']
    segments = list(collection.find())
    collection = mongodb['video_heatmaps']
    collection.remove()
    print len(segments), "segments found"

    results = defaultdict(dict)
    for segment in segments:
        if not segment["user_id"] in results[segment["video_id"]]:
            results[segment["video_id"]][segment["user_id"]] = []
        results[segment["video_id"]][segment["user_id"]].append(segment)
    vid_col = mongodb['videos']
    for video_id in results:
        result = list(vid_col.find({"video_id": video_id}))
        if len(result):
            process_heatmaps(mongodb, results[video_id], video_id, result[0]["duration"])
        else:
            print "ERROR in video information retrieval"
    # Make sure the collection is indexed.
    from pymongo import ASCENDING
    collection.ensure_index([("video_id", ASCENDING)])
        # [("video_id", ASCENDING), ("time", ASCENDING)])

    print sys._getframe().f_code.co_name, "COMPLETED", (time.time() - start_time), "seconds"
Example #2
0
def record_heatmaps_ajax(mongodb, index):
    """
    Record heatmap bins for each video, based on segments
    for a single video?
    """
    bin_size = 100000
    start_time = time.time()

    collection = mongodb[HEATMAPS_COL]
    collection.remove()
    # TODO: handle cut segments (i.e., start event exists but end event missing)
    # TODO: only remove the corresponding entries in the database: (video, user)
    vid_col = mongodb['videos']
    video_list = list(vid_col.find())
    num_videos = len(video_list)
    for index, video in enumerate(video_list):
        video_id = video["video_id"]
        loop_start_time = time.time()
        collection = mongodb[SEGMENTS_COL]
        segments = list(collection.find({"video_id": video_id}))
        #segments = collection.find().limit(bin_size).skip(index*bin_size) #.batch_size(1000)
        print index, "/", num_videos, video_id, ":", len(segments), "segments", (time.time() - loop_start_time), "seconds"
        if len(segments):
            loop_start_time2 = time.time()
            results = defaultdict(dict)
            for segment in segments:
                if not segment["user_id"] in results[segment["video_id"]]:
                    results[segment["video_id"]][segment["user_id"]] = []
                results[segment["video_id"]][segment["user_id"]].append(segment)
            process_heatmaps(mongodb, results[video_id], video_id, video["duration"])
            print (time.time() - loop_start_time2), "seconds"
    # Make sure the collection is indexed.
    from pymongo import ASCENDING
    collection.ensure_index([("video_id", ASCENDING)])
        # [("video_id", ASCENDING), ("time", ASCENDING)])

    print sys._getframe().f_code.co_name, "COMPLETED", (time.time() - start_time), "seconds"