def save(self, update_userlog=True, *args, **kwargs):
    """Persist this VideoLog.

    Backfills ``video_id`` from ``youtube_id`` for backwards compatibility,
    recomputes completion status from accumulated points, and (optionally)
    records user activity so the login session counts as active.

    :param update_userlog: when True, record a "login" activity for the user.
    kwargs: ``imported`` (read via .get, not popped) marks rows coming from a
    sync/import; those skip validation and the learner-status recompute.
    """
    # To deal with backwards compatibility,
    # check video_id, whether imported or not.
    if not self.video_id:
        assert kwargs.get("imported", False), "video_id better be set by internal code."
        assert self.youtube_id, "If not video_id, you better have set youtube_id!"
        self.video_id = i18n.get_video_id(self.youtube_id) or self.youtube_id  # for unknown videos, default to the youtube_id

    if not kwargs.get("imported", False):
        self.full_clean()

        # Compute learner status
        already_complete = self.complete
        self.complete = (self.points >= VideoLog.POINTS_PER_VIDEO)
        if not already_complete and self.complete:
            # First transition into "complete": stamp the completion time.
            self.completion_timestamp = datetime.now()

        # Tell logins that they are still active (ignoring validation failures).
        # TODO(bcipolli): Could log video information in the future.
        if update_userlog:
            try:
                UserLog.update_user_activity(self.user, activity_type="login", update_datetime=(self.completion_timestamp or datetime.now()), language=self.language)
            except ValidationError as e:
                logging.error("Failed to update userlog during video: %s" % e)

    # NOTE(review): "imported" remains in kwargs and is forwarded here;
    # presumably the parent save() accepts/consumes it — confirm.
    super(VideoLog, self).save(*args, **kwargs)
def add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all):
    """Register on-disk video files that the DB doesn't know about.

    Creates VideoFile rows (marked 100% complete) for each such file and
    returns the corresponding video ids.
    """
    # Files that exist but are not in the DB are assumed to be good videos;
    # adding them here also lets the later update pass see them (and
    # triggers cache invalidation).
    missing_youtube_ids = list(youtube_ids_in_filesystem - videos_marked_at_all)

    created_files = []
    if missing_youtube_ids:
        by_language = divide_videos_by_language(missing_youtube_ids)
        for lang_code, youtube_ids in by_language.iteritems():
            # OK to do bulk_create; cache invalidation triggered via save download
            batch = [
                VideoFile(youtube_id=yid, percent_complete=100, download_in_progress=False, language=lang_code)
                for yid in youtube_ids
            ]
            VideoFile.objects.bulk_create(batch)
            created_files.extend(batch)
            # Do this within the loop, to update users ASAP
            caching.invalidate_all_caches()
        self.stdout.write("Created %d VideoFile models (and marked them as complete, since the files exist)\n" % len(created_files))

    return [i18n.get_video_id(vf.youtube_id) for vf in created_files]
def save(self, update_userlog=True, *args, **kwargs):
    """Persist the log, backfilling video_id and deriving completion state."""
    # Backwards compatibility: derive video_id from youtube_id when absent.
    if not self.video_id:
        assert kwargs.get("imported", False), "video_id better be set by internal code."
        assert self.youtube_id, "If not video_id, you better have set youtube_id!"
        # Unknown videos fall back to the raw youtube_id.
        self.video_id = i18n.get_video_id(self.youtube_id) or self.youtube_id

    if not kwargs.get("imported", False):
        self.full_clean()

        # Flip to complete once enough points are earned; stamp the first
        # transition into the complete state.
        was_complete = self.complete
        self.complete = self.points >= VideoLog.POINTS_PER_VIDEO
        if self.complete and not was_complete:
            self.completion_timestamp = datetime.now()

        # Mark the user as still active; validation problems are logged,
        # never raised.
        # TODO(bcipolli): Could log video information in the future.
        if update_userlog:
            try:
                UserLog.update_user_activity(
                    self.user,
                    activity_type="login",
                    update_datetime=(self.completion_timestamp or datetime.now()),
                    language=self.language,
                )
            except ValidationError as e:
                logging.error("Failed to update userlog during video: %s" % e)

    super(VideoLog, self).save(*args, **kwargs)
def delete_objects_for_incomplete_videos():
    """Remove VideoFile rows stuck partway through a download.

    Rows that are not in progress yet sit strictly between 0% and 100%
    are broken; delete them and return their video ids.
    """
    broken_files = VideoFile.objects.filter(
        download_in_progress=False,
        percent_complete__gt=0,
        percent_complete__lt=100,
    )
    # Snapshot the ids before the rows disappear.
    removed_ids = [i18n.get_video_id(vf.youtube_id) for vf in broken_files]
    broken_files.delete()
    if removed_ids:
        self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(removed_ids))
    return removed_ids
def update_objects_to_be_complete(youtube_ids_in_filesystem):
    """Mark existing 0%-complete VideoFile rows as done when the file exists.

    Files that exist on disk and are in the DB with percent_complete=0 and
    download_in_progress=False are flipped to 100% complete; returns the
    video ids that were updated.
    """
    updated_video_ids = []
    for chunk in break_into_chunks(youtube_ids_in_filesystem):
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        # BUGFIX: snapshot the matching ids BEFORE calling .update().
        # Django querysets are lazy and re-evaluate on iteration; after the
        # update, percent_complete is 100 so the filter matches nothing and
        # the id list would come back empty.
        chunk_video_ids = [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_needing_model_update]
        video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)
        updated_video_ids += chunk_video_ids

    if updated_video_ids:
        caching.invalidate_all_caches()
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % len(updated_video_ids))
    return updated_video_ids
def delete_objects_for_incomplete_videos():
    """Delete broken (partially-downloaded) VideoFile rows; return their video ids."""
    # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
    video_files_to_delete = VideoFile.objects.filter(
        download_in_progress=False,
        percent_complete__gt=0,
        percent_complete__lt=100)
    # Collect the ids before .delete() removes the rows.
    deleted_video_ids = [
        i18n.get_video_id(video_file.youtube_id)
        for video_file in video_files_to_delete
    ]
    video_files_to_delete.delete()
    if deleted_video_ids:
        # NOTE(review): `self` is a free variable here — presumably this helper
        # is defined inside a management command's handle(); confirm.
        self.stdout.write(
            "Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n"
            % len(deleted_video_ids))
    return deleted_video_ids
def add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all):
    """Create VideoFile rows (marked complete) for on-disk files unknown to the DB.

    Returns the video ids of the newly created rows.
    """
    # Files that exist, but are not in the DB, should be assumed to be good videos,
    # and just needing to be added to the DB. Add them to the DB in this way,
    # so that these files also trigger the update code below (and trigger cache invalidation)
    youtube_ids_needing_model_creation = list(youtube_ids_in_filesystem - videos_marked_at_all)

    new_video_files = []
    if youtube_ids_needing_model_creation:
        for lang_code, youtube_ids in divide_videos_by_language(youtube_ids_needing_model_creation).iteritems():
            # OK to do bulk_create; cache invalidation triggered via save download
            lang_video_files = [VideoFile(youtube_id=id, percent_complete=100, download_in_progress=False, language=lang_code) for id in youtube_ids]
            VideoFile.objects.bulk_create(lang_video_files)
            new_video_files += lang_video_files
            caching.invalidate_all_caches()  # Do this within the loop, to update users ASAP
        # NOTE(review): `self` is a free variable — presumably the enclosing
        # management command; confirm this is a nested helper of handle().
        self.stdout.write("Created %d VideoFile models (and marked them as complete, since the files exist)\n" % len(new_video_files))

    return [i18n.get_video_id(video_file.youtube_id) for video_file in new_video_files]
def update_objects_to_be_complete(youtube_ids_in_filesystem):
    """Flip 0%-complete VideoFile rows to 100% where the file is on disk.

    Returns the list of video ids that were marked complete.
    """
    updated_video_ids = []
    for chunk in break_into_chunks(youtube_ids_in_filesystem):
        pending = VideoFile.objects.filter(
            percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        # BUGFIX: materialize the id list before .update() — the lazy queryset
        # re-runs its filter on iteration, and once percent_complete is 100
        # nothing matches, so iterating afterwards yields no ids.
        ids_in_chunk = [
            i18n.get_video_id(video_file.youtube_id)
            for video_file in pending
        ]
        pending.update(percent_complete=100, flagged_for_download=False)
        updated_video_ids += ids_in_chunk

    if updated_video_ids:
        caching.invalidate_all_caches()
        self.stdout.write(
            "Updated %d VideoFile models (to mark them as complete, since the files exist)\n"
            % len(updated_video_ids))
    return updated_video_ids
def handle(self, *args, **options):
    """Download every VideoFile flagged for download, one at a time.

    Loops until no flagged videos remain, tracking progress stages; failed
    videos are skipped on subsequent iterations. Connection errors leave the
    video flagged for retry; any other error unflags it. Optionally
    regenerates video-related cached pages afterwards (--auto_cache).
    """
    self.video = None
    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.

    set_process_priority.lowest(logging=settings.LOG)

    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects \
                .filter(flagged_for_download=True, download_in_progress=False) \
                .exclude(youtube_id__in=failed_youtube_ids)
            video_count = videos.count()
            if video_count == 0:
                self.stdout.write(_("Nothing to download; exiting.") + "\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = videos[0]
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()
            self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": video.youtube_id})

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=video.youtube_id)

            # Initiate the download process
            try:
                ensure_dir(settings.CONTENT_ROOT)

                progress_callback = partial(self.download_progress_callback, video)
                try:
                    # Download via urllib
                    download_video(video.youtube_id, callback=progress_callback)
                except URLNotFound:
                    # Video was not found on amazon cloud service,
                    # either due to a KA mistake, or due to the fact
                    # that it's a dubbed video.
                    #
                    # We can use youtube-dl to get that video!!
                    logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": video.youtube_id})

                    def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
                        # Translate youtube-dl status dicts into a 0-100 percent.
                        if stats['status'] == "finished":
                            percent = 100.
                        elif stats['status'] == "downloading":
                            percent = 100. * stats['downloaded_bytes'] / stats['total_bytes']
                        else:
                            percent = 0.
                        progress_callback(percent=percent)

                    scrape_video(video.youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                # If we got here, we downloaded ... somehow :)
                handled_youtube_ids.append(video.youtube_id)
                self.stdout.write(_("Download is complete!") + "\n")

            except DownloadCancelled:
                # Cancellation event: reset the video to a clean, unflagged state.
                video.percent_complete = 0
                video.flagged_for_download = False
                video.download_in_progress = False
                video.save()
                failed_youtube_ids.append(video.youtube_id)

            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": video.youtube_id, "error_msg": unicode(e)}
                self.stderr.write("%s\n" % msg)

                # If a connection error, we should retry.
                if isinstance(e, DownloadError):
                    connection_error = "[Errno 8]" in e.message
                elif isinstance(e, IOError) and hasattr(e, "strerror"):
                    connection_error = e.strerror[0] == 8
                else:
                    connection_error = False

                video.download_in_progress = False
                video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                video.save()

                # Rather than getting stuck on one video, continue to the next video.
                self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                failed_youtube_ids.append(video.youtube_id)
                continue

        # This can take a long time, without any further update, so ... best to avoid.
        if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
            # BUGFIX: was self.video.youtube_id, but self.video is set to None
            # above and never reassigned, so that dereference raised
            # AttributeError. Use the loop-local `video` (bound whenever
            # handled_youtube_ids is non-empty).
            self.update_stage(stage_name=video.youtube_id, stage_percent=0, notes=_("Generating all pages related to videos."))
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })

    except Exception as e:
        self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
        raise
def get_video_by_youtube_id(youtube_id):
    """Return the cached Video node for this youtube_id, or None if unknown."""
    # TODO(bcipolli): will need to change for dubbed videos
    node_key = i18n.get_video_id(youtube_id=youtube_id)
    video_nodes = get_node_cache("Video").get(node_key, [None])
    return video_nodes[0]
class TestSaveVideoLog(KALiteTestCase):
    """Exercise the save_video_log client API: creating a new VideoLog and
    updating an existing one through a logged-in KALiteClient."""

    # Fixture constants for the pre-existing log and the values pushed in tests.
    ORIGINAL_POINTS = 84
    ORIGINAL_SECONDS_WATCHED = 32
    NEW_POINTS = 32
    NEW_SECONDS_WATCHED = 15
    YOUTUBE_ID = "aNqG4ChKShI"
    VIDEO_ID = i18n.get_video_id(YOUTUBE_ID) or "dummy"
    YOUTUBE_ID2 = "b22tMEc6Kko"
    VIDEO_ID2 = i18n.get_video_id(YOUTUBE_ID2) or "dummy2"
    USERNAME = "******"
    PASSWORD = "******"

    def setUp(self):
        """Create a facility, a user, and one initial VideoLog to update later."""
        super(TestSaveVideoLog, self).setUp()
        # create a facility and user that can be referred to in models across tests
        self.facility = Facility(name="Test Facility")
        self.facility.save()
        self.user = FacilityUser(username=self.USERNAME, facility=self.facility)
        self.user.set_password(self.PASSWORD)
        self.user.save()

        # create an initial VideoLog instance so we have something to update later
        self.original_videolog = VideoLog(video_id=self.VIDEO_ID, youtube_id=self.YOUTUBE_ID, user=self.user)
        self.original_videolog.points = self.ORIGINAL_POINTS
        self.original_videolog.total_seconds_watched = self.ORIGINAL_SECONDS_WATCHED
        self.original_videolog.save()

    def test_new_videolog(self):
        """Saving a log for a video with no existing record creates one."""
        # make sure the target video log does not already exist
        videologs = VideoLog.objects.filter(video_id=self.VIDEO_ID2, user__username=self.USERNAME)
        self.assertEqual(
            videologs.count(), 0,
            "The target video log to be newly created already exists")

        c = KALiteClient()

        # login
        success = c.login(username=self.USERNAME, password=self.PASSWORD, facility=self.facility.id)
        self.assertTrue(success, "Was not able to login as the test user")

        # save a new video log
        result = c.save_video_log(
            video_id=self.VIDEO_ID2,
            youtube_id=self.YOUTUBE_ID2,
            total_seconds_watched=self.ORIGINAL_SECONDS_WATCHED,
            points=self.NEW_POINTS,
        )
        self.assertEqual(
            result.status_code, 200,
            "An error (%d) was thrown while saving the video log."
            % result.status_code)

        # get a reference to the newly created VideoLog
        videolog = VideoLog.objects.get(video_id=self.VIDEO_ID2, user__username=self.USERNAME)

        # make sure the VideoLog was properly created
        self.assertEqual(videolog.points, self.NEW_POINTS,
                         "The VideoLog's points were not saved correctly.")
        self.assertEqual(
            videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED,
            "The VideoLog's seconds watched was not saved correctly.")

    def test_update_videolog(self):
        """Saving a log for a video with an existing record updates it in place."""
        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog hasn't already been changed
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS,
                         "The VideoLog's points have already changed.")
        self.assertEqual(videolog.total_seconds_watched,
                         self.ORIGINAL_SECONDS_WATCHED,
                         "The VideoLog's seconds watched already changed.")

        c = KALiteClient()

        # login
        success = c.login(username=self.USERNAME, password=self.PASSWORD, facility=self.facility.id)
        self.assertTrue(success, "Was not able to login as the test user")

        # save a new record onto the video log, with a correct answer (increasing the points and streak)
        result = c.save_video_log(
            video_id=self.VIDEO_ID,
            youtube_id=self.YOUTUBE_ID,
            total_seconds_watched=self.ORIGINAL_SECONDS_WATCHED + self.NEW_SECONDS_WATCHED,
            points=self.ORIGINAL_POINTS + self.NEW_POINTS,
        )
        self.assertEqual(
            result.status_code, 200,
            "An error (%d) was thrown while saving the video log."
            % result.status_code)

        # get a reference to the updated VideoLog
        videolog = VideoLog.objects.get(video_id=self.VIDEO_ID, user__username=self.USERNAME)

        # make sure the VideoLog was properly updated
        self.assertEqual(videolog.points,
                         self.ORIGINAL_POINTS + self.NEW_POINTS,
                         "The VideoLog's points were not updated correctly.")
        self.assertEqual(
            videolog.total_seconds_watched,
            self.ORIGINAL_SECONDS_WATCHED + self.NEW_SECONDS_WATCHED,
            "The VideoLog's seconds watched was not updated correctly.")
def handle(self, *args, **options):
    """Download every VideoFile flagged for download, one at a time.

    Loops until no flagged videos remain, tracking progress stages; failed
    videos are excluded from subsequent queries. Connection errors leave the
    video flagged for retry; any other error unflags it. Optionally
    regenerates video-related cached pages afterwards (--auto_cache).
    """
    self.video = None
    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.

    set_process_priority.lowest(logging=settings.LOG)

    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects \
                .filter(flagged_for_download=True, download_in_progress=False) \
                .exclude(youtube_id__in=failed_youtube_ids)
            video_count = videos.count()
            if video_count == 0:
                self.stdout.write(_("Nothing to download; exiting.") + "\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = videos[0]
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()
            self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": video.youtube_id})

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=video.youtube_id)

            # Initiate the download process
            try:
                ensure_dir(settings.CONTENT_ROOT)

                progress_callback = partial(self.download_progress_callback, video)
                try:
                    # Download via urllib
                    download_video(video.youtube_id, callback=progress_callback)
                except URLNotFound:
                    # Video was not found on amazon cloud service,
                    # either due to a KA mistake, or due to the fact
                    # that it's a dubbed video.
                    #
                    # We can use youtube-dl to get that video!!
                    logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": video.youtube_id})

                    def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
                        # Map youtube-dl's status dict onto a 0-100 percent value.
                        if stats['status'] == "finished":
                            percent = 100.
                        elif stats['status'] == "downloading":
                            percent = 100. * stats['downloaded_bytes'] / stats['total_bytes']
                        else:
                            percent = 0.
                        progress_callback(percent=percent)

                    scrape_video(video.youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                # If we got here, we downloaded ... somehow :)
                handled_youtube_ids.append(video.youtube_id)
                self.stdout.write(_("Download is complete!") + "\n")

            except DownloadCancelled:
                # Cancellation event
                video.percent_complete = 0
                video.flagged_for_download = False
                video.download_in_progress = False
                video.save()
                failed_youtube_ids.append(video.youtube_id)

            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": video.youtube_id, "error_msg": unicode(e)}
                self.stderr.write("%s\n" % msg)

                # If a connection error, we should retry.
                if isinstance(e, DownloadError):
                    connection_error = "[Errno 8]" in e.message
                elif isinstance(e, IOError) and hasattr(e, "strerror"):
                    connection_error = e.strerror[0] == 8
                else:
                    connection_error = False

                video.download_in_progress = False
                video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                video.save()

                # Rather than getting stuck on one video, continue to the next video.
                self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                failed_youtube_ids.append(video.youtube_id)
                continue

        # This can take a long time, without any further update, so ... best to avoid.
        if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
            # NOTE(review): self.video is initialized to None above and never
            # reassigned, so self.video.youtube_id raises AttributeError here
            # — this likely should be the loop-local `video`; confirm and fix.
            self.update_stage(stage_name=self.video.youtube_id, stage_percent=0, notes=_("Generating all pages related to videos."))
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })

    except Exception as e:
        self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
        raise
def show_logs(request, ndays=None):
    """Show file-based logging info for video downloads, language packs, and subtitles"""
    # Window in days; defaults to the "days" query parameter, then 7.
    ndays = ndays or int(request.GET.get("days", 7))

    def get_logger_filename(logger_type):
        # Path of the first handler's log file for this stats logger.
        return stats_logger(logger_type).handlers[0].baseFilename

    def parse_data(logger_type, data_fields, windowsize=128, ndays=None):
        """Read the log file backwards in windowsize-byte chunks, collecting
        entries newer than ndays; returns (per-timestamp dict, per-field counts)."""
        parsed_data = {}
        nparts = len(data_fields)
        summary_data = dict([(fld, {}) for fld in (data_fields + ["date"])])

        filepath = get_logger_filename(logger_type)
        if not os.path.exists(filepath):
            return (parsed_data, summary_data)

        # Group by ip, date, and youtube_id
        old_data = ""
        first_loop = True
        last_loop = False
        with open(filepath, "r") as fp:
            fp.seek(0, 2)  # go to the end of the stream
            while True:
                # Read the next chunk of data
                try:
                    # Get the data
                    try:
                        if first_loop:
                            fp.seek(-windowsize, 1)  # go backwards by a few
                            first_loop = False
                        else:
                            fp.seek(-2 * windowsize, 1)  # go backwards by a few
                        cur_data = fp.read(windowsize) + old_data
                    except:
                        # NOTE(review): bare except — presumably catching the
                        # IOError from seeking before the file start; confirm.
                        if last_loop and not old_data:
                            raise
                        elif last_loop:
                            cur_data = old_data
                            old_data = ""
                        else:
                            # Hit the start of file: read one final window from 0.
                            last_loop = True
                            fp.seek(0)
                            cur_data = fp.read(windowsize) + old_data  # could be some overlap...

                    if not cur_data:
                        break
                except:
                    break

                # Parse the data
                lines = cur_data.split("\n")
                # First (possibly partial) line is carried into the next chunk.
                old_data = lines[0] if len(lines) > 1 else ""
                new_data = lines[1:] if len(lines) > 1 else lines
                for l in new_data:
                    if not l:
                        continue

                    # All start with a date
                    # NOTE(review): maxsplit=2 can yield 3 parts when the payload
                    # contains " - ", and such lines are then skipped; maxsplit=1
                    # may have been intended — confirm against the log format.
                    parts = l.split(" - ", 2)
                    if len(parts) != 2:
                        continue
                    tim = parts[0]
                    dat = tim.split(" ")[0]

                    # Validate that this date is within the accepted range
                    parsed_date = datetime.datetime.strptime(dat, "%Y-%m-%d")
                    logging.debug("%s %s" % (parsed_date, (datetime.datetime.now() - timedelta(days=ndays))))
                    if ndays is not None and datetime.datetime.now() - timedelta(days=ndays) > parsed_date:
                        # Too old: stop scanning backwards entirely.
                        last_loop = True
                        old_data = ""
                        break

                    # The rest is semicolon-delimited
                    parts = parts[1].split(";")  # vd;127.0.0.1;xvnpSRO9IDM

                    # Now save things off
                    parsed_data[tim] = dict([(data_fields[idx], parts[idx]) for idx in range(nparts)])
                    summary_data["date"][dat] = 1 + summary_data["date"].get(dat, 0)
                    for idx in range(nparts):
                        summary_data[data_fields[idx]][parts[idx]] = 1 + summary_data[data_fields[idx]].get(parts[idx], 0)

        # Sort each summary histogram by key for stable display.
        for key, val in summary_data.iteritems():
            summary_data[key] = sorted_dict(val, key=lambda t: t[0])

        return (parsed_data, summary_data)

    (video_raw_data, video_summary_data) = parse_data("videos", ["task_id", "ip_address", "youtube_id"], ndays=ndays)
    (lp_raw_data, lp_summary_data) = parse_data("language_packs", ["task_id", "ip_address", "lang_code", "version"], ndays=ndays)
    (srt_raw_data, srt_summary_data) = parse_data("subtitles", ["task_id", "ip_address", "lang_code", "youtube_id"], ndays=ndays)

    # Template context: raw entries plus per-dimension summaries.
    return {
        "ndays": ndays,
        "videos": {
            "raw": video_raw_data,
            "dates": video_summary_data["date"],
            "ips": video_summary_data["ip_address"],
            "slugs": sum_counter(video_summary_data["youtube_id"], fn=lambda yid: get_id2slug_map().get(get_video_id(yid))),
            "lang_codes": sum_counter(video_summary_data["youtube_id"], fn=lambda yid: get_video_language(yid)),
        },
        "language_packs": {
            "raw": lp_raw_data,
            "dates": lp_summary_data["date"],
            "ips": lp_summary_data["ip_address"],
            "lang_codes": lp_summary_data["lang_code"],
            "versions": lp_summary_data["version"],
        },
        "subtitles": {
            "raw": srt_raw_data,
            "dates": srt_summary_data["date"],
            "ips": srt_summary_data["ip_address"],
            "lang_codes": srt_summary_data["lang_code"],
        },
    }
class TestVideoLogs(KALiteTestCase):
    """Direct model-level tests of VideoLog: updating a log and (eventually)
    merging on collision."""

    # Fixture constants for the initial log and the updated values.
    ORIGINAL_POINTS = 37
    ORIGINAL_SECONDS_WATCHED = 3
    NEW_POINTS = 22
    NEW_SECONDS_WATCHED = 5
    YOUTUBE_ID = "aNqG4ChKShI"
    VIDEO_ID = i18n.get_video_id(YOUTUBE_ID) or "dummy"

    def setUp(self):
        """Create a facility/user and an initial VideoLog, then sanity-check it."""
        super(TestVideoLogs, self).setUp()
        # create a facility and user that can be referred to in models across tests
        self.facility = Facility(name="Test Facility")
        self.facility.save()
        self.user = FacilityUser(username="******", facility=self.facility)
        self.user.set_password("dumber")
        self.user.save()

        # create an initial VideoLog instance so we have something to collide with later
        self.original_videolog = VideoLog(video_id=self.VIDEO_ID, youtube_id=self.YOUTUBE_ID, user=self.user)
        self.original_videolog.points = self.ORIGINAL_POINTS
        self.original_videolog.total_seconds_watched = self.ORIGINAL_SECONDS_WATCHED
        self.original_videolog.save()

        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog was created correctly
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS,
                         "The VideoLog's points have already changed.")
        self.assertEqual(
            videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED,
            "The VideoLog's total seconds watched have already changed.")

    def test_videolog_update(self):
        """Mutating and saving a VideoLog persists the new values."""
        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # update the VideoLog
        videolog.points = self.NEW_POINTS
        videolog.total_seconds_watched = self.NEW_SECONDS_WATCHED
        videolog.save()

        # get a new reference to the existing VideoLog
        videolog2 = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog was updated
        self.assertEqual(videolog2.points, self.NEW_POINTS,
                         "The VideoLog's points were not updated.")
        self.assertEqual(
            videolog2.total_seconds_watched, self.NEW_SECONDS_WATCHED,
            "The VideoLog's total seconds watched were not updated.")

    @unittest.skip("Auto-merging is not yet automatic, so skip this")
    def test_videolog_collision(self):
        """Saving a second log for the same video/user should merge (max) values."""
        # create a new video log with the same youtube_id and user, but different points/total seconds watched
        videolog = VideoLog(video_id=self.VIDEO_ID, youtube_id=self.YOUTUBE_ID, user=self.user)
        videolog.points = self.NEW_POINTS
        videolog.total_seconds_watched = self.NEW_SECONDS_WATCHED

        # try saving the new VideoLog: this is where the collision will happen, hopefully leading to a merge
        videolog.save()

        # get a new reference to the existing VideoLog
        videolog2 = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog has been properly merged
        self.assertEqual(videolog.points,
                         max(self.ORIGINAL_POINTS, self.NEW_POINTS),
                         "The VideoLog's points were not properly merged.")
        # BUGFIX: this referenced self.ORIGINAL_ATTEMPTS, which does not exist
        # on this class (copy-paste from the exercise-log tests) and would
        # raise AttributeError if the skip were removed.
        self.assertEqual(
            videolog.total_seconds_watched,
            max(self.ORIGINAL_SECONDS_WATCHED, self.NEW_SECONDS_WATCHED),
            "The VideoLog's total seconds watched have already changed.")
def forwards(self, orm):
    """Backfill video_id on every VideoLog from its youtube_id."""
    # Setting the video ID
    all_logs = orm["main.VideoLog"].objects.all()
    for log_entry in all_logs:
        # Unknown videos fall back to the raw youtube_id.
        log_entry.video_id = i18n.get_video_id(log_entry.youtube_id) or log_entry.youtube_id
        log_entry.save()
def show_logs(request, ndays=None):
    """Show file-based logging info for video downloads, language packs, and subtitles"""
    # Window in days; defaults to the "days" query parameter, then 7.
    ndays = ndays or int(request.GET.get("days", 7))

    def get_logger_filename(logger_type):
        # Path of the first handler's log file for this stats logger.
        return stats_logger(logger_type).handlers[0].baseFilename

    def parse_data(logger_type, data_fields, windowsize=128, ndays=None):
        """Scan the log file backwards in windowsize-byte chunks, keeping
        entries newer than ndays; returns (per-timestamp dict, per-field counts)."""
        parsed_data = {}
        nparts = len(data_fields)
        summary_data = dict([(fld, {}) for fld in (data_fields + ["date"])])

        filepath = get_logger_filename(logger_type)
        if not os.path.exists(filepath):
            return (parsed_data, summary_data)

        # Group by ip, date, and youtube_id
        old_data = ""
        first_loop = True
        last_loop = False
        with open(filepath, "r") as fp:
            fp.seek(0, 2)  # go to the end of the stream
            while True:
                # Read the next chunk of data
                try:
                    # Get the data
                    try:
                        if first_loop:
                            fp.seek(-windowsize, 1)  # go backwards by a few
                            first_loop = False
                        else:
                            fp.seek(-2 * windowsize, 1)  # go backwards by a few
                        cur_data = fp.read(windowsize) + old_data
                    except:
                        # NOTE(review): bare except — presumably catching the
                        # IOError raised when seeking before the file start; confirm.
                        if last_loop and not old_data:
                            raise
                        elif last_loop:
                            cur_data = old_data
                            old_data = ""
                        else:
                            # Hit the start of file: one final window from offset 0.
                            last_loop = True
                            fp.seek(0)
                            cur_data = fp.read(windowsize) + old_data  # could be some overlap...

                    if not cur_data:
                        break
                except:
                    break

                # Parse the data
                lines = cur_data.split("\n")
                # The first (possibly partial) line carries over to the next chunk.
                old_data = lines[0] if len(lines) > 1 else ""
                new_data = lines[1:] if len(lines) > 1 else lines
                for l in new_data:
                    if not l:
                        continue

                    # All start with a date
                    # NOTE(review): maxsplit=2 can produce 3 parts when the payload
                    # contains " - ", and such lines get skipped; confirm whether
                    # maxsplit=1 was intended.
                    parts = l.split(" - ", 2)
                    if len(parts) != 2:
                        continue
                    tim = parts[0]
                    dat = tim.split(" ")[0]

                    # Validate that this date is within the accepted range
                    parsed_date = datetime.datetime.strptime(dat, "%Y-%m-%d")
                    logging.debug("%s %s" % (parsed_date, (datetime.datetime.now() - timedelta(days=ndays))))
                    if ndays is not None and datetime.datetime.now() - timedelta(days=ndays) > parsed_date:
                        # Entry too old: abort the backwards scan.
                        last_loop = True
                        old_data = ""
                        break

                    # The rest is semicolon-delimited
                    parts = parts[1].split(";")  # vd;127.0.0.1;xvnpSRO9IDM

                    # Now save things off
                    parsed_data[tim] = dict([(data_fields[idx], parts[idx]) for idx in range(nparts)])
                    summary_data["date"][dat] = 1 + summary_data["date"].get(dat, 0)
                    for idx in range(nparts):
                        summary_data[data_fields[idx]][parts[idx]] = 1 + summary_data[data_fields[idx]].get(parts[idx], 0)

        # Sort each summary histogram by key for stable display.
        for key, val in summary_data.iteritems():
            summary_data[key] = sorted_dict(val, key=lambda t: t[0])

        return (parsed_data, summary_data)

    (video_raw_data, video_summary_data) = parse_data("videos", ["task_id", "ip_address", "youtube_id"], ndays=ndays)
    (lp_raw_data, lp_summary_data) = parse_data("language_packs", ["task_id", "ip_address", "lang_code", "version"], ndays=ndays)
    (srt_raw_data, srt_summary_data) = parse_data("subtitles", ["task_id", "ip_address", "lang_code", "youtube_id"], ndays=ndays)

    # Template context: raw entries plus per-dimension summaries.
    return {
        "ndays": ndays,
        "videos": {
            "raw": video_raw_data,
            "dates": video_summary_data["date"],
            "ips": video_summary_data["ip_address"],
            "slugs": sum_counter(video_summary_data["youtube_id"], fn=lambda yid: get_id2slug_map().get(get_video_id(yid))),
            "lang_codes": sum_counter(video_summary_data["youtube_id"], fn=lambda yid: get_video_language(yid)),
        },
        "language_packs": {
            "raw": lp_raw_data,
            "dates": lp_summary_data["date"],
            "ips": lp_summary_data["ip_address"],
            "lang_codes": lp_summary_data["lang_code"],
            "versions": lp_summary_data["version"],
        },
        "subtitles": {
            "raw": srt_raw_data,
            "dates": srt_summary_data["date"],
            "ips": srt_summary_data["ip_address"],
            "lang_codes": srt_summary_data["lang_code"],
        },
    }