async def proxy(request: Request) -> web.Response:

    def prepare_headers(headers):
        headers = dict(headers)  # request.headers is immutable; edit a copy
        headers['host'] = PROXY_SITE
        headers['Accept-Encoding'] = 'deflate'
        return headers

    def response_body(raw: bytes) -> bytes:
        content = raw.decode()
        soup = BeautifulSoup(content, "html.parser")  # OPTIMIZE: use lxml instead of html.parser
        for processor, kwargs in PROCESSORS.items():
            func = getattr(processors, processor)
            soup = func(soup, **kwargs)
        return str(soup).encode()

    url = '{site}{url}'.format(site=PROXY_URL, url=request.match_info['path'])
    async with aiohttp.ClientSession() as session:
        async with session.request(request.method, url,
                                   headers=prepare_headers(request.headers),
                                   params=request.rel_url.query,
                                   data=await request.read()) as resp:
            LOG.debug("Got %s response from %s", resp.status, url)
            raw = await resp.read()
            if 'text/html' in resp.headers['Content-Type']:  # exclude static from parsing
                raw = response_body(raw)
            response = web.Response(body=raw, status=resp.status, headers=resp.headers)
            response.enable_chunked_encoding()
            return response

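# A minimal sketch of what one rewrite processor for the proxy above might look
# like. Assumptions (not from the original): PROCESSORS maps processor-function
# names to their kwargs, each processor takes and returns a BeautifulSoup tree,
# and in the real code the function would live in the `processors` module that
# response_body() looks names up on. `rewrite_links`, `old_host`, and `new_host`
# are hypothetical names for illustration only.
from bs4 import BeautifulSoup

def rewrite_links(soup, old_host="", new_host=""):
    # Point absolute links at the proxy instead of the upstream site.
    for a in soup.find_all("a", href=True):
        a["href"] = a["href"].replace(old_host, new_host)
    return soup

# Hypothetical config consumed by response_body():
PROCESSORS = {"rewrite_links": {"old_host": "https://example.org", "new_host": "/"}}
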
def recurse_nodes_to_extract_knowledge_map(node, node_cache):
    """
    Internal function for recursing the topic tree and building the knowledge map.
    Requires rebranding of metadata done by recurse_nodes function.
    """
    assert node["kind"] == "Topic"

    if node.get("in_knowledge_map", None):
        if node["slug"] not in knowledge_map["topics"]:
            logging.debug("Not in knowledge map: %s" % node["slug"])
            node["in_knowledge_map"] = False
            for node in node_cache["Topic"][node["slug"]]:
                node["in_knowledge_map"] = False

        knowledge_topics[node["slug"]] = topic_tools.get_all_leaves(node, leaf_type="Exercise")

        if not knowledge_topics[node["slug"]]:
            sys.stderr.write("Removing topic from topic tree: no exercises. %s" % node["slug"])
            del knowledge_topics[node["slug"]]
            del knowledge_map["topics"][node["slug"]]
            node["in_knowledge_map"] = False
            for node in node_cache["Topic"][node["slug"]]:
                node["in_knowledge_map"] = False
    else:
        if node["slug"] in knowledge_map["topics"]:
            sys.stderr.write("Removing topic from topic tree; does not belong. '%s'" % node["slug"])
            logging.warn("Removing from knowledge map: %s" % node["slug"])
            del knowledge_map["topics"][node["slug"]]

    for child in [n for n in node.get("children", []) if n["kind"] == "Topic"]:
        recurse_nodes_to_extract_knowledge_map(child, node_cache)

def invalidate_inmemory_caches():
    for module in (i18n, topic_tools):
        for cache_var in getattr(module, "CACHE_VARS", []):
            logging.debug("Emptying cache %s.%s" % (module.__name__, cache_var))
            setattr(module, cache_var, None)

    logging.info("Great success emptying the in-memory cache.")

def process_request(self, request):
    next = request.GET.get("next", "")
    if next.startswith("/"):
        logging.debug("next='%s'" % next)
        request.next = next
    else:
        request.next = ""

def increment_language_pack_version(stored_meta, updated_meta):
    """Increment language pack version if translations have been updated
    (start over if software version has incremented)
    """
    for att in VERSION_CHANGING_ATTRIBUTES:
        # Everything is OK except for stored_meta to contain something
        #   that updated_meta does not.
        in_updated = att in updated_meta
        in_stored = att in stored_meta
        assert (not in_stored) or in_updated, "VERSION_CHANGING_ATTRIBUTES %s not contained in the update." % att

    assert "software_version" not in stored_meta or stored_meta["software_version"] == updated_meta["software_version"], "Metadata must be a version match."

    # Search for any attributes that would cause a version change.
    language_pack_version = stored_meta.get("language_pack_version", 0)  # will increment to one
    for att in VERSION_CHANGING_ATTRIBUTES:
        if stored_meta.get(att) != updated_meta.get(att):
            language_pack_version += 1
            logging.debug("Increasing %s language pack version to %d" % (updated_meta["code"], language_pack_version))
            break

    return language_pack_version

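# A quick illustration of the versioning rule above. This is a sketch:
# VERSION_CHANGING_ATTRIBUTES and the metadata shapes are assumed for the
# example, not taken from the original module.
VERSION_CHANGING_ATTRIBUTES = ["total_translated", "percent_translated"]  # hypothetical

stored = {"code": "pt-BR", "software_version": "0.11", "language_pack_version": 2, "total_translated": 500}
updated = {"code": "pt-BR", "software_version": "0.11", "total_translated": 512, "percent_translated": 48}

# total_translated changed between the two dicts, so the pack version bumps once:
# increment_language_pack_version(stored, updated) == 3
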
def set_cached_password(cls, user, raw_password):
    assert user.id, "Your user must have an ID before calling this function."

    if not cls.is_enabled():
        # Must delete, to make sure we don't get out of sync.
        cls.invalidate_cached_password(user=user)

    else:
        try:
            # Set the cached password.
            n_cached_iters = cls.iters_for_user_type(user)
            # TODO(bcipolli) Migrate this to an extended django class
            #   that uses get_or_initialize
            cached_password = get_object_or_None(cls, user=user) or cls(user=user)
            cached_password.password = crypt(raw_password, iterations=n_cached_iters)
            cached_password.save()
            logging.debug("Set cached password for user=%s; iterations=%d" % (user.username, n_cached_iters))
        except Exception as e:
            # If we fail to create a cache item... just keep going--functionality
            #   can still move forward.
            logging.error(e)

def download_kmap_icons(knowledge_map):
    for key, value in knowledge_map["topics"].items():
        # Note: id here is retrieved from knowledge_map, so we're OK
        #   that we blew away ID in the topic tree earlier.
        if "icon_url" not in value:
            logging.debug("No icon URL for %s" % key)

        value["icon_url"] = iconfilepath + value["id"] + iconextension
        knowledge_map["topics"][key] = value

        out_path = data_path + "../" + value["icon_url"]
        if os.path.exists(out_path) and not force_icons:
            continue

        icon_khan_url = "http://www.khanacademy.org" + value["icon_url"]
        sys.stdout.write("Downloading icon %s from %s..." % (value["id"], icon_khan_url))
        sys.stdout.flush()
        try:
            icon = requests.get(icon_khan_url)
        except Exception as e:
            sys.stdout.write("\n")  # complete the "downloading" output
            sys.stderr.write("Failed to download %-80s: %s\n" % (icon_khan_url, e))
            continue

        if icon.status_code == 200:
            iconfile = file(data_path + "../" + value["icon_url"], "w")
            iconfile.write(icon.content)
        else:
            sys.stdout.write(" [NOT FOUND]")
            value["icon_url"] = iconfilepath + defaulticon + iconextension
        sys.stdout.write(" done.\n")  # complete the "downloading" output

def handle(self, *args, **options):
    if len(args) == 1 and args[0] == "test":
        # Callback for "weak" test--checks at least that
        #   the django project compiles (local_settings is OK)
        sys.stdout.write("Success!\n")
        exit(0)

    try:
        if options.get("branch", None):
            # Specified a repo
            self.update_via_git(**options)

        elif options.get("zip_file", None):
            # Specified a file
            if not os.path.exists(options.get("zip_file")):
                raise CommandError("Specified zip file does not exist: %s" % options.get("zip_file"))
            self.update_via_zip(**options)

        elif options.get("url", None):
            self.update_via_zip(**options)

        elif os.path.exists(settings.PROJECT_PATH + "/../.git"):
            # If we detect a git repo, try git
            if len(args) == 1 and not options["branch"]:
                options["branch"] = args[0]
            elif len(args) != 0:
                raise CommandError("Specified too many command-line arguments")
            self.update_via_git(**options)

        elif len(args) > 1:
            raise CommandError("Too many command-line arguments.")

        elif len(args) == 1:
            # Specify zip via first command-line arg
            if options['zip_file'] is not None:
                raise CommandError("Cannot specify a zipfile as unnamed and named command-line arguments at the same time.")
            options['zip_file'] = args[0]
            self.update_via_zip(**options)

        else:
            # No params, no git repo: try to get a file online.
            zip_file = tempfile.mkstemp()[1]
            for url in ["http://%s/api/download/kalite/latest/%s/%s/" % (settings.CENTRAL_SERVER_HOST, platform.system().lower(), "en")]:
                logging.info("Downloading repo snapshot from %s to %s" % (url, zip_file))
                try:
                    urllib.urlretrieve(url, zip_file)
                    sys.stdout.write("success @ %s\n" % url)
                    break
                except Exception as e:
                    logging.debug("Failed to get zipfile from %s: %s" % (url, e))
                    continue
            options["zip_file"] = zip_file
            self.update_via_zip(**options)

    except Exception as e:
        if self.started() and not self.ended():
            self.cancel(stage_status="error", notes=unicode(e))
        raise

    assert self.ended(), "Subroutines should complete() if they start()!"

def call_outside_command_with_output(kalite_location, command, *args, **kwargs):
    """
    Runs call_command for a KA Lite installation at the given location,
    and returns the output.
    """

    # build the command
    cmd = (sys.executable, kalite_location + "/kalite/manage.py", command)
    for arg in args:
        cmd += (arg,)
    for key, val in kwargs.items():
        key = key.replace("_", "-")
        prefix = "--" if command != "runcherrypyserver" else ""
        if isinstance(val, bool):
            cmd += ("%s%s" % (prefix, key),)
        else:
            cmd += ("%s%s=%s" % (prefix, key, str(val)),)
    logging.debug(cmd)

    # Execute the command, using subprocess/Popen
    cwd = os.getcwd()
    os.chdir(kalite_location + "/kalite")
    p = subprocess.Popen(cmd, shell=False, cwd=os.path.split(cmd[0])[0], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out = p.communicate()
    os.chdir(cwd)

    logging.debug(out[1] if out[1] else out[0])

    # tuple of (stdout, stderr, pseudo exit code: 1 if anything was written to stderr, else 0)
    return out + (1 if out[1] else 0,)

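# Example use of the helper above (a sketch; the install location is hypothetical).
# Boolean kwargs become bare flags, everything else becomes --key=value, and
# underscores in keys are converted to dashes.
out, err, rc = call_outside_command_with_output(
    "/opt/ka-lite",   # hypothetical kalite_location
    "syncdb",         # management command to run
    noinput=True,     # rendered as --noinput
    verbosity=0,      # rendered as --verbosity=0
)
if rc:
    print("command wrote to stderr: %s" % err)
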
def __init__(self, comment=None, fixture=None, **kwargs):

    self.return_dict = {}
    self.return_dict['comment'] = comment
    self.return_dict['class'] = type(self).__name__
    self.return_dict['uname'] = platform.uname()
    self.return_dict['fixture'] = fixture

    try:
        self.verbosity = int(kwargs.get("verbosity"))
    except:
        self.verbosity = 1

    try:
        branch = subprocess.Popen(["git", "describe", "--contains", "--all", "HEAD"], stdout=subprocess.PIPE).communicate()[0]
        self.return_dict['branch'] = branch[:-1]
        head = subprocess.Popen(["git", "log", "--pretty=oneline", "--abbrev-commit", "--max-count=1"], stdout=subprocess.PIPE).communicate()[0]
        self.return_dict['head'] = head[:-1]
    except:
        self.return_dict['branch'] = None
        self.return_dict['head'] = None

    # If setup fails, what could we do?
    #   Letting the exception bubble up is best.
    try:
        self._setup(**kwargs)
    except Exception as e:
        logging.debug("Failed setup (%s); trying to tear down" % e)
        try:
            self._teardown()
        except:
            pass
        raise e

def force_job(command, name="", frequency="YEARLY", stop=False, launch_cron=True): """ Mark a job as to run immediately (or to stop). By default, call cron directly, to resolve. """ jobs = Job.objects.filter(command=command) if jobs.count() > 0: job = jobs[0] else: job = Job(command=command) job.frequency = frequency job.name = name or command if stop: job.is_running = False else: job.next_run = datetime.now() job.save() if launch_cron: # Just start cron directly, so that the process starts immediately. # Note that if you're calling force_job frequently, then # you probably want to avoid doing this on every call. if get_count() and not job_status(command): logging.debug("Ready to launch command '%s'" % command) call_command_async("cron")
def am_i_online(url, expected_val=None, search_string=None, timeout=5, allow_redirects=True):
    """Test whether we are online or not.
    returns True or False.
    Eats all exceptions!
    """
    assert not (search_string and expected_val is not None), "Search string and expected value cannot both be set"

    try:
        if not search_string and expected_val is None:
            response = requests.head(url)
        else:
            response = requests.get(url, timeout=timeout, allow_redirects=allow_redirects)

        # Validate that response came from the requested url
        if response.status_code != 200:
            return False
        elif not allow_redirects and response.url != url:
            return False

        # Check the output, if expected values are specified
        if expected_val is not None:
            return expected_val == response.text
        elif search_string:
            return search_string in response.text

        return True

    except Exception as e:
        logging.debug("am_i_online: %s" % e)
        return False

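# Connectivity-check sketches using am_i_online() above (the URLs are
# illustrative, not from the original):
if am_i_online("http://example.com/"):  # bare check: HEAD request, any 200 counts
    print("online")
am_i_online("http://example.com/status", expected_val="OK")  # body must equal "OK" exactly
am_i_online("http://example.com/", search_string="<html")    # body must contain the string
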
def update_json(youtube_id, lang_code, downloaded, api_response, time_of_attempt):
    """Update language_srt_map to reflect download status

    lang_code in IETF format
    """
    # Open JSON file
    filepath = get_lang_map_filepath(lang_code)
    try:
        with open(filepath, "r") as fp:
            language_srt_map = json.load(fp)
    except Exception as e:
        logging.error("Something went wrong while trying to open the json file (%s): %s" % (filepath, e))
        return False

    # create updated entry
    entry = language_srt_map[youtube_id]
    entry["downloaded"] = downloaded
    entry["api_response"] = api_response
    entry["last_attempt"] = time_of_attempt
    if api_response == "success":
        entry["last_success"] = time_of_attempt

    # update full-size JSON with new information
    language_srt_map[youtube_id].update(entry)

    # write it to file
    json_file = open(filepath, "wb")
    json_file.write(json.dumps(language_srt_map))
    json_file.close()
    logging.debug("File updated.")

    return True

def update_json(youtube_id, lang_code, downloaded, api_response, time_of_attempt):
    """Update language_srt_map to reflect download status

    lang_code in IETF format
    """
    # Open JSON file
    filepath = get_lang_map_filepath(lang_code)
    language_srt_map = softload_json(filepath, logger=logging.error)
    if not language_srt_map:
        return False

    # create updated entry
    entry = language_srt_map[youtube_id]
    entry["downloaded"] = downloaded
    entry["api_response"] = api_response
    entry["last_attempt"] = time_of_attempt
    if api_response == "success":
        entry["last_success"] = time_of_attempt

    # update full-size JSON with new information
    language_srt_map[youtube_id].update(entry)

    # write it to file
    json_file = open(filepath, "wb")
    json_file.write(json.dumps(language_srt_map))
    json_file.close()
    logging.debug("File updated.")

    return True

def validate_times(srt_content, srt_issues):
    times = re.findall("([0-9:,]+) --> ([0-9:,]+)\r\n", srt_content, re.S | re.M)

    parse_time = lambda time_str: datetime.datetime.strptime(time_str, "%H:%M:%S,%f")
    for i in range(len(times)):
        try:
            between_subtitle_time = datediff(parse_time(times[i][0]), parse_time(times[i - 1][1] if i > 0 else "00:00:00,000"))
            within_subtitle_time = datediff(parse_time(times[i][1]), parse_time(times[i][0]))

            if between_subtitle_time > 60.:
                srt_issues.append("Between-subtitle gap of %5.2f seconds" % between_subtitle_time)

            if within_subtitle_time > 60.:
                srt_issues.append("Within-subtitle duration of %5.2f seconds" % within_subtitle_time)
            elif within_subtitle_time == 0.:
                logging.debug("Subtitle flies by too fast (%s --> %s)." % times[i])

            #print "Start: %s\tB: %5.2f\tW: %5.2f" % (parse_time(times[i][0]), between_subtitle_time, within_subtitle_time)
        except Exception as e:
            if not times[i][1].startswith('99:59:59'):
                srt_issues.append("Error checking times: %s" % e)
            else:
                if len(times) - i > 1 and len(times) - i - 1 > len(times) / 10.:
                    if i == 0:
                        srt_issues.append("No subtitles have a valid starting point.")
                    else:
                        logging.debug("Hit end of movie, but %d (of %d) subtitle(s) remain in the queue." % (len(times) - i - 1, len(times)))
                break

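# validate_times() expects raw SRT cue lines with CRLF terminators. A sketch
# (datediff and the 60-second thresholds come from the surrounding module):
srt_content = (
    "1\r\n00:00:01,000 --> 00:00:04,000\r\nFirst subtitle\r\n\r\n"
    "2\r\n00:02:00,000 --> 00:02:03,500\r\nSecond subtitle\r\n\r\n"
)
srt_issues = []
validate_times(srt_content, srt_issues)
# The 116-second gap between cue 1's end and cue 2's start exceeds the
# 60-second threshold, so srt_issues now holds one "Between-subtitle gap" entry.
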
def end_user_activity(cls, user, activity_type="login", end_datetime=None):
    """Helper function to complete an existing user activity log entry."""

    # Do nothing if the max # of records is zero or None
    # (i.e. this functionality is disabled)
    if not settings.USER_LOG_MAX_RECORDS:
        return

    assert user is not None, "A valid user must always be specified."
    if not end_datetime:  # must be done outside the function header (else becomes static)
        end_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_user_log_entry = get_object_or_None(cls, user=user, end_datetime=None)

    # No unstopped starts.  Start should have been called first!
    if not cur_user_log_entry:
        logging.warn("%s: Had to create a user log entry, but STOPPING('%d')! @ %s" % (user.username, activity_type, end_datetime))
        cur_user_log_entry = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=end_datetime)

    logging.debug("%s: Logging LOGOUT activity @ %s" % (user.username, end_datetime))
    cur_user_log_entry.end_datetime = end_datetime
    cur_user_log_entry.save()  # total-seconds will be computed here.

def begin_user_activity(cls, user, activity_type="login", start_datetime=None):
    """Helper function to create a user activity log entry."""

    # Do nothing if the max # of records is zero or None
    # (i.e. this functionality is disabled)
    if not settings.USER_LOG_MAX_RECORDS:
        return

    assert user is not None, "A valid user must always be specified."
    if not start_datetime:  # must be done outside the function header (else becomes static)
        start_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_user_log_entry = get_object_or_None(cls, user=user, end_datetime=None)

    logging.debug("%s: BEGIN activity(%d) @ %s" % (user.username, activity_type, start_datetime))

    # Seems we're logging in without logging out of the previous.
    #   Best thing to do is simulate a login
    #   at the previous last update time.
    #
    # Note: this can be a recursive call
    if cur_user_log_entry:
        logging.warn("%s: END activity on a begin @ %s" % (user.username, start_datetime))
        cls.end_user_activity(user=user, activity_type=activity_type, end_datetime=cur_user_log_entry.last_active_datetime)

    # Create a new entry
    cur_user_log_entry = cls(user=user, activity_type=activity_type, start_datetime=start_datetime, last_active_datetime=start_datetime)
    cur_user_log_entry.save()

    return cur_user_log_entry

def update_json(youtube_id, lang_code, downloaded, api_response, time_of_attempt):
    """Update language_srt_map to reflect download status"""

    # Open JSON file
    filepath = get_lang_map_filepath(lang_code)
    try:
        language_srt_map = json.loads(open(filepath).read())
    except:
        logging.debug("Something went wrong while trying to open the json file: %s" % filepath)
        return  # can't proceed without the map

    # create updated entry
    entry = language_srt_map[youtube_id]
    entry["downloaded"] = downloaded
    entry["api_response"] = api_response
    entry["last_attempt"] = time_of_attempt
    if api_response == "success":
        entry["last_success"] = time_of_attempt

    # update full-size JSON with new information
    language_srt_map[youtube_id].update(entry)

    # write it to file
    logging.info("File updated.")
    json_file = open(filepath, "wb")
    json_file.write(json.dumps(language_srt_map))
    json_file.close()

def refresh_topic_cache_wrapper_fn(request, cached_nodes={}, *args, **kwargs):
    """
    Centralized logic for how to refresh the topic cache, for each type of object.

    When the object is desired to be used, this code runs to refresh data,
    balancing between correctness and efficiency.
    """
    # Note: `force` and `handler` are assumed to come from the enclosing (decorator) scope.
    if not cached_nodes:
        cached_nodes = {"topics": topicdata.TOPICS}

    for node in cached_nodes.values():
        if not node:
            continue
        has_children = bool(node.get("children"))
        has_grandchildren = has_children and any(["children" in child for child in node["children"]])

        # Properties not yet marked
        if node["kind"] == "Video":
            if force or "urls" not in node:
                #stamp_urls_on_video(node, force=force)  # will be done by force below
                recount_videos_and_invalidate_parents(node["parent"], force=True)

        elif node["kind"] == "Topic":
            if not force and (not has_grandchildren or "nvideos_local" not in node):
                # if forcing, would do this here, and again below--so skip if forcing.
                logging.debug("cache miss: stamping urls on videos")
                for video in topic_tools.get_topic_videos(path=node["path"]):
                    stamp_urls_on_video(video, force=force)
            recount_videos_and_invalidate_parents(node, force=force or not has_grandchildren)

    kwargs.update(cached_nodes)
    return handler(request, *args, **kwargs)

def recurse_nodes_to_clean_related_videos(node):
    """
    Internal function for recursing the topic tree and marking related exercises.
    Requires rebranding of metadata done by recurse_nodes function.
    """
    def get_video_node(video_slug, node):
        if node["kind"] == "Topic":
            for child in node.get("children", []):
                video_node = get_video_node(video_slug, child)
                if video_node:
                    return video_node
        elif node["kind"] == "Video" and node["slug"] == video_slug:
            return node

        return None

    if node["kind"] == "Exercise":
        videos_to_delete = []
        for vi, video_slug in enumerate(node["related_video_readable_ids"]):
            if not get_video_node(video_slug, topictree):
                videos_to_delete.append(vi)
        for vi in reversed(videos_to_delete):
            logging.debug("Deleting unknown video %s" % node["related_video_readable_ids"][vi])
            del node["related_video_readable_ids"][vi]

    for child in node.get("children", []):
        recurse_nodes_to_clean_related_videos(child)

def generate_test_files():
    """Insert asterisks as translations in po files"""

    # Open them up and insert asterisks for all empty msgstrs
    logging.info("Generating test po files")
    en_po_dir = os.path.join(settings.LOCALE_PATHS[0], "en/LC_MESSAGES/")
    for po_file in glob.glob(os.path.join(en_po_dir, "*.po")):

        msgid_pattern = re.compile(r'msgid \"(.*)\"\nmsgstr', re.S | re.M)

        content = open(os.path.join(en_po_dir, po_file), 'r').read()
        results = content.split("\n\n")
        with open(os.path.join(en_po_dir, "tmp.po"), 'w') as temp_file:
            # We know the first block is static, so just dump that.
            temp_file.write(results[0])

            # Now work through actual translations
            for result in results[1:]:
                try:
                    msgid = re.findall(msgid_pattern, result)[0]
                    temp_file.write("\n\n")
                    temp_file.write(result.replace("msgstr \"\"", "msgstr \"***%s***\"" % msgid))
                except Exception as e:
                    logging.error("Failed to insert test string: %s\n\n%s\n\n" % (e, result))

        # Once done replacing, rename temp file to overwrite original
        os.rename(os.path.join(en_po_dir, "tmp.po"), os.path.join(en_po_dir, po_file))

    (out, err, rc) = compile_po_files("en")
    if err:
        logging.debug("Error executing compilemessages: %s" % err)

def save(self, *args, **kwargs):
    if not kwargs.get("imported", False):
        self.full_clean()

        # Compute learner status
        if self.attempts > 20 and not self.complete:
            self.struggling = True
        already_complete = self.complete
        self.complete = (self.streak_progress >= 100)
        if not already_complete and self.complete:
            self.struggling = False
            self.completion_timestamp = datetime.now()
            self.completion_counter = Device.get_own_device().get_counter()
            self.attempts_before_completion = self.attempts

        # Tell logins that they are still active (ignoring validation failures).
        # TODO(bcipolli): Could log exercise information in the future.
        try:
            UserLog.update_user_activity(self.user, activity_type="login", update_datetime=(self.completion_timestamp or datetime.now()))
        except ValidationError as e:
            logging.debug("Failed to update userlog during exercise: %s" % e)

    super(ExerciseLog, self).save(*args, **kwargs)

def select_best_available_language(target_code, available_codes=None):
    """
    Critical function for choosing the best available language for a resource,
    given a target language code.

    This is used by video and exercise pages, for example,
    to determine what file to serve, based on available resources
    and the current requested language.
    """
    # Scrub the input
    target_code = lcode_to_django_lang(target_code)
    if available_codes is None:
        available_codes = get_installed_language_packs().keys()
    available_codes = [lcode_to_django_lang(lc) for lc in available_codes]

    # Hierarchy of language selection
    if target_code in available_codes:
        actual_code = target_code
    elif target_code.split("-", 1)[0] in available_codes:
        actual_code = target_code.split("-", 1)[0]
    elif settings.LANGUAGE_CODE in available_codes:
        actual_code = settings.LANGUAGE_CODE
    elif "en" in available_codes:
        actual_code = "en"
    elif available_codes:
        actual_code = available_codes[0]
    else:
        actual_code = None

    if actual_code != target_code:
        logging.debug("Requested code %s, got code %s" % (target_code, actual_code))
    return actual_code

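# Fallback-behavior sketch. Codes pass through lcode_to_django_lang() first, so
# the exact strings returned depend on that helper; the selection tiers are what
# matters here.
best = select_best_available_language("pt-BR", available_codes=["pt", "en"])
# "pt-BR" itself isn't installed, so the base language "pt" wins; if neither were
# present, settings.LANGUAGE_CODE, then "en", then the first installed pack is used.
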
def begin_user_activity(cls, user, activity_type="login", start_datetime=None, language=None, suppress_save=False):
    """Helper function to create a user activity log entry."""

    # Do nothing if the max # of records is zero
    # (i.e. this functionality is disabled)
    if not cls.is_enabled():
        return

    if not user:
        raise ValidationError("A valid user must always be specified.")
    if not start_datetime:  # must be done outside the function header (else becomes static)
        start_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
    if cur_log:
        # Seems we're logging in without logging out of the previous.
        #   Best thing to do is simulate a login
        #   at the previous last update time.
        #
        # Note: this can be a recursive call
        logging.warn("%s: had to END activity on a begin(%d) @ %s" % (user.username, activity_type, start_datetime))
        # Don't mark current language when closing an old one
        cls.end_user_activity(user=user, activity_type=activity_type, end_datetime=cur_log.last_active_datetime)  # can't suppress save
        cur_log = None

    # Create a new entry
    logging.debug("%s: BEGIN activity(%d) @ %s" % (user.username, activity_type, start_datetime))
    cur_log = cls(user=user, activity_type=activity_type, start_datetime=start_datetime, last_active_datetime=start_datetime, language=language)
    if not suppress_save:
        cur_log.save()

    return cur_log

def update_user_activity(cls, user, activity_type="login", update_datetime=None, language=None, suppress_save=False):
    """Helper function to update an existing user activity log entry."""

    # Do nothing if the max # of records is zero
    # (i.e. this functionality is disabled)
    if not cls.is_enabled():
        return

    if not user:
        raise ValidationError("A valid user must always be specified.")
    if not update_datetime:  # must be done outside the function header (else becomes static)
        update_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
    if cur_log:
        # How could you start after you updated??
        if cur_log.start_datetime > update_datetime:
            raise ValidationError("Update time must always be later than the login time.")
    else:
        # No unstopped starts.  Start should have been called first!
        logging.warn("%s: Had to create a user log entry on an UPDATE(%d)! @ %s" % (user.username, activity_type, update_datetime))
        cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=update_datetime, suppress_save=True)

    logging.debug("%s: UPDATE activity (%d) @ %s" % (user.username, activity_type, update_datetime))
    cur_log.last_active_datetime = update_datetime
    cur_log.language = language or cur_log.language  # set the language to the current language, if there is one.
    if not suppress_save:
        cur_log.save()

    return cur_log

def recurse_nodes_to_delete_exercise(node, OLD_NODE_CACHE):
    """
    Internal function for recursing the topic tree and removing new exercises.
    Requires rebranding of metadata done by recurse_nodes function.
    """
    # Stop recursing when we hit leaves
    if node["kind"] != "Topic":
        return

    children_to_delete = []
    for ci, child in enumerate(node.get("children", [])):
        # Mark all unrecognized exercises for deletion
        if child["kind"] == "Exercise":
            if not child["slug"] in OLD_NODE_CACHE["Exercise"].keys():
                children_to_delete.append(ci)

        # Recurse over children to delete
        elif child.get("children", None):
            recurse_nodes_to_delete_exercise(child, OLD_NODE_CACHE)

            # Delete children without children (all their children were removed)
            if not child.get("children", None):
                logging.debug("Removing now-childless topic node '%s'" % child["slug"])
                children_to_delete.append(ci)
            # If there are no longer exercises, be honest about it
            elif not any([ch["kind"] == "Exercise" or "Exercise" in ch.get("contains", []) for ch in child["children"]]):
                child["contains"] = list(set(child["contains"]) - set(["Exercise"]))

    # Do the actual deletion
    for i in reversed(children_to_delete):
        logging.debug("Deleting unknown exercise %s" % node["children"][i]["slug"])
        del node["children"][i]

def end_user_activity(cls, user, activity_type="login", end_datetime=None, suppress_save=False):  # don't accept language--we're just closing previous activity.
    """Helper function to complete an existing user activity log entry."""

    # Do nothing if the max # of records is zero
    # (i.e. this functionality is disabled)
    if not cls.is_enabled():
        return

    if not user:
        raise ValidationError("A valid user must always be specified.")
    if not end_datetime:  # must be done outside the function header (else becomes static)
        end_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
    if cur_log:
        # How could you start after you ended??
        if cur_log.start_datetime > end_datetime:
            raise ValidationError("Update time must always be later than the login time.")
    else:
        # No unstopped starts.  Start should have been called first!
        logging.warn("%s: Had to BEGIN a user log entry, but ENDING(%d)! @ %s" % (user.username, activity_type, end_datetime))
        cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=end_datetime, suppress_save=True)

    logging.debug("%s: Logging LOGOUT activity @ %s" % (user.username, end_datetime))
    cur_log.end_datetime = end_datetime
    if not suppress_save:
        cur_log.save()  # total-seconds will be computed here.

    return cur_log

def add_to_summary(sender, **kwargs):
    assert UserLog.is_enabled(), "We shouldn't be saving unless UserLog is enabled."

    instance = kwargs["instance"]

    if not instance.start_datetime:
        raise ValidationError("start_datetime cannot be None")
    if instance.last_active_datetime and instance.start_datetime > instance.last_active_datetime:
        raise ValidationError("UserLog date consistency check for start_datetime and last_active_datetime")

    if instance.end_datetime and not instance.total_seconds:
        # Compute total_seconds, save to summary
        # Note: only supports setting end_datetime once!
        instance.full_clean()

        # The top computation is more lenient: user activity is just time logged in, literally.
        # The bottom computation is more strict: user activity is from start until the last "action"
        #   recorded--in the current case, that means from login until the last moment an exercise or
        #   video log was updated.
        #instance.total_seconds = datediff(instance.end_datetime, instance.start_datetime, units="seconds")
        instance.total_seconds = 0 if not instance.last_active_datetime else datediff(instance.last_active_datetime, instance.start_datetime, units="seconds")

        # Confirm the result (output info first for easier debugging)
        if instance.total_seconds < 0:
            raise ValidationError("Total learning time should always be non-negative.")
        logging.debug("%s: total time (%d): %d seconds" % (instance.user.username, instance.activity_type, instance.total_seconds))

        # Save only completed log items to the UserLogSummary
        UserLogSummary.add_log_to_summary(instance)

def save(self, *args, **kwargs): """When this model is saved, check if the activity is ended. If so, compute total_seconds and update the corresponding summary log.""" # Do nothing if the max # of records is zero or None # (i.e. this functionality is disabled) if not settings.USER_LOG_MAX_RECORDS: return # Compute total_seconds, save to summary # Note: only supports setting end_datetime once! if self.end_datetime and not self.total_seconds: self.full_clean() # The top computation is more lenient: user activity is just time logged in, literally. # The bottom computation is more strict: user activity is from start until the last "action" # recorded--in the current case, that means from login until the last moment an exercise or # video log was updated. #self.total_seconds = datediff(self.end_datetime, self.start_datetime, units="seconds") self.total_seconds = 0 if not self.last_active_datetime else datediff(self.last_active_datetime, self.start_datetime, units="seconds") # Confirm the result (output info first for easier debugging) logging.debug("%s: total learning time: %d seconds" % (self.user.username, self.total_seconds)) assert self.total_seconds >= 0, "Total learning time should always be non-negative." # Save only completed log items to the UserLogSummary UserLogSummary.add_log_to_summary(self) super(UserLog, self).save(*args, **kwargs) if UserLog.objects.count() > settings.USER_LOG_MAX_RECORDS: # Unfortunately, could not do an aggregate delete when doing a # slice in query to_discard = UserLog.objects.order_by("start_datetime")[0:UserLog.objects.count()-settings.USER_LOG_MAX_RECORDS] UserLog.objects.filter(pk__in=to_discard).delete()
def add_log_to_summary(cls, user_log, device=None):
    """Adds total_time to the appropriate user/device/activity's summary log."""

    assert user_log.end_datetime, "all log items must have an end_datetime to be saved here."
    assert user_log.total_seconds >= 0, "all log items must have a non-negative total_seconds to be saved here."

    device = device or Device.get_own_device()  # Must be done here, or install fails

    # Check for an existing object
    log_summary = cls.objects.filter(
        device=device,
        user=user_log.user,
        activity_type=user_log.activity_type,
        start_datetime__lte=user_log.end_datetime,
        end_datetime__gte=user_log.end_datetime,
    )
    assert log_summary.count() <= 1, "There should never be multiple summaries in the same time period/device/user/type combo"

    # Get (or create) the log item
    log_summary = log_summary[0] if log_summary.count() else cls(
        device=device,
        user=user_log.user,
        activity_type=user_log.activity_type,
        start_datetime=cls.get_period_start_datetime(user_log.end_datetime, settings.USER_LOG_SUMMARY_FREQUENCY),
        end_datetime=cls.get_period_end_datetime(user_log.end_datetime, settings.USER_LOG_SUMMARY_FREQUENCY),
        total_seconds=0,
        count=0,
    )

    logging.debug("Adding %d seconds for %s/%s/%d, period %s to %s" % (user_log.total_seconds, device.name, user_log.user.username, user_log.activity_type, log_summary.start_datetime, log_summary.end_datetime))

    # Add the latest info
    log_summary.total_seconds += user_log.total_seconds
    log_summary.count += 1
    log_summary.save()

def move_exercises(lang_code):
    lang_pack_location = os.path.join(LOCALE_ROOT, lang_code)
    src_exercise_dir = os.path.join(lang_pack_location, "exercises")
    dest_exercise_dir = get_localized_exercise_dirpath(lang_code, is_central_server=False)

    if not os.path.exists(src_exercise_dir):
        logging.warn("Could not find downloaded exercises; skipping: %s" % src_exercise_dir)
    else:
        # Move over one at a time, to combine with any other resources that were there before.
        ensure_dir(dest_exercise_dir)
        all_exercise_files = glob.glob(os.path.join(src_exercise_dir, "*.html"))
        logging.info("Moving %d downloaded exercises to %s" % (len(all_exercise_files), dest_exercise_dir))

        for exercise_file in all_exercise_files:
            shutil.move(exercise_file, os.path.join(dest_exercise_dir, os.path.basename(exercise_file)))

        logging.debug("Removing empty directory")
        try:
            shutil.rmtree(src_exercise_dir)
        except Exception as e:
            logging.error("Error removing exercise directory (%s): %s" % (src_exercise_dir, e))

def update_user_activity(cls, user, activity_type="login", update_datetime=None, language=None, suppress_save=False):
    """Helper function to update an existing user activity log entry."""

    # Do nothing if the max # of records is zero
    # (i.e. this functionality is disabled)
    if not cls.is_enabled():
        return

    if not user:
        raise ValidationError("A valid user must always be specified.")
    if not update_datetime:  # must be done outside the function header (else becomes static)
        update_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
    if cur_log:
        # How could you start after you updated??
        if cur_log.start_datetime > update_datetime:
            raise ValidationError("Update time must always be later than the login time.")
    else:
        # No unstopped starts.  Start should have been called first!
        logging.warn("%s: Had to create a user log entry on an UPDATE(%d)! @ %s" % (user.username, activity_type, update_datetime))
        cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=update_datetime, suppress_save=True)

    logging.debug("%s: UPDATE activity (%d) @ %s" % (user.username, activity_type, update_datetime))
    cur_log.last_active_datetime = update_datetime
    cur_log.language = language or cur_log.language  # set the language to the current language, if there is one.
    if not suppress_save:
        cur_log.save()

    return cur_log

def force_job(command, name="", frequency="YEARLY", stop=False, launch_cron=True): """ Mark a job as to run immediately (or to stop). By default, call cron directly, to resolve. """ jobs = Job.objects.filter(command=command) if jobs.count() > 0: job = jobs[0] else: job = Job(command=command) job.frequency = frequency job.name = name or command if stop: job.is_running = False else: job.next_run = datetime.now() job.save() if launch_cron: # Just start cron directly, so that the process starts immediately. # Note that if you're calling force_job frequently, then # you probably want to avoid doing this on every call. if get_count() and not job_status(command): logging.debug("Ready to launch command '%s'" % command) call_command_async("cron", manage_py_dir=settings.PROJECT_PATH)
def compute_one_way(cls, zone, from_device, to_device):
    """ """
    assert from_device.is_trusted() or from_device.get_zone() == zone

    # Trace back from this device to the zone-trusted device.
    chain = [{"device": from_device}]
    devices_in_chain = set([])

    for i in range(cls.MAX_CHAIN_LENGTH):  # max chain size: 1000 (avoids infinite loops)
        # We're going to traverse the chain backwards, until we get to
        #   the zone_owner (to_device), or a trusted device.
        cur_link = chain[-1]

        # Get a devicezone and/or zone invitation for the current device.
        cur_link["zone_invitation"] = get_object_or_None(ZoneInvitation, used_by=cur_link["device"].signed_by, revoked=False)
        if cur_link["zone_invitation"]:
            cur_link["zone_invitation"].verify()  # make sure it's a valid invitation
        cur_link["device_zone"] = get_object_or_None(DeviceZone, device=cur_link["device"].signed_by, revoked=False)

        # Determine the next step.  Three terminal steps, one continuing step
        if not cur_link["zone_invitation"] and not cur_link["device_zone"]:
            # A break in the chain.  No connection between the device and the zone.
            break

        elif cur_link["device"] == to_device or cur_link["device"].is_trusted():
            logging.debug("Found end of chain!")
            break

        next_device = getattr(cur_link["zone_invitation"], "invited_by", None)
        next_device = next_device or getattr(cur_link["device_zone"], "signed_by")
        if next_device in devices_in_chain:
            logging.warn("loop detected.")
            break
        else:
            # So far, we're OK--keep looking for the (valid) end of the chain
            assert next_device.is_trusted() or next_device.get_zone() == zone
            devices_in_chain.add(next_device)
            chain.append({"device": next_device})

    # Validate the chain of trust to the zone zone_owner
    terminal_link = chain[-1]
    terminal_device = terminal_link["device"]
    obj = terminal_link["zone_invitation"] or terminal_link["device_zone"]
    if obj and not (terminal_device.is_creator(obj) or terminal_device.is_trusted()):
        logging.warn("Could not verify chain of trust.")

    return chain
    # No device data gets "synced" through the same sync mechanism as data--it is only synced
    #   through the special hand-shaking mechanism

def my_handler2(sender, **kwargs):
    """
    Listen in to see when available videos become unavailable.
    """
    was_available = kwargs["instance"].percent_complete == 100
    if was_available:
        logging.debug("Invalidating cache on delete for %s" % kwargs["instance"])
        invalidate_all_pages_related_to_video(video_id=i18n.get_video_id(kwargs["instance"].youtube_id))

def invalidate_on_video_delete(sender, **kwargs):
    """
    Listen in to see when available videos become unavailable.
    """
    was_available = kwargs["instance"] and kwargs["instance"].percent_complete == 100
    if was_available:
        logging.debug("Invalidating cache on delete for %s" % kwargs["instance"])
        invalidate_all_caches()

def save(self, *args, **kwargs): """When this model is saved, check if the activity is ended. If so, compute total_seconds and update the corresponding summary log.""" # Do nothing if the max # of records is zero # (i.e. this functionality is disabled) if not self.is_enabled(): return if not self.start_datetime: raise ValidationError("start_datetime cannot be None") if self.last_active_datetime and self.start_datetime > self.last_active_datetime: raise ValidationError("UserLog date consistency check for start_datetime and last_active_datetime") if not self.end_datetime: # Conflict_resolution related_open_logs = UserLog.objects \ .filter(user=self.user, activity_type=self.activity_type, end_datetime__isnull=True) \ .exclude(pk=self.pk) for log in related_open_logs: log.end_datetime = datetime.now() log.save() elif not self.total_seconds: # Compute total_seconds, save to summary # Note: only supports setting end_datetime once! self.full_clean() # The top computation is more lenient: user activity is just time logged in, literally. # The bottom computation is more strict: user activity is from start until the last "action" # recorded--in the current case, that means from login until the last moment an exercise or # video log was updated. #self.total_seconds = datediff(self.end_datetime, self.start_datetime, units="seconds") self.total_seconds = 0 if not self.last_active_datetime else datediff(self.last_active_datetime, self.start_datetime, units="seconds") # Confirm the result (output info first for easier debugging) logging.debug("%s: total time (%d): %d seconds" % (self.user.username, self.activity_type, self.total_seconds)) if self.total_seconds < 0: raise ValidationError("Total learning time should always be non-negative.") # Save only completed log items to the UserLogSummary UserLogSummary.add_log_to_summary(self) # This is inefficient only if something goes awry. Otherwise, # this will really only do something on ADD. # AND, if you're using recommended config (USER_LOG_MAX_RECORDS_PER_USER == 1), # this will be very efficient. if settings.USER_LOG_MAX_RECORDS_PER_USER: # Works for None, out of the box current_models = UserLog.objects.filter(user=self.user, activity_type=self.activity_type) if current_models.count() > settings.USER_LOG_MAX_RECORDS_PER_USER: # Unfortunately, could not do an aggregate delete when doing a # slice in query to_discard = current_models \ .order_by("start_datetime")[0:current_models.count() - settings.USER_LOG_MAX_RECORDS_PER_USER] UserLog.objects.filter(pk__in=to_discard).delete() # Do it here, for efficiency of the above delete. super(UserLog, self).save(*args, **kwargs)
def write_count_to_json(subtitle_counts, data_path):
    """Write JSON to file in static/data/subtitles/"""
    current_counts = softload_json(SUBTITLE_COUNTS_FILEPATH, logger=logging.error)
    current_counts.update(subtitle_counts)

    logging.debug("Writing fresh srt counts to %s" % SUBTITLE_COUNTS_FILEPATH)
    with open(SUBTITLE_COUNTS_FILEPATH, 'wb') as fp:
        # sort here, so we don't have to sort later when serving to clients
        json.dump(current_counts, fp, sort_keys=True)

def update_all_distributed_callback(request):
    """ """
    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])
    node_cache = get_node_cache()

    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        video_id = video['video_id']
        youtube_id = video['youtube_id']

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            (vl, _) = VideoLog.get_or_initialize(user=user, video_id=video_id, youtube_id=youtube_id)
            for key, val in video.iteritems():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (video_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
        except Exception as e:
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponseMessageError(error_message)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        # Only save exercise logs for exercises that we recognize.
        if exercise['exercise_id'] not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % exercise['exercise_id'])
            continue

        try:
            (el, _) = ExerciseLog.get_or_initialize(user=user, exercise_id=exercise["exercise_id"])
            for key, val in exercise.iteritems():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" % (exercise['exercise_id'], el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
        except Exception as e:
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponseMessageError(error_message)

    return JsonResponse({"success": "Uploaded %d exercises and %d videos" % (n_exercises_uploaded, n_videos_uploaded)})

def handle(self, *args, **options):
    if len(args) == 1 and args[0] == "test":
        # Callback for "weak" test--checks at least that
        #   the django project compiles (local_settings is OK)
        sys.stdout.write("Success!\n")
        exit(0)

    if options.get("repo", None):
        # Specified a repo
        self.update_via_git(**options)

    elif options.get("zip_file", None):
        # Specified a file
        if not os.path.exists(options.get("zip_file")):
            raise CommandError("Specified zip file does not exist: %s" % options.get("zip_file"))
        self.update_via_zip(**options)

    elif os.path.exists(settings.PROJECT_PATH + "/../.git"):
        # Without params, if we detect a git repo, try git
        self.update_via_git(**options)

    elif len(args) > 1:
        raise CommandError("Too many command-line arguments.")

    elif len(args) == 1:
        # Specify zip via first command-line arg
        if options['zip_file'] is not None:
            raise CommandError("Cannot specify a zipfile as unnamed and named command-line arguments at the same time.")
        options['zip_file'] = args[0]
        self.update_via_zip(**options)

    else:
        # No params, no git repo: try to get a file online.
        zip_file = tempfile.mkstemp()[1]
        for url in ["https://github.com/learningequality/ka-lite/archive/master.zip", "http://%s/download/kalite/%s/%s/" % (settings.CENTRAL_SERVER_HOST, platform.system().lower(), "all")]:
            logging.info("Downloading repo snapshot from %s to %s" % (url, zip_file))
            try:
                urllib.urlretrieve(url, zip_file)
                sys.stdout.write("success @ %s\n" % url)
                break
            except Exception as e:
                logging.debug("Failed to get zipfile from %s: %s" % (url, e))
                continue
        options["zip_file"] = zip_file  # avoid passing zip_file twice (options already contains the key)
        self.update_via_zip(**options)

    self.stdout.write("Update is complete!\n")

def save_topic_tree(topic_tree=None, node_cache=None, data_path=os.path.join(settings.PROJECT_PATH, "static", "data")):
    assert bool(topic_tree) + bool(node_cache) == 1, "Must specify either topic_tree or node_cache parameter"

    # Dump the topic tree (again)
    topic_tree = topic_tree or node_cache["Topic"]["root"][0]

    dest_filepath = os.path.join(data_path, topic_tools.topics_file)
    logging.debug("Saving topic tree to %s" % dest_filepath)
    with open(dest_filepath, "w") as fp:
        fp.write(json.dumps(topic_tree, indent=2))

def test_get_exercise_load_status(self):
    for path in get_exercise_paths():
        logging.debug("Testing path : " + path)
        self.browser.get(self.live_server_url + path)
        error_list = self.browser.execute_script("return window.js_errors;")
        if error_list:
            logging.error("Found JS error(s) while loading path: " + path)
            for e in error_list:
                logging.error(e)
        self.assertFalse(error_list)

def get_file2id_map(force=False):
    global YT2ID_MAP
    if YT2ID_MAP is None or force:
        YT2ID_MAP = {}
        for lang_code, dic in get_dubbed_video_map().iteritems():
            for english_youtube_id, dubbed_youtube_id in dic.iteritems():
                if dubbed_youtube_id in YT2ID_MAP:
                    logging.debug("conflicting entry of dubbed_youtube_id %s in %s dubbed video map" % (dubbed_youtube_id, lang_code))
                YT2ID_MAP[dubbed_youtube_id] = english_youtube_id  # assumes video id is the english youtube_id
    return YT2ID_MAP

def extract_files(self, zip_file):
    """Extract all files to a temp location"""

    sys.stdout.write("*\n")
    sys.stdout.write("* temp location == %s\n" % self.working_dir)
    sys.stdout.write("* Extracting all files to a temporary location; please wait...")
    sys.stdout.flush()

    # Speedup debug by not extracting when path exists and it's not empty.
    #   Works because we don't use a randomly generated temp name in debug mode.
    if settings.DEBUG and os.path.exists(self.working_dir + "/install.sh"):
        sys.stdout.write("** NOTE ** NOT EXTRACTING IN DEBUG MODE")
        return

    if not os.path.exists(self.working_dir):
        os.mkdir(self.working_dir)
    if not zipfile.is_zipfile(zip_file):
        raise CommandError("bad zip file")

    zip = ZipFile(zip_file, "r")
    nfiles = len(zip.namelist())
    for fi, afile in enumerate(zip.namelist()):
        if fi > 0 and fi % round(nfiles / 10) == 0:
            pct_done = round(100. * (fi + 1.) / nfiles)
            sys.stdout.write(" %d%%" % pct_done)

        zip.extract(afile, path=self.working_dir)
        # If it's a unix script, give permissions to execute
        if os.path.splitext(afile)[1] == ".sh":
            os.chmod(os.path.realpath(self.working_dir + "/" + afile), 0755)
            logging.debug("\tChanging perms on script %s\n" % os.path.realpath(self.working_dir + "/" + afile))
    sys.stdout.write("\n")

    # Error checking (successful unpacking would skip all the following logic.)
    if not os.path.exists(self.working_dir + "/kalite/"):
        subdirs = os.listdir(self.working_dir)
        if len(subdirs) == 1:
            # This happens if zip was downloaded from git, rather than being created through the zip_kalite command.
            self.working_dir += "/" + subdirs[0] + "/"
            sys.stdout.write("Note: found a git-based package.  Updating working dir to %s\n" % self.working_dir)
        else:
            # Unexpected situation: no kalite dir, and more than one directory.  What could it be?
            raise CommandError("Expected %s to exist, but it doesn't.  Unknown failure in extraction; exiting." % (self.working_dir + "/kalite/"))