def topic_update_from_live(self, topics, remap_doc_id):
    """Queue an import of the requested topics from the live khanacademy.org tree.

    Fetches the live topic tree and the published remap spreadsheet (as CSV),
    prunes the tree in place down to the requested topics, builds an
    english -> hebrew mapping from the spreadsheet rows, stores a
    ``KhanSyncStepLog`` and defers ``khan_import_task`` on the
    "import-queue" queue with the pickled, zlib-compressed
    ``(topictree, mapping)`` payload.

    Args:
        topics: iterable of topic id strings to import. It is not modified.
        remap_doc_id: key of the published Google spreadsheet holding the
            video remapping.

    Raises:
        Exception: if any requested topic id is absent from the live topic
            tree, or if the spreadsheet yields no mapping (no recognizable
            header row, or no row with a non-empty "hebrew" cell).
    """
    topic_names = ", ".join(sorted(topics))
    step_log = KhanSyncStepLog(topics=topic_names, remap_doc_id=remap_doc_id)
    logging.info("Importing topics from khanacademy.org: %s", topic_names)

    topictree = util.fetch_from_url("http://www.khanacademy.org/api/v1/topictree", as_json=True)
    result = util.fetch_from_url("https://docs.google.com/spreadsheet/pub?key=%s&single=true&gid=0&output=csv" % remap_doc_id)

    # Track not-yet-found ids in a private copy so the caller's collection is
    # left untouched; whatever remains after pruning was not found in the tree.
    remaining = set(topics)

    def filter_unwanted(branch):
        """Prune ``branch`` in place.

        Returns None for a non-Topic node, True for a requested topic (kept
        with its whole subtree), otherwise whether any descendant topic was
        requested.
        """
        if branch["kind"] != "Topic":
            return None
        if branch["id"] in remaining:
            remaining.remove(branch["id"])
            return True

        wanted_children = []
        wanted = False
        for child in branch["children"]:
            ret = filter_unwanted(child)
            # Keep non-Topic children (None) and topics that are, or contain,
            # a requested topic (True); drop topics with nothing requested.
            if ret in (None, True):
                wanted_children.append(child)
            wanted |= (ret or False)
        # Slice-assign so the existing list object inside the tree is updated.
        branch["children"][:] = wanted_children
        return wanted

    filter_unwanted(topictree)
    if remaining:
        # Interpolate explicitly: Exception() does not format its arguments
        # the way logging calls do.
        raise Exception("These topics were not found in the live topictree: %s" % ", ".join(sorted(remaining)))

    mapping = {}
    known_columns = set(["serial", "subject", "english", "hebrew"])
    reader = csv.reader(StringIO.StringIO(result.content))
    for row in reader:
        # Skip leading rows until one that looks like the header row.
        if set(cell.lower() for cell in row) & known_columns:
            header = [re.sub(r"\W", "_", cell.lower()) for cell in row]
            # Every row left in the reader after the header is a data row.
            mapped_vids = (dict(zip(header, data_row)) for data_row in reader)
            mapping = dict((m["english"], m["hebrew"]) for m in mapped_vids if m["hebrew"])
            logging.info("Loaded %s mapped videos", len(mapping))
            break

    if not mapping:
        raise Exception("Unrecognized spreadsheet format")

    logging.info("calling import-queue")
    step_log.put()
    # importing the full topic tree can be too large so pickling and compressing
    payload = zlib.compress(pickle.dumps((topictree, mapping)))
    deferred.defer(khan_import_task, step_log, payload, _queue="import-queue")
def sync_exercise_related_videos(exercise):
    """Update an exercise's related videos from the live khanacademy.org API.

    Fetches the video list for ``exercise.name`` as JSON. When the response
    is non-empty, logs the readable ids and passes them to
    ``UpdateExercise.do_update_related_videos``. An empty or falsy response
    leaves the exercise untouched.
    """
    url = "http://www.khanacademy.org/api/v1/exercises/%s/videos" % exercise.name
    videos = fetch_from_url(url, as_json=True)
    if not videos:
        return

    video_ids = []
    for video in videos:
        video_ids.append(video['readable_id'])

    logging.info("%s -> %s", exercise.name, ", ".join(video_ids))
    UpdateExercise.do_update_related_videos(exercise, video_ids)