def __init__(self, cartridge, link_file=None):
    """
    Keep a reference to the cartridge and prepare the optional iframe link parser.

    Args:
        cartridge: Cartridge object stored on the instance as-is.
        link_file: Optional link file; when truthy it is handed to
            ``KalturaIframeLinkParser``, otherwise no parser is created.
    """
    self.cartridge = cartridge
    self.doc = None
    self.link_file = link_file
    # The parser only exists when a link file was supplied.
    self.iframe_link_parser = KalturaIframeLinkParser(link_file) if link_file else None
def test_video_olx_bad_link_map(self, iframes, link_map_bad_csv):
    """
    Generating video OLX must raise when the link map does not provide
    the youtube Id or the edX Id.
    """
    parser = KalturaIframeLinkParser(link_map_bad_csv)
    document = xml.dom.minidom.Document()

    with pytest.raises(Exception):
        _olx, _converted = parser.get_video_olx(document, iframes)
def __init__(self, cartridge, link_file=None, passport_file=None):
    """
    Keep the cartridge and optional input files, and reset LTI bookkeeping.

    Args:
        cartridge: Cartridge object stored on the instance as-is.
        link_file: Optional link file; when truthy it is handed to
            ``KalturaIframeLinkParser``, otherwise no parser is created.
        passport_file: Optional passport file, stored on the instance.
    """
    self.cartridge = cartridge
    self.doc = None
    self.link_file = link_file
    self.passport_file = passport_file
    # The parser only exists when a link file was supplied.
    self.iframe_link_parser = KalturaIframeLinkParser(link_file) if link_file else None
    # LTI tracking starts out empty.
    self.lti_consumer_present = False
    self.lti_consumer_ids = set()
def test_video_olx_edx_id_only(self, iframes, link_map_edx_only_csv):
    """
    A link map that supplies only the edX Id still yields a video OLX node.
    """
    parser = KalturaIframeLinkParser(link_map_edx_only_csv)
    nodes, _ = parser.get_video_olx(xml.dom.minidom.Document(), iframes)

    assert len(nodes) == 1
    assert nodes[0].hasAttribute("edx_video_id")
def test_video_olx_languages(self, iframes, link_map_languages_csv):
    """
    Transcript languages listed in the link map become transcript children
    of the generated video OLX node.
    """
    parser = KalturaIframeLinkParser(link_map_languages_csv)
    nodes, _ = parser.get_video_olx(xml.dom.minidom.Document(), iframes)

    assert len(nodes) == 1
    video_node = nodes[0]

    # The first line in the fixtures file has two languages listed
    assert len(video_node.childNodes) == 2
    assert video_node.firstChild.nodeName == "transcript"
    assert video_node.firstChild.hasAttribute("language")
    assert video_node.firstChild.hasAttribute("src")
def iframe_link_parser(link_map_csv):
    """
    Build the Kaltura iframe link parser for the given link file.

    Args:
        link_map_csv ([str]): Link file path.

    Returns:
        [Iframe Link Parse]: Instance of link parser class.
    """
    return KalturaIframeLinkParser(link_map_csv)
class OlxExport:
    """
    This class is used to convert intermediate representation
    of Common Cartridge to OLX.

    OLX guide: https://edx.readthedocs.io/projects/edx-open-learning-xml/en/latest/
    """

    # content types
    HTML = "html"
    LINK = "link"
    VIDEO = "video"
    LTI = "lti"
    QTI = "qti"
    DISCUSSION = "discussion"

    def __init__(self, cartridge, link_file=None):
        """
        Store the cartridge and, when a link file is given, build the iframe link parser.

        Args:
            cartridge: Cartridge object whose normalized data is exported.
            link_file: Optional link file passed to ``KalturaIframeLinkParser``.
        """
        self.cartridge = cartridge
        self.doc = None
        self.link_file = link_file
        self.iframe_link_parser = None
        if link_file:
            self.iframe_link_parser = KalturaIframeLinkParser(self.link_file)

    def xml(self):
        """
        Build the course document and return it as a pretty-printed XML string.

        A fresh ``xml.dom.minidom.Document`` is created on every call and kept
        on ``self.doc`` so that the node-creation helpers can use it.
        """
        self.doc = xml.dom.minidom.Document()
        self.doc.appendChild(self.doc.createComment(" Generated by cc2olx "))

        xcourse = self.doc.createElement("course")
        xcourse.setAttribute("org", self.cartridge.get_course_org())
        xcourse.setAttribute("course", "Some_cc_Course")
        xcourse.setAttribute("name", self.cartridge.get_title())
        xcourse.setAttribute("url_name", "course")
        self.doc.appendChild(xcourse)

        tags = "chapter sequential vertical".split()
        self._add_olx_nodes(xcourse, self.cartridge.normalized["children"], tags)

        return self.doc.toprettyxml()

    def policy(self):
        """
        Returns minimal course policy file with disabled wiki tab
        in form of json string.

        See details about policy here:
        https://edx.readthedocs.io/projects/edx-open-learning-xml/en/latest/policies/course.html
        """
        policy = {
            "course/course": {
                "tabs": [
                    {
                        "course_staff_only": True,
                        "name": "Home",
                        "type": "course_info"
                    },
                    {
                        "course_staff_only": False,
                        "name": "Course",
                        "type": "courseware",
                    },
                    {
                        "course_staff_only": False,
                        "name": "Textbooks",
                        "type": "textbooks",
                    },
                    {
                        "course_staff_only": False,
                        "name": "Discussion",
                        "type": "discussion",
                    },
                    {
                        "course_staff_only": False,
                        "name": "Wiki",
                        "type": "wiki",
                        "is_hidden": True,
                    },
                    {
                        "course_staff_only": False,
                        "name": "Progress",
                        "type": "progress",
                    },
                ]
            }
        }

        return json.dumps(policy)

    def _add_olx_nodes(self, element, course_data, tags):
        """
        Recursively loops through the normalized common cartridge course data
        and adds appropriate OLX nodes to given course element.

        Expects `course_data` to be a list of triple nested elements that
        represent chapters in OLX courseware structure, like:
        ```
        [
            {
                'children': [        <----- chapter
                    'children': [    <----- sequential
                        'children': [    <----- vertical
                            ...content of vertical...
                        ]
                    ]
                ]
            }
        ]
        ```

        Args:
            element: Parent DOM element that receives the created nodes.
            course_data: List of dictionaries describing the current level.
            tags: Remaining structural tag names; an empty list means the
                current level holds leaf content.
        """
        leaf = not tags
        for element_data in course_data:
            if leaf:
                content_type, details = self._get_content(element_data)
                children = self._create_olx_nodes(content_type, details)
            else:
                children = [self.doc.createElement(tags[0])]

            for child in children:
                if "title" in element_data:
                    child.setAttribute("display_name", element_data["title"])

                # "identifierref" is optional (see _get_content), so only set
                # the url_name when the key is actually present.
                if "identifierref" in element_data:
                    child.setAttribute("url_name", element_data["identifierref"])

                element.appendChild(child)

            if "children" in element_data:
                # Nested data is attached to the last node created above.
                self._add_olx_nodes(child, element_data["children"], tags[1:])

    def _get_content(self, element_data):
        """
        Gets content type and details from element's data.

        Falls back to a placeholder HTML block when no content can be
        resolved, and hands link resources to ``process_link``.
        """
        content_type = None
        details = None

        if "identifierref" in element_data:
            idref = element_data["identifierref"]
            content_type, details = self.cartridge.get_resource_content(idref)

        if content_type is None:
            content_type = self.HTML
            details = {
                "html": "<p>MISSING CONTENT</p>",
            }

        if content_type == self.LINK:
            content_type, details = process_link(details)

        return content_type, details

    def _process_static_links(self, html):
        """
        Process static links like src and href to have appropriate links.

        Args:
            html (str): Markup whose ``src``/``href`` attribute values are rewritten.

        Returns:
            str: Markup with the recognised placeholders replaced.
        """
        items = re.findall(r'(src|href)\s*=\s*"(.+?)"', html)

        def process_wiki_reference(item, html):
            """
            Replace $WIKI_REFERENCE$ with edx /jump_to_id/<url_name>
            """
            search_key = urllib.parse.unquote(item).replace(
                "$WIKI_REFERENCE$/pages/", "")

            # remove query params and add suffix .html to match with resource_id_by_href
            search_key = search_key.split("?")[0] + ".html"
            for key, resource_id in self.cartridge.resource_id_by_href.items():
                if key.endswith(search_key):
                    replace_with = "/jump_to_id/{}".format(resource_id)
                    return html.replace(item, replace_with)

            logger.warning("Unable to process Wiki link - %s", item)
            return html

        def process_canvas_reference(item, html):
            """
            Replace $CANVAS_OBJECT_REFERENCE$ with edx /jump_to_id/<url_name>
            """
            object_id = urllib.parse.unquote(item).replace(
                "$CANVAS_OBJECT_REFERENCE$/quizzes/", "/jump_to_id/")
            return html.replace(item, object_id)

        def process_ims_cc_filebase(item, html):
            """
            Replace $IMS-CC-FILEBASE$ with /static
            """
            new_item = urllib.parse.unquote(item).replace(
                "$IMS-CC-FILEBASE$", "/static")
            # skip query parameters for static files
            new_item = new_item.split("?")[0]
            # &amp; is not valid in an URL. But some file seem to have it
            # when it should be &
            new_item = new_item.replace("&amp;", "&")
            return html.replace(item, new_item)

        for _, item in items:
            if "IMS-CC-FILEBASE" in item:
                html = process_ims_cc_filebase(item, html)
            elif "WIKI_REFERENCE" in item:
                html = process_wiki_reference(item, html)
            elif "CANVAS_OBJECT_REFERENCE" in item:
                html = process_canvas_reference(item, html)

        return html

    def _process_static_links_from_details(self, details):
        """
        Take a variable and recursively find & escape all static links within strings

        Args:
            self: self
            details: A dictionary or list of dictionaries containing node data.

        Returns:
            details: Returns detail data with static link
                escaped to an OLX-friendly format.
        """
        if isinstance(details, str):
            return self._process_static_links(details)

        # Containers are updated in place; only values are replaced, so the
        # set of keys/indices stays stable while iterating.
        if isinstance(details, list):
            for index, value in enumerate(details):
                details[index] = self._process_static_links_from_details(value)
        elif isinstance(details, dict):
            for key, value in details.items():
                details[key] = self._process_static_links_from_details(value)

        return details

    def _create_olx_nodes(self, content_type, details):
        """
        This helps to create OLX node of different type. For eg HTML, VIDEO, QTI, LTI,
        Discussion.

        Args:
            content_type ([str]): The type of node that has to be created.
            details (Dict[str, str]): Dictionary of the element and content of the element.

        Raises:
            OlxExportException: Exception when nodes are not able to be created.

        Returns:
            [List]: List of OLX nodes that needs to be written.
        """
        nodes = []
        details = self._process_static_links_from_details(details)

        if content_type == self.HTML:
            nodes += self._process_html(details)

        elif content_type == self.VIDEO:
            # _create_video_node returns a single element, not a list.
            nodes.append(self._create_video_node(details))

        elif content_type == self.LTI:
            nodes.append(self._create_lti_node(details))

        elif content_type == self.QTI:
            qti_export = QtiExport(self.doc)
            nodes += qti_export.create_qti_node(details)

        elif content_type == self.DISCUSSION:
            nodes += self._create_discussion_node(details)

        else:
            raise OlxExportException(
                f'Content type "{content_type}" is not supported.')

        return nodes

    def _create_video_node(self, details):
        """
        This function creates Video OLX nodes.

        Args:
            details (Dict[str, str]): Dictionary that has Video tag value.

        Returns:
            [OLX Element]: Video OLX element.
        """
        xml_element = element_builder(self.doc)
        attributes = {
            "youtube": "1.00:" + details["youtube"],
            "youtube_id_1_0": details["youtube"]
        }
        child = xml_element("video", children=None, attributes=attributes)
        return child

    def _process_html(self, details):
        """
        This function helps to process the html and gives out
        corresponding HTML or Video OLX nodes.

        Args:
            details (Dict[str, str]): Dictionary that has HTML tag value.

        Returns:
            List[OLX Element]: List of html/Video OLX element.
        """
        video_olx = []
        nodes = []
        child = self.doc.createElement("html")
        html = self._process_static_links(details["html"])
        if self.link_file:
            html, video_olx = self._process_html_for_iframe(html)
        txt = self.doc.createCDATASection(html)
        child.appendChild(txt)
        nodes.append(child)
        # Video blocks extracted from iframes follow the html block.
        nodes.extend(video_olx)
        return nodes

    def _process_html_for_iframe(self, html_str):
        """
        This function helps to parse the iframe with
        embedded video, to be converted into video xblock.

        Args:
            html_str ([str]): Html file content.

        Returns:
            html_str [str]: The html content of the file, if iframe is present
                            and converted into xblock then iframe is removed
                            from the HTML.
            video_olx [List[xml]]: List of xml children, i.e video xblock.
        """
        video_olx = []
        parsed_html = html.fromstring(html_str)
        iframes = parsed_html.xpath("//iframe")
        if not iframes:
            return html_str, video_olx
        video_olx, converted_iframes = self.iframe_link_parser.get_video_olx(
            self.doc, iframes)
        if video_olx:
            # If video xblock is present then we modify the HTML to remove the iframe
            # hence we need to convert the modified HTML back to string. We also remove
            # the parent if there are no other children.
            for iframe in converted_iframes:
                parent = iframe.getparent()
                parent.remove(iframe)
                if not parent.getchildren():
                    grandparent = parent.getparent()
                    # The emptied parent may be the tree root, which has no
                    # parent to detach it from.
                    if grandparent is not None:
                        grandparent.remove(parent)
            return html.tostring(parsed_html).decode("utf-8"), video_olx
        return html_str, video_olx

    def _create_lti_node(self, details):
        """
        Create an ``lti_consumer`` node from LTI resource details.

        Args:
            details (dict): Reads the keys ``custom_parameters`` (a mapping),
                ``description``, ``title``, ``height``, ``width`` and
                ``launch_url``.

        Returns:
            The created ``lti_consumer`` DOM element.
        """
        node = self.doc.createElement("lti_consumer")
        custom_parameters = "[{params}]".format(params=", ".join([
            '"{key}={value}"'.format(
                key=key,
                value=value,
            ) for key, value in details["custom_parameters"].items()
        ]), )
        node.setAttribute("custom_parameters", custom_parameters)
        node.setAttribute("description", details["description"])
        node.setAttribute("display_name", details["title"])
        node.setAttribute("inline_height", details["height"])
        node.setAttribute("inline_width", details["width"])
        node.setAttribute("launch_url", details["launch_url"])
        node.setAttribute("modal_height", details["height"])
        node.setAttribute("modal_width", details["width"])
        node.setAttribute("xblock-family", "xblock.v1")
        return node

    def _create_discussion_node(self, details):
        """
        Create the nodes for a discussion resource.

        Args:
            details (dict): Reads the keys ``title`` and ``text``.

        Returns:
            list: An ``html`` node holding the discussion text followed by
                the ``discussion`` node itself.
        """
        node = self.doc.createElement("discussion")
        node.setAttribute("display_name", "")
        node.setAttribute("discussion_category", details["title"])
        node.setAttribute("discussion_target", details["title"])
        html_node = self.doc.createElement("html")
        txt = self.doc.createCDATASection(details["text"])
        html_node.appendChild(txt)
        return [html_node, node]