Example #1
0
    def test_add_descriptors(self):
        """
        Round-trip a keep_urls bundle through export and import.

        Right after loading, the serialized bundle must equal the input XML.
        After export_to_directory followed by import_from_directory into a
        fresh bundle, the result must equal expected_data.URL_NAME_ORIG.
        """
        # The input fixture has url_name_orig on its chapter.
        source_xml = input_data.URL_NAME_ORIG_IN_CHAPTER1
        original = XBundle(keep_urls=True)
        original.load(file_from_string(source_xml))

        # Loading alone must not alter the XML; the export below will.
        self.assertEqual(clean_xml(source_xml), clean_xml(str(original)))

        starting_dir = os.getcwd()
        work_dir = mkdtemp()
        try:
            # Export and import are called without a path, so run them from
            # inside the scratch directory.
            os.chdir(work_dir)
            original.export_to_directory()

            reloaded = XBundle(keep_urls=True)
            reloaded.import_from_directory()

            self.assertEqual(
                clean_xml(expected_data.URL_NAME_ORIG),
                clean_xml(str(reloaded)),
            )
        finally:
            # Leave the scratch directory before deleting it.
            os.chdir(starting_dir)
            rmtree(work_dir)
Example #2
0
    def test_import_export(self):  # pylint: disable=no-self-use
        """
        Import the mitx.01 fixture, export it to a temporary directory and
        diff the exported tree against the stored mitx.01.exported tree.
        """
        course = XBundle()
        course.import_from_directory(os.path.join("input_testdata", "mitx.01"))

        scratch = mkdtemp()
        try:
            course.export_to_directory(scratch)

            reference_src = os.path.join("input_testdata", "mitx.01.exported")
            reference_copy = os.path.join(scratch, 'mitx.01.exported')
            exported = os.path.join(scratch, "mitx.01")

            # Normalize whitespace in the xml files so the comparison also
            # passes on platforms whose xml serializers differ slightly
            # (see: travis). The reference tree is copied into the scratch
            # directory so that one rmtree cleans up everything.
            copytree(reference_src, reference_copy)
            _normalize_xml(scratch)

            diff_command = ["diff", "-r", reference_copy, exported]
            check_call(diff_command)
        finally:
            rmtree(scratch)
Example #3
0
    def test_save(self):
        """
        Check the three ways of calling XBundle.save.

        With no arguments the test reads back "xbundle.xml", with
        ``filename`` the named file, and with ``file_handle`` the file that
        was opened for it; each must contain the loaded XML.
        """
        source_xml = "<xbundle><metadata /><course /></xbundle>"
        bundle = XBundle()
        bundle.load(file_from_string(source_xml))
        self.assertEqual(clean_xml(str(bundle)), clean_xml(source_xml))

        def assert_saved(saved_path):
            """Compare the cleaned contents of saved_path with the input."""
            with open(saved_path) as saved:
                self.assertEqual(
                    clean_xml(saved.read()), clean_xml(source_xml))

        previous_dir = os.getcwd()
        scratch = mkdtemp()
        try:
            # save() is given relative names (or none), so work from inside
            # the scratch directory.
            os.chdir(scratch)

            bundle.save()
            assert_saved(os.path.join(scratch, "xbundle.xml"))

            bundle.save(filename="other.xml")
            assert_saved(os.path.join(scratch, "other.xml"))

            third_path = os.path.join(scratch, "third.xml")
            with open(third_path, "w") as sink:
                bundle.save(file_handle=sink)
            assert_saved(third_path)
        finally:
            os.chdir(previous_dir)
            rmtree(scratch)
Example #4
0
    def test_nested_leaves(self):
        """
        Test that nested leaves are not imported.
        """
        template = """
<course org="DevOps" course="0.001" url_name="2015_Summer"
    semester="2015_Summer">
  <chapter>
    <sequential>
      <vertical>
        <{tag}><{tag}></{tag}></{tag}>
      </vertical>
    </sequential>
  </chapter>
</course>
"""

        for tag in ("html", "problem", "discussion", "video"):
            repo = create_repo(
                "{tag}_repo".format(tag=tag), "...", self.user.id)
            xml = etree.fromstring(template.format(tag=tag))
            bundle = XBundle(
                keep_urls=True, keep_studio_urls=True, preserve_url_name=True
            )
            bundle.set_course(xml)

            import_course(bundle, repo.id, self.user.id, "")

            self.assertEqual(
                LearningResource.objects.filter(
                    learning_resource_type__name=tag
                ).count(),
                1
            )
Example #5
0
    def test_export_and_keep_urls(self):
        """
        Round-trip a bundle created with keep_urls and force_studio_format.

        The serialized bundle must equal the input right after loading; after
        export_to_directory and a fresh import_from_directory it must equal
        expected_data.KEEP_URLS_FORCE_STUDIO_FORMAT.
        """
        # The input fixture has url_name_orig on its chapter.
        source_xml = input_data.URL_NAME_ORIG_IN_CHAPTER2
        original = XBundle(keep_urls=True, force_studio_format=True)
        original.load(file_from_string(source_xml))

        # Loading alone must not alter the XML; the export below will.
        self.assertEqual(clean_xml(source_xml), clean_xml(str(original)))

        starting_dir = os.getcwd()
        work_dir = mkdtemp()
        try:
            # Export and import are called without a path, so run them from
            # inside the scratch directory.
            os.chdir(work_dir)
            original.export_to_directory()

            reloaded = XBundle(keep_urls=True, force_studio_format=True)
            reloaded.import_from_directory()

            self.assertEqual(
                clean_xml(expected_data.KEEP_URLS_FORCE_STUDIO_FORMAT),
                clean_xml(str(reloaded)),
            )
        finally:
            # Leave the scratch directory before deleting it.
            os.chdir(starting_dir)
            rmtree(work_dir)
Example #6
0
    def test_fix_old_descriptor_name(self):
        """
        fix_old_descriptor_name must turn the ``name`` attribute of an
        element into ``display_name``.
        """
        bundle = XBundle()
        node = etree.XML('<sequential name="abc" />')

        # The element is modified in place; the return value is not used.
        bundle.fix_old_descriptor_name(node)

        self.assertEqual(
            clean_xml('<sequential display_name="abc" />'),
            clean_xml(etree.tostring(node)),
        )
Example #7
0
    def test_fix_old_descriptor_name(self):
        """
        Check that a ``name`` attribute is rewritten to ``display_name`` by
        fix_old_descriptor_name.
        """
        bundle = XBundle()
        sequential = etree.XML('<sequential name="abc" />')

        # The element is changed in place, so it is inspected afterwards.
        bundle.fix_old_descriptor_name(sequential)

        wanted = '<sequential display_name="abc" />'
        self.assertEqual(
            clean_xml(wanted), clean_xml(etree.tostring(sequential)))
Example #8
0
    def test_save(self):
        """
        Save a loaded bundle three ways and verify every written file.

        Covers save() with no arguments (read back from "xbundle.xml"),
        save(filename=...) and save(file_handle=...).
        """
        source_xml = "<xbundle><metadata /><course /></xbundle>"
        bundle = XBundle()
        bundle.load(file_from_string(source_xml))
        self.assertEqual(clean_xml(str(bundle)), clean_xml(source_xml))

        def check_written(written_path):
            """Assert that written_path holds the XML that was loaded."""
            with open(written_path) as handle:
                self.assertEqual(
                    clean_xml(handle.read()), clean_xml(source_xml))

        original_cwd = os.getcwd()
        out_dir = mkdtemp()
        try:
            # save() receives relative names (or none), so switch into the
            # temporary directory first.
            os.chdir(out_dir)

            bundle.save()
            check_written(os.path.join(out_dir, "xbundle.xml"))

            bundle.save(filename="other.xml")
            check_written(os.path.join(out_dir, "other.xml"))

            handle_target = os.path.join(out_dir, "third.xml")
            with open(handle_target, "w") as out_handle:
                bundle.save(file_handle=out_handle)
            check_written(handle_target)
        finally:
            os.chdir(original_cwd)
            rmtree(out_dir)
Example #9
0
    def test_xml_header(self):
        """
        The serialized bundle must not start with an "<?xml" declaration,
        and must still be equal to the EMPTY_XBUNDLE input after cleaning.
        """
        source_xml = input_data.EMPTY_XBUNDLE

        bundle = XBundle()
        bundle.load(file_from_string(source_xml))

        starts_with_header = str(bundle).startswith("<?xml")
        self.assertFalse(starts_with_header)
        self.assertEqual(clean_xml(source_xml), clean_xml(str(bundle)))
Example #10
0
    def test_xml_header(self):
        """
        Verify that no "<?xml" header appears at the start of the output and
        that the EMPTY_XBUNDLE input is otherwise reproduced.
        """
        fixture = input_data.EMPTY_XBUNDLE

        loaded = XBundle()
        loaded.load(file_from_string(fixture))

        header_present = str(loaded).startswith("<?xml")
        self.assertFalse(header_present)
        self.assertEqual(clean_xml(fixture), clean_xml(str(loaded)))
Example #11
0
    def test_import_skip_hidden(self):
        """
        Import the mitx.01 fixture with skip_hidden=True and compare the
        result with expected_data.SKIP_HIDDEN.
        """
        course_dir = os.path.join('input_testdata', 'mitx.01')

        bundle = XBundle(skip_hidden=True)
        bundle.import_from_directory(course_dir)

        self.assertEqual(
            clean_xml(str(bundle)), clean_xml(expected_data.SKIP_HIDDEN))
Example #12
0
    def test_fix_old_course_section(self):
        """
        Import the "sections" fixture and compare the result with
        expected_data.MISSING_SECTION.
        """
        sections_dir = os.path.join("input_testdata", "sections")

        bundle = XBundle()
        bundle.import_from_directory(sections_dir)

        # The expected output no longer has the section element.
        self.assertEqual(
            clean_xml(expected_data.MISSING_SECTION), clean_xml(str(bundle)))
Example #13
0
    def test_import_skip_hidden(self):
        """
        A bundle created with skip_hidden=True and imported from mitx.01
        must serialize to expected_data.SKIP_HIDDEN.
        """
        fixture_dir = os.path.join('input_testdata', 'mitx.01')

        filtered = XBundle(skip_hidden=True)
        filtered.import_from_directory(fixture_dir)

        actual = clean_xml(str(filtered))
        self.assertEqual(actual, clean_xml(expected_data.SKIP_HIDDEN))
Example #14
0
    def test_fix_old_course_section(self):
        """
        A bundle imported from the "sections" fixture must serialize to
        expected_data.MISSING_SECTION.
        """
        fixture_dir = os.path.join("input_testdata", "sections")

        imported = XBundle()
        imported.import_from_directory(fixture_dir)

        # The section element is expected to be gone from the output.
        wanted = expected_data.MISSING_SECTION
        self.assertEqual(clean_xml(wanted), clean_xml(str(imported)))
Example #15
0
def import_course_from_path(path, repo_id, user_id):
    """
    Load an OLX directory into an XBundle and import it as a course.

    Args:
        path (unicode): path to extracted OLX tree
        repo_id: repository identifier, passed through to import_course
        user_id (int): pk of Django user doing the import
    Returns:
        Whatever import_course returns for the loaded bundle.
    """
    olx_bundle = XBundle()
    olx_bundle.import_from_directory(path)
    imported = import_course(olx_bundle, repo_id, user_id)
    return imported
Example #16
0
    def test_import_url_name(self):
        """
        Import mitx.01 with keep_urls and keep_studio_urls and compare with
        expected_data.KEEP_URLS (url_name is kept as url_name_orig).
        """
        fixture_dir = os.path.join('input_testdata', 'mitx.01')

        bundle = XBundle(keep_urls=True, keep_studio_urls=True)
        bundle.import_from_directory(fixture_dir)

        serialized = str(bundle)
        self.assertEqual(
            clean_xml(expected_data.KEEP_URLS), clean_xml(serialized))
Example #17
0
    def test_import_url_name(self):
        """
        With keep_urls and keep_studio_urls set, the mitx.01 import must
        serialize to expected_data.KEEP_URLS (url_name becomes
        url_name_orig).
        """
        imported = XBundle(keep_urls=True, keep_studio_urls=True)
        imported.import_from_directory(
            os.path.join('input_testdata', 'mitx.01'))

        actual = str(imported)
        wanted = expected_data.KEEP_URLS

        self.assertEqual(clean_xml(wanted), clean_xml(actual))
Example #18
0
    def test_preserve_url_name(self):
        """
        With preserve_url_name=True the import keeps url_name as url_name
        rather than url_name_orig; the result is compared with
        expected_data.PRESERVE_URL_NAME.
        """
        options = {
            "keep_urls": True,
            "keep_studio_urls": True,
            "preserve_url_name": True,
        }
        bundle = XBundle(**options)
        bundle.import_from_directory('input_testdata/mitx.01')

        serialized = str(bundle)
        self.assertEqual(
            clean_xml(expected_data.PRESERVE_URL_NAME),
            clean_xml(serialized),
        )
Example #19
0
    def test_preserve_url_name(self):
        """
        Import mitx.01 with preserve_url_name=True and compare with
        expected_data.PRESERVE_URL_NAME (url_name stays url_name instead of
        becoming url_name_orig).
        """
        imported = XBundle(
            keep_urls=True, keep_studio_urls=True, preserve_url_name=True)
        imported.import_from_directory('input_testdata/mitx.01')

        actual = str(imported)
        wanted = expected_data.PRESERVE_URL_NAME

        self.assertEqual(clean_xml(wanted), clean_xml(actual))
Example #20
0
    def test_add_descriptors(self):
        """
        Export a keep_urls bundle to disk, import it again and compare.

        The bundle must serialize to its input right after loading, and the
        re-imported bundle must serialize to expected_data.URL_NAME_ORIG.
        """
        # url_name_orig is present on the chapter of this fixture.
        fixture = input_data.URL_NAME_ORIG_IN_CHAPTER1
        first = XBundle(keep_urls=True)
        first.load(file_from_string(fixture))

        # Serializing does not modify the XML; exporting to disk does.
        self.assertEqual(clean_xml(fixture), clean_xml(str(first)))

        original_cwd = os.getcwd()
        export_dir = mkdtemp()
        try:
            # Neither call receives a path, so work from the export dir.
            os.chdir(export_dir)
            first.export_to_directory()

            second = XBundle(keep_urls=True)
            second.import_from_directory()

            wanted = expected_data.URL_NAME_ORIG
            actual = str(second)
            self.assertEqual(clean_xml(wanted), clean_xml(actual))
        finally:
            # Step out of the directory before removing it.
            os.chdir(original_cwd)
            rmtree(export_dir)
Example #21
0
    def test_export_and_keep_urls(self):
        """
        Export a keep_urls/force_studio_format bundle, import it again and
        compare with expected_data.KEEP_URLS_FORCE_STUDIO_FORMAT.

        Before the export, the bundle must serialize to its input.
        """
        # url_name_orig is present on the chapter of this fixture.
        fixture = input_data.URL_NAME_ORIG_IN_CHAPTER2
        first = XBundle(keep_urls=True, force_studio_format=True)
        first.load(file_from_string(fixture))

        # Serializing does not modify the XML; exporting to disk does.
        self.assertEqual(clean_xml(fixture), clean_xml(str(first)))

        original_cwd = os.getcwd()
        export_dir = mkdtemp()
        try:
            # Neither call receives a path, so work from the export dir.
            os.chdir(export_dir)
            first.export_to_directory()

            second = XBundle(keep_urls=True, force_studio_format=True)
            second.import_from_directory()

            wanted = expected_data.KEEP_URLS_FORCE_STUDIO_FORMAT
            actual = str(second)
            self.assertEqual(clean_xml(wanted), clean_xml(actual))
        finally:
            # Step out of the directory before removing it.
            os.chdir(original_cwd)
            rmtree(export_dir)
Example #22
0
def documents_from_olx(olx_path):  # pylint: disable=too-many-locals
    """
    Collect indexable documents from an OLX directory.

    Two kinds of entries are produced, in this order:

    * one per ``<vertical>`` found in the imported course, holding the
      result of get_text_from_element and a "vertical_<n>" key (1-based);
    * one per file under ``olx_path`` whose lowercased extension is in
      VALID_TEXT_FILE_TYPES, holding the raw file bytes.

    Args:
        olx_path (str): The path to the directory with the OLX data

    Returns:
        list of tuple:
            A list of (content, metadata dict)
    """
    bundle = XBundle()
    bundle.import_from_directory(olx_path)

    verticals = bundle.course.findall(".//vertical")
    documents = [
        (
            get_text_from_element(vertical),
            {
                "key": f"vertical_{position}",
                "content_type": CONTENT_TYPE_VERTICAL,
                "title": vertical.attrib.get("display_name") or "",
                "mime_type": "application/xml",
            },
        )
        for position, vertical in enumerate(verticals, start=1)
    ]

    # Numbers the file documents; it only advances for accepted files.
    counter = _infinite_counter()

    for dirpath, _, filenames in os.walk(olx_path):
        for filename in filenames:
            suffix = os.path.splitext(filename)[1].lower()
            if suffix not in VALID_TEXT_FILE_TYPES:
                continue

            with open(os.path.join(dirpath, filename), "rb") as handle:
                raw = handle.read()

            metadata = {
                "key": f"document_{next(counter)}_{filename}",
                "content_type": CONTENT_TYPE_FILE,
                # None when the extension is not in mimetypes.types_map.
                "mime_type": mimetypes.types_map.get(suffix),
            }
            documents.append((raw, metadata))

    return documents
Example #23
0
def import_course_from_path(path, repo_id, user_id):
    """
    Load an OLX directory into an XBundle and import it as a course.

    The bundle is created with keep_urls, keep_studio_urls and
    preserve_url_name enabled, and the "static" subdirectory of ``path`` is
    handed to import_course alongside it.

    Args:
        path (unicode): Path to extracted OLX tree
        repo_id (int): Primary key of repository course belongs to
        user_id (int): Primary key of Django user doing the import
    Returns:
        course (learningresources.Course)
    """
    bundle_options = {
        "keep_urls": True,
        "keep_studio_urls": True,
        "preserve_url_name": True,
    }
    olx_bundle = XBundle(**bundle_options)
    olx_bundle.import_from_directory(path)
    return import_course(olx_bundle, repo_id, user_id, join(path, 'static'))
Example #24
0
    def test_unicode_in_html(self):
        """
        Add an overview about file containing non-ASCII characters, first as
        text and then as UTF-8 bytes; both must serialize to
        expected_data.ESCAPED_UNICODE.
        """
        fixture_dir = os.path.join("input_testdata", "mitx.01")
        overview_text = "\u2e18 interrobang \u203d"
        expected = expected_data.ESCAPED_UNICODE

        text_bundle = XBundle()
        text_bundle.import_from_directory(fixture_dir)
        text_bundle.add_about_file("overview.html", overview_text)
        self.assertEqual(clean_xml(str(text_bundle)), clean_xml(expected))

        # Start again from a freshly imported bundle, this time with bytes.
        bytes_bundle = XBundle()
        bytes_bundle.import_from_directory(fixture_dir)
        bytes_bundle.add_about_file(
            "overview.html", overview_text.encode('utf-8'))
        self.assertEqual(clean_xml(str(bytes_bundle)), clean_xml(expected))
Example #25
0
def import_course_from_path(path, repo_id, user_id):
    """
    Load an OLX directory into an XBundle and import it as a course inside
    a single ``transaction.atomic()`` block.

    The bundle is created with keep_urls, keep_studio_urls and
    preserve_url_name enabled, and the "static" subdirectory of ``path`` is
    handed to import_course alongside it.

    Args:
        path (unicode): Path to extracted OLX tree
        repo_id (int): Primary key of repository course belongs to
        user_id (int): Primary key of Django user doing the import
    Returns:
        course (learningresources.Course)
    """
    olx_bundle = XBundle(
        keep_urls=True, keep_studio_urls=True, preserve_url_name=True
    )
    olx_bundle.import_from_directory(path)
    static_path = join(path, 'static')

    # Only the import itself runs inside the atomic block.
    with transaction.atomic():
        return import_course(olx_bundle, repo_id, user_id, static_path)
Example #26
0
    def test_parent_preview_link(self):
        """
        Test that if url_name is blank we import the parent's url_name when
        viewing the preview link.
        """
        xml = """
<course org="DevOps" course="0.001" url_name="2015_Summer"
    semester="2015_Summer">
  <chapter>
    <sequential>
      <vertical>
        <html></html>
      </vertical>
    </sequential>
  </chapter>
</course>
"""

        repo = create_repo("html_repo", "...", self.user.id)
        xml = etree.fromstring(xml)
        bundle = XBundle(
            keep_urls=True, keep_studio_urls=True, preserve_url_name=True
        )
        bundle.set_course(xml)

        import_course(bundle, repo.id, self.user.id, "")

        html_resources = LearningResource.objects.filter(
            learning_resource_type__name="html"
        )
        self.assertEqual(html_resources.count(), 1)
        html_resource = html_resources.first()
        self.assertEqual(
            get_preview_url(html_resource),
            "{base}courses/{org}/{course}/{run}/jump_to_id/{url_path}".format(
                base=settings.LORE_PREVIEW_BASE_URL,
                org=html_resource.course.org,
                course=html_resource.course.course_number,
                run=html_resource.course.run,
                url_path="2015_Summer"
            )
        )
Example #27
0
    def test_is_not_random_urlname(self):
        """
        Check is_not_random_urlname for a hash-like name, a letters-only
        name and the empty string, with and without keep_studio_urls.
        """
        # Sample url names fed to the randomness check.
        hash_like = 'z5bc076ad06e4ede9d0561948c03be2f'
        letters_only = 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
        empty = ''

        # With keep_studio_urls=True every name is reported as not random.
        studio_bundle = XBundle(keep_studio_urls=True)
        for candidate in (hash_like, letters_only, empty):
            self.assertTrue(studio_bundle.is_not_random_urlname(candidate))

        # By default only the hash-like name is reported as random.
        default_bundle = XBundle()
        self.assertFalse(default_bundle.is_not_random_urlname(hash_like))
        for candidate in (letters_only, empty):
            self.assertTrue(default_bundle.is_not_random_urlname(candidate))
Example #28
0
    def test_import_export(self):  # pylint: disable=no-self-use
        """
        Export the imported mitx.01 fixture and require that ``diff -r``
        finds no difference to the stored mitx.01.exported tree.
        """
        imported = XBundle()
        imported.import_from_directory(
            os.path.join("input_testdata", "mitx.01"))

        out_root = mkdtemp()
        try:
            imported.export_to_directory(out_root)

            known_good = os.path.join("input_testdata", "mitx.01.exported")
            known_good_copy = os.path.join(out_root, 'mitx.01.exported')
            fresh_export = os.path.join(out_root, "mitx.01")

            # Whitespace in the xml files is normalized so the comparison
            # also passes on platforms whose xml serializers differ slightly
            # (see: travis). The known-good tree is copied next to the
            # export so that one rmtree cleans up everything.
            copytree(known_good, known_good_copy)
            _normalize_xml(out_root)

            check_call(["diff", "-r", known_good_copy, fresh_export])
        finally:
            rmtree(out_root)
Example #29
0
    def test_import_large(self):
        """
        Import the content-devops-0001 fixture and compare it with the
        stored .out.xml serialization, then export with xml_only=True and
        check that only .xml files are found under the "0.001" directory.
        """
        bundle = XBundle()
        bundle.import_from_directory(
            os.path.join('input_testdata', 'content-devops-0001'))

        reference = os.path.join(
            'input_testdata', 'content-devops-0001.out.xml')
        with open(reference) as expected_file:
            self.assertEqual(
                clean_xml(expected_file.read()), clean_xml(str(bundle)))

        out_dir = mkdtemp()
        try:
            bundle.export_to_directory(out_dir, xml_only=True, newfmt=True)

            exported_root = os.path.join(out_dir, "0.001")
            for _dirpath, _dirnames, filenames in os.walk(exported_root):
                for name in filenames:
                    # xml_only=True was requested, so nothing but xml files
                    # should have been written.
                    self.assertTrue(name.endswith(".xml"))
        finally:
            rmtree(out_dir)
Example #30
0
    def test_is_not_random_urlname(self):
        """
        Exercise is_not_random_urlname on three sample names for a bundle
        with keep_studio_urls=True and for a default bundle.
        """
        # Inputs for the randomness check.
        samples = {
            "hash": 'z5bc076ad06e4ede9d0561948c03be2f',
            "letters": 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
            "empty": '',
        }

        # keep_studio_urls=True: the method returns True for every input.
        keeps_studio = XBundle(keep_studio_urls=True)
        for label in ("hash", "letters", "empty"):
            self.assertTrue(
                keeps_studio.is_not_random_urlname(samples[label]))

        # Default bundle: only the hash-like name counts as random.
        plain = XBundle()
        self.assertFalse(plain.is_not_random_urlname(samples["hash"]))
        self.assertTrue(plain.is_not_random_urlname(samples["letters"]))
        self.assertTrue(plain.is_not_random_urlname(samples["empty"]))
Example #31
0
    def test_import_large(self):
        """
        Check import of the content-devops-0001 fixture against its stored
        .out.xml file, and that an xml_only export leaves only .xml files
        under the "0.001" directory.
        """
        fixture_dir = os.path.join('input_testdata', 'content-devops-0001')
        imported = XBundle()
        imported.import_from_directory(fixture_dir)

        stored_output = os.path.join(
            'input_testdata', 'content-devops-0001.out.xml')
        with open(stored_output) as stored:
            self.assertEqual(
                clean_xml(stored.read()), clean_xml(str(imported)))

        export_root = mkdtemp()
        try:
            imported.export_to_directory(
                export_root, xml_only=True, newfmt=True)

            course_root = os.path.join(export_root, "0.001")
            for _current, _subdirs, names in os.walk(course_root):
                for exported_name in names:
                    # With xml_only=True no other file type is expected.
                    self.assertTrue(exported_name.endswith(".xml"))
        finally:
            rmtree(export_root)
Example #32
0
    def export(self):
        """
        Convert the OCW course into an edX xbundle and write it out.

        A <course> element is built from self.meta and the syllabus page,
        populated by self.do_chapters, and wrapped in an XBundle together
        with self.policies and the about files. The result is written to
        self.output_fn (default: "<cid>_xbundle.xml"); the suffix of that
        name selects the output format:

        * ".xml"             -- a single xbundle file via XBundle.save;
                                self.copy_static_files is called with ".".
        * ".tar.gz" / ".tgz" -- the course is exported into a temporary
                                "course" directory which is then tarred.
        * anything else      -- treated as an output directory, which is
                                created if it does not exist.

        Side effects: resets self.processed_files, self.files_to_copy,
        self.processed_pdf_files and self.element_counts, and prints the
        element counts and the output name.
        """
        # Local import: only the tarball branch needs it.
        import subprocess

        meta = self.meta
        sys.stderr.write("metadata = %s\n" % meta)

        fn = self.dir / 'contents/Syllabus/index.htm'
        sxml = self.parse_broken_html(fn=fn)
        edxxml = etree.Element('course')
        edxxml.set('dirname', os.path.basename(os.getcwd()))
        edxxml.set('semester', self.DefaultSemester)
        for k, v in meta.items():
            edxxml.set(k, v)

        # track which content files have been ingested, to avoid duplication
        self.processed_files = [fn]
        # dict of files (key=OCW source, val=edX static dest) to copy to
        # "/static"
        self.files_to_copy = {}
        self.processed_pdf_files = []
        self.element_counts = defaultdict(int)

        self.do_chapters(sxml, edxxml)

        policies = self.policies

        # grab course image via index.htm
        self.get_course_image()

        # make xbundle
        xb = XBundle(force_studio_format=True)
        xb.DefaultOrg = self.DefaultOrg
        xb.set_course(edxxml)
        xb.add_policies(policies)
        self.add_about_files(xb)

        def c(x):
            return len(xb.course.findall(".//%s" % x))

        elist = [
            "chapter", "sequential", "vertical", "problem", "html", "video"
        ]
        xbundle_counts = {x: c(x) for x in elist}
        self.element_counts['n_static_files'] = len(self.files_to_copy)
        self.element_counts['n_ocw_files_processed'] = len(
            self.processed_files)

        # save it
        outfn = self.output_fn or ('%s_xbundle.xml' % self.cid)
        if outfn.endswith(".xml"):
            xb.save(outfn)
            self.copy_static_files(".")
        elif outfn.endswith((".tar.gz", ".tgz")):
            tempd = tempfile.mkdtemp(prefix="tmp_ocw2xbundle")
            try:
                cdir = path(tempd) / "course"
                os.mkdir(cdir)
                self.copy_static_files(cdir)
                xb.export_to_directory(cdir, dir_include_course_id=False)

                # Resolve the archive against the current directory because
                # tar itself runs with tempd as its working directory.
                archive = os.path.join(os.path.abspath(os.curdir), outfn)
                print("cd %s; tar czf '%s' course" % (tempd, archive))

                # An argument list (no shell) keeps paths containing spaces
                # or quotes intact.
                try:
                    status = subprocess.call(
                        ["tar", "czf", archive, "course"], cwd=tempd)
                except OSError as err:
                    # tar could not be started; report instead of raising,
                    # as os.system would only have returned non-zero here.
                    sys.stderr.write("Could not run tar: %s\n" % err)
                else:
                    if status != 0:
                        sys.stderr.write(
                            "tar exited with status %s\n" % status)
            finally:
                # Remove the temporary tree even if the export failed.
                shutil.rmtree(tempd)
        else:
            if not os.path.exists(outfn):
                print("Making directory for output: %s" % outfn)
                os.mkdir(outfn)
            self.copy_static_files(outfn)
            xb.export_to_directory(outfn, dir_include_course_id=False)

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print("OCW element counts: %s" % json.dumps(self.element_counts,
                                                    indent=4))
        print("edX XML element counts: %s" % json.dumps(xbundle_counts,
                                                        indent=4))
        print("Done, wrote to %s" % outfn)
Example #33
0
    def test_set_course(self):
        """
        Exercise set_course: rejection of a non-course element and of a
        course without semester, the url_name fallback for the semester,
        and the defaults applied to the NO_COURSE fixture.
        """
        bundle = XBundle(keep_urls=True)
        bundle.load(file_from_string(input_data.EMPTY_COURSE))

        # The loaded XML specifies neither org nor semester.
        self.assertIsNone(bundle.course.get("org"))
        self.assertIsNone(bundle.course.get("semester"))
        self.assertEqual(bundle.semester, "")

        # This course fixture lacks the org attribute and a url_name.
        course_str = input_data.NO_COURSE

        # A root element other than <course> is rejected.
        with self.assertRaises(Exception) as caught:
            bundle.set_course(etree.XML("<x>" + course_str + "</x>"))
        self.assertIn(
            "set_course should be called with a <course> element",
            caught.exception.args)

        # A bare <course /> is rejected as well.
        with self.assertRaises(Exception) as caught:
            bundle.set_course(etree.XML("<course />"))
        self.assertIn("No semester found.", caught.exception.args)

        # url_name is used as the semester.
        bundle.set_course(etree.XML("<course url_name='x' />"))
        self.assertEqual(bundle.semester, "x")

        bundle.set_course(etree.XML(course_str))

        # MITx is not present in the data; it is set automatically.
        self.assertEqual(bundle.course.get("org"), "MITx")
        self.assertEqual(bundle.course.get("semester"), "2013_Spring")
        self.assertEqual(bundle.semester, "2013_Spring")

        serialized = str(bundle)
        self.assertEqual(
            clean_xml(serialized), clean_xml(expected_data.SET_COURSE))
Example #34
0
    def test_export_import(self):
        """
        Build a bundle from the COURSE and POLICIES fixtures plus an about
        file, export it, import the "mitx.01" subdirectory of the export and
        require both serializations to match.
        """
        original = XBundle()
        original.set_course(etree.XML(input_data.COURSE))
        original.add_policies(etree.XML(input_data.POLICIES))
        original.add_about_file("overview.html", "hello overview")

        before = str(original)

        export_root = mkdtemp()
        try:
            original.export_to_directory(export_root)

            # Round trip: read back what was just written.
            reloaded = XBundle()
            reloaded.import_from_directory(
                os.path.join(export_root, 'mitx.01'))

            after = str(reloaded)
            self.assertEqual(clean_xml(before), clean_xml(after))
        finally:
            rmtree(export_root)
Example #35
0
    def test_export_import(self):
        """
        Export a bundle assembled in memory and import it again; the
        re-imported bundle must serialize to the same XML.
        """
        course_xml = input_data.COURSE
        policies_xml = input_data.POLICIES

        source = XBundle()
        source.set_course(etree.XML(course_xml))
        source.add_policies(etree.XML(policies_xml))
        source.add_about_file("overview.html", "hello overview")

        exported_form = str(source)

        scratch = mkdtemp()
        try:
            source.export_to_directory(scratch)

            # Round trip through the "mitx.01" directory of the export.
            course_dir = os.path.join(scratch, 'mitx.01')
            restored = XBundle()
            restored.import_from_directory(course_dir)

            restored_form = str(restored)
            self.assertEqual(
                clean_xml(exported_form), clean_xml(restored_form))
        finally:
            rmtree(scratch)
Example #36
0
    def export(self):
        """
        Convert the OCW course into an edX xbundle and write it out.

        A <course> element is built from self.meta and the syllabus page,
        populated by self.do_chapters, and wrapped in an XBundle together
        with self.policies and the about files. The result is written to
        self.output_fn (default: "<cid>_xbundle.xml"); the suffix of that
        name selects the output format:

        * ".xml"             -- a single xbundle file via XBundle.save;
                                self.copy_static_files is called with ".".
        * ".tar.gz" / ".tgz" -- the course is exported into a temporary
                                "course" directory which is then tarred.
        * anything else      -- treated as an output directory, which is
                                created if it does not exist.

        Side effects: resets self.processed_files, self.files_to_copy,
        self.processed_pdf_files and self.element_counts, and prints the
        element counts and the output name.
        """
        # Local import: only the tarball branch needs it.
        import subprocess

        meta = self.meta
        sys.stderr.write("metadata = %s\n" % meta)

        fn = self.dir / 'contents/Syllabus/index.htm'
        sxml = self.parse_broken_html(fn=fn)
        edxxml = etree.Element('course')
        edxxml.set('dirname', os.path.basename(os.getcwd()))
        edxxml.set('semester', self.DefaultSemester)
        for k, v in meta.items():
            edxxml.set(k, v)

        # track which content files have been ingested, to avoid duplication
        self.processed_files = [fn]
        # dict of files (key=OCW source, val=edX static dest) to copy to
        # "/static"
        self.files_to_copy = {}
        self.processed_pdf_files = []
        self.element_counts = defaultdict(int)

        self.do_chapters(sxml, edxxml)

        policies = self.policies

        # grab course image via index.htm
        self.get_course_image()

        # make xbundle
        xb = XBundle(force_studio_format=True)
        xb.DefaultOrg = self.DefaultOrg
        xb.set_course(edxxml)
        xb.add_policies(policies)
        self.add_about_files(xb)

        def c(x):
            return len(xb.course.findall(".//%s" % x))

        elist = [
            "chapter", "sequential", "vertical", "problem", "html", "video"
        ]
        xbundle_counts = {x: c(x) for x in elist}
        self.element_counts['n_static_files'] = len(self.files_to_copy)
        self.element_counts['n_ocw_files_processed'] = len(
            self.processed_files)

        # save it
        outfn = self.output_fn or ('%s_xbundle.xml' % self.cid)
        if outfn.endswith(".xml"):
            xb.save(outfn)
            self.copy_static_files(".")
        elif outfn.endswith((".tar.gz", ".tgz")):
            tempd = tempfile.mkdtemp(prefix="tmp_ocw2xbundle")
            try:
                cdir = path(tempd) / "course"
                os.mkdir(cdir)
                self.copy_static_files(cdir)
                xb.export_to_directory(cdir, dir_include_course_id=False)

                # Resolve the archive against the current directory because
                # tar itself runs with tempd as its working directory.
                archive = os.path.join(os.path.abspath(os.curdir), outfn)
                print("cd %s; tar czf '%s' course" % (tempd, archive))

                # An argument list (no shell) keeps paths containing spaces
                # or quotes intact.
                try:
                    status = subprocess.call(
                        ["tar", "czf", archive, "course"], cwd=tempd)
                except OSError as err:
                    # tar could not be started; report instead of raising,
                    # as os.system would only have returned non-zero here.
                    sys.stderr.write("Could not run tar: %s\n" % err)
                else:
                    if status != 0:
                        sys.stderr.write(
                            "tar exited with status %s\n" % status)
            finally:
                # Remove the temporary tree even if the export failed.
                shutil.rmtree(tempd)
        else:
            if not os.path.exists(outfn):
                print("Making directory for output: %s" % outfn)
                os.mkdir(outfn)
            self.copy_static_files(outfn)
            xb.export_to_directory(outfn, dir_include_course_id=False)

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print("OCW element counts: %s" % json.dumps(self.element_counts,
                                                    indent=4))
        print("edX XML element counts: %s" % json.dumps(xbundle_counts,
                                                        indent=4))
        print("Done, wrote to %s" % outfn)
Example #37
0
    def test_unicode_in_html(self):
        """
        Non-ASCII overview content, given either as text or as UTF-8 bytes,
        must serialize to expected_data.ESCAPED_UNICODE.
        """
        course_dir = os.path.join("input_testdata", "mitx.01")
        interrobangs = "\u2e18 interrobang \u203d"

        from_text = XBundle()
        from_text.import_from_directory(course_dir)
        from_text.add_about_file("overview.html", interrobangs)

        wanted = expected_data.ESCAPED_UNICODE
        self.assertEqual(clean_xml(str(from_text)), clean_xml(wanted))

        # Import again for a clean slate and pass bytes this time.
        from_bytes = XBundle()
        from_bytes.import_from_directory(course_dir)

        encoded = interrobangs.encode('utf-8')
        from_bytes.add_about_file("overview.html", encoded)
        self.assertEqual(clean_xml(str(from_bytes)), clean_xml(wanted))
Example #38
0
    def test_set_course(self):
        """
        Walk set_course through its error cases and its defaults.

        Covers: a non-course root element, a course without semester, a
        course whose url_name supplies the semester, and the NO_COURSE
        fixture, whose result is compared with expected_data.SET_COURSE.
        """
        empty_course_xml = input_data.EMPTY_COURSE

        bundle = XBundle(keep_urls=True)
        bundle.load(file_from_string(empty_course_xml))

        # Neither org nor semester is given in the loaded XML.
        loaded_course = bundle.course
        self.assertIsNone(loaded_course.get("org"))
        self.assertIsNone(loaded_course.get("semester"))
        self.assertEqual(bundle.semester, "")

        # The fixture below has no org attribute and no url_name.
        course_str = input_data.NO_COURSE

        wrapped_xml = "<x>" + course_str + "</x>"
        with self.assertRaises(Exception) as failure:
            bundle.set_course(etree.XML(wrapped_xml))
        self.assertIn(
            "set_course should be called with a <course> element",
            failure.exception.args)

        with self.assertRaises(Exception) as failure:
            bundle.set_course(etree.XML("<course />"))
        self.assertIn("No semester found.", failure.exception.args)

        # The semester is taken from url_name here.
        bundle.set_course(etree.XML("<course url_name='x' />"))
        self.assertEqual(bundle.semester, "x")

        bundle.set_course(etree.XML(course_str))

        # MITx does not occur in the data; it is filled in automatically.
        self.assertEqual(bundle.course.get("org"), "MITx")
        self.assertEqual(bundle.course.get("semester"), "2013_Spring")
        self.assertEqual(bundle.semester, "2013_Spring")

        actual = str(bundle)
        wanted = expected_data.SET_COURSE
        self.assertEqual(clean_xml(actual), clean_xml(wanted))