def test_import_export(self):  # pylint: disable=no-self-use
    """
    Import the mitx.01 course, export it, and diff the export against
    the known-good exported tree.
    """
    bundle = XBundle()
    bundle.import_from_directory(os.path.join("input_testdata", "mitx.01"))

    workdir = mkdtemp()
    try:
        bundle.export_to_directory(workdir)

        reference_dir = os.path.join("input_testdata", "mitx.01.exported")
        reference_copy = os.path.join(workdir, "mitx.01.exported")
        exported_dir = os.path.join(workdir, "mitx.01")

        # The xml files are transformed to remove spaces so the comparison
        # passes across platforms whose xml serializers differ slightly
        # (see: travis). The reference tree is copied into the temporary
        # directory so that it is cleaned up together with the export.
        copytree(reference_dir, reference_copy)
        _normalize_xml(workdir)

        # check_call raises if diff reports any difference.
        check_call(["diff", "-r", reference_copy, exported_dir])
    finally:
        rmtree(workdir)
def test_import_export(self):  # pylint: disable=no-self-use
    """
    Round-trip mitx.01 through import and export, then compare the
    result with the stored reference export.
    """
    course_source = os.path.join("input_testdata", "mitx.01")
    bundle = XBundle()
    bundle.import_from_directory(course_source)

    scratch = mkdtemp()
    try:
        bundle.export_to_directory(scratch)

        expected_tree = os.path.join(scratch, "mitx.01.exported")
        actual_tree = os.path.join(scratch, "mitx.01")

        # Work on a copy of the reference tree inside the scratch
        # directory (easy cleanup), then transform the xml files to
        # remove spaces so slightly different xml serializers on other
        # platforms (see: travis) do not cause spurious differences.
        copytree(
            os.path.join("input_testdata", "mitx.01.exported"),
            expected_tree,
        )
        _normalize_xml(scratch)

        diff_command = ["diff", "-r", expected_tree, actual_tree]
        check_call(diff_command)
    finally:
        rmtree(scratch)
def test_export_import(self):
    """
    Build a bundle in memory, export it, import the export, and check
    that both bundles serialize to the same xml.
    """
    original = XBundle()
    original.set_course(etree.XML(input_data.COURSE))
    original.add_policies(etree.XML(input_data.POLICIES))
    original.add_about_file("overview.html", "hello overview")
    xml_before = str(original)

    export_root = mkdtemp()
    try:
        original.export_to_directory(export_root)

        # Round trip: load the exported course into a fresh bundle.
        reloaded = XBundle()
        reloaded.import_from_directory(os.path.join(export_root, "mitx.01"))
        xml_after = str(reloaded)

        self.assertEqual(clean_xml(xml_before), clean_xml(xml_after))
    finally:
        rmtree(export_root)
def test_export_and_keep_urls(self):
    """
    Check the changes made to url_name by export_to_directory followed
    by an import, with keep_urls and force_studio_format enabled.
    """
    bundle_options = {"keep_urls": True, "force_studio_format": True}

    # The chapter in this input carries url_name_orig.
    source_xml = input_data.URL_NAME_ORIG_IN_CHAPTER2
    exporter = XBundle(**bundle_options)
    exporter.load(file_from_string(source_xml))

    # str(bundle) leaves the input xml unchanged; export_to_directory
    # is the step that will alter it.
    self.assertEqual(clean_xml(source_xml), clean_xml(str(exporter)))

    starting_dir = os.getcwd()
    scratch = mkdtemp()
    try:
        # Export and import are called without a path, so run them from
        # inside the scratch directory (presumably they default to the
        # current directory -- confirm against XBundle).
        os.chdir(scratch)
        exporter.export_to_directory()

        importer = XBundle(**bundle_options)
        importer.import_from_directory()

        self.assertEqual(
            clean_xml(expected_data.KEEP_URLS_FORCE_STUDIO_FORMAT),
            clean_xml(str(importer)),
        )
    finally:
        # Leave the scratch directory before deleting it.
        os.chdir(starting_dir)
        rmtree(scratch)
def test_add_descriptors(self):
    """
    Exercise add_descriptors through an export/import round trip with
    keep_urls enabled.
    """
    # The chapter in this input carries url_name_orig.
    source_xml = input_data.URL_NAME_ORIG_IN_CHAPTER1
    exporter = XBundle(keep_urls=True)
    exporter.load(file_from_string(source_xml))

    # str(bundle) leaves the input xml unchanged; export_to_directory
    # is the step that will alter it.
    self.assertEqual(clean_xml(source_xml), clean_xml(str(exporter)))

    starting_dir = os.getcwd()
    scratch = mkdtemp()
    try:
        # Export and import are called without a path, so run them from
        # inside the scratch directory (presumably they default to the
        # current directory -- confirm against XBundle).
        os.chdir(scratch)
        exporter.export_to_directory()

        importer = XBundle(keep_urls=True)
        importer.import_from_directory()

        self.assertEqual(
            clean_xml(expected_data.URL_NAME_ORIG),
            clean_xml(str(importer)),
        )
    finally:
        # Leave the scratch directory before deleting it.
        os.chdir(starting_dir)
        rmtree(scratch)
def test_export_import(self):
    """
    Export an in-memory bundle to disk and verify that importing the
    export reproduces the same xml.
    """
    course_element = etree.XML(input_data.COURSE)
    policies_element = etree.XML(input_data.POLICIES)

    source_bundle = XBundle()
    source_bundle.set_course(course_element)
    source_bundle.add_policies(policies_element)
    source_bundle.add_about_file("overview.html", "hello overview")
    expected_xml = clean_xml(str(source_bundle))

    tmp_root = mkdtemp()
    try:
        source_bundle.export_to_directory(tmp_root)

        # Round trip: read the exported course back in.
        exported_course = os.path.join(tmp_root, "mitx.01")
        round_tripped = XBundle()
        round_tripped.import_from_directory(exported_course)

        self.assertEqual(expected_xml, clean_xml(str(round_tripped)))
    finally:
        rmtree(tmp_root)
def test_export_and_keep_urls(self):
    """
    Verify how url_name is rewritten once a bundle has been exported
    with export_to_directory and imported again.
    """
    # Note: the chapter in this input has url_name_orig.
    xml_in = input_data.URL_NAME_ORIG_IN_CHAPTER2

    first = XBundle(keep_urls=True, force_studio_format=True)
    first.load(file_from_string(xml_in))

    # Serializing with str() does not change the input xml; the change
    # only happens in export_to_directory.
    self.assertEqual(clean_xml(xml_in), clean_xml(str(first)))

    previous_cwd = os.getcwd()
    work_dir = mkdtemp()
    try:
        # No path is passed to export/import below, so the test moves
        # into the temporary directory first (assumes both default to
        # the current directory -- TODO confirm).
        os.chdir(work_dir)
        first.export_to_directory()

        second = XBundle(keep_urls=True, force_studio_format=True)
        second.import_from_directory()

        wanted = clean_xml(expected_data.KEEP_URLS_FORCE_STUDIO_FORMAT)
        self.assertEqual(wanted, clean_xml(str(second)))
    finally:
        # Restore the working directory before removing the temp tree.
        os.chdir(previous_cwd)
        rmtree(work_dir)
def test_add_descriptors(self):
    """
    Test add_descriptors by exporting and re-importing a bundle that
    keeps its urls.
    """
    # Note: the chapter in this input has url_name_orig.
    xml_in = input_data.URL_NAME_ORIG_IN_CHAPTER1

    first = XBundle(keep_urls=True)
    first.load(file_from_string(xml_in))

    # Serializing with str() does not change the input xml; the change
    # only happens in export_to_directory.
    self.assertEqual(clean_xml(xml_in), clean_xml(str(first)))

    previous_cwd = os.getcwd()
    work_dir = mkdtemp()
    try:
        # No path is passed to export/import below, so the test moves
        # into the temporary directory first (assumes both default to
        # the current directory -- TODO confirm).
        os.chdir(work_dir)
        first.export_to_directory()

        second = XBundle(keep_urls=True)
        second.import_from_directory()

        wanted = clean_xml(expected_data.URL_NAME_ORIG)
        self.assertEqual(wanted, clean_xml(str(second)))
    finally:
        # Restore the working directory before removing the temp tree.
        os.chdir(previous_cwd)
        rmtree(work_dir)
def test_import_large(self):
    """
    Import a course slightly larger than mitx.01, compare it with the
    stored xml, then check that an xml-only export contains only xml.
    """
    bundle = XBundle()
    bundle.import_from_directory(
        os.path.join("input_testdata", "content-devops-0001")
    )

    reference_file = os.path.join(
        "input_testdata", "content-devops-0001.out.xml"
    )
    with open(reference_file) as handle:
        reference_xml = handle.read()
    self.assertEqual(clean_xml(reference_xml), clean_xml(str(bundle)))

    export_root = mkdtemp()
    try:
        bundle.export_to_directory(export_root, xml_only=True, newfmt=True)

        course_dir = os.path.join(export_root, "0.001")
        for _, _, filenames in os.walk(course_dir):
            for name in filenames:
                # xml_only=True was requested, so nothing but xml files
                # should have been written.
                self.assertTrue(name.endswith(".xml"))
    finally:
        rmtree(export_root)
def test_import_large(self):
    """
    Check import of a course slightly larger than mitx.01 and the
    contents of its xml-only export.
    """
    course_path = os.path.join("input_testdata", "content-devops-0001")
    expected_path = os.path.join(
        "input_testdata", "content-devops-0001.out.xml"
    )

    bundle = XBundle()
    bundle.import_from_directory(course_path)

    with open(expected_path) as expected_file:
        expected_xml = clean_xml(expected_file.read())
    self.assertEqual(expected_xml, clean_xml(str(bundle)))

    tmp_root = mkdtemp()
    try:
        bundle.export_to_directory(tmp_root, xml_only=True, newfmt=True)

        # Flatten the walk into a single stream of file names.
        exported_names = (
            name
            for _, _, names in os.walk(os.path.join(tmp_root, "0.001"))
            for name in names
        )
        for exported_name in exported_names:
            # With xml_only=True there shouldn't be anything else.
            self.assertTrue(exported_name.endswith(".xml"))
    finally:
        rmtree(tmp_root)
def export(self):
    """
    Build an edX XBundle from the parsed OCW course and write it out.

    A ``course`` element is created (``dirname`` is the basename of the
    current working directory, ``semester`` is ``self.DefaultSemester``,
    plus every key/value of ``self.meta``), chapters are added via
    ``self.do_chapters``, and the result is wrapped in an ``XBundle``
    together with ``self.policies`` and the about files.

    The output form is chosen from the output filename
    (``self.output_fn``, or ``<cid>_xbundle.xml`` when that is unset):

    * ``*.xml``: ``xb.save`` writes one file and
      ``copy_static_files(".")`` is called;
    * ``*.tar.gz`` / ``*.tgz``: the course is exported into a temporary
      ``course`` directory which is then packed into a gzipped tarball;
    * anything else: treated as an output directory, created if missing.

    Element counts are printed at the end.

    Raises whatever ``tarfile`` / ``os`` raise when the output cannot be
    written; a failed tarball is no longer reported as "Done".
    """
    meta = self.meta
    sys.stderr.write("metadata = %s\n" % meta)

    fn = self.dir / 'contents/Syllabus/index.htm'
    sxml = self.parse_broken_html(fn=fn)

    edxxml = etree.Element('course')
    edxxml.set('dirname', os.path.basename(os.getcwd()))
    edxxml.set('semester', self.DefaultSemester)
    for k, v in meta.items():
        edxxml.set(k, v)

    # track which content files have been ingested, to avoid duplication
    self.processed_files = [fn]
    # dict of files (key=OCW source, val=edX static dest) to copy to "/static"
    self.files_to_copy = {}
    self.processed_pdf_files = []
    self.element_counts = defaultdict(int)

    self.do_chapters(sxml, edxxml)

    policies = self.policies

    # grab course image via index.htm
    self.get_course_image()

    # make xbundle
    xb = XBundle(force_studio_format=True)
    xb.DefaultOrg = self.DefaultOrg
    xb.set_course(edxxml)
    xb.add_policies(policies)
    self.add_about_files(xb)

    def count_elements(tag):
        """Return how many descendants of the course have the given tag."""
        return len(xb.course.findall(".//%s" % tag))

    elist = ["chapter", "sequential", "vertical", "problem", "html", "video"]
    xbundle_counts = {tag: count_elements(tag) for tag in elist}

    self.element_counts['n_static_files'] = len(self.files_to_copy)
    self.element_counts['n_ocw_files_processed'] = len(self.processed_files)

    # save it
    outfn = self.output_fn or ('%s_xbundle.xml' % self.cid)
    if outfn.endswith(".xml"):
        xb.save(outfn)
        self.copy_static_files(".")
    elif outfn.endswith((".tar.gz", ".tgz")):
        # Local import: only this branch needs tarfile.
        import tarfile

        tempd = tempfile.mkdtemp(prefix="tmp_ocw2xbundle")
        try:
            cdir = path(tempd) / "course"
            os.mkdir(cdir)
            self.copy_static_files(cdir)
            xb.export_to_directory(cdir, dir_include_course_id=False)

            # Pack with the tarfile module rather than a shell command
            # string: paths containing spaces or quotes are handled, no
            # external tar binary is needed, and errors raise instead of
            # being silently ignored.
            tarball = os.path.abspath(outfn)
            print("Creating tarball %s from %s" % (tarball, cdir))
            with tarfile.open(tarball, "w:gz") as archive:
                # arcname keeps "course" as the top-level entry.
                archive.add(cdir, arcname="course")
        finally:
            # Always remove the temporary tree, even if export fails.
            shutil.rmtree(tempd)
    else:
        if not os.path.exists(outfn):
            print("Making directory for output: %s" % outfn)
            os.mkdir(outfn)
        self.copy_static_files(outfn)
        xb.export_to_directory(outfn, dir_include_course_id=False)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("OCW element counts: %s" % json.dumps(self.element_counts, indent=4))
    print("edX XML element counts: %s" % json.dumps(xbundle_counts, indent=4))
    print("Done, wrote to %s" % outfn)
def export(self):
    """
    Build an edX XBundle from the parsed OCW course and write it out.

    A ``course`` element is created (``dirname`` is the basename of the
    current working directory, ``semester`` is ``self.DefaultSemester``,
    plus every key/value of ``self.meta``), chapters are added via
    ``self.do_chapters``, and the result is wrapped in an ``XBundle``
    together with ``self.policies`` and the about files.

    The output form is chosen from the output filename
    (``self.output_fn``, or ``<cid>_xbundle.xml`` when that is unset):

    * ``*.xml``: ``xb.save`` writes one file and
      ``copy_static_files(".")`` is called;
    * ``*.tar.gz`` / ``*.tgz``: the course is exported into a temporary
      ``course`` directory which is then packed into a gzipped tarball;
    * anything else: treated as an output directory, created if missing.

    Element counts are printed at the end.

    Raises whatever ``tarfile`` / ``os`` raise when the output cannot be
    written; a failed tarball is no longer reported as "Done".
    """
    meta = self.meta
    sys.stderr.write("metadata = %s\n" % meta)

    fn = self.dir / 'contents/Syllabus/index.htm'
    sxml = self.parse_broken_html(fn=fn)

    edxxml = etree.Element('course')
    edxxml.set('dirname', os.path.basename(os.getcwd()))
    edxxml.set('semester', self.DefaultSemester)
    for k, v in meta.items():
        edxxml.set(k, v)

    # track which content files have been ingested, to avoid duplication
    self.processed_files = [fn]
    # dict of files (key=OCW source, val=edX static dest) to copy to "/static"
    self.files_to_copy = {}
    self.processed_pdf_files = []
    self.element_counts = defaultdict(int)

    self.do_chapters(sxml, edxxml)

    policies = self.policies

    # grab course image via index.htm
    self.get_course_image()

    # make xbundle
    xb = XBundle(force_studio_format=True)
    xb.DefaultOrg = self.DefaultOrg
    xb.set_course(edxxml)
    xb.add_policies(policies)
    self.add_about_files(xb)

    def count_elements(tag):
        """Return how many descendants of the course have the given tag."""
        return len(xb.course.findall(".//%s" % tag))

    elist = ["chapter", "sequential", "vertical", "problem", "html", "video"]
    xbundle_counts = {tag: count_elements(tag) for tag in elist}

    self.element_counts['n_static_files'] = len(self.files_to_copy)
    self.element_counts['n_ocw_files_processed'] = len(self.processed_files)

    # save it
    outfn = self.output_fn or ('%s_xbundle.xml' % self.cid)
    if outfn.endswith(".xml"):
        xb.save(outfn)
        self.copy_static_files(".")
    elif outfn.endswith((".tar.gz", ".tgz")):
        # Local import: only this branch needs tarfile.
        import tarfile

        tempd = tempfile.mkdtemp(prefix="tmp_ocw2xbundle")
        try:
            cdir = path(tempd) / "course"
            os.mkdir(cdir)
            self.copy_static_files(cdir)
            xb.export_to_directory(cdir, dir_include_course_id=False)

            # Pack with the tarfile module rather than a shell command
            # string: paths containing spaces or quotes are handled, no
            # external tar binary is needed, and errors raise instead of
            # being silently ignored.
            tarball = os.path.abspath(outfn)
            print("Creating tarball %s from %s" % (tarball, cdir))
            with tarfile.open(tarball, "w:gz") as archive:
                # arcname keeps "course" as the top-level entry.
                archive.add(cdir, arcname="course")
        finally:
            # Always remove the temporary tree, even if export fails.
            shutil.rmtree(tempd)
    else:
        if not os.path.exists(outfn):
            print("Making directory for output: %s" % outfn)
            os.mkdir(outfn)
        self.copy_static_files(outfn)
        xb.export_to_directory(outfn, dir_include_course_id=False)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("OCW element counts: %s" % json.dumps(self.element_counts, indent=4))
    print("edX XML element counts: %s" % json.dumps(xbundle_counts, indent=4))
    print("Done, wrote to %s" % outfn)