Example #1
0
    def export(self):
        """
        Build the edX course XML from the OCW content and write it out.

        A <course> element is created from the parsed syllabus page
        (contents/Syllabus/index.htm under self.dir) and the key/value
        pairs in self.meta, then handed to self.do_chapters together with
        the parsed syllabus.  The result is wrapped in an XBundle and saved
        in a format chosen from the output filename (self.output_fn, or
        "<cid>_xbundle.xml" when that is unset):

          *.xml            -- single xbundle file; static files are copied
                              relative to the current directory
          *.tar.gz, *.tgz  -- course directory exported to a temporary
                              directory and archived under "course/"
          anything else    -- treated as an output directory, created if
                              it does not exist

        Summary counts are printed to stdout when finished.
        """
        # Local import: only the archive output format needs it.
        import tarfile

        meta = self.meta
        sys.stderr.write("metadata = %s\n" % meta)

        fn = self.dir / 'contents/Syllabus/index.htm'
        sxml = self.parse_broken_html(fn=fn)
        edxxml = etree.Element('course')
        edxxml.set('dirname', os.path.basename(os.getcwd()))
        edxxml.set('semester', self.DefaultSemester)
        for k, v in meta.items():
            edxxml.set(k, v)

        # track which content files have been ingested, to avoid duplication
        self.processed_files = [fn]
        # dict of files (key=OCW source, val=edX static dest) to copy to "/static"
        self.files_to_copy = {}
        self.processed_pdf_files = []
        self.element_counts = defaultdict(int)

        self.do_chapters(sxml, edxxml)

        policies = self.policies

        # grab course image via index.htm
        self.get_course_image()

        # make xbundle
        xb = XBundle(force_studio_format=True)
        xb.DefaultOrg = self.DefaultOrg
        xb.set_course(edxxml)
        xb.add_policies(policies)
        self.add_about_files(xb)

        def c(x):
            # number of <x> elements anywhere below the bundle's course root
            return len(xb.course.findall(".//%s" % x))

        elist = [
            "chapter", "sequential", "vertical", "problem", "html", "video"
        ]
        xbundle_counts = {x: c(x) for x in elist}
        self.element_counts['n_static_files'] = len(self.files_to_copy)
        self.element_counts['n_ocw_files_processed'] = len(
            self.processed_files)

        # save it
        outfn = self.output_fn or ('%s_xbundle.xml' % self.cid)
        if outfn.endswith(".xml"):
            xb.save(outfn)
            self.copy_static_files(".")
        elif outfn.endswith((".tar.gz", ".tgz")):
            tempd = tempfile.mkdtemp(prefix="tmp_ocw2xbundle")
            try:
                cdir = path(tempd) / "course"
                os.mkdir(cdir)
                self.copy_static_files(cdir)
                xb.export_to_directory(cdir, dir_include_course_id=False)
                print("Creating archive: %s" % outfn)
                # Use tarfile rather than a shell command line: no quoting
                # problems with unusual paths, absolute output names work,
                # and a failed write raises instead of passing silently.
                with tarfile.open(outfn, "w:gz") as tar:
                    tar.add(cdir, arcname="course")
            finally:
                # always remove the staging directory, even on failure
                shutil.rmtree(tempd)
        else:
            if not os.path.exists(outfn):
                print("Making directory for output: %s" % outfn)
                # makedirs so that a nested output path can be created
                os.makedirs(outfn)
            self.copy_static_files(outfn)
            xb.export_to_directory(outfn, dir_include_course_id=False)

        print("OCW element counts: %s" % json.dumps(self.element_counts,
                                                    indent=4))
        print("edX XML element counts: %s" % json.dumps(xbundle_counts,
                                                        indent=4))
        print("Done, wrote to %s" % outfn)
Example #2
0
    def export(self):
        """
        Assemble the course as edX XML and save it.

        Steps, in order:
          1. Parse contents/Syllabus/index.htm (under self.dir) and create
             a <course> element carrying dirname, semester and every
             key/value pair from self.meta as attributes.
          2. Reset the per-export bookkeeping attributes and call
             self.do_chapters with the parsed syllabus and the element.
          3. Wrap the element, self.policies and the about files in an
             XBundle.
          4. Save according to the output filename (self.output_fn, or
             "<cid>_xbundle.xml" by default): a ".xml" name writes a single
             xbundle file, ".tar.gz"/".tgz" writes a gzipped tar with the
             course under "course/", and any other name is used as an
             output directory (created when missing).
          5. Print element counts and the output location.
        """
        # Imported here because only the tarball branch uses it.
        import tarfile

        meta = self.meta
        sys.stderr.write("metadata = %s\n" % meta)

        fn = self.dir / 'contents/Syllabus/index.htm'
        sxml = self.parse_broken_html(fn=fn)
        edxxml = etree.Element('course')
        edxxml.set('dirname', os.path.basename(os.getcwd()))
        edxxml.set('semester', self.DefaultSemester)
        for k, v in meta.items():
            edxxml.set(k, v)

        self.processed_files = [fn]     # track which content files have been ingested, to avoid duplication
        self.files_to_copy = {}         # dict of files (key=OCW source, val=edX static dest) to copy to "/static"
        self.processed_pdf_files = []
        self.element_counts = defaultdict(int)

        self.do_chapters(sxml, edxxml)

        policies = self.policies

        # grab course image via index.htm
        self.get_course_image()

        # make xbundle
        xb = XBundle(force_studio_format=True)
        xb.DefaultOrg = self.DefaultOrg
        xb.set_course(edxxml)
        xb.add_policies(policies)
        self.add_about_files(xb)

        def c(x):
            # count descendants of the bundle's course element with tag x
            return len(xb.course.findall(".//%s" % x))
        elist = ["chapter", "sequential", "vertical", "problem", "html", "video"]
        xbundle_counts = {x: c(x) for x in elist}
        self.element_counts['n_static_files'] = len(self.files_to_copy)
        self.element_counts['n_ocw_files_processed'] = len(self.processed_files)

        # save it
        outfn = self.output_fn or ('%s_xbundle.xml' % self.cid)
        if outfn.endswith(".xml"):
            xb.save(outfn)
            self.copy_static_files(".")
        elif outfn.endswith((".tar.gz", ".tgz")):
            tempd = tempfile.mkdtemp(prefix="tmp_ocw2xbundle")
            try:
                cdir = path(tempd) / "course"
                os.mkdir(cdir)
                self.copy_static_files(cdir)
                xb.export_to_directory(cdir, dir_include_course_id=False)
                print("Creating archive: %s" % outfn)
                # tarfile instead of a shell "cd ...; tar czf ..." string:
                # immune to quoting issues in paths, handles an absolute
                # output name, and raises if the archive cannot be written.
                with tarfile.open(outfn, "w:gz") as tar:
                    tar.add(cdir, arcname="course")
            finally:
                # staging directory is removed whether or not export succeeded
                shutil.rmtree(tempd)
        else:
            if not os.path.exists(outfn):
                print("Making directory for output: %s" % outfn)
                # makedirs also creates missing parent directories
                os.makedirs(outfn)
            self.copy_static_files(outfn)
            xb.export_to_directory(outfn, dir_include_course_id=False)

        print("OCW element counts: %s" % json.dumps(self.element_counts, indent=4))
        print("edX XML element counts: %s" % json.dumps(xbundle_counts, indent=4))
        print("Done, wrote to %s" % outfn)