Beispiel #1
0
def main():
    """
    Command-line entry point.

    Parses the command line with docopt, builds a GV from the parsed
    arguments and, when the 'process' command is set, runs the post-transform
    stages in order: the non-transform portion of TeiToNlm, reference
    linking, caption classification, bibliography handling, final cleanup and
    the optional identifier, XSLT-chain and compliance stages.

    Always returns None.
    """
    args = docopt(__doc__, version='meTypeset 0.1')
    bare_gv = GV(args)

    if args['--debug']:
        bare_gv.debug.enable_debug(args['--nogit'])

    # NOTE(review): this instance is never used below (a second TeiToNlm is
    # built for the actual run); kept because construction may have side
    # effects that are not visible from here -- confirm before removing.
    nlm_instance = TeiToNlm(bare_gv)

    # nothing else to do unless the 'process' command was requested
    if not args['process']:
        return

    def exceeds_threshold(setting_name):
        # True when the requested aggression level is strictly greater than
        # the threshold stored under setting_name in the 'aggression' domain
        requested = int(args['--aggression'])
        threshold = int(bare_gv.settings.get_setting(setting_name, None,
                                                     domain='aggression'))
        return requested > threshold

    # run non-transform portions of teitonlm
    TeiToNlm(bare_gv).run(True, False)

    # run reference linker
    linker = ReferenceLinker(bare_gv)
    linker.run(args['--interactive'])
    linker.cleanup()

    bibliography_classifier = BibliographyClassifier(bare_gv)

    # run table and graphic caption classifiers, each gated by its own
    # aggression threshold
    captions = CaptionClassifier(bare_gv)
    if exceeds_threshold('tablecaptions'):
        captions.run_tables()

    if exceeds_threshold('graphiccaptions'):
        captions.run_graphics()

    if args['--interactive']:
        bibliography_classifier.run_prompt(True)

    # process any bibliography entries that are possible
    BibliographyDatabase(bare_gv).run()

    # remove stranded titles
    NlmManipulate(bare_gv).final_clean()

    if args['--identifiers']:
        IdGenerator(bare_gv).run()

    if args['--chain']:
        # construct and run an XSLT chainer
        XslChain(bare_gv).run()

    if args['--clean']:
        ComplianceEnforcer(bare_gv).run()
Beispiel #2
0
def main():
    """
    Command-line entry point.

    Reads the command line through docopt and wraps the result in a GV. If
    the 'process' command is set, the stages below run in a fixed order:
    TeiToNlm (non-transform portion), ReferenceLinker, CaptionClassifier,
    the interactive bibliography prompt (only with '--interactive'),
    BibliographyDatabase, NlmManipulate.final_clean, and finally the
    optional '--identifiers', '--chain' and '--clean' stages.

    Always returns None.
    """
    args = docopt(__doc__, version='meTypeset 0.1')
    bare_gv = GV(args)

    if args['--debug']:
        bare_gv.debug.enable_debug(args['--nogit'])

    # NOTE(review): unused local -- the run below constructs its own
    # TeiToNlm. Left in place because the constructor's side effects are not
    # visible from this function; confirm before deleting.
    nlm_instance = TeiToNlm(bare_gv)

    if not args['process']:
        # only the 'process' command triggers any work
        return

    def aggression_above(setting_name):
        # Compare the command-line aggression level with the configured
        # threshold for setting_name (strictly greater than).
        level = int(args['--aggression'])
        limit = int(bare_gv.settings.get_setting(setting_name, None,
                                                 domain='aggression'))
        return level > limit

    # run non-transform portions of teitonlm
    TeiToNlm(bare_gv).run(True, False)

    # run reference linker
    reference_linker = ReferenceLinker(bare_gv)
    reference_linker.run(args['--interactive'])
    reference_linker.cleanup()

    bibliography_classifier = BibliographyClassifier(bare_gv)

    # run caption classifiers for tables, then graphics
    caption_classifier = CaptionClassifier(bare_gv)
    if aggression_above('tablecaptions'):
        caption_classifier.run_tables()

    if aggression_above('graphiccaptions'):
        caption_classifier.run_graphics()

    if args['--interactive']:
        bibliography_classifier.run_prompt(True)

    # process any bibliography entries that are possible
    BibliographyDatabase(bare_gv).run()

    # remove stranded titles
    NlmManipulate(bare_gv).final_clean()

    if args['--identifiers']:
        IdGenerator(bare_gv).run()

    if args['--chain']:
        # construct and run an XSLT chainer
        XslChain(bare_gv).run()

    if args['--clean']:
        ComplianceEnforcer(bare_gv).run()
Beispiel #3
0
    def run_modules(self):
        """
        Run the processing pipeline selected by the command-line arguments.

        With the 'bibscan' command only the bibliography database scan runs.
        Otherwise the input is converted to TEI, classified, transformed to
        NLM and post-processed. The pipeline stops early after the TEI stage
        when '--puretei' is set and after the NLM stage when '--purenlm' is
        set.

        Always returns None.
        """
        ag = int(self.gv.settings.args['--aggression'])
        growl = "[grrr!]" if ag == 10 else ""
        self.debug.print_debug(self, u'Running at aggression level {0} {1}'.format(ag, growl))

        if ag > 10:
            self.debug.print_debug(self, "WARNING: safety bail-out features are disabled at aggression level 11")

        if self.args['bibscan']:
            # scan-only mode: nothing else in the pipeline runs
            BibliographyDatabase(self.gv).scan()
            return

        # check for stylesheets
        self.gv.check_file_exists(self.gv.docx_style_sheet_dir)

        # metadata file
        # NOTE(review): this assigns to a bare `gv`, not `self.gv`; `gv` is
        # not defined inside this method, so it is presumably a module-level
        # name -- confirm whether `self.gv` was intended.
        gv.metadata_file = self.set_metadata_file()

        self.gv.mk_dir(self.gv.output_folder_path)

        # commands whose input must first be converted to docx by unoconv,
        # paired with the format name handed to UnoconvToDocx.run()
        via_unoconv = (('doc', 'doc'), ('odt', 'odt'), ('other', 'unoconv'))

        for command, source_format in via_unoconv:
            if self.args[command]:
                # convert to docx, then run docx to tei
                UnoconvToDocx(self.gv).run(source_format)
                DocxToTei(self.gv).run(True, self.args['--proprietary'])
                break
        else:
            # none of the unoconv-backed commands was selected
            if self.args['docx']:
                # run docx to tei conversion
                # includes hooks for proprietary transforms if enabled
                DocxToTei(self.gv).run(True, self.args['--proprietary'])
            elif self.args['docxextracted']:
                self.debug.print_debug(self, u'Skipping docx extraction')
                DocxToTei(self.gv).run(False, self.args['--proprietary'])
            elif self.args['tei']:
                self.debug.print_debug(self, u'Skipping docx extraction; processing TEI file')
                DocxToTei(self.gv).run(False, self.args['--proprietary'], tei=True)

        if self.args['--puretei']:
            self.debug.print_debug(self, u'Exiting as TEI transform complete')
            return

        metadata = Metadata(self.gv)
        metadata.pre_clean()

        # run size classifier (aggression 5)
        SizeClassifier(self.gv).run()

        # run bibliographic addins handler (aggression 4)
        found_bibliography = BibliographyAddins(self.gv).run()

        # run list classifier (aggression 4)
        ListClassifier(self.gv).run()

        bibliography_classifier = BibliographyClassifier(self.gv)

        if not found_bibliography:
            # run bibliographic classifier (aggression 4)
            bibliography_classifier.run()

        # tei (aggression 3)
        TeiManipulate(self.gv).run()

        # run tei to nlm conversion
        TeiToNlm(self.gv).run(not found_bibliography)

        if self.gv.settings.args['--purenlm']:
            self.debug.print_debug(self, u'Exiting as NLM transform complete')
            return

        manipulate = NlmManipulate(self.gv)

        if not self.gv.used_list_method:
            manipulate.fuse_references()

        # run reference linker
        if not self.args['--nolink']:
            linker = ReferenceLinker(self.gv)
            linker.run(self.args['--interactive'])
            linker.cleanup()

        def exceeds_threshold(setting_name):
            # True when the requested aggression level is strictly greater
            # than the threshold stored under setting_name
            requested = int(self.args['--aggression'])
            threshold = int(self.gv.settings.get_setting(setting_name, self, domain='aggression'))
            return requested > threshold

        # run table and graphic caption classifiers
        captions = CaptionClassifier(self.gv)
        if exceeds_threshold('tablecaptions'):
            captions.run_tables()

        if exceeds_threshold('graphiccaptions'):
            captions.run_graphics()

        # run metadata merge
        metadata.run()

        if self.args['--interactive']:
            bibliography_classifier.run_prompt(True)

        # process any bibliography entries that are possible
        BibliographyDatabase(self.gv).run()

        # remove stranded titles and cleanup
        manipulate.final_clean()

        if self.args['--identifiers']:
            IdGenerator(self.gv).run()

        if self.args['--chain']:
            # construct and run an XSLT chainer
            XslChain(self.gv).run()

        if self.args['--clean']:
            ComplianceEnforcer(self.gv).run()
Beispiel #4
0
    def run_modules(self):
        """
        Drive the conversion pipeline according to the parsed arguments.

        The 'bibscan' command runs the bibliography database scan and nothing
        else. Every other command goes through these stages in order:
        input-to-TEI conversion, metadata pre-clean, size/list/bibliography
        classification, TEI manipulation, TEI-to-NLM conversion, reference
        fusing and linking, caption classification, metadata merge,
        bibliography database processing, final cleanup and the optional
        '--identifiers', '--chain' and '--clean' stages. '--puretei' and
        '--purenlm' end the run right after the TEI and NLM stages
        respectively.

        Always returns None.
        """
        ag = int(self.gv.settings.args['--aggression'])
        level_tag = "[grrr!]" if ag == 10 else ""
        self.debug.print_debug(
            self, u'Running at aggression level {0} {1}'.format(ag, level_tag))

        if ag > 10:
            self.debug.print_debug(
                self,
                "WARNING: safety bail-out features are disabled at aggression level 11"
            )

        if self.args['bibscan']:
            # scan-only mode short-circuits the rest of the pipeline
            BibliographyDatabase(self.gv).scan()
            return

        # check for stylesheets
        self.gv.check_file_exists(self.gv.docx_style_sheet_dir)

        # metadata file
        # NOTE(review): the target is a bare `gv`, not `self.gv`. No local
        # `gv` exists in this method, so this presumably resolves to a
        # module-level name -- confirm whether `self.gv` was meant.
        gv.metadata_file = self.set_metadata_file()

        self.gv.mk_dir(self.gv.output_folder_path)

        # (command, unoconv format) pairs for inputs that need a docx
        # conversion before the docx to tei step; checked in this order
        unoconv_inputs = (('doc', 'doc'), ('odt', 'odt'), ('other', 'unoconv'))

        for command, unoconv_format in unoconv_inputs:
            if self.args[command]:
                # run conversion to docx, then run docx to tei
                UnoconvToDocx(self.gv).run(unoconv_format)
                DocxToTei(self.gv).run(True, self.args['--proprietary'])
                break
        else:
            # reached only when no unoconv-backed command matched
            if self.args['docx']:
                # run docx to tei conversion
                # includes hooks for proprietary transforms if enabled
                DocxToTei(self.gv).run(True, self.args['--proprietary'])
            elif self.args['docxextracted']:
                self.debug.print_debug(self, u'Skipping docx extraction')
                DocxToTei(self.gv).run(False, self.args['--proprietary'])
            elif self.args['tei']:
                self.debug.print_debug(
                    self, u'Skipping docx extraction; processing TEI file')
                DocxToTei(self.gv).run(False,
                                       self.args['--proprietary'],
                                       tei=True)

        if self.args['--puretei']:
            self.debug.print_debug(self,
                                   u'Exiting as TEI transform complete')
            return

        metadata = Metadata(self.gv)
        metadata.pre_clean()

        # run size classifier (aggression 5)
        SizeClassifier(self.gv).run()

        # run bibliographic addins handler (aggression 4)
        found_bibliography = BibliographyAddins(self.gv).run()

        # run list classifier (aggression 4)
        ListClassifier(self.gv).run()

        bibliography_classifier = BibliographyClassifier(self.gv)

        if not found_bibliography:
            # run bibliographic classifier (aggression 4)
            bibliography_classifier.run()

        # tei (aggression 3)
        TeiManipulate(self.gv).run()

        # run tei to nlm conversion
        TeiToNlm(self.gv).run(not found_bibliography)

        if self.gv.settings.args['--purenlm']:
            self.debug.print_debug(self,
                                   u'Exiting as NLM transform complete')
            return

        manipulate = NlmManipulate(self.gv)

        if not self.gv.used_list_method:
            manipulate.fuse_references()

        # run reference linker
        if not self.args['--nolink']:
            reference_linker = ReferenceLinker(self.gv)
            reference_linker.run(self.args['--interactive'])
            reference_linker.cleanup()

        def aggression_above(setting_name):
            # Compare the command-line aggression level with the configured
            # threshold for setting_name (strictly greater than).
            level = int(self.args['--aggression'])
            limit = int(
                self.gv.settings.get_setting(
                    setting_name, self, domain='aggression'))
            return level > limit

        # run caption classifiers for tables, then graphics
        caption_classifier = CaptionClassifier(self.gv)
        if aggression_above('tablecaptions'):
            caption_classifier.run_tables()

        if aggression_above('graphiccaptions'):
            caption_classifier.run_graphics()

        # run metadata merge
        metadata.run()

        if self.args['--interactive']:
            bibliography_classifier.run_prompt(True)

        # process any bibliography entries that are possible
        BibliographyDatabase(self.gv).run()

        # remove stranded titles and cleanup
        manipulate.final_clean()

        if self.args['--identifiers']:
            IdGenerator(self.gv).run()

        if self.args['--chain']:
            # construct and run an XSLT chainer
            XslChain(self.gv).run()

        if self.args['--clean']:
            ComplianceEnforcer(self.gv).run()