예제 #1
0
 def __init__(self):
     """Read the command line and prepare the state used by the converters.

     Creates the ``Debug``, ``Settings`` and ``GV`` helpers, calls
     ``Debuggable.__init__`` with the label ``'Main'``, enables debugging
     when ``--debug`` is set and stores the path, the input file, the
     lower-cased output type and the directory of this source file.
     """
     cli = self.read_command_line()
     self.args = cli
     self.debug = Debug()
     self.settings = Settings(cli)
     self.gv = GV(self.settings)
     Debuggable.__init__(self, 'Main')
     debug_requested = cli.get('--debug')
     if debug_requested:
         self.debug.enable_debug()
     self.dr = cli.get('<path>')
     self.f = cli.get('<input_file>')
     requested_type = cli.get('--out-type')
     self.out_type = requested_type.lower()
     # Directory of the file this frame's code was loaded from.
     source_file = inspect.getfile(inspect.currentframe())
     self.script_path = os.path.dirname(os.path.abspath(source_file))
예제 #2
0
 def __init__(self):
     """Read the command line, set up the helpers and parse the input file.

     Creates the ``Debug``, ``Settings`` and ``GV`` helpers, calls
     ``Debuggable.__init__`` with the label ``'Main'``, enables debugging
     when ``--debug`` is set and parses ``<path>/<input_file>`` into
     ``self.tr``.
     """
     options = self.read_command_line()
     self.args = options
     self.debug = Debug()
     self.settings = Settings(options)
     self.gv = GV(self.settings)
     Debuggable.__init__(self, 'Main')
     if options.get('--debug'):
         self.debug.enable_debug()
     self.dr = options.get('<path>')
     self.f = options.get('<input_file>')
     self.stand_alone = options.get('--stand-alone')
     # The input document is parsed during construction.
     document = os.path.join(self.dr, self.f)
     self.tr = etree.parse(document)
예제 #3
0
    def __init__(self):
        """Read the command line, parse the input file and set up debugging.

        Stores the path, input file, scheme and numbering-tag option, parses
        ``<path>/<input_file>`` into ``self.tr`` and only then calls
        ``Debuggable.__init__`` with the label ``'Main'``.
        """
        cli = self.read_command_line()
        self.args = cli
        self.debug = Debug()
        self.settings = Settings(cli)
        self.gv = GV(self.settings)
        self.dr = cli.get('<path>')
        self.f = cli.get('<input_file>')
        self.scheme = cli.get('<scheme>')
        self.set_numbering_tags = cli.get('--set-numbering-tags')
        input_path = os.path.join(self.dr, self.f)
        self.tr = etree.parse(input_path)

        Debuggable.__init__(self, 'Main')
        wants_debug = cli.get('--debug')
        if wants_debug:
            self.debug.enable_debug()
예제 #4
0
    def __init__(self):
        """Read the command line and initialise the run state.

        ``current_result`` is a timestamp (down to seconds) followed by the
        first four characters of a freshly generated UUID.  ``config`` and
        ``all_typesetters`` start out as ``None``.
        """
        cli = self.read_command_line()
        self.args = cli
        self.debug = Debug()
        self.settings = Settings(cli)
        self.gv = GV(self.settings)
        Debuggable.__init__(self, 'Main')
        if cli.get('--debug'):
            self.debug.enable_debug()

        stamp = datetime.datetime.now().strftime("%Y_%m_%d-%H-%M-%S-")
        unique_suffix = str(uuid.uuid4())[:4]
        self.current_result = stamp + unique_suffix
        self.config = None
        self.all_typesetters = None
        # Directory of this file, with symbolic links resolved.
        resolved_file = os.path.realpath(__file__)
        self.script_folder = os.path.dirname(resolved_file)
예제 #5
0
파일: prepare.py 프로젝트: withanage/mpt
 def __init__(self):
     """Read the command line, set up the helpers and parse the input file.

     Creates the ``Debug`` and ``GV`` helpers, calls
     ``Debuggable.__init__`` with the label ``'Main'``, enables debugging
     when ``--debug`` is set and parses ``<path>/<input_file>`` into
     ``self.tr``.
     """
     options = self.read_command_line()
     self.args = options
     self.debug = Debug()
     self.gv = GV()
     Debuggable.__init__(self, 'Main')
     debug_requested = options.get('--debug')
     if debug_requested:
         self.debug.enable_debug()
     self.dr = options.get('<path>')
     self.f = options.get('<input_file>')
     document = os.path.join(self.dr, self.f)
     self.tr = etree.parse(document)
예제 #6
0
 def __init__(self):
     """Read the command line and prepare the state used by the converters.

     Creates the ``Debug`` and ``GV`` helpers, calls
     ``Debuggable.__init__`` with the label ``'Main'``, enables debugging
     when ``--debug`` is set and stores the path, the input file, the
     lower-cased output type and the directory of this source file.
     """
     cli = self.read_command_line()
     self.args = cli
     self.debug = Debug()
     self.gv = GV()
     Debuggable.__init__(self, 'Main')
     if cli.get('--debug'):
         self.debug.enable_debug()
     self.dr = cli.get('<path>')
     self.f = cli.get('<input_file>')
     chosen_type = cli.get('--out-type')
     self.out_type = chosen_type.lower()
     # Directory of the file this frame's code was loaded from.
     this_file = inspect.getfile(inspect.currentframe())
     self.script_path = os.path.dirname(os.path.abspath(this_file))
예제 #7
0
파일: mpt.py 프로젝트: withanage/mpt
    def __init__(self):
        """Read the command line and load the typesetter configuration.

        ``current_result`` is a timestamp (down to minutes) followed by the
        first eight characters of a freshly generated UUID.  The JSON file
        named by ``<config_file>`` is read through ``gv.read_json`` and its
        ``typesetters`` entry is kept in ``all_typesetters``.
        """
        cli = self.read_command_line()
        self.args = cli
        self.debug = Debug()
        self.gv = GV()
        Debuggable.__init__(self, 'Main')
        if cli.get('--debug'):
            self.debug.enable_debug()

        stamp = datetime.datetime.now().strftime("%Y_%m_%d-%H-%M-")
        unique_suffix = str(uuid.uuid4())[:8]
        self.current_result = stamp + unique_suffix
        # Indexing (not .get) so a missing <config_file> key raises KeyError.
        config_file = cli['<config_file>']
        self.config = self.gv.read_json(config_file)
        self.all_typesetters = self.config.get('typesetters')
예제 #8
0
파일: merge.py 프로젝트: withanage/mpt
    def __init__(self):
        """Read the command line, parse the input file and set up debugging.

        Keeps the uuid provided by ``GV`` in ``self.uid``, stores the path,
        input file, scheme and numbering-tag option, parses
        ``<path>/<input_file>`` into ``self.tr`` and only then calls
        ``Debuggable.__init__`` with the label ``"Main"``.
        """
        options = self.read_command_line()
        self.args = options
        self.debug = Debug()
        self.gv = GV()
        self.uid = self.gv.uuid
        self.dr = options.get("<path>")
        self.f = options.get("<input_file>")
        self.scheme = options.get("<scheme>")
        self.set_numbering_tags = options.get("--set-numbering-tags")
        input_path = os.path.join(self.dr, self.f)
        self.tr = etree.parse(input_path)

        Debuggable.__init__(self, "Main")
        wants_debug = options.get("--debug")
        if wants_debug:
            self.debug.enable_debug()
예제 #9
0
class Disseminate(Debuggable):

    def __init__(self):
        """Read the command line and prepare the state used by the converters.

        Creates the ``Debug``, ``Settings`` and ``GV`` helpers, calls
        ``Debuggable.__init__`` with the label ``'Main'``, enables debugging
        when ``--debug`` is set and stores the path, the input file, the
        lower-cased output type and the directory of this source file.
        """
        cli = self.read_command_line()
        self.args = cli
        self.debug = Debug()
        self.settings = Settings(cli)
        self.gv = GV(self.settings)
        Debuggable.__init__(self, 'Main')
        wants_debug = cli.get('--debug')
        if wants_debug:
            self.debug.enable_debug()
        self.dr = cli.get('<path>')
        self.f = cli.get('<input_file>')
        requested_type = cli.get('--out-type')
        self.out_type = requested_type.lower()
        # Directory of the file this frame's code was loaded from.
        source_file = inspect.getfile(inspect.currentframe())
        self.script_path = os.path.dirname(os.path.abspath(source_file))

    @staticmethod
    def read_command_line():
        """
        Parse the command line with docopt, using the module docstring as the usage text.

        Returns
        -------
        docopt : dictionary
          A dictionary whose keys are the names of the command-line elements
          and whose values are the parsed values of those elements.
        """
        usage = __doc__
        parsed = docopt(usage, version='Disseminate 0.1')
        return parsed



    def get_saxon_path(self):
        """Locate the saxon file to run.

        The default location (``script_path`` joined with the ``saxon`` entry
        of ``gv.apps``) wins when it is an existing file; otherwise the file
        named by the ``--saxon`` option is used, provided it exists.

        Returns
        --------
        saxon : str or bool
            Path of the saxon file, or False when none was found.

        """
        default_location = os.path.join(self.script_path, self.gv.apps.get('saxon'))
        if os.path.isfile(default_location):
            return default_location

        custom_location = self.args.get('--saxon')
        if custom_location and os.path.isfile(custom_location):
            return custom_location
        return False

    def get_module_name(self):
        """
        Name of this module, for use in debugging and logging.

        Returns
        -------
        name string
         Name of the Module
        """
        return 'disseminate'

    def process(self, args):
        """Run an external command and collect its result.

        The argument list is joined with spaces and split again on single
        spaces before it is handed to ``subprocess.Popen``, so one list
        element may carry several space-separated arguments.  The command
        line is echoed to standard output first.  Standard error of the
        command is captured, echoed to this process's standard error and
        returned.  If the command exits with code 1, the interpreter exits
        with status 1; other exit codes are returned to the caller.

        Parameters
        ----------
        args : list
            application arguments in the correct order.


        Returns
        -------
        output : bytes
            captured standard output.
        err : bytes
            captured standard error.
        exit_code : int
            exit code of the command.

        See Also
        --------
        subprocess.Popen()

        """

        command_line = ' '.join(args)
        print(command_line)
        # stderr has to be piped too: without it communicate() returns None
        # for the error stream and there is nothing to report or return.
        process = Popen(command_line.strip().split(' '), stdout=PIPE, stderr=PIPE)
        output, err = process.communicate()
        # communicate() has already waited for the child to terminate.
        exit_code = process.returncode
        if err:
            # Keep diagnostics visible now that they are no longer written
            # straight to the terminal by the child process.
            sys.stderr.write(err if isinstance(err, str) else err.decode('utf-8', 'replace'))
        if exit_code == 1:
            sys.exit(1)

        return output, err, exit_code

    def run(self):
        """
        Entry point: create the output for the stored output type.

        See Also
        --------
        create_output

        """
        requested_type = self.out_type
        self.create_output(requested_type)


    def create_output(self, out_type):
        """
        Run the conversion once per formatter/medium combination.

        ``--formatter`` and ``--medium`` are comma-separated lists; every
        name is lower-cased.  For ``'fo'`` the command line is built by
        ``run_saxon``, for ``'pdf'`` by ``run_fop_processor``; each command is
        executed through ``process``.  The interpreter exits with status 1
        when the output type is unknown, saxon cannot be located or no FO
        processor command could be built.

        Parameters
        ----------
        out_type: str
            Output Type, ``'fo'`` or ``'pdf'``.


        See Also
        -------
        run_saxon(), get_saxon_path(), run_fop_processor(), process()
        """
        if out_type not in ('fo', 'pdf'):
            # An unknown type used to surface only as an unbound ``args``
            # variable further down.
            self.debug.print_console(self, 'Unsupported output type: ' + str(out_type))
            sys.exit(1)

        formatters = [name.lower() for name in self.args.get('--formatter').split(',')]
        mediums = [name.lower() for name in self.args.get('--medium').split(',')]

        # The target directories do not depend on formatter or medium, so
        # they are created once up front.
        # NOTE(review): os.pathsep separates entries of a search-path list
        # (':' or ';'); it is not the directory separator.  Left unchanged --
        # confirm what create_dirs_recursive expects.
        self.gv.create_dirs_recursive(self.args.get('<path>').split(os.pathsep))

        for f in formatters:
            for m in mediums:
                if out_type == 'fo':
                    self.debug.print_console(self, self.gv.RUNNING_FO_CONVERSION)
                    saxon_path = self.get_saxon_path()
                    if not saxon_path:
                        # get_saxon_path() returns False when nothing was found.
                        self.debug.print_console(self, 'saxon was not found')
                        sys.exit(1)
                    args = self.run_saxon(saxon_path, f, m)
                else:
                    self.debug.print_console(self, self.gv.RUNNING_PDF_CONVERSION)
                    args = self.run_fop_processor(f, m)
                    if not args:
                        # An empty list means the formatter is unknown or its
                        # program is unavailable; there is nothing to execute.
                        self.debug.print_console(self, 'No usable FO processor for formatter: ' + f)
                        sys.exit(1)
                self.process(args)


    def run_fop_processor(self, formatter, medium):
        """
        Build the command line of the FO processor selected by ``formatter``.

        Parameters
        ----------
        formatter : str
            ``'fop'`` or ``'ah'`` (case-insensitive).
        medium : str
            name of the medium

        Returns
        -------
        args : list
            Arguments built by ``run_apache_fop`` or ``run_ah_fop``; an empty
            list when the formatter is neither of the two or
            ``gv.check_program`` rejects the program path.
        """
        kind = formatter.lower()
        if kind == 'fop':
            program = os.path.join(self.script_path, self.gv.apps.get('fop'))
            if self.gv.check_program(program):
                return self.run_apache_fop(program, formatter, medium)
        elif kind == 'ah':
            program = self.gv.apps.get('ah')
            if self.gv.check_program(program):
                return self.run_ah_fop(program, formatter, medium)
        return []

    def run_ah_fop(self, pth, formatter, medium):
        """
        Build the argument list for the ``ah`` formatter.

        The FO file is looked up in the directory of the input file, the PDF
        is placed in ``self.dr``; both are named
        ``<uuid>.<formatter>.<medium>``.

        Returns
        -------
        args : list
            ``[pth, '-d', <fo file>, '-o', <pdf file>]``
        """
        stem = '{}.{}.{}'.format(self.gv.uuid, formatter, medium)
        fo_file = '{}/{}.fo'.format(os.path.dirname(self.f), stem)
        pdf_file = '{}/{}.pdf'.format(self.dr, stem)
        return [pth, '-d', fo_file, '-o', pdf_file]



    def run_apache_fop(self, pth, formatter, medium):
        """
        Build the argument list for the ``fop`` formatter.

        The FO file is looked up in the directory of the input file, the PDF
        is placed in ``self.dr``; both are named
        ``<uuid>.<formatter>.<medium>``.  The configuration file is
        ``configurations/fop/conf/<formatter>.<medium>.xml`` below
        ``script_path``.

        Returns
        -------
        args : list
            ``[pth, '-fo', <fo file>, '-pdf', <pdf file>, '-c', <config file>]``
        """
        stem = '{}.{}.{}'.format(self.gv.uuid, formatter, medium)
        fo_file = '{}/{}.fo'.format(os.path.dirname(self.f), stem)
        pdf_file = '{}/{}.pdf'.format(self.dr, stem)
        config_file = '{}/configurations/fop/conf/{}.{}.xml'.format(self.script_path, formatter, medium)
        return [pth, '-fo', fo_file, '-pdf', pdf_file, '-c', config_file]



    def run_saxon(self, saxon_path, formatter, medium):
        """
        Build the ``java -jar`` command line that runs saxon.

        The stylesheet named by ``--xsl`` is looked up in the ``stylesheets``
        folder below ``script_path``.  The interpreter exits with status 1
        when the input file does not exist.

        Parameters
        ---------
        saxon_path : str
            absolute path  of the saxon binary jar file
        formatter : str
            name of the FO formatter
        medium : str
            name of the medium

        Returns
        ------
        args:list
            List of arguments for saxon execution path

        """
        args = ["java", "-jar", saxon_path]

        stylesheet = self.args.get('--xsl')
        if stylesheet:
            args.append("-xsl:" + os.sep.join([self.script_path, 'stylesheets', stylesheet]))

        source = self.args.get('<input_file>')
        if not os.path.exists(source):
            self.debug.print_debug(self, self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + ' ' + source)
            sys.exit(1)
        args.append("-s:" + source)

        fmt = formatter.lower()
        med = medium.lower()
        # Output file: <uuid>.<formatter>.<medium>.fo inside <path>.
        file_name = '.'.join([self.gv.uuid, fmt, med, 'fo'])
        args.extend([
            "-o:" + os.path.join(self.args.get('<path>'), file_name),
            'formatter=' + fmt,
            'medium=' + med,
        ])
        return args
예제 #10
0
class Merge(Debuggable):
    """
     Standalone Processing object which merges current  JATS/BITS XML file in to the Body of a BITS-XML document.

    """
    def __init__(self):
        """Read the command line, parse the input file and set up debugging.

        Stores the path, input file, scheme and numbering-tag option, parses
        ``<path>/<input_file>`` into ``self.tr`` and only then calls
        ``Debuggable.__init__`` with the label ``'Main'``.
        """
        cli = self.read_command_line()
        self.args = cli
        self.debug = Debug()
        self.settings = Settings(cli)
        self.gv = GV(self.settings)
        self.dr = cli.get('<path>')
        self.f = cli.get('<input_file>')
        self.scheme = cli.get('<scheme>')
        self.set_numbering_tags = cli.get('--set-numbering-tags')
        input_path = os.path.join(self.dr, self.f)
        self.tr = etree.parse(input_path)

        Debuggable.__init__(self, 'Main')
        wants_debug = cli.get('--debug')
        if wants_debug:
            self.debug.enable_debug()

    @staticmethod
    def read_command_line():
        """
        Parse the command line with docopt, using the module docstring as the usage text.

        Returns
        -------
        docopt : dictionary
          A dictionary whose keys are the names of the command-line elements
          and whose values are the parsed values of those elements.
        """
        usage = __doc__
        parsed = docopt(usage, version='xmlMerge 0.0.1')
        return parsed

    def create_output_bits(self):
        """
        Write the BITS output file.

        When the output file already exists, the current input is appended to
        its ``book-body`` as a new book-part; otherwise a complete book is
        created.  The tree is post-processed and written without pretty
        printing.

        See Also
        --------
        create_book_part_bits, create_book_bits, process, do_file_io

        """
        existing_file = os.path.join(self.dr, self.gv.uuid)
        target_file = os.path.join(self.dr, os.path.basename(self.gv.uuid))

        if os.path.isfile(existing_file):
            book = etree.parse(existing_file)
            body = book.find(".//book-body")
            new_part = self.create_book_part_bits()
            body.append(new_part)
        else:
            book = self.create_book_bits()
        book = self.process(book)

        serialized = etree.tostring(
            book,
            pretty_print=False,
            xml_declaration=True,
            encoding='UTF-8',
            standalone='yes',
        )
        self.do_file_io(serialized, 'w', target_file)

    def create_output_jats(self):
        """
        Write the JATS output file.

        When the output file already exists, the children of the input body
        are appended to its ``body`` and the references and footnotes of the
        input back matter are appended to its ``back/ref-list`` and
        ``back/fn-group``.  An input without back matter contributes no
        references or footnotes.  When the output file does not exist yet, a
        new article is created.  The tree is post-processed and written
        without pretty printing.

        See Also
        --------
        get_xml_parts, create_journal_jats, process, do_file_io

        """
        fuf = os.path.join(self.dr, self.gv.uuid)
        pt = os.path.join(self.dr, os.path.basename(self.gv.uuid))

        if os.path.isfile(fuf):
            trf = etree.parse(fuf)
            bpf = trf.find(".//body")
            f, bd, bk = self.get_xml_parts()
            if bd is not None:
                # list() takes a snapshot: appending moves each child out of
                # the input body while it is being iterated.
                for sec in list(bd):
                    bpf.append(sec)

            # get_xml_parts() yields None for an input without <back>; there
            # is then nothing to merge into the back matter.
            if bk is not None:
                # NOTE(review): an existing file lacking back/ref-list or
                # back/fn-group still fails here -- confirm whether the
                # containers should be created on demand.
                bkrf = trf.find(".//back/ref-list")
                for r in bk.findall('.//ref-list/ref'):
                    bkrf.append(r)

                bkff = trf.find(".//back/fn-group")
                for fn in bk.findall('.//fn-group/fn'):
                    bkff.append(fn)

        else:
            trf = self.create_journal_jats()

        trf = self.process(trf)
        self.do_file_io(
            etree.tostring(trf,
                           pretty_print=False,
                           xml_declaration=True,
                           encoding='UTF-8',
                           standalone='yes'), 'w', pt)

    def process(self, tr):
        """
        Apply the post-processing steps to an element tree.

        Numbering tags are set through ``gv.set_numbering_tags`` when the
        ``--set-numbering-tags`` option (a comma-separated list) was given;
        afterwards the book-part attributes are set.

        Parameters
        ----------
        tr : elementtree
            element tree as input

        Returns
        -------
        tr : elementtree
            transformed element tree

        See Also
        --------
        globals.set_numbering_tags(), set_book_part_attributes()

        """
        tag_spec = self.set_numbering_tags
        if tag_spec:
            tr = self.gv.set_numbering_tags(tag_spec.split(','), tr)

        self.set_book_part_attributes(tr)
        return tr

    def set_book_part_attributes(self, tr):
        """
        Mark every book-part as a chapter and give it a running id.

        Ids are ``ch_0``, ``ch_1``, ... in the order the book-parts are found;
        existing ``id`` and ``book-part-type`` values are overwritten.

        Parameters
        ----------
        tr : elementtree
            element tree as input


        Returns
        -------
        tr : elementtree
            the same tree, modified in place


        """
        for position, part in enumerate(tr.findall('.//book-part')):
            part.set('id', "ch_" + str(position))
            part.set('book-part-type', "chapter")
        return tr

    def create_metadata_path(self, metadata):
        """
        Build the path of the metadata file that belongs to the output file.

        The last four components of the input file's directory are dropped,
        then ``metadata/<name>.<metadata>.xml`` is appended, where ``<name>``
        is the output file name (``gv.uuid``) without its extension.

        Parameters
        ----------
        metadata : str
            Suffix of the metadata  files

        Returns
        -------
        pth : str
            Path of the metadata file in the folder structure

        Notes
        -----
        We assume that  metadata files are stored in a sub-folder named metadata
        """
        components = os.path.dirname(self.f).split(os.sep)[:-4]
        stem = os.path.splitext(os.path.basename(self.gv.uuid))[0]
        file_name = stem + '.' + metadata + '.' + 'xml'

        pth = os.sep.join(components + ['metadata', file_name])
        self.debug.print_debug(self, 'merging headers' + str(pth))
        return pth

    def get_module_name(self):
        """
        Name of this module, for use in debugging and logging.

        Returns
        -------
        name string
         Name of the Module
        """
        return 'merge'

    def create_book_bits(self):
        """
        Create a complete BITS ``book`` element around the current input.

        The ``--metadata`` option is required: without it the interpreter
        exits with a message.  When the metadata file exists, its
        ``book-meta`` element becomes the first child of the book; when it
        does not, a console message is printed and the book is built without
        metadata.  The input itself is added as the single book-part of the
        ``book-body``.

        Returns
        -------
        book : elementtree
            ``book`` element with the attributes ``dtd-version``,
            ``xml:lang`` and ``book-type`` set.

        See Also
        ---------
        create_metadata_path, create_book_part_bits

        """
        namespaces = {
            'xlink': "http://www.w3.org/1999/xlink",
            'mml': "http://www.w3.org/1998/Math/MathML",
            "xml": "http://www.w3.org/XML/1998/namespace"
        }
        book = etree.Element(etree.QName('book'), nsmap=namespaces)
        book.attrib['dtd-version'] = "2.1"
        lang_attribute = etree.QName('{http://www.w3.org/XML/1998/namespace}lang')
        book.attrib[lang_attribute] = "de"
        book.attrib['book-type'] = "proceedings"

        metadata = self.args.get('--metadata')
        if not metadata:
            sys.exit('Metadata argument undefined')

        pth = self.create_metadata_path(metadata)
        self.debug.print_console(self, 'merging headers' + str(pth))
        if os.path.isfile(pth):
            book_meta = etree.parse(pth).find('.//book-meta')
            book.insert(0, book_meta)
        else:
            # A missing metadata file is reported but is not fatal.
            self.debug.print_console(
                self, self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + str(pth))

        body = etree.Element("book-body")
        body.append(self.create_book_part_bits())
        book.append(body)

        return book

    def create_journal_jats(self):
        """
        Create a JATS ``article`` element from the current input.

        With ``--metadata`` the front matter is the root element of the
        metadata file, which must be ``front`` (otherwise the interpreter
        exits with status 1); a missing metadata file is reported on the
        console.  Without ``--metadata`` the front matter of the input is
        used.  Body and back matter come from the input; parts the input
        does not contain are left out, and a missing or empty back matter is
        replaced by a ``back`` element holding empty ``fn-group`` and
        ``ref-list`` containers.

        Returns
        -------
        journal : elementtree
            ``article`` element with the attributes ``dtd-version`` and
            ``xml:lang`` set.

        See Also
        ---------
        create_metadata_path, get_xml_parts

        """

        nsmap = {
            'xlink': "http://www.w3.org/1999/xlink",
            'mml': "http://www.w3.org/1998/Math/MathML",
            "xml": "http://www.w3.org/XML/1998/namespace"
        }
        journal = etree.Element(etree.QName('article'), nsmap=nsmap)
        journal.attrib['dtd-version'] = "3.0"
        journal.attrib[etree.QName(
            '{http://www.w3.org/XML/1998/namespace}lang')] = "de"

        # Any of the three parts is None when the input lacks that element.
        f, bd, bk = self.get_xml_parts()

        metadata = self.args.get('--metadata')

        if metadata:
            pth = self.create_metadata_path(metadata)
            if os.path.isfile(pth):
                bpm = etree.parse(pth).find('.')
                if bpm is not None:
                    if bpm.getroottree().getroot().tag == 'front':
                        journal.insert(0, bpm)
                    else:
                        self.debug.print_debug(self,
                                               'front metadata unspecified')
                        sys.exit(1)
            else:
                # Same report as create_book_bits gives for a missing file.
                self.debug.print_console(
                    self, self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + str(pth))
        elif f is not None:
            journal.insert(0, f)

        if bd is not None:
            journal.append(bd)
        if bk is not None and len(bk) > 0:
            journal.append(bk)
        else:
            # Placeholder containers so later merges find a target.
            back = etree.Element(etree.QName('back'))
            back.append(etree.Element(etree.QName('fn-group')))
            back.append(etree.Element(etree.QName('ref-list')))
            journal.append(back)
        return journal

    def create_book_part_bits(self):
        """
        Wrap the parts of the current input in a new ``book-part`` element.

        The front matter is included only when it has child elements; body
        and back matter are included whenever they are present.

        Returns
        -------
        bp : elementtree
            Book part elementTree
        """
        front, body, back = self.get_xml_parts()

        book_part = etree.Element("book-part")
        if front is not None and len(front):
            book_part.append(front)
        for section in (body, back):
            if section is not None:
                book_part.append(section)
        return book_part

    def get_xml_parts(self):
        """
        Return front matter, body and back matter of the parsed input, in that order.

        The front matter is the first ``front`` element or, when there is
        none, the first ``book-part-meta`` element.  A part that is not found
        is returned as ``None``.

        Returns
        -------
        f : elementtree
            Front-matter of JATS elementTree
        bd : elementtree
            Body of JATS elementTree
        bk : elementtree
            Back-matter of JATS elementTree

        """
        root = self.tr.getroot()
        front = root.find(".//front")
        if front is None:
            front = root.find(".//book-part-meta")
        return front, root.find(".//body"), root.find(".//back")

    def do_file_io(self, s, mode, pth):
        """
        Executes read or write operations on a path

        Trailing carriage returns and line feeds are stripped from the
        content before it is written.  On an I/O error the error is reported
        and the interpreter exits with status 1.

        Parameters
        ----------
        s: str or bytes
            Content to be written or None for read
        mode: str
            w for write , r for read
        pth : str
            Path to the file to be read or written

        Returns
        -------
        content : str or None
            The file content for mode ``r``, otherwise None.

        """
        try:
            if mode == 'w':
                if isinstance(s, bytes):
                    # Serialised XML arrives as bytes; it needs a binary file
                    # and a bytes argument for rstrip().
                    with open(pth, 'wb') as handle:
                        handle.write(s.rstrip(b'\r\n'))
                else:
                    with open(pth, 'w') as handle:
                        handle.write(s.rstrip('\r\n'))
                return None
            if mode == 'r':
                with open(pth, 'r') as handle:
                    return handle.read()
            # Any other mode only opens the file, as before, but the handle
            # is now closed again.
            with open(pth, mode):
                pass
            return None
        except IOError as i:
            self.debug.print_debug(self, i)
            print(i)
            sys.exit(1)

    def run(self):
        """
         Runs the configuration on the processing object: creates the output
         directories and merges the input file according to the scheme
         (``bits`` or ``jats``).  Any other scheme only creates the
         directories.

        See Also
        --------
        create_output_bits, create_output_jats

        """

        self.gv.create_dirs_recursive(self.dr.split('/'))
        if self.scheme == 'bits':
            self.create_output_bits()

        elif self.scheme == 'jats':
            # create_output_jats() writes the file and returns nothing, so
            # its result must not replace the parsed input tree in self.tr.
            self.create_output_jats()
예제 #11
0
class Disseminate(Debuggable):

    def __init__(self):
        """Read the command line and prepare the state used by the converters.

        Creates the ``Debug`` and ``GV`` helpers, calls
        ``Debuggable.__init__`` with the label ``'Main'``, enables debugging
        when ``--debug`` is set and stores the path, the input file, the
        lower-cased output type and the directory of this source file.
        """
        cli = self.read_command_line()
        self.args = cli
        self.debug = Debug()
        self.gv = GV()
        Debuggable.__init__(self, 'Main')
        wants_debug = cli.get('--debug')
        if wants_debug:
            self.debug.enable_debug()
        self.dr = cli.get('<path>')
        self.f = cli.get('<input_file>')
        requested_type = cli.get('--out-type')
        self.out_type = requested_type.lower()
        # Directory of the file this frame's code was loaded from.
        source_file = inspect.getfile(inspect.currentframe())
        self.script_path = os.path.dirname(os.path.abspath(source_file))

    @staticmethod
    def read_command_line():
        """
        Parse the command line with docopt, using the module docstring as the usage text.

        Returns
        -------
        docopt : dictionary
          A dictionary whose keys are the names of the command-line elements
          and whose values are the parsed values of those elements.
        """
        usage = __doc__
        parsed = docopt(usage, version='Disseminate 0.1')
        return parsed



    def get_saxon_path(self):
        """Locate the saxon file to run.

        The default location (``script_path`` joined with
        ``gv.METYPESET_PATH``) wins when it is an existing file; otherwise the
        file named by the ``--saxon`` option is used, provided it exists.

        Returns
        --------
        saxon : str or bool
            Path of the saxon file, or False when none was found.

        """
        default_location = os.path.join(self.script_path, self.gv.METYPESET_PATH)
        if os.path.isfile(default_location):
            return default_location

        custom_location = self.args.get('--saxon')
        if custom_location and os.path.isfile(custom_location):
            return custom_location
        return False

    def get_module_name(self):
        """
        Name of this module, for use in debugging and logging.

        Returns
        -------
        name string
         Name of the Module
        """
        return 'OUTPUT Generation'

    def process(self, args):
        """Run an external command and collect its result.

        The argument list is joined with spaces and split again on single
        spaces before it is handed to ``subprocess.Popen``, so one list
        element may carry several space-separated arguments.  The command
        line is echoed to standard output first.  Standard error of the
        command is captured, echoed to this process's standard error and
        returned.  If the command exits with code 1, the interpreter exits
        with status 1; other exit codes are returned to the caller.

        Parameters
        ----------
        args : list
            application arguments in the correct order.


        Returns
        -------
        output : bytes
            captured standard output.
        err : bytes
            captured standard error.
        exit_code : int
            exit code of the command.

        See Also
        --------
        subprocess.Popen()

        """

        command_line = ' '.join(args)
        # print() with a single argument behaves the same on Python 2 and 3.
        print(command_line)
        # stderr has to be piped too: without it communicate() returns None
        # for the error stream and there is nothing to report or return.
        process = Popen(command_line.strip().split(' '), stdout=PIPE, stderr=PIPE)
        output, err = process.communicate()
        # communicate() has already waited for the child to terminate.
        exit_code = process.returncode
        if err:
            # Keep diagnostics visible now that they are no longer written
            # straight to the terminal by the child process.
            sys.stderr.write(err if isinstance(err, str) else err.decode('utf-8', 'replace'))
        if exit_code == 1:
            sys.exit(1)
        return output, err, exit_code

    def run(self):
        """
        Entry point: create the output for the stored output type.

        See Also
        --------
        create_output

        """
        requested_type = self.out_type
        self.create_output(requested_type)


    def create_output(self, out_type):
        """
        Run the conversion once per formatter/medium combination.

        ``--formatter`` and ``--medium`` are comma-separated lists; every
        name is lower-cased.  For ``'fo'`` the command line is built by
        ``run_saxon``, for ``'pdf'`` by ``run_fop_processor``; each command is
        executed through ``process`` and its standard output is printed.  The
        interpreter exits with status 1 when the output type is unknown,
        saxon cannot be located or no FO processor command could be built.

        Parameters
        ----------
        out_type: str
            Output Type, ``'fo'`` or ``'pdf'``.


        See Also
        -------
        run_saxon(), get_saxon_path(), run_fop_processor(), process()
        """
        if out_type not in ('fo', 'pdf'):
            # An unknown type used to surface only as an unbound ``args``
            # variable further down.
            self.debug.print_console(self, 'Unsupported output type: ' + str(out_type))
            sys.exit(1)

        formatters = [name.lower() for name in self.args.get('--formatter').split(',')]
        mediums = [name.lower() for name in self.args.get('--medium').split(',')]

        # The target directories do not depend on formatter or medium, so
        # they are created once up front.
        # NOTE(review): os.pathsep separates entries of a search-path list
        # (':' or ';'); it is not the directory separator.  Left unchanged --
        # confirm what create_dirs_recursive expects.
        self.gv.create_dirs_recursive(self.args.get('<path>').split(os.pathsep))

        for f in formatters:
            for m in mediums:
                if out_type == 'fo':
                    self.debug.print_console(self, self.gv.RUNNING_FO_CONVERSION)
                    saxon_path = self.get_saxon_path()
                    if not saxon_path:
                        # get_saxon_path() returns False when nothing was found.
                        self.debug.print_console(self, 'saxon was not found')
                        sys.exit(1)
                    args = self.run_saxon(saxon_path, f, m)
                else:
                    self.debug.print_console(self, self.gv.RUNNING_PDF_CONVERSION)
                    args = self.run_fop_processor(f, m)
                    if not args:
                        # An empty list means the formatter is unknown or its
                        # program is unavailable; there is nothing to execute.
                        self.debug.print_console(self, 'No usable FO processor for formatter: ' + f)
                        sys.exit(1)
                output, err, exit_code = self.process(args)
                # print() with a single argument behaves the same on Python 2 and 3.
                print(output)

    def run_fop_processor(self, formatter, medium):
        """
        Build the command line of the FO processor selected by ``formatter``.

        Parameters
        ----------
        formatter : str
            ``'fop'`` or ``'ah'`` (case-insensitive).
        medium : str
            name of the medium

        Returns
        -------
        args : list
            Arguments built by ``run_apache_fop`` or ``run_ah_fop``; an empty
            list when the formatter is neither of the two or
            ``gv.check_program`` rejects the program path.
        """
        kind = formatter.lower()
        if kind == 'fop':
            program = os.path.join(self.script_path, self.gv.APACHE_FOP_PATH)
            if self.gv.check_program(program):
                return self.run_apache_fop(program, formatter, medium)
        elif kind == 'ah':
            program = self.gv.ANTENNA_HOUSE_FOP_PATH
            if self.gv.check_program(program):
                return self.run_ah_fop(program, formatter, medium)
        return []

    def run_ah_fop(self, pth, formatter, medium):
        """
        Build the argument list for the ``ah`` formatter.

        The FO file is looked up in the directory of the input file, the PDF
        is placed in ``self.dr``; both are named
        ``<uuid>.<formatter>.<medium>``.

        Returns
        -------
        args : list
            ``[pth, '-d', <fo file>, '-o', <pdf file>]``
        """
        stem = '{}.{}.{}'.format(self.gv.uuid, formatter, medium)
        fo_file = '{}/{}.fo'.format(os.path.dirname(self.f), stem)
        pdf_file = '{}/{}.pdf'.format(self.dr, stem)
        return [pth, '-d', fo_file, '-o', pdf_file]






    def run_apache_fop(self, pth, formatter, medium):
        """
        Build the argument list for the ``fop`` formatter.

        The FO file is looked up in the directory of the input file, the PDF
        is placed in ``self.dr``; both are named
        ``<uuid>.<formatter>.<medium>``.  The configuration file is
        ``configurations/fop/conf/<formatter>.<medium>.xml`` below
        ``script_path``.

        Returns
        -------
        args : list
            ``[pth, '-fo', <fo file>, '-pdf', <pdf file>, '-c', <config file>]``
        """
        stem = '{}.{}.{}'.format(self.gv.uuid, formatter, medium)
        fo_file = '{}/{}.fo'.format(os.path.dirname(self.f), stem)
        pdf_file = '{}/{}.pdf'.format(self.dr, stem)
        config_file = '{}/configurations/fop/conf/{}.{}.xml'.format(self.script_path, formatter, medium)
        return [pth, '-fo', fo_file, '-pdf', pdf_file, '-c', config_file]



    def run_saxon(self, saxon_path, formatter, medium):
        """
        Build the ``java -jar`` command line that runs saxon.

        The stylesheet named by ``--xsl`` is looked up in the ``stylesheets``
        folder next to ``script_path`` (the last component of ``script_path``
        is dropped).  The interpreter exits with status 1 when the input file
        does not exist.

        Parameters
        ---------
        saxon_path : str
            absolute path  of the saxon binary jar file
        formatter : str
            name of the FO formatter
        medium : str
            name of the medium

        Returns
        ------
        args:list
            List of arguments for saxon execution path

        """
        args = ["java", "-jar", saxon_path]

        stylesheet = self.args.get('--xsl')
        if stylesheet:
            parent_parts = self.script_path.split(os.sep)[:-1]
            args.append("-xsl:" + os.sep.join(parent_parts + ['stylesheets', stylesheet]))

        source = self.args.get('<input_file>')
        if not os.path.exists(source):
            self.debug.print_debug(self, self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + ' ' + source)
            sys.exit(1)
        args.append("-s:" + source)

        fmt = formatter.lower()
        med = medium.lower()
        # Output file: <uuid>.<formatter>.<medium>.fo inside <path>.
        file_name = '.'.join([self.gv.uuid, fmt, med, 'fo'])
        args.extend([
            "-o:" + os.path.join(self.args.get('<path>'), file_name),
            'formatter=' + fmt,
            'medium=' + med,
        ])
        return args
예제 #12
0
파일: merge.py 프로젝트: withanage/mpt
class Merge(Debuggable):
    """
     Standalone Processing object which merges current  JATS/BITS XML file in to the Body of a BITS-XML document.

    """

    def __init__(self):
        """
        Reads the command line, parses the input XML file and sets up debugging.
        """
        cli = self.read_command_line()
        self.args = cli
        self.debug = Debug()
        self.gv = GV()
        self.uid = self.gv.uuid
        self.dr, self.f = cli.get("<path>"), cli.get("<input_file>")
        self.scheme = cli.get("<scheme>")
        self.set_numbering_tags = cli.get("--set-numbering-tags")
        # The input file is looked up inside the project path
        self.tr = etree.parse(os.path.join(self.dr, self.f))

        Debuggable.__init__(self, "Main")
        if cli.get("--debug"):
            self.debug.enable_debug()

    @staticmethod
    def read_command_line():
        """
        Parses the command line parameters with docopt, using the module docstring as usage text.

        Returns
        -------
        docopt : dictionary
          A dictionary whose keys are the names of the command-line elements and whose values are
          the parsed values of those elements.
        """
        cli_options = docopt(__doc__, version="xmlMerge 0.0.1")
        return cli_options

    def create_output_bits(self):
        """
        Writes the BITS output file.

        If the output file already exists, the current file is appended to its book body as a
        book-part; otherwise a new book is created.

        See Also
        --------
        create_book_part_bits, create_book_bits, do_file_io

        """
        book_file = os.path.join(self.dr, self.uid)
        target = os.path.join(self.dr, os.path.basename(self.uid))

        if not os.path.isfile(book_file):
            book = self.create_book_bits()
        else:
            book = etree.parse(book_file)
            book_body = book.find(".//book-body")
            new_part = self.create_book_part_bits()
            book_body.append(new_part)

        book = self.process(book)
        serialized = etree.tostring(
            book, pretty_print=True, xml_declaration=True, encoding="UTF-8", standalone="yes"
        )
        self.do_file_io(serialized, "w", target)

    def process(self, tr):
        """
        Applies all transformations to the BITS-XML element tree

        Parameters
        ----------
        tr : elementtree
            element tree as input

        Returns
        -------
        tr : elementtree
            transformed element tree

        See Also
        --------
        globals.set_numbering_tags(), set_book_part_attributes()

        """
        tag_spec = self.set_numbering_tags
        if tag_spec:
            # The option holds a comma separated list of tag names
            tr = self.gv.set_numbering_tags(tag_spec.split(","), tr)

        self.set_book_part_attributes(tr)
        return tr

    def set_book_part_attributes(self, tr):
        """
        Marks every book-part as a chapter and gives it a running id

        Parameters
        ----------
        tr : elementtree
            element tree as input

        Returns
        -------
        tr : elementtree
            the same element tree, with ``id`` ("ch_0", "ch_1", ...) and
            ``book-part-type`` ("chapter") set on every book-part

        """
        for position, part in enumerate(tr.findall(".//book-part")):
            part.attrib["id"] = "ch_{}".format(position)
            part.attrib["book-part-type"] = "chapter"
        return tr

    def create_metadata_path(self, metadata):
        """
        Builds the path of the metadata file which belongs to the output file

        Parameters
        ----------
        metadata : str
            Suffix of the metadata files

        Returns
        -------
        pth : str
            Path of the metadata file: the folder of the input file with its last four
            components removed, followed by ``metadata/<name>.<metadata><ext>``, where name
            and ext are taken from the base name of the output file

        Notes
        -----
        We assume that metadata files are stored in a sub-folder named metadata
        """
        folders = os.path.dirname(self.f).split(os.sep)[:-4]
        stem, suffix = os.path.splitext(os.path.basename(self.uid))
        folders += ["metadata", stem + "." + metadata + suffix]
        return os.sep.join(folders)

    def create_book_bits(self):
        """
        Creates a full BITS XML book and optionally adds metadata

        Returns
        -------
        book : elementtree
            Book element with a book-body holding the current file as book-part. If the
            --metadata option is set and the metadata file exists, its book-meta element is
            inserted as first child.

        See Also
        ---------
        create_metadata_path, create_book_part_bits

        """
        namespaces = {
            "xlink": "http://www.w3.org/1999/xlink",
            "mml": "http://www.w3.org/1998/Math/MathML",
            "xml": "http://www.w3.org/XML/1998/namespace",
        }
        book = etree.Element(etree.QName("book"), nsmap=namespaces)
        book.attrib["dtd-version"] = "2.1"
        book.attrib[etree.QName("{http://www.w3.org/XML/1998/namespace}lang")] = "de"
        book.attrib["book-type"] = "proceedings"

        meta_suffix = self.args.get("--metadata")
        meta_file = self.create_metadata_path(meta_suffix) if meta_suffix else None
        if meta_file is not None and os.path.isfile(meta_file):
            book_meta = etree.parse(meta_file).find(".//book-meta")
            book.insert(0, book_meta)

        body = etree.Element("book-body")
        body.append(self.create_book_part_bits())
        book.append(body)

        return book

    def create_book_part_bits(self):
        """
        Creates a book-part element from the parts of the current JATS XML file.

        Returns
        -------
        bp : elementtree
            book-part element holding the front-matter, body and back-matter of the input
            file, in this order. Parts which are missing in the input file are left out, and
            so is a front-matter element without children.
        """
        front, body, back = self.get_xml_parts()

        book_part = etree.Element("book-part")

        # A front element without children is skipped
        if front is not None and len(front):
            book_part.append(front)

        # body and back are optional in the input; appending None would raise a TypeError
        for part in (body, back):
            if part is not None:
                book_part.append(part)
        return book_part

    def get_xml_parts(self):
        """
        Looks up the front-matter, body and back-matter of the parsed input file

        Returns
        -------
        f : elementtree
            First ``front`` element, or the first ``book-part-meta`` element if there is no
            ``front``; None if neither exists
        bd : elementtree
            First ``body`` element or None
        bk : elementtree
            First ``back`` element or None

        """
        root = self.tr.getroot()
        front = root.find(".//front")
        if front is None:
            # Files which are already book-parts keep their metadata in book-part-meta
            front = root.find(".//book-part-meta")
        return front, root.find(".//body"), root.find(".//back")

    def do_file_io(self, s, mode, pth):
        """
        Executes a read or write operation on a path

        Parameters
        ----------
        s: str or bytes
            Content to be written or None for read
        mode: str
            w for write, r for read
        pth : str
            Path to the file to be read or written

        Returns
        -------
        content : str or None
            Content of the file for mode r, otherwise None

        Notes
        -----
        If the I/O operation fails, the error is printed and the process exits with code 1.
        Any other mode only opens and closes the file.

        """
        try:
            if mode == "w":
                # Serialized XML is passed in as bytes, which a text mode file rejects
                open_mode = "wb" if isinstance(s, (bytes, bytearray)) else "w"
                with open(pth, open_mode) as handle:
                    handle.write(s)
                return None
            with open(pth, mode) as handle:
                if mode == "r":
                    return handle.read()
        except IOError as i:
            self.debug.print_debug(self, i)
            print(i)
            sys.exit(1)
        return None

    def run(self):
        """
        Creates the output folder and merges the JATS-XML file according to the selected scheme

        See Also
        --------
        create_output_bits

        Warning
        -------
        function create_output_jats not yet used

        """
        self.gv.create_dirs_recursive(self.dr.split("/"))

        scheme = self.scheme
        if scheme == "bits":
            self.create_output_bits()
            return
        if scheme == "jats":
            # NOTE(review): create_output_jats is not defined in this class - confirm it exists
            self.tr = self.create_output_jats(self.tr)
예제 #13
0
class MPT(Debuggable):
    """
    MPT Class Object,  which initializes the properties and defines the methods.

    """
    def __init__(self):
        """
        Reads the command line and initializes settings, globals and debugging.
        """
        cli = self.read_command_line()
        self.args = cli
        self.debug = Debug()
        self.settings = Settings(cli)
        self.gv = GV(self.settings)
        Debuggable.__init__(self, 'Main')
        if cli.get('--debug'):
            self.debug.enable_debug()

        # Result folder name: time stamp followed by four characters of a random uuid
        timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%H-%M-%S-")
        self.current_result = timestamp + str(uuid.uuid4())[:4]
        self.config = None
        self.all_typesetters = None
        self.script_folder = os.path.dirname(os.path.realpath(__file__))

    @staticmethod
    def read_command_line():
        """
        Parses the command line parameters with docopt, using the module docstring as usage text.

        Returns
        -------
        docopt : dictionary
          A dictionary whose keys are the names of the command-line elements and whose values are
          the parsed values of those elements.
        """
        cli_options = docopt(__doc__, version='heiMPT 0.0.1')
        return cli_options

    def get_module_name(self):
        """
        Returns the name of the module, used for debugging and logging

        Returns
        -------
        name string
         Name of the Module
        """
        return 'heiMPT'

    def call_typesetter(self, args):
        """Runs a typesetter with the given arguments

        The arguments are joined with spaces, every ": " is collapsed to ":" and the result is
        split on single spaces again before it is run as a system process.

        Parameters
        ----------
        args : list
            executable and application arguments in the correct order.

        Returns
        -------
        output : bytes
            standard output of the process.
        err : None
            second value returned by communicate(); standard error is not piped.
        exit_code : int
            exit code of the process.

        See Also
        --------
        subprocess.Popen()

        """
        command_line = ' '.join(args)

        if ': ' in command_line:
            command_line = command_line.replace(': ', ':')
            self.debug.print_debug(
                self,
                "Merging command: file into command:file, can be a problem for some applications"
            )

        tokens = command_line.strip().split(' ')
        child = Popen(tokens, stdout=PIPE)
        stdout_data, stderr_data = child.communicate()
        return stdout_data, stderr_data, child.wait()

    def arguments_parse(self, t_props):
        """
        Creates the argument list from the typesetter properties of the json configuration.

        Parameters
        ----------
        t_props : dictionary
            typesetter properties

        Returns
        -------
        args : list
            executable followed by the values of "arguments", ordered by their sorted keys.

        Notes
        -----
        The process exits with code 1 if no executable is defined.

        """
        executable = t_props.get('executable')
        if not executable:
            self.debug.print_debug(
                self, self.gv.TYPESETTER_EXECUTABLE_VARIABLE_IS_UNDEFINED)
            sys.exit(1)

        command = [executable]
        configured = t_props.get("arguments")
        if configured:
            command.extend(configured[key] for key in sorted(configured))
        return command

    def create_output_path(self, p, p_id, args, prefix, uid):
        """
        Appends the configured typesetter arguments of the project to the argument list

        The arguments are appended ordered by their sorted keys. The placeholder argument
        ``--create-dir`` is replaced by the temporary output path ``<project path>/<uid>``.
        No directory is created here.

        Parameters
        ---------
        p: dictionary
            json program properties
        p_id:  int
            typesetter id
        args : list
            application arguments in the correct order, extended in place.
        prefix: str
            file name prefix of the current file (not used)
        uid: str
            unique id of the current typesetter

        Returns
        --------
        True: boolean
            Always True

        Notes
        -----
        The process exits with code 1 if the typesetter has no "arguments" entry.

        """
        config_args = p.get('typesetters')[p_id].get("arguments")
        if config_args is None:
            self.debug.print_debug(self,
                                   self.gv.TYPESETTER_ARGUMENTS_NOT_DEFINED)
            sys.exit(1)

        out_path = os.path.join(p.get('path'), uid)
        for key in sorted(config_args):
            arg = config_args[key]
            args.append(out_path if arg == '--create-dir' else arg)

        self.debug.print_debug(self, '{} {}'.format('Execute', ' '.join(args)))
        return True

    def run_typesetter(self, p, pre_path, pre_out_type, p_id, uid, f_id,
                       f_name, args):
        """
        Resolves the input file, calls the typesetter and moves the output to the project path

        The output of the previous typesetter is used as input only for chained projects and
        only after the first typesetter. In every other case the file is read from the
        project path, so that a later step of a non-chained project no longer fails on an
        unset input path.

        Parameters
        ---------
        p: dictionary
            json program properties
        pre_path: str
            project path of the previous iteration
        pre_out_type : str
            output type of the previous iteration
        p_id:  int
            typesetter id
        uid: str
            unique id of the current typesetter
        f_id:  int
              sequence number of the current file
        f_name:  str
              name of the current file
        args : list
            application arguments in the correct order, extended in place.

        Returns
        --------
        p_path : str
            project output path of the current typesetter, '' if the input file is missing
        pf_type : str
            project file type of the current typesetter, '' if the input file is missing

        See Also
        --------

        call_typesetter, organize_output

        """
        p_path, pf_type = '', ''
        prefix = f_name.split('.')[0]
        typesetters = p['typesetters']

        if p_id != min(typesetters) and p.get("chain"):
            f_path = os.path.join(pre_path, prefix + '.' + pre_out_type)
        else:
            f_path = os.path.join(p.get('path'), f_name)

        # Typesetters flagged with "expand" run even if the input file does not exist
        if os.path.isfile(f_path) or typesetters.get(p_id).get('expand'):
            self.debug.print_console(
                self, '\t{}:\t {} '.format('Processing', prefix))
            self.gv.log.append(prefix)
            args.append(f_path)
            self.create_output_path(p, p_id, args, prefix, uid)
            output, err, exit_code = self.call_typesetter(args)
            self.debug.print_debug(self, output.decode('utf-8'))
            p_path = self.organize_output(p, p_id, prefix, f_id, uid, args)
            pf_type = typesetters[p_id].get("out_type")
        else:
            self.debug.print_debug(
                self, self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + ' ' + f_path)

        return p_path, pf_type

    def typeset_file(self, p, pre_path, pre_out_type, p_id, uid, f_id, f_name):
        """
        Typesets the current file with the typesetter of the current step

        Parameters
        ---------
        p: dictionary
            json program properties
        pre_path: str
            project path of the previous iteration
        pre_out_type : str
            output type of the previous iteration
        p_id:  int
            typesetter id
        uid: str
            unique id of the current typesetter
        f_id:  int
              sequence number of the current file
        f_name:  str
              name of the current file

        Returns
        --------
        p_path : str
            project output path of the current typesetter
        pf_type : str
            project file type of the current typesetter

        Both values are '' if the typesetter is not configured or its binary is unavailable.

        See Also
        --------
        run_typesetter

        """
        step_name = p.get('typesetters')[p_id].get("name")
        t_props = self.all_typesetters.get(step_name)

        if not t_props:
            self.debug.print_debug(self,
                                   self.gv.PROJECT_TYPESETTER_IS_NOT_AVAILABLE)
            return '', ''

        command = self.arguments_parse(t_props)
        if not self.gv.check_program(t_props.get('executable')):
            self.debug.print_debug(
                self,
                t_props.get('executable') +
                self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE)
            return '', ''

        p_path, pf_type = self.run_typesetter(p, pre_path, pre_out_type, p_id,
                                              uid, f_id, f_name, command)
        return p_path, pf_type

    def typeset_files(self, p, pre_path, pre_out_type, pre_id):
        """
        Typesets all files of a project with one typesetter

        Typesetters flagged with "expand" run once, on the file named by the global uuid
        and with file sequence number 0. Otherwise every project file is typeset, ordered
        by its numeric key.

        Parameters
        ---------
        p: dictionary
            json program properties
        pre_path: str
            project path of the previously executed typesetter
        pre_out_type: str
            project file type of the previously executed typesetter
        pre_id :int
            key of the typesetter to run, as used in the "typesetters" entry of the project

        Returns
        --------
        p_path : str
            project output path of the last typeset file
        pf_type : str
            project file type of the last typeset file

        See Also
        --------
        typeset_file

        """
        p_path, pf_type = '', ''

        # One temporary id is shared by all files of this step
        uid = str(uuid.uuid4())

        numbered_files = collections.OrderedDict(
            sorted((int(key), value) for key, value in p.get('files').items()))

        if not p.get('typesetters')[pre_id].get("expand"):
            for f_id, f_name in numbered_files.items():
                p_path, pf_type = self.typeset_file(p, pre_path, pre_out_type,
                                                    pre_id, uid, f_id, f_name)
        else:
            p_path, pf_type = self.typeset_file(p, pre_path, pre_out_type,
                                                pre_id, uid, 0, self.gv.uuid)

        return p_path, pf_type

    def typeset_project(self, p):
        """
        Runs all typesetters of an active project, ordered by their sorted keys

        The output path and output type of each step are handed over to the next step.

        Parameters
        ---------
        p: dictionary
            json program properties

        Returns
        --------
        True: boolean
            Always True

        Notes
        -----
        The process exits with code 1 if no typesetters are loaded (all_typesetters is None).

        See Also
        --------
        typeset_files

        """
        if not p.get('active'):
            self.debug.print_debug(
                self, self.gv.PROJECT_IS_NOT_ACTIVE + ' ' + p.get('name'))
            return True

        self.debug.print_console(self, 'PROJECT : ' + p.get('name'))
        self.gv.log.append(p.get("name"))

        configured = p.get('typesetters')
        if configured:
            step_ids = sorted(configured)
        else:
            step_ids = []
            self.debug.print_debug(
                self, self.gv.PROJECT_TYPESETTERS_ARE_NOT_SPECIFIED)

        if self.all_typesetters is None:
            self.debug.print_debug(
                self, self.gv.PROJECT_TYPESETTER_VAR_IS_NOT_SPECIFIED)
            sys.exit(1)

        pre_path, prev_out_type = '', ''
        for p_id in step_ids:
            step_name = p.get('typesetters')[p_id].get("name")
            self.debug.print_console(
                self, ' '.join(['Step', p_id, ':', '\t', step_name]))
            self.gv.log.append('{} {}'.format(p_id, step_name))
            pre_path, prev_out_type = self.typeset_files(
                p, pre_path, prev_out_type, p_id)

        return True

    def typeset_all_projects(self):
        """
        Typesets every project listed under "projects" in the configuration

        Returns
        --------
        True: boolean
            Always True, also if no projects are configured

        See Also
        --------
        typeset_project

        """
        projects = self.config.get('projects')
        if not projects:
            self.debug.print_debug(self, self.gv.PROJECTS_VAR_IS_NOT_SPECIFIED)
            return True

        for project in projects:
            self.typeset_project(project)
        return True

    def organize_output(self, p, p_id, prefix, f_id, uid, args):
        """
        Moves the temporary results of the current typesetter step into the final project path

        The final path is built from the project path, the project name, the current result
        folder, "<typesetter id>_<typesetter name>" and the output type. Depending on the
        typesetter flags the results are merged ("merge"), moved file by file ("expand") or
        moved as one file ("process"). After the last file of the last typesetter the project
        folder is zipped.

        Parameters
        ------------
        p: dict
            json program properties
        p_id:  str
            typesetter id
        prefix: str
            file name prefix of the current file
        f_id:  int
              sequence number of the current file
        uid: str
            unique id of the current typesetter
        args: list
            tool parameters, executable file is first element

        Returns
        --------
        project_path: str
            Final path for the current file

        Notes
        -----
        The process exits with code 1 if the output type is not configured or the temporary
        result file of a "process" step does not exist.

        See Also
        --------
        create_merged_file, gv.create_dirs_recursive

        """
        ts_conf = p.get('typesetters')[p_id]
        p_name = ts_conf.get("name")

        t_path = [p.get('path'), uid]
        if args:
            # meTypeset writes its results into a "nlm" sub-folder
            if any('meTypeset.py' in arg for arg in args):
                t_path += ['nlm']
        else:
            # NOTE(review): repeats path and uid when no arguments are given - confirm intent
            t_path += [p.get('path'), uid]

        out_type = ts_conf.get('out_type')

        if out_type is None:
            self.debug.print_console(
                self, self.gv.PROJECT_OUTPUT_FILE_TYPE_IS_NOT_SPECIFIED)
            sys.exit(1)
        project_path = [
            p.get('path'), p['name'], self.current_result, p_id + '_' + p_name,
            out_type
        ]

        temp_dir = os.path.join(p.get('path'), uid)
        file_count = len(p.get('files'))

        if ts_conf.get('merge'):
            self.create_merged_file(p, p_id, project_path, t_path)
            # The temporary folder is kept until the last file is merged
            if file_count == f_id:
                shutil.rmtree(temp_dir)
        elif ts_conf.get('expand'):
            for filename in os.listdir(temp_dir):
                p_path = self.gv.create_dirs_recursive(project_path)
                f_path = '{}{}{}'.format(p_path, SEP, filename)
                os.rename(os.path.join(temp_dir, filename), f_path)
            shutil.rmtree(temp_dir)
        elif ts_conf.get('process'):
            if p_name.lower() == 'metypeset' and not os.path.exists(
                    SEP.join(t_path)):
                t_path.append('nlm')
            t_path.append(prefix + '.' + out_type)
            p_path = self.gv.create_dirs_recursive(project_path)
            f_path = '{}{}{}.{}'.format(p_path, SEP, prefix, out_type)
            try:
                os.rename(SEP.join(t_path), f_path)
                shutil.rmtree(temp_dir)
            except FileNotFoundError:
                print('File not found\t{}'.format(SEP.join(t_path)))
                sys.exit(1)

        else:
            self.debug.print_debug(
                self, self.gv.PROJECT_TYPESETTER_PROCESS_METHOD_NOT_SPECIFIED)

        # Last file of the last typesetter: archive the whole project folder
        if len(p.get('typesetters')) == int(p_id) and int(f_id) == file_count:
            zip_path = ''.join([p.get('path'), SEP, p['name']])
            shutil.make_archive('{}/{}'.format(zip_path, p.get("name")), 'zip',
                                zip_path)

        return SEP.join(project_path)

    def create_merged_file(self, p, p_id, project_path, t_path):
        """
        Copies the merged temporary file into the project path

        Parameters
        ------------
        p: dict
            json program properties
        p_id:  int
            typesetter id
        project_path : list
            components of the output path to be created
        t_path : list
            components of the temporary output directory; the global uuid is appended to it

        Returns
        -------
        f_path : str
            path of the copied file, ``<output path>/<uuid>.xml``

        See Also
        --------
        create_named_file()

        """
        merged_name = self.gv.uuid
        t_path.append(merged_name)
        target_dir = self.gv.create_dirs_recursive(project_path)

        f_path = '{}{}{}.xml'.format(target_dir, SEP, merged_name)
        shutil.copy2(SEP.join(t_path), f_path)
        # Optionally store a second copy under the configured out_file name
        self.create_named_file(p, p_id, target_dir, t_path)
        return f_path

    def create_named_file(self, p, p_id, p_path, t_path):
        """
        Copies the temporary file to the file name configured as "out_file"

        Nothing is copied if the typesetter has no "out_file" entry.

        p: dict
            json program properties
        p_id:  int
            typesetter id
        p_path : str
            output directory for the current typesetter
        t_path : list
            components of the path of the temporary file

        """
        out_file = p['typesetters'][p_id].get('out_file')
        if not out_file:
            return
        shutil.copy2(SEP.join(t_path), '{}{}{}'.format(p_path, SEP, out_file))

    def run_modules(self):
        """
        Run MPT in module mode

        Only the import mode is supported: the folder plugins/import below the script folder
        is put in front of the module search path and, if the omp argument is set, every
        class of the omp plugin module which inherits from ImportInterface.Import is
        instantiated and run. Any other arguments are reported as a fatal error.

        """
        if not self.args.get('import'):
            # self.args is a dictionary and has to be converted before concatenation
            self.debug.fatal_error(self, "Unsupported arguments: " + str(self.args))
            return

        # Run import modules
        sys.path.insert(
            0, os.path.join(self.script_folder, 'plugins', 'import'))
        import ImportInterface
        if self.args.get('omp'):
            m = "omp"
            plugin_package = __import__(m, fromlist=['*'])
            plugin_module = getattr(plugin_package, m)
            # Find class inheriting from the Import abstract class in the module
            for name in dir(plugin_module):
                candidate = getattr(plugin_module, name)
                if inspect.isclass(candidate)\
                        and issubclass(candidate, ImportInterface.Import)\
                        and candidate is not ImportInterface.Import:
                    plugin_class = candidate
                    print(("Found import plugin", name, plugin_class))
                    plugin = plugin_class()
                    self.debug.print_console(self, str(self.args))
                    plugin.run(self.args,
                               {'base-path': self.script_folder})
        return

    def check_applications(self):
        """
        Check if program binaries are available

        First the formatters named in every ``--formatter=<name>[,<name>...]`` argument of
        the configured typesetters are checked, then the executable of every typesetter.
        The first unavailable binary is reported as a fatal error and the process exits
        with code 1.

        """
        typesetters = self.config.get('typesetters')

        for props in typesetters.values():
            # Typesetters without arguments have no formatters to check
            for value in (props.get('arguments') or {}).values():
                if not (isinstance(value, str) and value.startswith('--formatter')):
                    continue
                _, _, names = value.partition('=')
                for fo_name in filter(None, names.split(',')):
                    binary = self.gv.apps.get(fo_name.lower())
                    if not self.gv.check_program(binary):
                        # Report the formatter name if no binary is registered for it
                        self.debug.fatal_error(
                            self, '{}{}'.format(
                                binary or fo_name,
                                self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE))
                        sys.exit(1)

        for props in typesetters.values():
            executable = props['executable']
            if not self.gv.check_program(executable):
                self.debug.fatal_error(
                    self, '{}{}'.format(
                        executable, self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE))
                sys.exit(1)
예제 #14
0
파일: mpt.py 프로젝트: withanage/mpt
class MPT(Debuggable):
    """
    MPT Class Object,  which initializes the properties and defines the methods.

    """

    def __init__(self):
        """
        Read the command line, set up debugging and load the json
        configuration named by the ``<config_file>`` argument.
        """
        self.args = self.read_command_line()
        self.debug = Debug()
        self.gv = GV()
        Debuggable.__init__(self, 'Main')
        debug_requested = self.args.get('--debug')
        if debug_requested:
            self.debug.enable_debug()

        # Result name: minute-resolution timestamp plus a short random tag.
        timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%H-%M-")
        random_tag = str(uuid.uuid4())[:8]
        self.current_result = timestamp + random_tag

        config_file = self.args['<config_file>']
        self.config = self.gv.read_json(config_file)
        self.all_typesetters = self.config.get('typesetters')

    def run(self):
        """
        Runs the MPT  Module, which typesets all the projects defined in the json input file

        Returns
        --------
        True: boolean
            Returns True if all the projects are typeset

        See Also
        --------
        typeset_all_projects

        """
        self.typeset_all_projects()
        return True

    @staticmethod
    def read_command_line():
        """
        Build the docopt dictionary from the command line parameters.

        Returns
        -------
        docopt : dictionary
          Maps the names of the command-line elements described in the module
          docstring to their parsed values.
        """
        usage = __doc__
        options = docopt(usage, version='mpt 0.0.1')
        return options

    def get_module_name(self):
        """
        Name of this module, used for debugging and logging

        Returns
        -------
        name string
         Name of the Module
        """
        return 'MPT'

    def call_typesetter(self, args):
        """Runs  typesetter with given arguments

        Creates the execution path for a typesetter or an application and runs it  as a system process. Output,
        exit-code and  system error codes are captured and returned.


        Parameters
        ----------
        args : list
            application arguments in the correct oder.


        Returns
        -------
        output :str
            system standard output.
        err :str
            system standard error.
        exit_code: str
            system exit_code.

        See Also
        --------
        subprocess.Popen()

        """
        m = ' '.join(args).strip().split(' ')
        self.debug.print_console(self, ' '.join(m))
        process = Popen(m, stdout=PIPE)
        output, err = process.communicate()
        exit_code = process.wait()
        return output, err, exit_code

    def arguments_parse(self, t_props):
        """
        Build the command list of a typesetter from its json properties.

        Exits with status 1 when the properties name no executable.

        Parameters
        ----------
        t_props : dictionary
            typesetter properties

        Returns
        -------
        args : list
            executable followed by the argument values, ordered by their
            sorted keys.

        """
        executable = t_props.get('executable')
        if not executable:
            self.debug.print_debug(
                self, self.gv.TYPESETTER_EXECUTABLE_VARIABLE_IS_UNDEFINED)
            sys.exit(1)

        command = [executable]
        configured = t_props.get("arguments")
        if configured:
            command.extend(value for _, value in sorted(configured.items()))
        return command

    def create_output_path(self, p, p_id, args, prefix, uid):
        """
        Appends the typesetter arguments of the project to the argument list

        The project level arguments are processed in the order of their
        sorted keys. The placeholder ``create_output_directory()`` is replaced
        by the temporary output folder (project path + uid), the placeholder
        ``create_output_file()`` by a file inside that folder, which is
        created if needed. Every other value is appended unchanged.

        Parameters
        ---------
        p: dictionary
            json program properties
        p_id:  int
            typesetter id
        args : list
            application arguments; extended in place.
        prefix: str
            file name prefix  of  the current file
        uid: str
            unique id of the current typesetter

        Returns
        --------
        True: boolean
            Always True

        See Also
        --------
        os.makedirs()

        """
        typesetter = p.get('typesetters')[p_id]
        ordered_values = [
            value for _, value in sorted(typesetter.get("arguments").items())
        ]
        out_type = typesetter.get("out_type")
        out_path = os.path.join(p.get('path'), uid)

        for value in ordered_values:
            if value == 'create_output_directory()':
                args.append(out_path)
                continue
            if value != 'create_output_file()':
                args.append(value)
                continue
            if not os.path.exists(out_path):
                os.makedirs(out_path)
            file_name = prefix + '.' + out_type
            args.append(os.path.join(out_path, file_name))
        return True

    def run_typesetter(
            self,
            p,
            pre_path,
            pre_out_type,
            p_id,
            uid,
            f_id,
            f_name,
            args):
        """
        Creates the temporary output path, calls the typesetter and writes the outtput to the correct path for a
        certain file

        Parameters
        ---------
        p: dictionary
            json program properties
        pre_path: str
            project path of the previous iteration
        pre_out_type : str
            output type of the previous iteration
        p_id:  int
            typesetter id
        uid: str
            unique id of the current current typesetter
        f_id:  int
              sequence number of the current file
        f_name:  str
              name of the current file
        args : list
            application arguments in the correct oder.

        Returns
        --------
        p_path : str
            project output path of the current typesetter
        pf_type : str
            project file type of the current typesetter

        See Also
        --------

        call_typesetter, organize_output

        """

        p_path = ''
        pf_type = ''
        prefix = f_name.split('.')[0]
        if p_id == min(i for i in p['typesetters']):
            f_path = os.path.join(p.get('path'), f_name)

        elif p.get("chain"):
            f_path = os.path.join(pre_path,prefix +'.' + pre_out_type)

        if os.path.isfile(f_path) or p['typesetters'].get(p_id).get('expand'):
            args.append(f_path)
            self.create_output_path(p, p_id,  args, prefix, uid)
            output, err, exit_code = self.call_typesetter(args)
            self.debug.print_debug(self, output.decode('utf-8'))
            p_path = self.organize_output(
                p,
                p_id,
                prefix,
                f_id,
                uid)

            pf_type = p.get('typesetters')[p_id].get("out_type")

        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + ' ' +
                os.path.join(f_path))

        return p_path, pf_type

    def typeset_file(
            self,
            p,
            pre_path,
            pre_out_type,
            p_id,
            uid,
            f_id,
            f_name
    ):
        """
        Typesets the current file

        Parameters
        ---------
        p: dictionary
            json program properties
        pre_path: str
            project path of the previous iteration
        pre_out_type : str
            output type of the previous iteration
        p_id:  int
            typesetter id
        uid: str
            unique id of the current current typesetter
        f_id:  int
              sequence number of the current file
        f_name:  str
              name of the current file
        args: list
            application arguments in the correct oder.

        Returns
        --------
        p_path : str
            project output path of the current typesetter
        pf_type : str
            project file type of the current typesetter


        See Also
        --------
        run_typesetter

        """
        t_props = self.all_typesetters.get(p.get('typesetters')[p_id].get("name"))
        p_path, pf_type = '',''

        if t_props:
            mt = self.arguments_parse(t_props)
            if self.gv.check_program(t_props.get('executable')):
                p_path, pf_type = self.run_typesetter(
                    p,
                    pre_path,
                    pre_out_type,
                    p_id,
                    uid,
                    f_id,
                    f_name,
                    mt)

            else:
                self.debug.print_debug(self, self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE)
        else:
            self.debug.print_debug(
                self, self.gv.PROJECT_TYPESETTER_IS_NOT_AVAILABLE)
        return p_path, pf_type

    def typeset_files(
            self,
            p,
            pre_path,
            pre_out_type,
            pre_id):
        """
        Typeset all files of a  certain project

        Parameters
        ---------
        p: dictionary
            json program properties
        pre_path: str
            project path of the previously executed typesetter
        pre_out_type: str
            project file type of the previously executed typesetter
        pre_id :int
            sequence number of the previously executed file

        Returns
        --------
        p_path : str
            project output path of the current typesetter
        pf_type : str
            project file type of the current typesetter


        See Also
        --------
        typeset_file

        """
        p_path, pf_type = '', ''

        uid = str(uuid.uuid4())

        project_files = collections.OrderedDict(
            sorted((int(key), value) for key, value in p.get('files').items()))
        if p.get('typesetters')[pre_id].get("expand"):
            f_name = self.gv.uuid
            p_path, pf_type = self.typeset_file(
                p,
                pre_path,
                pre_out_type,
                pre_id,
                uid,
                0,
                f_name
            )


        else:
            for f_id in project_files:
                f_name = project_files[f_id]
                p_path, pf_type = self.typeset_file(
                        p,
                        pre_path,
                        pre_out_type,
                        pre_id,
                        uid,
                        f_id,
                        f_name
                    )

        return p_path, pf_type

    def typeset_project(self, p):
        """
        Typesets a certain project

        Parameters
        ---------
        p: dictionary
            json program properties

        Returns
        --------
        True: boolean
            Returns True, if  all the typesetters in project has run successfully.


        See Also
        --------
        typeset_files

        """
        typesetters_ordered, temp_path, temp_pre_out_type = '', '', ''
        pre_path = ''
        prev_out_type = ''

        if p.get('active'):
            ts = p.get('typesetters')
            if ts:
                typesetters_ordered = collections.OrderedDict(
                    sorted(ts.items()))
            else:
                self.debug.print_debug(
                    self, self.gv.PROJECT_TYPESETTERS_ARE_NOT_SPECIFIED)

            if self.all_typesetters is None:
                self.debug.print_debug(
                    self, self.gv.PROJECT_TYPESETTER_VAR_IS_NOT_SPECIFIED)
                sys.exit(1)

            for p_id in typesetters_ordered:
                self.debug.print_console(self, ' '.join(['Runnning Typesetter',p_id,':', p.get('typesetters')[p_id].get("name")]))
                temp_path, temp_pre_out_type = self.typeset_files(
                    p,
                    pre_path,
                    prev_out_type,
                    p_id
                )

                pre_path = temp_path
                prev_out_type = temp_pre_out_type
                self.debug.print_console(self, ' '.join(['ls -al',temp_path]))

        else:
            self.debug.print_debug(self, self.gv.PROJECT_IS_NOT_ACTIVE)
        return True

    def typeset_all_projects(self):
        """
        Typeset all projects defined in the json file

        Returns
        --------
        True: boolean
            Returns True, if the  all the typesetters in project run

        See Also
        --------
        typeset_project

        """
        projects = self.config.get('projects')
        if projects:
            for p in projects:
                self.typeset_project(p)

        else:
            self.debug.print_debug(self, self.gv.PROJECTS_VAR_IS_NOT_SPECIFIED)
        return True


    def organize_output(
            self,
            p,
            p_id,
            prefix,
            f_id,
            uid):
        """
        Copy the temporary results into the  final project path

        The temporary folder of the current step is ``<project path>/<uid>``
        (with an extra ``nlm`` level for the typesetter named ``metypeset``).
        The final folder is built from project path, project name, the name
        of the current run, ``<p_id>_<typesetter name>`` and the output type.
        How the results are transferred depends on the step flag that is set:
        ``merge``, ``expand`` or ``process``.

        Parameters
        ------------
        p: dict
            json program properties
        p_id:  int
            typesetter id
        prefix: str
            file name prefix  of  the current file
        f_id:  int
              sequence number of the current file
        uid: str
            unique id of the current typesetter

        Returns
        --------
        project_path: str
            Final folder of the current step


        See Also
        --------
        create_merged_file, gv.create_dirs_recursive

        """
        step = p.get('typesetters')[p_id]
        p_name = step.get("name")
        base_path = p.get('path')

        t_path = [base_path, uid]
        if p_name == 'metypeset':
            t_path.append('nlm')

        out_type = step['out_type']
        project_path = [
            base_path,
            p['name'],
            self.current_result,
            p_id + '_' + p_name,
            out_type,
        ]
        temp_dir = os.path.join(base_path, uid)

        if step.get('merge'):
            self.create_merged_file(p, p_id, project_path, t_path)
            # The temporary folder is only dropped after the last file.
            if len(p.get('files').items()) == f_id:
                shutil.rmtree(temp_dir)
        elif step.get('expand'):
            for filename in os.listdir(temp_dir):
                target_dir = self.gv.create_dirs_recursive(project_path)
                target = '{}{}{}'.format(target_dir, SEP, filename)
                os.rename(os.path.join(temp_dir, filename), target)
            shutil.rmtree(temp_dir)
        elif step.get('process'):
            t_path.append(prefix + '.' + out_type)
            target_dir = self.gv.create_dirs_recursive(project_path)
            target = '{}{}{}.{}'.format(target_dir, SEP, prefix, out_type)
            os.rename(SEP.join(t_path), target)
            shutil.rmtree(temp_dir)
        else:
            self.debug.print_debug(
                self, self.gv.PROJECT_TYPESETTER_PROCESS_METHOD_NOT_SPECIFIED)

        return SEP.join(project_path)

    def create_merged_file(self, p, p_id, project_path, t_path):
        """
        Copy the combined output file of a step into the project folder

        Parameters
        ------------
        p: dict
            json program properties
        p_id:  int
            typesetter id
        project_path : list
            path segments of the folder to be created
        t_path : list
            path segments of the temporary output directory; the name of the
            combined file (``gv.uuid``) is appended in place

        Returns
        -------
        f_path : str
            path of the copied ``<gv.uuid>.xml`` file

        See Also
        --------
        create_named_file()


        """
        t_path.append(self.gv.uuid)
        target_dir = self.gv.create_dirs_recursive(project_path)
        merged_path = '{}{}{}.xml'.format(target_dir, SEP, self.gv.uuid)
        source = SEP.join(t_path)
        shutil.copy2(source, merged_path)
        self.create_named_file(p, p_id, target_dir, t_path)
        return merged_path

    def create_named_file(self, p, p_id, p_path, t_path):
        """
        Copy the temporary file to the file name configured as ``out_file``

        Nothing is copied when the step defines no ``out_file``.

        p: dict
            json program properties
        p_id:  int
            typesetter id
        p_path : str
            output directory for the current typesetter
        t_path : list
            path segments of the temporary file

        """
        out_file = p['typesetters'][p_id].get('out_file')
        if not out_file:
            return
        named_path = '{}{}{}'.format(p_path, SEP, out_file)
        shutil.copy2(SEP.join(t_path), named_path)
예제 #15
0
class Prepare(Debuggable):
    """
    Standalone Processing object to combine, clean and modify a JATS XML file and optionally inject BITS Metadata headers.

    Features
    --------
    add Id numbering for any tag type, clean comments, remove unused references,
    set numbering, add unique ids to certain tag types, sort references

    """
    def __init__(self):
        """
        Read the command line, set up settings and debugging and parse the
        input file ``<path>/<input_file>`` into an element tree.
        """
        self.args = self.read_command_line()
        self.debug = Debug()
        self.settings = Settings(self.args)
        self.gv = GV(self.settings)
        Debuggable.__init__(self, 'Main')
        debug_requested = self.args.get('--debug')
        if debug_requested:
            self.debug.enable_debug()

        self.dr = self.args.get('<path>')
        self.f = self.args.get('<input_file>')
        self.stand_alone = self.args.get('--stand-alone')
        input_path = os.path.join(self.dr, self.f)
        self.tr = etree.parse(input_path)

    @staticmethod
    def read_command_line():
        """
        Build the docopt dictionary from the command line parameters.

        Returns
        -------
        docopt : dictionary
          Maps the names of the command-line elements described in the module
          docstring to their parsed values.
        """
        usage = __doc__
        options = docopt(usage, version='xml 0.1')
        return options

    def citations_to_references(self):
        """ Removes  mixed-citation block, adds as a <sec> Section element

        Returns
         -------
         tr : elementtree

        """

        t = self.tr.getroot()
        bd = t.find('.//body')
        sc = etree.Element('sec')
        ttl = etree.Element('title')
        ttl.text = 'References'
        sc.append(ttl)
        mc = t.findall('.//mixed-citation')
        if len(mc) > 0:
            for r in mc:
                r.tag = 'p'
                sc.append(r)
            bd.append(sc)
            rlst = t.find('.//ref-list')
            rlst.getparent().remove(rlst)
            bck = t.find('.//back')
            bck.append(etree.Element('ref-list'))

        return self.tr

    def clean_references(self):
        """ removes  references, which are not linked.

         Parameters
         -----------
         tag : str
            name of the XML tag

         Returns
         -------
         tr : elementtree

         See Also
         --------
         remove_element, remove_tags

        """
        r = self.tr.getroot()

        for e in r.findall('.//back/ref-list/ref'):
            if e.attrib.get('id'):
                if r.find(".//xref[@ref-type='bibr'][@rid='" +
                          e.attrib.get('id') + "']") is None:
                    self.remove_element(e)
            else:
                self.remove_element(e)
        for e in r.findall(".//xref[@ref-type='bibr']"):
            if r.find(".//back/ref-list/ref[@id='" + e.attrib.get('rid') +
                      "']") is None:
                if e.getparent() is not None:
                    for c in e.getparent().getiterator():
                        if c.tag == 'xref' and c.attrib.get(
                                'ref-type') == 'bibr':
                            self.remove_tags(c)
        return self.tr

    def remove_tags(self, e):
        """
        Takes an etree element and replaces it with its own text

        The text and the tail of the element are joined to the tail of the
        previous sibling or, when the element is the first child, to the
        text of the parent. The element is then removed from its parent;
        child elements of the removed element are dropped with it. An
        element without parent is left alone.

        Parameters
        ----------
        e : element
            Element to be replaced

        """
        parent = e.getparent()
        if parent is None:
            return

        kept_text = (e.text or '') + (e.tail or '')
        if kept_text:
            previous = e.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + kept_text
            else:
                # First child: the preceding text is the parent's text.
                parent.text = (parent.text or '') + kept_text
        parent.remove(e)

    def remove_element(self, e):
        """
        Detach an element from its parent; an element without parent is
        left untouched

        Parameters
        ----------
        e : element
            Element to be removed

        """
        parent = e.getparent()
        if parent is None:
            return
        parent.remove(e)

    def set_uuids_for_back_matter(self, tags):
        """
        Add unique id tags to  any of the sub-elements of the back matter

        Parameters
        ----------
        tags: list
         list of elements

        Returns
        -------
        tr : elementtree

        """
        for s in tags:
            f = {}
            ref_type = 'bibr' if s == 'ref' else s
            fns = self.tr.getroot().findall(''.join(
                ['.//xref/[@ref-type="', ref_type, '"]']))
            for i in fns:
                rid = ''.join(['bibd', str(uuid.uuid4())])
                f[i.attrib['rid']] = rid
                i.set('rid', rid)
            for m in list(f.keys()):
                n = self.tr.getroot().find(''.join(
                    ['.//' + s + '/[@id="', m, '"]']))
                if n is not None:
                    n.set('id', f[m]) if len(n) > 0 else ''
        return self.tr

    def set_numbering_values(self, tag, attr, value, count, range_list):
        """
        Adds numerical values to  a  tag  in arguments list

        Parameters
        ---------
        tag: str
            xml tag name
        attr: str
            attribute name
        value :str
            value name
        count : int
            current sequence number
        range_list : list
           lower and upper level for the  numbering

        See Also
        --------
        set_roman_numbers

        """
        searchTag = './/' + tag + '[@' + attr + '="' + value + '"]'
        elems = self.tr.getroot().findall(searchTag)
        range_count = 1
        for elem in elems:
            elem.text, range_count = self.set_roman_numbers(
                count, range_count, range_list)
            count += 1

        return self.tr, count

    def convert_int_to_roman(self, i):
        """
        Converts an integer number into a roman number

        Parameters
        ---------
        i : int
            integer number

        Returns
        -------
        result : str
            Roman number

        """
        result = []
        for integer, numeral in self.gv.numeral_map:
            count = i // integer
            result.append(numeral * count)
            i -= integer * count
        return ''.join(result)

    def set_roman_numbers(self, count, r_count, range_list):
        """
        Converts a given set of elements defined by range_array into roman numbers

        Parameters
        ---------
        count :int
        r_count : int
        range_list : list
            lower and upper level for the  numbering

        Returns
        -------
        val : str
        r_count: int

        See Also
        --------
        convert_int_to_roman

        """

        val = str(count)
        if int(range_list[0]) <= count <= int(range_list[1]):
            val = self.convert_int_to_roman(r_count).lower()
            r_count += 1
        else:
            val = str(count - r_count + 1)
        return val, r_count

    def merge_metadata(self, metadata):
        """
        reads a metadata file path and  merge its content into the metadata section

        Parameters
        ----------
        metadata : str
             suffix  of the metadata files

        Returns
        -------
        tr : elementTree
            Element tree of the  current file

        See Also
        -------
        create_metadata_path

        """
        r = self.tr.getroot()

        pth = self.create_metadata_path(metadata)

        if os.path.isfile(pth):
            fr = r.find('.//front')
            if len(fr):
                bg = r.find('.//body').getparent()
                fr.getparent().remove(fr)
                bpm = etree.parse(pth).find('.//book-part-meta')
                if bpm is None:
                    bpm = etree.parse(pth).find('.')
                    if bpm is not None:
                        if bpm.getroottree().getroot().tag == 'front':
                            bg.insert(0, bpm)
                        else:
                            self.debug.print_debug(
                                self, 'front or bookpart metadata unspecified')
                            sys.exit(1)
                else:
                    bg.insert(0, bpm)
            else:
                self.debug.print_debug(self, 'front metadata unspecified')
        else:
            self.debug.print_debug(
                self, pth + self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST)
            sys.exit(1)
        return self.tr

    def create_metadata_path(self, metadata):
        """
        creates the correct folder path for the metadata file. Metadata files should be in a folder : metadata

        Parameters
        ----------
        metadata : str
            Suffix of the metadata  files

        Returns
        -------
        pth : str
            Correct path of the metadata file in the folder structure

        Notes
        -----
        We assume that  metadata files are stored in a sub-folder named metadata
        """
        p = os.path.dirname(self.f).split(os.sep)
        f = os.path.basename(self.f)
        name, ext = os.path.splitext(f)
        file_name = [name, '.', metadata, ext]

        if not self.stand_alone or not os.path.exists(os.sep.join(p)):
            del p[-4:]
        p.append('metadata')
        p.append(''.join(file_name))
        pth = os.sep.join(p)
        return pth

    def sort_by_tags(self, tag_list, elem):
        """
        Sorts the children of an element alphabetically, in place

        The sort key of a child is the text of the first descendant found
        for every tag of tag_list, in that order. A missing text counts as
        the empty string and children with equal keys keep their order.

        Parameters
        ----------
        tag_list : list
            A list of tag types
        elem : Element
            Element to be modified; None is ignored

        """
        if elem is None:
            return

        def sort_key(child):
            # Only texts are compared: None and the elements themselves
            # cannot be ordered.
            return tuple(
                child.findtext(".//" + tag) or '' for tag in tag_list)

        elem[:] = sorted(elem, key=sort_key)

    def sort_references(self, tag_list):
        """
        Sort references based on the  sub-elements list

        Parameters
        ----------
        tag_list : list
            A list of tag types


        Returns
        -------
        tr : elementTree
            Element tree of the  current file

        See Also
        --------
        sort_by_tags
        """
        elem = self.tr.find('./back/ref-list')
        self.sort_by_tags(tag_list, elem)

        return self.tr

    def sort_footnotes(self, tag_list):
        """
        Sort footnotes based on the  sub-elements list

        Parameters
        ----------
        tag_list : list
            A list of tag types


        Returns
        -------
        tr : elementTree
            Element tree of the  current file

        See Also
        --------
        sort_by_tags
        """
        elem = self.tr.find('./back/fn-group')
        self.sort_by_tags(tag_list, elem)

        return self.tr

    def process(self):
        """
        Process  JATS-XML file and do all transformations into the elementtree

        See Also
        --------
        merge_metadata, set_numbering_tags,set_uuids_for_back_matter,sort_footnotes,sort_references,set_numbering_values

        """

        citations_to_references = self.args.get('--citations-to-references')
        clean_references = self.args.get('--clean-references')
        set_numbering_tags = self.args.get('--set-numbering-tags')
        set_unique_ids = self.args.get('--set-uuids')
        sort_footnotes = self.args.get('--sort-footnotes')
        sort_references = self.args.get('--sort-references')
        set_numbering_values = self.args.get('--set-numbering-values')

        metadata = self.args.get('--metadata')
        self.tr = self.merge_metadata(metadata) if metadata else self.tr

        self.tr = self.citations_to_references(
        ) if citations_to_references else self.tr
        self.tr = self.clean_references() if clean_references else self.tr
        self.tr = self.gv.set_numbering_tags(
            set_numbering_tags.split(','),
            self.tr) if set_numbering_tags else self.tr
        self.tr = self.set_uuids_for_back_matter(
            set_unique_ids.split(',')) if set_unique_ids else self.tr
        self.tr = self.sort_footnotes(
            sort_footnotes.split(',')) if sort_footnotes else self.tr
        self.tr = self.sort_references(
            sort_references.split(',')) if sort_references else self.tr

        for s in set_numbering_values.split(';'):
            vals = s.split(',')

            count = 1
            range_count = [0, 0]

            if len(vals) > 3:
                r = vals[3].lstrip('{').rstrip('}').split(':')
                range_count = [int(r[0]), int(r[1])]
            self.tr, count = self.set_numbering_values(vals[0], vals[1],
                                                       vals[2], count,
                                                       range_count)

        self.gv.create_dirs_recursive(self.dr.split('/'))
        self.create_xml_file(os.path.join(self.dr, os.path.basename(self.f)))

    def get_module_name(self):
        """
        Name of this module, used for debugging and logging

        Returns
        -------
        name string
         Name of the Module
        """
        return 'prepare'

    def create_xml_file(self, pth):
        """
        Write the current elementTree into the file path

        Parameters
        ----------
        pth : str
            Correct path of the metadata file in the folder structure

        Raises
        ------
        IOError
            I/O operation fails

        Notes
        -----
        Default configuration writes a normalized XML file with XML scheme

        """

        try:

            self.tr.write(pth, pretty_print=False, xml_declaration=True)
            print()
        except IOError as e:
            print(e)
            self.debug.print_debug(self, self.XML_FILE_NOT_CREATED)

    def run(self):
        """
        Runs the configuration on the processing object

        See Also
        --------
        process


        """
        self.process()
예제 #16
0
파일: prepare.py 프로젝트: withanage/mpt
class Process(Debuggable):
    """
    Standalone processing object to combine, clean and modify a JATS XML file
    and optionally inject BITS metadata headers.

    Features
    --------
    add id numbering for any tag type, clean comments, remove unused references,
    set numbering, add unique ids to certain tag types, sort references

    """

    def __init__(self):
        """
        Read the command line, set up debugging and parse the input file.

        The document found at ``<path>/<input_file>`` is parsed into
        ``self.tr``; all transformations of this class operate on that tree.
        """
        self.args = self.read_command_line()
        self.debug = Debug()
        self.gv = GV()
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()
        self.dr = self.args.get('<path>')
        self.f = self.args.get('<input_file>')
        self.tr = etree.parse(os.path.join(self.dr, self.f))

    @staticmethod
    def read_command_line():
        """
        Reads and generates a docopt dictionary from the command line parameters.

        Returns
        -------
        docopt : dictionary
          A dictionary, where keys are names of command-line elements and
          values are the parsed values of those elements.
        """
        return docopt(__doc__, version='xml 0.1')

    def remove_references(self):
        """
        Remove references that are not linked and unlink dangling citations.

        First every ``back/ref-list/ref`` without an ``id``, or whose ``id``
        is not the ``rid`` of any ``xref[@ref-type='bibr']``, is removed.
        Then, for every ``bibr`` xref whose ``rid`` is missing or does not
        match a remaining ``ref``, all ``bibr`` xrefs below the same parent
        are replaced by their text.

        Returns
        -------
        tr : elementtree

        See Also
        --------
        remove_element, remove_tags

        """
        root = self.tr.getroot()

        for ref in root.findall('.//back/ref-list/ref'):
            ref_id = ref.attrib.get('id')
            if not ref_id or root.find(
                    ".//xref[@ref-type='bibr'][@rid='" + ref_id + "']") is None:
                self.remove_element(ref)

        for xref in root.findall(".//xref[@ref-type='bibr']"):
            rid = xref.attrib.get('rid')
            # An xref without a rid cannot point anywhere: treat it as
            # dangling instead of failing on string concatenation.
            if rid and root.find(
                    ".//back/ref-list/ref[@id='" + rid + "']") is not None:
                continue
            parent = xref.getparent()
            if parent is None:
                # Already detached while handling an earlier xref.
                continue
            # NOTE(review): every bibr xref below the parent is unlinked, not
            # only the dangling one -- kept as in the original, confirm intent.
            # Iterate over a snapshot because remove_tags mutates the tree.
            for candidate in list(parent.iter('xref')):
                if candidate.attrib.get('ref-type') == 'bibr':
                    self.remove_tags(candidate)
        return self.tr

    def remove_tags(self, e):
        """
        Takes an etree element and replaces it with its own text

        The element's text and tail are appended to the tail of the previous
        sibling or, when there is no previous sibling, to the text of the
        parent. Child elements of ``e`` are discarded together with it.
        Elements without a parent are left untouched.

        Parameters
        ----------
        e : element
            Element to be replaced

        """
        parent = e.getparent()
        if parent is None:
            return

        # The tail is carried over explicitly so that it is not lost when the
        # element is removed from its parent.
        kept_text = (e.text or '') + (e.tail or '')
        previous = e.getprevious()
        if previous is not None:
            previous.tail = (previous.tail or '') + kept_text
        else:
            parent.text = (parent.text or '') + kept_text
        parent.remove(e)

    def remove_element(self, e):
        """
        Remove any element only if it has a parent

        Parameters
        ----------
        e : element
            Element to be removed

        """
        parent = e.getparent()
        if parent is not None:
            parent.remove(e)

    def set_uuids_for_back_matter(self, tags):
        """
        Replace the ids linking xrefs to back matter elements by unique ids.

        For every tag the ``rid`` of each matching ``xref`` is replaced by
        ``'bibd'`` followed by a random UUID in hex form. All xrefs sharing
        the same old ``rid`` receive the same new id, so they keep pointing
        to one target. The ``id`` of the first element of that tag carrying
        the old id is then updated accordingly.

        Parameters
        ----------
        tags: list
         list of element names; ``'ref'`` is matched against xrefs of
         ``ref-type`` ``'bibr'``, any other name against the ``ref-type``
         of the same name

        Returns
        -------
        tr : elementtree

        """
        root = self.tr.getroot()
        for tag in tags:
            ref_type = 'bibr' if tag == 'ref' else tag
            new_ids = {}
            for xref in root.findall('.//xref/[@ref-type="' + ref_type + '"]'):
                old_rid = xref.attrib.get('rid')
                if old_rid is None:
                    # Nothing to re-link for an xref without a target.
                    continue
                if old_rid not in new_ids:
                    new_ids[old_rid] = 'bibd' + uuid.uuid4().hex
                xref.set('rid', new_ids[old_rid])
            for old_rid, new_rid in new_ids.items():
                target = root.find('.//' + tag + '/[@id="' + old_rid + '"]')
                # NOTE(review): targets without child elements keep their old
                # id, as in the original implementation -- confirm intent.
                if target is not None and len(target) > 0:
                    target.set('id', new_rid)
        return self.tr

    def set_numbering_values(
            self,
            tag,
            attr,
            value,
            count,
            range_list):
        """
        Writes sequence numbers into the text of all matching elements

        Every element ``tag`` whose attribute ``attr`` equals ``value`` gets
        its text replaced by the number computed by ``set_roman_numbers``.

        Parameters
        ---------
        tag: str
            xml tag name
        attr: str
            attribute name
        value :str
            attribute value to match
        count : int
            sequence number of the first matching element
        range_list : list
           lower and upper sequence number of the range numbered in roman
           numerals

        Returns
        -------
        tr : elementtree
        count : int
            sequence number following the last numbered element

        See Also
        --------
        set_roman_numbers

        """
        search_tag = './/' + tag + '[@' + attr + '="' + value + '"]'
        range_count = 1
        for elem in self.tr.getroot().findall(search_tag):
            elem.text, range_count = self.set_roman_numbers(
                count, range_count, range_list)
            count += 1

        return self.tr, count

    def convert_int_to_roman(self, i):
        """
        Converts an integer number into a roman number

        Walks over the ``(integer, numeral)`` pairs of ``self.gv.numeral_map``
        and repeats each numeral as often as its integer fits into the
        remainder (presumably the map is ordered from the largest value
        down -- verify against GV).

        Parameters
        ---------
        i : int
            integer number

        Returns
        -------
        result : str
            Roman number

        """
        parts = []
        for integer, numeral in self.gv.numeral_map:
            repeat = i // integer
            parts.append(numeral * repeat)
            i -= integer * repeat
        return ''.join(parts)

    def set_roman_numbers(self, count, r_count, range_list):
        """
        Computes the label for one element of a numbered sequence

        Inside the range the label is the lower-case roman numeral of
        ``r_count`` and ``r_count`` is advanced; outside of it the label is
        the decimal number ``count - r_count + 1``.

        Parameters
        ---------
        count :int
            sequence number of the element
        r_count : int
            next number to be used for roman numbering
        range_list : list
            lower and upper level for the roman numbering (inclusive)

        Returns
        -------
        val : str
        r_count: int

        See Also
        --------
        convert_int_to_roman

        """
        if int(range_list[0]) <= count <= int(range_list[1]):
            val = self.convert_int_to_roman(r_count).lower()
            r_count += 1
        else:
            val = str(count - r_count + 1)
        return val, r_count

    def merge_metadata(self, metadata):
        """
        Reads a metadata file and merges its content into the document

        If the metadata file exists, the ``front`` element of the document is
        removed (when present) and the ``book-part-meta`` element of the
        metadata file is inserted as first child of the parent of ``body``.
        Otherwise a debug message is printed and the tree is left unchanged.

        Parameters
        ----------
        metadata : str
             suffix of the metadata files

        Returns
        -------
        tr : elementTree
            Element tree of the current file

        See Also
        -------
        create_metadata_path

        """
        root = self.tr.getroot()
        pth = self.create_metadata_path(metadata)

        if not os.path.isfile(pth):
            self.debug.print_debug(
                self, pth + self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST)
            return self.tr

        front = root.find('.//front')
        if front is not None:
            self.remove_element(front)
        book_part_meta = etree.parse(pth).find('.//book-part-meta')
        root.find('.//body').getparent().insert(0, book_part_meta)

        return self.tr

    def create_metadata_path(self, metadata):
        """
        Creates the folder path for the metadata file.

        The last four components of the directory of the input file are
        dropped, then ``metadata`` and the file name
        ``<name>.<metadata><ext>`` are appended.

        Parameters
        ----------
        metadata : str
            Suffix of the metadata files

        Returns
        -------
        pth : str
            Path of the metadata file in the folder structure

        Notes
        -----
        We assume that metadata files are stored in a sub-folder named metadata
        """
        parts = os.path.dirname(self.f).split(os.sep)
        del parts[-4:]
        name, ext = os.path.splitext(os.path.basename(self.f))
        parts.append('metadata')
        parts.append(name + '.' + metadata + ext)
        return os.sep.join(parts)

    def sort_by_tags(self, tag_list, elem):
        """
        Sorts the children of an element alphabetically

        The sort key of a child is the tuple of the texts of the first
        descendant of each tag in ``tag_list``; a missing descendant or
        missing text counts as an empty string. Only these texts are
        compared, never the elements themselves, and children with equal
        keys keep their document order.

        Parameters
        ----------
        tag_list : list
            A list of tag types
        elem : Element
            Element to be modified in place

        """
        def sort_key(child):
            return tuple(
                child.findtext('.//' + tag) or '' for tag in tag_list)

        elem[:] = sorted(elem, key=sort_key)

    def sort_references(self, tag_list):
        """
        Sort references based on the sub-elements list

        Nothing is sorted when the document has no ``back/ref-list``.

        Parameters
        ----------
        tag_list : list
            A list of tag types


        Returns
        -------
        tr : elementTree
            Element tree of the current file

        See Also
        --------
        sort_by_tags
        """
        elem = self.tr.find('./back/ref-list')
        if elem is not None:
            self.sort_by_tags(tag_list, elem)

        return self.tr

    def sort_footnotes(self, tag_list):
        """
        Sort footnotes based on the sub-elements list

        Nothing is sorted when the document has no ``back/fn-group``.

        Parameters
        ----------
        tag_list : list
            A list of tag types


        Returns
        -------
        tr : elementTree
            Element tree of the current file

        See Also
        --------
        sort_by_tags
        """
        elem = self.tr.find('./back/fn-group')
        if elem is not None:
            self.sort_by_tags(tag_list, elem)

        return self.tr

    def _apply_numbering_values(self, specs):
        """
        Apply all numbering specifications of ``--set-numbering-values``

        Parameters
        ----------
        specs : str
            ``;`` separated entries of the form
            ``tag,attr,value[,{lower:upper}]``; the optional fourth field is
            the range numbered in roman numerals. Blank entries are skipped.

        """
        for spec in specs.split(';'):
            if not spec.strip():
                continue
            vals = spec.split(',')

            range_count = [0, 0]
            if len(vals) > 3:
                bounds = vals[3].lstrip('{').rstrip('}').split(':')
                range_count = [int(bounds[0]), int(bounds[1])]
            # Every entry starts counting at 1 again.
            self.tr, _ = self.set_numbering_values(
                vals[0], vals[1], vals[2], 1, range_count)

    def process(self):
        """
        Process JATS-XML file and do all transformations into the elementtree

        Each transformation runs only if its command line option is set;
        afterwards the output directory is created and the tree is written
        to ``<path>/<basename of input_file>``.

        See Also
        --------
        merge_metadata, set_numbering_tags,set_uuids_for_back_matter,sort_footnotes,sort_references,set_numbering_values

        """
        clean_references = self.args.get('--clean-references')

        set_numbering_tags = self.args.get('--set-numbering-tags')
        set_unique_ids = self.args.get('--set-uuids')
        sort_footnotes = self.args.get('--sort-footnotes')
        sort_references = self.args.get('--sort-references')
        set_numbering_values = self.args.get('--set-numbering-values')

        metadata = self.args.get('--metadata')

        if metadata:
            self.tr = self.merge_metadata(metadata)
        if clean_references:
            self.tr = self.remove_references()
        if set_numbering_tags:
            self.tr = self.gv.set_numbering_tags(
                set_numbering_tags.split(','), self.tr)
        if set_unique_ids:
            self.tr = self.set_uuids_for_back_matter(
                set_unique_ids.split(','))
        if sort_footnotes:
            self.tr = self.sort_footnotes(sort_footnotes.split(','))
        if sort_references:
            self.tr = self.sort_references(sort_references.split(','))
        # Guarded like every other option: it is None when not given.
        if set_numbering_values:
            self._apply_numbering_values(set_numbering_values)

        self.gv.create_dirs_recursive(self.dr.split('/'))
        self.create_xml_file(
            os.path.join(self.dr, os.path.basename(self.f)))

    def create_xml_file(self, pth):
        """
        Write the current elementTree into the file path

        Parameters
        ----------
        pth : str
            Path of the XML file to be written

        Notes
        -----
        The tree is written without pretty printing and with an XML
        declaration. An ``IOError`` raised while writing is not propagated:
        it is printed and reported through ``self.debug.print_debug``.

        """
        try:
            self.tr.write(
                pth,
                pretty_print=False,
                xml_declaration=True
            )
            # Blank line on stdout; this form behaves the same with the
            # print statement and the print function.
            print('')
        except IOError as e:
            print(e)
            # NOTE(review): XML_FILE_NOT_CREATED is not defined in this
            # class; presumably inherited or meant to be read from self.gv.
            self.debug.print_debug(self, self.XML_FILE_NOT_CREATED)

    def run(self):
        """
        Runs the configuration on the processing object

        See Also
        --------
        process


        """
        self.process()