class Merge(Debuggable):
    """
    Standalone processing object which merges the current JATS/BITS XML
    file into the body of an already existing (or newly created) output
    document.
    """

    def __init__(self):
        """Read the command line, build the helpers and parse the input."""
        self.args = self.read_command_line()
        self.debug = Debug()
        self.settings = Settings(self.args)
        self.gv = GV(self.settings)
        self.dr = self.args.get('<path>')
        self.f = self.args.get('<input_file>')
        self.scheme = self.args.get('<scheme>')
        self.set_numbering_tags = self.args.get('--set-numbering-tags')
        self.tr = etree.parse(os.path.join(self.dr, self.f))
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()

    @staticmethod
    def read_command_line():
        """
        Reads and generates a docopt dictionary from the command line
        parameters.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='xmlMerge 0.0.1')

    def create_output_bits(self):
        """
        Create the BITS output file.

        If the output file already exists, the current file is appended to
        its ``book-body`` as a ``book-part``; otherwise a new book is built.

        See Also
        --------
        create_book_part_bits, create_book_bits, do_file_io
        """
        fuf = os.path.join(self.dr, self.gv.uuid)
        pt = os.path.join(self.dr, os.path.basename(self.gv.uuid))
        if os.path.isfile(fuf):
            trf = etree.parse(fuf)
            bp = trf.find(".//book-body")
            bp.append(self.create_book_part_bits())
        else:
            trf = self.create_book_bits()
        trf = self.process(trf)
        self.do_file_io(
            etree.tostring(trf,
                           pretty_print=False,
                           xml_declaration=True,
                           encoding='UTF-8',
                           standalone='yes'),
            'w',
            pt)

    def create_output_jats(self):
        """
        Create the JATS output file.

        If the output file already exists, the sections, references and
        footnotes of the current file are appended to it; otherwise a new
        article is built.

        See Also
        --------
        create_journal_jats, get_xml_parts, do_file_io
        """
        fuf = os.path.join(self.dr, self.gv.uuid)
        pt = os.path.join(self.dr, os.path.basename(self.gv.uuid))
        if os.path.isfile(fuf):
            trf = etree.parse(fuf)
            bpf = trf.find(".//body")
            f, bd, bk = self.get_xml_parts()
            if bd is not None:
                # list() snapshot: append() moves the child out of ``bd``.
                for sec in list(bd):
                    bpf.append(sec)
            # get_xml_parts() returns None for a missing <back>; in that
            # case there is nothing to merge into the back matter.
            if bk is not None:
                bkrf = trf.find(".//back/ref-list")
                for r in bk.findall('.//ref-list/ref'):
                    bkrf.append(r)
                bkff = trf.find(".//back/fn-group")
                for fn in bk.findall('.//fn-group/fn'):
                    bkff.append(fn)
        else:
            trf = self.create_journal_jats()
        trf = self.process(trf)
        self.do_file_io(
            etree.tostring(trf,
                           pretty_print=False,
                           xml_declaration=True,
                           encoding='UTF-8',
                           standalone='yes'),
            'w',
            pt)

    def process(self, tr):
        """
        Apply all transformations to the output tree.

        Parameters
        ----------
        tr : elementtree
            element tree as input

        Returns
        -------
        tr : elementtree
            transformed element tree

        See Also
        --------
        globals.set_numbering_tags(), set_book_part_attributes()
        """
        if self.set_numbering_tags:
            tr = self.gv.set_numbering_tags(
                self.set_numbering_tags.split(','), tr)
        self.set_book_part_attributes(tr)
        return tr

    def set_book_part_attributes(self, tr):
        """
        Add ``id`` and ``book-part-type`` attributes to every book-part.

        Parameters
        ----------
        tr : elementtree
            element tree as input

        Returns
        -------
        tr : elementtree
            transformed element tree
        """
        for i, b in enumerate(tr.findall('.//book-part')):
            b.attrib['id'] = "ch_" + str(i)
            b.attrib['book-part-type'] = "chapter"
        return tr

    def create_metadata_path(self, metadata):
        """
        Creates the folder path for the metadata file.

        Parameters
        ----------
        metadata : str
            Suffix of the metadata files

        Returns
        -------
        pth : str
            Path of the metadata file in the folder structure

        Notes
        -----
        The last four components of the input file's directory are dropped
        and replaced by ``metadata/<uuid-name>.<metadata>.xml``.
        """
        p = os.path.dirname(self.f).split(os.sep)
        del p[-4:]
        name, ext = os.path.splitext(os.path.basename(self.gv.uuid))
        p.append('metadata')
        p.append(''.join([name, '.', metadata, '.', 'xml']))
        pth = os.sep.join(p)
        self.debug.print_debug(self, 'merging headers' + str(pth))
        return pth

    def get_module_name(self):
        """
        Reads the name of the module for debugging and logging

        Returns
        -------
        name : str
            Name of the module
        """
        return 'merge'

    def create_book_bits(self):
        """
        Creates a full BITS XML book and adds the metadata header.

        Exits the program when the ``--metadata`` argument is missing.

        Returns
        -------
        book : elementtree
            Root ``book`` element containing the optional ``book-meta`` and
            a ``book-body`` with the current file as its first book-part.

        See Also
        --------
        create_metadata_path, create_book_part_bits
        """
        nsmap = {
            'xlink': "http://www.w3.org/1999/xlink",
            'mml': "http://www.w3.org/1998/Math/MathML",
            "xml": "http://www.w3.org/XML/1998/namespace"
        }
        book = etree.Element(etree.QName('book'), nsmap=nsmap)
        book.attrib['dtd-version'] = "2.1"
        book.attrib[etree.QName(
            '{http://www.w3.org/XML/1998/namespace}lang')] = "de"
        book.attrib['book-type'] = "proceedings"

        metadata = self.args.get('--metadata')
        if not metadata:
            sys.exit('Metadata argument undefined')

        pth = self.create_metadata_path(metadata)
        self.debug.print_console(self, 'merging headers' + str(pth))
        if os.path.isfile(pth):
            bp = etree.parse(pth).find('.//book-meta')
            # find() yields None when the file has no <book-meta>;
            # inserting None would raise a TypeError.
            if bp is not None:
                book.insert(0, bp)
        else:
            # A missing metadata file is reported but not fatal.
            self.debug.print_console(
                self, self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + str(pth))

        bd = etree.Element("book-body")
        bd.append(self.create_book_part_bits())
        book.append(bd)
        return book

    def create_journal_jats(self):
        """
        Creates a full JATS XML article and optionally adds metadata.

        Returns
        -------
        journal : elementtree
            Root ``article`` element with front, body and back matter.

        See Also
        --------
        create_metadata_path, get_xml_parts
        """
        nsmap = {
            'xlink': "http://www.w3.org/1999/xlink",
            'mml': "http://www.w3.org/1998/Math/MathML",
            "xml": "http://www.w3.org/XML/1998/namespace"
        }
        journal = etree.Element(etree.QName('article'), nsmap=nsmap)
        journal.attrib['dtd-version'] = "3.0"
        journal.attrib[etree.QName(
            '{http://www.w3.org/XML/1998/namespace}lang')] = "de"

        f, bd, bk = self.get_xml_parts()
        metadata = self.args.get('--metadata')
        if metadata:
            pth = self.create_metadata_path(metadata)
            if os.path.isfile(pth):
                bpm = etree.parse(pth).find('.')
                if bpm is not None:
                    if bpm.getroottree().getroot().tag == 'front':
                        journal.insert(0, bpm)
                    else:
                        self.debug.print_debug(
                            self, 'front metadata unspecified')
                        sys.exit(1)
        elif f is not None:
            # Without external metadata the input's own front matter is used.
            journal.insert(0, f)

        if bd is not None:
            journal.append(bd)

        if bk is not None and len(bk) > 0:
            journal.append(bk)
        else:
            # Provide empty containers for later merges to append to.
            back = etree.Element(etree.QName('back'))
            back.append(etree.Element(etree.QName('fn-group')))
            back.append(etree.Element(etree.QName('ref-list')))
            journal.append(back)
        return journal

    def create_book_part_bits(self):
        """
        Builds a ``book-part`` element from the parts of the input file.

        Returns
        -------
        bp : elementtree
            Book part element; missing parts are skipped and an empty front
            matter is not copied.
        """
        f, bd, bk = self.get_xml_parts()
        bp = etree.Element("book-part")
        if f is not None and len(f):
            bp.append(f)
        if bd is not None:
            bp.append(bd)
        if bk is not None:
            bp.append(bk)
        return bp

    def get_xml_parts(self):
        """
        Returns the front-matter, body and back-matter of the input file.

        Returns
        -------
        f : elementtree
            ``front`` element, or ``book-part-meta`` if there is no front,
            or None
        bd : elementtree
            ``body`` element or None
        bk : elementtree
            ``back`` element or None
        """
        r = self.tr.getroot()
        f = r.find(".//front")
        if f is None:
            f = r.find(".//book-part-meta")
        return f, r.find(".//body"), r.find(".//back")

    def do_file_io(self, s, mode, pth):
        """
        Executes read or write operations on a path

        Parameters
        ----------
        s : str or bytes
            Content to be written, or None for read
        mode : str
            w for write, r for read
        pth : str
            Path to the file to be read or written

        Returns
        -------
        content : str or None
            File content in read mode, otherwise None

        Notes
        -----
        An IOError is logged, printed and terminates the program with
        exit status 1.
        """
        try:
            if mode == 'w':
                # etree.tostring(..., encoding='UTF-8') returns bytes, which
                # need a binary handle and a bytes argument to rstrip().
                if isinstance(s, bytes):
                    with open(pth, 'wb') as handle:
                        handle.write(s.rstrip(b'\r\n'))
                else:
                    with open(pth, 'w') as handle:
                        handle.write(s.rstrip('\r\n'))
            elif mode == 'r':
                with open(pth, mode) as handle:
                    return handle.read()
            else:
                # Keep the original side effect of opening the path.
                with open(pth, mode):
                    pass
        except IOError as i:
            self.debug.print_debug(self, i)
            print(i)
            sys.exit(1)
        return None

    def run(self):
        """
        Runs the configuration on the processing object.

        Creates the output directory and merges the input file into the
        output document according to the selected scheme.

        See Also
        --------
        create_output_bits, create_output_jats
        """
        self.gv.create_dirs_recursive(self.dr.split('/'))
        if self.scheme == 'bits':
            self.create_output_bits()
        elif self.scheme == 'jats':
            # create_output_jats() returns nothing; its result must not
            # replace the parsed input tree in self.tr.
            self.create_output_jats()
class Prepare(Debuggable):
    """
    Standalone processing object to combine, clean and modify a JATS XML
    file and optionally inject BITS metadata headers.

    Features
    --------
    add id numbering for any tag type, clean comments, remove unused
    references, set numbering, add unique ids to certain tag types,
    sort references
    """

    def __init__(self):
        """Read the command line, build the helpers and parse the input."""
        self.args = self.read_command_line()
        self.debug = Debug()
        self.settings = Settings(self.args)
        self.gv = GV(self.settings)
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()
        self.dr = self.args.get('<path>')
        self.f = self.args.get('<input_file>')
        self.stand_alone = self.args.get('--stand-alone')
        self.tr = etree.parse(os.path.join(self.dr, self.f))

    @staticmethod
    def read_command_line():
        """
        Reads and generates a docopt dictionary from the command line
        parameters.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='xml 0.1')

    def citations_to_references(self):
        """
        Turns every ``mixed-citation`` into a paragraph of a new
        "References" section at the end of the body and replaces the
        reference list by an empty one.

        Returns
        -------
        tr : elementtree
        """
        t = self.tr.getroot()
        bd = t.find('.//body')
        sc = etree.Element('sec')
        ttl = etree.Element('title')
        ttl.text = 'References'
        sc.append(ttl)
        mc = t.findall('.//mixed-citation')
        if len(mc) > 0:
            for r in mc:
                r.tag = 'p'
                sc.append(r)
            bd.append(sc)
            rlst = t.find('.//ref-list')
            if rlst is not None:
                rlst.getparent().remove(rlst)
            bck = t.find('.//back')
            if bck is not None:
                bck.append(etree.Element('ref-list'))
        return self.tr

    def clean_references(self):
        """
        Removes references which are not linked and citation links whose
        target does not exist.

        Returns
        -------
        tr : elementtree

        See Also
        --------
        remove_element, remove_tags
        """
        r = self.tr.getroot()

        # 1. Drop every <ref> without an id or without a citing xref.
        cited = set(x.get('rid')
                    for x in r.findall(".//xref[@ref-type='bibr']"))
        for e in r.findall('.//back/ref-list/ref'):
            ref_id = e.get('id')
            if not ref_id or ref_id not in cited:
                self.remove_element(e)

        # 2. Unwrap citation links that point to a missing reference.
        known = set(e.get('id') for e in r.findall('.//back/ref-list/ref'))
        for e in r.findall(".//xref[@ref-type='bibr']"):
            rid = e.get('rid')
            if rid is not None and rid in known:
                continue
            parent = e.getparent()
            if parent is None:
                # Already unwrapped while handling an earlier sibling.
                continue
            # NOTE(review): as in the original code, ALL bibr xrefs below
            # the parent are unwrapped, not only the dangling one - confirm
            # that this is intended.
            # list() snapshot: remove_tags() mutates the tree.
            for c in list(parent.iter()):
                if c.tag == 'xref' and c.attrib.get('ref-type') == 'bibr':
                    self.remove_tags(c)
        return self.tr

    def remove_tags(self, e):
        """
        Takes an etree element and replaces it with its own text

        The element's text and tail are appended to the tail of the
        previous sibling or, if there is none, to the text of the parent.
        Child elements of ``e`` are discarded.

        Parameters
        ----------
        e : element
            Element to be replaced
        """
        parent = e.getparent()
        if parent is None:
            return
        kept = (e.text or '') + (e.tail or '')
        if kept:
            previous = e.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + kept
            else:
                parent.text = (parent.text or '') + kept
        # lxml removes the tail together with the element, which is why it
        # has been copied above.
        parent.remove(e)

    def remove_element(self, e):
        """
        Remove any element only if it has a parent

        Parameters
        ----------
        e : element
            Element to be removed
        """
        parent = e.getparent()
        if parent is not None:
            parent.remove(e)

    def set_uuids_for_back_matter(self, tags):
        """
        Replaces the ids of back matter elements, and the ``rid`` of the
        xrefs pointing to them, by unique ids.

        Parameters
        ----------
        tags : list
            list of element names; ``ref`` is matched by xrefs of type
            ``bibr``, any other name by xrefs of the same ref-type

        Returns
        -------
        tr : elementtree
        """
        root = self.tr.getroot()
        for s in tags:
            f = {}
            ref_type = 'bibr' if s == 'ref' else s
            fns = root.findall(''.join(
                ['.//xref/[@ref-type="', ref_type, '"]']))
            for i in fns:
                old = i.get('rid')
                if old is None:
                    continue
                # One new id per target, so that several xrefs to the same
                # element keep pointing to it.
                if old not in f:
                    f[old] = ''.join(['bibd', str(uuid.uuid4())])
                i.set('rid', f[old])
            for m in f:
                n = root.find(''.join(['.//' + s + '/[@id="', m, '"]']))
                # NOTE(review): the id is only replaced for targets that
                # have child elements, as in the original code - confirm.
                if n is not None and len(n) > 0:
                    n.set('id', f[m])
        return self.tr

    def set_numbering_values(self, tag, attr, value, count, range_list):
        """
        Writes sequence numbers as text into all matching elements.

        Parameters
        ----------
        tag : str
            xml tag name
        attr : str
            attribute name
        value : str
            value name
        count : int
            current sequence number
        range_list : list
            lower and upper level for the roman numbering

        Returns
        -------
        tr : elementtree
        count : int
            next unused sequence number

        See Also
        --------
        set_roman_numbers
        """
        searchTag = './/' + tag + '[@' + attr + '="' + value + '"]'
        range_count = 1
        for elem in self.tr.getroot().findall(searchTag):
            elem.text, range_count = self.set_roman_numbers(
                count, range_count, range_list)
            count += 1
        return self.tr, count

    def convert_int_to_roman(self, i):
        """
        Converts an integer number into a roman number

        Parameters
        ----------
        i : int
            integer number

        Returns
        -------
        result : str
            Roman number built from ``self.gv.numeral_map``
        """
        result = []
        for integer, numeral in self.gv.numeral_map:
            count = i // integer
            result.append(numeral * count)
            i -= integer * count
        return ''.join(result)

    def set_roman_numbers(self, count, r_count, range_list):
        """
        Returns the label for sequence number ``count``: a lower-case roman
        number inside the range, otherwise an arabic number which does not
        count the roman ones.

        Parameters
        ----------
        count : int
        r_count : int
        range_list : list
            lower and upper level for the numbering

        Returns
        -------
        val : str
        r_count : int

        See Also
        --------
        convert_int_to_roman
        """
        if int(range_list[0]) <= count <= int(range_list[1]):
            val = self.convert_int_to_roman(r_count).lower()
            r_count += 1
        else:
            val = str(count - r_count + 1)
        return val, r_count

    def merge_metadata(self, metadata):
        """
        Reads a metadata file and replaces the front matter of the current
        file by its content.

        Exits the program when the metadata file does not exist or has
        neither a ``book-part-meta`` element nor a ``front`` root.

        Parameters
        ----------
        metadata : str
            suffix of the metadata files

        Returns
        -------
        tr : elementTree
            Element tree of the current file

        See Also
        --------
        create_metadata_path
        """
        r = self.tr.getroot()
        pth = self.create_metadata_path(metadata)
        if not os.path.isfile(pth):
            self.debug.print_debug(
                self, pth + self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST)
            sys.exit(1)

        fr = r.find('.//front')
        # A missing front is treated like an empty one.
        if fr is None or not len(fr):
            self.debug.print_debug(self, 'front metadata unspecified')
            return self.tr

        bg = r.find('.//body').getparent()
        fr.getparent().remove(fr)
        meta = etree.parse(pth)
        bpm = meta.find('.//book-part-meta')
        if bpm is not None:
            bg.insert(0, bpm)
            return self.tr

        # No book-part-meta: accept a file whose root element is <front>.
        bpm = meta.find('.')
        if bpm is not None:
            if bpm.getroottree().getroot().tag == 'front':
                bg.insert(0, bpm)
            else:
                self.debug.print_debug(
                    self, 'front or bookpart metadata unspecified')
                sys.exit(1)
        return self.tr

    def create_metadata_path(self, metadata):
        """
        Creates the folder path for the metadata file.

        Parameters
        ----------
        metadata : str
            Suffix of the metadata files

        Returns
        -------
        pth : str
            Path of the metadata file in the folder structure

        Notes
        -----
        The file is expected in a sub-folder named ``metadata``. Unless
        running stand-alone with an existing input directory, the last four
        components of the input file's directory are dropped first.
        """
        p = os.path.dirname(self.f).split(os.sep)
        name, ext = os.path.splitext(os.path.basename(self.f))
        if not self.stand_alone or not os.path.exists(os.sep.join(p)):
            del p[-4:]
        p.append('metadata')
        p.append(''.join([name, '.', metadata, ext]))
        return os.sep.join(p)

    def sort_by_tags(self, tag_list, elem):
        """
        Sorts the children of an element alphabetically

        Parameters
        ----------
        tag_list : list
            A list of tag types whose text is used as sort key, in order
        elem : Element
            Element to be modified
        """
        def sort_key(child):
            # Missing sub-elements sort first; the elements themselves are
            # never compared, equal keys keep their document order.
            return [child.findtext(".//" + tag) or '' for tag in tag_list]

        elem[:] = sorted(elem, key=sort_key)

    def sort_references(self, tag_list):
        """
        Sort references based on the sub-elements list

        Parameters
        ----------
        tag_list : list
            A list of tag types

        Returns
        -------
        tr : elementTree
            Element tree of the current file

        See Also
        --------
        sort_by_tags
        """
        elem = self.tr.find('./back/ref-list')
        if elem is not None:
            self.sort_by_tags(tag_list, elem)
        return self.tr

    def sort_footnotes(self, tag_list):
        """
        Sort footnotes based on the sub-elements list

        Parameters
        ----------
        tag_list : list
            A list of tag types

        Returns
        -------
        tr : elementTree
            Element tree of the current file

        See Also
        --------
        sort_by_tags
        """
        elem = self.tr.find('./back/fn-group')
        if elem is not None:
            self.sort_by_tags(tag_list, elem)
        return self.tr

    def process(self):
        """
        Applies all transformations selected on the command line to the
        element tree and writes the result into the output directory.

        See Also
        --------
        merge_metadata, citations_to_references, clean_references,
        set_uuids_for_back_matter, sort_footnotes, sort_references,
        set_numbering_values, create_xml_file
        """
        citations_to_references = self.args.get('--citations-to-references')
        clean_references = self.args.get('--clean-references')
        set_numbering_tags = self.args.get('--set-numbering-tags')
        set_unique_ids = self.args.get('--set-uuids')
        sort_footnotes = self.args.get('--sort-footnotes')
        sort_references = self.args.get('--sort-references')
        set_numbering_values = self.args.get('--set-numbering-values')
        metadata = self.args.get('--metadata')

        if metadata:
            self.tr = self.merge_metadata(metadata)
        if citations_to_references:
            self.tr = self.citations_to_references()
        if clean_references:
            self.tr = self.clean_references()
        if set_numbering_tags:
            self.tr = self.gv.set_numbering_tags(
                set_numbering_tags.split(','), self.tr)
        if set_unique_ids:
            self.tr = self.set_uuids_for_back_matter(
                set_unique_ids.split(','))
        if sort_footnotes:
            self.tr = self.sort_footnotes(sort_footnotes.split(','))
        if sort_references:
            self.tr = self.sort_references(sort_references.split(','))

        # The option is optional; splitting None would raise.
        if set_numbering_values:
            for s in set_numbering_values.split(';'):
                # Each entry is "tag,attr,value[,{lower:upper}]".
                vals = s.split(',')
                count = 1
                range_count = [0, 0]
                if len(vals) > 3:
                    r = vals[3].lstrip('{').rstrip('}').split(':')
                    range_count = [int(r[0]), int(r[1])]
                self.tr, count = self.set_numbering_values(
                    vals[0], vals[1], vals[2], count, range_count)

        self.gv.create_dirs_recursive(self.dr.split('/'))
        self.create_xml_file(os.path.join(self.dr, os.path.basename(self.f)))

    def get_module_name(self):
        """
        Reads the name of the module for debugging and logging

        Returns
        -------
        name : str
            Name of the module
        """
        return 'prepare'

    def create_xml_file(self, pth):
        """
        Write the current elementTree into the file path

        Parameters
        ----------
        pth : str
            Path of the file to be written

        Notes
        -----
        The file is written with an XML declaration and without pretty
        printing. An IOError is printed and logged, not re-raised.
        """
        try:
            self.tr.write(pth, pretty_print=False, xml_declaration=True)
            print()
        except IOError as e:
            print(e)
            # NOTE(review): XML_FILE_NOT_CREATED is not defined in this
            # class; presumably inherited from Debuggable - confirm.
            self.debug.print_debug(self, self.XML_FILE_NOT_CREATED)

    def run(self):
        """
        Runs the configuration on the processing object

        See Also
        --------
        process
        """
        self.process()
class Merge(Debuggable):
    """
    Standalone processing object which merges the current JATS/BITS XML
    file into the body of a BITS-XML document.
    """

    def __init__(self):
        """Read the command line, build the helpers and parse the input."""
        self.args = self.read_command_line()
        self.debug = Debug()
        self.gv = GV()
        self.uid = self.gv.uuid
        self.dr = self.args.get("<path>")
        self.f = self.args.get("<input_file>")
        self.scheme = self.args.get("<scheme>")
        self.set_numbering_tags = self.args.get("--set-numbering-tags")
        self.tr = etree.parse(os.path.join(self.dr, self.f))
        Debuggable.__init__(self, "Main")
        if self.args.get("--debug"):
            self.debug.enable_debug()

    @staticmethod
    def read_command_line():
        """
        Reads and generates a docopt dictionary from the command line
        parameters.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version="xmlMerge 0.0.1")

    def create_output_bits(self):
        """
        Create the BITS output file.

        If the output file already exists, the current file is appended to
        its ``book-body`` as a ``book-part``; otherwise a new book is built.

        See Also
        --------
        create_book_part_bits, create_book_bits, do_file_io
        """
        fuf = os.path.join(self.dr, self.uid)
        pt = os.path.join(self.dr, os.path.basename(self.uid))
        if os.path.isfile(fuf):
            trf = etree.parse(fuf)
            bp = trf.find(".//book-body")
            bp.append(self.create_book_part_bits())
        else:
            trf = self.create_book_bits()
        trf = self.process(trf)
        self.do_file_io(
            etree.tostring(trf, pretty_print=True, xml_declaration=True,
                           encoding="UTF-8", standalone="yes"),
            "w",
            pt,
        )

    def process(self, tr):
        """
        Apply all transformations to the output tree.

        Parameters
        ----------
        tr : elementtree
            element tree as input

        Returns
        -------
        tr : elementtree
            transformed element tree

        See Also
        --------
        globals.set_numbering_tags(), set_book_part_attributes()
        """
        if self.set_numbering_tags:
            tr = self.gv.set_numbering_tags(
                self.set_numbering_tags.split(","), tr)
        self.set_book_part_attributes(tr)
        return tr

    def set_book_part_attributes(self, tr):
        """
        Add ``id`` and ``book-part-type`` attributes to every book-part.

        Parameters
        ----------
        tr : elementtree
            element tree as input

        Returns
        -------
        tr : elementtree
            transformed element tree
        """
        for i, b in enumerate(tr.findall(".//book-part")):
            b.attrib["id"] = "ch_" + str(i)
            b.attrib["book-part-type"] = "chapter"
        return tr

    def create_metadata_path(self, metadata):
        """
        Creates the folder path for the metadata file.

        Parameters
        ----------
        metadata : str
            Suffix of the metadata files

        Returns
        -------
        pth : str
            Path of the metadata file in the folder structure

        Notes
        -----
        The last four components of the input file's directory are dropped
        and replaced by ``metadata/<uid-name>.<metadata><uid-extension>``.
        """
        p = os.path.dirname(self.f).split(os.sep)
        del p[-4:]
        name, ext = os.path.splitext(os.path.basename(self.uid))
        p.append("metadata")
        p.append("".join([name, ".", metadata, ext]))
        return os.sep.join(p)

    def create_book_bits(self):
        """
        Creates a full BITS XML book and optionally adds metadata.

        Returns
        -------
        book : elementtree
            Root ``book`` element containing the optional ``book-meta`` and
            a ``book-body`` with the current file as its first book-part.

        See Also
        --------
        create_metadata_path, create_book_part_bits
        """
        nsmap = {
            "xlink": "http://www.w3.org/1999/xlink",
            "mml": "http://www.w3.org/1998/Math/MathML",
            "xml": "http://www.w3.org/XML/1998/namespace",
        }
        book = etree.Element(etree.QName("book"), nsmap=nsmap)
        book.attrib["dtd-version"] = "2.1"
        book.attrib[etree.QName(
            "{http://www.w3.org/XML/1998/namespace}lang")] = "de"
        book.attrib["book-type"] = "proceedings"

        metadata = self.args.get("--metadata")
        if metadata:
            pth = self.create_metadata_path(metadata)
            if os.path.isfile(pth):
                bp = etree.parse(pth).find(".//book-meta")
                # find() yields None when the file has no <book-meta>;
                # inserting None would raise a TypeError.
                if bp is not None:
                    book.insert(0, bp)

        bd = etree.Element("book-body")
        bd.append(self.create_book_part_bits())
        book.append(bd)
        return book

    def create_book_part_bits(self):
        """
        Builds a ``book-part`` element from the parts of the input file.

        Returns
        -------
        bp : elementtree
            Book part element; missing parts are skipped and an empty front
            matter is not copied.
        """
        f, bd, bk = self.get_xml_parts()
        bp = etree.Element("book-part")
        if f is not None and len(f):
            bp.append(f)
        # get_xml_parts() returns None for missing parts.
        if bd is not None:
            bp.append(bd)
        if bk is not None:
            bp.append(bk)
        return bp

    def get_xml_parts(self):
        """
        Returns the front-matter, body and back-matter of the input file.

        Returns
        -------
        f : elementtree
            ``front`` element, or ``book-part-meta`` if there is no front,
            or None
        bd : elementtree
            ``body`` element or None
        bk : elementtree
            ``back`` element or None
        """
        r = self.tr.getroot()
        f = r.find(".//front")
        if f is None:
            f = r.find(".//book-part-meta")
        return f, r.find(".//body"), r.find(".//back")

    def do_file_io(self, s, mode, pth):
        """
        Executes read or write operations on a path

        Parameters
        ----------
        s : str or bytes
            Content to be written, or None for read
        mode : str
            w for write, r for read
        pth : str
            Path to the file to be read or written

        Returns
        -------
        content : str or None
            File content in read mode, otherwise None

        Notes
        -----
        An IOError is logged, printed and terminates the program with
        exit status 1.
        """
        try:
            if mode == "w":
                # etree.tostring(..., encoding="UTF-8") returns bytes,
                # which cannot be written to a text-mode handle.
                with open(pth, "wb" if isinstance(s, bytes) else "w") as out:
                    out.write(s)
            elif mode == "r":
                with open(pth, mode) as handle:
                    return handle.read()
            else:
                # Keep the original side effect of opening the path.
                with open(pth, mode):
                    pass
        except IOError as i:
            self.debug.print_debug(self, i)
            print(i)
            sys.exit(1)
        return None

    def run(self):
        """
        Runs the configuration on the processing object.

        Creates the output directory and merges the input file into the
        output document according to the selected scheme.

        See Also
        --------
        create_output_bits
        """
        self.gv.create_dirs_recursive(self.dr.split("/"))
        if self.scheme == "bits":
            self.create_output_bits()
        elif self.scheme == "jats":
            # NOTE(review): create_output_jats is not defined in this
            # class; unless a base class provides it, the jats scheme
            # fails here with an AttributeError - confirm.
            self.tr = self.create_output_jats(self.tr)
class Process(Debuggable):
    """
    Standalone processing object to combine, clean and modify a JATS XML
    file and optionally inject BITS metadata headers.

    Features
    --------
    add id numbering for any tag type, clean comments, remove unused
    references, set numbering, add unique ids to certain tag types,
    sort references
    """

    def __init__(self):
        """Read the command line, build the helpers and parse the input."""
        self.args = self.read_command_line()
        self.debug = Debug()
        self.gv = GV()
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()
        self.dr = self.args.get('<path>')
        self.f = self.args.get('<input_file>')
        self.tr = etree.parse(os.path.join(self.dr, self.f))

    @staticmethod
    def read_command_line():
        """
        Reads and generates a docopt dictionary from the command line
        parameters.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='xml 0.1')

    def remove_references(self):
        """
        Removes references which are not linked and citation links whose
        target does not exist.

        Returns
        -------
        tr : elementtree

        See Also
        --------
        remove_element, remove_tags
        """
        r = self.tr.getroot()

        # 1. Drop every <ref> without an id or without a citing xref.
        cited = set(x.get('rid')
                    for x in r.findall(".//xref[@ref-type='bibr']"))
        for e in r.findall('.//back/ref-list/ref'):
            ref_id = e.get('id')
            if not ref_id or ref_id not in cited:
                self.remove_element(e)

        # 2. Unwrap citation links that point to a missing reference.
        known = set(e.get('id') for e in r.findall('.//back/ref-list/ref'))
        for e in r.findall(".//xref[@ref-type='bibr']"):
            rid = e.get('rid')
            if rid is not None and rid in known:
                continue
            parent = e.getparent()
            if parent is None:
                # Already unwrapped while handling an earlier sibling.
                continue
            # NOTE(review): as in the original code, ALL bibr xrefs below
            # the parent are unwrapped, not only the dangling one - confirm
            # that this is intended.
            # list() snapshot: remove_tags() mutates the tree.
            for c in list(parent.iter()):
                if c.tag == 'xref' and c.attrib.get('ref-type') == 'bibr':
                    self.remove_tags(c)
        return self.tr

    def remove_tags(self, e):
        """
        Takes an etree element and replaces it with its own text

        The element's text and tail are appended to the tail of the
        previous sibling or, if there is none, to the text of the parent.
        Child elements of ``e`` are discarded.

        Parameters
        ----------
        e : element
            Element to be replaced
        """
        parent = e.getparent()
        if parent is None:
            return
        kept = (e.text or '') + (e.tail or '')
        if kept:
            previous = e.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + kept
            else:
                parent.text = (parent.text or '') + kept
        # lxml removes the tail together with the element, which is why it
        # has been copied above.
        parent.remove(e)

    def remove_element(self, e):
        """
        Remove any element only if it has a parent

        Parameters
        ----------
        e : element
            Element to be removed
        """
        parent = e.getparent()
        if parent is not None:
            parent.remove(e)

    def set_uuids_for_back_matter(self, tags):
        """
        Replaces the ids of back matter elements, and the ``rid`` of the
        xrefs pointing to them, by unique ids.

        Parameters
        ----------
        tags : list
            list of element names; ``ref`` is matched by xrefs of type
            ``bibr``, any other name by xrefs of the same ref-type

        Returns
        -------
        tr : elementtree
        """
        root = self.tr.getroot()
        for s in tags:
            f = {}
            ref_type = 'bibr' if s == 'ref' else s
            fns = root.findall(
                ''.join(['.//xref/[@ref-type="', ref_type, '"]']))
            for i in fns:
                old = i.get('rid')
                if old is None:
                    continue
                # One new id per target, so that several xrefs to the same
                # element keep pointing to it. ``.hex`` replaces the
                # Python 2 only ``get_hex()`` and yields the same format.
                if old not in f:
                    f[old] = ''.join(['bibd', uuid.uuid4().hex])
                i.set('rid', f[old])
            for m in f:
                n = root.find(''.join(['.//' + s + '/[@id="', m, '"]']))
                # NOTE(review): the id is only replaced for targets that
                # have child elements, as in the original code - confirm.
                if n is not None and len(n) > 0:
                    n.set('id', f[m])
        return self.tr

    def set_numbering_values(self, tag, attr, value, count, range_list):
        """
        Writes sequence numbers as text into all matching elements.

        Parameters
        ----------
        tag : str
            xml tag name
        attr : str
            attribute name
        value : str
            value name
        count : int
            current sequence number
        range_list : list
            lower and upper level for the roman numbering

        Returns
        -------
        tr : elementtree
        count : int
            next unused sequence number

        See Also
        --------
        set_roman_numbers
        """
        searchTag = './/' + tag + '[@' + attr + '="' + value + '"]'
        range_count = 1
        for elem in self.tr.getroot().findall(searchTag):
            elem.text, range_count = self.set_roman_numbers(
                count, range_count, range_list)
            count += 1
        return self.tr, count

    def convert_int_to_roman(self, i):
        """
        Converts an integer number into a roman number

        Parameters
        ----------
        i : int
            integer number

        Returns
        -------
        result : str
            Roman number built from ``self.gv.numeral_map``
        """
        result = []
        for integer, numeral in self.gv.numeral_map:
            count = i // integer
            result.append(numeral * count)
            i -= integer * count
        return ''.join(result)

    def set_roman_numbers(self, count, r_count, range_list):
        """
        Returns the label for sequence number ``count``: a lower-case roman
        number inside the range, otherwise an arabic number which does not
        count the roman ones.

        Parameters
        ----------
        count : int
        r_count : int
        range_list : list
            lower and upper level for the numbering

        Returns
        -------
        val : str
        r_count : int

        See Also
        --------
        convert_int_to_roman
        """
        if int(range_list[0]) <= count <= int(range_list[1]):
            val = self.convert_int_to_roman(r_count).lower()
            r_count += 1
        else:
            val = str(count - r_count + 1)
        return val, r_count

    def merge_metadata(self, metadata):
        """
        Reads a metadata file and replaces the front matter of the current
        file by its ``book-part-meta`` element.

        Parameters
        ----------
        metadata : str
            suffix of the metadata files

        Returns
        -------
        tr : elementTree
            Element tree of the current file; unchanged when the metadata
            file is missing or contains no ``book-part-meta``

        See Also
        --------
        create_metadata_path
        """
        r = self.tr.getroot()
        pth = self.create_metadata_path(metadata)
        if not os.path.isfile(pth):
            self.debug.print_debug(
                self, pth + self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST)
            return self.tr

        # Look up the replacement first so that the existing front matter
        # is only dropped when there is something to put in its place.
        bpm = etree.parse(pth).find('.//book-part-meta')
        if bpm is None:
            self.debug.print_debug(
                self, 'front or bookpart metadata unspecified')
            return self.tr

        fr = r.find('.//front')
        if fr is not None:
            fr.getparent().remove(fr)
        bg = r.find('.//body').getparent()
        bg.insert(0, bpm)
        return self.tr

    def create_metadata_path(self, metadata):
        """
        Creates the folder path for the metadata file.

        Parameters
        ----------
        metadata : str
            Suffix of the metadata files

        Returns
        -------
        pth : str
            Path of the metadata file in the folder structure

        Notes
        -----
        The last four components of the input file's directory are dropped
        and replaced by ``metadata/<name>.<metadata><extension>``.
        """
        p = os.path.dirname(self.f).split(os.sep)
        del p[-4:]
        name, ext = os.path.splitext(os.path.basename(self.f))
        p.append('metadata')
        p.append(''.join([name, '.', metadata, ext]))
        return os.sep.join(p)

    def sort_by_tags(self, tag_list, elem):
        """
        Sorts the children of an element alphabetically

        Parameters
        ----------
        tag_list : list
            A list of tag types whose text is used as sort key, in order
        elem : Element
            Element to be modified
        """
        def sort_key(child):
            # Missing sub-elements sort first; the elements themselves are
            # never compared, equal keys keep their document order.
            return [child.findtext(".//" + tag) or '' for tag in tag_list]

        elem[:] = sorted(elem, key=sort_key)

    def sort_references(self, tag_list):
        """
        Sort references based on the sub-elements list

        Parameters
        ----------
        tag_list : list
            A list of tag types

        Returns
        -------
        tr : elementTree
            Element tree of the current file

        See Also
        --------
        sort_by_tags
        """
        elem = self.tr.find('./back/ref-list')
        if elem is not None:
            self.sort_by_tags(tag_list, elem)
        return self.tr

    def sort_footnotes(self, tag_list):
        """
        Sort footnotes based on the sub-elements list

        Parameters
        ----------
        tag_list : list
            A list of tag types

        Returns
        -------
        tr : elementTree
            Element tree of the current file

        See Also
        --------
        sort_by_tags
        """
        elem = self.tr.find('./back/fn-group')
        if elem is not None:
            self.sort_by_tags(tag_list, elem)
        return self.tr

    def process(self):
        """
        Applies all transformations selected on the command line to the
        element tree and writes the result into the output directory.

        See Also
        --------
        merge_metadata, remove_references, set_uuids_for_back_matter,
        sort_footnotes, sort_references, set_numbering_values,
        create_xml_file
        """
        clean_references = self.args.get('--clean-references')
        set_numbering_tags = self.args.get('--set-numbering-tags')
        set_unique_ids = self.args.get('--set-uuids')
        sort_footnotes = self.args.get('--sort-footnotes')
        sort_references = self.args.get('--sort-references')
        set_numbering_values = self.args.get('--set-numbering-values')
        metadata = self.args.get('--metadata')

        if metadata:
            self.tr = self.merge_metadata(metadata)
        if clean_references:
            self.tr = self.remove_references()
        if set_numbering_tags:
            self.tr = self.gv.set_numbering_tags(
                set_numbering_tags.split(','), self.tr)
        if set_unique_ids:
            self.tr = self.set_uuids_for_back_matter(
                set_unique_ids.split(','))
        if sort_footnotes:
            self.tr = self.sort_footnotes(sort_footnotes.split(','))
        if sort_references:
            self.tr = self.sort_references(sort_references.split(','))

        # The option is optional; splitting None would raise.
        if set_numbering_values:
            for s in set_numbering_values.split(';'):
                # Each entry is "tag,attr,value[,{lower:upper}]".
                vals = s.split(',')
                count = 1
                range_count = [0, 0]
                if len(vals) > 3:
                    r = vals[3].lstrip('{').rstrip('}').split(':')
                    range_count = [int(r[0]), int(r[1])]
                self.tr, count = self.set_numbering_values(
                    vals[0], vals[1], vals[2], count, range_count)

        self.gv.create_dirs_recursive(self.dr.split('/'))
        self.create_xml_file(os.path.join(self.dr, os.path.basename(self.f)))

    def create_xml_file(self, pth):
        """
        Write the current elementTree into the file path

        Parameters
        ----------
        pth : str
            Path of the file to be written

        Notes
        -----
        The file is written with an XML declaration and without pretty
        printing. An IOError is printed and logged, not re-raised.
        """
        try:
            self.tr.write(pth, pretty_print=False, xml_declaration=True)
            # Function-call form of the former ``print`` statement.
            print()
        except IOError as e:
            print(e)
            # NOTE(review): XML_FILE_NOT_CREATED is not defined in this
            # class; presumably inherited from Debuggable - confirm.
            self.debug.print_debug(self, self.XML_FILE_NOT_CREATED)

    def run(self):
        """
        Runs the configuration on the processing object

        See Also
        --------
        process
        """
        self.process()