class Disseminate(Debuggable):
    """Converts a JATS/BITS XML input file into FO or PDF output.

    FO output is produced with saxon; PDF output is produced by piping the
    generated FO through an FO processor (Apache FOP or Antenna House).
    """

    def __init__(self):
        self.args = self.read_command_line()
        self.debug = Debug()
        self.settings = Settings(self.args)
        self.gv = GV(self.settings)
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()
        self.dr = self.args.get('<path>')
        self.f = self.args.get('<input_file>')
        self.out_type = self.args.get('--out-type').lower()
        # Folder containing this script; used to locate bundled tools
        # (saxon, FOP) and stylesheets.
        self.script_path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))

    @staticmethod
    def read_command_line():
        """Read and generate a docopt dictionary from the command line.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='Disseminate 0.1')

    def get_saxon_path(self):
        """Locate the saxon jar, preferring the bundled copy.

        Returns
        -------
        str or bool
            Absolute path of the saxon jar if found (bundled copy first,
            then the ``--saxon`` override); False otherwise.
        """
        bundled = os.path.join(self.script_path, self.gv.apps.get('saxon'))
        if os.path.isfile(bundled):
            return bundled
        override = self.args.get('--saxon')
        if override and os.path.isfile(override):
            return override
        return False

    def get_module_name(self):
        """Return the module name used for debugging and logging."""
        return 'disseminate'

    def process(self, args):
        """Run a conversion tool as a system process.

        Parameters
        ----------
        args : list
            Application executable and arguments in the correct order.

        Returns
        -------
        output : bytes
            System standard output.
        err : bytes
            System standard error.
        exit_code : int
            System exit code.

        See Also
        --------
        subprocess.Popen()
        """
        m = ' '.join(args).strip().split(' ')
        print(' '.join(args))
        # Fix: stderr must be piped too -- the original only piped stdout,
        # so `err` was always None and the failure branch printed nothing
        # useful.
        process = Popen(m, stdout=PIPE, stderr=PIPE)
        output, err = process.communicate()
        exit_code = process.wait()
        if exit_code != 0:
            # Fix: treat any non-zero exit as failure (was `== 1`, which
            # silently accepted other error codes).
            print(err)
            sys.exit(1)
        return output, err, exit_code

    def run(self):
        """Run the converters for the requested output type.

        See Also
        --------
        create_output
        """
        self.create_output(self.out_type)

    def create_output(self, out_type):
        """Create FO or PDF output for every formatter/medium combination.

        Parameters
        ----------
        out_type : str
            Output type, 'fo' or 'pdf'.

        See Also
        --------
        run_saxon(), get_saxon_path(), run_fop_processor()
        """
        formatters = self.args.get('--formatter').split(',')
        mediums = self.args.get('--medium').split(',')
        for formatter in formatters:
            formatter = formatter.lower()
            for medium in mediums:
                medium = medium.lower()
                # NOTE(review): splitting a single path on os.pathsep
                # (':' / ';') looks odd; os.sep may have been intended --
                # confirm against create_dirs_recursive's expectations.
                self.gv.create_dirs_recursive(
                    self.args.get('<path>').split(os.pathsep))
                args = None
                # Fix: honor the `out_type` parameter (the original tested
                # self.out_type, silently ignoring the argument).
                if out_type == 'fo':
                    self.debug.print_console(
                        self, self.gv.RUNNING_FO_CONVERSION)
                    saxon_path = self.get_saxon_path()
                    args = self.run_saxon(saxon_path, formatter, medium)
                elif out_type == 'pdf':
                    self.debug.print_console(
                        self, self.gv.RUNNING_PDF_CONVERSION)
                    args = self.run_fop_processor(formatter, medium)
                if args is None:
                    # Fix: for an unknown output type the original raised
                    # UnboundLocalError on `args`; skip instead.
                    self.debug.print_debug(
                        self, 'unsupported output type: ' + str(out_type))
                    continue
                output, err, exit_code = self.process(args)

    def run_fop_processor(self, formatter, medium):
        """Build the command line for the configured FO processor.

        Parameters
        ----------
        formatter : str
            Name of the FO formatter ('fop' or 'ah').
        medium : str
            Name of the medium.

        Returns
        -------
        args : list
            Processor execution path and arguments; empty if the binary
            is unavailable.
        """
        args = []
        if formatter.lower() == 'fop':
            pth = os.path.join(self.script_path, self.gv.apps.get('fop'))
            if self.gv.check_program(pth):
                args = self.run_apache_fop(pth, formatter, medium)
        elif formatter.lower() == 'ah':
            pth = self.gv.apps.get('ah')
            if self.gv.check_program(pth):
                args = self.run_ah_fop(pth, formatter, medium)
        return args

    def run_ah_fop(self, pth, formatter, medium):
        """Build the Antenna House command line (FO in, PDF out)."""
        args = [pth]
        args.append('-d')
        args.append('{}/{}.{}.{}.fo'.format(
            os.path.dirname(self.f), self.gv.uuid, formatter, medium))
        args.append('-o')
        args.append('{}/{}.{}.{}.pdf'.format(
            self.dr, self.gv.uuid, formatter, medium))
        return args

    def run_apache_fop(self, pth, formatter, medium):
        """Build the Apache FOP command line (FO in, PDF out, with config)."""
        style_path = '{}/configurations/fop/conf/{}.{}.xml'.format(
            self.script_path, formatter, medium)
        args = [pth]
        args.append('-fo')
        args.append('{}/{}.{}.{}.fo'.format(
            os.path.dirname(self.f), self.gv.uuid, formatter, medium))
        args.append('-pdf')
        args.append('{}/{}.{}.{}.pdf'.format(
            self.dr, self.gv.uuid, formatter, medium))
        args.append('-c')
        args.append(style_path)
        return args

    def run_saxon(self, saxon_path, formatter, medium):
        """Create the executable path for saxon.

        Parameters
        ----------
        saxon_path : str
            Absolute path of the saxon binary jar file.
        formatter : str
            Name of the FO formatter.
        medium : str
            Name of the medium.

        Returns
        -------
        args : list
            List of arguments for the saxon execution path.
        """
        args = ["java", "-jar", saxon_path]
        if self.args.get('--xsl'):
            xsl = self.script_path.split(os.sep)
            xsl.append('stylesheets')
            xsl.append(self.args.get('--xsl'))
            args.append("-xsl:" + os.sep.join(xsl))
        s = self.args.get('<input_file>')
        if os.path.exists(s):
            args.append("-s:" + s)
        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + ' ' + s)
            sys.exit(1)
        file_name = '.'.join(
            [self.gv.uuid, formatter.lower(), medium.lower(), 'fo'])
        args.append("-o:" + os.path.join(self.args.get('<path>'), file_name))
        args.append('formatter=' + formatter.lower())
        args.append('medium=' + medium.lower())
        return args
class Merge(Debuggable):
    """
    Standalone processing object which merges the current JATS/BITS XML
    file into the body of a BITS-XML document.
    """

    def __init__(self):
        self.args = self.read_command_line()
        self.debug = Debug()
        self.settings = Settings(self.args)
        self.gv = GV(self.settings)
        self.dr = self.args.get('<path>')
        self.f = self.args.get('<input_file>')
        self.scheme = self.args.get('<scheme>')
        self.set_numbering_tags = self.args.get('--set-numbering-tags')
        # Parsed tree of the input file; transformed and merged by run().
        self.tr = etree.parse(os.path.join(self.dr, self.f))
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()

    @staticmethod
    def read_command_line():
        """Read and generate a docopt dictionary from the command line.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='xmlMerge 0.0.1')

    def create_output_bits(self):
        """Create the BITS output file.

        Generates a new file if no merged file exists yet; otherwise the
        current file is appended to the book body as a book-part.

        See Also
        --------
        create_book_part_bits, create_book_bits, do_file_io
        """
        fuf = os.path.join(self.dr, self.gv.uuid)
        pt = os.path.join(self.dr, os.path.basename(self.gv.uuid))
        if os.path.isfile(fuf):
            # Merged file already exists: append this input as a book-part.
            trf = etree.parse(fuf)
            bp = trf.find(".//book-body")
            bp.append(self.create_book_part_bits())
        else:
            trf = self.create_book_bits()
        trf = self.process(trf)
        self.do_file_io(
            etree.tostring(trf, pretty_print=False, xml_declaration=True,
                           encoding='UTF-8', standalone='yes'),
            'w', pt)

    def create_output_jats(self):
        """Create the JATS output file.

        Generates a new file if none exists; otherwise merges body
        sections, references and footnotes into the existing file.

        See Also
        --------
        create_journal_jats, get_xml_parts, do_file_io
        """
        fuf = os.path.join(self.dr, self.gv.uuid)
        pt = os.path.join(self.dr, os.path.basename(self.gv.uuid))
        if os.path.isfile(fuf):
            trf = etree.parse(fuf)
            bpf = trf.find(".//body")
            f, bd, bk = self.get_xml_parts()
            if bd is not None:
                for sec in list(bd):
                    bpf.append(sec)
            bkrf = trf.find(".//back/ref-list")
            for r in bk.findall('.//ref-list/ref'):
                bkrf.append(r)
            bkff = trf.find(".//back/fn-group")
            for fn in bk.findall('.//fn-group/fn'):
                bkff.append(fn)
        else:
            trf = self.create_journal_jats()
        trf = self.process(trf)
        self.do_file_io(
            etree.tostring(trf, pretty_print=False, xml_declaration=True,
                           encoding='UTF-8', standalone='yes'),
            'w', pt)

    def process(self, tr):
        """Apply all transformations to the element tree.

        Parameters
        ----------
        tr : elementtree
            Element tree as input.

        Returns
        -------
        tr : elementtree
            Transformed element tree.

        See Also
        --------
        globals.set_numbering_tags(), set_book_part_attributes()
        """
        if self.set_numbering_tags:
            tr = self.gv.set_numbering_tags(
                self.set_numbering_tags.split(','), tr)
        self.set_book_part_attributes(tr)
        return tr

    def set_book_part_attributes(self, tr):
        """Add id and book-part-type attributes to every book-part.

        Parameters
        ----------
        tr : elementtree
            Element tree as input.

        Returns
        -------
        tr : elementtree
            Transformed element tree.
        """
        for i, b in enumerate(tr.findall('.//book-part')):
            b.attrib['id'] = "ch_" + str(i)
            b.attrib['book-part-type'] = "chapter"
        return tr

    def create_metadata_path(self, metadata):
        """Create the folder path for the metadata file.

        Parameters
        ----------
        metadata : str
            Suffix of the metadata files.

        Returns
        -------
        pth : str
            Path of the metadata file in the folder structure.

        Notes
        -----
        We assume that metadata files are stored in a sub-folder named
        ``metadata``.
        """
        p = os.path.dirname(self.f).split(os.sep)
        # NOTE(review): drops the last four path components -- presumably
        # walks up from the input file to the project root; confirm the
        # expected directory depth.
        del p[-4:]
        name = os.path.splitext(os.path.basename(self.gv.uuid))[0]
        p.append('metadata')
        p.append('{}.{}.xml'.format(name, metadata))
        pth = os.sep.join(p)
        self.debug.print_debug(self, 'merging headers' + str(pth))
        return pth

    def get_module_name(self):
        """Return the module name used for debugging and logging."""
        return 'merge'

    def create_book_bits(self):
        """Create a full BITS XML book and optionally add metadata.

        Returns
        -------
        book : elementtree
            Element tree which complies to the BITS XML scheme.

        See Also
        --------
        create_metadata_path, create_book_part_bits
        """
        nsmap = {
            'xlink': "http://www.w3.org/1999/xlink",
            'mml': "http://www.w3.org/1998/Math/MathML",
            "xml": "http://www.w3.org/XML/1998/namespace"
        }
        book = etree.Element(etree.QName('book'), nsmap=nsmap)
        book.attrib['dtd-version'] = "2.1"
        book.attrib[etree.QName(
            '{http://www.w3.org/XML/1998/namespace}lang')] = "de"
        book.attrib['book-type'] = "proceedings"
        metadata = self.args.get('--metadata')
        if metadata:
            pth = self.create_metadata_path(metadata)
            self.debug.print_console(self, 'merging headers' + str(pth))
            if os.path.isfile(pth):
                bp = etree.parse(pth).find('.//book-meta')
                book.insert(0, bp)
            else:
                # Missing metadata file is reported but not fatal.
                self.debug.print_console(
                    self,
                    self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + str(pth))
        else:
            sys.exit('Metadata argument undefined')
        bd = etree.Element("book-body")
        bd.append(self.create_book_part_bits())
        book.append(bd)
        return book

    def create_journal_jats(self):
        """Create a full JATS XML article and optionally add metadata.

        Returns
        -------
        journal : elementtree
            Element tree which complies to the JATS XML scheme.

        See Also
        --------
        create_metadata_path, get_xml_parts
        """
        nsmap = {
            'xlink': "http://www.w3.org/1999/xlink",
            'mml': "http://www.w3.org/1998/Math/MathML",
            "xml": "http://www.w3.org/XML/1998/namespace"
        }
        journal = etree.Element(etree.QName('article'), nsmap=nsmap)
        journal.attrib['dtd-version'] = "3.0"
        journal.attrib[etree.QName(
            '{http://www.w3.org/XML/1998/namespace}lang')] = "de"
        f, bd, bk = self.get_xml_parts()
        metadata = self.args.get('--metadata')
        if metadata:
            pth = self.create_metadata_path(metadata)
            if os.path.isfile(pth):
                bpm = etree.parse(pth).find('.')
                if bpm is not None:
                    if bpm.getroottree().getroot().tag == 'front':
                        journal.insert(0, bpm)
                    else:
                        self.debug.print_debug(
                            self, 'front metadata unspecified')
                        sys.exit(1)
        else:
            # No metadata file given: reuse the input's own front matter.
            journal.insert(0, f)
        journal.append(bd)
        if len(bk) > 0:
            journal.append(bk)
        else:
            back = etree.Element(etree.QName('back'))
            back.append(etree.Element(etree.QName('fn-group')))
            back.append(etree.Element(etree.QName('ref-list')))
            journal.append(back)
        return journal

    def create_book_part_bits(self):
        """Read a JATS XML file and create a BITS-XML book-part element tree.

        Returns
        -------
        bp : elementtree
            Book part element tree.
        """
        f, bd, bk = self.get_xml_parts()
        bp = etree.Element("book-part")
        if f is not None and len(f):
            bp.append(f)
        if bd is not None:
            bp.append(bd)
        if bk is not None:
            bp.append(bk)
        return bp

    def get_xml_parts(self):
        """Return front-matter, body and back-matter of the JATS XML file.

        Returns
        -------
        f : elementtree
            Front-matter of the JATS element tree (falls back to
            book-part-meta when no front element exists).
        bd : elementtree
            Body of the JATS element tree.
        bk : elementtree
            Back-matter of the JATS element tree.
        """
        r = self.tr.getroot()
        f = r.find(".//front")
        if f is None:
            f = r.find(".//book-part-meta")
        bd = r.find(".//body")
        bk = r.find(".//back")
        return f, bd, bk

    def do_file_io(self, s, mode, pth):
        """Execute a read or write operation on a path.

        Parameters
        ----------
        s : str
            Content to be written, or None for read.
        mode : str
            'w' for write, 'r' for read.
        pth : str
            Path to the file to be read or written.

        Returns
        -------
        str or None
            File content in read mode (the original read the data into a
            local variable and discarded it); None in write mode.

        Raises
        ------
        IOError
            If the I/O operation fails (reported, then exits).
        """
        try:
            with open(pth, mode) as handle:
                if mode == 'w':
                    handle.write(s.rstrip('\r\n'))
                    return None
                if mode == 'r':
                    # Fix: return the content instead of discarding it.
                    return handle.read()
        except IOError as i:
            self.debug.print_debug(self, i)
            print(i)
            sys.exit(1)

    def run(self):
        """Run the configuration on the processing object.

        Processes the JATS-XML file and merges it into the full BITS-XML
        file.

        See Also
        --------
        create_output_bits, create_output_jats
        """
        self.gv.create_dirs_recursive(self.dr.split('/'))
        if self.scheme == 'bits':
            self.create_output_bits()
        elif self.scheme == 'jats':
            # Fix: create_output_jats() returns None; the original assigned
            # that result to self.tr, clobbering the parsed input tree.
            self.create_output_jats()
class MPT(Debuggable):
    """
    MPT Class Object, which initializes the properties and defines the
    methods used to run the typesetter pipelines described in the JSON
    configuration.
    """

    def __init__(self):
        self.args = self.read_command_line()
        self.debug = Debug()
        self.settings = Settings(self.args)
        self.gv = GV(self.settings)
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()
        # Unique, time-stamped folder name for this run's results.
        self.current_result = datetime.datetime.now().strftime(
            "%Y_%m_%d-%H-%M-%S-") + str(uuid.uuid4())[:4]
        self.config = None
        self.all_typesetters = None
        self.script_folder = os.path.dirname(os.path.realpath(__file__))

    @staticmethod
    def read_command_line():
        """Read and generate a docopt dictionary from the command line.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='heiMPT 0.0.1')

    def get_module_name(self):
        """Return the module name used for debugging and logging."""
        return 'heiMPT'

    def call_typesetter(self, args):
        """Run a typesetter or application as a system process.

        Output, exit code and system error are captured and returned.

        Parameters
        ----------
        args : list
            Application executable and arguments in the correct order.

        Returns
        -------
        output : str
            System standard output.
        err : str
            System standard error.
        exit_code : int
            System exit code.

        See Also
        --------
        subprocess.Popen()
        """
        args_str = ' '.join(args)
        if ': ' in args_str:
            args_str = args_str.replace(': ', ':')
            self.debug.print_debug(
                self,
                "Merging command: file into command:file, can be a problem "
                "for some applications")
        m = args_str.strip().split(' ')
        process = Popen(m, stdout=PIPE)
        output, err = process.communicate()
        exit_code = process.wait()
        return output, err, exit_code

    def arguments_parse(self, t_props):
        """Read typesetter properties from the config and create arguments.

        Parameters
        ----------
        t_props : dictionary
            Typesetter properties.

        Returns
        -------
        args : list
            Application execution path and arguments in the correct order.
        """
        if t_props.get('executable'):
            args = [t_props.get('executable')]
        else:
            self.debug.print_debug(
                self, self.gv.TYPESETTER_EXECUTABLE_VARIABLE_IS_UNDEFINED)
            sys.exit(1)
        arguments = t_props.get("arguments")
        if arguments:
            # Arguments are keyed by sequence number; keep them ordered.
            for key in collections.OrderedDict(sorted(arguments.items())):
                args.append(arguments[key])
        return args

    def create_output_path(self, p, p_id, args, prefix, uid):
        """Create the output path for the current file.

        Parameters
        ----------
        p : dictionary
            JSON program properties.
        p_id : int
            Typesetter id.
        args : list
            Application arguments in the correct order (mutated in place).
        prefix : str
            File name prefix of the current file.
        uid : str
            Unique id of the current typesetter.

        Returns
        -------
        True : boolean
            Returns True if the output arguments were created.
        """
        config_args = p.get('typesetters')[p_id].get("arguments")
        if config_args is None:
            self.debug.print_debug(
                self, self.gv.TYPESETTER_ARGUMENTS_NOT_DEFINED)
            sys.exit(1)
        ts_args = collections.OrderedDict(sorted(config_args.items()))
        out_path = os.path.join(p.get('path'), uid)
        for key in ts_args:
            arg = ts_args[key]
            if arg == '--create-dir':
                # Placeholder: substitute the per-run output directory.
                args.append(out_path)
            else:
                args.append(arg)
        self.debug.print_debug(
            self, '{} {}'.format('Execute', ' '.join(args)))
        return True

    def run_typesetter(self, p, pre_path, pre_out_type, p_id, uid, f_id,
                       f_name, args):
        """Call the typesetter and write output to the correct path.

        Parameters
        ----------
        p : dictionary
            JSON program properties.
        pre_path : str
            Project path of the previous iteration.
        pre_out_type : str
            Output type of the previous iteration.
        p_id : int
            Typesetter id.
        uid : str
            Unique id of the current typesetter.
        f_id : int
            Sequence number of the current file.
        f_name : str
            Name of the current file.
        args : list
            Application arguments in the correct order.

        Returns
        -------
        p_path : str
            Project output path of the current typesetter.
        pf_type : str
            Project file type of the current typesetter.

        See Also
        --------
        call_typesetter, organize_output
        """
        p_path = ''
        pf_type = ''
        prefix = f_name.split('.')[0]
        if p_id == min(i for i in p['typesetters']):
            # First typesetter in the chain reads from the project path.
            f_path = os.path.join(p.get('path'), f_name)
        elif p.get("chain"):
            # Chained typesetters read the previous step's output.
            f_path = os.path.join(pre_path, prefix + '.' + pre_out_type)
        if os.path.isfile(f_path) \
                or p['typesetters'].get(p_id).get('expand'):
            self.debug.print_console(
                self, '\t{}:\t {} '.format('Processing', prefix))
            self.gv.log.append(prefix)
            args.append(f_path)
            self.create_output_path(p, p_id, args, prefix, uid)
            output, err, exit_code = self.call_typesetter(args)
            self.debug.print_debug(self, output.decode('utf-8'))
            p_path = self.organize_output(p, p_id, prefix, f_id, uid, args)
            pf_type = p.get('typesetters')[p_id].get("out_type")
        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + ' '
                + os.path.join(f_path))
        return p_path, pf_type

    def typeset_file(self, p, pre_path, pre_out_type, p_id, uid, f_id,
                     f_name):
        """Typeset the current file.

        Parameters
        ----------
        p : dictionary
            JSON program properties.
        pre_path : str
            Project path of the previous iteration.
        pre_out_type : str
            Output type of the previous iteration.
        p_id : int
            Typesetter id.
        uid : str
            Unique id of the current typesetter.
        f_id : int
            Sequence number of the current file.
        f_name : str
            Name of the current file.

        Returns
        -------
        p_path : str
            Project output path of the current typesetter.
        pf_type : str
            Project file type of the current typesetter.

        See Also
        --------
        run_typesetter
        """
        t_props = self.all_typesetters.get(
            p.get('typesetters')[p_id].get("name"))
        p_path, pf_type = '', ''
        if t_props:
            mt = self.arguments_parse(t_props)
            if self.gv.check_program(t_props.get('executable')):
                p_path, pf_type = self.run_typesetter(
                    p, pre_path, pre_out_type, p_id, uid, f_id, f_name, mt)
            else:
                self.debug.print_debug(
                    self,
                    t_props.get('executable')
                    + self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE)
        else:
            self.debug.print_debug(
                self, self.gv.PROJECT_TYPESETTER_IS_NOT_AVAILABLE)
        return p_path, pf_type

    def typeset_files(self, p, pre_path, pre_out_type, pre_id):
        """Typeset all files of a certain project.

        Parameters
        ----------
        p : dictionary
            JSON program properties.
        pre_path : str
            Project path of the previously executed typesetter.
        pre_out_type : str
            Project file type of the previously executed typesetter.
        pre_id : int
            Sequence number of the previously executed typesetter.

        Returns
        -------
        p_path : str
            Project output path of the current typesetter.
        pf_type : str
            Project file type of the current typesetter.

        See Also
        --------
        typeset_file
        """
        p_path, pf_type = '', ''
        uid = str(uuid.uuid4())
        project_files = collections.OrderedDict(
            sorted((int(key), value)
                   for key, value in list(p.get('files').items())))
        if p.get('typesetters')[pre_id].get("expand"):
            # Expanding typesetters operate on the whole merged file once.
            p_path, pf_type = self.typeset_file(
                p, pre_path, pre_out_type, pre_id, uid, 0, self.gv.uuid)
        else:
            for f_id in project_files:
                p_path, pf_type = self.typeset_file(
                    p, pre_path, pre_out_type, pre_id, uid, f_id,
                    project_files[f_id])
        return p_path, pf_type

    def typeset_project(self, p):
        """Typeset a certain project.

        Parameters
        ----------
        p : dictionary
            JSON program properties.

        Returns
        -------
        True : boolean
            Returns True if all the typesetters in the project have run.

        See Also
        --------
        typeset_files
        """
        typesetters_ordered = ''
        pre_path = ''
        prev_out_type = ''
        if p.get('active'):
            self.debug.print_console(self, 'PROJECT : ' + p.get('name'))
            self.gv.log.append(p.get("name"))
            ts = p.get('typesetters')
            if ts:
                typesetters_ordered = collections.OrderedDict(
                    sorted(ts.items()))
            else:
                self.debug.print_debug(
                    self, self.gv.PROJECT_TYPESETTERS_ARE_NOT_SPECIFIED)
            if self.all_typesetters is None:
                self.debug.print_debug(
                    self, self.gv.PROJECT_TYPESETTER_VAR_IS_NOT_SPECIFIED)
                sys.exit(1)
            for p_id in typesetters_ordered:
                self.debug.print_console(
                    self, ' '.join([
                        'Step', p_id, ':', '\t',
                        p.get('typesetters')[p_id].get("name")
                    ]))
                self.gv.log.append('{} {}'.format(
                    p_id, p.get('typesetters')[p_id].get("name")))
                pre_path, prev_out_type = self.typeset_files(
                    p, pre_path, prev_out_type, p_id)
        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_IS_NOT_ACTIVE + ' ' + p.get('name'))
        return True

    def typeset_all_projects(self):
        """Typeset all projects defined in the JSON file.

        Returns
        -------
        True : boolean
            Returns True if all the projects ran.

        See Also
        --------
        typeset_project
        """
        projects = self.config.get('projects')
        if projects:
            for p in projects:
                self.typeset_project(p)
        else:
            self.debug.print_debug(
                self, self.gv.PROJECTS_VAR_IS_NOT_SPECIFIED)
        return True

    def organize_output(self, p, p_id, prefix, f_id, uid, args):
        """Copy the temporary results into the final project path.

        Reads the temporary results of the current typesetter step and
        copies them into the correct output folder, built from project
        name, current time, typesetter sequence number and file sequence
        number. Tool-specific actions (merge/expand/process) are handled
        here.

        Parameters
        ----------
        p : dict
            JSON program properties.
        p_id : int
            Typesetter id.
        prefix : str
            File name prefix of the current file.
        f_id : int
            Sequence number of the current file.
        uid : str
            Unique id of the current typesetter.
        args : list
            Tool parameters; the executable file is the first element.

        Returns
        -------
        project_path : str
            Final path for the current file.

        See Also
        --------
        create_merged_file, gv.create_dirs_recursive
        """
        p_name = p.get('typesetters')[p_id].get("name")
        t_path = [p.get('path'), uid]
        if args:
            if len([arg for arg in args if 'meTypeset.py' in arg]) > 0:
                # meTypeset writes its results into an 'nlm' sub-folder.
                t_path += ['nlm']
        else:
            # NOTE(review): this duplicates the path/uid components already
            # in t_path; looks suspicious but preserved -- confirm intent.
            t_path += [p.get('path'), uid]
        out_type = p['typesetters'][p_id].get('out_type')
        if out_type is None:
            self.debug.print_console(
                self, self.gv.PROJECT_OUTPUT_FILE_TYPE_IS_NOT_SPECIFIED)
            sys.exit(1)
        project_path = [
            p.get('path'), p['name'], self.current_result,
            p_id + '_' + p_name, out_type
        ]
        temp_dir = os.path.join(p.get('path'), uid)
        if p['typesetters'][p_id].get('merge'):
            self.create_merged_file(p, p_id, project_path, t_path)
            if len(list(p.get('files').items())) == f_id:
                # Last file merged: the temp dir is no longer needed.
                shutil.rmtree(temp_dir)
        elif p['typesetters'][p_id].get('expand'):
            for filename in os.listdir(temp_dir):
                p_path = self.gv.create_dirs_recursive(project_path)
                f_path = '{}{}{}'.format(p_path, SEP, filename)
                os.rename(os.path.join(temp_dir, filename), f_path)
            shutil.rmtree(temp_dir)
        elif p['typesetters'][p_id].get('process'):
            if p_name.lower() == 'metypeset' and not os.path.exists(
                    SEP.join(t_path)):
                t_path.append('nlm')
            t_path.append(prefix + '.' + out_type)
            p_path = self.gv.create_dirs_recursive(project_path)
            f_path = '{}{}{}.{}'.format(p_path, SEP, prefix, out_type)
            try:
                os.rename(SEP.join(t_path), f_path)
                shutil.rmtree(temp_dir)
            except FileNotFoundError:
                # Fix: the original passed the path as a second positional
                # argument to print(), so the {} placeholder was never
                # substituted.
                print('File not found\t{}'.format(SEP.join(t_path)))
                sys.exit(1)
        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_TYPESETTER_PROCESS_METHOD_NOT_SPECIFIED)
        if len(list(p.get('typesetters').items())) == int(p_id) and int(
                f_id) == len(list(p.get('files').items())):
            # Final typesetter finished the final file: archive the project.
            zip_path = ''.join([p.get('path'), SEP, p['name']])
            shutil.make_archive(
                '{}/{}'.format(zip_path, p.get("name")), 'zip', zip_path)
        return SEP.join(project_path)

    def create_merged_file(self, p, p_id, project_path, t_path):
        """Create a combined file from a set of input files.

        Parameters
        ----------
        p : dict
            JSON program properties.
        p_id : int
            Typesetter id.
        project_path : str
            System path to be created.
        t_path : str
            Temporary output directory.

        Returns
        -------
        f_path : str
            Path of the copied merged file.

        See Also
        --------
        create_named_file()
        """
        t_path.append(self.gv.uuid)
        p_path = self.gv.create_dirs_recursive(project_path)
        f_path = '{}{}{}.xml'.format(p_path, SEP, self.gv.uuid)
        shutil.copy2(SEP.join(t_path), f_path)
        self.create_named_file(p, p_id, p_path, t_path)
        return f_path

    def create_named_file(self, p, p_id, p_path, t_path):
        """Copy the uniquely-named merged file to a configured file name.

        Parameters
        ----------
        p : dict
            JSON program properties.
        p_id : int
            Typesetter id.
        p_path : str
            Output directory for the current typesetter.
        t_path : str
            Temporary output directory.
        """
        f = p['typesetters'][p_id].get('out_file')
        if f:
            shutil.copy2(SEP.join(t_path),
                         '{}{}{}'.format(p_path, SEP, f))
        return

    def run_modules(self):
        """Run MPT in module mode (currently import plugins only)."""
        if self.args.get('import'):
            sys.path.insert(
                0, os.path.join(self.script_folder, 'plugins', 'import'))
            import ImportInterface
            if self.args.get('omp'):
                m = "omp"
                plugin_package = __import__(m, fromlist=['*'])
                plugin_module = getattr(plugin_package, m)
                # Find the class inheriting from the Import abstract class
                # in the module.
                for name in dir(plugin_module):
                    candidate = getattr(plugin_module, name)
                    if inspect.isclass(candidate) \
                            and issubclass(candidate,
                                           ImportInterface.Import) \
                            and candidate is not ImportInterface.Import:
                        plugin_class = candidate
                        print(("Found import plugin", name, plugin_class))
                        plugin = plugin_class()
                        self.debug.print_console(self, str(self.args))
                        plugin.run(self.args,
                                   {'base-path': self.script_folder})
        else:
            # Fix: the original concatenated a str with the args dict,
            # which raises TypeError instead of reporting the error.
            self.debug.fatal_error(
                self, "Unsupported arguments: " + str(self.args))
        return

    def check_applications(self):
        """Check that all configured program binaries are available."""
        ts = self.config.get('typesetters')
        # Check every formatter named in '--formatter=...' arguments.
        for arguments in [ts[i]['arguments'] for i in ts]:
            formatter_args = [
                j for j in list(arguments.values())
                if j.find('--formatter') == 0
            ]
            for formatter_arg in formatter_args:
                for name in formatter_arg.split('=')[1].split(','):
                    app = self.gv.apps.get(name.lower())
                    if not self.gv.check_program(app):
                        self.debug.fatal_error(
                            self, '{} {}'.format(
                                app,
                                app
                                + self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE))
                        sys.exit(1)
        # Check every typesetter executable.
        for executable in [ts[i]['executable'] for i in ts]:
            if not self.gv.check_program(executable):
                # Fix: the original message used loop variable 'l' leaked
                # from the formatter loop above (possible NameError, and
                # the wrong application was named); report the executable
                # that actually failed.
                self.debug.fatal_error(
                    self, '{} {}'.format(
                        executable,
                        executable
                        + self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE))
                sys.exit(1)
class Disseminate(Debuggable):
    """Generates FO or PDF output from XML via saxon and an FO processor."""

    def __init__(self):
        self.args = self.read_command_line()
        self.debug = Debug()
        self.gv = GV()
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()
        self.dr = self.args.get('<path>')
        self.f = self.args.get('<input_file>')
        self.out_type = self.args.get('--out-type').lower()
        # Folder containing this script; used to locate bundled tools
        # and stylesheets.
        self.script_path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))

    @staticmethod
    def read_command_line():
        """Read and generate a docopt dictionary from the command line.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='Disseminate 0.1')

    def get_saxon_path(self):
        """Locate the saxon jar, preferring the bundled copy.

        Returns
        -------
        str or bool
            Absolute path of the saxon jar if found (bundled copy first,
            then the ``--saxon`` override); False otherwise.
        """
        # NOTE(review): METYPESET_PATH looks like the wrong constant for
        # locating saxon -- confirm against GV's definitions.
        bundled = os.path.join(self.script_path, self.gv.METYPESET_PATH)
        if os.path.isfile(bundled):
            return bundled
        override = self.args.get('--saxon')
        if override and os.path.isfile(override):
            return override
        return False

    def get_module_name(self):
        """Return the module name used for debugging and logging."""
        return 'OUTPUT Generation'

    def process(self, args):
        """Run a conversion tool as a system process.

        Parameters
        ----------
        args : list
            Application executable and arguments in the correct order.

        Returns
        -------
        output : bytes
            System standard output.
        err : bytes
            System standard error.
        exit_code : int
            System exit code.

        See Also
        --------
        subprocess.Popen()
        """
        m = ' '.join(args).strip().split(' ')
        # Fix: Python 2 `print` statements are syntax errors under
        # Python 3 and inconsistent with the rest of this file.
        print(' '.join(args))
        # Fix: stderr must be piped too -- the original only piped
        # stdout, so `err` was always None and the failure branch
        # printed nothing useful.
        process = Popen(m, stdout=PIPE, stderr=PIPE)
        output, err = process.communicate()
        exit_code = process.wait()
        if exit_code != 0:
            # Fix: treat any non-zero exit as failure (was `== 1`).
            print(err)
            sys.exit(1)
        return output, err, exit_code

    def run(self):
        """Run the converters for the requested output type.

        See Also
        --------
        create_output
        """
        self.create_output(self.out_type)

    def create_output(self, out_type):
        """Create FO or PDF output for every formatter/medium combination.

        Parameters
        ----------
        out_type : str
            Output type, 'fo' or 'pdf'.

        See Also
        --------
        run_saxon(), get_saxon_path(), run_fop_processor()
        """
        formatters = self.args.get('--formatter').split(',')
        mediums = self.args.get('--medium').split(',')
        for formatter in formatters:
            formatter = formatter.lower()
            for medium in mediums:
                medium = medium.lower()
                # NOTE(review): splitting a single path on os.pathsep
                # (':' / ';') looks odd; os.sep may have been intended --
                # confirm.
                self.gv.create_dirs_recursive(
                    self.args.get('<path>').split(os.pathsep))
                args = None
                if self.out_type == 'fo':
                    self.debug.print_console(
                        self, self.gv.RUNNING_FO_CONVERSION)
                    saxon_path = self.get_saxon_path()
                    args = self.run_saxon(saxon_path, formatter, medium)
                elif self.out_type == 'pdf':
                    self.debug.print_console(
                        self, self.gv.RUNNING_PDF_CONVERSION)
                    args = self.run_fop_processor(formatter, medium)
                if args is None:
                    # Fix: for an unknown output type the original raised
                    # UnboundLocalError on `args`; skip instead.
                    self.debug.print_debug(
                        self,
                        'unsupported output type: ' + str(self.out_type))
                    continue
                output, err, exit_code = self.process(args)
                print(output)

    def run_fop_processor(self, formatter, medium):
        """Build the command line for the configured FO processor.

        Parameters
        ----------
        formatter : str
            Name of the FO formatter ('fop' or 'ah').
        medium : str
            Name of the medium.

        Returns
        -------
        args : list
            Processor execution path and arguments; empty if the binary
            is unavailable.
        """
        args = []
        if formatter.lower() == 'fop':
            pth = os.path.join(self.script_path, self.gv.APACHE_FOP_PATH)
            if self.gv.check_program(pth):
                args = self.run_apache_fop(pth, formatter, medium)
        elif formatter.lower() == 'ah':
            pth = self.gv.ANTENNA_HOUSE_FOP_PATH
            if self.gv.check_program(pth):
                args = self.run_ah_fop(pth, formatter, medium)
        return args

    def run_ah_fop(self, pth, formatter, medium):
        """Build the Antenna House command line (FO in, PDF out)."""
        args = [pth]
        args.append('-d')
        args.append('{}/{}.{}.{}.fo'.format(
            os.path.dirname(self.f), self.gv.uuid, formatter, medium))
        args.append('-o')
        args.append('{}/{}.{}.{}.pdf'.format(
            self.dr, self.gv.uuid, formatter, medium))
        return args

    def run_apache_fop(self, pth, formatter, medium):
        """Build the Apache FOP command line (FO in, PDF out, with config)."""
        style_path = '{}/configurations/fop/conf/{}.{}.xml'.format(
            self.script_path, formatter, medium)
        args = [pth]
        args.append('-fo')
        args.append('{}/{}.{}.{}.fo'.format(
            os.path.dirname(self.f), self.gv.uuid, formatter, medium))
        args.append('-pdf')
        args.append('{}/{}.{}.{}.pdf'.format(
            self.dr, self.gv.uuid, formatter, medium))
        args.append('-c')
        args.append(style_path)
        return args

    def run_saxon(self, saxon_path, formatter, medium):
        """Create the executable path for saxon.

        Parameters
        ----------
        saxon_path : str
            Absolute path of the saxon binary jar file.
        formatter : str
            Name of the FO formatter.
        medium : str
            Name of the medium.

        Returns
        -------
        args : list
            List of arguments for the saxon execution path.
        """
        args = ["java", "-jar", saxon_path]
        if self.args.get('--xsl'):
            # Stylesheets live in a sibling 'stylesheets' folder one
            # level above this script.
            xsl = self.script_path.split(os.sep)[:-1]
            xsl.append('stylesheets')
            xsl.append(self.args.get('--xsl'))
            args.append("-xsl:" + os.sep.join(xsl))
        s = self.args.get('<input_file>')
        if os.path.exists(s):
            args.append("-s:" + s)
        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + ' ' + s)
            sys.exit(1)
        file_name = '.'.join(
            [self.gv.uuid, formatter.lower(), medium.lower(), 'fo'])
        args.append("-o:" + os.path.join(self.args.get('<path>'), file_name))
        args.append('formatter=' + formatter.lower())
        args.append('medium=' + medium.lower())
        return args
class MPT(Debuggable):
    """
    MPT Class Object, which initializes the properties and defines the
    methods.
    """

    def __init__(self):
        # Parsed docopt dictionary of command-line parameters.
        self.args = self.read_command_line()
        self.debug = Debug()
        self.gv = GV()
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()
        # Time-stamped run identifier (plus a short random suffix) used to
        # name the output folder of this typesetting run.
        self.current_result = datetime.datetime.now().strftime(
            "%Y_%m_%d-%H-%M-") + str(uuid.uuid4())[:8]
        self.config = self.gv.read_json(self.args['<config_file>'])
        # Global typesetter definitions shared by all projects in the config.
        self.all_typesetters = self.config.get('typesetters')

    def run(self):
        """
        Runs the MPT Module, which typesets all the projects defined
        in the json input file

        Returns
        --------
        True: boolean
            Returns True if all the projects are typeset

        See Also
        --------
        typeset_all_projects
        """
        self.typeset_all_projects()
        return True

    @staticmethod
    def read_command_line():
        """
        Reads and generates a docopt dictionary from the command line
        parameters.

        Returns
        -------
        docopt : dictionary
            A dictionary, where keys are names of command-line elements
            and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='mpt 0.0.1')

    def get_module_name(self):
        """
        Reads the name of the module for debugging and logging

        Returns
        -------
        name : string
            Name of the Module
        """
        name = 'MPT'
        return name

    def call_typesetter(self, args):
        """
        Runs typesetter with given arguments

        Creates the execution path for a typesetter or an application and
        runs it as a system process. Output, exit-code and system error
        codes are captured and returned.

        Parameters
        ----------
        args : list
            application arguments in the correct order.

        Returns
        -------
        output : str
            system standard output.
        err : str
            system standard error.
        exit_code : str
            system exit_code.

        See Also
        --------
        subprocess.Popen()
        """
        # Re-join and re-split so multi-word argument strings become
        # individual argv tokens.
        m = ' '.join(args).strip().split(' ')
        self.debug.print_console(self, ' '.join(m))
        # NOTE(review): stderr is not piped, so err is always None here.
        process = Popen(m, stdout=PIPE)
        output, err = process.communicate()
        exit_code = process.wait()
        return output, err, exit_code

    def arguments_parse(self, t_props):
        """
        Reads typesetter properties from json configuration and create
        arguments.

        Parameters
        ----------
        t_props : dictionary
            typesetter properties

        Returns
        -------
        args : list
            application execution path and arguments in the correct order.
        """
        args = []
        if t_props.get('executable'):
            args = [t_props.get('executable')]
        else:
            self.debug.print_debug(
                self,
                self.gv.TYPESETTER_EXECUTABLE_VARIABLE_IS_UNDEFINED)
            sys.exit(1)
        arguments = t_props.get("arguments")
        if arguments:
            # Arguments are keyed by sequence number; sort to preserve the
            # intended order.
            arguments = collections.OrderedDict(sorted(arguments.items()))
            for a in arguments:
                args.append(arguments[a])
        return args

    def create_output_path(
            self,
            p,
            p_id,
            args,
            prefix,
            uid):
        """
        Creates the output path for the current file

        Output folder is constructed using project_name, current_time,
        sequence number of the current typesetter and the sequence number
        of the current file.

        Parameters
        ---------
        p : dictionary
            json program properties
        p_id : int
            typesetter id
        args : list
            application arguments in the correct order (appended in place).
        prefix : str
            file name prefix of the current file
        uid : str
            unique id of the current typesetter

        Returns
        --------
        True : boolean
            Returns True if the output file is created

        See Also
        --------
        os.makedirs()
        """
        ts_args = collections.OrderedDict(
            sorted(p.get('typesetters')[p_id].get("arguments").items()))
        out_type = p.get('typesetters')[p_id].get("out_type")
        out_path = os.path.join(p.get('path'), uid)
        for i in ts_args:
            arg = ts_args[i]
            # Placeholder strings in the config are expanded here into
            # real paths; anything else is passed through verbatim.
            if arg == 'create_output_directory()':
                args.append(out_path)
            elif arg == 'create_output_file()':
                if not os.path.exists(out_path):
                    os.makedirs(out_path)
                args.append(
                    os.path.join(
                        out_path,
                        prefix + '.' + out_type))
            else:
                args.append(arg)
        return True

    def run_typesetter(
            self,
            p,
            pre_path,
            pre_out_type,
            p_id,
            uid,
            f_id,
            f_name,
            args):
        """
        Creates the temporary output path, calls the typesetter and writes
        the output to the correct path for a certain file

        Parameters
        ---------
        p : dictionary
            json program properties
        pre_path : str
            project path of the previous iteration
        pre_out_type : str
            output type of the previous iteration
        p_id : int
            typesetter id
        uid : str
            unique id of the current typesetter
        f_id : int
            sequence number of the current file
        f_name : str
            name of the current file
        args : list
            application arguments in the correct order.

        Returns
        --------
        p_path : str
            project output path of the current typesetter
        pf_type : str
            project file type of the current typesetter

        See Also
        --------
        call_typesetter, organize_output
        """
        p_path = ''
        pf_type = ''
        prefix = f_name.split('.')[0]
        # First typesetter in the chain reads from the project path; later
        # ones (when chaining) read the previous step's output.
        # NOTE(review): if this is not the first typesetter and 'chain' is
        # not set, f_path is never assigned and the isfile() check below
        # raises NameError — confirm against the caller's config contract.
        if p_id == min(i for i in p['typesetters']):
            f_path = os.path.join(p.get('path'), f_name)
        elif p.get("chain"):
            f_path = os.path.join(pre_path, prefix + '.' + pre_out_type)
        if os.path.isfile(f_path) or p['typesetters'].get(p_id).get('expand'):
            args.append(f_path)
            self.create_output_path(p, p_id, args, prefix, uid)
            output, err, exit_code = self.call_typesetter(args)
            self.debug.print_debug(self, output.decode('utf-8'))
            p_path = self.organize_output(
                p,
                p_id,
                prefix,
                f_id,
                uid)
            pf_type = p.get('typesetters')[p_id].get("out_type")
        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST
                + ' ' + os.path.join(f_path))
        return p_path, pf_type

    def typeset_file(
            self,
            p,
            pre_path,
            pre_out_type,
            p_id,
            uid,
            f_id,
            f_name
    ):
        """
        Typesets the current file

        Parameters
        ---------
        p : dictionary
            json program properties
        pre_path : str
            project path of the previous iteration
        pre_out_type : str
            output type of the previous iteration
        p_id : int
            typesetter id
        uid : str
            unique id of the current typesetter
        f_id : int
            sequence number of the current file
        f_name : str
            name of the current file

        Returns
        --------
        p_path : str
            project output path of the current typesetter
        pf_type : str
            project file type of the current typesetter

        See Also
        --------
        run_typesetter
        """
        t_props = self.all_typesetters.get(
            p.get('typesetters')[p_id].get("name"))
        p_path, pf_type = '', ''
        if t_props:
            mt = self.arguments_parse(t_props)
            if self.gv.check_program(t_props.get('executable')):
                p_path, pf_type = self.run_typesetter(
                    p,
                    pre_path,
                    pre_out_type,
                    p_id,
                    uid,
                    f_id,
                    f_name,
                    mt)
            else:
                self.debug.print_debug(
                    self, self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE)
        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_TYPESETTER_IS_NOT_AVAILABLE)
        return p_path, pf_type

    def typeset_files(
            self,
            p,
            pre_path,
            pre_out_type,
            pre_id):
        """
        Typeset all files of a certain project

        Parameters
        ---------
        p : dictionary
            json program properties
        pre_path : str
            project path of the previously executed typesetter
        pre_out_type : str
            project file type of the previously executed typesetter
        pre_id : int
            sequence number of the previously executed file

        Returns
        --------
        p_path : str
            project output path of the current typesetter
        pf_type : str
            project file type of the current typesetter

        See Also
        --------
        typeset_file
        """
        p_path, pf_type = '', ''
        uid = str(uuid.uuid4())
        # Files are keyed by sequence number (as strings in JSON); sort by
        # the numeric key so they are processed in order.
        project_files = collections.OrderedDict(
            sorted((int(key), value)
                   for key, value in p.get('files').items()))
        if p.get('typesetters')[pre_id].get("expand"):
            # Expanding typesetters consume the whole previous output under
            # a single generated name rather than per-file inputs.
            f_name = self.gv.uuid
            p_path, pf_type = self.typeset_file(
                p,
                pre_path,
                pre_out_type,
                pre_id,
                uid,
                0,
                f_name
            )
        else:
            for f_id in project_files:
                f_name = project_files[f_id]
                p_path, pf_type = self.typeset_file(
                    p,
                    pre_path,
                    pre_out_type,
                    pre_id,
                    uid,
                    f_id,
                    f_name
                )
        return p_path, pf_type

    def typeset_project(self, p):
        """
        Typesets a certain project

        Parameters
        ---------
        p : dictionary
            json program properties

        Returns
        --------
        True : boolean
            Returns True, if all the typesetters in project has run
            successfully.

        See Also
        --------
        typeset_files
        """
        typesetters_ordered, temp_path, temp_pre_out_type = '', '', ''
        pre_path = ''
        prev_out_type = ''
        if p.get('active'):
            ts = p.get('typesetters')
            if ts:
                typesetters_ordered = collections.OrderedDict(
                    sorted(ts.items()))
            else:
                self.debug.print_debug(
                    self,
                    self.gv.PROJECT_TYPESETTERS_ARE_NOT_SPECIFIED)
            if self.all_typesetters is None:
                self.debug.print_debug(
                    self,
                    self.gv.PROJECT_TYPESETTER_VAR_IS_NOT_SPECIFIED)
                sys.exit(1)
            # Run each typesetter in order, threading the previous step's
            # output path and type into the next step.
            for p_id in typesetters_ordered:
                self.debug.print_console(
                    self,
                    ' '.join(['Runnning Typesetter', p_id, ':',
                              p.get('typesetters')[p_id].get("name")]))
                temp_path, temp_pre_out_type = self.typeset_files(
                    p,
                    pre_path,
                    prev_out_type,
                    p_id
                )
                pre_path = temp_path
                prev_out_type = temp_pre_out_type
                # Console breadcrumb of the step's output folder.
                # NOTE(review): nesting inside the loop reconstructed from
                # mangled whitespace — confirm against upstream history.
                self.debug.print_console(
                    self, ' '.join(['ls -al', temp_path]))
        else:
            self.debug.print_debug(self, self.gv.PROJECT_IS_NOT_ACTIVE)
        return True

    def typeset_all_projects(self):
        """
        Typeset all projects defined in the json file

        Returns
        --------
        True : boolean
            Returns True, if the all the typesetters in project run

        See Also
        --------
        typeset_project
        """
        projects = self.config.get('projects')
        if projects:
            for p in projects:
                self.typeset_project(p)
        else:
            self.debug.print_debug(
                self, self.gv.PROJECTS_VAR_IS_NOT_SPECIFIED)
        return True

    def organize_output(
            self,
            p,
            p_id,
            prefix,
            f_id,
            uid):
        """
        Copy the temporary results into the final project path

        This method reads the temporary results of the current typesetter
        step and copies them in to the correct output folder. Output
        folder is constructed using project_name, current_time, sequence
        number of the current typesetter and the sequence number of the
        current file. Customized tool specific actions are also defined
        and handled here.

        Parameters
        ------------
        p : dict
            json program properties
        p_id : int
            typesetter id
        prefix : str
            file name prefix of the current file
        f_id : int
            sequence number of the current file
        uid : str
            unique id of the current typesetter

        Returns
        --------
        project_path : str
            Final path for the current file

        See Also
        --------
        create_merged_file, gv.create_dirs_recursive
        """
        p_name = p.get('typesetters')[p_id].get("name")
        # metypeset writes its result into an extra 'nlm' subfolder.
        t_path = [p.get('path'), uid] + ['nlm'] \
            if p_name == 'metypeset' else [p.get('path'), uid]
        out_type = p['typesetters'][p_id]['out_type']
        project_path = [p.get('path'), p['name'], self.current_result,
                        p_id + '_' + p_name, out_type]
        temp_dir = os.path.join(p.get('path'), uid)
        if p['typesetters'][p_id].get('merge'):
            self.create_merged_file(p, p_id, project_path, t_path)
            # Only remove the temp dir once the last file has been merged.
            if len(p.get('files').items()) == f_id:
                shutil.rmtree(temp_dir)
        elif p['typesetters'][p_id].get('expand'):
            # Move every file the typesetter produced into the final folder.
            for filename in os.listdir(temp_dir):
                p_path = self.gv.create_dirs_recursive(project_path)
                f_path = '{}{}{}'.format(p_path, SEP, filename)
                os.rename(os.path.join(temp_dir, filename), f_path)
            shutil.rmtree(temp_dir)
        elif p['typesetters'][p_id].get('process'):
            t_path.append(prefix + '.' + out_type)
            p_path = self.gv.create_dirs_recursive(project_path)
            f_path = '{}{}{}.{}'.format(p_path, SEP, prefix, out_type)
            os.rename(SEP.join(t_path), f_path)
            shutil.rmtree(temp_dir)
        else:
            self.debug.print_debug(
                self,
                self.gv.PROJECT_TYPESETTER_PROCESS_METHOD_NOT_SPECIFIED)
        #self.debug.print_console(self, '{} {}'.format(self.gv.OUTPUT,f_path))
        return SEP.join(project_path)

    def create_merged_file(self, p, p_id, project_path, t_path):
        """
        Create a combined file from a set of input files

        Parameters
        ------------
        p : dict
            json program properties
        p_id : int
            typesetter id
        t_path : str
            temporary output directory
        project_path : str
            system path to be created

        See Also
        --------
        create_named_file()
        """
        t_path.append(self.gv.uuid)
        p_path = self.gv.create_dirs_recursive(project_path)
        f_path = '{}{}{}.xml'.format(p_path, SEP, self.gv.uuid)
        shutil.copy2(SEP.join(t_path), f_path)
        self.create_named_file(p, p_id, p_path, t_path)
        return f_path

    def create_named_file(self, p, p_id, p_path, t_path,):
        """
        Copy unique file name to a named file

        Parameters
        ------------
        p : dict
            json program properties
        p_id : int
            typesetter id
        t_path : str
            temporary output directory
        p_path : str
            output directory for the current typesetter
        """
        f = p['typesetters'][p_id].get('out_file')
        if f:
            f_path = '{}{}{}'.format(p_path, SEP, f)
            shutil.copy2(SEP.join(t_path), f_path)
        return