def parse_display(self):
    """Return the serialized ``<display>`` child of ``self.parent_elem``, or None when absent."""
    node = self.parent_elem.find("display")
    return None if node is None else xml_to_string(node)
def to_xml_file( self, shed_tool_data_table_config, new_elems=None, remove_elems=None ):
    """
    Write the current in-memory version of the shed_tool_data_table_conf.xml
    file to disk.

    remove_elems are removed before new_elems are added.
    """
    if not ( new_elems or remove_elems ):
        log.debug( 'ToolDataTableManager.to_xml_file called without any elements to add or remove.' )
        return  # no changes provided, no need to persist any changes
    if not new_elems:
        new_elems = []
    if not remove_elems:
        remove_elems = []
    full_path = os.path.abspath( shed_tool_data_table_config )
    # FIXME: we should lock changing this file by other threads / head nodes
    try:
        tree = util.parse_xml( full_path )
        root = tree.getroot()
        out_elems = [ elem for elem in root ]
    except Exception as e:
        out_elems = []
        log.debug( 'Could not parse existing tool data table config, assume no existing elements: %s', e )
    for elem in remove_elems:
        # handle multiple occurrences of remove elem in existing elems
        while elem in out_elems:
            # BUG FIX: original removed from remove_elems (the wrong list),
            # which never shrinks out_elems and loops forever.
            out_elems.remove( elem )
    # add new elems
    out_elems.extend( new_elems )
    # BUG FIX: open in text mode — xml_to_string returns str, which cannot be
    # written to a binary ('wb') handle on Python 3.
    with open( full_path, 'w' ) as out:
        out.write( '<?xml version="1.0"?>\n<tables>\n' )
        for elem in out_elems:
            out.write( util.xml_to_string( elem, pretty=True ) )
        out.write( '</tables>\n' )
    # BUG FIX: 0644 is a syntax error on Python 3; use the 0o prefix.
    os.chmod( full_path, 0o644 )
# NOTE(review): this test block appears truncated in this view — the triple-quoted
# expected-XML literal opened at the end of the assert is not terminated here.
# Kept byte-identical; confirm the full expected string against the original file.
def test_loader_macro_multiple_toplevel_yield(): """ test replacement of multiple top level yield """ with TestToolDirectory() as tool_dir: tool_dir.write(''' <tool> <macros> <xml name="test"> <blah/> <yield/> <blah> <yield name="token1"/> </blah> <yield name="token2"/> </xml> </macros> <expand macro="test"> <token name="token1"> <content_of_token1/> <more_content_of_token1/> </token> <sub_of_expand_1/> <token name="token2"> <content_of_token2/> <more_content_of_token2/> </token> <sub_of_expand_2/> </expand> </tool> ''') xml = tool_dir.load() assert xml_to_string(xml, pretty=True) == '''<?xml version="1.0" ?>
def create_and_write_tmp_file(elem):
    """Serialize ``elem`` to a fresh temporary file and return that file's path."""
    serialized = xml_to_string(elem, pretty=True)
    # Reserve a unique filename; delete=False keeps it on disk after close.
    with tempfile.NamedTemporaryFile(prefix="tmp-toolshed-cawrf", delete=False) as handle:
        path = handle.name
    # Re-open in text mode so the XML is written as UTF-8 text.
    with io.open(path, mode='w', encoding='utf-8') as handle:
        handle.write(serialized)
    return path
# NOTE(review): the PRETTY literal's internal newlines appear collapsed in this
# view; kept byte-identical rather than reformatted to avoid altering the
# expected-value string the assert compares against.
def test_xml_to_string_pretty(): section = util.parse_xml_string(SECTION_XML) s = util.xml_to_string(section, pretty=True) PRETTY = """<?xml version="1.0" ?> <section id="fasta_fastq_manipulation" name="Fasta Fastq Manipulation" version=""> <tool file="toolshed.g2.bx.psu.edu/repos/peterjc/seq_filter_by_id/fb1313d79396/seq_filter_by_id/tools/seq_filter_by_id/seq_filter_by_id.xml" guid="toolshed.g2.bx.psu.edu/repos/peterjc/seq_filter_by_id/seq_filter_by_id/0.2.5"> <tool_shed>toolshed.g2.bx.psu.edu</tool_shed> </tool> </section>""" assert s == PRETTY
def load_manager_from_elem( self, data_manager_elem, tool_path=None, add_manager=True, replace_existing=False ):
    """Build a DataManager from ``data_manager_elem``; register it unless add_manager is False.

    Returns the DataManager, or None when construction fails (the error is logged).
    """
    try:
        manager = DataManager( self, data_manager_elem, tool_path=tool_path )
    except Exception as err:
        log.error( "Error loading data_manager '%s':\n%s" % ( err, util.xml_to_string( data_manager_elem ) ) )
        return None
    if add_manager:
        self.add_manager( manager, replace_existing=replace_existing )
    log.debug( 'Loaded Data Manager: %s' % ( manager.id ) )
    return manager
def load_manager_from_elem(self, data_manager_elem, tool_path=None, add_manager=True):
    """Create a DataManager from an XML element, optionally registering it with self.

    Returns None when the element cannot be loaded (the failure is logged).
    """
    data_manager = None
    try:
        data_manager = DataManager(self, data_manager_elem, tool_path=tool_path)
    except Exception as e:
        log.error("Error loading data_manager '%s':\n%s" % (e, util.xml_to_string(data_manager_elem)))
    if data_manager is None:
        return None
    if add_manager:
        self.add_manager(data_manager)
    log.debug('Loaded Data Manager: %s' % (data_manager.id))
    return data_manager
def to_xml_file(self, shed_tool_data_table_config, new_elems=None, remove_elems=None):
    """
    Write the current in-memory version of the shed_tool_data_table_conf.xml
    file to disk.

    remove_elems are removed before new_elems are added.
    """
    if not (new_elems or remove_elems):
        log.debug('ToolDataTableManager.to_xml_file called without any elements to add or remove.')
        return  # no changes provided, no need to persist any changes
    if not new_elems:
        new_elems = []
    if not remove_elems:
        remove_elems = []
    full_path = os.path.abspath(shed_tool_data_table_config)
    # BUG FIX: record whether the target is new BEFORE the ENOENT branch below
    # creates it; the original computed this afterwards, so it was always False
    # and tool_data_path_files.update_files() never ran for a fresh file.
    out_path_is_new = not os.path.exists(full_path)
    # FIXME: we should lock changing this file by other threads / head nodes
    try:
        try:
            tree = util.parse_xml(full_path)
        except OSError as e:
            if e.errno == errno.ENOENT:
                # File doesn't exist yet: seed it with the empty skeleton, then parse that.
                with open(full_path, 'w') as fh:
                    fh.write(TOOL_DATA_TABLE_CONF_XML)
                tree = util.parse_xml(full_path)
            else:
                raise
        root = tree.getroot()
        out_elems = [elem for elem in root]
    except Exception as e:
        out_elems = []
        log.debug('Could not parse existing tool data table config, assume no existing elements: %s', e)
    for elem in remove_elems:
        # handle multiple occurrences of remove elem in existing elems
        while elem in out_elems:
            # BUG FIX: original removed from remove_elems (the wrong list),
            # which never shrinks out_elems and loops forever.
            out_elems.remove(elem)
    # add new elems
    out_elems.extend(new_elems)
    root = util.parse_xml_string('<?xml version="1.0"?>\n<tables></tables>')
    for elem in out_elems:
        root.append(elem)
    # Write atomically via a renamed temporary file.
    with RenamedTemporaryFile(full_path, mode='w') as out:
        out.write(util.xml_to_string(root, pretty=True))
    os.chmod(full_path, RW_R__R__)
    if out_path_is_new:
        self.tool_data_path_files.update_files()
def to_xml_file( self, shed_tool_data_table_config ):
    """Write the current in-memory version of the shed_tool_data_table_conf.xml file to disk."""
    full_path = os.path.abspath( shed_tool_data_table_config )
    # Write to a temp file first, then atomically move it into place.
    fd, filename = tempfile.mkstemp()
    # BUG FIX: os.write requires bytes on Python 3; encode all payloads.
    os.write( fd, b'<?xml version="1.0"?>\n' )
    os.write( fd, b'<tables>\n' )
    for elem in self.shed_data_table_elems:
        os.write( fd, util.xml_to_string( elem ).encode( 'utf-8' ) )
    os.write( fd, b'</tables>\n' )
    os.close( fd )
    shutil.move( filename, full_path )
    # BUG FIX: 0644 is a syntax error on Python 3; use the 0o prefix.
    os.chmod( full_path, 0o644 )
def load_from_element(self, elem, tool_path):
    """Configure this data manager from its ``<data_manager>`` XML element.

    Reads id/guid/version attributes, and when no tool_file attribute is
    present, resolves the tool (and its tool shed repository info) from the
    nested ``<tool>`` element.  ``tool_path`` may be overridden from the
    element's shed_conf_file.
    """
    assert (
        elem.tag == "data_manager"
    ), 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % (elem.tag)
    self.declared_id = elem.get("id", None)
    self.guid = elem.get("guid", None)
    path = elem.get("tool_file", None)
    self.version = elem.get("version", self.version)
    tool_shed_repository_id = None
    tool_guid = None
    if path is None:
        tool_elem = elem.find("tool")
        assert tool_elem is not None, (
            "Error loading tool for data manager. Make sure that a tool_file attribute or a tool tag set has been defined:\n%s"
            % (util.xml_to_string(elem))
        )
        path = tool_elem.get("file", None)
        tool_guid = tool_elem.get("guid", None)
        # need to determine repository info so that dependencies will work correctly
        tool_shed = tool_elem.find("tool_shed").text
        repository_name = tool_elem.find("repository_name").text
        repository_owner = tool_elem.find("repository_owner").text
        installed_changeset_revision = tool_elem.find("installed_changeset_revision").text
        # save repository info here
        self.tool_shed_repository_info_dict = dict(
            tool_shed=tool_shed,
            name=repository_name,
            owner=repository_owner,
            installed_changeset_revision=installed_changeset_revision,
        )
        # get tool_shed repo id
        tool_shed_repository = suc.get_tool_shed_repository_by_shed_name_owner_installed_changeset_revision(
            self.data_managers.app, tool_shed, repository_name, repository_owner, installed_changeset_revision
        )
        if tool_shed_repository is None:
            log.warning(
                "Could not determine tool shed repository from database. This should only ever happen when running tests."
            )
            # we'll set tool_path manually here from shed_conf_file
            tool_shed_repository_id = None
            try:
                tool_path = util.parse_xml(elem.get("shed_conf_file")).getroot().get("tool_path", tool_path)
            # BUG FIX: "except Exception, e:" is Python 2 only syntax.
            except Exception as e:
                log.error("Error determining tool_path for Data Manager during testing: %s", e)
        else:
            tool_shed_repository_id = self.data_managers.app.security.encode_id(tool_shed_repository.id)
            # use shed_conf_file to determine tool_path
            shed_conf_file = elem.get("shed_conf_file", None)
            if shed_conf_file:
                shed_conf = self.data_managers.app.toolbox.get_shed_config_dict_by_filename(shed_conf_file, None)
                if shed_conf:
                    tool_path = shed_conf.get("tool_path", tool_path)
def config_elems_to_xml_file(self, config_elems, config_filename, tool_path):
    """
    Persist the current in-memory list of config_elems to a file named by the
    value of config_filename.
    """
    try:
        toolbox = parse_xml_string('<?xml version="1.0"?>\n<toolbox tool_path="%s"></toolbox>' % str(tool_path))
        for config_elem in config_elems:
            toolbox.append(config_elem)
        # Write atomically via a renamed temporary file.
        with RenamedTemporaryFile(config_filename, mode='w') as outfile:
            outfile.write(xml_to_string(toolbox, pretty=True))
    except Exception:
        log.exception("Exception in ToolPanelManager.config_elems_to_xml_file")
def to_xml_file( self, tool_data_table_config_path ):
    """Write the current in-memory version of the tool_data-table_conf.xml file to disk."""
    full_path = os.path.abspath( tool_data_table_config_path )
    # Write to a temp file first, then atomically move it into place.
    fd, filename = tempfile.mkstemp()
    # BUG FIX: os.write requires bytes on Python 3; encode all payloads.
    os.write( fd, b'<?xml version="1.0"?>\n' )
    os.write( fd, b"<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->\n" )
    os.write( fd, b'<tables>\n' )
    for elem in self.data_table_elems:
        os.write( fd, util.xml_to_string( elem ).encode( 'utf-8' ) )
    os.write( fd, b'</tables>\n' )
    os.close( fd )
    shutil.move( filename, full_path )
    # BUG FIX: 0644 is a syntax error on Python 3; use the 0o prefix.
    os.chmod( full_path, 0o644 )
def config_elems_to_xml_file(self, config_elems, config_filename, tool_path):
    """
    Persist the current in-memory list of config_elems to a file named by the
    value of config_filename.
    """
    try:
        doc_root = XmlET.fromstring('<?xml version="1.0"?>\n<toolbox tool_path="%s"></toolbox>' % str(tool_path))
        doc_root.extend(config_elems)
        # Write atomically via a renamed temporary file.
        with RenamedTemporaryFile(config_filename, mode='w') as outfile:
            outfile.write(xml_to_string(doc_root, pretty=True))
    except Exception as e:
        log.exception("Exception in ToolPanelManager.config_elems_to_xml_file: \n %s", str(e))
def load_manager_from_elem(self, data_manager_elem, tool_path=None, add_manager=True, replace_existing=False):
    """Build a DataManager from ``data_manager_elem``; returns None on load failure.

    NOTE(review): this block ends after the except clause in the visible
    source — the success-path registration/return may be truncated here.
    """
    try:
        data_manager = DataManager(self, data_manager_elem, tool_path=tool_path)
    # BUG FIX: "except Exception, e:" is Python 2 only syntax (SyntaxError on Python 3).
    except Exception as e:
        log.error("Error loading data_manager '%s':\n%s" % (e, util.xml_to_string(data_manager_elem)))
        return None
def config_elems_to_xml_file(self, config_elems, config_filename, tool_path, tool_cache_data_dir=None):
    """
    Persist the current in-memory list of config_elems to a file named by the
    value of config_filename.
    """
    try:
        # Optional attribute is rendered only when a cache dir was supplied.
        cache_attr = f' tool_cache_data_dir="{tool_cache_data_dir}"' if tool_cache_data_dir else ''
        toolbox = parse_xml_string(f'<?xml version="1.0"?>\n<toolbox tool_path="{tool_path}"{cache_attr}></toolbox>')
        for config_elem in config_elems:
            toolbox.append(config_elem)
        # Write atomically via a renamed temporary file.
        with RenamedTemporaryFile(config_filename, mode='w') as outfile:
            outfile.write(xml_to_string(toolbox, pretty=True))
    except Exception:
        log.exception("Exception in ToolPanelManager.config_elems_to_xml_file")
def data_manager_config_elems_to_xml_file(self, config_elems, config_filename):
    """
    Persist the current in-memory list of config_elems to a file named by the
    value of config_filename.
    """
    # Include the tool_path attribute only when a data managers path is configured.
    if self.data_managers_path:
        skeleton = '<?xml version="1.0"?><data_managers tool_path="%s"></data_managers>' % self.data_managers_path
    else:
        skeleton = '<?xml version="1.0"?><data_managers></data_managers>'
    root = ElementTree.fromstring(skeleton)
    root.extend(config_elems)
    try:
        # Write atomically via a renamed temporary file.
        with RenamedTemporaryFile(config_filename, mode='w') as outfile:
            outfile.write(xml_to_string(root))
    except Exception:
        log.exception("Exception in DataManagerHandler.data_manager_config_elems_to_xml_file")
# NOTE(review): the embedded XML string literals' internal newlines appear
# collapsed in this view; kept byte-identical rather than reformatted to avoid
# altering the strings written to disk and matched by the regex below.
def test_loader_specify_nested_macro_by_token(): """ test if a nested macro can have a nested macro specifying the macro name via a token of the outer macro """ with TestToolDirectory() as tool_dir: tool_dir.write(''' <tool> <macros> <import>external.xml</import> </macros> <expand macro="testp" token_name="a"/> <expand macro="testp" token_name="b"/> </tool>''') tool_dir.write(''' <macros> <xml name="a"> <A/> </xml> <xml name="b"> <B/> </xml> <xml name="testp" tokens="token_name"> <expand macro="@TOKEN_NAME@"/> </xml> </macros> ''', name="external.xml") xml = tool_dir.load() # test with re because loading from external macros # adds a xml:base property (containing the source path) # to the node which is printed print(f"{xml_to_string(xml, pretty=True)}") assert re.match( r"""<\?xml version="1\.0" \?> <tool> <macros/> <A/> <B/> </tool>""", xml_to_string(xml, pretty=True), re.MULTILINE)
def load_manager_from_elem(self, data_manager_elem, tool_path=None, add_manager=True):
    """Build a DataManager from ``data_manager_elem``; register it unless add_manager is False.

    Returns None when the backing file is missing or loading fails.
    """
    try:
        data_manager = DataManager(self, data_manager_elem, tool_path=tool_path)
    except IOError as e:
        if e.errno == errno.ENOENT:
            # File does not exist
            return None
        # BUG FIX: other IOErrors were silently swallowed and execution fell
        # through to use an unbound data_manager (NameError); re-raise instead.
        raise
    except Exception as e:
        log.error("Error loading data_manager '%s':\n%s" % (e, util.xml_to_string(data_manager_elem)))
        return None
    if add_manager:
        self.add_manager(data_manager)
    log.debug('Loaded Data Manager: %s' % (data_manager.id))
    return data_manager
def to_xml_file(self, shed_tool_data_table_config, new_elems=None, remove_elems=None):
    """
    Write the current in-memory version of the shed_tool_data_table_conf.xml
    file to disk.

    remove_elems are removed before new_elems are added.
    """
    if not (new_elems or remove_elems):
        log.debug('ToolDataTableManager.to_xml_file called without any elements to add or remove.')
        return  # no changes provided, no need to persist any changes
    if not new_elems:
        new_elems = []
    if not remove_elems:
        remove_elems = []
    full_path = os.path.abspath(shed_tool_data_table_config)
    # FIXME: we should lock changing this file by other threads / head nodes
    try:
        tree = util.parse_xml(full_path)
        root = tree.getroot()
        out_elems = [elem for elem in root]
    except Exception as e:
        out_elems = []
        log.debug('Could not parse existing tool data table config, assume no existing elements: %s', e)
    for elem in remove_elems:
        # handle multiple occurrences of remove elem in existing elems
        while elem in out_elems:
            # BUG FIX: original removed from remove_elems (the wrong list),
            # which never shrinks out_elems and loops forever.
            out_elems.remove(elem)
    # add new elems
    out_elems.extend(new_elems)
    # BUG FIX: open in text mode — xml_to_string returns str, which raises
    # TypeError when written to a binary ('wb') handle on Python 3.
    with open(full_path, 'w') as out:
        out.write('<?xml version="1.0"?>\n<tables>\n')
        for elem in out_elems:
            out.write(util.xml_to_string(elem, pretty=True))
        out.write('</tables>\n')
    os.chmod(full_path, 0o644)
# NOTE(review): this test block appears truncated in this view — the triple-quoted
# expected-XML literal opened at the end of the assert is not terminated here.
# Kept byte-identical; confirm the full expected string against the original file.
def test_loader_macro_named_yield(): """ test expansion of named and unnamed yield - named yields are replaced by content of the corresponding token - unnamed yields are replaced by all non-token elements of the expand tag """ with TestToolDirectory() as tool_dir: tool_dir.write(''' <tool> <macros> <xml name="test"> <A> <yield/> </A> <yield name="token1"/> <B> <yield/> <yield name="token2"/> </B> </xml> </macros> <expand macro="test"> <token name="token1"> <content_of_token1/> <more_content_of_token1/> </token> <sub_of_expand_1/> <token name="token2"> <content_of_token2/> <more_content_of_token2/> </token> <sub_of_expand_2/> </expand> </tool> ''') xml = tool_dir.load() assert xml_to_string(xml, pretty=True) == '''<?xml version="1.0" ?>
# NOTE(review): this test block appears truncated in this view — the triple-quoted
# expected-XML literal opened at the end of the assert is not terminated here.
# Kept byte-identical; confirm the full expected string against the original file.
def test_loader_macro_recursive_named_yield(): """ test 'recursive' replacement with named yields since named yields are processed in order of the definition of the corresponding tokens: - replacing yield for token1 introduces yield for token2 - replacing yield for token2 introduced unnamed yield - replacing unnamed yield gives the only non-token element of the expand """ with TestToolDirectory() as tool_dir: tool_dir.write(''' <tool> <macros> <xml name="test"> <A> <yield name="token1"/> </A> </xml> </macros> <expand macro="test"> <token name="token1"> <T1> <yield name="token2"/> </T1> </token> <token name="token2"> <T2> <yield/> </T2> </token> <T/> </expand> </tool>''') xml = tool_dir.load() assert xml_to_string(xml, pretty=True) == '''<?xml version="1.0" ?>
def load_from_element( self, elem, tool_path ):
    """Configure this data manager from its ``<data_manager>`` XML element.

    Resolves the backing tool either from a tool_file attribute or from the
    nested ``<tool>`` element (normalizing the tool shed URL's protocol), and
    may override ``tool_path`` from the element's shed_conf_file.
    """
    assert elem.tag == 'data_manager', 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % ( elem.tag )
    self.declared_id = elem.get( 'id', None )
    self.guid = elem.get( 'guid', None )
    path = elem.get( 'tool_file', None )
    self.version = elem.get( 'version', self.version )
    tool_shed_repository_id = None
    tool_guid = None
    if path is None:
        tool_elem = elem.find( 'tool' )
        assert tool_elem is not None, "Error loading tool for data manager. Make sure that a tool_file attribute or a tool tag set has been defined:\n%s" % ( util.xml_to_string( elem ) )
        path = tool_elem.get( "file", None )
        tool_guid = tool_elem.get( "guid", None )
        # need to determine repository info so that dependencies will work correctly
        tool_shed_url = tool_elem.find( 'tool_shed' ).text
        # Handle protocol changes.
        tool_shed_url = common_util.get_tool_shed_url_from_tool_shed_registry( self.data_managers.app, tool_shed_url )
        # The protocol is not stored in the database.
        tool_shed = common_util.remove_protocol_from_tool_shed_url( tool_shed_url )
        repository_name = tool_elem.find( 'repository_name' ).text
        repository_owner = tool_elem.find( 'repository_owner' ).text
        installed_changeset_revision = tool_elem.find( 'installed_changeset_revision' ).text
        self.tool_shed_repository_info_dict = dict( tool_shed=tool_shed,
                                                    name=repository_name,
                                                    owner=repository_owner,
                                                    installed_changeset_revision=installed_changeset_revision )
        tool_shed_repository = \
            suc.get_installed_repository( self.data_managers.app,
                                          tool_shed=tool_shed,
                                          name=repository_name,
                                          owner=repository_owner,
                                          installed_changeset_revision=installed_changeset_revision )
        if tool_shed_repository is None:
            log.warning( 'Could not determine tool shed repository from database. This should only ever happen when running tests.' )
            # we'll set tool_path manually here from shed_conf_file
            tool_shed_repository_id = None
            try:
                tool_path = util.parse_xml( elem.get( 'shed_conf_file' ) ).getroot().get( 'tool_path', tool_path )
            # BUG FIX: "except Exception, e:" is Python 2 only syntax.
            except Exception as e:
                log.error( 'Error determining tool_path for Data Manager during testing: %s', e )
        else:
            tool_shed_repository_id = self.data_managers.app.security.encode_id( tool_shed_repository.id )
            # use shed_conf_file to determine tool_path
            shed_conf_file = elem.get( "shed_conf_file", None )
            if shed_conf_file:
                shed_conf = self.data_managers.app.toolbox.get_shed_config_dict_by_filename( shed_conf_file, None )
                if shed_conf:
                    tool_path = shed_conf.get( "tool_path", tool_path )
def main():
    """Fetch a remote galaxy shed_tool_conf.xml, refresh 'new'/'updated' tool labels, and push it back."""
    parser = argparse.ArgumentParser(description='Update galaxy shed_tool_conf.xml with tool labels')
    parser.add_argument('-g', '--galaxy_url', help='Galaxy server URL', required=True)
    parser.add_argument('-u', '--remote_user', help='Remote user', default='galaxy')
    parser.add_argument('-f', '--remote_file_path', help='File name on galaxy', required=True)
    parser.add_argument('-k', '--key_path', help='Path to private ssh key file')  # for local testing - jenkins has the ssh identity already
    parser.add_argument('--display_new_days', type=int, help='Number of days to display label for new tool', required=True)
    parser.add_argument('--display_updated_days', type=int, help='Number of days to display label for updated tool', required=True)
    parser.add_argument('--safe', action='store_true', help='Do not overwrite the original file, give the updated file a new name')
    args = parser.parse_args()
    file = os.path.basename(args.remote_file_path)
    galaxy_url = args.galaxy_url
    display_new_days = args.display_new_days
    display_updated_days = args.display_updated_days
    copy_args = {
        'file': file,
        'remote_user': args.remote_user,
        'url': galaxy_url.split('//')[1] if galaxy_url.startswith('https://') else galaxy_url,
        'remote_file_path': args.remote_file_path,
        'key_path': args.key_path,
    }

    def filter_new(row):
        # New tools: installed within the display window, first-time installs.
        return row['Status'] == 'Installed' and in_time_window(row['Date (AEST)'], display_new_days) and row['New Tool'] == 'True'

    def filter_updated(row):
        # Updated tools: installed within the display window, not first-time installs.
        return row['Status'] == 'Installed' and in_time_window(row['Date (AEST)'], display_updated_days) and row['New Tool'] == 'False'

    with open(tool_labels_file) as handle:
        tool_labels = yaml.safe_load(handle)
    tool_labels.update({
        new_label: [],
        updated_label: [],
    })
    toolshed_tools = get_toolshed_tools(galaxy_url)
    for row in load_log(filter=filter_new):
        tool_ids = [
            t['id'] for t in toolshed_tools
            if (t['tool_shed_repository']['name'] == row['Name']
                and t['tool_shed_repository']['owner'] == row['Owner']
                and t['tool_shed_repository']['changeset_revision'] == row['Installed Revision'])
        ]
        tool_labels[new_label].extend(tool_ids)
    for row in load_log(filter=filter_updated):
        tool_ids = [
            t['id'] for t in toolshed_tools
            if (t['tool_shed_repository']['name'] == row['Name']
                and t['tool_shed_repository']['owner'] == row['Owner']
                and t['tool_shed_repository']['changeset_revision'] == row['Installed Revision'])
        ]
        tool_labels[updated_label].extend(tool_ids)
    try:
        get_remote_file(**copy_args)
    except Exception as e:
        print(e)
        raise Exception('Failed to fetch remote file')
    tree, error_message = parse_xml(file)
    root = tree.getroot()
    # shed_tool_conf.xml has multiple section elements containing tools
    # loop through all sections and tools
    for section in root:
        if section.tag == 'section':
            # BUG FIX: Element.getchildren() was removed in Python 3.9; iterate directly.
            for tool in section:
                if tool.tag == 'tool':
                    tool_id = tool.find('id').text
                    # remove all existing labels
                    tool.attrib.pop('labels', None)
                    # replace labels from dict
                    labels_for_tool = []
                    for label in tool_labels:
                        for id in tool_labels[label]:
                            if tool_id == id or (id.endswith('*') and get_deversioned_id(id) == get_deversioned_id(tool_id)):
                                labels_for_tool.append(label)
                                break
                    if labels_for_tool:
                        tool.set('labels', ','.join(labels_for_tool))
    with open(file, 'w') as handle:
        handle.write(xml_to_string(root, pretty=True))
    if args.safe:
        remote_file_path = copy_args['remote_file_path']
        copy_args.update({
            'remote_file_path': '%s_jenkins_%s' % (remote_file_path, arrow.now().format('YYYYMMDD'))
        })
    try:
        copy_file_to_remote_location(**copy_args)
    except Exception as e:
        print(e)
        raise Exception('Failed to copy file to remote instance')
def load_from_element(self, elem, tool_path):
    """Configure this data manager from its ``<data_manager>`` XML element.

    Resolves and loads the backing tool, then records data table output
    columns, output refs, value translations, and move directives declared
    under each ``<data_table>`` child.
    """
    assert (
        elem.tag == "data_manager"
        # BUG FIX: the assert message referenced undefined 'root.tag' (NameError
        # whenever the assert fired); the element under test is 'elem'.
    ), 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % (elem.tag)
    self.declared_id = elem.get("id", None)
    self.guid = elem.get("guid", None)
    path = elem.get("tool_file", None)
    self.version = elem.get("version", self.version)
    tool_shed_repository_id = None
    tool_guid = None
    if path is None:
        tool_elem = elem.find("tool")
        assert tool_elem is not None, (
            "Error loading tool for data manager. Make sure that a tool_file attribute or a tool tag set has been defined:\n%s"
            % (util.xml_to_string(elem))
        )
        path = tool_elem.get("file", None)
        tool_guid = tool_elem.get("guid", None)
        # need to determine repository info so that dependencies will work correctly
        tool_shed = tool_elem.find("tool_shed").text
        repository_name = tool_elem.find("repository_name").text
        repository_owner = tool_elem.find("repository_owner").text
        installed_changeset_revision = tool_elem.find("installed_changeset_revision").text
        # save repository info here
        self.tool_shed_repository_info_dict = dict(
            tool_shed=tool_shed,
            name=repository_name,
            owner=repository_owner,
            installed_changeset_revision=installed_changeset_revision,
        )
        # get tool_shed repo id
        tool_shed_repository = suc.get_tool_shed_repository_by_shed_name_owner_installed_changeset_revision(
            self.data_managers.app, tool_shed, repository_name, repository_owner, installed_changeset_revision
        )
        tool_shed_repository_id = self.data_managers.app.security.encode_id(tool_shed_repository.id)
        # use shed_conf_file to determine tool_path
        shed_conf_file = elem.get("shed_conf_file", None)
        if shed_conf_file:
            shed_conf = self.data_managers.app.toolbox.get_shed_config_dict_by_filename(shed_conf_file, None)
            if shed_conf:
                tool_path = shed_conf.get("tool_path", tool_path)
    assert path is not None, "A tool file path could not be determined:\n%s" % (util.xml_to_string(elem))
    self.load_tool(
        os.path.join(tool_path, path),
        guid=tool_guid,
        data_manager_id=self.id,
        tool_shed_repository_id=tool_shed_repository_id,
    )
    self.name = elem.get("name", self.tool.name)
    self.description = elem.get("description", self.tool.description)
    for data_table_elem in elem.findall("data_table"):
        data_table_name = data_table_elem.get("name")
        assert data_table_name is not None, "A name is required for a data table entry"
        if data_table_name not in self.data_tables:
            self.data_tables[data_table_name] = odict()  # {}
        output_elem = data_table_elem.find("output")
        if output_elem is not None:
            for column_elem in output_elem.findall("column"):
                column_name = column_elem.get("name", None)
                assert column_name is not None, "Name is required for column entry"
                data_table_coumn_name = column_elem.get("data_table_name", column_name)
                self.data_tables[data_table_name][data_table_coumn_name] = column_name
                output_ref = column_elem.get("output_ref", None)
                if output_ref is not None:
                    if data_table_name not in self.output_ref_by_data_table:
                        self.output_ref_by_data_table[data_table_name] = {}
                    self.output_ref_by_data_table[data_table_name][data_table_coumn_name] = output_ref
                value_translation_elems = column_elem.findall("value_translation")
                if value_translation_elems is not None:
                    for value_translation_elem in value_translation_elems:
                        value_translation = value_translation_elem.text
                        if value_translation is not None:
                            value_translation_type = value_translation_elem.get("type", DEFAULT_VALUE_TRANSLATION_TYPE)
                            if data_table_name not in self.value_translation_by_data_table_column:
                                self.value_translation_by_data_table_column[data_table_name] = {}
                            if data_table_coumn_name not in self.value_translation_by_data_table_column[data_table_name]:
                                self.value_translation_by_data_table_column[data_table_name][data_table_coumn_name] = []
                            if value_translation_type == "function":
                                if value_translation in VALUE_TRANSLATION_FUNCTIONS:
                                    value_translation = VALUE_TRANSLATION_FUNCTIONS[value_translation]
                                else:
                                    raise ValueError("Unsupported value translation function: '%s'" % (value_translation))
                            else:
                                assert value_translation_type == DEFAULT_VALUE_TRANSLATION_TYPE, ValueError(
                                    "Unsupported value translation type: '%s'" % (value_translation_type)
                                )
                            self.value_translation_by_data_table_column[data_table_name][data_table_coumn_name].append(
                                value_translation
                            )
                for move_elem in column_elem.findall("move"):
                    move_type = move_elem.get("type", "directory")
                    relativize_symlinks = move_elem.get(
                        "relativize_symlinks", False
                    )  # TODO: should we instead always relativize links?
                    source_elem = move_elem.find("source")
                    if source_elem is None:
                        source_base = None
                        source_value = ""
                    else:
                        source_base = source_elem.get("base", None)
                        source_value = source_elem.text
                    target_elem = move_elem.find("target")
                    if target_elem is None:
                        target_base = None
                        target_value = ""
                    else:
                        target_base = target_elem.get("base", None)
                        target_value = target_elem.text
                    if data_table_name not in self.move_by_data_table_column:
                        self.move_by_data_table_column[data_table_name] = {}
                    self.move_by_data_table_column[data_table_name][data_table_coumn_name] = dict(
                        type=move_type,
                        source_base=source_base,
                        source_value=source_value,
                        target_base=target_base,
                        target_value=target_value,
                        relativize_symlinks=relativize_symlinks,
                    )
def configure_and_load( self, config_element, tool_data_path, from_shed_config=False ):
    """
    Configure and load table from an XML element.
    """
    self.separator = config_element.get( 'separator', '\t' )
    self.comment_char = config_element.get( 'comment_char', '#' )
    # Configure columns
    self.parse_column_spec( config_element )
    # store repo info if available:
    repo_elem = config_element.find( 'tool_shed_repository' )
    if repo_elem is not None:
        repo_info = dict( tool_shed=repo_elem.find( 'tool_shed' ).text,
                          name=repo_elem.find( 'repository_name' ).text,
                          owner=repo_elem.find( 'repository_owner' ).text,
                          installed_changeset_revision=repo_elem.find( 'installed_changeset_revision' ).text )
    else:
        repo_info = None
    # Read every file
    for file_element in config_element.findall( 'file' ):
        filename = file_path = file_element.get( 'path', None )
        found = False
        if file_path is None:
            log.debug( "Encountered a file element (%s) that does not contain a path value when loading tool data table '%s'.", util.xml_to_string( file_element ), self.name )
            continue
        # FIXME: splitting on and merging paths from a configuration file when loading is wonky
        # Data should exist on disk in the state needed, i.e. the xml configuration should
        # point directly to the desired file to load. Munging of the tool_data_tables_conf.xml.sample
        # can be done during installing / testing / metadata resetting with the creation of a proper
        # tool_data_tables_conf.xml file, containing correct <file path=> attributes. Allowing a
        # path.join with a different root should be allowed, but splitting should not be necessary.
        if tool_data_path and from_shed_config:
            # Must identify with from_shed_config as well, because the
            # regular galaxy app has and uses tool_data_path.
            # We're loading a tool in the tool shed, so we cannot use the Galaxy tool-data
            # directory which is hard-coded into the tool_data_table_conf.xml entries.
            filename = os.path.split( file_path )[ 1 ]
            filename = os.path.join( tool_data_path, filename )
        if os.path.exists( filename ):
            found = True
        else:
            # Since the path attribute can include a hard-coded path to a specific directory
            # (e.g., <file path="tool-data/cg_crr_files.loc" />) which may not be the same value
            # as self.tool_data_path, we'll parse the path to get the filename and see if it is
            # in self.tool_data_path.
            file_path, file_name = os.path.split( filename )
            if file_path and file_path != self.tool_data_path:
                corrected_filename = os.path.join( self.tool_data_path, file_name )
                if os.path.exists( corrected_filename ):
                    filename = corrected_filename
                    found = True
        if found:
            # BUG FIX: the original passed a bare open() handle that was never
            # closed; use a context manager so the file is released promptly.
            with open( filename ) as data_fh:
                self.data.extend( self.parse_file_fields( data_fh ) )
            self._update_version()
        else:
            self.missing_index_file = filename
            # log.warn is a deprecated alias for log.warning
            log.warning( "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) )
        if filename not in self.filenames or not self.filenames[ filename ][ 'found' ]:
            self.filenames[ filename ] = dict( found=found, filename=filename, from_shed_config=from_shed_config, tool_data_path=tool_data_path,
                                               config_element=config_element, tool_shed_repository=repo_info )
        else:
            log.debug( "Filename '%s' already exists in filenames (%s), not adding", filename, self.filenames.keys() )
def configure_and_load(self, config_element, tool_data_path, from_shed_config=False, url_timeout=10):
    """
    Configure and load table from an XML element.

    In addition to local ``<file path="...">`` entries, this variant accepts
    ``<file url="...">`` entries, which are downloaded into a temporary file
    before being loaded.

    :param url_timeout: timeout, in seconds, for fetching a ``url`` file
    """
    self.separator = config_element.get('separator', '\t')
    self.comment_char = config_element.get('comment_char', '#')
    # Configure columns
    self.parse_column_spec(config_element)
    # store repo info if available:
    repo_elem = config_element.find('tool_shed_repository')
    if repo_elem is not None:
        repo_info = dict(tool_shed=repo_elem.find('tool_shed').text,
                         name=repo_elem.find('repository_name').text,
                         owner=repo_elem.find('repository_owner').text,
                         installed_changeset_revision=repo_elem.find('installed_changeset_revision').text)
    else:
        repo_info = None
    # Read every file
    for file_element in config_element.findall('file'):
        tmp_file = None
        filename = file_element.get('path', None)
        if filename is None:
            # Handle URLs as files
            filename = file_element.get('url', None)
            if filename:
                # FIX: open the temp file in text mode — requests' ``.text`` is
                # a str, and the NamedTemporaryFile default mode ('w+b') only
                # accepts bytes, which raised TypeError on Python 3.
                tmp_file = NamedTemporaryFile(mode='w', prefix='TTDT_URL_%s-' % self.name)
                try:
                    tmp_file.write(requests.get(filename, timeout=url_timeout).text)
                except Exception as e:
                    log.error('Error loading Data Table URL "%s": %s', filename, e)
                    # FIX: close (and thereby delete) the temporary file instead
                    # of leaking it when the download fails.
                    tmp_file.close()
                    continue
                log.debug('Loading Data Table URL "%s" as filename "%s".', filename, tmp_file.name)
                filename = tmp_file.name
                tmp_file.flush()
        filename = file_path = expand_here_template(filename, here=self.here)
        found = False
        if file_path is None:
            log.debug("Encountered a file element (%s) that does not contain a path value when loading tool data table '%s'.",
                      util.xml_to_string(file_element), self.name)
            continue
        # FIXME: splitting on and merging paths from a configuration file when loading is wonky.
        # Data should exist on disk in the state needed, i.e. the xml configuration should
        # point directly to the desired file to load. Munging of the tool_data_tables_conf.xml.sample
        # can be done during installing / testing / metadata resetting with the creation of a proper
        # tool_data_tables_conf.xml file, containing correct <file path=> attributes. Allowing a
        # path.join with a different root should be allowed, but splitting should not be necessary.
        if tool_data_path and from_shed_config:
            # Must identify with from_shed_config as well, because the
            # regular galaxy app has and uses tool_data_path.
            # We're loading a tool in the tool shed, so we cannot use the Galaxy tool-data
            # directory which is hard-coded into the tool_data_table_conf.xml entries.
            filename = os.path.split(file_path)[1]
            filename = os.path.join(tool_data_path, filename)
        if self.tool_data_path_files.exists(filename):
            found = True
        elif self.tool_data_path_files.exists("%s.sample" % filename) and not from_shed_config:
            log.info("Could not find tool data %s, reading sample" % filename)
            filename = "%s.sample" % filename
            found = True
        else:
            # Since the path attribute can include a hard-coded path to a specific directory
            # (e.g., <file path="tool-data/cg_crr_files.loc" />) which may not be the same value
            # as self.tool_data_path, we'll parse the path to get the filename and see if it is
            # in self.tool_data_path.
            file_path, file_name = os.path.split(filename)
            if file_path and file_path != self.tool_data_path:
                corrected_filename = os.path.join(self.tool_data_path, file_name)
                if self.tool_data_path_files.exists(corrected_filename):
                    filename = corrected_filename
                    found = True
        errors = []
        if found:
            self.extend_data_with(filename, errors=errors)
            self._update_version()
        else:
            self.missing_index_file = filename
            log.warning("Cannot find index file '%s' for tool data table '%s'" % (filename, self.name))
        if filename not in self.filenames or not self.filenames[filename]['found']:
            self.filenames[filename] = dict(found=found,
                                            filename=filename,
                                            from_shed_config=from_shed_config,
                                            tool_data_path=tool_data_path,
                                            config_element=config_element,
                                            tool_shed_repository=repo_info,
                                            errors=errors)
        else:
            log.debug("Filename '%s' already exists in filenames (%s), not adding",
                      filename, list(self.filenames.keys()))
        # Remove URL tmp file
        if tmp_file is not None:
            tmp_file.close()
class DataManager(object):
    """
    A single Galaxy Data Manager: a tool plus the rules that map its outputs
    into tool data table columns.

    Instances are populated from a <data_manager> XML element via
    load_from_element(). Note this snippet uses Python 2 syntax
    (``except Exception, e``).
    """
    GUID_TYPE = 'data_manager'
    DEFAULT_VERSION = "0.0.1"

    def __init__(self, data_managers, elem=None, tool_path=None):
        # Parent registry; provides .app and a default .tool_path.
        self.data_managers = data_managers
        self.declared_id = None
        self.name = None
        self.description = None
        self.version = self.DEFAULT_VERSION
        self.guid = None
        self.tool = None
        # data table name -> {data table column name -> tool output column name}
        self.data_tables = odict()
        self.output_ref_by_data_table = {}
        self.move_by_data_table_column = {}
        self.value_translation_by_data_table_column = {}
        self.tool_shed_repository_info_dict = None
        if elem is not None:
            self.load_from_element(elem, tool_path or self.data_managers.tool_path)

    def load_from_element(self, elem, tool_path):
        """
        Populate this data manager from a <data_manager> XML element.

        Resolves the underlying tool (from a tool_file attribute or a nested
        <tool> tag set, including tool shed repository lookup), then records
        the per-<data_table> column mappings, output refs, value translations
        and move directives.
        """
        assert elem.tag == 'data_manager', 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % (
            elem.tag)
        self.declared_id = elem.get('id', None)
        self.guid = elem.get('guid', None)
        path = elem.get('tool_file', None)
        self.version = elem.get('version', self.version)
        tool_shed_repository_id = None
        tool_guid = None
        if path is None:
            # No tool_file attribute: the tool must come from a <tool> tag set.
            tool_elem = elem.find('tool')
            assert tool_elem is not None, "Error loading tool for data manager. Make sure that a tool_file attribute or a tool tag set has been defined:\n%s" % (
                util.xml_to_string(elem))
            path = tool_elem.get("file", None)
            tool_guid = tool_elem.get("guid", None)
            # need to determine repository info so that dependencies will work correctly
            tool_shed_url = tool_elem.find('tool_shed').text
            # Handle protocol changes.
            tool_shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(
                self.data_managers.app, tool_shed_url)
            # The protocol is not stored in the database.
            tool_shed = common_util.remove_protocol_from_tool_shed_url(
                tool_shed_url)
            repository_name = tool_elem.find('repository_name').text
            repository_owner = tool_elem.find('repository_owner').text
            installed_changeset_revision = tool_elem.find(
                'installed_changeset_revision').text
            self.tool_shed_repository_info_dict = dict(
                tool_shed=tool_shed,
                name=repository_name,
                owner=repository_owner,
                installed_changeset_revision=installed_changeset_revision)
            tool_shed_repository = \
                suc.get_tool_shed_repository_by_shed_name_owner_installed_changeset_revision(
                    self.data_managers.app, tool_shed, repository_name,
                    repository_owner, installed_changeset_revision)
            if tool_shed_repository is None:
                log.warning(
                    'Could not determine tool shed repository from database. This should only ever happen when running tests.'
                )
                # we'll set tool_path manually here from shed_conf_file
                tool_shed_repository_id = None
                try:
                    tool_path = util.parse_xml(
                        elem.get('shed_conf_file')).getroot().get(
                            'tool_path', tool_path)
                except Exception, e:  # Python 2 except syntax
                    log.error(
                        'Error determining tool_path for Data Manager during testing: %s',
                        e)
            else:
                tool_shed_repository_id = self.data_managers.app.security.encode_id(
                    tool_shed_repository.id)
        # use shed_conf_file to determine tool_path
        shed_conf_file = elem.get("shed_conf_file", None)
        if shed_conf_file:
            shed_conf = self.data_managers.app.toolbox.get_shed_config_dict_by_filename(
                shed_conf_file, None)
            if shed_conf:
                tool_path = shed_conf.get("tool_path", tool_path)
        assert path is not None, "A tool file path could not be determined:\n%s" % (
            util.xml_to_string(elem))
        self.load_tool(os.path.join(tool_path, path),
                       guid=tool_guid,
                       data_manager_id=self.id,
                       tool_shed_repository_id=tool_shed_repository_id)
        # Fall back to the tool's own name/description when not declared.
        self.name = elem.get('name', self.tool.name)
        self.description = elem.get('description', self.tool.description)
        for data_table_elem in elem.findall('data_table'):
            data_table_name = data_table_elem.get("name")
            assert data_table_name is not None, "A name is required for a data table entry"
            if data_table_name not in self.data_tables:
                self.data_tables[data_table_name] = odict()  # ordered column-name map
            output_elem = data_table_elem.find('output')
            if output_elem is not None:
                for column_elem in output_elem.findall('column'):
                    column_name = column_elem.get('name', None)
                    assert column_name is not None, "Name is required for column entry"
                    # NOTE(review): the 'coumn' typo in this identifier is
                    # preserved — renaming is out of scope for a doc pass.
                    data_table_coumn_name = column_elem.get(
                        'data_table_name', column_name)
                    self.data_tables[data_table_name][
                        data_table_coumn_name] = column_name
                    output_ref = column_elem.get('output_ref', None)
                    if output_ref is not None:
                        if data_table_name not in self.output_ref_by_data_table:
                            self.output_ref_by_data_table[data_table_name] = {}
                        self.output_ref_by_data_table[data_table_name][
                            data_table_coumn_name] = output_ref
                    value_translation_elems = column_elem.findall(
                        'value_translation')
                    # NOTE(review): findall() never returns None, so this guard
                    # is always true; harmless but redundant.
                    if value_translation_elems is not None:
                        for value_translation_elem in value_translation_elems:
                            value_translation = value_translation_elem.text
                            if value_translation is not None:
                                value_translation_type = value_translation_elem.get(
                                    'type', DEFAULT_VALUE_TRANSLATION_TYPE)
                                if data_table_name not in self.value_translation_by_data_table_column:
                                    self.value_translation_by_data_table_column[
                                        data_table_name] = {}
                                if data_table_coumn_name not in self.value_translation_by_data_table_column[
                                        data_table_name]:
                                    self.value_translation_by_data_table_column[
                                        data_table_name][
                                            data_table_coumn_name] = []
                                if value_translation_type == 'function':
                                    # 'function' translations are resolved to a
                                    # callable from the registry immediately.
                                    if value_translation in VALUE_TRANSLATION_FUNCTIONS:
                                        value_translation = VALUE_TRANSLATION_FUNCTIONS[
                                            value_translation]
                                    else:
                                        raise ValueError(
                                            "Unsupported value translation function: '%s'"
                                            % (value_translation))
                                else:
                                    assert value_translation_type == DEFAULT_VALUE_TRANSLATION_TYPE, ValueError(
                                        "Unsupported value translation type: '%s'"
                                        % (value_translation_type))
                                self.value_translation_by_data_table_column[
                                    data_table_name][
                                        data_table_coumn_name].append(
                                            value_translation)
                    for move_elem in column_elem.findall('move'):
                        move_type = move_elem.get('type', 'directory')
                        # NOTE(review): XML attribute values are strings, so a
                        # declared attribute yields e.g. "True"/"False" (str),
                        # not a bool — confirm downstream coercion.
                        relativize_symlinks = move_elem.get(
                            'relativize_symlinks', False
                        )  # TODO: should we instead always relativize links?
                        source_elem = move_elem.find('source')
                        if source_elem is None:
                            source_base = None
                            source_value = ''
                        else:
                            source_base = source_elem.get('base', None)
                            source_value = source_elem.text
                        target_elem = move_elem.find('target')
                        if target_elem is None:
                            target_base = None
                            target_value = ''
                        else:
                            target_base = target_elem.get('base', None)
                            target_value = target_elem.text
                        if data_table_name not in self.move_by_data_table_column:
                            self.move_by_data_table_column[
                                data_table_name] = {}
                        self.move_by_data_table_column[data_table_name][data_table_coumn_name] = \
                            dict(type=move_type,
                                 source_base=source_base,
                                 source_value=source_value,
                                 target_base=target_base,
                                 target_value=target_value,
                                 relativize_symlinks=relativize_symlinks)
def load_from_element(self, elem, tool_path):
    """
    Populate this data manager from a <data_manager> XML element.

    Resolves the underlying tool (via a tool_file attribute or a nested
    <tool> tag set, using the toolbox's repository lookup), then records the
    per-<data_table> column mappings, output refs, value translations and
    move directives used to publish tool results into tool data tables.
    """
    assert elem.tag == 'data_manager', 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % (
        elem.tag)
    self.declared_id = elem.get('id')
    self.guid = elem.get('guid')
    path = elem.get('tool_file')
    self.version = elem.get('version', self.version)
    tool_shed_repository = None
    tool_guid = None
    if path is None:
        # No tool_file attribute: the tool must be described by a <tool> tag set.
        tool_elem = elem.find('tool')
        assert tool_elem is not None, "Error loading tool for data manager. Make sure that a tool_file attribute or a tool tag set has been defined:\n%s" % (
            util.xml_to_string(elem))
        path = tool_elem.get("file")
        tool_guid = tool_elem.get("guid")
        # need to determine repository info so that dependencies will work correctly
        tool_shed_repository = self.data_managers.app.toolbox.get_tool_repository_from_xml_item(
            tool_elem, path)
        self.tool_shed_repository_info_dict = dict(
            tool_shed=tool_shed_repository.tool_shed,
            name=tool_shed_repository.name,
            owner=tool_shed_repository.owner,
            installed_changeset_revision=tool_shed_repository.installed_changeset_revision)
    # use shed_conf_file to determine tool_path
    shed_conf_file = elem.get("shed_conf_file")
    if shed_conf_file:
        shed_conf = self.data_managers.app.toolbox.get_shed_config_dict_by_filename(
            shed_conf_file)
        if shed_conf:
            tool_path = shed_conf.get("tool_path", tool_path)
    assert path is not None, "A tool file path could not be determined:\n%s" % (
        util.xml_to_string(elem))
    self.load_tool(os.path.join(tool_path, path),
                   guid=tool_guid,
                   data_manager_id=self.id,
                   tool_shed_repository=tool_shed_repository)
    # Fall back to the tool's own name/description when not declared.
    self.name = elem.get('name', self.tool.name)
    self.description = elem.get('description', self.tool.description)
    self.undeclared_tables = util.asbool(
        elem.get('undeclared_tables', self.undeclared_tables))
    for data_table_elem in elem.findall('data_table'):
        data_table_name = data_table_elem.get("name")
        assert data_table_name is not None, "A name is required for a data table entry"
        if data_table_name not in self.data_tables:
            self.data_tables[data_table_name] = OrderedDict()
        output_elem = data_table_elem.find('output')
        if output_elem is not None:
            for column_elem in output_elem.findall('column'):
                column_name = column_elem.get('name', None)
                assert column_name is not None, "Name is required for column entry"
                # NOTE(review): the 'coumn' typo in this identifier is
                # preserved — renaming is out of scope for a doc pass.
                data_table_coumn_name = column_elem.get(
                    'data_table_name', column_name)
                self.data_tables[data_table_name][
                    data_table_coumn_name] = column_name
                output_ref = column_elem.get('output_ref', None)
                if output_ref is not None:
                    if data_table_name not in self.output_ref_by_data_table:
                        self.output_ref_by_data_table[data_table_name] = {}
                    self.output_ref_by_data_table[data_table_name][
                        data_table_coumn_name] = output_ref
                value_translation_elems = column_elem.findall(
                    'value_translation')
                # NOTE(review): findall() never returns None; guard is redundant.
                if value_translation_elems is not None:
                    for value_translation_elem in value_translation_elems:
                        value_translation = value_translation_elem.text
                        if value_translation is not None:
                            value_translation_type = value_translation_elem.get(
                                'type', DEFAULT_VALUE_TRANSLATION_TYPE)
                            if data_table_name not in self.value_translation_by_data_table_column:
                                self.value_translation_by_data_table_column[
                                    data_table_name] = {}
                            if data_table_coumn_name not in self.value_translation_by_data_table_column[
                                    data_table_name]:
                                self.value_translation_by_data_table_column[
                                    data_table_name][
                                        data_table_coumn_name] = []
                            if value_translation_type == 'function':
                                # 'function' translations resolve to a callable
                                # from the registry immediately.
                                if value_translation in VALUE_TRANSLATION_FUNCTIONS:
                                    value_translation = VALUE_TRANSLATION_FUNCTIONS[
                                        value_translation]
                                else:
                                    raise ValueError(
                                        "Unsupported value translation function: '%s'"
                                        % (value_translation))
                            else:
                                assert value_translation_type == DEFAULT_VALUE_TRANSLATION_TYPE, ValueError(
                                    "Unsupported value translation type: '%s'"
                                    % (value_translation_type))
                            self.value_translation_by_data_table_column[
                                data_table_name][
                                    data_table_coumn_name].append(
                                        value_translation)
                for move_elem in column_elem.findall('move'):
                    move_type = move_elem.get('type', 'directory')
                    # NOTE(review): XML attribute values are strings, so a
                    # declared attribute yields "True"/"False" (str), not a
                    # bool — confirm downstream coercion.
                    relativize_symlinks = move_elem.get(
                        'relativize_symlinks', False
                    )  # TODO: should we instead always relativize links?
                    source_elem = move_elem.find('source')
                    if source_elem is None:
                        source_base = None
                        source_value = ''
                    else:
                        source_base = source_elem.get('base', None)
                        source_value = source_elem.text
                    target_elem = move_elem.find('target')
                    if target_elem is None:
                        target_base = None
                        target_value = ''
                    else:
                        target_base = target_elem.get('base', None)
                        target_value = target_elem.text
                    if data_table_name not in self.move_by_data_table_column:
                        self.move_by_data_table_column[data_table_name] = {}
                    self.move_by_data_table_column[data_table_name][data_table_coumn_name] = \
                        dict(type=move_type,
                             source_base=source_base,
                             source_value=source_value,
                             target_base=target_base,
                             target_value=target_value,
                             relativize_symlinks=relativize_symlinks)
# NOTE(review): this span is a continuation of to_xml_file() — the "try:"
# opening the except clause below and the method signature are outside the
# visible chunk. Python 2 syntax (except ..., e / octal 0644) throughout.
            tree = util.parse_xml( full_path )
            root = tree.getroot()
            out_elems = [ elem for elem in root ]
        except Exception, e:
            # Missing or unparsable config: start from an empty element list.
            out_elems = []
            log.debug( 'Could not parse existing tool data table config, assume no existing elements: %s', e )
        for elem in remove_elems:
            # handle multiple occurrences of remove elem in existing elems
            # NOTE(review): suspected bug — membership is tested in out_elems but
            # the removal targets remove_elems; out_elems.remove( elem ) was
            # probably intended. As written, out_elems never shrinks and a
            # ValueError is raised once remove_elems is exhausted.
            while elem in out_elems:
                remove_elems.remove( elem )
        # add new elems
        out_elems.extend( new_elems )
        with open( full_path, 'wb' ) as out:
            # NOTE(review): str written to a binary-mode handle — Python 2 only.
            out.write( '<?xml version="1.0"?>\n<tables>\n' )
            for elem in out_elems:
                out.write( util.xml_to_string( elem ) )
            out.write( '</tables>\n' )
        os.chmod( full_path, 0644 )  # world-readable; octal literal is Python 2 syntax

    def reload_tables( self, table_names=None ):
        """
        Reload the named tool data tables from their backing files.

        :param table_names: a single name, a list of names, or None for all
        :returns: the list of table names that were reloaded
        """
        tables = self.get_tables()
        if not table_names:
            table_names = tables.keys()
        elif not isinstance( table_names, list ):
            table_names = [ table_names ]
        for table_name in table_names:
            tables[ table_name ].reload_from_files()
            log.debug( "Reloaded tool data table '%s' from files.", table_name )
        return table_names
def main():
    """
    Rewrite shed_tool_conf.xml tool entries with up-to-date ``labels`` and
    ``hidden`` attributes, derived from the static tool-labels file, the
    automated installation log (recently new/updated tools) and the optional
    hidden-tools file.
    """
    for required_file in ['automated_tool_installation_log.tsv', 'tool_labels.yml', 'shed_tool_conf.xml']:
        if not os.path.exists(required_file):
            raise Exception(f'Required file {required_file} is missing')

    def matches_id(tool_id, pattern):
        # A pattern ending in '*' matches any version of the same tool id.
        # FIX: factored out — this expression was duplicated twice below.
        return tool_id == pattern or (
            pattern.endswith('*')
            and get_deversioned_id(pattern) == get_deversioned_id(tool_id)
        )

    def filter_new(row):
        return row['Status'] == 'Installed' and in_time_window(
            row['Date (AEST)'], display_new_days) and row['New Tool'] == 'True'

    def filter_updated(row):
        return row['Status'] == 'Installed' and in_time_window(
            row['Date (AEST)'], display_updated_days) and row['New Tool'] == 'False'

    def load_log(row_filter=None):
        """
        Load the installation log tsv file and return it as a list of row
        objects, i.e.
        [{'Build Num.': '156', 'Name': 'abricate', ...},
         {'Build Num.': '156', 'Name': 'bedtools', ...}, ...]
        The row_filter argument is a function that takes a row as input and
        returns True or False.  (FIX: renamed from ``filter``, which shadowed
        the builtin.)
        """
        table = []
        with open(log_file) as tsvfile:
            reader = csv.DictReader(tsvfile, dialect='excel-tab')
            for row in reader:
                if not row_filter or row_filter(row):
                    table.append(row)
        return table

    with open(tool_labels_file) as handle:
        tool_labels_constant = yaml.safe_load(handle)

    if os.path.exists(hidden_tools_file):
        with open(hidden_tools_file) as handle:
            hidden_tool_ids = yaml.safe_load(handle).get('hidden_tool_ids', [])
    else:
        hidden_tool_ids = []

    tool_labels_dynamic = {
        new_label: load_log(row_filter=filter_new),
        updated_label: load_log(row_filter=filter_updated),
    }

    tree, error_message = parse_xml(shed_tool_conf_file)
    if tree is None:
        # FIX: a parse failure previously crashed below with an opaque
        # AttributeError on tree.getroot(); surface the parser's message.
        raise Exception(f'Could not parse {shed_tool_conf_file}: {error_message}')
    root = tree.getroot()

    # shed_tool_conf.xml has multiple section elements containing tools;
    # loop through all sections and tools.
    for section in root:
        if section.tag != 'section':
            continue
        for tool in list(section):
            if tool.tag != 'tool':
                continue
            tool_id = tool.find('id').text
            name = tool.find('repository_name').text
            owner = tool.find('repository_owner').text
            revision = tool.find('installed_changeset_revision').text

            # remove all existing labels, then replace labels from dict
            tool.attrib.pop('labels', None)
            labels_for_tool = []
            for label in tool_labels_constant:
                if any(matches_id(tool_id, pattern) for pattern in tool_labels_constant[label]):
                    labels_for_tool.append(label)
            for label in tool_labels_dynamic:
                for row in tool_labels_dynamic[label]:
                    if row['Name'] == name and row['Owner'] == owner and row['Installed Revision'] == revision:
                        labels_for_tool.append(label)
                        break

            # Recompute the hidden flag from scratch on every run.
            tool.attrib.pop('hidden', None)
            if any(matches_id(tool_id, pattern) for pattern in hidden_tool_ids):
                tool.set('hidden', 'True')

            if labels_for_tool:
                tool.set('labels', ','.join(labels_for_tool))

    with open(shed_tool_conf_file, 'w') as handle:
        handle.write(xml_to_string(root, pretty=True))
def configure_and_load(self, config_element, tool_data_path, from_shed_config=False):
    """
    Configure and load table from an XML element.

    Reads separator / comment-char settings and the column spec, records
    ``<tool_shed_repository>`` provenance when present, then loads every
    ``<file>`` child into ``self.data`` and tracks each file's status in
    ``self.filenames``.
    """
    self.separator = config_element.get('separator', '\t')
    self.comment_char = config_element.get('comment_char', '#')
    # Configure columns
    self.parse_column_spec(config_element)
    # store repo info if available:
    repo_elem = config_element.find('tool_shed_repository')
    if repo_elem is not None:
        repo_info = dict(tool_shed=repo_elem.find('tool_shed').text,
                         name=repo_elem.find('repository_name').text,
                         owner=repo_elem.find('repository_owner').text,
                         installed_changeset_revision=repo_elem.find('installed_changeset_revision').text)
    else:
        repo_info = None
    # Read every file
    for file_element in config_element.findall('file'):
        filename = file_path = file_element.get('path', None)
        found = False
        if file_path is None:
            log.debug("Encountered a file element (%s) that does not contain a path value when loading tool data table '%s'.",
                      util.xml_to_string(file_element), self.name)
            continue
        # FIXME: splitting on and merging paths from a configuration file when loading is wonky.
        # Data should exist on disk in the state needed, i.e. the xml configuration should
        # point directly to the desired file to load. Munging of the tool_data_tables_conf.xml.sample
        # can be done during installing / testing / metadata resetting with the creation of a proper
        # tool_data_tables_conf.xml file, containing correct <file path=> attributes. Allowing a
        # path.join with a different root should be allowed, but splitting should not be necessary.
        if tool_data_path and from_shed_config:
            # Must identify with from_shed_config as well, because the
            # regular galaxy app has and uses tool_data_path.
            # We're loading a tool in the tool shed, so we cannot use the Galaxy tool-data
            # directory which is hard-coded into the tool_data_table_conf.xml entries.
            filename = os.path.split(file_path)[1]
            filename = os.path.join(tool_data_path, filename)
        if os.path.exists(filename):
            found = True
        else:
            # Since the path attribute can include a hard-coded path to a specific directory
            # (e.g., <file path="tool-data/cg_crr_files.loc" />) which may not be the same value
            # as self.tool_data_path, we'll parse the path to get the filename and see if it is
            # in self.tool_data_path.
            file_path, file_name = os.path.split(filename)
            if file_path and file_path != self.tool_data_path:
                corrected_filename = os.path.join(self.tool_data_path, file_name)
                if os.path.exists(corrected_filename):
                    filename = corrected_filename
                    found = True
        if found:
            # FIX: close the index file deterministically instead of leaking
            # the handle until garbage collection.
            with open(filename) as data_file:
                self.data.extend(self.parse_file_fields(data_file))
            self._update_version()
        else:
            self.missing_index_file = filename
            # FIX: log.warn is a deprecated alias of log.warning.
            log.warning("Cannot find index file '%s' for tool data table '%s'" % (filename, self.name))
        if filename not in self.filenames or not self.filenames[filename]['found']:
            self.filenames[filename] = dict(found=found,
                                            filename=filename,
                                            from_shed_config=from_shed_config,
                                            tool_data_path=tool_data_path,
                                            config_element=config_element,
                                            tool_shed_repository=repo_info)
        else:
            log.debug("Filename '%s' already exists in filenames (%s), not adding",
                      filename, self.filenames.keys())
# NOTE(review): this span is a continuation of to_xml_file() — the "try:"
# opening the except clause below and the method signature are outside the
# visible chunk. Python 2 syntax (except ..., e / octal 0644) throughout.
            out_elems = [elem for elem in root]
        except Exception, e:
            # Missing or unparsable config: start from an empty element list.
            out_elems = []
            log.debug(
                'Could not parse existing tool data table config, assume no existing elements: %s',
                e)
        for elem in remove_elems:
            # handle multiple occurrences of remove elem in existing elems
            # NOTE(review): suspected bug — membership is tested in out_elems but
            # the removal targets remove_elems; out_elems.remove(elem) was
            # probably intended. As written, out_elems never shrinks and a
            # ValueError is raised once remove_elems is exhausted.
            while elem in out_elems:
                remove_elems.remove(elem)
        # add new elems
        out_elems.extend(new_elems)
        with open(full_path, 'wb') as out:
            # NOTE(review): str written to a binary-mode handle — Python 2 only.
            out.write('<?xml version="1.0"?>\n<tables>\n')
            for elem in out_elems:
                out.write(util.xml_to_string(elem, pretty=True))
            out.write('</tables>\n')
        os.chmod(full_path, 0644)  # world-readable; octal literal is Python 2 syntax

    def reload_tables(self, table_names=None):
        """
        Reload the named tool data tables from their backing files.

        :param table_names: a single name, a list of names, or None for all
        :returns: the list of table names that were reloaded
        """
        tables = self.get_tables()
        if not table_names:
            table_names = tables.keys()
        elif not isinstance(table_names, list):
            table_names = [table_names]
        for table_name in table_names:
            tables[table_name].reload_from_files()
            log.debug("Reloaded tool data table '%s' from files.", table_name)
        return table_names
# NOTE(review): this span is a continuation of to_xml_file() — the "try:"
# opening the except clause below and the method signature are outside the
# visible chunk. Python 2 syntax (except ..., e / octal 0644) throughout.
            tree = util.parse_xml( full_path )
            root = tree.getroot()
            out_elems = [ elem for elem in root ]
        except Exception, e:
            # Missing or unparsable config: start from an empty element list.
            out_elems = []
            log.debug( 'Could not parse existing tool data table config, assume no existing elements: %s', e )
        for elem in remove_elems:
            # handle multiple occurrences of remove elem in existing elems
            # NOTE(review): suspected bug — membership is tested in out_elems but
            # the removal targets remove_elems; out_elems.remove( elem ) was
            # probably intended. As written, out_elems never shrinks and a
            # ValueError is raised once remove_elems is exhausted.
            while elem in out_elems:
                remove_elems.remove( elem )
        # add new elems
        out_elems.extend( new_elems )
        with open( full_path, 'wb' ) as out:
            # NOTE(review): str written to a binary-mode handle — Python 2 only.
            out.write( '<?xml version="1.0"?>\n<tables>\n' )
            for elem in out_elems:
                out.write( util.xml_to_string( elem, pretty=True ) )
            out.write( '</tables>\n' )
        os.chmod( full_path, 0644 )  # world-readable; octal literal is Python 2 syntax

    def reload_tables( self, table_names=None ):
        """
        Reload the named tool data tables from their backing files.

        :param table_names: a single name, a list of names, or None for all
        :returns: the list of table names that were reloaded
        """
        tables = self.get_tables()
        if not table_names:
            table_names = tables.keys()
        elif not isinstance( table_names, list ):
            table_names = [ table_names ]
        for table_name in table_names:
            tables[ table_name ].reload_from_files()
            log.debug( "Reloaded tool data table '%s' from files.", table_name )
        return table_names
def test_xml_to_string():
    """Default (non-pretty) xml_to_string serialization stays on a single line."""
    parsed = util.parse_xml_string(SECTION_XML)
    serialized = util.xml_to_string(parsed)
    assert '\n' not in serialized
def load_manager_from_elem( self, data_manager_elem, tool_path=None, add_manager=True, replace_existing=False ):
    """
    Build a DataManager from its XML element, returning None (after logging
    the error) when construction fails.

    NOTE(review): add_manager and replace_existing are unused in the visible
    span — presumably consumed by the continuation of this method, which
    appears to be truncated here; confirm against the full file.
    """
    try:
        data_manager = DataManager( self, data_manager_elem, tool_path=tool_path )
    except Exception, e:  # Python 2 except syntax — this snippet predates py3
        log.error( "Error loading data_manager '%s':\n%s" % ( e, util.xml_to_string( data_manager_elem ) ) )
        return None
def install_data_managers(self, shed_data_manager_conf_filename, metadata_dict,
                          shed_config_dict, relative_install_dir, repository,
                          repository_tools_tups):
    """
    Install the data managers declared in an installed repository's metadata.

    Merges each valid <data_manager> element from the repository's
    data_manager_conf.xml into the shed data manager config file (seeding that
    file from SHED_DATA_MANAGER_CONF_XML when it does not exist yet), then
    persists the merged config and waits for the watcher thread to reload it.

    :returns: list of DataManager objects successfully loaded
    """
    rval = []
    if 'data_manager' in metadata_dict:
        tpm = tool_panel_manager.ToolPanelManager(self.app)
        # Map tool guid -> {tool_config_filename, tool} for the lookups below.
        repository_tools_by_guid = {}
        for tool_tup in repository_tools_tups:
            repository_tools_by_guid[tool_tup[1]] = dict(
                tool_config_filename=tool_tup[0], tool=tool_tup[2])
        # Load existing data managers.
        try:
            tree, error_message = parse_xml(
                shed_data_manager_conf_filename, check_exists=False)
        except OSError as exc:
            if exc.errno == errno.ENOENT:
                # First install: seed the shed config with the default skeleton
                # and parse it again.
                with open(shed_data_manager_conf_filename, 'w') as fh:
                    fh.write(SHED_DATA_MANAGER_CONF_XML)
                tree, error_message = parse_xml(
                    shed_data_manager_conf_filename)
            else:
                raise
        if tree is None:
            return rval
        config_elems = [elem for elem in tree.getroot()]
        repo_data_manager_conf_filename = metadata_dict[
            'data_manager'].get('config_filename', None)
        if repo_data_manager_conf_filename is None:
            log.debug("No data_manager_conf.xml file has been defined.")
            return rval
        data_manager_config_has_changes = False
        relative_repo_data_manager_dir = os.path.join(
            shed_config_dict.get('tool_path', ''), relative_install_dir)
        repo_data_manager_conf_filename = os.path.join(
            relative_repo_data_manager_dir, repo_data_manager_conf_filename)
        tree, error_message = parse_xml(repo_data_manager_conf_filename)
        if tree is None:
            return rval
        root = tree.getroot()
        for elem in root:
            if elem.tag == 'data_manager':
                # Validate the metadata for this data manager; any missing
                # piece logs an error and skips the element entirely.
                data_manager_id = elem.get('id', None)
                if data_manager_id is None:
                    log.error(
                        "A data manager was defined that does not have an id and will not be installed:\n%s"
                        % xml_to_string(elem))
                    continue
                data_manager_dict = metadata_dict['data_manager'].get(
                    'data_managers', {}).get(data_manager_id, None)
                if data_manager_dict is None:
                    log.error(
                        "Data manager metadata is not defined properly for '%s'."
                        % (data_manager_id))
                    continue
                guid = data_manager_dict.get('guid', None)
                if guid is None:
                    log.error(
                        "Data manager guid '{}' is not set in metadata for '{}'."
                        .format(guid, data_manager_id))
                    continue
                elem.set('guid', guid)
                tool_guid = data_manager_dict.get('tool_guid', None)
                if tool_guid is None:
                    log.error(
                        "Data manager tool guid '{}' is not set in metadata for '{}'."
                        .format(tool_guid, data_manager_id))
                    continue
                tool_dict = repository_tools_by_guid.get(tool_guid, None)
                if tool_dict is None:
                    log.error(
                        "Data manager tool guid '%s' could not be found for '%s'. Perhaps the tool is invalid?"
                        % (tool_guid, data_manager_id))
                    continue
                tool = tool_dict.get('tool', None)
                if tool is None:
                    log.error(
                        "Data manager tool with guid '%s' could not be found for '%s'. Perhaps the tool is invalid?"
                        % (tool_guid, data_manager_id))
                    continue
                tool_config_filename = tool_dict.get(
                    'tool_config_filename', None)
                if tool_config_filename is None:
                    log.error(
                        "Data manager metadata is missing 'tool_config_file' for '%s'."
                        % (data_manager_id))
                    continue
                elem.set('shed_conf_file', shed_config_dict['config_filename'])
                if elem.get('tool_file', None) is not None:
                    del elem.attrib[
                        'tool_file']  # remove old tool_file info
                tool_elem = tpm.generate_tool_elem(
                    repository.tool_shed, repository.name,
                    repository.installed_changeset_revision, repository.owner,
                    tool_config_filename, tool, None)
                elem.insert(0, tool_elem)
                data_manager = \
                    self.app.data_managers.load_manager_from_elem(elem, tool_path=shed_config_dict.get('tool_path', ''))
                if data_manager:
                    rval.append(data_manager)
            elif elem.tag is etree.Comment:
                # ElementTree comment nodes carry the Comment factory function
                # as their tag; an identity check is the conventional test.
                pass
            else:
                log.warning(
                    "Encountered unexpected element '{}':\n{}".format(
                        elem.tag, xml_to_string(elem)))
            # Every surviving element (valid data managers and comments alike)
            # is carried into the merged shed config.
            config_elems.append(elem)
            data_manager_config_has_changes = True
        # Persist the altered shed_data_manager_config file.
        if data_manager_config_has_changes:
            reload_count = self.app.data_managers._reload_count
            self.data_manager_config_elems_to_xml_file(
                config_elems, shed_data_manager_conf_filename)
            while self.app.data_managers._reload_count <= reload_count:
                time.sleep(
                    0.1
                )  # Wait for shed_data_manager watcher thread to pick up changes
    return rval
def xml_string(self):
    """Return this object's configuration element serialized as XML text."""
    config_elem = self.config_element
    return util.xml_to_string(config_elem)
def to_string(self):
    """Serialize the tree rooted at ``self.root`` to an XML string."""
    root_elem = self.root
    return xml_to_string(root_elem)
def load_from_element(self, elem, tool_path):
    """
    Populate this data manager from a <data_manager> XML element, resolving
    the underlying tool via a tool_file attribute or a nested <tool> tag set
    (including a tool shed repository database lookup).

    NOTE(review): this snippet appears to be truncated — it ends immediately
    after the shed_conf_file tool_path resolution, before the load_tool /
    data_table parsing seen in the other version of this method; confirm
    against the full file.  Python 2 syntax (``except Exception, e``).
    """
    assert elem.tag == 'data_manager', 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % (
        elem.tag)
    self.declared_id = elem.get('id', None)
    self.guid = elem.get('guid', None)
    path = elem.get('tool_file', None)
    self.version = elem.get('version', self.version)
    tool_shed_repository_id = None
    tool_guid = None
    if path is None:
        # No tool_file attribute: the tool must come from a <tool> tag set.
        tool_elem = elem.find('tool')
        assert tool_elem is not None, "Error loading tool for data manager. Make sure that a tool_file attribute or a tool tag set has been defined:\n%s" % (
            util.xml_to_string(elem))
        path = tool_elem.get("file", None)
        tool_guid = tool_elem.get("guid", None)
        # need to determine repository info so that dependencies will work correctly
        tool_shed_url = tool_elem.find('tool_shed').text
        # Handle protocol changes.
        tool_shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(
            self.data_managers.app, tool_shed_url)
        # The protocol is not stored in the database.
        tool_shed = common_util.remove_protocol_from_tool_shed_url(
            tool_shed_url)
        repository_name = tool_elem.find('repository_name').text
        repository_owner = tool_elem.find('repository_owner').text
        installed_changeset_revision = tool_elem.find(
            'installed_changeset_revision').text
        self.tool_shed_repository_info_dict = dict(
            tool_shed=tool_shed,
            name=repository_name,
            owner=repository_owner,
            installed_changeset_revision=installed_changeset_revision)
        tool_shed_repository = \
            suc.get_tool_shed_repository_by_shed_name_owner_installed_changeset_revision(
                self.data_managers.app, tool_shed, repository_name,
                repository_owner, installed_changeset_revision)
        if tool_shed_repository is None:
            log.warning(
                'Could not determine tool shed repository from database. This should only ever happen when running tests.'
            )
            # we'll set tool_path manually here from shed_conf_file
            tool_shed_repository_id = None
            try:
                tool_path = util.parse_xml(
                    elem.get('shed_conf_file')).getroot().get(
                        'tool_path', tool_path)
            except Exception, e:  # Python 2 except syntax
                log.error(
                    'Error determining tool_path for Data Manager during testing: %s',
                    e)
        else:
            tool_shed_repository_id = self.data_managers.app.security.encode_id(
                tool_shed_repository.id)
    # use shed_conf_file to determine tool_path
    shed_conf_file = elem.get("shed_conf_file", None)
    if shed_conf_file:
        shed_conf = self.data_managers.app.toolbox.get_shed_config_dict_by_filename(
            shed_conf_file, None)
        if shed_conf:
            tool_path = shed_conf.get("tool_path", tool_path)
def install_data_managers(self, shed_data_manager_conf_filename, metadata_dict, shed_config_dict, relative_install_dir, repository, repository_tools_tups):
    """Install the data managers declared by an installed tool shed repository.

    Merges the repository's data_manager_conf.xml entries into the shed data
    manager config file and loads each data manager via
    ``self.app.data_managers.load_manager_from_elem``.

    :param shed_data_manager_conf_filename: path of the shed-level data manager config to update
    :param metadata_dict: repository metadata; must contain a 'data_manager' entry for anything to happen
    :param shed_config_dict: shed tool config info (provides 'tool_path' and 'config_filename')
    :param relative_install_dir: repository install dir relative to the tool path
    :param repository: installed tool shed repository object
    :param repository_tools_tups: (tool_config_filename, guid, tool) tuples for the repository's tools
    :returns: list of successfully loaded data manager objects (empty on any parse failure)
    """
    rval = []
    if 'data_manager' in metadata_dict:
        tpm = tool_panel_manager.ToolPanelManager(self.app)
        # Index the repository's tools by guid so each data manager can find its tool.
        repository_tools_by_guid = {}
        for tool_tup in repository_tools_tups:
            repository_tools_by_guid[tool_tup[1]] = dict(tool_config_filename=tool_tup[0], tool=tool_tup[2])
        # Load existing data managers.
        tree, error_message = xml_util.parse_xml(shed_data_manager_conf_filename)
        if tree is None:
            # Existing shed config could not be parsed; nothing can be merged.
            return rval
        config_elems = [elem for elem in tree.getroot()]
        repo_data_manager_conf_filename = metadata_dict['data_manager'].get('config_filename', None)
        if repo_data_manager_conf_filename is None:
            log.debug("No data_manager_conf.xml file has been defined.")
            return rval
        data_manager_config_has_changes = False
        # Resolve the repository's data manager config relative to the shed tool path.
        relative_repo_data_manager_dir = os.path.join(shed_config_dict.get('tool_path', ''), relative_install_dir)
        repo_data_manager_conf_filename = os.path.join(relative_repo_data_manager_dir, repo_data_manager_conf_filename)
        tree, error_message = xml_util.parse_xml(repo_data_manager_conf_filename)
        if tree is None:
            return rval
        root = tree.getroot()
        for elem in root:
            if elem.tag == 'data_manager':
                data_manager_id = elem.get('id', None)
                if data_manager_id is None:
                    log.error("A data manager was defined that does not have an id and will not be installed:\n%s" % xml_to_string(elem))
                    continue
                data_manager_dict = metadata_dict['data_manager'].get('data_managers', {}).get(data_manager_id, None)
                if data_manager_dict is None:
                    log.error("Data manager metadata is not defined properly for '%s'." % (data_manager_id))
                    continue
                guid = data_manager_dict.get('guid', None)
                if guid is None:
                    log.error("Data manager guid '%s' is not set in metadata for '%s'." % (guid, data_manager_id))
                    continue
                # Stamp the element with the metadata guid before persisting it.
                elem.set('guid', guid)
                tool_guid = data_manager_dict.get('tool_guid', None)
                if tool_guid is None:
                    log.error("Data manager tool guid '%s' is not set in metadata for '%s'." % (tool_guid, data_manager_id))
                    continue
                tool_dict = repository_tools_by_guid.get(tool_guid, None)
                if tool_dict is None:
                    log.error("Data manager tool guid '%s' could not be found for '%s'. Perhaps the tool is invalid?" % (tool_guid, data_manager_id))
                    continue
                tool = tool_dict.get('tool', None)
                if tool is None:
                    log.error("Data manager tool with guid '%s' could not be found for '%s'. Perhaps the tool is invalid?" % (tool_guid, data_manager_id))
                    continue
                tool_config_filename = tool_dict.get('tool_config_filename', None)
                if tool_config_filename is None:
                    log.error("Data manager metadata is missing 'tool_config_file' for '%s'." % (data_manager_id))
                    continue
                elem.set('shed_conf_file', shed_config_dict['config_filename'])
                if elem.get('tool_file', None) is not None:
                    del elem.attrib['tool_file']  # remove old tool_file info
                # Replace the tool_file reference with a full <tool> tag set describing the installed repository.
                tool_elem = tpm.generate_tool_elem(repository.tool_shed,
                                                   repository.name,
                                                   repository.installed_changeset_revision,
                                                   repository.owner,
                                                   tool_config_filename,
                                                   tool,
                                                   None)
                elem.insert(0, tool_elem)
                data_manager = \
                    self.app.data_managers.load_manager_from_elem(elem, tool_path=shed_config_dict.get('tool_path', ''))
                if data_manager:
                    rval.append(data_manager)
            else:
                log.warning("Encountered unexpected element '%s':\n%s" % (elem.tag, xml_to_string(elem)))
            config_elems.append(elem)
            data_manager_config_has_changes = True
        # Persist the altered shed_data_manager_config file.
        if data_manager_config_has_changes:
            reload_count = self.app.data_managers._reload_count
            self.data_manager_config_elems_to_xml_file(config_elems, shed_data_manager_conf_filename)
            while self.app.data_managers._reload_count <= reload_count:
                time.sleep(0.1)  # Wait for shed_data_manager watcher thread to pick up changes
    return rval
def load_from_element(self, elem, tool_path):
    """Configure this data manager from a ``<data_manager>`` XML element.

    Resolves the underlying tool (directly via a ``tool_file`` attribute, or
    via a nested ``<tool>`` tag set, preferring the app's tool cache when the
    tool guid is cached), loads it with :meth:`load_tool`, then parses every
    ``<data_table>`` sub-element to populate ``self.data_tables``,
    ``self.output_ref_by_data_table``,
    ``self.value_translation_by_data_table_column`` and
    ``self.move_by_data_table_column``.

    :param elem: parsed ``<data_manager>`` element
    :param tool_path: default base path for resolving the tool file
    :raises ValueError: for an unsupported value translation function or type
    """
    assert elem.tag == 'data_manager', 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % (elem.tag)
    self.declared_id = elem.get('id', None)
    self.guid = elem.get('guid', None)
    path = elem.get('tool_file', None)
    self.version = elem.get('version', self.version)
    tool_shed_repository_id = None
    tool_guid = None
    if path is None:
        # No tool_file attribute: the tool must be described by a <tool> tag set.
        tool_elem = elem.find('tool')
        assert tool_elem is not None, "Error loading tool for data manager. Make sure that a tool_file attribute or a tool tag set has been defined:\n%s" % (util.xml_to_string(elem))
        path = tool_elem.get("file", None)
        tool_guid = tool_elem.get("guid", None)
        # need to determine repository info so that dependencies will work correctly
        if hasattr(self.data_managers.app, 'tool_cache') and tool_guid in self.data_managers.app.tool_cache._tool_paths_by_id:
            # Fast path: the tool is already cached; take the repository info from the cached tool.
            path = self.data_managers.app.tool_cache._tool_paths_by_id[tool_guid]
            tool = self.data_managers.app.tool_cache.get_tool(path)
            tool_shed_repository = tool.tool_shed_repository
            self.tool_shed_repository_info_dict = dict(tool_shed=tool_shed_repository.tool_shed,
                                                       name=tool_shed_repository.name,
                                                       owner=tool_shed_repository.owner,
                                                       installed_changeset_revision=tool_shed_repository.installed_changeset_revision)
            tool_shed_repository_id = self.data_managers.app.security.encode_id(tool_shed_repository.id)
            # Cached path is absolute, so no tool_path prefix is needed.
            tool_path = ""
        else:
            # Slow path: look the repository up in the database from the <tool> sub-elements.
            tool_shed_url = tool_elem.find('tool_shed').text
            # Handle protocol changes.
            tool_shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(self.data_managers.app, tool_shed_url)
            # The protocol is not stored in the database.
            tool_shed = common_util.remove_protocol_from_tool_shed_url(tool_shed_url)
            repository_name = tool_elem.find('repository_name').text
            repository_owner = tool_elem.find('repository_owner').text
            installed_changeset_revision = tool_elem.find('installed_changeset_revision').text
            self.tool_shed_repository_info_dict = dict(tool_shed=tool_shed,
                                                       name=repository_name,
                                                       owner=repository_owner,
                                                       installed_changeset_revision=installed_changeset_revision)
            tool_shed_repository = \
                repository_util.get_installed_repository(self.data_managers.app,
                                                         tool_shed=tool_shed,
                                                         name=repository_name,
                                                         owner=repository_owner,
                                                         installed_changeset_revision=installed_changeset_revision)
            if tool_shed_repository is None:
                log.warning('Could not determine tool shed repository from database. This should only ever happen when running tests.')
                # we'll set tool_path manually here from shed_conf_file
                tool_shed_repository_id = None
                try:
                    tool_path = util.parse_xml(elem.get('shed_conf_file')).getroot().get('tool_path', tool_path)
                except Exception as e:
                    log.error('Error determining tool_path for Data Manager during testing: %s', e)
            else:
                tool_shed_repository_id = self.data_managers.app.security.encode_id(tool_shed_repository.id)
        # use shed_conf_file to determine tool_path
        shed_conf_file = elem.get("shed_conf_file", None)
        if shed_conf_file:
            shed_conf = self.data_managers.app.toolbox.get_shed_config_dict_by_filename(shed_conf_file, None)
            if shed_conf:
                tool_path = shed_conf.get("tool_path", tool_path)
    assert path is not None, "A tool file path could not be determined:\n%s" % (util.xml_to_string(elem))
    self.load_tool(os.path.join(tool_path, path),
                   guid=tool_guid,
                   data_manager_id=self.id,
                   tool_shed_repository_id=tool_shed_repository_id)
    # Fall back to the tool's own name/description when the element omits them.
    self.name = elem.get('name', self.tool.name)
    self.description = elem.get('description', self.tool.description)
    self.undeclared_tables = util.asbool(elem.get('undeclared_tables', self.undeclared_tables))
    # Parse each managed data table: output columns, output refs, value translations and moves.
    for data_table_elem in elem.findall('data_table'):
        data_table_name = data_table_elem.get("name")
        assert data_table_name is not None, "A name is required for a data table entry"
        if data_table_name not in self.data_tables:
            self.data_tables[data_table_name] = odict()
        output_elem = data_table_elem.find('output')
        if output_elem is not None:
            for column_elem in output_elem.findall('column'):
                column_name = column_elem.get('name', None)
                assert column_name is not None, "Name is required for column entry"
                # NOTE: 'coumn' is a long-standing typo kept for consistency within this method.
                data_table_coumn_name = column_elem.get('data_table_name', column_name)
                self.data_tables[data_table_name][data_table_coumn_name] = column_name
                output_ref = column_elem.get('output_ref', None)
                if output_ref is not None:
                    if data_table_name not in self.output_ref_by_data_table:
                        self.output_ref_by_data_table[data_table_name] = {}
                    self.output_ref_by_data_table[data_table_name][data_table_coumn_name] = output_ref
                value_translation_elems = column_elem.findall('value_translation')
                if value_translation_elems is not None:
                    for value_translation_elem in value_translation_elems:
                        value_translation = value_translation_elem.text
                        if value_translation is not None:
                            value_translation_type = value_translation_elem.get('type', DEFAULT_VALUE_TRANSLATION_TYPE)
                            if data_table_name not in self.value_translation_by_data_table_column:
                                self.value_translation_by_data_table_column[data_table_name] = {}
                            if data_table_coumn_name not in self.value_translation_by_data_table_column[data_table_name]:
                                self.value_translation_by_data_table_column[data_table_name][data_table_coumn_name] = []
                            if value_translation_type == 'function':
                                # Named translation functions are resolved to callables here.
                                if value_translation in VALUE_TRANSLATION_FUNCTIONS:
                                    value_translation = VALUE_TRANSLATION_FUNCTIONS[value_translation]
                                else:
                                    raise ValueError("Unsupported value translation function: '%s'" % (value_translation))
                            else:
                                assert value_translation_type == DEFAULT_VALUE_TRANSLATION_TYPE, ValueError("Unsupported value translation type: '%s'" % (value_translation_type))
                            self.value_translation_by_data_table_column[data_table_name][data_table_coumn_name].append(value_translation)
                for move_elem in column_elem.findall('move'):
                    move_type = move_elem.get('type', 'directory')
                    relativize_symlinks = move_elem.get('relativize_symlinks', False)  # TODO: should we instead always relativize links?
                    source_elem = move_elem.find('source')
                    if source_elem is None:
                        source_base = None
                        source_value = ''
                    else:
                        source_base = source_elem.get('base', None)
                        source_value = source_elem.text
                    target_elem = move_elem.find('target')
                    if target_elem is None:
                        target_base = None
                        target_value = ''
                    else:
                        target_base = target_elem.get('base', None)
                        target_value = target_elem.text
                    if data_table_name not in self.move_by_data_table_column:
                        self.move_by_data_table_column[data_table_name] = {}
                    self.move_by_data_table_column[data_table_name][data_table_coumn_name] = \
                        dict(type=move_type,
                             source_base=source_base,
                             source_value=source_value,
                             target_base=target_base,
                             target_value=target_value,
                             relativize_symlinks=relativize_symlinks)
out_elems = [elem for elem in root] except Exception, e: out_elems = [] log.debug( 'Could not parse existing tool data table config, assume no existing elements: %s', e) for elem in remove_elems: #handle multiple occurrences of remove elem in existing elems while elem in out_elems: remove_elems.remove(elem) #add new elems out_elems.extend(new_elems) with open(full_path, 'wb') as out: out.write('<?xml version="1.0"?>\n<tables>\n') for elem in out_elems: out.write(util.xml_to_string(elem)) out.write('</tables>\n') os.chmod(full_path, 0644) def reload_tables(self, table_names=None): tables = self.get_tables() if not table_names: table_names = tables.keys() elif not isinstance(table_names, list): table_names = [table_names] for table_name in table_names: tables[table_name].reload_from_files() log.debug("Reloaded tool data table '%s' from files.", table_name) return table_names