def _configure_tool_data_tables(self, from_shed_config):
    from galaxy.tools.data import ToolDataTableManager

    # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
    self.tool_data_tables = ToolDataTableManager(
        tool_data_path=self.config.tool_data_path,
        config_filename=self.config.tool_data_table_config_path,
    )
    # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
    self.tool_data_tables.load_from_config_file(
        config_filename=self.config.shed_tool_data_table_config,
        tool_data_path=self.tool_data_tables.tool_data_path,
        from_shed_config=from_shed_config,
    )
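# Hedged usage sketch (not from the source): constructing a ToolDataTableManager
# outside of an app object, using only the constructor and load_from_config_file()
# calls shown above. The file paths are hypothetical placeholders.
from galaxy.tools.data import ToolDataTableManager

tool_data_tables = ToolDataTableManager(
    tool_data_path="/srv/galaxy/tool-data",                  # hypothetical path
    config_filename="config/tool_data_table_conf.xml",       # hypothetical path
)
# Shed-installed tables are layered on top of the base config; from_shed_config
# marks the loaded entries as originating from a tool shed install.
tool_data_tables.load_from_config_file(
    config_filename="config/shed_tool_data_table_conf.xml",  # hypothetical path
    tool_data_path=tool_data_tables.tool_data_path,
    from_shed_config=True,
)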
def __init__(self, app):
    super(GenomeTransferPlugin, self).__init__(app)
    self.app = app
    self.tool = app.toolbox.tools_by_id['__GENOME_INDEX__']
    self.sa_session = app.model.context.current
    tdtman = ToolDataTableManager(app.config.tool_data_path)
    xmltree = tdtman.load_from_config_file(app.config.tool_data_table_config_path, app.config.tool_data_path)
    # Map each data table name to the path of the .loc file backing it.
    for node in xmltree:
        table = node.get('name')
        location = node.findall('file')[0].get('path')
        self.locations[table] = location
def _configure_tool_data_tables(self, from_shed_config):
    # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
    self.tool_data_tables = ToolDataTableManager(
        tool_data_path=self.config.tool_data_path,
        config_filename=self.config.tool_data_table_config_path,
        other_config_dict=self.config,
    )
    # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
    try:
        self.tool_data_tables.load_from_config_file(
            config_filename=self.config.shed_tool_data_table_config,
            tool_data_path=self.tool_data_tables.tool_data_path,
            from_shed_config=from_shed_config,
        )
    except OSError as exc:
        # Missing shed_tool_data_table_config is okay if it's the default
        if exc.errno != errno.ENOENT or self.config.is_set('shed_tool_data_table_config'):
            raise
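# Hedged sketch (not from the source): the same "optional config file" pattern in
# isolation. A missing file is tolerated only when the setting was left at its
# default; an explicitly configured path that does not exist is still an error.
# The helper name and signature are illustrative, not Galaxy API.
import errno


def load_optional_config(load, path, explicitly_set):
    """load is any callable that raises OSError(ENOENT) for a missing file."""
    try:
        load(path)
    except OSError as exc:
        if exc.errno != errno.ENOENT or explicitly_set:
            raise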
def from_app(app, work_dir=None):
    cleanup = False
    if not work_dir:
        work_dir = tempfile.mkdtemp()
        cleanup = True
    tool_data_tables = ToolDataTableManager(work_dir)
    with ValidationContext(
        app_name=app.name,
        security=app.security,
        model=app.model,
        tool_data_path=work_dir,
        shed_tool_data_path=work_dir,
        tool_data_tables=tool_data_tables,
        hgweb_config_manager=getattr(app, 'hgweb_config_manager', None),
    ) as app:
        yield app
    # Note: cleanup is skipped if the body of the with-statement raises.
    if cleanup:
        shutil.rmtree(work_dir, ignore_errors=True)
def __init__(self, config=None, **kwargs):
    super().__init__()
    config = config or MockAppConfig(**kwargs)
    GalaxyDataTestApp.__init__(self, config=config, **kwargs)
    self[BasicSharedApp] = self
    self[MinimalManagerApp] = self
    self[StructuredApp] = self
    self[idencoding.IdEncodingHelper] = self.security
    self.name = kwargs.get('name', 'galaxy')
    self[SharedModelMapping] = self.model
    self[GalaxyModelMapping] = self.model
    self[galaxy_scoped_session] = self.model.context
    self.visualizations_registry = MockVisualizationsRegistry()
    self.tag_handler = tags.GalaxyTagHandler(self.model.context)
    self[tags.GalaxyTagHandler] = self.tag_handler
    self.quota_agent = quota.DatabaseQuotaAgent(self.model)
    self.job_config = Bunch(
        dynamic_params=None,
        destinations={},
        use_messaging=False,
        assign_handler=lambda *args, **kwargs: None,
    )
    self.tool_data_tables = ToolDataTableManager(tool_data_path=self.config.tool_data_path)
    self.dataset_collections_service = None
    self.container_finder = NullContainerFinder()
    self._toolbox_lock = MockLock()
    self.tool_shed_registry = Bunch(tool_sheds={})
    self.genome_builds = GenomeBuilds(self)
    self.job_manager = NoopManager()
    self.application_stack = ApplicationStack()
    self.auth_manager = AuthManager(self.config)
    self.user_manager = UserManager(self)
    self.execution_timer_factory = Bunch(get_timer=StructuredExecutionTimer)
    self.file_sources = Bunch(to_dict=lambda *args, **kwargs: {})
    self.interactivetool_manager = Bunch(create_interactivetool=lambda *args, **kwargs: None)
    self.is_job_handler = False
    self.biotools_metadata_source = None
    rebind_container_to_task(self)

    def url_for(*args, **kwds):
        return "/mock/url"

    self.url_for = url_for
def from_app(app, work_dir=None):
    cleanup = False
    if not work_dir:
        work_dir = tempfile.mkdtemp()
        cleanup = True
    tool_data_tables = ToolDataTableManager(work_dir)
    try:
        with ValidationContext(
            app_name=app.name,
            security=app.security,
            model=app.model,
            tool_data_path=work_dir,
            shed_tool_data_path=work_dir,
            tool_data_tables=tool_data_tables,
            registry=app.datatypes_registry,
            hgweb_config_manager=getattr(app, 'hgweb_config_manager', None),
            biotools_metadata_source=getattr(app, 'biotools_metadata_source', None),
        ) as app:
            yield app
    finally:
        # The temporary directory is removed even if the consumer raises.
        if cleanup:
            shutil.rmtree(work_dir, ignore_errors=True)
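# Hedged usage sketch (not from the source): from_app() is a generator, so the
# source presumably wraps it with contextlib.contextmanager (the decorator is not
# visible above). Under that assumption, given an existing Galaxy app object:
from contextlib import contextmanager

validation_context = contextmanager(from_app)


def with_validation_app(app):
    with validation_context(app) as validation_app:
        # validation_app stands in for the real app; its tool data path and
        # tables live in a throwaway temp directory, so tool validation cannot
        # touch the deployment's real .loc files.
        return validation_app.tool_data_tables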
def main(TMPDIR, WORKING_DIRECTORY, IMPORT_STORE_DIRECTORY):
    metadata_params = get_metadata_params(WORKING_DIRECTORY)
    datatypes_config = metadata_params["datatypes_config"]
    if not os.path.exists(datatypes_config):
        datatypes_config = os.path.join(WORKING_DIRECTORY, 'configs', datatypes_config)
    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)
    object_store = get_object_store(WORKING_DIRECTORY)
    import_store = store.imported_store_for_metadata(IMPORT_STORE_DIRECTORY)
    # TODO: clean up random places from which we read files in the working directory
    job_io = JobIO.from_json(os.path.join(IMPORT_STORE_DIRECTORY, 'job_io.json'), sa_session=import_store.sa_session)
    tool_app_config = ToolAppConfig(
        name='tool_app',
        tool_data_path=job_io.tool_data_path,
        galaxy_data_manager_data_path=job_io.galaxy_data_manager_data_path,
        nginx_upload_path=TMPDIR,
        len_file_path=job_io.len_file_path,
        builds_file_path=job_io.builds_file_path,
        root=TMPDIR,
        is_admin_user=lambda _: job_io.user_context.is_admin,
    )
    # Rebuild the tool data table manager from the serialized state shipped with the job.
    with open(os.path.join(IMPORT_STORE_DIRECTORY, 'tool_data_tables.json')) as data_tables_json:
        tdtm = ToolDataTableManager.from_dict(json.load(data_tables_json))
    app = ToolApp(
        sa_session=import_store.sa_session,
        tool_app_config=tool_app_config,
        datatypes_registry=datatypes_registry,
        object_store=object_store,
        tool_data_table_manager=tdtm,
        file_sources=job_io.file_sources,
    )
    # TODO: could try to serialize just a minimal tool variant instead of the whole thing?
    tool_source = get_tool_source(tool_source_class=job_io.tool_source_class, raw_tool_source=job_io.tool_source)
    tool = create_tool_from_source(app, tool_source=tool_source, tool_dir=job_io.tool_dir)
    tool_evaluator = evaluation.RemoteToolEvaluator(app=app, tool=tool, job=job_io.job, local_working_directory=WORKING_DIRECTORY)
    tool_evaluator.set_compute_environment(compute_environment=SharedComputeEnvironment(job_io=job_io, job=job_io.job))
    with open(os.path.join(WORKING_DIRECTORY, 'tool_script.sh'), 'a') as out:
        command_line, version_command_line, extra_filenames, environment_variables = tool_evaluator.build()
        out.write(f'{version_command_line or ""}{command_line}')
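# Hedged sketch (not from the source): the producer side of the
# 'tool_data_tables.json' handoff read in main() above. This assumes
# ToolDataTableManager exposes a to_dict() counterpart to the from_dict() used
# there; if the real serializer differs, only the dump step below changes.
import json
import os


def export_tool_data_tables(tool_data_tables, import_store_directory):
    # Written by the Galaxy side before the job is staged to the remote host.
    with open(os.path.join(import_store_directory, 'tool_data_tables.json'), 'w') as out:
        json.dump(tool_data_tables.to_dict(), out)  # to_dict() is assumed here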
class ConfiguresGalaxyMixin:
    """
    Shared code for configuring Galaxy-like app objects.
    """

    def _configure_genome_builds(self, data_table_name="__dbkeys__", load_old_style=True):
        self.genome_builds = GenomeBuilds(self, data_table_name=data_table_name, load_old_style=load_old_style)

    def _configure_toolbox(self):
        # Initialize the tools, making sure the list of tool configs includes the reserved migrated_tools_conf.xml file.
        tool_configs = self.config.tool_configs
        if self.config.migrated_tools_config not in tool_configs:
            tool_configs.append(self.config.migrated_tools_config)
        from galaxy.managers.citations import CitationsManager
        self.citations_manager = CitationsManager(self)
        from galaxy import tools
        self.toolbox = tools.ToolBox(tool_configs, self.config.tool_path, self)
        # Search support for tools
        import galaxy.tools.search
        self.toolbox_search = galaxy.tools.search.ToolBoxSearch(self.toolbox)
        from galaxy.tools.deps import containers
        galaxy_root_dir = os.path.abspath(self.config.root)
        file_path = os.path.abspath(getattr(self.config, "file_path"))
        app_info = containers.AppInfo(
            galaxy_root_dir,
            default_file_path=file_path,
            outputs_to_working_directory=self.config.outputs_to_working_directory,
        )
        self.container_finder = containers.ContainerFinder(app_info)

    def _configure_tool_data_tables(self, from_shed_config):
        from galaxy.tools.data import ToolDataTableManager

        # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
        self.tool_data_tables = ToolDataTableManager(
            tool_data_path=self.config.tool_data_path,
            config_filename=self.config.tool_data_table_config_path,
        )
        # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
        self.tool_data_tables.load_from_config_file(
            config_filename=self.config.shed_tool_data_table_config,
            tool_data_path=self.tool_data_tables.tool_data_path,
            from_shed_config=from_shed_config,
        )

    def _configure_datatypes_registry(self, installed_repository_manager=None):
        from galaxy.datatypes import registry

        # Create an empty datatypes registry.
        self.datatypes_registry = registry.Registry()
        if installed_repository_manager:
            # Load proprietary datatypes defined in datatypes_conf.xml files in all installed tool shed repositories.
            # We load proprietary datatypes before datatypes in the distribution because Galaxy's default sniffers
            # include some generic sniffers (eg text, xml) which catch anything, so it's impossible for proprietary
            # sniffers to be used. However, if there is a conflict (2 datatypes with the same extension) between a
            # proprietary datatype and a datatype in the Galaxy distribution, the datatype in the Galaxy distribution
            # will take precedence. If there is a conflict between 2 proprietary datatypes, the datatype from the
            # repository that was installed earliest will take precedence.
            installed_repository_manager.load_proprietary_datatypes()
        # Load the data types in the Galaxy distribution, which are defined in self.config.datatypes_config.
        self.datatypes_registry.load_datatypes(self.config.root, self.config.datatypes_config)

    def _configure_object_store(self, **kwds):
        from galaxy.objectstore import build_object_store_from_config
        self.object_store = build_object_store_from_config(self.config, **kwds)

    def _configure_security(self):
        from galaxy.web import security
        self.security = security.SecurityHelper(id_secret=self.config.id_secret)

    def _configure_tool_shed_registry(self):
        import tool_shed.tool_shed_registry

        # Set up the tool sheds registry
        if os.path.isfile(self.config.tool_sheds_config_file):
            self.tool_shed_registry = tool_shed.tool_shed_registry.Registry(self.config.root, self.config.tool_sheds_config_file)
        else:
            self.tool_shed_registry = None

    def _configure_models(self, check_migrate_databases=False, check_migrate_tools=False, config_file=None):
        """
        Preconditions: object_store must be set on self.
        """
        if self.config.database_connection:
            db_url = self.config.database_connection
        else:
            db_url = "sqlite:///%s?isolation_level=IMMEDIATE" % self.config.database
        install_db_url = self.config.install_database_connection
        # TODO: Consider more aggressive check here that this is not the same
        # database file under the hood.
        combined_install_database = not (install_db_url and install_db_url != db_url)
        install_db_url = install_db_url or db_url
        if check_migrate_databases:
            # Initialize database / check for appropriate schema version. If this
            # is a new installation, we'll restrict the tool migration messaging.
            from galaxy.model.migrate.check import create_or_verify_database
            create_or_verify_database(db_url, config_file, self.config.database_engine_options, app=self)
            if not combined_install_database:
                from galaxy.model.tool_shed_install.migrate.check import create_or_verify_database as tsi_create_or_verify_database
                tsi_create_or_verify_database(install_db_url, self.config.install_database_engine_options, app=self)
        if check_migrate_tools:
            # Alert the Galaxy admin to tools that have been moved from the distribution to the tool shed.
            from tool_shed.galaxy_install.migrate.check import verify_tools
            if combined_install_database:
                install_database_options = self.config.database_engine_options
            else:
                install_database_options = self.config.install_database_engine_options
            verify_tools(self, install_db_url, config_file, install_database_options)
        from galaxy.model import mapping
        self.model = mapping.init(
            self.config.file_path,
            db_url,
            self.config.database_engine_options,
            map_install_models=combined_install_database,
            database_query_profiling_proxy=self.config.database_query_profiling_proxy,
            object_store=self.object_store,
            trace_logger=getattr(self, "trace_logger", None),
            use_pbkdf2=self.config.get_bool('use_pbkdf2', True),
        )
        if combined_install_database:
            log.info("Install database targeting Galaxy's database configuration.")
            self.install_model = self.model
        else:
            from galaxy.model.tool_shed_install import mapping as install_mapping
            install_db_url = self.config.install_database_connection
            log.info("Install database using its own connection %s" % install_db_url)
            install_db_engine_options = self.config.install_database_engine_options
            self.install_model = install_mapping.init(install_db_url, install_db_engine_options)
class ConfiguresGalaxyMixin:
    """Shared code for configuring Galaxy-like app objects."""

    config: config.GalaxyAppConfiguration
    tool_cache: ToolCache
    job_config: jobs.JobConfiguration
    toolbox: tools.ToolBox
    toolbox_search: ToolBoxSearch
    container_finder: containers.ContainerFinder

    def _configure_genome_builds(self, data_table_name="__dbkeys__", load_old_style=True):
        self.genome_builds = GenomeBuilds(self, data_table_name=data_table_name, load_old_style=load_old_style)

    def wait_for_toolbox_reload(self, old_toolbox):
        timer = ExecutionTimer()
        log.debug('Waiting for toolbox reload')
        # Wait till toolbox reload has been triggered (or more than 60 seconds have passed)
        while timer.elapsed < 60:
            if self.toolbox.has_reloaded(old_toolbox):
                log.debug('Finished waiting for toolbox reload %s', timer)
                break
            time.sleep(0.1)
        else:
            log.warning('Waiting for toolbox reload timed out after 60 seconds')

    def _configure_tool_config_files(self):
        if self.config.shed_tool_config_file not in self.config.tool_configs:
            self.config.tool_configs.append(self.config.shed_tool_config_file)
        # The value of migrated_tools_config is the file reserved for containing only those tools that have been
        # eliminated from the distribution and moved to the tool shed. If migration checking is disabled, only add it
        # if it exists (since this may be an existing deployment where migrations were previously run).
        if (os.path.exists(self.config.migrated_tools_config)
                and self.config.migrated_tools_config not in self.config.tool_configs):
            self.config.tool_configs.append(self.config.migrated_tools_config)

    def _configure_toolbox(self):
        if not isinstance(self, BasicSharedApp):
            raise Exception("Must inherit from BasicSharedApp")
        self.citations_manager = CitationsManager(self)
        self.biotools_metadata_source = get_galaxy_biotools_metadata_source(self.config)
        self.dynamic_tools_manager = DynamicToolManager(self)
        self._toolbox_lock = threading.RLock()
        self.toolbox = tools.ToolBox(self.config.tool_configs, self.config.tool_path, self)
        galaxy_root_dir = os.path.abspath(self.config.root)
        file_path = os.path.abspath(self.config.file_path)
        app_info = AppInfo(
            galaxy_root_dir=galaxy_root_dir,
            default_file_path=file_path,
            tool_data_path=self.config.tool_data_path,
            shed_tool_data_path=self.config.shed_tool_data_path,
            outputs_to_working_directory=self.config.outputs_to_working_directory,
            container_image_cache_path=self.config.container_image_cache_path,
            library_import_dir=self.config.library_import_dir,
            enable_mulled_containers=self.config.enable_mulled_containers,
            container_resolvers_config_file=self.config.container_resolvers_config_file,
            container_resolvers_config_dict=self.config.container_resolvers,
            involucro_path=self.config.involucro_path,
            involucro_auto_init=self.config.involucro_auto_init,
            mulled_channels=self.config.mulled_channels,
        )
        mulled_resolution_cache = None
        if self.config.mulled_resolution_cache_type:
            cache_opts = {
                "cache.type": self.config.mulled_resolution_cache_type,
                "cache.data_dir": self.config.mulled_resolution_cache_data_dir,
                "cache.lock_dir": self.config.mulled_resolution_cache_lock_dir,
                "cache.expire": self.config.mulled_resolution_cache_expire,
            }
            mulled_resolution_cache = CacheManager(**parse_cache_config_options(cache_opts)).get_cache('mulled_resolution')
        self.container_finder = containers.ContainerFinder(app_info, mulled_resolution_cache=mulled_resolution_cache)
        self._set_enabled_container_types()
        index_help = getattr(self.config, "index_tool_help", True)
        self.toolbox_search = ToolBoxSearch(self.toolbox, index_dir=self.config.tool_search_index_dir, index_help=index_help)

    def reindex_tool_search(self):
        # Call this when tools are added or removed.
        self.toolbox_search.build_index(tool_cache=self.tool_cache, toolbox=self.toolbox)
        self.tool_cache.reset_status()

    def _set_enabled_container_types(self):
        container_types_to_destinations = collections.defaultdict(list)
        for destinations in self.job_config.destinations.values():
            for destination in destinations:
                for enabled_container_type in self.container_finder._enabled_container_types(destination.params):
                    container_types_to_destinations[enabled_container_type].append(destination)
        self.toolbox.dependency_manager.set_enabled_container_types(container_types_to_destinations)
        self.toolbox.dependency_manager.resolver_classes.update(self.container_finder.default_container_registry.resolver_classes)
        self.toolbox.dependency_manager.dependency_resolvers.extend(self.container_finder.default_container_registry.container_resolvers)

    def _configure_tool_data_tables(self, from_shed_config):
        # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
        self.tool_data_tables = ToolDataTableManager(
            tool_data_path=self.config.tool_data_path,
            config_filename=self.config.tool_data_table_config_path,
            other_config_dict=self.config,
        )
        # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
        try:
            self.tool_data_tables.load_from_config_file(
                config_filename=self.config.shed_tool_data_table_config,
                tool_data_path=self.tool_data_tables.tool_data_path,
                from_shed_config=from_shed_config,
            )
        except OSError as exc:
            # Missing shed_tool_data_table_config is okay if it's the default
            if exc.errno != errno.ENOENT or self.config.is_set('shed_tool_data_table_config'):
                raise

    def _configure_datatypes_registry(self, installed_repository_manager=None):
        # Create an empty datatypes registry.
        self.datatypes_registry = Registry(self.config)
        if installed_repository_manager and self.config.load_tool_shed_datatypes:
            # Load proprietary datatypes defined in datatypes_conf.xml files in all installed tool shed repositories.
            # We load proprietary datatypes before datatypes in the distribution because Galaxy's default sniffers
            # include some generic sniffers (eg text, xml) which catch anything, so it's impossible for proprietary
            # sniffers to be used. However, if there is a conflict (2 datatypes with the same extension) between a
            # proprietary datatype and a datatype in the Galaxy distribution, the datatype in the Galaxy distribution
            # will take precedence. If there is a conflict between 2 proprietary datatypes, the datatype from the
            # repository that was installed earliest will take precedence.
            installed_repository_manager.load_proprietary_datatypes()
        # Load the data types in the Galaxy distribution, which are defined in self.config.datatypes_config.
        datatypes_configs = self.config.datatypes_config
        for datatypes_config in listify(datatypes_configs):
            # Setting override=False would make earlier files take precedence -
            # but then they wouldn't override tool shed datatypes.
            self.datatypes_registry.load_datatypes(self.config.root, datatypes_config, override=True)

    def _configure_object_store(self, **kwds):
        self.object_store = build_object_store_from_config(self.config, **kwds)

    def _configure_security(self):
        self.security = IdEncodingHelper(id_secret=self.config.id_secret)
        BaseDatabaseIdField.security = self.security

    def _configure_tool_shed_registry(self):
        # Set up the tool sheds registry
        if os.path.isfile(self.config.tool_sheds_config_file):
            self.tool_shed_registry = tool_shed_registry.Registry(self.config.tool_sheds_config_file)
        else:
            self.tool_shed_registry = tool_shed_registry.Registry()

    def _configure_models(self, check_migrate_databases=False, config_file=None):
        """Preconditions: object_store must be set on self."""
        db_url = self.config.database_connection
        install_db_url = self.config.install_database_connection
        # TODO: Consider more aggressive check here that this is not the same
        # database file under the hood.
        combined_install_database = not (install_db_url and install_db_url != db_url)
        install_db_url = install_db_url or db_url
        install_database_options = (
            self.config.database_engine_options
            if combined_install_database
            else self.config.install_database_engine_options
        )
        if self.config.database_wait:
            self._wait_for_database(db_url)
        if getattr(self.config, "max_metadata_value_size", None):
            custom_types.MAX_METADATA_VALUE_SIZE = self.config.max_metadata_value_size
        if check_migrate_databases:
            # Initialize database / check for appropriate schema version. If this
            # is a new installation, we'll restrict the tool migration messaging.
            create_or_verify_database(db_url, config_file, self.config.database_engine_options, app=self, map_install_models=combined_install_database)
            if not combined_install_database:
                tsi_create_or_verify_database(install_db_url, install_database_options, app=self)
        self.model = init_models_from_config(
            self.config,
            map_install_models=combined_install_database,
            object_store=self.object_store,
            trace_logger=getattr(self, "trace_logger", None),
        )
        if combined_install_database:
            log.info("Install database targeting Galaxy's database configuration.")
            self.install_model = self.model
        else:
            install_db_url = self.config.install_database_connection
            log.info(f"Install database using its own connection {install_db_url}")
            self.install_model = install_mapping.init(install_db_url, install_database_options)

    def _configure_signal_handlers(self, handlers):
        for sig, handler in handlers.items():
            signal.signal(sig, handler)

    def _wait_for_database(self, url):
        attempts = self.config.database_wait_attempts
        pause = self.config.database_wait_sleep
        for i in range(1, attempts):
            try:
                database_exists(url)
                break
            except Exception:
                log.info("Waiting for database: attempt %d of %d" % (i, attempts))
                time.sleep(pause)

    @property
    def tool_dependency_dir(self):
        return self.toolbox.dependency_manager.default_base_path
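# Hedged sketch (not from the source): one plausible wiring order for an app
# built on ConfiguresGalaxyMixin. Real apps (e.g. Galaxy's UniverseApplication)
# drive these hooks with many more collaborators; this only illustrates the
# dependency order implied above.
class SketchApp(ConfiguresGalaxyMixin):
    def __init__(self, config):
        self.config = config
        self._configure_security()                   # IdEncodingHelper from id_secret
        self._configure_object_store()               # precondition for _configure_models
        self._configure_models(check_migrate_databases=False)
        self._configure_tool_data_tables(from_shed_config=False)
        self._configure_datatypes_registry()
        self._configure_tool_config_files()          # must run before the toolbox is built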
def postprocessing(self, sa_session, app):
    """
    Finish the job, move the finished indexes to their final resting place,
    and update the .loc files where applicable.
    """
    gitd = sa_session.query(model.GenomeIndexToolData).filter_by(job_id=self.job_id).first()
    indexdirs = dict(
        bfast='bfast_index',
        bowtie='bowtie_index',
        bowtie2='bowtie2_index',
        bwa='bwa_index',
        perm='perm_%s_index',
        picard='srma_index',
        sam='sam_index',
    )
    if gitd:
        destination = None
        # Build a map from data table name to the .loc file backing it.
        tdtman = ToolDataTableManager()
        xmltree = tdtman.load_from_config_file(app.config.tool_data_table_config_path)
        for node in xmltree:
            table = node.get('name')
            location = node.findall('file')[0].get('path')
            self.locations[table] = os.path.abspath(location)
        locbase = os.path.abspath(os.path.split(self.locations['all_fasta'])[0])
        deferred = sa_session.query(model.DeferredJob).filter_by(id=gitd.deferred_job_id).first()
        params = deferred.params
        dbkey = params['dbkey']
        basepath = os.path.join(os.path.abspath(app.config.genome_data_path), dbkey)
        intname = params['intname']
        indexer = gitd.indexer
        workingdir = os.path.abspath(gitd.dataset.extra_files_path)
        with open(gitd.dataset.get_file_name(), 'r') as fp:
            logloc = json.load(fp)
        location = []
        indexdata = gitd.dataset.extra_files_path
        if indexer == '2bit':
            indexdata = os.path.join(workingdir, '%s.2bit' % dbkey)
            destination = os.path.join(basepath, 'seq', '%s.2bit' % dbkey)
            location.append(dict(
                line='\t'.join(['seq', dbkey, destination]),
                file=os.path.join(locbase, 'alignseq.loc'),
            ))
        elif indexer == 'bowtie':
            self._ex_tar(workingdir, 'cs.tar')
            destination = os.path.join(basepath, 'bowtie_index')
            for var in ['nt', 'cs']:
                for line in logloc[var]:
                    idx = line
                    if var == 'nt':
                        locfile = self.locations['bowtie_indexes']
                        locdir = os.path.join(destination, idx)
                    else:
                        locfile = self.locations['bowtie_indexes_color']
                        locdir = os.path.join(destination, var, idx)
                    location.append(dict(line='\t'.join([dbkey, dbkey, intname, locdir]), file=locfile))
        elif indexer == 'bowtie2':
            destination = os.path.join(basepath, 'bowtie2_index')
            for line in logloc['nt']:
                idx = line
                locfile = self.locations['bowtie2_indexes']
                locdir = os.path.join(destination, idx)
                location.append(dict(line='\t'.join([dbkey, dbkey, intname, locdir]), file=locfile))
        elif indexer == 'bwa':
            self._ex_tar(workingdir, 'cs.tar')
            destination = os.path.join(basepath, 'bwa_index')
            for var in ['nt', 'cs']:
                for line in logloc[var]:
                    idx = line
                    if var == 'nt':
                        locfile = self.locations['bwa_indexes']
                        locdir = os.path.join(destination, idx)
                    else:
                        locfile = self.locations['bwa_indexes_color']
                        locdir = os.path.join(destination, var, idx)
                    location.append(dict(line='\t'.join([dbkey, dbkey, intname, locdir]), file=locfile))
        elif indexer == 'perm':
            self._ex_tar(workingdir, 'cs.tar')
            destination = os.path.join(basepath, 'perm_index')
            for var in ['nt', 'cs']:
                for line in logloc[var]:
                    idx = line.pop()
                    if var == 'nt':
                        locfile = self.locations['perm_base_indexes']
                        locdir = os.path.join(destination, idx)
                    else:
                        locfile = self.locations['perm_color_indexes']
                        locdir = os.path.join(destination, var, idx)
                    line.append(locdir)
                    location.append(dict(line='\t'.join(line), file=locfile))
        elif indexer == 'picard':
            destination = os.path.join(basepath, 'srma_index')
            for var in ['nt']:
                for line in logloc[var]:
                    idx = line
                    locfile = self.locations['picard_indexes']
                    locdir = os.path.join(destination, idx)
                    location.append(dict(line='\t'.join([dbkey, dbkey, intname, locdir]), file=locfile))
        elif indexer == 'sam':
            destination = os.path.join(basepath, 'sam_index')
            for var in ['nt']:
                for line in logloc[var]:
                    locfile = self.locations['sam_fa_indexes']
                    locdir = os.path.join(destination, line)
                    location.append(dict(line='\t'.join(['index', dbkey, locdir]), file=locfile))
        if destination is not None and os.path.exists(os.path.split(destination)[0]) and not os.path.exists(destination):
            log.debug('Moving %s to %s' % (indexdata, destination))
            shutil.move(indexdata, destination)
            if indexer not in ['2bit']:
                # Link the shared FASTA into the new index directory via relative symlinks.
                genome = '%s.fa' % dbkey
                target = os.path.join(destination, genome)
                farel = os.path.relpath(os.path.join(basepath, 'seq', genome), destination)
                os.symlink(farel, target)
                if os.path.exists(os.path.join(destination, 'cs')):
                    target = os.path.join(destination, 'cs', genome)
                    farel = os.path.relpath(os.path.join(basepath, 'seq', genome), os.path.join(destination, 'cs'))
                    os.symlink(farel, target)
        for line in location:
            self._add_line(line['file'], line['line'])
def postprocessing(self, sa_session, app):
    """
    Finish the job, move the finished indexes to their final resting place,
    and update the .loc files where applicable.
    """
    gitd = sa_session.query(model.GenomeIndexToolData).filter_by(job_id=self.job_id).first()
    indexdirs = dict(
        bfast="bfast_index",
        bowtie="bowtie_index",
        bowtie2="bowtie2_index",
        bwa="bwa_index",
        perm="perm_%s_index",
        picard="srma_index",
        sam="sam_index",
    )
    if gitd:
        fp = open(gitd.dataset.get_file_name(), "r")
        deferred = sa_session.query(model.DeferredJob).filter_by(id=gitd.deferred_job_id).first()
        try:
            logloc = simplejson.load(fp)
        except ValueError:
            deferred.state = app.model.DeferredJob.states.ERROR
            sa_session.add(deferred)
            sa_session.flush()
            log.debug("Indexing job failed, setting deferred job state to error.")
            return False
        finally:
            fp.close()
        destination = None
        # Build a map from data table name to the .loc file backing it.
        tdtman = ToolDataTableManager(app.config.tool_data_path)
        xmltree = tdtman.load_from_config_file(app.config.tool_data_table_config_path, app.config.tool_data_path)
        for node in xmltree:
            table = node.get("name")
            location = node.findall("file")[0].get("path")
            self.locations[table] = os.path.abspath(location)
        locbase = os.path.abspath(os.path.split(self.locations["all_fasta"])[0])
        params = deferred.params
        dbkey = params["dbkey"]
        basepath = os.path.join(os.path.abspath(app.config.genome_data_path), dbkey)
        intname = params["intname"]
        indexer = gitd.indexer
        workingdir = os.path.abspath(gitd.dataset.extra_files_path)
        location = []
        indexdata = gitd.dataset.extra_files_path
        if indexer == "2bit":
            indexdata = os.path.join(workingdir, "%s.2bit" % dbkey)
            destination = os.path.join(basepath, "seq", "%s.2bit" % dbkey)
            location.append(
                dict(line="\t".join(["seq", dbkey, destination]), file=os.path.join(locbase, "alignseq.loc"))
            )
        elif indexer == "bowtie":
            self._ex_tar(workingdir, "cs.tar")
            destination = os.path.join(basepath, "bowtie_index")
            for var in ["nt", "cs"]:
                for line in logloc[var]:
                    idx = line
                    if var == "nt":
                        locfile = self.locations["bowtie_indexes"]
                        locdir = os.path.join(destination, idx)
                    else:
                        locfile = self.locations["bowtie_indexes_color"]
                        locdir = os.path.join(destination, var, idx)
                    location.append(dict(line="\t".join([dbkey, dbkey, intname, locdir]), file=locfile))
        elif indexer == "bowtie2":
            destination = os.path.join(basepath, "bowtie2_index")
            for line in logloc["nt"]:
                idx = line
                locfile = self.locations["bowtie2_indexes"]
                locdir = os.path.join(destination, idx)
                location.append(dict(line="\t".join([dbkey, dbkey, intname, locdir]), file=locfile))
        elif indexer == "bwa":
            self._ex_tar(workingdir, "cs.tar")
            destination = os.path.join(basepath, "bwa_index")
            for var in ["nt", "cs"]:
                for line in logloc[var]:
                    idx = line
                    if var == "nt":
                        locfile = self.locations["bwa_indexes"]
                        locdir = os.path.join(destination, idx)
                    else:
                        locfile = self.locations["bwa_indexes_color"]
                        locdir = os.path.join(destination, var, idx)
                    location.append(dict(line="\t".join([dbkey, dbkey, intname, locdir]), file=locfile))
        elif indexer == "perm":
            self._ex_tar(workingdir, "cs.tar")
            destination = os.path.join(basepath, "perm_index")
            for var in ["nt", "cs"]:
                for line in logloc[var]:
                    idx = line.pop()
                    if var == "nt":
                        locfile = self.locations["perm_base_indexes"]
                        locdir = os.path.join(destination, idx)
                    else:
                        locfile = self.locations["perm_color_indexes"]
                        locdir = os.path.join(destination, var, idx)
                    line.append(locdir)
                    location.append(dict(line="\t".join(line), file=locfile))
        elif indexer == "picard":
            destination = os.path.join(basepath, "srma_index")
            for var in ["nt"]:
                for line in logloc[var]:
                    idx = line
                    locfile = self.locations["picard_indexes"]
                    locdir = os.path.join(destination, idx)
                    location.append(dict(line="\t".join([dbkey, dbkey, intname, locdir]), file=locfile))
        elif indexer == "sam":
            destination = os.path.join(basepath, "sam_index")
            for var in ["nt"]:
                for line in logloc[var]:
                    locfile = self.locations["sam_fa_indexes"]
                    locdir = os.path.join(destination, line)
                    location.append(dict(line="\t".join(["index", dbkey, locdir]), file=locfile))
        if (
            destination is not None
            and os.path.exists(os.path.split(destination)[0])
            and not os.path.exists(destination)
        ):
            log.debug("Moving %s to %s" % (indexdata, destination))
            shutil.move(indexdata, destination)
            if indexer not in ["2bit"]:
                genome = "%s.fa" % dbkey
                target = os.path.join(destination, genome)
                fasta = os.path.abspath(os.path.join(basepath, "seq", genome))
                self._check_link(fasta, target)
                if os.path.exists(os.path.join(destination, "cs")):
                    target = os.path.join(destination, "cs", genome)
                    fasta = os.path.abspath(os.path.join(basepath, "seq", genome))
                    self._check_link(fasta, target)
        for line in location:
            self._add_line(line["file"], line["line"])
        deferred.state = app.model.DeferredJob.states.OK
        sa_session.add(deferred)
        sa_session.flush()
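# Hedged sketch (not from the source): what postprocessing() ultimately appends
# to the .loc files. _add_line() is defined elsewhere in the plugin; a minimal
# stand-in that appends a tab-separated entry while skipping duplicates might
# look like this.
def _add_line(locfile, newline):
    with open(locfile, 'a+') as loc:
        loc.seek(0)
        if newline not in (line.rstrip('\n') for line in loc):
            loc.write(newline + '\n')

# For a bowtie2 index of dbkey "hg19" displayed as "Human (hg19)", the appended
# line built by '\t'.join([dbkey, dbkey, intname, locdir]) would read:
# hg19<TAB>hg19<TAB>Human (hg19)<TAB>/path/to/bowtie2_index/hg19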