Example #1
    def _configure_tool_data_tables( self, from_shed_config ):
        from galaxy.tools.data import ToolDataTableManager

        # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
        self.tool_data_tables = ToolDataTableManager( tool_data_path=self.config.tool_data_path,
                                                      config_filename=self.config.tool_data_table_config_path )
        # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
        self.tool_data_tables.load_from_config_file( config_filename=self.config.shed_tool_data_table_config,
                                                     tool_data_path=self.tool_data_tables.tool_data_path,
                                                     from_shed_config=from_shed_config )
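
The method above touches only three settings on self.config, so it can be exercised outside a full Galaxy app with a throwaway namespace. A minimal sketch, assuming a Galaxy installation on the Python path and config files at the illustrative paths below:

    from types import SimpleNamespace

    from galaxy.tools.data import ToolDataTableManager

    # Hypothetical stand-in for self.config; real paths come from a Galaxy instance.
    config = SimpleNamespace(
        tool_data_path='tool-data',
        tool_data_table_config_path='config/tool_data_table_conf.xml',
        shed_tool_data_table_config='config/shed_tool_data_table_conf.xml',
    )
    tool_data_tables = ToolDataTableManager(
        tool_data_path=config.tool_data_path,
        config_filename=config.tool_data_table_config_path)
    tool_data_tables.load_from_config_file(
        config_filename=config.shed_tool_data_table_config,
        tool_data_path=tool_data_tables.tool_data_path,
        from_shed_config=False)
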
Example #2
 def __init__( self, app ):
     super( GenomeTransferPlugin, self ).__init__( app )
     self.app = app
     self.tool = app.toolbox.tools_by_id['__GENOME_INDEX__']
     self.sa_session = app.model.context.current
     tdtman = ToolDataTableManager( app.config.tool_data_path )
     xmltree = tdtman.load_from_config_file( app.config.tool_data_table_config_path, app.config.tool_data_path )
     for node in xmltree:
         table = node.get('name')
         location = node.findall('file')[0].get('path')
         self.locations[table] = location
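
The loop above depends on the shape of tool_data_table_conf.xml: a <tables> root holding <table> elements, each with a name and a <file> child pointing at a .loc file. The traversal can be tried standalone with only the standard library (document contents illustrative, not from a real config):

    import xml.etree.ElementTree as ElementTree

    doc = '''
    <tables>
        <table name="all_fasta" comment_char="#">
            <file path="tool-data/all_fasta.loc" />
        </table>
    </tables>
    '''
    locations = {}
    for node in ElementTree.fromstring(doc):
        table = node.get('name')
        location = node.findall('file')[0].get('path')
        locations[table] = location
    print(locations)  # {'all_fasta': 'tool-data/all_fasta.loc'}
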
Example #3
 def __init__(self, app):
     super(GenomeTransferPlugin, self).__init__(app)
     self.app = app
     self.tool = app.toolbox.tools_by_id['__GENOME_INDEX__']
     self.sa_session = app.model.context.current
     tdtman = ToolDataTableManager(app.config.tool_data_path)
     xmltree = tdtman.load_from_config_file(
         app.config.tool_data_table_config_path, app.config.tool_data_path)
     for node in xmltree:
         table = node.get('name')
         location = node.findall('file')[0].get('path')
         self.locations[table] = location
Example #4
 def _configure_tool_data_tables(self, from_shed_config):
     # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
     self.tool_data_tables = ToolDataTableManager(
         tool_data_path=self.config.tool_data_path,
         config_filename=self.config.tool_data_table_config_path,
         other_config_dict=self.config)
     # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
     try:
         self.tool_data_tables.load_from_config_file(
             config_filename=self.config.shed_tool_data_table_config,
             tool_data_path=self.tool_data_tables.tool_data_path,
             from_shed_config=from_shed_config)
     except OSError as exc:
         # Missing shed_tool_data_table_config is okay if it's the default
         if exc.errno != errno.ENOENT or self.config.is_set(
                 'shed_tool_data_table_config'):
             raise
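
Here a missing shed_tool_data_table_config is tolerated, but only when the setting was left at its default. Stripped of Galaxy specifics, the pattern is this sketch, where explicitly_set stands in for self.config.is_set('shed_tool_data_table_config'):

    import errno

    def load_optional_config(path, loader, explicitly_set):
        # Swallow only a missing-file error, and only when the path was not
        # explicitly configured; anything else is a real problem.
        try:
            loader(path)
        except OSError as exc:
            if exc.errno != errno.ENOENT or explicitly_set:
                raise
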
Example #5
 def from_app(app, work_dir=None):
     cleanup = False
     if not work_dir:
         work_dir = tempfile.mkdtemp()
         cleanup = True
     tool_data_tables = ToolDataTableManager(work_dir)
     with ValidationContext(app_name=app.name,
                            security=app.security,
                            model=app.model,
                            tool_data_path=work_dir,
                            shed_tool_data_path=work_dir,
                            tool_data_tables=tool_data_tables,
                            hgweb_config_manager=getattr(
                                app, 'hgweb_config_manager', None)) as app:
         yield app
     if cleanup:
         shutil.rmtree(work_dir, ignore_errors=True)
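
One caveat in this version: if the body of the with block raises, control never reaches shutil.rmtree and the temporary directory leaks (Example #7 below closes that gap). The safe shape of the pattern, in isolation:

    import shutil
    import tempfile

    def with_temp_workdir(body):
        # Hypothetical helper: create a scratch directory, run body against it,
        # and remove it even when body raises.
        work_dir = tempfile.mkdtemp()
        try:
            return body(work_dir)
        finally:
            shutil.rmtree(work_dir, ignore_errors=True)
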
Example #6
    def __init__(self, config=None, **kwargs):
        super().__init__()
        config = config or MockAppConfig(**kwargs)
        GalaxyDataTestApp.__init__(self, config=config, **kwargs)
        self[BasicSharedApp] = self
        self[MinimalManagerApp] = self
        self[StructuredApp] = self
        self[idencoding.IdEncodingHelper] = self.security
        self.name = kwargs.get('name', 'galaxy')
        self[SharedModelMapping] = self.model
        self[GalaxyModelMapping] = self.model
        self[galaxy_scoped_session] = self.model.context
        self.visualizations_registry = MockVisualizationsRegistry()
        self.tag_handler = tags.GalaxyTagHandler(self.model.context)
        self[tags.GalaxyTagHandler] = self.tag_handler
        self.quota_agent = quota.DatabaseQuotaAgent(self.model)
        self.job_config = Bunch(
            dynamic_params=None,
            destinations={},
            use_messaging=False,
            assign_handler=lambda *args, **kwargs: None
        )
        self.tool_data_tables = ToolDataTableManager(tool_data_path=self.config.tool_data_path)
        self.dataset_collections_service = None
        self.container_finder = NullContainerFinder()
        self._toolbox_lock = MockLock()
        self.tool_shed_registry = Bunch(tool_sheds={})
        self.genome_builds = GenomeBuilds(self)
        self.job_manager = NoopManager()
        self.application_stack = ApplicationStack()
        self.auth_manager = AuthManager(self.config)
        self.user_manager = UserManager(self)
        self.execution_timer_factory = Bunch(get_timer=StructuredExecutionTimer)
        self.file_sources = Bunch(to_dict=lambda *args, **kwargs: {})
        self.interactivetool_manager = Bunch(create_interactivetool=lambda *args, **kwargs: None)
        self.is_job_handler = False
        self.biotools_metadata_source = None
        rebind_container_to_task(self)

        def url_for(*args, **kwds):
            return "/mock/url"
        self.url_for = url_for
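
Several attributes above are Bunch instances used purely as attribute containers. If that helper is unavailable, a minimal stand-in supports the same usage shown here:

    class Bunch:
        # Minimal attribute container: Bunch(a=1).a == 1.
        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

    job_config = Bunch(
        dynamic_params=None,
        destinations={},
        use_messaging=False,
        assign_handler=lambda *args, **kwargs: None,
    )
    assert job_config.use_messaging is False
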
Example #7
 def from_app(app, work_dir=None):
     cleanup = False
     if not work_dir:
         work_dir = tempfile.mkdtemp()
         cleanup = True
     tool_data_tables = ToolDataTableManager(work_dir)
     try:
         with ValidationContext(
             app_name=app.name,
             security=app.security,
             model=app.model,
             tool_data_path=work_dir,
             shed_tool_data_path=work_dir,
             tool_data_tables=tool_data_tables,
             registry=app.datatypes_registry,
             hgweb_config_manager=getattr(app, 'hgweb_config_manager', None),
             biotools_metadata_source=getattr(app, 'biotools_metadata_source', None),
         ) as app:
             yield app
     finally:
         if cleanup:
             shutil.rmtree(work_dir, ignore_errors=True)
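
When no caller-supplied work_dir has to be honored, the standard library can take over the whole create-and-clean-up dance; a sketch of the equivalent:

    import tempfile

    def validation_workdir_demo():
        # tempfile.TemporaryDirectory pairs mkdtemp with guaranteed removal,
        # even if the body raises.
        with tempfile.TemporaryDirectory() as work_dir:
            print('working in', work_dir)
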
Example #8
def main(TMPDIR, WORKING_DIRECTORY, IMPORT_STORE_DIRECTORY):
    metadata_params = get_metadata_params(WORKING_DIRECTORY)
    datatypes_config = metadata_params["datatypes_config"]
    if not os.path.exists(datatypes_config):
        datatypes_config = os.path.join(WORKING_DIRECTORY, 'configs', datatypes_config)
    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)
    object_store = get_object_store(WORKING_DIRECTORY)
    import_store = store.imported_store_for_metadata(IMPORT_STORE_DIRECTORY)
    # TODO: clean up random places from which we read files in the working directory
    job_io = JobIO.from_json(os.path.join(IMPORT_STORE_DIRECTORY, 'job_io.json'), sa_session=import_store.sa_session)
    tool_app_config = ToolAppConfig(
        name='tool_app',
        tool_data_path=job_io.tool_data_path,
        galaxy_data_manager_data_path=job_io.galaxy_data_manager_data_path,
        nginx_upload_path=TMPDIR,
        len_file_path=job_io.len_file_path,
        builds_file_path=job_io.builds_file_path,
        root=TMPDIR,
        is_admin_user=lambda _: job_io.user_context.is_admin)
    with open(os.path.join(IMPORT_STORE_DIRECTORY, 'tool_data_tables.json')) as data_tables_json:
        tdtm = ToolDataTableManager.from_dict(json.load(data_tables_json))
    app = ToolApp(
        sa_session=import_store.sa_session,
        tool_app_config=tool_app_config,
        datatypes_registry=datatypes_registry,
        object_store=object_store,
        tool_data_table_manager=tdtm,
        file_sources=job_io.file_sources,
    )
    # TODO: could try to serialize just a minimal tool variant instead of the whole thing ?
    tool_source = get_tool_source(tool_source_class=job_io.tool_source_class, raw_tool_source=job_io.tool_source)
    tool = create_tool_from_source(app, tool_source=tool_source, tool_dir=job_io.tool_dir)
    tool_evaluator = evaluation.RemoteToolEvaluator(app=app, tool=tool, job=job_io.job, local_working_directory=WORKING_DIRECTORY)
    tool_evaluator.set_compute_environment(compute_environment=SharedComputeEnvironment(job_io=job_io, job=job_io.job))
    with open(os.path.join(WORKING_DIRECTORY, 'tool_script.sh'), 'a') as out:
        command_line, version_command_line, extra_filenames, environment_variables = tool_evaluator.build()
        out.write(f'{version_command_line or ""}{command_line}')
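
The data tables arrive here as a JSON snapshot rehydrated with ToolDataTableManager.from_dict. That deserialization step on its own, wrapped in a small helper and assuming the same import location as the earlier examples:

    import json
    import os

    from galaxy.tools.data import ToolDataTableManager

    def load_tool_data_tables(import_store_directory):
        # Rehydrate the manager from the snapshot written beside the import store.
        path = os.path.join(import_store_directory, 'tool_data_tables.json')
        with open(path) as data_tables_json:
            return ToolDataTableManager.from_dict(json.load(data_tables_json))
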
Example #9
class ConfiguresGalaxyMixin:
    """ Shared code for configuring Galaxy-like app objects.
    """
    def _configure_genome_builds(self,
                                 data_table_name="__dbkeys__",
                                 load_old_style=True):
        self.genome_builds = GenomeBuilds(self,
                                          data_table_name=data_table_name,
                                          load_old_style=load_old_style)

    def _configure_toolbox(self):
        # Initialize the tools, making sure the list of tool configs includes the reserved migrated_tools_conf.xml file.
        tool_configs = self.config.tool_configs
        if self.config.migrated_tools_config not in tool_configs:
            tool_configs.append(self.config.migrated_tools_config)

        from galaxy.managers.citations import CitationsManager
        self.citations_manager = CitationsManager(self)

        from galaxy import tools
        self.toolbox = tools.ToolBox(tool_configs, self.config.tool_path, self)
        # Search support for tools
        import galaxy.tools.search
        self.toolbox_search = galaxy.tools.search.ToolBoxSearch(self.toolbox)

        from galaxy.tools.deps import containers
        galaxy_root_dir = os.path.abspath(self.config.root)
        file_path = os.path.abspath(getattr(self.config, "file_path"))
        app_info = containers.AppInfo(
            galaxy_root_dir,
            default_file_path=file_path,
            outputs_to_working_directory=self.config.outputs_to_working_directory)
        self.container_finder = galaxy.tools.deps.containers.ContainerFinder(app_info)

    def _configure_tool_data_tables(self, from_shed_config):
        from galaxy.tools.data import ToolDataTableManager

        # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
        self.tool_data_tables = ToolDataTableManager(
            tool_data_path=self.config.tool_data_path,
            config_filename=self.config.tool_data_table_config_path)
        # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
        self.tool_data_tables.load_from_config_file(
            config_filename=self.config.shed_tool_data_table_config,
            tool_data_path=self.tool_data_tables.tool_data_path,
            from_shed_config=from_shed_config)

    def _configure_datatypes_registry(self, installed_repository_manager=None):
        from galaxy.datatypes import registry
        # Create an empty datatypes registry.
        self.datatypes_registry = registry.Registry()
        if installed_repository_manager:
            # Load proprietary datatypes defined in datatypes_conf.xml files in all installed tool shed repositories.  We
            # load proprietary datatypes before datatypes in the distribution because Galaxy's default sniffers include some
            # generic sniffers (eg text,xml) which catch anything, so it's impossible for proprietary sniffers to be used.
            # However, if there is a conflict (2 datatypes with the same extension) between a proprietary datatype and a datatype
            # in the Galaxy distribution, the datatype in the Galaxy distribution will take precedence.  If there is a conflict
            # between 2 proprietary datatypes, the datatype from the repository that was installed earliest will take precedence.
            installed_repository_manager.load_proprietary_datatypes()
        # Load the data types in the Galaxy distribution, which are defined in self.config.datatypes_config.
        self.datatypes_registry.load_datatypes(self.config.root,
                                               self.config.datatypes_config)

    def _configure_object_store(self, **kwds):
        from galaxy.objectstore import build_object_store_from_config
        self.object_store = build_object_store_from_config(self.config, **kwds)

    def _configure_security(self):
        from galaxy.web import security
        self.security = security.SecurityHelper(
            id_secret=self.config.id_secret)

    def _configure_tool_shed_registry(self):
        import tool_shed.tool_shed_registry

        # Set up the tool sheds registry
        if os.path.isfile(self.config.tool_sheds_config_file):
            self.tool_shed_registry = tool_shed.tool_shed_registry.Registry(
                self.config.root, self.config.tool_sheds_config_file)
        else:
            self.tool_shed_registry = None

    def _configure_models(self,
                          check_migrate_databases=False,
                          check_migrate_tools=False,
                          config_file=None):
        """
        Preconditions: object_store must be set on self.
        """
        if self.config.database_connection:
            db_url = self.config.database_connection
        else:
            db_url = "sqlite:///%s?isolation_level=IMMEDIATE" % self.config.database
        install_db_url = self.config.install_database_connection
        # TODO: Consider more aggressive check here that this is not the same
        # database file under the hood.
        combined_install_database = not (install_db_url
                                         and install_db_url != db_url)
        install_db_url = install_db_url or db_url

        if check_migrate_databases:
            # Initialize database / check for appropriate schema version.  If this
            # is a new installation, we'll restrict the tool migration messaging.
            from galaxy.model.migrate.check import create_or_verify_database
            create_or_verify_database(db_url,
                                      config_file,
                                      self.config.database_engine_options,
                                      app=self)
            if not combined_install_database:
                from galaxy.model.tool_shed_install.migrate.check import create_or_verify_database as tsi_create_or_verify_database
                tsi_create_or_verify_database(
                    install_db_url,
                    self.config.install_database_engine_options,
                    app=self)

        if check_migrate_tools:
            # Alert the Galaxy admin to tools that have been moved from the distribution to the tool shed.
            from tool_shed.galaxy_install.migrate.check import verify_tools
            if combined_install_database:
                install_database_options = self.config.database_engine_options
            else:
                install_database_options = self.config.install_database_engine_options
            verify_tools(self, install_db_url, config_file,
                         install_database_options)

        from galaxy.model import mapping
        self.model = mapping.init(
            self.config.file_path,
            db_url,
            self.config.database_engine_options,
            map_install_models=combined_install_database,
            database_query_profiling_proxy=self.config.database_query_profiling_proxy,
            object_store=self.object_store,
            trace_logger=getattr(self, "trace_logger", None),
            use_pbkdf2=self.config.get_bool('use_pbkdf2', True))

        if combined_install_database:
            log.info("Install database targeting Galaxy's database configuration.")
            self.install_model = self.model
        else:
            from galaxy.model.tool_shed_install import mapping as install_mapping
            install_db_url = self.config.install_database_connection
            log.info("Install database using its own connection %s" %
                     install_db_url)
            install_db_engine_options = self.config.install_database_engine_options
            self.install_model = install_mapping.init(
                install_db_url, install_db_engine_options)
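
The combined_install_database expression in _configure_models reads backwards at first glance; by De Morgan it is true exactly when no separate install URL is configured or the install URL equals the main one. A quick standalone check:

    def is_combined(db_url, install_db_url):
        # Same expression as in _configure_models above.
        return not (install_db_url and install_db_url != db_url)

    assert is_combined('sqlite:///galaxy.db', None)                   # no install URL
    assert is_combined('sqlite:///galaxy.db', 'sqlite:///galaxy.db')  # same URL
    assert not is_combined('sqlite:///galaxy.db', 'sqlite:///ts.db')  # separate URL
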
Example #10
    def postprocessing( self, sa_session, app ):
        """ Finish the job, move the finished indexes to their final resting place,
            and update the .loc files where applicable. """
        gitd = sa_session.query( model.GenomeIndexToolData ).filter_by( job_id=self.job_id ).first()
        indexdirs = dict( bfast='bfast_index', bowtie='bowtie_index', bowtie2='bowtie2_index', 
                          bwa='bwa_index', perm='perm_%s_index', picard='srma_index', sam='sam_index' )
        

        if gitd:
            fp = open( gitd.dataset.get_file_name(), 'r' )
            deferred = sa_session.query( model.DeferredJob ).filter_by( id=gitd.deferred_job_id ).first()
            try:
                logloc = simplejson.load( fp )
            except ValueError:
                deferred.state = app.model.DeferredJob.states.ERROR
                sa_session.add( deferred )
                sa_session.flush()
                log.debug( 'Indexing job failed, setting deferred job state to error.' )
                return False
            finally:
                fp.close()
            destination = None
            tdtman = ToolDataTableManager( app.config.tool_data_path )
            xmltree = tdtman.load_from_config_file( app.config.tool_data_table_config_path, app.config.tool_data_path )
            for node in xmltree:
                table = node.get('name')
                location = node.findall('file')[0].get('path')
                self.locations[table] = os.path.abspath( location )
            locbase = os.path.abspath( os.path.split( self.locations['all_fasta'] )[0] )
            params = deferred.params
            dbkey = params[ 'dbkey' ]
            basepath = os.path.join( os.path.abspath( app.config.genome_data_path ), dbkey )
            intname = params[ 'intname' ]
            indexer = gitd.indexer
            workingdir = os.path.abspath( gitd.dataset.extra_files_path )
            location = []
            indexdata = gitd.dataset.extra_files_path
            if indexer == '2bit':
                indexdata = os.path.join( workingdir, '%s.2bit' % dbkey )
                destination = os.path.join( basepath, 'seq', '%s.2bit' % dbkey )
                location.append( dict( line='\t'.join( [ 'seq', dbkey, destination ] ), file= os.path.join( locbase, 'alignseq.loc' ) ) )
            elif indexer == 'bowtie':
                self._ex_tar( workingdir, 'cs.tar' )
                destination = os.path.join( basepath, 'bowtie_index' )
                for var in [ 'nt', 'cs' ]:
                    for line in logloc[ var ]:
                        idx = line
                        if var == 'nt':
                            locfile = self.locations[ 'bowtie_indexes' ]
                            locdir = os.path.join( destination, idx )
                        else:
                            locfile = self.locations[ 'bowtie_indexes_color' ]
                            locdir = os.path.join( destination, var, idx )
                        location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) )
            elif indexer == 'bowtie2':
                destination = os.path.join( basepath, 'bowtie2_index' )
                for line in logloc[ 'nt' ]:
                    idx = line
                    locfile = self.locations[ 'bowtie2_indexes' ]
                    locdir = os.path.join( destination, idx )
                    location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) )
            elif indexer == 'bwa':
                self._ex_tar( workingdir, 'cs.tar' )
                destination = os.path.join( basepath, 'bwa_index' )
                for var in [ 'nt', 'cs' ]:
                    for line in logloc[ var ]:
                        idx = line
                        if var == 'nt':
                            locfile = self.locations[ 'bwa_indexes' ]
                            locdir = os.path.join( destination, idx )
                        else:
                            locfile = self.locations[ 'bwa_indexes_color' ]
                            locdir = os.path.join( destination, var, idx )
                        location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) )
            elif indexer == 'perm':
                self._ex_tar( workingdir, 'cs.tar' )
                destination = os.path.join( basepath, 'perm_index' )
                for var in [ 'nt', 'cs' ]:
                    for line in logloc[ var ]:
                        idx = line.pop()
                        if var == 'nt':
                            locfile = self.locations[ 'perm_base_indexes' ]
                            locdir = os.path.join( destination, idx )
                        else:
                            locfile = self.locations[ 'perm_color_indexes' ]
                            locdir = os.path.join( destination, var, idx )
                        line.append( locdir )
                        location.append( dict( line='\t'.join( line ), file=locfile ) )
            elif indexer == 'picard':
                destination = os.path.join( basepath, 'srma_index' )
                for var in [ 'nt' ]:
                    for line in logloc[ var ]:
                        idx = line
                        locfile = self.locations[ 'picard_indexes' ]
                        locdir = os.path.join( destination, idx )
                        location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) )
            elif indexer == 'sam':
                destination = os.path.join( basepath, 'sam_index' )
                for var in [ 'nt' ]:
                    for line in logloc[ var ]:
                        locfile = self.locations[ 'sam_fa_indexes' ]
                        locdir = os.path.join( destination, line )
                        location.append( dict( line='\t'.join( [ 'index', dbkey, locdir ] ), file=locfile ) )
            
            if destination is not None and os.path.exists( os.path.split( destination )[0] ) and not os.path.exists( destination ):
                log.debug( 'Moving %s to %s' % ( indexdata, destination ) )
                shutil.move( indexdata, destination )
                if indexer not in [ '2bit' ]:
                    genome = '%s.fa' % dbkey
                    target = os.path.join( destination, genome )
                    fasta = os.path.abspath( os.path.join( basepath, 'seq', genome ) )
                    self._check_link( fasta, target )
                    if os.path.exists( os.path.join( destination, 'cs' ) ):
                        target = os.path.join( destination, 'cs', genome )
                        fasta = os.path.abspath( os.path.join( basepath, 'seq', genome ) )
                        self._check_link( fasta, target )
            for line in location:
                self._add_line( line[ 'file' ], line[ 'line' ] )
            deferred.state = app.model.DeferredJob.states.OK
            sa_session.add( deferred )
            sa_session.flush()
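
Every indexer branch in postprocessing accumulates entries of the same shape: a tab-separated .loc line plus the file it belongs in, flushed later through self._add_line. The shape reduced to a standalone snippet, with purely illustrative values:

    import os

    dbkey, intname = 'hg19', 'Human (hg19)'
    destination = '/genomes/hg19/bowtie_index'
    locfile = '/galaxy/tool-data/bowtie_indexes.loc'  # hypothetical path

    location = []
    location.append(dict(
        line='\t'.join([dbkey, dbkey, intname, os.path.join(destination, dbkey)]),
        file=locfile))
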
Example #11
class ConfiguresGalaxyMixin:
    """ Shared code for configuring Galaxy-like app objects.
    """

    def _configure_genome_builds( self, data_table_name="__dbkeys__", load_old_style=True ):
        self.genome_builds = GenomeBuilds( self, data_table_name=data_table_name, load_old_style=load_old_style )

    def _configure_toolbox( self ):
        # Initialize the tools, making sure the list of tool configs includes the reserved migrated_tools_conf.xml file.
        tool_configs = self.config.tool_configs
        if self.config.migrated_tools_config not in tool_configs:
            tool_configs.append( self.config.migrated_tools_config )

        from galaxy.managers.citations import CitationsManager
        self.citations_manager = CitationsManager( self )

        from galaxy import tools
        self.toolbox = tools.ToolBox( tool_configs, self.config.tool_path, self )
        # Search support for tools
        import galaxy.tools.search
        self.toolbox_search = galaxy.tools.search.ToolBoxSearch( self.toolbox )

        from galaxy.tools.deps import containers
        galaxy_root_dir = os.path.abspath(self.config.root)
        file_path = os.path.abspath(getattr(self.config, "file_path"))
        app_info = containers.AppInfo(
            galaxy_root_dir,
            default_file_path=file_path,
            outputs_to_working_directory=self.config.outputs_to_working_directory
        )
        self.container_finder = galaxy.tools.deps.containers.ContainerFinder(app_info)

    def _configure_tool_data_tables( self, from_shed_config ):
        from galaxy.tools.data import ToolDataTableManager

        # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
        self.tool_data_tables = ToolDataTableManager( tool_data_path=self.config.tool_data_path,
                                                      config_filename=self.config.tool_data_table_config_path )
        # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
        self.tool_data_tables.load_from_config_file( config_filename=self.config.shed_tool_data_table_config,
                                                     tool_data_path=self.tool_data_tables.tool_data_path,
                                                     from_shed_config=from_shed_config )

    def _configure_datatypes_registry( self, installed_repository_manager=None ):
        from galaxy.datatypes import registry
        # Create an empty datatypes registry.
        self.datatypes_registry = registry.Registry()
        if installed_repository_manager:
            # Load proprietary datatypes defined in datatypes_conf.xml files in all installed tool shed repositories.  We
            # load proprietary datatypes before datatypes in the distribution because Galaxy's default sniffers include some
            # generic sniffers (eg text,xml) which catch anything, so it's impossible for proprietary sniffers to be used.
            # However, if there is a conflict (2 datatypes with the same extension) between a proprietary datatype and a datatype
            # in the Galaxy distribution, the datatype in the Galaxy distribution will take precedence.  If there is a conflict
            # between 2 proprietary datatypes, the datatype from the repository that was installed earliest will take precedence.
            installed_repository_manager.load_proprietary_datatypes()
        # Load the data types in the Galaxy distribution, which are defined in self.config.datatypes_config.
        self.datatypes_registry.load_datatypes( self.config.root, self.config.datatypes_config )

    def _configure_object_store( self, **kwds ):
        from galaxy.objectstore import build_object_store_from_config
        self.object_store = build_object_store_from_config( self.config, **kwds )

    def _configure_security( self ):
        from galaxy.web import security
        self.security = security.SecurityHelper( id_secret=self.config.id_secret )

    def _configure_tool_shed_registry( self ):
        import tool_shed.tool_shed_registry

        # Set up the tool sheds registry
        if os.path.isfile( self.config.tool_sheds_config_file ):
            self.tool_shed_registry = tool_shed.tool_shed_registry.Registry( self.config.root, self.config.tool_sheds_config_file )
        else:
            self.tool_shed_registry = None

    def _configure_models( self, check_migrate_databases=False, check_migrate_tools=False, config_file=None ):
        """
        Preconditions: object_store must be set on self.
        """
        if self.config.database_connection:
            db_url = self.config.database_connection
        else:
            db_url = "sqlite:///%s?isolation_level=IMMEDIATE" % self.config.database
        install_db_url = self.config.install_database_connection
        # TODO: Consider more aggressive check here that this is not the same
        # database file under the hood.
        combined_install_database = not ( install_db_url and install_db_url != db_url )
        install_db_url = install_db_url or db_url

        if check_migrate_databases:
            # Initialize database / check for appropriate schema version.  If this
            # is a new installation, we'll restrict the tool migration messaging.
            from galaxy.model.migrate.check import create_or_verify_database
            create_or_verify_database( db_url, config_file, self.config.database_engine_options, app=self )
            if not combined_install_database:
                from galaxy.model.tool_shed_install.migrate.check import create_or_verify_database as tsi_create_or_verify_database
                tsi_create_or_verify_database( install_db_url, self.config.install_database_engine_options, app=self )

        if check_migrate_tools:
            # Alert the Galaxy admin to tools that have been moved from the distribution to the tool shed.
            from tool_shed.galaxy_install.migrate.check import verify_tools
            if combined_install_database:
                install_database_options = self.config.database_engine_options
            else:
                install_database_options = self.config.install_database_engine_options
            verify_tools( self, install_db_url, config_file, install_database_options )

        from galaxy.model import mapping
        self.model = mapping.init( self.config.file_path,
                                   db_url,
                                   self.config.database_engine_options,
                                   map_install_models=combined_install_database,
                                   database_query_profiling_proxy=self.config.database_query_profiling_proxy,
                                   object_store=self.object_store,
                                   trace_logger=getattr(self, "trace_logger", None),
                                   use_pbkdf2=self.config.get_bool( 'use_pbkdf2', True ) )

        if combined_install_database:
            log.info("Install database targeting Galaxy's database configuration.")
            self.install_model = self.model
        else:
            from galaxy.model.tool_shed_install import mapping as install_mapping
            install_db_url = self.config.install_database_connection
            log.info("Install database using its own connection %s" % install_db_url)
            install_db_engine_options = self.config.install_database_engine_options
            self.install_model = install_mapping.init( install_db_url,
                                                       install_db_engine_options )
Example #12
class ConfiguresGalaxyMixin:
    """Shared code for configuring Galaxy-like app objects."""

    config: config.GalaxyAppConfiguration
    tool_cache: ToolCache
    job_config: jobs.JobConfiguration
    toolbox: tools.ToolBox
    toolbox_search: ToolBoxSearch
    container_finder: containers.ContainerFinder

    def _configure_genome_builds(self,
                                 data_table_name="__dbkeys__",
                                 load_old_style=True):
        self.genome_builds = GenomeBuilds(self,
                                          data_table_name=data_table_name,
                                          load_old_style=load_old_style)

    def wait_for_toolbox_reload(self, old_toolbox):
        timer = ExecutionTimer()
        log.debug('Waiting for toolbox reload')
        # Wait till toolbox reload has been triggered (or more than 60 seconds have passed)
        while timer.elapsed < 60:
            if self.toolbox.has_reloaded(old_toolbox):
                log.debug('Finished waiting for toolbox reload %s', timer)
                break
            time.sleep(0.1)
        else:
            log.warning(
                'Waiting for toolbox reload timed out after 60 seconds')

    def _configure_tool_config_files(self):
        if self.config.shed_tool_config_file not in self.config.tool_configs:
            self.config.tool_configs.append(self.config.shed_tool_config_file)
        # The value of migrated_tools_config is the file reserved for containing only those tools that have been
        # eliminated from the distribution and moved to the tool shed. If migration checking is disabled, only add it if
        # it exists (since this may be an existing deployment where migrations were previously run).
        if (os.path.exists(self.config.migrated_tools_config)
                and self.config.migrated_tools_config
                not in self.config.tool_configs):
            self.config.tool_configs.append(self.config.migrated_tools_config)

    def _configure_toolbox(self):
        if not isinstance(self, BasicSharedApp):
            raise Exception("Must inherit from BasicSharedApp")

        self.citations_manager = CitationsManager(self)
        self.biotools_metadata_source = get_galaxy_biotools_metadata_source(
            self.config)

        self.dynamic_tools_manager = DynamicToolManager(self)
        self._toolbox_lock = threading.RLock()
        self.toolbox = tools.ToolBox(self.config.tool_configs,
                                     self.config.tool_path, self)
        galaxy_root_dir = os.path.abspath(self.config.root)
        file_path = os.path.abspath(self.config.file_path)
        app_info = AppInfo(
            galaxy_root_dir=galaxy_root_dir,
            default_file_path=file_path,
            tool_data_path=self.config.tool_data_path,
            shed_tool_data_path=self.config.shed_tool_data_path,
            outputs_to_working_directory=self.config.outputs_to_working_directory,
            container_image_cache_path=self.config.container_image_cache_path,
            library_import_dir=self.config.library_import_dir,
            enable_mulled_containers=self.config.enable_mulled_containers,
            container_resolvers_config_file=self.config.container_resolvers_config_file,
            container_resolvers_config_dict=self.config.container_resolvers,
            involucro_path=self.config.involucro_path,
            involucro_auto_init=self.config.involucro_auto_init,
            mulled_channels=self.config.mulled_channels,
        )
        mulled_resolution_cache = None
        if self.config.mulled_resolution_cache_type:
            cache_opts = {
                "cache.type": self.config.mulled_resolution_cache_type,
                "cache.data_dir": self.config.mulled_resolution_cache_data_dir,
                "cache.lock_dir": self.config.mulled_resolution_cache_lock_dir,
                "cache.expire": self.config.mulled_resolution_cache_expire,
            }
            mulled_resolution_cache = CacheManager(
                **parse_cache_config_options(cache_opts)).get_cache(
                    'mulled_resolution')
        self.container_finder = containers.ContainerFinder(
            app_info, mulled_resolution_cache=mulled_resolution_cache)
        self._set_enabled_container_types()
        index_help = getattr(self.config, "index_tool_help", True)
        self.toolbox_search = ToolBoxSearch(
            self.toolbox,
            index_dir=self.config.tool_search_index_dir,
            index_help=index_help)

    def reindex_tool_search(self):
        # Call this when tools are added or removed.
        self.toolbox_search.build_index(tool_cache=self.tool_cache,
                                        toolbox=self.toolbox)
        self.tool_cache.reset_status()

    def _set_enabled_container_types(self):
        container_types_to_destinations = collections.defaultdict(list)
        for destinations in self.job_config.destinations.values():
            for destination in destinations:
                for enabled_container_type in self.container_finder._enabled_container_types(
                        destination.params):
                    container_types_to_destinations[
                        enabled_container_type].append(destination)
        self.toolbox.dependency_manager.set_enabled_container_types(
            container_types_to_destinations)
        self.toolbox.dependency_manager.resolver_classes.update(
            self.container_finder.default_container_registry.resolver_classes)
        self.toolbox.dependency_manager.dependency_resolvers.extend(
            self.container_finder.default_container_registry.container_resolvers)

    def _configure_tool_data_tables(self, from_shed_config):
        # Initialize tool data tables using the config defined by self.config.tool_data_table_config_path.
        self.tool_data_tables = ToolDataTableManager(
            tool_data_path=self.config.tool_data_path,
            config_filename=self.config.tool_data_table_config_path,
            other_config_dict=self.config)
        # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
        try:
            self.tool_data_tables.load_from_config_file(
                config_filename=self.config.shed_tool_data_table_config,
                tool_data_path=self.tool_data_tables.tool_data_path,
                from_shed_config=from_shed_config)
        except OSError as exc:
            # Missing shed_tool_data_table_config is okay if it's the default
            if exc.errno != errno.ENOENT or self.config.is_set(
                    'shed_tool_data_table_config'):
                raise

    def _configure_datatypes_registry(self, installed_repository_manager=None):
        # Create an empty datatypes registry.
        self.datatypes_registry = Registry(self.config)
        if installed_repository_manager and self.config.load_tool_shed_datatypes:
            # Load proprietary datatypes defined in datatypes_conf.xml files in all installed tool shed repositories.  We
            # load proprietary datatypes before datatypes in the distribution because Galaxy's default sniffers include some
            # generic sniffers (eg text,xml) which catch anything, so it's impossible for proprietary sniffers to be used.
            # However, if there is a conflict (2 datatypes with the same extension) between a proprietary datatype and a datatype
            # in the Galaxy distribution, the datatype in the Galaxy distribution will take precedence.  If there is a conflict
            # between 2 proprietary datatypes, the datatype from the repository that was installed earliest will take precedence.
            installed_repository_manager.load_proprietary_datatypes()
        # Load the data types in the Galaxy distribution, which are defined in self.config.datatypes_config.
        datatypes_configs = self.config.datatypes_config
        for datatypes_config in listify(datatypes_configs):
            # Setting override=False would make earlier files take
            # precedence - but then they wouldn't override tool shed
            # datatypes.
            self.datatypes_registry.load_datatypes(self.config.root,
                                                   datatypes_config,
                                                   override=True)

    def _configure_object_store(self, **kwds):
        self.object_store = build_object_store_from_config(self.config, **kwds)

    def _configure_security(self):
        self.security = IdEncodingHelper(id_secret=self.config.id_secret)
        BaseDatabaseIdField.security = self.security

    def _configure_tool_shed_registry(self):
        # Set up the tool sheds registry
        if os.path.isfile(self.config.tool_sheds_config_file):
            self.tool_shed_registry = tool_shed_registry.Registry(
                self.config.tool_sheds_config_file)
        else:
            self.tool_shed_registry = tool_shed_registry.Registry()

    def _configure_models(self,
                          check_migrate_databases=False,
                          config_file=None):
        """Preconditions: object_store must be set on self."""
        db_url = self.config.database_connection
        install_db_url = self.config.install_database_connection
        # TODO: Consider more aggressive check here that this is not the same
        # database file under the hood.
        combined_install_database = not (install_db_url
                                         and install_db_url != db_url)
        install_db_url = install_db_url or db_url
        install_database_options = self.config.database_engine_options if combined_install_database else self.config.install_database_engine_options

        if self.config.database_wait:
            self._wait_for_database(db_url)

        if getattr(self.config, "max_metadata_value_size", None):
            custom_types.MAX_METADATA_VALUE_SIZE = self.config.max_metadata_value_size

        if check_migrate_databases:
            # Initialize database / check for appropriate schema version.  If this
            # is a new installation, we'll restrict the tool migration messaging.
            create_or_verify_database(
                db_url,
                config_file,
                self.config.database_engine_options,
                app=self,
                map_install_models=combined_install_database)
            if not combined_install_database:
                tsi_create_or_verify_database(install_db_url,
                                              install_database_options,
                                              app=self)

        self.model = init_models_from_config(
            self.config,
            map_install_models=combined_install_database,
            object_store=self.object_store,
            trace_logger=getattr(self, "trace_logger", None))
        if combined_install_database:
            log.info("Install database targeting Galaxy's database configuration.")
            self.install_model = self.model
        else:
            install_db_url = self.config.install_database_connection
            log.info(
                f"Install database using its own connection {install_db_url}")
            self.install_model = install_mapping.init(
                install_db_url, install_database_options)

    def _configure_signal_handlers(self, handlers):
        for sig, handler in handlers.items():
            signal.signal(sig, handler)

    def _wait_for_database(self, url):
        attempts = self.config.database_wait_attempts
        pause = self.config.database_wait_sleep
        for i in range(1, attempts):
            try:
                database_exists(url)
                break
            except Exception:
                log.info("Waiting for database: attempt %d of %d" %
                         (i, attempts))
                time.sleep(pause)

    @property
    def tool_dependency_dir(self):
        return self.toolbox.dependency_manager.default_base_path
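
_wait_for_database retries by catching any exception out of database_exists; note that range(1, attempts) makes at most attempts - 1 tries. The same retry shape as a generic sketch with the off-by-one removed (check is a hypothetical stand-in for the probe):

    import time

    def wait_for(check, attempts, pause):
        # Retry check() until it stops raising, up to `attempts` tries.
        for i in range(1, attempts + 1):
            try:
                check()
                return True
            except Exception:
                print('Waiting: attempt %d of %d' % (i, attempts))
                time.sleep(pause)
        return False
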
Example #13
    def postprocessing( self, sa_session, app ):
        """ Finish the job, move the finished indexes to their final resting place,
            and update the .loc files where applicable. """
        gitd = sa_session.query( model.GenomeIndexToolData ).filter_by( job_id=self.job_id ).first()
        indexdirs = dict( bfast='bfast_index', bowtie='bowtie_index', bowtie2='bowtie2_index', 
                          bwa='bwa_index', perm='perm_%s_index', picard='srma_index', sam='sam_index' )
        

        if gitd:
            destination = None
            tdtman = ToolDataTableManager()
            xmltree = tdtman.load_from_config_file(app.config.tool_data_table_config_path)
            for node in xmltree:
                table = node.get('name')
                location = node.findall('file')[0].get('path')
                self.locations[table] = os.path.abspath( location )
            locbase = os.path.abspath( os.path.split( self.locations['all_fasta'] )[0] )
            deferred = sa_session.query( model.DeferredJob ).filter_by( id=gitd.deferred_job_id ).first()
            params = deferred.params
            dbkey = params[ 'dbkey' ]
            basepath = os.path.join( os.path.abspath( app.config.genome_data_path ), dbkey )
            intname = params[ 'intname' ]
            indexer = gitd.indexer
            workingdir = os.path.abspath( gitd.dataset.extra_files_path )
            fp = open( gitd.dataset.get_file_name(), 'r' )
            logloc = json.load( fp )
            fp.close()
            location = []
            indexdata = gitd.dataset.extra_files_path
            if indexer == '2bit':
                indexdata = os.path.join( workingdir, '%s.2bit' % dbkey )
                destination = os.path.join( basepath, 'seq', '%s.2bit' % dbkey )
                location.append( dict( line='\t'.join( [ 'seq', dbkey, os.path.join( destination, '%s.2bit' % dbkey ) ] ), file= os.path.join( locbase, 'alignseq.loc' ) ) )
            elif indexer == 'bowtie':
                self._ex_tar( workingdir, 'cs.tar' )
                destination = os.path.join( basepath, 'bowtie_index' )
                for var in [ 'nt', 'cs' ]:
                    for line in logloc[ var ]:
                        idx = line
                        if var == 'nt':
                            locfile = self.locations[ 'bowtie_indexes' ]
                            locdir = os.path.join( destination, idx )
                        else:
                            locfile = self.locations[ 'bowtie_indexes_color' ]
                            locdir = os.path.join( destination, var, idx )
                        location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) )
            elif indexer == 'bowtie2':
                destination = os.path.join( basepath, 'bowtie2_index' )
                for line in logloc[ 'nt' ]:
                    idx = line
                    locfile = self.locations[ 'bowtie2_indexes' ]
                    locdir = os.path.join( destination, idx )
                    location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) )
            elif indexer == 'bwa':
                self._ex_tar( workingdir, 'cs.tar' )
                destination = os.path.join( basepath, 'bwa_index' )
                for var in [ 'nt', 'cs' ]:
                    for line in logloc[ var ]:
                        idx = line
                        if var == 'nt':
                            locfile = self.locations[ 'bwa_indexes' ]
                            locdir = os.path.join( destination, idx )
                        else:
                            locfile = self.locations[ 'bwa_indexes_color' ]
                            locdir = os.path.join( destination, var, idx )
                        location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) )
            elif indexer == 'perm':
                self._ex_tar( workingdir, 'cs.tar' )
                destination = os.path.join( basepath, 'perm_index' )
                for var in [ 'nt', 'cs' ]:
                    for line in logloc[ var ]:
                        idx = line.pop()
                        if var == 'nt':
                            locfile = self.locations[ 'perm_base_indexes' ]
                            locdir = os.path.join( destination, idx )
                        else:
                            locfile = self.locations[ 'perm_color_indexes' ]
                            locdir = os.path.join( destination, var, idx )
                        line.append( locdir )
                        location.append( dict( line='\t'.join( line ), file=locfile ) )
            elif indexer == 'picard':
                destination = os.path.join( basepath, 'srma_index' )
                for var in [ 'nt' ]:
                    for line in logloc[ var ]:
                        idx = line
                        locfile = self.locations[ 'picard_indexes' ]
                        locdir = os.path.join( destination, idx )
                        location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) )
            elif indexer == 'sam':
                destination = os.path.join( basepath, 'sam_index' )
                for var in [ 'nt' ]:
                    for line in logloc[ var ]:
                        locfile = self.locations[ 'sam_fa_indexes' ]
                        locdir = os.path.join( destination, line )
                        location.append( dict( line='\t'.join( [ 'index', dbkey, locdir ] ), file=locfile ) )
            
            if destination is not None and os.path.exists( os.path.split( destination )[0] ) and not os.path.exists( destination ):
                log.debug( 'Moving %s to %s' % ( indexdata, destination ) )
                shutil.move( indexdata, destination )
                if indexer not in [ '2bit' ]:
                    genome = '%s.fa' % dbkey
                    target = os.path.join( destination, genome )
                    farel = os.path.relpath( os.path.join( basepath, 'seq', genome ), destination )
                    os.symlink( farel, target )
                    if os.path.exists( os.path.join( destination, 'cs' ) ):
                        target = os.path.join( destination, 'cs', genome )
                        farel = os.path.relpath( os.path.join( basepath, 'seq', genome ), os.path.join( destination, 'cs' ) )
                        os.symlink( farel, target )
            for line in location:
                self._add_line( line[ 'file' ], line[ 'line' ] )
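
The tail of this example links the genome FASTA into the index directory through a relative path, which keeps the link valid if the genome root is mounted elsewhere. The relpath-then-symlink pattern on its own (paths illustrative; the symlink call is left commented so the snippet does not touch the filesystem):

    import os

    basepath = '/genomes/hg19'
    destination = os.path.join(basepath, 'bwa_index')
    genome = 'hg19.fa'

    # Link target expressed relative to the directory that will hold the link.
    target = os.path.join(destination, genome)
    farel = os.path.relpath(os.path.join(basepath, 'seq', genome), destination)
    # os.symlink(farel, target)
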
Example #14
    def postprocessing(self, sa_session, app):
        """ Finish the job, move the finished indexes to their final resting place,
            and update the .loc files where applicable. """
        gitd = sa_session.query(model.GenomeIndexToolData).filter_by(job_id=self.job_id).first()
        indexdirs = dict(
            bfast="bfast_index",
            bowtie="bowtie_index",
            bowtie2="bowtie2_index",
            bwa="bwa_index",
            perm="perm_%s_index",
            picard="srma_index",
            sam="sam_index",
        )

        if gitd:
            fp = open(gitd.dataset.get_file_name(), "r")
            deferred = sa_session.query(model.DeferredJob).filter_by(id=gitd.deferred_job_id).first()
            try:
                logloc = simplejson.load(fp)
            except ValueError:
                deferred.state = app.model.DeferredJob.states.ERROR
                sa_session.add(deferred)
                sa_session.flush()
                log.debug("Indexing job failed, setting deferred job state to error.")
                return False
            finally:
                fp.close()
            destination = None
            tdtman = ToolDataTableManager(app.config.tool_data_path)
            xmltree = tdtman.load_from_config_file(app.config.tool_data_table_config_path, app.config.tool_data_path)
            for node in xmltree:
                table = node.get("name")
                location = node.findall("file")[0].get("path")
                self.locations[table] = os.path.abspath(location)
            locbase = os.path.abspath(os.path.split(self.locations["all_fasta"])[0])
            params = deferred.params
            dbkey = params["dbkey"]
            basepath = os.path.join(os.path.abspath(app.config.genome_data_path), dbkey)
            intname = params["intname"]
            indexer = gitd.indexer
            workingdir = os.path.abspath(gitd.dataset.extra_files_path)
            location = []
            indexdata = gitd.dataset.extra_files_path
            if indexer == "2bit":
                indexdata = os.path.join(workingdir, "%s.2bit" % dbkey)
                destination = os.path.join(basepath, "seq", "%s.2bit" % dbkey)
                location.append(
                    dict(line="\t".join(["seq", dbkey, destination]), file=os.path.join(locbase, "alignseq.loc"))
                )
            elif indexer == "bowtie":
                self._ex_tar(workingdir, "cs.tar")
                destination = os.path.join(basepath, "bowtie_index")
                for var in ["nt", "cs"]:
                    for line in logloc[var]:
                        idx = line
                        if var == "nt":
                            locfile = self.locations["bowtie_indexes"]
                            locdir = os.path.join(destination, idx)
                        else:
                            locfile = self.locations["bowtie_indexes_color"]
                            locdir = os.path.join(destination, var, idx)
                        location.append(dict(line="\t".join([dbkey, dbkey, intname, locdir]), file=locfile))
            elif indexer == "bowtie2":
                destination = os.path.join(basepath, "bowtie2_index")
                for line in logloc["nt"]:
                    idx = line
                    locfile = self.locations["bowtie2_indexes"]
                    locdir = os.path.join(destination, idx)
                    location.append(dict(line="\t".join([dbkey, dbkey, intname, locdir]), file=locfile))
            elif indexer == "bwa":
                self._ex_tar(workingdir, "cs.tar")
                destination = os.path.join(basepath, "bwa_index")
                for var in ["nt", "cs"]:
                    for line in logloc[var]:
                        idx = line
                        if var == "nt":
                            locfile = self.locations["bwa_indexes"]
                            locdir = os.path.join(destination, idx)
                        else:
                            locfile = self.locations["bwa_indexes_color"]
                            locdir = os.path.join(destination, var, idx)
                        location.append(dict(line="\t".join([dbkey, dbkey, intname, locdir]), file=locfile))
            elif indexer == "perm":
                self._ex_tar(workingdir, "cs.tar")
                destination = os.path.join(basepath, "perm_index")
                for var in ["nt", "cs"]:
                    for line in logloc[var]:
                        idx = line.pop()
                        if var == "nt":
                            locfile = self.locations["perm_base_indexes"]
                            locdir = os.path.join(destination, idx)
                        else:
                            locfile = self.locations["perm_color_indexes"]
                            locdir = os.path.join(destination, var, idx)
                        line.append(locdir)
                        location.append(dict(line="\t".join(line), file=locfile))
            elif indexer == "picard":
                destination = os.path.join(basepath, "srma_index")
                for var in ["nt"]:
                    for line in logloc[var]:
                        idx = line
                        locfile = self.locations["picard_indexes"]
                        locdir = os.path.join(destination, idx)
                        location.append(dict(line="\t".join([dbkey, dbkey, intname, locdir]), file=locfile))
            elif indexer == "sam":
                destination = os.path.join(basepath, "sam_index")
                for var in ["nt"]:
                    for line in logloc[var]:
                        locfile = self.locations["sam_fa_indexes"]
                        locdir = os.path.join(destination, line)
                        location.append(dict(line="\t".join(["index", dbkey, locdir]), file=locfile))

            if (
                destination is not None
                and os.path.exists(os.path.split(destination)[0])
                and not os.path.exists(destination)
            ):
                log.debug("Moving %s to %s" % (indexdata, destination))
                shutil.move(indexdata, destination)
                if indexer not in ["2bit"]:
                    genome = "%s.fa" % dbkey
                    target = os.path.join(destination, genome)
                    fasta = os.path.abspath(os.path.join(basepath, "seq", genome))
                    self._check_link(fasta, target)
                    if os.path.exists(os.path.join(destination, "cs")):
                        target = os.path.join(destination, "cs", genome)
                        fasta = os.path.abspath(os.path.join(basepath, "seq", genome))
                        self._check_link(fasta, target)
            for line in location:
                self._add_line(line["file"], line["line"])
            deferred.state = app.model.DeferredJob.states.OK
            sa_session.add(deferred)
            sa_session.flush()
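
None of these examples show _add_line itself. A plausible minimal implementation, offered strictly as an assumption about the helper's contract, would append the entry to its .loc file only if it is not already present:

    def _add_line(locfile, newline):
        # Hypothetical sketch: idempotently append one tab-separated entry.
        with open(locfile, 'a+') as handle:
            handle.seek(0)
            if newline not in (line.rstrip('\n') for line in handle):
                handle.write(newline + '\n')
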