Example #1
    def __init__(self, root=None, **kwargs):
        Bunch.__init__(self, **kwargs)
        root = root or '/tmp'
        self.security = security.SecurityHelper(id_secret='bler')
        self.use_remote_user = kwargs.get('use_remote_user', False)
        self.file_path = '/tmp'
        self.jobs_directory = '/tmp'
        self.new_file_path = '/tmp'
        self.tool_data_path = '/tmp'

        self.object_store_config_file = ''
        self.object_store = 'disk'
        self.object_store_check_old_style = False

        self.user_activation_on = False
        self.new_user_dataset_access_role_default_private = False

        self.expose_dataset_path = True
        self.allow_user_dataset_purge = True
        self.enable_old_display_applications = True

        self.umask = 0o77

        # Following two are required by GenomeBuilds
        self.len_file_path = os.path.join('tool-data', 'shared', 'ucsc', 'chrom')
        self.builds_file_path = os.path.join('tool-data', 'shared', 'ucsc', 'builds.txt.sample')

        self.migrated_tools_config = "/tmp/migrated_tools_conf.xml"
        self.preserve_python_environment = "always"

        # set by MockDir
        self.root = root
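The mock configuration above (and most of the examples that follow) leans on galaxy.util.bunch.Bunch, which simply turns keyword arguments into attributes. A minimal stand-in with the semantics these snippets rely on (attribute access, get(), membership tests, item assignment) looks roughly like this; the real class carries a few more dict-like helpers:

# Minimal sketch of the Bunch pattern used throughout these examples;
# the real galaxy.util.bunch.Bunch adds further dict-like helpers.
class Bunch:
    def __init__(self, **kwds):
        self.__dict__.update(kwds)            # keyword args become attributes

    def get(self, key, default=None):
        return self.__dict__.get(key, default)

    def __contains__(self, key):              # supports: 'x' in bunch
        return key in self.__dict__

    def __setitem__(self, key, value):        # supports: bunch['x'] = value
        self.__dict__[key] = value


config = Bunch(use_remote_user=False, file_path='/tmp')
assert config.file_path == '/tmp'
assert 'use_remote_user' in config and config.get('missing') is None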
Example #2
def mock_trans(has_user=True, is_admin=False):
    trans = Bunch(user_is_admin=lambda: is_admin)
    if has_user:
        trans.user = Bunch(preferences={})
    else:
        trans.user = None
    return trans
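A short usage sketch of the mock above; the Bunch-backed transaction exposes just the two attributes the tests touch:

trans = mock_trans(has_user=True, is_admin=True)
assert trans.user_is_admin()
assert trans.user.preferences == {}
assert mock_trans(has_user=False).user is None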
Example #3
def mock_trans( has_user=True ):
    trans = Bunch( )
    if has_user:
        trans.user = Bunch(preferences={})
    else:
        trans.user = None
    return trans
Example #4
    def __init__( self, **kwd ):
        Bunch.__init__( self, **kwd )
        self.primary_file = None
        self.composite_files = odict()
        self.dbkey = None
        self.warnings = []

        self._temp_filenames = []  # store all created filenames here, delete on cleanup
Example #5
def filter_factory(config_dict=None):
    if config_dict is None:
        config_dict = dict(
            tool_filters=["filtermod:filter_tool"],
            tool_section_filters=["filtermod:filter_section"],
            tool_label_filters=["filtermod:filter_label_1", "filtermod:filter_label_2"],
        )
    config = Bunch(**config_dict)
    config.toolbox_filter_base_modules = "galaxy.tools.filters,unit.tools.filter_modules"
    app = Bunch(config=config)
    toolbox = Bunch(app=app)
    return FilterFactory(toolbox)
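filter_factory() only has to satisfy the attribute chain that FilterFactory reads off the toolbox; a quick sketch of that chain with the same stub layout (FilterFactory itself comes from galaxy.tools.filters and is not redefined here):

from galaxy.util.bunch import Bunch

config = Bunch(tool_filters=["filtermod:filter_tool"])
config.toolbox_filter_base_modules = "galaxy.tools.filters,unit.tools.filter_modules"
toolbox = Bunch(app=Bunch(config=config))

# The factory only needs to walk toolbox.app.config.* to discover the configured filters.
assert toolbox.app.config.tool_filters == ["filtermod:filter_tool"]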
Example #6
    def __init__( self, **kwargs ):
        Bunch.__init__( self, **kwargs )
        self.security = security.SecurityHelper( id_secret='bler' )
        self.file_path = '/tmp'
        self.job_working_directory = '/tmp'
        self.new_file_path = '/tmp'

        self.object_store_config_file = ''
        self.object_store = 'disk'
        self.object_store_check_old_style = False

        self.user_activation_on = False
        self.new_user_dataset_access_role_default_private = False

        self.expose_dataset_path = True
        self.allow_user_dataset_purge = True
        self.enable_old_display_applications = True
Example #7
def get_dataset(filename, index_attr='bam_index', dataset_id=1, has_data=True):
    dataset = Bunch()
    dataset.has_data = lambda: has_data
    dataset.id = dataset_id
    dataset.metadata = Bunch()
    with get_input_files(filename) as input_files, get_tmp_path() as index_path:
        dataset.file_name = input_files[0]
        index = Bunch()
        index.file_name = index_path
        setattr(dataset.metadata, index_attr, index)
        yield dataset
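get_dataset() is written as a generator so the temporary input and index files stay open while the caller works with the dataset; a consumption sketch, assuming it is wrapped as a one-shot context manager (the file name is illustrative):

import contextlib

# contextlib.contextmanager turns the single-yield generator into a context manager.
with contextlib.contextmanager(get_dataset)('1.bam') as dataset:
    assert dataset.has_data()
    print(dataset.file_name, dataset.metadata.bam_index.file_name)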
Example #8
    def __init__( self, root=None, **kwargs ):
        Bunch.__init__( self, **kwargs )
        self.security = security.SecurityHelper( id_secret='bler' )
        self.use_remote_user = kwargs.get( 'use_remote_user', False )
        self.file_path = '/tmp'
        self.jobs_directory = '/tmp'
        self.new_file_path = '/tmp'

        self.object_store_config_file = ''
        self.object_store = 'disk'
        self.object_store_check_old_style = False

        self.user_activation_on = False
        self.new_user_dataset_access_role_default_private = False

        self.expose_dataset_path = True
        self.allow_user_dataset_purge = True
        self.enable_old_display_applications = True

        self.umask = 0o77

        # set by MockDir
        self.root = root
Example #9
def init( file_path, url, engine_options={}, create_tables=False ):
    """Connect mappings to the database"""
    # Load the appropriate db module
    load_egg_for_url( url )
    # Create the database engine
    engine = create_engine( url, **engine_options )
    # Connect the metadata to the database.
    metadata.bind = engine
    # Clear any existing contextual sessions and reconfigure
    Session.remove()
    Session.configure( bind=engine )
    # Create tables if needed
    if create_tables:
        metadata.create_all()
    # Pack everything into a bunch
    result = Bunch( **globals() )
    result.engine = engine
    result.session = Session
    result.create_tables = create_tables
    # Load local tool shed security policy
    result.security_agent = CommunityRBACAgent( result )
    result.shed_counter = shed_statistics.ShedCounter( result )
    result.hgweb_config_manager = galaxy.webapps.tool_shed.util.hgweb_config.HgWebConfigManager()
    return result
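A hedged usage sketch of init() against an in-memory SQLite database; the attributes read back (engine, session, create_tables) are exactly the ones packed onto the Bunch above, while the importability of init() from the tool shed's model mapping module is an assumption:

# Sketch only: assumes init() above is in scope (e.g. imported from the
# tool shed's model mapping module; the exact module path is an assumption).
model = init('/tmp', 'sqlite:///:memory:', engine_options={}, create_tables=True)
session = model.session()          # Session is the scoped_session configured above
assert model.create_tables is True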
Example #10
def main():
    parser = optparse.OptionParser()
    parser.add_option(
        '-b', '--buffer',
        dest='buffer',
        type='int', default=1000000,
        help='Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.'
    )
    parser.add_option(
        '-d', '--index_depth',
        dest='index_depth',
        type='int', default=3,
        help='Depth to use on filebased offset indexing. Default: 3.'
    )
    parser.add_option(
        '-p', '--keep_partial',
        action='store_true',
        dest='keep_partial',
        default=False,
        help='Keep rows in first input which are missing identifiers.')
    parser.add_option(
        '-u', '--keep_unmatched',
        action='store_true',
        dest='keep_unmatched',
        default=False,
        help='Keep rows in first input which are not joined with the second input.')
    parser.add_option(
        '-f', '--fill_options_file',
        dest='fill_options_file',
        type='str', default=None,
        help='Fill empty columns with values from a JSONified file.')
    parser.add_option(
        '-H', '--keep_headers',
        action='store_true',
        dest='keep_headers',
        default=False,
        help='Keep the headers')

    options, args = parser.parse_args()

    fill_options = None
    if options.fill_options_file is not None:
        try:
            fill_options = Bunch(**stringify_dictionary_keys(json.load(open(options.fill_options_file))))  # json.load( open( options.fill_options_file ) )
        except Exception as e:
            print("Warning: Ignoring fill options due to json error (%s)." % e)
    if fill_options is None:
        fill_options = Bunch()
    if 'fill_unjoined_only' not in fill_options:
        fill_options.fill_unjoined_only = True
    if 'file1_columns' not in fill_options:
        fill_options.file1_columns = None
    if 'file2_columns' not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except Exception:
        print("Error parsing command line.", file=sys.stderr)
        sys.exit()

    # Character for splitting fields and joining lines
    split = "\t"

    return join_files(filename1, column1, filename2, column2, out_filename, split, options.buffer, options.keep_unmatched, options.keep_partial, options.keep_headers, options.index_depth, fill_options=fill_options)
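The -f/--fill_options_file argument points at a JSON file whose keys become Bunch attributes; a sketch of writing one (the key names are exactly the defaults main() fills in, the file and script names are illustrative):

import json

fill_options = {
    "fill_unjoined_only": True,   # only pad rows that found no join partner
    "file1_columns": None,        # or a list of per-column fill values
    "file2_columns": None,
}
with open("fill_options.json", "w") as fh:
    json.dump(fill_options, fh)
# e.g. (hypothetical script name): python join.py -f fill_options.json file1 file2 1 1 out.tsv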
Example #11
    def job_io(self):
        return Bunch(get_output_fnames=lambda: ['output1'],
                     check_job_script_integrity=False)
Example #12
class ToolDependency(object):
    installation_status = Bunch(NEVER_INSTALLED='Never installed',
                                INSTALLING='Installing',
                                INSTALLED='Installed',
                                ERROR='Error',
                                UNINSTALLED='Uninstalled')

    states = Bunch(INSTALLING='running',
                   OK='ok',
                   WARNING='queued',
                   ERROR='error',
                   UNINSTALLED='deleted_new')

    def __init__(self, tool_shed_repository_id=None, name=None, version=None, type=None, status=None, error_message=None):
        self.tool_shed_repository_id = tool_shed_repository_id
        self.name = name
        self.version = version
        self.type = type
        self.status = status
        self.error_message = error_message

    @property
    def can_install(self):
        return self.status in [self.installation_status.NEVER_INSTALLED, self.installation_status.UNINSTALLED]

    @property
    def can_uninstall(self):
        return self.status in [self.installation_status.ERROR, self.installation_status.INSTALLED]

    @property
    def can_update(self):
        return self.status in [self.installation_status.NEVER_INSTALLED,
                               self.installation_status.INSTALLED,
                               self.installation_status.ERROR,
                               self.installation_status.UNINSTALLED]

    def get_env_shell_file_path(self, app):
        installation_directory = self.installation_directory(app)
        file_path = os.path.join(installation_directory, 'env.sh')
        if os.path.exists(file_path):
            return file_path
        return None

    @property
    def in_error_state(self):
        return self.status == self.installation_status.ERROR

    def installation_directory(self, app):
        if self.type == 'package':
            return os.path.join(app.config.tool_dependency_dir,
                                self.name,
                                self.version,
                                self.tool_shed_repository.owner,
                                self.tool_shed_repository.name,
                                self.tool_shed_repository.installed_changeset_revision)
        if self.type == 'set_environment':
            return os.path.join(app.config.tool_dependency_dir,
                                'environment_settings',
                                self.name,
                                self.tool_shed_repository.owner,
                                self.tool_shed_repository.name,
                                self.tool_shed_repository.installed_changeset_revision)

    @property
    def is_installed(self):
        return self.status == self.installation_status.INSTALLED
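A small usage sketch of the Bunch-backed status constants above (the name and version values are illustrative):

td = ToolDependency(tool_shed_repository_id=1, name='samtools', version='1.9',
                    type='package', status=ToolDependency.installation_status.NEVER_INSTALLED)
assert td.can_install            # NEVER_INSTALLED / UNINSTALLED are installable
assert not td.can_uninstall      # only ERROR / INSTALLED can be uninstalled
td.status = ToolDependency.installation_status.INSTALLED
assert td.is_installed and td.can_update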
Example #13
    def get_uploaded_datasets(self,
                              trans,
                              context,
                              override_name=None,
                              override_info=None):
        def get_data_file_filename(data_file,
                                   override_name=None,
                                   override_info=None,
                                   purge=True):
            dataset_name = override_name

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name

            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                return Bunch(type='file',
                             path=data_file['local_filename'],
                             name=dataset_name,
                             purge_source=purge)
            except Exception:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)

        def get_url_paste_urls_or_filename(group_incoming,
                                           override_name=None,
                                           override_info=None):
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file).read()

                def start_of_url(content):
                    start_of_url_paste = content.lstrip()[0:10].lower()
                    looks_like_url = False
                    for url_prefix in URI_PREFIXES:
                        if start_of_url_paste.startswith(url_prefix):
                            looks_like_url = True
                            break

                    return looks_like_url

                if start_of_url(url_paste):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not start_of_url(line):
                                continue  # non-url line, ignore

                            if "file://" in line:
                                if not trans.user_is_admin:
                                    raise AdminRequiredException()
                                elif not trans.app.config.allow_path_paste:
                                    raise ConfigDoesNotAllowException()
                                upload_path = line[len("file://"):]
                                dataset_name = os.path.basename(upload_path)
                            else:
                                dataset_name = line

                            if override_name:
                                dataset_name = override_name
                            yield Bunch(type='url',
                                        path=line,
                                        name=dataset_name)
                else:
                    dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    yield Bunch(type='file',
                                path=url_paste_file,
                                name=dataset_name)

        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress',
                           None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append(
                        "All file contents specified in the paste box were ignored."
                    )
                if ftp_files:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip(
            ):  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(
                        context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                assert not os.path.islink(
                    user_ftp_dir
                ), "User FTP directory cannot be a symbolic link"
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename),
                                               user_ftp_dir)
                                if not os.path.islink(
                                        os.path.join(dirpath, filename)):
                                    ftp_data_file = {
                                        'local_filename':
                                        os.path.abspath(
                                            os.path.join(user_ftp_dir, path)),
                                        'filename':
                                        os.path.basename(path)
                                    }
                                    purge = getattr(trans.app.config,
                                                    'ftp_upload_purge', True)
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info,
                                        purge=purge,
                                    )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.auto_decompress = auto_decompress
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            if file_type is not None:
                file_bunch.file_type = file_type
            if dbkey is not None:
                file_bunch.dbkey = dbkey
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress',
                           None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey

                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(
                    context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey

                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [
                    unicodedata.normalize('NFC', f) for f in ftp_files
                    if isinstance(f, str)
                ]
                if trans.user is None:
                    log.warning(
                        'Anonymous user passed values in ftp_files: %s' %
                        ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(
                        user_ftp_dir
                    ), "User FTP directory cannot be a symbolic link"
                    for (dirpath, dirnames,
                         filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename),
                                           user_ftp_dir)
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, str):
                                    valid_files.append(
                                        unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning(
                        'User passed an invalid file path in ftp_files: %s' %
                        ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {
                    'local_filename':
                    os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                    'filename':
                    os.path.basename(ftp_file)
                }
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file,
                                                    override_name=name,
                                                    override_info=info,
                                                    purge=purge)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey
                    rval.append(file_bunch)
            return rval

        file_type = self.get_file_type(context)
        file_count = self.get_file_count(trans, context)
        d_type = self.get_datatype(trans, context)
        dbkey = self.get_dbkey(context)
        tag_using_filenames = context.get('tag_using_filenames', False)
        tags = context.get('tags', False)
        force_composite = asbool(context.get('force_composite', 'False'))
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for _ in range(file_count)]
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None or force_composite:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            dataset.tag_using_filenames = None
            dataset.tags = None
            # load metadata
            files_metadata = context.get(self.metadata_ref, {})
            metadata_name_substitution_default_dict = {
                composite_file.substitute_name_with_metadata:
                d_type.metadata_spec[
                    composite_file.substitute_name_with_metadata].default
                for composite_file in d_type.composite_files.values()
                if composite_file.substitute_name_with_metadata
            }
            for meta_name, meta_spec in d_type.metadata_spec.items():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substitution_default_dict:
                            meta_value = sanitize_for_filename(
                                meta_value,
                                default=metadata_name_substitution_default_dict[meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.name = self.get_composite_dataset_name(context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name = sniff.stream_to_file(
                    io.StringIO(d_type.generate_primary_file(dataset)),
                    prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.auto_decompress = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.auto_decompress = file_bunch.auto_decompress
                dataset.space_to_tab = file_bunch.space_to_tab
                if file_bunch.file_type:
                    dataset.file_type = file_type
                if file_bunch.dbkey:
                    dataset.dbkey = dbkey
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception(
                    'No primary dataset file was available for composite upload'
                )
            if not force_composite:
                keys = [value.name for value in writable_files.values()]
            else:
                keys = [str(index) for index in range(file_count)]
            for i, group_incoming in enumerate(
                    groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if not force_composite and group_incoming is None and not writable_files[
                        list(writable_files.keys())[keys.index(key)]].optional:
                    dataset.warnings.append(
                        "A required composite file (%s) was not specified." %
                        (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        if force_composite:
                            key = group_incoming.get("NAME") or i
                        dataset.composite_files[key] = file_bunch.__dict__
                    elif not force_composite:
                        dataset.composite_files[key] = None
                        if not writable_files[list(writable_files.keys())[
                                keys.index(key)]].optional:
                            dataset.warnings.append(
                                "A required composite file (%s) was not specified."
                                % (key))
            return [dataset]
        else:
            rval = []
            for i, file_contexts in enumerate(context[self.name]):
                datasets = get_filenames(file_contexts)
                for dataset in datasets:
                    override_file_type = self.get_file_type(
                        context[self.name][i], parent_context=context)
                    d_type = self.get_datatype(trans,
                                               context[self.name][i],
                                               parent_context=context)
                    dataset.file_type = override_file_type
                    dataset.datatype = d_type
                    dataset.ext = self.get_datatype_ext(trans,
                                                        context[self.name][i],
                                                        parent_context=context)
                    dataset.dbkey = self.get_dbkey(context[self.name][i],
                                                   parent_context=context)
                    dataset.tag_using_filenames = tag_using_filenames
                    dataset.tags = tags
                    rval.append(dataset)
            return rval
Example #14
0
def join_files(filename1,
               column1,
               filename2,
               column2,
               out_filename,
               split=None,
               buffer=1000000,
               keep_unmatched=False,
               keep_partial=False,
               index_depth=3,
               fill_options=None):
    # return identifier based upon line
    def get_identifier_by_line(line, column, split=None):
        if isinstance(line, str):
            fields = line.rstrip('\r\n').split(split)
            if column < len(fields):
                return fields[column]
        return None

    if fill_options is None:
        fill_options = Bunch(fill_unjoined_only=True,
                             file1_columns=None,
                             file2_columns=None)
    out = open(out_filename, 'w')
    index = BufferedIndex(filename2, column2, split, buffer, index_depth)
    for line1 in open(filename1, 'r'):
        identifier = get_identifier_by_line(line1, column1, split)
        if identifier:
            written = False
            for line2 in index.get_lines_by_identifier(identifier):
                if not fill_options.fill_unjoined_only:
                    out.write(
                        "%s%s%s\n" %
                        (fill_empty_columns(line1.rstrip('\r\n'), split,
                                            fill_options.file1_columns), split,
                         fill_empty_columns(line2.rstrip('\r\n'), split,
                                            fill_options.file2_columns)))
                else:
                    out.write(
                        "%s%s%s\n" %
                        (line1.rstrip('\r\n'), split, line2.rstrip('\r\n')))
                written = True
            if not written and keep_unmatched:
                out.write(
                    fill_empty_columns(line1.rstrip('\r\n'), split,
                                       fill_options.file1_columns))
                if fill_options:
                    if fill_options.file2_columns:
                        out.write("%s%s" %
                                  (split,
                                   fill_empty_columns(
                                       "", split, fill_options.file2_columns)))
                out.write("\n")
        elif keep_partial:
            out.write(
                fill_empty_columns(line1.rstrip('\r\n'), split,
                                   fill_options.file1_columns))
            if fill_options:
                if fill_options.file2_columns:
                    out.write(
                        "%s%s" % (split,
                                  fill_empty_columns(
                                      "", split, fill_options.file2_columns)))
            out.write("\n")
    out.close()
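A hedged call sketch for join_files(); the Bunch mirrors the default the function builds when fill_options is None, and the file names are illustrative:

from galaxy.util.bunch import Bunch

fill_options = Bunch(fill_unjoined_only=True, file1_columns=None, file2_columns=None)
# Join on the first column of each file (columns are 0-based here),
# keeping rows from file1.tsv that have no partner in file2.tsv.
join_files('file1.tsv', 0, 'file2.tsv', 0, 'joined.tsv',
           split='\t', buffer=1000000, keep_unmatched=True,
           keep_partial=False, index_depth=3, fill_options=fill_options)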
Example #15
        def wrap_input(input_values, input):
            value = input_values[input.name]
            if isinstance(input, DataToolParameter) and input.multiple:
                dataset_instances = DatasetListWrapper.to_dataset_instances(value)
                input_values[input.name] = \
                    DatasetListWrapper(job_working_directory,
                                       dataset_instances,
                                       compute_environment=self.compute_environment,
                                       datatypes_registry=self.app.datatypes_registry,
                                       tool=self.tool,
                                       name=input.name,
                                       formats=input.formats)

            elif isinstance(input, DataToolParameter):
                # FIXME: We're populating param_dict with conversions when
                #        wrapping values, this should happen as a separate
                #        step before wrapping (or call this wrapping step
                #        something more generic) (but iterating this same
                #        list twice would be wasteful)
                # Add explicit conversions by name to current parent
                for conversion_name, conversion_extensions, conversion_datatypes in input.conversions:
                    # If we are at building cmdline step, then converters
                    # have already executed
                    conv_ext, converted_dataset = input_values[input.name].find_conversion_destination(conversion_datatypes)
                    # When dealing with optional inputs, we'll provide a
                    # valid extension to be used for None converted dataset
                    if not conv_ext:
                        conv_ext = conversion_extensions[0]
                    # input_values[ input.name ] is None when optional
                    # dataset, 'conversion' of optional dataset should
                    # create wrapper around NoneDataset for converter output
                    if input_values[input.name] and not converted_dataset:
                        # Input that converter is based from has a value,
                        # but converted dataset does not exist
                        raise Exception('A path for explicit datatype conversion has not been found: %s --/--> %s'
                                        % (input_values[input.name].extension, conversion_extensions))
                    else:
                        # Trick wrapper into using target conv ext (when
                        # None) without actually being a tool parameter
                        input_values[conversion_name] = \
                            DatasetFilenameWrapper(converted_dataset,
                                                   datatypes_registry=self.app.datatypes_registry,
                                                   tool=Bunch(conversion_name=Bunch(extensions=conv_ext)),
                                                   name=conversion_name)
                # Wrap actual input dataset
                dataset = input_values[input.name]
                wrapper_kwds = dict(
                    datatypes_registry=self.app.datatypes_registry,
                    tool=self,
                    name=input.name,
                    compute_environment=self.compute_environment
                )
                element_identifier = element_identifier_mapper.identifier(dataset, param_dict)
                if element_identifier:
                    wrapper_kwds["identifier"] = element_identifier
                input_values[input.name] = \
                    DatasetFilenameWrapper(dataset, **wrapper_kwds)
            elif isinstance(input, DataCollectionToolParameter):
                dataset_collection = value
                wrapper_kwds = dict(
                    datatypes_registry=self.app.datatypes_registry,
                    compute_environment=self.compute_environment,
                    tool=self,
                    name=input.name
                )
                wrapper = DatasetCollectionWrapper(
                    job_working_directory,
                    dataset_collection,
                    **wrapper_kwds
                )
                input_values[input.name] = wrapper
            elif isinstance(input, SelectToolParameter):
                if input.multiple:
                    value = listify(value)
                input_values[input.name] = SelectToolParameterWrapper(
                    input, value, other_values=param_dict, compute_environment=self.compute_environment)
            else:
                input_values[input.name] = InputValueWrapper(
                    input, value, param_dict)
Example #16
def _verify_outputs(testdef,
                    history,
                    jobs,
                    tool_id,
                    data_list,
                    data_collection_list,
                    galaxy_interactor,
                    quiet=False):
    assert len(
        jobs
    ) == 1, "Test framework logic error, somehow tool test resulted in more than one job."
    job = jobs[0]

    maxseconds = testdef.maxseconds
    if testdef.num_outputs is not None:
        expected = testdef.num_outputs
        actual = len(data_list)
        if expected != actual:
            message_template = "Incorrect number of outputs - expected %d, found %s."
            message = message_template % (expected, actual)
            raise Exception(message)
    found_exceptions = []

    def register_exception(e):
        if not found_exceptions and not quiet:
            # Only print this stuff out once.
            for stream in ['stdout', 'stderr']:
                if stream in job_stdio:
                    print(_format_stream(job_stdio[stream],
                                         stream=stream,
                                         format=True),
                          file=sys.stderr)
        found_exceptions.append(e)

    if testdef.expect_failure:
        if testdef.outputs:
            raise Exception(
                "Cannot specify outputs in a test expecting failure.")

    # Wait for the job to complete and register exceptions if the final
    # status was not what the test was expecting.
    job_failed = False
    try:
        galaxy_interactor.wait_for_job(job['id'], history, maxseconds)
    except Exception as e:
        job_failed = True
        if not testdef.expect_failure:
            found_exceptions.append(e)

    job_stdio = galaxy_interactor.get_job_stdio(job['id'])

    if not job_failed and testdef.expect_failure:
        error = AssertionError(
            "Expected job to fail but Galaxy indicated the job successfully completed."
        )
        register_exception(error)

    expect_exit_code = testdef.expect_exit_code
    if expect_exit_code is not None:
        exit_code = job_stdio["exit_code"]
        if str(expect_exit_code) != str(exit_code):
            error = AssertionError(
                "Expected job to complete with exit code %s, found %s" %
                (expect_exit_code, exit_code))
            register_exception(error)

    for output_index, output_dict in enumerate(testdef.outputs):
        # Get the correct hid
        name = output_dict["name"]
        outfile = output_dict["value"]
        attributes = output_dict["attributes"]
        output_testdef = Bunch(name=name,
                               outfile=outfile,
                               attributes=attributes)
        try:
            output_data = data_list[name]
        except (TypeError, KeyError):
            # Legacy - fall back on ordered data list access if data_list is
            # just a list (case with twill variant or if output changes its
            # name).
            if hasattr(data_list, "values"):
                output_data = list(data_list.values())[output_index]
            else:
                output_data = data_list[len(data_list) - len(testdef.outputs) +
                                        output_index]
        assert output_data is not None
        try:
            galaxy_interactor.verify_output(history,
                                            jobs,
                                            output_data,
                                            output_testdef=output_testdef,
                                            tool_id=tool_id,
                                            maxseconds=maxseconds)
        except Exception as e:
            register_exception(e)

    other_checks = {
        "command_line": "Command produced by the job",
        "stdout": "Standard output of the job",
        "stderr": "Standard error of the job",
    }
    for what, description in other_checks.items():
        if getattr(testdef, what, None) is not None:
            try:
                data = job_stdio[what]
                verify_assertions(data, getattr(testdef, what))
            except AssertionError as err:
                errmsg = '%s different than expected\n' % description
                errmsg += str(err)
                register_exception(AssertionError(errmsg))

    for output_collection_def in testdef.output_collections:
        try:
            name = output_collection_def.name
            # TODO: data_collection_list is clearly a bad name for dictionary.
            if name not in data_collection_list:
                template = "Failed to find output [%s], tool outputs include [%s]"
                message = template % (name, ",".join(
                    data_collection_list.keys()))
                raise AssertionError(message)

            # Data collection returned from submission, elements may have been populated after
            # the job completed so re-hit the API for more information.
            data_collection_returned = data_collection_list[name]
            data_collection = galaxy_interactor._get(
                "dataset_collections/%s" % data_collection_returned["id"],
                data={
                    "instance_type": "history"
                }).json()

            def get_element(elements, id):
                for element in elements:
                    if element["element_identifier"] == id:
                        return element
                return False

            expected_collection_type = output_collection_def.collection_type
            if expected_collection_type:
                collection_type = data_collection["collection_type"]
                if expected_collection_type != collection_type:
                    template = "Expected output collection [%s] to be of type [%s], was of type [%s]."
                    message = template % (name, expected_collection_type,
                                          collection_type)
                    raise AssertionError(message)

            expected_element_count = output_collection_def.count
            if expected_element_count:
                actual_element_count = len(data_collection["elements"])
                if expected_element_count != actual_element_count:
                    template = "Expected output collection [%s] to have %s elements, but it had %s."
                    message = template % (name, expected_element_count,
                                          actual_element_count)
                    raise AssertionError(message)

            def verify_elements(element_objects, element_tests):
                for element_identifier, (
                        element_outfile,
                        element_attrib) in element_tests.items():
                    element = get_element(element_objects, element_identifier)
                    if not element:
                        template = "Failed to find identifier [%s] for testing, tool generated collection elements [%s]"
                        message = template % (element_identifier,
                                              element_objects)
                        raise AssertionError(message)

                    element_type = element["element_type"]
                    if element_type != "dataset_collection":
                        hda = element["object"]
                        galaxy_interactor.verify_output_dataset(
                            history,
                            hda_id=hda["id"],
                            outfile=element_outfile,
                            attributes=element_attrib,
                            tool_id=tool_id)
                    if element_type == "dataset_collection":
                        elements = element["object"]["elements"]
                        verify_elements(elements,
                                        element_attrib.get("elements", {}))

            verify_elements(data_collection["elements"],
                            output_collection_def.element_tests)
        except Exception as e:
            register_exception(e)

    if found_exceptions:
        raise JobOutputsError(found_exceptions, job_stdio)
    else:
        return job_stdio
Example #17
def get_permitted_actions(**kwds):
    return Bunch()
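The stub ignores its keywords and hands back an empty Bunch, so permission lookups simply come up empty; a quick sketch of what callers observe:

actions = get_permitted_actions(id=42)        # keyword is ignored by the stub
assert actions.get('DATASET_ACCESS') is None  # Bunch.get falls back to None
assert 'DATASET_MANAGE_PERMISSIONS' not in actions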
Example #18
    def __init__(self, root=None, **kwargs):
        Bunch.__init__(self, **kwargs)
        if not root:
            root = tempfile.mkdtemp()
            self._remove_root = True
        else:
            self._remove_root = False
        self.schema = self.MockSchema()
        self.security = idencoding.IdEncodingHelper(
            id_secret='6e46ed6483a833c100e68cc3f1d0dd76')
        self.database_connection = kwargs.get('database_connection',
                                              "sqlite:///:memory:")
        self.use_remote_user = kwargs.get('use_remote_user', False)
        self.enable_celery_tasks = False
        self.data_dir = os.path.join(root, 'database')
        self.file_path = os.path.join(self.data_dir, 'files')
        self.jobs_directory = os.path.join(self.data_dir, 'jobs_directory')
        self.new_file_path = os.path.join(self.data_dir, 'tmp')
        self.tool_data_path = os.path.join(root, 'tool-data')
        self.tool_dependency_dir = None
        self.metadata_strategy = 'legacy'

        self.object_store_config_file = ''
        self.object_store = 'disk'
        self.object_store_check_old_style = False
        self.object_store_cache_path = '/tmp/cache'
        self.umask = os.umask(0o77)
        self.gid = os.getgid()

        self.user_activation_on = False
        self.new_user_dataset_access_role_default_private = False

        self.expose_dataset_path = True
        self.allow_user_dataset_purge = True
        self.enable_old_display_applications = True
        self.redact_username_in_logs = False
        self.auth_config_file = "config/auth_conf.xml.sample"
        self.error_email_to = "*****@*****.**"
        self.password_expiration_period = 0

        self.umask = 0o77
        self.flush_per_n_datasets = 0

        # Compliance related config
        self.redact_email_in_job_name = False

        # Following two are required by GenomeBuilds
        self.len_file_path = os.path.join('tool-data', 'shared', 'ucsc',
                                          'chrom')
        self.builds_file_path = os.path.join('tool-data', 'shared', 'ucsc',
                                             'builds.txt.sample')

        self.shed_tool_config_file = "config/shed_tool_conf.xml"
        self.shed_tool_config_file_set = False
        self.enable_beta_edam_toolbox = False
        self.preserve_python_environment = "always"
        self.enable_beta_gdpr = False

        self.version_major = "19.09"

        # set by MockDir
        self.root = root
        self.enable_tool_document_cache = False
        self.tool_cache_data_dir = os.path.join(root, 'tool_cache')
        self.delay_tool_initialization = True
        self.external_chown_script = None

        self.default_panel_view = "default"
        self.panel_views_dir = ''
        self.panel_views = {}
        self.edam_panel_views = ''

        self.config_file = None
Example #19
    def __init__(self, test_directory, mock_model=True):
        # The following line is needed in order to create
        # HistoryDatasetAssociations - ideally the model classes would be
        # usable without the ORM infrastructure in place.
        in_memory_model = mapping.init("/tmp",
                                       "sqlite:///:memory:",
                                       create_tables=True)

        self.datatypes_registry = Bunch(
            integrated_datatypes_configs='/galaxy/integrated_datatypes_configs.xml',
            get_datatype_by_extension=lambda ext: Bunch(),
        )

        self.config = Bunch(
            outputs_to_working_directory=False,
            commands_in_new_shell=True,
            new_file_path=os.path.join(test_directory, "new_files"),
            tool_data_path=os.path.join(test_directory, "tools"),
            root=os.path.join(test_directory, "galaxy"),
            admin_users="*****@*****.**",
            len_file_path=os.path.join('tool-data', 'shared', 'ucsc', 'chrom'),
            builds_file_path=os.path.join('tool-data', 'shared', 'ucsc',
                                          'builds.txt.sample'),
            migrated_tools_config=os.path.join(test_directory,
                                               "migrated_tools_conf.xml"),
            server_name="test_server",
        )

        # Setup some attributes for downstream extension by specific tests.
        self.job_config = Bunch(dynamic_params=None, )

        # Two ways to handle the model layer: either stub out some objects that
        # have an interface similar to the real model (mock_model) and can keep
        # track of 'persisted' objects in a map, or use a real sqlalchemy layer
        # targeting an in-memory database, depending on what is being tested.
        if mock_model:
            # Create self.model to mimic app.model.
            self.model = Bunch(context=MockContext())
            for module_member_name in dir(galaxy.model):
                module_member = getattr(galaxy.model, module_member_name)
                if type(module_member) == type:
                    self.model[module_member_name] = module_member
        else:
            self.model = in_memory_model
        self.genome_builds = GenomeBuilds(self)
        self.toolbox = None
        self.object_store = None
        self.security = SecurityHelper(id_secret="testing")
        from galaxy.security import GalaxyRBACAgent
        self.job_queue = NoopQueue()
        self.security_agent = GalaxyRBACAgent(self.model)
        self.tool_data_tables = {}
        self.dataset_collections_service = None
        self.container_finder = NullContainerFinder()
        self.name = "galaxy"
        self._toolbox_lock = MockLock()
        self.tool_version_cache = Bunch(app=self,
                                        tool_version_by_id={},
                                        tool_version_by_tool_id={},
                                        tool_id_to_parent_id={},
                                        parent_id_to_tool_id={})
Example #20
    def upload_async_create(self, trans, tool_id=None, **kwd):
        """
        Precreate datasets for asynchronous uploading.
        """
        cntrller = kwd.get('cntrller', '')
        roles = kwd.get('roles', False)
        if roles:
            # The user associated the DATASET_ACCESS permission on the uploaded datasets with 1 or more roles.
            # We need to ensure that the roles are legitimately derived from the roles associated with the LIBRARY_ACCESS
            # permission if the library is not public (this should always be the case since any illegitimate roles
            # were filtered out of the roles displayed on the upload form).  In addition, we need to ensure that the user
            # did not associate roles that would make the dataset inaccessible to everyone.
            library_id = trans.app.security.decode_id(kwd.get(
                'library_id', ''))
            vars = dict(DATASET_ACCESS_in=roles)
            permissions, in_roles, error, msg = trans.app.security_agent.derive_roles_from_access(
                trans, library_id, cntrller, library=True, **vars)
            if error:
                return ['error', msg]

        def create_dataset(name):
            ud = Bunch(name=name, file_type=None, dbkey=None)
            if nonfile_params.get('folder_id', False):
                replace_id = nonfile_params.get('replace_id', None)
                if replace_id not in [None, 'None']:
                    replace_dataset = trans.sa_session.query(
                        trans.app.model.LibraryDataset).get(
                            trans.security.decode_id(replace_id))
                else:
                    replace_dataset = None
                # FIXME: instead of passing params here (which have been processed by util.Params()), the original kwd
                # should be passed so that complex objects that may have been included in the initial request remain.
                library_bunch = upload_common.handle_library_params(
                    trans, nonfile_params, nonfile_params.folder_id,
                    replace_dataset)
            else:
                library_bunch = None
            return upload_common.new_upload(
                trans,
                cntrller,
                ud,
                library_bunch=library_bunch,
                state=trans.app.model.HistoryDatasetAssociation.states.UPLOAD)

        tool = self.get_toolbox().get_tool(tool_id)
        if not tool:
            return False  # bad tool_id
        nonfile_params = galaxy.util.Params(kwd, sanitize=False)
        if kwd.get('tool_state', None) not in (None, 'None'):
            encoded_state = galaxy.util.string_to_object(kwd["tool_state"])
            tool_state = DefaultToolState()
            tool_state.decode(encoded_state, tool, trans.app)
        else:
            tool_state = tool.new_state(trans)
        tool.update_state(trans,
                          tool.inputs,
                          tool_state.inputs,
                          kwd,
                          update_only=True)
        datasets = []
        dataset_upload_inputs = []
        for input_name, input in tool.inputs.items():
            if input.type == "upload_dataset":
                dataset_upload_inputs.append(input)
        assert dataset_upload_inputs, Exception(
            "No dataset upload groups were found.")
        for dataset_upload_input in dataset_upload_inputs:
            d_type = dataset_upload_input.get_datatype(trans, kwd)
            if d_type.composite_type is not None:
                datasets.append(
                    create_dataset(
                        dataset_upload_input.get_composite_dataset_name(kwd)))
            else:
                params = Bunch(
                    **tool_state.inputs[dataset_upload_input.name][0])
                if params.file_data not in [None, ""]:
                    name = params.file_data
                    if name.count('/'):
                        name = name.rsplit('/', 1)[1]
                    if name.count('\\'):
                        name = name.rsplit('\\', 1)[1]
                    datasets.append(create_dataset(name))
                if params.url_paste not in [None, ""]:
                    url_paste = params.url_paste.replace('\r', '').split('\n')
                    url = False
                    for line in url_paste:
                        line = line.rstrip('\r\n').strip()
                        if not line:
                            continue
                        elif line.lower().startswith('http://') or line.lower(
                        ).startswith('ftp://') or line.lower().startswith(
                                'https://'):
                            url = True
                            datasets.append(create_dataset(line))
                        else:
                            if url:
                                continue  # non-url when we've already processed some urls
                            else:
                                # pasted data
                                datasets.append(create_dataset('Pasted Entry'))
                                break
        return [d.id for d in datasets]
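
A minimal stand-alone sketch of the url_paste classification used above: lines starting with http://, https:// or ftp:// each become their own dataset, while the first non-URL content collapses into a single 'Pasted Entry'. The helper below is illustrative only, not part of the original controller.

def classify_paste(url_paste):
    # Mirror the loop above: collect URL lines, or fall back to one pasted entry.
    names = []
    saw_url = False
    for line in url_paste.replace('\r', '').split('\n'):
        line = line.strip()
        if not line:
            continue
        if line.lower().startswith(('http://', 'https://', 'ftp://')):
            saw_url = True
            names.append(line)
        elif not saw_url:
            names.append('Pasted Entry')
            break
    return names

print(classify_paste('http://example.org/a.txt\nhttp://example.org/b.txt'))
print(classify_paste('just some pasted text'))  # ['Pasted Entry']
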
Example #21
0
def is_filtered(filters, trans, tool):
    context = Bunch(trans=trans)
    return not all(_(context, tool) for _ in filters)
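
A minimal usage sketch for is_filtered: the (context, tool) filter signature comes from the call above, but the filter callables and the stubbed trans/tool objects below are illustrative assumptions.

from galaxy.util.bunch import Bunch

def require_admin(context, tool):
    # Keep the tool only when the stubbed trans says the user is an admin.
    return context.trans.user_is_admin

def hide_deprecated(context, tool):
    return not getattr(tool, 'deprecated', False)

trans = Bunch(user_is_admin=True)
tool = Bunch(deprecated=False)
print(is_filtered([require_admin, hide_deprecated], trans, tool))  # False -> tool stays visible
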
Example #22
0
 def get_uploaded_datasets( self, trans, context, override_name = None, override_info = None ):
     def get_data_file_filename( data_file, override_name = None, override_info = None ):
         dataset_name = override_name
         dataset_info = override_info
         def get_file_name( file_name ):
             file_name = file_name.split( '\\' )[-1]
             file_name = file_name.split( '/' )[-1]
             return file_name
         try:
             # Use the existing file
             if not dataset_name and 'filename' in data_file:
                 dataset_name = get_file_name( data_file['filename'] )
             if not dataset_info:
                 dataset_info = 'uploaded file'
             return Bunch( type='file', path=data_file['local_filename'], name=get_file_name( data_file['filename'] ) )
             #return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
         except:
             # The uploaded file should've been persisted by the upload tool action
             return Bunch( type=None, path=None, name=None )
             #return None, None, None, None, None
     def get_url_paste_urls_or_filename( group_incoming, override_name = None, override_info = None ):
         filenames = []
         url_paste_file = group_incoming.get( 'url_paste', None )
         if url_paste_file is not None:
             url_paste = open( url_paste_file, 'r' ).read( 1024 )
             if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ):
                 url_paste = url_paste.replace( '\r', '' ).split( '\n' )
                 for line in url_paste:
                     line = line.strip()
                     if line:
                         if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ):
                             continue # non-url line, ignore
                         precreated_name = line
                         dataset_name = override_name
                         if not dataset_name:
                             dataset_name = line
                         dataset_info = override_info
                         if not dataset_info:
                             dataset_info = 'uploaded url'
                         yield Bunch( type='url', path=line, name=precreated_name )
                         #yield ( 'url', line, precreated_name, dataset_name, dataset_info )
             else:
                 dataset_name = dataset_info = precreated_name = 'Pasted Entry' #we need to differentiate between various url pastes here
                 if override_name:
                     dataset_name = override_name
                 if override_info:
                     dataset_info = override_info
                 yield Bunch( type='file', path=url_paste_file, name=precreated_name )
                 #yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )
     def get_one_filename( context ):
         data_file = context['file_data']
         url_paste = context['url_paste']
         name = context.get( 'NAME', None )
         info = context.get( 'INFO', None )
         warnings = []
         space_to_tab = False 
         if context.get( 'space_to_tab', None ) not in ["None", None]:
             space_to_tab = True
         file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
         if file_bunch.path and url_paste:
             if url_paste.strip():
                 warnings.append( "All file contents specified in the paste box were ignored." )
         else: #we need to use url_paste
             for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
                 if file_bunch.path:
                     break
         return file_bunch, warnings
     def get_filenames( context ):
         rval = []
         data_file = context['file_data']
         url_paste = context['url_paste']
         name = context.get( 'NAME', None )
         info = context.get( 'INFO', None )
         space_to_tab = False
         if context.get( 'space_to_tab', None ) not in ["None", None]:
             space_to_tab = True
         warnings = []
         file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
         if file_bunch.path:
             file_bunch.space_to_tab = space_to_tab
             rval.append( file_bunch )
         for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
             if file_bunch.path:
                 file_bunch.space_to_tab = space_to_tab
                 rval.append( file_bunch )
         return rval
     file_type = self.get_file_type( context )
     d_type = self.get_datatype( trans, context )
     dbkey = context.get( 'dbkey', None )
     writable_files = d_type.writable_files
     writable_files_offset = 0
     groups_incoming = [ None for filename in writable_files ]
     for group_incoming in context.get( self.name, [] ):
         i = int( group_incoming['__index__'] )
         groups_incoming[ i ] = group_incoming
     if d_type.composite_type is not None:
         #handle uploading of composite datatypes
         #Only one Dataset can be created
         dataset = Bunch()
         dataset.type = 'composite'
         dataset.file_type = file_type
         dataset.dbkey = dbkey
         dataset.datatype = d_type
         dataset.warnings = []
         dataset.metadata = {}
         dataset.composite_files = {}
         #load metadata
         files_metadata = context.get( self.metadata_ref, {} )
         for meta_name, meta_spec in d_type.metadata_spec.iteritems():
             if meta_spec.set_in_upload:
                 if meta_name in files_metadata:
                     dataset.metadata[ meta_name ] = files_metadata[ meta_name ]
         dataset_name = None
         dataset_info = None
         if dataset.datatype.composite_type == 'auto_primary_file':
             #replace sniff here with just creating an empty file
             temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file() ), prefix='upload_auto_primary_file' )
             dataset.primary_file = temp_name
             dataset.space_to_tab = False
             dataset.precreated_name = dataset.name = 'Uploaded Composite Dataset (%s)' % ( file_type )
         else:
             file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] )
             if dataset.datatype.composite_type:
                 precreated_name = 'Uploaded Composite Dataset (%s)' % ( file_type )
             writable_files_offset = 1
             dataset.primary_file = file_bunch.path
             dataset.space_to_tab = file_bunch.space_to_tab
             dataset.precreated_name = file_bunch.precreated_name
             dataset.name = file_bunch.precreated_name
             dataset.warnings.extend( file_bunch.warnings )
         if dataset.primary_file is None:#remove this before finish, this should create an empty dataset
             raise Exception( 'No primary dataset file was available for composite upload' )
         keys = [ value.name for value in writable_files.values() ]
         for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ):
             key = keys[ i + writable_files_offset ]
             if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                 dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                 dataset.composite_files[ key ] = None
             else:
                 file_bunch, warnings = get_one_filename( group_incoming )
                 if file_bunch.path:
                     dataset.composite_files[ key ] = file_bunch.__dict__
                 else:
                     dataset.composite_files[ key ] = None
                     if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                         dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
         return [ dataset ]
     else:
         datasets = get_filenames( context[ self.name ][0] )
         rval = []
         for dataset in datasets:
             dataset.file_type = file_type
             dataset.datatype = d_type
             dataset.ext = self.get_datatype_ext( trans, context )
             dataset.dbkey = dbkey
             rval.append( dataset )
         return rval
 def filter_by(self, **kwds):
     return Bunch(first=lambda: None)
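
The get_file_name helper used in get_data_file_filename above strips both Windows and POSIX directory components from a browser-supplied filename; a small stand-alone sketch of that behaviour, with placeholder paths:

def get_file_name(file_name):
    file_name = file_name.split('\\')[-1]
    return file_name.split('/')[-1]

print(get_file_name('C:\\Users\\me\\reads.fastq'))  # reads.fastq
print(get_file_name('/home/me/reads.fastq'))        # reads.fastq
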
Example #24
0
class Repository(Dictifiable):
    dict_collection_visible_keys = [
        'id', 'name', 'type', 'remote_repository_url', 'homepage_url',
        'description', 'user_id', 'private', 'deleted', 'times_downloaded',
        'deprecated'
    ]
    dict_element_visible_keys = [
        'id', 'name', 'type', 'remote_repository_url', 'homepage_url',
        'description', 'long_description', 'user_id', 'private', 'deleted',
        'times_downloaded', 'deprecated'
    ]
    file_states = Bunch(NORMAL='n',
                        NEEDS_MERGING='m',
                        MARKED_FOR_REMOVAL='r',
                        MARKED_FOR_ADDITION='a',
                        NOT_TRACKED='?')

    def __init__(self,
                 id=None,
                 name=None,
                 type=None,
                 remote_repository_url=None,
                 homepage_url=None,
                 description=None,
                 long_description=None,
                 user_id=None,
                 private=False,
                 deleted=None,
                 email_alerts=None,
                 times_downloaded=0,
                 deprecated=False):
        self.id = id
        self.name = name or "Unnamed repository"
        self.type = type
        self.remote_repository_url = remote_repository_url
        self.homepage_url = homepage_url
        self.description = description
        self.long_description = long_description
        self.user_id = user_id
        self.private = private
        self.deleted = deleted
        self.email_alerts = email_alerts
        self.times_downloaded = times_downloaded
        self.deprecated = deprecated

    @property
    def admin_role(self):
        admin_role_name = '%s_%s_admin' % (str(
            self.name), str(self.user.username))
        for rra in self.roles:
            role = rra.role
            if str(role.name) == admin_role_name:
                return role
        raise Exception(
            'Repository %s owned by %s is not associated with a required administrative role.'
            % (str(self.name), str(self.user.username)))

    def allow_push(self, app):
        repo = hg_util.get_repo_for_repository(app, repository=self)
        return repo.ui.config('web', 'allow_push')

    def can_change_type(self, app):
        # Allow changing the type only if the repository has no contents, has never been installed, or has
        # never been changed from the default type.
        if self.is_new(app):
            return True
        if self.times_downloaded == 0:
            return True
        if self.type == rt_util.UNRESTRICTED:
            return True
        return False

    def can_change_type_to(self, app, new_type_label):
        if self.type == new_type_label:
            return False
        if self.can_change_type(app):
            new_type = app.repository_types_registry.get_class_by_label(
                new_type_label)
            if new_type.is_valid_for_type(app, self):
                return True
        return False

    def get_changesets_for_setting_metadata(self, app):
        type_class = self.get_type_class(app)
        return type_class.get_changesets_for_setting_metadata(app, self)

    def get_repository_dependencies(self, app, changeset, toolshed_url):
        # We aren't concerned with repositories of type tool_dependency_definition here if a
        # repository_metadata record is not returned because repositories of this type will never
        # have repository dependencies. However, if a readme file is uploaded, or some other change
        # is made that does not create a new downloadable changeset revision but updates the existing
        # one, we still want to be able to get repository dependencies.
        repository_metadata = metadata_util.get_current_repository_metadata_for_changeset_revision(
            app, self, changeset)
        if repository_metadata:
            metadata = repository_metadata.metadata
            if metadata:
                rb = relation_builder.RelationBuilder(app, self,
                                                      repository_metadata,
                                                      toolshed_url)
                repository_dependencies = rb.get_repository_dependencies_for_changeset_revision(
                )
                if repository_dependencies:
                    return repository_dependencies
        return None

    def get_type_class(self, app):
        return app.repository_types_registry.get_class_by_label(self.type)

    def get_tool_dependencies(self, app, changeset_revision):
        changeset_revision = metadata_util.get_next_downloadable_changeset_revision(
            app, self, changeset_revision)
        for downloadable_revision in self.downloadable_revisions:
            if downloadable_revision.changeset_revision == changeset_revision:
                return downloadable_revision.metadata.get(
                    'tool_dependencies', {})
        return {}

    def installable_revisions(self, app, sort_revisions=True):
        return metadata_util.get_metadata_revisions(
            app, self, sort_revisions=sort_revisions)

    def is_new(self, app):
        repo = hg_util.get_repo_for_repository(app, repository=self)
        tip_ctx = repo.changectx(repo.changelog.tip())
        return tip_ctx.rev() < 0

    def repo_path(self, app):
        return app.hgweb_config_manager.get_entry(
            os.path.join("repos", self.user.username, self.name))

    def revision(self, app):
        repo = hg_util.get_repo_for_repository(app, repository=self)
        tip_ctx = repo.changectx(repo.changelog.tip())
        return "%s:%s" % (str(
            tip_ctx.rev()), str(repo.changectx(repo.changelog.tip())))

    def set_allow_push(self, app, usernames, remove_auth=''):
        allow_push = util.listify(self.allow_push(app))
        if remove_auth:
            allow_push.remove(remove_auth)
        else:
            for username in util.listify(usernames):
                if username not in allow_push:
                    allow_push.append(username)
        allow_push = '%s\n' % ','.join(allow_push)
        repo = hg_util.get_repo_for_repository(app, repository=self)
        # Why doesn't the following work?
        # repo.ui.setconfig( 'web', 'allow_push', allow_push )
        lines = repo.opener('hgrc', 'rb').readlines()
        fp = repo.opener('hgrc', 'wb')
        for line in lines:
            if line.startswith('allow_push'):
                fp.write('allow_push = %s' % allow_push)
            else:
                fp.write(line)
        fp.close()

    def tip(self, app):
        repo = hg_util.get_repo_for_repository(app, repository=self)
        return str(repo.changectx(repo.changelog.tip()))

    def to_dict(self, view='collection', value_mapper=None):
        rval = super(Repository, self).to_dict(view=view,
                                               value_mapper=value_mapper)
        if 'user_id' in rval:
            rval['owner'] = self.user.username
        return rval
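
A hedged sketch of exercising the Repository class above outside the ORM: repo.user is normally supplied by the SQLAlchemy mapping, so a Bunch stands in for it here, and the repository values are placeholders.

from galaxy.util.bunch import Bunch

repo = Repository(name='example_repo', user_id=1, private=False)
repo.user = Bunch(username='alice')              # stand-in for the mapped User object
print(repo.to_dict(view='collection')['owner'])  # alice
print(Repository.file_states.NEEDS_MERGING)      # m
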
Example #25
0
            )
            if self.area:
                as_dict["area"] = True

            if self.input_type == INPUT_TYPE.INTEGER:
                as_dict["value"] = "0"
            if self.input_type == INPUT_TYPE.FLOAT:
                as_dict["value"] = "0.0"
            elif self.input_type == INPUT_TYPE.DATA_COLLECTON:
                as_dict["collection_type"] = self.collection_type

        return as_dict


OUTPUT_TYPE = Bunch(
    GLOB="glob",
    STDOUT="stdout",
)


# TODO: Different subclasses - this is representing different types of things.
class OutputInstance:
    def __init__(self,
                 name,
                 output_data_type,
                 output_type,
                 path=None,
                 fields=None):
        self.name = name
        self.output_data_type = output_data_type
        self.output_type = output_type
        self.path = path
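
A small hedged sketch instantiating the class above with the OUTPUT_TYPE Bunch; the field values are placeholders.

out = OutputInstance(name='out1',
                     output_data_type='tabular',
                     output_type=OUTPUT_TYPE.GLOB,
                     path='outputs/*.tsv')
print(out.name, out.output_type)  # out1 glob
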
Example #26
0
from galaxy import config, tools, web, util
from galaxy.web.base.controller import BaseController, BaseAPIController
from galaxy.util.bunch import Bunch

messages = Bunch(NO_TOOL="no tool")


class ToolsController(BaseAPIController):
    """
    RESTful controller for interactions with tools.
    """
    @web.expose_api
    def index(self, trans, **kwds):
        """
        GET /api/tools: returns a list of tools defined by parameters
            parameters:
                in_panel  - if true, tools are returned in panel structure, 
                            including sections and labels
                trackster - if true, only tools that are compatible with 
                            Trackster are returned
        """

        # Read params.
        in_panel = util.string_as_bool(kwds.get('in_panel', 'True'))
        trackster = util.string_as_bool(kwds.get('trackster', 'False'))

        # Create return value.
        return self.app.toolbox.to_dict(trans,
                                        in_panel=in_panel,
                                        trackster=trackster)
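
The docstring above documents the GET /api/tools parameters; a hedged client-side sketch using requests follows. The Galaxy URL and API key are placeholders, not from the original source.

import requests

response = requests.get(
    'https://galaxy.example.org/api/tools',
    params={'in_panel': 'False', 'trackster': 'False', 'key': '<your-api-key>'},
)
response.raise_for_status()
for entry in response.json():
    print(entry.get('id'), entry.get('name'))
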
Example #27
0
""" There is some shared logic between matching/multiplying inputs in workflows
and tools. This module is meant to capture some general permutation logic that
can be applicable for both cases but will only be used in the newer tools case
first.

Maybe this doesn't make sense and maybe much of this stuff could be replaced
with itertools product and permutations. These are open questions.
"""
from galaxy.exceptions import MessageException
from galaxy.util.bunch import Bunch

input_classification = Bunch(
    SINGLE="single",
    MATCHED="matched",
    MULTIPLIED="multiplied",
)


class InputMatchedException(MessageException):
    """ Indicates problem matching inputs while building up inputs
    permutations. """


def expand_multi_inputs(inputs, classifier, key_filter=None):
    key_filter = key_filter or (lambda x: True)

    single_inputs, matched_multi_inputs, multiplied_multi_inputs = __split_inputs(
        inputs,
        classifier,
        key_filter
    )
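
A hedged sketch of a classifier in the spirit of the module above: it tags each input key with one of the input_classification categories. The list-means-matched convention used here is an assumption for illustration, not the actual Galaxy classifier.

def simple_classifier(inputs, input_key):
    # Illustrative only: treat list values as inputs to be matched pairwise.
    value = inputs[input_key]
    if isinstance(value, list):
        return input_classification.MATCHED, value
    return input_classification.SINGLE, value

inputs = {'reference': 'hg38.fa', 'reads': ['sample1.fq', 'sample2.fq']}
for key in inputs:
    classification, value = simple_classifier(inputs, key)
    print(key, classification)
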
Example #28
0
 def encode_runtime_state(self, trans, state):
     fake_tool = Bunch(inputs=self.get_runtime_inputs())
     return state.encode(fake_tool, trans.app)
Example #29
0
def main():
    parser = optparse.OptionParser()
    parser.add_option(
        '-b',
        '--buffer',
        dest='buffer',
        type='int',
        default=1000000,
        help=
        'Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.'
    )
    parser.add_option(
        '-d',
        '--index_depth',
        dest='index_depth',
        type='int',
        default=3,
        help='Depth to use on file-based offset indexing. Default: 3.')
    parser.add_option(
        '-p',
        '--keep_partial',
        action='store_true',
        dest='keep_partial',
        default=False,
        help='Keep rows in first input which are missing identifiers.')
    parser.add_option(
        '-u',
        '--keep_unmatched',
        action='store_true',
        dest='keep_unmatched',
        default=False,
        help=
        'Keep rows in first input which are not joined with the second input.')
    parser.add_option(
        '-f',
        '--fill_options_file',
        dest='fill_options_file',
        type='str',
        default=None,
        help='Fill empty columns with values from a JSONified file.')

    options, args = parser.parse_args()

    fill_options = None
    if options.fill_options_file is not None:
        try:
            fill_options = Bunch(**stringify_dictionary_keys(
                json.load(open(options.fill_options_file))
            ))  # json.load( open( options.fill_options_file ) )
        except Exception as e:
            print("Warning: Ignoring fill options due to json error (%s)." % e)
    if fill_options is None:
        fill_options = Bunch()
    if 'fill_unjoined_only' not in fill_options:
        fill_options.fill_unjoined_only = True
    if 'file1_columns' not in fill_options:
        fill_options.file1_columns = None
    if 'file2_columns' not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except:
        print("Error parsing command line.", file=sys.stderr)
        sys.exit()

    # Character for splitting fields and joining lines
    split = "\t"

    return join_files(filename1,
                      column1,
                      filename2,
                      column2,
                      out_filename,
                      split,
                      options.buffer,
                      options.keep_unmatched,
                      options.keep_partial,
                      options.index_depth,
                      fill_options=fill_options)
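
The fill_options handling above loads arbitrary JSON into a Bunch and then backfills missing keys with defaults; the same pattern in isolation, sketched with an inline JSON string instead of a file:

import json
from galaxy.util.bunch import Bunch

fill_options = Bunch(**json.loads('{"fill_unjoined_only": false, "file1_columns": [2, 3]}'))
if 'fill_unjoined_only' not in fill_options:
    fill_options.fill_unjoined_only = True
if 'file2_columns' not in fill_options:
    fill_options.file2_columns = None
print(fill_options.fill_unjoined_only, fill_options.file1_columns, fill_options.file2_columns)
# False [2, 3] None
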
Example #30
0
 def decode_runtime_state(self, trans, string):
     fake_tool = Bunch(inputs=self.get_runtime_inputs())
     state = galaxy.tools.DefaultToolState()
     state.decode(string, fake_tool, trans.app)
     return state
Example #31
0
class ToolShedRepository(object):
    dict_collection_visible_keys = ['id', 'tool_shed', 'name', 'owner', 'installed_changeset_revision', 'changeset_revision', 'ctx_rev', 'includes_datatypes',
                                    'tool_shed_status', 'deleted', 'uninstalled', 'dist_to_shed', 'status', 'error_message']
    dict_element_visible_keys = ['id', 'tool_shed', 'name', 'owner', 'installed_changeset_revision', 'changeset_revision', 'ctx_rev', 'includes_datatypes',
                                 'tool_shed_status', 'deleted', 'uninstalled', 'dist_to_shed', 'status', 'error_message']
    installation_status = Bunch(NEW='New',
                                CLONING='Cloning',
                                SETTING_TOOL_VERSIONS='Setting tool versions',
                                INSTALLING_REPOSITORY_DEPENDENCIES='Installing repository dependencies',
                                INSTALLING_TOOL_DEPENDENCIES='Installing tool dependencies',
                                LOADING_PROPRIETARY_DATATYPES='Loading proprietary datatypes',
                                INSTALLED='Installed',
                                DEACTIVATED='Deactivated',
                                ERROR='Error',
                                UNINSTALLED='Uninstalled')
    states = Bunch(INSTALLING='running',
                   OK='ok',
                   WARNING='queued',
                   ERROR='error',
                   UNINSTALLED='deleted_new')

    def __init__(self, id=None, create_time=None, tool_shed=None, name=None, description=None, owner=None, installed_changeset_revision=None,
                 changeset_revision=None, ctx_rev=None, metadata=None, includes_datatypes=False, tool_shed_status=None, deleted=False,
                 uninstalled=False, dist_to_shed=False, status=None, error_message=None):
        self.id = id
        self.create_time = create_time
        self.tool_shed = tool_shed
        self.name = name
        self.description = description
        self.owner = owner
        self.installed_changeset_revision = installed_changeset_revision
        self.changeset_revision = changeset_revision
        self.ctx_rev = ctx_rev
        self.metadata = metadata
        self.includes_datatypes = includes_datatypes
        self.tool_shed_status = tool_shed_status
        self.deleted = deleted
        self.uninstalled = uninstalled
        self.dist_to_shed = dist_to_shed
        self.status = status
        self.error_message = error_message

    def as_dict(self, value_mapper=None):
        return self.to_dict(view='element', value_mapper=value_mapper)

    @property
    def can_install(self):
        return self.status == self.installation_status.NEW

    @property
    def can_reset_metadata(self):
        return self.status == self.installation_status.INSTALLED

    @property
    def can_uninstall(self):
        return self.status != self.installation_status.UNINSTALLED

    @property
    def can_deactivate(self):
        return self.status not in [self.installation_status.DEACTIVATED,
                                   self.installation_status.ERROR,
                                   self.installation_status.UNINSTALLED]

    @property
    def can_reinstall_or_activate(self):
        return self.deleted

    def get_sharable_url(self, app):
        tool_shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(app, self.tool_shed)
        if tool_shed_url:
            # Append a slash to the tool shed URL, because urlparse.urljoin will eliminate
            # the last part of a URL if it does not end with a forward slash.
            tool_shed_url = '%s/' % tool_shed_url
            return urljoin(tool_shed_url, 'view/%s/%s' % (self.owner, self.name))
        return tool_shed_url

    def get_shed_config_filename(self):
        shed_config_filename = None
        if self.metadata:
            shed_config_filename = self.metadata.get('shed_config_filename', shed_config_filename)
        return shed_config_filename

    def get_shed_config_dict(self, app, default=None):
        """
        Return the in-memory version of the shed_tool_conf file, which is stored in the config_elems entry
        in the shed_tool_conf_dict.
        """

        def _is_valid_shed_config_filename(filename):
            for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True):
                if filename == shed_tool_conf_dict['config_filename']:
                    return True
            return False

        if not self.shed_config_filename or not _is_valid_shed_config_filename(self.shed_config_filename):
            self.guess_shed_config(app, default=default)
        if self.shed_config_filename:
            for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True):
                if self.shed_config_filename == shed_tool_conf_dict['config_filename']:
                    return shed_tool_conf_dict
        return default

    def get_tool_relative_path(self, app):
        shed_conf_dict = self.get_shed_config_dict(app)
        tool_path = None
        relative_path = None
        if shed_conf_dict:
            tool_path = shed_conf_dict['tool_path']
            relative_path = os.path.join(self.tool_shed_path_name, 'repos', self.owner, self.name, self.installed_changeset_revision)
        return tool_path, relative_path

    def guess_shed_config(self, app, default=None):
        tool_ids = []
        metadata = self.metadata or {}
        for tool in metadata.get('tools', []):
            tool_ids.append(tool.get('guid'))
        for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True):
            name = shed_tool_conf_dict['config_filename']
            for elem in shed_tool_conf_dict['config_elems']:
                if elem.tag == 'tool':
                    for sub_elem in elem.findall('id'):
                        tool_id = sub_elem.text.strip()
                        if tool_id in tool_ids:
                            self.shed_config_filename = name
                            return shed_tool_conf_dict
                elif elem.tag == "section":
                    for tool_elem in elem.findall('tool'):
                        for sub_elem in tool_elem.findall('id'):
                            tool_id = sub_elem.text.strip()
                            if tool_id in tool_ids:
                                self.shed_config_filename = name
                                return shed_tool_conf_dict
        if self.includes_datatypes or self.includes_data_managers:
            # We need to search by file paths here, which is less desirable.
            tool_shed = common_util.remove_protocol_and_port_from_tool_shed_url(self.tool_shed)
            for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True):
                tool_path = shed_tool_conf_dict['tool_path']
                relative_path = os.path.join(tool_path, tool_shed, 'repos', self.owner, self.name, self.installed_changeset_revision)
                if os.path.exists(relative_path):
                    self.shed_config_filename = shed_tool_conf_dict['config_filename']
                    return shed_tool_conf_dict
        return default

    @property
    def has_readme_files(self):
        if self.metadata:
            return 'readme_files' in self.metadata
        return False

    @property
    def has_repository_dependencies(self):
        if self.metadata:
            repository_dependencies_dict = self.metadata.get('repository_dependencies', {})
            repository_dependencies = repository_dependencies_dict.get('repository_dependencies', [])
            # [["http://localhost:9009", "package_libgtextutils_0_6", "test", "e2003cbf18cd", "True", "True"]]
            for rd_tup in repository_dependencies:
                tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \
                    common_util.parse_repository_dependency_tuple(rd_tup)
                if not asbool(only_if_compiling_contained_td):
                    return True
        return False

    @property
    def has_repository_dependencies_only_if_compiling_contained_td(self):
        if self.metadata:
            repository_dependencies_dict = self.metadata.get('repository_dependencies', {})
            repository_dependencies = repository_dependencies_dict.get('repository_dependencies', [])
            # [["http://localhost:9009", "package_libgtextutils_0_6", "test", "e2003cbf18cd", "True", "True"]]
            for rd_tup in repository_dependencies:
                tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \
                    common_util.parse_repository_dependency_tuple(rd_tup)
                if not asbool(only_if_compiling_contained_td):
                    return False
            return True
        return False

    @property
    def in_error_state(self):
        return self.status == self.installation_status.ERROR

    @property
    def includes_data_managers(self):
        if self.metadata:
            return bool(len(self.metadata.get('data_manager', {}).get('data_managers', {})))
        return False

    @property
    def includes_tools(self):
        if self.metadata:
            return 'tools' in self.metadata
        return False

    @property
    def includes_tools_for_display_in_tool_panel(self):
        if self.includes_tools:
            tool_dicts = self.metadata['tools']
            for tool_dict in tool_dicts:
                if tool_dict.get('add_to_tool_panel', True):
                    return True
        return False

    @property
    def includes_tool_dependencies(self):
        if self.metadata:
            return 'tool_dependencies' in self.metadata
        return False

    @property
    def includes_workflows(self):
        if self.metadata:
            return 'workflows' in self.metadata
        return False

    @property
    def installed_repository_dependencies(self):
        """Return the repository's repository dependencies that are currently installed."""
        installed_required_repositories = []
        for required_repository in self.repository_dependencies:
            if required_repository.status == self.installation_status.INSTALLED:
                installed_required_repositories.append(required_repository)
        return installed_required_repositories

    @property
    def installed_tool_dependencies(self):
        """Return the repository's tool dependencies that are currently installed, but possibly in an error state."""
        installed_dependencies = []
        for tool_dependency in self.tool_dependencies:
            if tool_dependency.status in [ToolDependency.installation_status.INSTALLED]:
                installed_dependencies.append(tool_dependency)
        return installed_dependencies

    @property
    def is_deprecated_in_tool_shed(self):
        if self.tool_shed_status:
            return asbool(self.tool_shed_status.get('repository_deprecated', False))
        return False

    @property
    def is_deactivated_or_installed(self):
        return self.status in [self.installation_status.DEACTIVATED,
                               self.installation_status.INSTALLED]

    @property
    def is_installed(self):
        return self.status == self.installation_status.INSTALLED

    @property
    def is_latest_installable_revision(self):
        if self.tool_shed_status:
            return asbool(self.tool_shed_status.get('latest_installable_revision', False))
        return False

    @property
    def is_new(self):
        return self.status == self.installation_status.NEW

    @property
    def missing_repository_dependencies(self):
        """Return the repository's repository dependencies that are not currently installed, and may not ever have been installed."""
        missing_required_repositories = []
        for required_repository in self.repository_dependencies:
            if required_repository.status not in [self.installation_status.INSTALLED]:
                missing_required_repositories.append(required_repository)
        return missing_required_repositories

    @property
    def missing_tool_dependencies(self):
        """Return the repository's tool dependencies that are not currently installed, and may not ever have been installed."""
        missing_dependencies = []
        for tool_dependency in self.tool_dependencies:
            if tool_dependency.status not in [ToolDependency.installation_status.INSTALLED]:
                missing_dependencies.append(tool_dependency)
        return missing_dependencies

    def repo_files_directory(self, app):
        repo_path = self.repo_path(app)
        if repo_path:
            return os.path.join(repo_path, self.name)
        return None

    def repo_path(self, app):
        tool_shed = common_util.remove_protocol_and_port_from_tool_shed_url(self.tool_shed)
        for shed_tool_conf_dict in app.toolbox.dynamic_confs(include_migrated_tool_conf=True):
            tool_path = shed_tool_conf_dict['tool_path']
            relative_path = os.path.join(tool_path, tool_shed, 'repos', self.owner, self.name, self.installed_changeset_revision)
            if os.path.exists(relative_path):
                return relative_path
        return None

    @property
    def repository_dependencies(self):
        """
        Return all of this repository's repository dependencies, ignoring their attributes like prior_installation_required and
        only_if_compiling_contained_td.
        """
        required_repositories = []
        for rrda in self.required_repositories:
            repository_dependency = rrda.repository_dependency
            required_repository = repository_dependency.repository
            if required_repository:
                required_repositories.append(required_repository)
        return required_repositories

    @property
    def repository_dependencies_being_installed(self):
        """Return the repository's repository dependencies that are currently being installed."""
        required_repositories_being_installed = []
        for required_repository in self.repository_dependencies:
            if required_repository.status in [self.installation_status.CLONING,
                                              self.installation_status.INSTALLING_REPOSITORY_DEPENDENCIES,
                                              self.installation_status.INSTALLING_TOOL_DEPENDENCIES,
                                              self.installation_status.LOADING_PROPRIETARY_DATATYPES,
                                              self.installation_status.SETTING_TOOL_VERSIONS]:
                required_repositories_being_installed.append(required_repository)
        return required_repositories_being_installed

    @property
    def repository_dependencies_missing_or_being_installed(self):
        """Return the repository's repository dependencies that are either missing or currently being installed."""
        required_repositories_missing_or_being_installed = []
        for required_repository in self.repository_dependencies:
            if required_repository.status in [self.installation_status.ERROR,
                                              self.installation_status.INSTALLING,
                                              self.installation_status.NEVER_INSTALLED,
                                              self.installation_status.UNINSTALLED]:
                required_repositories_missing_or_being_installed.append(required_repository)
        return required_repositories_missing_or_being_installed

    @property
    def repository_dependencies_with_installation_errors(self):
        """Return the repository's repository dependencies that have installation errors."""
        required_repositories_with_installation_errors = []
        for required_repository in self.repository_dependencies:
            if required_repository.status == self.installation_status.ERROR:
                required_repositories_with_installation_errors.append(required_repository)
        return required_repositories_with_installation_errors

    @property
    def requires_prior_installation_of(self):
        """
        Return a list of repository dependency tuples like (tool_shed, name, owner, changeset_revision, prior_installation_required) for this
        repository's repository dependencies where prior_installation_required is True.  By definition, repository dependencies are required to
        be installed in order for this repository to function correctly.  However, repository dependencies defined for this repository with
        prior_installation_required set to True fall into a special category: the required repositories must be installed before this
        repository is installed.  Among other things, this enables these "special" repository dependencies to include information needed for
        the successful installation of this repository.  This method is not used during the initial installation of this repository, but only
        after it has been installed (metadata must be set for this repository in order for this method to be useful).
        """
        required_rd_tups_that_must_be_installed = []
        if self.has_repository_dependencies:
            rd_tups = self.metadata['repository_dependencies']['repository_dependencies']
            for rd_tup in rd_tups:
                if len(rd_tup) == 5:
                    tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \
                        common_util.parse_repository_dependency_tuple(rd_tup, contains_error=False)
                    if asbool(prior_installation_required):
                        required_rd_tups_that_must_be_installed.append((tool_shed, name, owner, changeset_revision, 'True', 'False'))
                elif len(rd_tup) == 6:
                    tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \
                        common_util.parse_repository_dependency_tuple(rd_tup, contains_error=False)
                    # The repository dependency will only be required to be previously installed if it does not fall into the category of
                    # a repository that must be installed only so that its contained tool dependency can be used for compiling the tool
                    # dependency of the dependent repository.
                    if not asbool(only_if_compiling_contained_td):
                        if asbool(prior_installation_required):
                            required_rd_tups_that_must_be_installed.append((tool_shed, name, owner, changeset_revision, 'True', 'False'))
        return required_rd_tups_that_must_be_installed

    @property
    def revision_update_available(self):
        # This method should be named update_available, but since it is no longer possible to drop a table column using migration scripts
        # with the sqlite database (see ~/galaxy/model/migrate/versions/0016_drop_update_available_col_add_tool_shed_status_col.py), we
        # have to name it in such a way that it will not conflict with the eliminated tool_shed_repository.update_available column (which
        # cannot be eliminated if using the sqlite database).
        if self.tool_shed_status:
            return asbool(self.tool_shed_status.get('revision_update', False))
        return False

    def set_shed_config_filename(self, value):
        self.metadata['shed_config_filename'] = value

    shed_config_filename = property(get_shed_config_filename, set_shed_config_filename)

    def to_dict(self, view='collection', value_mapper=None):
        if value_mapper is None:
            value_mapper = {}
        rval = {}
        try:
            visible_keys = self.__getattribute__('dict_' + view + '_visible_keys')
        except AttributeError:
            raise Exception('Unknown API view: %s' % view)
        for key in visible_keys:
            try:
                rval[key] = self.__getattribute__(key)
                if key in value_mapper:
                    rval[key] = value_mapper.get(key, rval[key])
            except AttributeError:
                rval[key] = None
        return rval

    @property
    def tool_dependencies_being_installed(self):
        dependencies_being_installed = []
        for tool_dependency in self.tool_dependencies:
            if tool_dependency.status == ToolDependency.installation_status.INSTALLING:
                dependencies_being_installed.append(tool_dependency)
        return dependencies_being_installed

    @property
    def tool_dependencies_installed_or_in_error(self):
        """Return the repository's tool dependencies that are currently installed, but possibly in an error state."""
        installed_dependencies = []
        for tool_dependency in self.tool_dependencies:
            if tool_dependency.status in [ToolDependency.installation_status.INSTALLED,
                                          ToolDependency.installation_status.ERROR]:
                installed_dependencies.append(tool_dependency)
        return installed_dependencies

    @property
    def tool_dependencies_missing_or_being_installed(self):
        dependencies_missing_or_being_installed = []
        for tool_dependency in self.tool_dependencies:
            if tool_dependency.status in [ToolDependency.installation_status.ERROR,
                                          ToolDependency.installation_status.INSTALLING,
                                          ToolDependency.installation_status.NEVER_INSTALLED,
                                          ToolDependency.installation_status.UNINSTALLED]:
                dependencies_missing_or_being_installed.append(tool_dependency)
        return dependencies_missing_or_being_installed

    @property
    def tool_dependencies_with_installation_errors(self):
        dependencies_with_installation_errors = []
        for tool_dependency in self.tool_dependencies:
            if tool_dependency.status == ToolDependency.installation_status.ERROR:
                dependencies_with_installation_errors.append(tool_dependency)
        return dependencies_with_installation_errors

    @property
    def tool_shed_path_name(self):
        tool_shed_url = self.tool_shed
        if tool_shed_url.find(':') > 0:
            # Eliminate the port, if any, since it will result in an invalid directory name.
            tool_shed_url = tool_shed_url.split(':')[0]
        return tool_shed_url.rstrip('/')

    @property
    def tuples_of_repository_dependencies_needed_for_compiling_td(self):
        """
        Return tuples defining this repository's repository dependencies that are necessary only for compiling this repository's tool
        dependencies.
        """
        rd_tups_of_repositories_needed_for_compiling_td = []
        if self.metadata:
            repository_dependencies = self.metadata.get('repository_dependencies', None)
            rd_tups = repository_dependencies['repository_dependencies']
            for rd_tup in rd_tups:
                if len(rd_tup) == 6:
                    tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = rd_tup
                    if asbool(only_if_compiling_contained_td):
                        rd_tups_of_repositories_needed_for_compiling_td.append((tool_shed, name, owner, changeset_revision, 'False', 'True'))
        return rd_tups_of_repositories_needed_for_compiling_td

    @property
    def uninstalled_repository_dependencies(self):
        """Return the repository's repository dependencies that have been uninstalled."""
        uninstalled_required_repositories = []
        for required_repository in self.repository_dependencies:
            if required_repository.status == self.installation_status.UNINSTALLED:
                uninstalled_required_repositories.append(required_repository)
        return uninstalled_required_repositories

    @property
    def uninstalled_tool_dependencies(self):
        """Return the repository's tool dependencies that have been uninstalled."""
        uninstalled_tool_dependencies = []
        for tool_dependency in self.tool_dependencies:
            if tool_dependency.status == ToolDependency.installation_status.UNINSTALLED:
                uninstalled_tool_dependencies.append(tool_dependency)
        return uninstalled_tool_dependencies

    @property
    def upgrade_available(self):
        if self.tool_shed_status:
            if self.is_deprecated_in_tool_shed:
                # Only allow revision upgrades if the repository is not deprecated in the tool shed.
                return False
            return asbool(self.tool_shed_status.get('revision_upgrade', False))
        return False
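
A hedged sketch showing how the status-driven properties above read against the installation_status Bunch; only the attributes those particular properties need are supplied, and the name/owner values are placeholders.

repo = ToolShedRepository(name='example_tool', owner='alice',
                          status=ToolShedRepository.installation_status.NEW)
print(repo.can_install)    # True
print(repo.can_uninstall)  # True  (anything other than UNINSTALLED)
print(repo.is_installed)   # False
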
Example #32
0
STAGING_ACTION_REMOTE = "remote"
STAGING_ACTION_LOCAL = "local"
STAGING_ACTION_NONE = None
STAGING_ACTION_DEFAULT = "default"

# Poor man's enum.
path_type = Bunch(
    # Galaxy input datasets and extra files.
    INPUT="input",
    # Galaxy config and param files.
    CONFIG="config",
    # Files from tool's tool_dir (for now just wrapper if available).
    TOOL="tool",
    # Input work dir files - e.g. metadata files, task-split input files, etc..
    WORKDIR="workdir",
    # Galaxy output datasets in their final home.
    OUTPUT="output",
    # Galaxy from_work_dir output paths and other files (e.g. galaxy.json)
    OUTPUT_WORKDIR="output_workdir",
    # Other fixed tool parameter paths (likely coming from tool data, but not
    # necessarily). Not sure this is the best name...
    UNSTRUCTURED="unstructured",
)

ACTION_DEFAULT_PATH_TYPES = [
    path_type.INPUT,
    path_type.CONFIG,
    path_type.TOOL,
    path_type.WORKDIR,
    path_type.OUTPUT,
Example #33
0
    def get_uploaded_datasets(self, trans, context, override_name=None, override_info=None):
        def get_data_file_filename(data_file, override_name=None, override_info=None, purge=True):
            dataset_name = override_name

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name
            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                return Bunch(type='file', path=data_file['local_filename'], name=dataset_name, purge_source=purge)
            except Exception:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)

        def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None):
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file, 'r').read()

                def start_of_url(content):
                    start_of_url_paste = content.lstrip()[0:8].lower()
                    looks_like_url = False
                    for url_prefix in ["http://", "https://", "ftp://", "file://"]:
                        if start_of_url_paste.startswith(url_prefix):
                            looks_like_url = True
                            break

                    return looks_like_url

                if start_of_url(url_paste):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not start_of_url(line):
                                continue  # non-url line, ignore

                            if "file://" in line:
                                if not trans.user_is_admin:
                                    raise AdminRequiredException()
                                elif not trans.app.config.allow_path_paste:
                                    raise ConfigDoesNotAllowException()
                                upload_path = line[len("file://"):]
                                dataset_name = os.path.basename(upload_path)
                            else:
                                dataset_name = line

                            if override_name:
                                dataset_name = override_name
                            yield Bunch(type='url', path=line, name=dataset_name)
                else:
                    dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    yield Bunch(type='file', path=url_paste_file, name=dataset_name)

        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines', None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress', None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append("All file contents specified in the paste box were ignored.")
                if ftp_files:
                    warnings.append("All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip():  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append("All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                                if not os.path.islink(os.path.join(dirpath, filename)):
                                    ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)),
                                                     'filename' : os.path.basename(path)}
                                    purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info,
                                        purge=purge,
                                    )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.auto_decompress = auto_decompress
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            if file_type is not None:
                file_bunch.file_type = file_type
            if dbkey is not None:
                file_bunch.dbkey = dbkey
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            to_posix_lines = False
            if context.get('to_posix_lines', None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress', None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey

                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey

                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
                if trans.user is None:
                    log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                    for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, text_type):
                                    valid_files.append(unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                                 'filename' : os.path.basename(ftp_file)}
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey
                    rval.append(file_bunch)
            return rval
        file_type = self.get_file_type(context)
        file_count = self.get_file_count(trans, context)
        d_type = self.get_datatype(trans, context)
        dbkey = self.get_dbkey(context)
        tag_using_filenames = context.get('tag_using_filenames', False)
        force_composite = asbool(context.get('force_composite', 'False'))
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for _ in range(file_count)]
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None or force_composite:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            dataset.tag_using_filenames = None
            # load metadata
            files_metadata = context.get(self.metadata_ref, {})
            metadata_name_substition_default_dict = dict(
                (composite_file.substitute_name_with_metadata,
                 d_type.metadata_spec[composite_file.substitute_name_with_metadata].default)
                for composite_file in d_type.composite_files.values()
                if composite_file.substitute_name_with_metadata)
            for meta_name, meta_spec in d_type.metadata_spec.items():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename(meta_value, default=metadata_name_substition_default_dict[meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.name = self.get_composite_dataset_name(context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name = sniff.stream_to_file(StringIO(d_type.generate_primary_file(dataset)), prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.auto_decompress = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.auto_decompress = file_bunch.auto_decompress
                dataset.space_to_tab = file_bunch.space_to_tab
                if file_bunch.file_type:
                    dataset.file_type = file_type
                if file_bunch.dbkey:
                    dataset.dbkey = dbkey
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception('No primary dataset file was available for composite upload')
            if not force_composite:
                keys = [value.name for value in writable_files.values()]
            else:
                keys = [str(index) for index in range(file_count)]
            for i, group_incoming in enumerate(groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if not force_composite and group_incoming is None and not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                    dataset.warnings.append("A required composite file (%s) was not specified." % (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        if force_composite:
                            key = group_incoming.get("NAME") or i
                        dataset.composite_files[key] = file_bunch.__dict__
                    elif not force_composite:
                        dataset.composite_files[key] = None
                        if not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                            dataset.warnings.append("A required composite file (%s) was not specified." % (key))
            return [dataset]
        else:
            rval = []
            for i, file_contexts in enumerate(context[self.name]):
                datasets = get_filenames(file_contexts)
                for dataset in datasets:
                    override_file_type = self.get_file_type(context[self.name][i], parent_context=context)
                    d_type = self.get_datatype(trans, context[self.name][i], parent_context=context)
                    dataset.file_type = override_file_type
                    dataset.datatype = d_type
                    dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context)
                    dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context)
                    dataset.tag_using_filenames = tag_using_filenames
                    rval.append(dataset)
            return rval
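# A minimal sketch (not part of the original example) of one element of the
# upload repeat group that get_one_filename()/get_filenames() above consume.
# The key names are taken from the lookups in those helpers; every value shown
# here is an illustrative assumption.
example_upload_group = {
    'file_data': {'local_filename': '/tmp/upload_file_abc123', 'filename': 'reads.fastq'},
    'url_paste': None,              # or a path to a file holding the pasted URLs/content
    'ftp_files': ['reads.fastq'],   # names relative to the user's FTP directory
    'NAME': None,                   # optional override for the dataset name
    'INFO': None,                   # optional override for the dataset info field
    'uuid': '',                     # '' is turned into None by the helpers
    'to_posix_lines': 'Yes',
    'auto_decompress': 'Yes',
    'space_to_tab': None,
    'file_type': 'fastqsanger',
    'dbkey': 'hg38',
}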
    def test_pulsar_objectstore(self):
        # Define real object store used by Pulsar server.
        object_store_config_file = join(self.temp_directory,
                                        "object_store_conf.xml")
        with open(object_store_config_file, "w") as configf:
            config_template = Template("""<?xml version="1.0"?>
<object_store type="disk">
    <files_dir path="${temp_directory}"/>
    <extra_dir type="temp" path="${temp_directory}"/>
    <extra_dir type="job_work" path="${temp_directory}"/>
</object_store>
""")
            config_contents = config_template.safe_substitute(
                temp_directory=self.temp_directory)
            configf.write(config_contents)

        app_conf = dict(
            object_store_config_file=object_store_config_file,
            private_token="12345",
        )
        from .test_utils import test_pulsar_server
        with test_pulsar_server(app_conf=app_conf) as server:
            url = server.application_url
            # Define a proxy Pulsar object store.
            proxy_object_store_config_file = join(
                self.temp_directory, "proxy_object_store_conf.xml")
            with open(proxy_object_store_config_file, "w") as configf:
                config_template = Template("""<?xml version="1.0"?>
<object_store type="pulsar" url="$url" private_token="12345" transport="urllib">
  <!-- private_token is optional - see Pulsar documentation for more information. -->
  <!-- transport is optional, set to curl to use libcurl instead of urllib for communication with Pulsar. -->
</object_store>
""")
                contents = config_template.safe_substitute(url=url)
                configf.write(contents)

            config = Bunch(
                object_store_config_file=proxy_object_store_config_file)
            object_store = build_object_store_from_config(config=config)

            # Test no dataset with id 1 exists.
            absent_dataset = MockDataset(1)
            assert not object_store.exists(absent_dataset)

            # Write empty dataset 2 in second backend, ensure it is empty and
            # exists.
            empty_dataset = MockDataset(2)
            self.__write(b"", "000/dataset_2.dat")
            assert object_store.exists(empty_dataset)
            assert object_store.empty(empty_dataset)

            # Write a non-empty dataset in backend 1, test it is not empty & exists.
            hello_world_dataset = MockDataset(3)
            self.__write(b"Hello World!", "000/dataset_3.dat")
            assert object_store.exists(hello_world_dataset)
            assert not object_store.empty(hello_world_dataset)

            # Test get_data
            data = object_store.get_data(hello_world_dataset)
            assert data == b"Hello World!"

            data = object_store.get_data(hello_world_dataset, start=1, count=6)
            assert data == b"ello W"

            # Test Size

            # Test absent and empty datasets yield size of 0.
            assert object_store.size(absent_dataset) == 0
            assert object_store.size(empty_dataset) == 0
            # Otherwise
            assert object_store.size(
                hello_world_dataset
            ) > 0  # Should this always be the number of bytes?

            # Test percent used (to some degree)
            percent_store_used = object_store.get_store_usage_percent()
            assert percent_store_used > 0.0
            assert percent_store_used < 100.0

            # Test update_from_file
            output_dataset = MockDataset(4)
            output_real_path = join(self.temp_directory, "000",
                                    "dataset_4.dat")
            assert not exists(output_real_path)
            output_working_path = self.__write(
                b"NEW CONTENTS", "job_working_directory1/example_output")
            object_store.update_from_file(output_dataset,
                                          file_name=output_working_path,
                                          create=True)
            assert exists(output_real_path)

            # Test delete
            to_delete_dataset = MockDataset(5)
            to_delete_real_path = self.__write(b"content to be deleted!",
                                               "000/dataset_5.dat")
            assert object_store.exists(to_delete_dataset)
            assert object_store.delete(to_delete_dataset)
            assert not object_store.exists(to_delete_dataset)
            assert not exists(to_delete_real_path)

            # Test json content.
            complex_contents_dataset = MockDataset(6)
            complex_content = b'{"a":6}'
            self.__write(complex_content, "000/dataset_6.dat")
            assert object_store.exists(complex_contents_dataset)
            data = object_store.get_data(complex_contents_dataset)
            assert data == complex_content
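# A minimal sketch (not part of the original test) of the helpers the Pulsar
# object store test above relies on but does not define here.  The real
# MockDataset and __write() used by Galaxy's object store tests may differ;
# these stand-ins are assumptions that make the test flow readable.
import os


class MockDataset(object):
    def __init__(self, id):
        self.id = id
        self.object_store_id = None


def _write(temp_directory, contents, relative_path):
    # Create the file the object store is expected to find and return its path.
    path = os.path.join(temp_directory, relative_path)
    directory = os.path.dirname(path)
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(path, "wb") as f:
        f.write(contents)
    return path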
def __main__():
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '', '--threads', dest='threads', help='The number of threads to use' )
    parser.add_option( '', '--ref_name', dest='ref_name', help='The reference name to change all output matches to' )
    parser.add_option( '', '--ref_source', dest='ref_source', help='Whether the reference is self, cached or from the history' )
    parser.add_option( '', '--ref_sequences', dest='ref_sequences', help='Number of sequences in the reference dataset' )
    parser.add_option( '', '--mirror', dest='mirror', help='Do or do not report mirror image of all gap-free alignments' )
    parser.add_option( '', '--source_select', dest='source_select', help='Whether to use pre-set or cached reference file' )
    parser.add_option( '', '--input1', dest='input1', help='The name of the reference file if using history or reference base name if using cached' )
    parser.add_option( '', '--input2', dest='input2', help='The reads file to align' )
    parser.add_option( '', '--strand', dest='strand', help='Which strand of the read to search, if specifying all parameters' )
    parser.add_option( '', '--match_reward', dest='match_reward', help='Score values for a match (reward)' )
    parser.add_option( '', '--match_penalty', dest='match_penalty', help='Score values for a mismatch (penalty), same as reward when not specified (but reward is)' )
    parser.add_option( '', '--gapped', dest='gapped', help='Perform gapped extension of HSPs (or seeds if gap-free extension is not performed) after first reducing them to anchor points' )
    parser.add_option( '', '--gap_open', dest='gap_open', help='Score penalties for opening a gap' )
    parser.add_option( '', '--gap_extend', dest='gap_extend', help='Score penalties for extending a gap' )
    parser.add_option( '', '--ambiguous', dest='ambiguous', help='Treat as ambiguous nucleotides' )
    parser.add_option( '', '--step', dest='step', help='Offset between the starting positions of successive target words considered for potential seeds' )
    parser.add_option( '', '--masking', dest='masking', help='Dynamically mask the target sequence by excluding any positions that appear in too many alignments from further consideration for seeds' )
    parser.add_option( '', '--seed', dest='seed', help='Seed pattern to require for potential seeds (e.g. 12of19, 14of22, or match)' )
    parser.add_option( '', '--match_length', dest='match_length', help='Seeds require bp word of this length with matches in all positions' )
    parser.add_option( '', '--transition', dest='transition', help='Transition settings, affects the number of allowed transition substitutions in each seed' )
    parser.add_option( '', '--xdrop', dest='xdrop', help='Find HSPs using the xdrop extension method with the given termination threshold instead of using the exact match method' )
    parser.add_option( '', '--hspthresh', dest='hspthresh', help='Score threshold for the x-drop extension method' )
    parser.add_option( '', '--entropy', dest='entropy', help='Whether to adjust for entropy when qualifying HSPs in the x-drop extension method' )
    parser.add_option( '', '--chain', dest='chain', help='Perform chaining of HSPs with no penalties' )
    parser.add_option( '', '--ydrop', dest='ydrop', help='Set the threshold for terminating gapped extension' )
    parser.add_option( '', '--ytrim', dest='ytrim', help='Trim back to peak score if y-drop extension encounters end of sequence' )
    parser.add_option( '', '--gappedthresh', dest='gappedthresh', help='Threshold for gapped extension.  Alignments scoring lower are discarded.' )
    parser.add_option( '', '--filter', dest='filter', help='Filter alignments.' )
    parser.add_option( '', '--identity_min', dest='identity_min', help='Minimum for filtering alignments by their percent identity.' )
    parser.add_option( '', '--identity_max', dest='identity_max', help='Maximum for filtering alignments by their percent identity.' )
    parser.add_option( '', '--coverage_min', dest='coverage_min', help='Minimum for filtering alignments by how much of the input sequence they cover.' )
    parser.add_option( '', '--coverage_max', dest='coverage_max', help='Maximum for filtering alignments by how much of the input sequence they cover.' )
    parser.add_option( '', '--nmatch_min', dest='nmatch_min', help='Minimum for filtering alignments by how many bases they match.' )
    parser.add_option( '', '--nmismatch_max', dest='nmismatch_max', help='Maximum for filtering alignments by the number of mismatches.' )
    parser.add_option( '', '--trivial', dest='trivial', help='Do or do not output a trivial self-alignment block if the target and query sequences are identical.' )
    parser.add_option( '', '--inner', dest='inner', help='Perform additional alignment between the gapped alignment blocks using (presumably) more sensitive alignment parameters.' )
    parser.add_option( '', '--shortcuts_for_yasra', dest='shortcuts_for_yasra', help='Shortcut options to support the Yasra mapping assembler' )
    parser.add_option( '', '--out_format', dest='format', help='The format of the output file (sam, diffs, or tabular (general))' )
    parser.add_option( '', '--output', dest='output', help='The output file' )
    parser.add_option( '', '--lastzSeqsFileDir', dest='lastzSeqsFileDir', help='Directory of local lastz_seqs.loc file' )
    ( options, args ) = parser.parse_args()
    # Output version # of tool
    try:
        tmp = tempfile.NamedTemporaryFile().name
        tmp_stdout = open( tmp, 'wb' )
        proc = subprocess.Popen( args='lastz -v', shell=True, stdout=tmp_stdout )
        tmp_stdout.close()
        returncode = proc.wait()
        stdout = None
        for line in open( tmp_stdout.name, 'rb' ):
            if line.lower().find( 'version' ) >= 0:
                stdout = line.strip()
                break
        if stdout:
            sys.stdout.write( '%s\n' % stdout )
        else:
            raise Exception
    except:
        sys.stdout.write( 'Could not determine Lastz version\n' )

    if options.ref_name:
        ref_name = '[nickname=%s]' % options.ref_name
    else:
        ref_name = ''
    set_options = ''
    # Commonly-used preset options
    if options.source_select == 'pre_set':
        # Handle ref_source
        if options.ref_source == 'self':
            # --mirror is available only if ref_source selection is --self
            if options.mirror == 'yes':
                set_options += '--nomirror '
    else:
        # Full set of user-specified options
        # Handle ref_source
        if options.ref_source == 'self':
            # --mirror is available only if ref_source selection is --self
            if options.mirror == 'yes':
                set_options += '--nomirror '
        else:
            # Using --self automatically enables this option
            if options.trivial == 'no':
                set_options += '--notrivial '
        # Handle --match
        if options.match_reward not in [ "", "0" ]:
            if options.match_penalty in [ "", "0" ]:
                match_penalty = options.match_reward
            else:
                match_penalty = options.match_penalty
            set_options += '--match=%s,%s ' % ( options.match_reward, match_penalty )
        # Handle --gapped
        if options.gapped == 'yes':
            set_options += '--gapped '
            if options.gap_open not in [ "" ]:
                if options.gap_extend in [ "" ]:
                    set_options += '--gap=%s ' % options.gap_open
                else:
                    set_options += '--gap=%s,%s ' % ( options.gap_open, options.gap_extend )
            # Handle --ydrop
            if options.ydrop not in [ "", "0" ]:
                set_options += '--ydrop=%s ' % options.ydrop
            # Handle --ytrim
            if options.ytrim == 'no':
                set_options += '--noytrim '
            # Handle --gappedthresh
            if options.gappedthresh not in [ "", "0" ]:
                set_options += '--gappedthresh=%s ' % options.gappedthresh
            # Handle --inner
            if options.inner not in [ "" ]:
                set_options += '--inner=%s ' % options.inner
        else:
            set_options += '--nogapped '
        # Handle --step
        if options.step not in [ "", "0" ]:
            set_options += '--step=%s ' % options.step
        # Handle --masking
        if options.masking not in [ '0' ]:
            set_options += '--masking=%s ' % options.masking
        # Handle --seed
        if options.seed not in [ "no" ]:
            if options.seed == 'match':
                set_options += '--seed=match%s ' % options.match_length
            else:
                set_options += '--seed=%s ' % options.seed
        # Handle --transition
        if options.transition == '0':
            set_options += '--notransition '
        else:
            set_options += '--transition=%s ' % options.transition
        # Handle --xdrop
        if options.xdrop not in [ "", "0" ]:
            set_options += '--xdrop=%s ' % options.xdrop
        # handle --hspthresh
        if options.hspthresh not in [ "", "0" ]:
            set_options += '--hspthresh=%s ' % options.hspthresh
        # Handle --entropy
        if options.entropy == 'no':
            set_options += '--noentropy '
        else:
            set_options += '--entropy '
        # Handle --chain
        if options.chain == 'no':
            set_options += '--nochain '
        else:
            set_options += '--chain '
        # Handle --filter
        if options.filter not in [ "no" ]:
            if options.filter == 'identity':
                identity_min = options.identity_min
                if options.identity_max in [ "", "0" ] or options.identity_max <= identity_min:
                    identity_max = '100'
                else:
                    identity_max = options.identity_max
                set_options += '--filter=identity:%s..%s ' % ( identity_min, identity_max )
            elif options.filter == 'coverage':
                coverage_min = options.coverage_min
                if options.coverage_max in [ "", "0" ] or options.coverage_max <= coverage_min:
                    coverage_max = '100'
                else:
                    coverage_max = options.coverage_max
                set_options += '--filter=coverage:%s..%s ' % ( coverage_min, coverage_max )
            elif options.filter == 'nmatch':
                set_options += '--filter=nmatch:%s%% ' % options.nmatch_min
            elif options.filter == 'nmismatch':
                set_options += '--filter=nmismatch:0..%s ' % options.nmismatch_max
    # Handle --strand
    set_options += '--strand=%s ' % options.strand
    # Handle --ambiguous
    if options.ambiguous not in [ "no" ]:
        set_options += '--ambiguous=%s ' % options.ambiguous
    # Handle --shortcuts_for_yasra
    if options.shortcuts_for_yasra not in [ 'none' ]:
        set_options += '--%s ' % ( options.shortcuts_for_yasra )
    # Specify input2 and add [fullnames] modifier if output format is diffs
    if options.format == 'diffs':
        input2 = '%s[fullnames]' % options.input2
    else:
        input2 = options.input2
    if options.format == 'tabular':
        # Change output format to general if it's tabular and add field names for tabular output
        format = 'general-'
        tabular_fields = ':score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle'
    elif options.format == 'sam':
        # We currently need to keep headers.
        format = 'sam'
        tabular_fields = ''
    else:
        format = options.format
        tabular_fields = ''
    # Set up our queues
    threads = int( options.threads )
    lastz_job_queue = LastzJobQueue( threads, slots=SLOTS )
    combine_data_queue = CombineDataQueue( options.output )
    if str( options.ref_source ) in [ 'history', 'self' ]:
        # Reference is a fasta dataset from the history or the dataset containing the target sequence itself,
        # so split job across the number of sequences in the dataset ( this could be a HUGE number ).
        try:
            # Ensure there is at least 1 sequence in the dataset ( this may not be necessary ).
            error_msg = "The reference dataset is missing metadata.  Click the pencil icon in the history item and 'auto-detect' the metadata attributes."
            ref_sequences = int( options.ref_sequences )
            if ref_sequences < 1:
                stop_queues( lastz_job_queue, combine_data_queue )
                stop_err( error_msg )
        except:
            stop_queues( lastz_job_queue, combine_data_queue )
            stop_err( error_msg )
        seqs = 0
        fasta_reader = FastaReader( open( options.input1 ) )
        while True:
            # Read the next sequence from the reference dataset
            seq = fasta_reader.next()
            if not seq:
                break
            seqs += 1
            # Create a temporary file to contain the current sequence as input to lastz
            tmp_in_fd, tmp_in_name = tempfile.mkstemp( suffix='.in' )
            tmp_in = os.fdopen( tmp_in_fd, 'wb' )
            # Write the current sequence to the temporary input file
            tmp_in.write( '>%s\n%s\n' % ( seq.name, seq.text ) )
            tmp_in.close()
            # Create a 2nd temporary file to contain the output from lastz execution on the current sequence
            tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' )
            os.close( tmp_out_fd )
            # Generate the command line for calling lastz on the current sequence
            command = 'lastz %s%s %s %s --format=%s%s > %s' % ( tmp_in_name, ref_name, input2, set_options, format, tabular_fields, tmp_out_name )
            # Create a job object
            job = Bunch()
            job.command = command
            job.output = tmp_out_name
            job.cleanup = [ tmp_in_name, tmp_out_name ]
            job.combine_data_queue = combine_data_queue
            # Add another job to the lastz_job_queue.  Execution will wait at this point if the queue is full.
            lastz_job_queue.put( job, block=True )
        # Make sure the value of sequences in the metadata is the same as the number of
        # sequences read from the dataset.  According to Bob, this may not be necessary.
        if ref_sequences != seqs:
            stop_queues( lastz_job_queue, combine_data_queue )
            stop_err( "The value of metadata.sequences (%d) differs from the number of sequences read from the reference (%d)." % ( ref_sequences, seqs ) )
    else:
        # Reference is a locally cached 2bit file, split job across number of chroms in 2bit file
        tbf = TwoBitFile( open( options.input1, 'r' ) )
        for chrom in tbf.keys():
            # Create a temporary file to contain the output from lastz execution on the current chrom
            tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' )
            os.close( tmp_out_fd )
            command = 'lastz %s/%s%s %s %s --format=%s%s >> %s' % \
                ( options.input1, chrom, ref_name, input2, set_options, format, tabular_fields, tmp_out_name )
            # Create a job object
            job = Bunch()
            job.command = command
            job.output = tmp_out_name
            job.cleanup = [ tmp_out_name ]
            job.combine_data_queue = combine_data_queue
            # Add another job to the lastz_job_queue.  Execution will wait at this point if the queue is full.
            lastz_job_queue.put( job, block=True )
    # Stop the lastz_job_queue.
    for t in lastz_job_queue.threads:
        lastz_job_queue.put( STOP_SIGNAL, True )
    # Although all jobs are submitted to the queue, we can't shut down the combine_data_queue
    # until we know that all jobs have been submitted to its queue.  We do this by checking
    # whether all of the threads in the lastz_job_queue have terminated.
    while threading.activeCount() > 2:
        time.sleep( 1 )
    # Now it's safe to stop the combine_data_queue.
    combine_data_queue.put( STOP_SIGNAL )
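# A minimal sketch (not part of the original wrapper) of the threaded queue
# pattern the lastz driver above assumes: worker threads pull Bunch jobs off a
# queue, run the prepared shell command, and hand the finished job to the
# combining queue.  The real LastzJobQueue/CombineDataQueue classes are more
# involved; the function name and sentinel below are assumptions.
import subprocess

STOP_SIGNAL_SKETCH = object()


def lastz_worker_sketch(job_queue):
    while True:
        job = job_queue.get()
        if job is STOP_SIGNAL_SKETCH:
            break
        # Run the prepared lastz command line, then forward the job so its
        # output file can be appended to the final dataset and cleaned up.
        subprocess.call(job.command, shell=True)
        job.combine_data_queue.put(job)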
    def __init__(self, trans, plugin):
        self.trans = trans
        self.log = log

        self.attr = Bunch()
        self.attr.viz_id = plugin.name
        self.attr.history_id = trans.security.encode_id(trans.history.id)
        self.attr.galaxy_config = trans.app.config
        self.attr.redact_username_in_logs = trans.app.config.redact_username_in_logs
        self.attr.galaxy_root_dir = os.path.abspath(
            self.attr.galaxy_config.root)
        self.attr.root = web.url_for("/")
        self.attr.app_root = self.attr.root + "static/plugins/interactive_environments/" + self.attr.viz_id + "/static/"
        self.attr.import_volume = True

        plugin_path = os.path.abspath(plugin.path)

        # Store our template and configuration path
        self.attr.our_config_dir = os.path.join(plugin_path, "config")
        self.attr.our_template_dir = os.path.join(plugin_path, "templates")
        self.attr.HOST = trans.request.host.rsplit(':', 1)[0]

        self.load_deploy_config()
        self.load_allowed_images()
        self.load_container_interface()

        self.attr.docker_hostname = self.attr.viz_config.get(
            "docker", "docker_hostname")
        raw_docker_connect_port = self.attr.viz_config.get(
            "docker", "docker_connect_port")
        self.attr.docker_connect_port = int(
            raw_docker_connect_port) if raw_docker_connect_port else None

        # Generate per-request passwords the IE plugin can use to configure
        # the destination container.
        self.notebook_pw_salt = self.generate_password(length=12)
        self.notebook_pw = self.generate_password(length=24)

        ie_parent_temp_dir = self.attr.viz_config.get(
            "docker", "docker_galaxy_temp_dir") or None
        self.temp_dir = os.path.abspath(
            tempfile.mkdtemp(dir=ie_parent_temp_dir))

        if self.attr.viz_config.getboolean("docker", "wx_tempdir"):
            # Ensure permissions are set
            try:
                os.chmod(self.temp_dir,
                         os.stat(self.temp_dir).st_mode | stat.S_IXOTH)
            except Exception:
                log.error("Could not change permissions of tmpdir %s" %
                          self.temp_dir)
                # continue anyway

        # This duplicates the logic in the proxy manager
        if self.attr.galaxy_config.dynamic_proxy_external_proxy:
            self.attr.proxy_prefix = '/'.join((
                '',
                trans.cookie_path.strip('/'),
                self.attr.galaxy_config.dynamic_proxy_prefix.strip('/'),
                self.attr.viz_id,
            ))
        else:
            self.attr.proxy_prefix = ''
        # If cookie_path is unset (thus '/'), the proxy prefix ends up with
        # multiple leading '/' characters, which will cause the client to
        # request resources from http://dynamic_proxy_prefix
        if self.attr.proxy_prefix.startswith('/'):
            self.attr.proxy_prefix = '/' + self.attr.proxy_prefix.lstrip('/')
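        # A small worked sketch of the collapse above (values are assumptions):
        #   cookie_path = '/', dynamic_proxy_prefix = 'gie_proxy', viz_id = 'jupyter'
        #   '/'.join(('', '', 'gie_proxy', 'jupyter')) -> '//gie_proxy/jupyter'
        #   '/' + '//gie_proxy/jupyter'.lstrip('/')    -> '/gie_proxy/jupyter'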

        assert not self.attr.container_interface \
            or not self.attr.container_interface.publish_port_list_required \
            or (self.attr.container_interface.publish_port_list_required and self.attr.docker_connect_port is not None), \
            "Error: Container interface requires publish port list but docker_connect_port is not set"
Example #37
0
    def __build_metadata_configuration(self, client, job_wrapper,
                                       remote_metadata, remote_job_config):
        metadata_kwds = {}
        if remote_metadata:
            remote_system_properties = remote_job_config.get(
                "system_properties", {})
            remote_galaxy_home = remote_system_properties.get(
                "galaxy_home", None)
            if not remote_galaxy_home:
                raise Exception(NO_REMOTE_GALAXY_FOR_METADATA_MESSAGE)
            metadata_kwds['exec_dir'] = remote_galaxy_home
            outputs_directory = remote_job_config['outputs_directory']
            configs_directory = remote_job_config['configs_directory']
            working_directory = remote_job_config['working_directory']
            # For metadata calculation, we need to build a list of output
            # file objects with real_path indicating the location on the Galaxy
            # server and false_path indicating the location on the compute
            # server. Since Pulsar disables from_work_dir copying as part of
            # the job command line, we need to take the list of output
            # locations on the Pulsar server (produced by
            # self.get_output_files(job_wrapper)) and, for each work_dir
            # output, substitute the effective path on the Pulsar server
            # relative to the remote working directory as the false_path to
            # send to the metadata command generation module.
            work_dir_outputs = self.get_work_dir_outputs(
                job_wrapper, job_working_directory=working_directory)
            outputs = [
                Bunch(false_path=os.path.join(outputs_directory,
                                              os.path.basename(path)),
                      real_path=path)
                for path in self.get_output_files(job_wrapper)
            ]
            for output in outputs:
                for pulsar_workdir_path, real_path in work_dir_outputs:
                    if real_path == output.real_path:
                        output.false_path = pulsar_workdir_path
            metadata_kwds['output_fnames'] = outputs
            metadata_kwds['compute_tmp_dir'] = working_directory
            metadata_kwds['config_root'] = remote_galaxy_home
            default_config_file = os.path.join(remote_galaxy_home,
                                               'config/galaxy.ini')
            metadata_kwds['config_file'] = remote_system_properties.get(
                'galaxy_config_file', default_config_file)
            metadata_kwds['dataset_files_path'] = remote_system_properties.get(
                'galaxy_dataset_files_path', None)
            if PulsarJobRunner.__use_remote_datatypes_conf(client):
                remote_datatypes_config = remote_system_properties.get(
                    'galaxy_datatypes_config_file', None)
                if not remote_datatypes_config:
                    log.warn(NO_REMOTE_DATATYPES_CONFIG)
                    remote_datatypes_config = os.path.join(
                        remote_galaxy_home, 'datatypes_conf.xml')
                metadata_kwds['datatypes_config'] = remote_datatypes_config
            else:
                integrates_datatypes_config = self.app.datatypes_registry.integrated_datatypes_configs
                # Ensure this file gets pushed out to the remote config dir.
                job_wrapper.extra_filenames.append(integrates_datatypes_config)

                metadata_kwds['datatypes_config'] = os.path.join(
                    configs_directory,
                    os.path.basename(integrates_datatypes_config))
        return metadata_kwds
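# A tiny sketch (not part of the original runner) of the real_path/false_path
# substitution described in the comment above: every Galaxy-side output path is
# paired with its location under the Pulsar outputs directory, and outputs that
# are produced in the working directory get the remote working-directory path
# instead.  The real code builds Bunch objects; the plain dicts and example
# paths below are assumptions.
import os

outputs_directory = '/pulsar/staging/123/outputs'
galaxy_output_paths = ['/galaxy/datasets/dataset_41.dat', '/galaxy/datasets/dataset_42.dat']
# (pulsar_workdir_path, real_path) pairs for work_dir outputs
work_dir_outputs = [('/pulsar/staging/123/working/summary.txt', '/galaxy/datasets/dataset_42.dat')]

outputs = [{'real_path': path,
            'false_path': os.path.join(outputs_directory, os.path.basename(path))}
           for path in galaxy_output_paths]
for output in outputs:
    for pulsar_workdir_path, real_path in work_dir_outputs:
        if real_path == output['real_path']:
            output['false_path'] = pulsar_workdir_path

assert outputs[0]['false_path'] == '/pulsar/staging/123/outputs/dataset_41.dat'
assert outputs[1]['false_path'] == '/pulsar/staging/123/working/summary.txt'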
Example #38
0
    def get_uploaded_datasets( self, trans, context, override_name=None, override_info=None ):
        def get_data_file_filename( data_file, override_name=None, override_info=None ):
            dataset_name = override_name
            dataset_info = override_info

            def get_file_name( file_name ):
                file_name = file_name.split( '\\' )[-1]
                file_name = file_name.split( '/' )[-1]
                return file_name
            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name( data_file['filename'] )
                if not dataset_info:
                    dataset_info = 'uploaded file'
                return Bunch( type='file', path=data_file['local_filename'], name=dataset_name )
                # return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
            except:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch( type=None, path=None, name=None )
                # return None, None, None, None, None

        def get_url_paste_urls_or_filename( group_incoming, override_name=None, override_info=None ):
            url_paste_file = group_incoming.get( 'url_paste', None )
            if url_paste_file is not None:
                url_paste = open( url_paste_file, 'r' ).read( 1024 )
                if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ) or url_paste.lstrip().lower().startswith( 'https://' ):
                    url_paste = url_paste.replace( '\r', '' ).split( '\n' )
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ) and not line.lower().startswith( 'https://' ):
                                continue  # non-url line, ignore
                            dataset_name = override_name
                            if not dataset_name:
                                dataset_name = line
                            dataset_info = override_info
                            if not dataset_info:
                                dataset_info = 'uploaded url'
                            yield Bunch( type='url', path=line, name=dataset_name )
                            # yield ( 'url', line, precreated_name, dataset_name, dataset_info )
                else:
                    dataset_name = dataset_info = precreated_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    if override_info:
                        dataset_info = override_info
                    yield Bunch( type='file', path=url_paste_file, name=precreated_name )
                    # yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )

        def get_one_filename( context ):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get( 'NAME', None )
            info = context.get( 'INFO', None )
            uuid = context.get( 'uuid', None ) or None  # Turn '' to None
            warnings = []
            to_posix_lines = False
            if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
                to_posix_lines = True
            space_to_tab = False
            if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
                space_to_tab = True
            file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append( "All file contents specified in the paste box were ignored." )
                if ftp_files:
                    warnings.append( "All FTP uploaded file selections were ignored." )
            elif url_paste is not None and url_paste.strip():  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append( "All FTP uploaded file selections were ignored." )
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                                if not os.path.islink( os.path.join( dirpath, filename ) ):
                                    ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, path ) ),
                                                      'filename' : os.path.basename( path ) }
                                    file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            return file_bunch, warnings

        def get_filenames( context ):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get( 'uuid', None ) or None  # Turn '' to None
            name = context.get( 'NAME', None )
            info = context.get( 'INFO', None )
            to_posix_lines = False
            if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
                to_posix_lines = True
            space_to_tab = False
            if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
                space_to_tab = True
            file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
            for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append( file_bunch )
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, unicode)]
                if trans.user is None:
                    log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                        for filename in filenames:
                            path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                            if not os.path.islink( os.path.join( dirpath, filename ) ):
                                # Normalize filesystem paths
                                if isinstance(path, unicode):
                                    valid_files.append(unicodedata.normalize('NFC', path ))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, ftp_file ) ),
                                  'filename' : os.path.basename( ftp_file ) }
                file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append( file_bunch )
            return rval
        file_type = self.get_file_type( context )
        d_type = self.get_datatype( trans, context )
        dbkey = context.get( 'dbkey', None )
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [ None for _ in writable_files ]
        for group_incoming in context.get( self.name, [] ):
            i = int( group_incoming['__index__'] )
            groups_incoming[ i ] = group_incoming
        if d_type.composite_type is not None:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            # load metadata
            files_metadata = context.get( self.metadata_ref, {} )
            metadata_name_substition_default_dict = dict( [ ( composite_file.substitute_name_with_metadata, d_type.metadata_spec[ composite_file.substitute_name_with_metadata ].default ) for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata ] )
            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[ meta_name ]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename( meta_value, default=metadata_name_substition_default_dict[ meta_name ] )
                        dataset.metadata[ meta_name ] = meta_value
            dataset.precreated_name = dataset.name = self.get_composite_dataset_name( context )
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file( dataset ) ), prefix='upload_auto_primary_file' )
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] )
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.space_to_tab = file_bunch.space_to_tab
                dataset.warnings.extend( warnings )
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception( 'No primary dataset file was available for composite upload' )
            keys = [ value.name for value in writable_files.values() ]
            for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ):
                key = keys[ i + writable_files_offset ]
                if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                    dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                    dataset.composite_files[ key ] = None
                else:
                    file_bunch, warnings = get_one_filename( group_incoming )
                    dataset.warnings.extend( warnings )
                    if file_bunch.path:
                        dataset.composite_files[ key ] = file_bunch.__dict__
                    else:
                        dataset.composite_files[ key ] = None
                        if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                            dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
            return [ dataset ]
        else:
            datasets = get_filenames( context[ self.name ][0] )
            rval = []
            for dataset in datasets:
                dataset.file_type = file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext( trans, context )
                dataset.dbkey = dbkey
                rval.append( dataset )
            return rval
Example #39
0
class DeferredJobQueue( object ):
    job_states = Bunch( READY='ready',
                        WAIT='wait',
                        INVALID='invalid' )

    def __init__( self, app ):
        self.app = app
        self.sa_session = app.model.context.current
        self.queue = Queue()
        self.plugins = {}
        self._load_plugins()
        self.sleeper = Sleeper()
        self.running = True
        self.waiting_jobs = []
        self.__check_jobs_at_startup()
        self.monitor_thread = threading.Thread( target=self.__monitor )
        self.monitor_thread.start()
        log.info( 'Deferred job queue started' )

    def _load_plugins( self ):
        for fname in os.listdir( os.path.dirname( __file__ ) ):
            if not fname.startswith( '_' ) and fname.endswith( '.py' ):
                name = fname[:-3]
                module_name = 'galaxy.jobs.deferred.' + name
                try:
                    module = __import__( module_name )
                except:
                    log.exception( 'Deferred job plugin appears to exist but is not loadable: %s' % module_name )
                    continue
                for comp in module_name.split( "." )[1:]:
                    module = getattr( module, comp )
                if '__all__' not in dir( module ):
                    log.error( 'Plugin "%s" does not contain a list of exported classes in __all__' % module_name )
                    continue
                for obj in module.__all__:
                    display_name = ':'.join( ( module_name, obj ) )
                    plugin = getattr( module, obj )
                    for name in ( 'check_job', 'run_job' ):
                        if name not in dir( plugin ):
                            log.error( 'Plugin "%s" does not contain required method "%s()"' % ( display_name, name ) )
                            break
                    else:
                        self.plugins[obj] = plugin( self.app )
                        self.plugins[obj].job_states = self.job_states
                        log.debug( 'Loaded deferred job plugin: %s' % display_name )

    def __check_jobs_at_startup( self ):
        waiting_jobs = self.sa_session.query( model.DeferredJob ) \
                                      .filter( model.DeferredJob.state == model.DeferredJob.states.WAITING ).all()
        for job in waiting_jobs:
            if not self.__check_job_plugin( job ):
                continue
            if 'check_interval' in dir( self.plugins[job.plugin] ):
                job.check_interval = self.plugins[job.plugin].check_interval
            log.info( 'Recovered deferred job (id: %s) at startup' % job.id )
            # Pass the job ID as opposed to the job, since the monitor thread
            # needs to load it in its own threadlocal scoped session.
            self.waiting_jobs.append( job.id )

    def __monitor( self ):
        while self.running:
            try:
                self.__monitor_step()
            except:
                log.exception( 'Exception in monitor_step' )
            self.sleeper.sleep( 1 )
        log.info( 'job queue stopped' )

    def __monitor_step( self ):
        # TODO: Querying the database with this frequency is bad, we need message passing
        new_jobs = self.sa_session.query( model.DeferredJob ) \
                                  .filter( model.DeferredJob.state == model.DeferredJob.states.NEW ).all()
        for job in new_jobs:
            if not self.__check_job_plugin( job ):
                continue
            job.state = model.DeferredJob.states.WAITING
            self.sa_session.add( job )
            self.sa_session.flush()
            if 'check_interval' in dir( self.plugins[job.plugin] ):
                job.check_interval = self.plugins[job.plugin].check_interval
            self.waiting_jobs.append( job )
        new_waiting = []
        for job in self.waiting_jobs:
            try:
                # Recovered jobs are passed in by ID
                assert type( job ) is int
                job = self.sa_session.query( model.DeferredJob ).get( job )
            except:
                pass
            if job.is_check_time:
                try:
                    job_state = self.plugins[job.plugin].check_job( job )
                except Exception as e:
                    self.__fail_job( job )
                    log.exception( 'Set deferred job %s to error because of an exception in check_job(): %s' % ( job.id, str( e ) ) )
                    continue
                if job_state == self.job_states.READY:
                    try:
                        self.plugins[job.plugin].run_job( job )
                    except Exception as e:
                        self.__fail_job( job )
                        log.exception( 'Set deferred job %s to error because of an exception in run_job(): %s' % ( job.id, str( e ) ) )
                        continue
                elif job_state == self.job_states.INVALID:
                    self.__fail_job( job )
                    log.error( 'Unable to run deferred job (id: %s): Plugin "%s" marked it as invalid' % ( job.id, job.plugin ) )
                    continue
                else:
                    new_waiting.append( job )
                job.last_check = 'now'
            else:
                new_waiting.append( job )
        self.waiting_jobs = new_waiting

    def __check_job_plugin( self, job ):
        if job.plugin not in self.plugins:
            log.error( 'Invalid deferred job plugin: %s' % job.plugin )
            job.state = model.DeferredJob.states.ERROR
            self.sa_session.add( job )
            self.sa_session.flush()
            return False
        return True

    def __check_if_ready_to_run( self, job ):
        return self.plugins[job.plugin].check_job( job )

    def __fail_job( self, job ):
        job.state = model.DeferredJob.states.ERROR
        self.sa_session.add( job )
        self.sa_session.flush()

    def shutdown( self ):
        self.running = False
        self.sleeper.wake()
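
# The queue above only ever calls check_job() and run_job() on a plugin, reads an
# optional check_interval, and compares check_job()'s result against self.job_states.
# Below is a minimal, hypothetical plugin sketch built on just those assumptions;
# the class name, the params-based readiness check, and the WAIT constant (the
# "anything else keeps waiting" case) are illustrative, not taken from this module.
class ExampleDeferredJobPlugin( object ):
    check_interval = 30  # seconds; picked up via "'check_interval' in dir( plugin )" above

    def __init__( self, app ):
        self.app = app
        self.job_states = None  # assigned by the loader after instantiation

    def check_job( self, job ):
        # Decide whether the deferred job can run yet. READY means "run now",
        # INVALID means "fail the job", anything else keeps it waiting.
        if job.params.get( 'ready', False ):  # hypothetical readiness flag
            return self.job_states.READY
        return self.job_states.WAIT

    def run_job( self, job ):
        # Do the actual work; exceptions are caught by __monitor_step(), which
        # marks the job as errored via __fail_job().
        pass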
Example #40
0
 cols = [ int( c ) for c in str( options.columns ).split( ',' ) if int( c ) > hinge ]
 inputs = [ options.input1, options.input2 ]
 if options.fill_options_file == 'None':
     inputs.extend( args )
 elif len( args ) > 0:
     inputs.extend( args )
 fill_options = None
 if options.fill_options_file != 'None' and options.fill_options_file is not None:
     try:
         if simplejson is None:
             raise simplejson_exception
         fill_options = Bunch( **stringify_dictionary_keys( simplejson.load( open( options.fill_options_file ) ) ) )
     except Exception, e:
         print 'Warning: Ignoring fill options due to simplejson error (%s).' % e
 if fill_options is None:
     fill_options = Bunch()
 if 'file1_columns' not in fill_options:
     fill_options.file1_columns = None
 if fill_options and fill_options.file1_columns:
     fill_empty = {}
     for col in cols:
         fill_empty[ col ] = fill_options.file1_columns[ col - 1 ]
 else:
     fill_empty = None
 assert len( cols ) > 0, 'You need to select at least one column in addition to the hinge'
 delimiter = '\t'
 # make sure all files are sorted in same way, ascending
 tmp_input_files = []
 input_files = inputs[:]
 for in_file in input_files:
     tmp_file = tempfile.NamedTemporaryFile()
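
# stringify_dictionary_keys() is not shown in this excerpt. A plausible sketch,
# assuming its job is to turn the unicode keys returned by simplejson into plain
# str so the dictionary can be splatted into Bunch(**...) as keyword arguments
# under Python 2:
def stringify_dictionary_keys( in_dict ):
    out_dict = {}
    for key, value in in_dict.items():
        out_dict[ str( key ) ] = value
    return out_dict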
Example #41
0
        default=None,
        help="Fill empty columns with a values from a JSONified file.",
    )

    options, args = parser.parse_args()

    fill_options = None
    if options.fill_options_file is not None:
        try:
            fill_options = Bunch(
                **stringify_dictionary_keys(json.load(open(options.fill_options_file)))
            )  # json.load( open( options.fill_options_file ) )
        except Exception as e:
            print("Warning: Ignoring fill options due to json error (%s)." % e)
    if fill_options is None:
        fill_options = Bunch()
    if "fill_unjoined_only" not in fill_options:
        fill_options.fill_unjoined_only = True
    if "file1_columns" not in fill_options:
        fill_options.file1_columns = None
    if "file2_columns" not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except:
        print("Error parsing command line.", file=sys.stderr)
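
# A hypothetical usage sketch of the fill-options loading above. The JSON keys
# and values are illustrative only, chosen to match the keys the script checks
# (fill_unjoined_only, file1_columns, file2_columns):
import json
from galaxy.util.bunch import Bunch

fill_spec = {"fill_unjoined_only": True, "file1_columns": ["0", "."], "file2_columns": None}
with open("fill_options.json", "w") as fh:
    json.dump(fill_spec, fh)

# json.load() already returns str keys under Python 3, so no stringify step is needed here
fill_options = Bunch(**json.load(open("fill_options.json")))
print(fill_options.fill_unjoined_only)  # True
print(fill_options.file1_columns)       # ['0', '.']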
Example #42
0
    def get_uploaded_datasets(self,
                              trans,
                              context,
                              override_name=None,
                              override_info=None):
        def get_data_file_filename(data_file,
                                   override_name=None,
                                   override_info=None):
            dataset_name = override_name
            dataset_info = override_info

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name

            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                if not dataset_info:
                    dataset_info = 'uploaded file'
                return Bunch(type='file',
                             path=data_file['local_filename'],
                             name=dataset_name)
                #return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
            except:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)
                #return None, None, None, None, None
        def get_url_paste_urls_or_filename(group_incoming,
                                           override_name=None,
                                           override_info=None):
            filenames = []
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file, 'r').read(1024)
                if url_paste.lstrip().lower().startswith(
                        'http://') or url_paste.lstrip().lower().startswith(
                            'ftp://') or url_paste.lstrip().lower().startswith(
                                'https://'):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not line.lower().startswith(
                                    'http://') and not line.lower().startswith(
                                        'ftp://') and not line.lower(
                                        ).startswith('https://'):
                                continue  # non-url line, ignore
                            dataset_name = override_name
                            if not dataset_name:
                                dataset_name = line
                            dataset_info = override_info
                            if not dataset_info:
                                dataset_info = 'uploaded url'
                            yield Bunch(type='url',
                                        path=line,
                                        name=dataset_name)
                            #yield ( 'url', line, precreated_name, dataset_name, dataset_info )
                else:
                    dataset_name = dataset_info = precreated_name = 'Pasted Entry'  #we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    if override_info:
                        dataset_info = override_info
                    yield Bunch(type='file',
                                path=url_paste_file,
                                name=precreated_name)
                    #yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )
        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append(
                        "All file contents specified in the paste box were ignored."
                    )
                if ftp_files:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip(
            ):  #we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(
                        context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename),
                                               user_ftp_dir)
                                if not os.path.islink(
                                        os.path.join(dirpath, filename)):
                                    ftp_data_file = {
                                        'local_filename':
                                        os.path.abspath(
                                            os.path.join(user_ftp_dir, path)),
                                        'filename':
                                        os.path.basename(path)
                                    }
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info)
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            warnings = []
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(
                    context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [
                    unicodedata.normalize('NFC', f) for f in ftp_files
                    if isinstance(f, unicode)
                ]
                if trans.user is None:
                    log.warning(
                        'Anonymous user passed values in ftp_files: %s' %
                        ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    for (dirpath, dirnames,
                         filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename),
                                           user_ftp_dir)
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, unicode):
                                    valid_files.append(
                                        unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning(
                        'User passed an invalid file path in ftp_files: %s' %
                        ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {
                    'local_filename':
                    os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                    'filename':
                    os.path.basename(ftp_file)
                }
                file_bunch = get_data_file_filename(ftp_data_file,
                                                    override_name=name,
                                                    override_info=info)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            return rval

        file_type = self.get_file_type(context)
        d_type = self.get_datatype(trans, context)
        dbkey = context.get('dbkey', None)
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for filename in writable_files]
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None:
            #handle uploading of composite datatypes
            #Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            #load metadata
            files_metadata = context.get(self.metadata_ref, {})
            metadata_name_substitution_default_dict = dict([
                (composite_file.substitute_name_with_metadata,
                 d_type.metadata_spec[
                     composite_file.substitute_name_with_metadata].default)
                for composite_file in d_type.composite_files.values()
                if composite_file.substitute_name_with_metadata
            ])
            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substitution_default_dict:
                            meta_value = sanitize_for_filename(
                                meta_value,
                                default=metadata_name_substitution_default_dict[
                                    meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.precreated_name = dataset.name = self.get_composite_dataset_name(
                context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                #replace sniff here with just creating an empty file
                temp_name, is_multi_byte = sniff.stream_to_file(
                    StringIO.StringIO(d_type.generate_primary_file(dataset)),
                    prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.space_to_tab = file_bunch.space_to_tab
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  #remove this before finish, this should create an empty dataset
                raise Exception(
                    'No primary dataset file was available for composite upload'
                )
            keys = [value.name for value in writable_files.values()]
            for i, group_incoming in enumerate(
                    groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if group_incoming is None and not writable_files[
                        writable_files.keys()[keys.index(key)]].optional:
                    dataset.warnings.append(
                        "A required composite file (%s) was not specified." %
                        (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        dataset.composite_files[key] = file_bunch.__dict__
                    else:
                        dataset.composite_files[key] = None
                        if not writable_files[writable_files.keys()[keys.index(
                                key)]].optional:
                            dataset.warnings.append(
                                "A required composite file (%s) was not specified."
                                % (key))
            return [dataset]
        else:
            datasets = get_filenames(context[self.name][0])
            rval = []
            for dataset in datasets:
                dataset.file_type = file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext(trans, context)
                dataset.dbkey = dbkey
                rval.append(dataset)
            return rval
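
# A hypothetical caller sketch for the method above. 'param' is assumed to be the
# upload grouping parameter whose get_uploaded_datasets() is shown here, and 'trans'
# and 'context' its usual arguments; the attribute names match what the method sets
# on each Bunch it returns.
def summarize_uploads(param, trans, context):
    for dataset in param.get_uploaded_datasets(trans, context):
        if dataset.type == 'composite':
            # one composite dataset: a primary file plus named component files
            print(dataset.name, dataset.file_type, sorted(dataset.composite_files))
        else:
            # one Bunch per uploaded file/URL, carrying conversion flags
            print(dataset.type, dataset.path, dataset.ext, dataset.dbkey,
                  dataset.to_posix_lines, dataset.space_to_tab)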
Example #43
0
 def __init__(self, app):
     self.app = app
     self.options = Bunch(sanitize=False)
Example #44
0
class Dataset( object ):
    states = Bunch( NEW='new',
                    UPLOAD='upload',
                    QUEUED='queued',
                    RUNNING='running',
                    OK='ok',
                    EMPTY='empty',
                    ERROR='error',
                    DISCARDED='discarded' )
    permitted_actions = get_permitted_actions( filter='DATASET' )
    file_path = "/tmp/"
    engine = None

    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
        self.id = id
        self.state = state
        self.deleted = False
        self.purged = False
        self.purgable = purgable
        self.external_filename = external_filename
        self._extra_files_path = extra_files_path
        self.file_size = file_size

    def get_file_name( self ):
        if not self.external_filename:
            assert self.id is not None, "ID must be set before filename used (commit the object)"
            # First try filename directly under file_path
            filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
            # Only use that filename if it already exists (backward compatibility),
            # otherwise construct hashed path
            if not os.path.exists( filename ):
                dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
                # Create directory if it does not exist
                try:
                    os.makedirs( dir )
                except OSError as e:
                    # File Exists is okay, otherwise reraise
                    if e.errno != errno.EEXIST:
                        raise
                # Return filename inside hashed directory
                return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
        else:
            filename = self.external_filename
        # Make filename absolute
        return os.path.abspath( filename )

    def set_file_name( self, filename ):
        if not filename:
            self.external_filename = None
        else:
            self.external_filename = filename
    file_name = property( get_file_name, set_file_name )

    @property
    def extra_files_path( self ):
        if self._extra_files_path:
            path = self._extra_files_path
        else:
            path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
            # only use path directly under self.file_path if it exists
            if not os.path.exists( path ):
                path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
        # Make path absolute
        return os.path.abspath( path )

    def get_size( self ):
        """Returns the size of the data on disk"""
        if self.file_size:
            return self.file_size
        else:
            try:
                return os.path.getsize( self.file_name )
            except OSError:
                return 0

    def set_size( self ):
        """Returns the size of the data on disk"""
        try:
            if not self.file_size:
                self.file_size = os.path.getsize( self.file_name )
        except OSError:
            self.file_size = 0

    def has_data( self ):
        """Detects whether there is any data"""
        return self.get_size() > 0

    def mark_deleted( self, include_children=True ):
        self.deleted = True
    # FIXME: sqlalchemy will replace this

    def _delete(self):
        """Remove the file that corresponds to this data"""
        try:
            os.remove(self.data.file_name)
        except OSError as e:
            log.critical('%s delete error %s' % (self.__class__.__name__, e))
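
# directory_hash_id() is referenced above but not shown. A plausible implementation
# consistent with the hashed layout used by get_file_name() and extra_files_path
# (roughly 1000 datasets per leaf directory); the exact padding rules are an assumption.
def directory_hash_id( id ):
    s = str( id )
    # ids 0-999 go straight into .../000/
    if len( s ) < 4:
        return [ "000" ]
    # pad to a multiple of three digits, drop the last three
    # (1000 files per leaf directory), then split into 3-digit directory names
    padded = ( ( 3 - len( s ) % 3 ) % 3 ) * "0" + s
    padded = padded[ :-3 ]
    return [ padded[ i:i + 3 ] for i in range( 0, len( padded ), 3 ) ]

# e.g. id 12345   -> ['012']        -> /tmp/012/dataset_12345.dat
#      id 1234567 -> ['001', '234'] -> /tmp/001/234/dataset_1234567.dat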
Example #45
0
 def __init__(self, test_directory):
     self.config = Bunch(tool_data_path=test_directory)
Example #46
0
"""
Model objects for docker objects
"""
from __future__ import absolute_import

import logging

try:
    import docker
except ImportError:
    from galaxy.util.bunch import Bunch
    docker = Bunch(errors=Bunch(NotFound=None))

from galaxy.containers import (Container, ContainerPort, ContainerVolume)
from galaxy.util import (
    pretty_print_time_interval,
    unicodify,
)

CPUS_LABEL = '_galaxy_cpus'
IMAGE_LABEL = '_galaxy_image'
CPUS_CONSTRAINT = 'node.labels.' + CPUS_LABEL
IMAGE_CONSTRAINT = 'node.labels.' + IMAGE_LABEL

log = logging.getLogger(__name__)


class DockerAttributeContainer(object):
    def __init__(self, members=None):
        if members is None:
            members = set()
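
# The try/except ImportError at the top of this module swaps in a Bunch so that
# docker.errors.NotFound can still be referenced when the docker package is missing.
# A hypothetical sketch of how calling code might exploit that; the helper name and
# the docker SDK client usage are assumptions, not part of this module.
def get_container_or_none(client, container_id):
    if docker.errors.NotFound is None:
        # docker is not installed; the Bunch fallback is in place
        return None
    try:
        # docker SDK call; raises docker.errors.NotFound for unknown ids
        return client.containers.get(container_id)
    except docker.errors.NotFound:
        return None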
Example #47
0
import sys
sys.path.insert(1,'/galaxy-central')
sys.path.insert(1,'/galaxy-central/lib')

from scripts.db_shell import *
from galaxy.util.bunch import Bunch
from galaxy.security import GalaxyRBACAgent
from sqlalchemy.orm import sessionmaker
from sqlalchemy import *
import argparse
bunch = Bunch( **globals() )
engine = create_engine('postgres://*****:*****@localhost:5432/galaxy')
bunch.session = sessionmaker(bind=engine)
# For backward compatibility with "model.context.current"
bunch.context = sessionmaker(bind=engine)

security_agent = GalaxyRBACAgent( bunch )
security_agent.sa_session = sa_session


def add_user(email, password, key=None):
    """
        Add Galaxy User.
        From John https://gist.github.com/jmchilton/4475646
    """
    query = sa_session.query( User ).filter_by( email=email )
    if query.count() > 0:
        return query.first()
    else:
        User.use_pbkdf2 = False
        user = User(email)
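
# A hypothetical usage of the objects wired up above, assuming the truncated
# else-branch finishes by persisting and returning the new User (as in the gist
# the docstring references). create_private_user_role() is the standard
# GalaxyRBACAgent call; the credentials are placeholders.
user = add_user( 'admin@example.org', 'changeme' )
security_agent.create_private_user_role( user )
print( 'created user %s (id %s)' % ( user.email, user.id ) )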
Example #48
0
def __main__():
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '', '--ref_name', dest='ref_name', help='The reference name to change all output matches to' )
    parser.add_option( '', '--ref_source', dest='ref_source', help='Whether the reference is cached or from the history' )
    parser.add_option( '', '--ref_sequences', dest='ref_sequences', help='Number of sequences in the reference dataset' )
    parser.add_option( '', '--source_select', dest='source_select', help='Whether to used pre-set or cached reference file' )
    parser.add_option( '', '--input1', dest='input1', help='The name of the reference file if using history or reference base name if using cached' )
    parser.add_option( '', '--input2', dest='input2', help='The reads file to align' )
    parser.add_option( '', '--pre_set_options', dest='pre_set_options', help='Which of the pre set options to use, if using pre-sets' )
    parser.add_option( '', '--strand', dest='strand', help='Which strand of the read to search, if specifying all parameters' )
    parser.add_option( '', '--seed', dest='seed', help='Seeding settings, if specifying all parameters' )
    parser.add_option( '', '--transition', dest='transition', help='Number of transitions to allow in each seed hit, if specifying all parameters' )
    parser.add_option( '', '--gfextend', dest='gfextend', help='Whether to perform gap-free extension of seed hits to HSPs (high scoring segment pairs), if specifying all parameters' )
    parser.add_option( '', '--chain', dest='chain', help='Whether to perform chaining of HSPs, if specifying all parameters' )
    parser.add_option( '', '--O', dest='O', help='Gap opening penalty, if specifying all parameters' )
    parser.add_option( '', '--E', dest='E', help='Gap extension penalty, if specifying all parameters' )
    parser.add_option( '', '--X', dest='X', help='X-drop threshold, if specifying all parameters' )
    parser.add_option( '', '--Y', dest='Y', help='Y-drop threshold, if specifying all parameters' )
    parser.add_option( '', '--K', dest='K', help='Threshold for HSPs, if specifying all parameters' )
    parser.add_option( '', '--L', dest='L', help='Threshold for gapped alignments, if specifying all parameters' )
    parser.add_option( '', '--entropy', dest='entropy', help='Whether to involve entropy when filtering HSPs, if specifying all parameters' )
    parser.add_option( '', '--identity_min', dest='identity_min', help="Minimum identity (don't report matches under this identity)" )
    parser.add_option( '', '--identity_max', dest='identity_max', help="Maximum identity (don't report matches above this identity)" )
    parser.add_option( '', '--coverage', dest='coverage', help="The minimum coverage value (don't report matches covering less than this)" )
    parser.add_option( '', '--unmask', dest='unmask', help='Whether to convert lowercase bases to uppercase' )
    parser.add_option( '', '--out_format', dest='format', help='The format of the output file (sam, diffs, or tabular (general))' )
    parser.add_option( '', '--output', dest='output', help='The output file' )
    parser.add_option( '', '--lastzSeqsFileDir', dest='lastzSeqsFileDir', help='Directory of local lastz_seqs.loc file' )
    ( options, args ) = parser.parse_args()

    # output version # of tool
    try:
        tmp = tempfile.NamedTemporaryFile().name
        tmp_stdout = open( tmp, 'wb' )
        proc = subprocess.Popen( args='lastz -v', shell=True, stdout=tmp_stdout )
        tmp_stdout.close()
        returncode = proc.wait()
        stdout = None
        for line in open( tmp_stdout.name, 'rb' ):
            if line.lower().find( 'version' ) >= 0:
                stdout = line.strip()
                break
        if stdout:
            sys.stdout.write( '%s\n' % stdout )
        else:
            raise Exception
    except:
        sys.stdout.write( 'Could not determine Lastz version\n' )

    if options.unmask == 'yes':
        unmask = '[unmask]'
    else:
        unmask = ''
    if options.ref_name:
        ref_name = '[nickname=%s]' % options.ref_name
    else:
        ref_name = ''
    # Prepare for commonly-used preset options
    if options.source_select == 'pre_set':
        set_options = '--%s' % options.pre_set_options
    # Prepare for user-specified options
    else:
        set_options = '--%s --%s --gapped --strand=%s --seed=%s --%s O=%s E=%s X=%s Y=%s K=%s L=%s --%s' % \
                    ( options.gfextend, options.chain, options.strand, options.seed, options.transition,
                      options.O, options.E, options.X, options.Y, options.K, options.L, options.entropy )
    # Specify input2 and add [fullnames] modifier if output format is diffs
    if options.format == 'diffs':
        input2 = '%s[fullnames]' % options.input2
    else:
        input2 = options.input2
    if options.format == 'tabular':
        # Change output format to general if it's tabular and add field names for tabular output
        format = 'general-'
        tabular_fields = ':score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle'
    elif options.format == 'sam':
        # We currently ALWAYS suppress SAM headers.
        format = 'sam-'
        tabular_fields = ''
    else:
        format = options.format
        tabular_fields = ''

    # Set up our queues
    lastz_job_queue = LastzJobQueue( WORKERS, slots=SLOTS )
    combine_data_queue = CombineDataQueue( options.output )

    if options.ref_source == 'history':
        # Reference is a fasta dataset from the history, so split job across
        # the number of sequences in the dataset ( this could be a HUGE number )
        try:
            # Ensure there is at least 1 sequence in the dataset ( this may not be necessary ).
            error_msg = "The reference dataset is missing metadata, click the pencil icon in the history item and 'auto-detect' the metadata attributes."
            ref_sequences = int( options.ref_sequences )
            if ref_sequences < 1:
                stop_queues( lastz_job_queue, combine_data_queue )
                stop_err( error_msg )
        except:
            stop_queues( lastz_job_queue, combine_data_queue )
            stop_err( error_msg )
        seqs = 0
        fasta_reader = FastaReader( open( options.input1 ) )
        while True:
            # Read the next sequence from the reference dataset
            seq = fasta_reader.next()
            if not seq:
                break
            seqs += 1
            # Create a temporary file to contain the current sequence as input to lastz
            tmp_in_fd, tmp_in_name = tempfile.mkstemp( suffix='.in' )
            tmp_in = os.fdopen( tmp_in_fd, 'wb' )
            # Write the current sequence to the temporary input file
            tmp_in.write( '>%s\n%s\n' % ( seq.name, seq.text ) )
            tmp_in.close()
            # Create a 2nd temporary file to contain the output from lastz execution on the current sequence
            tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' )
            os.close( tmp_out_fd )
            # Generate the command line for calling lastz on the current sequence
            command = 'lastz %s%s%s %s %s --ambiguousn --nolaj --identity=%s..%s --coverage=%s --format=%s%s > %s' % \
                ( tmp_in_name, unmask, ref_name, input2, set_options, options.identity_min, 
                  options.identity_max, options.coverage, format, tabular_fields, tmp_out_name )
            # Create a job object
            job = Bunch()
            job.command = command
            job.output = tmp_out_name
            job.cleanup = [ tmp_in_name, tmp_out_name ]
            job.combine_data_queue = combine_data_queue
            # Add another job to the lastz_job_queue. Execution 
            # will wait at this point if the queue is full.
            lastz_job_queue.put( job, block=True )
        # Make sure the value of sequences in the metadata is the same as the
        # number of sequences read from the dataset ( this may not be necessary ).
        if ref_sequences != seqs:
            stop_queues( lastz_job_queue, combine_data_queue )
            stop_err( "The value of metadata.sequences (%d) differs from the number of sequences read from the reference (%d)." % ( ref_sequences, seqs ) )
    else:
        # Reference is a locally cached 2bit file, split job across number of chroms in 2bit file
        tbf = TwoBitFile( open( options.input1, 'r' ) )
        for chrom in tbf.keys():
            # Create a temporary file to contain the output from lastz execution on the current chrom
            tmp_out_fd, tmp_out_name = tempfile.mkstemp( suffix='.out' )
            os.close( tmp_out_fd )
            command = 'lastz %s/%s%s%s %s %s --ambiguousn --nolaj --identity=%s..%s --coverage=%s --format=%s%s >> %s' % \
                ( options.input1, chrom, unmask, ref_name, input2, set_options, options.identity_min, 
                  options.identity_max, options.coverage, format, tabular_fields, tmp_out_name )
            # Create a job object
            job = Bunch()
            job.command = command
            job.output = tmp_out_name
            job.cleanup = [ tmp_out_name ]
            job.combine_data_queue = combine_data_queue
            # Add another job to the lastz_job_queue. Execution 
            # will wait at this point if the queue is full.
            lastz_job_queue.put( job, block=True )

    # Stop the lastz_job_queue
    for t in lastz_job_queue.threads:
        lastz_job_queue.put( STOP_SIGNAL, True )
    # Although all jobs are submitted to the queue, we can't shut down the combine_data_queue
    # until we know that all jobs have been submitted to its queue.  We do this by checking
    # whether all of the threads in the lastz_job_queue have terminated.
    while threading.activeCount() > 2:
        time.sleep( 1 )
    # Now it's safe to stop the combine_data_queue
    combine_data_queue.put( STOP_SIGNAL )
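
# stop_queues() is called above but not shown in this excerpt. A plausible helper,
# consistent with the shutdown sequence at the end of __main__(): one STOP_SIGNAL
# per lastz worker thread, then one for the combine/writer thread.
def stop_queues( lastz_job_queue, combine_data_queue ):
    # tell every lastz worker thread to exit
    for t in lastz_job_queue.threads:
        lastz_job_queue.put( STOP_SIGNAL, True )
    # tell the single combine/writer thread to exit
    combine_data_queue.put( STOP_SIGNAL )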