def main():
    """
    Report on stdout whether a tool/owner combination is absent from Galaxy.

    Writes ``True`` when no repository with the requested name and owner has
    the status ``Installed`` on the Galaxy instance, ``False`` otherwise.
    """
    arg_parser = argparse.ArgumentParser(
        description=
        'Writes True to stdout if a tool/owner combination does not exist on a Galaxy instance'
    )
    option_specs = (
        ('-g', '--galaxy_url', 'Galaxy server URL'),
        ('-a', '--api_key', 'API key for galaxy server'),
        ('-n', '--name', 'Tool name'),
        ('-o', '--owner', 'Tool owner'),
    )
    for short_flag, long_flag, help_text in option_specs:
        arg_parser.add_argument(short_flag, long_flag, help=help_text)
    options = arg_parser.parse_args()

    client = ToolShedClient(
        GalaxyInstance(options.galaxy_url, options.api_key))

    def is_installed_match(repo):
        return (repo['name'] == options.name
                and repo['owner'] == options.owner
                and repo['status'] == 'Installed')

    matches = list(filter(is_installed_match, client.get_repositories()))
    # No installed repository with this name/owner means the tool is new.
    sys.stdout.write('False' if matches else 'True')
Ejemplo n.º 2
0
def installed_repository_revisions(gi, omit=None):
    """
    List the repository revisions installed from a Tool Shed on a Galaxy instance.

    Only repositories whose status is ``Installed`` are considered. Revisions
    of a repository that is already in the result are appended to that
    repository's existing entry.

    :type gi: GalaxyInstance object
    :param gi: Galaxy connection used to create the ``ToolShedClient``.
    :type omit: list of strings
    :param omit: Strings that, when contained in a repository name, keep that
                 repository out of the returned list.
    :rtype: list of dicts
    :return: One dict per repository with the keys `name`, `owner`,
             `tool_shed_url` and `revisions`.
    """
    omit = omit or []
    client = ToolShedClient(gi)

    collected = []
    for repo in client.get_repositories():
        if repo['status'] != 'Installed':
            continue

        # Fold this revision into an entry created for the same repository earlier
        already_listed = False
        for entry in collected:
            if the_same_repository(repo, entry):
                entry['revisions'].append(repo.get('changeset_revision', None))
                already_listed = True

        # A repository is left out when any 'omit' string occurs in its name
        omitted = False
        for fragment in omit:
            if fragment in repo['name']:
                omitted = True

        if already_listed or omitted:
            continue

        collected.append({
            'name': repo['name'],
            'owner': repo['owner'],
            'revisions': [repo.get('changeset_revision', None)],
            'tool_shed_url': 'https://' + repo['tool_shed'],
        })
    return collected
    def repository_list(self):
        """
        Collect the repositories behind the tools of this Galaxy instance.

        ``self.toolbox`` is traversed with ``walk_tools`` and every repository
        that ``get_repo_from_tool`` returns is recorded. Repositories of data
        manager tools are added when ``self.get_data_managers`` is set, and
        every non-deleted Tool Shed repository is appended when
        ``self.get_all_tools`` is set.
        """
        collected = []

        def collect(tool_elem):
            found = get_repo_from_tool(tool_elem)
            if found:
                collected.append(found)

        walk_tools(self.toolbox, collect)

        if self.get_data_managers:
            for tool in self.installed_tool_list:
                if tool.get("model_class") != 'DataManagerTool':
                    continue
                collect(tool)

        if not self.get_all_tools:
            return collected

        # Snapshot of what was found so far; these entries are the only source
        # of tool panel section ids and labels for the repositories added below.
        panel_repos = list(collected)
        shed_client = ToolShedClient(self.gi)
        # Quadratic lookup: every Tool Shed repository is compared against the
        # snapshot until the first match.
        for shed_repo in shed_client.get_repositories():
            if shed_repo['deleted']:
                continue
            section_id = section_label = None
            for panel_repo in panel_repos:
                if the_same_repository(panel_repo,
                                       shed_repo,
                                       check_revision=False):
                    section_id = panel_repo.get('tool_panel_section_id')
                    section_label = panel_repo.get('tool_panel_section_label')
                    break
            collected.append({
                'name': shed_repo.get('name'),
                'owner': shed_repo.get('owner'),
                'tool_shed_url': shed_repo.get('tool_shed'),
                'revisions': [shed_repo.get('changeset_revision')],
                'tool_panel_section_label': section_label,
                'tool_panel_section_id': section_id,
            })
        return collected
    def repository_list(self):
        """
        Return the repositories that provide the tools of this Galaxy instance.

        Every element of ``self.toolbox`` is visited through ``walk_tools``.
        Data manager repositories are included when ``self.get_data_managers``
        is set. When ``self.get_all_tools`` is set, each non-deleted
        repository reported by the Tool Shed client is appended as well.
        """
        result = []

        def add_repo_of(element):
            repo_info = get_repo_from_tool(element)
            if repo_info:
                result.append(repo_info)

        walk_tools(self.toolbox, add_repo_of)

        if self.get_data_managers:
            data_managers = (
                candidate for candidate in self.installed_tool_list
                if candidate.get("model_class") == 'DataManagerTool')
            for data_manager in data_managers:
                add_repo_of(data_manager)

        if self.get_all_tools:
            with_panel = result[:]
            client = ToolShedClient(self.gi)

            def panel_section(shed_repo):
                # Linear scan for the first already-known repository that
                # matches, ignoring the revision; it supplies the section info.
                for known in with_panel:
                    if the_same_repository(known, shed_repo, check_revision=False):
                        return (known.get('tool_panel_section_id'),
                                known.get('tool_panel_section_label'))
                return (None, None)

            for shed_repo in client.get_repositories():
                if shed_repo['deleted']:
                    continue
                section_id, section_label = panel_section(shed_repo)
                result.append(
                    dict(name=shed_repo.get('name'),
                         owner=shed_repo.get('owner'),
                         tool_shed_url=shed_repo.get('tool_shed'),
                         revisions=[shed_repo.get('changeset_revision')],
                         tool_panel_section_label=section_label,
                         tool_panel_section_id=section_id))
        return result
Ejemplo n.º 5
0
def installed_repository_revisions(gi, omit=None):
    """
    Get the repository revisions installed from a Tool Shed on a Galaxy instance.

    Repositories with a status other than ``Installed`` are ignored. Further
    revisions of a repository that already has an entry are added to that
    entry's ``revisions`` list.

    :type gi: GalaxyInstance object
    :param gi: Galaxy connection handed to ``ToolShedClient``.
    :type omit: list of strings
    :param omit: Strings that, if found in a repository name, exclude the
                 repository from the returned list.
    :rtype: list of dicts
    :return: Dicts with the keys `name`, `owner`, `tool_shed_url`, `revisions`.
    """
    name_filters = omit if omit else []
    shed_client = ToolShedClient(gi)

    results = []
    installed_only = (candidate
                      for candidate in shed_client.get_repositories()
                      if candidate['status'] == 'Installed')
    for current in installed_only:
        needs_entry = True

        # An existing entry for the same repository receives the new revision
        for known in results:
            if the_same_repository(current, known):
                known['revisions'].append(
                    current.get('changeset_revision', None))
                needs_entry = False

        # Names containing one of the filter strings never get an entry
        for pattern in name_filters:
            if pattern in current['name']:
                needs_entry = False

        if needs_entry:
            results.append(
                dict(
                    name=current['name'],
                    owner=current['owner'],
                    revisions=[current.get('changeset_revision', None)],
                    tool_shed_url='https://' + current['tool_shed'],
                ))
    return results
Ejemplo n.º 6
0
def uninstall_tools(galaxy_server, api_key, names, force):
    """
    Uninstall Tool Shed repositories from a Galaxy instance by name.

    :param galaxy_server: URL of the Galaxy server.
    :param api_key: API key used for the Galaxy connection.
    :param names: Repository names; an entry written as ``name@revision``
                  only matches that changeset revision.
    :param force: If true, uninstall all matches even when one entry matches
                  more than one repository; otherwise such entries are
                  skipped with a warning.
    """
    galaxy = GalaxyInstance(url=galaxy_server, key=api_key)
    client = ToolShedClient(galaxy)
    candidates = [
        repo for repo in client.get_repositories()
        if repo['status'] != 'Uninstalled'
    ]

    selected = []
    for requested in names:
        if '@' in requested:
            tool_name, wanted_revision = requested.split('@')
        else:
            tool_name, wanted_revision = requested, None

        matches = []
        for repo in candidates:
            if repo['name'] != tool_name:
                continue
            if wanted_revision and wanted_revision != repo['changeset_revision']:
                continue
            matches.append(repo)

        if wanted_revision:
            label = 'name %s revision %s' % (tool_name, wanted_revision)
        else:
            label = 'name %s' % tool_name

        if not matches:
            print('*** Warning: No tool with %s' % label)
            continue
        if len(matches) > 1 and not force:
            print(
                '*** Warning: More than one toolshed tool found for %s.  ' %
                label +
                'Not uninstalling any of these tools.  Run script with --force (-f) flag to uninstall anyway'
            )
            continue
        # Exactly one match, or several matches with force enabled
        selected.extend(matches)

    for repo in selected:
        # Best effort: a failure is printed and the remaining repositories
        # are still processed.
        try:
            print('Uninstalling %s at revision %s' %
                  (repo['name'], repo['changeset_revision']))
            outcome = client.uninstall_repository_revision(
                name=repo['name'],
                owner=repo['owner'],
                changeset_revision=repo['changeset_revision'],
                tool_shed_url=repo['tool_shed'],
            )
            print(outcome)
        except Exception as e:
            print(e)
Ejemplo n.º 7
0
def main():
    """
    Install every repository listed in ``tool_shed_tool_list.yaml`` into Galaxy.

    The YAML file must provide a ``tools`` list. Each entry needs ``name``,
    ``owner`` and ``tool_panel_section_id``; ``tool_shed_url``, ``revision``,
    ``install_tool_dependencies`` and ``install_repository_dependencies`` are
    filled with defaults when missing (the revision defaults to the last
    entry returned by ``get_ordered_installable_revisions``).

    A failed installation is reported and the remaining entries are still
    processed. Any other error prints the traceback and exits with status 1.
    """
    try:
        tool_list_file = 'tool_shed_tool_list.yaml'
        # Load tool list. safe_load is sufficient for a plain data file and
        # does not construct arbitrary Python objects.
        with open(tool_list_file, 'r') as f:
            tl = yaml.safe_load(f)

        r_info = tl['tools']
        total_num_tools = len(r_info)

        gInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
        tsc = ToolShedClient(gInstance)
        # The result is not used, but an error raised here aborts the run
        # (exit status 1) before any installation is attempted.
        tsc.get_repositories()

        for counter, r in enumerate(r_info, start=1):
            r.setdefault('install_tool_dependencies', True)
            r.setdefault('install_repository_dependencies', True)
            r.setdefault('tool_shed_url', 'http://toolshed.g2.bx.psu.edu')

            if 'revision' not in r:
                ts = ToolShedInstance(url=r['tool_shed_url'])
                r['revision'] = ts.repositories.get_ordered_installable_revisions(
                    r['name'], r['owner'])[-1]

            print('\n(%s/%s) Installing tool %s from %s to section %s (revision:%s depend-install:%s depend_repo_install:%s) ' % (
                counter, total_num_tools, r['name'], r['owner'],
                r['tool_panel_section_id'], r['revision'],
                r['install_tool_dependencies'],
                r['install_repository_dependencies']))

            try:
                tsc.install_repository_revision(
                    r['tool_shed_url'], r['name'], r['owner'], r['revision'],
                    r['install_tool_dependencies'],
                    r['install_repository_dependencies'],
                    r['tool_panel_section_id'])
            except Exception as e:
                # Exception (not a bare except) so that Ctrl-C still aborts
                # the run instead of being reported as a failed installation.
                print('failed installing %s tool.' % r['name'])
                if '"err_code": 400008' in str(e):
                    err_msg = '>>>>>> This tool already installed in Galaxy.'
                else:
                    err_msg = str(e)
                print(err_msg)
            else:
                print('successful %s installation.' % r['name'])

        print("\r\nAll tools listed in %s have been installed." % tool_list_file)

    except Exception as e:
        tbinfo = traceback.format_tb(sys.exc_info()[2])
        print('Error Info...'.ljust(80, '='))
        for tbi in tbinfo:
            print(tbi)
        print('  %s' % str(e))
        print('\n'.rjust(85, '='))
        sys.exit(1)
def main():
    """
    Rewrite arbitrarily many tool YAML files as one output file per tool revision.

    Tools are read from the ``tools`` key of every file given with ``--files``
    or, failing that, of every file in ``--source_directory``. With
    ``--update_existing`` the list is reduced to tools from trusted owners
    (read from ``trusted_owners_file``) for which ``get_new_revision`` returns
    new revision information, based on the repositories installed on the
    production Galaxy instance. Every remaining tool is handed to
    ``write_output_file``, once per revision when it lists several.

    :raises Exception: if ``--update_existing`` is used without both
        ``--production_url`` and ``--production_api_key``.
    """
    parser = argparse.ArgumentParser(
        description=
        'Rewrite arbitrarily many tool.yml files as one file per tool revision'
    )
    parser.add_argument('-o', '--output_path',
                        help='Output file path')  # mandatory
    parser.add_argument(
        '-f', '--files', help='Tool input files',
        nargs='+')  # mandatory unless --source_directory is given
    parser.add_argument('-g', '--production_url', help='Galaxy server URL')
    parser.add_argument('-a',
                        '--production_api_key',
                        help='API key for galaxy server')
    # NOTE(review): this help text does not describe what the flag does in
    # this function (it selects the update check below) - confirm and reword.
    parser.add_argument(
        '--update_existing',
        help=
        'If there are several toolshed entries for one name or name/revision entry uninstall all of them',
        action='store_true',
    )
    parser.add_argument('-s',
                        '--source_directory',
                        help='Directory containing tool yml files')

    args = parser.parse_args()

    files = args.files
    path = args.output_path
    update = args.update_existing
    source_dir = args.source_directory
    production_url = args.production_url
    production_api_key = args.production_api_key

    if not (files or source_dir):
        print(
            'either --files or --source_directory must be defined as an argument\n'
        )
        return
    elif files and source_dir:
        print(
            '--files and --source_directory have both been provided.  Ignoring source_directory in favour of files\n'
        )
    if source_dir and not files:
        files = [
            os.path.join(source_dir, name) for name in os.listdir(source_dir)
        ]

    tools = []
    for file_path in files:
        with open(file_path) as handle:
            content = yaml.safe_load(handle)['tools']
        # 'tools' may hold a list of tools or a single tool entry
        if isinstance(content, list):
            tools += content
        else:
            tools.append(content)

    if update:  # update tools with trusted owners where updates are available
        # Both values are needed; the parentheses matter because
        # `not a and b` would only fire when the URL is missing but the key is set.
        if not (production_url and production_api_key):
            raise Exception(
                '--production_url and --production_api_key arguments are required when --update_existing flag is used'
            )

        with open(trusted_owners_file) as infile:
            trusted_owners = yaml.safe_load(infile)['trusted_owners']

        # load repository data to check which tools have updates available
        galaxy_instance = GalaxyInstance(production_url, production_api_key)
        toolshed_client = ToolShedClient(galaxy_instance)
        repos = toolshed_client.get_repositories()
        # Skip deactivated repos
        installed_repos = [r for r in repos if r['status'] == 'Installed']

        trusted_tools = [
            t for t in tools
            if any(t['owner'] == o['owner'] for o in trusted_owners)
        ]
        print('Checking for updates from %d tools' % len(trusted_tools))
        kept_keys = ('name', 'owner', 'tool_panel_section_label',
                     'tool_shed_url')
        tools = []
        for i, tool in enumerate(trusted_tools):
            if i > 0 and i % 100 == 0:
                print('%d/%d' % (i, len(trusted_tools)))
            new_revision_info = get_new_revision(tool, installed_repos,
                                                 trusted_owners)
            if new_revision_info:
                # Reduce the entry to its identifying keys before merging in
                # the new revision information.
                for key in [k for k in tool if k not in kept_keys]:
                    del tool[key]
                tool.update(new_revision_info)
                tools.append(tool)
        print('%d tools with updates available' % len(tools))

    for tool in tools:
        revisions = tool.get('revisions') or []
        if len(revisions) > 1:
            for rev in revisions:
                # Write a copy that carries a single revision, so the source
                # entry is not modified while its revisions are iterated.
                write_output_file(path=path, tool=dict(tool, revisions=[rev]))
        else:
            write_output_file(path=path, tool=tool)
Ejemplo n.º 9
0
class InstallRepositoryManager(object):
    """Manages the installation of new repositories on a galaxy instance"""

    def __init__(self, galaxy_instance):
        """
        Initialize a new tool manager.

        :param galaxy_instance: Galaxy connection; kept as ``self.gi`` and
            used to create ``self.tool_shed_client``.
        """
        self.gi = galaxy_instance
        self.tool_shed_client = ToolShedClient(self.gi)

    def installed_repositories(self):
        """Get currently installed tools (the ``tools`` entry of a ``GiToToolYaml`` tool list)."""
        return GiToToolYaml(gi=self.gi,
                            skip_tool_panel_section_name=False,
                            get_data_managers=True,
                            get_all_tools=True).tool_list.get("tools")

    def filter_installed_repos(self, repos, check_revision=True):
        """
        Split a list of repositories by whether they are already installed.

        :param repos: Repositories to classify.
        :param check_revision: Passed to ``the_same_repository``; when true the
            installed repositories are flattened first so that every
            repository/revision combination is compared on its own.
        :return: ``FilterResults`` namedtuple with the fields
            ``not_installed_repos`` and ``already_installed_repos``.
        """
        # TODO: Find a speedier algorithm.
        not_installed_repos = []
        already_installed_repos = []
        if check_revision:
            # If we want to check if revisions are equal, flatten the list,
            # so each repository - revision combination has its own entry
            installed_repos = flatten_repo_info(self.installed_repositories())
        else:
            # If we do not care about revision equality, do not do the flatten
            # action to limit the number of comparisons.
            installed_repos = self.installed_repositories()

        for repo in repos:
            for installed_repo in installed_repos:
                if the_same_repository(installed_repo, repo, check_revision):
                    already_installed_repos.append(repo)
                    break
            else:  # This executes when the for loop completes and no match has been found.
                not_installed_repos.append(repo)
        FilterResults = namedtuple(
            "FilterResults",
            ["not_installed_repos", "already_installed_repos"])
        return FilterResults(already_installed_repos=already_installed_repos,
                             not_installed_repos=not_installed_repos)

    def install_repositories(
            self,
            repositories,
            log=None,
            force_latest_revision=False,
            default_toolshed='https://toolshed.g2.bx.psu.edu/',
            default_install_tool_dependencies=False,
            default_install_resolver_dependencies=True,
            default_install_repository_dependencies=True):
        """
        Install a list of tools on the current galaxy.

        The repositories are flattened per revision, completed with
        ``complete_repo_information`` (using the ``default_*`` and
        ``force_latest_revision`` arguments), filtered against what is already
        installed and then installed one by one.

        :param repositories: Non-empty list of repository dicts.
        :param log: Optional logger; nothing is logged when it is falsy.
        :return: ``InstallResults`` namedtuple with the fields
            ``installed_repositories``, ``errored_repositories`` and
            ``skipped_repositories``.
        :raises ValueError: if ``repositories`` is empty.
        """
        if not repositories:
            raise ValueError("Empty list of tools was given")
        installation_start = dt.datetime.now()
        installed_repositories = []
        skipped_repositories = []
        errored_repositories = []
        counter = 0

        # Check repos for invalid keys
        for repo in repositories:
            for key in repo.keys():
                if key not in VALID_KEYS and key != 'revisions':
                    if log:
                        log.warning(
                            "'{0}' not a valid key. Will be skipped during parsing"
                            .format(key))

        # Start by flattening the repo list per revision
        flattened_repos = flatten_repo_info(repositories)
        total_num_repositories = len(flattened_repos)

        # Complete the repo information, and make sure each repository has a revision
        repository_list = []
        for repository in flattened_repos:
            start = dt.datetime.now()
            try:
                complete_repo = complete_repo_information(
                    repository,
                    default_toolshed_url=default_toolshed,
                    require_tool_panel_info=True,
                    default_install_tool_dependencies=
                    default_install_tool_dependencies,
                    default_install_resolver_dependencies=
                    default_install_resolver_dependencies,
                    default_install_repository_dependencies=
                    default_install_repository_dependencies,
                    force_latest_revision=force_latest_revision)
                repository_list.append(complete_repo)
            except (LookupError, KeyError) as e:
                # A repository that cannot be completed is recorded as errored
                # and the remaining repositories are still processed.
                if log:
                    log_repository_install_error(repository, start, str(e),
                                                 log)
                errored_repositories.append(repository)

        # Filter out already installed repos
        filtered_repos = self.filter_installed_repos(repository_list)

        for skipped_repo in filtered_repos.already_installed_repos:
            counter += 1
            if log:
                log_repository_install_skip(skipped_repo, counter,
                                            total_num_repositories, log)
            skipped_repositories.append(skipped_repo)

        # Install repos
        for repository in filtered_repos.not_installed_repos:
            counter += 1
            if log:
                log_repository_install_start(
                    repository,
                    counter=counter,
                    installation_start=installation_start,
                    log=log,
                    total_num_repositories=total_num_repositories)
            result = self.install_repository_revision(repository, log)
            if result == "error":
                errored_repositories.append(repository)
            elif result == "skipped":
                skipped_repositories.append(repository)
            elif result == "installed":
                installed_repositories.append(repository)

        # Log results
        if log:
            log.info("Installed repositories ({0}): {1}".format(
                len(installed_repositories),
                [(t['name'], t.get('changeset_revision'))
                 for t in installed_repositories]))
            log.info("Skipped repositories ({0}): {1}".format(
                len(skipped_repositories),
                [(t['name'], t.get('changeset_revision'))
                 for t in skipped_repositories]))
            log.info("Errored repositories ({0}): {1}".format(
                len(errored_repositories),
                [(t['name'], t.get('changeset_revision', ""))
                 for t in errored_repositories]))
            log.info("All repositories have been installed.")
            log.info("Total run time: {0}".format(dt.datetime.now() -
                                                  installation_start))
        InstallResults = namedtuple("InstallResults", [
            "installed_repositories", "errored_repositories",
            "skipped_repositories"
        ])
        return InstallResults(installed_repositories=installed_repositories,
                              skipped_repositories=skipped_repositories,
                              errored_repositories=errored_repositories)

    def update_repositories(self, repositories=None, log=None, **kwargs):
        """
        Install the latest revision of already installed repositories.

        :param repositories: Repositories to update; when ``None`` or empty,
            all of ``self.installed_repositories()`` are used. Given
            repositories that are not installed are dropped with a warning.
        :param log: Optional logger.
        :param kwargs: Forwarded to ``install_repositories``.
        :return: The result of ``install_repositories`` called with
            ``force_latest_revision=True``.
        """
        if not repositories:  # Repositories None or empty list
            repositories = self.installed_repositories()
        else:
            filtered_repos = self.filter_installed_repos(repositories,
                                                         check_revision=False)
            if filtered_repos.not_installed_repos:
                if log:
                    log.warning(
                        "The following tools are not installed and will not be upgraded: {0}"
                        .format(filtered_repos.not_installed_repos))
            repositories = filtered_repos.already_installed_repos
        return self.install_repositories(repositories,
                                         force_latest_revision=True,
                                         log=log,
                                         **kwargs)

    def test_tools(self,
                   test_json,
                   repositories=None,
                   log=None,
                   test_user_api_key=None,
                   test_user="******"):
        """Run tool tests for all tools in each repository in supplied tool list or ``self.installed_repositories()``.

        :param test_json: Path of the JSON report file that is written.
        :param repositories: Repositories whose tools are tested; defaults to
            ``self.installed_repositories()`` when ``None`` or empty.
        :param log: Optional logger for the pass/fail summary.
        :param test_user_api_key: Forwarded to ``_test_tool``.
        :param test_user: Forwarded to ``_test_tool``.
        """
        tool_test_start = dt.datetime.now()
        tests_passed = []
        test_exceptions = []

        if not repositories:  # If repositories is None or empty list
            # Consider a variant of this that doesn't even consume a tool list YAML? target
            # something like installed_repository_revisions(self.gi)
            repositories = self.installed_repositories()

        target_repositories = flatten_repo_info(repositories)

        installed_tools = []
        for target_repository in target_repositories:
            repo_tools = tools_for_repository(self.gi, target_repository)
            installed_tools.extend(repo_tools)

        all_test_results = []

        for tool in installed_tools:
            results = self._test_tool(tool, test_user, test_user_api_key)
            all_test_results.extend(results.tool_test_results)
            tests_passed.extend(results.tests_passed)
            test_exceptions.extend(results.test_exceptions)

        report_obj = {
            'version': '0.1',
            'tests': all_test_results,
        }
        with open(test_json, "w") as f:
            json.dump(report_obj, f)
        if log:
            log.info("Passed tool tests ({0}): {1}".format(
                len(tests_passed), list(tests_passed)))
            log.info("Failed tool tests ({0}): {1}".format(
                len(test_exceptions), [t[0] for t in test_exceptions]))
            log.info("Total tool test time: {0}".format(dt.datetime.now() -
                                                        tool_test_start))

    def _test_tool(self, tool, test_user, test_user_api_key):
        """
        Run every test of a single tool through ``verify_tool``.

        :param tool: Dict with at least the keys ``id`` and ``version``.
        :param test_user: Passed to the interactor as ``test_user`` when no
            API key for the test user could be determined.
        :param test_user_api_key: API key of the test user, or ``None``.
        :return: ``Results`` namedtuple with the fields ``tool_test_results``
            (registered job data per test), ``tests_passed`` (test ids) and
            ``test_exceptions`` (``(test_id, exception)`` tuples).
        """
        if test_user_api_key is None:
            whoami = self.gi.make_get_request(self.gi.url + "/whoami").json()
            if whoami is not None:
                test_user_api_key = self.gi.key
        galaxy_interactor_kwds = {
            "galaxy_url": re.sub('/api', '', self.gi.url),
            "master_api_key": self.gi.key,
            "api_key": None,  # TODO
            "keep_outputs_dir": '',
        }
        if test_user_api_key is None:
            galaxy_interactor_kwds["test_user"] = test_user
        galaxy_interactor = GalaxyInteractorApi(**galaxy_interactor_kwds)
        tool_id = tool["id"]
        tool_version = tool["version"]
        tool_test_dicts = galaxy_interactor.get_tool_tests(
            tool_id, tool_version=tool_version)
        test_indices = list(range(len(tool_test_dicts)))
        tool_test_results = []
        tests_passed = []
        test_exceptions = []

        for test_index in test_indices:
            test_id = tool_id + "-" + str(test_index)

            def register(job_data):
                tool_test_results.append({
                    'id': test_id,
                    'has_data': True,
                    'data': job_data,
                })

            try:
                verify_tool(tool_id,
                            galaxy_interactor,
                            test_index=test_index,
                            tool_version=tool_version,
                            register_job_data=register,
                            quiet=True)
                tests_passed.append(test_id)
            except Exception as e:
                # Any failure of a single test is recorded, not raised, so the
                # remaining tests still run.
                test_exceptions.append((test_id, e))
        Results = namedtuple(
            "Results",
            ["tool_test_results", "tests_passed", "test_exceptions"])
        return Results(tool_test_results=tool_test_results,
                       tests_passed=tests_passed,
                       test_exceptions=test_exceptions)

    def install_repository_revision(self, repository, log):
        """
        Install a single repository revision through the Tool Shed client.

        Note that ``repository`` is modified in place: its
        ``tool_panel_section_label`` key is renamed to
        ``new_tool_panel_section_label`` before the dict is passed as keyword
        arguments to the client.

        :param repository: Completed repository dict.
        :param log: Optional logger.
        :return: ``"installed"``, ``"skipped"`` or ``"error"``.
        """
        default_err_msg = (
            'All repositories that you are attempting to install '
            'have been previously installed.')
        start = dt.datetime.now()
        try:
            repository['new_tool_panel_section_label'] = repository.pop(
                'tool_panel_section_label')
            response = self.tool_shed_client.install_repository_revision(
                **repository)
            if isinstance(response, dict) and response.get('status',
                                                           None) == 'ok':
                # This rare case happens if a repository is already installed but
                # was not recognised as such in the above check. In such a
                # case the return value looks like this:
                # {u'status': u'ok', u'message': u'No repositories were
                #  installed, possibly because the selected repository has
                #  already been installed.'}
                if log:
                    log.debug("\tRepository {0} is already installed.".format(
                        repository['name']))
            if log:
                log_repository_install_success(repository=repository,
                                               start=start,
                                               log=log)
            return "installed"
        except ConnectionError as e:
            # The response body may be missing or empty; fall back to the
            # exception text so the checks below always work on a string.
            err_body = getattr(e, 'body', None) or str(e)
            if default_err_msg in err_body:
                # THIS SHOULD NOT HAPPEN DUE TO THE CHECKS EARLIER
                if log:
                    log.debug(
                        "\tRepository %s already installed (at revision %s)" %
                        (repository['name'], repository['changeset_revision']))
                return "skipped"
            elif "504" in str(e) or 'Connection aborted' in str(e):
                if log:
                    log.debug(
                        "Timeout during install of %s, extending wait to 1h",
                        repository['name'])
                success = self.wait_for_install(repository=repository,
                                                log=log,
                                                timeout=3600)
                if success:
                    if log:
                        log_repository_install_success(repository=repository,
                                                       start=start,
                                                       log=log)
                    return "installed"
                else:
                    if log:
                        log_repository_install_error(repository=repository,
                                                     start=start,
                                                     msg=err_body,
                                                     log=log)
                    return "error"
            else:
                if log:
                    log_repository_install_error(repository=repository,
                                                 start=start,
                                                 msg=err_body,
                                                 log=log)
                return "error"

    def wait_for_install(self, repository, log=None, timeout=3600):
        """
        If nginx times out, we look into the list of installed repositories
        and try to determine if a repository of the same name/owner is still installing.

        The repository list is polled until a matching entry reports the
        status ``Installed`` or ``Error``, pausing 10 seconds between polls.

        :param repository: Dict with the keys ``name`` and ``owner``.
        :param log: Optional logger for failed polls.
        :param timeout: Maximum time to wait, in seconds.
        :return: True if install finished successfully, False when timeout is
            exceeded or installation has failed.
        """
        start = dt.datetime.now()
        while (dt.datetime.now() - start) < dt.timedelta(seconds=timeout):
            try:
                installed_repo_list = self.tool_shed_client.get_repositories()
                for installing_repo in installed_repo_list:
                    if (repository['name'] == installing_repo['name']) and (
                            installing_repo['owner'] == repository['owner']):
                        if installing_repo['status'] == 'Installed':
                            return True
                        elif installing_repo['status'] == 'Error':
                            return False
            except ConnectionError as e:
                if log:
                    log.warning('Failed to get repositories list: %s', str(e))
            # Pause once per poll, also when the repository is not listed
            # (yet), so the server is never queried in a tight loop.
            time.sleep(10)
        return False
Ejemplo n.º 10
0
class InstallRepositoryManager(object):
    """Manages the installation of new repositories on a galaxy instance"""
    def __init__(self, galaxy_instance):
        """Bind the manager to a Galaxy instance.

        :param galaxy_instance: object kept as ``self.gi`` and also handed to
                                ``ToolShedClient`` to build ``self.tool_shed_client``.
        """
        self.tool_shed_client = ToolShedClient(galaxy_instance)
        self.gi = galaxy_instance

    def installed_repositories(self):
        """Return the ``tools`` entry of the tool list built for ``self.gi``.

        The list is produced by ``GiToToolYaml`` with tool panel section names
        kept, data managers included and all tools requested.
        """
        tool_yaml = GiToToolYaml(
            gi=self.gi,
            skip_tool_panel_section_name=False,
            get_data_managers=True,
            get_all_tools=True,
        )
        tool_list = tool_yaml.tool_list
        return tool_list.get("tools")

    def filter_installed_repos(self, repos, check_revision=True):
        """Split ``repos`` into those already installed and those that are not.

        :param repos: iterable of repository descriptions to classify.
        :param check_revision: when true, the installed list is flattened so
                               that every repository/revision pair is its own
                               entry; the flag is also forwarded to
                               ``the_same_repository``.
        :return: ``FilterResults`` named tuple with the fields
                 ``not_installed_repos`` and ``already_installed_repos``
                 (in that order), each preserving the order of ``repos``.
        """
        # TODO: Find a speedier algorithm.
        known_repos = self.installed_repositories()
        if check_revision:
            # One entry per repository/revision pair, so revisions can be compared.
            known_repos = flatten_repo_info(known_repos)
        # Without revision checking the unflattened list is used as-is,
        # which limits the number of comparisons.

        missing = []
        present = []
        for candidate in repos:
            is_installed = any(
                the_same_repository(known, candidate, check_revision)
                for known in known_repos)
            if is_installed:
                present.append(candidate)
            else:
                missing.append(candidate)

        FilterResults = namedtuple(
            "FilterResults", "not_installed_repos already_installed_repos")
        return FilterResults(not_installed_repos=missing,
                             already_installed_repos=present)

    def install_repositories(
            self,
            repositories,
            log=None,
            force_latest_revision=False,
            default_toolshed='https://toolshed.g2.bx.psu.edu/',
            default_install_tool_dependencies=False,
            default_install_resolver_dependencies=True,
            default_install_repository_dependencies=True):
        """Install the given repositories on the Galaxy instance.

        The input is flattened to one entry per revision, each entry is
        completed with the supplied defaults, entries that are already
        installed are skipped, and the rest are installed one by one through
        ``install_repository_revision``.

        :param repositories: non-empty list of repository dicts.
        :param log: optional logger; all progress/summary output is skipped
                    when it is falsy.
        :param force_latest_revision: forwarded to ``complete_repo_information``.
        :param default_toolshed: forwarded as ``default_toolshed_url``.
        :param default_install_tool_dependencies: forwarded default.
        :param default_install_resolver_dependencies: forwarded default.
        :param default_install_repository_dependencies: forwarded default.
        :raises ValueError: if ``repositories`` is empty or None.
        :return: ``InstallResults`` named tuple with the fields
                 ``installed_repositories``, ``errored_repositories`` and
                 ``skipped_repositories`` (in that order).
        """
        if not repositories:
            raise ValueError("Empty list of tools was given")
        installation_start = dt.datetime.now()
        installed, skipped, errored = [], [], []
        progress = 0

        # Warn about keys that are not recognised.
        for entry in repositories:
            for key in entry.keys():
                if key in VALID_KEYS or key == 'revisions':
                    continue
                if log:
                    log.warning(
                        "'{0}' not a valid key. Will be skipped during parsing"
                        .format(key))

        # One entry per repository revision.
        flattened_repos = flatten_repo_info(repositories)
        total_num_repositories = len(flattened_repos)

        # Fill in missing information so that every entry carries a revision.
        completion_options = dict(
            default_toolshed_url=default_toolshed,
            require_tool_panel_info=True,
            default_install_tool_dependencies=default_install_tool_dependencies,
            default_install_resolver_dependencies=default_install_resolver_dependencies,
            default_install_repository_dependencies=default_install_repository_dependencies,
            force_latest_revision=force_latest_revision,
        )
        completed_repos = []
        for candidate in flattened_repos:
            attempt_start = dt.datetime.now()
            try:
                completed = complete_repo_information(candidate,
                                                      **completion_options)
            except Exception as e:
                # Keep going regardless; errored repositories are reported at the end.
                if log:
                    log_repository_install_error(candidate, attempt_start,
                                                 unicodify(e), log)
                errored.append(candidate)
            else:
                completed_repos.append(completed)

        # Separate what is already installed from what still has to be installed.
        partition = self.filter_installed_repos(completed_repos)

        for present in partition.already_installed_repos:
            progress += 1
            if log:
                log_repository_install_skip(present, progress,
                                            total_num_repositories, log)
            skipped.append(present)

        # Install the remainder and file each repository under its outcome.
        buckets = {
            "error": errored,
            "skipped": skipped,
            "installed": installed,
        }
        for pending in partition.not_installed_repos:
            progress += 1
            if log:
                log_repository_install_start(
                    pending,
                    counter=progress,
                    installation_start=installation_start,
                    log=log,
                    total_num_repositories=total_num_repositories)
            outcome = self.install_repository_revision(pending, log)
            if outcome in buckets:
                buckets[outcome].append(pending)

        if log:

            def name_and_revision(group, missing=None):
                return [(t['name'], t.get('changeset_revision', missing))
                        for t in group]

            log.info("Installed repositories ({0}): {1}".format(
                len(installed), name_and_revision(installed)))
            log.info("Skipped repositories ({0}): {1}".format(
                len(skipped), name_and_revision(skipped)))
            log.info("Errored repositories ({0}): {1}".format(
                len(errored), name_and_revision(errored, "")))
            log.info("All repositories have been installed.")
            log.info("Total run time: {0}".format(dt.datetime.now() -
                                                  installation_start))

        InstallResults = namedtuple(
            "InstallResults",
            "installed_repositories errored_repositories skipped_repositories")
        return InstallResults(installed_repositories=installed,
                              errored_repositories=errored,
                              skipped_repositories=skipped)

    def update_repositories(self, repositories=None, log=None, **kwargs):
        """Re-install repositories with ``force_latest_revision=True``.

        :param repositories: repositories to update. When None or empty, the
                             result of ``self.installed_repositories()`` is
                             used. Otherwise only the entries that
                             ``filter_installed_repos`` (without revision
                             checking) reports as installed are kept; the
                             others are reported through ``log.warning``.
        :param log: optional logger, also forwarded to ``install_repositories``.
        :param kwargs: extra keyword arguments for ``install_repositories``.
        :return: whatever ``install_repositories`` returns.
        """
        if repositories:
            partition = self.filter_installed_repos(repositories,
                                                    check_revision=False)
            missing = partition.not_installed_repos
            if missing and log:
                log.warning(
                    "The following tools are not installed and will not be upgraded: {0}"
                    .format(missing))
            targets = partition.already_installed_repos
        else:
            # Nothing specific requested: update everything that is installed.
            targets = self.installed_repositories()
        return self.install_repositories(targets,
                                         force_latest_revision=True,
                                         log=log,
                                         **kwargs)

    def test_tools(
        self,
        test_json,
        repositories=None,
        log=None,
        test_user_api_key=None,
        test_user="******",
        test_history_name=None,
        parallel_tests=1,
        test_all_versions=False,
        client_test_config_path=None,
    ):
        """Run tool tests for all tools in each repository in supplied tool list or ``self.installed_repositories()``.

        Tests are submitted to a thread pool and a JSON report is written to
        ``test_json`` in a ``finally`` block, so the report is produced even
        when the run is interrupted.

        :param test_json: path of the JSON report file to write.
        :param repositories: tool list to test; when None or empty,
                             ``self.installed_repositories()`` is used.
        :param log: optional logger; nothing is logged when it is falsy.
        :param test_user_api_key: forwarded to ``_get_interactor``.
        :param test_user: forwarded to ``_get_interactor``.
        :param test_history_name: when given, the first non-deleted history of
                                  that name is reused, or a new history with
                                  that name is created if none exists. When
                                  omitted, a fresh history is created.
        :param parallel_tests: ``max_workers`` of the thread pool.
        :param test_all_versions: forwarded as ``all_tools`` to
                                  ``tools_for_repository``.
        :param client_test_config_path: optional path to a YAML file whose
                                        ``tools`` entry is used to build a
                                        ``DictClientTestConfig``.
        """
        tool_test_start = dt.datetime.now()
        tests_passed = []
        test_exceptions = []

        if not repositories:  # If repositories is None or empty list
            # Consider a variant of this that doesn't even consume a tool list YAML? target
            # something like installed_repository_revisions(self.gi)
            repositories = self.installed_repositories()

        target_repositories = flatten_repo_info(repositories)

        installed_tools = []
        for target_repository in target_repositories:
            repo_tools = tools_for_repository(self.gi,
                                              target_repository,
                                              all_tools=test_all_versions)
            installed_tools.extend(repo_tools)

        all_test_results = []
        galaxy_interactor = self._get_interactor(test_user, test_user_api_key)
        if client_test_config_path is not None:
            with open(client_test_config_path, "r") as f:
                client_test_config_dict = yaml.full_load(f)
            client_test_config = DictClientTestConfig(
                client_test_config_dict.get("tools"))
        else:
            client_test_config = None

        if test_history_name:
            for history in self.gi.histories.get_histories(
                    name=test_history_name, deleted=False):
                test_history = history['id']
                # ``log`` is optional (defaults to None), so guard it here
                # like every other logging call in this method.
                if log:
                    log.debug(
                        "Using existing history with id '%s', last updated: %s",
                        test_history, history['update_time'])
                break
            else:
                # No existing history of that name: create one.
                test_history = galaxy_interactor.new_history(
                    history_name=test_history_name)
        else:
            test_history = galaxy_interactor.new_history()

        with ThreadPoolExecutor(max_workers=parallel_tests) as executor:
            try:
                for tool in installed_tools:
                    self._test_tool(
                        executor=executor,
                        tool=tool,
                        galaxy_interactor=galaxy_interactor,
                        test_history=test_history,
                        log=log,
                        tool_test_results=all_test_results,
                        tests_passed=tests_passed,
                        test_exceptions=test_exceptions,
                        client_test_config=client_test_config,
                    )
            finally:
                # Always write report, even if test was cancelled.
                try:
                    executor.shutdown(wait=True)
                except KeyboardInterrupt:
                    # NOTE(review): pokes at private executor state, presumably
                    # so that interrupted worker threads are not waited for
                    # any longer - confirm against concurrent.futures internals.
                    executor._threads.clear()
                    thread._threads_queues.clear()
                n_passed = len(tests_passed)
                n_failed = len(test_exceptions)
                report_obj = {
                    'version': '0.1',
                    'suitename':
                    'Ephemeris tool tests targeting %s' % self.gi.base_url,
                    'results': {
                        'total': n_passed + n_failed,
                        # Every recorded exception is counted as an error;
                        # failures and skips are always reported as 0.
                        'errors': n_failed,
                        'failures': 0,
                        'skips': 0,
                    },
                    'tests': sorted(all_test_results, key=lambda el: el['id']),
                }
                with open(test_json, "w") as f:
                    json.dump(report_obj, f)
                if log:
                    log.info("Report written to '%s'",
                             os.path.abspath(test_json))
                    log.info("Passed tool tests ({0}): {1}".format(
                        n_passed, list(tests_passed)))
                    log.info("Failed tool tests ({0}): {1}".format(
                        n_failed, [t[0] for t in test_exceptions]))
                    log.info(
                        "Total tool test time: {0}".format(dt.datetime.now() -
                                                           tool_test_start))

    def _get_interactor(self, test_user, test_user_api_key):
        """Build the ``GalaxyInteractorApi`` used to run tool tests.

        When no test user API key is supplied, ``<gi.url>/whoami`` is queried
        and, if it returns something other than None, the key of ``self.gi``
        is used as the test user key. If there is still no key afterwards,
        ``test_user`` is passed to the interactor instead.

        :param test_user: user passed as ``test_user`` when no API key is available.
        :param test_user_api_key: API key to run tests with, or None.
        :return: a ``GalaxyInteractorApi`` instance.
        """
        gi = self.gi
        if test_user_api_key is None:
            identity = gi.make_get_request(gi.url + "/whoami").json()
            if identity is not None:
                test_user_api_key = gi.key

        interactor_options = dict(
            galaxy_url=re.sub('/api', '', gi.url),
            master_api_key=gi.key,
            api_key=test_user_api_key,  # TODO
            keep_outputs_dir='',
        )
        if test_user_api_key is None:
            interactor_options["test_user"] = test_user
        return GalaxyInteractorApi(**interactor_options)

    @staticmethod
    def _test_tool(
        executor,
        tool,
        galaxy_interactor,
        tool_test_results,
        tests_passed,
        test_exceptions,
        log,
        test_history=None,
        client_test_config=None,
    ):
        if test_history is None:
            test_history = galaxy_interactor.new_history()
        tool_id = tool["id"]
        tool_version = tool["version"]
        # If given a tool_id with a version suffix, strip it off so we can treat tool_version
        # correctly at least in client_test_config.
        if tool_version and tool_id.endswith("/" + tool_version):
            tool_id = tool_id[:-len("/" + tool_version)]

        label_base = tool_id
        if tool_version:
            label_base += "/" + str(tool_version)
        try:
            tool_test_dicts = galaxy_interactor.get_tool_tests(
                tool_id, tool_version=tool_version)
        except Exception as e:
            if log:
                log.warning("Fetching test definition for tool '%s' failed",
                            label_base,
                            exc_info=True)
            test_exceptions.append((label_base, e))
            Results = namedtuple(
                "Results",
                ["tool_test_results", "tests_passed", "test_exceptions"])
            return Results(tool_test_results=tool_test_results,
                           tests_passed=tests_passed,
                           test_exceptions=test_exceptions)
        test_indices = list(range(len(tool_test_dicts)))

        for test_index in test_indices:
            test_id = label_base + "-" + str(test_index)

            def run_test(index, test_id):
                def register(job_data):
                    tool_test_results.append({
                        'id': test_id,
                        'has_data': True,
                        'data': job_data,
                    })

                try:
                    if log:
                        log.info("Executing test '%s'", test_id)
                    verify_tool(
                        tool_id,
                        galaxy_interactor,
                        test_index=index,
                        tool_version=tool_version,
                        register_job_data=register,
                        quiet=True,
                        test_history=test_history,
                        client_test_config=client_test_config,
                    )
                    tests_passed.append(test_id)
                    if log:
                        log.info("Test '%s' passed", test_id)
                except Exception as e:
                    if log:
                        log.warning("Test '%s' failed", test_id, exc_info=True)
                    test_exceptions.append((test_id, e))

            executor.submit(run_test, test_index, test_id)

    def install_repository_revision(self, repository, log):
        """Install a single repository revision and report the outcome.

        ``repository`` is modified in place: its ``tool_panel_section_label``
        entry is moved to ``new_tool_panel_section_label`` before the dict is
        passed as keyword arguments to the Tool Shed client.

        :param repository: dict describing the repository; must contain
                           ``tool_panel_section_label`` and ``name``.
        :param log: logger, or a falsy value to disable logging.
        :return: ``"installed"`` on success (including a successful wait after
                 a timeout), ``"skipped"`` if the server reports the
                 repository as previously installed, ``"error"`` otherwise.
        """
        default_err_msg = (
            'All repositories that you are attempting to install '
            'have been previously installed.')
        start = dt.datetime.now()
        try:
            repository['new_tool_panel_section_label'] = repository.pop(
                'tool_panel_section_label')
            response = self.tool_shed_client.install_repository_revision(
                **repository)
            if isinstance(response, dict) and response.get('status',
                                                           None) == 'ok':
                # This rare case happens if a repository is already installed but
                # was not recognised as such in the above check. In such a
                # case the return value looks like this:
                # {u'status': u'ok', u'message': u'No repositories were
                #  installed, possibly because the selected repository has
                #  already been installed.'}
                if log:
                    log.debug("\tRepository {0} is already installed.".format(
                        repository['name']))
            if log:
                log_repository_install_success(repository=repository,
                                               start=start,
                                               log=log)
            return "installed"
        except (ConnectionError, requests.exceptions.ConnectionError) as e:
            error_text = unicodify(e)
            if default_err_msg in error_text:
                # THIS SHOULD NOT HAPPEN DUE TO THE CHECKS EARLIER
                if log:
                    # .get(): a missing revision must not raise inside the handler.
                    log.debug(
                        "\tRepository %s already installed (at revision %s)" %
                        (repository['name'],
                         repository.get('changeset_revision')))
                return "skipped"
            if "504" in error_text or 'Connection aborted' in error_text:
                if log:
                    log.debug(
                        "Timeout during install of %s, extending wait to 1h",
                        repository['name'])
                success = self.wait_for_install(repository=repository,
                                                log=log,
                                                timeout=3600)
                if success:
                    if log:
                        log_repository_install_success(repository=repository,
                                                       start=start,
                                                       log=log)
                    return "installed"
            # Both remaining cases (failed wait, any other connection error)
            # are reported the same way.
            if log:
                # requests' ConnectionError does not define ``body``; fall
                # back to the exception text instead of raising
                # AttributeError while reporting the original failure.
                error_body = e.body if hasattr(e, 'body') else error_text
                log_repository_install_error(repository=repository,
                                             start=start,
                                             msg=error_body,
                                             log=log)
            return "error"

    def wait_for_install(self, repository, log=None, timeout=3600):
        """
        Wait for a repository installation that outlived its HTTP request.

        If nginx times out, we look into the list of installed repositories
        and try to determine if a repository of the same name/owner is still
        installing. The entry to track is picked once, then polled by id
        every 10 seconds.

        :param repository: dict with the keys ``name``, ``owner`` and
                           ``changeset_revision``.
        :param log: optional logger; connection failures while polling are
                    logged as warnings.
        :param timeout: maximum number of seconds to poll.
        :return: True if install finished successfully, False when timeout is
                 exceeded or installation has failed.
        :raises AssertionError: if no repository with that name/owner is
                                listed, if the entry to track cannot be
                                determined unambiguously, or if the tracked
                                entry reports an unknown status.
        """
        # We request a repository revision, but Galaxy may decide to install the next downloadable revision.
        # This ensures we have a revision to track, and if not, finds the revision that is actually being installed
        name = repository['name']
        owner = repository['owner']
        changeset_revision = repository['changeset_revision']
        installed_repos = self.tool_shed_client.get_repositories()
        filtered_repos = [
            r for r in installed_repos
            if r['name'] == name and r['owner'] == owner
        ]
        if not filtered_repos:
            # Raised explicitly rather than via ``assert`` so the check (and
            # its message) survives ``python -O``; the exception type is kept.
            raise AssertionError(
                "Repository '%s' from owner '%s' not in list of repositories."
                % (name, owner))
        # Check if exact repository revision in filtered_repos
        installing_repo_id = None
        for repo in filtered_repos:
            if repo['changeset_revision'] == changeset_revision:
                installing_repo_id = repo['id']
                break
        else:
            # Galaxy may have decided to install a newer repository revision. We now try to guess which repository that is.
            non_terminal = [
                r for r in filtered_repos
                if r['status'] in NON_TERMINAL_REPOSITORY_STATES
            ]
            if len(non_terminal) == 1:
                # Unambiguous, we wait for this repo
                installing_repo_id = non_terminal[0]['id']
            elif len(filtered_repos) == 1:
                # Only one entry for this name/owner: track it.
                installing_repo_id = filtered_repos[0]['id']
            else:
                # We may have a repo that is permanently in a non-terminal state (e.g because of restart during installation).
                # Raise an exception and continue with the remaining repos.
                msg = "Could not track repository for name '%s', owner '%s', revision '%s'. "
                msg += "Please uninstall all non-terminal repositories and ensure revision '%s' is installable."
                raise AssertionError(
                    msg %
                    (name, owner, changeset_revision, changeset_revision))
        start = dt.datetime.now()
        while (dt.datetime.now() - start) < dt.timedelta(seconds=timeout):
            try:
                installed_repo = self.tool_shed_client.show_repository(
                    installing_repo_id)
                status = installed_repo['status']
                if status == 'Installed':
                    return True
                elif status == 'Error':
                    return False
                elif status in NON_TERMINAL_REPOSITORY_STATES:
                    time.sleep(10)
                else:
                    raise AssertionError(
                        "Repository name '%s', owner '%s' in unknown status '%s'"
                        % (name, owner, status))
            except ConnectionError as e:
                # Transient failure: report it and poll again after a pause.
                if log:
                    log.warning('Failed to get repositories list: %s',
                                unicodify(e))
                time.sleep(10)
        return False