def _scripts_check_output(self, script, args):
     """Run ``scripts/<script>`` from the Galaxy root and return its output as text."""
     root = galaxy_directory()
     script_path = os.path.join(root, "scripts", script)
     # Run under a minimal environment so testing environment variables
     # can't interfere with config.
     minimal_env = {"PATH": os.environ.get("PATH", None)}
     raw_output = subprocess.check_output(
         ["python", script_path] + list(args), cwd=root, env=minimal_env)
     return unicodify(raw_output)
Beispiel #2
0
 def _test_simple_output(self):
     """End-to-end check of external metadata collection for one fasta output.

     Runs the metadata command for the ``cat.xml`` test tool and verifies
     the ``data_lines`` / ``sequences`` metadata computed for a two-line
     fasta file written as the tool's output.
     """
     source_file_name = os.path.join(
         galaxy_directory(), "test/functional/tools/for_workflows/cat.xml")
     self._init_tool_for_path(source_file_name)
     output_dataset = self._create_output_dataset(extension="fasta", )
     sa_session = self.app.model.session
     # Persist the freshly created dataset before building the command.
     sa_session.flush()
     output_datasets = {
         "out_file1": output_dataset,
     }
     command = self.metadata_command(output_datasets)
     # One fasta record over two lines -> expect sequences == 1, data_lines == 2.
     self._write_output_dataset_contents(output_dataset,
                                         ">seq1\nGCTGCATG\n")
     self._write_job_files()
     self.exec_metadata_command(command)
     assert self.metadata_compute_strategy
     metadata_set_successfully = self.metadata_compute_strategy.external_metadata_set_successfully(
         output_dataset,
         "out_file1",
         sa_session,
         working_directory=self.job_working_directory)
     assert metadata_set_successfully
     # Load the externally computed metadata back onto the dataset object.
     self.metadata_compute_strategy.load_metadata(
         output_dataset,
         "out_file1",
         sa_session,
         working_directory=self.job_working_directory)
     assert output_dataset.metadata.data_lines == 2
     assert output_dataset.metadata.sequences == 1
Beispiel #3
0
 def __init_pulsar_app(self, conf, pulsar_conf_path):
     """Build the embedded Pulsar app from an explicit conf dict or a YAML file.

     With neither a conf dict nor a conf path, only builds an app when
     ``default_build_pulsar_app`` requests one.
     """
     nothing_configured = conf is None and pulsar_conf_path is None
     if nothing_configured and not self.default_build_pulsar_app:
         self.pulsar_app = None
         return
     if conf is None:
         conf = {}
         if pulsar_conf_path is None:
             log.info("Creating a Pulsar app with default configuration (no pulsar_conf specified).")
         else:
             log.info("Loading Pulsar app configuration from %s" % pulsar_conf_path)
             with open(pulsar_conf_path, "r") as f:
                 conf.update(yaml.safe_load(f) or {})
     # Fill in Galaxy-provided defaults for anything the conf leaves out.
     if "job_metrics_config_file" not in conf:
         conf["job_metrics"] = self.app.job_metrics
     if "staging_directory" not in conf:
         conf["staging_directory"] = os.path.join(self.app.config.data_dir, "pulsar_staging")
     if "persistence_directory" not in conf:
         conf["persistence_directory"] = os.path.join(self.app.config.data_dir, "pulsar_persisted_data")
     if "galaxy_home" not in conf:
         conf["galaxy_home"] = galaxy_directory()
     self.pulsar_app = pulsar.core.PulsarApp(**conf)
Beispiel #4
0
class SpecialToolLoaderTestCase(BaseLoaderTestCase):
    """Loader tests for the special export-history tool definition."""

    source_file_name = os.path.join(
        galaxy_directory(),
        "lib/galaxy/tools/imp_exp/exp_history_to_archive.xml")
    source_contents = None

    def test_tool_type(self):
        # Probably we don't parse_tool_module any more? -
        # tool_type seems sufficient.
        module_info = self._tool_source.parse_tool_module()
        assert module_info[0] == "galaxy.tools"
        assert module_info[1] == "ExportHistoryTool"
        assert self._tool_source.parse_tool_type() == "export_history"

    def test_is_multi_byte(self):
        multi_byte = self._tool_source.parse_is_multi_byte()
        assert not multi_byte

    def test_version_command(self):
        source = self._tool_source
        assert source.parse_version_command() is None
        assert source.parse_version_command_interpreter() is None

    def test_action(self):
        action_info = self._tool_source.parse_action_module()
        assert action_info[0] == "galaxy.tools.actions.history_imp_exp"
        assert action_info[1] == "ExportHistoryToolAction"
Beispiel #5
0
class ExpressionTestToolLoaderTestCase(BaseLoaderTestCase):
    """Loader tests for null/boolean handling in an expression tool's tests."""

    source_file_name = os.path.join(
        galaxy_directory(),
        "test/functional/tools/expression_null_handling_boolean.xml")
    source_contents = None

    def test_test(self):
        test_dicts = self._tool_source.parse_tests_to_dict()['tests']
        assert len(test_dicts) == 3
        # The three tests pin the 'object' attribute to True, False and None
        # respectively (identity comparison is intentional for all three).
        for test_dict, expected in zip(test_dicts, [True, False, None]):
            assert 'outputs' in test_dict, test_dict
            first_output = test_dict['outputs'][0]
            attributes = first_output['attributes']
            assert 'object' in attributes
            assert attributes['object'] is expected
Beispiel #6
0
class CollectionOutputYamlTestCase(BaseLoaderTestCase):
    """A YAML tool creating a paired collection parses one output collection."""

    source_file_name = os.path.join(
        galaxy_directory(), "test/functional/tools/collection_creates_pair_y.yml")
    source_contents = None

    def test_tests(self):
        _, output_collections = self._tool_source.parse_outputs(None)
        assert len(output_collections) == 1
Beispiel #7
0
 def _scripts_check_output(self, script, args):
     """Run ``scripts/<script>`` with *args* and return its raw (bytes) output.

     The script is executed from the Galaxy root directory with a minimal
     environment so testing environment variables can't interfere with
     config loading.
     """
     cwd = galaxy_directory()
     cmd = ["python", os.path.join(cwd, "scripts", script)] + args
     # Don't let testing environment variables interfere with config.
     # Fall back to "" (not None): subprocess requires env values to be
     # strings and raises TypeError on None when PATH is unset.
     clean_env = {
         "PATH": os.environ.get("PATH", ""),
     }
     return subprocess.check_output(cmd, cwd=cwd, env=clean_env)
Beispiel #8
0
 def _test_primary_dataset_output_extension(self):
     """Check that a galaxy.json-supplied extension drives metadata collection.

     The output dataset starts with extension "auto"; the tool's
     ``galaxy.json`` declares ``ext: fasta`` and that is what metadata is
     computed against.
     """
     source_file_name = os.path.join(
         galaxy_directory(), "test/functional/tools/for_workflows/cat.xml")
     self._init_tool_for_path(source_file_name)
     # setting extension to 'auto' here, results in the extension specified in
     # galaxy.json (below) being respected.
     output_dataset = self._create_output_dataset(extension="auto", )
     sa_session = self.app.model.session
     # Persist the freshly created dataset before building the command.
     sa_session.flush()
     output_datasets = {
         "out_file1": output_dataset,
     }
     command = self.metadata_command(output_datasets)
     # Simulated tool-provided metadata: renames the dataset and fixes ext.
     self._write_galaxy_json(
         """{"type": "dataset", "dataset_id": "%s", "name": "my dynamic name", "ext": "fasta", "info": "my dynamic info"}"""
         % output_dataset.dataset.id)
     self._write_output_dataset_contents(output_dataset,
                                         ">seq1\nGCTGCATG\n")
     self._write_job_files()
     self.exec_metadata_command(command)
     assert self.metadata_compute_strategy
     metadata_set_successfully = self.metadata_compute_strategy.external_metadata_set_successfully(
         output_dataset,
         "out_file1",
         sa_session,
         working_directory=self.job_working_directory)
     assert metadata_set_successfully
     output_dataset.extension = "fasta"  # gets done in job finish...
     # Load the externally computed metadata back onto the dataset object.
     self.metadata_compute_strategy.load_metadata(
         output_dataset,
         "out_file1",
         sa_session,
         working_directory=self.job_working_directory)
     assert output_dataset.metadata.data_lines == 2
     assert output_dataset.metadata.sequences == 1
Beispiel #9
0
class EnvironmentVariablesTestCase(BaseLoaderTestCase):
    """A tool setting environment variables parses exactly one test."""

    source_file_name = os.path.join(
        galaxy_directory(), "test/functional/tools/environment_variables.xml")
    source_contents = None

    def test_tests(self):
        parsed = self._tool_source.parse_tests_to_dict()
        assert len(parsed["tests"]) == 1
Beispiel #10
0
class BuildListToolLoaderTestCase(BaseLoaderTestCase):
    """Loader tests for the built-in build-list collection tool."""

    source_file_name = os.path.join(
        galaxy_directory(), "lib/galaxy/tools/build_list.xml")
    source_contents = None

    def test_tool_type(self):
        module_info = self._tool_source.parse_tool_module()
        assert module_info[0] == "galaxy.tools"
        assert module_info[1] == "BuildListCollectionTool"
Beispiel #11
0
class QcStdioTestCase(BaseLoaderTestCase):
    """Stdio parsing for the qc_stdout tool: exit codes plus one QC regex."""

    source_file_name = os.path.join(
        galaxy_directory(), "test/functional/tools/qc_stdout.xml")
    source_contents = None

    def test_tests(self):
        exit_codes, regexes = self._tool_source.parse_stdio()
        assert len(exit_codes) == 2
        assert len(regexes) == 1
        first_regex = regexes[0]
        assert first_regex.error_level == 1.1
Beispiel #12
0
class ExpressionOutputDataToolLoaderTestCase(BaseLoaderTestCase):
    """Output parsing for an expression tool that emits a data output."""

    source_file_name = os.path.join(
        galaxy_directory(), "test/functional/tools/expression_pick_larger_file.xml")
    source_contents = None

    def test_output_parsing(self):
        outputs, _ = self._tool_source.parse_outputs(None)
        assert 'larger_file' in outputs
        larger_file_output = outputs['larger_file']
        assert larger_file_output.format == "data"
        assert larger_file_output.from_expression == "output"
Beispiel #13
0
class ExpectationsCommandVersionTestCase(BaseLoaderTestCase):
    """Parsing of command_version expectations from job_properties tests."""

    source_file_name = os.path.join(
        galaxy_directory(), "test/functional/tools/job_properties.xml")
    source_contents = None

    def test_tests(self):
        parsed_tests = self._tool_source.parse_tests_to_dict()["tests"]
        assert parsed_tests
        first_test = parsed_tests[0]
        assert len(first_test["command_version"]) == 1
Beispiel #14
0
def read_dbnames(filename):
    """Read genome build names from a UCSC-style builds file.

    Returns a list of ``(build_id, display_name)`` tuples: the "?" build
    first (if present), then UCSC builds grouped per species with newest
    revision first, then (when both groups are non-empty) a separator row
    followed by the sorted manual (integer-id) builds.  On any top-level
    error the partial list built so far is returned.
    """
    db_names = []
    try:
        ucsc_builds = {}
        man_builds = []  # assume these are integers
        name_to_db_base = {}
        if filename is None:
            # Should only be happening with the galaxy.tools.parameters.basic:GenomeBuildParameter docstring unit test
            filename = os.path.join(galaxy_directory(), 'tool-data', 'shared',
                                    'ucsc', 'builds.txt.sample')
        # Compile the revision pattern once, outside the per-line loop.
        build_rev_re = re.compile(r'\d+$')
        # Context manager ensures the file handle is closed (a bare
        # ``for line in open(...)`` previously leaked it).
        with open(filename) as fh:
            for line in fh:
                try:
                    if line.startswith("#"):
                        continue
                    fields = line.replace("\r", "").replace("\n", "").split("\t")
                    # Special case of unspecified build is at top of list
                    if fields[0] == "?":
                        db_names.insert(0, (fields[0], fields[1]))
                        continue
                    try:  # manual build (i.e. microbes)
                        int(fields[0])
                        man_builds.append((fields[1], fields[0]))
                    except ValueError:  # UCSC build (id has a non-numeric base)
                        db_base = fields[0].rstrip('0123456789')
                        if db_base not in ucsc_builds:
                            ucsc_builds[db_base] = []
                            name_to_db_base[fields[1]] = db_base
                        # we want to sort within a species numerically by revision number
                        try:
                            build_rev = int(build_rev_re.findall(fields[0])[0])
                        except Exception:
                            build_rev = 0
                        ucsc_builds[db_base].append(
                            (build_rev, fields[0], fields[1]))
                except Exception:
                    # Skip malformed lines rather than failing the whole file.
                    continue
        for name in sorted(name_to_db_base):
            db_base = name_to_db_base[name]
            # Newest revision first within each species.
            species_builds = sorted(ucsc_builds[db_base], reverse=True)
            db_names.extend(
                (build, build_name) for _, build, build_name in species_builds)
        if len(db_names) > 1 and len(man_builds) > 0:
            db_names.append((GenomeBuilds.default_value,
                             '----- Additional Species Are Below -----'))
        man_builds.sort()
        db_names.extend((build, name) for name, build in man_builds)
    except Exception as e:
        log.error("ERROR: Unable to read builds file: %s", unicodify(e))
    return db_names
Beispiel #15
0
class ExpectationsTestCase(BaseLoaderTestCase):
    """Parsing of stdout/stderr expectations from the detect_errors tool."""

    source_file_name = os.path.join(
        galaxy_directory(), "test/functional/tools/detect_errors.xml")
    source_contents = None

    def test_tests(self):
        parsed_tests = self._tool_source.parse_tests_to_dict()["tests"]
        assert len(parsed_tests) == 10
        first_test = parsed_tests[0]
        assert len(first_test["stderr"]) == 1
        assert len(first_test["stdout"]) == 2
Beispiel #16
0
class CollectionTestCase(BaseLoaderTestCase):
    """Loader tests for a tool consuming two paired collections."""

    source_file_name = os.path.join(
        galaxy_directory(), "test/functional/tools/collection_two_paired.xml")
    source_contents = None

    def test_tests(self):
        parsed_tests = self._tool_source.parse_tests_to_dict()["tests"]
        assert len(parsed_tests) == 2
        first_test = parsed_tests[0]
        assert len(first_test["inputs"]) == 3, first_test

        _, output_collections = self._tool_source.parse_outputs(None)
        assert len(output_collections) == 0
Beispiel #17
0
class ApplyRulesToolLoaderTestCase(BaseLoaderTestCase):
    """Loader tests for the built-in apply-rules collection tool."""

    source_file_name = os.path.join(
        galaxy_directory(), "lib/galaxy/tools/apply_rules.xml")
    source_contents = None

    def test_tool_type(self):
        module_info = self._tool_source.parse_tool_module()
        assert module_info[0] == "galaxy.tools"
        assert module_info[1] == "ApplyRulesTool"
        assert self._tool_source.parse_tool_type() == "apply_rules_to_collection"

    def test_outputs(self):
        outputs, output_collections = self._tool_source.parse_outputs(object())
        assert len(outputs) == 1
        assert len(output_collections) == 1
Beispiel #18
0
 def _scripts_check_output(self, script, args):
     """Run ``scripts/<script>`` with *args* and return its output as text.

     On a non-zero exit status the script's captured output is included in
     the raised exception message; other exceptions propagate unchanged.
     """
     cwd = galaxy_directory()
     cmd = ["python", os.path.join(cwd, "scripts", script)] + args
     clean_env = {
         "PATH": os.environ.get("PATH", None),
     }  # Don't let testing environment variables interfere with config.
     try:
         return unicodify(
             subprocess.check_output(cmd, cwd=cwd, env=clean_env))
     except subprocess.CalledProcessError as e:
         # Catch only the failure we can enrich; the broad
         # except/isinstance/re-raise dance is unnecessary.
         raise Exception("%s\nOutput was:\n%s" %
                         (unicodify(e), unicodify(e.output)))
Beispiel #19
0
 def test_list_discovery_extended(self):
     """Smoke test: run the metadata command for a discovered list collection
     under the "extended" metadata strategy (no post-run assertions here).
     """
     self.app.config.metadata_strategy = "extended"
     source_file_name = os.path.join(
         galaxy_directory(),
         "test/functional/tools/collection_split_on_column.xml")
     self._init_tool_for_path(source_file_name)
     # Collection starts unpopulated; its elements are discovered by the job.
     collection = model.DatasetCollection(populated=False)
     collection.collection_type = "list"
     output_dataset_collection = self._create_output_dataset_collection(
         collection=collection, )
     assert output_dataset_collection.collection
     command = self.metadata_command(
         {}, {"split_output": output_dataset_collection})
     # Two tabular part files for the tool's output discovery to pick up.
     self._write_work_dir_file("1.tabular", "1\n2\n3")
     self._write_work_dir_file("2.tabular", "4\n5\n6")
     self._write_job_files()
     self.exec_metadata_command(command)
def test_yaml_advanced_validation():
    """Validate the sample advanced job configs against the job config schema."""
    schema = GALAXY_SCHEMAS_PATH / 'job_config_schema.yml'
    integration_tests_dir = os.path.join(galaxy_directory(), "test",
                                         "integration")
    integration_conf_names = [
        "delay_job_conf.yml",
        "embedded_pulsar_metadata_job_conf.yml",
        "io_injection_job_conf.yml",
        "resubmission_job_conf.yml",
        "resubmission_default_job_conf.yml",
    ]
    valid_files = [ADVANCED_JOB_CONF_YAML] + [
        os.path.join(integration_tests_dir, name)
        for name in integration_conf_names
    ]
    for valid_file in valid_files:
        # Each file must validate cleanly; Core.validate raises on failure.
        Core(source_file=valid_file, schema_files=[str(schema)]).validate()
Beispiel #21
0
 def __init_pulsar_app(self, pulsar_conf_path):
     """Create the embedded Pulsar application, optionally from a YAML file.

     With no conf path, only builds an app when ``default_build_pulsar_app``
     requests one; otherwise loads the YAML conf and fills in Galaxy-provided
     defaults for any missing keys.
     """
     if pulsar_conf_path is None and not self.default_build_pulsar_app:
         self.pulsar_app = None
         return
     conf = {}
     if pulsar_conf_path is None:
         log.info("Creating a Pulsar app with default configuration (no pulsar_conf specified).")
     else:
         log.info("Loading Pulsar app configuration from %s" % pulsar_conf_path)
         with open(pulsar_conf_path, "r") as f:
             # safe_load: yaml.load without an explicit Loader is deprecated
             # and can execute arbitrary code on untrusted input.
             conf.update(yaml.safe_load(f) or {})
     if "job_metrics_config_file" not in conf:
         conf["job_metrics"] = self.app.job_metrics
     if "staging_directory" not in conf:
         conf["staging_directory"] = "database/pulsar_staging"
     if "persistence_directory" not in conf:
         conf["persistence_directory"] = "database/pulsar_persisted_data"
     if "galaxy_home" not in conf:
         conf["galaxy_home"] = galaxy_directory()
     self.pulsar_app = pulsar.core.PulsarApp(**conf)
Beispiel #22
0
import json
import os
import shutil
import subprocess
import tempfile

from galaxy.tools import expressions
from galaxy.util import galaxy_directory

# Absolute path to the ``lib`` directory of the Galaxy source tree.
LIB_DIRECTORY = os.path.join(galaxy_directory(), "lib")


def test_run_simple():
    test_directory = tempfile.mkdtemp()
    try:
        environment_path = os.path.join(test_directory, "env.json")
        environment = {
            'job': {
                'input1': '7'
            },
            'outputs': [{
                'name': 'out1',
                'from_expression': "output1",
                'path': 'moo'
            }],
            'script':
            "{return {'output1': parseInt($job.input1)};}",
        }
        with open(environment_path, "w") as f:
            json.dump(environment, f)
        expressions.write_evalute_script(test_directory, )
"""
Test lib/galaxy/visualization/plugins/registry.
"""
import os
import re
import unittest
from typing import Dict

from galaxy import model
from galaxy.app_unittest_utils import galaxy_mock
from galaxy.util import clean_multiline_string, galaxy_directory
from galaxy.visualization.plugins import plugin
from galaxy.visualization.plugins.registry import VisualizationsRegistry
from . import VisualizationsBase_TestCase

glx_dir = galaxy_directory()
template_cache_dir = os.path.join(glx_dir, 'database', 'compiled_templates')
addtional_templates_dir = os.path.join(glx_dir, 'config', 'plugins', 'visualizations', 'common', 'templates')
vis_reg_path = 'config/plugins/visualizations'

config1 = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE visualization SYSTEM "../../visualization.dtd">
<visualization name="scatterplot">
    <data_sources>
        <data_source>
            <model_class>HistoryDatasetAssociation</model_class>
            <test type="isinstance" test_attr="datatype" result_type="datatype">tabular.Tabular</test>
            <to_param param_attr="id">dataset_id</to_param>
        </data_source>
    </data_sources>
Beispiel #24
0
from galaxy.model.tool_shed_install import mapping as toolshed_mapping
from galaxy.tool_util.verify.interactor import GalaxyInteractorApi, verify_tool
from galaxy.util import asbool, download_to_file, galaxy_directory
from galaxy.util.properties import load_app_properties
from galaxy.webapps.galaxy import buildapp
from galaxy_test.base.api_util import get_admin_api_key, get_user_api_key
from galaxy_test.base.env import (
    DEFAULT_WEB_HOST,
    target_url_parts,
)
from galaxy_test.base.instrument import StructuredTestDataPlugin
from galaxy_test.base.nose_util import run
from tool_shed.webapp.app import UniverseApplication as ToolshedUniverseApplication
from .test_logging import logging_config_file

# Root of the Galaxy source checkout; all test paths below hang off it.
galaxy_root = galaxy_directory()
DEFAULT_CONFIG_PREFIX = "GALAXY"
GALAXY_TEST_DIRECTORY = os.path.join(galaxy_root, "test")
# Local dir plus the repo fetched on demand for larger test files.
GALAXY_TEST_FILE_DIR = "test-data,https://github.com/galaxyproject/galaxy-test-data.git"
TOOL_SHED_TEST_DATA = os.path.join(galaxy_root, "lib", "tool_shed", "test",
                                   "test_data")
TEST_WEBHOOKS_DIR = os.path.join(galaxy_root, "test", "functional", "webhooks")
# Framework (test/functional/tools) tool configs used by the test driver.
FRAMEWORK_TOOLS_DIR = os.path.join(GALAXY_TEST_DIRECTORY, "functional",
                                   "tools")
FRAMEWORK_UPLOAD_TOOL_CONF = os.path.join(FRAMEWORK_TOOLS_DIR,
                                          "upload_tool_conf.xml")
FRAMEWORK_SAMPLE_TOOLS_CONF = os.path.join(FRAMEWORK_TOOLS_DIR,
                                           "samples_tool_conf.xml")
FRAMEWORK_DATATYPES_CONF = os.path.join(FRAMEWORK_TOOLS_DIR,
                                        "sample_datatypes_conf.xml")
# Relative to galaxy_root at runtime — presumably resolved by the consumer.
MIGRATED_TOOL_PANEL_CONFIG = 'config/migrated_tools_conf.xml'
Beispiel #25
0
def t_data_path(name):
    """Return the absolute path of *name* inside Galaxy's ``test-data`` dir."""
    test_data_dir = os.path.join(galaxy_directory(), "test-data")
    return os.path.join(test_data_dir, name)
Beispiel #26
0
def do_split(job_wrapper):
    """Split a job's inputs into per-task working directories.

    Reads the tool's parallelism attributes (``split_inputs`` /
    ``shared_inputs``), splits the single splittable input type into
    ``task_<n>`` subdirectories, symlinks shared inputs into each, and
    returns the created ``model.Task`` objects.

    Raises ``Exception`` when the parallelism attributes conflict, when more
    than one input type would need splitting, or when a split input spans
    multiple files.
    """
    parent_job = job_wrapper.get_job()
    working_directory = os.path.abspath(job_wrapper.working_directory)

    parallel_settings = job_wrapper.tool.parallelism.attributes
    # Syntax: split_inputs="input1,input2" shared_inputs="genome"
    # Designates inputs to be split or shared
    split_inputs = parallel_settings.get("split_inputs")
    if split_inputs is None:
        split_inputs = []
    else:
        split_inputs = [x.strip() for x in split_inputs.split(",")]

    shared_inputs = parallel_settings.get("shared_inputs")
    if shared_inputs is None:
        shared_inputs = []
    else:
        shared_inputs = [x.strip() for x in shared_inputs.split(",")]
    illegal_inputs = [x for x in shared_inputs if x in split_inputs]
    if illegal_inputs:
        raise Exception("Inputs have conflicting parallelism attributes: %s" %
                        str(illegal_inputs))

    subdir_index = [0]  # mutable cell so the nested closure can count dirs
    task_dirs = []

    def get_new_working_directory_name():
        # Allocate the next sequential task_<n> directory.
        new_dir = os.path.join(working_directory, 'task_%d' % subdir_index[0])
        subdir_index[0] += 1
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
        task_dirs.append(new_dir)
        return new_dir

    # For things like paired end alignment, we need two inputs to be split. Since all inputs to all
    # derived subtasks need to be correlated, allow only one input type to be split
    type_to_input_map = {}
    for job_input in parent_job.input_datasets:
        if job_input.name in split_inputs:
            type_to_input_map.setdefault(job_input.dataset.datatype,
                                         []).append(job_input.name)
        elif job_input.name in shared_inputs:
            pass  # pass original file name
        else:
            log_error = "The input '%s' does not define a method for implementing parallelism" % str(
                job_input.name)
            log.exception(log_error)
            raise Exception(log_error)

    if len(type_to_input_map) > 1:
        log_error = "The multi splitter does not support splitting inputs of more than one type"
        log.error(log_error)
        raise Exception(log_error)
    if not type_to_input_map:
        # Explicit error instead of an obscure failure on the lookup below.
        log_error = "Splitting requires at least one split input"
        log.error(log_error)
        raise Exception(log_error)

    # split the first one to build up the task directories
    input_datasets = []
    for job_input in parent_job.input_datasets:
        if job_input.name in split_inputs:
            this_input_files = job_wrapper.get_input_dataset_fnames(
                job_input.dataset)
            if len(this_input_files) > 1:
                log_error = "The input '%s' is composed of multiple files - splitting is not allowed" % str(
                    job_input.name)
                log.error(log_error)
                raise Exception(log_error)
            input_datasets.append(job_input.dataset)

    # BUGFIX: dict views are not indexable on Python 3 — .keys()[0] raised
    # TypeError.  next(iter(...)) retrieves the single key instead.
    input_type = next(iter(type_to_input_map))
    # DBTODO execute an external task to do the splitting, this should happen at refactor.
    # If the number of tasks is sufficiently high, we can use it to calculate job completion % and give a running status.
    try:
        input_type.split(input_datasets, get_new_working_directory_name,
                         parallel_settings)
    except AttributeError:
        log_error = "The type '%s' does not define a method for splitting files" % str(
            input_type)
        log.error(log_error)
        raise
    log.debug('do_split created %d parts' % len(task_dirs))
    # next, after we know how many divisions there are, add the shared inputs via soft links
    for job_input in parent_job.input_datasets:
        if job_input and job_input.name in shared_inputs:
            names = job_wrapper.get_input_dataset_fnames(job_input.dataset)
            for task_dir in task_dirs:
                for file_name in names:
                    os.symlink(file_name,
                               os.path.join(task_dir,
                                            os.path.basename(file_name)))
    tasks = []
    prepare_files = os.path.join(util.galaxy_directory(),
                                 'extract_dataset_parts.sh') + ' %s'
    for task_dir in task_dirs:
        tasks.append(model.Task(parent_job, task_dir, prepare_files % task_dir))
    return tasks
Beispiel #27
0
def test_get_file_peek():
    # should get the first 5 lines of the file without a trailing newline character
    expected = (
        'chr22\t1000\tNM_17\n'
        'chr22\t2000\tNM_18\n'
        'chr10\t2200\tNM_10\n'
        'chr10\thap\ttest\n'
        'chr10\t1200\tNM_11\n'
    )
    peek = get_file_peek(
        os.path.join(galaxy_directory(), 'test-data/1.tabular'),
        line_wrap=False)
    assert peek == expected
Beispiel #28
0
import os
import shutil
import tarfile
import tempfile
from contextlib import contextmanager

from galaxy.tools.repositories import ValidationContext
from galaxy.util import galaxy_directory
from tool_shed.tools.tool_validator import ToolValidator
from ..unittest_utils.galaxy_mock import MockApp

# Tarball of the bismark tool repository used as a validation fixture.
BISMARK_TAR = os.path.join(galaxy_directory(),
                           'lib/tool_shed/test/test_data/bismark/bismark.tar')
# Sample bowtie2 .loc file supplying index table entries for the tests.
BOWTIE2_INDICES = os.path.join(
    galaxy_directory(),
    'lib/tool_shed/test/test_data/bowtie2_loc_sample/bowtie2_indices.loc.sample'
)


def test_validate_valid_tool():
    """A known-good tool from the bismark fixture loads and validates cleanly."""
    with get_tool_validator() as tv, setup_bismark() as repo_dir:
        tool_path = os.path.join(repo_dir, 'bismark_methylation_extractor.xml')
        tool, valid, message = tv.load_tool_from_config(repository_id=None,
                                                        full_path=tool_path)
        assert tool.name == 'Bismark'
        assert valid is True
        assert message is None
        assert not tool.params_with_missing_data_table_entry
        assert not tool.params_with_missing_index_file

Beispiel #29
0
def do_split(job_wrapper):
    """Split a job's inputs into per-task working directories.

    Reads the tool's parallelism attributes (``split_inputs`` /
    ``shared_inputs``), splits the single splittable input type into
    ``task_<n>`` subdirectories, symlinks shared inputs into each, and
    returns the created ``model.Task`` objects.

    Raises ``Exception`` when the parallelism attributes conflict, when more
    than one input type would need splitting, or when a split input spans
    multiple files.
    """
    parent_job = job_wrapper.get_job()
    working_directory = os.path.abspath(job_wrapper.working_directory)

    parallel_settings = job_wrapper.tool.parallelism.attributes
    # Syntax: split_inputs="input1,input2" shared_inputs="genome"
    # Designates inputs to be split or shared
    split_inputs = parallel_settings.get("split_inputs")
    if split_inputs is None:
        split_inputs = []
    else:
        split_inputs = [x.strip() for x in split_inputs.split(",")]

    shared_inputs = parallel_settings.get("shared_inputs")
    if shared_inputs is None:
        shared_inputs = []
    else:
        shared_inputs = [x.strip() for x in shared_inputs.split(",")]
    illegal_inputs = [x for x in shared_inputs if x in split_inputs]
    if illegal_inputs:
        raise Exception("Inputs have conflicting parallelism attributes: %s" % str(illegal_inputs))

    subdir_index = [0]  # mutable cell so the nested closure can count dirs
    task_dirs = []

    def get_new_working_directory_name():
        # Allocate the next sequential task_<n> directory.
        new_dir = os.path.join(working_directory, 'task_%d' % subdir_index[0])
        subdir_index[0] += 1
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
        task_dirs.append(new_dir)
        return new_dir

    # For things like paired end alignment, we need two inputs to be split. Since all inputs to all
    # derived subtasks need to be correlated, allow only one input type to be split
    type_to_input_map = {}
    for job_input in parent_job.input_datasets:
        if job_input.name in split_inputs:
            type_to_input_map.setdefault(job_input.dataset.datatype, []).append(job_input.name)
        elif job_input.name in shared_inputs:
            pass  # pass original file name
        else:
            log_error = "The input '%s' does not define a method for implementing parallelism" % str(job_input.name)
            log.exception(log_error)
            raise Exception(log_error)

    if len(type_to_input_map) > 1:
        log_error = "The multi splitter does not support splitting inputs of more than one type"
        log.error(log_error)
        raise Exception(log_error)

    # split the first one to build up the task directories
    input_datasets = []
    for job_input in parent_job.input_datasets:
        if job_input.name in split_inputs:
            this_input_files = job_wrapper.get_input_dataset_fnames(job_input.dataset)
            if len(this_input_files) > 1:
                log_error = "The input '%s' is composed of multiple files - splitting is not allowed" % str(job_input.name)
                log.error(log_error)
                raise Exception(log_error)
            input_datasets.append(job_input.dataset)

    # BUGFIX: dict views are not indexable on Python 3 — .keys()[0] raised
    # TypeError.  next(iter(...)) retrieves the single key instead.
    input_type = next(iter(type_to_input_map))
    # DBTODO execute an external task to do the splitting, this should happen at refactor.
    # If the number of tasks is sufficiently high, we can use it to calculate job completion % and give a running status.
    try:
        input_type.split(input_datasets, get_new_working_directory_name, parallel_settings)
    except AttributeError:
        log_error = "The type '%s' does not define a method for splitting files" % str(input_type)
        log.error(log_error)
        raise
    log.debug('do_split created %d parts' % len(task_dirs))
    # next, after we know how many divisions there are, add the shared inputs via soft links
    for job_input in parent_job.input_datasets:
        if job_input and job_input.name in shared_inputs:
            names = job_wrapper.get_input_dataset_fnames(job_input.dataset)
            for task_dir in task_dirs:
                for file_name in names:
                    os.symlink(file_name, os.path.join(task_dir, os.path.basename(file_name)))
    tasks = []
    prepare_files = os.path.join(util.galaxy_directory(), 'extract_dataset_parts.sh') + ' %s'
    for task_dir in task_dirs:
        tasks.append(model.Task(parent_job, task_dir, prepare_files % task_dir))
    return tasks