Example No. 1
def test_export_dataset_with_deleted_and_purged():
    app, sa_session, h = _setup_history_for_export(
        "Datasets History with deleted")

    d1, d2 = _create_datasets(sa_session, h, 2)

    # Maybe use abstractions for deleting?
    d1.deleted = True
    d1.dataset.deleted = True
    d1.dataset.purged = False

    d2.deleted = True
    d2.dataset.deleted = True
    d2.dataset.purged = True

    j1 = model.Job()
    j1.user = h.user
    j1.tool_id = "cat1"

    j1.add_output_dataset("out_file1", d1)

    j2 = model.Job()
    j2.user = h.user
    j2.tool_id = "cat1"

    j2.add_output_dataset("out_file1", d2)

    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(j1)
    sa_session.add(j2)
    sa_session.add(h)
    sa_session.flush()

    assert d1.deleted

    app.object_store.update_from_file(d1,
                                      file_name="test-data/1.txt",
                                      create=True)
    app.object_store.update_from_file(d2,
                                      file_name="test-data/2.bed",
                                      create=True)

    imported_history = _import_export(app, h)

    datasets = list(imported_history.contents_iter(types=["dataset"]))
    assert len(datasets) == 1

    assert datasets[0].state == 'discarded'
    assert datasets[0].deleted
    assert datasets[0].dataset.deleted
    assert datasets[0].creating_job
Example No. 2
 def _assert_user_quota_is(self, user, amount):
     assert amount == self.quota_agent.get_quota(user)
     if amount is None:
         user.total_disk_usage = 1000
         job = model.Job()
         job.user = user
         assert not self.quota_agent.is_over_quota(None, job, None)
     else:
         job = model.Job()
         job.user = user
         user.total_disk_usage = amount - 1
         assert not self.quota_agent.is_over_quota(None, job, None)
         user.total_disk_usage = amount + 1
         assert self.quota_agent.is_over_quota(None, job, None)
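
A minimal sketch of how a helper like `_assert_user_quota_is` might be driven from a test method; the user fixture and the 1000-byte quota value are illustrative assumptions, not part of the original example.

def test_user_quota_sketch(self):
    # Sketch only: exercises the assertion helper above with and without a quota.
    user = model.User(email="*****@*****.**", password="******")
    self._assert_user_quota_is(user, None)  # no quota configured for this user yet
    # ... after registering a hypothetical 1000-byte default quota with self.quota_agent ...
    self._assert_user_quota_is(user, 1000)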
Example No. 3
def _setup_simple_cat_job(app):
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    d1, d2 = _create_datasets(sa_session, h, 2)

    j = model.Job()
    j.user = u
    j.tool_id = "cat1"

    j.add_input_dataset("input1", d1)
    j.add_output_dataset("out_file1", d2)

    sa_session.add_all((d1, d2, h, j))
    sa_session.flush()

    app.object_store.update_from_file(d1,
                                      file_name="test-data/1.txt",
                                      create=True)
    app.object_store.update_from_file(d2,
                                      file_name="test-data/2.bed",
                                      create=True)

    return u, h, d1, d2, j
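
A hypothetical consumer of the `_setup_simple_cat_job` fixture above; `_mock_app` is assumed to be the mock-application factory referenced elsewhere in these examples (e.g. Example No. 8 and Example No. 14).

def test_simple_cat_job_wiring():
    # Sketch only: checks the relationships the fixture above establishes.
    app = _mock_app()  # assumed helper, see Example No. 8 / No. 14
    u, h, d1, d2, j = _setup_simple_cat_job(app)
    assert j.user is u
    assert j.input_datasets[0].dataset is d1
    assert j.output_datasets[0].dataset is d2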
Example No. 4
def test_export_dataset():
    app, sa_session, h = _setup_history_for_export("Datasets History")

    d1, d2 = _create_datasets(sa_session, h, 2)
    d1_hash = model.DatasetHash()
    d1_hash.hash_function = "MD5"
    d1_hash.hash_value = "foobar"
    d1.dataset.hashes.append(d1_hash)
    d1.dataset.created_from_basename = "my_cool_name.txt"
    d1_source = model.DatasetSource()
    d1_source.source_uri = "http://google.com/mycooldata.txt"
    d1.dataset.sources.append(d1_source)

    d1.state = d2.state = 'ok'

    j = model.Job()
    j.user = h.user
    j.tool_id = "cat1"
    j.state = 'ok'

    j.add_input_dataset("input1", d1)
    j.add_output_dataset("out_file1", d2)

    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(h)
    sa_session.add(j)
    sa_session.flush()

    app.object_store.update_from_file(d1, file_name=t_data_path("1.txt"), create=True)
    app.object_store.update_from_file(d2, file_name=t_data_path("2.bed"), create=True)

    imported_history = _import_export(app, h)

    datasets = list(imported_history.contents_iter(types=["dataset"]))
    assert len(datasets) == 2
    imported_job = datasets[1].creating_job
    assert imported_job
    assert imported_job.output_datasets
    assert imported_job.output_datasets[0].dataset == datasets[1]

    assert imported_job.input_datasets
    assert imported_job.input_datasets[0].dataset == datasets[0]

    assert datasets[0].state == 'ok'
    assert datasets[1].state == 'ok'
    assert len(datasets[0].dataset.hashes) == 1
    dataset_hash = datasets[0].dataset.hashes[0]
    assert dataset_hash.hash_function == "MD5"
    assert dataset_hash.hash_value == "foobar"
    assert datasets[0].dataset.created_from_basename == "my_cool_name.txt"

    assert len(datasets[0].dataset.sources) == 1
    dataset_source = datasets[0].dataset.sources[0]
    assert dataset_source.source_uri == "http://google.com/mycooldata.txt"

    with open(datasets[0].file_name) as f:
        assert f.read().startswith("chr1    4225    19670")
    with open(datasets[1].file_name) as f:
        assert f.read().startswith("chr1\t147962192\t147962580\tNM_005997_cds_0_0_chr1_147962193_r\t0\t-")
Example No. 5
 def test_dataset_job_relationship(self):
     model = self.model
     dataset = model.Dataset()
     job = model.Job()
     dataset.job = job
     self.persist(job, dataset)
     loaded_dataset = model.session.query(model.Dataset).filter(model.Dataset.id == dataset.id).one()
     assert loaded_dataset.job_id == job.id
Example No. 6
def test_multi_inputs():
    app, sa_session, h = _setup_history_for_export("Datasets History")

    d1, d2, d3 = _create_datasets(sa_session, h, 3)

    j = model.Job()
    j.user = h.user
    j.tool_id = "cat_multi"

    # Emulate multiple data inputs into multi data input parameter...
    j.add_input_dataset("input1", d1)
    j.add_input_dataset("input11", d1)
    j.add_input_dataset("input12", d2)
    j.add_output_dataset("out_file1", d3)

    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(d3)
    sa_session.add(h)
    sa_session.add(j)
    sa_session.flush()

    app.object_store.update_from_file(d1,
                                      file_name="test-data/1.txt",
                                      create=True)
    app.object_store.update_from_file(d2,
                                      file_name="test-data/2.bed",
                                      create=True)
    app.object_store.update_from_file(d3,
                                      file_name="test-data/4.bed",
                                      create=True)

    imported_history = _import_export(app, h)

    datasets = list(imported_history.contents_iter(types=["dataset"]))
    assert len(datasets) == 3
    imported_job = datasets[2].creating_job
    assert imported_job
    assert imported_job.output_datasets
    assert imported_job.output_datasets[0].dataset.hid == 3
    assert imported_job.output_datasets[0].dataset == datasets[2]

    assert imported_job.input_datasets
    assert len(imported_job.input_datasets) == 3
    names = [d.name for d in imported_job.input_datasets]
    hids = [d.dataset.hid for d in imported_job.input_datasets]
    _assert_distinct(names)
    for name in ["input1", "input11", "input12"]:
        assert name in names
    for hid in [1, 2]:
        assert hid in hids

    with open(datasets[0].file_name, "r") as f:
        assert f.read().startswith("chr1    4225    19670")
    with open(datasets[1].file_name, "r") as f:
        assert f.read().startswith(
            "chr1\t147962192\t147962580\tNM_005997_cds_0_0_chr1_147962193_r\t0\t-"
        )
Example No. 7
 def test_implicit_map_job_hdca(self):
     creating_job = model.Job()
     hdca = MockHdca(implicit_output_name="out1", job=creating_job)
     self.history.active_datasets.append(hdca)
     job_dict, warnings = extract.summarize(trans=self.trans)
     assert not warnings
     assert len(job_dict) == 1
     job = next(iter(job_dict.keys()))
     assert job is creating_job
Example No. 8
def _run_jihaw_cleanup(archive_dir, app=None):
    app = app or _mock_app()
    job = model.Job()
    job.tool_stderr = ''
    jiha = model.JobImportHistoryArchive(job=job, archive_dir=archive_dir)
    app.model.context.current.add_all([job, jiha])
    app.model.context.flush()
    jihaw = JobImportHistoryArchiveWrapper(app, job.id)  # yeehaw!
    return app, jihaw.cleanup_after_job()
Example No. 9
 def _setup_test_output(self):
     dataset = model.Dataset()
     dataset.external_filename = "example_output"  # This way object store isn't asked about size...
     self.hda = model.HistoryDatasetAssociation(name="test", dataset=dataset)
     job = model.Job()
     job.add_output_dataset(DEFAULT_TOOL_OUTPUT, self.hda)
     self.app.model.context.add(job)
     self.job = job
     self.history = self._new_history(hdas=[self.hda])
     self.outputs = {DEFAULT_TOOL_OUTPUT: self.hda}
Example No. 10
    def test_tasks(self):
        model = self.model
        u = model.User(email="*****@*****.**", password="******")
        job = model.Job()
        task = model.Task(job=job, working_directory="/tmp", prepare_files_cmd="split.sh")
        job.user = u
        self.persist(u, job, task)

        loaded_task = model.session.query(model.Task).filter(model.Task.job == job).first()
        assert loaded_task.prepare_input_files_cmd == "split.sh"
Example No. 11
    def test_jobs(self):
        model = self.model
        u = model.User(email="*****@*****.**", password="******")
        job = model.Job()
        job.user = u
        job.tool_id = "cat1"

        self.persist(u, job)

        loaded_job = model.session.query(model.Job).filter(model.Job.user == u).first()
        assert loaded_job.tool_id == "cat1"
Example No. 12
 def __init__(self, state='ok', output_name='out1', job=None):
     self.id = 123
     self.state = state
     self.copied_from_history_dataset_association = None
     if job is not UNDEFINED_JOB:
         if not job:
             job = model.Job()
         self.job = job
         assoc = model.JobToOutputDatasetAssociation(output_name, self)
         assoc.job = job
         self.creating_job_associations = [assoc]
     else:
         self.creating_job_associations = []
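
A hedged sketch of the mock above in use; the class name `MockHda` is an assumption inferred from the attributes being set (the original only shows the `__init__` body), while `UNDEFINED_JOB` is the sentinel the constructor already references.

# Sketch only -- MockHda is an assumed class name for the __init__ above.
hda_with_job = MockHda()                      # default: a fresh model.Job() is attached
hda_without_job = MockHda(job=UNDEFINED_JOB)  # sentinel: no creating-job association
assert hda_with_job.creating_job_associations
assert not hda_without_job.creating_job_associations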
Example No. 13
def _run_jihaw_cleanup(history_archive, msg):
    app = MockApp()
    job = model.Job()
    job.stderr = ''
    jiha = model.JobImportHistoryArchive(job=job, archive_dir=history_archive.arc_directory)
    app.model.context.current.add_all([job, jiha])
    app.model.context.flush()
    jihaw = JobImportHistoryArchiveWrapper(app, 1)  # yeehaw!
    try:
        jihaw.cleanup_after_job()
        data = app.object_store.get_data(model.Dataset(1))
        assert data != 'insecure', msg
    except MalformedContents:
        pass
Example No. 14
def test_job_context_discover_outputs_flushes_once(mocker):
    app = _mock_app()
    sa_session = app.model.context
    # mocker is a pytest-mock fixture

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    tool = Tool(app)
    tool_provided_metadata = None
    job = model.Job()
    job.history = h
    sa_session.add(job)
    sa_session.flush()
    job_working_directory = tempfile.mkdtemp()
    setup_data(job_working_directory)
    permission_provider = PermissionProvider()
    metadata_source_provider = MetadataSourceProvider()
    object_store = app.object_store
    input_dbkey = '?'
    final_job_state = 'ok'
    collection_description = FilePatternDatasetCollectionDescription(
        pattern="__name__")
    collection = model.DatasetCollection(collection_type='list',
                                         populated=False)
    sa_session.add(collection)
    job_context = JobContext(tool, tool_provided_metadata, job,
                             job_working_directory, permission_provider,
                             metadata_source_provider, input_dbkey,
                             object_store, final_job_state)
    collection_builder = builder.BoundCollectionBuilder(collection)
    dataset_collectors = [dataset_collector(collection_description)]
    output_name = 'output'
    filenames = job_context.find_files(output_name, collection,
                                       dataset_collectors)
    assert len(filenames) == 10
    spy = mocker.spy(sa_session, 'flush')
    job_context.populate_collection_elements(
        collection,
        collection_builder,
        filenames,
        name=output_name,
        metadata_source_name='',
        final_job_state=job_context.final_job_state,
    )
    collection_builder.populate()
    assert spy.call_count == 0
    sa_session.flush()
    assert len(collection.dataset_instances) == 10
    assert collection.dataset_instances[0].dataset.file_size == 1
Example No. 15
def test_export_dataset():
    app, sa_session, h = _setup_history_for_export("Datasets History")

    d1, d2 = _create_datasets(sa_session, h, 2)
    d1.state = d2.state = 'ok'

    j = model.Job()
    j.user = h.user
    j.tool_id = "cat1"
    j.state = 'ok'

    j.add_input_dataset("input1", d1)
    j.add_output_dataset("out_file1", d2)

    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(h)
    sa_session.add(j)
    sa_session.flush()

    app.object_store.update_from_file(d1,
                                      file_name="test-data/1.txt",
                                      create=True)
    app.object_store.update_from_file(d2,
                                      file_name="test-data/2.bed",
                                      create=True)

    imported_history = _import_export(app, h)

    datasets = list(imported_history.contents_iter(types=["dataset"]))
    assert len(datasets) == 2
    imported_job = datasets[1].creating_job
    assert imported_job
    assert imported_job.output_datasets
    assert imported_job.output_datasets[0].dataset == datasets[1]

    assert imported_job.input_datasets
    assert imported_job.input_datasets[0].dataset == datasets[0]

    assert datasets[0].state == 'ok'
    assert datasets[1].state == 'ok'

    with open(datasets[0].file_name, "r") as f:
        assert f.read().startswith("chr1    4225    19670")
    with open(datasets[1].file_name, "r") as f:
        assert f.read().startswith(
            "chr1\t147962192\t147962580\tNM_005997_cds_0_0_chr1_147962193_r\t0\t-"
        )
Example No. 16
 def setUp(self):
     super(MetadataTestCase, self).setUp()
     self.setup_app()
     model.Dataset.object_store = self.app.object_store
     job = model.Job()
     sa_session = self.app.model.session
     sa_session.add(job)
     history = model.History()
     job.history = history
     sa_session.flush()
     self.job = job
     self.history = history
     self.job_working_directory = os.path.join(self.test_directory,
                                               "job_working")
     self.tool_working_directory = os.path.join(self.job_working_directory,
                                                "working")
     os.mkdir(self.job_working_directory)
     os.mkdir(self.tool_working_directory)
Example No. 17
 def test_history_resume(self):
     user2 = self.user_manager.create(**user2_data)
     history = self.history_manager.create(name='history', user=user2)
     # No running jobs
     history.resume_paused_jobs()
     # Mock running jobs
     with mock.patch('galaxy.model.History.paused_jobs',
                     new_callable=mock.PropertyMock) as mock_paused_jobs:
         job = model.Job()
         job.state = model.Job.states.PAUSED
         jobs = [job]
         self.trans.sa_session.add(jobs[0])
         self.trans.sa_session.flush()
         assert job.state == model.Job.states.PAUSED
         mock_paused_jobs.return_value = jobs
         history.resume_paused_jobs()
         mock_paused_jobs.assert_called_once()
         assert job.state == model.Job.states.NEW, job.state
Example No. 18
def _import_export(app, h, dest_export=None):
    if dest_export is None:
        dest_parent = tempfile.mkdtemp()
        dest_export = os.path.join(dest_parent, "moo.tgz")

    job = model.Job()
    jeha = model.JobExportHistoryArchive.create_for_history(
        h, job, app.model.context, app.object_store, compressed=True
    )
    wrapper = JobExportHistoryArchiveWrapper(app, job.id)
    wrapper.setup_job(h, jeha.temp_directory)

    from galaxy.tools.imp_exp import export_history
    ret = export_history.main(["--gzip", jeha.temp_directory, dest_export])
    assert ret == 0, ret

    _, imported_history = import_archive(dest_export, app=app)
    assert imported_history
    return imported_history
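
A hypothetical direct call of `_import_export` above with an explicit destination path; `_setup_history_for_export` is the setup helper used throughout these examples, and the archive file name is illustrative.

# Sketch only: round-trip a history through an explicit archive path.
app, sa_session, h = _setup_history_for_export("Round Trip History")
export_path = os.path.join(tempfile.mkdtemp(), "history.tgz")
imported_history = _import_export(app, h, dest_export=export_path)
assert os.path.exists(export_path)
assert imported_history is not None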
Example No. 19
    def test_job_metrics(self):
        job = model.Job()
        job.id = 1
        example = """# Example
```galaxy
job_metrics(job_id=1)
```
"""
        metrics = [
            {"plugin": "core", "title": "Cores Allocated", "value": 1},
            {"plugin": "core", "title": "Job Start Time", "value": "2019-12-17 11:53:13"},
            {"plugin": "env", "title": "GALAXY_HOME", "value": "/path/to/home"},
        ]
        with mock.patch.object(JobManager, 'get_accessible_job', return_value=job):
            with mock.patch("galaxy.managers.markdown_util.summarize_job_metrics", return_value=metrics):
                result = self._to_basic(example)
        assert "**core**\n" in result
        assert "**env**\n" in result
        assert "| Cores Allocated | 1 |\n" in result
        assert "| GALAXY_HOME | /path/to/home |\n" in result
Example No. 20
    def test_job_parameters(self):
        job = model.Job()
        job.id = 1
        example = """# Example
```galaxy
job_parameters(job_id=1)
```
"""
        parameters = [
            {
                "text": "Num Lines",
                "value": "6",
                "depth": 1
            },
            {
                "text": "Plot",
                "value": "coolselect",
                "depth": 2
            },
            {
                "text": "Input Dataset",
                "value": [{
                    "src": "hda",
                    "hid": 5,
                    "name": "Cool Data"
                }],
                "depth": 1
            },
        ]
        response = {"parameters": parameters}
        with mock.patch.object(JobManager,
                               'get_accessible_job',
                               return_value=job):
            with mock.patch(
                    "galaxy.managers.markdown_util.summarize_job_parameters",
                    return_value=response):
                result = self._to_basic(example)
        assert "| Num Lines |" in result
        assert "| > Plot |" in result
        assert "| Input Dataset | " in result
        assert "| 5: Cool Data |\n" in result
Example No. 21
    def test_job_metrics(self):
        model = self.model
        u = model.User(email="*****@*****.**", password="******")
        job = model.Job()
        job.user = u
        job.tool_id = "cat1"

        job.add_metric("gx", "galaxy_slots", 5)
        job.add_metric("system", "system_name", "localhost")

        self.persist(u, job)

        task = model.Task(job=job, working_directory="/tmp", prepare_files_cmd="split.sh")
        task.add_metric("gx", "galaxy_slots", 5)
        task.add_metric("system", "system_name", "localhost")

        big_value = ":".join("%d" % i for i in range(2000))
        task.add_metric("env", "BIG_PATH", big_value)
        self.persist(task)
        # Ensure big values truncated
        assert len(task.text_metrics[1].metric_value) <= 1023
Example No. 22
def _import_export(app, h, dest_export=None):
    if dest_export is None:
        dest_parent = mkdtemp()
        dest_export = os.path.join(dest_parent, "moo.tgz")

    dataset = model.Dataset(id=100)

    jeha = model.JobExportHistoryArchive(job=model.Job(),
                                         history=h,
                                         dataset=dataset,
                                         compressed=True)
    wrapper = JobExportHistoryArchiveWrapper(app, 1)
    wrapper.setup_job(jeha)

    from galaxy.tools.imp_exp import export_history
    ret = export_history.main(["--gzip", jeha.temp_directory, dest_export])
    assert ret == 0, ret

    _, imported_history = import_archive(dest_export, app=app)
    assert imported_history
    return imported_history
Example No. 23
    def __init__(self, app, test_directory, tool):
        working_directory = os.path.join(test_directory, "workdir")
        tool_working_directory = os.path.join(working_directory, "working")
        os.makedirs(tool_working_directory)
        self.app = app
        self.tool = tool
        self.requires_containerization = False
        self.state = model.Job.states.QUEUED
        self.command_line = "echo HelloWorld"
        self.environment_variables = []
        self.commands_in_new_shell = False
        self.prepare_called = False
        self.write_version_cmd = None
        self.dependency_shell_commands = None
        self.working_directory = working_directory
        self.tool_working_directory = tool_working_directory
        self.requires_setting_metadata = True
        self.job_destination = bunch.Bunch(id="default", params={})
        self.galaxy_lib_dir = os.path.abspath("lib")
        self.job = model.Job()
        self.job_id = 1
        self.job.id = 1
        self.output_paths = ['/tmp/output1.dat']
        self.mock_metadata_path = os.path.abspath(
            os.path.join(test_directory, "METADATA_SET"))
        self.metadata_command = "touch %s" % self.mock_metadata_path
        self.galaxy_virtual_env = None
        self.shell = "/bin/bash"
        self.cleanup_job = "never"
        self.tmp_dir_creation_statement = ""
        self.use_metadata_binary = False
        self.guest_ports = []

        # Cruft for setting metadata externally, axe at some point.
        self.external_output_metadata = bunch.Bunch(
            set_job_runner_external_pid=lambda pid, session: None)
        self.app.datatypes_registry.set_external_metadata_tool = bunch.Bunch(
            build_dependency_shell_commands=lambda: [])
Example No. 24
def __new_job(**kwds):
    job = model.Job()
    for key, value in kwds.items():
        setattr(job, key, value)
    return job
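
A short sketch of the keyword factory above in use; the leading underscores mark it as module-private, so calls like these would live in the same test module. The tool id and state values are illustrative.

# Sketch only: build two jobs differing only in state.
queued = __new_job(tool_id="cat1", state=model.Job.states.QUEUED)
running = __new_job(tool_id="cat1", state=model.Job.states.RUNNING)
assert queued.tool_id == running.tool_id == "cat1"
assert queued.state != running.state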
Example No. 25
def __two_jobs():
    job1 = model.Job()
    job1.id = 1
    job2 = model.Job()
    job2.id = 2
    return job1, job2
Example No. 26
    def cleanup_after_job(self):
        """ Set history, datasets, and jobs' attributes and clean up archive directory. """

        #
        # Helper methods.
        #

        def file_in_dir(file_path, a_dir):
            """ Returns true if file is in directory. """
            abs_file_path = os.path.abspath(file_path)
            return os.path.split(abs_file_path)[0] == a_dir

        def get_tag_str(tag, value):
            """ Builds a tag string for a tag, value pair. """
            if not value:
                return tag
            else:
                return tag + ":" + value

        #
        # Import history.
        #

        jiha = self.sa_session.query(model.JobImportHistoryArchive).filter_by(
            job_id=self.job_id).first()
        if jiha:
            try:
                archive_dir = jiha.archive_dir
                archive_dir = os.path.realpath(archive_dir)
                user = jiha.job.user

                # Bioblend versions prior to 17.01 exported histories with an extra subdirectory.
                if not os.path.exists(
                        os.path.join(archive_dir, 'history_attrs.txt')):
                    for d in os.listdir(archive_dir):
                        if os.path.isdir(os.path.join(archive_dir, d)):
                            archive_dir = os.path.join(archive_dir, d)
                            break

                #
                # Create history.
                #
                history_attr_file_name = os.path.join(archive_dir,
                                                      'history_attrs.txt')
                history_attrs = load(open(history_attr_file_name))

                # Create history.
                new_history = model.History(name='imported from archive: %s' %
                                            history_attrs['name'],
                                            user=user)
                new_history.importing = True
                new_history.hid_counter = history_attrs['hid_counter']
                new_history.genome_build = history_attrs['genome_build']
                self.sa_session.add(new_history)
                jiha.history = new_history
                self.sa_session.flush()

                # Add annotation, tags.
                if user:
                    self.add_item_annotation(self.sa_session, user,
                                             new_history,
                                             history_attrs['annotation'])
                    """
                    TODO: figure out how to add tags to an item.
                    for tag, value in history_attrs[ 'tags' ].items():
                        trans.app.tag_handler.apply_item_tags( trans, trans.user, new_history, get_tag_str( tag, value ) )
                    """

                #
                # Create datasets.
                #
                datasets_attrs_file_name = os.path.join(
                    archive_dir, 'datasets_attrs.txt')
                datasets_attrs = load(open(datasets_attrs_file_name))
                provenance_file_name = datasets_attrs_file_name + ".provenance"

                if os.path.exists(provenance_file_name):
                    provenance_attrs = load(open(provenance_file_name))
                    datasets_attrs += provenance_attrs

                # Get counts of how often each dataset file is used; a file can
                # be linked to multiple dataset objects (HDAs).
                datasets_usage_counts = {}
                for dataset_attrs in datasets_attrs:
                    temp_dataset_file_name = \
                        os.path.realpath(os.path.join(archive_dir, dataset_attrs['file_name']))
                    if (temp_dataset_file_name not in datasets_usage_counts):
                        datasets_usage_counts[temp_dataset_file_name] = 0
                    datasets_usage_counts[temp_dataset_file_name] += 1

                # Create datasets.
                for dataset_attrs in datasets_attrs:
                    metadata = dataset_attrs['metadata']

                    # Create dataset and HDA.
                    hda = model.HistoryDatasetAssociation(
                        name=dataset_attrs['name'],
                        extension=dataset_attrs['extension'],
                        info=dataset_attrs['info'],
                        blurb=dataset_attrs['blurb'],
                        peek=dataset_attrs['peek'],
                        designation=dataset_attrs['designation'],
                        visible=dataset_attrs['visible'],
                        dbkey=metadata['dbkey'],
                        metadata=metadata,
                        history=new_history,
                        create_dataset=True,
                        sa_session=self.sa_session)
                    if 'uuid' in dataset_attrs:
                        hda.dataset.uuid = dataset_attrs["uuid"]
                    if dataset_attrs.get('exported', True) is False:
                        hda.state = hda.states.DISCARDED
                        hda.deleted = True
                        hda.purged = True
                    else:
                        hda.state = hda.states.OK
                    self.sa_session.add(hda)
                    self.sa_session.flush()
                    new_history.add_dataset(hda, genome_build=None)
                    hda.hid = dataset_attrs[
                        'hid']  # Overwrite default hid set when HDA added to history.
                    # TODO: Is there a way to recover permissions? Is this needed?
                    # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                    # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                    self.sa_session.flush()
                    if dataset_attrs.get('exported', True) is True:
                        # Do security check and move/copy dataset data.
                        temp_dataset_file_name = \
                            os.path.realpath(os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name'])))
                        if not file_in_dir(
                                temp_dataset_file_name,
                                os.path.join(archive_dir, "datasets")):
                            raise MalformedContents(
                                "Invalid dataset path: %s" %
                                temp_dataset_file_name)
                        if datasets_usage_counts[temp_dataset_file_name] == 1:
                            self.app.object_store.update_from_file(
                                hda.dataset,
                                file_name=temp_dataset_file_name,
                                create=True)

                            # Import additional files if present. Histories exported previously might not have this attribute set.
                            dataset_extra_files_path = dataset_attrs.get(
                                'extra_files_path', None)
                            if dataset_extra_files_path:
                                try:
                                    file_list = os.listdir(
                                        os.path.join(archive_dir,
                                                     dataset_extra_files_path))
                                except OSError:
                                    file_list = []

                                if file_list:
                                    for extra_file in file_list:
                                        self.app.object_store.update_from_file(
                                            hda.dataset,
                                            extra_dir='dataset_%s_files' %
                                            hda.dataset.id,
                                            alt_name=extra_file,
                                            file_name=os.path.join(
                                                archive_dir,
                                                dataset_extra_files_path,
                                                extra_file),
                                            create=True)
                        else:
                            datasets_usage_counts[temp_dataset_file_name] -= 1
                            shutil.copyfile(temp_dataset_file_name,
                                            hda.file_name)
                        hda.dataset.set_total_size(
                        )  # update the filesize record in the database

                    # Set tags, annotations.
                    if user:
                        self.add_item_annotation(self.sa_session, user, hda,
                                                 dataset_attrs['annotation'])
                        # TODO: Set tags.
                        """
                        for tag, value in dataset_attrs[ 'tags' ].items():
                            trans.app.tag_handler.apply_item_tags( trans, trans.user, hda, get_tag_str( tag, value ) )
                            self.sa_session.flush()
                        """

                    # Although metadata is set above, need to set metadata to recover BAI for BAMs.
                    if hda.extension == 'bam':
                        self.app.datatypes_registry.set_external_metadata_tool.tool_action.execute_via_app(
                            self.app.datatypes_registry.
                            set_external_metadata_tool,
                            self.app,
                            jiha.job.session_id,
                            new_history.id,
                            jiha.job.user,
                            incoming={'input1': hda},
                            overwrite=False)

                #
                # Create jobs.
                #

                # Decode jobs attributes.
                def as_hda(obj_dct):
                    """ Hook to 'decode' an HDA; method uses history and HID to get the HDA represented by
                        the encoded object. This only works because HDAs are created above. """
                    if obj_dct.get('__HistoryDatasetAssociation__', False):
                        return self.sa_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=obj_dct['hid']).first()
                    return obj_dct

                jobs_attr_file_name = os.path.join(archive_dir,
                                                   'jobs_attrs.txt')
                jobs_attrs = load(open(jobs_attr_file_name),
                                  object_hook=as_hda)

                # Create each job.
                for job_attrs in jobs_attrs:
                    imported_job = model.Job()
                    imported_job.user = user
                    # TODO: set session?
                    # imported_job.session = trans.get_galaxy_session().id
                    imported_job.history = new_history
                    imported_job.imported = True
                    imported_job.tool_id = job_attrs['tool_id']
                    imported_job.tool_version = job_attrs['tool_version']
                    imported_job.set_state(job_attrs['state'])
                    imported_job.info = job_attrs.get('info', None)
                    imported_job.exit_code = job_attrs.get('exit_code', None)
                    imported_job.traceback = job_attrs.get('traceback', None)
                    imported_job.stdout = job_attrs.get('stdout', None)
                    imported_job.stderr = job_attrs.get('stderr', None)
                    imported_job.command_line = job_attrs.get(
                        'command_line', None)
                    try:
                        imported_job.create_time = datetime.datetime.strptime(
                            job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
                        imported_job.update_time = datetime.datetime.strptime(
                            job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
                    except Exception:
                        pass
                    self.sa_session.add(imported_job)
                    self.sa_session.flush()

                    class HistoryDatasetAssociationIDEncoder(json.JSONEncoder):
                        """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """
                        def default(self, obj):
                            """ Encode an HDA, default encoding for everything else. """
                            if isinstance(obj,
                                          model.HistoryDatasetAssociation):
                                return obj.id
                            return json.JSONEncoder.default(self, obj)

                    # Set parameters. May be useful to look at metadata.py for creating parameters.
                    # TODO: there may be a better way to set parameters, e.g.:
                    #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                    #       job.add_parameter( name, value )
                    # to make this work, we'd need to flesh out the HDA objects. The code below is
                    # relatively similar.
                    for name, value in job_attrs['params'].items():
                        # Transform parameter values when necessary.
                        if isinstance(value, model.HistoryDatasetAssociation):
                            # HDA input: use hid to find input.
                            input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                            .filter_by(history=new_history, hid=value.hid).first()
                            value = input_hda.id
                        # print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                        imported_job.add_parameter(
                            name,
                            dumps(value,
                                  cls=HistoryDatasetAssociationIDEncoder))

                    # TODO: Connect jobs to input datasets.

                    # Connect jobs to output datasets.
                    for output_hid in job_attrs['output_datasets']:
                        # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                        output_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=output_hid).first()
                        if output_hda:
                            imported_job.add_output_dataset(
                                output_hda.name, output_hda)

                    # Connect jobs to input datasets.
                    if 'input_mapping' in job_attrs:
                        for input_name, input_hid in job_attrs[
                                'input_mapping'].items():
                            input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                            .filter_by(history=new_history, hid=input_hid).first()
                            if input_hda:
                                imported_job.add_input_dataset(
                                    input_name, input_hda)

                    self.sa_session.flush()

                # Done importing.
                new_history.importing = False
                self.sa_session.flush()

                # Cleanup.
                if os.path.exists(archive_dir):
                    shutil.rmtree(archive_dir)
            except Exception as e:
                jiha.job.stderr += "Error cleaning up history import job: %s" % e
                self.sa_session.flush()
                raise
Example No. 27
def test_export_collection_history():
    app, sa_session, h = _setup_history_for_export("Collection History")

    d1, d2, d3, d4 = _create_datasets(sa_session, h, 4)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")

    dce1 = model.DatasetCollectionElement(collection=c1,
                                          element=d1,
                                          element_identifier="forward",
                                          element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1,
                                          element=d2,
                                          element_identifier="reverse",
                                          element_index=1)

    c2 = model.DatasetCollection(collection_type="list:paired")
    hc2 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=2, collection=c2, name="HistoryCollectionTest2")

    cleaf = model.DatasetCollection(collection_type="paired")
    dce2leaf1 = model.DatasetCollectionElement(collection=cleaf,
                                               element=d3,
                                               element_identifier="forward",
                                               element_index=0)
    dce2leaf2 = model.DatasetCollectionElement(collection=cleaf,
                                               element=d4,
                                               element_identifier="reverse",
                                               element_index=1)

    dce21 = model.DatasetCollectionElement(collection=c2,
                                           element=cleaf,
                                           element_identifier="listel",
                                           element_index=0)

    j = model.Job()
    j.user = h.user
    j.tool_id = "cat1"
    j.add_input_dataset_collection("input1_collect", hc1)
    j.add_output_dataset_collection("output_collect", hc2)

    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(dce21)
    sa_session.add(dce2leaf1)
    sa_session.add(dce2leaf2)
    sa_session.add(hc1)
    sa_session.add(hc2)
    sa_session.add(j)
    sa_session.flush()

    imported_history = _import_export(app, h)

    datasets = imported_history.datasets
    assert len(datasets) == 4

    dataset_collections = list(
        imported_history.contents_iter(types=["dataset_collection"]))
    assert len(dataset_collections) == 2

    imported_hdca1 = dataset_collections[0]
    imported_hdca2 = dataset_collections[1]

    imported_collection_2 = imported_hdca2.collection
    assert imported_hdca1.collection.collection_type == "paired"
    assert imported_collection_2.collection_type == "list:paired"

    assert len(imported_collection_2.elements) == 1
    imported_top_level_element = imported_collection_2.elements[0]
    assert imported_top_level_element.element_identifier == "listel", imported_top_level_element.element_identifier
    assert imported_top_level_element.element_index == 0, imported_top_level_element.element_index
    imported_nested_collection = imported_top_level_element.child_collection
    assert len(imported_nested_collection.elements) == 2
    assert imported_nested_collection.collection_type == "paired", imported_nested_collection.collection_type

    assert len(imported_history.jobs) == 1
    imported_job = imported_history.jobs[0]
    assert imported_job
    assert len(imported_job.input_dataset_collections) == 1, len(
        imported_job.input_dataset_collections)
    assert len(imported_job.output_dataset_collection_instances) == 1
    assert imported_job.id != j.id
Example No. 28
    def cleanup_after_job(self, db_session):
        """ Set history, datasets, and jobs' attributes and clean up archive directory. """

        #
        # Helper methods.
        #

        def file_in_dir(file_path, a_dir):
            """ Returns true if file is in directory. """
            abs_file_path = os.path.abspath(file_path)
            return os.path.split(abs_file_path)[0] == a_dir

        def read_file_contents(file_path):
            """ Read contents of a file. """
            fp = open(file_path, 'rb')
            buffsize = 1048576
            file_contents = ''
            try:
                while True:
                    file_contents += fp.read(buffsize)
                    if not file_contents or len(file_contents) % buffsize != 0:
                        break
            except OverflowError:
                pass
            fp.close()
            return file_contents

        def get_tag_str(tag, value):
            """ Builds a tag string for a tag, value pair. """
            if not value:
                return tag
            else:
                return tag + ":" + value

        #
        # Import history.
        #

        jiha = db_session.query(model.JobImportHistoryArchive).filter_by(
            job_id=self.job_id).first()
        if jiha:
            try:
                archive_dir = jiha.archive_dir
                user = jiha.job.user

                #
                # Create history.
                #
                history_attr_file_name = os.path.join(archive_dir,
                                                      'history_attrs.txt')
                history_attr_str = read_file_contents(history_attr_file_name)
                history_attrs = from_json_string(history_attr_str)

                # Create history.
                new_history = model.History( name='imported from archive: %s' % history_attrs['name'].encode( 'utf-8' ), \
                                             user=user )
                new_history.importing = True
                new_history.hid_counter = history_attrs['hid_counter']
                new_history.genome_build = history_attrs['genome_build']
                db_session.add(new_history)
                jiha.history = new_history
                db_session.flush()

                # Add annotation, tags.
                if user:
                    self.add_item_annotation(db_session, user, new_history,
                                             history_attrs['annotation'])
                    """
                    TODO: figure out how to add tags to an item.
                    for tag, value in history_attrs[ 'tags' ].items():
                        trans.app.tag_handler.apply_item_tags( trans, trans.user, new_history, get_tag_str( tag, value ) )
                    """

                #
                # Create datasets.
                #
                datasets_attrs_file_name = os.path.join(
                    archive_dir, 'datasets_attrs.txt')
                datasets_attr_str = read_file_contents(
                    datasets_attrs_file_name)
                datasets_attrs = from_json_string(datasets_attr_str)

                # Get counts of how often each dataset file is used; a file can
                # be linked to multiple dataset objects (HDAs).
                datasets_usage_counts = {}
                for dataset_attrs in datasets_attrs:
                    temp_dataset_file_name = \
                        os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
                    if (temp_dataset_file_name not in datasets_usage_counts):
                        datasets_usage_counts[temp_dataset_file_name] = 0
                    datasets_usage_counts[temp_dataset_file_name] += 1

                # Create datasets.
                for dataset_attrs in datasets_attrs:
                    metadata = dataset_attrs['metadata']

                    # Create dataset and HDA.
                    hda = model.HistoryDatasetAssociation(
                        name=dataset_attrs['name'].encode('utf-8'),
                        extension=dataset_attrs['extension'],
                        info=dataset_attrs['info'].encode('utf-8'),
                        blurb=dataset_attrs['blurb'],
                        peek=dataset_attrs['peek'],
                        designation=dataset_attrs['designation'],
                        visible=dataset_attrs['visible'],
                        dbkey=metadata['dbkey'],
                        metadata=metadata,
                        history=new_history,
                        create_dataset=True,
                        sa_session=db_session)
                    hda.state = hda.states.OK
                    db_session.add(hda)
                    db_session.flush()
                    new_history.add_dataset(hda, genome_build=None)
                    hda.hid = dataset_attrs[
                        'hid']  # Overwrite default hid set when HDA added to history.
                    # TODO: Is there a way to recover permissions? Is this needed?
                    #permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                    #trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                    db_session.flush()

                    # Do security check and move/copy dataset data.
                    temp_dataset_file_name = \
                        os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
                    if not file_in_dir(temp_dataset_file_name,
                                       os.path.join(archive_dir, "datasets")):
                        raise Exception("Invalid dataset path: %s" %
                                        temp_dataset_file_name)
                    if datasets_usage_counts[temp_dataset_file_name] == 1:
                        shutil.move(temp_dataset_file_name, hda.file_name)
                    else:
                        datasets_usage_counts[temp_dataset_file_name] -= 1
                        shutil.copyfile(temp_dataset_file_name, hda.file_name)

                    # Set tags, annotations.
                    if user:
                        self.add_item_annotation(db_session, user, hda,
                                                 dataset_attrs['annotation'])
                        # TODO: Set tags.
                        """
                        for tag, value in dataset_attrs[ 'tags' ].items():
                            trans.app.tag_handler.apply_item_tags( trans, trans.user, hda, get_tag_str( tag, value ) )
                            db_session.flush()
                        """

                #
                # Create jobs.
                #

                # Read jobs attributes.
                jobs_attr_file_name = os.path.join(archive_dir,
                                                   'jobs_attrs.txt')
                jobs_attr_str = read_file_contents(jobs_attr_file_name)

                # Decode jobs attributes.
                def as_hda(obj_dct):
                    """ Hook to 'decode' an HDA; method uses history and HID to get the HDA represented by 
                        the encoded object. This only works because HDAs are created above. """
                    if obj_dct.get('__HistoryDatasetAssociation__', False):
                        return db_session.query( model.HistoryDatasetAssociation ) \
                                        .filter_by( history=new_history, hid=obj_dct['hid'] ).first()
                    return obj_dct

                jobs_attrs = from_json_string(jobs_attr_str,
                                              object_hook=as_hda)

                # Create each job.
                for job_attrs in jobs_attrs:
                    imported_job = model.Job()
                    imported_job.user = user
                    # TODO: set session?
                    # imported_job.session = trans.get_galaxy_session().id
                    imported_job.history = new_history
                    imported_job.tool_id = job_attrs['tool_id']
                    imported_job.tool_version = job_attrs['tool_version']
                    imported_job.set_state(job_attrs['state'])
                    imported_job.imported = True
                    db_session.add(imported_job)
                    db_session.flush()

                    class HistoryDatasetAssociationIDEncoder(
                            simplejson.JSONEncoder):
                        """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """
                        def default(self, obj):
                            """ Encode an HDA, default encoding for everything else. """
                            if isinstance(obj,
                                          model.HistoryDatasetAssociation):
                                return obj.id
                            return simplejson.JSONEncoder.default(self, obj)

                    # Set parameters. May be useful to look at metadata.py for creating parameters.
                    # TODO: there may be a better way to set parameters, e.g.:
                    #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                    #       job.add_parameter( name, value )
                    # to make this work, we'd need to flesh out the HDA objects. The code below is
                    # relatively similar.
                    for name, value in job_attrs['params'].items():
                        # Transform parameter values when necessary.
                        if isinstance(value, model.HistoryDatasetAssociation):
                            # HDA input: use hid to find input.
                            input_hda = db_session.query( model.HistoryDatasetAssociation ) \
                                            .filter_by( history=new_history, hid=value.hid ).first()
                            value = input_hda.id
                        #print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                        imported_job.add_parameter(
                            name,
                            to_json_string(
                                value, cls=HistoryDatasetAssociationIDEncoder))

                    # TODO: Connect jobs to input datasets.

                    # Connect jobs to output datasets.
                    for output_hid in job_attrs['output_datasets']:
                        #print "%s job has output dataset %i" % (imported_job.id, output_hid)
                        output_hda = db_session.query( model.HistoryDatasetAssociation ) \
                                        .filter_by( history=new_history, hid=output_hid ).first()
                        if output_hda:
                            imported_job.add_output_dataset(
                                output_hda.name, output_hda)

                    # Done importing.
                    new_history.importing = False

                    db_session.flush()

                # Cleanup.
                if os.path.exists(archive_dir):
                    shutil.rmtree(archive_dir)
            except Exception as e:
                jiha.job.stderr += "Error cleaning up history import job: %s" % e
                db_session.flush()
Example No. 29
 def get_job( self ):
     return model.Job()
Example No. 30
def test_export_collection_with_mapping_history():
    app, sa_session, h = _setup_history_for_export(
        "Collection Mapping History")

    d1, d2, d3, d4 = _create_datasets(sa_session, h, 4)

    c1 = model.DatasetCollection(collection_type="list")
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")
    dce1 = model.DatasetCollectionElement(collection=c1,
                                          element=d1,
                                          element_identifier="el1",
                                          element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1,
                                          element=d2,
                                          element_identifier="el2",
                                          element_index=1)

    c2 = model.DatasetCollection(collection_type="list")
    hc2 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=2, collection=c2, name="HistoryCollectionTest2")
    dce3 = model.DatasetCollectionElement(collection=c2,
                                          element=d3,
                                          element_identifier="el1",
                                          element_index=0)
    dce4 = model.DatasetCollectionElement(collection=c2,
                                          element=d4,
                                          element_identifier="el2",
                                          element_index=1)

    hc2.add_implicit_input_collection("input1", hc1)

    j1 = model.Job()
    j1.user = h.user
    j1.tool_id = "cat1"
    j1.add_input_dataset("input1", d1)
    j1.add_output_dataset("out_file1", d3)

    j2 = model.Job()
    j2.user = h.user
    j2.tool_id = "cat1"
    j2.add_input_dataset("input1", d2)
    j2.add_output_dataset("out_file1", d4)

    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(dce3)
    sa_session.add(dce4)
    sa_session.add(hc1)
    sa_session.add(hc2)
    sa_session.add(j1)
    sa_session.add(j2)
    sa_session.flush()

    implicit_collection_jobs = model.ImplicitCollectionJobs()
    j1.add_output_dataset_collection("out_file1", hc2)  # really?
    ija1 = model.ImplicitCollectionJobsJobAssociation()
    ija1.order_index = 0
    ija1.implicit_collection_jobs = implicit_collection_jobs
    ija1.job = j1

    j2.add_output_dataset_collection("out_file1", hc2)  # really?
    ija2 = model.ImplicitCollectionJobsJobAssociation()
    ija2.order_index = 1
    ija2.implicit_collection_jobs = implicit_collection_jobs
    ija2.job = j2

    sa_session.add(implicit_collection_jobs)
    sa_session.add(ija1)
    sa_session.add(ija2)
    sa_session.flush()

    imported_history = _import_export(app, h)
    assert len(imported_history.jobs) == 2
    imported_job0 = imported_history.jobs[0]

    imported_icj = imported_job0.implicit_collection_jobs_association.implicit_collection_jobs
    assert imported_icj
    assert len(imported_icj.jobs) == 2, len(imported_icj.jobs)