def test_export_dataset_with_deleted_and_purged():
    app, sa_session, h = _setup_history_for_export("Datasets History with deleted")
    d1, d2 = _create_datasets(sa_session, h, 2)
    # Maybe use abstractions for deleting?
    d1.deleted = True
    d1.dataset.deleted = True
    d1.dataset.purged = False
    d2.deleted = True
    d2.dataset.deleted = True
    d2.dataset.purged = True
    j1 = model.Job()
    j1.user = h.user
    j1.tool_id = "cat1"
    j1.add_output_dataset("out_file1", d1)
    j2 = model.Job()
    j2.user = h.user
    j2.tool_id = "cat1"
    j2.add_output_dataset("out_file1", d2)
    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(j1)
    sa_session.add(j2)
    sa_session.add(h)
    sa_session.flush()
    assert d1.deleted
    app.object_store.update_from_file(d1, file_name="test-data/1.txt", create=True)
    app.object_store.update_from_file(d2, file_name="test-data/2.bed", create=True)
    imported_history = _import_export(app, h)
    datasets = list(imported_history.contents_iter(types=["dataset"]))
    assert len(datasets) == 1
    assert datasets[0].state == 'discarded'
    assert datasets[0].deleted
    assert datasets[0].dataset.deleted
    assert datasets[0].creating_job
def _assert_user_quota_is(self, user, amount):
    assert amount == self.quota_agent.get_quota(user)
    if amount is None:
        user.total_disk_usage = 1000
        job = model.Job()
        job.user = user
        assert not self.quota_agent.is_over_quota(None, job, None)
    else:
        job = model.Job()
        job.user = user
        user.total_disk_usage = amount - 1
        assert not self.quota_agent.is_over_quota(None, job, None)
        user.total_disk_usage = amount + 1
        assert self.quota_agent.is_over_quota(None, job, None)
def _setup_simple_cat_job(app):
    sa_session = app.model.context
    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)
    d1, d2 = _create_datasets(sa_session, h, 2)
    j = model.Job()
    j.user = u
    j.tool_id = "cat1"
    j.add_input_dataset("input1", d1)
    j.add_output_dataset("out_file1", d2)
    sa_session.add_all((d1, d2, h, j))
    sa_session.flush()
    app.object_store.update_from_file(d1, file_name="test-data/1.txt", create=True)
    app.object_store.update_from_file(d2, file_name="test-data/2.bed", create=True)
    return u, h, d1, d2, j
def test_export_dataset():
    app, sa_session, h = _setup_history_for_export("Datasets History")
    d1, d2 = _create_datasets(sa_session, h, 2)
    d1_hash = model.DatasetHash()
    d1_hash.hash_function = "MD5"
    d1_hash.hash_value = "foobar"
    d1.dataset.hashes.append(d1_hash)
    d1.dataset.created_from_basename = "my_cool_name.txt"
    d1_source = model.DatasetSource()
    d1_source.source_uri = "http://google.com/mycooldata.txt"
    d1.dataset.sources.append(d1_source)
    d1.state = d2.state = 'ok'
    j = model.Job()
    j.user = h.user
    j.tool_id = "cat1"
    j.state = 'ok'
    j.add_input_dataset("input1", d1)
    j.add_output_dataset("out_file1", d2)
    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(h)
    sa_session.add(j)
    sa_session.flush()
    app.object_store.update_from_file(d1, file_name=t_data_path("1.txt"), create=True)
    app.object_store.update_from_file(d2, file_name=t_data_path("2.bed"), create=True)
    imported_history = _import_export(app, h)
    datasets = list(imported_history.contents_iter(types=["dataset"]))
    assert len(datasets) == 2
    imported_job = datasets[1].creating_job
    assert imported_job
    assert imported_job.output_datasets
    assert imported_job.output_datasets[0].dataset == datasets[1]
    assert imported_job.input_datasets
    assert imported_job.input_datasets[0].dataset == datasets[0]
    assert datasets[0].state == 'ok'
    assert datasets[1].state == 'ok'
    assert len(datasets[0].dataset.hashes) == 1
    dataset_hash = datasets[0].dataset.hashes[0]
    assert dataset_hash.hash_function == "MD5"
    assert dataset_hash.hash_value == "foobar"
    assert datasets[0].dataset.created_from_basename == "my_cool_name.txt"
    assert len(datasets[0].dataset.sources) == 1
    dataset_source = datasets[0].dataset.sources[0]
    assert dataset_source.source_uri == "http://google.com/mycooldata.txt"
    with open(datasets[0].file_name) as f:
        assert f.read().startswith("chr1 4225 19670")
    with open(datasets[1].file_name) as f:
        assert f.read().startswith("chr1\t147962192\t147962580\tNM_005997_cds_0_0_chr1_147962193_r\t0\t-")
def test_dataset_job_relationship(self):
    model = self.model
    dataset = model.Dataset()
    job = model.Job()
    dataset.job = job
    self.persist(job, dataset)
    loaded_dataset = model.session.query(model.Dataset).filter(model.Dataset.id == dataset.id).one()
    assert loaded_dataset.job_id == job.id
def test_multi_inputs():
    app, sa_session, h = _setup_history_for_export("Datasets History")
    d1, d2, d3 = _create_datasets(sa_session, h, 3)
    j = model.Job()
    j.user = h.user
    j.tool_id = "cat_multi"
    # Emulate multiple data inputs into multi data input parameter...
    j.add_input_dataset("input1", d1)
    j.add_input_dataset("input11", d1)
    j.add_input_dataset("input12", d2)
    j.add_output_dataset("out_file1", d3)
    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(d3)
    sa_session.add(h)
    sa_session.add(j)
    sa_session.flush()
    app.object_store.update_from_file(d1, file_name="test-data/1.txt", create=True)
    app.object_store.update_from_file(d2, file_name="test-data/2.bed", create=True)
    app.object_store.update_from_file(d3, file_name="test-data/4.bed", create=True)
    imported_history = _import_export(app, h)
    datasets = list(imported_history.contents_iter(types=["dataset"]))
    assert len(datasets) == 3
    imported_job = datasets[2].creating_job
    assert imported_job
    assert imported_job.output_datasets
    assert imported_job.output_datasets[0].dataset.hid == 3
    assert imported_job.output_datasets[0].dataset == datasets[2]
    assert imported_job.input_datasets
    assert len(imported_job.input_datasets) == 3
    names = [d.name for d in imported_job.input_datasets]
    hids = [d.dataset.hid for d in imported_job.input_datasets]
    _assert_distinct(names)
    for name in ["input1", "input11", "input12"]:
        assert name in names
    for hid in [1, 2]:
        assert hid in hids
    with open(datasets[0].file_name, "r") as f:
        assert f.read().startswith("chr1 4225 19670")
    with open(datasets[1].file_name, "r") as f:
        assert f.read().startswith("chr1\t147962192\t147962580\tNM_005997_cds_0_0_chr1_147962193_r\t0\t-")
def test_implicit_map_job_hdca(self):
    creating_job = model.Job()
    hdca = MockHdca(implicit_output_name="out1", job=creating_job)
    self.history.active_datasets.append(hdca)
    job_dict, warnings = extract.summarize(trans=self.trans)
    assert not warnings
    assert len(job_dict) == 1
    job = next(iter(job_dict.keys()))
    assert job is creating_job
def _run_jihaw_cleanup(archive_dir, app=None):
    app = app or _mock_app()
    job = model.Job()
    job.tool_stderr = ''
    jiha = model.JobImportHistoryArchive(job=job, archive_dir=archive_dir)
    app.model.context.current.add_all([job, jiha])
    app.model.context.flush()
    jihaw = JobImportHistoryArchiveWrapper(app, job.id)  # yeehaw!
    return app, jihaw.cleanup_after_job()
def _setup_test_output(self):
    dataset = model.Dataset()
    dataset.external_filename = "example_output"  # This way object store isn't asked about size...
    self.hda = model.HistoryDatasetAssociation(name="test", dataset=dataset)
    job = model.Job()
    job.add_output_dataset(DEFAULT_TOOL_OUTPUT, self.hda)
    self.app.model.context.add(job)
    self.job = job
    self.history = self._new_history(hdas=[self.hda])
    self.outputs = {DEFAULT_TOOL_OUTPUT: self.hda}
def test_tasks(self):
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    job = model.Job()
    task = model.Task(job=job, working_directory="/tmp", prepare_files_cmd="split.sh")
    job.user = u
    self.persist(u, job, task)
    loaded_task = model.session.query(model.Task).filter(model.Task.job == job).first()
    assert loaded_task.prepare_input_files_cmd == "split.sh"
def test_jobs(self):
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    job = model.Job()
    job.user = u
    job.tool_id = "cat1"
    self.persist(u, job)
    loaded_job = model.session.query(model.Job).filter(model.Job.user == u).first()
    assert loaded_job.tool_id == "cat1"
def __init__(self, state='ok', output_name='out1', job=None):
    self.id = 123
    self.state = state
    self.copied_from_history_dataset_association = None
    if job is not UNDEFINED_JOB:
        if not job:
            job = model.Job()
        self.job = job
        assoc = model.JobToOutputDatasetAssociation(output_name, self)
        assoc.job = job
        self.creating_job_associations = [assoc]
    else:
        self.creating_job_associations = []
def _run_jihaw_cleanup(history_archive, msg):
    app = MockApp()
    job = model.Job()
    job.stderr = ''
    jiha = model.JobImportHistoryArchive(job=job, archive_dir=history_archive.arc_directory)
    app.model.context.current.add_all([job, jiha])
    app.model.context.flush()
    jihaw = JobImportHistoryArchiveWrapper(app, 1)  # yeehaw!
    try:
        jihaw.cleanup_after_job()
        data = app.object_store.get_data(model.Dataset(1))
        assert data != 'insecure', msg
    except MalformedContents:
        pass
def test_job_context_discover_outputs_flushes_once(mocker):
    app = _mock_app()
    sa_session = app.model.context
    # mocker is a pytest-mock fixture
    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)
    tool = Tool(app)
    tool_provided_metadata = None
    job = model.Job()
    job.history = h
    sa_session.add(job)
    sa_session.flush()
    job_working_directory = tempfile.mkdtemp()
    setup_data(job_working_directory)
    permission_provider = PermissionProvider()
    metadata_source_provider = MetadataSourceProvider()
    object_store = app.object_store
    input_dbkey = '?'
    final_job_state = 'ok'
    collection_description = FilePatternDatasetCollectionDescription(pattern="__name__")
    collection = model.DatasetCollection(collection_type='list', populated=False)
    sa_session.add(collection)
    job_context = JobContext(tool, tool_provided_metadata, job, job_working_directory,
                             permission_provider, metadata_source_provider, input_dbkey,
                             object_store, final_job_state)
    collection_builder = builder.BoundCollectionBuilder(collection)
    dataset_collectors = [dataset_collector(collection_description)]
    output_name = 'output'
    filenames = job_context.find_files(output_name, collection, dataset_collectors)
    assert len(filenames) == 10
    spy = mocker.spy(sa_session, 'flush')
    job_context.populate_collection_elements(
        collection,
        collection_builder,
        filenames,
        name=output_name,
        metadata_source_name='',
        final_job_state=job_context.final_job_state,
    )
    collection_builder.populate()
    assert spy.call_count == 0
    sa_session.flush()
    assert len(collection.dataset_instances) == 10
    assert collection.dataset_instances[0].dataset.file_size == 1
def test_export_dataset():
    app, sa_session, h = _setup_history_for_export("Datasets History")
    d1, d2 = _create_datasets(sa_session, h, 2)
    d1.state = d2.state = 'ok'
    j = model.Job()
    j.user = h.user
    j.tool_id = "cat1"
    j.state = 'ok'
    j.add_input_dataset("input1", d1)
    j.add_output_dataset("out_file1", d2)
    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(h)
    sa_session.add(j)
    sa_session.flush()
    app.object_store.update_from_file(d1, file_name="test-data/1.txt", create=True)
    app.object_store.update_from_file(d2, file_name="test-data/2.bed", create=True)
    imported_history = _import_export(app, h)
    datasets = list(imported_history.contents_iter(types=["dataset"]))
    assert len(datasets) == 2
    imported_job = datasets[1].creating_job
    assert imported_job
    assert imported_job.output_datasets
    assert imported_job.output_datasets[0].dataset == datasets[1]
    assert imported_job.input_datasets
    assert imported_job.input_datasets[0].dataset == datasets[0]
    assert datasets[0].state == 'ok'
    assert datasets[1].state == 'ok'
    with open(datasets[0].file_name, "r") as f:
        assert f.read().startswith("chr1 4225 19670")
    with open(datasets[1].file_name, "r") as f:
        assert f.read().startswith("chr1\t147962192\t147962580\tNM_005997_cds_0_0_chr1_147962193_r\t0\t-")
def setUp(self):
    super(MetadataTestCase, self).setUp()
    self.setup_app()
    model.Dataset.object_store = self.app.object_store
    job = model.Job()
    sa_session = self.app.model.session
    sa_session.add(job)
    history = model.History()
    job.history = history
    sa_session.flush()
    self.job = job
    self.history = history
    self.job_working_directory = os.path.join(self.test_directory, "job_working")
    self.tool_working_directory = os.path.join(self.job_working_directory, "working")
    os.mkdir(self.job_working_directory)
    os.mkdir(self.tool_working_directory)
def test_history_resume(self):
    user2 = self.user_manager.create(**user2_data)
    history = self.history_manager.create(name='history', user=user2)
    # No running jobs
    history.resume_paused_jobs()
    # Mock running jobs
    with mock.patch('galaxy.model.History.paused_jobs', new_callable=mock.PropertyMock) as mock_paused_jobs:
        job = model.Job()
        job.state = model.Job.states.PAUSED
        jobs = [job]
        self.trans.sa_session.add(jobs[0])
        self.trans.sa_session.flush()
        assert job.state == model.Job.states.PAUSED
        mock_paused_jobs.return_value = jobs
        history.resume_paused_jobs()
        mock_paused_jobs.assert_called_once()
        assert job.state == model.Job.states.NEW, job.state
def _import_export(app, h, dest_export=None):
    if dest_export is None:
        dest_parent = tempfile.mkdtemp()
        dest_export = os.path.join(dest_parent, "moo.tgz")
    job = model.Job()
    jeha = model.JobExportHistoryArchive.create_for_history(
        h, job, app.model.context, app.object_store, compressed=True
    )
    wrapper = JobExportHistoryArchiveWrapper(app, job.id)
    wrapper.setup_job(h, jeha.temp_directory)
    from galaxy.tools.imp_exp import export_history
    ret = export_history.main(["--gzip", jeha.temp_directory, dest_export])
    assert ret == 0, ret
    _, imported_history = import_archive(dest_export, app=app)
    assert imported_history
    return imported_history
def test_job_metrics(self):
    job = model.Job()
    job.id = 1
    example = """# Example
```galaxy
job_metrics(job_id=1)
```
"""
    metrics = [
        {"plugin": "core", "title": "Cores Allocated", "value": 1},
        {"plugin": "core", "title": "Job Start Time", "value": "2019-12-17 11:53:13"},
        {"plugin": "env", "title": "GALAXY_HOME", "value": "/path/to/home"},
    ]
    with mock.patch.object(JobManager, 'get_accessible_job', return_value=job):
        with mock.patch("galaxy.managers.markdown_util.summarize_job_metrics", return_value=metrics):
            result = self._to_basic(example)
    assert "**core**\n" in result
    assert "**env**\n" in result
    assert "| Cores Allocated | 1 |\n" in result
    assert "| GALAXY_HOME | /path/to/home |\n" in result
def test_job_parameters(self):
    job = model.Job()
    job.id = 1
    example = """# Example
```galaxy
job_parameters(job_id=1)
```
"""
    parameters = [
        {"text": "Num Lines", "value": "6", "depth": 1},
        {"text": "Plot", "value": "coolselect", "depth": 2},
        {"text": "Input Dataset", "value": [{"src": "hda", "hid": 5, "name": "Cool Data"}], "depth": 1},
    ]
    response = {"parameters": parameters}
    with mock.patch.object(JobManager, 'get_accessible_job', return_value=job):
        with mock.patch("galaxy.managers.markdown_util.summarize_job_parameters", return_value=response):
            result = self._to_basic(example)
    assert "| Num Lines |" in result
    assert "| > Plot |" in result
    assert "| Input Dataset | " in result
    assert "| 5: Cool Data |\n" in result
def test_job_metrics(self):
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    job = model.Job()
    job.user = u
    job.tool_id = "cat1"
    job.add_metric("gx", "galaxy_slots", 5)
    job.add_metric("system", "system_name", "localhost")
    self.persist(u, job)
    task = model.Task(job=job, working_directory="/tmp", prepare_files_cmd="split.sh")
    task.add_metric("gx", "galaxy_slots", 5)
    task.add_metric("system", "system_name", "localhost")
    big_value = ":".join("%d" % i for i in range(2000))
    task.add_metric("env", "BIG_PATH", big_value)
    self.persist(task)
    # Ensure big values truncated
    assert len(task.text_metrics[1].metric_value) <= 1023
def _import_export(app, h, dest_export=None):
    if dest_export is None:
        dest_parent = mkdtemp()
        dest_export = os.path.join(dest_parent, "moo.tgz")
    dataset = model.Dataset(id=100)
    jeha = model.JobExportHistoryArchive(job=model.Job(), history=h, dataset=dataset, compressed=True)
    wrapper = JobExportHistoryArchiveWrapper(app, 1)
    wrapper.setup_job(jeha)
    from galaxy.tools.imp_exp import export_history
    ret = export_history.main(["--gzip", jeha.temp_directory, dest_export])
    assert ret == 0, ret
    _, imported_history = import_archive(dest_export, app=app)
    assert imported_history
    return imported_history
def __init__(self, app, test_directory, tool):
    working_directory = os.path.join(test_directory, "workdir")
    tool_working_directory = os.path.join(working_directory, "working")
    os.makedirs(tool_working_directory)
    self.app = app
    self.tool = tool
    self.requires_containerization = False
    self.state = model.Job.states.QUEUED
    self.command_line = "echo HelloWorld"
    self.environment_variables = []
    self.commands_in_new_shell = False
    self.prepare_called = False
    self.write_version_cmd = None
    self.dependency_shell_commands = None
    self.working_directory = working_directory
    self.tool_working_directory = tool_working_directory
    self.requires_setting_metadata = True
    self.job_destination = bunch.Bunch(id="default", params={})
    self.galaxy_lib_dir = os.path.abspath("lib")
    self.job = model.Job()
    self.job_id = 1
    self.job.id = 1
    self.output_paths = ['/tmp/output1.dat']
    self.mock_metadata_path = os.path.abspath(os.path.join(test_directory, "METADATA_SET"))
    self.metadata_command = "touch %s" % self.mock_metadata_path
    self.galaxy_virtual_env = None
    self.shell = "/bin/bash"
    self.cleanup_job = "never"
    self.tmp_dir_creation_statement = ""
    self.use_metadata_binary = False
    self.guest_ports = []
    # Cruft for setting metadata externally, axe at some point.
    self.external_output_metadata = bunch.Bunch(set_job_runner_external_pid=lambda pid, session: None)
    self.app.datatypes_registry.set_external_metadata_tool = bunch.Bunch(build_dependency_shell_commands=lambda: [])
def __new_job(**kwds):
    job = model.Job()
    for key, value in kwds.items():
        setattr(job, key, value)
    return job
def __two_jobs():
    job1 = model.Job()
    job1.id = 1
    job2 = model.Job()
    job2.id = 2
    return job1, job2
def cleanup_after_job(self):
    """ Set history, datasets, and jobs' attributes and clean up archive directory. """

    #
    # Helper methods.
    #

    def file_in_dir(file_path, a_dir):
        """ Returns true if file is in directory. """
        abs_file_path = os.path.abspath(file_path)
        return os.path.split(abs_file_path)[0] == a_dir

    def get_tag_str(tag, value):
        """ Builds a tag string for a tag, value pair. """
        if not value:
            return tag
        else:
            return tag + ":" + value

    #
    # Import history.
    #
    jiha = self.sa_session.query(model.JobImportHistoryArchive).filter_by(job_id=self.job_id).first()
    if jiha:
        try:
            archive_dir = jiha.archive_dir
            archive_dir = os.path.realpath(archive_dir)
            user = jiha.job.user

            # Bioblend previous to 17.01 exported histories with an extra subdir.
            if not os.path.exists(os.path.join(archive_dir, 'history_attrs.txt')):
                for d in os.listdir(archive_dir):
                    if os.path.isdir(os.path.join(archive_dir, d)):
                        archive_dir = os.path.join(archive_dir, d)
                        break

            #
            # Create history.
            #
            history_attr_file_name = os.path.join(archive_dir, 'history_attrs.txt')
            history_attrs = load(open(history_attr_file_name))

            # Create history.
            new_history = model.History(name='imported from archive: %s' % history_attrs['name'], user=user)
            new_history.importing = True
            new_history.hid_counter = history_attrs['hid_counter']
            new_history.genome_build = history_attrs['genome_build']
            self.sa_session.add(new_history)
            jiha.history = new_history
            self.sa_session.flush()

            # Add annotation, tags.
            if user:
                self.add_item_annotation(self.sa_session, user, new_history, history_attrs['annotation'])
                """
                TODO: figure out how to add tags to item.
                for tag, value in history_attrs['tags'].items():
                    trans.app.tag_handler.apply_item_tags(trans, trans.user, new_history, get_tag_str(tag, value))
                """

            #
            # Create datasets.
            #
            datasets_attrs_file_name = os.path.join(archive_dir, 'datasets_attrs.txt')
            datasets_attrs = load(open(datasets_attrs_file_name))
            provenance_file_name = datasets_attrs_file_name + ".provenance"

            if os.path.exists(provenance_file_name):
                provenance_attrs = load(open(provenance_file_name))
                datasets_attrs += provenance_attrs

            # Get counts of how often each dataset file is used; a file can
            # be linked to multiple dataset objects (HDAs).
            datasets_usage_counts = {}
            for dataset_attrs in datasets_attrs:
                temp_dataset_file_name = os.path.realpath(os.path.join(archive_dir, dataset_attrs['file_name']))
                if (temp_dataset_file_name not in datasets_usage_counts):
                    datasets_usage_counts[temp_dataset_file_name] = 0
                datasets_usage_counts[temp_dataset_file_name] += 1

            # Create datasets.
            for dataset_attrs in datasets_attrs:
                metadata = dataset_attrs['metadata']

                # Create dataset and HDA.
                hda = model.HistoryDatasetAssociation(name=dataset_attrs['name'],
                                                      extension=dataset_attrs['extension'],
                                                      info=dataset_attrs['info'],
                                                      blurb=dataset_attrs['blurb'],
                                                      peek=dataset_attrs['peek'],
                                                      designation=dataset_attrs['designation'],
                                                      visible=dataset_attrs['visible'],
                                                      dbkey=metadata['dbkey'],
                                                      metadata=metadata,
                                                      history=new_history,
                                                      create_dataset=True,
                                                      sa_session=self.sa_session)
                if 'uuid' in dataset_attrs:
                    hda.dataset.uuid = dataset_attrs["uuid"]
                if dataset_attrs.get('exported', True) is False:
                    hda.state = hda.states.DISCARDED
                    hda.deleted = True
                    hda.purged = True
                else:
                    hda.state = hda.states.OK
                self.sa_session.add(hda)
                self.sa_session.flush()
                new_history.add_dataset(hda, genome_build=None)
                hda.hid = dataset_attrs['hid']  # Overwrite default hid set when HDA added to history.
                # TODO: Is there a way to recover permissions? Is this needed?
                # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                self.sa_session.flush()

                if dataset_attrs.get('exported', True) is True:
                    # Do security check and move/copy dataset data.
                    temp_dataset_file_name = os.path.realpath(os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name'])))
                    if not file_in_dir(temp_dataset_file_name, os.path.join(archive_dir, "datasets")):
                        raise MalformedContents("Invalid dataset path: %s" % temp_dataset_file_name)
                    if datasets_usage_counts[temp_dataset_file_name] == 1:
                        self.app.object_store.update_from_file(hda.dataset, file_name=temp_dataset_file_name, create=True)

                        # Import additional files if present. Histories exported previously might not have this attribute set.
                        dataset_extra_files_path = dataset_attrs.get('extra_files_path', None)
                        if dataset_extra_files_path:
                            try:
                                file_list = os.listdir(os.path.join(archive_dir, dataset_extra_files_path))
                            except OSError:
                                file_list = []

                            if file_list:
                                for extra_file in file_list:
                                    self.app.object_store.update_from_file(
                                        hda.dataset,
                                        extra_dir='dataset_%s_files' % hda.dataset.id,
                                        alt_name=extra_file,
                                        file_name=os.path.join(archive_dir, dataset_extra_files_path, extra_file),
                                        create=True)
                    else:
                        datasets_usage_counts[temp_dataset_file_name] -= 1
                        shutil.copyfile(temp_dataset_file_name, hda.file_name)
                    hda.dataset.set_total_size()  # update the filesize record in the database

                # Set tags, annotations.
                if user:
                    self.add_item_annotation(self.sa_session, user, hda, dataset_attrs['annotation'])
                    # TODO: Set tags.
                    """
                    for tag, value in dataset_attrs['tags'].items():
                        trans.app.tag_handler.apply_item_tags(trans, trans.user, hda, get_tag_str(tag, value))
                    self.sa_session.flush()
                    """

                # Although metadata is set above, need to set metadata to recover BAI for BAMs.
                if hda.extension == 'bam':
                    self.app.datatypes_registry.set_external_metadata_tool.tool_action.execute_via_app(
                        self.app.datatypes_registry.set_external_metadata_tool,
                        self.app,
                        jiha.job.session_id,
                        new_history.id,
                        jiha.job.user,
                        incoming={'input1': hda},
                        overwrite=False)

            #
            # Create jobs.
            #

            # Decode jobs attributes.
            def as_hda(obj_dct):
                """ Hook to 'decode' an HDA; method uses history and HID to get the HDA
                represented by the encoded object. This only works because HDAs are created above. """
                if obj_dct.get('__HistoryDatasetAssociation__', False):
                    return self.sa_session.query(model.HistoryDatasetAssociation) \
                                          .filter_by(history=new_history, hid=obj_dct['hid']).first()
                return obj_dct

            jobs_attr_file_name = os.path.join(archive_dir, 'jobs_attrs.txt')
            jobs_attrs = load(open(jobs_attr_file_name), object_hook=as_hda)

            # Create each job.
            for job_attrs in jobs_attrs:
                imported_job = model.Job()
                imported_job.user = user
                # TODO: set session?
                # imported_job.session = trans.get_galaxy_session().id
                imported_job.history = new_history
                imported_job.imported = True
                imported_job.tool_id = job_attrs['tool_id']
                imported_job.tool_version = job_attrs['tool_version']
                imported_job.set_state(job_attrs['state'])
                imported_job.info = job_attrs.get('info', None)
                imported_job.exit_code = job_attrs.get('exit_code', None)
                imported_job.traceback = job_attrs.get('traceback', None)
                imported_job.stdout = job_attrs.get('stdout', None)
                imported_job.stderr = job_attrs.get('stderr', None)
                imported_job.command_line = job_attrs.get('command_line', None)
                try:
                    imported_job.create_time = datetime.datetime.strptime(job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
                    imported_job.update_time = datetime.datetime.strptime(job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
                except Exception:
                    pass
                self.sa_session.add(imported_job)
                self.sa_session.flush()

                class HistoryDatasetAssociationIDEncoder(json.JSONEncoder):
                    """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """

                    def default(self, obj):
                        """ Encode an HDA, default encoding for everything else. """
                        if isinstance(obj, model.HistoryDatasetAssociation):
                            return obj.id
                        return json.JSONEncoder.default(self, obj)

                # Set parameters. May be useful to look at metadata.py for creating parameters.
                # TODO: there may be a better way to set parameters, e.g.:
                #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                #       job.add_parameter( name, value )
                # to make this work, we'd need to flesh out the HDA objects. The code below is
                # relatively similar.
                for name, value in job_attrs['params'].items():
                    # Transform parameter values when necessary.
                    if isinstance(value, model.HistoryDatasetAssociation):
                        # HDA input: use hid to find input.
                        input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                                   .filter_by(history=new_history, hid=value.hid).first()
                        value = input_hda.id
                    # print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                    imported_job.add_parameter(name, dumps(value, cls=HistoryDatasetAssociationIDEncoder))

                # TODO: Connect jobs to input datasets.

                # Connect jobs to output datasets.
                for output_hid in job_attrs['output_datasets']:
                    # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                    output_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                                .filter_by(history=new_history, hid=output_hid).first()
                    if output_hda:
                        imported_job.add_output_dataset(output_hda.name, output_hda)

                # Connect jobs to input datasets.
                if 'input_mapping' in job_attrs:
                    for input_name, input_hid in job_attrs['input_mapping'].items():
                        input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                                   .filter_by(history=new_history, hid=input_hid).first()
                        if input_hda:
                            imported_job.add_input_dataset(input_name, input_hda)

                self.sa_session.flush()

            # Done importing.
            new_history.importing = False
            self.sa_session.flush()

            # Cleanup.
            if os.path.exists(archive_dir):
                shutil.rmtree(archive_dir)
        except Exception as e:
            jiha.job.stderr += "Error cleaning up history import job: %s" % e
            self.sa_session.flush()
            raise
def test_export_collection_history():
    app, sa_session, h = _setup_history_for_export("Collection History")
    d1, d2, d3, d4 = _create_datasets(sa_session, h, 4)
    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=1, collection=c1, name="HistoryCollectionTest1")
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
    c2 = model.DatasetCollection(collection_type="list:paired")
    hc2 = model.HistoryDatasetCollectionAssociation(history=h, hid=2, collection=c2, name="HistoryCollectionTest2")
    cleaf = model.DatasetCollection(collection_type="paired")
    dce2leaf1 = model.DatasetCollectionElement(collection=cleaf, element=d3, element_identifier="forward", element_index=0)
    dce2leaf2 = model.DatasetCollectionElement(collection=cleaf, element=d4, element_identifier="reverse", element_index=1)
    dce21 = model.DatasetCollectionElement(collection=c2, element=cleaf, element_identifier="listel", element_index=0)
    j = model.Job()
    j.user = h.user
    j.tool_id = "cat1"
    j.add_input_dataset_collection("input1_collect", hc1)
    j.add_output_dataset_collection("output_collect", hc2)
    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(dce21)
    sa_session.add(dce2leaf1)
    sa_session.add(dce2leaf2)
    sa_session.add(hc1)
    sa_session.add(hc2)
    sa_session.add(j)
    sa_session.flush()
    imported_history = _import_export(app, h)
    datasets = imported_history.datasets
    assert len(datasets) == 4
    dataset_collections = list(imported_history.contents_iter(types=["dataset_collection"]))
    assert len(dataset_collections) == 2
    imported_hdca1 = dataset_collections[0]
    imported_hdca2 = dataset_collections[1]
    imported_collection_2 = imported_hdca2.collection
    assert imported_hdca1.collection.collection_type == "paired"
    assert imported_collection_2.collection_type == "list:paired"
    assert len(imported_collection_2.elements) == 1
    imported_top_level_element = imported_collection_2.elements[0]
    assert imported_top_level_element.element_identifier == "listel", imported_top_level_element.element_identifier
    assert imported_top_level_element.element_index == 0, imported_top_level_element.element_index
    imported_nested_collection = imported_top_level_element.child_collection
    assert len(imported_nested_collection.elements) == 2
    assert imported_nested_collection.collection_type == "paired", imported_nested_collection.collection_type
    assert len(imported_history.jobs) == 1
    imported_job = imported_history.jobs[0]
    assert imported_job
    assert len(imported_job.input_dataset_collections) == 1, len(imported_job.input_dataset_collections)
    assert len(imported_job.output_dataset_collection_instances) == 1
    assert imported_job.id != j.id
def cleanup_after_job(self, db_session):
    """ Set history, datasets, and jobs' attributes and clean up archive directory. """

    #
    # Helper methods.
    #

    def file_in_dir(file_path, a_dir):
        """ Returns true if file is in directory. """
        abs_file_path = os.path.abspath(file_path)
        return os.path.split(abs_file_path)[0] == a_dir

    def read_file_contents(file_path):
        """ Read contents of a file. """
        fp = open(file_path, 'rb')
        buffsize = 1048576
        file_contents = ''
        try:
            while True:
                file_contents += fp.read(buffsize)
                if not file_contents or len(file_contents) % buffsize != 0:
                    break
        except OverflowError:
            pass
        fp.close()
        return file_contents

    def get_tag_str(tag, value):
        """ Builds a tag string for a tag, value pair. """
        if not value:
            return tag
        else:
            return tag + ":" + value

    #
    # Import history.
    #
    jiha = db_session.query(model.JobImportHistoryArchive).filter_by(job_id=self.job_id).first()
    if jiha:
        try:
            archive_dir = jiha.archive_dir
            user = jiha.job.user

            #
            # Create history.
            #
            history_attr_file_name = os.path.join(archive_dir, 'history_attrs.txt')
            history_attr_str = read_file_contents(history_attr_file_name)
            history_attrs = from_json_string(history_attr_str)

            # Create history.
            new_history = model.History(name='imported from archive: %s' % history_attrs['name'].encode('utf-8'),
                                        user=user)
            new_history.importing = True
            new_history.hid_counter = history_attrs['hid_counter']
            new_history.genome_build = history_attrs['genome_build']
            db_session.add(new_history)
            jiha.history = new_history
            db_session.flush()

            # Add annotation, tags.
            if user:
                self.add_item_annotation(db_session, user, new_history, history_attrs['annotation'])
                """
                TODO: figure out how to add tags to item.
                for tag, value in history_attrs['tags'].items():
                    trans.app.tag_handler.apply_item_tags(trans, trans.user, new_history, get_tag_str(tag, value))
                """

            #
            # Create datasets.
            #
            datasets_attrs_file_name = os.path.join(archive_dir, 'datasets_attrs.txt')
            datasets_attr_str = read_file_contents(datasets_attrs_file_name)
            datasets_attrs = from_json_string(datasets_attr_str)

            # Get counts of how often each dataset file is used; a file can
            # be linked to multiple dataset objects (HDAs).
            datasets_usage_counts = {}
            for dataset_attrs in datasets_attrs:
                temp_dataset_file_name = os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name']))
                if (temp_dataset_file_name not in datasets_usage_counts):
                    datasets_usage_counts[temp_dataset_file_name] = 0
                datasets_usage_counts[temp_dataset_file_name] += 1

            # Create datasets.
            for dataset_attrs in datasets_attrs:
                metadata = dataset_attrs['metadata']

                # Create dataset and HDA.
                hda = model.HistoryDatasetAssociation(name=dataset_attrs['name'].encode('utf-8'),
                                                      extension=dataset_attrs['extension'],
                                                      info=dataset_attrs['info'].encode('utf-8'),
                                                      blurb=dataset_attrs['blurb'],
                                                      peek=dataset_attrs['peek'],
                                                      designation=dataset_attrs['designation'],
                                                      visible=dataset_attrs['visible'],
                                                      dbkey=metadata['dbkey'],
                                                      metadata=metadata,
                                                      history=new_history,
                                                      create_dataset=True,
                                                      sa_session=db_session)
                hda.state = hda.states.OK
                db_session.add(hda)
                db_session.flush()
                new_history.add_dataset(hda, genome_build=None)
                hda.hid = dataset_attrs['hid']  # Overwrite default hid set when HDA added to history.
                # TODO: Is there a way to recover permissions? Is this needed?
                # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                db_session.flush()

                # Do security check and move/copy dataset data.
                temp_dataset_file_name = os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name']))
                if not file_in_dir(temp_dataset_file_name, os.path.join(archive_dir, "datasets")):
                    raise Exception("Invalid dataset path: %s" % temp_dataset_file_name)
                if datasets_usage_counts[temp_dataset_file_name] == 1:
                    shutil.move(temp_dataset_file_name, hda.file_name)
                else:
                    datasets_usage_counts[temp_dataset_file_name] -= 1
                    shutil.copyfile(temp_dataset_file_name, hda.file_name)

                # Set tags, annotations.
                if user:
                    self.add_item_annotation(db_session, user, hda, dataset_attrs['annotation'])
                    # TODO: Set tags.
                    """
                    for tag, value in dataset_attrs['tags'].items():
                        trans.app.tag_handler.apply_item_tags(trans, trans.user, hda, get_tag_str(tag, value))
                    db_session.flush()
                    """

            #
            # Create jobs.
            #

            # Read jobs attributes.
            jobs_attr_file_name = os.path.join(archive_dir, 'jobs_attrs.txt')
            jobs_attr_str = read_file_contents(jobs_attr_file_name)

            # Decode jobs attributes.
            def as_hda(obj_dct):
                """ Hook to 'decode' an HDA; method uses history and HID to get the HDA
                represented by the encoded object. This only works because HDAs are created above. """
                if obj_dct.get('__HistoryDatasetAssociation__', False):
                    return db_session.query(model.HistoryDatasetAssociation) \
                                     .filter_by(history=new_history, hid=obj_dct['hid']).first()
                return obj_dct

            jobs_attrs = from_json_string(jobs_attr_str, object_hook=as_hda)

            # Create each job.
            for job_attrs in jobs_attrs:
                imported_job = model.Job()
                imported_job.user = user
                # TODO: set session?
                # imported_job.session = trans.get_galaxy_session().id
                imported_job.history = new_history
                imported_job.tool_id = job_attrs['tool_id']
                imported_job.tool_version = job_attrs['tool_version']
                imported_job.set_state(job_attrs['state'])
                imported_job.imported = True
                db_session.add(imported_job)
                db_session.flush()

                class HistoryDatasetAssociationIDEncoder(simplejson.JSONEncoder):
                    """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """

                    def default(self, obj):
                        """ Encode an HDA, default encoding for everything else. """
                        if isinstance(obj, model.HistoryDatasetAssociation):
                            return obj.id
                        return simplejson.JSONEncoder.default(self, obj)

                # Set parameters. May be useful to look at metadata.py for creating parameters.
                # TODO: there may be a better way to set parameters, e.g.:
                #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                #       job.add_parameter( name, value )
                # to make this work, we'd need to flesh out the HDA objects. The code below is
                # relatively similar.
                for name, value in job_attrs['params'].items():
                    # Transform parameter values when necessary.
                    if isinstance(value, model.HistoryDatasetAssociation):
                        # HDA input: use hid to find input.
                        input_hda = db_session.query(model.HistoryDatasetAssociation) \
                                              .filter_by(history=new_history, hid=value.hid).first()
                        value = input_hda.id
                    # print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                    imported_job.add_parameter(name, to_json_string(value, cls=HistoryDatasetAssociationIDEncoder))

                # TODO: Connect jobs to input datasets.

                # Connect jobs to output datasets.
                for output_hid in job_attrs['output_datasets']:
                    # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                    output_hda = db_session.query(model.HistoryDatasetAssociation) \
                                           .filter_by(history=new_history, hid=output_hid).first()
                    if output_hda:
                        imported_job.add_output_dataset(output_hda.name, output_hda)

            # Done importing.
            new_history.importing = False
            db_session.flush()

            # Cleanup.
            if os.path.exists(archive_dir):
                shutil.rmtree(archive_dir)
        except Exception, e:
            jiha.job.stderr += "Error cleaning up history import job: %s" % e
            db_session.flush()
def get_job(self):
    return model.Job()
def test_export_collection_with_mapping_history():
    app, sa_session, h = _setup_history_for_export("Collection Mapping History")
    d1, d2, d3, d4 = _create_datasets(sa_session, h, 4)
    c1 = model.DatasetCollection(collection_type="list")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=1, collection=c1, name="HistoryCollectionTest1")
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="el1", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="el2", element_index=1)
    c2 = model.DatasetCollection(collection_type="list")
    hc2 = model.HistoryDatasetCollectionAssociation(history=h, hid=2, collection=c2, name="HistoryCollectionTest2")
    dce3 = model.DatasetCollectionElement(collection=c2, element=d3, element_identifier="el1", element_index=0)
    dce4 = model.DatasetCollectionElement(collection=c2, element=d4, element_identifier="el2", element_index=1)
    hc2.add_implicit_input_collection("input1", hc1)
    j1 = model.Job()
    j1.user = h.user
    j1.tool_id = "cat1"
    j1.add_input_dataset("input1", d1)
    j1.add_output_dataset("out_file1", d3)
    j2 = model.Job()
    j2.user = h.user
    j2.tool_id = "cat1"
    j2.add_input_dataset("input1", d2)
    j2.add_output_dataset("out_file1", d4)
    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(dce3)
    sa_session.add(dce4)
    sa_session.add(hc1)
    sa_session.add(hc2)
    sa_session.add(j1)
    sa_session.add(j2)
    sa_session.flush()
    implicit_collection_jobs = model.ImplicitCollectionJobs()
    j1.add_output_dataset_collection("out_file1", hc2)  # really?
    ija1 = model.ImplicitCollectionJobsJobAssociation()
    ija1.order_index = 0
    ija1.implicit_collection_jobs = implicit_collection_jobs
    ija1.job = j1
    j2.add_output_dataset_collection("out_file1", hc2)  # really?
    ija2 = model.ImplicitCollectionJobsJobAssociation()
    ija2.order_index = 1
    ija2.implicit_collection_jobs = implicit_collection_jobs
    ija2.job = j2
    sa_session.add(implicit_collection_jobs)
    sa_session.add(ija1)
    sa_session.add(ija2)
    sa_session.flush()
    imported_history = _import_export(app, h)
    assert len(imported_history.jobs) == 2
    imported_job0 = imported_history.jobs[0]
    imported_icj = imported_job0.implicit_collection_jobs_association.implicit_collection_jobs
    assert imported_icj
    assert len(imported_icj.jobs) == 2, len(imported_icj.jobs)