def _new_hda(self):
    """Create, persist, and return a fresh visible HDA backed by a new Dataset."""
    new_hda = model.HistoryDatasetAssociation()
    new_hda.dataset = model.Dataset()
    new_hda.visible = True
    session = self.app.model.context
    session.add(new_hda)
    session.flush()
    return new_hda
def __init__(self, implicit_output_name=None, job=None, hid=1):
    """Mock history dataset collection association with a single-element collection.

    The single element wraps an HDA in state ``ok`` whose creating job
    association points at ``job`` via ``implicit_output_name``.
    """
    self.id = 124
    self.copied_from_history_dataset_collection_association = None
    self.history_content_type = "dataset_collection"
    self.implicit_output_name = implicit_output_name
    # BUG FIX: was hardcoded to 1, silently ignoring the ``hid`` argument.
    self.hid = hid
    self.collection = model.DatasetCollection()
    self.creating_job_associations = []
    element = model.DatasetCollectionElement(
        collection=self.collection,
        element=model.HistoryDatasetAssociation(),
        element_index=0,
        element_identifier="moocow",
    )
    element.dataset_instance.dataset = model.Dataset()
    element.dataset_instance.dataset.state = "ok"
    creating = model.JobToOutputDatasetAssociation(
        implicit_output_name,
        element.dataset_instance,
    )
    creating.job = job
    element.dataset_instance.creating_job_associations = [
        creating,
    ]
    self.collection.elements = [element]
def test_subworkflow_progress(self):
    """Subworkflow progress propagates the parent step's input HDA into the nested cat step."""
    self._setup_workflow(TEST_SUBWORKFLOW_YAML)
    input_hda = model.HistoryDatasetAssociation()
    previous_progress = {
        100: {"output": input_hda},
        101: UNSCHEDULED_STEP,
    }
    self._set_previous_progress(previous_progress)
    step_one = self.invocation.workflow.step_by_index(1)
    self.invocation.create_subworkflow_invocation_for_step(step_one)
    progress = self._new_workflow_progress()
    sub_step = progress.remaining_steps()[0]
    sub_progress = progress.subworkflow_progress(sub_step)
    nested_workflow = sub_step.subworkflow
    assert sub_progress.workflow_invocation.workflow == nested_workflow
    sub_progress.set_outputs_for_input(nested_workflow.step_by_index(0))
    cat_step = nested_workflow.step_by_index(1)
    replacement = sub_progress.replacement_for_tool_input(cat_step, MockInput(), "input1")
    assert input_hda is replacement
def _create_hda(model, object_store, history, path, visible=True, include_metadata_file=False):
    """Create a bam HDA backed by ``path``, attach it to ``history``, and annotate it.

    When ``include_metadata_file`` is set, a bam_index metadata file pointing at
    ``path`` is attached and verified via ``_check_metadata_file``.
    """
    session = model.context
    new_hda = model.HistoryDatasetAssociation(extension="bam", create_dataset=True, sa_session=session)
    new_hda.visible = visible
    session.add(new_hda)
    session.flush([new_hda])
    object_store.update_from_file(new_hda, file_name=path, create=True)
    if include_metadata_file:
        index_file = MetadataTempFile.from_JSON({"kwds": {}, "filename": path})
        new_hda.metadata.from_JSON_dict(json_dict={"bam_index": index_file})
        _check_metadata_file(new_hda)
    new_hda.set_size()
    history.add_dataset(new_hda)
    new_hda.add_item_annotation(session, history.user, new_hda, "annotation #%d" % new_hda.hid)
    return new_hda
def test_nested_collection_attributes(self):
    """Exercise ``_get_nested_collection_attributes`` and its derived properties
    across nested dataset collections (pair, list:paired, list:list, list:list:paired).
    """
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    h1 = model.History(name="History 1", user=u)
    # bam HDA with two attached metadata index files (bai + csi).
    d1 = model.HistoryDatasetAssociation(extension="bam", history=h1, create_dataset=True, sa_session=model.session)
    index = NamedTemporaryFile("w")
    index.write("cool bam index")
    index2 = NamedTemporaryFile("w")
    index2.write("cool bam index 2")
    metadata_dict = {"bam_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index.name}), "bam_csi_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index2.name})}
    d1.metadata.from_JSON_dict(json_dict=metadata_dict)
    assert d1.metadata.bam_index
    assert d1.metadata.bam_csi_index
    assert isinstance(d1.metadata.bam_index, model.MetadataFile)
    assert isinstance(d1.metadata.bam_csi_index, model.MetadataFile)
    d2 = model.HistoryDatasetAssociation(extension="txt", history=h1, create_dataset=True, sa_session=model.session)
    # c1: pair(d1, d2); c2: list:paired wrapping c1; c3: empty list:list;
    # c4: list:list:paired wrapping c2.
    c1 = model.DatasetCollection(collection_type='paired')
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
    c2 = model.DatasetCollection(collection_type="list:paired")
    dce3 = model.DatasetCollectionElement(collection=c2, element=c1, element_identifier="inner_list", element_index=0)
    c3 = model.DatasetCollection(collection_type="list:list")
    c4 = model.DatasetCollection(collection_type="list:list:paired")
    dce4 = model.DatasetCollectionElement(collection=c4, element=c2, element_identifier="outer_list", element_index=0)
    model.session.add_all([d1, d2, c1, dce1, dce2, c2, dce3, c3, c4, dce4])
    model.session.flush()
    # One identifier column per nesting level, plus requested HDA/dataset columns.
    q = c2._get_nested_collection_attributes(element_attributes=('element_identifier',), hda_attributes=('extension',), dataset_attributes=('state',))
    assert [(r.keys()) for r in q] == [['element_identifier_0', 'element_identifier_1', 'extension', 'state'], ['element_identifier_0', 'element_identifier_1', 'extension', 'state']]
    assert q.all() == [('inner_list', 'forward', 'bam', 'new'), ('inner_list', 'reverse', 'txt', 'new')]
    q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation,))
    assert q.all() == [d1, d2]
    q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation, model.Dataset))
    assert q.all() == [(d1, d1.dataset), (d2, d2.dataset)]
    # Assert properties that use _get_nested_collection_attributes return correct content
    assert c2.dataset_instances == [d1, d2]
    assert c2.dataset_elements == [dce1, dce2]
    assert c2.dataset_action_tuples == []
    assert c2.populated_optimized
    assert c2.dataset_states_and_extensions_summary == ({'new'}, {'txt', 'bam'})
    # NOTE(review): 'mock_dataset_14.dat' presumably comes from a mocked object
    # store path in the test fixture — confirm against the suite's setup.
    assert c2.element_identifiers_extensions_paths_and_metadata_files == [[('inner_list', 'forward'), 'bam', 'mock_dataset_14.dat', [('bai', 'mock_dataset_14.dat'), ('bam.csi', 'mock_dataset_14.dat')]], [('inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat', []]]
    # c3 has no elements at all.
    assert c3.dataset_instances == []
    assert c3.dataset_elements == []
    assert c3.dataset_states_and_extensions_summary == (set(), set())
    q = c4._get_nested_collection_attributes(element_attributes=('element_identifier',))
    assert q.all() == [('outer_list', 'inner_list', 'forward'), ('outer_list', 'inner_list', 'reverse')]
    assert c4.dataset_elements == [dce1, dce2]
    assert c4.element_identifiers_extensions_and_paths == [(('outer_list', 'inner_list', 'forward'), 'bam', 'mock_dataset_14.dat'), (('outer_list', 'inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat')]
def test_remaining_steps_with_progress(self):
    """Only the unscheduled step remains, and its input resolves to step 102's output."""
    self._setup_workflow(TEST_WORKFLOW_YAML)
    expected_hda = model.HistoryDatasetAssociation()
    previous_progress = {
        100: {"output": model.HistoryDatasetAssociation()},
        101: {"output": model.HistoryDatasetAssociation()},
        102: {"out_file1": expected_hda},
        103: {"out_file1": model.HistoryDatasetAssociation()},
        104: UNSCHEDULED_STEP,
    }
    self._set_previous_progress(previous_progress)
    progress = self._new_workflow_progress()
    remaining = progress.remaining_steps()
    assert len(remaining) == 1
    assert remaining[0] is self.invocation.workflow.steps[4]
    replacement = progress.replacement_for_tool_input(self._step(4), MockInput(), "input1")
    assert replacement is expected_hda
def _create_datasets(sa_session, history, n, extension="txt"):
    """Create and return ``n`` HDAs in ``history`` with hids 1..n."""
    datasets = []
    for index in range(n):
        hda = model.HistoryDatasetAssociation(
            extension=extension,
            history=history,
            create_dataset=True,
            sa_session=sa_session,
            hid=index + 1,
        )
        datasets.append(hda)
    return datasets
def test_validated_values(self):
    """A value outside the filtered options raises a ValueError with the expected message."""
    self.options_xml = '''<options><filter type="data_meta" ref="input_bam" key="dbkey"/></options>'''
    try:
        self.param.from_json("42", self.trans, {"input_bam": model.HistoryDatasetAssociation()})
    except ValueError as err:
        assert str(err) == "An invalid option was selected for my_name, '42', please verify."
    else:
        # from_json must not accept an invalid option silently.
        assert False
def test_validated_datasets(self):
    """A missing referenced dataset raises a ValueError with the expected message."""
    self.options_xml = '''<options><filter type="data_meta" ref="input_bam" key="dbkey"/></options>'''
    try:
        self.param.from_json(model.HistoryDatasetAssociation(), self.trans, {"input_bam": None})
    except ValueError as err:
        assert str(err) == "Parameter my_name requires a value, but has no legal values defined."
    else:
        # from_json must not succeed without a legal value.
        assert False
def __add_dataset(self, state='ok'):
    """Add a new HDA (with an external file) to the test history and return it.

    :param state: dataset state to assign (defaults to ``'ok'``).
    """
    hda = model.HistoryDatasetAssociation()
    hda.dataset = model.Dataset()
    # BUG FIX: was hardcoded to 'ok', silently ignoring the ``state`` argument.
    hda.dataset.state = state
    # External filename avoids consulting the object store for the file.
    hda.dataset.external_filename = "/tmp/datasets/dataset_001.dat"
    self.history.add_dataset(hda)
    self.app.model.context.flush()
    return hda
def test_unvalidated_datasets(self):
    """In workflow building mode, from_html wraps the HDA without validating it."""
    self.options_xml = '''<options><filter type="data_meta" ref="input_bam" key="dbkey"/></options>'''
    self.trans.workflow_building_mode = True
    context = {"input_bam": basic.RuntimeValue()}
    result = self.param.from_html(model.HistoryDatasetAssociation(), self.trans, context)
    assert isinstance(result.value, model.HistoryDatasetAssociation)
def test_validated_datasets(self):
    """A missing referenced dataset raises a ValueError with the expected message."""
    self.options_xml = '''<options><filter type="data_meta" ref="input_bam" key="dbkey"/></options>'''
    with pytest.raises(ValueError) as exc_info:
        self.param.from_json(model.HistoryDatasetAssociation(), self.trans, {"input_bam": None})
    expected_message = "parameter 'my_name': requires a value, but no legal values defined"
    assert str(exc_info.value) == expected_message
def test_annotations(self):
    """Persist one annotation association of every supported type and verify
    each round-trips with the expected annotation text and owning user.
    """
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    self.persist(u)

    def persist_and_check_annotation(annotation_class, **kwds):
        # Helper: create an association of ``annotation_class``, attach the
        # target object(s) given in ``kwds``, persist, expunge, and re-query
        # to verify the annotation survived the round-trip.
        annotated_association = annotation_class()
        annotated_association.annotation = "Test Annotation"
        annotated_association.user = u
        for key, value in kwds.items():
            setattr(annotated_association, key, value)
        self.persist(annotated_association)
        self.expunge()
        stored_annotation = self.query(annotation_class).all()[0]
        assert stored_annotation.annotation == "Test Annotation"
        assert stored_annotation.user.email == "*****@*****.**"

    # Stored workflow / workflow step annotations.
    sw = model.StoredWorkflow()
    sw.user = u
    self.persist(sw)
    persist_and_check_annotation(model.StoredWorkflowAnnotationAssociation, stored_workflow=sw)
    workflow = model.Workflow()
    workflow.stored_workflow = sw
    self.persist(workflow)
    ws = model.WorkflowStep()
    ws.workflow = workflow
    self.persist(ws)
    persist_and_check_annotation(model.WorkflowStepAnnotationAssociation, workflow_step=ws)
    # History and HDA annotations.
    h = model.History(name="History for Annotation", user=u)
    self.persist(h)
    persist_and_check_annotation(model.HistoryAnnotationAssociation, history=h)
    d1 = model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=model.session)
    self.persist(d1)
    persist_and_check_annotation(model.HistoryDatasetAssociationAnnotationAssociation, hda=d1)
    # Page and visualization annotations.
    page = model.Page()
    page.user = u
    self.persist(page)
    persist_and_check_annotation(model.PageAnnotationAssociation, page=page)
    visualization = model.Visualization()
    visualization.user = u
    self.persist(visualization)
    persist_and_check_annotation(model.VisualizationAnnotationAssociation, visualization=visualization)
    # Dataset collection annotations (history- and library-level).
    dataset_collection = model.DatasetCollection(collection_type="paired")
    history_dataset_collection = model.HistoryDatasetCollectionAssociation(collection=dataset_collection)
    self.persist(history_dataset_collection)
    persist_and_check_annotation(model.HistoryDatasetCollectionAssociationAnnotationAssociation, history_dataset_collection=history_dataset_collection)
    library_dataset_collection = model.LibraryDatasetCollectionAssociation(collection=dataset_collection)
    self.persist(library_dataset_collection)
    persist_and_check_annotation(model.LibraryDatasetCollectionAnnotationAssociation, library_dataset_collection=library_dataset_collection)
def _create_output_dataset(self, **kwd):
    """Create an output HDA, add it to the test history, assign an object store id, and return it."""
    dataset = model.HistoryDatasetAssociation(
        sa_session=self.app.model.session,
        create_dataset=True,
        flush=True,
        **kwd,
    )
    self.history.add_dataset(dataset)
    ObjectStorePopulator(self.app).set_object_store_id(dataset)
    return dataset
def build_ready_hda(self):
    """Return an interval HDA in state 'ok' attached to a freshly added history."""
    session = self.app.model.context
    history = model.History()
    session.add(history)
    hda = model.HistoryDatasetAssociation(extension='interval', create_dataset=True, sa_session=session)
    ready_hda = history.add_dataset(hda)
    ready_hda.set_dataset_state('ok')
    return ready_hda
def test_filtered_hda_unmatched_key(self):
    """When the referenced dataset shares the mock HDA's dbkey, the match succeeds."""
    self.filtered_param = True
    reference_dataset = model.HistoryDatasetAssociation()
    reference_dataset.dbkey = "hg19"
    self.other_values = {"data1": reference_dataset}
    # Other param value and this dataset both hg19, should be valid
    assert self.test_context.hda_match(self.mock_hda)
def test_filtered_hda_matched_key(self):
    """When the referenced dataset's dbkey differs from the mock HDA's, the match fails."""
    self.filtered_param = True
    reference_dataset = model.HistoryDatasetAssociation()
    reference_dataset.dbkey = "hg18"
    self.other_values = {"data1": reference_dataset}
    assert self.test_context.filter_value == "hg18"
    # mock_hda is hg19, other is hg18 so should not be "valid hda"
    assert not self.test_context.hda_match(self.mock_hda)
def test_data_input_compute_runtime_state_args():
    """compute_runtime_state returns the HDA produced by check_param under the 'input' key."""
    module = __from_step(type="data_input")
    serialized_state = module.get_state()
    replacement_hda = model.HistoryDatasetAssociation()
    runtime_inputs = {"input": 4, "tool_state": serialized_state}
    with mock.patch("galaxy.workflow.modules.check_param") as check_method:
        check_method.return_value = (replacement_hda, None)
        state, errors = module.compute_runtime_state(module.trans, module.test_step, runtime_inputs)
    assert not errors
    assert "input" in state.inputs
    assert state.inputs["input"] is replacement_hda
def _setup_test_output(self):
    """Wire up a job with one output HDA and record hda/job/history/outputs on self."""
    dataset = model.Dataset()
    # This way object store isn't asked about size...
    dataset.external_filename = "example_output"
    self.hda = model.HistoryDatasetAssociation(name="test", dataset=dataset)
    new_job = model.Job()
    new_job.add_output_dataset(DEFAULT_TOOL_OUTPUT, self.hda)
    self.app.model.context.add(new_job)
    self.job = new_job
    self.history = self._new_history(hdas=[self.hda])
    self.outputs = {DEFAULT_TOOL_OUTPUT: self.hda}
def test_nested_context_validation_not_needed(self):
    """Late validation is not required when the data ref lives at the same nesting level."""
    self.options_xml = '''<options><filter type="data_meta" ref="input_bam" key="dbkey"/></options>'''
    # Data ref currently must be same level, if not at top level.
    nested_values = {
        "reference_source": {
            "my_name": "42",
            "input_bam": model.HistoryDatasetAssociation(),
        }
    }
    assert not self.param.need_late_validation(self.trans, nested_values)
def _new_hda(self, contents=None):
    """Create a minimal HDA with id 1; when ``contents`` is given, back it with
    a mocked dataset whose file name points at a real temp file holding ``contents``.
    """
    hda = model.HistoryDatasetAssociation()
    hda.id = 1
    if contents is not None:
        hda.dataset = mock.MagicMock()
        hda.dataset.purged = False
        t = tempfile.NamedTemporaryFile(mode="w", delete=False)
        t.write(contents)
        # BUG FIX: close the handle so buffered contents are flushed to disk;
        # without this, readers of t.name could observe an empty/partial file.
        t.close()
        hda.dataset.get_file_name.return_value = t.name
    return hda
def test_replacement_for_tool_input(self):
    """An input HDA supplied to step 0 is the replacement for step 2's input1."""
    self._setup_workflow(TEST_WORKFLOW_YAML)
    source_hda = model.HistoryDatasetAssociation()
    self.inputs_by_step_id = {100: source_hda}
    progress = self._new_workflow_progress()
    progress.set_outputs_for_input(self._step(0))
    replacement = progress.replacement_for_tool_input(self._step(2), MockInput(), "input1")
    assert replacement is source_hda
def test_basic(self):
    """Smoke-test persistence of users, histories, and an HDA: create, query
    back, assert relationships, then update a history name and re-check.
    """
    model = self.model
    original_user_count = len(model.session.query(model.User).all())
    # Make some changes and commit them
    u = model.User(email="*****@*****.**", password="******")
    # gs = model.GalaxySession()
    h1 = model.History(name="History 1", user=u)
    # h1.queries.append( model.Query( "h1->q1" ) )
    # h1.queries.append( model.Query( "h1->q2" ) )
    # NOTE(review): name is created 1024 chars long but queried back below as
    # 255 chars — presumably History.name is truncated to 255 on assignment
    # or persistence; confirm against the model definition.
    h2 = model.History(name=("H" * 1024))
    self.persist(u, h1, h2)
    # q1 = model.Query( "h2->q1" )
    metadata = dict(chromCol=1, startCol=2, endCol=3)
    d1 = model.HistoryDatasetAssociation(extension="interval", metadata=metadata, history=h2, create_dataset=True, sa_session=model.session)
    # h2.queries.append( q1 )
    # h2.queries.append( model.Query( "h2->q2" ) )
    self.persist(d1)
    # Check
    users = model.session.query(model.User).all()
    assert len(users) == original_user_count + 1
    user = [user for user in users if user.email == "*****@*****.**"][0]
    assert user.email == "*****@*****.**"
    assert user.password == "password"
    assert len(user.histories) == 1
    assert user.histories[0].name == "History 1"
    hists = model.session.query(model.History).all()
    hist0 = [history for history in hists if history.name == "History 1"][0]
    hist1 = [history for history in hists if history.name == "H" * 255][0]
    assert hist0.name == "History 1"
    assert hist1.name == ("H" * 255)
    assert hist0.user == user
    # h2 was created with no user.
    assert hist1.user is None
    assert hist1.datasets[0].metadata.chromCol == 1
    # The filename test has moved to objectstore
    # id = hist1.datasets[0].id
    # assert hist1.datasets[0].file_name == os.path.join( "/tmp", *directory_hash_id( id ) ) + ( "/dataset_%d.dat" % id )
    # Do an update and check
    hist1.name = "History 2b"
    self.expunge()
    hists = model.session.query(model.History).all()
    hist0 = [history for history in hists if history.name == "History 1"][0]
    hist1 = [history for history in hists if history.name == "History 2b"][0]
    assert hist0.name == "History 1"
    assert hist1.name == "History 2b"
def example_invocation(trans):
    """Build and return a small WorkflowInvocation fixture with one input and one output HDA."""
    workflow = yaml_to_model(TEST_WORKFLOW_YAML)
    workflow.id = 342
    invocation = model.WorkflowInvocation()
    invocation.id = 44
    invocation.workflow = workflow
    # TODO: fix this to use workflow id and eliminate hack.
    stored_workflow = model.StoredWorkflow()
    stored_workflow.id = 342
    invocation.workflow.stored_workflow = stored_workflow
    input_hda = model.HistoryDatasetAssociation(create_dataset=True, sa_session=trans.sa_session)
    input_hda.id = 567
    invocation.add_input(input_hda, step=workflow.steps[0])
    output_hda = model.HistoryDatasetAssociation(create_dataset=True, sa_session=trans.sa_session)
    output_hda.id = 563
    wf_output = model.WorkflowOutput(workflow.steps[2], label="output_label")
    invocation.add_output(wf_output, workflow.steps[2], output_hda)
    return invocation
def test_connect_tool_output(self):
    """A connection from step 2's 'out1' resolves to the HDA recorded for that output."""
    self._setup_workflow(TEST_WORKFLOW_YAML)
    out_hda = model.HistoryDatasetAssociation()
    progress = self._new_workflow_progress()
    progress.set_step_outputs(self._invocation_step(2), {"out1": out_hda})
    connection = model.WorkflowStepConnection()
    connection.output_name = "out1"
    connection.output_step = self._step(2)
    assert progress.replacement_for_connection(connection) is out_hda
def __assert_output_format_is(expected, output, input_extensions=None, param_context=None, add_collection=False):
    """Assert that ``determine_output_format`` resolves ``output`` to ``expected``.

    ``input_extensions`` supplies (name, extension) pairs for the tool's data
    inputs; ``add_collection`` additionally wires in a paired HDCA input.
    """
    input_extensions = input_extensions or {}
    param_context = param_context or {}
    inputs = {}
    last_ext = "data"
    for i, (name, ext) in enumerate(input_extensions, start=1):
        hda = model.HistoryDatasetAssociation(extension=ext)
        # Populate a random metadata field for testing
        hda.metadata.random_field = str(i)
        inputs[name] = hda
        last_ext = ext
    input_collections = {}
    if add_collection:
        forward_hda = model.HistoryDatasetAssociation(extension="txt")
        reverse_hda = model.HistoryDatasetAssociation(extension="txt")
        pair = model.DatasetCollection(collection_type="pair")
        hdca = model.HistoryDatasetCollectionAssociation(collection=pair, name="HistoryCollectionTest1")
        forward_element = model.DatasetCollectionElement(collection=pair, element=forward_hda, element_identifier="forward", element_index=0)
        reverse_element = model.DatasetCollectionElement(collection=pair, element=reverse_hda, element_identifier="reverse", element_index=1)
        pair.elements = [forward_element, reverse_element]
        input_collections["hdcai"] = [(hdca, False)]
    actual_format = determine_output_format(output, param_context, inputs, input_collections, last_ext)
    assert actual_format == expected, f"Actual format {actual_format}, does not match expected {expected}"
def test_valid_hda_implicit_convered(self):
    """hda_match reports an implicit conversion when the datatype does not match directly."""
    # Find conversion returns an HDA to an already implicitly converted
    # dataset.
    self.mock_hda.datatype_matches = False
    already_converted = model.HistoryDatasetAssociation()
    self.mock_hda.conversion_destination = ("tabular", already_converted)
    match = self.test_context.hda_match(self.mock_hda)
    assert match
    assert match.implicit_conversion
    assert match.hda == already_converted
    assert match.target_ext == "tabular"
def test_connect_data_input(self):
    """A connection from input step 0's 'output' resolves to the supplied HDA."""
    self._setup_workflow(TEST_WORKFLOW_YAML)
    source_hda = model.HistoryDatasetAssociation()
    self.inputs_by_step_id = {100: source_hda}
    progress = self._new_workflow_progress()
    progress.set_outputs_for_input(self._invocation_step(0))
    connection = model.WorkflowStepConnection()
    connection.output_name = "output"
    connection.output_step = self._step(0)
    assert progress.replacement_for_connection(connection) is source_hda
def test_collections_in_histories(self):
    """A paired collection in a history round-trips with both elements addressable by identifier."""
    model = self.model
    user = model.User(email="*****@*****.**", password="******")
    history = model.History(name="History 1", user=user)
    left_hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
    right_hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
    pair = model.DatasetCollection(collection_type="pair")
    hdca = model.HistoryDatasetCollectionAssociation(history=history, collection=pair, name="HistoryCollectionTest1")
    left_element = model.DatasetCollectionElement(collection=pair, element=left_hda, element_identifier="left")
    right_element = model.DatasetCollectionElement(collection=pair, element=right_hda, element_identifier="right")
    self.persist(user, history, left_hda, right_hda, pair, hdca, left_element, right_element)
    loaded = self.query(model.HistoryDatasetCollectionAssociation).filter(model.HistoryDatasetCollectionAssociation.name == "HistoryCollectionTest1").first().collection
    self.assertEqual(len(loaded.elements), 2)
    assert loaded.collection_type == "pair"
    assert loaded["left"] == left_element
    assert loaded["right"] == right_element
def test_collection_get_interface(self):
    """Indexing a 100-element list collection returns each element in order."""
    model = self.model
    user = model.User(email="*****@*****.**", password="******")
    history = model.History(name="History 1", user=user)
    hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
    collection = model.DatasetCollection(collection_type="list")
    element_count = 100
    created_elements = [
        model.DatasetCollectionElement(collection=collection, element=hda, element_identifier=f"{i}", element_index=i)
        for i in range(element_count)
    ]
    self.persist(user, history, hda, collection, *created_elements, flush=False, expunge=False)
    model.session.flush()
    for index, element in enumerate(created_elements):
        assert collection[index] == element