def test_export_collection_with_datasets_from_other_history():
    app, sa_session, h = _setup_history_for_export("Collection History with dataset from other history")

    dataset_history = model.History(name="Dataset History", user=h.user)
    d1, d2 = _create_datasets(sa_session, dataset_history, 2)
    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=1, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 2
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(hc1)
    sa_session.flush()

    imported_history = _import_export(app, h)

    assert imported_history.hid_counter == 4, imported_history.hid_counter
    assert len(imported_history.dataset_collections) == 1
    assert len(imported_history.datasets) == 2
    for hdca in imported_history.dataset_collections:
        assert hdca.hid == 1, hdca.hid
    for hda in imported_history.datasets:
        assert hda.hid in [2, 3]
    _assert_distinct_hids(imported_history)
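# Hypothetical sketches of two helpers the tests above rely on; the real
# implementations live elsewhere in this module. _setup_history_for_export
# follows the _mock_app()/session/user/history pattern visible in
# test_import_export_edit_collection below; _assert_distinct_hids is assumed
# to check that no hid is assigned twice within a history.
def _setup_history_for_export(history_name):
    app = _mock_app()
    sa_session = app.model.context
    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name=history_name, user=u)
    sa_session.add_all([u, h])
    sa_session.flush()
    return app, sa_session, h


def _assert_distinct_hids(history):
    # Collect the hid of every dataset and collection in the history and
    # verify that none is reused.
    hids = [hda.hid for hda in history.datasets]
    hids.extend(hdca.hid for hdca in history.dataset_collections)
    assert len(hids) == len(set(hids)), "Duplicate hids found: %s" % hids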
def setUp(self):
    self.setup_app(mock_model=False)
    self.mock_tool = bunch.Bunch(
        app=self.app,
        tool_type="default",
    )
    self.test_history = model.History()
    self.app.model.context.add(self.test_history)
    self.app.model.context.flush()
    self.app.tool_data_tables["test_table"] = MockToolDataTable()
    self.trans = bunch.Bunch(
        app=self.app,
        get_history=lambda: self.test_history,
        get_current_user_roles=lambda: [],
        workflow_building_mode=False,
        webapp=bunch.Bunch(name="galaxy"),
    )
    self.type = "select"
    self.set_data_ref = False
    self.multiple = False
    self.optional = False
    self.options_xml = ""
    self._param = None
def setUp(self):
    self.setup_app(mock_model=False)
    self.mock_tool = bunch.Bunch(
        app=self.app,
        tool_type="default",
    )
    self.test_history = model.History()
    self.app.model.context.add(self.test_history)
    self.app.model.context.flush()
    self.trans = bunch.Bunch(
        app=self.app,
        get_history=lambda: self.test_history,
        get_current_user_roles=lambda: [],
        workflow_building_mode=False,
        webapp=bunch.Bunch(name="galaxy"),
    )
    self.type = "data_column"
    self.other_attributes = ""
    self.set_data_ref = "input_tsv"
    self.multiple = False
    self.optional = False
    self._param = None
def _import_directory_to_history(app, target, work_directory):
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add_all([u, import_history])
    sa_session.flush()

    assert len(import_history.datasets) == 0

    import_options = store.ImportOptions(allow_dataset_object_edit=True)
    import_model_store = store.get_import_model_store_for_directory(
        target,
        app=app,
        user=u,
        import_options=import_options,
        tag_handler=app.tag_handler.create_tag_handler_session(),
    )
    with import_model_store.target_history(default_history=import_history):
        import_model_store.perform_import(import_history)

    return import_history
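# A plausible sketch of the _import_export round-trip helper used by the
# export tests above: export a history to a directory, then re-import it via
# _import_directory_to_history. It assumes DirectoryModelExportStore exposes
# an export_history method; the helper's real definition is not shown here.
def _import_export(app, h, dest_export=None):
    if dest_export is None:
        dest_export = mkdtemp()
    with store.DirectoryModelExportStore(dest_export, app=app) as export_store:
        export_store.export_history(h)
    return _import_directory_to_history(app, dest_export, mkdtemp())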
def _setup_simple_cat_job(app, state='ok'):
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)
    d1, d2 = _create_datasets(sa_session, h, 2)
    d1.state = d2.state = state

    j = model.Job()
    j.user = u
    j.tool_id = "cat1"
    j.state = state
    j.add_input_dataset("input1", d1)
    j.add_output_dataset("out_file1", d2)

    sa_session.add_all((d1, d2, h, j))
    sa_session.flush()

    app.object_store.update_from_file(d1, file_name=TEST_PATH_1, create=True)
    app.object_store.update_from_file(d2, file_name=TEST_PATH_2, create=True)

    return u, h, d1, d2, j
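# Hypothetical sketch of the _create_datasets helper used throughout these
# tests: build n HDAs attached to the given history. The constructor keyword
# arguments mirror the HistoryDatasetAssociation usage seen elsewhere in this
# module; the real helper is defined elsewhere.
def _create_datasets(sa_session, history, n, extension="txt"):
    return [
        model.HistoryDatasetAssociation(
            extension=extension,
            history=history,
            create_dataset=True,
            sa_session=sa_session,
        )
        for _ in range(n)
    ]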
def test_workflows(self):
    model = self.model
    user = model.User(email="*****@*****.**", password="******")

    def workflow_from_steps(steps):
        stored_workflow = model.StoredWorkflow()
        stored_workflow.user = user
        workflow = model.Workflow()
        workflow.steps = steps
        workflow.stored_workflow = stored_workflow
        return workflow

    child_workflow = workflow_from_steps([])
    self.persist(child_workflow)

    workflow_step_1 = model.WorkflowStep()
    workflow_step_1.order_index = 0
    workflow_step_1.type = "data_input"
    workflow_step_2 = model.WorkflowStep()
    workflow_step_2.order_index = 1
    workflow_step_2.type = "subworkflow"
    workflow_step_2.subworkflow = child_workflow

    workflow_step_1.get_or_add_input("moo1")
    workflow_step_1.get_or_add_input("moo2")
    workflow_step_2.get_or_add_input("moo")
    workflow_step_1.add_connection("foo", "cow", workflow_step_2)

    workflow = workflow_from_steps([workflow_step_1, workflow_step_2])
    self.persist(workflow)
    workflow_id = workflow.id

    annotation = model.WorkflowStepAnnotationAssociation()
    annotation.annotation = "Test Step Annotation"
    annotation.user = user
    annotation.workflow_step = workflow_step_1
    self.persist(annotation)

    assert workflow_step_1.id is not None
    h1 = model.History(name="WorkflowHistory1", user=user)

    invocation_uuid = uuid.uuid1()

    workflow_invocation = model.WorkflowInvocation()
    workflow_invocation.uuid = invocation_uuid
    workflow_invocation.history = h1

    workflow_invocation_step1 = model.WorkflowInvocationStep()
    workflow_invocation_step1.workflow_invocation = workflow_invocation
    workflow_invocation_step1.workflow_step = workflow_step_1

    subworkflow_invocation = model.WorkflowInvocation()
    workflow_invocation.attach_subworkflow_invocation_for_step(workflow_step_2, subworkflow_invocation)

    workflow_invocation_step2 = model.WorkflowInvocationStep()
    workflow_invocation_step2.workflow_invocation = workflow_invocation
    workflow_invocation_step2.workflow_step = workflow_step_2

    workflow_invocation.workflow = workflow

    d1 = self.new_hda(h1, name="1")
    workflow_request_dataset = model.WorkflowRequestToInputDatasetAssociation()
    workflow_request_dataset.workflow_invocation = workflow_invocation
    workflow_request_dataset.workflow_step = workflow_step_1
    workflow_request_dataset.dataset = d1
    self.persist(workflow_invocation)
    assert workflow_request_dataset is not None
    assert workflow_invocation.id is not None

    history_id = h1.id
    self.expunge()

    loaded_invocation = self.query(model.WorkflowInvocation).get(workflow_invocation.id)
    assert loaded_invocation.uuid == invocation_uuid, "%s != %s" % (loaded_invocation.uuid, invocation_uuid)
    assert loaded_invocation
    assert loaded_invocation.history.id == history_id

    step_1, step_2 = loaded_invocation.workflow.steps

    assert not step_1.subworkflow
    assert step_2.subworkflow
    assert len(loaded_invocation.steps) == 2

    subworkflow_invocation_assoc = loaded_invocation.get_subworkflow_invocation_association_for_step(step_2)
    assert subworkflow_invocation_assoc is not None
    assert isinstance(subworkflow_invocation_assoc.subworkflow_invocation, model.WorkflowInvocation)
    assert isinstance(subworkflow_invocation_assoc.parent_workflow_invocation, model.WorkflowInvocation)
    assert subworkflow_invocation_assoc.subworkflow_invocation.history.id == history_id

    loaded_workflow = self.query(model.Workflow).get(workflow_id)
    assert len(loaded_workflow.steps[0].annotations) == 1
    copied_workflow = loaded_workflow.copy(user=user)
    annotations = copied_workflow.steps[0].annotations
    assert len(annotations) == 1
def test_import_export_edit_collection():
    """Test modifying existing collections with imports."""
    app = _mock_app()
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    c1 = model.DatasetCollection(collection_type="list", populated=False)
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=1, collection=c1, name="HistoryCollectionTest1")

    sa_session.add(hc1)
    sa_session.add(h)
    sa_session.flush()

    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add(import_history)

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory, app=app, for_edit=True) as export_store:
        export_store.add_dataset_collection(hc1)

    # Fabricate editing metadata for the collection...
    collections_metadata_path = os.path.join(temp_directory, store.ATTRS_FILENAME_COLLECTIONS)
    datasets_metadata_path = os.path.join(temp_directory, store.ATTRS_FILENAME_DATASETS)
    with open(collections_metadata_path, "r") as f:
        hdcas_metadata = json.load(f)

    assert len(hdcas_metadata) == 1
    hdca_metadata = hdcas_metadata[0]
    assert hdca_metadata
    assert "id" in hdca_metadata
    assert "collection" in hdca_metadata
    collection_metadata = hdca_metadata["collection"]
    assert "populated_state" in collection_metadata
    assert collection_metadata["populated_state"] == model.DatasetCollection.populated_states.NEW

    collection_metadata["populated_state"] = model.DatasetCollection.populated_states.OK

    d1 = model.HistoryDatasetAssociation(extension="txt", create_dataset=True, flush=False)
    d1.hid = 1
    d2 = model.HistoryDatasetAssociation(extension="txt", create_dataset=True, flush=False)
    d2.hid = 2
    serialization_options = model.SerializationOptions(for_edit=True)
    dataset_list = [
        d1.serialize(app.security, serialization_options),
        d2.serialize(app.security, serialization_options),
    ]

    dc = model.DatasetCollection(
        id=collection_metadata["id"],
        collection_type="list",
        element_count=2,
    )
    dc.populated_state = model.DatasetCollection.populated_states.OK
    dce1 = model.DatasetCollectionElement(
        element=d1,
        element_index=0,
        element_identifier="first",
    )
    dce2 = model.DatasetCollectionElement(
        element=d2,
        element_index=1,
        element_identifier="second",
    )
    dc.elements = [dce1, dce2]
    with open(datasets_metadata_path, "w") as datasets_f:
        json.dump(dataset_list, datasets_f)

    hdca_metadata["collection"] = dc.serialize(app.security, serialization_options)
    with open(collections_metadata_path, "w") as collections_f:
        json.dump(hdcas_metadata, collections_f)

    _perform_import_from_directory(temp_directory, app, u, import_history, store.ImportOptions(allow_edit=True))

    sa_session.refresh(c1)
    assert c1.populated_state == model.DatasetCollection.populated_states.OK, c1.populated_state
    assert len(c1.elements) == 2
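# Sketch of _perform_import_from_directory, mirroring _import_directory_to_history
# above but honoring the caller-supplied user, history, and import options.
# Hypothetical -- the actual helper is defined elsewhere in this module.
def _perform_import_from_directory(directory, app, user, import_history, import_options=None):
    import_model_store = store.get_import_model_store_for_directory(
        directory,
        app=app,
        user=user,
        import_options=import_options,
        tag_handler=app.tag_handler.create_tag_handler_session(),
    )
    with import_model_store.target_history(default_history=import_history):
        import_model_store.perform_import(import_history)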
def build_ready_hda(self):
    hist = model.History()
    self.app.model.context.add(hist)
    ready_hda = hist.add_dataset(
        model.HistoryDatasetAssociation(
            extension='interval',
            create_dataset=True,
            sa_session=self.app.model.context,
        )
    )
    ready_hda.set_dataset_state('ok')
    return ready_hda
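# Hypothetical usage of build_ready_hda in a test body: a 'ready' HDA is one
# whose underlying dataset is in the 'ok' state, which is what gates dataset
# parameters being considered usable. Test name and assertion are illustrative.
def test_ready_hda_state(self):
    hda = self.build_ready_hda()
    assert hda.dataset.state == 'ok'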
def cleanup_after_job(self):
    """ Set history, datasets, and jobs' attributes and clean up archive directory. """

    #
    # Helper methods.
    #

    def file_in_dir(file_path, a_dir):
        """ Returns true if file is in directory. """
        abs_file_path = os.path.abspath(file_path)
        return os.path.split(abs_file_path)[0] == a_dir

    def get_tag_str(tag, value):
        """ Builds a tag string for a tag, value pair. """
        if not value:
            return tag
        else:
            return tag + ":" + value

    #
    # Import history.
    #

    jiha = self.sa_session.query(model.JobImportHistoryArchive).filter_by(job_id=self.job_id).first()
    if jiha:
        try:
            archive_dir = jiha.archive_dir
            archive_dir = os.path.realpath(archive_dir)
            user = jiha.job.user

            # Bioblend previous to 17.01 exported histories with an extra subdir.
            if not os.path.exists(os.path.join(archive_dir, 'history_attrs.txt')):
                for d in os.listdir(archive_dir):
                    if os.path.isdir(os.path.join(archive_dir, d)):
                        archive_dir = os.path.join(archive_dir, d)
                        break

            #
            # Create history.
            #
            history_attr_file_name = os.path.join(archive_dir, 'history_attrs.txt')
            history_attrs = load(open(history_attr_file_name))

            # Create history.
            new_history = model.History(name='imported from archive: %s' % history_attrs['name'], user=user)
            new_history.importing = True
            new_history.hid_counter = history_attrs['hid_counter']
            new_history.genome_build = history_attrs['genome_build']
            self.sa_session.add(new_history)
            jiha.history = new_history
            self.sa_session.flush()

            # Add annotation, tags.
            if user:
                self.add_item_annotation(self.sa_session, user, new_history, history_attrs['annotation'])
                """
                TODO: figure out how to add tags to item.
                for tag, value in history_attrs['tags'].items():
                    trans.app.tag_handler.apply_item_tags(trans, trans.user, new_history, get_tag_str(tag, value))
                """

            #
            # Create datasets.
            #
            datasets_attrs_file_name = os.path.join(archive_dir, 'datasets_attrs.txt')
            datasets_attrs = load(open(datasets_attrs_file_name))
            provenance_file_name = datasets_attrs_file_name + ".provenance"

            if os.path.exists(provenance_file_name):
                provenance_attrs = load(open(provenance_file_name))
                datasets_attrs += provenance_attrs

            # Get counts of how often each dataset file is used; a file can
            # be linked to multiple dataset objects (HDAs).
            datasets_usage_counts = {}
            for dataset_attrs in datasets_attrs:
                temp_dataset_file_name = os.path.realpath(os.path.join(archive_dir, dataset_attrs['file_name']))
                if temp_dataset_file_name not in datasets_usage_counts:
                    datasets_usage_counts[temp_dataset_file_name] = 0
                datasets_usage_counts[temp_dataset_file_name] += 1

            # Create datasets.
            for dataset_attrs in datasets_attrs:
                metadata = dataset_attrs['metadata']

                # Create dataset and HDA.
                hda = model.HistoryDatasetAssociation(name=dataset_attrs['name'],
                                                      extension=dataset_attrs['extension'],
                                                      info=dataset_attrs['info'],
                                                      blurb=dataset_attrs['blurb'],
                                                      peek=dataset_attrs['peek'],
                                                      designation=dataset_attrs['designation'],
                                                      visible=dataset_attrs['visible'],
                                                      dbkey=metadata['dbkey'],
                                                      metadata=metadata,
                                                      history=new_history,
                                                      create_dataset=True,
                                                      sa_session=self.sa_session)
                if 'uuid' in dataset_attrs:
                    hda.dataset.uuid = dataset_attrs["uuid"]
                if dataset_attrs.get('exported', True) is False:
                    hda.state = hda.states.DISCARDED
                    hda.deleted = True
                    hda.purged = True
                else:
                    hda.state = hda.states.OK
                self.sa_session.add(hda)
                self.sa_session.flush()
                new_history.add_dataset(hda, genome_build=None)
                hda.hid = dataset_attrs['hid']  # Overwrite default hid set when HDA added to history.
                # TODO: Is there a way to recover permissions? Is this needed?
                # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                self.sa_session.flush()

                if dataset_attrs.get('exported', True) is True:
                    # Do security check and move/copy dataset data.
                    temp_dataset_file_name = os.path.realpath(
                        os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name'])))
                    if not file_in_dir(temp_dataset_file_name, os.path.join(archive_dir, "datasets")):
                        raise MalformedContents("Invalid dataset path: %s" % temp_dataset_file_name)

                    if datasets_usage_counts[temp_dataset_file_name] == 1:
                        self.app.object_store.update_from_file(hda.dataset, file_name=temp_dataset_file_name, create=True)

                        # Import additional files if present. Histories exported previously might not have this attribute set.
                        dataset_extra_files_path = dataset_attrs.get('extra_files_path', None)
                        if dataset_extra_files_path:
                            try:
                                file_list = os.listdir(os.path.join(archive_dir, dataset_extra_files_path))
                            except OSError:
                                file_list = []

                            if file_list:
                                for extra_file in file_list:
                                    self.app.object_store.update_from_file(
                                        hda.dataset,
                                        extra_dir='dataset_%s_files' % hda.dataset.id,
                                        alt_name=extra_file,
                                        file_name=os.path.join(archive_dir, dataset_extra_files_path, extra_file),
                                        create=True)
                    else:
                        datasets_usage_counts[temp_dataset_file_name] -= 1
                        shutil.copyfile(temp_dataset_file_name, hda.file_name)
                    hda.dataset.set_total_size()  # Update the filesize record in the database.

                    # Set tags, annotations.
                    if user:
                        self.add_item_annotation(self.sa_session, user, hda, dataset_attrs['annotation'])
                        # TODO: Set tags.
                        """
                        for tag, value in dataset_attrs['tags'].items():
                            trans.app.tag_handler.apply_item_tags(trans, trans.user, hda, get_tag_str(tag, value))
                        self.sa_session.flush()
                        """

                    # Although metadata is set above, need to set metadata to recover BAI for BAMs.
                    if hda.extension == 'bam':
                        self.app.datatypes_registry.set_external_metadata_tool.tool_action.execute_via_app(
                            self.app.datatypes_registry.set_external_metadata_tool,
                            self.app,
                            jiha.job.session_id,
                            new_history.id,
                            jiha.job.user,
                            incoming={'input1': hda},
                            overwrite=False)

            #
            # Create jobs.
            #

            # Decode jobs attributes.
            def as_hda(obj_dct):
                """ Hook to 'decode' an HDA; method uses history and HID to get the HDA
                represented by the encoded object. This only works because HDAs are created above. """
                if obj_dct.get('__HistoryDatasetAssociation__', False):
                    return self.sa_session.query(model.HistoryDatasetAssociation) \
                        .filter_by(history=new_history, hid=obj_dct['hid']).first()
                return obj_dct

            jobs_attr_file_name = os.path.join(archive_dir, 'jobs_attrs.txt')
            jobs_attrs = load(open(jobs_attr_file_name), object_hook=as_hda)

            # Create each job.
            for job_attrs in jobs_attrs:
                imported_job = model.Job()
                imported_job.user = user
                # TODO: set session?
                # imported_job.session = trans.get_galaxy_session().id
                imported_job.history = new_history
                imported_job.imported = True
                imported_job.tool_id = job_attrs['tool_id']
                imported_job.tool_version = job_attrs['tool_version']
                imported_job.set_state(job_attrs['state'])
                imported_job.info = job_attrs.get('info', None)
                imported_job.exit_code = job_attrs.get('exit_code', None)
                imported_job.traceback = job_attrs.get('traceback', None)
                imported_job.stdout = job_attrs.get('stdout', None)
                imported_job.stderr = job_attrs.get('stderr', None)
                imported_job.command_line = job_attrs.get('command_line', None)
                try:
                    imported_job.create_time = datetime.datetime.strptime(job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
                    imported_job.update_time = datetime.datetime.strptime(job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
                except Exception:
                    pass
                self.sa_session.add(imported_job)
                self.sa_session.flush()

                class HistoryDatasetAssociationIDEncoder(json.JSONEncoder):
                    """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """

                    def default(self, obj):
                        """ Encode an HDA, default encoding for everything else. """
                        if isinstance(obj, model.HistoryDatasetAssociation):
                            return obj.id
                        return json.JSONEncoder.default(self, obj)

                # Set parameters. May be useful to look at metadata.py for creating parameters.
                # TODO: there may be a better way to set parameters, e.g.:
                #   for name, value in tool.params_to_strings(incoming, trans.app).iteritems():
                #       job.add_parameter(name, value)
                # to make this work, we'd need to flesh out the HDA objects. The code below is
                # relatively similar.
                for name, value in job_attrs['params'].items():
                    # Transform parameter values when necessary.
                    if isinstance(value, model.HistoryDatasetAssociation):
                        # HDA input: use hid to find input.
                        input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=value.hid).first()
                        value = input_hda.id
                    # print "added parameter %s-->%s to job %i" % (name, value, imported_job.id)
                    imported_job.add_parameter(name, dumps(value, cls=HistoryDatasetAssociationIDEncoder))

                # TODO: Connect jobs to input datasets.

                # Connect jobs to output datasets.
                for output_hid in job_attrs['output_datasets']:
                    # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                    output_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                        .filter_by(history=new_history, hid=output_hid).first()
                    if output_hda:
                        imported_job.add_output_dataset(output_hda.name, output_hda)

                # Connect jobs to input datasets.
                if 'input_mapping' in job_attrs:
                    for input_name, input_hid in job_attrs['input_mapping'].items():
                        input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=input_hid).first()
                        if input_hda:
                            imported_job.add_input_dataset(input_name, input_hda)

                self.sa_session.flush()

            # Done importing.
            new_history.importing = False
            self.sa_session.flush()

            # Cleanup.
            if os.path.exists(archive_dir):
                shutil.rmtree(archive_dir)
        except Exception as e:
            jiha.job.stderr += "Error cleaning up history import job: %s" % e
            self.sa_session.flush()
            raise
def test_annotations(self):
    model = self.model

    u = model.User(email="*****@*****.**", password="******")
    self.persist(u)

    def persist_and_check_annotation(annotation_class, **kwds):
        annotated_association = annotation_class()
        annotated_association.annotation = "Test Annotation"
        annotated_association.user = u
        for key, value in kwds.items():
            setattr(annotated_association, key, value)
        self.persist(annotated_association)
        self.expunge()
        stored_annotation = self.query(annotation_class).all()[0]
        assert stored_annotation.annotation == "Test Annotation"
        assert stored_annotation.user.email == "*****@*****.**"

    sw = model.StoredWorkflow()
    sw.user = u
    self.persist(sw)
    persist_and_check_annotation(model.StoredWorkflowAnnotationAssociation, stored_workflow=sw)

    workflow = model.Workflow()
    workflow.stored_workflow = sw
    self.persist(workflow)

    ws = model.WorkflowStep()
    ws.workflow = workflow
    self.persist(ws)
    persist_and_check_annotation(model.WorkflowStepAnnotationAssociation, workflow_step=ws)

    h = model.History(name="History for Annotation", user=u)
    self.persist(h)
    persist_and_check_annotation(model.HistoryAnnotationAssociation, history=h)

    d1 = model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=model.session)
    self.persist(d1)
    persist_and_check_annotation(model.HistoryDatasetAssociationAnnotationAssociation, hda=d1)

    page = model.Page()
    page.user = u
    self.persist(page)
    persist_and_check_annotation(model.PageAnnotationAssociation, page=page)

    visualization = model.Visualization()
    visualization.user = u
    self.persist(visualization)
    persist_and_check_annotation(model.VisualizationAnnotationAssociation, visualization=visualization)

    dataset_collection = model.DatasetCollection(collection_type="paired")
    history_dataset_collection = model.HistoryDatasetCollectionAssociation(collection=dataset_collection)
    self.persist(history_dataset_collection)
    persist_and_check_annotation(
        model.HistoryDatasetCollectionAssociationAnnotationAssociation,
        history_dataset_collection=history_dataset_collection)

    library_dataset_collection = model.LibraryDatasetCollectionAssociation(collection=dataset_collection)
    self.persist(library_dataset_collection)
    persist_and_check_annotation(
        model.LibraryDatasetCollectionAnnotationAssociation,
        library_dataset_collection=library_dataset_collection)
def test_tags(self):
    model = self.model

    my_tag = model.Tag(name="Test Tag")
    u = model.User(email="*****@*****.**", password="******")
    self.persist(my_tag, u)

    def tag_and_test(taggable_object, tag_association_class, backref_name):
        assert len(getattr(
            self.query(model.Tag).filter(model.Tag.name == "Test Tag").all()[0],
            backref_name)) == 0

        tag_association = tag_association_class()
        tag_association.tag = my_tag
        taggable_object.tags = [tag_association]
        self.persist(tag_association, taggable_object)

        assert len(getattr(
            self.query(model.Tag).filter(model.Tag.name == "Test Tag").all()[0],
            backref_name)) == 1

    sw = model.StoredWorkflow()
    sw.user = u
    tag_and_test(sw, model.StoredWorkflowTagAssociation, "tagged_workflows")

    h = model.History(name="History for Tagging", user=u)
    tag_and_test(h, model.HistoryTagAssociation, "tagged_histories")

    d1 = model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=model.session)
    tag_and_test(d1, model.HistoryDatasetAssociationTagAssociation, "tagged_history_dataset_associations")

    page = model.Page()
    page.user = u
    tag_and_test(page, model.PageTagAssociation, "tagged_pages")

    visualization = model.Visualization()
    visualization.user = u
    tag_and_test(visualization, model.VisualizationTagAssociation, "tagged_visualizations")

    dataset_collection = model.DatasetCollection(collection_type="paired")
    history_dataset_collection = model.HistoryDatasetCollectionAssociation(collection=dataset_collection)
    tag_and_test(history_dataset_collection, model.HistoryDatasetCollectionTagAssociation,
                 "tagged_history_dataset_collections")

    library_dataset_collection = model.LibraryDatasetCollectionAssociation(collection=dataset_collection)
    tag_and_test(library_dataset_collection, model.LibraryDatasetCollectionTagAssociation,
                 "tagged_library_dataset_collections")
def test_ratings(self):
    model = self.model

    u = model.User(email="*****@*****.**", password="******")
    self.persist(u)

    def persist_and_check_rating(rating_class, **kwds):
        rating_association = rating_class()
        rating_association.rating = 5
        rating_association.user = u
        for key, value in kwds.items():
            setattr(rating_association, key, value)
        self.persist(rating_association)
        self.expunge()
        stored_rating = self.query(rating_class).all()[0]
        assert stored_rating.rating == 5
        assert stored_rating.user.email == "*****@*****.**"

    sw = model.StoredWorkflow()
    sw.user = u
    self.persist(sw)
    persist_and_check_rating(model.StoredWorkflowRatingAssociation, stored_workflow=sw)

    h = model.History(name="History for Rating", user=u)
    self.persist(h)
    persist_and_check_rating(model.HistoryRatingAssociation, history=h)

    d1 = model.HistoryDatasetAssociation(extension="txt", history=h, create_dataset=True, sa_session=model.session)
    self.persist(d1)
    persist_and_check_rating(model.HistoryDatasetAssociationRatingAssociation, hda=d1)

    page = model.Page()
    page.user = u
    self.persist(page)
    persist_and_check_rating(model.PageRatingAssociation, page=page)

    visualization = model.Visualization()
    visualization.user = u
    self.persist(visualization)
    persist_and_check_rating(model.VisualizationRatingAssociation, visualization=visualization)

    dataset_collection = model.DatasetCollection(collection_type="paired")
    history_dataset_collection = model.HistoryDatasetCollectionAssociation(collection=dataset_collection)
    self.persist(history_dataset_collection)
    persist_and_check_rating(
        model.HistoryDatasetCollectionRatingAssociation,
        history_dataset_collection=history_dataset_collection)

    library_dataset_collection = model.LibraryDatasetCollectionAssociation(collection=dataset_collection)
    self.persist(library_dataset_collection)
    persist_and_check_rating(
        model.LibraryDatasetCollectionRatingAssociation,
        library_dataset_collection=library_dataset_collection)
def cleanup_after_job(self, db_session):
    """ Set history, datasets, and jobs' attributes and clean up archive directory. """

    #
    # Helper methods.
    #

    def file_in_dir(file_path, a_dir):
        """ Returns true if file is in directory. """
        abs_file_path = os.path.abspath(file_path)
        return os.path.split(abs_file_path)[0] == a_dir

    def read_file_contents(file_path):
        """ Read contents of a file. """
        fp = open(file_path, 'rb')
        buffsize = 1048576
        file_contents = ''
        try:
            while True:
                file_contents += fp.read(buffsize)
                if not file_contents or len(file_contents) % buffsize != 0:
                    break
        except OverflowError:
            pass
        fp.close()
        return file_contents

    def get_tag_str(tag, value):
        """ Builds a tag string for a tag, value pair. """
        if not value:
            return tag
        else:
            return tag + ":" + value

    #
    # Import history.
    #

    jiha = db_session.query(model.JobImportHistoryArchive).filter_by(job_id=self.job_id).first()
    if jiha:
        try:
            archive_dir = jiha.archive_dir
            user = jiha.job.user

            #
            # Create history.
            #
            history_attr_file_name = os.path.join(archive_dir, 'history_attrs.txt')
            history_attr_str = read_file_contents(history_attr_file_name)
            history_attrs = from_json_string(history_attr_str)

            # Create history.
            new_history = model.History(
                name='imported from archive: %s' % history_attrs['name'].encode('utf-8'),
                user=user)
            new_history.importing = True
            new_history.hid_counter = history_attrs['hid_counter']
            new_history.genome_build = history_attrs['genome_build']
            db_session.add(new_history)
            jiha.history = new_history
            db_session.flush()

            # Add annotation, tags.
            if user:
                self.add_item_annotation(db_session, user, new_history, history_attrs['annotation'])
                """
                TODO: figure out how to add tags to item.
                for tag, value in history_attrs['tags'].items():
                    trans.app.tag_handler.apply_item_tags(trans, trans.user, new_history, get_tag_str(tag, value))
                """

            #
            # Create datasets.
            #
            datasets_attrs_file_name = os.path.join(archive_dir, 'datasets_attrs.txt')
            datasets_attr_str = read_file_contents(datasets_attrs_file_name)
            datasets_attrs = from_json_string(datasets_attr_str)

            # Get counts of how often each dataset file is used; a file can
            # be linked to multiple dataset objects (HDAs).
            datasets_usage_counts = {}
            for dataset_attrs in datasets_attrs:
                temp_dataset_file_name = os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name']))
                if temp_dataset_file_name not in datasets_usage_counts:
                    datasets_usage_counts[temp_dataset_file_name] = 0
                datasets_usage_counts[temp_dataset_file_name] += 1

            # Create datasets.
            for dataset_attrs in datasets_attrs:
                metadata = dataset_attrs['metadata']

                # Create dataset and HDA.
                hda = model.HistoryDatasetAssociation(name=dataset_attrs['name'].encode('utf-8'),
                                                      extension=dataset_attrs['extension'],
                                                      info=dataset_attrs['info'].encode('utf-8'),
                                                      blurb=dataset_attrs['blurb'],
                                                      peek=dataset_attrs['peek'],
                                                      designation=dataset_attrs['designation'],
                                                      visible=dataset_attrs['visible'],
                                                      dbkey=metadata['dbkey'],
                                                      metadata=metadata,
                                                      history=new_history,
                                                      create_dataset=True,
                                                      sa_session=db_session)
                hda.state = hda.states.OK
                db_session.add(hda)
                db_session.flush()
                new_history.add_dataset(hda, genome_build=None)
                hda.hid = dataset_attrs['hid']  # Overwrite default hid set when HDA added to history.
                # TODO: Is there a way to recover permissions? Is this needed?
                # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                db_session.flush()

                # Do security check and move/copy dataset data.
                temp_dataset_file_name = os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name']))
                if not file_in_dir(temp_dataset_file_name, os.path.join(archive_dir, "datasets")):
                    raise Exception("Invalid dataset path: %s" % temp_dataset_file_name)
                if datasets_usage_counts[temp_dataset_file_name] == 1:
                    shutil.move(temp_dataset_file_name, hda.file_name)
                else:
                    datasets_usage_counts[temp_dataset_file_name] -= 1
                    shutil.copyfile(temp_dataset_file_name, hda.file_name)

                # Set tags, annotations.
                if user:
                    self.add_item_annotation(db_session, user, hda, dataset_attrs['annotation'])
                    # TODO: Set tags.
                    """
                    for tag, value in dataset_attrs['tags'].items():
                        trans.app.tag_handler.apply_item_tags(trans, trans.user, hda, get_tag_str(tag, value))
                    db_session.flush()
                    """

            #
            # Create jobs.
            #

            # Read jobs attributes.
            jobs_attr_file_name = os.path.join(archive_dir, 'jobs_attrs.txt')
            jobs_attr_str = read_file_contents(jobs_attr_file_name)

            # Decode jobs attributes.
            def as_hda(obj_dct):
                """ Hook to 'decode' an HDA; method uses history and HID to get the HDA
                represented by the encoded object. This only works because HDAs are created above. """
                if obj_dct.get('__HistoryDatasetAssociation__', False):
                    return db_session.query(model.HistoryDatasetAssociation) \
                        .filter_by(history=new_history, hid=obj_dct['hid']).first()
                return obj_dct

            jobs_attrs = from_json_string(jobs_attr_str, object_hook=as_hda)

            # Create each job.
            for job_attrs in jobs_attrs:
                imported_job = model.Job()
                imported_job.user = user
                # TODO: set session?
                # imported_job.session = trans.get_galaxy_session().id
                imported_job.history = new_history
                imported_job.tool_id = job_attrs['tool_id']
                imported_job.tool_version = job_attrs['tool_version']
                imported_job.set_state(job_attrs['state'])
                imported_job.imported = True
                db_session.add(imported_job)
                db_session.flush()

                class HistoryDatasetAssociationIDEncoder(simplejson.JSONEncoder):
                    """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """

                    def default(self, obj):
                        """ Encode an HDA, default encoding for everything else. """
                        if isinstance(obj, model.HistoryDatasetAssociation):
                            return obj.id
                        return simplejson.JSONEncoder.default(self, obj)

                # Set parameters. May be useful to look at metadata.py for creating parameters.
                # TODO: there may be a better way to set parameters, e.g.:
                #   for name, value in tool.params_to_strings(incoming, trans.app).iteritems():
                #       job.add_parameter(name, value)
                # to make this work, we'd need to flesh out the HDA objects. The code below is
                # relatively similar.
                for name, value in job_attrs['params'].items():
                    # Transform parameter values when necessary.
                    if isinstance(value, model.HistoryDatasetAssociation):
                        # HDA input: use hid to find input.
                        input_hda = db_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=value.hid).first()
                        value = input_hda.id
                    # print "added parameter %s-->%s to job %i" % (name, value, imported_job.id)
                    imported_job.add_parameter(name, to_json_string(value, cls=HistoryDatasetAssociationIDEncoder))

                # TODO: Connect jobs to input datasets.

                # Connect jobs to output datasets.
                for output_hid in job_attrs['output_datasets']:
                    # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                    output_hda = db_session.query(model.HistoryDatasetAssociation) \
                        .filter_by(history=new_history, hid=output_hid).first()
                    if output_hda:
                        imported_job.add_output_dataset(output_hda.name, output_hda)

            # Done importing.
            new_history.importing = False
            db_session.flush()

            # Cleanup.
            if os.path.exists(archive_dir):
                shutil.rmtree(archive_dir)
        except Exception as e:
            jiha.job.stderr += "Error cleaning up history import job: %s" % e
            db_session.flush()
def test_export_copied_objects_copied_outside_history():
    app, sa_session, h = _setup_history_for_export("Collection History with copied objects")

    d1, d2 = _create_datasets(sa_session, h, 2)
    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=3, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 4
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
    sa_session.add_all((dce1, dce2, d1, d2, hc1))
    sa_session.flush()

    hc2 = hc1.copy(element_destination=h)
    h.add_dataset_collection(hc2)
    sa_session.add(hc2)

    other_h = model.History(name=h.name + "-other", user=h.user)
    sa_session.add(other_h)
    hc3 = hc2.copy(element_destination=other_h)
    other_h.add_dataset_collection(hc3)
    sa_session.add(hc3)
    sa_session.flush()

    hc4 = hc3.copy(element_destination=h)
    h.add_dataset_collection(hc4)
    sa_session.add(hc4)
    sa_session.flush()
    assert h.hid_counter == 10

    original_by_hid = _hid_dict(h)
    assert original_by_hid[7].copied_from_history_dataset_association != original_by_hid[4]
    assert original_by_hid[8].copied_from_history_dataset_association != original_by_hid[5]
    assert original_by_hid[9].copied_from_history_dataset_collection_association != original_by_hid[6]

    imported_history = _import_export(app, h)

    assert imported_history.hid_counter == 10
    assert len(imported_history.dataset_collections) == 3
    assert len(imported_history.datasets) == 6
    _assert_distinct_hids(imported_history)

    imported_by_hid = _hid_dict(imported_history)
    assert imported_by_hid[4].copied_from_history_dataset_association == imported_by_hid[1]
    assert imported_by_hid[5].copied_from_history_dataset_association == imported_by_hid[2]
    assert imported_by_hid[6].copied_from_history_dataset_collection_association == imported_by_hid[3]
    assert imported_by_hid[7].copied_from_history_dataset_association == imported_by_hid[4]
    assert imported_by_hid[8].copied_from_history_dataset_association == imported_by_hid[5]
    assert imported_by_hid[9].copied_from_history_dataset_collection_association == imported_by_hid[6]
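# Hypothetical sketch of the _hid_dict helper used above: maps each hid in a
# history to its dataset or collection instance so copy provenance can be
# checked by hid. The real helper is defined elsewhere in this module.
def _hid_dict(history):
    by_hid = {}
    for hda in history.datasets:
        by_hid[hda.hid] = hda
    for hdca in history.dataset_collections:
        by_hid[hdca.hid] = hdca
    return by_hid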
def _new_history(self):
    history = model.History()
    history.id = 1
    history.name = "New History"
    return history
def test_nested_collection_attributes(self):
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    h1 = model.History(name="History 1", user=u)
    d1 = model.HistoryDatasetAssociation(extension="bam", history=h1, create_dataset=True, sa_session=model.session)
    index = NamedTemporaryFile("w")
    index.write("cool bam index")
    index2 = NamedTemporaryFile("w")
    index2.write("cool bam index 2")
    metadata_dict = {
        "bam_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index.name}),
        "bam_csi_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index2.name}),
    }
    d1.metadata.from_JSON_dict(json_dict=metadata_dict)
    assert d1.metadata.bam_index
    assert d1.metadata.bam_csi_index
    assert isinstance(d1.metadata.bam_index, model.MetadataFile)
    assert isinstance(d1.metadata.bam_csi_index, model.MetadataFile)
    d2 = model.HistoryDatasetAssociation(extension="txt", history=h1, create_dataset=True, sa_session=model.session)
    c1 = model.DatasetCollection(collection_type='paired')
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
    c2 = model.DatasetCollection(collection_type="list:paired")
    dce3 = model.DatasetCollectionElement(collection=c2, element=c1, element_identifier="inner_list", element_index=0)
    c3 = model.DatasetCollection(collection_type="list:list")
    c4 = model.DatasetCollection(collection_type="list:list:paired")
    dce4 = model.DatasetCollectionElement(collection=c4, element=c2, element_identifier="outer_list", element_index=0)
    model.session.add_all([d1, d2, c1, dce1, dce2, c2, dce3, c3, c4, dce4])
    model.session.flush()

    q = c2._get_nested_collection_attributes(
        element_attributes=('element_identifier',),
        hda_attributes=('extension',),
        dataset_attributes=('state',))
    assert [(r.keys()) for r in q] == [
        ['element_identifier_0', 'element_identifier_1', 'extension', 'state'],
        ['element_identifier_0', 'element_identifier_1', 'extension', 'state'],
    ]
    assert q.all() == [('inner_list', 'forward', 'bam', 'new'), ('inner_list', 'reverse', 'txt', 'new')]

    q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation,))
    assert q.all() == [d1, d2]
    q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation, model.Dataset))
    assert q.all() == [(d1, d1.dataset), (d2, d2.dataset)]

    # Assert properties that use _get_nested_collection_attributes return correct content.
    assert c2.dataset_instances == [d1, d2]
    assert c2.dataset_elements == [dce1, dce2]
    assert c2.dataset_action_tuples == []
    assert c2.populated_optimized
    assert c2.dataset_states_and_extensions_summary == ({'new'}, {'txt', 'bam'})
    assert c2.element_identifiers_extensions_paths_and_metadata_files == [
        [('inner_list', 'forward'), 'bam', 'mock_dataset_14.dat',
         [('bai', 'mock_dataset_14.dat'), ('bam.csi', 'mock_dataset_14.dat')]],
        [('inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat', []],
    ]
    assert c3.dataset_instances == []
    assert c3.dataset_elements == []
    assert c3.dataset_states_and_extensions_summary == (set(), set())

    q = c4._get_nested_collection_attributes(element_attributes=('element_identifier',))
    assert q.all() == [('outer_list', 'inner_list', 'forward'), ('outer_list', 'inner_list', 'reverse')]
    assert c4.dataset_elements == [dce1, dce2]
    assert c4.element_identifiers_extensions_and_paths == [
        (('outer_list', 'inner_list', 'forward'), 'bam', 'mock_dataset_14.dat'),
        (('outer_list', 'inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat'),
    ]