Example #1
def test_export_collection_with_datasets_from_other_history():
    app, sa_session, h = _setup_history_for_export(
        "Collection History with dataset from other history")

    dataset_history = model.History(name="Dataset History", user=h.user)

    d1, d2 = _create_datasets(sa_session, dataset_history, 2)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 2
    dce1 = model.DatasetCollectionElement(collection=c1,
                                          element=d1,
                                          element_identifier="forward",
                                          element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1,
                                          element=d2,
                                          element_identifier="reverse",
                                          element_index=1)

    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(hc1)
    sa_session.flush()

    imported_history = _import_export(app, h)

    assert imported_history.hid_counter == 4, imported_history.hid_counter
    assert len(imported_history.dataset_collections) == 1
    assert len(imported_history.datasets) == 2
    for hdca in imported_history.dataset_collections:
        assert hdca.hid == 1, hdca.hid
    for hda in imported_history.datasets:
        assert hda.hid in [2, 3]
    _assert_distinct_hids(imported_history)
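
This test leans on helpers defined elsewhere in the suite (_hid_dict, _assert_distinct_hids; see also Example #14). A minimal sketch of plausible implementations, assuming imported histories expose datasets and dataset_collections whose items carry a hid; the bodies below are reconstructions, not the originals:

def _hid_dict(history):
    # Hypothetical: map each item's hid to the HDA/HDCA that owns it.
    by_hid = {}
    for item in list(history.datasets) + list(history.dataset_collections):
        by_hid[item.hid] = item
    return by_hid


def _assert_distinct_hids(history):
    # Hypothetical: every dataset and collection should occupy a unique hid.
    hids = [item.hid for item in list(history.datasets) + list(history.dataset_collections)]
    assert len(hids) == len(set(hids)), hids
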
Example #2
    def setUp(self):
        self.setup_app(mock_model=False)
        self.mock_tool = bunch.Bunch(
            app=self.app,
            tool_type="default",
        )
        self.test_history = model.History()
        self.app.model.context.add(self.test_history)
        self.app.model.context.flush()
        self.app.tool_data_tables["test_table"] = MockToolDataTable()
        self.trans = bunch.Bunch(
            app=self.app,
            get_history=lambda: self.test_history,
            get_current_user_roles=lambda: [],
            workflow_building_mode=False,
            webapp=bunch.Bunch(name="galaxy"),
        )
        self.type = "select"
        self.set_data_ref = False
        self.multiple = False
        self.optional = False
        self._param = None
Example #3
    def setUp(self):
        self.setup_app(mock_model=False)
        self.mock_tool = bunch.Bunch(
            app=self.app,
            tool_type="default",
        )
        self.test_history = model.History()
        self.app.model.context.add(self.test_history)
        self.app.model.context.flush()
        self.trans = bunch.Bunch(
            app=self.app,
            get_history=lambda: self.test_history,
            get_current_user_roles=lambda: [],
            workflow_building_mode=False,
            webapp=bunch.Bunch(name="galaxy"),
        )

        self.type = "data_column"
        self.other_attributes = ""
        self.set_data_ref = "input_tsv"
        self.multiple = False
        self.optional = False
        self._param = None
Example #4
def _import_directory_to_history(app, target, work_directory):
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    import_history = model.History(name="Test History for Import", user=u)

    sa_session.add_all([u, import_history])
    sa_session.flush()

    assert len(import_history.datasets) == 0

    import_options = store.ImportOptions(allow_dataset_object_edit=True)
    import_model_store = store.get_import_model_store_for_directory(
        target,
        app=app,
        user=u,
        import_options=import_options,
        tag_handler=app.tag_handler.create_tag_handler_session())
    with import_model_store.target_history(default_history=import_history):
        import_model_store.perform_import(import_history)

    return import_history
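
Examples #1 and #14 also call _import_export(app, h), which round-trips a history through an export directory. A plausible sketch built from _import_directory_to_history above and the DirectoryModelExportStore shown in Example #7; the export_history method is an assumption here, since Example #7 only demonstrates add_dataset_collection:

from tempfile import mkdtemp

def _import_export(app, h):
    # Hypothetical: export h to a fresh directory, then import it back.
    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory, app=app) as export_store:
        export_store.export_history(h)  # assumed API
    return _import_directory_to_history(app, temp_directory, mkdtemp())
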
Example #5
def _setup_simple_cat_job(app, state='ok'):
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    d1, d2 = _create_datasets(sa_session, h, 2)
    d1.state = d2.state = state

    j = model.Job()
    j.user = u
    j.tool_id = "cat1"
    j.state = state

    j.add_input_dataset("input1", d1)
    j.add_output_dataset("out_file1", d2)

    sa_session.add_all((d1, d2, h, j))
    sa_session.flush()

    app.object_store.update_from_file(d1, file_name=TEST_PATH_1, create=True)
    app.object_store.update_from_file(d2, file_name=TEST_PATH_2, create=True)

    return u, h, d1, d2, j
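
_create_datasets(sa_session, h, 2) is another helper the tests assume. A minimal sketch, assuming it simply builds n fresh HDAs attached to the given history; the constructor arguments mirror those used in the later examples, but the body is a reconstruction:

def _create_datasets(sa_session, history, n, extension="txt"):
    # Hypothetical: create n HistoryDatasetAssociations attached to history.
    return [
        model.HistoryDatasetAssociation(extension=extension,
                                        history=history,
                                        create_dataset=True,
                                        sa_session=sa_session)
        for _ in range(n)
    ]
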
Example #6
    def test_workflows(self):
        model = self.model
        user = model.User(
            email="*****@*****.**",
            password="******"
        )

        def workflow_from_steps(steps):
            stored_workflow = model.StoredWorkflow()
            stored_workflow.user = user
            workflow = model.Workflow()
            workflow.steps = steps
            workflow.stored_workflow = stored_workflow
            return workflow

        child_workflow = workflow_from_steps([])
        self.persist(child_workflow)

        workflow_step_1 = model.WorkflowStep()
        workflow_step_1.order_index = 0
        workflow_step_1.type = "data_input"
        workflow_step_2 = model.WorkflowStep()
        workflow_step_2.order_index = 1
        workflow_step_2.type = "subworkflow"
        workflow_step_2.subworkflow = child_workflow

        workflow_step_1.get_or_add_input("moo1")
        workflow_step_1.get_or_add_input("moo2")
        workflow_step_2.get_or_add_input("moo")
        workflow_step_1.add_connection("foo", "cow", workflow_step_2)

        workflow = workflow_from_steps([workflow_step_1, workflow_step_2])
        self.persist(workflow)
        workflow_id = workflow.id

        annotation = model.WorkflowStepAnnotationAssociation()
        annotation.annotation = "Test Step Annotation"
        annotation.user = user
        annotation.workflow_step = workflow_step_1
        self.persist(annotation)

        assert workflow_step_1.id is not None
        h1 = model.History(name="WorkflowHistory1", user=user)

        invocation_uuid = uuid.uuid1()

        workflow_invocation = model.WorkflowInvocation()
        workflow_invocation.uuid = invocation_uuid
        workflow_invocation.history = h1

        workflow_invocation_step1 = model.WorkflowInvocationStep()
        workflow_invocation_step1.workflow_invocation = workflow_invocation
        workflow_invocation_step1.workflow_step = workflow_step_1

        subworkflow_invocation = model.WorkflowInvocation()
        workflow_invocation.attach_subworkflow_invocation_for_step(workflow_step_2, subworkflow_invocation)

        workflow_invocation_step2 = model.WorkflowInvocationStep()
        workflow_invocation_step2.workflow_invocation = workflow_invocation
        workflow_invocation_step2.workflow_step = workflow_step_2

        workflow_invocation.workflow = workflow

        d1 = self.new_hda(h1, name="1")
        workflow_request_dataset = model.WorkflowRequestToInputDatasetAssociation()
        workflow_request_dataset.workflow_invocation = workflow_invocation
        workflow_request_dataset.workflow_step = workflow_step_1
        workflow_request_dataset.dataset = d1
        self.persist(workflow_invocation)
        assert workflow_request_dataset is not None
        assert workflow_invocation.id is not None

        history_id = h1.id
        self.expunge()

        loaded_invocation = self.query(model.WorkflowInvocation).get(workflow_invocation.id)
        assert loaded_invocation.uuid == invocation_uuid, "%s != %s" % (loaded_invocation.uuid, invocation_uuid)
        assert loaded_invocation
        assert loaded_invocation.history.id == history_id

        step_1, step_2 = loaded_invocation.workflow.steps

        assert not step_1.subworkflow
        assert step_2.subworkflow
        assert len(loaded_invocation.steps) == 2

        subworkflow_invocation_assoc = loaded_invocation.get_subworkflow_invocation_association_for_step(step_2)
        assert subworkflow_invocation_assoc is not None
        assert isinstance(subworkflow_invocation_assoc.subworkflow_invocation, model.WorkflowInvocation)
        assert isinstance(subworkflow_invocation_assoc.parent_workflow_invocation, model.WorkflowInvocation)

        assert subworkflow_invocation_assoc.subworkflow_invocation.history.id == history_id

        loaded_workflow = self.query(model.Workflow).get(workflow_id)
        assert len(loaded_workflow.steps[0].annotations) == 1
        copied_workflow = loaded_workflow.copy(user=user)
        annotations = copied_workflow.steps[0].annotations
        assert len(annotations) == 1
Example #7
def test_import_export_edit_collection():
    """Test modifying existing collections with imports."""
    app = _mock_app()
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    c1 = model.DatasetCollection(collection_type="list", populated=False)
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")

    sa_session.add(hc1)
    sa_session.add(h)
    sa_session.flush()

    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add(import_history)

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory,
                                         app=app,
                                         for_edit=True) as export_store:
        export_store.add_dataset_collection(hc1)

    # Fabricate edited metadata for the collection...
    collections_metadata_path = os.path.join(temp_directory,
                                             store.ATTRS_FILENAME_COLLECTIONS)
    datasets_metadata_path = os.path.join(temp_directory,
                                          store.ATTRS_FILENAME_DATASETS)
    with open(collections_metadata_path, "r") as f:
        hdcas_metadata = json.load(f)

    assert len(hdcas_metadata) == 1
    hdca_metadata = hdcas_metadata[0]
    assert hdca_metadata
    assert "id" in hdca_metadata
    assert "collection" in hdca_metadata
    collection_metadata = hdca_metadata["collection"]
    assert "populated_state" in collection_metadata
    assert collection_metadata["populated_state"] == model.DatasetCollection.populated_states.NEW

    collection_metadata["populated_state"] = model.DatasetCollection.populated_states.OK

    d1 = model.HistoryDatasetAssociation(extension="txt",
                                         create_dataset=True,
                                         flush=False)
    d1.hid = 1
    d2 = model.HistoryDatasetAssociation(extension="txt",
                                         create_dataset=True,
                                         flush=False)
    d2.hid = 2
    serialization_options = model.SerializationOptions(for_edit=True)
    dataset_list = [
        d1.serialize(app.security, serialization_options),
        d2.serialize(app.security, serialization_options)
    ]

    dc = model.DatasetCollection(
        id=collection_metadata["id"],
        collection_type="list",
        element_count=2,
    )
    dc.populated_state = model.DatasetCollection.populated_states.OK
    dce1 = model.DatasetCollectionElement(
        element=d1,
        element_index=0,
        element_identifier="first",
    )
    dce2 = model.DatasetCollectionElement(
        element=d2,
        element_index=1,
        element_identifier="second",
    )
    dc.elements = [dce1, dce2]
    with open(datasets_metadata_path, "w") as datasets_f:
        json.dump(dataset_list, datasets_f)

    hdca_metadata["collection"] = dc.serialize(app.security,
                                               serialization_options)
    with open(collections_metadata_path, "w") as collections_f:
        json.dump(hdcas_metadata, collections_f)

    _perform_import_from_directory(temp_directory, app, u, import_history,
                                   store.ImportOptions(allow_edit=True))

    sa_session.refresh(c1)
    assert c1.populated_state == model.DatasetCollection.populated_states.OK, c1.populated_state
    assert len(c1.elements) == 2
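
_perform_import_from_directory mirrors the import half of Example #4; a sketch under the same assumptions, with the body assembled from the calls shown there:

def _perform_import_from_directory(directory, app, user, import_history, import_options):
    # Build an import store for the directory and replay it into import_history.
    import_model_store = store.get_import_model_store_for_directory(
        directory,
        app=app,
        user=user,
        import_options=import_options,
        tag_handler=app.tag_handler.create_tag_handler_session())
    with import_model_store.target_history(default_history=import_history):
        import_model_store.perform_import(import_history)
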
Example #8
    def build_ready_hda(self):
        hist = model.History()
        self.app.model.context.add(hist)
        ready_hda = hist.add_dataset(model.HistoryDatasetAssociation(extension='interval', create_dataset=True, sa_session=self.app.model.context))
        ready_hda.set_dataset_state('ok')
        return ready_hda
Example #9
    def cleanup_after_job(self):
        """ Set history, datasets, and jobs' attributes and clean up archive directory. """

        #
        # Helper methods.
        #

        def file_in_dir(file_path, a_dir):
            """ Returns true if file is in directory. """
            abs_file_path = os.path.abspath(file_path)
            return os.path.split(abs_file_path)[0] == a_dir

        def get_tag_str(tag, value):
            """ Builds a tag string for a tag, value pair. """
            if not value:
                return tag
            else:
                return tag + ":" + value

        #
        # Import history.
        #

        jiha = self.sa_session.query(model.JobImportHistoryArchive).filter_by(
            job_id=self.job_id).first()
        if jiha:
            try:
                archive_dir = jiha.archive_dir
                archive_dir = os.path.realpath(archive_dir)
                user = jiha.job.user

                # Bioblend prior to 17.01 exported histories with an extra subdir.
                if not os.path.exists(
                        os.path.join(archive_dir, 'history_attrs.txt')):
                    for d in os.listdir(archive_dir):
                        if os.path.isdir(os.path.join(archive_dir, d)):
                            archive_dir = os.path.join(archive_dir, d)
                            break

                #
                # Create history.
                #
                history_attr_file_name = os.path.join(archive_dir,
                                                      'history_attrs.txt')
                history_attrs = load(open(history_attr_file_name))

                # Create history.
                new_history = model.History(name='imported from archive: %s' %
                                            history_attrs['name'],
                                            user=user)
                new_history.importing = True
                new_history.hid_counter = history_attrs['hid_counter']
                new_history.genome_build = history_attrs['genome_build']
                self.sa_session.add(new_history)
                jiha.history = new_history
                self.sa_session.flush()

                # Add annotation, tags.
                if user:
                    self.add_item_annotation(self.sa_session, user,
                                             new_history,
                                             history_attrs['annotation'])
                    """
                    TODO: figure out how to add tags to item.
                    for tag, value in history_attrs[ 'tags' ].items():
                        trans.app.tag_handler.apply_item_tags( trans, trans.user, new_history, get_tag_str( tag, value ) )
                    """

                #
                # Create datasets.
                #
                datasets_attrs_file_name = os.path.join(
                    archive_dir, 'datasets_attrs.txt')
                datasets_attrs = load(open(datasets_attrs_file_name))
                provenance_file_name = datasets_attrs_file_name + ".provenance"

                if os.path.exists(provenance_file_name):
                    provenance_attrs = load(open(provenance_file_name))
                    datasets_attrs += provenance_attrs

                # Get counts of how often each dataset file is used; a file can
                # be linked to multiple dataset objects (HDAs).
                datasets_usage_counts = {}
                for dataset_attrs in datasets_attrs:
                    temp_dataset_file_name = \
                        os.path.realpath(os.path.join(archive_dir, dataset_attrs['file_name']))
                    if (temp_dataset_file_name not in datasets_usage_counts):
                        datasets_usage_counts[temp_dataset_file_name] = 0
                    datasets_usage_counts[temp_dataset_file_name] += 1

                # Create datasets.
                for dataset_attrs in datasets_attrs:
                    metadata = dataset_attrs['metadata']

                    # Create dataset and HDA.
                    hda = model.HistoryDatasetAssociation(
                        name=dataset_attrs['name'],
                        extension=dataset_attrs['extension'],
                        info=dataset_attrs['info'],
                        blurb=dataset_attrs['blurb'],
                        peek=dataset_attrs['peek'],
                        designation=dataset_attrs['designation'],
                        visible=dataset_attrs['visible'],
                        dbkey=metadata['dbkey'],
                        metadata=metadata,
                        history=new_history,
                        create_dataset=True,
                        sa_session=self.sa_session)
                    if 'uuid' in dataset_attrs:
                        hda.dataset.uuid = dataset_attrs["uuid"]
                    if dataset_attrs.get('exported', True) is False:
                        hda.state = hda.states.DISCARDED
                        hda.deleted = True
                        hda.purged = True
                    else:
                        hda.state = hda.states.OK
                    self.sa_session.add(hda)
                    self.sa_session.flush()
                    new_history.add_dataset(hda, genome_build=None)
                    # Overwrite default hid set when HDA was added to history.
                    hda.hid = dataset_attrs['hid']
                    # TODO: Is there a way to recover permissions? Is this needed?
                    # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                    # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                    self.sa_session.flush()
                    if dataset_attrs.get('exported', True) is True:
                        # Do security check and move/copy dataset data.
                        temp_dataset_file_name = \
                            os.path.realpath(os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name'])))
                        if not file_in_dir(
                                temp_dataset_file_name,
                                os.path.join(archive_dir, "datasets")):
                            raise MalformedContents(
                                "Invalid dataset path: %s" %
                                temp_dataset_file_name)
                        if datasets_usage_counts[temp_dataset_file_name] == 1:
                            self.app.object_store.update_from_file(
                                hda.dataset,
                                file_name=temp_dataset_file_name,
                                create=True)

                            # Import additional files if present. Histories exported previously might not have this attribute set.
                            dataset_extra_files_path = dataset_attrs.get(
                                'extra_files_path', None)
                            if dataset_extra_files_path:
                                try:
                                    file_list = os.listdir(
                                        os.path.join(archive_dir,
                                                     dataset_extra_files_path))
                                except OSError:
                                    file_list = []

                                if file_list:
                                    for extra_file in file_list:
                                        self.app.object_store.update_from_file(
                                            hda.dataset,
                                            extra_dir='dataset_%s_files' %
                                            hda.dataset.id,
                                            alt_name=extra_file,
                                            file_name=os.path.join(
                                                archive_dir,
                                                dataset_extra_files_path,
                                                extra_file),
                                            create=True)
                        else:
                            datasets_usage_counts[temp_dataset_file_name] -= 1
                            shutil.copyfile(temp_dataset_file_name,
                                            hda.file_name)
                        hda.dataset.set_total_size()  # update the filesize record in the database

                    # Set tags, annotations.
                    if user:
                        self.add_item_annotation(self.sa_session, user, hda,
                                                 dataset_attrs['annotation'])
                        # TODO: Set tags.
                        """
                        for tag, value in dataset_attrs[ 'tags' ].items():
                            trans.app.tag_handler.apply_item_tags( trans, trans.user, hda, get_tag_str( tag, value ) )
                            self.sa_session.flush()
                        """

                    # Although metadata is set above, need to set metadata to recover BAI for BAMs.
                    if hda.extension == 'bam':
                        self.app.datatypes_registry.set_external_metadata_tool.tool_action.execute_via_app(
                            self.app.datatypes_registry.
                            set_external_metadata_tool,
                            self.app,
                            jiha.job.session_id,
                            new_history.id,
                            jiha.job.user,
                            incoming={'input1': hda},
                            overwrite=False)

                #
                # Create jobs.
                #

                # Decode jobs attributes.
                def as_hda(obj_dct):
                    """ Hook to 'decode' an HDA; method uses history and HID to get the HDA represented by
                        the encoded object. This only works because HDAs are created above. """
                    if obj_dct.get('__HistoryDatasetAssociation__', False):
                        return self.sa_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=obj_dct['hid']).first()
                    return obj_dct

                jobs_attr_file_name = os.path.join(archive_dir,
                                                   'jobs_attrs.txt')
                jobs_attrs = load(open(jobs_attr_file_name),
                                  object_hook=as_hda)

                # Create each job.
                for job_attrs in jobs_attrs:
                    imported_job = model.Job()
                    imported_job.user = user
                    # TODO: set session?
                    # imported_job.session = trans.get_galaxy_session().id
                    imported_job.history = new_history
                    imported_job.imported = True
                    imported_job.tool_id = job_attrs['tool_id']
                    imported_job.tool_version = job_attrs['tool_version']
                    imported_job.set_state(job_attrs['state'])
                    imported_job.info = job_attrs.get('info', None)
                    imported_job.exit_code = job_attrs.get('exit_code', None)
                    imported_job.traceback = job_attrs.get('traceback', None)
                    imported_job.stdout = job_attrs.get('stdout', None)
                    imported_job.stderr = job_attrs.get('stderr', None)
                    imported_job.command_line = job_attrs.get(
                        'command_line', None)
                    try:
                        imported_job.create_time = datetime.datetime.strptime(
                            job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
                        imported_job.update_time = datetime.datetime.strptime(
                            job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
                    except Exception:
                        pass
                    self.sa_session.add(imported_job)
                    self.sa_session.flush()

                    class HistoryDatasetAssociationIDEncoder(json.JSONEncoder):
                        """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """
                        def default(self, obj):
                            """ Encode an HDA, default encoding for everything else. """
                            if isinstance(obj,
                                          model.HistoryDatasetAssociation):
                                return obj.id
                            return json.JSONEncoder.default(self, obj)

                    # Set parameters. May be useful to look at metadata.py for creating parameters.
                    # TODO: there may be a better way to set parameters, e.g.:
                    #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                    #       job.add_parameter( name, value )
                    # to make this work, we'd need to flesh out the HDA objects. The code below is
                    # relatively similar.
                    for name, value in job_attrs['params'].items():
                        # Transform parameter values when necessary.
                        if isinstance(value, model.HistoryDatasetAssociation):
                            # HDA input: use hid to find input.
                            input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                            .filter_by(history=new_history, hid=value.hid).first()
                            value = input_hda.id
                        # print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                        imported_job.add_parameter(
                            name,
                            dumps(value,
                                  cls=HistoryDatasetAssociationIDEncoder))

                    # TODO: Connect jobs to input datasets.

                    # Connect jobs to output datasets.
                    for output_hid in job_attrs['output_datasets']:
                        # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                        output_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=output_hid).first()
                        if output_hda:
                            imported_job.add_output_dataset(
                                output_hda.name, output_hda)

                    # Connect jobs to input datasets.
                    if 'input_mapping' in job_attrs:
                        for input_name, input_hid in job_attrs[
                                'input_mapping'].items():
                            input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                            .filter_by(history=new_history, hid=input_hid).first()
                            if input_hda:
                                imported_job.add_input_dataset(
                                    input_name, input_hda)

                    self.sa_session.flush()

                # Done importing.
                new_history.importing = False
                self.sa_session.flush()

                # Cleanup.
                if os.path.exists(archive_dir):
                    shutil.rmtree(archive_dir)
            except Exception as e:
                jiha.job.stderr += "Error cleaning up history import job: %s" % e
                self.sa_session.flush()
                raise
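
HistoryDatasetAssociationIDEncoder above is the standard json.JSONEncoder subclassing pattern: override default() for the types you care about and defer to the base class for everything else. A self-contained illustration of the same technique, using generic names rather than Galaxy API:

import json

class IDEncoder(json.JSONEncoder):
    # Encode any object carrying an `id` attribute as that id.
    def default(self, obj):
        if hasattr(obj, "id"):
            return obj.id
        return json.JSONEncoder.default(self, obj)

class Thing:
    def __init__(self, id):
        self.id = id

print(json.dumps({"input1": Thing(42)}, cls=IDEncoder))  # -> {"input1": 42}
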
Example #10
    def test_annotations(self):
        model = self.model

        u = model.User(email="*****@*****.**", password="******")
        self.persist(u)

        def persist_and_check_annotation(annotation_class, **kwds):
            annotated_association = annotation_class()
            annotated_association.annotation = "Test Annotation"
            annotated_association.user = u
            for key, value in kwds.items():
                setattr(annotated_association, key, value)
            self.persist(annotated_association)
            self.expunge()
            stored_annotation = self.query(annotation_class).all()[0]
            assert stored_annotation.annotation == "Test Annotation"
            assert stored_annotation.user.email == "*****@*****.**"

        sw = model.StoredWorkflow()
        sw.user = u
        self.persist(sw)
        persist_and_check_annotation(model.StoredWorkflowAnnotationAssociation,
                                     stored_workflow=sw)

        workflow = model.Workflow()
        workflow.stored_workflow = sw
        self.persist(workflow)

        ws = model.WorkflowStep()
        ws.workflow = workflow
        self.persist(ws)
        persist_and_check_annotation(model.WorkflowStepAnnotationAssociation,
                                     workflow_step=ws)

        h = model.History(name="History for Annotation", user=u)
        self.persist(h)
        persist_and_check_annotation(model.HistoryAnnotationAssociation,
                                     history=h)

        d1 = model.HistoryDatasetAssociation(extension="txt",
                                             history=h,
                                             create_dataset=True,
                                             sa_session=model.session)
        self.persist(d1)
        persist_and_check_annotation(
            model.HistoryDatasetAssociationAnnotationAssociation, hda=d1)

        page = model.Page()
        page.user = u
        self.persist(page)
        persist_and_check_annotation(model.PageAnnotationAssociation,
                                     page=page)

        visualization = model.Visualization()
        visualization.user = u
        self.persist(visualization)
        persist_and_check_annotation(model.VisualizationAnnotationAssociation,
                                     visualization=visualization)

        dataset_collection = model.DatasetCollection(collection_type="paired")
        history_dataset_collection = model.HistoryDatasetCollectionAssociation(
            collection=dataset_collection)
        self.persist(history_dataset_collection)
        persist_and_check_annotation(
            model.HistoryDatasetCollectionAssociationAnnotationAssociation,
            history_dataset_collection=history_dataset_collection)

        library_dataset_collection = model.LibraryDatasetCollectionAssociation(
            collection=dataset_collection)
        self.persist(library_dataset_collection)
        persist_and_check_annotation(
            model.LibraryDatasetCollectionAnnotationAssociation,
            library_dataset_collection=library_dataset_collection)
Example #11
    def test_tags(self):
        model = self.model

        my_tag = model.Tag(name="Test Tag")
        u = model.User(email="*****@*****.**", password="******")
        self.persist(my_tag, u)

        def tag_and_test(taggable_object, tag_association_class, backref_name):
            assert len(
                getattr(
                    self.query(model.Tag).filter(
                        model.Tag.name == "Test Tag").all()[0],
                    backref_name)) == 0

            tag_association = tag_association_class()
            tag_association.tag = my_tag
            taggable_object.tags = [tag_association]
            self.persist(tag_association, taggable_object)

            assert len(
                getattr(
                    self.query(model.Tag).filter(
                        model.Tag.name == "Test Tag").all()[0],
                    backref_name)) == 1

        sw = model.StoredWorkflow()
        sw.user = u
        tag_and_test(sw, model.StoredWorkflowTagAssociation,
                     "tagged_workflows")

        h = model.History(name="History for Tagging", user=u)
        tag_and_test(h, model.HistoryTagAssociation, "tagged_histories")

        d1 = model.HistoryDatasetAssociation(extension="txt",
                                             history=h,
                                             create_dataset=True,
                                             sa_session=model.session)
        tag_and_test(d1, model.HistoryDatasetAssociationTagAssociation,
                     "tagged_history_dataset_associations")

        page = model.Page()
        page.user = u
        tag_and_test(page, model.PageTagAssociation, "tagged_pages")

        visualization = model.Visualization()
        visualization.user = u
        tag_and_test(visualization, model.VisualizationTagAssociation,
                     "tagged_visualizations")

        dataset_collection = model.DatasetCollection(collection_type="paired")
        history_dataset_collection = model.HistoryDatasetCollectionAssociation(
            collection=dataset_collection)
        tag_and_test(history_dataset_collection,
                     model.HistoryDatasetCollectionTagAssociation,
                     "tagged_history_dataset_collections")

        library_dataset_collection = model.LibraryDatasetCollectionAssociation(
            collection=dataset_collection)
        tag_and_test(library_dataset_collection,
                     model.LibraryDatasetCollectionTagAssociation,
                     "tagged_library_dataset_collections")
Example #12
    def test_ratings(self):
        model = self.model

        u = model.User(email="*****@*****.**", password="******")
        self.persist(u)

        def persist_and_check_rating(rating_class, **kwds):
            rating_association = rating_class()
            rating_association.rating = 5
            rating_association.user = u
            for key, value in kwds.items():
                setattr(rating_association, key, value)
            self.persist(rating_association)
            self.expunge()
            stored_annotation = self.query(rating_class).all()[0]
            assert stored_annotation.rating == 5
            assert stored_annotation.user.email == "*****@*****.**"

        sw = model.StoredWorkflow()
        sw.user = u
        self.persist(sw)
        persist_and_check_rating(model.StoredWorkflowRatingAssociation,
                                 stored_workflow=sw)

        h = model.History(name="History for Rating", user=u)
        self.persist(h)
        persist_and_check_rating(model.HistoryRatingAssociation, history=h)

        d1 = model.HistoryDatasetAssociation(extension="txt",
                                             history=h,
                                             create_dataset=True,
                                             sa_session=model.session)
        self.persist(d1)
        persist_and_check_rating(
            model.HistoryDatasetAssociationRatingAssociation, hda=d1)

        page = model.Page()
        page.user = u
        self.persist(page)
        persist_and_check_rating(model.PageRatingAssociation, page=page)

        visualization = model.Visualization()
        visualization.user = u
        self.persist(visualization)
        persist_and_check_rating(model.VisualizationRatingAssociation,
                                 visualization=visualization)

        dataset_collection = model.DatasetCollection(collection_type="paired")
        history_dataset_collection = model.HistoryDatasetCollectionAssociation(
            collection=dataset_collection)
        self.persist(history_dataset_collection)
        persist_and_check_rating(
            model.HistoryDatasetCollectionRatingAssociation,
            history_dataset_collection=history_dataset_collection)

        library_dataset_collection = model.LibraryDatasetCollectionAssociation(
            collection=dataset_collection)
        self.persist(library_dataset_collection)
        persist_and_check_rating(
            model.LibraryDatasetCollectionRatingAssociation,
            library_dataset_collection=library_dataset_collection)
Example #13
    def cleanup_after_job(self, db_session):
        """ Set history, datasets, and jobs' attributes and clean up archive directory. """

        #
        # Helper methods.
        #

        def file_in_dir(file_path, a_dir):
            """ Returns true if file is in directory. """
            abs_file_path = os.path.abspath(file_path)
            return os.path.split(abs_file_path)[0] == a_dir

        def read_file_contents(file_path):
            """ Return the entire contents of a file. """
            # The original chunked-read loop could spin forever on files whose
            # size was an exact multiple of the buffer size; a plain read is
            # equivalent and safe.
            with open(file_path, 'rb') as fp:
                return fp.read()

        def get_tag_str(tag, value):
            """ Builds a tag string for a tag, value pair. """
            if not value:
                return tag
            else:
                return tag + ":" + value

        #
        # Import history.
        #

        jiha = db_session.query(model.JobImportHistoryArchive).filter_by(
            job_id=self.job_id).first()
        if jiha:
            try:
                archive_dir = jiha.archive_dir
                user = jiha.job.user

                #
                # Create history.
                #
                history_attr_file_name = os.path.join(archive_dir,
                                                      'history_attrs.txt')
                history_attr_str = read_file_contents(history_attr_file_name)
                history_attrs = from_json_string(history_attr_str)

                # Create history.
                new_history = model.History(
                    name='imported from archive: %s' % history_attrs['name'].encode('utf-8'),
                    user=user)
                new_history.importing = True
                new_history.hid_counter = history_attrs['hid_counter']
                new_history.genome_build = history_attrs['genome_build']
                db_session.add(new_history)
                jiha.history = new_history
                db_session.flush()

                # Add annotation, tags.
                if user:
                    self.add_item_annotation(db_session, user, new_history,
                                             history_attrs['annotation'])
                    """
                    TODO: figure out how to add tags to item.
                    for tag, value in history_attrs[ 'tags' ].items():
                        trans.app.tag_handler.apply_item_tags( trans, trans.user, new_history, get_tag_str( tag, value ) )
                    """

                #
                # Create datasets.
                #
                datasets_attrs_file_name = os.path.join(
                    archive_dir, 'datasets_attrs.txt')
                datasets_attr_str = read_file_contents(
                    datasets_attrs_file_name)
                datasets_attrs = from_json_string(datasets_attr_str)

                # Get counts of how often each dataset file is used; a file can
                # be linked to multiple dataset objects (HDAs).
                datasets_usage_counts = {}
                for dataset_attrs in datasets_attrs:
                    temp_dataset_file_name = \
                        os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
                    if (temp_dataset_file_name not in datasets_usage_counts):
                        datasets_usage_counts[temp_dataset_file_name] = 0
                    datasets_usage_counts[temp_dataset_file_name] += 1

                # Create datasets.
                for dataset_attrs in datasets_attrs:
                    metadata = dataset_attrs['metadata']

                    # Create dataset and HDA.
                    hda = model.HistoryDatasetAssociation(
                        name=dataset_attrs['name'].encode('utf-8'),
                        extension=dataset_attrs['extension'],
                        info=dataset_attrs['info'].encode('utf-8'),
                        blurb=dataset_attrs['blurb'],
                        peek=dataset_attrs['peek'],
                        designation=dataset_attrs['designation'],
                        visible=dataset_attrs['visible'],
                        dbkey=metadata['dbkey'],
                        metadata=metadata,
                        history=new_history,
                        create_dataset=True,
                        sa_session=db_session)
                    hda.state = hda.states.OK
                    db_session.add(hda)
                    db_session.flush()
                    new_history.add_dataset(hda, genome_build=None)
                    # Overwrite default hid set when HDA was added to history.
                    hda.hid = dataset_attrs['hid']
                    # TODO: Is there a way to recover permissions? Is this needed?
                    # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                    # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                    db_session.flush()

                    # Do security check and move/copy dataset data.
                    temp_dataset_file_name = \
                        os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
                    if not file_in_dir(temp_dataset_file_name,
                                       os.path.join(archive_dir, "datasets")):
                        raise Exception("Invalid dataset path: %s" %
                                        temp_dataset_file_name)
                    if datasets_usage_counts[temp_dataset_file_name] == 1:
                        shutil.move(temp_dataset_file_name, hda.file_name)
                    else:
                        datasets_usage_counts[temp_dataset_file_name] -= 1
                        shutil.copyfile(temp_dataset_file_name, hda.file_name)

                    # Set tags, annotations.
                    if user:
                        self.add_item_annotation(db_session, user, hda,
                                                 dataset_attrs['annotation'])
                        # TODO: Set tags.
                        """
                        for tag, value in dataset_attrs[ 'tags' ].items():
                            trans.app.tag_handler.apply_item_tags( trans, trans.user, hda, get_tag_str( tag, value ) )
                            db_session.flush()
                        """

                #
                # Create jobs.
                #

                # Read jobs attributes.
                jobs_attr_file_name = os.path.join(archive_dir,
                                                   'jobs_attrs.txt')
                jobs_attr_str = read_file_contents(jobs_attr_file_name)

                # Decode jobs attributes.
                def as_hda(obj_dct):
                    """ Hook to 'decode' an HDA; method uses history and HID to get the HDA represented by 
                        the encoded object. This only works because HDAs are created above. """
                    if obj_dct.get('__HistoryDatasetAssociation__', False):
                        return db_session.query( model.HistoryDatasetAssociation ) \
                                        .filter_by( history=new_history, hid=obj_dct['hid'] ).first()
                    return obj_dct

                jobs_attrs = from_json_string(jobs_attr_str,
                                              object_hook=as_hda)

                # Create each job.
                for job_attrs in jobs_attrs:
                    imported_job = model.Job()
                    imported_job.user = user
                    # TODO: set session?
                    # imported_job.session = trans.get_galaxy_session().id
                    imported_job.history = new_history
                    imported_job.tool_id = job_attrs['tool_id']
                    imported_job.tool_version = job_attrs['tool_version']
                    imported_job.set_state(job_attrs['state'])
                    imported_job.imported = True
                    db_session.add(imported_job)
                    db_session.flush()

                    class HistoryDatasetAssociationIDEncoder(
                            simplejson.JSONEncoder):
                        """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """
                        def default(self, obj):
                            """ Encode an HDA, default encoding for everything else. """
                            if isinstance(obj,
                                          model.HistoryDatasetAssociation):
                                return obj.id
                            return simplejson.JSONEncoder.default(self, obj)

                    # Set parameters. May be useful to look at metadata.py for creating parameters.
                    # TODO: there may be a better way to set parameters, e.g.:
                    #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                    #       job.add_parameter( name, value )
                    # to make this work, we'd need to flesh out the HDA objects. The code below is
                    # relatively similar.
                    for name, value in job_attrs['params'].items():
                        # Transform parameter values when necessary.
                        if isinstance(value, model.HistoryDatasetAssociation):
                            # HDA input: use hid to find input.
                            input_hda = db_session.query( model.HistoryDatasetAssociation ) \
                                            .filter_by( history=new_history, hid=value.hid ).first()
                            value = input_hda.id
                        #print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                        imported_job.add_parameter(
                            name,
                            to_json_string(
                                value, cls=HistoryDatasetAssociationIDEncoder))

                    # TODO: Connect jobs to input datasets.

                    # Connect jobs to output datasets.
                    for output_hid in job_attrs['output_datasets']:
                        #print "%s job has output dataset %i" % (imported_job.id, output_hid)
                        output_hda = db_session.query( model.HistoryDatasetAssociation ) \
                                        .filter_by( history=new_history, hid=output_hid ).first()
                        if output_hda:
                            imported_job.add_output_dataset(
                                output_hda.name, output_hda)

                    db_session.flush()

                # Done importing. This must happen after the job loop (and even
                # when there are no jobs), otherwise the history stays marked
                # as importing.
                new_history.importing = False
                db_session.flush()

                # Cleanup.
                if os.path.exists(archive_dir):
                    shutil.rmtree(archive_dir)
            except Exception as e:
                jiha.job.stderr += "Error cleaning up history import job: %s" % e
                db_session.flush()
Example #14
def test_export_copied_objects_copied_outside_history():
    app, sa_session, h = _setup_history_for_export(
        "Collection History with copied objects")

    d1, d2 = _create_datasets(sa_session, h, 2)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=3, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 4
    dce1 = model.DatasetCollectionElement(collection=c1,
                                          element=d1,
                                          element_identifier="forward",
                                          element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1,
                                          element=d2,
                                          element_identifier="reverse",
                                          element_index=1)

    sa_session.add_all((dce1, dce2, d1, d2, hc1))
    sa_session.flush()

    hc2 = hc1.copy(element_destination=h)
    h.add_dataset_collection(hc2)

    sa_session.add(hc2)

    other_h = model.History(name=h.name + "-other", user=h.user)
    sa_session.add(other_h)

    hc3 = hc2.copy(element_destination=other_h)
    other_h.add_dataset_collection(hc3)
    sa_session.add(hc3)
    sa_session.flush()

    hc4 = hc3.copy(element_destination=h)
    h.add_dataset_collection(hc4)
    sa_session.add(hc4)
    sa_session.flush()

    assert h.hid_counter == 10

    original_by_hid = _hid_dict(h)
    assert original_by_hid[7].copied_from_history_dataset_association != original_by_hid[4]
    assert original_by_hid[8].copied_from_history_dataset_association != original_by_hid[5]
    assert original_by_hid[9].copied_from_history_dataset_collection_association != original_by_hid[6]

    imported_history = _import_export(app, h)

    assert imported_history.hid_counter == 10
    assert len(imported_history.dataset_collections) == 3
    assert len(imported_history.datasets) == 6

    _assert_distinct_hids(imported_history)
    imported_by_hid = _hid_dict(imported_history)
    assert imported_by_hid[4].copied_from_history_dataset_association == imported_by_hid[1]
    assert imported_by_hid[5].copied_from_history_dataset_association == imported_by_hid[2]
    assert imported_by_hid[6].copied_from_history_dataset_collection_association == imported_by_hid[3]

    assert imported_by_hid[7].copied_from_history_dataset_association == imported_by_hid[4]
    assert imported_by_hid[8].copied_from_history_dataset_association == imported_by_hid[5]
    assert imported_by_hid[9].copied_from_history_dataset_collection_association == imported_by_hid[6]
Example #15
    def _new_history(self):
        history = model.History()
        history.id = 1
        history.name = "New History"
        return history
Example #16
    def test_nested_collection_attributes(self):
        model = self.model
        u = model.User(email="*****@*****.**", password="******")
        h1 = model.History(name="History 1", user=u)
        d1 = model.HistoryDatasetAssociation(extension="bam",
                                             history=h1,
                                             create_dataset=True,
                                             sa_session=model.session)
        index = NamedTemporaryFile("w")
        index.write("cool bam index")
        index2 = NamedTemporaryFile("w")
        index2.write("cool bam index 2")
        metadata_dict = {
            "bam_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index.name}),
            "bam_csi_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index2.name}),
        }
        d1.metadata.from_JSON_dict(json_dict=metadata_dict)
        assert d1.metadata.bam_index
        assert d1.metadata.bam_csi_index
        assert isinstance(d1.metadata.bam_index, model.MetadataFile)
        assert isinstance(d1.metadata.bam_csi_index, model.MetadataFile)
        d2 = model.HistoryDatasetAssociation(extension="txt",
                                             history=h1,
                                             create_dataset=True,
                                             sa_session=model.session)
        c1 = model.DatasetCollection(collection_type='paired')
        dce1 = model.DatasetCollectionElement(collection=c1,
                                              element=d1,
                                              element_identifier="forward",
                                              element_index=0)
        dce2 = model.DatasetCollectionElement(collection=c1,
                                              element=d2,
                                              element_identifier="reverse",
                                              element_index=1)
        c2 = model.DatasetCollection(collection_type="list:paired")
        dce3 = model.DatasetCollectionElement(collection=c2,
                                              element=c1,
                                              element_identifier="inner_list",
                                              element_index=0)
        c3 = model.DatasetCollection(collection_type="list:list")
        c4 = model.DatasetCollection(collection_type="list:list:paired")
        dce4 = model.DatasetCollectionElement(collection=c4,
                                              element=c2,
                                              element_identifier="outer_list",
                                              element_index=0)
        model.session.add_all([d1, d2, c1, dce1, dce2, c2, dce3, c3, c4, dce4])
        model.session.flush()
        q = c2._get_nested_collection_attributes(
            element_attributes=('element_identifier', ),
            hda_attributes=('extension', ),
            dataset_attributes=('state', ))
        assert [r.keys() for r in q] == [
            ['element_identifier_0', 'element_identifier_1', 'extension', 'state'],
            ['element_identifier_0', 'element_identifier_1', 'extension', 'state'],
        ]
        assert q.all() == [('inner_list', 'forward', 'bam', 'new'),
                           ('inner_list', 'reverse', 'txt', 'new')]
        q = c2._get_nested_collection_attributes(
            return_entities=(model.HistoryDatasetAssociation, ))
        assert q.all() == [d1, d2]
        q = c2._get_nested_collection_attributes(
            return_entities=(model.HistoryDatasetAssociation, model.Dataset))
        assert q.all() == [(d1, d1.dataset), (d2, d2.dataset)]
        # Assert properties that use _get_nested_collection_attributes return correct content
        assert c2.dataset_instances == [d1, d2]
        assert c2.dataset_elements == [dce1, dce2]
        assert c2.dataset_action_tuples == []
        assert c2.populated_optimized
        assert c2.dataset_states_and_extensions_summary == ({'new'}, {'txt', 'bam'})
        assert c2.element_identifiers_extensions_paths_and_metadata_files == [
            [('inner_list', 'forward'), 'bam', 'mock_dataset_14.dat',
             [('bai', 'mock_dataset_14.dat'), ('bam.csi', 'mock_dataset_14.dat')]],
            [('inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat', []],
        ]
        assert c3.dataset_instances == []
        assert c3.dataset_elements == []
        assert c3.dataset_states_and_extensions_summary == (set(), set())
        q = c4._get_nested_collection_attributes(
            element_attributes=('element_identifier', ))
        assert q.all() == [('outer_list', 'inner_list', 'forward'),
                           ('outer_list', 'inner_list', 'reverse')]
        assert c4.dataset_elements == [dce1, dce2]
        assert c4.element_identifiers_extensions_and_paths == [
            (('outer_list', 'inner_list', 'forward'), 'bam', 'mock_dataset_14.dat'),
            (('outer_list', 'inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat'),
        ]