Ejemplo n.º 1
0
 def test_run_workflow_with_dataset_collection(self):
     """Run the workflow on a two-element list collection and check the output.

     Two datasets are pasted into ``self.hist`` and wrapped in a collection
     named "MyDatasetList"; the collection and the first dataset are fed to
     ``self.wf.run``. The run must yield exactly one output, a 'list'
     collection with two elements, in a history with the same id as
     ``self.hist``.
     """
     first_ds = self.hist.paste_content(FOO_DATA)
     second_ds = self.hist.paste_content(FOO_DATA_2)
     members = [
         dataset_collections.HistoryDatasetElement(name="sample1",
                                                   id=first_ds.id),
         dataset_collections.HistoryDatasetElement(name="sample2",
                                                   id=second_ds.id),
     ]
     description = dataset_collections.CollectionDescription(
         name="MyDatasetList", elements=members)
     hdca = self.hist.create_dataset_collection(description)

     # Keys are the workflow input labels used by this test's workflow.
     workflow_inputs = {}
     workflow_inputs["Input Dataset Collection"] = hdca
     workflow_inputs["Input 2"] = first_ds

     results, result_history = self.wf.run(
         workflow_inputs, self.hist, wait=True)
     self.assertEqual(len(results), 1)
     produced = results[0]
     self.assertIsInstance(
         produced, wrappers.HistoryDatasetCollectionAssociation)
     self.assertEqual(produced.collection_type, 'list')
     self.assertEqual(len(produced.elements), 2)
     self.assertEqual(result_history.id, self.hist.id)
Ejemplo n.º 2
0
 def test_create_list_in_history(self):
     """Create a three-element list collection and verify the response.

     Checks the collection name and type, the number of elements, that the
     elements reference the three datasets in creation order, and the
     index/identifier of the first and last element respectively.
     """
     history_id = self.gi.histories.create_history(
         name="TestDSListCreate")["id"]
     dataset_ids = [self._test_dataset(history_id) for _ in range(3)]
     labels = ("sample1", "sample2", "sample3")
     description = collections.CollectionDescription(
         name="MyDatasetList",
         elements=[
             collections.HistoryDatasetElement(name=label, id=dataset_id)
             for label, dataset_id in zip(labels, dataset_ids)
         ])
     response = self.gi.histories.create_dataset_collection(
         history_id=history_id, collection_description=description)

     self.assertEqual(response["name"], "MyDatasetList")
     self.assertEqual(response["collection_type"], "list")
     created = response["elements"]
     self.assertEqual(len(created), 3)
     self.assertEqual(created[0]["element_index"], 0)
     # Elements must reference the datasets in the order they were supplied.
     for position, expected_id in enumerate(dataset_ids):
         self.assertEqual(created[position]["object"]["id"], expected_id)
     self.assertEqual(created[2]["element_identifier"], "sample3")
Ejemplo n.º 3
0
    def build_list(self):
        """
        Build a "list:paired" dataset collection named "pair_list" from the fastq datasets
        found in the history ``self.history_id``.

        Fastq datasets whose name contains ``_R1``/``_r1`` (forward) or ``_R2``/``_r2`` (reverse)
        are grouped by the part of the name preceding that marker. For every such sample name the
        first forward read and the first reverse read are paired; reads without a mate are left
        out. A warning is printed when the number of R1 and R2 files differs.

        Both Galaxy calls are retried, after ``self.wait_for_problem()``, whenever a connection
        error is raised.

        :return: None
        """
        while True:
            try:
                contents = self.gi.histories.show_history(self.history_id, contents=True)
            except (ConnectionError, requests.exceptions.ConnectionError):
                self.wait_for_problem()
            else:
                break

        # keep only the history items that are fastq datasets
        fastqs = [
            item for item in contents
            if item["history_content_type"] == "dataset" and item["extension"] == "fastq"
        ]

        # Sort the reads into forward (R1) and reverse (R2) lists, replacing each item's name
        # with its sample prefix. Both patterns are applied to the ORIGINAL name, so an item
        # matching both ends up in both lists (carrying the R2-derived prefix).
        r1s = []
        r2s = []
        for fastq in fastqs:
            original_name = fastq["name"]
            for pattern, bucket in ((r"(.+)_[Rr]1", r1s), (r"(.+)_[Rr]2", r2s)):
                prefixes = re.findall(pattern, original_name)
                if prefixes:
                    fastq["name"] = prefixes[0]
                    bucket.append(fastq)

        if len(r1s) != len(r2s):
            self.t.time_print("[WARNING] There are different amounts of R1 and R2 files,"
                              " will only use ones that can be paired.")

        # Index the first reverse read of every sample so each forward read finds its mate with
        # a single lookup instead of rescanning the whole reverse list.
        first_reverse = {}
        for read in r2s:
            if read["name"] not in first_reverse:
                first_reverse[read["name"]] = read

        # create collection elements with the pairs of r1 and r2 files that were found;
        # only the first forward read of a sample is used
        pairs = []
        paired_names = set()
        for read in r1s:
            sample = read["name"]
            if sample in paired_names or sample not in first_reverse:
                continue
            mate = first_reverse[sample]
            elements = [
                collections.HistoryDatasetElement(name="forward", id=read["id"]),
                collections.HistoryDatasetElement(name="reverse", id=mate["id"]),
            ]
            paired_names.add(sample)
            pairs.append(collections.CollectionElement(sample, type="paired", elements=elements))

        collection_description = collections.CollectionDescription("pair_list", type="list:paired", elements=pairs)
        while True:
            try:
                self.gi.histories.create_dataset_collection(self.history_id, collection_description)
            except (ConnectionError, requests.exceptions.ConnectionError):
                self.wait_for_problem()
            else:
                break
Ejemplo n.º 4
0
    def test_create_list_of_paired_datasets_in_history(self):
        """Create a 'list:paired' collection of two pairs and verify the response.

        Four datasets are created and grouped into the pairs "sample1" and
        "sample2". The forward element of the first pair and the reverse
        element of the second pair are inspected in detail.
        """
        history_id = self.gi.histories.create_history(
            name="TestDSListCreate")["id"]
        dataset_ids = [self._test_dataset(history_id) for _ in range(4)]

        def make_pair(label, forward_id, reverse_id):
            # One 'paired' sub-collection holding a forward and a reverse dataset.
            return collections.CollectionElement(
                name=label,
                type="paired",
                elements=[
                    collections.HistoryDatasetElement(name="forward",
                                                      id=forward_id),
                    collections.HistoryDatasetElement(name="reverse",
                                                      id=reverse_id),
                ])

        description = collections.CollectionDescription(
            name="MyListOfPairedDatasets",
            type="list:paired",
            elements=[
                make_pair("sample1", dataset_ids[0], dataset_ids[1]),
                make_pair("sample2", dataset_ids[2], dataset_ids[3]),
            ])
        response = self.gi.histories.create_dataset_collection(
            history_id=history_id, collection_description=description)

        self.assertEqual(response["name"], "MyListOfPairedDatasets")
        self.assertEqual(response["collection_type"], "list:paired")
        outer = response["elements"]
        self.assertEqual(len(outer), 2)

        # First pair: check its forward member.
        self.assertEqual(outer[0]["element_index"], 0)
        first_pair = outer[0]["object"]
        self.assertEqual(first_pair["collection_type"], "paired")
        self.assertEqual(len(first_pair["elements"]), 2)
        forward_member = first_pair["elements"][0]
        self.assertEqual(forward_member["element_identifier"], "forward")
        self.assertEqual(forward_member["element_index"], 0)
        self.assertEqual(forward_member["object"]["id"], dataset_ids[0])

        # Second pair: check its reverse member.
        self.assertEqual(outer[1]["element_index"], 1)
        second_pair = outer[1]["object"]
        self.assertEqual(second_pair["collection_type"], "paired")
        self.assertEqual(len(second_pair["elements"]), 2)
        reverse_member = second_pair["elements"][1]
        reverse_dataset = reverse_member["object"]
        self.assertEqual(reverse_member["element_identifier"], "reverse")
        self.assertEqual(reverse_member["element_index"], 1)
        self.assertEqual(reverse_dataset["id"], dataset_ids[3])
Ejemplo n.º 5
0
 def _create_collection_description(self):
     """Paste two datasets and store a list description that contains them.

     Sets ``self.dataset1``, ``self.dataset2`` and
     ``self.collection_description`` (a collection named "MyDatasetList"
     with the elements "sample1" and "sample2").
     """
     self.dataset1 = self.hist.paste_content(FOO_DATA)
     self.dataset2 = self.hist.paste_content(FOO_DATA_2)
     labelled = (("sample1", self.dataset1), ("sample2", self.dataset2))
     members = [
         dataset_collections.HistoryDatasetElement(name=label, id=dataset.id)
         for label, dataset in labelled
     ]
     self.collection_description = dataset_collections.CollectionDescription(
         name="MyDatasetList", elements=members)
Ejemplo n.º 6
0
 def _create_pair_in_history(self, history_id):
     """Create a 'paired' collection "MyTestPair" from two new datasets.

     Two datasets are created in ``history_id`` via ``self._test_dataset``
     and used as the forward and reverse members. Returns whatever
     ``create_dataset_collection`` returns.
     """
     forward_id = self._test_dataset(history_id)
     reverse_id = self._test_dataset(history_id)
     pair_description = collections.CollectionDescription(
         name="MyTestPair",
         type="paired",
         elements=[
             collections.HistoryDatasetElement(name="forward", id=forward_id),
             collections.HistoryDatasetElement(name="reverse", id=reverse_id),
         ])
     return self.gi.histories.create_dataset_collection(
         history_id=history_id, collection_description=pair_description)
Ejemplo n.º 7
0
    def create_dataset_collection(self, gi, outputhist, name="DatasetList"):
        """
        Make a dataset collection with the datasets listed in self.dataset_collection

        The datasets are obtained from ``self.import_datasets``. For type 'list'
        every dataset becomes one element named after the dataset. For type
        'list:paired' consecutive datasets are combined two at a time into
        'paired' elements (first = forward, second = reverse), each named after
        its forward dataset.

        Args:
            gi (GalaxyInstance): The current instance of Galaxy being used
            outputhist (History): The history in which to create the dataset collection
            name (str): The name of the new dataset collection
        Returns:
            dataset_collection (HistoryDatasetCollectionAssociation): The new dataset collection object
        Raises:
            ValueError: If the configured collection type is neither 'list' nor
                'list:paired', or if a 'list:paired' collection is requested
                with an odd number of datasets.
        """
        self.logger.info("Dataset collection name: '%s'" % name)
        datasets = self.import_datasets('dataset_collection', gi, outputhist)
        collection_type = self.dataset_collection['type']
        if collection_type == 'list':
            collection_elements = [
                collections.HistoryDatasetElement(name=dataset.name,
                                                  id=dataset.id)
                for dataset in datasets
            ]
        elif collection_type == 'list:paired':
            # Every forward dataset needs a reverse partner; fail with a clear
            # message instead of an IndexError on the dangling last dataset.
            if len(datasets) % 2 != 0:
                message = (
                    "Dataset collection of type 'list:paired' needs an even "
                    "number of datasets, got %d" % len(datasets))
                self.logger.error(message)
                raise ValueError(message)
            collection_elements = [
                collections.CollectionElement(
                    name=datasets[i].name,
                    type='paired',
                    elements=[
                        collections.HistoryDatasetElement(
                            name='forward', id=datasets[i].id),
                        collections.HistoryDatasetElement(
                            name='reverse', id=datasets[i + 1].id),
                    ])
                for i in range(0, len(datasets), 2)
            ]
        else:
            message = "Dataset collection type must be 'list' or 'list:paired'"
            self.logger.error(message)
            raise ValueError(message)
        collection_description = collections.CollectionDescription(
            name=name,
            type=collection_type,
            elements=collection_elements)
        return outputhist.create_dataset_collection(collection_description)
Ejemplo n.º 8
0
 def test_run_workflow_with_dataset_collection(self):
     """Invoke the workflow on a list collection and inspect the last step.

     Two pasted datasets are wrapped in a collection "MyDatasetList" (three
     history items in total). After the invocation has finished and the
     history was refreshed, six items are expected, and the 'out_file1'
     output collection of the last step must be a two-element 'list' whose
     container has the same id as ``self.hist``.
     """
     first_ds = self.hist.paste_content(FOO_DATA)
     second_ds = self.hist.paste_content(FOO_DATA_2)
     members = [
         dataset_collections.HistoryDatasetElement(name="sample1",
                                                   id=first_ds.id),
         dataset_collections.HistoryDatasetElement(name="sample2",
                                                   id=second_ds.id),
     ]
     description = dataset_collections.CollectionDescription(
         name="MyDatasetList", elements=members)
     hdca = self.hist.create_dataset_collection(description)
     self.assertEqual(len(self.hist.content_infos), 3)

     invocation = self.wf.invoke({"0": hdca, "1": first_ds},
                                 history=self.hist)
     invocation.wait()
     self.hist.refresh()
     self.assertEqual(len(self.hist.content_infos), 6)

     final_step = invocation.sorted_steps_by()[-1]
     produced = final_step.get_output_collections()['out_file1']
     self.assertEqual(produced.collection_type, 'list')
     self.assertEqual(len(produced.elements), 2)
     self.assertEqual(produced.container.id, self.hist.id)
Ejemplo n.º 9
0
    def build_list(self):
        """
        Create the "pair_list" collection (type "list:paired") from the
        history's fastq datasets.

        Datasets with extension "fastq" are split into forward and reverse
        reads according to an ``_R1``/``_r1`` or ``_R2``/``_r2`` marker in
        their name; the text before the marker is the sample name. For each
        sample name the first forward read is paired with the first reverse
        read, and unpaired reads are skipped. A warning is printed if the
        forward and reverse counts differ.

        Connection errors from either Galaxy call trigger
        ``self.wait_for_problem()`` followed by a retry.

        :return: None
        """
        while True:
            try:
                contents = self.gi.histories.show_history(self.history_id,
                                                          contents=True)
            except (ConnectionError, requests.exceptions.ConnectionError):
                self.wait_for_problem()
            else:
                break

        # get fastq files
        fastqs = [
            item for item in contents
            if item["history_content_type"] == "dataset"
            and item["extension"] == "fastq"
        ]

        # Split into forward/reverse reads and rename each item to its sample
        # prefix. Both patterns see the ORIGINAL name, so an item matching
        # both is added to both lists (ending up with the R2-derived prefix).
        r1s = []
        r2s = []
        read_patterns = ((r"(.+)_[Rr]1", r1s), (r"(.+)_[Rr]2", r2s))
        for fastq in fastqs:
            original_name = fastq["name"]
            for pattern, bucket in read_patterns:
                prefixes = re.findall(pattern, original_name)
                if prefixes:
                    fastq["name"] = prefixes[0]
                    bucket.append(fastq)

        if len(r1s) != len(r2s):
            self.t.time_print(
                "[WARNING] There are different amounts of R1 and R2 files,"
                " will only use ones that can be paired.")

        # Remember the first reverse read per sample so mates are found by a
        # dictionary lookup rather than a scan of every reverse read.
        first_reverse = {}
        for read in r2s:
            if read["name"] not in first_reverse:
                first_reverse[read["name"]] = read

        # pair fastq files; only the first forward read of a sample is used
        pairs = []
        paired_names = set()
        for read in r1s:
            sample = read["name"]
            if sample in paired_names or sample not in first_reverse:
                continue
            mate = first_reverse[sample]
            elements = [
                collections.HistoryDatasetElement(name="forward",
                                                  id=read["id"]),
                collections.HistoryDatasetElement(name="reverse",
                                                  id=mate["id"]),
            ]
            paired_names.add(sample)
            pairs.append(
                collections.CollectionElement(sample,
                                              type="paired",
                                              elements=elements))

        collection_description = collections.CollectionDescription(
            "pair_list", type="list:paired", elements=pairs)
        while True:
            try:
                self.gi.histories.create_dataset_collection(
                    self.history_id, collection_description)
            except (ConnectionError, requests.exceptions.ConnectionError):
                self.wait_for_problem()
            else:
                break
# Split the members of one dataset collection into two new collections:
# "<name> (ok)" for datasets in state 'ok' with a positive file size, and
# "<name> (not ok)" for datasets in any other state or with a file size of 0.


def _finished_with_data(element):
    """Return True if the element's dataset is 'ok' and has a positive size."""
    dataset = element['object']
    return dataset['state'] == 'ok' and dataset['file_size'] > 0


def _failed_or_empty(element):
    """Return True if the element's dataset is not 'ok' or has size 0."""
    dataset = element['object']
    return dataset['state'] != 'ok' or dataset['file_size'] == 0


failedCollection = gi.histories.show_dataset_collection(
    historyId, collectionId)
okDatasets = filter(_finished_with_data, failedCollection['elements'])
notOkDatasets = filter(_failed_or_empty, failedCollection['elements'])
okCollectionName = failedCollection['name'] + " (ok)"
notOkCollectionName = failedCollection['name'] + " (not ok)"

# Create the "(ok)" collection first, then the "(not ok)" one.
for _collection_name, _members in ((okCollectionName, okDatasets),
                                   (notOkCollectionName, notOkDatasets)):
    gi.histories.create_dataset_collection(
        history_id=historyId,
        collection_description=collections.CollectionDescription(
            name=_collection_name,
            elements=[
                collections.HistoryDatasetElement(member['object']['name'],
                                                  member['object']['id'])
                for member in _members
            ]))
# Locate the collection with history item number `collectionHistoryId` in the
# first matching history, then split its members into an "(ok)" and a
# "(not ok)" collection.
historyId = historyMatches[0]['id']
historyContents = gi.histories.show_history(historyId, contents=True, deleted=False, visible=True, details=False)
matchingCollections = [x for x in historyContents if x['hid'] == collectionHistoryId]

# Exactly one history item must carry the requested hid. SystemExit is raised
# directly because the `exit()` helper is only injected by the `site` module
# and is missing under `python -S` or in frozen/embedded interpreters.
if not matchingCollections:
    print("Error: no collections matching that id found.")
    raise SystemExit(1)

if len(matchingCollections) > 1:
    print("Error: more than one collection matching that id found (WTF?)")
    raise SystemExit(1)

collectionId = matchingCollections[0]['id']
failedCollection = gi.histories.show_dataset_collection(historyId, collectionId)
# "ok" means state 'ok' AND a positive file size; "not ok" means any other
# state OR a file size of exactly 0.
okDatasets = [d for d in failedCollection['elements'] if d['object']['state'] == 'ok' and d['object']['file_size'] > 0]
notOkDatasets = [d for d in failedCollection['elements'] if d['object']['state'] != 'ok' or d['object']['file_size'] == 0]
okCollectionName = failedCollection['name'] + " (ok)"
notOkCollectionName = failedCollection['name'] + " (not ok)"

gi.histories.create_dataset_collection(
    history_id=historyId,
    collection_description=collections.CollectionDescription(
        name=okCollectionName,
        elements=[collections.HistoryDatasetElement(d['object']['name'], d['object']['id']) for d in okDatasets]))

gi.histories.create_dataset_collection(
    history_id=historyId,
    collection_description=collections.CollectionDescription(
        name=notOkCollectionName,
        elements=[collections.HistoryDatasetElement(d['object']['name'], d['object']['id']) for d in notOkDatasets]))