Esempio n. 1
0
    def loadReadsSet(self, fwd_reads, rev_reads, params):
        if hasattr(self.__class__, 'reads_set_ref'):
            return self.__class__.reads_set_ref
        pe_reads_ref = self.loadPairedEndReads(fwd_reads, rev_reads, params)
        reads_set_name = 'TestReadsSet'
        # create the set object

        reads_set_data = {
            'description': 'Reads Set for testing Bwa',
            'items': [{
                'ref': pe_reads_ref,
                'label': 'rs1'
            }]
        }
        # test a save
        set_api = SetAPI(self.srv_wiz_url)
        res = set_api.save_reads_set_v1({
            'data': reads_set_data,
            'output_object_name': reads_set_name,
            'workspace': params["workspace_name"]
        })
        reads_set_ref = res['set_ref']

        # reads_set_ref = '5264/52/1'
        print('Loaded ReadsSet: ' + reads_set_ref)
        return reads_set_ref
Esempio n. 2
0
 def __init__(self, scratch_dir, callback_url, workspace_url, srv_wiz_url):
     self.scratch_dir = scratch_dir
     self.rau = ReadsAlignmentUtils(callback_url)
     self.kbr = KBaseReport(callback_url)
     self.dfu = DataFileUtil(callback_url)
     self.gfu = GenomeFileUtil(callback_url)
     self.set_api = SetAPI(srv_wiz_url)
     self.ws = Workspace(workspace_url)
     self.valid_commands = ['bamqc', 'multi-bamqc']
Esempio n. 3
0
    def test_list_object_with_sets(self):
        ws_name1 = self.createWs()
        reads_obj_ref = self.__class__.example_reads_ref
        set_obj_name = "MyReadsSet.1"
        sapi = SetAPI(self.__class__.serviceWizardURL, token=self.getContext()['token'],
                      service_ver=self.__class__.SetAPI_version)
        sapi.save_reads_set_v1({'workspace': ws_name1, 'output_object_name': set_obj_name,
                                'data': {'description': '', 'items': [{'ref': reads_obj_ref}]}})
        list_ret = self.getImpl().list_objects_with_sets(self.getContext(),
                                                         {"ws_name": ws_name1})[0]
        ret = list_ret['data']
        self.assertTrue(len(ret) > 0)
        set_count = 0
        for item in ret:
            self.assertTrue("object_info" in item)
            if "set_items" in item:
                set_count += 1
                set_items = item["set_items"]["set_items_info"]
                self.assertEqual(1, len(set_items))
        self.assertEqual(1, set_count)
        self.assertNotIn('data_palette_refs', list_ret)
        ws_id = self.getWsClient().get_workspace_info({"workspace": ws_name1})[0]

        list_ret = self.getImpl().list_objects_with_sets(self.getContext(),
                                                         {"ws_name": ws_name1,
                                                          "include_data_palettes": 1})[0]
        ret = list_ret['data']
        self.assertTrue(len(ret) > 0)
        set_count = 0
        for item in ret:
            self.assertTrue("object_info" in item)
            if "set_items" in item:
                set_count += 1
                set_items = item["set_items"]["set_items_info"]
                self.assertEqual(1, len(set_items))
        self.assertEqual(1, set_count)
        self.assertIn('data_palette_refs', list_ret)
        ws_id = self.getWsClient().get_workspace_info({"workspace": ws_name1})[0]


        ret2 = self.getImpl().list_objects_with_sets(self.getContext(),
                                                     {"ws_id": ws_id})[0]["data"]
        self.assertEqual(len(ret), len(ret2))
        type_filter = "KBaseSets.ReadsSet"
        ret3 = self.getImpl().list_objects_with_sets(self.getContext(),
                                                     {"types": [type_filter],
                                                     "workspaces": [str(ws_id)]})[0]["data"]
        self.assertTrue(len(ret3) > 0)
        for item in ret3:
            info = item['object_info']
            obj_type = info[2].split('-')[0]
            self.assertEqual(type_filter, obj_type)
        type_filter = "KBaseGenomes.Genome"
        ret4 = self.getImpl().list_objects_with_sets(self.getContext(),
                                                     {"types": [type_filter],
                                                     "workspaces": [str(ws_id)]})[0]["data"]
        self.assertTrue(len(ret4) == 0)
Esempio n. 4
0
def load_reads_set(srv_wiz_url, ws_name, reads_set, target_name):
    """
    Combine a list of reads references into a ReadsSet.
    if file_rev is None or not a present key, then this is treated as a single end reads.
    """
    set_client = SetAPI(srv_wiz_url)
    set_output = set_client.save_reads_set_v1({
        "workspace": ws_name,
        "output_object_name": target_name,
        "data": {
            "description": "reads set for testing",
            "items": reads_set
        }
    })
    return set_output["set_ref"]
Esempio n. 5
0
def fetch_reads_refs_from_sampleset(ref, ws_url, srv_wiz_url):
    """
    From the given object ref, return a list of all reads objects that are a part of that
    object. E.g., if ref is a ReadsSet, return a list of all PairedEndLibrary or SingleEndLibrary
    refs that are a member of that ReadsSet. This is returned as a list of dictionaries as follows:
    {
        "ref": reads object reference,
        "condition": condition string associated with that reads object,
        "name": reads object name (needed for saving an AlignmentSet)
    }
    The only one required is "ref", all other keys may or may not be present, based on the reads
    object or object type in initial ref variable. E.g. a RNASeqSampleSet might have condition info
    for each reads object, but a single PairedEndLibrary may not have that info.

    If ref is already a Reads library, just returns a list with ref as a single element.
    """
    obj_type = get_object_type(ref, ws_url)
    refs = list()
    if "KBaseSets.ReadsSet" in obj_type or "KBaseRNASeq.RNASeqSampleSet" in obj_type:
        print("Looking up reads references in ReadsSet object")
        set_client = SetAPI(srv_wiz_url)
        reads_set = set_client.get_reads_set_v1({
            "ref": ref,
            "include_item_info": 0,
            "include_set_item_ref_paths": 1
        })
        print("Got results from ReadsSet object")
        pprint(reads_set)
        ref_list = [r["ref_path"] for r in reads_set["data"]["items"]]
        reads_names = get_object_names(ref_list, ws_url)
        for reads in reads_set["data"]["items"]:
            ref = reads["ref_path"]
            refs.append({
                "ref": ref,
                "condition": reads["label"],
                "name": reads_names[ref]
            })
    elif ("KBaseAssembly.SingleEndLibrary" in obj_type
          or "KBaseFile.SingleEndLibrary" in obj_type
          or "KBaseAssembly.PairedEndLibrary" in obj_type
          or "KBaseFile.PairedEndLibrary" in obj_type):
        refs.append({"ref": ref, "name": get_object_names([ref], ws_url)[ref]})
    else:
        raise ValueError("Unable to fetch reads reference from object {} "
                         "which is a {}".format(ref, obj_type))

    return refs
Esempio n. 6
0
 def test_two_users_set_inside_dp(self):
     ws_name1_1 = self.createWs()
     # Injecting reads object (real copy) into workspace1
     orig_reads_obj_ref = self.__class__.example_reads_ref
     reads_obj_name = "TestReads"
     self.getWsClient().copy_object({'from': {'ref': orig_reads_obj_ref},
                                     'to': {'workspace': ws_name1_1,
                                            'name': reads_obj_name}})
     copy_reads_obj_ref = ws_name1_1 + '/' + reads_obj_name
     ws_name1_2 = self.createWs()
     set_obj_name = "MyReadsSet.1"
     sapi = SetAPI(self.__class__.serviceWizardURL, token=self.getContext()['token'],
                   service_ver=self.__class__.SetAPI_version)
     sapi.save_reads_set_v1({'workspace': ws_name1_2, 'output_object_name': set_obj_name,
                             'data': {'description': '', 'items': [{'ref': copy_reads_obj_ref}]}})
     orig_set_ref = ws_name1_2 + '/' + set_obj_name
     # Making DP-copy of reads set object by user2
     ws_name2 = self.createWs2()
     # Let's share workspace containing set with user2
     self.getWsClient().set_permissions({'workspace': ws_name1_2, 'new_permission': 'r',
                                         'users': [self.getContext2()['user_id']]})
     # Import reads set ref into DataPalette of third workspace
     dps = DataPaletteService(self.__class__.serviceWizardURL,
                               token=self.getContext2()['token'],
                               service_ver=self.__class__.DataPalette_version)
     dps.add_to_palette({'workspace': ws_name2, 'new_refs': [{'ref': orig_set_ref}]})
     dp_ref_map = dps.list_data({'workspaces': [ws_name2]})['data_palette_refs']
     set_ref_path = next(iter(dp_ref_map.values())) + ';' + orig_set_ref
     reads_ref_path = set_ref_path + ';' + copy_reads_obj_ref
     # Un-share original workspace
     self.getWsClient().set_permissions({'workspace': ws_name1_2, 'new_permission': 'n',
                                         'users': [self.getContext2()['user_id']]})
     # Let's check that we can list set and see reads object as set item
     ret = self.getImpl().list_objects_with_sets(self.getContext2(),
                                                 {"ws_name": ws_name2})[0]["data"]
     self.assertEqual(1, len(ret))
     item = ret[0]
     self.assertTrue('set_items' in item)
     self.assertTrue('set_items_info' in item['set_items'])
     self.assertEqual(1, len(item['set_items']['set_items_info']))
     # Check access to reads and to set objects
     info = self.getWsClient2().get_object_info_new({'objects': [{'ref': set_ref_path}]})[0]
     self.assertEqual(set_obj_name, info[1])
     info = self.getWsClient2().get_object_info_new({'objects': [{'ref': reads_ref_path}]})[0]
     self.assertEqual(reads_obj_name, info[1])
Esempio n. 7
0
 def __init__(self, config):
     self.callback_url = config['SDK_CALLBACK_URL']
     self.token = config['KB_AUTH_TOKEN']
     self.scratch = config['scratch']
     self.user_id = config['USER_ID']
     self.dfu = DataFileUtil(self.callback_url)
     self.genbank_import = ImportGenbankUtil(config)
     self.gff_fasta_import = ImportGFFFastaUtil(config)
     self.fasta_import = ImportAssemblyUtil(config)
     self.set_client = SetAPI(config['srv-wiz-url'])
Esempio n. 8
0
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('kb_Msuite'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'kb_Msuite',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = kb_Msuite(cls.cfg)
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.scratch = cls.cfg['scratch']
        cls.suffix = int(time.time() * 1000)
        #cls.scratch = cls.cfg['scratch']+'_'+str(suffix)
        #cls.cfg['scratch'] = cls.scratch
        #if not os.path.exists(cls.scratch):
        #    os.mkdir(cls.scratch)
        cls.checkm_runner = CheckMUtil(cls.cfg, cls.ctx)

        cls.wsName = "test_kb_Msuite_" + str(cls.suffix)
        cls.ws_info = cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        cls.setAPI = SetAPI(url=cls.cfg['srv-wiz-url'], token=cls.ctx['token'])
        cls.gfu = GenomeFileUtil(os.environ['SDK_CALLBACK_URL'], service_ver='dev')
        cls.mu = MetagenomeUtils(os.environ['SDK_CALLBACK_URL'])

        # stage an input and output directory
        """
        cls.input_dir = os.path.join(cls.scratch, 'input_1')
        cls.output_dir = os.path.join(cls.scratch, 'output_1')
        cls.all_seq_fasta = os.path.join(cls.scratch, 'all_seq.fna')
        shutil.copytree(os.path.join('data', 'example_out', 'input'), cls.input_dir)
        shutil.copytree(os.path.join('data', 'example_out', 'output'), cls.output_dir)
        shutil.copy(os.path.join('data', 'example_out', 'all_seq.fna'), cls.all_seq_fasta)
        """

        # prepare WS data
        cls.prepare_data()
Esempio n. 9
0
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        test_time_stamp = int(time.time() * 1000)

        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('kb_Msuite'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'kb_Msuite',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = Workspace(cls.wsURL)
        cls.serviceImpl = kb_Msuite(cls.cfg)
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.scratch = cls.cfg['scratch']
        cls.appdir = cls.cfg['appdir']

        cls.test_data_dir = os.path.join(cls.scratch, 'test_data')
        cls.suffix = test_time_stamp
        cls.checkm_runner = CheckMUtil(cls.cfg, cls.ctx)

        cls.wsName = "test_kb_Msuite_" + str(cls.suffix)
        cls.ws_info = cls.wsClient.create_workspace({'workspace': cls.wsName})

        cls.au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        cls.gfu = GenomeFileUtil(os.environ['SDK_CALLBACK_URL'],
                                 service_ver='dev')
        cls.mu = MetagenomeUtils(os.environ['SDK_CALLBACK_URL'])
        cls.setAPI = SetAPI(url=cls.cfg['srv-wiz-url'], token=cls.ctx['token'])
        cls.kr = KBaseReport(os.environ['SDK_CALLBACK_URL'])

        cls.data_loaded = False
Esempio n. 10
0
    def upload_alignment_set(self, alignment_items, alignmentset_name,
                             ws_name):
        """
        Compiles and saves a set of alignment references (+ other stuff) into a
        KBaseRNASeq.RNASeqAlignmentSet.
        Returns the reference to the new alignment set.

        alignment_items: [{
            "ref": alignment_ref,
            "label": condition label.
        }]
        # alignment_info = dict like this:
        # {
        #     reads_ref: {
        #         "ref": alignment_ref
        #     }
        # }
        # reads_info = dict like this:
        # {
        #     reads_ref: {
        #         "condition": "some condition"
        #     }
        # }
        # input_params = global input params to HISAT2, also has ws_name for the target workspace.
        # alignmentset_name = name of final set object.
        """
        print("Uploading completed alignment set")
        alignment_set = {
            "description":
            "Alignments using HISAT2, v.{}".format(HISAT_VERSION),
            "items": alignment_items
        }
        set_api = SetAPI(self.srv_wiz_url)
        set_info = set_api.save_reads_alignment_set_v1({
            "workspace": ws_name,
            "output_object_name": alignmentset_name,
            "data": alignment_set
        })
        return set_info["set_ref"]
Esempio n. 11
0
 def test_unique_items(self):
     # Create original workspace with reads object + ReadsSet object
     ws_name1 = self.createWs()
     foft = FakeObjectsForTests(os.environ['SDK_CALLBACK_URL'])
     reads_obj_name = "testreads1"
     foft.create_fake_reads({'ws_name': ws_name1, 'obj_names': [reads_obj_name]})
     reads_obj_ref = ws_name1 + '/' + reads_obj_name
     set_obj_name = "testreads_set1"
     sapi = SetAPI(self.__class__.serviceWizardURL, token=self.getContext()['token'],
                   service_ver=self.__class__.SetAPI_version)
     sapi.save_reads_set_v1({'workspace': ws_name1, 'output_object_name': set_obj_name,
                             'data': {'description': '', 'items': [{'ref': reads_obj_ref}]}})
     set_obj_ref = ws_name1 + '/' + set_obj_name
     # Create workspace with DataPalette copy of Reads object and copy of ReadsSet
     ws_name2 = self.createWs()
     dps = DataPaletteService(self.__class__.serviceWizardURL, token=self.getContext()['token'],
                              service_ver=self.__class__.DataPalette_version)
     dps.add_to_palette({'workspace': ws_name2, 'new_refs': [{'ref': reads_obj_ref},
                                                             {'ref': set_obj_ref}]})
     # Check if listing in both these workspaces at the same time gives unique items
     ret = self.getImpl().list_objects_with_sets(self.getContext(),
                                                 {"workspaces": [ws_name1, ws_name2]})[0]["data"]
     self.assertEqual(2, len(ret))
Esempio n. 12
0
 def __init__(self, config):
     self.ws_url = config["workspace-url"]
     self.callback_url = config["SDK_CALLBACK_URL"]
     self.token = config["KB_AUTH_TOKEN"]
     self.shock_url = config["shock-url"]
     self.srv_wiz_url = config["srv-wiz-url"]
     self.scratch = config["scratch"]
     self.dfu = DataFileUtil(self.callback_url)
     self.gfu = GenomeFileUtil(self.callback_url)
     self.rau = ReadsAlignmentUtils(self.callback_url)
     self.au = AssemblyUtil(self.callback_url)
     self.eu = ExpressionUtils(self.callback_url)
     self.ws = Workspace(self.ws_url, token=self.token)
     self.set_client = SetAPI(self.srv_wiz_url, service_ver="dev")
Esempio n. 13
0
    def fetch_reads_refs_from_sampleset(self, ref, info, validated_params):
        """
        Note: adapted from kbaseapps/kb_hisat2 - file_util.py
        From the given object ref, return a list of all reads objects that are a part of that
        object. E.g., if ref is a ReadsSet, return a list of all PairedEndLibrary or SingleEndLibrary
        refs that are a member of that ReadsSet. This is returned as a list of dictionaries as follows:
        {
            "ref": reads object reference,
            "condition": condition string associated with that reads object
        }
        The only one required is "ref", all other keys may or may not be present, based on the reads
        object or object type in initial ref variable. E.g. a RNASeqSampleSet might have condition info
        for each reads object, but a single PairedEndLibrary may not have that info.
        If ref is already a Reads library, just returns a list with ref as a single element.
        """
        obj_type = self.get_type_from_obj_info(info)
        refs = list()
        refs_for_ws_info = list()
        if "KBaseSets.ReadsSet" in obj_type or "KBaseRNASeq.RNASeqSampleSet" in obj_type:
            print("Looking up reads references in ReadsSet object")
            set_api = SetAPI(self.srv_wiz_url)
            reads_set = set_api.get_reads_set_v1({
                'ref':
                ref,
                'include_item_info':
                0,
                'include_set_item_ref_paths':
                1
            })

            for reads in reads_set["data"]["items"]:
                refs.append({
                    'ref': reads['ref_path'],
                    'condition': reads['label']
                })
                refs_for_ws_info.append({'ref': reads['ref_path']})
        else:
            raise ValueError("Unable to fetch reads reference from object {} "
                             "which is a {}".format(ref, obj_type))

        # get object info so we can name things properly
        infos = self.ws.get_object_info3({'objects':
                                          refs_for_ws_info})['infos']

        name_ext = '_alignment'
        if 'output_alignment_suffix' in validated_params \
                and validated_params['output_alignment_suffix'] is not None:
            ext = validated_params['output_alignment_suffix'].replace(' ', '')
            if ext:
                name_ext = ext

        unique_name_lookup = {}
        for k in range(0, len(refs)):
            refs[k]['info'] = infos[k]
            name = infos[k][1]
            if name not in unique_name_lookup:
                unique_name_lookup[name] = 1
            else:
                unique_name_lookup[name] += 1
                name = name + '_' + str(unique_name_lookup[name])
            name = name + name_ext
            refs[k]['alignment_output_name'] = name

        return refs
    def prepare_data(cls):

        workspace_id = cls.dfu.ws_name_to_id(cls.wsName)

        # upload genome object
        genbank_file_name = 'minimal.gbff'
        genbank_file_path = os.path.join(cls.scratch, genbank_file_name)
        shutil.copy(os.path.join('data', genbank_file_name), genbank_file_path)

        genome_object_name = 'test_Genome'
        cls.genome_ref = cls.gfu.genbank_to_genome({
            'file': {
                'path': genbank_file_path
            },
            'workspace_name':
            cls.wsName,
            'genome_name':
            genome_object_name
        })['genome_ref']
        print('TEST genome_ref=' + cls.genome_ref)

        # upload assembly object
        file_name = 'test.fna'
        fasta_path = os.path.join(cls.scratch, file_name)
        shutil.copy(os.path.join('data', file_name), fasta_path)
        assembly_name = 'test_assembly'
        cls.assembly_ref = cls.au.save_assembly_from_fasta({
            'file': {
                'path': fasta_path
            },
            'workspace_name':
            cls.wsName,
            'assembly_name':
            assembly_name
        })

        print('TEST assembly_ref=' + cls.assembly_ref)

        # upload reads object
        reads_file_name = 'Sample1.fastq'
        reads_file_path = os.path.join(cls.scratch, reads_file_name)
        shutil.copy(os.path.join('data', reads_file_name), reads_file_path)

        reads_object_name_1 = 'test_Reads_1'
        cls.reads_ref_1 = cls.ru.upload_reads({
            'fwd_file': reads_file_path,
            'wsname': cls.wsName,
            'sequencing_tech': 'Unknown',
            'interleaved': 0,
            'name': reads_object_name_1
        })['obj_ref']
        print(('TEST reads_ref_1=' + cls.reads_ref_1))

        reads_object_name_2 = 'test_Reads_2'
        cls.reads_ref_2 = cls.ru.upload_reads({
            'fwd_file': reads_file_path,
            'wsname': cls.wsName,
            'sequencing_tech': 'Unknown',
            'interleaved': 0,
            'name': reads_object_name_2
        })['obj_ref']
        print('TEST reads_ref_2=' + cls.reads_ref_2)

        # upload alignment object
        alignment_file_name = 'accepted_hits.bam'
        alignment_file_path = os.path.join(cls.scratch, alignment_file_name)
        shutil.copy(os.path.join('data', alignment_file_name),
                    alignment_file_path)

        alignment_object_name_1 = 'test_Alignment_1'
        cls.condition_1 = 'test_condition_1'
        cls.alignment_ref_1 = cls.rau.upload_alignment({
            'file_path':
            alignment_file_path,
            'destination_ref':
            cls.wsName + '/' + alignment_object_name_1,
            'read_library_ref':
            cls.reads_ref_1,
            'condition':
            cls.condition_1,
            'library_type':
            'single_end',
            'assembly_or_genome_ref':
            cls.genome_ref
        })['obj_ref']
        print('TEST alignment_ref_1=' + cls.alignment_ref_1)

        alignment_object_name_2 = 'test_Alignment_2'
        cls.condition_2 = 'test_condition_2'
        cls.alignment_ref_2 = cls.rau.upload_alignment({
            'file_path':
            alignment_file_path,
            'destination_ref':
            cls.wsName + '/' + alignment_object_name_2,
            'read_library_ref':
            cls.reads_ref_2,
            'condition':
            cls.condition_2,
            'library_type':
            'single_end',
            'assembly_or_genome_ref':
            cls.genome_ref
        })['obj_ref']
        print('TEST alignment_ref_2=' + cls.alignment_ref_2)

        alignment_object_name_3 = 'test_Alignment_3'
        cls.condition_3 = 'test_condition_3'
        cls.alignment_ref_3 = cls.rau.upload_alignment({
            'file_path':
            alignment_file_path,
            'destination_ref':
            cls.wsName + '/' + alignment_object_name_3,
            'read_library_ref':
            cls.reads_ref_2,
            'condition':
            cls.condition_3,
            'library_type':
            'single_end',
            'assembly_or_genome_ref':
            cls.assembly_ref
        })['obj_ref']
        print('TEST alignment_ref_3=' + cls.alignment_ref_3)

        # upload sample_set object

        sample_set_object_name = 'test_Sample_Set'
        sample_set_data = {
            'sampleset_id': sample_set_object_name,
            'sample_ids': [cls.reads_ref_1, cls.reads_ref_2],
            'sampleset_desc': 'test sampleset object',
            'Library_type': 'SingleEnd',
            'condition': [cls.condition_1, cls.condition_2],
            'domain': 'Unknown',
            'num_samples': 2,
            'platform': 'Unknown'
        }
        save_object_params = {
            'id':
            workspace_id,
            'objects': [{
                'type': 'KBaseRNASeq.RNASeqSampleSet',
                'data': sample_set_data,
                'name': sample_set_object_name
            }]
        }

        dfu_oi = cls.dfu.save_objects(save_object_params)[0]
        cls.sample_set_ref = str(dfu_oi[6]) + '/' + str(dfu_oi[0]) + '/' + str(
            dfu_oi[4])
        print(('TEST sample_set_ref=' + cls.sample_set_ref))

        # upload alignment_set object
        object_type = 'KBaseRNASeq.RNASeqAlignmentSet'
        alignment_set_object_name = 'test_Alignment_Set'
        alignment_set_data = {
            'genome_id':
            cls.genome_ref,
            'read_sample_ids': [reads_object_name_1, reads_object_name_2],
            'mapped_rnaseq_alignments': [{
                reads_object_name_1:
                alignment_object_name_1
            }, {
                reads_object_name_2:
                alignment_object_name_2
            }],
            'mapped_alignments_ids': [{
                reads_object_name_1: cls.alignment_ref_1
            }, {
                reads_object_name_2: cls.alignment_ref_2
            }],
            'sample_alignments': [cls.alignment_ref_1, cls.alignment_ref_2],
            'sampleset_id':
            cls.sample_set_ref
        }
        save_object_params = {
            'id':
            workspace_id,
            'objects': [{
                'type': object_type,
                'data': alignment_set_data,
                'name': alignment_set_object_name
            }]
        }

        dfu_oi = cls.dfu.save_objects(save_object_params)[0]
        cls.old_alignment_set_ref = str(dfu_oi[6]) + '/' + str(
            dfu_oi[0]) + '/' + str(dfu_oi[4])
        print('TEST (legacy) KBaseRNASeq.alignment_set_ref=' +
              cls.old_alignment_set_ref)

        # Save the alignment set
        items = [{
            'ref': cls.alignment_ref_1,
            'label': 'c1'
        }, {
            'ref': cls.alignment_ref_2,
            'label': 'c2'
        }]
        alignment_set_data = {'description': '', 'items': items}
        alignment_set_save_params = {
            'data': alignment_set_data,
            'workspace': cls.wsName,
            'output_object_name': 'MyReadsAlignmentSet'
        }

        set_api = SetAPI(cls.srv_wiz_url)
        save_result = set_api.save_reads_alignment_set_v1(
            alignment_set_save_params)
        cls.new_alignment_set_ref = save_result['set_ref']
        print('TEST KBaseSet.alignment_set_ref=')
        print(cls.new_alignment_set_ref)
Esempio n. 15
0
    def stage_input(self, input_ref, fasta_file_extension):
        '''
        Stage input based on an input data reference for CheckM

        input_ref can be a reference to an Assembly, BinnedContigs, or (not yet implemented) a Genome

        This method creates a directory in the scratch area with the set of Fasta files, names
        will have the fasta_file_extension parameter tacked on.

            ex:

            staged_input = stage_input('124/15/1', 'fna')

            staged_input
            {"input_dir": '...'}
        '''
        # config
        #SERVICE_VER = 'dev'
        SERVICE_VER = 'release'
        [OBJID_I, NAME_I, TYPE_I, SAVE_DATE_I, VERSION_I, SAVED_BY_I, WSID_I, WORKSPACE_I, CHSUM_I, SIZE_I, META_I] = range(11)  # object_info tuple
        ws = Workspace(self.ws_url)

        # 1) generate a folder in scratch to hold the input
        suffix = str(int(time.time() * 1000))
        input_dir = os.path.join(self.scratch, 'bins_' + suffix)
        all_seq_fasta = os.path.join(self.scratch, 'all_sequences_' + suffix + '.' + fasta_file_extension)
        if not os.path.exists(input_dir):
            os.makedirs(input_dir)


        # 2) based on type, download the files
        obj_name = self.get_data_obj_name (input_ref)
        type_name = self.get_data_obj_type (input_ref)

        # auClient
        try:
            auClient = AssemblyUtil(self.callbackURL, token=self.ctx['token'], service_ver=SERVICE_VER)
        except Exception as e:
            raise ValueError('Unable to instantiate auClient with callbackURL: '+ self.callbackURL +' ERROR: ' + str(e))

        # setAPI_Client
        try:
            #setAPI_Client = SetAPI (url=self.callbackURL, token=self.ctx['token'])  # for SDK local.  local doesn't work for SetAPI
            setAPI_Client = SetAPI (url=self.serviceWizardURL, token=self.ctx['token'])  # for dynamic service
        except Exception as e:
            raise ValueError('Unable to instantiate setAPI_Client with serviceWizardURL: '+ self.serviceWizardURL +' ERROR: ' + str(e))

        # mguClient
        try:
            mguClient = MetagenomeUtils(self.callbackURL, token=self.ctx['token'], service_ver=SERVICE_VER)
        except Exception as e:
            raise ValueError('Unable to instantiate mguClient with callbackURL: '+ self.callbackURL +' ERROR: ' + str(e))


        # Standard Single Assembly
        #
        if type_name in ['KBaseGenomeAnnotations.Assembly', 'KBaseGenomes.ContigSet']:
            # create file data
            filename = os.path.join(input_dir, obj_name + '.' + fasta_file_extension)
            auClient.get_assembly_as_fasta({'ref': input_ref, 'filename': filename})
            if not os.path.isfile(filename):
                raise ValueError('Error generating fasta file from an Assembly or ContigSet with AssemblyUtil')
            # make sure fasta file isn't empty
            min_fasta_len = 1
            if not self.fasta_seq_len_at_least(filename, min_fasta_len):
                raise ValueError('Assembly or ContigSet is empty in filename: '+str(filename))

        # AssemblySet
        #
        elif type_name == 'KBaseSets.AssemblySet':

            # read assemblySet
            try:
                assemblySet_obj = setAPI_Client.get_assembly_set_v1 ({'ref':input_ref, 'include_item_info':1})
            except Exception as e:
                raise ValueError('Unable to get object from workspace: (' + input_ref +')' + str(e))
            assembly_refs = []
            assembly_names = []
            for assembly_item in assemblySet_obj['data']['items']:
                this_assembly_ref = assembly_item['ref']
                # assembly obj info
                try:
                    this_assembly_info = ws.get_object_info_new ({'objects':[{'ref':this_assembly_ref}]})[0]
                    this_assembly_name = this_assembly_info[NAME_I]
                except Exception as e:
                    raise ValueError('Unable to get object from workspace: (' + this_assembly_ref +'): ' + str(e))
                assembly_refs.append(this_assembly_ref)
                assembly_names.append(this_assembly_name)

            # create file data (name for file is what's reported in results)
            for ass_i,assembly_ref in enumerate(assembly_refs):
                this_name = assembly_names[ass_i]
                filename = os.path.join(input_dir, this_name + '.' + fasta_file_extension)
                auClient.get_assembly_as_fasta({'ref': assembly_ref, 'filename': filename})
                if not os.path.isfile(filename):
                    raise ValueError('Error generating fasta file from an Assembly or ContigSet with AssemblyUtil')
                # make sure fasta file isn't empty
                min_fasta_len = 1
                if not self.fasta_seq_len_at_least(filename, min_fasta_len):
                    raise ValueError('Assembly or ContigSet is empty in filename: '+str(filename))

        # Binned Contigs
        #
        elif type_name == 'KBaseMetagenomes.BinnedContigs':

            # download the bins as fasta and set the input folder name
            bin_file_dir = mguClient.binned_contigs_to_file({'input_ref': input_ref, 'save_to_shock': 0})['bin_file_directory']
            os.rename(bin_file_dir, input_dir)
            # make sure fasta file isn't empty
            self.set_fasta_file_extensions(input_dir, fasta_file_extension)
            for (dirpath, dirnames, filenames) in os.walk(input_dir):
                for fasta_file in filenames:
                    fasta_path = os.path.join (input_dir,fasta_file)
                    min_fasta_len = 1
                    if not self.fasta_seq_len_at_least(fasta_path, min_fasta_len):
                        raise ValueError('Binned Assembly is empty for fasta_path: '+str(fasta_path))
                break

        # Genome and GenomeSet
        #
        elif type_name == 'KBaseGenomes.Genome' or type_name == 'KBaseSearch.GenomeSet':
            genome_obj_names = []
            genome_sci_names = []
            genome_assembly_refs = []

            if type_name == 'KBaseGenomes.Genome':
                genomeSet_refs = [input_ref]
            else:  # get genomeSet_refs from GenomeSet object
                genomeSet_refs = []
                try:
                    genomeSet_object = ws.get_objects2({'objects':[{'ref':input_ref}]})['data'][0]['data']
                except Exception as e:
                    raise ValueError('Unable to fetch '+str(input_ref)+' object from workspace: ' + str(e))
                    #to get the full stack trace: traceback.format_exc()

                # iterate through genomeSet members
                for genome_id in genomeSet_object['elements'].keys():
                    if 'ref' not in genomeSet_object['elements'][genome_id] or \
                       genomeSet_object['elements'][genome_id]['ref'] == None or \
                       genomeSet_object['elements'][genome_id]['ref'] == '':
                        raise ValueError('genome_ref not found for genome_id: '+str(genome_id)+' in genomeSet: '+str(input_ref))
                    else:
                        genomeSet_refs.append(genomeSet_object['elements'][genome_id]['ref'])

            # genome obj data
            for i,this_input_ref in enumerate(genomeSet_refs):
                try:
                    objects = ws.get_objects2({'objects':[{'ref':this_input_ref}]})['data']
                    genome_obj = objects[0]['data']
                    genome_obj_info = objects[0]['info']
                    genome_obj_names.append(genome_obj_info[NAME_I])
                    genome_sci_names.append(genome_obj['scientific_name'])
                except:
                    raise ValueError ("unable to fetch genome: "+this_input_ref)

                # Get genome_assembly_ref
                if ('contigset_ref' not in genome_obj or genome_obj['contigset_ref'] == None) \
                   and ('assembly_ref' not in genome_obj or genome_obj['assembly_ref'] == None):
                    msg = "Genome "+genome_obj_names[i]+" (ref:"+input_ref+") "+genome_sci_names[i]+" MISSING BOTH contigset_ref AND assembly_ref.  Cannot process.  Exiting."
                    raise ValueError (msg)
                    continue
                elif 'assembly_ref' in genome_obj and genome_obj['assembly_ref'] != None:
                    msg = "Genome "+genome_obj_names[i]+" (ref:"+input_ref+") "+genome_sci_names[i]+" USING assembly_ref: "+str(genome_obj['assembly_ref'])
                    print (msg)
                    genome_assembly_refs.append(genome_obj['assembly_ref'])
                elif 'contigset_ref' in genome_obj and genome_obj['contigset_ref'] != None:
                    msg = "Genome "+genome_obj_names[i]+" (ref:"+input_ref+") "+genome_sci_names[i]+" USING contigset_ref: "+str(genome_obj['contigset_ref'])
                    print (msg)
                    genome_assembly_refs.append(genome_obj['contigset_ref'])

            # create file data (name for file is what's reported in results)
            for ass_i,assembly_ref in enumerate(genome_assembly_refs):
                this_name = genome_obj_names[ass_i]
                filename = os.path.join(input_dir, this_name + '.' + fasta_file_extension)
                auClient.get_assembly_as_fasta({'ref': assembly_ref, 'filename': filename})
                if not os.path.isfile(filename):
                    raise ValueError('Error generating fasta file from an Assembly or ContigSet with AssemblyUtil')
                # make sure fasta file isn't empty
                min_fasta_len = 1
                if not self.fasta_seq_len_at_least(filename, min_fasta_len):
                    raise ValueError('Assembly or ContigSet is empty in filename: '+str(filename))

        # Unknown type slipped through
        #
        else:
            raise ValueError('Cannot stage fasta file input directory from type: ' + type_name)


        # create summary fasta file with all bins
        self.cat_fasta_files(input_dir, fasta_file_extension, all_seq_fasta)

        return {'input_dir': input_dir, 'folder_suffix': suffix, 'all_seq_fasta': all_seq_fasta}
Esempio n. 16
0
    def test_fractiontate_contigs_ASSEMBLY_ASSEMBLYSET_07(self):
        method = 'fractionate_contigs_pos_filter_ASSEMBLY_ASSEMBLYSET_07'

        print("\n\nRUNNING: test_" + method + "()")
        print("==========================================================\n\n")

        # upload test data
        try:
            auClient = AssemblyUtil(self.callback_url,
                                    token=self.getContext()['token'])
        except Exception as e:
            raise ValueError(
                'Unable to instantiate auClient with callbackURL: ' +
                self.callback_url + ' ERROR: ' + str(e))
        try:
            setAPI_Client = SetAPI(self.serviceWizardURL,
                                   token=self.getContext()['token'])
        except Exception as e:
            raise ValueError(
                'Unable to instantiate setAPI_Client with serviceWizardURL: ' +
                self.serviceWizardURL + ' ERROR: ' + str(e))
        base_1 = 'assembly_1plus2'
        base_2a = 'assembly_2a'
        base_2b = 'assembly_2b'
        type_1 = 'Assembly'
        type_2a = 'Assembly'
        type_2b = 'Assembly'
        ass_file_1_fa = base_1 + '.fa.gz'
        ass_file_2a_fa = base_2a + '.fa.gz'
        ass_file_2b_fa = base_2b + '.fa.gz'
        ass_path_1_fa = os.path.join(self.scratch, ass_file_1_fa)
        ass_path_2a_fa = os.path.join(self.scratch, ass_file_2a_fa)
        ass_path_2b_fa = os.path.join(self.scratch, ass_file_2b_fa)
        shutil.copy(os.path.join("data", ass_file_1_fa), ass_path_1_fa)
        shutil.copy(os.path.join("data", ass_file_2a_fa), ass_path_2a_fa)
        shutil.copy(os.path.join("data", ass_file_2b_fa), ass_path_2b_fa)
        ass_ref_1 = auClient.save_assembly_from_fasta({
            'file': {
                'path': ass_path_1_fa
            },
            'workspace_name':
            self.getWsName(),
            'assembly_name':
            base_1 + '.' + type_1
        })
        ass_ref_2a = auClient.save_assembly_from_fasta({
            'file': {
                'path': ass_path_2a_fa
            },
            'workspace_name':
            self.getWsName(),
            'assembly_name':
            base_2a + '.' + type_2a
        })
        ass_ref_2b = auClient.save_assembly_from_fasta({
            'file': {
                'path': ass_path_2b_fa
            },
            'workspace_name':
            self.getWsName(),
            'assembly_name':
            base_2b + '.' + type_2b
        })

        # AssemblySet
        assemblySet_items = [{
            'ref': ass_ref_2a,
            'label': 'assembly_2a'
        }, {
            'ref': ass_ref_2b,
            'label': 'assembly_2b'
        }]
        assemblySet_obj = {
            'description': 'test assemblySet',
            'items': assemblySet_items
        }
        assemblySet_ref = setAPI_Client.save_assembly_set_v1({
            'workspace_name':
            self.getWsName(),
            'output_object_name':
            'assembly_2a2b.AssemblySet',
            'data':
            assemblySet_obj
        })['set_ref']

        # run method
        base_output_name = method + '_output'
        fractionate_mode = 'neg'
        params = {
            'workspace_name':
            self.getWsName(),
            'input_assembly_ref':
            ass_ref_1,
            'input_pos_filter_obj_refs': [assemblySet_ref],
            'fractionate_mode':
            fractionate_mode,
            'output_name':
            'test_fractionated' + '-' + base_1 + '.' + type_1 + '-' +
            'assemblyset_2a2b' + '-' + fractionate_mode
        }
        result = self.getImpl().run_fractionate_contigs(
            self.getContext(), params)
        print('RESULT:')
        pprint(result)
        pass
Esempio n. 17
0
class QualiMapRunner:

    QUALIMAP_PATH = '/kb/module/qualimap-bin/qualimap'
    JAVA_MEM_DEFAULT_SIZE = '16G'
    LARGE_BAM_FILE_SIZE = 20 * 1024 * 1024 * 1024  # 20 GB
    TIMEOUT = 72 * 60 * 60  # 72 hours

    def _get_file_size(self, file_path):
        file_size = os.path.getsize(file_path)
        print('File size: {} -- {}'.format(file_size, file_path))
        return file_size

    def _large_file(self, file_path):

        filename, file_extension = os.path.splitext(file_path)
        multiplier = 0

        if file_extension == '.txt':
            total_file_size = 0
            with open(file_path, 'r') as f:
                for line in f:
                    bam_file_path = line.split('\t')[1]
                    total_file_size += self._get_file_size(bam_file_path)
            print('Total file size: {}'.format(total_file_size))
            multiplier = int(total_file_size) // int(self.LARGE_BAM_FILE_SIZE)
        else:
            multiplier = int(self._get_file_size(file_path)) // int(
                self.LARGE_BAM_FILE_SIZE)

        print('setting number of windows multiplier to: {}'.format(multiplier))

        return multiplier

    def _timeout_handler(self, signum, frame):
        print('Signal handler called with signal', signum)
        raise ValueError('QualiMap takes too long')

    def __init__(self, scratch_dir, callback_url, workspace_url, srv_wiz_url):
        self.scratch_dir = scratch_dir
        self.rau = ReadsAlignmentUtils(callback_url)
        self.kbr = KBaseReport(callback_url)
        self.dfu = DataFileUtil(callback_url)
        self.gfu = GenomeFileUtil(callback_url)
        self.set_api = SetAPI(srv_wiz_url)
        self.ws = Workspace(workspace_url)
        self.valid_commands = ['bamqc', 'multi-bamqc']

    def run_app(self, params):
        self.validate_params(params)
        print('Validated Params = ')
        pprint(params)
        run_info = self.get_run_info(params)

        if run_info.get('mode') not in ['single', 'multi']:
            raise ValueError(
                'Error in fetching the type to determine run settings.')

        run_error = False
        try:
            signal.signal(signal.SIGALRM, self._timeout_handler)
            signal.alarm(self.TIMEOUT)
            if run_info['mode'] == 'single':
                result = self.run_bamqc(params['input_ref'],
                                        run_info['input_info'])
            elif run_info['mode'] == 'multi':
                result = self.run_multi_sample_qc(params['input_ref'],
                                                  run_info['input_info'])
            signal.alarm(0)
        except Exception:
            run_error = True

            workdir = os.path.join(self.scratch_dir,
                                   'qualimap_' + str(int(time.time() * 10000)))
            os.makedirs(workdir)

            with open(os.path.join(workdir, 'qualimapReport.html'),
                      'w') as report:
                report.write('<html><body><p></p></body></html>')

            package_info = self.package_output_folder(
                workdir, 'QualiMap_report',
                'EMPTY HTML report directory for QualiMap BAM QC',
                'qualimapReport.html')

            result = {
                'qc_result_folder_path': workdir,
                'qc_result_zip_info': package_info,
                'shock_id': None
            }
            error_msg = 'Running QualiMap returned an error:\n{}\n'.format(
                traceback.format_exc())
            error_msg += 'Generating simple report instead\n'
            print(error_msg)

        if params['create_report']:
            result = self.create_report(result, params['output_workspace'],
                                        run_error, params['input_ref'])

        return result

    def create_report(self,
                      result,
                      output_workspace,
                      run_error=None,
                      input_ref=None):

        if run_error:
            objects_created = []
            info = self.get_obj_info(input_ref)
            obj_type = self.get_type_from_obj_info(info)
            if obj_type in ['KBaseRNASeq.RNASeqAlignment']:
                objects_created.append({
                    'ref': input_ref,
                    'description': 'Alignment'
                })

            if obj_type in [
                    'KBaseRNASeq.RNASeqAlignmentSet',
                    'KBaseSets.ReadsAlignmentSet'
            ]:
                objects_created.append({
                    'ref': input_ref,
                    'description': 'AlignmentSet'
                })
                reads_alignment_info = self.get_alignments_from_set(input_ref)
                for alignment in reads_alignment_info:
                    alignment_ref = alignment.get('ref')
                    objects_created.append({
                        'ref': alignment_ref,
                        'description': 'Alignment'
                    })

            report_info = self.kbr.create_extended_report({
                'message':
                ' ',
                'objects_created':
                objects_created,
                'report_object_name':
                'qualimap_report' + str(uuid.uuid4()),
                'workspace_name':
                output_workspace
            })
            result['report_name'] = report_info['name']
            result['report_ref'] = report_info['ref']
            return result

        qc_result_zip_info = result['qc_result_zip_info']
        report_info = self.kbr.create_extended_report({
            'message':
            '',
            'objects_created': [],
            'direct_html_link_index':
            0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'report_object_name':
            'qualimap_report' + str(uuid.uuid4()),
            'workspace_name':
            output_workspace
        })
        result['report_name'] = report_info['name']
        result['report_ref'] = report_info['ref']
        return result

    def get_gtf_file(self, input_ref, set_op=False):

        print('Start fetching GFF file from genome')

        if set_op:
            set_data = self.set_api.get_reads_alignment_set_v1({
                'ref':
                input_ref,
                'include_item_info':
                1
            })
            input_ref = set_data['data']['items'][0]['ref']

        obj_data = self.dfu.get_objects({"object_refs":
                                         [input_ref]})['data'][0]['data']

        genome_ref = obj_data.get('genome_id')

        if not genome_ref:
            raise ValueError(
                'Alignment is not associated with a Genome object')

        result_directory = os.path.join(self.scratch_dir, str(uuid.uuid4()))
        os.makedirs(result_directory)

        genome_gtf_file = self.gfu.genome_to_gff({
            'genome_ref': genome_ref,
            'is_gtf': True,
            'target_dir': result_directory
        })['file_path']

        return genome_gtf_file

    def run_bamqc(self, input_ref, input_info):
        # download the input and setup a working dir
        alignment_info = self.rau.download_alignment({'source_ref': input_ref})
        bam_file_path = self.find_my_bam_file(
            alignment_info['destination_dir'])
        try:
            gtf_file = self.get_gtf_file(input_ref)
        except:
            gtf_file = ''

        workdir = os.path.join(self.scratch_dir,
                               'qualimap_' + str(int(time.time() * 10000)))

        options = [
            '-bam', bam_file_path, '-c', '-outdir', workdir, '-outformat',
            'html'
        ]

        if gtf_file:
            options += ['-gff', gtf_file]

        options.append('--java-mem-size={}'.format(
            self.JAVA_MEM_DEFAULT_SIZE))  # always use large mem
        multiplier = self._large_file(bam_file_path)
        if multiplier:
            window_size = multiplier * 400
            print(f'using larger window size: {window_size} and Java memory: '
                  f'{self.JAVA_MEM_DEFAULT_SIZE}')
            options.append(
                '-nw {}'.format(window_size))  # increase size of windows

        self.run_cli_command('bamqc', options)

        package_info = self.package_output_folder(
            workdir, 'QualiMap_report',
            'HTML report directory for QualiMap BAM QC', 'qualimapReport.html')

        return {
            'qc_result_folder_path': workdir,
            'qc_result_zip_info': package_info
        }

    def run_multi_sample_qc(self, input_ref, input_info):
        # download the input and setup a working dir
        reads_alignment_info = self.get_alignments_from_set(input_ref)
        try:
            gtf_file = self.get_gtf_file(input_ref, set_op=True)
        except:
            gtf_file = ''
        suffix = 'qualimap_' + str(int(time.time() * 10000))
        workdir = os.path.join(self.scratch_dir, suffix)
        os.makedirs(workdir)

        input_file_path = self.create_multi_qualimap_cfg(
            reads_alignment_info, workdir)

        options = [
            '-d', input_file_path, '-r', '-c', '-outdir', workdir,
            '-outformat', 'html'
        ]

        if gtf_file:
            options += ['-gff', gtf_file]

        multiplier = self._large_file(input_file_path)
        if multiplier:
            window_size = multiplier * 400
            print(f'using larger window size: {window_size} and Java memory: '
                  f'{self.JAVA_MEM_DEFAULT_SIZE}')
            options.append(f'-nw {window_size}')  # increase size of windows
            options.append(f'--java-mem-size={self.JAVA_MEM_DEFAULT_SIZE}')

        self.run_cli_command('multi-bamqc', options)

        package_info = self.package_output_folder(
            workdir, 'QualiMap_report',
            'HTML report directory for QualiMap Multi-sample BAM QC',
            'multisampleBamQcReport.html')

        return {
            'qc_result_folder_path': workdir,
            'qc_result_zip_info': package_info
        }

    def get_alignments_from_set(self, alignment_set_ref):
        set_data = self.set_api.get_reads_alignment_set_v1({
            'ref':
            alignment_set_ref,
            'include_item_info':
            1
        })
        items = set_data['data']['items']

        reads_alignment_data = []
        for alignment in items:
            alignment_info = self.rau.download_alignment(
                {'source_ref': alignment['ref']})
            bam_file_path = self.find_my_bam_file(
                alignment_info['destination_dir'])
            label = None
            if 'label' in alignment:
                label = alignment['label']
            reads_alignment_data.append({
                'bam_file_path': bam_file_path,
                'ref': alignment['ref'],
                'label': label,
                'info': alignment['info']
            })
        return reads_alignment_data

    def create_multi_qualimap_cfg(self, reads_alignment_info, workdir):
        # Group by labels if there is at least one defined
        use_labels = False
        for alignment in reads_alignment_info:
            if alignment['label']:
                use_labels = True
                break

        # write the file
        input_file_path = os.path.join(workdir, 'multi_input.txt')
        input_file = open(input_file_path, 'w')
        name_lookup = {}
        for alignment in reads_alignment_info:
            name = alignment['info'][1]
            if name in name_lookup:
                name_lookup[name] += 1
                name = name + '_' + str(name_lookup[name])
            else:
                name_lookup[name] = 1

            input_file.write(name + '\t' + alignment['bam_file_path'])
            if use_labels:
                if alignment['label']:
                    input_file.write('\t' + alignment['label'])
                else:
                    input_file.write('\tunlabeled')
            input_file.write('\n')
        input_file.close()
        return input_file_path

    def get_run_info(self, params):
        info = self.get_obj_info(params['input_ref'])
        obj_type = self.get_type_from_obj_info(info)
        if obj_type in ['KBaseRNASeq.RNASeqAlignment']:
            return {'mode': 'single', 'input_info': info}
        if obj_type in [
                'KBaseRNASeq.RNASeqAlignmentSet', 'KBaseSets.ReadsAlignmentSet'
        ]:
            return {'mode': 'multi', 'input_info': info}
        raise ValueError('Object type of input_ref is not valid, was: ' +
                         str(obj_type))

    def validate_params(self, params):
        if 'input_ref' not in params:
            raise ValueError(
                'required parameter field "input_ref" was not set')

        create_report = False
        if 'create_report' in params:
            if int(params['create_report']) == 1:
                if 'output_workspace' not in params:
                    raise ValueError(
                        'If "create_report" was set, then "output_workspace" is required'
                    )
                if not params['output_workspace']:
                    raise ValueError(
                        'If "create_report" was set, then "output_workspace" is required'
                    )
                create_report = True
        params['create_report'] = create_report

    def run_cli_command(self, command, options, cwd=None):
        if command not in self.valid_commands:
            raise ValueError('Invalid QualiMap command: ' + str(command))
        command = [self.QUALIMAP_PATH, command] + options
        print('Running: ' + ' '.join(command))

        if not cwd:
            cwd = self.scratch_dir

        p = subprocess.Popen(command, cwd=cwd, shell=False)
        exitCode = p.wait()

        if exitCode == 0:
            print('Success, exit code was: ' + str(exitCode))
        else:
            raise ValueError('Error running command: ' + ' '.join(command) +
                             '\n' + 'Exit Code: ' + str(exitCode))

    def find_my_bam_file(self, dirpath):
        bam_path = None
        for f in os.listdir(dirpath):
            fullpath = os.path.join(dirpath, f)
            if os.path.isfile(fullpath) and f.lower().endswith('.bam'):
                if bam_path is not None:
                    raise ValueError(
                        'Error! Too many BAM files were downloaded for this alignment!'
                    )
                bam_path = fullpath
        if bam_path is None:
            raise ValueError(
                'Error! No BAM files were downloaded for this alignment!')
        return bam_path

    def package_output_folder(self, folder_path, zip_file_name,
                              zip_file_description, index_html_file):
        """ Simple utility for packaging a folder and saving to shock """
        output = self.dfu.file_to_shock({
            'file_path': folder_path,
            'make_handle': 0,
            'pack': 'zip'
        })
        return {
            'shock_id': output['shock_id'],
            'name': zip_file_name,
            'description': zip_file_description,
            'index_html_file_name': index_html_file
        }

    def get_type_from_obj_info(self, info):
        return info[2].split('-')[0]

    def get_obj_info(self, ref):
        return self.ws.get_object_info3({'objects': [{
            'ref': ref
        }]})['infos'][0]
Esempio n. 18
0
    def process_batch_result(self, batch_result, validated_params, reads,
                             input_set_info):

        n_jobs = len(batch_result['results'])
        n_success = 0
        n_error = 0
        ran_locally = 0
        ran_njsw = 0

        # reads alignment set items
        items = []
        objects_created = []

        for k in range(0, len(batch_result['results'])):
            job = batch_result['results'][k]
            result_package = job['result_package']
            if job['is_error']:
                n_error += 1
            else:
                n_success += 1
                print(result_package['result'])
                print(result_package['result'][0])
                print(result_package['result'][0]['output_info'])
                output_info = result_package['result'][0]['output_info']
                ra_ref = output_info['upload_results']['obj_ref']
                # Note: could add a label to the alignment here?
                items.append({'ref': ra_ref, 'label': reads[k]['condition']})
                objects_created.append({'ref': ra_ref})

            if result_package['run_context']['location'] == 'local':
                ran_locally += 1
            if result_package['run_context']['location'] == 'njsw':
                ran_njsw += 1

        # Save the alignment set
        alignment_set_data = {'description': '', 'items': items}
        alignment_set_save_params = {
            'data':
            alignment_set_data,
            'workspace':
            validated_params['output_workspace'],
            'output_object_name':
            str(input_set_info[1]) + validated_params['output_obj_name_suffix']
        }

        set_api = SetAPI(self.srv_wiz_url)
        save_result = set_api.save_reads_alignment_set_v1(
            alignment_set_save_params)
        print('Saved ReadsAlignment=')
        pprint(save_result)
        objects_created.append({
            'ref':
            save_result['set_ref'],
            'description':
            'Set of all reads alignments generated'
        })
        set_name = save_result['set_info'][1]

        # run qualimap
        qualimap_report = self.qualimap.run_bamqc(
            {'input_ref': save_result['set_ref']})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']

        # create the report
        report_text = 'Ran on SampleSet or ReadsSet.\n\n'
        report_text = 'Created ReadsAlignmentSet: ' + str(set_name) + '\n\n'
        report_text += 'Total ReadsLibraries = ' + str(n_jobs) + '\n'
        report_text += '        Successful runs = ' + str(n_success) + '\n'
        report_text += '            Failed runs = ' + str(n_error) + '\n'
        report_text += '       Ran on main node = ' + str(ran_locally) + '\n'
        report_text += '   Ran on remote worker = ' + str(ran_njsw) + '\n\n'

        print('Report text=')
        print(report_text)

        kbr = KBaseReport(self.callback_url)
        report_info = kbr.create_extended_report({
            'message':
            report_text,
            'objects_created':
            objects_created,
            'report_object_name':
            'kb_Bwa_' + str(uuid.uuid4()),
            'direct_html_link_index':
            0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'workspace_name':
            validated_params['output_workspace']
        })

        result = {
            'report_info': {
                'report_name': report_info['name'],
                'report_ref': report_info['ref']
            }
        }
        result['batch_output_info'] = batch_result

        return result