Example #1
0
    def upload_alignment_set(self, alignment_items, alignmentset_name,
                             ws_name):
        """
        Save a group of alignments as one reads alignment set.

        The items are wrapped in a set payload whose description records the
        HISAT2 version (HISAT_VERSION), and that payload is passed to the
        SetAPI client's save_reads_alignment_set_v1 call.

        alignment_items: list with one dict per alignment:
            [{
                "ref": alignment_ref,
                "label": condition label
            }]
        alignmentset_name: name given to the saved set object.
        ws_name: name of the workspace the set is saved into.

        Returns the "set_ref" entry of the save result.
        """
        print("Uploading completed alignment set")
        description = "Alignments using HISAT2, v.{}".format(HISAT_VERSION)
        save_params = {
            "workspace": ws_name,
            "output_object_name": alignmentset_name,
            "data": {
                "description": description,
                "items": alignment_items
            }
        }
        client = SetAPI(self.srv_wiz_url)
        return client.save_reads_alignment_set_v1(save_params)["set_ref"]
    def prepare_data(cls):
        """
        Upload the test fixture objects and record their references on cls.

        Fixture files are copied from the relative 'data' directory into
        cls.scratch before upload. In order, this creates:
          * a genome from 'minimal.gbff'            -> cls.genome_ref
          * an assembly from 'test.fna'             -> cls.assembly_ref
          * two reads objects from 'Sample1.fastq'  -> cls.reads_ref_1/_2
          * three alignments from 'accepted_hits.bam'
                                                    -> cls.alignment_ref_1/_2/_3
            (with cls.condition_1/_2/_3; the third reuses reads_ref_2 and is
            attached to the assembly instead of the genome)
          * a KBaseRNASeq.RNASeqSampleSet           -> cls.sample_set_ref
          * a KBaseRNASeq.RNASeqAlignmentSet        -> cls.old_alignment_set_ref
          * a set saved via SetAPI.save_reads_alignment_set_v1
                                                    -> cls.new_alignment_set_ref

        Each reference is printed with a 'TEST ' prefix as it is created.
        """
        workspace_id = cls.dfu.ws_name_to_id(cls.wsName)

        def stage(file_name):
            # Copy one fixture from 'data' into scratch; return the new path.
            target = os.path.join(cls.scratch, file_name)
            shutil.copy(os.path.join('data', file_name), target)
            return target

        def save_typed_object(type_name, object_name, object_data):
            # Save a single object through cls.dfu and build its reference
            # from fields 6, 0 and 4 of the returned object info
            # (presumably workspace id / object id / version -- confirm).
            info = cls.dfu.save_objects({
                'id': workspace_id,
                'objects': [{
                    'type': type_name,
                    'data': object_data,
                    'name': object_name
                }]
            })[0]
            return '/'.join([str(info[6]), str(info[0]), str(info[4])])

        # genome object
        genome_params = {
            'file': {
                'path': stage('minimal.gbff')
            },
            'workspace_name': cls.wsName,
            'genome_name': 'test_Genome'
        }
        cls.genome_ref = cls.gfu.genbank_to_genome(genome_params)['genome_ref']
        print('TEST genome_ref=' + cls.genome_ref)

        # assembly object
        assembly_params = {
            'file': {
                'path': stage('test.fna')
            },
            'workspace_name': cls.wsName,
            'assembly_name': 'test_assembly'
        }
        cls.assembly_ref = cls.au.save_assembly_from_fasta(assembly_params)

        print('TEST assembly_ref=' + cls.assembly_ref)

        # reads objects: both are uploaded from the same fastq file
        reads_path = stage('Sample1.fastq')

        def upload_reads(object_name):
            return cls.ru.upload_reads({
                'fwd_file': reads_path,
                'wsname': cls.wsName,
                'sequencing_tech': 'Unknown',
                'interleaved': 0,
                'name': object_name
            })['obj_ref']

        reads_name_1 = 'test_Reads_1'
        cls.reads_ref_1 = upload_reads(reads_name_1)
        print(('TEST reads_ref_1=' + cls.reads_ref_1))

        reads_name_2 = 'test_Reads_2'
        cls.reads_ref_2 = upload_reads(reads_name_2)
        print('TEST reads_ref_2=' + cls.reads_ref_2)

        # alignment objects: all three are uploaded from the same bam file
        bam_path = stage('accepted_hits.bam')

        def upload_alignment(object_name, reads_ref, condition, reference):
            return cls.rau.upload_alignment({
                'file_path': bam_path,
                'destination_ref': cls.wsName + '/' + object_name,
                'read_library_ref': reads_ref,
                'condition': condition,
                'library_type': 'single_end',
                'assembly_or_genome_ref': reference
            })['obj_ref']

        alignment_name_1 = 'test_Alignment_1'
        cls.condition_1 = 'test_condition_1'
        cls.alignment_ref_1 = upload_alignment(
            alignment_name_1, cls.reads_ref_1, cls.condition_1, cls.genome_ref)
        print('TEST alignment_ref_1=' + cls.alignment_ref_1)

        alignment_name_2 = 'test_Alignment_2'
        cls.condition_2 = 'test_condition_2'
        cls.alignment_ref_2 = upload_alignment(
            alignment_name_2, cls.reads_ref_2, cls.condition_2, cls.genome_ref)
        print('TEST alignment_ref_2=' + cls.alignment_ref_2)

        # third alignment: second reads object again, but tied to the assembly
        alignment_name_3 = 'test_Alignment_3'
        cls.condition_3 = 'test_condition_3'
        cls.alignment_ref_3 = upload_alignment(
            alignment_name_3, cls.reads_ref_2, cls.condition_3,
            cls.assembly_ref)
        print('TEST alignment_ref_3=' + cls.alignment_ref_3)

        # sample_set object
        sample_set_name = 'test_Sample_Set'
        cls.sample_set_ref = save_typed_object(
            'KBaseRNASeq.RNASeqSampleSet', sample_set_name, {
                'sampleset_id': sample_set_name,
                'sample_ids': [cls.reads_ref_1, cls.reads_ref_2],
                'sampleset_desc': 'test sampleset object',
                'Library_type': 'SingleEnd',
                'condition': [cls.condition_1, cls.condition_2],
                'domain': 'Unknown',
                'num_samples': 2,
                'platform': 'Unknown'
            })
        print(('TEST sample_set_ref=' + cls.sample_set_ref))

        # legacy alignment_set object, saved directly as a typed object
        cls.old_alignment_set_ref = save_typed_object(
            'KBaseRNASeq.RNASeqAlignmentSet', 'test_Alignment_Set', {
                'genome_id': cls.genome_ref,
                'read_sample_ids': [reads_name_1, reads_name_2],
                'mapped_rnaseq_alignments': [
                    {reads_name_1: alignment_name_1},
                    {reads_name_2: alignment_name_2}
                ],
                'mapped_alignments_ids': [
                    {reads_name_1: cls.alignment_ref_1},
                    {reads_name_2: cls.alignment_ref_2}
                ],
                'sample_alignments':
                [cls.alignment_ref_1, cls.alignment_ref_2],
                'sampleset_id': cls.sample_set_ref
            })
        print('TEST (legacy) KBaseRNASeq.alignment_set_ref=' +
              cls.old_alignment_set_ref)

        # alignment set saved through SetAPI
        set_items = [
            {'ref': cls.alignment_ref_1, 'label': 'c1'},
            {'ref': cls.alignment_ref_2, 'label': 'c2'}
        ]
        set_save_params = {
            'data': {'description': '', 'items': set_items},
            'workspace': cls.wsName,
            'output_object_name': 'MyReadsAlignmentSet'
        }
        set_client = SetAPI(cls.srv_wiz_url)
        cls.new_alignment_set_ref = set_client.save_reads_alignment_set_v1(
            set_save_params)['set_ref']
        print('TEST KBaseSet.alignment_set_ref=')
        print(cls.new_alignment_set_ref)
Example #3
0
    def process_batch_result(self, batch_result, validated_params, reads,
                             input_set_info):
        """
        Turn the results of a batch run into a reads alignment set, run QC on
        that set, and create the summary report.

        batch_result: dict with a 'results' list. Each entry has 'is_error'
            and a 'result_package' containing 'run_context' (whose 'location'
            is counted when it is 'local' or 'njsw') and, for successful
            jobs, 'result'.
        validated_params: dict; 'output_workspace' and
            'output_obj_name_suffix' are read.
        reads: sequence indexed in step with batch_result['results']; entry k
            supplies the 'condition' used as the label of job k's alignment.
        input_set_info: indexable; element 1 is the prefix of the output set
            name (presumably the input set's object name -- confirm).

        Returns a dict with 'report_info' ('report_name' and 'report_ref')
        and 'batch_output_info' (the batch_result passed in).
        """
        jobs = batch_result['results']
        n_jobs = len(jobs)
        n_success = 0
        n_error = 0
        ran_locally = 0
        ran_njsw = 0

        # reads alignment set items
        items = []
        objects_created = []

        for k, job in enumerate(jobs):
            result_package = job['result_package']
            if job['is_error']:
                n_error += 1
            else:
                n_success += 1
                print(result_package['result'])
                print(result_package['result'][0])
                print(result_package['result'][0]['output_info'])
                output_info = result_package['result'][0]['output_info']
                ra_ref = output_info['upload_results']['obj_ref']
                # reads[k] is only consulted for successful jobs, so it is
                # looked up by index rather than zipped with the results.
                items.append({'ref': ra_ref, 'label': reads[k]['condition']})
                objects_created.append({'ref': ra_ref})

            # Where the job ran is tallied for failed jobs as well.
            location = result_package['run_context']['location']
            if location == 'local':
                ran_locally += 1
            elif location == 'njsw':
                ran_njsw += 1

        # Save the alignment set
        alignment_set_save_params = {
            'data': {
                'description': '',
                'items': items
            },
            'workspace':
            validated_params['output_workspace'],
            'output_object_name':
            str(input_set_info[1]) + validated_params['output_obj_name_suffix']
        }

        set_api = SetAPI(self.srv_wiz_url)
        save_result = set_api.save_reads_alignment_set_v1(
            alignment_set_save_params)
        print('Saved ReadsAlignment=')
        pprint(save_result)
        objects_created.append({
            'ref':
            save_result['set_ref'],
            'description':
            'Set of all reads alignments generated'
        })
        set_name = save_result['set_info'][1]

        # run qualimap
        qualimap_report = self.qualimap.run_bamqc(
            {'input_ref': save_result['set_ref']})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']

        # create the report
        # Both header lines belong in the message; assigning the second with
        # '=' would silently discard the first.
        report_text = ''.join([
            'Ran on SampleSet or ReadsSet.\n\n',
            'Created ReadsAlignmentSet: ' + str(set_name) + '\n\n',
            'Total ReadsLibraries = ' + str(n_jobs) + '\n',
            '        Successful runs = ' + str(n_success) + '\n',
            '            Failed runs = ' + str(n_error) + '\n',
            '       Ran on main node = ' + str(ran_locally) + '\n',
            '   Ran on remote worker = ' + str(ran_njsw) + '\n\n'
        ])

        print('Report text=')
        print(report_text)

        kbr = KBaseReport(self.callback_url)
        report_info = kbr.create_extended_report({
            'message':
            report_text,
            'objects_created':
            objects_created,
            'report_object_name':
            'kb_Bwa_' + str(uuid.uuid4()),
            # index 0 selects the single qualimap entry in html_links below
            'direct_html_link_index':
            0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'workspace_name':
            validated_params['output_workspace']
        })

        return {
            'report_info': {
                'report_name': report_info['name'],
                'report_ref': report_info['ref']
            },
            'batch_output_info': batch_result
        }