Beispiel #1
0
    def _generate_kmeans_cluster_report(self, cluster_set_refs,
                                        workspace_name):
        """
        Create a KBase report that lists every generated k-means ClusterSet.

        :param cluster_set_refs: iterable of ClusterSet object references;
                                 each one becomes an 'objects_created' entry
        :param workspace_name: workspace the report object is saved to
        :return: dict with 'report_name' and 'report_ref', copied from the
                 'name' and 'ref' fields of the KBaseReport response
        """
        created = [{'ref': ref, 'description': 'Kmeans ClusterSet'}
                   for ref in cluster_set_refs]

        # The random suffix keeps report object names distinct between runs.
        report_object_name = 'run_kmeans_cluster_' + str(uuid.uuid4())

        reporter = KBaseReport(self.callback_url, token=self.token)
        report_info = reporter.create_extended_report({
            'message': '',
            'objects_created': created,
            'workspace_name': workspace_name,
            'report_object_name': report_object_name,
        })

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
    def generate_report(self, genome_ref, params):
        """
        Build the KBase report for an imported genome.

        :param genome_ref: reference of the uploaded genome (return value of
                           GenomeFileUtil); listed as the created object and
                           handed to generate_html_report
        :param params: dict of method parameters; 'workspace_name' is read
                       from it and the dict is forwarded to
                       generate_html_report
        :return: dict with 'report_name' and 'report_ref'
        """
        name_suffix = str(uuid.uuid4())
        html_links = self.generate_html_report(genome_ref, params)

        report_request = {
            'message': '',
            'workspace_name': params.get('workspace_name'),
            'objects_created': [
                {'ref': genome_ref, 'description': 'Imported Genome'},
            ],
            'html_links': html_links,
            # Show the first HTML link directly inside the report window.
            'direct_html_link_index': 0,
            'html_window_height': 300,
            'report_object_name': 'kb_genome_upload_report_' + name_suffix,
        }

        reporter = KBaseReport(self.callback_url, token=self.token)
        report_info = reporter.create_extended_report(report_request)

        return {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
Beispiel #3
0
    def _generate_report(self, params, result_directory, diff_expression_matrix_set_ref):
        """
        Assemble the kb_ballgown summary report.

        :param params: dict of method parameters; supplies 'workspace_name'
                       and is forwarded to _generate_html_report
        :param result_directory: handed to the helpers that collect the
                                 downloadable files and build the HTML page
        :param diff_expression_matrix_set_ref: forwarded to
                                               _generate_html_report
        :return: dict with 'report_name' and 'report_ref'
        """
        log('creating report')

        file_links = self._generate_output_file_list(result_directory)
        html_links = self._generate_html_report(
            result_directory, params, diff_expression_matrix_set_ref)

        report_request = {
            'message': '',
            'workspace_name': params.get('workspace_name'),
            'file_links': file_links,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'kb_ballgown_report_' + str(uuid.uuid4()),
        }

        reporter = KBaseReport(self.callback_url)
        report_info = reporter.create_extended_report(report_request)

        return {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
    def _create_search_report(self, wsname, id_to_similarity, id_to_link, ttlcount):
        """
        Write the Sourmash search results page, upload it and create a report.

        The page is written as 'index.html' inside '<self.tmp>/search_report'
        by _write_search_results, the directory is zipped and uploaded with
        file_to_shock, and the resulting shock node is linked from a new
        extended report.

        :param wsname: workspace the report object is saved to
        :param id_to_similarity: forwarded to _write_search_results
        :param id_to_link: forwarded to _write_search_results
        :param ttlcount: forwarded to _write_search_results
        :return: the unmodified response of create_extended_report
        :raises _DFUError: logged and re-raised when the upload fails
        :raises _RepError: logged and re-raised when report creation fails
        """
        report_dir = os.path.join(self.tmp, 'search_report')
        self._mkdir_p(report_dir)

        index_path = os.path.join(report_dir, 'index.html')
        self._write_search_results(index_path, id_to_similarity, id_to_link, ttlcount)

        log('Saving Sourmash search report')

        shock_client = _DFUClient(self.callbackURL)
        upload_request = {'file_path': report_dir, 'make_handle': 0, 'pack': 'zip'}
        try:
            upload = shock_client.file_to_shock(upload_request)
        except _DFUError as upload_error:
            # not really any way to test this block
            log('Logging exception loading results to shock')
            log(str(upload_error))
            raise
        log('saved report to shock node ' + upload['shock_id'])

        try:
            reporter = KBaseReport(self.callbackURL)
            html_link = {
                'shock_id': upload['shock_id'],
                'name': 'index.html',
                'label': 'Sourmash search results',
            }
            report_request = {
                'direct_html_link_index': 0,
                'html_links': [html_link],
                'report_object_name': 'kb_sourmash_report_' + str(uuid.uuid4()),
                'workspace_name': wsname,
            }
            return reporter.create_extended_report(report_request)
        except _RepError as report_error:
            log('Logging exception from creating report object')
            log(str(report_error))
            # TODO delete shock node
            raise
    def _generate_report(self, up_feature_set_ref, down_feature_set_ref, 
                         filtered_expression_matrix_ref,
                         up_feature_ids, down_feature_ids, genome_id, workspace_name):
        """
        Create the kb_FeatureSetUtils summary report.

        :param up_feature_set_ref: reference listed as 'Upper FeatureSet Object'
        :param down_feature_set_ref: reference listed as 'Lower FeatureSet Object'
        :param filtered_expression_matrix_ref: optional reference; when truthy
            it is additionally listed as 'Filtered ExpressionMatrix Object'
        :param up_feature_ids: forwarded to _generate_html_report
        :param down_feature_ids: forwarded to _generate_html_report
        :param genome_id: forwarded to _generate_html_report
        :param workspace_name: workspace the report object is saved to
        :return: dict with 'report_name' and 'report_ref'
        """
        log('start creating report')

        html_links = self._generate_html_report(up_feature_ids, down_feature_ids, genome_id)

        created = [
            {'ref': up_feature_set_ref,
             'description': 'Upper FeatureSet Object'},
            {'ref': down_feature_set_ref,
             'description': 'Lower FeatureSet Object'},
        ]
        if filtered_expression_matrix_ref:
            created.append({'ref': filtered_expression_matrix_ref,
                            'description': 'Filtered ExpressionMatrix Object'})

        report_request = {
            'message': '',
            'workspace_name': workspace_name,
            'objects_created': created,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'kb_FeatureSetUtils_report_' + str(uuid.uuid4()),
        }

        reporter = KBaseReport(self.callback_url)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #6
0
    def _save_to_ws_and_report(self, ctx, method, workspace, source, compoundset):
        """
        Save a compound set to the workspace and create a report for it.

        :param ctx: call context; when it has a 'provenance' entry, the first
                    element of that list is updated in place
        :param method: name recorded under the provenance 'method' key; if it
                       contains 'model', '<workspace>/<source>' is recorded as
                       'input_ws_objects'
        :param workspace: workspace the object and the report are saved to
        :param source: name of the import source, used in the report message
        :param compoundset: KBaseBiochem.CompoundSet data; its 'name' entry
                            is used as the object name
        :return: dict with 'report_name', 'report_ref' and 'compoundset_ref'
        """
        provenance = ctx['provenance'] if 'provenance' in ctx else [{}]
        if 'model' in method:
            provenance[0]['input_ws_objects'] = workspace + '/' + source
        provenance[0].update(service='CompoundSetUtils', method=method)
        # NOTE(review): the provenance assembled above is never passed to
        # save_objects below - confirm whether that omission is intentional.

        save_request = {
            'workspace': workspace,
            "objects": [{
                "type": "KBaseBiochem.CompoundSet",
                "data": compoundset,
                "name": compoundset['name'],
            }],
        }
        info = self.ws_client.save_objects(save_request)[0]

        # Reference assembled from positions 6, 0 and 4 of the returned
        # object info (presumably workspace id / object id / version).
        compoundset_ref = "%s/%s/%s" % (info[6], info[0], info[4])

        report_request = {
            'objects_created': [
                {'ref': compoundset_ref, 'description': 'Compound Set'},
            ],
            'message': 'Imported %s as %s' % (source, compoundset_ref),
            'workspace_name': workspace,
            'report_object_name': 'compound_set_creation_report',
        }

        # Construct the output to send back
        reporter = KBaseReport(self.callback_url)
        report_info = reporter.create_extended_report(report_request)
        return {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
            'compoundset_ref': compoundset_ref,
        }
    def run_import_gwas_results(self, params):
        """
        Import GWAS results and display them as a Manhattan plot.

        The HTML report is generated from a fixed template directory and a
        fixed TSV file inside the module; only 'workspace_name' is read from
        params.

        :param params: dict of method parameters
        :return: dict with 'report_name' and 'report_ref'
        """
        template_directory = "/kb/module/lib/genome_wide_association_studies/Utils/manhattan_plot_template"
        tsv_for_plot = "/kb/module/lib/genome_wide_association_studies/Utils/manhattan_plot_template/snpdata.tsv"
        html_links = self._generate_html_report(template_directory, tsv_for_plot)

        report_request = {
            'message': '',
            'workspace_name': params.get('workspace_name'),
            'file_links': [],
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'htmlreport_test_' + str(uuid.uuid4()),
        }

        reporter = KBaseReport(self.callback_url)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #8
0
    def _generate_report(self, report_message, params):
        """
        Create a text-only MetagenomeUtils report.

        :param report_message: text appended after the 'Job Finished' header
        :param params: dict of method parameters; 'workspace_name' is read
        :return: dict with 'report_name' and 'report_ref'
        """
        log('Generating report')

        name_suffix = str(uuid.uuid4())
        full_message = 'Job Finished\n\n' + report_message

        log('Report message:\n{}'.format(full_message))

        reporter_request = {
            'message': full_message,
            'workspace_name': params.get('workspace_name'),
            'report_object_name': 'MetagenomeUtils_report_' + name_suffix,
        }

        reporter = KBaseReport(self.callback_url)
        report_info = reporter.create_extended_report(reporter_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #9
0
    def _generate_pca_report(self, pca_ref, pca_matrix_data, workspace_name):
        """
        Create the PCA report: the PCA matrix object plus HTML plot pages.

        :param pca_ref: reference of the PCA matrix, listed as created object
        :param pca_matrix_data: handed to _generate_pca_plot, whose result
                                (plots, number of components) feeds
                                _generate_pca_html_files
        :param workspace_name: workspace the report object is saved to
        :return: dict with 'report_name' and 'report_ref'
        """
        created = [{'ref': pca_ref, 'description': 'PCA Matrix'}]

        plots, component_count = self._generate_pca_plot(pca_matrix_data)
        html_links = self._generate_pca_html_files(plots, component_count)

        report_request = {
            'message': '',
            'objects_created': created,
            'workspace_name': workspace_name,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'report_object_name': 'run_pca_' + str(uuid.uuid4()),
        }

        reporter = KBaseReport(self.callback_url, token=self.token)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #10
0
    def _generate_report_sets_library(self, reads_alignment_object_ref, result_directory, 
                                      workspace_name):
        """
        Create the kb_tophat2 summary report for a sample set.

        The AlignmentSet itself and every alignment found under its 'items'
        entry are listed as created objects.

        :param reads_alignment_object_ref: reference of the AlignmentSet
        :param result_directory: handed to the file-list and HTML helpers
        :param workspace_name: workspace the report object is saved to
        :return: dict with 'report_name' and 'report_ref'
        """
        set_object = self.ws.get_objects2(
            {'objects': [{'ref': reads_alignment_object_ref}]})['data'][0]

        created = [{'ref': reads_alignment_object_ref,
                    'description': 'AlignmentSet generated by TopHat2'}]
        # NOTE(review): .get('items') yields None when the key is absent, in
        # which case the iteration below raises TypeError.
        created.extend(
            {'ref': item['ref'],
             'description': 'Alignment generated by TopHat2'}
            for item in set_object['data'].get('items'))

        file_links = self._generate_output_file_list_sets_library(result_directory)
        html_links = self._generate_html_report_sets_library(
            reads_alignment_object_ref, result_directory)

        report_request = {
            'message': '',
            'workspace_name': workspace_name,
            'file_links': file_links,
            'objects_created': created,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'kb_tophat2_report_' + str(uuid.uuid4()),
        }

        reporter = KBaseReport(self.callback_url)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #11
0
    def _generate_merge_report(self, workspace_name, result_directory):
        """
        Create the kb_stringtie merge report.

        :param workspace_name: workspace the report object is saved to
        :param result_directory: handed to _generate_output_file_list and
                                 _generate_merge_html_report
        :return: dict with 'report_name' and 'report_ref'
        """
        log('creating merge report')

        file_links = self._generate_output_file_list(result_directory)
        html_links = self._generate_merge_html_report(result_directory)

        report_request = {
            'message': '',
            'workspace_name': workspace_name,
            'file_links': file_links,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 366,
            'report_object_name': 'kb_stringtie_report_' + str(uuid.uuid4()),
        }

        reporter = KBaseReport(self.callback_url, token=self.token)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #12
0
    def _generate_report_single_library(self, reads_alignment_object_ref, result_directory, 
                                        workspace_name):
        """
        Create the kb_tophat2 summary report for a single library.

        :param reads_alignment_object_ref: reference of the alignment; listed
                                           as the created object and passed
                                           to the HTML report helper
        :param result_directory: handed to the file-list and HTML helpers
        :param workspace_name: workspace the report object is saved to
        :return: dict with 'report_name' and 'report_ref'
        """
        log('start creating report')

        file_links = self._generate_output_file_list_single_library(result_directory)
        html_links = self._generate_html_report_single_library(
            reads_alignment_object_ref, result_directory)

        created = [{'ref': reads_alignment_object_ref,
                    'description': 'Alignment generated by TopHat2'}]

        report_request = {
            'message': '',
            'workspace_name': workspace_name,
            'file_links': file_links,
            'objects_created': created,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'kb_tophat2_report_' + str(uuid.uuid4()),
        }

        reporter = KBaseReport(self.callback_url)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
                 job_service_url, njsw_url, auth_service_url, kbase_endpoint,
                 provenance, token):
        """
        Store the service configuration, prepare scratch space and clients.

        Creates the scratch directory and a uniquely named metrics
        sub-directory inside it, then instantiates the
        UJS_CAT_NJS_DataUtils helper and a KBaseReport client.
        """
        # Endpoints, provenance and credentials, stored as given.
        self.callback_url = callback_url
        self.workspace_url = workspace_url
        self.srv_wiz_url = srv_wiz_url
        self.job_service_url = job_service_url
        self.njsw_url = njsw_url
        self.auth_service_url = auth_service_url
        self.kbase_endpoint = kbase_endpoint
        self.provenance = provenance
        self.token = token

        # Working directories: scratch root plus a per-instance metrics dir.
        self.scratch = scratch_dir
        _mkdir_p(self.scratch)
        metrics_dir_name = str(uuid.uuid4())
        self.metrics_dir = os.path.join(self.scratch, metrics_dir_name)
        _mkdir_p(self.metrics_dir)

        self.statdu = UJS_CAT_NJS_DataUtils(
            self.workspace_url, self.job_service_url, self.srv_wiz_url,
            self.njsw_url, self.auth_service_url, self.kbase_endpoint,
            self.provenance, self.token)
        self.kbr = KBaseReport(self.callback_url)
Beispiel #14
0
    def _generate_search_report(self, header_str, table_str, workspace_name):
        """
        Create the kb_matrix_filter search report.

        :param header_str: forwarded to _generate_search_html_report
        :param table_str: forwarded to _generate_search_html_report
        :param workspace_name: workspace the report object is saved to
        :return: dict with 'report_name' and 'report_ref'
        """
        log('Start creating report')

        html_links = self._generate_search_html_report(header_str, table_str)

        report_object_name = 'kb_matrix_filter_report_' + str(uuid.uuid4())
        report_request = {
            'message': '',
            'workspace_name': workspace_name,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 366,
            'report_object_name': report_object_name,
        }

        reporter = KBaseReport(self.callback_url, token=self.token)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #15
0
    def _generate_report(self, matrix_obj_ref, workspace_name):
        """
        Create a report that lists the imported matrix object.

        :param matrix_obj_ref: reference of the imported matrix
        :param workspace_name: workspace the report object is saved to
        :return: dict with 'report_name' and 'report_ref'
        """
        created = [{'ref': matrix_obj_ref, 'description': 'Imported Matrix'}]
        report_object_name = 'import_matrix_from_excel_' + str(uuid.uuid4())

        reporter = KBaseReport(self.callback_url, token=self.token)
        report_info = reporter.create_extended_report({
            'message': '',
            'objects_created': created,
            'workspace_name': workspace_name,
            'report_object_name': report_object_name,
        })

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #16
0
 def __init__(self, scratch_dir, callback_url, workspace_url, srv_wiz_url):
     """
     Keep the scratch directory and create the service clients.

     :param scratch_dir: stored as self.scratch_dir
     :param callback_url: used for the ReadsAlignmentUtils, KBaseReport and
                          DataFileUtil clients
     :param workspace_url: used for the Workspace client
     :param srv_wiz_url: used for the SetAPI client
     """
     # Plain state first; it does not depend on any client.
     self.valid_commands = ['bamqc', 'multi-bamqc']
     self.scratch_dir = scratch_dir

     # Clients reached through the SDK callback server.
     self.rau = ReadsAlignmentUtils(callback_url)
     self.kbr = KBaseReport(callback_url)
     self.dfu = DataFileUtil(callback_url)

     # Clients with their own endpoints.
     self.set_api = SetAPI(srv_wiz_url)
     self.ws = Workspace(workspace_url)
    def _generate_report(self, result_directory, mutual_info_dict,
                         workspace_name):
        """
        Package the mutual-information results as an HTML report.

        An 'index.html' (written by _make_index_html from
        mutual_info_dict[1]) and a 'pdata.json' dump of mutual_info_dict[0]
        are placed in a fresh sub-directory of self.scratch. The directory is
        uploaded as a tar.gz through self.dfu.file_to_shock and linked from a
        new extended report.

        :param result_directory: not used by this method; kept so existing
                                 callers keep working
        :param mutual_info_dict: indexable pair - item 0 is serialised to
                                 JSON, item 1 is handed to _make_index_html
        :param workspace_name: workspace the report object is saved to
        :return: dict with 'report_name' and 'report_ref'
        """
        report_suffix = str(uuid.uuid4())
        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(output_directory)

        index_path = os.path.join(output_directory, "index.html")
        self._make_index_html(index_path, mutual_info_dict[1])

        # The context manager guarantees pdata.json is flushed and closed
        # before the directory is packed and uploaded below.
        pdata_path = os.path.join(output_directory, 'pdata.json')
        with open(pdata_path, 'w') as pdata_file:
            json.dump(mutual_info_dict[0], pdata_file)

        log('report directory {} contains {}'.format(
            output_directory, os.listdir(output_directory)))

        # DataFileUtils to shock
        report_shock_result = self.dfu.file_to_shock({
            'file_path': output_directory,
            'pack': 'targz'
        })
        log('report upload result: {}'.format(report_shock_result))

        report_file = {
            'name': 'index.html',
            'description': 'the report',
            'shock_id': report_shock_result['shock_id']
        }

        log('creating report')
        report_params = {
            'message': '',
            'workspace_name': workspace_name,
            'html_links': [report_file],
            'file_links': [],
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'MutualInfomation_report_' + report_suffix
        }

        kbase_report_client = KBaseReport(self.callback_url)
        output = kbase_report_client.create_extended_report(report_params)

        return {
            'report_name': output['name'],
            'report_ref': output['ref']
        }
Beispiel #18
0
    def compound_set_to_file(self, ctx, params):
        """
        CompoundSetToFile
        string compound_set_name
        string output_format
        :param params: instance of type "compoundset_download_params" ->
           structure: parameter "workspace_name" of String, parameter
           "compound_set_name" of String, parameter "output_format" of String
        :returns: instance of type "compoundset_download_results" ->
           structure: parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN compound_set_to_file
        required = ['workspace_name', 'compound_set_name', 'output_format']
        self._check_required_param(params, required)

        # Fetch the compound set data by workspace and object name.
        object_spec = {'workspace': params['workspace_name'],
                       'name': params['compound_set_name']}
        compoundset = self.ws_client.get_objects2(
            {'objects': [object_spec]})['data'][0]['data']

        ext = params['output_format']
        out = "%s/%s.%s" % (self.scratch, compoundset['name'], ext)

        # Only the two supported formats have a writer.
        if ext != 'sdf' and ext != 'tsv':
            raise ValueError('Invalid output file type. Expects tsv or sdf')
        if ext == 'sdf':
            outfile_path = parse.write_sdf(compoundset, out)
        else:
            outfile_path = parse.write_tsv(compoundset, out)

        file_label = os.path.basename(outfile_path)
        report_files = [{
            'path': outfile_path,
            'name': file_label,
            'label': file_label,
            'description': 'A compound set in %s format' % ext,
        }]

        message = 'Converted %s compound set to %s format.' % (
            params['compound_set_name'], params['output_format'])
        report_params = {
            'objects_created': [],
            'message': message,
            'file_links': report_files,
            'workspace_name': params['workspace_name'],
            'report_object_name': 'compound_set_download_report',
        }

        # Construct the output to send back
        report_client = KBaseReport(self.callback_url)
        report_info = report_client.create_extended_report(report_params)
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END compound_set_to_file

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method compound_set_to_file return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Beispiel #19
0
    def _generate_compare_report(self, compare_outfile, workspace_name):
        """
        Build the HTML report for sourmash compare and register it.

        The compare output file and its '.labels.txt', '.dendro.png',
        '.hist.png' and '.matrix.png' companions are copied into a fresh
        sub-directory of self.scratch together with a generated 'index.html'
        that embeds the three images. The directory is zipped and uploaded
        with file_to_shock and linked from a new extended report.

        :param compare_outfile: path of the compare output; the companion
                                files are expected next to it under the same
                                base name
        :param workspace_name: workspace the report object is saved to
        :return: the unmodified response of create_extended_report
        """
        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(output_directory)

        report_html_file = os.path.join(output_directory, 'index.html')

        # '' copies the compare output itself, the rest its companion files.
        for suffix in ('', '.labels.txt', '.dendro.png', '.hist.png',
                       '.matrix.png'):
            shutil.copy(compare_outfile + suffix, output_directory)

        base = os.path.basename(compare_outfile)

        # The context manager closes the page even if a write fails, so the
        # handle is never leaked and the file is complete before the upload.
        with open(report_html_file, 'w') as html_file:
            html_file.write('<HTML><BODY>')
            html_file.write('<img src="{}"><img src="{}"><img src="{}">'.format(
                base + '.dendro.png', base + '.hist.png', base + '.matrix.png'))
            html_file.write('</BODY></HTML>')

        dfu = _DFUClient(self.callbackURL)
        shock = dfu.file_to_shock({
            'file_path': output_directory,
            'make_handle': 0,
            'pack': 'zip'
        })

        html_name = os.path.basename(report_html_file)
        report_params = {
            'message': '',
            'workspace_name': workspace_name,
            'html_links': [{
                'path': report_html_file,
                'shock_id': shock['shock_id'],
                'name': html_name,
                'label': html_name,
                'description': 'HTML report for sourmash compare'
            }],
            'direct_html_link_index': 0,
            'html_window_height': 266,
            'report_object_name':
            'kb_sourmash_compare_report_' + str(uuid.uuid4())
        }

        kbase_report_client = KBaseReport(self.callbackURL)
        output = kbase_report_client.create_extended_report(report_params)

        return output
 def __init__(self, config):
     """
     Read the service settings from config and prepare scratch and clients.

     A uniquely named 'emmax_assoc_<uuid>' directory is created below
     config['scratch'] and used as this instance's scratch directory.

     :param config: mapping providing 'workspace-url', 'SDK_CALLBACK_URL',
                    'token', 'shock-url' and 'scratch'
     """
     self.config = config

     self.ws_url = config["workspace-url"]
     self.callback_url = config['SDK_CALLBACK_URL']
     self.token = config['token']
     self.shock_url = config['shock-url']

     scratch_name = 'emmax_assoc_'+str(uuid.uuid4())
     self.scratch = os.path.join(config['scratch'], scratch_name)
     os.mkdir(self.scratch)

     self.dfu = DataFileUtil(self.callback_url)
     self.kbr = KBaseReport(self.callback_url, token=self.config['token'])
Beispiel #21
0
    def _generate_report(self, diff_expression_obj_ref, params,
                         result_directory):
        """
        Create the kb_deseq2 summary report.

        The DifferentialExpressionMatrixSet and every matrix referenced in
        its 'items' list are reported as created objects.

        :param diff_expression_obj_ref: reference of the matrix set
        :param params: dict of method parameters; supplies 'workspace_name'
                       and is forwarded to _generate_html_report
        :param result_directory: handed to the file-list and HTML helpers
        :return: dict with 'report_name' and 'report_ref'
        """
        log('creating report')

        file_links = self._generate_output_file_list(result_directory)
        html_links = self._generate_html_report(
            result_directory, diff_expression_obj_ref, params)

        set_data = self.ws.get_objects2(
            {'objects': [{'ref': diff_expression_obj_ref}]})['data'][0]['data']
        members = set_data['items']

        # The set comes first, followed by one entry per member matrix.
        created = [{
            'ref': diff_expression_obj_ref,
            'description': 'DifferentialExpressionMatrixSet generated by DESeq2',
        }]
        created.extend(
            {'ref': member['ref'],
             'description': 'DifferentialExpressionMatrix generated by DESeq2'}
            for member in members)

        report_request = {
            'message': '',
            'workspace_name': params.get('workspace_name'),
            'objects_created': created,
            'file_links': file_links,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'kb_deseq2_report_' + str(uuid.uuid4()),
        }

        reporter = KBaseReport(self.callback_url)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
    def generate_report(self, obj_refs_list, params):
        """
        Create the summary report for imported reads objects.

        Each reference is fetched individually through self.dfu.get_objects;
        the fetched data feeds generate_html_report and every reference is
        listed as an 'Imported Reads' object.

        :param obj_refs_list: generated workspace object references (return
                              of import_sra_from_staging/web)
        :param params: dict of method parameters, forwarded to
                       generate_html_report; 'workspace_name' (workspace
                       name/ID the reads are stored to) is read here
        :return: dict with 'report_name' and 'report_ref'
        """
        name_suffix = str(uuid.uuid4())

        fetched = [
            self.dfu.get_objects({'object_refs': [ref],
                                  'ignore_errors': False})
            for ref in obj_refs_list
        ]
        created = [{'ref': ref, 'description': 'Imported Reads'}
                   for ref in obj_refs_list]

        html_links = self.generate_html_report(fetched, params, name_suffix)

        report_request = {
            'message': '',
            'workspace_name': params.get('workspace_name'),
            'objects_created': created,
            'html_links': html_links,
            'direct_html_link_index': 0,
            'html_window_height': 460,
            'report_object_name': 'kb_sra_upload_report_' + name_suffix,
        }

        reporter = KBaseReport(self.callback_url, token=self.token)
        report_info = reporter.create_extended_report(report_request)

        return {'report_name': report_info['name'],
                'report_ref': report_info['ref']}
Beispiel #23
0
    def load_report(self, input_file_name, params, wsname):
        """
        Summarise an assembly, run QUAST on it and save a KBase report.

        The text message lists the assembly reference, the number of contigs,
        their average length and a ten-bin length histogram; the QUAST result
        is attached as the HTML link.

        :param input_file_name: assembly file handed to load_stats and QUAST
        :param params: dict providing the workspace and assembly names (keys
                       self.PARAM_IN_WS, self.PARAM_IN_CS_NAME) and
                       'workspace_name'
        :param wsname: not used by this method
        :return: tuple (report name, report reference)
        """
        contig_stats = self.load_stats(input_file_name)
        lengths = [contig_stats[name] for name in contig_stats]

        assembly_name = params[self.PARAM_IN_CS_NAME]
        assembly_ref = params[self.PARAM_IN_WS] + '/' + assembly_name

        # NOTE(review): an assembly without contigs makes the average below
        # raise ZeroDivisionError.
        lines = [
            'Assembly saved to: ' + assembly_ref + '\n',
            'Assembled into ' + str(len(lengths)) + ' contigs.\n',
            'Avg Length: ' + str(sum(lengths) / float(len(lengths))) + ' bp.\n',
        ]

        # compute a simple contig length distribution
        bins = 10
        counts, edges = np.histogram(lengths, bins)  # @UndefinedVariable
        lines.append('Contig Length Distribution (# of contigs -- min to max '
                     'basepairs):\n')
        for idx in range(bins):
            lines.append('   ' + str(counts[idx]) + '\t--\t' + str(edges[idx]) +
                         ' to ' + str(edges[idx + 1]) + ' bp\n')
        report = ''.join(lines)

        print('Running QUAST')
        quast_client = kb_quast(self.callbackURL)
        quast_result = quast_client.run_QUAST({
            'files': [{'path': input_file_name, 'label': assembly_name}]
        })

        print('Saving report')
        reporter = KBaseReport(self.callbackURL)
        report_request = {
            'message': report,
            'objects_created': [
                {'ref': assembly_ref, 'description': 'Assembled contigs'},
            ],
            'direct_html_link_index': 0,
            'html_links': [{
                'shock_id': quast_result['shock_id'],
                'name': 'report.html',
                'label': 'QUAST report',
            }],
            'report_object_name': 'kb_IDBA-UD_report_' + str(uuid.uuid4()),
            'workspace_name': params['workspace_name'],
        }
        report_info = reporter.create_extended_report(report_request)

        return report_info['name'], report_info['ref']
Beispiel #24
0
    def __init__(self, utility_params):
        """
        Store the parameters, create a private scratch directory and clients.

        A uniquely named 'variation_importer_<uuid>' directory is created
        below utility_params['scratch'] and used as self.scratch.

        :param utility_params: mapping providing 'scratch', 'srv-wiz-url',
                               'callback_url' and 'token'
        """
        self.params = utility_params

        scratch_name = 'variation_importer_' + str(uuid.uuid4())
        self.scratch = os.path.join(utility_params['scratch'], scratch_name)
        os.mkdir(self.scratch)

        self.service_wiz_url = utility_params['srv-wiz-url']
        self.callback_url = utility_params['callback_url']

        self.dfu = DataFileUtil(self.callback_url)
        self.kbr = KBaseReport(self.callback_url,
                               token=utility_params['token'])
    def generate_report(self, obj_ref, params):
        """
        generate_report: generate summary report

        obj_ref: generated workspace object references. (return of
                                                         import_fasta_as_assembly_from_staging)
        params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        workspace_name: workspace name/ID that reads will be stored to

        return: dict with the 'report_name' and 'report_ref' of the
                created report
        """
        # Imported locally so the integer check below works on both
        # Python 2 and Python 3 (the `long` builtin no longer exists on 3).
        import numbers

        uuid_string = str(uuid.uuid4())

        get_objects_params = {'object_refs': [obj_ref], 'ignore_errors': False}

        # fetch the object once and reuse it for every field read below
        assembly_object = self.dfu.get_objects(get_objects_params).get('data')[0]
        assembly_data = assembly_object.get('data')
        base_count = assembly_data.get('base_counts')
        dna_size = assembly_data.get('dna_size')

        upload_message = 'Import Finished\n'
        upload_message += "Assembly Object Name: "
        upload_message += str(assembly_object.get('info')[1]) + '\n'
        upload_message += 'Imported Fasta File: {}\n'.format(
            params.get('staging_file_subdir_path'))

        # the optional statistics are only reported when they have the
        # expected type
        if isinstance(dna_size, numbers.Integral):
            upload_message += 'DNA Size: {:,}\n'.format(dna_size)

        if isinstance(base_count, dict):
            # [2:-2] strips the enclosing braces and newlines of the JSON dump
            upload_message += 'Base Count:\n{}\n'.format(
                json.dumps(base_count, indent=1)[2:-2])

        report_params = {
            'message': upload_message,
            'workspace_name': params.get('workspace_name'),
            'report_object_name': 'kb_upload_mothods_report_' + uuid_string
        }

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {
            'report_name': output['name'],
            'report_ref': output['ref']
        }

        return report_output
Beispiel #26
0
    def _save_output_to_kbase(self, io_params, app_params, output_dir, run_log,
                              run_command):
        """
        Upload the filtered reads (when present) and create the run report.

        io_params: dict; 'output_workspace_name' and 'output_library_name'
                   are required, 'read_library_ref' is optional
        app_params: not used by this method
        output_dir: directory holding the tool output, including
                    'file-list.txt'
        run_log: passed on to the file-report builder
        run_command: returned unchanged in the result

        return: dict with 'report_name', 'report_ref' and 'run_command'
        """
        # TODO: insert the run_command into the output log
        #
        # read the output file list
        file_lookup = self._read_outputfile(
            os.path.join(output_dir, 'file-list.txt'))

        # save the new reads
        # Start from an empty list, not None: the report's objects_created
        # field is a list, so "nothing created" is sent as [].
        objects_created = []
        if 'filtered_fastq' not in file_lookup:
            print(
                'No filtered fastq file found in output! Not creating a filtered reads object.'
            )
        else:
            filtered_fastq_path = os.path.join(output_dir,
                                               file_lookup['filtered_fastq'])
            filtered_reads_ref = upload_interleaved_reads(
                self.callback_url, filtered_fastq_path,
                io_params['output_workspace_name'],
                io_params['output_library_name'],
                io_params.get('read_library_ref'))
            objects_created.append({
                'ref': filtered_reads_ref,
                'description': 'Filtered reads library'
            })

        # build the HTML report
        html_zipped = self._build_html_report(
            io_params.get('read_library_ref'), output_dir, file_lookup)
        file_links = self._build_file_report(output_dir, run_log)

        # save the report
        report_params = {
            'message': '',
            'objects_created': objects_created,
            'direct_html_link_index': 0,
            'html_links': [html_zipped],
            'file_links': file_links,
            'report_object_name':
            'bbtools_rqcfilter_report_' + str(uuid.uuid4()),
            'workspace_name': io_params['output_workspace_name']
        }

        kr = KBaseReport(self.callback_url)
        report_output = kr.create_extended_report(report_params)

        return {
            'report_name': report_output['name'],
            'report_ref': report_output['ref'],
            'run_command': run_command
        }
Beispiel #27
0
    def build_report(self, params, reads_refs, alignments, alignment_set=None):
        """
        Builds and uploads the HISAT2 report.

        params: dict; 'genome_ref' and 'ws_name' are read here
        reads_refs: not used by this method
        alignments: dict keyed by reads identifier; each value is a dict
                    with a 'ref' entry for the alignment object
        alignment_set: optional reference of the set holding all alignments

        return: the value returned by KBaseReport.create_extended_report
        raises: RuntimeError when QualiMap produced no HTML file
        """
        report_client = KBaseReport(self.callback_url)

        created_objects = [{
            "ref": alignments[k]["ref"],
            "description":
            "Reads {} aligned to Genome {}".format(k, params["genome_ref"])
        } for k in alignments]
        if alignment_set is not None:
            created_objects.append({
                "ref": alignment_set,
                "description": "Set of all new alignments"
            })

        report_text = "Created {} alignments from the given alignment set.".format(
            len(alignments))

        qm = kb_QualiMap(self.callback_url, service_ver='dev')
        qc_ref = alignment_set
        if qc_ref is None:  # then there's only one alignment...
            # list(...) keeps this working on Python 3, where the dict views
            # returned by keys()/values() cannot be indexed
            qc_ref = list(alignments.values())[0]["ref"]
        bamqc_params = {"create_report": 0, "input_ref": qc_ref}
        result = qm.run_bamqc(bamqc_params)

        # pick the HTML file in the QualiMap output folder (the last one
        # listed wins when there are several)
        index_file = None
        for f in os.listdir(result["qc_result_folder_path"]):
            if f.endswith(".html"):
                index_file = f
        if index_file is None:
            raise RuntimeError(
                "QualiMap failed - no HTML file was found in the generated output."
            )
        html_zipped = package_directory(self.callback_url,
                                        result["qc_result_folder_path"],
                                        index_file, 'QualiMap Results')
        report_params = {
            "message": report_text,
            "direct_html_link_index": 0,
            "html_links": [html_zipped],
            "report_object_name": "QualiMap-" + str(uuid.uuid4()),
            "workspace_name": params["ws_name"],
            "objects_created": created_objects
        }

        report_info = report_client.create_extended_report(report_params)
        return report_info
Beispiel #28
0
    def _generate_star_report(self, obj_ref, report_text, html_links, workspace_name, index_dir, output_dir):
        """
        _generate_star_report: generate summary report

        obj_ref: reference of the workspace object produced by the run; its
                 type decides which objects are listed as created
        report_text: text used as the report message
        html_links: extra HTML link entries appended after the generated ones
        workspace_name: workspace the report is saved to
        index_dir, output_dir: directories handed to the output-file and
                               HTML-report helpers

        return: the value returned by KBaseReport.create_extended_report
        """
        log('creating STAR report')

        output_files = self._generate_output_file_list(index_dir, output_dir)
        output_html_files = self._generate_html_report(output_dir, obj_ref)
        output_html_files += html_links

        star_obj = self.ws_client.get_objects2({'objects': [{'ref': obj_ref}]})['data'][0]
        star_obj_info = star_obj['info']
        star_obj_data = star_obj['data']

        star_obj_type = star_obj_info[2]

        # Default to an empty list so an unrecognised object type still yields
        # a report instead of failing on an unbound variable.
        objects_created = []

        # Raw strings with escaped dots: the type name must match literally
        # and '\d' is not a valid escape in a normal string literal.
        if re.match(r'KBaseRNASeq\.RNASeqAlignment-\d+\.\d+', star_obj_type):
            objects_created = [{'ref': obj_ref,
                                'description': 'RNASeqAlignment generated by STAR'}]
        elif (re.match(r'KBaseRNASeq\.RNASeqAlignmentSet-\d+\.\d+', star_obj_type)
                or re.match(r'KBaseSets\.ReadsAlignmentSet-\d+\.\d+', star_obj_type)
                or re.match(r'KBaseSet\.RNASeqAlignmentSet-\d+\.\d+', star_obj_type)):
            # describe the set by its type name without the version suffix
            type_name = re.sub(r"-\d+\.\d+", "", star_obj_type)
            objects_created = [{'ref': obj_ref,
                                'description': '{} generated by STAR'.format(type_name)}]
            for item in star_obj_data['items']:
                objects_created.append({'ref': item['ref'],
                                        'description': 'Alignment generated by STAR'})
        elif re.match(r'KBaseRNASeq\.RNASeqExpression-\d+\.\d+', star_obj_type):
            objects_created = [{'ref': obj_ref,
                                'description': 'Expression generated by STAR'}]
        elif re.match(r'KBaseSets\.ExpressionSet-\d+\.\d+', star_obj_type):
            objects_created = [{'ref': obj_ref,
                                'description': 'ExpressionSet generated by STAR'}]
            for item in star_obj_data['items']:
                objects_created.append({'ref': item['ref'],
                                        'description': 'Expression generated by STAR'})
        else:
            log('unrecognized object type {}; no created objects listed in the report'.format(
                star_obj_type))

        report_params = {'message': report_text,
                         'workspace_name': workspace_name,
                         'file_links': output_files,
                         'objects_created': objects_created,
                         'html_links': output_html_files,
                         'direct_html_link_index': 0,
                         'html_window_height': 366,
                         'report_object_name': 'kb_STAR_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url)
        report_output = kbase_report_client.create_extended_report(report_params)

        return report_output
    def generate_report(self, obj_ref, params):
        """
        generate_report: build and save the summary report for an import

        obj_ref: reference of the generated workspace object (return of
                 import_phenotype_set_from_staging)
        params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        workspace_name: workspace name/ID that reads will be stored to

        return: dict with the 'report_name' and 'report_ref' of the report
        """
        report_object_name = 'kb_upload_mothods_report_' + str(uuid.uuid4())

        # look the imported object up to report the name it was saved under
        fetched = self.dfu.get_objects(
            {'object_refs': [obj_ref], 'ignore_errors': False})
        saved_name = fetched.get('data')[0].get('info')[1]

        message_parts = [
            'Import Finished\n',
            "Phenotype Set Name: ",
            str(saved_name) + '\n',
            'Imported File: {}\n'.format(
                params.get('staging_file_subdir_path')),
        ]
        upload_message = ''.join(message_parts)

        created = [{
            'ref': obj_ref,
            'description': 'Imported Phenotype Set'
        }]
        report_params = {
            'message': upload_message,
            'objects_created': created,
            'workspace_name': params.get('workspace_name'),
            'report_object_name': report_object_name
        }

        report_client = KBaseReport(self.callback_url, token=self.token)
        saved_report = report_client.create_extended_report(report_params)

        return {
            'report_name': saved_report['name'],
            'report_ref': saved_report['ref']
        }
Beispiel #30
0
    def _generate_report(self, obj_ref, workspace_name, result_directory,
                         exprMatrix_FPKM_ref=None, exprMatrix_TPM_ref=None):
        """
        _generate_report: generate summary report

        obj_ref: reference of the expression object or expression set
        workspace_name: workspace the report is saved to
        result_directory: directory handed to the output-file and HTML helpers
        exprMatrix_FPKM_ref: optional reference of the FPKM expression matrix;
                             listed only for expression sets and only if given
        exprMatrix_TPM_ref: optional reference of the TPM expression matrix;
                            listed only for expression sets and only if given

        return: dict with the 'report_name' and 'report_ref' of the report
        """

        log('creating report')

        output_files = self._generate_output_file_list(result_directory)
        output_html_files = self._generate_html_report(result_directory,
                                                       obj_ref)

        expression_object = self.ws.get_objects2({'objects':
                                                 [{'ref': obj_ref}]})['data'][0]
        expression_info = expression_object['info']
        expression_data = expression_object['data']

        expression_object_type = expression_info[2]

        # Default to an empty list so an unrecognised object type still yields
        # a report instead of failing on an unbound variable.
        objects_created = []

        # Raw strings with escaped dots: the type name must match literally
        # and '\d' is not a valid escape in a normal string literal.
        if re.match(r'KBaseRNASeq\.RNASeqExpression-\d+\.\d+', expression_object_type):
            objects_created = [{'ref': obj_ref,
                                'description': 'Expression generated by StringTie'}]
        elif re.match(r'KBaseSets\.ExpressionSet-\d+\.\d+', expression_object_type):
            objects_created = [{'ref': obj_ref,
                                'description': 'ExpressionSet generated by StringTie'}]
            for item in expression_data['items']:
                objects_created.append({'ref': item['ref'],
                                        'description': 'Expression generated by StringTie'})
            # Both matrix references default to None; skip the ones that were
            # not supplied so no entry with an empty 'ref' reaches the report.
            if exprMatrix_FPKM_ref:
                objects_created.append({'ref': exprMatrix_FPKM_ref,
                                        'description': 'FPKM ExpressionMatrix generated by StringTie'})
            if exprMatrix_TPM_ref:
                objects_created.append({'ref': exprMatrix_TPM_ref,
                                        'description': 'TPM ExpressionMatrix generated by StringTie'})
        else:
            log('unrecognized object type {}; no created objects listed in the report'.format(
                expression_object_type))

        report_params = {'message': '',
                         'workspace_name': workspace_name,
                         'file_links': output_files,
                         'objects_created': objects_created,
                         'html_links': output_html_files,
                         'direct_html_link_index': 0,
                         'html_window_height': 366,
                         'report_object_name': 'kb_stringtie_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output
    def _generate_report(self, expression_matrix_ref, workspace_name):
        """
        _generate_report: save a report listing the averaged matrix

        expression_matrix_ref: reference of the average ExpressionMatrix
        workspace_name: workspace the report is saved to

        return: dict with the 'report_name' and 'report_ref' of the report
        """
        created = [{
            'ref': expression_matrix_ref,
            'description': 'Average ExpressionMatrix',
        }]

        # no HTML links are attached to this report
        report_params = {
            'message': '',
            'workspace_name': workspace_name,
            'objects_created': created,
            'html_window_height': 366,
            'report_object_name': 'kb_ave_expr_matrix_report_' + str(uuid.uuid4()),
        }

        report_client = KBaseReport(self.callback_url, token=self.token)
        saved_report = report_client.create_extended_report(report_params)

        return {
            'report_name': saved_report['name'],
            'report_ref': saved_report['ref'],
        }
Beispiel #32
0
    def create_search_report(self, wsname, id_to_similarity, search_db):
        """
        Write the search results as an HTML page, upload it and save a report.

        wsname: workspace the report is saved to
        id_to_similarity: mapping whose keys are result ids; passed to the
                          link-mapping and result-writing helpers
        search_db: passed to the link-mapping helper

        return: the value returned by KBaseReport.create_extended_report
        """
        report_dir = os.path.join(self.tmp, 'search_report')
        self._mkdir_p(report_dir)

        link_by_id = self._create_link_mapping(search_db, id_to_similarity.keys())
        index_path = os.path.join(report_dir, 'index.html')
        self._write_search_results(index_path, id_to_similarity, link_by_id)

        log('Saving Mash search report')

        # zip the whole report directory and push it to shock
        shock_request = {'file_path': report_dir, 'make_handle': 0, 'pack': 'zip'}
        dfu = _DFUClient(self.callbackURL)
        try:
            dfuout = dfu.file_to_shock(shock_request)
        except _DFUError as upload_err:
            # not really any way to test this block
            log('Logging exception loading results to shock')
            log(str(upload_err))
            raise
        log('saved report to shock node ' + dfuout['shock_id'])

        try:
            kbr = KBaseReport(self.callbackURL)
            html_link = {'shock_id': dfuout['shock_id'],
                         'name': 'index.html',
                         'label': 'Sourmash search results'}
            return kbr.create_extended_report({
                'direct_html_link_index': 0,
                'html_links': [html_link],
                'report_object_name': 'kb_sourmash_report_' + str(uuid.uuid4()),
                'workspace_name': wsname,
            })
        except _RepError as report_err:
            log('Logging exception from creating report object')
            log(str(report_err))
            # TODO delete shock node
            raise
    def _generate_report(self, up_feature_set_ref_list, down_feature_set_ref_list,
                         filtered_expression_matrix_ref_list, workspace_name):
        """
        _generate_report: save the summary report for the created objects

        up_feature_set_ref_list: references listed as upper FeatureSets
        down_feature_set_ref_list: references listed as lower FeatureSets
        filtered_expression_matrix_ref_list: references listed as filtered
                                             ExpressionMatrix objects
        workspace_name: workspace the report is saved to

        return: dict with the 'report_name' and 'report_ref' of the report
        """

        log('start creating report')

        html_files = self._generate_html_report(up_feature_set_ref_list,
                                                down_feature_set_ref_list)

        def describe(refs, description):
            # one report entry per reference, all sharing one description
            return [{'ref': ref, 'description': description} for ref in refs]

        # same order as before: upper sets, lower sets, filtered matrices
        objects_created = (
            describe(up_feature_set_ref_list, 'Upper FeatureSet Object')
            + describe(down_feature_set_ref_list, 'Lower FeatureSet Object')
            + describe(filtered_expression_matrix_ref_list,
                       'Filtered ExpressionMatrix Object')
        )

        report_params = {
            'message': '',
            'workspace_name': workspace_name,
            'objects_created': objects_created,
            'html_links': html_files,
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'kb_FeatureSetUtils_report_' + str(uuid.uuid4()),
        }

        report_client = KBaseReport(self.callback_url)
        saved_report = report_client.create_extended_report(report_params)

        return {
            'report_name': saved_report['name'],
            'report_ref': saved_report['ref'],
        }
    def do_assembly(self, assemblyRef, wsName):
        """
        Run the VirSorter wrapper on an assembly and save a text report.

        The assembly is downloaded as FASTA, moved into a fresh working
        directory, processed by wrapper_phage_contigs_sorter_iPlant.pl, and
        the tool output plus 'VIRSorter_global-phage-signal.csv' are stored
        as the report message.

        assemblyRef: workspace reference of the assembly to process
        wsName: name of the workspace the report is saved to

        return: tuple (reportName, reportRef) of the created report
        """
        # TODO create file here /kb/module/work
        # TODO set output file name
        print("SDK_CALLBACK_URL " + os.environ['SDK_CALLBACK_URL'])
        au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])

        input_fasta_file = au.get_assembly_as_fasta({'ref': assemblyRef})

        newtmp = "/kb/module/work/tmp/tmp_" + self.create_random_string()
        os.mkdir(newtmp)
        os.mkdir(newtmp + "/input")

        newfasta = newtmp + "/input/" + os.path.basename(input_fasta_file['path'])
        print("newfasta " + newfasta)

        os.rename(input_fasta_file['path'], newfasta)

        # Pass the command as an argument list without a shell, so paths that
        # contain spaces or shell metacharacters reach the tool unchanged.
        args = ["wrapper_phage_contigs_sorter_iPlant.pl", "--db", "2",
                "--fna", newfasta, "--wdir", newtmp]
        # human-readable form of the command, used only in logs and the report
        cmdstring = " ".join(args)

        print(str(args))

        print("Executing")
        # universal_newlines=True yields text output on Python 2 and 3, so it
        # can be concatenated with the str literals below.
        cmdProcess = subprocess.Popen(args, stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      universal_newlines=True)
        stdout, stderr = cmdProcess.communicate()
        # printed after communicate() so "Done" means the process has finished
        print("Done " + str(cmdProcess))
        print(" stdout: " + stdout)
        print(" stderr: " + stderr)

        # Build a Report and return
        print('Saving report')
        kbr = KBaseReport(self.callback_url, service_ver='dev')
        report = "cmdstring: " + str(cmdstring) + " stdout: " + str(stdout) + " stderr: " + str(stderr)

        virout = newtmp + "/" + "VIRSorter_global-phage-signal.csv"
        with open(virout, 'r') as myfile:
            data = myfile.read().replace('\n', '')

        print("wsName " + str(wsName))

        # make the CSV readable as text: tabs between fields and a line break
        # before every '##' marker
        data = data.replace(",", "\t")
        data = data.replace("##", "\n##")
        report = report + "\n\n***** VirSorter output *****\n" + data
        report_data = {'message': report,
                       'objects_created': None,
                       'direct_html_link_index': None,
                       'html_links': None,
                       'report_object_name': 'kb_virsorter_' + str(uuid.uuid4()),
                       'workspace_name': wsName
                       }

        print("report_data")
        print(str(report_data))
        report_info = kbr.create_extended_report(report_data)

        reportName = report_info['name']
        reportRef = report_info['ref']
        return reportName, reportRef
Beispiel #35
0
    def run_megahit(self, ctx, params):
        """
        :param params: instance of type "MegaHitParams" (Run MEGAHIT.  Most
           parameters here are just passed forward to MEGAHIT workspace_name
           - the name of the workspace for input/output read_library_ref -
           the name of the PE read library (SE library support in the future)
           output_contig_set_name - the name of the output contigset
           megahit_parameter_preset - override a group of parameters;
           possible values: meta            '--min-count 2 --k-list
           21,41,61,81,99' (generic metagenomes, default) meta-sensitive 
           '--min-count 2 --k-list 21,31,41,51,61,71,81,91,99' (more
           sensitive but slower) meta-large      '--min-count 2 --k-list
           27,37,47,57,67,77,87' (large & complex metagenomes, like soil)
           bulk            '--min-count 3 --k-list 31,51,71,91,99 --no-mercy'
           (experimental, standard bulk sequencing with >= 30x depth)
           single-cell     '--min-count 3 --k-list 21,33,55,77,99,121
           --merge_level 20,0.96' (experimental, single cell data) min_count
           - minimum multiplicity for filtering (k_min+1)-mers, default 2
           min_k - minimum kmer size (<= 127), must be odd number, default 21
           max_k - maximum kmer size (<= 127), must be odd number, default 99
           k_step - increment of kmer size of each iteration (<= 28), must be
           even number, default 10 k_list - list of kmer size (all must be
           odd, in the range 15-127, increment <= 28); override `--k-min',
           `--k-max' and `--k-step' min_contig_length - minimum length of
           contigs to output, default is 2000 @optional
           megahit_parameter_preset @optional min_count @optional k_min
           @optional k_max @optional k_step @optional k_list @optional
           min_contig_length) -> structure: parameter "workspace_name" of
           String, parameter "read_library_ref" of String, parameter
           "output_contigset_name" of String, parameter
           "megahit_parameter_preset" of String, parameter "min_count" of
           Long, parameter "k_min" of Long, parameter "k_max" of Long,
           parameter "k_step" of Long, parameter "k_list" of list of Long,
           parameter "min_contig_length" of Long
        :returns: instance of type "MegaHitOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_megahit
        print('Running run_megahit with params=')
        pprint(params)

        # STEP 1: basic parameter checks + parsing
        if 'workspace_name' not in params:
            raise ValueError('workspace_name parameter is required')
        if 'read_library_ref' not in params:
            raise ValueError('read_library_ref parameter is required')
        if 'output_contigset_name' not in params:
            raise ValueError('output_contigset_name parameter is required')

        # STEP 2: get the read library as deinterleaved fastq files
        input_ref = params['read_library_ref']
        reads_params = {'read_libraries': [input_ref],
                        'interleaved': 'false',
                        'gzipped': None
                        }
        ru = ReadsUtils(self.callbackURL)
        reads = ru.download_reads(reads_params)['files']

        print('Input reads files:')
        fwd = reads[input_ref]['files']['fwd']
        rev = reads[input_ref]['files']['rev']
        pprint('forward: ' + fwd)
        pprint('reverse: ' + rev)

        # STEP 3: run megahit
        # construct the command
        megahit_cmd = [self.MEGAHIT]

        # we only support PE reads, so add that
        megahit_cmd.append('-1')
        megahit_cmd.append(fwd)
        megahit_cmd.append('-2')
        megahit_cmd.append(rev)

        # if a preset is defined, use that:
        if 'megahit_parameter_preset' in params:
            if params['megahit_parameter_preset']:
                megahit_cmd.append('--presets')
                megahit_cmd.append(params['megahit_parameter_preset'])

        if 'min_count' in params:
            if params['min_count']:
                megahit_cmd.append('--min-count')
                megahit_cmd.append(str(params['min_count']))
        if 'k_min' in params:
            if params['k_min']:
                megahit_cmd.append('--k-min')
                megahit_cmd.append(str(params['k_min']))
        if 'k_max' in params:
            if params['k_max']:
                megahit_cmd.append('--k-max')
                megahit_cmd.append(str(params['k_max']))
        if 'k_step' in params:
            if params['k_step']:
                megahit_cmd.append('--k-step')
                megahit_cmd.append(str(params['k_step']))
        if 'k_list' in params:
            if params['k_list']:
                k_list = []
                for k_val in params['k_list']:
                    k_list.append(str(k_val))
                megahit_cmd.append('--k-list')
                megahit_cmd.append(','.join(k_list))

        min_contig_length = self.DEFAULT_MIN_CONTIG_LENGTH
        if 'min_contig_length' in params:
            if params['min_contig_length']:
                if str(params['min_contig_length']).isdigit():
                    min_contig_length = params['min_contig_length']
                else:
                    raise ValueError('min_contig_length parameter must be a non-negative integer')

        megahit_cmd.append('--min-contig-len')
        megahit_cmd.append(str(min_contig_length))

        # set the output location
        timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() * 1000)
        output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
        megahit_cmd.append('-o')
        megahit_cmd.append(output_dir)

        # run megahit
        print('running megahit:')
        print('    ' + ' '.join(megahit_cmd))
        p = subprocess.Popen(megahit_cmd, cwd=self.scratch, shell=False)
        retcode = p.wait()

        print('Return code: ' + str(retcode))
        if p.returncode != 0:
            raise ValueError('Error running MEGAHIT, return code: ' +
                             str(retcode) + '\n')

        output_contigs = os.path.join(output_dir, 'final.contigs.fa')

        # on macs, we cannot run megahit in the shared host scratch space, so we need to move the file there
        if self.mac_mode:
            shutil.move(output_contigs, os.path.join(self.host_scratch, 'final.contigs.fa'))
            output_contigs = os.path.join(self.host_scratch, 'final.contigs.fa')

        # STEP 4: save the resulting assembly
        assemblyUtil = AssemblyUtil(self.callbackURL)
        output_data_ref = assemblyUtil.save_assembly_from_fasta({
                                                                'file': {'path': output_contigs},
                                                                'workspace_name': params['workspace_name'],
                                                                'assembly_name': params['output_contigset_name']
                                                                })


        # STEP 5: generate and save the report

        # compute a simple contig length distribution for the report
        lengths = []
        for seq_record in SeqIO.parse(output_contigs, 'fasta'):
            lengths.append(len(seq_record.seq))

        report = ''
        report += 'ContigSet saved to: ' + params['workspace_name'] + '/' + params['output_contigset_name'] + '\n'
        report += 'Assembled into ' + str(len(lengths)) + ' contigs.\n'
        report += 'Avg Length: ' + str(sum(lengths) / float(len(lengths))) + ' bp.\n'

        bins = 10
        counts, edges = np.histogram(lengths, bins)
        report += 'Contig Length Distribution (# of contigs -- min to max basepairs):\n'
        for c in range(bins):
            report += '   ' + str(counts[c]) + '\t--\t' + str(edges[c]) + ' to ' + str(edges[c + 1]) + ' bp\n'

        print('Running QUAST')
        kbq = kb_quast(self.callbackURL)
        try:
            quastret = kbq.run_QUAST({'files': [{'path': output_contigs,
                                                 'label': params['output_contigset_name']}]})
        except QUASTError as qe:
            # not really any way to test this, all inputs have been checked earlier and should be
            # ok 
            print('Logging exception from running QUAST')
            print(str(qe))
            # TODO delete shock node
            raise

        print('Saving report')
        kbr = KBaseReport(self.callbackURL)
        try:
            report_info = kbr.create_extended_report(
                {'message': report,
                 'objects_created': [{'ref': output_data_ref, 'description': 'Assembled contigs'}],
                 'direct_html_link_index': 0,
                 'html_links': [{'shock_id': quastret['shock_id'],
                                 'name': 'report.html',
                                 'label': 'QUAST report'}
                                ],
                 'report_object_name': 'kb_megahit_report_' + str(uuid.uuid4()),
                 'workspace_name': params['workspace_name']
                 })
        except _RepError as re:
            # not really any way to test this, all inputs have been checked earlier and should be
            # ok 
            print('Logging exception from creating report object')
            print(str(re))
            # TODO delete shock node
            raise

        # STEP 6: construct the output to send back
        output = {'report_name': report_info['name'], 'report_ref': report_info['ref']}

        #END run_megahit

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_megahit return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Beispiel #36
0
    def fastqutils_stats(self, ctx, params):
        """
        Run ``fastqutils stats`` on every file of a read library and save the
        combined tool output as a text report.

        :param params: instance of type "FastqUtilsStatsParams" -> structure:
           parameter "workspace_name" of type "workspace_name" (A string
           representing a workspace name.), parameter "read_library_ref" of
           type "read_library_ref" (A string representing a ContigSet id.)
        :returns: instance of type "FastqUtilsStatsResult" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        :raises ValueError: if a required parameter is missing or the
           fastqutils process exits with a non-zero return code.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN fastqutils_stats

        print('Running fastqutils_stats with params=')
        print(pformat(params))

        # Both parameters are mandatory; fail early with the parameter's name.
        for required in ('workspace_name', 'read_library_ref'):
            if required not in params:
                raise ValueError(required + ' parameter is required')

        # Get the read library as deinterleaved fastq files
        input_ref = params['read_library_ref']
        reads_params = {'read_libraries': [input_ref],
                        'interleaved': 'false',
                        'gzipped': None
                        }
        ru = ReadsUtils(self.callbackURL, token=ctx['token'])
        read_files = ru.download_reads(reads_params)['files'][input_ref]['files']
        files = [read_files['fwd']]
        # The reverse file is optional; tolerate both a missing key and an
        # empty/None value.
        if read_files.get('rev'):
            files.append(read_files['rev'])
        print('running on files:')
        for f in files:
            print(f)

        # construct the command
        stats_cmd = [self.FASTQUTILS, 'stats']

        # Collect the report text in pieces and join once at the end.
        report_parts = []
        for f in files:
            # Build a fresh argument list for every file. Appending to the
            # shared base command would make the second invocation receive
            # both the forward and the reverse file.
            cmd = stats_cmd + [f]

            report_parts.append('============== ' + f + ' ==============\n')
            print('running: ' + ' '.join(cmd))
            # universal_newlines=True makes the pipe yield text rather than
            # bytes, so the lines can be concatenated with str on Python 3.
            p = subprocess.Popen(cmd,
                                 cwd=self.scratch,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=False,
                                 universal_newlines=True)

            # readline() returns '' only at EOF, which ends the iteration.
            for line in iter(p.stdout.readline, ''):
                report_parts.append(line)
                print(line.rstrip('\n'))

            p.stdout.close()
            p.wait()
            report_parts.append("\n\n")
            print('return code: ' + str(p.returncode))
            if p.returncode != 0:
                raise ValueError('Error running ' + self.FASTQUTILS + ', return code: ' + str(p.returncode))

        reportObj = {
            'objects_created': [],
            'text_message': ''.join(report_parts)
        }
        # Keep the client under its own name instead of rebinding the
        # variable that held the report text.
        report_client = KBaseReport(self.callbackURL)
        report_info = report_client.create({'report': reportObj,
                                            'workspace_name': params['workspace_name']})
        returnVal = {'report_name': report_info['name'], 'report_ref': report_info['ref']}

        #END fastqutils_stats

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method fastqutils_stats return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]