def generate_OTU_sheet(self, ctx, params):
    """
    Generate a customized OTU worksheet from a SampleSet input.

    :param params: instance of type "GenerateOTUSheetParams" (Generate a
       customized OTU worksheet using a SampleSet input to generate the
       appropriate columns.) -> structure: parameter "workspace_name" of
       String, parameter "workspace_id" of Long, parameter
       "sample_set_ref" of String, parameter "output_name" of String,
       parameter "output_format" of String, parameter "num_otus" of Long,
       parameter "taxonomy_source" of String, parameter "incl_seq" of
       Long, parameter "otu_prefix" of String
    :returns: instance of type "GenerateOTUSheetOutputs" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN generate_OTU_sheet
    # Pull the SampleSet object down through DataFileUtil.
    fetched = self.dfu.get_objects(
        {'object_refs': [params.get('sample_set_ref')]})['data'][0]
    sample_set = fetched['data']
    # Default the sheet name to "<object name>_OTUs" when the caller did
    # not provide an explicit output_name.
    output_name = params.get('output_name') or (fetched['info'][1] + "_OTUs")
    otu_path = sample_set_to_OTU_sheet(
        sample_set, output_name, self.scratch, params)
    # Wrap the generated file in a KBase extended report.
    report_client = KBaseReport(self.callback_url)
    report_name = "Generate_OTU_sheet_report_" + str(uuid.uuid4())
    report_info = report_client.create_extended_report({
        'file_links': [{
            'path': otu_path,
            'name': os.path.basename(otu_path),
            'label': "CSV with headers for OTU",
            'description': "CSV file with each column containing the assigned sample_id and sample "
                           "name of each saved sample. Intended for uploading OTU data."
        }],
        'report_object_name': report_name,
        'workspace_name': params['workspace_name']
    })
    output = {
        'report_ref': report_info['ref'],
        'report_name': report_info['name'],
    }
    #END generate_OTU_sheet

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method generate_OTU_sheet return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def import_samples(self, ctx, params):
    """
    Import samples from a file (SESAR / ENIGMA / KBASE format) into a
    new or existing SampleSet and report the outcome.

    :param params: instance of type "ImportSampleInputs" -> structure:
       parameter "sample_set_ref" of String, parameter "sample_file" of
       String, parameter "workspace_name" of String, parameter
       "workspace_id" of Long, parameter "file_format" of String,
       parameter "description" of String, parameter "set_name" of String,
       parameter "header_row_index" of Long, parameter "id_field" of
       String, parameter "output_format" of String, parameter
       "taxonomy_source" of String, parameter "num_otus" of Long,
       parameter "incl_seq" of Long, parameter "otu_prefix" of String,
       parameter "share_within_workspace" of Long, parameter
       "prevalidate" of Long, parameter "incl_input_in_output" of Long
    :returns: instance of type "ImportSampleOutputs" -> structure:
       parameter "report_name" of String, parameter "report_ref" of
       String, parameter "sample_set" of type "SampleSet" -> structure:
       parameter "samples" of list of type "sample_info" -> structure:
       parameter "id" of type "sample_id", parameter "name" of String,
       parameter "description" of String, parameter "sample_set_ref" of
       String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN import_samples
    # NOTE(review): a second `import_samples` definition appears later in
    # this file; being defined last, it shadows this one at class-creation
    # time — confirm which version is intended and remove the other.
    print("Beginning sample import with following parameters:")
    print(f"params -- {params}")
    sample_set = {"samples": []}
    # Check if we have an existing Sample Set as input; if so, download it
    # and extend it in place. A brand-new set requires an explicit name.
    if params.get('sample_set_ref'):
        ret = self.dfu.get_objects(
            {'object_refs': [params['sample_set_ref']]})['data'][0]
        sample_set = ret['data']
        set_name = ret['info'][1]
        # save back into the workspace the input object lives in
        save_ws_id = params['sample_set_ref'].split('/')[0]
    else:
        if not params.get('set_name'):
            raise ValueError(
                "Sample set name required, when new SampleSet object is created.")
        set_name = params['set_name']
        save_ws_id = params.get('workspace_id')
    # header_row_index is 1-based from the caller; 0-based internally.
    if params.get('header_row_index'):
        header_row_index = int(params["header_row_index"]) - 1
    else:
        header_row_index = 0
        if params.get('file_format') == "SESAR":
            # SESAR files carry an extra leading row above the header
            header_row_index = 1
    username = ctx['user_id']
    if params.get('file_format') == 'ENIGMA':
        # ENIGMA_mappings['verification_mapping'].update(
        #     {key: ("is_string", []) for key in ENIGMA_mappings['basic_columns']}
        # )
        sample_set, errors = import_samples_from_file(
            params, self.sw_url, self.workspace_url, username, ctx['token'],
            ENIGMA_mappings['column_mapping'],
            ENIGMA_mappings.get('groups', []),
            ENIGMA_mappings['date_columns'],
            ENIGMA_mappings.get('column_unit_regex', []),
            sample_set, header_row_index)
    elif params.get('file_format') == 'SESAR':
        # SESAR_mappings['verification_mapping'].update(
        #     {key: ("is_string", []) for key in SESAR_mappings['basic_columns']}
        # )
        sample_set, errors = import_samples_from_file(
            params, self.sw_url, self.workspace_url, username, ctx['token'],
            SESAR_mappings['column_mapping'],
            SESAR_mappings.get('groups', []),
            SESAR_mappings['date_columns'],
            SESAR_mappings.get('column_unit_regex', []),
            sample_set, header_row_index)
    elif params.get('file_format') == 'KBASE':
        # KBASE format needs no column mapping/grouping metadata
        sample_set, errors = import_samples_from_file(
            params, self.sw_url, self.workspace_url, username, ctx['token'],
            {}, [], [], [],
            sample_set, header_row_index)
    else:
        # BUGFIX: the second fragment was missing its f-prefix, so the
        # message contained the literal text "{params.get('file_format')}"
        # instead of the actual unsupported format.
        raise ValueError(
            f"Only SESAR and ENIGMA formats are currently supported for importing samples. "
            f"File of format {params.get('file_format')} not supported.")
    file_links = []
    sample_set_ref = None
    html_link = None
    if errors:
        # create UI to display the errors clearly
        html_link = _error_ui(errors, self.scratch)
    else:
        # only save object if there are no errors
        obj_info = self.dfu.save_objects({
            'id': save_ws_id,
            'objects': [{
                "name": set_name,
                "type": "KBaseSets.SampleSet",
                "data": sample_set
            }]
        })[0]
        sample_set_ref = '/'.join(
            [str(obj_info[6]), str(obj_info[0]), str(obj_info[4])])
    sample_file_name = os.path.basename(
        params['sample_file']).split('.')[0] + '_OTU'
    # -- Format outputs below --
    # if output file format specified, add one to output
    if params.get('output_format') in ['csv', 'xls']:
        otu_path = sample_set_to_OTU_sheet(sample_set,
                                           sample_file_name,
                                           self.scratch,
                                           params)
        file_links.append({
            'path': otu_path,
            'name': os.path.basename(otu_path),
            'label': "OTU template file",
            'description': "file with each column containing the assigned sample_id and sample "
                           "name of each saved sample. Intended for uploading OTU data."
        })
    if params.get('incl_input_in_output'):
        sample_file = params.get('sample_file')
        if not os.path.isfile(sample_file):
            # try prepending '/staging/' to file and check then
            if os.path.isfile(os.path.join('/staging', sample_file)):
                sample_file = os.path.join('/staging', sample_file)
            else:
                raise ValueError(
                    f"input file {sample_file} does not exist.")
        sample_file_copy = os.path.join(self.scratch,
                                        os.path.basename(sample_file))
        shutil.copy(sample_file, sample_file_copy)
        file_links.append({
            "path": sample_file_copy,
            "name": os.path.basename(sample_file_copy),
            "label": "Input Sample file",
            "description": "Input file provided to create the sample set."
        })
    # create report
    report_client = KBaseReport(self.callback_url)
    report_data = {
        'report_object_name': "SampleSet_import_report_" + str(uuid.uuid4()),
        'workspace_name': params['workspace_name']
    }
    if file_links:
        report_data['file_links'] = file_links
    if sample_set_ref:
        report_data[
            'message'] = f"SampleSet object named \"{set_name}\" imported."
        report_data['objects_created'] = [{'ref': sample_set_ref}]
    if html_link:
        report_data['html_links'] = [{
            'path': html_link,
            'name': 'index.html',
            'description': 'Sample Set Import Error ui'
        }]
        report_data['direct_html_link_index'] = 0
    report_info = report_client.create_extended_report(report_data)
    output = {
        'report_ref': report_info['ref'],
        'report_name': report_info['name'],
        'sample_set': sample_set,
        'sample_set_ref': sample_set_ref,
        'errors': errors
    }
    #END import_samples

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method import_samples return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def import_samples(self, ctx, params):
    """
    Import samples from a file (SESAR / ENIGMA / KBase format) into a new
    or existing SampleSet, link each sample to the saved set, and report
    the outcome.

    :param params: instance of type "ImportSampleInputs" -> structure:
       parameter "sample_set_ref" of String, parameter "sample_file" of
       String, parameter "workspace_name" of String, parameter
       "workspace_id" of Long, parameter "file_format" of String,
       parameter "description" of String, parameter "set_name" of String,
       parameter "header_row_index" of Long, parameter "name_field" of
       String, parameter "output_format" of String, parameter
       "taxonomy_source" of String, parameter "num_otus" of Long,
       parameter "incl_seq" of Long, parameter "otu_prefix" of String,
       parameter "share_within_workspace" of Long, parameter
       "prevalidate" of Long, parameter "incl_input_in_output" of Long,
       parameter "ignore_warnings" of Long, parameter
       "keep_existing_samples" of Long
    :returns: instance of type "ImportSampleOutputs" -> structure:
       parameter "report_name" of String, parameter "report_ref" of
       String, parameter "sample_set" of type "SampleSet" -> structure:
       parameter "samples" of list of type "sample_info" -> structure:
       parameter "id" of type "sample_id", parameter "name" of String,
       parameter "description" of String, parameter "sample_set_ref" of
       String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN import_samples
    print("Beginning sample import with following parameters:")
    print(f"params -- {params}")
    sample_set = {"samples": []}
    # Check if we have an existing Sample Set as input; if so, download it
    # and extend it in place. A brand-new set requires an explicit name.
    if params.get('sample_set_ref'):
        ret = self.dfu.get_objects(
            {'object_refs': [params['sample_set_ref']]})['data'][0]
        sample_set = ret['data']
        # keep the existing object name unless the caller overrides it
        if params.get('set_name'):
            set_name = params.get('set_name')
        else:
            set_name = ret['info'][1]
        save_ws_id = params['sample_set_ref'].split('/')[0]
    else:
        if not params.get('set_name'):
            raise ValueError(
                "Sample set name required, when new SampleSet object is created.")
        set_name = params['set_name']
        save_ws_id = params.get('workspace_id')
    # header_row_index is 1-based from the caller; 0-based internally.
    # When unset, sniff the header position from the file itself.
    if params.get('header_row_index'):
        header_row_index = int(params["header_row_index"]) - 1
    else:
        header_row_index = find_header_row(params.get('sample_file'),
                                           params.get('file_format'))
    username = ctx['user_id']
    # Normalize the format once instead of recomputing
    # str(...).lower() for every lookup below.
    file_format = str(params.get('file_format')).lower()
    if file_format not in ['enigma', 'sesar', 'kbase']:
        raise ValueError(
            f"Only SESAR, ENIGMA, and KBase formats are currently supported for importing samples. "
            f"File of format {params.get('file_format')} not supported.")
    mappings = {
        'enigma': ENIGMA_mappings,
        'sesar': SESAR_mappings,
        'kbase': {}
    }
    format_mapping = mappings[file_format]
    sample_set, has_unignored_errors, errors, sample_data_json = import_samples_from_file(
        params, self.sample_url, self.workspace_url, self.callback_url,
        username, ctx['token'],
        format_mapping.get('groups', []),
        format_mapping.get('date_columns', []),
        format_mapping.get('column_unit_regex', []),
        sample_set, header_row_index,
        aliases.get(file_format, {}))
    file_links = []
    new_data_links = []
    sample_set_ref = None
    # create UI to display the errors clearly
    html_link = _error_ui(errors, sample_data_json, has_unignored_errors,
                          self.scratch)
    if not has_unignored_errors:
        # only save object if there are no errors
        obj_info = self.dfu.save_objects({
            'id': save_ws_id,
            'objects': [{
                "name": set_name,
                "type": "KBaseSets.SampleSet",
                "data": sample_set
            }]
        })[0]
        sample_set_ref = '/'.join(
            [str(obj_info[6]), str(obj_info[0]), str(obj_info[4])])
        sample_file_name = os.path.basename(
            params['sample_file']).split('.')[0] + '_OTU'
        # create a data link between each sample and the sampleset
        ss = SampleService(self.sample_url)
        for idx, sample_info in enumerate(sample_set['samples']):
            sample_id = sample_info['id']
            version = sample_info['version']
            sample = ss.get_sample({
                'id': sample_id,
                'version': version,
            })
            ret = ss.create_data_link(
                dict(
                    upa=sample_set_ref,
                    id=sample_id,
                    dataid='samples/{}'.format(idx),
                    version=version,
                    node=sample['node_tree'][0]['id'],
                    update=1,
                ))
            new_data_links.append(ret)
        # -- Format outputs below --
        # if output file format specified, add one to output
        if params.get('output_format') in ['csv', 'xls']:
            otu_path = sample_set_to_OTU_sheet(sample_set,
                                               sample_file_name,
                                               self.scratch,
                                               params)
            file_links.append({
                'path': otu_path,
                'name': os.path.basename(otu_path),
                'label': "OTU template file",
                'description': "file with each column containing the assigned sample_id and sample "
                               "name of each saved sample. Intended for uploading OTU data."
            })
    if params.get('incl_input_in_output'):
        sample_file = params.get('sample_file')
        if not os.path.isfile(sample_file):
            # try prepending '/staging/' to file and check then
            if os.path.isfile(os.path.join('/staging', sample_file)):
                sample_file = os.path.join('/staging', sample_file)
            else:
                raise ValueError(
                    f"Input file {sample_file} does not exist.")
        sample_file_copy = os.path.join(self.scratch,
                                        os.path.basename(sample_file))
        shutil.copy(sample_file, sample_file_copy)
        file_links.append({
            "path": sample_file_copy,
            "name": os.path.basename(sample_file_copy),
            "label": "Input Sample file",
            "description": "Input file provided to create the sample set."
        })
    # create report
    report_client = KBaseReport(self.callback_url)
    report_data = {
        'report_object_name': "SampleSet_import_report_" + str(uuid.uuid4()),
        'workspace_name': params['workspace_name']
    }
    if file_links:
        report_data['file_links'] = file_links
    if sample_set_ref:
        report_data[
            'message'] = f"SampleSet object named \"{set_name}\" imported."
        report_data['objects_created'] = [{'ref': sample_set_ref}]
    if html_link:
        report_data['html_links'] = [{
            'path': html_link,
            'name': 'index.html',
            'description': 'HTML Report for Sample Uploader'
        }]
        report_data['direct_html_link_index'] = 0
    report_info = report_client.create_extended_report(report_data)
    output = {
        'report_ref': report_info['ref'],
        'report_name': report_info['name'],
        'sample_set': sample_set,
        'sample_set_ref': sample_set_ref,
        'errors': errors,
        'links': new_data_links
    }
    #END import_samples

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method import_samples return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]