def generate_OTU_sheet(self, ctx, params):
    """
    Generate a customized OTU worksheet from a SampleSet input.

    :param params: instance of type "GenerateOTUSheetParams" (Generate a
       customized OTU worksheet using a SampleSet input to generate the
       appropriate columns.) -> structure: parameter "workspace_name" of
       String, parameter "workspace_id" of Long, parameter
       "sample_set_ref" of String, parameter "output_name" of String,
       parameter "output_format" of String, parameter "num_otus" of Long,
       parameter "taxonomy_source" of String, parameter "incl_seq" of
       Long, parameter "otu_prefix" of String
    :returns: instance of type "GenerateOTUSheetOutputs" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN generate_OTU_sheet
    # Pull the SampleSet object down through DataFileUtil.
    fetched = self.dfu.get_objects(
        {'object_refs': [params.get('sample_set_ref')]})['data'][0]
    sample_set = fetched['data']
    # Default the sheet name to "<object name>_OTUs" when the caller did
    # not provide an explicit output_name.
    output_name = params.get('output_name') or (fetched['info'][1] + "_OTUs")
    otu_path = sample_set_to_OTU_sheet(
        sample_set, output_name, self.scratch, params)
    # Wrap the generated file in a KBase extended report.
    report_client = KBaseReport(self.callback_url)
    report_name = "Generate_OTU_sheet_report_" + str(uuid.uuid4())
    report_info = report_client.create_extended_report({
        'file_links': [{
            'path': otu_path,
            'name': os.path.basename(otu_path),
            'label': "CSV with headers for OTU",
            'description': "CSV file with each column containing the assigned sample_id and sample "
                           "name of each saved sample. Intended for uploading OTU data."
        }],
        'report_object_name': report_name,
        'workspace_name': params['workspace_name']
    })
    output = {
        'report_ref': report_info['ref'],
        'report_name': report_info['name'],
    }
    #END generate_OTU_sheet

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method generate_OTU_sheet return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def import_samples(self, ctx, params):
    """
    Import samples from a file (SESAR / ENIGMA / KBASE format) into a
    new or existing SampleSet and report the outcome.

    :param params: instance of type "ImportSampleInputs" -> structure:
       parameter "sample_set_ref" of String, parameter "sample_file" of
       String, parameter "workspace_name" of String, parameter
       "workspace_id" of Long, parameter "file_format" of String,
       parameter "description" of String, parameter "set_name" of String,
       parameter "header_row_index" of Long, parameter "id_field" of
       String, parameter "output_format" of String, parameter
       "taxonomy_source" of String, parameter "num_otus" of Long,
       parameter "incl_seq" of Long, parameter "otu_prefix" of String,
       parameter "share_within_workspace" of Long, parameter
       "prevalidate" of Long, parameter "incl_input_in_output" of Long
    :returns: instance of type "ImportSampleOutputs" -> structure:
       parameter "report_name" of String, parameter "report_ref" of
       String, parameter "sample_set" of type "SampleSet" -> structure:
       parameter "samples" of list of type "sample_info" -> structure:
       parameter "id" of type "sample_id", parameter "name" of String,
       parameter "description" of String, parameter "sample_set_ref" of
       String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN import_samples
    # NOTE(review): a second `import_samples` definition appears later in
    # this file; being defined last, it shadows this one at class-creation
    # time — confirm which version is intended and remove the other.
    print("Beginning sample import with following parameters:")
    print(f"params -- {params}")
    sample_set = {"samples": []}
    # Check if we have an existing Sample Set as input; if so, download it
    # and extend it in place. A brand-new set requires an explicit name.
    if params.get('sample_set_ref'):
        ret = self.dfu.get_objects(
            {'object_refs': [params['sample_set_ref']]})['data'][0]
        sample_set = ret['data']
        set_name = ret['info'][1]
        # save back into the workspace the input object lives in
        save_ws_id = params['sample_set_ref'].split('/')[0]
    else:
        if not params.get('set_name'):
            raise ValueError(
                "Sample set name required, when new SampleSet object is created.")
        set_name = params['set_name']
        save_ws_id = params.get('workspace_id')
    # header_row_index is 1-based from the caller; 0-based internally.
    if params.get('header_row_index'):
        header_row_index = int(params["header_row_index"]) - 1
    else:
        header_row_index = 0
        if params.get('file_format') == "SESAR":
            # SESAR files carry an extra leading row above the header
            header_row_index = 1
    username = ctx['user_id']
    if params.get('file_format') == 'ENIGMA':
        # ENIGMA_mappings['verification_mapping'].update(
        #     {key: ("is_string", []) for key in ENIGMA_mappings['basic_columns']}
        # )
        sample_set, errors = import_samples_from_file(
            params, self.sw_url, self.workspace_url, username, ctx['token'],
            ENIGMA_mappings['column_mapping'],
            ENIGMA_mappings.get('groups', []),
            ENIGMA_mappings['date_columns'],
            ENIGMA_mappings.get('column_unit_regex', []),
            sample_set, header_row_index)
    elif params.get('file_format') == 'SESAR':
        # SESAR_mappings['verification_mapping'].update(
        #     {key: ("is_string", []) for key in SESAR_mappings['basic_columns']}
        # )
        sample_set, errors = import_samples_from_file(
            params, self.sw_url, self.workspace_url, username, ctx['token'],
            SESAR_mappings['column_mapping'],
            SESAR_mappings.get('groups', []),
            SESAR_mappings['date_columns'],
            SESAR_mappings.get('column_unit_regex', []),
            sample_set, header_row_index)
    elif params.get('file_format') == 'KBASE':
        # KBASE format needs no column mapping/grouping metadata
        sample_set, errors = import_samples_from_file(
            params, self.sw_url, self.workspace_url, username, ctx['token'],
            {}, [], [], [],
            sample_set, header_row_index)
    else:
        # BUGFIX: the second fragment was missing its f-prefix, so the
        # message contained the literal text "{params.get('file_format')}"
        # instead of the actual unsupported format.
        raise ValueError(
            f"Only SESAR and ENIGMA formats are currently supported for importing samples. "
            f"File of format {params.get('file_format')} not supported.")
    file_links = []
    sample_set_ref = None
    html_link = None
    if errors:
        # create UI to display the errors clearly
        html_link = _error_ui(errors, self.scratch)
    else:
        # only save object if there are no errors
        obj_info = self.dfu.save_objects({
            'id': save_ws_id,
            'objects': [{
                "name": set_name,
                "type": "KBaseSets.SampleSet",
                "data": sample_set
            }]
        })[0]
        sample_set_ref = '/'.join(
            [str(obj_info[6]), str(obj_info[0]), str(obj_info[4])])
    sample_file_name = os.path.basename(
        params['sample_file']).split('.')[0] + '_OTU'
    # -- Format outputs below --
    # if output file format specified, add one to output
    if params.get('output_format') in ['csv', 'xls']:
        otu_path = sample_set_to_OTU_sheet(sample_set,
                                           sample_file_name,
                                           self.scratch,
                                           params)
        file_links.append({
            'path': otu_path,
            'name': os.path.basename(otu_path),
            'label': "OTU template file",
            'description': "file with each column containing the assigned sample_id and sample "
                           "name of each saved sample. Intended for uploading OTU data."
        })
    if params.get('incl_input_in_output'):
        sample_file = params.get('sample_file')
        if not os.path.isfile(sample_file):
            # try prepending '/staging/' to file and check then
            if os.path.isfile(os.path.join('/staging', sample_file)):
                sample_file = os.path.join('/staging', sample_file)
            else:
                raise ValueError(
                    f"input file {sample_file} does not exist.")
        sample_file_copy = os.path.join(self.scratch,
                                        os.path.basename(sample_file))
        shutil.copy(sample_file, sample_file_copy)
        file_links.append({
            "path": sample_file_copy,
            "name": os.path.basename(sample_file_copy),
            "label": "Input Sample file",
            "description": "Input file provided to create the sample set."
        })
    # create report
    report_client = KBaseReport(self.callback_url)
    report_data = {
        'report_object_name': "SampleSet_import_report_" + str(uuid.uuid4()),
        'workspace_name': params['workspace_name']
    }
    if file_links:
        report_data['file_links'] = file_links
    if sample_set_ref:
        report_data[
            'message'] = f"SampleSet object named \"{set_name}\" imported."
        report_data['objects_created'] = [{'ref': sample_set_ref}]
    if html_link:
        report_data['html_links'] = [{
            'path': html_link,
            'name': 'index.html',
            'description': 'Sample Set Import Error ui'
        }]
        report_data['direct_html_link_index'] = 0
    report_info = report_client.create_extended_report(report_data)
    output = {
        'report_ref': report_info['ref'],
        'report_name': report_info['name'],
        'sample_set': sample_set,
        'sample_set_ref': sample_set_ref,
        'errors': errors
    }
    #END import_samples

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method import_samples return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def import_samples(self, ctx, params):
    """
    Import samples from a file (SESAR / ENIGMA / KBase format) into a new
    or existing SampleSet, link each sample to the saved set, and report
    the outcome.

    :param params: instance of type "ImportSampleInputs" -> structure:
       parameter "sample_set_ref" of String, parameter "sample_file" of
       String, parameter "workspace_name" of String, parameter
       "workspace_id" of Long, parameter "file_format" of String,
       parameter "description" of String, parameter "set_name" of String,
       parameter "header_row_index" of Long, parameter "name_field" of
       String, parameter "output_format" of String, parameter
       "taxonomy_source" of String, parameter "num_otus" of Long,
       parameter "incl_seq" of Long, parameter "otu_prefix" of String,
       parameter "share_within_workspace" of Long, parameter
       "prevalidate" of Long, parameter "incl_input_in_output" of Long,
       parameter "ignore_warnings" of Long, parameter
       "keep_existing_samples" of Long
    :returns: instance of type "ImportSampleOutputs" -> structure:
       parameter "report_name" of String, parameter "report_ref" of
       String, parameter "sample_set" of type "SampleSet" -> structure:
       parameter "samples" of list of type "sample_info" -> structure:
       parameter "id" of type "sample_id", parameter "name" of String,
       parameter "description" of String, parameter "sample_set_ref" of
       String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN import_samples
    print("Beginning sample import with following parameters:")
    print(f"params -- {params}")
    sample_set = {"samples": []}
    # Check if we have an existing Sample Set as input; if so, download it
    # and extend it in place. A brand-new set requires an explicit name.
    if params.get('sample_set_ref'):
        ret = self.dfu.get_objects(
            {'object_refs': [params['sample_set_ref']]})['data'][0]
        sample_set = ret['data']
        # keep the existing object name unless the caller overrides it
        if params.get('set_name'):
            set_name = params.get('set_name')
        else:
            set_name = ret['info'][1]
        save_ws_id = params['sample_set_ref'].split('/')[0]
    else:
        if not params.get('set_name'):
            raise ValueError(
                "Sample set name required, when new SampleSet object is created.")
        set_name = params['set_name']
        save_ws_id = params.get('workspace_id')
    # header_row_index is 1-based from the caller; 0-based internally.
    # When unset, sniff the header position from the file itself.
    if params.get('header_row_index'):
        header_row_index = int(params["header_row_index"]) - 1
    else:
        header_row_index = find_header_row(params.get('sample_file'),
                                           params.get('file_format'))
    username = ctx['user_id']
    # Normalize the format once instead of recomputing
    # str(...).lower() for every lookup below.
    file_format = str(params.get('file_format')).lower()
    if file_format not in ['enigma', 'sesar', 'kbase']:
        raise ValueError(
            f"Only SESAR, ENIGMA, and KBase formats are currently supported for importing samples. "
            f"File of format {params.get('file_format')} not supported.")
    mappings = {
        'enigma': ENIGMA_mappings,
        'sesar': SESAR_mappings,
        'kbase': {}
    }
    format_mapping = mappings[file_format]
    sample_set, has_unignored_errors, errors, sample_data_json = import_samples_from_file(
        params, self.sample_url, self.workspace_url, self.callback_url,
        username, ctx['token'],
        format_mapping.get('groups', []),
        format_mapping.get('date_columns', []),
        format_mapping.get('column_unit_regex', []),
        sample_set, header_row_index,
        aliases.get(file_format, {}))
    file_links = []
    new_data_links = []
    sample_set_ref = None
    # create UI to display the errors clearly
    html_link = _error_ui(errors, sample_data_json, has_unignored_errors,
                          self.scratch)
    if not has_unignored_errors:
        # only save object if there are no errors
        obj_info = self.dfu.save_objects({
            'id': save_ws_id,
            'objects': [{
                "name": set_name,
                "type": "KBaseSets.SampleSet",
                "data": sample_set
            }]
        })[0]
        sample_set_ref = '/'.join(
            [str(obj_info[6]), str(obj_info[0]), str(obj_info[4])])
        sample_file_name = os.path.basename(
            params['sample_file']).split('.')[0] + '_OTU'
        # create a data link between each sample and the sampleset
        ss = SampleService(self.sample_url)
        for idx, sample_info in enumerate(sample_set['samples']):
            sample_id = sample_info['id']
            version = sample_info['version']
            sample = ss.get_sample({
                'id': sample_id,
                'version': version,
            })
            ret = ss.create_data_link(
                dict(
                    upa=sample_set_ref,
                    id=sample_id,
                    dataid='samples/{}'.format(idx),
                    version=version,
                    node=sample['node_tree'][0]['id'],
                    update=1,
                ))
            new_data_links.append(ret)
        # -- Format outputs below --
        # if output file format specified, add one to output
        if params.get('output_format') in ['csv', 'xls']:
            otu_path = sample_set_to_OTU_sheet(sample_set,
                                               sample_file_name,
                                               self.scratch,
                                               params)
            file_links.append({
                'path': otu_path,
                'name': os.path.basename(otu_path),
                'label': "OTU template file",
                'description': "file with each column containing the assigned sample_id and sample "
                               "name of each saved sample. Intended for uploading OTU data."
            })
    if params.get('incl_input_in_output'):
        sample_file = params.get('sample_file')
        if not os.path.isfile(sample_file):
            # try prepending '/staging/' to file and check then
            if os.path.isfile(os.path.join('/staging', sample_file)):
                sample_file = os.path.join('/staging', sample_file)
            else:
                raise ValueError(
                    f"Input file {sample_file} does not exist.")
        sample_file_copy = os.path.join(self.scratch,
                                        os.path.basename(sample_file))
        shutil.copy(sample_file, sample_file_copy)
        file_links.append({
            "path": sample_file_copy,
            "name": os.path.basename(sample_file_copy),
            "label": "Input Sample file",
            "description": "Input file provided to create the sample set."
        })
    # create report
    report_client = KBaseReport(self.callback_url)
    report_data = {
        'report_object_name': "SampleSet_import_report_" + str(uuid.uuid4()),
        'workspace_name': params['workspace_name']
    }
    if file_links:
        report_data['file_links'] = file_links
    if sample_set_ref:
        report_data[
            'message'] = f"SampleSet object named \"{set_name}\" imported."
        report_data['objects_created'] = [{'ref': sample_set_ref}]
    if html_link:
        report_data['html_links'] = [{
            'path': html_link,
            'name': 'index.html',
            'description': 'HTML Report for Sample Uploader'
        }]
        report_data['direct_html_link_index'] = 0
    report_info = report_client.create_extended_report(report_data)
    output = {
        'report_ref': report_info['ref'],
        'report_name': report_info['name'],
        'sample_set': sample_set,
        'sample_set_ref': sample_set_ref,
        'errors': errors,
        'links': new_data_links
    }
    #END import_samples

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method import_samples return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]