def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = config['scratch']
    self.token = config['KB_AUTH_TOKEN']
    self.srv_wiz_url = config['srv-wiz-url']
    self.dfu = DataFileUtil(self.callback_url)
    self.sample_ser = SampleService(self.callback_url)
def setUpClass(cls):
    cls.token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('GenericsAPI'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(cls.token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': cls.token,
        'user_id': user_id,
        'provenance': [{
            'service': 'GenericsAPI',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.serviceImpl = GenericsAPI(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.shockURL = cls.cfg['shock-url']
    cls.dfu = DataFileUtil(cls.callback_url)
    cls.sample_uploader = sample_uploader(cls.callback_url, service_ver="dev")
    cls.sample_url = cls.cfg.get('kbase-endpoint') + '/sampleservice'
    cls.sample_ser = SampleService(cls.sample_url)
    cls.hs = HandleService(url=cls.cfg['handle-service-url'], token=cls.token)
    suffix = int(time.time() * 1000)
    cls.wsName = "test_GenericsAPI_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
    cls.wsId = ret[0]
    small_file = os.path.join(cls.scratch, 'test.txt')
    with open(small_file, "w") as f:
        f.write("empty content")
    cls.test_shock = cls.dfu.file_to_shock({
        'file_path': small_file,
        'make_handle': True
    })
    cls.handles_to_delete = []
    cls.nodes_to_delete = []
    cls.handles_to_delete.append(cls.test_shock['handle']['hid'])
    cls.nodes_to_delete.append(cls.test_shock['shock_id'])
    cls.prepare_data()
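# The handles_to_delete / nodes_to_delete lists above only matter if a
# matching teardown cleans them up. A minimal tearDownClass sketch for that
# cleanup -- an assumption based on the standard KBase SDK test pattern, using
# the HandleService client's hids_to_handles/delete_handles methods and a
# direct REST delete of the Shock node (requires `import requests`):
def tearDownClass(cls):
    if hasattr(cls, 'wsName'):
        # drop the throwaway test workspace created in setUpClass
        cls.wsClient.delete_workspace({'workspace': cls.wsName})
    if getattr(cls, 'handles_to_delete', None):
        # resolve handle ids to full handle records, then delete them
        cls.hs.delete_handles(cls.hs.hids_to_handles(cls.handles_to_delete))
    for node in getattr(cls, 'nodes_to_delete', []):
        # delete each Shock node via its REST API
        headers = {'Authorization': 'OAuth {}'.format(cls.token)}
        requests.delete('{}/node/{}'.format(cls.shockURL, node),
                        headers=headers, allow_redirects=True)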
def link_reads(self, ctx, params):
    """
    Create links between samples and reads objects
    :param params: instance of mapping from String to unspecified object
    :returns: instance of type "ReportResults" -> structure: parameter
       "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN link_reads
    ss = SampleService(self.sw_url, token=ctx['token'], service_ver='beta')
    sample_set_ref = params['sample_set_ref']
    sample_set = SampleSet(self.dfu, sample_set_ref)
    links = [(d['sample_name'], d['reads_ref']) for d in params['links']]
    for sample_name, reads_ref in links:
        node_id, version, sample_id = sample_set.get_sample_info(sample_name)
        p = dict(
            upa=reads_ref,
            id=sample_id,
            version=version,
            node=node_id,
            update=1,
        )
        ret = ss.create_data_link(p)
    report_client = KBaseReport(self.callback_url)
    report_info = report_client.create_extended_report({
        'workspace_name': params['workspace_name'],
    })
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref'],
    }
    #END link_reads

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method link_reads return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
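# A hedged invocation sketch for the variant above. The object refs, sample
# names, and the `impl`/`ctx` objects are illustrative placeholders (in the
# test fragments here they correspond to cls.serviceImpl and cls.ctx):
params = {
    'workspace_name': 'my_workspace',
    'sample_set_ref': '12345/6/7',   # UPA of an existing SampleSet
    'links': [
        {'sample_name': 'Sample_1', 'reads_ref': '12345/8/1'},
        {'sample_name': 'Sample_2', 'reads_ref': '12345/9/1'},
    ],
}
output = impl.link_reads(ctx, params)[0]
# output -> {'report_name': ..., 'report_ref': ...}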
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = config['scratch']
    self.token = config['KB_AUTH_TOKEN']
    self.srv_wiz_url = config['srv-wiz-url']
    self.dfu = DataFileUtil(self.callback_url)
    self.sample_ser = SampleService(self.callback_url)
    logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                        level=logging.INFO)
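# Both __init__ variants above read the same small set of config keys. A
# hypothetical construction sketch -- in a real SDK module the generated
# server layer builds this dict from deploy.cfg plus per-request environment
# values, and `Impl` stands in for whichever implementation class owns the
# __init__ (all values below are placeholders):
config = {
    'SDK_CALLBACK_URL': os.environ['SDK_CALLBACK_URL'],
    'scratch': '/kb/module/work/tmp',
    'KB_AUTH_TOKEN': os.environ.get('KB_AUTH_TOKEN'),
    'srv-wiz-url': 'https://kbase.us/services/service_wizard',
}
impl = Impl(config)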
def setUpClass(cls):
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('GenericsAPI'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'GenericsAPI',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.serviceImpl = GenericsAPI(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.gfu = GenomeFileUtil(cls.callback_url)
    cls.dfu = DataFileUtil(cls.callback_url)
    cls.sample_uploader = sample_uploader(cls.callback_url, service_ver="dev")
    cls.sample_ser = SampleService(cls.cfg['srv-wiz-url'])
    suffix = int(time.time() * 1000)
    cls.wsName = "test_GenericsAPI_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
    cls.wsId = ret[0]
    cls.prepare_data()
def setUpClass(cls):
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('sample_uploader'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'sample_uploader',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL, token=token)
    cls.serviceImpl = sample_uploader(cls.cfg)
    cls.curr_dir = os.path.dirname(os.path.realpath(__file__))
    cls.scratch = cls.cfg['scratch']
    cls.wiz_url = cls.cfg['srv-wiz-url']
    cls.sample_url = get_sample_service_url(cls.wiz_url)
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    suffix = int(time.time() * 1000)
    cls.wsName = "test_ContigFilter_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': cls.wsName})  # noqa
    cls.wsID = ret[0]
    cls.ss = SampleService(cls.wiz_url, token=token, service_ver='beta')
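# get_sample_service_url above resolves the dynamic SampleService endpoint
# through the KBase Service Wizard. A minimal sketch of such a helper,
# assuming the standard ServiceWizard.get_service_status JSON-RPC 1.1 method
# (requires `import requests` and `import json`; the 'dev' version tag is an
# illustrative choice):
def get_sample_service_url(sw_url):
    payload = {
        "method": "ServiceWizard.get_service_status",
        "id": "",
        "params": [{"module_name": "SampleService", "version": "dev"}],
        "version": "1.1",
    }
    resp = requests.post(url=sw_url, data=json.dumps(payload))
    wiz_resp = resp.json()
    if wiz_resp.get('error'):
        raise RuntimeError("ServiceWizard error: {}".format(wiz_resp['error']))
    # the service status record carries the deployed service's base URL
    return wiz_resp['result'][0]['url']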
def link_reads(self, ctx, params):
    """
    :param params: instance of type "LinkReadsParams" -> structure:
       parameter "workspace_name" of String, parameter "workspace_id" of
       String, parameter "sample_set_ref" of String, parameter "links" of
       list of type "ReadsLink" (Create links between samples and reads
       objects.) -> structure: parameter "sample_name" of String,
       parameter "reads_ref" of String
    :returns: instance of type "LinkReadsOutput" -> structure: parameter
       "report_name" of String, parameter "report_ref" of String,
       parameter "links" of list of unspecified object
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN link_reads
    logging.info(params)
    ss = SampleService(self.sw_url, service_ver='dev')
    sample_set_ref = params['sample_set_ref']
    sample_set_obj = self.dfu.get_objects(
        {'object_refs': [sample_set_ref]})['data'][0]['data']
    sample_name_2_info = {d['name']: d for d in sample_set_obj['samples']}
    links = [(d['sample_name'][0], d['reads_ref']) for d in params['links']]
    new_data_links = []
    for sample_name, reads_ref in links:
        sample_id = sample_name_2_info[sample_name]['id']
        version = sample_name_2_info[sample_name]['version']
        sample = ss.get_sample({
            'id': sample_id,
            'version': version,
        })
        ret = ss.create_data_link(
            dict(
                upa=reads_ref,
                id=sample_id,
                version=version,
                node=sample['node_tree'][0]['id'],
                update=1,
            ))
        new_data_links.append(ret)
    report_client = KBaseReport(self.callback_url)
    report_info = report_client.create_extended_report({
        'workspace_name': params['workspace_name'],
    })
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref'],
        'links': new_data_links,
    }
    #END link_reads

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method link_reads return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
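# Note the `d['sample_name'][0]` indexing in this variant: each link's
# sample_name is apparently passed as a single-element list (consistent with
# a narrative dynamic-dropdown input), unlike the plain-string form in the
# earlier variant. A hedged params sketch reflecting that shape (all refs and
# the `impl`/`ctx` harness objects are placeholders):
params = {
    'workspace_name': 'my_workspace',
    'sample_set_ref': '12345/6/7',
    'links': [
        {'sample_name': ['Sample_1'], 'reads_ref': '12345/8/1'},
    ],
}
output = impl.link_reads(ctx, params)[0]
# output['links'] holds the records returned by SampleService.create_data_link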
def run_barseqR(self, ctx, params):
    """
    Args:
    :param params: instance of mapping from String to unspecified object
        ctx: client_ip: None or str, user_id: str, 'authenticated': 1,
        'token': str, 'module': None, 'method': None, 'call_id': None,
        'rpc_context': None, 'provenance': list<prov_d>
        prov_d: (d) 'service': (str),
        'method': 'please_never_use_it_in_production', 'method_params': []
    :returns: instance of type "ReportResults" -> structure: parameter
        "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN run_barseqR
    # SETUP - Unrelated to inputs --------
    logging.basicConfig(level=logging.DEBUG)
    logging.info("Call back url: " + str(self.callback_url))
    # We create important classes
    dfu = DataFileUtil(self.callback_url)
    logging.info("DFU VARS-- " * 8)
    logging.info(vars(dfu))
    gfu = GenomeFileUtil(self.callback_url)
    smpl_s = SampleService(self.callback_url)
    myToken = os.environ.get('KB_AUTH_TOKEN', None)
    ws = Workspace(self.ws_url, token=myToken)
    ws_id = ws.get_workspace_info({'workspace': params['workspace_name']})[0]
    logging.info(os.environ)
    logging.info('ws-url')
    logging.info(self.ws_url)
    logging.info('ctx')
    logging.info(ctx)
    # We create indir, outdir, sets_dir (Input, Output, Sets)
    indir = os.path.join(self.shared_folder, "indir")
    os.mkdir(indir)
    outdir = os.path.join(self.shared_folder, "outdir")
    os.mkdir(outdir)
    sets_dir = os.path.join(indir, "sets_dir")
    os.mkdir(sets_dir)
    metadir = '/kb/module/lib/RunDir/metadata'
    if not os.path.isdir(metadir):
        raise Exception("metadata directory not found at: {}".format(metadir))
    # We prepare locations of input files
    poolfile_path = os.path.join(indir, "pool.n10")
    gene_table_fp = os.path.join(indir, "genes.GC")
    exps_file = os.path.join(indir, "FEBA_Barseq.tsv")
    # END SETUP
    # VALIDATE PARAMS:
    logging.info("PARAMS:")
    logging.info(params)
    # From Util.validate python file
    val_par = validate_params(params)
    '''
    val_par contains keys:
        genome_ref
        poolfile_ref
        exps_ref
        sets_ref
        output_name
        workspace_name
    '''
    val_par['username'] = ctx['user_id']
    # DOWNLOAD FILES
    download_dict = {
        "dfu": dfu,
        "gfu": gfu,
        "ws": ws,
        "smpl_s": smpl_s,
        "sets_dir": sets_dir,
        "poolfile_path": poolfile_path,
        "gene_table_fp": gene_table_fp,
        "exps_file": exps_file,
        "scratch_dir": self.shared_folder
    }
    # We copy input files to proper directories.
    # vp must contain genome_ref, poolfile_ref, exps_ref, sets_refs (list)
    # DownloadResults must contain keys 'org', 'set_names_list', 'set_fps_list'
    # set_names_list value contains the names of the sets without extensions
    DownloadResults = download_files(val_par, download_dict)
    logging.debug(json.dumps(DownloadResults, indent=2))
    # Get args in this format:
    # [-org, org_name, -indir, Scratch_Dir_Input, -metadir, Fixed meta dir,
    #  -outdir, scratch_dir_output, -sets_dir, within scratch_dir_input,
    #  -sets, set1 (sets_dir), set2 (sets_dir), set3 (sets_dir), ...]
    # Note meta dir is called metadata and is in RunDir
    # Running the entire program:
    arg_list = ["-org", DownloadResults['org'], '-indir', indir,
                '-metadir', metadir, '-outdir', outdir,
                '-sets_dir', sets_dir, '-sets']
    arg_list += DownloadResults['set_names_list']
    RunBarSeq(arg_list)
    # Returning files to user
    report = KBaseReport(self.callback_url)
    report_info = report.create({
        'report': {
            'objects_created': [],
            'text_message': params['parameter_1']
        },
        'workspace_name': params['workspace_name']
    })
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref'],
    }
    #END run_barseqR

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method run_barseqR return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
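# A hedged params sketch for run_barseqR, based on the keys the inline
# comment documents validate_params as returning. All refs and values are
# placeholders, and `impl`/`ctx` come from the SDK harness:
params = {
    'workspace_name': 'my_workspace',
    'genome_ref': '12345/1/1',
    'poolfile_ref': '12345/2/1',
    'exps_ref': '12345/3/1',
    'sets_ref': ['12345/4/1'],   # list of set refs, per the download comment
    'output_name': 'barseq_output',
    # the report step above reads params['parameter_1'] (apparently leftover
    # template code), so a value must be supplied for the call to succeed
    'parameter_1': 'BarSeq run complete',
}
output = impl.run_barseqR(ctx, params)[0]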
def import_samples(self, ctx, params):
    """
    :param params: instance of type "ImportSampleInputs" -> structure:
       parameter "sample_set_ref" of String, parameter "sample_file" of
       String, parameter "workspace_name" of String, parameter
       "workspace_id" of Long, parameter "file_format" of String,
       parameter "description" of String, parameter "set_name" of String,
       parameter "header_row_index" of Long, parameter "name_field" of
       String, parameter "output_format" of String, parameter
       "taxonomy_source" of String, parameter "num_otus" of Long,
       parameter "incl_seq" of Long, parameter "otu_prefix" of String,
       parameter "share_within_workspace" of Long, parameter
       "prevalidate" of Long, parameter "incl_input_in_output" of Long,
       parameter "ignore_warnings" of Long, parameter
       "keep_existing_samples" of Long
    :returns: instance of type "ImportSampleOutputs" -> structure:
       parameter "report_name" of String, parameter "report_ref" of
       String, parameter "sample_set" of type "SampleSet" -> structure:
       parameter "samples" of list of type "sample_info" -> structure:
       parameter "id" of type "sample_id", parameter "name" of String,
       parameter "description" of String, parameter "sample_set_ref" of
       String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN import_samples
    print("Beginning sample import with the following parameters:")
    print(f"params -- {params}")
    sample_set = {"samples": []}
    # Check if we have an existing SampleSet as input; if so, download it
    if params.get('sample_set_ref'):
        ret = self.dfu.get_objects(
            {'object_refs': [params['sample_set_ref']]})['data'][0]
        sample_set = ret['data']
        if params.get('set_name'):
            set_name = params.get('set_name')
        else:
            set_name = ret['info'][1]
        save_ws_id = params['sample_set_ref'].split('/')[0]
    else:
        if not params.get('set_name'):
            raise ValueError(
                "Sample set name is required when a new SampleSet object "
                "is created.")
        set_name = params['set_name']
        save_ws_id = params.get('workspace_id')
    if params.get('header_row_index'):
        header_row_index = int(params["header_row_index"]) - 1
    else:
        header_row_index = find_header_row(params.get('sample_file'),
                                           params.get('file_format'))
    username = ctx['user_id']
    if str(params.get('file_format')).lower() not in ['enigma', 'sesar', 'kbase']:
        raise ValueError(
            "Only SESAR, ENIGMA, and KBase formats are currently supported "
            "for importing samples. "
            f"File of format {params.get('file_format')} not supported.")
    mappings = {
        'enigma': ENIGMA_mappings,
        'sesar': SESAR_mappings,
        'kbase': {}
    }
    sample_set, has_unignored_errors, errors, sample_data_json = import_samples_from_file(
        params,
        self.sample_url,
        self.workspace_url,
        self.callback_url,
        username,
        ctx['token'],
        mappings[str(params.get('file_format')).lower()].get('groups', []),
        mappings[str(params.get('file_format')).lower()].get('date_columns', []),
        mappings[str(params.get('file_format')).lower()].get('column_unit_regex', []),
        sample_set,
        header_row_index,
        aliases.get(params.get('file_format').lower(), {}))
    file_links = []
    new_data_links = []
    sample_set_ref = None
    # create UI to display the errors clearly
    html_link = _error_ui(errors, sample_data_json, has_unignored_errors,
                          self.scratch)
    if not has_unignored_errors:
        # only save object if there are no errors
        obj_info = self.dfu.save_objects({
            'id': save_ws_id,
            'objects': [{
                "name": set_name,
                "type": "KBaseSets.SampleSet",
                "data": sample_set
            }]
        })[0]
        sample_set_ref = '/'.join(
            [str(obj_info[6]), str(obj_info[0]), str(obj_info[4])])
        sample_file_name = os.path.basename(
            params['sample_file']).split('.')[0] + '_OTU'
        # create a data link between each sample and the sampleset
        ss = SampleService(self.sample_url)
        for idx, sample_info in enumerate(sample_set['samples']):
            sample_id = sample_info['id']
            version = sample_info['version']
            sample = ss.get_sample({
                'id': sample_id,
                'version': version,
            })
            ret = ss.create_data_link(
                dict(
                    upa=sample_set_ref,
                    id=sample_id,
                    dataid='samples/{}'.format(idx),
                    version=version,
                    node=sample['node_tree'][0]['id'],
                    update=1,
                ))
            new_data_links.append(ret)
        # -- Format outputs below --
        # if output file format specified, add one to output
        if params.get('output_format') in ['csv', 'xls']:
            otu_path = sample_set_to_OTU_sheet(sample_set, sample_file_name,
                                               self.scratch, params)
            file_links.append({
                'path': otu_path,
                'name': os.path.basename(otu_path),
                'label': "OTU template file",
                'description': "file with each column containing the assigned "
                               "sample_id and sample name of each saved sample. "
                               "Intended for uploading OTU data."
            })
    if params.get('incl_input_in_output'):
        sample_file = params.get('sample_file')
        if not os.path.isfile(sample_file):
            # try prepending '/staging/' to the file and check again
            if os.path.isfile(os.path.join('/staging', sample_file)):
                sample_file = os.path.join('/staging', sample_file)
            else:
                raise ValueError(f"Input file {sample_file} does not exist.")
        sample_file_copy = os.path.join(self.scratch,
                                        os.path.basename(sample_file))
        shutil.copy(sample_file, sample_file_copy)
        file_links.append({
            "path": sample_file_copy,
            "name": os.path.basename(sample_file_copy),
            "label": "Input Sample file",
            "description": "Input file provided to create the sample set."
        })
    # create report
    report_client = KBaseReport(self.callback_url)
    report_data = {
        'report_object_name': "SampleSet_import_report_" + str(uuid.uuid4()),
        'workspace_name': params['workspace_name']
    }
    if file_links:
        report_data['file_links'] = file_links
    if sample_set_ref:
        report_data['message'] = f"SampleSet object named \"{set_name}\" imported."
        report_data['objects_created'] = [{'ref': sample_set_ref}]
    if html_link:
        report_data['html_links'] = [{
            'path': html_link,
            'name': 'index.html',
            'description': 'HTML Report for Sample Uploader'
        }]
        report_data['direct_html_link_index'] = 0
    report_info = report_client.create_extended_report(report_data)
    output = {
        'report_ref': report_info['ref'],
        'report_name': report_info['name'],
        'sample_set': sample_set,
        'sample_set_ref': sample_set_ref,
        'errors': errors,
        'links': new_data_links
    }
    #END import_samples

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method import_samples return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
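# A hedged params sketch for import_samples, covering the common path of
# creating a new SampleSet from a SESAR-format file. All values are
# placeholders, and `impl`/`ctx` come from the SDK harness:
params = {
    'workspace_name': 'my_workspace',
    'workspace_id': 12345,
    'sample_file': 'sesar_samples.xlsx',  # resolved against /staging/ if not found
    'file_format': 'sesar',               # one of 'sesar', 'enigma', 'kbase'
    'set_name': 'my_sample_set',
    'header_row_index': 2,                # 1-based; omit to auto-detect
    'output_format': 'csv',               # optional OTU template in the report
    'incl_input_in_output': 1,            # attach the input file to the report
}
output = impl.import_samples(ctx, params)[0]
# output['sample_set_ref'] stays None if unignored validation errors occurred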