def export_samples(self, ctx, params):
    """
    :param params: instance of type "ExportParams" (export function for
       samples) -> structure: parameter "input_ref" of String, parameter
       "file_format" of String
    :returns: instance of type "ExportOutput" -> structure: parameter
       "shock_id" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN export_samples
    if not params.get('input_ref'):
        raise ValueError("variable input_ref required")
    sample_set_ref = params.get('input_ref')
    output_file_format = params.get('file_format', 'SESAR')
    ret = self.dfu.get_objects({'object_refs': [sample_set_ref]})['data'][0]
    sample_set = ret['data']
    sample_set_name = ret['info'][1]
    sample_url = get_sample_service_url(self.sw_url)

    export_package_dir = os.path.join(self.scratch, "output")
    if not os.path.isdir(export_package_dir):
        os.mkdir(export_package_dir)
    output_file = os.path.join(
        export_package_dir, '_'.join(sample_set_name.split()) + ".csv")

    sample_set_to_output(sample_set, sample_url, ctx['token'],
                         output_file, output_file_format)

    # package it up
    package_details = self.dfu.package_for_download({
        'file_path': export_package_dir,
        'ws_refs': [params['input_ref']]
    })

    output = {
        'shock_id': package_details['shock_id'],
        'result_dir': export_package_dir
    }
    #END export_samples

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method export_samples return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
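# Illustrative shape of the ExportParams input and ExportOutput result for
# export_samples, per the docstring above. This is a sketch, not part of the
# module: the object reference, shock id, and result path are hypothetical
# placeholders; only the key names come from the code.
example_export_params = {
    'input_ref': '12345/6/7',      # SampleSet object reference (placeholder)
    'file_format': 'SESAR',        # optional; defaults to 'SESAR'
}
example_export_output = {
    'shock_id': '<shock-node-id>',             # returned by package_for_download
    'result_dir': '/kb/module/work/tmp/output',  # export_package_dir (assumed scratch path)
}
print(example_export_params, example_export_output)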
def update_sample_set_acls(self, ctx, params):
    """
    :param params: instance of type "update_sample_set_acls_params" ->
       structure: parameter "workspace_name" of String, parameter
       "workspace_id" of Long, parameter "sample_set_ref" of String,
       parameter "new_users" of list of String, parameter "is_reader" of
       Long, parameter "is_writer" of Long, parameter "is_admin" of Long,
       parameter "share_within_workspace" of Long
    :returns: instance of type "update_sample_set_acls_output" ->
       structure: parameter "status" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN update_sample_set_acls

    # first get sample_set object
    sample_set_ref = params.get('sample_set_ref')
    ret = self.dfu.get_objects({'object_refs': [sample_set_ref]})['data'][0]
    sample_set = ret['data']

    sample_url = get_sample_service_url(self.sw_url)

    acls = {'read': [], 'write': [], 'admin': []}

    if params.get('share_within_workspace'):
        acls = get_workspace_user_perms(self.workspace_url, params.get('workspace_id'),
                                        ctx['token'], ctx['user_id'], acls)

    for new_user in params.get('new_users', []):
        if params.get('is_admin'):
            acls['admin'].append(new_user)
        elif params.get('is_writer'):
            acls['write'].append(new_user)
        elif params.get('is_reader'):
            acls['read'].append(new_user)

    for sample in sample_set['samples']:
        sample_id = sample['id']
        status = update_acls(sample_url, sample_id, acls, ctx['token'])
    output = {"status": status}
    #END update_sample_set_acls

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method update_sample_set_acls return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
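# Illustrative shape of update_sample_set_acls parameters and the ACL dict the
# method builds from them. This is a sketch only: the workspace id, reference,
# and username below are hypothetical placeholders; the key names follow the
# docstring and code above.
example_acl_params = {
    'workspace_id': 12345,
    'sample_set_ref': '12345/6/7',
    'new_users': ['someuser'],
    'is_writer': 1,                 # one of is_reader / is_writer / is_admin
    'share_within_workspace': 0,
}
# With these params the method appends 'someuser' to the 'write' list before
# calling update_acls for each sample in the SampleSet:
example_acls = {'read': [], 'write': ['someuser'], 'admin': []}
print(example_acl_params, example_acls)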
def setUpClass(cls):
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('sample_uploader'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'sample_uploader',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL, token=token)
    cls.serviceImpl = sample_uploader(cls.cfg)
    cls.curr_dir = os.path.dirname(os.path.realpath(__file__))
    cls.scratch = cls.cfg['scratch']
    cls.wiz_url = cls.cfg['srv-wiz-url']
    cls.sample_url = get_sample_service_url(cls.wiz_url)
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    suffix = int(time.time() * 1000)
    cls.wsName = "test_ContigFilter_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': cls.wsName})  # noqa
    cls.wsID = ret[0]
    cls.ss = SampleService(cls.wiz_url, token=token, service_ver='beta')
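# A self-contained sketch of the [sample_uploader] deploy.cfg section that this
# setup reads via KB_DEPLOYMENT_CONFIG. Only the key names are taken from the
# code above; the URLs and scratch path are hypothetical placeholders.
from configparser import ConfigParser

example_cfg_text = """
[sample_uploader]
auth-service-url = https://example.org/services/auth/api/legacy/KBase/Sessions/Login
workspace-url = https://example.org/services/ws
srv-wiz-url = https://example.org/services/service_wizard
scratch = /kb/module/work/tmp
"""
example_config = ConfigParser()
example_config.read_string(example_cfg_text)
print(dict(example_config.items('sample_uploader')))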
def import_samples_from_file(params, sw_url, workspace_url, username, token,
                             column_mapping, column_groups, date_columns,
                             column_unit_regex, input_sample_set, header_row_index):
    """
    import samples from '.csv' or '.xls' files in SESAR format
    """
    # verify inputs
    sample_file = validate_params(params)
    ws_name = params.get('workspace_name')

    df = load_file(sample_file, header_row_index, date_columns)
    errors = []
    first_sample_idx = header_row_index + 1

    # change columns to upload format
    columns_to_input_names = {}
    for col_idx, col_name in enumerate(df.columns):
        try:
            renamed = upload_key_format(col_name)
            if renamed in columns_to_input_names:
                raise SampleContentError((
                    f"Duplicate column \"{renamed}\". \"{col_name}\" would overwrite "
                    f"a different column \"{columns_to_input_names[renamed]}\". "
                    "Rename your columns to be unique alphanumerically, ignoring "
                    "whitespace and case."
                ), key=col_name)
            columns_to_input_names[renamed] = col_name
        except SampleContentError as e:
            e.column = col_idx
            errors.append(e)

    df = df.rename(columns={
        columns_to_input_names[col]: col for col in columns_to_input_names
    })
    df.replace({n: None for n in NOOP_VALS}, inplace=True)

    # TODO: Make sure to check all possible ID fields, even when not parameterized
    if params.get('id_field'):
        id_field = upload_key_format(params.get('id_field'))
        if id_field not in list(df.columns):
            raise ValueError(
                f"The expected ID field column \"{id_field}\" could not be found. "
                "Adjust your parameters or input such that the following are correct:\n"
                f"- File Format: {params.get('file_format')} (the format to which your sample data conforms)\n"
                f"- ID Field: {params.get('id_field', 'id')} (the header of the column containing your IDs)\n"
                f"- Headers Row: {params.get('header_row_index')} (the row # where column headers are located in your spreadsheet)"
            )
        # here we rename whatever the id field was/is to "id"
        columns_to_input_names["id"] = columns_to_input_names.pop(id_field)
        df.rename(columns={id_field: "id"}, inplace=True)
        # remove "id" rename field from column mapping if it exists
        if column_mapping:
            column_mapping = {
                key: val for key, val in column_mapping.items() if val != "id"
            }

    if not errors:
        if column_mapping:
            df = df.rename(columns=column_mapping)
            # redundant, even harmful if things get out of sync
            # verify_columns(df)
            for key in column_mapping:
                if key in columns_to_input_names:
                    val = columns_to_input_names.pop(key)
                    columns_to_input_names[column_mapping[key]] = val
        if params['file_format'].upper() in ['SESAR', "ENIGMA"]:
            if 'material' in df.columns:
                df.rename(columns={
                    "material": params['file_format'].upper() + ":material"
                }, inplace=True)
                val = columns_to_input_names.pop("material")
                columns_to_input_names[params['file_format'].upper() + ":material"] = val
        if params['file_format'].upper() == "KBASE":
            if 'material' in df.columns:
                df.rename(columns={"material": "SESAR:material"}, inplace=True)
                val = columns_to_input_names.pop("material")
                columns_to_input_names["SESAR:material"] = val

    acls = {
        "read": [],
        "write": [],
        "admin": [],
        "public_read": -1  # set to false (<0)
    }

    if params.get('share_within_workspace'):
        # query workspace for user permissions.
        acls = get_workspace_user_perms(workspace_url, params.get('workspace_id'),
                                        token, username, acls)

    groups = SAMP_SERV_CONFIG['validators']
    cols = list(set(df.columns) - set(REGULATED_COLS))
    sample_url = get_sample_service_url(sw_url)

    samples, existing_samples, produce_errors = _produce_samples(
        df, cols, column_groups, column_unit_regex, sample_url, token,
        input_sample_set['samples'], columns_to_input_names, first_sample_idx)
    errors += produce_errors

    if params.get('prevalidate') and not errors:
        error_detail = validate_samples([s['sample'] for s in samples], sample_url, token)
        errors += [
            SampleContentError(e['message'], sample_name=e['sample_name'],
                               node=e['node'], key=e['key'])
            for e in error_detail
        ]

    if errors:
        saved_samples = []
        # Fill in missing location information for SampleContentError(s)
        err_col_keys = {}
        err_key_indices = {}
        for col_idx, col_name in enumerate(df.columns):
            err_col_keys[col_idx] = col_name
            err_key_indices[col_name] = col_idx
            if col_name in columns_to_input_names and columns_to_input_names[col_name] != col_name:
                err_key_indices[columns_to_input_names[col_name]] = col_idx

        err_row_sample_names = {}
        err_sample_name_indices = {}
        for relative_row_idx, row in df.iterrows():
            row_pos = first_sample_idx + relative_row_idx
            sample_name = row.get('id')
            err_sample_name_indices[sample_name] = row_pos
            err_row_sample_names[row_pos] = sample_name

        for e in errors:
            if e.column is not None and e.key is None and e.column in err_col_keys:
                e.key = err_col_keys[e.column]
            if e.column is None and e.key is not None and e.key in err_key_indices:
                e.column = err_key_indices[e.key]
            if e.row is not None and e.sample_name is None and e.row in err_row_sample_names:
                e.sample_name = err_row_sample_names[e.row]
            if e.row is None and e.sample_name is not None and e.sample_name in err_sample_name_indices:
                e.row = err_sample_name_indices[e.sample_name]
    else:
        saved_samples = _save_samples(samples, acls, sample_url, token)
        saved_samples += existing_samples

    return {
        "samples": saved_samples,
        "description": params.get('description')
    }, errors
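# A self-contained sketch of the column-normalization step above, using a
# stand-in for upload_key_format. The stub's behavior (lowercase, collapse
# whitespace to underscores) is an assumption inferred from the
# duplicate-column error message, not the real helper.
import re
import pandas as pd

def upload_key_format_stub(name):
    return re.sub(r"\s+", "_", str(name).strip()).lower()

sketch_df = pd.DataFrame(columns=["Sample Name", "Latitude ", "material"])
sketch_columns_to_input_names = {}
for col in sketch_df.columns:
    renamed = upload_key_format_stub(col)
    if renamed in sketch_columns_to_input_names:
        raise ValueError(f"Duplicate column {renamed!r}")
    sketch_columns_to_input_names[renamed] = col

sketch_df = sketch_df.rename(
    columns={orig: new for new, orig in sketch_columns_to_input_names.items()})
print(list(sketch_df.columns))           # ['sample_name', 'latitude', 'material']
print(sketch_columns_to_input_names)     # normalized name -> original header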
def setUpClass(cls):
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('sample_uploader'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'sample_uploader',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL, token=token)
    cls.serviceImpl = sample_uploader(cls.cfg)
    cls.curr_dir = os.path.dirname(os.path.realpath(__file__))
    cls.scratch = cls.cfg['scratch']
    cls.wiz_url = cls.cfg['srv-wiz-url']
    cls.sample_url = get_sample_service_url(cls.wiz_url)
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    suffix = int(time.time() * 1000)
    cls.wsName = "test_sample_uploader_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': cls.wsName})  # noqa
    cls.wsID = ret[0]
    cls.sesar_sample_file = os.path.join(cls.curr_dir, "data", "fake_samples.tsv")
    cls.sample_set_name = "test_sample_set_1"
    params = {
        'workspace_name': cls.wsName,
        'workspace_id': cls.wsID,
        'sample_file': cls.sesar_sample_file,
        'file_format': "SESAR",
        'header_row_index': 2,
        'set_name': cls.sample_set_name,
        'description': "this is a test sample set.",
        'output_format': "",
        'id_field': "test id field",
        'incl_input_in_output': 1,
        'share_within_workspace': 1,
    }
    ret = cls.serviceImpl.import_samples(cls.ctx, params)[0]
    cls.sample_set = ret['sample_set']
    cls.a_sample_id = ret['sample_set']['samples'][0]['id']
    cls.sample_set_ref = ret['sample_set_ref']
    # add new user to test permissions
    cls.wsClient.set_permissions({
        "id": cls.wsID,
        "new_permission": "w",
        "users": ["psdehal"]
    })
def import_samples_from_file(params, sw_url, workspace_url, username, token,
                             column_mapping, column_groups, date_columns,
                             column_unit_regex, input_sample_set, header_row_index):
    """
    import samples from '.csv' or '.xls' files in SESAR format
    """
    # verify inputs
    sample_file = validate_params(params)
    ws_name = params.get('workspace_name')

    df = load_file(sample_file, header_row_index, date_columns)

    # change columns to upload format
    # TODO: make sure separate columns are not being renamed to the same thing
    columns_to_input_names = {upload_key_format(c): c for c in df.columns}
    df = df.rename(columns={c: upload_key_format(c) for c in df.columns})
    df.replace({n: None for n in NOOP_VALS}, inplace=True)

    if params.get('id_field'):
        id_field = upload_key_format(params['id_field'])
        if id_field in list(df.columns):
            # here we rename whatever the id field was/is to "id"
            columns_to_input_names["id"] = columns_to_input_names.pop(id_field)
            df.rename(columns={id_field: "id"}, inplace=True)
            # remove "id" rename field from column mapping if it exists
            if column_mapping:
                column_mapping = {
                    key: val for key, val in column_mapping.items() if val != "id"
                }
        else:
            raise ValueError(
                f"'{params['id_field']}' is not a column field in the input file."
            )
    else:
        print("No id_field argument present in params, proceeding with defaults.")

    if column_mapping:
        df = df.rename(columns=column_mapping)
        # redundant, even harmful if things get out of sync
        # verify_columns(df)
        for key in column_mapping:
            if key in columns_to_input_names:
                val = columns_to_input_names.pop(key)
                columns_to_input_names[column_mapping[key]] = val
    if params['file_format'].upper() in ['SESAR', "ENIGMA"]:
        if 'material' in df.columns:
            df.rename(columns={
                "material": params['file_format'].upper() + ":material"
            }, inplace=True)
            val = columns_to_input_names.pop("material")
            columns_to_input_names[params['file_format'].upper() + ":material"] = val
    if params['file_format'].upper() == "KBASE":
        if 'material' in df.columns:
            df.rename(columns={"material": "SESAR:material"}, inplace=True)
            val = columns_to_input_names.pop("material")
            columns_to_input_names["SESAR:material"] = val

    acls = {
        "read": [],
        "write": [],
        "admin": [],
        "public_read": -1  # set to false (<0)
    }

    if params.get('share_within_workspace'):
        # query workspace for user permissions.
        acls = get_workspace_user_perms(workspace_url, params.get('workspace_id'),
                                        token, username, acls)

    groups = SAMP_SERV_CONFIG['validators']
    cols = list(set(df.columns) - set(REGULATED_COLS))
    sample_url = get_sample_service_url(sw_url)

    samples, existing_samples = _produce_samples(df, cols, column_groups,
                                                 column_unit_regex, sample_url,
                                                 token, input_sample_set['samples'],
                                                 columns_to_input_names)
    errors = {}
    if params.get('prevalidate'):
        errors = validate_samples([s['sample'] for s in samples], sample_url, token)
    if errors:
        saved_samples = []
    else:
        saved_samples = _save_samples(samples, acls, sample_url, token)
        saved_samples += existing_samples

    return {
        "samples": saved_samples,
        "description": params.get('description')
    }, errors
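# A self-contained illustration of the format-specific "material" renaming in
# both versions of import_samples_from_file above: for SESAR/ENIGMA files the
# plain "material" column is prefixed with the format name. The DataFrame
# contents and file_format value below are hypothetical examples.
import pandas as pd

material_df = pd.DataFrame({"id": ["S1"], "material": ["basalt"]})
example_file_format = "SESAR"
if example_file_format.upper() in ("SESAR", "ENIGMA") and "material" in material_df.columns:
    material_df = material_df.rename(
        columns={"material": example_file_format.upper() + ":material"})
print(list(material_df.columns))   # ['id', 'SESAR:material']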