def export_samples(self, ctx, params):
        """
        :param params: instance of type "ExportParams" (export function for
           samples) -> structure: parameter "input_ref" of String, parameter
           "file_format" of String
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN export_samples
        if not params.get('input_ref'):
            raise ValueError("Parameter 'input_ref' is required")
        sample_set_ref = params.get('input_ref')
        output_file_format = params.get('file_format', 'SESAR')

        ret = self.dfu.get_objects(
            {'object_refs': [sample_set_ref]})['data'][0]
        sample_set = ret['data']
        sample_set_name = ret['info'][1]
        sample_url = get_sample_service_url(self.sw_url)

        export_package_dir = os.path.join(self.scratch, "output")
        if not os.path.isdir(export_package_dir):
            os.mkdir(export_package_dir)
        output_file = os.path.join(export_package_dir,
                                   '_'.join(sample_set_name.split()) + ".csv")

        sample_set_to_output(sample_set, sample_url, ctx['token'], output_file,
                             output_file_format)

        # package it up
        package_details = self.dfu.package_for_download({
            'file_path': export_package_dir,
            'ws_refs': [params['input_ref']]
        })

        output = {
            'shock_id': package_details['shock_id'],
            'result_dir': export_package_dir
        }
        #END export_samples

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method export_samples return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
    def update_sample_set_acls(self, ctx, params):
        """
        :param params: instance of type "update_sample_set_acls_params" ->
           structure: parameter "workspace_name" of String, parameter
           "workspace_id" of Long, parameter "sample_set_ref" of String,
           parameter "new_users" of list of String, parameter "is_reader" of
           Long, parameter "is_writer" of Long, parameter "is_admin" of Long,
           parameter "share_within_workspace" of Long
        :returns: instance of type "update_sample_set_acls_output" ->
           structure: parameter "status" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN update_sample_set_acls

        # first get sample_set object
        sample_set_ref = params.get('sample_set_ref')
        ret = self.dfu.get_objects(
            {'object_refs': [sample_set_ref]})['data'][0]
        sample_set = ret['data']
        sample_url = get_sample_service_url(self.sw_url)

        acls = {'read': [], 'write': [], 'admin': []}

        if params.get('share_within_workspace'):
            acls = get_workspace_user_perms(self.workspace_url,
                                            params.get('workspace_id'),
                                            ctx['token'], ctx['user_id'], acls)

        for new_user in params.get('new_users', []):
            if params.get('is_admin'):
                acls['admin'].append(new_user)
            elif params.get('is_writer'):
                acls['write'].append(new_user)
            elif params.get('is_reader'):
                acls['read'].append(new_user)

        status = None  # remains None if the sample set contains no samples
        for sample in sample_set['samples']:
            sample_id = sample['id']
            status = update_acls(sample_url, sample_id, acls, ctx['token'])
        output = {"status": status}
        #END update_sample_set_acls

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method update_sample_set_acls return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
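For orientation, here is a minimal usage sketch of the two methods above, assuming an already-instantiated sample_uploader Impl object (impl), a populated MethodContext (ctx), and an existing SampleSet object. The object reference and user name below are placeholders; the parameter keys mirror the ExportParams and update_sample_set_acls_params structures described in the docstrings.

# Hypothetical usage sketch; impl, ctx, and the SampleSet reference are assumed
# to come from elsewhere (e.g. a test harness like the setUpClass shown below).
export_out = impl.export_samples(ctx, {
    'input_ref': '12345/2/1',       # placeholder SampleSet reference
    'file_format': 'SESAR'          # defaults to 'SESAR' when omitted
})[0]
print(export_out['shock_id'], export_out['result_dir'])

acl_out = impl.update_sample_set_acls(ctx, {
    'sample_set_ref': '12345/2/1',  # placeholder SampleSet reference
    'new_users': ['collaborator'],  # placeholder account name
    'is_reader': 1,                 # grant read access to the listed users
    'is_writer': 0,
    'is_admin': 0,
    'share_within_workspace': 0,
    'workspace_id': 12345           # only consulted when share_within_workspace is set
})[0]
print(acl_out['status'])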
Example #3
 @classmethod
 def setUpClass(cls):
     token = os.environ.get('KB_AUTH_TOKEN', None)
     config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('sample_uploader'):
         cls.cfg[nameval[0]] = nameval[1]
     # Getting username from Auth profile for token
     authServiceUrl = cls.cfg['auth-service-url']
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token': token,
         'user_id': user_id,
         'provenance': [{
             'service': 'sample_uploader',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated': 1
     })
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = Workspace(cls.wsURL, token=token)
     cls.serviceImpl = sample_uploader(cls.cfg)
     cls.curr_dir = os.path.dirname(os.path.realpath(__file__))
     cls.scratch = cls.cfg['scratch']
     cls.wiz_url = cls.cfg['srv-wiz-url']
     cls.sample_url = get_sample_service_url(cls.wiz_url)
     cls.callback_url = os.environ['SDK_CALLBACK_URL']
     suffix = int(time.time() * 1000)
     cls.wsName = "test_ContigFilter_" + str(suffix)
     ret = cls.wsClient.create_workspace({'workspace': cls.wsName})  # noqa
     cls.wsID = ret[0]
     cls.ss = SampleService(cls.wiz_url, token=token, service_ver='beta')
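The snippet above does not show the matching tearDownClass; a minimal sketch of one, assuming the standard KBase Workspace client, would simply delete the workspace created in setUpClass:

@classmethod
def tearDownClass(cls):
    # Remove the test workspace created in setUpClass, if it exists.
    if hasattr(cls, 'wsName'):
        cls.wsClient.delete_workspace({'workspace': cls.wsName})
        print('Test workspace was deleted')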
Example #4
def import_samples_from_file(params, sw_url, workspace_url, username, token,
                             column_mapping, column_groups, date_columns,
                             column_unit_regex, input_sample_set,
                             header_row_index):
    """
    Import samples from '.csv' or '.xls' files (SESAR, ENIGMA, or KBASE format).
    """
    # verify inputs
    sample_file = validate_params(params)
    ws_name = params.get('workspace_name')
    df = load_file(sample_file, header_row_index, date_columns)

    errors = []
    first_sample_idx = header_row_index + 1

    # change columns to upload format
    columns_to_input_names = {}
    for col_idx, col_name in enumerate(df.columns):
        try:
            renamed = upload_key_format(col_name)
            if renamed in columns_to_input_names:
                raise SampleContentError((
                    f"Duplicate column \"{renamed}\". \"{col_name}\" would overwrite "
                    f"a different column \"{columns_to_input_names[renamed]}\". "
                    "Rename your columns to be unique alphanumerically, "
                    "ignoring whitespace and case."
                ), key=col_name)
            columns_to_input_names[renamed] = col_name
        except SampleContentError as e:
            e.column = col_idx
            errors.append(e)

    df = df.rename(columns={
        columns_to_input_names[col]: col
        for col in columns_to_input_names
    })
    df.replace({n: None for n in NOOP_VALS}, inplace=True)

    #TODO: Make sure to check all possible ID fields, even when not parameterized
    if params.get('id_field'):
        id_field = upload_key_format(params.get('id_field'))
        if id_field not in list(df.columns):
            raise ValueError(
                f"The expected ID field column \"{id_field}\" could not be found. "
                "Adjust your parameters or input such that the following are correct:\n"
                f"- File Format: {params.get('file_format')} (the format to which your sample data conforms)\n"
                f"- ID Field: {params.get('id_field','id')}\n (the header of the column containing your IDs)\n"
                f"- Headers Row: {params.get('header_row_index')} (the row # where column headers are located in your spreadsheet)"
            )
        # here we rename whatever the id field was/is to "id"
        columns_to_input_names["id"] = columns_to_input_names.pop(id_field)
        df.rename(columns={id_field: "id"}, inplace=True)
        # remove "id" rename field from column mapping if exists
        if column_mapping:
            column_mapping = {
                key: val
                for key, val in column_mapping.items() if val != "id"
            }

    if not errors:
        if column_mapping:
            df = df.rename(columns=column_mapping)
        # redundant, even harmful if things get out of sync
        # verify_columns(df)
        for key in column_mapping:
            if key in columns_to_input_names:
                val = columns_to_input_names.pop(key)
                columns_to_input_names[column_mapping[key]] = val

        if params['file_format'].upper() in ['SESAR', 'ENIGMA']:
            if 'material' in df.columns:
                new_col = params['file_format'].upper() + ":material"
                df.rename(columns={"material": new_col}, inplace=True)
                val = columns_to_input_names.pop("material")
                columns_to_input_names[new_col] = val
        if params['file_format'].upper() == "KBASE":
            if 'material' in df.columns:
                df.rename(columns={"material": "SESAR:material"}, inplace=True)
                val = columns_to_input_names.pop("material")
                columns_to_input_names["SESAR:material"] = val

        acls = {
            "read": [],
            "write": [],
            "admin": [],
            "public_read": -1  # set to false (<0)
        }
        if params.get('share_within_workspace'):
            # query workspace for user permissions.
            acls = get_workspace_user_perms(workspace_url,
                                            params.get('workspace_id'), token,
                                            username, acls)
        groups = SAMP_SERV_CONFIG['validators']

        cols = list(set(df.columns) - set(REGULATED_COLS))
        sample_url = get_sample_service_url(sw_url)
        samples, existing_samples, produce_errors = _produce_samples(
            df, cols, column_groups, column_unit_regex, sample_url, token,
            input_sample_set['samples'], columns_to_input_names,
            first_sample_idx)
        errors += produce_errors

    if params.get('prevalidate') and not errors:
        error_detail = validate_samples([s['sample'] for s in samples],
                                        sample_url, token)
        errors += [
            SampleContentError(e['message'],
                               sample_name=e['sample_name'],
                               node=e['node'],
                               key=e['key']) for e in error_detail
        ]

    if errors:
        saved_samples = []
        # Fill in missing location information for SamplesContentError(s)
        err_col_keys = {}
        err_key_indices = {}
        for col_idx, col_name in enumerate(df.columns):
            err_col_keys[col_idx] = col_name
            err_key_indices[col_name] = col_idx
            if (col_name in columns_to_input_names
                    and columns_to_input_names[col_name] != col_name):
                err_key_indices[columns_to_input_names[col_name]] = col_idx

        err_row_sample_names = {}
        err_sample_name_indices = {}
        for relative_row_idx, row in df.iterrows():
            row_pos = first_sample_idx + relative_row_idx
            sample_name = row.get('id')
            err_sample_name_indices[sample_name] = row_pos
            err_row_sample_names[row_pos] = sample_name

        for e in errors:
            if e.column is not None and e.key is None and e.column in err_col_keys:
                e.key = err_col_keys[e.column]
            if e.column is None and e.key is not None and e.key in err_key_indices:
                e.column = err_key_indices[e.key]
            if e.row is not None and e.sample_name is None and e.row in err_row_sample_names:
                e.sample_name = err_row_sample_names[e.row]
            if e.row is None and e.sample_name is not None and e.sample_name in err_sample_name_indices:
                e.row = err_sample_name_indices[e.sample_name]
    else:
        saved_samples = _save_samples(samples, acls, sample_url, token)
        saved_samples += existing_samples

    return {
        "samples": saved_samples,
        "description": params.get('description')
    }, errors
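The duplicate-column guard near the top of this function is easiest to see on a toy header row. upload_key_format itself is not shown in this snippet, so the sketch below substitutes a simplified normalizer (lower-case, whitespace collapsed to underscores) purely to illustrate why two visually different headers can collide:

def _toy_key_format(name):
    # Simplified stand-in for upload_key_format; the real function may differ.
    return "_".join(name.strip().lower().split())

headers = ["Sample Name", "sample name", "Latitude"]
seen = {}
for col in headers:
    renamed = _toy_key_format(col)
    if renamed in seen:
        print(f'Duplicate column "{renamed}": "{col}" would overwrite "{seen[renamed]}"')
    else:
        seen[renamed] = col
# -> Duplicate column "sample_name": "sample name" would overwrite "Sample Name"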
Example #5
 @classmethod
 def setUpClass(cls):
     token = os.environ.get('KB_AUTH_TOKEN', None)
     config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('sample_uploader'):
         cls.cfg[nameval[0]] = nameval[1]
     # Getting username from Auth profile for token
     authServiceUrl = cls.cfg['auth-service-url']
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token': token,
         'user_id': user_id,
         'provenance': [{
             'service': 'sample_uploader',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated': 1
     })
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = Workspace(cls.wsURL, token=token)
     cls.serviceImpl = sample_uploader(cls.cfg)
     cls.curr_dir = os.path.dirname(os.path.realpath(__file__))
     cls.scratch = cls.cfg['scratch']
     cls.wiz_url = cls.cfg['srv-wiz-url']
     cls.sample_url = get_sample_service_url(cls.wiz_url)
     cls.callback_url = os.environ['SDK_CALLBACK_URL']
     suffix = int(time.time() * 1000)
     cls.wsName = "test_sample_uploader_" + str(suffix)
     ret = cls.wsClient.create_workspace({'workspace': cls.wsName})  # noqa
     cls.wsID = ret[0]
     cls.sesar_sample_file = os.path.join(cls.curr_dir, "data",
                                          "fake_samples.tsv")
     cls.sample_set_name = "test_sample_set_1"
     params = {
         'workspace_name': cls.wsName,
         'workspace_id': cls.wsID,
         'sample_file': cls.sesar_sample_file,
         'file_format': "SESAR",
         'header_row_index': 2,
         'set_name': cls.sample_set_name,
         'description': "this is a test sample set.",
         'output_format': "",
         'id_field': "test id field",
         'incl_input_in_output': 1,
         'share_within_workspace': 1,
     }
     ret = cls.serviceImpl.import_samples(cls.ctx, params)[0]
     cls.sample_set = ret['sample_set']
     cls.a_sample_id = ret['sample_set']['samples'][0]['id']
     cls.sample_set_ref = ret['sample_set_ref']
     # add new user to test permissions
     cls.wsClient.set_permissions({
         "id": cls.wsID,
         "new_permission": "w",
         "users": ["psdehal"]
     })
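With these fixtures in place, a test exercising update_sample_set_acls could be sketched roughly as follows; the method only guarantees a dict containing a 'status' key, so the assertions below stay at that level:

def test_update_sample_set_acls(self):
    # Hypothetical test body; parameter keys follow update_sample_set_acls_params.
    params = {
        'workspace_name': self.wsName,
        'workspace_id': self.wsID,
        'sample_set_ref': self.sample_set_ref,
        'new_users': ['psdehal'],   # user granted workspace write access in setUpClass
        'is_reader': 1,
        'is_writer': 0,
        'is_admin': 0,
        'share_within_workspace': 1,
    }
    ret = self.serviceImpl.update_sample_set_acls(self.ctx, params)[0]
    self.assertIsInstance(ret, dict)
    self.assertIn('status', ret)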
Example #6
def import_samples_from_file(params, sw_url, workspace_url, username, token,
                             column_mapping, column_groups, date_columns,
                             column_unit_regex, input_sample_set,
                             header_row_index):
    """
    Import samples from '.csv' or '.xls' files (SESAR, ENIGMA, or KBASE format).
    """
    # verify inputs
    sample_file = validate_params(params)
    ws_name = params.get('workspace_name')
    df = load_file(sample_file, header_row_index, date_columns)
    # change columns to upload format
    # TODO: make sure separate columns are not being renamed to the same thing
    columns_to_input_names = {upload_key_format(c): c for c in df.columns}
    df = df.rename(columns={c: upload_key_format(c) for c in df.columns})
    df.replace({n: None for n in NOOP_VALS}, inplace=True)

    if params.get('id_field'):
        id_field = upload_key_format(params['id_field'])
        if id_field in list(df.columns):
            # here we rename whatever the id field was/is to "id"
            columns_to_input_names["id"] = columns_to_input_names.pop(id_field)
            df.rename(columns={id_field: "id"}, inplace=True)
            # remove "id" rename field from column mapping if exists
            if column_mapping:
                column_mapping = {
                    key: val
                    for key, val in column_mapping.items() if val != "id"
                }
        else:
            raise ValueError(
                f"'{params['id_field']}' is not a column field in the input file."
            )
    else:
        print("No id_field argument present in params, "
              "proceeding with defaults.")

    if column_mapping:
        df = df.rename(columns=column_mapping)
    # redundant, even harmful if things get out of sync
    # verify_columns(df)
    for key in column_mapping:
        if key in columns_to_input_names:
            val = columns_to_input_names.pop(key)
            columns_to_input_names[column_mapping[key]] = val

    if params['file_format'].upper() in ['SESAR', 'ENIGMA']:
        if 'material' in df.columns:
            new_col = params['file_format'].upper() + ":material"
            df.rename(columns={"material": new_col}, inplace=True)
            val = columns_to_input_names.pop("material")
            columns_to_input_names[new_col] = val
    if params['file_format'].upper() == "KBASE":
        if 'material' in df.columns:
            df.rename(columns={"material": "SESAR:material"}, inplace=True)
            val = columns_to_input_names.pop("material")
            columns_to_input_names["SESAR:material"] = val

    acls = {
        "read": [],
        "write": [],
        "admin": [],
        "public_read": -1  # set to false (<0)
    }
    if params.get('share_within_workspace'):
        # query workspace for user permissions.
        acls = get_workspace_user_perms(workspace_url,
                                        params.get('workspace_id'), token,
                                        username, acls)
    groups = SAMP_SERV_CONFIG['validators']

    cols = list(set(df.columns) - set(REGULATED_COLS))
    sample_url = get_sample_service_url(sw_url)
    samples, existing_samples = _produce_samples(
        df, cols, column_groups, column_unit_regex, sample_url, token,
        input_sample_set['samples'], columns_to_input_names)
    errors = {}
    if params.get('prevalidate'):
        errors = validate_samples([s['sample'] for s in samples], sample_url,
                                  token)
    if errors:
        saved_samples = []
    else:
        saved_samples = _save_samples(samples, acls, sample_url, token)
        saved_samples += existing_samples
    return {
        "samples": saved_samples,
        "description": params.get('description')
    }, errors
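For reference, the params dict consumed by both versions of import_samples_from_file touches roughly the following keys; the values shown here are illustrative placeholders, not defaults from the source:

# Illustrative params shape, limited to the keys actually read above.
params = {
    'workspace_name': 'my_workspace',       # placeholder workspace name
    'workspace_id': 12345,                  # used when share_within_workspace is set
    'sample_file': '/path/to/samples.csv',  # consumed via validate_params(params)
    'file_format': 'SESAR',                 # 'SESAR', 'ENIGMA', or 'KBASE'
    'id_field': 'Sample Name',              # header of the ID column (optional)
    'header_row_index': 2,                  # referenced in the ID-field error message
    'description': 'example import',        # passed through to the return value
    'share_within_workspace': 1,            # copy workspace ACLs onto the new samples
    'prevalidate': 1,                       # validate samples before saving
}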