Example #1
    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.scratch = config['scratch']
        self.token = config['KB_AUTH_TOKEN']
        self.srv_wiz_url = config['srv-wiz-url']
        self.dfu = DataFileUtil(self.callback_url)
        self.sample_ser = SampleService(self.callback_url)
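
A hedged sketch of the config dict this constructor expects (key names come from the snippet above; every value here is a placeholder, and the class name is hypothetical):

    # Placeholder values for illustration only; in a real SDK job these are
    # injected by the KBase job runner and deploy.cfg.
    config = {
        'SDK_CALLBACK_URL': 'http://localhost:9999',
        'scratch': '/kb/module/work/tmp',
        'KB_AUTH_TOKEN': '<auth token>',
        'srv-wiz-url': 'https://kbase.us/services/service_wizard',
    }
    util = SampleUtil(config)  # hypothetical name for the class defined above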
Example #2
    def setUpClass(cls):
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenericsAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token': cls.token,
            'user_id': user_id,
            'provenance': [{
                'service': 'GenericsAPI',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated': 1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = GenericsAPI(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.shockURL = cls.cfg['shock-url']
        cls.dfu = DataFileUtil(cls.callback_url)
        cls.sample_uploader = sample_uploader(cls.callback_url,
                                              service_ver="dev")
        cls.sample_url = cls.cfg.get('kbase-endpoint') + '/sampleservice'
        cls.sample_ser = SampleService(cls.sample_url)
        cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                               token=cls.token)

        suffix = int(time.time() * 1000)
        cls.wsName = "test_GenericsAPI_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.wsId = ret[0]

        small_file = os.path.join(cls.scratch, 'test.txt')
        with open(small_file, "w") as f:
            f.write("empty content")
        cls.test_shock = cls.dfu.file_to_shock({
            'file_path': small_file,
            'make_handle': True
        })
        cls.handles_to_delete = []
        cls.nodes_to_delete = []
        cls.handles_to_delete.append(cls.test_shock['handle']['hid'])
        cls.nodes_to_delete.append(cls.test_shock['shock_id'])

        cls.prepare_data()
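
This setup creates a throwaway workspace and records shock nodes and handles for later cleanup. The matching tearDownClass is not part of this example; a minimal sketch of the cleanup it implies, assuming the standard Workspace and handle-service client APIs:

    @classmethod
    def tearDownClass(cls):
        # Delete the test workspace created in setUpClass.
        if hasattr(cls, 'wsName'):
            cls.wsClient.delete_workspace({'workspace': cls.wsName})
        # Delete shock nodes over HTTP, then the handles that point at them.
        import requests
        headers = {'Authorization': 'OAuth ' + cls.token}
        for node in cls.nodes_to_delete:
            requests.delete(cls.shockURL + '/node/' + node, headers=headers)
        cls.hs.delete_handles(cls.hs.hids_to_handles(cls.handles_to_delete))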
Example #3
    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.scratch = config['scratch']
        self.token = config['KB_AUTH_TOKEN']
        self.srv_wiz_url = config['srv-wiz-url']
        self.dfu = DataFileUtil(self.callback_url)
        self.sample_ser = SampleService(self.callback_url)

        logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                            level=logging.INFO)
Example #4
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenericsAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token': token,
            'user_id': user_id,
            'provenance': [{
                'service': 'GenericsAPI',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated': 1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = GenericsAPI(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']

        cls.gfu = GenomeFileUtil(cls.callback_url)
        cls.dfu = DataFileUtil(cls.callback_url)
        cls.sample_uploader = sample_uploader(cls.callback_url,
                                              service_ver="dev")
        cls.sample_ser = SampleService(cls.cfg['srv-wiz-url'])

        suffix = int(time.time() * 1000)
        cls.wsName = "test_GenericsAPI_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.wsId = ret[0]
        cls.prepare_data()
Example #5
    def link_reads(self, ctx, params):
        """
        Create links between samples and reads objects
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN link_reads
        ss = SampleService(self.sw_url, token=ctx['token'], service_ver='beta')
        sample_set_ref = params['sample_set_ref']
        sample_set = SampleSet(self.dfu, sample_set_ref)
        links = [(d['sample_name'], d['reads_ref']) for d in params['links']]

        for sample_name, reads_ref in links:
            node_id, version, sample_id = sample_set.get_sample_info(
                sample_name)
            p = dict(
                upa=reads_ref,
                id=sample_id,
                version=version,
                node=node_id,
                update=1,
            )
            ret = ss.create_data_link(p)

        report_client = KBaseReport(self.callback_url)
        report_info = report_client.create_extended_report({
            'workspace_name': params['workspace_name'],
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END link_reads

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method link_reads return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
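
A hedged sketch of how this method might be called, e.g. from a test against the service implementation (all refs and names below are hypothetical):

    params = {
        'workspace_name': 'my_workspace',
        'sample_set_ref': '12345/6/7',
        'links': [
            {'sample_name': 'sample_A', 'reads_ref': '12345/8/1'},
            {'sample_name': 'sample_B', 'reads_ref': '12345/9/1'},
        ],
    }
    # Returns a one-element list holding the ReportResults structure.
    output = impl.link_reads(ctx, params)[0]
    print(output['report_name'], output['report_ref'])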
Example #6
    def setUpClass(cls):
        token = os.environ.get('KB_AUTH_TOKEN', None)
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('sample_uploader'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token': token,
            'user_id': user_id,
            'provenance': [{
                'service': 'sample_uploader',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated': 1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = Workspace(cls.wsURL, token=token)
        cls.serviceImpl = sample_uploader(cls.cfg)
        cls.curr_dir = os.path.dirname(os.path.realpath(__file__))
        cls.scratch = cls.cfg['scratch']
        cls.wiz_url = cls.cfg['srv-wiz-url']
        cls.sample_url = get_sample_service_url(cls.wiz_url)
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        suffix = int(time.time() * 1000)
        cls.wsName = "test_ContigFilter_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})  # noqa
        cls.wsID = ret[0]
        cls.ss = SampleService(cls.wiz_url, token=token, service_ver='beta')
Example #7
    def link_reads(self, ctx, params):
        """
        :param params: instance of type "LinkReadsParams" -> structure:
           parameter "workspace_name" of String, parameter "workspace_id" of
           String, parameter "sample_set_ref" of String, parameter "links" of
           list of type "ReadsLink" (Create links between samples and reads
           objects.) -> structure: parameter "sample_name" of String,
           parameter "reads_ref" of String
        :returns: instance of type "LinkReadsOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String,
           parameter "links" of list of unspecified object
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN link_reads
        logging.info(params)

        ss = SampleService(self.sw_url, service_ver='dev')

        sample_set_ref = params['sample_set_ref']
        sample_set_obj = self.dfu.get_objects(
            {'object_refs': [sample_set_ref]})['data'][0]['data']
        sample_name_2_info = {d['name']: d for d in sample_set_obj['samples']}

        links = [(d['sample_name'][0], d['reads_ref'])
                 for d in params['links']]

        new_data_links = []
        for sample_name, reads_ref in links:
            sample_id = sample_name_2_info[sample_name]['id']
            version = sample_name_2_info[sample_name]['version']
            sample = ss.get_sample({
                'id': sample_id,
                'version': version,
            })
            ret = ss.create_data_link(
                dict(
                    upa=reads_ref,
                    id=sample_id,
                    version=version,
                    node=sample['node_tree'][0]['id'],
                    update=1,
                ))
            new_data_links.append(ret)

        report_client = KBaseReport(self.callback_url)
        report_info = report_client.create_extended_report({
            'workspace_name': params['workspace_name'],
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
            'links': new_data_links,
        }
        #END link_reads

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method link_reads return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
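
Note that this variant indexes d['sample_name'][0], so each link's sample_name is expected to arrive as a one-element list. A hedged call sketch (refs are hypothetical):

    params = {
        'workspace_name': 'my_workspace',
        'sample_set_ref': '12345/6/7',
        'links': [
            {'sample_name': ['sample_A'], 'reads_ref': '12345/8/1'},
        ],
    }
    output = impl.link_reads(ctx, params)[0]
    print(output['links'])  # DataLink records returned by SampleService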
Example #8
    def run_barseqR(self, ctx, params):
        """
        Args:
            :param params: instance of mapping from String to unspecified object
        ctx:
            client_ip: None or 'str', 
            user_id: str, 
            'authenticated': 1,
            'token': str,
            'module': None, 
            'method': None, 
            'call_id': None, 
            'rpc_context': None, 
            'provenance':list<prov_d>
                prov_d: (d)
                    service: (str)
                    'method': 'please_never_use_it_in_production', 
                    'method_params': []}]}
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_barseqR

        # SETUP - Unrelated to inputs --------

        logging.basicConfig(level=logging.DEBUG)

        logging.info("Call back url: " + str(self.callback_url))
        # We create important classes
        dfu = DataFileUtil(self.callback_url)
        logging.info("DFU VARS-- " * 8)
        logging.info(vars(dfu))
        gfu = GenomeFileUtil(self.callback_url)
        smpl_s = SampleService(self.callback_url)
        myToken = os.environ.get('KB_AUTH_TOKEN', None)
        ws = Workspace(self.ws_url, token=myToken)
        ws_id = ws.get_workspace_info({'workspace':
                                       params['workspace_name']})[0]

        logging.info(os.environ)

        logging.info('ws-url')
        logging.info(self.ws_url)
        logging.info('ctx')
        logging.info(ctx)

        # We create indir, outdir, sets_dir (Input, Output, Sets)
        indir = os.path.join(self.shared_folder, "indir")
        os.mkdir(indir)

        outdir = os.path.join(self.shared_folder, "outdir")
        os.mkdir(outdir)

        sets_dir = os.path.join(indir, "sets_dir")
        os.mkdir(sets_dir)

        metadir = '/kb/module/lib/RunDir/metadata'
        if not (os.path.isdir(metadir)):
            raise Exception(
                "metadata directory not found at: {}".format(metadir))

        # We prepare locations of input files
        poolfile_path = os.path.join(indir, "pool.n10")
        gene_table_fp = os.path.join(indir, "genes.GC")
        exps_file = os.path.join(indir, "FEBA_Barseq.tsv")

        # END SETUP

        # VALIDATE PARAMS:
        logging.info("PARAMS:")
        logging.info(params)
        # From Util.validate python file
        val_par = validate_params(params)
        '''
        val_par contains keys:
            genome_ref
            poolfile_ref
            exps_ref
            sets_ref
            output_name
            workspace_name
        '''
        val_par['username'] = ctx['user_id']

        # DOWNLOAD FILES
        download_dict = {
            "dfu": dfu,
            "gfu": gfu,
            "ws": ws,
            "smpl_s": smpl_s,
            "sets_dir": sets_dir,
            "poolfile_path": poolfile_path,
            "gene_table_fp": gene_table_fp,
            "exps_file": exps_file,
            "scratch_dir": self.shared_folder
        }
        # We copy input files to proper directories.
        # val_par must contain genome_ref, poolfile_ref, exps_ref, sets_ref (list)
        # DownloadResults must contain keys 'org', 'set_names_list', 'set_fps_list'
        # set_names_list value contains the names of the sets without extensions
        DownloadResults = download_files(val_par, download_dict)

        logging.debug(json.dumps(DownloadResults, indent=2))

        # Get args in this format:
        # [-org, org_name, -indir, Scratch_Dir_Input, -metadir, Fixed meta dir,
        # -outdir, scratch_dir_output, -sets_dir, within scratch_dir_input,
        # -sets, set1 (sets_dir), set2 (sets_dir), set3 (sets_dir), ... ]
        # Note meta dir is called metadata and is in RunDir

        # Running the entire program:
        arg_list = [
            "-org", DownloadResults['org'], '-indir', indir, '-metadir',
            metadir, '-outdir', outdir, '-sets_dir', sets_dir, '-sets'
        ]
        arg_list += DownloadResults['set_names_list']

        RunBarSeq(arg_list)

        # Returning files to user

        report = KBaseReport(self.callback_url)
        report_info = report.create({
            'report': {
                'objects_created': [],
                'text_message': params['parameter_1']
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_barseqR

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_barseqR return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
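
A hedged sketch of the params this method consumes, with key names taken from the val_par comment above plus the 'parameter_1' key read when building the report (all refs are hypothetical):

    params = {
        'workspace_name': 'my_workspace',
        'genome_ref': '123/4/5',
        'poolfile_ref': '123/6/1',
        'exps_ref': '123/7/1',
        'sets_ref': ['123/8/1'],
        'output_name': 'barseq_out',
        'parameter_1': 'BarSeqR run complete',  # used verbatim as the report text
    }
    output = impl.run_barseqR(ctx, params)[0]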
Example #9
    def import_samples(self, ctx, params):
        """
        :param params: instance of type "ImportSampleInputs" -> structure:
           parameter "sample_set_ref" of String, parameter "sample_file" of
           String, parameter "workspace_name" of String, parameter
           "workspace_id" of Long, parameter "file_format" of String,
           parameter "description" of String, parameter "set_name" of String,
           parameter "header_row_index" of Long, parameter "name_field" of
           String, parameter "output_format" of String, parameter
           "taxonomy_source" of String, parameter "num_otus" of Long,
           parameter "incl_seq" of Long, parameter "otu_prefix" of String,
           parameter "share_within_workspace" of Long, parameter
           "prevalidate" of Long, parameter "incl_input_in_output" of Long,
           parameter "ignore_warnings" of Long, parameter
           "keep_existing_samples" of Long
        :returns: instance of type "ImportSampleOutputs" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "sample_set" of type "SampleSet" -> structure:
           parameter "samples" of list of type "sample_info" -> structure:
           parameter "id" of type "sample_id", parameter "name" of String,
           parameter "description" of String, parameter "sample_set_ref" of
           String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN import_samples
        print(f"Beginning sample import with following parameters:")
        print(f"params -- {params}")
        sample_set = {"samples": []}
        # Check if we have an existing Sample Set as input
        # if so, download
        if params.get('sample_set_ref'):
            ret = self.dfu.get_objects(
                {'object_refs': [params['sample_set_ref']]})['data'][0]
            sample_set = ret['data']
            if params.get('set_name'):
                set_name = params.get('set_name')
            else:
                set_name = ret['info'][1]
            save_ws_id = params['sample_set_ref'].split('/')[0]
        else:
            if not params.get('set_name'):
                raise ValueError(
                    "A sample set name is required when creating a new SampleSet object."
                )
            set_name = params['set_name']
            save_ws_id = params.get('workspace_id')
        if params.get('header_row_index'):
            header_row_index = int(params["header_row_index"]) - 1
        else:
            header_row_index = find_header_row(params.get('sample_file'),
                                               params.get('file_format'))

        username = ctx['user_id']

        file_format = str(params.get('file_format')).lower()
        if file_format not in ['enigma', 'sesar', 'kbase']:
            raise ValueError(
                f"Only SESAR, ENIGMA, and KBase formats are currently supported for importing samples. "
                f"File of format {params.get('file_format')} not supported.")
        mappings = {
            'enigma': ENIGMA_mappings,
            'sesar': SESAR_mappings,
            'kbase': {}
        }
        format_mapping = mappings[file_format]

        sample_set, has_unignored_errors, errors, sample_data_json = import_samples_from_file(
            params, self.sample_url, self.workspace_url, self.callback_url,
            username, ctx['token'],
            format_mapping.get('groups', []),
            format_mapping.get('date_columns', []),
            format_mapping.get('column_unit_regex', []),
            sample_set, header_row_index,
            aliases.get(file_format, {}))

        file_links = []
        new_data_links = []
        sample_set_ref = None

        # create UI to display the errors clearly
        html_link = _error_ui(errors, sample_data_json, has_unignored_errors,
                              self.scratch)

        if not has_unignored_errors:
            # only save object if there are no errors
            obj_info = self.dfu.save_objects({
                'id': save_ws_id,
                'objects': [{
                    "name": set_name,
                    "type": "KBaseSets.SampleSet",
                    "data": sample_set
                }]
            })[0]

            sample_set_ref = '/'.join(
                [str(obj_info[6]), str(obj_info[0]), str(obj_info[4])])
            sample_file_name = os.path.basename(
                params['sample_file']).split('.')[0] + '_OTU'

            # create a data link between each sample and the sampleset
            ss = SampleService(self.sample_url)
            for idx, sample_info in enumerate(sample_set['samples']):
                sample_id = sample_info['id']
                version = sample_info['version']
                sample = ss.get_sample({
                    'id': sample_id,
                    'version': version,
                })
                ret = ss.create_data_link(
                    dict(
                        upa=sample_set_ref,
                        id=sample_id,
                        dataid='samples/{}'.format(idx),
                        version=version,
                        node=sample['node_tree'][0]['id'],
                        update=1,
                    ))
                new_data_links.append(ret)

            # -- Format outputs below --
            # if output file format specified, add one to output
            if params.get('output_format') in ['csv', 'xls']:
                otu_path = sample_set_to_OTU_sheet(sample_set,
                                                   sample_file_name,
                                                   self.scratch, params)
                file_links.append({
                    'path': otu_path,
                    'name': os.path.basename(otu_path),
                    'label': "OTU template file",
                    'description': "file with each column containing the assigned sample_id and sample "
                                   "name of each saved sample. Intended for uploading OTU data."
                })

        if params.get('incl_input_in_output'):
            sample_file = params.get('sample_file')
            if not os.path.isfile(sample_file):
                # try prepending '/staging/' to file and check then
                if os.path.isfile(os.path.join('/staging', sample_file)):
                    sample_file = os.path.join('/staging', sample_file)
                else:
                    raise ValueError(
                        f"Input file {sample_file} does not exist.")
            sample_file_copy = os.path.join(self.scratch,
                                            os.path.basename(sample_file))
            shutil.copy(sample_file, sample_file_copy)
            file_links.append({
                "path": sample_file_copy,
                "name": os.path.basename(sample_file_copy),
                "label": "Input Sample file",
                "description": "Input file provided to create the sample set."
            })

        # create report
        report_client = KBaseReport(self.callback_url)
        report_data = {
            'report_object_name': "SampleSet_import_report_" + str(uuid.uuid4()),
            'workspace_name': params['workspace_name']
        }
        if file_links:
            report_data['file_links'] = file_links
        if sample_set_ref:
            report_data['message'] = f"SampleSet object named \"{set_name}\" imported."
            report_data['objects_created'] = [{'ref': sample_set_ref}]

        if html_link:
            report_data['html_links'] = [{
                'path': html_link,
                'name': 'index.html',
                'description': 'HTML Report for Sample Uploader'
            }]
            report_data['direct_html_link_index'] = 0
        report_info = report_client.create_extended_report(report_data)
        output = {
            'report_ref': report_info['ref'],
            'report_name': report_info['name'],
            'sample_set': sample_set,
            'sample_set_ref': sample_set_ref,
            'errors': errors,
            'links': new_data_links
        }
        #END import_samples

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method import_samples return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
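
A hedged sketch of a minimal import_samples call, with parameter names taken from the docstring above (file name, workspace, and set name are hypothetical):

    params = {
        'workspace_name': 'my_workspace',
        'workspace_id': 12345,
        'sample_file': 'samples.xlsx',   # resolved against /staging/ if not found locally
        'file_format': 'sesar',          # one of: 'sesar', 'enigma', 'kbase'
        'set_name': 'my_sample_set',
        'header_row_index': 2,
        'output_format': 'csv',          # optional; adds an OTU template to the report
        'incl_input_in_output': 1,       # optional; attaches the input file to the report
    }
    output = impl.import_samples(ctx, params)[0]
    print(output['sample_set_ref'], len(output['links']))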