def run_PFLOTRAN(self, ctx, params):
        """
        This function enables users to run a PFLOTRAN simulation from an input
        PFLOTRAN model and fbamodel chemistry. As written, the body only
        creates a text report echoing params['parameter_1'].
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_PFLOTRAN
        reporter = KBaseReport(self.callback_url)
        # 'parameter_1' is read before 'workspace_name', as in the request layout.
        report_body = {'objects_created': [],
                       'text_message': params['parameter_1']}
        created = reporter.create({'report': report_body,
                                   'workspace_name': params['workspace_name']})
        output = dict(report_name=created['name'],
                      report_ref=created['ref'])
        #END run_PFLOTRAN

        # At some point might do deeper type checking...
        if isinstance(output, dict):
            # return the results
            return [output]
        raise ValueError('Method run_PFLOTRAN return value '
                         'output is not type dict as required.')
    def run_cjneely101MetaSanity(self, ctx, params):
        """
        Create a KBaseReport whose text is params['parameter_1'] in the
        workspace named by params['workspace_name'], and return its name/ref.
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_cjneely101MetaSanity
        reporter = KBaseReport(self.callback_url)
        report_body = {'objects_created': [],
                       'text_message': params['parameter_1']}
        request = {'report': report_body,
                   'workspace_name': params['workspace_name']}
        created = reporter.create(request)
        output = dict(report_name=created['name'],
                      report_ref=created['ref'])
        #END run_cjneely101MetaSanity

        # At some point might do deeper type checking...
        if isinstance(output, dict):
            # return the results
            return [output]
        raise ValueError('Method run_cjneely101MetaSanity return value '
                         'output is not type dict as required.')
Exemple #3
0
    def run_kb_Bwa(self, ctx, params):
        """
        Create a fixed-text KBaseReport ("report submitted") in the workspace
        named by params['output_workspace'] and return its name/ref.
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_kb_Bwa

        reporter = KBaseReport(self.callback_url)
        report_body = {'objects_created': [],
                       'text_message': "report submitted"}
        # Note: this method reads 'output_workspace', not 'workspace_name'.
        created = reporter.create({'report': report_body,
                                   'workspace_name': params['output_workspace']})
        output = dict(report_name=created['name'],
                      report_ref=created['ref'])
        #END run_kb_Bwa

        # At some point might do deeper type checking...
        if isinstance(output, dict):
            # return the results
            return [output]
        raise ValueError('Method run_kb_Bwa return value '
                         'output is not type dict as required.')
Exemple #4
0
    def run_akExMod(self, ctx, params):
        """
        Fetch the object referenced by params['parameter_1'] through
        DataFileUtil, print whether that reference string contains the text
        'biomass', and create a KBaseReport echoing the reference.
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_akExMod
        # The same parameter is used both as the object reference and as the
        # string that is searched below (avoids shadowing the builtin `id`).
        FBAModel_ref = params['parameter_1']

        # Initialize DataFileUtil client and get an object by reference.
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        self.dfu = DataFileUtil(self.callback_url)
        FBAModel_data = self.dfu.get_objects({'object_refs': [FBAModel_ref]})['data'][0]
        # Kept for the debugging output below; not otherwise used yet.
        FBAModel_obj = FBAModel_data['data']
        FBAModel_meta = FBAModel_data['info'][10]

        #To test if model is input.
        #print('Printing the objective')
        #pprint(FBAModel_obj)
        #print('Printing the reference ID')
        #pprint(FBAModel_ref)
        #print('Printing the data')
        #pprint(FBAModel_data)
        #print('Printing the meta')
        #pprint(FBAModel_meta)

        # str.find() returns -1 (truthy) when the substring is absent and 0
        # (falsy) when it is at the very start, so it must not be used as a
        # boolean; a membership test expresses the intent correctly.
        if 'biomass' in FBAModel_ref:
            print('Yes, string found in file')
        else:
            print('String not found in file')

        report = KBaseReport(self.callback_url)
        report_info = report.create({'report': {'objects_created': [],
                                                'text_message': params['parameter_1']},
                                     'workspace_name': params['workspace_name']})
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_akExMod

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_akExMod return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
    def run_kb_ldannotate(self, ctx, params):
        """
        Validate the parameters via self.cld, create a uniquely named
        sub-directory of the shared folder, hand the input parameters and that
        directory to self.cld.create_output_file, and publish a fixed-text
        KBaseReport.
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_kb_ldannotate

        logging.info("validating input parameters")
        self.cld.validate_params(params)

        # A random UUID keeps each run's output directory distinct.
        work_dir = os.path.join(self.shared_folder, str(uuid.uuid4()))
        os.mkdir(work_dir)

        #parsing input parameters (missing keys yield None via dict.get)
        input_keys = ("vcf_ref", "gff_ref", "candidate_snps",
                      "feature_type", "threshold", "output_file")
        call_args = [params.get(key) for key in input_keys]
        call_args.append(work_dir)

        # The command-building path (self.lau.build_ldannotate_command /
        # run_ldannotate_command) is disabled; output is produced directly.
        self.cld.create_output_file(*call_args)

        reporter = KBaseReport(self.callback_url)
        report_body = {'objects_created': [],
                       'text_message': 'Nice Report'}
        created = reporter.create({'report': report_body,
                                   'workspace_name': params['workspace_name']})
        output = dict(report_name=created['name'],
                      report_ref=created['ref'])
        #END run_kb_ldannotate

        # At some point might do deeper type checking...
        if isinstance(output, dict):
            # return the results
            return [output]
        raise ValueError('Method run_kb_ldannotate return value '
                         'output is not type dict as required.')
Exemple #6
0
    def run_alans_job(self, ctx, params):
        """
        Print a banner, sleep for ten seconds, read and print every line of
        the reference-data file /data/kmer, then create a fixed-text
        KBaseReport.
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_alans_job
        SERVICE_VER = 'release'
        banner = "i'm so cool i'm so fashionble"
        print(banner)
        import time
        time.sleep(10)
        print(banner)
        #TODO ADD ,service_ver='fake'
        reporter = KBaseReport(self.callback_url)
        # dfu = DataFileUtil(self.callback_url)
        # dwf = {'download_type' : 'Google Drive',
        #        'file_url' : 'www.google.com'}
        # filepath = dfu.download_web_file(params=dwf)
        # print("Filepath is", filepath)

        print("About to open refdata")
        with open("/data/kmer") as handle:
            kmer_lines = list(handle)
        print("All done!")
        print(kmer_lines)

        report_body = {'objects_created': [],
                       'text_message': 'The app is done. We didnt do anything'}
        created = reporter.create({'report': report_body,
                                   'workspace_name': params['workspace_name']})
        output = dict(report_name=created['name'],
                      report_ref=created['ref'])
        #END run_alans_job

        # At some point might do deeper type checking...
        if isinstance(output, dict):
            # return the results
            return [output]
        raise ValueError('Method run_alans_job return value '
                         'output is not type dict as required.')
    def run_nkk_compHelloWorld(self, ctx, params):
        """
        Read compound ids and InChI structures from the tab-separated file
        params['Input_File'], submit them via inchi_to_submission, post-process
        each compound's ``<id>/dft/nwchem.out`` with the Mulliken-charge helper
        module, and create a KBaseReport.

        Each per-compound step runs inside ``<start dir>/<id>/dft`` and the
        working directory is always restored afterwards, so every compound is
        resolved against the same starting directory.
        :param params: instance of mapping from String to unspecified object
           (reads 'Input_File', 'calculation_type' and 'workspace_name')
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_nkk_compHelloWorld

        os.system('ls')
        import pandas as pd

        # Read inputs from .tsv file
        df = pd.read_csv(params['Input_File'], sep='\t')
        ids = df['id']
        InChIes = df['structure']

        import inchi_to_submission as its
        import extract_properties_mulliken_charges_mol2 as mul
        import compound_parsing as com  # only used by the disabled code below

        its.inchi_to_dft(ids, InChIes)

        # Relative chdir calls accumulate; anchor every compound directory to
        # the directory we started in and always come back to it.
        base_dir = os.getcwd()

        for compound_id in ids:
            os.chdir(os.path.join(base_dir, compound_id, 'dft'))
            try:
                # The helpers read the same handle one after another, so the
                # call order is preserved; `with` closes it even on failure.
                with open('nwchem.out', 'r') as nwchem_out:
                    nAtoms = mul.getNumberOfAtoms(nwchem_out)
                    energy = mul.getInternalEnergy0K(nwchem_out)
                    mul.getMullikenCharge(nwchem_out, nAtoms)

                # The helper module takes its inputs through module attributes.
                mul.nAtoms = nAtoms
                mul.E0K = energy

                mul.calculate(compound_id)
            finally:
                os.chdir(base_dir)

        for compound_id in ids:
            os.chdir(os.path.join(base_dir, compound_id, 'dft'))
            try:
                os.system('ls')

                #with open(compound_id+'_Mulliken.mol2') as IN:

                #xx = com._make_compound_info(open(compound_id+'_Mulliken.mol2'))
                #print(xx)
            finally:
                os.chdir(base_dir)

        report = KBaseReport(self.callback_url)
        # A dict literal cannot hold 'text_message' twice (the second silently
        # replaced the first); report both values in a single message instead.
        text_message = '{}\n{}'.format(params['Input_File'],
                                       params['calculation_type'])
        report_info = report.create({
            'report': {
                'objects_created': [],
                'text_message': text_message
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_nkk_compHelloWorld

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_nkk_compHelloWorld return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
import logging
import os

from installed_clients.KBaseReportClient import KBaseReport
#END_HEADER
#BEGIN_CLASS_HEADER
#END_CLASS_HEADER
#BEGIN_CONSTRUCTOR
        # Constructor section; the enclosing `def` (which provides `self` and
        # `config`) is not part of this fragment.
        # Value of the SDK_CALLBACK_URL environment variable; it is what gets
        # passed to KBaseReport in the method section below.
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        # 'scratch' entry of the configuration mapping.
        self.shared_folder = config['scratch']
        # Root logger: INFO level, "<created> <levelname>: <message>" format.
        logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                            level=logging.INFO)
#END_CONSTRUCTOR
#BEGIN run_imramboVAMB
        # Method section: create a report whose text is params['parameter_1']
        # in the workspace params['workspace_name'], then expose the created
        # report's name and ref as `output`.
        report = KBaseReport(self.callback_url)
        report_info = report.create({'report': {'objects_created':[],
                                                'text_message': params['parameter_1']},
                                                'workspace_name': params['workspace_name']})
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
#END run_imramboVAMB
#BEGIN_STATUS
        # Status section: fixed "OK" state with an empty message, plus the
        # VERSION / GIT_URL / GIT_COMMIT_HASH attributes read from `self`.
        returnVal = {'state': "OK",
                     'message': "",
                     'version': self.VERSION,
                     'git_url': self.GIT_URL,
                     'git_commit_hash': self.GIT_COMMIT_HASH}
#END_STATUS
Exemple #9
0
    def run_VariationMerge(self, ctx, params):
        """
        Download every variation object in params['vcflist'] as an indexed
        VCF, verify that they share one reference (assembly or genome) and one
        sample set, merge the VCFs, save the merged file as a new variation
        object, and create a KBaseReport.
        :param params: instance of type "inparams" (This example function
           accepts any number of parameters and returns results in a
           KBaseReport) -> structure: parameter "obj_name" of String,
           parameter "workspace_name" of String, parameter "vcflist" of list
           of String
        :returns: instance of type "OutResults" -> structure: parameter
           "output_obj_ref" of String, parameter "report_name" of String,
           parameter "report_ref" of String
        :raises Exception: when the variations do not share exactly one
           assembly/genome reference or exactly one sample set reference
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_VariationMerge

        self.ws = Workspace(url=self.ws_url, token=ctx['token'])

        print(params)

        vcf_flist = []
        assembly_ref_set = set()
        sampleset_ref_set = set()
        genome_set_ref_set = set()
        for i, variation_ref in enumerate(params['vcflist']):
            variation_data = self.ws.get_objects2(
                {'objects': [{
                    'ref': variation_ref
                }]})['data'][0]['data']

            # A variation carries either an assembly_ref or a genome_ref.
            # Only touch 'assembly_ref' after checking it exists; printing it
            # unconditionally raised KeyError for genome-based variations.
            if 'assembly_ref' in variation_data:
                assembly_ref = variation_data['assembly_ref']
                print(assembly_ref)
                assembly_ref_set.add(assembly_ref)
            elif 'genome_ref' in variation_data:
                genome_set_ref_set.add(variation_data['genome_ref'])

            print(variation_ref)
            vcf_filename = "/kb/module/work/tmp/variation" + str(i) + ".vcf.gz"
            vcf_flist.append(vcf_filename)

            inparams = {
                'variation_ref': variation_ref,
                'filename': vcf_filename,
            }

            self.vu.get_variation_as_vcf(inparams)
            # The download lands at a fixed path; move it to the per-index
            # name so the next iteration does not overwrite it.
            os.rename("/kb/module/work/tmp/variation.vcf.gz", vcf_filename)
            self.mu.index_vcf(vcf_filename)
            data = self.ws.get_objects2({
                'objects': [{
                    "ref": variation_ref,
                    'included': ['/sample_set_ref']
                }]
            })['data'][0]['data']
            sampleset_ref_set.add(data['sample_set_ref'])

        #Raising exception

        if not genome_set_ref_set and len(assembly_ref_set) != 1:
            raise Exception(
                "variation objects are from different assembly refs")
        elif len(sampleset_ref_set) != 1:
            raise Exception(
                "variation objects are from different sample set refs")
        elif not assembly_ref_set and len(genome_set_ref_set) != 1:
            raise Exception(
                "variation objects are from different genome set refs")

        merged_file = os.path.join(self.shared_folder,
                                   "merged_gatk_variation_jmc2_test.vcf")
        self.mu.merge_vcf(vcf_flist, merged_file)

        # The checks above guarantee at least one of the two sets is
        # non-empty; prefer the assembly and fall back to the genome instead
        # of popping from an empty set.
        reference_set = assembly_ref_set if assembly_ref_set else genome_set_ref_set

        # NOTE(review): the docstring names the parameter "obj_name" but the
        # code reads 'variation_object_name' - confirm against the spec.
        save_variation_params = {
            'workspace_name': params['workspace_name'],
            'genome_or_assembly_ref': reference_set.pop(),
            'sample_set_ref': sampleset_ref_set.pop(),
            'sample_attribute_name': 'sample_attr',
            'vcf_staging_file_path': merged_file,
            'variation_object_name': params['variation_object_name']
        }
        self.vu.save_variation_from_vcf(save_variation_params)

        report = KBaseReport(self.callback_url)
        report_info = report.create({
            'report': {
                'objects_created': [],
                'text_message': 'success'
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_VariationMerge

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_VariationMerge return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
    def filter_contigs(self, ctx, params):
        """
        The actual function is declared using 'funcdef' to specify the name
        and input/return arguments to the function.  For all typical KBase
        Apps that run in the Narrative, your function should have the 
        'authentication required' modifier.
        :param params: instance of type "FilterContigsParams" (A 'typedef'
           can also be used to define compound or container objects, like
           lists, maps, and structures.  The standard KBase convention is to
           use structures, as shown here, to define the input and output of
           your function.  Here the input is a reference to the Assembly data
           object, a workspace to save output, and a length threshold for
           filtering. To define lists and maps, use a syntax similar to C++
           templates to indicate the type contained in the list or map.  For
           example: list <string> list_of_strings; mapping <string, int>
           map_of_ints;) -> structure: parameter "assembly_input_ref" of type
           "assembly_ref" (A 'typedef' allows you to provide a more specific
           name for a type.  Built-in primitive types include 'string',
           'int', 'float'.  Here we define a type named assembly_ref to
           indicate a string that should be set to a KBase ID reference to an
           Assembly data object.), parameter "workspace_name" of String,
           parameter "min_length" of Long
        :returns: instance of type "FilterContigsResults" (Here is the
           definition of the output of the function.  The output can be used
           by other SDK modules which call your code, or the output
           visualizations in the Narrative.  'report_name' and 'report_ref'
           are special output fields- if defined, the Narrative can
           automatically render your Report.) -> structure: parameter
           "report_name" of String, parameter "report_ref" of String,
           parameter "assembly_output" of type "assembly_ref" (A 'typedef'
           allows you to provide a more specific name for a type.  Built-in
           primitive types include 'string', 'int', 'float'.  Here we define
           a type named assembly_ref to indicate a string that should be set
           to a KBase ID reference to an Assembly data object.), parameter
           "n_initial_contigs" of Long, parameter "n_contigs_removed" of
           Long, parameter "n_contigs_remaining" of Long
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN filter_contigs

        # Print statements to stdout/stderr are captured and available as the App log
        print('Starting Filter Contigs function. Params=')
        pprint(params)

        # Step 1 - Parse/examine the parameters and catch any errors
        # It is important to check that parameters exist and are defined, and that nice error
        # messages are returned to users.  Parameter values go through basic validation when
        # defined in a Narrative App, but advanced users or other SDK developers can call
        # this function directly, so validation is still important.
        print('Validating parameters.')
        if 'workspace_name' not in params:
            raise ValueError('Parameter workspace_name is not set in input arguments')
        workspace_name = params['workspace_name']
        if 'assembly_input_ref' not in params:
            raise ValueError('Parameter assembly_input_ref is not set in input arguments')
        assembly_input_ref = params['assembly_input_ref']
        if 'min_length' not in params:
            raise ValueError('Parameter min_length is not set in input arguments')
        min_length_orig = params['min_length']
        try:
            min_length = int(min_length_orig)
        except (TypeError, ValueError):
            # int() raises TypeError (not ValueError) for None, lists, dicts,
            # etc.; report those with the same friendly message.
            raise ValueError('Cannot parse integer from min_length parameter (' + str(min_length_orig) + ')')
        if min_length < 0:
            raise ValueError('min_length parameter cannot be negative (' + str(min_length) + ')')


        # Step 2 - Download the input data as a Fasta and
        # We can use the AssemblyUtils module to download a FASTA file from our Assembly data object.
        # The return object gives us the path to the file that was created.
        print('Downloading Assembly data as a Fasta file.')
        assemblyUtil = AssemblyUtil(self.callback_url)
        fasta_file = assemblyUtil.get_assembly_as_fasta({'ref': assembly_input_ref})


        # Step 3 - Actually perform the filter operation, saving the good contigs to a new fasta file.
        # We can use BioPython to parse the Fasta file and build and save the output to a file.
        good_contigs = []
        n_total = 0
        n_remaining = 0
        for record in SeqIO.parse(fasta_file['path'], 'fasta'):
            n_total += 1
            # Contigs exactly min_length long are kept (inclusive threshold).
            if len(record.seq) >= min_length:
                good_contigs.append(record)
                n_remaining += 1

        print('Filtered Assembly to ' + str(n_remaining) + ' contigs out of ' + str(n_total))
        filtered_fasta_file = os.path.join(self.shared_folder, 'filtered.fasta')
        SeqIO.write(good_contigs, filtered_fasta_file, 'fasta')


        # Step 4 - Save the new Assembly back to the system
        print('Uploading filtered Assembly data.')
        new_assembly = assemblyUtil.save_assembly_from_fasta({'file': {'path': filtered_fasta_file},
                                                              'workspace_name': workspace_name,
                                                              'assembly_name': fasta_file['assembly_name']
                                                              })


        # Step 5 - Build a Report and return
        reportObj = {
            'objects_created': [{'ref': new_assembly, 'description': 'Filtered contigs'}],
            'text_message': 'Filtered Assembly to ' + str(n_remaining) + ' contigs out of ' + str(n_total)
        }
        report = KBaseReport(self.callback_url)
        report_info = report.create({'report': reportObj, 'workspace_name': params['workspace_name']})


        # STEP 6: construct the output to send back
        output = {'report_name': report_info['name'],
                  'report_ref': report_info['ref'],
                  'assembly_output': new_assembly,
                  'n_initial_contigs': n_total,
                  'n_contigs_removed': n_total - n_remaining,
                  'n_contigs_remaining': n_remaining
                  }
        print('returning:' + pformat(output))

        #END filter_contigs

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method filter_contigs return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Exemple #11
0
    def run_barseqR(self, ctx, params):
        """
        Set up input/output directories under the shared folder, validate the
        parameters, download the input files, run RunBarSeq on them, and
        create a KBaseReport.

        Args:
            :param params: instance of mapping from String to unspecified object
        ctx:
            client_ip: None or 'str', 
            user_id: str, 
            'authenticated': 1,
            'token': str,
            'module': None, 
            'method': None, 
            'call_id': None, 
            'rpc_context': None, 
            'provenance':list<prov_d>
                prov_d: (d)
                    service: (str)
                    'method': 'please_never_use_it_in_production', 
                    'method_params': []}]}
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        :raises Exception: when the fixed metadata directory does not exist
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_barseqR

        # SETUP - Unrelated to inputs --------

        logging.basicConfig(level=logging.DEBUG)

        logging.info("Call back url: " + str(self.callback_url))
        # We create important classes
        dfu = DataFileUtil(self.callback_url)
        logging.info("DFU VARS-- " * 8)
        logging.info(vars(dfu))
        gfu = GenomeFileUtil(self.callback_url)
        smpl_s = SampleService(self.callback_url)
        myToken = os.environ.get('KB_AUTH_TOKEN', None)
        ws = Workspace(self.ws_url, token=myToken)
        ws_id = ws.get_workspace_info({'workspace':
                                       params['workspace_name']})[0]

        # The environment and the context both carry the auth token; never
        # write it to the app log, so log redacted copies instead.
        safe_env = dict(os.environ)
        if 'KB_AUTH_TOKEN' in safe_env:
            safe_env['KB_AUTH_TOKEN'] = '<redacted>'
        logging.info(safe_env)

        logging.info('ws-url')
        logging.info(self.ws_url)
        logging.info('ctx')
        safe_ctx = dict(ctx)
        if 'token' in safe_ctx:
            safe_ctx['token'] = '<redacted>'
        logging.info(safe_ctx)

        # We create indir, outdir, sets_dir (Input, Output, Sets)
        indir = os.path.join(self.shared_folder, "indir")
        os.mkdir(indir)

        outdir = os.path.join(self.shared_folder, "outdir")
        os.mkdir(outdir)

        sets_dir = os.path.join(indir, "sets_dir")
        os.mkdir(sets_dir)

        metadir = '/kb/module/lib/RunDir/metadata'
        if not (os.path.isdir(metadir)):
            raise Exception(
                "metadata directory not found at: {}".format(metadir))

        # We prepare locations of input files
        poolfile_path = os.path.join(indir, "pool.n10")
        gene_table_fp = os.path.join(indir, "genes.GC")
        exps_file = os.path.join(indir, "FEBA_Barseq.tsv")

        # END SETUP

        # VALIDATE PARAMS:
        logging.info("PARAMS:")
        logging.info(params)
        # From Util.validate python file
        val_par = validate_params(params)
        '''
        val_par contains keys:
            genome_ref
            poolfile_ref
            exps_ref
            sets_ref
            output_name
            workspace_name
        '''
        val_par['username'] = ctx['user_id']

        # DOWNLOAD FILES
        download_dict = {
            "dfu": dfu,
            "gfu": gfu,
            "ws": ws,
            "smpl_s": smpl_s,
            "sets_dir": sets_dir,
            "poolfile_path": poolfile_path,
            "gene_table_fp": gene_table_fp,
            "exps_file": exps_file,
            "scratch_dir": self.shared_folder
        }
        # We copy input files to proper directories.
        # vp must contain genome_ref, poolfile_ref, exps_ref, sets_refs (list)
        # DownloadResults must contain keys 'org', 'set_names_list', 'set_fps_list'
        # set_names_list value contains the names of the sets without extensions
        DownloadResults = download_files(val_par, download_dict)

        logging.debug(json.dumps(DownloadResults, indent=2))

        # Get args in this format:
        # [-org, org_name, -indir, Scratch_Dir_Input, -metadir, Fixed meta dir,
        # -outdir, scratch_dir_output, -sets_dir, within scratch_dir_input,
        # -sets, set1 (sets_dir), set2 (sets_dir), set3 (sets_dir), ... ]
        # Note meta dir is called metadata and is in RunDir

        # Running the entire program:
        arg_list = [
            "-org", DownloadResults['org'], '-indir', indir, '-metadir',
            metadir, '-outdir', outdir, '-sets_dir', sets_dir, '-sets'
        ]
        arg_list += DownloadResults['set_names_list']

        RunBarSeq(arg_list)

        # Returning files to user

        report = KBaseReport(self.callback_url)
        # NOTE(review): 'parameter_1' is not among the validated keys listed
        # above; confirm callers always supply it, otherwise this raises
        # KeyError after the whole run has completed.
        report_info = report.create({
            'report': {
                'objects_created': [],
                'text_message': params['parameter_1']
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_barseqR

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_barseqR return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
    def run_michael_shafferContigFilter_max(self, ctx, params):
        """
        Remove contigs longer than params['max_length'] from the assembly
        referenced by params['assembly_input_ref'], save the remaining
        contigs as a new assembly, and create a KBaseReport.
        :param params: instance of mapping from String to unspecified object
           (requires 'max_length', 'assembly_input_ref' and 'workspace_name')
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
           (the result also carries 'n_total', 'n_kept' and
           'filtered_assembly_ref')
        :raises ValueError: when a required parameter is missing, max_length
           is not a non-negative integer, or the assembly reference is not a
           non-empty string
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_michael_shafferContigFilter_max
        # sanitize inputs
        for name in ['max_length', 'assembly_input_ref', 'workspace_name']:
            if name not in params:
                raise ValueError('Parameter %s is required but missing' % name)
        if not isinstance(params['max_length'], int) or (params['max_length'] < 0):
            raise ValueError('Max length must be a non-negative integer')
        if not isinstance(params['assembly_input_ref'], str) or not len(params['assembly_input_ref']):
            raise ValueError('Pass in a valid assembly reference string')

        # get files
        assembly_util = AssemblyUtil(self.callback_url)
        fasta_file = assembly_util.get_assembly_as_fasta({'ref': params['assembly_input_ref']})

        # filter fasta
        parsed_assembly = SeqIO.parse(fasta_file['path'], 'fasta')
        max_length = params['max_length']

        good_contigs = []
        n_total = 0
        for record in parsed_assembly:
            n_total += 1
            # max_length is an inclusive upper bound: only contigs strictly
            # greater than it are removed, matching the report text below.
            if len(record.seq) <= max_length:
                good_contigs.append(record)

        # Upload the filtered data to the workspace
        workspace_name = params['workspace_name']
        filtered_path = os.path.join(self.shared_folder, 'filtered.fasta')
        SeqIO.write(good_contigs, filtered_path, 'fasta')
        new_ref = assembly_util.save_assembly_from_fasta({
            'file': {'path': filtered_path},
            'workspace_name': workspace_name,
            'assembly_name': fasta_file['assembly_name']
        })

        # generate report
        message = "Filtering assembly remove contigs greater than %s bp removed %s out of %s contigs (%s remaining)" % \
                  (max_length, n_total-len(good_contigs), n_total, len(good_contigs))
        report_data = {'objects_created': [{'ref': new_ref, 'description': 'Filtered contigs'}],
                       'text_message': message}
        kbase_report = KBaseReport(self.callback_url)
        report = kbase_report.create({'report': report_data, 'workspace_name': workspace_name})

        # set output
        output = {'report_ref': report['ref'],
                  'report_name': report['name'],
                  'n_total': n_total,
                  'n_kept': len(good_contigs),
                  'filtered_assembly_ref': new_ref}
        #END run_michael_shafferContigFilter_max

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_michael_shafferContigFilter_max return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
    def run_cnelsonAppDemo(self, ctx, params):
        """
        Keep only the contigs of an Assembly that are at least "min_length"
        bases long, save them as a new Assembly and summarise the outcome in
        a KBaseReport.
        :param params: instance of mapping from String to unspecified object.
           Required keys: "workspace_name", "assembly_input_ref" and
           "min_length" (any value int() can convert; must not be negative)
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String. The
           returned dict additionally carries "assembly_output",
           "n_initial_contigs", "n_contigs_removed" and "n_contigs_remaining"
        :raises ValueError: if a required key is missing, or "min_length"
           cannot be converted to a non-negative integer
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_cnelsonAppDemo

        # Print statements to stdout/stderr are captured and available as the App log
        logging.info('Starting run_cnelsonAppDemo function. Params=' +
                     pformat(params))

        # Step 1 - Parse/examine the parameters and catch any errors
        # It is important to check that parameters exist and are defined, and that nice error
        # messages are returned to users.  Parameter values go through basic validation when
        # defined in a Narrative App, but advanced users or other SDK developers can call
        # this function directly, so validation is still important.
        logging.info('Validating parameters.')
        for required in ('workspace_name', 'assembly_input_ref', 'min_length'):
            if required not in params:
                raise ValueError(
                    'Parameter ' + required + ' is not set in input arguments')
        workspace_name = params['workspace_name']
        assembly_input_ref = params['assembly_input_ref']
        min_length_orig = params['min_length']
        try:
            min_length = int(min_length_orig)
        except (TypeError, ValueError):
            # int() raises TypeError (not ValueError) for None, lists, dicts, ...
            # Both cases must surface as the same friendly message.
            raise ValueError(
                'Cannot parse integer from min_length parameter (' +
                str(min_length_orig) + ')')
        if min_length < 0:
            raise ValueError('min_length parameter cannot be negative (' +
                             str(min_length) + ')')

        # Step 2 - Download the input data as a Fasta file
        # We can use the AssemblyUtils module to download a FASTA file from our Assembly data object.
        # The return object gives us the path to the file that was created.
        logging.info('Downloading Assembly data as a Fasta file.')
        assemblyUtil = AssemblyUtil(self.callback_url)
        fasta_file = assemblyUtil.get_assembly_as_fasta(
            {'ref': assembly_input_ref})

        # Step 3 - Actually perform the filter operation, saving the good contigs to a new fasta file.
        # We can use BioPython to parse the Fasta file and build and save the output to a file.
        good_contigs = []
        n_total = 0
        for record in SeqIO.parse(fasta_file['path'], 'fasta'):
            n_total += 1
            if len(record.seq) >= min_length:
                good_contigs.append(record)
        n_remaining = len(good_contigs)

        # The same sentence is used for the log and for the report text.
        summary = ('Filtered Assembly to ' + str(n_remaining) +
                   ' contigs out of ' + str(n_total))
        logging.info(summary)
        filtered_fasta_file = os.path.join(self.shared_folder,
                                           'filtered.fasta')
        SeqIO.write(good_contigs, filtered_fasta_file, 'fasta')

        # Step 4 - Save the new Assembly back to the system
        logging.info('Uploading filtered Assembly data.')
        new_assembly = assemblyUtil.save_assembly_from_fasta({
            'file': {
                'path': filtered_fasta_file
            },
            'workspace_name': workspace_name,
            'assembly_name': fasta_file['assembly_name']
        })

        # Step 5 - Build a Report and return
        reportObj = {
            'objects_created': [{
                'ref': new_assembly,
                'description': 'Filtered contigs'
            }],
            'text_message': summary
        }
        report = KBaseReport(self.callback_url)
        report_info = report.create({
            'report': reportObj,
            'workspace_name': workspace_name
        })

        # STEP 6: construct the output to send back
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
            'assembly_output': new_assembly,
            'n_initial_contigs': n_total,
            'n_contigs_removed': n_total - n_remaining,
            'n_contigs_remaining': n_remaining
        }
        logging.info('returning:' + pformat(output))

        #END run_cnelsonAppDemo

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_cnelsonAppDemo return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Exemple #14
0
    def run_kb_GATK(self, ctx, params):
        """
        run_kb_GATK: Run the GATK helper steps on a downloaded reads alignment,
        save the final filtered SNP calls as a variation object and return a
        KBaseReport.
        :param params: instance of mapping from String to unspecified object.
           Keys read here: "alignment_ref", "strain_info",
           "assembly_or_genome_ref", "workspace_name", "input_sample_set"
           and "variation_object_name"
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_kb_GATK
        self.gu.validate_params(params)
        print(params)

        # Only the BAM file inside the downloaded alignment directory is used.
        alignment_ref = params['alignment_ref']
        alignment = self.du.downloadreadalignment(alignment_ref, params,
                                                  self.callback_url)
        bam_path = os.path.join(alignment['destination_dir'],
                                "reads_alignment.bam")

        #Todo Reading sample set and sample strains information
        strains = params['strain_info']

        # Every run works in its own uniquely named directory.
        run_dir = os.path.join(self.shared_folder, str(uuid.uuid4()))
        os.mkdir(run_dir)

        #TODO: to get genome_or_assembly_ref from alignment ref.
        input_ref = params['assembly_or_genome_ref']
        lookup = self.wsc.get_object_info3({'objects': [{'ref': input_ref}]})
        type_name = lookup['infos'][0][2]

        # A Genome is resolved to the Assembly it points at; an Assembly is
        # used directly; anything else is rejected.
        is_genome = 'KBaseGenomes.Genome' in type_name
        is_assembly = 'KBaseGenomeAnnotations.Assembly' in type_name
        if not (is_genome or is_assembly):
            raise ValueError(
                type_name +
                ' is not the right input for this method. '
                'Valid input include KBaseGenomes.Genome or '
                'KBaseGenomeAnnotations.Assembly ')
        if is_genome:
            genome_subset = self.wsc.get_object_subset([{
                'included': ['/assembly_ref'],
                'ref': input_ref
            }])
            assembly_ref = genome_subset[0]['data']['assembly_ref']
        else:
            assembly_ref = input_ref

        reference_fasta = self.du.download_genome(assembly_ref, run_dir)['path']

        #Todo: check time for building index file or download from cache.
        #Todo: To discuss about cache_id to be used.
        #Todo: In case of copying genome, find the way of finding original genome (ref id) for getting original cache id.

        # Reference preparation, then alignment clean-up and metrics.
        self.gu.build_genome(reference_fasta)
        self.gu.index_assembly(reference_fasta)
        self.gu.generate_sequence_dictionary(reference_fasta)
        self.gu.duplicate_marking(run_dir, bam_path)
        self.gu.collect_alignment_and_insert_size_metrics(reference_fasta,
                                                          run_dir)

        #Todo: avoid writing intermediate files to save space and time I/O.
        self.gu.variant_calling(reference_fasta, run_dir)
        self.gu.extract_variants(reference_fasta, run_dir)

        # First filtering pass followed by two recalibration rounds.
        self.gu.filter_SNPs(reference_fasta, "filtered_snps.vcf", run_dir,
                            params)
        self.gu.filter_Indels(reference_fasta, "filtered_indels.vcf", run_dir,
                              params)
        self.gu.exclude_filtered_variants(run_dir)
        self.gu.base_quality_score_recalibration(reference_fasta,
                                                 "recal_data.table", run_dir)
        self.gu.apply_BQSR(reference_fasta, "recal_data.table", run_dir)
        self.gu.base_quality_score_recalibration(reference_fasta,
                                                 "post_recal_data.table",
                                                 run_dir)
        #self.gu.analyze_covariates(run_dir)
        self.gu.apply_BQSR(reference_fasta, "post_recal_data.table", run_dir)
        self.gu.filter_SNPs(reference_fasta, "filtered_snps_final.vcf",
                            run_dir, params)

        #Todo: To save indels also using VariationUtils or merge with snps and sort them with chr & pos and save using VariationUtils.
        #Todo: To get an example for saving structural variants(specially CNV) and compare with standard vcf output.

        self.gu.filter_Indels(reference_fasta, "filtered_indels_final.vcf",
                              run_dir, params)

        # Only the final SNP file is indexed, re-headered and saved.
        indexed_vcf = self.gu.index_vcf_file(
            "{}/filtered_snps_final.vcf".format(run_dir))
        final_vcf = self.gu.reheader(indexed_vcf, strains)
        #Todo : check existence of final filtered snps.
        #Todo : change assembly_or_genome_ref to genome_or_assembly_ref

        #Todo: to derive name of sample_attribute_name from sample set ref by prefixing/suffixing. Attribute mapping should have one sample.

        self.vu.save_variation_from_vcf({
            'workspace_name': params['workspace_name'],
            'genome_or_assembly_ref': params['assembly_or_genome_ref'],
            'sample_set_ref': params['input_sample_set'],
            'sample_attribute_name': 'sample_attr',
            'vcf_staging_file_path': final_vcf,
            'variation_object_name': params['variation_object_name'],
        })

        report_client = KBaseReport(self.callback_url)
        report_info = report_client.create({
            'report': {
                'objects_created': [],
                'text_message': 'Success'
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_kb_GATK

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_kb_GATK return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Exemple #15
0
    def run_omreegalozpathway_completeness(self, ctx, params):
        """
        Produce a pathway-completeness HTML report for the object referenced
        by "main_input_ref". Objects whose type starts with
        "KBaseFBA.FBAModel" or "KBaseGeneFamilies.DomainAnnotation" are
        handled; any other type is rejected.
        :param params: instance of mapping from String to unspecified object.
           Keys read here: "main_input_ref" and "workspace_name"
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        :raises ValueError: if "main_input_ref" is missing from params
        :raises Exception: if the workspace lookup returns no "infos" or the
           object type is not one of the two handled types
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_omreegalozpathway_completeness

        #Checking the input params first, so that a missing reference is
        #reported with the message below instead of a bare KeyError raised
        #by the first use of params['main_input_ref'].
        if "main_input_ref" not in params:
            logging.info(
                'the reference number is not in the params, program must end.')
            raise ValueError("main_input_ref not in params")
        main_input_ref = params['main_input_ref']

        #Preparing report client
        report_client = KBaseReport(self.callback_url)

        # NOTE(review): the result of this preliminary report is never used;
        # it looks like leftover template code. The call is kept so that the
        # set of service calls is unchanged - confirm before removing it.
        report_client.create({
            'report': {
                'objects_created': [],
                'text_message': main_input_ref
            },
            'workspace_name': params['workspace_name']
        })

        token = os.environ.get('KB_AUTH_TOKEN', None)

        #Creating the workspace client object
        ws = Workspace(self.ws_url, token=token)

        #Getting information about the main input ref
        obj_info = ws.get_object_info3({'objects': [{'ref': main_input_ref}]})

        #Catching errors:
        if "infos" not in obj_info:
            logging.info(
                "The function ws.get_object_info3 failed to download the right information. The program must abort."
            )
            raise Exception("Could not find infos in obj_info")

        #Getting information from object reference number
        first_info = obj_info["infos"][0]
        object_name = first_info[1]
        object_type = first_info[2]
        ws_name = first_info[7]

        #Logging:
        logging.debug("Object Type: " + object_type)
        logging.debug("Object Name: " + object_name)
        logging.debug("Workspace Name: " + ws_name)

        #We create the output file name and add information to it later.
        output_file_name = 'pathways_measurements'

        #This part is a hack, need to check type of data more accurately.
        if object_type.startswith('KBaseFBA.FBAModel'):
            logging.info("Succesfully recognized type as FBA Model")

            #Preparing the output file name which we return to the user
            output_file_name += '_fba_model'

            #Creating an fba tools object
            fba_t = fba_tools(self.callback_url)

            # Getting the TSV file from the object; the return value is only logged.
            export_result = fba_t.export_model_as_tsv_file(
                {"input_ref": main_input_ref})

            # Logging
            logging.info(
                "the object output from fba tools export model as tsv file:")
            logging.info(export_result)

            #Locating where the reactions tsv was placed (Not well done- replace this with a robust form)
            # assumes the export wrote <shared_folder>/<name>/<name>-reactions.tsv - TODO confirm
            reactions_file_path = os.path.join(
                self.shared_folder,
                object_name + '/' + object_name + '-reactions.tsv')

            #Preparing an output path for a future function
            output_path = os.path.join(self.shared_folder,
                                       output_file_name + '.tsv')

            #This function performs the percentage calculation work for FBAModel Object Types.
            html_path = reactions_file_to_pathway_reactions_and_percentages(
                reactions_file_path, output_path, object_name)

        # Using KBase Gene Families- Domain Annotation
        elif object_type.startswith("KBaseGeneFamilies.DomainAnnotation"):
            logging.info("Succesfully recognized type as Domain Annotation")
            output_file_name += '_domain_annotation'

            #We get the object using workspace's get_objects2 function
            obj = ws.get_objects2({'objects': [{'ref': main_input_ref}]})

            #Within the way the object dictionary is given, what we are looking for is in the location as follows:
            annotation_data = obj['data'][0]['data']['data']

            #Preparing our own output_file_path with Domain Annotation instead of FBAModel
            output_file_path = os.path.join(self.shared_folder,
                                            output_file_name + '.tsv')

            #This function (written for the module) finds percentages of pathway completeness.
            html_path = TIGRFAM_file_to_pathway_reactions_and_percentages(
                annotation_data, output_file_path, object_name)

        else:
            logging.info("Object type unknown")
            raise Exception(
                "Could not recognize ref to object- Check if object is FBA Model or Domain Annotation type. If so, the error is in the program, not the input - contact [email protected]."
            )

        html_dict = [{"path": html_path, "name": 'Completeness_Table'}]

        #Preparing final report:
        report = report_client.create_extended_report({
            'direct_html_link_index': 0,
            'message': 'Here are the pathway completeness results',
            'workspace_name': ws_name,
            'html_links': html_dict
        })

        output = {
            'report_name': report['name'],
            'report_ref': report['ref'],
        }
        #END run_omreegalozpathway_completeness

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError(
                'Method run_omreegalozpathway_completeness return value ' +
                'output is not type dict as required.')
        # return the results
        return [output]
Exemple #16
0
    def run_kb_ReadSim(self, ctx, params):
        """
        Simulate paired-end reads from a reference assembly, upload them as a
        reads object, save the accompanying VCF as a variation object and
        return a KBaseReport.
        :param params: instance of type "Inparams" -> structure: parameter
           "workspace_name" of String, parameter "input_sample_set" of
           String, parameter "strain_info" of String, parameter
           "assembly_or_genome_ref" of String, parameter "base_error_rate" of
           String, parameter "outer_distance" of String, parameter
           "standard_deviation" of String, parameter "num_read_pairs" of
           String, parameter "len_first_read" of String, parameter
           "len_second_read" of String, parameter "mutation_rate" of String,
           parameter "frac_indels" of String, parameter
           "variation_object_name" of String, parameter "output_read_object"
           of String
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_kb_ReadSim
        work_dir = self.shared_folder
        print(params)
        self.su.validate_simreads_params(params)

        # Resolve the input to an Assembly reference: a Genome is followed to
        # its "assembly_ref", an Assembly is used directly.
        input_ref = params['assembly_or_genome_ref']
        lookup = self.wsc.get_object_info3({'objects': [{'ref': input_ref}]})
        type_name = lookup['infos'][0][2]
        is_genome = 'KBaseGenomes.Genome' in type_name
        is_assembly = 'KBaseGenomeAnnotations.Assembly' in type_name
        if not (is_genome or is_assembly):
            raise ValueError(
                type_name +
                ' is not the right input for this method. '
                'Valid input include KBaseGenomes.Genome or '
                'KBaseGenomeAnnotations.Assembly ')
        if is_genome:
            genome_subset = self.wsc.get_object_subset([{
                'included': ['/assembly_ref'],
                'ref': input_ref
            }])
            assembly_ref = genome_subset[0]['data']['assembly_ref']
        else:
            assembly_ref = input_ref

        self.du.download_genome(assembly_ref, work_dir)

        # All files of this run live directly in the shared folder.
        reference_fasta = os.path.join(work_dir, "ref_genome.fa")
        fwd_reads = os.path.join(work_dir, "raed1.fq")
        rev_reads = os.path.join(work_dir, "raed2.fq")

        self.eu.check_path_exists(reference_fasta)

        self.su.simreads(reference_fasta, fwd_reads, rev_reads, params)
        for reads_path in (fwd_reads, rev_reads):
            self.eu.check_path_exists(reads_path)

        # The return value of the upload is not needed afterwards.
        self.ru.upload_reads({
            'wsname': params['workspace_name'],
            'name': params['output_read_object'],
            'sequencing_tech': 'illumina',
            'fwd_file': fwd_reads,
            'rev_file': rev_reads
        })

        variant_log = os.path.join(work_dir, "variant.txt")
        self.eu.check_path_exists(variant_log)

        vcf_path = self.su.format_vcf(variant_log)
        self.eu.check_path_exists(vcf_path)

        self.vu.save_variation_from_vcf({
            'workspace_name': params['workspace_name'],
            'genome_or_assembly_ref': params['assembly_or_genome_ref'],
            'sample_set_ref': params['input_sample_set'],
            'sample_attribute_name': 'sample_attr',
            'vcf_staging_file_path': vcf_path,
            'variation_object_name': params['variation_object_name']
        })

        report_client = KBaseReport(self.callback_url)
        report_info = report_client.create({
            'report': {
                'objects_created': [],
                'text_message': 'Success'
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_kb_ReadSim

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_kb_ReadSim return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Exemple #17
0
    def run_VariationAnalyzer(self, ctx, params):
        """
        Stage the input reads and reference, run the snippy command built by
        self.su, save the resulting "snps.vcf" as a variation object and
        return a KBaseReport.
        :param params: instance of type "InputParams" -> structure: parameter
           "obj_name" of String, parameter "workspace_name" of String,
           parameter "fastq_ref" of String, parameter "map_qual" of Long,
           parameter "base_qual" of Long, parameter "min_cov" of Long,
           parameter "min_qual" of Long. "genome_or_assembly_ref" is read as
           well, and "vcf_staging_file_path" is added to params by this
           method before it is passed on to save_variation_from_vcf
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_VariationAnalyzer

        self.su.validate_params(params)

        logging.info("Downloading Fastq File")
        staged_reads = self.dfu._stage_input_file(params['fastq_ref'],
                                                  "paired_end")

        logging.info("Downloading assembly file")
        reference = self.dfu.download_genome(params['genome_or_assembly_ref'])

        self.su.deinterleave(staged_reads['files']['fwd'], self.shared_folder)

        #hardcoded to match with attribute mapping file
        sample_name = "snippy_output"
        snippy_dir = self.shared_folder + "/" + sample_name

        snippy_cmd = self.su.build_snippy_command(reference['path'],
                                                  snippy_dir,
                                                  self.shared_folder)
        self.su.run_snippy_command(snippy_cmd)

        # The VCF location is injected into the caller's params, which are
        # then forwarded unchanged to the variation uploader.
        params['vcf_staging_file_path'] = (
            self.shared_folder + "/" + sample_name + "/snps.vcf")
        self.vu.save_variation_from_vcf(params)

        report_client = KBaseReport(self.callback_url)
        report_info = report_client.create({
            'report': {
                'objects_created': [],
                'text_message': params['fastq_ref']
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_VariationAnalyzer

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_VariationAnalyzer return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
    def run_MotifSuite(self, ctx, params):
        """
        Run the MFMD motif finder on params, print its result and return a
        KBaseReport whose text is the workspace name.
        :param params: instance of mapping from String to unspecified object.
           "workspace_name" is read here; the whole mapping is forwarded to
           DiscoverMotifsFromSequenceSet
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_MotifSuite

        report_client = KBaseReport(self.callback_url)

        # Clients for every motif tool are instantiated, but only the MFMD
        # finder is invoked below; the others are currently unused.
        mfmd_client = MotifFindermfmd(self.callback_url)
        homer_client = MotifFinderHomer(self.callback_url)
        meme_client = MotifFinderMEME(self.callback_url)
        gibbs_client = MotifFinderGibbs(self.callback_url)
        ensemble_client = MotifEnsemble(self.callback_url)

        mfmd_result = mfmd_client.DiscoverMotifsFromSequenceSet(params)
        print('MFMD RESULT:')
        pprint(mfmd_result)

        workspace_name = params['workspace_name']
        report_info = report_client.create({
            'report': {
                'objects_created': [],
                'text_message': workspace_name
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_MotifSuite

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_MotifSuite return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
    def save_variation_from_vcf(self, ctx, params):
        """
        Import a Variant Call Format (VCF) file as a KBase variation object
        tied to a reference Genome or Assembly, and report what was created.
        Depending on the type of "genome_or_assembly_ref", either
        "genome_ref" or "assembly_ref" is added to params before they are
        handed to the VCF importer.
        :param params: instance of type "save_variation_input" (## funcdef
           save_variation_from_vcf ## required input params:
           genome_or_assembly_ref: KBaseGenomes.Genome or
           KBaseGenomeAnnotations.Assembly object reference *** variation
           input data *** vcf_staging_file_path: path to location data
           associated with samples variation_object_name: output name for
           KBase variation object *** sample input data ***
           sample_attribute_ref: x/y/z reference to kbase sample attribute
           optional params: NA output report: report_name report_ref HTML
           visualization: Manhattan plot *** Visualization *** plot_maf:
           generate histogram of minor allele frequencies plot_hwe: generate
           histogram of Hardy-Weinberg Equilibrium p-values) -> structure:
           parameter "workspace_name" of String, parameter
           "genome_or_assembly_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "vcf_staging_file_path" of type "filepath"
           (KBase file path to staging files), parameter
           "variation_object_name" of String, parameter
           "sample_attribute_ref" of type "obj_ref" (An X/Y/Z style reference)
        :returns: instance of type "save_variation_output" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: report
        #BEGIN save_variation_from_vcf
        input_ref = params['genome_or_assembly_ref']
        workspace_client = Workspace(self.config['workspace-url'])
        lookup = workspace_client.get_object_info3(
            {'objects': [{'ref': input_ref}]})
        type_name = lookup['infos'][0][2]

        # Record the reference under the key that matches its object type.
        if 'KBaseGenomes.Genome' in type_name:
            ref_key = 'genome_ref'
        elif 'KBaseGenomeAnnotations.Assembly' in type_name:
            ref_key = 'assembly_ref'
        else:
            raise ValueError(
                type_name +
                ' is not the right input for this method. Valid input include KBaseGenomes.Genome or KBaseGenomeAnnotations.Assembly '
            )
        params[ref_key] = input_ref

        importer = VCFToVariation(self.config, self.shared_folder,
                                  self.callback_url)
        imported = importer.import_vcf(params)

        # imported[0] is indexed like a workspace object-info tuple and
        # imported[1] holds the counts; the reference is built from positions
        # 6, 0 and 4 of the former, joined with "/".
        saved_info = imported[0]
        var_obj_ref = "/".join(str(saved_info[i]) for i in (6, 0, 4))

        upload_message = "\n".join([
            "Variation object created.",
            "Object #" + str(saved_info[0]),
            "Object name: " + str(saved_info[1]),
            "Genotypes in variation: " + str(imported[1]['numgenotypes']),
            "Variants in VCF file: " + str(imported[1]['numvariants']),
        ])

        report_client = KBaseReport(self.callback_url)
        report_create = report_client.create({
            'report': {
                'objects_created': [{
                    'ref': var_obj_ref,
                    'description': 'Variation object from VCF file.'
                }],
                'text_message': upload_message
            },
            'workspace_name': params['workspace_name']
        })

        report = {
            "report_name": report_create['name'],
            "report_ref": report_create['ref'],
            "workspace_name": params["workspace_name"]
        }

        #END save_variation_from_vcf

        # At some point might do deeper type checking...
        if not isinstance(report, dict):
            raise ValueError('Method save_variation_from_vcf return value ' +
                             'report is not type dict as required.')
        # return the results
        return [report]
    def run_poolfileupload(self, ctx, params):
        """
        Upload one pool-related file with the uploader selected by
        params['pool_file_type'] and summarize the upload in a KBaseReport.

        The params mapping is enriched in place before being handed to the
        uploader: 'shared_folder', 'workspace_id', 'ws_obj', 'username' and a
        checked 'output_name' are written into it.
        :param ctx: context object; ctx['user_id'] is read
        :param params: mapping that must contain 'workspace_name',
           'output_name' and 'pool_file_type' (one of 'poolfile',
           'poolcount', 'experiments')
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        :raises Exception: if 'pool_file_type' is missing or not recognized
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_poolfileupload

        params['shared_folder'] = self.shared_folder
        auth_token = os.environ.get('KB_AUTH_TOKEN')
        workspace_client = Workspace(self.ws_url, token=auth_token)

        workspace_info = workspace_client.get_workspace_info(
            {'workspace': params['workspace_name']})
        # First element of the workspace info is stored as the workspace id.
        params['workspace_id'] = workspace_info[0]
        params['ws_obj'] = workspace_client
        params['username'] = ctx['user_id']
        params['output_name'] = check_output_name(params['output_name'])

        if 'pool_file_type' not in params:
            raise Exception("Did not get param pool_file_type")

        # Only the uploader matching the requested type is constructed.
        file_kind = params['pool_file_type']
        if file_kind == 'poolfile':
            upload_result = poolfileuploadUtil(params).upload_poolfile()
        elif file_kind == 'poolcount':
            upload_result = poolcountfileuploadUtil(
                params).upload_poolcountfile()
        elif file_kind == 'experiments':
            upload_result = expsfileuploadUtil(params).upload_expsfile()
        else:
            raise Exception("Did not recognize pool_file_type for upload")

        summary_line = "{} saved as {} on {}\n".format(
            upload_result['Name'], upload_result['Type'],
            upload_result['Date'])
        text_message = "Finished uploading file \n" + summary_line

        report_client = KBaseReport(self.callback_url)
        report_request = {
            'report': {
                'objects_created': [],
                'text_message': text_message
            },
            'workspace_name': params['workspace_name']
        }
        report_info = report_client.create(report_request)

        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_poolfileupload

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_poolfileupload return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
    def run_ContigFilter_max(self, ctx, params):
        """
        Filter the contigs of an assembly, keeping those whose length lies in
        the inclusive range [min_length, max_length], save the kept contigs as
        a new assembly and create a report about it.
        :param params: mapping with the required keys 'min_length' and
           'max_length' (non-negative ints, min_length <= max_length),
           'assembly_ref' (non-empty string) and 'workspace_name'
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String; the
           mapping additionally carries 'n_total', 'n_remaining' and
           'filtered_assembly_ref'
        :raises ValueError: if a required parameter is missing or invalid, or
           if max_length is smaller than min_length
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_ContigFilter_max
        # Check that the parameters are valid
        for name in ('min_length', 'max_length', 'assembly_ref',
                     'workspace_name'):
            if name not in params:
                raise ValueError('Parameter "' + name +
                                 '" is required but missing')
        min_length = params['min_length']
        max_length = params['max_length']
        assembly_ref = params['assembly_ref']
        workspace_name = params['workspace_name']
        if not isinstance(min_length, int) or min_length < 0:
            raise ValueError('Min length must be a non-negative integer')
        if not isinstance(max_length, int) or max_length < 0:
            raise ValueError('Max length must be a non-negative integer')
        if not isinstance(assembly_ref, str) or not assembly_ref:
            raise ValueError('Pass in a valid assembly reference string')
        # An inverted range can never match a contig; fail early instead of
        # downloading the assembly and uploading an empty FASTA file.
        if max_length < min_length:
            raise ValueError(
                'Max length must be greater than or equal to min length')

        print(min_length, max_length, assembly_ref)

        assembly_util = AssemblyUtil(self.callback_url)
        fasta_file = assembly_util.get_assembly_as_fasta(
            {'ref': assembly_ref})
        print(fasta_file)

        # Parse the downloaded file in FASTA format
        parsed_assembly = SeqIO.parse(fasta_file['path'], 'fasta')

        # Contigs whose length is within [min_length, max_length]
        good_contigs = []
        # total contigs regardless of length
        n_total = 0
        for record in parsed_assembly:
            n_total += 1
            if min_length <= len(record.seq) <= max_length:
                good_contigs.append(record)
        # total contigs that passed both length bounds
        n_remaining = len(good_contigs)

        # Create a file to hold the filtered data
        filtered_path = os.path.join(self.shared_folder, 'filtered.fasta')
        SeqIO.write(good_contigs, filtered_path, 'fasta')
        # Upload the filtered data to the workspace
        new_ref = assembly_util.save_assembly_from_fasta({
            'file': {
                'path': filtered_path
            },
            'workspace_name':
            workspace_name,
            'assembly_name':
            fasta_file['assembly_name']
        })
        # Create an output summary message for the report
        text_message = "".join([
            'Filtered assembly to ',
            str(n_remaining), ' contigs out of ',
            str(n_total)
        ])
        # Data for creating the report, referencing the assembly we uploaded
        report_data = {
            'objects_created': [{
                'ref': new_ref,
                'description': 'Filtered contigs'
            }],
            'text_message':
            text_message
        }
        # Initialize the report
        kbase_report = KBaseReport(self.callback_url)
        report = kbase_report.create({
            'report': report_data,
            'workspace_name': workspace_name
        })
        # Return the report reference and name in our results
        output = {
            'report_ref': report['ref'],
            'report_name': report['name'],
            'n_total': n_total,
            'n_remaining': n_remaining,
            'filtered_assembly_ref': new_ref
        }
        #END run_ContigFilter_max

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_ContigFilter_max return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Exemple #22
0
class nmdc_mg_assembly:
    def __init__(self, callbaack_url, scratch, wdl='../../metaAssembly/'):
        """
        Store the configuration and build the service clients this class uses.
        :param callbaack_url: (sic, spelling kept for keyword callers) callback
           URL passed to every client constructor
        :param scratch: scratch directory path, kept as self.scratch
        :param wdl: prefix that run_wdl prepends verbatim to each WDL file
           name, so it is expected to end with a path separator
        """
        self.wdl_base = wdl
        self.scratch = scratch
        self.callback_url = callbaack_url
        # Clients are created in the same order as they always were.
        self.special = special(callbaack_url)
        self.ru = ReadsUtils(callbaack_url)
        self.au = AssemblyUtil(callbaack_url)
        self.report = KBaseReport(callbaack_url)

    def validate_params(self, params):
        """
        Placeholder for parameter validation; no check is performed yet.
        :param params: parameter mapping (not inspected)
        :returns: None
        """
        return None

    def fetch_reads_files(self, reads_upas):
        """
        Download the given reads objects through ReadsUtils and map every
        returned key to the path of its forward ('fwd') file.

        The download is requested with 'interleaved' set to 'true'.
        :param reads_upas: non-empty list of reads UPAs
        :returns: dict of reads_upa -> forward file name
        :raises ValueError: if reads_upas is None or empty
        """
        if reads_upas is None:
            raise ValueError("reads_upas must be a list of UPAs")
        if not len(reads_upas):
            raise ValueError("reads_upas must contain at least one UPA")

        download_request = {
            'read_libraries': reads_upas,
            'interleaved': 'true',
            'gzipped': None
        }
        downloaded = self.ru.download_reads(download_request)['files']
        return {
            upa: details['files']['fwd']
            for upa, details in downloaded.items()
        }

    def run_wdl(self, rf):
        """
        Stage the WDL workflow next to the current working directory, write
        its inputs as 'inputs.json' in the scratch directory and launch it
        through the "special" client.
        :param rf: path of the reads file; the scratch prefix inside it is
           replaced by './' before it is written to the inputs file
        :returns: None (the client's response is only printed)
        """
        print(os.getcwd())
        workflow_names = ['jgi_assembly.wdl']

        # Copy every workflow file from the WDL base location into './'.
        for wdl_name in workflow_names:
            shutil.copy(self.wdl_base + wdl_name, './' + wdl_name)

        workflow_inputs = {
            "jgi_metaASM.input_file": [rf.replace(self.scratch, './')],
            "jgi_metaASM.rename_contig_prefix": "contig",
            "jgi_metaASM.outdir": "/out/"
        }
        inputs_path = os.path.join(self.scratch, 'inputs.json')
        with open(inputs_path, 'w') as handle:
            handle.write(json.dumps(workflow_inputs))

        # The inputs file is referenced by bare name, not by inputs_path.
        request = {'workflow': workflow_names[0], 'inputs': 'inputs.json'}
        response = self.special.wdl(request)
        print('wdl: ' + str(response))

    def _fix_path(self, orig):
        ind = orig.find('cromwell-executions')
        return os.path.join(self.scratch, orig[ind:])

    def upload_assembly(self, file_path_orig, workspace_name, assembly_name):
        """
        From a list of file paths, uploads them to KBase, generates Assembly objects,
        then returns the generated UPAs.
        """
        file_path = self._fix_path(file_path_orig)
        if not file_path:
            raise ValueError("file_path must be defined")
        if not os.path.exists(file_path):
            raise ValueError(
                "The given assembly file '{}' does not exist".format(
                    file_path))
        if not workspace_name:
            raise ValueError("workspace_name must be defined")
        if not assembly_name:
            raise ValueError("assembly_name must be defined")

        assembly_upa = self.au.save_assembly_from_fasta({
            "file": {
                "path": file_path
            },
            "workspace_name":
            workspace_name,
            "assembly_name":
            assembly_name
        })
        return assembly_upa

    def _upload_pipeline_result(self,
                                pipeline_result,
                                workspace_name,
                                assembly_name,
                                filtered_reads_name=None,
                                cleaned_reads_name=None,
                                skip_rqcfilter=False,
                                input_reads=None):
        """
        Upload the products of a pipeline run; only the assembly is mandatory.

        - assembly: always uploaded, from
          pipeline_result["spades"]["contigs_file"]
        - filtered reads: uploaded when filtered_reads_name is set and
          skip_rqcfilter is False, from
          pipeline_result["rqcfilter"]["filtered_fastq_file"]
        - cleaned reads: uploaded when cleaned_reads_name is set, from
          pipeline_result["seqtk"]["cleaned_reads"]

        Both reads files are decompressed with pigz before the upload.

        NOTE(review): this method uses self.file_util, self.output_dir,
        self.scratch_dir and the module-level name PIGZ; none of them is
        assigned by this class's __init__ - confirm where they come from.
        :returns: dict with key 'assembly_upa' and, when the corresponding
           upload happened, 'filtered_reads_upa' / 'cleaned_reads_upa'
        :raises RuntimeError: if a pigz decompression exits non-zero
        """

        def _decompress(stage, result_key, target_name, failure_message):
            # ReadsUtils won't unzip the reads for us, so do it here.
            target = os.path.join(self.output_dir, target_name)
            # NOTE(review): the command is a shell string run with
            # shell=True; the interpolated paths are not quoted.
            command = "{} -d -c {} > {}".format(
                PIGZ, pipeline_result[stage][result_key], target)
            process = subprocess.Popen(command,
                                       cwd=self.scratch_dir,
                                       shell=True)
            if process.wait() != 0:
                raise RuntimeError(failure_message)
            return target

        # The assembly is uploaded unconditionally.
        upload_result = {
            "assembly_upa":
            self.file_util.upload_assembly(
                pipeline_result["spades"]["contigs_file"], workspace_name,
                assembly_name)
        }

        # Filtered reads are skipped when RQCFilter was skipped (they would
        # just be a copy of the input).
        if filtered_reads_name and not skip_rqcfilter:
            filtered_fastq = _decompress(
                "rqcfilter", "filtered_fastq_file", "filtered_reads.fastq",
                "Unable to decompress filtered reads for validation! Can't upload them, either!"
            )
            upload_result["filtered_reads_upa"] = self.file_util.upload_reads(
                filtered_fastq, workspace_name, filtered_reads_name,
                input_reads)

        if cleaned_reads_name:
            cleaned_fastq = _decompress(
                "seqtk", "cleaned_reads", "cleaned_reads.fastq",
                "Unable to decompress cleaned reads for validation! Can't upload them, either!"
            )
            upload_result["cleaned_reads_upa"] = self.file_util.upload_reads(
                cleaned_fastq, workspace_name, cleaned_reads_name,
                input_reads)

        return upload_result

    def assemble(self, params):
        """
        Run the metagenome assembly workflow on one reads object, upload the
        resulting contigs as an Assembly and create a report.
        :param params: mapping with the keys 'workspace_name',
           'output_assembly_name' and 'reads_upa'
        :returns: dict with 'report_name' and 'report_ref'
        :raises OSError: if the workflow did not leave a 'meta.json' file in
           the scratch directory
        """
        self.validate_params(params)
        workspace_name = params['workspace_name']
        assembly_name = params['output_assembly_name']

        # Stage Data
        files = self.fetch_reads_files([params["reads_upa"]])
        reads_files = list(files.values())

        # Run WDL
        self.run_wdl(reads_files[0])

        # Check if things ran: the workflow metadata file must exist.
        mfile = os.path.join(self.scratch, 'meta.json')
        print(mfile)
        if not os.path.exists(mfile):
            raise OSError("Failed to run workflow")

        with open(mfile) as f:
            pipeline_output = json.load(f)
        out = pipeline_output["calls"]["jgi_metaASM.create_agp"][0]["outputs"]
        print(out)

        # Generate Output Objects
        contigs_fn = out['outcontigs']
        upa = self.upload_assembly(contigs_fn, workspace_name, assembly_name)

        print("upload complete")

        # Do report; list the uploaded assembly so the report links to it
        # instead of silently discarding the reference.
        report_info = self.report.create({
            'report': {
                'objects_created': [{
                    'ref': upa,
                    'description': 'Assembled contigs'
                }],
                'text_message': "Assemble metagenomic reads"
            },
            'workspace_name': workspace_name
        })
        return {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
    def run_simplebatch(self, ctx, params):
        """
        Submit one "simpleapp.simple_add" job per entry of
        params['app_params'] and return a KBaseReport.

        A submission that raises is recorded as a failure and does not stop
        the batch. The collected job ids and statuses are not yet included
        in the report, whose text is params['parameter_1'].

        NOTE(review): the type description below names "batch_inputs" and
        "method_name", but the code reads 'app_params' and uses a fixed
        method name - confirm which is intended.
        :param params: instance of type "SimpleBatchParams" -> structure:
           parameter "batch_inputs" of type "batch_params" -> list of type
           "app_params" -> mapping from String to unspecified object,
           parameter "method_name" of String
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_simplebatch
        report = KBaseReport(self.callback_url)

        #TODO Always request WSID?
        #TODO Get Service_Ver
        # Placeholder job settings; params['method_name'] is not used yet.
        method_name = "simpleapp.simple_add"
        wsid = "TODO"
        service_ver = "dev"
        batched_app_params = params['app_params']

        job_ids = []
        statuses = []

        for app_param in batched_app_params:
            print(f"About to submit job with params {app_param}")
            job_request = {
                "method": method_name,
                "params": [app_param],
                "service_ver": service_ver,
                "wsid": wsid,
                "app_id": "RanWithBatch",
            }
            try:
                job_id = self.ee2.run_job(params=job_request)
            except Exception:
                # Best effort: remember the failure and keep submitting.
                job_id, status = "failed to submit", "failure"
            else:
                status = "queued"

            job_ids.append(job_id)
            statuses.append(status)

        #TODO Create table with refresh buttons or autorefresh, which uses cookie or environment
        # Send this as a report

        report_request = {
            'report': {
                'objects_created': [],
                'text_message': params['parameter_1']
            },
            'workspace_name': params['workspace_name']
        }
        report_info = report.create(report_request)
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_simplebatch

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_simplebatch return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Exemple #24
0
    def run_kb_GATK(self, ctx, params):
        """
        Run the GATK helper pipeline on a read alignment, save the final
        filtered SNP VCF as a variation object and return a KBaseReport.

        Keys read from params: 'alignment_ref', 'strain_info',
        'assembly_or_genome_ref' (a KBaseGenomes.Genome or a
        KBaseGenomeAnnotations.Assembly), 'input_sample_set',
        'variation_object_name' and 'workspace_name'. The whole mapping is
        also passed on to the alignment download and the filter steps.
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        :raises ValueError: if 'assembly_or_genome_ref' is neither a Genome
           nor an Assembly
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_kb_GATK
        alignment_ref = params['alignment_ref']
        alignment_out = self.du.downloadreadalignment(alignment_ref, params,
                                                      self.callback_url)
        sam_file = os.path.join(alignment_out['destination_dir'],
                                "reads_alignment.sam")
        #Todo Reading sample set and sample strains information
        # NOTE: an earlier inline construction of "-filter-name"/"-filter"
        # arguments from params['snp_filter'] is disabled at this point.
        print(params)
        strain_info = params['strain_info']
        # Every run works in its own uniquely named folder.
        work_dir = os.path.join(self.shared_folder, str(uuid.uuid4()))
        os.mkdir(work_dir)

        input_ref = params['assembly_or_genome_ref']
        object_info = self.wsc.get_object_info3(
            {'objects': [{'ref': input_ref}]})
        obj_type = object_info['infos'][0][2]
        if 'KBaseGenomes.Genome' in obj_type:
            # A genome only points at its assembly; look the reference up.
            genome_subset = self.wsc.get_object_subset([{
                'included': ['/assembly_ref'],
                'ref': input_ref
            }])
            assembly_ref = genome_subset[0]['data']['assembly_ref']
        elif 'KBaseGenomeAnnotations.Assembly' in obj_type:
            assembly_ref = input_ref
        else:
            raise ValueError(obj_type +
                             ' is not the right input for this method. ' +
                             'Valid input include KBaseGenomes.Genome or ' +
                             'KBaseGenomeAnnotations.Assembly ')

        downloaded_genome = self.du.download_genome(assembly_ref, work_dir)
        assembly_file = downloaded_genome['path']

        #Todo: check time for building index file or donwload from cache.
        #Todo: To discuss about cache_id to be used.
        #Todo: In case of copying genome, find the way of finding original genome (ref id) for getting original cache id.

        gatk = self.gu

        # Reference preparation and alignment pre-processing.
        gatk.build_genome(assembly_file)
        gatk.index_assembly(assembly_file)
        gatk.generate_sequence_dictionary(assembly_file)
        gatk.duplicate_marking(work_dir, sam_file)
        gatk.collect_alignment_and_insert_size_metrics(assembly_file,
                                                       work_dir)

        #Todo: avoid writing intermediate fies to save space and time I/O.
        # First calling/filtering round.
        gatk.variant_calling(assembly_file, work_dir)
        gatk.extract_variants(assembly_file, work_dir)
        gatk.filter_SNPs(assembly_file, "filtered_snps.vcf", work_dir, params)
        gatk.filter_Indels(assembly_file, "filtered_indels.vcf", work_dir,
                           params)
        gatk.exclude_filtered_variants(work_dir)

        # Two recalibration passes, each followed by applying the table.
        gatk.base_quality_score_recalibration(assembly_file,
                                              "recal_data.table", work_dir)
        gatk.apply_BQSR(assembly_file, "recal_data.table", work_dir)
        gatk.base_quality_score_recalibration(assembly_file,
                                              "post_recal_data.table",
                                              work_dir)
        gatk.apply_BQSR(assembly_file, "post_recal_data.table", work_dir)

        # Final filtering round.
        gatk.filter_SNPs(assembly_file, "filtered_snps_final.vcf", work_dir,
                         params)

        #Todo: To save indels also using VariationUtils or merge with snps and sort them with chr & pos and save using variaiotiontuils.
        #Todo: To get an example for saving structural variants(specially CNV) and compare with standard vcf output.

        gatk.filter_Indels(assembly_file, "filtered_indels_final.vcf",
                           work_dir, params)
        # NOTE: a grep-based rewrite of filtered_snps_final.vcf into
        # sample.vcf is disabled here.
        # TODO : need to remove system command after fixing variationUtils.

        final_snps_vcf = work_dir + "/filtered_snps_final.vcf"
        vcf_filepath = gatk.index_vcf_file(final_snps_vcf)
        reheader_vcf_file = gatk.reheader(vcf_filepath, strain_info)
        #Todo : check existence of final filtered finals snps.
        #Todo : chnage assembly_or_genome_ref to genome_or_assembly_ref

        #Todo: to derive name of sample_attribute_name from sample set ref by prefixing/suffixing. Attribute mapping should have one sample.

        # Only the final SNP VCF is saved; the indel VCF is not saved here.
        self.vu.save_variation_from_vcf({
            'workspace_name': params['workspace_name'],
            'genome_or_assembly_ref': params['assembly_or_genome_ref'],
            'sample_set_ref': params['input_sample_set'],
            'sample_attribute_name': 'sample_attr',
            'vcf_staging_file_path': reheader_vcf_file,
            'variation_object_name': params['variation_object_name']
        })

        report_client = KBaseReport(self.callback_url)
        report_info = report_client.create({
            'report': {
                'objects_created': [],
                'text_message': 'Success'
            },
            'workspace_name': params['workspace_name']
        })
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }
        #END run_kb_GATK

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_kb_GATK return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]