def prepare_data(self):
        """Upload the shared genome and reads fixtures that are not cached yet.

        Sets ``self.genome_ref`` (through GenomeFileUtil.fasta_gff_to_genome)
        and ``self.reads_refs`` (through ReadsUtils.upload_reads) from files
        found under the relative ``data`` directory. Each of the two uploads
        is skipped when the corresponding attribute is already truthy.

        Returns:
            True if at least one fixture was uploaded by this call, False if
            both were already present.

        Raises:
            ValueError: if a service client cannot be instantiated.
        """
        updated = False

        # create genome object
        #
        if not self.genome_ref:

            # connect to client
            try:
                gfuClient = GenomeFileUtil(self.callback_url,
                                           token=self.getContext()['token'])
            except Exception as e:
                raise ValueError(
                    'Unable to instantiate gfuClient with callbackURL: ' +
                    self.callback_url + ' ERROR: ' + str(e))

            # upload data
            # NOTE: this must be a plain string. A stray trailing comma used to
            # turn it into a 1-tuple, which is presumably why passing it as
            # 'scientific_name' failed and had been disabled.
            sci_name = 'Thermodesulfobacterium thermophilum DSM 1276'
            base_genome = 'GCF_000421605.1_ASM42160v1_genomic'
            genome_gff_file = base_genome + '.gff.gz'
            genome_fna_file = base_genome + '.fna.gz'
            genome_gff_path = os.path.join(self.scratch, genome_gff_file)
            genome_fna_path = os.path.join(self.scratch, genome_fna_file)
            shutil.copy(os.path.join("data", genome_gff_file), genome_gff_path)
            shutil.copy(os.path.join("data", genome_fna_file), genome_fna_path)

            self.genome_ref = gfuClient.fasta_gff_to_genome({
                'workspace_name': self.getWsName(),
                'fasta_file': {
                    'path': genome_fna_path
                },
                'gff_file': {
                    'path': genome_gff_path
                },
                'generate_missing_genes': 1,
                'source': 'GFF',
                'scientific_name': sci_name,
                'genome_name': base_genome + '.Genome'
            }).get('genome_ref')

            updated = True

        # create reads objects
        #
        if not self.reads_refs:

            # connect to client
            try:
                ruClient = ReadsUtils(self.callback_url,
                                      token=self.getContext()['token'])
            except Exception as e:
                raise ValueError(
                    'Unable to instantiate ruClient with callbackURL: ' +
                    self.callback_url + ' ERROR: ' + str(e))

            # upload data (ReadsUtils.upload_reads() won't take a gzipped file, so decompress)
            base_reads_list = [
                'Thermodesulfo_50K-0.inter', 'Thermodesulfo_50K-1.inter'
            ]
            reads_refs = []
            for base_reads in base_reads_list:
                reads_file = base_reads + '.fq'
                # the decompressed copy in scratch is the file that gets uploaded
                reads_path = os.path.join(self.scratch, reads_file)

                with gzip.open(os.path.join("data", reads_file + '.gz'),
                               'rb') as f_in:
                    with open(reads_path, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)

                reads_refs.append(
                    ruClient.upload_reads({
                        'wsname': self.getWsName(),
                        'fwd_file': reads_path,
                        'sequencing_tech': 'artificial reads',
                        'interleaved': 1,
                        'name': base_reads + '.Reads'
                    })['obj_ref'])

            self.reads_refs = reads_refs
            updated = True

        return updated
Example #2
0
    def test_fractiontate_contigs_ASSEMBLY_GENOMELIST_05(self):
        """Run run_fractionate_contigs in 'pos' mode with a list of two Genomes.

        Uploads one Assembly (assembly_1plus2) and two Genomes (assembly_2a,
        assembly_2b) built from files under the relative ``data`` directory,
        passes both genome refs as the filter objects and prints the result.
        No assertions are made on the returned value.

        Raises:
            ValueError: if a service client cannot be instantiated.
        """
        method = 'fractionate_contigs_pos_filter_ASSEMBLY_GENOMELIST_05'

        print("\n\nRUNNING: test_" + method + "()")
        print("==========================================================\n\n")

        # upload test data
        try:
            auClient = AssemblyUtil(self.callback_url,
                                    token=self.getContext()['token'])
        except Exception as e:
            raise ValueError(
                'Unable to instantiate auClient with callbackURL: ' +
                self.callback_url + ' ERROR: ' + str(e))
        try:
            gfuClient = GenomeFileUtil(self.callback_url,
                                       token=self.getContext()['token'])
        except Exception as e:
            raise ValueError(
                'Unable to instantiate gfuClient with callbackURL: ' +
                self.callback_url + ' ERROR: ' + str(e))

        assembly_base = 'assembly_1plus2'
        genome_bases = ['assembly_2a', 'assembly_2b']
        assembly_type = 'Assembly'
        genome_type = 'Genome'

        # stage every input file in scratch: all FASTA files first, then the GFFs
        file_names = [base + '.fa.gz' for base in [assembly_base] + genome_bases]
        file_names += [base + '.gff' for base in genome_bases]
        staged = {}
        for file_name in file_names:
            staged[file_name] = os.path.join(self.scratch, file_name)
            shutil.copy(os.path.join("data", file_name), staged[file_name])

        assembly_ref = auClient.save_assembly_from_fasta({
            'file': {
                'path': staged[assembly_base + '.fa.gz']
            },
            'workspace_name': self.getWsName(),
            'assembly_name': assembly_base + '.' + assembly_type
        })

        # one Genome per base name, each from its FASTA + GFF pair
        genome_refs = []
        for base in genome_bases:
            genome_refs.append(
                gfuClient.fasta_gff_to_genome({
                    'fasta_file': {
                        'path': staged[base + '.fa.gz']
                    },
                    'gff_file': {
                        'path': staged[base + '.gff']
                    },
                    'generate_missing_genes': 1,
                    'source': 'GFF',
                    'scientific_name': base,
                    'workspace_name': self.getWsName(),
                    'genome_name': base + '.' + genome_type
                }).get('genome_ref'))

        # run method
        fractionate_mode = 'pos'
        name_parts = ['test_fractionated', assembly_base + '.' + assembly_type]
        name_parts += [base + '.' + genome_type for base in genome_bases]
        name_parts.append(fractionate_mode)
        params = {
            'workspace_name': self.getWsName(),
            'input_assembly_ref': assembly_ref,
            'input_pos_filter_obj_refs': genome_refs,
            'fractionate_mode': fractionate_mode,
            'output_name': '-'.join(name_parts)
        }
        result = self.getImpl().run_fractionate_contigs(
            self.getContext(), params)
        print('RESULT:')
        pprint(result)
Example #3
0
    def test_fractiontate_contigs_ASSEMBLY_GENOMESET_06(self):
        """Run run_fractionate_contigs in 'neg' mode with a GenomeSet filter.

        Uploads one Assembly (assembly_1plus2) and two Genomes (assembly_2a,
        assembly_2b) built from files under the relative ``data`` directory,
        saves a KBaseSearch.GenomeSet that references both genomes, passes
        that set as the only filter object and prints the result. No
        assertions are made on the returned value.

        Raises:
            ValueError: if a service client cannot be instantiated.
        """
        # NOTE(review): the label says "pos_filter" but fractionate_mode below
        # is 'neg' -- confirm which one is intended.
        method = 'fractionate_contigs_pos_filter_ASSEMBLY_GENOMESET_06'

        print("\n\nRUNNING: test_" + method + "()")
        print("==========================================================\n\n")

        # upload test data
        try:
            auClient = AssemblyUtil(self.callback_url,
                                    token=self.getContext()['token'])
        except Exception as e:
            raise ValueError(
                'Unable to instantiate auClient with callbackURL: ' +
                self.callback_url + ' ERROR: ' + str(e))
        try:
            gfuClient = GenomeFileUtil(self.callback_url,
                                       token=self.getContext()['token'])
        except Exception as e:
            raise ValueError(
                'Unable to instantiate gfuClient with callbackURL: ' +
                self.callback_url + ' ERROR: ' + str(e))

        assembly_base = 'assembly_1plus2'
        genome_bases = ['assembly_2a', 'assembly_2b']
        assembly_type = 'Assembly'
        genome_type = 'Genome'

        # stage every input file in scratch: all FASTA files first, then the GFFs
        file_names = [base + '.fa.gz' for base in [assembly_base] + genome_bases]
        file_names += [base + '.gff' for base in genome_bases]
        staged = {}
        for file_name in file_names:
            staged[file_name] = os.path.join(self.scratch, file_name)
            shutil.copy(os.path.join("data", file_name), staged[file_name])

        assembly_ref = auClient.save_assembly_from_fasta({
            'file': {
                'path': staged[assembly_base + '.fa.gz']
            },
            'workspace_name': self.getWsName(),
            'assembly_name': assembly_base + '.' + assembly_type
        })

        # one Genome per base name, each from its FASTA + GFF pair
        genome_refs = []
        for base in genome_bases:
            genome_refs.append(
                gfuClient.fasta_gff_to_genome({
                    'fasta_file': {
                        'path': staged[base + '.fa.gz']
                    },
                    'gff_file': {
                        'path': staged[base + '.gff']
                    },
                    'generate_missing_genes': 1,
                    'source': 'GFF',
                    'scientific_name': base,
                    'workspace_name': self.getWsName(),
                    'genome_name': base + '.' + genome_type
                }).get('genome_ref'))

        # GenomeSet with elements keyed genome_0, genome_1 in upload order
        genomeSet_obj = {
            'description': 'test genomeSet',
            'elements': {
                'genome_' + str(index): {'ref': ref}
                for index, ref in enumerate(genome_refs)
            }
        }
        provenance = [{}]
        genomeSet_info = self.getWsClient().save_objects({
            'workspace': self.getWsName(),
            'objects': [{
                'type': 'KBaseSearch.GenomeSet',
                'data': genomeSet_obj,
                'name': 'test_genomeSet_2a2b',
                'meta': {},
                'provenance': provenance
            }]
        })[0]
        # reference string is "<WSID_I field>/<OBJID_I field>/<VERSION_I field>"
        genomeSet_ref = '/'.join(
            str(genomeSet_info[field])
            for field in (WSID_I, OBJID_I, VERSION_I))

        # run method
        fractionate_mode = 'neg'
        params = {
            'workspace_name': self.getWsName(),
            'input_assembly_ref': assembly_ref,
            'input_pos_filter_obj_refs': [genomeSet_ref],
            'fractionate_mode': fractionate_mode,
            'output_name': '-'.join([
                'test_fractionated', assembly_base + '.' + assembly_type,
                'genomeset_2a2b', fractionate_mode
            ])
        }
        result = self.getImpl().run_fractionate_contigs(
            self.getContext(), params)
        print('RESULT:')
        pprint(result)
class ImportGFFFastaUtil:
    """Import a FASTA + GFF pair from the staging area as a Genome object.

    Downloads both staged files through DataFileUtil and hands the resulting
    local paths to GenomeFileUtil.fasta_gff_to_genome.
    """

    # parameters every import request must contain, in the order they are checked
    _REQUIRED_PARAMS = ('genome_name', 'workspace_name', 'fasta_file', 'gff_file')

    # parameters that arrive as staging sub-paths and are replaced by local paths
    _STAGED_FILE_KEYS = ('fasta_file', 'gff_file')

    def __init__(self, config):
        """Create the service clients from ``config['SDK_CALLBACK_URL']``."""
        self.callback_url = config['SDK_CALLBACK_URL']
        self.dfu = DataFileUtil(self.callback_url)
        self.gfu = GenomeFileUtil(self.callback_url, service_ver='beta')
        self.uploader_utils = UploaderUtil(config)

    def import_gff_fasta_from_staging(self, params):
        """
        import_gff_fasta_from_staging: wrapper method for GenomeFileUtil.fasta_gff_to_genome

        The caller's ``params`` dict is not modified; the staged-file entries
        are replaced in a shallow copy that is passed on to GenomeFileUtil.

        required params:
        fasta_file: fasta file from user's staging area
        gff_file: gff file from user's staging area
        genome_name: output genome object name
        workspace_name: workspace name that genome will be stored to

        file paths for both fasta and gff files must be subdirectory file path in staging area
        e.g.
        for file: /data/bulk/user_name/file_name
        staging_file_subdir_path is file_name
        for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
        staging_file_subdir_path is subdir_1/subdir_2/file_name

        optional params (forwarded unchanged to GenomeFileUtil):
        scientific_name: proper name for species, key for taxonomy lookup. Default to 'unknown_taxon'
        source: Source Of The GenBank File. Default to 'User'
        taxon_wsname - where the reference taxons are. Default to 'ReferenceTaxons'
        taxon_reference - if defined, will try to link the Genome to the specified taxonomy object
        release: Release Or Version Of The Source Data
        genetic_code: Genetic Code For The Organism
        type: 'Reference', 'User upload', 'Representative'

        return (whatever GenomeFileUtil.fasta_gff_to_genome returns; expected to hold):
        genome_ref: return object reference
        report_name: name of generated report (if any)
        report_ref: report reference (if any)

        raises:
        ValueError: if a required parameter is missing or workspace_name is a numeric id
        """

        logging.info(
            '--->\nrunning ImportGFFFastaUtil.import_gff_fasta_from_staging\n'
            'params:\n%s', json.dumps(params, indent=1))

        self.validate_import_gff_fasta_from_staging_params(params)

        # work on a shallow copy so the caller's dict keeps its staging sub-paths
        genome_params = dict(params)
        for key in self._STAGED_FILE_KEYS:
            staged = self.dfu.download_staging_file(
                {'staging_file_subdir_path': params[key]})
            genome_params[key] = {'path': staged['copy_file_path']}

        returnVal = self.gfu.fasta_gff_to_genome(genome_params)

        # TODO: updating the workspace-object metadata for the staged file is
        # currently disabled; it used to be done through
        # self.uploader_utils.update_staging_service(<staging sub-path>,
        #                                            returnVal['genome_ref'])
        return returnVal

    def validate_import_gff_fasta_from_staging_params(self, params):
        """
        validate_import_gff_fasta_from_staging_params:
                    validates params passed to import_gff_fasta_from_staging method

        raises:
        ValueError: if a required key is absent from params, or if
                    workspace_name consists only of digits (i.e. looks like an id)
        """
        # check for required parameters
        for required in self._REQUIRED_PARAMS:
            if required not in params:
                raise ValueError('"' + required +
                                 '" parameter is required, but missing')

        # for now must use workspace name, but no ws_id_to_name() function available
        workspace_name = params["workspace_name"]
        if str(workspace_name).isdigit():
            raise ValueError(
                '"{}" parameter is a workspace id and workspace name is required'
                .format(workspace_name))