def BuildFastaFromSequenceSet(self, ctx, params):
        """
        Write every sequence of a SequenceSet object to a FASTA file.

        Besides the documented structure fields, this method also reads
        params['TESTFLAG'], params['background'] and params['mask_repeats']
        (all three must be present), and params['genome_ref'] when a
        background is requested outside of test mode.

        :param params: instance of type "BuildSeqIn" -> structure: parameter
           "workspace_name" of String, parameter "SequenceSetRef" of String,
           parameter "fasta_outpath" of String
        :returns: instance of type "BuildSeqOut" -> structure: parameter
           "fasta_outpath" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN BuildFastaFromSequenceSet
        #END BuildFastaFromSequenceSet

        # At some point might do deeper type checking...
        dfu = DataFileUtil(self.callback_url)

        bu = BackgroundUtils()
        TU = TestUtils()
        if params['TESTFLAG'] and params['background']:
            # Test mode: build the background from a locally fetched test genome.
            targetpath = '/kb/module/work/tmp/testgenome.fa'
            TU.GetGenome(targetpath)
            bu.BuildBackground(targetpath)
        elif params['background']:
            # NOTE(review): the workspace URL is hard-coded to appdev here —
            # confirm whether it should come from the service configuration.
            ws = Workspace('https://appdev.kbase.us/services/ws')
            subset = ws.get_object_subset([{
                'included': ['/features/[*]/location', '/features/[*]/id',
                             '/assembly_ref'],
                'ref': params['genome_ref']}])
            aref = subset[0]['data']['assembly_ref']
            assembly_ref = {'ref': aref}
            print('Downloading Assembly data as a Fasta file.')
            assemblyUtil = AssemblyUtil(self.callback_url)
            fasta_file = assemblyUtil.get_assembly_as_fasta(assembly_ref)['path']
            bu.BuildBackground(fasta_file)

        get_objects_params = {'object_refs': [params['SequenceSetRef']]}

        SeqSet = dfu.get_objects(get_objects_params)['data'][0]['data']
        # Context manager guarantees the file is flushed and closed even if a
        # sequence record is malformed and a write raises.
        with open(params['fasta_outpath'], 'w') as outFile:
            for s in SeqSet['sequences']:
                outFile.write('>' + s['sequence_id'] + '\n')
                outFile.write(s['sequence'] + '\n')

        fu = FastaUtils()
        if params['mask_repeats']:
            # Input and output path are the same: the FASTA is rewritten in place.
            fu.RemoveRepeats(params['fasta_outpath'], params['fasta_outpath'])

        output = {'fasta_outpath': params['fasta_outpath']}
        #END BuildFastaFromSequenceSet

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method BuildFastaFromSequenceSet return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Beispiel #2
0
    def getAssemblyInfo(self, ass_name):
        """
        Return workspace object info for a test assembly, uploading it on first use.

        Results are cached per assembly name on the class attribute
        ``assemblyInfo`` so each file is uploaded at most once per test class.
        """
        klass = self.__class__
        if hasattr(klass, 'assemblyInfo'):
            cached = klass.assemblyInfo.get(ass_name)
            if cached:
                return cached

        # The AssemblyUtil container can only see the shared scratch space,
        # so stage the local test file there first.
        local_fasta = os.path.join('data', 'assemblies', ass_name)
        scratch_fasta = os.path.join(self.scratch,
                                     os.path.basename(local_fasta))
        shutil.copy(local_fasta, scratch_fasta)

        # Upload the staged file to KBase through the AssemblyUtil library.
        uploader = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        upload_params = {
            'file': {'path': scratch_fasta},
            'workspace_name': self.getWsName(),
            'assembly_name': ass_name,
        }
        ass_ref = uploader.save_assembly_from_fasta(upload_params)

        # Fetch the metadata of the freshly created object and cache it.
        new_obj_info = self.ws.get_object_info_new(
            {'objects': [{'ref': ass_ref}]})
        if not hasattr(klass, 'assemblyInfo'):
            klass.assemblyInfo = dict()
        klass.assemblyInfo[ass_name] = new_obj_info[0]
        return new_obj_info[0]
Beispiel #3
0
    def upload_assembly(self, file_path, workspace_name, assembly_name):
        """
        Upload a single FASTA file to KBase as an Assembly object.

        :param file_path: path of the FASTA file; must exist
        :param workspace_name: name of the target workspace
        :param assembly_name: name to give the new Assembly object
        :returns: the value returned by AssemblyUtil.save_assembly_from_fasta
        :raises ValueError: if an argument is empty or the file does not exist
        """
        if not file_path:
            raise ValueError("file_path must be defined")
        if not os.path.exists(file_path):
            message = "The given assembly file '{}' does not exist".format(
                file_path)
            raise ValueError(message)
        if not workspace_name:
            raise ValueError("workspace_name must be defined")
        if not assembly_name:
            raise ValueError("assembly_name must be defined")

        save_params = {
            "file": {"path": file_path},
            "workspace_name": workspace_name,
            "assembly_name": assembly_name,
        }
        return AssemblyUtil(self.callback_url).save_assembly_from_fasta(
            save_params)
 def get_fasta_file(self, filename, obj_name):
     """Save the FASTA file at *filename* as an assembly named *obj_name* and return the result of the save call."""
     save_params = {
         'file': {'path': filename},
         'workspace_name': self.getWsName(),
         'assembly_name': obj_name,
     }
     uploader = AssemblyUtil(self.callback_url)
     return uploader.save_assembly_from_fasta(save_params)
Beispiel #5
0
 def _get_assembly(self, genome):
     """
     Download the genome's assembly as FASTA and collect its circular contigs.

     The assembly reference is taken from ``genome['assembly_ref']`` when
     present, otherwise from ``genome['contigset_ref']``, and is resolved
     through ``self.genome_ref``.

     :returns: tuple (path of the FASTA file, set of circular contig ids)
     """
     ref_key = 'assembly_ref' if 'assembly_ref' in genome else 'contigset_ref'
     assembly_ref = genome[ref_key]
     log('Assembly reference = ' + assembly_ref)
     log('Downloading assembly')
     dfu = DataFileUtil(self.cfg.callbackURL)
     # Reference path: the assembly is addressed through the genome object.
     ref_path = self.genome_ref + ";" + assembly_ref
     log('object_refs:' + ref_path)
     fetched = dfu.get_objects({'object_refs': [ref_path]})
     assembly_data = fetched['data'][0]['data']
     contigs = assembly_data['contigs']
     if isinstance(contigs, dict):  # is an assembly
         circular_contigs = {
             contig['contig_id'] for contig in contigs.values()
             if contig.get('is_circ')
         }
     else:  # is a contig set
         circular_contigs = {
             contig['id'] for contig in contigs
             if contig.get('replicon_geometry') == 'circular'
         }
     au = AssemblyUtil(self.cfg.callbackURL)
     fasta_info = au.get_assembly_as_fasta({'ref': ref_path})
     return fasta_info['path'], circular_contigs
    def load_genome_direct(self, filename, assembly_filename, obj_name):
        """
        Save a test assembly and a genome JSON that points at it.

        :param filename: path of the genome JSON file
        :param assembly_filename: path of the assembly FASTA file
        :param obj_name: base name; '.assembly' / '.genome' suffixes are appended
        :returns: reference string of the saved genome
        """
        uploader = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        assembly_params = {
            'workspace_name': self.getWsName(),
            'assembly_name': obj_name + '.assembly',
            'file': {'path': assembly_filename},
        }
        assembly_ref = uploader.save_assembly_from_fasta(assembly_params)
        pprint('created test assembly: ' + assembly_ref)

        with open(filename, 'r') as genome_file:
            data = json.load(genome_file)
        # Point the genome at the assembly that was just uploaded.
        data['assembly_ref'] = assembly_ref
        saved = self.gaa.save_one_genome_v1({
            'workspace': self.getWsName(),
            'data': data,
            'name': obj_name + '.genome',
        })
        info = saved['info']
        ref = "{}/{}/{}".format(info[6], info[0], info[4])
        print('created test genome: ' + ref + ' from file ' + filename)
        return ref
Beispiel #7
0
def load_fastas(config, scratch: str, upa: str):
    '''
    Resolve a workspace object to the FASTA file(s) behind it.

    Genomes and genome sets are resolved through their contig set / assembly
    reference; contig sets, assemblies and assembly sets are downloaded
    directly; binned contigs are exported and copied into *scratch*.

    Returns list of (fasta_path, upa)
    '''
    callback_url = config['callback_url']
    dfu = DataFileUtil(callback_url)
    au = AssemblyUtil(callback_url)
    mgu = MetagenomeUtils(callback_url)
    ws = Workspace(config['workspace-url'])

    fetched = dfu.get_objects({"object_refs": [upa]})['data'][0]
    obj_type = fetched['info'][2]

    if 'KBaseSets.GenomeSet' in obj_type:
        upas = [item['ref'] for item in fetched['data']['items']]
    elif 'KBaseSearch.GenomeSet' in obj_type:
        upas = [element['ref']
                for element in fetched['data']['elements'].values()]
    elif "KBaseGenomes.Genome" in obj_type:
        upas = [upa]
    elif ("KBaseGenomes.ContigSet" in obj_type
          or "KBaseGenomeAnnotations.Assembly" in obj_type):
        # The assembly utility can produce the FASTA for these types directly.
        return [(au.get_assembly_as_fasta({"ref": upa})['path'], upa)]
    elif "KBaseSets.AssemblySet" in obj_type:
        return [
            (au.get_assembly_as_fasta({"ref": member['ref']})['path'],
             member['ref'])
            for member in fetched['data']['items']
        ]
    elif 'KBaseMetagenomes.BinnedContigs' in obj_type:
        export = mgu.binned_contigs_to_file({
            'input_ref': upa,
            'save_to_shock': 0
        })
        bin_dir = export['bin_file_directory']
        # Only the top level of the export directory is inspected.
        _, _, top_level_files = next(os.walk(bin_dir), (None, None, []))
        binned = []
        for bin_name in top_level_files:
            # Copy each bin into scratch under a uniform ".fa" extension.
            stem = os.path.splitext(os.path.join(scratch, bin_name))[0]
            local_fasta = stem + ".fa"
            copyfile(os.path.join(bin_dir, bin_name), local_fasta)
            binned.append((local_fasta, upa))
        return binned
    else:
        # NOTE(review): `Error` is not a builtin; presumably defined or
        # imported elsewhere in this module — confirm.
        raise Error('Input genome/metagenome reference has unhandled type')

    resolved = []
    for genome_upa in upas:
        lookup = ws.get_objects2({'objects': [{"ref": genome_upa}]})
        genome_data = lookup['data'][0]['data']
        target = (genome_data.get('contigset_ref')
                  or genome_data.get('assembly_ref'))
        assembly_upa = genome_upa + ';' + str(target)
        fasta = au.get_assembly_as_fasta({'ref': assembly_upa})
        resolved.append((fasta['path'], assembly_upa))

    return resolved
Beispiel #8
0
    def _build_index(self, assembly_info, validated_params):
        """
        Build a BWA index for an assembly and copy the index files to the output directory.

        :param assembly_info: mapping with the assembly reference under 'ref'
        :param validated_params: mapping providing 'output_dir' and 'ws_for_cache'
        :returns: dict with 'output_dir', 'index_files_basename' and
                  'pushed_to_cache' (1 if caching succeeded, else 0)
        :raises ValueError: if the output directory already exists
        """
        # get the assembly as a fasta file using AssemblyUtil
        au = AssemblyUtil(self.callback_url)
        fasta_info = au.get_assembly_as_fasta({'ref': assembly_info['ref']})
        assembly_name = fasta_info['assembly_name']
        output_dir = validated_params['output_dir']

        # make the target destination folder (check again it wasn't created yet)
        if os.path.exists(output_dir):
            # Raising a bare string is itself a TypeError in Python 3, which
            # would hide this message; raise a real exception instead.
            raise ValueError('Output directory name specified (' + output_dir +
                             ') already exists. Will not overwrite, so aborting.')
        os.makedirs(output_dir)

        # configure the command line args and run it
        cli_params = self._build_cli_params(fasta_info['path'], assembly_name,
                                            validated_params)
        self.bwa.run('index', cli_params)
        # Collect every file named after the assembly from the work directory.
        for index_file in glob.glob(r'/kb/module/work/tmp/' + assembly_name + '.*'):
            print(index_file)
            shutil.copy(index_file, output_dir)

        index_info = {'output_dir': output_dir,
                      'index_files_basename': assembly_name}

        # cache the result, mark if it worked or not
        cache_success = self._put_cached_index(assembly_info,
                                               assembly_name,
                                               output_dir,
                                               validated_params['ws_for_cache'])
        index_info['pushed_to_cache'] = 1 if cache_success else 0

        return index_info
Beispiel #9
0
def load_fastas(config, scratch, upa):
    '''
    Resolve a genome, genome set, contig set or assembly to its FASTA file(s).

    :param config: mapping providing 'callback_url' and 'workspace-url'
    :param scratch: unused; kept so existing callers keep working
    :param upa: reference of the input object
    :returns: list of (fasta_path, upa) tuples
    :raises ValueError: if the object type is not handled, or a genome has
        neither a 'contigset_ref' nor an 'assembly_ref'
    '''
    dfu = DataFileUtil(config['callback_url'])
    au = AssemblyUtil(config['callback_url'])
    ws = Workspace(config['workspace-url'])

    obj_data = dfu.get_objects({"object_refs": [upa]})['data'][0]
    obj_type = obj_data['info'][2]

    if 'KBaseSets.GenomeSet' in obj_type:
        upas = [gsi['ref'] for gsi in obj_data['data']['items']]
    elif 'KBaseSearch.GenomeSet' in obj_type:
        upas = [gse['ref'] for gse in obj_data['data']['elements'].values()]
    elif "KBaseGenomes.Genome" in obj_type:
        upas = [upa]
    elif "KBaseGenomes.ContigSet" in obj_type or "KBaseGenomeAnnotations.Assembly" in obj_type:
        # in this case we use the assembly file util to get the fasta file
        faf = au.get_assembly_as_fasta({"ref": upa})
        return [(faf['path'], upa)]
    else:
        # Without this branch `upas` would be unbound below and the caller
        # would see an UnboundLocalError instead of a meaningful message.
        raise ValueError(
            'Input reference {} has unhandled type {}'.format(upa, obj_type))

    fasta_paths = []
    for genome_upa in upas:
        # Set members are addressed through the set object (reference path).
        if upa != genome_upa:
            genome_upa = upa + ';' + genome_upa
        genome_data = ws.get_objects2(
            {'objects': [{"ref": genome_upa}]})['data'][0]['data']
        target_upa = genome_data.get('contigset_ref') or genome_data.get('assembly_ref')
        if not target_upa:
            raise ValueError(
                'Genome {} has neither a contigset_ref nor an assembly_ref'.format(
                    genome_upa))
        assembly_upa = genome_upa + ';' + target_upa
        faf = au.get_assembly_as_fasta({"ref": assembly_upa})
        fasta_paths.append((faf['path'], assembly_upa))

    return fasta_paths
Beispiel #10
0
 def __init__(self, config):
     """Store the service URLs and scratch path from *config* and create the DataFileUtil / AssemblyUtil clients."""
     callback_url = config['SDK_CALLBACK_URL']
     self.callback_url = callback_url
     self.scratch = config['scratch']
     self.shock_url = config['shock-url']
     self.ws_url = config['workspace-url']
     # Both clients talk to the same callback server.
     self.dfu = DataFileUtil(callback_url)
     self.au = AssemblyUtil(callback_url)
Beispiel #11
0
 def __init__(self, config):
     """
     Create the service clients and initialise the per-run bookkeeping state.

     :param config: configuration object; this constructor reads its
         ``callbackURL`` attribute and ``raw['taxon-workspace-name']``
     """
     self.cfg = config
     self.au = AssemblyUtil(config.callbackURL)
     self.dfu = DataFileUtil(self.cfg.callbackURL)
     self.gi = GenomeInterface(self.cfg)
     self.taxon_wsname = self.cfg.raw['taxon-workspace-name']
     self.time_string = str(
         datetime.datetime.fromtimestamp(
             time.time()).strftime('%Y_%m_%d_%H_%M_%S'))
     # Read the module descriptor with a context manager so the handle is
     # closed deterministically.
     with open('/kb/module/kbase.yml') as yml_file:
         yml_text = yml_file.read()
     # Raw string: "\W" in a normal literal is an invalid escape sequence.
     # The pattern still matches the same text.
     self.version = re.search(r"module-version:\n\W+(.+)\n",
                              yml_text).group(1)
     self.ont_mappings = load_ontology_mappings('/kb/module/data')
     self.code_table = 11
     self.skip_types = ('exon', 'five_prime_UTR', 'three_prime_UTR',
                        'start_codon', 'stop_codon', 'region', 'chromosome',
                        'scaffold')
     self.spoof_gene_count = 0
     self.is_phytozome = False
     self.strict = True
     self.generate_genes = False
     self.warnings = []
     self.feature_dict = collections.OrderedDict()
     self.cdss = set()
     self.ontologies_present = collections.defaultdict(dict)
     self.ontology_events = list()
     self.skiped_features = collections.Counter()
     self.feature_counts = collections.Counter()
 def load_genome_direct(cls, filename, assembly_filename, obj_name):
     """
     Upload a test assembly and save a genome JSON that references it.

     :param filename: path of the genome JSON file
     :param assembly_filename: path of the assembly FASTA file; it is copied
         into ``cls.cfg['scratch']`` before upload
     :param obj_name: base name; '.assembly' / '.genome' suffixes are appended
     :returns: tuple (genome reference string, assembly reference)
     """
     au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
     assembly_path = os.path.join(cls.cfg['scratch'],
                                  os.path.basename(assembly_filename))
     shutil.copy(assembly_filename, assembly_path)
     assembly_ref = au.save_assembly_from_fasta({
         'workspace_name': cls.wsName,
         'assembly_name': obj_name + '.assembly',
         'file': {
             'path': assembly_path
         }
     })
     # Close the JSON file deterministically instead of leaking the handle.
     with open(filename) as genome_file:
         data = json.load(genome_file)
     data['assembly_ref'] = assembly_ref
     save_info = {
         'workspace':
         cls.wsName,
         'objects': [{
             'data': data,
             'name': obj_name + '.genome',
             'type': 'KBaseGenomes.Genome',
         }],
     }
     info = cls.wsClient.save_objects(save_info)[0]
     ref = f"{info[6]}/{info[0]}/{info[4]}"
     print('created test genome: ' + ref + ' from file ' + filename)
     return ref, assembly_ref
Beispiel #13
0
class DownloadUtils:
    """Helpers that fetch KBase objects (assemblies, variations, GFF) as local files."""

    def __init__(self, callbackURL):
        # NOTE(review): the callbackURL argument is not used; the URL is
        # always read from the SDK_CALLBACK_URL environment variable.
        url = os.environ['SDK_CALLBACK_URL']
        self.callbackURL = url
        self.au = AssemblyUtil(url)
        self.vu = VariationUtil(url)
        self.gfu = GenomeFileUtil(url)

    def download_genome(self, genomeref, output_dir):
        '''
        Download the referenced object as FASTA to <output_dir>/ref_genome.fa.
        :param genomeref: reference passed to AssemblyUtil as 'ref'
        :param output_dir: directory that receives ref_genome.fa
        :return: the full result of get_assembly_as_fasta (not only the path)
        '''
        target = os.path.join(output_dir, "ref_genome.fa")
        request = {'ref': genomeref, 'filename': target}
        return self.au.get_assembly_as_fasta(request)

    def get_variation(self, variation_ref):
        '''
        Download a variation object as a VCF file.
        :param variation_ref: reference of the variation object
        :return: path of the VCF file
        '''
        vcf_info = self.vu.get_variation_as_vcf(
            {'variation_ref': variation_ref})
        return vcf_info['path']

    def get_gff(self, genome_ref):
        '''
        Export a genome as a GFF file.
        :param genome_ref: reference of the genome object
        :return: gff file path
        '''
        gff_info = self.gfu.genome_to_gff({'genome_ref': genome_ref})
        return gff_info['file_path']

    def get_assembly(self, assembly_ref, output_dir):
        '''
        Download an assembly as FASTA to <output_dir>/ref_genome.fa.
        :param assembly_ref: reference of the assembly object
        :param output_dir: directory that receives ref_genome.fa
        :return: assembly file path
        '''
        target = os.path.join(output_dir, "ref_genome.fa")
        fasta_info = self.au.get_assembly_as_fasta({
            'ref': assembly_ref,
            'filename': target
        })
        return fasta_info['path']
Beispiel #14
0
 def __init__(self, callbaack_url, scratch, wdl='../../metaAssembly/'):
     """
     Store the callback URL, scratch path and WDL base path, and create the service clients.

     :param callbaack_url: SDK callback URL handed to every client
     :param scratch: scratch directory path
     :param wdl: base path stored as ``wdl_base``
     """
     url = callbaack_url
     self.callback_url = url
     self.scratch = scratch
     self.special = special(url)
     self.ru = ReadsUtils(url)
     self.au = AssemblyUtil(url)
     self.report = KBaseReport(url)
     self.wdl_base = wdl
Beispiel #15
0
 def __init__(self, config):
     """
     Read the scratch path and workspace URL from *config*, the callback URL
     and auth token from the environment, and create the service clients.
     """
     # The original assigned self.scratch twice with the same expression;
     # one assignment is enough.
     self.scratch = os.path.abspath(config['scratch'])
     self.callback_url = os.environ['SDK_CALLBACK_URL']
     self.token = os.environ['KB_AUTH_TOKEN']
     self.ws = Workspace(config['workspace-url'], token=self.token)
     self.genome_api = GenomeAnnotationAPI(self.callback_url)
     self.au = AssemblyUtil(self.callback_url)
Beispiel #16
0
 def save_assembly(self, wsname, output_contigs, token, name, console):
     """
     Upload the FASTA file *output_contigs* as an assembly called *name* in workspace *wsname*.

     A progress line is logged to *console*. The result of the save call is
     not returned.
     """
     self.log(console, 'Uploading FASTA file to Assembly')
     uploader = AssemblyUtil(self.callbackURL,
                             token=token,
                             service_ver='dev')
     upload_params = {
         'file': {'path': output_contigs},
         'workspace_name': wsname,
         'assembly_name': name,
     }
     uploader.save_assembly_from_fasta(upload_params)
Beispiel #17
0
 def __init__(self, config):
     """
     Create the service clients and initialise the parser state and default parameters.

     :param config: configuration object; this constructor reads its
         ``callbackURL``, ``workspaceURL`` and ``re_api_url`` attributes and
         several keys of its ``raw`` mapping
     """
     self.cfg = config
     self.gi = GenomeInterface(config)
     self.dfu = DataFileUtil(config.callbackURL)
     self.aUtil = AssemblyUtil(config.callbackURL)
     self.ws = Workspace(config.workspaceURL)
     self._messages = []
     self.time_string = str(
         datetime.datetime.fromtimestamp(
             time.time()).strftime('%Y_%m_%d_%H_%M_%S'))
     # Read the module descriptor with a context manager so the handle is
     # closed deterministically.
     with open('/kb/module/kbase.yml') as yml_file:
         yml_text = yml_file.read()
     # Raw string: "\W" in a normal literal is an invalid escape sequence.
     # The pattern still matches the same text.
     self.version = re.search(r"module-version:\n\W+(.+)\n",
                              yml_text).group(1)
     self.generate_parents = False
     self.generate_ids = False
     self.genes = OrderedDict()
     self.mrnas = OrderedDict()
     self.cdss = OrderedDict()
     self.noncoding = []
     self.ontologies_present = defaultdict(dict)
     self.ontology_events = list()
     self.skiped_features = Counter()
     self.feature_counts = Counter()
     self.orphan_types = Counter()
     self.contig_seq = {}
     self.circ_contigs = set()
     self.features_spaning_zero = set()
     self.genome_warnings = []
     self.genome_suspect = False
     self.defects = Counter()
     self.spoofed_genes = 0
     self.excluded_features = ('source', 'exon', 'fasta_record')
     self.ont_mappings = load_ontology_mappings('/kb/module/data')
     self.code_table = 11
     self.re_api_url = config.re_api_url
     # dict with feature 'id's that have been used more than once.
     self.used_twice_identifiers = {}
     self.default_params = {
         'source':
         'Genbank',
         'taxon_wsname':
         self.cfg.raw['taxon-workspace-name'],
         'taxon_lookup_obj_name':
         self.cfg.raw['taxon-lookup-object-name'],
         'ontology_wsname':
         self.cfg.raw['ontology-workspace-name'],
         'ontology_GO_obj_name':
         self.cfg.raw['ontology-gene-ontology-obj-name'],
         'ontology_PO_obj_name':
         self.cfg.raw['ontology-plant-ontology-obj-name'],
         'release':
         None,
         'genetic_code':
         11,
         'generate_ids_if_needed':
         0,
         'metadata': {}
     }
Beispiel #18
0
 def __init__(self, config):
     """Read the callback URL from the environment, the other settings from *config*, and create the service clients."""
     url = os.environ['SDK_CALLBACK_URL']
     self.callback_url = url
     self.scratch = config['scratch']
     self.shock_url = config['shock-url']
     self.ws_url = config['workspace-url']
     # All clients share the same callback server.
     self.dfu = DataFileUtil(url)
     self.ru = ReadsUtils(url)
     self.au = AssemblyUtil(url)
     self.mgu = MetagenomeUtils(url)
Beispiel #19
0
 def load_fasta_file(self, filename, obj_name, contents):
     """
     Write *contents* to *filename* and save that file as an assembly.

     :param filename: path of the FASTA file to create (overwritten if present)
     :param obj_name: name of the assembly object to save
     :param contents: text written to the file
     :returns: the value returned by AssemblyUtil.save_assembly_from_fasta
     """
     # Context manager closes the file even if the write fails, and makes
     # sure the data is flushed before the upload reads it.
     with open(filename, 'w') as fasta_file:
         fasta_file.write(contents)
     assemblyUtil = AssemblyUtil(self.callback_url)
     assembly_ref = assemblyUtil.save_assembly_from_fasta({'file': {'path': filename},
                                                           'workspace_name': self.getWsName(),
                                                           'assembly_name': obj_name
                                                           })
     return assembly_ref
Beispiel #20
0
 def __init__(self, config):
     """Create a unique scratch sub-directory for this import and set up the service clients."""
     url = config['SDK_CALLBACK_URL']
     self.callback_url = url
     # A random suffix gives every instance its own working directory.
     run_dir = 'import_assembly_' + str(uuid.uuid4())
     self.scratch = os.path.join(config['scratch'], run_dir)
     handler_utils._mkdir_p(self.scratch)
     self.token = config['KB_AUTH_TOKEN']
     self.dfu = DataFileUtil(url)
     self.au = AssemblyUtil(url)
     self.uploader_utils = UploaderUtil(config)
     self.max_contigs_for_report = 200
Beispiel #21
0
 def __init__(self, config, scratch, callback_url ):
     """
     Store configuration values and create the service clients.

     :param config: mapping providing 'scratch' and 'workspace-url'
     :param scratch: scratch path; replaces the value read from *config*
     :param callback_url: callback URL; replaces the value read from the
         environment for everything created after the reassignment below
     """
     # Overwritten by the `scratch` argument further down.
     self.scratch = config['scratch']
     self.ws_url = config['workspace-url']
     # Overwritten by the `callback_url` argument further down.
     self.callback_url = os.environ['SDK_CALLBACK_URL']
     # NOTE(review): dfu is built with the environment URL, while au and gapi
     # below use the `callback_url` argument — confirm this is intended.
     self.dfu = DataFileUtil(self.callback_url)
     self.wsc = Workspace(self.ws_url)
     self.scratch = scratch
     self.callback_url = callback_url
     self.au = AssemblyUtil(self.callback_url)
     self.gapi = GenericsAPI(self.callback_url)
    def jayrbolton_contig_filter(self, ctx, params):
        """
        Filter the contigs of an assembly by minimum length, save the result as a
        new assembly and return a KBaseReport describing how many were removed.
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN jayrbolton_contig_filter
        if not params.get('assembly_input_ref'):
            raise TypeError("`assembly_input_ref` is required")
        # min_length must be present, truthy and an int.
        if not (params.get('min_length')
                and isinstance(params['min_length'], int)):
            raise TypeError("`min_length` is required and needs to be an int")
        min_length = params['min_length']
        workspace_name_key = 'workspace_name'

        # Download the input assembly as a local fasta file.
        assembly_util = AssemblyUtil(self.callback_url)
        downloaded = assembly_util.get_assembly_as_fasta(
            {'ref': params['assembly_input_ref']})
        filtered_path = os.path.join(self.shared_folder, 'filtered.fasta')
        report_client = KBaseReport(self.callback_url)
        result = contig_filter(downloaded['path'], filtered_path, min_length)

        # Upload the filtered contigs as a new assembly object.
        save_params = {
            'workspace_name': params[workspace_name_key],
            'file': {
                'path': filtered_path,
                'assembly_name': 'filtered_contigs'
            },
            'assembly_name': 'filtered_assembly'
        }
        assembly_obj = assembly_util.save_assembly_from_fasta(save_params)

        report_params = {
            'workspace_name': params[workspace_name_key],
            'objects_created': [{
                'ref': assembly_obj,
                'description': 'filtered_assembly'
            }],
            'message':
            (f"Filtered out {result['n_total'] - result['n_remaining']} "
             f"records out of {result['n_total']} records.")
        }
        report = report_client.create_extended_report(report_params)
        output = {'report_ref': report['ref'], 'report_name': report['name']}
        #END jayrbolton_contig_filter

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method jayrbolton_contig_filter return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Beispiel #23
0
 def prepare_data(cls):
     """
     Upload the E. coli test assembly and load the matching genome JSON.

     Sets ``cls.test_genome_data`` to the parsed genome, with its
     'assembly_ref' pointing at the uploaded assembly.
     """
     assembly_file_path = os.path.join(cls.scratch,
                                       'e_coli_assembly.fasta')
     # Stage the FASTA in scratch before handing it to AssemblyUtil.
     shutil.copy('data/e_coli/e_coli_assembly.fasta', assembly_file_path)
     au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
     assembly_ref = au.save_assembly_from_fasta({
         'workspace_name': cls.wsName,
         'assembly_name': 'e_coli.assembly',
         'file': {'path': assembly_file_path}
     })
     # Close the JSON file deterministically instead of leaking the handle.
     with open('data/e_coli/e_coli.json') as genome_file:
         cls.test_genome_data = json.load(genome_file)
     cls.test_genome_data['assembly_ref'] = assembly_ref
Beispiel #24
0
    def test_filter_contigs_by_length_01(self):
        """Upload two gzipped test assemblies and run run_filter_contigs_by_length on them (smoke test; the result is only printed)."""
        print("\n\nRUNNING: test_filter_contigs_by_length_01()")
        print("===========================================\n\n")

        # upload test data
        try:
            auClient = AssemblyUtil(self.callback_url,
                                    token=self.getContext()['token'])
        except Exception as e:
            raise ValueError(
                'Unable to instantiate auClient with callbackURL: ' +
                self.callback_url + ' ERROR: ' + str(e))

        file_names = ('assembly_1.fa.gz', 'assembly_2.fa.gz')
        object_names = ('assembly_1', 'assembly_2')
        scratch_paths = [os.path.join(self.scratch, name)
                         for name in file_names]
        # Stage both files in scratch first, then upload them in order.
        for name, destination in zip(file_names, scratch_paths):
            shutil.copy(os.path.join("data", name), destination)
        input_refs = []
        for destination, object_name in zip(scratch_paths, object_names):
            input_refs.append(auClient.save_assembly_from_fasta({
                'file': {'path': destination},
                'workspace_name': self.getWsName(),
                'assembly_name': object_name
            }))

        # run method
        params = {
            'workspace_name': self.getWsName(),
            'input_assembly_refs': input_refs,
            'min_contig_length': 1000,
            'output_name': 'test_filtered'
        }
        result = self.getImpl().run_filter_contigs_by_length(
            self.getContext(), params)
        print('RESULT:')
        pprint(result)
Beispiel #25
0
    def run_mash_sketch(self, ctx, params):
        """
        Generate a sketch file from a fasta/fastq file
        :param params: instance of type "MashSketchParams" (* * Pass in **one
           of** input_path, assembly_ref, or reads_ref *   input_path -
           string - local file path to an input fasta/fastq *   assembly_ref
           - string - workspace reference to an Assembly type *   reads_ref -
           string - workspace reference to a Reads type * Optionally, pass in
           a boolean indicating whether you are using paired-end reads. *
           paired_ends - boolean - whether you are passing in paired ends) ->
           structure: parameter "input_path" of String, parameter
           "assembly_ref" of String, parameter "reads_ref" of String,
           parameter "paired_ends" of type "boolean" (params:
           input_upa: workspace reference to an assembly object
           workspace_name: name of current workspace search_db: database to
           search n_max_results: number of results to return, integer between
           1 and 100)
        :returns: instance of type "MashSketchResults" (* * Returns the local
           scratch file path of the generated sketch file. * Will have the
           extension '.msh') -> structure: parameter "sketch_path" of String
        """
        # ctx is the context object
        # return variables are: results
        #BEGIN run_mash_sketch
        # Input precedence when several keys are given: reads_ref,
        # then assembly_ref, then input_path.
        if 'reads_ref' in params:
            reads_ref = params['reads_ref']
            downloaded = ReadsUtils(self.callbackURL).download_reads({
                'read_libraries': [reads_ref],
                'interleaved': 'true'
            })
            input_path = downloaded['files'][reads_ref]['files']['fwd']
        elif 'assembly_ref' in params:
            fasta_info = AssemblyUtil(self.callbackURL).get_assembly_as_fasta(
                {'ref': params['assembly_ref']})
            input_path = fasta_info['path']
        elif 'input_path' in params:
            input_path = params['input_path']
        else:
            raise ValueError(
                'Invalid params; must provide one of `reads_ref`, `assembly_ref`, or `input_path`.'
            )
        sketcher = MashUtils(self.config, self.auth_token)
        sketch_path = sketcher.mash_sketch(
            input_path, paired_ends=params.get('paired_ends'))
        results = {'sketch_path': sketch_path}
        #END run_mash_sketch

        # At some point might do deeper type checking...
        if not isinstance(results, dict):
            raise ValueError('Method run_mash_sketch return value ' +
                             'results is not type dict as required.')
        # return the results
        return [results]
Beispiel #26
0
    def download_assembly(self, token, assembly_ref):
        """
        Download an assembly as FASTA and return the path reported by DataFileUtil.unpack_file.

        :raises ValueError: if either service client cannot be instantiated
        """
        try:
            assembly_client = AUClient(self.callback_url,
                                       token=token,
                                       service_ver=self.SERVICE_VER)
        except Exception as e:
            raise ValueError('Unable to instantiate auClient with callback_url: '+ self.callback_url +' ERROR: ' + str(e))
        try:
            file_client = DFUClient(self.callback_url,
                                    token=token,
                                    service_ver=self.SERVICE_VER)
        except Exception as e:
            raise ValueError('Unable to instantiate dfuClient with callback_url: '+ self.callback_url +' ERROR: ' + str(e))

        fasta_info = assembly_client.get_assembly_as_fasta({'ref': assembly_ref})
        contig_file = fasta_info.get('path')
        sys.stdout.flush()   # kept from the original; the reason is not recorded
        unpacked = file_client.unpack_file({'file_path': contig_file})
        return unpacked['file_path']
Beispiel #27
0
    def setUpClass(cls):
        """
        Create the shared test fixtures: context, service implementation, a fresh
        workspace, an uploaded assembly and a saved ContigSet object.
        """
        token = environ.get('KB_AUTH_TOKEN', None)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'provenance': [
                            {'service': 'GenomeFileUtil',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenomeFileUtil'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.ws = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = GenomeFileUtil(cls.cfg)
        gi_config = SDKConfig(cls.cfg)
        cls.genome_interface = GenomeInterface(gi_config)
        # create one WS for all tests
        suffix = int(time.time() * 1000)
        wsName = "test_GenomeAnnotationAPI_" + str(suffix)
        cls.ws.create_workspace({'workspace': wsName})
        cls.wsName = wsName

        # save new genome
        assembly_file_path = os.path.join(cls.cfg['scratch'],
                                          'Rhodo_SPAdes_assembly.fa')
        shutil.copy('data/Rhodo_SPAdes_assembly.fa', assembly_file_path)
        au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        cls.assembly_ref = au.save_assembly_from_fasta({
            'workspace_name': cls.wsName,
            'assembly_name': 'ecoli.assembly',
            'file': {'path': assembly_file_path}
        })

        # Close the JSON file deterministically instead of leaking the handle.
        with open('data/rhodobacter_contigs.json') as contigs_file:
            rhodobacter_contigs = json.load(contigs_file)
        save_info = {
            'workspace': cls.wsName,
            'objects': [{
                'type': 'KBaseGenomes.ContigSet',
                'data': rhodobacter_contigs,
                'name': 'rhodobacter_contigs'
            }]
        }
        # Holds the raw return value of save_objects, not a reference string.
        cls.contigset_ref = cls.ws.save_objects(save_info)
Beispiel #28
0
def fetch_fasta_from_assembly(assembly_ref, ws_url, callback_url):
    """
    Build a FASTA file for an assembly or contigset reference via AssemblyUtil.

    The reference is validated first by passing it, the list of accepted workspace
    types and ws_url to check_ref_type; a ValueError is raised when that check
    reports a falsy result.

    Returns the unmodified result of AssemblyUtil.get_assembly_as_fasta, not a bare
    path string (NOTE(review): presumably a dict carrying a 'path' entry - confirm
    against the AssemblyUtil client).
    """
    accepted_types = [
        'KBaseFile.Assembly',
        'KBaseGenomeAnnotations.Assembly',
        'KBaseGenomes.ContigSet',
    ]
    type_ok = check_ref_type(assembly_ref, accepted_types, ws_url)
    if type_ok:
        fasta_client = AssemblyUtil(callback_url)
        request = {'ref': assembly_ref}
        return fasta_client.get_assembly_as_fasta(request)
    message = "The reference {} cannot be used to fetch a FASTA file".format(
        assembly_ref)
    raise ValueError(message)
    def __init__(self, config):
        """
        Wire up per-run state and the service clients used by this object.

        :param config: mapping that must provide the keys "scratch", "ctx",
            "SDK_CALLBACK_URL" and "workspace-url"
        """
        # Bookkeeping that starts out empty and is filled in elsewhere.
        self.sso_ref = None
        self.sso_event = None
        self.ec_to_sso = {}
        self.output_workspace = None

        # Values taken straight from the supplied configuration.
        self.scratch = config["scratch"]
        self.ctx = config['ctx']
        callback_url = config["SDK_CALLBACK_URL"]
        self.callback_url = callback_url

        # The workspace client talks to the workspace URL directly; every
        # other client is constructed with the SDK callback URL.
        self.ws_client = workspaceService(config["workspace-url"])
        self.gfu = GenomeFileUtil(callback_url)
        self.au = AssemblyUtil(callback_url)
        self.kbr = KBaseReport(callback_url)
        self.dfu = DataFileUtil(callback_url)
        self.genome_api = GenomeAnnotationAPI(callback_url)
Beispiel #30
0
 def setUpClass(cls):
     """
     Prepare the class-level fixtures for the VirSorter tests.

     Loads the 'VirSorter' section of the deployment config named by
     KB_DEPLOYMENT_CONFIG into cls.cfg, looks up the user for KB_AUTH_TOKEN
     through the configured auth service, builds a method context, and creates
     the workspace client, service implementation and AssemblyUtil client.
     """
     auth_token = os.environ.get('KB_AUTH_TOKEN')
     deploy_cfg_path = os.environ.get('KB_DEPLOYMENT_CONFIG')
     cls.cfg = {}
     parser = ConfigParser()
     parser.read(deploy_cfg_path)
     for option, value in parser.items('VirSorter'):
         cls.cfg[option] = value

     # Resolve the user name that belongs to the auth token.
     auth_client = _KBaseAuth(cls.cfg['auth-service-url'])
     user_id = auth_client.get_user(auth_token)

     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     provenance = [{
         'service': 'VirSorter',
         'method': 'please_never_use_it_in_production',
         'method_params': [],
     }]
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token': auth_token,
         'user_id': user_id,
         'provenance': provenance,
         'authenticated': 1,
     })

     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = Workspace(cls.wsURL)
     cls.serviceImpl = VirSorter(cls.cfg)
     cls.scratch = cls.cfg['scratch']
     cls.callback_url = os.environ['SDK_CALLBACK_URL']
     cls.au = AssemblyUtil(cls.callback_url)
    def setUpClass(cls):
        """Create the fixtures shared by every test in this class.

        Reads the 'GenomeAnnotationAPI' section of the deployment config named
        by KB_DEPLOYMENT_CONFIG, builds method contexts for two users (the
        second user's token is read from /kb/module/work/test.cfg), creates one
        workspace for all tests and saves two genomes into it: the rhodobacter
        genome through save_one_genome_v1 and the "new_ecoli" genome, which
        points at an assembly uploaded from data/e_coli_assembly.fasta.

        Sets cls.cfg, cls.ctx, cls.ctx2, cls.ws, cls.impl, cls.wsName,
        cls.rhodobacter_ref and cls.new_genome_ref.

        Raises:
            ValueError: if test.cfg does not contain a 'test_token2' entry.
        """
        print('Setting up class')
        token = os.environ.get('KB_AUTH_TOKEN', None)
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        config = configparser.ConfigParser()
        config.read(config_file)
        cls.cfg = {n[0]: n[1] for n in config.items('GenomeAnnotationAPI')}
        authServiceUrl = cls.cfg.get('auth-service-url',
                "https://kbase.us/services/authorization/Sessions/Login")
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'GenomeAnnotationAPI',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})

        cls.ws = Workspace(cls.cfg['workspace-url'], token=token)
        cls.impl = GenomeAnnotationAPI(cls.cfg)

        # Second user
        test_cfg_file = '/kb/module/work/test.cfg'
        # A "[test]" header is prepended before parsing so the content can be
        # read back as section "test" (presumably the file itself has no
        # section header - confirm).
        test_cfg_text = "[test]\n"
        with open(test_cfg_file, "r") as f:
            test_cfg_text += f.read()
        config = configparser.ConfigParser()
        config.read_file(io.StringIO(test_cfg_text))
        test_cfg_dict = dict(config.items("test"))
        if 'test_token2' not in test_cfg_dict:
            raise ValueError("Configuration in <module>/test_local/test.cfg file should " +
                             "include second user credentials ('test_token2')")
        token2 = test_cfg_dict['test_token2']
        user2 = auth_client.get_user(token2)
        cls.ctx2 = MethodContext(None)
        cls.ctx2.update({'token': token2,
                         'user_id': user2,
                         'provenance': [
                            {'service': 'NarrativeService',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                         'authenticated': 1})

        # create one WS for all tests
        suffix = int(time.time() * 1000)
        wsName = "test_GenomeAnnotationAPI_" + str(suffix)
        cls.ws.create_workspace({'workspace': wsName})
        cls.wsName = wsName

        # preload with reference data
        with open('data/rhodobacter.json', 'r') as genome_file:
            data = json.load(genome_file)
        # save old genome
        info = cls.impl.save_one_genome_v1(cls.ctx, {
               'workspace': wsName,
               'name': "rhodobacter",
               'data': data,
           })[0]['info']
        # NOTE(review): info[6]/info[0]/info[4] are presumably workspace id /
        # object id / version - confirm against the Workspace object info layout.
        cls.rhodobacter_ref = str(info[6]) +'/' + str(info[0]) + '/' + str(info[4])
        print('created rhodobacter test genome: ' + cls.rhodobacter_ref)

        assembly_file_path = os.path.join(cls.cfg['scratch'],
                                          'e_coli_assembly.fasta')
        shutil.copy('data/e_coli_assembly.fasta', assembly_file_path)
        au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        assembly_ref = au.save_assembly_from_fasta({
            'workspace_name': cls.wsName,
            'assembly_name': 'ecoli.assembly',
            'file': {'path': assembly_file_path}
        })
        # Use a context manager so the JSON file handle is closed right away
        # instead of being left for the garbage collector.
        with open('data/new_ecoli_genome.json') as genome_file:
            data = json.load(genome_file)
        data['assembly_ref'] = assembly_ref
        # save new genome
        save_info = {
            'workspace': wsName,
            'objects': [{
                'type': 'KBaseGenomes.Genome',
                'data': data,
                'name': 'new_ecoli'
            }]
        }
        info = cls.ws.save_objects(save_info)[0]
        cls.new_genome_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(
            info[4])
        print('created new test genome')
    def setUpClass(cls):
        """Create the fixtures shared by every test in this class.

        Reads the 'GenomeAnnotationAPI' section of the deployment config named
        by KB_DEPLOYMENT_CONFIG, builds the method context, copies
        kb_g.399.c.1.gbk from the test data directory into /kb/module/work/tmp,
        creates one workspace and saves into it a contig set, the rhodobacter
        genome (linked to that contig set through 'contigset_ref') and the
        "new_ecoli" genome (linked through 'assembly_ref' to an assembly
        uploaded from data/e_coli_assembly.fasta).

        Sets cls.cfg, cls.ctx, cls.ws, cls.impl, cls.wsName,
        cls.old_genome_ref and cls.new_genome_ref.
        """
        token = os.environ.get('KB_AUTH_TOKEN', None)
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        config = configparser.ConfigParser()
        config.read(config_file)
        cls.cfg = {n[0]: n[1] for n in config.items('GenomeAnnotationAPI')}
        authServiceUrl = cls.cfg.get('auth-service-url',
                                     "https://kbase.us/services/authorization/Sessions/Login")
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'GenomeAnnotationAPI',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})

        cls.ws = Workspace(cls.cfg['workspace-url'], token=token)
        cls.impl = GenomeAnnotationAPI(cls.cfg)
        test_gbk_file = "/kb/module/test/data/kb_g.399.c.1.gbk"
        temp_gbk_file = "/kb/module/work/tmp/kb_g.399.c.1.gbk"
        shutil.copy(test_gbk_file, temp_gbk_file)
        suffix = int(time.time() * 1000)
        wsName = "test_GenomeAnnotationAPI_" + str(suffix)
        cls.ws.create_workspace({'workspace': wsName})
        cls.wsName = wsName

        # Each JSON fixture is read inside a context manager so its file
        # handle is closed right away instead of being left for the garbage
        # collector.
        with open('data/rhodobacter_contigs.json') as contigs_file:
            data = json.load(contigs_file)
        # save to ws
        save_info = {
            'workspace': wsName,
            'objects': [{
                'type': 'KBaseGenomes.ContigSet',
                'data': data,
                'name': 'rhodo_contigs'
            }]
        }
        info = cls.ws.save_objects(save_info)[0]
        # NOTE(review): info[6]/info[0]/info[4] are presumably workspace id /
        # object id / version - confirm against the Workspace object info layout.
        contigset_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
        with open('data/rhodobacter.json') as genome_file:
            data = json.load(genome_file)
        data['contigset_ref'] = contigset_ref
        # save to ws
        info = cls.impl.save_one_genome_v1(cls.ctx, {
            'workspace': wsName,
            'name': "rhodobacter",
            'data': data,
        })[0]['info']
        cls.old_genome_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(
            info[4])
        print('created old test genome')

        assembly_file_path = os.path.join(cls.cfg['scratch'],
                                          'e_coli_assembly.fasta')
        shutil.copy('data/e_coli_assembly.fasta', assembly_file_path)
        au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        assembly_ref = au.save_assembly_from_fasta({
            'workspace_name': cls.wsName,
            'assembly_name': 'ecoli.assembly',
            'file': {'path': assembly_file_path}
        })
        with open('data/new_ecoli_genome.json') as genome_file:
            data = json.load(genome_file)
        data['assembly_ref'] = assembly_ref
        # save to ws
        save_info = {
            'workspace': wsName,
            'objects': [{
                'type': 'KBaseGenomes.Genome',
                'data': data,
                'name': 'new_ecoli'
            }]
        }
        info = cls.ws.save_objects(save_info)[0]
        cls.new_genome_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
        print('created new test genome')