def BuildFastaFromSequenceSet(self, ctx, params):
    """
    Build a FASTA file on disk from a KBase SequenceSet object.

    :param params: instance of type "BuildSeqIn" -> structure: parameter
       "workspace_name" of String, parameter "SequenceSetRef" of String,
       parameter "fasta_outpath" of String
    :returns: instance of type "BuildSeqOut" -> structure: parameter
       "fasta_outpath" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN BuildFastaFromSequenceSet
    # (removed a stray duplicated '#END' marker that appeared here)
    dfu = DataFileUtil(self.callback_url)
    bu = BackgroundUtils()
    TU = TestUtils()

    # Optional flags: use .get() so absent keys don't raise KeyError.
    if params.get('TESTFLAG') and params.get('background'):
        # Test mode: build the background model from a canned genome.
        targetpath = '/kb/module/work/tmp/testgenome.fa'
        TU.GetGenome(targetpath)
        bu.BuildBackground(targetpath)
    elif params.get('background'):
        # Pull the genome's assembly and build the background model
        # from its FASTA.
        ws = Workspace('https://appdev.kbase.us/services/ws')
        subset = ws.get_object_subset([{
            'included': ['/features/[*]/location',
                         '/features/[*]/id', '/assembly_ref'],
            'ref': params['genome_ref']}])
        aref = subset[0]['data']['assembly_ref']
        assembly_ref = {'ref': aref}
        print('Downloading Assembly data as a Fasta file.')
        assemblyUtil = AssemblyUtil(self.callback_url)
        fasta_file = assemblyUtil.get_assembly_as_fasta(assembly_ref)['path']
        bu.BuildBackground(fasta_file)

    # Fetch the SequenceSet and write each sequence as a FASTA record.
    get_objects_params = {'object_refs': [params['SequenceSetRef']]}
    SeqSet = dfu.get_objects(get_objects_params)['data'][0]['data']
    # Context manager guarantees the handle is closed even on error.
    with open(params['fasta_outpath'], 'w') as outFile:
        for s in SeqSet['sequences']:
            outFile.write('>' + s['sequence_id'] + '\n')
            outFile.write(s['sequence'] + '\n')

    fu = FastaUtils()
    if params.get('mask_repeats'):
        # Mask repeats in place (input and output path are the same).
        fu.RemoveRepeats(params['fasta_outpath'], params['fasta_outpath'])

    output = {'fasta_outpath': params['fasta_outpath']}
    #END BuildFastaFromSequenceSet

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method BuildFastaFromSequenceSet return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def getAssemblyInfo(self, ass_name):
    """Upload a test assembly (once) and return its object info tuple.

    Results are cached on the class keyed by assembly file name, so
    repeated calls for the same file skip the upload entirely.
    """
    cache = getattr(self.__class__, 'assemblyInfo', None)
    if cache is not None and cache.get(ass_name):
        return cache[ass_name]

    # Stage the local test file into the shared scratch area so the
    # AssemblyUtil container can read it.
    local_path = os.path.join('data', 'assemblies', ass_name)
    scratch_path = os.path.join(self.scratch, os.path.basename(local_path))
    shutil.copy(local_path, scratch_path)

    # Upload the staged FASTA to KBase via AssemblyUtil.
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    ass_ref = au.save_assembly_from_fasta({
        'file': {'path': scratch_path},
        'workspace_name': self.getWsName(),
        'assembly_name': ass_name
    })

    # Look up the freshly saved object's metadata.
    new_obj_info = self.ws.get_object_info_new(
        {'objects': [{'ref': ass_ref}]})

    if cache is None:
        self.__class__.assemblyInfo = dict()
    self.__class__.assemblyInfo[ass_name] = new_obj_info[0]
    return new_obj_info[0]
def upload_assembly(self, file_path, workspace_name, assembly_name):
    """
    From a list of file paths, uploads them to KBase, generates
    Assembly objects, then returns the generated UPAs.
    """
    # Guard clauses: every argument is mandatory and the file must exist.
    if not file_path:
        raise ValueError("file_path must be defined")
    if not os.path.exists(file_path):
        raise ValueError(
            "The given assembly file '{}' does not exist".format(file_path))
    if not workspace_name:
        raise ValueError("workspace_name must be defined")
    if not assembly_name:
        raise ValueError("assembly_name must be defined")

    # Delegate the actual upload to AssemblyUtil.
    uploader = AssemblyUtil(self.callback_url)
    save_params = {
        "file": {"path": file_path},
        "workspace_name": workspace_name,
        "assembly_name": assembly_name,
    }
    return uploader.save_assembly_from_fasta(save_params)
def get_fasta_file(self, filename, obj_name):
    """Save a FASTA file as an Assembly object and return its reference."""
    au = AssemblyUtil(self.callback_url)
    save_params = {
        'file': {'path': filename},
        'workspace_name': self.getWsName(),
        'assembly_name': obj_name,
    }
    return au.save_assembly_from_fasta(save_params)
def _get_assembly(self, genome):
    """Download the genome's assembly as FASTA and collect circular contigs.

    Handles both Assembly objects ('assembly_ref', contigs in a dict
    flagged by 'is_circ') and legacy ContigSets ('contigset_ref',
    contigs in a list flagged by 'replicon_geometry').

    :returns: tuple of (path to FASTA file, set of circular contig ids)
    """
    assembly_ref = (genome['assembly_ref'] if 'assembly_ref' in genome
                    else genome['contigset_ref'])
    log('Assembly reference = ' + assembly_ref)
    log('Downloading assembly')
    dfu = DataFileUtil(self.cfg.callbackURL)
    full_ref = self.genome_ref + ";" + assembly_ref
    log('object_refs:' + full_ref)
    assembly_data = dfu.get_objects(
        {'object_refs': [full_ref]})['data'][0]['data']
    contigs = assembly_data['contigs']
    if isinstance(contigs, dict):
        # Assembly object: mapping of contig id -> contig record.
        circular_contigs = {c['contig_id'] for c in contigs.values()
                            if c.get('is_circ')}
    else:
        # ContigSet: list of contig records.
        circular_contigs = {c['id'] for c in contigs
                            if c.get('replicon_geometry') == 'circular'}
    au = AssemblyUtil(self.cfg.callbackURL)
    assembly_file_path = au.get_assembly_as_fasta({'ref': full_ref})['path']
    return assembly_file_path, circular_contigs
def load_genome_direct(self, filename, assembly_filename, obj_name):
    """Save an assembly FASTA plus a genome JSON file as test objects.

    :returns: reference string of the saved genome
    """
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    assembly_ref = au.save_assembly_from_fasta({
        'workspace_name': self.getWsName(),
        'assembly_name': obj_name + '.assembly',
        'file': {'path': assembly_filename},
    })
    pprint('created test assembly: ' + assembly_ref)

    # Load the genome JSON and point it at the fresh assembly.
    with open(filename, 'r') as fh:
        data = json.loads(fh.read())
    data['assembly_ref'] = assembly_ref

    info = self.gaa.save_one_genome_v1({
        'workspace': self.getWsName(),
        'data': data,
        'name': obj_name + '.genome',
    })['info']
    ref = "{}/{}/{}".format(info[6], info[0], info[4])
    print(('created test genome: ' + ref + ' from file ' + filename))
    return ref
def load_fastas(config, scratch: str, upa: str):
    '''
    Returns list of (fasta_path, upa)

    Supported input types: GenomeSet (KBaseSets / KBaseSearch), Genome,
    ContigSet, Assembly, AssemblySet, and BinnedContigs.

    :raises ValueError: if the referenced object has an unhandled type
    '''
    dfu = DataFileUtil(config['callback_url'])
    au = AssemblyUtil(config['callback_url'])
    mgu = MetagenomeUtils(config['callback_url'])
    ws = Workspace(config['workspace-url'])

    obj_data = dfu.get_objects({"object_refs": [upa]})['data'][0]
    obj_type = obj_data['info'][2]

    if 'KBaseSets.GenomeSet' in obj_type:
        upas = [gsi['ref'] for gsi in obj_data['data']['items']]
    elif 'KBaseSearch.GenomeSet' in obj_type:
        upas = [gse['ref'] for gse in obj_data['data']['elements'].values()]
    elif "KBaseGenomes.Genome" in obj_type:
        upas = [upa]
    elif "KBaseGenomes.ContigSet" in obj_type or "KBaseGenomeAnnotations.Assembly" in obj_type:
        # in this case we use the assembly file util to get the fasta file
        faf = au.get_assembly_as_fasta({"ref": upa})
        return [(faf['path'], upa)]
    elif "KBaseSets.AssemblySet" in obj_type:
        fasta_paths = []
        for item_upa in obj_data['data']['items']:
            faf = au.get_assembly_as_fasta({"ref": item_upa['ref']})
            fasta_paths.append((faf['path'], item_upa['ref']))
        return fasta_paths
    elif 'KBaseMetagenomes.BinnedContigs' in obj_type:
        fasta_paths = []
        bin_file_dir = mgu.binned_contigs_to_file({
            'input_ref': upa,
            'save_to_shock': 0
        })['bin_file_directory']
        for (dirpath, dirnames, filenames) in os.walk(bin_file_dir):
            for fasta_file in filenames:
                # normalize every bin file to a .fa extension in scratch
                fasta_path = os.path.join(scratch, fasta_file)
                fasta_path = os.path.splitext(fasta_path)[0] + ".fa"
                copyfile(os.path.join(bin_file_dir, fasta_file), fasta_path)
                # Should I verify that the bins have contigs?
                # is it possible to have empty bins?
                fasta_paths.append((fasta_path, upa))
            # only the top level of the bin directory is expected
            break
        return fasta_paths
    else:
        # BUG FIX: `Error` was an undefined name (NameError at runtime);
        # raise a standard ValueError instead.
        raise ValueError('Input genome/metagenome reference has unhandled type')

    # Genome / GenomeSet path: resolve each genome's assembly and download it.
    fasta_paths = []
    for genome_upa in upas:
        genome_data = ws.get_objects2({'objects': [{
            "ref": genome_upa
        }]})['data'][0]['data']
        assembly_upa = genome_upa + ';' + str(
            genome_data.get('contigset_ref') or genome_data.get('assembly_ref'))
        faf = au.get_assembly_as_fasta({'ref': assembly_upa})
        fasta_paths.append((faf['path'], assembly_upa))
    return fasta_paths
def _build_index(self, assembly_info, validated_params):
    """Build a BWA index for an assembly and push it to the cache.

    :param assembly_info: dict with at least 'ref' (assembly object reference)
    :param validated_params: dict with 'output_dir' and 'ws_for_cache'
    :returns: dict with 'output_dir', 'index_files_basename' and
        'pushed_to_cache' (1/0)
    :raises ValueError: if the output directory already exists
    """
    # get the assembly as a fasta file using AssemblyUtil
    au = AssemblyUtil(self.callback_url)
    fasta_info = au.get_assembly_as_fasta({'ref': assembly_info['ref']})

    # make the target destination folder (check again it wasn't created yet)
    if os.path.exists(validated_params['output_dir']):
        # BUG FIX: `raise '<string>'` is itself a TypeError in Python 3;
        # raise a real exception so callers see the intended message.
        raise ValueError('Output directory name specified (' +
                         validated_params['output_dir'] +
                         ') already exists. Will not overwrite, so aborting.')
    os.makedirs(validated_params['output_dir'])

    # configure the command line args and run it
    cli_params = self._build_cli_params(fasta_info['path'],
                                        fasta_info['assembly_name'],
                                        validated_params)
    self.bwa.run('index', cli_params)

    # collect the generated index files into the requested output dir
    for file in glob.glob(r'/kb/module/work/tmp/' +
                          fasta_info['assembly_name'] + '.*'):
        print(file)
        shutil.copy(file, validated_params['output_dir'])

    index_info = {'output_dir': validated_params['output_dir'],
                  'index_files_basename': fasta_info['assembly_name']}

    # cache the result, mark if it worked or not
    cache_success = self._put_cached_index(assembly_info,
                                           fasta_info['assembly_name'],
                                           validated_params['output_dir'],
                                           validated_params['ws_for_cache'])
    index_info['pushed_to_cache'] = 1 if cache_success else 0

    return index_info
def load_fastas(config, scratch, upa):
    '''
    Returns a list of (fasta_path, upa) tuples for the given object.

    Supported input types: GenomeSet (KBaseSets / KBaseSearch), Genome,
    ContigSet, and Assembly.

    :raises ValueError: if the referenced object has an unhandled type
    '''
    dfu = DataFileUtil(config['callback_url'])
    au = AssemblyUtil(config['callback_url'])
    ws = Workspace(config['workspace-url'])

    obj_data = dfu.get_objects({"object_refs": [upa]})['data'][0]
    obj_type = obj_data['info'][2]

    if 'KBaseSets.GenomeSet' in obj_type:
        upas = [gsi['ref'] for gsi in obj_data['data']['items']]
    elif 'KBaseSearch.GenomeSet' in obj_type:
        upas = [gse['ref'] for gse in obj_data['data']['elements'].values()]
    elif "KBaseGenomes.Genome" in obj_type:
        upas = [upa]
    elif "KBaseGenomes.ContigSet" in obj_type or "KBaseGenomeAnnotations.Assembly" in obj_type:
        # in this case we use the assembly file util to get the fasta file
        # (removed unused local `file_output`)
        faf = au.get_assembly_as_fasta({"ref": upa})
        return [(faf['path'], upa)]
    else:
        # BUG FIX: previously fell through and crashed with NameError on
        # `upas`; fail explicitly for unhandled types instead.
        raise ValueError('Input genome reference has unhandled type: ' + obj_type)

    fasta_paths = []
    for genome_upa in upas:
        # Build a reference path through the top-level object so access
        # rights on it carry through to the contained genomes.
        if upa != genome_upa:
            genome_upa = upa + ';' + genome_upa
        genome_data = ws.get_objects2(
            {'objects': [{"ref": genome_upa}]})['data'][0]['data']
        target_upa = genome_data.get('contigset_ref') or genome_data.get('assembly_ref')
        assembly_upa = genome_upa + ';' + target_upa
        faf = au.get_assembly_as_fasta({"ref": assembly_upa})
        fasta_paths.append((faf['path'], assembly_upa))
    return fasta_paths
def __init__(self, config):
    """Set up service URLs and client handles from the SDK config dict.

    :param config: dict with 'SDK_CALLBACK_URL', 'scratch', 'shock-url'
        and 'workspace-url'
    """
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = config['scratch']  # shared scratch directory
    self.shock_url = config['shock-url']
    self.ws_url = config['workspace-url']
    # service clients, routed through the callback server
    self.dfu = DataFileUtil(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
def __init__(self, config):
    """Initialize the importer: service clients, ontology data, defaults,
    and per-import parsing state.

    :param config: SDK config object exposing ``callbackURL`` and the raw
        deployment config (``config.raw``)
    """
    self.cfg = config
    self.au = AssemblyUtil(config.callbackURL)
    self.dfu = DataFileUtil(self.cfg.callbackURL)
    self.gi = GenomeInterface(self.cfg)
    self.taxon_wsname = self.cfg.raw['taxon-workspace-name']
    # timestamp used to build unique names/paths for this run
    self.time_string = str(
        datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y_%m_%d_%H_%M_%S'))
    # module version is scraped from kbase.yml at runtime
    yml_text = open('/kb/module/kbase.yml').read()
    self.version = re.search("module-version:\n\W+(.+)\n", yml_text).group(1)
    self.ont_mappings = load_ontology_mappings('/kb/module/data')
    self.code_table = 11  # default genetic code table id
    # feature types skipped during import
    self.skip_types = ('exon', 'five_prime_UTR', 'three_prime_UTR',
                       'start_codon', 'stop_codon', 'region', 'chromosome',
                       'scaffold')
    # per-import parsing state (reset for each file processed)
    self.spoof_gene_count = 0
    self.is_phytozome = False
    self.strict = True
    self.generate_genes = False
    self.warnings = []
    self.feature_dict = collections.OrderedDict()
    self.cdss = set()
    self.ontologies_present = collections.defaultdict(dict)
    self.ontology_events = list()
    self.skiped_features = collections.Counter()
    self.feature_counts = collections.Counter()
def load_genome_direct(cls, filename, assembly_filename, obj_name):
    """Save an assembly FASTA plus a genome JSON as workspace objects.

    :param filename: path to the genome JSON fixture
    :param assembly_filename: path to the assembly FASTA fixture
    :param obj_name: base name for the saved objects
    :returns: tuple of (genome_ref, assembly_ref)
    """
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    # Copy the assembly into scratch so the AssemblyUtil container sees it.
    assembly_path = os.path.join(cls.cfg['scratch'],
                                 os.path.basename(assembly_filename))
    shutil.copy(assembly_filename, assembly_path)
    assembly_ref = au.save_assembly_from_fasta({
        'workspace_name': cls.wsName,
        'assembly_name': obj_name + '.assembly',
        'file': {'path': assembly_path}
    })

    # FIX: use a context manager so the JSON file handle is closed
    # (json.load(open(...)) leaked the handle).
    with open(filename) as fh:
        data = json.load(fh)
    data['assembly_ref'] = assembly_ref

    save_info = {
        'workspace': cls.wsName,
        'objects': [{
            'data': data,
            'name': obj_name + '.genome',
            'type': 'KBaseGenomes.Genome',
        }],
    }
    info = cls.wsClient.save_objects(save_info)[0]
    ref = f"{info[6]}/{info[0]}/{info[4]}"
    print('created test genome: ' + ref + ' from file ' + filename)
    return ref, assembly_ref
class DownloadUtils:
    """Download helpers wrapping AssemblyUtil, VariationUtil and
    GenomeFileUtil."""

    def __init__(self, callbackURL):
        """
        :param callbackURL: SDK callback service URL used by all clients
        """
        # BUG FIX: the callbackURL parameter was previously ignored in
        # favor of os.environ['SDK_CALLBACK_URL']; honor the argument.
        self.callbackURL = callbackURL
        self.au = AssemblyUtil(self.callbackURL)
        self.vu = VariationUtil(self.callbackURL)
        self.gfu = GenomeFileUtil(self.callbackURL)

    def download_genome(self, genomeref, output_dir):
        """Download a genome's assembly as FASTA into output_dir.

        :param genomeref: genome/assembly object reference
        :param output_dir: directory to place the downloaded file in
        :return: AssemblyUtil result dict describing the downloaded file
        """
        file = self.au.get_assembly_as_fasta({
            'ref': genomeref,
            'filename': os.path.join(output_dir, "ref_genome.fa")
        })
        return file

    def get_variation(self, variation_ref):
        """Download a variation object as a VCF file.

        :param variation_ref: variation object reference
        :return: path to the downloaded VCF file
        """
        filepath = self.vu.get_variation_as_vcf(
            {'variation_ref': variation_ref})['path']
        return filepath

    def get_gff(self, genome_ref):
        """Download a genome as a GFF file.

        :param genome_ref: genome object reference
        :return: gff file path
        """
        file = self.gfu.genome_to_gff({'genome_ref': genome_ref})
        return file['file_path']

    def get_assembly(self, assembly_ref, output_dir):
        """Download an assembly as FASTA into output_dir.

        :param assembly_ref: assembly object reference
        :param output_dir: directory to place the downloaded file in
        :return: assembly file path
        """
        file = self.au.get_assembly_as_fasta({
            'ref': assembly_ref,
            'filename': os.path.join(output_dir, "ref_genome.fa")
        })
        return file['path']
def __init__(self, callbaack_url, scratch, wdl='../../metaAssembly/'):
    """Set up clients for the metaAssembly WDL wrapper.

    :param callbaack_url: SDK callback service URL
        (NOTE(review): parameter name carries a historical typo; renaming
        it would break keyword callers, so it is kept as-is)
    :param scratch: shared scratch directory path
    :param wdl: base path of the WDL workflow checkout
    """
    self.callback_url = callbaack_url
    self.scratch = scratch
    # service clients, routed through the callback server
    self.special = special(self.callback_url)
    self.ru = ReadsUtils(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.report = KBaseReport(self.callback_url)
    self.wdl_base = wdl
def __init__(self, config):
    """Initialize clients from the SDK config and environment.

    :param config: deployment config dict with 'scratch' and 'workspace-url'
    """
    # (removed a duplicate self.scratch assignment that appeared twice)
    self.scratch = os.path.abspath(config['scratch'])
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    self.token = os.environ['KB_AUTH_TOKEN']
    self.ws = Workspace(config['workspace-url'], token=self.token)
    self.genome_api = GenomeAnnotationAPI(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
def save_assembly(self, wsname, output_contigs, token, name, console):
    """Upload a FASTA file of contigs as an Assembly object.

    :param wsname: target workspace name
    :param output_contigs: path to the FASTA file to upload
    :param token: auth token passed through to AssemblyUtil
    :param name: name for the new Assembly object
    :param console: log accumulator passed to self.log
    :returns: the new assembly reference (previously discarded)
    """
    self.log(console, 'Uploading FASTA file to Assembly')
    # NOTE(review): service_ver='dev' is hard-coded — confirm this is
    # intentional outside of development.
    assemblyUtil = AssemblyUtil(self.callbackURL, token=token,
                                service_ver='dev')
    # IMPROVEMENT: return the reference instead of dropping it, so
    # callers can use the saved object (backward compatible).
    return assemblyUtil.save_assembly_from_fasta(
        {'file': {'path': output_contigs},
         'workspace_name': wsname,
         'assembly_name': name})
def __init__(self, config):
    """Initialize the GenBank importer: clients, defaults, and
    per-import parsing state.

    :param config: SDK config object exposing ``callbackURL``,
        ``workspaceURL``, ``re_api_url`` and the raw deployment config
        (``config.raw``)
    """
    self.cfg = config
    self.gi = GenomeInterface(config)
    self.dfu = DataFileUtil(config.callbackURL)
    self.aUtil = AssemblyUtil(config.callbackURL)
    self.ws = Workspace(config.workspaceURL)
    self._messages = []
    # timestamp used to build unique names/paths for this run
    self.time_string = str(
        datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y_%m_%d_%H_%M_%S'))
    # module version is scraped from kbase.yml at runtime
    yml_text = open('/kb/module/kbase.yml').read()
    self.version = re.search("module-version:\n\W+(.+)\n", yml_text).group(1)
    self.generate_parents = False
    self.generate_ids = False
    # per-import parsing state (reset for each file processed)
    self.genes = OrderedDict()
    self.mrnas = OrderedDict()
    self.cdss = OrderedDict()
    self.noncoding = []
    self.ontologies_present = defaultdict(dict)
    self.ontology_events = list()
    self.skiped_features = Counter()
    self.feature_counts = Counter()
    self.orphan_types = Counter()
    self.contig_seq = {}
    self.circ_contigs = set()
    self.features_spaning_zero = set()
    self.genome_warnings = []
    self.genome_suspect = False
    self.defects = Counter()
    self.spoofed_genes = 0
    # feature types that are never imported
    self.excluded_features = ('source', 'exon', 'fasta_record')
    self.ont_mappings = load_ontology_mappings('/kb/module/data')
    self.code_table = 11  # default genetic code table id
    self.re_api_url = config.re_api_url
    # dict with feature 'id's that have been used more than once.
    self.used_twice_identifiers = {}
    self.default_params = {
        'source': 'Genbank',
        'taxon_wsname': self.cfg.raw['taxon-workspace-name'],
        'taxon_lookup_obj_name': self.cfg.raw['taxon-lookup-object-name'],
        'ontology_wsname': self.cfg.raw['ontology-workspace-name'],
        'ontology_GO_obj_name': self.cfg.raw['ontology-gene-ontology-obj-name'],
        'ontology_PO_obj_name': self.cfg.raw['ontology-plant-ontology-obj-name'],
        'release': None,
        'genetic_code': 11,
        'generate_ids_if_needed': 0,
        'metadata': {}
    }
def __init__(self, config):
    """Set up service URLs and client handles.

    :param config: deployment config dict with 'scratch', 'shock-url'
        and 'workspace-url'; the callback URL comes from the environment
    """
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    self.scratch = config['scratch']  # shared scratch directory
    self.shock_url = config['shock-url']
    self.ws_url = config['workspace-url']
    # service clients, routed through the callback server
    self.dfu = DataFileUtil(self.callback_url)
    self.ru = ReadsUtils(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.mgu = MetagenomeUtils(self.callback_url)
def load_fasta_file(self, filename, obj_name, contents):
    """Write `contents` to `filename` and save it as an Assembly object.

    :param filename: path to write the FASTA contents to
    :param obj_name: name for the new Assembly object
    :param contents: FASTA-formatted string
    :returns: reference to the saved assembly
    """
    # FIX: context manager guarantees the handle is closed even if the
    # write raises (the original open/write/close leaked on exceptions).
    with open(filename, 'w') as f:
        f.write(contents)
    assemblyUtil = AssemblyUtil(self.callback_url)
    assembly_ref = assemblyUtil.save_assembly_from_fasta(
        {'file': {'path': filename},
         'workspace_name': self.getWsName(),
         'assembly_name': obj_name})
    return assembly_ref
def __init__(self, config):
    """Prepare a per-import scratch subdirectory and service clients.

    :param config: dict with 'SDK_CALLBACK_URL', 'scratch' and
        'KB_AUTH_TOKEN'
    """
    self.callback_url = config['SDK_CALLBACK_URL']
    # unique scratch subdir so concurrent imports don't collide
    self.scratch = os.path.join(config['scratch'],
                                'import_assembly_' + str(uuid.uuid4()))
    handler_utils._mkdir_p(self.scratch)
    self.token = config['KB_AUTH_TOKEN']
    self.dfu = DataFileUtil(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.uploader_utils = UploaderUtil(config)
    # NOTE(review): presumably a cap on contigs detailed in generated
    # reports — confirm against the report-building code.
    self.max_contigs_for_report = 200
def __init__(self, config, scratch, callback_url):
    """Initialize service clients.

    :param config: deployment config dict (provides 'workspace-url')
    :param scratch: scratch directory path
    :param callback_url: SDK callback service URL
    """
    # BUG FIX: self.scratch and self.callback_url were each assigned
    # twice; the first values (config['scratch'] and the
    # SDK_CALLBACK_URL env var) were dead, the env read could raise
    # KeyError, and DataFileUtil was built with the env URL while the
    # other clients used the parameter. All clients now consistently
    # use the explicit parameters.
    self.scratch = scratch
    self.callback_url = callback_url
    self.ws_url = config['workspace-url']
    self.dfu = DataFileUtil(self.callback_url)
    self.wsc = Workspace(self.ws_url)
    self.au = AssemblyUtil(self.callback_url)
    self.gapi = GenericsAPI(self.callback_url)
def jayrbolton_contig_filter(self, ctx, params):
    """
    This example function accepts any number of parameters and returns results in a KBaseReport
    :param params: instance of mapping from String to unspecified object
    :returns: instance of type "ReportResults" -> structure: parameter
       "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN jayrbolton_contig_filter
    # Validate required parameters up front.
    if not params.get('assembly_input_ref'):
        raise TypeError("`assembly_input_ref` is required")
    if not params.get('min_length') or not isinstance(params['min_length'], int):
        raise TypeError("`min_length` is required and needs to be an int")
    min_length = params['min_length']

    # Download the source assembly to local disk as FASTA.
    au = AssemblyUtil(self.callback_url)
    src_fasta = au.get_assembly_as_fasta(
        {'ref': params['assembly_input_ref']})
    dest_path = os.path.join(self.shared_folder, 'filtered.fasta')

    reporter = KBaseReport(self.callback_url)
    stats = contig_filter(src_fasta['path'], dest_path, min_length)

    # Save the filtered FASTA back as a new Assembly object.
    filtered_ref = au.save_assembly_from_fasta({
        'workspace_name': params['workspace_name'],
        'file': {
            'path': dest_path,
            'assembly_name': 'filtered_contigs'
        },
        'assembly_name': 'filtered_assembly'
    })

    report = reporter.create_extended_report({
        'workspace_name': params['workspace_name'],
        'objects_created': [{
            'ref': filtered_ref,
            'description': 'filtered_assembly'
        }],
        'message': (f"Filtered out {stats['n_total'] - stats['n_remaining']} "
                    f"records out of {stats['n_total']} records.")
    })
    output = {'report_ref': report['ref'], 'report_name': report['name']}
    #END jayrbolton_contig_filter

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method jayrbolton_contig_filter return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def prepare_data(cls):
    """Stage the E. coli test assembly and genome fixture data.

    Copies the fixture FASTA into scratch, uploads it via AssemblyUtil,
    and loads the genome JSON pointed at the fresh assembly ref.
    """
    assembly_file_path = os.path.join(cls.scratch, 'e_coli_assembly.fasta')
    shutil.copy('data/e_coli/e_coli_assembly.fasta', assembly_file_path)
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    assembly_ref = au.save_assembly_from_fasta({
        'workspace_name': cls.wsName,
        'assembly_name': 'e_coli.assembly',
        'file': {'path': assembly_file_path}
    })
    # FIX: use a context manager so the JSON file handle is closed
    # (json.load(open(...)) leaked the handle).
    with open('data/e_coli/e_coli.json') as fh:
        cls.test_genome_data = json.load(fh)
    cls.test_genome_data['assembly_ref'] = assembly_ref
def test_filter_contigs_by_length_01(self):
    """Integration test: upload two fixture assemblies, then run
    run_filter_contigs_by_length over both with a 1 kb cutoff."""
    print("\n\nRUNNING: test_filter_contigs_by_length_01()")
    print("===========================================\n\n")

    # upload test data
    try:
        auClient = AssemblyUtil(self.callback_url,
                                token=self.getContext()['token'])
    except Exception as e:
        raise ValueError(
            'Unable to instantiate auClient with callbackURL: ' +
            self.callback_url + ' ERROR: ' + str(e))

    # Both fixture assemblies are staged and uploaded identically, so
    # drive them through one loop instead of duplicated code.
    # (Also removed unused locals `method` and `base_output_name`.)
    input_refs = []
    for idx in (1, 2):
        ass_file = 'assembly_{}.fa.gz'.format(idx)
        ass_path = os.path.join(self.scratch, ass_file)
        shutil.copy(os.path.join("data", ass_file), ass_path)
        input_refs.append(auClient.save_assembly_from_fasta({
            'file': {'path': ass_path},
            'workspace_name': self.getWsName(),
            'assembly_name': 'assembly_{}'.format(idx)
        }))

    # run method
    params = {
        'workspace_name': self.getWsName(),
        'input_assembly_refs': input_refs,
        'min_contig_length': 1000,
        'output_name': 'test_filtered'
    }
    result = self.getImpl().run_filter_contigs_by_length(
        self.getContext(), params)
    print('RESULT:')
    pprint(result)
def run_mash_sketch(self, ctx, params):
    """
    Generate a sketch file from a fasta/fastq file.

    :param params: instance of type "MashSketchParams" — pass in **one of**:
        input_path - string - local file path to an input fasta/fastq
        assembly_ref - string - workspace reference to an Assembly type
        reads_ref - string - workspace reference to a Reads type
        paired_ends - boolean - optionally, whether you are passing in
            paired-end reads
    :returns: instance of type "MashSketchResults" -> structure with
        parameter "sketch_path" of String: the local scratch file path of
        the generated sketch file (extension '.msh')
    """
    # ctx is the context object
    # return variables are: results
    #BEGIN run_mash_sketch
    # Resolve the input to a local file path, preferring reads, then
    # assembly, then a raw path.
    if 'reads_ref' in params:
        ru = ReadsUtils(self.callbackURL)
        download = ru.download_reads({
            'read_libraries': [params['reads_ref']],
            'interleaved': 'true'
        })
        input_path = download['files'][params['reads_ref']]['files']['fwd']
    elif 'assembly_ref' in params:
        au = AssemblyUtil(self.callbackURL)
        input_path = au.get_assembly_as_fasta(
            {'ref': params['assembly_ref']})['path']
    elif 'input_path' in params:
        input_path = params['input_path']
    else:
        raise ValueError(
            'Invalid params; must provide one of `reads_ref`, `assembly_ref`, or `input_path`.'
        )
    mash_utils = MashUtils(self.config, self.auth_token)
    output_file_path = mash_utils.mash_sketch(
        input_path, paired_ends=params.get('paired_ends'))
    results = {'sketch_path': output_file_path}
    #END run_mash_sketch

    # At some point might do deeper type checking...
    if not isinstance(results, dict):
        raise ValueError('Method run_mash_sketch return value ' +
                         'results is not type dict as required.')
    # return the results
    return [results]
def download_assembly(self, token, assembly_ref):
    """Download an assembly as FASTA, unpack it, and return the local path.

    Client-instantiation failures are wrapped in ValueError with context.
    """
    try:
        auClient = AUClient(self.callback_url, token=token,
                            service_ver=self.SERVICE_VER)
    except Exception as e:
        raise ValueError('Unable to instantiate auClient with callback_url: ' +
                         self.callback_url + ' ERROR: ' + str(e))
    try:
        dfuClient = DFUClient(self.callback_url, token=token,
                              service_ver=self.SERVICE_VER)
    except Exception as e:
        raise ValueError('Unable to instantiate dfuClient with callback_url: ' +
                         self.callback_url + ' ERROR: ' + str(e))

    contig_file = auClient.get_assembly_as_fasta(
        {'ref': assembly_ref}).get('path')
    sys.stdout.flush()  # don't remember why this matters
    contig_file_path = dfuClient.unpack_file(
        {'file_path': contig_file})['file_path']
    return contig_file_path
def setUpClass(cls):
    """Build an authenticated context, create a test workspace, and
    preload an assembly and a rhodobacter ContigSet."""
    token = environ.get('KB_AUTH_TOKEN', None)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'provenance': [
                        {'service': 'GenomeFileUtil',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('GenomeFileUtil'):
        cls.cfg[nameval[0]] = nameval[1]
    cls.wsURL = cls.cfg['workspace-url']
    cls.ws = workspaceService(cls.wsURL, token=token)
    cls.serviceImpl = GenomeFileUtil(cls.cfg)
    gi_config = SDKConfig(cls.cfg)
    cls.genome_interface = GenomeInterface(gi_config)

    # create one WS for all tests
    suffix = int(time.time() * 1000)
    wsName = "test_GenomeAnnotationAPI_" + str(suffix)
    cls.ws.create_workspace({'workspace': wsName})
    cls.wsName = wsName

    # save new genome assembly
    assembly_file_path = os.path.join(cls.cfg['scratch'],
                                      'Rhodo_SPAdes_assembly.fa')
    shutil.copy('data/Rhodo_SPAdes_assembly.fa', assembly_file_path)
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    cls.assembly_ref = au.save_assembly_from_fasta({
        'workspace_name': cls.wsName,
        'assembly_name': 'ecoli.assembly',
        'file': {'path': assembly_file_path}
    })

    # FIX: close the JSON file handle via a context manager.
    with open('data/rhodobacter_contigs.json') as fh:
        rhodobacter_contigs = json.load(fh)
    save_info = {
        'workspace': cls.wsName,
        'objects': [{
            'type': 'KBaseGenomes.ContigSet',
            'data': rhodobacter_contigs,
            'name': 'rhodobacter_contigs'
        }]
    }
    # BUG FIX: save_objects returns a list of info tuples; store a proper
    # 'ws/obj/ver' reference string (consistent with the sibling setUps)
    # instead of the raw list.
    info = cls.ws.save_objects(save_info)[0]
    cls.contigset_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
def fetch_fasta_from_assembly(assembly_ref, ws_url, callback_url):
    """
    From an assembly or contigset, this uses a data file util to build a FASTA
    file and return the path to it.
    """
    # Only these workspace types can be rendered as FASTA.
    allowed_types = [
        'KBaseFile.Assembly',
        'KBaseGenomeAnnotations.Assembly',
        'KBaseGenomes.ContigSet',
    ]
    if not check_ref_type(assembly_ref, allowed_types, ws_url):
        raise ValueError(
            "The reference {} cannot be used to fetch a FASTA file".format(
                assembly_ref))
    assembly_util = AssemblyUtil(callback_url)
    return assembly_util.get_assembly_as_fasta({'ref': assembly_ref})
def __init__(self, config):
    """Initialize service clients and annotation bookkeeping state.

    :param config: dict with 'scratch', 'ctx', 'SDK_CALLBACK_URL' and
        'workspace-url'
    """
    self.scratch = config["scratch"]
    self.ctx = config['ctx']
    self.callback_url = config["SDK_CALLBACK_URL"]
    self.ws_client = workspaceService(config["workspace-url"])
    # service clients, routed through the callback server
    self.gfu = GenomeFileUtil(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.kbr = KBaseReport(self.callback_url)
    self.dfu = DataFileUtil(self.callback_url)
    self.genome_api = GenomeAnnotationAPI(self.callback_url)
    # SSO/ontology lookup state, populated later by the annotation code
    self.sso_ref = None
    self.sso_event = None
    self.ec_to_sso = {}
    self.output_workspace = None
def setUpClass(cls):
    """Read deployment config, build an authenticated context, and set up
    the VirSorter implementation plus shared clients for all tests."""
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('VirSorter'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'VirSorter',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL)
    cls.serviceImpl = VirSorter(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.au = AssemblyUtil(cls.callback_url)
def setUpClass(cls):
    """Set up config, two authenticated user contexts, a fresh workspace,
    and preloaded rhodobacter + e. coli test genomes."""
    print('Setting up class')
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    config = configparser.ConfigParser()
    config.read(config_file)
    cls.cfg = {n[0]: n[1] for n in config.items('GenomeAnnotationAPI')}
    authServiceUrl = cls.cfg.get('auth-service-url',
                                 "https://kbase.us/services/authorization/Sessions/Login")
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [
                        {'service': 'GenomeAnnotationAPI',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    cls.ws = Workspace(cls.cfg['workspace-url'], token=token)
    cls.impl = GenomeAnnotationAPI(cls.cfg)

    # Second user: credentials come from the local test config file.
    test_cfg_file = '/kb/module/work/test.cfg'
    test_cfg_text = "[test]\n"
    with open(test_cfg_file, "r") as f:
        test_cfg_text += f.read()
    config = configparser.ConfigParser()
    config.read_file(io.StringIO(test_cfg_text))
    test_cfg_dict = dict(config.items("test"))
    if ('test_token2' not in test_cfg_dict):
        raise ValueError("Configuration in <module>/test_local/test.cfg file should " +
                         "include second user credentials ('test_token2')")
    token2 = test_cfg_dict['test_token2']
    user2 = auth_client.get_user(token2)
    cls.ctx2 = MethodContext(None)
    cls.ctx2.update({'token': token2,
                     'user_id': user2,
                     'provenance': [
                         {'service': 'NarrativeService',
                          'method': 'please_never_use_it_in_production',
                          'method_params': []
                          }],
                     'authenticated': 1})

    # create one WS for all tests
    suffix = int(time.time() * 1000)
    wsName = "test_GenomeAnnotationAPI_" + str(suffix)
    ret = cls.ws.create_workspace({'workspace': wsName})
    cls.wsName = wsName

    # preload with reference data
    with open('data/rhodobacter.json', 'r') as file:
        data_str = file.read()
    data = json.loads(data_str)
    # save old genome
    info = cls.impl.save_one_genome_v1(cls.ctx, {
        'workspace': wsName,
        'name': "rhodobacter",
        'data': data,
    })[0]['info']
    cls.rhodobacter_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
    print('created rhodobacter test genome: ' + cls.rhodobacter_ref)

    # stage and upload the e. coli assembly, then save the new-style genome
    assembly_file_path = os.path.join(cls.cfg['scratch'],
                                      'e_coli_assembly.fasta')
    shutil.copy('data/e_coli_assembly.fasta', assembly_file_path)
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    assembly_ref = au.save_assembly_from_fasta({
        'workspace_name': cls.wsName,
        'assembly_name': 'ecoli.assembly',
        'file': {'path': assembly_file_path}
    })
    data = json.load(open('data/new_ecoli_genome.json'))
    data['assembly_ref'] = assembly_ref
    # save new genome
    save_info = {
        'workspace': wsName,
        'objects': [{
            'type': 'KBaseGenomes.Genome',
            'data': data,
            'name': 'new_ecoli'
        }]
    }
    info = cls.ws.save_objects(save_info)[0]
    cls.new_genome_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
    print('created new test genome')
def setUpClass(cls):
    """Set up config, an authenticated context, a fresh workspace, and
    preloaded old-style (ContigSet) and new-style (Assembly) genomes."""
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    config = configparser.ConfigParser()
    config.read(config_file)
    cls.cfg = {n[0]: n[1] for n in config.items('GenomeAnnotationAPI')}
    authServiceUrl = cls.cfg.get('auth-service-url',
                                 "https://kbase.us/services/authorization/Sessions/Login")
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [
                        {'service': 'GenomeAnnotationAPI',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    cls.ws = Workspace(cls.cfg['workspace-url'], token=token)
    cls.impl = GenomeAnnotationAPI(cls.cfg)

    # stage a GenBank fixture where the containers can reach it
    test_gbk_file = "/kb/module/test/data/kb_g.399.c.1.gbk"
    temp_gbk_file = "/kb/module/work/tmp/kb_g.399.c.1.gbk"
    shutil.copy(test_gbk_file, temp_gbk_file)

    # create one workspace for all tests in this class
    suffix = int(time.time() * 1000)
    wsName = "test_GenomeAnnotationAPI_" + str(suffix)
    cls.ws.create_workspace({'workspace': wsName})
    cls.wsName = wsName

    data = json.load(open('data/rhodobacter_contigs.json'))
    # save to ws
    save_info = {
        'workspace': wsName,
        'objects': [{
            'type': 'KBaseGenomes.ContigSet',
            'data': data,
            'name': 'rhodo_contigs'
        }]
    }
    info = cls.ws.save_objects(save_info)[0]
    contigset_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
    data = json.load(open('data/rhodobacter.json'))
    data['contigset_ref'] = contigset_ref
    # save to ws
    info = cls.impl.save_one_genome_v1(cls.ctx, {
        'workspace': wsName,
        'name': "rhodobacter",
        'data': data,
    })[0]['info']
    cls.old_genome_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
    print('created old test genome')

    # stage and upload the e. coli assembly, then save the new-style genome
    assembly_file_path = os.path.join(cls.cfg['scratch'],
                                      'e_coli_assembly.fasta')
    shutil.copy('data/e_coli_assembly.fasta', assembly_file_path)
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    assembly_ref = au.save_assembly_from_fasta({
        'workspace_name': cls.wsName,
        'assembly_name': 'ecoli.assembly',
        'file': {'path': assembly_file_path}
    })
    data = json.load(open('data/new_ecoli_genome.json'))
    data['assembly_ref'] = assembly_ref
    # save to ws
    save_info = {
        'workspace': wsName,
        'objects': [{
            'type': 'KBaseGenomes.Genome',
            'data': data,
            'name': 'new_ecoli'
        }]
    }
    info = cls.ws.save_objects(save_info)[0]
    cls.new_genome_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
    print('created new test genome')