def export_assembly_as_fasta(self, ctx, params):
    """
    Download-oriented entry point: 'get_assembly_as_fasta' does the actual
    work, and the output is then packaged with WS provenance and object
    info into a zip file and saved to shock.
    :param params: instance of type "ExportParams" -> structure: parameter
       "input_ref" of String
    :returns: instance of type "ExportOutput" -> structure: parameter
       "shock_id" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN export_assembly_as_fasta
    exporter = AssemblyToFasta(self.callback_url, self.sharedFolder)
    output = exporter.export_as_fasta(ctx, params)
    #END export_assembly_as_fasta

    # Only the top-level type is validated; deeper checking may come later.
    if isinstance(output, dict):
        # Results are handed back wrapped in a single-element list.
        return [output]

    raise ValueError('Method export_assembly_as_fasta return value '
                     'output is not type dict as required.')
def get_assembly_as_fasta(self, ctx, params):
    """
    Construct a local Fasta file holding the sequence data of an Assembly
    (or legacy ContigSet data object), given a reference to it and a set
    of options. When filename is set, the file is saved under that name if
    possible; otherwise a random name is generated.
    :param params: instance of type "GetAssemblyParams" (@optional
       filename) -> structure: parameter "ref" of String, parameter
       "filename" of String
    :returns: instance of type "FastaAssemblyFile" -> structure: parameter
       "path" of String, parameter "assembly_name" of String
    """
    # ctx is the context object
    # return variables are: file
    #BEGIN get_assembly_as_fasta
    converter = AssemblyToFasta(self.callback_url, self.sharedFolder)
    fasta_info = converter.assembly_as_fasta(ctx, params)
    #END get_assembly_as_fasta

    # Only the top-level type is validated; deeper checking may come later.
    if isinstance(fasta_info, dict):
        # Results are handed back wrapped in a single-element list.
        return [fasta_info]

    raise ValueError('Method get_assembly_as_fasta return value '
                     'file is not type dict as required.')
def genome_obj_to_fasta(self, ref, obj_type):
    """
    Write a FASTA file for every genome reachable from ``ref`` and record
    each result through ``self.add_to_dict``.

    ``obj_type`` selects how ``ref`` is expanded: a KBaseSets.GenomeSet
    contributes the refs of its 'items', a KBaseSearch.GenomeSet the refs
    of its 'elements' values, and a KBaseGenomes.Genome contributes itself.
    Any other type yields no genomes and the method does nothing further.

    :param ref: reference of the genome or genome-set object
    :param obj_type: type string of the object behind ``ref``
    :raises TypeError: if a genome carries neither an assembly_ref nor a
        contigset_ref
    """
    fasta_writer = AssemblyToFasta(self.callback_url, self.scratch)

    # Step 1: expand the input reference into the genome UPAs it stands for.
    if 'KBaseSets.GenomeSet' in obj_type:
        set_obj = self.ws.get_objects2({'objects': [{"ref": ref}]})['data'][0]
        genome_upas = [item['ref'] for item in set_obj['data']['items']]
    elif 'KBaseSearch.GenomeSet' in obj_type:
        set_obj = self.ws.get_objects2({'objects': [{"ref": ref}]})['data'][0]
        genome_upas = [element['ref']
                       for element in set_obj['data']['elements'].values()]
    elif "KBaseGenomes.Genome" in obj_type:
        genome_upas = [ref]
    else:
        genome_upas = []

    # Step 2: resolve each genome to its sequence object and dump it.
    for genome_upa in genome_upas:
        # Request only the two reference fields rather than the full genome.
        subset_spec = {"ref": genome_upa,
                       'included': ['/assembly_ref/', '/contigset_ref/']}
        genome_data = self.ws.get_objects2(
            {'objects': [subset_spec]})['data'][0]['data']

        # An empty subset means the genome has neither reference field.
        if not genome_data:
            raise TypeError("KBase object type %s does not contain an assembly reference or contig reference." % obj_type)

        # assembly_ref takes precedence over contigset_ref; the result is
        # chained onto the genome UPA with ';' to form a reference path.
        sequence_ref = genome_data.get('assembly_ref') or genome_data.get('contigset_ref')
        assembly_upa = genome_upa + ';' + str(sequence_ref)
        faf = fasta_writer.assembly_as_fasta({'ref': assembly_upa})

        # Register the FASTA path under the assembly reference path.
        self.add_to_dict(assembly_upa, {'paths': [faf['path']],
                                        'type': obj_type,
                                        'parent_refs': [ref]})
def assembly_obj_to_fasta(self, ref, obj_type):
    """
    Write a FASTA file for an assembly-like object, or for each member of
    an assembly set, and record each result through ``self.add_to_dict``.

    A KBaseGenomes.ContigSet or KBaseGenomeAnnotations.Assembly is
    converted directly. A KBaseSets.AssemblySet is fetched and each entry
    of its 'items' is converted in turn. Any other type is ignored.

    :param ref: reference of the assembly, contig set or assembly set
    :param obj_type: type string of the object behind ``ref``
    """
    fasta_writer = AssemblyToFasta(self.callback_url, self.scratch)

    is_single_assembly = ("KBaseGenomes.ContigSet" in obj_type
                          or "KBaseGenomeAnnotations.Assembly" in obj_type)

    if is_single_assembly:
        member_refs = iter([ref])
    elif "KBaseSets.AssemblySet" in obj_type:
        set_obj = self.ws.get_objects2({'objects': [{"ref": ref}]})['data'][0]
        # Lazy on purpose: each member's 'ref' is read only when its turn
        # comes, interleaved with the conversions.
        member_refs = (item['ref'] for item in set_obj['data']['items'])
    else:
        return

    for member_ref in member_refs:
        faf = fasta_writer.assembly_as_fasta({"ref": member_ref})
        # Register the FASTA path, remembering the object it came from.
        self.add_to_dict(member_ref, {'paths': [faf['path']],
                                      'type': obj_type,
                                      'parent_refs': [ref]})
def type_to_fasta(self, ctx, ref_lst):
    """
    Produce FASTA files for every object referenced in ``ref_lst``.

    Each reference is looked up to find its type and handled accordingly:

    * KBaseSets.GenomeSet / KBaseSearch.GenomeSet / KBaseGenomes.Genome:
      each genome is resolved to its contigset_ref (preferred) or
      assembly_ref, and that object is written out.
    * KBaseGenomes.ContigSet / KBaseGenomeAnnotations.Assembly: written
      out directly.
    * KBaseSets.AssemblySet: every entry of 'items' is written out.
    * KBaseMetagenomes.BinnedContigs: the bin files are exported and the
      files at the top level of the export directory are copied into
      ``self.scratch``.

    References of any other type contribute nothing.

    :param ctx: context object forwarded to ``assembly_as_fasta``
    :param ref_lst: iterable of object references to convert
    :returns: ``{"FASTA": [path_1, ref_1, path_2, ref_2, ...]}``, a flat
        list alternating each FASTA path with the reference it came from,
        accumulated over all entries of ``ref_lst`` in order
    :raises TypeError: if a genome has neither a contigset_ref nor an
        assembly_ref
    """
    fasta_array = []
    atf = AssemblyToFasta(self.callback_url, self.scratch)

    for ref in ref_lst:
        genome_upas = []
        obj = {"ref": ref}

        # Element 2 of the object-info tuple is the type string.
        obj_info = self.ws_url.get_object_info3({"objects": [obj]})
        obj_type = obj_info["infos"][0][2]

        if 'KBaseSets.GenomeSet' in obj_type:
            obj_data = self.dfu.get_objects({"object_refs": [ref]})['data'][0]
            genome_upas = [gsi['ref'] for gsi in obj_data['data']['items']]
        elif 'KBaseSearch.GenomeSet' in obj_type:
            obj_data = self.dfu.get_objects({"object_refs": [ref]})['data'][0]
            genome_upas = [gse['ref']
                           for gse in obj_data['data']['elements'].values()]
        elif "KBaseGenomes.Genome" in obj_type:
            genome_upas = [ref]
        elif ("KBaseGenomes.ContigSet" in obj_type
              or "KBaseGenomeAnnotations.Assembly" in obj_type):
            faf = atf.assembly_as_fasta(ctx, obj)
            fasta_array.extend([faf['path'], ref])
        elif "KBaseSets.AssemblySet" in obj_type:
            obj_data = self.dfu.get_objects({"object_refs": [ref]})['data'][0]
            for item in obj_data['data']['items']:
                faf = atf.assembly_as_fasta(ctx, {"ref": item['ref']})
                # Extend rather than replace, so results gathered for
                # earlier references in ref_lst are kept.
                fasta_array.extend([faf['path'], item['ref']])
        elif 'KBaseMetagenomes.BinnedContigs' in obj_type:
            bin_file_dir = self.mgu.binned_contigs_to_file(
                {'input_ref': ref, 'save_to_shock': 0})['bin_file_directory']
            # Only the files directly inside the export directory are
            # taken; sub-directories are not descended into.
            _, _, filenames = next(os.walk(bin_file_dir),
                                   (bin_file_dir, [], []))
            for fasta_file in filenames:
                fasta_path = os.path.join(self.scratch, fasta_file)
                copyfile(os.path.join(bin_file_dir, fasta_file), fasta_path)
                # Extend rather than replace, as for AssemblySet.
                fasta_array.extend([fasta_path, ref])

        for genome_upa in genome_upas:
            genome_data = self.ws_url.get_objects2(
                {'objects': [{"ref": genome_upa}]})['data'][0]['data']
            sequence_ref = (genome_data.get('contigset_ref')
                            or genome_data.get('assembly_ref'))
            if not sequence_ref:
                # Fail clearly instead of building the reference
                # "<genome_upa>;None".
                raise TypeError("KBase object type %s does not contain an assembly reference or contig reference." % obj_type)
            # Chain the sequence object onto the genome UPA with ';'.
            assembly_upa = genome_upa + ';' + str(sequence_ref)
            faf = atf.assembly_as_fasta(ctx, {'ref': assembly_upa})
            fasta_array.extend([faf['path'], assembly_upa])

    # return dictionary of FASTA
    return {"FASTA": fasta_array}