def proteins_to_fasta(ws_url='https://ci.kbase.us/services/ws/',
                      ref='ReferenceGenomeAnnotations/kb|g.166819',
                      outfile='/tmp/166819_prot.fasta'):
    """Write a FASTA file with the proteins of a genome reference.

    The auth token is read from the ``KB_AUTH_TOKEN`` environment variable
    (``None`` is passed to the API when it is unset).

    Args:
      ws_url: Workspace service URL
      ref: Genome annotation reference to read the proteins from. Defaults
        to the example genome ``kb|g.166819``; another reference that has
        been used with this function is
        ``ReferenceGenomeAnnotations/kb|g.3899``.
      outfile: Path of the FASTA file to write; it is opened in ``'w'``
        mode, so an existing file is overwritten. The default file name
        matches the default ``ref`` -- pass a different path when
        exporting another genome.

    Returns:
      Path of the output file (the ``outfile`` argument, unchanged)
    """
    genome_annotation = GenomeAnnotationAPI(
        token=os.environ.get('KB_AUTH_TOKEN'),
        services={'workspace_service_url': ws_url},
        ref=ref)
    # Get all the proteins with the Data API
    proteins = genome_annotation.get_proteins()
    # get_fasta() is iterated for the pieces of output text; writelines()
    # consumes that iterable directly instead of one write() call per item.
    with open(outfile, 'w') as f:
        f.writelines(get_fasta(proteins))
    return outfile
def get_proteins(self, ctx, ref):
    """
    Retrieve Protein data for the object identified by ``ref``.

    :param ctx: the context object; its ``'token'`` entry is handed to the
       local GenomeAnnotation API
    :param ref: instance of type "ObjectReference"
    :returns: one-element list wrapping a mapping from String (protein ID)
       to type "Protein_data" -> structure: parameter "protein_id" of
       String, parameter "protein_amino_acid_sequence" of String,
       parameter "protein_function" of String, parameter
       "protein_aliases" of list of String, parameter "protein_md5" of
       String, parameter "protein_domain_locations" of list of String
    :raises ValueError: when the API result is not a dict
    """
    # NOTE(review): the BEGIN/END marker comments look like code-generator
    # delimiters -- keep them around the hand-written part; confirm.
    #BEGIN get_proteins
    local_api = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    returnVal = local_api.get_proteins()
    #END get_proteins

    # Only a shallow type check is done; the happy path wraps the mapping
    # in a list. The name ``returnVal`` is kept because the error text
    # below refers to it.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_proteins return value '
                     'returnVal is not type dict as required.')
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Fetch the proteins of an example KBase genome and save them to a file.

    The auth token comes from the ``KB_AUTH_TOKEN`` environment variable.
    The result of ``get_protein_fasta`` is written to ``166819_prot.fasta``,
    a path relative to the current working directory. Nothing is returned.

    Args:
      ws_url: Workspace service URL passed to the GenomeAnnotation API
    """
    auth_token = os.environ.get('KB_AUTH_TOKEN')
    service_urls = {"workspace_service_url": ws_url}

    # GenomeAnnotation object for an example KBase reference genome
    annotation = GenomeAnnotationAPI(
        services=service_urls,
        token=auth_token,
        ref="ReferenceGenomeAnnotations/kb|g.166819")

    # Format every protein of the genome before the output file is opened
    fasta_text = get_protein_fasta(annotation.get_proteins())

    with open('166819_prot.fasta', 'w') as handle:
        handle.write(fasta_text)
def proteins_to_fasta(ws_url='https://ci.kbase.us/services/ws/',
                      ref='ReferenceGenomeAnnotations/kb|g.166819',
                      outfile='/tmp/166819_prot.fasta'):
    """Write a FASTA file with the proteins of a genome reference.

    The auth token is read from the ``KB_AUTH_TOKEN`` environment variable
    (``None`` is passed to the API when it is unset).

    Args:
      ws_url: Workspace service URL
      ref: Genome annotation reference to read the proteins from. Defaults
        to the example genome ``kb|g.166819``; another reference that has
        been used with this function is
        ``ReferenceGenomeAnnotations/kb|g.3899``.
      outfile: Path of the FASTA file to write; it is opened in ``'w'``
        mode, so an existing file is overwritten. The default file name
        matches the default ``ref`` -- pass a different path when
        exporting another genome.

    Returns:
      Path of the output file (the ``outfile`` argument, unchanged)
    """
    genome_annotation = GenomeAnnotationAPI(
        token=os.environ.get('KB_AUTH_TOKEN'),
        services={'workspace_service_url': ws_url},
        ref=ref)
    # Get all the proteins with the Data API
    proteins = genome_annotation.get_proteins()
    # get_fasta() is iterated for the pieces of output text; writelines()
    # consumes that iterable directly instead of one write() call per item.
    with open(outfile, 'w') as f:
        f.writelines(get_fasta(proteins))
    return outfile
gto['contigobj']['source_id'] = extsource["external_source_id"] gto['contigobj']['name'] = extsource["external_source_id"] gto['source'] = extsource["external_source"] gto['source_id'] = extsource["external_source_id"] except Exception, e: success = 0 features = []; success = 0; try: features = ga.get_features(); success = 1 except Exception, e: success = 0 prot = ga.get_proteins(); if success == 1: for ftrid in features.keys(): ftrdata = features[ftrid] if 'feature_type' in ftrdata.keys(): newfeature = {'id' : ftrid,'type' : ftrdata['feature_type'],'function' : "Unknown",'location' : []} array = ftrid.split("_"); protid = 'protein_'+array[1]; if array[0] == 'CDS' and protid in prot.keys(): newfeature['protein_translation'] = prot[protid]['protein_amino_acid_sequence'] if 'feature_ontology_terms' in ftrdata.keys(): newfeature['ontology_terms'] = ftrdata['feature_ontology_terms'] if 'feature_function' in ftrdata.keys(): newfeature['function'] = ftrdata['feature_function'] if 'feature_dna_sequence' in ftrdata.keys():