def get_features(self, ctx, ref, feature_id_list):
    """
    Retrieve Feature data from the GenomeAnnotation identified by ``ref``.

    :param ctx: context object; its 'token' entry is handed to the data API
    :param ref: instance of type "ObjectReference"
    :param feature_id_list: instance of list of String -- the Features to
       retrieve. If None, returns all Feature data.
    :returns: one-element list wrapping a mapping from String (Feature ID)
       to type "Feature_data" -> structure with parameters:
         "feature_id" of String,
         "feature_type" of String,
         "feature_function" of String,
         "feature_aliases" of mapping from String to list of String,
         "feature_dna_sequence_length" of Long,
         "feature_dna_sequence" of String,
         "feature_md5" of String,
         "feature_locations" of list of type "Region" -> structure:
            "contig_id" of String, "strand" of String,
            "start" of Long, "length" of Long,
         "feature_publications" of list of String,
         "feature_quality_warnings" of list of String,
         "feature_quality_score" of list of String,
         "feature_notes" of String,
         "feature_inference" of String
    :raises ValueError: when the data API result is not a dict
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN get_features
    annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    returnVal = annotation.get_features(feature_id_list)
    #END get_features

    # Only the outer container type is verified; contents are not inspected.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_features return value '
                     'returnVal is not type dict as required.')
def get_proteins(self, ctx, ref):
    """
    Retrieve Protein data from the GenomeAnnotation identified by ``ref``.

    :param ctx: context object; its 'token' entry is handed to the data API
    :param ref: instance of type "ObjectReference"
    :returns: one-element list wrapping a mapping from String (protein ID)
       to type "Protein_data" -> structure with parameters:
         "protein_id" of String,
         "protein_amino_acid_sequence" of String,
         "protein_function" of String,
         "protein_aliases" of list of String,
         "protein_md5" of String,
         "protein_domain_locations" of list of String
    :raises ValueError: when the data API result is not a dict
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN get_proteins
    annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    returnVal = annotation.get_proteins()
    #END get_proteins

    # Only the outer container type is verified; contents are not inspected.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_proteins return value '
                     'returnVal is not type dict as required.')
def get_mrna_by_gene(self, ctx, ref, gene_id_list):
    """
    Retrieve the mRNA IDs that belong to the given gene IDs.

    :param ctx: context object; its 'token' entry is handed to the data API
    :param ref: instance of type "ObjectReference"
    :param gene_id_list: instance of list of String -- gene Feature IDs for
       which to retrieve mRNA IDs. If empty, returns all gene/mRNA
       mappings.
    :returns: one-element list wrapping a mapping from String (gene Feature
       ID) to list of String (mRNA Feature IDs)
    :raises ValueError: when the data API result is not a dict
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN get_mrna_by_gene
    annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    # Any falsy argument (None, empty list, ...) is passed on as a fresh
    # empty list.
    returnVal = annotation.get_mrna_by_gene(gene_id_list or [])
    #END get_mrna_by_gene

    # Only the outer container type is verified; contents are not inspected.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_mrna_by_gene return value '
                     'returnVal is not type dict as required.')
def get_feature_type_descriptions(self, ctx, ref, feature_type_list):
    """
    Retrieve the description of each Feature type in this GenomeAnnotation.

    :param ctx: context object; its 'token' entry is handed to the data API
    :param ref: instance of type "ObjectReference"
    :param feature_type_list: instance of list of String -- Feature types
       of interest. If this list is empty or None, the whole mapping will
       be returned.
    :returns: one-element list wrapping a mapping from String (Feature type
       name) to String (description)
    :raises ValueError: when the data API result is not a dict
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN get_feature_type_descriptions
    annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    returnVal = annotation.get_feature_type_descriptions(feature_type_list)
    #END get_feature_type_descriptions

    # Only the outer container type is verified; contents are not inspected.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_feature_type_descriptions return value '
                     'returnVal is not type dict as required.')
def get_mrna_exons(self, ctx, ref, mrna_id_list):
    """
    Retrieve Exon information for each mRNA ID.

    :param ctx: context object; its 'token' entry is handed to the data API
    :param ref: instance of type "ObjectReference"
    :param mrna_id_list: instance of list of String -- mRNA Feature IDs for
       which to retrieve exons. If empty, returns data for all exons.
    :returns: one-element list wrapping a mapping from String (mRNA Feature
       ID) to list of type "Exon_data" -> structure with parameters:
         "exon_location" of type "Region" -> structure:
            "contig_id" of String, "strand" of String,
            "start" of Long, "length" of Long,
         "exon_dna_sequence" of String,
         "exon_ordinal" of Long
    :raises ValueError: when the data API result is not a dict
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN get_mrna_exons
    annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    # NOTE(review): the data API method called here is `get_mrna_by_exons`,
    # which does not match this wrapper's name (`get_mrna_exons`) -- confirm
    # that GenomeAnnotationAPI_local really exposes a method by this name.
    returnVal = annotation.get_mrna_by_exons(mrna_id_list)
    #END get_mrna_exons

    # Only the outer container type is verified; contents are not inspected.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_mrna_exons return value '
                     'returnVal is not type dict as required.')
def get_summary(self, ctx, ref):
    """
    Retrieve a summary representation of this GenomeAnnotation.

    :param ctx: context object; its 'token' entry is handed to the data API
    :param ref: instance of type "ObjectReference"
    :returns: one-element list wrapping an instance of type "Summary_data"
       -> structure with parameters:
         "scientific_name" of String,
         "taxonomy_id" of Long,
         "kingdom" of String,
         "scientific_lineage" of list of String,
         "genetic_code" of Long,
         "organism_aliases" of list of String,
         "assembly_source" of String,
         "assembly_source_id" of String,
         "assembly_source_date" of String,
         "gc_content" of Double,
         "dna_size" of Long,
         "num_contigs" of Long,
         "contig_ids" of list of String,
         "external_source" of String,
         "external_source_date" of String,
         "release" of String,
         "original_source_filename" of String,
         "feature_type_counts" of mapping from String to Long
    :raises ValueError: when the data API result is not a dict
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN get_summary
    annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    returnVal = annotation.get_summary()
    #END get_summary

    # Only the outer container type is verified; contents are not inspected.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_summary return value '
                     'returnVal is not type dict as required.')
def proteins_to_fasta(ws_url='https://ci.kbase.us/services/ws/'):
    """Dump the proteins of a fixed reference genome into a FASTA file.

    The genome reference and the output location are hard-coded; the auth
    token is read from the KB_AUTH_TOKEN environment variable.

    Args:
        ws_url: Workspace service URL

    Returns:
        Full path to output file
    """
    # (alternative reference: "ReferenceGenomeAnnotations/kb|g.3899")
    ref = "ReferenceGenomeAnnotations/kb|g.166819"
    outfile = '/tmp/166819_prot.fasta'

    genome_annotation = GenomeAnnotationAPI(
        token=os.environ.get('KB_AUTH_TOKEN'),
        services={'workspace_service_url': ws_url},
        ref=ref)

    # Fetch every protein through the Data API, then stream the FASTA lines
    # produced by get_fasta() straight into the output file.
    proteins = genome_annotation.get_proteins()
    with open(outfile, 'w') as fasta_out:
        fasta_out.writelines(get_fasta(proteins))
    return outfile
def get_cds_by_mrna(self, ctx, ref, mrna_id_list):
    """
    Retrieve coding sequence (CDS) Feature IDs for given mRNA Feature IDs.

    :param ctx: context object; its 'token' entry is handed to the data API
    :param ref: instance of type "ObjectReference"
    :param mrna_id_list: instance of list of String -- mRNA Feature IDs for
       which to retrieve CDS. If empty, returns data for all features.
    :returns: one-element list wrapping a mapping from String (mRNA Feature
       ID) to String. NOTE(review): the description accompanying the
       declared type speaks of "a list of CDS Feature IDs" per mRNA, while
       the declared type is String -- confirm which is intended.
    :raises ValueError: when the data API result is not a dict
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN get_cds_by_mrna
    annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    # Any falsy argument (None, empty list, ...) is passed on as a fresh
    # empty list.
    returnVal = annotation.get_cds_by_mrna(mrna_id_list or [])
    #END get_cds_by_mrna

    # Only the outer container type is verified; contents are not inspected.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_cds_by_mrna return value '
                     'returnVal is not type dict as required.')
def get_feature_locations(self, ctx, ref, feature_id_list):
    """
    Retrieve Feature locations.

    :param ctx: context object; its 'token' entry is handed to the data API
    :param ref: instance of type "ObjectReference"
    :param feature_id_list: instance of list of String -- Feature IDs for
       which to retrieve locations. If empty, returns data for all
       features.
    :returns: one-element list wrapping a mapping from String (Feature ID)
       to list of type "Region" -> structure with parameters:
         "contig_id" of String,
         "strand" of String,
         "start" of Long,
         "length" of Long
    :raises ValueError: when the data API result is not a dict
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN get_feature_locations
    annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
    returnVal = annotation.get_feature_locations(feature_id_list)
    #END get_feature_locations

    # Only the outer container type is verified; contents are not inspected.
    if isinstance(returnVal, dict):
        return [returnVal]
    raise ValueError('Method get_feature_locations return value '
                     'returnVal is not type dict as required.')
def setup():
    """Build the module-level fixtures shared by the tests.

    Runs ``shared.setup()`` first, then creates, for both the ``genome_new``
    and ``genome_old`` references: a GenomeAnnotationAPI object, an object
    of the concrete implementation class, and a GenomeAnnotationClientAPI
    object pointed at the configured "genome_annotation_service_url".
    """
    global t_new, t_new_e, t_old, t_old_e, t_client_new, t_client_old

    shared.setup()

    # Library API objects and the concrete implementation classes.
    t_new = GenomeAnnotationAPI(shared.services, shared.token, genome_new)
    t_new_e = _GenomeAnnotation(shared.services, shared.token, genome_new)
    t_old = GenomeAnnotationAPI(shared.services, shared.token, genome_old)
    t_old_e = _KBaseGenomes_Genome(shared.services, shared.token, genome_old)

    # Service clients for the same two references.
    service_key = "genome_annotation_service_url"
    t_client_new = GenomeAnnotationClientAPI(
        shared.services[service_key], shared.token, genome_new)
    t_client_old = GenomeAnnotationClientAPI(
        shared.services[service_key], shared.token, genome_old)
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Write the proteins of an example reference genome as FASTA.

    The output goes to '166819_prot.fasta' in the current working
    directory; the auth token is read from the KB_AUTH_TOKEN environment
    variable.

    Args:
        ws_url: Workspace service URL
    """
    # An example KBase reference genome and the file its proteins go to.
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"
    outfile = '166819_prot.fasta'

    # Create the GenomeAnnotation object for that reference.
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)

    # Fetch all protein objects and render them in FASTA format.
    fasta = get_protein_fasta(genome_annotation.get_proteins())

    with open(outfile, 'w') as fasta_out:
        fasta_out.write(fasta)
def generate_gff(logger, internal_services, token, ref, output_dir, obj_name,
                 output_file):
    """Write the GFF representation of a GenomeAnnotation to ``output_file``.

    Args:
        logger: logger used for progress and timing messages.
        internal_services: services mapping passed to GenomeAnnotationAPI.
        token: auth token passed to GenomeAnnotationAPI.
        ref: first part of the object reference; the object is addressed
            as "<ref>/<obj_name>".
        output_dir: unused here; kept for interface compatibility.
        obj_name: object name, used in the reference and in log messages.
        output_file: path of the GFF file to create (opened in 'w' mode).

    Raises:
        Exception: if the GenomeAnnotationAPI object cannot be created; the
            message carries the text of the underlying error.
    """
    object_ref = "{}/{}".format(ref, obj_name)
    try:
        ga = GenomeAnnotationAPI(internal_services, token=token,
                                 ref=object_ref)
    except Exception as e:
        # The original handler used a bare `except:` and then formatted an
        # unbound name `e`, so it failed with NameError instead of raising
        # this message.
        raise Exception("Unable to Call GenomeAnnotationAPI : {0}".format(e))

    logger.info("Requesting GenomeAnnotation GFF for {}".format(object_ref))
    gff_start = datetime.datetime.utcnow()

    # The context manager closes the file; no explicit close() is needed.
    with open(output_file, 'w') as gff_file:
        ga.get_gff().to_file(gff_file)

    gff_end = datetime.datetime.utcnow()
    logger.info("Generating GFF for {} took {}".format(
        obj_name, gff_end - gff_start))
def generate_fasta(logger, internal_services, token, ref, output_dir,
                   obj_name):
    """Write the assembly FASTA of a GenomeAnnotation into ``output_dir``.

    Args:
        logger: logger used for progress and timing messages.
        internal_services: services mapping passed to GenomeAnnotationAPI.
        token: auth token passed to GenomeAnnotationAPI.
        ref: first part of the object reference; the object is addressed
            as "<ref>/<obj_name>".
        output_dir: directory in which "<obj_name>.fasta" is created.
        obj_name: object name, used in the reference, the output file name
            and log messages.

    Returns:
        Path of the FASTA file that was written.

    Raises:
        Exception: if the GenomeAnnotationAPI object cannot be created; the
            message carries the object reference and the underlying error.
    """
    object_ref = "{}/{}".format(ref, obj_name)
    try:
        ga = GenomeAnnotationAPI(internal_services, token=token,
                                 ref=object_ref)
    except Exception as e:
        # The template has two placeholders; the original supplied a single
        # argument, so formatting itself failed with IndexError.
        raise Exception(
            "Unable to Call GenomeAnnotationAPI : {0}: {1}".format(
                object_ref, e))

    logger.info("Generating FASTA file from Assembly for {}".format(
        object_ref))
    fasta_start = datetime.datetime.utcnow()

    output_file = os.path.join(output_dir, '{}.fasta'.format(obj_name))
    # The context manager closes the file; no explicit close() is needed.
    with open(output_file, 'w') as fasta_file:
        ga.get_assembly().get_fasta().to_file(fasta_file)

    fasta_end = datetime.datetime.utcnow()
    logger.info("Generating FASTA for {} took {}".format(
        obj_name, fasta_end - fasta_start))
    return output_file
def generate_gff(logger, internal_services, token, ref, output_dir, obj_name,
                 output_file):
    """Write the GFF representation of a GenomeAnnotation to ``output_file``.

    Args:
        logger: logger used for progress and timing messages.
        internal_services: services mapping passed to GenomeAnnotationAPI.
        token: auth token passed to GenomeAnnotationAPI.
        ref: reference of the GenomeAnnotation object.
        output_dir: unused here; kept for interface compatibility.
        obj_name: object name, used only in log and error messages.
        output_file: path of the GFF file to create (opened in 'wb' mode).

    Raises:
        Exception: if the GenomeAnnotationAPI object cannot be created or
            the GFF cannot be written; the message embeds the formatted
            traceback of the underlying error.
    """
    # `except Exception` rather than a bare `except:` so KeyboardInterrupt
    # and SystemExit propagate untouched instead of being re-labelled as an
    # API failure.
    try:
        ga = GenomeAnnotationAPI(internal_services, token=token, ref=ref)
    except Exception:
        raise Exception("Unable to Call GenomeAnnotationAPI : {0}".format(
            "".join(traceback.format_exc())))

    logger.info("Requesting GenomeAnnotation GFF for {}".format(obj_name))
    gff_start = datetime.datetime.utcnow()

    # The context manager closes the file on success and on failure.
    with io.open(output_file, 'wb') as gff_file:
        try:
            ga.get_gff().to_file(gff_file)
        except Exception:
            raise Exception(
                "Unable to Create GFF file from Genome Annotation : "
                "{0}: {1}".format(obj_name,
                                  "".join(traceback.format_exc())))

    gff_end = datetime.datetime.utcnow()
    logger.info("Generating GFF for {} took {}".format(
        obj_name, gff_end - gff_start))
def generate_fasta(logger, internal_services, token, ref, output_dir,
                   obj_name):
    """Write the assembly FASTA of a GenomeAnnotation into ``output_dir``.

    Args:
        logger: logger used for progress and timing messages.
        internal_services: services mapping passed to GenomeAnnotationAPI.
        token: auth token passed to GenomeAnnotationAPI.
        ref: reference of the GenomeAnnotation object.
        output_dir: directory in which "<obj_name>.fasta" is created.
        obj_name: object name, used for the output file name and in log
            messages.

    Returns:
        Path of the FASTA file that was written.

    Raises:
        Exception: if the GenomeAnnotationAPI object cannot be created or
            the FASTA cannot be written; the message embeds the formatted
            traceback of the underlying error.
    """
    try:
        ga = GenomeAnnotationAPI(internal_services, token=token, ref=ref)
    except Exception:
        api_trace = "".join(traceback.format_exc())
        raise Exception(
            "Unable to Call GenomeAnnotationAPI : {0}".format(api_trace))

    logger.info("Generating FASTA file from Assembly for {}".format(obj_name))
    fasta_start = datetime.datetime.utcnow()

    output_file = os.path.join(output_dir, '{}.fasta'.format(obj_name))
    # The context manager closes the file on success and on failure.
    with io.open(output_file, 'wb') as fasta_file:
        try:
            ga.get_assembly().get_fasta().to_file(fasta_file)
        except Exception:
            write_trace = "".join(traceback.format_exc())
            raise Exception(
                "Unable to Create FASTA file from Genome Annotation : "
                "{0}".format(write_trace))

    fasta_end = datetime.datetime.utcnow()
    logger.info("Generating FASTA for {} took {}".format(
        obj_name, fasta_end - fasta_start))
    return output_file
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Print the core exons of the first locus of an example genome.

    "Core" exons are whatever ``get_core_exons`` returns for the gene; the
    printed header describes them as exons found in all mRNAs of the gene.
    The auth token is read from the KB_AUTH_TOKEN environment variable.

    Args:
        ws_url: Workspace service URL
    """
    # Use a KBase reference genome.
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"

    # Instantiate a new genome annotation API.
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)

    # Pick the first locus.
    gene = 'kb|g.166819.locus.1'

    # Retrieve core exons for the locus.
    exons = get_core_exons(gene, genome_annotation)

    # Single-argument print(...) behaves identically under Python 2 and 3,
    # unlike the Python-2-only print statement.
    print("Core exon(s) found in all mRNAs for gene " + gene)
    print(exons)
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Write the GFF data of the first locus of an example genome to a file.

    The output goes to 'g.166819.locus.1.gff' in the current working
    directory, and that file name is printed. The auth token is read from
    the KB_AUTH_TOKEN environment variable.

    Args:
        ws_url: Workspace service URL
    """
    # An example KBase reference genome.
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"

    # Instantiate a new genome annotation API.
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)

    # Pick the first locus.
    gene = 'kb|g.166819.locus.1'

    # Build the GFF text for that locus.
    gffdata = get_gff(gene, genome_annotation)

    outfile = 'g.166819.locus.1.gff'
    # Single-argument print(...) behaves identically under Python 2 and 3,
    # unlike the Python-2-only print statement.
    print(outfile)

    with open(outfile, 'w') as f:
        f.write(gffdata)
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Compute GC data for an example genome, print it and save it as JSON.

    The value returned by ``contig_gc`` is printed and then written,
    JSON-encoded, to '166819_GC.txt' in the current working directory. The
    auth token is read from the KB_AUTH_TOKEN environment variable.

    Args:
        ws_url: Workspace service URL
    """
    # An example KBase reference genome.
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"

    # Create a new GenomeAnnotation object.
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)

    gc = contig_gc(genome_annotation, genomeref)
    # Single-argument print(...) behaves identically under Python 2 and 3,
    # unlike the Python-2-only print statement.
    print(gc)

    outfile = '166819_GC.txt'
    with open(outfile, 'w') as f:
        f.write(json.dumps(gc))
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Write the concatenated GFF data of three example loci to a file.

    The output goes to 'g.166819.locus.1_2_3.gff' in the current working
    directory, and that file name is printed. The auth token is read from
    the KB_AUTH_TOKEN environment variable.

    Args:
        ws_url: Workspace service URL
    """
    # An example KBase reference genome.
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"

    # Instantiate a new genome annotation API.
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)

    genes = [
        'kb|g.166819.locus.1',
        'kb|g.166819.locus.2',
        'kb|g.166819.locus.3',
    ]

    # Concatenate the per-locus GFF text in list order.
    gffdata = "".join(get_gff(s, genome_annotation) for s in genes)

    outfile = 'g.166819.locus.1_2_3.gff'
    # Single-argument print(...) behaves identically under Python 2 and 3,
    # unlike the Python-2-only print statement.
    print(outfile)

    with open(outfile, 'w') as f:
        f.write(gffdata)