def get_features(self, ctx, ref, feature_id_list):
        """
        Fetch the data records for Features of a GenomeAnnotation.

        @param feature_id_list List of Features to retrieve.
          If None, returns all Feature data.
        @return Mapping from Feature IDs to dicts of available data.

        :param ctx: the context object; its 'token' entry is handed to the
           underlying API
        :param ref: instance of type "ObjectReference"
        :param feature_id_list: instance of list of String
        :returns: one-element list wrapping an instance of mapping from
           String to type "Feature_data" -> structure: parameter
           "feature_id" of String, parameter "feature_type" of String,
           parameter "feature_function" of String, parameter
           "feature_aliases" of mapping from String to list of String,
           parameter "feature_dna_sequence_length" of Long, parameter
           "feature_dna_sequence" of String, parameter "feature_md5" of
           String, parameter "feature_locations" of list of type "Region" ->
           structure: parameter "contig_id" of String, parameter "strand" of
           String, parameter "start" of Long, parameter "length" of Long,
           parameter "feature_publications" of list of String, parameter
           "feature_quality_warnings" of list of String, parameter
           "feature_quality_score" of list of String, parameter
           "feature_notes" of String, parameter "feature_inference" of String
        :raises ValueError: if the underlying API does not return a dict
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_features
        annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
        returnVal = annotation.get_features(feature_id_list)
        #END get_features

        # Only the outermost container type is validated here.
        if isinstance(returnVal, dict):
            # results are always wrapped in a single-element list
            return [returnVal]
        raise ValueError('Method get_features return value '
                         'returnVal is not type dict as required.')
    def get_proteins(self, ctx, ref):
        """
        Fetch the Protein data of a GenomeAnnotation.

        @return Mapping from protein ID to data about the protein.

        :param ctx: the context object; its 'token' entry is handed to the
           underlying API
        :param ref: instance of type "ObjectReference"
        :returns: one-element list wrapping an instance of mapping from
           String to type "Protein_data" -> structure: parameter
           "protein_id" of String, parameter "protein_amino_acid_sequence"
           of String, parameter "protein_function" of String, parameter
           "protein_aliases" of list of String, parameter "protein_md5" of
           String, parameter "protein_domain_locations" of list of String
        :raises ValueError: if the underlying API does not return a dict
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_proteins
        annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
        returnVal = annotation.get_proteins()
        #END get_proteins

        # Only the outermost container type is validated here.
        if isinstance(returnVal, dict):
            # results are always wrapped in a single-element list
            return [returnVal]
        raise ValueError('Method get_proteins return value '
                         'returnVal is not type dict as required.')
    def get_mrna_by_gene(self, ctx, ref, gene_id_list):
        """
        Look up the mRNA IDs that belong to the given gene IDs.

        @param gene_id_list List of gene Feature IDS for which to retrieve mRNA IDs.
            If empty, returns all gene/mRNA mappings.
        @return Mapping of gene Feature IDs to a list of mRNA Feature IDs.

        :param ctx: the context object; its 'token' entry is handed to the
           underlying API
        :param ref: instance of type "ObjectReference"
        :param gene_id_list: instance of list of String
        :returns: one-element list wrapping an instance of mapping from
           String to list of String
        :raises ValueError: if the underlying API does not return a dict
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_mrna_by_gene
        annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)

        # Any falsy argument (None, empty list, ...) is forwarded as a fresh
        # empty list; everything else is passed through untouched.
        returnVal = annotation.get_mrna_by_gene(gene_id_list or [])
        #END get_mrna_by_gene

        # Only the outermost container type is validated here.
        if isinstance(returnVal, dict):
            # results are always wrapped in a single-element list
            return [returnVal]
        raise ValueError('Method get_mrna_by_gene return value '
                         'returnVal is not type dict as required.')
    def get_feature_type_descriptions(self, ctx, ref, feature_type_list):
        """
        Fetch the description of each Feature type present in
        this GenomeAnnotation.

        @param feature_type_list List of Feature types. If this list
         is empty or None,
         the whole mapping will be returned.
        @return Name and description for each requested Feature Type

        :param ctx: the context object; its 'token' entry is handed to the
           underlying API
        :param ref: instance of type "ObjectReference"
        :param feature_type_list: instance of list of String
        :returns: one-element list wrapping an instance of mapping from
           String to String
        :raises ValueError: if the underlying API does not return a dict
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_feature_type_descriptions
        annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
        returnVal = annotation.get_feature_type_descriptions(feature_type_list)
        #END get_feature_type_descriptions

        # Only the outermost container type is validated here.
        if isinstance(returnVal, dict):
            # results are always wrapped in a single-element list
            return [returnVal]
        raise ValueError('Method get_feature_type_descriptions return value '
                         'returnVal is not type dict as required.')
    def get_mrna_exons(self, ctx, ref, mrna_id_list):
        """
        Retrieve Exon information for each mRNA ID.

        @param mrna_id_list List of mRNA Feature IDS for which to retrieve exons.
            If empty, returns data for all exons.
        @return Mapping of mRNA Feature IDs to a list of exons (:js:data:`Exon_data`).

        :param ctx: the context object; its 'token' entry is handed to the
           underlying API
        :param ref: instance of type "ObjectReference"
        :param mrna_id_list: instance of list of String
        :returns: one-element list wrapping an instance of mapping from
           String to list of type "Exon_data" -> structure: parameter
           "exon_location" of type "Region" -> structure: parameter
           "contig_id" of String, parameter "strand" of String, parameter
           "start" of Long, parameter "length" of Long, parameter
           "exon_dna_sequence" of String, parameter "exon_ordinal" of Long
        :raises ValueError: if the underlying API does not return a dict
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_mrna_exons
        ga = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
        # Delegate to the identically named data API method. The earlier
        # spelling ``get_mrna_by_exons`` breaks the pattern followed by every
        # sibling wrapper (service method name == API method name) and is not
        # part of the documented GenomeAnnotationAPI interface, so the call
        # would fail with AttributeError.
        returnVal = ga.get_mrna_exons(mrna_id_list)
        #END get_mrna_exons

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method get_mrna_exons return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
    def get_summary(self, ctx, ref):
        """
        Fetch the summary representation of this GenomeAnnotation.

        @return summary data

        :param ctx: the context object; its 'token' entry is handed to the
           underlying API
        :param ref: instance of type "ObjectReference"
        :returns: one-element list wrapping an instance of type
           "Summary_data" -> structure: parameter "scientific_name" of
           String, parameter "taxonomy_id" of Long, parameter "kingdom" of
           String, parameter "scientific_lineage" of list of String,
           parameter "genetic_code" of Long, parameter "organism_aliases" of
           list of String, parameter "assembly_source" of String, parameter
           "assembly_source_id" of String, parameter "assembly_source_date"
           of String, parameter "gc_content" of Double, parameter "dna_size"
           of Long, parameter "num_contigs" of Long, parameter "contig_ids"
           of list of String, parameter "external_source" of String,
           parameter "external_source_date" of String, parameter "release"
           of String, parameter "original_source_filename" of String,
           parameter "feature_type_counts" of mapping from String to Long
        :raises ValueError: if the underlying API does not return a dict
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_summary
        annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
        returnVal = annotation.get_summary()
        #END get_summary

        # Only the outermost container type is validated here.
        if isinstance(returnVal, dict):
            # results are always wrapped in a single-element list
            return [returnVal]
        raise ValueError('Method get_summary return value '
                         'returnVal is not type dict as required.')
Example #7
0
def proteins_to_fasta(ws_url='https://ci.kbase.us/services/ws/'):
    """Export the proteins of a fixed reference genome as a FASTA file.

    The genome reference and the output location are hard-coded; the auth
    token is read from the ``KB_AUTH_TOKEN`` environment variable.

    Args:
        ws_url: Workspace service URL

    Returns:
        Full path to output file
    """
    genome_ref = "ReferenceGenomeAnnotations/kb|g.166819"
    annotation = GenomeAnnotationAPI(
        token=os.environ.get('KB_AUTH_TOKEN'),
        services={'workspace_service_url': ws_url},
        ref=genome_ref)

    # Pull every protein through the Data API first, then stream the
    # FASTA lines produced by get_fasta() into the output file.
    protein_map = annotation.get_proteins()
    fasta_path = '/tmp/166819_prot.fasta'
    with open(fasta_path, 'w') as handle:
        handle.writelines(get_fasta(protein_map))

    return fasta_path
    def get_cds_by_mrna(self, ctx, ref, mrna_id_list):
        """
        Look up the coding sequence (cds) Feature IDs for given mRNA Feature IDs.

        @param mrna_id_list List of mRNA Feature IDS for which to retrieve CDS.
            If empty, returns data for all features.
        @return Mapping of mRNA Feature IDs to a list of CDS Feature IDs.

        :param ctx: the context object; its 'token' entry is handed to the
           underlying API
        :param ref: instance of type "ObjectReference"
        :param mrna_id_list: instance of list of String
        :returns: one-element list wrapping an instance of mapping from
           String to String
        :raises ValueError: if the underlying API does not return a dict
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_cds_by_mrna
        annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)

        # Any falsy argument (None, empty list, ...) is forwarded as a fresh
        # empty list; everything else is passed through untouched.
        returnVal = annotation.get_cds_by_mrna(mrna_id_list or [])
        #END get_cds_by_mrna

        # Only the outermost container type is validated here.
        if isinstance(returnVal, dict):
            # results are always wrapped in a single-element list
            return [returnVal]
        raise ValueError('Method get_cds_by_mrna return value '
                         'returnVal is not type dict as required.')
    def get_feature_locations(self, ctx, ref, feature_id_list):
        """
        Fetch the locations of Features.

        @param feature_id_list List of Feature IDs for which to retrieve locations.
            If empty, returns data for all features.
        @return Mapping from Feature IDs to location information for each.

        :param ctx: the context object; its 'token' entry is handed to the
           underlying API
        :param ref: instance of type "ObjectReference"
        :param feature_id_list: instance of list of String
        :returns: one-element list wrapping an instance of mapping from
           String to list of type "Region" -> structure: parameter
           "contig_id" of String, parameter "strand" of String, parameter
           "start" of Long, parameter "length" of Long
        :raises ValueError: if the underlying API does not return a dict
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_feature_locations
        annotation = GenomeAnnotationAPI_local(self.services, ctx['token'], ref)
        returnVal = annotation.get_feature_locations(feature_id_list)
        #END get_feature_locations

        # Only the outermost container type is validated here.
        if isinstance(returnVal, dict):
            # results are always wrapped in a single-element list
            return [returnVal]
        raise ValueError('Method get_feature_locations return value '
                         'returnVal is not type dict as required.')
Example #10
0
def setup():
    """Prepare the shared fixtures and build the module-level API objects.

    Calls ``shared.setup()`` and then binds six module globals: for each of
    ``genome_new`` and ``genome_old`` a ``GenomeAnnotationAPI`` object, a
    type-specific implementation object, and a ``GenomeAnnotationClientAPI``
    pointed at the ``genome_annotation_service_url`` service entry.
    """
    global t_new, t_new_e, t_old, t_old_e, t_client_new, t_client_old
    shared.setup()

    services, token = shared.services, shared.token

    # Library-side objects for the new-style genome ...
    t_new = GenomeAnnotationAPI(services, token, genome_new)
    t_new_e = _GenomeAnnotation(services, token, genome_new)
    # ... and for the old-style genome.
    t_old = GenomeAnnotationAPI(services, token, genome_old)
    t_old_e = _KBaseGenomes_Genome(services, token, genome_old)

    # Both service clients talk to the same endpoint.
    service_url = services["genome_annotation_service_url"]
    t_client_new = GenomeAnnotationClientAPI(service_url, token, genome_new)
    t_client_old = GenomeAnnotationClientAPI(service_url, token, genome_old)
Example #11
0
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Write the proteins of an example KBase reference genome as FASTA.

    The auth token is read from the ``KB_AUTH_TOKEN`` environment variable
    and the result is written to ``166819_prot.fasta`` in the current
    working directory.

    Args:
        ws_url: Workspace service URL
    """
    # An example KBase reference genome.
    reference = "ReferenceGenomeAnnotations/kb|g.166819"

    service_urls = {"workspace_service_url": ws_url}
    auth_token = os.environ.get('KB_AUTH_TOKEN')
    annotation = GenomeAnnotationAPI(
        services=service_urls, token=auth_token, ref=reference)

    # Fetch every protein of the genome and render them as FASTA text.
    fasta_text = get_protein_fasta(annotation.get_proteins())

    with open('166819_prot.fasta', 'w') as handle:
        handle.write(fasta_text)
Example #12
0
def generate_gff(logger, internal_services, token, ref, output_dir, obj_name,
                 output_file):
    """Write the GFF export of a GenomeAnnotation object to ``output_file``.

    :param logger: object whose ``info`` method receives progress messages
    :param internal_services: passed as the first positional argument to
        ``GenomeAnnotationAPI``
    :param token: auth token handed to ``GenomeAnnotationAPI``
    :param ref: first half of the object reference; the API is opened on
        ``"<ref>/<obj_name>"``
    :param output_dir: not used by this function; kept so existing callers
        keep working
    :param obj_name: second half of the object reference, also used in the
        log messages
    :param output_file: path of the file that receives the GFF data
    :raises Exception: if ``GenomeAnnotationAPI`` cannot be constructed
    """
    try:
        ga = GenomeAnnotationAPI(internal_services,
                                 token=token,
                                 ref="{}/{}".format(ref, obj_name))
    except Exception as e:
        # The handler used to be a bare ``except:`` that formatted an unbound
        # name ``e``, so it died with NameError instead of reporting the
        # actual failure.
        raise Exception("Unable to Call GenomeAnnotationAPI : {0}".format(e))
    logger.info("Requesting GenomeAnnotation GFF for {}/{}".format(
        ref, obj_name))
    gff_start = datetime.datetime.utcnow()
    # The with-statement closes the file; no explicit close() is needed.
    with open(output_file, 'w') as gff_file:
        ga.get_gff().to_file(gff_file)
    gff_end = datetime.datetime.utcnow()
    logger.info("Generating GFF for {} took {}".format(obj_name,
                                                       gff_end - gff_start))
Example #13
0
def generate_fasta(logger, internal_services, token, ref, output_dir,
                   obj_name):
    """Write the assembly FASTA of a GenomeAnnotation object to a file.

    :param logger: object whose ``info`` method receives progress messages
    :param internal_services: passed as the first positional argument to
        ``GenomeAnnotationAPI``
    :param token: auth token handed to ``GenomeAnnotationAPI``
    :param ref: first half of the object reference; the API is opened on
        ``"<ref>/<obj_name>"``
    :param output_dir: directory in which ``<obj_name>.fasta`` is created
    :param obj_name: second half of the object reference and stem of the
        output file name
    :returns: path of the written FASTA file
    :raises Exception: if ``GenomeAnnotationAPI`` cannot be constructed
    """
    object_ref = "{}/{}".format(ref, obj_name)
    try:
        ga = GenomeAnnotationAPI(internal_services,
                                 token=token,
                                 ref=object_ref)
    except Exception as e:
        # The message has two placeholders; it used to be formatted with a
        # single argument, which raised IndexError and hid the real error.
        raise Exception(
            "Unable to Call GenomeAnnotationAPI : {0}: {1}".format(
                object_ref, e))
    logger.info("Generating FASTA file from Assembly for {}/{}".format(
        ref, obj_name))
    fasta_start = datetime.datetime.utcnow()
    output_file = os.path.join(output_dir, '{}.fasta'.format(obj_name))
    # The with-statement closes the file; no explicit close() is needed.
    with open(output_file, 'w') as fasta_file:
        ga.get_assembly().get_fasta().to_file(fasta_file)
    fasta_end = datetime.datetime.utcnow()
    logger.info("Generating FASTA for {} took {}".format(
        obj_name, fasta_end - fasta_start))
    return output_file
Example #14
0
def generate_gff(logger, internal_services, token, ref, output_dir, obj_name,
                 output_file):
    """Write the GFF export of a GenomeAnnotation object to ``output_file``.

    :param logger: object whose ``info`` method receives progress messages
    :param internal_services: passed as the first positional argument to
        ``GenomeAnnotationAPI``
    :param token: auth token handed to ``GenomeAnnotationAPI``
    :param ref: object reference the API is opened on
    :param output_dir: not used by this function; kept so existing callers
        keep working
    :param obj_name: name used in log and error messages
    :param output_file: path of the file that receives the GFF data
        (opened in binary mode)
    :raises Exception: if ``GenomeAnnotationAPI`` cannot be constructed or
        the GFF data cannot be written; the message carries the formatted
        traceback of the underlying error
    """
    # Indentation is normalised to four spaces: the previous mix of tabs and
    # spaces is rejected by Python 3 (TabError) and by ``python -tt``.
    try:
        ga = GenomeAnnotationAPI(internal_services,
                                 token=token,
                                 ref=ref)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not rewrapped.
        raise Exception("Unable to Call GenomeAnnotationAPI : {0}".format(
            "".join(traceback.format_exc())))
    logger.info("Requesting GenomeAnnotation GFF for {}".format(obj_name))
    gff_start = datetime.datetime.utcnow()
    # The with-statement guarantees the file is closed on every path.
    with io.open(output_file, 'wb') as gff_file:
        try:
            ga.get_gff().to_file(gff_file)
        except Exception:
            raise Exception(
                "Unable to Create GFF  file from Genome Annotation : "
                "{0}: {1}".format(obj_name,
                                  "".join(traceback.format_exc())))
    gff_end = datetime.datetime.utcnow()
    logger.info("Generating GFF for {} took {}".format(
        obj_name, gff_end - gff_start))
Example #15
0
def generate_fasta(logger, internal_services, token, ref, output_dir,
                   obj_name):
    """Write the assembly FASTA of a GenomeAnnotation object to a file.

    :param logger: object whose ``info`` method receives progress messages
    :param internal_services: passed as the first positional argument to
        ``GenomeAnnotationAPI``
    :param token: auth token handed to ``GenomeAnnotationAPI``
    :param ref: object reference the API is opened on
    :param output_dir: directory in which ``<obj_name>.fasta`` is created
    :param obj_name: stem of the output file name, also used in log messages
    :returns: path of the written FASTA file
    :raises Exception: if ``GenomeAnnotationAPI`` cannot be constructed or
        the FASTA data cannot be written; the message carries the formatted
        traceback of the underlying error
    """
    # Indentation is normalised to four spaces: the previous mix of tabs and
    # spaces is rejected by Python 3 (TabError) and by ``python -tt``.
    try:
        ga = GenomeAnnotationAPI(internal_services,
                                 token=token,
                                 ref=ref)
    except Exception:
        raise Exception("Unable to Call GenomeAnnotationAPI : {0}".format(
            "".join(traceback.format_exc())))
    logger.info("Generating FASTA file from Assembly for {}".format(obj_name))
    fasta_start = datetime.datetime.utcnow()
    output_file = os.path.join(output_dir, '{}.fasta'.format(obj_name))
    # The with-statement guarantees the file is closed on every path.
    with io.open(output_file, 'wb') as fasta_file:
        try:
            ga.get_assembly().get_fasta().to_file(fasta_file)
        except Exception:
            raise Exception(
                "Unable to Create FASTA file from Genome Annotation : "
                "{0}".format("".join(traceback.format_exc())))
    fasta_end = datetime.datetime.utcnow()
    logger.info("Generating FASTA for {} took {}".format(
        obj_name, fasta_end - fasta_start))
    return output_file
Example #16
0
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Print the core exon(s) of the first locus of a reference genome.

    The auth token is read from the ``KB_AUTH_TOKEN`` environment variable.

    Args:
        ws_url: Workspace service URL
    """
    # use a KBase reference genome
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"
    # instantiate a new genome annotation API
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)
    # pick first locus
    gene = 'kb|g.166819.locus.1'
    # retrieve core exons for locus
    exons = get_core_exons(gene, genome_annotation)

    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses under Python 3.
    print("Core exon(s) found in all mRNAs for gene " + gene)
    print(exons)
Example #17
0
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Write the GFF data of the first locus of a reference genome to a file.

    The auth token is read from the ``KB_AUTH_TOKEN`` environment variable
    and the result is written to ``g.166819.locus.1.gff`` in the current
    working directory; the file name is also printed.

    Args:
        ws_url: Workspace service URL
    """
    # an example KBase reference genome
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"
    # instantiate a new genome annotation API
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)
    # pick the first locus
    gene = 'kb|g.166819.locus.1'
    # build the GFF text for that locus
    gffdata = get_gff(gene, genome_annotation)

    outfile = 'g.166819.locus.1.gff'
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses under Python 3.
    print(outfile)
    with open(outfile, 'w') as f:
        f.write(gffdata)
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Compute the contig GC result of a reference genome, print and save it.

    The auth token is read from the ``KB_AUTH_TOKEN`` environment variable.
    The value returned by ``contig_gc`` is printed and written, JSON-encoded,
    to ``166819_GC.txt`` in the current working directory.

    Args:
        ws_url: Workspace service URL
    """
    # an example KBase reference genome
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"
    # creating a new GenomeAnnotation object
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)

    gc = contig_gc(genome_annotation, genomeref)

    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses under Python 3.
    print(gc)

    outfile = '166819_GC.txt'

    with open(outfile, 'w') as f:
        f.write(json.dumps(gc))
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Write the concatenated GFF data of three loci to a single file.

    The auth token is read from the ``KB_AUTH_TOKEN`` environment variable
    and the result is written to ``g.166819.locus.1_2_3.gff`` in the current
    working directory; the file name is also printed.

    Args:
        ws_url: Workspace service URL
    """
    # an example KBase reference genome
    genomeref = "ReferenceGenomeAnnotations/kb|g.166819"
    # instantiate a new genome annotation API
    genome_annotation = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=os.environ.get('KB_AUTH_TOKEN'),
        ref=genomeref)
    genes = [
        'kb|g.166819.locus.1', 'kb|g.166819.locus.2', 'kb|g.166819.locus.3'
    ]

    # Join once instead of growing the string with += per locus.
    gffdata = "".join(get_gff(s, genome_annotation) for s in genes)

    outfile = 'g.166819.locus.1_2_3.gff'
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses under Python 3.
    print(outfile)
    with open(outfile, 'w') as f:
        f.write(gffdata)