예제 #1
0
 def setUpClass(cls):
     """Create the fixtures shared by all tests of this class.

     Reads ``KB_AUTH_TOKEN`` and ``KB_DEPLOYMENT_CONFIG`` from the
     environment, copies the ``GenomeFileUtil`` config section into
     ``cls.cfg`` and stores the method context, workspace client, service
     implementation and genome interface as class attributes.
     """
     auth_token = environ.get('KB_AUTH_TOKEN')

     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     context_values = {
         'token': auth_token,
         'provenance': [{
             'service': 'GenomeFileUtil',
             'method': 'please_never_use_it_in_production',
             'method_params': [],
         }],
         'authenticated': 1,
     }
     cls.ctx.update(context_values)

     # Flatten the module's section of the deployment config into a dict.
     deployment_cfg_path = environ.get('KB_DEPLOYMENT_CONFIG')
     cls.cfg = {}
     parser = ConfigParser()
     parser.read(deployment_cfg_path)
     cls.cfg.update(parser.items('GenomeFileUtil'))

     cls.wsURL = cls.cfg['workspace-url']
     cls.ws = workspaceService(cls.wsURL, token=auth_token)
     cls.serviceImpl = GenomeFileUtil(cls.cfg)
     cls.genome_interface = GenomeInterface(SDKConfig(cls.cfg))
예제 #2
0
 def __init__(self, config):
     """Create the service clients and initialise per-instance state.

     :param config: SDK configuration object. This constructor reads its
         ``callbackURL`` attribute and ``raw['taxon-workspace-name']``.
     :raises AttributeError: if ``/kb/module/kbase.yml`` contains no
         ``module-version`` entry in the expected layout (the regex
         search returns ``None``).
     """
     self.cfg = config
     self.au = AssemblyUtil(config.callbackURL)
     self.dfu = DataFileUtil(self.cfg.callbackURL)
     self.gi = GenomeInterface(self.cfg)
     self.taxon_wsname = self.cfg.raw['taxon-workspace-name']
     # Timestamp of construction, formatted with underscores only.
     self.time_string = str(
         datetime.datetime.fromtimestamp(
             time.time()).strftime('%Y_%m_%d_%H_%M_%S'))
     # Read the module manifest through a context manager so the file
     # handle is closed immediately instead of being leaked.
     with open('/kb/module/kbase.yml') as yml_file:
         yml_text = yml_file.read()
     # Raw string: "\W" is not a valid str escape and a plain literal
     # triggers an invalid-escape-sequence warning. The regex engine
     # still interprets "\n" as a newline, so the pattern is unchanged.
     self.version = re.search(r"module-version:\n\W+(.+)\n",
                              yml_text).group(1)
     self.ont_mappings = load_ontology_mappings('/kb/module/data')
     self.code_table = 11
     self.skip_types = ('exon', 'five_prime_UTR', 'three_prime_UTR',
                        'start_codon', 'stop_codon', 'region', 'chromosome',
                        'scaffold')
     # Option flags and mutable state; all start at fixed defaults here.
     self.spoof_gene_count = 0
     self.is_phytozome = False
     self.strict = True
     self.generate_genes = False
     self.warnings = []
     self.feature_dict = collections.OrderedDict()
     self.cdss = set()
     self.ontologies_present = collections.defaultdict(dict)
     self.ontology_events = list()
     self.skiped_features = collections.Counter()
     self.feature_counts = collections.Counter()
예제 #3
0
 def __init__(self, config):
     """Create the service clients and initialise per-instance state.

     :param config: SDK configuration object. This constructor reads its
         ``callbackURL``, ``workspaceURL`` and ``re_api_url`` attributes
         and several keys of its ``raw`` mapping (see ``default_params``).
     :raises AttributeError: if ``/kb/module/kbase.yml`` contains no
         ``module-version`` entry in the expected layout (the regex
         search returns ``None``).
     """
     self.cfg = config
     self.gi = GenomeInterface(config)
     self.dfu = DataFileUtil(config.callbackURL)
     self.aUtil = AssemblyUtil(config.callbackURL)
     self.ws = Workspace(config.workspaceURL)
     self._messages = []
     # Timestamp of construction, formatted with underscores only.
     self.time_string = str(
         datetime.datetime.fromtimestamp(
             time.time()).strftime('%Y_%m_%d_%H_%M_%S'))
     # Read the module manifest through a context manager so the file
     # handle is closed immediately instead of being leaked.
     with open('/kb/module/kbase.yml') as yml_file:
         yml_text = yml_file.read()
     # Raw string: "\W" is not a valid str escape and a plain literal
     # triggers an invalid-escape-sequence warning. The regex engine
     # still interprets "\n" as a newline, so the pattern is unchanged.
     self.version = re.search(r"module-version:\n\W+(.+)\n",
                              yml_text).group(1)
     # Option flags and mutable state; all start at fixed defaults here.
     self.generate_parents = False
     self.generate_ids = False
     self.genes = OrderedDict()
     self.mrnas = OrderedDict()
     self.cdss = OrderedDict()
     self.noncoding = []
     self.ontologies_present = defaultdict(dict)
     self.ontology_events = list()
     self.skiped_features = Counter()
     self.feature_counts = Counter()
     self.orphan_types = Counter()
     self.contig_seq = {}
     self.circ_contigs = set()
     self.features_spaning_zero = set()
     self.genome_warnings = []
     self.genome_suspect = False
     self.defects = Counter()
     self.spoofed_genes = 0
     self.excluded_features = ('source', 'exon', 'fasta_record')
     self.ont_mappings = load_ontology_mappings('/kb/module/data')
     self.code_table = 11
     self.re_api_url = config.re_api_url
     # dict with feature 'id's that have been used more than once.
     self.used_twice_identifiers = {}
     # Parameter defaults; workspace and object names come from the raw
     # deployment configuration.
     self.default_params = {
         'source': 'Genbank',
         'taxon_wsname': self.cfg.raw['taxon-workspace-name'],
         'taxon_lookup_obj_name': self.cfg.raw['taxon-lookup-object-name'],
         'ontology_wsname': self.cfg.raw['ontology-workspace-name'],
         'ontology_GO_obj_name':
             self.cfg.raw['ontology-gene-ontology-obj-name'],
         'ontology_PO_obj_name':
             self.cfg.raw['ontology-plant-ontology-obj-name'],
         'release': None,
         'genetic_code': 11,
         'generate_ids_if_needed': 0,
         'metadata': {},
     }
예제 #4
0
    def setUpClass(cls):
        """Create the fixtures shared by all tests of this class.

        Builds the method context and service clients from the
        environment and deployment config, creates one workspace for the
        whole test class, saves an assembly from a FASTA file copied into
        the scratch directory, and saves a ContigSet object loaded from
        ``data/rhodobacter_contigs.json``.
        """
        token = environ.get('KB_AUTH_TOKEN', None)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'provenance': [
                            {'service': 'GenomeFileUtil',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenomeFileUtil'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.ws = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = GenomeFileUtil(cls.cfg)
        gi_config = SDKConfig(cls.cfg)
        cls.genome_interface = GenomeInterface(gi_config)
        # create one WS for all tests
        # (millisecond timestamp suffix keeps the workspace name distinct
        # between runs)
        suffix = int(time.time() * 1000)
        wsName = "test_GenomeAnnotationAPI_" + str(suffix)
        cls.ws.create_workspace({'workspace': wsName})
        cls.wsName = wsName

        # save new genome
        assembly_file_path = os.path.join(cls.cfg['scratch'],
                                          'Rhodo_SPAdes_assembly.fa')
        shutil.copy('data/Rhodo_SPAdes_assembly.fa', assembly_file_path)
        au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        cls.assembly_ref = au.save_assembly_from_fasta({
            'workspace_name': cls.wsName,
            'assembly_name': 'ecoli.assembly',
            'file': {'path': assembly_file_path}
        })

        # Load the contig fixture inside a context manager so the file
        # handle is closed rather than leaked.
        with open('data/rhodobacter_contigs.json') as contigs_file:
            rhodobacter_contigs = json.load(contigs_file)
        save_info = {
            'workspace': cls.wsName,
            'objects': [{
                'type': 'KBaseGenomes.ContigSet',
                'data': rhodobacter_contigs,
                'name': 'rhodobacter_contigs'
            }]
        }
        cls.contigset_ref = cls.ws.save_objects(save_info)
예제 #5
0
 def __init__(self, sdk_config):
     """Keep the SDK config, create helper clients and set defaults.

     :param sdk_config: SDK configuration object; its ``callbackURL``
         attribute is passed to ``DataFileUtil`` and the object itself
         to ``GenomeInterface``.
     """
     self.cfg = sdk_config
     self.dfu = DataFileUtil(sdk_config.callbackURL)
     self.gi = GenomeInterface(sdk_config)

     # Names accepted as entries of 'feature_lists'.
     self.valid_feature_lists = set(
         ('features', 'mrnas', 'cdss', 'non_coding_features'))

     # Values used for parameters the caller does not supply.
     self.default_params = dict(
         genome_ref=None,
         feature_lists=['features'],
         filter_ids=[],
         include_functions=True,
         include_aliases=True,
     )
예제 #6
0
    def setUpClass(cls):
        """Create the fixtures shared by all tests of this class.

        Builds contexts for two users (the primary token from
        ``KB_AUTH_TOKEN`` and ``test_token2`` from
        ``/kb/module/work/test.cfg``), the service clients, the genome
        interface and a fresh workspace, then calls ``cls.prepare_data()``.

        :raises ValueError: if ``test_token2`` is missing from the test
            configuration file.
        """
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenomeFileUtil'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        cls.user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': cls.token,
                        'user_id': cls.user_id,
                        'provenance': [
                            {'service': 'kb_ke_util',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.shockURL = cls.cfg['shock-url']
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = GenomeFileUtil(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']

        cls.dfu = DataFileUtil(cls.callback_url)
        cls.cfg['KB_AUTH_TOKEN'] = cls.token

        # build genome interface instance
        gi_config = SDKConfig(cls.cfg)
        cls.genome_interface = GenomeInterface(gi_config)

        # second user
        # The file is parsed with a "[test]" section header prepended to
        # its contents, so its entries land in the "test" section.
        test_cfg_file = '/kb/module/work/test.cfg'
        test_cfg_text = "[test]\n"
        with open(test_cfg_file, "r") as f:
            test_cfg_text += f.read()

        config = ConfigParser()
        # read_string() replaces readfp(), which was deprecated in
        # Python 3.2 and removed in Python 3.12.
        config.read_string(test_cfg_text)

        test_cfg_dict = dict(config.items("test"))
        if 'test_token2' not in test_cfg_dict:
            raise ValueError("Configuration in <module>/test_local/test.cfg file should " +
                             "include second user credentials ('test_token2')")
        token2 = test_cfg_dict['test_token2']
        user2 = auth_client.get_user(token2)
        cls.ctx2 = MethodContext(None)
        cls.ctx2.update({'token': token2,
                         'user_id': user2,
                         'provenance': [
                            {'service': 'NarrativeService',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                         'authenticated': 1})

        # Millisecond timestamp suffix keeps the workspace name distinct
        # between runs.
        suffix = int(time.time() * 1000)
        cls.wsName = "test_SaveGenomeTest_" + str(suffix)
        cls.wsClient.create_workspace({'workspace': cls.wsName})

        cls.nodes_to_delete = []
        cls.prepare_data()
예제 #7
0
 def __init__(self, sdk_config):
     """Keep the SDK config, create helper clients and empty state.

     :param sdk_config: SDK configuration object; its ``callbackURL``
         attribute is passed to ``DataFileUtil`` and the object itself
         to ``GenomeInterface``.
     """
     self.cfg = sdk_config
     self.dfu = DataFileUtil(sdk_config.callbackURL)
     self.gi = GenomeInterface(sdk_config)
     # Both containers start empty; missing counter keys default to 0.
     self.transcript_counter = defaultdict(int)
     self.child_dict = dict()
    def save_one_genome(self, ctx, params):
        """
        Save one genome by delegating to GenomeInterface.save_one_genome.

        A new GenomeInterface is built from self.cfg on every call; the
        result must be a dict and is returned wrapped in a one-element list.
        The ctx argument is not read by this method.

        :param params: instance of type "SaveOneGenomeParams" -> structure:
           parameter "workspace" of String, parameter "name" of String,
           parameter "data" of type "Genome" (Genome object holds much of the
           data relevant for a genome in KBase Genome publications should be
           papers about the genome, not papers about certain features of the
           genome (which go into the Feature object) Should the Genome object
           have a list of feature ids? (in addition to having a list of
           feature_refs) Should the Genome object contain a list of
           contig_ids too? @optional assembly_ref quality close_genomes
           analysis_events features source_id source contigs contig_ids
           publications md5 taxonomy gc_content complete dna_size num_contigs
           contig_lengths contigset_ref @metadata ws gc_content as GC content
           @metadata ws taxonomy as Taxonomy @metadata ws md5 as MD5
           @metadata ws dna_size as Size @metadata ws genetic_code as Genetic
           code @metadata ws domain as Domain @metadata ws source_id as
           Source ID @metadata ws source as Source @metadata ws
           scientific_name as Name @metadata ws length(close_genomes) as
           Close genomes @metadata ws length(features) as Number features
           @metadata ws num_contigs as Number contigs) -> structure:
           parameter "id" of type "Genome_id" (KBase genome ID @id kb),
           parameter "scientific_name" of String, parameter "domain" of
           String, parameter "genetic_code" of Long, parameter "dna_size" of
           Long, parameter "num_contigs" of Long, parameter "contigs" of list
           of type "Contig" (Type spec for a "Contig" subobject in the
           "ContigSet" object Contig_id id - ID of contig in contigset string
           md5 - unique hash of contig sequence string sequence - sequence of
           the contig string description - Description of the contig (e.g.
           everything after the ID in a FASTA file) @optional length md5
           genetic_code cell_compartment replicon_geometry replicon_type name
           description complete) -> structure: parameter "id" of type
           "Contig_id" (ContigSet contig ID @id external), parameter "length"
           of Long, parameter "md5" of String, parameter "sequence" of
           String, parameter "genetic_code" of Long, parameter
           "cell_compartment" of String, parameter "replicon_type" of String,
           parameter "replicon_geometry" of String, parameter "name" of
           String, parameter "description" of String, parameter "complete" of
           type "Bool", parameter "contig_lengths" of list of Long, parameter
           "contig_ids" of list of type "Contig_id" (ContigSet contig ID @id
           external), parameter "source" of String, parameter "source_id" of
           type "source_id" (Reference to a source_id @id external),
           parameter "md5" of String, parameter "taxonomy" of String,
           parameter "gc_content" of Double, parameter "complete" of Long,
           parameter "publications" of list of type "publication" (Structure
           for a publication (from ER API) also want to capture authors,
           journal name (not in ER)) -> tuple of size 7: parameter "id" of
           Long, parameter "source_db" of String, parameter "article_title"
           of String, parameter "link" of String, parameter "pubdate" of
           String, parameter "authors" of String, parameter "journal_name" of
           String, parameter "features" of list of type "Feature" (Structure
           for a single feature of a genome Should genome_id contain the
           genome_id in the Genome object, the workspace id of the Genome
           object, a genomeref, something else? Should sequence be in
           separate objects too? We may want to add additional fields for
           other CDM functions (e.g., atomic regulons, coexpressed fids,
           co_occurring fids,...) @optional orthologs quality
           feature_creation_event md5 location function ontology_terms
           protein_translation protein_families subsystems publications
           subsystem_data aliases annotations regulon_data atomic_regulons
           coexpressed_fids co_occurring_fids dna_sequence
           protein_translation_length dna_sequence_length) -> structure:
           parameter "id" of type "Feature_id" (KBase Feature ID @id
           external), parameter "location" of list of tuple of size 4: type
           "Contig_id" (ContigSet contig ID @id external), Long, String,
           Long, parameter "type" of String, parameter "function" of String,
           parameter "ontology_terms" of mapping from String to mapping from
           String to type "OntologyData" -> structure: parameter "id" of
           String, parameter "ontology_ref" of String, parameter
           "term_lineage" of list of String, parameter "term_name" of String,
           parameter "evidence" of list of type "OntologyEvidence" (@optional
           translation_provenance alignment_evidence) -> structure: parameter
           "method" of String, parameter "method_version" of String,
           parameter "timestamp" of String, parameter
           "translation_provenance" of tuple of size 3: parameter
           "ontologytranslation_ref" of String, parameter "namespace" of
           String, parameter "source_term" of String, parameter
           "alignment_evidence" of list of tuple of size 4: parameter "start"
           of Long, parameter "stop" of Long, parameter "align_length" of
           Long, parameter "identify" of Double, parameter "md5" of String,
           parameter "protein_translation" of String, parameter
           "dna_sequence" of String, parameter "protein_translation_length"
           of Long, parameter "dna_sequence_length" of Long, parameter
           "publications" of list of type "publication" (Structure for a
           publication (from ER API) also want to capture authors, journal
           name (not in ER)) -> tuple of size 7: parameter "id" of Long,
           parameter "source_db" of String, parameter "article_title" of
           String, parameter "link" of String, parameter "pubdate" of String,
           parameter "authors" of String, parameter "journal_name" of String,
           parameter "subsystems" of list of String, parameter
           "protein_families" of list of type "ProteinFamily" (Structure for
           a protein family @optional query_begin query_end subject_begin
           subject_end score evalue subject_description release_version) ->
           structure: parameter "id" of String, parameter "subject_db" of
           String, parameter "release_version" of String, parameter
           "subject_description" of String, parameter "query_begin" of Long,
           parameter "query_end" of Long, parameter "subject_begin" of Long,
           parameter "subject_end" of Long, parameter "score" of Double,
           parameter "evalue" of Double, parameter "aliases" of list of
           String, parameter "orthologs" of list of tuple of size 2: String,
           Double, parameter "annotations" of list of type "annotation" (a
           notation by a curator of the genome object) -> tuple of size 3:
           parameter "comment" of String, parameter "annotator" of String,
           parameter "annotation_time" of Double, parameter "subsystem_data"
           of list of type "subsystem_data" (Structure for subsystem data
           (from CDMI API)) -> tuple of size 3: parameter "subsystem" of
           String, parameter "variant" of String, parameter "role" of String,
           parameter "regulon_data" of list of type "regulon_data" (Structure
           for regulon data (from CDMI API)) -> tuple of size 3: parameter
           "regulon_id" of String, parameter "regulon_set" of list of type
           "Feature_id" (KBase Feature ID @id external), parameter "tfs" of
           list of type "Feature_id" (KBase Feature ID @id external),
           parameter "atomic_regulons" of list of type "atomic_regulon"
           (Structure for an atomic regulon (from CDMI API)) -> tuple of size
           2: parameter "atomic_regulon_id" of String, parameter
           "atomic_regulon_size" of Long, parameter "coexpressed_fids" of
           list of type "coexpressed_fid" (Structure for coexpressed fids
           (from CDMI API)) -> tuple of size 2: parameter "scored_fid" of
           type "Feature_id" (KBase Feature ID @id external), parameter
           "score" of Double, parameter "co_occurring_fids" of list of type
           "co_occurring_fid" (Structure for co-occurring fids (from CDMI
           API)) -> tuple of size 2: parameter "scored_fid" of type
           "Feature_id" (KBase Feature ID @id external), parameter "score" of
           Double, parameter "quality" of type "Feature_quality_measure"
           (@optional weighted_hit_count hit_count existence_priority
           overlap_rules pyrrolysylprotein truncated_begin truncated_end
           existence_confidence frameshifted selenoprotein) -> structure:
           parameter "truncated_begin" of type "Bool", parameter
           "truncated_end" of type "Bool", parameter "existence_confidence"
           of Double, parameter "frameshifted" of type "Bool", parameter
           "selenoprotein" of type "Bool", parameter "pyrrolysylprotein" of
           type "Bool", parameter "overlap_rules" of list of String,
           parameter "existence_priority" of Double, parameter "hit_count" of
           Double, parameter "weighted_hit_count" of Double, parameter
           "feature_creation_event" of type "Analysis_event" (@optional
           tool_name execution_time parameters hostname) -> structure:
           parameter "id" of type "Analysis_event_id", parameter "tool_name"
           of String, parameter "execution_time" of Double, parameter
           "parameters" of list of String, parameter "hostname" of String,
           parameter "contigset_ref" of type "ContigSet_ref" (Reference to a
           ContigSet object containing the contigs for this genome in the
           workspace @id ws KBaseGenomes.ContigSet), parameter "assembly_ref"
           of type "Assembly_ref" (Reference to an Assembly object in the
           workspace @id ws KBaseGenomeAnnotations.Assembly), parameter
           "quality" of type "Genome_quality_measure" (@optional
           frameshift_error_rate sequence_error_rate) -> structure: parameter
           "frameshift_error_rate" of Double, parameter "sequence_error_rate"
           of Double, parameter "close_genomes" of list of type
           "Close_genome" (@optional genome closeness_measure) -> structure:
           parameter "genome" of type "Genome_id" (KBase genome ID @id kb),
           parameter "closeness_measure" of Double, parameter
           "analysis_events" of list of type "Analysis_event" (@optional
           tool_name execution_time parameters hostname) -> structure:
           parameter "id" of type "Analysis_event_id", parameter "tool_name"
           of String, parameter "execution_time" of Double, parameter
           "parameters" of list of String, parameter "hostname" of String,
           parameter "hidden" of type "boolean" (A boolean - 0 for false, 1
           for true. @range (0, 1))
        :returns: instance of type "SaveGenomeResult" -> structure: parameter
           "info" of type "object_info" (Information about an object,
           including user provided metadata. obj_id objid - the numerical id
           of the object. obj_name name - the name of the object. type_string
           type - the type of the object. timestamp save_date - the save date
           of the object. obj_ver ver - the version of the object. username
           saved_by - the user that saved or copied the object. ws_id wsid -
           the workspace containing the object. ws_name workspace - the
           workspace containing the object. string chsum - the md5 checksum
           of the object. int size - the size of the object in bytes.
           usermeta meta - arbitrary user-supplied metadata about the
           object.) -> tuple of size 11: parameter "objid" of type "obj_id"
           (The unique, permanent numerical ID of an object.), parameter
           "name" of type "obj_name" (A string used as a name for an object.
           Any string consisting of alphanumeric characters and the
           characters |._- that is not an integer is acceptable.), parameter
           "type" of type "type_string" (A type string. Specifies the type
           and its version in a single string in the format
           [module].[typename]-[major].[minor]: module - a string. The module
           name of the typespec containing the type. typename - a string. The
           name of the type as assigned by the typedef statement. major - an
           integer. The major version of the type. A change in the major
           version implies the type has changed in a non-backwards compatible
           way. minor - an integer. The minor version of the type. A change
           in the minor version implies that the type has changed in a way
           that is backwards compatible with previous type definitions. In
           many cases, the major and minor versions are optional, and if not
           provided the most recent version will be used. Example:
           MyModule.MyType-3.1), parameter "save_date" of type "timestamp" (A
           time in the format YYYY-MM-DDThh:mm:ssZ, where Z is either the
           character Z (representing the UTC timezone) or the difference in
           time to UTC in the format +/-HHMM, eg: 2012-12-17T23:24:06-0500
           (EST time) 2013-04-03T08:56:32+0000 (UTC time)
           2013-04-03T08:56:32Z (UTC time)), parameter "version" of Long,
           parameter "saved_by" of type "username" (Login name of a KBase
           user account.), parameter "wsid" of type "ws_id" (The unique,
           permanent numerical ID of a workspace.), parameter "workspace" of
           type "ws_name" (A string used as a name for a workspace. Any
           string consisting of alphanumeric characters and "_", ".", or "-"
           that is not an integer is acceptable. The name may optionally be
           prefixed with the workspace owner's user name and a colon, e.g.
           kbasetest:my_workspace.), parameter "chsum" of String, parameter
           "size" of Long, parameter "meta" of type "usermeta" (User provided
           metadata about an object. Arbitrary key-value pairs provided by
           the user.) -> mapping from String to String
        :raises ValueError: if the delegate's result is not a dict.
        """
        # ctx is the context object
        # return variables are: returnVal
        # NOTE(review): the BEGIN/END markers below look like code-generator
        # delimiters for the hand-written section - keep them intact.
        #BEGIN save_one_genome

        genome_interface = GenomeInterface(self.cfg)
        returnVal = genome_interface.save_one_genome(params)
        #END save_one_genome

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method save_one_genome return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        # (wrapped in a single-element list)
        return [returnVal]
예제 #9
0
    def save_one_genome(self, ctx, params):
        """
        :param params: instance of type "SaveOneGenomeParams" -> structure:
           parameter "workspace" of String, parameter "name" of String,
           parameter "data" of type "Genome" (Genome object holds much of the
           data relevant for a genome in KBase Genome publications should be
           papers about the genome Should the Genome object contain a list of
           contig_ids too? Source: allowed entries RefSeq, Ensembl,
           Phytozome, RAST, Prokka, User_upload #allowed entries RefSeq,
           Ensembl, Phytozome, RAST, Prokka, User_upload controlled
           vocabulary managed by API Domain is a controlled vocabulary
           Warnings : mostly controlled vocab but also allow for unstructured
           Genome_tiers : controlled vocabulary (based on ap input and API
           checked) Allowed values: #Representative, Reference, ExternalDB,
           User Examples Tiers: All phytozome - Representative and ExternalDB
           Phytozome flagship genomes - Reference, Representative and
           ExternalDB Ensembl - Representative and ExternalDB RefSeq
           Reference - Reference, Representative and ExternalDB RefSeq
           Representative - Representative and ExternalDB RefSeq Latest or
           All Assemblies folder - ExternalDB User Data - User tagged Example
           Sources: RefSeq, Ensembl, Phytozome, Microcosm, User, RAST,
           Prokka, (other annotators) @optional warnings contig_lengths
           contig_ids source_id taxonomy publications @optional
           ontology_events ontologies_present non_coding_features mrnas
           @optional genbank_handle_ref gff_handle_ref
           external_source_origination_date @optional release
           original_source_file_name notes quality_scores suspect
           assembly_ref @metadata ws gc_content as GC content @metadata ws
           taxonomy as Taxonomy @metadata ws md5 as MD5 @metadata ws dna_size
           as Size @metadata ws genetic_code as Genetic code @metadata ws
           domain as Domain @metadata ws source_id as Source ID @metadata ws
           source as Source @metadata ws scientific_name as Name @metadata ws
           length(features) as Number of Protein Encoding Genes @metadata ws
           length(cdss) as Number of CDS @metadata ws assembly_ref as
           Assembly Object @metadata ws num_contigs as Number contigs
           @metadata ws length(warnings) as Number of Genome Level Warnings
           @metadata ws suspect as Suspect Genome) -> structure: parameter
           "id" of type "Genome_id" (KBase genome ID @id kb), parameter
           "scientific_name" of String, parameter "domain" of String,
           parameter "warnings" of list of String, parameter "genome_tiers"
           of list of String, parameter "feature_counts" of mapping from
           String to Long, parameter "genetic_code" of Long, parameter
           "dna_size" of Long, parameter "num_contigs" of Long, parameter
           "molecule_type" of String, parameter "contig_lengths" of list of
           Long, parameter "contig_ids" of list of String, parameter "source"
           of String, parameter "source_id" of type "source_id" (Reference to
           a source_id @id external), parameter "md5" of String, parameter
           "taxonomy" of String, parameter "gc_content" of Double, parameter
           "publications" of list of type "publication" (Structure for a
           publication (float pubmedid string source (ex. Pubmed) string
           title string web address string  publication year string authors
           string journal)) -> tuple of size 7: parameter "pubmedid" of
           Double, parameter "source" of String, parameter "title" of String,
           parameter "url" of String, parameter "year" of String, parameter
           "authors" of String, parameter "journal" of String, parameter
           "ontology_events" of list of type "Ontology_event" (@optional
           ontology_ref method_version eco) -> structure: parameter "id" of
           String, parameter "ontology_ref" of type "Ontology_ref" (Reference
           to a ontology object @id ws KBaseOntology.OntologyDictionary),
           parameter "method" of String, parameter "method_version" of
           String, parameter "timestamp" of String, parameter "eco" of
           String, parameter "ontologies_present" of mapping from String to
           mapping from String to String, parameter "features" of list of
           type "Feature" (Structure for a single CDS encoding ???gene??? of
           a genome ONLY PUT GENES THAT HAVE A CORRESPONDING CDS IN THIS
           ARRAY NOTE: Sequence is optional. Ideally we can keep it in here,
           but Recognize due to space constraints another solution may be
           needed. We may want to add additional fields for other CDM
           functions (e.g., atomic regulons, coexpressed fids, co_occurring
           fids,...) protein_translation_length and protein_translation are
           for longest coded protein (representative protein for splice
           variants) NOTE: New Aliases field definitely breaks compatibility.
           As Does Function. flags are flag fields in GenBank format. This
           will be a controlled vocabulary. Initially Acceptable values are
           pseudo, ribosomal_slippage, and trans_splicing Md5 is the md5 of
           dna_sequence. @optional functions ontology_terms note
           protein_translation mrnas flags warnings @optional inference_data
           dna_sequence aliases db_xrefs children functional_descriptions) ->
           structure: parameter "id" of type "Feature_id" (KBase Feature ID
           @id external), parameter "location" of list of tuple of size 4:
           type "Contig_id" (ContigSet contig ID @id external), Long, String,
           Long, parameter "functions" of list of String, parameter
           "functional_descriptions" of list of String, parameter
           "ontology_terms" of mapping from String to mapping from String to
           list of Long, parameter "note" of String, parameter "md5" of
           String, parameter "protein_translation" of String, parameter
           "protein_translation_length" of Long, parameter "cdss" of list of
           String, parameter "mrnas" of list of String, parameter "children"
           of list of String, parameter "flags" of list of String, parameter
           "warnings" of list of String, parameter "inference_data" of list
           of type "InferenceInfo" (category;#Maybe a controlled vocabulary
           type;#Maybe a controlled vocabulary) -> structure: parameter
           "category" of String, parameter "type" of String, parameter
           "evidence" of String, parameter "dna_sequence" of String,
           parameter "dna_sequence_length" of Long, parameter "aliases" of
           list of tuple of size 2: parameter "fieldname" of String,
           parameter "alias" of String, parameter "db_xrefs" of list of tuple
           of size 2: parameter "db_source" of String, parameter
           "db_identifier" of String, parameter "non_coding_features" of list
           of type "NonCodingFeature" (Structure for a single feature that is
           NOT one of the following: Protein encoding gene (gene that has a
           corresponding CDS) mRNA CDS Note pseudo-genes and Non protein
           encoding genes are put into this flags are flag fields in GenBank
           format. This will be a controlled vocabulary. Initially Acceptable
           values are pseudo, ribosomal_slippage, and trans_splicing Md5 is
           the md5 of dna_sequence. @optional functions ontology_terms note
           flags warnings functional_descriptions @optional inference_data
           dna_sequence aliases db_xrefs children parent_gene) -> structure:
           parameter "id" of type "Feature_id" (KBase Feature ID @id
           external), parameter "location" of list of tuple of size 4: type
           "Contig_id" (ContigSet contig ID @id external), Long, String,
           Long, parameter "type" of String, parameter "functions" of list of
           String, parameter "functional_descriptions" of list of String,
           parameter "ontology_terms" of mapping from String to mapping from
           String to list of Long, parameter "note" of String, parameter
           "md5" of String, parameter "parent_gene" of String, parameter
           "children" of list of String, parameter "flags" of list of String,
           parameter "warnings" of list of String, parameter "inference_data"
           of list of type "InferenceInfo" (category;#Maybe a controlled
           vocabulary type;#Maybe a controlled vocabulary) -> structure:
           parameter "category" of String, parameter "type" of String,
           parameter "evidence" of String, parameter "dna_sequence" of
           String, parameter "dna_sequence_length" of Long, parameter
           "aliases" of list of tuple of size 2: parameter "fieldname" of
           String, parameter "alias" of String, parameter "db_xrefs" of list
           of tuple of size 2: parameter "db_source" of String, parameter
           "db_identifier" of String, parameter "cdss" of list of type "CDS"
           (Structure for a single feature CDS flags are flag fields in
           GenBank format. This will be a controlled vocabulary. Initially
           Acceptable values are pseudo, ribosomal_slippage, and
           trans_splicing Md5 is the md5 of dna_sequence. @optional
           parent_gene parent_mrna functions ontology_terms note flags
           warnings @optional inference_data dna_sequence aliases db_xrefs
           functional_descriptions) -> structure: parameter "id" of type
           "cds_id" (KBase CDS ID @id external), parameter "location" of list
           of tuple of size 4: type "Contig_id" (ContigSet contig ID @id
           external), Long, String, Long, parameter "md5" of String,
           parameter "protein_md5" of String, parameter "parent_gene" of type
           "Feature_id" (KBase Feature ID @id external), parameter
           "parent_mrna" of type "mrna_id" (KBase mRNA ID @id external),
           parameter "note" of String, parameter "functions" of list of
           String, parameter "functional_descriptions" of list of String,
           parameter "ontology_terms" of mapping from String to mapping from
           String to list of Long, parameter "flags" of list of String,
           parameter "warnings" of list of String, parameter "inference_data"
           of list of type "InferenceInfo" (category;#Maybe a controlled
           vocabulary type;#Maybe a controlled vocabulary) -> structure:
           parameter "category" of String, parameter "type" of String,
           parameter "evidence" of String, parameter "protein_translation" of
           String, parameter "protein_translation_length" of Long, parameter
           "aliases" of list of tuple of size 2: parameter "fieldname" of
           String, parameter "alias" of String, parameter "db_xrefs" of list
           of tuple of size 2: parameter "db_source" of String, parameter
           "db_identifier" of String, parameter "dna_sequence" of String,
           parameter "dna_sequence_length" of Long, parameter "mrnas" of list
           of type "mRNA" (Structure for a single feature mRNA flags are flag
           fields in GenBank format. This will be a controlled vocabulary.
           Initially Acceptable values are pseudo, ribosomal_slippage, and
           trans_splicing Md5 is the md5 of dna_sequence. @optional
           parent_gene cds functions ontology_terms note flags warnings
           @optional inference_data dna_sequence aliases db_xrefs
           functional_descriptions) -> structure: parameter "id" of type
           "mrna_id" (KBase mRNA ID @id external), parameter "location" of
           list of tuple of size 4: type "Contig_id" (ContigSet contig ID @id
           external), Long, String, Long, parameter "md5" of String,
           parameter "parent_gene" of type "Feature_id" (KBase Feature ID @id
           external), parameter "cds" of type "cds_id" (KBase CDS ID @id
           external), parameter "dna_sequence" of String, parameter
           "dna_sequence_length" of Long, parameter "note" of String,
           parameter "functions" of list of String, parameter
           "functional_descriptions" of list of String, parameter
           "ontology_terms" of mapping from String to mapping from String to
           list of Long, parameter "flags" of list of String, parameter
           "warnings" of list of String, parameter "inference_data" of list
           of type "InferenceInfo" (category;#Maybe a controlled vocabulary
           type;#Maybe a controlled vocabulary) -> structure: parameter
           "category" of String, parameter "type" of String, parameter
           "evidence" of String, parameter "aliases" of list of tuple of size
           2: parameter "fieldname" of String, parameter "alias" of String,
           parameter "db_xrefs" of list of tuple of size 2: parameter
           "db_source" of String, parameter "db_identifier" of String,
           parameter "assembly_ref" of type "Assembly_ref" (Reference to an
           Assembly object in the workspace @id ws
           KBaseGenomeAnnotations.Assembly), parameter "taxon_ref" of type
           "Taxon_ref" (Reference to a taxon object @id ws
           KBaseGenomeAnnotations.Taxon), parameter "genbank_handle_ref" of
           type "genbank_handle_ref" (Reference to a handle to the Genbank
           file on shock @id handle), parameter "gff_handle_ref" of type
           "gff_handle_ref" (Reference to a handle to the GFF file on shock
           @id handle), parameter "external_source_origination_date" of
           String, parameter "release" of String, parameter
           "original_source_file_name" of String, parameter "notes" of
           String, parameter "quality_scores" of list of type
           "GenomeQualityScore" (Score_interpretation : fraction_complete -
           controlled vocabulary managed by API @optional method_report_ref
           method_version) -> structure: parameter "method" of String,
           parameter "method_report_ref" of type "Method_report_ref"
           (Reference to a report object @id ws KBaseReport.Report),
           parameter "method_version" of String, parameter "score" of String,
           parameter "score_interpretation" of String, parameter "timestamp"
           of String, parameter "suspect" of type "Bool", parameter "hidden"
           of type "boolean" (A boolean - 0 for false, 1 for true. @range (0,
           1)), parameter "upgrade" of type "boolean" (A boolean - 0 for
           false, 1 for true. @range (0, 1))
        :returns: instance of type "SaveGenomeResult" -> structure: parameter
           "info" of type "object_info" (Information about an object,
           including user provided metadata. obj_id objid - the numerical id
           of the object. obj_name name - the name of the object. type_string
           type - the type of the object. timestamp save_date - the save date
           of the object. obj_ver ver - the version of the object. username
           saved_by - the user that saved or copied the object. ws_id wsid -
           the workspace containing the object. ws_name workspace - the
           workspace containing the object. string chsum - the md5 checksum
           of the object. int size - the size of the object in bytes.
           usermeta meta - arbitrary user-supplied metadata about the
           object.) -> tuple of size 11: parameter "objid" of type "obj_id"
           (The unique, permanent numerical ID of an object.), parameter
           "name" of type "obj_name" (A string used as a name for an object.
           Any string consisting of alphanumeric characters and the
           characters |._- that is not an integer is acceptable.), parameter
           "type" of type "type_string" (A type string. Specifies the type
           and its version in a single string in the format
           [module].[typename]-[major].[minor]: module - a string. The module
           name of the typespec containing the type. typename - a string. The
           name of the type as assigned by the typedef statement. major - an
           integer. The major version of the type. A change in the major
           version implies the type has changed in a non-backwards compatible
           way. minor - an integer. The minor version of the type. A change
           in the minor version implies that the type has changed in a way
           that is backwards compatible with previous type definitions. In
           many cases, the major and minor versions are optional, and if not
           provided the most recent version will be used. Example:
           MyModule.MyType-3.1), parameter "save_date" of type "timestamp" (A
           time in the format YYYY-MM-DDThh:mm:ssZ, where Z is either the
           character Z (representing the UTC timezone) or the difference in
           time to UTC in the format +/-HHMM, eg: 2012-12-17T23:24:06-0500
           (EST time) 2013-04-03T08:56:32+0000 (UTC time)
           2013-04-03T08:56:32Z (UTC time)), parameter "version" of Long,
           parameter "saved_by" of type "username" (Login name of a KBase
           user account.), parameter "wsid" of type "ws_id" (The unique,
           permanent numerical ID of a workspace.), parameter "workspace" of
           type "ws_name" (A string used as a name for a workspace. Any
           string consisting of alphanumeric characters and "_", ".", or "-"
           that is not an integer is acceptable. The name may optionally be
           prefixed with the workspace owner's user name and a colon, e.g.
           kbasetest:my_workspace.), parameter "chsum" of String, parameter
           "size" of Long, parameter "meta" of type "usermeta" (User provided
           metadata about an object. Arbitrary key-value pairs provided by
           the user.) -> mapping from String to String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN save_one_genome

        genome_interface = GenomeInterface(self.cfg)
        returnVal = genome_interface.save_one_genome(params)
        #END save_one_genome

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method save_one_genome return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
예제 #10
0
 def __init__(self, sdk_config):
     """Store the SDK configuration and create the helper objects.

     :param sdk_config: configuration object for this instance. It must
         expose a ``callbackURL`` attribute (read below) and is also
         passed unchanged to ``GenomeInterface``.
     """
     # Keep the configuration object on the instance.
     self.cfg = sdk_config
     # DataFileUtil is constructed from the configured callback URL.
     self.dfu = DataFileUtil(self.cfg.callbackURL)
     # GenomeInterface is given the same configuration object.
     self.gi = GenomeInterface(sdk_config)