def __read_features(self, feature_ids):
    """Returns a dictionary of the features for the specified feature ids.

    The text returned by the RSAT database's get_features() for this
    object's species is parsed as a delimited file, using '--' as the
    comment marker. Every row whose first column is contained in
    feature_ids is converted into an st.Feature.

    feature_ids -- container of the feature ids to keep; it is only used
                   for membership tests
    returns     -- dict mapping feature id -> st.Feature
    """

    def read_feature(line):
        """Creates a feature and its location from a DelimitedFile line"""
        contig = line[3]
        is_reverse = line[6] == 'R'
        # note that feature positions can sometimes start with a '>'
        # or '<', so make sure it is stripped away.
        # str.lstrip() is used rather than string.lstrip(): the module
        # level function only exists in Python 2, the method works in
        # both Python 2 and 3
        return st.Feature(line[0], line[1], line[2],
                          st.Location(contig,
                                      int(line[4].lstrip('<>')),
                                      int(line[5].lstrip('<>')),
                                      is_reverse))

    features = {}
    dfile = util.dfile_from_text(
        self.__rsatdb().get_features(self.species()), comment='--')
    for line in dfile.lines:
        feature_id = line[0]
        if feature_id in feature_ids:
            features[feature_id] = read_feature(line)
    return features
def __read_features(self, feature_ids):
    """Returns a dictionary of the features for the specified feature ids.

    Parses the text returned by the RSAT database's get_features() for
    this object's species as a delimited file ('--' is passed as the
    comment marker) and builds an st.Feature for each row whose first
    column is contained in feature_ids.

    feature_ids -- container of the feature ids to keep; it is only used
                   for membership tests
    returns     -- dict mapping feature id -> st.Feature
    """

    def read_feature(line):
        """Creates a feature and its location from a DelimitedFile line"""
        contig = line[3]
        is_reverse = line[6] == 'R'
        # note that feature positions can sometimes start with a '>'
        # or '<', so make sure it is stripped away.
        # The str.lstrip() method replaces string.lstrip(), which was
        # removed in Python 3; the method behaves the same in Python 2
        return st.Feature(
            line[0], line[1], line[2],
            st.Location(contig,
                        int(line[4].lstrip('<>')),
                        int(line[5].lstrip('<>')),
                        is_reverse))

    features = {}
    dfile = util.dfile_from_text(self.__rsatdb().get_features(
        self.species()), comment='--')
    for line in dfile.lines:
        feature_id = line[0]
        if feature_id in feature_ids:
            features[feature_id] = read_feature(line)
    return features
def test_create_from_text(self):
    """Reads a tab delimited file from a text.

    Two tab-separated lines are parsed with util.dfile_from_text(); the
    test checks that each line is split into its fields and that no
    header is reported.
    """
    dfile = util.dfile_from_text(
        "value11\tvalue12\nvalue21\tvalue22")
    lines = dfile.lines
    # assertEqual is used instead of assertEquals: the latter is a
    # deprecated alias that was removed from unittest in Python 3.12
    self.assertEqual(["value11", "value12"], lines[0])
    self.assertEqual(["value21", "value22"], lines[1])
    self.assertIsNone(dfile.header)
def get_taxonomy_id(self, organism):
    """Returns the taxonomy id read from the organism names file.

    The organism names file for the given organism is fetched through
    util.read_url_cached() and parsed as a delimited file with '--' as
    the comment marker. The first field of the first line is passed
    through patches.patch_ncbi_taxonomy() and the result is returned.
    """
    logging.info('RSAT - get_organism_names(%s)', organism)
    cache_file = "/".join([self.cache_dir, 'rsatnames_' + organism])
    names_url = "/".join([self.base_url,
                          RsatDatabase.DIR_PATH,
                          organism,
                          RsatDatabase.ORGANISM_NAMES_PATH])
    names_text = util.read_url_cached(names_url, cache_file)
    names_dfile = util.dfile_from_text(names_text, comment='--')
    return patches.patch_ncbi_taxonomy(names_dfile.lines[0][0])
def __get_predictions(microbes_online, organism):
    """Reads the operon predictions for a given organism from MicrobesOnline.

    The prediction text is requested for organism.taxonomy_id() and parsed
    as a delimited file with a header. For every line whose column 6 is
    the string 'TRUE', columns 2 and 3 are each passed through
    patches.patch_mo_gene() together with organism.code.

    returns -- list of (gene, gene) tuples, in file order
    """
    preds_text = microbes_online.get_operon_predictions_for(
        organism.taxonomy_id())
    dfile = util.dfile_from_text(preds_text, has_header=True)
    code = organism.code

    preds = []
    for row in dfile.lines:
        # only rows flagged 'TRUE' in column 6 contribute a pair
        if row[6] != 'TRUE':
            continue
        first_gene = patches.patch_mo_gene(code, row[2])
        second_gene = patches.patch_mo_gene(code, row[3])
        preds.append((first_gene, second_gene))

    logging.info("%d prediction pairs read", len(preds))
    return preds
def thesaurus(self):
    """Returns the thesaurus built from the feature_names file.

    The result is stored on the instance and reused on later calls,
    because it is used many times. It is rebuilt whenever the stored
    value is falsy.
    """
    # cached value available: nothing to do
    if self.__synonyms:
        return self.__synonyms

    names_text = self.__rsatdb().get_feature_names(self.species())
    names_dfile = util.dfile_from_text(names_text, comment='--')
    modifiers = [thesaurus.strip_vng_modification]
    self.__synonyms = thesaurus.create_from_rsat_feature_names(
        names_dfile, modifiers)
    return self.__synonyms
def get_taxonomy_id(self, organism):
    """Returns the taxonomy id found in the organism names file.

    Downloads (or reads from the cache file) the organism names file of
    the given organism, parses it as a delimited file using '--' as the
    comment marker, and returns the first field of the first line after
    applying patches.patch_ncbi_taxonomy() to it.
    """
    logging.info('RSAT - get_organism_names(%s)', organism)
    cache_file = "/".join([self.cache_dir, 'rsatnames_' + organism])
    url_parts = [
        self.base_url,
        RsatDatabase.DIR_PATH,
        organism,
        RsatDatabase.ORGANISM_NAMES_PATH,
    ]
    text = util.read_url_cached("/".join(url_parts), cache_file)
    parsed_names = util.dfile_from_text(text, comment='--')
    return patches.patch_ncbi_taxonomy(parsed_names.lines[0][0])
def get_taxonomy_id(rsat_organism):
    """Determine the taxonomy id from the RSAT database.

    The organism names text obtained from rsatdb for rsat_organism is
    parsed as a delimited file with '--' as the comment marker; the first
    field of the first line is returned unchanged.
    """
    names_text = rsatdb.get_organism_names(rsat_organism)
    names_dfile = util.dfile_from_text(names_text, comment='--')
    first_row = names_dfile.lines[0]
    return first_row[0]