Ejemplo n.º 1
0
    def __read_features(self, feature_ids):
        """Returns a dictionary that maps each requested feature id to the
        feature object created for it.

        The feature table text is obtained from the RSAT database object
        for this organism's species and parsed with util.dfile_from_text,
        using '--' as the comment marker. Only lines whose first column is
        contained in feature_ids are converted; requested ids that do not
        occur in the table are simply missing from the result. If an id
        occurs on several lines, the last one wins."""

        def read_feature(line):
            """Creates a feature and its location on the associated contig
            from the current DelimitedFile line"""
            contig = line[3]
            # an 'R' in column 6 marks the location as reverse
            is_reverse = line[6] == 'R'

            # note that feature positions can sometimes start with a '>'
            # or '<', so make sure it is stripped away. The str method is
            # used instead of string.lstrip(), because the module level
            # function does not exist anymore in Python 3
            return st.Feature(line[0], line[1], line[2],
                              st.Location(contig,
                                          int(line[4].lstrip('<>')),
                                          int(line[5].lstrip('<>')),
                                          is_reverse))

        dfile = util.dfile_from_text(
            self.__rsatdb().get_features(self.species()), comment='--')
        return {line[0]: read_feature(line)
                for line in dfile.lines if line[0] in feature_ids}
Ejemplo n.º 2
0
    def __read_features(self, feature_ids):
        """Returns a dictionary that maps each requested feature id to the
        feature object created for it.

        The feature table text comes from the RSAT database object for this
        organism's species and is parsed with util.dfile_from_text, using
        '--' as the comment marker. Lines whose first column is not in
        feature_ids are skipped, so requested ids that are absent from the
        table do not appear in the result. If an id occurs on several
        lines, the last one wins."""
        def read_feature(line):
            """Creates a feature and its location on the associated contig
            from the current DelimitedFile line"""
            contig = line[3]
            # an 'R' in column 6 marks the location as reverse
            is_reverse = line[6] == 'R'

            # note that feature positions can sometimes start with a '>'
            # or '<', so make sure it is stripped away. str.lstrip() is
            # called on the value itself, because the string.lstrip()
            # module function does not exist anymore in Python 3
            return st.Feature(
                line[0], line[1], line[2],
                st.Location(contig, int(line[4].lstrip('<>')),
                            int(line[5].lstrip('<>')), is_reverse))

        dfile = util.dfile_from_text(
            self.__rsatdb().get_features(self.species()), comment='--')
        return {
            line[0]: read_feature(line)
            for line in dfile.lines if line[0] in feature_ids
        }
Ejemplo n.º 3
0
 def test_create_from_text(self):
     """Reads a tab delimited file from a text and checks that both lines
     are split into their columns and that no header is set"""
     dfile = util.dfile_from_text(
         "value11\tvalue12\nvalue21\tvalue22")
     lines = dfile.lines
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which was removed from unittest in Python 3.12
     self.assertEqual(["value11", "value12"], lines[0])
     self.assertEqual(["value21", "value22"], lines[1])
     # the file was created without requesting a header
     self.assertIsNone(dfile.header)
Ejemplo n.º 4
0
 def get_taxonomy_id(self, organism):
     """returns the taxonomy id for the specified organism.

     The organism names file is read through util.read_url_cached (with a
     cache file located under self.cache_dir) and parsed using '--' as
     the comment marker. The first column of its first line is passed
     through patches.patch_ncbi_taxonomy and that result is returned."""
     # NOTE(review): the log message names get_organism_names, not this
     # method - left unchanged here, confirm whether that is intended
     logging.info('RSAT - get_organism_names(%s)', organism)
     cache_path = "/".join((self.cache_dir, 'rsatnames_' + organism))
     url_parts = (self.base_url, RsatDatabase.DIR_PATH, organism,
                  RsatDatabase.ORGANISM_NAMES_PATH)
     names_text = util.read_url_cached("/".join(url_parts), cache_path)
     names_dfile = util.dfile_from_text(names_text, comment='--')
     first_line = names_dfile.lines[0]
     return patches.patch_ncbi_taxonomy(first_line[0])
Ejemplo n.º 5
0
def __get_predictions(microbes_online, organism):
    """collects the operon prediction gene pairs for a given organism.

    The prediction text is requested from microbes_online for the
    organism's taxonomy id and parsed as a delimited file with a header.
    For every line whose column 6 equals 'TRUE', the values of columns 2
    and 3 are patched with patches.patch_mo_gene (using the organism's
    code) and stored as a tuple. The list of these tuples is returned."""
    taxonomy_id = organism.taxonomy_id()
    preds_text = microbes_online.get_operon_predictions_for(taxonomy_id)
    dfile = util.dfile_from_text(preds_text, has_header=True)
    code = organism.code

    preds = []
    for line in dfile.lines:
        if line[6] == 'TRUE':
            first_gene = patches.patch_mo_gene(code, line[2])
            second_gene = patches.patch_mo_gene(code, line[3])
            preds.append((first_gene, second_gene))

    logging.info("%d prediction pairs read", len(preds))
    return preds
def __get_predictions(microbes_online, organism):
    """collects the operon prediction gene pairs for a given organism.

    The prediction text is requested from microbes_online for the
    organism's taxonomy id and parsed as a delimited file with a header.
    Only lines whose column 6 equals 'TRUE' are used: the values of
    columns 2 and 3 are each patched with patches.patch_mo_gene (using
    the organism's code) and combined into a tuple. Returns the list of
    those tuples."""
    taxonomy_id = organism.taxonomy_id()
    preds_text = microbes_online.get_operon_predictions_for(taxonomy_id)
    dfile = util.dfile_from_text(preds_text, has_header=True)
    code = organism.code

    preds = []
    for line in dfile.lines:
        if line[6] == 'TRUE':
            pair = (patches.patch_mo_gene(code, line[2]),
                    patches.patch_mo_gene(code, line[3]))
            preds.append(pair)

    logging.info("%d prediction pairs read", len(preds))
    return preds
Ejemplo n.º 7
0
 def thesaurus(self):
     """returns the thesaurus created from a feature_names file.

     The thesaurus is kept in self.__synonyms after it was created, so
     later calls return the stored object. A stored value that evaluates
     to false is treated as missing and leads to a new read.
     """
     if self.__synonyms:
         return self.__synonyms

     names_text = self.__rsatdb().get_feature_names(self.species())
     names_dfile = util.dfile_from_text(names_text, comment='--')
     modifiers = [thesaurus.strip_vng_modification]
     self.__synonyms = thesaurus.create_from_rsat_feature_names(
         names_dfile, modifiers)
     return self.__synonyms
Ejemplo n.º 8
0
 def get_taxonomy_id(self, organism):
     """returns the taxonomy id for the specified organism.

     Reads the organism names file through util.read_url_cached, using a
     cache file located under self.cache_dir, and parses it with '--' as
     the comment marker. The value in the first column of the first line
     is handed to patches.patch_ncbi_taxonomy, whose result is
     returned."""
     # NOTE(review): the log message names get_organism_names, not this
     # method - left unchanged here, confirm whether that is intended
     logging.info('RSAT - get_organism_names(%s)', organism)
     cache_path = "/".join((self.cache_dir, 'rsatnames_' + organism))
     names_url = "/".join((
         self.base_url,
         RsatDatabase.DIR_PATH,
         organism,
         RsatDatabase.ORGANISM_NAMES_PATH,
     ))
     names_text = util.read_url_cached(names_url, cache_path)
     names_dfile = util.dfile_from_text(names_text, comment='--')
     raw_taxonomy_id = names_dfile.lines[0][0]
     return patches.patch_ncbi_taxonomy(raw_taxonomy_id)
Ejemplo n.º 9
0
 def thesaurus(self):
     """returns the thesaurus created from a feature_names file.

     Once created, the thesaurus is stored in self.__synonyms and reused
     by later calls. A stored value that evaluates to false counts as
     not yet read, so the file is processed again in that case.
     """
     cached = self.__synonyms
     if cached:
         return cached

     names_text = self.__rsatdb().get_feature_names(self.species())
     names_dfile = util.dfile_from_text(names_text, comment='--')
     self.__synonyms = thesaurus.create_from_rsat_feature_names(
         names_dfile, [thesaurus.strip_vng_modification])
     return self.__synonyms
Ejemplo n.º 10
0
 def get_taxonomy_id(rsat_organism):
     """Determine the taxonomy id of rsat_organism from the RSAT database.

     The organism names text is requested from rsatdb and parsed with
     '--' as the comment marker; the first column of the first parsed
     line is returned unchanged."""
     names_text = rsatdb.get_organism_names(rsat_organism)
     names_dfile = util.dfile_from_text(names_text, comment='--')
     first_line = names_dfile.lines[0]
     return first_line[0]