def make_halo(ratio_matrix, search_distances, scan_distances):
    """Return the organism object for organism code 'hal', with networks.

    Reads the KEGG and GO mapping files, creates the RSAT and Microbes
    Online accessors (both backed by CACHE_DIR) and registers two network
    factories: a STRING network read from a fixed test data file and an
    operon network obtained through Microbes Online.

    Args:
        ratio_matrix: only its ``num_rows`` attribute is read here, to
            derive the maximum operon size (one twentieth of the rows).
        search_distances: handed unchanged to ``MicrobeFactory.create``.
        scan_distances: handed unchanged to ``MicrobeFactory.create``.

    Returns:
        The object produced by ``org.MicrobeFactory.create`` for 'hal'.
    """
    keggfile = util.read_dfile(KEGG_FILE_PATH, comment='#')
    gofile = util.read_dfile(GO_FILE_PATH)
    rsatdb = rsat.RsatDatabase(rsat.RSAT_BASE_URL, CACHE_DIR)
    mo_db = microbes_online.MicrobesOnline(CACHE_DIR)
    stringfile = 'testdata/string_links_64091.tab'

    nw_factories = []
    # The path is hard-coded above, so this guard only matters if someone
    # edits the assignment to None to switch the STRING network off.
    if stringfile is not None:
        nw_factories.append(
            stringdb.get_network_factory2('hal',
                                          stringfile,
                                          0.5,
                                          normalized=True))
    else:
        logging.warning("no STRING file specified !")

    # Floor division keeps the operon size an integer on Python 3 as well;
    # for integer row counts on Python 2 it equals the former '/'.
    nw_factories.append(
        microbes_online.get_network_factory(
            mo_db, max_operon_size=ratio_matrix.num_rows // 20, weight=0.5))

    org_factory = org.MicrobeFactory(org.make_kegg_code_mapper(keggfile),
                                     org.make_rsat_organism_mapper(rsatdb),
                                     org.make_go_taxonomy_mapper(gofile),
                                     mo_db, nw_factories)

    return org_factory.create('hal', search_distances, scan_distances)
def make_halo(search_distances, scan_distances):
    """Return the organism object for organism code 'hal', without networks.

    The KEGG and GO mapping files are read, the RSAT and Microbes Online
    accessors are created on top of CACHE_DIR, and the organism factory is
    given an empty list of network factories.
    """
    kegg_table = util.read_dfile(KEGG_FILE_PATH, comment='#')
    go_table = util.read_dfile(GO_FILE_PATH)
    rsat_source = rsat.RsatDatabase(rsat.RSAT_BASE_URL, CACHE_DIR)
    operon_source = microbes_online.MicrobesOnline(CACHE_DIR)

    # Mappers are created in the order the factory expects them.
    kegg_mapper = org.make_kegg_code_mapper(kegg_table)
    rsat_mapper = org.make_rsat_organism_mapper(rsat_source)
    go_mapper = org.make_go_taxonomy_mapper(go_table)

    factory = org.MicrobeFactory(kegg_mapper, rsat_mapper, go_mapper,
                                 operon_source, [])
    return factory.create('hal', search_distances, scan_distances)
def make_halo(search_distances, scan_distances):
    """Return the organism object for organism code 'hal', without networks.

    Reads the KEGG and GO mapping files, creates the RSAT accessor (using
    CACHE_DIR) and a Microbes Online accessor, then asks the organism
    factory for 'hal' with no network factories registered.
    """
    kegg_entries = util.read_dfile(KEGG_FILE_PATH, comment='#')
    go_entries = util.read_dfile(GO_FILE_PATH)
    rsat_db = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
    operons = microbes_online.MicrobesOnline()

    mappers = (
        org.make_kegg_code_mapper(kegg_entries),
        org.make_rsat_organism_mapper(rsat_db),
        org.make_go_taxonomy_mapper(go_entries),
    )
    # The trailing empty list is the (unused) set of network factories.
    factory_args = mappers + (operons, [])
    return org.MicrobeFactory(*factory_args).create(
        'hal', search_distances, scan_distances)
    def make_microbe(self):
        """Return the organism object this run works on.

        Reads the KEGG and GO mapping files, creates the RSAT and Microbes
        Online accessors and registers a STRING network factory and an
        operon network factory. When no STRING file is configured, the
        file is fetched through ``util.get_url_cached`` into the cache
        directory, and its path is stored under ``self['string_file']``.

        NOTE(review): the STRING path is read from
        ``self.config_params['string_file']`` but written back through
        ``self['string_file']`` - confirm both address the same setting.

        Returns:
            The object produced by ``org.MicrobeFactory.create`` for
            ``self['organism_code']``.
        """
        keggfile = util.read_dfile(KEGG_FILE_PATH, comment='#')
        gofile = util.read_dfile(GO_FILE_PATH)
        rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, self['cache_dir'])
        mo_db = microbes_online.MicrobesOnline()
        stringfile = self.config_params['string_file']
        kegg_mapper = org.make_kegg_code_mapper(keggfile)
        rsat_mapper = org.make_rsat_organism_mapper(rsatdb)

        # automatically download STRING file
        if stringfile is None:
            rsat_info = rsat_mapper(kegg_mapper(self['organism_code']))
            ncbi_code = rsat_info.taxonomy_id
            # Logged instead of printed: the former print statement is a
            # syntax error on Python 3.
            logging.info("NCBI CODE IS: %s", ncbi_code)
            url = STRING_URL_PATTERN % ncbi_code
            stringfile = "%s/%s.gz" % (self['cache_dir'], ncbi_code)
            self['string_file'] = stringfile
            logging.info("Automatically using STRING file in '%s'", stringfile)
            util.get_url_cached(url, stringfile)

        # stringfile is always set by now (configured or just downloaded),
        # so the STRING factory is registered unconditionally.
        nw_factories = [stringdb.get_network_factory2(
            self['organism_code'], stringfile, 0.5)]

        # Floor division keeps the operon size an integer on Python 3 as
        # well; for integer row counts on Python 2 it equals the former '/'.
        nw_factories.append(microbes_online.get_network_factory(
            mo_db, max_operon_size=self.ratio_matrix.num_rows // 20,
            weight=0.5))

        org_factory = org.MicrobeFactory(kegg_mapper,
                                         rsat_mapper,
                                         org.make_go_taxonomy_mapper(gofile),
                                         mo_db,
                                         nw_factories)
        return org_factory.create(self['organism_code'],
                                  self['search_distances'],
                                  self['scan_distances'])
def make_halo(ratio_matrix, search_distances, scan_distances):
    """Return the organism object for organism code 'hal', with networks.

    Reads the KEGG and GO mapping files, creates the RSAT and Microbes
    Online accessors and registers two network factories: a STRING network
    read from a fixed file name and an operon network obtained through
    Microbes Online.

    Args:
        ratio_matrix: only its ``num_rows`` attribute is read here, to
            derive the maximum operon size (one twentieth of the rows).
        search_distances: handed unchanged to ``MicrobeFactory.create``.
        scan_distances: handed unchanged to ``MicrobeFactory.create``.

    Returns:
        The object produced by ``org.MicrobeFactory.create`` for 'hal'.
    """
    keggfile = util.read_dfile(KEGG_FILE_PATH, comment='#')
    gofile = util.read_dfile(GO_FILE_PATH)
    rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
    mo_db = microbes_online.MicrobesOnline()
    stringfile = 'string_links_64091.tab'

    nw_factories = []
    # The file name is hard-coded above, so this guard only matters if
    # someone edits the assignment to None to switch STRING off.
    if stringfile is not None:
        nw_factories.append(
            stringdb.get_network_factory2('hal', stringfile, 0.5))
    else:
        logging.warning("no STRING file specified !")

    # Floor division keeps the operon size an integer on Python 3 as well;
    # for integer row counts on Python 2 it equals the former '/'.
    nw_factories.append(microbes_online.get_network_factory(
        mo_db, max_operon_size=ratio_matrix.num_rows // 20, weight=0.5))

    org_factory = org.MicrobeFactory(org.make_kegg_code_mapper(keggfile),
                                     org.make_rsat_organism_mapper(rsatdb),
                                     org.make_go_taxonomy_mapper(gofile),
                                     mo_db,
                                     nw_factories)

    return org_factory.create('hal', search_distances, scan_distances)
    def make_microbe(self):
        """Return the organism object this run works on.

        Reads the KEGG and GO mapping files, creates the RSAT and Microbes
        Online accessors, registers a STRING network factory when a STRING
        file is configured (a warning is logged otherwise) and always
        registers the operon network factory.

        Returns:
            The object produced by ``org.MicrobeFactory.create`` for
            ``self['organism_code']``.
        """
        keggfile = util.DelimitedFile.read(KEGG_FILE_PATH, comment='#')
        gofile = util.DelimitedFile.read(GO_FILE_PATH)
        rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, self['cache_dir'])
        mo_db = microbes_online.MicrobesOnline()
        stringfile = self.config_params['string_file']

        nw_factories = []
        if stringfile is not None:
            nw_factories.append(stringdb.get_network_factory2(stringfile, 0.5))
        else:
            logging.warning("no STRING file specified !")

        # Floor division keeps the operon size an integer on Python 3 as
        # well; for integer row counts on Python 2 it equals the former '/'.
        nw_factories.append(microbes_online.get_network_factory(
            mo_db, max_operon_size=self.ratio_matrix.num_rows() // 20,
            weight=0.5))

        org_factory = org.MicrobeFactory(org.make_kegg_code_mapper(keggfile),
                                         org.make_rsat_organism_mapper(rsatdb),
                                         org.make_go_taxonomy_mapper(gofile),
                                         mo_db,
                                         nw_factories)
        return org_factory.create(self['organism_code'], self['search_distances'],
                                  self['scan_distances'])
Exemple #7
0
 def setUp(self):  # pylint: disable-msg=C0103
     """Build the RSAT organism mapper under test from the listing file."""
     with open(RSAT_LIST_FILE_PATH) as listing:
         page = listing.read()
     # The mock database is constructed from the file content read above.
     database = MockRsatDatabase(page)
     self.mapper = org.make_rsat_organism_mapper(database)
Exemple #8
0
    def make_microbe(self):
        """Assemble and return the organism object this run works on.

        Steps, in order: make sure the working directories exist, load the
        KEGG and GO mapping files (user-level file first, then the system
        one), pick the RSAT and operon data sources according to the
        settings, optionally register the STRING and operon network
        factories, and finally ask the organism factory for the organism.

        Raises:
            Exception: if no KEGG or GO file exists, or if 'rsat_dir' is
                set without 'rsat_organism'.
        """
        self.__make_dirs_if_needed()

        # KEGG mapping: the first existing candidate wins.
        for kegg_path in (USER_KEGG_FILE_PATH, SYSTEM_KEGG_FILE_PATH):
            if os.path.exists(kegg_path):
                keggfile = util.read_dfile(kegg_path, comment='#')
                break
        else:
            raise Exception('KEGG file not found !!')

        # GO mapping: same lookup order as for KEGG.
        for go_path in (USER_GO_FILE_PATH, SYSTEM_GO_FILE_PATH):
            if os.path.exists(go_path):
                gofile = util.read_dfile(go_path)
                break
        else:
            raise Exception('GO file not found !!')

        # RSAT source: remote database unless a local directory is given.
        if not self['rsat_dir']:
            rsatdb = rsat.RsatDatabase(rsat.RSAT_BASE_URL, self['cache_dir'])
        else:
            if not self['rsat_organism']:
                raise Exception(
                    'override RSAT loading: please specify --rsat_organism')
            logging.info("using RSAT files for '%s'", self['rsat_organism'])
            rsatdb = rsat.RsatFiles(self['rsat_dir'], self['rsat_organism'],
                                    self['ncbi_code'])

        # Operon source: Microbes Online unless an operon file is given.
        if not self['operon_file']:
            logging.info(
                "attempting automatic download of operons from Microbes Online"
            )
            mo_db = microbes_online.MicrobesOnline(self['cache_dir'])
        else:
            logging.info("using operon file at '%s'", self['operon_file'])
            mo_db = microbes_online.MicrobesOnlineOperonFile(
                self['operon_file'])

        stringfile = self['string_file']
        kegg_mapper = org.make_kegg_code_mapper(keggfile)
        rsat_mapper = org.make_rsat_organism_mapper(rsatdb)
        ncbi_code = self['ncbi_code']
        nw_factories = []

        # STRING network
        if self['donetworks'] and self['use_string']:
            if stringfile is not None:
                logging.info("Loading STRING file at '%s'", stringfile)
            else:
                # No file configured: fetch it, resolving the NCBI code
                # through RSAT when it was not configured either.
                if ncbi_code is None:
                    organism_info = rsat_mapper(
                        kegg_mapper(self['organism_code']),
                        self['rsat_organism'])
                    ncbi_code = organism_info.taxonomy_id

                logging.info("NCBI CODE IS: %s", ncbi_code)
                download_url = STRING_URL_PATTERN % ncbi_code
                stringfile = "%s/%s.gz" % (self['cache_dir'], ncbi_code)
                self['string_file'] = stringfile
                logging.info("Automatically using STRING file in '%s'",
                             stringfile)
                util.get_url_cached(download_url, stringfile)

            string_factory = stringdb.get_network_factory2(
                self['organism_code'], stringfile, 0.5)
            nw_factories.append(string_factory)

        # operon network
        if self['donetworks'] and self['use_operons']:
            logging.info('adding operon network factory')
            operon_limit = self.ratio_matrix.num_rows / 20
            operon_factory = microbes_online.get_network_factory(
                mo_db, max_operon_size=operon_limit, weight=0.5)
            nw_factories.append(operon_factory)

        go_mapper = org.make_go_taxonomy_mapper(gofile)
        org_factory = org.MicrobeFactory(kegg_mapper, rsat_mapper, go_mapper,
                                         mo_db, nw_factories,
                                         self['ncbi_code'])
        return org_factory.create(self['organism_code'],
                                  self['search_distances'],
                                  self['scan_distances'], self['use_operons'],
                                  self['rsat_organism'], self.ratio_matrix)
    def make_microbe(self):
        """Assemble and return the organism object this run works on.

        Ensures the working directories exist, loads the KEGG and GO
        mapping files (preferring the user-level file over the system
        one), selects the RSAT and operon data sources from the settings,
        registers the STRING and operon network factories when enabled,
        and asks the organism factory for the configured organism.

        Raises:
            Exception: if no KEGG or GO file exists, or if 'rsat_dir' is
                set without 'rsat_organism'.
        """
        self.__make_dirs_if_needed()

        # Decide which KEGG file to read before reading it.
        if os.path.exists(USER_KEGG_FILE_PATH):
            kegg_path = USER_KEGG_FILE_PATH
        elif os.path.exists(SYSTEM_KEGG_FILE_PATH):
            kegg_path = SYSTEM_KEGG_FILE_PATH
        else:
            raise Exception('KEGG file not found !!')
        kegg_table = util.read_dfile(kegg_path, comment='#')

        # Same decision for the GO file.
        if os.path.exists(USER_GO_FILE_PATH):
            go_path = USER_GO_FILE_PATH
        elif os.path.exists(SYSTEM_GO_FILE_PATH):
            go_path = SYSTEM_GO_FILE_PATH
        else:
            raise Exception('GO file not found !!')
        go_table = util.read_dfile(go_path)

        # A configured RSAT directory replaces the remote RSAT database.
        if self['rsat_dir']:
            if not self['rsat_organism']:
                raise Exception('override RSAT loading: please specify --rsat_organism')
            logging.info("using RSAT files for '%s'", self['rsat_organism'])
            rsat_source = rsat.RsatFiles(
                self['rsat_dir'], self['rsat_organism'], self['ncbi_code'])
        else:
            rsat_source = rsat.RsatDatabase(rsat.RSAT_BASE_URL, self['cache_dir'])

        # A configured operon file replaces the Microbes Online download.
        if self['operon_file']:
            logging.info("using operon file at '%s'", self['operon_file'])
            operon_source = microbes_online.MicrobesOnlineOperonFile(
                self['operon_file'])
        else:
            logging.info("attempting automatic download of operons from Microbes Online")
            operon_source = microbes_online.MicrobesOnline(self['cache_dir'])

        string_path = self['string_file']
        kegg_mapper = org.make_kegg_code_mapper(kegg_table)
        rsat_mapper = org.make_rsat_organism_mapper(rsat_source)
        taxonomy_code = self['ncbi_code']
        factories = []

        # STRING network, if networks and STRING are both enabled
        if self['donetworks']:
            if self['use_string']:
                if string_path is None:
                    # Nothing configured: download into the cache directory.
                    if taxonomy_code is None:
                        kegg_organism = kegg_mapper(self['organism_code'])
                        taxonomy_code = rsat_mapper(
                            kegg_organism, self['rsat_organism']).taxonomy_id

                    logging.info("NCBI CODE IS: %s", taxonomy_code)
                    string_url = STRING_URL_PATTERN % taxonomy_code
                    string_path = "%s/%s.gz" % (self['cache_dir'], taxonomy_code)
                    self['string_file'] = string_path
                    logging.info("Automatically using STRING file in '%s'", string_path)
                    util.get_url_cached(string_url, string_path)
                else:
                    logging.info("Loading STRING file at '%s'", string_path)

                factories.append(stringdb.get_network_factory2(
                    self['organism_code'], string_path, 0.5))

        # operon network, if networks and operons are both enabled
        if self['donetworks']:
            if self['use_operons']:
                logging.info('adding operon network factory')
                factories.append(microbes_online.get_network_factory(
                    operon_source,
                    max_operon_size=self.ratio_matrix.num_rows / 20,
                    weight=0.5))

        microbe_factory = org.MicrobeFactory(
            kegg_mapper,
            rsat_mapper,
            org.make_go_taxonomy_mapper(go_table),
            operon_source,
            factories,
            self['ncbi_code'])
        return microbe_factory.create(
            self['organism_code'],
            self['search_distances'],
            self['scan_distances'],
            self['use_operons'],
            self['rsat_organism'],
            self.ratio_matrix)
Exemple #10
0
    # Banner is printed unconditionally, before the arguments are checked.
    print("extract_string_links.py, (c) 2012, Institute for Systems Biology")
    print('This program is licensed under the General Public License V3.')
    print('See README and LICENSE for details.\n')
    if len(sys.argv) <= 2:
        print(
            'Usage: python extract_string_links.py <stringdb-path> <organism-code>'
        )
    else:
        rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
        kegg_mapper = organism.make_kegg_code_mapper(
            util.DelimitedFile.read(KEGG_FILE,
                                    sep='\t',
                                    has_header=True,
                                    comment='#'))
        # argv[2] is the organism code, argv[1] the STRING database path.
        kegg_org = kegg_mapper(sys.argv[2])
        rsat_info = organism.make_rsat_organism_mapper(rsatdb)(kegg_org)
        # Single-argument print() calls behave the same on Python 2 and 3;
        # the doubled space reproduces the former print-statement output.
        print("RSAT SPECIES:  %s" % (rsat_info.species,))
        print("TAX ID:  %s" % (rsat_info.taxonomy_id,))
        feature_names = rsatdb.get_feature_names(rsat_info.species)
        feature_names_dfile = util.DelimitedFile.create_from_text(
            feature_names, comment='--')
        # NOTE(review): 'synonyms' is not used below; presumably it is read
        # elsewhere in the file (e.g. by process_stringdb) - confirm before
        # removing it.
        synonyms = thesaurus.create_from_rsat_feature_names(
            feature_names_dfile)
        string_filepath = sys.argv[1]
        # Gzipped and plain files are processed identically; only the
        # opener differs.
        opener = gzip.open if string_filepath.endswith('.gz') else open
        with opener(string_filepath) as stringfile:
            process_stringdb(stringfile, rsat_info.taxonomy_id)
    print "edges normalized."
    for edge in normalized_result:
        print "%s\t%s\%f" % (edge.source(), edge.target(), edge.score())
"""

if __name__ == '__main__':
    # Banner is printed unconditionally, before the arguments are checked.
    print("extract_string_links.py, (c) 2012, Institute for Systems Biology")
    print('This program is licensed under the General Public License V3.')
    print('See README and LICENSE for details.\n')
    if len(sys.argv) <= 2:
        print('Usage: python extract_string_links.py <stringdb-path> <organism-code>')
    else:
        rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
        kegg_mapper = organism.make_kegg_code_mapper(
            util.DelimitedFile.read(KEGG_FILE, sep='\t', has_header=True,
                                    comment='#'))
        # argv[2] is the organism code, argv[1] the STRING database path.
        kegg_org = kegg_mapper(sys.argv[2])
        rsat_info = organism.make_rsat_organism_mapper(rsatdb)(kegg_org)
        # Single-argument print() calls behave the same on Python 2 and 3;
        # the doubled space reproduces the former print-statement output.
        print("RSAT SPECIES:  %s" % (rsat_info.species,))
        print("TAX ID:  %s" % (rsat_info.taxonomy_id,))
        feature_names = rsatdb.get_feature_names(rsat_info.species)
        feature_names_dfile = util.DelimitedFile.create_from_text(feature_names, comment='--')
        # NOTE(review): 'synonyms' is not used below; presumably it is read
        # as a module-level name elsewhere in the file (e.g. by
        # process_stringdb) - confirm before removing it.
        synonyms = thesaurus.create_from_rsat_feature_names(feature_names_dfile)
        string_filepath = sys.argv[1]
        # Gzipped and plain files are processed identically; only the
        # opener differs.
        opener = gzip.open if string_filepath.endswith('.gz') else open
        with opener(string_filepath) as stringfile:
            process_stringdb(stringfile, rsat_info.taxonomy_id)