Ejemplo n.º 1
0
    def __init__(self, args, run=run, progress=progress):
        """Prepare a Pfam run against a contigs database.

        The values for `contigs_db`, `num_threads`, `hmmer_program` and
        `pfam_data_dir` are read from the attribute dictionary of `args`;
        any of them that is absent is stored as None. When no HMM program
        is given 'hmmsearch' is used, and when no Pfam data directory is
        given the 'data/misc/Pfam' directory next to the anvio package is
        used.
        """
        self.run = run
        self.progress = progress

        def fetch(name):
            # an argument that is missing from the namespace becomes None
            return args.__dict__.get(name)

        self.contigs_db_path = fetch('contigs_db')
        self.num_threads = fetch('num_threads')
        self.hmm_program = fetch('hmmer_program') or 'hmmsearch'
        self.pfam_data_dir = fetch('pfam_data_dir')

        # filled in later by load_catalog
        self.function_catalog = {}

        filesnpaths.is_program_exists(self.hmm_program)
        utils.is_contigs_db(self.contigs_db_path)

        if not self.pfam_data_dir:
            anvio_root = os.path.dirname(anvio.__file__)
            self.pfam_data_dir = os.path.join(anvio_root, 'data/misc/Pfam')

        # while checking whether Pfam has been downloaded into pfam_data_dir,
        # this call also decompresses and hmmpresses the profile if it is
        # currently gzipped
        self.is_database_exists()

        self.run.info('Pfam database directory', self.pfam_data_dir)

        self.get_version()
        self.load_catalog()
Ejemplo n.º 2
0
    def __init__(self, args, run=run, progress=progress):
        """Prepare the Pfam setup state and its target data directory.

        Reads `pfam_data_dir` and `reset` from `args`. When no data
        directory is given, the 'data/misc/Pfam' directory next to the
        anvio package is used.

        Raises:
            ConfigError: when `reset` is requested together with a
                user-specified data directory.
        """
        self.args = args
        self.run = run
        self.progress = progress
        self.pfam_data_dir = args.pfam_data_dir

        filesnpaths.is_program_exists('hmmpress')

        # a directory named explicitly by the user is never reset for them
        if self.pfam_data_dir and args.reset:
            custom_dir = self.pfam_data_dir
            raise ConfigError(
                "You are attempting to run Pfam setup on a non-default data directory (%s) using the --reset flag. "
                "To avoid automatically deleting a directory that may be important to you, anvi'o refuses to reset "
                "directories that have been specified with --pfam-data-dir. If you really want to get rid of this "
                "directory and regenerate it with Pfam data inside, then please remove the directory yourself using "
                "a command like `rm -r %s`. We are sorry to make you go through this extra trouble, but it really is "
                "the safest way to handle things." % (custom_dir, custom_dir))

        if not self.pfam_data_dir:
            anvio_root = os.path.dirname(anvio.__file__)
            self.pfam_data_dir = os.path.join(anvio_root, 'data/misc/Pfam')

        parent_dir = os.path.dirname(self.pfam_data_dir)
        filesnpaths.is_output_dir_writable(parent_dir)

        # the existence check is skipped both when resetting and in debug mode
        if not (args.reset or anvio.DEBUG):
            self.is_database_exists()

        filesnpaths.gen_output_directory(self.pfam_data_dir, delete_if_exists=args.reset)

        self.database_url = "http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release"
        self.files = ['Pfam-A.hmm.gz', 'Pfam.version.gz', 'Pfam-A.clans.tsv.gz']
Ejemplo n.º 3
0
    def __init__(self,
                 db_path,
                 num_threads_to_use=1,
                 run=run,
                 progress=progress,
                 initializing_for_deletion=False,
                 just_do_it=False,
                 hmm_program_to_use='hmmscan',
                 hmmer_output_directory=None,
                 get_domain_table_output=False):
        """Set up HMM hit table access for the contigs database at `db_path`.

        Stores the run settings, reads the 'contigs_db_hash' meta value from
        the database, initializes the Table base and the gene calls
        dictionary, and warns when that dictionary is empty. Unless
        `initializing_for_deletion` is set, the next available id for the
        HMM hits table is also set.
        """
        self.num_threads_to_use = num_threads_to_use
        self.db_path = db_path
        self.just_do_it = just_do_it
        # a None or empty program name falls back to hmmscan
        self.hmm_program = hmm_program_to_use or 'hmmscan'
        self.hmmer_output_dir = hmmer_output_directory

        if get_domain_table_output:
            self.hmmer_desired_output = ('table', 'domtable')
        else:
            self.hmmer_desired_output = 'table'

        utils.is_contigs_db(self.db_path)
        filesnpaths.is_program_exists(self.hmm_program)

        required_version = utils.get_required_version_for_db(self.db_path)
        self.contigs_db_hash = db.DB(self.db_path, required_version).get_meta_value('contigs_db_hash')

        Table.__init__(self, self.db_path, anvio.__contigs__version__, run, progress)

        self.init_gene_calls_dict()

        if len(self.gene_calls_dict) == 0:
            # the wording depends on whether gene calling was run at all
            if self.genes_are_called:
                message = (
                    "Tables in this contigs database that should contain gene calls are empty despite the fact that "
                    "you didn't skip the gene calling step while generating this contigs database. This probably means "
                    "that the gene caller did not find any genes among contigs. This is OK for now. But might explode "
                    "later. If it does explode and you decide to let us know about that problem, please remember to mention "
                    "this warning. By the way, this warning probably has been seen by like only 2 people on the planet. Who "
                    "works with contigs with no gene calls? A better implementation of anvi'o will unite researchers who "
                    "study weird stuff.")
            else:
                message = (
                    "It seems you have skipped gene calling step while generating your contigs database, and you have no "
                    "genes calls in tables that should contain gene calls. Anvi'o will let you go with this since some HMM "
                    "sources only operate on DNA sequences, and at this point it doesn't know which HMMs you wish to run. "
                    "If the lack of genes causes a problem, you will get another error message later probably :/")

            self.run.warning(message)

        if not initializing_for_deletion:
            self.set_next_available_id(t.hmm_hits_table_name)
Ejemplo n.º 4
0
    def __init__(self, args, run=run, progress=progress):
        """Prepare the Pfam setup state and generate its data directory.

        Reads `pfam_data_dir` and `reset` from `args`. When no data
        directory is given, the 'data/misc/Pfam' directory next to the
        anvio package is used. The output directory is generated with
        `delete_if_exists` set to the value of `args.reset`.
        """
        self.args = args
        self.run = run
        self.progress = progress
        self.pfam_data_dir = args.pfam_data_dir

        filesnpaths.is_program_exists('hmmpress')

        if not self.pfam_data_dir:
            anvio_root = os.path.dirname(anvio.__file__)
            self.pfam_data_dir = os.path.join(anvio_root, 'data/misc/Pfam')

        # the existence check only runs when no reset was requested
        if not args.reset:
            self.is_database_exists()

        filesnpaths.gen_output_directory(
            self.pfam_data_dir,
            delete_if_exists=args.reset,
        )

        self.database_url = "http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release"
        self.files = [
            'Pfam-A.hmm.gz',
            'Pfam.version.gz',
            'Pfam-A.clans.tsv.gz',
        ]
Ejemplo n.º 5
0
    def __init__(self, args, run=run, progress=progress):
        """Store the setup arguments and generate the Pfam data directory.

        `args` must provide `pfam_data_dir` and `reset`. A falsy
        `pfam_data_dir` is replaced by the 'data/misc/Pfam' directory
        located next to the anvio package.
        """
        self.args = args
        self.run = run
        self.progress = progress
        self.pfam_data_dir = args.pfam_data_dir

        filesnpaths.is_program_exists('hmmpress')

        if not self.pfam_data_dir:
            package_dir = os.path.dirname(anvio.__file__)
            self.pfam_data_dir = os.path.join(package_dir, 'data/misc/Pfam')

        # with reset requested, the existence check is skipped
        if not args.reset:
            self.is_database_exists()

        # the value of args.reset is passed on as delete_if_exists
        wipe_existing = args.reset
        filesnpaths.gen_output_directory(self.pfam_data_dir, delete_if_exists=wipe_existing)

        self.database_url = "http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release"
        self.files = ['Pfam-A.hmm.gz', 'Pfam.version.gz', 'Pfam-A.clans.tsv.gz']
Ejemplo n.º 6
0
    def __init__(self, args, run=run, progress=progress):
        """Prepare a Pfam run against a contigs database.

        Reads `contigs_db`, `num_threads` and `pfam_data_dir` from `args`.
        When no Pfam data directory is given, the 'data/misc/Pfam'
        directory next to the anvio package is used. Finishes by calling
        get_version and load_catalog.
        """
        self.args = args
        self.run = run
        self.progress = progress
        self.contigs_db_path = args.contigs_db
        self.num_threads = args.num_threads
        self.pfam_data_dir = args.pfam_data_dir

        # filled in later by load_catalog
        self.function_catalog = {}

        filesnpaths.is_program_exists('hmmscan')
        utils.is_contigs_db(self.contigs_db_path)

        if not self.pfam_data_dir:
            anvio_root = os.path.dirname(anvio.__file__)
            self.pfam_data_dir = os.path.join(anvio_root, 'data/misc/Pfam')

        self.is_database_exists()

        self.run.info(
            'Pfam database directory',
            self.pfam_data_dir,
        )

        self.get_version()
        self.load_catalog()
Ejemplo n.º 7
0
    def __init__(self, args, run=run, progress=progress):
        """Store the run arguments and load the Pfam version and catalog.

        `args` must provide `contigs_db`, `num_threads` and
        `pfam_data_dir`. A falsy `pfam_data_dir` is replaced by the
        'data/misc/Pfam' directory located next to the anvio package.
        """
        self.args = args
        self.run = run
        self.progress = progress
        self.contigs_db_path = args.contigs_db
        self.num_threads = args.num_threads
        self.pfam_data_dir = args.pfam_data_dir

        # starts out empty; load_catalog populates it
        self.function_catalog = {}

        filesnpaths.is_program_exists('hmmscan')
        utils.is_contigs_db(self.contigs_db_path)

        if not self.pfam_data_dir:
            package_dir = os.path.dirname(anvio.__file__)
            self.pfam_data_dir = os.path.join(package_dir, 'data/misc/Pfam')

        self.is_database_exists()

        self.run.info('Pfam database directory', self.pfam_data_dir)

        self.get_version()
        self.load_catalog()
Ejemplo n.º 8
0
 def check_sge_binaries(self):
     """Run the program-existence check for 'qsub' and then 'qstat'."""
     for binary in ('qsub', 'qstat'):
         filesnpaths.is_program_exists(binary)
Ejemplo n.º 9
0
 def check_sge_binaries(self):
     """Call filesnpaths.is_program_exists for 'qsub' and for 'qstat'."""
     submit_program = 'qsub'
     status_program = 'qstat'

     # same order as before: the submit program is checked first
     filesnpaths.is_program_exists(submit_program)
     filesnpaths.is_program_exists(status_program)