예제 #1
0
    def __init__(self, args = {}, p=progress, r=run):
        """Read the parameters found in `args` and initialize the contigs superclass.

        Every parameter is looked up in `args.__dict__`. A parameter that is
        missing is stored as None; one that is present is passed through its
        converter (float, int, bool, or left untouched).

        Parameters:
            args: object whose `__dict__` holds the parameters.
            p: stored as `self.progress`.
            r: stored as `self.run`.
        """
        # NOTE(review): the `{}` default has no `__dict__`, so in practice callers
        # have to pass a namespace-like object.
        self.args = args
        self.splits_of_interest = set()
        self.samples_of_interest = set()

        self.run = r
        self.progress = p

        # `in` replaces dict.has_key(), which no longer exists on Python 3; the
        # membership test behaves the same on Python 2.
        A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
        null = lambda x: x
        self.bin_id = A('bin_id', null)
        self.collection_id = A('collection_id', null)
        self.splits_of_interest_path = A('splits_of_interest', null)
        self.min_ratio = A('min_ratio', float)
        self.min_occurrence = A('min_occurrence', int)
        self.num_positions_from_each_split = A('num_positions_from_each_split', int)
        self.min_scatter = A('min_scatter', int)
        self.min_coverage_in_each_sample = A('min_coverage_in_each_sample', int)
        self.profile_db_path = A('profile_db', null)
        self.contigs_db_path = A('contigs_db', null)
        self.quince_mode = A('quince_mode', bool)
        self.output_file_path = A('output_file', null)
        self.samples_of_interest_path = A('samples_of_interest', null)

        # state that starts out empty and is not read from args
        self.variable_nts_table = {}
        self.merged_split_coverage_values = None
        self.unique_pos_identifier = 0
        self.split_name_position_dict = {}
        self.unique_pos_id_to_entry_id = {}
        self.contig_sequences = None
        self.input_file_path = None

        # Initialize the contigs super
        dbops.ContigsSuperclass.__init__(self, self.args, r = self.run, p = self.progress)
        self.init_contig_sequences()
예제 #2
0
    def __init__(self, args = {}, p=progress, r=run):
        """Read the parameters found in `args` and load the optional input files.

        Parameters are looked up in `args.__dict__`; a missing one is stored as
        None. When a samples information path is given, that file is loaded into
        `self.samples_information_dict`; when an input file path is given, it is
        loaded into `self.data`.

        Parameters:
            args: object whose `__dict__` holds the parameters.
            p: stored as `self.progress`.
            r: stored as `self.run`.
        """
        # NOTE(review): the `{}` default has no `__dict__`, so in practice callers
        # have to pass a namespace-like object.
        self.args = args

        self.run = r
        self.progress = p

        self.samples = None
        self.samples_information_dict = None
        self.data = None

        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
        null = lambda x: x
        self.input_file_path = A('input_file', null)
        self.samples_information_path = A('samples_information', null)
        self.max_num_unique_positions = A('max_num_unique_positions', int)
        self.output_file_path = A('output_file', null)

        filesnpaths.is_output_file_writable(self.output_file_path)

        if self.samples_information_path:
            filesnpaths.is_file_tab_delimited(self.samples_information_path)
            self.samples_information_dict = utils.get_TAB_delimited_file_as_dictionary(self.samples_information_path)

            # dict views cannot be indexed on Python 3, so peek at the first row
            # through an iterator; a file without rows reports 0 attributes
            # instead of raising.
            first_row = next(iter(self.samples_information_dict.values()), None)
            num_attributes = len(first_row) if first_row is not None else 0

            self.run.info('samples_information', '%d attributes read for %d samples' % (num_attributes, len(self.samples_information_dict)))

        if self.input_file_path:
            filesnpaths.is_file_tab_delimited(self.input_file_path)
            self.progress.new('Reading the input file')
            self.progress.update('...')
            self.data = utils.get_TAB_delimited_file_as_dictionary(self.input_file_path)
            self.progress.end()

            self.run.info('input_file', '%d entries read' % len(self.data))
예제 #3
0
    def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
        """Read the update parameters from `args`, then remove and/or add genes as requested.

        Parameters:
            args: object whose `__dict__` holds the parameters; a missing
                parameter is stored as None.
            run: stored as `self.run`.
            progress: stored as `self.progress`.

        Raises:
            ConfigError: when no genes to add or remove were given, or when the
                genes for one operation were given both directly and as a file.
        """
        self.args = args
        self.run = run
        self.progress = progress

        def fetch(name, convert):
            # converted value of `name`, or None when args does not carry it
            if name in self.args.__dict__:
                return convert(args.__dict__[name])
            return None

        def as_is(value):
            return value

        self.contigs_db_path = fetch('contigs_db', as_is)
        self.structure_db_path = fetch('structure_db', as_is)
        self.genes_to_remove = fetch('genes_to_remove', as_is)
        self.genes_to_remove_path = fetch('genes_to_remove_file', as_is)
        self.genes_to_add = fetch('genes_to_add', as_is)
        self.genes_to_add_path = fetch('genes_to_add_file', as_is)
        self.full_modeller_output = fetch('dump_dir', as_is)
        self.modeller_executable = fetch('modeller_executable', as_is)
        self.DSSP_executable = None

        # open the contigs database and remember its hash
        utils.is_contigs_db(self.contigs_db_path)
        self.contigs_db = dbops.ContigsDatabase(self.contigs_db_path)
        self.contigs_db_hash = self.contigs_db.meta['contigs_db_hash']

        self.contigs_super = ContigsSuperclass(self.args)

        # at least one of the four gene inputs has to be provided
        gene_inputs = (self.genes_to_remove, self.genes_to_remove_path,
                       self.genes_to_add, self.genes_to_add_path)
        if not any(gene_inputs):
            raise ConfigError("Please specify some genes to add or remove to your database.")

        # for each operation the direct list and the file are mutually exclusive
        if self.genes_to_remove and self.genes_to_remove_path:
            raise ConfigError("Provide either --genes-to-remove or --genes-to-remove-path. You provided both.")

        if self.genes_to_add and self.genes_to_add_path:
            raise ConfigError("Provide either --genes-to-add or --genes-to-add-path. You provided both.")

        if self.genes_to_remove or self.genes_to_remove_path:
            banner = "Updating %s" % self.structure_db_path
            self.run.warning("Removing genes...", header=banner, lc='green')
            self.load_structure_db()
            genes_to_drop = self.parse_genes(self.genes_to_remove, self.genes_to_remove_path)
            self.remove_genes(genes_to_drop)
            self.structure_db.disconnect()

        if self.genes_to_add or self.genes_to_add_path:
            banner = "Updating %s" % self.structure_db_path
            self.run.warning("Adding genes...", header=banner, lc='green')
            self.load_structure_db()
            self.add_genes()
예제 #4
0
    def __init__(self, args={}, p=progress, r=run):
        """Read the parameters found in `args`, validate the engine, and initialize the contigs superclass.

        Parameters are looked up in `args.__dict__`; a missing one is stored as
        None. `self.engine` is read here but not assigned, so it has to be set
        before this method runs.

        Parameters:
            args: object whose `__dict__` holds the parameters.
            p: progress object, used for `self.progress` when that attribute is
                not already set.
            r: run object, used for `self.run` when that attribute is not
                already set.

        Raises:
            ConfigError: when `self.engine` is not one of `variability_engines`.
        """
        # NOTE(review): the `{}` default has no `__dict__`, so in practice callers
        # have to pass a namespace-like object.
        self.args = args

        # This method reads self.run / self.progress further down without ever
        # assigning them; honour the r / p arguments when nothing set them yet.
        if not hasattr(self, 'run'):
            self.run = r
        if not hasattr(self, 'progress'):
            self.progress = p

        self.data = {}

        self.splits_of_interest = set()
        self.samples_of_interest = set()

        A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
        null = lambda x: x
        self.bin_id = A('bin_id', null)
        self.collection_name = A('collection_name', null)
        self.splits_of_interest_path = A('splits_of_interest', null)
        self.min_departure_from_reference = A('min_departure_from_reference',
                                              float)
        self.max_departure_from_reference = A('max_departure_from_reference',
                                              float)
        self.min_departure_from_consensus = A('min_departure_from_consensus',
                                              float)
        self.max_departure_from_consensus = A('max_departure_from_consensus',
                                              float)
        self.min_occurrence = A('min_occurrence', int)
        self.num_positions_from_each_split = A('num_positions_from_each_split',
                                               int)
        self.min_scatter = A('min_scatter', int)
        self.min_coverage_in_each_sample = A('min_coverage_in_each_sample',
                                             int)
        self.profile_db_path = A('profile_db', null)
        self.contigs_db_path = A('contigs_db', null)
        self.quince_mode = A('quince_mode', bool)
        self.output_file_path = A('output_file', null)
        self.samples_of_interest_path = A('samples_of_interest', null)
        self.genes_of_interest_path = A('genes_of_interest', null)
        self.include_contig_names_in_output = A('include_contig_names', null)
        self.include_split_names_in_output = A('include_split_names', null)

        # state that starts out empty and is not read from args
        self.merged_split_coverage_values = None
        self.unique_pos_identifier = 0
        self.split_name_position_dict = {}
        self.unique_pos_id_to_entry_id = {}
        self.contig_sequences = None
        self.input_file_path = None

        if self.engine not in variability_engines:
            # call form of raise: the `raise Class, "message"` statement is a
            # SyntaxError on Python 3, while this form works on 2 and 3
            raise ConfigError("The superclass is inherited with an unknown engine. Anvi'o needs an adult :(")

        # Initialize the contigs super
        dbops.ContigsSuperclass.__init__(self,
                                         self.args,
                                         r=self.run,
                                         p=self.progress)
        self.init_contig_sequences()
예제 #5
0
    def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
        """Pull the update parameters out of `args`, then remove and/or add genes as requested.

        Parameters:
            args: object whose `__dict__` holds the parameters; a missing
                parameter is stored as None.
            run: stored as `self.run`.
            progress: stored as `self.progress`.

        Raises:
            ConfigError: when no genes to add or remove were given, or when the
                genes for one operation were given both directly and as a file.
        """
        self.args = args
        self.run = run
        self.progress = progress

        # dict.get() yields None for parameters that args does not carry
        given = self.args.__dict__
        self.contigs_db_path = given.get('contigs_db')
        self.structure_db_path = given.get('structure_db')
        self.genes_to_remove = given.get('genes_to_remove')
        self.genes_to_remove_path = given.get('genes_to_remove_file')
        self.genes_to_add = given.get('genes_to_add')
        self.genes_to_add_path = given.get('genes_to_add_file')
        self.full_modeller_output = given.get('dump_dir')
        self.modeller_executable = given.get('modeller_executable')
        # the flag is coerced to bool only when present; absent stays None
        if 'skip_genes_if_already_present' in given:
            self.skip_genes_if_already_present = bool(given['skip_genes_if_already_present'])
        else:
            self.skip_genes_if_already_present = None
        self.DSSP_executable = None

        # open the contigs database and remember its hash
        utils.is_contigs_db(self.contigs_db_path)
        self.contigs_db = dbops.ContigsDatabase(self.contigs_db_path)
        self.contigs_db_hash = self.contigs_db.meta['contigs_db_hash']

        self.contigs_super = ContigsSuperclass(self.args)

        # at least one of the four gene inputs has to be provided
        if not (self.genes_to_remove or self.genes_to_remove_path
                or self.genes_to_add or self.genes_to_add_path):
            raise ConfigError("Please specify some genes to add or remove to your database.")

        # for each operation the direct list and the file are mutually exclusive
        if self.genes_to_remove and self.genes_to_remove_path:
            raise ConfigError("Provide either --genes-to-remove or --genes-to-remove-path. You provided both.")

        if self.genes_to_add and self.genes_to_add_path:
            raise ConfigError("Provide either --genes-to-add or --genes-to-add-path. You provided both.")

        if self.genes_to_remove or self.genes_to_remove_path:
            title = "Updating %s" % self.structure_db_path
            self.run.warning("Removing genes...", header=title, lc='green')
            self.load_structure_db()
            doomed = self.parse_genes(self.genes_to_remove, self.genes_to_remove_path)
            self.remove_genes(doomed)
            self.structure_db.disconnect()

        if self.genes_to_add or self.genes_to_add_path:
            title = "Updating %s" % self.structure_db_path
            self.run.warning("Adding genes...", header=title, lc='green')
            self.load_structure_db()
            self.add_genes()
예제 #6
0
    def __init__(self, args={}, p=progress, r=run):
        """Read the parameters found in `args`, validate the engine, and initialize the contigs superclass.

        Parameters are looked up in `args.__dict__`; a missing one is stored as
        None. `self.engine` is read here but not assigned, so it has to be set
        before this method runs.

        Parameters:
            args: object whose `__dict__` holds the parameters.
            p: progress object, used for `self.progress` when that attribute is
                not already set.
            r: run object, used for `self.run` when that attribute is not
                already set.

        Raises:
            ConfigError: when `self.engine` is not one of `variability_engines`.
        """
        # NOTE(review): the `{}` default has no `__dict__`, so in practice callers
        # have to pass a namespace-like object.
        self.args = args

        # This method reads self.run / self.progress further down without ever
        # assigning them; honour the r / p arguments when nothing set them yet.
        if not hasattr(self, 'run'):
            self.run = r
        if not hasattr(self, 'progress'):
            self.progress = p

        self.data = {}

        self.splits_of_interest = set()
        self.samples_of_interest = set()

        A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
        null = lambda x: x
        self.bin_id = A('bin_id', null)
        self.collection_name = A('collection_name', null)
        self.splits_of_interest_path = A('splits_of_interest', null)
        self.min_departure_from_reference = A('min_departure_from_reference', float)
        self.max_departure_from_reference = A('max_departure_from_reference', float)
        self.min_departure_from_consensus = A('min_departure_from_consensus', float)
        self.max_departure_from_consensus = A('max_departure_from_consensus', float)
        self.min_occurrence = A('min_occurrence', int)
        self.num_positions_from_each_split = A('num_positions_from_each_split', int)
        self.min_scatter = A('min_scatter', int)
        self.min_coverage_in_each_sample = A('min_coverage_in_each_sample', int)
        self.profile_db_path = A('profile_db', null)
        self.contigs_db_path = A('contigs_db', null)
        self.quince_mode = A('quince_mode', bool)
        self.output_file_path = A('output_file', null)
        self.samples_of_interest_path = A('samples_of_interest', null)
        self.genes_of_interest_path = A('genes_of_interest', null)
        self.include_contig_names_in_output = A('include_contig_names', null)
        self.include_split_names_in_output = A('include_split_names', null)

        # state that starts out empty and is not read from args
        self.merged_split_coverage_values = None
        self.unique_pos_identifier = 0
        self.split_name_position_dict = {}
        self.unique_pos_id_to_entry_id = {}
        self.contig_sequences = None
        self.input_file_path = None

        if self.engine not in variability_engines:
            # call form of raise: the `raise Class, "message"` statement is a
            # SyntaxError on Python 3, while this form works on 2 and 3
            raise ConfigError("The superclass is inherited with an unknown engine. Anvi'o needs an adult :(")

        # Initialize the contigs super
        dbops.ContigsSuperclass.__init__(self, self.args, r=self.run, p=self.progress)
        self.init_contig_sequences()
예제 #7
0
    def __init__(self, args={}, p=progress, r=run):
        """Read the parameters found in `args` and initialize the contigs superclass.

        Every parameter is looked up in `args.__dict__`. A parameter that is
        missing is stored as None; one that is present is passed through its
        converter (float, int, bool, or left untouched).

        Parameters:
            args: object whose `__dict__` holds the parameters.
            p: stored as `self.progress`.
            r: stored as `self.run`.
        """
        # NOTE(review): the `{}` default has no `__dict__`, so in practice callers
        # have to pass a namespace-like object.
        self.args = args
        self.splits_of_interest = set()
        self.samples_of_interest = set()

        self.run = r
        self.progress = p

        # `in` replaces dict.has_key(), which no longer exists on Python 3; the
        # membership test behaves the same on Python 2.
        A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
        null = lambda x: x
        self.bin_id = A('bin_id', null)
        self.collection_id = A('collection_id', null)
        self.splits_of_interest_path = A('splits_of_interest', null)
        self.min_ratio = A('min_ratio', float)
        self.min_occurrence = A('min_occurrence', int)
        self.num_positions_from_each_split = A('num_positions_from_each_split',
                                               int)
        self.min_scatter = A('min_scatter', int)
        self.min_coverage_in_each_sample = A('min_coverage_in_each_sample',
                                             int)
        self.profile_db_path = A('profile_db', null)
        self.contigs_db_path = A('contigs_db', null)
        self.quince_mode = A('quince_mode', bool)
        self.output_file_path = A('output_file', null)
        self.samples_of_interest_path = A('samples_of_interest', null)

        # state that starts out empty and is not read from args
        self.variable_nts_table = {}
        self.merged_split_coverage_values = None
        self.unique_pos_identifier = 0
        self.split_name_position_dict = {}
        self.unique_pos_id_to_entry_id = {}
        self.contig_sequences = None
        self.input_file_path = None

        # Initialize the contigs super
        dbops.ContigsSuperclass.__init__(self,
                                         self.args,
                                         r=self.run,
                                         p=self.progress)
        self.init_contig_sequences()
예제 #8
0
    def __init__(self, args = {}, p=progress, r=run):
        """Read the parameters found in `args` into instance attributes.

        Every parameter is looked up in `args.__dict__`. A parameter that is
        missing is stored as None; one that is present is passed through its
        converter (float, int, bool, or left untouched).

        Parameters:
            args: object whose `__dict__` holds the parameters.
            p: accepted but not used by this method.
            r: accepted but not used by this method.
        """
        # NOTE(review): the `{}` default has no `__dict__`, so in practice callers
        # have to pass a namespace-like object.
        self.args = args

        self.splits_of_interest = set()
        self.samples_of_interest = set()

        # `in` replaces dict.has_key(), which no longer exists on Python 3; the
        # membership test behaves the same on Python 2.
        A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
        null = lambda x: x
        self.bin_id = A('bin_id', null)
        self.collection_name = A('collection_name', null)
        self.splits_of_interest_path = A('splits_of_interest', null)
        self.min_ratio = A('min_ratio', float)
        self.min_occurrence = A('min_occurrence', int)
        self.num_positions_from_each_split = A('num_positions_from_each_split', int)
        self.min_scatter = A('min_scatter', int)
        self.min_coverage_in_each_sample = A('min_coverage_in_each_sample', int)
        self.profile_db_path = A('profile_db', null)
        self.contigs_db_path = A('contigs_db', null)
        self.quince_mode = A('quince_mode', bool)
        self.output_file_path = A('output_file', null)
        self.samples_of_interest_path = A('samples_of_interest', null)
예제 #9
0
    def __init__(self, args={}, p=progress, r=run):
        """Read the parameters found in `args` and load the optional input files.

        Parameters are looked up in `args.__dict__`; a missing one is stored as
        None. When a samples information path is given, that file is loaded into
        `self.samples_information_dict`; when an input file path is given, it is
        loaded into `self.variable_nts_table`.

        Parameters:
            args: object whose `__dict__` holds the parameters.
            p: stored as `self.progress`.
            r: stored as `self.run`.
        """
        # NOTE(review): the `{}` default has no `__dict__`, so in practice callers
        # have to pass a namespace-like object.
        self.args = args

        self.run = r
        self.progress = p

        self.samples = None
        self.samples_information_dict = None
        self.variable_nts_table = None

        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
        null = lambda x: x
        self.input_file_path = A('input_file', null)
        self.samples_information_path = A('samples_information', null)
        self.max_num_unique_positions = A('max_num_unique_positions', int)
        self.output_file_path = A('output_file', null)

        filesnpaths.is_output_file_writable(self.output_file_path)

        if self.samples_information_path:
            filesnpaths.is_file_tab_delimited(self.samples_information_path)
            self.samples_information_dict = utils.get_TAB_delimited_file_as_dictionary(
                self.samples_information_path)

            # dict views cannot be indexed on Python 3, so peek at the first row
            # through an iterator; a file without rows reports 0 attributes
            # instead of raising.
            first_row = next(iter(self.samples_information_dict.values()), None)
            num_attributes = len(first_row) if first_row is not None else 0

            self.run.info(
                'samples_information', '%d attributes read for %d samples' %
                (num_attributes, len(self.samples_information_dict)))

        if self.input_file_path:
            filesnpaths.is_file_tab_delimited(self.input_file_path)
            self.progress.new('Reading the input file')
            self.progress.update('...')
            self.variable_nts_table = utils.get_TAB_delimited_file_as_dictionary(
                self.input_file_path)
            self.progress.end()

            self.run.info('input_file',
                          '%d entries read' % len(self.variable_nts_table))
예제 #10
0
    def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
        """Read the parameters from `args`, validate the outputs, and create a new structure database.

        Parameters:
            args: object whose `__dict__` holds the parameters; a missing
                parameter is stored as None.
            run: stored as `self.run`.
            progress: stored as `self.progress`.

        Raises:
            ConfigError: when the output database path does not end with '.db'.
        """
        self.args = args
        self.run = run
        self.progress = progress

        given = self.args.__dict__

        def flag(name):
            # boolean parameters are coerced only when present; absent stays None
            return bool(given[name]) if name in given else None

        # dict.get() yields None for parameters that args does not carry
        self.contigs_db_path = given.get('contigs_db')
        self.genes_of_interest_path = given.get('genes_of_interest')
        self.splits_of_interest_path = given.get('splits_of_interest')
        self.bin_id = given.get('bin_id')
        self.collection_name = given.get('collection_name')
        self.gene_caller_ids = given.get('gene_caller_ids')
        self.output_db_path = given.get('output_db_path')
        self.full_modeller_output = given.get('dump_dir')
        self.skip_DSSP = flag('skip_DSSP')
        self.modeller_executable = given.get('modeller_executable')
        self.DSSP_executable = None

        # open the contigs database and remember its hash
        utils.is_contigs_db(self.contigs_db_path)
        self.contigs_db = dbops.ContigsDatabase(self.contigs_db_path)
        self.contigs_db_hash = self.contigs_db.meta['contigs_db_hash']

        # MODELLER params
        self.modeller_database = given.get('modeller_database')
        self.scoring_method = given.get('scoring_method')
        self.max_number_templates = given.get('max_number_templates')
        self.percent_identical_cutoff = given.get('percent_identical_cutoff')
        self.num_models = given.get('num_models')
        self.deviation = given.get('deviation')
        self.very_fast = flag('very_fast')

        # the output database falls back to a default name and must be a '.db' file
        self.output_db_path = self.output_db_path or "STRUCTURE.db"
        if not self.output_db_path.endswith('.db'):
            raise ConfigError("The structure database output file (`-o / --output`) must end with '.db'")
        filesnpaths.is_output_file_writable(self.output_db_path)

        # the MODELLER dump directory is only checked when one was requested
        if self.full_modeller_output:
            dump_dir = filesnpaths.check_output_directory(self.full_modeller_output, ok_if_exists=False)
            self.full_modeller_output = dump_dir

        # identify which genes user wants to model structures for
        self.genes_of_interest = self.get_genes_of_interest(self.genes_of_interest_path, self.gene_caller_ids)

        self.sanity_check()

        # residue annotation
        self.residue_annotation_sources_info = self.get_residue_annotation_sources_info()
        table_structure, table_types = self.get_residue_info_table_structure()
        self.residue_info_table_structure = table_structure
        self.residue_info_table_types = table_types
        self.residue_annotation_df = pd.DataFrame({})

        # create the new structure database
        self.structure_db = StructureDatabase(
            self.output_db_path,
            self.contigs_db_hash,
            residue_info_structure_extras=self.residue_info_table_structure,
            residue_info_types_extras=self.residue_info_table_types,
            create_new=True,
        )

        self.contigs_super = ContigsSuperclass(self.args)
예제 #11
0
    def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
        """Collect the parameters from `args`, validate the outputs, and create a new structure database.

        Parameters:
            args: object whose `__dict__` holds the parameters; a missing
                parameter is stored as None.
            run: stored as `self.run`.
            progress: stored as `self.progress`.

        Raises:
            ConfigError: when the output database path does not end with '.db'.
        """
        self.args = args
        self.run = run
        self.progress = progress

        def fetch(name, convert):
            # converted value of `name`, or None when args does not carry it
            if name in self.args.__dict__:
                return convert(args.__dict__[name])
            return None

        def as_is(value):
            return value

        self.contigs_db_path = fetch('contigs_db', as_is)
        self.genes_of_interest_path = fetch('genes_of_interest', as_is)
        self.splits_of_interest_path = fetch('splits_of_interest', as_is)
        self.bin_id = fetch('bin_id', as_is)
        self.collection_name = fetch('collection_name', as_is)
        self.gene_caller_ids = fetch('gene_caller_ids', as_is)
        self.output_db_path = fetch('output_db_path', as_is)
        self.full_modeller_output = fetch('dump_dir', as_is)
        self.skip_DSSP = fetch('skip_DSSP', bool)
        self.modeller_executable = fetch('modeller_executable', as_is)
        self.DSSP_executable = None

        # open the contigs database and remember its hash
        utils.is_contigs_db(self.contigs_db_path)
        self.contigs_db = dbops.ContigsDatabase(self.contigs_db_path)
        self.contigs_db_hash = self.contigs_db.meta['contigs_db_hash']

        # MODELLER params
        self.modeller_database = fetch('modeller_database', as_is)
        self.scoring_method = fetch('scoring_method', as_is)
        self.max_number_templates = fetch('max_number_templates', as_is)
        self.percent_identical_cutoff = fetch('percent_identical_cutoff', as_is)
        self.num_models = fetch('num_models', as_is)
        self.deviation = fetch('deviation', as_is)
        self.very_fast = fetch('very_fast', bool)

        # the output database falls back to a default name and must be a '.db' file
        if not self.output_db_path:
            self.output_db_path = "STRUCTURE.db"
        has_db_suffix = self.output_db_path.endswith('.db')
        if not has_db_suffix:
            raise ConfigError("The structure database output file (`-o / --output`) must end with '.db'")
        filesnpaths.is_output_file_writable(self.output_db_path)

        # the MODELLER dump directory is only checked when one was requested
        if self.full_modeller_output:
            checked_dir = filesnpaths.check_output_directory(self.full_modeller_output, ok_if_exists=False)
            self.full_modeller_output = checked_dir

        # identify which genes user wants to model structures for
        self.genes_of_interest = self.get_genes_of_interest(self.genes_of_interest_path, self.gene_caller_ids)

        self.sanity_check()

        # residue annotation
        self.annotation_sources_info = self.get_annotation_sources_info()
        table_structure, table_types = self.get_residue_info_table_structure()
        self.residue_info_table_structure = table_structure
        self.residue_info_table_types = table_types
        self.res_annotation_df = pd.DataFrame({})

        # create the new structure database
        db_options = dict(
            residue_info_structure_extras=self.residue_info_table_structure,
            residue_info_types_extras=self.residue_info_table_types,
            create_new=True,
        )
        self.structure_db = StructureDatabase(self.output_db_path, self.contigs_db_hash, **db_options)

        self.contigs_super = ContigsSuperclass(self.args)