def __init__(self, args={}, p=progress, r=run):
    """Read the filtering parameters from `args` and initialize the contigs superclass.

    Every parameter is looked up in `args.__dict__`. A parameter that is not
    defined there is stored as None; otherwise its value is passed through
    the converter listed next to it below.

    NOTE(review): the `{}` default for `args` has no `__dict__` attribute, so
    the lookups below need a namespace-like object -- confirm that every
    caller passes one.
    """
    self.args = args
    self.splits_of_interest = set([])
    self.samples_of_interest = set([])

    self.run = r
    self.progress = p

    # `in` replaces `dict.has_key`, which only exists on Python 2; the
    # membership test itself is unchanged.
    A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
    null = lambda x: x  # identity converter: keep the value as given

    self.bin_id = A('bin_id', null)
    self.collection_id = A('collection_id', null)
    self.splits_of_interest_path = A('splits_of_interest', null)
    self.min_ratio = A('min_ratio', float)
    self.min_occurrence = A('min_occurrence', int)
    self.num_positions_from_each_split = A('num_positions_from_each_split', int)
    self.min_scatter = A('min_scatter', int)
    self.min_coverage_in_each_sample = A('min_coverage_in_each_sample', int)
    self.profile_db_path = A('profile_db', null)
    self.contigs_db_path = A('contigs_db', null)
    self.quince_mode = A('quince_mode', bool)
    self.output_file_path = A('output_file', null)
    self.samples_of_interest_path = A('samples_of_interest', null)

    # state that starts out empty and is not read from `args`
    self.variable_nts_table = {}
    self.merged_split_coverage_values = None
    self.unique_pos_identifier = 0
    self.split_name_position_dict = {}
    self.unique_pos_id_to_entry_id = {}
    self.contig_sequences = None
    self.input_file_path = None

    # Initialize the contigs super
    dbops.ContigsSuperclass.__init__(self, self.args, r=self.run, p=self.progress)

    self.init_contig_sequences()
def __init__(self, args={}, p=progress, r=run):
    """Read the input / output parameters from `args` and load the tab-delimited inputs.

    Parameters are looked up in `args.__dict__`; one that is not defined there
    is stored as None. When a samples information path or an input file path
    is given, the file is checked with `filesnpaths.is_file_tab_delimited` and
    read with `utils.get_TAB_delimited_file_as_dictionary`.

    NOTE(review): the `{}` default for `args` has no `__dict__` attribute, so
    a namespace-like object has to be passed -- confirm against callers.
    """
    self.args = args

    self.run = r
    self.progress = p

    self.samples = None
    self.samples_information_dict = None
    self.data = None

    # `in` replaces `dict.has_key`, which only exists on Python 2.
    A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
    null = lambda x: x  # identity converter: keep the value as given

    self.input_file_path = A('input_file', null)
    self.samples_information_path = A('samples_information', null)
    self.max_num_unique_positions = A('max_num_unique_positions', int)
    self.output_file_path = A('output_file', null)

    filesnpaths.is_output_file_writable(self.output_file_path)

    if self.samples_information_path:
        filesnpaths.is_file_tab_delimited(self.samples_information_path)
        self.samples_information_dict = utils.get_TAB_delimited_file_as_dictionary(self.samples_information_path)

        # Count the attributes of one entry. `next(iter(...))` works with both
        # Python 2 lists and Python 3 dict views (which cannot be indexed), and
        # the `()` default reports 0 attributes for an empty dictionary instead
        # of raising.
        first_entry = next(iter(self.samples_information_dict.values()), ())
        num_attributes = len(first_entry)

        self.run.info('samples_information', '%d attributes read for %d samples' % (num_attributes, len(self.samples_information_dict)))

    if self.input_file_path:
        filesnpaths.is_file_tab_delimited(self.input_file_path)

        self.progress.new('Reading the input file')
        self.progress.update('...')
        self.data = utils.get_TAB_delimited_file_as_dictionary(self.input_file_path)
        self.progress.end()

        self.run.info('input_file', '%d entries read' % len(self.data))
def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
    """Collect the parameters from `args`, then remove and / or add genes in the structure database.

    Parameters missing from `args.__dict__` are stored as None. At least one
    of the four gene options must be set, and a gene list may not be given
    together with its file counterpart; otherwise ConfigError is raised.
    """
    self.args = args
    self.run = run
    self.progress = progress

    def from_args(key, convert):
        # value of `key` in the args namespace passed through `convert`,
        # or None when the namespace does not define it
        namespace = self.args.__dict__
        if key not in namespace:
            return None
        return convert(namespace[key])

    def unchanged(value):
        return value

    # (attribute set on self, key looked up in args), in assignment order
    arg_parameters = (
        ('contigs_db_path', 'contigs_db'),
        ('structure_db_path', 'structure_db'),
        ('genes_to_remove', 'genes_to_remove'),
        ('genes_to_remove_path', 'genes_to_remove_file'),
        ('genes_to_add', 'genes_to_add'),
        ('genes_to_add_path', 'genes_to_add_file'),
        ('full_modeller_output', 'dump_dir'),
        ('modeller_executable', 'modeller_executable'),
    )
    for attribute, key in arg_parameters:
        setattr(self, attribute, from_args(key, unchanged))

    self.DSSP_executable = None

    utils.is_contigs_db(self.contigs_db_path)
    self.contigs_db = dbops.ContigsDatabase(self.contigs_db_path)
    self.contigs_db_hash = self.contigs_db.meta['contigs_db_hash']

    # init ContigsSuperClass
    self.contigs_super = ContigsSuperclass(self.args)

    # nothing to do unless at least one gene option was given
    if not (self.genes_to_remove or self.genes_to_remove_path or self.genes_to_add or self.genes_to_add_path):
        raise ConfigError("Please specify some genes to add or remove to your database.")

    # a gene list and its file counterpart are mutually exclusive
    if self.genes_to_remove and self.genes_to_remove_path:
        raise ConfigError("Provide either --genes-to-remove or --genes-to-remove-path. You provided both.")

    if self.genes_to_add and self.genes_to_add_path:
        raise ConfigError("Provide either --genes-to-add or --genes-to-add-path. You provided both.")

    # removal runs first and disconnects from the structure db when done
    if self.genes_to_remove or self.genes_to_remove_path:
        self.run.warning("Removing genes...", header="Updating %s" % self.structure_db_path, lc='green')
        self.load_structure_db()
        genes_to_drop = self.parse_genes(self.genes_to_remove, self.genes_to_remove_path)
        self.remove_genes(genes_to_drop)
        self.structure_db.disconnect()

    # addition loads the structure db again
    if self.genes_to_add or self.genes_to_add_path:
        self.run.warning("Adding genes...", header="Updating %s" % self.structure_db_path, lc='green')
        self.load_structure_db()
        self.add_genes()
def __init__(self, args={}, p=progress, r=run):
    """Read the variability parameters from `args`, validate the engine, and initialize the contigs superclass.

    Every parameter is looked up in `args.__dict__`; one that is not defined
    there is stored as None, otherwise it is passed through the converter
    listed next to it below.

    Raises ConfigError when `self.engine` is not in `variability_engines`.

    NOTE(review): `self.engine`, `self.run` and `self.progress` are read here
    but never assigned, and `p` / `r` are unused -- presumably the inheriting
    class sets those attributes before calling this initializer; confirm.
    NOTE(review): the `{}` default for `args` has no `__dict__` attribute, so
    a namespace-like object has to be passed.
    """
    self.args = args

    self.data = {}

    self.splits_of_interest = set([])
    self.samples_of_interest = set([])

    A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
    null = lambda x: x  # identity converter: keep the value as given

    self.bin_id = A('bin_id', null)
    self.collection_name = A('collection_name', null)
    self.splits_of_interest_path = A('splits_of_interest', null)
    self.min_departure_from_reference = A('min_departure_from_reference', float)
    self.max_departure_from_reference = A('max_departure_from_reference', float)
    self.min_departure_from_consensus = A('min_departure_from_consensus', float)
    self.max_departure_from_consensus = A('max_departure_from_consensus', float)
    self.min_occurrence = A('min_occurrence', int)
    self.num_positions_from_each_split = A('num_positions_from_each_split', int)
    self.min_scatter = A('min_scatter', int)
    self.min_coverage_in_each_sample = A('min_coverage_in_each_sample', int)
    self.profile_db_path = A('profile_db', null)
    self.contigs_db_path = A('contigs_db', null)
    self.quince_mode = A('quince_mode', bool)
    self.output_file_path = A('output_file', null)
    self.samples_of_interest_path = A('samples_of_interest', null)
    self.genes_of_interest_path = A('genes_of_interest', null)
    self.include_contig_names_in_output = A('include_contig_names', null)
    self.include_split_names_in_output = A('include_split_names', null)

    # state that starts out empty and is not read from `args`
    self.merged_split_coverage_values = None
    self.unique_pos_identifier = 0
    self.split_name_position_dict = {}
    self.unique_pos_id_to_entry_id = {}
    self.contig_sequences = None
    self.input_file_path = None

    if self.engine not in variability_engines:
        # call form of `raise`: the `raise Class, "message"` statement is a
        # syntax error on Python 3, while this form works on Python 2 as well
        raise ConfigError("The superclass is inherited with an unknown engine. Anvi'o needs an adult :(")

    # Initialize the contigs super
    dbops.ContigsSuperclass.__init__(self, self.args, r=self.run, p=self.progress)

    self.init_contig_sequences()
def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
    """Collect the parameters from `args`, then remove and / or add genes in the structure database.

    Parameters missing from `args.__dict__` are stored as None. ConfigError
    is raised when none of the four gene options is set, or when a gene list
    is given together with its file counterpart.
    """
    self.args = args
    self.run = run
    self.progress = progress

    def lookup(key, convert):
        # converted value of `key` from the args namespace; None when undefined
        namespace = self.args.__dict__
        return convert(namespace[key]) if key in namespace else None

    def keep(value):
        return value

    # (attribute set on self, key looked up in args, converter), in assignment order
    arg_parameters = (
        ('contigs_db_path', 'contigs_db', keep),
        ('structure_db_path', 'structure_db', keep),
        ('genes_to_remove', 'genes_to_remove', keep),
        ('genes_to_remove_path', 'genes_to_remove_file', keep),
        ('genes_to_add', 'genes_to_add', keep),
        ('genes_to_add_path', 'genes_to_add_file', keep),
        ('full_modeller_output', 'dump_dir', keep),
        ('modeller_executable', 'modeller_executable', keep),
        ('skip_genes_if_already_present', 'skip_genes_if_already_present', bool),
    )
    for attribute, key, convert in arg_parameters:
        setattr(self, attribute, lookup(key, convert))

    self.DSSP_executable = None

    utils.is_contigs_db(self.contigs_db_path)
    self.contigs_db = dbops.ContigsDatabase(self.contigs_db_path)
    self.contigs_db_hash = self.contigs_db.meta['contigs_db_hash']

    # init ContigsSuperClass
    self.contigs_super = ContigsSuperclass(self.args)

    # at least one of the gene options has to be present
    if not (self.genes_to_remove or self.genes_to_remove_path or self.genes_to_add or self.genes_to_add_path):
        raise ConfigError("Please specify some genes to add or remove to your database.")

    # a gene list and its file counterpart exclude each other
    if self.genes_to_remove and self.genes_to_remove_path:
        raise ConfigError("Provide either --genes-to-remove or --genes-to-remove-path. You provided both.")

    if self.genes_to_add and self.genes_to_add_path:
        raise ConfigError("Provide either --genes-to-add or --genes-to-add-path. You provided both.")

    # removal comes first and ends by disconnecting from the structure db
    if self.genes_to_remove or self.genes_to_remove_path:
        self.run.warning("Removing genes...", header="Updating %s" % self.structure_db_path, lc='green')
        self.load_structure_db()
        unwanted_genes = self.parse_genes(self.genes_to_remove, self.genes_to_remove_path)
        self.remove_genes(unwanted_genes)
        self.structure_db.disconnect()

    # addition loads the structure db once more
    if self.genes_to_add or self.genes_to_add_path:
        self.run.warning("Adding genes...", header="Updating %s" % self.structure_db_path, lc='green')
        self.load_structure_db()
        self.add_genes()
def __init__(self, args={}, p=progress, r=run):
    """Store the filtering parameters found in `args` and set up the contigs superclass.

    A parameter that `args.__dict__` does not define is stored as None; a
    defined one is passed through the converter given in its assignment
    below (`null` keeps the value unchanged).

    NOTE(review): the `{}` default for `args` has no `__dict__` attribute, so
    a namespace-like object has to be passed -- confirm against callers.
    """
    self.args = args
    self.splits_of_interest = set([])
    self.samples_of_interest = set([])

    self.run = r
    self.progress = p

    # Membership is tested with `in` because `dict.has_key` was removed in
    # Python 3; on Python 2 the two are equivalent.
    A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
    null = lambda x: x

    self.bin_id = A('bin_id', null)
    self.collection_id = A('collection_id', null)
    self.splits_of_interest_path = A('splits_of_interest', null)
    self.min_ratio = A('min_ratio', float)
    self.min_occurrence = A('min_occurrence', int)
    self.num_positions_from_each_split = A('num_positions_from_each_split', int)
    self.min_scatter = A('min_scatter', int)
    self.min_coverage_in_each_sample = A('min_coverage_in_each_sample', int)
    self.profile_db_path = A('profile_db', null)
    self.contigs_db_path = A('contigs_db', null)
    self.quince_mode = A('quince_mode', bool)
    self.output_file_path = A('output_file', null)
    self.samples_of_interest_path = A('samples_of_interest', null)

    # attributes that are initialized empty rather than read from `args`
    self.variable_nts_table = {}
    self.merged_split_coverage_values = None
    self.unique_pos_identifier = 0
    self.split_name_position_dict = {}
    self.unique_pos_id_to_entry_id = {}
    self.contig_sequences = None
    self.input_file_path = None

    # Initialize the contigs super
    dbops.ContigsSuperclass.__init__(self, self.args, r=self.run, p=self.progress)

    self.init_contig_sequences()
def __init__(self, args={}, p=progress, r=run):
    """Read the filtering parameters from `args` into attributes.

    A parameter that `args.__dict__` does not define is stored as None; a
    defined one is passed through the converter given in its assignment
    below (`null` keeps the value unchanged).

    NOTE(review): `p` and `r` are accepted but not used in this initializer.
    NOTE(review): the `{}` default for `args` has no `__dict__` attribute, so
    a namespace-like object has to be passed -- confirm against callers.
    """
    self.args = args
    self.splits_of_interest = set([])
    self.samples_of_interest = set([])

    # `in` replaces `dict.has_key`, which only exists on Python 2.
    A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
    null = lambda x: x

    self.bin_id = A('bin_id', null)
    self.collection_name = A('collection_name', null)
    self.splits_of_interest_path = A('splits_of_interest', null)
    self.min_ratio = A('min_ratio', float)
    self.min_occurrence = A('min_occurrence', int)
    self.num_positions_from_each_split = A('num_positions_from_each_split', int)
    self.min_scatter = A('min_scatter', int)
    self.min_coverage_in_each_sample = A('min_coverage_in_each_sample', int)
    self.profile_db_path = A('profile_db', null)
    self.contigs_db_path = A('contigs_db', null)
    self.quince_mode = A('quince_mode', bool)
    self.output_file_path = A('output_file', null)
    self.samples_of_interest_path = A('samples_of_interest', null)
def __init__(self, args={}, p=progress, r=run):
    """Read the input / output parameters from `args` and load the tab-delimited inputs.

    Parameters are looked up in `args.__dict__`; one that is not defined there
    is stored as None. The samples information file, when given, is read into
    `self.samples_information_dict`, and the input file, when given, into
    `self.variable_nts_table`.

    NOTE(review): the `{}` default for `args` has no `__dict__` attribute, so
    a namespace-like object has to be passed -- confirm against callers.
    """
    self.args = args

    self.run = r
    self.progress = p

    self.samples = None
    self.samples_information_dict = None
    self.variable_nts_table = None

    # `in` replaces `dict.has_key`, which only exists on Python 2.
    A = lambda x, t: t(args.__dict__[x]) if x in args.__dict__ else None
    null = lambda x: x  # identity converter: keep the value as given

    self.input_file_path = A('input_file', null)
    self.samples_information_path = A('samples_information', null)
    self.max_num_unique_positions = A('max_num_unique_positions', int)
    self.output_file_path = A('output_file', null)

    filesnpaths.is_output_file_writable(self.output_file_path)

    if self.samples_information_path:
        filesnpaths.is_file_tab_delimited(self.samples_information_path)
        self.samples_information_dict = utils.get_TAB_delimited_file_as_dictionary(self.samples_information_path)

        # Take one entry to count its attributes without indexing `.values()`:
        # a Python 3 dict view cannot be subscripted. The `()` default yields
        # 0 attributes for an empty dictionary instead of raising.
        any_entry = next(iter(self.samples_information_dict.values()), ())
        num_attributes = len(any_entry)

        self.run.info('samples_information', '%d attributes read for %d samples' % (num_attributes, len(self.samples_information_dict)))

    if self.input_file_path:
        filesnpaths.is_file_tab_delimited(self.input_file_path)

        self.progress.new('Reading the input file')
        self.progress.update('...')
        self.variable_nts_table = utils.get_TAB_delimited_file_as_dictionary(self.input_file_path)
        self.progress.end()

        self.run.info('input_file', '%d entries read' % len(self.variable_nts_table))
def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
    """Collect the parameters from `args`, validate the outputs, and create a new structure database.

    Parameters missing from `args.__dict__` are stored as None. The output
    database path falls back to "STRUCTURE.db" when it is not set, and
    ConfigError is raised when the path does not end with '.db'.
    """
    self.args = args
    self.run = run
    self.progress = progress

    def lookup(key, convert):
        # converted value of `key` from the args namespace; None when undefined
        namespace = self.args.__dict__
        if key not in namespace:
            return None
        return convert(namespace[key])

    def keep(value):
        return value

    # initialize self.arg parameters
    # (attribute set on self, key looked up in args, converter), in assignment order
    general_parameters = (
        ('contigs_db_path', 'contigs_db', keep),
        ('genes_of_interest_path', 'genes_of_interest', keep),
        ('splits_of_interest_path', 'splits_of_interest', keep),
        ('bin_id', 'bin_id', keep),
        ('collection_name', 'collection_name', keep),
        ('gene_caller_ids', 'gene_caller_ids', keep),
        ('output_db_path', 'output_db_path', keep),
        ('full_modeller_output', 'dump_dir', keep),
        ('skip_DSSP', 'skip_DSSP', bool),
        ('modeller_executable', 'modeller_executable', keep),
    )
    for attribute, key, convert in general_parameters:
        setattr(self, attribute, lookup(key, convert))

    self.DSSP_executable = None

    utils.is_contigs_db(self.contigs_db_path)
    self.contigs_db = dbops.ContigsDatabase(self.contigs_db_path)
    self.contigs_db_hash = self.contigs_db.meta['contigs_db_hash']

    # MODELLER params: the attribute name and the key in args are the same
    modeller_parameters = (
        'modeller_database',
        'scoring_method',
        'max_number_templates',
        'percent_identical_cutoff',
        'num_models',
        'deviation',
    )
    for name in modeller_parameters:
        setattr(self, name, lookup(name, keep))
    self.very_fast = lookup('very_fast', bool)

    # check database output
    if not self.output_db_path:
        self.output_db_path = "STRUCTURE.db"

    has_db_extension = self.output_db_path.endswith('.db')
    if not has_db_extension:
        raise ConfigError("The structure database output file (`-o / --output`) must end with '.db'")

    filesnpaths.is_output_file_writable(self.output_db_path)

    # check modeller output
    if self.full_modeller_output:
        checked_directory = filesnpaths.check_output_directory(self.full_modeller_output, ok_if_exists=False)
        self.full_modeller_output = checked_directory

    # identify which genes user wants to model structures for
    self.genes_of_interest = self.get_genes_of_interest(self.genes_of_interest_path, self.gene_caller_ids)

    self.sanity_check()

    # residue annotation
    self.residue_annotation_sources_info = self.get_residue_annotation_sources_info()
    table_structure, table_types = self.get_residue_info_table_structure()
    self.residue_info_table_structure = table_structure
    self.residue_info_table_types = table_types
    self.residue_annotation_df = pd.DataFrame({})

    # initialize StructureDatabase
    self.structure_db = StructureDatabase(
        self.output_db_path,
        self.contigs_db_hash,
        residue_info_structure_extras=self.residue_info_table_structure,
        residue_info_types_extras=self.residue_info_table_types,
        create_new=True,
    )

    # init ContigsSuperClass
    self.contigs_super = ContigsSuperclass(self.args)
def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
    """Collect the parameters from `args`, validate the outputs, and create a new structure database.

    Parameters missing from `args.__dict__` are stored as None. An unset
    output database path becomes "STRUCTURE.db"; a path that does not end
    with '.db' raises ConfigError.
    """
    self.args = args
    self.run = run
    self.progress = progress

    def from_args(key, convert):
        # value of `key` in the args namespace passed through `convert`,
        # or None when the namespace does not define it
        namespace = self.args.__dict__
        return convert(namespace[key]) if key in namespace else None

    def unchanged(value):
        return value

    # initialize self.arg parameters
    # (attribute set on self, key looked up in args, converter), in assignment order
    general_parameters = (
        ('contigs_db_path', 'contigs_db', unchanged),
        ('genes_of_interest_path', 'genes_of_interest', unchanged),
        ('splits_of_interest_path', 'splits_of_interest', unchanged),
        ('bin_id', 'bin_id', unchanged),
        ('collection_name', 'collection_name', unchanged),
        ('gene_caller_ids', 'gene_caller_ids', unchanged),
        ('output_db_path', 'output_db_path', unchanged),
        ('full_modeller_output', 'dump_dir', unchanged),
        ('skip_DSSP', 'skip_DSSP', bool),
        ('modeller_executable', 'modeller_executable', unchanged),
    )
    for attribute, key, convert in general_parameters:
        setattr(self, attribute, from_args(key, convert))

    self.DSSP_executable = None

    utils.is_contigs_db(self.contigs_db_path)
    self.contigs_db = dbops.ContigsDatabase(self.contigs_db_path)
    self.contigs_db_hash = self.contigs_db.meta['contigs_db_hash']

    # MODELLER params: the attribute name and the key in args are the same
    modeller_parameters = (
        'modeller_database',
        'scoring_method',
        'max_number_templates',
        'percent_identical_cutoff',
        'num_models',
        'deviation',
    )
    for name in modeller_parameters:
        setattr(self, name, from_args(name, unchanged))
    self.very_fast = from_args('very_fast', bool)

    # check database output
    if not self.output_db_path:
        self.output_db_path = "STRUCTURE.db"

    ends_with_db = self.output_db_path.endswith('.db')
    if not ends_with_db:
        raise ConfigError("The structure database output file (`-o / --output`) must end with '.db'")

    filesnpaths.is_output_file_writable(self.output_db_path)

    # check modeller output
    if self.full_modeller_output:
        verified_directory = filesnpaths.check_output_directory(self.full_modeller_output, ok_if_exists=False)
        self.full_modeller_output = verified_directory

    # identify which genes user wants to model structures for
    self.genes_of_interest = self.get_genes_of_interest(self.genes_of_interest_path, self.gene_caller_ids)

    self.sanity_check()

    # residue annotation
    self.annotation_sources_info = self.get_annotation_sources_info()
    info_structure, info_types = self.get_residue_info_table_structure()
    self.residue_info_table_structure = info_structure
    self.residue_info_table_types = info_types
    self.res_annotation_df = pd.DataFrame({})

    # initialize StructureDatabase
    self.structure_db = StructureDatabase(
        self.output_db_path,
        self.contigs_db_hash,
        residue_info_structure_extras=self.residue_info_table_structure,
        residue_info_types_extras=self.residue_info_table_types,
        create_new=True,
    )

    # init ContigsSuperClass
    self.contigs_super = ContigsSuperclass(self.args)