def sanity_check(self):
    """Validate the input files and stage copies of them in the output directory.

    The view data file is checked with `filesnpaths.is_file_tab_delimited` and,
    when a tree file is set, it is checked with `filesnpaths.is_proper_newick`.
    After `self.check_output_directory()`, every input file that is set is
    copied to the path given by `self.get_output_file_path` and the matching
    attribute is re-pointed to that copy. `self.sanity_checked` is set to True
    at the end.
    """
    filesnpaths.is_file_tab_delimited(self.view_data_path)
    if self.tree_file_path:
        filesnpaths.is_proper_newick(self.tree_file_path)

    self.check_output_directory()

    # the view data file is staged unconditionally
    staged_view_data_path = self.get_output_file_path('view_data.txt')
    shutil.copyfile(self.view_data_path, staged_view_data_path)
    self.view_data_path = staged_view_data_path

    # the remaining inputs are staged only when they are set
    optional_inputs = (
        ('tree_file_path', 'tree.txt'),
        ('additional_view_data_file_path', 'additional_view_data.txt'),
        ('samples_info_file_path', 'anvio_samples_info.txt'),
    )
    for attribute_name, output_file_name in optional_inputs:
        if not getattr(self, attribute_name):
            continue

        staged_path = self.get_output_file_path(output_file_name)
        shutil.copyfile(getattr(self, attribute_name), staged_path)
        setattr(self, attribute_name, staged_path)

    self.sanity_checked = True
def sanity_check(self):
    """Check clustering settings and input files, then stage the inputs.

    `self.distance` and `self.linkage` fall back to the defaults in `constants`
    when they are not set, and the pair is checked with
    `clustering.is_distance_and_linkage_compatible`. The view data file is
    checked for being TAB-delimited and the optional tree file for being a
    proper newick. Each input file that is set is then copied to the path
    given by `self.get_output_file_path`, the attribute is updated to that
    copy, and `self.sanity_checked` is set to True.
    """

    def stage(attribute_name, output_file_name):
        # copy the file an attribute points to, then re-point the attribute to the copy
        staged_path = self.get_output_file_path(output_file_name)
        shutil.copyfile(getattr(self, attribute_name), staged_path)
        setattr(self, attribute_name, staged_path)

    self.distance = self.distance if self.distance else constants.distance_metric_default
    self.linkage = self.linkage if self.linkage else constants.linkage_method_default
    clustering.is_distance_and_linkage_compatible(self.distance, self.linkage)

    filesnpaths.is_file_tab_delimited(self.view_data_path)
    if self.tree_file_path:
        filesnpaths.is_proper_newick(self.tree_file_path)

    self.check_output_directory()

    # view data is always staged; everything else only when it is set
    stage('view_data_path', 'view_data.txt')

    if self.tree_file_path:
        stage('tree_file_path', 'tree.txt')

    if self.additional_view_data_file_path:
        stage('additional_view_data_file_path', 'additional_view_data.txt')

    if self.samples_info_file_path:
        stage('samples_info_file_path', 'anvio_samples_info.txt')

    self.sanity_checked = True
def run_command(self, input_file_path, output_file_path):
    """Run the FastTree command on an input file and store the resulting tree.

    The contents of `input_file_path` are piped to the process started from
    `self.command`. What the process prints to stderr is relayed through
    `run`: the first line as the version, blocks starting with `WARNING! ` as
    warnings, and the remaining lines as info entries. The stdout of the
    process is written to `output_file_path` if `filesnpaths.is_proper_newick`
    accepts it.

    Args:
        input_file_path: path of the file whose bytes are sent to stdin.
        output_file_path: path the newick output is written to.
    """
    # read the input up front so the handle is closed even if the process fails to start
    with open(input_file_path, 'rb') as input_file:
        input_data = input_file.read()

    fasttree = Popen(self.command, stdout=PIPE, stdin=PIPE, stderr=PIPE)
    stdout_data, stderr_data = fasttree.communicate(input=input_data)

    output_stdout = stdout_data.decode().rstrip()
    output_stderr = stderr_data.decode().splitlines()

    # stderr may be empty (e.g. the command printed nothing), so do not index blindly
    if output_stderr:
        run.info("Version", output_stderr[0])

    warning = ""
    for line in output_stderr[1:]:
        if warning or line.startswith("WARNING! "):
            # a warning block runs until the next empty line
            warning += line + "\n"
            if line == "":
                run.warning(warning)
                warning = ""
        elif line.startswith(" "):
            pass
        elif 'seconds' in line:
            pass
        else:
            fields = line.split(":")
            if len(fields) == 2:
                run.info(fields[0], fields[1].strip())
            else:
                run.info("Info", line)

    # a warning block that is not terminated by an empty line would otherwise be lost
    if warning:
        run.warning(warning)

    if filesnpaths.is_proper_newick(output_stdout):
        with open(output_file_path, 'w') as output_file:
            output_file.write(output_stdout + '\n')

    run.info('FastTree output newick file', output_file_path, mc='green', nl_before=1, nl_after=1)
def process_single_order_data(self, single_order_path, single_order_name):
    """Inject a single order into `self.samples_order_dict`.

    Does nothing when `single_order_path` is empty. Otherwise the file must
    hold exactly one line. If `filesnpaths.is_proper_newick` accepts that line
    it is stored under the 'newick' key of the order, otherwise under the
    'basic' key. The order is also validated through a temporary samples order
    file, and its name is added to `self.available_orders`.

    Args:
        single_order_path: path of the plain text file that holds the order.
        single_order_name: name under which the order is stored.

    Raises:
        SamplesError: if a path is given without a name, or if the file does
            not contain exactly one line.
    """
    if not single_order_path:
        return

    if not single_order_name:
        raise SamplesError("You provided a file for a single order, but not a name for it. This is a no no :/")

    filesnpaths.is_file_plain_text(single_order_path)

    # plain text mode already applies universal newlines on Python 3 (the 'U' flag was removed
    # in Python 3.11), and the `with` block makes sure the handle is closed.
    with open(single_order_path, 'r') as single_order_file:
        single_order_file_content = [l.strip('\n') for l in single_order_file.readlines()]

    if len(single_order_file_content) != 1:
        raise SamplesError("The single order file should contain a single line of information. It can't have nothing,\
                            it can't have too much. Just a single newick tree, or a comma-separated list of sample\
                            names.")

    _order = single_order_file_content.pop()

    # if you are reading this line, please brace yourself to possibly one of the silliest
    # bunch of lines in the anvi'o codebase. the reason we are doing this this way is quite
    # a long story, and deserves a FIXME, but in order to utilize the excellent function
    # in the filesnpaths module to check the contents of the samples order dict rigorously,
    # we need to have this information in a file. a better way could have been implementing
    # a filesnpaths.is_proper_samples_order_content function next to the currently available
    # filesnpaths.is_proper_samples_order_file (the latter would call the former with a dict
    # and it would be much more flexible), but we can't import utils from within filesnpaths.
    # without utils we don't have a get_TAB_delimited_file_as_dictionary function, and we are
    # definitely not going to implement it in two places :( recovering from a poor design by
    # doing something even poorer? couldn't have we fixed this once and for all instead of
    # writing this paragraph? well. just remember that you are thinking about a rhetorical
    # question in a comment section. so sometimes we do things that are not quite productive.
    temp_samples_order_file_path = filesnpaths.get_temp_file_path()
    try:
        with open(temp_samples_order_file_path, 'w') as temp_samples_order_file:
            temp_samples_order_file.write('\t'.join(['attributes', 'basic', 'newick']) + '\n')

            if filesnpaths.is_proper_newick(_order, dont_raise=True):
                temp_samples_order_file.write('\t'.join([single_order_name, '', _order]) + '\n')
                self.samples_order_dict[single_order_name] = {'newick': _order, 'basic': None}
            else:
                temp_samples_order_file.write('\t'.join([single_order_name, _order, '']) + '\n')
                self.samples_order_dict[single_order_name] = {'basic': _order, 'newick': None}

        sample_names_in_samples_order_file = filesnpaths.is_proper_samples_order_file(temp_samples_order_file_path)
    finally:
        # clean up the temporary file even if the validation above raises
        if os.path.exists(temp_samples_order_file_path):
            os.remove(temp_samples_order_file_path)

    if not self.sample_names_in_samples_information_file:
        self.sample_names_in_samples_order_file = sample_names_in_samples_order_file

    self.available_orders.add(single_order_name)

    self.run.info('Samples order', "A single order for '%s' is also loaded" % single_order_name, quiet=self.quiet)
def sanity_check(self):
    """Check the input files and move working copies into the output directory.

    The view data file is checked for being TAB-delimited; the tree file, if
    one is set, is checked for being a proper newick. After the output
    directory check, the view data file (always) and the tree and additional
    view data files (when set) are copied to the paths returned by
    `self.get_output_file_path`, and the attributes are updated to those
    copies. Finally `self.sanity_checked` is set to True.
    """
    filesnpaths.is_file_tab_delimited(self.view_data_path)

    if self.tree_file_path:
        filesnpaths.is_proper_newick(self.tree_file_path)

    self.check_output_directory()

    # (attribute holding the path, file name in the output directory, always copied?)
    staging_plan = [
        ('view_data_path', 'view_data.txt', True),
        ('tree_file_path', 'tree.txt', False),
        ('additional_view_data_file_path', 'additional_view_data.txt', False),
    ]

    for attribute, target_name, is_mandatory in staging_plan:
        if is_mandatory or getattr(self, attribute):
            target_path = self.get_output_file_path(target_name)
            shutil.copyfile(getattr(self, attribute), target_path)
            setattr(self, attribute, target_path)

    self.sanity_checked = True
def load_from_user_files(self, args):
    """Set up the interactive interface in ad hoc mode from user-provided files.

    Validates the combination of parameters, checks the tree and view data
    files, fills `self.p_meta`, `self.views`, `self.split_names_ordered`,
    `self.splits_basic_info` and `self.split_sequences`, creates an empty
    profile database at `self.profile_db_path` if none exists there, and
    initializes `self.states_table` and the collections.

    Args:
        args: not used by this method.

    Raises:
        ConfigError: if a contigs database is declared, the profile database,
            view data or tree is missing, `--view` / show views / show states
            are requested, the view data header does not start with 'contig',
            or names in the view data are missing from the FASTA file.
    """
    # NOTE: exceptions are raised in call form, which is valid on both Python 2 and 3
    if self.contigs_db_path:
        raise ConfigError("When you want to use the interactive interface in an ad hoc manner, you must\
                           not use a contigs database.")

    if not self.profile_db_path:
        raise ConfigError("Even when you want to use the interactive interface in an ad hoc manner by\
                           using the '--manual-mode' flag, you still need to declare a profile database.\
                           The profile database in this mode only used to read or store the 'state' of\
                           the display for visualization purposes. You DO NOT need to point to an already\
                           existing database, as anvi'o will generate an empty one for your if there is no\
                           profile database.")

    if (not self.view_data_path) or (not self.tree):
        raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                           each of the '-d', and '-t' parameters. Please see the documentation for help.")

    if self.view:
        raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                           in manual mode")

    if self.show_views:
        raise ConfigError("Sorry, there are no views to show in manual mode :/")

    if self.show_states:
        raise ConfigError("Sorry, there are no states to show in manual mode :/")

    filesnpaths.is_file_exists(self.tree)
    filesnpaths.is_proper_newick(self.tree)

    view_data_path = os.path.abspath(self.view_data_path)
    self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
    self.p_meta['output_dir'] = None
    self.p_meta['views'] = {}
    self.p_meta['merged'] = True
    self.p_meta['default_view'] = 'single'
    self.p_meta['default_clustering'] = 'default'
    self.p_meta['available_clusterings'] = ['default']

    # read the tree through a context manager so the file handle is closed
    with open(os.path.abspath(self.tree)) as tree_file:
        self.p_meta['clusterings'] = {'default': {'newick': tree_file.read()}}

    self.default_view = self.p_meta['default_view']

    # sanity of the view data
    filesnpaths.is_file_tab_delimited(view_data_path)
    view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
    if not view_data_columns[0] == "contig":
        raise ConfigError("The first row of the first column of the view data file must\
                           say 'contig', which is not the case for your view data file\
                           ('%s'). Please make sure this is a properly formatted view data\
                           file." % (view_data_path))

    # load view data as the default view:
    self.views[self.default_view] = {'header': view_data_columns[1:],
                                     'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}

    # list() keeps this a real list on Python 3, where keys() is a view
    self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

    # we assume that the sample names are the header of the view data, so we might as well set it up:
    self.p_meta['samples'] = self.views[self.default_view]['header']

    # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
    # otherwise we will leave them empty
    self.splits_basic_info = {}
    self.split_sequences = None
    if self.p_meta['splits_fasta']:
        filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
        self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

        names_missing_in_FASTA = set(self.split_names_ordered) - set(self.split_sequences.keys())
        num_names_missing_in_FASTA = len(names_missing_in_FASTA)
        if num_names_missing_in_FASTA:
            raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                               FASTA file you provided. Here is an example to one of those %d names that occur\
                               in your data file, but not in the FASTA file: "%s"' % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

        # setup a mock splits_basic_info dict
        for split_id in self.split_names_ordered:
            self.splits_basic_info[split_id] = {'length': len(self.split_sequences[split_id]),
                                                'gc_content': utils.get_GC_content_for_sequence(self.split_sequences[split_id])}

    # create a new, empty profile database for ad hoc operations
    if not os.path.exists(self.profile_db_path):
        profile_db = ProfileDatabase(self.profile_db_path)
        profile_db.create({'db_type': 'profile', 'merged': True, 'contigs_db_hash': None, 'samples': ','.join(self.p_meta['samples'])})

    # create an instance of states table
    self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

    # also populate collections, if there are any
    self.collections.populate_sources_dict(self.profile_db_path, anvio.__profile__version__)
def load_manual_mode(self, args):
    """Set up the interactive interface in manual mode from user-provided files.

    Validates the combination of parameters, checks the tree file, and fills
    `self.p_meta`, `self.views`, `self.split_names_ordered`,
    `self.splits_basic_info` and `self.split_sequences`. When no view data
    file is given, a mock view is built from the leaf names of the tree. An
    empty profile database is created at `self.profile_db_path` if none exists
    there, and `self.states_table` and the collections are initialized.

    Args:
        args: not used by this method.

    Raises:
        ConfigError: if a contigs database is declared, the profile database
            path is missing or points to a database associated with a contigs
            database, the tree is missing, `--view` / show views / show states
            are requested, the view data header does not start with 'contig',
            or names in the view data are missing from the FASTA file.
    """
    # NOTE: exceptions are raised in call form, which is valid on both Python 2 and 3
    if self.contigs_db_path:
        raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                           not use a contigs database.")

    # this check has to come before os.path.exists() below, which can not handle a missing
    # (None) path and would fail with a TypeError instead of this explanation.
    if not self.profile_db_path:
        raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                           to declare a profile database. The profile database in this mode only used to\
                           read or store the 'state' of the display for visualization purposes. You DO\
                           NOT need to point to an already existing database, as anvi'o will generate\
                           an empty one for your if there is no profile database.")

    # if the user is using an existing profile database, we need to make sure that it is not associated
    # with a contigs database, since it would mean that it is a full anvi'o profile database and should
    # not be included in manual operations.
    if os.path.exists(self.profile_db_path):
        profile_db = ProfileDatabase(self.profile_db_path)
        if profile_db.meta['contigs_db_hash']:
            raise ConfigError("Well. It seems the profile database is associated with a contigs database,\
                               which means using it in manual mode is not the best way to use it. Probably\
                               what you wanted to do is to let the manual mode create a new profile database\
                               for you. Simply type in a new profile database path (it can be a file name\
                               that doesn't exist).")

    if not self.tree:
        raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                           at least the tree file. Please see the documentation for help.")

    if self.view:
        raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                           in manual mode")

    if self.show_views:
        raise ConfigError("Sorry, there are no views to show in manual mode :/")

    if self.show_states:
        raise ConfigError("Sorry, there are no states to show in manual mode :/")

    filesnpaths.is_file_exists(self.tree)
    tree = filesnpaths.is_proper_newick(self.tree)

    view_data_path = os.path.abspath(self.view_data_path) if self.view_data_path else None
    self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
    self.p_meta['output_dir'] = None
    self.p_meta['views'] = {}
    self.p_meta['merged'] = True
    self.p_meta['default_view'] = 'single'

    clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(self.tree)
    self.p_meta['default_clustering'] = clustering_id
    self.p_meta['available_clusterings'] = [clustering_id]

    # the newick is stored as one line; the context manager closes the file handle
    with open(os.path.abspath(self.tree)) as tree_file:
        newick = ''.join([l.strip() for l in tree_file.readlines()])
    self.p_meta['clusterings'] = {clustering_id: {'newick': newick}}

    self.default_view = self.p_meta['default_view']

    if self.view_data_path:
        # sanity of the view data
        filesnpaths.is_file_tab_delimited(view_data_path)
        view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
        if not view_data_columns[0] == "contig":
            raise ConfigError("The first row of the first column of the view data file must\
                               say 'contig', which is not the case for your view data file\
                               ('%s'). Please make sure this is a properly formatted view data\
                               file." % (view_data_path))

        # load view data as the default view:
        self.views[self.default_view] = {'header': view_data_columns[1:],
                                         'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}
    else:
        # no view data is provided... it is only the tree we have. we will create a mock 'view data dict'
        # here using what is in the tree.
        names_in_the_tree = [n.name for n in tree.get_leaves()]

        ad_hoc_dict = {}
        for item in names_in_the_tree:
            ad_hoc_dict[item] = {'names': item}

        self.views[self.default_view] = {'header': ['names'],
                                         'dict': ad_hoc_dict}

    # list() keeps this a real list on Python 3, where keys() is a view
    self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

    # we assume that the sample names are the header of the view data, so we might as well set it up:
    self.p_meta['samples'] = self.views[self.default_view]['header']

    # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
    # otherwise we will leave them empty
    self.splits_basic_info = {}
    self.split_sequences = None
    if self.p_meta['splits_fasta']:
        filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
        self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

        names_missing_in_FASTA = set(self.split_names_ordered) - set(self.split_sequences.keys())
        num_names_missing_in_FASTA = len(names_missing_in_FASTA)
        if num_names_missing_in_FASTA:
            raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                               FASTA file you provided. Here is an example to one of those %d names that occur\
                               in your data file, but not in the FASTA file: "%s"' % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

        # setup a mock splits_basic_info dict
        for split_id in self.split_names_ordered:
            self.splits_basic_info[split_id] = {'length': len(self.split_sequences[split_id]),
                                                'gc_content': utils.get_GC_content_for_sequence(self.split_sequences[split_id])}

    # create a new, empty profile database for manual operations
    if not os.path.exists(self.profile_db_path):
        profile_db = ProfileDatabase(self.profile_db_path)
        profile_db.create({'db_type': 'profile', 'merged': True, 'contigs_db_hash': None, 'samples': ','.join(self.p_meta['samples'])})

    # create an instance of states table
    self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

    # also populate collections, if there are any
    self.collections.populate_collections_dict(self.profile_db_path, anvio.__profile__version__)
def load_from_user_files(self, args):
    """Prepare the interactive interface for an ad hoc run based on user files.

    The method checks that the parameters make sense for an ad hoc run,
    verifies the tree and view data files, and populates `self.p_meta`,
    `self.views`, `self.split_names_ordered`, `self.splits_basic_info` and
    `self.split_sequences`. If there is no file at `self.profile_db_path`, an
    empty profile database is created there. `self.states_table` is
    instantiated and the collections sources are populated at the end.

    Args:
        args: not used by this method.

    Raises:
        ConfigError: when a contigs database is declared; when the profile
            database, the view data or the tree is not declared; when
            `--view`, show views or show states is requested; when the first
            column header of the view data is not 'contig'; or when the FASTA
            file lacks names that occur in the view data.
    """
    # NOTE: the call form of `raise` is used since it works on both Python 2 and Python 3
    if self.contigs_db_path:
        raise ConfigError(
            "When you want to use the interactive interface in an ad hoc manner, you must\
             not use a contigs database.")

    if not self.profile_db_path:
        raise ConfigError(
            "Even when you want to use the interactive interface in an ad hoc manner by\
             using the '--manual-mode' flag, you still need to declare a profile database.\
             The profile database in this mode only used to read or store the 'state' of\
             the display for visualization purposes. You DO NOT need to point to an already\
             existing database, as anvi'o will generate an empty one for your if there is no\
             profile database.")

    if (not self.view_data_path) or (not self.tree):
        raise ConfigError(
            "When you are running the interactive interface in manual mode, you must declare\
             each of the '-d', and '-t' parameters. Please see the documentation for help.")

    if self.view:
        raise ConfigError(
            "You can't use '--view' parameter when you are running the interactive interface\
             in manual mode")

    if self.show_views:
        raise ConfigError("Sorry, there are no views to show in manual mode :/")

    if self.show_states:
        raise ConfigError("Sorry, there are no states to show in manual mode :/")

    filesnpaths.is_file_exists(self.tree)
    filesnpaths.is_proper_newick(self.tree)

    view_data_path = os.path.abspath(self.view_data_path)
    self.p_meta['splits_fasta'] = os.path.abspath(
        self.fasta_file) if self.fasta_file else None
    self.p_meta['output_dir'] = None
    self.p_meta['views'] = {}
    self.p_meta['merged'] = True
    self.p_meta['default_view'] = 'single'
    self.p_meta['default_clustering'] = 'default'
    self.p_meta['available_clusterings'] = ['default']

    # the `with` block guarantees that the tree file handle gets closed
    with open(os.path.abspath(self.tree)) as tree_file:
        self.p_meta['clusterings'] = {
            'default': {
                'newick': tree_file.read()
            }
        }

    self.default_view = self.p_meta['default_view']

    # sanity of the view data
    filesnpaths.is_file_tab_delimited(view_data_path)
    view_data_columns = utils.get_columns_of_TAB_delim_file(
        view_data_path, include_first_column=True)
    if not view_data_columns[0] == "contig":
        raise ConfigError(
            "The first row of the first column of the view data file must\
             say 'contig', which is not the case for your view data file\
             ('%s'). Please make sure this is a properly formatted view data\
             file." % (view_data_path))

    # load view data as the default view:
    self.views[self.default_view] = {
        'header': view_data_columns[1:],
        'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)
    }

    # keys() is only a view on Python 3; list() makes it a list on both major versions
    self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

    # we assume that the sample names are the header of the view data, so we might as well set it up:
    self.p_meta['samples'] = self.views[self.default_view]['header']

    # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
    # otherwise we will leave them empty
    self.splits_basic_info = {}
    self.split_sequences = None
    if self.p_meta['splits_fasta']:
        filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
        self.split_sequences = utils.get_FASTA_file_as_dictionary(
            self.p_meta['splits_fasta'])

        names_missing_in_FASTA = set(self.split_names_ordered) - set(
            self.split_sequences.keys())
        num_names_missing_in_FASTA = len(names_missing_in_FASTA)
        if num_names_missing_in_FASTA:
            raise ConfigError(
                'Some of the names in your view data does not have corresponding entries in the\
                 FASTA file you provided. Here is an example to one of those %d names that occur\
                 in your data file, but not in the FASTA file: "%s"' % (
                    num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

        # setup a mock splits_basic_info dict
        for split_id in self.split_names_ordered:
            self.splits_basic_info[split_id] = {
                'length': len(self.split_sequences[split_id]),
                'gc_content': utils.get_GC_content_for_sequence(
                    self.split_sequences[split_id])
            }

    # create a new, empty profile database for ad hoc operations
    if not os.path.exists(self.profile_db_path):
        profile_db = ProfileDatabase(self.profile_db_path)
        profile_db.create({
            'db_type': 'profile',
            'merged': True,
            'contigs_db_hash': None,
            'samples': ','.join(self.p_meta['samples'])
        })

    # create an instance of states table
    self.states_table = TablesForStates(self.profile_db_path,
                                        anvio.__profile__version__)

    # also populate collections, if there are any
    self.collections.populate_sources_dict(self.profile_db_path,
                                           anvio.__profile__version__)
def load_manual_mode(self, args):
    """Prepare the interactive interface for a manual mode run based on user files.

    The method checks that the parameters make sense for manual mode, verifies
    the tree file, and populates `self.p_meta`, `self.views`,
    `self.split_names_ordered`, `self.splits_basic_info` and
    `self.split_sequences`. Without a view data file, the leaf names of the
    tree are used to build a mock view. If there is no file at
    `self.profile_db_path`, an empty profile database is created there.
    `self.states_table` is instantiated and the collections are populated at
    the end.

    Args:
        args: not used by this method.

    Raises:
        ConfigError: when a contigs database is declared; when the profile
            database path is not declared, or the existing profile database
            is associated with a contigs database; when the tree is not
            declared; when `--view`, show views or show states is requested;
            when the first column header of the view data is not 'contig'; or
            when the FASTA file lacks names that occur in the view data.
    """
    # NOTE: the call form of `raise` is used since it works on both Python 2 and Python 3
    if self.contigs_db_path:
        raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                           not use a contigs database.")

    # a missing profile database path must be reported before the path is handed to
    # os.path.exists() below: a None path makes that call fail with a TypeError, and the
    # user would never see this message.
    if not self.profile_db_path:
        raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                           to declare a profile database. The profile database in this mode only used to\
                           read or store the 'state' of the display for visualization purposes. You DO\
                           NOT need to point to an already existing database, as anvi'o will generate\
                           an empty one for your if there is no profile database.")

    # if the user is using an existing profile database, we need to make sure that it is not associated
    # with a contigs database, since it would mean that it is a full anvi'o profile database and should
    # not be included in manual operations.
    if os.path.exists(self.profile_db_path):
        existing_profile_db = ProfileDatabase(self.profile_db_path)
        if existing_profile_db.meta['contigs_db_hash']:
            raise ConfigError("Well. It seems the profile database is associated with a contigs database,\
                               which means using it in manual mode is not the best way to use it. Probably\
                               what you wanted to do is to let the manual mode create a new profile database\
                               for you. Simply type in a new profile database path (it can be a file name\
                               that doesn't exist).")

    if not self.tree:
        raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                           at least the tree file. Please see the documentation for help.")

    if self.view:
        raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                           in manual mode")

    if self.show_views:
        raise ConfigError("Sorry, there are no views to show in manual mode :/")

    if self.show_states:
        raise ConfigError("Sorry, there are no states to show in manual mode :/")

    filesnpaths.is_file_exists(self.tree)
    tree = filesnpaths.is_proper_newick(self.tree)

    view_data_path = os.path.abspath(self.view_data_path) if self.view_data_path else None
    self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
    self.p_meta['output_dir'] = None
    self.p_meta['views'] = {}
    self.p_meta['merged'] = True
    self.p_meta['default_view'] = 'single'

    clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(self.tree)
    self.p_meta['default_clustering'] = clustering_id
    self.p_meta['available_clusterings'] = [clustering_id]

    # every line of the tree file is stripped and glued together; the `with` block
    # guarantees that the file handle gets closed
    with open(os.path.abspath(self.tree)) as tree_file:
        self.p_meta['clusterings'] = {clustering_id: {'newick': ''.join([l.strip() for l in tree_file.readlines()])}}

    self.default_view = self.p_meta['default_view']

    if self.view_data_path:
        # sanity of the view data
        filesnpaths.is_file_tab_delimited(view_data_path)
        view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
        if not view_data_columns[0] == "contig":
            raise ConfigError("The first row of the first column of the view data file must\
                               say 'contig', which is not the case for your view data file\
                               ('%s'). Please make sure this is a properly formatted view data\
                               file." % (view_data_path))

        # load view data as the default view:
        self.views[self.default_view] = {'header': view_data_columns[1:],
                                         'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}
    else:
        # no view data is provided... it is only the tree we have. we will create a mock 'view data dict'
        # here using what is in the tree.
        names_in_the_tree = [n.name for n in tree.get_leaves()]

        ad_hoc_dict = {}
        for item in names_in_the_tree:
            ad_hoc_dict[item] = {'names': item}

        self.views[self.default_view] = {'header': ['names'],
                                         'dict': ad_hoc_dict}

    # keys() is only a view on Python 3; list() makes it a list on both major versions
    self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

    # we assume that the sample names are the header of the view data, so we might as well set it up:
    self.p_meta['samples'] = self.views[self.default_view]['header']

    # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
    # otherwise we will leave them empty
    self.splits_basic_info = {}
    self.split_sequences = None
    if self.p_meta['splits_fasta']:
        filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
        self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

        names_missing_in_FASTA = set(self.split_names_ordered) - set(self.split_sequences.keys())
        num_names_missing_in_FASTA = len(names_missing_in_FASTA)
        if num_names_missing_in_FASTA:
            raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                               FASTA file you provided. Here is an example to one of those %d names that occur\
                               in your data file, but not in the FASTA file: "%s"' % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

        # setup a mock splits_basic_info dict
        for split_id in self.split_names_ordered:
            self.splits_basic_info[split_id] = {'length': len(self.split_sequences[split_id]),
                                                'gc_content': utils.get_GC_content_for_sequence(self.split_sequences[split_id])}

    # create a new, empty profile database for manual operations
    if not os.path.exists(self.profile_db_path):
        profile_db = ProfileDatabase(self.profile_db_path)
        profile_db.create({'db_type': 'profile', 'merged': True, 'contigs_db_hash': None, 'samples': ','.join(self.p_meta['samples'])})

    # create an instance of states table
    self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

    # also populate collections, if there are any
    self.collections.populate_collections_dict(self.profile_db_path, anvio.__profile__version__)