def load_collections(self):
    '''Load the collections_txt file, run sanity checks, and figure out params for anvi_import_collection.

    The file at `self.collections_txt` is read as a TAB-delimited dictionary
    keyed by group name. Every key must be one of `self.group_names`. Each
    entry is then validated, and its optional `bins_info` and `contigs_mode`
    values are rewritten in place into command line fragments (or an empty
    string when not set). The resulting dictionary is stored in
    `self.collections`.

    Raises
    ======
    ConfigError
        If a group name in the file is not in `self.group_names`, or if an
        entry has an empty `collection_name`. Errors raised by
        `filesnpaths.is_file_exists` and `u.check_collection_name` are not
        handled here and propagate to the caller.
    '''

    collections = u.get_TAB_delimited_file_as_dictionary(self.collections_txt)

    bad_groups = [g for g in collections if g not in self.group_names]
    if bad_groups:
        # Adjacent string literals (rather than backslash continuations inside
        # the literal) keep source indentation out of the message text.
        raise ConfigError('Some of the names in your collection_txt '
                          'file ("%s") don\'t match the names of the '
                          'groups in your samples_txt/fasta_txt. '
                          'Here are the names that don\'t match: %s. '
                          'And here are the group names we expect to find: '
                          '%s' % (self.collections_txt,
                                  ', '.join(bad_groups),
                                  ', '.join(self.group_names)))

    for group in collections:
        # `entry` is the same object stored in `collections`, so the updates
        # below are visible through `self.collections` afterwards.
        entry = collections[group]

        filesnpaths.is_file_exists(entry['collection_file'])

        if not entry['collection_name']:
            raise ConfigError('You must specify a name for each collection in your collections_txt')
        u.check_collection_name(entry['collection_name'])

        # Optional columns are turned into ready-to-use command line flags.
        if entry.get('bins_info'):
            filesnpaths.is_file_exists(entry['bins_info'])
            entry['bins_info'] = '--bins-info %s' % entry['bins_info']
        else:
            entry['bins_info'] = ''

        if entry.get('contigs_mode'):
            entry['contigs_mode'] = '--contigs-mode'
        else:
            entry['contigs_mode'] = ''

    self.collections = collections
def load_collections(self):
    '''Load the collections_txt file, run sanity checks, and figure out params for anvi_import_collection.

    The file at `self.collections_txt` is read as a TAB-delimited dictionary
    keyed by group name. Every key must be one of `self.group_names`. Each
    entry then follows one of two mutually exclusive modes:

      - default collection: `default_collection` is set, and none of
        `collection_name`, `collection_file`, `bins_info`, `contigs_mode`
        may have a value. `collection_name` is set to 'DEFAULT' and
        `contigs_mode` to an empty string.

      - collection from file: `collection_file` must point to an existing
        file and `collection_name` must be set and valid. The optional
        `bins_info` and `contigs_mode` values are rewritten in place into
        command line fragments (or an empty string when not set).

    The resulting dictionary is stored in `self.collections`.

    Raises
    ======
    ConfigError
        If a group name is unknown, if both modes are mixed for a group, if
        the collection file is not reported as existing, or if the
        collection name is empty. Errors raised by the `filesnpaths` and `u`
        helpers are not handled here and propagate to the caller.
    '''

    collections = u.get_TAB_delimited_file_as_dictionary(self.collections_txt)

    bad_groups = [g for g in collections if g not in self.group_names]
    if bad_groups:
        raise ConfigError('Some of the names in your collection_txt '
                          'file ("%s") don\'t match the names of the '
                          'groups in your samples_txt/fasta_txt. '
                          'Here are the names that don\'t match: %s. '
                          'And here are the group names we expect to find: '
                          '%s' % (self.collections_txt,
                                  ', '.join(bad_groups),
                                  ', '.join(self.group_names)))

    # Parameters that only make sense when importing a collection from a file.
    # A tuple (not a set) so they are always listed in the same order in the
    # error message below.
    not_allowed_params = ('collection_name', 'collection_file', 'bins_info', 'contigs_mode')

    for group in collections:
        # `entry` is the same object stored in `collections`, so the updates
        # below are visible through `self.collections` afterwards.
        entry = collections[group]

        if entry.get('default_collection'):
            # User can specify either a default collection OR collection from file
            if any(entry.get(key) for key in not_allowed_params):
                raise ConfigError('We encountered the following problem with your '
                                  'collections_txt file ("%s"): you can choose '
                                  'either using a default collection OR importing '
                                  'a collection from a file. Yet, for "%s", you specify '
                                  'a default collection AND also specify some of the following '
                                  'parameters: %s.' % (self.collections_txt,
                                                       group,
                                                       ", ".join(not_allowed_params)))

            entry['collection_name'] = 'DEFAULT'
            entry['contigs_mode'] = ''
        else:
            if not filesnpaths.is_file_exists(entry['collection_file'], dont_raise=True):
                raise ConfigError('We encountered the following problem with your '
                                  'collections_txt file ("%s"): you did not specify '
                                  'a valid collection file for "%s".'
                                  % (self.collections_txt, group))

            if not entry['collection_name']:
                raise ConfigError('You must specify a name for each collection in your collections_txt')
            u.check_collection_name(entry['collection_name'])

            # Optional columns are turned into ready-to-use command line flags.
            if entry.get('bins_info'):
                filesnpaths.is_file_exists(entry['bins_info'])
                entry['bins_info'] = '--bins-info %s' % entry['bins_info']
            else:
                entry['bins_info'] = ''

            if entry.get('contigs_mode'):
                entry['contigs_mode'] = '--contigs-mode'
            else:
                entry['contigs_mode'] = ''

    self.collections = collections
def load_collections(self):
    '''Load the collections_txt file, run sanity checks, and figure out params for anvi_import_collection.

    The file at `self.collections_txt` is read as a TAB-delimited dictionary
    keyed by group name. Every key must be one of `self.group_names`. Each
    entry then follows one of two mutually exclusive modes:

      - default collection: `default_collection` is set, and none of
        `collection_name`, `collection_file`, `bins_info`, `contigs_mode`
        may have a value. `collection_name` is set to 'DEFAULT' and
        `contigs_mode` to an empty string.

      - collection from file: `collection_file` must point to an existing
        file and `collection_name` must be set and valid. The optional
        `bins_info` and `contigs_mode` values are rewritten in place into
        command line fragments (or an empty string when not set).

    The resulting dictionary is stored in `self.collections`.

    Raises
    ======
    ConfigError
        If a group name is unknown, if both modes are mixed for a group, if
        the collection file is not reported as existing, or if the
        collection name is empty. Errors raised by the `filesnpaths` and `u`
        helpers are not handled here and propagate to the caller.
    '''

    collections = u.get_TAB_delimited_file_as_dictionary(self.collections_txt)

    bad_groups = [g for g in collections if g not in self.group_names]
    if bad_groups:
        # Adjacent string literals (rather than backslash continuations inside
        # the literal) keep source indentation out of the message text.
        raise ConfigError('Some of the names in your collection_txt '
                          'file ("%s") don\'t match the names of the '
                          'groups in your samples_txt/fasta_txt. '
                          'Here are the names that don\'t match: %s. '
                          'And here are the group names we expect to find: '
                          '%s' % (self.collections_txt,
                                  ', '.join(bad_groups),
                                  ', '.join(self.group_names)))

    # Parameters that only make sense when importing a collection from a file.
    # A tuple (not a set) so they are always listed in the same order in the
    # error message below.
    not_allowed_params = ('collection_name', 'collection_file', 'bins_info', 'contigs_mode')

    for group in collections:
        # `entry` is the same object stored in `collections`, so the updates
        # below are visible through `self.collections` afterwards.
        entry = collections[group]

        if entry.get('default_collection'):
            # User can specify either a default collection OR collection from file
            if any(entry.get(key) for key in not_allowed_params):
                raise ConfigError('We encountered the following problem with your '
                                  'collections_txt file ("%s"): you can choose '
                                  'either using a default collection OR importing '
                                  'a collection from a file. Yet, for "%s", you specify '
                                  'a default collection AND also specify some of the following '
                                  'parameters: %s.' % (self.collections_txt,
                                                       group,
                                                       ", ".join(not_allowed_params)))

            entry['collection_name'] = 'DEFAULT'
            entry['contigs_mode'] = ''
        else:
            if not filesnpaths.is_file_exists(entry['collection_file'], dont_raise=True):
                raise ConfigError('We encountered the following problem with your '
                                  'collections_txt file ("%s"): you did not specify '
                                  'a valid collection file for "%s".'
                                  % (self.collections_txt, group))

            if not entry['collection_name']:
                raise ConfigError('You must specify a name for each collection in your collections_txt')
            u.check_collection_name(entry['collection_name'])

            # Optional columns are turned into ready-to-use command line flags.
            if entry.get('bins_info'):
                filesnpaths.is_file_exists(entry['bins_info'])
                entry['bins_info'] = '--bins-info %s' % entry['bins_info']
            else:
                entry['bins_info'] = ''

            if entry.get('contigs_mode'):
                entry['contigs_mode'] = '--contigs-mode'
            else:
                entry['contigs_mode'] = ''

    self.collections = collections
def get_target_files_for_anvi_cluster_contigs(self):
    '''Validate the anvi_cluster_contigs configuration and collect its target files.

    The checks performed, in order:

      - at least one `--driver` is configured (a single value is wrapped
        into a list),
      - every requested driver is a key of `driver_modules['binning']`,
      - no driver-specific parameters are set for a driver that was not
        requested,
      - `--collection-name` contains "{driver}" when more than one driver is
        requested, and passes `u.check_collection_name` (checked with the
        first requested driver substituted in).

    Groups with a single sample are skipped, and a warning naming them is
    issued through `run.warning`.

    Returns
    =======
    None if the `run` parameter of anvi_cluster_contigs is not exactly
    `True`. Otherwise a list with the value of
    `self.get_flag_file_for_binning_driver(group, driver)` for every
    requested driver and every group whose size is larger than one.

    Raises
    ======
    ConfigError
        If any of the checks listed above fails.
    '''

    if self.get_param_value_from_config(['anvi_cluster_contigs', 'run']) is not True:
        # the user doesn't want to run this
        return None

    requested_drivers = self.get_param_value_from_config(['anvi_cluster_contigs', '--driver'])
    if not requested_drivers:
        raise ConfigError('You must specify a driver for anvi_cluster_contigs. '
                          'You specified \'"run": true\' for anvi_cluster_contigs, '
                          'but provided no driver.')

    if not isinstance(requested_drivers, list):
        requested_drivers = [requested_drivers]

    known_drivers = list(driver_modules['binning'].keys())

    incompatible_drivers = [d for d in requested_drivers if d not in known_drivers]
    if incompatible_drivers:
        raise ConfigError('The following drivers were listed in the config file for rule anvi_cluster_contigs '
                          'but they are not familiar to anvi\'o: %s' % ', '.join(incompatible_drivers))

    # TODO: we should make sure drivers are installed. Maybe Ozcan or Meren are willing to do this?

    for d in known_drivers:
        # let's make sure no parameters were set for a driver that was not listed
        additional_parameters = self.get_param_value_from_config(
            ['anvi_cluster_contigs', self.get_param_name_for_binning_driver(d)])
        if additional_parameters and d not in requested_drivers:
            raise ConfigError('You set the following parameters: "%s" for %s, but you did not '
                              'specify it as one of the drivers that should be used by anvi_cluster_contigs. '
                              'In order to reduce room for mistakes, we don\'t allow this.'
                              % (additional_parameters, d))

    collection_name = self.get_param_value_from_config(['anvi_cluster_contigs', '--collection-name'])
    if '{driver}' not in collection_name:
        if len(requested_drivers) > 1:
            raise ConfigError('When using multiple binning algorithms, the --collection-name '
                              'for rule anvi_cluster_contigs must contain '
                              'the key word "{driver}" (including those curly brackets). '
                              'It appears you changed the --collection-name, which by '
                              'default is simply "{driver}" to: "%s". That\'s fine, '
                              'but only as long as you have "{driver}" appear somewhere in '
                              'the name, because otherwise the collections made by different '
                              'binning algorithms would try to override each other, since they '
                              'would all end up having the same name' % collection_name)

        # if the key word '{driver}' is not in the collection name then it is a static collection name
        example_collection_name = collection_name
    else:
        # if the key word '{driver}' IS in the collection name, then let's take one
        # driver as example when we check if the collection name is valid.
        example_collection_name = collection_name.format(driver=requested_drivers[0])

    try:
        u.check_collection_name(example_collection_name)
    except ConfigError as e:
        raise ConfigError('%s is not an acceptable collection name for anvi_cluster_contigs. '
                          'We tried it for one of the drivers you requested and this is the '
                          'error we got: %s' % (collection_name, e)) from e

    # Split the groups once, independently of the drivers, so that a group of
    # size one is reported a single time no matter how many drivers were requested.
    groups_with_merged_profile = []
    groups_of_size_one = []
    for g in self.group_names:
        if self.group_sizes[g] > 1:
            groups_with_merged_profile.append(g)
        else:
            groups_of_size_one.append(g)

    # add flag files of binning to the target files
    # only if the group is larger than 1 (because otherwise, there is no merged profile)
    target_files = [self.get_flag_file_for_binning_driver(g, d)
                    for d in requested_drivers
                    for g in groups_with_merged_profile]

    if groups_of_size_one:
        run.warning('You requested to run anvi_cluster_contigs, but it will not run for '
                    '%s, since there is only one sample in this group, and hence there '
                    'will be no merged profile, which is required for anvi-cluster-contigs.'
                    % ', '.join(groups_of_size_one))

    return target_files