Ejemplo n.º 1
0
 def load_collections(self):
     """Load the collections_txt file, sanity-check it, and prepare params for anvi_import_collection.

     The file at ``self.collections_txt`` is read with
     ``u.get_TAB_delimited_file_as_dictionary`` and is keyed by group name.
     For every group the entry is validated and then rewritten in place:

     * ``bins_info`` becomes ``'--bins-info <path>'`` or ``''``
     * ``contigs_mode`` becomes ``'--contigs-mode'`` or ``''``

     The resulting dictionary is stored in ``self.collections``.

     Raises:
         ConfigError: if a group name is not in ``self.group_names`` or if a
             group has no collection name. ``filesnpaths.is_file_exists`` and
             ``u.check_collection_name`` presumably raise their own errors for
             missing files / invalid names (their behavior is not visible here).
     """
     collections = u.get_TAB_delimited_file_as_dictionary(self.collections_txt)

     bad_groups = [g for g in collections if g not in self.group_names]
     if bad_groups:
         # Adjacent literals are used instead of backslash continuations so the
         # source indentation does not leak into the message text.
         raise ConfigError('Some of the names in your collection_txt '
                           'file ("%s") don\'t match the names of the '
                           'groups in your samples_txt/fasta_txt. '
                           'Here are the names that don\'t match: %s. '
                           'And here are the group names we expect to find: '
                           '%s' % (self.collections_txt, ', '.join(bad_groups), ', '.join(self.group_names)))

     for group in collections:
         entry = collections[group]

         filesnpaths.is_file_exists(entry['collection_file'])

         # .get() so that a missing column is reported with the same friendly
         # error as an empty value, rather than a bare KeyError.
         if not entry.get('collection_name'):
             raise ConfigError('You must specify a name for each collection in your collections_txt')
         u.check_collection_name(entry['collection_name'])

         # Turn the optional columns into ready-to-use command line fragments.
         if entry.get('bins_info'):
             filesnpaths.is_file_exists(entry['bins_info'])
             entry['bins_info'] = '--bins-info %s' % entry['bins_info']
         else:
             entry['bins_info'] = ''

         entry['contigs_mode'] = '--contigs-mode' if entry.get('contigs_mode') else ''

     self.collections = collections
Ejemplo n.º 2
0
    def load_collections(self):
        """Load the collections_txt file, sanity-check it, and prepare params for anvi_import_collection.

        The file at ``self.collections_txt`` is read with
        ``u.get_TAB_delimited_file_as_dictionary`` and is keyed by group name.
        Each group must use exactly one of two modes:

        * a default collection (``default_collection`` is set): the entry gets
          ``collection_name = 'DEFAULT'`` and ``contigs_mode = ''``; none of
          the file-import parameters may be set.
        * a collection imported from a file: ``collection_file`` must exist,
          ``collection_name`` must be given and valid, ``bins_info`` becomes
          ``'--bins-info <path>'`` or ``''`` and ``contigs_mode`` becomes
          ``'--contigs-mode'`` or ``''``.

        The resulting dictionary is stored in ``self.collections``.

        Raises:
            ConfigError: if a group name is not in ``self.group_names``, if both
                modes are mixed for a group, if the collection file is missing
                or not specified, or if a group has no collection name.
        """
        collections = u.get_TAB_delimited_file_as_dictionary(self.collections_txt)

        bad_groups = [g for g in collections if g not in self.group_names]
        if bad_groups:
            raise ConfigError('Some of the names in your collection_txt '
                              'file ("%s") don\'t match the names of the '
                              'groups in your samples_txt/fasta_txt. '
                              'Here are the names that don\'t match: %s. '
                              'And here are the group names we expect to find: '
                              '%s' % (self.collections_txt, ', '.join(bad_groups), ', '.join(self.group_names)))

        # A tuple rather than a set, so the parameter list printed in the error
        # message below always appears in the same order.
        file_import_params = ('collection_name', 'collection_file', 'bins_info', 'contigs_mode')

        for group in collections:
            entry = collections[group]

            if entry.get('default_collection'):
                # User can specify either a default collection OR collection from file
                if any(entry.get(key) for key in file_import_params):
                    raise ConfigError('We encountered the following problem with your '
                                      'collections_txt file ("%s"): you can choose '
                                      'either using a default collection OR importing '
                                      'a collection from a file. Yet, for "%s", you specify '
                                      'a default collection AND also specify some of the following '
                                      'parameters: %s.' % (self.collections_txt, group, ", ".join(file_import_params)))

                entry['collection_name'] = 'DEFAULT'
                entry['contigs_mode'] = ''
                continue

            # A missing or empty collection_file is reported with the same
            # error as a path that does not exist, instead of a bare KeyError.
            collection_file = entry.get('collection_file')
            if not collection_file or not filesnpaths.is_file_exists(collection_file, dont_raise=True):
                raise ConfigError('We encountered the following problem with your '
                                  'collections_txt file ("%s"): you did not specify '
                                  'a valid collection file for "%s".' % (self.collections_txt, group))

            if not entry.get('collection_name'):
                raise ConfigError('You must specify a name for each collection in your collections_txt')
            u.check_collection_name(entry['collection_name'])

            # Turn the optional columns into ready-to-use command line fragments.
            if entry.get('bins_info'):
                filesnpaths.is_file_exists(entry['bins_info'])
                entry['bins_info'] = '--bins-info %s' % entry['bins_info']
            else:
                entry['bins_info'] = ''

            entry['contigs_mode'] = '--contigs-mode' if entry.get('contigs_mode') else ''

        self.collections = collections
Ejemplo n.º 3
0
    def load_collections(self):
        """Load the collections_txt file, sanity-check it, and prepare params for anvi_import_collection.

        The file at ``self.collections_txt`` is read with
        ``u.get_TAB_delimited_file_as_dictionary`` and is keyed by group name.
        Each group must use exactly one of two modes:

        * a default collection (``default_collection`` is set): the entry gets
          ``collection_name = 'DEFAULT'`` and ``contigs_mode = ''``; none of
          the file-import parameters may be set.
        * a collection imported from a file: ``collection_file`` must exist,
          ``collection_name`` must be given and valid, ``bins_info`` becomes
          ``'--bins-info <path>'`` or ``''`` and ``contigs_mode`` becomes
          ``'--contigs-mode'`` or ``''``.

        The resulting dictionary is stored in ``self.collections``.

        Raises:
            ConfigError: if a group name is not in ``self.group_names``, if both
                modes are mixed for a group, if the collection file is missing
                or not specified, or if a group has no collection name.
        """
        collections = u.get_TAB_delimited_file_as_dictionary(self.collections_txt)

        bad_groups = [g for g in collections if g not in self.group_names]
        if bad_groups:
            # Adjacent literals are used instead of backslash continuations so
            # the source indentation does not leak into the message text.
            raise ConfigError('Some of the names in your collection_txt '
                              'file ("%s") don\'t match the names of the '
                              'groups in your samples_txt/fasta_txt. '
                              'Here are the names that don\'t match: %s. '
                              'And here are the group names we expect to find: '
                              '%s' % (self.collections_txt, ', '.join(bad_groups), ', '.join(self.group_names)))

        # A tuple rather than a set, so the parameter list printed in the error
        # message below always appears in the same order.
        file_import_params = ('collection_name', 'collection_file', 'bins_info', 'contigs_mode')

        for group in collections:
            entry = collections[group]

            if entry.get('default_collection'):
                # User can specify either a default collection OR collection from file
                if any(entry.get(key) for key in file_import_params):
                    raise ConfigError('We encountered the following problem with your '
                                      'collections_txt file ("%s"): you can choose '
                                      'either using a default collection OR importing '
                                      'a collection from a file. Yet, for "%s", you specify '
                                      'a default collection AND also specify some of the following '
                                      'parameters: %s.' % (self.collections_txt, group, ", ".join(file_import_params)))

                entry['collection_name'] = 'DEFAULT'
                entry['contigs_mode'] = ''
                continue

            # A missing or empty collection_file is reported with the same
            # error as a path that does not exist, instead of a bare KeyError.
            collection_file = entry.get('collection_file')
            if not collection_file or not filesnpaths.is_file_exists(collection_file, dont_raise=True):
                raise ConfigError('We encountered the following problem with your '
                                  'collections_txt file ("%s"): you did not specify '
                                  'a valid collection file for "%s".' % (self.collections_txt, group))

            if not entry.get('collection_name'):
                raise ConfigError('You must specify a name for each collection in your collections_txt')
            u.check_collection_name(entry['collection_name'])

            # Turn the optional columns into ready-to-use command line fragments.
            if entry.get('bins_info'):
                filesnpaths.is_file_exists(entry['bins_info'])
                entry['bins_info'] = '--bins-info %s' % entry['bins_info']
            else:
                entry['bins_info'] = ''

            entry['contigs_mode'] = '--contigs-mode' if entry.get('contigs_mode') else ''

        self.collections = collections
Ejemplo n.º 4
0
    def get_target_files_for_anvi_cluster_contigs(self):
        """Validate the anvi_cluster_contigs configuration and list its target files.

        Returns:
            ``None`` when the rule's ``run`` parameter is not exactly ``True``.
            Otherwise a list with one entry from
            ``self.get_flag_file_for_binning_driver(group, driver)`` for every
            requested driver and every group in ``self.group_names`` whose
            size in ``self.group_sizes`` is larger than one.

        Raises:
            ConfigError: if no driver is given, if a driver is not a key of
                ``driver_modules['binning']``, if parameters are set for a
                driver that was not requested, if several drivers are requested
                without ``{driver}`` in ``--collection-name``, or if the
                resulting collection name is rejected by
                ``u.check_collection_name``.
        """
        if self.get_param_value_from_config(['anvi_cluster_contigs', 'run']) is not True:
            # the user doesn't want to run this
            return

        requested_drivers = self.get_param_value_from_config(['anvi_cluster_contigs', '--driver'])
        if not requested_drivers:
            raise ConfigError('You must specify a driver for anvi_cluster_contigs. '
                              'You specified \'"run": true\' for anvi_cluster_contigs, '
                              'but provided no driver.')

        # a single driver may be given as a bare value; normalize to a list
        if not isinstance(requested_drivers, list):
            requested_drivers = [requested_drivers]

        known_drivers = driver_modules['binning']
        incompatible_drivers = [d for d in requested_drivers if d not in known_drivers]
        if incompatible_drivers:
            raise ConfigError('The following drivers were listed in the config file for rule anvi_cluster_contigs '
                              'but they are not familiar to anvi\'o: %s' % ', '.join(incompatible_drivers))

        # TODO: we should make sure drivers are installed. Maybe Ozcan or Meren are willing to do this?

        for d in known_drivers:
            # let's make sure no parameters were set for a driver that was not listed
            additional_parameters = self.get_param_value_from_config(['anvi_cluster_contigs', self.get_param_name_for_binning_driver(d)])
            if additional_parameters and d not in requested_drivers:
                raise ConfigError('You set the following parameters: "%s" for %s, but you did not '
                                  'specify it as one of the drivers that should be used by anvi_cluster_contigs. '
                                  'In order to reduce room for mistakes, we don\'t allow this.' % (additional_parameters, d))

        collection_name = self.get_param_value_from_config(['anvi_cluster_contigs', '--collection-name'])
        if '{driver}' not in collection_name:
            if len(requested_drivers) > 1:
                raise ConfigError('When using multiple binning algorithms, the --collection-name '
                                  'for rule anvi_cluster_contigs must contain '
                                  'the key word "{driver}" (including those curly brackets). '
                                  'It appears you changed the --collection-name, which by '
                                  'default is simply "{driver}" to: "%s". That\'s fine, '
                                  'but only as long as you have "{driver}" appear somewhere in '
                                  'the name, because otherwise the collections made by different '
                                  'binning algorithms would try to override each other, since they '
                                  'would all end up having the same name' % collection_name)

            # if the key word '{driver}' is not in the collection name then it is a static collection name
            example_collection_name = collection_name
        else:
            # if the key word '{driver}' IS in the collection name, then let's take one
            # driver as example when we check if the collection name is valid.
            example_collection_name = collection_name.format(driver=requested_drivers[0])

        try:
            u.check_collection_name(example_collection_name)
        except ConfigError as e:
            raise ConfigError('%s is not an acceptable collection name for anvi_cluster_contigs. '
                              'We tried it for one of the drivers you requested and this is the '
                              'error we got: %s' % (collection_name, e))

        # Split the groups once, independently of the drivers, so that a group
        # with a single sample is reported only once in the warning below.
        groups_to_bin = [g for g in self.group_names if self.group_sizes[g] > 1]
        groups_of_size_one = [g for g in self.group_names if not self.group_sizes[g] > 1]

        # add flag files of binning to the target files
        # only if the group is larger than 1 (because otherwise, there is no merged profile)
        target_files = [self.get_flag_file_for_binning_driver(g, d)
                        for d in requested_drivers
                        for g in groups_to_bin]

        if groups_of_size_one:
            run.warning('You requested to run anvi_cluster_contigs, but it will not run for '
                        '%s, since there is only one sample in this group, and hence there '
                        'will be no merged profile, which is required for anvi-cluster-contigs.' % ', '.join(groups_of_size_one))

        return target_files