Python get_newick_tree_data_for_dict Beispiele, anvio.clustering.get_newick_tree_data_for_dict Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: panops.py Projekt: fishinwind/anvio

    def gen_samples_order_file(self):
        self.progress.new('Samples DB')
        self.progress.update(
            'Copmputing the hierarchical clustering of the (transposed) view data'
        )

        samples_order_file_path = self.get_output_file_path(
            self.project_name + '-samples-order.txt')
        samples_order = open(samples_order_file_path, 'w')
        samples_order.write('attributes\tbasic\tnewick\n')

        for clustering_tuple in [('PC presence absence', self.view_data),
                                 ('PC frequencies',
                                  self.view_data_presence_absence)]:
            v, d = clustering_tuple
            newick = clustering.get_newick_tree_data_for_dict(
                d,
                transpose=True,
                distance=self.distance,
                linkage=self.linkage)
            samples_order.write('%s\t\t%s\n' % (v, newick))

        samples_order.close()

        self.progress.end()

        self.run.info("Anvi'o samples order", samples_order_file_path)

        return samples_order_file_path

Beispiel #2

0

Datei anzeigen

    def populate_layers_additional_data_and_layer_orders(self):
        self.run.info_single("Additional data and layer orders...", nl_before=1, nl_after=1, mc="blue")

        essential_fields = [f for f in self.atomic_data_fields if constants.IS_ESSENTIAL_FIELD(f)]

        # initialize views.
        args = argparse.Namespace(profile_db = self.merged_profile_db_path)
        profile_db_super = dbops.ProfileSuperclass(args)
        profile_db_super.load_views(omit_parent_column=True)

        # figure out sample orders dictionary
        layer_orders_data_dict = {}
        failed_attempts = []
        self.progress.new('Working on layer orders')
        for essential_field in essential_fields:
            self.progress.update('recovering order for "%s"' % (essential_field))
            try:
                data_value = clustering.get_newick_tree_data_for_dict(profile_db_super.views[essential_field]['dict'],
                                                                      distance=self.distance,
                                                                      linkage=self.linkage,
                                                                      transpose=True)

                layer_orders_data_dict[essential_field] = {'data_value': data_value, 'data_type': 'newick'}
            except:
                failed_attempts.append(essential_field)
        self.progress.end()

        if not len(layer_orders_data_dict):
            self.run.warning("This may or may not be important: anvi'o attempted to generate orders for your\
                              samples based on the view data, however, it failed :/")
            return

        if len(failed_attempts):
            self.run.warning("While anvi'o was trying to generate clusterings of samples based on view data\
                              available in the merged profile, clustering of some of the essential data\
                              failed. It is likely not a very big deal, but you shall be the judge of it.\
                              Anvi'o now proceeds to store layers order information for those view items\
                              the clustering in fact worked. Here is the list of stuff that failed: '%s'"\
                              % (', '.join(failed_attempts)))

        self.progress.new('Working on layer additional data')
        self.progress.update('...')

        layer_additional_data_dict = {}
        for sample_name in self.sample_ids_found_in_input_dbs:
            layer_additional_data_dict[sample_name] = {}

        # figure out num reads mapped per sample:
        for sample_name in self.sample_ids_found_in_input_dbs:
            layer_additional_data_dict[sample_name]['num_mapped_reads'] = self.total_reads_mapped_per_sample[sample_name]

        self.progress.end()

        TableForLayerOrders(args).add(layer_orders_data_dict)
        TableForLayerAdditionalData(args).add(layer_additional_data_dict, ['num_mapped_reads'])

Beispiel #3

0

Datei anzeigen

Datei: merger.py Projekt: AstrobioMike/anvio

    def populate_misc_data_tables(self):
        self.run.info_single("Additional data and layer orders...", nl_before=1, nl_after=1, mc="blue")

        essential_fields = [f for f in self.atomic_data_fields if constants.IS_ESSENTIAL_FIELD(f)]

        # initialize views.
        args = argparse.Namespace(profile_db = self.merged_profile_db_path)
        profile_db_super = dbops.ProfileSuperclass(args)
        profile_db_super.load_views(omit_parent_column=True)

        # figure out layer orders dictionary
        layer_orders_data_dict = {}
        failed_attempts = []
        self.progress.new('Working on layer orders')
        for essential_field in essential_fields:
            self.progress.update('recovering order for "%s"' % (essential_field))
            try:
                data_value = clustering.get_newick_tree_data_for_dict(profile_db_super.views[essential_field]['dict'],
                                                                      distance=self.distance,
                                                                      linkage=self.linkage,
                                                                      transpose=True)

                layer_orders_data_dict[essential_field] = {'data_value': data_value, 'data_type': 'newick'}
            except:
                failed_attempts.append(essential_field)
        self.progress.end()

        if not len(layer_orders_data_dict):
            self.run.warning("This may or may not be important: anvi'o attempted to generate orders for your\
                              samples based on the view data, however, it failed :/")
            return

        if len(failed_attempts):
            self.run.warning("While anvi'o was trying to generate clusterings of samples based on view data\
                              available in the merged profile, clustering of some of the essential data\
                              failed. It is likely not a very big deal, but you shall be the judge of it.\
                              Anvi'o now proceeds to store layers order information for those view items\
                              the clustering in fact worked. Here is the list of stuff that failed: '%s'"\
                              % (', '.join(failed_attempts)))

        # add the layer orders quietly
        TableForLayerOrders(args, r=terminal.Run(verbose=False)).add(layer_orders_data_dict)
        self.run.warning(None, header="Layer orders added", lc='cyan')
        for layer_order in layer_orders_data_dict:
            self.run.info_single(layer_order, mc='cyan')

        # done with layer orders. let's add our layer additional data and call it a day.
        for data_group_name in self.layer_additional_data_dict:
            args.target_data_group = data_group_name
            TableForLayerAdditionalData(args, r=terminal.Run(verbose=False)).add(self.layer_additional_data_dict[data_group_name],
                                                                                 list(self.layer_additional_data_keys[data_group_name]))

        self.run.warning(None, header="Data groups added", lc='cyan')
        for data_group in self.layer_additional_data_dict:
            self.run.info_single('%s (w/%d items)' % (data_group, len(self.layer_additional_data_keys[data_group])), mc='cyan')

Beispiel #4

0

Datei anzeigen

    def populate_misc_data_tables(self):
        self.run.info_single("Additional data and layer orders...", nl_before=1, nl_after=1, mc="blue")

        essential_fields = [f for f in self.atomic_data_fields if constants.IS_ESSENTIAL_FIELD(f)]

        # initialize views.
        args = argparse.Namespace(profile_db = self.merged_profile_db_path)
        profile_db_super = dbops.ProfileSuperclass(args)
        profile_db_super.load_views(omit_parent_column=True)

        # figure out layer orders dictionary
        layer_orders_data_dict = {}
        failed_attempts = []
        self.progress.new('Working on layer orders')
        for essential_field in essential_fields:
            self.progress.update('recovering order for "%s"' % (essential_field))
            try:
                data_value = clustering.get_newick_tree_data_for_dict(profile_db_super.views[essential_field]['dict'],
                                                                      distance=self.distance,
                                                                      linkage=self.linkage,
                                                                      transpose=True)

                layer_orders_data_dict[essential_field] = {'data_value': data_value, 'data_type': 'newick'}
            except:
                failed_attempts.append(essential_field)
        self.progress.end()

        if not len(layer_orders_data_dict):
            self.run.warning("This may or may not be important: anvi'o attempted to generate orders for your\
                              samples based on the view data, however, it failed :/")
            return

        if len(failed_attempts):
            self.run.warning("While anvi'o was trying to generate clusterings of samples based on view data\
                              available in the merged profile, clustering of some of the essential data\
                              failed. It is likely not a very big deal, but you shall be the judge of it.\
                              Anvi'o now proceeds to store layers order information for those view items\
                              the clustering in fact worked. Here is the list of stuff that failed: '%s'"\
                              % (', '.join(failed_attempts)))

        # add the layer orders quietly
        TableForLayerOrders(args, r=terminal.Run(verbose=False)).add(layer_orders_data_dict)
        self.run.warning(None, header="Layer orders added", lc='cyan')
        for layer_order in layer_orders_data_dict:
            self.run.info_single(layer_order, mc='cyan')

        # done with layer orders. let's add our layer additional data and call it a day.
        for data_group_name in self.layer_additional_data_dict:
            args.target_data_group = data_group_name
            TableForLayerAdditionalData(args, r=terminal.Run(verbose=False)).add(self.layer_additional_data_dict[data_group_name],
                                                                                 list(self.layer_additional_data_keys[data_group_name]))

        self.run.warning(None, header="Data groups added", lc='cyan')
        for data_group in self.layer_additional_data_dict:
            self.run.info_single('%s (w/%d items)' % (data_group, len(self.layer_additional_data_keys[data_group])), mc='cyan')

Beispiel #5

0

Datei anzeigen

Datei: panops.py Projekt: AstrobioMike/anvio

    def populate_layers_additional_data_and_orders(self):
        self.progress.new('Layers additional data and orders')
        self.progress.update('Copmputing the hierarchical clustering of the (transposed) view data')

        layer_orders_data_dict = {}
        for clustering_tuple in [('gene_cluster presence absence', self.view_data), ('gene_cluster frequencies', self.view_data_presence_absence)]:
            v, d = clustering_tuple
            newick = clustering.get_newick_tree_data_for_dict(d, transpose=True, distance = self.distance, linkage=self.linkage)
            layer_orders_data_dict[v] = {'data_type': 'newick', 'data_value': newick}

        self.progress.update('Generating layers additional data ..')

        layers_additional_data_dict = {}
        layers_additional_data_keys = ['total_length', 'gc_content']

        for h in ['percent_completion', 'percent_redundancy']:
            if h in list(self.genomes.values())[0]:
                layers_additional_data_keys.append(h)

        layers_additional_data_keys.extend(['num_genes', 'avg_gene_length', 'num_genes_per_kb'])

        for genome_name in self.genomes:
            new_dict = {}
            for key in layers_additional_data_keys:
                new_dict[key] = self.genomes[genome_name][key]

            layers_additional_data_dict[genome_name] = new_dict

        # summarize gene cluster stats across genomes
        layers_additional_data_keys.extend(['num_gene_clusters', 'singleton_gene_clusters'])
        for genome_name in self.genomes:
            layers_additional_data_dict[genome_name]['num_gene_clusters'] = 0
            layers_additional_data_dict[genome_name]['singleton_gene_clusters'] = 0
            for gene_cluster in self.view_data_presence_absence:
                genomes_contributing_to_gene_cluster = [t[0] for t in self.view_data_presence_absence[gene_cluster].items() if t[1]]

                # tracking singletons
                if len(genomes_contributing_to_gene_cluster) == 1 and genomes_contributing_to_gene_cluster[0] == genome_name:
                    layers_additional_data_dict[genome_name]['singleton_gene_clusters'] += 1

                # tracking the total number of gene clusters
                if self.view_data_presence_absence[gene_cluster][genome_name]:
                    layers_additional_data_dict[genome_name]['num_gene_clusters'] += 1

        self.progress.end()

        miscdata.TableForLayerOrders(self.args).add(layer_orders_data_dict)
        miscdata.TableForLayerAdditionalData(self.args).add(layers_additional_data_dict, layers_additional_data_keys)

Beispiel #6

0

Datei anzeigen

    def populate_layers_additional_data_and_orders(self):
        self.progress.new('Layers additional data and orders')
        self.progress.update('Copmputing the hierarchical clustering of the (transposed) view data')

        layer_orders_data_dict = {}
        for clustering_tuple in [('gene_cluster presence absence', self.view_data), ('gene_cluster frequencies', self.view_data_presence_absence)]:
            v, d = clustering_tuple
            newick = clustering.get_newick_tree_data_for_dict(d, transpose=True, distance = self.distance, linkage=self.linkage)
            layer_orders_data_dict[v] = {'data_type': 'newick', 'data_value': newick}

        self.progress.update('Generating layers additional data ..')

        layers_additional_data_dict = {}
        layers_additional_data_keys = ['total_length', 'gc_content']

        for h in ['percent_completion', 'percent_redundancy']:
            if h in list(self.genomes.values())[0]:
                layers_additional_data_keys.append(h)

        layers_additional_data_keys.extend(['num_genes', 'avg_gene_length', 'num_genes_per_kb',
                                            'singleton_gene_clusters'])

        if self.gene_cluster_min_occurrence > 1:
            layers_additional_data_keys.extend(['num_gene_clusters_raw'])

        for genome_name in self.genomes:
            new_dict = {}
            for key in layers_additional_data_keys:
                new_dict[key] = self.genomes[genome_name][key]

            layers_additional_data_dict[genome_name] = new_dict

        # summarize gene cluster stats across genomes
        layers_additional_data_keys.extend(['num_gene_clusters'])
        for genome_name in self.genomes:
            layers_additional_data_dict[genome_name]['num_gene_clusters'] = 0
            for gene_cluster in self.view_data_presence_absence:
                # tracking the total number of gene clusters
                if self.view_data_presence_absence[gene_cluster][genome_name]:
                    layers_additional_data_dict[genome_name]['num_gene_clusters'] += 1

        self.progress.end()

        miscdata.TableForLayerOrders(self.args).add(layer_orders_data_dict)
        miscdata.TableForLayerAdditionalData(self.args).add(layers_additional_data_dict, layers_additional_data_keys)

Beispiel #7

0

Datei anzeigen

Datei: merger.py Projekt: mruehlemann/anvio

    def gen_samples_db_for_the_merged_profile(self):
        """Geenrate a samples db for the merged profile.

           We use the ProfileSuperclass to load all the views we added into the meged profile,
           and generate clusterings of samples for each view to generate a default samples database."""

        self.run.info_single("SAMPLES.db stuff...",
                             nl_before=1,
                             nl_after=1,
                             mc="blue")

        essential_fields = [
            f for f in self.atomic_data_fields
            if constants.IS_ESSENTIAL_FIELD(f)
        ]

        class Args:
            pass

        args = Args()
        args.profile_db = self.merged_profile_db_path

        # initialize views.
        profile_db_super = dbops.ProfileSuperclass(args)
        profile_db_super.load_views(omit_parent_column=True)

        # figure out sample orders dictionary
        sample_orders = {}
        failed_attempts = []
        self.progress.new('Working on SAMPLES.db')
        for essential_field in essential_fields:
            self.progress.update('recovering samples order for "%s"' %
                                 (essential_field))
            try:
                sample_orders[essential_field] = \
                        clustering.get_newick_tree_data_for_dict(profile_db_super.views[essential_field]['dict'],
                                                                 distance=self.distance,
                                                                 linkage=self.linkage,
                                                                 transpose=True)
            except:
                failed_attempts.append(essential_field)
        self.progress.end()

        if not len(sample_orders):
            self.run.warning(
                "This may or may not be important: anvi'o attempted to generate a samples\
                              database for this merged profile, however, all attempts to cluster samples\
                              based on view data available in the merged profile failed. No samples db\
                              for you :/")
            return

        if len(failed_attempts):
            self.run.warning("While anvi'o was trying to generate clusterings of samples based on view data\
                              available in the merged profile, clustering of some of the essential data\
                              failed. It is likely not a very big deal, but you shall be the judge of it.\
                              Anvi'o now proceeds to generate a samples db with clusterings it generated\
                              using the view data that worked. Here is the list of stuff that failed: '%s'"\
                              % (', '.join(failed_attempts)))

        # generate the samples order file
        samples_order_file_path = filesnpaths.get_temp_file_path()
        samples_order_file = open(samples_order_file_path, 'w')
        samples_order_file.write('attributes\tbasic\tnewick\n')
        for sample_order in sample_orders:
            samples_order_file.write(
                '%s\t%s\t%s\n' %
                (sample_order, '', sample_orders[sample_order]))
        samples_order_file.close()

        # figure out samples information stuff
        samples_information = {}
        headers = []
        for sample_name in self.sample_ids_found_in_input_dbs:
            samples_information[sample_name] = {}

        self.progress.new('Working on SAMPLES.db')
        self.progress.update('...')

        # figure out num reads mapped per sample:
        for sample_name in self.sample_ids_found_in_input_dbs:
            samples_information[sample_name][
                'num_mapped_reads'] = self.total_reads_mapped_per_sample[
                    sample_name]

        self.progress.end()
        # generate the samples information file
        samples_information_file_path = filesnpaths.get_temp_file_path()
        utils.store_dict_as_TAB_delimited_file(samples_information,
                                               samples_information_file_path,
                                               headers=headers)

        # generate the samples database
        samples_db = dbops.SamplesInformationDatabase(self.samples_db_path,
                                                      quiet=False)
        samples_db.create(
            samples_order_path=samples_order_file_path,
            samples_information_path=samples_information_file_path)

        os.remove(samples_order_file_path)
        os.remove(samples_information_file_path)

        self.run.info('Samples database', self.samples_db_path)

Beispiel #8

0

Datei anzeigen

Datei: interactive.py Projekt: psaxcode/anvio

    def load_collection_mode(self, args):
        self.collections.populate_collections_dict(self.profile_db_path,
                                                   anvio.__profile__version__)

        if self.list_collections:
            self.collections.list_collections()
            sys.exit()

        if self.collection_name not in self.collections.collections_dict:
            raise ConfigError, "%s is not a valid collection name. See a list of available ones with '--list-collections' flag" % args.collection_name

        completeness = Completeness(args.contigs_db)
        if not len(completeness.sources):
            raise ConfigError, "HMM's were not run for this contigs database :/"

        # we are about to request a collections dict that contains only split names that appear in the
        # profile database:
        self.collection = self.collections.get_trimmed_dicts(
            self.collection_name,
            get_split_names_in_profile_db(self.profile_db_path))[0]

        # we will do something quite tricky here. first, we will load the full mode to get the self.views
        # data structure fully initialized based on the profile database. Then, we using information about
        # bins in the selected collection, we will create another views data structure, and replace it with
        # the one we have. that will be LOVELY.
        self.load_full_mode(args)

        self.p_meta['available_clusterings'] = []
        self.p_meta['clusterings'] = {}

        # setting up a new view:
        views_for_collection = {}
        for view in self.views:
            v = self.views[view]

            d = {}
            d['table_name'] = v['table_name']
            d['header'] = [h for h in v['header'] if not h == '__parent__']
            d['dict'] = {}

            for bin_id in self.collection:
                d['dict'][bin_id] = {}
                for header in d['header']:
                    d['dict'][bin_id][header] = numpy.mean([
                        v['dict'][split_name][header]
                        for split_name in self.collection[bin_id]
                    ])

            clustering_id = ':'.join([view, self.distance, self.linkage])
            self.p_meta['available_clusterings'].append(clustering_id)
            self.p_meta['clusterings'][clustering_id] = {
                'newick':
                clustering.get_newick_tree_data_for_dict(
                    d['dict'], distance=self.distance, linkage=self.linkage)
            }

            # clustering is done, we can get prepared for the expansion of the view dict
            # with new layers. Note that these layers are going to be filled later.
            d['header'].extend(
                ['percent_completion', 'percent_redundancy', 'bin_name'])

            views_for_collection[view] = d

        default_clustering_class = 'mean_coverage' if self.p_meta[
            'merged'] else 'single'
        self.p_meta['default_clustering'] = get_default_clustering_id(
            default_clustering_class, self.p_meta['clusterings'])

        # replace self.views with the new view:
        self.views = views_for_collection

        # preparing a new 'splits_basic_info'
        basic_info_for_collection = {}
        for bin_id in self.collection:
            basic_info_for_collection[bin_id] = {
                'length':
                sum([
                    self.splits_basic_info[s]['length']
                    for s in self.collection[bin_id]
                ]),
                'gc_content':
                numpy.mean([
                    self.splits_basic_info[s]['gc_content']
                    for s in self.collection[bin_id]
                ])
            }

        # replace it with the new one!
        self.splits_basic_info = basic_info_for_collection

        # additional layers for each bin, INCLUDING completion and redundancy estimates:
        self.progress.new('Additional layers for bins')
        num_bins = len(self.collection)
        current_bin = 1
        for bin_id in self.collection:
            self.progress.update('%d of %d :: %s ...' %
                                 (current_bin, num_bins, bin_id))

            # get completeness estimate
            p_completion, p_redundancy, domain, domain_confidence, results_dict = completeness.get_info_for_splits(
                set(self.collection[bin_id]))

            for view in self.views:
                self.views[view]['dict'][bin_id]['bin_name'] = bin_id
                self.views[view]['dict'][bin_id][
                    'percent_completion'] = p_completion
                self.views[view]['dict'][bin_id][
                    'percent_redundancy'] = p_redundancy

            current_bin += 1
        self.progress.end()

        # let empty some variables to avoid confusion downstream
        self.hmm_sources_info = {}
        self.split_sequences = None
        self.splits_taxonomy_dict = {}
        self.genes_in_splits_summary_dict = {}
        self.split_names_ordered = sorted(
            self.views[self.default_view]['dict'].keys())

        # set the title:
        R = lambda x: x.replace('-', ' ').replace('_', ' ')
        self.title = "Collection '%s' for %s" % (R(
            self.collection_name), R(self.p_meta['sample_id']))

Beispiel #9

0

Datei anzeigen

Datei: interactive.py Projekt: ascendo/anvio

    def load_collection_mode(self, args):
        self.collections.populate_collections_dict(self.profile_db_path, anvio.__profile__version__)

        if self.list_collections:
            self.collections.list_collections()
            sys.exit()

        if self.collection_name not in self.collections.collections_dict:
            raise ConfigError, "%s is not a valid collection name. See a list of available ones with '--list-collections' flag" % args.collection_name

        self.progress.new('Accessing HMM hits')
        self.progress.update('...')
        self.hmm_access = SequencesForHMMHits(self.contigs_db_path, sources=set(['Campbell_et_al']))
        self.progress.end()

        completeness = Completeness(args.contigs_db, source='Campbell_et_al')
        if not len(completeness.sources):
            raise ConfigError, "HMM's were not run for this contigs database :/"

        if not 'Campbell_et_al' in completeness.sources:
            raise ConfigError, "This 'collection' mode uses Campbell et al. single-copy gene collections to make sense of the completion\
                                and redundancy of bins. The bad news is that Campbell et al is not among the available HMM sources in your\
                                contigs database :/ Why? Why?"

        # we are about to request a collections dict that contains only split names that appear in the
        # profile database:
        self.collection = self.collections.get_trimmed_dicts(self.collection_name, get_split_names_in_profile_db(self.profile_db_path))[0]

        # we will do something quite tricky here. first, we will load the full mode to get the self.views
        # data structure fully initialized based on the profile database. Then, we using information about
        # bins in the selected collection, we will create another views data structure, and replace it with
        # the one we have. that will be LOVELY.
        self.load_full_mode(args)

        self.p_meta['available_clusterings'] = []
        self.p_meta['clusterings'] = {}

        # setting up a new view:
        views_for_collection = {}
        for view in self.views:
            v = self.views[view]

            d = {}
            d['table_name'] = v['table_name']
            d['header'] = [h for h in v['header'] if not h == '__parent__']
            d['dict'] = {}

            for bin_id in self.collection:
                d['dict'][bin_id] = {}
                for header in d['header']:
                     d['dict'][bin_id][header] = numpy.mean([v['dict'][split_name][header] for split_name in self.collection[bin_id]])

            clustering_id = ':'.join([view, self.distance, self.linkage])
            self.p_meta['available_clusterings'].append(clustering_id)
            self.p_meta['clusterings'][clustering_id] = {'newick': clustering.get_newick_tree_data_for_dict(d['dict'], distance=self.distance, linkage=self.linkage)}

            # clustering is done, we can get prepared for the expansion of the view dict
            # with new layers. Note that these layers are going to be filled later.
            d['header'].extend(['percent_completion', 'percent_redundancy', 'bin_name'])

            views_for_collection[view] = d

        default_clustering_class = 'mean_coverage' if self.p_meta['merged'] else 'single'
        self.p_meta['default_clustering'] = get_default_clustering_id(default_clustering_class, self.p_meta['clusterings'])

        # replace self.views with the new view:
        self.views = views_for_collection

        # preparing a new 'splits_basic_info'
        basic_info_for_collection = {}
        for bin_id in self.collection:
            basic_info_for_collection[bin_id] = {'length': sum([self.splits_basic_info[s]['length'] for s in self.collection[bin_id]]),
                                                 'gc_content': numpy.mean([self.splits_basic_info[s]['gc_content'] for s in self.collection[bin_id]])}


        # replace it with the new one!
        self.splits_basic_info = basic_info_for_collection

        # additional layers for each bin, INCLUDING completion and redundancy estimates:
        self.progress.new('Additional layers for bins')
        num_bins = len(self.collection)
        current_bin = 1
        for bin_id in self.collection:
            self.progress.update('%d of %d :: %s ...' % (current_bin, num_bins, bin_id))

            # get completeness estimate
            c = completeness.get_info_for_splits(set(self.collection[bin_id]))['Campbell_et_al']
            percent_completion = c['percent_complete']
            percent_redundancy = c['percent_redundancy']

            for view in self.views:
                self.views[view]['dict'][bin_id]['bin_name'] = bin_id
                self.views[view]['dict'][bin_id]['percent_completion'] = percent_completion
                self.views[view]['dict'][bin_id]['percent_redundancy'] = percent_redundancy

            current_bin += 1
        self.progress.end()

        # let empty some variables to avoid confusion downstream
        self.hmm_sources_info = {}
        self.split_sequences = None
        self.splits_taxonomy_dict = {}
        self.genes_in_splits_summary_dict = {}
        self.split_names_ordered = sorted(self.views[self.default_view]['dict'].keys())

        # set the title:
        R = lambda x: x.replace('-', ' ').replace('_', ' ')
        self.title = "Collection '%s' for %s" % (R(self.collection_name), R(self.p_meta['sample_id']))