Ejemplo n.º 1
0
    def store_clusters_in_db(self, collection_name='CONCOCT'):
        """Store the clustering results as a collection in the profile database.

        Inverts ``self.clusters`` (split name -> bin id) into a
        bin id -> set-of-split-names mapping, gives every bin a randomly
        generated display color, and hands both dictionaries to
        ``dbops.TablesForCollections.append``.

        Args:
            collection_name: name under which the collection is stored.
        """
        # convert id -> bin mapping dict into a bin -> ids dict
        data = {}
        bin_info_dict = {}

        for split_name, bin_id in self.clusters.items():
            # `dict.has_key()` was removed in Python 3; the `in` operator works
            # on both Python 2 and Python 3.
            if bin_id in data:
                data[bin_id].add(split_name)
                continue

            # first time we see this bin: register it and give it a color
            data[bin_id] = set([split_name])
            bin_info_dict[bin_id] = {
                # poor man's random color generator: three random channel
                # values in [50, 230], rendered as two-digit upper-case hex
                'html_color': '#' + ''.join(
                    ['%02X' % random.randint(50, 230) for _ in range(3)]),
                'source': 'CONCOCT',
            }

        c = dbops.TablesForCollections(self.profile_db_path,
                                       anvio.__profile__version__)
        c.append(collection_name, data, bin_info_dict)

        self.run.info('CONCOCT results in db',
                      self.profile_db_path,
                      display_only=True)
Ejemplo n.º 2
0
    def store_refined_bins(self, refined_bin_data, refined_bin_colors):
        """Merge refined bins into the current collection and store the result.

        The collection identified by ``self.collection_id`` is read, bins that
        were stored by a previous call (``self.ids_for_already_refined_bins``)
        are dropped from it, the incoming bins are added, and the resulting
        collection is written back through ``dbops.TablesForCollections``.

        Args:
            refined_bin_data: dict mapping bin ids to the splits they contain;
                every value must be non-empty.
            refined_bin_colors: dict mapping the same bin ids to their colors.

        Raises:
            RefineError: if any incoming bin has zero splits, or if an incoming
                bin name already exists in the stored collection and is not one
                of the bins tracked in ``self.ids_for_already_refined_bins``.
        """
        # NOTE(review): the backslash continuations inside the two error
        # messages below keep the next line's indentation as part of the
        # message text; the strings are left exactly as they were.
        if any(len(b) == 0 for b in refined_bin_data.values()):
            # call-style raise: the `raise Error, 'msg'` form is Python 2 only
            raise RefineError('One or more of your bins have zero splits. If you are trying to remove this bin from your collection,\
                                this is not the right way to do it.')

        self.progress.new('Storing refined bins')
        self.progress.update('accessing to collection "%s" ...' %
                             self.collection_id)
        collection_dict = self.collections.get_collection_dict(
            self.collection_id)
        colors_dict = self.collections.get_collection_colors(
            self.collection_id)
        self.progress.end()

        # incoming names that collide with bins already in the database, unless
        # the collision is with a bin this session stored earlier (those get
        # replaced below)
        bad_bin_names = [
            b for b in collection_dict
            if (b in refined_bin_data
                and b not in self.ids_for_already_refined_bins)
        ]
        if bad_bin_names:
            only_one = len(bad_bin_names) == 1
            raise RefineError('%s of your bin names %s NOT unique, and already exist%s in the database. You must rename\
                                %s to something else: %s' % (
                'One' if only_one else len(bad_bin_names),
                'is' if only_one else 'are',
                's' if only_one else '',
                'this one' if only_one else 'these',
                ', '.join(bad_bin_names)))

        # remove bins that should be updated in the database:
        for bin_id in self.ids_for_already_refined_bins:
            collection_dict.pop(bin_id)
            colors_dict.pop(bin_id)

        # zero it out
        self.ids_for_already_refined_bins = set()

        if self.debug:
            self.run.info('collection from db', collection_dict)
            self.run.info('colors from db', colors_dict)
            self.run.info_single('')

            self.run.info('incoming collection data', refined_bin_data)
            self.run.info('incoming collection colors', refined_bin_colors)
            self.run.info_single('')

        # add the incoming bins and remember them, so that the next call
        # replaces them instead of reporting a name clash
        for bin_id in refined_bin_data:
            collection_dict[bin_id] = refined_bin_data[bin_id]
            colors_dict[bin_id] = refined_bin_colors[bin_id]
            self.ids_for_already_refined_bins.add(bin_id)

        if self.debug:
            self.run.info('resulting collection', collection_dict)
            self.run.info('resulting collection colors', colors_dict)
            self.run.info_single('')

        collections = dbops.TablesForCollections(self.profile_db_path,
                                                 anvio.__profile__version__)
        collections.append(self.collection_id, collection_dict, colors_dict)

        self.run.info_single('"%s" collection is updated!' %
                             self.collection_id)
Ejemplo n.º 3
0
    def store_collections_dict(self):
        """Store the bins submitted with the current request as a collection.

        Reads the ``source`` (collection name), ``data`` (JSON: bin name ->
        splits) and ``colors`` (JSON: bin name -> color) form fields from
        ``request``, validates them, and appends the collection to the pan or
        profile database of ``self.interactive``.

        Returns:
            A JSON-encoded string: either a confirmation message or a
            description of why nothing was stored.
        """
        if self.read_only:
            return json.dumps("Sorry! This is a read-only instance.")

        source = request.forms.get('source')
        data = json.loads(request.forms.get('data'))
        colors = json.loads(request.forms.get('colors'))

        # Truthiness instead of `len(source)`: if the form field is missing and
        # `.get()` hands back None, `len(None)` would raise a TypeError rather
        # than produce the error message below.
        if not source:
            run.info_single(
                'Lousy attempt from the user to store their collection under an empty source identifier name :/'
            )
            return json.dumps("Error: Collection name cannot be empty.")

        num_splits = sum(len(splits) for splits in data.values())
        if not num_splits:
            run.info_single('The user to store 0 splits as a collection :/')
            return json.dumps(
                "Error: There are no selections to store (you haven't selected anything)."
            )

        # an existing collection may only be overwritten if it is not read-only
        if source in self.interactive.collections.collections_dict:
            existing = self.interactive.collections.collections_dict[source]
            if existing['read_only']:
                run.info_single(
                    'Lousy attempt from the user to store their collection under "%s" :/'
                    % source)
                return json.dumps(
                    "Well, '%s' is a read-only collection, so you need to come up with a different name... Sorry!"
                    % source)

        run.info_single(
            'A request to store %d bins that describe %d splits under the collection id "%s"\
                         has been made.' % (len(data), num_splits, source),
            cut_after=None)

        bins_info_dict = {
            bin_name: {
                'html_color': colors[bin_name],
                'source': "anvi-interactive"
            }
            for bin_name in data
        }

        # the db here is either a profile db, or a pan db, but it can't be both:
        db_path = self.interactive.pan_db_path or self.interactive.profile_db_path
        collections = dbops.TablesForCollections(db_path)
        try:
            collections.append(source, data, bins_info_dict)
        except ConfigError as error:
            return json.dumps(error.clear_text())

        # a new collection is stored in the database, but the interactive object
        # does not know about that and needs updatin'
        self.interactive.collections.populate_collections_dict(db_path)

        msg = "New collection '%s' with %d bin%s been stored." % (
            source, len(data), 's have' if len(data) > 1 else ' has')
        run.info_single(msg)
        return json.dumps(msg)
Ejemplo n.º 4
0
def store_collections_dict(args, d, request, response):
    """Store the bins submitted with `request` as a collection in the profile db.

    Args:
        args: object whose ``read_only`` attribute disables storing when true.
        d: object exposing ``collections`` (with ``collections_dict`` and
            ``populate_collections_dict``) and ``profile_db_path``.
        request: request object; its ``forms`` mapping provides the ``source``
            (collection name), ``data`` (JSON: bin name -> splits) and
            ``colors`` (JSON: bin name -> color) fields.
        response: not used by this function.

    Returns:
        A JSON-encoded string: either a confirmation message or a description
        of why nothing was stored.
    """
    if args.read_only:
        return json.dumps("Sorry! This is a read-only instance.")

    source = request.forms.get('source')
    data = json.loads(request.forms.get('data'))
    colors = json.loads(request.forms.get('colors'))

    # Truthiness instead of `len(source)`: if the form field is missing and
    # `.get()` hands back None, `len(None)` would raise a TypeError rather than
    # produce the error message below.
    if not source:
        run.info_single(
            'Lousy attempt from the user to store their collection under an empty source identifier name :/'
        )
        return json.dumps("Error: Collection name cannot be empty.")

    num_splits = sum(len(splits) for splits in data.values())
    if not num_splits:
        run.info_single('The user to store 0 splits as a collection :/')
        return json.dumps(
            "Error: There are no selections to store (you haven't selected anything)."
        )

    # an existing collection may only be overwritten if it is not read-only
    if source in d.collections.collections_dict:
        existing = d.collections.collections_dict[source]
        if existing['read_only']:
            run.info_single(
                'Lousy attempt from the user to store their collection under "%s" :/'
                % source)
            return json.dumps(
                "Well, '%s' is a read-only collection, so you need to come up with a different name... Sorry!"
                % source)

    run.info_single(
        'A request to store %d bins that describe %d splits under the collection id "%s"\
                     has been made.' % (len(data), num_splits, source),
        cut_after=None)

    bins_info_dict = {
        bin_name: {
            'html_color': colors[bin_name],
            'source': "anvi-interactive"
        }
        for bin_name in data
    }

    collections = dbops.TablesForCollections(d.profile_db_path,
                                             anvio.__profile__version__)
    collections.append(source, data, bins_info_dict)

    # refresh the in-memory view of the collections now that the db changed
    d.collections.populate_collections_dict(d.profile_db_path,
                                            anvio.__profile__version__)
    msg = "New collection '%s' with %d bin%s been stored." % (
        source, len(data), 's have' if len(data) > 1 else ' has')
    run.info_single(msg)
    return json.dumps(msg)
Ejemplo n.º 5
0
    def store_clusters_in_db(self, collection_name='CONCOCT'):
        """Write the clustering results to the profile database as a collection.

        ``self.clusters`` maps split names to bin ids. It is regrouped into a
        bin id -> set-of-split-names mapping, every bin gets a random display
        color, and the result is appended under `collection_name`. When
        ``self.clusters`` is empty a notice is reported and nothing is stored.

        Args:
            collection_name: name under which the collection is stored.
        """
        # nothing to store: report it and bail out early
        if len(self.clusters) == 0:
            self.run.info('CONCOCT results in db',
                          'Nope. CONCOCT clusters are empty. Skipping!',
                          mc='red',
                          display_only=True)
            return

        # regroup the split -> bin mapping into bin -> splits
        splits_per_bin = {}
        info_per_bin = {}
        for split in self.clusters:
            bin_name = self.clusters[split]

            if bin_name not in splits_per_bin:
                splits_per_bin[bin_name] = set()
                # poor man's random color generator: three channel values
                # drawn from [50, 230], written as two-digit upper-case hex
                channels = tuple([random.randint(50, 230) for _ in range(3)])
                info_per_bin[bin_name] = {
                    'html_color': '#%02X%02X%02X' % channels,
                    'source': 'CONCOCT',
                }

            splits_per_bin[bin_name].add(split)

        collections_table = dbops.TablesForCollections(self.profile_db_path)
        collections_table.append(collection_name, splits_per_bin, info_per_bin)

        self.run.info('CONCOCT results in db',
                      self.profile_db_path,
                      display_only=True)
Ejemplo n.º 6
0
    def do_profile_db(self):
        """Create the profile database of the bin that is being split out.

        Steps, in order:
          1. create a fresh profile db at ``self.bin_profile_db_path`` and copy
             the 'self', 'views' and 'states' tables from the parent profile db;
          2. update a few meta values for the bin;
          3. create the atomic data tables and collect, per table, the column
             and the values used to pick the rows to migrate;
          4. migrate the selected rows with ``self.migrate_data``;
          5. unless ``self.skip_hierarchical_clustering`` is set, run the
             hierarchical clustering of items;
          6. add a 'DEFAULT' collection with a single 'ALL_SPLITS' bin.
        """
        # are we working with a merged profile database?
        merged = self.summary.p_meta['merged']
        self.run.info('Merged database', 'True' if merged else 'False')

        self.progress.new('Splitting "%s"' % self.bin_id)
        # the conditional has to be parenthesized: `%` binds tighter than
        # `if`/`else`, so without the parentheses the whole message becomes
        # just 'single' for non-merged profiles.
        self.progress.update('Subsetting the %s profile database' %
                             ('merged' if merged else 'single'))

        bin_profile_db = dbops.ProfileDatabase(self.bin_profile_db_path)
        bin_profile_db.touch()

        # copy-paste tables that will largely stay the same from the parent
        for table_name in ('self', 'views', 'states'):
            bin_profile_db.db.copy_paste(table_name=table_name,
                                         source_db_path=self.profile_db_path)

        # update some values
        bin_profile_db.db.update_meta_value('contigs_db_hash',
                                            self.contigs_db_hash)
        bin_profile_db.db.update_meta_value('available_clusterings', None)
        bin_profile_db.db.update_meta_value('sample_id', self.bin_id)

        # setup the filtering rules for migrating data: table name ->
        # (column to filter on, values to keep)
        tables = {}

        # this is to deal with merge atomic data tables that are stored in merged profiles.
        # they are being created on the fly during merge, so bin_profile_db.touch() did not
        # create them, and we have to do it here ourselves. while creating them in the target
        # db, we will also populate the tables dictionary for data migration::
        sample_names = self.summary.p_meta['samples']
        if merged:
            for table_name in t.atomic_data_table_structure[1:-1]:
                for target in ['splits', 'contigs']:
                    new_table_name = '_'.join([table_name, target])
                    new_table_structure = ['contig'
                                           ] + sample_names + ['__parent__']
                    new_table_types = [
                        'text'
                    ] + ['numeric'] * len(sample_names) + ['text']
                    bin_profile_db.db.create_table(new_table_name,
                                                   new_table_structure,
                                                   new_table_types)

                    tables[new_table_name] = ('contig', self.split_names)
        else:
            profile_db = dbops.ProfileDatabase(self.profile_db_path)
            # both atomic data tables are created with the structure and the
            # column types read from 'atomic_data_contigs'
            table_structure = profile_db.db.get_table_structure(
                'atomic_data_contigs')
            table_types = profile_db.db.get_table_column_types(
                'atomic_data_contigs')
            # the parent db was only needed for the two lookups above; release
            # it instead of leaving the connection open
            profile_db.disconnect()

            for table_name in ['atomic_data_splits', 'atomic_data_contigs']:
                bin_profile_db.db.create_table(table_name, table_structure,
                                               table_types)

                tables[table_name] = ('contig', self.split_names)

        # we need to migrate these guys, too.
        tables[t.variable_nts_table_name] = ('split_name', self.split_names)
        tables[t.variable_aas_table_name] = ('corresponding_gene_call',
                                             self.gene_caller_ids)

        bin_profile_db.disconnect()

        self.migrate_data(tables, self.profile_db_path,
                          self.bin_profile_db_path)

        self.progress.end()

        if not self.skip_hierarchical_clustering:
            # NOTE(review): `constants.merged_default` is used as the default
            # clustering config for single profiles as well -- confirm that
            # this is intended.
            dbops.do_hierarchical_clustering_of_items(
                self.bin_profile_db_path,
                constants.clustering_configs['merged' if merged else 'single'],
                self.split_names,
                self.database_paths,
                input_directory=self.bin_output_directory,
                default_clustering_config=constants.merged_default,
                distance=self.distance,
                linkage=self.linkage,
                run=terminal.Run(verbose=False),
                progress=self.progress)

        # add a collection
        collection_dict = {'ALL_SPLITS': self.split_names}
        bins_info_dict = {
            'ALL_SPLITS': {
                'html_color': '#FF0000',
                'source': 'anvi-split'
            }
        }
        collections = dbops.TablesForCollections(self.bin_profile_db_path)
        collections.append('DEFAULT',
                           collection_dict,
                           bins_info_dict=bins_info_dict)