Example #1
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    The only content change is that the `max_contig_length` meta value is
    (re)set to `sys.maxsize`; afterwards the stored version is replaced with
    `next_version`.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    # Best-effort removal of a previous value. Only ordinary errors are
    # ignored; KeyboardInterrupt / SystemExit must still propagate, which a
    # bare `except:` would have prevented.
    try:
        profile_db.remove_meta_key_value_pair('max_contig_length')
    except Exception:
        pass

    profile_db.update_meta_value('max_contig_length', sys.maxsize)

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single("Your profile db is now %s (and anvi'o will now take a break)." % next_version, nl_after=1, nl_before=1, mc='green')
Example #2
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    A placeholder `description` meta value is stored in the database and the
    stored version is replaced with `next_version`.

    Raises ConfigError when `db_path` is None or when the version found in
    the database differs from `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # sanity check on the path, delegated to utils
    utils.is_profile_db(db_path)

    # refuse to touch a database that is not at the expected version
    database = db.DB(db_path, None, ignore_version=True)
    version_in_db = str(database.get_version())
    if version_in_db != current_version:
        message = "Version of this profile database is not %s (hence, this script cannot really do anything)."
        raise ConfigError(message % current_version)

    progress.new("Trying to upgrade the profile database")
    progress.update('...')

    database.set_meta_value('description', '_No description is found_')

    # swap the old version entry for the new one
    database.remove_meta_key_value_pair('version')
    database.set_version(next_version)

    # wrap up
    database.disconnect()
    progress.end()

    success_message = 'Your profile db is now %s.' % next_version
    run.info_single(success_message, nl_after=1, nl_before=1, mc='green')
Example #3
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    Every item order and layer order whose type is 'newick' is parsed with
    `Tree(..., format=1)` and written back with `format=2`. The stored
    version is then replaced with `next_version`.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    # migrate item orders
    item_orders = profile_db.get_table_as_dict(item_orders_table_name)
    for order_name in item_orders:
        if item_orders[order_name]['type'] == 'newick':
            newick = Tree(item_orders[order_name]['data'], format=1)
            newick = newick.write(format=2)
            # Match the row by equality. With LIKE the name would be treated
            # as a pattern ('_' and '%' are wildcards, and ASCII matching is
            # case-insensitive), so other rows could be overwritten too.
            profile_db._exec("""UPDATE %s SET "data" = ? WHERE "name" = ?""" % item_orders_table_name, (newick, order_name))

    # migrate layer orders
    layer_orders = profile_db.get_table_as_dict(layer_orders_table_name)
    for order_name in layer_orders:
        if layer_orders[order_name]['data_type'] == 'newick':
            newick = Tree(layer_orders[order_name]['data_value'], format=1)
            newick = newick.write(format=2)
            # equality for the same reason as above
            profile_db._exec("""UPDATE %s SET "data_value" = ? WHERE "data_key" = ?""" % layer_orders_table_name, (newick, order_name))

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()
Example #4
0
def migrate(db_path):
    """Bump a profile database from `current_version` to `next_version`.

    Besides the version change, the migration writes a placeholder value
    under the `description` meta key.

    Raises ConfigError if no path is given, or if the database does not
    report `current_version` as its version.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # let utils vet the path before it is opened
    utils.is_profile_db(db_path)

    database = db.DB(db_path, None, ignore_version=True)

    # only a database at the expected version may be migrated
    found_version = str(database.get_version())
    if found_version != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    progress.new("Trying to upgrade the profile database")
    progress.update('...')

    database.set_meta_value('description', '_No description is found_')

    # the old version entry goes away before the new one is written
    database.remove_meta_key_value_pair('version')
    database.set_version(next_version)

    database.disconnect()
    progress.end()

    run.info_single('Your profile db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
Example #5
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    Adds a text column named `additional` to the `item_orders` table, sets it
    to the string `{}` for every row, and replaces the stored version with
    `next_version`.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError(
            "Version of this profile database is not %s (hence, this script cannot really do anything)."
            % current_version)

    profile_db._exec('ALTER TABLE "item_orders" ADD COLUMN "additional" text')

    # The value is a single-quoted SQL string literal: double quotes denote
    # identifiers in SQL and are only accepted as strings by SQLite through a
    # legacy fallback that can be disabled.
    profile_db._exec("""UPDATE "item_orders" SET "additional" = '{}'""")

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single(
        'Your profile db is now %s, and you know this deserves a celebration.'
        % next_version,
        nl_after=1,
        nl_before=1,
        mc='green')
Example #6
0
    def check_dbs_to_be_merged(self):
        """Reduce `self.input_profile_db_paths` to the profiles that can be merged.

        A path is set aside as improper when the `db_type` meta value of its
        database is not 'profile', or when its `blank` or `merged` meta value
        is truthy. The remaining paths replace `self.input_profile_db_paths`;
        if anything was set aside, a warning listing the excluded paths is
        issued.

        Raises ConfigError when every input is improper, or when fewer than
        two proper ones remain.
        """
        improper = []

        for path in self.input_profile_db_paths:
            utils.is_profile_db(path)

            profile = dbops.ProfileDatabase(path)
            meta = profile.meta

            if meta['db_type'] != 'profile' or meta['blank'] or meta['merged']:
                improper.append(path)

        # everything that was not set aside, in the original input order
        proper = [path for path in self.input_profile_db_paths if path not in improper]

        num_inputs = len(self.input_profile_db_paths)

        if len(improper) == num_inputs:
            raise ConfigError("None of the databases you asked anvi'o to merge were single, non-blank anvi'o profiles. If you\
                               are not testing anvi'o and yet found yourself here, it is safe to assume that something somewhere\
                               in your workflow is quite wrong :/")

        if len(proper) <= 1:
            raise ConfigError("Anvi'o can only merge single, non-blank anvi'o profiles. You have only one database that fits into that\
                               criterion. So there is nothing really to merge here. Yes?")

        if improper:
            excluded_paths = ', '.join(["'%s'" % path for path in improper])
            self.run.warning("Pleae read carefuly. You sent %d profile databases to anvi'o merger to be merged. However, not\
                              all of them were single, non-blank anvi'o profiles. Anvi'o removed %d of them, and will merge\
                              only the remaining %d. At the end of this warning you will find a list of paths to those databases\
                              anvi'o excluded from merging. If you are not happy with that, please carefully examine what went wrong.\
                              Here are all the paths for excluded databases: %s." \
                                            % (num_inputs, len(improper), len(proper), excluded_paths))

        # from here on only the proper paths are considered
        self.input_profile_db_paths = proper
Example #7
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    Adds a text column named `data_group` to both the `item_additional_data`
    and `layer_additional_data` tables, sets it to 'default' for every row,
    and replaces the stored version with `next_version`.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    profile_db._exec('ALTER TABLE "item_additional_data" ADD COLUMN "data_group" text')
    profile_db._exec('ALTER TABLE "layer_additional_data" ADD COLUMN "data_group" text')

    # 'default' is written as a single-quoted SQL string literal. In double
    # quotes it is an identifier that SQLite only treats as a string through
    # a legacy fallback (and only while no column of that name exists).
    profile_db._exec("""UPDATE "item_additional_data" SET "data_group" = 'default'""")
    profile_db._exec("""UPDATE "layer_additional_data" SET "data_group" = 'default'""")

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single('Your profile db is now %s, and you rock.' % next_version, nl_after=1, nl_before=1, mc='green')
Example #8
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    The `max_contig_length` meta value is (re)set to `sys.maxsize` and the
    stored version is replaced with `next_version`.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    # Best-effort removal of a previous value. `except Exception` keeps the
    # original tolerance for ordinary failures while letting
    # KeyboardInterrupt / SystemExit through, unlike a bare `except:`.
    try:
        profile_db.remove_meta_key_value_pair('max_contig_length')
    except Exception:
        pass

    profile_db.update_meta_value('max_contig_length', sys.maxsize)

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()
Example #9
0
    def populate_profile_dbs_info_dict(self):
        """Fill `self.profile_dbs_info_dict` with the meta of every mergeable input profile.

        For each path in `self.input_profile_db_paths` the database meta is
        stored under that path when its `db_type` is 'profile' and neither
        `blank` nor `merged` is truthy; all other paths are set aside as
        improper and reported in a warning.

        Raises ConfigError when every input is improper, or when fewer than
        two proper ones remain.
        """
        improper = []

        for path in self.input_profile_db_paths:
            utils.is_profile_db(path)

            profile_db = dbops.ProfileDatabase(path)

            is_single_non_blank = (profile_db.meta['db_type'] == 'profile'
                                   and not profile_db.meta['blank']
                                   and not profile_db.meta['merged'])

            if is_single_non_blank:
                self.profile_dbs_info_dict[path] = profile_db.meta
            else:
                improper.append(path)

        # the inputs that were not set aside, in their original order
        proper = [path for path in self.input_profile_db_paths if path not in improper]

        num_inputs = len(self.input_profile_db_paths)

        if len(improper) == num_inputs:
            raise ConfigError("None of the databases you asked anvi'o to merge were single, non-blank anvi'o profiles. If you\
                               are not testing anvi'o and yet found yourself here, it is safe to assume that something somewhere\
                               in your workflow is quite wrong :/")

        if len(proper) <= 1:
            raise ConfigError("Anvi'o can only merge single, non-blank anvi'o profiles. You have only one database that fits into that\
                               criterion. So there is nothing really to merge here. Yes?")

        if improper:
            excluded_paths = ', '.join(["'%s'" % path for path in improper])
            self.run.warning("Pleae read carefuly. You sent %d profile databases to anvi'o merger to be merged. However, not\
                              all of them were single, non-blank anvi'o profiles. Anvi'o removed %d of them, and will merge\
                              only the remaining %d. At the end of this warning you will find a list of paths to those databases\
                              anvi'o excluded from merging. If you are not happy with that, please carefully examine what went wrong.\
                              Here are all the paths for excluded databases: %s." \
                                            % (num_inputs, len(improper), len(proper), excluded_paths))
Example #10
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    After the version check the connection is closed, and
    `drop_entry_id_column_from_table` is called with the database path for
    every entry of `tables`. The database is then reopened to replace the
    stored version with `next_version`.

    Raises ConfigError when `db_path` is None or when the version found in
    the database differs from `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # sanity check on the path, delegated to utils
    utils.is_profile_db(db_path)

    # read the version, then close the connection whether or not it matches
    database = db.DB(db_path, None, ignore_version=True)
    version_matches = str(database.get_version()) == current_version
    database.disconnect()

    if not version_matches:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    # one table at a time
    for table_name in tables:
        properties = tables[table_name]
        drop_entry_id_column_from_table(db_path, table_name, table_properties=properties)

    # reopen just to replace the version entry
    database = db.DB(db_path, None, ignore_version=True)
    database.remove_meta_key_value_pair('version')
    database.set_version(next_version)
    database.disconnect()

    progress.end()

    summary = ("Your profile db is now version %s. %d of its tables were cleaned from a historical "
               "design artifact." % (next_version, len(tables)))
    run.info_single(summary, nl_after=1, nl_before=1, mc='green')
Example #11
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    The indels table is dropped (if that succeeds) and created again from
    `indels_table_structure` / `indels_table_types`, so its previous content
    is not carried over. The stored version is replaced with `next_version`
    and the meta values `min_indel_fraction` ('0.0') and `INDELs_profiled`
    ('0') are set.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)

    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    progress.new("Redefining indels table...")
    progress.update("...")

    # Delete if exists. Only ordinary errors are ignored here; a bare
    # `except:` would also have swallowed KeyboardInterrupt / SystemExit.
    try:
        profile_db.drop_table(indels_table_name)
    except Exception:
        pass

    # create with new structure
    profile_db.create_table(indels_table_name, indels_table_structure, indels_table_types)

    progress.update("Updating self table")
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)
    profile_db.set_meta_value('min_indel_fraction', '0.0')
    profile_db.set_meta_value('INDELs_profiled', '0')

    progress.update("Committing changes")
    profile_db.disconnect()

    progress.end()
    run.info_single("The profile database is now %s. This upgrade redefined the stored format of indels to "
                    "provide a more robust working framework. Unfortunately, if you had indels, you don't anymore." \
                    % (next_version), nl_after=1, nl_before=1, mc='green')
Example #12
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    For both additional-data tables the current content is passed through
    `divide_stackedbar_to_multiple_entries`, the table is emptied, and the
    values of every returned entry are inserted as one row.

    Raises ConfigError when `db_path` is None or when the version found in
    the database differs from `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # sanity check on the path, delegated to utils
    utils.is_profile_db(db_path)

    # refuse to touch a database that is not at the expected version
    database = db.DB(db_path, None, ignore_version=True)
    found_version = str(database.get_version())
    if found_version != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    for table_name in ['layer_additional_data', 'item_additional_data']:
        current_entries = database.get_table_as_dict(table_name)
        new_table = divide_stackedbar_to_multiple_entries(current_entries)

        # wipe the table, then write the converted entries back
        database._exec("DELETE FROM '%s'" % table_name)
        for entry_id in new_table:
            row = tuple(new_table[entry_id].values())
            database.insert(table_name, row)

    # swap the old version entry for the new one
    database.remove_meta_key_value_pair('version')
    database.set_version(next_version)

    # wrap up
    database.disconnect()
    progress.end()

    closing_note = "Your profile db is now %s (and anvi'o is as surprised as you are)." % next_version
    run.info_single(closing_note, nl_after=1, nl_before=1, mc='green')
Example #13
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    A database is treated as a full profile when it contains a table named
    `mean_coverage_Q2Q3_splits` or `atomic_data_splits`. For full profiles
    the variable codons table is created, `variable_amino_acid_frequencies`
    is dropped, `variable_nucleotide_positions` is renamed to
    `variable_nucleotides`, the `AA_frequencies_profiled` meta key is removed
    and `SCVs_profiled` is set to False. Other databases only get the version
    change.

    Raises ConfigError when `db_path` is None or when the version found in
    the database differs from `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # sanity check on the path, delegated to utils
    utils.is_profile_db(db_path)

    # refuse to touch a database that is not at the expected version
    database = db.DB(db_path, None, ignore_version=True)
    if str(database.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    is_merged = database.get_meta_value('merged')
    table_names = database.get_table_names()
    is_full_profile = 'mean_coverage_Q2Q3_splits' in table_names or 'atomic_data_splits' in table_names

    run.info('Profile db type', 'Merged' if is_merged else 'Single')
    run.info('Full profile', is_full_profile)

    if is_full_profile:
        # the new table comes first, then the old ones are dropped / renamed
        database.create_table(variable_codons_table_name, variable_codons_table_structure, variable_codons_table_types)
        database._exec('DROP TABLE variable_amino_acid_frequencies;')
        database._exec('ALTER TABLE variable_nucleotide_positions RENAME TO variable_nucleotides;')

        # meta bookkeeping for the tables touched above
        database.remove_meta_key_value_pair('AA_frequencies_profiled')
        database.set_meta_value('SCVs_profiled', False)

        database._exec('vacuum')

    # swap the old version entry for the new one
    database.remove_meta_key_value_pair('version')
    database.set_version(next_version)

    # wrap up
    database.disconnect()
    progress.end()

    if not is_full_profile:
        run.info_single("Your profile db is now version %s. But essentially nothing really happened to your\
                         database since it was a blank profile (which is OK, move along)." \
                                                            % next_version, nl_after=1, nl_before=1, mc='green')
    else:
        run.info_single("Your profile db is now version %s. If you had amino acids profiled for this database,\
                         you just lost all of that content :( The only option is to re-profile all your databases\
                         and merge them again. We are very sorry about the inconvenience. If you don't know what\
                         this message is talking about, then you have nothing to worry about." \
                                                            % next_version, nl_after=1, nl_before=1, mc='green')
Example #14
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    A database is treated as a full profile when it contains a table named
    `mean_coverage_Q2Q3_splits` or `atomic_data_splits`. For those, the value
    stored under the `contigs_ordered` meta key is copied to `items_ordered`
    and the old key is removed. Other databases only get the version change.

    Raises ConfigError when `db_path` is None or when the version found in
    the database differs from `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # sanity check on the path, delegated to utils
    utils.is_profile_db(db_path)

    # refuse to touch a database that is not at the expected version
    database = db.DB(db_path, None, ignore_version=True)
    if str(database.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    table_names = database.get_table_names()
    is_full_profile = 'mean_coverage_Q2Q3_splits' in table_names or 'atomic_data_splits' in table_names

    if is_full_profile:
        # same value, new key name
        ordered_flag = database.get_meta_value('contigs_ordered')
        database.set_meta_value('items_ordered', ordered_flag)
        database.remove_meta_key_value_pair('contigs_ordered')

    # swap the old version entry for the new one
    database.remove_meta_key_value_pair('version')
    database.set_version(next_version)

    # wrap up
    database.disconnect()
    progress.end()

    if not is_full_profile:
        run.info_single("Your profile db is now version %s. But essentially nothing really happened to your\
                         database since it was a blank profile (which is OK, move along)." \
                                                            % next_version, nl_after=1, nl_before=1, mc='green')
    else:
        run.info_single("Your profile db is now %s (WRONG .. anvi'o never takes breaks)." % next_version, nl_after=1, nl_before=1, mc='green')
Example #15
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    For both additional-data tables the current content is passed through
    `divide_stackedbar_to_multiple_entries`, the table is emptied, and every
    returned entry is inserted as a row whose first value is a counter
    starting at 0 for each table.

    Raises ConfigError when `db_path` is None or when the version found in
    the database differs from `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # sanity check on the path, delegated to utils
    utils.is_profile_db(db_path)

    # refuse to touch a database that is not at the expected version
    database = db.DB(db_path, None, ignore_version=True)
    if str(database.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    for table_name in ['layer_additional_data', 'item_additional_data']:
        current_entries = database.get_table_as_dict(table_name)
        new_table = divide_stackedbar_to_multiple_entries(current_entries)

        # wipe the table, then write the converted entries back with fresh ids
        database._exec("DELETE FROM '%s'" % table_name)
        for new_entry_id, old_entry_id in enumerate(new_table):
            row = (new_entry_id, *new_table[old_entry_id].values())
            database.insert(table_name, row)

    # swap the old version entry for the new one
    database.remove_meta_key_value_pair('version')
    database.set_version(next_version)

    # wrap up
    database.disconnect()
    progress.end()

    closing_note = "Your profile db is now %s (and anvi'o is as surprised as you are)." % next_version
    run.info_single(closing_note, nl_after=1, nl_before=1, mc='green')
Example #16
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    Adds a text column named `data_group` to both the `item_additional_data`
    and `layer_additional_data` tables, sets it to 'default' for every row,
    and replaces the stored version with `next_version`.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError(
            "Version of this profile database is not %s (hence, this script cannot really do anything)."
            % current_version)

    profile_db._exec(
        'ALTER TABLE "item_additional_data" ADD COLUMN "data_group" text')
    profile_db._exec(
        'ALTER TABLE "layer_additional_data" ADD COLUMN "data_group" text')

    # 'default' is written as a single-quoted SQL string literal. In double
    # quotes it is an identifier that SQLite only treats as a string through
    # a legacy fallback (and only while no column of that name exists).
    profile_db._exec(
        """UPDATE "item_additional_data" SET "data_group" = 'default'""")
    profile_db._exec(
        """UPDATE "layer_additional_data" SET "data_group" = 'default'""")

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()
Example #17
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    A database is treated as a full profile when it contains a table named
    `portion_covered_splits` or `atomic_data_splits`.

    * Full, merged profiles: the `portion_covered_*` tables are renamed to
      `detection_*`, the `mean_coverage_Q1Q3_*` tables to
      `mean_coverage_Q2Q3_*`, and the matching rows of the views table are
      replaced.
    * Full, single profiles: `atomic_data_contigs` and `atomic_data_splits`
      are rebuilt so that the `mean_coverage_Q1Q3` and `portion_covered`
      columns become `mean_coverage_Q2Q3` and `detection`.

    In all cases every stored state is rewritten with 'portion_covered'
    replaced by 'detection' and 'mean_coverage_Q1Q3' by 'mean_coverage_Q2Q3'
    in its content, and the stored version is replaced with `next_version`.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    is_merged = profile_db.get_meta_value('merged')
    tables_in_db = profile_db.get_table_names()
    is_full_profile = 'portion_covered_splits' in tables_in_db or 'atomic_data_splits' in tables_in_db

    run.info('Profile db type', 'Merged' if is_merged else 'Single')
    run.info('Full profile', is_full_profile)

    progress.new("Trying to upgrade the profile database")
    progress.update('...')

    if is_full_profile and is_merged:
        profile_db._exec('ALTER TABLE portion_covered_splits RENAME TO detection_splits;')
        profile_db._exec('ALTER TABLE portion_covered_contigs RENAME TO detection_contigs;')
        profile_db._exec('ALTER TABLE mean_coverage_Q1Q3_splits RENAME TO mean_coverage_Q2Q3_splits;')
        profile_db._exec('ALTER TABLE mean_coverage_Q1Q3_contigs RENAME TO mean_coverage_Q2Q3_contigs;')

        # String values are single-quoted SQL literals; double quotes denote
        # identifiers and only work as strings through a SQLite legacy fallback.
        profile_db._exec("DELETE FROM %s WHERE view_id = 'portion_covered'" % t.views_table_name)
        profile_db._exec("INSERT INTO %s VALUES ('detection', 'detection_splits')" % t.views_table_name)
        profile_db._exec("DELETE FROM %s WHERE view_id = 'mean_coverage_Q1Q3'" % t.views_table_name)
        profile_db._exec("INSERT INTO %s VALUES ('mean_coverage_Q2Q3', 'mean_coverage_Q2Q3_splits')" % t.views_table_name)

    elif is_full_profile and not is_merged:
        # columns are renamed by copying into a freshly created table
        profile_db.cursor.execute('ALTER TABLE atomic_data_contigs RENAME TO atomic_data_contigs_TEMP;')
        profile_db.cursor.execute('CREATE TABLE atomic_data_contigs (contig text, std_coverage numeric, mean_coverage numeric, mean_coverage_Q2Q3 numeric, max_normalized_ratio numeric, relative_abundance numeric, detection numeric, abundance numeric, variability numeric, __parent__ text);')
        profile_db.cursor.execute('INSERT INTO atomic_data_contigs(contig, std_coverage, mean_coverage, mean_coverage_Q2Q3, max_normalized_ratio, relative_abundance, detection, abundance, variability, __parent__) SELECT contig, std_coverage, mean_coverage, mean_coverage_Q1Q3, max_normalized_ratio, relative_abundance, portion_covered, abundance, variability, __parent__ FROM atomic_data_contigs_TEMP;')
        profile_db.cursor.execute('DROP TABLE atomic_data_contigs_TEMP;')

        profile_db.cursor.execute('ALTER TABLE atomic_data_splits RENAME TO atomic_data_splits_TEMP;')
        profile_db.cursor.execute('CREATE TABLE atomic_data_splits (contig text, std_coverage numeric, mean_coverage numeric, mean_coverage_Q2Q3 numeric, max_normalized_ratio numeric, relative_abundance numeric, detection numeric, abundance numeric, variability numeric, __parent__ text);')
        profile_db.cursor.execute('INSERT INTO atomic_data_splits(contig, std_coverage, mean_coverage, mean_coverage_Q2Q3, max_normalized_ratio, relative_abundance, detection, abundance, variability, __parent__) SELECT contig, std_coverage, mean_coverage, mean_coverage_Q1Q3, max_normalized_ratio, relative_abundance, portion_covered, abundance, variability, __parent__ FROM atomic_data_splits_TEMP;')
        profile_db.cursor.execute('DROP TABLE atomic_data_splits_TEMP;')

    # update states
    states = profile_db.get_table_as_dict(t.states_table_name)
    for state in states:
        updated_content = states[state]['content'].replace('portion_covered', 'detection').replace('mean_coverage_Q1Q3', 'mean_coverage_Q2Q3')

        # The state name is bound as a parameter rather than pasted into the
        # statement, so names containing quotes cannot break the DELETE.
        profile_db._exec('DELETE FROM %s WHERE name = ?' % t.states_table_name, (state,))
        profile_db._exec('INSERT INTO %s VALUES (?,?,?)' % (t.states_table_name), (state, updated_content, states[state]['last_modified']))

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single("Database successfully upgraded to version 17!", nl_after=1, nl_before=1, mc='green')
Example #18
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from version 13 to version 14.

    The `variable_nucleotide_positions` and `variable_amino_acid_frequencies`
    tables are rebuilt so that the `consensus` and `departure_from_consensus`
    columns become `reference` and `departure_from_reference`. Each table is
    renamed to a `_TEMP` copy, created again with the new column names,
    filled from the copy, and the copy is dropped.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not '13'.
    """
    if db_path is None:
        raise ConfigError("No profile database is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is 13
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != '13':
        raise ConfigError(
            "Version of this profile database is not 13 (hence, this script cannot really do anything)."
        )

    is_merged = profile_db.get_meta_value('merged')

    # The parentheses matter: `%` binds tighter than the conditional
    # expression, so without them the title is just 'single' whenever the
    # profile is not merged.
    progress.new("Trying to upgrade the %s profile database" %
                 ('merged' if is_merged else 'single'))

    # serious stuff ###################################################################################################
    progress.update('working on the variable nts table ...')
    profile_db.cursor.execute(
        'ALTER TABLE variable_nucleotide_positions RENAME TO variable_nucleotide_positions_TEMP;'
    )
    profile_db.cursor.execute(
        'CREATE TABLE variable_nucleotide_positions (entry_id numeric, sample_id text, split_name text, pos numeric, pos_in_contig numeric, corresponding_gene_call numeric, in_partial_gene_call numeric, in_complete_gene_call numeric, base_pos_in_codon numeric, codon_order_in_gene numeric, coverage numeric, cov_outlier_in_split bool, cov_outlier_in_contig bool, departure_from_reference numeric, competing_nts text, reference text, A numeric, T numeric, C numeric, G numeric, N numeric);'
    )
    profile_db.cursor.execute(
        'INSERT INTO variable_nucleotide_positions(entry_id, sample_id, split_name, pos, pos_in_contig, corresponding_gene_call, in_partial_gene_call, in_complete_gene_call, base_pos_in_codon, codon_order_in_gene, coverage, cov_outlier_in_split, cov_outlier_in_contig, departure_from_reference, competing_nts, reference, A, T, C, G, N) SELECT entry_id, sample_id, split_name, pos, pos_in_contig, corresponding_gene_call, in_partial_gene_call, in_complete_gene_call, base_pos_in_codon, codon_order_in_gene, coverage, cov_outlier_in_split, cov_outlier_in_contig, departure_from_consensus, competing_nts, consensus, A, T, C, G, N FROM variable_nucleotide_positions_TEMP;'
    )
    profile_db.cursor.execute('DROP TABLE variable_nucleotide_positions_TEMP;')

    progress.update('working on the aa freqs table ...')
    profile_db.cursor.execute(
        'ALTER TABLE variable_amino_acid_frequencies RENAME TO variable_amino_acid_frequencies_TEMP;'
    )
    profile_db.cursor.execute(
        'CREATE TABLE variable_amino_acid_frequencies (entry_id numeric, sample_id text, corresponding_gene_call numeric, codon_order_in_gene numeric, reference text, departure_from_reference numeric, coverage numeric, Ala numeric, Arg numeric, Asn numeric, Asp numeric, Cys numeric, Gln numeric, Glu numeric, Gly numeric, His numeric, Ile numeric, Leu numeric, Lys numeric, Met numeric, Phe numeric, Pro numeric, STP numeric, Ser numeric, Thr numeric, Trp numeric, Tyr numeric, Val numeric);'
    )
    profile_db.cursor.execute(
        'INSERT INTO variable_amino_acid_frequencies(entry_id, sample_id, corresponding_gene_call, codon_order_in_gene, reference, departure_from_reference, coverage, Ala, Arg, Asn, Asp, Cys, Gln, Glu, Gly, His, Ile, Leu, Lys, Met, Phe, Pro, STP, Ser, Thr, Trp, Tyr, Val) SELECT entry_id, sample_id, corresponding_gene_call, codon_order_in_gene, consensus, departure_from_consensus, coverage, Ala, Arg, Asn, Asp, Cys, Gln, Glu, Gly, His, Ile, Leu, Lys, Met, Phe, Pro, STP, Ser, Thr, Trp, Tyr, Val FROM variable_amino_acid_frequencies_TEMP;'
    )
    profile_db.cursor.execute(
        'DROP TABLE variable_amino_acid_frequencies_TEMP;')
    # /serious stuff ##################################################################################################

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version('14')

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single("Database successfully upgraded to version 14!",
                    nl_after=1,
                    nl_before=1,
                    mc='green')
Example #19
0
 def store(self):
     """Insert `self.db_entries` into the variable codons table of the database at `self.db_path`.

     The database is opened with the version reported by
     `utils.get_required_version_for_db` for that path, and all entries are
     handed to a single `_exec_many` call with one `?` placeholder per column
     of `t.variable_codons_table_structure`.
     """
     utils.is_profile_db(self.db_path)

     required_version = utils.get_required_version_for_db(self.db_path)
     database = db.DB(self.db_path, required_version)

     # one placeholder per column of the table
     placeholders = ','.join(['?'] * len(t.variable_codons_table_structure))
     insert_statement = '''INSERT INTO %s VALUES (%s)''' % (t.variable_codons_table_name, placeholders)

     database._exec_many(insert_statement, self.db_entries)
     database.disconnect()
Example #20
0
def migrate(db_path):
    """Upgrade the profile database at `db_path` from `current_version` to `next_version`.

    Tries to create the indels table, sets the meta values `INDELs_profiled`
    and `min_percent_identity` to 0, and replaces the stored version with
    `next_version`. The closing message depends on whether the database is a
    full profile, i.e. whether it contains a table named
    `mean_coverage_Q2Q3_splits` or `atomic_data_splits`.

    Raises:
        ConfigError: if `db_path` is None, or if the version stored in the
            database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError(
            "Version of this profile database is not %s (hence, this script cannot really do anything)."
            % current_version)

    # Best-effort creation (presumably the table may already be there).
    # Only ordinary errors are ignored; a bare `except:` would also have
    # swallowed KeyboardInterrupt / SystemExit.
    try:
        profile_db.create_table(indels_table_name, indels_table_structure,
                                indels_table_types)
    except Exception:
        pass

    profile_db.set_meta_value('INDELs_profiled', 0)
    profile_db.set_meta_value('min_percent_identity', 0)

    # full as opposed to a "blank profile"
    tables_in_db = profile_db.get_table_names()
    is_full_profile = 'mean_coverage_Q2Q3_splits' in tables_in_db or 'atomic_data_splits' in tables_in_db

    # set the version (once; the original repeated this step a second time)
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    if is_full_profile:
        run.info_single(
            "Your profile db is now %s. We just added a bunch of new variables to the `self` table "
            "of your database. All good now." % next_version,
            nl_after=1,
            nl_before=1,
            mc='green')
    else:
        run.info_single("Your profile db is now version %s. But essentially nothing really happened to your "
                        "database since it was a blank profile (which is OK, move along)." \
                                                            % next_version, nl_after=1, nl_before=1, mc='green')
Example #21
0
def migrate(db_path):
    """Upgrade a profile database from version 14 to version 15.

    Adds a `blank` flag (False) to the RUNINFO file found next to the
    database (if any) and to the database's meta values, then bumps the
    version.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at version 14.
    """
    if db_path is None:
        raise ConfigError("No profile database is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is 14
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != '14':
        raise ConfigError(
            "Version of this profile database is not 14 (hence, this script cannot really do anything)."
        )

    is_merged = profile_db.get_meta_value('merged')

    # the parentheses matter: `%` binds tighter than the conditional
    # expression, so without them the title collapses to just 'single'
    progress.new("Trying to upgrade the %s profile database" %
                 ('merged' if is_merged else 'single'))

    # update the runinfo file that sits next to the database. RUNINFO.mcp
    # takes precedence when both exist; RUNINFO.cp is the fallback.
    input_dir = os.path.dirname(os.path.abspath(db_path))
    runinfo_path = None
    for file_name in ('RUNINFO.mcp', 'RUNINFO.cp'):
        candidate = os.path.join(input_dir, file_name)
        if os.path.exists(candidate):
            runinfo_path = candidate
            break

    if runinfo_path:
        runinfo = dictio.read_serialized_object(runinfo_path)
        if 'blank' not in runinfo:
            runinfo['blank'] = False

            dictio.write_serialized_object(runinfo, runinfo_path)

    # add the new value
    profile_db.set_meta_value('blank', False)

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version('15')

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single("Database successfully upgraded to version 15!",
                    nl_after=1,
                    nl_before=1,
                    mc='green')
Example #22
0
    def get_split_names_of_interest_for_internal_genome(self, entry):
        """Return the list of split names for the bin described by `entry`.

        `entry` is indexed with the keys 'profile_db_path', 'collection_id'
        and 'bin_id'. Raises ConfigError when no split names come back.
        """
        utils.is_profile_db(entry['profile_db_path'])

        # bare attribute container handed to GetSplitNamesInBins
        class _BinArgs:
            pass

        bin_args = _BinArgs()
        bin_args.profile_db = entry['profile_db_path']
        bin_args.collection_name = entry['collection_id']
        bin_args.bin_id = entry['bin_id']

        splits_lookup = ccollections.GetSplitNamesInBins(bin_args)
        split_names = list(splits_lookup.get_split_names_only())

        if split_names:
            return split_names

        raise ConfigError("There are 0 splits defined for bin id %s in collection %s..." % (entry['bin_id'], entry['collection_id']))
Example #23
0
    def get_split_names_of_interest_for_internal_genome(self, entry):
        """Look up the split names of one bin of one collection.

        Reads 'profile_db_path', 'collection_id' and 'bin_id' from `entry`,
        and raises ConfigError if the resulting list of split names is empty.
        """
        utils.is_profile_db(entry['profile_db_path'])

        # plain namespace-like object carrying the three lookup parameters
        class Args:
            pass

        args = Args()
        attribute_to_entry_key = (('profile_db', 'profile_db_path'),
                                  ('collection_name', 'collection_id'),
                                  ('bin_id', 'bin_id'))
        for attribute, entry_key in attribute_to_entry_key:
            setattr(args, attribute, entry[entry_key])

        names = list(ccollections.GetSplitNamesInBins(args).get_split_names_only())

        if len(names) == 0:
            raise ConfigError("There are 0 splits defined for bin id %s in collection %s..." % (entry['bin_id'], entry['collection_id']))

        return names
Example #24
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    Drops the `gene_coverages` table, commits, vacuums, removes the
    `gene_coverages_computed` meta entry (each step on a best-effort basis),
    and bumps the version.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at
            `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    progress.new("Trying to upgrade the profile database")
    progress.update('...')

    # drop the table, commit, cleanup. Every statement is attempted on its
    # own, and a failure of one does not stop the next. `Exception` instead of
    # a bare except keeps Ctrl-C / SystemExit working.
    for statement in ('DROP TABLE gene_coverages;', 'COMMIT', 'vacuum'):
        try:
            profile_db._exec(statement)
        except Exception:
            pass

    # remove irrelevant self table entry
    try:
        profile_db.remove_meta_key_value_pair('gene_coverages_computed')
    except Exception:
        pass

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single('Your profile db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
Example #25
0
def migrate(db_path):
    """Bump a profile database to `next_version`, dropping obsolete tables.

    For merged profiles, the relative abundance and max-normalized-ratio
    tables (contigs and splits) are dropped. Note that this function does not
    compare the database's version against an expected one.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)

    progress.new("Durr Durr")
    progress.update('...')

    profile_db = db.DB(db_path, None, ignore_version=True)

    is_merged = profile_db.get_meta_value('merged')

    if is_merged:
        # merged profile: drop each table independently, so that one missing
        # table does not prevent the remaining ones from being dropped
        obsolete_tables = ('relative_abundance_contigs',
                           'relative_abundance_splits',
                           'max_normalized_ratio_contigs',
                           'max_normalized_ratio_splits')
        for table_name in obsolete_tables:
            try:
                profile_db._exec('DROP TABLE %s' % table_name)
            except Exception:
                pass

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()

    progress.end()

    if is_merged:
        run.info_single(
            f"The profile database is now {next_version}. There were some unnecessary "
            "tables in it, but they are no more.",
            nl_after=1,
            nl_before=1,
            mc='green')
    else:
        run.info_single(f"The profile database is now {next_version}.",
                        nl_after=1,
                        nl_before=1,
                        mc='green')
Example #26
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    Every newick entry in the item orders and layer orders tables is parsed
    with `Tree(..., format=1)`, re-serialized with `write(format=2)`, and
    written back. The version is then bumped.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at
            `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError(
            "Version of this profile database is not %s (hence, this script cannot really do anything)."
            % current_version)

    # migrate item orders. Rows are matched with `=` rather than `LIKE`:
    # order names may contain `_` or `%`, which LIKE treats as wildcards and
    # which could therefore overwrite unrelated rows.
    item_orders = profile_db.get_table_as_dict(item_orders_table_name)
    for order_name, order in item_orders.items():
        if order['type'] == 'newick':
            newick = Tree(order['data'], format=1).write(format=2)
            profile_db._exec(
                """UPDATE %s SET "data" = ? WHERE "name" = ?""" %
                item_orders_table_name, (newick, order_name))

    # migrate layer orders (same approach, different column names)
    layer_orders = profile_db.get_table_as_dict(layer_orders_table_name)
    for order_name, order in layer_orders.items():
        if order['data_type'] == 'newick':
            newick = Tree(order['data_value'], format=1).write(format=2)
            profile_db._exec(
                """UPDATE %s SET "data_value" = ? WHERE "data_key" = ?""" %
                layer_orders_table_name, (newick, order_name))

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    # NOTE(review): no progress.new() is issued in this function before this
    # progress.end() -- confirm that is harmless.
    progress.end()

    run.info_single('Your profile db is now %s. Aww, yisss.' % next_version,
                    nl_after=1,
                    nl_before=1,
                    mc='green')
Example #27
0
def migrate(db_path):
    """Upgrade a profile database from version 15 to version 16.

    Every entry of the `clusterings` table is re-inserted under the id
    `<old id>:euclidean:ward`; the `available_clusterings` and
    `default_clustering` meta values are rewritten accordingly. Nothing but
    the version changes when the table is empty.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at version 15.
    """
    if db_path is None:
        raise ConfigError("No profile database is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is 15
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != '15':
        raise ConfigError("Version of this profile database is not 15 (hence, this script cannot really do anything).")

    is_merged = profile_db.get_meta_value('merged')

    # the parentheses matter: `%` binds tighter than the conditional
    # expression, so without them the title collapses to just 'single'
    progress.new("Trying to upgrade the %s profile database" % ('merged' if is_merged else 'single'))

    available_clusterings = []
    clusterings = profile_db.get_table_as_dict('clusterings')

    if clusterings:
        profile_db._exec('''DELETE FROM clusterings''')

        for entry in clusterings:
            clustering_id = ':'.join([entry, 'euclidean', 'ward'])
            clustering_newick = clusterings[entry]['newick']
            profile_db._exec('''INSERT INTO clusterings VALUES (?,?)''', (clustering_id, clustering_newick))
            available_clusterings.append(clustering_id)

        profile_db.remove_meta_key_value_pair('available_clusterings')
        profile_db.set_meta_value('available_clusterings', ','.join(available_clusterings))

        default_clustering = profile_db.get_meta_value('default_clustering')
        profile_db.remove_meta_key_value_pair('default_clustering')
        profile_db.set_meta_value('default_clustering', ':'.join([default_clustering, 'euclidean', 'ward']))

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version('16')

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single("Database successfully upgraded to version 16!", nl_after=1, nl_before=1, mc='green')
Example #28
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    Drops the INDELs table (best effort) and recreates it with the structure
    given by `indels_table_structure` / `indels_table_types`, bumps the
    version, and sets the `min_indel_fraction` and `INDELs_profiled` meta
    values.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at
            `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)

    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError(
            "Version of this profile database is not %s (hence, this script cannot really do anything)."
            % current_version)

    progress.new("Redefining indels table...")
    progress.update("...")

    # delete if exists. `Exception` instead of a bare except so that
    # Ctrl-C / SystemExit are not silently swallowed here.
    try:
        profile_db.drop_table(indels_table_name)
    except Exception:
        pass

    # create with new structure
    profile_db.create_table(indels_table_name, indels_table_structure,
                            indels_table_types)

    progress.update("Updating self table")
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)
    profile_db.set_meta_value('min_indel_fraction', '0.0')
    profile_db.set_meta_value('INDELs_profiled', '0')

    progress.update("Committing changes")
    profile_db.disconnect()

    progress.end()
    run.info_single(
        "The profile database is now %s. This upgrade redefined the stored format of INDELS to "
        "provide a more robust working framework. If you are upgrading this database from `v6`, "
        "you don't have anything to worry about. But if you were using the active branch of anvi'o, "
        "then you lost your INDELs now and you would need to re-profile your BAM files if you want "
        "them back :)" % next_version,
        nl_after=1,
        nl_before=1,
        mc='green')
Example #29
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    If the `variable_nucleotides` table is present, renames its columns
    `in_partial_gene_call` -> `in_noncoding_gene_call` and
    `in_complete_gene_call` -> `in_coding_gene_call`. The version is bumped
    afterwards.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at
            `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    # full as opposed to a "blank profile"
    tables_in_db = profile_db.get_table_names()
    is_full_profile = 'mean_coverage_Q2Q3_splits' in tables_in_db or 'atomic_data_splits' in tables_in_db

    # ---------------------------------------------------------------------------------

    progress.new('Updating DB')

    variable_nts_table_name = 'variable_nucleotides'
    if variable_nts_table_name in tables_in_db:
        progress.update('Renaming columns in variability table...')
        profile_db._exec("""ALTER TABLE %s RENAME COLUMN in_partial_gene_call TO in_noncoding_gene_call;""" % variable_nts_table_name)
        profile_db._exec("""ALTER TABLE %s RENAME COLUMN in_complete_gene_call TO in_coding_gene_call;""" % variable_nts_table_name)

    # ---------------------------------------------------------------------------------

    # set the version only after the schema change went through, so a failed
    # rename does not leave the database labeled with the new version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    if is_full_profile:
        run.info_single("Your profile db is now %s. This update renamed two column names in the `variable_nucleotides` table "
                        "of your profile database (`in_partial_gene_call` has become `in_noncoding_gene_call`, and "
                        "`in_complete_gene_call` has become `in_coding_gene_call`)." % next_version, nl_after=1, nl_before=1, mc='green')
    else:
        run.info_single("Your profile db is now version %s. But essentially nothing really happened to your "
                        "database since it was a blank profile (which is OK, move along)." % next_version,
                        nl_after=1, nl_before=1, mc='green')
Example #30
0
def migrate(db_path):
    """Upgrade a profile database from version 14 to version 15.

    Records `blank = False` both in the RUNINFO file located in the
    database's directory (when one exists) and in the database's meta values,
    then bumps the version.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at version 14.
    """
    if db_path is None:
        raise ConfigError("No profile database is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is 14
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != '14':
        raise ConfigError("Version of this profile database is not 14 (hence, this script cannot really do anything).")

    is_merged = profile_db.get_meta_value('merged')

    # parenthesized on purpose: `"..." % 'merged' if x else 'single'` parses
    # as `("..." % 'merged') if x else 'single'`
    profile_type = 'merged' if is_merged else 'single'
    progress.new("Trying to upgrade the %s profile database" % profile_type)

    # update the runinfo file: use RUNINFO.mcp if it is there, otherwise fall
    # back to RUNINFO.cp (the fallback must not be clobbered by the .mcp lookup)
    input_dir = os.path.dirname(os.path.abspath(db_path))
    P = lambda x: os.path.join(input_dir, x)
    E = lambda x: os.path.exists(x)

    if E(P('RUNINFO.mcp')):
        runinfo_path = P('RUNINFO.mcp')
    elif E(P('RUNINFO.cp')):
        runinfo_path = P('RUNINFO.cp')
    else:
        runinfo_path = None

    if runinfo_path:
        runinfo = dictio.read_serialized_object(runinfo_path)
        if 'blank' not in runinfo:
            runinfo['blank'] = False

            dictio.write_serialized_object(runinfo, runinfo_path)

    # add the new value
    profile_db.set_meta_value('blank', False)

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version('15')

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single("Database successfully upgraded to version 15!", nl_after=1, nl_before=1, mc='green')
Example #31
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    Resets the `total_reads_mapped` meta value: one '0' per sample (joined
    with ', ') for merged profiles, a single '0' for blank profiles, and no
    change otherwise. Raises ConfigError if `db_path` is None or the database
    is not at `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # refuse anything that is not a profile database
    utils.is_profile_db(db_path)

    # refuse databases that are not at the expected version
    profile_db = db.DB(db_path, None, ignore_version = True)
    found_version = str(profile_db.get_version())
    if found_version != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    is_merged = profile_db.get_meta_value('merged')
    is_blank = profile_db.get_meta_value('blank')
    table_names = profile_db.get_table_names()
    is_full_profile = 'portion_covered_splits' in table_names

    run.info('Profile db type', 'Merged' if is_merged else 'Single')
    run.info('Full profile', is_full_profile)

    progress.new("Trying to upgrade the profile database")
    progress.update('...')

    # work out the replacement value, if this kind of profile needs one
    replacement = None
    if is_merged:
        sample_names = profile_db.get_meta_value('samples').split(',')
        replacement = ', '.join('0' for _ in sample_names)
    elif is_blank:
        replacement = '0'

    if replacement is not None:
        profile_db.remove_meta_key_value_pair('total_reads_mapped')
        profile_db.set_meta_value('total_reads_mapped', replacement)

    # swap the stored version for the new one
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # done with the database
    profile_db.disconnect()
    progress.end()

    run.info_single('Your profile db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
Example #32
0
def migrate(db_path):
    """Upgrade a profile database from version 13 to version 14.

    Rebuilds the `variable_nucleotide_positions` and
    `variable_amino_acid_frequencies` tables so that the columns
    `consensus` / `departure_from_consensus` become
    `reference` / `departure_from_reference`. Each table is renamed to a
    `_TEMP` copy, recreated, filled from the copy, and the copy is dropped.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at version 13.
    """
    if db_path is None:
        raise ConfigError("No profile database is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is 13
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != '13':
        raise ConfigError("Version of this profile database is not 13 (hence, this script cannot really do anything).")

    is_merged = profile_db.get_meta_value('merged')

    # the parentheses matter: `%` binds tighter than the conditional
    # expression, so without them the title collapses to just 'single'
    progress.new("Trying to upgrade the %s profile database" % ('merged' if is_merged else 'single'))

    # serious stuff ###################################################################################################
    progress.update('working on the variable nts table ...')
    profile_db.cursor.execute('ALTER TABLE variable_nucleotide_positions RENAME TO variable_nucleotide_positions_TEMP;')
    profile_db.cursor.execute('CREATE TABLE variable_nucleotide_positions (entry_id numeric, sample_id text, split_name text, pos numeric, pos_in_contig numeric, corresponding_gene_call numeric, in_partial_gene_call numeric, in_complete_gene_call numeric, base_pos_in_codon numeric, codon_order_in_gene numeric, coverage numeric, cov_outlier_in_split bool, cov_outlier_in_contig bool, departure_from_reference numeric, competing_nts text, reference text, A numeric, T numeric, C numeric, G numeric, N numeric);')
    profile_db.cursor.execute('INSERT INTO variable_nucleotide_positions(entry_id, sample_id, split_name, pos, pos_in_contig, corresponding_gene_call, in_partial_gene_call, in_complete_gene_call, base_pos_in_codon, codon_order_in_gene, coverage, cov_outlier_in_split, cov_outlier_in_contig, departure_from_reference, competing_nts, reference, A, T, C, G, N) SELECT entry_id, sample_id, split_name, pos, pos_in_contig, corresponding_gene_call, in_partial_gene_call, in_complete_gene_call, base_pos_in_codon, codon_order_in_gene, coverage, cov_outlier_in_split, cov_outlier_in_contig, departure_from_consensus, competing_nts, consensus, A, T, C, G, N FROM variable_nucleotide_positions_TEMP;')
    profile_db.cursor.execute('DROP TABLE variable_nucleotide_positions_TEMP;')

    progress.update('working on the aa freqs table ...')
    profile_db.cursor.execute('ALTER TABLE variable_amino_acid_frequencies RENAME TO variable_amino_acid_frequencies_TEMP;')
    profile_db.cursor.execute('CREATE TABLE variable_amino_acid_frequencies (entry_id numeric, sample_id text, corresponding_gene_call numeric, codon_order_in_gene numeric, reference text, departure_from_reference numeric, coverage numeric, Ala numeric, Arg numeric, Asn numeric, Asp numeric, Cys numeric, Gln numeric, Glu numeric, Gly numeric, His numeric, Ile numeric, Leu numeric, Lys numeric, Met numeric, Phe numeric, Pro numeric, STP numeric, Ser numeric, Thr numeric, Trp numeric, Tyr numeric, Val numeric);')
    profile_db.cursor.execute('INSERT INTO variable_amino_acid_frequencies(entry_id, sample_id, corresponding_gene_call, codon_order_in_gene, reference, departure_from_reference, coverage, Ala, Arg, Asn, Asp, Cys, Gln, Glu, Gly, His, Ile, Leu, Lys, Met, Phe, Pro, STP, Ser, Thr, Trp, Tyr, Val) SELECT entry_id, sample_id, corresponding_gene_call, codon_order_in_gene, consensus, departure_from_consensus, coverage, Ala, Arg, Asn, Asp, Cys, Gln, Glu, Gly, His, Ile, Leu, Lys, Met, Phe, Pro, STP, Ser, Thr, Trp, Tyr, Val FROM variable_amino_acid_frequencies_TEMP;')
    profile_db.cursor.execute('DROP TABLE variable_amino_acid_frequencies_TEMP;')
    # /serious stuff ##################################################################################################

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version('14')

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single("Database successfully upgraded to version 14!", nl_after=1, nl_before=1, mc='green')
Example #33
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    Adds a text column named `additional` to the `item_orders` table, sets it
    to the string `{}` for every existing row, and bumps the version.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at
            `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    profile_db._exec('ALTER TABLE "item_orders" ADD COLUMN "additional" text')
    # the value is a string literal, hence single quotes: in SQL, double
    # quotes denote identifiers, and SQLite only falls back to treating "{}"
    # as a string as a legacy quirk that can be disabled
    profile_db._exec("""UPDATE "item_orders" SET "additional" = '{}'""")

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    # NOTE(review): no progress.new() is issued in this function before this
    # progress.end() -- confirm that is harmless.
    progress.end()
Example #34
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    The meta value stored under `contigs_ordered` is copied to a new
    `items_ordered` key, the old key is removed, and the version is bumped.
    Raises ConfigError if `db_path` is None or the database is not at
    `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # refuse anything that is not a profile database
    utils.is_profile_db(db_path)

    # refuse databases that are not at the expected version
    profile_db = db.DB(db_path, None, ignore_version = True)
    found_version = str(profile_db.get_version())
    if found_version != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    # rename the key: read the old value, store it under the new name, and
    # only then remove the old entry
    ordering_flag = profile_db.get_meta_value('contigs_ordered')
    profile_db.set_meta_value('items_ordered', ordering_flag)
    profile_db.remove_meta_key_value_pair('contigs_ordered')

    # swap the stored version for the new one
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # done with the database
    profile_db.disconnect()
    progress.end()
Example #35
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    For full profiles, the `total_reads_mapped` meta value is stored as layer
    additional data (one value per sample for merged profiles, a single value
    keyed by the `samples` meta value otherwise). The meta value itself is
    then removed and the version is bumped.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at
            `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError(
            "Version of this profile database is not %s (hence, this script cannot really do anything)."
            % current_version)

    is_merged = profile_db.get_meta_value('merged')
    tables_in_db = profile_db.get_table_names()
    is_full_profile = 'mean_coverage_Q2Q3_splits' in tables_in_db or 'atomic_data_splits' in tables_in_db

    run.info('Profile db type', 'Merged' if is_merged else 'Single')
    run.info('Full profile', is_full_profile)

    total_reads_mapped = profile_db.get_meta_value('total_reads_mapped')
    samples = profile_db.get_meta_value('samples')
    # the connection is closed here and reopened further below, after the
    # layer additional data table (which is given only the path) is done
    profile_db.disconnect()

    layer_additional_data_table = TableForLayerAdditionalData(
        argparse.Namespace(profile_db=db_path))

    # we will do this only for full merged or single profiles
    full_upgrade = is_full_profile
    if is_full_profile and is_merged:
        sample_names = [s.strip() for s in samples.split(',')]
        read_counts = [int(m) for m in total_reads_mapped.split(',')]
        counts_by_sample = dict(zip(sample_names, read_counts))
        data = {
            sample: {'total_reads_mapped': counts_by_sample[sample]}
            for sample in sample_names
        }

        layer_additional_data_table.add(data, ['total_reads_mapped'])
    elif is_full_profile:
        layer_additional_data_table.add(
            {samples: {
                'total_reads_mapped': int(total_reads_mapped)
            }}, ['total_reads_mapped'])

    progress.new("Finalizing profile database upgrade")
    progress.update('...')

    profile_db = db.DB(db_path, None, ignore_version=True)

    # remove stuff no longer necessary
    profile_db.remove_meta_key_value_pair('total_reads_mapped')

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    # messages use implicit string concatenation: a backslash continuation
    # inside the literal would embed the source indentation in the output
    if full_upgrade:
        run.info_single("Your profile db is now version %s. You can learn more about what happened here "
                        "by taking a look at this issue: https://github.com/merenlab/anvio/issues/800"
                        % next_version, nl_after=1, nl_before=1, mc='green')
    else:
        run.info_single("Your profile db is now version %s. But essentially nothing really happened to your "
                        "database since it was a blank profile (which is OK, move along)."
                        % next_version, nl_after=1, nl_before=1, mc='green')
Example #36
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    Creates the layer orders and layer additional data tables, rebuilds
    `item_additional_data` with the columns `key`/`value`/`type` renamed to
    `data_key`/`data_value`/`data_type`, and bumps the version. If
    `check_samples_db_status()` returns a samples database path, its contents
    are exported and loaded into the profile database, and the samples
    database is renamed with an `.OBSOLETE` suffix on success.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at
            `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)

    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    # check samples db
    samples_db_path = check_samples_db_status()

    # start by adding new tables...
    profile_db.create_table(layer_orders_table_name, layer_orders_table_structure, layer_orders_table_types)
    profile_db.create_table(layer_additional_data_table_name, layer_additional_data_table_structure, layer_additional_data_table_types)

    # update the item_additional_data table
    profile_db.cursor.execute('ALTER TABLE item_additional_data RENAME TO item_additional_data_TEMP;')
    profile_db.cursor.execute('CREATE TABLE item_additional_data (entry_id numeric, item_name text, data_key text, data_value text, data_type text);')
    profile_db.cursor.execute('INSERT INTO item_additional_data(entry_id, item_name, data_key, data_value, data_type) SELECT entry_id, item_name, key, value, type FROM item_additional_data_TEMP;')
    profile_db.cursor.execute('DROP TABLE item_additional_data_TEMP;')

    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)
    profile_db.disconnect()

    # incorporating the samples database is best effort: the profile database
    # is already upgraded at this point, so a failure is reported, not raised
    fully_upgraded = False
    if samples_db_path:
        try:
            samples_db = SamplesInformationDatabase(samples_db_path)
            layers_info_path, layers_order_path = samples_db.export_samples_db_files()

            args = argparse.Namespace(profile_db=db_path, target_data_table='layers')
            TableForLayerAdditionalData(args).populate_from_file(layers_info_path)

            args = argparse.Namespace(profile_db=db_path, target_data_table='layers')
            TableForLayerOrders(args).populate_from_file(layers_order_path)

            os.remove(layers_info_path)
            os.remove(layers_order_path)

            fully_upgraded = True
        except Exception as e:
            run.warning('Something went wrong adding the data found in samples database into the profile database. This is what '
                        'we know: "%s".' % e)

    # messages use implicit string concatenation: a backslash continuation
    # inside the literal would embed the source indentation in the output
    if fully_upgraded:
        shutil.move(samples_db_path, samples_db_path + '.OBSOLETE')
        run.info_single("Your profile db is now version %s. You no longer need your old samples database (which is now "
                        "renamed to something ugly so you can see it)."
                        % next_version, nl_after=1, nl_before=1, mc='green')
    elif samples_db_path:
        run.info_single("Your profile db is now version %s. BUT THERE WAS THIS: the actual purpose of this script was to "
                        "incorporate the data in your samples database into your profile database. But for some reason it "
                        "has failed. Probably everything is still alright, but you may have to do that step manually. The "
                        "Error message should be somewhere above." % next_version, nl_after=1, nl_before=1, mc='green')
    else:
        run.info_single("Your profile db is now version %s. BUT WITHOUT the samples database incorporation as you wished."
                        % next_version, nl_after=1, nl_before=1, mc='green')
Example #37
0
def migrate(db_path):
    """Upgrade a profile database from `current_version` to `next_version`.

    If an `AUXILIARY-DATA.h5` file sits next to the profile database, its
    split coverages are copied into a new `AUXILIARY-DATA.db` database and
    the HDF5 file is deleted. The item additional data table is then added to
    the profile database and the version is bumped.

    Args:
        db_path: path to the profile database to upgrade.

    Raises:
        ConfigError: if `db_path` is None, or the database is not at
            `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)

    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    auxiliary_path = os.path.join(os.path.dirname(db_path), 'AUXILIARY-DATA.h5')
    new_auxiliary_path = os.path.join(os.path.dirname(db_path), 'AUXILIARY-DATA.db')

    fully_upgraded = os.path.exists(auxiliary_path)
    if fully_upgraded:
        # the context manager closes the HDF5 file even if the conversion fails
        with h5py.File(auxiliary_path, 'r') as fp:
            auxiliary_db = db.DB(new_auxiliary_path, '2', new_database=True)

            # attribute values may come back as bytes; store text
            contigs_db_hash = fp.attrs['hash']
            if isinstance(contigs_db_hash, bytes):
                contigs_db_hash = contigs_db_hash.decode('utf-8')

            auxiliary_db.set_meta_value('db_type', 'auxiliary data for coverages')
            auxiliary_db.set_meta_value('contigs_db_hash', contigs_db_hash)
            auxiliary_db.set_meta_value('creation_date', time.time())
            auxiliary_db.create_table(split_coverages_table_name, split_coverages_table_structure, split_coverages_table_types)
            auxiliary_db._exec("""CREATE INDEX IF NOT EXISTS covering_index ON %s(split_name, sample_name)""" % (split_coverages_table_name))

            # sample names are taken from the first split's group
            sample_names_in_db = set(list(fp['/data/coverages'].values())[0].keys())
            split_names_in_db = list(fp['/data/coverages'].keys())

            run.info("Auxiliary data file found", auxiliary_path)
            run.info("Splits found", len(split_names_in_db))
            run.info("Samples found", len(sample_names_in_db))
            run.info("New auxiliary data path", new_auxiliary_path)

            progress.new('Processing auxiliary')
            total = len(sample_names_in_db)

            entries = []
            for counter, sample_name in enumerate(sample_names_in_db, start=1):
                for split_name in split_names_in_db:
                    # `dataset[()]` reads the whole dataset; it replaces the
                    # deprecated `dataset.value` property
                    coverage = fp['/data/coverages/%s/%s' % (split_name, sample_name)][()]
                    entries.append((split_name, sample_name, convert_numpy_array_to_binary_blob(coverage),))

                progress.update('sample %d of %d ...' % (counter, total))

                # flush the buffer every 10 samples
                if counter % 10 == 0:
                    progress.update("Writing buffer into a new database file ...")
                    auxiliary_db.insert_many(split_coverages_table_name, entries=entries)
                    entries = []

            # whatever is left in the buffer
            auxiliary_db.insert_many(split_coverages_table_name, entries=entries)

            progress.end()
            auxiliary_db.disconnect()

        os.remove(auxiliary_path)

    # we also added a totally new table to this version:
    profile_db.create_table(item_additional_data_table_name, item_additional_data_table_structure, item_additional_data_table_types)

    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)
    profile_db.disconnect()

    # messages use implicit string concatenation: a backslash continuation
    # inside the literal would embed the source indentation in the output
    if fully_upgraded:
        run.info_single("Your profile db is now version %s. Anvi'o just created a new, up-to-date auxiliary data file (which ends with "
                        "extension .db), and deleted the old one (the one that ended with the extension .h5))"
                        % next_version, nl_after=1, nl_before=1, mc='green')
    else:
        run.info_single("Your profile db is now version %s. BUT THERE WAS THIS: the actual purpose of this script was to upgrade your "
                        "AUXILIARY-DATA.h5 file, but it was not where it was supposed to be. Anvi'o upgraded your profile.db alone, "
                        "but as a consequence you will not be able to use its auxiliary data with this profile database. If you care "
                        "about it, you should find the old profile database, and upgrade it along with its auxiliary data"
                        % next_version, nl_after=1, nl_before=1, mc='green')
Example #38
0
def migrate(db_path):
    """Rename 'portion_covered' to 'detection' and 'mean_coverage_Q1Q3' to 'mean_coverage_Q2Q3'.

    For full merged profiles, the view tables are renamed and the matching rows
    of the views table are replaced. For full single profiles, the atomic data
    tables for contigs and splits are rebuilt with the new column names. In
    every case the stored content of each state is rewritten to use the new
    names, and the version of the database is set to `next_version`.

    Parameters
    ==========
    db_path : str
        Path to the profile database to upgrade.

    Raises
    ======
    ConfigError
        If `db_path` is None, or the database is not at `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError(
            "Version of this profile database is not %s (hence, this script cannot really do anything)."
            % current_version)

    is_merged = profile_db.get_meta_value('merged')
    tables_in_db = profile_db.get_table_names()
    is_full_profile = 'portion_covered_splits' in tables_in_db or 'atomic_data_splits' in tables_in_db

    run.info('Profile db type', 'Merged' if is_merged else 'Single')
    run.info('Full profile', is_full_profile)

    progress.new("Trying to upgrade the profile database")
    progress.update('...')

    if is_full_profile and is_merged:
        # merged profiles keep one table per view, so renaming the tables is enough
        profile_db._exec(
            'ALTER TABLE portion_covered_splits RENAME TO detection_splits;')
        profile_db._exec(
            'ALTER TABLE portion_covered_contigs RENAME TO detection_contigs;')
        profile_db._exec(
            'ALTER TABLE mean_coverage_Q1Q3_splits RENAME TO mean_coverage_Q2Q3_splits;'
        )
        profile_db._exec(
            'ALTER TABLE mean_coverage_Q1Q3_contigs RENAME TO mean_coverage_Q2Q3_contigs;'
        )

        # the views table must point to the renamed tables
        profile_db._exec('DELETE FROM %s WHERE view_id = "portion_covered"' %
                         (t.views_table_name))
        profile_db._exec(
            'INSERT INTO %s VALUES ("detection", "detection_splits")' %
            t.views_table_name)
        profile_db._exec(
            'DELETE FROM %s WHERE view_id = "mean_coverage_Q1Q3"' %
            (t.views_table_name))
        profile_db._exec(
            'INSERT INTO %s VALUES ("mean_coverage_Q2Q3", "mean_coverage_Q2Q3_splits")'
            % t.views_table_name)

    elif is_full_profile and not is_merged:
        # single profiles keep the values as columns of the atomic data tables;
        # each table is rebuilt under the new column names through a temporary copy
        profile_db.cursor.execute(
            'ALTER TABLE atomic_data_contigs RENAME TO atomic_data_contigs_TEMP;'
        )
        profile_db.cursor.execute(
            'CREATE TABLE atomic_data_contigs (contig text, std_coverage numeric, mean_coverage numeric, mean_coverage_Q2Q3 numeric, max_normalized_ratio numeric, relative_abundance numeric, detection numeric, abundance numeric, variability numeric, __parent__ text);'
        )
        profile_db.cursor.execute(
            'INSERT INTO atomic_data_contigs(contig, std_coverage, mean_coverage, mean_coverage_Q2Q3, max_normalized_ratio, relative_abundance, detection, abundance, variability, __parent__) SELECT contig, std_coverage, mean_coverage, mean_coverage_Q1Q3, max_normalized_ratio, relative_abundance, portion_covered, abundance, variability, __parent__ FROM atomic_data_contigs_TEMP;'
        )
        profile_db.cursor.execute('DROP TABLE atomic_data_contigs_TEMP;')

        profile_db.cursor.execute(
            'ALTER TABLE atomic_data_splits RENAME TO atomic_data_splits_TEMP;'
        )
        profile_db.cursor.execute(
            'CREATE TABLE atomic_data_splits (contig text, std_coverage numeric, mean_coverage numeric, mean_coverage_Q2Q3 numeric, max_normalized_ratio numeric, relative_abundance numeric, detection numeric, abundance numeric, variability numeric, __parent__ text);'
        )
        profile_db.cursor.execute(
            'INSERT INTO atomic_data_splits(contig, std_coverage, mean_coverage, mean_coverage_Q2Q3, max_normalized_ratio, relative_abundance, detection, abundance, variability, __parent__) SELECT contig, std_coverage, mean_coverage, mean_coverage_Q1Q3, max_normalized_ratio, relative_abundance, portion_covered, abundance, variability, __parent__ FROM atomic_data_splits_TEMP;'
        )
        profile_db.cursor.execute('DROP TABLE atomic_data_splits_TEMP;')

    # update states
    states = profile_db.get_table_as_dict(t.states_table_name)
    for state in states:
        # compute the new content before touching the row
        updated_content = states[state]['content'].replace(
            'portion_covered', 'detection').replace(
                'mean_coverage_Q1Q3', 'mean_coverage_Q2Q3')

        # the state name is bound as a parameter rather than interpolated into
        # the statement, so names that contain quote characters cannot break it
        profile_db._exec(
            'DELETE FROM %s WHERE name = ?' % (t.states_table_name),
            (state, ))
        profile_db._exec(
            'INSERT INTO %s VALUES (?,?,?)' % (t.states_table_name),
            (state, updated_content, states[state]['last_modified']))

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single("Database successfully upgraded to version 17!",
                    nl_after=1,
                    nl_before=1,
                    mc='green')
Example #39
0
def migrate(db_path):
    """Convert the view tables of a profile database to the item/layer/value layout.

    Blank profiles are left as they are. For full single profiles, one new
    table per entry of `essential_data_fields_for_anvio_profiles` is created
    for both splits and contigs, filled from the atomic data tables. For full
    merged profiles, each existing view table is read, dropped, and recreated
    in the new layout. In all of these cases the version of the database is
    set to `next_version` at the end.

    Raises ConfigError if `db_path` is None, or if the database is neither
    blank nor a full profile.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)

    # take a first look to learn what kind of database this is, then close it;
    # every branch below opens its own connection
    profile_db = db.DB(db_path, None, ignore_version=True)
    is_merged = profile_db.get_meta_value('merged')
    is_blank = profile_db.get_meta_value('blank')
    sample_name = profile_db.get_meta_value('sample_id')
    tables_in_db = profile_db.get_table_names()
    profile_db.disconnect()

    is_full_profile = any(
        table in tables_in_db
        for table in ('mean_coverage_splits', 'atomic_data_splits'))

    profile_type = 'Merged' if is_merged else 'Single'
    run.info('Profile db type', profile_type)
    run.info('Full profile', is_full_profile)
    run.info('Is blank', is_blank)

    progress.new("Durr Durr")
    progress.update('...')

    if is_blank:
        # blank profile: there are no view tables to convert
        pass

    elif not is_full_profile:
        # surprise profile: neither blank, nor single, nor merged
        raise ConfigError(
            "Anvi'o is confuse. Your profile database does not fit into anything we have "
            "anticipated to run into here. For full disclosure, [the rest of the sentence "
            "was left blank intentionally just to drive you mad as you drive anvi'o mad -- "
            "eye for an eye].")

    elif is_merged:
        # merged profile: opened without rowid prepend
        profile_db = db.DB(db_path,
                           None,
                           ignore_version=True,
                           skip_rowid_prepend=True)

        # the layers of a merged profile are listed in the self table
        raw_sample_names = profile_db.get_meta_value('samples')
        sample_names = [name.strip() for name in raw_sample_names.split(',')]

        # start with an empty views table
        profile_db._exec("DELETE FROM views")

        for target in ('splits', 'contigs'):
            for view in essential_data_fields_for_anvio_profiles:
                table_name = f'{view}_{target}'

                progress.update(f"Working on table '{table_name} ...'")

                # keep the old contents in memory, then replace the table
                table_data = profile_db.get_table_as_dict(table_name)
                profile_db._exec(f'DROP TABLE {table_name}')
                profile_db._exec(
                    f'CREATE TABLE {table_name} (item text, layer text, value numeric)'
                )

                # one row per item per layer
                view_data = [(item, layer, table_data[item][layer])
                             for item in table_data
                             for layer in sample_names]
                profile_db._exec_many(
                    f'INSERT INTO {table_name} VALUES (?,?,?)', view_data)

                # only the splits tables are registered as views
                if target == 'splits':
                    profile_db._exec('INSERT INTO views VALUES (?,?)',
                                     (view, table_name))

        # goodbye
        profile_db.disconnect()

    else:
        # single profile
        profile_db = db.DB(db_path, None, ignore_version=True)

        # replace the default view with 'mean_coverage'
        profile_db.remove_meta_key_value_pair('default_view')
        profile_db.set_meta_value('default_view', 'mean_coverage')

        for target in ('splits', 'contigs'):
            # get rid of the view called 'single'
            profile_db._exec('DELETE FROM views WHERE view_id = "single"')

            atomic_data = profile_db.get_table_as_dict(f'atomic_data_{target}')

            for view in essential_data_fields_for_anvio_profiles:
                table_name = f'{view}_{target}'

                # create the new table
                profile_db._exec(
                    f'CREATE TABLE {table_name} (item text, layer text, value numeric)'
                )

                # a single profile has a single layer: one row per item
                view_data = [(item, sample_name, atomic_data[item][view])
                             for item in atomic_data]
                profile_db._exec_many(
                    f'INSERT INTO {table_name} VALUES (?,?,?)', view_data)

                # only the splits tables are registered as views
                if target == 'splits':
                    profile_db._exec('INSERT INTO views VALUES (?,?)',
                                     (view, table_name))

        # goodbye
        profile_db.disconnect()

    # set the version through a fresh connection
    profile_db = db.DB(db_path, None, ignore_version=True)
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)
    profile_db.disconnect()

    progress.end()
    run.info_single(
        f"The profile database is now {next_version}. This upgrade fixed one of the most annoying "
        f"early design decisions we have made (and when we say 'we', we actually mean 'Meren', and "
        f"the rest of us accept no blame for it). This design shortcoming prevented anvi'o to merge "
        f"more than 2,000 samples. The current update reflects a significant change in the structure "
        f"of the 'view' tables of anvi'o and not only removes this limitation, but also results in "
        f"significant speed and memory gains during `anvi-merge`. But this operation is similar to "
        f"changing the entire flooring of an apartment while having to make sure each piece of the "
        f"furniture put back to their place properly once the flooring is redone. The aim of this "
        f"migration script was to put the furniture back. If you are reading this message, you are "
        f"most likely ⭐",
        nl_after=1,
        nl_before=1,
        mc='green')
Example #40
0
def migrate(db_path):
    """Upgrade a profile database and convert its HDF5 auxiliary data file into a database.

    If an 'AUXILIARY-DATA.h5' file sits next to the profile database, every
    split coverage stored in it is copied into a new 'AUXILIARY-DATA.db' file
    in the same directory, and the old file is deleted. Whether or not that
    file was found, the item additional data table is added to the profile
    database and its version is set to `next_version`.

    Parameters
    ==========
    db_path : str
        Path to the profile database to upgrade.

    Raises
    ======
    ConfigError
        If `db_path` is None, or the database is not at `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)

    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError(
            "Version of this profile database is not %s (hence, this script cannot really do anything)."
            % current_version)

    auxiliary_path = os.path.join(os.path.dirname(db_path),
                                  'AUXILIARY-DATA.h5')
    new_auxiliary_path = os.path.join(os.path.dirname(db_path),
                                      'AUXILIARY-DATA.db')

    if os.path.exists(auxiliary_path):
        fp = h5py.File(auxiliary_path, 'r')

        # file attributes may come back as bytes or as str
        G = lambda x: fp.attrs[x].decode('utf-8') if isinstance(
            fp.attrs[x], bytes) else fp.attrs[x]
        auxiliary_db = db.DB(new_auxiliary_path, '2', new_database=True)

        auxiliary_db.set_meta_value('db_type', 'auxiliary data for coverages')
        auxiliary_db.set_meta_value('contigs_db_hash', G('hash'))
        auxiliary_db.set_meta_value('creation_date', time.time())
        auxiliary_db.create_table(split_coverages_table_name,
                                  split_coverages_table_structure,
                                  split_coverages_table_types)
        auxiliary_db._exec(
            """CREATE INDEX IF NOT EXISTS covering_index ON %s(split_name, sample_name)"""
            % (split_coverages_table_name))

        # the sample names are learned from the first split in the file
        coverages = fp['/data/coverages']
        sample_names_in_db = set(list(coverages.values())[0].keys())
        split_names_in_db = list(coverages.keys())

        run.info("Auxiliary data file found", auxiliary_path)
        run.info("Splits found", len(split_names_in_db))
        run.info("Samples found", len(sample_names_in_db))
        run.info("New auxiliary data path", new_auxiliary_path)

        progress.new('Processing auxiliary')
        counter, total = 0, len(sample_names_in_db)

        entries = []
        for sample_name in sample_names_in_db:
            for split_name in split_names_in_db:
                # `dataset[()]` reads the whole dataset; the `.value` property
                # that used to do this is no longer available in h5py 3
                coverage = fp['/data/coverages/%s/%s' %
                              (split_name, sample_name)][()]
                entries.append((
                    split_name,
                    sample_name,
                    convert_numpy_array_to_binary_blob(coverage),
                ))

            counter += 1
            progress.update('sample %d of %d ...' % (counter, total))

            # write the buffer every ten samples
            if counter % 10 == 0:
                progress.update("Writing buffer into a new database file ...")
                auxiliary_db.insert_many(split_coverages_table_name,
                                         entries=entries)
                entries = []

        # write whatever is left in the buffer
        auxiliary_db.insert_many(split_coverages_table_name, entries=entries)

        progress.end()
        auxiliary_db.disconnect()
        fp.close()

        os.remove(auxiliary_path)
        fully_upgraded = True
    else:
        fully_upgraded = False

    # we also added a totally new table to this version:
    profile_db.create_table(item_additional_data_table_name,
                            item_additional_data_table_structure,
                            item_additional_data_table_types)

    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)
    profile_db.disconnect()

    if fully_upgraded:
        run.info_single("Your profile db is now version %s. Anvi'o just created a new, up-to-date auxiliary data file (which ends with \
                         extension .db), and deleted the old one (the one that ended with the extension .h5))" \
                                                            % next_version, nl_after=1, nl_before=1, mc='green')
    else:
        run.info_single("Your profile db is now version %s. BUT THERE WAS THIS: the actual purpose of this script was to upgrade your\
                         AUXILIARY-DATA.h5 file, but it was not where it was supposed to be. Anvi'o upgraded your profile.db alone,\
                         but as a consequence you will not be able to use its auxiliary data with this profile database. If you care\
                         about it, you should find the old profile database, and upgrade it along with its auxiliary data" \
                                                            % next_version, nl_after=1, nl_before=1, mc='green')
Example #41
0
def migrate(db_path):
    """Move clusterings into the item orders table and rename the related self table keys.

    Every row of the 'clusterings' table is copied into the item orders table
    as a 'newick' entry, the three clustering-related meta keys are copied
    under their new names, the old table and keys are removed, and the version
    of the database is set to `next_version`.

    Parameters
    ==========
    db_path : str
        Path to the profile database to upgrade.

    Raises
    ======
    ConfigError
        If `db_path` is None, or the database is not at `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    utils.is_profile_db(db_path)

    # make sure the version is accurate
    profile_db = db.DB(db_path, None, ignore_version = True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    progress.new("Trying to upgrade the profile database")
    progress.update('...')

    # The steps below are best-effort on purpose: a failure in any of them is
    # ignored and the upgrade carries on. They catch `Exception` rather than
    # everything, so that Ctrl-C and interpreter exit are not swallowed.
    try:
        profile_db.create_table(t.item_orders_table_name, t.item_orders_table_structure, t.item_orders_table_types)
    except Exception:
        pass

    clusterings = profile_db.get_table_as_dict('clusterings')

    # move clustering data into the new table
    for clustering in clusterings:
        newick = clusterings[clustering]['newick']
        profile_db._exec('''INSERT INTO %s VALUES (?,?,?)''' % t.item_orders_table_name, (clustering, 'newick', newick))

    # update keys
    for old_key, new_key in [('available_clusterings', 'available_item_orders'),
                             ('contigs_clustered', 'contigs_ordered'),
                             ('default_clustering', 'default_item_order')]:
        try:
            profile_db.set_meta_value(new_key, profile_db.get_meta_value(old_key))
        except Exception:
            pass

    # remove stuff that is no longer relevant
    try:
        profile_db._exec('DROP TABLE clusterings;')
        profile_db.remove_meta_key_value_pair('available_clusterings')
        profile_db.remove_meta_key_value_pair('contigs_clustered')
        profile_db.remove_meta_key_value_pair('default_clustering')
    except Exception:
        pass

    # commit
    try:
        profile_db._exec('COMMIT')
    except Exception:
        pass

    # cleanup
    try:
        profile_db._exec('vacuum')
    except Exception:
        pass

    # set the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # bye
    profile_db.disconnect()
    progress.end()

    run.info_single('Your profile db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
Example #42
0
 def store(self):
     """Insert `self.db_entries` into the variable codons table of the profile database at `self.db_path`."""
     utils.is_profile_db(self.db_path)

     required_version = utils.get_required_version_for_db(self.db_path)
     database = db.DB(self.db_path, required_version)

     # one placeholder per column of the table
     placeholders = ','.join('?' for _ in range(len(t.variable_codons_table_structure)))
     insert_statement = '''INSERT INTO %s VALUES (%s)''' % (t.variable_codons_table_name, placeholders)

     database._exec_many(insert_statement, self.db_entries)
     database.disconnect()
Example #43
0
def migrate(db_path):
    """Add the 'fetch_filter' meta value to a profile database and set its version.

    Blank profiles are not modified beyond the version. Merged profiles get
    one 'None' fetch filter and one '0.0' minimum percent identity per sample
    listed in the 'samples' meta value. Any other profile is treated as a
    single profile and gets a single 'None' fetch filter.

    Parameters
    ==========
    db_path : str
        Path to the profile database to upgrade.

    Raises
    ======
    ConfigError
        If `db_path` is None.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)


    profile_db = db.DB(db_path, None, ignore_version = True)

    is_blank = profile_db.get_meta_value('blank')
    is_merged = profile_db.get_meta_value('merged')

    progress.new("Durr Durr")
    progress.update('...')

    msg = ""

    if is_blank:
        # nothing to be done since we don't have
        # fetch filters for blank profiles
        msg = "But this was a blank profile database, so anvi'o did nothing."
    elif is_merged:
        # we need to update this. removing a previous value is best-effort:
        # a failure is ignored, but Ctrl-C and interpreter exit are not swallowed
        try:
            profile_db.remove_meta_key_value_pair('fetch_filter')
        except Exception:
            pass

        samples = profile_db.get_meta_value('samples').split(',')
        profile_db.set_meta_value('fetch_filter', ', '.join(['None'] * len(samples)))

        msg = ("This was a merged profile database, so anvi'o assumed none of the single profiles "
               "had any fetch filters, and marked them as such.")

        # BEWARE OF THIS CHEATING.
        # yes we are not here for this, but we will squeeze it in anyway. so far we have not been
        # tracking specific 'min percent identity' parameters set for individual single profiles
        # to filter short reads that are taken into consideration during profiling. but at this
        # stage of the codebase, the merger class does store that information in merged profile
        # self tables, so we are also representing that information in previous versions of
        # merged profile databases:
        profile_db.set_meta_value('min_percent_identity', ', '.join(['0.0'] * len(samples)))
    else:
        # best-effort removal, same as for merged profiles
        try:
            profile_db.remove_meta_key_value_pair('fetch_filter')
        except Exception:
            pass

        profile_db.set_meta_value('fetch_filter', 'None')

        msg = ("This was a single profile database, so anvi'o marked it with a blank fetch filter (which "
               "really is the case since fetch filters are just being introduced in anvi'o, and any single "
               "profile database that was generated in previous versions do not have any fetch filters (unless "
               "you are Florian -- because if you are, you need to re-profile all your databases you had "
               "profiled with a fetch filter).")

    profile_db.set_version(next_version)

    # goodbye
    profile_db.disconnect()

    progress.end()

    run.info_single(f"The profile database is now {next_version}. We just added a very fancy feature in anvi'o, "
                    f"'fetch filter', that enables you to define very specific filters regarding what to work with "
                    f"from BAM files during profiling, and this update is all about that. {msg}",
                    nl_after=1, nl_before=1, mc='green')
Example #44
0
def migrate(db_path):
    """Move 'total_reads_mapped' from the self table into the layer additional data.

    For full merged profiles, the comma-separated 'total_reads_mapped' and
    'samples' meta values are paired up and added as layer additional data,
    one value per sample. For full single profiles the single value is added
    for the single sample. In every case the 'total_reads_mapped' meta key is
    removed and the version of the database is set to `next_version`.

    Raises ConfigError if `db_path` is None, or if the database is not at
    `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # only profile databases are welcome here
    utils.is_profile_db(db_path)

    # and only those at the expected version
    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    is_merged = profile_db.get_meta_value('merged')
    tables_in_db = profile_db.get_table_names()
    is_full_profile = any(table in tables_in_db
                          for table in ('mean_coverage_Q2Q3_splits', 'atomic_data_splits'))

    profile_type = 'Merged' if is_merged else 'Single'
    run.info('Profile db type', profile_type)
    run.info('Full profile', is_full_profile)

    # read what we need and let go of the database, since the table class
    # below is handed the path of the database rather than this connection
    total_reads_mapped = profile_db.get_meta_value('total_reads_mapped')
    samples = profile_db.get_meta_value('samples')
    profile_db.disconnect()

    namespace = argparse.Namespace(profile_db=db_path)
    layer_additional_data_table = TableForLayerAdditionalData(namespace)

    # only full profiles, merged or single, have anything to move
    if not is_full_profile:
        full_upgrade = False
    elif is_merged:
        full_upgrade = True
        total_reads_mapped = [int(value) for value in total_reads_mapped.split(',')]
        samples = [name.strip() for name in samples.split(',')]
        reads_per_sample = dict(zip(samples, total_reads_mapped))
        data = {name: {'total_reads_mapped': reads_per_sample[name]} for name in samples}

        layer_additional_data_table.add(data, ['total_reads_mapped'])
    else:
        total_reads_mapped = int(total_reads_mapped)
        single_sample_data = {samples: {'total_reads_mapped': total_reads_mapped}}
        layer_additional_data_table.add(single_sample_data, ['total_reads_mapped'])
        full_upgrade = True

    progress.new("Finalizing profile database upgrade")
    progress.update('...')

    profile_db = db.DB(db_path, None, ignore_version=True)

    # this key is no longer needed in the self table
    profile_db.remove_meta_key_value_pair('total_reads_mapped')

    # bump the version
    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)

    # done
    profile_db.disconnect()
    progress.end()

    if full_upgrade:
        message = "Your profile db is now version %s. You can learn more about what happened here\
                         by taking a look at this issue: https://github.com/merenlab/anvio/issues/800"
    else:
        message = "Your profile db is now version %s. But essentially nothing really happened to your\
                         database since it was a blank profile (which is OK, move along)."

    run.info_single(message % next_version, nl_after=1, nl_before=1, mc='green')
Example #45
0
def migrate(db_path):
    """Add layer data tables to a profile database and pull in the samples database.

    Creates the layer orders and layer additional data tables, rebuilds the
    item additional data table with renamed columns, and sets the version of
    the database to `next_version`. If `check_samples_db_status()` returns a
    path, the files exported from that samples database are loaded into the
    new tables and the samples database is renamed with an '.OBSOLETE' suffix;
    a failure during this step is reported as a warning and not raised.

    Raises ConfigError if `db_path` is None, or if the database is not at
    `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    utils.is_profile_db(db_path)

    profile_db = db.DB(db_path, None, ignore_version=True)
    if str(profile_db.get_version()) != current_version:
        raise ConfigError("Version of this profile database is not %s (hence, this script cannot really do anything)." % current_version)

    # a false value here means there is no samples database to incorporate
    samples_db_path = check_samples_db_status()

    # the two brand new tables
    profile_db.create_table(layer_orders_table_name, layer_orders_table_structure, layer_orders_table_types)
    profile_db.create_table(layer_additional_data_table_name, layer_additional_data_table_structure, layer_additional_data_table_types)

    # rebuild the item_additional_data table under its new column names
    # (key, value, type -> data_key, data_value, data_type) through a temporary copy
    for statement in ('ALTER TABLE item_additional_data RENAME TO item_additional_data_TEMP;',
                      'CREATE TABLE item_additional_data (entry_id numeric, item_name text, data_key text, data_value text, data_type text);',
                      'INSERT INTO item_additional_data(entry_id, item_name, data_key, data_value, data_type) SELECT entry_id, item_name, key, value, type FROM item_additional_data_TEMP;',
                      'DROP TABLE item_additional_data_TEMP;'):
        profile_db.cursor.execute(statement)

    profile_db.remove_meta_key_value_pair('version')
    profile_db.set_version(next_version)
    profile_db.disconnect()

    # the samples database is dealt with only after the profile database is
    # upgraded and closed
    fully_upgraded = False
    if samples_db_path:
        try:
            samples_db = SamplesInformationDatabase(samples_db_path)
            layers_info_path, layers_order_path = samples_db.export_samples_db_files()

            TableForLayerAdditionalData(
                argparse.Namespace(profile_db=db_path, target_data_table='layers')
            ).populate_from_file(layers_info_path)

            TableForLayerOrders(
                argparse.Namespace(profile_db=db_path, target_data_table='layers')
            ).populate_from_file(layers_order_path)

            # the exported files are not needed once they are loaded
            for exported_path in (layers_info_path, layers_order_path):
                os.remove(exported_path)

            fully_upgraded = True
        except Exception as e:
            run.warning('Something went wrong adding the data found in samples database into the profile database. This is what\
                         we know: "%s".' % e)

    if fully_upgraded:
        # keep the old samples database around under a name that shows it is retired
        shutil.move(samples_db_path, samples_db_path + '.OBSOLETE')
        run.info_single("Your profile db is now version %s. You no longer need your old samples database (which is now \
                         renamed to something ugly so you can see it." \
                                                            % next_version, nl_after=1, nl_before=1, mc='green')
    elif samples_db_path:
        run.info_single("Your profile db is now version %s. BUT THERE WAS THIS: the actual purpose of this script was to\
                         incorporate the data in your samples database into your profile database. But for some reason it\
                         has failed. Probably everything is still alright, but you may have to do that step manually. The\
                         Error messsage should be somewhere above." % next_version, nl_after=1, nl_before=1, mc='green')
    else:
        run.info_single("Your profile db is now version %s. BUT WITHOUT the samples database incorporation as you wished."\
                                                    % next_version, nl_after=1, nl_before=1, mc='green')