Example #1
0
def create_database(db_name, table_name, directory_name, data_file, alias,
                    silent, integrity_cancel):
    """Initialize a new database project containing a single table.

    Validates the arguments, creates the project directories, an empty
    database file and the configuration file, creates the table and, when a
    data file is given, populates the table from it. An integrity check is
    run at the end unless it is cancelled.

    Unset string arguments are represented by the literal string ``"None"``
    (not the ``None`` object).

    :param db_name: (str)   Name of db; also used as the path of the project
        root directory, which must not exist yet
    :param table_name: (str)    Record that will be created (lowercased
        before use)
    :param directory_name: (str)    Directory with files to add, or "None"
    :param data_file: (str)     File with metadata for storing in database,
        or "None"
    :param alias: Alias written to the configuration for the new table
    :param silent: When truthy, progress messages are suppressed
    :param integrity_cancel: When truthy, the final integrity check is skipped
    :return: None
    :raises AssertionError: if db_name or table_name is "None", if db_name is
        an existing directory, or if directory_name is set but is not a
        directory
    """
    # Confirm working dir does not exist and that directory with genomes does exist
    assert db_name != "None", CreateDBAssertString.WORKING_DB_NOT_SET
    assert table_name != "None", CreateDBAssertString.TABLE_NAME_NOT_SET
    assert not os.path.isdir(db_name), CreateDBAssertString.WORKING_DIR_EXISTS
    if directory_name != "None":
        assert os.path.isdir(
            directory_name), CreateDBAssertString.SEQUENCE_DIR_NOT_EXISTS
    table_name = table_name.lower()
    if not silent:
        _initialization_display_message_prelude(db_name, db_name, table_name,
                                                directory_name, data_file,
                                                alias)
    # Gather files to commit and initial data to store for each file
    print_if_not_silent(silent, "Beginning process...")
    print_if_not_silent(silent, " Getting files from %s" % directory_name)
    if directory_name != "None":
        # A real list (not a one-shot generator) so the collection can be
        # iterated more than once, like the empty-tuple fallback below.
        genomic_files_to_add = os.listdir(directory_name)
    else:
        genomic_files_to_add = ()
    data_types = {}
    initial_data = []
    # Compare by value: identity checks against a str literal are unreliable.
    if data_file != "None":
        _initial_data = CountTable(data_file)
        # Ignore first column name, typically announcing "Name" or "Genome ID"
        # Get names and types of each column in dict
        # Key is header name (which will be used as name of column in database)
        # Value is type of data (int, float, str) that is found for a random genome id's data value
        # TODO: Data type determination requires a uniformity from the .tsv file data. Consider a workaround
        data_types = TypeMapper.get_translated_types(
            _initial_data, TypeMapper.py_type_to_string)
        initial_data.append(_initial_data)
    # Create working directories
    print_if_not_silent(silent,
                        " Creating directories at database root %s" % db_name)
    classes_dir, config_dir, db_dir, table_dir = _create_all_directories(
        db_name, table_name)
    # Create database file
    print_if_not_silent(silent, " Creating database file in %s" % db_dir)
    touch(os.path.join(db_dir, db_name + ".db"))
    # Write configuration info
    config_file = db_name + ".ini"
    print_if_not_silent(
        silent, " Writing database configuration to %s" %
        os.path.join(config_dir, config_file))
    config = Config()
    abs_path_working_dir = os.path.abspath(db_name)
    # From here on db_name is only the final path component of the project root
    db_name = os.path.basename(db_name)
    config[ConfigKeys.DATABASES] = {
        ConfigKeys.db_name:
        db_name,
        ConfigKeys.working_dir:
        abs_path_working_dir,
        ConfigKeys.rel_work_dir:
        db_name,
        ConfigKeys.migrations_dir:
        os.path.join(abs_path_working_dir, Directories.MIGRATIONS),
        ConfigKeys.config_dir:
        os.path.join(abs_path_working_dir, Directories.CONFIG),
        ConfigKeys.db_dir:
        os.path.join(abs_path_working_dir, Directories.DATABASE),
        ConfigKeys.rel_db_dir:
        os.path.join(db_name, Directories.DATABASE),
    }
    config[table_name] = {
        ConfigKeys.rel_classes_dir: os.path.join(db_name, Directories.CLASSES),
    }
    config[ConfigKeys.TABLES_TO_DB] = {
        table_name: db_name,
    }
    config[ConfigKeys.TABLES_TO_ALIAS] = {
        "{}|{}".format(alias, table_name): table_name,
    }
    with open(os.path.join(config_dir, config_file), "w") as W:
        config.write(W)
    # Create table
    print_if_not_silent(
        silent, "Creating new table %s at %s" %
        (table_name, os.path.join(db_dir, db_name)))
    os.makedirs(table_dir)
    ClassManager.create_initial_table_in_db(db_name,
                                            db_name,
                                            table_name,
                                            data_types,
                                            silent,
                                            initial=False)
    # Populate table with data from file and genomes
    # Get config file - confirms that it was written correctly
    cfg = ConfigManager(config, table_name)
    # initial_data is empty when no data file was given, so no extra guard is needed
    for _data in initial_data:
        ClassManager.populate_data_to_existing_table(
            table_name, _data, cfg, genomic_files_to_add, directory_name,
            silent)
    if not silent:
        _initialization_display_message_epilogue()
    if not integrity_cancel:
        integrity_check(db_name, table_name, "None", silent)
def create_table_in_existing_database(config_file, table_name, directory_name, data_file, alias, silent, integrity_cancel):
    """Add a new table to a database project that already exists.

    Loads the project configuration, refuses to continue when the table is
    already registered, creates the table directories, records the table in
    the configuration file, creates the table and, when a data file is given,
    populates the table from it. An integrity check is run at the end unless
    it is cancelled.

    Unset string arguments are represented by the literal string ``"None"``
    (not the ``None`` object).

    :param config_file: Configuration reference handed to
        ConfigManager.confirm_config_set; must not be None
    :param table_name: (str)    Name of the table to create (lowercased
        before use, so the existence check matches the stored section names)
    :param directory_name: (str)    Directory with files to add, or "None"
    :param data_file: (str)     File with metadata for storing in database,
        or "None"
    :param alias: Alias written to the configuration for the new table
    :param silent: When truthy, progress messages are suppressed
    :param integrity_cancel: When truthy, the final integrity check is skipped
    :return: None
    :raises AssertionError: if table_name is "None" or config_file is None
    :raises SystemExit: with code 1 if the table already exists in the config
    """
    assert table_name != "None", CreateDBAssertString.TABLE_NAME_NOT_SET
    assert config_file is not None, ConfigAssertString.CONFIG_FILE_NOT_PASSED
    # confirm_config_set rebinds config_file below; the integrity check is
    # called with the value the caller originally passed in.
    original_config_file = config_file
    # Sections are stored under the lowercased name, so normalize before the
    # existence check; otherwise a mixed-case name for an existing table would
    # slip through and overwrite that table's config section.
    table_name = table_name.lower()
    config, config_file = ConfigManager.confirm_config_set(config_file)
    if table_name in config.keys():
        print("!! Record exists, exiting. To update table, use UPDATE !!")
        # Same effect as exit(1) without relying on the site-provided builtin
        raise SystemExit(1)
    if not silent:
        _create_table_display_message_prelude(config[ConfigKeys.DATABASES][ConfigKeys.db_name],
                                              config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir],
                                              table_name,
                                              directory_name,
                                              data_file,
                                              alias)
    data_types = {}
    initial_data = []
    # Compare by value: identity checks against a str literal are unreliable.
    if data_file != "None":
        _initial_data = CountTable(data_file)
        # Ignore first column name, typically announcing "Name" or "Genome ID"
        # Get names and types of each column in dict
        # Key is header name (which will be used as name of column in database)
        # Value is type of data (int, float, str) that is found for a random genome id's data value
        # TODO: Data type determination requires a uniformity from the .tsv file data. Consider a workaround
        data_types = TypeMapper.get_translated_types(_initial_data, TypeMapper.py_type_to_string)
        initial_data.append(_initial_data)
    # Gather bio data from folder
    if directory_name != "None":
        # A real list (not a one-shot generator) so the collection can be
        # iterated more than once, like the empty-tuple fallback below.
        genomic_files_to_add = os.listdir(directory_name)
    else:
        genomic_files_to_add = ()
    # Create new table directories
    _create_all_directories(config[ConfigKeys.DATABASES][ConfigKeys.working_dir], table_name)
    # Update config object with new data
    config[table_name] = {
        ConfigKeys.rel_classes_dir: os.path.join(config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir],
                                                 Directories.CLASSES),
    }
    config.set(ConfigKeys.TABLES_TO_DB, table_name, config[ConfigKeys.DATABASES][ConfigKeys.db_name])
    config.set(ConfigKeys.TABLES_TO_ALIAS, "{}|{}".format(alias, table_name), table_name)
    # Write new config file
    with open(config_file, "w") as W:
        config.write(W)
    # Update ConfigManager object
    cfg = ConfigManager(config, table_name)
    # Create new table and populate with new data
    ClassManager.create_initial_table_in_db(cfg.db_name, cfg.working_dir, table_name, data_types, silent, initial=False)
    # initial_data is empty when no data file was given, so no extra guard is needed
    for _data in initial_data:
        ClassManager.populate_data_to_existing_table(table_name, _data, cfg,
                                                     genomic_files_to_add, directory_name, silent)
    if not silent:
        _create_table_display_message_epilogue()
    if not integrity_cancel:
        # NOTE(review): the lowercased table name is passed here, matching what
        # create_database passes — confirm integrity_check expects that form.
        integrity_check(original_config_file, table_name, alias, silent)