Beispiel #1
0
def helper_execute_select_into(query)->pd.DataFrame:
    """Run ``query`` through ``SqliteDataWarehouse.select_into`` for the tests.

    The database and destination table are read from the
    ``DATABASE_LOCATION`` section of the test configuration.

    Args:
        query: SQL text handed unchanged to ``select_into``.

    Returns:
        Whatever ``select_into`` returns.
    """
    settings = TstConfig()
    database = str(settings.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    target_table = str(
        settings.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))
    warehouse = SqliteDataWarehouse(LocalConfig())
    return warehouse.select_into(query, database, target_table)
Beispiel #2
0
    def __init__(self):
        """Populate the configuration attributes via ``self.get_env``.

        Where ``get_env`` is given a second argument it is presumably the
        fallback used when the variable is not set (confirm against
        ``get_env``).
        """
        PlatformConfig.__init__(self)
        test_settings = TstConfig()

        # Data lake and SQLite warehouse paths.
        self.data_lake_path = self.get_env("HM_LAKE_PATH")
        self.sqlite_db_path = self.get_env("HM_SQLITE_WAREHOUSE_DBPATH")

        # Lake bucket falls back to a folder under the test base folder.
        lake_bucket_fallback = os.path.join(
            test_settings.get_base_folder(), "sqlite_lake_bucket")
        self.lake_bucket = self.get_env("LAKE_BUCKET", lake_bucket_fallback)
        self.lake_path = self.get_env("LAKE_PATH")

        # Warehouse dataset name and on-disk location.
        self.warehouse_dataset = self.get_env(
            'WAREHOUSE_DATASET', "hyper_model")
        self.warehouse_location = self.get_env("WAREHOUSE_LOCATION", "./data")

        # Kubernetes settings.
        self.k8s_namespace = self.get_env('K8S_NAMESPACE')
        self.k8s_cluster = self.get_env('K8S_CLUSTER')

        # Artifact path falls back to "data" under the test base folder.
        artifact_fallback = os.path.join(
            test_settings.get_base_folder(), "data")
        self.kfp_artifact_path = self.get_env(
            'KFP_ARTIFACT_PATH', artifact_fallback)

        # With no CI commit SHA available the run is flagged as local dev.
        self.ci_commit = self.get_env("CI_COMMIT_SHA", "no-commit")
        self.is_local_dev = self.ci_commit == "no-commit"

        # GitLab settings.
        self.gitlab_token = self.get_env("GITLAB_TOKEN", None)
        self.gitlab_project = self.get_env("GITLAB_PROJECT", None)
        self.gitlab_url = self.get_env("GITLAB_URL", None)

        # Default SQLite database file inside the warehouse location.
        self.default_sql_lite_db_file = f"{self.warehouse_location}/default.db"
Beispiel #3
0
def test_dataframe_from_table_size()->None:
    """The DataFrame loaded from test table 1 has the table's (rows, columns) shape."""
    settings = TstConfig()
    database = str(settings.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    source_table = str(
        settings.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING1"))

    frame = helper_dataframe_from_table(database, source_table)
    expected_rows, expected_columns = get_row_column_count(
        database, source_table)

    assert frame.shape == (expected_rows, expected_columns)
Beispiel #4
0
def helper_execute_import_csv()->bool:
    """Import the test CSV file into test table 1.

    Database, table and CSV locations come from the test configuration.

    Returns:
        The value returned by ``SqliteDataWarehouse.import_csv``.
    """
    settings = TstConfig()
    # Resolve the target database/table and the CSV file used for testing.
    database = str(settings.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    target_table = str(
        settings.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING1"))
    csv_path = str(settings.get("FLAT_FILE_LOCATION", "CSV_DATA_FILE"))

    warehouse = SqliteDataWarehouse(LocalConfig())
    return warehouse.import_csv(csv_path, database, target_table)
Beispiel #5
0
def test_file_md5():
    """``file_md5`` matches the MD5 hex digest computed independently.

    The previous version ended in ``assert True`` because its hard-coded
    digest was only valid on one platform. The reference digest is now
    computed with ``hashlib`` from the very same file, so the comparison
    holds wherever the test runs.
    """
    # Local import keeps this block self-contained (stdlib only).
    import hashlib

    config = TstConfig()
    dummy_file = config.get("FLAT_FILE_LOCATION", "DUMMY_FILE_LOCATION")

    # Reference value: lowercase hex digest of the raw file bytes, the same
    # format as the hard-coded digest this test used to carry.
    with open(dummy_file, "rb") as handle:
        expected_return_value = hashlib.md5(handle.read()).hexdigest()

    actual_return_value = file_md5(dummy_file)
    assert expected_return_value == actual_return_value
Beispiel #6
0
def test_dataframe_from_query_size()->None:
    """A ``select *`` over test table 2 yields a DataFrame of the populated size.

    The table is populated in the default SQLite database file of
    ``LocalConfig``; the (rows, columns) pair reported by
    ``populate_table_in_database`` must equal the DataFrame's shape.
    """
    config = TstConfig()
    localConfig = LocalConfig()
    db = localConfig.default_sql_lite_db_file
    # Looked up once; the earlier version read the same setting twice.
    table2 = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))

    expected_row_count, expected_column_count = populate_table_in_database(
        db, table2)
    retVal = helper_dataframe_from_query(f"select * from {table2}")

    assert (expected_row_count, expected_column_count) == retVal.shape
Beispiel #7
0
def test_import_csv_length_of_db()->None:
    """After importing the test CSV, test table 1 holds the expected row count."""
    config = TstConfig()
    db = str(config.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    table = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING1"))
    expected_table_length = 5129

    # Only the import's effect on the table is checked; its return value
    # is not used by this test.
    helper_execute_import_csv()

    tbl_len_query_result = execute_query(
        db, f'SELECT count(*) as c FROM {table}')
    # Purely positional lookup of the single count cell. The former
    # ``.iloc[0][0]`` applied an integer key to a label-indexed Series,
    # which pandas has deprecated.
    actual_table_length = tbl_len_query_result.iloc[0, 0]
    assert expected_table_length == actual_table_length
Beispiel #8
0
def test_select_into_row_count()-> None:
    """``select_into`` with a 30-row LIMIT leaves exactly 30 rows in test table 2."""
    settings = TstConfig()
    database = str(settings.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    source_table = str(
        settings.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING1"))
    target_table = str(
        settings.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))

    # The LIMIT only keeps the copied set small; this relies on the source
    # table holding more than 30 rows.
    helper_execute_select_into(f"select * from {source_table} LIMIT 30")

    actual_rows, _actual_columns = get_row_column_count(database, target_table)
    expected_rows = 30
    assert expected_rows == actual_rows
Beispiel #9
0
def test_download():
    """``LocalDataLake.download`` of the second dummy file reports success.

    The file is downloaded into the temporary-files folder under its
    original file name.
    """
    classObj = LocalDataLake(LocalConfig())
    config = TstConfig()
    from_location = str(
        config.get("FLAT_FILE_LOCATION", "DUMMY_FILE_LOCATION2"))
    to_folder = str(config.get("FLAT_FILE_LOCATION", "TEMPERORY_FILES_FOLDER"))

    # The destination keeps the source file's name inside the target folder.
    # (An unused ``from_folder`` computation was removed here.)
    file_name = general.get_filename_from_path(from_location)
    to_file_path = os.path.join(to_folder, file_name)
    retVal = classObj.download(from_location, to_file_path)

    # Equality comparison kept on purpose: the exact return type of
    # ``download`` is not visible here, so the pass/fail semantics are
    # left unchanged.
    assert retVal == True  # noqa: E712
Beispiel #10
0
def test_table_schema():
    """``table_schema`` for the populated test table 2 equals the expected ``SqlTable``."""
    settings = TstConfig()
    target_table = str(
        settings.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))
    database = str(settings.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))

    # Fill the table with test data before asking for its schema.
    populate_table_in_database(database, target_table)
    warehouse = SqliteDataWarehouse(LocalConfig())
    reported_schema = warehouse.table_schema(database, target_table)

    expected_columns = [
        SqlColumn("id", "int64", True),
        SqlColumn("title", "object", True),
        SqlColumn("author", "object", True),
        SqlColumn("price", "object", True),
        SqlColumn("year", "object", True),
    ]
    expected_schema = SqlTable(target_table, target_table, expected_columns)

    # The whole table description must match.
    assert expected_schema == reported_schema
Beispiel #11
0
def test_upload():
    """``LocalDataLake.upload`` of the dummy file into the temporary folder returns True."""
    lake = LocalDataLake(LocalConfig())
    settings = TstConfig()

    source_path = str(
        settings.get("FLAT_FILE_LOCATION", "DUMMY_FILE_LOCATION"))
    destination_folder = str(
        settings.get("FLAT_FILE_LOCATION", "TEMPERORY_FILES_FOLDER"))

    # Split the source path into its folder and file name; the slice drops
    # the file name plus the one separator character in front of it.
    source_name = general.get_filename_from_path(source_path)
    source_folder = source_path[0:len(source_path) - len(source_name) - 1]

    outcome = lake.upload(destination_folder, source_folder, source_name)

    assert outcome == True
Beispiel #12
0
def test_dry_run():
    """``dry_run`` over the populated test table 2 reports exactly the expected columns."""
    settings = TstConfig()
    local_settings = LocalConfig()
    target_table = str(
        settings.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))
    database = local_settings.default_sql_lite_db_file

    # Fill the table with test data, then dry-run a select over it. The
    # warehouse is built with its own fresh LocalConfig.
    populate_table_in_database(database, target_table)
    warehouse = SqliteDataWarehouse(LocalConfig())
    reported_columns = warehouse.dry_run(f"select * from {target_table} ")

    expected_columns = [
        SqlColumn("id", "int64", True),
        SqlColumn("title", "object", True),
        SqlColumn("author", "object", True),
        SqlColumn("price", "object", True),
        SqlColumn("year", "object", True),
    ]

    # Every reported column must be one of the expected columns ...
    for column in reported_columns:
        assert column in expected_columns

    # ... and the number of columns must agree.
    assert len(expected_columns) == len(reported_columns)
def test_test_configuration_nagative():
    """Looking up a section/key that does not exist yields ``None``."""
    test_config = TstConfig()
    # Identity check: ``None`` is a singleton, so ``is`` is the correct test.
    assert test_config.get("NOT THERE", "NOT THERE") is None
def test_test_configuration_positive():
    """An entry present in the test configuration is returned as the string "1"."""
    entry = TstConfig().get("TESTING_THE_TEST_FILE", "TEST_ENTRY")
    assert entry == "1"
import pandas
import numpy
from pandas import DataFrame
import os.path
from hypermodel.tests.utilities import create_test_data
import logging
from hypermodel.tests.utilities.configurations import TstConfig
from typing import List, Dict, Any
import math
import random

# Module-level test configuration, created once at import time and read by
# the helper functions below.
config = TstConfig()


def prepare_csv_file() -> str:
    """Return the configured test CSV location, creating the file if it is missing.

    The location is read from ``FLAT_FILE_LOCATION`` / ``CSV_DATA_FILE`` in
    the module-level test configuration.
    """
    csv_path = config.get("FLAT_FILE_LOCATION", "CSV_DATA_FILE")
    if os.path.exists(csv_path):
        return csv_path
    # Nothing at the configured location yet: generate the test data first.
    create_test_data.create_csv_file()
    return csv_path


def get_test_dataframe() -> pandas.DataFrame:
    """Read the test CSV file (as located by ``prepare_csv_file``) into a DataFrame."""
    csv_path = prepare_csv_file()
    return pandas.read_csv(csv_path)


def get_test_dataframe_feature_names() -> List[str]:
    prepare_csv_file()
    features = [
        "REGISTER_NAME",  #text
        "CRED_LIC_NUM",  #int
        "CRED_LIC_NAME",  #text