예제 #1
0
 def __init__(self, csv_path):
     '''
     Build the instance from the csv file located at csv_path.

     The path is handed to pp.load_data(); the object it returns is stored on
     self.processing and its process_all() method is invoked. The resulting
     dataframe (the object's df attribute) is then exposed as self.df.
     NOTE(review): process_all() is defined outside this block; it reportedly
     replaces unknown values with NaN and validates the column values - confirm.

     :param csv_path: a string that contains the path to the csv file.
     :raises AssertionError: if csv_path is not a str.
     '''
     assert isinstance(csv_path, str)
     # Keep the loader on the instance first, then work through a local alias.
     self.processing = loader = pp.load_data(csv_path)
     loader.process_all()
     self.df = loader.df
예제 #2
0
    def load_preprocessed_data(self):
        '''
        Load the data found at self.filepath, preprocess it and tag every row with an id.

        The file is loaded with pp.load_data(), process_all() is called on the returned object, and its df attribute is taken as the result. A 'customer_id' column holding the consecutive integers 0 .. len(df) - 1 is then written onto that same dataframe object (no copy is made).

        :return: the preprocessed dataframe including the 'customer_id' column.
        :rtype: presumably pandas.core.frame.DataFrame (depends on what pp.load_data() provides - confirm)
        '''
        loader = pp.load_data(self.filepath)
        loader.process_all()
        frame = loader.df

        # one sequential identifier per row, starting at zero
        frame['customer_id'] = np.arange(len(frame))
        return frame
예제 #3
0
def test_load_data():
    '''
        pre_processing.load_data() must hand back a DfBankAdditional instance
        when given csv_path.
    '''
    loaded = pre_processing.load_data(csv_path)
    assert isinstance(loaded, DfBankAdditional)
예제 #4
0
"""
"""
import pytest
import pandas as pd
import sys
import os
sys.path.insert(0, '..')
from src.pre_processing import *
import src.pre_processing as pre_processing

# Relative path to the csv dataset used by the tests in this module.
csv_path = '../data/bank-additional-full.csv'
# Loaded once, at import time of this module.
# NOTE(review): presumably shared by the tests further down - confirm.
my_pre_processing = pre_processing.load_data(csv_path)


@pytest.fixture(autouse=True)
def teardown():
    '''
        Make sure the working directory is the 'test' directory.

        Despite its name, this fixture contains no yield, so all of its work is
        done before the test body runs; autouse=True applies it automatically
        to the tests in scope. If the last component of the current working
        directory is not 'test', it changes into the 'test' sub-directory.

        :raises OSError: propagated from os.chdir() when 'test' cannot be entered.
    '''
    # os.path.basename honours the platform's path separator; splitting on a
    # hard-coded '/' never isolates the last component on Windows-style paths.
    if os.path.basename(os.getcwd()) != 'test':
        os.chdir('test')


def test_load_data():
    '''
        Check that pre_processing.load_data() returns a DfBankAdditional
        object for the csv file at csv_path.
    '''
    result = pre_processing.load_data(csv_path)
    assert isinstance(result, DfBankAdditional)


def test_process_all():