from dataPreProcessing import process from featureEngineering import featureProcessing from featureSelection import featureSelecting from dataTransformation import dataTransform from modelTraining import model #del sys.modules['logBuilder'] #del sys.modules['dataIngestion'] #del sys.modules['dataPreProcessing'] #del sys.modules['featureEngineering'] #del sys.modules['featureSelection'] #del sys.modules['dataTransformation'] #del sys.modules['modelTraining'] fileDir = os.path.dirname(__file__) sys.path.insert(0, fileDir) updatelog = AppLogger() try: import pandas as pd from sklearn.model_selection import train_test_split import time updatelog.log('process', 'Imported necessary packages') except: updatelog.log( 'error', "Unexpected error:" + str(sys.exc_info()[0]) + str(sys.exc_info()[1])) masterBegin = time.time() # Database ingestion try: dataLoad = loadData.Load()
""" Created on Fri Aug 21 20:02:24 2020 @author: raghu """ import sys import os import numpy as np fileDir = os.path.dirname(__file__) dirPath = os.path.abspath(os.path.join(fileDir, '..')) sys.path.insert(0, dirPath) from logBuilder.logger import AppLogger updatelog = AppLogger() class featureEng(object): def processOutliers(self, dataFrame, continuous): try: overallIndeces = set() outlierIndeces = {} for feature in dataFrame[continuous].columns.values: q1 = np.percentile(dataFrame[feature], 25) q3 = np.percentile(dataFrame[feature], 75) iqr = q3 - q1 outliers = dataFrame.loc[(dataFrame[feature] < q1 - 1.5 * iqr) | (dataFrame[feature] > q3 + 1.5 * iqr)]