コード例 #1
0
from dataPreProcessing import process
from featureEngineering import featureProcessing
from featureSelection import featureSelecting
from dataTransformation import dataTransform
from modelTraining import model
#del sys.modules['logBuilder']
#del sys.modules['dataIngestion']
#del sys.modules['dataPreProcessing']
#del sys.modules['featureEngineering']
#del sys.modules['featureSelection']
#del sys.modules['dataTransformation']
#del sys.modules['modelTraining']
# Put this script's own directory at the front of the module search path.
fileDir = os.path.dirname(__file__)
sys.path.insert(0, fileDir)

# Logger instance used by the statements below.
updatelog = AppLogger()

# Import the remaining dependencies and log the outcome either way.
try:
    import pandas as pd
    from sklearn.model_selection import train_test_split
    import time
    updatelog.log('process', 'Imported necessary packages')
except Exception:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt and SystemExit propagate rather than being logged
    # as import failures.
    updatelog.log(
        'error',
        "Unexpected error:" + str(sys.exc_info()[0]) + str(sys.exc_info()[1]))

# Reference timestamp captured before the steps that follow.
# NOTE(review): `time` is bound by the guarded import above; if that import
# block failed (and `time` was not imported earlier in the file), this line
# raises NameError right after the error was logged -- confirm intent.
masterBegin = time.time()

# Database ingestion
try:
    dataLoad = loadData.Load()
コード例 #2
0
"""
Created on Fri Aug 21 20:02:24 2020

@author: raghu
"""

import sys
import os
import numpy as np

# Insert the parent of this file's directory at the front of sys.path.
# This runs before the logBuilder import below -- presumably so that the
# logBuilder package, living one level up, can be found; confirm against
# the project layout.
fileDir = os.path.dirname(__file__)
dirPath = os.path.abspath(os.path.join(fileDir, '..'))
sys.path.insert(0, dirPath)
from logBuilder.logger import AppLogger

# Module-level logger instance.
updatelog = AppLogger()


class featureEng(object):
    def processOutliers(self, dataFrame, continuous):
        try:
            overallIndeces = set()
            outlierIndeces = {}

            for feature in dataFrame[continuous].columns.values:
                q1 = np.percentile(dataFrame[feature], 25)
                q3 = np.percentile(dataFrame[feature], 75)
                iqr = q3 - q1
                outliers = dataFrame.loc[(dataFrame[feature] < q1 - 1.5 * iqr)
                                         |
                                         (dataFrame[feature] > q3 + 1.5 * iqr)]