Esempio n. 1
0
 def preprocessorMain(self):
     """Run the interactive preprocessing menu until the user exits.

     Calls ``self.removeTargetColumn()`` once, then loops: prints every
     entry of ``self.tasks``, reads an integer choice and hands
     ``self.data`` to the matching task class. Choices 2-4 replace
     ``self.data`` with whatever the task returns; choices 1 and 5 only
     read it.

     Raises:
         SystemExit: when the user enters -1.
     """
     self.removeTargetColumn()
     while True:
         print("\nTasks (Preprocessing)\n")
         for task in self.tasks:
             print(task)

         # Keep prompting until the reply parses as an integer.
         while True:
             try:
                 choice = int(
                     input(
                         "\nWhat do you want to do? [enter -1 to exit]:  "))
             except ValueError:
                 print("Integer Value required. Try again.....")
                 continue
             break

         if choice == -1:
             # The bare exit() helper is injected by the site module for
             # interactive use and is missing under ``python -S`` or in
             # frozen builds; raising SystemExit has the same effect and
             # is always available.
             raise SystemExit
         elif choice == 1:
             DataDescription(self.data).describe()
         elif choice == 2:
             self.data = Imputation(self.data).imputer()
         elif choice == 3:
             self.data = Categorical(self.data).categoricalMain()
         elif choice == 4:
             self.data = FeatureScaling(self.data).scaling()
         elif choice == 5:
             Download(self.data).download()
         else:
             print("\nWrong choice!! Try again...")
Esempio n. 2
0
 def get(argsfscale):
     """Scale a stored CSV in place and record the method in its JSON file.

     ``argsfscale`` is a comma-separated string; its first three fields
     are read as the scaling identifier, the CSV file name (under
     ``csvfiles/``) and the JSON file name (under ``jsonfiles/``), both
     relative to ``currentDirPath``.

     The CSV is loaded, passed through ``FeatureScaling.Scaling`` and
     written back over the same file. The JSON file then gets its
     ``'FeatureScaling'`` key set to the scaling identifier.

     Returns:
         dict with the first ten scaled rows as table-oriented JSON
         (``'predata'``), the CSV name (``'fileName'``), the column count
         (``'numofcol'``) and the JSON name (``'jfilename'``).
     """
     print("Feature Scaling...")
     fields = argsfscale.split(",")
     method, csv_name, meta_name = fields[0], fields[1], fields[2]

     meta_path = currentDirPath + "/jsonfiles/" + meta_name
     csv_path = currentDirPath + "/csvfiles/" + csv_name

     # Scale, renumber the rows from zero and overwrite the source CSV.
     scaled = FeatureScaling.Scaling(pd.read_csv(csv_path), method)
     scaled.reset_index(inplace=True, drop=True)
     scaled.to_csv(csv_path, index=False)

     # Read-modify-write the JSON file so the other keys are preserved.
     with open(meta_path) as reader:
         meta = json.load(reader)
     meta.update({'FeatureScaling': method})
     with open(meta_path, 'w') as writer:
         json.dump(meta, writer)

     preview = scaled.head(10)
     return {
         'predata': preview.to_json(orient='table'),
         'fileName': csv_name,
         'numofcol': len(scaled.columns),
         'jfilename': meta_name
     }
Esempio n. 3
0
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from feature_scaling import FeatureScaling

if __name__ == "__main__":
    # Fit a linear regression on the frames returned by FeatureScaling and
    # print its score on a 10% hold-out split (fixed seed for repeatability).
    features, target = FeatureScaling().getFeaturedDataframe()
    split = train_test_split(features, target, test_size=0.10, random_state=2)
    x_fit, x_holdout, y_fit, y_holdout = split

    model = LinearRegression()
    model.fit(x_fit, y_fit)
    print(model.score(x_holdout, y_holdout))
Esempio n. 4
0
def _describe_menu(df):
    """Run the Data Description sub-menu on ``df`` until -1 is chosen."""
    data_desc = DataDescription(df)
    while True:
        option = data_desc.getOption()
        if option == -1:
            return
        if option == 1:
            data_desc.showProperty()
        elif option == 2:
            data_desc.showStats()
        elif option == 3:
            data_desc.showDF()
        else:
            print('Incorrect option!Try again.')


def _impute_menu(df):
    """Run the NULL-handling sub-menu and return the resulting frame."""
    impute = Imputation(df)
    while True:
        option = impute.getOption()
        if option == -1:
            return df
        if option == 1:
            impute.countNULL()
        elif option == 2:
            df = impute.dropColumn()
        elif option == 3:
            df = impute.fillUtil()
        elif option == 4:
            impute.showDF()
        else:
            print('Incorrect option!Try again.')


def _encode_menu(df):
    """Run the categorical-encoding sub-menu and return the resulting frame."""
    encode = EncodeCategorical(df)
    while True:
        option = encode.getOption()
        if option == -1:
            return df
        if option == 1:
            encode.showCategorical()
        elif option == 2:
            df = encode.performOneHotEncodingUtil()
        elif option == 3:
            encode.showDF()
        else:
            print('Incorrect option!Try again.')


def _scale_menu(df):
    """Run the feature-scaling sub-menu on ``df`` until -1 is chosen."""
    while True:
        # NOTE(review): unlike the imputation and encoding menus, a new
        # FeatureScaling is built on every pass and the results of
        # normalizeUtil()/standardizeUtil() are not captured. This only
        # works if the scaler mutates ``df`` in place - confirm.
        scale = FeatureScaling(df)
        option = scale.getOption()
        if option == -1:
            return
        if option == 1:
            scale.normalizeUtil()
        elif option == 2:
            scale.standardizeUtil()
        elif option == 3:
            scale.showDF()
        else:
            print('Incorrect option!Try again.')


def main():
    """Entry point of the ML Preprocessor CLI.

    Reads the CSV path from ``sys.argv[1]``, loads it with ``readCSV`` and
    loops over the top-level task menu until the user enters -1. Every
    failure is reported on stdout rather than propagated:

    * missing argument          -> 'File path missing.'
    * path not ending in .csv   -> 'Incorrect file format.'
    * file not found            -> 'File Not Found!'
    * any other exception       -> 'Incorrect option chosen.'

    A non-integer reply at the top-level prompt no longer ends the
    session; the menu is shown again so work done so far is not lost.
    """
    print('-'*10+'Welcome to ML Preprocessor CLI'+'-'*10+'\n\n')
    try:
        file_path = sys.argv[1]
        if not file_path.endswith('.csv'):
            raise IncorrectFileFormatError("file is not in CSV format")

        revised_df = readCSV(file_path)
        print('\nScreenshot of independent dataframe:\n')
        print(revised_df.head())
        print('\n'+'-'*30+'\n')
        while True:
            print('\nTasks(Preprocessing)')
            print('1.Data Description')
            print('2.Handling NULL values')
            print('3.Encoding Categorical Data')
            print('4.Feature Scaling of the Dataset')
            print('5.Download the modified Dataset\n')
            try:
                option = int(input('What do you want to do?(Press -1 to exit):'))
            except ValueError:
                # A typo must not fall through to the catch-all handler
                # below, which would end the program.
                print('Integer value required. Try again.')
                continue

            if option == -1:
                # Leaves the loop through the ExitError handler below.
                raise ExitError
            elif option == 1:
                _describe_menu(revised_df)
            elif option == 2:
                revised_df = _impute_menu(revised_df)
            elif option == 3:
                revised_df = _encode_menu(revised_df)
            elif option == 4:
                _scale_menu(revised_df)
            elif option == 5:
                download = Download(revised_df)
                download.downloadDataframe()
            else:
                print('Incorrect option!Try again.')
    except IndexError as e:
        print('File path missing.', e)
    except IncorrectFileFormatError as e:
        print('Incorrect file format.', e)
    except FileNotFoundError as e:
        print('File Not Found!', e)
    except ExitError as e:
        print(e)
    except Exception as e:
        print('Incorrect option chosen.', e)
Esempio n. 5
0
class Preprocessor:
    """Interactive driver that ties the preprocessing tasks together.

    The dataset is loaded through ``DataInput`` on construction;
    ``preprocessorMain`` then drops the target column and loops over the
    task menu listed in ``tasks``.
    """

    # The Task associated with this class. This is also the main class of the project.
    tasks = [
        '1. Data Description', '2. Handling NULL Values',
        '3. Encoding Categorical Data', '4. Feature Scaling of the Dataset',
        '5. Download the modified dataset'
    ]

    # Class-level placeholder; every instance overwrites it in __init__.
    data = 0

    def __init__(self):
        """Load the dataset via ``DataInput().inputFunction()`` and greet the user."""
        self.data = DataInput().inputFunction()
        print("\n\n" +
              "WELCOME TO THE MACHINE LEARNING PREPROCESSOR CLI!!!\n" + "\n\n")

    # function to remove the target column of the DataFrame.
    def removeTargetColumn(self):
        """Ask which column is the target and drop it from ``self.data``.

        Lists the column names, then prompts until a column has been
        confirmed with 'y'/'Y' and dropped in place.

        Raises:
            SystemExit: when the user enters -1 as the column name.
        """
        print("Columns\n")
        for column in self.data.columns.values:
            print(column, end="  ")

        while True:
            # NOTE(review): the reply is lower-cased before the lookup, so
            # only lower-case column labels can match - presumably the
            # loader normalises them; confirm against DataInput.
            column = input(
                "\nWhich is the target variable:(Press -1 to exit)  ").lower()
            if column == "-1":
                # exit() comes from the site module and is meant for the
                # interactive shell; SystemExit is always available.
                raise SystemExit

            choice = input("Are you sure?(y/n) ")
            if choice not in ("y", "Y"):
                print("Try again with the correct column name...")
                continue

            try:
                self.data.drop([column], axis=1, inplace=True)
            except KeyError:
                print("No column present with this name. Try again......")
                continue
            print("Done.......")
            break

    def printData(self):
        """Print ``self.data`` to stdout."""
        print(self.data)

    # main function of the Preprocessor class.
    def preprocessorMain(self):
        """Drop the target column, then serve the task menu until exit.

        Choices 2-4 replace ``self.data`` with the value returned by the
        task; choices 1 and 5 only read it.

        Raises:
            SystemExit: when the user enters -1.
        """
        self.removeTargetColumn()
        while True:
            print("\nTasks (Preprocessing)\n")
            for task in self.tasks:
                print(task)

            # Keep prompting until the reply parses as an integer.
            while True:
                try:
                    choice = int(
                        input(
                            "\nWhat do you want to do? (Press -1 to exit):  "))
                except ValueError:
                    print("Integer Value required. Try again.....")
                    continue
                break

            if choice == -1:
                raise SystemExit

            # moves the control into the DataDescription class.
            elif choice == 1:
                DataDescription(self.data).describe()

            # moves the control into the Imputation class.
            elif choice == 2:
                self.data = Imputation(self.data).imputer()

            # moves the control into the Categorical class.
            elif choice == 3:
                self.data = Categorical(self.data).categoricalMain()

            # moves the control into the FeatureScaling class.
            elif choice == 4:
                self.data = FeatureScaling(self.data).scaling()

            # moves the control into the Download class.
            elif choice == 5:
                Download(self.data).download()

            else:
                print("\nWrong Integer value!! Try again..")
Esempio n. 6
0
 def __init__(self):
     """Load the dataset through DataInput, then print the welcome banner."""
     self.data = DataInput().inputFunction()
     banner_parts = (
         "\n\n",
         "WELCOME TO THE MACHINE LEARNING PREPROCESSOR CLI!!!\n",
         "\n\n",
     )
     print("".join(banner_parts))
Esempio n. 7
0
class Preprocessor:
    """Interactive driver for the preprocessing menu.

    The dataset is loaded through ``DataInput`` on construction;
    ``preprocessorMain`` then drops the target column and loops over the
    task menu listed in ``tasks``.
    """

    # ANSI escape sequences wrapped around the banner text.
    bold_text_start = "\033[1m"
    bold_text_end = "\033[0;0m"
    # Menu entries, printed in order before every prompt.
    tasks = [
        '1. Data Description', '2. Handling NULL values',
        '3. Encoding Categorical Data', '4. Feature Scaling of the Dataset',
        '5. Download the Modified Dataset'
    ]
    # Class-level placeholder; every instance overwrites it in __init__.
    data = 0

    def __init__(self):
        """Load the dataset via ``DataInput().inputFunction()`` and print the banner."""
        self.data = DataInput().inputFunction()
        print("\n\n" + self.bold_text_start +
              "MACHINE LEARNING PREPROCESSOR CLI" + self.bold_text_end +
              "\n\n")

    def removeTargetColumn(self):
        """Ask which column is the target and drop it from ``self.data``.

        Lists the column names, then prompts until a column has been
        confirmed with 'y'/'Y' and dropped in place.

        Raises:
            SystemExit: when the user enters -1 as the column name.
        """
        print("Columns\n")
        for column in self.data.columns.values:
            print(column, end="  ")
        while True:
            # NOTE(review): the reply is lower-cased before the lookup, so
            # only lower-case column labels can match - presumably the
            # loader normalises them; confirm against DataInput.
            column = input(
                "\nWhich is the target variable:[enter -1 to exit]  ").lower()
            if column == "-1":
                # exit() comes from the site module and is meant for the
                # interactive shell; SystemExit is always available.
                raise SystemExit
            choice = input("Are you sure?(y/n) ")
            if choice not in ("y", "Y"):
                print("Try again with the correct column name...")
                continue
            try:
                self.data.drop([column], axis=1, inplace=True)
            except KeyError:
                print("No column present with this name. Try again......")
                continue
            print("Done......")
            break

    def printData(self):
        """Print ``self.data`` to stdout."""
        print(self.data)

    def preprocessorMain(self):
        """Drop the target column, then serve the task menu until exit.

        Choices 2-4 replace ``self.data`` with the value returned by the
        task; choices 1 and 5 only read it.

        Raises:
            SystemExit: when the user enters -1.
        """
        self.removeTargetColumn()
        while True:
            print("\nTasks (Preprocessing)\n")
            for task in self.tasks:
                print(task)
            # Keep prompting until the reply parses as an integer.
            while True:
                try:
                    choice = int(
                        input(
                            "\nWhat do you want to do? [enter -1 to exit]:  "))
                except ValueError:
                    print("Integer Value required. Try again.....")
                    continue
                break
            if choice == -1:
                raise SystemExit
            elif choice == 1:
                DataDescription(self.data).describe()
            elif choice == 2:
                self.data = Imputation(self.data).imputer()
            elif choice == 3:
                self.data = Categorical(self.data).categoricalMain()
            elif choice == 4:
                self.data = FeatureScaling(self.data).scaling()
            elif choice == 5:
                Download(self.data).download()
            else:
                print("\nWrong choice!! Try again...")
Esempio n. 8
0
 def __init__(self):
     """Load the dataset through DataInput, then print the bold title banner."""
     self.data = DataInput().inputFunction()
     # The title sits between the class's bold start/end escape sequences,
     # with two newlines on either side.
     banner_parts = [
         "\n\n",
         self.bold_text_start,
         "MACHINE LEARNING PREPROCESSOR CLI",
         self.bold_text_end,
         "\n\n",
     ]
     print("".join(banner_parts))