Example #1
0
    def scaling(self):
        """Run the interactive feature-scaling menu until the user enters -1.

        Every pass prints the entries of ``self.tasks`` and reads an integer
        choice from standard input; a reply that is not an integer is
        re-prompted.  Choice 1 calls ``self.normalization()``, choice 2 calls
        ``self.standardization()``, choice 3 calls
        ``DataDescription.showDataset(self)`` and any other value prints an
        error before the menu is shown again.

        Returns:
            ``self.data`` as it stands when the user leaves the menu.
        """
        question = "\n\nWhat you want to do? (Press -1 to go back)  "

        while True:
            print("\nTasks (Feature Scaling)")
            for entry in self.tasks:
                print(entry)

            # Keep asking until the reply parses as an integer.
            selected = None
            while selected is None:
                try:
                    selected = int(input(question))
                except ValueError:
                    print("Integer Value required. Try again.....")

            if selected == -1:
                # Hand back the frame with whatever changes were applied.
                return self.data

            if selected == 1:
                self.normalization()
            elif selected == 2:
                self.standardization()
            elif selected == 3:
                DataDescription.showDataset(self)
            else:
                print("\nWrong Integer value!! Try again..")
    def categoricalMain(self):
        """Run the interactive categorical-data menu until the user enters -1.

        Every pass prints the entries of ``self.tasks`` and reads an integer
        choice from standard input (non-integer replies are re-prompted):

        * 1 -- ``self.categoricalColumn()``
        * 2 -- ``self.categoricalColumn()`` followed by ``self.encoding()``
        * 3 -- ``DataDescription.showDataset(self)``
        * anything else -- an error message, then the menu again

        Returns:
            ``self.data`` as it stands when the user leaves the menu.
        """
        while True:
            print("\nTasks")
            for line in self.tasks:
                print(line)

            # Loop until the reply can be converted to an integer.
            while True:
                try:
                    picked = int(input("\n\nWhat you want to do? (Press -1 to go back)  "))
                except ValueError:
                    print("Integer Value required. Try again...")
                else:
                    break

            if picked == -1:
                break

            if picked in (1, 2):
                # Both options start with the categorical-column step;
                # option 2 then goes on to the encoding step.
                self.categoricalColumn()
                if picked == 2:
                    self.encoding()
            elif picked == 3:
                DataDescription.showDataset(self)
            else:
                print("\nWrong Integer value!! Try again..")

        # Hand back the data after the modifications made from this menu.
        return self.data
Example #3
0
    def normalization(self):
        """Run the interactive min-max normalization menu until the user enters -1.

        Every pass prints the entries of ``self.tasks_normalization`` and reads
        an integer choice from standard input (non-integer replies are
        re-prompted):

        * 1 -- print the column dtypes, prompt for whitespace-separated column
          names and rescale each named column of ``self.data`` to
          ``(x - min) / (max - min)``.  Entering ``-1`` at the column prompt
          leaves the menu.
        * 2 -- rescale the whole of ``self.data`` with ``MinMaxScaler``.
        * 3 -- ``DataDescription.showDataset(self)``.
        * anything else -- an error message, then the menu again.

        ``self.data`` is modified in place (choice 1) or rebound (choice 2);
        the method itself returns ``None``.
        """
        while True:
            print("\nTasks (Normalization)")
            for task in self.tasks_normalization:
                print(task)

            while True:
                try:
                    choice = int(input("\n\nWhat you want to do? (Press -1 to go back)  "))
                except ValueError:
                    print("Integer Value required. Try again.....")
                    continue
                break

            if choice == -1:
                break

            # Performs normalization on the columns provided.
            elif choice == 1:
                print(self.data.dtypes)
                columns = input("Enter all the column(s)you want to normalize (Press -1 to go back)  ").lower()
                if columns == "-1":
                    break
                # split() without an argument ignores repeated, leading and
                # trailing whitespace, so stray spaces do not turn into empty
                # column names.
                for column in columns.split():
                    # Plain min-max scaling of one column.
                    try:
                        minValue = self.data[column].min()
                        maxValue = self.data[column].max()
                        valueRange = maxValue - minValue
                        if valueRange == 0:
                            # Constant column: dividing by zero would turn
                            # every value into NaN.  Dividing by 1 maps the
                            # column to 0 instead.
                            valueRange = 1
                        self.data[column] = (self.data[column] - minValue) / valueRange
                    except Exception:
                        # Unknown column name or non-numeric column.  Only
                        # Exception is caught so Ctrl-C still interrupts.
                        print("\nNot possible....")
                print("Done....")

            # Performs normalization on whole dataset.
            elif choice == 2:
                try:
                    # Pass the original labels through so the column names and
                    # index survive the rebuild of the DataFrame.
                    self.data = pd.DataFrame(
                        MinMaxScaler().fit_transform(self.data),
                        columns=self.data.columns,
                        index=self.data.index,
                    )
                    print("Done.......")

                except Exception:
                    print("\nString Columns are present. So, NOT possible.\nYou can try the first option though.")

            elif choice == 3:
                DataDescription.showDataset(self)

            else:
                print("\nYou pressed the wrong key!! Try again..")

        return
 def standardization(self):
     """Run the interactive standardization menu until the user enters -1.

     Every pass prints the entries of ``self.tasks_standardization`` and
     reads an integer choice from standard input (non-integer replies are
     re-prompted):

     * 1 -- print the column dtypes, prompt for whitespace-separated column
       names and replace each named column of ``self.data`` with
       ``(x - mean) / std``.  Entering ``-1`` at the column prompt leaves
       the menu.
     * 2 -- standardize the whole of ``self.data`` with ``StandardScaler``.
     * 3 -- ``DataDescription.showDataset(self)``.
     * anything else -- an error message, then the menu again.

     ``self.data`` is modified in place (choice 1) or rebound (choice 2);
     the method itself returns ``None``.
     """
     while True:
         print("\nTasks (Standardization)")
         for task in self.tasks_standardization:
             print(task)
         while True:
             try:
                 choice = int(
                     input("\n\nWhat you want to do? [enter -1 to go back]  "))
             except ValueError:
                 print("Integer Value required. Try again.....")
                 continue
             break
         if choice == -1:
             break
         elif choice == 1:
             print(self.data.dtypes)
             columns = input(
                 "Enter all the column" + self.bold_text_start + "(s)" +
                 self.bold_text_end +
                 "you want to standardize [enter -1 to go back]  ").lower()
             if columns == "-1":
                 break
             # split() without an argument ignores repeated, leading and
             # trailing whitespace, so stray spaces do not turn into empty
             # column names.
             for column in columns.split():
                 try:
                     mean = self.data[column].mean()
                     # NOTE(review): Series.std() defaults to ddof=1 while
                     # scikit-learn's StandardScaler uses the population
                     # deviation, so options 1 and 2 may differ slightly --
                     # confirm which one is intended.
                     standard_deviation = self.data[column].std()
                     if standard_deviation == 0:
                         # Constant column: dividing by zero would produce
                         # NaN.  Dividing by 1 maps the column to 0 instead.
                         standard_deviation = 1
                     self.data[column] = (self.data[column] -
                                          mean) / standard_deviation
                 except Exception:
                     # Unknown column name or non-numeric column.  Only
                     # Exception is caught so Ctrl-C still interrupts.
                     print("\nNot possible....")
             print("Done....")
         elif choice == 2:
             try:
                 # Pass the original labels through so the column names and
                 # index survive the rebuild of the DataFrame.
                 self.data = pd.DataFrame(
                     StandardScaler().fit_transform(self.data),
                     columns=self.data.columns,
                     index=self.data.index)
                 print("Done.......")
             except Exception:
                 print("\nString Columns are present. So, " +
                       self.bold_text_start + "NOT" + self.bold_text_end +
                       " possible. \nYou can try the first option though.")
             # No break here: the menu stays open so the user can actually
             # fall back to the first option, as the message suggests.
         elif choice == 3:
             DataDescription.showDataset(self)
         else:
             print("\nWrong choice!! Try again...")
     return
Example #5
0
 def preprocessorMain(self):
     """Run the top-level preprocessing menu until the user chooses to exit.

     Calls ``self.removeTargetColumn()`` once, then repeatedly prints the
     entries of ``self.tasks`` and reads an integer choice from standard
     input (non-integer replies are re-prompted):

     * 1 -- ``DataDescription(self.data).describe()``
     * 2 -- ``self.data = Imputation(self.data).imputer()``
     * 3 -- ``self.data = Categorical(self.data).categoricalMain()``
     * 4 -- ``self.data = FeatureScaling(self.data).scaling()``
     * 5 -- ``Download(self.data).download()``
     * anything else -- an error message, then the menu again.

     Raises:
         SystemExit: when the user enters -1.  The method never returns
             normally.
     """
     self.removeTargetColumn()
     while True:
         print("\nTasks (Preprocessing)\n")
         for task in self.tasks:
             print(task)
         while True:
             try:
                 choice = int(
                     input(
                         "\nWhat do you want to do? [enter -1 to exit]:  "))
             except ValueError:
                 print("Integer Value required. Try again.....")
                 continue
             break
         if choice == -1:
             # Raise SystemExit directly instead of calling exit(): that
             # helper is installed by the site module for interactive use,
             # may be missing (e.g. under ``python -S``) and closes
             # sys.stdin as a side effect.
             raise SystemExit
         elif choice == 1:
             DataDescription(self.data).describe()
         elif choice == 2:
             self.data = Imputation(self.data).imputer()
         elif choice == 3:
             self.data = Categorical(self.data).categoricalMain()
         elif choice == 4:
             self.data = FeatureScaling(self.data).scaling()
         elif choice == 5:
             Download(self.data).download()
         else:
             print("\nWrong choice!! Try again...")
Example #6
0
    def imputer(self):
        """Run the interactive imputation menu until the user enters -1.

        Every pass prints the entries of ``self.tasks`` and reads an integer
        choice from standard input (non-integer replies are re-prompted).
        Choices 1-5 call, in order, ``printNullValues``, ``removeColumn``,
        ``fillNullWithMean``, ``fillNullWithMedian`` and ``fillNullWithMode``
        on ``self``; choice 6 calls ``DataDescription.showDataset(self)``;
        any other value prints an error before the menu is shown again.

        Returns:
            ``self.data`` as it stands when the user leaves the menu.
        """
        # Menu number -> name of the method to invoke on self.  Names are
        # resolved only when chosen, so an option that is never picked is
        # never looked up.
        handler_names = {
            1: "printNullValues",
            2: "removeColumn",
            3: "fillNullWithMean",
            4: "fillNullWithMedian",
            5: "fillNullWithMode",
        }
        question = "\nWhat you want to do? (Press -1 to go back)  "

        while True:
            print("\nImputation Tasks")
            for entry in self.tasks:
                print(entry)

            # Keep asking until the reply parses as an integer.
            selected = None
            while selected is None:
                try:
                    selected = int(input(question))
                except ValueError:
                    print("Integer Value required. Try again.....")

            if selected == -1:
                return self.data

            if selected in handler_names:
                getattr(self, handler_names[selected])()
            elif selected == 6:
                DataDescription.showDataset(self)
            else:
                print("\nWrong Integer value!! Try again..")
Example #7
0
def main():
    """Entry point of the ML Preprocessor command-line interface.

    Takes the CSV path from ``sys.argv[1]``, loads it with ``readCSV``, prints
    the head of the resulting frame and then loops over the top-level menu:

    * 1 -- data description sub-menu (``DataDescription``)
    * 2 -- NULL handling sub-menu (``Imputation``)
    * 3 -- categorical encoding sub-menu (``EncodeCategorical``)
    * 4 -- feature scaling sub-menu (``FeatureScaling``)
    * 5 -- ``Download(revised_df).downloadDataframe()``
    * -1 -- leave the program by raising ``ExitError``

    Each sub-menu runs until its ``getOption()`` returns -1.  Nothing is
    returned and no exception derived from ``Exception`` escapes: every
    outcome is reported with ``print``.
    """
    print('-'*10+'Welcome to ML Preprocessor CLI'+'-'*10+'\n\n')
    try:
        file_path=sys.argv[1]
        if not file_path.endswith('.csv'):
            raise IncorrectFileFormatError("file is not in CSV format")

        revised_df=readCSV(file_path)
        print('\nScreenshot of independent dataframe:\n')
        print(revised_df.head())
        print('\n'+'-'*30+'\n')
        while True:
            print('\nTasks(Preprocessing)')
            print('1.Data Description')
            print('2.Handling NULL values')
            print('3.Encoding Categorical Data')
            print('4.Feature Scaling of the Dataset')
            print('5.Download the modified Dataset\n')
            try:
                option=int(input('What do you want to do?(Press -1 to exit):'))
            except ValueError:
                # Re-prompt on non-integer input.  Letting the ValueError
                # reach the catch-all handler below would end the session and
                # discard every change that has not been downloaded yet.
                print('Incorrect option!Try again.')
                continue
            if option==-1:
                raise ExitError
            elif option==1:
                data_desc=DataDescription(revised_df)
                while True:
                    sub_option=data_desc.getOption()
                    if sub_option==-1:
                        break
                    elif sub_option==1:
                        data_desc.showProperty()
                    elif sub_option==2:
                        data_desc.showStats()
                    elif sub_option==3:
                        data_desc.showDF()
                    else:
                        print('Incorrect option!Try again.')

            elif option==2:
                impute=Imputation(revised_df)
                while True:
                    sub_option=impute.getOption()
                    if sub_option==-1:
                        break
                    elif sub_option==1:
                        impute.countNULL()
                    elif sub_option==2:
                        revised_df=impute.dropColumn()
                    elif sub_option==3:
                        revised_df=impute.fillUtil()
                    elif sub_option==4:
                        impute.showDF()
                    else:
                        print('Incorrect option!Try again.')
            elif option==3:
                encode=EncodeCategorical(revised_df)
                while True:
                    sub_option=encode.getOption()
                    if sub_option==-1:
                        break
                    elif sub_option==1:
                        encode.showCategorical()
                    elif sub_option==2:
                        revised_df=encode.performOneHotEncodingUtil()
                    elif sub_option==3:
                        encode.showDF()
                    else:
                        print('Incorrect option!Try again.')
            elif option==4:
                while True:
                    # NOTE(review): unlike the other sub-menus, a new
                    # FeatureScaling is built on every pass and the results of
                    # normalizeUtil()/standardizeUtil() are not assigned back
                    # to revised_df -- confirm they modify the frame in place.
                    scale=FeatureScaling(revised_df)
                    sub_option=scale.getOption()
                    if sub_option==-1:
                        break
                    elif sub_option==1:
                        scale.normalizeUtil()
                    elif sub_option==2:
                        scale.standardizeUtil()
                    elif sub_option==3:
                        scale.showDF()
                    else:
                        print('Incorrect option!Try again.')
            elif option==5:
                download=Download(revised_df)
                download.downloadDataframe()
            else:
                print('Incorrect option!Try again.')
    except IndexError as e:
        print('File path missing.',e)
    except IncorrectFileFormatError as e:
        print('Incorrect file format.',e)
    except FileNotFoundError as e:
        print('File Not Found!',e)
    except ExitError as e:
        print(e)
    except Exception as e:
        print('Incorrect option chosen.',e)