def preprocessorMain(self):
    """Run the interactive preprocessing menu until the user quits.

    Calls ``self.removeTargetColumn()`` once, then repeatedly prints every
    entry of ``self.tasks`` and prompts for a task number. Choices 2-4
    replace ``self.data`` with whatever the selected helper returns;
    choices 1 and 5 only read ``self.data``.

    Raises:
        SystemExit: when the user enters -1.
    """
    self.removeTargetColumn()
    while True:
        print("\nTasks (Preprocessing)\n")
        for task in self.tasks:
            print(task)

        # Keep asking until the answer parses as an integer.
        while True:
            try:
                choice = int(
                    input("\nWhat do you want to do? [enter -1 to exit]: "))
            except ValueError:
                print("Integer Value required. Try again.....")
                continue
            break

        if choice == -1:
            # Raise SystemExit directly: the exit() helper is injected by
            # the site module for interactive use and is not guaranteed to
            # exist (e.g. ``python -S`` or frozen executables).
            raise SystemExit
        elif choice == 1:
            DataDescription(self.data).describe()
        elif choice == 2:
            self.data = Imputation(self.data).imputer()
        elif choice == 3:
            self.data = Categorical(self.data).categoricalMain()
        elif choice == 4:
            self.data = FeatureScaling(self.data).scaling()
        elif choice == 5:
            Download(self.data).download()
        else:
            print("\nWrong choice!! Try again...")
def get(argsfscale):
    """Scale a stored CSV in place and record the method in its JSON file.

    ``argsfscale`` is a comma-separated string whose first three fields are
    the scaling identifier, the CSV file name (under ``csvfiles/``) and the
    JSON file name (under ``jsonfiles/``), both relative to
    ``currentDirPath``.

    The CSV is read, passed to ``FeatureScaling.Scaling``, re-indexed and
    written back over the same file. The JSON file is loaded, its
    ``'FeatureScaling'`` key is set to the scaling identifier, and it is
    rewritten.

    Returns:
        dict with the first ten rows as table-oriented JSON (``predata``),
        the CSV name (``fileName``), the column count (``numofcol``) and
        the JSON name (``jfilename``).

    Raises:
        IndexError: if ``argsfscale`` has fewer than three fields.
    """
    print("Feature Scaling...")

    fields = argsfscale.split(",")
    fsn, filename, jfilename = fields[0], fields[1], fields[2]

    json_path = "/".join((currentDirPath, "jsonfiles", jfilename))
    file_path = "/".join((currentDirPath, "csvfiles", filename))

    frame = pd.read_csv(file_path)
    scaled = FeatureScaling.Scaling(frame, fsn)
    scaled.reset_index(inplace=True, drop=True)
    scaled.to_csv(file_path, index=False)

    preview = scaled.head(10)
    column_count = len(scaled.columns)

    # Read-modify-write of the JSON file kept alongside the CSV.
    with open(json_path) as reader:
        settings = json.load(reader)
    settings.update(FeatureScaling=fsn)
    with open(json_path, 'w') as writer:
        json.dump(settings, writer)

    return {
        'predata': preview.to_json(orient='table'),
        'fileName': filename,
        'numofcol': column_count,
        'jfilename': jfilename
    }
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from feature_scaling import FeatureScaling

if __name__ == "__main__":
    # Fit a linear regression on the two objects returned by
    # FeatureScaling.getFeaturedDataframe() and print its score on a
    # held-out 10% split. The first object is used as the model input and
    # the second as the value to predict.
    scaler = FeatureScaling()
    inputs, outputs = scaler.getFeaturedDataframe()

    # Fixed random_state keeps the split reproducible between runs.
    split = train_test_split(
        inputs,
        outputs,
        test_size=0.10,
        random_state=2,
    )
    inputs_train, inputs_test, outputs_train, outputs_test = split

    model = LinearRegression()
    model.fit(inputs_train, outputs_train)
    held_out_score = model.score(inputs_test, outputs_test)
    print(held_out_score)
def main():
    """Run the ML Preprocessor command-line session.

    Reads the CSV path from ``sys.argv[1]``, loads it with ``readCSV`` and
    then loops over the main menu. Each menu entry opens a sub-menu driven
    by the matching helper object's ``getOption()``; a sub-menu is left by
    returning -1 from ``getOption()``. The working dataframe is replaced
    whenever a helper action returns a new one (drop column, fill, one-hot
    encode).

    A non-integer answer at the main menu is rejected and the menu is shown
    again, so a typo does not end the session and discard the work done so
    far. Entering -1 at the main menu raises ``ExitError``, which is caught
    below together with the other expected failures; every handler prints
    a message and the function returns ``None``.
    """
    print('-'*10+'Welcome to ML Preprocessor CLI'+'-'*10+'\n\n')
    try:
        file_path = sys.argv[1]
        if not file_path.endswith('.csv'):
            raise IncorrectFileFormatError("file is not in CSV format")
        revised_df = readCSV(file_path)
        print('\nScreenshot of independent dataframe:\n')
        print(revised_df.head())
        print('\n'+'-'*30+'\n')

        while True:
            print('\nTasks(Preprocessing)')
            print('1.Data Description')
            print('2.Handling NULL values')
            print('3.Encoding Categorical Data')
            print('4.Feature Scaling of the Dataset')
            print('5.Download the modified Dataset\n')
            try:
                option = int(input('What do you want to do?(Press -1 to exit):'))
            except ValueError:
                # Re-prompt instead of falling through to the catch-all
                # handler, which would terminate the whole session.
                print('Incorrect option!Try again.')
                continue

            if option == -1:
                raise ExitError
            elif option == 1:
                data_desc = DataDescription(revised_df)
                while True:
                    sub_option = data_desc.getOption()
                    if sub_option == -1:
                        break
                    elif sub_option == 1:
                        data_desc.showProperty()
                    elif sub_option == 2:
                        data_desc.showStats()
                    elif sub_option == 3:
                        data_desc.showDF()
                    else:
                        print('Incorrect option!Try again.')
            elif option == 2:
                impute = Imputation(revised_df)
                while True:
                    sub_option = impute.getOption()
                    if sub_option == -1:
                        break
                    elif sub_option == 1:
                        impute.countNULL()
                    elif sub_option == 2:
                        revised_df = impute.dropColumn()
                    elif sub_option == 3:
                        revised_df = impute.fillUtil()
                    elif sub_option == 4:
                        impute.showDF()
                    else:
                        print('Incorrect option!Try again.')
            elif option == 3:
                encode = EncodeCategorical(revised_df)
                while True:
                    sub_option = encode.getOption()
                    if sub_option == -1:
                        break
                    elif sub_option == 1:
                        encode.showCategorical()
                    elif sub_option == 2:
                        revised_df = encode.performOneHotEncodingUtil()
                    elif sub_option == 3:
                        encode.showDF()
                    else:
                        print('Incorrect option!Try again.')
            elif option == 4:
                while True:
                    # NOTE(review): unlike the other sub-menus the helper is
                    # rebuilt on every pass and the results of
                    # normalizeUtil()/standardizeUtil() are not assigned
                    # back to revised_df - presumably they scale the frame
                    # in place; confirm against FeatureScaling.
                    scale = FeatureScaling(revised_df)
                    sub_option = scale.getOption()
                    if sub_option == -1:
                        break
                    elif sub_option == 1:
                        scale.normalizeUtil()
                    elif sub_option == 2:
                        scale.standardizeUtil()
                    elif sub_option == 3:
                        scale.showDF()
                    else:
                        print('Incorrect option!Try again.')
            elif option == 5:
                download = Download(revised_df)
                download.downloadDataframe()
            else:
                print('Incorrect option!Try again.')
    except IndexError as e:
        print('File path missing.', e)
    except IncorrectFileFormatError as e:
        print('Incorrect file format.', e)
    except FileNotFoundError as e:
        print('File Not Found!', e)
    except ExitError as e:
        print(e)
    except Exception as e:
        # Anything else (including errors raised inside a helper's
        # getOption()) still ends the session with this message.
        print('Incorrect option chosen.', e)
class Preprocessor:
    """Main class of the project: drives the interactive preprocessing CLI.

    On construction the dataset is obtained from
    ``DataInput().inputFunction()``. ``preprocessorMain`` then removes the
    target column and offers the tasks listed in ``tasks``.
    """

    # Menu entries printed by preprocessorMain; the leading number is the
    # choice the user types.
    tasks = [
        '1. Data Description',
        '2. Handling NULL Values',
        '3. Encoding Categorical Data',
        '4. Feature Scaling of the Dataset',
        '5. Download the modified dataset'
    ]

    # Class-level placeholder; every instance replaces it in __init__.
    data = 0

    def __init__(self):
        self.data = DataInput().inputFunction()
        print("\n\n" + "WELCOME TO THE MACHINE LEARNING PREPROCESSOR CLI!!!\n" + "\n\n")

    def _matchColumn(self, name):
        """Return the column label of ``self.data`` that *name* refers to.

        Tries the text exactly as typed, then its lower-cased form, then a
        case-insensitive comparison against every label. Returns ``None``
        when nothing matches.
        """
        labels = list(self.data.columns)
        for candidate in (name, name.lower()):
            if candidate in labels:
                return candidate
        folded = name.lower()
        for label in labels:
            if str(label).lower() == folded:
                return label
        return None

    # function to remove the target column of the DataFrame.
    def removeTargetColumn(self):
        """Ask which column is the target and drop it from ``self.data``.

        The prompt repeats until a confirmed, existing column is dropped.
        The name is matched case-insensitively, so columns whose labels
        contain upper-case letters can be selected.

        Raises:
            SystemExit: when the user enters -1.
        """
        print("Columns\n")
        for column in self.data.columns.values:
            print(column, end=" ")

        while True:
            typed = input("\nWhich is the target variable:(Press -1 to exit) ")
            if typed == "-1":
                # exit() is a site-module helper meant for interactive
                # shells; raising SystemExit works everywhere.
                raise SystemExit

            choice = input("Are you sure?(y/n) ")
            if choice not in ("y", "Y"):
                print("Try again with the correct column name...")
                continue

            column = self._matchColumn(typed)
            if column is None:
                print("No column present with this name. Try again......")
                continue

            self.data.drop([column], axis=1, inplace=True)
            print("Done.......")
            break

    def printData(self):
        """Print the current dataset."""
        print(self.data)

    # main function of the Preprocessor class.
    def preprocessorMain(self):
        """Run the task menu until the user quits.

        Choices 2-4 replace ``self.data`` with the value returned by the
        selected helper; choices 1 and 5 only read it.

        Raises:
            SystemExit: when the user enters -1.
        """
        self.removeTargetColumn()
        while True:
            print("\nTasks (Preprocessing)\n")
            for task in self.tasks:
                print(task)

            # Keep asking until the answer parses as an integer.
            while True:
                try:
                    choice = int(
                        input("\nWhat do you want to do? (Press -1 to exit): "))
                except ValueError:
                    print("Integer Value required. Try again.....")
                    continue
                break

            if choice == -1:
                raise SystemExit

            # moves the control into the DataDescription class.
            elif choice == 1:
                DataDescription(self.data).describe()

            # moves the control into the Imputation class.
            elif choice == 2:
                self.data = Imputation(self.data).imputer()

            # moves the control into the Categorical class.
            elif choice == 3:
                self.data = Categorical(self.data).categoricalMain()

            # moves the control into the FeatureScaling class.
            elif choice == 4:
                self.data = FeatureScaling(self.data).scaling()

            # moves the control into the Download class.
            elif choice == 5:
                Download(self.data).download()

            else:
                print("\nWrong Integer value!! Try again..")
def __init__(self):
    """Load the dataset through ``DataInput`` and print the welcome banner."""
    loader = DataInput()
    self.data = loader.inputFunction()

    banner = "WELCOME TO THE MACHINE LEARNING PREPROCESSOR CLI!!!\n"
    padding = "\n\n"
    print("".join((padding, banner, padding)))
class Preprocessor:
    """Drives the interactive preprocessing CLI.

    On construction the dataset is obtained from
    ``DataInput().inputFunction()``. ``preprocessorMain`` then removes the
    target column and offers the tasks listed in ``tasks``.
    """

    # ANSI escape sequences wrapped around the banner printed in __init__.
    bold_text_start = "\033[1m"
    bold_text_end = "\033[0;0m"

    # Menu entries printed by preprocessorMain; the leading number is the
    # choice the user types.
    tasks = [
        '1. Data Description',
        '2. Handling NULL values',
        '3. Encoding Categorical Data',
        '4. Feature Scaling of the Dataset',
        '5. Download the Modified Dataset'
    ]

    # Class-level placeholder; every instance replaces it in __init__.
    data = 0

    def __init__(self):
        self.data = DataInput().inputFunction()
        print("\n\n" + self.bold_text_start + "MACHINE LEARNING PREPROCESSOR CLI" + self.bold_text_end + "\n\n")

    def _matchColumn(self, name):
        """Return the column label of ``self.data`` that *name* refers to.

        Tries the text exactly as typed, then its lower-cased form, then a
        case-insensitive comparison against every label. Returns ``None``
        when nothing matches.
        """
        labels = list(self.data.columns)
        for candidate in (name, name.lower()):
            if candidate in labels:
                return candidate
        folded = name.lower()
        for label in labels:
            if str(label).lower() == folded:
                return label
        return None

    def removeTargetColumn(self):
        """Ask which column is the target and drop it from ``self.data``.

        The prompt repeats until a confirmed, existing column is dropped.
        The name is matched case-insensitively, so columns whose labels
        contain upper-case letters can be selected.

        Raises:
            SystemExit: when the user enters -1.
        """
        print("Columns\n")
        for column in self.data.columns.values:
            print(column, end=" ")

        while True:
            typed = input("\nWhich is the target variable:[enter -1 to exit] ")
            if typed == "-1":
                # exit() is a site-module helper meant for interactive
                # shells; raising SystemExit works everywhere.
                raise SystemExit

            choice = input("Are you sure?(y/n) ")
            if choice not in ("y", "Y"):
                print("Try again with the correct column name...")
                continue

            column = self._matchColumn(typed)
            if column is None:
                print("No column present with this name. Try again......")
                continue

            self.data.drop([column], axis=1, inplace=True)
            print("Done......")
            break

    def printData(self):
        """Print the current dataset."""
        print(self.data)

    def preprocessorMain(self):
        """Run the task menu until the user quits.

        Choices 2-4 replace ``self.data`` with the value returned by the
        selected helper; choices 1 and 5 only read it.

        Raises:
            SystemExit: when the user enters -1.
        """
        self.removeTargetColumn()
        while True:
            print("\nTasks (Preprocessing)\n")
            for task in self.tasks:
                print(task)

            # Keep asking until the answer parses as an integer.
            while True:
                try:
                    choice = int(
                        input("\nWhat do you want to do? [enter -1 to exit]: "))
                except ValueError:
                    print("Integer Value required. Try again.....")
                    continue
                break

            if choice == -1:
                raise SystemExit
            elif choice == 1:
                DataDescription(self.data).describe()
            elif choice == 2:
                self.data = Imputation(self.data).imputer()
            elif choice == 3:
                self.data = Categorical(self.data).categoricalMain()
            elif choice == 4:
                self.data = FeatureScaling(self.data).scaling()
            elif choice == 5:
                Download(self.data).download()
            else:
                print("\nWrong choice!! Try again...")
def __init__(self):
    """Load the dataset through ``DataInput`` and print the bold banner."""
    loader = DataInput()
    self.data = loader.inputFunction()

    # The title is wrapped in the class's bold start/end sequences.
    pieces = (
        "\n\n",
        self.bold_text_start,
        "MACHINE LEARNING PREPROCESSOR CLI",
        self.bold_text_end,
        "\n\n",
    )
    print("".join(pieces))