def normalise(self, feature_list, start_col, end_col, _range=False):
    """Show the normalisation menu and apply the chosen method.

    The menu is repeated until a normalisation has been applied or the
    user leaves it.

    Menu keys:
        ``1``  mean normalisation
        ``2``  min-max normalisation
        ``;``  display the data, then show the menu again
        ``9``  return to the previous menu
        ``0``  save directory and job history via ``save_check`` and exit

    Args:
        feature_list: Passed to the ``*_norm_col`` functions; only used
            when ``_range`` is false.
        start_col: Start of the column range; only used when ``_range``
            is true.
        end_col: End of the column range; only used when ``_range`` is
            true.
        _range: When true, normalise a column range (``*_norm_range``)
            instead of individually listed columns (``*_norm_col``).
    """
    while True:
        print(pg.feature_norm)
        print(pg.back_exit)
        options = input('User: ')

        if options in ('1', '2'):
            use_mean = options == '1'
            # NOTE(review): the instance is passed explicitly, exactly as
            # the original calls did - confirm these attributes are plain
            # functions rather than bound methods.
            if _range:
                norm = (self.mean_norm_range if use_mean
                        else self.minmax_norm_range)
                norm(self, start_col, end_col)
                pu.func_store_range(norm, self.func_order, 2)
            else:
                norm = (self.mean_norm_col if use_mean
                        else self.minmax_norm_col)
                norm(self, feature_list)
                pu.func_store_col(norm, self.func_order)
            break
        elif options == ';':
            self.display_data()
        elif options == '9':
            break  # returns to the previous menu
        elif options == '0':
            # saving directory and function calls (job history) to log files
            save_check(self.file_directory, self.func_order)
            sys.exit()  # exit the application
        else:
            print('Input not recognised, please type again!')
def hist_density_plots(self, feature, no_of_bins):
    """Show the histogram/density menu once and draw the chosen plot.

    Menu keys:
        ``1``  frequency histogram of ``feature``
        ``2``  density plot of ``feature``
        ``3``  combined histogram and density plot of ``feature``
        ``4``  pair plot of the whole data frame (``feature`` is not used)
        ``;``  display the data
        ``9``  return to the caller
        ``0``  exit the application

    Args:
        feature: Column of ``self.df`` to plot.
        no_of_bins: Requested number of histogram bins. Anything that is
            not a positive integer (or integer-like string) falls back to
            seaborn's automatic bin selection.
    """
    # The value arrives as raw user text, so convert defensively.
    try:
        bins = int(no_of_bins)
    except (TypeError, ValueError):
        bins = None
    if bins is not None and bins < 1:
        bins = None

    print(pg.hist_dens)
    print(pg.back_exit)
    # choose between data types and plots
    data_type = input('User: ')

    if data_type == '1':  # histogram
        sns.distplot(self.df[feature], bins=bins, hist=True, kde=False)
        plt.gca().set(
            title='Frequency Histogram of {}'.format(feature),
            xlabel=feature,
            ylabel='Frequency')
        plt.show(block=False)
    elif data_type == '2':  # density
        sns.distplot(self.df[feature], hist=False, kde=True,
                     kde_kws={'shade': True, 'linewidth': 3})
        plt.gca().set(title='Density plot of {}'.format(feature),
                      ylabel='Density')
        plt.show(block=False)
    elif data_type == '3':  # mixed plot
        sns.distplot(self.df[feature], bins=bins)
        plt.gca().set(
            title='Histogram and Density plot of {}'.format(feature),
            ylabel='Density')
        plt.show(block=False)
    elif data_type == '4':  # pairplots
        sns.pairplot(self.df, diag_kind='kde', plot_kws={'alpha': 0.2})
        plt.show(block=False)
    elif data_type == ';':
        self.display_data()
        return
    elif data_type == '9':
        # There is no enclosing loop here, so "back" is a plain return.
        return
    elif data_type == '0':
        sys.exit()
    else:
        print('Input not recognised, please type again!')
def visualise(self):
    """Show the visualisation menu and run the selected plot.

    Every recognised key leaves the menu afterwards; only unrecognised
    input shows the menu again.

    Menu keys:
        ``1``  correlation matrix
        ``2``  bar plot of a feature
        ``3``  histogram / density plots of a feature
        ``4``  scatter plot of a feature against an output column
        ``;``  display the data
        ``9``  return to the previous menu
        ``0``  exit the application

    Raises:
        KeyError: If a plotting call fails with an ordinary exception.
    """

    def plot_or_key_error(plot, *args):
        # ``Exception`` rather than a bare ``except`` so that SystemExit
        # (raised by the "0" key inside a plot sub-menu) and
        # KeyboardInterrupt are not rewritten into a KeyError.
        try:
            plot(*args)
        except Exception as err:
            raise KeyError('cannot find feature') from err

    while self.running:
        print(pg.viz)
        print(pg.back_exit)
        option = input('User: ')

        if option == '1':
            print('Correlation matrix: \n')
            self.corr_matr()
            break
        elif option == '2':
            feature = input('Please type in the feature name: ')
            plot_or_key_error(self.bar_plot, feature)
            break
        elif option == '3':
            # choose feature and number of bins
            feature = input('Please type in the feature name: ')
            no_of_bins = input('Please type in the number of bins: ')
            plot_or_key_error(self.hist_density_plots, feature, no_of_bins)
            break
        elif option == '4':
            # choose feature and output variable
            feature = input('Please type in the feature name: ')
            output = input('Please type in the output name: ')
            plot_or_key_error(self.scatter_plot, feature, output)
            break
        elif option == ';':
            self.display_data()
            break
        elif option == '9':
            break
        elif option == '0':
            sys.exit()
        else:
            print('Input not recognised, please type again!')
def feature_selection(self):
    """Show the feature-selection menu and run the chosen method.

    Menu keys:
        ``1``  ``feature_lasso``, then leave the menu
        ``2``  ``feature_ridge``, then leave the menu
        ``3``  ``feature_rfe`` with a user-supplied number of features;
               the menu is shown again afterwards
        ``;``  display the data, then leave the menu
        ``9``  return to the previous menu
        ``0``  exit the application
    """
    while self.running:
        print(pg.feature_sel)
        print(pg.back_exit)
        option = input('User: ')

        if option == '1':
            self.feature_lasso()
            break
        elif option == '2':
            self.feature_ridge()
            break
        elif option == '3':
            user_input = input('Please type in the number of features: ')
            # Re-prompt instead of crashing when the text is not a number.
            try:
                n_features = int(user_input)
            except ValueError:
                print('Input not recognised, please type again!')
                continue
            self.feature_rfe(n_features)
        elif option == ';':
            self.display_data()
            break
        elif option == '9':
            break
        elif option == '0':
            sys.exit()
        else:
            print('Input not recognised, please type again!')
def multi_transform(self, feature_list, start_col, end_col, _range=False):
    """Show the transformation menu and apply the chosen transformation.

    The menu is repeated until a transformation has been applied or the
    user leaves it. The new/base column name is only requested once a
    valid transformation key has been chosen.

    Menu keys:
        ``1``  mean transformation
        ``2``  IQR transformation (column mode only)
        ``3``  percent-to-whole transformation
        ``;``  display the data, then show the menu again
        ``9``  return to the previous menu
        ``0``  run ``save_check`` and exit the application

    Args:
        feature_list: Passed to the ``*_col`` transformations; only used
            when ``_range`` is false.
        start_col: Start of the column range; only used when ``_range``
            is true.
        end_col: End of the column range; only used when ``_range`` is
            true.
        _range: When true, use the ``*_range`` transformations.
    """
    # Key "2" has no range variant in this menu.
    transform_keys = ('1', '3') if _range else ('1', '2', '3')

    while True:
        print(pg.feature_trans)
        print(pg.back_exit)
        options = input('User: ')

        if options == ';':
            self.display_data()
            continue
        if options == '9':
            break
        if options == '0':
            save_check(self.file_directory, self.func_order)
            sys.exit()
        if options not in transform_keys:
            print('Input not recognised, please type again!')
            continue

        col_name = input('Please type in the new/base column name: ')

        # NOTE(review): the instance is passed explicitly, exactly as the
        # original calls did - confirm these attributes are plain
        # functions rather than bound methods.
        if _range:
            transform = (self.mean_trans_range if options == '1'
                         else self.percent2whole_range)
            transform(self, start_col, end_col, col_name)
            pu.func_store_range(transform, self.func_order, 3)
        else:
            if options == '1':
                transform = self.mean_trans_col
            elif options == '2':
                transform = self.iqr_trans_col
            else:
                transform = self.percent2whole_col
            transform(self, feature_list, col_name)
            pu.func_store_range(transform, self.func_order, 2)
        break
# NOTE(review): the ``def`` header below was not present in the text; it
# is reconstructed from the recursive call and the call at the bottom of
# the script, both of which pass ``(path, logger, config)``.
def execute_directory(path, logger, config):
    """Run every ``*.csv`` test case in ``path`` and in its sub-directories.

    Sub-directories are processed first (recursively); afterwards one
    ``TestContent`` case is built per CSV file directly inside ``path``
    and the resulting suite is run, provided it is not empty.

    Args:
        path: Directory to scan.
        logger: Logger attached to every test case and used for progress
            messages.
        config: Parsed configuration attached to every test case.
    """
    for infile in os.listdir(path):
        fullpath = os.path.join(path, infile)
        if os.path.isdir(fullpath):
            execute_directory(fullpath, logger, config)

    suite = unittest.TestSuite()
    for infile in glob.glob(os.path.join(path, '*.csv')):
        testCase = TestContent()
        testCase.logger = logger
        testCase.config = config
        testCase.file_path = infile
        suite.addTest(testCase)

    if suite.countTestCases() > 0:
        logger.info('Running test suite: %s', path)
        # ``handler`` is the module-level file handler created below, so
        # the runner's report goes to the same stream as the log.
        unittest.TextTestRunner(stream=handler.stream, descriptions=1,
                                verbosity=2).run(suite)


path = 'c:\\selectica\\testcases'

config = ConfigParser.ConfigParser()
# Close the configuration file once it has been parsed.
with open(os.path.join(path, 'selenium.cfg')) as config_file:
    config.readfp(config_file)

logger = logging.getLogger()
handler = logging.handlers.TimedRotatingFileHandler(
    os.path.join(path, 'selenium.log'), 'midnight')
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

logger.info('Start to run all test cases')
execute_directory(path, logger, config)
logger.info('Finished running all test cases')
sys.exit()
def drop_impute(self):
    """Show the drop/impute menu and run the chosen sub-menu.

    Menu keys:
        ``1``  drop sub-menu (all / subset)
        ``2``  impute sub-menu (by column / by row)
        ``;``  display the data, then show the menu again
        ``9``  return to the previous menu
        ``0``  run ``save_check`` and exit the application

    A sub-menu is shown once per selection; it reports whether this menu
    should be left afterwards.
    """
    while self.running:
        print(pg.drop_imp)
        print(pg.back_exit)
        option = input('User: ')

        if option == '1':
            if self._drop_menu():
                break
        elif option == '2':
            if self._impute_menu():
                break
        elif option == ';':
            self.display_data()
        elif option == '9':
            break
        elif option == '0':
            save_check(self.file_directory, self.func_order)
            sys.exit()
        else:
            print('Input not recognised, please type again!')


def _drop_menu(self):
    """Run the drop sub-menu once; return True to leave the drop/impute menu."""
    print(pg.all_subset)
    print(pg.back_exit)
    drop_type = input('User: ')

    if drop_type == '1':
        self.drop_all(self)
        pu.func_store(self.drop_all, self.func_order)
        pu.autosave(self.df, self.directory, self.sheet,
                    self.func_order, enabled=self.auto_save)
        return True
    if drop_type == '2':
        # lets user choose which subset to drop
        col = input('Please type in the subset you wish to drop: ')
        self.drop_subset(self, col)
        pu.func_store_col(self.drop_subset, self.func_order)
        pu.autosave(self.df, self.directory, self.sheet,
                    self.func_order, enabled=self.auto_save)
        return True
    if drop_type == ';':
        self.display_data()
        return False
    if drop_type == '9':
        return True
    if drop_type == '0':
        save_check(self.file_directory, self.func_order)
        sys.exit()
    print('Input not recognised, please type again!')
    return False


def _impute_menu(self):
    """Run the row/column impute sub-menu once; return True to leave the drop/impute menu."""
    print(pg.row_col)
    print(pg.back_exit)
    column_row = input('User: ')

    if column_row == '1':  # impute by column
        return self._impute_column_menu()
    if column_row == '2':
        self.impute_row(self)  # impute by row means
        pu.func_store(self.impute_row, self.func_order)
        pu.autosave(self.df, self.directory, self.sheet,
                    self.func_order, enabled=self.auto_save)
        return True
    if column_row == ';':
        self.display_data()
        return False
    if column_row == '9':
        return True
    if column_row == '0':
        # Same save prompt as the other exits of this menu tree.
        save_check(self.file_directory, self.func_order)
        sys.exit()
    print('Input not recognised, please type again!')
    return False


def _impute_column_menu(self):
    """Run the column impute sub-menu once; return True to leave the drop/impute menu."""
    print(pg.impute_ops)
    print(pg.back_exit)
    impute_option = input('User: ')

    if impute_option == '1':  # impute by mean
        self.impute_col(self)
        pu.func_store(self.impute_col, self.func_order)
        pu.autosave(self.df, self.directory, self.sheet,
                    self.func_order, enabled=self.auto_save)
        return True
    if impute_option == '2':
        # impute by user defined value on a feature
        col_input = input('Please type in the feature name: ')
        num_input = input('Please enter a value: ')
        try:
            num = int(num_input)
        except ValueError:
            # Non-numeric text used to crash the session here.
            print('Input not recognised, please type again!')
            return False
        self.impute_num(self, num, col_input)
        pu.func_store_range(self.impute_num, self.func_order, 2)
        pu.autosave(self.df, self.directory, self.sheet,
                    self.func_order, enabled=self.auto_save)
        # This option returns to the drop/impute menu instead of leaving it.
        return False
    if impute_option == '3':
        # impute all selected columns by 0
        start_col = input('Please type in the starting feature: ')
        end_col = input('Please type in the ending feature: ')
        self.impute_zero(self, start_col, end_col)
        pu.func_store_range(self.impute_zero, self.func_order, 2)
        pu.autosave(self.df, self.directory, self.sheet,
                    self.func_order, enabled=self.auto_save)
        return True
    if impute_option == ';':
        self.display_data()
        return False
    if impute_option == '9':
        return True
    if impute_option == '0':
        # Same save prompt as the other exits of this menu tree.
        save_check(self.file_directory, self.func_order)
        sys.exit()
    print('Input not recognised, please type again!')
    return False
def feature_operation(self):
    """Show the feature-operations menu and run the chosen operation.

    Menu keys:
        ``1``  delete features
        ``2``  normalise features
        ``3``  transform features
        ``4``  rename features
        ``5``  label-encode features
        ``;``  display the data and leave the menu
        ``9``  return to the previous menu
        ``0``  run ``save_check`` and exit the application

    Any other key prints an "Input not recognised" message and shows the
    menu again. When the menu is left, the data frame is printed and
    ``self.running`` is reset to ``True``.
    """
    while self.running:
        print(pg.feature_ops)
        print(pg.back_exit)
        operations = input('User: ')

        if operations == ';':
            self.display_data()
            break
        elif operations == '9':
            break
        elif operations == '0':
            save_check(self.file_directory, self.func_order)
            sys.exit()
        elif operations == '4':
            # trigger rename method
            if self._rename_features():
                break
        elif operations in ('1', '2', '3', '5'):
            # A membership test is required here: a chain such as
            # ``x == '1' or '2'`` is always true, because a non-empty
            # string is truthy on its own.
            self._apply_feature_operation(operations)
        else:
            print('Input not recognised, please type again!')

    print('\n')
    print(self.df)  # show data
    # set running to True for the running of other operations
    self.running = True


def _rename_features(self):
    """Run the rename sub-menu once; return True to leave the feature menu."""
    print(pg.rename_type)
    print(pg.back_exit)
    user_input = input('User: ')

    if user_input == '1':
        feature_old = input(pg.feature_rename)
        feature_new = input('Please type in the new feature name: ')
        self.col_rename(self, feature_old, feature_new)
        pu.func_store_range(self.col_rename, self.func_order, 2)
        # autosaves the data after the operation executes if enabled
        pu.autosave(self.df, self.directory, self.sheet,
                    self.func_order, enabled=self.auto_save)
        print('Feature renamed!')
        return False

    if user_input == '2':
        list_file = input(pg.list_d)
        # read the new names, one per line, from the file
        try:
            with open(list_file) as f:
                feature_names = [line.rstrip() for line in f]
        except OSError as err:
            # A mistyped path should not end the interactive session.
            print('Cannot read feature list file: {}'.format(err))
            return False
        self.data_rename(self, feature_names)
        pu.func_store_col(self.data_rename, self.func_order)
        pu.autosave(self.df, self.directory, self.sheet,
                    self.func_order, enabled=self.auto_save)
        print('Feature(s) renamed!')
        return True

    return False


def _apply_feature_operation(self, operations):
    """Ask which features to use, then run operation '1', '2', '3' or '5' on them."""
    # asks the user how they want to select their features
    range_select = input(pg.range_option)

    if range_select == '1':
        print(pg.select_fin)
        # collect names until the user types '*'
        feature_list = list(iter(lambda: input(pg.select_fea), '*'))
        print('Feature(s) selected: ')
        print(feature_list)  # shows the selection

        # trigger different feature operations based on user choice
        if operations == '1':
            self.remove_col(self, feature_list)
            pu.func_store_col(self.remove_col, self.func_order)
            print('Feature(s) deleted!')
        elif operations == '2':
            self.normalise(feature_list, None, None)
            print('Feature(s) normalised!')
        elif operations == '3':
            self.multi_transform(feature_list, None, None)
            print('Feature(s) transformed!')
        elif operations == '5':
            self.label_class_col(self, feature_list)
            pu.func_store_col(self.label_class_col, self.func_order)
            print('Feature(s) encoded!')
    elif range_select == '2':
        # asks the user the range of columns they wish to select
        start_col = input('Please type in the starting feature: ')
        end_col = input('Please type in the ending feature: ')
        print('Feature range: ')
        print(start_col, end_col)

        if operations == '1':
            self.remove_range(self, start_col, end_col)
            pu.func_store_range(self.remove_range, self.func_order, 2)
            print('Feature(s) deleted!')
        elif operations == '2':
            self.normalise(None, start_col, end_col, _range=True)
            print('Feature(s) normalised!')
        elif operations == '3':
            self.multi_transform(None, start_col, end_col, _range=True)
            print('Feature(s) transformed!')
        elif operations == '5':
            self.label_class_range(self, start_col, end_col)
            pu.func_store_range(self.label_class_range, self.func_order, 2)
            print('Feature(s) encoded!')
    else:
        print('Input not recognised, please type again!')
        return  # nothing was changed, so there is nothing to save

    # autosaves data if enabled
    pu.autosave(self.df, self.directory, self.sheet,
                self.func_order, enabled=self.auto_save)
elif user_input == ';': _data.display_data() # display data elif user_input == '8': # returns to previous menu _running = False # clears data and return to main menu elif user_input == '7': _data = Data() _data.import_data() # import new data mainmenu = True break elif user_input == '9': # returns to main menu mainmenu = True break elif user_input == '0': # exit application save_check(_data.file_directory, _data.func_order) sys.exit() else: print('Input not recognised, please type again!') _running = True elif user_input == '2': # feature operations _data.feature_operation() elif user_input == '3': # summary statistics while _running: print( pg.summ_stat_ops) # summary statistics operations print(pg.option) user_input = input('User: '******'1': print(_data.summ_stat()) # show summary statistics elif user_input == '2': print(_data.outlier_detect()) # outlier detection