Exemple #1
0
 def normalise(self, feature_list, start_col, end_col, _range=False):
     """Prompt for a normalisation method and apply it to the selection.

     The menu is shown repeatedly until a normalisation has been applied,
     the user returns to the previous menu ('9') or exits the application
     ('0'). Typing ';' displays the data and shows the menu again.

     Args:
         feature_list: passed to the ``*_norm_col`` function when
             ``_range`` is falsy.
         start_col: passed, with ``end_col``, to the ``*_norm_range``
             function when ``_range`` is truthy.
         end_col: see ``start_col``.
         _range: selects the range variant instead of the per-column one.
     """
     # NOTE(review): ``self`` is passed explicitly to the normalisers, so
     # they are presumably plain functions stored as attributes rather than
     # bound methods -- confirm against the class definition.
     while True:
         print(pg.feature_norm)
         print(pg.back_exit)
         options = input('User: ')
         if options == '1' and not _range:
             self.mean_norm_col(self, feature_list)
             pu.func_store_col(self.mean_norm_col, self.func_order)
             break
         elif options == '2' and not _range:
             self.minmax_norm_col(self, feature_list)
             pu.func_store_col(self.minmax_norm_col, self.func_order)
             break
         elif options == '1' and _range:
             self.mean_norm_range(self, start_col, end_col)
             pu.func_store_range(self.mean_norm_range, self.func_order, 2)
             break
         elif options == '2' and _range:
             self.minmax_norm_range(self, start_col, end_col)
             pu.func_store_range(self.minmax_norm_range, self.func_order, 2)
             break
         elif options == ';':
             self.display_data()
         elif options == '9':
             break  # returns to the previous menu
         elif options == '0':
             # saving directory
             # and function calls (job history) to log files
             save_check(self.file_directory, self.func_order)
             sys.exit()  # exit the application
         else:
             print('Input not recognised, please type again!')
Exemple #2
0
 def hist_density_plots(self, feature, no_of_bins):
     """Show the histogram/density menu and draw the plot the user picks.

     Choices visible in this block: '1' histogram, '2' density plot and
     '3' combined plot of ``self.df[feature]``; '4' pair plot of the whole
     ``self.df``; ';' displays the data; '9' leaves; '0' exits via
     ``sys.exit()``.

     NOTE(review): ``no_of_bins`` is not used anywhere in the visible body.
     """
     print(pg.hist_dens)
     print(pg.back_exit)
     # choose different between data types and plots
     # NOTE(review): the next line looks corrupted -- the ``input(`` call is
     # never closed and the first ``if`` is fused into it. The ``break``
     # statements further down also imply a loop header that is missing from
     # this view. Restore from the original source before running.
     data_type = input('User: '******'1':  # histogram
             sns.distplot(self.df[feature], hist=True, kde=False)
             plt.gca().set(
                 title='Frequency Histogram of {}'.format(feature),
                 xlabel=feature,
                 ylabel='Frequency')
             # block=False is passed so the call is non-blocking
             plt.show(block=False)
         elif data_type == '2':  # density
             sns.distplot(self.df[feature],
                          hist=False,
                          kde=True,
                          kde_kws={
                              'shade': True,
                              'linewidth': 3
                          })
             plt.gca().set(title='Density plot of {}'.format(feature),
                           ylabel='Density')
             plt.show(block=False)
         elif data_type == '3':  # mixed plot
             sns.distplot(self.df[feature])
             plt.gca().set(
                 title='Histogram and Density plot of {}'.format(feature),
                 ylabel='Density')
             plt.show(block=False)
         elif data_type == '4':  # pairplots
             # uses the whole data frame, not just ``feature``
             sns.pairplot(self.df, diag_kind='kde', plot_kws={'alpha': 0.2})
             plt.show(block=False)
         elif data_type == ';':
             self.display_data()
             break
         elif data_type == '9':
             break
         elif data_type == '0':
             sys.exit()
         else:
             print('Input not recognised, please type again!')
             # NOTE(review): ``run`` is assigned here but never read in the
             # visible code
             run = True
Exemple #3
0
    def visualise(self):
        """Show the visualisation menu and run the plot the user picks.

        Loops while ``self.running`` is true. Every recognised choice leaves
        the loop after it has run; an unrecognised choice shows the menu
        again. '0' exits the application via ``sys.exit()``.

        Raises:
            KeyError: if a plotting method raises any ``Exception``; the
                original error is chained as ``__cause__``.
        """
        while self.running:
            print(pg.viz)
            print(pg.back_exit)
            option = input('User: ')
            if option == '1':
                print('Correlation matrix: \n')
                self.corr_matr()
                break
            elif option == '2':
                feature = input('Please type in the feature name: ')
                # ``except Exception`` rather than a bare ``except`` so that
                # SystemExit / KeyboardInterrupt are not turned into KeyError.
                try:
                    self.bar_plot(feature)
                except Exception as err:
                    raise KeyError('cannot find feature') from err
                break
            elif option == '3':  # choose feature and number of bins
                feature = input('Please type in the feature name: ')
                no_of_bins = input('Please type in the number of bins: ')
                try:
                    self.hist_density_plots(feature, no_of_bins)
                except Exception as err:
                    raise KeyError('cannot find feature') from err
                break
            elif option == '4':  # choose feature and output variable
                feature = input('Please type in the feature name: ')
                output = input('Please type in the output name: ')
                try:
                    self.scatter_plot(feature, output)
                except Exception as err:
                    raise KeyError('cannot find feature') from err
                break
            elif option == ';':
                self.display_data()
                break
            elif option == '9':
                break
            elif option == '0':
                sys.exit()
            else:
                print('Input not recognised, please type again!')
Exemple #4
0
    def feature_selection(self):
        """Show the feature-selection menu and run the chosen method.

        Loops while ``self.running`` is true. '1' and '2' run
        ``feature_lasso`` / ``feature_ridge`` and leave the loop; '3' asks
        for a feature count, runs ``feature_rfe`` and shows the menu again;
        ';' displays the data and leaves; '9' leaves; '0' exits the
        application via ``sys.exit()``.
        """
        while self.running:
            print(pg.feature_sel)
            print(pg.back_exit)
            option = input('User: ')
            if option == '1':
                self.feature_lasso()
                break
            elif option == '2':
                self.feature_ridge()
                break
            elif option == '3':
                user_input = input('Please type in the number of features: ')
                # A non-numeric answer re-prompts instead of crashing the
                # menu with ValueError.
                try:
                    n_features = int(user_input)
                except ValueError:
                    print('Input not recognised, please type again!')
                    continue
                self.feature_rfe(n_features)
            elif option == ';':
                self.display_data()
                break
            elif option == '9':
                break
            elif option == '0':
                sys.exit()
            else:
                print('Input not recognised, please type again!')
Exemple #5
0
 def multi_transform(self, feature_list, start_col, end_col, _range=False):
     """Prompt for a transformation and apply it to the selection.

     Each pass asks for a menu choice and then for the new/base column
     name, before the choice is examined. The loop ends once a
     transformation has been applied or the user goes back ('9'); '0'
     saves the job history and exits; ';' displays the data and prompts
     again.

     Args:
         feature_list: passed to the ``*_col`` function when ``_range`` is
             falsy.
         start_col: passed, with ``end_col``, to the ``*_range`` function
             when ``_range`` is truthy.
         end_col: see ``start_col``.
         _range: selects the range variant instead of the per-column one.
     """
     # NOTE(review): there is no range variant for choice '2', so with
     # ``_range`` set that choice falls through to "not recognised".
     while True:
         print(pg.feature_trans)
         print(pg.back_exit)
         options = input('User: ')
         col_name = input('Please type in the new/base column name: ')
         if options == '1' and not _range:
             self.mean_trans_col(self, feature_list, col_name)
             pu.func_store_range(self.mean_trans_col, self.func_order, 2)
             break
         elif options == '2' and not _range:
             self.iqr_trans_col(self, feature_list, col_name)
             pu.func_store_range(self.iqr_trans_col, self.func_order, 2)
             break
         elif options == '3' and not _range:
             self.percent2whole_col(self, feature_list, col_name)
             pu.func_store_range(self.percent2whole_col, self.func_order, 2)
             break
         elif options == '1' and _range:
             self.mean_trans_range(self, start_col, end_col, col_name)
             pu.func_store_range(self.mean_trans_range, self.func_order, 3)
             break
         elif options == '3' and _range:
             self.percent2whole_range(self, start_col, end_col, col_name)
             pu.func_store_range(self.percent2whole_range, self.func_order,
                                 3)
             break
         elif options == ';':
             self.display_data()
         elif options == '9':
             break
         elif options == '0':
             save_check(self.file_directory, self.func_order)
             sys.exit()
         else:
             print('Input not recognised, please type again!')
Exemple #6
0
    listing = os.listdir(path)
    for infile in listing:
        fullpath = '%s\\%s' % (path, infile);
        if (os.path.isdir(fullpath)):
            execute_directory(fullpath, logger, config)
    suite = unittest.TestSuite()
    for infile in glob.glob('%s\\*.csv' % path):
        testCase = TestContent()
        testCase.logger = logger
        testCase.config = config
        testCase.file_path = infile
        suite.addTest(testCase);
    if (suite.countTestCases() > 0):
        logger.info('Running test suite: %s' % path)
        unittest.TextTestRunner(stream=handler.stream, descriptions=1, verbosity=2).run(suite)

# Script entry point: load the Selenium configuration, set up logging to a
# file handler that rotates at midnight plus the console, then run every
# test case found under ``path``.
path = 'c:\\selectica\\testcases'
config = ConfigParser.ConfigParser()
# Open the config file in a ``with`` block so the handle is closed once it
# has been parsed.
with open('%s\\selenium.cfg' % path) as config_file:
    config.readfp(config_file)
logger = logging.getLogger()
handler = logging.handlers.TimedRotatingFileHandler('%s\\selenium.log' % path, 'midnight')
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)
logger.info('Start to run all test cases')
execute_directory(path, logger, config)
logger.info('Finished running all test cases')
sys.exit()
Exemple #7
0
    def drop_impute(self):
        """Show the drop/impute menu and apply the chosen operation.

        Loops while ``self.running`` is true. Choice '1' opens the drop
        sub-menu (all / a named subset); choice '2' opens the impute
        sub-menu (by column: mean, user-defined value, or zero over a
        column range; or by row). Each applied operation is recorded via
        ``pu.func_store*`` and followed by ``pu.autosave``. ';' displays the
        data, '9' leaves the loop and '0' exits the application.
        """
        # NOTE(review): ``self`` is passed explicitly to the operations, so
        # they are presumably plain functions stored as attributes -- confirm.
        while self.running:
            print(pg.drop_imp)
            print(pg.back_exit)
            option = input('User: ')
            if option == '1':
                print(pg.all_subset)
                print(pg.back_exit)
                drop_type = input('User: ')
                if drop_type == '1':
                    self.drop_all(self)
                    pu.func_store(self.drop_all, self.func_order)
                    pu.autosave(self.df,
                                self.directory,
                                self.sheet,
                                self.func_order,
                                enabled=self.auto_save)
                    break
                elif drop_type == '2':
                    # lets user choose which subset to drop
                    col = input('Please type in the subset you wish to drop: ')
                    self.drop_subset(self, col)
                    pu.func_store_col(self.drop_subset, self.func_order)
                    pu.autosave(self.df,
                                self.directory,
                                self.sheet,
                                self.func_order,
                                enabled=self.auto_save)
                    break
                elif drop_type == ';':
                    self.display_data()
                elif drop_type == '9':
                    break
                elif drop_type == '0':
                    save_check(self.file_directory, self.func_order)
                    sys.exit()
                else:
                    print('Input not recognised, please type again!')
            elif option == '2':
                print(pg.row_col)
                print(pg.back_exit)
                column_row = input('User: ')
                if column_row == '1':  # impute by column
                    print(pg.impute_ops)
                    print(pg.back_exit)
                    impute_option = input('User: ')
                    if impute_option == '1':  # impute by mean
                        self.impute_col(self)
                        pu.func_store(self.impute_col, self.func_order)
                        pu.autosave(self.df,
                                    self.directory,
                                    self.sheet,
                                    self.func_order,
                                    enabled=self.auto_save)
                        break
                    # impute by user defined value on a feature
                    elif impute_option == '2':
                        col_input = input('Please type in the feature name: ')
                        num_input = input('Please enter a value: ')
                        num = int(num_input)
                        self.impute_num(self, num, col_input)
                        pu.func_store_range(self.impute_num, self.func_order,
                                            2)
                        pu.autosave(self.df,
                                    self.directory,
                                    self.sheet,
                                    self.func_order,
                                    enabled=self.auto_save)
                    # impute all selected columns by 0
                    elif impute_option == '3':
                        start_col = input(
                            'Please type in the starting feature: ')
                        end_col = input('Please type in the ending feature: ')
                        self.impute_zero(self, start_col, end_col)
                        pu.func_store_range(self.impute_zero, self.func_order,
                                            2)
                        pu.autosave(self.df,
                                    self.directory,
                                    self.sheet,
                                    self.func_order,
                                    enabled=self.auto_save)
                        break
                    elif impute_option == ';':
                        self.display_data()
                    elif impute_option == '9':
                        break
                    elif impute_option == '0':
                        sys.exit()
                    else:
                        print('Input not recognised, please type again!')
                elif column_row == '2':
                    self.impute_row(self)  # impute by row means
                    pu.func_store(self.impute_row, self.func_order)
                    pu.autosave(self.df,
                                self.directory,
                                self.sheet,
                                self.func_order,
                                enabled=self.auto_save)
                    break
                elif column_row == ';':
                    self.display_data()
                elif column_row == '9':
                    break
                elif column_row == '0':
                    sys.exit()
                else:
                    print('Input not recognised, please type again!')
            elif option == ';':
                self.display_data()
            elif option == '9':
                break
            elif option == '0':
                save_check(self.file_directory, self.func_order)
                sys.exit()
            else:
                print('Input not recognised, please type again!')
Exemple #8
0
    def feature_operation(self):
        """Show the feature-operations menu and apply the chosen operation.

        Loops while ``self.running`` is true. Choices handled here:
        '1' delete, '2' normalise, '3' transform and '5' label-encode a set
        of features (picked one by one or as a start/end range); '4' rename
        one feature or all features from a file; ';' display the data and
        leave; '9' leave; '0' save the job history and exit. After the loop
        ``self.running`` is set back to True.
        """
        # NOTE(review): ``self`` is passed explicitly to the operations, so
        # they are presumably plain functions stored as attributes -- confirm.
        while self.running:
            print(pg.feature_ops)
            print(pg.back_exit)
            operations = input('User: ')
            if operations == ';':
                self.display_data()
                break
            elif operations == '9':
                break
            elif operations == '0':
                save_check(self.file_directory, self.func_order)
                sys.exit()
            elif operations == '4':  # trigger rename method
                print(pg.rename_type)
                print(pg.back_exit)
                user_input = input('User: ')
                if user_input == '1':
                    feature_old = input(pg.feature_rename)
                    feature_new = input(
                        'Please type in the new feature name: ')
                    self.col_rename(self, feature_old, feature_new)
                    pu.func_store_range(self.col_rename, self.func_order, 2)
                    # autosaves the data after the operation executes if enabled
                    pu.autosave(self.df,
                                self.directory,
                                self.sheet,
                                self.func_order,
                                enabled=self.auto_save)
                    print('Feature renamed!')
                elif user_input == '2':
                    list_file = input(pg.list_d)
                    # read the new feature names, one per line, from the file
                    with open(list_file) as f:
                        feature_names = [line.rstrip() for line in f]
                    self.data_rename(self, feature_names)
                    pu.func_store_col(self.data_rename, self.func_order)
                    pu.autosave(self.df,
                                self.directory,
                                self.sheet,
                                self.func_order,
                                enabled=self.auto_save)
                    print('Feature(s) renamed!')
                    break
            # Membership test: ``operations == '1' or '2' or ...`` is always
            # true, which made the final ``else`` unreachable.
            elif operations in ('1', '2', '3', '5'):
                feature_list = []
                # asks the user how do they want to select their features
                range_select = input(pg.range_option)

                if range_select == '1':
                    print(pg.select_fin)
                    while True:  # ends selection when user type '*'
                        feature = input(pg.select_fea)
                        if feature == '*':
                            break
                        # append user input to a list
                        feature_list.append(feature)
                    print('Feature(s) selected: ')
                    print(feature_list)  # shows the selection
                    # trigger different feature operations based on user choice
                    if operations == '1':
                        self.remove_col(self, feature_list)
                        pu.func_store_col(self.remove_col, self.func_order)
                        print('Feature(s) deleted!')
                    elif operations == '2':
                        self.normalise(feature_list, None, None)
                        print('Feature(s) normalised!')
                    elif operations == '3':
                        self.multi_transform(feature_list, None, None)
                        print('Feature(s) transformed!')
                    else:  # operations == '5'
                        self.label_class_col(self, feature_list)
                        pu.func_store_col(self.label_class_col,
                                          self.func_order)
                        print('Feature(s) encoded!')

                elif range_select == '2':
                    # asks the user the range of columns they wish to select
                    start_col = input('Please type in the starting feature: ')
                    end_col = input('Please type in the ending feature: ')
                    print('Feature range: ')
                    print(start_col, end_col)
                    if operations == '1':
                        self.remove_range(self, start_col, end_col)
                        pu.func_store_range(self.remove_range, self.func_order,
                                            2)
                        print('Feature(s) deleted!')
                    elif operations == '2':
                        self.normalise(None, start_col, end_col, _range=True)
                        print('Feature(s) normalised!')
                    elif operations == '3':
                        self.multi_transform(None,
                                             start_col,
                                             end_col,
                                             _range=True)
                        print('Feature(s) transformed!')
                    else:  # operations == '5'
                        self.label_class_range(self, start_col, end_col)
                        pu.func_store_range(self.label_class_range,
                                            self.func_order, 2)
                        print('Feature(s) encoded!')
                else:
                    # tell the user the selection mode was not understood
                    print('Input not recognised, please type again!')
                # autosaves data if enabled
                pu.autosave(self.df,
                            self.directory,
                            self.sheet,
                            self.func_order,
                            enabled=self.auto_save)

            else:
                print('Input not recognised, please type again!')

            print('\n')
            print(self.df)  # show data
            # set running to True for the running of other operations
        self.running = True
         elif user_input == ';':
             _data.display_data()  # display data
         elif user_input == '8':  # returns to previous menu
             _running = False
         # clears data and return to main menu
         elif user_input == '7':
             _data = Data()
             _data.import_data()  # import new data
             mainmenu = True
             break
         elif user_input == '9':  # returns to main menu
             mainmenu = True
             break
         elif user_input == '0':  # exit application
             save_check(_data.file_directory, _data.func_order)
             sys.exit()
         else:
             print('Input not recognised, please type again!')
     _running = True
 elif user_input == '2':  # feature operations
     _data.feature_operation()
 elif user_input == '3':  # summary statistics
     while _running:
         print(
             pg.summ_stat_ops)  # summary statistics operations
         print(pg.option)
         user_input = input('User: '******'1':
             print(_data.summ_stat())  # show summary statistics
         elif user_input == '2':
             print(_data.outlier_detect())  # outlier detection