예제 #1
0
 def __init__(self):
     self.train_file = config.TRAIN_FILE
     self.test_file = config.TEST_FILE
     self.predicted_test_file = config.PREDICTED_TEST_FILE
     self.model_folder = config.MODELS_FOLDER
     self.target_map = config.TARGET_MAP
     self.map_sensors = config.MAP_SENSORS
     self.load_cell_theshold = config.LOAD_CELL_THRESHOLD
     self.weight_threshold = config.WEIGHT_THRESHOLD
     self.outliers_threshold = config.OUTLIERS_THRESHOLD
     self.feature_names = config.FEATURE_NAMES
     self.dev_map = config.DEV_MAP
     self.plank_dict = config.PLANK_DICT
     self.position_to_remove = config.POSITION_TO_REMOVE
     self.sensor_details_file = sensor_details_file
     self.norm_sensor_details_file = norm_sensor_details_file
     self.random_state = config.RANDOM_STATE
     self.min_samples_split = config.MIN_SAMPLES_SPLIT
     self.min_samples_leaf = config.MIN_SAMPLES_LEAF
     self.n_estimators = config.N_ESTIMATORS
     self.model_name = model_name
     self.target_column = config.TARGET_COLUMN
     self.preprocess = PreProcess(self.load_cell_theshold, self.weight_threshold, self.outliers_threshold, \
                                  self.map_sensors, self.target_map, self.position_to_remove, \
                                  self.sensor_details_file, self.norm_sensor_details_file, self.model_folder)
     self.fe = FeatureExtractor(self.plank_dict)
 def get_plate(image_path, wpod_net, Dmax=608, Dmin=256):
     vehicle = PreProcess.preprocess_image(image_path)
     ratio = float(max(vehicle.shape[:2])) / min(vehicle.shape[:2])
     side = int(ratio * Dmin)
     bound_dim = min(side, Dmax)
     _, LpImg, _, cor = detect_lp(wpod_net,
                                  vehicle,
                                  bound_dim,
                                  lp_threshold=0.5)
     return vehicle, LpImg, cor
예제 #3
0
class FitAndPredict:
    
    '''
    Class contains function for the training and classification pipeline
    '''
    
    def __init__(self):
        self.train_file = config.TRAIN_FILE
        self.test_file = config.TEST_FILE
        self.predicted_test_file = config.PREDICTED_TEST_FILE
        self.model_folder = config.MODELS_FOLDER
        self.target_map = config.TARGET_MAP
        self.map_sensors = config.MAP_SENSORS
        self.load_cell_theshold = config.LOAD_CELL_THRESHOLD
        self.weight_threshold = config.WEIGHT_THRESHOLD
        self.outliers_threshold = config.OUTLIERS_THRESHOLD
        self.feature_names = config.FEATURE_NAMES
        self.dev_map = config.DEV_MAP
        self.plank_dict = config.PLANK_DICT
        self.position_to_remove = config.POSITION_TO_REMOVE
        self.sensor_details_file = sensor_details_file
        self.norm_sensor_details_file = norm_sensor_details_file
        self.random_state = config.RANDOM_STATE
        self.min_samples_split = config.MIN_SAMPLES_SPLIT
        self.min_samples_leaf = config.MIN_SAMPLES_LEAF
        self.n_estimators = config.N_ESTIMATORS
        self.model_name = model_name
        self.target_column = config.TARGET_COLUMN
        self.preprocess = PreProcess(self.load_cell_theshold, self.weight_threshold, self.outliers_threshold, \
                                     self.map_sensors, self.target_map, self.position_to_remove, \
                                     self.sensor_details_file, self.norm_sensor_details_file, self.model_folder)
        self.fe = FeatureExtractor(self.plank_dict)
        
        
    def read_train_data(self):
        
        '''
        Function to read the train data in the training pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start read_train_data()')
        try:
            self.input_data = read_csv(self.train_file)
            logging.debug(__name__ + ' shape : ' + str(self.input_data.shape))
            logging.debug(__name__ + ' : ' + ' End read_train_data()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Input file not found ')
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass 
        
    def check_train_data(self):
        
        '''
        Function to check if all load cell columns and target column are present in the train data
        '''

        logging.debug(__name__ + ' : ' + ' Start check_train_data()')        
        try:
            train_columns = self.input_data.columns.values
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        # check if all load cell columns are present in the train data

        try:
            if (set(self.map_sensors.keys()).issubset(set(train_columns))) or \
            (set(self.map_sensors.values()).issubset(set(train_columns))):
                pass
            else:
                print ("LOAD CELL COLUMNS NOT PRESENT IN TRAIN DATA")
                logging.debug(__name__ + ' : ' + ' LOAD CELL COLUMNS NOT PRESENT IN TRAIN DATA')
                logging.debug(__name__ + ' : ' + ' End read_train_data()')
                sys.exit()
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
            
        # check if target column is present in the data
        
        try:
            if (set([self.target_column]).issubset(set(train_columns))):
                pass
            else:
                print ("TARGET COLUMN NOT PRESENT IN TRAIN DATA")
                logging.debug(__name__ + ' : ' + ' TARGET COLUMN NOT PRESENT IN TRAIN DATA')
                logging.debug(__name__ + ' : ' + ' End read_train_data()')
                sys.exit()
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        # check if all 5 positions are present in target column of train data

        try:
            if (set(self.input_data[self.target_column].unique()).issubset(set([1, 2, 3, 4, 5]))):
                pass
            else:
                print ("VALUES OTHER THAN PRESPECIFIED POSITION VALUES PRESENT IN TRAIN DATA")
                logging.debug(__name__ + ' : ' + ' VALUES OTHER THAN PRESPECIFIED POSITION VALUES PRESENT IN TRAIN DATA')
                logging.debug(__name__ + ' : ' + ' End read_train_data()')
                sys.exit()
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        logging.debug(__name__ + ' : ' + ' End check_train_data()')
        
        return 
        
    def preprocess_train_data(self):
        
        '''
        Function to preprocess the train data in the training pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start preprocess_train_data()')
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rename_columns_if_needed()')
            self.preprocessed_input_data = self.preprocess.rename_columns_if_needed(self.input_data)
            logging.debug(__name__ + ' : ' + ' End rename_columns_if_needed()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rem_missing_train()')
            self.preprocessed_input_data = self.preprocess.rem_missing_train(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End rem_missing_train()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rem_load_cell_threshold()')
            self.preprocessed_input_data = self.preprocess.rem_load_cell_threshold(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End rem_load_cell_threshold()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rem_less_weights()')
            self.preprocessed_input_data = self.preprocess.rem_less_weights(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End rem_less_weights()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rem_sitting()')
            self.preprocessed_input_data = self.preprocess.rem_sitting(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End rem_sitting()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start normalize()')
            self.preprocessed_input_data = self.preprocess.normalize(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End normalize()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start treat_outliers_train()')
            self.preprocessed_input_data = self.preprocess.treat_outliers_train(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End treat_outliers_train()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        logging.debug(__name__ + ' shape : ' + str(self.preprocessed_input_data.shape))
        logging.debug(__name__ + ' : ' + ' End preprocess_train_data()')

        return 
        
    def read_test_data(self):
        
        '''
        Function to read the test data in the classification pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start read_test_data()')
        try:
            self.test_data = read_csv(self.test_file, header = None)
            logging.debug(__name__ + ' shape : ' + str(self.test_data.shape))
            logging.debug(__name__ + ' : ' + ' End read_test_data()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Test file not found ')
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            return
        
    def check_test_data(self):
        
        '''
        Function to check if all load cell columns are present in the test data
        '''
        logging.debug(__name__ + ' : ' + ' Start check_test_data()')

        # check if all load cell columns are present in the test data
        #try:
            #test_columns = self.test_data.columns
        #except Exception as e:
            #logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        #finally:
            #pass
        try:
            #if (set(self.map_sensors.keys()).issubset(set(test_columns))) or (set(self.map_sensors.values()).issubset(set(test_columns))):
            test_columns = ['LC' + str(x) for x in range(1, 17)]
            if self.test_data.shape[1] == len(test_columns):
            	self.test_data.columns = test_columns
            	pass
            else:
                print ("TEST DATA DO NOT HAVE ALL LOAD CELLS DATA")
                logging.debug(__name__ + ' : ' + ' TEST DATA DO NOT HAVE ALL LOAD CELLS DATA')
                logging.debug(__name__ + ' : ' + ' End check_test_data()')
                sys.exit()
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        logging.debug(__name__ + ' : ' + ' End check_test_data()')

        return 
        
        
    def preprocess_test_data(self):
        
        '''
        Function to preprocess the test data in the classification pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start preprocess_test_data()')

        try:
        	self.preprocessed_test_data = self.test_data
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        #try:
        #    logging.debug(__name__ + ' : ' + ' Start rename_columns_if_needed()')
        #    self.preprocessed_test_data = self.preprocess.rename_columns_if_needed(self.test_data)
        #    logging.debug(__name__ + ' : ' + ' End rename_columns_if_needed()')
        #except Exception as e:
        #    logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        #finally:
        #    pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start treat_missing_test()')
            self.preprocessed_test_data = self.preprocess.treat_missing_test(self.preprocessed_test_data)
            logging.debug(__name__ + ' : ' + ' End treat_missing_test()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start normalize()')
            self.preprocessed_test_data = self.preprocess.normalize(self.preprocessed_test_data)
            logging.debug(__name__ + ' : ' + ' End normalize()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        try:
            logging.debug(__name__ + ' : ' + ' Start treat_outliers_test()')
            self.preprocessed_test_data = self.preprocess.treat_outliers_test(self.preprocessed_test_data)
            logging.debug(__name__ + ' : ' + ' End treat_outliers_test()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        logging.debug(__name__ + ' shape : ' + str(self.preprocessed_test_data.shape))
        logging.debug(__name__ + ' : ' + ' End preprocess_test_data()')
        
        return
        
    def transform_train_data_into_features(self):
        
        '''
        Function to create features from train data in the training pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start transform_train_data_into_features()')

        try:
            # left_sensors_pct
            logging.debug(__name__ + ' : ' + ' Start left_percent()')
            self.preprocessed_input_data['left_sensors_pct'] = self.preprocessed_input_data.apply(lambda x : self.fe.left_percent(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End left_percent()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            #  plank_1_std
            logging.debug(__name__ + ' : ' + ' Start plank_1_std_cal()')
            self.preprocessed_input_data['plank_1_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_1_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_1_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_2_std
            logging.debug(__name__ + ' : ' + ' Start plank_2_std_cal()')
            self.preprocessed_input_data['plank_2_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_2_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_2_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_std
            logging.debug(__name__ + ' : ' + ' Start plank_3_std_cal()')
            self.preprocessed_input_data['plank_3_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_3_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_3_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_std
            logging.debug(__name__ + ' : ' + ' Start plank_4_std_cal()')
            self.preprocessed_input_data['plank_4_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_4_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_4_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_1_x
            logging.debug(__name__ + ' : ' + ' Start get_com_1_x()')
            self.preprocessed_input_data['plank_1_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_1_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_1_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_2_x
            logging.debug(__name__ + ' : ' + ' Start get_com_2_x()')
            self.preprocessed_input_data['plank_2_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_2_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_2_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:        
            # com_3_x
            logging.debug(__name__ + ' : ' + ' Start get_com_3_x()')
            self.preprocessed_input_data['plank_3_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_3_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_3_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_4_x
            logging.debug(__name__ + ' : ' + ' Start get_com_4_x()')
            self.preprocessed_input_data['plank_4_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_4_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_4_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_1_y
            logging.debug(__name__ + ' : ' + ' Start get_com_1_y()')
            self.preprocessed_input_data['plank_1_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_1_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_1_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_2_y
            logging.debug(__name__ + ' : ' + ' Start get_com_2_y()')
            self.preprocessed_input_data['plank_2_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_2_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_2_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_3_y
            logging.debug(__name__ + ' : ' + ' Start get_com_3_y()')
            self.preprocessed_input_data['plank_3_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_3_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_3_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_4_y
            logging.debug(__name__ + ' : ' + ' Start get_com_4_y()')
            self.preprocessed_input_data['plank_4_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_4_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_4_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # y_errors
            logging.debug(__name__ + ' : ' + ' Start get_errors_from_fitted_line()')
            self.preprocessed_input_data['y_errors'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_errors_from_fitted_line(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_errors_from_fitted_line()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_deviation from fitted line through COMs of first two planks
            logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_3()')
            self.preprocessed_input_data['plank_3_dev'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_deviation_plank_3(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_deviation_plank_3()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_deviation from fitted line through COMs of first two planks
            logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_4()')
            self.preprocessed_input_data['plank_4_dev'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_deviation_plank_4(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_deviation_plank_4()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_dev_bucket
            logging.debug(__name__ + ' : ' + ' Start bucketize_plank_3_dev()')
            self.preprocessed_input_data['plank_3_dev_bucket'] = self.preprocessed_input_data['plank_3_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x))
            self.preprocessed_input_data['plank_3_dev_bucket'] = self.preprocessed_input_data['plank_3_dev_bucket'].apply(lambda x: self.dev_map[x])
            logging.debug(__name__ + ' : ' + ' End bucketize_plank_3_dev()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_dev_bucket
            logging.debug(__name__ + ' : ' + ' Start bucketize_plank_4_dev()')
            self.preprocessed_input_data['plank_4_dev_bucket'] = self.preprocessed_input_data['plank_4_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x))
            self.preprocessed_input_data['plank_4_dev_bucket'] = self.preprocessed_input_data['plank_4_dev_bucket'].apply(lambda x: self.dev_map[x])
            logging.debug(__name__ + ' : ' + ' End bucketize_plank_4_dev()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        try:
        	# plank_4_deviation from fitted line through COMs of 2nd and 3rd planks
            logging.debug(__name__ + ' : ' + ' Start plank_4_wrt_3_2()')
            self.preprocessed_input_data['plank_4_wrt_3_2'] = self.preprocessed_input_data.apply(lambda x: self.fe.plank_4_wrt_3_2(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_4_wrt_3_2()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass


        logging.debug(__name__ + ' shape : ' + str(self.preprocessed_input_data.shape))
        logging.debug(__name__ + ' : ' + ' End transform_train_data_into_features()')
        
        return
        
    def transform_test_data_into_features(self):
        
        '''
        Function to create features from test data in the classification pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start transform_test_data_into_features()')

        try:
            # left_sensors_pct
            logging.debug(__name__ + ' : ' + ' Start left_percent()')
            self.preprocessed_test_data['left_sensors_pct'] = self.preprocessed_test_data.apply(lambda x : self.fe.left_percent(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End left_percent()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_1_std
            logging.debug(__name__ + ' : ' + ' Start plank_1_std_cal()')
            self.preprocessed_test_data['plank_1_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_1_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_1_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_2_std
            logging.debug(__name__ + ' : ' + ' Start plank_2_std_cal()')
            self.preprocessed_test_data['plank_2_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_2_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_2_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_std
            logging.debug(__name__ + ' : ' + ' Start plank_3_std_cal()')
            self.preprocessed_test_data['plank_3_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_3_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_3_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_std
            logging.debug(__name__ + ' : ' + ' Start plank_4_std_cal()')
            self.preprocessed_test_data['plank_4_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_4_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_4_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_1_x
            logging.debug(__name__ + ' : ' + ' Start get_com_1_x()')
            self.preprocessed_test_data['plank_1_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_1_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_1_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_2_x
            logging.debug(__name__ + ' : ' + ' Start get_com_2_x()')
            self.preprocessed_test_data['plank_2_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_2_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_2_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_3_x
            logging.debug(__name__ + ' : ' + ' Start get_com_3_x()')
            self.preprocessed_test_data['plank_3_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_3_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_3_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_4_x
            logging.debug(__name__ + ' : ' + ' Start get_com_4_x()')
            self.preprocessed_test_data['plank_4_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_4_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_4_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_1_y
            logging.debug(__name__ + ' : ' + ' Start get_com_1_y()')
            self.preprocessed_test_data['plank_1_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_1_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_1_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_2_y
            logging.debug(__name__ + ' : ' + ' Start get_com_2_y()')
            self.preprocessed_test_data['plank_2_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_2_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_2_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_3_y
            logging.debug(__name__ + ' : ' + ' Start get_com_3_y()')
            self.preprocessed_test_data['plank_3_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_3_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_3_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_4_y
            logging.debug(__name__ + ' : ' + ' Start get_com_4_y()')
            self.preprocessed_test_data['plank_4_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_4_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_4_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # y_errors
            logging.debug(__name__ + ' : ' + ' Start get_errors_from_fitted_line()')
            self.preprocessed_test_data['y_errors'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_errors_from_fitted_line(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_errors_from_fitted_line()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_deviation from fitted line through COMs of first two planks
            logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_3()')
            self.preprocessed_test_data['plank_3_dev'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_deviation_plank_3(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_deviation_plank_3()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_deviation from fitted line through COMs of first two planks
            logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_4()')
            self.preprocessed_test_data['plank_4_dev'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_deviation_plank_4(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_deviation_plank_4()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_dev_bucket
            logging.debug(__name__ + ' : ' + ' Start bucketize_plank_3_dev()')
            self.preprocessed_test_data['plank_3_dev_bucket'] = self.preprocessed_test_data['plank_3_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x))
            self.preprocessed_test_data['plank_3_dev_bucket'] = self.preprocessed_test_data['plank_3_dev_bucket'].apply(lambda x: self.dev_map[x])
            logging.debug(__name__ + ' : ' + ' End bucketize_plank_3_dev()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_dev_bucket
            logging.debug(__name__ + ' : ' + ' Start bucketize_plank_4_dev()')
            self.preprocessed_test_data['plank_4_dev_bucket'] = self.preprocessed_test_data['plank_4_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x))
            self.preprocessed_test_data['plank_4_dev_bucket'] = self.preprocessed_test_data['plank_4_dev_bucket'].apply(lambda x: self.dev_map[x])
            logging.debug(__name__ + ' : ' + ' End bucketize_plank_4_dev()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        try:
        	# plank_4_deviation from fitted line through COMs of 2nd and 3rd planks
            logging.debug(__name__ + ' : ' + ' Start plank_4_wrt_3_2()')
            self.preprocessed_test_data['plank_4_wrt_3_2'] = self.preprocessed_test_data.apply(lambda x: self.fe.plank_4_wrt_3_2(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_4_wrt_3_2()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass


        logging.debug(__name__ + ' shape : ' + str(self.preprocessed_test_data.shape))
        logging.debug(__name__ + ' : ' + ' End transform_test_data_into_features()')
        
        return
    
    def train_model(self):
        
        '''
        Function to train the model on train data for training pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start train_model()')
        
        try:
            # train the model
            self.learner = Learner(self.n_estimators, self.min_samples_split, \
                               self.min_samples_leaf, self.random_state, \
                               self.model_folder, self.model_name)
            X_train = self.preprocessed_input_data[self.feature_names]
            Y_train = self.preprocessed_input_data[self.target_column]
            predictions, pred_prob  = self.learner.train_model(X_train, Y_train)
            #print (np.round(accuracy_score(Y_train, predictions), 4) * 100)
            #print (np.round(log_loss(Y_train, pred_prob), 2))
            logging.debug(__name__ + ' : ' + ' End train_model()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            return
        
    def classify(self):
        
        '''
        Function to make classifications on test data for the classification pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start classify()')
        
        try:
            
            # classify using the model
            self.classifier = Classifier(self.model_folder)
            X_test = self.preprocessed_test_data[self.feature_names]
            #Y_test = self.preprocessed_test_data[self.target_column]
            #print (X_test.shape, Y_test.shape)
            predictions, pred_prob = self.classifier.classify_model(X_test)
            
            # saving the test dataset with the predicted values
            
            #self.test_data[self.target_column] = predictions
            #self.test_data.to_csv(self.predicted_test_file, index = False)
            self.predicted_test_data = DataFrame({self.target_column:predictions})
            self.predicted_test_data.to_csv(self.predicted_test_file, index = False, header = False)
            
            # printing the accuracy score
            #print (np.round(accuracy_score(Y_test, predictions), 4) * 100)
            #print (np.round(log_loss(Y_test, pred_prob), 2))
            
            logging.debug(__name__ + ' : ' + ' End classify()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            return
예제 #4
0
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from preprocessing import PreProcess
from model import LSTM_4
import numpy
import matplotlib.pyplot as plt
import math
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = PreProcess()
dataset.crawl_stock()
dataset.creat_traindata()
trainX = dataset.trainX
trainY = dataset.trainY
testX = dataset.testX
testY = dataset.testY
look_back = 1
model = LSTM_4(trainX, trainY)
model.create_model()
model.train()
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
trainPredict , trainY = dataset.inverse_data(trainPredict,trainY)
testPredict, testY = dataset.inverse_data(testPredict,testY)
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % trainScore)
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % testScore)
# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset.dataset)
trainPredictPlot[:, :] = numpy.nan