def serializeModel(self, data_num, x_dim, y_dim, model_constructor, model_deserializer):
    """Round-trip a model through save/deserialize and compare its dimensions.

    Random normally-distributed data is generated, a model created by
    ``model_constructor`` is run through ``validation`` on that data and
    saved under ``./serialized``. The object returned by
    ``model_deserializer`` must then expose the same ``x_dim`` and ``y_dim``.

    Args:
        data_num: Number of random samples to generate; asserted to be >= 1.
        x_dim: Dimension of the input data; asserted to be >= 1.
        y_dim: Dimension of the output data; asserted to be >= 1.
        model_constructor: Callable invoked as ``model_constructor(x_dim, y_dim)``.
        model_deserializer: Callable invoked with the same path that was
            handed to the model's ``save`` method.
    """
    for size in (data_num, x_dim, y_dim):
        self.assertGreaterEqual(size, 1)

    def random_samples(dim):
        # More than one dimension yields a DataFrame; exactly one yields a Series.
        if dim > 1:
            return pd.DataFrame(np.random.randn(data_num, dim))
        return pd.Series(np.random.randn(data_num))

    # Draw inputs before targets so the random stream is consumed in a fixed order.
    inputs = random_samples(x_dim)
    targets = random_samples(y_dim)

    model = model_constructor(x_dim, y_dim)
    model.validation(inputs, targets, 0.25)

    target_path = os.path.join(os.path.curdir, 'serialized')
    PublicSupport.create_path(target_path)
    saved_file = model.save(target_path)
    self.assertTrue(os.path.isfile(saved_file))

    restored = model_deserializer(target_path)
    self.assertEqual(restored.x_dim, x_dim)
    self.assertEqual(restored.y_dim, y_dim)
def _calc_train_features(original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path):
    """Prepare the training samples listed in an Excel sheet and compute their features.

    Args:
        original_data_path: Folder passed to the sample manager as the source
            of the original data; the preprocessing folder is created inside it.
        preprocessed_dir: Name of the preprocessing folder, joined onto
            ``original_data_path``.
        excel: Excel file handed to ``pandas.read_excel``.
        sheet_name: Sheet to read from ``excel``.
        feature_data_path: Forwarded unchanged to ``__calc_features``.

    Returns:
        Whatever ``__calc_features`` returns for the prepared training data
        under the ``"feature_train"`` label.
    """
    # Cells holding "NA" are read as missing values.
    sheet = pd.read_excel(excel, sheet_name, index_col=None, na_values=["NA"])
    # Keep only the rows that have no missing value.
    complete_rows = sheet.dropna(axis=0)

    preprocessed_path = os.path.join(original_data_path, preprocessed_dir)
    PublicSupport.create_path(preprocessed_path)

    images = SampleManager.prepare_preprocessing_image(
        complete_rows, preprocessed_path, original_data_path, file_column_name
    )
    samples = SampleManager.prepare_image_data(
        complete_rows,
        images,
        file_column_name,
        color_column_name,
        quality_column_name,
        subjective_column_name,
    )
    return __calc_features(samples, "feature_train", feature_data_path)
def _calc_predict_features(predict_data_path, preprocessed_dir, feature_data_path):
    """Prepare the ``*.jpg`` prediction images and compute their features.

    Args:
        predict_data_path: Folder passed to the predictor manager as the
            source of the prediction data; the preprocessing folder is
            created inside it.
        preprocessed_dir: Name of the preprocessing folder, joined onto
            ``predict_data_path``.
        feature_data_path: Forwarded unchanged to ``__calc_features``.

    Returns:
        Whatever ``__calc_features`` returns for the prepared prediction data
        under the ``"feature_prediction"`` label.
    """
    work_dir = os.path.join(predict_data_path, preprocessed_dir)
    PublicSupport.create_path(work_dir)

    images = PredictorManager.prepare_preprocessing_image(
        work_dir, predict_data_path, "*.jpg"
    )
    samples = PredictorManager.prepare_image_data(
        images, file_column_name, subjective_column_name
    )
    return __calc_features(samples, "feature_prediction", feature_data_path)
) # store prediction result PublicSupport.save_dataframe( all_df, os.path.join(output_result_path, "prediction" + datetime.now().strftime("%Y-%m-%d %H.%M.%S")) ) if len(sys.argv) < 2: raise ValueError("Usage:", sys.argv[0], " Missing some argument to indicate input files") json_dict = PublicSupport.read_json(sys.argv[1]) # input folder struct data_home = os.path.abspath(json_dict["data_home"]) PublicSupport.create_path(data_home) original_data_home = os.path.join(data_home, json_dict["original_data_home"]) PublicSupport.create_path(original_data_home) predict_data_home = os.path.join(data_home, json_dict["predict_data_home"]) PublicSupport.create_path(predict_data_home) feature_data_home = os.path.join(data_home, json_dict["feature_data_home"]) PublicSupport.create_path(feature_data_home) model_data_home = os.path.join(data_home, json_dict["model_data_home"]) PublicSupport.create_path(model_data_home) output_result_home = os.path.join(data_home, json_dict["output_result_home"]) PublicSupport.create_path(output_result_home) preprocessed_folder = json_dict["preprocessed_folder"] # input excel file for subjective score excel_file = os.path.join(original_data_home, json_dict["excel_file_path"])