def predict(
    predict_data_path,
    preprocessed_dir,
    feature_data_path,
    color_models,
    quality_models,
    mixed_models,
    output_result_path,
):
    # load cached prediction features if requested, otherwise compute them from the raw images
    if b_load_predict_feat:
        feat_df = _load_features(feature_data_home, "*prediction*.csv")
    else:
        feat_df = _calc_predict_features(predict_data_path, preprocessed_dir, feature_data_path)

    x_data = feat_df.drop(subjective_column_name, axis=1, level=0)

    # run each trained model family and collect the outputs side by side in one frame
    color_df = pd.DataFrame(color_models.predict(x_data[hue_column_name]))
    quality_df = pd.DataFrame(quality_models.predict(x_data))
    mixed_df = pd.DataFrame(mixed_models.predict(x_data))
    all_df = pd.concat(
        [color_df, quality_df, mixed_df, feat_df[subjective_column_name]],
        axis=1,
        keys=[
            color_models.__class__.__name__,
            quality_models.__class__.__name__,
            mixed_models.__class__.__name__,
            subjective_column_name,
        ],
    )

    # store prediction result
    PublicSupport.save_dataframe(
        all_df, os.path.join(output_result_path, "prediction" + datetime.now().strftime("%Y-%m-%d %H.%M.%S"))
    )

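# Usage sketch (illustrative; the trained model objects come from train() below, and the
# *_home / preprocessed_folder variables are created by the configuration block at the
# bottom of this module):
#
# predict(predict_data_home, preprocessed_folder, feature_data_home,
#         color_models, quality_models, mixed_models, output_result_home)
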
def serializeModel(self, data_num, x_dim, y_dim, model_constructor, model_deserializer):
    self.assertGreaterEqual(data_num, 1)
    self.assertGreaterEqual(x_dim, 1)
    self.assertGreaterEqual(y_dim, 1)

    # build random training data with the requested dimensionality
    if x_dim > 1:
        x_data = pd.DataFrame(np.random.randn(data_num, x_dim))
    else:
        x_data = pd.Series(np.random.randn(data_num))
    if y_dim > 1:
        y_data = pd.DataFrame(np.random.randn(data_num, y_dim))
    else:
        y_data = pd.Series(np.random.randn(data_num))

    # train, serialize, and check that the round trip preserves the model dimensions
    original = model_constructor(x_dim, y_dim)
    original.validation(x_data, y_data, 0.25)
    path = os.path.join(os.path.curdir, 'serialized')
    PublicSupport.create_path(path)
    file = original.save(path)
    self.assertTrue(os.path.isfile(file))
    new = model_deserializer(path)
    self.assertEqual(new.x_dim, x_dim)
    self.assertEqual(new.y_dim, y_dim)

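# Usage sketch (illustrative only): a concrete test could exercise the round trip like
# this. ColorRegression is the constructor from the training module; its load() classmethod
# is a hypothetical stand-in for whatever deserializer the real tests pass in.
#
# def test_serialize_color_model(self):
#     self.serializeModel(100, 3, 1, ColorRegression, ColorRegression.load)
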
def __calc_features(images_data, feature_name, feature_data_path):
    # feature extraction
    feat_df = pd.DataFrame([FeatureManager.compute_feats(img).append(info) for img, info in images_data])
    PublicSupport.save_dataframe(
        feat_df, os.path.join(feature_data_path, feature_name + datetime.now().strftime("%Y-%m-%d %H.%M.%S"))
    )
    return feat_df

def save(self, path):
    json_file = os.path.join(path, self.__class__.__name__ + '.json')
    PublicSupport.write_json(
        {RegressionManager.x_dim_name: self.x_dim, RegressionManager.y_dim_name: self.y_dim}, json_file
    )
    scalar_file = os.path.join(path, self.__class__.__name__ + '.scalar')
    self.scalar.save(scalar_file)
    for model in self.model_list:
        model.save(os.path.join(path, model.serialize_id))
    return json_file

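# On-disk layout produced by save() (illustrative; "QualityRegression" is only an example
# class name and the serialize_id values depend on the concrete models in model_list):
#
#   <path>/QualityRegression.json    - x/y dimensionality metadata (see deserialize_json)
#   <path>/QualityRegression.scalar  - serialized scaler state
#   <path>/<model.serialize_id>      - one file per wrapped model
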
def _calc_train_features(original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path):
    excel_dataframe = (pd.read_excel(excel, sheet_name, index_col=None, na_values=["NA"])).dropna(axis=0)
    preprocessed_dir = os.path.join(original_data_path, preprocessed_dir)
    PublicSupport.create_path(preprocessed_dir)
    image_dict = SampleManager.prepare_preprocessing_image(
        excel_dataframe, preprocessed_dir, original_data_path, file_column_name
    )
    training_data = SampleManager.prepare_image_data(
        excel_dataframe, image_dict, file_column_name, color_column_name, quality_column_name, subjective_column_name
    )
    return __calc_features(training_data, "feature_train", feature_data_path)

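# Expected spreadsheet layout (illustrative; the real column headers are taken from the
# file_column_name, color_column_name and quality_column_name globals, and the sample
# values below are made up). Rows containing NA in any column are dropped before
# preprocessing:
#
#   file          color  quality
#   sample01.jpg  3.5    4.0
#   sample02.jpg  2.0    3.0
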
def map_crop_type(path):
    name = PublicSupport.extract_filename_by_path(path)
    crop_type = CropType.none
    if crop_by_dot in name.lower():
        crop_type = CropType.dot
    elif crop_by_frame in name.lower():
        crop_type = CropType.frame
    return name, crop_type

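# Examples (illustrative; assumes crop_by_dot == "dot" and crop_by_frame == "frame"):
#
#   "leaf_dot_01.jpg"   -> CropType.dot
#   "leaf_frame_01.jpg" -> CropType.frame
#   "leaf_01.jpg"       -> CropType.none
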
def train(
    original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path, model_data_path, output_result_path
):
    if b_load_train_feat:
        feat_df = _load_features(feature_data_home, "*train*.csv")
    else:
        feat_df = _calc_train_features(original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path)

    x_data = feat_df.drop(subjective_column_name, axis=1, level=0)
    y_data = feat_df[subjective_column_name]

    # simple linear models trained on the color score only (hue features as input)
    color_x = x_data[hue_column_name]
    color_y = y_data[color_column_name]
    color_models = ColorRegression(feature_dimensions(color_x), feature_dimensions(color_y))
    model_score_dict = {color_models.__class__.__name__: color_models.validation(color_x, color_y, 0.25)}
    color_models.save(model_data_path)

    # several kinds of models trained on the quality score (all features as input)
    quality_x = x_data
    quality_y = y_data[quality_column_name]
    quality_models = QualityRegression(feature_dimensions(quality_x), feature_dimensions(quality_y))
    model_score_dict.update({quality_models.__class__.__name__: quality_models.validation(quality_x, quality_y, 0.25)})
    quality_models.save(model_data_path)

    # models trained on both the color and quality scores
    mixed_x = x_data
    mixed_y = y_data[[color_column_name, quality_column_name]]
    mixed_models = MixedRegression(feature_dimensions(mixed_x), feature_dimensions(mixed_y))
    model_score_dict.update({mixed_models.__class__.__name__: mixed_models.validation(mixed_x, mixed_y, 0.25)})
    mixed_models.save(model_data_path)

    # store cross-validation scores
    PublicSupport.write_json(
        model_score_dict,
        os.path.join(output_result_path, "model_score" + datetime.now().strftime("%Y-%m-%d %H.%M.%S") + ".json"),
    )
    return color_models, quality_models, mixed_models

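# Usage sketch (illustrative; the Excel file name and sheet name are hypothetical
# placeholders, and the *_home / preprocessed_folder variables are created by the
# configuration block at the bottom of this module):
#
# color_models, quality_models, mixed_models = train(
#     original_data_home,
#     preprocessed_folder,
#     os.path.join(original_data_home, "samples.xlsx"),
#     "Sheet1",
#     feature_data_home,
#     model_data_home,
#     output_result_home,
# )
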
def _load_features(feat_path, pattern):
    # reuse the most recently stored feature file instead of recomputing features
    return PublicSupport.load_dataframe(PublicSupport.find_newest_file(feat_path, pattern))

def _calc_predict_features(predict_data_path, preprocessed_dir, feature_data_path):
    preprocessed_path = os.path.join(predict_data_path, preprocessed_dir)
    PublicSupport.create_path(preprocessed_path)
    image_dict = PredictorManager.prepare_preprocessing_image(preprocessed_path, predict_data_path, "*.jpg")
    prediction_data = PredictorManager.prepare_image_data(image_dict, file_column_name, subjective_column_name)
    return __calc_features(prediction_data, "feature_prediction", feature_data_path)

if len(sys.argv) < 2:
    raise ValueError("Usage: " + sys.argv[0] + " <config.json> - missing the argument pointing to the input configuration file")
json_dict = PublicSupport.read_json(sys.argv[1])

# input folder structure, created on demand from the configuration file
data_home = os.path.abspath(json_dict["data_home"])
PublicSupport.create_path(data_home)
original_data_home = os.path.join(data_home, json_dict["original_data_home"])
PublicSupport.create_path(original_data_home)
predict_data_home = os.path.join(data_home, json_dict["predict_data_home"])
PublicSupport.create_path(predict_data_home)
feature_data_home = os.path.join(data_home, json_dict["feature_data_home"])
PublicSupport.create_path(feature_data_home)
model_data_home = os.path.join(data_home, json_dict["model_data_home"])
PublicSupport.create_path(model_data_home)
output_result_home = os.path.join(data_home, json_dict["output_result_home"])
PublicSupport.create_path(output_result_home)
preprocessed_folder = json_dict["preprocessed_folder"]

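# Example configuration file passed as sys.argv[1] (the key names match the lookups above;
# all folder values are hypothetical placeholders):
#
# {
#     "data_home": "./data",
#     "original_data_home": "original",
#     "predict_data_home": "predict",
#     "feature_data_home": "features",
#     "model_data_home": "models",
#     "output_result_home": "results",
#     "preprocessed_folder": "preprocessed"
# }
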
def deserialize_json(path, serialized_id):
    json_dict = PublicSupport.read_json(os.path.join(path, serialized_id + ".json"))
    return json_dict[RegressionManager.x_dim_name], json_dict[RegressionManager.y_dim_name]

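# Round-trip sketch (illustrative): the serialized_id matches the class name used by the
# save() method above, so the dimensions written there can be read back like this.
# model_data_home and ColorRegression are only example names here.
#
# color_models.save(model_data_home)                                  # writes ColorRegression.json
# x_dim, y_dim = deserialize_json(model_data_home, ColorRegression.__name__)
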
def load_images(image_collection):
    return {
        PublicSupport.extract_filename_by_path(img_path): img
        for img, img_path in zip(image_collection, image_collection.files)
    }

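# Usage sketch (illustrative; assumes the argument is a skimage.io.ImageCollection, which
# exposes the matching .files attribute used above):
#
# from skimage import io
# images_by_name = load_images(io.ImageCollection(os.path.join(predict_data_home, "*.jpg")))
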