Example no. 1
def predict(
    predict_data_path,
    preprocessed_dir,
    feature_data_path,
    color_models,
    quality_models,
    mixed_models,
    output_result_path,
):
    # either reuse the newest cached prediction features or recompute them from the raw images
    if b_load_predict_feat:
        feat_df = _load_features(feature_data_home, "*prediction*.csv")
    else:
        feat_df = _calc_predict_features(predict_data_path, preprocessed_dir, feature_data_path)

    # drop the subjective score columns so only the extracted features are fed to the models
    x_data = feat_df.drop(subjective_column_name, axis=1, level=0)

    # run the three regressors: the color model uses only the hue features, the others use them all
    color_df = pd.DataFrame(color_models.predict(x_data[hue_column_name]))
    quality_df = pd.DataFrame(quality_models.predict(x_data))
    mixed_df = pd.DataFrame(mixed_models.predict(x_data))

    all_df = pd.concat(
        [color_df, quality_df, mixed_df, feat_df[subjective_column_name]],
        axis=1,
        keys=[
            color_models.__class__.__name__,
            quality_models.__class__.__name__,
            mixed_models.__class__.__name__,
            subjective_column_name,
        ],
    )

    # store prediction result
    PublicSupport.save_dataframe(
        all_df, os.path.join(output_result_path, "prediction" + datetime.now().strftime("%Y-%m-%d %H.%M.%S"))
    )
Example no. 2
    def serializeModel(self, data_num, x_dim, y_dim, model_constructor, model_deserializer):
        self.assertGreaterEqual(data_num, 1)
        self.assertGreaterEqual(x_dim, 1)
        self.assertGreaterEqual(y_dim, 1)

        # random dummy data: a DataFrame when multi-dimensional, a Series otherwise
        if x_dim > 1:
            x_data = pd.DataFrame(np.random.randn(data_num, x_dim))
        else:
            x_data = pd.Series(np.random.randn(data_num))

        if y_dim > 1:
            y_data = pd.DataFrame(np.random.randn(data_num, y_dim))
        else:
            y_data = pd.Series(np.random.randn(data_num))

        # train/validate the model, serialize it to disk, then rebuild it and check the dimensions
        original = model_constructor(x_dim, y_dim)
        original.validation(x_data, y_data, 0.25)

        path = os.path.join(os.path.curdir, 'serialized')
        PublicSupport.create_path(path)
        file = original.save(path)
        self.assertTrue(os.path.isfile(file))
        new = model_deserializer(path)

        self.assertEqual(new.x_dim, x_dim)
        self.assertEqual(new.y_dim, y_dim)
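A sketch of how this round-trip helper might be driven from a unittest.TestCase, assuming serializeModel is defined on the test class and reusing ColorRegression (Example no. 7) with deserialize_json (Example no. 11) as a makeshift deserializer; this wiring is an assumption, not the project's actual test suite.

import unittest


class RegressionSerializationTest(unittest.TestCase):
    # serializeModel() from above is assumed to be a method of this TestCase

    def test_color_regression_round_trip(self):
        # makeshift deserializer: rebuild the model from the dimensions in its JSON sidecar
        def rebuild(path):
            x_dim, y_dim = deserialize_json(path, ColorRegression.__name__)
            return ColorRegression(x_dim, y_dim)

        # 50 random samples, 3 input features, 1 target value
        self.serializeModel(50, 3, 1, ColorRegression, rebuild)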
Example no. 3
def __calc_features(images_data, feature_name, feature_data_path):
    # feature extraction: one row per image, the computed feature Series extended with the sample info
    feat_df = pd.DataFrame([FeatureManager.compute_feats(img).append(info) for img, info in images_data])
    PublicSupport.save_dataframe(
        feat_df, os.path.join(feature_data_path, feature_name + datetime.now().strftime("%Y-%m-%d %H.%M.%S"))
    )

    return feat_df
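FeatureManager.compute_feats(img) is assumed to return a pandas Series here; note that Series.append was removed in pandas 2.0, so on recent pandas versions the same row construction would use pd.concat, roughly:

import pandas as pd

# equivalent row construction for pandas >= 2.0, where Series.append no longer exists
feat_df = pd.DataFrame([pd.concat([FeatureManager.compute_feats(img), info]) for img, info in images_data])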
Example no. 4
    def save(self, path):
        json_file = os.path.join(path, self.__class__.__name__ + '.json')
        PublicSupport.write_json({RegressionManager.x_dim_name: self.x_dim, RegressionManager.y_dim_name: self.y_dim},
                                 json_file)
        scalar_file = os.path.join(path, self.__class__.__name__ + '.scalar')
        self.scalar.save(scalar_file)
        for model in self.model_list:
            model.save(os.path.join(path, model.serialize_id))
        return json_file
Example no. 5
def _calc_train_features(original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path):
    # read the per-sample annotations from the Excel sheet, dropping rows with missing values
    excel_dataframe = (pd.read_excel(excel, sheet_name, index_col=None, na_values=["NA"])).dropna(axis=0)
    preprocessed_dir = os.path.join(original_data_path, preprocessed_dir)
    PublicSupport.create_path(preprocessed_dir)
    image_dict = SampleManager.prepare_preprocessing_image(
        excel_dataframe, preprocessed_dir, original_data_path, file_column_name
    )
    training_data = SampleManager.prepare_image_data(
        excel_dataframe, image_dict, file_column_name, color_column_name, quality_column_name, subjective_column_name
    )
    return __calc_features(training_data, "feature_train", feature_data_path)
Example no. 6
def map_crop_type(path):
    # infer how the sample was cropped from keywords contained in the file name
    name = PublicSupport.extract_filename_by_path(path)
    crop_type = CropType.none
    if crop_by_dot in name.lower():
        crop_type = CropType.dot
    elif crop_by_frame in name.lower():
        crop_type = CropType.frame

    return name, crop_type
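A small usage sketch, with placeholder values for the module-level keywords crop_by_dot and crop_by_frame (the project's real values are not shown here):

# placeholder keyword values used only for this illustration
crop_by_dot = "dot"
crop_by_frame = "frame"

name, crop_type = map_crop_type("./samples/sample_dot_001.jpg")
# "dot" appears in the lowercased file name, so crop_type is CropType.dot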
Example no. 7
def train(
    original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path, model_data_path, output_result_path
):
    # either reuse the newest cached training features or recompute them from the original images
    if b_load_train_feat:
        feat_df = _load_features(feature_data_home, "*train*.csv")
    else:
        feat_df = _calc_train_features(original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path)

    x_data = feat_df.drop(subjective_column_name, axis=1, level=0)
    y_data = feat_df[subjective_column_name]

    # simple linear models trained on the color score
    color_x = x_data[hue_column_name]
    color_y = y_data[color_column_name]
    color_models = ColorRegression(feature_dimensions(color_x), feature_dimensions(color_y))
    model_score_dict = {color_models.__class__.__name__: color_models.validation(color_x, color_y, 0.25)}
    color_models.save(model_data_path)

    # several kinds of models trained on the quality score
    quality_x = x_data
    quality_y = y_data[quality_column_name]
    quality_models = QualityRegression(feature_dimensions(quality_x), feature_dimensions(quality_y))
    model_score_dict.update({quality_models.__class__.__name__: quality_models.validation(quality_x, quality_y, 0.25)})
    quality_models.save(model_data_path)

    # models trained on both the color and quality scores
    mixed_x = x_data
    mixed_y = y_data[[color_column_name, quality_column_name]]
    mixed_models = MixedRegression(feature_dimensions(mixed_x), feature_dimensions(mixed_y))
    model_score_dict.update({mixed_models.__class__.__name__: mixed_models.validation(mixed_x, mixed_y, 0.25)})
    mixed_models.save(model_data_path)

    # store the cross-validation scores
    PublicSupport.write_json(
        model_score_dict,
        os.path.join(output_result_path, "model_score" + datetime.now().strftime("%Y-%m-%d %H.%M.%S") + ".json"),
    )

    return color_models, quality_models, mixed_models
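A hedged sketch of the end-to-end wiring, using the folders prepared in the main script (Example no. 10); the Excel workbook path and sheet name are placeholders, not the project's actual files:

# train the three regressors, then score the prediction images with them
color_models, quality_models, mixed_models = train(
    original_data_home, preprocessed_folder, "scores.xlsx", "Sheet1",
    feature_data_home, model_data_home, output_result_home,
)
predict(
    predict_data_home, preprocessed_folder, feature_data_home,
    color_models, quality_models, mixed_models, output_result_home,
)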
Example no. 8
def _load_features(feat_path, pattern):
    return PublicSupport.load_dataframe(PublicSupport.find_newest_file(feat_path, pattern))
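The loader delegates the lookup to PublicSupport.find_newest_file; a minimal sketch of such a helper, assuming it simply globs the pattern inside the folder and returns the most recently modified match:

import glob
import os


def find_newest_file(feat_path, pattern):
    # hypothetical re-implementation: newest match of the glob pattern by modification time
    matches = glob.glob(os.path.join(feat_path, pattern))
    if not matches:
        raise FileNotFoundError("no file matching " + pattern + " in " + feat_path)
    return max(matches, key=os.path.getmtime)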
Example no. 9
def _calc_predict_features(predict_data_path, preprocessed_dir, feature_data_path):
    # preprocess every *.jpg under the prediction folder, then extract features for each image
    preprocessed_path = os.path.join(predict_data_path, preprocessed_dir)
    PublicSupport.create_path(preprocessed_path)
    image_dict = PredictorManager.prepare_preprocessing_image(preprocessed_path, predict_data_path, "*.jpg")
    prediction_data = PredictorManager.prepare_image_data(image_dict, file_column_name, subjective_column_name)
    return __calc_features(prediction_data, "feature_prediction", feature_data_path)
Example no. 10
if len(sys.argv) < 2:
    raise ValueError("Usage: " + sys.argv[0] + " <config.json> - the JSON configuration file argument is missing")

json_dict = PublicSupport.read_json(sys.argv[1])

# input folder structure
data_home = os.path.abspath(json_dict["data_home"])
PublicSupport.create_path(data_home)
original_data_home = os.path.join(data_home, json_dict["original_data_home"])
PublicSupport.create_path(original_data_home)
predict_data_home = os.path.join(data_home, json_dict["predict_data_home"])
PublicSupport.create_path(predict_data_home)
feature_data_home = os.path.join(data_home, json_dict["feature_data_home"])
PublicSupport.create_path(feature_data_home)
model_data_home = os.path.join(data_home, json_dict["model_data_home"])
PublicSupport.create_path(model_data_home)
output_result_home = os.path.join(data_home, json_dict["output_result_home"])
PublicSupport.create_path(output_result_home)
preprocessed_folder = json_dict["preprocessed_folder"]
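For reference, a configuration file with the keys read above could be produced as follows; every value is a placeholder folder name, not the project's actual layout:

import json

config = {
    "data_home": "./data",
    "original_data_home": "original",
    "predict_data_home": "predict",
    "feature_data_home": "features",
    "model_data_home": "models",
    "output_result_home": "results",
    "preprocessed_folder": "preprocessed",
}

# write the configuration that would then be passed as sys.argv[1]
with open("config.json", "w") as fp:
    json.dump(config, fp, indent=4)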
Example no. 11
def deserialize_json(path, serialized_id):
    json_dict = PublicSupport.read_json(os.path.join(path, serialized_id + ".json"))
    return json_dict[RegressionManager.x_dim_name], json_dict[RegressionManager.y_dim_name]
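This reads back the JSON sidecar written by save() in Example no. 4; since that file is named after the model class, the class name doubles as serialized_id. A hedged usage sketch, assuming ColorRegression accepts the two dimensions as in Example no. 7:

# restore only the dimensions; the scaler and the individual sub-models
# live in their own files written by save()
x_dim, y_dim = deserialize_json(model_data_home, ColorRegression.__name__)
color_models = ColorRegression(x_dim, y_dim)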
Example no. 12
def load_images(image_collection):
    # map each file name (without its directory) to the corresponding loaded image
    return {
        PublicSupport.extract_filename_by_path(img_path): img
        for img, img_path in zip(image_collection, image_collection.files)
    }
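load_images expects something that is iterable over images and also exposes a parallel .files list of paths; a minimal usage sketch, assuming scikit-image's ImageCollection is what gets passed in (an assumption, since the caller is not shown here):

from skimage.io import ImageCollection

# ImageCollection iterates over the loaded images and lists their paths in .files
collection = ImageCollection("./data/predict/preprocessed/*.jpg")
images_by_name = load_images(collection)
for name, img in images_by_name.items():
    print(name, img.shape)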