コード例 #1
0
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    f1_score,
)
from utils.io_utils import yaml_loader, get_data, save_model, save_transformers
from utils.training_utils import (
    build_features_pipeline,
    build_label_encoder,
    build_model,
    early_stopping,
)

# Load the run configuration, then the dataset that configuration describes.
config = yaml_loader("./config/config.yml")
data = get_data(config)

# Both preprocessing objects are constructed from the same configuration.
features_pipeline = build_features_pipeline(config)
label_encoder = build_label_encoder(config)

# Separate the target column from every other column of the frame.
target_column = config["features"]["target"]
is_feature_column = data.columns != target_column
X = data.loc[:, is_feature_column]
y = data[target_column]

# Shuffled 75/25 split, stratified on the target, with a fixed seed so the
# partition is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    stratify=y,
    random_state=12345,
)

# The feature pipeline is fitted on the training split only and then reused
# unchanged on the test split.
X_train = features_pipeline.fit_transform(X_train)
X_test = features_pipeline.transform(X_test)
# NOTE(review): no fit of label_encoder is visible before this transform --
# presumably build_label_encoder returns an already-fitted encoder; confirm.
y_train = label_encoder.transform(y_train)
コード例 #2
0
import pandas as pd
from utils.dataset_utils import (
    split_list,
    mount_dataset,
    process_images,
)
from utils.io_utils import txt_loader, yaml_loader

# Dataset-building configuration; every file location used below lives under
# its "paths" section.
config = yaml_loader("./config/dataset_config.yml")
paths = config["paths"]

# Category lookup: first field of each parsed entry -> second field as int.
# The loader is asked to skip the first two lines of the file.
category_list = split_list(txt_loader(paths["categories_path"], skip_lines=2))
category_dict = {entry[0]: int(entry[1]) for entry in category_list}

# Attribute lookup, built the same way from the attributes file.
attribute_list = split_list(txt_loader(paths["attributes_path"], skip_lines=2))
attribute_dict = {entry[0]: int(entry[1]) for entry in attribute_list}

# Mount each split from its own set of "<split>_*_path" configuration entries.
datasets = ["train", "test", "val"]
dataset_dict = {}
for dataset in datasets:
    dataset_dict[dataset] = mount_dataset(
        files_path=paths[f"{dataset}_files_path"],
        categories_path=paths[f"{dataset}_categories_path"],
        attributes_path=paths[f"{dataset}_attributes_path"],
        bboxes_path=paths[f"{dataset}_bboxes_path"],
        category_dict=category_dict,
        attribute_dict=attribute_dict,
    )

# Stack the splits in insertion order (train, test, val) and renumber the rows.
stacked_splits = pd.concat(dataset_dict.values())
full_dataset = stacked_splits.reset_index(drop=True)

# Prepend the configured data folder to every entry of the "file" column.
full_dataset["file"] = [
    f'{paths["data_folder_prefix"]}/{file_name}'
    for file_name in full_dataset["file"]
]