コード例 #1
0
def train_for_classification():
    """Run the train/evaluate pipeline once per entry of ``list_vals``.

    For every name in the module-level ``list_vals`` this:

    1. reads the CSV found at ``input_dict[name]``;
    2. stores the frame's index in a new ``u_id`` column;
    3. renames ``STATUS`` to ``desc`` and the column called ``name`` to
       ``label``;
    4. takes the number of distinct ``label`` values as the class count;
    5. hands the frame to ``load_datasets`` and the resulting loaders to
       ``start_epochs`` (5 epochs);
    6. writes the ids, targets and outputs returned by ``start_epochs`` to
       ``predicted_<name>.csv`` under ``root_dir``.

    ``load_datasets`` and ``start_epochs`` are defined elsewhere; they are
    presumably responsible for the train/test split and for writing the
    model directory and metrics JSON whose paths are passed in -- verify
    against their definitions.
    """
    for target in list_vals:
        frame = pd.read_csv(input_dict[target])
        # Keep the original row index around as an explicit id column.
        frame['u_id'] = frame.index
        # frame = process_df(frame)
        column_mapping = {"STATUS": "desc", target: "label"}
        frame.rename(columns=column_mapping, inplace=True)

        # Class count here is the number of distinct label values.
        class_count = len(frame['label'].unique())

        model_dir = os.path.join(root_dir, "classify_dict_" + target)
        metrics_path = os.path.join(
            root_dir, "accuracy_metrics_" + target + ".json")

        train_loader, test_loader = load_datasets(
            frame, train_size=0.8, number_of_classes=class_count)
        ids, targets, outputs = start_epochs(
            train_loader,
            test_loader,
            metrics_path,
            model_dir,
            epochs=5,
            number_of_classes=class_count)

        # Turn each result into a single column, then place them side by side.
        as_columns = [
            values.reshape(-1, 1) for values in (ids, targets, outputs)
        ]
        stacked = np.concatenate(as_columns, axis=1)

        results = pd.DataFrame(stacked,
                               columns=['id', 'original', 'predicted'])
        output_csv = os.path.join(root_dir, "predicted_" + target + ".csv")
        results.to_csv(output_csv, index=False, header=True)
コード例 #2
0
def train_classification():
    """Run the train/evaluate pipeline on the dataset at ``final_data``.

    Reads the CSV at the module-level ``final_data`` path, passes it through
    ``setup_data``, and derives the class count as the largest ``label``
    value plus one. The frame is then handed to ``load_datasets`` and the
    resulting loaders to ``start_epochs`` (20 epochs). The ids, targets and
    outputs returned by ``start_epochs`` are written to ``predicted.csv``
    under ``root_dir``.

    ``setup_data``, ``load_datasets`` and ``start_epochs`` are defined
    elsewhere. The ``max + 1`` class count presumably relies on labels
    being integer-encoded starting at zero -- verify against ``setup_data``.
    """
    frame = setup_data(pd.read_csv(final_data))

    # Class count here is the highest label value plus one.
    distinct_labels = frame['label'].unique()
    class_count = max(distinct_labels) + 1

    model_dir = os.path.join(root_dir, "classify_dict")
    metrics_path = os.path.join(root_dir, "accuracy_metrics.json")

    train_loader, test_loader = load_datasets(
        frame, train_size=0.8, number_of_classes=class_count)
    ids, targets, outputs = start_epochs(
        train_loader,
        test_loader,
        metrics_path,
        model_dir,
        epochs=20,
        number_of_classes=class_count)

    # Turn each result into a single column, then place them side by side.
    as_columns = [values.reshape(-1, 1) for values in (ids, targets, outputs)]
    stacked = np.concatenate(as_columns, axis=1)

    results = pd.DataFrame(stacked, columns=['id', 'original', 'predicted'])
    output_csv = os.path.join(root_dir, "predicted.csv")
    results.to_csv(output_csv, index=False, header=True)