コード例 #1
0
ファイル: classifier.py プロジェクト: Nicxzmiller/StudentSs
def main():
    """Load the dataset, derive nominal features, then train and evaluate a classifier."""
    data_dict = load_dataset()
    nominal_data = create_nominal_dataset(data_dict)
    data_dict['nominal'] = nominal_data
    # random_state should be an int seed; the original passed True, which
    # sklearn silently coerces to the seed 1 — make that explicit.
    data_train, data_test, label_train, label_test = train_test_split(
        data_dict['nominal'], data_dict['labels'],
        test_size=0.20, random_state=1)
    model = train_classifier(data_train, label_train)
    evaluate_model(model, data_test, label_test)
コード例 #2
0
def train_classifiers(args,
                      classifier_paths,
                      model,
                      tokenizer,
                      training_data_limit,
                      training_role_set,
                      training_case_set,
                      balanced=False,
                      average=False):
    """Train one case classifier per transformer layer and pickle each to disk.

    Loads train/test CaseDatasets from ``args.train_lang_base_path``, then for
    every layer whose classifier file in *classifier_paths* does not yet exist,
    trains a classifier on that layer's representations, evaluates it on the
    source-language test split, and pickles
    (classifier, label set, labeldict, test accuracy, case distribution).
    Exits the process if the train set is smaller than *training_data_limit*.
    """
    print("Need to train classifiers!")
    print(f"Loading the source train set, with limit {training_data_limit}")
    train_set = data.CaseDataset(
        args.train_lang_base_path + "-train.conllu",
        model,
        tokenizer,
        limit=training_data_limit,
        case_set=training_case_set,
        role_set=training_role_set,
        balanced=balanced,
        average=average,
    )
    case_distribution = train_set.get_case_distribution()
    print(
        f"Length of train set is {len(train_set)}, limit is {training_data_limit}"
    )
    # Refuse to train on an under-sized sample — results would not be comparable.
    if len(train_set) < training_data_limit:
        print("Too small! Exiting")
        sys.exit()
    test_set = data.CaseDataset(
        args.train_lang_base_path + "-test.conllu",
        model,
        tokenizer,
        limit=TEST_DATA_LIMIT,
        case_set=training_case_set,
        average=average,
    )
    # Layers run 0..num_hidden_layers inclusive (the +1 covers the embedding
    # layer); iterate highest layer first.
    num_layers = model.config.num_hidden_layers
    for layer_num in reversed(range(num_layers + 1)):
        target_path = classifier_paths[layer_num]
        if os.path.exists(target_path):
            # A classifier for this layer was already trained and saved.
            continue
        layer_train = data.CaseLayerDataset(train_set, layer_num=layer_num)
        print("train dataset labeldict", layer_train.labeldict)
        print("Training on", len(layer_train), "data points.")
        clf = train_classifier(layer_train)
        print("Trained a case classifier!")
        # Evaluate with the training labeldict so label ids line up.
        layer_test = data.CaseLayerDataset(
            test_set, layer_num=layer_num, labeldict=layer_train.labeldict)
        test_accuracy = eval_classifier(clf, layer_test)
        print(
            f"Accuracy on test set of training language: {test_accuracy}")
        print(f"Saving classifier to {target_path}")
        with open(target_path, 'wb') as pkl_file:
            pickle.dump((clf, layer_train.get_label_set(),
                         layer_train.labeldict, test_accuracy,
                         case_distribution), pkl_file)
コード例 #3
0
        "Error message: Please use a valid model (vgg11, vgg13, vgg16 or vgg19)"
    )
    sys.exit()

# Validate CLI arguments up front; print a human-readable message and exit
# on the first invalid value, before any heavy work starts.
acceptable_gpu = ["gpu", "no_gpu"]

checks = [
    (gpu not in acceptable_gpu,
     "Error message: Please use a valid gpu (gpu or no_gpu)"),
    (epochs < 1,
     "Error message: Please select a number greater than 0 for epochs"),
    (hidden_units < 1,
     "Error message: Please select a number greater than 0 for hidden_units"),
    (learning_rate <= 0,
     "Error message: Please select a number greater than 0 for learning rate"),
]
for failed, message in checks:
    if failed:
        print(message)
        sys.exit()

# All arguments validated — build, train, and checkpoint the model.
train_loader, valid_loader, test_loader = utils.load_data(data_dir)
model = utils.build_classifier(arch, hidden_units)
model, optimizer = utils.train_classifier(model, train_loader, valid_loader,
                                          gpu, learning_rate, epochs)
utils.save_checkpoint(arch, hidden_units, model, optimizer, save_dir)
コード例 #4
0
# Optional parameter sweep: when enabled, train a classifier for every row of
# test_parameters.csv and record feature sizes, accuracy, and timings back
# into the dataframe.
explore = False
if explore:
    test_parameters = pd.read_csv('test_parameters.csv')
    for idx, row in test_parameters.iterrows():
        print('Test', idx)
        print('', row.clf_type, 'classifier',
              '\n', row.cspace, 'color space',
              '\n', row.spatial_size, 'spatial binning',
              '\n', row.hist_bins, 'histogram bins',
              '\n', row.orient, 'orientations',
              '\n', row.pix_per_cell, 'pixels per cell and',
              '\n', row.cell_per_block, 'cells per block',
              '\n', row.hog_channel, 'hog channel')

        (clf, X_scaler, feat_shape, accuracy,
         time_extract, time_train, time_predict) = train_classifier(
            cars, notcars, sample_size, row.clf_type,
            (row.spatial_size, row.spatial_size),
            row.hist_bins, row.cspace, row.orient,
            row.pix_per_cell, row.cell_per_block, row.hog_channel)

        spat = feat_shape["spat"][0]
        chist = feat_shape["chist"][0]
        fhog = feat_shape["hog"][0]
        # Write the per-test results back into the dataframe row.
        results = {
            'feat_shape_spat': spat,
            'feat_shape_chist': chist,
            'feat_shape_hog': fhog,
            'sum_feat_shape': spat + chist + fhog,
            'accuracy': accuracy,
            'time_extract': time_extract,
            'time_train': time_train,
            'time_predict': time_predict,
        }
        for column, value in results.items():
            test_parameters.loc[idx, column] = value

        print(feat_shape, 'feature shape')
        print(accuracy, 'accuracy')
コード例 #5
0
import numpy as np
import cv2
import os

import utils as ut

# Load labeled face images from disk.
# NOTE(review): assumes `ids` is a label per face, parallel to `faces` —
# confirm against utils.load_training_data.
print("Loading images...")
faces, ids = ut.load_training_data("train_images")
print("{} images loaded".format(len(faces)))

# Train the face-recognition model on the loaded data.
print("Starting training...")
model = ut.train_classifier(faces, ids)
print("Model trained. Saving...")
# TODO: save model
# NOTE(review): persistence is not implemented — the "Model saved" message
# below is printed even though nothing is actually written to disk.
print("Model saved")