from sklearn.model_selection import train_test_split


def main():
    data_dict = load_dataset()

    # Derive a nominal-feature version of the dataset and keep it
    # alongside the raw data.
    nominal_data = create_nominal_dataset(data_dict)
    data_dict['nominal'] = nominal_data

    # Hold out 20% of the data for evaluation. random_state=True in the
    # original is almost certainly unintended: sklearn treats it as the
    # integer seed 1, so an explicit int is used here to preserve behavior.
    data_train, data_test, label_train, label_test = train_test_split(
        data_dict['nominal'],
        data_dict['labels'],
        test_size=0.20,
        random_state=1)

    model = train_classifier(data_train, label_train)
    evaluate_model(model, data_test, label_test)
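
# A small addition, not in the original: assuming this script is meant to be
# executed directly, the standard entry-point guard would be:
if __name__ == "__main__":
    main()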

import os
import pickle
import sys

import data  # project-local dataset module, assumed importable

# TEST_DATA_LIMIT, train_classifier, and eval_classifier are assumed to be
# defined elsewhere in this module.


def train_classifiers(args, classifier_paths, model, tokenizer,
                      training_data_limit, training_role_set,
                      training_case_set, balanced=False, average=False):
    print("Need to train classifiers!")
    print(f"Loading the source train set, with limit {training_data_limit}")
    src_train = data.CaseDataset(args.train_lang_base_path + "-train.conllu",
                                 model,
                                 tokenizer,
                                 limit=training_data_limit,
                                 case_set=training_case_set,
                                 role_set=training_role_set,
                                 balanced=balanced,
                                 average=average)
    training_case_distribution = src_train.get_case_distribution()
    print(f"Length of train set is {len(src_train)}, "
          f"limit is {training_data_limit}")
    if len(src_train) < training_data_limit:
        print("Too small! Exiting")
        sys.exit()
    src_test = data.CaseDataset(args.train_lang_base_path + "-test.conllu",
                                model,
                                tokenizer,
                                limit=TEST_DATA_LIMIT,
                                case_set=training_case_set,
                                average=average)
    num_layers = model.config.num_hidden_layers

    # Train one case probe per transformer layer, from the top layer down,
    # skipping layers whose classifier has already been saved.
    for layer in reversed(range(num_layers + 1)):
        classifier_path = classifier_paths[layer]
        if os.path.exists(classifier_path):
            continue
        train_dataset = data.CaseLayerDataset(src_train, layer_num=layer)
        print("train dataset labeldict", train_dataset.labeldict)
        print("Training on", len(train_dataset), "data points.")
        classifier = train_classifier(train_dataset)
        print("Trained a case classifier!")
        src_test_dataset = data.CaseLayerDataset(
            src_test, layer_num=layer, labeldict=train_dataset.labeldict)
        src_test_accuracy = eval_classifier(classifier, src_test_dataset)
        print(f"Accuracy on test set of training language: {src_test_accuracy}")
        print(f"Saving classifier to {classifier_path}")
        with open(classifier_path, 'wb') as pkl_file:
            pickle.dump((classifier, train_dataset.get_label_set(),
                         train_dataset.labeldict, src_test_accuracy,
                         training_case_distribution), pkl_file)
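
# A hedged usage sketch, not from the original: reloading one saved per-layer
# classifier. The path is hypothetical; the tuple layout mirrors the
# pickle.dump call above.
import pickle

with open("classifiers/layer_12.pkl", "rb") as pkl_file:
    (classifier, label_set, labeldict,
     src_test_accuracy, training_case_distribution) = pickle.load(pkl_file)
print(f"Reloaded classifier with labels {label_set}, "
      f"source-test accuracy {src_test_accuracy}")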
"Error message: Please use a valid model (vgg11, vgg13, vgg16 or vgg19)" ) sys.exit() acceptable_gpu = ["gpu", "no_gpu"] if gpu not in acceptable_gpu: print("Error message: Please use a valid gpu (gpu or no_gpu)") sys.exit() if epochs < 1: print("Error message: Please select a number greater than 0 for epochs") sys.exit() if hidden_units < 1: print( "Error message: Please select a number greater than 0 for hidden_units" ) sys.exit() if learning_rate <= 0: print( "Error message: Please select a number greater than 0 for learning rate" ) sys.exit() train_loader, valid_loader, test_loader = utils.load_data(data_dir) model = utils.build_classifier(arch, hidden_units) model, optimizer = utils.train_classifier(model, train_loader, valid_loader, gpu, learning_rate, epochs) utils.save_checkpoint(arch, hidden_units, model, optimizer, save_dir)

import pandas as pd

# Parameter-exploration pass, disabled by default: flip `explore` to True to
# sweep the classifier and feature settings listed in test_parameters.csv.
# cars, notcars, sample_size, and train_classifier are assumed to be defined
# earlier in this script.
explore = False
if explore:
    test_parameters = pd.read_csv('test_parameters.csv')
    for i, r in test_parameters.iterrows():
        print('Test', i)
        print('', r.clf_type, 'classifier', '\n',
              r.cspace, 'color space', '\n',
              r.spatial_size, 'spatial binning', '\n',
              r.hist_bins, 'histogram bins', '\n',
              r.orient, 'orientations', '\n',
              r.pix_per_cell, 'pixels per cell and', '\n',
              r.cell_per_block, 'cells per block', '\n',
              r.hog_channel, 'hog channel')
        clf, X_scaler, feat_shape, accuracy, time_extract, time_train, time_predict = \
            train_classifier(cars, notcars, sample_size, r.clf_type,
                             (r.spatial_size, r.spatial_size), r.hist_bins,
                             r.cspace, r.orient, r.pix_per_cell,
                             r.cell_per_block, r.hog_channel)

        # Record the per-feature vector lengths and timings back into the table.
        spat = feat_shape["spat"][0]
        chist = feat_shape["chist"][0]
        fhog = feat_shape["hog"][0]
        test_parameters.loc[i, 'feat_shape_spat'] = spat
        test_parameters.loc[i, 'feat_shape_chist'] = chist
        test_parameters.loc[i, 'feat_shape_hog'] = fhog
        test_parameters.loc[i, 'sum_feat_shape'] = spat + chist + fhog
        test_parameters.loc[i, 'accuracy'] = accuracy
        test_parameters.loc[i, 'time_extract'] = time_extract
        test_parameters.loc[i, 'time_train'] = time_train
        test_parameters.loc[i, 'time_predict'] = time_predict
        print(feat_shape, 'feature shape')
        print(accuracy, 'accuracy')
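
# Hedged notes, not from the original. The attribute accesses above imply
# test_parameters.csv supplies at least these columns:
#   clf_type, cspace, spatial_size, hist_bins, orient, pix_per_cell,
#   cell_per_block, hog_channel
# The loop fills result columns in the DataFrame but never persists them;
# writing them back out could look like (output filename is an assumption):
#   test_parameters.to_csv('test_parameters_results.csv', index=False)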

import numpy as np
import cv2
import os

import utils as ut

print("Loading images...")
faces, ids = ut.load_training_data("train_images")
print("{} images loaded".format(len(faces)))

print("Starting training...")
model = ut.train_classifier(faces, ids)

print("Model trained. Saving...")
# TODO: save model -- a hedged sketch, assuming ut.train_classifier returns an
# OpenCV FaceRecognizer (e.g. LBPH), which serializes via write():
model.write("trained_model.yml")
print("Model saved")
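
# A hedged sketch, not the author's utils module, of what the two helpers used
# above could look like with OpenCV's LBPH face recognizer; requires
# opencv-contrib-python. The filename convention is an assumption.
import os

import cv2
import numpy as np


def load_training_data(folder):
    """Load grayscale face crops; assumes filenames like '<id>_<n>.jpg'."""
    faces, ids = [], []
    for name in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, name), cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue  # skip non-image files
        faces.append(img)
        ids.append(int(name.split("_")[0]))  # label from filename prefix
    return faces, ids


def train_classifier(faces, ids):
    """Train an LBPH recognizer on the loaded faces and integer labels."""
    recognizer = cv2.face.LBPHFaceRecognizer_create()
    recognizer.train(faces, np.array(ids))
    return recognizer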