def show(args):
    """Display samples of the train or test split, starting at an offset.

    Args:
        args: Parsed command-line namespace. The keys read from it are
            ``datadir`` (passed to ``load_data``), ``type`` (``"train"``
            selects the training split, anything else the test split) and
            ``start`` (index of the first sample to show).
    """
    params = vars(args)
    data_bunch = load_data(params["datadir"], 1, 1)

    # Both branches of the original did the same work on different keys.
    split = "train" if params["type"] == "train" else "test"
    data = data_bunch[split + "_data"]
    labels = data_bunch[split + "_label"]

    start = params["start"]
    # Slice the index array exactly like the data so there is one index per
    # shown sample. The previous bound of ``shape[0] - 1`` produced an index
    # array one element shorter than ``data[start:]``.
    indices = np.arange(data.shape[0])[start:]
    show_data(data[start:], labels[start:], indices)
def main():
    """Train an image classifier from the command line and save it.

    Basic usage: python train.py data_directory

    Optional flags: ``--save_dir``, ``--arch``, ``--learning_rate``,
    ``--hidden_units``, ``--epochs`` and ``--gpu``. Unset (or falsy) options
    fall back to: arch ``vgg11``, 4098 hidden units, 5 epochs, learning rate
    0.001 and save directory ``saved_models``.
    """
    parser = argparse.ArgumentParser(description='Optional add-ons.')
    parser.add_argument('data_directory')
    parser.add_argument('--save_dir', action='store')
    parser.add_argument('--arch', action='store')
    parser.add_argument('--learning_rate', action='store', type=float)
    parser.add_argument('--hidden_units', action='store', type=int)
    parser.add_argument('--epochs', action='store', type=int)
    parser.add_argument('--gpu', action='store_true')
    args = parser.parse_args()

    # get category class mapping to names for flowers
    # (loaded here but not referenced again inside this function)
    with open('cat_to_name.json', 'r') as f:
        cat_to_name = json.load(f)

    # capture train and validation data and transform
    train_data, valid_data = th.load_data(args.data_directory)
    trainloader, validloader, class_to_idx = th.transform_data(
        train_data, valid_data)

    # Use the GPU only when it was requested AND is actually available.
    # The previous bitwise ``|`` selected 'cuda' when --gpu was passed on a
    # machine without CUDA, and ignored the flag whenever CUDA was present.
    cuda_available = torch.cuda.is_available()
    if args.gpu and not cuda_available:
        print('CUDA is not available; falling back to CPU.')
    device = torch.device(
        'cuda' if (args.gpu and cuda_available) else 'cpu')

    print('Building model...')

    # set default to vgg11
    args.arch = args.arch or 'vgg11'
    model = models.__dict__[args.arch](pretrained=True)
    model.arch = args.arch

    # update model classifier to customize for our problem
    # (102 category probability calcs)
    hidden_units = args.hidden_units or 4098
    model.classifier = th.update_classifier(model, hidden_units)

    print('Training model...')
    epochs = args.epochs or 5
    learning_rate = args.learning_rate or 0.001
    th.train_model(model, trainloader, validloader, epochs, learning_rate,
                   device)

    # add class_to_idx attribute to model
    model.class_to_idx = class_to_idx

    print('Saving model...')
    save_dir = args.save_dir or 'saved_models'
    th.save_model(model, save_dir)
    print('Trained model saved!')
# Pick the compute device: CUDA only when it was requested and is usable.
device = 'cpu'
if gpu:
    if not torch.cuda.is_available():
        print("Device doesn't support CUDA\n")
        # Abort with a non-zero status: the requested device is missing, so
        # reporting success (the previous exit(0)) would mislead callers.
        raise SystemExit(1)
    device = 'cuda'
    print("GPU mode enabled\n")
else:
    print("Further training will be done on cpu, switch to GPU\n")

print("Selected Device: ", device, "\n")

# load the datasets
data, loader = load_data(data_dir)

# make model
model = make_model(arch, hidden_units, drop)
model.to(device)

# set optimizer state according to arch: only the replaced head
# (``classifier`` or ``fc``) has its parameters handed to the optimizer
if model.name in ('vgg16', 'densenet121'):
    optimizer = optim.Adam(model.classifier.parameters(), lr=lr)
elif model.name == 'resnet50':
    optimizer = optim.Adam(model.fc.parameters(), lr=lr)
else:
    # Previously ``optimizer`` stayed unbound here and the train_model call
    # below failed with a NameError that hid the real cause.
    raise ValueError("Unsupported architecture: %r" % (model.name,))

# train model, get new state of optimizer in order to save it in checkpoint
trained_model, optimizer = train_model(model, optimizer, epochs, device,
                                       data, loader)

# check accuracy
from tensorflow.keras import models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM
from tensorflow.keras.utils import to_categorical, normalize
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1, l2
import numpy as np
from train_helper import load_data, preprocess_data, plot_learncurve
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from time import process_time
import matplotlib.pyplot as plt

# Dataset root (absolute path on the author's machine).
data_dir = '/home/kangle/dataset/PedBicCarData'

# Load the raw train/test arrays. The numeric arguments are interpreted by
# train_helper.load_data; their meaning is not visible here -- TODO confirm.
train_data, train_label, test_data, test_label = load_data(
    data_dir, 2, 2, 6, 1)
# Apply the 'rnn_a' preprocessing variant from train_helper (presumably
# reshapes the data for a recurrent model -- verify in train_helper).
train_data, train_label, test_data, test_label = preprocess_data(
    train_data, train_label, test_data, test_label, 'rnn_a')

# train_data = np.squeeze(train_data)
# test_data = np.squeeze(test_data)
# train_data = np.transpose(train_data, (0,2,1))
# test_data = np.transpose(test_data, (0,2,1))

# Hold out 10% of the training samples for validation; the fixed
# random_state keeps the split reproducible between runs.
train_data, val_data, train_label, val_label = train_test_split(
    train_data, train_label, test_size=0.1, random_state=42)
print("Split training data into training and validation data:\n")
print("training data: %d" % train_data.shape[0])
print("validation data: %d" % val_data.shape[0])

# Sequential model starting with a 64-unit LSTM layer; its input shape is
# taken from the second and third dimensions of the training array.
model = models.Sequential()
model.add(LSTM(64, input_shape=(train_data.shape[1], train_data.shape[2])))
def _score_predictions(split_name, labels, preds):
    """Print and return the confusion matrix and per-class precision/recall."""
    print('\nevaluate the prediction(%s data).' % split_name)
    conf = confusion_matrix(labels, preds)
    precision = precision_score(labels, preds, average=None)
    recall = recall_score(labels, preds, average=None)
    print(conf)
    print(precision)
    print(recall)
    return conf, precision, recall


def train(args):
    """Train and evaluate a classifier described by a JSON config file.

    Args:
        args: Parsed command-line namespace. ``config`` is the path of the
            JSON parameter file and ``show_misclassified`` toggles display
            of the wrongly classified test samples.

    The config provides ``root_dir``, an optional ``sample_rate`` section,
    an optional ``dimension_reduction`` section (``method``,
    ``n_components``) and a ``classifier`` section (``method``,
    ``parameter`` and, for methods listed in ``nn_type``, ``model``).
    """
    params = vars(args)
    with open(params["config"], mode='r') as f:
        paramset = json.load(f)

    data_dir = paramset["root_dir"]
    if "sample_rate" in paramset:
        samp_rate_t = paramset["sample_rate"]["sample_rate_t"]
        samp_rate_f = paramset["sample_rate"]["sample_rate_f"]
    else:
        samp_rate_t = 1
        samp_rate_f = 1

    data_bunch = load_data(data_dir, samp_rate_t, samp_rate_f)
    # Copy the test data before any preprocessing so the samples can still
    # be shown in their loaded form at the end.
    test_data_visual = np.copy(data_bunch["test_data"])

    if "dimension_reduction" in paramset:
        dim_reducer = paramset["dimension_reduction"]["method"]
        num_components = paramset["dimension_reduction"]["n_components"]
        reducer_spec = algo_map[dim_reducer]
        # NOTE: this writes into the shared algo_map entry in place.
        reducer_spec["parameters"]["n_components"] = num_components

        print('\nbegin dimensionality reduction process.')
        module = importlib.import_module(reducer_spec["module"])
        reducer = getattr(module, reducer_spec["function"])(
            **reducer_spec["parameters"])

        data_bunch = preprocess_data(data_bunch, dim_reducer)
        # Fit on the training split only, then project both splits.
        reducer.fit(data_bunch["train_data"])
        data_bunch["train_data"] = reducer.transform(data_bunch["train_data"])
        data_bunch["test_data"] = reducer.transform(data_bunch["test_data"])

        print('\nafter dimensionality reduction:')
        print(data_bunch["train_data"].shape)
        print(data_bunch["test_data"].shape)

    classify_method = paramset["classifier"]["method"]
    classify_parameter = paramset["classifier"]["parameter"]
    is_nn = classify_method in nn_type

    if is_nn:
        data_bunch = preprocess_data(data_bunch, classify_method)
        classifier = load_model(paramset["classifier"]["model"],
                                data_bunch["train_data"].shape)
        history = nnet_fit(data_bunch, classifier, classify_parameter)
        plot_learncurve(classify_method, history)
    else:
        module = importlib.import_module(algo_map[classify_method]["module"])
        classifier = getattr(
            module, algo_map[classify_method]["function"])(**classify_parameter)
        classifier.fit(data_bunch["train_data"], data_bunch["train_label"])
        plot_learncurve(classify_method,
                        estimator=classifier,
                        data=data_bunch["train_data"],
                        label=data_bunch["train_label"],
                        train_sizes=np.linspace(0.05, 0.2, 5))

    print('\npredict for test data.')
    test_pred = classifier.predict(data_bunch["test_data"])
    train_pred = classifier.predict(data_bunch["train_data"])
    train_label = data_bunch["train_label"]
    test_label = data_bunch["test_label"]

    if len(test_pred.shape) > 1:
        # Multi-column predictions: reduce predictions and labels to class
        # indices so the metrics below receive 1-D class vectors.
        test_pred = np.argmax(test_pred, axis=1)
        train_pred = np.argmax(train_pred, axis=1)
        test_label = np.argmax(data_bunch["test_label"], axis=1)
        train_label = np.argmax(data_bunch["train_label"], axis=1)

    train_conf, train_precision, train_recall = _score_predictions(
        'train', train_label, train_pred)
    test_conf, test_precision, test_recall = _score_predictions(
        'test', test_label, test_pred)

    pred_result = {
        "train_conf": train_conf,
        "train_precision": train_precision,
        "train_recall": train_recall,
        "test_conf": test_conf,
        "test_precision": test_precision,
        "test_recall": test_recall
    }

    print('\ngenerate report file \t')
    if is_nn:
        log_file = write_log(paramset, pred_result, classifier, history)
    else:
        log_file = write_log(paramset, pred_result)
    print(log_file)

    if params["show_misclassified"]:
        # Compare against the decoded ``test_label``. The raw
        # data_bunch["test_label"] rows are vectors whenever the argmax
        # branch above ran, so an element-wise ``!=`` against a scalar
        # prediction is not a single truth value.
        mismatch = np.ravel(test_pred) != np.ravel(test_label)
        indices = np.flatnonzero(mismatch).tolist()
        show_data(test_data_visual[indices], test_label[indices], indices,
                  test_pred[indices])
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.metrics import confusion_matrix, precision_score, recall_score
import matplotlib.pyplot as plt
import numpy as np
from train_helper import load_data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV


def _label_counts(labels):
    """Return how many entries of ``labels`` equal each of the values 1..5."""
    return tuple(np.count_nonzero(labels == value) for value in range(1, 6))


data_dir = '/home/kangle/dataset/PedBicCarData'
train_data, train_label, test_data, test_label = load_data(data_dir, 4, 5)

# Report how many samples carry each label value, per split.
print("Data sample distribution in training set: %d %d %d %d %d\n"
      % _label_counts(train_label))
print("Data sample distribution in test set: %d %d %d %d %d\n"
      % _label_counts(test_label))

# Standardise features with statistics learned from the training split only.
scaler = StandardScaler()
scaler.fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)

# print('\nbegin LLE process.')
# lle = LocallyLinearEmbedding(n_components=20,n_neighbors=10,n_jobs=-1)
# train_feature = lle.fit_transform(train_data)
# print(train_feature.shape)
#
#!/usr/bin/env python import train_helper n_epochs = 20 batch_size = 32 dataloaders, class_names = train_helper.load_data('../../data', batch_size=batch_size) model, criterion, optimizer, scheduler = train_helper.get_model( dataloaders, n_epochs) model = train_helper.train_model(model, criterion, optimizer, scheduler, dataloaders, n_epochs=n_epochs) train_helper.save_model(model, 'model.pt') train_helper.save_mobile_model(model, 'model_mobile.pt')
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, AveragePooling2D, Input, GlobalAveragePooling2D from tensorflow.keras.utils import to_categorical, normalize from tensorflow.keras.optimizers import Adam, SGD from tensorflow.keras.regularizers import l1, l2 from tensorflow.keras.initializers import RandomUniform, GlorotUniform, he_uniform from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, EarlyStopping, TensorBoard, ModelCheckpoint, Callback from tensorflow.keras.backend import get_value, set_value from train_helper import load_data, preprocess_data, plot_learncurve from sklearn.model_selection import train_test_split from time import process_time from lr_finder import LRFinder from sgdr import SGDRScheduler from cyclic_lr import CyclicLR data_dir = '/home/kangle/dataset/PedBicCarData' data_bunch = load_data(data_dir, 2, 2, 6, 1) data_bunch = preprocess_data(data_bunch, 'cnn') model = models.Sequential() regularizer = None #l2(1e-4) initializer = he_uniform() model.add( Conv2D(32, [3, 3], input_shape=data_bunch["train_data"].shape[1:], activation='relu', kernel_initializer=initializer, kernel_regularizer=regularizer, padding='same', name='conv_1')) model.add(MaxPooling2D()) model.add(