def main():
    # store the JSON path passed via -c 'path/to/json' in a config object
    try:
        args = get_args()
        config = process_config(args.config)
    except Exception:
        print("missing or invalid arguments")
        exit(0)

    # create the directories where training summaries and checkpoints are saved
    create_dirs([config.summary_dir, config.checkpoint_dir])
    # create a TensorFlow session
    sess = tf.Session()
    # load the data; the config object is used to split it into batches
    data = DataGenerator(config)
    # build the model; this project only reads the input size from config,
    # but in general the depth, shape, loss function, optimizer, etc. could
    # also be selected through config values
    model = mlp(config)
    # create the Logger that records and saves training progress
    logger = Logger(sess, config)
    # hand the session, model, data, config, and logger to the trainer
    trainer = ExampleTrainer(sess, model, data, config, logger)
    # if a checkpoint of the same model exists, resume training from it
    model.load(sess)
    # here you train your model
    trainer.train()
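# A sketch of the JSON config file that `process_config` above is assumed to
# parse; the key names below are illustrative, not taken from the source:
#
#   {
#     "exp_name": "mlp_example",
#     "num_epochs": 10,
#     "batch_size": 16,
#     "learning_rate": 0.001,
#     "max_to_keep": 5
#   }
#
# Assumed invocation, per the -c comment above:
#   python main.py -c configs/example.json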
def main():
    try:
        args = get_args()
        config = process_config(args.config)
    except Exception:
        print("missing or invalid arguments")
        exit(0)

    create_dirs([config.summary_dir, config.checkpoint_dir])
    sess = tf.Session()
    data = DataGenerator(config)
    model = mlp(config)
    logger = Logger(sess, config)
    trainer = ExampleTrainer(sess, model, data, config, logger)
    model.load(sess)

    # see the trainer file for how trainer.test() differs from trainer.train().
    # y holds the ground-truth PPA values of the test data; result holds the
    # trained model's PPA estimates. The session returns result as a 2-D list
    # of shape [1][n_samples], so result[0][i] is the prediction for input i.
    y, result = trainer.test()
    cnt = 0
    print(result[0])
    for i in range(len(y)):
        # divide |actual - predicted| by the actual value and count the
        # samples whose relative error is within 10%
        if abs(y[i] - float(result[0][i])) / y[i] <= 0.1:
            cnt += 1
    # convert the fraction to a percentage for the printout
    print('fraction of samples predicted within 10% error:', cnt / len(y) * 100, '%')
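# The within-10% count above can be factored into a reusable metric. A minimal
# sketch (the name within_tolerance_accuracy is hypothetical, not from the source):
def within_tolerance_accuracy(y_true, y_pred, tol=0.1):
    """Fraction of predictions whose relative error |y - p| / y is <= tol."""
    hits = sum(1 for t, p in zip(y_true, y_pred) if abs(t - float(p)) / t <= tol)
    return hits / len(y_true)

# within_tolerance_accuracy(y, result[0]) reproduces cnt / len(y) from main().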
def get_classifier(model, parameters, subset):
    """Return a classification model for the given model name, which must be
    one of ('mlp', 'knn', 'svm', 'rf', 'nbc', 'lr', 'dt', 'bn')."""
    if model == 'mlp':
        return mlp(parameters)
    elif model == 'knn':
        return knn(parameters)
    elif model == 'svm':
        return svm(parameters)
    elif model == 'rf':
        return rf(parameters)
    elif model == 'nbc':
        return nbc(parameters)
    elif model == 'lr':
        return lr(parameters)
    elif model == 'dt':
        return dt(parameters)
    elif model == 'bn':
        parameters['subset'] = subset
        return bn(parameters)
    else:
        raise ValueError('unknown model: %s' % model)
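# A table-driven alternative to the if/elif chain above; a sketch, assuming all
# constructors share the single-argument signature f(parameters):
CLASSIFIER_FACTORIES = {'mlp': mlp, 'knn': knn, 'svm': svm, 'rf': rf,
                        'nbc': nbc, 'lr': lr, 'dt': dt, 'bn': bn}

def get_classifier_from_table(model, parameters, subset):
    if model not in CLASSIFIER_FACTORIES:
        raise ValueError('unknown model: %s' % model)
    if model == 'bn':
        parameters['subset'] = subset
    return CLASSIFIER_FACTORIES[model](parameters)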
def get_model(model_type, params=None, schedule=None):
    # fall back to the packaged defaults when params or schedule is omitted
    if params is None:
        params, _ = pp.get_params(model_type)
    if schedule is None:
        _, schedule = pp.get_params(model_type)
    model_type = model_type.lower()
    if model_type == "mlp":
        return mlp(params, schedule)
    if model_type == "ica":
        return ica(params, schedule)
    if model_type == "ica_pca":
        return ica_pca(params, schedule)
    if model_type == "lca":
        return lca(params, schedule)
    if model_type == "lca_pca":
        return lca_pca(params, schedule)
    if model_type == "lca_pca_fb":
        return lca_pca_fb(params, schedule)
    if model_type == "conv_lca":
        return conv_lca(params, schedule)
    if model_type == "dsc":
        return dsc(params, schedule)
    if model_type == "density_learner":
        return dl(params, schedule)
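# When both params and schedule are omitted, the two `is None` branches above
# each call pp.get_params(model_type), i.e. the same work twice. A sketch of a
# single-call variant (get_defaults is a hypothetical helper; this assumes
# pp.get_params has no side effects):
def get_defaults(model_type, params=None, schedule=None):
    if params is None or schedule is None:
        default_params, default_schedule = pp.get_params(model_type)
        params = default_params if params is None else params
        schedule = default_schedule if schedule is None else schedule
    return params, schedule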
def run_exp(replace_params=None):
    # READ PARAMETERS AND DATA
    # (a mutable {} default argument is a classic Python pitfall; default to None)
    if replace_params is None:
        replace_params = {}
    params = setup(replace_params)
    t1Data, t1Label, t2Data, t2Label, vData, vLabel, testD, testL = read_preprocess(params=params)
    # NOTE: the original dumped the preprocessed data and returned immediately,
    # which left everything below unreachable; commented out so training runs.
    # np.savez('preprocessed_cifar.npz', X_train=t1Data, Y_train=t1Label,
    #          X_t2=t2Data, Y_t2=t2Label, X_v=vData, Y_v=vLabel,
    #          X_test=testD, Y_test=testL)
    # return

    # random numbers
    rng = np.random.RandomState(params.seed)
    rstream = RandomStreams(rng.randint(params.seed+1)+1)

    '''
        Construct Theano functions.
    '''
    # INPUTS
    useRglrz = T.fscalar('useRglrz')
    bnPhase = T.fscalar('bnPhase')
    if params.model == 'convnet':
        x = T.ftensor4('x')
    else:
        x = T.matrix('x')
    trueLabel = T.ivector('trueLabel')
    globalLR1 = T.fscalar('globalLR1')
    globalLR2 = T.fscalar('globalLR2')
    moment1 = T.fscalar('moment1')
    moment2 = T.fscalar('moment2')

    # NETWORK
    if params.model == 'convnet':
        model = convnet(rng=rng, rstream=rstream, x=x, wantOut=trueLabel,
                        params=params, useRglrz=useRglrz, bnPhase=bnPhase)
    else:
        model = mlp(rng=rng, rstream=rstream, x=x, wantOut=trueLabel,
                    params=params, useRglrz=useRglrz, bnPhase=bnPhase)

    # UPDATES
    updateT1, updateT2, updateC2grad, grads = updates(
        mlp=model, params=params, globalLR1=globalLR1, globalLR2=globalLR2,
        momentParam1=moment1, momentParam2=moment2)

    updateBN = []
    if params.batchNorm:
        for param, up in zip(model.paramsBN, model.updateBN):
            updateBN += [(param, up)]

    updateT1 = theano.function(
        inputs=[x, trueLabel, globalLR1, moment1, useRglrz, bnPhase],
        outputs=[model.trainCost, model.guessLabel] + grads,
        updates=updateT1 + updateBN,
        # mode=theano.compile.MonitorMode(post_func=detect_nan),
        on_unused_input='ignore',
        allow_input_downcast=True)

    updateT2part1 = theano.function(
        inputs=[x, trueLabel, globalLR1, moment1, useRglrz, bnPhase],
        outputs=[model.trainCost, model.guessLabel] + grads,
        updates=updateC2grad,
        # mode=theano.compile.MonitorMode(post_func=detect_nan),
        on_unused_input='ignore',
        allow_input_downcast=True)

    updateT2part2 = theano.function(
        inputs=[x, trueLabel, globalLR1, moment1, globalLR2, moment2, useRglrz, bnPhase],
        outputs=[model.trainCost, model.guessLabel] + grads,
        updates=updateT2,
        # mode=theano.compile.MonitorMode(post_func=detect_nan),
        on_unused_input='ignore',
        allow_input_downcast=True)

    evaluate = theano.function(
        inputs=[x, trueLabel, useRglrz, bnPhase],
        outputs=[model.trainCost, model.guessLabel, model.penalty, model.netStats],
        on_unused_input='ignore',
        allow_input_downcast=True)

    evaluateBN = theano.function(
        inputs=[x, useRglrz, bnPhase],
        updates=updateBN,
        on_unused_input='ignore',
        # mode=theano.compile.MonitorMode(post_func=detect_nan),
        allow_input_downcast=True)

    '''
        Initializations.
    '''
    # INITIALIZE
    # layers to be read from
    loopOver = range(params.nLayers)
    # initializing training values
    currentT2Batch = 0
    # samples, batches per epoch, etc.
    nSamples1 = t1Data.shape[0]
    nVSamples, nTestSamples = vData.shape[0], testD.shape[0]
    nBatches1 = nSamples1 // params.batchSize1  # integer division
    # permutations (lists, so np.random.shuffle can permute them in place)
    testPerm = list(range(0, nTestSamples))
    train1Perm = list(range(0, nSamples1))
    if params.useT2:
        nSamples2 = t2Data.shape[0]
        train2Perm = list(range(0, nSamples2))
        nBatches2 = nSamples2 // params.batchSize2

    # TRACKING
    # (1) best results
    bestVal = 1.; bestValTst = 1.
    # (2) errors
    tempError1, tempError2, tempCost1, tempCost2 = [[], [], [], []]
    t1Error, t2Error, validError, testError = [[], [], [], []]
    t1Cost, t2Cost, penaltyCost, validCost, testCost = [[], [], [], [], []]
    # (3) activation statistics (per layer)
    trackTemplate = np.empty((0, params.nLayers), dtype=object)
    trackLayers = {}
    for stat in params.activTrack:
        trackLayers[stat] = trackTemplate
    # (4) penalty, noise, activation parametrization (per layer)
    penalList = ['L1', 'L2', 'Lmax', 'LmaxCutoff', 'LmaxSlope', 'LmaxHard']
    noiseList = ['addNoise', 'inputNoise', 'dropOut', 'dropOutB']
    sharedNames = [p.name for p in model.paramsT1] + [p.name for p in model.paramsT2]
    print(sharedNames)
    trackPenal = {}; trackPenalSTD = {}
    trackNoise = {}; trackNoiseSTD = {}
    trackGrads = {}
    track1stFeatures = []
    trackRglrzTemplate = np.empty((0, len(loopOver)), dtype=object)
    for param in params.rglrz:
        if param in penalList:
            trackPenal[param] = trackRglrzTemplate
            trackPenalSTD[param] = trackRglrzTemplate
        if param in noiseList:
            trackNoise[param] = trackRglrzTemplate
            trackNoiseSTD[param] = trackRglrzTemplate
    # (5) other
    trackLR1, trackLR2 = [[], []]
    params.halfLife = params.halfLife * 10000. / (params.maxEpoch * nBatches1)
    print('number of updates total', params.maxEpoch * nBatches1)
    print('number of updates within epoch', nBatches1)

    '''
        Training!!!
    '''
    lastUpdate = params.maxEpoch * nBatches1 - 1

    try:
        t_start = time()
        for i in range(0, params.maxEpoch * nBatches1):  # i = nUpdates
            # EPOCHS
            currentEpoch = i // nBatches1
            currentBatch = i % nBatches1  # batch order in the current epoch
            currentProgress = np.around(1. * i / nBatches1, decimals=4)

            '''
                Learning rate and momentum schedules.
            '''
            t = 1. * i / (params.maxEpoch * nBatches1)
            lr1 = np.asarray(params.learnRate1 *
                             lr_schedule(fun=params.learnFun1, var=t,
                                         halfLife=params.halfLife, start=0),
                             theano.config.floatX)
            lr2 = np.asarray(params.learnRate2 *
                             lr_schedule(fun=params.learnFun2, var=t,
                                         halfLife=params.halfLife, start=params.triggerT2),
                             theano.config.floatX)
            moment1 = np.asarray(params.momentum1[1] -
                                 (params.momentum1[1] - params.momentum1[0]) *
                                 lr_schedule(fun=params.momentFun, var=t,
                                             halfLife=params.halfLife, start=0),
                                 theano.config.floatX)
            moment2 = np.asarray(params.momentum2[1] -
                                 (params.momentum2[1] - params.momentum2[0]) *
                                 lr_schedule(fun=params.momentFun, var=t,
                                             halfLife=params.halfLife, start=0),
                                 theano.config.floatX)

            # PERMUTING T1 AND T2 SETS
            if currentBatch == 0:
                np.random.shuffle(train1Perm)
            if params.useT2 and (currentT2Batch == nBatches2 - 1):
                np.random.shuffle(train2Perm)
                currentT2Batch = 0

            '''
                Update T1 & T2.
            '''
            # Update both
            if params.useT2:
                # make batches
                sampleIndex1 = train1Perm[(currentBatch * params.batchSize1):
                                          ((currentBatch + 1) * params.batchSize1)]
                sampleIndex2 = train2Perm[(currentT2Batch * params.batchSize2):
                                          ((currentT2Batch + 1) * params.batchSize2)]
                if (i % params.T1perT2 == 0) and (i >= params.triggerT2):
                    res = updateT2part1(t2Data[sampleIndex2], t2Label[sampleIndex2],
                                        lr1, moment1, 0, 1)
                    (c2, y2, debugs) = (res[0], res[1], res[2:])
                    res = updateT2part2(t1Data[sampleIndex1], t1Label[sampleIndex1],
                                        lr1, moment1, lr2, moment2, 1, 0)
                    (c1, y1, debugs) = (res[0], res[1], res[2:])
                    tempError2 += [1. * sum(t2Label[sampleIndex2] != y2) / params.batchSize2]
                    tempCost2 += [c2]
                    currentT2Batch += 1
                    if np.isnan(c1): print('NANS in part 2!')
                    if np.isnan(c2): print('NANS in part 1!')
                else:
                    res = updateT1(t1Data[sampleIndex1], t1Label[sampleIndex1],
                                   lr1, moment1, 1, 0)
                    (c1, y1, debugs) = (res[0], res[1], res[2:])
                    tempError1 += [1. * sum(t1Label[sampleIndex1] != y1) / params.batchSize1]
                    tempCost1 += [c1]
                    if np.isnan(c1): print('NANS!')

            # Update T1 only
            else:
                # make batch
                sampleIndex1 = train1Perm[(currentBatch * params.batchSize1):
                                          ((currentBatch + 1) * params.batchSize1)]
                res = updateT1(t1Data[sampleIndex1], t1Label[sampleIndex1],
                               lr1, moment1, 1, 0)
                (c1, y1, debugs) = (res[0], res[1], res[2:])
                tempError1 += [1. * sum(t1Label[sampleIndex1] != y1) / params.batchSize1]
                tempCost1 += [c1]
                if np.isnan(c1): print('NANS', c1)

            '''
                Evaluate test, store results, print status.
            '''
            if np.around(currentProgress % (1. / params.trackPerEpoch), decimals=4) == 0 \
                    or i == lastUpdate:

                # batchnorm parameters: estimate for the final model
                if (params.batchNorm and (currentEpoch > 1)) \
                        and ((currentEpoch % params.evaluateTestInterval) == 0 or i == lastUpdate) \
                        and params.testBN != 'lazy':
                    model = update_bn(model, params, evaluateBN, t1Data, t1Label)

                # # EVALUATE: validation set
                # allVar = evaluate(vData[:2], vData, vLabel[:2], vLabel, 1)
                # cV, yTest, _, _ = allVar[0], allVar[1], allVar[2], allVar[3], allVar[4:]
                # #cV, yTest = allVar[0], allVar[1]
                # tempVError = 1.*sum(yTest != vLabel) / nVSamples
                # tempVError = 7.; cV = 7.

                '''
                    EVALUATE: test set
                    - in batches, otherwise too large to fit on gpu
                    - using dummy input in place of the regularized input stream
                      (Theano complains otherwise)
                    - graph = 1, hence BN constants do not depend on the
                      regularized input stream (see batchnorm.py)
                '''
                if params.model == 'mlp':
                    nTempSamples = 5000
                else:
                    nTempSamples = 1000
                tempError = 0.; tempCost = 0.
                batchSizeT = nTestSamples // 10
                if currentEpoch < 0.8 * params.maxEpoch:
                    np.random.shuffle(testPerm)
                    tempIndex = testPerm[:nTempSamples]
                    cT, yTest, p, stats = evaluate(testD[tempIndex], testL[tempIndex], 0, 1)
                    tempError = 1. * sum(yTest != testL[tempIndex]) / nTempSamples
                else:
                    for j in range(10):
                        tempIndex = testPerm[j * batchSizeT:(j + 1) * batchSizeT]
                        cT, yTest, p, stats = evaluate(testD[tempIndex], testL[tempIndex], 0, 1)
                        tempError += 1. * sum(yTest != testL[tempIndex]) / batchSizeT
                        tempCost += cT
                    tempError /= 10.
                    cT = tempCost / 10.
                '''
                    TRACK: class errors & cost
                '''
                # note: T1 and T2 errors are averaged over training, so initially
                # they cannot be compared to the valid and test set errors
                t1Error += [np.mean(tempError1)]; t1Cost += [np.mean(tempCost1)]
                if params.useT2:
                    t2Error += [np.mean(tempError2)]; t2Cost += [np.mean(tempCost2)]
                testError += [tempError]; testCost += [cT]
                penaltyCost += [p]
                #validError += [tempVError]

                # RESET tracked errors
                tempError1 = []; tempCost1 = []
                tempError2 = []; tempCost2 = []

                '''
                    TRACK: T2 parameter statistics & learning rates
                '''
                # monitoring T2 values
                if params.useT2:
                    trackNoise, trackPenal = t2_extract(model, params, trackNoise, trackPenal)
                # monitoring activations
                if params.trackStats:
                    trackLayers = stat_extract(stats, params, trackLayers)
                # monitoring gradients
                if params.trackGrads:
                    trackGrads = grad_extract(debugs, params, sharedNames, trackGrads)
                # monitoring log learning rates
                trackLR1 += [lr1]
                trackLR2 += [lr2]

                '''
                    STATUS print
                '''
                if params.useT2 and ((currentEpoch % params.printInterval) == 0
                                     or (i == params.maxEpoch * nBatches1 - 1)):
                    print(currentEpoch, ') time=%.f T1 | T2 | test | penalty' % ((time() - t_start) / 60))
                    print('ERR %.3f | %.3f | %.3f | -' % (
                        t1Error[-1] * 100, t2Error[-1] * 100, testError[-1] * 100))
                    print('COSTS %.3f | %.3f | %.3f | %.3f' % (
                        t1Cost[-1], t2Cost[-1], testCost[-1], penaltyCost[-1]))
                    print('Log[learningRates]', np.log10(lr1), 'T1', np.log10(lr2), 'T2')
                    for param in params.rglrzTrain:
                        if param in penalList:
                            print(param, trackPenal[param][-1])
                        if param in noiseList:
                            print(param, trackNoise[param][-1])
                if ((currentEpoch % params.printInterval) == 0
                        or (i == params.maxEpoch * nBatches1 - 1)):
                    print(currentEpoch, 'TRAIN %.2f TEST %.2f time %.f' % (
                        t1Error[-1] * 100, testError[-1] * 100, (time() - t_start) / 60))
                    print('Est. time till end:',
                          (((time() - t_start) / 60) / (currentEpoch + 1)) * (params.maxEpoch - currentEpoch))

    except KeyboardInterrupt:
        pass
    time2train = (time() - t_start) / 60

    '''
        Prepare variables for output.
    '''
    if params.useT2:
        lastT2 = t2Error[-1]
        allErrors = np.concatenate(([t1Error], [t2Error], [testError]), axis=0)
        allCosts = np.concatenate(([t1Cost], [t2Cost], [testCost], [penaltyCost]), axis=0)
        outParams = {}
        # (the original attached the final else to the second if, so the warning
        # also fired for tracked penalty params; an if/elif chain is intended)
        for param in params.rglrz:
            if param in penalList:
                outParams[param] = trackPenal[param][-1]
            elif param in noiseList:
                outParams[param] = trackNoise[param][-1]
            else:
                print('param not tracked, fix!')
    else:
        lastT2 = 0.
        allErrors = np.concatenate(([t1Error], [testError]), axis=0)
        allCosts = np.concatenate(([t1Cost], [testCost]), axis=0)
        outParams = {}
        for param in params.rglrz:
            outParams[param] = params.rglrzInitial[param]

    best = min(testError)
    modelName = 'pics/' + str(params.nLayers - 1) + 'x' + str(params.model) + '_best:' + str(best) + '.pdf'

    # saved for plot
    data = {
        #'setup': params,
        'modelName': modelName,
        'best': best,
        'lastEpoch': (currentEpoch + 1),
        'paramsTrained': params.rglrzTrain,
        'allErrors': allErrors,
        'allCosts': allCosts,
        'trackLayers': trackLayers,
        'trackPenal': trackPenal,
        'trackNoise': trackNoise,
        'trackFeatures': track1stFeatures,
        'trackPenalSTD': trackPenalSTD,
        'trackNoiseSTD': trackNoiseSTD,
        'trackGrads': trackGrads,
        'trackLR1': trackLR1,
        'trackLR2': trackLR2,
        'outParams': outParams,
    }
    import pickle
    with open(params.saveName, 'wb') as f:  # `with` closes the file; `file` shadowed a builtin
        pickle.dump(data, f)

    # prepared for return
    results = {
        'bestVal': bestVal,  # which could be validation or T2
        'bestValTest': best,
        'lastT1': t1Error[-1],
        'lastT2': lastT2,
        'lastVal': None,  # validError[-1]
        'lastTest': testError[-1],
        'outParams': outParams,
        'trackGrads': trackGrads,
        'trackPenal': trackPenal,
        'trackNoise': trackNoise,
        'setup': params,
        'lastCTest': testCost[-1],
        'lastCT1': t1Cost[-1],
        'trainTime': time2train,
    }
    return results
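# Hypothetical invocation of run_exp; the override keys mirror the params
# attributes read above (model, maxEpoch, batchSize1, useT2, ...), but the
# authoritative schema lives in setup(), which is not shown here:
#
#   results = run_exp(replace_params={'model': 'mlp', 'maxEpoch': 100,
#                                     'batchSize1': 128, 'useT2': False})
#   print(results['lastTest'], results['trainTime'])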
from __future__ import print_function
import argparse

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from keras.datasets import mnist

from models.mlp import mlp

# capture the initial weights of 10 independently initialized MLPs as numpy arrays
weight = []
for i in range(10):
    model = mlp()
    weight.append([x.data.numpy() for x in model.parameters()])
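# A sketch of restoring one of the captured weight sets into a fresh network,
# assuming parameter order is stable across mlp() instances (load_weights is a
# hypothetical helper, not part of models.mlp):
def load_weights(model, weights):
    with torch.no_grad():
        for p, w in zip(model.parameters(), weights):
            p.copy_(torch.from_numpy(w))

restored = mlp()
load_weights(restored, weight[0])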
def trainer(X, Y, data, para):
    '''
    IMPORTANT: to see the plots in jupyter, remember to invoke:

        %matplotlib inline

    (could be used stand-alone, but we call it through commands)

    INPUT: X with one or more variables in float32 and Y with a single
    binary value. These can be easily produced through transform_data
    if you insist on bypassing the commands function.

    OUTPUT: trains a model and reports the training results with a plot
    comparing train and test. The predictions are loaded onto a data object.
    '''
    ind_var = Y  # this is used later for output
    X_num = X
    data = data.sample(frac=1)
    X, Y = transform_data(data, para['flatten'], X, Y)

    if para['validation'] is not False:
        X, Y, X_val, Y_val = separate(X, Y, para['validation'])

    try:
        dims = X.shape[1]
    except IndexError:
        dims = X_num
    para['dims'] = dims

    if para['layers'] == 1:
        para['shape'] = 'funnel'
    if para['neuron_max'] == 'auto' and dims >= 4:
        para['neuron_max'] = int(dims + (dims * 0.2))
    elif para['neuron_max'] == 'auto':
        para['neuron_max'] = 4

    para['neuron_count'] = shapes(para)

    # the original compared strings with `is` (identity); `==` is the correct operator
    if para['model'] == 'mlp':
        model, history = mlp(X, Y, para)
    if para['model'] == 'regression':
        model, history = regression(X, Y, para['epoch'], para['reg_mode'])

    network_scale = len(X) * para['epoch'] * para['layers'] * para['neuron_max']

    # train / test results
    ex2 = pd.DataFrame({
        'train_acc': history.history['acc'],
        'train_loss': history.history['loss'],
        'test_acc': history.history['val_acc'],
        'test_loss': history.history['val_loss']
    })

    scores = model.evaluate(X, Y, verbose=para['verbose'])
    if para['double_check'] is False or para['validation'] is False:
        if para['hyperscan'] is False:
            print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

    # calculate and round predictions
    predictions = model.predict(X)
    rounded = [round(x[0]) for x in predictions]

    if para['double_check'] is True:
        check(Y, rounded, scores)

    if para['save_model'] is False and para['validation'] is not False:
        para['save_model'] = 'saved_model'
    if para['save_model'] is not False:
        save_model_as(X_num, data.columns, model, para['save_model'], para['flatten'])

    # shuffling and separating the data
    if para['validation'] is not False:
        validate(X_val, Y_val, para['save_model'])

    # model parameters
    ex1 = pd.Series({
        'ind_var': ind_var,
        'y_transform': para['flatten'],
        'n=': len(X),
        'features': para['dims'],
        'epochs': para['epoch'],
        'layers': para['layers'],
        'dropout': para['dropout'],
        'batch_size': para['batch_size'],
        'shape': para['shape'],
        'max_neurons': para['neuron_max'],
        'network_scale': network_scale
    })

    # prevent Tensorflow memory leakage
    K.clear_session()

    if para['hyperscan'] is True:
        ex3 = pd.Series({
            'optimizer': para['optimizer'],
            'activation': para['activation'],
            'activation_out': para['activation_out'],
            'loss': para['loss'],
        })
        return ex1, ex2, ex3
    else:
        display(pd.DataFrame(ex1).transpose())
        accuracy(ex2)
        return
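# Hypothetical call of trainer(); the para keys mirror those read inside the
# function, but the complete schema comes from the surrounding package's
# commands() helper, which is not shown here:
#
#   trainer(X='age', Y='label', data=df,
#           para={'model': 'mlp', 'epoch': 50, 'layers': 3, 'flatten': 'none',
#                 'validation': False, 'double_check': False, 'hyperscan': False,
#                 'save_model': False, 'dropout': 0.2, 'batch_size': 32,
#                 'neuron_max': 'auto', 'shape': 'funnel', 'verbose': 0,
#                 'reg_mode': 'logistic'})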
def train(options):
    required_keys = ('training_path', 'test_path', 'output_directory', 'folder_name')
    if not all(elem in options for elem in required_keys):
        print('Error: Missing input information.\n')
        exit()

    X, y = get_data(options['training_path'])
    X_test, y_test = get_data(options['test_path'])
    info = dataset_info(X, y, X_test, y_test)

    selectors = None
    if 'selectors' in options:
        variance_threshold_selector, percentile_selector = fit_selectors(
            X, y, options['selectors'])
        X = transform_features(X, variance_threshold_selector, percentile_selector)
        X_test = transform_features(X_test, variance_threshold_selector, percentile_selector)
        selectors = {
            'variance': variance_threshold_selector,
            'percentile': percentile_selector
        }

    # scaler must exist even when no scaling is requested, since it is
    # passed to export() below
    scaler = None
    if 'scaling' in options:
        X, scaler = scale(X, options['scaling'])
        if scaler is not None:
            X_test = scaler.transform(X_test)

    X, y = shuffle(X, y)
    X_test, y_test = shuffle(X_test, y_test)
    info = post_processing_info(info, X, X_test)

    if 'mlp' in options:
        model_options = options['mlp']
        classifiers = mlp(X, y, model_options)
    else:
        model_options = options['svc']
        classifiers = svc(X, y, model_options)

    # Print some initial analysis
    if model_options['probability']:
        probabilities = classifiers['neg_log_loss'].predict_proba(X_test)
        print('Log Loss')
        print(log_loss(y_test, probabilities))

    predictions = classifiers['accuracy'].predict(X_test)
    print('\nconfusion matrix:')
    print(confusion_matrix(y_test, predictions))
    print('\nclassification report:\n')
    print(classification_report(y_test, predictions))
    print(info)

    save_model = yes_no_prompt.yes_or_no('Save model?')
    if save_model:
        # preserve the split data for later tests
        data = {
            'X': X,
            'y': y,
            'X_test': X_test,
            'y_test': y_test,
        }
        results_directory = export(classifiers, data, selectors, options, info,
                                   scaler, options['folder_name'],
                                   options['output_directory'])
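# A sketch of the options dict train() expects; the four required keys come
# from the check at the top, the optional sections from the branches above,
# and the values shown are illustrative:
#
#   options = {
#       'training_path': 'data/train.csv',
#       'test_path': 'data/test.csv',
#       'output_directory': 'results',
#       'folder_name': 'mlp_run_01',
#       'scaling': 'standard',          # optional; passed to scale()
#       'mlp': {'probability': True},   # or 'svc': {...} for the SVC path
#   }
#   train(options)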