def start(self):
    """Run the full ML pipeline for this job and report success.

    Executes each pipeline stage in order: read user data, feature
    generation, preprocessing, feature selection, feature extraction,
    classification, cross validation, then test-set generation,
    preprocessing and prediction.

    Returns:
        bool: True when every stage completed; False when any stage
        raised, in which case the failure is logged and the job status
        is set to "Errored".
    """
    # perform some logging
    self.jlogger.info("Starting job with job id {}".format(self.job_id))
    self.jlogger.debug("Job Config: {}".format(self.config))
    self.jlogger.debug("Job Other Data: {}".format(self.job_data))

    try:
        rud.ReadUserData(self)
        fg.FeatureGeneration(self, is_train=True)
        pp.Preprocessing(self, is_train=True)
        fs.FeatureSelection(self, is_train=True)
        fe.FeatureExtraction(self, is_train=True)
        clf.Classification(self)
        cv.CrossValidation(self)
        tsg.TestSetGeneration(self)
        tspp.TestSetPreprocessing(self)
        tsprd.TestSetPrediction(self)
        job_success_status = True
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # still propagate; any stage failure is logged with a traceback
        # and surfaced through the job status instead of crashing.
        job_success_status = False
        helper.update_running_job_status(self.job_id, "Errored")
        self.jlogger.exception("Exception occurred in ML Job {} ".format(
            self.job_id))

    return job_success_status
def buildTestsTrainings(self):
    """
    Do the cross validation with the protein classes
    """
    self.crossValidation = CrossValidation.CrossValidation()
    # Register each enzyme family as one class, in EC-number order.
    protein_classes = (
        self.oxidoreductaseProteinList,
        self.transferaseProteinList,
        self.hydrolaseProteinList,
        self.lyaseProteinList,
        self.isomeraseProteinList,
        self.ligaseProteinList,
    )
    for protein_class in protein_classes:
        self.crossValidation.addClass(protein_class)
def test_addClass(self):
    """
    test adding a class
    """
    validator = CrossValidation.CrossValidation()
    # Two classes of different sizes exercise the registration path.
    self.class1 = self.createClass('1', 148)
    self.class2 = self.createClass('2', 17)
    for protein_class in (self.class1, self.class2):
        validator.addClass(protein_class)
    self.assertCrossValidation(validator)
def run_part3(trainX, trainY, testX, testY, lr, eps, max_iter, lmd_reg, k=1): valid_loss = [] smp_num, dim_num = trainX.shape test_num = smp_num / k # sample number of a validation set random_index = random.sample(xrange(0, smp_num), smp_num) # for randomized split for lmd in lmd_reg: loss = cv.CrossValidation(trainX, trainY, lr, eps, max_iter, lmd, k, test_num, random_index) valid_loss.append(loss) print "for diff lambda, their final validation loss are:", valid_loss
def __init__(self, path_folder, winlen):
    """Load wave files, compute MFCC features and set up 2-fold CV.

    Args:
        path_folder: directory scanned by WaveReader for audio files.
        winlen: analysis window length in seconds; when None, defaults
            to 0.025 (25 ms).
    """
    # ``is not None`` rather than ``!= None``: identity comparison is
    # the idiomatic (PEP 8) way to test for the None sentinel, and it
    # still honors an explicit winlen of 0.
    self.winlen_ = winlen if winlen is not None else 0.025
    self.reader_ = WaveReader.WaveReader(path_folder)
    (self.signals, self.rate) = self.reader_.read_all()
    # 30 mel filters, 7 cepstral coefficients per frame.
    self.converter = WaveToMfcc.WaveToMfcc(self.signals, self.rate,
                                           self.winlen_, nfilt=30, ncep=7)
    self.gmm_table_ = []
    # 2-fold split over the speakers discovered by the converter.
    self.cross_split = CrossValidation.CrossValidation(
        self.converter.list_of_speakers, 2)
    self.results_ = np.array([])
    self.rr_ = np.array([])
def test_getNumberTests(self):
    """Tests the getNumberTests method"""
    crossValidation = CrossValidation.CrossValidation()
    # (class size, expected (tests, remainder)) pairs, checked in order.
    cases = [
        (73, (7, 10)),
        (117, (12, 9)),
        (131, (13, 14)),
        (51, (5, 6)),
        (47, (5, 2)),
        (17, (1, 8)),
    ]
    for size, expected in cases:
        self.assertEqual(crossValidation.getNumberTests(size), expected)
## Replace my references with references to your answers to those assignments. ## IMPORTANT NOTE !! ## Remember to install the Pillow library (which is required to execute 'import PIL') ## Remember to install Pytorch: https://pytorch.org/get-started/locally/ (if you want GPU you need to figure out CUDA...) from PIL import Image import torchvision import torchvision.transforms as transforms import torch import numpy as np import Assignment5Support import EvaluationsStub import CrossValidation crossValidation = CrossValidation.CrossValidation(3) import Featurize if __name__=='__main__': kDataPath = '../dataset_B_Eye_Images' (xRaw, yRaw) = Assignment5Support.LoadRawData(kDataPath, includeLeftEye = True, includeRightEye = True) (xTrainRaw, yTrainRaw, xTestRaw, yTestRaw) = Assignment5Support.TrainTestSplit(xRaw, yRaw, percentTest = .25) print('Train is %f percent closed.' % (sum(yTrainRaw)/len(yTrainRaw))) print('Test is %f percent closed.' % (sum(yTestRaw)/len(yTestRaw))) # Load the images and then convert them into tensors (no normalization) xTrainImages = [ Image.open(path) for path in xTrainRaw ] xTrain, yTrain = Featurize.Featurize(xTrainImages, yTrainRaw) print(f'Training data size: {xTrain.size()}')
# Load the referral-source dataset definition (DataSet is a project class
# providing feature-selection helpers — not shown in this chunk).
dataset = DataSet(filename='../config/referral_source.txt')
path_to_file = os.path.join( gen_dir, filename+'.csv')
df = pd.read_csv(path_to_file, sep=',')

# Univariate selection over every column except one.
# NOTE(review): len(df.loc[0]) is the width of row 0 — presumably
# features + 1 target column named 'value'; confirm with DataSet.
sf_univarate = dataset.univariate_selection(data=df, k_best=(len(df.loc[0]) - 1) )
# Prepend the target column so it survives the selection.
sf_univarate.insert(0, 'value')

# Same idea using feature-importance ranking.
sf_importance = dataset.f_importance(data=df, n_attrs =(len(df.loc[0]) - 1) )
sf_importance.insert(0, 'value')

nm = NetworkModel()
ds = CrossValidation()
result_list = []

# Choose the feature list according to the requested strategy.
# NOTE(review): f_selection stays unbound when feature_selection_name is
# neither 'univ' nor 'impot' — later use would raise NameError.
if feature_selection_name == 'univ':
    f_selection = sf_univarate
elif feature_selection_name == 'impot':
    f_selection = sf_importance

tmp_row = []
# Grid search over architectures and momentum values; the loop body
# continues beyond this chunk.
for layers in ann_proposal:
    for momentum in momentum_proposal:
        model = nm.create_model(layers=layers, input_size=number_of_features,
                                output_size=len(set(df['value'])) , momentum=momentum)
def main(): print "" print "\t+----------------------------------------------------------------+" print "\t| |" print "\t| CROSS VALIDATION OF LEARNING FORM EXAMPLE MODULES (LEM1) |" print "\t| RULE INDUCTION ALGORITHM |" print "\t| Author : Madhu Chegondi |" print "\t| KUID : m136c192 |" print "\t| |" print "\t+----------------------------------------------------------------+" print "" dataFile = raw_input("\tEnter Name Of DataFile : ") while (True): if (dataFile): try: dfp = open('Data/' + dataFile, 'r') # This Program assumes that first 2 lines of the input data filename have # < a a a d > # [ attribute1 attribute2 attribute3 decision ] header1 = dfp.readline() header2 = dfp.readline().strip().split() AttNames = header2[1:-1] DesName = header2[-2] attr = [] decisions = [] for line in dfp: if re.match(r'^\!.*', line) or line.strip() == '': continue line.strip() values = line.split() rawData = {} des = {} for i in range(len(values) - 1): try: if (type(float(values[i])) == float): rawData[AttNames[i]] = float(values[i]) except ValueError: rawData[AttNames[i]] = values[i] attr.append(rawData) des[DesName] = values[-1] decisions.append(des.items()) break except: print "\t\tERROR: Enter A Valid File Name\n" dataFile = raw_input("\tEnter Name Of DataFile : ") else: dataFile = raw_input("\tEnter Name Of DataFile : ") print "\n\tCROSS VALIDATION TECHNIQUES" print "\t\t1. BOOTSTRAP CROSS VALIDATION" print "\t\t2. LEAVING ONE OUT CROSS VALIDATION" choice = raw_input("\n\tENTER YOUR CHOICE OF CROSS VALIDATION (1 or 2) : ") while True: if choice == '1' or choice == '2': break else: choice = raw_input( "\tENTER YOUR CHOICE OF CROSS VALIDATION (1 or 2) : ") samples = None if choice == '1': method = 'Bootstrap' print "\n\tCONFIGURING BOOTSTRAP" samples = raw_input( "\t\tHow many samples do you wish to create (default 200 samples) : " ) else: method = 'LeaveOneOut' CrossValidation.CrossValidation(attr, decisions, DesName, method, samples, dataFile)
def executeSVM(X_train, y_train, OX_test, oy_test):
    # 5-fold cross-validated SVM training with an lr/reg grid search per
    # fold; prints per-fold accuracies and the average test accuracy.
    # NOTE(review): indentation reconstructed from a whitespace-mangled
    # source — fold-loop boundaries below should be verified.
    learning_rates = [1e-5, 1e-8]
    regularization_strengths = [10e2, 10e4]
    results = {}
    best_val = -1
    best_svm = None
    # X_train = getCIFAR_as_32Pixels_Image(X_train)
    # OX_test = getCIFAR_as_32Pixels_Image(OX_test)
    accuracy = []
    totalAccuracy = 0.0
    ## Implementing Cross Validation
    crossValidObj = CrossValidation(5, X_train, y_train)
    foldsGen = crossValidObj.generateTrainAndTest()
    for i in range(5):
        # Advance the generator; it mutates crossValidObj's fold fields.
        next(foldsGen)
        X_test = OX_test
        X_train = crossValidObj.train
        y_train = crossValidObj.labels_train
        X_val = crossValidObj.test
        y_val = crossValidObj.labels_test
        # Preprocessing: reshape the image data into rows
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_val = np.reshape(X_val, (X_val.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))
        # Normalize the data: subtract the mean image
        # NOTE(review): in-place ``-=`` assumes float arrays — confirm dtype.
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image
        # Add bias dimension and transform into columns
        X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
        X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
        X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T
        # Baseline model with fixed hyperparameters for this fold.
        SVM_sgd = SVM()
        losses_sgd = SVM_sgd.train(X_train, y_train, method='sgd',
                                   batch_size=200, learning_rate=1e-6,
                                   reg=1e5, num_iters=1000, verbose=False,
                                   vectorized=True)
        y_train_pred_sgd = SVM_sgd.predict(X_train)[0]
        print('Training accuracy: %f' % (np.mean(y_train == y_train_pred_sgd)))
        y_val_pred_sgd = SVM_sgd.predict(X_val)[0]
        print('Validation accuracy: %f' % (np.mean(y_val == y_val_pred_sgd)))
        # NOTE(review): ``i`` is reused here as a grid-progress counter,
        # clobbering the fold index (harmless — ``for`` rebinds it — but
        # confusing).
        i = 0
        interval = 5
        for learning_rate in np.linspace(learning_rates[0],
                                         learning_rates[1], num=interval):
            i += 1
            print('The current iteration is %d/%d' % (i, interval))
            for reg in np.linspace(regularization_strengths[0],
                                   regularization_strengths[1], num=interval):
                svm = SVM()
                svm.train(X_train, y_train, method='sgd', batch_size=200,
                          learning_rate=learning_rate, reg=reg,
                          num_iters=1000, verbose=False, vectorized=True)
                y_train_pred = svm.predict(X_train)[0]
                y_val_pred = svm.predict(X_val)[0]
                train_accuracy = np.mean(y_train == y_train_pred)
                val_accuracy = np.mean(y_val == y_val_pred)
                # results/best_val/best_svm persist across folds, so the
                # final best_svm is the best over ALL folds' grids.
                results[(learning_rate, reg)] = (train_accuracy, val_accuracy)
                if val_accuracy > best_val:
                    best_val = val_accuracy
                    best_svm = svm
                else:
                    pass
        # Print out the results
        for learning_rate, reg in sorted(results):
            train_accuracy, val_accuracy = results[(learning_rate, reg)]
            print('learning rate %e and regularization %e, \n \
the training accuracy is: %f and validation accuracy is: %f.\n' %
                  (learning_rate, reg, train_accuracy, val_accuracy))
        print(accuracy)
        # Evaluate the best-so-far model on the held-out test set.
        y_test_predict_result = best_svm.predict(X_test)
        y_test_predict = y_test_predict_result[0]
        test_accuracy = np.mean(oy_test == y_test_predict)
        accuracy.append(test_accuracy)
        totalAccuracy += test_accuracy
        print('The test accuracy is: %f' % test_accuracy)
        print(accuracy)
    avgAccuracy = totalAccuracy / 5.0
    print('Average Accuracy: %f' % avgAccuracy)