Ejemplo n.º 1
0
    def start(self):
        """Run the full ML pipeline for this job, stage by stage.

        Returns:
            bool: True when every stage completed, False when any stage
            raised (the job is then marked "Errored" and the exception
            is logged with its traceback).
        """
        # perform some logging
        self.jlogger.info("Starting job with job id {}".format(self.job_id))
        self.jlogger.debug("Job Config: {}".format(self.config))
        self.jlogger.debug("Job Other Data: {}".format(self.job_data))

        try:
            # Pipeline stages run in a fixed order; each one mutates `self`.
            rud.ReadUserData(self)
            fg.FeatureGeneration(self, is_train=True)
            pp.Preprocessing(self, is_train=True)
            fs.FeatureSelection(self, is_train=True)
            fe.FeatureExtraction(self, is_train=True)
            clf.Classification(self)
            cv.CrossValidation(self)
            tsg.TestSetGeneration(self)
            tspp.TestSetPreprocessing(self)
            tsprd.TestSetPrediction(self)
            job_success_status = True
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; any pipeline failure is recorded and logged.
            job_success_status = False
            helper.update_running_job_status(self.job_id, "Errored")
            self.jlogger.exception("Exception occurred in ML Job {} ".format(
                self.job_id))

        return job_success_status
Ejemplo n.º 2
0
 def buildTestsTrainings(self):
     """Build the cross-validation object from the six enzyme-class
     protein lists, adding one class per list in a fixed order."""
     self.crossValidation = CrossValidation.CrossValidation()
     # Order matters to the fold layout, so keep the original sequence.
     enzyme_classes = (
         self.oxidoreductaseProteinList,
         self.transferaseProteinList,
         self.hydrolaseProteinList,
         self.lyaseProteinList,
         self.isomeraseProteinList,
         self.ligaseProteinList,
     )
     for protein_list in enzyme_classes:
         self.crossValidation.addClass(protein_list)
Ejemplo n.º 3
0
    def test_addClass(self):
        """Adding two generated classes should yield a valid cross validation."""
        cross_val = CrossValidation.CrossValidation()

        self.class1 = self.createClass('1', 148)
        self.class2 = self.createClass('2', 17)

        # Register both classes in order, then validate the result.
        for protein_class in (self.class1, self.class2):
            cross_val.addClass(protein_class)

        self.assertCrossValidation(cross_val)
Ejemplo n.º 4
0
def run_part3(trainX, trainY, testX, testY, lr, eps, max_iter, lmd_reg, k=1):
    valid_loss = []

    smp_num, dim_num = trainX.shape
    test_num = smp_num / k  # sample number of a validation set
    random_index = random.sample(xrange(0, smp_num),
                                 smp_num)  # for randomized split

    for lmd in lmd_reg:
        loss = cv.CrossValidation(trainX, trainY, lr, eps, max_iter, lmd, k,
                                  test_num, random_index)
        valid_loss.append(loss)

    print "for diff lambda, their final validation loss are:", valid_loss
Ejemplo n.º 5
0
 def __init__(self, path_folder, winlen):
     """Read every wave file under *path_folder*, extract MFCC features
     and prepare a 2-fold cross-validation split over the speakers.

     Args:
         path_folder: directory handed to WaveReader.
         winlen: analysis window length in seconds; None selects the
             default of 0.025.
     """
     # PEP 8: compare to None with `is not`, not `!=`.
     self.winlen_ = winlen if winlen is not None else 0.025
     self.reader_ = WaveReader.WaveReader(path_folder)
     (self.signals, self.rate) = self.reader_.read_all()
     self.converter = WaveToMfcc.WaveToMfcc(self.signals,
                                            self.rate,
                                            self.winlen_,
                                            nfilt=30,
                                            ncep=7)
     self.gmm_table_ = []  # filled later with per-speaker GMMs (presumably)
     self.cross_split = CrossValidation.CrossValidation(
         self.converter.list_of_speakers, 2)
     self.results_ = np.array([])
     self.rr_ = np.array([])
Ejemplo n.º 6
0
 def test_getNumberTests(self):
     """Tests the getNumberTests method against known class sizes."""
     cross_val = CrossValidation.CrossValidation()

     # (class size, expected (tests, remainder) pair) — same six cases
     # the original checked one-by-one.
     cases = [
         (73, (7, 10)),
         (117, (12, 9)),
         (131, (13, 14)),
         (51, (5, 6)),
         (47, (5, 2)),
         (17, (1, 8)),
     ]
     for size, expected in cases:
         self.assertEqual(cross_val.getNumberTests(size), expected)
Ejemplo n.º 7
0
##  Replace my references with references to your answers to those assignments.

## IMPORTANT NOTE !!
## Remember to install the Pillow library (which is required to execute 'import PIL')
## Remember to install Pytorch: https://pytorch.org/get-started/locally/ (if you want GPU you need to figure out CUDA...)

from PIL import Image
import torchvision
import torchvision.transforms as transforms
import torch
import numpy as np

import Assignment5Support
import EvaluationsStub
import CrossValidation
crossValidation = CrossValidation.CrossValidation(3)
import Featurize

if __name__ == '__main__':
    kDataPath = '../dataset_B_Eye_Images'

    # Load both eyes' images and hold out 25% of the data for testing.
    xRaw, yRaw = Assignment5Support.LoadRawData(kDataPath, includeLeftEye=True, includeRightEye=True)
    xTrainRaw, yTrainRaw, xTestRaw, yTestRaw = Assignment5Support.TrainTestSplit(xRaw, yRaw, percentTest=.25)

    print('Train is %f percent closed.' % (sum(yTrainRaw)/len(yTrainRaw)))
    print('Test is %f percent closed.' % (sum(yTestRaw)/len(yTestRaw)))

    # Load the images and then convert them into tensors (no normalization)
    xTrainImages = [Image.open(path) for path in xTrainRaw]
    xTrain, yTrain = Featurize.Featurize(xTrainImages, yTrainRaw)
    print(f'Training data size: {xTrain.size()}')
Ejemplo n.º 8
0
    # NOTE(review): fragment of a larger function — `gen_dir`, `filename`,
    # `feature_selection_name`, `number_of_features`, `ann_proposal` and
    # `momentum_proposal` are defined outside this view.
    dataset = DataSet(filename='../config/referral_source.txt')

    path_to_file = os.path.join( gen_dir, filename+'.csv')

    df = pd.read_csv(path_to_file, sep=',')

    # Rank the features two ways; k_best / n_attrs = every column of the
    # first row except the target, presumably the 'value' column.
    sf_univarate =  dataset.univariate_selection(data=df, k_best=(len(df.loc[0]) - 1) )
    sf_univarate.insert(0, 'value')

    sf_importance =  dataset.f_importance(data=df, n_attrs =(len(df.loc[0]) - 1) )
    sf_importance.insert(0, 'value')


    nm = NetworkModel()
    ds = CrossValidation()

    result_list = []

    # Choose which ranking to use by name.
    # NOTE(review): any name other than 'univ' / 'impot' leaves
    # f_selection undefined — confirm callers only pass these two.
    if feature_selection_name == 'univ':
        f_selection = sf_univarate
    elif feature_selection_name == 'impot':
        f_selection = sf_importance

    tmp_row = []
    # Grid over the proposed network layouts and momentum values.
    for layers in ann_proposal:
        for momentum in momentum_proposal:
            model = nm.create_model(layers=layers,
                                    input_size=number_of_features, 
                                    output_size=len(set(df['value'])) ,
                                    momentum=momentum)
Ejemplo n.º 9
0
def main():
    print ""
    print "\t+----------------------------------------------------------------+"
    print "\t|                                                                |"
    print "\t|       CROSS VALIDATION OF LEARNING FORM EXAMPLE MODULES (LEM1) |"
    print "\t|                    RULE INDUCTION ALGORITHM                    |"
    print "\t|       Author : Madhu Chegondi                                  |"
    print "\t|       KUID   : m136c192                                        |"
    print "\t|                                                                |"
    print "\t+----------------------------------------------------------------+"
    print ""
    dataFile = raw_input("\tEnter Name Of DataFile : ")
    while (True):
        if (dataFile):
            try:
                dfp = open('Data/' + dataFile, 'r')
                # This Program assumes that first 2 lines of the input data filename have
                # < a a a d >
                # [ attribute1 attribute2 attribute3 decision ]
                header1 = dfp.readline()
                header2 = dfp.readline().strip().split()
                AttNames = header2[1:-1]
                DesName = header2[-2]
                attr = []
                decisions = []
                for line in dfp:
                    if re.match(r'^\!.*', line) or line.strip() == '':
                        continue
                    line.strip()
                    values = line.split()
                    rawData = {}
                    des = {}
                    for i in range(len(values) - 1):
                        try:
                            if (type(float(values[i])) == float):
                                rawData[AttNames[i]] = float(values[i])
                        except ValueError:
                            rawData[AttNames[i]] = values[i]
                    attr.append(rawData)
                    des[DesName] = values[-1]
                    decisions.append(des.items())
                break
            except:
                print "\t\tERROR: Enter A Valid File Name\n"
                dataFile = raw_input("\tEnter Name Of DataFile : ")
        else:
            dataFile = raw_input("\tEnter Name Of DataFile : ")

    print "\n\tCROSS VALIDATION TECHNIQUES"
    print "\t\t1. BOOTSTRAP CROSS VALIDATION"
    print "\t\t2. LEAVING ONE OUT CROSS VALIDATION"
    choice = raw_input("\n\tENTER YOUR CHOICE OF CROSS VALIDATION (1 or 2) : ")
    while True:
        if choice == '1' or choice == '2':
            break
        else:
            choice = raw_input(
                "\tENTER YOUR CHOICE OF CROSS VALIDATION (1 or 2) : ")

    samples = None
    if choice == '1':
        method = 'Bootstrap'
        print "\n\tCONFIGURING BOOTSTRAP"
        samples = raw_input(
            "\t\tHow many samples do you wish to create (default 200 samples) : "
        )
    else:
        method = 'LeaveOneOut'

    CrossValidation.CrossValidation(attr, decisions, DesName, method, samples,
                                    dataFile)
def executeSVM(X_train, y_train, OX_test, oy_test):
    """Train linear SVMs with 5-fold cross validation plus a grid search
    over learning rate and regularisation strength, printing per-fold
    and average test accuracies.

    NOTE(review): `results`, `best_val` and `best_svm` accumulate across
    all five folds, so each fold's grid search competes against earlier
    folds' entries — confirm this is intended.
    """
    learning_rates = [1e-5, 1e-8]
    regularization_strengths = [10e2, 10e4]
    results = {}       # (learning_rate, reg) -> (train_acc, val_acc)
    best_val = -1      # best validation accuracy seen so far (any fold)
    best_svm = None    # model that achieved best_val
    # X_train = getCIFAR_as_32Pixels_Image(X_train)
    # OX_test = getCIFAR_as_32Pixels_Image(OX_test)
    accuracy = []          # per-fold test accuracies
    totalAccuracy = 0.0

    ## Implementing Cross Validation
    crossValidObj = CrossValidation(5, X_train, y_train)
    foldsGen = crossValidObj.generateTrainAndTest()
    for i in range(5):
        # advance the fold generator; the split is then read back through
        # the object's attributes rather than the yielded value
        next(foldsGen)
        X_test = OX_test
        X_train = crossValidObj.train
        y_train = crossValidObj.labels_train
        X_val = crossValidObj.test
        y_val = crossValidObj.labels_test

        # Preprocessing: reshape the image data into rows
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_val = np.reshape(X_val, (X_val.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))

        # Normalize the data: subtract the mean image
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

        # Add bias dimension and transform into columns
        X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
        X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
        X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

        # Baseline model at fixed hyper-parameters, for reference output.
        SVM_sgd = SVM()

        losses_sgd = SVM_sgd.train(X_train,
                                   y_train,
                                   method='sgd',
                                   batch_size=200,
                                   learning_rate=1e-6,
                                   reg=1e5,
                                   num_iters=1000,
                                   verbose=False,
                                   vectorized=True)

        y_train_pred_sgd = SVM_sgd.predict(X_train)[0]
        print('Training accuracy: %f' % (np.mean(y_train == y_train_pred_sgd)))
        y_val_pred_sgd = SVM_sgd.predict(X_val)[0]
        print('Validation accuracy: %f' % (np.mean(y_val == y_val_pred_sgd)))

        # NOTE(review): this reuses (and clobbers) the outer fold loop's
        # variable `i`; harmless with `for i in range(5)` but fragile.
        i = 0
        interval = 5
        # Grid search: 5 learning rates x 5 regularisation strengths.
        for learning_rate in np.linspace(learning_rates[0],
                                         learning_rates[1],
                                         num=interval):
            i += 1
            print('The current iteration is %d/%d' % (i, interval))
            for reg in np.linspace(regularization_strengths[0],
                                   regularization_strengths[1],
                                   num=interval):
                svm = SVM()
                svm.train(X_train,
                          y_train,
                          method='sgd',
                          batch_size=200,
                          learning_rate=learning_rate,
                          reg=reg,
                          num_iters=1000,
                          verbose=False,
                          vectorized=True)
                y_train_pred = svm.predict(X_train)[0]
                y_val_pred = svm.predict(X_val)[0]
                train_accuracy = np.mean(y_train == y_train_pred)
                val_accuracy = np.mean(y_val == y_val_pred)
                results[(learning_rate, reg)] = (train_accuracy, val_accuracy)
                # keep the model with the best validation accuracy so far
                if val_accuracy > best_val:
                    best_val = val_accuracy
                    best_svm = svm
                else:
                    pass

        # Print out the results
        for learning_rate, reg in sorted(results):
            train_accuracy, val_accuracy = results[(learning_rate, reg)]
            print('learning rate %e and regularization %e, \n \
            the training accuracy is: %f and validation accuracy is: %f.\n' %
                  (learning_rate, reg, train_accuracy, val_accuracy))
            print(accuracy)

        # Evaluate the overall best model on the held-out test set.
        y_test_predict_result = best_svm.predict(X_test)
        y_test_predict = y_test_predict_result[0]
        test_accuracy = np.mean(oy_test == y_test_predict)
        accuracy.append(test_accuracy)
        totalAccuracy += test_accuracy
        print('The test accuracy is: %f' % test_accuracy)
    print(accuracy)
    avgAccuracy = totalAccuracy / 5.0
    print('Average Accuracy: %f' % avgAccuracy)