Example #1
def CrossValidation(estimator, data, rowstart, rowfinish, attributes, CV,
                    CVparams):
    CVObject = cv.GetCVObject(CV, **CVparams)
    X = data["inputdata"][rowstart:rowfinish]
    y = data["outputdata"][rowstart:rowfinish]
    groups = data["groupdata"][rowstart:rowfinish] if "groupdata" in data.keys(
    ) else None
    return cv.cross_validation(estimator, X, y, groups, attributes, CVObject)
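The wrapper above slices the input, output, and optional group arrays out of a data dictionary and hands them to a cv helper module together with a splitter built from a CV name and its parameters. For comparison, the same k-fold pattern can be written self-contained with scikit-learn; this is a generic sketch, not the project's cv module, and the estimator and fold count are arbitrary choices:

# Generic k-fold cross-validation sketch with scikit-learn, shown only
# for comparison with the wrapper above (not the project's cv module).
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score

X = np.random.rand(100, 4)           # stand-in for data["inputdata"]
y = np.random.randint(0, 2, 100)     # stand-in for data["outputdata"]

cv_object = KFold(n_splits=5, shuffle=True, random_state=0)
scores = cross_val_score(LogisticRegression(max_iter=200), X, y, cv=cv_object)
print(scores.mean())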
def neural_network(layers, lamb, theta_matrices, inputs, outputs,
                   dataset_file):
    network = np.array(layers)

    # theta_matrices already holds one weight matrix per layer transition;
    # copy them into a single numpy array
    thetas = np.array(list(theta_matrices))

    regularization = lamb

    # pair each input vector with its expected output
    examples = [[inputs[i], outputs[i]] for i in range(len(inputs))]

    thetas_finais, gradientes_finais = cv.run(examples, thetas, regularization,
                                              network, dataset_file)
    #j_value = calculate_j(examples, thetas_finais, regularization, network)
    #print(j_value)
    return thetas_finais, gradientes_finais
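A hypothetical call to neural_network, only to illustrate the expected shapes: one theta matrix per layer transition, here assumed to include a bias column, with inputs and outputs given as parallel lists. The numbers and the dataset file name are placeholders, and the actual training happens in the cv.run call, which is not shown here:

# Illustrative shapes only; values and the file name are placeholders,
# and the bias-column convention is an assumption, not shown in the source.
import numpy as np

layers = [2, 3, 1]               # 2 inputs, 3 hidden units, 1 output
theta1 = np.random.rand(3, 3)    # 3 hidden units x (2 inputs + bias)
theta2 = np.random.rand(1, 4)    # 1 output x (3 hidden units + bias)

thetas, gradients = neural_network(layers, lamb=0.25,
                                   theta_matrices=[theta1, theta2],
                                   inputs=[[0.1, 0.2], [0.9, 0.8]],
                                   outputs=[[0.0], [1.0]],
                                   dataset_file="dataset.csv")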
Example #3
    def start(self):
        # perform some logging
        self.jlogger.info("Starting job with job id {}".format(self.job_id))
        self.jlogger.debug("Job Config: {}".format(self.config))
        self.jlogger.debug("Job Other Data: {}".format(self.job_data))

        try:
            rud.ReadUserData(self)
            fg.FeatureGeneration(self, is_train=True)
            pp.Preprocessing(self, is_train=True)
            fs.FeatureSelection(self, is_train=True)
            fe.FeatureExtraction(self, is_train=True)
            clf.Classification(self)
            cv.CrossValidation(self)
            tsg.TestSetGeneration(self)
            tspp.TestSetPreprocessing(self)
            tsprd.TestSetPrediction(self)
            job_success_status = True
        except Exception:
            job_success_status = False
            helper.update_running_job_status(self.job_id, "Errored")
            self.jlogger.exception("Exception occurred in ML Job {} ".format(
                self.job_id))

        return job_success_status
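The start method above runs the pipeline stages in sequence and relies on one broad exception handler to mark the job as errored and log the full traceback. A minimal self-contained version of that pattern with the standard logging module, using illustrative names rather than the project's helpers, looks like this:

# Minimal run-steps-and-log-any-failure sketch using the standard library;
# the step functions and job id below are illustrative placeholders.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ml_job")

def run_job(steps, job_id):
    """Run pipeline steps in order; log a full traceback on any failure."""
    try:
        for step in steps:
            step()
        return True
    except Exception:
        logger.exception("Exception occurred in ML Job %s", job_id)
        return False

print(run_job([lambda: None, lambda: 1 / 0], job_id=42))  # logs the error, prints False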
Example #4
    def run(self, train_returns, train_sizes, bin_number):
        cross_val = CrossValidation.CrossVal(train_sizes, bin_number)
        bins = cross_val.get_bins()

        scores = self.predict(train_returns, bins)
        self.interpret_scores(scores)
        print(scores)  #decile scores
Example #5
 def buildTestsTrainings(self):
     """ Do the cross validation with the protein classes """
     self.crossValidation = CrossValidation.CrossValidation()
     self.crossValidation.addClass(self.oxidoreductaseProteinList)
     self.crossValidation.addClass(self.transferaseProteinList)
     self.crossValidation.addClass(self.hydrolaseProteinList)
     self.crossValidation.addClass(self.lyaseProteinList)
     self.crossValidation.addClass(self.isomeraseProteinList)
     self.crossValidation.addClass(self.ligaseProteinList)
Example #6
    def iterativeTester(self):

        # Example tests on ionosphere data
        # ---------------------------------

        # lr = 20, eps = 0, tests = 50, iterstep = 5
        # -> Oscillates around best answer

        # lr = 1, eps = 0, tests = 50, iterstep = 5
        # -> Reaches answer quickly

        # lr = 0.01, eps = 0, tests = 50, iterstep = 5
        # -> Does not have time to reach best answer

        # Example tests on adult data
        # ---------------------------------

        # lr = 1, eps = 0, tests = 10, iterstep = 1000
        # -> Oscillates a lot

        # lr = 1, eps = 0, tests = 100, iterstep = 10
        # -> Still lots of oscillating

        # lr = 0.001, eps = 0, tests = 100, iterstep = 10
        # -> Still lots of oscillating

        # Example tests on glass data
        # -----------------------------------
        # lr = 0.02, eps = 0.01, tests = 10, iterstep = 5000
        # -> Good results (over 0.965)

        # Example tests on auto-mpg data
        # -----------------------------------
        # lr = 0.02, eps = 0.01, tests = 10, iterstep = 5000
        # -> Good results (over 0.8 except first run)

        # Settings
        # --------------
        test_lr = 0.001
        test_eps = 0
        tests = 100000
        iterStep = 10
        self.results = [0] * tests

        # Test iteratively
        for i in range(0, tests):
            crossValidator = CV.LogRegCrossValidation(self.data,
                                                      self.target,
                                                      self.k,
                                                      lr=test_lr,
                                                      eps=test_eps,
                                                      iterations=(i + 1) *
                                                      iterStep)
            crossValidator.crossValidation()
            self.results[i] = crossValidator.averageError()
        # Plot results
        self.plotResults()
Example #7
    def test_addClass(self):
        """ test adding a class """
        crossValidation = CrossValidation.CrossValidation()
        
        self.class1 = self.createClass('1', 148)
        self.class2 = self.createClass('2', 17)
        
        crossValidation.addClass(self.class1)
        crossValidation.addClass(self.class2)

        self.assertCrossValidation(crossValidation)
Example #8
def run_part3(trainX, trainY, testX, testY, lr, eps, max_iter, lmd_reg, k=1):
    valid_loss = []

    smp_num, dim_num = trainX.shape
    test_num = smp_num / k  # sample number of a validation set
    random_index = random.sample(xrange(0, smp_num),
                                 smp_num)  # for randomized split

    for lmd in lmd_reg:
        loss = cv.CrossValidation(trainX, trainY, lr, eps, max_iter, lmd, k,
                                  test_num, random_index)
        valid_loss.append(loss)

    print "for diff lambda, their final validation loss are:", valid_loss
def run(dominios, targets, anotacoes, atributos, incluiParticipante):
    folds = cross.crossValidationParticipant(6, anotacoes)
    
    diceTotal = []
    masiTotal = []
    acuraciaTotal = 0.0
    results = []
    
    acertosT = {}
    totalT = {}
    
    for participante in folds.keys():
        resultadoTotal, dice, masi, acuracia = exp4.run(dominios, targets, folds[participante], atributos, {}, incluiParticipante)
        
        diceTotal.extend(dice)
        masiTotal.extend(masi)
        acuraciaTotal = acuraciaTotal + acuracia
        
        for resultados in resultadoTotal:
            acertos = resultados[0]
            total = resultados[1]
            
            for atributo in acertos.keys():
                if atributo not in acertosT:
                    acertosT[atributo] = 0.0
                    totalT[atributo] = 0.0
                
                acertosT[atributo] = acertosT[atributo] + acertos[atributo]
                totalT[atributo] = totalT[atributo] + total[atributo]
        results.append([acertosT, totalT])
    
    print "\n"
    print "General:"
    print 50 * "*"
    print "Expressions: "
    print "Dice: " + str(np.mean(diceTotal))
    print "Masi: " + str(np.mean(masiTotal))
    print "Accuracy: " + str(acuraciaTotal / len(diceTotal))
    print "\n"       
    
    print "Attributes:"
    print 15 * "-"     
    for atributo in acertosT.keys():
        print "Attribute: " + str(atributo)
        print "Accuracy: " + str(acertosT[atributo] / totalT[atributo])
        print 10 * "-" 
    
    return results, diceTotal, masiTotal, acuraciaTotal
Example #10
 def __init__(self, path_folder, winlen):
     if winlen is not None:
         self.winlen_ = winlen
     else:
         self.winlen_ = 0.025
     self.reader_ = WaveReader.WaveReader(path_folder)
     (self.signals, self.rate) = self.reader_.read_all()
     self.converter = WaveToMfcc.WaveToMfcc(self.signals,
                                            self.rate,
                                            self.winlen_,
                                            nfilt=30,
                                            ncep=7)
     self.gmm_table_ = []
     self.cross_split = CrossValidation.CrossValidation(
         self.converter.list_of_speakers, 2)
     self.results_ = np.array([])
     self.rr_ = np.array([])
Example #11
    def run(self, train_corpus, freq_type, stopwords, train_returns,
            bin_number, filename):
        vectorizer = MatrixVectorizer.Vectorizer(train_corpus, freq_type,
                                                 stopwords)
        train_count_matrix = vectorizer.get_count_matrix()

        negative_word_list = self.create_negative_stuff(filename)
        negative_word_matrix = vectorizer.transform_new_data(
            negative_word_list)

        document_scores = self.score_documents(negative_word_matrix,
                                               train_count_matrix)

        cross_val = CrossValidation.CrossVal(document_scores, bin_number)
        bins = cross_val.get_bins()

        scores = self.predict(train_returns, bins)
        self.interpret_scores(scores)
        print(scores)
Example #12
    def run(self, train_corpus, freq_type, stopwords, test_corpus, svals,
            reduce_type, test_returns, bin_number, train_returns, method):
        vectorizer = MatrixVectorizer.Vectorizer(train_corpus, freq_type,
                                                 stopwords)
        train_count_matrix = vectorizer.get_count_matrix()
        test_count_matrix = vectorizer.transform_new_data(test_corpus)

        dim_reducer = DimensionReducer.Reducer(train_count_matrix)
        reduced_train_count_matrix = dim_reducer.reduce_dimension(
            svals, reduce_type)
        reduced_test_count_matrix = dim_reducer.reduce_more_data(
            test_count_matrix.todense())

        cross_val = CrossValidation.CrossVal(test_returns, bin_number)
        bins = cross_val.get_bins()

        scores = self.fit_predict(reduced_train_count_matrix, train_returns,
                                  reduced_test_count_matrix, bins, method)
        self.interpret_scores(scores)
Example #13
def getSolution_5_a_1(IrisData, fold, patternDimension, labelIndex,
                      FlowerDict):
    ## 10-fold cross validation
    errorRateList, gaussianMatrix = crvd.crossValidate(IrisData, fold,
                                                       patternDimension,
                                                       labelIndex)
    sizeOfErrorList = len(errorRateList)
    numOfGroup = len(gaussianMatrix)
    numOfGaussianEachGroup = len(gaussianMatrix[0])
    assert sizeOfErrorList == numOfGroup

    ## first print overall error
    averageErrorRate = sum(errorRateList) / len(errorRateList)
    string = str.format("errorRate: {0:.4f}%", averageErrorRate * 100)
    string += "\n ["
    ## now print the parameters of each cross-validation group
    for i in range(0, numOfGroup):
        print
        print "Cross Validation Set " + str(i + 1) + ": \\\\"
        print "\htab Error of this Testing Set: " + str.format(
            "${0:.2f}\%$ \n", errorRateList[i] * 100)
        gaussians = gaussianMatrix[i]
        for j in range(0, numOfGaussianEachGroup):
            tempGaussian = gaussians[j]
            print "\\textbf{For class " + FlowerDict.get(
                tempGaussian.getLabel()) + "}:"
            tempGaussian.showMuInTex()
            print "\\vspace{-1cm}"
            tempGaussian.showSigmaInTex()
            print
        if (i + 1) % 2 == 0:
            print "\\newpage"
    ## overall summary for error
    for i in range(0, numOfGroup):
        tempError = errorRateList[i]
        string += " " + str.format("{0:.3f}%", tempError * 100) + ","
    string += "] \n"
    print string
Example #14
 def test_getNumberTests(self):
     """ Tests the getNumberTests method """
     crossValidation = CrossValidation.CrossValidation()
     actual_test1 = crossValidation.getNumberTests(73)
     actual_test2 = crossValidation.getNumberTests(117)
     actual_test3 = crossValidation.getNumberTests(131)
     actual_test4 = crossValidation.getNumberTests(51)
     actual_test5 = crossValidation.getNumberTests(47)
     actual_test6 = crossValidation.getNumberTests(17)
     
     expected_test1 = 7, 10
     expected_test2 = 12, 9
     expected_test3 = 13, 14
     expected_test4 = 5, 6
     expected_test5 = 5, 2
     expected_test6 = 1, 8
     
     self.assertEqual(actual_test1, expected_test1)
     self.assertEqual(actual_test2, expected_test2)
     self.assertEqual(actual_test3, expected_test3)
     self.assertEqual(actual_test4, expected_test4)
     self.assertEqual(actual_test5, expected_test5)
     self.assertEqual(actual_test6, expected_test6)
Example #15
print("=== Preprocessing ===")
pr.praproses_data(data_input, data_clean)

# --- Load word dictionary (unique words) ---
print("=== Fitur Freq Perdoc & Alldoc ===")
fitur_onedoc, fitur_alldoc = Regex.load_fitur_postag(data_clean)

# '''
print("=== Bag Of Words ===")
bow = tfidf.bagofword(fitur_alldoc)

start_time1 = time.time()
# --- Load Feature Extraction Using TF IDF---
print("=== NEW Feature Extraction TfIdf ===")
hasil_ekstraksi_tfidf, bow = tfidf.main(fitur_onedoc, fitur_alldoc, result_tfidf)
h_loss_tfidf = cr.cross_validation(result_tfidf, data_label, path_hasil_tfidf, metode_tfidf)
waktu.write("TF-IDF " +  "--- %s seconds ---" % (time.time() - start_time1) + '\n')

start_time1 = time.time()
# --- Load Feature Extraction Using Vector ---
print("=== NEW Feature Extraction Vector ===")
model = vector.load_w2vec_model(path_model)
hasil_ekstraksi_w2vec = vector.feature_extraction(model, fitur_onedoc, result_word2vec)
h_loss_vector = cr.cross_validation(result_word2vec, data_label, path_hasil_word2vec, metode_w2vec)
waktu.write("W2VEC " +  "--- %s seconds ---" % (time.time() - start_time1) + '\n')

start_time1 = time.time()
# --- Load Feature Extraction Using TF IDF Concat Vector---
print("=== NEW Feature Extraction TfIdf & Vector ===") 
hasil_ekstraksi_gabungan = gabungan.load_weight_gabungan(result_tfidf, bow, model, result_gabungan)
h_loss_gabungan = cr.cross_validation(result_gabungan, data_label, path_hasil_gabungan, metode_gabungan)
Example #16
from collections import OrderedDict


def full_tree(validation_data, attribute_matrix):
    fullDecisionTree = dt.DecisionTree()

    dt.select_node_id(fullDecisionTree, validation_data, attribute_matrix,
                      True)
    dt.add_branch(fullDecisionTree, validation_data, attribute_matrix)
    dt.split_examples(fullDecisionTree, validation_data, attribute_matrix,
                      True)

    print("root attribute selected:" + fullDecisionTree.node_id)

    dt.print_tree(fullDecisionTree)

    return fullDecisionTree


# uncomment to test CrossValidation
if __name__ == '__main__':
    #arquivo = "dadosBenchmark_validacaoAlgoritmoAD.csv"
    arquivo = "vertebra.csv"
    #arquivo = "dataset_191_wine-1.csv"
    #arquivo = "dataset_31_credit-g.csv"

    data, attribute_matrix = CsvReader.read_csv(arquivo)
    #decision_tree = full_tree(data, attribute_matrix)

    cs.run(data, attribute_matrix)
Example #17
X_test = data_set_test
X_test = (X_test - X_test.mean()) / X_test.std()
X_test.insert(0, "Intercept", 1)
X_test = np.matrix(X_test)

# Initial Thetas
theta = np.matrix(np.zeros(shape=X.shape[1]))

# Parameters
learning_rate = 0.01
iteration = 500

print("\nRunning Linear Regression On Whole Set")
result = gd.gradient_descent(X, y, theta, learning_rate, iteration)
gd.plot_graph(iteration, result[1])

final_predictions = X.dot(result[0].T)
mae = gd.mean_absolute_error(final_predictions, y)
print("Mean Absolute Error: {0}".format(mae))

print("\nRunning Linear Regression On Split Sets")
splits = cv.cross_validation_split(data_set, 5)
cv.perform_gradient_on_splits(splits, learning_rate, iteration)

prediction_of_test_set = X_test.dot(result[0].T)
prediction_df = pd.DataFrame(prediction_of_test_set)
prediction_df.columns = ['SalePrice']

df_submission = pd.concat([data_test['Id'], prediction_df], axis=1)
df_submission.to_csv('data/Submission.csv', index=False)
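The script above normalizes the features, fits by gradient descent on the whole training set, then calls cv.cross_validation_split to cut the data into 5 folds and repeats the fit on each split. As a rough idea of what such a splitter typically does (a generic sketch, not the project's cv module):

# Generic k-fold splitter sketch: shuffle row indices and cut them into
# k roughly equal folds. Illustrative only, not the cv module used above.
import numpy as np

def cross_validation_split_sketch(data, k, seed=0):
    rng = np.random.default_rng(seed)
    indices = rng.permutation(len(data))
    return np.array_split(indices, k)

folds = cross_validation_split_sketch(np.arange(100).reshape(50, 2), 5)
print([len(fold) for fold in folds])   # five folds of 10 row indices each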
Example #18
import sys

#Generate the data from the basis function
if(len(sys.argv) == 1):
	#Generate the order of the random true polynomial function
	trueOrder = random.randint(1,10)	
	D = Data.genData(trueOrder)
elif(sys.argv[1] == "nonpoly"):
	D = Data.genNonPoly()
else:
	raise Exception("Invalid command line argument")


#In the following, D is the data set which has all the x values as its first entry and the y values as its second.

error,order = CV.kFoldErrorChoose(D[0],D[1],10,5)

#Graph the points on the base polynomial
Graph.lineColor(D[0],D[1],'red')

#Add Gaussian noise to the data outputs
D[1] = Data.addGaussianNoise(D[1],1.0/2000)

#Graph them as points in blue
Graph.pointsSimple(D[0],D[1])

#Estimate the coefficients of the polynomial with best order
fit = Regression.polyTrain(D[0],D[1],order)

#Get the function's estimates for the training x values
z = [fit(i) for i in D[0]]
def prepClassfier(classfier, predictFunc, classfierParam, classfierName = 'default', doCV = kFold_CV_Active):
    dataAndLabels = loadDataAndLabels(num_mnist_images,input_csv_train if classfierName == 'naiveBayes' else (input_mnist_data_train,input_mnist_labels_train),classfierName)
    if doCV == True:
        return cv.crossValidation(np.array(dataAndLabels[0]),np.array(dataAndLabels[1]),classfier,predictFunc,classfierParam,num_fold)
    return classfier(np.array(dataAndLabels[0]),np.array(dataAndLabels[1]),classfierParam[len(classfierParam)//2])
Example #20
'''
Paper Title: Road Surface Recognition Based on DeepSense Neural Network using Accelerometer Data
Created by ITS Lab, Institute of Computer Science, University of Tartu
'''

from model import DeepSense
import CrossValidation

if __name__ == '__main__':
    ds = DeepSense.DeepSenseTS(preprocess=True)
    cv = CrossValidation.CV(network=ds)
    cv.create_folds()
    cv.train_on_cv()
Example #21
import pprint

import BuildTree
import CrossValidation
import datasets.DataLenses as Lenses
import datasets.DataMushrooms as Mushrooms
import RelativeFrequency

#Import DataFrame from DataMushrooms class
mushrooms_data = Mushrooms.data

#Import DataFrame from DataLenses class
lenses_data = Lenses.data
""" First part: Design and Implementation """

#Build tree using ID3 algorithm for Lenses data
print('Lenses data solution tree:')
pprint.pprint(BuildTree.build_tree(lenses_data))

print()

#Build tree using ID3 algorithm for Mushrooms data
print('Mushrooms data solution tree:')
pprint.pprint(BuildTree.build_tree(mushrooms_data))
""" Second part: Experimentation """

print('Cross-validation over the Mushrooms data:')
#Perform cross-validation over the mushrooms dataset in 10 folds
CrossValidation.cross_validation(mushrooms_data)
def execute_softmax(X_train,y_train,OX_test,oy_test):

    learning_rates = [1e-5, 1e-8]
    regularization_strengths = [10e2, 10e4]
    results = {}
    best_val = -1
    best_softmax = None
    # X_train = getCIFAR_as_32Pixels_Image(X_train)
    # OX_test = getCIFAR_as_32Pixels_Image(OX_test)
    accuracy = []
    totalAccuracy = 0.0

    ## Implementing Cross Validation
    crossValidObj = CrossValidation(5, X_train, y_train)
    foldsGen = crossValidObj.generateTrainAndTest()
    for i in range(5):
        next(foldsGen)
        X_test = OX_test
        X_train = crossValidObj.train
        y_train = crossValidObj.labels_train
        X_val = crossValidObj.test
        y_val = crossValidObj.labels_test

        # Preprocessing: reshape the image data into rows
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_val = np.reshape(X_val, (X_val.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))

        # Normalize the data: subtract the mean image
        mean_image = np.mean(X_train, axis = 0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

        # Add bias dimension and transform into columns
        X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
        X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
        X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

        softmax_sgd = Softmax()
        tic = time.time()
        losses_sgd = softmax_sgd.train(X_train, y_train, method='sgd', batch_size=200, learning_rate=1e-6,
                      reg = 1e5, num_iters=1000, verbose=False, vectorized=True)
        toc = time.time()


        y_train_pred_sgd = softmax_sgd.predict(X_train)[0]
        print('Training accuracy: %f' % (np.mean(y_train == y_train_pred_sgd)))
        y_val_pred_sgd = softmax_sgd.predict(X_val)[0]
        print('Validation accuracy: %f' % (np.mean(y_val == y_val_pred_sgd)))


        # Choose the best hyperparameters by tuning on the validation set
        i = 0
        interval = 5
        for learning_rate in np.linspace(learning_rates[0], learning_rates[1], num=interval):
            i += 1
            print('The current iteration is %d/%d' % (i, interval))
            for reg in np.linspace(regularization_strengths[0], regularization_strengths[1], num=interval):
                softmax = Softmax()
                softmax.train(X_train, y_train, method='sgd', batch_size=200, learning_rate=learning_rate,
                      reg = reg, num_iters=1000, verbose=False, vectorized=True)
                y_train_pred = softmax.predict(X_train)[0]
                y_val_pred = softmax.predict(X_val)[0]
                train_accuracy = np.mean(y_train == y_train_pred)
                val_accuracy = np.mean(y_val == y_val_pred)
                results[(learning_rate, reg)] = (train_accuracy, val_accuracy)
                if val_accuracy > best_val:
                    best_val = val_accuracy
                    best_softmax = softmax
                else:
                    pass

        # Print out the results
        for learning_rate, reg in sorted(results):
            train_accuracy,val_accuracy = results[(learning_rate, reg)]
            print('learning rate %e and regularization %e, \n \
            the training accuracy is: %f and validation accuracy is: %f.\n' % (learning_rate, reg, train_accuracy, val_accuracy))

        y_test_predict_result = best_softmax.predict(X_test)
        y_test_predict = y_test_predict_result[0]
        test_accuracy = np.mean(oy_test == y_test_predict)
        accuracy.append(test_accuracy)
        totalAccuracy+=test_accuracy
        print('The test accuracy is: %f' % test_accuracy)
    print(accuracy)
    avgAccuracy = totalAccuracy / 5.0
    print('Average Accuracy: %f' % avgAccuracy)
Example #23
##  Replace my references with references to your answers to those assignments.

## IMPORTANT NOTE !!
## Remember to install the Pillow library (which is required to execute 'import PIL')
## Remember to install Pytorch: https://pytorch.org/get-started/locally/ (if you want GPU you need to figure out CUDA...)

from PIL import Image
import torchvision
import torchvision.transforms as transforms
import torch
import numpy as np

import Assignment5Support
import EvaluationsStub
import CrossValidation
crossValidation = CrossValidation.CrossValidation(3)
import Featurize

if __name__=='__main__':
    kDataPath = '../dataset_B_Eye_Images'

    (xRaw, yRaw) = Assignment5Support.LoadRawData(kDataPath, includeLeftEye = True, includeRightEye = True)
    (xTrainRaw, yTrainRaw, xTestRaw, yTestRaw) = Assignment5Support.TrainTestSplit(xRaw, yRaw, percentTest = .25)

    print('Train is %f percent closed.' % (sum(yTrainRaw)/len(yTrainRaw)))
    print('Test is %f percent closed.' % (sum(yTestRaw)/len(yTestRaw)))

    # Load the images and then convert them into tensors (no normalization)
    xTrainImages = [ Image.open(path) for path in xTrainRaw ]
    xTrain, yTrain = Featurize.Featurize(xTrainImages, yTrainRaw)
    print(f'Training data size: {xTrain.size()}')
Example #24
def main():
    print ""
    print "\t+----------------------------------------------------------------+"
    print "\t|                                                                |"
    print "\t|       CROSS VALIDATION OF LEARNING FORM EXAMPLE MODULES (LEM1) |"
    print "\t|                    RULE INDUCTION ALGORITHM                    |"
    print "\t|       Author : Madhu Chegondi                                  |"
    print "\t|       KUID   : m136c192                                        |"
    print "\t|                                                                |"
    print "\t+----------------------------------------------------------------+"
    print ""
    dataFile = raw_input("\tEnter Name Of DataFile : ")
    while (True):
        if (dataFile):
            try:
                dfp = open('Data/' + dataFile, 'r')
                # This program assumes that the first two lines of the input data file are of the form:
                # < a a a d >
                # [ attribute1 attribute2 attribute3 decision ]
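                # A hypothetical file in that layout (attribute names and
                # values invented for illustration) might begin with:
                #   < a a a d >
                #   [ Temperature Headache Cough Flu ]
                #   high   yes yes yes
                #   normal no  no  no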
                header1 = dfp.readline()
                header2 = dfp.readline().strip().split()
                AttNames = header2[1:-1]
                DesName = header2[-2]
                attr = []
                decisions = []
                for line in dfp:
                    if re.match(r'^\!.*', line) or line.strip() == '':
                        continue
                    line = line.strip()
                    values = line.split()
                    rawData = {}
                    des = {}
                    for i in range(len(values) - 1):
                        try:
                            if (type(float(values[i])) == float):
                                rawData[AttNames[i]] = float(values[i])
                        except ValueError:
                            rawData[AttNames[i]] = values[i]
                    attr.append(rawData)
                    des[DesName] = values[-1]
                    decisions.append(des.items())
                break
            except:
                print "\t\tERROR: Enter A Valid File Name\n"
                dataFile = raw_input("\tEnter Name Of DataFile : ")
        else:
            dataFile = raw_input("\tEnter Name Of DataFile : ")

    print "\n\tCROSS VALIDATION TECHNIQUES"
    print "\t\t1. BOOTSTRAP CROSS VALIDATION"
    print "\t\t2. LEAVING ONE OUT CROSS VALIDATION"
    choice = raw_input("\n\tENTER YOUR CHOICE OF CROSS VALIDATION (1 or 2) : ")
    while True:
        if choice == '1' or choice == '2':
            break
        else:
            choice = raw_input(
                "\tENTER YOUR CHOICE OF CROSS VALIDATION (1 or 2) : ")

    samples = None
    if choice == '1':
        method = 'Bootstrap'
        print "\n\tCONFIGURING BOOTSTRAP"
        samples = raw_input(
            "\t\tHow many samples do you wish to create (default 200 samples) : "
        )
    else:
        method = 'LeaveOneOut'

    CrossValidation.CrossValidation(attr, decisions, DesName, method, samples,
                                    dataFile)
Example #25
# from hcluster import *
import ParserStars as parser
import CrossValidation as cross
import Experiment1 as exp1
import SVMValidatedExperiment as exp2
import SVMValidatedExperiment2 as exp3
import ExperimentDecisionTree as exp4
import ValidatedExperimentIndividual as exp5

def initialize():
    anotacoes = parser.parseAnnotation()
    dominios = parser.parseDominio()
    participantes = {}
    atributos = ["type", "size", "colour", "hpos", "vpos", "near", "left", "right", "below", "above", "in-front-of"]
    targets = {"01f-t1n":"h", "01f-t1r":"h", "01f-t2n":"h", "01f-t2r":"h", "01o-t1n":"h", "01o-t1r":"h", "01o-t2n":"h", "01o-t2r":"h", "02f-t1n":"o", "02f-t1r":"o", "02f-t2n":"o", "02f-t2r":"o", "02o-t1n":"o", "02o-t1r":"o", "02o-t2n":"o", "02o-t2r":"o", "03f-t1n":"m", "03f-t1r":"m", "03f-t2n":"m", "03f-t2r":"m", "03o-t1n":"m", "03o-t1r":"m", "03o-t2n":"m", "03o-t2r":"m", "04f-t1n":"a", "04f-t1r":"a", "04f-t2n":"a", "04f-t2r":"a", "04o-t1n":"a", "04o-t1r":"a", "04o-t2n":"a", "04o-t2r":"a", "05f-t1n":"m", "05f-t2n":"m", "05f-t1r":"m", "05f-t2r":"m", "05o-t1n":"m", "05o-t1r":"m", "05o-t2n":"m", "05o-t2r":"m", "06f-t1n":"h", "06f-t1r":"h", "06f-t2n":"h", "06f-t2r":"h", "06o-t1n":"h", "06o-t1r":"h", "06o-t2n":"h", "06o-t2r":"h", "07f-t1n":"i", "07f-t1r":"i", "07f-t2n":"i", "07f-t2r":"i", "07o-t1n":"i", "07o-t1r":"i", "07o-t2n":"i", "07o-t2r":"i", "08f-t1n":"a", "08f-t1r":"a", "08f-t2n":"a", "08f-t2r":"a", "08o-t1n":"a", "08o-t1r":"a", "08o-t2n":"a", "08o-t2r":"a" }
    return dominios, targets, anotacoes, atributos, participantes


if __name__ == '__main__':
    dominios, targets, anotacoes, atributos, participantes = initialize()
    
    folds = cross.crossValidation(10, anotacoes)
    
    print "Machine Learning sem ID"
#     exp5.run(dominios, targets, anotacoes, atributos, False)
    exp2.run(dominios, targets, folds, atributos, {}, False)
    
    print "\n\n"
    print "Machine Learning com ID"
#     exp5.run(dominios, targets, anotacoes, atributos, True)
    exp2.run(dominios, targets, folds, atributos, {}, True)
Example #26
def testDataset(csvFile):
    attributes, targetAttr, examples, trainingSet, lista = dataDefinition(
        csvFile)
    cv.unknownDataTest(examples, attributes, targetAttr, 5)
def executeSVM(X_train, y_train, OX_test, oy_test):
    learning_rates = [1e-5, 1e-8]
    regularization_strengths = [10e2, 10e4]
    results = {}
    best_val = -1
    best_svm = None
    # X_train = getCIFAR_as_32Pixels_Image(X_train)
    # OX_test = getCIFAR_as_32Pixels_Image(OX_test)
    accuracy = []
    totalAccuracy = 0.0

    ## Implementing Cross Validation
    crossValidObj = CrossValidation(5, X_train, y_train)
    foldsGen = crossValidObj.generateTrainAndTest()
    for i in range(5):
        next(foldsGen)
        X_test = OX_test
        X_train = crossValidObj.train
        y_train = crossValidObj.labels_train
        X_val = crossValidObj.test
        y_val = crossValidObj.labels_test

        # Preprocessing: reshape the image data into rows
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_val = np.reshape(X_val, (X_val.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))

        # Normalize the data: subtract the mean image
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

        # Add bias dimension and transform into columns
        X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
        X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
        X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

        SVM_sgd = SVM()

        losses_sgd = SVM_sgd.train(X_train,
                                   y_train,
                                   method='sgd',
                                   batch_size=200,
                                   learning_rate=1e-6,
                                   reg=1e5,
                                   num_iters=1000,
                                   verbose=False,
                                   vectorized=True)

        y_train_pred_sgd = SVM_sgd.predict(X_train)[0]
        print('Training accuracy: %f' % (np.mean(y_train == y_train_pred_sgd)))
        y_val_pred_sgd = SVM_sgd.predict(X_val)[0]
        print('Validation accuracy: %f' % (np.mean(y_val == y_val_pred_sgd)))

        i = 0
        interval = 5
        for learning_rate in np.linspace(learning_rates[0],
                                         learning_rates[1],
                                         num=interval):
            i += 1
            print('The current iteration is %d/%d' % (i, interval))
            for reg in np.linspace(regularization_strengths[0],
                                   regularization_strengths[1],
                                   num=interval):
                svm = SVM()
                svm.train(X_train,
                          y_train,
                          method='sgd',
                          batch_size=200,
                          learning_rate=learning_rate,
                          reg=reg,
                          num_iters=1000,
                          verbose=False,
                          vectorized=True)
                y_train_pred = svm.predict(X_train)[0]
                y_val_pred = svm.predict(X_val)[0]
                train_accuracy = np.mean(y_train == y_train_pred)
                val_accuracy = np.mean(y_val == y_val_pred)
                results[(learning_rate, reg)] = (train_accuracy, val_accuracy)
                if val_accuracy > best_val:
                    best_val = val_accuracy
                    best_svm = svm
                else:
                    pass

        # Print out the results
        for learning_rate, reg in sorted(results):
            train_accuracy, val_accuracy = results[(learning_rate, reg)]
            print('learning rate %e and regularization %e, \n \
            the training accuracy is: %f and validation accuracy is: %f.\n' %
                  (learning_rate, reg, train_accuracy, val_accuracy))
            print(accuracy)

        y_test_predict_result = best_svm.predict(X_test)
        y_test_predict = y_test_predict_result[0]
        test_accuracy = np.mean(oy_test == y_test_predict)
        accuracy.append(test_accuracy)
        totalAccuracy += test_accuracy
        print('The test accuracy is: %f' % test_accuracy)
    print(accuracy)
    avgAccuracy = totalAccuracy / 5.0
    print('Average Accuracy: %f' % avgAccuracy)
Example #28
    dataset = DataSet(filename='../config/referral_source.txt')

    path_to_file = os.path.join( gen_dir, filename+'.csv')

    df = pd.read_csv(path_to_file, sep=',')

    sf_univarate =  dataset.univariate_selection(data=df, k_best=(len(df.loc[0]) - 1) )
    sf_univarate.insert(0, 'value')

    sf_importance =  dataset.f_importance(data=df, n_attrs =(len(df.loc[0]) - 1) )
    sf_importance.insert(0, 'value')


    nm = NetworkModel()
    ds = CrossValidation()

    result_list = []

    if feature_selection_name == 'univ':
        f_selection = sf_univarate
    elif feature_selection_name == 'impot':
        f_selection = sf_importance

    tmp_row = []
    for layers in ann_proposal:
        for momentum in momentum_proposal:
            model = nm.create_model(layers=layers,
                                    input_size=number_of_features, 
                                    output_size=len(set(df['value'])) ,
                                    momentum=momentum)
Example #29
def EvaluateCompletionMain(data,mask,method,useRelation,execTimes,logger,information,unobservedRates = None,alpha=None,ranks=None):
    """
    Main body of the numerical experiment.
    """
    global log 
    log = logger

    varianceTimes = execTimes

    L = data["L"]
    X = data["X"]
    normX = numpy.linalg.norm(X)
    #X = X / normX

    if not useRelation:
        L = [None for i in range(X.ndim)]
        alpha = [1]

    #if unobservedRates == None:
    #    unobservedRates = array([0.5,0.75,0.9])
    #    #unobservedRates = array([0.75,0.9,0.95])
    #    #unobservedRates = unobservedRates[::-1]
    #if alpha == None:
    #    #alpha =[pow(10,x) for x in [-4,-3,-2,-1,0,1]] #for L
    #    alpha =[pow(10,x) for x in [-5,-4,-3,-2,-1]] #for L
    #    alpha =[pow(10,x) for x in [-7,-6,-5,-4,-3,-2,-1]] #for L

    #if ranks == None:
    #    #ranks = [2,3,5]
    #    ranks = [5,7,9]
    #    ranks = [40]
    #    ranks = [5,10,15]
    #    ranks=[35]
    #    #ranks = [7]

    shape = X.shape

    # alpha only matters for L
    if all(map(lambda i:i==None,L)):
        print "hogehogehogehogehogehoge"
        alpha = [1]


    maskAxis = 1
    elems = prod(X.shape) 
    print elems, "kdkdkdkd"
    if mask == "Random":
        targetelems = elems
        print "MASKING: RANDOM"
        def createObservedTensor(data):
            data = array(data)
            X = zeros(elems)
            def setter(index):
                X[index] = 1
            vectset = vectorize(setter)
            vectset(data)
            return X.reshape(shape)
    elif mask == "Fiber":
        targetelems = elems / X.shape[maskAxis]
        print "MASKING: FIBER"
        def createObservedTensor(data):
            data = array(data)
            S = zeros(targetelems)
            def setter(index):
                S[index] = 1
            vectset = vectorize(setter)
            vectset(data)
            X = zeros(elems).reshape(shape)
            if maskAxis == 0:
                S = S.reshape(shape[1],shape[2])
                for i in xrange(shape[0]):
                    X[i,:,:] = S
            elif maskAxis == 1:
                S = S.reshape(shape[0],shape[2])
                for i in xrange(shape[1]):
                    X[:,i,:] = S
            elif maskAxis == 2:
                S = S.reshape(shape[0],shape[1])
                for i in xrange(shape[2]):
                    X[:,:,i] = S

            return X.reshape(shape)
    elif mask == "Slice":
        targetelems = X.shape[maskAxis]
        print "MASKING: SLICE"
        def createObservedTensor(data):
            data = array(data)
            S = zeros(targetelems)
            def setter(index):
                S[index] = 1
            vectset = vectorize(setter)
            vectset(data)
            X = zeros(elems).reshape(shape)
            if maskAxis == 0:
                for i in xrange(X.shape[1]):
                    for j in xrange(X.shape[2]):
                        X[:,i,j] = S
            elif maskAxis == 1:
                for i in xrange(X.shape[0]):
                    for j in xrange(X.shape[2]):
                        X[i,:,j] = S
            elif maskAxis == 2:
                for i in xrange(X.shape[0]):
                    for j in xrange(X.shape[1]):
                        X[i,j,:] = S

            return X.reshape(shape)

    evalDataGenerator = lambda separatingNumber,unobservedRate,targetIndeces:dataGenerator(
            int(targetelems* unobservedRate),separatingNumber,unobservedRate, targetIndeces)

    hpOptimDataGenerator = lambda separatingNumber,unobservedRate,targetIndeces:dataGenerator(
            min(int(targetelems* unobservedRate * 0.5),int(len(targetIndeces) * 0.5)),
            separatingNumber,unobservedRate, targetIndeces)

    def dataGenerator(hiddens,separatingNumber,unobservedRate,targetIndeces):
        #print elems
        #print len(targetIndeces)
        rs = Toolbox.GenerateRandomSeparation(targetIndeces, hiddens)
        return Toolbox.Take(rs,separatingNumber)

    import CompletionMethods
    import Decomposition
    if method in ["Tucker","TuckerSum"]:
        decomposition = Decomposition.TuckerSum()
        completionMethod = CompletionMethods.Tucker(X,L,decomposition)
    elif method in ["CP","CPSum"]:
        print "hogehogehgoehgoe"
        decomposition = Decomposition.CPSum()
        completionMethod = CompletionMethods.CP(X,L,decomposition)
    elif method == "TuckerProd":
        decomposition = Decomposition.TuckerProd()
        completionMethod = CompletionMethods.Tucker(X,L,decomposition)
    elif method == "CPProd":
        decomposition = Decomposition.CPProd()
        completionMethod = CompletionMethods.CP(X,L,decomposition)
    elif method == "CPWOPT":
        completionMethod = CompletionMethods.CPWOPT(X,L)
        if not useRelation:
            alpha = [0]
    elif method == "CPWOPTProd":
        completionMethod = CompletionMethods.CPWOPTProd(X,L)
        if not useRelation:
            alpha = [0]

    # convert a list of indices to a binary observation tensor
    completionMethod.createObservedTensor = createObservedTensor

    estimator = completionMethod.estimator
    

    def lossFunction(estimation,evalData):
        #evalData = Toolbox.Take(evalData,500)
        W = createObservedTensor(evalData)
        Y=estimation
        return numpy.linalg.norm((Y - X)*W) * sqrt(1.0*elems / len(evalData))

    trainingData = range(targetelems)

    log.WriteLine("Start Evaluatig method:" + method + " ")
    log.WriteLine("Using Relation Data" if useRelation else "Without Relation Data")
    log.WriteLine("Ranks for Estimation:"+str(ranks))
    log.WriteLine("Unobserved Rates:"+str(unobservedRates))
    log.WriteLine("HyperParameters alpha to try:"+str(alpha))
    log.WriteLine("hyperParameters rank to try:" + str(ranks))
    print type(information)
    information["setting"]={}
    information["setting"]["method"] = method
    information["setting"]["using relation data"] = useRelation
    information["setting"]["rank for estimation"] = ranks
    information["setting"]["fraction of unobserved elements"] = unobservedRates
    information["setting"]["tested alpha"] = alpha 
    information["setting"]["tested rank"] = ranks
    information["result"]={}


    for unobservedRate in unobservedRates:
        information["result"][unobservedRate]={}
        #log.WriteLine("unobserved rate, "+str(unobservedRate)+ " ")

        time = varianceTimes

        import CrossValidation
        
        print unobservedRate, "kkkkkkkk"
        parameters = [(a,rank) for a in alpha for rank in ranks] 
        errors = CrossValidation.Evaluate(
                trainingData,
                estimator,
                lossFunction,
                functools.partial(evalDataGenerator,time,unobservedRate),
                parameters,
                functools.partial(hpOptimDataGenerator,1,unobservedRate))

        #[log.Write(", " + str(error),False) for error in errors]
        for error in errors:
            e = error["error"]
            param = error["param"]
            log.WriteLine("unobserved, "+ str(unobservedRate)+", bestparam,"+str(param)+", error, "+str(e))

            if not param in information["result"][unobservedRate]:
                information["result"][unobservedRate][param]=[]
            information["result"][unobservedRate][param].append(e)
            print "score logged:", e
            


        log.WriteLine()

    return information
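CrossValidation.Evaluate above is driven by a training index set, an estimator, a loss function, generators that produce held-out index sets, and a grid of (alpha, rank) parameters, and it reports the best parameter together with its held-out error for each trial. A self-contained sketch of that select-by-held-out-loss pattern, with toy stand-ins for the estimator and loss rather than the completion methods above:

# Generic "pick the parameter with the lowest held-out loss" sketch.
# fit() and loss() are toy stand-ins, not the tensor-completion code above.
import numpy as np

def evaluate_sketch(parameters, data, fit, loss, trials=3, holdout=0.3, seed=0):
    rng = np.random.default_rng(seed)
    results = []
    for _ in range(trials):
        idx = rng.permutation(len(data))
        cut = int(len(data) * holdout)
        held_out, train = idx[:cut], idx[cut:]
        scored = [(loss(fit(data[train], p), data[held_out]), p) for p in parameters]
        err, best = min(scored, key=lambda pair: pair[0])
        results.append({"param": best, "error": err})
    return results

# Toy usage: choose a shrinkage weight for estimating a mean.
data = np.random.default_rng(1).normal(5.0, 1.0, 200)
fit = lambda train, p: train.mean() / (1.0 + p)
loss = lambda estimate, held_out: float(np.mean((held_out - estimate) ** 2))
print(evaluate_sketch([0.0, 0.01, 0.1], data, fit, loss))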
Example #30
import numpy as np
from scipy.stats import wilcoxon, chisquare
# import matplotlib.pyplot as plt
# from hcluster import *
import Assurance as ass
import Parser as parser
import Experiment1 as exp1
import Experiment5 as exp5
import Experiment6 as exp6
import CrossValidation as cross
import SVMValidatedExperiment as exp7

def initialize():
    trials = parser.parse()
    atributos = ["type", "orientation", "age", "hairColour", "hasBeard", "hasHair", "hasGlasses", "hasShirt", "hasTie", "hasSuit", "x-dimension", "y-dimension"]
    return trials, atributos

if __name__ == '__main__':
    trials, atributos = initialize()
    
#     trials = exp1.run(trials, atributos)
    
    folds = cross.crossValidation(10, trials)
      
#     exp5.run(folds, atributos, 0.7)
      
#     exp6.run(folds, atributos, 0.7)
    
#     exp7.run(trials, folds, atributos, {}, False)
    
    exp7.run(trials, folds, atributos, {}, True)
Example #31
        print("Then press Enter to continue...")
        raw_input()
        GetGenes.Sort(fps, labels)
        GetGenes.getDiff_Badge(fps, labels)
        GetGenes.nuID2enterzID(fps, labels)


    import David
    David.davidCall(fps, labels)

    import String
    String.stringCall(path, fps, labels)
    String.genEdgeList(path, fps, labels)
    String.genNetworkInput(path, fps, labels)
    String.genNetwork(path, progpath)
    String.annoNetwork(path, progpath, fps, labels)

    import CrossValidation
    CrossValidation.exprToArff(path, fps, labels)
    CrossValidation.syncArffFeatures(path, fps, labels)
    CrossValidation.callWeka(fps, labels)

    import WriteReport
    WriteReport.writeDocReport(path, IOpath, fps, labels)
    WriteReport.writeXlsReport(path, IOpath, fps, labels)