Example #1
def compute_features(imdb, args, useValSet=False):
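    """Compute the selected feature representation for every image in imdb."""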
    if args.feature == 'tinyimage':
        features = tinyimage_features(imdb, args.tinyimage_patchdim)
    elif args.feature == 'bow-patches':
        features = bow_patch_features(imdb,
                                      args.patches_dictionarysize,
                                      args.patches_radius,
                                      args.patches_stride,
                                      useValSet=useValSet)
    elif args.feature == 'bow-sift':
        features = bow_sift_features(imdb,
                                     args.sift_dictionarysize,
                                     args.sift_radius,
                                     args.sift_stride,
                                     args.sift_binsize,
                                     useValSet=useValSet)
    else:
        raise NotImplementedError('Selected feature not yet implemented')

    if args.feature != 'tinyimage':
        print('normalizing')
        features = normalize_features(features)
    return features
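A minimal usage sketch, assuming `read_dataset` from Example #4 and a `Namespace` standing in for parsed CLI arguments (the path is a placeholder):

from argparse import Namespace

# Hypothetical arguments mirroring the flags parsed in Example #4.
args = Namespace(feature='bow-sift',
                 sift_dictionarysize=128,
                 sift_radius=8,
                 sift_stride=12,
                 sift_binsize=8)
imdb = read_dataset('data/cats-vs-dogs')  # loader from Example #4; path is a placeholder
features = compute_features(imdb, args, useValSet=True)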
Example #2
import sys
import load
from normalize import normalize_features
from print_matrices import print_matrices
from numeric_verification import verify
import train

if len(sys.argv) >= 4:
    network_filename = sys.argv[1]
    weights_filename = sys.argv[2]
    dataset_name = sys.argv[3]
else:
    print("\nUsage:\t python backpropagation.py network weights dataset\n")
    sys.exit(1)

dataset = load.load_dataset(dataset_name)
network = load.load_network_structure(network_filename)
initial_weights = load.load_weights(weights_filename)

normalize_features(dataset)

# Compute gradients using every instance in the dataset.
gradients = train.calculate_gradients(dataset, 0, len(dataset),
                                      initial_weights,
                                      network['regularization'])

print_matrices(gradients)
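For reference, a sample invocation (filenames are placeholders):

    python backpropagation.py network.txt initial_weights.txt dataset.csv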
Example #3
def cross_validation(dataset, k):
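    """Split dataset into k class-stratified folds and return, for each
    fold i, a dict with that fold as test set and the rest as training set."""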

    dlen = len(dataset)
    num_outputs = len(dataset[0]['output'])
    kfolds = {}

    # create k empty folds
    for i in range(k):
        kfolds[i] = []

    # CASE 1: there is a single output, which is either zero or one
    if num_outputs == 1:

        # split the original set into two classes: 0 and 1
        class0 = []
        class1 = []
        for j in range(dlen):
            if dataset[j]['output'][0][0] == 1.0:
                class1.append(dataset[j])
            else:
                class0.append(dataset[j])

        # place len(class1)/k instances with output=1 in each fold,
        # and len(class0)/k instances with output=0 in each fold
        numk = 0
        for i in range(len(class1)):
            if numk == k: numk = 0
            kfolds[numk].append(class1[i])
            numk += 1
        for i in range(len(class0)):
            if numk == k: numk = 0
            kfolds[numk].append(class0[i])
            numk += 1
        """for i in range(k): # DEBUG CODE
            freq0count = 0
            freq1count = 0
            for j in range(len(kfolds[i])):
                if kfolds[i][j]['output'][0][0] == 1.0:
                    freq1count += 1
                else:
                    freq0count += 1
            print('fold {} counts'.format(i))
            print(freq0count)
            print(freq1count)
            print(f'Total: {len(kfolds[i])}\n')"""

    # CASE 2: more than one output; preserve the number of "1" instances of each output in each fold
    elif num_outputs > 1:

        # split the original set into 'num_outputs' classes
        classes = {}
        for i in range(num_outputs):
            classes[i] = []
        for i in range(dlen):
            for j in range(num_outputs):
                if dataset[i]['output'][j][0] == 1.0:
                    classes[j].append(dataset[i])

        numk = 0
        for i in range(num_outputs):

            # place len(classes[i])/k instances in each fold
            for j in range(len(classes[i])):
                if numk == k: numk = 0
                kfolds[numk].append(classes[i][j])
                numk += 1
        """for i in range(k): # DEBUG CODE
            freqcount = {}
            for j in range(num_outputs):
                freqcount[j] = 0
            for j in range(len(kfolds[i])):
                for k in range(num_outputs):
                    if kfolds[i][j]['output'][k][0] == 1.0:
                        freqcount[k] += 1
            print('fold {} counts'.format(i))
            for j in range(num_outputs):
                print(freqcount[j])
            print(f'Total: {len(kfolds[i])}\n')"""

    cvset = {}
    for i in range(k):
        testSet = []
        trainingSet = []
        for j in range(k):
            # bug fix: extend with fold j (the original extended fold i
            # in both branches, duplicating the test fold k-1 times)
            if i == j:
                testSet.extend(kfolds[j])
            else:
                trainingSet.extend(kfolds[j])
        normalize_features(testSet)
        normalize_features(trainingSet)
        cvset[i] = {'testSet': testSet, 'trainingSet': trainingSet}
    return cvset
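A minimal usage sketch, assuming `dataset` is the list of instance dicts produced by `load.load_dataset` in Example #2 (the filename and k=10 are placeholders):

dataset = load.load_dataset('dataset.csv')  # loader from Example #2
cvset = cross_validation(dataset, k=10)
for i, fold in cvset.items():
    print(f"fold {i}: {len(fold['trainingSet'])} training / "
          f"{len(fold['testSet'])} test instances")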
Example #4
File: main.py  Project: erfannoury/cmsc-691
# This excerpt assumes the rest of main.py provides FEATURES, CLASSIFIERS,
# read_dataset, compute_features, normalize_features, train_classifier,
# make_predictions, and show_confusion.
import argparse

import numpy as np


def main(args):
    parser = argparse.ArgumentParser(
        description='Train and evaluate a model on the Cats vs. Dogs dataset')

    parser.add_argument('-d',
                        '--dataset-dir',
                        required=True,
                        type=str,
                        help='Path to the dataset')
    parser.add_argument('-f',
                        '--feature',
                        required=True,
                        choices=FEATURES,
                        help='Select which feature representation to use. '
                        'Choices are {' + ', '.join(FEATURES) + '}')
    parser.add_argument('-c',
                        '--classifier',
                        required=True,
                        choices=CLASSIFIERS,
                        help='Select which classifier to use. '
                        'Choices are {' + ', '.join(CLASSIFIERS) + '}')
    parser.add_argument('-k',
                        '--knn-k',
                        default=3,
                        type=int,
                        help='Number of neighbors for kNN classifier')
    parser.add_argument('-l',
                        '--svm-lambda',
                        default=1.0,
                        type=float,
                        help='Lambda parameter for SVM')
    parser.add_argument('--tinyimage-patchdim', default=16, type=int)
    parser.add_argument('--patches-dictionarysize', default=128, type=int)
    parser.add_argument('--patches-radius', default=8, type=float)
    parser.add_argument('--patches-stride', default=12, type=int)
    parser.add_argument('--sift-dictionarysize', default=128, type=int)
    # compute_features (Example #1) also reads args.sift_radius, which this
    # parser did not define; the default here is an assumption.
    parser.add_argument('--sift-radius', default=8, type=float)
    parser.add_argument('--sift-binsize',
                        default=8,
                        type=int,
                        help='Size of the bin in terms of number of pixels in '
                        'the image. Recall that SIFT has 4x4=16 bins.')
    parser.add_argument('--sift-stride',
                        default=12,
                        type=int,
                        help='Spacing between successive x (and y) coordinates '
                        'for sampling dense features.')

    args = parser.parse_args(args)

    imdb = read_dataset(args.dataset_dir)

    features = compute_features(imdb, args)

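    # Note: compute_features (Example #1) already normalizes non-tinyimage
    # features, so this second normalization may be redundant.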
    if args.feature != 'tinyimage':
        features = normalize_features(features)

    print('Experiment setup: training set: train, test set: val')
    clf = train_classifier(features[imdb.train_indices, :],
                           imdb.class_ids[imdb.train_indices], args)
    val_preds, val_scores = make_predictions(clf,
                                             features[imdb.val_indices, :])
    show_confusion(imdb.class_ids[imdb.val_indices], val_preds)

    print('Experiment setup: training set: train+val, test set: test')
    clf = train_classifier(
        features[np.hstack((imdb.train_indices, imdb.val_indices)), :],
        imdb.class_ids[np.hstack(
            (imdb.train_indices, imdb.val_indices))], args)
    test_preds, test_scores = make_predictions(clf,
                                               features[imdb.test_indices, :])
    show_confusion(imdb.class_ids[imdb.test_indices], test_preds)
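A typical entry point for this script would look like the sketch below (the original file's guard is not shown in this excerpt):

if __name__ == '__main__':
    import sys
    main(sys.argv[1:])

Example invocation (the path, classifier name, and parameter values are placeholders):

    python main.py -d data/cats-vs-dogs -f bow-sift -c svm --svm-lambda 0.1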