def compute_features(imdb, args, useValSet=False):
    """Compute the selected feature representation for every image in imdb."""
    if args.feature == 'tinyimage':
        features = tinyimage_features(imdb, args.tinyimage_patchdim)
    elif args.feature == 'bow-patches':
        features = bow_patch_features(imdb, args.patches_dictionarysize,
                                      args.patches_radius, args.patches_stride,
                                      useValSet=useValSet)
    elif args.feature == 'bow-sift':
        features = bow_sift_features(imdb, args.sift_dictionarysize,
                                     args.sift_radius, args.sift_stride,
                                     args.sift_binsize, useValSet=useValSet)
    else:
        raise NotImplementedError('Selected feature not yet implemented')

    # Bag-of-words features are normalized; tiny-image features are returned as-is.
    if args.feature != 'tinyimage':
        print('normalizing')
        features = normalize_features(features)
    return features
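# Hedged usage sketch, not part of the original pipeline: compute_features is
# driven by an argparse-style namespace. The values below mirror the defaults
# exposed by main()'s parser and are illustrative only; `imdb` is whatever
# read_dataset() returns.
def _example_compute_features(imdb):
    from argparse import Namespace
    toy_args = Namespace(feature='bow-sift', sift_dictionarysize=128,
                         sift_radius=8, sift_stride=12, sift_binsize=8)
    return compute_features(imdb, toy_args, useValSet=True)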
import sys

import load
import train
from normalize import normalize_features
from print_matrices import print_matrices
from numeric_verification import verify

if len(sys.argv) >= 4:
    network_filename = sys.argv[1]
    weights_filename = sys.argv[2]
    dataset_name = sys.argv[3]
else:
    print("\nUsage:\t python backpropagation.py network weights dataset\n")
    sys.exit()

dataset = load.load_dataset(dataset_name)
network = load.load_network_structure(network_filename)
initial_weights = load.load_weights(weights_filename)

normalize_features(dataset)

# Compute gradients using every instance in the dataset.
gradients = train.calculate_gradients(dataset, 0, len(dataset),
                                      initial_weights,
                                      network['regularization'])
print_matrices(gradients)
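# Example invocation (file names are placeholders; they must match whatever the
# loaders in load.py expect for the network structure, initial weights and
# dataset):
#
#   python backpropagation.py network.txt initial_weights.txt dataset.csv
#
# The script prints the gradient matrices computed over the whole dataset.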
def cross_validation(dataset, k):
    dlen = len(dataset)
    num_outputs = len(dataset[0]['output'])
    kfolds = {}
    # create k folds
    for i in range(k):
        kfolds[i] = []

    # CASE 1: there is only one output, and it is zero or one
    if num_outputs == 1:
        # split the original set into 2 classes: 0 and 1
        class0 = []
        class1 = []
        #print(dlen)
        for j in range(dlen):
            if dataset[j]['output'][0][0] == 1.0:
                class1.append(dataset[j])
            else:
                class0.append(dataset[j])
        #print(len(class0))
        #print(len(class1))

        # put len(class1)/k instances with output=1 into each fold
        # put len(class0)/k instances with output=0 into each fold
        numk = 0
        for i in range(len(class1)):
            if numk == k:
                numk = 0
            kfolds[numk].append(class1[i])
            numk += 1
        for i in range(len(class0)):
            if numk == k:
                numk = 0
            kfolds[numk].append(class0[i])
            numk += 1

        """for i in range(k):  # DEBUG CODE
            freq0count = 0
            freq1count = 0
            for j in range(len(kfolds[i])):
                if kfolds[i][j]['output'][0][0] == 1.0:
                    freq1count += 1
                else:
                    freq0count += 1
            print('fold {} counts'.format(i))
            print(freq0count)
            print(freq1count)
            print(f'Total: {len(kfolds[i])}\n')"""

    # CASE 2: there is more than one output; preserve the number of instances
    # with "1" for each output in every fold
    elif num_outputs > 1:
        # split the original set into 'num_outputs' classes
        classes = {}
        for i in range(num_outputs):
            classes[i] = []
        for i in range(dlen):
            for j in range(num_outputs):
                if dataset[i]['output'][j][0] == 1.0:
                    classes[j].append(dataset[i])

        numk = 0
        for i in range(num_outputs):
            # put len(classes[i])/k instances into each fold
            for j in range(len(classes[i])):
                if numk == k:
                    numk = 0
                kfolds[numk].append(classes[i][j])
                numk += 1

        """for i in range(k):  # DEBUG CODE
            freqcount = {}
            for j in range(num_outputs):
                freqcount[j] = 0
            for j in range(len(kfolds[i])):
                for c in range(num_outputs):
                    if kfolds[i][j]['output'][c][0] == 1.0:
                        freqcount[c] += 1
            print('fold {} counts'.format(i))
            for j in range(num_outputs):
                print(freqcount[j])
            print(f'Total: {len(kfolds[i])}\n')"""

    # Assemble the k train/test splits: fold i is the test set, the remaining
    # folds form the training set.
    cvset = {}
    for i in range(k):
        testSet = []
        trainingSet = []
        for j in range(k):
            if i == j:
                testSet.extend(kfolds[i])
            else:
                trainingSet.extend(kfolds[j])
        normalize_features(testSet)
        normalize_features(trainingSet)
        cvset[i] = {'testSet': testSet, 'trainingSet': trainingSet}
    return cvset
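# Hedged sketch (not part of the original code) of how the folds returned by
# cross_validation could be consumed. `train_model` and `evaluate_model` are
# hypothetical callables supplied by the caller; the repo's own training entry
# points (e.g. train.calculate_gradients) take different arguments.
def _example_run_cross_validation(dataset, k, train_model, evaluate_model):
    cvset = cross_validation(dataset, k)
    scores = []
    for i in range(k):
        fold = cvset[i]
        model = train_model(fold['trainingSet'])               # fit on k-1 folds
        scores.append(evaluate_model(model, fold['testSet']))  # held-out fold
    return sum(scores) / k                                     # mean fold score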
def main(args):
    parser = argparse.ArgumentParser(
        description='Train and evaluate a model on the Cats vs. Dogs dataset')
    parser.add_argument('-d', '--dataset-dir', required=True, type=str,
                        help='Path to the dataset')
    parser.add_argument('-f', '--feature', required=True, choices=FEATURES,
                        help='Select which feature representation to use. '
                             'Choices are {' + ', '.join(FEATURES) + '}')
    parser.add_argument('-c', '--classifier', required=True, choices=CLASSIFIERS,
                        help='Select which classifier to use. '
                             'Choices are {' + ', '.join(CLASSIFIERS) + '}')
    parser.add_argument('-k', '--knn-k', default=3, type=int,
                        help='Number of neighbors for kNN classifier')
    parser.add_argument('-l', '--svm-lambda', default=1.0, type=float,
                        help='Lambda parameter for SVM')
    parser.add_argument('--tinyimage-patchdim', default=16, type=int)
    parser.add_argument('--patches-dictionarysize', default=128, type=int)
    parser.add_argument('--patches-radius', default=8, type=float)
    parser.add_argument('--patches-stride', default=12, type=int)
    parser.add_argument('--sift-dictionarysize', default=128, type=int)
    # compute_features reads args.sift_radius, so expose it here; the default
    # mirrors --patches-radius.
    parser.add_argument('--sift-radius', default=8, type=float)
    parser.add_argument('--sift-binsize', default=8, type=int,
                        help='Size of the bin in terms of number of pixels in '
                             'the image. Recall that SIFT has 4x4=16 bins.')
    parser.add_argument('--sift-stride', default=12, type=int,
                        help='Spacing between successive x (and y) coordinates '
                             'for sampling dense features.')
    args = parser.parse_args(args)

    imdb = read_dataset(args.dataset_dir)
    # compute_features already normalizes non-tinyimage features.
    features = compute_features(imdb, args)

    print('Experiment setup: training set: train, test set: val')
    clf = train_classifier(features[imdb.train_indices, :],
                           imdb.class_ids[imdb.train_indices], args)
    val_preds, val_scores = make_predictions(clf, features[imdb.val_indices, :])
    show_confusion(imdb.class_ids[imdb.val_indices], val_preds)

    print('Experiment setup: training set: train+val, test set: test')
    clf = train_classifier(
        features[np.hstack((imdb.train_indices, imdb.val_indices)), :],
        imdb.class_ids[np.hstack((imdb.train_indices, imdb.val_indices))],
        args)
    test_preds, test_scores = make_predictions(clf,
                                               features[imdb.test_indices, :])
    show_confusion(imdb.class_ids[imdb.test_indices], test_preds)
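# Example command line (the entry-point filename and the classifier name are
# assumptions; the flags match the parser defined in main above):
#
#   python main.py -d <dataset_dir> -f bow-sift -c <classifier> --sift-stride 12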