def three():
    """Fit k-means centroids and save them to ``centroids_folder``.

    Reads the module-level settings ``nr_centroids``, ``nr_iterations`` and
    ``centroids_folder``; prints progress messages along the way.
    """
    trainer = kmeans.kMeansTrainer(nr_it=nr_iterations,
                                   nr_centroids=nr_centroids)

    print("Creating centroids")
    fitted_centroids = trainer.fit()

    print("Saving centroids to file")
    trainer.save_centroids(fitted_centroids, file_path=centroids_folder)

    print("Done")
def steven():
    """Compute activations of the test data from previously saved centroids.

    Reads the module-level settings ``nr_centroids``, ``centroids_folder``,
    ``nr_pool_regions``, ``activations_folder_test`` and
    ``processed_test_filename``; prints progress messages along the way.
    """
    # Construction order (calculator first, trainer second) is kept as in the
    # original in case either constructor has side effects.
    activation_calc = ac.ActivationCalculation()
    centroid_store = kmeans.kMeansTrainer()

    print("Loading centroids")
    saved_centroids = centroid_store.get_saved_centroids(
        nr_centroids, file_path=centroids_folder)

    print("Calculating activations of test data")
    activation_calc.pipeline(
        saved_centroids,
        n_pool_regions=nr_pool_regions,
        file_path=activations_folder_test,
        data_file=processed_test_filename,
    )

    print("Done")
f, ax = plt.subplots(length, length) for i in range(0, length): for j in range(0, length): ax[i, j].imshow(reshaped[i * length + j], cmap='Greys', interpolation='nearest') ax[i, j].axis('off') plt.show() def visualize_activation_alt(self, activations): patch_size = np.sqrt(activations.shape[0]) one = activations[:, 0] im = np.reshape(one, (patch_size, patch_size)) plt.imshow(im, cmap='Greys', interpolation='nearest') plt.show() if __name__ == '__main__': km = kmeans.kMeansTrainer() centroids = km.get_saved_centroids(100) #util.plot_centroids(centroids, "../data/centroidskmeans") sup_km = ActivationCalculation() sup_km.pipeline(centroids=centroids, data_file="../data/preprocessed_test.h5")
def singlePipeline(nr_centroids, nr_it, label_path="../data/preprocessed.h5",
                   clsfr="SGD", calc_centroids=True, dogfeed=True,
                   train_model=True, cache_size=4000, degree=3, tol=1e-3,
                   max_iter=-1, kernel='rbf', model_file='UNSPECIFIED'):
    """Run feature extraction, training and a self-evaluation in one go.

    Steps, in order:

    1. Obtain the training features: either fit k-means centroids and compute
       activations (``calc_centroids`` truthy) or load cached activations from
       ``../data/activations_train/<nr_centroids>activationkmeans.h5``.
    2. Load labels and label names from ``label_path``.
    3. Train (``train_model``) or, for SVC/NUSVR, load (``model_file``) the
       model selected by ``clsfr`` (``"SGD"``, ``"SVC"`` or ``"NUSVR"``).
    4. If ``dogfeed`` is true, predict on the same training features and
       print/save the log loss, the accuracy and three diagnostic images.

    :param nr_centroids: number of k-means centroids (also selects the cached
        activation file).
    :param nr_it: number of k-means iterations.
    :param label_path: file the labels and label names are loaded from.
    :param clsfr: classifier name; any other value prints a message and stops.
    :param calc_centroids: compute features instead of loading them.
    :param dogfeed: evaluate the model on the data it was trained on.
    :param train_model: train a model instead of reusing an existing one.
    :param cache_size, degree, tol, max_iter, kernel: forwarded to the
        training / prediction helpers.
    :param model_file: joblib file to load when ``train_model`` is false and
        an SVC/NUSVR classifier is selected.
    :return: ``None``; results are printed and written to files in the
        current working directory.
    """
    features, feature_file = _obtain_features(nr_centroids, nr_it,
                                              calc_centroids)
    try:
        print("Loading labels from file...")
        labels = util.load_labels(label_path)
        label_names = util.load_label_names(label_path)
        print("Got labels")

        classified = _train_and_predict(
            features, labels, nr_centroids, clsfr, dogfeed, train_model,
            cache_size, degree, tol, max_iter, kernel, model_file)
        if classified is None:
            # Nothing to evaluate: dogfeeding disabled or unknown classifier.
            return

        _report_scores(classified, labels, label_names)
    finally:
        # The cached features are backed by the open HDF5 file, so it can only
        # be closed here -- and must be closed on every exit path, including
        # the early returns and any exception.
        if feature_file is not None:
            feature_file.close()


def _obtain_features(nr_centroids, nr_it, calc_centroids):
    """Return ``(features, open_file)``; ``open_file`` is None when computed."""
    if calc_centroids:
        print("calculating centroids...")
        # Find the features using k-means.
        km_trainer = kmeans.kMeansTrainer(nr_centroids=nr_centroids,
                                          nr_it=nr_it)
        centroids = km_trainer.fit()
        km_trainer.save_centroids(centroids)

        print("calculating activations...")
        act_calc = act.ActivationCalculation()
        return act_calc.pipeline(centroids), None

    print("loading activations from file...")
    # Opened read-only: the file is only read, and an explicit mode avoids
    # depending on the h5py version's default.
    feature_file = h5py.File(
        "../data/activations_train/" + str(nr_centroids)
        + "activationkmeans.h5",
        "r")
    try:
        return feature_file["activations"], feature_file
    except Exception:
        feature_file.close()
        raise


def _train_and_predict(features, labels, nr_centroids, clsfr, dogfeed,
                       train_model, cache_size, degree, tol, max_iter, kernel,
                       model_file):
    """Train/load the chosen model; return its predictions or None."""
    if clsfr == "SGD":
        if train_model:
            print("Begin training of SGD...")
            train.trainSGD(features, labels, nr_centroids)
            print("Training done")
        if not dogfeed:
            return None
        print("Dogfeeding")
        print("Begin SGD predictions...")
        classified = classifier.predict(features, nr_centroids, degree=degree,
                                        cache_size=cache_size)
        print("Predicting done")
        return classified

    if clsfr in ("SVC", "NUSVR"):
        if train_model:
            print("Begin training of Model...")
            # NOTE(review): the original code issued the very same
            # svc.train_svc call for both "SVC" and "NUSVR"; that behaviour is
            # kept. Confirm whether NUSVR was meant to use a different trainer.
            model = svc.train_svc(features, labels, nr_centroids,
                                  degree=degree, cache_size=cache_size,
                                  tol=tol, max_iter=max_iter, kernel=kernel)
            print("Training done")
        else:
            print("Loading model")
            model = joblib.load(model_file)
        if not dogfeed:
            return None
        print("Dogfeeding")
        print("Begin SVC predictions...")
        classified = model.predict_proba(features)
        print("Predicting done")
        return classified

    print("Selected classifier not available, please use an available classifier")
    return None


def _report_scores(classified, labels, label_names):
    """Print log loss / accuracy and save the CSV and PNG diagnostics."""
    print("Calculating log loss...")
    np.savetxt("meuk.csv", classified, delimiter=";")

    # Two library-based reference values, printed for comparison with the
    # hand-computed loss below.
    loss = metrics.log_loss(labels, classified)
    print(loss)
    print(-np.mean(np.log(classified)[np.arange(len(labels)), labels]))

    # Hand-computed log loss: a probability of exactly 0 is replaced by
    # 10e-15 (i.e. 1e-14) so the logarithm stays finite.
    zero_prob_log = np.log(10e-15)
    summing = 0
    correct = 0
    for i, label in enumerate(labels):
        row = classified[i]
        prob = row[label]
        summing += zero_prob_log if prob == 0 else np.log(prob)
        if label == np.argmax(row):
            correct += 1

    # One-hot "ground truth" image: rows are classes, columns are samples.
    image = np.zeros((len(label_names), len(labels)))
    for column, label_index in enumerate(labels):
        image[label_index, column] = 1

    # NOTE(review): scipy.misc.imsave is no longer available in recent SciPy
    # releases; kept because no replacement is imported in this file.
    scipy.misc.imsave('correct.png', image)
    scipy.misc.imsave('predicted.png', classified.T)
    error = image - classified.T
    scipy.misc.imsave('error.png', error)
    print("Calculation finished")

    summing = -summing / len(labels)
    # "%s %s" reproduces the output of the original two-argument print
    # statement under both Python 2 and Python 3.
    print("%s %s" % ("log loss: ", summing))
    # float() forces true division; under Python 2 the original int/int
    # division truncated the accuracy to 0 (or 1).
    print("%s %s" % ("correct/amount_of_labels: ",
                     float(correct) / len(labels)))
    print("%s %s" % ("lowest classification score: ", np.min(classified)))

    np.savetxt("realLabel.csv", labels, delimiter=";")