from scipy.stats.mstats import mquantiles

thumbnail = "img"


def process_galaxy(galaxy_id, transform=0):
    """Return the features of one training image.

    The image path is built as
    ``<data root>images_training_rev1/<galaxy_id>.jpg`` and handed to
    ``galaxy.get_features`` with ``image_statistics=True``; whatever that
    call returns is returned unchanged.

    galaxy_id -- image identifier; it is concatenated into the path, so it
                 has to be a string.
    transform -- forwarded as-is to ``galaxy.get_features`` (default 0).
    """
    # Alternative data location: "/media/kevin/0026A5FD26A5F3B6/kaggle/galaxy/"
    data_root = "/vol/biomedic/users/kpk09/kaggle/galaxy/data/"
    image_dir = data_root + "images_training_rev1/"
    image_path = image_dir + galaxy_id + ".jpg"
    return galaxy.get_features(
        image_path,
        image_statistics=True,
        transform=transform,
    )


# Alternative location:
# "/media/kevin/0026A5FD26A5F3B6/kaggle/galaxy/training_solutions_rev1.csv"
f = "/vol/biomedic/users/kpk09/kaggle/galaxy/data/training_solutions_rev1.csv"
responses, ids = galaxy.read_responses(f)

# Enable INFO-level logging with a "time level message" line format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(message)s',
)

mapping = galaxy.get_classes()

# Class labels 1 through 11.
for Class in xrange(1, 12):
    # First-axis indices of the mapping entries equal to this class label
    # (presumably `mapping` is a NumPy array -- confirm in galaxy.get_classes).
    classes = np.nonzero(mapping == Class)[0]
    # Per-class accumulators, reset for every class label.
    X = []
    Y = []
    svm_class = 0
    for c in classes:
        q = 0.95
from sklearn import decomposition
from scipy.stats.mstats import mquantiles

thumbnail = "img"


def process_galaxy(galaxy_id, transform=0):
    """Compute the feature set for a single training galaxy image.

    Builds ``<data root>images_training_rev1/<galaxy_id>.jpg`` by string
    concatenation (so ``galaxy_id`` must be a string) and returns the result
    of ``galaxy.get_features`` called on that path with
    ``image_statistics=True`` and the supplied ``transform``.
    """
    # Alternative data location: "/media/kevin/0026A5FD26A5F3B6/kaggle/galaxy/"
    base_dir = "/vol/biomedic/users/kpk09/kaggle/galaxy/data/"
    jpg_path = base_dir + "images_training_rev1/" + galaxy_id + ".jpg"
    features = galaxy.get_features(jpg_path,
                                   image_statistics=True,
                                   transform=transform)
    return features


# Alternative location:
# "/media/kevin/0026A5FD26A5F3B6/kaggle/galaxy/training_solutions_rev1.csv"
f = "/vol/biomedic/users/kpk09/kaggle/galaxy/data/training_solutions_rev1.csv"
responses, ids = galaxy.read_responses(f)

# Configure logging: INFO level, "time level message" line format.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

mapping = galaxy.get_classes()

# Iterate over class labels 1..11.
for Class in xrange(1, 12):
    # First-axis indices where the mapping equals the current class label
    # (assumes `mapping` is a NumPy array -- TODO confirm).
    classes = np.nonzero(mapping == Class)[0]
    # Fresh accumulators for each class label.
    X = []
    Y = []
    svm_class = 0
    for c in classes:
        q = 0.95
import scipy.ndimage as nd
import csv
import math
import joblib
from joblib import Parallel, delayed

import galaxy


def to_dict(responses, id_responses):
    """Return a dict mapping each id to the response at the same position.

    Pairs are formed positionally with ``zip``: surplus items of the longer
    sequence are ignored, and when an id occurs more than once the last
    response paired with it wins.
    """
    return dict(zip(id_responses, responses))


# Reference solutions, and the predictions file named by the first
# command-line argument; both are read with the same helper.
responses, id_responses = galaxy.read_responses(
    "/vol/biomedic/users/kpk09/kaggle/galaxy/data/training_solutions_rev1.csv")
predictions, id_predictions = galaxy.read_responses(sys.argv[1])

ground_truth = to_dict(responses, id_responses)

# One row of squared errors per prediction, 37 columns per row.
MSE = np.zeros((len(id_predictions), 37), dtype="float")
# `n` is kept as an explicit counter so that it still equals the number of
# processed predictions once the loop has finished.
n = 0
for p, i in zip(predictions, id_predictions):
    # Raises KeyError when a predicted id has no ground-truth entry.
    MSE[n] = (p - ground_truth[i]) ** 2
    n += 1

# Mean over rows first, then over the resulting per-column means.
mse = np.mean(MSE, axis=0).mean()
# Single-argument print with %-formatting produces the same text under the
# Python 2 print statement and the Python 3 print function.
print("MSE: %s" % mse)
print("RMSE: %s" % math.sqrt(mse))

# Mean squared error of each individual prediction row.
scores = MSE.mean(axis=1)