def run(self):
    for parameters in self.parameter_search:
        print "Trying parameters %s." % (parameters)
        try:
            folds_test = mask.compute_folds_attempts(I=self.I, J=self.J, no_folds=self.K, attempts=attempts_generate_M, M=self.M)
            folds_training = mask.compute_Ms(folds_test)

            # We need to put the parameter dict into JSON to hash it
            self.all_performances[self.JSON(parameters)] = {}

            # Create a worker process per fold (Pool uses processes, not threads), and run them
            pool = Pool(self.P)
            all_parameters = [
                {
                    'parameters': parameters,
                    'X': numpy.copy(self.X),
                    'train': train,
                    'test': test,
                    'method': self.method,
                    'train_config': self.train_config,
                }
                for (train, test) in zip(folds_training, folds_test)
            ]
            outputs = pool.map(run_fold, all_parameters)
            pool.close()

            for performance_dict in outputs:
                self.store_performances(performance_dict, parameters)
            self.log(parameters)
        except Exception as e:
            self.fout.write("Tried parameters %s but got exception: %s.\n" % (parameters, e))
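# For reference: a minimal sketch of the run_fold helper that pool.map calls
# above. multiprocessing.Pool can only pickle module-level functions, which is
# why each fold's inputs are packed into a single dict. The constructor,
# train, and predict signatures below are assumptions for illustration, not
# necessarily the repository's actual implementation.
def run_fold(args):
    parameters, X = args['parameters'], args['X']
    train, test = args['train'], args['test']
    method, train_config = args['method'], args['train_config']
    # Train on the entries in the training mask only
    model = method(X=X, M=train, **parameters)
    model.train(**train_config)
    # Return the performance measures on the held-out test entries
    return model.predict(M_pred=test)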
def run(self):
    folds_test = mask.compute_folds_attempts(I=self.I, J=self.J, no_folds=self.K, attempts=attempts_generate_M, M=self.M)
    folds_training = mask.compute_Ms(folds_test)

    for i, (train, test) in enumerate(zip(folds_training, folds_test)):
        print "Fold %s of nested cross-validation." % (i+1)

        # Run the cross-validation on this fold's training data
        crossval = ParallelMatrixCrossValidation(
        #crossval = MatrixCrossValidation(
            method=self.method,
            X=self.X,
            M=train,
            K=self.K,
            parameter_search=self.parameter_search,
            train_config=self.train_config,
            file_performance=self.files_nested_performances[i],
            P=self.P,
        )
        crossval.run()

        try:
            (best_parameters, _) = crossval.find_best_parameters(evaluation_criterion='MSE', low_better=True)
            print "Best parameters for fold %s were %s." % (i+1, best_parameters)
        except KeyError:
            # No performances were stored (dataset too sparse?); fall back to the first parameter values
            best_parameters = self.parameter_search[0]
            print "Found no performances, dataset too sparse? Using first parameter values instead for fold %s: %s." % (i+1, best_parameters)

        # Train the model and test the performance on the test set
        performance_dict = self.run_model(train, test, best_parameters)
        self.store_performances(performance_dict)
        print "Finished fold %s, with performances %s." % (i+1, performance_dict)

    self.log()
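# A rough sketch of the fold-mask semantics these run() methods assume:
# compute_folds_attempts splits the observed entries of the I x J mask M into
# no_folds disjoint binary test masks (retrying up to `attempts` times so that
# every row and column keeps at least one training entry), and compute_Ms
# returns each fold's training mask as the observed entries outside its test
# mask. Simplified hypothetical versions (without the retry logic), not the
# mask module's actual code:
import numpy

def compute_folds(I, J, no_folds, M):
    omega = numpy.array(numpy.nonzero(M)).T       # indices of observed entries
    numpy.random.shuffle(omega)
    folds = [numpy.zeros((I, J)) for f in range(no_folds)]
    for n, (i, j) in enumerate(omega):
        folds[n % no_folds][i, j] = 1.            # assign entries round-robin
    return folds

def compute_Ms(folds_test):
    M = sum(folds_test)                           # all observed entries
    return [M - fold_test for fold_test in folds_test]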
def run(self, burn_in=None, thinning=None, minimum_TN=None):
    folds_test = mask.compute_folds_attempts(I=self.I, J=self.J, no_folds=self.folds, attempts=attempts_generate_M, M=self.M)
    folds_training = mask.compute_Ms(folds_test)

    performances_test = {measure: [] for measure in measures}
    for i, (train, test) in enumerate(zip(folds_training, folds_test)):
        print "Fold %s." % (i+1)

        # Run the line search
        line_search = LineSearch(
            classifier=self.classifier,
            values_K=self.values_K,
            R=self.R,
            M=train,
            priors=self.priors,
            initUV=self.init_UV,
            iterations=self.iterations,
            restarts=self.restarts,
        )
        line_search.search(burn_in=burn_in, thinning=thinning, minimum_TN=minimum_TN)

        # Store the model fits, and find the best one according to the metric
        all_performances = line_search.all_values(metric=self.quality_metric)
        self.fout.write("All model fits for fold %s, metric %s: %s.\n" % (i+1, self.quality_metric, all_performances))
        self.fout.flush()
        best_K = line_search.best_value(metric=self.quality_metric)
        self.fout.write("Best K for fold %s: %s.\n" % (i+1, best_K))

        # Train a model with this K and measure performance on the test set
        performance = self.run_model(train, test, best_K, burn_in=burn_in, thinning=thinning, minimum_TN=minimum_TN)
        self.fout.write("Performance: %s.\n\n" % performance)
        self.fout.flush()

        for measure in measures:
            performances_test[measure].append(performance[measure])

    # Store the final performances and average
    average_performance_test = self.compute_average_performance(performances_test)
    message = "Average performance: %s.\nPerformances test: %s." % (average_performance_test, performances_test)
    print message
    self.fout.write(message)
    self.fout.flush()
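# A plausible shape for the averaging helper used above, assuming each entry
# of performances_test is a list with one value per fold (hypothetical, shown
# for completeness):
def compute_average_performance(self, performances):
    return {measure: sum(values) / float(len(values))
            for measure, values in performances.iteritems()}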
def run(self, burn_in=None, thinning=None, minimum_TN=None):
    folds_test = mask.compute_folds_attempts(I=self.I, J=self.J, no_folds=self.folds, attempts=attempts_generate_M, M=self.M)
    folds_training = mask.compute_Ms(folds_test)

    performances_test = {measure: [] for measure in measures}
    for i, (train, test) in enumerate(zip(folds_training, folds_test)):
        print "Fold %s." % (i+1)

        # Run the greedy grid search. Note that we search on the training mask
        # (M=train, as in the line search above), so that the test entries are
        # never seen during model selection.
        greedy_search = GreedySearch(
            classifier=self.classifier,
            values_K=self.values_K,
            values_L=self.values_L,
            R=self.R,
            M=train,
            priors=self.priors,
            initS=self.init_S,
            initFG=self.init_FG,
            iterations=self.iterations,
            restarts=self.restarts,
        )
        greedy_search.search(self.quality_metric, burn_in=burn_in, thinning=thinning, minimum_TN=minimum_TN)

        # Store the model fits, and find the best one according to the metric
        all_performances = greedy_search.all_values(metric=self.quality_metric)
        self.fout.write("All model fits for fold %s, metric %s: %s.\n" % (i+1, self.quality_metric, all_performances))
        self.fout.flush()
        best_KL = greedy_search.best_value(metric=self.quality_metric)
        self.fout.write("Best K,L for fold %s: %s.\n" % (i+1, best_KL))

        # Train a model with this K and L, and measure performance on the test set
        performance = self.run_model(train, test, best_KL[0], best_KL[1], burn_in=burn_in, thinning=thinning, minimum_TN=minimum_TN)
        self.fout.write("Performance: %s.\n\n" % performance)
        self.fout.flush()

        for measure in measures:
            performances_test[measure].append(performance[measure])

    # Store the final performances and average
    average_performance_test = self.compute_average_performance(performances_test)
    message = "Average performance: %s.\nPerformances test: %s." % (average_performance_test, performances_test)
    print message
    self.fout.write(message)
    self.fout.flush()
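# A hedged sketch of the run_model step used above (here for the greedy K,L
# version): train the classifier with the selected dimensionality on the
# training mask, then evaluate on the held-out test mask. The initialise/run/
# predict signatures are assumptions for illustration, and the handling of
# minimum_TN is omitted:
def run_model(self, train, test, K, L, burn_in=None, thinning=None, minimum_TN=None):
    model = self.classifier(R=self.R, M=train, K=K, L=L, priors=self.priors)
    model.initialise(init_S=self.init_S, init_FG=self.init_FG)
    model.run(iterations=self.iterations)
    # For a Gibbs sampler, discard the burn-in draws and keep every
    # thinning-th draw when forming the predictive estimate
    return model.predict(M_pred=test, burn_in=burn_in, thinning=thinning)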
def run(self):
    for parameters in self.parameter_search:
        print "Trying parameters %s." % (parameters)
        try:
            folds_test = mask.compute_folds_attempts(I=self.I, J=self.J, no_folds=self.K, attempts=attempts_generate_M, M=self.M)
            folds_training = mask.compute_Ms(folds_test)

            # We need to put the parameter dict into JSON to hash it
            self.all_performances[self.JSON(parameters)] = {}

            for i, (train, test) in enumerate(zip(folds_training, folds_test)):
                print "Fold %s (parameters: %s)." % (i+1, parameters)
                performance_dict = self.run_model(train, test, parameters)
                self.store_performances(performance_dict, parameters)
            self.log(parameters)
        except Exception as e:
            self.fout.write("Tried parameters %s but got exception: %s.\n" % (parameters, e))
            self.fout.flush()
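# Hypothetical usage of the sequential cross-validation above; the NMF method,
# data X, and mask M are placeholders, and the constructor arguments mirror
# the call shown in the nested run() earlier in this section:
#
#   crossval = MatrixCrossValidation(
#       method=NMF, X=X, M=M, K=5,
#       parameter_search=[{'K': k} for k in range(1, 11)],
#       train_config={'iterations': 1000},
#       file_performance='./performances.txt')
#   crossval.run()
#   (best_parameters, best_performance) = crossval.find_best_parameters(
#       evaluation_criterion='MSE', low_better=True)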