def use_models(self, train_data, test_data):
    y_train = train_data['time']
    X_train = util.get_predictors(train_data)
    y_test = test_data['time']
    X_test = util.get_predictors(test_data)

    errors = {}
    for model in self.models:
        model = model()
        model.fit(X_train, y_train)
        errors[str(model)] = {}
        errors[str(model)]['test'] = abs(util.relative_error(y_test, model.predict(X_test)))
        errors[str(model)]['train'] = abs(util.relative_error(y_train, model.predict(X_train)))
    return errors
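# Every snippet in this section leans on util.relative_error, whose
# definition is not shown here. The sketch below is an assumption about its
# shape -- a signed element-wise relative error returned as a pandas Series
# (callers above take abs() and iterate .values) -- not the project's
# confirmed implementation.
import numpy as np
import pandas as pd

def relative_error(actual, predicted):
    """Signed element-wise relative error, (predicted - actual) / actual."""
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    return pd.Series((predicted - actual) / actual)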
def analyze(self, train_data, test_data, models):
    keys = ['application']
    error = dict(application=[], model=[], model_nice_name=[], error=[])
    grouped = test_data.groupby(keys)
    for app, group in grouped:
        y = group['time']
        X = util.get_predictors(group).values
        for model_name in models[app]:
            model = models[app][model_name]
            pred = model.predict(X)
            res = util.relative_error(y, pred)
            for err in res.values:
                error['error'].append(err)
                error['model'].append(model_name)
                error['model_nice_name'].append(str(model))
                error['application'].append(app)
    self.error = pd.DataFrame(error)
    return self
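# This analyze() (and the similar one further below) indexes into `models`
# as models[application][model_name] -> fitted estimator. A hypothetical way
# to build that structure; the 'linear' key, the estimator choice, and the
# reliance on the project's util helper are illustrative assumptions:
from sklearn.linear_model import LinearRegression

def build_models(train_data):
    models = {}
    for app, group in train_data.groupby('application'):
        est = LinearRegression().fit(util.get_predictors(group), group['time'])
        models.setdefault(app, {})['linear'] = est
    return models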
def analyze(self, train_data, test_data, models):
    keys = ['application']
    errors = dict(application=[], model=[], error=[], reps=[], error_type=[])
    indexes = range(0, len(self.models))
    model_idx = {str(model()): idx for (model, idx) in zip(self.models, indexes)}
    self.test_error = np.zeros((len(self.models), len(self.nreps)))
    self.train_error = np.zeros((len(self.models), len(self.nreps)))

    for reps in self.nreps:
        for app, group in train_data.groupby(keys):
            # Restrict training data to the first `reps` repetitions
            app_data = group[group['rep'] <= reps]
            test = test_data[test_data.application == app]
            data = {
                'train': (util.get_predictors(app_data), app_data['time']),
                'test': (util.get_predictors(test), test['time'])
            }
            for model in self.models:
                model = model()
                model.fit(data['train'][0], data['train'][1])
                # Find predictions over both the train and test sets
                for t, (X, y) in data.items():
                    pred = model.predict(X)
                    error = abs(util.relative_error(y, pred))
                    for err in error.values:
                        errors['application'].append(app)
                        errors['model'].append(str(model))
                        errors['error'].append(err)
                        errors['reps'].append(reps)
                        errors['error_type'].append(t)
    self.errors = pd.DataFrame(errors)
    return self
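# analyze() above treats self.models as a list of zero-argument estimator
# factories (note the `model = model()` call) and self.nreps as a list of
# repetition cutoffs. A hypothetical constructor consistent with that usage;
# the class name and the concrete estimator choices are assumptions:
from sklearn import ensemble, linear_model

class RepetitionAnalysis(object):
    def __init__(self, nreps=(1, 2, 4, 8)):
        # Classes, not instances: analyze() instantiates one fresh model per fit.
        self.models = [linear_model.LinearRegression,
                       ensemble.GradientBoostingRegressor]
        self.nreps = list(nreps)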
def test_matrix_completion(error_matrix, runtime_matrix, rank_for_imputation,
                           runtime_threshold, verbose=False):
    print("rank: {}".format(rank_for_imputation))
    # Boolean mask instead of eval()-ing a criteria string; same entries masked.
    mask = runtime_matrix >= runtime_threshold
    if verbose:
        print("masking entries that satisfy: runtime_matrix >= {}".format(runtime_threshold))
    error_matrix_masked = error_matrix.copy()
    error_matrix_masked[mask] = np.nan
    error_matrix_pred, errors_matrix = matrix_completion_by_EM(
        error_matrix_masked, rank=rank_for_imputation, verbose=verbose)
    return relative_error(error_matrix[mask], error_matrix_pred[mask])
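# A usage sketch for test_matrix_completion on synthetic stand-in data. The
# matrices and the 75th-percentile threshold are made up for illustration,
# and matrix_completion_by_EM is assumed to be importable from the
# surrounding module.
import numpy as np

rng = np.random.default_rng(0)
runtime = rng.uniform(0.1, 10.0, size=(20, 15))
errors_mat = rng.normal(size=(20, 15))
score = test_matrix_completion(errors_mat, runtime,
                               rank_for_imputation=3,
                               runtime_threshold=float(np.quantile(runtime, 0.75)))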
def analyze(self, train_data, test_data, models):
    errors = dict(application=[], error=[], model=[])
    grouped = test_data.groupby('application')
    for app, group in grouped:
        for model_name in models[app]:
            model = models[app][model_name]
            # Only want the predictors, drop everything else
            y = group['time']
            X = util.get_predictors(group).values
            pred = model.predict(X)
            # Parse and combine data
            res = abs(util.relative_error(y, pred))
            for err in res.values:
                errors['error'].append(err)
                errors['application'].append(app)
                errors['model'].append(str(model))
    self.errors = pd.DataFrame(errors)
    return self
def test_tensor_completion(error_tensor, runtime_tensor, ranks_for_imputation,
                           runtime_threshold, verbose=False):
    print("ranks: {}, runtime_threshold: {}".format(ranks_for_imputation, runtime_threshold))
    # Boolean mask instead of eval()-ing a criteria string; same entries masked.
    mask = runtime_tensor >= runtime_threshold
    if verbose:
        print("masking entries that satisfy: runtime_tensor >= {}".format(runtime_threshold))
    error_tensor_masked = error_tensor.copy()
    error_tensor_masked[mask] = np.nan
    _, _, error_tensor_pred, errors = tucker_on_error_tensor(
        error_tensor_masked, ranks_for_imputation, save_results=False, verbose=verbose)
    return relative_error(error_tensor[mask], error_tensor_pred[mask])
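# The tensor variant follows the same recipe; synthetic stand-ins again,
# with tucker_on_error_tensor assumed importable from the surrounding module
# and the Tucker ranks chosen arbitrarily for illustration.
import numpy as np

rng = np.random.default_rng(1)
runtime_t = rng.uniform(0.1, 10.0, size=(8, 8, 8))
errors_t = rng.normal(size=(8, 8, 8))
score_t = test_tensor_completion(errors_t, runtime_t,
                                 ranks_for_imputation=[2, 2, 2],
                                 runtime_threshold=float(np.quantile(runtime_t, 0.75)))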
db = database.database()
db.init(db_file)
db.rbf.set_sigma(sigma)

# run trials
o = open("radial_distribution.dat", "w")
i = 0
j = 0
while i < Ntrials and j < db.N_neighborhoods:
    sys.stdout.flush()
    q = db.Neighborhoods[j]
    f = db.Forces[j]
    neighbors = db.Neighborhood_distances[j]
    ids = []
    error = -1.
    # print("# trial", i, file=o)
    for n in range(len(neighbors)):
        [ii, d] = neighbors[n]
        if d > dmax:
            break
        ids.append(ii)
        error = 0.
        ff = db.force_interpolation(q, ids)
        error = util.relative_error(ff, f)
        print(d, n, error, file=o)
    j += 1
    if error > 0.:
        print(">> trial {0:3d}/{1:3d} min error {2:8g}".format(i + 1, Ntrials, error))
        print(file=o)
        print(file=o)
        i += 1
def analyze(self, train_data, test_data, models):
    # models = [{'model': linear_model.LinearRegression(),
    #            'grid': {},
    #            'name': 'Linear',
    #            'color': 'blue'},
    #           {'model': linear_model.Ridge(),
    #            'grid': [{'regressor__alpha': util.frange(0, 10, 0.2)}],
    #            'name': 'Ridge',
    #            'color': 'red'},
    #           {'model': ensemble.GradientBoostingRegressor(),
    #            'grid': [{'regressor__learning_rate': util.frange(0.05, 1, 0.05),
    #                      'regressor__n_estimators': range(20, 300, 20),
    #                      'regressor__max_depth': range(2, 7)
    #                      }],
    #            'name': 'GBM',
    #            'color': 'yellow'
    #            },
    ##          {'model': svm.SVR(kernel='poly'),
    ##           'grid': [{
    ##               'regressor__degree': range(1, 4),
    ##               'regressor__C': [10**i for i in range(-5, 6)]
    ##           }],
    ##           'name': 'SVMPoly'
    ##           },
    #           {'model': svm.SVR(kernel='linear'),
    #            'grid': [{
    #                'regressor__C': [10**i for i in range(-5, 6)]
    #            }],
    #            'name': 'SVMLinear',
    #            'color': 'green'
    #            }
    #           ]
    #
    # errors = dict(application=[], model=[], feature_count=[], error=[], error_type=[])
    # features = dict(application=[], model=[], feature=[], count=[])
    #
    # max_feature_count = len(util.get_predictors(train_data).columns)
    # for feature_count in range(4, (max_feature_count / 2) * 2, 2):
    #     for app, group in train_data.groupby('application'):
    #         for model_params in models:
    #             model = model_params['model']
    #             grid = model_params['grid']
    #             name = model_params['name']
    #
    #             pipeline = build_pipeline(model)
    #             rfe = RFE(pipeline, feature_count, step=1)
    #             cv = GridSearchCV(rfe, grid, cv=10)
    #             test = test_data[test_data['application'] == app]
    #
    #             X_train = util.get_predictors(group)
    #             y_train = group['time']
    #             X_test = util.get_predictors(test)
    #             y_test = test['time']
    #
    #             cv.fit(X_train, y_train)
    #
    #             # Build feature heatmap
    #             for feature in self._extract_features(rfe, X_train):
    #                 features['application'].append(app)
    #                 features['model'].append(name)
    #                 features['feature'].append(feature)
    #                 features['count'].append(feature)
    #
    #             types = {'train': (X_train, y_train), 'test': (X_test, y_test)}
    #             for err_type, (X, y) in types:
    #                 pred = rfe.predict(X)
    #                 for error in util.relative_error(y, pred):
    #                     errors['application'].append(app)
    #                     errors['model'].append(str(model))
    #                     errors['feature_count'].append(feature_count)
    #                     errors['error'].append(error)
    #                     errors['error_type'].append('train')
    # self.errors = pd.DataFrame(errors)
    #
    # # Fetch minimum count for each feature, application, and model
    # features = pd.DataFrame(features)
    # self.features = dict(application=[], model=[], feature=[], count=[])
    # for model, model_group in features.groupby('model'):
    #     for app, app_group in model_group.groupby('application'):
    #         for feature, feature_group in app_group.groupby('feature'):
    #             min_count = feature_group.feature_count.min()
    #             self.features['application'].append(app)
    #             self.features['model'].append(app)
    #             self.features['feature'].append(feature)
    #             self.features['count'].append(min_count)
    # self.features = pd.DataFrame(self.features)

    feature_choices = self.feature_selection(train_data, test_data, models)
    models = {str(model()): model() for model in self._models}
    errors = dict(application=[], model=[], error=[], feature_count=[])
    for model_name, feature_selection in feature_choices.items():
        for feature_count, features in feature_selection:
            # Look up by name; rebinding the loop variable here would break
            # the next iteration's dict lookup.
            est = models[model_name]
            for app, group in train_data.groupby('application'):
                test = test_data[test_data.application == app]
                data = {'train': {'X': util.get_predictors(group)[features],
                                  'y': group.time},
                        'test': {'X': util.get_predictors(test)[features],
                                 'y': test.time}}
                est.fit(data['train']['X'], data['train']['y'])
                for err_type, d in data.items():
                    pred = est.predict(d['X'])
                    actual = d['y']
                    error = util.relative_error(actual, pred)
                    # Iterate positionally; label-based error[i] can KeyError
                    # on a Series carrying the test set's index.
                    for err in error.values:
                        errors['application'].append(app)
                        errors['model'].append(str(est))
                        errors['error'].append(err)
                        errors['feature_count'].append(len(features))
    self.errors = pd.DataFrame(errors)
    return self
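# The analyze() above expects self.feature_selection() to return a mapping
# from a model's str() name to (feature_count, feature_list) pairs. A
# hypothetical value of that shape; the feature names are made up:
feature_choices = {
    'LinearRegression()': [
        (2, ['n_nodes', 'input_size']),
        (4, ['n_nodes', 'input_size', 'n_cores', 'block_size']),
    ],
}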