Esempio n. 1
0
 def use_models(self, train_data, test_data):
     """Fit one fresh instance of every entry in ``self.models`` and score it.

     Each entry of ``self.models`` is called with no arguments to build an
     estimator, which is fitted on the training predictors and the ``time``
     column of ``train_data``.

     Returns:
         A dict keyed by ``str(estimator)``. Each value is a dict with the
         absolute relative error on the test set (``'test'``) and on the
         training set (``'train'``), as computed by ``util.relative_error``.
     """
     train_target = train_data['time']
     train_features = util.get_predictors(train_data)
     test_target = test_data['time']
     test_features = util.get_predictors(test_data)

     report = {}
     for factory in self.models:
         estimator = factory()
         estimator.fit(train_features, train_target)
         test_error = abs(util.relative_error(
             test_target, estimator.predict(test_features)))
         train_error = abs(util.relative_error(
             train_target, estimator.predict(train_features)))
         # Estimators whose string forms collide overwrite each other here.
         report[str(estimator)] = {'test': test_error, 'train': train_error}
     return report
Esempio n. 2
0
    def analyze(self, train_data, test_data, models):
        """Collect per-sample relative errors of already-fitted models.

        Args:
            train_data: Unused here; kept so the signature stays unchanged.
            test_data: DataFrame with an ``application`` column, a ``time``
                column and the predictor columns that
                ``util.get_predictors`` selects.
            models: Mapping ``application -> {model_name: fitted model}``.
                Every model must provide ``predict``.

        Returns:
            ``self``. The results are stored in ``self.error``, a DataFrame
            with the columns ``application``, ``model``, ``model_nice_name``
            and ``error``. The error is stored signed, exactly as
            ``util.relative_error`` returns it.
        """
        error = dict(application=[], model=[], model_nice_name=[], error=[])

        # Group by the bare column name, not a one-element list: with a list,
        # pandas >= 2.0 yields 1-tuples as group keys, which would break the
        # ``models[app]`` lookup and the stored application labels.
        for app, group in test_data.groupby('application'):
            y = group['time']
            X = util.get_predictors(group).values
            for model_name in models[app]:
                model = models[app][model_name]
                res = util.relative_error(y, model.predict(X))

                values = res.values
                count = len(values)
                error['error'].extend(values)
                error['model'].extend([model_name] * count)
                error['model_nice_name'].extend([str(model)] * count)
                error['application'].extend([app] * count)
        self.error = pd.DataFrame(error)
        return self
    def analyze(self, train_data, test_data, models):
        """Measure train/test error as a function of the repetitions used.

        For every limit in ``self.nreps`` and every application in
        ``train_data``, a fresh instance of each entry in ``self.models`` is
        fitted on the training rows whose ``rep`` is at most that limit. The
        fitted model is then scored on that training subset and on the test
        rows of the same application.

        Args:
            train_data: DataFrame with ``application``, ``rep`` and ``time``
                columns plus the predictors ``util.get_predictors`` selects.
            test_data: DataFrame with ``application`` and ``time`` columns
                plus the same predictors.
            models: Unused here; kept so the signature stays unchanged.

        Returns:
            ``self``. The results are stored in ``self.errors``, a DataFrame
            with the columns ``application``, ``model``, ``error`` (absolute
            relative error), ``reps`` and ``error_type`` (``'train'`` or
            ``'test'``).
        """
        errors = dict(application=[],
                      model=[],
                      error=[],
                      reps=[],
                      error_type=[])

        # These attributes are only initialised to zeros here; nothing in
        # this method fills them in. They are kept in case other code reads
        # them.
        self.test_error = np.zeros((len(self.models), len(self.nreps)))
        self.train_error = np.zeros((len(self.models), len(self.nreps)))

        for reps in self.nreps:
            # Group by the bare column name, not a one-element list: with a
            # list, pandas >= 2.0 yields 1-tuples as group keys, which would
            # break the ``test_data.application == app`` filter below.
            for app, group in train_data.groupby('application'):
                app_data = group[group['rep'] <= reps]
                test = test_data[test_data.application == app]

                data = {
                    'train': (util.get_predictors(app_data), app_data['time']),
                    'test': (util.get_predictors(test), test['time'])
                }
                for model_factory in self.models:
                    model = model_factory()
                    model.fit(data['train'][0], data['train'][1])
                    model_label = str(model)
                    # Score the fitted model on both the training subset and
                    # the test set.
                    for error_type, (X, y) in data.items():
                        pred = model.predict(X)
                        values = abs(util.relative_error(y, pred)).values
                        count = len(values)
                        errors['application'].extend([app] * count)
                        errors['model'].extend([model_label] * count)
                        errors['error'].extend(values)
                        errors['reps'].extend([reps] * count)
                        errors['error_type'].extend([error_type] * count)
        self.errors = pd.DataFrame(errors)
        return self
def test_matrix_completion(error_matrix,
                           runtime_matrix,
                           rank_for_imputation,
                           runtime_threshold,
                           verbose=False):
    """Hide slow entries of the error matrix, impute them, and score the result.

    Entries of ``error_matrix`` whose matching ``runtime_matrix`` value is at
    least ``runtime_threshold`` are replaced by NaN in a copy. That copy is
    completed with ``matrix_completion_by_EM`` and the imputed values are
    compared with the true ones.

    Args:
        error_matrix: Array of observed errors; it is not modified.
        runtime_matrix: Array indexable like ``error_matrix`` that holds the
            runtimes used for masking.
        rank_for_imputation: Rank passed to ``matrix_completion_by_EM``.
        runtime_threshold: Entries with runtime ``>=`` this value are masked.
            A numeric string is converted with ``float``.
        verbose: Print the masking criterion and forward the flag to the
            completion routine.

    Returns:
        The result of ``relative_error`` on the true and the imputed values
        of the masked entries.
    """
    print("rank: {}".format(rank_for_imputation))
    if verbose:
        print("masking entries that satisfy: runtime_matrix >= {}".format(
            runtime_threshold))

    # Build the mask once by direct comparison. The previous version
    # formatted the threshold into a string and eval()-ed it four times,
    # which also failed for thresholds such as inf or nan (their text form
    # is not a valid Python name). Numeric strings worked with the
    # eval-based code, so they are still accepted.
    if isinstance(runtime_threshold, str):
        runtime_threshold = float(runtime_threshold)
    mask = runtime_matrix >= runtime_threshold

    error_matrix_masked = error_matrix.copy()
    error_matrix_masked[mask] = np.nan

    # The second return value of the completion routine is not needed here.
    error_matrix_pred, _ = matrix_completion_by_EM(
        error_matrix_masked, rank=rank_for_imputation, verbose=verbose)

    return relative_error(error_matrix[mask], error_matrix_pred[mask])
    def analyze(self, train_data, test_data, models):
        """Record the absolute relative error of each fitted model per application.

        ``test_data`` is split by its ``application`` column. Every model in
        ``models[application]`` predicts the ``time`` column from the
        predictors that ``util.get_predictors`` selects. ``train_data`` is
        not used.

        Returns:
            ``self``. The results are stored in ``self.errors``, a DataFrame
            with the columns ``application``, ``error`` and ``model``, where
            ``model`` is the ``str()`` of the fitted model.
        """
        collected = {'application': [], 'error': [], 'model': []}

        for app_name, app_rows in test_data.groupby('application'):
            for key in models[app_name]:
                fitted = models[app_name][key]

                # Split the rows into target and predictor matrix.
                target = app_rows['time']
                predictors = util.get_predictors(app_rows).values
                predicted = fitted.predict(predictors)

                # One output row per test sample.
                abs_errors = abs(util.relative_error(target, predicted))
                for value in abs_errors.values:
                    collected['error'].append(value)
                    collected['application'].append(app_name)
                    collected['model'].append(str(fitted))

        self.errors = pd.DataFrame(collected)
        return self
def test_tensor_completion(error_tensor,
                           runtime_tensor,
                           ranks_for_imputation,
                           runtime_threshold,
                           verbose=False):
    """Hide slow entries of the error tensor, impute them, and score the result.

    Entries of ``error_tensor`` whose matching ``runtime_tensor`` value is at
    least ``runtime_threshold`` are replaced by NaN in a copy. That copy is
    completed with ``tucker_on_error_tensor`` and the imputed values are
    compared with the true ones.

    Args:
        error_tensor: Array of observed errors; it is not modified.
        runtime_tensor: Array indexable like ``error_tensor`` that holds the
            runtimes used for masking.
        ranks_for_imputation: Ranks passed to ``tucker_on_error_tensor``.
        runtime_threshold: Entries with runtime ``>=`` this value are masked.
            A numeric string is converted with ``float``.
        verbose: Print the masking criterion and forward the flag to the
            completion routine.

    Returns:
        The result of ``relative_error`` on the true and the imputed values
        of the masked entries.
    """
    print("ranks: {}, runtime_threshold: {}".format(ranks_for_imputation,
                                                    runtime_threshold))
    if verbose:
        print("masking entries that satisfy: runtime_tensor >= {}".format(
            runtime_threshold))

    # Build the mask once by direct comparison. The previous version
    # formatted the threshold into a string and eval()-ed it four times,
    # which also failed for thresholds such as inf or nan (their text form
    # is not a valid Python name). Numeric strings worked with the
    # eval-based code, so they are still accepted.
    if isinstance(runtime_threshold, str):
        runtime_threshold = float(runtime_threshold)
    mask = runtime_tensor >= runtime_threshold

    error_tensor_masked = error_tensor.copy()
    error_tensor_masked[mask] = np.nan

    # Only the reconstructed tensor is needed from the returned tuple.
    _, _, error_tensor_pred, _ = tucker_on_error_tensor(
        error_tensor_masked,
        ranks_for_imputation,
        save_results=False,
        verbose=verbose)

    return relative_error(error_tensor[mask], error_tensor_pred[mask])
Esempio n. 7
0
    # Fragment of a larger routine (Python 2 syntax); db_file, sigma, Ntrials
    # and dmax are defined outside the visible code.
    # Load the database and set the RBF width before running the trials.
    db = database.database()
    db.init(db_file)
    db.rbf.set_sigma(sigma)
    # run trials
    # NOTE(review): this file handle is never closed in the visible code.
    o = open("radial_distribution.dat", "w")
    i = 0  # number of trials that produced a positive error
    j = 0  # index of the neighborhood currently being examined
    while i < Ntrials and j < db.N_neighborhoods:
        sys.stdout.flush()
        q = db.Neighborhoods[j]
        f = db.Forces[j]
        neighbors = db.Neighborhood_distances[j]
        ids = []
        # Sentinel: stays negative when no neighbor lies within dmax.
        error = -1.
        #print >>o,"# trial",i
        # Add neighbors one at a time and re-interpolate after each addition.
        # Each entry of neighbors unpacks into an (id, distance) pair.
        for n in range(len(neighbors)):
            [ii, d] = neighbors[n]
            # Stop at the first neighbor beyond dmax.
            # NOTE(review): presumably neighbors is sorted by distance - confirm.
            if (d > dmax): break
            ids.append(ii)
            error = 0.
            ff = db.force_interpolation(q, ids)
            error = util.relative_error(ff, f)
            # One output row: distance, neighbor index, error.
            print >> o, d, n, error
        j += 1
        # Count the neighborhood as a trial only if the error is positive.
        if (error > 0.):
            # NOTE(review): the message says "min error", but `error` holds
            # the value from the last neighbor processed, not a minimum.
            print ">> trial {0:3d}/{1:3d} min error {2:8g}".format(
                i + 1, Ntrials, error)
            # Two blank lines separate consecutive trials in the output file.
            print >> o
            print >> o
            i += 1
  # Fragment of a larger routine (Python 2 syntax); db, db_file, sigma,
  # Ntrials and dmax are defined outside the visible code.
  # Load the database and set the RBF width before running the trials.
  db.init(db_file)
  db.rbf.set_sigma(sigma)
  # run trials
  # NOTE(review): this file handle is never closed in the visible code.
  o = open("radial_distribution.dat","w")
  i = 0  # number of trials that produced a positive error
  j = 0  # index of the neighborhood currently being examined
  while i < Ntrials and j < db.N_neighborhoods:
    sys.stdout.flush()
    q = db.Neighborhoods[j]
    f = db.Forces[j]
    neighbors = db.Neighborhood_distances[j]
    ids = []
    # Sentinel: stays negative when no neighbor lies within dmax.
    error = -1.
    #print >>o,"# trial",i
    # Add neighbors one at a time and re-interpolate after each addition.
    # Each entry of neighbors unpacks into an (id, distance) pair.
    for n in range(len(neighbors)):
      [ii,d] = neighbors[n]
      # Stop at the first neighbor beyond dmax.
      # NOTE(review): presumably neighbors is sorted by distance - confirm.
      if (d > dmax): break
      ids.append(ii)
      error = 0.
      ff = db.force_interpolation(q,ids)
      error = util.relative_error(ff,f)
      # One output row: distance, neighbor index, error.
      print >>o, d,n,error
    j += 1
    # Count the neighborhood as a trial only if the error is positive.
    if (error > 0.):
      # NOTE(review): the message says "min error", but `error` holds the
      # value from the last neighbor processed, not a minimum.
      print ">> trial {0:3d}/{1:3d} min error {2:8g}".format(i+1,Ntrials,error)
      # Two blank lines separate consecutive trials in the output file.
      print >>o
      print >>o
      i += 1
      
      
Esempio n. 9
0
    def analyze(self, train_data, test_data, models):
        """Refit each model on its selected feature subsets and record the errors.

        ``self.feature_selection`` supplies the feature subsets. Its result
        is consumed as a mapping from a model's ``str()`` name to an iterable
        of ``(feature_count, features)`` pairs. For every such pair and every
        application in ``train_data``, the matching model is fitted on the
        selected training columns and scored on both the training rows and
        the test rows of that application.

        Args:
            train_data: DataFrame with ``application`` and ``time`` columns
                plus the predictors ``util.get_predictors`` selects.
            test_data: DataFrame with the same columns.
            models: Forwarded unchanged to ``self.feature_selection``.

        Returns:
            ``self``. The results are stored in ``self.errors``, a DataFrame
            with the columns ``application``, ``model``, ``error`` (as
            returned by ``util.relative_error``) and ``feature_count`` (the
            number of selected features).
        """
        feature_choices = self.feature_selection(train_data, test_data, models)

        # Build each model once and key it by its string form, which is how
        # feature_choices refers to it.
        instances = {}
        for factory in self._models:
            instance = factory()
            instances[str(instance)] = instance

        errors = dict(application=[], model=[], error=[], feature_count=[])
        for model_name, feature_selection in feature_choices.items():
            # Look the model up once per name. Rebinding the loop key inside
            # the inner loop made the lookup fail on the second feature
            # subset.
            model = instances[model_name]
            model_label = str(model)
            for _feature_count, features in feature_selection:
                for app, group in train_data.groupby('application'):
                    test = test_data[test_data.application == app]
                    data = {
                        'train': {'X': util.get_predictors(group)[features],
                                  'y': group.time},
                        'test': {'X': util.get_predictors(test)[features],
                                 'y': test.time},
                    }
                    model.fit(data['train']['X'], data['train']['y'])
                    # NOTE(review): train and test errors are stored together
                    # with no column telling them apart - confirm that this
                    # is intended.
                    for subset in data.values():
                        pred = model.predict(subset['X'])
                        error = util.relative_error(subset['y'], pred)
                        # Iterate by position: indexing with 0..n-1 would be
                        # a label lookup if the result keeps the original
                        # row index of the group.
                        values = list(error)
                        count = len(values)
                        errors['application'].extend([app] * count)
                        errors['model'].extend([model_label] * count)
                        errors['error'].extend(values)
                        errors['feature_count'].extend([len(features)] * count)
        self.errors = pd.DataFrame(errors)
        return self