# NOTE(review): fragment of an experiment-training CLI script, collapsed onto one
# physical line and truncated at BOTH ends — it opens in the middle of a
# `parser.add_argument(...)` call and stops inside the `SymbolicRegression(...)`
# constructor (after `ngen=1000,`). It selects an experiment class from
# `--experiment`, loads training data from `args.data`, makes an 80/20
# non-shuffled train/test split, and begins building the model.
# Recover the complete source before editing; left byte-identical here.
help='Path to logging folder.', required=True) parser.add_argument('-s', '--seed', help='Random seed.', type=int) args = parser.parse_args() if args.experiment == 'TruncationElite': experiment_class, experiment_name = utils.get_experiment_class_and_name( TruncationElite) elif args.experiment == 'TruncationEliteRT': experiment_class, experiment_name = utils.get_experiment_class_and_name( TruncationEliteRT) elif args.experiment == 'TruncationEliteRTNOMUT': experiment_class, experiment_name = utils.get_experiment_class_and_name( TruncationEliteRTNOMUT) training_data = LearningData() training_data.from_file(args.data) X_train, X_test, y_train, y_test = train_test_split(training_data.predictors, training_data.response, test_size=0.2, shuffle=False) print('Training examples: ' + str(X_train.shape[0])) print('Testing examples: ' + str(X_test.shape[0])) model = SymbolicRegression( experiment_class=experiment_class, variable_type_indices=training_data.variable_type_indices, variable_names=training_data.variable_names, variable_dict=training_data.variable_dict, num_features=training_data.num_variables, pop_size=100, ngen=1000,
"""Prepare the appliances-energy dataset: reorder columns, lag predictors, save HDF5."""
import pandas as pd
from fastsr.containers.learning_data import LearningData

raw = pd.read_csv('data/energydata_complete.csv')
column_names = raw.columns.tolist()

# Drop the first three columns and move column 1 to the end
# (last position is where from_data expects the response).
reordered = column_names[3:] + [column_names[1]]
frame = raw[reordered]

# Strip '_' from predictor names: underscores don't play nice with
# Range Terminals.
predictor_names = [name.replace('_', '') for name in reordered[:-1]]

learning_data = LearningData()
learning_data.from_data(frame, predictor_names, 'energy_data')
learning_data.lag_predictors(6, column_names=predictor_names)
learning_data.to_hdf('data/energy_hour_lagged.hdf5')
# NOTE(review): fragment of a genealogy-analysis script, collapsed onto one
# physical line and TRUNCATED at the end — it stops at the header of
# `def populate_individuals(history_index):` with no function body visible.
# It loads a pickled SymbolicRegression model, rebuilds the primitive set and
# scoring toolbox from the lagged energy data, collects the best individuals
# that carry a `history_index`, and fetches the genealogy of the one at
# `best_num`. Recover the complete source before editing; left byte-identical.
import numpy as np import matplotlib.pyplot as plt from fastsr.estimators.symbolic_regression import SymbolicRegression from fastsr.containers.learning_data import LearningData from fastsr.experiments.truncation_elite import TruncationElite from experiments.truncation_elite_rt import TruncationEliteRT import utils best_num = 0 experiment_class, experiment_name = utils.get_experiment_class_and_name(TruncationEliteRT) model = SymbolicRegression() model.load('/home/cfusting/rtresults_1000_100/energy_lagged/' + experiment_name + '/saved_models/' + experiment_name + '_energy_lagged_3965.pkl') training_data = LearningData() #training_data.from_file('data/minimum.csv') training_data.from_hdf('data/energy_lagged.hdf5') experiment = experiment_class() pset = experiment.get_pset(training_data.num_variables, training_data.variable_type_indices, training_data.variable_names, training_data.variable_dict) scoring_toolbox = experiment.get_scoring_toolbox(training_data.predictors, training_data.response, pset) best_individuals = [] for individual in model.best_individuals_: if hasattr(individual, 'history_index'): best_individuals.append(individual) best_genealogy = model.history_.getGenealogy(best_individuals[best_num]) errors = [] def populate_individuals(history_index):
"""Fit fastsr's SymbolicRegression inside a scikit-learn Pipeline and stand-alone."""
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from fastsr.estimators.symbolic_regression import SymbolicRegression
from fastsr.containers.learning_data import LearningData

# PCA feature reduction feeding symbolic regression.
pipe = Pipeline([('reduce_dim', PCA()),
                 ('symbolic_regression', SymbolicRegression())])

training_data = LearningData()
# NOTE(review): other scripts load .hdf5 via from_hdf — presumably from_file
# dispatches on the extension; confirm.
training_data.from_file('data/hour_simple_lagged.hdf5')

# Time-ordered data: hold out the final 10% without shuffling.
X_train, X_test, y_train, y_test = train_test_split(
    training_data.predictors,
    training_data.response,
    test_size=0.1,
    shuffle=False)

pipe.fit(X_train, y_train)
print(pipe.score(X_train, y_train))

# Baseline: the same regressor without the PCA step.
model = SymbolicRegression()
model.fit(X_train, y_train)
print(model.score(X_train, y_train))
"""Prepare the UCI bike-sharing hourly dataset: one-hot encode, lag, save HDF5."""
import pandas as pd
from fastsr.containers.learning_data import LearningData

dat = pd.read_csv('data/hour.csv')

# Build one timestamp per row from the date column (position 1) plus the
# hour-of-day column (position 5). Vectorized; replaces a Python-level
# iterrows loop that also relied on deprecated positional Series indexing.
timestamps = (pd.to_datetime(dat.iloc[:, 1])
              + pd.to_timedelta(dat.iloc[:, 5], unit='h'))
# BUG FIX: set_index returns a NEW frame (it is not in-place by default);
# the original called it and discarded the result, so the DatetimeIndex
# was never actually applied.
dat = dat.set_index(pd.DatetimeIndex(timestamps))

dat_onehot = pd.get_dummies(
    dat,
    prefix_sep='',
    columns=['season', 'mnth', 'hr', 'weekday', 'weathersit'])

# Keep columns 2-8, 11, and the one-hot columns 12-62 (positional).
indices = list(range(2, 9)) + [11] + list(range(12, 63))
dat_train = dat_onehot.iloc[:, indices]

# Move column 7 to the end (last position is where from_data expects the
# response — TODO confirm against from_data's contract).
cols = dat_train.columns.tolist()
cols_ordered = cols[0:7] + cols[8:58] + [cols[7]]
dat_train_ordered = dat_train[cols_ordered]

learning_data = LearningData()
learning_data.from_data(dat_train_ordered, cols_ordered[:-1], 'ucibike')
lag_variables = ['holiday', 'workingday', 'temp', 'atemp', 'hum', 'windspeed']
learning_data.lag_predictors(6, column_names=lag_variables)
learning_data.to_hdf('data/hour_lagged.hdf5')
"""Prepare a slim UCI bike-sharing hourly dataset (weather + count) as HDF5."""
import pandas as pd
from fastsr.containers.learning_data import LearningData

dat = pd.read_csv('data/hour.csv')

# Build one timestamp per row from the date column (position 1) plus the
# hour-of-day column (position 5). Vectorized; replaces a Python-level
# iterrows loop that also relied on deprecated positional Series indexing.
timestamps = (pd.to_datetime(dat.iloc[:, 1])
              + pd.to_timedelta(dat.iloc[:, 5], unit='h'))
# BUG FIX: set_index returns a NEW frame (it is not in-place by default);
# the original called it and discarded the result, so the DatetimeIndex
# was never actually applied.
dat = dat.set_index(pd.DatetimeIndex(timestamps))

columns = ['atemp', 'windspeed', 'hum', 'cnt']
slim_dat = dat[columns]

learning_data = LearningData()
# NOTE(review): unlike the other prep scripts, the full column list —
# including 'cnt' — is passed as the name list here; verify against
# from_data's contract whether the response should be excluded.
learning_data.from_data(slim_dat, columns, 'ucisimplebike')
learning_data.lag_predictors(24, column_names=['atemp', 'windspeed', 'hum'])
learning_data.to_hdf('data/hour_simple_lagged.hdf5')
# NOTE(review): model-scoring script, collapsed onto one physical line and
# TRUNCATED at the end — the `for fl in files:` loop body stops right after the
# per-seed print, and `scores`/`all_features` are populated nowhere in view.
# Also missing imports for names it uses: `re`, `listdir`/`isfile`/`join`
# (os / os.path), `pickle`, `LearningData`, and `train_test_split` — add them
# when the full source is recovered. The regex '\d+' should be a raw string
# (r'\d+') to avoid an invalid-escape DeprecationWarning.
# Left byte-identical here.
from fastgp.utilities.metrics import mean_squared_error experiment = 'rt' dataset = 'energy_lagged' path = '/'.join([dataset, experiment]) models_dir = '/home/cfusting/efsresults/' + path + '/saved_models/' results_dir = '/home/cfusting/efsscores/' + path def get_seed(file): pattern = re.compile('\d+') match = pattern.search(file) return match.group(0) training_data = LearningData() training_data.from_file( '/home/cfusting/bunny/range-terminal/data/energy_lagged.hdf5') X_train, X_test, y_train, y_test = train_test_split(training_data.predictors, training_data.response, test_size=0.2, shuffle=False) files = [f for f in listdir(models_dir) if isfile(join(models_dir, f))] scores = {} all_features = {} print('Processing seeds for ' + experiment) for fl in files: with open(models_dir + '/' + fl, 'rb') as f: data = pickle.load(f) seed = get_seed(fl) print('Processing seed: ' + str(seed))