# Example #1
# 0
                    help='Path to logging folder.',
                    required=True)
# Optional reproducibility flag; parsed as an int, not required.
parser.add_argument('-s', '--seed', help='Random seed.', type=int)
# Parse all CLI flags defined above (including ones outside this chunk).
args = parser.parse_args()

# Map the CLI experiment name to its implementation class.  A dict keeps the
# mapping in one place, and an unknown name now fails immediately with a
# clear message instead of leaving `experiment_class` unbound and raising a
# confusing NameError further down the script.
_EXPERIMENTS = {
    'TruncationElite': TruncationElite,
    'TruncationEliteRT': TruncationEliteRT,
    'TruncationEliteRTNOMUT': TruncationEliteRTNOMUT,
}
try:
    experiment_class, experiment_name = utils.get_experiment_class_and_name(
        _EXPERIMENTS[args.experiment])
except KeyError:
    raise ValueError('Unknown experiment: ' + str(args.experiment))

# Load the dataset named on the command line and carve off the final 20% as
# a hold-out set; shuffle=False preserves the original (time) ordering.
training_data = LearningData()
training_data.from_file(args.data)
X_train, X_test, y_train, y_test = train_test_split(
    training_data.predictors,
    training_data.response,
    test_size=0.2,
    shuffle=False)
print(f'Training examples: {X_train.shape[0]}')
print(f'Testing examples: {X_test.shape[0]}')
# Build the regressor from the selected experiment class and the dataset's
# variable metadata; population size and generation count are hard-coded
# here rather than exposed on the CLI.
model = SymbolicRegression(
    experiment_class=experiment_class,
    variable_type_indices=training_data.variable_type_indices,
    variable_names=training_data.variable_names,
    variable_dict=training_data.variable_dict,
    num_features=training_data.num_variables,
    pop_size=100,
    ngen=1000,
    # NOTE(review): this constructor call is truncated in this chunk; the
    # remaining keyword arguments and closing paren are outside this view.
import pandas as pd

from fastsr.containers.learning_data import LearningData

# Load the energy dataset, move column 1 to the end as the response (columns
# 0-2 are dropped from the predictors), strip underscores from predictor
# names, lag every predictor by 6 steps, and persist the result as HDF5.
dat = pd.read_csv('data/energydata_complete.csv')
cols = dat.columns.tolist()
cols_ordered = cols[3:] + [cols[1]]
trimmed_dat = dat[cols_ordered]
learning_data = LearningData()
predictor_names = cols_ordered[:-1]
# Underscores don't play nice with Range Terminals, so remove them.
predictor_names = [name.replace('_', '') for name in predictor_names]
learning_data.from_data(trimmed_dat, predictor_names, 'energy_data')
learning_data.lag_predictors(6, column_names=predictor_names)
learning_data.to_hdf('data/energy_hour_lagged.hdf5')
# Example #3
# 0
import numpy as np
import matplotlib.pyplot as plt

from fastsr.estimators.symbolic_regression import SymbolicRegression
from fastsr.containers.learning_data import LearningData
from fastsr.experiments.truncation_elite import TruncationElite
from experiments.truncation_elite_rt import TruncationEliteRT
import utils

# Load a previously trained RT model, rebuild the primitive set and scoring
# toolbox for its dataset, and fetch the genealogy of the first traceable
# best individual.
best_num = 0
experiment_class, experiment_name = utils.get_experiment_class_and_name(TruncationEliteRT)

model = SymbolicRegression()
model.load('/home/cfusting/rtresults_1000_100/energy_lagged/' + experiment_name + '/saved_models/' +
           experiment_name + '_energy_lagged_3965.pkl')

training_data = LearningData()
training_data.from_hdf('data/energy_lagged.hdf5')

experiment = experiment_class()
pset = experiment.get_pset(training_data.num_variables, training_data.variable_type_indices,
                           training_data.variable_names, training_data.variable_dict)
scoring_toolbox = experiment.get_scoring_toolbox(training_data.predictors, training_data.response, pset)

# Only individuals that carry a history_index can be traced in the history.
best_individuals = [ind for ind in model.best_individuals_ if hasattr(ind, 'history_index')]
best_genealogy = model.history_.getGenealogy(best_individuals[best_num])
errors = []

def populate_individuals(history_index):
# Example #4
# 0
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

from fastsr.estimators.symbolic_regression import SymbolicRegression
from fastsr.containers.learning_data import LearningData

# Compare a PCA -> symbolic-regression pipeline against a bare
# SymbolicRegression fit on the same chronological training split.
pipe = Pipeline([('reduce_dim', PCA()),
                 ('symbolic_regression', SymbolicRegression())])

training_data = LearningData()
training_data.from_file('data/hour_simple_lagged.hdf5')
X_train, X_test, y_train, y_test = train_test_split(
    training_data.predictors,
    training_data.response,
    test_size=0.1,
    shuffle=False)

pipe.fit(X_train, y_train)
print(pipe.score(X_train, y_train))

model = SymbolicRegression()
model.fit(X_train, y_train)
print(model.score(X_train, y_train))
import pandas as pd

from fastsr.containers.learning_data import LearningData

# Build the lagged UCI bike-sharing dataset: attach a datetime index,
# one-hot encode the categorical columns, order predictors ahead of the
# response column, lag the weather/calendar predictors by 6 hours, and
# save the result to HDF5.
dat = pd.read_csv('data/hour.csv')
datetime_index = list()
for i, r in dat.iterrows():
    # NOTE(review): positional access — presumably column 1 is the date
    # string and column 5 the hour of day; verify against hour.csv's schema.
    datetime_index.append(pd.to_datetime(r[1]) + pd.DateOffset(hours=r[5]))
dt_index = pd.DatetimeIndex(datetime_index)
# BUG FIX: DataFrame.set_index returns a new frame; the original call
# discarded the result, silently leaving the default RangeIndex in place.
dat = dat.set_index(dt_index)
dat_onehot = pd.get_dummies(
    dat,
    prefix_sep='',
    columns=['season', 'mnth', 'hr', 'weekday', 'weathersit'])
# Keep columns 2-8, 11, and 12-62 of the one-hot-encoded frame.
indices = [x for x in range(2, 9)] + [11] + [x for x in range(12, 63)]
dat_train = dat_onehot.iloc[:, indices]
cols = dat_train.columns.tolist()
# Move column 7 to the end so it becomes the response column.
cols_ordered = cols[0:7] + cols[8:58] + [cols[7]]
dat_train_ordered = dat_train[cols_ordered]
learning_data = LearningData()
learning_data.from_data(dat_train_ordered, cols_ordered[:-1], 'ucibike')
lag_variables = ['holiday', 'workingday', 'temp', 'atemp', 'hum', 'windspeed']
learning_data.lag_predictors(6, column_names=lag_variables)
learning_data.to_hdf('data/hour_lagged.hdf5')
# Example #6
# 0
import pandas as pd

from fastsr.containers.learning_data import LearningData

# Build the simplified lagged bike dataset: attach a datetime index, keep
# three weather predictors plus the count response, lag the predictors by
# 24 hours, and save to HDF5.
dat = pd.read_csv('data/hour.csv')
datetime_index = list()
for i, r in dat.iterrows():
    # NOTE(review): positional access — presumably column 1 is the date
    # string and column 5 the hour of day; verify against hour.csv's schema.
    datetime_index.append(pd.to_datetime(r[1]) + pd.DateOffset(hours=r[5]))
dt_index = pd.DatetimeIndex(datetime_index)
# BUG FIX: DataFrame.set_index returns a new frame; the original call
# discarded the result, silently leaving the default RangeIndex in place.
dat = dat.set_index(dt_index)
columns = ['atemp', 'windspeed', 'hum', 'cnt']
slim_dat = dat[columns]
learning_data = LearningData()
learning_data.from_data(slim_dat, columns, 'ucisimplebike')
learning_data.lag_predictors(24, column_names=['atemp', 'windspeed', 'hum'])
learning_data.to_hdf('data/hour_simple_lagged.hdf5')
# Example #7
# 0
from fastgp.utilities.metrics import mean_squared_error

# Where the pickled models live and where their scores should be written.
experiment = 'rt'
dataset = 'energy_lagged'
path = dataset + '/' + experiment
models_dir = '/home/cfusting/efsresults/' + path + '/saved_models/'
results_dir = '/home/cfusting/efsscores/' + path


def get_seed(file):
    """Return the first run of digits in *file* (the model's seed) as a string.

    Assumes the filename contains at least one digit; raises AttributeError
    otherwise (unchanged from the original behavior).
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence and
    # warns on modern Python. The pattern itself is unchanged.
    pattern = re.compile(r'\d+')
    match = pattern.search(file)
    return match.group(0)


# Load the lagged energy dataset, reproduce the chronological 80/20 split,
# and enumerate every saved model file before scoring each seed.
training_data = LearningData()
training_data.from_file(
    '/home/cfusting/bunny/range-terminal/data/energy_lagged.hdf5')
X_train, X_test, y_train, y_test = train_test_split(
    training_data.predictors,
    training_data.response,
    test_size=0.2,
    shuffle=False)
files = [name for name in listdir(models_dir) if isfile(join(models_dir, name))]
scores = dict()
all_features = dict()
print('Processing seeds for ' + experiment)
for fl in files:
    with open(models_dir + '/' + fl, 'rb') as f:
        data = pickle.load(f)
    seed = get_seed(fl)
    print('Processing seed: ' + str(seed))