def get_models(trans):
    """Summarise every saved model as an ``{"id", "name"}`` dictionary.

    Args:
        trans: Object indexed by model type; the looked-up value is the
            base of each entry's ``"name"``.

    Returns:
        A list with one dict per loaded model version. ``"id"`` is the
        version number and ``"name"`` is ``trans[model_type]``, followed by
        ``", " + parameter`` when the stored parameter is not ``""``.
    """
    model_prefix = save.get_prefix("model")
    version_count = save.get_version(model_prefix)

    summaries = []
    # Versions are loaded 1-based, from 1 up to version_count - 1.
    # NOTE(review): presumably get_version returns the next unused version
    # number -- confirm against the save module.
    for model_id in range(1, version_count):
        _, _, kind, setting, _ = save.load("model", model_id)

        label = trans[kind]
        # only mention the parameter when one was stored
        if setting != "":
            label += ", " + setting

        summaries.append({"id": model_id, "name": label})

    return summaries
def run():
    """List the type, parameter and comparators of every saved model.

    Returns:
        A list of ``[model_type, parameter, comparators]`` lists, one per
        model version loaded (versions 1 up to ``get_version(prefix) - 1``).
    """
    version_count = save.get_version(save.get_prefix("model"))

    rows = []
    for version in range(1, version_count):
        # the two leading values of the saved tuple are not needed here
        _, _, kind, setting, comparison = save.load("model", version)
        rows.append([kind, setting, comparison])
    return rows
def predict(program_dict, sim_model, num_results):
    """Quote a new program with the latest model and list similar items.

    Args:
        program_dict: Description of the program, passed to
            ``data_input.create_program``.
        sim_model: Similarity model passed to ``similarity.get_scores``.
        num_results: Passed to ``results.sort_and_display`` to control the
            similar-items listing.

    Returns:
        A tuple ``(quote, lower, upper, similar_list)``: the three values
        from ``results.get_quote`` plus the output of
        ``results.sort_and_display``.
    """
    # the newest saved model is the one just below the current version count
    latest_version = save.get_version(save.get_prefix("model")) - 1
    estimators, second_level, _, _, _ = save.load("model", latest_version)
    dataset, encoders, averages = save.load("data")

    # build the cooler and turn it into a feature sample
    program = data_input.create_program(program_dict, encoders, averages)
    feature_rows = features.features([program], encoders)

    # price prediction with its lower and upper values
    quote, lower, upper = results.get_quote(
        feature_rows, estimators, second_level
    )

    # score the stored items against the new cooler and format the best ones
    similarity_scores = similarity.get_scores(program, dataset, sim_model)
    similar_list = results.sort_and_display(
        dataset, similarity_scores, num_results, encoders
    )

    return quote, lower, upper, similar_list
def create(model_type, parameter):
    """Train a bootstrapped ensemble with a second-level tree and save it.

    The stored data is filtered to items that have a final price and are
    flagged ``use == 'yes'``, turned into features and labels, and split
    into training and test sets. ``num_iterations`` first-level models are
    fitted on bootstrap samples; their predictions on the training set are
    the inputs of a ``DecisionTreeRegressor`` second-level model. The
    ensemble is evaluated on the test set and saved via ``save.update``.

    Args:
        model_type: Name of a callable in this module's globals that builds
            a first-level model.
        parameter: Single argument passed to that callable.

    Returns:
        A tuple ``(base, mae)``. ``base`` is the mean absolute error on the
        test set of always predicting ``metrics.median(y_train)``; ``mae``
        is ``comparators['mean-absolute-error']`` for the ensemble.

    Raises:
        KeyError: If ``model_type`` is not a name in the module globals.
    """
    # load latest data
    data, encoders, averages = save.load("data")

    # only use those values that have a final price and are marked for use;
    # identity comparison with None avoids dispatching to a custom __ne__
    ml_data = [
        item for item in data
        if item.data['final_price'] is not None and item.data['use'] == 'yes'
    ]

    # normalize values
    normalized = [data_input.replace_blanks(item, averages) for item in ml_data]

    # generate features
    x, y = features.features_labels(normalized, encoders)

    # split into training and testing set
    x_train, x_test = features.split_data(x)
    y_train, y_test = features.split_data(y)

    # prepare for bootstrapping
    n_size = len(x_train)
    test_size = len(x_test)
    indices = list(range(n_size))
    clfs = []

    # second level model: one row per bootstrapped model until transposed
    second_level = tree.DecisionTreeRegressor()
    tree_train = np.zeros((num_iterations, n_size))

    # num_iterations is the number of bootstrapped instances to create
    for instance in range(num_iterations):
        # make bootstrap sample indices
        train_indices = resample(indices, n_samples=n_size)

        # prepare the bootstrap training set
        # NOTE(review): the indices are drawn over the training-set size but
        # index the full x / y. This only samples training rows if
        # features.split_data puts the training rows first -- confirm,
        # otherwise test rows leak into training.
        x_bs = x[train_indices]
        y_bs = y[train_indices].reshape(-1, 1)

        # train model
        # NOTE(review): model_type is looked up dynamically in globals();
        # make sure callers only pass trusted, known model names.
        clf = globals()[model_type](parameter)
        clf.fit(x_bs, y_bs)

        # training data for second level model
        tree_train[instance] = clf.predict(x_train).reshape(n_size)

        # keep the fitted model for the ensemble
        clfs.append(clf)

    # training second level model (samples as rows, models as columns)
    tree_train = np.transpose(tree_train)
    second_level.fit(tree_train, y_train)

    # calculate a baseline: always predict the training median
    median = [metrics.median(y_train)] * test_size
    base = sk_metrics.mean_absolute_error(y_test, median)

    # rows of y_ensemble: 0 = prediction, 1 = lower, 2 = upper
    y_ensemble = np.zeros((3, test_size))

    # for each cooler in the test set
    for idx in range(test_size):
        pred, lower, upper = results.get_quote(x_test[idx], clfs, second_level)

        # save these values
        y_ensemble[0][idx] = pred
        y_ensemble[1][idx] = lower
        y_ensemble[2][idx] = upper

    # calculate comparators
    comparators = metrics.get_comparators(y_test, y_ensemble)

    # save models to predict coolers
    save.update(
        "model",
        [clfs, second_level, model_type, parameter, comparators],
    )

    return base, comparators['mean-absolute-error']
def get_data():
    """Return the first element of the saved ``"data"`` triple."""
    # save.load("data") yields three values (unpacked elsewhere in this file
    # as data, encoders, averages); only the first is handed back
    records, _encoders, _averages = save.load("data")
    return records