def min_empirical_error(xpldata):
    """Return the minimal empirical error for data read from an XPL file.

    The resulting value is a threshold used as an overfitting reference.

    Parameters
    ----------
    xpldata : ExampleData(data, freq0, freq1, winshape, windata, filename)
        Same as xplutil returns.

    Returns
    -------
    err : double
        The error value.
    """
    # Normalize the raw class-frequency tables into weights, then score them.
    weights0, weights1 = clf.normalize_table(xpldata.freq0, xpldata.freq1)
    return clf.error(weights0, weights1)
def test_error():
    """cl.error should sum, at each pattern, the weight of the losing class."""
    w0 = np.array([0.15, 0.2, 0.1, 0.03, 0.08])
    w1 = np.array([0.05, 0.0, 0.1, 0.17, 0.12])
    # w0 wins at indices 0-1 (error contribution w1) and w1 wins at
    # indices 2-4 (error contribution w0): the total is the elementwise
    # minimum summed over all patterns.
    expected_error = np.minimum(w0, w1).sum()
    observed = cl.error(w0, w1)
    nt.assert_almost_equal(observed, expected_error)
# Cluster businesses and users with k-means, then compute and persist the
# classification error.
#
# Side effects: reads business_data.p and writes clustered_business.p,
# clustered_user.p and error.p in the current working directory.
import cPickle as pickle
import numpy as np
import classifier
import featurizer
import gen_training_data
from scipy import sparse

numBusinessClusters = 10
numUserClusters = 10

# Cluster the businesses and persist the cluster assignments.
# `with` guarantees the handles are closed (the original open() calls leaked them).
with open("business_data.p", "rb") as f:
    business_data = pickle.load(f)
business_clusters = featurizer.kmeans(business_data, numBusinessClusters)
with open("clustered_business.p", "wb") as f:
    pickle.dump(business_clusters["data_clusters"], f)

# Cluster the users and persist the cluster assignments.
# NOTE(review): the meaning of the 5 and 800000 arguments is not visible
# here — presumably a sampling parameter and a row limit; confirm against
# gen_training_data.cluster_users.
user_data = gen_training_data.cluster_users(numUserClusters, 5, 800000)
user_clusters = featurizer.kmeans(user_data["training"], numUserClusters)
with open("clustered_user.p", "wb") as f:
    pickle.dump(user_clusters["data_clusters"], f)

# Compute the classification error, report it, and persist it.
error = classifier.error(numUserClusters, numBusinessClusters)
# Parenthesized print of a single value behaves identically on Python 2
# (where this file's `cPickle` import places it) and Python 3.
print(error)
with open("error.p", "wb") as f:
    pickle.dump(error, f)