def __init__(self, raw_data, feature_stop, target_column,
             M=10,  # Number of times to loop GBM portion
             regressor=True, existing_weights="nope", features_to_use="all",
             gamma=1.0):
    """Set up the gradient-boosting wrapper around a base regressor.

    raw_data, feature_stop, target_column, regressor, existing_weights and
    features_to_use are passed straight through to baseRegressor; M is the
    number of GBM loop iterations.

    NOTE(review): the original body assigned self.gamma = gamma although no
    such parameter existed (NameError at construction unless a global gamma
    happened to be in scope).  gamma is now an explicit keyword argument so
    the interface stays backward-compatible; the 1.0 default is a guess --
    its semantics in the GBM loop are not visible here, confirm the
    intended value.
    """
    # Instantiate the regModel first
    self.regModel = baseRegressor(raw_data, feature_stop, target_column,
                                  regressor, existing_weights, features_to_use)
    self.raw_data = raw_data
    self.fs = feature_stop    # last feature column index (per sibling scripts)
    self.tc = target_column   # column index of the target value
    self.regressor = regressor
    self.M = M
    self.gamma = gamma
    self.residModel = 0       # placeholder; presumably replaced during boosting -- TODO confirm
# Driver script: fit the base regressor on target column 268 and rank the
# learned weights by absolute magnitude.
#
# NOTE(review): this file was pasted with its newlines collapsed; formatting
# restored from the inline comments and statement order.  list_correct and
# baseRegressor were used without being imported -- imports added, matching
# the module names used by the other driver scripts in this project.
from base_regressor import baseRegressor
from text_to_list import list_correct
from excess_functions import return_single_column

df = list_correct()

# 268 & 267 are regression problems
# 266 is a classification problem
DAT = 268

# Taking the first 20 variance problems
wt = [12, 21, 267, 25, 266, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14]

# Toy Data
#df = [ ["P1",1,2,10],["P2",3,5,26],["P3",1,1,6],["P4",6,2,20],["P5",5,3,22] ]
# [0,1,2,3]

model = baseRegressor(raw_data=df, feature_stop=265, target_column=DAT,
                      regressor=True, features_to_use=wt, stoppage=10000000)
model.fit(alpha=0.000000000001, target_error=1)  # Best I could find for 268

"""
model = baseRegressor(raw_data=df,feature_stop=265,target_column=266,
regressor=False,features_to_use=wt);
model.fit(alpha=0.00000000001,target_error=0.1); # For Classification
"""

print(model.weights)  # Returns a list of random values.

# Comprehension instead of map() so len(l) below also works under Python 3.
l = [abs(w) for w in model.weights]

#print "Should return 2,4"
#print model.baseError_plot # this is a list

# Indices of the weights sorted by absolute value, largest first.
# (Kept as sort-then-reverse rather than reverse=True to preserve the
# original tie ordering exactly.)
b = sorted(range(len(l)), key=lambda k: l[k])
b.reverse()
# NOTE(review): the line below is a whitespace-mangled paste of a driver
# script -- the original newlines were lost, so the inline '#' comments now
# swallow the statements that follow them and the line is not runnable as-is.
# It opens mid-function: the tail of a mean-squared-error helper over two
# equal-length lists (judging by the final "Mean-Squared Error" print, it is
# the mse() called there), but its 'def' header and the initialization of
# running_summation sit outside this view, so the code is kept byte-identical
# rather than reconstructed.  The rest is a script that trains the base
# regressor on target column 267 with seeded existing_weights and reports MSE
# on a validation split.  Restore the original line breaks before editing.
for i in range(len(l1)): running_summation += (l1[i] - l2[i])**2 return float( (1/float(len(l1))) * running_summation ); df = list_correct() training, validation = splitter(df) # Taking the first 20 variance problems #wt = [12, 21, 267, 25, 266, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14] wt = [12, 21, 25, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14] # categorical: [ 1, 4, 5, 6, 7, 8, 9, 10, 11 ] x # 11 is treatments (1,2,3,4,5,6) categories # This will train for the first regressive value model = baseRegressor(raw_data=df,feature_stop=265,target_column=267, regressor=True, existing_weights=[-0.10,0.10], features_to_use=wt,stoppage=80000); model.fit(alpha=0.0000000000001,target_error=0.00001); # Best I could find for 268 """ # This will train for the last regressive value model = baseRegressor(raw_data=df,feature_stop=265,target_column=268, regressor=True,features_to_use=wt,stoppage=10000000); model.fit(alpha=0.000000000001,target_error=1); # Best I could find for 268 """ print model.weights; # Returns a list of random values. predictions = model.predict(validation); answers = return_single_column(validation,dex=267); print "Mean-Squared Error: " + str(mse(answers,predictions));
# NOTE(review): whitespace-mangled paste, same as the fragment above -- the
# inline '#' comments swallow the following statements, so this is not
# runnable as-is.  It opens mid-function: the tail of an accuracy helper
# (yes_count / total_count; evidently the binary_cv() called in the final
# print), whose 'def' header and counter initialization are outside this
# view -- kept byte-identical rather than reconstructed.  The rest trains the
# classifier on target column 266 and prints validation accuracy.  Also note
# the trailing triple-quoted string opens but never closes within this view;
# its closing quotes presumably sit in the lost portion of the file.  Restore
# the original line breaks before editing.
if l1[i] == l2[i]: # I guess it doesn't matter which are the predictions? yes_count += 1; return float(yes_count / float(total_count)); df = list_correct() training, validation = splitter(df) # Taking the first 20 variance problems #wt = [12, 21, 267, 25, 266, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14] wt = [12, 21, 25, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14] # categorical: [ 1, 4, 5, 6, 7, 8, 9, 10, 11 ] x # 11 is treatments (1,2,3,4,5,6) categories # I changed alpha by 1 decimal place # Below will be the classification problem model = baseRegressor(raw_data=training,feature_stop=265,target_column=266, regressor=False,features_to_use=wt,stoppage=5000000); model.fit(alpha=0.0000000001,target_error=0.1); # For Classification print model.weights; # Need to test that I can insert weights correctly here predictions = model.predict(validation); answers = return_single_column(validation,dex=266); print "Percentage Correct: " + str(binary_cv(predictions,answers)); """ # This will train for the first regressive value model = baseRegressor(raw_data=df,feature_stop=265,target_column=267, regressor=True,features_to_use=wt,stoppage=10000000); model.fit(alpha=0.000000000001,target_error=1); # Best I could find for 268 # This will train for the last regressive value
# Evaluate a pre-trained classifier: the frozen weights are injected via
# existing_weights, so no fit() call is needed before predict().
#
# NOTE(review): newlines restored from a whitespace-mangled paste.  This
# fragment uses training, validation, return_single_column and binary_cv
# without defining them -- they are presumably defined/imported earlier in
# the original file (cf. the sibling scripts); verify before running.

# Feature indices used during training.
wt = [12, 21, 25, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14]

# Frozen weights from a previous classification run (one per entry in wt).
wt_class = [0.005282546933085302, 0.00019005094290200331, -0.3208438520005847,
            0.060298374167868034, 0.47575278102153445, -0.007341090879961012,
            -0.0003345576888578404, -0.34825960673644474, 0.0003877180341366817,
            -0.28494755094203117, 2.964726478234287e-06, -0.01292471641406875,
            0.0005911449483264154, 0.008637346773164302, 0.1367495624114614,
            0.004521909748139259, -0.009062774664435601, -0.01652866109254906]

# NOTE(review): feature_stop=3 / target_column=4 look like leftovers from the
# toy data set -- the sibling scripts use feature_stop=265, target_column=266
# for this classification problem.  Left unchanged to preserve behavior, but
# verify before trusting the reported accuracy.
model = baseRegressor(raw_data=training, feature_stop=3, target_column=4,
                      regressor=False, existing_weights=wt_class,
                      features_to_use=wt)

predictions = model.predict(validation)
answers = return_single_column(validation, dex=266)
print("Percentage Correct: " + str(binary_cv(predictions, answers)))
# Smoke-test script: construct a classifier with caller-supplied weights
# (no fit() call) and print model.weights to check that existing_weights
# were installed -- presumably they should be echoed back unchanged; confirm
# against baseRegressor's constructor.
from base_regressor import baseRegressor
from text_to_list import list_correct

df = list_correct()

model = baseRegressor(raw_data=df, feature_stop=265, target_column=266,
                      regressor=False, features_to_use=[3, 4, 5, 6],
                      existing_weights=[6.0, 3.0, 4.0, 5.0],
                      stoppage=10000000)
print(model.weights)