class gradientBooster(object):   # class name assumed -- only __init__ survives in this fragment
    def __init__(self,raw_data,
                 feature_stop,
                 target_column,
                 M=10,              # Number of times to loop the GBM portion
                 gamma=0.1,         # shrinkage per boosting stage (default assumed)
                 regressor=True,
                 existing_weights="nope",
                 features_to_use="all"):
        # Instantiate the regModel first
        self.regModel = baseRegressor(raw_data,feature_stop,target_column,regressor,
                                      existing_weights,features_to_use);
        self.raw_data = raw_data;
        self.fs = feature_stop;
        self.tc = target_column;
        self.regressor = regressor;

        self.M = M;

        self.gamma = gamma;
        self.residModel = 0;    # placeholder for the residual-stage model(s)
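
# The class's fit() method is not part of this fragment, so the following is
# only an assumed sketch of the loop that M and gamma exist for: fit a base
# model, then fit M follow-up models to the remaining residuals, shrinking
# each stage's contribution by gamma. A one-parameter slope-through-the-origin
# fit stands in for baseRegressor; all names below are illustrative only.

def _fit_slope(xs, residuals):
    # Least-squares slope through the origin, used as a stand-in weak learner.
    num = sum(x * r for x, r in zip(xs, residuals))
    den = sum(x * x for x in xs)
    return float(num) / den if den else 0.0

def _boosted_fit(xs, ys, M=10, gamma=0.5):
    preds = [0.0] * len(ys)
    stages = []
    for m in range(M):
        residuals = [y - p for y, p in zip(ys, preds)]
        stage = _fit_slope(xs, residuals)          # fit this stage to the residuals
        stages.append(stage)
        preds = [p + gamma * stage * x for p, x in zip(preds, xs)]
    return stages

# e.g. _boosted_fit([1.0,2.0,3.0], [2.0,4.0,6.0]) has gamma*sum(stages) -> 2.0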
from base_regressor import baseRegressor
from text_to_list import list_correct
from excess_functions import return_single_column

df = list_correct()
# 268 & 267 are regression problems
# 266 is a classification problem

DAT = 268;

# Take the 20 highest-variance columns (note: 267 and 266 are themselves target columns)
wt = [12, 21, 267, 25, 266, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14]

# Toy Data (columns 0-3: ID, feature x1, feature x2, target y = 2*x1 + 4*x2)
#df = [ ["P1",1,2,10],["P2",3,5,26],["P3",1,1,6],["P4",6,2,20],["P5",5,3,22] ]
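
# Assumed shape of a toy run (kept commented out, like the data above): with
# feature columns 1-2 and target column 3, fit() should drive the two weights
# toward 2 and 4. The feature_stop/target_column values are an assumption
# about the constructor's indexing.
#model = baseRegressor(raw_data=df,feature_stop=2,target_column=3,
#                      regressor=True);
#model.fit(alpha=0.01,target_error=0.001);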

model = baseRegressor(raw_data=df,feature_stop=265,target_column=DAT,
                      regressor=True,features_to_use=wt,stoppage=10000000);
model.fit(alpha=0.000000000001,target_error=1); # Best I could find for 268
"""
model = baseRegressor(raw_data=df,feature_stop=265,target_column=266,
                      regressor=False,features_to_use=wt);
model.fit(alpha=0.00000000001,target_error=0.1);   # For Classification
"""

print model.weights;     # The weight vector after fitting (initialised randomly).
l = map(abs, model.weights);
#print "Should return 2,4"   # expected weights for the toy data above

#print model.baseError_plot    # a list (presumably the error at each training step)

b = sorted(range(len(l)),key=lambda k: l[k]);
b.reverse()
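
# b now holds positions into model.weights ordered by |weight|, largest first.
# Mapping positions back to raw-data column numbers is assumed to be
# positional with features_to_use, since the model was built with wt:
top_columns = [wt[i] for i in b];
print top_columns;    # candidate most-influential columns, by |weight|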
from base_regressor import baseRegressor
from text_to_list import list_correct
from excess_functions import return_single_column
# NOTE: the module that provides splitter() is not among these fragments;
# its assumed behaviour is sketched after the call below.

def mse(l1,l2):
    # Mean-squared error between two equal-length lists.
    running_summation = 0.0
    for i in range(len(l1)):
        running_summation += (l1[i] - l2[i])**2
    return float( (1/float(len(l1))) * running_summation );

df = list_correct()
training, validation = splitter(df)
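
# splitter() comes from a module not shown in these fragments; a minimal
# sketch of its assumed behaviour (a shuffled 80/20 train/validation split):
#
#import random
#def splitter(rows, frac=0.8):
#    rows = list(rows)
#    random.shuffle(rows)
#    cut = int(frac * len(rows))
#    return rows[:cut], rows[cut:]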

# The 20 highest-variance columns, minus the two target columns (267 and 266)
#wt = [12, 21, 267, 25, 266, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14]
wt = [12, 21, 25, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14]
# categorical: [ 1, 4, 5, 6, 7, 8, 9, 10, 11 ] x
# 11 is treatments (1,2,3,4,5,6) categories

# This will train on the first regression target (column 267)
model = baseRegressor(raw_data=df,feature_stop=265,target_column=267,
                      regressor=True, existing_weights=[-0.10,0.10],
                      features_to_use=wt,stoppage=80000);
model.fit(alpha=0.0000000000001,target_error=0.00001); # Best I could find for 267
"""
# This will train on the last regression target (column 268)
model = baseRegressor(raw_data=df,feature_stop=265,target_column=268,
                      regressor=True,features_to_use=wt,stoppage=10000000);
model.fit(alpha=0.000000000001,target_error=1); # Best I could find for 268
"""

print model.weights;     # The weight vector after fitting.
predictions = model.predict(validation);
answers = return_single_column(validation,dex=267);
print "Mean-Squared Error: " + str(mse(answers,predictions));

from base_regressor import baseRegressor
from text_to_list import list_correct
from excess_functions import return_single_column
# NOTE: as above, the module that provides splitter() is not among these fragments.

def binary_cv(l1,l2):
    # Fraction of positions where the two label lists agree.
    yes_count = 0;
    total_count = len(l1);
    for i in range(total_count):
        if l1[i] == l2[i]:  # equality is symmetric, so argument order doesn't matter
            yes_count += 1;
    return float(yes_count / float(total_count));

df = list_correct()
training, validation = splitter(df)

# The 20 highest-variance columns, minus the two target columns (267 and 266)
#wt = [12, 21, 267, 25, 266, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14]
wt = [12, 21, 25, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14]
# categorical: [ 1, 4, 5, 6, 7, 8, 9, 10, 11 ] x
# 11 is treatments (1,2,3,4,5,6) categories

# alpha changed by one decimal place here
# Below is the classification problem
model = baseRegressor(raw_data=training,feature_stop=265,target_column=266,
                      regressor=False,features_to_use=wt,stoppage=5000000);
model.fit(alpha=0.0000000001,target_error=0.1);   # For Classification

print model.weights;   # Need to test that I can insert weights correctly here

predictions = model.predict(validation);
answers = return_single_column(validation,dex=266);
print "Percentage Correct: " + str(binary_cv(predictions,answers));

"""
# This will train on the first regression target (column 267)
model = baseRegressor(raw_data=df,feature_stop=265,target_column=267,
                      regressor=True,features_to_use=wt,stoppage=10000000);
model.fit(alpha=0.000000000001,target_error=1); # Best I could find for 268
"""

# Re-insert the weights learned by the classification run above and evaluate
# them on the validation set without re-fitting (tests the existing_weights path).
wt = [12, 21, 25, 20, 11, 28, 27, 16, 26, 32, 13, 29, 2, 33, 1, 17, 30, 14]
wt_class = [0.005282546933085302, 
    0.00019005094290200331, 
    -0.3208438520005847, 
    0.060298374167868034, 
    0.47575278102153445, 
    -0.007341090879961012, 
    -0.0003345576888578404, 
    -0.34825960673644474, 
    0.0003877180341366817, 
    -0.28494755094203117, 
    2.964726478234287e-06, 
    -0.01292471641406875, 
    0.0005911449483264154, 
    0.008637346773164302, 
    0.1367495624114614, 
    0.004521909748139259, 
    -0.009062774664435601, 
    -0.01652866109254906]

model = baseRegressor(raw_data=training,
                      feature_stop=265,
                      target_column=266,
                      regressor=False,
                      existing_weights=wt_class,
                      features_to_use=wt);

predictions = model.predict(validation);
answers = return_single_column(validation,dex=266);
print "Percentage Correct: " + str(binary_cv(predictions,answers));
from base_regressor import baseRegressor
from text_to_list import list_correct
df = list_correct()

model = baseRegressor(raw_data=df,feature_stop=265,target_column=266,
                      regressor=False,features_to_use=[3,4,5,6],
                      existing_weights = [6.0,3.0,4.0,5.0],
                      stoppage=10000000);

print model.weights;
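
# No fit() is called above, so -- assuming baseRegressor stores
# existing_weights as given -- the printed weights should be exactly the
# inserted list:
assert model.weights == [6.0,3.0,4.0,5.0], "existing_weights were not stored as-is";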