#fit_predict(model2)
#fit_predict(model1)
#fit_predict(model3)
#fit_predict(model5)

#fit_predict(model4, features, salaries, validation_features, type_n="test_subm")

all_model_predictions = []
for model_name in model_names:
    fit_predict(model_name,
                features,
                salaries,
                validation_features,
                type_n="test_subm")
    model_predictions = load_predictions(model_name, type_n="test_subm")
    print "modelp", model_predictions.shape
    #print "%s\nMAE: %f\n" % (model_name, log_mean_absolute_error(np.log(valid_salaries), model_predictions))
    all_model_predictions.append(model_predictions)
predictions = np.vstack(all_model_predictions).T
predictions = np.exp(predictions)
#predictions = np.random.randint(0,5, size=(10,3))
print predictions.shape
print predictions[1:10, :]

#classifier = LinearRegression()
#classifier.fit(predictions, salaries)
#result = classifier.predict(validation_features)
result = predictions.mean(axis=1)
model_name = "-".join(model_names)
model_name = "vowpal_loc5-extra20_40-mean"
#model_names = [model2, model4]
model_names = [model1, model6, model4]


#fit_predict(model2)
#fit_predict(model1)
#fit_predict(model3)
#fit_predict(model5)

#fit_predict(model4, features, salaries, validation_features, type_n="test_subm")


all_model_predictions = []
for model_name in model_names:
    fit_predict(model_name, features, salaries, validation_features, type_n="test_subm")
    model_predictions = load_predictions(model_name, type_n="test_subm")
    print "modelp", model_predictions.shape
    #print "%s\nMAE: %f\n" % (model_name, log_mean_absolute_error(np.log(valid_salaries), model_predictions))
    all_model_predictions.append(model_predictions)
predictions = np.vstack(all_model_predictions).T
predictions = np.exp(predictions)
#predictions = np.random.randint(0,5, size=(10,3))
print predictions.shape
print predictions[1:10, :]


#classifier = LinearRegression()
#classifier.fit(predictions, salaries)
#result = classifier.predict(validation_features)
result = predictions.mean(axis=1)
model_name = "-".join(model_names)
# Example #3
# 0
print valid_salaries.shape

model1 = "ExtraTree_min_sample2_20trees_200f_noNorm_categoryTimeType_log"
model2 = "vowpall"
model3 = "Random_forest_min_sample2_20trees_200f_noNorm_categoryTimeType_log"
model4 = "ExtraTree_min_sample2_40trees_200f_noNorm_categoryTimeType_log"
model_names = [model1, model2, model3, model4]


#fit_predict(model2)
#fit_predict(model1)
#fit_predict(model3)

all_model_predictions = []
for model_name in model_names:
    model_predictions = load_predictions(model_name)
    #print model_predictions[0]
    if not model_name.endswith("log") and not model_name.startswith("vowpall"):
        model_predictions = np.log(model_predictions)
    #if model_name.startswith("vowpall"):
        #model_predictions = np.log(model_predictions)
    #print model_predictions[0]
    print "%s\nMAE: %f\n" % (model_name, mean_absolute_error(valid_salaries, np.exp(model_predictions)))
    all_model_predictions.append(model_predictions)
predictions = np.vstack(all_model_predictions).T
predictions = np.exp(predictions)
#predictions = np.random.randint(0,5, size=(10,3))
print predictions.shape
print predictions[1:10, :]
indexes = range(0, len(model_names))