# Ensemble step for the "test_subm" run: for every entry of `model_names`
# call fit_predict(), read the predictions back with load_predictions(),
# stack them into one matrix and average across models.
# NOTE(review): presumably fit_predict() persists the predictions that
# load_predictions() reads back -- confirm against their definitions.

# Earlier one-off invocations, kept for reference:
#fit_predict(model2)
#fit_predict(model1)
#fit_predict(model3)
#fit_predict(model5)
#fit_predict(model4, features, salaries, validation_features, type_n="test_subm")

all_model_predictions = []
for model_name in model_names:
    fit_predict(model_name, features, salaries, validation_features, type_n="test_subm")
    model_predictions = load_predictions(model_name, type_n="test_subm")
    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("modelp %s" % (model_predictions.shape,))
    #print "%s\nMAE: %f\n" % (model_name, log_mean_absolute_error(np.log(valid_salaries), model_predictions))
    all_model_predictions.append(model_predictions)

# vstack() puts one model per row; the transpose turns that into one
# column per model (assuming each prediction vector is 1-D -- TODO confirm).
predictions = np.vstack(all_model_predictions).T
# NOTE(review): exp() suggests the loaded predictions are on a log scale --
# confirm against fit_predict().
predictions = np.exp(predictions)
#predictions = np.random.randint(0,5, size=(10,3))
print(predictions.shape)
# Debug peek at rows 1..9 (row 0 is not shown).
print(predictions[1:10, :])

# Disabled alternative: fit a linear model on the stacked predictions.
#classifier = LinearRegression()
#classifier.fit(predictions, salaries)
#result = classifier.predict(validation_features)

# Ensemble output: mean over the model columns of each row.
result = predictions.mean(axis=1)

# Fixed label for this ensemble. The label was previously derived as
# "-".join(model_names) and then immediately overwritten by this literal;
# that dead assignment has been dropped.
model_name = "vowpal_loc5-extra20_40-mean"
# Ensemble step for the "test_subm" run: pick the models to combine, call
# fit_predict() for each, read the predictions back with
# load_predictions(), stack them into one matrix and average across models.
# NOTE(review): presumably fit_predict() persists the predictions that
# load_predictions() reads back -- confirm against their definitions.

# Model selection; the previous choice is kept for reference.
#model_names = [model2, model4]
model_names = [model1, model6, model4]

# Earlier one-off invocations, kept for reference:
#fit_predict(model2)
#fit_predict(model1)
#fit_predict(model3)
#fit_predict(model5)
#fit_predict(model4, features, salaries, validation_features, type_n="test_subm")

all_model_predictions = []
for model_name in model_names:
    fit_predict(model_name, features, salaries, validation_features, type_n="test_subm")
    model_predictions = load_predictions(model_name, type_n="test_subm")
    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("modelp %s" % (model_predictions.shape,))
    #print "%s\nMAE: %f\n" % (model_name, log_mean_absolute_error(np.log(valid_salaries), model_predictions))
    all_model_predictions.append(model_predictions)

# vstack() puts one model per row; the transpose turns that into one
# column per model (assuming each prediction vector is 1-D -- TODO confirm).
predictions = np.vstack(all_model_predictions).T
# NOTE(review): exp() suggests the loaded predictions are on a log scale --
# confirm against fit_predict().
predictions = np.exp(predictions)
#predictions = np.random.randint(0,5, size=(10,3))
print(predictions.shape)
# Debug peek at rows 1..9 (row 0 is not shown).
print(predictions[1:10, :])

# Disabled alternative: fit a linear model on the stacked predictions.
#classifier = LinearRegression()
#classifier.fit(predictions, salaries)
#result = classifier.predict(validation_features)

# Ensemble output: mean over the model columns of each row.
result = predictions.mean(axis=1)

# Label for the ensemble: the member model names joined with "-".
# This deliberately rebinds the loop variable `model_name`.
model_name = "-".join(model_names)
# Validation-side comparison: load the stored predictions of each model,
# bring them to a common log scale, report each model's MAE against
# `valid_salaries`, and build the (rows x models) prediction matrix.
print(valid_salaries.shape)

# Names under which load_predictions() looks up each model's predictions.
model1 = "ExtraTree_min_sample2_20trees_200f_noNorm_categoryTimeType_log"
model2 = "vowpall"
model3 = "Random_forest_min_sample2_20trees_200f_noNorm_categoryTimeType_log"
model4 = "ExtraTree_min_sample2_40trees_200f_noNorm_categoryTimeType_log"
model_names = [model1, model2, model3, model4]

# Earlier one-off invocations, kept for reference:
#fit_predict(model2)
#fit_predict(model1)
#fit_predict(model3)

all_model_predictions = []
for model_name in model_names:
    model_predictions = load_predictions(model_name)
    #print model_predictions[0]
    # Take the log of everything except models whose name ends in "log" or
    # starts with "vowpall".
    # NOTE(review): presumably those two groups are already stored on a
    # log scale -- confirm where the predictions are written.
    if not model_name.endswith("log") and not model_name.startswith("vowpall"):
        model_predictions = np.log(model_predictions)
    #if model_name.startswith("vowpall"):
        #model_predictions = np.log(model_predictions)
    #print model_predictions[0]
    # MAE is computed after mapping the predictions back through exp().
    # The single pre-formatted argument prints the same text whether
    # `print` is the Python 2 statement or the Python 3 function.
    print("%s\nMAE: %f\n" % (
        model_name,
        mean_absolute_error(valid_salaries, np.exp(model_predictions)),
    ))
    all_model_predictions.append(model_predictions)

# vstack() puts one model per row; the transpose turns that into one
# column per model (assuming each prediction vector is 1-D -- TODO confirm).
predictions = np.vstack(all_model_predictions).T
predictions = np.exp(predictions)
#predictions = np.random.randint(0,5, size=(10,3))
print(predictions.shape)
# Debug peek at rows 1..9 (row 0 is not shown).
print(predictions[1:10, :])

# Column index of every model in `predictions`. list() keeps this a real
# list on Python 3 as well, matching what Python 2's range() returned.
indexes = list(range(len(model_names)))