def train_model(X_df, y_array, skf_is):
    fe = FeatureExtractor()
    fe.fit(X_df, y_array)
    X_array = fe.transform(X_df)
    # Regression
    train_is, _ = skf_is
    X_train_array = np.array([X_array[i] for i in train_is])
    y_train_array = np.array([y_array[i] for i in train_is])
    reg = Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
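# A minimal usage sketch for train_model, assuming scikit-learn KFold index
# splits; X_df, y_array, np, FeatureExtractor and Regressor are the objects
# from the snippet above.
from sklearn.model_selection import KFold

for train_is, test_is in KFold(n_splits=5).split(X_df):
    fe, reg = train_model(X_df, y_array, (train_is, test_is))
    X_array = fe.transform(X_df)
    y_pred = reg.predict(np.array([X_array[i] for i in test_is]))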
def main():
    if len(sys.argv) != 3:
        print(usage())
        return
    random_forest = Regressor()
    random_forest.load_train_data(sys.argv[1])
    random_forest.load_test_data(sys.argv[2])
    random_forest.train(500)
    random_forest.print_score()
def train_regressor(num_iterations=20000):
    p = params.regressor
    regressor = Regressor(p.height, p.width, p.n_channels)
    data = dataset.localization(p.train_data)
    iterator = dataset.iterator(data)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(iterator.initializer)
        next_element = iterator.get_next()
        for i in range(num_iterations):
            x, y = sess.run(next_element)
            feed_dict = {
                regressor.x: x,
                regressor.y: y,
                regressor.keep_prob: 0.5,
            }
            regressor.train_step.run(feed_dict=feed_dict)
            if (i + 1) % 100 == 0:
                accuracy = regressor.accuracy.eval(feed_dict=feed_dict)
                print(i + 1, accuracy)
        print('Training complete')
        print('Model saved: ', saver.save(sess, p.model_path))
def various_regression():
    # one_piece_t_range = [range(INCIDENT_START_SECOND, INCIDENT_END_SECOND + 1, 60)]
    two_piece_t_range = [
        range(INCIDENT_START_SECOND, INCIDENT_START_SECOND + 6 * 60, 60),
        range(INCIDENT_START_SECOND + 6 * 60, INCIDENT_END_SECOND + 1, 60)
    ]
    num_lags = 2
    minutes_back_to_first_lag = 5
    for pieces_of_time_range in [two_piece_t_range]:  # one_piece_t_range
        # LR
        compute_stats(logger, ARBITRARY_SEED,
                      random_sample_of_normal_atts=None,
                      lags=num_lags,
                      first_lag_minutes_back=minutes_back_to_first_lag,
                      regressor=Regressor('output_LR_5step', logger,
                                          pieces_of_time_range,
                                          LinearRegression,
                                          dict(fit_intercept=False),
                                          None, ARBITRARY_SEED, num_lags,
                                          minutes_back_to_first_lag))
def __plot_various_time_series(args):
    scenario_results_dir, num_lags, earliest_lag_minutes_back = args
    two_piece_t_range = [
        range(INCIDENT_START_SECOND, INCIDENT_START_SECOND + 6 * 60, 60),
        range(INCIDENT_START_SECOND + 6 * 60, INCIDENT_END_SECOND + 1, 60)
    ]
    regressor = Regressor(
        output_subdir='output_plots_degradation_m_ordinary_LR',
        logger=logger,
        t_ranges_for_piecewise_modeling=two_piece_t_range,
        regression_cls=LinearRegression,
        regressor_init_kwargs=dict(fit_intercept=False),
        scenario_results_dir=scenario_results_dir,
        seed=ARBITRARY_SEED,
        num_lags=num_lags,
        earliest_lag_minutes_back=earliest_lag_minutes_back)
    # plotter = Plotter(output_dir='output_tmp',
    #                   scenario_results_dir=None,
    #                   seed=ARBITRARY_SEED,
    #                   num_lags=num_lags,
    #                   earliest_lag_minutes_back=earliest_lag_minutes_back)
    normal_atts = glob.glob(
        os.path.join(scenario_results_dir, 'normal_scenario_demand_*', '*',
                     '*Link Segment Results_001.att'))
    regressor.compare_scatter_of_normal_model_with_and_without_other_links(
        normal_atts)
    abnormal_atts = glob.glob(
        os.path.join(scenario_results_dir, 'incident_scenarios',
                     'incident_link74_location_*', 'results.att'))
    degradation_m_ordinary_hexbin(regressor, normal_atts, abnormal_atts)
    regressor.time_series_degradation_of_normal_model_with_and_without_other_links(
        normal_atts, abnormal_atts,
        'time_series_degradation_of_normal_model_with_or_without_other_links.png')
''' Find the household with the lowest carbon emissions from a single group '''
def find_greenest(cluster):
    # The original compared sum(cluster[1:len(cluster) - 1]) on every
    # iteration, which never depends on i; assuming each entry is one
    # household's emissions total, the per-element comparison below is
    # the intended logic.
    min_emissions = float('inf')
    min_index = -1
    for i in range(len(cluster)):
        if cluster[i] < min_emissions:
            min_emissions = cluster[i]
            min_index = i
    return min_emissions, min_index

# Preprocessing
preprocessor = Preprocessor()
preprocessor.run_preprocessor()

# Elbow method
elbow = Elbow()
elbow.run_elbow()

# Clustering
clusterer = Clusterer()
clusterer.run_clusterer()

# Regression
regressor = Regressor()
regressor.run_regressor()
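# A quick usage sketch for find_greenest above; the emission totals are
# made-up sample values.
sample_cluster = [12.4, 7.9, 15.2, 9.1]
emissions, index = find_greenest(sample_cluster)
print('Greenest household:', index, 'with emissions', emissions)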
def test_split_node(self):
    regressor = Regressor()
    regressor.fit(x, y)
    print(regressor.predict(x_valid))
def __init__(self):
    Regressor.__init__(self)
    self.weights, self.bias = (None, None)
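# A minimal sketch of how such a subclass might fill in self.weights and
# self.bias; the actual fit/predict methods are not shown in the original,
# so an ordinary-least-squares fit is assumed here purely for illustration.
import numpy as np

class LeastSquaresRegressor(Regressor):
    def __init__(self):
        Regressor.__init__(self)
        self.weights, self.bias = (None, None)

    def fit(self, X, y):
        # Append a bias column and solve the least-squares problem.
        X1 = np.hstack([X, np.ones((X.shape[0], 1))])
        theta, *_ = np.linalg.lstsq(X1, y, rcond=None)
        self.weights, self.bias = theta[:-1], theta[-1]

    def predict(self, X):
        return X @ self.weights + self.bias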
T0 = time()
print("load dataset...")
# pd.DataFrame.from_csv was removed from pandas; read_csv with index_col=0
# is the equivalent call.
X_df_2011 = pd.read_csv("datasets/2011.csv", index_col=0)
X_df_2012 = pd.read_csv("datasets/2012.csv", index_col=0)
X_df_2013 = pd.read_csv("datasets/2013.csv", index_col=0)
X_df = pd.concat([X_df_2011, X_df_2012, X_df_2013], axis=0)

print("load dates...")
with open("target_dates_1.pkl", "rb") as f:  # pickle files need binary mode
    dates = pickle.load(f)
# dates n1677, n3051 and n3451 cause trouble
dates = dates.delete([1677, 3051, 3451])
sub = load_submission("data/submission.txt")
pred_dates = sub.index
fit_dates = load_all_data().index
fit_dates = fit_dates.delete(range(18024))  # hack

print("make the prediction...")
# make prediction
reg = Regressor()
reg.fit(fit_dates)
pred = reg.predict(pred_dates)

print("acquire the true value...")
target = X_df.loc[dates]

print("compute error...")
# get the error
err = get_error_dfs(pred, target)
print("LinExp error:", err, "run in:", time() - T0, "s")
print "Loading the data to predict ..." dataTest=pickle.load(open("/Users/ozad/Desktop/axa_data_challenge-master/test_DataFrame.obj","rb")) labels = data['CSPL_RECEIVED_CALLS'].unique() #### Load the X train and Y train Y_train=data['CSPL_RECEIVED_CALLS'][:300000] X_train=data[ [ 'DAY_WE_DS', 'cod_ASS_ASSIGNMENT','year','month','day','hour','minute']][:300000] X_test=dataTest[[ 'DAY_WE_DS', 'cod_ASS_ASSIGNMENT','year','month','day','hour','minute']] Y_train=np.array(Y_train) X_train=np.array(X_train) X_test=np.array(X_test) #### Creation of regressor reg=Regressor() #### Cross validation print "Cross validation ..." #loo = cross_validation.LeaveOneOut(len(y_df)) loo=10 scores = cross_validation.cross_val_score(reg, X_train, Y_train, scoring='mean_squared_error', cv=loo,) print "The score mean of cross validation : " print scores.mean() #### fit print "Fit ..." reg.fit(X_train, Y_train)
print "Loading the X test ..." set_X_test=[] i=0 while i < len(sub_data['cod_ASS_ASSIGNMENT'].unique()): set_X_test.append(sub_test[features][sub_test['cod_ASS_ASSIGNMENT' ]==(i)]) i=i+1 i=0 listPred=[] score_cv_global=[] while i<len(set_X_train): scaler = pre.StandardScaler().fit(set_X_train[i][features_train]) X_train_scaled = scaler.transform(set_X_train[i][features_train]) print " Train et Predict the categorie : ",i reg=Regressor() reg.fit(X_train_scaled, set_Y_train[i]) #### Cross validation #print "Cross validation ...", i #loo = cross_validation.LeaveOneOut(len(y_df)) #loo=10 #scores = cross_validation.cross_val_score(reg, X_train_scaled, set_Y_train[i], scoring='neg_mean_squared_error', cv=loo,) #print "The score mean of cross validation : ", scores.mean() #score_cv_global.append(scores.mean()) if(len(set_X_test[i])>0): X_test_scaled = scaler.transform(set_X_test[i][features_train]) listPred.append( reg.predict(X_test_scaled)) i=i+1
from regressor import Regressor
from codecs import open
import time
from flask import Flask, render_template, request

app = Flask(__name__)

print("Load regressor")
regr = Regressor()
print("Regressor is successfully loaded")


@app.route("/renting-apartments", methods=["POST", "GET"])
def index_page(text="", prediction_message=""):
    if request.method == "POST":
        # Collect the listing attributes submitted through the form.
        form_fields = (
            'host_response_time', 'host_is_superhost',
            'host_identity_verified', 'is_location_exact', 'property_type',
            'room_type', 'bed_type', 'cancellation_policy', 'accommodates',
            'bedrooms', 'cleaning_fee', 'guests_included', 'minimum_nights',
            'neighbourhood_cleansed')
        query_dict = {field: request.form[field] for field in form_fields}
df_tmp = df[df['Part of'] == 'France']
df = df.drop(df_tmp.index)

from regressor import Regressor
from feature_extractor import FeatureExtractor

df_features = df.drop('target', axis=1)
y = df.target.values
df_train, df_test, y_train, y_test = train_test_split(
    df_features, y, test_size=0.5, random_state=42)

feature_extractor = FeatureExtractor()
model = Regressor()
X_train = feature_extractor.transform(df_train)
model.fit(X_train, y_train)
X_test = feature_extractor.transform(df_test)
y_pred = model.predict(X_test)
print('RMSE = ', np.sqrt(mean_squared_error(y_test, y_pred)))

imputer = model.clf.named_steps['imputer']
# np.int was removed from NumPy; the builtin int behaves the same here.
valid_idx = imputer.transform(np.arange(df_train.shape[1])).astype(int)
et = model.clf.named_steps['extratreesregressor']
T0 = time()
print("load dataset...")
# pd.DataFrame.from_csv was removed from pandas; read_csv with index_col=0
# is the equivalent call.
X_df_2011 = pd.read_csv("datasets/2011.csv", index_col=0)
X_df_2012 = pd.read_csv("datasets/2012.csv", index_col=0)
X_df_2013 = pd.read_csv("datasets/2013.csv", index_col=0)
X_df = pd.concat([X_df_2011, X_df_2012, X_df_2013], axis=0)

print("load dates...")
with open("target_dates_1.pkl", "rb") as f:  # pickle files need binary mode
    dates = pickle.load(f)
# dates n1677, n3051 and n3451 cause trouble
dates = dates.delete([1677, 3051, 3451])
sub = load_submission("data/submission.txt")
pred_dates = sub.index
fit_dates = load_all_data().index
fit_dates = fit_dates.delete(range(18024))  # hack

print("loading searching parameters...")
# Parameters to grid-search over
n_estimators = list(range(300, 900, 50))
param_grid = [{
    'n_estimators': n_estimators,
}]

print("start gridsearch...")
reg = Regressor()
reg.gridsearch(fit_dates, param_grid)
# Backward elimination: repeatedly drop the least significant feature
# until every remaining p-value is below 0.05.
max_p_value = 1
non_significant_index = -1
eliminator = None
while max_p_value > 0.05:
    if non_significant_index != -1:
        x_train = np.delete(x_train, non_significant_index, 1)
        x_test = np.delete(x_test, non_significant_index, 1)
    eliminator = Back_Elimination()
    eliminator.fit_OLS(y_train, x_train)
    p_values = eliminator.get_p_values()
    max_p_value = np.amax(p_values)
    non_significant_index = list(p_values).index(max_p_value)

"""
LOGISTIC REGRESSION
"""
regressor = Regressor()
regressor.train_machine(x_train, y_train)
prediction = regressor.predict(x_test)
print(prediction)

# #################
#  SUBMIT ANSWER
# #################
# print(test.columns)
holdout_ids = df_test['Id']
sub_df = {
    "Id": holdout_ids,
    "Cover_Type": prediction
}
ds = Data_Set(sub_df)
regressorA3 = linear_model.Ridge()
# regressorA = linear_model.BayesianRidge()
regressorA4 = linear_model.LinearRegression()
regressorA5 = linear_model.PassiveAggressiveRegressor()
# regressorA = linear_model.SGDRegressor()
# regressorA = linear_model.Lasso()
# regressorA = linear_model.RANSACRegressor()
# regressorA = RadiusNeighborsRegressor(radius=1.0)
# regressorA = KNeighborsRegressor(n_neighbors=4)

regressorB = MetaRegressor([regressorB2])
regressorA = MetaRegressor(
    [regressorA1, regressorA2, regressorA3, regressorA4, regressorA5])
baseRegressor = linear_model.LinearRegression()
regressor = Regressor(regressorA, regressorB, baseRegressor)
regressor.fit(historic_data_set, target_data_set)

# Plot the trained models against the data they were trained on, together
# with least-squares measures (in order to experiment with different
# linear models).
predict_base, predict_anomaly, predict_total, predict_dummy = regressor.predict(
    historic_data_set)
plt.figure(1)
plt.subplot(311)
plt.plot(predict_total, label="total")
plt.plot(predict_base, label="base")
plt.plot(predict_anomaly, label="anomaly")
plt.plot(target_data_set, label="target")
plt.plot(predict_dummy, label="dummy")
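# MetaRegressor is used above but not defined in this snippet; a minimal
# sketch under the assumption that it simply averages the predictions of
# its sub-regressors:
import numpy as np

class MetaRegressor(object):
    def __init__(self, regressors):
        self.regressors = regressors

    def fit(self, X, y):
        for reg in self.regressors:
            reg.fit(X, y)
        return self

    def predict(self, X):
        # Average the sub-regressors' predictions element-wise.
        return np.mean([reg.predict(X) for reg in self.regressors], axis=0)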
def __init__(self):
    Regressor.__init__(self)
    self.n_features, self.n_targets, self.n_bases = (None, None, None)
    self.m_weights = None
print "Loading the X test ..." set_X_test=[] i=0 while i < len(sub_data['cod_ASS_ASSIGNMENT'].unique()): set_X_test.append(sub_test[features][sub_test['cod_ASS_ASSIGNMENT' ]==(i)]) i=i+1 i=0 listPred=[] score=[] while i<len(set_X_train): scaler = StandardScaler().fit(set_X_train[i][features_train]) X_train_scaled = scaler.transform(set_X_train[i][features_train]) print " Train et Predict the categorie : ",i reg=Regressor() #reg.fit(X_train_scaled, set_Y_train[i]) #### Cross validation #print "Cross validation ...", i #loo = cross_validation.LeaveOneOut(len(y_df)) #loo=10 #scores = cross_validation.cross_val_score(reg, X_train_scaled, set_Y_train[i], scoring='neg_mean_squared_error', cv=loo,) #print "The score mean of cross validation : ", scores.mean() #score_cv_global.append(scores.mean()) ##### cross validation : score.append(crossVal(reg,X_train_scaled,set_Y_train[i]))
from sklearn.linear_model import LogisticRegression
from main import Main
from data_set import Data_Set
from dummy_master import Dummy_Master
from regressor import Regressor
from metrics import Metrics
from back_elimination import Back_Eliminations
from set_reader import Set_Reader
from splitter import Splitter
from plot import Plot
from process_data import Pre_Process_Data
# import visual-python  # invalid module name (hyphens are not allowed in
# imports); the intended package is unclear, so this stays commented out.

m = Main('init')
r = Regressor()
sp = Splitter()
mt = Metrics()
m.print()
be = Back_Eliminations()
pd = Pre_Process_Data()
sr = Set_Reader()
sr.read_files()
# sr.print_files_shapes()
train = sr.get_train()
test = sr.get_test()
ploter = Plot()
ploter.cut_survived(train, test)
# ploter.plot_set_survived(sr.get_train(), "Sex", "Survived")
test['Embarked_missing_data'] = test['Embarked_missing_data'].apply(
    lambda x: 0 if str(x) == 'nan' else x)
# print(test.columns)
# print(train.columns)
train_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
                 'Embarked_Q', 'Embarked_S', 'Embarked_missing_data']
test_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
                'Embarked_Q', 'Embarked_S', 'Embarked_missing_data']
# print(train[train_columns].to_string())
train[train_columns], test[test_columns] = processor_ms.scale_fit_train_test(
    train[train_columns], test[test_columns])
# print(train[train_columns].to_string())
# print(test[test_columns])

# Regressor
regressor_object_1 = Regressor()
regressor_object_1.train_machine(train[train_columns], train['Survived'])
prediction = regressor_object_1.predict(test[train_columns])
prediction = prediction.astype(int)
print(prediction)

# #################
#  SUBMIT ANSWER
# #################
# print(test.columns)
holdout_ids = test["PassengerId"]
sub_df = {
    "PassengerId": holdout_ids,
    "Survived": prediction
}
def main():
    parser = argparse.ArgumentParser(description="Tensorflow Contrastive Convolution")
    parser.add_argument('--num_classes', default=10575, type=int, metavar='N',
                        help='number of classes (default: 10575)')
    parser.add_argument('--iters', type=int, default=200000, metavar='N',
                        help='number of iterations to train (default: 200000)')
    args = parser.parse_args()

    dataset = CasiaFaceDataset()
    testset = LFWDataset()

    base_model = ConstractiveFourLayers()
    gen_model = GenModel(512)
    reg_model = Regressor(350 * 32)
    idreg_model = IdentityRegressor(14 * 512 * 3 * 3, args.num_classes)

    input1 = tf.placeholder(tf.float32, [None, 128, 128, 1])
    input2 = tf.placeholder(tf.float32, [None, 128, 128, 1])
    target = tf.placeholder(tf.float32, [None, 1])
    c1 = tf.placeholder(tf.float32, [None, args.num_classes])
    c2 = tf.placeholder(tf.float32, [None, args.num_classes])

    A_list, B_list, org_kernel_1, org_kernel_2 = compute_contrastive_features(
        input1, input2, base_model, gen_model)
    reg_1 = reg_model.forward(A_list)
    reg_2 = reg_model.forward(B_list)
    SAB = tf.add(reg_1, reg_2) / 2.0
    hk1 = idreg_model.forward(org_kernel_1)
    hk2 = idreg_model.forward(org_kernel_2)

    # Similarity loss on the pair plus identity cross-entropy on each image.
    loss1 = tf.losses.sigmoid_cross_entropy(multi_class_labels=target, logits=SAB)
    cross_entropy_1 = tf.reduce_mean(-tf.reduce_sum(c1 * tf.log(hk1), reduction_indices=[1]))
    cross_entropy_2 = tf.reduce_mean(-tf.reduce_sum(c2 * tf.log(hk2), reduction_indices=[1]))
    loss2 = tf.add(cross_entropy_1, cross_entropy_2) * 0.5
    loss = tf.add(loss1, loss2)
    optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        f = open("result.txt", "w")
        for iteration in range(args.iters):
            data_1_batch, data_2_batch, c1_batch, c2_batch, target_batch = \
                dataset.get_batch(batch_size=GLOBAL_BATCH_SIZE)
            _, loss_val, loss1_val, loss2_val, reg1_val, reg2_val = sess.run(
                [optimizer, loss, loss1, loss2, reg_1, reg_2],
                feed_dict={input1: data_1_batch, input2: data_2_batch,
                           c1: c1_batch, c2: c2_batch, target: target_batch})
            print("Itera {0} : loss = {1}, loss1 = {2}, loss2 = {3}".format(
                iteration, loss_val, loss1_val, loss2_val))
            f.write("Itera {0} : loss = {1}, loss1 = {2}, loss2 = {3}\r\n".format(
                iteration, loss_val, loss1_val, loss2_val))
            f.flush()
            if iteration != 0 and iteration % 100 == 0:
                # Evaluate verification accuracy on LFW batches.
                acc_pool, start_time = [], time.time()
                for i in range(50):
                    test_1_batch, test_2_batch, label_batch = testset.get_batch(
                        batch_size=GLOBAL_BATCH_SIZE)
                    SAB_val, reg1_val, reg2_val = sess.run(
                        [SAB, reg_1, reg_2],
                        feed_dict={input1: test_1_batch, input2: test_2_batch})
                    dists = np.array(SAB_val).reshape((-1, 1))
                    labels = np.array(label_batch).reshape((-1, 1))
                    accuracy = evaluate(1.0 - dists, labels)
                    acc_pool.append(np.mean(accuracy))
                print("Acc(%.2f)" % (time.time() - start_time), np.mean(acc_pool), acc_pool)
                f.write("Acc" + str(np.mean(acc_pool)) + str(acc_pool) + "\r\n")
                f.flush()
        f.close()
    classifier_accuracy = classifier.get_accuracy(y_test, y_predicted)
    # Visualizing the results
    visualizer = Visualizer()
    visualizer.plot_classifier_regressor(y_test, y_predicted, method_identifier)
    print('The accuracy is: ' + str(classifier_accuracy) + ' %')
    print(algorithm_name)

# --------------------- Applying Regression to the data --------------------------
elif method_identifier == 2:
    from regressor import Regressor
    regressor = Regressor(algorithm_name)
    y_predicted = regressor.predict(X_train, y_train, X_test)
    regressor_score = regressor.get_score(y_test, y_predicted)
    # Visualizing the results
    visualizer = Visualizer()
    visualizer.plot_classifier_regressor(y_test, y_predicted, method_identifier)
    print('The coefficient of determination is: ' + str(regressor_score))
    print(algorithm_name)

# --------------------- Clustering the data ------------------------------------
elif method_identifier == 3:
    from clustering import Clustering
from sklearn.metrics import mean_squared_error
from math import sqrt

min_max_scaler = MinMaxScaler()
df = pd.read_csv("market-price-2014.csv")
# Drop the first column in place; the original assigned the result of an
# inplace drop, which is always None.
df.drop(df.columns[0], axis=1, inplace=True)

data_splitter = DataSplitter(df)
df_train, df_validate, df_test = data_splitter.train_validate_test_split()
data_splitter = DataSplitter(df_train)
x_train, y_train = data_splitter.get_XY_sets(min_max_scaler, 30, 5)
data_splitter = DataSplitter(df_validate)
x_validate, y_validate = data_splitter.get_XY_sets(min_max_scaler, 30, 5)

regressor = Regressor(x_train, y_train, x_validate, y_validate).train()

# PREDICT PRICE
test_set = df_test.values
data_splitter = DataSplitter(df_test)
inputs, outputs = data_splitter.get_XY_sets(min_max_scaler, 30, 5)
predicted_price = regressor.predict(inputs)
x = np.array(outputs).ravel()
y = np.array(predicted_price).ravel()
rmse = sqrt(mean_squared_error(x, y))
print('RMSE: %.3f' % rmse)
predicted_price = min_max_scaler.inverse_transform(
    np.array(predicted_price[-2]).reshape(-1, 1)).tolist()
plotter = Plotter(test_set[-10:-5], predicted_price)
def load_regressor(sess):
    p = params.regressor
    regressor = Regressor(p.height, p.width, p.n_channels)
    regressor.saver.restore(sess, p.model_path)
    return regressor
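# A hypothetical usage sketch for load_regressor; x_batch and y_batch stand
# in for real data, and the attribute names (x, y, keep_prob, accuracy)
# follow train_regressor above.
with tf.Session() as sess:
    regressor = load_regressor(sess)
    acc = regressor.accuracy.eval(feed_dict={regressor.x: x_batch,
                                             regressor.y: y_batch,
                                             regressor.keep_prob: 1.0})
    print('Restored-model accuracy:', acc)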