def evaluate():
    # TRAINING
    data_train = util.load_train_data('../data/').values
    # drop non-feature columns by index; column 3 is the Sales target,
    # columns 25/26 hold the per-row mean/std used for normalization
    x_train = np.delete(data_train, [0, 2, 3, 4, 7, 25], axis=1)
    y_train = data_train[:, 3]
    std_train = data_train[:, 26]
    mean_train = data_train[:, 25]
    y_train_norm = (y_train - mean_train) / std_train

    model = linear_model.Ridge(alpha=1.0)
    model.fit(x_train, y_train_norm)

    # TESTING
    data_test = util.load_test_data('../data/').values
    x_test = np.delete(data_test, [0, 1, 3, 6, 24], axis=1)
    label = data_test[:, 0]
    mean_test = data_test[:, 24]
    std_test = data_test[:, 25]

    y_predict = model.predict(x_test)
    # denormalize predictions back to the Sales scale
    y_predict_denorm = (y_predict * std_test) + mean_test

    result = pd.DataFrame(np.c_[label, y_predict_denorm])
    result.to_csv('../data/ridge_result.csv', header=['Id', 'Sales'], index=False)
    return True
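# Why the denormalization in evaluate() recovers the original scale: a
# minimal, self-contained sketch of the per-row normalization round-trip on
# synthetic numbers. All values here are illustrative, not from the pipeline.
import numpy as np

y = np.array([100.0, 200.0, 300.0])
mean = np.array([150.0, 150.0, 150.0])
std = np.array([50.0, 50.0, 50.0])

y_norm = (y - mean) / std        # what the model is trained on
y_back = (y_norm * std) + mean   # what predict-time denormalization recovers
assert np.allclose(y, y_back)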
def main():
    model = build_model()
    model.summary()

    train_images, targets = load_train_data()
    train_images = train_images.reshape(-1, 28, 28, 1)
    targets = to_categorical(targets, 10)

    callbacks = [
        EarlyStopping(monitor='val_acc', patience=3),
        ModelCheckpoint('keras_convnet', save_best_only=True, save_weights_only=True),
    ]
    model.fit(train_images, targets, batch_size=64, epochs=100,
              validation_split=0.1, callbacks=callbacks)

    model.load_weights('keras_convnet')
    test_images = load_test_data()
    test_images = test_images.reshape(-1, 28, 28, 1)
    predictions = model.predict(test_images)
    labels = np.argmax(predictions, 1)
    save_predictions(labels, 'keras_convnet.csv')
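# build_model() is defined elsewhere; a minimal sketch of a Keras convnet
# compatible with the usage above (28x28x1 input, 10 softmax classes). This
# is an illustrative stand-in, not the original definition.
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def build_model():
    model = Sequential([
        Conv2D(16, (3, 3), activation='relu', padding='same',
               input_shape=(28, 28, 1)),
        MaxPooling2D((2, 2)),
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model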
def predictPerStore(k):
    # initialize result array
    result = np.zeros(shape=(41088, 2))

    traind = util.load_train_data('../data/')
    testd = util.load_test_data('../data/')

    # additional features
    tr, ts = preProcess(traind, testd)

    ts_id = ts['Store'].unique()
    for i in ts_id:
        d_tr = tr[tr['Store'] == i]

        # train using k-fold
        print('training for store id : {}'.format(i))
        model = trainKFold(d_tr, k)

        # predict
        print('predicting for store id : {}'.format(i))
        d_ts = ts[ts['Store'] == i]

        # predict only for open stores
        opened = d_ts[d_ts['Open'] == 1]
        closed = d_ts[d_ts['Open'] == 0]

        # x test
        x_ts = opened.copy()
        del x_ts['Id']
        del x_ts['Store']
        del x_ts['Date']
        del x_ts['DayOfWeek']
        del x_ts['StateHoliday']
        del x_ts['Mean']
        del x_ts['Std']

        # sales prediction
        y_pred = model.predict(x_ts)

        # denormalize
        y_pred_denorm = (y_pred * opened['Std']) + opened['Mean']
        for j in opened['Id']:
            result[j - 1] = [j, y_pred_denorm[j - 1]]
        # closed stores have zero sales (loop variable renamed so it no
        # longer shadows the k-fold parameter k)
        for c in closed['Id']:
            result[c - 1] = [c, 0]

        print('result stored!')
        print('-------------------------------')

    result = pd.DataFrame(result)
    result[0] = result[0].astype(int)
    result.to_csv('../data/ridge_result.csv', header=['Id', 'Sales'], index=False)
    return True
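# trainKFold() is defined elsewhere; a minimal sketch of one plausible
# implementation: choose a Ridge alpha by k-fold cross-validation on a single
# store's normalized sales, then refit on all of that store's rows. The
# dropped column names mirror those deleted above and are assumptions.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold

def trainKFold(d_tr, k):
    drop = ['Id', 'Store', 'Date', 'DayOfWeek', 'StateHoliday',
            'Sales', 'Mean', 'Std']
    x = d_tr.drop(columns=[c for c in drop if c in d_tr]).values
    y = ((d_tr['Sales'] - d_tr['Mean']) / d_tr['Std']).values
    best_alpha, best_mse = 1.0, np.inf
    for alpha in (0.1, 1.0, 10.0):
        fold_mse = []
        for tr_idx, va_idx in KFold(n_splits=k, shuffle=True).split(x):
            m = Ridge(alpha=alpha).fit(x[tr_idx], y[tr_idx])
            fold_mse.append(np.mean((m.predict(x[va_idx]) - y[va_idx]) ** 2))
        if np.mean(fold_mse) < best_mse:
            best_mse, best_alpha = np.mean(fold_mse), alpha
    return Ridge(alpha=best_alpha).fit(x, y)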
def main():
    model = ConvNet()
    print(model)

    images, targets = load_train_data()
    train_images, val_images, train_targets, val_targets = train_test_split(
        images, targets, test_size=0.1)

    train_images = torch.from_numpy(train_images).unsqueeze(1)
    train_targets = torch.from_numpy(train_targets)
    train_dataset = TensorDataset(train_images, train_targets)
    train_loader = DataLoader(train_dataset, batch_size=64)

    val_images = torch.from_numpy(val_images).unsqueeze(1)
    val_targets = torch.from_numpy(val_targets)
    val_dataset = TensorDataset(val_images, val_targets)
    val_loader = DataLoader(val_dataset, batch_size=64)

    optimizer = Adam(model.parameters(), lr=1e-3)
    best_val_acc = -1
    patience_count = 0
    for epoch in range(1, 1001):
        loss, acc = train_model(model, optimizer, train_loader)
        val_loss, val_acc = evaluate_model(model, val_loader)
        patience_count += 1
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_count = 0
            torch.save(model, 'pytorch_convnet')
        msg = 'Epoch {:04d} - loss: {:.6g} - acc: {:.6g} - val_loss: {:.6g} - val_acc: {:.6g}'
        print(msg.format(epoch, loss, acc, val_loss, val_acc))
        if patience_count > 3:
            break

    model = torch.load('pytorch_convnet')
    images = load_test_data()
    images = torch.from_numpy(images).unsqueeze(1)
    test_dataset = TensorDataset(images, torch.zeros(images.size(0)))
    test_loader = DataLoader(test_dataset)
    labels = []
    for images, _ in test_loader:
        images = Variable(images.float(), requires_grad=False)
        outputs = model(images)
        labels.extend(torch.max(outputs.data, 1)[1])
    save_predictions(np.array(labels), 'pytorch_convnet.csv')
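# ConvNet is defined elsewhere; a minimal sketch of a module compatible with
# the 1x28x28 inputs and 10-way argmax above. An illustrative stand-in, not
# the original architecture.
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(7 * 7 * 32, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)   # 28x28 -> 14x14
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)   # 14x14 -> 7x7
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)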
def predict_for_kaggle_test_set(nn, filename):
    """Save predictions for the Kaggle test set to the given file.

    Parameters:
        nn : object
            Trained network exposing a predict() method.
        filename : str
            Output path passed to util.save_predictions.
    """
    kaggle_test_set = util.load_test_data()
    preds = []
    for sample in kaggle_test_set:
        preds.append(nn.predict(sample, show=False))
    util.save_predictions(preds, filename)
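# util.save_predictions() lives elsewhere; a minimal sketch of what such a
# helper might do, writing a Kaggle-style CSV. The header names are
# assumptions.
import csv

def save_predictions(preds, filename):
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Id', 'Prediction'])
        for i, p in enumerate(preds, start=1):
            writer.writerow([i, p])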
import os
import sys
import logging

import pandas as pd
import numpy as np
from util import load_train_data, load_test_data, save_result
from keras.utils import np_utils
from keras.layers import Dense, Input
from keras.models import Model

train_file = os.path.join('data', 'train.csv')
test_file = os.path.join('data', 'test.csv')
x_train, y_train = load_train_data(train_file)
x_test = load_test_data(test_file)

batch_size = 100
nb_epoch = 20
hidden_units_1 = 256
hidden_units_2 = 100

y_train = np_utils.to_categorical(y_train)

input_layer = Input(shape=(784,))
hidden_layer_1 = Dense(hidden_units_1, activation='sigmoid')(input_layer)
hidden_layer_2 = Dense(hidden_units_2, activation='sigmoid')(hidden_layer_1)
output_layer = Dense(10, activation='softmax')(hidden_layer_2)

model = Model(input_layer, output_layer)
model.compile(optimizer='sgd', loss='categorical_crossentropy')
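# The excerpt compiles the model but stops before training; a plausible
# continuation under the variables defined above (batch_size, nb_epoch,
# x_test). The save_result signature and output filename are assumptions.
model.fit(x_train, y_train, batch_size=batch_size, epochs=nb_epoch,
          validation_split=0.1)
predictions = model.predict(x_test)
labels = np.argmax(predictions, axis=1)
save_result(labels, 'mlp_result.csv')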
import os  # needed for os.path.join below; missing from the original imports
import sys
import logging

import numpy as np
from util import load_train_data, load_test_data
from keras.utils import np_utils
from keras.layers import Dense, Input, Conv2D, Reshape, Dropout, MaxPooling2D, Flatten
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

x_train, y_train = load_train_data('cifar-10')
x_test, y_test = load_test_data(os.path.join('cifar-10', 'test_batch'))

x_train = np.reshape(x_train, (x_train.shape[0], 32, 32, 3))
x_test = np.reshape(x_test, (x_test.shape[0], 32, 32, 3))

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

y_train = np_utils.to_categorical(y_train)

batch_size = 32
num_classes = 10
epochs = 100
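# ImageDataGenerator is imported above but unused in the excerpt; a minimal
# sketch of how it is typically wired into training on the reshaped CIFAR-10
# tensors, assuming a compiled `model` exists. Augmentation parameters are
# illustrative.
datagen = ImageDataGenerator(rescale=1. / 255,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             horizontal_flip=True)
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=len(x_train) // batch_size,
                    epochs=epochs)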
import util
import tensorflow as tf
# from neuron_network import NeuronNetwork

# print(util.mutation("A"))
# input_data = util.string_to_ascii("BACBDCBDBBACBACDADDCABBCBACDACDDBDABDACD")
# net = NeuronNetwork([[2.0, 1.0]], 1, 1)
# net.il_node_num = 10
# print(net.il_node_num, net.hl2_node_num)

test_x, test_y = util.load_test_data('UnitTest')
print(test_x)
print(test_y)

# x = tf.placeholder(tf.float32, shape=(3, 3))
def main():
    # Placeholders
    images = tf.placeholder(tf.float32, [None, 28, 28])
    targets = tf.placeholder(tf.int32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)

    # Weights
    W_conv1 = weight_variable([3, 3, 1, 16])
    b_conv1 = bias_variable([16])
    W_conv2 = weight_variable([3, 3, 16, 32])
    b_conv2 = bias_variable([32])
    hidden_units = (7 * 7 * 32 + 10) // 2
    W_hidden = weight_variable([7 * 7 * 32, hidden_units])
    b_hidden = bias_variable([hidden_units])
    W_output = weight_variable([hidden_units, 10])
    b_output = bias_variable([10])
    weights = [
        W_conv1, b_conv1,
        W_conv2, b_conv2,
        W_hidden, b_hidden,
        W_output, b_output,
    ]

    # Forward
    x = tf.reshape(images, [-1, 28, 28, 1])
    x = max_pool(tf.nn.relu(conv2d(x, W_conv1) + b_conv1))
    x = max_pool(tf.nn.relu(conv2d(x, W_conv2) + b_conv2))
    x = tf.reshape(x, [-1, 7 * 7 * 32])
    x = tf.nn.dropout(x, keep_prob)
    x = tf.nn.relu(tf.matmul(x, W_hidden) + b_hidden)
    x = tf.nn.dropout(x, keep_prob)
    outputs = tf.matmul(x, W_output) + b_output

    # Loss
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=targets))
    optimizer = tf.train.AdamOptimizer(1e-3).minimize(loss)

    # Accuracy
    correct = tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.Session() as sess:
        batch_size = 64

        # Training
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(weights, max_to_keep=1)
        X, y = load_train_data()
        y = one_hot(y)
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1)
        best_val_acc = -1
        patience_count = 0
        for epoch in range(1, 1001):
            X_train, y_train = shuffle(X_train, y_train)
            X_batches = np.array_split(X_train, X_train.shape[0] // batch_size)
            y_batches = np.array_split(y_train, y_train.shape[0] // batch_size)
            loss_sum = acc_sum = 0.0
            for X_batch, y_batch in zip(X_batches, y_batches):
                loss_batch, acc_batch, _ = sess.run(
                    [loss, accuracy, optimizer],
                    feed_dict={images: X_batch, targets: y_batch, keep_prob: 0.5})
                loss_sum += loss_batch * X_batch.shape[0]
                acc_sum += acc_batch * X_batch.shape[0]
            # average over the training split (the sums only cover X_train,
            # so dividing by X.shape[0] would understate loss and accuracy)
            acc = acc_sum / X_train.shape[0]

            X_batches = np.array_split(X_val, X_val.shape[0] // batch_size)
            y_batches = np.array_split(y_val, y_val.shape[0] // batch_size)
            acc_sum = 0.0
            for X_batch, y_batch in zip(X_batches, y_batches):
                acc_batch = sess.run(
                    accuracy,
                    feed_dict={images: X_batch, targets: y_batch, keep_prob: 1.0})
                acc_sum += acc_batch * X_batch.shape[0]
            val_acc = acc_sum / X_val.shape[0]

            patience_count += 1
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_count = 0
                saver.save(sess, 'tensorflow_convnet')
            msg = 'Epoch {:04d} - loss: {:.6g} - acc: {:.6g} - val_acc: {:.6g}'
            print(msg.format(epoch, loss_sum / X_train.shape[0], acc, val_acc))
            if patience_count > 3:
                break

        # Prediction
        saver.restore(sess, 'tensorflow_convnet')
        X = load_test_data()
        X_batches = np.array_split(X, X.shape[0] // batch_size)
        labels = []
        for X_batch in X_batches:
            y = sess.run(outputs, feed_dict={images: X_batch, keep_prob: 1.0})
            labels.extend(np.argmax(y, 1))
        save_predictions(np.array(labels), 'tensorflow_convnet.csv')
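# weight_variable, bias_variable, conv2d, and max_pool are defined elsewhere;
# a minimal sketch of the conventional TF1 helpers consistent with the shapes
# above (SAME padding, two 2x2 pools taking 28x28 down to 7x7).
def weight_variable(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')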
    del count_vectorizer
    return words_freq[:n_features]


def get_top_n_features_count_unigram(df, n_features):
    label_df = df
    count_vectorizer = CountVectorizer()
    words = count_vectorizer.fit_transform(label_df['title'])
    s = words.sum(axis=0)
    words_freq = [(word, s[0, idx]) for word, idx in count_vectorizer.vocabulary_.items()]
    words_freq = sorted(words_freq, key=lambda x: x[1], reverse=True)
    words_freq = [x[0] for x in words_freq]
    del count_vectorizer
    return words_freq[:n_features]


train_df = load_train_data()
test_df = load_test_data()
test_df = text_process2(test_df)
train_df = text_process2(train_df)
print(len(train_df['title']))
print(len(test_df['title']))

del train_df['category']
df = pd.concat([train_df, test_df])
print(len(df))
# print(df.head())

# features = get_top_n_features_count(train_df, 50)
# features = get_top_n_features_count_unigram(train_df, 200)
# features = get_top_n_features_count_unigram(df, 200)
# print(features)

import re
import numpy as np
        self.w1 = w_load
        self.b1 = b_load

    def predict(self, test_x):
        """Compute predictions for test_x via the logistic function, using
        the parameters learned earlier, and store them in self.y_predict.

        param: test_x (np.array)
        return: y_result (list of 0/1 labels)
        """
        print("predict......")
        y_predict = self.sigmoid(np.dot(self.w1, test_x) + self.b1)
        print("finish.......")
        y_result = list()
        for y in y_predict[0]:
            if y > 0.5:
                y_result.append(1)
            else:
                y_result.append(0)
        self.y_predict = y_result
        return y_result


if __name__ == "__main__":
    x, y = load_training_data()
    print(x.shape)
    log_reg = logistic_regression(x, y)
    log_reg.load_param()
    test_x = load_test_data()
    y_predict = log_reg.predict(test_x)
    print(y_predict)
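# The sigmoid used by predict() is defined elsewhere on the class; a minimal
# sketch (shown in isolation) of the standard definition it presumably
# matches, with np being numpy:
def sigmoid(self, z):
    # logistic function, applied elementwise
    return 1.0 / (1.0 + np.exp(-z))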
        return False
    return True


if __name__ == "__main__":
    train_data = ''
    model_file = ''
    mode = ''
    if validate_arguments(sys.argv):
        train_data = sys.argv[3]
        model_file = sys.argv[2]
        mode = sys.argv[1]
    else:
        sys.exit("Invalid Arguments")

    inputs, outputs = util.load_test_data(train_data)
    if not inputs or not outputs:
        raise ValueError('Input data and output data cannot be empty')
    # inputs, outputs = util.load_test_data('Test_3')
    # inputs, outputs = util.load_test_data('UnitTest')

    nn = NeuronNetwork(inputs, outputs, 0.01)
    # set the number of nodes for the input layer
    nn.il_node_num = 10
    # set the number of nodes for hidden layer 1
    nn.hl1_node_num = 10
    # set the number of nodes for hidden layer 2
    nn.hl2_node_num = 10
    # set the number of nodes for hidden layer 3
    nn.hl3_node_num = 10
def main(unused_argv):
    train_data = util.load_train_img(tiling=False)
    train_labels = util.load_train_lbl(tiling=False)
    predict_data = util.load_test_data(tiling=False)

    train_labels = np.around(train_labels)
    train_labels = train_labels.astype('int32')

    # expand to 608 x 608 with reflection padding
    train_data = np.pad(train_data, ((0, 0), (104, 104), (104, 104), (0, 0)), 'reflect')
    train_labels = np.pad(train_labels, ((0, 0), (104, 104), (104, 104)), 'reflect')

    # Channel first
    # train_data = np.rollaxis(train_data, -1, 1)
    # predict_data = np.rollaxis(predict_data, -1, 1)

    # need to expand the channel axis for the image augmentation
    train_labels = np.expand_dims(train_labels, 3)

    # Create the Estimator
    road_estimator = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="outputs/road")

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=constants.BATCH_SIZE,
        num_epochs=None,
        shuffle=True)
    road_estimator.train(
        input_fn=train_input_fn,
        max_steps=(constants.N_SAMPLES * constants.NUM_EPOCH) // constants.BATCH_SIZE)
    # road_estimator.train(input_fn=train_input_fn, max_steps=10)

    # Predictions on test data
    util.create_prediction_dir("predictions_test/")
    file_names = util.get_file_names()
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": predict_data},
        num_epochs=1,
        shuffle=False,
        batch_size=constants.BATCH_SIZE)
    predictions = road_estimator.predict(input_fn=predict_input_fn)
    res = [p['classes'] for p in predictions]
    for i in range(constants.N_TEST_SAMPLES):
        labels = res[i]
        img = util.label_to_img_full(IMG_SIZE, IMG_SIZE, labels)
        img = util.img_float_to_uint8(img)
        Image.fromarray(img).save('predictions_test/' + file_names[i])

    # Predictions on train data
    util.create_prediction_dir("predictions_train/")
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        num_epochs=1,
        shuffle=False,
        batch_size=constants.BATCH_SIZE)
    predictions = road_estimator.predict(input_fn=predict_input_fn)
    res = [p['classes'] for p in predictions]
    for i in range(constants.N_SAMPLES):
        labels = res[i]
        img = util.label_to_img_full(IMG_SIZE, IMG_SIZE, labels)
        img = util.img_float_to_uint8(img)
        Image.fromarray(img).save('predictions_train/satImage_{:03}.png'.format(i + 1))
model_name = type(clf).__name__
score = cross_validate(clf, X_train, y_train, cv=kfold,
                       scoring=('accuracy', 'f1_macro', 'f1_micro',
                                'precision_macro', 'precision_micro',
                                'recall_macro', 'recall_micro'),
                       verbose=3, n_jobs=-1,
                       error_score='raise-deprecating')
score_headers = list(score.keys())[2:]
score_result = list(score.values())[2:]
score_result = [x.mean() for x in score_result]
result.append([model_name, 'mercadolivre'] + score_result)
print(tabulate(result, headers=['classificador', 'data_set'] + score_headers))
"""

import sys
# sys.exit("Error message")

print("Starting training...")
clf.fit(X_train, y_train)
print("Finished training.")

print("Loading submission data...")
test_dft = load_test_data()
test_dft = text_process(test_dft)
X_testt = vectorizer.transform(test_dft['title'])
pred = clf.predict(X_testt)

df = pd.DataFrame(columns=['id', 'category'])
cate = encoder.inverse_transform(pred)
df['category'] = cate
df['id'] = np.arange(len(cate))
print(df.head())
df.to_csv('./submissao33.csv', index=False)

# pred = clf.predict(X_test)
import sys
sys.exit("Error message")
score = metrics.accuracy_score(y_test, pred)
        parameters -- python dictionary containing your parameters
        X -- input data of size (n_x, m)

        Returns:
        predictions -- vector of predictions of our model (0/1)
        """
        A2, cache = self.forward_propagation(X, self.parameters)
        threshold = 0.5
        predictions = A2 > threshold
        return predictions

    def run(self):
        n_x, n_h, n_y = self.layer_sizes(x, y)
        parameters = self.initialize_parameters(n_x, n_h, n_y)
        A2, cache = self.forward_propagation(x, parameters)
        cost = self.compute_cost(A2, y, parameters)
        print(cost)
        grads = self.backward_propagation(parameters, cache, x, y)
        self.update_parameters(parameters, grads)


if __name__ == "__main__":
    x, y = load_training_data()
    clf = logistic_regression_Ng()
    clf.nn_model(X=x, Y=y, n_h=4, print_cost=True)
    x_test = load_test_data()
    y_predict = clf.predict(x_test)
    print(y_predict)
    print(np.mean(y_predict))
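# forward_propagation is defined elsewhere; a minimal sketch of the usual
# one-hidden-layer version (tanh hidden layer, sigmoid output), consistent
# with thresholding A2 at 0.5 above. The parameter dictionary keys are
# assumptions.
import numpy as np

def forward_propagation(self, X, parameters):
    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = 1.0 / (1.0 + np.exp(-Z2))   # sigmoid output in (0, 1)
    cache = {'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}
    return A2, cache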