Example #1
def evaluate():
    # TRAINING
    data_train = util.load_train_data('../data/').values
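    # select feature columns by dropping the non-feature ones; the target and its
    # per-row mean/std are read from columns 3, 25 and 26 below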
    x_train = np.delete(data_train, [0, 2, 3, 4, 7, 25], axis=1)
    y_train = data_train[:, 3]
    std_train = data_train[:, 26]
    mean_train = data_train[:, 25]
    y_train_norm = (y_train - mean_train) / std_train

    model = linear_model.Ridge(alpha=1.0)
    model.fit(x_train, y_train_norm)

    # TESTING
    data_test = util.load_test_data('../data/').values
    x_test = np.delete(data_test, [0, 1, 3, 6, 24], axis=1)
    label = data_test[:, 0]
    mean_test = data_test[:, 24]
    std_test = data_test[:, 25]
    y_predict = model.predict(x_test)
    y_predict_denorm = (y_predict * std_test) + mean_test

    result = pd.DataFrame(np.c_[label, y_predict_denorm])
    result.to_csv('../data/ridge_result.csv',
                  header=['Id', 'Sales'],
                  index=False)

    return True
Example #2
def main():
    model = build_model()
    model.summary()

    train_images, targets = load_train_data()
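    # reshape the flat images to (N, 28, 28, 1) grayscale tensors and one-hot
    # encode the 10 class labels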
    train_images = train_images.reshape(-1, 28, 28, 1)
    targets = to_categorical(targets, 10)
    callbacks = [
        EarlyStopping(monitor='val_acc', patience=3),
        ModelCheckpoint('keras_convnet',
                        save_best_only=True,
                        save_weights_only=True),
    ]
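    # with these callbacks, training stops once val_acc has not improved for
    # 3 epochs and only the best weights are kept on disk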
    model.fit(train_images,
              targets,
              batch_size=64,
              epochs=100,
              validation_split=0.1,
              callbacks=callbacks)

    model.load_weights('keras_convnet')
    test_images = load_test_data()
    test_images = test_images.reshape(-1, 28, 28, 1)
    predictions = model.predict(test_images)
    labels = np.argmax(predictions, 1)
    save_predictions(labels, 'keras_convnet.csv')
Example #3
def predictPerStore(k):
    # initialize result array
    result = np.zeros(shape=(41088, 2))

    traind = util.load_train_data('../data/')
    testd = util.load_test_data('../data/')

    # additional features
    tr, ts = preProcess(traind, testd)

    ts_id = ts['Store'].unique()
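    # fit one model per store; predictions are written into `result` at
    # position Id - 1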
    for i in ts_id:
        d_tr = tr[tr['Store'] == i]

        # train using kfold
        print('training for store id : {}'.format(i))
        model = trainKFold(d_tr, k)

        # predict
        print('predicting for store id : {}'.format(i))

        d_ts = ts[ts['Store'] == i]

        # check for open or close
        # predict only for open store
        opened = d_ts[d_ts['Open'] == 1]
        closed = d_ts[d_ts['Open'] == 0]

        # x test
        x_ts = opened.copy()
        del x_ts['Id']
        del x_ts['Store']
        del x_ts['Date']
        del x_ts['DayOfWeek']
        del x_ts['StateHoliday']
        del x_ts['Mean']
        del x_ts['Std']

        # sales predict
        y_pred = model.predict(x_ts)

        # denormalize the predictions (undo the per-store standardization)
        y_pred_denorm = (y_pred * opened['Std']) + opened['Mean']

        for j in opened['Id']:
            result[j - 1] = [j, y_pred_denorm[j - 1]]

        # closed stores get 0 sales; use a fresh loop variable so the fold count
        # k is not clobbered
        for cid in closed['Id']:
            result[cid - 1] = [cid, 0]

        print('result stored!')
        print('-------------------------------')

    result = pd.DataFrame(result)
    result[0] = result[0].astype(int)
    result.to_csv('../data/ridge_result.csv',
                  header=['Id', 'Sales'],
                  index=False)

    return True
Example #4
def main():
    model = ConvNet()
    print(model)

    images, targets = load_train_data()
    train_images, val_images, train_targets, val_targets = train_test_split(images, targets, test_size=0.1)

    train_images = torch.from_numpy(train_images).unsqueeze(1)
    train_targets = torch.from_numpy(train_targets)
    train_dataset = TensorDataset(train_images, train_targets)
    train_loader = DataLoader(train_dataset, batch_size=64)

    val_images = torch.from_numpy(val_images).unsqueeze(1)
    val_targets = torch.from_numpy(val_targets)
    val_dataset = TensorDataset(val_images, val_targets)
    val_loader = DataLoader(val_dataset, batch_size=64)

    optimizer = Adam(model.parameters(), lr=1e-3)

    best_val_acc = -1
    patience_count = 0
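    # manual early stopping: checkpoint the model whenever validation accuracy
    # improves and stop after 3 epochs without improvement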
    for epoch in range(1, 1001):
        loss, acc = train_model(model, optimizer, train_loader)
        val_loss, val_acc = evaluate_model(model, val_loader)
        patience_count += 1
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_count = 0
            torch.save(model, 'pytorch_convnet')
        msg = 'Epoch {:04d} - loss: {:.6g} - acc: {:.6g} - val_loss: {:.6g} - val_acc: {:.6g}'
        print(msg.format(epoch, loss, acc, val_loss, val_acc))
        if patience_count > 3:
            break

    model = torch.load('pytorch_convnet')
    images = load_test_data()
    images = torch.from_numpy(images).unsqueeze(1)
    test_dataset = TensorDataset(images, torch.zeros(images.size(0)))
    test_loader = DataLoader(test_dataset)
    labels = []
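    # run inference batch by batch; the argmax over the class scores is the
    # predicted label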
    for images, _ in test_loader:
        images = Variable(images.float(), requires_grad=False)
        outputs = model(images)
        labels.extend(torch.max(outputs.data, 1)[1])
    save_predictions(np.array(labels), 'pytorch_convnet.csv')
Example #5
def predict_for_kaggle_test_set(nn, filename):
    """
    Save predictions for the Kaggle test set to the given file.

    Parameters:
        nn      : object -- trained model exposing predict(sample, show=False)
        filename: str    -- output file name passed to util.save_predictions
    Returns:
        None
    """
    
    kaggle_test_set = util.load_test_data()
    preds = []

    for i in kaggle_test_set:
        preds.append(nn.predict(i, show=False))

    util.save_predictions(preds, filename)
Example #6
import os
import sys
import logging

import pandas as pd
import numpy as np

from util import load_train_data, load_test_data, save_result
from keras.utils import np_utils
from keras.layers import Dense, Input
from keras.models import Model

train_file = os.path.join('data', 'train.csv')
test_file = os.path.join('data', 'test.csv')
x_train, y_train = load_train_data(train_file)
x_test = load_test_data(test_file)

batch_size = 100
nb_epoch = 20
hidden_units_1 = 256
hidden_units_2 = 100

y_train = np_utils.to_categorical(y_train)

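# simple MLP: 784-dimensional inputs, two sigmoid hidden layers and a 10-way softmax output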
input_layer = Input(shape=(784, ))
hidden_layer_1 = Dense(hidden_units_1, activation='sigmoid')(input_layer)
hidden_layer_2 = Dense(hidden_units_2, activation='sigmoid')(hidden_layer_1)
output_layer = Dense(10, activation='softmax')(hidden_layer_2)

model = Model(input_layer, output_layer)
model.compile(optimizer='sgd', loss='categorical_crossentropy')
Example #7
import os
import sys
import logging

import numpy as np

from util import load_train_data, load_test_data
from keras.utils import np_utils
from keras.layers import Dense, Input, Conv2D, Reshape, Dropout, MaxPooling2D, Flatten
from keras.models import Model

from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

x_train, y_train = load_train_data('cifar-10')
x_test, y_test = load_test_data(os.path.join('cifar-10', 'test_batch'))

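# reshape the flattened CIFAR-10 rows into (N, 32, 32, 3) image tensors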
x_train = np.reshape(x_train, (x_train.shape[0], 32, 32, 3))
x_test = np.reshape(x_test, (x_test.shape[0], 32, 32, 3))

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

y_train = np_utils.to_categorical(y_train)

batch_size = 32
num_classes = 10
epochs = 100
Example #8
import util
import tensorflow as tf
# from neuron_network import NeuronNetwork

# print util.mutation("A")

# input_data = util.string_to_ascii("BACBDCBDBBACBACDADDCABBCBACDACDDBDABDACD")

# net = NeuronNetwork([[2.0, 1.0]], 1, 1)
# net.il_node_num = 10

# print net.il_node_num, net.hl2_node_num

test_x, test_y = util.load_test_data('UnitTest')
print(test_x)
print(test_y)
# x = tf.placeholder(tf.float32, shape=(3, 3))


Example #9
def main():
    # Placeholders

    images = tf.placeholder(tf.float32, [None, 28, 28])
    targets = tf.placeholder(tf.int32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)

    # Weights

    W_conv1 = weight_variable([3, 3, 1, 16])
    b_conv1 = bias_variable([16])

    W_conv2 = weight_variable([3, 3, 16, 32])
    b_conv2 = bias_variable([32])

    hidden_units = (7 * 7 * 32 + 10) // 2
    W_hidden = weight_variable([7 * 7 * 32, hidden_units])
    b_hidden = bias_variable([hidden_units])

    W_output = weight_variable([hidden_units, 10])
    b_output = bias_variable([10])

    weights = [
        W_conv1,
        b_conv1,
        W_conv2,
        b_conv2,
        W_hidden,
        b_hidden,
        W_output,
        b_output,
    ]

    # Forward
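    # two 3x3 conv + max-pool blocks reduce 28x28 inputs to 7x7x32, followed by
    # dropout and two fully connected layers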

    x = tf.reshape(images, [-1, 28, 28, 1])

    x = max_pool(tf.nn.relu(conv2d(x, W_conv1) + b_conv1))
    x = max_pool(tf.nn.relu(conv2d(x, W_conv2) + b_conv2))
    x = tf.reshape(x, [-1, 7 * 7 * 32])

    x = tf.nn.dropout(x, keep_prob)
    x = tf.nn.relu(tf.matmul(x, W_hidden) + b_hidden)

    x = tf.nn.dropout(x, keep_prob)
    outputs = tf.matmul(x, W_output) + b_output

    # Loss

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=outputs,
                                                labels=targets))
    optimizer = tf.train.AdamOptimizer(1e-3).minimize(loss)

    # Accuracy

    correct = tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.Session() as sess:
        batch_size = 64

        # Training
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(weights, max_to_keep=1)

        X, y = load_train_data()
        y = one_hot(y)
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1)

        best_val_acc = -1
        patience_count = 0

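        # mini-batch training with early stopping: save the weights whenever
        # validation accuracy improves and stop after 3 epochs without improvement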
        for epoch in range(1, 1001):
            X_train, y_train = shuffle(X_train, y_train)
            X_batches = np.array_split(X_train, X_train.shape[0] // batch_size)
            y_batches = np.array_split(y_train, y_train.shape[0] // batch_size)
            loss_sum = acc_sum = 0.0
            for X_batch, y_batch in zip(X_batches, y_batches):
                loss_batch, acc_batch, _ = sess.run(
                    [loss, accuracy, optimizer],
                    feed_dict={
                        images: X_batch,
                        targets: y_batch,
                        keep_prob: 0.5
                    })
                loss_sum += loss_batch * X_batch.shape[0]
                acc_sum += acc_batch * X_batch.shape[0]
            acc = acc_sum / X_train.shape[0]

            X_batches = np.array_split(X_val, X_val.shape[0] // batch_size)
            y_batches = np.array_split(y_val, y_val.shape[0] // batch_size)
            acc_sum = 0.0
            for X_batch, y_batch in zip(X_batches, y_batches):
                acc_batch = sess.run(accuracy,
                                     feed_dict={
                                         images: X_batch,
                                         targets: y_batch,
                                         keep_prob: 1.0
                                     })
                acc_sum += acc_batch * X_batch.shape[0]
            val_acc = acc_sum / X_val.shape[0]
            patience_count += 1
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_count = 0
                saver.save(sess, 'tensorflow_convnet')

            msg = 'Epoch {:04d} - loss: {:.6g} - acc: {:.6g} - val_acc: {:.6g}'
            print(msg.format(epoch, loss_sum / X_train.shape[0], acc, val_acc))
            if patience_count > 3:
                break

        # Prediction
        saver.restore(sess, 'tensorflow_convnet')
        X = load_test_data()
        X_batches = np.array_split(X, X.shape[0] // batch_size)
        labels = []
        for X_batch in X_batches:
            y = sess.run(outputs, feed_dict={images: X_batch, keep_prob: 1.0})
            labels.extend(np.argmax(y, 1))
        save_predictions(np.array(labels), 'tensorflow_convnet.csv')
Example #10
    del count_vectorizer
    return words_freq[:n_features]

def get_top_n_features_count_unigram(df,n_features):
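    # count unigram occurrences over the 'title' column and return the
    # n_features most frequent words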
    label_df = df
    count_vectorizer = CountVectorizer()
    words = count_vectorizer.fit_transform(label_df['title'])
    s = words.sum(axis=0)
    words_freq = [(word, s[0, idx]) for word, idx in count_vectorizer.vocabulary_.items()]
    words_freq = sorted(words_freq, key=lambda x: x[1], reverse=True)
    words_freq = [x[0] for x in words_freq]
    del count_vectorizer
    return words_freq[:n_features]

train_df = load_train_data()
test_df = load_test_data()
test_df = text_process2(test_df)
train_df = text_process2(train_df)
print(len(train_df['title']))
print(len(test_df['title']))
del train_df['category']
df = pd.concat([train_df, test_df])
print(len(df))
#print(df.head())
#features = get_top_n_features_count(train_df,50)
#features = get_top_n_features_count_unigram(train_df,200)
#features = get_top_n_features_count_unigram(df,200)
#print(features)

import re
import numpy as np
Example #11
        self.w1 = w_load
        self.b1 = b_load

    def predict(self, test_x):
        """
        Compute predictions for test_x by applying the sigmoid to the learned parameters, and store the result in self.y_predict.
        param: test_x(np.array)
        return: y_predict(np.array)
        """
        print("predict......")
        y_predict = self.sigmoid(np.dot(self.w1, test_x) + self.b1)
        print("finish.......")
        y_result = list()
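        # threshold the sigmoid outputs at 0.5 to obtain binary labels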
        for y in y_predict[0]:
            if y > 0.5:
                y_result.append(1)
            else:
                y_result.append(0)
        self.y_predict = y_result
        return y_result


if __name__ == "__main__":
    x, y = load_training_data()
    print(x.shape)
    log_reg = logistic_regression(x, y)
    log_reg.load_param()
    test_x = load_test_data()
    y_predict = log_reg.predict(test_x)
    print(y_predict)
Example #12
        return False
    return True


if __name__ == "__main__":

    train_data = ''
    model_file = ''
    mode = ''
    if validate_arguments(sys.argv):
        train_data = sys.argv[3]
        model_file = sys.argv[2]
        mode = sys.argv[1]
    else:
        sys.exit("Invalid Arguments")
    inputs, outputs = util.load_test_data(train_data)

    if not inputs or not outputs:
        raise ValueError('Input data and output data cannot be empty')
        # exit(0)
    # inputs, outputs = util.load_test_data('Test_3')
    # inputs, outputs = util.load_test_data('UnitTest')
    nn = NeuronNetwork(inputs, outputs, 0.01)
    # set the number of node for input layer
    nn.il_node_num = 10
    # set the number of node for hidden layer 1
    nn.hl1_node_num = 10
    # set the number of node for hidden layer 2
    nn.hl2_node_num = 10
    # set the number of node for hidden layer 3
    nn.hl3_node_num = 10
Example #13
def main(unused_argv):
    train_data = util.load_train_img(tiling=False)
    train_labels = util.load_train_lbl(tiling=False)
    predict_data = util.load_test_data(tiling=False)

    train_labels = np.around(train_labels)
    train_labels = train_labels.astype('int32')

    # EXPAND to 608 x 608
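    # reflect-pad 104 px on each side; presumably the test images are already 608 x 608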
    train_data = np.pad(train_data, ((0, 0), (104, 104), (104, 104), (0, 0)), 'reflect')
    train_labels = np.pad(train_labels, ((0, 0), (104, 104), (104, 104)), 'reflect')

    # Channel first
    # train_data = np.rollaxis(train_data, -1, 1)
    # predict_data = np.rollaxis(predict_data, -1, 1)

    # need to expand the channel axis for the image augmentation
    train_labels = np.expand_dims(train_labels, 3)

    # Create the Estimator
    road_estimator = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="outputs/road")

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=constants.BATCH_SIZE,
        num_epochs=None,
        shuffle=True)

    road_estimator.train(
        input_fn=train_input_fn,
        max_steps=(constants.N_SAMPLES * constants.NUM_EPOCH) // constants.BATCH_SIZE)

    # road_estimator.train(
    #     input_fn=train_input_fn,
    #     max_steps=10)

    # Predictions
    # Do prediction on test data
    util.create_prediction_dir("predictions_test/")
    file_names = util.get_file_names()

    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": predict_data},
        num_epochs=1,
        shuffle=False,
        batch_size=constants.BATCH_SIZE)

    predictions = road_estimator.predict(input_fn=predict_input_fn)
    res = [p['classes'] for p in predictions]

    for i in range(constants.N_TEST_SAMPLES):
        labels = res[i]
        img = util.label_to_img_full(IMG_SIZE, IMG_SIZE, labels)
        img = util.img_float_to_uint8(img)
        Image.fromarray(img).save('predictions_test/' + file_names[i])

    # Do prediction on train data
    util.create_prediction_dir("predictions_train/")

    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        num_epochs=1,
        shuffle=False,
        batch_size=constants.BATCH_SIZE)

    predictions = road_estimator.predict(input_fn=predict_input_fn)
    res = [p['classes'] for p in predictions]

    for i in range(constants.N_SAMPLES):
        labels = res[i]
        img = util.label_to_img_full(IMG_SIZE, IMG_SIZE, labels)
        img = util.img_float_to_uint8(img)
        Image.fromarray(img).save('predictions_train/satImage_{:03}.png'.format(i + 1))
Example #14
model_name = type(clf).__name__
score = cross_validate(clf, X_train, y_train, cv=kfold, scoring=('accuracy','f1_macro','f1_micro','precision_macro','precision_micro','recall_macro','recall_micro' ), verbose=3, n_jobs=-1,
                                error_score='raise-deprecating')
score_headers = list(score.keys())[2:]
score_result = list(score.values())[2:]
score_result = [x.mean() for x in score_result]
result.append([model_name, 'mercadolivre'] + score_result)
print(tabulate(result, headers=['classificador', 'data_set']+score_headers)) """

import sys
#sys.exit("Error message")
print("Starting training...")
clf.fit(X_train, y_train)
print("Training finished...")
print('Loading submission...')
test_dft = load_test_data()
test_dft = text_process(test_dft)
X_testt = vectorizer.transform(test_dft['title'])
pred = clf.predict(X_testt)

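# map the predicted label indices back to category names and build the submission file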
df = pd.DataFrame(columns=['id', 'category'])
cate = encoder.inverse_transform(pred)
df['category'] = cate
df['id'] = np.arange(len(cate))
print(df.head())
df.to_csv('./submissao33.csv', index=False)
#pred = clf.predict(X_test)

import sys
sys.exit("Error message")
score = metrics.accuracy_score(y_test, pred)
Example #15
        parameters -- python dictionary containing your parameters
        X -- input data of size (n_x, m)

        Returns:
        predictions -- vector of predictions of our model (0/1)
        """
        A2, cache = self.forward_propagation(X, self.parameters)
        threshold = 0.5
        predictions = A2 > threshold

        return predictions

    def run(self):
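        # one training step: forward pass, cost, backward pass, parameter update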
        n_x, n_h, n_y = self.layer_sizes(x, y)
        parameters = self.initialize_parameters(n_x, n_h, n_y)
        A2, cache = self.forward_propagation(x, parameters)
        cost = self.compute_cost(A2, y, parameters)
        print(cost)
        grads = self.backward_propagation(parameters, cache, x, y)
        self.update_parameters(parameters, grads)


if __name__ == "__main__":
    x, y = load_training_data()
    clf = logistic_regression_Ng()
    clf.nn_model(X=x, Y=y, n_h=4, print_cost=True)
    x_test = load_test_data()
    y_predict = clf.predict(x_test)
    print(y_predict)
    print(np.mean(y_predict))