# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.linear_model

import submissions
from data import *

# Fit a logistic-regression classifier on the training matrix and write
# per-class probability predictions for the test set to a submission file.
# (fit() returns the estimator itself, so the calls chain.)
model = sklearn.linear_model.LogisticRegression(n_jobs=2)
pred = model.fit(train, target).predict_proba(test)

submissions.save_csv(pred, "logistic_regression.csv")
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.naive_bayes

import submissions
from data import *

# Multinomial naive Bayes baseline: train on the count features and emit
# class-probability predictions for the test set.
model = sklearn.naive_bayes.MultinomialNB()
pred = model.fit(train, target).predict_proba(test)

submissions.save_csv(pred, "multinomial_naive_bayes.csv")
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.svm

import submissions
from data import *

# Linear support-vector classifier. Note it emits hard class labels via
# predict() — LinearSVC has no predict_proba, unlike the other baselines.
model = sklearn.svm.LinearSVC()
pred = model.fit(train, target).predict(test)

submissions.save_csv(pred, "support_vector_machine.csv")
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.naive_bayes

import submissions
from data import *

# Bernoulli naive Bayes baseline: train on the binary features and emit
# hard class-label predictions for the test set.
model = sklearn.naive_bayes.BernoulliNB()
pred = model.fit(train, target).predict(test)

submissions.save_csv(pred, "bernoulli_naive_bayes.csv")
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.neighbors

import submissions
from data import *

# k-nearest-neighbours baseline (default k), parallelised over two jobs;
# writes hard class-label predictions for the test set.
model = sklearn.neighbors.KNeighborsClassifier(n_jobs=2)
pred = model.fit(train, target).predict(test)

submissions.save_csv(pred, "k-nearest_neighbors.csv")
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import generators
from __future__ import nested_scopes
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import with_statement

import sklearn.naive_bayes

import data
import process.bag_of_words
import submissions

if __name__ == '__main__':
    import os

    # Bernoulli naive Bayes on the bag-of-words representation.
    bnb = sklearn.naive_bayes.BernoulliNB()
    bnb.fit(process.bag_of_words.train, data.target)
    pred = bnb.predict(process.bag_of_words.test)

    # Derive the submission name from this script's own file name.
    # The original used __file__[:-3], which keeps any leading directory
    # (e.g. "scripts/foo" -> "scripts/foo.csv") and silently mangles the
    # name when __file__ ends in ".pyc"; basename + splitext handles both.
    file_name = os.path.splitext(os.path.basename(__file__))[0]
    submissions.save_csv(pred, '{file_name}.csv'.format(file_name=file_name))
# Beispiel #7 ("Example #7" — scrape artifact separating concatenated scripts)
# 0
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.ensemble

import submissions
from data import *

# Random forest (100 trees, out-of-bag scoring enabled, two parallel jobs);
# writes class-probability predictions for the test set.
model = sklearn.ensemble.RandomForestClassifier(n_estimators=100, oob_score=True, n_jobs=2)
pred = model.fit(train, target).predict_proba(test)

submissions.save_csv(pred, "random_forest.csv")
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.naive_bayes

import submissions
from data import *

# Gaussian naive Bayes baseline; writes class-probability predictions
# for the test set.
model = sklearn.naive_bayes.GaussianNB()
pred = model.fit(train, target).predict_proba(test)

submissions.save_csv(pred, "gaussian_naive_bayes.csv")
# Multi-layer perceptron (old Keras 0.x/1.x API: nb_epoch/show_accuracy).
# Cast the feature matrices to float32 and one-hot encode the labels.
train = train.astype(numpy.float32)
test = test.astype(numpy.float32)

target = keras.utils.np_utils.to_categorical(target)

input_dim = train.shape[1]
nb_classes = target.shape[1]

model = keras.models.Sequential()

# Two identical hidden stages: Dense(512) -> PReLU -> BatchNorm -> Dropout.
# Only the first Dense layer needs the input dimension.
for stage in range(2):
    if stage == 0:
        model.add(keras.layers.core.Dense(512, input_dim=input_dim))
    else:
        model.add(keras.layers.core.Dense(512))
    model.add(keras.layers.advanced_activations.PReLU())
    model.add(keras.layers.normalization.BatchNormalization())
    model.add(keras.layers.core.Dropout(0.2))

# Softmax output over the one-hot classes.
model.add(keras.layers.core.Dense(nb_classes))
model.add(keras.layers.core.Activation("softmax"))

optimizer = keras.optimizers.RMSprop(lr=1e-5)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

model.fit(train, target, batch_size=128, nb_epoch=12, verbose=1, show_accuracy=True)
pred = model.predict(test, verbose=0)

submissions.save_csv(pred, "multi-layer_perceptron.csv")
# Beispiel #10 ("Example #10" — scrape artifact separating concatenated scripts)
# 0
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.ensemble

import submissions
from data import *

# AdaBoost ensemble (100 boosting rounds over the default base estimator);
# writes class-probability predictions for the test set.
model = sklearn.ensemble.AdaBoostClassifier(n_estimators=100)
pred = model.fit(train, target).predict_proba(test)

submissions.save_csv(pred, "adaboost.csv")
import numpy

import submissions
from data import *

# NOTE(review): this script calls `keras` throughout but its import header
# only brings in numpy/submissions/data — unless `from data import *`
# happens to re-export keras (unlikely; numpy had to be imported
# explicitly), the script dies with a NameError. Import it explicitly.
import keras

# Cast labels to uint8 and reshape each sample into a (timesteps, 1)
# sequence of float32 pixel values for the recurrent layer.
target = target.astype(numpy.uint8)
train = train.reshape((train.shape[0], -1, 1)).astype(numpy.float32)
test = test.reshape((test.shape[0], -1, 1)).astype(numpy.float32)

# One-hot encode labels and scale features into [0, 1].
target = keras.utils.np_utils.to_categorical(target)
train /= 255
test /= 255

input_shape = train.shape[1:]
nb_classes = target.shape[1]

# Single LSTM layer feeding a softmax classifier (old Keras 0.x/1.x API).
lstm = keras.models.Sequential()

lstm.add(keras.layers.recurrent.LSTM(100, input_shape=input_shape))

lstm.add(keras.layers.core.Dense(nb_classes))
lstm.add(keras.layers.core.Activation("softmax"))

rmsprop = keras.optimizers.RMSprop(lr=1e-6)
lstm.compile(optimizer=rmsprop, loss="categorical_crossentropy")

lstm.fit(train, target, batch_size=32, nb_epoch=200, verbose=1, show_accuracy=True)
# predict_classes returns hard integer labels (not probabilities).
pred = lstm.predict_classes(test, verbose=0)

submissions.save_csv(pred, "long_short_term_memory.csv")
# Beispiel #12 ("Example #12" — scrape artifact separating concatenated scripts)
# 0
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import sklearn.tree

import submissions
from data import *

# Single decision tree baseline; writes class-probability predictions
# for the test set.
model = sklearn.tree.DecisionTreeClassifier()
pred = model.fit(train, target).predict_proba(test)

submissions.save_csv(pred, "decision_tree.csv")
import submissions
from data import *

# NOTE(review): this script's import header (submissions + `from data
# import *`) imports neither numpy nor keras, yet both are used below —
# a NameError unless the data wildcard re-exports them. Import explicitly.
import keras
import numpy

# Cast labels to uint8 and reshape each sample into a (timesteps, 1)
# sequence of float32 pixel values for the recurrent layer.
target = target.astype(numpy.uint8)
train = train.reshape((train.shape[0], -1, 1)).astype(numpy.float32)
test = test.reshape((test.shape[0], -1, 1)).astype(numpy.float32)

# One-hot encode labels and scale features into [0, 1].
target = keras.utils.np_utils.to_categorical(target)
train /= 255
test /= 255

input_shape = train.shape[1:]
nb_classes = target.shape[1]

# IRNN-style SimpleRNN: small-normal input weights, identity recurrent
# weights, ReLU activation (old Keras 0.x/1.x `init` callables).
rnn = keras.models.Sequential()

rnn.add(keras.layers.recurrent.SimpleRNN(100, init=lambda shape: keras.initializations.normal(shape, scale=0.001),
                                         inner_init=lambda shape: keras.initializations.identity(shape, scale=1.0),
                                         activation="relu", input_shape=input_shape))

rnn.add(keras.layers.core.Dense(nb_classes))
rnn.add(keras.layers.core.Activation("softmax"))

rmsprop = keras.optimizers.RMSprop(lr=1e-6)
rnn.compile(optimizer=rmsprop, loss="categorical_crossentropy")

rnn.fit(train, target, batch_size=32, nb_epoch=200, verbose=1, show_accuracy=True)
# predict_classes returns hard integer labels (not probabilities).
pred = rnn.predict_classes(test, verbose=0)

submissions.save_csv(pred, "recurrent_neural_network.csv")
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import xgboost

import submissions
from data import *

# Gradient-boosted trees via xgboost's sklearn-style wrapper
# (300 rounds at learning rate 0.05); writes hard label predictions.
model = xgboost.XGBClassifier(learning_rate=0.05, n_estimators=300)
model.fit(train, target)
pred = model.predict(test)

submissions.save_csv(pred, "gradient_boost_xgboost.csv")
        input_dim=process.word_vectors.max_features + 1,
        output_dim=process.word_vectors.word_vec_dim,
        input_length=input_dim,
        weights=[process.word_vectors.weights],
    )(input_tensor)
    # embedded = keras.layers.Dropout(0.5)(embedded)

    # Three parallel 1-D convolution branches over the embedded token
    # sequence, one per filter width.  NOTE(review): `input_tensor`,
    # `input_dim` and `embedded` are defined above this chunk — presumably
    # a keras Input and an Embedding call whose header is cut off here.
    tensors = []
    for filter_length in (3, 4, 5):
        tensor = keras.layers.Convolution1D(nb_filter=100, filter_length=filter_length)(embedded)
        tensor = keras.layers.Activation("relu")(tensor)
        # Pool over every position the convolution produced
        # (input_dim - filter_length + 1 of them), i.e. global max pooling,
        # leaving one value per feature map.
        tensor = keras.layers.MaxPooling1D(pool_length=input_dim - filter_length + 1)(tensor)
        tensor = keras.layers.Flatten()(tensor)
        tensors.append(tensor)

    # Concatenate the three branch outputs and classify with a single
    # sigmoid unit, matching the binary cross-entropy loss below.
    output_tensor = keras.layers.merge(tensors, mode="concat", concat_axis=1)
    output_tensor = keras.layers.Dropout(0.5)(output_tensor)
    output_tensor = keras.layers.Dense(1, activation="sigmoid")(output_tensor)

    cnn = keras.models.Model(input_tensor, output_tensor)
    cnn.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])
    print(cnn.summary())
    # keras.utils.visualize_util.plot(cnn, to_file='images/{file_name}.png'.format(file_name=__file__[:-3]),
    #                                 show_shapes=True)

    cnn.fit(process.word_vectors.train, data.target, batch_size=64, nb_epoch=4)
    pred = cnn.predict(process.word_vectors.test)
    # Threshold the sigmoid probabilities at 0.5 to get hard 0/1 labels.
    pred = (pred > 0.5).astype("int32")

    submissions.save_csv(pred.flatten(), "{file_name}.csv".format(file_name=__file__[:-3]))
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division

import tgrocery

import data.load_data
import submissions

# Train a liblinear-backed TextGrocery classifier on
# (label, ingredients-text) pairs and predict one label per test document.
train_src = zip(data.load_data.target, data.load_data.train_df["ingredients"])

classifier = tgrocery.Grocery("whats_cooking")
classifier.train(train_src)

pred = []
for document in data.load_data.test_df["ingredients"]:
    pred.append(classifier.predict(document))

submissions.save_csv(pred, "liblinear_tgrocery.csv")