# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.linear_model

import submissions
from data import *

# Fit a logistic-regression classifier on the training matrix and write
# the per-class probabilities for the test set to a submission file.
# `train`, `target` and `test` are provided by the `data` module.
classifier = sklearn.linear_model.LogisticRegression(n_jobs=2)
classifier.fit(train, target)
probabilities = classifier.predict_proba(test)
submissions.save_csv(probabilities, "logistic_regression.csv")
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.naive_bayes

import submissions
from data import *

# Multinomial naive Bayes baseline: fit on the training data and save
# the predicted class probabilities for the test set.
model = sklearn.naive_bayes.MultinomialNB()
model.fit(train, target)
probabilities = model.predict_proba(test)
submissions.save_csv(probabilities, "multinomial_naive_bayes.csv")
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.svm

import submissions
from data import *

# Linear support-vector classifier.  LinearSVC does not expose
# predict_proba, so hard class labels are submitted here (unlike the
# probabilistic baselines elsewhere in this project).
model = sklearn.svm.LinearSVC()
model.fit(train, target)
labels = model.predict(test)
submissions.save_csv(labels, "support_vector_machine.csv")
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.naive_bayes

import submissions
from data import *

# Bernoulli naive Bayes baseline.
# NOTE(review): this script saves hard labels via predict(), while
# several sibling scripts save predict_proba() output -- confirm which
# format the submission target actually expects.
model = sklearn.naive_bayes.BernoulliNB()
model.fit(train, target)
labels = model.predict(test)
submissions.save_csv(labels, "bernoulli_naive_bayes.csv")
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.neighbors

import submissions
from data import *

# k-nearest-neighbors baseline; two worker processes for the
# neighbor search, hard labels in the submission.
model = sklearn.neighbors.KNeighborsClassifier(n_jobs=2)
model.fit(train, target)
labels = model.predict(test)
submissions.save_csv(labels, "k-nearest_neighbors.csv")
# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import division from __future__ import generators from __future__ import nested_scopes from __future__ import print_function from __future__ import unicode_literals from __future__ import with_statement import sklearn.naive_bayes import data import process.bag_of_words import submissions if __name__ == '__main__': bnb = sklearn.naive_bayes.BernoulliNB() bnb.fit(process.bag_of_words.train, data.target) pred = bnb.predict(process.bag_of_words.test) submissions.save_csv(pred, '{file_name}.csv'.format(file_name=__file__[:-3]))
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.ensemble

import submissions
from data import *

# Random forest of 100 trees; oob_score=True computes an out-of-bag
# generalization estimate during fitting, n_jobs=2 parallelizes tree
# construction.  Class probabilities are submitted.
forest = sklearn.ensemble.RandomForestClassifier(n_estimators=100,
                                                 oob_score=True,
                                                 n_jobs=2)
forest.fit(train, target)
probabilities = forest.predict_proba(test)
submissions.save_csv(probabilities, "random_forest.csv")
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.naive_bayes

import submissions
from data import *

# Gaussian naive Bayes baseline (models each feature as a normal
# distribution per class); probabilities are submitted.
# NOTE(review): GaussianNB requires a dense array -- presumably `train`
# is dense here; verify against the data module.
model = sklearn.naive_bayes.GaussianNB()
model.fit(train, target)
probabilities = model.predict_proba(test)
submissions.save_csv(probabilities, "gaussian_naive_bayes.csv")
# NOTE(review): this fragment relies on `train`, `test`, `target`,
# `numpy`, `keras` and `submissions` being bound earlier in the full
# script (the preamble is not visible here).
train = train.astype(numpy.float32)
test = test.astype(numpy.float32)
# One-hot encode the integer class labels.
target = keras.utils.np_utils.to_categorical(target)

input_dim = train.shape[1]
nb_classes = target.shape[1]

# Two identical hidden stacks (Dense -> PReLU -> BatchNorm -> Dropout),
# then a softmax output layer.  Keras 1.x API (nb_epoch/show_accuracy).
mlp = keras.models.Sequential()
mlp.add(keras.layers.core.Dense(512, input_dim=input_dim))
mlp.add(keras.layers.advanced_activations.PReLU())
mlp.add(keras.layers.normalization.BatchNormalization())
mlp.add(keras.layers.core.Dropout(0.2))
for _ in range(1):
    mlp.add(keras.layers.core.Dense(512))
    mlp.add(keras.layers.advanced_activations.PReLU())
    mlp.add(keras.layers.normalization.BatchNormalization())
    mlp.add(keras.layers.core.Dropout(0.2))
mlp.add(keras.layers.core.Dense(nb_classes))
mlp.add(keras.layers.core.Activation("softmax"))

rmsprop = keras.optimizers.RMSprop(lr=1e-5)
mlp.compile(optimizer=rmsprop, loss="categorical_crossentropy")
mlp.fit(train, target, batch_size=128, nb_epoch=12, verbose=1,
        show_accuracy=True)
# predict() returns the softmax probabilities.
pred = mlp.predict(test, verbose=0)
submissions.save_csv(pred, "multi-layer_perceptron.csv")
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.ensemble

import submissions
from data import *

# AdaBoost ensemble of 100 weak learners (default base estimator);
# class probabilities are submitted.
booster = sklearn.ensemble.AdaBoostClassifier(n_estimators=100)
booster.fit(train, target)
probabilities = booster.predict_proba(test)
submissions.save_csv(probabilities, "adaboost.csv")
import keras
import numpy

import submissions
from data import *

# NOTE(review): the original script used `keras` without importing it;
# unless `from data import *` happened to re-export keras, the script
# died with a NameError.  It is imported explicitly above.

target = target.astype(numpy.uint8)
# Flatten each sample into a (timesteps, 1) sequence for the LSTM and
# scale the (pixel-like) values into [0, 1].
train = train.reshape((train.shape[0], -1, 1)).astype(numpy.float32)
test = test.reshape((test.shape[0], -1, 1)).astype(numpy.float32)
target = keras.utils.np_utils.to_categorical(target)
train /= 255
test /= 255

input_shape = train.shape[1:]
nb_classes = target.shape[1]

# Single LSTM layer followed by a softmax classifier (Keras 1.x API).
lstm = keras.models.Sequential()
lstm.add(keras.layers.recurrent.LSTM(100, input_shape=input_shape))
lstm.add(keras.layers.core.Dense(nb_classes))
lstm.add(keras.layers.core.Activation("softmax"))

rmsprop = keras.optimizers.RMSprop(lr=1e-6)
lstm.compile(optimizer=rmsprop, loss="categorical_crossentropy")
lstm.fit(train, target, batch_size=32, nb_epoch=200, verbose=1,
         show_accuracy=True)
# predict_classes() yields integer labels rather than probabilities.
pred = lstm.predict_classes(test, verbose=0)
submissions.save_csv(pred, "long_short_term_memory.csv")
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import sklearn.tree

import submissions
from data import *

# Single decision tree baseline (default hyper-parameters); class
# probabilities are submitted.
tree = sklearn.tree.DecisionTreeClassifier()
tree.fit(train, target)
probabilities = tree.predict_proba(test)
submissions.save_csv(probabilities, "decision_tree.csv")
import keras
import numpy

import submissions
from data import *

# NOTE(review): the original script used both `numpy` and `keras`
# without importing them; unless `from data import *` re-exported them,
# the script died with a NameError.  Both are imported explicitly above.

target = target.astype(numpy.uint8)
# Flatten each sample into a (timesteps, 1) sequence for the RNN and
# scale the (pixel-like) values into [0, 1].
train = train.reshape((train.shape[0], -1, 1)).astype(numpy.float32)
test = test.reshape((test.shape[0], -1, 1)).astype(numpy.float32)
target = keras.utils.np_utils.to_categorical(target)
train /= 255
test /= 255

input_shape = train.shape[1:]
nb_classes = target.shape[1]

# IRNN-style SimpleRNN: small-normal input weights, identity recurrent
# weights, ReLU activation (Keras 1.x API).
# NOTE(review): some Keras 1.x versions call custom initializers as
# init(shape, name=name); if that applies here the bare
# `lambda shape:` signature raises TypeError -- confirm against the
# pinned Keras version (fix would be `lambda shape, name=None: ...`).
rnn = keras.models.Sequential()
rnn.add(keras.layers.recurrent.SimpleRNN(
    100,
    init=lambda shape: keras.initializations.normal(shape, scale=0.001),
    inner_init=lambda shape: keras.initializations.identity(shape, scale=1.0),
    activation="relu",
    input_shape=input_shape))
rnn.add(keras.layers.core.Dense(nb_classes))
rnn.add(keras.layers.core.Activation("softmax"))

rmsprop = keras.optimizers.RMSprop(lr=1e-6)
rnn.compile(optimizer=rmsprop, loss="categorical_crossentropy")
rnn.fit(train, target, batch_size=32, nb_epoch=200, verbose=1,
        show_accuracy=True)
# predict_classes() yields integer labels rather than probabilities.
pred = rnn.predict_classes(test, verbose=0)
submissions.save_csv(pred, "recurrent_neural_network.csv")
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import xgboost

import submissions
from data import *

# Gradient-boosted trees via the XGBoost sklearn wrapper: 300 rounds at
# a 0.05 learning rate.  Hard labels are submitted.
booster = xgboost.XGBClassifier(learning_rate=0.05, n_estimators=300)
booster.fit(train, target)
labels = booster.predict(test)
submissions.save_csv(labels, "gradient_boost_xgboost.csv")
input_dim=process.word_vectors.max_features + 1, output_dim=process.word_vectors.word_vec_dim, input_length=input_dim, weights=[process.word_vectors.weights], )(input_tensor) # embedded = keras.layers.Dropout(0.5)(embedded) tensors = [] for filter_length in (3, 4, 5): tensor = keras.layers.Convolution1D(nb_filter=100, filter_length=filter_length)(embedded) tensor = keras.layers.Activation("relu")(tensor) tensor = keras.layers.MaxPooling1D(pool_length=input_dim - filter_length + 1)(tensor) tensor = keras.layers.Flatten()(tensor) tensors.append(tensor) output_tensor = keras.layers.merge(tensors, mode="concat", concat_axis=1) output_tensor = keras.layers.Dropout(0.5)(output_tensor) output_tensor = keras.layers.Dense(1, activation="sigmoid")(output_tensor) cnn = keras.models.Model(input_tensor, output_tensor) cnn.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"]) print(cnn.summary()) # keras.utils.visualize_util.plot(cnn, to_file='images/{file_name}.png'.format(file_name=__file__[:-3]), # show_shapes=True) cnn.fit(process.word_vectors.train, data.target, batch_size=64, nb_epoch=4) pred = cnn.predict(process.word_vectors.test) pred = (pred > 0.5).astype("int32") submissions.save_csv(pred.flatten(), "{file_name}.csv".format(file_name=__file__[:-3]))
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division

import tgrocery

import data.load_data
import submissions

# TextGrocery (liblinear-backed) short-text classifier for the
# "whats_cooking" task.  Training input is (label, text) pairs.
training_pairs = zip(data.load_data.target,
                     data.load_data.train_df["ingredients"])
grocery = tgrocery.Grocery("whats_cooking")
grocery.train(training_pairs)
predictions = [grocery.predict(text)
               for text in data.load_data.test_df["ingredients"]]
submissions.save_csv(predictions, "liblinear_tgrocery.csv")