def log(info): f = file('submission.txt', 'w+') f.write(str(info)) f.close() def submission(preds): out = "" for p in preds: out += str(p) + "\n" log(out) data, targets = Data.data() print "training data: ", len(data) test = Data.test() print "test data: ", len(test) data = data + test print "all data: ", len(data) # preprocessing start = time() matrix = BlackboxPreprocess.to_matrix(data) print matrix.shape matrix = BlackboxPreprocess.scale(matrix) #matrix = BlackboxPreprocess.polynomial(matrix, 2) matrix = preprocessing.normalize(matrix, norm='l2') min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1.,1.)) matrix = min_max_scaler.fit_transform(matrix) #matrix = BlackboxPreprocess.norm(matrix) print matrix.shape
from preprocess import Polynomial from abstract_model import ClassifierEnsemble from sklearn_wrapper import LinearRegressionModel, LogisticRegressionModel, SVCModel from score import score import logging """ This is supposed to be a sample interface for me to develop to support. """ objective = Objective.MAXIMIZE # data train_data, train_targets = Data.train() test_data, test_targets = Data.test() # feature engineering extra_data = test_data pipe = Pipeline(Polynomial, LogisticRegressionModel, objective, logging.WARN) pipe.fit(train_data, train_targets, extra_data) print pipe.hyperparams # train model train_data = pipe.transform(train_data) voter1 = LogisticRegressionModel(objective, logging.INFO) models = [m(objective, logging.INFO) for m in [SVCModel, LogisticRegressionModel]] ensemble = ClassifierEnsemble(models, voter1, objective, logging.INFO) voter2 = LogisticRegressionModel(objective, logging.INFO)
from data import Data from blackbox_preprocess import BlackboxPreprocess from sklearn.linear_model import LogisticRegression data, targets = Data.data() extra = Data.test() data = data + extra originals = data # preprocessing matrix = BlackboxPreprocess.to_matrix(data) print "(examples, dimensions): ", matrix.shape matrix = BlackboxPreprocess.scale(matrix) matrix = BlackboxPreprocess.polynomial(matrix, 2) print "(examples, dimensions): ", matrix.shape data = matrix.tolist() # split training and CV data tr_data = data[:1000] unlabeled = data[1000:] # create psuedo labels model = LogisticRegression(C=1.3, penalty='l1', tol=0.05) print len(targets) print targets[:10] model.fit(tr_data, targets) labeled = [] for i,u in enumerate(unlabeled): orig = originals[i]