out = "" for p in preds: out += str(p) + "\n" log(out) data, targets = Data.data() print "training data: ", len(data) test = Data.test() print "test data: ", len(test) data = data + test print "all data: ", len(data) # preprocessing start = time() matrix = BlackboxPreprocess.to_matrix(data) print matrix.shape matrix = BlackboxPreprocess.scale(matrix) #matrix = BlackboxPreprocess.polynomial(matrix, 2) matrix = preprocessing.normalize(matrix, norm='l2') min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1.,1.)) matrix = min_max_scaler.fit_transform(matrix) #matrix = BlackboxPreprocess.norm(matrix) print matrix.shape data = matrix.tolist() # split training and test data test_data = data[1000:] data, targets = data[:1000], targets[:1000] # testing
from data import Data from blackbox_preprocess import BlackboxPreprocess from sklearn.linear_model import LogisticRegression data, targets = Data.data() extra = Data.test() data = data + extra originals = data # preprocessing matrix = BlackboxPreprocess.to_matrix(data) print "(examples, dimensions): ", matrix.shape matrix = BlackboxPreprocess.scale(matrix) matrix = BlackboxPreprocess.polynomial(matrix, 2) print "(examples, dimensions): ", matrix.shape data = matrix.tolist() # split training and CV data tr_data = data[:1000] unlabeled = data[1000:] # create psuedo labels model = LogisticRegression(C=1.3, penalty='l1', tol=0.05) print len(targets) print targets[:10] model.fit(tr_data, targets) labeled = [] for i,u in enumerate(unlabeled): orig = originals[i]