from data import Data
from blackbox_preprocess import BlackboxPreprocess
from sklearn.linear_model import LogisticRegression


data, targets = Data.data()
extra = Data.test()
data = data + extra
originals = data

# preprocessing
matrix = BlackboxPreprocess.to_matrix(data)
print "(examples, dimensions): ", matrix.shape
matrix = BlackboxPreprocess.scale(matrix)
matrix = BlackboxPreprocess.polynomial(matrix, 2)
print "(examples, dimensions): ", matrix.shape
data = matrix.tolist()

# split training and CV data
tr_data = data[:1000]
unlabeled = data[1000:]

# create psuedo labels
model = LogisticRegression(C=1.3, penalty='l1', tol=0.05)
print len(targets)
print targets[:10]
model.fit(tr_data, targets)

labeled = []
for i,u in enumerate(unlabeled):
    orig = originals[i]