#!/usr/bin/python
#

import utility
import numpy as np
import multiprocessing
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression

# Get Data
# Training matrix and label vector come from the project's utility module
# (the meaning of the rn/csr flags is defined there -- not visible here).
d = utility.get_train_data(csr=True, rn=False)
l = utility.get_train_labels()

# Index vector over the samples, class count (largest label + 1; presumably
# labels are 0-based class ids -- confirm), class ids, and column count of d.
a = np.arange(len(l))
nc = 1 + np.max(l)
cs = np.arange(nc)
nw = d.shape[1]

# Number of cross-validation folds and the per-fold index sets over the
# rows of d, as produced by utility.def_cross_validate_sets.
ncv = 5
cvsets = utility.def_cross_validate_sets(d.shape[0], ncv)

# Cross Validation
def do_cv_set(icv):
    """Build the train/test split of the module-level data for fold `icv`.

    Fold `icv` of `cvsets` supplies the held-out indices; the indices of
    every other fold are joined to form the training indices. Both `d` and
    `l` are then indexed with each index set.

    NOTE(review): in the code visible here the function stops after building
    the four splits and implicitly returns None -- the fitting/scoring step
    is presumably cut off; confirm against the full file.
    """
    # New Sets
    indtest = cvsets[icv]

    # Seed with an empty int32 array so stacking stays valid even when no
    # other fold contributes, then add every fold except the held-out one.
    pieces = [np.array([], dtype=np.int32)]
    pieces.extend(cvsets[i] for i in range(ncv) if i != icv)
    indtrain = np.hstack(pieces)

    dtest, dtrain = d[indtest], d[indtrain]
    ltest, ltrain = l[indtest], l[indtrain]
Example #2
0
#!/usr/bin/python
#

import utility
import numpy as np
from scipy.io import savemat

# Training matrix and labels from the project's utility module (the meaning
# of the rn/csr flags is defined there -- not visible here).
d = utility.get_train_data(csr=True, rn=False)
l = utility.get_train_labels()
# Positional index of every sample; used to turn label masks into row indices.
a = np.arange(len(l))

# Class count is the largest label plus one (presumably labels are 0-based
# class ids -- confirm); nw is the number of columns of d.
nc = 1 + np.max(l)
nw = d.shape[1]
# Zero-initialised table with one row per class and one column per column of d.
bp = np.zeros((nc, nw))

# For each class, pool the rows carrying that label, total every column, and
# store the add-one smoothed relative frequencies: (count + 1) / (total + nw).
for ic in range(nc):
    class_rows = a[l == ic]
    # The column sums come back 2-D; np.array(...)[0] takes the single row.
    col_counts = np.array(d[class_rows].sum(axis=0))[0]
    bp[ic] = (col_counts + 1.0) / (col_counts.sum() + nw)

# Write the table to a MATLAB file in the utility data directory, under key 'a'.
savemat(utility.ddir + "/train_data_bayesian_priors.mat", {'a': bp})