#!/usr/bin/python # import utility import numpy as np import multiprocessing from sklearn.naive_bayes import MultinomialNB, BernoulliNB from sklearn.linear_model import LogisticRegression # Get Data d = utility.get_train_data(rn=False, csr=True) l = utility.get_train_labels() a = np.arange(len(l)) nc = np.max(l) + 1 cs = np.arange(nc) nw = d.shape[1] ncv = 5 cvsets = utility.def_cross_validate_sets(d.shape[0], ncv) # Cross Validation def do_cv_set(icv) : # New Sets indtest = cvsets[icv] indtrain = np.array([], dtype=np.int32) for i in range(ncv) : if i != icv : indtrain = np.hstack((indtrain, cvsets[i])) dtest = d[indtest] dtrain = d[indtrain] ltest = l[indtest] ltrain = l[indtrain]
#!/usr/bin/python
#
# Computes, for every class label, add-one smoothed column frequencies of the
# training data and stores the resulting (class x column) table in a .mat file.
import utility
import numpy as np
from scipy.io import savemat


def _smoothed_column_probs(rows, n_columns):
    """Return add-one smoothed column frequencies for the given rows.

    `rows` is the slice of the training matrix belonging to one class and
    `n_columns` is the total number of columns, used as the smoothing
    denominator term.
    """
    # Column totals; the sum comes back 2-D, so take its single row.
    totals = np.array(rows.sum(axis=0))[0]
    return (1.0 + totals) / (np.sum(totals) + n_columns)


# Training matrix and labels come from the project helper module.
d = utility.get_train_data(rn=False, csr=True)
l = utility.get_train_labels()

# Row positions, used to turn a label mask into explicit row indices.
a = np.arange(len(l))
# Labels are treated as running from 0 up to the largest value present.
nc = np.max(l) + 1
# Number of columns in the training matrix.
nw = d.shape[1]

# One row of smoothed probabilities per class label.
bp = np.zeros((nc, nw))
for label in range(nc):
    members = a[l == label]
    bp[label] = _smoothed_column_probs(d[members], nw)

out_path = utility.ddir + "/train_data_bayesian_priors.mat"
savemat(out_path, {'a': bp})