def GTM_discret(data, m, n, intl=[], tol=1e-3, max_itr=99, alpha=10, beta=10, mu0=0, sigma0=1):
    """Run the GTM E/M iteration, then pick one observed claim per entry.

    The loop alternates ``E_step`` (truth estimate) and ``M_step``
    (``sigma_vec``) until the relative change of the truth estimate is no
    larger than ``tol`` or ``max_itr`` rounds have run.  The continuous
    estimate is then discarded: for each of the ``n`` entries the returned
    truth is the claim whose source has the smallest ``sigma_vec`` value
    among the sources listed in ``index[i]``.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``n`` is also the number
            of entries looped over.
        intl: initial value handed to ``Initialization`` (never mutated here).
        tol, max_itr: stopping rule of the iteration.
        alpha, beta: forwarded to ``Initialization`` and ``M_step``.
        mu0, sigma0: forwarded to ``E_step``.

    Returns:
        list: ``[truth, sigma_vec]``.
    """
    index, claim, _count = bsf.extract(data, m, n)
    estimate, sigma_vec = Initialization(intl, claim, index, m, n, alpha, beta)

    rel_change = 99  # large starting value so the loop is entered for usual tol
    rounds = 0
    while rel_change > tol and rounds < max_itr:
        rounds += 1
        previous = np.copy(estimate)
        estimate = E_step(claim, index, m, n, sigma_vec, mu0, sigma0)
        sigma_vec = M_step(claim, index, m, n, estimate, alpha, beta)
        rel_change = la.norm(estimate - previous) / la.norm(previous)

    # Discretise: keep the claim of the source with the smallest sigma.
    truth = np.zeros(n)
    for obj in range(n):
        best = sigma_vec[index[obj]].argmin()
        truth[obj] = claim[obj][best]
    return [truth, sigma_vec]
def KDEm_fast(data, m, n, tol=1e-5, max_itr=99, method="Gaussian", h=-1):
    """Iterate the KDEm updates using the compressed-data ("fast") helpers.

    Starting from uniform per-entry weights, the loop alternates
    ``update_w`` and ``update_c`` (recomputing the norm matrix in between)
    until the relative change of the objective ``J`` returned by
    ``update_c`` is at most ``tol`` or ``max_itr`` rounds have run.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``data`` is also passed
            to ``bsf.compress``.
        tol, max_itr: stopping rule.
        method: kernel name forwarded to the ``bsf`` helpers and the
            update functions.
        h: accepted for interface compatibility; not used in this function.

    Returns:
        list: ``[c_vec, w_M, itr]`` where ``itr`` is the number of rounds run.

    Side effects:
        Prints ``#iteration: <itr>`` once after the loop.
    """
    index, claim, count = bsf.extract(data, m, n)
    data_c = bsf.compress(data)

    # Uniform initial weights, one vector per entry.
    w_M = []
    for i in range(n):
        size = len(index[i])
        w_M.append(np.ones(size) / size)

    kernel_M, value_M = bsf.get_kernel_matrix_fast(data_c, n, method)
    norm_M = bsf.get_norm_matrix_fast(data_c, kernel_M, value_M, w_M, method)
    c_vec, J = update_c(index, m, n, count, norm_M, method)

    err = 99
    itr = 0
    while err > tol and itr < max_itr:
        itr += 1
        J_old = J
        c_old = np.copy(c_vec)
        w_M = update_w(index, m, n, c_old, norm_M, method)
        norm_M = bsf.get_norm_matrix_fast(data_c, kernel_M, value_M, w_M, method)
        c_vec, J = update_c(index, m, n, count, norm_M, method)
        # Convergence is measured on the objective, not on c_vec.
        err = abs((J - J_old) / J_old)

    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print("#iteration: %s" % itr)
    return [c_vec, w_M, itr]
def RKDE(data, m, n, rho_para=np.array([0.5, 0.75, 0.85]), tol=1e-3, max_itr=99, method="Gaussian"):
    """Compute RKDE weights for each of the ``n`` entries independently.

    A norm matrix is first built from uniform weights and turned into the
    per-entry parameters ``a_vec`` by ``compute_a``; then ``RKDE_single`` is
    run once per entry with all-ones source weights.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``.
        rho_para: forwarded to ``compute_a`` (not modified here).
        tol, max_itr: forwarded to every ``RKDE_single`` call.
        method: kernel name forwarded to the helpers.

    Returns:
        list: the weight vector returned by ``RKDE_single`` for each entry.
    """
    index, claim, _count = bsf.extract(data, m, n)

    # Uniform weights are only needed to build the initial norm matrix.
    uniform_w = []
    for i in range(n):
        size = len(index[i])
        uniform_w.append(np.ones(size) / size)

    kernel_M = bsf.get_kernel_matrix(claim, n, method)
    norm_M = bsf.get_norm_matrix(kernel_M, n, uniform_w, method)
    c_vec = np.ones(m)
    a_vec = compute_a(norm_M, n, rho_para)

    w_M = []
    for i in range(n):
        kernel_m = np.copy(kernel_M[i])  # copy so kernel_M itself stays untouched
        # Pass the caller's tolerance through; it used to be pinned to 1e-3
        # here, silently ignoring the ``tol`` argument.
        w_i, _norm = RKDE_single(index[i], m, n, c_vec, kernel_m, a_vec[i, :],
                                 method, max_itr=max_itr, tol=tol)
        w_M.append(w_i)
    return w_M
def evaluation_source_single(data, m, n, truth):
    """Per-source MAE and RMSE of the claims against one truth per entry.

    For every entry ``i`` the residual ``claim[i] - truth[i]`` is added
    (absolute and squared) to the slots of the sources in ``index[i]``.
    Sources with a positive ``count`` are then normalised by that count.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of the
            per-source result vectors and ``n`` the number of entries.
        truth: indexable by entry, giving the reference value for entry ``i``.

    Returns:
        numpy.ndarray: shape ``(m, 3)`` with columns MAE, RMSE and count.
    """
    index, claim, count = bsf.extract(data, m, n)
    abs_sum = np.zeros(m)
    sq_sum = np.zeros(m)

    for obj in range(n):
        sources = index[obj]
        residual = claim[obj] - truth[obj]
        abs_sum[sources] = abs_sum[sources] + abs(residual)
        sq_sum[sources] = sq_sum[sources] + residual ** 2

    # Only sources that actually made claims are normalised.
    seen = count > 0
    abs_sum[seen] = abs_sum[seen] / count[seen]
    sq_sum[seen] = np.sqrt(sq_sum[seen] / count[seen])

    columns = (abs_sum.reshape(m, 1), sq_sum.reshape(m, 1), count.reshape(m, 1))
    return np.concatenate(columns, axis=1)
def CATD_discret(data, m, n, intl=[]):
    """One CATD weight update followed by a per-entry claim selection.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of the
            source-weight vector and ``n`` the number of entries.
        intl: optional initial truth.  When non-empty it is copied and used
            as the starting truth; otherwise the starting truth comes from
            ``update_truth`` with all-ones weights.  The default list is
            never mutated.

    Returns:
        list: ``[truth, w_vec]`` where ``truth[i]`` is the claim of the
        source with the largest weight among ``index[i]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)

    # The branches used to be swapped: a supplied ``intl`` was ignored and an
    # empty one was copied, leaving an empty truth vector for ``update_w``.
    if len(intl) > 0:
        truth = np.copy(intl)
    else:
        truth = update_truth(claim, index, w_vec, m, n)

    w_vec = update_w(claim, index, count, truth, m, n)

    # Discretise: keep the claim made by the highest-weighted source.
    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][w_vec[index[i]].argmax()]
    return [truth, w_vec]
def CATD_discret(data, m, n, intl=[]):
    """One CATD weight update followed by a per-entry claim selection.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of the
            source-weight vector and ``n`` the number of entries.
        intl: optional initial truth.  When non-empty it is copied and used
            as the starting truth; otherwise the starting truth comes from
            ``update_truth`` with all-ones weights.  The default list is
            never mutated.

    Returns:
        list: ``[truth, w_vec]`` where ``truth[i]`` is the claim of the
        source with the largest weight among ``index[i]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)

    # The branches used to be swapped: a supplied ``intl`` was ignored and an
    # empty one was copied, leaving an empty truth vector for ``update_w``.
    if len(intl) > 0:
        truth = np.copy(intl)
    else:
        truth = update_truth(claim, index, w_vec, m, n)

    w_vec = update_w(claim, index, count, truth, m, n)

    # Discretise: keep the claim made by the highest-weighted source.
    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][w_vec[index[i]].argmax()]
    return [truth, w_vec]
def evaluation_source_single(data, m, n, truth):
    """Per-source MAE and RMSE of the claims against one truth per entry.

    For every entry ``i`` the residual ``claim[i] - truth[i]`` is added
    (absolute and squared) to the slots of the sources in ``index[i]``.
    Sources with a positive ``count`` are then normalised by that count.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of the
            per-source result vectors and ``n`` the number of entries.
        truth: indexable by entry, giving the reference value for entry ``i``.

    Returns:
        numpy.ndarray: shape ``(m, 3)`` with columns MAE, RMSE and count.
    """
    index, claim, count = bsf.extract(data, m, n)
    abs_sum = np.zeros(m)
    sq_sum = np.zeros(m)

    for obj in range(n):
        sources = index[obj]
        residual = claim[obj] - truth[obj]
        abs_sum[sources] = abs_sum[sources] + abs(residual)
        sq_sum[sources] = sq_sum[sources] + residual ** 2

    # Only sources that actually made claims are normalised.
    seen = count > 0
    abs_sum[seen] = abs_sum[seen] / count[seen]
    sq_sum[seen] = np.sqrt(sq_sum[seen] / count[seen])

    columns = (abs_sum.reshape(m, 1), sq_sum.reshape(m, 1), count.reshape(m, 1))
    return np.concatenate(columns, axis=1)
def CRH(data, m, n, tol=1e-3, max_itr=99):
    """Alternate truth and source-weight updates until the truth stabilises.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of the
            source-weight vector and ``n`` the length of the truth vector.
        tol: stop once the relative change of the truth vector is at most
            this value.
        max_itr: upper bound on the number of rounds.

    Returns:
        list: ``[truth, w_vec]``.
    """
    index, claim, _count = bsf.extract(data, m, n)
    w_vec = np.ones(m)
    truth = np.zeros(n)

    err = 99
    itr = 0
    while err > tol and itr < max_itr:
        itr += 1
        truth_old = np.copy(truth)
        truth = update_truth(claim, index, w_vec, m, n)
        w_vec = update_w(claim, index, truth, m, n)

        # ``truth`` starts as all zeros, so the very first round used to
        # divide by a zero norm.  Treat "changed from zero" as an infinite
        # relative change (keep iterating) and "still zero" as converged.
        change = la.norm(truth - truth_old)
        scale = la.norm(truth_old)
        if scale > 0:
            err = change / scale
        else:
            err = np.inf if change > 0 else 0.0
    return [truth, w_vec]
def evaluation_source_multiple(data, m, n, truth):
    """Per-source MAE and RMSE when each entry may have several truths.

    Column 2 of ``data[i]`` holds, per claim, the position in ``truth[i]``
    that the claim is compared with; a negative value marks the claim as
    noise.  Sources with at least one noise claim get ``count = -1`` and are
    therefore skipped by the final normalisation.

    Parameters:
        data: indexable by entry; ``data[i][:, 2]`` is read here and the
            whole object is forwarded to ``bsf.extract``.
        m: length of the per-source result vectors.
        n: number of entries.
        truth: ``truth[i]`` is an array indexed by the non-negative cluster
            ids of entry ``i``.

    Returns:
        list: ``[mae, rmse, count]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    mae = np.zeros(m)
    rmse = np.zeros(m)
    for i in range(n):
        cluster_ind = data[i][:, 2]
        ind = np.array(index[i])
        assigned = cluster_ind >= 0
        index_new = list(ind[assigned])
        index_noise = list(ind[cluster_ind < 0])

        # Cast before indexing: the ids come out of a data column and a
        # float-valued column is rejected as an index by current NumPy.
        tmp = truth[i][cluster_ind[assigned].astype(int)]

        # NOTE(review): claim[i] is used unmasked while tmp only covers the
        # non-noise claims, so the shapes agree only when entry i has no
        # noise claim -- confirm against what bsf.extract returns.
        mae[index_new] = mae[index_new] + abs(claim[i] - tmp)
        rmse[index_new] = rmse[index_new] + (claim[i] - tmp) ** 2
        count[index_noise] = -1  # exclude these sources from the averages

    valid = count > 0
    mae[valid] = mae[valid] / count[valid]
    rmse[valid] = np.sqrt(rmse[valid] / count[valid])
    return [mae, rmse, count]
def CATD(data, m, n, intl=[], tol=0.1, max_itr=10):
    """Alternate CATD weight and truth updates until the weights stabilise.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of the
            source-weight vector.
        intl: optional initial truth.  When non-empty it is copied and used
            as the starting truth; otherwise the starting truth comes from
            ``update_truth`` with all-ones weights.  The default list is
            never mutated.
        tol: stop once the relative change of the weight vector is at most
            this value.
        max_itr: upper bound on the number of rounds.

    Returns:
        list: ``[truth, w_vec]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)

    # The branches used to be swapped: a supplied ``intl`` was ignored and an
    # empty one was copied, leaving an empty truth vector for ``update_w``.
    if len(intl) > 0:
        truth = np.copy(intl)
    else:
        truth = update_truth(claim, index, w_vec, m, n)

    err = 99
    itr = 0
    while err > tol and itr < max_itr:
        w_old = np.copy(w_vec)
        w_vec = update_w(claim, index, count, truth, m, n)
        truth = update_truth(claim, index, w_vec, m, n)
        # Convergence is measured on the weights, not on the truth.
        err = la.norm(w_old - w_vec) / la.norm(w_old)
        itr += 1
    return [truth, w_vec]
def CATD(data, m, n, intl=[], tol=0.1, max_itr=10):
    """Alternate CATD weight and truth updates until the weights stabilise.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of the
            source-weight vector.
        intl: optional initial truth.  When non-empty it is copied and used
            as the starting truth; otherwise the starting truth comes from
            ``update_truth`` with all-ones weights.  The default list is
            never mutated.
        tol: stop once the relative change of the weight vector is at most
            this value.
        max_itr: upper bound on the number of rounds.

    Returns:
        list: ``[truth, w_vec]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)

    # The branches used to be swapped: a supplied ``intl`` was ignored and an
    # empty one was copied, leaving an empty truth vector for ``update_w``.
    if len(intl) > 0:
        truth = np.copy(intl)
    else:
        truth = update_truth(claim, index, w_vec, m, n)

    err = 99
    itr = 0
    while err > tol and itr < max_itr:
        w_old = np.copy(w_vec)
        w_vec = update_w(claim, index, count, truth, m, n)
        truth = update_truth(claim, index, w_vec, m, n)
        # Convergence is measured on the weights, not on the truth.
        err = la.norm(w_old - w_vec) / la.norm(w_old)
        itr += 1
    return [truth, w_vec]
def RKDE(data, m, n, rho_para=np.array([0.5, 0.75, 0.85]), tol=1e-3, max_itr=99, method="Gaussian"):
    """Compute RKDE weights for each of the ``n`` entries independently.

    A norm matrix is first built from uniform weights and turned into the
    per-entry parameters ``a_vec`` by ``compute_a``; then ``RKDE_single`` is
    run once per entry with all-ones source weights.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``.
        rho_para: forwarded to ``compute_a`` (not modified here).
        tol, max_itr: forwarded to every ``RKDE_single`` call.
        method: kernel name forwarded to the helpers.

    Returns:
        list: the weight vector returned by ``RKDE_single`` for each entry.
    """
    index, claim, _count = bsf.extract(data, m, n)

    # Uniform weights are only needed to build the initial norm matrix.
    uniform_w = []
    for i in range(n):
        size = len(index[i])
        uniform_w.append(np.ones(size) / size)

    kernel_M = bsf.get_kernel_matrix(claim, n, method)
    norm_M = bsf.get_norm_matrix(kernel_M, n, uniform_w, method)
    c_vec = np.ones(m)
    a_vec = compute_a(norm_M, n, rho_para)

    w_M = []
    for i in range(n):
        kernel_m = np.copy(kernel_M[i])  # copy so kernel_M itself stays untouched
        # Pass the caller's tolerance through; it used to be pinned to 1e-3
        # here, silently ignoring the ``tol`` argument.
        w_i, _norm = RKDE_single(index[i], m, n, c_vec, kernel_m, a_vec[i, :],
                                 method, max_itr=max_itr, tol=tol)
        w_M.append(w_i)
    return w_M
def evaluation_source_multiple(data, m, n, truth):
    """Per-source MAE and RMSE when each entry may have several truths.

    Column 2 of ``data[i]`` holds, per claim, the position in ``truth[i]``
    that the claim is compared with; a negative value marks the claim as
    noise.  Sources with at least one noise claim get ``count = -1`` and are
    therefore skipped by the final normalisation.

    Parameters:
        data: indexable by entry; ``data[i][:, 2]`` is read here and the
            whole object is forwarded to ``bsf.extract``.
        m: length of the per-source result vectors.
        n: number of entries.
        truth: ``truth[i]`` is an array indexed by the non-negative cluster
            ids of entry ``i``.

    Returns:
        list: ``[mae, rmse, count]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    mae = np.zeros(m)
    rmse = np.zeros(m)
    for i in range(n):
        cluster_ind = data[i][:, 2]
        ind = np.array(index[i])
        assigned = cluster_ind >= 0
        index_new = list(ind[assigned])
        index_noise = list(ind[cluster_ind < 0])

        # Cast before indexing: the ids come out of a data column and a
        # float-valued column is rejected as an index by current NumPy.
        tmp = truth[i][cluster_ind[assigned].astype(int)]

        # NOTE(review): claim[i] is used unmasked while tmp only covers the
        # non-noise claims, so the shapes agree only when entry i has no
        # noise claim -- confirm against what bsf.extract returns.
        mae[index_new] = mae[index_new] + abs(claim[i] - tmp)
        rmse[index_new] = rmse[index_new] + (claim[i] - tmp) ** 2
        count[index_noise] = -1  # exclude these sources from the averages

    valid = count > 0
    mae[valid] = mae[valid] / count[valid]
    rmse[valid] = np.sqrt(rmse[valid] / count[valid])
    return [mae, rmse, count]
def CRH_discret(data, m, n, tol=1e-3, max_itr=99):
    """Run the CRH iteration, then pick one observed claim per entry.

    The loop alternates ``update_truth`` and ``update_w`` until the relative
    change of the truth vector is at most ``tol`` or ``max_itr`` rounds have
    run.  The continuous truth is then replaced: for each entry the returned
    value is the claim of the source with the largest weight among
    ``index[i]``.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of the
            source-weight vector and ``n`` the number of entries.
        tol, max_itr: stopping rule.

    Returns:
        list: ``[truth, w_vec]``.
    """
    index, claim, _count = bsf.extract(data, m, n)
    w_vec = np.ones(m)
    truth = np.zeros(n)

    err = 99
    itr = 0
    while err > tol and itr < max_itr:
        itr += 1
        truth_old = np.copy(truth)
        truth = update_truth(claim, index, w_vec, m, n)
        w_vec = update_w(claim, index, truth, m, n)

        # ``truth`` starts as all zeros, so the very first round used to
        # divide by a zero norm.  Treat "changed from zero" as an infinite
        # relative change (keep iterating) and "still zero" as converged.
        change = la.norm(truth - truth_old)
        scale = la.norm(truth_old)
        if scale > 0:
            err = change / scale
        else:
            err = np.inf if change > 0 else 0.0

    # Discretise: keep the claim made by the highest-weighted source.
    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][w_vec[index[i]].argmax()]
    return [truth, w_vec]
def TruthFinder(data, m, n, tol=0.1, max_itr=10):
    """Iterate claim scores and source scores, then pick the best claim.

    Each round recomputes the claim scores ``s_set`` from the source scores
    ``tau_vec`` (``update_claim``) and then the source scores from the claim
    scores (``update_source``).  Iteration stops when one minus the cosine
    similarity between consecutive ``tau_vec`` is at most ``tol`` or after
    ``max_itr`` rounds.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of
            ``tau_vec`` and ``n`` the number of entries.
        tol, max_itr: stopping rule.

    Returns:
        list: ``[truth, tau_vec]`` where ``truth[i]`` is the claim of entry
        ``i`` with the highest score in ``s_set[i]``.

    Side effects:
        Prints ``<itr> <err>`` after every round.
    """
    index, claim, _count = bsf.extract(data, m, n)
    tau_vec = -np.log(1 - np.ones(m) * 0.9)  # every source starts at 0.9
    rho = 0.5
    gamma = 0.3

    s_set = None
    err = 99
    itr = 0
    while err > tol and itr < max_itr:
        itr += 1
        tau_old = np.copy(tau_vec)
        s_set = update_claim(claim, index, tau_vec, m, n, rho, gamma)
        tau_vec = update_source(claim, index, s_set, m, n)
        err = 1 - np.dot(tau_vec, tau_old) / (la.norm(tau_vec) * la.norm(tau_old))
        # Single pre-formatted argument: same output on Python 2 and 3.
        print("%s %s" % (itr, err))

    if s_set is None:
        # The loop never ran (e.g. max_itr <= 0); score the claims once from
        # the initial source scores instead of failing on an unbound name.
        s_set = update_claim(claim, index, tau_vec, m, n, rho, gamma)

    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][np.argmax(s_set[i])]
    return [truth, tau_vec]
def TruthFinder(data, m, n, tol=0.1, max_itr=10):
    """Iterate claim scores and source scores, then pick the best claim.

    Each round recomputes the claim scores ``s_set`` from the source scores
    ``tau_vec`` (``update_claim``) and then the source scores from the claim
    scores (``update_source``).  Iteration stops when one minus the cosine
    similarity between consecutive ``tau_vec`` is at most ``tol`` or after
    ``max_itr`` rounds.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``; ``m`` is the length of
            ``tau_vec`` and ``n`` the number of entries.
        tol, max_itr: stopping rule.

    Returns:
        list: ``[truth, tau_vec]`` where ``truth[i]`` is the claim of entry
        ``i`` with the highest score in ``s_set[i]``.

    Side effects:
        Prints ``<itr> <err>`` after every round.
    """
    index, claim, _count = bsf.extract(data, m, n)
    tau_vec = -np.log(1 - np.ones(m) * 0.9)  # every source starts at 0.9
    rho = 0.5
    gamma = 0.3

    s_set = None
    err = 99
    itr = 0
    while err > tol and itr < max_itr:
        itr += 1
        tau_old = np.copy(tau_vec)
        s_set = update_claim(claim, index, tau_vec, m, n, rho, gamma)
        tau_vec = update_source(claim, index, s_set, m, n)
        err = 1 - np.dot(tau_vec, tau_old) / (la.norm(tau_vec) * la.norm(tau_old))
        # Single pre-formatted argument: same output on Python 2 and 3.
        print("%s %s" % (itr, err))

    if s_set is None:
        # The loop never ran (e.g. max_itr <= 0); score the claims once from
        # the initial source scores instead of failing on an unbound name.
        s_set = update_claim(claim, index, tau_vec, m, n, rho, gamma)

    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][np.argmax(s_set[i])]
    return [truth, tau_vec]
def KDE_twist(data, m, n, method, argmax=False, h=-1, p_unre=0.1):
    """Run DENCLUE + ``twist_AUC`` per entry and score the noise flags.

    For every entry the claims are passed through ``bsf.DENCLUE`` with
    uniform weights and then through ``twist_AUC``.  A claim counts as
    flagged when its ``ind`` value is negative, and as a positive when its
    source id (column 0 of ``data[i]``) is at least ``int(m * (1 - p_unre))``.
    The four confusion counts are accumulated in ``evl`` for each of the 20
    rows returned by ``twist_AUC`` and turned into an AUC by ``compute_AUC``.

    Parameters:
        data: indexable by entry; forwarded to ``bsf.extract``.
        m: used for the source-id threshold and forwarded to ``bsf.extract``.
        n: forwarded to ``bsf.extract``; afterwards replaced by ``len(claim)``.
        method, h: forwarded to ``bsf.DENCLUE`` (``h`` also to ``twist_AUC``).
        argmax: forwarded to ``twist_AUC``.
        p_unre: fraction used to place the source-id threshold.

    Returns:
        list: ``[rtn_truth, rtn_ind, rtn_conf, auc]``; the first three are
        lists of 20 lists holding one item per entry.
    """
    index, claim, count = bsf.extract(data, m, n)
    n = len(claim)
    # Hard-coded row count; presumably must equal the leading dimension of
    # what twist_AUC returns -- TODO confirm.
    n_rows = 20
    # ``np.int`` was a plain alias of the builtin and no longer exists in
    # current NumPy; the builtin gives the identical value.
    unre_start = int(m * (1 - p_unre))

    truth = []
    ind_c = []
    conf = []
    evl = np.zeros((n_rows, 4))
    for i in range(n):
        l = len(claim[i])
        x_new = bsf.DENCLUE(claim[i], np.ones(l) / l, method, h=h)
        center, ind, conf_i = twist_AUC(x_new, claim[i], np.ones(l) / l, argmax, h)

        flagged = ind < 0
        positive = data[i][:, 0] >= unre_start
        # Columns: flagged & positive, flagged & negative,
        #          kept & positive,    kept & negative.
        evl[:, 0] = evl[:, 0] + np.sum(flagged * positive, axis=1)
        evl[:, 1] = evl[:, 1] + np.sum(flagged * (positive == False), axis=1)
        evl[:, 2] = evl[:, 2] + np.sum((flagged == False) * positive, axis=1)
        evl[:, 3] = evl[:, 3] + np.sum((flagged == False) * (positive == False), axis=1)

        truth.append(center)
        ind_c.append(ind)
        conf.append(conf_i)

    tpr = evl[:, 0] / (evl[:, 0] + evl[:, 2])
    fpr = evl[:, 1] / (evl[:, 1] + evl[:, 3])
    auc = compute_AUC(fpr, tpr)

    # Re-order the per-entry results so the outer index is the row j.
    rtn_truth = [[np.copy(truth[i][j]) for i in range(n)] for j in range(n_rows)]
    rtn_ind = [[np.copy(ind_c[i][j]) for i in range(n)] for j in range(n_rows)]
    rtn_conf = [[np.copy(conf[i][j]) for i in range(n)] for j in range(n_rows)]
    return [rtn_truth, rtn_ind, rtn_conf, auc]
def GTM(data, m, n, intl=[], tol=1e-3, max_itr=99, alpha=10, beta=10, mu0=0, sigma0=1):
    """Alternate the GTM E and M steps until the truth estimate stabilises.

    Each round recomputes the truth with ``E_step`` and ``sigma_vec`` with
    ``M_step``.  Iteration ends when the relative change of the truth is no
    larger than ``tol`` or after ``max_itr`` rounds.

    Parameters:
        data, m, n: forwarded to ``bsf.extract`` and the step functions.
        intl: initial value handed to ``Initialization`` (never mutated here).
        tol, max_itr: stopping rule of the iteration.
        alpha, beta: forwarded to ``Initialization`` and ``M_step``.
        mu0, sigma0: forwarded to ``E_step``.

    Returns:
        list: ``[truth, sigma_vec]``.
    """
    index, claim, _count = bsf.extract(data, m, n)
    truth, sigma_vec = Initialization(intl, claim, index, m, n, alpha, beta)

    rel_change = 99  # large starting value so the loop is entered for usual tol
    rounds = 0
    while rel_change > tol and rounds < max_itr:
        rounds += 1
        previous = np.copy(truth)
        truth = E_step(claim, index, m, n, sigma_vec, mu0, sigma0)
        sigma_vec = M_step(claim, index, m, n, truth, alpha, beta)
        rel_change = la.norm(truth - previous) / la.norm(previous)
    return [truth, sigma_vec]
def KDE_twist(data, m, n, method, argmax=False, h=-1, p_unre=0.1):
    """Run DENCLUE + ``twist_AUC`` per entry and score the noise flags.

    For every entry the claims are passed through ``bsf.DENCLUE`` with
    uniform weights and then through ``twist_AUC``.  A claim counts as
    flagged when its ``ind`` value is negative, and as a positive when its
    source id (column 0 of ``data[i]``) is at least ``int(m * (1 - p_unre))``.
    The four confusion counts are accumulated in ``evl`` for each of the 20
    rows returned by ``twist_AUC`` and turned into an AUC by ``compute_AUC``.

    Parameters:
        data: indexable by entry; forwarded to ``bsf.extract``.
        m: used for the source-id threshold and forwarded to ``bsf.extract``.
        n: forwarded to ``bsf.extract``; afterwards replaced by ``len(claim)``.
        method, h: forwarded to ``bsf.DENCLUE`` (``h`` also to ``twist_AUC``).
        argmax: forwarded to ``twist_AUC``.
        p_unre: fraction used to place the source-id threshold.

    Returns:
        list: ``[rtn_truth, rtn_ind, rtn_conf, auc]``; the first three are
        lists of 20 lists holding one item per entry.
    """
    index, claim, count = bsf.extract(data, m, n)
    n = len(claim)
    # Hard-coded row count; presumably must equal the leading dimension of
    # what twist_AUC returns -- TODO confirm.
    n_rows = 20
    # ``np.int`` was a plain alias of the builtin and no longer exists in
    # current NumPy; the builtin gives the identical value.
    unre_start = int(m * (1 - p_unre))

    truth = []
    ind_c = []
    conf = []
    evl = np.zeros((n_rows, 4))
    for i in range(n):
        l = len(claim[i])
        x_new = bsf.DENCLUE(claim[i], np.ones(l) / l, method, h=h)
        center, ind, conf_i = twist_AUC(x_new, claim[i], np.ones(l) / l, argmax, h)

        flagged = ind < 0
        positive = data[i][:, 0] >= unre_start
        # Columns: flagged & positive, flagged & negative,
        #          kept & positive,    kept & negative.
        evl[:, 0] = evl[:, 0] + np.sum(flagged * positive, axis=1)
        evl[:, 1] = evl[:, 1] + np.sum(flagged * (positive == False), axis=1)
        evl[:, 2] = evl[:, 2] + np.sum((flagged == False) * positive, axis=1)
        evl[:, 3] = evl[:, 3] + np.sum((flagged == False) * (positive == False), axis=1)

        truth.append(center)
        ind_c.append(ind)
        conf.append(conf_i)

    tpr = evl[:, 0] / (evl[:, 0] + evl[:, 2])
    fpr = evl[:, 1] / (evl[:, 1] + evl[:, 3])
    auc = compute_AUC(fpr, tpr)

    # Re-order the per-entry results so the outer index is the row j.
    rtn_truth = [[np.copy(truth[i][j]) for i in range(n)] for j in range(n_rows)]
    rtn_ind = [[np.copy(ind_c[i][j]) for i in range(n)] for j in range(n_rows)]
    rtn_conf = [[np.copy(conf[i][j]) for i in range(n)] for j in range(n_rows)]
    return [rtn_truth, rtn_ind, rtn_conf, auc]
def KDEm(data, m, n, tol=1e-5, max_itr=99, method="Gaussian", h=-1):
    """Iterate the KDEm weight/coefficient updates until ``J`` stabilises.

    Starting from uniform per-entry weights, the loop alternates
    ``update_w`` and ``update_c`` (recomputing the norm matrix in between)
    until the relative change of the objective ``J`` returned by
    ``update_c`` is at most ``tol`` or the counter reaches ``max_itr``.

    Parameters:
        data, m, n: forwarded to ``bsf.extract``.
        tol, max_itr: stopping rule.
        method: kernel name forwarded to the ``bsf`` helpers and the
            update functions.
        h: accepted for interface compatibility; not used in this function.

    Returns:
        list: ``[c_vec, w_M, itr]``.  The counter starts at 1 (the initial
        ``update_c`` call), so at most ``max_itr - 1`` loop rounds run and
        ``itr`` is one more than the number of rounds.

    Side effects:
        Prints ``#iteration: <itr>`` once after the loop.
    """
    index, claim, count = bsf.extract(data, m, n)

    # Uniform initial weights, one vector per entry.
    w_M = []
    for i in range(n):
        size = len(index[i])
        w_M.append(np.ones(size) / size)

    kernel_M = bsf.get_kernel_matrix(claim, n, method)
    norm_M = bsf.get_norm_matrix(kernel_M, n, w_M, method)
    c_vec, J = update_c(index, m, n, count, norm_M, method)

    err = 99
    itr = 1
    while err > tol and itr < max_itr:
        itr += 1
        J_old = J
        c_old = np.copy(c_vec)
        w_M = update_w(index, m, n, c_old, norm_M, method)
        norm_M = bsf.get_norm_matrix(kernel_M, n, w_M, method)
        c_vec, J = update_c(index, m, n, count, norm_M, method)
        # Convergence is measured on the objective, not on c_vec.
        err = abs((J - J_old) / J_old)

    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print("#iteration: %s" % itr)
    return [c_vec, w_M, itr]