Esempio n. 1
0
def GTM_discret(data,
                m,
                n,
                intl=[],
                tol=1e-3,
                max_itr=99,
                alpha=10,
                beta=10,
                mu0=0,
                sigma0=1):
    """Run the E/M iterations, then pick one claimed value per object.

    ``E_step`` and ``M_step`` are alternated until the relative change of
    the truth vector is no larger than ``tol`` or ``max_itr`` passes have
    been made.  The continuous estimate is then discarded: for every object
    the returned truth is the claim made by the source that has the
    smallest entry of ``sigma_vec`` among the sources listed for that
    object.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the per-source arrays (forwarded to the helpers).
    n : number of objects iterated over.
    intl : forwarded to ``Initialization`` (never modified here).
    tol, max_itr : stopping criteria of the iteration.
    alpha, beta : forwarded to ``Initialization`` and ``M_step``.
    mu0, sigma0 : forwarded to ``E_step``.

    Returns
    -------
    list
        ``[truth, sigma_vec]`` with ``truth`` a float array of length ``n``.
    """
    index, claim, count = bsf.extract(data, m, n)
    estimate, sigma_vec = Initialization(intl, claim, index, m, n, alpha,
                                         beta)

    rel_change = 99  # sentinel so the first pass is attempted
    passes = 0
    while rel_change > tol and passes < max_itr:
        passes += 1
        previous = np.copy(estimate)
        estimate = E_step(claim, index, m, n, sigma_vec, mu0, sigma0)
        sigma_vec = M_step(claim, index, m, n, estimate, alpha, beta)
        rel_change = la.norm(estimate - previous) / la.norm(previous)

    # Discrete output: take the claim of the lowest-sigma source per object.
    picked = np.zeros(n)
    for obj in range(n):
        sigma_of_claimants = sigma_vec[index[obj]]
        picked[obj] = claim[obj][sigma_of_claimants.argmin()]
    return [picked, sigma_vec]
Esempio n. 2
0
File: KDEm.py Progetto: wdzhong/KDEm
def KDEm_fast(data, m, n, tol=1e-5, max_itr=99, method="Gaussian", h=-1):
    """Alternate weight and source-score updates on compressed data.

    Every object starts with uniform weights over its claims.  Each pass
    recomputes the weights with ``update_w``, rebuilds the norm matrix and
    refreshes ``c_vec`` and the objective ``J`` with ``update_c``.  The loop
    stops when the relative change of ``J`` is no larger than ``tol`` or
    after ``max_itr`` passes.

    Parameters
    ----------
    data : passed to ``bsf.extract`` and ``bsf.compress``.
    m : forwarded to the helpers (size of the per-source arrays).
    n : number of objects.
    tol, max_itr : stopping criteria.
    method : kernel name forwarded to the ``bsf`` helpers.
    h : accepted for signature compatibility; not used in this function.

    Returns
    -------
    list
        ``[c_vec, w_M, itr]`` where ``itr`` is the number of passes made.
    """
    index, claim, count = bsf.extract(data, m, n)
    data_c = bsf.compress(data)

    # Uniform starting weights, one array per object.
    w_M = []
    for i in range(n):
        n_claims = len(index[i])
        w_M.append(np.ones(n_claims) / n_claims)

    kernel_M, value_M = bsf.get_kernel_matrix_fast(data_c, n, method)
    norm_M = bsf.get_norm_matrix_fast(data_c, kernel_M, value_M, w_M, method)
    c_vec, J = update_c(index, m, n, count, norm_M, method)

    err = 99  # sentinel so the first pass is attempted
    itr = 0
    while err > tol and itr < max_itr:
        itr += 1
        J_old = J
        c_old = np.copy(c_vec)
        w_M = update_w(index, m, n, c_old, norm_M, method)
        norm_M = bsf.get_norm_matrix_fast(data_c, kernel_M, value_M, w_M,
                                          method)
        c_vec, J = update_c(index, m, n, count, norm_M, method)
        err = abs((J - J_old) / J_old)

    # Single-argument form prints the same text on Python 2 and Python 3.
    print("#iteration: %s" % itr)
    return [c_vec, w_M, itr]
Esempio n. 3
0
def RKDE(data,
         m,
         n,
         rho_para=np.array([0.5, 0.75, 0.85]),
         tol=1e-3,
         max_itr=99,
         method="Gaussian"):
    """Compute per-object claim weights by calling ``RKDE_single`` per object.

    Uniform weights are used once to build the norm matrix from which
    ``compute_a`` derives ``a_vec``; afterwards each object is solved
    independently with all source scores fixed to one.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-score vector handed to ``RKDE_single``.
    n : number of objects.
    rho_para : forwarded to ``compute_a`` (not modified here).
    tol : convergence tolerance forwarded to ``RKDE_single``.
    max_itr : iteration cap forwarded to ``RKDE_single``.
    method : kernel name forwarded to the helpers.

    Returns
    -------
    list
        One weight array per object, as returned by ``RKDE_single``.
    """
    index, claim, count = bsf.extract(data, m, n)

    # Uniform weights are only needed to build the initial norm matrix.
    uniform_w = []
    for i in range(n):
        n_claims = len(index[i])
        uniform_w.append(np.ones(n_claims) / n_claims)

    kernel_M = bsf.get_kernel_matrix(claim, n, method)
    norm_M = bsf.get_norm_matrix(kernel_M, n, uniform_w, method)
    c_vec = np.ones(m)
    a_vec = compute_a(norm_M, n, rho_para)

    w_M = []
    for i in range(n):
        # Hand over a copy so the shared kernel matrix is never touched.
        kernel_m = np.copy(kernel_M[i])
        # Forward the caller's tolerance; it used to be pinned to 1e-3 here,
        # which silently ignored the ``tol`` argument.
        w_i, _ = RKDE_single(index[i],
                             m,
                             n,
                             c_vec,
                             kernel_m,
                             a_vec[i, :],
                             method,
                             max_itr=max_itr,
                             tol=tol)
        w_M.append(w_i)
    return w_M
Esempio n. 4
0
def evaluation_source_single(data, m, n, truth):
    """Tabulate per-source MAE, RMSE and claim count against ``truth``.

    For every object the absolute and squared differences between its
    claims and ``truth[i]`` are added to the slots of the sources listed in
    ``index[i]``.  Sources with a positive count are then normalized by
    that count (RMSE additionally takes the square root); other sources
    keep their accumulated values.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(m, 3)`` with columns MAE, RMSE, count.
    """
    index, claim, count = bsf.extract(data, m, n)
    abs_err = np.zeros(m)
    sq_err = np.zeros(m)
    for obj in range(n):
        sources = index[obj]
        deviation = claim[obj] - truth[obj]
        abs_err[sources] += abs(deviation)
        sq_err[sources] += deviation**2

    # Only sources with at least one counted claim are normalized.
    has_claims = count > 0
    abs_err[has_claims] = abs_err[has_claims] / count[has_claims]
    sq_err[has_claims] = np.sqrt(sq_err[has_claims] / count[has_claims])

    columns = [
        abs_err.reshape(m, 1),
        sq_err.reshape(m, 1),
        count.reshape(m, 1),
    ]
    return np.hstack(columns)
Esempio n. 5
0
def CATD_discret(data, m, n, intl=[]):
    """One CATD weight update followed by a discrete truth selection.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-weight vector.
    n : number of objects.
    intl : optional initial truth estimate.  When non-empty it is copied
        and used to compute the weights; when empty the initial truth is
        computed by ``update_truth`` with all weights equal to one.
        The argument is never modified.

    Returns
    -------
    list
        ``[truth, w_vec]``; ``truth[i]`` is the claim made by the source
        with the largest weight among the sources listed for object ``i``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)
    # The two branches used to be swapped: a supplied ``intl`` was ignored
    # and the default empty ``intl`` produced an empty truth array.
    if len(intl) > 0:
        truth = np.copy(intl)
    else:
        truth = update_truth(claim, index, w_vec, m, n)
    w_vec = update_w(claim, index, count, truth, m, n)

    # Discrete output: take the claim of the highest-weight source.
    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][w_vec[index[i]].argmax()]
    return [truth, w_vec]
Esempio n. 6
0
def CATD_discret(data, m, n, intl=[]):
    """Compute CATD source weights once and pick one claim per object.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-weight vector.
    n : number of objects.
    intl : optional initial truth estimate (never modified).  A non-empty
        value seeds the weight computation; an empty one makes the function
        derive the initial truth from ``update_truth`` with unit weights.

    Returns
    -------
    list
        ``[truth, w_vec]`` where ``truth[i]`` is the claim of the
        highest-weight source among those listed for object ``i``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)
    if len(intl) > 0:
        # Use the caller's starting point (this branch was previously
        # inverted, so ``intl`` was discarded whenever it was given).
        truth = np.copy(intl)
    else:
        truth = update_truth(claim, index, w_vec, m, n)
    w_vec = update_w(claim, index, count, truth, m, n)

    truth = np.zeros(n)
    for i in range(n):
        best = w_vec[index[i]].argmax()
        truth[i] = claim[i][best]
    return [truth, w_vec]
Esempio n. 7
0
def evaluation_source_single(data, m, n, truth):
    """Return an ``(m, 3)`` table of per-source MAE, RMSE and claim count.

    Absolute and squared deviations of each object's claims from
    ``truth[i]`` are accumulated into the entries of the sources in
    ``index[i]``.  Entries whose count is positive are divided by the count
    (with a square root for the RMSE column); the rest are left as
    accumulated.
    """
    index, claim, count = bsf.extract(data, m, n)
    mae_acc = np.zeros(m)
    mse_acc = np.zeros(m)
    for k in range(n):
        who = index[k]
        delta = claim[k] - truth[k]
        mae_acc[who] = mae_acc[who] + abs(delta)
        mse_acc[who] = mse_acc[who] + delta**2

    counted = count > 0
    mae_acc[counted] = mae_acc[counted] / count[counted]
    mse_acc[counted] = np.sqrt(mse_acc[counted] / count[counted])

    # Stack the three length-m vectors side by side as columns.
    table = np.concatenate(
        (mae_acc.reshape(m, 1), mse_acc.reshape(m, 1), count.reshape(m, 1)),
        axis=1)
    return table
Esempio n. 8
0
def CRH(data, m, n, tol=1e-3, max_itr=99):
    """Alternate truth and source-weight updates until the truth settles.

    Starting from unit weights, each pass recomputes the truth with
    ``update_truth`` and the weights with ``update_w``.  Iteration stops
    when the relative change of the truth vector is no larger than ``tol``
    or after ``max_itr`` passes.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-weight vector.
    n : length of the truth vector.
    tol, max_itr : stopping criteria.

    Returns
    -------
    list
        ``[truth, w_vec]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)
    truth = np.zeros(n)
    err = 99  # sentinel so the first pass is attempted
    itr = 0
    while err > tol and itr < max_itr:
        itr += 1
        truth_old = np.copy(truth)
        truth = update_truth(claim, index, w_vec, m, n)
        w_vec = update_w(claim, index, truth, m, n)
        change = la.norm(truth - truth_old)
        scale = la.norm(truth_old)
        if scale > 0:
            err = change / scale
        else:
            # ``truth`` starts as all zeros, so on the first pass there is
            # no scale to normalize by.  Avoid the divide-by-zero warning:
            # any movement counts as "not converged", none as converged.
            err = np.inf if change > 0 else 0.0
    return [truth, w_vec]
Esempio n. 9
0
def evaluation_source_multiple(data, m, n, truth):
    """Per-source MAE/RMSE when each claim is assigned to a truth cluster.

    Column 2 of ``data[i]`` holds, for each claim of object ``i``, the index
    of the entry of ``truth[i]`` it is compared against; a negative value
    marks the claim as noise.  Non-noise claims contribute their absolute
    and squared deviation to their source's totals.  Any source that made a
    noise claim gets its count set to ``-1`` and is therefore skipped by the
    final normalization.

    Assumes ``claim[i]`` and ``index[i]`` are aligned row-for-row with
    ``data[i]`` (to be confirmed against ``bsf.extract``).

    Returns
    -------
    list
        ``[mae, rmse, count]``, each of length ``m``; ``count`` carries the
        ``-1`` markers described above.
    """
    index, claim, count = bsf.extract(data, m, n)
    mae = np.zeros(m)
    rmse = np.zeros(m)
    for i in range(n):
        cluster_ind = data[i][:, 2]
        keep = cluster_ind >= 0
        ind = np.array(index[i])
        index_new = list(ind[keep])
        index_noise = list(ind[cluster_ind < 0])
        # Cast the labels to int so the lookup also works when ``data`` is
        # stored as a float array (float indices are rejected by NumPy).
        tmp = np.asarray(truth[i])[cluster_ind[keep].astype(int)]
        # Drop the noise claims as well; otherwise the shapes of the claims
        # and of ``tmp`` disagree whenever an object has a noise claim.
        diff = np.asarray(claim[i])[keep] - tmp
        mae[index_new] = mae[index_new] + abs(diff)
        rmse[index_new] = rmse[index_new] + diff**2
        count[index_noise] = -1
    valid = count > 0
    mae[valid] = mae[valid] / count[valid]
    rmse[valid] = np.sqrt(rmse[valid] / count[valid])
    return [mae, rmse, count]
Esempio n. 10
0
def CATD(data, m, n, intl=[], tol=0.1, max_itr=10):
    """Iterate CATD weight and truth updates until the weights settle.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-weight vector.
    n : number of objects (forwarded to the helpers).
    intl : optional initial truth estimate (never modified).  When
        non-empty it is copied and used as the starting truth; when empty
        the starting truth comes from ``update_truth`` with unit weights.
    tol : stop once the relative change of the weight vector is no larger
        than this.
    max_itr : maximum number of passes.

    Returns
    -------
    list
        ``[truth, w_vec]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)
    # The two branches used to be swapped: a supplied ``intl`` was ignored
    # and the default empty ``intl`` produced an empty truth array.
    if len(intl) > 0:
        truth = np.copy(intl)
    else:
        truth = update_truth(claim, index, w_vec, m, n)

    err = 99  # sentinel so the first pass is attempted
    itr = 0
    while err > tol and itr < max_itr:
        w_old = np.copy(w_vec)
        w_vec = update_w(claim, index, count, truth, m, n)
        truth = update_truth(claim, index, w_vec, m, n)
        err = la.norm(w_old - w_vec) / la.norm(w_old)
        itr += 1
    return [truth, w_vec]
Esempio n. 11
0
def CATD(data, m, n, intl=[], tol=0.1, max_itr=10):
    """Alternate ``update_w`` / ``update_truth`` until the weights converge.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-weight vector.
    n : number of objects (forwarded to the helpers).
    intl : optional initial truth estimate (never modified).  A non-empty
        value is used as the starting truth; otherwise the starting truth
        is computed by ``update_truth`` with all weights equal to one.
    tol : tolerance on the relative change of the weight vector.
    max_itr : maximum number of passes.

    Returns
    -------
    list
        ``[truth, w_vec]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)
    if len(intl) > 0:
        # Use the caller's starting point (this branch was previously
        # inverted, so ``intl`` was discarded whenever it was given).
        truth = np.copy(intl)
    else:
        truth = update_truth(claim, index, w_vec, m, n)

    err = 99  # sentinel so the first pass is attempted
    itr = 0
    while err > tol and itr < max_itr:
        w_old = np.copy(w_vec)
        w_vec = update_w(claim, index, count, truth, m, n)
        truth = update_truth(claim, index, w_vec, m, n)
        err = la.norm(w_old - w_vec) / la.norm(w_old)
        itr += 1
    return [truth, w_vec]
Esempio n. 12
0
def RKDE(data, m, n, rho_para=np.array([0.5,0.75,0.85]), tol=1e-3, max_itr=99, method="Gaussian"):
    """Solve ``RKDE_single`` for every object and collect the weights.

    A norm matrix built from uniform weights is passed to ``compute_a`` to
    obtain ``a_vec``; each object is then solved on its own with the
    source-score vector fixed to ones.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-score vector handed to ``RKDE_single``.
    n : number of objects.
    rho_para : forwarded to ``compute_a`` (not modified here).
    tol : convergence tolerance forwarded to ``RKDE_single``.
    max_itr : iteration cap forwarded to ``RKDE_single``.
    method : kernel name forwarded to the helpers.

    Returns
    -------
    list
        One weight array per object, as returned by ``RKDE_single``.
    """
    index, claim, count = bsf.extract(data, m, n)

    # Uniform weights are only needed to build the initial norm matrix.
    uniform_w = []
    for i in range(n):
        n_claims = len(index[i])
        uniform_w.append(np.ones(n_claims) / n_claims)

    kernel_M = bsf.get_kernel_matrix(claim, n, method)
    norm_M = bsf.get_norm_matrix(kernel_M, n, uniform_w, method)
    c_vec = np.ones(m)
    a_vec = compute_a(norm_M, n, rho_para)

    w_M = []
    for i in range(n):
        # Work on a copy so the shared kernel matrix stays untouched.
        kernel_m = np.copy(kernel_M[i])
        # ``tol`` is now honoured; it was previously hard-coded to 1e-3.
        w_i, _ = RKDE_single(index[i], m, n, c_vec, kernel_m, a_vec[i, :],
                             method, max_itr=max_itr, tol=tol)
        w_M.append(w_i)
    return w_M
Esempio n. 13
0
def evaluation_source_multiple(data, m, n, truth):
    """Per-source MAE/RMSE against cluster-assigned truths.

    ``data[i][:, 2]`` gives, per claim of object ``i``, the index into
    ``truth[i]`` to compare with; negative values mark noise claims.
    Non-noise claims add their absolute and squared deviation to their
    source's totals.  Sources with a noise claim have their count set to
    ``-1``, which excludes them from the final normalization.

    Assumes ``claim[i]`` and ``index[i]`` are aligned row-for-row with
    ``data[i]`` (to be confirmed against ``bsf.extract``).

    Returns
    -------
    list
        ``[mae, rmse, count]``, each of length ``m``; ``count`` includes
        the ``-1`` markers.
    """
    index, claim, count = bsf.extract(data, m, n)
    mae = np.zeros(m)
    rmse = np.zeros(m)
    for i in range(n):
        cluster_ind = data[i][:, 2]
        is_assigned = cluster_ind >= 0
        ind = np.array(index[i])
        index_new = list(ind[is_assigned])
        index_noise = list(ind[cluster_ind < 0])
        # Integer cast: NumPy rejects float values used as indices.
        tmp = np.asarray(truth[i])[cluster_ind[is_assigned].astype(int)]
        # Compare only the non-noise claims, so both operands have the
        # same length even when the object has noise claims.
        diff = np.asarray(claim[i])[is_assigned] - tmp
        mae[index_new] = mae[index_new] + abs(diff)
        rmse[index_new] = rmse[index_new] + diff**2
        count[index_noise] = -1
    valid = count > 0
    mae[valid] = mae[valid] / count[valid]
    rmse[valid] = np.sqrt(rmse[valid] / count[valid])
    return [mae, rmse, count]
Esempio n. 14
0
def CRH_discret(data, m, n, tol=1e-3, max_itr=99):
    """Run the CRH iterations, then pick one claimed value per object.

    Truth and source weights are updated alternately until the relative
    change of the truth vector is no larger than ``tol`` or ``max_itr``
    passes have been made.  The iterated truth is then replaced: for each
    object the result is the claim of the source with the largest weight
    among the sources listed for that object.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-weight vector.
    n : number of objects.
    tol, max_itr : stopping criteria.

    Returns
    -------
    list
        ``[truth, w_vec]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    w_vec = np.ones(m)
    truth = np.zeros(n)
    err = 99  # sentinel so the first pass is attempted
    itr = 0
    while err > tol and itr < max_itr:
        itr += 1
        truth_old = np.copy(truth)
        truth = update_truth(claim, index, w_vec, m, n)
        w_vec = update_w(claim, index, truth, m, n)
        change = la.norm(truth - truth_old)
        scale = la.norm(truth_old)
        if scale > 0:
            err = change / scale
        else:
            # ``truth`` starts as all zeros, so the first pass would divide
            # by zero.  Any movement counts as "not converged".
            err = np.inf if change > 0 else 0.0

    # Discrete output: take the claim of the highest-weight source.
    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][w_vec[index[i]].argmax()]
    return [truth, w_vec]
Esempio n. 15
0
def TruthFinder(data, m, n, tol=0.1, max_itr=10):
    """Iterate claim scores and source scores, then pick the best claim.

    Every source starts with the score ``-log(1 - 0.9)``.  Each pass
    computes claim scores with ``update_claim`` and source scores with
    ``update_source``.  The error is one minus the cosine similarity of
    consecutive source-score vectors; iteration stops once it is no larger
    than ``tol`` or after ``max_itr`` passes.  The pass number and error
    are printed on every pass.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-score vector.
    n : number of objects.
    tol, max_itr : stopping criteria.

    Returns
    -------
    list
        ``[truth, tau_vec]``; ``truth[i]`` is the claim of object ``i``
        with the largest entry in ``s_set[i]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    tau_vec = -np.log(1 - np.ones(m) * 0.9)
    rho = 0.5
    gamma = 0.3
    err = 99  # sentinel so the first pass is attempted
    itr = 0
    s_set = None
    while err > tol and itr < max_itr:
        itr += 1
        tau_old = np.copy(tau_vec)
        s_set = update_claim(claim, index, tau_vec, m, n, rho, gamma)
        tau_vec = update_source(claim, index, s_set, m, n)
        err = 1 - np.dot(tau_vec, tau_old) / (la.norm(tau_vec) *
                                              la.norm(tau_old))
        # Single-argument form prints the same text on Python 2 and 3.
        print("%s %s" % (itr, err))

    if s_set is None:
        # The loop did not run (e.g. max_itr <= 0); score the claims once
        # with the initial source scores so a truth can still be selected.
        s_set = update_claim(claim, index, tau_vec, m, n, rho, gamma)

    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][np.argmax(s_set[i])]
    return [truth, tau_vec]
Esempio n. 16
0
def TruthFinder(data, m, n, tol=0.1, max_itr=10):
    """Alternate ``update_claim`` / ``update_source`` and select truths.

    Source scores start at ``-log(1 - 0.9)``.  After each pass the error is
    one minus the cosine similarity between the new and the previous
    source-score vector; the loop ends when it is no larger than ``tol``
    or after ``max_itr`` passes.  The pass number and error are printed on
    every pass.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : length of the source-score vector.
    n : number of objects.
    tol, max_itr : stopping criteria.

    Returns
    -------
    list
        ``[truth, tau_vec]``; ``truth[i]`` is the claim of object ``i``
        with the largest entry in ``s_set[i]``.
    """
    index, claim, count = bsf.extract(data, m, n)
    tau_vec = -np.log(1 - np.ones(m) * 0.9)
    rho = 0.5
    gamma = 0.3
    err = 99  # sentinel so the first pass is attempted
    itr = 0
    s_set = None
    while err > tol and itr < max_itr:
        itr += 1
        tau_old = np.copy(tau_vec)
        s_set = update_claim(claim, index, tau_vec, m, n, rho, gamma)
        tau_vec = update_source(claim, index, s_set, m, n)
        cosine = np.dot(tau_vec,
                        tau_old) / (la.norm(tau_vec) * la.norm(tau_old))
        err = 1 - cosine
        # Parenthesised single string: identical output on Python 2 and 3.
        print("%s %s" % (itr, err))

    if s_set is None:
        # No pass was made (e.g. max_itr <= 0): compute the claim scores
        # once from the initial source scores instead of failing below.
        s_set = update_claim(claim, index, tau_vec, m, n, rho, gamma)

    truth = np.zeros(n)
    for i in range(n):
        truth[i] = claim[i][np.argmax(s_set[i])]
    return [truth, tau_vec]
Esempio n. 17
0
def KDE_twist(data, m, n, method, argmax=False, h=-1, p_unre=0.1):
    """Cluster each object's claims and score noise detection with an AUC.

    For every object the claims are passed through ``bsf.DENCLUE`` with
    uniform weights and then to ``twist_AUC``, which returns centers,
    per-claim labels and confidences.  A negative label is treated as
    "flagged"; a claim counts as coming from an unreliable source when
    column 0 of ``data[i]`` is at least ``int(m * (1 - p_unre))``.  The four
    flagged/unreliable combinations are counted per row of the labels and
    turned into TPR/FPR values for ``compute_AUC``.

    The code expects 20 rows from ``twist_AUC`` (presumably one per
    threshold setting - to be confirmed against ``twist_AUC``).

    Parameters
    ----------
    data : passed to ``bsf.extract``; column 0 of ``data[i]`` is also read.
    m : used for the unreliable-source cut-off.
    n : passed to ``bsf.extract``; afterwards replaced by ``len(claim)``.
    method : kernel name forwarded to ``bsf.DENCLUE``.
    argmax, h : forwarded to ``twist_AUC`` (``h`` also to ``bsf.DENCLUE``).
    p_unre : fraction used in the unreliable-source cut-off.

    Returns
    -------
    list
        ``[rtn_truth, rtn_ind, rtn_conf, auc]``; the first three are lists
        of 20 lists, each holding one entry per object.
    """
    n_rows = 20
    index, claim, count = bsf.extract(data, m, n)
    n = len(claim)
    truth = []
    ind_c = []
    conf = []
    # Columns: flagged & unreliable, flagged & reliable,
    #          kept & unreliable,    kept & reliable.
    evl = np.zeros((n_rows, 4))
    # Builtin int replaces the ``np.int`` alias removed in NumPy 1.24.
    unre_start = int(m * (1 - p_unre))
    for i in range(n):
        n_claims = len(claim[i])
        x_new = bsf.DENCLUE(claim[i], np.ones(n_claims) / n_claims, method,
                            h=h)
        center, ind, conf_i = twist_AUC(x_new, claim[i],
                                        np.ones(n_claims) / n_claims, argmax,
                                        h)
        flagged = ind < 0
        unreliable = data[i][:, 0] >= unre_start
        evl[:, 0] = evl[:, 0] + np.sum(flagged & unreliable, axis=1)
        evl[:, 1] = evl[:, 1] + np.sum(flagged & ~unreliable, axis=1)
        evl[:, 2] = evl[:, 2] + np.sum(~flagged & unreliable, axis=1)
        evl[:, 3] = evl[:, 3] + np.sum(~flagged & ~unreliable, axis=1)
        truth.append(center)
        ind_c.append(ind)
        conf.append(conf_i)
    tpr = evl[:, 0] / (evl[:, 0] + evl[:, 2])
    fpr = evl[:, 1] / (evl[:, 1] + evl[:, 3])
    auc = compute_AUC(fpr, tpr)

    # Regroup from "per object, per row" to "per row, per object".
    rtn_truth = []
    rtn_ind = []
    rtn_conf = []
    for j in range(n_rows):
        rtn_truth.append([np.copy(truth[i][j]) for i in range(n)])
        rtn_ind.append([np.copy(ind_c[i][j]) for i in range(n)])
        rtn_conf.append([np.copy(conf[i][j]) for i in range(n)])
    return [rtn_truth, rtn_ind, rtn_conf, auc]
Esempio n. 18
0
def GTM(data,
        m,
        n,
        intl=[],
        tol=1e-3,
        max_itr=99,
        alpha=10,
        beta=10,
        mu0=0,
        sigma0=1):
    """Alternate ``E_step`` and ``M_step`` until the truth vector settles.

    The starting truth and ``sigma_vec`` come from ``Initialization``.
    Iteration ends when the relative change of the truth vector is no
    larger than ``tol`` or after ``max_itr`` passes.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m, n : forwarded to the helpers.
    intl : forwarded to ``Initialization`` (never modified here).
    tol, max_itr : stopping criteria.
    alpha, beta : forwarded to ``Initialization`` and ``M_step``.
    mu0, sigma0 : forwarded to ``E_step``.

    Returns
    -------
    list
        ``[truth, sigma_vec]`` from the last pass.
    """
    index, claim, count = bsf.extract(data, m, n)
    estimate, sigma_vec = Initialization(intl, claim, index, m, n, alpha,
                                         beta)

    rel_change = 99  # sentinel so the first pass is attempted
    passes = 0
    while rel_change > tol and passes < max_itr:
        passes += 1
        previous = np.copy(estimate)
        estimate = E_step(claim, index, m, n, sigma_vec, mu0, sigma0)
        sigma_vec = M_step(claim, index, m, n, estimate, alpha, beta)
        rel_change = la.norm(estimate - previous) / la.norm(previous)
    return [estimate, sigma_vec]
Esempio n. 19
0
def KDE_twist(data, m, n, method, argmax=False, h=-1, p_unre=0.1):
    """Run DENCLUE + ``twist_AUC`` per object and compute a detection AUC.

    Each object's claims go through ``bsf.DENCLUE`` (uniform weights) and
    ``twist_AUC``, yielding centers, per-claim labels and confidences.
    Negative labels count as "flagged"; claims whose column-0 value in
    ``data[i]`` is at least ``int(m * (1 - p_unre))`` count as coming from
    an unreliable source.  Per label row, the four flagged/unreliable
    combinations are tallied and converted to TPR/FPR for ``compute_AUC``.

    The code expects 20 rows from ``twist_AUC`` (presumably one per
    threshold setting - to be confirmed against ``twist_AUC``).

    Parameters
    ----------
    data : passed to ``bsf.extract``; column 0 of ``data[i]`` is also read.
    m : used for the unreliable-source cut-off.
    n : passed to ``bsf.extract``; afterwards replaced by ``len(claim)``.
    method : kernel name forwarded to ``bsf.DENCLUE``.
    argmax, h : forwarded to ``twist_AUC`` (``h`` also to ``bsf.DENCLUE``).
    p_unre : fraction used in the unreliable-source cut-off.

    Returns
    -------
    list
        ``[rtn_truth, rtn_ind, rtn_conf, auc]``; the first three are lists
        of 20 lists, each holding one entry per object.
    """
    n_rows = 20
    index, claim, count = bsf.extract(data, m, n)
    n = len(claim)
    truth = []
    ind_c = []
    conf = []
    # Columns: flagged & unreliable, flagged & reliable,
    #          kept & unreliable,    kept & reliable.
    evl = np.zeros((n_rows, 4))
    # ``np.int`` no longer exists in NumPy >= 1.24; builtin int is the
    # same conversion.
    cutoff = int(m * (1 - p_unre))
    for i in range(n):
        n_claims = len(claim[i])
        x_new = bsf.DENCLUE(claim[i], np.ones(n_claims) / n_claims, method,
                            h=h)
        center, ind, conf_i = twist_AUC(x_new, claim[i],
                                        np.ones(n_claims) / n_claims, argmax,
                                        h)
        flagged = ind < 0
        unreliable = data[i][:, 0] >= cutoff
        evl[:, 0] = evl[:, 0] + np.sum(flagged & unreliable, axis=1)
        evl[:, 1] = evl[:, 1] + np.sum(flagged & ~unreliable, axis=1)
        evl[:, 2] = evl[:, 2] + np.sum(~flagged & unreliable, axis=1)
        evl[:, 3] = evl[:, 3] + np.sum(~flagged & ~unreliable, axis=1)
        truth.append(center)
        ind_c.append(ind)
        conf.append(conf_i)
    tpr = evl[:, 0] / (evl[:, 0] + evl[:, 2])
    fpr = evl[:, 1] / (evl[:, 1] + evl[:, 3])
    auc = compute_AUC(fpr, tpr)

    # Regroup from "per object, per row" to "per row, per object".
    rtn_truth = []
    rtn_ind = []
    rtn_conf = []
    for j in range(n_rows):
        rtn_truth.append([np.copy(truth[i][j]) for i in range(n)])
        rtn_ind.append([np.copy(ind_c[i][j]) for i in range(n)])
        rtn_conf.append([np.copy(conf[i][j]) for i in range(n)])
    return [rtn_truth, rtn_ind, rtn_conf, auc]
Esempio n. 20
0
def KDEm(data, m, n, tol=1e-5, max_itr=99, method="Gaussian", h=-1):
    """Alternate weight and source-score updates until ``J`` stabilizes.

    Every object starts with uniform weights over its claims.  Each pass
    recomputes the weights with ``update_w``, rebuilds the norm matrix and
    refreshes ``c_vec`` and the objective ``J`` with ``update_c``.  The loop
    stops when the relative change of ``J`` is no larger than ``tol`` or
    when the counter reaches ``max_itr``.

    Parameters
    ----------
    data : passed unchanged to ``bsf.extract``.
    m : forwarded to the helpers (size of the per-source arrays).
    n : number of objects.
    tol, max_itr : stopping criteria.
    method : kernel name forwarded to the ``bsf`` helpers.
    h : accepted for signature compatibility; not used in this function.

    Returns
    -------
    list
        ``[c_vec, w_M, itr]``.  ``itr`` starts at 1, so it equals the
        number of passes made plus one.
    """
    index, claim, count = bsf.extract(data, m, n)

    # Uniform starting weights, one array per object.
    w_M = []
    for i in range(n):
        n_claims = len(index[i])
        w_M.append(np.ones(n_claims) / n_claims)

    kernel_M = bsf.get_kernel_matrix(claim, n, method)
    norm_M = bsf.get_norm_matrix(kernel_M, n, w_M, method)
    c_vec, J = update_c(index, m, n, count, norm_M, method)

    err = 99  # sentinel so the first pass is attempted
    itr = 1  # starts at 1, so at most max_itr - 1 passes are made
    while err > tol and itr < max_itr:
        itr += 1
        J_old = J
        c_old = np.copy(c_vec)
        w_M = update_w(index, m, n, c_old, norm_M, method)
        norm_M = bsf.get_norm_matrix(kernel_M, n, w_M, method)
        c_vec, J = update_c(index, m, n, count, norm_M, method)
        err = abs((J - J_old) / J_old)

    # Single-argument form prints the same text on Python 2 and Python 3.
    print("#iteration: %s" % itr)
    return [c_vec, w_M, itr]