Exemple #1
0
def run_one(srcPath=None,
            tgtPath=None,
            prlPath=None,
            prlSize=None,
            source_n_features=None,
            target_n_features=None,
            kernel_type='cosine'):
    """Run one experiment on an eigen-based exponentiated kernel.

    The kernel returned by ``DataClass.get_TL_Kernel`` is reduced with
    ``DataClass.reduce_para`` (using ``prlSize``), turned into ``K_exp`` by
    ``get_K_exp_by_eigen``, clipped at zero and handed to ``cvxopt_solver``.

    Args:
        srcPath, tgtPath, prlPath: forwarded unchanged to ``DataClass``.
        prlSize: forwarded to ``DataClass.reduce_para``.
        source_n_features, target_n_features: forwarded to ``DataClass``.
        kernel_type: forwarded to ``DataClass``.

    Returns:
        The ``(auc, acc)`` pair produced by ``eval_binary``.
    """
    dataset_options = dict(srcPath=srcPath,
                           tgtPath=tgtPath,
                           prlPath=prlPath,
                           valid_flag=False,
                           zero_diag_flag=False,
                           source_data_type='full',
                           source_n_features=source_n_features,
                           target_n_features=target_n_features,
                           kernel_type=kernel_type,
                           kernel_normal=False)
    dc = DataClass(**dataset_options)

    y, I, K, offset = dc.get_TL_Kernel()
    y, I, K, offset = DataClass.reduce_para(y, I, K, offset, prlSize)

    # Eigen-decompose K; at most 128 components are requested.
    v_s, Q_s, v_t, Q_t = eigen_decompose(K, offset, max_k=128)

    # Both hyper-parameters are fixed at 2^-10 for this run.
    beta = 2 ** -10
    wreg = 2 ** -10

    K_exp = get_K_exp_by_eigen(K, offset, v_s, Q_s, v_t, Q_t, beta)
    # Negative entries are clipped to zero in place before solving.
    K_exp[K_exp < 0] = 0

    y_true, y_prob = cvxopt_solver(y, I, K_exp, offset, wreg)
    auc, acc = eval_binary(y_true, y_prob)
    return auc, acc
    def experiment(self, name_text_file):
        """Build a complete directed graph from an input file and query it.

        The file named by ``name_text_file`` is read through ``DataClass``.
        Every ordered pair of distinct node indices in
        ``range(inputs.n + 2)`` receives an edge whose weight is the
        Euclidean norm of the difference of the two nodes' entries in
        ``inputs.input_information``.

        Args:
            name_text_file: name of the file to read; converted with ``str``.

        Returns:
            Whatever ``Graph.dijkstar_output(0)`` returns.
        """
        inputs = DataClass()
        inputs.Read(str(name_text_file))

        # Create the graph and register the node names read from the file.
        g = Graph()
        g.set_node_names(inputs.names)

        # Both (i, j) and (j, i) are inserted on purpose: the edge weight may
        # later be redefined so that it is no longer symmetric, so no
        # "visited" shortcut is used to halve the work.
        for i in range(inputs.n + 2):
            for j in range(inputs.n + 2):
                if i == j:
                    # No self-loops.
                    continue
                calculate_risk = np.linalg.norm(
                    inputs.input_information[str(i)] -
                    inputs.input_information[str(j)])
                # Insert the edge between the two nodes with its weight.
                g.insert_edge(calculate_risk, int(i), int(j))

        return g.dijkstar_output(0)
Exemple #3
0
def run_testset(kernel_type='cosine',
                log_2gs=-12,
                log_2gt=-12,
                log_2w=-2,
                log_2p=-1,
                complete_flag=1,
                zero_diag_flag=True):
    """Evaluate one hyper-parameter setting and print auc / ap / rl.

    Args:
        kernel_type: assigned to ``dc.kernel_type``.
        log_2gs: log2 of the value assigned to ``dc.source_gamma``.
        log_2gt: log2 of the value assigned to ``dc.target_gamma``.
        log_2w: log2 of the last argument given to ``solve_and_eval``.
        log_2p: log2 of the sparsification argument; ``-1`` keeps the full
            kernel (no call to ``DataClass.sym_sparsify_K``).
        complete_flag: when equal to 1, the kernel is passed through
            ``DataClass.complete_TL_Kernel`` first.
        zero_diag_flag: assigned to ``dc.zero_diag_flag``.

    Returns:
        None. The metrics are printed.
    """
    dc = DataClass(valid_flag=False)
    dc.kernel_type = kernel_type
    dc.source_gamma = 2 ** log_2gs
    dc.target_gamma = 2 ** log_2gt
    dc.zero_diag_flag = zero_diag_flag
    y, I, K, offset = dc.get_TL_Kernel()

    # Convert the log2 hyper-parameters to their actual values.
    w_2 = 2 ** log_2w
    p = 2 ** log_2p

    if complete_flag == 1:
        K = DataClass.complete_TL_Kernel(K, offset)

    # log_2p == -1 is the sentinel for "use the full kernel, do not sparsify".
    if log_2p != -1:
        K = DataClass.sym_sparsify_K(K, p)

    auc, ap, rl = solve_and_eval(y, I, K, offset, w_2)
    print('tst test: auc %6f ap %6f rl %6f' % (auc, ap, rl))
Exemple #4
0
def grid_search(gList,
                wList,
                pList,
                complete_flag=False,
                zero_diag_flag=False):
    """Grid-search gamma, weight and sparsification exponents by AUC.

    For every combination of source gamma, target gamma, ``w`` and ``p`` the
    kernel is rebuilt / sparsified, ``solve_and_eval`` is called, and one line
    of metrics is printed. The combination with the highest AUC is printed at
    the end.

    Args:
        gList: log2 exponents; each value is tried for both
            ``dc.source_gamma`` and ``dc.target_gamma`` (as ``2.0**g``).
        wList: log2 exponents; ``2.0**w`` is the last argument given to
            ``solve_and_eval``.
        pList: log2 exponents for ``DataClass.sym_sparsify_K``. ``-1`` means
            "full kernel, no sparsification" and is prepended when it is not
            already the first element.
        complete_flag: when equal to 1 (``True``), each kernel is passed
            through ``DataClass.complete_TL_Kernel``.
        zero_diag_flag: forwarded to ``DataClass``.

    Returns:
        None. Results are printed.

    Raises:
        Warning: when ``kernel_type`` is ``'cosine'`` and ``gList`` does not
            have exactly one element.
    """
    dc = DataClass(valid_flag=True,
                   zero_diag_flag=zero_diag_flag,
                   kernel_normal=False)
    # Initial kernel build; the results are recomputed for every gamma pair
    # in the loop below.
    y, I, K, offset = dc.get_TL_Kernel()

    best_auc = 0.0
    best_gs = 0.0
    best_gt = 0.0
    best_w = 0.0
    best_p = -1

    # Make sure the "full kernel" sentinel (-1) is always evaluated first.
    # Note that np.insert turns pList into an ndarray, so the elements
    # iterated below may be NumPy integers.
    if pList[0] != -1:
        pList = np.insert(pList, 0, -1)

    # NOTE(review): kernel_type is not a parameter or local of this function;
    # presumably it is a module-level global -- confirm against the full file.
    if kernel_type == 'cosine' and len(gList) != 1:
        raise Warning('For cosine kernel, no need to tune gamma!')

    for g_s in gList:
        for g_t in gList:
            dc.source_gamma = 2.0**g_s
            dc.target_gamma = 2.0**g_t
            y, I, K, offset = dc.get_TL_Kernel()
            if complete_flag == 1:
                K = DataClass.complete_TL_Kernel(K, offset)

            for w in wList:
                w_2 = 2.0**w

                for p in pList:
                    if p == -1:
                        K_sp = K
                    else:
                        # Float base: an integer base raised to a negative
                        # NumPy integer raises ValueError.
                        K_sp = DataClass.sym_sparsify_K(K, 2.0**p)
                    auc, ap, rl = solve_and_eval(y, I, K_sp, offset, w_2)
                    print('log_2gs %3d log_2gt %3d log_2w %3d log_2p %3d auc %6f ap %6f rl %6f' \
                            % (g_s, g_t, w, p, auc, ap, rl))
                    if auc > best_auc:
                        best_auc = auc
                        best_gs = g_s
                        best_gt = g_t
                        best_w = w
                        best_p = p

    print('best parameters: log_2gs %3d log_2gt %3d log_2w %3d log_2p %3d auc %6f' \
            % (best_gs, best_gt, best_w, best_p, best_auc))
Exemple #5
0
def run_testset(kernel_type='cosine',
                zero_diag_flag=True,
                kernel_normal=False,
                log2_b=None,
                log2_w=None,
                log2_p=None):
    """Evaluate one (beta, w, p) setting with the ``get_K_exp`` kernel.

    Args:
        kernel_type: assigned to ``dc.kernel_type``.
        zero_diag_flag: assigned to ``dc.zero_diag_flag``.
        kernel_normal: forwarded to ``DataClass`` and to ``get_K_exp``.
        log2_b: log2 of the ``beta`` argument of ``get_K_exp``.
        log2_w: log2 of the last argument given to ``solve_and_eval``.
        log2_p: log2 of the sparsification argument; ``-1`` skips
            ``DataClass.sym_sparsify_K`` and uses the kernel as is.

    Returns:
        None. The metrics are printed.
    """
    dc = DataClass(valid_flag=False, kernel_normal=kernel_normal)
    dc.kernel_type = kernel_type
    dc.zero_diag_flag = zero_diag_flag
    y, I, K, offset = dc.get_TL_Kernel()

    # Eigen-decompose K; at most 128 components are requested.
    v_s, Q_s, v_t, Q_t = eigen_decompose(K, offset, max_k=128)

    beta = 2 ** log2_b
    # get_K_exp receives a copy, so K itself is not handed to it.
    K_exp = get_K_exp(K.copy(), offset, v_s, Q_s, v_t, Q_t, beta,
                      kernel_normal)

    # log2_p == -1 is the sentinel for "do not sparsify".
    K_sp = (K_exp if log2_p == -1
            else DataClass.sym_sparsify_K(K_exp, 2 ** log2_p))

    auc, ap, rl = solve_and_eval(y, I, K_sp, offset, 2 ** log2_w)
    print('test set: auc %6f ap %6f rl %6f' % (auc, ap, rl))
Exemple #6
0
def run_one(srcPath=None,
            tgtPath=None,
            prlPath=None,
            source_n_features=None,
            target_n_features=None):
    """Run one experiment directly on the kernel from ``get_SSL_Kernel``.

    Args:
        srcPath, tgtPath, prlPath: forwarded unchanged to ``DataClass``.
        source_n_features, target_n_features: forwarded to ``DataClass``.

    Returns:
        The ``(auc, acc)`` pair produced by ``eval_binary``.
    """
    dataset_options = dict(srcPath=srcPath,
                           tgtPath=tgtPath,
                           prlPath=prlPath,
                           valid_flag=False,
                           zero_diag_flag=True,
                           source_data_type='full',
                           source_n_features=source_n_features,
                           target_n_features=target_n_features,
                           kernel_type='cosine',
                           kernel_normal=False)
    dc = DataClass(**dataset_options)
    y, I, K, offset = dc.get_SSL_Kernel()

    # The kernel is used without sparsification; wreg is fixed at 2^-12.
    wreg = 2 ** -12
    y_true, y_prob = cvxopt_solver(y, I, K, offset, wreg)

    auc, acc = eval_binary(y_true, y_prob)
    return auc, acc
Exemple #7
0
def run_one(srcPath=None,
            tgtPath=None,
            prlPath=None,
            source_n_features=None,
            target_n_features=None):
    """Run one experiment with the eigen-based kernel and ``sgd_solver``.

    Args:
        srcPath, tgtPath, prlPath: forwarded unchanged to ``DataClass``.
        source_n_features, target_n_features: forwarded to ``DataClass``.

    Returns:
        The ``(auc, acc)`` pair produced by ``eval_binary``.
    """
    dataset_options = dict(srcPath=srcPath,
                           tgtPath=tgtPath,
                           prlPath=prlPath,
                           valid_flag=False,
                           zero_diag_flag=False,
                           source_data_type='full',
                           source_n_features=source_n_features,
                           target_n_features=target_n_features,
                           kernel_type='cosine',
                           kernel_normal=False)
    dc = DataClass(**dataset_options)
    y, I, K, offset = dc.get_TL_Kernel()

    # Force a unit diagonal so that normalizing the Laplacian later on does
    # not produce NaN values.
    np.fill_diagonal(K, 1)

    # Eigen-decompose K; at most 128 components are requested.
    v_s, Q_s, v_t, Q_t = eigen_decompose(K, offset, max_k=128)
    K_exp = get_K_exp_by_eigen(K, offset, v_s, Q_s, v_t, Q_t)

    # Solve with SGD: 1000 epochs, step size 2^-1, gamma 2^-10.
    solver_options = dict(gamma=2 ** -10, nr_epoch=1000, stepsize=2 ** -1)
    y_true, y_prob, f = sgd_solver(y, I, K_exp, offset, **solver_options)

    auc, acc = eval_binary(y_true, y_prob)
    return auc, acc
Exemple #8
0
def run_testset(kernel_type='cosine',
                zero_diag_flag=False,
                kernel_normal=False,
                alpha=None,
                log2_w=None,
                log2_p=None):
    """Evaluate one (alpha, w, p) setting with the ``get_K_rw`` kernel.

    Args:
        kernel_type: assigned to ``dc.kernel_type``.
        zero_diag_flag: assigned to ``dc.zero_diag_flag``.
        kernel_normal: forwarded to ``DataClass``.
        alpha: forwarded to ``get_K_rw``.
        log2_w: log2 of the last argument given to ``solve_and_eval``.
        log2_p: log2 of the sparsification argument; ``-1`` skips
            ``DataClass.sym_sparsify_K`` and uses the kernel as is.

    Returns:
        None. The metrics are printed.
    """
    dc = DataClass(valid_flag=False, kernel_normal=kernel_normal)
    dc.kernel_type = kernel_type
    dc.zero_diag_flag = zero_diag_flag
    y, I, K, offset = dc.get_TL_Kernel()

    # get_K_rw receives a copy, so K itself is not handed to it.
    K_rw = get_K_rw(K.copy(), offset, alpha=alpha)

    # log2_p == -1 is the sentinel for "do not sparsify".
    K_sp = (K_rw if log2_p == -1
            else DataClass.sym_sparsify_K(K_rw, 2 ** log2_p))

    auc, ap, rl = solve_and_eval(y, I, K_sp, offset, 2 ** log2_w)
    print('test set: auc %8f ap %6f rl %6f' % (auc, ap, rl))
Exemple #9
0
def grid(kernel_type='cosine',
         zero_diag_flag=True,
         kernel_normal=False,
         bList=None,
         wList=None,
         pList=None):
    """Grid-search (beta, w, p) exponents for the ``get_K_exp`` kernel by AUC.

    Every combination is evaluated with ``solve_and_eval`` and printed; the
    combination with the highest AUC is printed at the end.

    Args:
        kernel_type: assigned to ``dc.kernel_type``.
        zero_diag_flag: assigned to ``dc.zero_diag_flag``.
        kernel_normal: forwarded to ``DataClass`` and to ``get_K_exp``.
        bList: log2 exponents; ``2**log2_b`` is the ``beta`` argument of
            ``get_K_exp``.
        wList: log2 exponents; ``2**log2_w`` is the last argument given to
            ``solve_and_eval``.
        pList: log2 exponents for ``DataClass.sym_sparsify_K``; ``-1`` means
            "do not sparsify".

    Returns:
        None. Results are printed.
    """
    dc = DataClass(valid_flag=True, kernel_normal=kernel_normal)
    dc.kernel_type = kernel_type
    dc.zero_diag_flag = zero_diag_flag
    y, I, K, offset = dc.get_TL_Kernel()

    # The decomposition does not depend on beta, so it is computed once.
    v_s, Q_s, v_t, Q_t = eigen_decompose(K, offset, max_k=128)

    best_b = -1
    best_w = -1
    best_p = -1
    best_auc = -1
    for log2_b in bList:
        beta = 2**log2_b
        # get_K_exp receives a copy, so K stays available for the next beta.
        K_exp = get_K_exp(K.copy(), offset, v_s, Q_s, v_t, Q_t, beta,
                          kernel_normal)
        for log2_w in wList:
            for log2_p in pList:
                if log2_p == -1:
                    K_sp = K_exp
                else:
                    K_sp = DataClass.sym_sparsify_K(K_exp, 2**log2_p)
                auc, ap, rl = solve_and_eval(y, I, K_sp, offset, 2**log2_w)
                print(
                    'log2_b %3d log2_w %3d log2_p %3d auc %8f ap %6f rl %6f' %
                    (log2_b, log2_w, log2_p, auc, ap, rl))
                # Track the best setting. The running best AUC must be
                # updated too, otherwise every later candidate would win.
                if best_auc < auc:
                    best_b = log2_b
                    best_w = log2_w
                    best_p = log2_p
                    best_auc = auc

    print('best parameters: log2_b %3d log2_w %3d log2_p %3d auc %6f' \
            % (best_b, best_w, best_p, best_auc))
Exemple #10
0
def grid(kernel_type='cosine',
         zero_diag_flag=False,
         kernel_normal=False,
         aList=None,
         wList=None,
         pList=None):
    """Grid-search (alpha, w, p) for the ``get_K_rw`` kernel by AUC.

    Every combination is evaluated with ``solve_and_eval`` and printed; the
    combination with the highest AUC is printed at the end.

    Args:
        kernel_type: assigned to ``dc.kernel_type``.
        zero_diag_flag: assigned to ``dc.zero_diag_flag``.
        kernel_normal: forwarded to ``DataClass``.
        aList: values tried for the ``alpha`` argument of ``get_K_rw``.
        wList: log2 exponents; ``2**log2_w`` is the last argument given to
            ``solve_and_eval``.
        pList: log2 exponents for ``DataClass.sym_sparsify_K``; ``-1`` means
            "do not sparsify".

    Returns:
        None. Results are printed.
    """
    dc = DataClass(valid_flag=True, kernel_normal=kernel_normal)
    dc.kernel_type = kernel_type
    dc.zero_diag_flag = zero_diag_flag
    y, I, K, offset = dc.get_TL_Kernel()

    best_a = best_w = best_p = best_auc = -1

    for alpha in aList:
        # get_K_rw receives a copy, so K stays available for the next alpha.
        K_rw = get_K_rw(K.copy(), offset, alpha=alpha)

        for log2_w in wList:
            for log2_p in pList:
                # -1 is the sentinel for "keep the full kernel".
                K_sp = (K_rw if log2_p == -1
                        else DataClass.sym_sparsify_K(K_rw, 2**log2_p))
                auc, ap, rl = solve_and_eval(y, I, K_sp, offset, 2**log2_w)
                print('alpha %3d log2_w %3d log2_p %3d auc %8f ap %6f rl %6f' %
                      (alpha, log2_w, log2_p, auc, ap, rl))
                if auc > best_auc:
                    best_a, best_w, best_p = alpha, log2_w, log2_p
                    best_auc = auc

    print('best parameters: alpha %3d log2_w %3d log2_p %3d auc %6f' \
            % (best_a, best_w, best_p, best_auc))
import numpy as np
from Find_Max_Path import Graph
from dataclass import DataClass

# Load the problem description from the file named 'data'.
inputs = DataClass()
inputs.Read('data')

# Create the graph and register the node names read from the file.
g = Graph()
g.set_node_names(inputs.names)

# Connect every ordered pair of distinct nodes. Both (i, j) and (j, i) are
# inserted on purpose: the edge weight may later be redefined so that it is
# no longer symmetric, so no "visited" shortcut is used to halve the work.
for i in range(inputs.n + 2):
    for j in range(inputs.n + 2):
        if i == j:
            # No self-loops.
            continue
        # Edge weight: Euclidean norm of the difference of the two entries.
        calculate_risk = np.linalg.norm(inputs.input_information[str(i)] -
                                        inputs.input_information[str(j)])
        g.insert_edge(calculate_risk, int(i), int(j))

# Pretty-printer with two-space indentation.
import pprint
pp = pprint.PrettyPrinter(indent=2)