Example #1
def config_initialization():
    # image shape and feature layers shape inference
    config.default_config()
    image_shape = (config.train_image_height, config.train_image_width)

    if not config.dataset_path:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    util.init_logger(
        log_file='log_train_pixel_link_%d_%d.log' % image_shape,
        log_path=config.train_dir, stdout=False, mode='a')

    # config.load_config(config.train_dir)
    config.init_config(image_shape,
                       batch_size=config.batch_size,
                       weight_decay=config.weight_decay,
                       num_gpus=config.num_gpus
                       )
    config.default_config()
    config.score_map_shape = (config.train_image_height // config.strides[0],
                              config.train_image_width // config.strides[0])
    height = config.train_image_height
    score_map = config.score_map_shape
    stride = config.strides[0]
    batch_size = config.batch_size
    batch_size_per_gpu = config.batch_size_per_gpu

    util.proc.set_proc_name('train_pixel_link_on' + '_' + config.dataset_name)
Example #2
def cnmf(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    alpha = cfg[post + '_alpha']
    beta = cfg[post + '_beta']
    H = H * dot(W.T, V) / (dot(W.T, dot(W, H)) + beta * H + eps)
    W = W * dot(V, H.T) / (dot(W, dot(H, H.T)) + alpha * W + eps)
    return W, H
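
A minimal usage sketch for the multiplicative update above (not from the original project): the config dict, shapes and iteration count are hypothetical, and `dot` is assumed to come from numpy, as in the snippet.

import numpy as np

# hypothetical stand-in for config.default_config(); only the keys cnmf reads with post=''
cfg = {'eps': 1e-9, '_alpha': 0.1, '_beta': 0.1}

V = np.abs(np.random.rand(100, 50))   # non-negative data matrix
W = np.abs(np.random.rand(100, 10))   # factor initializations
H = np.abs(np.random.rand(10, 50))

for _ in range(50):                   # repeat the regularized multiplicative updates
    W, H = cnmf(V, W, H, post='', cfg=cfg)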
Example #3
def __init__(self):
    self.window = tk.Tk()
    self.window.title("ReachMaster")
    self.window.configure(bg="white")
    self.window.protocol("WM_DELETE_WINDOW", self.on_quit)
    self.config = config.default_config()
    config.save_tmp(self.config)
    self.data_dir = tk.StringVar()
    self.data_dir.set(self.config['ReachMaster']['data_dir'])
    self.config_file = tk.StringVar()
    self.config_file.set(self.config['ReachMaster']['config_file'])
    self.port_list = expint.get_ports()
    self.exp_control_port = tk.StringVar()
    self.rob_control_port = tk.StringVar()
    if self.config['ReachMaster']['exp_control_port'] in self.port_list:
        self.exp_control_port.set(self.config['ReachMaster']['exp_control_port'])
    else:
        self.exp_control_port.set(self.port_list[0])
    if self.config['ReachMaster']['rob_control_port'] in self.port_list:
        self.rob_control_port.set(self.config['ReachMaster']['rob_control_port'])
    else:
        self.rob_control_port.set(self.port_list[0])
    self.protocol_list = protocols.list_protocols()
    self.protocol = tk.StringVar()
    self.protocol.set(self.protocol_list[0])
    self.running = False
    self.exp_connected = False
    self.rob_connected = False
    self.protocol_running = False
    self.child = None
    self._configure_window()
Example #4
def cnmf(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    alpha = cfg[post + '_alpha']
    beta = cfg[post + '_beta']
    H = H * dot(W.T, V) / (dot(W.T, dot(W, H)) + beta * H + eps)
    W = W * dot(V, H.T) / (dot(W, dot(H, H.T)) + alpha * W + eps)
    return W, H
Example #5
def gen_init(cfg=config.default_config()):
    """Generate real valued initialization matrices.
       - Return:
       Phi
       Theta
       - Used params:
       N
       T
       M
       phi_init
       phi_sparsity
       theta_init
       theta_sparsity
    """
    N = cfg['N']
    T = cfg['T']
    M = cfg['M']

    gen_phi = getattr(generators, cfg['phi_init'])
    cfg['rows'] = N
    cfg['cols'] = T
    cfg['sparsity'] = cfg['phi_sparsity']
    Phi = gen_phi(cfg)

    gen_theta = getattr(generators, cfg['theta_init'])
    cfg['rows'] = T
    cfg['cols'] = M
    cfg['sparsity'] = cfg['theta_sparsity']
    Theta = gen_theta(cfg)

    return (Phi, Theta)
Example #6
def gen_real(cfg=config.default_config()):
    """Generate matrices with real values for model experiment.
       - Return:
       F
       Phi_r
       Theta_r
       - Used params:
       N
       T_0
       M
       gen_phi
       real_phi_sparsity
       gen_theta
       real_theta_sparsity
    """
    N = cfg['N']
    T_0 = cfg['T_0']
    M = cfg['M']

    gen_phi = getattr(generators, cfg['gen_phi'])
    cfg['rows'] = N
    cfg['cols'] = T_0
    cfg['sparsity'] = cfg['real_phi_sparsity']
    Phi_r = gen_phi(cfg)

    gen_theta = getattr(generators, cfg['gen_theta'])
    cfg['rows'] = T_0
    cfg['cols'] = M
    cfg['sparsity'] = cfg['real_theta_sparsity']
    Theta_r = gen_theta(cfg)

    F = np.dot(Phi_r, Theta_r)
    for i in xrange(F.shape[1]):
        F[:, i] = F[:,i] * np.random.randint(100,8000)
    return (F, Phi_r, Theta_r)
Example #7
def mult_kl(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = V / (dot(W, H) + eps)
    W0 = W * dot(tmp, H.T)
    W = W0 / tile(maximum(sum(H.T, 0), eps), (W0.shape[0], 1))
    H0 = H * dot(W.T, tmp)
    H = H0 / tile(maximum(sum(W.T, 1), eps), (1, H0.shape[1]))
    return W, H
Example #8
def als(V, W, H, post='', cfg=config.default_config()):
    #print('Alternating Least Squares.')
    eps = cfg['eps']
    H = linalg.solve(dot(W.T, W) + eye(W.shape[1]) * eps, dot(W.T, V))
    H[H < eps] = 0
    W = linalg.solve(dot(H, H.T) + eye(H.shape[0]) * eps, dot(H, V.T)).T
    W[W < eps] = 0
    return (W, H)
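
A hedged usage sketch for the ALS step above (illustrative shapes only): each call solves two ridge-regularized least-squares problems and clips small entries to zero.

import numpy as np

cfg = {'eps': 1e-9}                   # hypothetical config; only 'eps' is read here
V = np.abs(np.random.rand(80, 40))
W = np.abs(np.random.rand(80, 5))
H = np.abs(np.random.rand(5, 40))

for _ in range(30):
    W, H = als(V, W, H, cfg=cfg)      # alternate exact solves for H and W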
Example #9
def als(V, W, H, post='', cfg=config.default_config()):
    #print('Alternating Least Squares.')
    eps = cfg['eps']
    H = linalg.solve(dot(W.T, W) + eye(W.shape[1]) * eps, dot(W.T, V))
    H[H < eps] = 0
    W = linalg.solve(dot(H, H.T) + eye(H.shape[0]) * eps, dot(H, V.T)).T
    W[W < eps] = 0
    return (W, H)
Example #10
def load_csv(name, cfg=config.default_config()):
    V = np.loadtxt(open(join(cfg['data_dir'], name + '_V.csv'), 'r'),
                   delimiter=',')
    W = np.loadtxt(open(join(cfg['data_dir'], name + '_W.csv'), 'r'),
                   delimiter=',')
    H = np.loadtxt(open(join(cfg['data_dir'], name + '_H.csv'), 'r'),
                   delimiter=',')
    return (V, W, H)
Example #11
def mult_kl(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = V / (dot(W, H) + eps)
    W0 = W * dot(tmp, H.T)
    W = W0 / tile(maximum(sum(H.T, 0), eps), (W0.shape[0], 1))
    H0 = H * dot(W.T, tmp)
    H = H0 / tile(maximum(sum(W.T, 1), eps), (1, H0.shape[1]))
    return W, H
Example #12
    def __init__(self):
        config.default_config()

        self.height = config.train_image_height
        self.width = config.train_image_width

        self.input_shape = (self.height, self.width, 3)
        # self.input = Input(tensor=image)
        self.input = Input(shape=self.input_shape)
        self.width_multiplier = 1
        # training will change
        self.is_training = True
        pixel_cls_logits, pixel_link_logits = self.create_model()

        output = [pixel_cls_logits, pixel_link_logits]
        # self.model = keras.models.Model(inputs=self.input, outputs=[pixel_cls_logits, pixel_link_logits])
        merged = concatenate([pixel_cls_logits, pixel_link_logits], axis=-1)
        self.model = keras.models.Model(inputs=self.input, outputs=merged)
Example #13
def hals(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    T = H.shape[0]
    W0 = W
    H0 = H
    for k in range(T):
        R = V - dot(W0, H0) + dot(W0[:, [k]], H0[[k], :])
        H0[k, :] = maximum(dot(R.T, W0[:, k]), 0).T / maximum(sum(W0[:, k] ** 2, 0), eps)
        W0[:, k] =  maximum(dot(R, H0[k, :].T), 0) / maximum(sum(H0[k, :] ** 2), eps)
    return W, H
Example #14
def reduce_cluster(D, num_clusters, params=config.default_config()):
    print('Clustering:')
    D = ascontiguousarray(D.astype('float32'))
    centroids, qerr, dis, labels, nassign = ynumpy.kmeans(D, num_clusters, init='kmeans++', nt=params['num_threads'], output='all', redo=3, niter=params['kmeans_max_iter'], verbose=False)
    #kmeans = KMeans(n_init=1, n_clusters=params['num_clusters'], n_jobs=2, max_iter=params['kmeans_max_iter'])
    #kmeans.fit(D)
    print('Done.')
    #centroids = kmeans.cluster_centers_
    #labels = kmeans.labels_
    return centroids, labels
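
A usage sketch under stated assumptions: the yael/ynumpy dependency used above must be installed, the params keys mirror the two the function actually reads, and all sizes are illustrative.

import numpy as np

params = {'num_threads': 4, 'kmeans_max_iter': 30}        # hypothetical values
D = np.random.rand(1000, 64)                              # 1000 points in 64 dimensions
centroids, labels = reduce_cluster(D, 50, params=params)  # 50 clusters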
Example #15
def show_matrices_recovered(W_r, H_r, W, H, cfg=config.default_config(), permute=True):
    if permute:
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
    else:
        idx = np.array([range(W.shape[1]), range(W.shape[1])])
    #f, axarr = plt.subplots(nrows=1, ncols=1)
    #axarr[0, 0].imshow(1-W_r, cmap='gray')
    #axarr[0, 0].set_title('W real')
    #axarr[0, 1].imshow(1-H_r, cmap='gray')
    #axarr[0, 1].set_title('H real')
    plt.matshow(1-W[:, idx[:, 1]], cmap=plt.cm.gray)
Example #16
def show_matrices_recovered(W_r, H_r, W, H, cfg=config.default_config(), permute=True):
    if permute:
        idx = get_permute(W_r, H_r, W, H, cfg["munkres"])
    else:
        idx = np.array([range(W.shape[1]), range(W.shape[1])])
    # f, axarr = plt.subplots(nrows=1, ncols=1)
    # axarr[0, 0].imshow(1-W_r, cmap='gray')
    # axarr[0, 0].set_title('W real')
    # axarr[0, 1].imshow(1-H_r, cmap='gray')
    # axarr[0, 1].set_title('H real')
    plt.matshow(1 - W[:, idx[:, 1]], cmap=plt.cm.gray)
Example #17
def hals(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    T = H.shape[0]
    W0 = W
    H0 = H
    for k in range(T):
        R = V - dot(W0, H0) + dot(W0[:, [k]], H0[[k], :])
        H0[k, :] = maximum(dot(R.T, W0[:, k]), 0).T / maximum(
            sum(W0[:, k]**2, 0), eps)
        W0[:, k] = maximum(dot(R, H0[k, :].T), 0) / maximum(
            sum(H0[k, :]**2), eps)
    return W, H
Example #18
def plsa3D(V, W, H, post='', cfg=config.default_config()):
    #print('Probabilistic Latent Semantic Analysis.')
    eps = cfg['eps']
    (N, M) = V.shape
    T = H.shape[0]
    V3 = V.reshape(N, M, 1).repeat(T, 2).swapaxes(1, 2)
    W3 = W.reshape(N, T, 1).repeat(M, 2)
    H3 = H.T.reshape(M, T, 1).repeat(N, 2).swapaxes(0, 2)
    Q3 = dot(W, H).reshape(N, M, 1).repeat(T, 2).swapaxes(1, 2)
    Z = V3 * W3 * H3 / (Q3 + eps)
    W = normalize_cols(sum(Z, 2).reshape(N, T))
    H = normalize_cols(sum(Z, 0).reshape(T, M))
    return W, H
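
A quick arithmetic sketch (illustrative sizes, not from the project) of why this dense 3-D formulation is memory-hungry: each of the intermediate tensors V3, W3, H3 and Q3 holds N*T*M floats.

N, T, M = 5000, 50, 2000              # hypothetical vocabulary, topic and document counts
bytes_per_float = 8
tensors = 4                           # V3, W3, H3, Q3 in the snippet above
print(tensors * N * T * M * bytes_per_float / 1e9, "GB")  # 16.0 GB at these sizes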
Example #19
def plsa3D(V, W, H, post='', cfg=config.default_config()):
    #print('Probabilistic Latent Semantic Analysis.')
    eps = cfg['eps']
    (N, M) = V.shape
    T = H.shape[0]
    V3 = V.reshape(N, M, 1).repeat(T, 2).swapaxes(1, 2)
    W3 = W.reshape(N, T, 1).repeat(M, 2)
    H3 = H.T.reshape(M, T, 1).repeat(N, 2).swapaxes(0, 2)
    Q3 = dot(W, H).reshape(N, M, 1).repeat(T, 2).swapaxes(1, 2)
    Z = V3 * W3 * H3 / (Q3 + eps)
    W = normalize_cols(sum(Z, 2).reshape(N, T))
    H = normalize_cols(sum(Z, 0).reshape(T, M))
    return W, H
Example #20
def gen_init(cfg=config.default_config()):
    N = cfg['N']
    T = cfg['T']
    M = cfg['M']
    gen_phi = getattr(generators, cfg['phi_init'])
    cfg['rows'] = N
    cfg['cols'] = T
    cfg['sparsity'] = cfg['phi_sparsity']
    W = gen_phi(cfg)
    gen_theta = getattr(generators, cfg['theta_init'])
    cfg['rows'] = T
    cfg['cols'] = M
    cfg['sparsity'] = cfg['theta_sparsity']
    H = gen_theta(cfg)
    return (W, H)
Example #21
def gen_init(cfg=config.default_config()):
    N = cfg['N']
    T = cfg['T']
    M = cfg['M']
    gen_phi = getattr(generators, cfg['phi_init'])
    cfg['rows'] = N
    cfg['cols'] = T
    cfg['sparsity'] = cfg['phi_sparsity']
    W = gen_phi(cfg)
    gen_theta = getattr(generators, cfg['theta_init'])
    cfg['rows'] = T
    cfg['cols'] = M
    cfg['sparsity'] = cfg['theta_sparsity']
    H = gen_theta(cfg)
    return (W, H)
Example #22
def load_dataset(cfg=config.default_config()):
    """Load or generate dataset.
       - Return:
       F
       vocab
       N
       M
       Phi_r
       Theta_r 
       - Used params:
       load_data
       data_name?
    """
    if cfg['load_data'] == 'uci' or cfg['load_data'] == 1:
        print("uci")
        F, vocab = data.load_uci(cfg['data_name'], cfg)
        N, M = F.shape
        cfg['N'], cfg['M'] = F.shape
        print('Dimensions of F:', N, M)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, vocab, N, M, None, None
    elif cfg['load_data'] == 2:
        F, Phi_r, Theta_r = gen_real(cfg)
        print(Phi_r)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, None, F.shape[0], F.shape[1], Phi_r, Theta_r
    elif cfg['load_data'] == 3:
        print("uci halfmodel", cfg["alpha"])
        F, vocab = data.load_uci(cfg['data_name'], cfg)
        N, M = F.shape
        cfg['N'], cfg['M'] = F.shape
        Phi_r, Theta_r = load_obj('Phi_'+cfg['data_name']), load_obj('Theta_'+cfg['data_name'])
        F_merged = merge_halfmodel(F, Phi_r, Theta_r, cfg)
        print('Dimensions of F:', N, M)
        print('Checking assumption on F:', np.sum(F_merged, axis=0).max())
        return F_merged, vocab, N, M, Phi_r, Theta_r
    elif cfg['load_data'] == 4:
        F = np.eye(cfg['T'])
        cfg['N'], cfg['M'] = F.shape
        Phi_r = np.eye(cfg['T'])
        Theta_r = np.eye(cfg['T'])
        return F, None, cfg['T'], cfg['T'], Phi_r, Theta_r
    elif cfg['load_data'] == 5:
        cfg['real_theta_sparsity'] = 1.
        cfg['real_phi_sparsity'] = 1.
        F, Phi_r, Theta_r = gen_real(cfg)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, None, F.shape[0], F.shape[1], Phi_r, Theta_r
Example #23
def load_uci(name, cfg=config.default_config()):
    print('Loading data in UCI format.')
    print('From:', cfg['data_dir'])
    print('Collection name:', name)
    N = 0
    with open(join(cfg['data_dir'], 'docword.' + name + '.txt'), 'r') as f:
        M = int(f.readline())
        N = int(f.readline())
        D = np.zeros((N, M), dtype='float32')
        f.readline()
        for line in f:
            d, w, nwd = [int(x) for x in line.split(' ')]
            D[w-1, d-1] = D[w-1, d-1] + nwd
    vocab = np.arange(N).tolist()
    with open(join(cfg['data_dir'], 'vocab.' + name + '.txt'), 'r') as f:
        vocab = f.read().splitlines()
    return D, vocab
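
A self-contained sketch (hypothetical directory and toy collection name) of the UCI bag-of-words layout the loader above expects: the first two header lines are read as M (documents) and N (words), the third header line is skipped, and each remaining line is a "docID wordID count" triple.

import numpy as np
from os.path import join

cfg = {'data_dir': '/tmp'}            # hypothetical data directory
with open(join(cfg['data_dir'], 'docword.toy.txt'), 'w') as f:
    f.write('3\n4\n5\n')              # 3 documents, 4 words, 5 non-zero triples
    f.write('1 1 2\n1 3 1\n2 2 4\n3 1 1\n3 4 2\n')
with open(join(cfg['data_dir'], 'vocab.toy.txt'), 'w') as f:
    f.write('apple\nbanana\ncherry\ndate\n')

D, vocab = load_uci('toy', cfg)
print(D.shape, vocab)                 # (4, 3) and the four vocabulary entries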
Example #24
def load_uci(name, cfg=config.default_config()):
    print('Loading data in UCI format.')
    print('From:', cfg['data_dir'])
    print('Collection name:', name)
    N = 0
    with open(join(cfg['data_dir'], 'docword.' + name + '.txt'), 'r') as f:
        M = int(f.readline())
        N = int(f.readline())
        D = np.zeros((N, M), dtype='float32')
        f.readline()
        for line in f:
            d, w, nwd = [int(x) for x in line.split(' ')]
            D[w - 1, d - 1] = D[w - 1, d - 1] + nwd
    vocab = np.arange(N).tolist()
    with open(join(cfg['data_dir'], 'vocab.' + name + '.txt'), 'r') as f:
        vocab = f.read().splitlines()
    return D, vocab
Example #25
def reduce_multi_cluster(D, num_clusters, params=config.default_config()):
    print('Clustering:')
    D = ascontiguousarray(D.astype('float32'))
    #ncc = maximum(minimum(random.poisson(num_clusters, 15), 1000), 15)
    N = D.shape[0]
    #ncc = array([20, 50, 100, 250, 500, 1000, 2000, 4000, 6000])
    ncc = array([25 * (2 ** p) for p in xrange(int(log2(N / 75))  )])
    print(ncc)
    centroids = zeros((sum(ncc), D.shape[1]))
    labels = zeros((N, len(ncc)), dtype='int32')
    c = 0
    for it, nc in enumerate(ncc):
        new_centroids, _, _, new_labels, _ = ynumpy.kmeans(D.astype('float32'), nc, init='random', nt=params['num_threads'], output='all', redo=1, niter=params['kmeans_max_iter'], verbose=False)
        centroids[c:c+nc, :] = new_centroids
        labels[:, it] = new_labels.squeeze() + c
        c += nc
    print('Done.')
    return centroids, labels
Example #26
def store_uci(D, name=str(date.today()), cfg=config.default_config()):
    print('Storing data in UCI format.')
    print('Destination:', cfg['data_dir'])
    print('Collection name:', name)
    N, M = D.shape
    nw = D.sum()
    with open(join(cfg['data_dir'], 'vocab.' + name + '.txt'), 'w') as f:
        print('Vocabulary...')
        for i in range(N):
            print(i, file=f)
    with open(join(cfg['data_dir'], 'docword.' + name + '.txt'), 'w') as f:
        print('DocWord matrix...')
        print(M, file=f)
        print(N, file=f)
        print(nw, file=f)
        cD = coo_matrix(D)  # faster print
        for d, w, ndw in zip(cD.row, cD.col, cD.data):
            print(d + 1, w + 1, ndw, file=f)
    print('Done.')
Example #27
def gen_real(cfg=config.default_config()):
    N = cfg['N']
    T = cfg['T_0']
    M = cfg['M']
    gen_phi = getattr(generators, cfg['gen_phi'])
    cfg['rows'] = N
    cfg['cols'] = T
    cfg['sparsity'] = cfg['phi_sparsity']
    W_r = gen_phi(cfg)
    gen_theta = getattr(generators, cfg['gen_theta'])
    cfg['rows'] = T
    cfg['cols'] = M
    cfg['sparsity'] = cfg['theta_sparsity']
    H_r = gen_theta(cfg)
    #W_r = gen_matrix_sparse(N, T, 0.2)
    #W_r = gen_matrix_topic(cfg)
    #H_r = gen_matrix_sparse(T, M, 0.3)
    V = np.dot(W_r, H_r)
    return (V, W_r, H_r)
Example #28
def gen_real(cfg=config.default_config()):
    N = cfg['N']
    T = cfg['T_0']
    M = cfg['M']
    gen_phi = getattr(generators, cfg['gen_phi'])
    cfg['rows'] = N
    cfg['cols'] = T
    cfg['sparsity'] = cfg['phi_sparsity']
    W_r = gen_phi(cfg)
    gen_theta = getattr(generators, cfg['gen_theta'])
    cfg['rows'] = T
    cfg['cols'] = M
    cfg['sparsity'] = cfg['theta_sparsity']
    H_r = gen_theta(cfg)
    #W_r = gen_matrix_sparse(N, T, 0.2)
    #W_r = gen_matrix_topic(cfg)
    #H_r = gen_matrix_sparse(T, M, 0.3)
    V = np.dot(W_r, H_r)
    return (V, W_r, H_r)
Example #29
def store_uci(D, name=str(date.today()), cfg=config.default_config()):
    print('Storing data in UCI format.')
    print('Destination:', cfg['data_dir'])
    print('Collection name:', name)
    N, M = D.shape
    nw = D.sum()
    with open(join(cfg['data_dir'], 'vocab.' + name + '.txt'), 'w') as f:
        print('Vocabulary...')
        for i in range(N):
            print(i, file=f)
    with open(join(cfg['data_dir'], 'docword.' + name + '.txt'), 'w') as f:
        print('DocWord matrix...')
        print(M, file=f)
        print(N, file=f)
        print(nw, file=f)
        cD = coo_matrix(D) # faster print
        for d, w, ndw in zip(cD.row, cD.col, cD.data):
            print(d + 1, w + 1, ndw, file=f)
    print('Done.')
Example #30
def train(**kwargs):
    setup_seed(2020)

    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")

    # load validation data
    val_data = ehr.EHR("dataset/EHR", "val")

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)

    # init model
    model = GradientBoostingClassifier(n_estimators=100,
                                       learning_rate=0.1,
                                       verbose=1,
                                       n_iter_no_change=10,
                                       random_state=10)

    train_feat, train_label = train_data.get_feat_data()

    print("Start Training.")
    model.fit(train_feat, train_label)

    print("Training Finished.")

    # eval on test set
    # load test data
    test_data = ehr.EHR("dataset/EHR", "test")
    test_feat, test_label = test_data.get_feat_data()

    test_metric, test_log, test_result = evaluate_clf(model,
                                                      test_feat,
                                                      test_label,
                                                      top_k_list=[3, 5, 10])

    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
Example #31
def grad_desc(V, W, H, post='', cfg=config.default_config()):
    alpha = cfg[post + '_alpha']
    step = cfg[post + '_alpha_step']
    eps = cfg['eps']
    #print('Gradient Descent with alpha={alpha}.'.format(alpha=alpha))
    grad_W = dot((V - dot(W, H)), H.T)
    grad_H = dot(W.T, (V - dot(W, H)))
    #grad_W[grad_W < eps] = 0
    #grad_H[grad_H < eps] = 0
    W = W + alpha * grad_W
    W[(grad_W < eps) & (W < eps)] = 0
    W = normalize_cols(W)

    H = H + alpha * grad_H
    H[(grad_H < eps) & (H < eps)] = 0
    H = normalize_cols(H)

    alpha = alpha * step
    cfg[post + '_alpha'] = alpha
    return (W, H)
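
A hedged sketch of driving the projected-gradient step above (illustrative values; normalize_cols is assumed to be available in the module, as the snippet implies). The function decays its own step size through cfg, so the same dict must be passed on every call.

import numpy as np

# hypothetical config; the keys mirror the lookups above with post=''
cfg = {'eps': 1e-9, '_alpha': 1e-3, '_alpha_step': 0.95}

V = np.abs(np.random.rand(60, 30))
W = np.abs(np.random.rand(60, 8))
H = np.abs(np.random.rand(8, 30))

for _ in range(100):
    W, H = grad_desc(V, W, H, post='', cfg=cfg)   # cfg['_alpha'] shrinks by 0.95 each call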
Example #32
def grad_desc(V, W, H, post='', cfg=config.default_config()):
    alpha = cfg[post + '_alpha']
    step = cfg[post + '_alpha_step']
    eps = cfg['eps']
    #print('Gradient Descent with alpha={alpha}.'.format(alpha=alpha))
    grad_W = dot((V - dot(W, H)), H.T)
    grad_H = dot(W.T, (V - dot(W, H)))
    #grad_W[grad_W < eps] = 0
    #grad_H[grad_H < eps] = 0
    W = W + alpha * grad_W
    W[(grad_W < eps) & (W < eps)] = 0
    W = normalize_cols(W)
    
    H = H + alpha * grad_H
    H[(grad_H < eps) & (H < eps)] = 0
    H = normalize_cols(H)
    
    alpha = alpha * step
    cfg[post + '_alpha'] = alpha
    return (W, H)
Example #33
    def __init__(self):

        help(config._config)

        arguments = self.parse_arguments()

        if arguments.print_config:
            print(config.print(config.default_config()))
            return

        if not arguments.config_file:
            print("Must specify configuration file via -c. If no configuration"
                  + " file exists, you can generate a blank one with the -p"
                  + " flag")
            return

        try:
            self.config = config.parse(arguments.config_file)
        except config.BadConfiguration:
            print("Your configuration file is invalid. To generate a new,"
                  + " blank configuration, use the -p flag.")
Example #34
def train(**kwargs):
    setup_seed(2020)

    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    train_data_loader = DataLoader(train_data,
                                   model_param["batch_size"],
                                   shuffle=True,
                                   num_workers=0,
                                   collate_fn=collate_fn)

    # load validation data
    val_data = ehr.EHR("dataset/EHR", "val")
    val_data_loader = DataLoader(val_data,
                                 model_param["batch_size"],
                                 shuffle=False,
                                 num_workers=0,
                                 collate_fn=collate_fn)

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    # init model
    model = TextCNN(**model_param)
    early_stopper = EarlyStopping(patience=model_param["early_stop"],
                                  larger_better=True)

    if model_param["use_gpu"]:
        model.cuda()

    print("Model Inited.")
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=model_param["lr"],
                                 weight_decay=0)

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        model.train()

        for idx, (feat, dise) in enumerate(train_data_loader):
            pred = model.forward(feat)

            if model_param["use_gpu"]:
                label = torch.LongTensor(dise).cuda()
            else:
                label = torch.LongTensor(dise)

            # label is [1,2,3...,27]
            loss = F.cross_entropy(pred, label - 1)

            # multi-class xent loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        # do evaluation on recall and ndcg
        metric_result, eval_log, eval_result = evaluate_clf(
            model, val_data_loader, [5])
        print("{} Epoch {}/{}: [Val] {}".format(now(), epoch + 1,
                                                model_param["num_epoch"],
                                                eval_log))

        early_stopper(metric_result["ndcg_5"], model, "textcnn")

        if early_stopper.early_stop:
            print("[Early Stop] {} Epoch {}/{}: {}".format(
                now(), epoch + 1, model_param["num_epoch"], eval_log))
            break

    # eval on test set
    # load test data
    test_data = ehr.EHR("dataset/EHR", "test")
    test_data_loader = DataLoader(test_data,
                                  model_param["batch_size"],
                                  shuffle=False,
                                  num_workers=0,
                                  collate_fn=collate_fn)

    test_metric, test_log, test_result = evaluate_clf(model,
                                                      test_data_loader,
                                                      top_k_list=[1, 3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
Example #35
from server import ShellServer
from config import default_config
import sys

config = default_config()

try:
    port = sys.argv[1]
except IndexError:
    port = config.get('server.port') or input('Enter port: ')

port = int(port)
s = ShellServer(port)
print('Server running on 0.0.0.0:%i' % port)
try:
    s.serve()
except KeyboardInterrupt:
    pass
Example #36
def train(**kwargs):
    setup_seed(2020)
    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    train_data_loader = DataLoader(train_data,
                                   model_param["batch_size"],
                                   shuffle=True,
                                   num_workers=0,
                                   collate_fn=collate_fn)

    # init model
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    gnn = HGNN_SDS(**model_param)
    if model_param["w2v"] is not None:
        # load w2v data
        gnn.load_symp_embed(model_param["w2v"])

    if use_gpu:
        gnn.cuda()

    print("Model Inited.")

    sds_sampler = SDS_sampler("dataset/EHR")

    # load pmi ss mat
    symp2symp_mat = sp.load_npz(os.path.join("dataset/EHR", "pmi_ss_mat.npz"))
    symp2symp_mat.setdiag(0)

    # total number of symptoms
    num_total_batch = gnn.num_symp // model_param["batch_size"]
    all_symp_index = np.arange(1, gnn.num_symp + 1)

    lambda_hard_r = lambda epoch: epoch * model_param[
        "hard_ratio"] / model_param["num_epoch"]

    # build hard map and pos map
    symp2symp_hard_map = [0]
    symp2symp_pos_map = [0]
    for k in all_symp_index:
        symp2symp_b_ar = symp2symp_mat[k].toarray().flatten()
        max_index = np.argmax(symp2symp_b_ar)
        if max_index == 0:
            symp2symp_pos_map.append(np.random.randint(1, k))
            symp2symp_hard_map.append(np.random.randint(1, k))

        else:
            symp2symp_pos_map.append(max_index)
            symp2symp_b_ar[max_index] = -1
            max_2nd_index = np.argmax(symp2symp_b_ar)
            if max_2nd_index == 0:
                symp2symp_hard_map.append(np.random.randint(1, k))
            else:
                symp2symp_hard_map.append(max_2nd_index)

    symp2symp_hard_map = np.array(symp2symp_hard_map)
    symp2symp_pos_map = np.array(symp2symp_pos_map)
    print("Pos / Hard symptom map Inited.")

    optimizer = torch.optim.Adam(gnn.parameters(),
                                 lr=model_param["lr"],
                                 weight_decay=model_param["lr"])
    last_total_loss = 1e10

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        gnn.train()
        np.random.shuffle(all_symp_index)

        hard_ratio = lambda_hard_r(epoch)

        for idx in range(num_total_batch):
            batch_symp = all_symp_index[idx *
                                        model_param["batch_size"]:(idx + 1) *
                                        model_param["batch_size"]]

            # get pos symp and neg symp
            pos_symp = symp2symp_pos_map[batch_symp]

            # sample neg
            neg_symp = np.random.randint(1, gnn.num_symp,
                                         model_param["batch_size"])

            # cope with overlapping in pos and neg symps
            overlap_index = (neg_symp == pos_symp)
            overlap_symp = neg_symp[overlap_index]
            neg_symp[overlap_index] = symp2symp_hard_map[overlap_symp]

            if hard_ratio > 0:
                num_hard = int(hard_ratio * model_param["batch_size"])
                neg_symp[:num_hard] = symp2symp_hard_map[neg_symp[:num_hard]]

            batch_symp_ts = torch.LongTensor(batch_symp)
            pos_symp_ts = torch.LongTensor(pos_symp)
            neg_symp_ts = torch.LongTensor(neg_symp)

            if model_param["use_gpu"]:
                batch_symp_ts = batch_symp_ts.cuda()
                pos_symp_ts = pos_symp_ts.cuda()
                neg_symp_ts = neg_symp_ts.cuda()

            # forward batch symp
            batch_symp_data = sds_sampler(batch_symp, 1, 20)
            symp_emb = gnn.forward(batch_symp_ts, batch_symp_data)

            pos_symp_data = sds_sampler(pos_symp, 1, 20)
            pos_emb = gnn.forward(pos_symp_ts, pos_symp_data)

            neg_symp_data = sds_sampler(neg_symp, 1, 20)
            neg_emb = gnn.forward(neg_symp_ts, neg_symp_data)

            # create loss
            scores = symp_emb.mul(pos_emb).sum(1) - symp_emb.mul(neg_emb).sum(
                1) + 1.0
            scores[scores < 0] = 0
            loss = scores.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        if total_loss - last_total_loss > 0:
            print("Loss stops to decrease, converge.")
            break

        last_total_loss = total_loss

    # save model
    torch.save(gnn.state_dict(), "./ckpt/sds_gnn.pt")
    print("Model saved.")
Example #37
def train(**kwargs):
    w2v_model_name = "./ckpt/w2v"

    if os.path.exists(w2v_model_name):
        print("load word2vec model from", w2v_model_name)
        # load model directly
        w2v_model = Word2Vec.load(w2v_model_name)

    else:
        # load data
        filename = "./dataset/EHR/train/data.txt"
        fin = open(filename, "r")
        corpus = []
        for line in fin.readlines():
            corpus.append(line.strip().split()[2:])

        # learn word2vec model
        start_time = time.time()
        w2v_model = Word2Vec(corpus,
                             size=64,
                             window=3,
                             min_count=1,
                             workers=4,
                             sg=1)
        w2v_model.save("./ckpt/w2v")
        print("training done, costs {} secs.".format(time.time() - start_time))

    # start training and testing the MLP model
    setup_seed(2020)

    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    train_data_loader = DataLoader(train_data,
                                   model_param["batch_size"],
                                   shuffle=True,
                                   num_workers=0,
                                   collate_fn=collate_fn)

    # load validation data
    val_data = ehr.EHR("dataset/EHR", "val")
    val_data_loader = DataLoader(val_data,
                                 model_param["batch_size"],
                                 shuffle=False,
                                 num_workers=0,
                                 collate_fn=collate_fn)

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    # let's build a MLP for prediction
    model_param["w2v_model"] = w2v_model
    model = MLP(**model_param)

    early_stopper = EarlyStopping(patience=model_param["early_stop"],
                                  larger_better=True)

    if model_param["use_gpu"]:
        model.cuda()

    print("Model Inited.")
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=model_param["lr"],
                                 weight_decay=kwargs["weight_decay"])

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        model.train()

        for idx, (feat, dise) in enumerate(train_data_loader):
            pred = model.forward(feat)

            if model_param["use_gpu"]:
                label = torch.LongTensor(dise).cuda()
            else:
                label = torch.LongTensor(dise)

            # label is [1,2,3...,27]
            loss = F.cross_entropy(pred, label - 1)

            # multi-class xent loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        # do evaluation on recall and ndcg
        metric_result, eval_log, eval_result = evaluate_clf(
            model, val_data_loader, [5])
        print("{} Epoch {}/{}: [Val] {}".format(now(), epoch + 1,
                                                model_param["num_epoch"],
                                                eval_log))

        early_stopper(metric_result["ndcg_5"], model, "med2vec")

        if early_stopper.early_stop:
            print("[Early Stop] {} Epoch {}/{}: {}".format(
                now(), epoch + 1, model_param["num_epoch"], eval_log))
            break

    # eval on test set
    # load test data
    test_data = ehr.EHR("dataset/EHR", "test")
    test_data_loader = DataLoader(test_data,
                                  model_param["batch_size"],
                                  shuffle=False,
                                  num_workers=0,
                                  collate_fn=collate_fn)

    test_metric, test_log, test_result = evaluate_clf(model,
                                                      test_data_loader,
                                                      top_k_list=[1, 3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
    pass
Example #38
def anchor_words(D, loss='L2', params=config.default_config()):
    Q = generate_Q_matrix(D * 100)
    anchors = findAnchors(Q, params['T'], params)
    W, topic_likelihoods = do_recovery(Q, anchors, loss, params)
    return W
Example #39
def run(V, W, H, W_r=None, H_r=None, cfg=config.default_config()):
    T = H.shape[0]
    eps = cfg['eps']
    schedule = cfg['schedule'].split(',')
    meas = cfg['measure'].split(',')
    val = np.zeros((cfg['max_iter'] + 2, len(meas)))
    hdist = np.zeros((cfg['max_iter'] + 2, 1))

    for i, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[0, i] = fun(V, np.dot(W, H))

    if cfg['compare_real']:
        #m = Munkres()
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
        hdist[0] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
    if cfg['print_lvl'] > 1:
        print('Initial loss:', val[0])
    status = 0
    methods_num = len(schedule)
    it = -1
    for it in range(cfg['max_iter']):
        if cfg['print_lvl'] > 1:
            print('Iteration', it + 1)
        W_old = deepcopy(W)
        H_old = deepcopy(H)
        method_name = schedule[it % methods_num]
        if cfg['print_lvl'] > 1:
            print('Method:', method_name)
        method = getattr(methods, method_name)
        (W, H) = method(V, W, H, method_name, cfg)
        if (it + 1) % cfg['normalize_iter'] == 0:
            W = normalize_cols(W)
            H = normalize_cols(H)
        for j, fun_name in enumerate(meas):
            fun = getattr(measure, fun_name)
            val[it + 1, j] = fun(V, np.dot(W, H))

        if cfg['compare_real']:
            idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
            hdist[it + 1] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T

        if cfg['print_lvl'] > 1:
            print(val[it + 1])
        if all(val[it, :] < eps):
            if cfg['print_lvl'] > 1:
                print('By cost.')
            status = 1
            break
        if abs(W_old - W).max() < eps and abs(H_old - H).max() < eps:
            if cfg['print_lvl'] > 1:
                print('By argument.')
            status = 2
            break
        #del W_old
        #del H_old
    if cfg['print_lvl'] > 1:
        print('Final:')
    W = normalize_cols(W)
    H = normalize_cols(H)
    for j, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[it + 2:, j] = fun(V, np.dot(W, H))

    if cfg['compare_real']:
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
        hdist[it + 2:] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
    return (val, hdist, it, W, H, status)
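
For orientation, a hypothetical cfg dict naming the keys this driver reads. Every value below is illustrative, and the schedule and measure entries must name functions that actually exist in the project's methods and measure modules.

cfg = {
    'eps': 1e-6,                # convergence threshold on loss and argument change
    'max_iter': 100,            # outer iteration budget
    'schedule': 'als,mult_kl',  # comma-separated update methods, cycled one per iteration
    'measure': 'frobenius',     # comma-separated loss names resolved via getattr(measure, ...)
    'normalize_iter': 5,        # renormalize columns of W and H every 5 iterations
    'compare_real': 0,          # set to 1 to track Hellinger distance against W_r, H_r
    'munkres': 1,               # forwarded to get_permute when compare_real is on
    'print_lvl': 1,             # values > 1 print per-iteration diagnostics
}
# run(V, W, H, cfg=cfg) then returns (val, hdist, it, W, H, status)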
Example #40
def initialize_matrices(i, F, cfg=config.default_config()):
    """Initialize matrices Phi Theta.
       - Return:
       Phi
       Theta
       - Used params:
       prepare_method
    """
    if (int(cfg['prepare_method'].split(',')[i]) == 1):
        print("Arora")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps, np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 2):
        print("Random rare")
        cfg['phi_sparsity'] = 0.05
        cfg['theta_sparsity'] = 0.1
        return gen_init(cfg)
    elif (int(cfg['prepare_method'].split(',')[i]) == 3):
        print("Random uniform")
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        return gen_init(cfg)
    elif (int(cfg['prepare_method'].split(',')[i]) == 4):
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of words")
        centroids, labels = prepare.reduce_cluster(F_norm, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps, np.dot(Theta, F_norm.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 5):
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("SVD init")
        U, s, V = np.linalg.svd(F_norm)
        Phi, Theta = construct_from_svd(U, s, V, cfg)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 6):
        eps = cfg['eps']
        transformer = TfidfTransformer()
        transformer.fit(F)
        F_tfidf = (transformer.transform(F)).toarray()
        print("Clustering of tf-idf")
        centroids, labels = prepare.reduce_cluster(F_tfidf, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps, np.dot(Theta, F_tfidf.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 7):
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of words mixed")
        centroids, labels = prepare.reduce_cluster(F_norm, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps, np.dot(Theta, F_norm.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        Phi1, Theta1 = gen_init(cfg)
        zzz = 0.3
        return zzz*Phi1+(1.-zzz)*Phi, zzz*Theta1+(1.-zzz)*Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 8):
        print("Arora mixed")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps, np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        Phi1, Theta1 = gen_init(cfg)
        zzz = 0.3
        return zzz*Phi1+(1.-zzz)*Phi, zzz*Theta1+(1.-zzz)*Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 9):
        print("Arora uniform")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.ones((Phi.shape[1], F.shape[1]))
        Theta = normalize_cols(Theta)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 10):
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of docs")
        centroids, labels = prepare.reduce_cluster(F_norm.T, cfg['T'], cfg)
        Phi = centroids.T
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps, np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        return Phi, Theta
Example #41
def plsa(F, Phi, Theta, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = F / maximum(dot(Phi, Theta), eps)
    Theta, Phi = normalize_cols(Theta * dot(Phi.T, tmp)), normalize_cols(Phi * dot(tmp, Theta.T))
    return Phi, Theta
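
A minimal end-to-end sketch for the PLSA update above. normalize_cols is reimplemented here purely for illustration (the project has its own); shapes and the iteration count are hypothetical.

import numpy as np

def normalize_cols(A):
    # illustrative helper: rescale every column to sum to 1
    return A / np.maximum(A.sum(axis=0, keepdims=True), 1e-12)

cfg = {'eps': 1e-9}
F = normalize_cols(np.abs(np.random.rand(200, 50)))     # word-document frequencies
Phi = normalize_cols(np.abs(np.random.rand(200, 10)))   # topics: word distributions
Theta = normalize_cols(np.abs(np.random.rand(10, 50)))  # document-topic mixtures

for _ in range(100):
    Phi, Theta = plsa(F, Phi, Theta, cfg=cfg)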
Example #42
def run(F, Phi, Theta, Phi_r=None, Theta_r=None, cfg=config.default_config()):
    """Em-algo method.
       - Return:
       val
       hdist
       it
       Phi
       Theta
       status
       - Used params:

    """
    #F_norm = normalize_cols(F)
    T = Theta.shape[0]
    eps = cfg['eps']
    schedule = cfg['schedule'].split(',')
    meas = cfg['measure'].split(',')
    val = np.zeros((cfg['max_iter']+2, len(meas)))
    hdist = np.zeros((2, cfg['max_iter']+2))#Phi - first row, Theta - second
    
    for i, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[0, i] = fun(F, np.dot(Phi, Theta))
    
    if cfg['compare_real']:
        #m = Munkres()
        idx = get_permute(Phi_r, Theta_r, Phi, Theta, cfg['munkres'])
        hdist[0][0] = hellinger(Phi[:, idx[:, 1]], Phi_r[:, idx[:, 0]])
        hdist[1][0] = hellinger(Theta[idx[:, 1],:], Theta_r[idx[:, 0],:])

    if cfg['print_lvl'] > 1:
        print('Initial loss:', val[0])
    status = 0
    methods_num = len(schedule)
    it = -1
    for it in range(cfg['max_iter']+1):
        if cfg['print_lvl'] > 1:
            print('Iteration', it+1)
        ####Phi_old = deepcopy(Phi)
        ####Theta_old = deepcopy(Theta)
        method_name = schedule[it % methods_num]
        if cfg['print_lvl'] > 1:
            print('Method:', method_name)
        method = getattr(methods, method_name)
        (Phi, Theta) = method(F, Phi, Theta, method_name, cfg)
        #jogging of weights
        if cfg['jogging'] == 1 and it < 10:
            joh_alpha = 0.25
            cfg['phi_sparsity'] = 0.05
            cfg['theta_sparsity'] = 0.1
            Phi_jog, Theta_jog = gen_init(cfg)
            Phi = (1-joh_alpha**(it+1))*Phi + joh_alpha**(it+1)*Phi_jog
            Theta = (1-joh_alpha**(it+1))*Theta + joh_alpha**(it+1)*Theta_jog
        for j, fun_name in enumerate(meas):
            fun = getattr(measure, fun_name)
            val[it+1, j] = fun(F, np.dot(Phi, Theta))#fun(F_norm, np.dot(Phi, Theta))
        
        if cfg['compare_real']:
            idx = get_permute(Phi_r, Theta_r, Phi, Theta, cfg['munkres'])
            hdist[0][it+1] = hellinger(Phi[:, idx[:, 1]], Phi_r[:, idx[:, 0]])
            hdist[1][it+1] = hellinger(Theta[idx[:, 1], :], Theta_r[idx[:, 0], :])
        
        if cfg['print_lvl'] > 1:
            print(val[it+1])
        if all(val[it, :] < eps):
            if cfg['print_lvl'] > 1:
                print('By cost.')
            status = 1
            break
        '''if abs(Phi_old - Phi).max() < eps and abs(Theta_old - Theta).max() < eps:
            if cfg['print_lvl'] > 1:
                print('By argument.')
            status = 2
            break'''
        #del W_old
        #del H_old
    if cfg['print_lvl'] > 1:
        print('Final:')
    #Phi = normalize_cols(Phi)
    #Theta = normalize_cols(Theta)
    #for j, fun_name in enumerate(meas):
    #    fun = getattr(measure, fun_name)
    #    val[it+2:, j] = fun(F, np.dot(Phi, Theta))#fun(F_norm, np.dot(Phi, Theta))
    
    #if cfg['compare_real']:
    #    idx = get_permute(Phi_r, Theta_r, Phi, Theta, cfg['munkres'])
    #    hdist[0][it+2:] = hellinger(Phi[:, idx[:, 1]], Phi_r[:, idx[:, 0]])
    #    hdist[1][it+2:] = hellinger(Theta[idx[:, 1],:], Theta_r[idx[:, 0], :])

    return (val, hdist, it, Phi, Theta, status)
Example #43
def load_csv(name, cfg=config.default_config()):
    V = np.loadtxt(open(join(cfg['data_dir'], name+'_V.csv'), 'r'), delimiter=',')
    W = np.loadtxt(open(join(cfg['data_dir'], name+'_W.csv'), 'r'), delimiter=',')
    H = np.loadtxt(open(join(cfg['data_dir'], name+'_H.csv'), 'r'), delimiter=',')
    return (V, W, H)
Example #44
def default_api(cls):
    conf = config.default_config()
    return cls(conf.get('jira_default', 'host'),
               conf.get('jira_default', 'path'),
               conf.get('jira_default', 'username'),
               conf.get('jira_default', 'password'))
Example #45
def train(**kwargs):

    setup_seed(2020)

    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    dataset_name = model_param["dataset"]

    # load hard maps
    if model_param["hard_ratio"] > 0:
        model_param["hard_map"] = np.load("dataset/hard_dise.npy",
                                          allow_pickle=True).item()

    # load training data
    train_data = ehr.EHR("dataset/{}".format(dataset_name), "train")
    train_data_loader = DataLoader(train_data,
                                   model_param["batch_size"],
                                   shuffle=True,
                                   num_workers=0,
                                   collate_fn=collate_fn)

    # load validation data
    val_data = ehr.EHR("dataset/{}".format(dataset_name), "val")
    val_data_loader = DataLoader(val_data,
                                 model_param["batch_size"],
                                 shuffle=False,
                                 num_workers=0,
                                 collate_fn=collate_fn)

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    # init model
    gnn = HGNN(**model_param)
    if kwargs["w2v"] is not None:
        if os.path.exists(kwargs["w2v"]):
            # load w2v data
            gnn.load_symp_embed(kwargs["w2v"])
        else:
            from gensim.models import Word2Vec
            # build word2vec embeddings
            filename = "./dataset/EHR/train/data.txt"
            fin = open(filename, "r")
            corpus = []
            for line in fin.readlines():
                corpus.append(line.strip().split()[2:])
            # learn word2vec model
            start_time = time.time()
            w2v_model = Word2Vec(corpus,
                                 size=64,
                                 window=3,
                                 min_count=1,
                                 workers=4,
                                 sg=1)
            w2v_model.save("./ckpt/w2v")
            print("word2vec training done, costs {} secs.".format(time.time() -
                                                                  start_time))

    early_stopper = EarlyStopping(patience=model_param["early_stop"],
                                  larger_better=True)

    if use_gpu:
        gnn.cuda()

    print("Model Inited.")

    # optimizer = torch.optim.Adam(gnn.parameters(),lr=model_param["lr"],weight_decay=model_param["weight_decay"])

    optimizer = torch.optim.Adam(gnn.parameters(),
                                 lr=model_param["lr"],
                                 weight_decay=0)

    # init sampler for negative sampling during training.
    dsd_sampler = DSD_sampler("dataset/{}".format(dataset_name))
    print("D-S-D Sampler Inited.")

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        gnn.train()

        for idx, (feat, dise) in enumerate(train_data_loader):
            pred, pred_neg, emb_user, emb_dise, neg_emb_dise = gnn.forward(
                feat, dise, dsd_sampler)

            bpr_loss = create_bpr_loss(pred, pred_neg)

            l2_loss = create_l2_loss(emb_user, emb_dise, neg_emb_dise)
            loss = bpr_loss + model_param["weight_decay"] * l2_loss
            # loss = bpr_loss

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()

            total_loss += bpr_loss.item()
            # print(idx,total_loss)

        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        # do evaluation on recall and ndcg

        metric_result, eval_log, eval_result = evaluate(
            gnn, val_data_loader, dsd_sampler, [5])
        print("{} Epoch {}/{}: [Val] {}".format(now(), epoch + 1,
                                                model_param["num_epoch"],
                                                eval_log))

        early_stopper(metric_result["ndcg_5"], gnn, "gnn")

        if early_stopper.early_stop:
            print("[Early Stop] {} Epoch {}/{}: {}".format(
                now(), epoch + 1, model_param["num_epoch"], eval_log))
            break

    # eval on test set
    # load test data
    test_data = ehr.EHR("dataset/{}".format(dataset_name), "test")
    test_data_loader = DataLoader(test_data,
                                  model_param["batch_size"],
                                  shuffle=False,
                                  num_workers=0,
                                  collate_fn=collate_fn)

    test_metric, test_log, test_result = evaluate(gnn,
                                                  test_data_loader,
                                                  dsd_sampler,
                                                  top_k_list=[1, 3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
Example #46
def default_api(cls):
    conf = config.default_config()
    return cls(conf.get('jira_default', 'host'),
               conf.get('jira_default', 'path'),
               conf.get('jira_default', 'username'),
               conf.get('jira_default', 'password'))
Example #47
	M_ = ZA + M
	e = hash_function(M_)
	e = bytes_to_int(bits_to_bytes(e))
	t = (r + s) % n
	if(t == 0):
		#print("wrong signature : t is 0")
		return False

	x1 = ECG_ele_add( ECG_k_point(s, Point(Gx, Gy)), ECG_k_point(t, PA) ).x
	R = (e + x1) % n
	if R!=r:
		#print("wrong signature: R unequal r")
		return False
	return True

'''
### test Signature ###
config.default_config()
parameters = config.get_parameters()
key = key_pair_generation(parameters)
dA = key[0]
PA = key[1]
IDA = '*****@*****.**'
M = '100'
Sig = Signature(M, IDA, dA, PA)
print(Sig)

### test Verification ###
Verification(M, Sig, IDA, dA, PA)

#print('ECG_k_point(2, PA)', ECG_k_point(2, Point(2,2)))
Example #48
def train(**kwargs):

    setup_seed(2020)

    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load hard maps
    if model_param["hard_ratio"] > 0:
        model_param["hard_map"] = np.load("dataset/hard_dise.npy",
                                          allow_pickle=True).item()

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    train_data_loader = DataLoader(train_data,
                                   model_param["batch_size"],
                                   shuffle=True,
                                   num_workers=0,
                                   collate_fn=collate_fn)

    # load validation data
    val_data = ehr.EHR("dataset/EHR", "val")
    val_data_loader = DataLoader(val_data,
                                 model_param["batch_size"],
                                 shuffle=False,
                                 num_workers=0,
                                 collate_fn=collate_fn)

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    # init model
    gnn = HGNN_DSD(**model_param)
    if kwargs["w2v"] is not None:
        # load w2v data
        gnn.load_symp_embed(kwargs["w2v"])
    early_stopper = EarlyStopping(patience=model_param["early_stop"],
                                  larger_better=True)

    if use_gpu:
        gnn.cuda()

    print("Model Inited.")

    # optimizer = torch.optim.Adam(gnn.parameters(),lr=model_param["lr"],weight_decay=model_param["weight_decay"])

    optimizer = torch.optim.Adam(gnn.parameters(),
                                 lr=model_param["lr"],
                                 weight_decay=0)

    # init sampler for negative sampling during training.
    dsd_sampler = DSD_sampler("dataset/EHR")
    print("D-S-D Sampler Inited.")

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        gnn.train()

        for idx, (feat, dise) in enumerate(train_data_loader):
            pred, pred_neg, emb_user, emb_dise, neg_emb_dise = gnn.forward(
                feat, dise, dsd_sampler)

            bpr_loss = create_bpr_loss(pred, pred_neg)

            l2_loss = create_l2_loss(emb_user, emb_dise, neg_emb_dise)
            loss = bpr_loss + model_param["weight_decay"] * l2_loss
            # loss = bpr_loss

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()

            total_loss += bpr_loss.item()
            # print(idx,total_loss)

        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        # do evaluation on recall and ndcg

        metric_result, eval_log, eval_result = evaluate(
            gnn, val_data_loader, dsd_sampler, [5])
        print("{} Epoch {}/{}: [Val] {}".format(now(), epoch + 1,
                                                model_param["num_epoch"],
                                                eval_log))

        early_stopper(metric_result["ndcg_5"], gnn, "gnn_dsd")

        if early_stopper.early_stop:
            print("[Early Stop] {} Epoch {}/{}: {}".format(
                now(), epoch + 1, model_param["num_epoch"], eval_log))
            break

    # eval on test set
    # load test data
    test_data = ehr.EHR("dataset/EHR", "test")
    test_data_loader = DataLoader(test_data,
                                  model_param["batch_size"],
                                  shuffle=False,
                                  num_workers=0,
                                  collate_fn=collate_fn)

    test_metric, test_log, test_result = evaluate(gnn,
                                                  test_data_loader,
                                                  dsd_sampler,
                                                  top_k_list=[1, 3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
Example #49
File: api.py Project: gimlids/riker
# http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_website_region_endpoints
s3_website_regions = {
    'us-east-1': ('s3-website-us-east-1.amazonaws.com.', 'Z3AQBSTGFYJSTF'),
    'us-west-2': ('s3-website-us-west-2.amazonaws.com.', 'Z3BJ6K6RIION7M'),
    'us-west-1': ('s3-website-us-west-1.amazonaws.com.', 'Z2F56UZL2M1ACD'),
    'eu-west-1': ('s3-website-eu-west-1.amazonaws.com.', 'Z1BKCTXD74EZPE'),
    'ap-southeast-1': ('s3-website-ap-southeast-1.amazonaws.com.', 'Z3O0J2DXBE1FTB'),
    'ap-southeast-2': ('s3-website-ap-southeast-2.amazonaws.com.', 'Z1WCIGYICN2BYD'),
    'ap-northeast-1': ('s3-website-ap-northeast-1.amazonaws.com.', 'Z2M4EHUR26P7ZW'),
    'sa-east-1': ('s3-website-sa-east-1.amazonaws.com.', 'Z7KQH4QJS55SO'),
    'us-gov-west-1': ('s3-website-us-gov-west-1.amazonaws.com.', 'Z31GFT0UA1I2HV')
}

aws = None
initialized = False
config = riker_config.default_config()

def get_public_dns(instances):
    return [inst.public_dns_name for inst in instances]

def ensure_running(instances, timeout=600, poll_delay=10):
    if len(instances) == 0:
        return
    log('info', 'Waiting for instances {} to be running'.format(instances), show_header=True)
    def get_status():
        try:
            return aws.conn.get_all_instance_status([inst.id for inst in instances])
        except boto.exception.EC2ResponseError:
            log('info', 'No status yet')
    def is_status_ok(statuses):
        #for s in statuses:
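The s3_website_regions table above pairs each region with its static S3 website endpoint and the fixed Route 53 hosted zone ID that alias records must reference. A hedged sketch of how such a table is typically consumed follows; website_alias_target is a hypothetical helper, not part of riker's API:

def website_alias_target(region):
    # Look up the website endpoint and hosted zone ID for a region; both are
    # needed to build a Route 53 ALIAS record pointing at an S3 website bucket.
    try:
        endpoint, zone_id = s3_website_regions[region]
    except KeyError:
        raise ValueError('no S3 website endpoint known for region {!r}'.format(region))
    return {'DNSName': endpoint,
            'HostedZoneId': zone_id,
            'EvaluateTargetHealth': False}

# website_alias_target('us-west-2')
# -> {'DNSName': 's3-website-us-west-2.amazonaws.com.', 'HostedZoneId': 'Z3BJ6K6RIION7M', ...}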
Example #50
0
    x1 = ECG_ele_add(ECG_k_point(s, Point(Gx, Gy)), ECG_k_point(t, PA)).x
    # print("x1:", x1)
    R = (e1 + x1) % n
    #print("R:", R)
    if R == r:
        # print("wrong signature: R unequal r")
        # return False
        print("R等于r,验证通过")
    else:
        print("R不等于r,验证不通过")
    return True


### test Signature ###
config.default_config()
parameters = config.get_parameters()
point_g = Point(config.get_Gx(), config.get_Gy())
n = config.get_n()

print("请输入待验证的文件:")
f1 = input()
f = open(f1, 'r')
M = f.read()

IDA = '*****@*****.**'

print("请输入需要验证的签名:")
f2 = input()
sign = open(f2, "r")
signature = sign.read().replace("[", "").replace("]",
Example #51
0
def run(V, W, H, W_r=None, H_r=None, cfg=config.default_config()):
    T = H.shape[0]
    eps = cfg['eps']
    schedule = cfg['schedule'].split(',')
    meas = cfg['measure'].split(',')
    val = np.zeros((cfg['max_iter']+2, len(meas)))
    hdist = np.zeros((cfg['max_iter']+2, 1))
    
    for i, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[0, i] = fun(V, np.dot(W, H))
    
    if cfg['compare_real']:
        #m = Munkres()
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
        hdist[0] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
    if cfg['print_lvl'] > 1:
        print('Initial loss:', val[0])
    status = 0
    methods_num = len(schedule)
    it = -1
    for it in range(cfg['max_iter']):
        if cfg['print_lvl'] > 1:
            print('Iteration', it+1)
        W_old = deepcopy(W)
        H_old = deepcopy(H)
        method_name = schedule[it % methods_num]
        if cfg['print_lvl'] > 1:
            print('Method:', method_name)
        method = getattr(methods, method_name)
        (W, H) = method(V, W, H, method_name, cfg)
        if (it+1) % cfg['normalize_iter'] == 0:
            W = normalize_cols(W)
            H = normalize_cols(H)
        for j, fun_name in enumerate(meas):
            fun = getattr(measure, fun_name)
            val[it+1, j] = fun(V, np.dot(W, H))
        
        if cfg['compare_real']:
            idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
            hdist[it+1] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
        
        if cfg['print_lvl'] > 1:
            print(val[it+1])
        if all(val[it+1, :] < eps):
            if cfg['print_lvl'] > 1:
                print('By cost.')
            status = 1
            break
        if abs(W_old - W).max() < eps and abs(H_old - H).max() < eps:
            if cfg['print_lvl'] > 1:
                print('By argument.')
            status = 2
            break
        #del W_old
        #del H_old
    if cfg['print_lvl'] > 1:
        print('Final:')
    W = normalize_cols(W)
    H = normalize_cols(H)
    for j, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[it+2:, j] = fun(V, np.dot(W, H))
    
    if cfg['compare_real']:
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
        hdist[it+2:] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
    return (val, hdist, it, W, H, status)
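run() alternates the update methods named in cfg['schedule'], renormalizes the factor columns every normalize_iter iterations, and records the chosen cost measures (plus, optionally, the Hellinger distance to ground-truth factors). A minimal usage sketch, assuming default_config() returns a plain dict and that the measure and method names below exist in the measure and methods modules:

import numpy as np

cfg = config.default_config()
cfg.update({'schedule': 'mult,plsa',    # update rules to alternate between
            'measure': 'frobenius',     # illustrative measure name
            'max_iter': 100, 'normalize_iter': 10,
            'eps': 1e-8, 'print_lvl': 1, 'compare_real': False})

N, T, M = 1000, 10, 200                         # vocabulary, topics, documents (illustrative)
V = np.random.rand(N, M)                        # matrix to factorize, V ~ W.dot(H)
W = normalize_cols(np.random.rand(N, T))
H = normalize_cols(np.random.rand(T, M))

val, hdist, it, W, H, status = run(V, W, H, cfg=cfg)
print('stopped after', it + 1, 'iterations with status', status)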
Example #52
0
def main(**kwargs):
    # parse parameters
    param = default_config()
    param.update({
        "mode": "sds",
        "top_k": 10,
        "ckpt": "ckpt/gnn.pt",
        "use_gpu": False
    })

    param.update(kwargs)

    # read maps
    symp2id, id2symp = read_symp2id()
    dise2id, id2dise = read_dise2id()

    # read data
    datapath = os.path.join("dataset/EHR/test/data.txt")
    fin = open(datapath, "r", encoding="utf-8")
    lines = fin.readlines()

    data_model = ehr.EHR("dataset/EHR", "train")

    # init retrieval system
    ehr_ret = EHR_retrieval(mode=param["mode"])

    # init and load model
    data_model_param = parse_data_model(data_model)
    param.update(data_model_param)
    param = parse_kwargs(param, kwargs)
    gnn = HGNN(**param)

    if param["use_gpu"]:
        gnn.cuda()

    ckpt_path = param.get("ckpt")
    if ckpt_path is None:
        print("[Warning] Do not set ckpt path, load from the default path.")
        load_ckpt("ckpt/checkpoint.pt", gnn, param["use_gpu"])
    else:
        load_ckpt(ckpt_path, gnn, param["use_gpu"])

    dsd_sampler = DSD_sampler("dataset/EHR")
    usu_sampler = USU_sampler("dataset/EHR")

    gnn.eval()

    emb_dise = gnn.gen_all_dise_emb(dsd_sampler)

    # init result list
    before_list = []
    after_list = []
    real_dise_list = []
    init_symp_list = []
    after_symp_list = []

    result_map_bfo = defaultdict(list)
    result_map_aft = defaultdict(list)
    # top-k cutoffs used when evaluating P@N, Rec@N, ...
    top_k_list = [1, 5]

    for i, line in enumerate(lines):
        line_data = line.strip().split()
        uid = line_data[0]
        did = line_data[1]
        real_dise_list.append(did)
        symps = line_data[2:]

        # select the first symptom and do inference
        init_symp = symps[0]
        init_symp_list.append(id2symp[init_symp])

        symp_ar = np.array([[init_symp]])

        pred_rank = gnn.rank_query(symp_ar, emb_dise, usu_sampler, top_k=5)

        # calculate statistics
        for top_k in top_k_list:
            pred_top_k = pred_rank[0][:top_k]
            calculate_rec_ndcg(pred_top_k, int(did), top_k, result_map_bfo)

        # print("true did:", did)
        # print("before:", pred_rank)
        before_list.append(pred_rank[0])

        rank_symp = ehr_ret(symp_idx=init_symp, top_k=param["top_k"])
        after_symp_list.append([id2symp[str(t)] for t in rank_symp])
        symp_ar = [np.concatenate([[init_symp], rank_symp], 0)]

        # symp_ar = np.array([symps])
        pred_rank = gnn.rank_query(symp_ar, emb_dise, usu_sampler, top_k=5)
        for top_k in top_k_list:
            pred_top_k = pred_rank[0][:top_k]
            calculate_rec_ndcg(pred_top_k, int(did), top_k, result_map_aft)

        # print("after:", pred_rank)
        after_list.append(pred_rank[0])

        ret_symps = ehr_ret(init_symp, param["top_k"])
        ret_symp_list = []
        for sid in ret_symps:
            ret_symp_list.append(id2symp[str(sid)])

        if i % 100 == 0:
            print("[line]:", i)

    # summary
    bf_log = build_result_log(result_map_bfo, top_k_list)
    af_log = build_result_log(result_map_aft, top_k_list)

    print("[before]: {}".format(bf_log))
    print("[after]: {}".format(af_log))

    # write results to a tab-separated text file (converted to xlsx below)
    fout = open("retrieval_result_{}.txt".format(param["mode"]),
                "w",
                encoding="utf-8")
    fout.write("did\tbefore_pred\tafter_pred\tinit_symp\taftersymp\n")
    for i in range(len(init_symp_list)):
        # assemble one tab-separated row: true disease, prediction before and
        # after retrieval, the initial symptom, and the retrieved symptoms joined by '#'
        wrtline = "\t".join([
            id2dise[int(real_dise_list[i])],
            id2dise[int(before_list[i][0])],
            id2dise[int(after_list[i][0])],
            init_symp_list[i],
            "#".join(after_symp_list[i]),
        ]) + "\n"
        fout.write(wrtline)

    fin.close()
    fout.close()

    df_res = pd.read_table("retrieval_result_{}.txt".format(param["mode"]))
    df_res.to_excel("retrieval_result_{}.xlsx".format(param["mode"]),
                    encoding="utf-8")
    print("Done")
Example #53
0
def mult(V, W, H, post='', cfg=config.default_config()):
    #print('Gradient Descent with Multiplicative Update Rule.')
    eps = cfg['eps']
    H = H * dot(W.T, V) / maximum(dot(W.T, dot(W, H)), eps)
    W = W * dot(V, H.T) / maximum(dot(W, dot(H, H.T)), eps)
    return (W, H)
Example #54
0
def plsa(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = V / maximum(dot(W, H), eps)
    H = normalize_cols(H * dot(W.T, tmp))
    W = normalize_cols(W * dot(tmp, H.T))
    return W, H
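Both updates share the same shape: each factor is multiplied by a ratio of non-negative terms, so non-negativity is preserved; plsa additionally renormalizes the columns so they stay stochastic. A small hedged usage sketch for driving either update directly, with illustrative sizes:

import numpy as np

cfg = config.default_config()
V = np.random.rand(500, 100)                    # observed non-negative matrix
W = normalize_cols(np.random.rand(500, 10))
H = normalize_cols(np.random.rand(10, 100))

for _ in range(50):
    W, H = mult(V, W, H, cfg=cfg)               # or: W, H = plsa(V, W, H, cfg=cfg)

print('reconstruction error:', np.linalg.norm(V - np.dot(W, H)))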