def config_initialization():
    # image shape and feature layers shape inference
    config.default_config()
    image_shape = (config.train_image_height, config.train_image_width)
    if not config.dataset_path:
        raise ValueError('You must supply the dataset directory with --dataset_dir')
    tf.logging.set_verbosity(tf.logging.DEBUG)
    util.init_logger(
        log_file='log_train_pixel_link_%d_%d.log' % image_shape,
        log_path=config.train_dir, stdout=False, mode='a')
    # config.load_config(config.train_dir)
    config.init_config(image_shape,
                       batch_size=config.batch_size,
                       weight_decay=config.weight_decay,
                       num_gpus=config.num_gpus)
    config.default_config()
    config.score_map_shape = (config.train_image_height // config.strides[0],
                              config.train_image_width // config.strides[0])
    height = config.train_image_height
    score_map = config.score_map_shape
    stride = config.strides[0]
    batch_size = config.batch_size
    batch_size_per_gpu = config.batch_size_per_gpu
    util.proc.set_proc_name('train_pixel_link_on' + '_' + config.dataset_name)
def cnmf(V, W, H, post='', cfg=config.default_config()):
    # Constrained NMF: multiplicative updates with additive penalty terms
    # (beta on H, alpha on W) in the denominators.
    eps = cfg['eps']
    alpha = cfg[post + '_alpha']
    beta = cfg[post + '_beta']
    H = H * dot(W.T, V) / (dot(W.T, dot(W, H)) + beta * H + eps)
    W = W * dot(V, H.T) / (dot(W, dot(H, H.T)) + alpha * W + eps)
    return W, H
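# Usage sketch (not from the original source): driving the constrained
# multiplicative updates above with a plain dict standing in for
# config.default_config(); the keys mirror what cnmf() reads ('eps',
# post + '_alpha', post + '_beta') and the values are illustrative.
import numpy as np

cfg = {'eps': 1e-8, '_alpha': 0.1, '_beta': 0.1}
V = np.abs(np.random.rand(100, 50))   # nonnegative data matrix
W = np.abs(np.random.rand(100, 10))   # initial factors
H = np.abs(np.random.rand(10, 50))

for _ in range(200):                  # fixed iteration budget
    W, H = cnmf(V, W, H, post='', cfg=cfg)
print(np.linalg.norm(V - W.dot(H)))   # reconstruction error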
def __init__(self):
    self.window = tk.Tk()
    self.window.title("ReachMaster")
    self.window.configure(bg="white")
    self.window.protocol("WM_DELETE_WINDOW", self.on_quit)
    self.config = config.default_config()
    config.save_tmp(self.config)
    self.data_dir = tk.StringVar()
    self.data_dir.set(self.config['ReachMaster']['data_dir'])
    self.config_file = tk.StringVar()
    self.config_file.set(self.config['ReachMaster']['config_file'])
    self.port_list = expint.get_ports()
    self.exp_control_port = tk.StringVar()
    self.rob_control_port = tk.StringVar()
    if self.config['ReachMaster']['exp_control_port'] in self.port_list:
        self.exp_control_port.set(self.config['ReachMaster']['exp_control_port'])
    else:
        self.exp_control_port.set(self.port_list[0])
    if self.config['ReachMaster']['rob_control_port'] in self.port_list:
        self.rob_control_port.set(self.config['ReachMaster']['rob_control_port'])
    else:
        self.rob_control_port.set(self.port_list[0])
    self.protocol_list = protocols.list_protocols()
    self.protocol = tk.StringVar()
    self.protocol.set(self.protocol_list[0])
    self.running = False
    self.exp_connected = False
    self.rob_connected = False
    self.protocol_running = False
    self.child = None
    self._configure_window()
def gen_init(cfg=config.default_config()):
    """Generate real-valued initialization matrices.

    - Return: Phi, Theta
    - Used params: N, T, M, phi_init, phi_sparsity, theta_init, theta_sparsity
    """
    N = cfg['N']
    T = cfg['T']
    M = cfg['M']
    gen_phi = getattr(generators, cfg['phi_init'])
    cfg['rows'] = N
    cfg['cols'] = T
    cfg['sparsity'] = cfg['phi_sparsity']
    Phi = gen_phi(cfg)
    gen_theta = getattr(generators, cfg['theta_init'])
    cfg['rows'] = T
    cfg['cols'] = M
    cfg['sparsity'] = cfg['theta_sparsity']
    Theta = gen_theta(cfg)
    return (Phi, Theta)
def gen_real(cfg=config.default_config()):
    """Generate matrices with real values for a model experiment.

    - Return: F, Phi_r, Theta_r
    - Used params: N, T_0, M, gen_phi, real_phi_sparsity, gen_theta, real_theta_sparsity
    """
    N = cfg['N']
    T_0 = cfg['T_0']
    M = cfg['M']
    gen_phi = getattr(generators, cfg['gen_phi'])
    cfg['rows'] = N
    cfg['cols'] = T_0
    cfg['sparsity'] = cfg['real_phi_sparsity']
    Phi_r = gen_phi(cfg)
    gen_theta = getattr(generators, cfg['gen_theta'])
    cfg['rows'] = T_0
    cfg['cols'] = M
    cfg['sparsity'] = cfg['real_theta_sparsity']
    Theta_r = gen_theta(cfg)
    F = np.dot(Phi_r, Theta_r)
    # scale each document column by a random document length
    # (range() for Python 3; the original used Python 2's xrange())
    for i in range(F.shape[1]):
        F[:, i] = F[:, i] * np.random.randint(100, 8000)
    return (F, Phi_r, Theta_r)
def mult_kl(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = V / (dot(W, H) + eps)
    W0 = W * dot(tmp, H.T)
    W = W0 / tile(maximum(sum(H.T, 0), eps), (W0.shape[0], 1))
    H0 = H * dot(W.T, tmp)
    H = H0 / tile(maximum(sum(W.T, 1), eps), (1, H0.shape[1]))
    return W, H
def als(V, W, H, post='', cfg=config.default_config()):
    # Alternating least squares: solve regularized normal equations,
    # then clip small or negative entries to zero.
    eps = cfg['eps']
    H = linalg.solve(dot(W.T, W) + eye(W.shape[1]) * eps, dot(W.T, V))
    H[H < eps] = 0
    W = linalg.solve(dot(H, H.T) + eye(H.shape[0]) * eps, dot(H, V.T)).T
    W[W < eps] = 0
    return (W, H)
def load_csv(name, cfg=config.default_config()):
    V = np.loadtxt(open(join(cfg['data_dir'], name + '_V.csv'), 'r'), delimiter=',')
    W = np.loadtxt(open(join(cfg['data_dir'], name + '_W.csv'), 'r'), delimiter=',')
    H = np.loadtxt(open(join(cfg['data_dir'], name + '_H.csv'), 'r'), delimiter=',')
    return (V, W, H)
def __init__(self):
    config.default_config()
    self.height = config.train_image_height
    self.width = config.train_image_width
    self.input_shape = (self.height, self.width, 3)
    # self.input = Input(tensor=image)
    self.input = Input(shape=self.input_shape)
    self.width_multiplier = 1
    # training will change this
    self.is_training = True
    pixel_cls_logits, pixel_link_logits = self.create_model()
    output = [pixel_cls_logits, pixel_link_logits]
    # self.model = keras.models.Model(inputs=self.input,
    #                                 outputs=[pixel_cls_logits, pixel_link_logits])
    merged = concatenate([pixel_cls_logits, pixel_link_logits], axis=-1)
    self.model = keras.models.Model(inputs=self.input, outputs=merged)
def hals(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    T = H.shape[0]
    W0 = W
    H0 = H
    for k in range(T):
        R = V - dot(W0, H0) + dot(W0[:, [k]], H0[[k], :])
        H0[k, :] = maximum(dot(R.T, W0[:, k]), 0).T / maximum(sum(W0[:, k] ** 2, 0), eps)
        W0[:, k] = maximum(dot(R, H0[k, :].T), 0) / maximum(sum(H0[k, :] ** 2), eps)
    return W, H
def reduce_cluster(D, num_clusters, params=config.default_config()):
    print('Clustering:')
    D = ascontiguousarray(D.astype('float32'))
    centroids, qerr, dis, labels, nassign = ynumpy.kmeans(
        D, num_clusters, init='kmeans++', nt=params['num_threads'],
        output='all', redo=3, niter=params['kmeans_max_iter'], verbose=False)
    # scikit-learn alternative:
    # kmeans = KMeans(n_init=1, n_clusters=params['num_clusters'], n_jobs=2,
    #                 max_iter=params['kmeans_max_iter'])
    # kmeans.fit(D)
    # centroids = kmeans.cluster_centers_
    # labels = kmeans.labels_
    print('Done.')
    return centroids, labels
def show_matrices_recovered(W_r, H_r, W, H, cfg=config.default_config(), permute=True):
    if permute:
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
    else:
        idx = np.array([range(W.shape[1]), range(W.shape[1])])
    # f, axarr = plt.subplots(nrows=1, ncols=1)
    # axarr[0, 0].imshow(1-W_r, cmap='gray')
    # axarr[0, 0].set_title('W real')
    # axarr[0, 1].imshow(1-H_r, cmap='gray')
    # axarr[0, 1].set_title('H real')
    plt.matshow(1 - W[:, idx[:, 1]], cmap=plt.cm.gray)
def plsa3D(V, W, H, post='', cfg=config.default_config()):
    # Probabilistic latent semantic analysis via an explicit 3D responsibility tensor.
    eps = cfg['eps']
    (N, M) = V.shape
    T = H.shape[0]
    V3 = V.reshape(N, M, 1).repeat(T, 2).swapaxes(1, 2)
    W3 = W.reshape(N, T, 1).repeat(M, 2)
    H3 = H.T.reshape(M, T, 1).repeat(N, 2).swapaxes(0, 2)
    Q3 = dot(W, H).reshape(N, M, 1).repeat(T, 2).swapaxes(1, 2)
    Z = V3 * W3 * H3 / (Q3 + eps)
    W = normalize_cols(sum(Z, 2).reshape(N, T))
    H = normalize_cols(sum(Z, 0).reshape(T, M))
    return W, H
def gen_init(cfg=config.default_config()):
    N = cfg['N']
    T = cfg['T']
    M = cfg['M']
    gen_phi = getattr(generators, cfg['phi_init'])
    cfg['rows'] = N
    cfg['cols'] = T
    cfg['sparsity'] = cfg['phi_sparsity']
    W = gen_phi(cfg)
    gen_theta = getattr(generators, cfg['theta_init'])
    cfg['rows'] = T
    cfg['cols'] = M
    cfg['sparsity'] = cfg['theta_sparsity']
    H = gen_theta(cfg)
    return (W, H)
def load_dataset(cfg=config.default_config()):
    """Load or generate a dataset.

    - Return: F, vocab, N, M, Phi_r, Theta_r
    - Used params: load_data, data_name
    """
    if cfg['load_data'] == 'uci' or cfg['load_data'] == 1:
        print("uci")
        F, vocab = data.load_uci(cfg['data_name'], cfg)
        N, M = F.shape
        cfg['N'], cfg['M'] = F.shape
        print('Dimensions of F:', N, M)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, vocab, N, M, None, None
    elif cfg['load_data'] == 2:
        F, Phi_r, Theta_r = gen_real(cfg)
        print(Phi_r)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, None, F.shape[0], F.shape[1], Phi_r, Theta_r
    elif cfg['load_data'] == 3:
        print("uci halfmodel", cfg["alpha"])
        F, vocab = data.load_uci(cfg['data_name'], cfg)
        N, M = F.shape
        cfg['N'], cfg['M'] = F.shape
        Phi_r = load_obj('Phi_' + cfg['data_name'])
        Theta_r = load_obj('Theta_' + cfg['data_name'])
        F_merged = merge_halfmodel(F, Phi_r, Theta_r, cfg)
        print('Dimensions of F:', N, M)
        print('Checking assumption on F:', np.sum(F_merged, axis=0).max())
        return F_merged, vocab, N, M, Phi_r, Theta_r
    elif cfg['load_data'] == 4:
        F = np.eye(cfg['T'])
        cfg['N'], cfg['M'] = F.shape
        Phi_r = np.eye(cfg['T'])
        Theta_r = np.eye(cfg['T'])
        return F, None, cfg['T'], cfg['T'], Phi_r, Theta_r
    elif cfg['load_data'] == 5:
        cfg['real_theta_sparsity'] = 1.
        cfg['real_phi_sparsity'] = 1.
        F, Phi_r, Theta_r = gen_real(cfg)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, None, F.shape[0], F.shape[1], Phi_r, Theta_r
def load_uci(name, cfg=config.default_config()):
    print('Loading data in UCI format.')
    print('From:', cfg['data_dir'])
    print('Collection name:', name)
    N = 0
    with open(join(cfg['data_dir'], 'docword.' + name + '.txt'), 'r') as f:
        M = int(f.readline())
        N = int(f.readline())
        D = np.zeros((N, M), dtype='float32')
        f.readline()  # skip the nonzero-count header line
        for line in f:
            d, w, nwd = [int(x) for x in line.split(' ')]
            D[w - 1, d - 1] = D[w - 1, d - 1] + nwd
    vocab = np.arange(N).tolist()
    with open(join(cfg['data_dir'], 'vocab.' + name + '.txt'), 'r') as f:
        vocab = f.read().splitlines()
    return D, vocab
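# Illustrative only: the UCI bag-of-words layout load_uci() expects -- three
# header lines (doc count, vocab size, total nonzeros), then one
# "doc_id word_id count" triple per line, all 1-indexed. Paths and words
# below are made up for the demo.
import os

os.makedirs('/tmp/uci_demo', exist_ok=True)
with open('/tmp/uci_demo/docword.toy.txt', 'w') as f:
    f.write('2\n3\n4\n')                        # 2 docs, 3 words, 4 entries
    f.write('1 1 5\n1 3 2\n2 2 1\n2 3 7\n')
with open('/tmp/uci_demo/vocab.toy.txt', 'w') as f:
    f.write('apple\nbanana\ncherry\n')

D, vocab = load_uci('toy', cfg={'data_dir': '/tmp/uci_demo'})
print(D.shape, vocab)                           # (3, 2), word-by-document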
def reduce_multi_cluster(D, num_clusters, params=config.default_config()):
    print('Clustering:')
    D = ascontiguousarray(D.astype('float32'))
    # ncc = maximum(minimum(random.poisson(num_clusters, 15), 1000), 15)
    N = D.shape[0]
    # ncc = array([20, 50, 100, 250, 500, 1000, 2000, 4000, 6000])
    # range() for Python 3; the original used Python 2's xrange()
    ncc = array([25 * (2 ** p) for p in range(int(log2(N / 75)))])
    print(ncc)
    centroids = zeros((sum(ncc), D.shape[1]))
    labels = zeros((N, len(ncc)), dtype='int32')
    c = 0
    for it, nc in enumerate(ncc):
        new_centroids, _, _, new_labels, _ = ynumpy.kmeans(
            D.astype('float32'), nc, init='random', nt=params['num_threads'],
            output='all', redo=1, niter=params['kmeans_max_iter'], verbose=False)
        centroids[c:c + nc, :] = new_centroids
        labels[:, it] = new_labels.squeeze() + c
        c += nc
    print('Done.')
    return centroids, labels
def store_uci(D, name=str(date.today()), cfg=config.default_config()):
    print('Storing data in UCI format.')
    print('Destination:', cfg['data_dir'])
    print('Collection name:', name)
    N, M = D.shape
    nw = D.sum()
    with open(join(cfg['data_dir'], 'vocab.' + name + '.txt'), 'w') as f:
        print('Vocabulary...')
        for i in range(N):
            print(i, file=f)
    with open(join(cfg['data_dir'], 'docword.' + name + '.txt'), 'w') as f:
        print('DocWord matrix...')
        print(M, file=f)
        print(N, file=f)
        print(nw, file=f)
        cD = coo_matrix(D)  # sparse view makes printing the triples faster
        for d, w, ndw in zip(cD.row, cD.col, cD.data):
            print(d + 1, w + 1, ndw, file=f)
    print('Done.')
def gen_real(cfg=config.default_config()):
    N = cfg['N']
    T = cfg['T_0']
    M = cfg['M']
    gen_phi = getattr(generators, cfg['gen_phi'])
    cfg['rows'] = N
    cfg['cols'] = T
    cfg['sparsity'] = cfg['phi_sparsity']
    W_r = gen_phi(cfg)
    gen_theta = getattr(generators, cfg['gen_theta'])
    cfg['rows'] = T
    cfg['cols'] = M
    cfg['sparsity'] = cfg['theta_sparsity']
    H_r = gen_theta(cfg)
    # W_r = gen_matrix_sparse(N, T, 0.2)
    # W_r = gen_matrix_topic(cfg)
    # H_r = gen_matrix_sparse(T, M, 0.3)
    V = np.dot(W_r, H_r)
    return (V, W_r, H_r)
def train(**kwargs):
    setup_seed(2020)
    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    # load validation data
    val_data = ehr.EHR("dataset/EHR", "val")

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)

    # init model
    model = GradientBoostingClassifier(n_estimators=100,
                                       learning_rate=0.1,
                                       verbose=1,
                                       n_iter_no_change=10,
                                       random_state=10)
    train_feat, train_label = train_data.get_feat_data()
    print("Start Training.")
    model.fit(train_feat, train_label)
    print("Training Finished.")

    # eval on test set: load test data
    test_data = ehr.EHR("dataset/EHR", "test")
    test_feat, test_label = test_data.get_feat_data()
    test_metric, test_log, test_result = evaluate_clf(model, test_feat, test_label,
                                                      top_k_list=[3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
def grad_desc(V, W, H, post='', cfg=config.default_config()):
    # Projected gradient step with a geometrically decaying step size alpha.
    alpha = cfg[post + '_alpha']
    step = cfg[post + '_alpha_step']
    eps = cfg['eps']
    grad_W = dot((V - dot(W, H)), H.T)
    grad_H = dot(W.T, (V - dot(W, H)))
    W = W + alpha * grad_W
    W[(grad_W < eps) & (W < eps)] = 0
    W = normalize_cols(W)
    H = H + alpha * grad_H
    H[(grad_H < eps) & (H < eps)] = 0
    H = normalize_cols(H)
    alpha = alpha * step
    cfg[post + '_alpha'] = alpha
    return (W, H)
def __init__(self):
    help(config._config)
    arguments = self.parse_arguments()
    if arguments.print_config:
        print(config.print(config.default_config()))
        return
    if not arguments.config_file:
        print("Must specify configuration file via -c. If no configuration"
              " file exists, you can generate a blank one with the -p flag")
        return
    try:
        self.config = config.parse(arguments.config_file)
    except config.BadConfiguration:
        print("Your configuration file is invalid. To generate a new,"
              " blank configuration, use the -p flag.")
def train(**kwargs):
    setup_seed(2020)
    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    train_data_loader = DataLoader(train_data, model_param["batch_size"],
                                   shuffle=True, num_workers=0,
                                   collate_fn=collate_fn)

    # load validation data
    val_data = ehr.EHR("dataset/EHR", "val")
    val_data_loader = DataLoader(val_data, model_param["batch_size"],
                                 shuffle=False, num_workers=0,
                                 collate_fn=collate_fn)

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    # init model
    model = TextCNN(**model_param)
    early_stopper = EarlyStopping(patience=model_param["early_stop"],
                                  larger_better=True)
    if model_param["use_gpu"]:
        model.cuda()
    print("Model Inited.")

    optimizer = torch.optim.Adam(model.parameters(), lr=model_param["lr"],
                                 weight_decay=0)

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        model.train()
        for idx, (feat, dise) in enumerate(train_data_loader):
            pred = model.forward(feat)
            if model_param["use_gpu"]:
                label = torch.LongTensor(dise).cuda()
            else:
                label = torch.LongTensor(dise)
            # label is [1, 2, 3, ..., 27]
            loss = F.cross_entropy(pred, label - 1)  # multi-class xent loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        # do evaluation on recall and ndcg
        metric_result, eval_log, eval_result = evaluate_clf(
            model, val_data_loader, [5])
        print("{} Epoch {}/{}: [Val] {}".format(
            now(), epoch + 1, model_param["num_epoch"], eval_log))
        early_stopper(metric_result["ndcg_5"], model, "textcnn")
        if early_stopper.early_stop:
            print("[Early Stop] {} Epoch {}/{}: {}".format(
                now(), epoch + 1, model_param["num_epoch"], eval_log))
            break

    # eval on test set: load test data
    test_data = ehr.EHR("dataset/EHR", "test")
    test_data_loader = DataLoader(test_data, model_param["batch_size"],
                                  shuffle=False, num_workers=0,
                                  collate_fn=collate_fn)
    test_metric, test_log, test_result = evaluate_clf(model, test_data_loader,
                                                      top_k_list=[1, 3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
from server import ShellServer
from config import default_config
import sys

config = default_config()

try:
    port = sys.argv[1]
except IndexError:
    port = config.get('server.port') or input('Enter port: ')
port = int(port)

s = ShellServer(port)
print('Server running on 0.0.0.0:%i' % port)
try:
    s.serve()
except KeyboardInterrupt:
    pass
def train(**kwargs):
    setup_seed(2020)
    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    train_data_loader = DataLoader(train_data, model_param["batch_size"],
                                   shuffle=True, num_workers=0,
                                   collate_fn=collate_fn)

    # init model
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]
    gnn = HGNN_SDS(**model_param)
    if model_param["w2v"] is not None:
        # load w2v data
        gnn.load_symp_embed(model_param["w2v"])
    if use_gpu:
        gnn.cuda()
    print("Model Inited.")

    sds_sampler = SDS_sampler("dataset/EHR")

    # load PMI symptom-symptom matrix
    symp2symp_mat = sp.load_npz(os.path.join("dataset/EHR", "pmi_ss_mat.npz"))
    symp2symp_mat.setdiag(0)

    # total number of symptoms
    num_total_batch = gnn.num_symp // model_param["batch_size"]
    all_symp_index = np.arange(1, gnn.num_symp + 1)
    lambda_hard_r = lambda epoch: epoch * model_param["hard_ratio"] / model_param["num_epoch"]

    # build hard map and pos map
    symp2symp_hard_map = [0]
    symp2symp_pos_map = [0]
    for k in all_symp_index:
        symp2symp_b_ar = symp2symp_mat[k].toarray().flatten()
        max_index = np.argmax(symp2symp_b_ar)
        if max_index == 0:
            symp2symp_pos_map.append(np.random.randint(1, k))
            symp2symp_hard_map.append(np.random.randint(1, k))
        else:
            symp2symp_pos_map.append(max_index)
            symp2symp_b_ar[max_index] = -1
            max_2nd_index = np.argmax(symp2symp_b_ar)
            if max_2nd_index == 0:
                symp2symp_hard_map.append(np.random.randint(1, k))
            else:
                symp2symp_hard_map.append(max_2nd_index)
    symp2symp_hard_map = np.array(symp2symp_hard_map)
    symp2symp_pos_map = np.array(symp2symp_pos_map)
    print("Pos / Hard symptom map Inited.")

    optimizer = torch.optim.Adam(gnn.parameters(), lr=model_param["lr"],
                                 weight_decay=model_param["lr"])  # note: reuses lr as the weight-decay value

    last_total_loss = 1e10
    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        gnn.train()
        np.random.shuffle(all_symp_index)
        hard_ratio = lambda_hard_r(epoch)
        for idx in range(num_total_batch):
            batch_symp = all_symp_index[idx * model_param["batch_size"]:
                                        (idx + 1) * model_param["batch_size"]]
            # get pos symp and neg symp
            pos_symp = symp2symp_pos_map[batch_symp]
            # sample neg
            neg_symp = np.random.randint(1, gnn.num_symp, model_param["batch_size"])
            # cope with overlapping in pos and neg symps
            overlap_index = (neg_symp == pos_symp)
            overlap_symp = neg_symp[overlap_index]
            neg_symp[overlap_index] = symp2symp_hard_map[overlap_symp]
            if hard_ratio > 0:
                num_hard = int(hard_ratio * model_param["batch_size"])
                neg_symp[:num_hard] = symp2symp_hard_map[neg_symp[:num_hard]]

            batch_symp_ts = torch.LongTensor(batch_symp)
            pos_symp_ts = torch.LongTensor(pos_symp)
            neg_symp_ts = torch.LongTensor(neg_symp)
            if model_param["use_gpu"]:
                batch_symp_ts = batch_symp_ts.cuda()
                pos_symp_ts = pos_symp_ts.cuda()
                neg_symp_ts = neg_symp_ts.cuda()

            # forward batch symp
            batch_symp_data = sds_sampler(batch_symp, 1, 20)
            symp_emb = gnn.forward(batch_symp_ts, batch_symp_data)
            pos_symp_data = sds_sampler(pos_symp, 1, 20)
            pos_emb = gnn.forward(pos_symp_ts, pos_symp_data)
            neg_symp_data = sds_sampler(neg_symp, 1, 20)
            neg_emb = gnn.forward(neg_symp_ts, neg_symp_data)

            # triplet hinge loss on the similarity scores
            scores = symp_emb.mul(pos_emb).sum(1) - symp_emb.mul(neg_emb).sum(1) + 1.0
            scores[scores < 0] = 0
            loss = scores.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))
        if total_loss - last_total_loss > 0:
            print("Loss stopped decreasing; converged.")
            break
        last_total_loss = total_loss

    # save model
    torch.save(gnn.state_dict(), "./ckpt/sds_gnn.pt")
    print("Model saved.")
def train(**kwargs):
    w2v_model_name = "./ckpt/w2v"
    if os.path.exists(w2v_model_name):
        print("load word2vec model from", w2v_model_name)
        # load model directly
        w2v_model = Word2Vec.load(w2v_model_name)
    else:
        # load data
        filename = "./dataset/EHR/train/data.txt"
        fin = open(filename, "r")
        corpus = []
        for line in fin.readlines():
            corpus.append(line.strip().split()[2:])
        # learn word2vec model
        start_time = time.time()
        w2v_model = Word2Vec(corpus, size=64, window=3, min_count=1,
                             workers=4, sg=1)
        w2v_model.save("./ckpt/w2v")
        print("training done, costs {} secs.".format(time.time() - start_time))

    # start training and testing the MLP model
    setup_seed(2020)
    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    train_data_loader = DataLoader(train_data, model_param["batch_size"],
                                   shuffle=True, num_workers=0,
                                   collate_fn=collate_fn)

    # load validation data
    val_data = ehr.EHR("dataset/EHR", "val")
    val_data_loader = DataLoader(val_data, model_param["batch_size"],
                                 shuffle=False, num_workers=0,
                                 collate_fn=collate_fn)

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    # build an MLP for prediction
    model_param["w2v_model"] = w2v_model
    model = MLP(**model_param)
    early_stopper = EarlyStopping(patience=model_param["early_stop"],
                                  larger_better=True)
    if model_param["use_gpu"]:
        model.cuda()
    print("Model Inited.")

    # read weight_decay from the parsed params so the key always exists
    # (the original indexed kwargs directly, which raises if it is omitted)
    optimizer = torch.optim.Adam(model.parameters(), lr=model_param["lr"],
                                 weight_decay=model_param["weight_decay"])

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        model.train()
        for idx, (feat, dise) in enumerate(train_data_loader):
            pred = model.forward(feat)
            if model_param["use_gpu"]:
                label = torch.LongTensor(dise).cuda()
            else:
                label = torch.LongTensor(dise)
            # label is [1, 2, 3, ..., 27]
            loss = F.cross_entropy(pred, label - 1)  # multi-class xent loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        # do evaluation on recall and ndcg
        metric_result, eval_log, eval_result = evaluate_clf(
            model, val_data_loader, [5])
        print("{} Epoch {}/{}: [Val] {}".format(
            now(), epoch + 1, model_param["num_epoch"], eval_log))
        early_stopper(metric_result["ndcg_5"], model, "med2vec")
        if early_stopper.early_stop:
            print("[Early Stop] {} Epoch {}/{}: {}".format(
                now(), epoch + 1, model_param["num_epoch"], eval_log))
            break

    # eval on test set: load test data
    test_data = ehr.EHR("dataset/EHR", "test")
    test_data_loader = DataLoader(test_data, model_param["batch_size"],
                                  shuffle=False, num_workers=0,
                                  collate_fn=collate_fn)
    test_metric, test_log, test_result = evaluate_clf(model, test_data_loader,
                                                      top_k_list=[1, 3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
def anchor_words(D, loss='L2', params=config.default_config()):
    Q = generate_Q_matrix(D * 100)
    anchors = findAnchors(Q, params['T'], params)
    W, topic_likelihoods = do_recovery(Q, anchors, loss, params)
    return W
def run(V, W, H, W_r=None, H_r=None, cfg=config.default_config()):
    T = H.shape[0]
    eps = cfg['eps']
    schedule = cfg['schedule'].split(',')
    meas = cfg['measure'].split(',')
    val = np.zeros((cfg['max_iter'] + 2, len(meas)))
    hdist = np.zeros((cfg['max_iter'] + 2, 1))
    for i, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[0, i] = fun(V, np.dot(W, H))
    if cfg['compare_real']:
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
        hdist[0] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
    if cfg['print_lvl'] > 1:
        print('Initial loss:', val[0])
    status = 0
    methods_num = len(schedule)
    it = -1
    for it in range(cfg['max_iter']):
        if cfg['print_lvl'] > 1:
            print('Iteration', it + 1)
        W_old = deepcopy(W)
        H_old = deepcopy(H)
        method_name = schedule[it % methods_num]
        if cfg['print_lvl'] > 1:
            print('Method:', method_name)
        method = getattr(methods, method_name)
        (W, H) = method(V, W, H, method_name, cfg)
        if (it + 1) % cfg['normalize_iter'] == 0:
            W = normalize_cols(W)
            H = normalize_cols(H)
        for j, fun_name in enumerate(meas):
            fun = getattr(measure, fun_name)
            val[it + 1, j] = fun(V, np.dot(W, H))
        if cfg['compare_real']:
            idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
            hdist[it + 1] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
        if cfg['print_lvl'] > 1:
            print(val[it + 1])
        if all(val[it, :] < eps):
            if cfg['print_lvl'] > 1:
                print('By cost.')
            status = 1
            break
        if abs(W_old - W).max() < eps and abs(H_old - H).max() < eps:
            if cfg['print_lvl'] > 1:
                print('By argument.')
            status = 2
            break
    if cfg['print_lvl'] > 1:
        print('Final:')
    W = normalize_cols(W)
    H = normalize_cols(H)
    for j, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[it + 2:, j] = fun(V, np.dot(W, H))
    if cfg['compare_real']:
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
        hdist[it + 2:] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
    return (val, hdist, it, W, H, status)
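# Usage sketch (assumptions flagged): run() only needs the cfg keys it reads
# ('eps', 'schedule', 'measure', 'max_iter', 'normalize_iter', 'compare_real',
# 'print_lvl', 'munkres') plus whatever gen_init() consumes; 'mult,als' and
# 'frobenius' are placeholders for names defined in this codebase's methods
# and measure modules. V is a nonnegative data matrix, e.g. from load_csv().
cfg = config.default_config()
cfg['schedule'] = 'mult,als'     # alternate two update rules per iteration
cfg['measure'] = 'frobenius'     # a loss function from the measure module
cfg['max_iter'] = 50
cfg['normalize_iter'] = 5
cfg['compare_real'] = False
cfg['print_lvl'] = 2

W0, H0 = gen_init(cfg)           # random nonnegative starting factors
val, hdist, it, W, H, status = run(V, W0, H0, cfg=cfg)
print('stopped at iteration', it, 'with status', status)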
def initialize_matrices(i, F, cfg=config.default_config()):
    """Initialize matrices Phi, Theta.

    - Return: Phi, Theta
    - Used params: prepare_method
    """
    method = int(cfg['prepare_method'].split(',')[i])
    if method == 1:
        print("Arora")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps,
                                np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        return Phi, Theta
    elif method == 2:
        print("Random rare")
        cfg['phi_sparsity'] = 0.05
        cfg['theta_sparsity'] = 0.1
        return gen_init(cfg)
    elif method == 3:
        print("Random uniform")
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        return gen_init(cfg)
    elif method == 4:
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of words")
        centroids, labels = prepare.reduce_cluster(F_norm, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(
            np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps,
            np.dot(Theta, F_norm.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        return Phi, Theta
    elif method == 5:
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("SVD init")
        U, s, V = np.linalg.svd(F_norm)
        Phi, Theta = construct_from_svd(U, s, V, cfg)
        return Phi, Theta
    elif method == 6:
        eps = cfg['eps']
        transformer = TfidfTransformer()
        transformer.fit(F)
        F_tfidf = (transformer.transform(F)).toarray()
        print("Clustering of tf-idf")
        centroids, labels = prepare.reduce_cluster(F_tfidf, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(
            np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps,
            np.dot(Theta, F_tfidf.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        return Phi, Theta
    elif method == 7:
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of words mixed")
        centroids, labels = prepare.reduce_cluster(F_norm, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(
            np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps,
            np.dot(Theta, F_norm.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        Phi1, Theta1 = gen_init(cfg)
        zzz = 0.3
        return zzz * Phi1 + (1. - zzz) * Phi, zzz * Theta1 + (1. - zzz) * Theta
    elif method == 8:
        print("Arora mixed")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps,
                                np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        Phi1, Theta1 = gen_init(cfg)
        zzz = 0.3
        return zzz * Phi1 + (1. - zzz) * Phi, zzz * Theta1 + (1. - zzz) * Theta
    elif method == 9:
        print("Arora uniform")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.ones((Phi.shape[1], F.shape[1]))
        Theta = normalize_cols(Theta)
        return Phi, Theta
    elif method == 10:
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of docs")
        centroids, labels = prepare.reduce_cluster(F_norm.T, cfg['T'], cfg)
        Phi = centroids.T
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps,
                                np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        return Phi, Theta
def plsa(F, Phi, Theta, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = F / maximum(dot(Phi, Theta), eps)
    Theta, Phi = (normalize_cols(Theta * dot(Phi.T, tmp)),
                  normalize_cols(Phi * dot(tmp, Theta.T)))
    return Phi, Theta
def run(F, Phi, Theta, Phi_r=None, Theta_r=None, cfg=config.default_config()):
    """EM-algorithm driver.

    - Return: val, hdist, it, Phi, Theta, status
    """
    T = Theta.shape[0]
    eps = cfg['eps']
    schedule = cfg['schedule'].split(',')
    meas = cfg['measure'].split(',')
    val = np.zeros((cfg['max_iter'] + 2, len(meas)))
    hdist = np.zeros((2, cfg['max_iter'] + 2))  # Phi - first row, Theta - second
    for i, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[0, i] = fun(F, np.dot(Phi, Theta))
    if cfg['compare_real']:
        idx = get_permute(Phi_r, Theta_r, Phi, Theta, cfg['munkres'])
        hdist[0][0] = hellinger(Phi[:, idx[:, 1]], Phi_r[:, idx[:, 0]])
        hdist[1][0] = hellinger(Theta[idx[:, 1], :], Theta_r[idx[:, 0], :])
    if cfg['print_lvl'] > 1:
        print('Initial loss:', val[0])
    status = 0
    methods_num = len(schedule)
    it = -1
    for it in range(cfg['max_iter'] + 1):
        if cfg['print_lvl'] > 1:
            print('Iteration', it + 1)
        method_name = schedule[it % methods_num]
        if cfg['print_lvl'] > 1:
            print('Method:', method_name)
        method = getattr(methods, method_name)
        (Phi, Theta) = method(F, Phi, Theta, method_name, cfg)
        # jogging of weights: blend in a decaying amount of random noise
        if cfg['jogging'] == 1 and it < 10:
            jog_alpha = 0.25
            cfg['phi_sparsity'] = 0.05
            cfg['theta_sparsity'] = 0.1
            Phi_jog, Theta_jog = gen_init(cfg)
            Phi = (1 - jog_alpha ** (it + 1)) * Phi + jog_alpha ** (it + 1) * Phi_jog
            Theta = (1 - jog_alpha ** (it + 1)) * Theta + jog_alpha ** (it + 1) * Theta_jog
        for j, fun_name in enumerate(meas):
            fun = getattr(measure, fun_name)
            val[it + 1, j] = fun(F, np.dot(Phi, Theta))
        if cfg['compare_real']:
            idx = get_permute(Phi_r, Theta_r, Phi, Theta, cfg['munkres'])
            hdist[0][it + 1] = hellinger(Phi[:, idx[:, 1]], Phi_r[:, idx[:, 0]])
            hdist[1][it + 1] = hellinger(Theta[idx[:, 1], :], Theta_r[idx[:, 0], :])
        if cfg['print_lvl'] > 1:
            print(val[it + 1])
        if all(val[it, :] < eps):
            if cfg['print_lvl'] > 1:
                print('By cost.')
            status = 1
            break
    if cfg['print_lvl'] > 1:
        print('Final:')
    return (val, hdist, it, Phi, Theta, status)
def default_api(cls):
    conf = config.default_config()
    return cls(conf.get('jira_default', 'host'),
               conf.get('jira_default', 'path'),
               conf.get('jira_default', 'username'),
               conf.get('jira_default', 'password'))
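# A minimal sketch of the configuration default_api() reads, assuming
# config.default_config() returns a configparser.ConfigParser-like object
# with a [jira_default] section; all values below are placeholders.
from configparser import ConfigParser

conf = ConfigParser()
conf['jira_default'] = {
    'host': 'jira.example.com',
    'path': '/rest/api/2',
    'username': 'user',
    'password': 'secret',
}
print(conf.get('jira_default', 'host'))  # -> 'jira.example.com'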
def train(**kwargs):
    setup_seed(2020)
    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)
    dataset_name = model_param["dataset"]

    # load hard maps
    if model_param["hard_ratio"] > 0:
        model_param["hard_map"] = np.load("dataset/hard_dise.npy",
                                          allow_pickle=True).item()

    # load training data
    train_data = ehr.EHR("dataset/{}".format(dataset_name), "train")
    train_data_loader = DataLoader(train_data, model_param["batch_size"],
                                   shuffle=True, num_workers=0,
                                   collate_fn=collate_fn)

    # load validation data
    val_data = ehr.EHR("dataset/{}".format(dataset_name), "val")
    val_data_loader = DataLoader(val_data, model_param["batch_size"],
                                 shuffle=False, num_workers=0,
                                 collate_fn=collate_fn)

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    # init model
    gnn = HGNN(**model_param)
    if kwargs["w2v"] is not None:
        if os.path.exists(kwargs["w2v"]):
            # load w2v data
            gnn.load_symp_embed(kwargs["w2v"])
        else:
            from gensim.models import Word2Vec

            # build word2vec embeddings
            filename = "./dataset/EHR/train/data.txt"
            fin = open(filename, "r")
            corpus = []
            for line in fin.readlines():
                corpus.append(line.strip().split()[2:])
            # learn word2vec model
            start_time = time.time()
            w2v_model = Word2Vec(corpus, size=64, window=3, min_count=1,
                                 workers=4, sg=1)
            w2v_model.save("./ckpt/w2v")
            print("word2vec training done, costs {} secs.".format(
                time.time() - start_time))

    early_stopper = EarlyStopping(patience=model_param["early_stop"],
                                  larger_better=True)
    if use_gpu:
        gnn.cuda()
    print("Model Inited.")

    optimizer = torch.optim.Adam(gnn.parameters(), lr=model_param["lr"],
                                 weight_decay=0)

    # init sampler for negative sampling during training
    dsd_sampler = DSD_sampler("dataset/{}".format(dataset_name))
    print("D-S-D Sampler Inited.")

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        gnn.train()
        for idx, (feat, dise) in enumerate(train_data_loader):
            pred, pred_neg, emb_user, emb_dise, neg_emb_dise = gnn.forward(
                feat, dise, dsd_sampler)
            bpr_loss = create_bpr_loss(pred, pred_neg)
            l2_loss = create_l2_loss(emb_user, emb_dise, neg_emb_dise)
            loss = bpr_loss + model_param["weight_decay"] * l2_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += bpr_loss.item()
        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        # do evaluation on recall and ndcg
        metric_result, eval_log, eval_result = evaluate(
            gnn, val_data_loader, dsd_sampler, [5])
        print("{} Epoch {}/{}: [Val] {}".format(
            now(), epoch + 1, model_param["num_epoch"], eval_log))
        early_stopper(metric_result["ndcg_5"], gnn, "gnn")
        if early_stopper.early_stop:
            print("[Early Stop] {} Epoch {}/{}: {}".format(
                now(), epoch + 1, model_param["num_epoch"], eval_log))
            break

    # eval on test set: load test data
    test_data = ehr.EHR("dataset/{}".format(dataset_name), "test")
    test_data_loader = DataLoader(test_data, model_param["batch_size"],
                                  shuffle=False, num_workers=0,
                                  collate_fn=collate_fn)
    test_metric, test_log, test_result = evaluate(gnn, test_data_loader,
                                                  dsd_sampler,
                                                  top_k_list=[1, 3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
    M_ = ZA + M
    e = hash_function(M_)
    e = bytes_to_int(bits_to_bytes(e))
    t = (r + s) % n
    if t == 0:
        # print("wrong signature: t is 0")
        return False
    x1 = ECG_ele_add(ECG_k_point(s, Point(Gx, Gy)), ECG_k_point(t, PA)).x
    R = (e + x1) % n
    if R != r:
        # print("wrong signature: R unequal r")
        return False
    return True


'''
### test Signature ###
config.default_config()
parameters = config.get_parameters()
key = key_pair_generation(parameters)
dA = key[0]
PA = key[1]
IDA = '*****@*****.**'
M = '100'
Sig = Signature(M, IDA, dA, PA)
print(Sig)

### test Verification ###
Verification(M, Sig, IDA, dA, PA)
# print('ECG_k_point(2, PA)', ECG_k_point(2, Point(2,2)))
'''
def train(**kwargs):
    setup_seed(2020)
    model_param = default_config()
    model_param = parse_kwargs(model_param, kwargs)

    # load hard maps
    if model_param["hard_ratio"] > 0:
        model_param["hard_map"] = np.load("dataset/hard_dise.npy",
                                          allow_pickle=True).item()

    # load training data
    train_data = ehr.EHR("dataset/EHR", "train")
    train_data_loader = DataLoader(train_data, model_param["batch_size"],
                                   shuffle=True, num_workers=0,
                                   collate_fn=collate_fn)

    # load validation data
    val_data = ehr.EHR("dataset/EHR", "val")
    val_data_loader = DataLoader(val_data, model_param["batch_size"],
                                 shuffle=False, num_workers=0,
                                 collate_fn=collate_fn)

    # use data model to update model_param
    data_model_param = parse_data_model(train_data)
    model_param.update(data_model_param)
    use_gpu = model_param["use_gpu"]

    # init model
    gnn = HGNN_DSD(**model_param)
    if kwargs["w2v"] is not None:
        # load w2v data
        gnn.load_symp_embed(kwargs["w2v"])
    early_stopper = EarlyStopping(patience=model_param["early_stop"],
                                  larger_better=True)
    if use_gpu:
        gnn.cuda()
    print("Model Inited.")

    optimizer = torch.optim.Adam(gnn.parameters(), lr=model_param["lr"],
                                 weight_decay=0)

    # init sampler for negative sampling during training
    dsd_sampler = DSD_sampler("dataset/EHR")
    print("D-S-D Sampler Inited.")

    for epoch in range(model_param["num_epoch"]):
        total_loss = 0
        gnn.train()
        for idx, (feat, dise) in enumerate(train_data_loader):
            pred, pred_neg, emb_user, emb_dise, neg_emb_dise = gnn.forward(
                feat, dise, dsd_sampler)
            bpr_loss = create_bpr_loss(pred, pred_neg)
            l2_loss = create_l2_loss(emb_user, emb_dise, neg_emb_dise)
            loss = bpr_loss + model_param["weight_decay"] * l2_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += bpr_loss.item()
        print("{} Epoch {}/{}: train loss: {:.6f}".format(
            now(), epoch + 1, model_param["num_epoch"], total_loss))

        # do evaluation on recall and ndcg
        metric_result, eval_log, eval_result = evaluate(
            gnn, val_data_loader, dsd_sampler, [5])
        print("{} Epoch {}/{}: [Val] {}".format(
            now(), epoch + 1, model_param["num_epoch"], eval_log))
        early_stopper(metric_result["ndcg_5"], gnn, "gnn_dsd")
        if early_stopper.early_stop:
            print("[Early Stop] {} Epoch {}/{}: {}".format(
                now(), epoch + 1, model_param["num_epoch"], eval_log))
            break

    # eval on test set: load test data
    test_data = ehr.EHR("dataset/EHR", "test")
    test_data_loader = DataLoader(test_data, model_param["batch_size"],
                                  shuffle=False, num_workers=0,
                                  collate_fn=collate_fn)
    test_metric, test_log, test_result = evaluate(gnn, test_data_loader,
                                                  dsd_sampler,
                                                  top_k_list=[1, 3, 5, 10])
    print("[Test] {}: {}".format(now(), test_log))
    print("Training Done.")
# http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_website_region_endpoints
s3_website_regions = {
    'us-east-1': ('s3-website-us-east-1.amazonaws.com.', 'Z3AQBSTGFYJSTF'),
    'us-west-2': ('s3-website-us-west-2.amazonaws.com.', 'Z3BJ6K6RIION7M'),
    'us-west-1': ('s3-website-us-west-1.amazonaws.com.', 'Z2F56UZL2M1ACD'),
    'eu-west-1': ('s3-website-eu-west-1.amazonaws.com.', 'Z1BKCTXD74EZPE'),
    'ap-southeast-1': ('s3-website-ap-southeast-1.amazonaws.com.', 'Z3O0J2DXBE1FTB'),
    'ap-southeast-2': ('s3-website-ap-southeast-2.amazonaws.com.', 'Z1WCIGYICN2BYD'),
    'ap-northeast-1': ('s3-website-ap-northeast-1.amazonaws.com.', 'Z2M4EHUR26P7ZW'),
    'sa-east-1': ('s3-website-sa-east-1.amazonaws.com.', 'Z7KQH4QJS55SO'),
    'us-gov-west-1': ('s3-website-us-gov-west-1.amazonaws.com.', 'Z31GFT0UA1I2HV'),
}

aws = None
initialized = False
config = riker_config.default_config()


def get_public_dns(instances):
    return [inst.public_dns_name for inst in instances]


def ensure_running(instances, timeout=600, poll_delay=10):
    if len(instances) == 0:
        return
    log('info', 'Waiting for instances {} to be running'.format(instances),
        show_header=True)

    def get_status():
        try:
            return aws.conn.get_all_instance_status([inst.id for inst in instances])
        except boto.exception.EC2ResponseError:
            log('info', 'No status yet')

    def is_status_ok(statuses):
        # for s in statuses:
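# Hypothetical helper (not in the original module) showing how the
# (endpoint, hosted-zone-id) pairs in s3_website_regions above are typically
# consumed when building a Route 53 alias target for an S3 website bucket.
def s3_website_alias_target(region):
    try:
        endpoint, zone_id = s3_website_regions[region]
    except KeyError:
        raise ValueError('no S3 website endpoint known for region %r' % region)
    return {'DNSName': endpoint, 'HostedZoneId': zone_id}

# e.g. {'DNSName': 's3-website-us-east-1.amazonaws.com.',
#       'HostedZoneId': 'Z3AQBSTGFYJSTF'}
print(s3_website_alias_target('us-east-1'))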
    x1 = ECG_ele_add(ECG_k_point(s, Point(Gx, Gy)), ECG_k_point(t, PA)).x
    # print("x1:", x1)
    R = (e1 + x1) % n
    # print("R:", R)
    if R == r:
        print("R equals r; verification passed")
    else:
        print("R does not equal r; verification failed")
    return True


### test Signature ###
config.default_config()
parameters = config.get_parameters()
point_g = Point(config.get_Gx(), config.get_Gy())
n = config.get_n()

print("Please enter the file to verify:")
f1 = input()
f = open(f1, 'r')
M = f.read()

IDA = '*****@*****.**'

print("Please enter the signature to verify:")
f2 = input()
sign = open(f2, "r")
signature = sign.read().replace("[", "").replace("]", "")
def main(**kwargs):
    # parse parameters
    param = default_config()
    param.update({
        "mode": "sds",
        "top_k": 10,
        "ckpt": "ckpt/gnn.pt",
        "use_gpu": False,
    })
    param.update(kwargs)

    # read maps
    symp2id, id2symp = read_symp2id()
    dise2id, id2dise = read_dise2id()

    # read data
    datapath = os.path.join("dataset/EHR/test/data.txt")
    fin = open(datapath, "r", encoding="utf-8")
    lines = fin.readlines()
    data_model = ehr.EHR("dataset/EHR", "train")

    # init retrieval system
    ehr_ret = EHR_retrieval(mode=param["mode"])

    # init and load model
    data_model_param = parse_data_model(data_model)
    param.update(data_model_param)
    param = parse_kwargs(param, kwargs)
    gnn = HGNN(**param)
    if param["use_gpu"]:
        gnn.cuda()
    ckpt_path = param.get("ckpt")
    if ckpt_path is None:
        print("[Warning] ckpt path not set, loading from the default path.")
        load_ckpt("ckpt/checkpoint.pt", gnn, param["use_gpu"])
    else:
        load_ckpt(ckpt_path, gnn, param["use_gpu"])

    dsd_sampler = DSD_sampler("dataset/EHR")
    usu_sampler = USU_sampler("dataset/EHR")
    gnn.eval()
    emb_dise = gnn.gen_all_dise_emb(dsd_sampler)

    # init result lists
    before_list = []
    after_list = []
    real_dise_list = []
    init_symp_list = []
    after_symp_list = []
    result_map_bfo = defaultdict(list)
    result_map_aft = defaultdict(list)

    # top_k values for evaluating p@N, Rec@N, ...
    top_k_list = [1, 5]

    for i, line in enumerate(lines):
        line_data = line.strip().split()
        uid = line_data[0]
        did = line_data[1]
        real_dise_list.append(did)
        symps = line_data[2:]

        # select the first symptom and do inference
        init_symp = symps[0]
        init_symp_list.append(id2symp[init_symp])
        symp_ar = np.array([[init_symp]])
        pred_rank = gnn.rank_query(symp_ar, emb_dise, usu_sampler, top_k=5)

        # calculate statistics before query expansion
        for top_k in top_k_list:
            pred_top_k = pred_rank[0][:top_k]
            calculate_rec_ndcg(pred_top_k, int(did), top_k, result_map_bfo)
        before_list.append(pred_rank[0])

        # expand the query with retrieved symptoms, then rank again
        rank_symp = ehr_ret(symp_idx=init_symp, top_k=param["top_k"])
        after_symp_list.append([id2symp[str(t)] for t in rank_symp])
        symp_ar = [np.concatenate([[init_symp], rank_symp], 0)]
        pred_rank = gnn.rank_query(symp_ar, emb_dise, usu_sampler, top_k=5)
        for top_k in top_k_list:
            pred_top_k = pred_rank[0][:top_k]
            calculate_rec_ndcg(pred_top_k, int(did), top_k, result_map_aft)
        after_list.append(pred_rank[0])

        ret_symps = ehr_ret(init_symp, param["top_k"])
        ret_symp_list = []
        for sid in ret_symps:
            ret_symp_list.append(id2symp[str(sid)])

        if i % 100 == 0:
            print("[line]:", i)

    # summary
    bf_log = build_result_log(result_map_bfo, top_k_list)
    af_log = build_result_log(result_map_aft, top_k_list)
    print("[before]: {}".format(bf_log))
    print("[after]: {}".format(af_log))

    # write result file
    fout = open("retrieval_result_{}.txt".format(param["mode"]), "w",
                encoding="utf-8")
    fout.write("did\tbefore_pred\tafter_pred\tinit_symp\taftersymp\n")
    for i in range(len(init_symp_list)):
        wrtline = (id2dise[int(real_dise_list[i])] + "\t"
                   + id2dise[int(before_list[i][0])] + "\t"
                   + id2dise[int(after_list[i][0])] + "\t"
                   + init_symp_list[i] + "\t"
                   + "#".join(after_symp_list[i]) + "\n")
        fout.write(wrtline)
    fin.close()
    fout.close()

    df_res = pd.read_table("retrieval_result_{}.txt".format(param["mode"]))
    df_res.to_excel("retrieval_result_{}.xlsx".format(param["mode"]),
                    encoding="utf-8")
    print("Done")
def mult(V, W, H, post='', cfg=config.default_config()):
    # Gradient descent with multiplicative update rule.
    eps = cfg['eps']
    H = H * dot(W.T, V) / maximum(dot(W.T, dot(W, H)), eps)
    W = W * dot(V, H.T) / maximum(dot(W, dot(H, H.T)), eps)
    return (W, H)
def plsa(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = V / maximum(dot(W, H), eps)
    H = normalize_cols(H * dot(W.T, tmp))
    W = normalize_cols(W * dot(tmp, H.T))
    return W, H