def test(self):
    """Evaluate the model on the test impressions and print ranking metrics.

    Steps:
      1. Encode every batch of ``self.test_news_loader`` with
         ``self.model.model.get_news_vector`` and remember, for each news id,
         the row it occupies in the concatenated news matrix.
      2. Build a ``TestUserDataset`` from those news vectors and encode every
         batch with ``self.model.model.get_user_vector``.
      3. Read ``test_behaviors.csv`` from ``self.args.data_dir`` (columns 2 and
         3, named ``impressions`` and ``y_true``), score each impression list
         with a dot product against the user vector, and accumulate AUC, MRR,
         nDCG@5 and nDCG@10.

    The mean of each metric is printed; nothing is returned.

    NOTE(review): user vectors are looked up by the DataFrame row index, so
    this assumes ``TestUserDataset`` yields users in the same order as the
    rows of ``test_behaviors.csv`` -- confirm against the dataset class.
    """
    with torch.no_grad():
        self.model.eval()
        AUC_list, MRR_list, nDCG5_list, nDCG10_list = [], [], [], []

        # Maps news id -> row index inside total_test_news.
        total_news_id = {}
        # Collect per-batch outputs and concatenate once afterwards; calling
        # torch.cat inside the loop re-copies the whole buffer every batch.
        news_chunks = []
        with tqdm(total=len(self.test_news_loader),
                  desc='Generating test news vector') as p:
            for news_id_batch, news_batch in self.test_news_loader:
                news_chunks.append(
                    self.model.model.get_news_vector(news_batch))
                for news_id in news_id_batch:
                    total_news_id[news_id] = len(total_news_id)
                p.update(1)
        total_test_news = torch.cat(news_chunks)

        test_user_loader = DataLoader(
            TestUserDataset(self.args, total_test_news, total_news_id),
            batch_size=self.args.batch_size,
            shuffle=False,  # order must stay stable: users are indexed by row below
            num_workers=self.args.n_threads,
            pin_memory=False)

        user_chunks = []
        with tqdm(total=len(test_user_loader),
                  desc='Generating test user vector') as p:
            for batch in test_user_loader:
                user_chunks.append(self.model.model.get_user_vector(batch))
                p.update(1)
        total_test_user = torch.cat(user_chunks)

        behaviors = pd.read_table(
            os.path.join(self.args.data_dir, 'test_behaviors.csv'),
            na_filter=False,
            usecols=[2, 3],
            names=['impressions', 'y_true'],
            header=0)

        with tqdm(total=len(behaviors), desc='Predicting') as p:
            for row in behaviors.itertuples():
                user_vector = total_test_user[row.Index]
                impression_ids = row.impressions.split(' ')
                # One row per candidate news item of this impression.
                news_vector = torch.stack([
                    total_test_news[total_news_id[news_id]]
                    for news_id in impression_ids
                ])
                y_true = [int(x) for x in row.y_true.split(' ')]
                predict = torch.matmul(news_vector, user_vector).tolist()
                AUC_list.append(AUC(y_true, predict))
                MRR_list.append(MRR(y_true, predict))
                nDCG5_list.append(nDCG(y_true, predict, 5))
                nDCG10_list.append(nDCG(y_true, predict, 10))
                p.update(1)

        print('AUC:', np.mean(AUC_list))
        print('MRR:', np.mean(MRR_list))
        print('nDCG@5:', np.mean(nDCG5_list))
        print('nDCG@10:', np.mean(nDCG10_list))
cfg = edict(json.load(f)) if isinstance(cfg.batch_size, list) and isinstance(cfg.long_side, list): list_batch = cfg.batch_size list_res = cfg.long_side elif isinstance(cfg.batch_size, int) and isinstance(cfg.long_side, int): list_batch = [cfg.batch_size] list_res = [cfg.long_side] else: raise Exception("'batch_size' and 'long_side' in config file should be same instance!!!") loss_func = BCEWithLogitsLoss() # data_dir = '/home/tungthanhlee/bdi_xray/data/images' data_dir = '/home/dual1/thanhtt/assigned_jpeg' metrics_dict = {'acc': ACC(), 'auc':AUC(), 'precision':Precision(), 'recall':Recall(), 'specificity':Specificity(), 'f1':F1()} model_names=[ 'dense', 'dense', 'dense', # 'resnet', # 'dense', # 'efficient', #'resnest' ] ids = [ '121', '121', '121', # '101',
# Greedy forward selection, run independently for every class: in each round
# the candidate whose column, added to the running sum of the columns already
# selected for that class, yields the highest AUC is moved from
# candidate_idcs[k] to selected_idcs[k].

# Running sum of the selected columns, one entry per class (starts at 0).
selected_sum = [0 for _ in np.arange(num_classes)]
print(num_results)
for i in np.arange(num_results):
    print(i)
    st = time.time()
    # One score list per class; sized from num_classes instead of being
    # hard-coded to six lists, which broke for num_classes > 6.
    sub_auc = [[] for _ in np.arange(num_classes)]
    if i > 0:
        # Fold the column chosen in the previous round into the running sum.
        for j in np.arange(num_classes):
            selected_sum[j] = selected_sum[j] + data[j][:, selected_idcs[j][-1]]
    for k in np.arange(num_classes):
        s = time.time()
        for idx in candidate_idcs[k]:
            sub_auc[k].append(AUC(labels[:, k], selected_sum[k] + data[k][:, idx]))
        e = time.time()
        print(e - s)
        max_idx = np.argmax(np.asarray(sub_auc[k]))
        best = candidate_idcs[k][max_idx]
        selected_idcs[k].append(best)
        candidate_idcs[k].remove(best)
        auc[i, k] = sub_auc[k][max_idx]
        selected_features[k].append(results[best])
    et = time.time()
    print("elapsed time is %f seconds, auc is: " % (et - st))
    print(auc[i])
np.save('selection_result.npy', [auc, selected_idcs, selected_features])
mode='train', dicom=False, type=cfg.type) val_loader = create_loader(cfg.dev_csv, data_dir, cfg, mode='val', dicom=False, type=cfg.type) # loss_func = BCELoss() # loss_func = BCEWithLogitsLoss() loss_func = MSELoss() metrics_dict = { 'auc': AUC(), 'sensitivity': Recall(), 'specificity': Specificity(), 'f1': F1() } loader_dict = {'train': train_loader, 'val': val_loader} #------------------------------- additional config for ensemble --------------------------------------- model_names = [ 'dense', 'resnet', 'dense', # 'efficient', #'resnest' ] ids = [
data_class = ImageDataset_full else: data_class = ImageDataset train_loader = DataLoader(data_class(cfg.train_csv, cfg, mode='train'), num_workers=4, drop_last=True, shuffle=True, batch_size=cfg.batch_size) val_loader = DataLoader(data_class(cfg.dev_csv, cfg, mode='dev'), num_workers=4, drop_last=False, shuffle=False, batch_size=cfg.batch_size) metrics_dict = {'acc': ACC(), 'auc': AUC()} loader_dict = {'train': train_loader, 'val': val_loader} chexpert_model = CheXpert_model(cfg, loss_func, metrics_dict) # chexpert_model.load_ckp(cfg.ckp_path) # chexpert_model.freeze_backbone() writer = SummaryWriter(os.path.join('experiment', cfg.log_dir)) ckp_dir = os.path.join('experiment', cfg.log_dir, 'checkpoint') chexpert_model.train(train_loader, val_loader, epochs=cfg.epochs, iter_log=cfg.iter_log, writer=writer,
# Validation-split sizes, image/input-shape constants and the Keras metric list, followed by the start of main().
# NOTE(review): N_VAL_GOOD and N_VAL_BAD are floats because N_VALIDATION is multiplied by 0.9 / 0.1 -- confirm that consumers accept non-integer counts.
N_VALIDATION = 6000 N_VAL_GOOD = N_VALIDATION * 0.9 N_VAL_BAD = N_VALIDATION * 0.1 # constants IF_DATA_AUGMENTATION = True NUM_CLASSES = 2 IMAGE_WIDTH = IMAGE_HEIGHT = 224 IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT) IMAGE_CHANNELS = 1 INPUT_SHAPE = [IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS] METRICS = [ keras.metrics.BinaryAccuracy(name='accuracy'), AUC(name='auc_good_0') # AUC(name='auc_bad_1') # AUC with 'bad' treated as the positive class ] def main(): """ Use tensorflow version 2 """ assert tf.__version__[0] == "2" """ Load Config """ with open('./config/config.json', 'r') as f: CONFIG = json.load(f) BATCH_SIZE = 32 # CONFIG["BATCH_SIZE"] ROOT_PATH = CONFIG["ROOT_PATH"] TRAIN_DATA_DIR = CONFIG["TRAIN_DATA_DIR"] VALIDATION_DATA_DIR = CONFIG["VALIDATION_DATA_DIR"] TRAIN_DATA_DIR = os.path.join(ROOT_PATH, TRAIN_DATA_DIR)
if __name__ == '__main__':
    # Alternative ways of producing the train/test split, kept for reference.
    # The raw-dataset handle is only needed by these disabled calls, so it is
    # no longer opened (it was previously opened and never used or closed).
    # NetData = np.loadtxt("./Datasets/raw/wikivote.txt")
    # f = open("./Datasets/raw/wikivote.txt", "r")
    # train, test = train_test_split.k_fold_split(f, 10)
    # sim = np.dot(train, train)
    # score = AUC.Calculation_AUC(train, test, sim, train.shape[0])
    # print(score)
    # train, test = train_test_split.time_based_split(f, 0.9)

    # Load the pre-computed split; the context manager closes both files as
    # soon as they have been parsed.
    with open("train.txt", "r") as f_tr, open("test.txt", "r") as f_te:
        train, test = train_test_split.read_from_txt(f_tr, f_te)

    # Score three similarity matrices with AUC.Calculation_AUC.
    # NOTE(review): the trailing 10000 is presumably the number of sampled
    # comparisons -- confirm against AUC.Calculation_AUC.
    sim_1 = np.dot(train, train)
    score_1 = AUC.Calculation_AUC(train, test, sim_1, train.shape[0], 10000)
    print(score_1)

    sim_2 = np.dot(np.dot(train, train.T), train)
    score_2 = AUC.Calculation_AUC(train, test, sim_2, train.shape[0], 10000)
    print(score_2)

    sim_3 = basic_measures.IP(train, 0.3)
    score_3 = AUC.Calculation_AUC(train, test, sim_3, train.shape[0], 10000)
    print(score_3)

    # n_folds = 10
    # linklist = []
    # train_list = []
    # test_list = []
    # f = open("./Datasets/temporal_sort/email-Eu-core-temporal_sorted.txt", "r")
    # train_list, test_list = train_test_split.train_test_split(f, 0.8)
    #
    # f_t = open("train.txt", "r")
import numpy as np

from preprocess import train_test_split
from metrics import evaluationMetric
from metrics import AUC
from algorithms import basic_measures

if __name__ == '__main__':
    # f = open("./Datasets/raw/wikivote.txt", "r")
    # linklist = prehandle_dataset.gen_linklist_from_txt(f)
    # f = open("linklist.txt", "r")
    # linklist = prehandle_dataset.gen_linklist_from_txt(f)
    # adj_train, adj_test = train_test_split.k_fold_split(linklist, 10)

    # Evaluate the Bifan similarity on N_exp pre-divided train/test splits of
    # one network and report the mean AUC and mean precision over the splits.
    network = "PB"
    N_exp = 5
    auc = np.zeros((1, N_exp))
    pre = np.zeros((1, N_exp))
    for ith_exp in range(N_exp):
        # Split files are numbered from 1.
        train_path = "./divided_dataset/{}_tr_0.9_{}.txt".format(network, ith_exp + 1)
        test_path = "./divided_dataset/{}_te_0.9_{}.txt".format(network, ith_exp + 1)
        # Close both files once parsed instead of leaking two handles per run.
        with open(train_path, "r") as f_tr, open(test_path, "r") as f_te:
            adj_train, adj_test = train_test_split.read_from_txt(f_tr, f_te)
        sim = basic_measures.Bifan(adj_train)
        auc[0, ith_exp] = AUC.Calculation_AUC(adj_train, adj_test, sim, adj_train.shape[0], 10000)
        pre[0, ith_exp] = evaluationMetric.cal_precision(adj_train, adj_test, sim, 100)
    print(auc.mean())
    print(pre.mean())