train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                           sampler=sampler, shuffle=False,
                                           num_workers=4, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size,
                                         shuffle=False, num_workers=4,
                                         drop_last=True)

resnet101 = torchvision.models.resnet101(pretrained=True)
resnet101.fc = nn.Linear(2048, 18)
if continue_train:
    print('Loading network')
    load_model(resnet101, checkpoint_dir, 5, was_parallel=was_parallel)
resnet101.cuda(cuda_id)
Loss_function = nn.CrossEntropyLoss()
if parallel:
    resnet101 = torch.nn.DataParallel(resnet101, device_ids=cuda_id_list)

total_val_img = np.zeros(18)
total_correct_val_img = np.zeros(18)

# -------------------------- evaluation -----------------------
print('evaluating now')
resnet101.eval()
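# `load_model` above is a project helper, not a torchvision API. A minimal sketch
# of what such a helper might look like (hypothetical signature and checkpoint
# naming, assuming checkpoints were written with torch.save(model.state_dict(), ...)):
# when the checkpoint came from a DataParallel-wrapped model, every key carries a
# 'module.' prefix that must be stripped before loading into a plain model.
import os
import torch

def load_model(model, checkpoint_dir, which_epoch, was_parallel=False):
    path = os.path.join(checkpoint_dir, '%s_net.pth' % which_epoch)  # hypothetical layout
    state_dict = torch.load(path, map_location='cpu')
    if was_parallel:
        # strip the 'module.' prefix added by torch.nn.DataParallel
        state_dict = {k.replace('module.', '', 1): v for k, v in state_dict.items()}
    model.load_state_dict(state_dict)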
""" #if using zhang's code data_transforms = transforms.Compose([ transforms.Resize([224, 224]), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) dataloader = torch.utils.data.DataLoader(testData(transforms=data_transforms), batch_size=4, shuffle=False, num_workers=4) net = torchvision.models.resnet152(pretrained=True) net.fc = nn.Linear(2048, 18) load_model(net, checkpoint_dir, 'latest', was_parallel=was_parallel) if use_gpu: net = net.cuda(gpu_id) net.eval() id_list = [] id_no = 1 pred_list = [] for inputs in dataloader: if use_gpu: inputs = Variable(inputs.cuda(gpu_id)) else: inputs = Variable(inputs)
def main():
    p_dict = dict()  # all parameters
    p_dict['args'] = args

    ### load data
    word_index_dict = json.load(open(args.word_index_json))
    args.words = {v: k for k, v in word_index_dict.items()}
    p_dict['word_index_dict'] = word_index_dict
    num_classes = args.num_classes
    p_dict['num_classes'] = num_classes
    image_label_dict = json.load(open(args.image_label_json))
    p_dict['image_label_dict'] = image_label_dict

    # split the dataset
    test_filelist = sorted(glob(os.path.join(args.data_dir, 'test/plots', '*.png')))
    trainval_filelist = sorted(glob(os.path.join(args.data_dir, 'train/plots', '*.png')))

    # training with two input sizes:
    # train_filelist1: images with aspect ratio below 8:1, padded to 64*512 inputs
    # train_filelist2: images with aspect ratio above 8:1, padded and cropped to 64*1024 inputs
    '''
    train_filelist1, train_filelist2 = [], []
    # blacklist: the labels of these images are problematic
    black_list = set(json.load(open(args.black_json))['black_list'])
    image_hw_ratio_dict = json.load(open(args.image_hw_ratio_json))
    for f in trainval_filelist:
        image = f.split('/')[-1]
        if image in black_list:
            continue
        r = image_hw_ratio_dict[image]
        if r == 0:
            train_filelist1.append(f)
        else:
            train_filelist2.append(f)
    val_filelist = train_filelist1[-2048:]
    train_filelist = train_filelist1[:-2048]
    '''
    # train_filelist1 = sorted(trainval_filelist)
    # val_filelist = train_filelist1[-2048:]
    # train_filelist = train_filelist1[:-2048]
    val_filelist = test_filelist
    train_filelist = trainval_filelist

    # generated_list = glob(os.path.join(args.data_dir.replace('dataset', 'generated_images'), '*_image.png'))
    # n_test = 4096
    # pretrain_filelist = generated_list[:-n_test]
    # preval_filelist = generated_list[-n_test:]
    # train_filelist2 = train_filelist2

    image_size = args.input_shape
    test_dataset = dataloader.DataSet(
        test_filelist, image_label_dict, num_classes,
        # transform=train_transform,
        args=args, image_size=image_size, phase='test')
    test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False,
                             num_workers=args.workers, pin_memory=True)
    train_dataset = dataloader.DataSet(
        train_filelist, image_label_dict, num_classes,
        image_size=image_size, args=args, phase='train')
    train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.workers, pin_memory=True)
    val_dataset = dataloader.DataSet(
        val_filelist, image_label_dict, num_classes,
        image_size=image_size, args=args, phase='val')
    val_loader = DataLoader(dataset=val_dataset, batch_size=args.batch_size,
                            shuffle=False, num_workers=args.workers, pin_memory=True)
    '''
    pretrain_dataset = dataloader.DataSet(
        pretrain_filelist, image_label_dict, num_classes,
        image_size=image_size, word_index_dict=word_index_dict, args=args,
        font_range=[8, 32], margin=10, rotate_range=[-10., 10.], phase='pretrain')
    pretrain_loader = DataLoader(dataset=pretrain_dataset, batch_size=args.batch_size,
                                 shuffle=True, num_workers=args.workers, pin_memory=True)
    preval_dataset = dataloader.DataSet(
        preval_filelist, image_label_dict, num_classes,
        image_size=image_size, word_index_dict=word_index_dict, args=args,
        font_range=[8, 32], margin=10, rotate_range=[-10., 10.], phase='pretrain')
    preval_loader = DataLoader(dataset=preval_dataset, batch_size=args.batch_size,
                               shuffle=False, num_workers=args.workers, pin_memory=True)
    '''
    p_dict['train_loader'] = train_loader
    p_dict['val_loader'] = val_loader
    p_dict['test_loader'] = test_loader
    # p_dict['pretrain_loader'] = pretrain_loader
    # p_dict['train_loader'] = pretrain_loader
    # p_dict['val_loader'] = preval_loader
    # p_dict['test_loader'] = preval_loader

    ### define the model
    cudnn.benchmark = True
    framework.build_model(p_dict)
    parameters = []
    model_dict = p_dict['model_dict']
    for model in model_dict.values():
        for p in model.parameters():
            parameters.append(p)
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    p_dict['optimizer'] = optimizer
    # model = torch.nn.DataParallel(model).cuda()
    # loss = Loss().cuda()

    start_epoch = 0
    # args.epoch = start_epoch
    # print ('best_f1score' + str(best_f1score))
    p_dict['epoch'] = 0
    p_dict['best_metric'] = 0

    ### load pretrained model and parameters
    if os.path.exists(args.resume):
        function.load_model(p_dict, args.resume)

    ### train and test the model
    if args.phase == 'test':
        # write out the text-detection results
        test(p_dict)
    elif args.phase == 'train':
        best_f1score = 0
        eval_mode = 'eval'
        best_macc = 0
        p_dict['best_metric'] = 0
        for epoch in range(p_dict['epoch'] + 1, args.epochs):
            p_dict['epoch'] = epoch
            if best_f1score > 0.9:
                args.lr = 0.0001
                args.hard_mining = 1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
            train_eval(p_dict, 'train')
            train_eval(p_dict, 'val')
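# NOTE (observation, not in the original): best_f1score is initialised to 0 and
# never reassigned in this loop, so the lr-drop / hard-mining switch above can
# only trigger if train_eval updates it through p_dict. If train_eval returns the
# validation F1, the intended update is presumably something like:
#     f1 = train_eval(p_dict, 'val')
#     best_f1score = max(best_f1score, f1)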
def main():
    p_dict = dict()  # all the parameters
    p_dict['args'] = args
    args.split_nn = args.split_num + args.split_nor * 3
    args.vocab_size = args.split_nn * 145 + 1
    print('vocab_size', args.vocab_size)

    ### load data
    print('read data ...')
    patient_time_record_dict = py_op.myreadjson(
        os.path.join(args.result_dir, 'patient_time_record_dict.json'))
    patient_master_dict = py_op.myreadjson(
        os.path.join(args.result_dir, 'patient_master_dict.json'))
    patient_label_dict = py_op.myreadjson(
        os.path.join(args.result_dir, 'patient_label_dict.json'))

    patient_train = list(
        json.load(open(os.path.join(args.file_dir, args.task, 'train.json'))))
    patient_valid = list(
        json.load(open(os.path.join(args.file_dir, args.task, 'val.json'))))
    if len(patient_train) > len(patient_label_dict):
        patients = sorted(patient_label_dict.keys())  # sorted for a reproducible split
        n = int(0.8 * len(patients))
        patient_train = patients[:n]
        patient_valid = patients[n:]

    print('data loading ...')
    train_dataset = dataloader.DataSet(patient_train,
                                       patient_time_record_dict,
                                       patient_label_dict,
                                       patient_master_dict,
                                       args=args,
                                       phase='train')
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True)
    val_dataset = dataloader.DataSet(patient_valid,
                                     patient_time_record_dict,
                                     patient_label_dict,
                                     patient_master_dict,
                                     args=args,
                                     phase='val')
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=8,
                            pin_memory=True)
    p_dict['train_loader'] = train_loader
    p_dict['val_loader'] = val_loader

    cudnn.benchmark = True
    net = lstm.LSTM(args)
    if args.gpu:
        net = net.cuda()
        p_dict['loss'] = loss.Loss().cuda()
    else:
        p_dict['loss'] = loss.Loss()

    parameters = []
    for p in net.parameters():
        parameters.append(p)
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    p_dict['optimizer'] = optimizer
    p_dict['model'] = net
    start_epoch = 0
    # args.epoch = start_epoch
    # print ('best_f1score' + str(best_f1score))
    p_dict['epoch'] = 0
    p_dict['best_metric'] = [0, 0]

    ### resume pretrained model
    if os.path.exists(args.resume):
        print('resume from model ' + args.resume)
        function.load_model(p_dict, args.resume)
        print('best_metric', p_dict['best_metric'])
        # return

    if args.phase == 'train':
        best_f1score = 0
        for epoch in range(p_dict['epoch'] + 1, args.epochs):
            p_dict['epoch'] = epoch
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
            train_eval(p_dict, 'train')
            train_eval(p_dict, 'val')
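# `py_op.myreadjson` is a project utility, not a standard-library call; a minimal
# sketch of the behaviour assumed throughout these scripts (hypothetical
# implementation):
import json

def myreadjson(path):
    with open(path) as f:
        return json.load(f)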
def main():
    args.n_ehr = len(
        json.load(open(os.path.join(args.files_dir, 'demo_index_dict.json'), 'r'))) + 10
    args.name_list = json.load(
        open(os.path.join(args.files_dir, 'feature_list.json'), 'r'))[1:]
    args.input_size = len(args.name_list)
    files = sorted(glob(os.path.join(args.data_dir, 'resample_data/*.csv')))
    data_splits = json.load(open(os.path.join(args.files_dir, 'splits.json'), 'r'))
    train_files = [f for idx in [0, 1, 2, 3, 4, 5, 6] for f in data_splits[idx]]
    valid_files = [f for idx in [7] for f in data_splits[idx]]
    test_files = [f for idx in [8, 9] for f in data_splits[idx]]

    if args.phase == 'test':
        train_phase, valid_phase, test_phase, train_shuffle = 'test', 'test', 'test', False
    else:
        train_phase, valid_phase, test_phase, train_shuffle = 'train', 'valid', 'test', True

    train_dataset = data_loader.DataBowl(args, train_files, phase=train_phase)
    valid_dataset = data_loader.DataBowl(args, valid_files, phase=valid_phase)
    test_dataset = data_loader.DataBowl(args, test_files, phase=test_phase)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=train_shuffle, num_workers=args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset, batch_size=args.batch_size,
                              shuffle=False, num_workers=args.workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False, num_workers=args.workers, pin_memory=True)

    args.vocab_size = args.input_size + 2
    if args.use_unstructure:
        args.unstructure_size = len(
            py_op.myreadjson(os.path.join(args.files_dir, 'vocab_list.json'))) + 10

    # net = icnn.CNN(args)
    # net = cnn.CNN(args)
    net = lstm.LSTM(args)
    # net = torch.nn.DataParallel(net)
    # loss = myloss.Loss(0)
    loss = myloss.MultiClassLoss(0)
    net = _cuda(net, 0)
    loss = _cuda(loss, 0)

    best_metric = [0, 0]
    start_epoch = 0
    if args.resume:
        p_dict = {'model': net}
        function.load_model(p_dict, args.resume)
        best_metric = p_dict['best_metric']
        start_epoch = p_dict['epoch'] + 1

    parameters_all = []
    for p in net.parameters():
        parameters_all.append(p)
    optimizer = torch.optim.Adam(parameters_all, args.lr)

    if args.phase == 'train':
        for epoch in range(start_epoch, args.epochs):
            print('start epoch :', epoch)
            t0 = time.time()
            train_eval(train_loader, net, loss, epoch, optimizer, best_metric)
            t1 = time.time()
            print('Running time:', t1 - t0)
            best_metric = train_eval(valid_loader, net, loss, epoch, optimizer,
                                     best_metric, phase='valid')
            print('best metric', best_metric)
    elif args.phase == 'test':
        train_eval(test_loader, net, loss, 0, optimizer, best_metric, 'test')
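# `_cuda` is a project helper used here and in the imputation script below; a
# plausible sketch (hypothetical implementation) that moves a module to a given
# GPU only when CUDA is available, so the same code also runs on CPU:
import torch

def _cuda(module, device_id=0):
    if torch.cuda.is_available():
        return module.cuda(device_id)
    return module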
def main():
    p_dict = dict()  # all the parameters
    p_dict['args'] = args
    args.split_nn = 3 * 5
    args.vocab_size = args.split_nn * 145 + 2
    print('vocab_size', args.vocab_size)

    ### load data
    print('read data ...')
    if args.task == 'mortality':
        patient_time_record_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'patient_time_record_dict.json'))
        patient_master_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'patient_master_dict.json'))
        patient_label_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'patient_label_dict.json'))

        if os.path.exists(os.path.join(args.result_dir, 'train.json')):
            patient_train = list(
                json.load(open(os.path.join(args.result_dir, 'train.json'))))
            patient_valid = list(
                json.load(open(os.path.join(args.result_dir, 'valid.json'))))
            patient_test = list(
                json.load(open(os.path.join(args.result_dir, 'test.json'))))
        else:
            patients = sorted(
                set(patient_label_dict.keys()) & set(patient_time_record_dict)
                & set(patient_master_dict))
            print(len(patient_master_dict), len(patient_label_dict),
                  len(patient_time_record_dict))
            print('There are {:d} patients.'.format(len(patients)))
            n_train = int(0.7 * len(patients))
            n_valid = int(0.2 * len(patients))
            patient_train = patients[:n_train]
            patient_valid = patients[n_train:n_train + n_valid]
            patient_test = patients[n_train + n_valid:]
        # the original indexed patients[0], but `patients` only exists in the
        # else branch above; patient_train[0] is defined on both paths
        args.master_size = len(patient_master_dict[patient_train[0]])
    elif args.task == 'sepsis':
        patient_time_record_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'sepsis_time_record_dict.json'))
        patient_master_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'patient_master_dict.json'))
        patient_label_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'sepsis_label_dict.json'))
        sepsis_split = py_op.myreadjson(
            os.path.join(args.result_dir, 'sepsis_split.json'))
        print(sepsis_split.keys())
        sepsis_split = sepsis_split[str(-args.last_time)]
        patient_train = sepsis_split['train']
        patient_valid = sepsis_split['valid']
        # NOTE: no test split is defined for the sepsis task in this excerpt, so
        # the test_loader built below is only meaningful for the mortality task.

    print('train: {:d}'.format(len(patient_train)))
    print('valid: {:d}'.format(len(patient_valid)))

    print('data loading ...')
    train_dataset = dataloader.DataSet(patient_train, patient_time_record_dict,
                                       patient_label_dict, patient_master_dict,
                                       args=args, phase='train')
    train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=8, pin_memory=True)
    val_dataset = dataloader.DataSet(patient_valid, patient_time_record_dict,
                                     patient_label_dict, patient_master_dict,
                                     args=args, phase='val')
    val_loader = DataLoader(dataset=val_dataset, batch_size=args.batch_size,
                            shuffle=False, num_workers=8, pin_memory=True)
    test_dataset = dataloader.DataSet(patient_test, patient_time_record_dict,
                                      patient_label_dict, patient_master_dict,
                                      args=args, phase='val')
    test_loader = DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                             shuffle=False, num_workers=8, pin_memory=True)

    p_dict['train_loader'] = train_loader
    if args.phase == 'train':
        p_dict['val_loader'] = val_loader
    else:
        p_dict['val_loader'] = test_loader

    cudnn.benchmark = True
    net = attention.Attention(args)
    if args.gpu:
        net = net.cuda()
        p_dict['loss'] = loss.Loss().cuda()
    else:
        p_dict['loss'] = loss.Loss()

    parameters = []
    for p in net.parameters():
        parameters.append(p)
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    p_dict['optimizer'] = optimizer
    p_dict['model'] = net
    start_epoch = 0
    # args.epoch = start_epoch
    # print ('best_f1score' + str(best_f1score))
    p_dict['epoch'] = 0
    p_dict['best_metric'] = [0, 0]

    ### resume pretrained model
    if os.path.exists(args.resume):
        print('resume from model ' + args.resume)
        function.load_model(p_dict, args.resume)
        print('best_metric', p_dict['best_metric'])

    if args.phase == 'train':
        best_f1score = 0
        for epoch in range(p_dict['epoch'] + 1, args.epochs):
            p_dict['epoch'] = epoch
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
            train_eval(p_dict, 'train')
            train_eval(p_dict, 'val')
        log_info = '# task : {:s}; model: {:s} ; last_time: {:d} ; auc: {:3.4f} \n'.format(
            args.task, args.model, args.last_time, p_dict['best_metric'][0])
        with open('../result/log.txt', 'a') as f:
            f.write(log_info)
    else:
        train_eval(p_dict, 'test')
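# `function.load_model(p_dict, path)` is a project helper; a plausible sketch
# (hypothetical), assuming checkpoints were written as a single dict containing
# the state_dict, the best metric, and the epoch counter:
import torch

def load_model(p_dict, path):
    checkpoint = torch.load(path, map_location='cpu')
    p_dict['model'].load_state_dict(checkpoint['state_dict'])
    p_dict['best_metric'] = checkpoint.get('best_metric', p_dict.get('best_metric'))
    p_dict['epoch'] = checkpoint.get('epoch', 0)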
weight = torch.DoubleTensor(weight)
sampler = torch.utils.data.sampler.WeightedRandomSampler(weight, len(weight))
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                           sampler=sampler, shuffle=False,
                                           num_workers=4, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size,
                                         shuffle=False, num_workers=4,
                                         drop_last=True)

# NOTE: despite the variable name, this snippet loads ResNet-152; the fc input
# width (2048) is the same for both architectures, so the head still fits.
resnet101 = torchvision.models.resnet152(pretrained=True)
resnet101.fc = nn.Linear(2048, 18)
if continue_train:
    print('Loading network')
    load_model(resnet101, checkpoint_dir, 'latest', was_parallel=was_parallel)
    loss_list = list(np.loadtxt(os.path.join(checkpoint_dir, 'loss_history_train.txt')))
    acc_list = list(np.loadtxt(os.path.join(checkpoint_dir, 'acc_history_train.txt')))
    loss_list_val = list(np.loadtxt(os.path.join(checkpoint_dir, 'loss_history_val.txt')))
    acc_list_val = list(np.loadtxt(os.path.join(checkpoint_dir, 'acc_history_val.txt')))
    lowest_loss = min(loss_list_val)
    start_epoch = len(loss_list)
else:
    start_epoch = 0
    loss_list = []
    loss_list_val = []
    acc_list = []
    acc_list_val = []
    lowest_loss = 9999
resnet101.cuda(cuda_id)
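# The per-sample `weight` vector fed to WeightedRandomSampler above is assumed to
# be precomputed. A common recipe (a sketch, not necessarily this project's code)
# is inverse class frequency over the 18 classes; `train_set.targets` assumes an
# ImageFolder-style dataset that exposes one class index per sample:
import numpy as np

labels = train_set.targets                             # one class index per sample
class_count = np.bincount(labels, minlength=18).astype(np.float64)
class_weight = 1.0 / np.maximum(class_count, 1.0)      # guard against empty classes
weight = [class_weight[label] for label in labels]     # rarer classes drawn more often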
def main():
    assert args.dataset in ['DACMI', 'MIMIC']
    if args.dataset == 'MIMIC':
        args.n_ehr = len(py_op.myreadjson(
            os.path.join(args.data_dir, args.dataset, 'ehr_list.json')))
    args.name_list = py_op.myreadjson(
        os.path.join(args.file_dir, args.dataset + '_feature_list.json'))[1:]
    args.output_size = len(args.name_list)

    files = sorted(glob(os.path.join(args.data_dir, args.dataset, 'train_with_missing/*.csv')))
    data_splits = py_op.myreadjson(os.path.join(args.file_dir, args.dataset + '_splits.json'))
    train_files = [f for idx in [0, 1, 2, 3, 4, 5, 6] for f in data_splits[idx]]
    valid_files = [f for idx in [7] for f in data_splits[idx]]
    test_files = [f for idx in [8, 9] for f in data_splits[idx]]

    if args.phase == 'test':
        train_phase, valid_phase, test_phase, train_shuffle = 'test', 'test', 'test', False
    else:
        train_phase, valid_phase, test_phase, train_shuffle = 'train', 'valid', 'test', True

    train_dataset = data_loader.DataBowl(args, train_files, phase=train_phase)
    valid_dataset = data_loader.DataBowl(args, valid_files, phase=valid_phase)
    test_dataset = data_loader.DataBowl(args, test_files, phase=test_phase)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=train_shuffle, num_workers=args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset, batch_size=args.batch_size,
                              shuffle=False, num_workers=args.workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False, num_workers=args.workers, pin_memory=True)

    args.vocab_size = (args.output_size + 2) * (1 + args.split_num) + 5

    if args.model == 'tame':
        net = tame.AutoEncoder(args)
    loss = myloss.MSELoss(args)
    net = _cuda(net, 0)
    loss = _cuda(loss, 0)

    best_metric = [0, 0]
    start_epoch = 0
    if args.resume:
        p_dict = {'model': net}
        function.load_model(p_dict, args.resume)
        best_metric = p_dict['best_metric']
        start_epoch = p_dict['epoch'] + 1

    parameters_all = []
    for p in net.parameters():
        parameters_all.append(p)
    optimizer = torch.optim.Adam(parameters_all, args.lr)

    if args.phase == 'train':
        for epoch in range(start_epoch, args.epochs):
            print('start epoch :', epoch)
            train_eval(train_loader, net, loss, epoch, optimizer, best_metric)
            best_metric = train_eval(valid_loader, net, loss, epoch, optimizer,
                                     best_metric, phase='valid')
            print('best metric', best_metric)
    elif args.phase == 'test':
        folder = os.path.join(args.result_dir, args.dataset, 'imputation_result')
        os.system('rm -r ' + folder)
        os.system('mkdir ' + folder)
        train_eval(train_loader, net, loss, 0, optimizer, best_metric, 'test')
        train_eval(valid_loader, net, loss, 0, optimizer, best_metric, 'test')
        train_eval(test_loader, net, loss, 0, optimizer, best_metric, 'test')
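# NOTE (suggestion, not in the original): shelling out with `rm -r`/`mkdir` is
# not portable and silently ignores failures; the standard-library equivalent of
# the two os.system calls above would be:
import os
import shutil

shutil.rmtree(folder, ignore_errors=True)   # remove old imputation results
os.makedirs(folder, exist_ok=True)          # recreate the output directory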