def prepare_dataset(args, category=None):
    # Build the dataset that matches args.mode; the CSV path is passed to
    # KaggleDataset with its '.csv' suffix stripped.
    if args.mode == 'train':
        train_csv = re.sub(r'\.csv$', '', args.in_train_csv)
        dataset_train = KaggleDataset(train_csv, transform=Compose(),
                                      img_folder=args.in_train_img,
                                      category=category,
                                      resize_scale=[128, 128])
        return dataset_train
    if args.mode == 'valid':
        valid_csv = re.sub(r'\.csv$', '', args.in_valid_csv)
        dataset_valid = KaggleDataset(valid_csv, transform=Compose(),
                                      img_folder=args.in_valid_img,
                                      category=category,
                                      resize_scale=[128, 128])
        return dataset_valid
    if args.mode == 'test':
        test_csv = re.sub(r'\.csv$', '', args.in_test_csv)
        dataset_test = KaggleDataset(test_csv, transform=Compose(),
                                     img_folder=args.in_test_img,
                                     category=category,
                                     resize_scale=[128, 128])
        return dataset_test
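# Usage sketch (illustration only, not part of the original script): wrap the dataset
# returned by prepare_dataset() in a torch DataLoader. The batch size and worker count
# below are assumed values, not taken from the repo's config.
# from torch.utils.data import DataLoader
# dataset = prepare_dataset(args, category='Histology')
# loader = DataLoader(dataset, batch_size=4,
#                     shuffle=(args.mode == 'train'), num_workers=2)
# for batch in loader:
#     ...  # forward pass / loss computation goes here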
def main(ckpt, img_dir, save_dir):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load one or more checkpoints
    models = []
    for fn in ckpt or [None]:
        # load model
        model = load_ckpt(filepath=fn)
        if not model:
            print("Aborted: checkpoint {} not found!".format(fn))
            return
        # Set the model to evaluation mode.
        model.eval()
        # Put the model on the GPU.
        # Note: DataParallel is commented out because
        # (1) it is not needed in this inference scenario, and
        # (2) it changes the model's class name to 'DataParallel'.
        # if torch.cuda.device_count() > 1:
        #     print("Let's use", torch.cuda.device_count(), "GPUs!")
        #     model = nn.DataParallel(model)
        model = model.to(device)
        # append to the model list
        models.append(model)

    resize = not config['valid'].getboolean('pred_orig_size')
    compose = Compose(augment=False, resize=resize)

    # decide which dataset to draw samples from
    dataset = AgTestDataset(img_dir, transform=compose)

    # iterate over the dataset and run inference on each sample
    for data in tqdm(dataset):
        with torch.no_grad():
            inference(data, models, resize, save_dir)
def main(tocsv=False, save=False, mask=False, valid_train=False, toiou=False):
    model_name = config['param']['model']
    resize = not config['valid'].getboolean('pred_orig_size')

    # initialize model
    if model_name == 'unet_vgg16':
        model = UNetVgg16(3, 1, fixed_vgg=True)
    elif model_name == 'dcan':
        model = DCAN(3, 1)
    elif model_name == 'caunet':
        model = CAUNet()
    elif model_name == 'camunet':
        model = CAMUNet()
    else:
        model = UNet()
    if torch.cuda.is_available():
        model = model.cuda()
        # model = torch.nn.DataParallel(model).cuda()

    # Set the model to evaluation mode.
    model.eval()
    epoch = load_ckpt(model)
    if epoch == 0:
        print("Aborted: checkpoint not found!")
        return

    # prepare dataset
    compose = Compose(augment=False, resize=resize)
    data_dir = 'data/stage1_train' if valid_train else 'data/stage1_test'
    dataset = KaggleDataset(data_dir, transform=compose)
    preds = predict(model, dataset, compose, resize)

    if tocsv:
        with open('result.csv', 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['ImageId', 'EncodedPixels'])
            for uid, _, y, y_c, y_m, _, _, _, _ in preds:
                for rle in prob_to_rles(y, y_c, y_m):
                    writer.writerow([uid, ' '.join([str(i) for i in rle])])
    elif toiou and valid_train:
        with open('iou.csv', 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['ImageId', 'IoU'])
            for uid, _, y, y_c, y_m, gt, _, _, _ in tqdm(preds):
                iou = get_iou(y, y_c, y_m, gt)
                writer.writerow([uid, iou])
    else:
        for uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m in tqdm(preds):
            if valid_train:
                show_groundtruth(uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m, save)
            elif mask:
                save_mask(uid, y, y_c, y_m)
            else:
                show(uid, x, y, y_c, y_m, save)
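# For reference: a minimal run-length encoder in the Kaggle DSB-2018 submission format
# (column-major pixel order, 1-indexed run starts). This is a generic sketch, not the
# repo's prob_to_rles(), which works from the predicted probability maps y, y_c, y_m.
import numpy as np

def rle_encode(mask):
    """Encode a 2-D binary mask as [start_1, length_1, start_2, length_2, ...]."""
    pixels = np.concatenate([[0], mask.T.flatten(), [0]])  # pad so runs at the borders are closed
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1       # change points, 1-indexed
    runs[1::2] -= runs[::2]                                  # convert run ends into run lengths
    return runs.tolist()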
    # t = [ToTensor()]
    # train_data = DatasetSyn(args.train_data, "train", transform=Compose(t), sample_shape=args.sample_shape)
    # train_data.reset_Sample()
    # train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
    #                           num_workers=args.num_workers)
    # eval_data = DatasetSyn(args.eval_data, "eval", transform=Compose(t), sample_shape=args.sample_shape)
    # eval_data.reset_Sample()
    # eval_loader = DataLoader(eval_data, batch_size=args.batch_size, shuffle=True,
    #                          num_workers=args.num_workers)

    t = [ToTensor()]
    train_data = DatasetReal(REAL_DATA_PATH, mode="train", transform=Compose(t), sample_shape=args.sample_shape)
    train_data.reset_Sample()
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                              num_workers=args.num_workers)
    eval_data = DatasetReal(REAL_DATA_PATH, mode="eval", transform=Compose(t), sample_shape=args.sample_shape)
    eval_data.reset_Sample()
    eval_loader = DataLoader(eval_data, batch_size=1, shuffle=True, num_workers=1)
    # t = [ToTensor()]
    train_tfLogger = TFLogger(osp.join(args.logs_dir, 'train'))
    eval_tfLogger = TFLogger(osp.join(args.logs_dir, 'eval'))

    # t = [ToTensor()]
    # train_data = DatasetTrain(args.train_data, transform=Compose(t), sample_shape=args.sample_shape)
    # # train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
    # #                           num_workers=args.num_workers)
    # eval_data = DatasetEvaluate(args.eval_data, transform=Compose(t), sample_shape=args.sample_shape)
    # eval_data.reset_Sample()
    # eval_loader = DataLoader(eval_data, batch_size=args.batch_size, shuffle=True,
    #                          num_workers=args.num_workers)

    t = [ToTensor()]
    train_data = DatasetReal(REAL_DATA_PATH, mode="train", transform=Compose(t), sample_shape=args.sample_shape)
    # train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
    #                           num_workers=args.num_workers)
    train_data.reset_Sample()
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_data)
    eval_data = DatasetReal(REAL_DATA_PATH, mode="eval", transform=Compose(t), sample_shape=args.sample_shape)
    eval_data.reset_Sample()
    eval_loader = DataLoader(eval_data, batch_size=args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    for epoch in range(args.start_epoch, 150):
        scheduler.step()
        lr = scheduler.get_lr()[0]
def main(resume=True, n_epoch=None, learn_rate=None):
    model_name = config['param']['model']
    if learn_rate is None:
        learn_rate = config['param'].getfloat('learn_rate')
    width = config.getint(model_name, 'width')
    weight_map = config['param'].getboolean('weight_map')
    c = config['train']
    log_name = c.get('log_name')
    n_batch = c.getint('n_batch')
    n_worker = c.getint('n_worker')
    n_cv_epoch = c.getint('n_cv_epoch')
    if n_epoch is None:
        n_epoch = c.getint('n_epoch')
    balance_group = c.getboolean('balance_group')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = build_model(model_name)
    model = model.to(device)

    # define optimizer
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learn_rate,
        weight_decay=1e-6
    )

    # DataLoader workers are forked processes, so an IPC manager is needed to keep
    # the cache in the same (shared) memory space.
    manager = Manager()
    cache = manager.dict()
    compose = Compose()

    # prepare dataset
    if os.path.exists('data/valid'):
        # advanced mode: use the valid folder as CV
        train_dataset = KaggleDataset('data/train', transform=compose, cache=cache)
        valid_dataset = KaggleDataset('data/valid', transform=compose, cache=cache)
    else:
        # auto mode: split part of the train dataset off as CV
        train_dataset = KaggleDataset('data/train', transform=compose, cache=cache, use_filter=True)
        train_dataset, valid_dataset = train_dataset.split()

    # decide whether to balance the training set
    if balance_group:
        weights, ratio = train_dataset.class_weight()
        # len(weights) is the number of samples in an original epoch.
        # After oversampling for balance, the majority class is effectively under-sampled
        # (drawn least often). Multiplying by the ratio gives every sample a chance to be
        # visited at least once per epoch.
        sampler = WeightedRandomSampler(weights, int(len(weights) * ratio))
    else:
        sampler = RandomSampler(train_dataset)

    # data loaders
    train_loader = DataLoader(
        train_dataset, sampler=sampler,
        batch_size=n_batch, num_workers=n_worker,
        pin_memory=torch.cuda.is_available())
    valid_loader = DataLoader(
        valid_dataset, shuffle=False,
        batch_size=n_batch, num_workers=n_worker)

    # resume from checkpoint
    start_epoch = iou_tr = iou_cv = 0
    if resume:
        start_epoch = load_ckpt(model, optimizer)
    if start_epoch == 0:
        print('Brand new training ...')

    # put the model on GPUs
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    # decide the log directory name
    log_dir = os.path.join(
        'logs', log_name, '{}-{}'.format(model_name, width),
        'ep_{},{}-lr_{}'.format(
            start_epoch,
            n_epoch + start_epoch,
            learn_rate,
        )
    )

    with SummaryWriter(log_dir) as writer:
        if start_epoch == 0 and False:
            # dump the graph only for the very first training; disabled by default
            dump_graph(model, writer, n_batch, width)
        print('Training started...')
        for epoch in range(start_epoch + 1, n_epoch + start_epoch + 1):  # 1-based epochs
            iou_tr = train(train_loader, model, optimizer, epoch, writer)
            if len(valid_dataset) > 0 and epoch % n_cv_epoch == 0:
                with torch.no_grad():
                    iou_cv = valid(valid_loader, model, epoch, writer, len(train_loader))
            save_ckpt(model, optimizer, epoch, iou_tr, iou_cv)
        print('Training finished...')
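# Illustration of the balancing idea above (standalone sketch; the actual per-sample
# weights and ratio come from train_dataset.class_weight(), whose exact semantics are
# assumed here rather than taken from the repo):
# from torch.utils.data import WeightedRandomSampler
# weights = [0.25, 0.25, 0.25, 0.25, 1.0]   # last sample belongs to the rare group
# sampler = WeightedRandomSampler(weights, num_samples=int(len(weights) * 1.5))
# print(list(sampler))  # indices drawn with replacement; the rare sample appears more often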
# FOV_SHAPE = [41, 41, 41]
# FOV_SHAPE = [57, 57, 57]
FOV_SHAPE = [64, 64, 64]

device = torch.device("cuda")
path = osp.join(working_dir, OUTPUT_DIR)
mkdir_if_not_exist(path)

# model = SegFFNNet(in_dims=1)
model = SegFFNNet(in_dims=1, ins_dims=5)
model = model.to(device)
# infer = Inference(model, MODEL_PATH, PATCH_SHAPE, FOV_SHAPE, path)
infer = DF_Inference(model, MODEL_PATH, PATCH_SHAPE, FOV_SHAPE, path)

t = [ToTensor()]
# dataset = DatasetTest(TEST_DATA_PATH, transform=Compose(t))
dataset = DatasetRealTest(REAL_TEST_DATA_PATH, transform=Compose(t))

for i in range(1):
    img, gt = dataset[i]
    name = dataset.datanames[i]
    print(img.shape, gt.shape)
    print("Processing {} ...".format(name))
    # ins_mask = infer.forward(np.squeeze(img.cpu().numpy()))
    ins_mask = infer.forward(np.squeeze(img.cpu().numpy()), np.squeeze(gt.cpu().numpy()))
    # ins_mask = infer.forward_gt(np.squeeze(img.cpu().numpy()), np.squeeze(gt.cpu().numpy()))
    tifffile.imsave(osp.join(path, name + "_pred.tif"),
                    ins_mask[:300, :300, :300].astype(np.float16))
def main(tocsv=False, save=False, mask=False, valid_train=False, toiou=False, submit_folder=False):
    model_name = config['param']['model']
    resize = not config['valid'].getboolean('pred_orig_size')

    # initialize model
    if model_name == 'unet_vgg16':
        model = UNetVgg16(3, 1, fixed_vgg=True)
    elif model_name == 'dcan':
        model = DCAN(3, 1)
    elif model_name == 'caunet':
        model = CAUNet()
    elif model_name == 'camunet':
        model = CAMUNet()
    else:
        model = UNet()
    if torch.cuda.is_available():
        model = model.cuda()
        # model = torch.nn.DataParallel(model).cuda()

    # Set the model to evaluation mode.
    model.eval()
    epoch = load_ckpt(model)
    if epoch == 0:
        print("Aborted: checkpoint not found!")
        return

    # prepare dataset
    compose = Compose(augment=False, resize=resize)
    # data_dir = 'data/stage1_train' if valid_train else 'data/stage1_test'
    # data_dir = 'data/stage1_train' if valid_train else '../bowl_classifier/stage2_test'
    data_dir = 'data/stage1_train' if valid_train else config['param']['CSV_PATH']
    print(data_dir)
    data_dir = re.sub(r'\.csv', '', data_dir)
    dataset = KaggleDataset(data_dir, transform=compose, img_folder=config['param']['img_folder'])
    preds = predict(model, dataset, compose, resize)

    if tocsv:
        csv_name = 'train_result.csv' if valid_train else 'test_result.csv'
        print('Saving %s/%s ...' % (submit_folder, csv_name))
        with open('%s/%s' % (submit_folder, csv_name), 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['ImageId', 'EncodedPixels'])
            for uid, _, y, y_c, y_m, _, _, _, _ in preds:
                for rle in prob_to_rles(y, y_c, y_m):
                    writer.writerow([uid, ' '.join([str(i) for i in rle])])
    elif toiou and valid_train:
        print('Saving %s/iou_train.csv ...' % submit_folder)
        with open('%s/iou_train.csv' % submit_folder, 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['ImageId', 'IoU'])
            for uid, _, y, y_c, y_m, gt, _, _, _ in tqdm(preds):
                iou = get_iou(y, y_c, y_m, gt)
                writer.writerow([uid, iou])
    else:
        for uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m in tqdm(preds):
            if valid_train:
                show_groundtruth(uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m, save)
            elif mask:
                save_mask(uid, y, y_c, y_m)
            else:
                show(uid, x, y, y_c, y_m, save)

    if valid_train:
        data_dir = 'data/stage1_valid'
        if not os.path.exists(data_dir):
            print('%s does not exist. It will not generate %s/iou_valid.csv\nBye bye!'
                  % (data_dir, submit_folder))
        else:
            dataset = KaggleDataset(data_dir, transform=compose)
            preds = predict(model, dataset, compose, resize)
            if toiou:
                print('Saving %s/iou_valid.csv ...' % submit_folder)
                with open('%s/iou_valid.csv' % submit_folder, 'w') as csvfile:
                    writer = csv.writer(csvfile)
                    writer.writerow(['ImageId', 'IoU'])
                    for uid, _, y, y_c, y_m, gt, _, _, _ in tqdm(preds):
                        iou = get_iou(y, y_c, y_m, gt)
                        writer.writerow([uid, iou])
def main(ckpt, tocsv=False, save=False, mask=False, target='test', toiou=False):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load one or more checkpoints
    models = []
    for fn in ckpt or [None]:
        # load model
        model = load_ckpt(filepath=fn)
        if not model:
            print("Aborted: checkpoint {} not found!".format(fn))
            return
        # Set the model to evaluation mode.
        model.eval()
        # put the model on the GPU(s)
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            model = nn.DataParallel(model)
        model = model.to(device)
        # append to the model list
        models.append(model)

    resize = not config['valid'].getboolean('pred_orig_size')
    compose = Compose(augment=False, resize=resize)

    # decide which dataset to draw samples from
    data_dir = os.path.join('data', target)
    if target == 'test':
        dataset = KaggleDataset(data_dir, transform=compose)
    elif os.path.exists('data/valid'):
        # advanced mode: use the valid folder as CV
        dataset = KaggleDataset(data_dir, transform=compose)
    else:
        # auto mode: split part of the train dataset off as CV
        dataset = KaggleDataset('data/train', transform=compose, use_filter=True)
        if target == 'train':
            dataset, _ = dataset.split()
        elif target == 'valid':
            _, dataset = dataset.split()

    # iterate over the dataset and run inference on each sample
    ious = []
    writer = csvfile = None
    for data in tqdm(dataset):
        with torch.no_grad():
            uid, y, y_c, y_m = inference(data, models, resize)
            x, gt, gt_s, gt_c, gt_m = unpack_data(data, compose, resize)

        if tocsv:
            if writer is None:
                csvfile = open('result.csv', 'w')
                writer = csv.writer(csvfile)
                writer.writerow(['ImageId', 'EncodedPixels'])
            for rle in prob_to_rles(y, y_c, y_m):
                writer.writerow([uid, ' '.join([str(i) for i in rle])])
        elif toiou:
            assert target != 'test'
            if writer is None:
                csvfile = open('iou.csv', 'w')
                writer = csv.writer(csvfile)
                writer.writerow(['ImageId', 'IoU'])
            iou = get_iou(y, y_c, y_m, gt)
            writer.writerow([uid, iou])
            ious.append(iou)
        elif mask:
            save_mask(uid, y, y_c, y_m)
        elif target == 'test':
            show(uid, x, y, y_c, y_m, save)
        else:
            # train or valid
            show_groundtruth(uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m, save)

    # end of the per-sample loop
    if csvfile is not None:
        csvfile.close()

    if toiou:
        print('\nIoU Metrics:\n mean: {0:.4f}\t std: {1:.4f}\t max: {2:.4f}\t min: {3:.4f}\t count: {4}\n'
              .format(np.mean(ious), np.std(ious), np.max(ious), np.min(ious), len(ious)))
    # print('id_ ' + id_ + '\t' + f_content[id_])  # debug
    fn.write(f_content[id_])
fn.close()

# ============================== UPDATE-CSV ==============================

CSV_FILE = re.sub(r'\.csv', '', CSV_OUT_PATH)
dataset_test = KaggleDataset(CSV_FILE, transform=Compose(),
                             img_folder=TEST_IMG_DIR, resize_scale=[128, 128])
confidence_alert = 0
valid_idx = range(len(dataset_test))
valid_loader = DataLoader(dataset_test, sampler=SubsetRandomSampler(valid_idx),
                          batch_size=4, num_workers=2)

# network
net = VGG('VGG16')
print(net)
net.cuda()
net.eval()
net.load_state_dict(torch.load(MODEL_IN_PATH))

invert_majorlabel = {v: k for k, v in dataset_test.majorlabels.items()}

for i, data in enumerate(valid_loader, 0):
    if single_line_list[M_Category_idx] != 'Histology':
        single_line_list[Pred_sub_idx] = single_line_list[M_Category_idx]
        single_line_list[S_Category_idx] = single_line_list[M_Category_idx]
    f_content[single_line_list[Image_id_idx]] = ','.join(single_line_list)
fn.close()

# ============================== UPDATE-CSV ==============================

CSV_FILE = re.sub(r'\.csv', '', CSV_IN_PATH)
dataset_test = KaggleDataset(CSV_FILE, transform=Compose(), category='Histology',
                             img_folder=TEST_IMG_DIR, resize_scale=[128, 128])
confidence_alert = 0
valid_idx = range(len(dataset_test))
valid_loader = DataLoader(dataset_test, sampler=SubsetRandomSampler(valid_idx),
                          batch_size=4, num_workers=2)

# network
net = VGG('VGG16')
print(net)
net.cuda()
def main(resume=True, n_epoch=None, learn_rate=None):
    model_name = config['param']['model']
    cv_ratio = config['param'].getfloat('cv_ratio')
    if learn_rate is None:
        learn_rate = config['param'].getfloat('learn_rate')
    width = config[model_name].getint('width')
    weight_map = config['param'].getboolean('weight_map')
    c = config['train']
    log_name = c.get('log_name')
    n_batch = c.getint('n_batch')
    n_worker = c.getint('n_worker')
    n_ckpt_epoch = c.getint('n_ckpt_epoch')
    if n_epoch is None:
        n_epoch = c.getint('n_epoch')

    # initialize model
    if model_name == 'unet_vgg16':
        model = UNetVgg16(3, 1, fixed_vgg=True)
    elif model_name == 'dcan':
        model = DCAN(3, 1)
    elif model_name == 'caunet':
        model = CAUNet()
    elif model_name == 'camunet':
        model = CAMUNet()
    else:
        model = UNet()
    if torch.cuda.is_available():
        model = model.cuda()
        # model = torch.nn.DataParallel(model).cuda()

    # define optimizer
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learn_rate,
        weight_decay=1e-6
    )

    # DataLoader workers are forked processes, so an IPC manager is needed to keep
    # the cache in the same (shared) memory space.
    manager = Manager()
    cache = manager.dict()

    # prepare dataset and loaders
    dataset = KaggleDataset('data/stage1_train', transform=Compose(), cache=cache)
    train_idx, valid_idx = dataset.split()
    train_loader = DataLoader(
        dataset, sampler=SubsetRandomSampler(train_idx),
        batch_size=n_batch, num_workers=n_worker,
        pin_memory=torch.cuda.is_available())
    valid_loader = DataLoader(
        dataset, sampler=SubsetRandomSampler(valid_idx),
        batch_size=n_batch, num_workers=n_worker)

    # resume from checkpoint
    start_epoch = 0
    if resume:
        start_epoch = load_ckpt(model, optimizer)
    if start_epoch == 0:
        print('Brand new training ...')

    # decide the log directory name
    log_dir = os.path.join(
        'logs', log_name, '{}-{}'.format(model_name, width),
        'ep_{},{}-lr_{}'.format(
            start_epoch,
            n_epoch + start_epoch,
            learn_rate,
        )
    )

    with SummaryWriter(log_dir) as writer:
        if start_epoch == 0 and False:
            # dump the graph only for the very first training; disabled by default
            dump_graph(model, writer, n_batch, width)
        print('Training started...')
        for epoch in range(start_epoch, n_epoch + start_epoch):
            train(train_loader, model, optimizer, epoch, writer)
            if cv_ratio > 0 and epoch % 3 == 2:
                valid(valid_loader, model, epoch, writer, len(train_loader))
            # save a checkpoint every n_ckpt_epoch epochs
            if epoch % n_ckpt_epoch == n_ckpt_epoch - 1:
                save_ckpt(model, optimizer, epoch + 1)
        print('Training finished...')
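# Generic checkpoint sketch (assumed structure for illustration only; the repo's actual
# save_ckpt()/load_ckpt() helpers may store different keys or paths):
# def save_ckpt_sketch(model, optimizer, epoch, path='checkpoint.pth'):
#     torch.save({'epoch': epoch,
#                 'model': model.state_dict(),
#                 'optimizer': optimizer.state_dict()}, path)
#
# def load_ckpt_sketch(model, optimizer, path='checkpoint.pth'):
#     state = torch.load(path, map_location='cpu')
#     model.load_state_dict(state['model'])
#     optimizer.load_state_dict(state['optimizer'])
#     return state['epoch']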
        # # data must be an ndarray
        # # target must be an ndarray, encoded as one-hot (1, num_class)
        # datas.append(data)
        # targets.append(target)
        return np.asarray(datas), np.asarray(targets)  # , np.asarray(videos_name)


if __name__ == '__main__':
    from PIL import Image
    import matplotlib.pyplot as plt
    from dataset import Dataset, Compose, Scale, CenterCrop, Normalize
    import scipy.misc as misc

    # mean = [114.7748, 107.7354, 99.4750]
    # std = [58.395, 57.12, 57.375]
    spatial_transform = Compose([Scale((224, 224))])
    json_file1 = "/home/pr606/python_vir/yuan/i3d-kinects/dataset/ucf101_lmdb.json"
    json_file2 = '/home/pr606/Pictures/dataset_annotations/ucf101_json_file/ucf101_01.json'
    train_set = DataSet(clip_length=25, sample_step=9,
                        data_root='/home/pr606/Pictures/UCF101DATASET/ucf101',
                        annotation_path=json_file1,
                        spatial_transform=None,
                        mode='train',
                        with_start=True,
                        multi_sample=True)
    train_generator = DataGenerator(train_set, batch_size=7, ordered_file_path='./names_in_order.csv')
    print(train_generator.__len__())  # number of samples / batch size
    for i, (datas, labels) in enumerate(train_generator):
        # print(datas.shape)