def __init__(self, path, s, size, shuffle=False, transform=None):
    """Index slide directories for prediction.

    Each entry of *path* is a slide directory (e.g. ./Predict/10) whose
    files are patches. A label is taken from the directory path:
    'pos' -> 1, 'neg' -> 0 (paths matching neither get no label, which
    the assert below will catch as a length mismatch).
    """
    slide_names = []
    labels = []
    patch_grid = []  # one list of patch paths per slide
    for slide_dir in path:
        if 'pos' in slide_dir:
            labels.append(1)
        elif 'neg' in slide_dir:
            labels.append(0)
        slide_names.append(slide_dir.split('/')[-1])
        patch_grid.append(
            [os.path.join(slide_dir, fname) for fname in os.listdir(slide_dir)])
    cp('(#g)Total length: {}(#) (#y)grid: {}(#)'.format(len(labels), len(patch_grid)))
    print(patch_grid[0])
    assert len(labels) == len(slide_names), cp('(#r) targets and slidenames not match (#)')
    self.s = s
    self.transform = transform
    self.slidenames = slide_names
    self.targets = labels
    self.grid = patch_grid
    self.level = 0
    self.size = size
    self.shuffle = shuffle
def train_single(epoch, embedder, rnn, loader, criterion, optimizer):
    """Train the RNN aggregator for one epoch on top of the embedder.

    Each loader item is (inputs, target) where *inputs* is a sequence of
    patch batches; the RNN consumes the embedder's features step by step
    and the loss is computed only on the output after the FINAL step.
    Returns (mean loss, false-positive rate, false-negative rate).
    """
    rnn.train()
    running_loss = 0.
    running_fps = 0.
    running_fns = 0.
    for i, (inputs, target) in enumerate(loader):
        cp('(#y)Training - Epoch: [{}/{}](#)\t(#g)Batch: [{}/{}](#)'.format(
            epoch + 1, args.nepochs, i + 1, len(loader)))
        print(inputs[0].size(), len(inputs), len(target))
        batch_size = inputs[0].size(0)
        rnn.zero_grad()
        # Fresh hidden state per bag; rnn is assumed DataParallel-wrapped
        # (init_hidden is reached through .module).
        state = rnn.module.init_hidden(batch_size).cuda()
        for s in range(len(inputs)):
            input = inputs[s].cuda()
            # embedder returns a pair; the second element feeds the RNN
            # (presumably the feature vector — TODO confirm against embedder).
            _, input = embedder(input)
            output, state = rnn(input, state)
        target = target.cuda()
        # Loss on the last-step output only; gradients flow back through
        # the whole unrolled sequence.
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * target.size(0)
        fps, fns = errors(output.detach(), target.cpu())
        running_fps += fps
        running_fns += fns
    running_loss = running_loss / len(loader.dataset)
    # FP rate is normalized by the number of negatives, FN rate by positives.
    running_fps = running_fps / (np.array(loader.dataset.targets) == 0).sum()
    running_fns = running_fns / (np.array(loader.dataset.targets) == 1).sum()
    cp('(#y)Training - Epoch: [{}/{}](#)\t(#r)Loss: {}(#)\t(#g)FPR: {}(#)\t(#b)FNR: {}(#)'
       .format(epoch + 1, args.nepochs, running_loss, running_fps, running_fns))
    return running_loss, running_fps, running_fns
def __init__(self, data_path, transform=None):
    """Build a flattened patch index over all slide directories in *data_path*.

    Populates parallel structures: grid[i] is a patch path and slideIDX[i]
    is the index of the slide that owns it; slideLen holds cumulative patch
    counts so slide k owns grid[slideLen[k]:slideLen[k+1]].
    """
    # Flatten grid
    grid = []  # path for per patch
    slideIDX = []  # slide index for each patch (parallel to grid)
    slidenames = []
    targets = []
    slideLen = [0]  # cumulative patch counts, starts with sentinel 0
    idx = 0
    for each_file in data_path:
        slidenames.append(each_file.split('/')[-1])
        # Label from the directory path: 'pos' -> 1, everything else -> 0.
        if 'pos' in each_file:
            targets.append(1)
        else:
            targets.append(0)
        slideLen.append(slideLen[-1] + len(os.listdir(each_file)))
        for each_patch in os.listdir(each_file):
            img_path = os.path.join(each_file, each_patch)
            grid.append(img_path)
            slideIDX.append(idx)
        idx += 1
        cp('(#g)index: {}(#)\t(#r)name: {}(#)\t(#y)len: {}(#)'.format(
            idx, each_file.split('/')[-1], len(os.listdir(each_file))))
    cp('(#g)total: {}(#)'.format(len(grid)))
    # NOTE(review): assert messages call print(), which returns None — the
    # message shown on failure will be None; the print still fires.
    assert len(targets) == len(slidenames), print(
        "targets and names not match")
    assert len(slideIDX) == len(grid), print("idx and mask not match")
    # imgaug augmentation pipeline applied to training patches.
    seq = iaa.Sequential(
        [
            iaa.Fliplr(0.5),
            iaa.Flipud(0.5),
            iaa.Crop(percent=(0.1, 0.3)),
            # iaa.Sometimes(0.4, iaa.GaussianBlur(sigma=(0, 0.5))),
            # iaa.Sometimes(0.6, iaa.ContrastNormalization((0.75, 1.5))),
            # iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
            iaa.Rot90((1, 3))
        ], random_order=True)
    self.slidenames = slidenames
    self.targets = targets
    self.grid = grid
    self.slideIDX = slideIDX
    self.transform = transform
    self.mode = None  # set later via setmode()
    self.slideLen = slideLen  # patches for each slide
    self.size = config.DATASET.PATCHSIZE
    self.seq = seq
    self.multi_scale = config.DATASET.MULTISCALE
    self.unit = self.size
    self.overlap = config.DATASET.OVERLAP
    self.step = self.unit
    self.blocks_per_slide = 1
    self.ms_slideLen = self.slideLen  # for multi_scale
    self.ms_slideIDX = self.slideIDX
def rename():
    """Rename every .tif under cfg.data_append_path to '<prefix-before-_>.tif'.

    Files containing '.enp' are skipped; renames that would be no-ops are
    skipped as well. Each performed rename is logged via cp().
    """
    for folder, _dirs, names in os.walk(cfg.data_append_path):
        for fname in names:
            # Only plain .tif files are candidates.
            if '.tif' not in fname or '.enp' in fname:
                continue
            old_path = os.path.join(folder, fname)
            stem = fname.split('_')[0].replace(".tif", "")
            target = os.path.join(folder, stem + ".tif")
            if old_path == target:
                continue
            os.rename(old_path, target)
            cp('(#r){}(#)\t(#g){}(#)'.format(old_path, target))
def main():
    """Prediction driver: score patches with a trained ResNet34 MIL model,
    copy each slide's top-k patches aside, and render a labeled overview image."""
    #load model
    with procedure('load model'):
        model = models.resnet34(True)
        model.fc = nn.Linear(model.fc.in_features, cfg.n_classes)
        model = torch.nn.DataParallel(model.cuda())
        if args.resume:
            ch = torch.load(args.model)
            model.load_state_dict(ch['state_dict'])
        cudnn.benchmark = True
    with procedure('prepare dataset'):
        # normalization
        normalize = transforms.Normalize(mean=cfg.mean, std=cfg.std)
        trans = transforms.Compose([transforms.ToTensor(), normalize])
        #load data
        with open(cfg.data_split) as f:
            data = json.load(f)
        # NOTE(review): only the first two positive training slides are used
        # here ([:2]) — presumably a debugging limit; confirm before release.
        dset = MILdataset(data['train_pos'][:2], args.patch_size, trans)
        loader = torch.utils.data.DataLoader(dset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    dset.setmode(1)
    probs = inference(loader, model)
    # Top-k probabilities and patch indices per slide.
    maxs, index = group_topk(np.array(dset.slideIDX), probs, args.k)
    if not os.path.isdir(cfg.color_img_path):
        os.makedirs(cfg.color_img_path)
    # NOTE(review): the loop variable 'probs' shadows the full probability
    # array computed above; inside the loop it is one slide's top-k list.
    for name, target, probs, idxs in zip(dset.slidenames, dset.targets, maxs, index):
        # NOTE(review): this assert is loop-invariant and belongs before the loop.
        assert len(dset.slidenames) == len(maxs), print("length, error")
        flag = 'pos' if target == 1 else 'neg'
        orign_img_path = os.path.join(cfg.data_path, flag, name, name + '_orign.jpg')
        color_img_path = os.path.join(cfg.color_img_path, name + '.jpg')
        #print("orign_img_path: ",orign_img_path)
        patch_names = []
        orign_img = cv2.imread(orign_img_path)
        for i in range(args.k):
            idx = idxs[i]
            src = dset.grid[idx]
            # Destination keeps the slide directory name (second-to-last path part).
            dst = os.path.join(cfg.patch_predict, flag, src.split('/')[-2])
            if not os.path.isdir(dst):
                os.makedirs(dst)
            shutil.copy(src, dst)
            cp('(#r){}(#)\t(#g){}(#)'.format(src, dst))
            patch_names.append(src.split('/')[-1])
        plot_label(orign_img, patch_names, probs, color_img_path)
def inference(run, loader, model):
    """Run *model* over *loader* and return P(class=1) per sample as a numpy array.

    Progress is logged every 50 batches; gradients are disabled throughout.
    """
    model.eval()
    scores = torch.FloatTensor(len(loader.dataset))
    with torch.no_grad():
        for step, batch in enumerate(loader):
            #print('Inference\tEpoch: [{}/{}]\tBatch: [{}/{}]'.format(run+1, args.nepochs, i+1, len(loader)))
            if (step + 1) % 50 == 0:
                cp('(#y)Inference\t(#)(#b)Epoch:[{}/{}]\t(#)(#g)Batch: [{}/{}](#)'
                   .format(run + 1, args.nepochs, step + 1, len(loader)))
            batch = batch.cuda()
            posterior = F.softmax(model(batch), dim=1)
            # Slot this batch's positive-class probabilities into place.
            offset = step * args.batch_size
            scores[offset:offset + batch.size(0)] = posterior.detach()[:, 1].clone()
    return scores.cpu().numpy()
def group_topk(groups, data, k=1):
    """Return per-group top-k values (ascending) and their original indices.

    groups and data are parallel 1-D numpy arrays; every group must contain
    at least k entries. Returns (values, indices): two lists of k-element
    lists, one pair per group, ordered by group id.
    """
    # Sort by group first, value second (lexsort's last key is primary).
    sort_idx = np.lexsort((data, groups))
    sorted_groups = groups[sort_idx]
    sorted_data = data[sort_idx]
    # Mark the last k slots of each contiguous group run.
    keep = np.empty(len(sorted_groups), 'bool')
    keep[-k:] = True
    keep[:-k] = sorted_groups[k:] != sorted_groups[:-k]
    top_vals = sorted_data[keep]
    top_idx = sort_idx[keep]
    assert len(top_vals) % k == 0, cp('(#r) topk lenth error(#): {}'.format(
        len(top_vals)))
    assert len(top_vals) == len(top_idx), cp('(#r)prob and index not match(#)')
    out = [list(top_vals[pos:pos + k]) for pos in range(0, len(top_vals), k)]
    out_index = [list(top_idx[pos:pos + k]) for pos in range(0, len(top_vals), k)]
    return out, out_index
def work(args):
    """Per-slide worker: downscale a whole-slide .tif to a JPEG + Otsu tissue
    mask, then cut tissue-covered patches and record their ROIs to JSON."""
    img_path, size, scale, output_patch_path, patch_size, nums, bin, thresh = args
    '''
    img_path: path of tif (e.g. ./data_append/1/1.tif)
    size: size of patch (from tiff to jpeg) (e.g. 20000)
    scale: scale (riff2jpeg) (e.g. 4)
    output_patch_path: path of patch (e.g. ./Patch/pos/1)
    patch_size: during cut_image (2048)
    '''
    output_mask_path = img_path[:-4] + '_mask.jpg'
    output_img_path = img_path[:-4] + '_orign.jpg'
    slide = opsl.OpenSlide(img_path)
    # OpenSlide dimensions are (width, height); here n = width, m = height.
    [n, m] = slide.dimensions
    with procedure('Tiff2jpeg'):
        # Skip if both downscaled image and mask already exist.
        if not os.path.isfile(output_mask_path) or not os.path.isfile(output_img_path):
            blocks_pre_col = math.ceil(m / size)
            blocks_pre_row = math.ceil(n / size)
            row_cache = []
            img_cache = []
            # Read the slide in size x size tiles, shrink each by 'scale',
            # then stitch rows and columns back together.
            for i in range(blocks_pre_col):
                for j in range(blocks_pre_row):
                    x = i * size
                    y = j * size
                    height = min(x + size, m) - x
                    width = min(y + size, n) - y
                    img = np.array(slide.read_region((y, x), 0, (width, height)))
                    img = cv2.resize(img, (width // scale, height // scale))
                    row_cache.append(img)
                img_cache.append(np.concatenate(row_cache, axis=1))
                row_cache = []
            img = np.concatenate(img_cache, axis=0)
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Otsu thresholding; invert so tissue (dark) becomes white in the mask.
            ret1, th1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)
            mask = 255 - th1
            cv2.imwrite(output_mask_path, mask)
            cv2.imwrite(output_img_path, img)
            cp('(#g)save_mask_path:{}(#)'.format(output_mask_path))
            cp('(#g)save_orign_path:{}(#)'.format(output_img_path))
    with procedure('Cut image'):
        if not os.path.isdir(output_patch_path):
            mask = cv2.imread(output_mask_path, 0)
            assert len(mask.shape) == 2, print('size error')
            mask_patch_size = patch_size // scale
            # 50% overlap between neighboring windows.
            step = mask_patch_size // 2
            try:
                os.makedirs(output_patch_path)
            except:
                pass
            data = {}
            #patch_overlap_count = []
            data['roi'] = []
            h, w = mask.shape[0], mask.shape[1]
            threshold = get_threshold(img_path.split('/')[-2], nums, bin, thresh)
            data['threshold'] = threshold
            cp('(#r)Processinf:{}\tThreshold:{}'.format(img_path.split('/')[-2], threshold))
            # Slide a window over the mask; save full-resolution patches whose
            # tissue coverage exceeds the threshold.
            for i in range(0, h, step):
                for j in range(0, w, step):
                    # Clamp the window inside the mask (and at full scale,
                    # inside the slide).
                    si = min(i, h - mask_patch_size)
                    sj = min(j, w - mask_patch_size)
                    si = max(0, si)  # h may be smaller than the patch size
                    sj = max(0, sj)
                    x = min(scale * si, m - patch_size)
                    y = min(scale * sj, n - patch_size)
                    sub_img = mask[si: si + mask_patch_size, sj: sj + mask_patch_size]
                    # Fraction of white (tissue) pixels in the window.
                    cur_scale = (np.sum(sub_img) // 255) / (sub_img.shape[0] * sub_img.shape[1])
                    #patch_overlap_count.append([x, y, cur_threshold])
                    if cur_scale > threshold:
                        data['roi'].append([x, y, cur_scale])
                        patch = np.array(slide.read_region((y, x), 0, (patch_size, patch_size)).convert('RGB'))
                        patch_name = "{}_{}.jpg".format(x, y)
                        patch_path = os.path.join(output_patch_path, patch_name)
                        cv2.imwrite(patch_path, patch)
                        cp('(#y)save_path:\t{}(#)'.format(patch_path))
                    # A clamped coordinate means the edge was reached — stop.
                    if sj != j:
                        break
                if si != i:
                    break
            json_path = output_mask_path[:-9] + '_mask.json'
            data['id'] = img_path.split('/')[-2]
            with open(json_path, 'w') as f:
                json.dump(data, f)
            cp('(#g)save_json:\t{}(#)'.format(json_path))
# NOTE(review): unmatched triple-quote from the original source below — it
# appears to open a commented-out region; verify its closing quote exists.
'''
# Script entry: map slide ids to diagnosis labels, build one parameter tuple
# per .tif, then fan the patch-extraction work() out over a thread pool.
labels = df.values
m = {}
# m: slide id -> diagnosis string (e.g. 'Positive') from the labels table.
for val in labels:
    m[val[0]] = val[1]
params = []
idx = 0
for root, dirs, filenames in os.walk(cfg.data_neg_path):
    for each_tif in filenames:
        if '.tif' in each_tif:
            name = each_tif.split('.')[0]
            # Route each slide to pos/ or neg/ according to its label.
            flag = 'pos' if m[int(name)] == 'Positive' else 'neg'
            path = os.path.join(root, each_tif)  # ./EDCP/data_append/1/1.tif
            out_patch_path = os.path.join(cfg.patch_data, flag, name)  # ./EDCP_PATCH/pos/1/
            idx += 1
            params.append([path, args.size, args.scale, out_patch_path,
                           args.patch_size, args.nums, args.bin, args.threshold])
            # print(idx)
cp('(#b)total_img:\t{}(#)'.format(idx))
pool = threadpool.ThreadPool(args.poolsize)
requests = threadpool.makeRequests(work, params)
[pool.putRequest(req) for req in requests]
pool.wait()
def main():
    """MIL training driver: per epoch, rank all patches by model confidence,
    train on each slide's top-k patches, and periodically validate at
    slide level, checkpointing the best model."""
    global args, best_acc
    args = get_args()
    #cnn
    with procedure('init model'):
        model = models.resnet34(True)
        model.fc = nn.Linear(model.fc.in_features, 2)
        model = torch.nn.parallel.DataParallel(model.cuda())
    with procedure('loss and optimizer'):
        # cfg.weights == 0.5 means balanced classes -> unweighted loss.
        if cfg.weights == 0.5:
            criterion = nn.CrossEntropyLoss().cuda()
        else:
            w = torch.Tensor([1 - cfg.weights, cfg.weights])
            criterion = nn.CrossEntropyLoss(w).cuda()
        optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
        cudnn.benchmark = True
    #normalization
    normalize = transforms.Normalize(mean=cfg.mean, std=cfg.std)
    trans = transforms.Compose([transforms.ToTensor(), normalize])
    with procedure('prepare dataset'):
        #load data
        with open(cfg.data_split) as f:  #
            data = json.load(f)
        # NOTE(review): only 14 negative slides are used ([:14]) — presumably
        # a class-balance or debugging limit; confirm before release.
        train_dset = MILdataset(data['train_neg'][:14] + data['train_pos'],
                                args.patch_size, trans)
        # shuffle=False: the dataset shuffles internally (shuffletraindata).
        train_loader = torch.utils.data.DataLoader(train_dset,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
        if args.val:
            val_dset = MILdataset(data['val_pos'] + data['val_neg'],
                                  args.patch_size, trans)
            val_loader = torch.utils.data.DataLoader(
                val_dset, batch_size=args.batch_size, shuffle=False,
                num_workers=args.workers, pin_memory=True)
    with procedure('init tensorboardX'):
        tensorboard_path = os.path.join(args.output, 'tensorboard')
        if not os.path.isdir(tensorboard_path):
            os.makedirs(tensorboard_path)
        summary = TensorboardSummary(tensorboard_path, args.dis_slide)
        writer = summary.create_writer()
    #open output file
    fconv = open(os.path.join(args.output, 'convergence.csv'), 'w')
    fconv.write('epoch,metric,value\n')
    fconv.close()
    #loop throuh epochs
    for epoch in range(args.nepochs):
        # Mode 1: inference over every patch to rank them.
        train_dset.setmode(1)
        probs = inference(epoch, train_loader, model)
        topk = group_argtopk(np.array(train_dset.slideIDX), probs, args.k)
        images, names, labels = train_dset.getpatchinfo(topk)
        summary.plot_calsses_pred(writer, images, names, labels,
                                  np.array([probs[k] for k in topk]),
                                  args.k, epoch)
        slidenames, length = train_dset.getslideinfo()
        summary.plot_histogram(writer, slidenames, probs, length, epoch)
        #print([probs[k] for k in topk ])
        # Mode 2: train only on the selected top-k patches per slide.
        train_dset.maketraindata(topk)
        train_dset.shuffletraindata()
        train_dset.setmode(2)
        loss = train(epoch, train_loader, model, criterion, optimizer, writer)
        cp('(#r)Training(#)\t(#b)Epoch: [{}/{}](#)\t(#g)Loss:{}(#)'.format(
            epoch + 1, args.nepochs, loss))
        fconv = open(os.path.join(args.output, 'convergence.csv'), 'a')
        fconv.write('{},loss,{}\n'.format(epoch + 1, loss))
        fconv.close()
        #Validation
        if args.val and (epoch + 1) % args.test_every == 0:
            val_dset.setmode(1)
            probs = inference(epoch, val_loader, model)
            # Slide-level prediction: max patch probability per slide.
            maxs = group_max(np.array(val_dset.slideIDX), probs,
                             len(val_dset.targets))
            pred = [1 if x >= 0.5 else 0 for x in maxs]
            err, fpr, fnr = calc_err(pred, val_dset.targets)
            #print('Validation\tEpoch: [{}/{}]\tError: {}\tFPR: {}\tFNR: {}'.format(epoch+1, args.nepochs, err, fpr, fnr))
            cp('(#y)Vaildation\t(#)(#b)Epoch: [{}/{}]\t(#)(#g)Error: {}\tFPR: {}\tFNR: {}(#)'
               .format(epoch + 1, args.nepochs, err, fpr, fnr))
            fconv = open(os.path.join(args.output, 'convergence.csv'), 'a')
            fconv.write('{},error,{}\n'.format(epoch + 1, err))
            fconv.write('{},fpr,{}\n'.format(epoch + 1, fpr))
            fconv.write('{},fnr,{}\n'.format(epoch + 1, fnr))
            fconv.close()
            #Save best model
            # Balanced error: mean of FPR and FNR (replaces raw error above).
            err = (fpr + fnr) / 2.
            if 1 - err >= best_acc:
                best_acc = 1 - err
                obj = {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict()
                }
                torch.save(obj, os.path.join(args.output, 'checkpoint_best.pth'))
def main():
    """Multi-scale MIL training driver: per epoch, select top-k patches at
    each configured scale, train on the union, and validate with a
    mask-based DSC metric, checkpointing the best model."""
    args = get_args()
    global best_dsc
    #cnn
    with procedure('init model'):
        model = get_model(config)
        model = torch.nn.parallel.DataParallel(model.cuda())
    with procedure('loss and optimizer'):
        criterion = FocalLoss(config.TRAIN.LOSS.GAMMA,
                              config.DATASET.ALPHA).cuda()
        # NOTE(review): weight_decay is set to config.TRAIN.LR — this looks
        # like a copy-paste slip (a dedicated WD config value is expected);
        # confirm intent.
        optimizer = optim.Adam(model.parameters(), lr=config.TRAIN.LR,
                               weight_decay=config.TRAIN.LR)
    start_epoch = 0
    if config.TRAIN.RESUME:
        with procedure('resume model'):
            start_epoch, best_acc, model, optimizer = load_model(
                model, optimizer)
    cudnn.benchmark = True
    #normalization
    normalize = transforms.Normalize(mean=config.DATASET.MEAN,
                                     std=config.DATASET.STD)
    trans = transforms.Compose([transforms.ToTensor(), normalize])
    with procedure('prepare dataset'):
        #load data
        data_split = config.DATASET.SPLIT
        with open(data_split) as f:
            data = json.load(f)
        train_dset = MILdataset(data['train_neg'] + data['train_pos'], trans)
        # shuffle=False: the dataset shuffles internally (shuffletraindata).
        train_loader = DataLoader(train_dset,
                                  batch_size=config.TRAIN.BATCHSIZE,
                                  shuffle=False,
                                  num_workers=config.WORKERS,
                                  pin_memory=True)
        if config.TRAIN.VAL:
            val_dset = MILdataset(data['val_pos'] + data['val_neg'], trans)
            val_loader = DataLoader(val_dset,
                                    batch_size=config.TEST.BATCHSIZE,
                                    shuffle=False,
                                    num_workers=config.WORKERS,
                                    pin_memory=True)
    with procedure('init tensorboardX'):
        # Timestamped run directory; the effective config is dumped alongside.
        train_log_path = os.path.join(
            config.TRAIN.OUTPUT,
            time.strftime('%Y%m%d_%H%M%S', time.localtime()))
        if not os.path.isdir(train_log_path):
            os.makedirs(train_log_path)
        tensorboard_path = os.path.join(train_log_path, 'tensorboard')
        with open(os.path.join(train_log_path, 'cfg.yaml'), 'w') as f:
            print(config, file=f)
        if not os.path.isdir(tensorboard_path):
            os.makedirs(tensorboard_path)
        summary = TensorboardSummary(tensorboard_path)
        writer = summary.create_writer()
    for epoch in range(start_epoch, config.TRAIN.EPOCHS):
        index = []
        # Rank patches at every scale; collect (patch, scale-index) pairs.
        for idx, each_scale in enumerate(config.DATASET.MULTISCALE):
            train_dset.setmode(idx)
            #print(len(train_loader), len(train_dset))
            probs = inference(epoch, train_loader, model)
            topk = group_argtopk(train_dset.ms_slideIDX[:], probs,
                                 train_dset.targets[:],
                                 train_dset.ms_slideLen[:], each_scale)
            index.extend([[each[0], each[1]]
                          for each in zip(topk, [idx] * len(topk))])
        train_dset.maketraindata(index)
        train_dset.shuffletraindata()
        # Mode -1: training mode over the selected multi-scale patch set.
        train_dset.setmode(-1)
        loss = trainer(epoch, train_loader, model, criterion, optimizer,
                       writer)
        cp('(#r)Training(#)\t(#b)Epoch: [{}/{}](#)\t(#g)Loss:{}(#)'.format(
            epoch + 1, config.TRAIN.EPOCHS, loss))
        if config.TRAIN.VAL and (epoch + 1) % config.TRAIN.VALGAP == 0:
            # Gather per-image patch predictions across all scales.
            patch_info = {}
            for idx, each_scale in enumerate(config.DATASET.MULTISCALE):
                val_dset.setmode(idx)
                probs, img_idxs, rows, cols = inference_vt(
                    epoch, val_loader, model)
                res = probs_parser(probs, img_idxs, rows, cols, val_dset,
                                   each_scale)
                for key, val in res.items():
                    if key not in patch_info:
                        patch_info[key] = val
                    else:
                        patch_info[key].extend(val)
            res = []
            dsc = []
            # NOTE(review): pool.apply() is synchronous, so this loop runs the
            # mask jobs one at a time (apply_async is likely intended); also
            # join() without a preceding close() raises ValueError on
            # multiprocessing.Pool — verify this path actually executes.
            with multiprocessing.Pool(processes=16) as pool:
                for each_img, each_labels in patch_info.items():
                    res.append(
                        pool.apply(get_mask,
                                   (each_img, each_labels, None, False)))
                pool.join()
            for each_res in res:
                dsc.extend([each_val for each_val in each_res.values()])
            # Mean Dice score over all validation images.
            dsc = np.array(dsc).mean()
            '''
            maxs = group_max(np.array(val_dset.slideLen), probs, len(val_dset.targets), config.DATASET.MULTISCALE[-1])
            threshold = 0.5
            pred = [1 if x >= threshold else 0 for x in maxs]
            err, fpr, fnr, f1 = calc_err(pred, val_dset.targets)
            cp('(#y)Vaildation\t(#)(#b)Epoch: [{}/{}]\t(#)(#g)Error: {}\tFPR: {}\tFNR: {}\tF1: {}(#)'.format(epoch+1, config.TRAIN.EPOCHS, err, fpr, fnr, f1))
            '''
            cp('(#y)Vaildation\t(#)(#b)Epoch: [{}/{}]\t(#)(#g)DSC: {}(#)'.
               format(epoch + 1, config.TRAIN.EPOCHS, dsc))
            writer.add_scalar('Val/dsc', dsc, epoch)
            if dsc >= best_dsc:
                best_dsc = dsc
                obj = {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_dsc': best_dsc,
                    'optimizer': optimizer.state_dict()
                }
                torch.save(obj,
                           os.path.join(train_log_path, 'BestCheckpoint.pth'))