def main():
    """Compute TCGA-lung patch features with a SimCLR-trained embedder.

    Parses the command line, builds the requested ResNet backbone with
    instance normalisation, copies the SimCLR checkpoint tensors into the
    instance classifier by position, and passes the bag folders to
    ``compute_feats``.

    Raises:
        FileNotFoundError: no ``--weights`` folder was given and no checkpoint
            exists under ``simclr/runs/*/checkpoints``.
        KeyError: the checkpoint does not contain the projection-head tensors
            (``l1``/``l2``, optionally prefixed with ``module.``).
    """
    # Constructor and feature width for every supported backbone.
    backbones = {
        'resnet18': (models.resnet18, 512),
        'resnet34': (models.resnet34, 512),
        'resnet50': (models.resnet50, 2048),
        'resnet101': (models.resnet101, 2048),
    }
    supported_datasets = ('wsi-tcga-lung',)

    parser = argparse.ArgumentParser(
        description='Compute TCGA features from SimCLR embedder')
    parser.add_argument('--num_classes', default=2, type=int,
                        help='Number of output classes')
    # NOTE: kept for command-line compatibility; the feature size actually
    # used is the one implied by --backbone.
    parser.add_argument('--num_feats', default=512, type=int,
                        help='Feature size')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Batch size of dataloader')
    parser.add_argument('--num_workers', default=0, type=int,
                        help='Number of threads for datalodaer')
    parser.add_argument('--dataset', default='wsi-tcga-lung', type=str,
                        help='Nanme of dataset')
    parser.add_argument('--backbone', default='resnet18', type=str,
                        choices=tuple(backbones),
                        help='Embedder backbone')
    parser.add_argument('--magnification', default='20x', type=str,
                        help='Magnification to compute features')
    parser.add_argument('--weights', default=None, type=str,
                        help='Folder of the pretrained weights, simclr/runs/*')
    args = parser.parse_args()

    # Fail before building the model instead of hitting an unbound
    # `bags_path` after the weights have already been loaded.
    if args.dataset not in supported_datasets:
        parser.error(
            f'unsupported dataset {args.dataset!r}; '
            f'expected one of {supported_datasets}')

    build_backbone, num_feats = backbones[args.backbone]
    resnet = build_backbone(pretrained=False, norm_layer=nn.InstanceNorm2d)
    for param in resnet.parameters():
        param.requires_grad = False
    # Replace the final fully-connected layer with a pass-through.
    resnet.fc = nn.Identity()
    i_classifier = mil.IClassifier(
        resnet, num_feats, output_class=args.num_classes).cuda()

    if args.weights is not None:
        weight_path = os.path.join(
            'simclr', 'runs', args.weights, 'checkpoints', 'model.pth')
    else:
        # glob returns matches in arbitrary order; sort so that the chosen
        # checkpoint is deterministic.
        candidates = sorted(glob.glob('simclr/runs/*/checkpoints/*.pth'))
        if not candidates:
            raise FileNotFoundError(
                'No checkpoint found under simclr/runs/*/checkpoints/*.pth; '
                'pass --weights to select a run folder')
        weight_path = candidates[-1]
    state_dict_weights = torch.load(weight_path)

    # Remove the l1/l2 tensors before the positional remapping below. The
    # keys carry a `module.` prefix in some checkpoints and not in others.
    prefix = 'module.' if 'module.l1.weight' in state_dict_weights else ''
    for suffix in ('l1.weight', 'l1.bias', 'l2.weight', 'l2.bias'):
        state_dict_weights.pop(prefix + suffix)

    # Pair checkpoint tensors with the classifier's parameter names purely by
    # position; zip stops at the shorter of the two.
    state_dict_init = i_classifier.state_dict()
    new_state_dict = OrderedDict(
        zip(state_dict_init.keys(), state_dict_weights.values()))
    i_classifier.load_state_dict(new_state_dict, strict=False)

    bags_path = os.path.join('WSI', 'TCGA-lung', 'pyramid', '*', '*')
    feats_path = os.path.join('datasets', args.dataset)
    os.makedirs(feats_path, exist_ok=True)
    # The trailing separator restricts the matches to directories.
    bags_list = glob.glob(bags_path + os.path.sep)
    compute_feats(args, bags_list, i_classifier, feats_path)
# NOTE(review): the statement below reads `slide_name` and `color_map`, which
# are not defined anywhere in this view. It looks like the tail of a
# definition that starts earlier in the file; its original indentation cannot
# be recovered from here -- TODO confirm placement.
io.imsave(os.path.join('test-c16', 'output', slide_name+'.png'), img_as_ubyte(color_map))

# Script entry point: parse the options, build the instance classifier, the bag
# classifier and the combined MIL network, and load their weights from
# test-c16/weights.
# NOTE(review): the script may continue beyond the last statement shown here.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Testing workflow includes attention computing and color map production')
    parser.add_argument('--num_classes', type=int, default=1, help='Number of output classes')
    parser.add_argument('--batch_size', type=int, default=64, help='Batch size of feeding patches')
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--feats_size', type=int, default=512)
    parser.add_argument('--thres_tumor', type=float, default=0.1964)
    args = parser.parse_args()

    # ResNet-18 with instance normalisation and no pretrained weights; every
    # parameter is frozen and the final fully-connected layer is replaced by a
    # pass-through.
    resnet = models.resnet18(pretrained=False, norm_layer=nn.InstanceNorm2d)
    for param in resnet.parameters():
        param.requires_grad = False
    resnet.fc = nn.Identity()
    i_classifier = mil.IClassifier(resnet, args.feats_size, output_class=args.num_classes).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size, output_class=args.num_classes).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()

    # Embedder weights: drop the last four entries of the checkpoint
    # (presumably the projection-head tensors -- confirm against the
    # checkpoint), then pair the remaining tensors with the instance
    # classifier's parameter names by position.
    state_dict_weights = torch.load(os.path.join('test-c16', 'weights', 'embedder.pth'))
    new_state_dict = OrderedDict()
    for i in range(4):
        state_dict_weights.popitem()
    state_dict_init = i_classifier.state_dict()
    for (k, v), (k_0, v_0) in zip(state_dict_weights.items(), state_dict_init.items()):
        name = k_0
        new_state_dict[name] = v
    # strict=False: keys missing from new_state_dict are left untouched.
    i_classifier.load_state_dict(new_state_dict, strict=False)

    # Aggregator weights: the checkpoint stores the instance classifier's
    # linear layer under "fc.0.*"; copy those tensors to the "fc.*" keys
    # before loading into the full network.
    state_dict_weights = torch.load(os.path.join('test-c16', 'weights', 'aggregator.pth'))
    state_dict_weights["i_classifier.fc.weight"] = state_dict_weights["i_classifier.fc.0.weight"]
    state_dict_weights["i_classifier.fc.bias"] = state_dict_weights["i_classifier.fc.0.bias"]
    milnet.load_state_dict(state_dict_weights, strict=False)
def _resolve_checkpoint(run_folder):
    """Return the SimCLR checkpoint path for *run_folder*.

    A folder name maps to ``simclr/runs/<run_folder>/checkpoints/model.pth``.
    With ``None`` the last ``*.pth`` (in sorted order) found under any run
    folder is returned; ``FileNotFoundError`` is raised when there is none.
    """
    if run_folder is not None:
        return os.path.join(
            'simclr', 'runs', run_folder, 'checkpoints', 'model.pth')
    # glob returns matches in arbitrary order; sort for a deterministic pick.
    candidates = sorted(glob.glob('simclr/runs/*/checkpoints/*.pth'))
    if not candidates:
        raise FileNotFoundError(
            'No checkpoint found under simclr/runs/*/checkpoints/*.pth; '
            'pass --weights to select a run folder')
    return candidates[-1]


def _load_embedder_weights(classifier, weight_path, dataset, save_name):
    """Load a SimCLR checkpoint into *classifier* and save the remapped copy.

    The last four checkpoint entries are discarded and the remaining tensors
    are paired with the classifier's parameter names by position. The
    remapped state dict is written to ``embedder/<dataset>/<save_name>``.
    """
    state_dict_weights = torch.load(weight_path)
    for _ in range(4):
        state_dict_weights.popitem()
    # zip stops at the shorter sequence, so surplus keys on either side are
    # ignored; strict=False tolerates the keys that end up missing.
    new_state_dict = OrderedDict(
        zip(classifier.state_dict().keys(), state_dict_weights.values()))
    classifier.load_state_dict(new_state_dict, strict=False)
    os.makedirs(os.path.join('embedder', dataset), exist_ok=True)
    torch.save(new_state_dict, os.path.join('embedder', dataset, save_name))


def main():
    """Compute bag features with a SimCLR or ImageNet embedder and index them.

    Parses the command line, builds one embedder (``single``/``high``/``low``)
    or two (``tree``), runs ``compute_feats`` / ``compute_tree_feats`` over the
    bag folders under ``WSI/<dataset>``, then writes one CSV per class folder
    plus a shuffled ``<dataset>.csv`` under ``datasets/<dataset>``.

    Raises:
        ValueError: ImageNet weights were requested without
            ``--norm_layer batch``.
        FileNotFoundError: no ``--weights`` folder was given and no checkpoint
            exists under ``simclr/runs/*/checkpoints``.
    """
    # Constructor and feature width for every supported backbone.
    backbones = {
        'resnet18': (models.resnet18, 512),
        'resnet34': (models.resnet34, 512),
        'resnet50': (models.resnet50, 2048),
        'resnet101': (models.resnet101, 2048),
    }
    norm_layers = {'instance': nn.InstanceNorm2d, 'batch': nn.BatchNorm2d}

    parser = argparse.ArgumentParser(
        description='Compute TCGA features from SimCLR embedder')
    parser.add_argument('--num_classes', default=2, type=int,
                        help='Number of output classes [2]')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Batch size of dataloader [128]')
    parser.add_argument('--num_workers', default=4, type=int,
                        help='Number of threads for datalodaer')
    parser.add_argument('--gpu_index', type=int, nargs='+', default=(0, ),
                        help='GPU ID(s) [0]')
    parser.add_argument('--backbone', default='resnet18', type=str,
                        choices=tuple(backbones),
                        help='Embedder backbone [resnet18]')
    parser.add_argument('--norm_layer', default='instance', type=str,
                        choices=tuple(norm_layers),
                        help='Normalization layer [instance]')
    parser.add_argument(
        '--magnification', default='single', type=str,
        choices=('single', 'tree', 'high', 'low'),
        help='Magnification to compute features. '
        'Use `tree` for multiple magnifications. '
        'Use `high` if patches are cropped for multiple resolution '
        'and only process higher level, '
        '`low` for only processing lower level.')
    parser.add_argument('--weights', default=None, type=str,
                        help='Folder of the pretrained weights, simclr/runs/*')
    parser.add_argument(
        '--weights_high', default=None, type=str,
        help='Folder of the pretrained weights of high magnification, '
        'FOLDER < `simclr/runs/[FOLDER]`')
    parser.add_argument(
        '--weights_low', default=None, type=str,
        help='Folder of the pretrained weights of low magnification, '
        'FOLDER <`simclr/runs/[FOLDER]`')
    parser.add_argument('--dataset', default='TCGA-lung-single', type=str,
                        help='Dataset folder name [TCGA-lung-single]')
    args = parser.parse_args()

    tree_mode = args.magnification == 'tree'
    if tree_mode and (args.weights_high is None or args.weights_low is None):
        # Without both folders no tree embedder would ever be created.
        parser.error('--magnification tree requires both '
                     '--weights_high and --weights_low')

    gpu_ids = tuple(args.gpu_index)
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in gpu_ids)

    # In tree mode any of the three weight options may request ImageNet;
    # otherwise only --weights is consulted.
    if tree_mode:
        use_imagenet = 'ImageNet' in (
            args.weights_high, args.weights_low, args.weights)
    else:
        use_imagenet = args.weights == 'ImageNet'
    if use_imagenet and args.norm_layer != 'batch':
        raise ValueError('Please use batch normalization for ImageNet feature')

    # The backbone is built pretrained exactly when ImageNet features are
    # reported, so the "Use ImageNet features." message is always truthful.
    build_backbone, num_feats = backbones[args.backbone]
    resnet = build_backbone(pretrained=use_imagenet,
                            norm_layer=norm_layers[args.norm_layer])
    for param in resnet.parameters():
        param.requires_grad = False
    # Replace the final fully-connected layer with a pass-through.
    resnet.fc = nn.Identity()

    if tree_mode:
        i_classifier_h = mil.IClassifier(
            resnet, num_feats, output_class=args.num_classes).cuda()
        i_classifier_l = mil.IClassifier(
            copy.deepcopy(resnet), num_feats,
            output_class=args.num_classes).cuda()
        if use_imagenet:
            print('Use ImageNet features.')
        else:
            _load_embedder_weights(
                i_classifier_h, _resolve_checkpoint(args.weights_high),
                args.dataset, 'embedder-high.pth')
            _load_embedder_weights(
                i_classifier_l, _resolve_checkpoint(args.weights_low),
                args.dataset, 'embedder-low.pth')
            print('Use pretrained features.')
    else:
        i_classifier = mil.IClassifier(
            resnet, num_feats, output_class=args.num_classes).cuda()
        if use_imagenet:
            print('Use ImageNet features.')
        else:
            _load_embedder_weights(
                i_classifier, _resolve_checkpoint(args.weights),
                args.dataset, 'embedder.pth')
            print('Use pretrained features.')

    if args.magnification in ('tree', 'low', 'high'):
        bags_path = os.path.join('WSI', args.dataset, 'pyramid', '*', '*')
    else:
        bags_path = os.path.join('WSI', args.dataset, 'single', '*', '*')
    feats_path = os.path.join('datasets', args.dataset)
    os.makedirs(feats_path, exist_ok=True)
    bags_list = glob.glob(bags_path)

    if tree_mode:
        compute_tree_feats(args, bags_list, i_classifier_l, i_classifier_h,
                           feats_path, 'fusion')
    else:
        compute_feats(args, bags_list, i_classifier, feats_path,
                      args.magnification)

    # One label per class folder, assigned in sorted folder order. The
    # trailing separator restricts the matches to directories.
    class_dirs = sorted(
        glob.glob(os.path.join('datasets', args.dataset, '*' + os.path.sep)))
    all_df = []
    for label, class_dir in enumerate(class_dirs):
        bag_csvs = sorted(glob.glob(os.path.join(class_dir, '*.csv')))
        bag_df = pd.DataFrame(bag_csvs)
        bag_df['label'] = label
        # Take the folder name from the path itself rather than from a fixed
        # component index, so dataset names containing a separator still work.
        class_name = os.path.basename(os.path.normpath(class_dir))
        bag_df.to_csv(
            os.path.join('datasets', args.dataset, class_name + '.csv'),
            index=False)
        all_df.append(bag_df)
    bags_path = pd.concat(all_df, axis=0, ignore_index=True)
    bags_path = shuffle(bags_path)
    bags_path.to_csv(
        os.path.join('datasets', args.dataset, args.dataset + '.csv'),
        index=False)