def main():
    """Entry point: parse CLI args, merge them into the file config, and
    dispatch to the phase-specific handler (train/test) for the GCN model.

    Side effects: creates ``cfg.work_dir`` on disk, configures cuDNN flags,
    and optionally seeds all RNGs.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # set cuda
    cfg.cuda = not args.no_cuda and torch.cuda.is_available()

    # set cudnn_benchmark & cudnn_deterministic
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if cfg.get('cudnn_deterministic', False):
        torch.backends.cudnn.deterministic = True

    # update configs according to args: CLI work_dir wins, otherwise derive
    # a folder name from the config filename.
    if not hasattr(cfg, 'work_dir'):
        if args.work_dir is not None:
            cfg.work_dir = args.work_dir
        else:
            cfg_name = rm_suffix(os.path.basename(args.config))
            cfg.work_dir = os.path.join('./data/work_dir', cfg_name)
    mkdir_if_no_exists(cfg.work_dir, is_folder=True)

    cfg.load_from = args.load_from
    cfg.resume_from = args.resume_from

    cfg.gpus = args.gpus
    cfg.distributed = args.distributed

    cfg.random_conns = args.random_conns
    cfg.eval_interim = args.eval_interim
    cfg.save_output = args.save_output
    cfg.force = args.force

    # Propagate shared settings into each dataset section that exists.
    for data in ['train_data', 'test_data']:
        if not hasattr(cfg, data):
            continue
        cfg[data].eval_interim = cfg.eval_interim
        # If no precomputed kNN graph is available, fill in the fields the
        # dataset needs to (re)build it from features.
        if not hasattr(cfg[data], 'knn_graph_path') or not os.path.isfile(
                cfg[data].knn_graph_path):
            cfg[data].prefix = cfg.prefix
            cfg[data].knn = cfg.knn
            cfg[data].knn_method = cfg.knn_method
            name = 'train_name' if data == 'train_data' else 'test_name'
            cfg[data].name = cfg[name]

    logger = create_logger()

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_model(cfg.model['type'], **cfg.model['kwargs'])
    handler = build_handler(args.phase, cfg.model['type'])

    handler(model, cfg, logger)
def main():
    """Entry point for the three-model pipeline: parse CLI args, merge into
    the file config, build the three sub-models, optionally restore their
    checkpoints, and dispatch to the phase/stage handler.

    Bug fixed: previously ``model1, model2, model3`` were unpacked only
    inside ``if cfg.load_from1:``, so supplying ``load_from2``/``load_from3``
    without ``load_from1`` raised NameError. Each checkpoint is now loaded
    independently.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # set cuda
    cfg.cuda = not args.no_cuda and torch.cuda.is_available()

    # set cudnn_benchmark & cudnn_deterministic
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if cfg.get('cudnn_deterministic', False):
        torch.backends.cudnn.deterministic = True

    # update configs according to args: CLI work_dir wins, otherwise derive
    # a folder name from the config filename.
    if not hasattr(cfg, 'work_dir'):
        if args.work_dir is not None:
            cfg.work_dir = args.work_dir
        else:
            cfg_name = rm_suffix(os.path.basename(args.config))
            cfg.work_dir = os.path.join('./data/work_dir', cfg_name)
    mkdir_if_no_exists(cfg.work_dir, is_folder=True)

    if not hasattr(cfg, 'stage'):
        cfg.stage = args.stage

    cfg.load_from1 = args.load_from1
    cfg.load_from2 = args.load_from2
    cfg.load_from3 = args.load_from3
    cfg.resume_from = args.resume_from
    cfg.distributed = args.distributed
    cfg.save_output = args.save_output
    cfg.phase = args.phase

    logger = create_logger()

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = [build_model(cfg.model1['type'], **cfg.model1['kwargs']),
             build_model(cfg.model2['type'], **cfg.model2['kwargs']),
             build_model(cfg.model3['type'], **cfg.model3['kwargs'])]

    if cfg.phase == 'train':
        # Restore each sub-model from its own checkpoint when provided.
        # Each branch is independent: any subset of the three paths may be set.
        for i, ckpt in enumerate(
                (cfg.load_from1, cfg.load_from2, cfg.load_from3)):
            if ckpt:
                model[i].load_state_dict(torch.load(ckpt))

    handler = build_handler(args.phase, args.stage)

    handler(model, cfg, logger)
def main():
    """Entry point: merge CLI arguments into the file config, then hand the
    built model to the phase/stage handler.

    Side effects: creates ``cfg.work_dir`` on disk, sets cuDNN flags, and
    optionally seeds all RNGs.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # CUDA availability and cuDNN behavior flags.
    cfg.cuda = not args.no_cuda and torch.cuda.is_available()
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if cfg.get('cudnn_deterministic', False):
        torch.backends.cudnn.deterministic = True

    # Resolve the working directory: keep the config's value if present,
    # else prefer the CLI value, else derive one from the config filename.
    if not hasattr(cfg, 'work_dir'):
        if args.work_dir is None:
            cfg_name = rm_suffix(os.path.basename(args.config))
            cfg.work_dir = os.path.join('./data/work_dir', cfg_name)
        else:
            cfg.work_dir = args.work_dir
    mkdir_if_no_exists(cfg.work_dir, is_folder=True)

    # Defaults that only apply when the config leaves them unset.
    if not hasattr(cfg, 'stage'):
        cfg.stage = args.stage
    if not hasattr(cfg, 'test_batch_size_per_gpu'):
        cfg.test_batch_size_per_gpu = cfg.batch_size_per_gpu

    # Copy the remaining CLI switches verbatim onto the config.
    for attr in ('load_from', 'resume_from', 'pred_iou_score',
                 'pred_iop_score', 'gpus', 'det_label', 'distributed',
                 'save_output'):
        setattr(cfg, attr, getattr(args, attr))

    logger = create_logger()

    # Seed all RNGs when requested for reproducibility.
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_model(cfg.model['type'], **cfg.model['kwargs'])
    handler = build_handler(args.phase, args.stage)
    handler(model, cfg, logger)
def __init__(self, cfg):
    """Build a subgraph-based clustering dataset from features + kNN graph.

    Reads features (and optionally ground-truth labels), builds or loads a
    kNN graph, derives per-node confidences, selects a set of low-priority
    nodes to ignore, and constructs one candidate subgraph per remaining
    node via ``self.get_subgraph`` (possibly in parallel).

    Args:
        cfg: dict-like config with required keys ``feat_path``, ``k``,
            ``feature_dim`` and several optional knobs (see `.get` calls).
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)

    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)

    # Fraction of nodes excluded from subgraph construction, and whether
    # the excluded ones are those with the smallest confidences.
    self.ignore_ratio = cfg.get('ignore_ratio', 0.8)
    self.ignore_small_confs = cfg.get('ignore_small_confs', True)
    self.use_candidate_set = cfg.get('use_candidate_set', True)

    # Parallelism: worker count and chunk size for the process pool.
    self.nproc = cfg.get('nproc', 1)
    self.max_qsize = cfg.get('max_qsize', int(1e5))

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # No labels: inst_num is inferred from the feature file below.
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = self.inst_num
        assert self.size == self.features.shape[0]

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    with Timer('read knn graph'):
        if knn_graph_path is not None:
            knns = np.load(knn_graph_path)['data']
        else:
            prefix = osp.dirname(feat_path)
            name = rm_suffix(osp.basename(feat_path))
            # find root folder of `features`
            prefix = osp.dirname(prefix)
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn)
        assert self.inst_num == len(knns), "{} vs {}".format(
            self.inst_num, len(knns))

        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        self.adj = row_normalize(adj)

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns, sort=True)

    # Confidence per node: either precomputed (e.g. by GCN-V) or an
    # unsupervised density estimate over the kNN distances.
    if cfg.pred_confs != '':
        print('read estimated confidence from {}'.format(cfg.pred_confs))
        self.confs = np.load(cfg.pred_confs)['pred_confs']
    else:
        print('use unsupervised density as confidence')
        # NOTE(review): self.radius is not set anywhere in this method —
        # presumably assigned by a subclass or earlier config; TODO confirm.
        assert self.radius
        from vegcn.confidence import density
        self.confs = density(self.dists, radius=self.radius)

    assert 0 <= self.ignore_ratio <= 1
    if self.ignore_ratio == 1:
        self.ignore_set = set(np.arange(len(self.confs)))
    else:
        # Select the `num` smallest (or largest, if ignore_small_confs is
        # False — sign flip) confidences via partial sort.
        num = int(len(self.confs) * self.ignore_ratio)
        confs = self.confs
        if not self.ignore_small_confs:
            confs = -confs
        self.ignore_set = set(np.argpartition(confs, num)[:num])

    print(
        'ignore_ratio: {}, ignore_small_confs: {}, use_candidate_set: {}'.
        format(self.ignore_ratio, self.ignore_small_confs,
               self.use_candidate_set))
    print('#ignore_set: {} / {} = {:.3f}'.format(
        len(self.ignore_set), self.inst_num,
        1. * len(self.ignore_set) / self.inst_num))

    with Timer('Prepare sub-graphs'):
        # construct subgraphs with larger confidence
        self.peaks = {i: [] for i in range(self.inst_num)}
        self.dist2peak = {i: [] for i in range(self.inst_num)}

        if self.nproc > 1:
            # multi-process: feed the pool in chunks of max_qsize indices.
            import multiprocessing as mp
            pool = mp.Pool(self.nproc)
            results = []
            num = int(self.inst_num / self.max_qsize) + 1
            for i in tqdm(range(num)):
                beg = int(i * self.max_qsize)
                end = min(beg + self.max_qsize, self.inst_num)
                lst = [j for j in range(beg, end)]
                results.extend(
                    list(
                        tqdm(pool.map(self.get_subgraph, lst),
                             total=len(lst))))
            pool.close()
            pool.join()
        else:
            results = [
                self.get_subgraph(i) for i in tqdm(range(self.inst_num))
            ]

        self.adj_lst = []
        self.feat_lst = []
        self.lb_lst = []
        self.subset_gt_labels = []
        self.subset_idxs = []
        self.subset_nbrs = []
        self.subset_dists = []
        # get_subgraph returns one of three shapes:
        #   None                      -> skip node entirely
        #   (i, nbr, dist)            -> direct peak assignment, no subgraph
        #   (i, nbr, dist, feat, adj, lb) -> full subgraph sample
        for result in results:
            if result is None:
                continue
            elif len(result) == 3:
                i, nbr, dist = result
                self.peaks[i].extend(nbr)
                self.dist2peak[i].extend(dist)
                continue
            i, nbr, dist, feat, adj, lb = result
            self.subset_idxs.append(i)
            self.subset_nbrs.append(nbr)
            self.subset_dists.append(dist)
            self.feat_lst.append(feat)
            self.adj_lst.append(adj)
            if not self.ignore_label:
                self.subset_gt_labels.append(self.idx2lb[i])
                self.lb_lst.append(lb)
        self.subset_gt_labels = np.array(self.subset_gt_labels)

        # Dataset size is the number of constructed subgraphs, not inst_num.
        self.size = len(self.feat_lst)
        assert self.size == len(self.adj_lst)
        if not self.ignore_label:
            assert self.size == len(self.lb_lst)
# NOTE(review): `prefix` and `train_name` are referenced below but defined
# earlier in this config file (outside this excerpt).
test_name = 'part1_test'
knn = 160
knn_method = 'faiss'
th_sim = 0.  # cut edges with similarity smaller than th_sim

# testing args
max_conn = 1
tau = 0.8
metrics = ['pairwise', 'bcubed', 'nmi']

# gcn_v configs
_work_dir = 'work_dir'
ckpt_name = 'latest'  # epoch_80000

# Load the GCN-V training config and point load_from at its checkpoint.
gcnv_cfg = './vegcn/configs/cfg_train_gcnv_ms1m.py'
gcnv_cfg_name = rm_suffix(osp.basename(gcnv_cfg))
gcnv_cfg = Config.fromfile(gcnv_cfg)
gcnv_cfg.load_from = '{}/{}/{}/{}.pth'.format(prefix, _work_dir,
                                              gcnv_cfg_name, ckpt_name)

# Collect the GCN-V output feature path for both the train and test splits.
use_gcn_feat = True
feat_paths = []
pred_conf_paths = []
gcnv_nhid = gcnv_cfg.model.kwargs.nhid
for name in [train_name, test_name]:
    gcnv_prefix = '{}/{}/{}/{}_gcnv_k_{}_th_{}'.format(
        prefix, _work_dir, gcnv_cfg_name, name, gcnv_cfg.knn, gcnv_cfg.th_sim)
    feat_paths.append(
        osp.join(gcnv_prefix, 'features', '{}.bin'.format(ckpt_name)))
def __init__(self, cfg):
    """Build a whole-graph dataset for GCN-V from features + kNN graph.

    Reads features (and optionally labels), loads or rebuilds the kNN
    graph, assembles a normalized symmetric adjacency, and — when labels
    are available — precomputes per-node confidence as the training target.

    Args:
        cfg: dict-like config with required keys ``feat_path``, ``k``,
            ``feature_dim`` and several optional knobs (see `.get` calls).

    Bug fixed: ``os.path.isfile(knn_graph_path)`` raised TypeError when
    ``knn_graph_path`` was absent from the config (default ``None``); the
    path is now checked for ``None`` first, matching the sibling dataset.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)

    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    self.conf_metric = cfg.get('conf_metric')

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # No labels: inst_num is inferred from the feature file below.
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)

        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('read knn graph'):
        # Guard against knn_graph_path being None before touching the fs.
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            knns = np.load(knn_graph_path)['data']
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            prefix = osp.dirname(feat_path)
            name = rm_suffix(osp.basename(feat_path))
            # find root folder of `features`
            prefix = osp.dirname(prefix)
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn)

        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns)

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    if not self.ignore_label:
        with Timer('Prepare ground-truth label'):
            # Per-node confidence is the regression target for GCN-V.
            self.labels = confidence(feats=self.features,
                                     dists=self.dists,
                                     nbrs=self.nbrs,
                                     metric=self.conf_metric,
                                     idx2lb=self.idx2lb,
                                     lb2idxs=self.lb2idxs)
            if cfg.eval_interim:
                _, self.peaks = confidence_to_peaks(
                    self.dists, self.nbrs, self.labels, self.max_conn)
def test_gcn_v(model, cfg, logger):
    """Run GCN-V inference: predict per-node confidences, convert them to
    peaks and then to cluster labels, optionally re-cluster on the GCN
    embedding, save outputs, and evaluate against ground truth.

    Fixes: removed a leftover ``pdb.set_trace()`` debugger breakpoint that
    halted every run, and replaced the deprecated ``logger.warn`` alias
    with ``logger.warning``.
    """
    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.model['type'], cfg.test_data)

    folder = '{}_gcnv_k_{}_th_{}'.format(cfg.test_name, cfg.knn, cfg.th_sim)
    oprefix = osp.join(cfg.work_dir, folder)
    oname = osp.basename(rm_suffix(cfg.load_from))
    opath_pred_confs = osp.join(oprefix, 'pred_confs', '{}.npz'.format(oname))

    if osp.isfile(opath_pred_confs) and not cfg.force:
        # Reuse cached predictions unless --force was given.
        # NOTE(review): on this path `gcn_feat` is never assigned; if
        # cfg.use_gcn_feat is set and the feature file below is missing,
        # write_feat(gcn_feat) raises NameError — TODO confirm intent.
        data = np.load(opath_pred_confs)
        pred_confs = data['pred_confs']
        inst_num = data['inst_num']
        if inst_num != dataset.inst_num:
            logger.warning(
                'instance number in {} is different from dataset: {} vs {}'.
                format(opath_pred_confs, inst_num, len(dataset)))
    else:
        pred_confs, gcn_feat = test(model, dataset, cfg, logger)
        inst_num = dataset.inst_num

    logger.info('pred_confs: mean({:.4f}). max({:.4f}), min({:.4f})'.format(
        pred_confs.mean(), pred_confs.max(), pred_confs.min()))

    logger.info('Convert to cluster')
    with Timer('Predition to peaks'):
        pred_dist2peak, pred_peaks = confidence_to_peaks(
            dataset.dists, dataset.nbrs, pred_confs, cfg.max_conn)

    if not dataset.ignore_label and cfg.eval_interim:
        # evaluate the intermediate results: how often the i-th predicted
        # peak matches the ground-truth peak / its label.
        for i in range(cfg.max_conn):
            num = len(dataset.peaks)
            pred_peaks_i = np.arange(num)
            peaks_i = np.arange(num)
            for j in range(num):
                if len(pred_peaks[j]) > i:
                    pred_peaks_i[j] = pred_peaks[j][i]
                if len(dataset.peaks[j]) > i:
                    peaks_i[j] = dataset.peaks[j][i]
            acc = accuracy(pred_peaks_i, peaks_i)
            logger.info('[{}-th conn] accuracy of peak match: {:.4f}'.format(
                i + 1, acc))
            acc = 0.
            for idx, peak in enumerate(pred_peaks_i):
                acc += int(dataset.idx2lb[peak] == dataset.idx2lb[idx])
            acc /= len(pred_peaks_i)
            logger.info(
                '[{}-th conn] accuracy of peak label match: {:.4f}'.format(
                    i + 1, acc))

    with Timer('Peaks to clusters (th_cut={})'.format(cfg.tau_0)):
        pred_labels = peaks_to_labels(pred_peaks, pred_dist2peak, cfg.tau_0,
                                      inst_num)

    if cfg.save_output:
        logger.info('save predicted confs to {}'.format(opath_pred_confs))
        mkdir_if_no_exists(opath_pred_confs)
        np.savez_compressed(opath_pred_confs,
                            pred_confs=pred_confs,
                            inst_num=inst_num)

        # save clustering results
        idx2lb = list2dict(pred_labels, ignore_value=-1)
        opath_pred_labels = osp.join(
            cfg.work_dir, folder, 'tau_{}_pred_labels.txt'.format(cfg.tau_0))
        logger.info('save predicted labels to {}'.format(opath_pred_labels))
        mkdir_if_no_exists(opath_pred_labels)
        write_meta(opath_pred_labels, idx2lb, inst_num=inst_num)

    # evaluation
    if not dataset.ignore_label:
        print('==> evaluation')
        for metric in cfg.metrics:
            evaluate(dataset.gt_labels, pred_labels, metric)

    if cfg.use_gcn_feat:
        # gcn_feat is saved to disk for GCN-E
        opath_feat = osp.join(oprefix, 'features', '{}.bin'.format(oname))
        if not osp.isfile(opath_feat) or cfg.force:
            mkdir_if_no_exists(opath_feat)
            write_feat(opath_feat, gcn_feat)

        name = rm_suffix(osp.basename(opath_feat))
        prefix = oprefix
        ds = BasicDataset(name=name,
                          prefix=prefix,
                          dim=cfg.model['kwargs']['nhid'],
                          normalize=True)
        ds.info()

        # use top embedding of GCN to rebuild the kNN graph
        with Timer('connect to higher confidence with use_gcn_feat'):
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix,
                              ds.features,
                              cfg.knn_method,
                              cfg.knn,
                              is_rebuild=True)
            dists, nbrs = knns2ordered_nbrs(knns)
            pred_dist2peak, pred_peaks = confidence_to_peaks(
                dists, nbrs, pred_confs, cfg.max_conn)
            pred_labels = peaks_to_labels(pred_peaks, pred_dist2peak, cfg.tau,
                                          inst_num)

        # save clustering results
        if cfg.save_output:
            oname_meta = '{}_gcn_feat'.format(name)
            opath_pred_labels = osp.join(
                oprefix, oname_meta, 'tau_{}_pred_labels.txt'.format(cfg.tau))
            mkdir_if_no_exists(opath_pred_labels)
            idx2lb = list2dict(pred_labels, ignore_value=-1)
            write_meta(opath_pred_labels, idx2lb, inst_num=inst_num)

        # evaluation
        if not dataset.ignore_label:
            print('==> evaluation')
            for metric in cfg.metrics:
                evaluate(dataset.gt_labels, pred_labels, metric)

        # HACK: machine-specific debug export — copies images into one folder
        # per predicted cluster using hard-coded local paths. This should be
        # moved behind a config flag; the spelling 'cluter' is kept because
        # the directory names are part of existing on-disk output.
        import json
        import os
        import shutil
        img_labels = json.load(
            open(r'/home/finn/research/data/clustering_data/test_index.json',
                 'r',
                 encoding='utf-8'))
        output = r'/home/finn/research/data/clustering_data/mr_gcn_output'
        for label in set(pred_labels):
            if not os.path.exists(os.path.join(output, f'cluter_{label}')):
                os.mkdir(os.path.join(output, f'cluter_{label}'))
        for image in img_labels:
            shutil.copy2(
                image,
                os.path.join(
                    os.path.join(output,
                                 f'cluter_{pred_labels[img_labels[image]]}'),
                    os.path.split(image)[-1]))