def __init__(self, mode="train", roiSize=100, npatch=32): super(SPatchDataset, self).__init__() self.roiSize = roiSize self.npatch = npatch self.gene_label = datautil.load_gene_label(0) all_genes = datautil.get_gene_list(0) all_genes = [gene for gene in all_genes if gene in self.gene_label] spivot = int(len(all_genes) * 0.7) epivot = int(len(all_genes) * 0.9) if mode == "train": self.genes = all_genes[:spivot] elif mode == "val": self.genes = all_genes[spivot:epivot] elif mode == "test": self.genes = all_genes[epivot:] else: raise Exception("Unknown mode in SPatchDataset", mode) with open("roi%d.json" % self.roiSize, 'r') as f: self.roi = json.load(f) self.gene_imgs = [(gene, img) for gene in self.genes for img in datautil.get_gene_pics(gene) if self.valid_img((gene, img))]
def run():
    q = queue.Queue()
    all_genes = datautil.get_gene_list(0)
    gene_label = datautil.load_gene_label(0)
    all_genes = [gene for gene in all_genes if gene in gene_label]
    # Queue every (src, dst) pair that has not been copied yet.
    for gene in all_genes:
        src_dir = os.path.join(DATA_DIR, gene)
        tgt_dir = os.path.join(TARGET, gene)
        if not os.path.exists(tgt_dir) and os.listdir(src_dir):
            os.mkdir(tgt_dir)
        for img in datautil.get_gene_pics(gene):
            src_p = os.path.join(src_dir, img)
            tgt_p = os.path.join(tgt_dir, img)
            if not os.path.exists(tgt_p):
                q.put((src_p, tgt_p))
    # Copy in parallel; workers exit when they receive the None sentinel.
    jobs = []
    NUM_THREADS = 20
    for i in range(NUM_THREADS):
        p = threading.Thread(target=copy, args=(q,))
        jobs.append(p)
        p.start()
    q.join()
    for i in range(NUM_THREADS):
        q.put(None)
    for j in jobs:
        j.join()
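# Sketch of the copy() worker referenced by run() above. The real implementation
# is not shown in this excerpt; this is an assumption based on the queue/sentinel
# pattern run() uses (shutil and queue are imported elsewhere in this file).
def copy(q):
    while True:
        item = q.get()
        if item is None:
            # Sentinel pushed by run() after q.join(); shut this worker down.
            break
        src_p, tgt_p = item
        try:
            shutil.copy(src_p, tgt_p)
        finally:
            # Mark the item done so q.join() in run() can return.
            q.task_done()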
def _load_data(gene_list, size=0):
    # size: 0 = enhanced, 1 = supported, otherwise approved label set.
    if size == 0:
        d = datautil.load_enhanced_label()
    elif size == 1:
        d = datautil.load_supported_label()
    else:
        d = datautil.load_approved_label()
    # Keep genes that are labelled, have images, and have MATLAB feature vectors.
    q = [x for x in gene_list if x in d and len(datautil.get_gene_pics(x))]
    q = [x for x in q if os.path.exists(os.path.join(MATLAB_FV_DIR, x))]
    return [_handle_load(x, d) for x in q]
def copy_testimgs():
    DST = "/tmp/testimgs"
    test_genes = datautil.get_test_gene_list(size=0)
    for g in test_genes:
        pics = datautil.get_gene_pics(g)
        dst_dir = os.path.join(DST, g)
        if not os.path.exists(dst_dir):
            # makedirs also creates DST itself if it does not exist yet.
            os.makedirs(dst_dir)
        for pic in pics:
            src = os.path.join(c.QDATA_DIR, g, pic)
            dst = os.path.join(dst_dir, pic)
            shutil.copy(src, dst)
def extract_image_fv(q, model, i):
    def _extract_image(image):
        # Load, upscale to the model input size, and move channels first.
        img = cv2.imread(image)
        img = cv2.resize(img, (3000, 3000), interpolation=cv2.INTER_CUBIC)
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        inputs = torch.from_numpy(img).type(torch.cuda.FloatTensor)
        pd = model(inputs)
        return pd

    while True:
        # Back off while the GPU is nearly full.
        while get_gpu_usage() > 0.9:
            print("---gpu full---", get_gpu_usage())
            time.sleep(1)
            torch.cuda.empty_cache()
        gene = q.get()
        if gene is None:
            break
        print("---extract -----", gene, q.qsize(), i)
        # Fall back through the alternative image directories if the gene is
        # not found under DATA_DIR.
        gene_dir = os.path.join(DATA_DIR, gene)
        if not os.path.exists(gene_dir):
            gene_dir = os.path.join(SUPP_DATA_DIR, gene)
        if not os.path.exists(gene_dir):
            gene_dir = os.path.join(APPROVE_DATA_DIR, gene)
        outpath = os.path.join(model.fvdir, "%s.npy" % gene)
        if os.path.exists(outpath):
            print("------already extracted---------", gene)
            q.task_done()
            continue
        # pds = [_extract_image(os.path.join(gene_dir, p))
        #        for p in datautil.get_gene_pics(gene)
        #        if os.path.splitext(p)[-1] == ".jpg"]
        pds = [_extract_image(os.path.join(gene_dir, p))
               for p in datautil.get_gene_pics(gene, datautil.all_tissue_list)
               if os.path.splitext(p)[-1] == ".jpg"]
        if pds:
            value = np.concatenate(pds, axis=0)
            print("----save-----", outpath)
            np.save(outpath, value)
        q.task_done()
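# Launcher sketch (illustrative, not from the original code): how
# extract_image_fv workers might be driven, mirroring the queue/sentinel
# pattern of run() and extract() in this file. The thread count and the
# assumption that `model` can be shared across threads are placeholders.
def _example_extract_all(model, num_threads=4):
    q = queue.Queue()
    for gene in datautil.get_gene_list(0):
        q.put(gene)
    jobs = []
    for i in range(num_threads):
        p = threading.Thread(target=extract_image_fv, args=(q, model, i))
        jobs.append(p)
        p.start()
    q.join()
    # Each worker exits on its None sentinel.
    for i in range(num_threads):
        q.put(None)
    for j in jobs:
        j.join()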
def __init__(self, mode="train", size=3000): super(SImgDataset, self).__init__() self.size = size self.gene_label = datautil.load_gene_label(0) all_genes = datautil.get_gene_list(0) all_genes = [gene for gene in all_genes if gene in self.gene_label] spivot = int(len(all_genes) * 0.7) epivot = int(len(all_genes) * 0.9) if mode == "train": self.genes = all_genes[:spivot] elif mode == "val": self.genes = all_genes[spivot:epivot] elif mode == "test": self.genes = all_genes[epivot:] else: raise Exception("Unknown mode in SImgDataset", mode) self.gene_imgs = [(gene, img) for gene in self.genes for img in datautil.get_gene_pics(gene)]
def evaluate_ilocator():
    test_genes = datautil.get_test_gene_list(size=0)
    pd = []
    gt = []
    d = datautil.load_enhanced_label()
    for g in test_genes:
        pics = datautil.get_gene_pics(g)
        g_scores = []
        # iLocator writes one score file per image, with "." replaced by "_".
        for pic in [x.replace(".", "_") for x in pics]:
            spath = os.path.abspath(
                os.path.join("util/testimgs_ilocator_result/%s.txt" % pic))
            with open(spath, 'r') as f:
                score = [float(x) for x in f.readline().strip().split()]
            # Skip images whose scores contain NaN.
            if not any([math.isnan(x) for x in score]):
                g_scores.append(score)
        # Average image-level scores into a gene-level prediction.
        g_scores = np.stack(g_scores)
        g_scores = np.mean(g_scores, axis=0)
        pd.append(g_scores)
        gene_label = np.zeros(6)
        for l in d[g]:
            gene_label[l] = 1
        gt.append(gene_label)
    gt = np.stack(gt)
    pd = np.stack(pd)
    # Rearrange iLocator's label order to match ours.
    idx = np.array([5, 0, 6, 4, 2, 1])
    pd = pd[:, idx]
    # Threshold each gene at min(0, its max score), then binarize.
    thr = pd.max(axis=1)
    zeros = np.zeros(thr.shape[0])
    thr = np.min(np.stack([zeros, thr], axis=1), axis=1)
    pd = np.greater_equal(pd, thr[:, np.newaxis]).astype(int)
    npmetrics.write_metrics(gt, pd, "util/ilocator.txt")
def extract():
    gene_label = datautil.load_gene_label(0)
    all_genes = datautil.get_gene_list(0)
    all_genes = [gene for gene in all_genes if gene in gene_label]
    q = queue.Queue()
    outq = queue.Queue()
    d_points = {}
    # Queue every (gene, image) pair for ROI extraction.
    for gene in all_genes:
        d_points[gene] = {}
        for img in datautil.get_gene_pics(gene):
            q.put((gene, img))
    NUM_THREADS = 20
    jobs = []
    for i in range(NUM_THREADS):
        p = threading.Thread(target=extract_img, args=(q, outq))
        jobs.append(p)
        p.start()
    q.join()
    # Shut the workers down with None sentinels, then collect their results.
    for i in range(NUM_THREADS):
        q.put(None)
    for j in jobs:
        j.join()
    while not outq.empty():
        gene, img, points = outq.get()
        d_points[gene][img] = points
    with open("roi/roi%d.json" % roiSize, "w") as f:
        json.dump(d_points, f)