def __init__(self, mode='train', roiSize=224, balance=False, transform=None):
    """Index every ROI patch belonging to one split of the labelled genes.

    Args:
        mode: "train", "val" or "test". The labelled gene list is cut at
            70% and 90% of its length; "train" takes the first part, "val"
            the middle part and "test" the remainder.
        roiSize: ROI size; selects both the ``~/patch<roiSize>`` directory
            and the ``roi/roi<roiSize>.json`` index file.
        balance: If true and ``mode == "train"``, the training genes are
            passed through ``datautil.get_balanced_gene_list``.
        transform: Stored unchanged as ``self.transform``.

    Raises:
        Exception: If ``mode`` is not one of the three known splits.
    """
    # Two-argument super: the one-argument form only builds an unbound
    # super object and never runs the base-class initializer.
    super(SmallPatchDataset, self).__init__()
    self.roiSize = roiSize
    self.transform = transform
    self.patch_dir = os.path.expanduser("~/patch%s" % self.roiSize)

    # Only genes that have a label are usable.
    self.gene_label = datautil.load_gene_label(0)
    all_genes = [
        gene for gene in datautil.get_gene_list(0) if gene in self.gene_label
    ]

    spivot = int(len(all_genes) * 0.7)
    epivot = int(len(all_genes) * 0.9)
    if mode == "train":
        self.genes = all_genes[:spivot]
        if balance:
            self.genes = datautil.get_balanced_gene_list(self.genes, 0)
    elif mode == "val":
        self.genes = all_genes[spivot:epivot]
    elif mode == "test":
        self.genes = all_genes[epivot:]
    else:
        raise Exception("Unknown mode in SmallPatchDataset", mode)

    with open("roi/roi%d.json" % self.roiSize, 'r') as f:
        self.roi = json.load(f)

    # One entry per (gene, image, patch index); the patch index runs over
    # the number of ROI entries recorded for that image.
    self.gene_img_pids = [
        (gene, img, pid)
        for gene in self.genes
        for img, points in self.roi[gene].items()
        for pid in range(len(points))
    ]

    if mode == "train":
        # Shuffle the training index once, at construction time.
        idx = np.random.permutation(range(len(self.gene_img_pids)))
        self.gene_img_pids = [self.gene_img_pids[i] for i in idx]
def __init__(self, mode="train", roiSize=100, npatch=32):
    """Collect the valid (gene, image) pairs for one split of the gene list.

    Args:
        mode: "train" (first 70% of the labelled genes), "val" (next 20%)
            or "test" (last 10%).
        roiSize: Selects the ``roi<roiSize>.json`` file loaded into
            ``self.roi``.
        npatch: Stored unchanged as ``self.npatch``.

    Raises:
        Exception: If ``mode`` is not one of the three known splits.
    """
    super(SPatchDataset, self).__init__()
    self.roiSize = roiSize
    self.npatch = npatch

    self.gene_label = datautil.load_gene_label(0)
    labelled = [g for g in datautil.get_gene_list(0) if g in self.gene_label]

    n_genes = len(labelled)
    train_end = int(n_genes * 0.7)
    val_end = int(n_genes * 0.9)

    # Translate the split name into slice bounds over the labelled genes.
    if mode == "train":
        lo, hi = None, train_end
    elif mode == "val":
        lo, hi = train_end, val_end
    elif mode == "test":
        lo, hi = val_end, None
    else:
        raise Exception("Unknown mode in SPatchDataset", mode)
    self.genes = labelled[lo:hi]

    roi_file = "roi%d.json" % self.roiSize
    with open(roi_file, "r") as fh:
        self.roi = json.load(fh)

    # Keep only the images that self.valid_img accepts.
    pairs = []
    for gene in self.genes:
        for img in datautil.get_gene_pics(gene):
            candidate = (gene, img)
            if self.valid_img(candidate):
                pairs.append(candidate)
    self.gene_imgs = pairs
def extract(roiPath="roi/roi%s.json" % roiSize):
    """Hand every (gene, image) pair listed in the ROI file to worker threads.

    Each worker runs ``_handle_extract(q, roi)`` and pulls its work from the
    shared queue. ``PATCH_DIR`` is created first if it does not exist.

    Args:
        roiPath: Path of the ROI JSON file; its top-level keys are genes and
            each gene maps to a dict keyed by image.
    """
    gene_label = datautil.load_gene_label(0)
    all_genes = [
        gene for gene in datautil.get_gene_list(0) if gene in gene_label
    ]

    with open(roiPath, 'r') as f:
        roi = json.load(f)

    # Queue all the work before any worker starts, so the stop markers
    # added below always sit behind the real items.
    q = queue.Queue()
    for gene in all_genes:
        for img in roi[gene]:
            q.put((gene, img))

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(PATCH_DIR, exist_ok=True)

    # psutil.cpu_count() returns None when the count cannot be determined;
    # fall back to a single worker instead of crashing in range(None).
    nworker = psutil.cpu_count() or 1

    jobs = []
    for _ in range(nworker):
        p = threading.Thread(target=_handle_extract, args=(q, roi))
        p.daemon = True
        jobs.append(p)
        p.start()
        # One None per worker.
        # NOTE(review): presumably _handle_extract stops on None - confirm.
        q.put(None)

    for j in jobs:
        j.join()
def __init__(self, mode='train', roiSize=100):
    """Index every ROI point belonging to one split of the labelled genes.

    Args:
        mode: "train", "val" or "test". The labelled gene list is cut at
            70% and 90% of its length; "train" takes the first part, "val"
            the middle part and "test" the remainder.
        roiSize: Selects the ``roi<roiSize>.json`` file loaded into
            ``self.roi``.

    Raises:
        Exception: If ``mode`` is not one of the three known splits.
    """
    # Two-argument super: the one-argument form only builds an unbound
    # super object and never runs the base-class initializer.
    super(ShufflePatchDataset, self).__init__()
    self.roiSize = roiSize

    # Only genes that have a label are usable.
    self.gene_label = datautil.load_gene_label(0)
    all_genes = [
        gene for gene in datautil.get_gene_list(0) if gene in self.gene_label
    ]

    spivot = int(len(all_genes) * 0.7)
    epivot = int(len(all_genes) * 0.9)
    if mode == "train":
        self.genes = all_genes[:spivot]
    elif mode == "val":
        self.genes = all_genes[spivot:epivot]
    elif mode == "test":
        self.genes = all_genes[epivot:]
    else:
        raise Exception("Unknown mode in ShufflePatchDataset", mode)

    with open("roi%d.json" % self.roiSize, 'r') as f:
        self.roi = json.load(f)

    # One entry per (gene, image, point) recorded in the ROI file.
    self.gene_img_points = [
        (gene, img, point)
        for gene in self.genes
        for img, points in self.roi[gene].items()
        for point in points
    ]

    if mode == "train":
        # Shuffle the training index once, at construction time.
        idx = np.random.permutation(range(len(self.gene_img_points)))
        self.gene_img_points = [self.gene_img_points[i] for i in idx]
def __init__(self, mode="train", roiSize=100):
    """Index every ROI point of one gene split and reset the image cache.

    Args:
        mode: "train" (first 70% of the labelled genes), "val" (next 20%)
            or "test" (last 10%).
        roiSize: Selects the ``roi<roiSize>.json`` file loaded into
            ``self.roi``.

    Raises:
        Exception: If ``mode`` is not one of the three known splits.
    """
    super(SPatchAllDataset, self).__init__()
    self.roiSize = roiSize

    self.gene_label = datautil.load_gene_label(0)
    labelled = [g for g in datautil.get_gene_list(0) if g in self.gene_label]

    n_genes = len(labelled)
    train_end = int(n_genes * 0.7)
    val_end = int(n_genes * 0.9)

    # Translate the split name into slice bounds over the labelled genes.
    if mode == "train":
        lo, hi = None, train_end
    elif mode == "val":
        lo, hi = train_end, val_end
    elif mode == "test":
        lo, hi = val_end, None
    else:
        raise Exception("Unknown mode in SPatchAllDataset", mode)
    self.genes = labelled[lo:hi]

    roi_file = "roi%d.json" % self.roiSize
    with open(roi_file, "r") as fh:
        self.roi = json.load(fh)

    # Flatten gene -> image -> points into (gene, image, point) triples.
    triples = []
    for gene in self.genes:
        for img, points in self.roi[gene].items():
            for point in points:
                triples.append((gene, img, point))
    self.gene_img_points = triples

    # Both cache slots start out empty.
    self.cache_img = None
    self.cache_nimg = None
def extract(base="res18", dim=128, size=1):
    """Build an ``Extractor`` and run ``do_extract`` over a gene list.

    Args:
        base: First argument passed to ``Extractor``.
        dim: Second argument passed to ``Extractor``.
        size: Argument passed to ``datautil.get_gene_list``.
    """
    extractor = Extractor(base, dim)
    # Same call order as before: share_memory() first, then cuda().
    extractor.share_memory()
    extractor.cuda()
    do_extract(extractor, datautil.get_gene_list(size))
def run():
    """Queue every image missing from TARGET and copy it with worker threads.

    For each labelled gene, the target directory is created when it is
    missing and the source directory is non-empty. Each image whose target
    path does not exist yet is queued as a ``(source, target)`` pair for the
    ``copy`` workers.
    """
    work = queue.Queue()
    gene_list = datautil.get_gene_list(0)
    labels = datautil.load_gene_label(0)
    labelled = [g for g in gene_list if g in labels]

    for gene in labelled:
        src_dir = os.path.join(DATA_DIR, gene)
        tgt_dir = os.path.join(TARGET, gene)

        # The source directory is only listed when the target is missing.
        if not os.path.exists(tgt_dir):
            if os.listdir(src_dir):
                os.mkdir(tgt_dir)

        for img in datautil.get_gene_pics(gene):
            tgt_p = os.path.join(tgt_dir, img)
            if os.path.exists(tgt_p):
                continue  # already copied
            work.put((os.path.join(src_dir, img), tgt_p))

    n_threads = 20
    workers = []
    for _ in range(n_threads):
        t = threading.Thread(target=copy, args=(work,))
        workers.append(t)
        t.start()

    # Block until every queued pair has been marked done.
    # NOTE(review): this needs `copy` to call task_done() per item - confirm.
    work.join()

    # One None per worker, then wait for the threads to finish.
    for _ in range(n_threads):
        work.put(None)
    for t in workers:
        t.join()
def clear():
    """Remove the per-gene directory under TARGET for every labelled gene
    whose directory exists and is empty."""
    gene_list = datautil.get_gene_list(0)
    labels = datautil.load_gene_label(0)
    labelled = [g for g in gene_list if g in labels]

    for gene in labelled:
        tgt_dir = os.path.join(TARGET, gene)
        if not os.path.exists(tgt_dir):
            continue
        if os.listdir(tgt_dir):
            continue  # not empty: leave it alone
        os.rmdir(tgt_dir)
def __init__(self, mode="train", size=3000):
    """Collect every (gene, image) pair for one split of the gene list.

    Args:
        mode: "train" (first 70% of the labelled genes), "val" (next 20%)
            or "test" (last 10%).
        size: Stored unchanged as ``self.size``.

    Raises:
        Exception: If ``mode`` is not one of the three known splits.
    """
    super(SImgDataset, self).__init__()
    self.size = size

    self.gene_label = datautil.load_gene_label(0)
    labelled = [g for g in datautil.get_gene_list(0) if g in self.gene_label]

    n_genes = len(labelled)
    train_end = int(n_genes * 0.7)
    val_end = int(n_genes * 0.9)

    # Translate the split name into slice bounds over the labelled genes.
    if mode == "train":
        lo, hi = None, train_end
    elif mode == "val":
        lo, hi = train_end, val_end
    elif mode == "test":
        lo, hi = val_end, None
    else:
        raise Exception("Unknown mode in SImgDataset", mode)
    self.genes = labelled[lo:hi]

    pairs = []
    for gene in self.genes:
        for img in datautil.get_gene_pics(gene):
            pairs.append((gene, img))
    self.gene_imgs = pairs
def extract():
    """Run ``extract_img`` over every image of every labelled gene and dump
    the collected points to ``roi/roi<roiSize>.json``.

    The output is a dict of gene -> image -> points. A gene whose images
    produced no result still appears, with an empty dict.
    """
    labels = datautil.load_gene_label(0)
    labelled = [g for g in datautil.get_gene_list(0) if g in labels]

    tasks = queue.Queue()
    results = queue.Queue()
    d_points = {}
    for gene in labelled:
        d_points[gene] = {}
        for img in datautil.get_gene_pics(gene):
            tasks.put((gene, img))

    n_threads = 20
    workers = []
    for _ in range(n_threads):
        t = threading.Thread(target=extract_img, args=(tasks, results))
        workers.append(t)
        t.start()

    # Block until every task has been marked done.
    # NOTE(review): needs extract_img to call task_done() per item - confirm.
    tasks.join()

    # One None per worker, then wait for the threads to finish.
    for _ in range(n_threads):
        tasks.put(None)
    for t in workers:
        t.join()

    # All workers have exited, so the result queue can be drained safely.
    while not results.empty():
        res_gene, res_img, res_points = results.get()
        d_points[res_gene][res_img] = res_points

    out_path = "roi/roi%d.json" % roiSize
    with open(out_path, "w") as fh:
        json.dump(d_points, fh)
def separate():
    """Run ``do_separate`` over the gene list returned for ``size=0``."""
    do_separate(datautil.get_gene_list(size=0))