Esempio n. 1
0
    def __init__(self, mode='train', roiSize=224,
                 balance=False, transform=None):
        """Collect the (gene, image, patch index) samples for one data split.

        Genes returned by ``datautil.get_gene_list(0)`` are kept only when
        they appear in the label mapping, then split by list position:
        the first 70% for training, the next 20% for validation and the
        last 10% for testing.

        Args:
            mode: "train", "val" or "test"; selects the gene slice.
            roiSize: Number used to build both the patch directory name
                (``~/patch<roiSize>``) and the ROI file name
                (``roi/roi<roiSize>.json``, relative to the working dir).
            balance: When true and ``mode`` is "train", the training genes
                are replaced by ``datautil.get_balanced_gene_list(genes, 0)``.
            transform: Stored on ``self.transform``; not used in this method.

        Raises:
            Exception: If ``mode`` is not one of the three known values.
        """
        # The two-argument form binds the proxy to this instance so the
        # parent initializer actually runs; super(cls) alone is unbound.
        super(SmallPatchDataset, self).__init__()
        self.roiSize = roiSize
        self.transform = transform
        self.patch_dir = os.path.expanduser("~/patch%s" % self.roiSize)
        self.gene_label = datautil.load_gene_label(0)

        all_genes = datautil.get_gene_list(0)
        all_genes = [gene for gene in all_genes if gene in self.gene_label]

        # Positional 70/20/10 split boundaries.
        spivot = int(len(all_genes) * 0.7)
        epivot = int(len(all_genes) * 0.9)
        if mode == "train":
            self.genes = all_genes[:spivot]
            if balance:
                self.genes = datautil.get_balanced_gene_list(self.genes, 0)
        elif mode == "val":
            self.genes = all_genes[spivot:epivot]
        elif mode == "test":
            self.genes = all_genes[epivot:]
        else:
            raise Exception("Unknown mode in SmallPatchDataset", mode)

        with open("roi/roi%d.json" % self.roiSize, 'r') as f:
            self.roi = json.load(f)

        # One sample per entry stored under roi[gene][img], addressed by
        # its index within that collection.
        self.gene_img_pids = [(gene, img, pid) for gene in self.genes
                              for img in self.roi[gene].keys()
                              for pid in range(len(self.roi[gene][img]))]
        if mode == "train":
            # Randomize the sample order once, for training only.
            idx = np.random.permutation(range(len(self.gene_img_pids)))
            self.gene_img_pids = [self.gene_img_pids[i] for i in idx]
Esempio n. 2
0
    def __init__(self, mode="train", roiSize=100, npatch=32):
        """Collect the valid (gene, image) pairs for one data split.

        Labelled genes are split by list position into 70% train, 20%
        validation and 10% test. ROI data is read from
        ``roi<roiSize>.json`` in the working directory, and every image
        listed by ``datautil.get_gene_pics`` is kept only when
        ``self.valid_img`` accepts it.

        Args:
            mode: "train", "val" or "test".
            roiSize: Number used to build the ROI file name.
            npatch: Stored on ``self.npatch``; not used in this method.

        Raises:
            Exception: If ``mode`` is not one of the three known values.
        """
        super(SPatchDataset, self).__init__()
        self.roiSize = roiSize
        self.npatch = npatch
        self.gene_label = datautil.load_gene_label(0)

        labelled = []
        for gene in datautil.get_gene_list(0):
            if gene in self.gene_label:
                labelled.append(gene)

        total = len(labelled)
        train_end = int(total * 0.7)
        val_end = int(total * 0.9)

        # Pick the slice bounds first, then slice once.
        if mode == "train":
            lo, hi = None, train_end
        elif mode == "val":
            lo, hi = train_end, val_end
        elif mode == "test":
            lo, hi = val_end, None
        else:
            raise Exception("Unknown mode in SPatchDataset", mode)
        self.genes = labelled[lo:hi]

        roi_path = "roi%d.json" % self.roiSize
        with open(roi_path, 'r') as fh:
            self.roi = json.load(fh)

        pairs = []
        for gene in self.genes:
            for img in datautil.get_gene_pics(gene):
                candidate = (gene, img)
                if self.valid_img(candidate):
                    pairs.append(candidate)
        self.gene_imgs = pairs
Esempio n. 3
0
def extract(roiPath="roi/roi%s.json" % roiSize):
    """Fan the (gene, image) pairs of an ROI file out to worker threads.

    Every labelled gene is looked up in the ROI JSON and one queue item is
    created per image key. ``PATCH_DIR`` is created if needed, then one
    daemon thread per CPU runs ``_handle_extract(q, roi)``. A ``None`` is
    queued for each worker after the real items - presumably the stop
    signal ``_handle_extract`` looks for (confirm against that function).
    The call returns once all workers have finished.

    Args:
        roiPath: Path of the ROI JSON file; the default is built from the
            module-level ``roiSize`` at definition time.

    Raises:
        KeyError: If a labelled gene has no entry in the ROI file.
    """
    gene_label = datautil.load_gene_label(0)
    all_genes = datautil.get_gene_list(0)
    all_genes = [gene for gene in all_genes if gene in gene_label]

    with open(roiPath, 'r') as f:
        roi = json.load(f)

    gene_imgs = [(gene, img) for gene in all_genes for img in roi[gene]]

    q = queue.Queue()
    for item in gene_imgs:
        q.put(item)

    # makedirs with exist_ok avoids the exists/mkdir race and also creates
    # any missing parent directories.
    os.makedirs(PATCH_DIR, exist_ok=True)

    # psutil.cpu_count() may return None when the count is undetermined;
    # fall back to a single worker instead of crashing in range().
    nworker = psutil.cpu_count() or 1
    jobs = []
    for _ in range(nworker):
        p = threading.Thread(target=_handle_extract, args=(q, roi))
        jobs.append(p)
        p.daemon = True
        p.start()
        # One sentinel per worker, queued behind all real items.
        q.put(None)

    for j in jobs:
        j.join()
Esempio n. 4
0
    def __init__(self, mode='train', roiSize=100):
        """Collect the (gene, image, point) samples for one data split.

        Genes returned by ``datautil.get_gene_list(0)`` are kept only when
        they appear in the label mapping, then split by list position:
        the first 70% for training, the next 20% for validation and the
        last 10% for testing. ROI data is read from ``roi<roiSize>.json``
        in the working directory.

        Args:
            mode: "train", "val" or "test"; selects the gene slice.
            roiSize: Number used to build the ROI file name.

        Raises:
            Exception: If ``mode`` is not one of the three known values.
        """
        # The two-argument form binds the proxy to this instance so the
        # parent initializer actually runs; super(cls) alone is unbound.
        super(ShufflePatchDataset, self).__init__()
        self.roiSize = roiSize
        self.gene_label = datautil.load_gene_label(0)
        all_genes = datautil.get_gene_list(0)
        all_genes = [gene for gene in all_genes if gene in self.gene_label]

        # Positional 70/20/10 split boundaries.
        spivot = int(len(all_genes) * 0.7)
        epivot = int(len(all_genes) * 0.9)
        if mode == "train":
            self.genes = all_genes[:spivot]
        elif mode == "val":
            self.genes = all_genes[spivot:epivot]
        elif mode == "test":
            self.genes = all_genes[epivot:]
        else:
            raise Exception("Unknown mode in ShufflePatchDataset", mode)

        with open("roi%d.json" % self.roiSize, 'r') as f:
            self.roi = json.load(f)

        # One sample per entry stored under roi[gene][img].
        self.gene_img_points = [(gene, img, point) for gene in self.genes
                                for img in self.roi[gene].keys()
                                for point in self.roi[gene][img]]
        if mode == "train":
            # Randomize the sample order once, for training only.
            idx = np.random.permutation(range(len(self.gene_img_points)))
            self.gene_img_points = [self.gene_img_points[i] for i in idx]
Esempio n. 5
0
    def __init__(self, mode="train", roiSize=100):
        """Collect every (gene, image, point) sample for one data split.

        Labelled genes are split by list position into 70% train, 20%
        validation and 10% test. ROI data is read from
        ``roi<roiSize>.json`` in the working directory, and one sample is
        produced per entry stored under ``roi[gene][img]``. Two cache
        attributes are initialised to ``None``.

        Args:
            mode: "train", "val" or "test".
            roiSize: Number used to build the ROI file name.

        Raises:
            Exception: If ``mode`` is not one of the three known values.
        """
        super(SPatchAllDataset, self).__init__()
        self.roiSize = roiSize
        self.gene_label = datautil.load_gene_label(0)

        labelled = []
        for gene in datautil.get_gene_list(0):
            if gene in self.gene_label:
                labelled.append(gene)

        total = len(labelled)
        train_end = int(total * 0.7)
        val_end = int(total * 0.9)

        # Pick the slice bounds first, then slice once.
        if mode == "train":
            lo, hi = None, train_end
        elif mode == "val":
            lo, hi = train_end, val_end
        elif mode == "test":
            lo, hi = val_end, None
        else:
            raise Exception("Unknown mode in SPatchAllDataset", mode)
        self.genes = labelled[lo:hi]

        roi_path = "roi%d.json" % self.roiSize
        with open(roi_path, 'r') as fh:
            self.roi = json.load(fh)

        samples = []
        for gene in self.genes:
            for img in self.roi[gene].keys():
                for point in self.roi[gene][img]:
                    samples.append((gene, img, point))
        self.gene_img_points = samples

        self.cache_img = None
        self.cache_nimg = None
Esempio n. 6
0
def extract(base="res18", dim=128, size=1):
    """Build an ``Extractor`` and run ``do_extract`` over a gene list.

    The extractor is created from ``base`` and ``dim``, then
    ``share_memory()`` and ``cuda()`` are called on it before it is handed
    to ``do_extract`` together with ``datautil.get_gene_list(size)``.
    """
    extractor = Extractor(base, dim)
    extractor.share_memory()
    extractor.cuda()

    do_extract(extractor, datautil.get_gene_list(size))
Esempio n. 7
0
def run():
    """Queue every missing target image and process the queue with threads.

    For each labelled gene, the target directory under ``TARGET`` is
    created when it is absent and the source directory under ``DATA_DIR``
    is non-empty. Each image from ``datautil.get_gene_pics`` whose target
    path does not exist yet is queued as a (source, target) pair. Twenty
    threads run ``copy`` on the queue; after the queue reports all items
    done, one ``None`` per thread is queued and the threads are joined.
    NOTE(review): ``Queue.join`` only returns if ``copy`` calls
    ``task_done`` for each item - confirm in ``copy``.
    """
    genes = datautil.get_gene_list(0)
    labels = datautil.load_gene_label(0)
    genes = [g for g in genes if g in labels]

    pending = queue.Queue()
    for gene in genes:
        src_dir = os.path.join(DATA_DIR, gene)
        tgt_dir = os.path.join(TARGET, gene)
        if not os.path.exists(tgt_dir):
            # Only create a target directory for a non-empty source.
            if os.listdir(src_dir):
                os.mkdir(tgt_dir)
        for img in datautil.get_gene_pics(gene):
            tgt_p = os.path.join(tgt_dir, img)
            if os.path.exists(tgt_p):
                continue
            pending.put((os.path.join(src_dir, img), tgt_p))

    thread_count = 20
    workers = []
    for _ in range(thread_count):
        worker = threading.Thread(target=copy, args=(pending,))
        workers.append(worker)
        worker.start()

    pending.join()

    # One None per worker, queued after all real work is done.
    for _ in workers:
        pending.put(None)

    for worker in workers:
        worker.join()
Esempio n. 8
0
def clear():
    """Remove the empty per-gene directories under ``TARGET``.

    Only labelled genes are considered; a directory is removed when it
    exists and ``os.listdir`` reports no entries in it.
    """
    genes = datautil.get_gene_list(0)
    labels = datautil.load_gene_label(0)

    for gene in genes:
        if gene not in labels:
            continue
        tgt_dir = os.path.join(TARGET, gene)
        if not os.path.exists(tgt_dir):
            continue
        if os.listdir(tgt_dir):
            continue
        os.rmdir(tgt_dir)
Esempio n. 9
0
    def __init__(self, mode="train", size=3000):
        """Collect the (gene, image) pairs for one data split.

        Labelled genes are split by list position into 70% train, 20%
        validation and 10% test; every image listed by
        ``datautil.get_gene_pics`` for the selected genes becomes a sample.

        Args:
            mode: "train", "val" or "test".
            size: Stored on ``self.size``; not used in this method.

        Raises:
            Exception: If ``mode`` is not one of the three known values.
        """
        super(SImgDataset, self).__init__()
        self.size = size
        self.gene_label = datautil.load_gene_label(0)

        labelled = []
        for gene in datautil.get_gene_list(0):
            if gene in self.gene_label:
                labelled.append(gene)

        total = len(labelled)
        train_end = int(total * 0.7)
        val_end = int(total * 0.9)

        # Pick the slice bounds first, then slice once.
        if mode == "train":
            lo, hi = None, train_end
        elif mode == "val":
            lo, hi = train_end, val_end
        elif mode == "test":
            lo, hi = val_end, None
        else:
            raise Exception("Unknown mode in SImgDataset", mode)
        self.genes = labelled[lo:hi]

        pairs = []
        for gene in self.genes:
            for img in datautil.get_gene_pics(gene):
                pairs.append((gene, img))
        self.gene_imgs = pairs
Esempio n. 10
0
def extract():
    """Run ``extract_img`` over every gene image and save the results.

    Each (gene, image) pair of the labelled genes is queued, and twenty
    threads run ``extract_img(q, outq)``. Once the input queue reports all
    items done, one ``None`` per thread is queued and the threads are
    joined. The (gene, image, points) tuples found on the output queue are
    gathered into a nested dict and written as JSON to
    ``roi/roi<roiSize>.json``, using the module-level ``roiSize``.
    NOTE(review): ``Queue.join`` only returns if ``extract_img`` calls
    ``task_done`` for each item - confirm in ``extract_img``.
    """
    labels = datautil.load_gene_label(0)
    genes = datautil.get_gene_list(0)
    genes = [g for g in genes if g in labels]

    work_q = queue.Queue()
    result_q = queue.Queue()
    # Every labelled gene gets an entry, even if it yields no results.
    points_by_gene = {gene: {} for gene in genes}

    for gene in genes:
        for img in datautil.get_gene_pics(gene):
            work_q.put((gene, img))

    thread_count = 20

    workers = []
    for _ in range(thread_count):
        worker = threading.Thread(target=extract_img,
                                  args=(work_q, result_q))
        workers.append(worker)
        worker.start()

    work_q.join()

    # One None per worker, queued after all real work is done.
    for _ in workers:
        work_q.put(None)

    for worker in workers:
        worker.join()

    # All workers are finished, so the output queue can be drained.
    while True:
        if result_q.empty():
            break
        gene, img, points = result_q.get()
        points_by_gene[gene][img] = points

    out_path = "roi/roi%d.json" % roiSize
    with open(out_path, "w") as fh:
        json.dump(points_by_gene, fh)
Esempio n. 11
0
def separate():
    """Pass ``datautil.get_gene_list(size=0)`` to ``do_separate``."""
    do_separate(datautil.get_gene_list(size=0))