def __init__(self, selected_p, dst, logger=None):
    self.selected_p = selected_p
    self.dst = dst
    self.case = None
    self.logger = logger if logger is not None else gen_logger('SNAS_predictor')
    self.model = model_nas(d_size=256)
    self.model.load_weights('371.h5', by_name=True)
def judge_area(dir_p, dst='c:/', logger=None):
    os.makedirs(dst, exist_ok=True)
    if logger is None:
        logger = gen_logger('judge_area')

    model = model_nas_clf()
    model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])

    for case_name in os.listdir(dir_p):
        case_dir = os.path.join(dir_p, case_name)
        record = {}
        if not os.path.isdir(case_dir):
            continue
        
        try:
            for batch in chunk(case_dir, 32):
                X = [preprocess_input(cv2.imread(x)) for x in batch]
                X = np.array(X)
                preds_batch = predicting(model, X)
                record.update({key:value for key, value in zip(batch, preds_batch)})
            with open(os.path.join(dst, f'{case_name}.pkl'), 'wb') as temp_pkl:
                pickle.dump(record, temp_pkl, pickle.HIGHEST_PROTOCOL)
            logger.info(f'{case_name} is completed')
        except:
            logger.exception(f'{case_name} encountered an error')
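
# Note: `chunk` is assumed to be a helper from the project's tools module that yields
# batches of image paths inside a case directory (an assumption; the helper itself is
# not part of this excerpt). A minimal sketch of such a helper:
# def chunk(case_dir, size):
#     paths = [os.path.join(case_dir, fn) for fn in os.listdir(case_dir)]
#     for i in range(0, len(paths), size):
#         yield paths[i:i + size]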
def __init__(self, selected_p, dst, train_size_ratio=0.8, epochs=40, inner_train_time=22,
             val_sel_num=42, aug_time=0, d_size=256, target_p='data/Target.xlsx', gene=False, logger=None):
    self.dst = dst
    self.logger = logger if logger is not None else gen_logger('SNAS')
    trainer = Train_table_creator(selected_p, dst, train_ratio=train_size_ratio, logger=self.logger, target_p=target_p)
    assert trainer.cache(), 'image search encountered an error'
    self.train_table = trainer.train_table
    self.test_table = trainer.test_table
    self.sel_num = val_sel_num
    self.d_size = d_size
    self.epochs = epochs
    self.start_epoch = 0
    self.inner_train_time = inner_train_time
    self.aug_time = aug_time
    self.trained = False
    self.gene = gene
    self.ada = None
    self.seq = None
    self.pool = defaultdict(list)
    self.train_pool = defaultdict(list)  # avoid too much repetition in training
    if self.gene:
        self.train_gene, self.test_gene = trainer.get_gene_table()
        self.model = model_gn(f_num=len(self.train_gene.columns))
    else:
        self.model = model_nas(d_size=d_size)
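
# Usage sketch, mirroring how SNAS is instantiated in the from_svs snippet further down
# in this document:
# model = SNAS(sel_dst, model_dst, logger=logger, target_p='data/Target.xlsx')
# model.whole_train()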
Example #4
#!/usr/bin/env python
#coding:utf-8

from __future__ import unicode_literals
from lucene import *
from settings import db, INDEX_DIR, ANALYZER
from tools import gen_logger, pagination
from time import time

logger = gen_logger(__file__, 'w')


def index():
    '''
    :desc: rebuild the index
    '''
    logger.info('Rebuilding index...')
    start_time = time()
    resource_count = db.get('select count(*) as count from resource')['count']
    logger.info('{} records collected'.format(resource_count))
    sql = 'select * from resource limit 10000 offset %s'
    counter = 0
    writer = IndexWriter(INDEX_DIR, ANALYZER, True,
                         IndexWriter.MaxFieldLength.UNLIMITED)
    for offset in range(0, resource_count, 10000):
        items = db.query(sql, offset)
        for item in items:
            doc = Document()
            doc.add(
                Field('title', item['title'], Field.Store.YES,
                      Field.Index.ANALYZED))
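            # The excerpt ends here; presumably the remaining fields are added the same way
            # and each document is registered with writer.addDocument(doc), followed by
            # writer.optimize() / writer.close() after the loops (an assumption based on the
            # classic PyLucene indexing pattern, not shown in this excerpt).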
Example #5
import logging
import string
import json
import re
from pymongo import MongoClient
from time import localtime, strftime, time
from lucene import *
from settings import *
from tools import gen_logger, pagination
from functools import wraps
from IPython import embed

testindexdir = SimpleFSDirectory(File('testindex'))
testsearcher = IndexSearcher(testindexdir)
source_count = None
logger = gen_logger(__file__, 'w')

def traffic_counter(func):
    '''
    :desc: traffic counting
    '''
    @wraps(func)
    def wrapper(*args, **kwargs):
        db.monitor.update({}, {'$inc':{'traffic':1}})
        return func(*args, **kwargs)
    return wrapper
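
# Usage sketch (hypothetical handler): any function wrapped with @traffic_counter
# increments the 'traffic' field of the monitor document on each call.
# @traffic_counter
# def search(keyword):
#     ...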

def rebuild_indexing():
    '''
    :desc: rebuild the index
    '''
import argparse
import os
import json
from snas import SNAS, SNAS_predictor
from tools import gen_logger, load_locs, marking, move_model_val
from tiles import batch_tiling
from area_judge import judge_area
from area_move import pkl_select

if __name__ == "__main__":
    logger = gen_logger('from_svs')
    parse = argparse.ArgumentParser(description='A pipeline from .svs files to survival models')
    parse.add_argument('i', help='the path of directory that saves imgs for cases')
    parse.add_argument('o', help='the path for output')
    parse.add_argument('-m', default='train', help='the working mode: "train" (default) or "val" for prediction')
    parse.add_argument('-e', default='data/Target.xlsx', help='the path of the event/censor table (required when the mode is "train")')
    command = parse.parse_args()
    dst = command.o
    tile_dst = os.path.join(dst, 'tiles')
    batch_tiling(command.i, tile_dst, logger)
    logger.info('tiling done')
    pkl_dst = os.path.join(dst, 'pkl')
    judge_area(tile_dst, dst=pkl_dst, logger=logger)
    logger.info('judge done')
    sel_dst = os.path.join(dst, 'sel')
    pkl_select(pkl_dst, sel_dst)
    logger.info('selection done')
    if command.m == 'train':
        model_dst = os.path.join(dst, 'models')
        model = SNAS(sel_dst, model_dst, logger=logger, target_p=command.e)
        model.whole_train()
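    # The prediction branch is not part of this excerpt. A hypothetical sketch, reusing the
    # SNAS_predictor constructor shown earlier in this document (its prediction method is
    # not shown here):
    # elif command.m == 'val':
    #     predictor = SNAS_predictor(sel_dst, os.path.join(dst, 'pred'), logger=logger)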
Example #7
import os
import shutil
import argparse
from tools import gen_logger
from tqdm import tqdm

logger = gen_logger('make_archive')

def make_archive(cases_p, archive_p):
    os.makedirs(archive_p, exist_ok=True)
    for case in tqdm(os.listdir(cases_p)):
        try:
            shutil.make_archive(os.path.join(archive_p, case.split('.')[0]),
                                'zip', os.path.join(cases_p, case))
            logger.info(f'{case} is completed')
        except:
            logger.exception(f'{case} encountered errors')

def fn_match(slides_p):
    slides = os.listdir(slides_p)
    return {fn.split('.')[0]:fn for fn in slides}
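# e.g. fn_match('slides/') -> {'case_001': 'case_001.svs'} (hypothetical filenames);
# the stem of each slide filename maps to its full filename.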

def copy_special_slide(slides_p, archive_p):
    archive = os.listdir(archive_p)
    special_p = f'{os.path.dirname(archive_p)}/slide_special'
    os.makedirs(special_p, exist_ok=True)
    fn_match_dict = fn_match(slides_p)
    for fn in archive:
        if os.path.getsize(os.path.join(archive_p, fn)) < 1000:
            case = os.path.splitext(fn)[0]
            src = os.path.join(slides_p, fn_match_dict[case])
Example #8
        logger.info(
            f'train:{model_val_eval(model, X, Y, E)} num:{len(Y)}; val:{model_val_eval(model, X_val, Y_val, E_val)} num:{len(Y_val)}'
        )

    else:
        if whole:
            whole_train(model, train_table, test_table, epochs=epochs)
        else:
            batch_train(model,
                        train_table,
                        test_table,
                        epochs=epochs,
                        batch_size=64)


logger = gen_logger('train')
train_table_p = 'data/train_table.xlsx'
test_table_p = 'data/test_table.xlsx'
train_ratio = 0.5
if __name__ == "__main__":
    parse = argparse.ArgumentParser()
    parse.add_argument('i',
                       help='the path of directory that saves imgs for cases')
    parse.add_argument('-o', default='..', help='the path for output')
    parse.add_argument('-n', default='nas', help='the model name (pns or nas)')
    parse.add_argument('-m', help='the path of trained weights')
    parse.add_argument('-t', type=int, default=40, help='epochs')
    parse.add_argument('-v', type=bool, default=False, help='validation only')
    command = parse.parse_args()
    try:
        logger.info(f'Begin train on {command}')
# from keras.applications.nasnet import preprocess_input
import os
import cv2
import sys
sys.path.append('..')
from tools import gen_logger
from tqdm import tqdm


def img_gen(dir_p):
    cases = os.listdir(dir_p)
    for case in tqdm(cases):
        cp = os.path.join(dir_p, case)
        for f in os.listdir(cp):
            if f[-4:] == 'tiff':
                continue
            fp = os.path.join(cp, f)
            print(fp)
            break
            # try:
            #     preprocess_input(cv2.imread(fp))
            # except TypeError:
            #     logger.info(f'{fp} error')
    else:
        print('pass')


logger = gen_logger('img_test')

if __name__ == "__main__":
    img_gen(sys.argv[1])
                logger.exception(f'{slide_p} encountered errors')


@record_funcs
def collect_properties(dir_p, dst):
    container = {}
    for slide_p in tqdm(get_files(dir_p)):
        try:
            container[get_name(slide_p)] = dict(
                openslide.OpenSlide(slide_p).properties)
        except:
            logger.exception(f'{slide_p} encountered errors')
    save_pickle(container, dst, 'properties')
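
# get_files, get_name and save_pickle come from the project's helpers and are not shown
# in this excerpt. A minimal sketch of save_pickle under that assumption, matching the
# pickle usage elsewhere in this document:
# def save_pickle(obj, dst, name):
#     with open(os.path.join(dst, f'{name}.pkl'), 'wb') as f:
#         pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)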


logger = gen_logger('overview')
if __name__ == "__main__":
    parse = argparse.ArgumentParser(
        description='This script is used for slide info analysis')
    parse.add_argument('dir')
    parse.add_argument(
        '-f',
        help='"dir" for outputting whole imgs in the lowest resolution; "pow" for outputting power info; '
        '"pro" for outputting a dict of properties in a .pkl file')
    parse.add_argument('-o', required=True)
    command = parse.parse_args()
    func = funcs.get(command.f, collect_properties)
    dir_p = command.dir
    dst = command.o
    func(dir_p, dst)
    #     return True
    gray = image.convert("L")
    # 230 is a magic threshold (mean gray value on the 0-255 scale) and not ideal; however,
    # a better way to select the informative images has not been found yet.
    return np.mean(gray) > 230


def batch_tiling(path, out_dir, logger):
    filter_func = None
    # filter_func = lambda x:get_name(x) not in cache
    slides = list(filter(filter_func, get_files(path)))

    for slide_path in slides:
        name = get_name(slide_path)
        try:
            logger.info(f'start {name}')
            divide(slide_path, out_dir)
        except:
            logger.exception(f'{name} encountered error in batch')


if __name__ == '__main__':
    logger = gen_logger('tile')
    parse = argparse.ArgumentParser(
        description=
        'A script to separate a large .svs file into 10X 96*96 .tif tiles.')
    parse.add_argument('i')
    parse.add_argument('o')
    command = parse.parse_args()
    batch_tiling(command.i, command.o, logger)
Example #12
                 dst=dir_p,
                 val_sel_num=sel_num,
                 d_size=d_size,
                 logger=logger,
                 aug_time=aug_time,
                 train_size_ratio=train_size_ratio)
    logger.info(f'test {dir_p} {d_size}')
    for fn in filter(lambda x: x.endswith('h5'), os.listdir(dir_p)):
        fp = os.path.join(dir_p, fn)
        logger.info(f'now {fp}')
        model.load(fp)
        model.feedback()


if __name__ == "__main__":
    logger = gen_logger('main_val+')
    parse = argparse.ArgumentParser()
    parse.add_argument(
        'i',
        nargs='+',
        help='the path of directory that saves model weight for cases')
    parse.add_argument('-s',
                       type=int,
                       default=42,
                       help='the num of imgs used for validation')
    command = parse.parse_args()
    logger.info(f'Begin validation on {command}')
    try:
        for dir_p in tqdm(command.i):
            models_val(dir_p, logger, sel_num=command.s)
    except:
Example #13
            # weights = np.mean(grads_vals[idx, :, :, :], axis = (0, 1))
            #Create the class activation map.
            cam = np.zeros(dtype=np.float32, shape=conv_output.shape[0:2])
            # for i in range(conv_output.shape[-1]):
            #     cam += conv_output[:, :, i]
            cam += np.sum(conv_output, axis=2)
            cam /= np.max(cam)
            cam = cv2.resize(cam, (96, 96))
            heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
            heatmap[np.where(cam < 0.2)] = 0
            img = heatmap * 0.5 + ori_img
            cv2.imwrite(os.path.join(output_path, f"{fn}"), img)


if __name__ == "__main__":
    logger = gen_logger('vis')
    parse = argparse.ArgumentParser()
    parse.add_argument('i',
                       help='the path of directory that saves imgs for cases')
    parse.add_argument('-o', default='..', help='the path for output')
    parse.add_argument('-s',
                       type=int,
                       default=1,
                       help='the num of imgs used for visualization')

    command = parse.parse_args()
    logger.info(f'Begin visualization on {command}')
    dst = command.o
    os.makedirs(dst, exist_ok=True)
    try:
        visualize_class_activation_map(command.i, dst, sel_num=command.s)
Example #14
from snas import SNAS
from tools import gen_logger
import argparse
import os

if __name__ == "__main__":
    logger = gen_logger('main+')
    parse = argparse.ArgumentParser()
    parse.add_argument('i',
                       help='the path of directory that saves imgs for cases')
    parse.add_argument('-e',
                       default='data/Target.xlsx',
                       help='the path of event/censor table')
    parse.add_argument('-o', default='..', help='the path for output')
    parse.add_argument('-r', type=float, default=0.8, help='training size')
    parse.add_argument('-m',
                       type=str,
                       default='',
                       help='the path of trained weights')
    parse.add_argument('-t', type=int, default=40, help='epochs')
    parse.add_argument('-v', type=bool, default=False, help='validation only')
    parse.add_argument('-s',
                       type=int,
                       default=42,
                       help='the num of imgs used for validation')
    parse.add_argument('-a',
                       type=int,
                       default=0,
                       help='the time of augmentation during training')
    parse.add_argument('-p',
                       type=bool,
import os
import shutil
from collections import defaultdict
import numpy as np
from tools import load_pickle, gen_logger
import sys

logger = gen_logger(name='collect.txt')


def weird_load(fp):
    container = {}
    with open(fp, 'rb') as file:
        while True:
            chunk = file.read(3408)
            if not chunk:
                break
            with open('_', 'wb') as temp:
                temp.write(chunk)
            data = load_pickle('_')
            container.update(data)
    return container
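
# weird_load reads the file in fixed 3408-byte chunks, writes each chunk to a temporary
# file named '_', unpickles it, and merges the resulting dicts into one; the chunk size
# presumably matches the size of each individually pickled record (an assumption, not
# documented in this excerpt).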


def to_case(result):
    outcome = defaultdict(list)
    for area in result.keys():
        case_n = area.rsplit('/', 2)[-2]
        outcome[case_n].append(area)
    return outcome
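
# e.g. to_case({'tiles/case_01/0001.jpg': 0.9}) -> {'case_01': ['tiles/case_01/0001.jpg']}
# (hypothetical paths; the case name is taken from the second-to-last path component).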