def __init__(self, selected_p, dst, logger=None):
    self.selected_p = selected_p
    self.dst = dst
    self.case = None
    self.logger = logger if logger is not None else gen_logger('SNAS_predictor')
    self.model = model_nas(d_size=256)
    self.model.load_weights('371.h5', by_name=True)
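# A hedged usage sketch for SNAS_predictor above: the directory paths are
# placeholders, and it assumes the pretrained weights file '371.h5' is present
# in the working directory. Only construction is shown, since the rest of the
# class is not in this fragment.
from snas import SNAS_predictor

predictor = SNAS_predictor('path/to/selected_imgs', 'path/to/output')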
def judge_area(dir_p, dst='c:/', logger=None):
    os.makedirs(dst, exist_ok=True)
    if logger is None:
        logger = gen_logger('judge_area')
    model = model_nas_clf()
    model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])
    for case_name in os.listdir(dir_p):
        case_dir = os.path.join(dir_p, case_name)
        record = {}
        if not os.path.isdir(case_dir):
            continue
        try:
            for batch in chunk(case_dir, 32):
                X = [preprocess_input(cv2.imread(x)) for x in batch]
                X = np.array(X)
                preds_batch = predicting(model, X)
                record.update({key: value for key, value in zip(batch, preds_batch)})
            with open(os.path.join(dst, f'{case_name}.pkl'), 'wb') as temp_pkl:
                pickle.dump(record, temp_pkl, pickle.HIGHEST_PROTOCOL)
            logger.info(f'{case_name} is completed')
        except:
            logger.exception(f'{case_name} encountered mistakes')
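# A minimal sketch (not the repo's implementation) of the `chunk` helper that
# judge_area relies on, assuming it yields batches of image paths from a case
# directory; the real helper presumably lives in tools.py and may differ.
import os

def chunk_sketch(case_dir, batch_size=32):
    paths = [os.path.join(case_dir, fn) for fn in os.listdir(case_dir)]
    for start in range(0, len(paths), batch_size):
        # yield at most batch_size image paths at a time
        yield paths[start:start + batch_size]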
def __init__(self, selected_p, dst, train_size_ratio=0.8, epochs=40,
             inner_train_time=22, val_sel_num=42, aug_time=0, d_size=256,
             target_p='data/Target.xlsx', gene=False, logger=None):
    self.dst = dst
    self.logger = logger if logger is not None else gen_logger('SNAS')
    trainer = Train_table_creator(selected_p, dst, train_ratio=train_size_ratio,
                                  logger=self.logger, target_p=target_p)
    assert trainer.cache(), 'imgs searching encounters errors'
    self.train_table = trainer.train_table
    self.test_table = trainer.test_table
    self.sel_num = val_sel_num
    self.d_size = d_size
    self.epochs = epochs
    self.start_epoch = 0
    self.inner_train_time = inner_train_time
    self.aug_time = aug_time
    self.trained = False
    self.gene = gene
    self.ada = None
    self.seq = None
    self.pool = defaultdict(list)
    self.train_pool = defaultdict(list)  # avoid too much repetition in training
    if self.gene:
        self.train_gene, self.test_gene = trainer.get_gene_table()
        self.model = model_gn(f_num=len(self.train_gene.columns))
    else:
        self.model = model_nas(d_size=d_size)
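# A hedged instantiation sketch for the SNAS trainer above: the directory paths
# are placeholders, and the call mirrors the way from_svs.py constructs it.
from snas import SNAS

snas = SNAS('path/to/selected_imgs', 'path/to/model_dst',
            train_size_ratio=0.8, epochs=40, target_p='data/Target.xlsx')
snas.whole_train()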
#!/usr/bin/env python
# coding:utf-8
from __future__ import unicode_literals
from lucene import *
from settings import db, INDEX_DIR, ANALYZER
from tools import gen_logger, pagination
from time import time

logger = gen_logger(__file__, 'w')


def index():
    '''
    :desc: rebuild the index
    '''
    logger.info('Rebuilding index...')
    start_time = time()
    resource_count = db.get('select count(*) as count from resource')['count']
    logger.info('collected {} records'.format(resource_count))
    sql = 'select * from resource limit 10000 offset %s'
    counter = 0
    writer = IndexWriter(INDEX_DIR, ANALYZER, True,
                         IndexWriter.MaxFieldLength.UNLIMITED)
    for offset in range(0, resource_count, 10000):
        items = db.query(sql, offset)
        for item in items:
            doc = Document()
            doc.add(
                Field('title', item['title'], Field.Store.YES,
                      Field.Index.ANALYZED))
import logging
import string
import json
import re
from pymongo import MongoClient
from time import localtime, strftime, time
from lucene import *
from settings import *
from tools import gen_logger, pagination
from functools import wraps
from IPython import embed

testindexdir = SimpleFSDirectory(File('testindex'))
testsearcher = IndexSearcher(testindexdir)
source_count = None
logger = gen_logger(__file__, 'w')


def traffic_counter(func):
    '''
    :desc: count incoming traffic
    '''
    @wraps(func)
    def wrapper(*args, **kwargs):
        db.monitor.update({}, {'$inc': {'traffic': 1}})
        return func(*args, **kwargs)
    return wrapper


def rebuild_indexing():
    '''
    :desc: rebuild the index
    '''
import argparse
import os
import json
from snas import SNAS, SNAS_predictor
from tools import gen_logger, load_locs, marking, move_model_val
from tiles import batch_tiling
from area_judge import judge_area
from area_move import pkl_select

if __name__ == "__main__":
    logger = gen_logger('from_svs')
    parse = argparse.ArgumentParser(description='A pipeline from .svs files to survival models')
    parse.add_argument('i', help='the path of the directory that saves imgs for cases')
    parse.add_argument('o', help='the path for output')
    parse.add_argument('-m', default='train',
                       help='the working mode; if you want to use the prediction mode, just type "val"')
    parse.add_argument('-e', default='data/Target.xlsx',
                       help='if mode is train, the path of the event/censor table is needed')
    command = parse.parse_args()
    dst = command.o
    tile_dst = os.path.join(dst, 'tiles')
    batch_tiling(command.i, tile_dst, logger)
    logger.info('tiling done')
    pkl_dst = os.path.join(dst, 'pkl')
    judge_area(tile_dst, dst=pkl_dst, logger=logger)
    logger.info('judge done')
    sel_dst = os.path.join(dst, 'sel')
    pkl_select(pkl_dst, sel_dst)
    logger.info('selection done')
    if command.m == 'train':  # the working mode is given by -m, not the output path
        model_dst = os.path.join(dst, 'models')
        model = SNAS(sel_dst, model_dst, logger=logger, target_p=command.e)
        model.whole_train()
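# Example invocation of the pipeline above (hedged; the paths are placeholders
# and the file name from_svs.py is assumed from the logger name):
#   python from_svs.py /path/to/svs_cases ./out -m train -e data/Target.xlsx
# With any other -m value, this fragment stops after tiling, area judging and
# selection, and no model is trained.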
import os
import shutil
import argparse
from tools import gen_logger
from tqdm import tqdm

logger = gen_logger('make_archive')


def make_archive(cases_p, archive_p):
    os.makedirs(archive_p, exist_ok=True)
    for case in tqdm(os.listdir(cases_p)):
        try:
            shutil.make_archive(os.path.join(archive_p, case.split('.')[0]),
                                'zip', os.path.join(cases_p, case))
            logger.info(f'{case} is completed')
        except:
            logger.exception(f'{case} encountered some errors')


def fn_match(slides_p):
    slides = os.listdir(slides_p)
    return {fn.split('.')[0]: fn for fn in slides}


def copy_special_slide(slides_p, archive_p):
    archive = os.listdir(archive_p)
    # os.path.pardir is the '..' string constant; os.path.dirname gives the parent directory
    special_p = f'{os.path.dirname(archive_p)}/slide_special'
    os.makedirs(special_p, exist_ok=True)
    fn_match_dict = fn_match(slides_p)
    for fn in archive:
        if os.path.getsize(os.path.join(archive_p, fn)) < 1000:
            case = os.path.splitext(fn)[0]
            src = os.path.join(slides_p, fn_match_dict[case])
        logger.info(
            f'train:{model_val_eval(model, X, Y, E)} num:{len(Y)}; '
            f'val:{model_val_eval(model, X_val, Y_val, E_val)} num:{len(Y_val)}')
    else:
        if whole:
            whole_train(model, train_table, test_table, epochs=epochs)
        else:
            batch_train(model, train_table, test_table, epochs=epochs, batch_size=64)


logger = gen_logger('train')
train_table_p = 'data/train_table.xlsx'
test_table_p = 'data/test_table.xlsx'
train_ratio = 0.5

if __name__ == "__main__":
    parse = argparse.ArgumentParser()
    parse.add_argument('i', help='the path of directory that saves imgs for cases')
    parse.add_argument('-o', default='..', help='the path for output')
    parse.add_argument('-n', default='nas', help='the model name (pns or nas)')
    parse.add_argument('-m', help='the path of trained weights')
    parse.add_argument('-t', type=int, default=40, help='epochs')
    parse.add_argument('-v', type=bool, default=False, help='validation only')
    command = parse.parse_args()
    try:
        logger.info(f'Begin train on {command}')
# from keras.applications.nasnet import preprocess_input
import os
import cv2
import sys
sys.path.append('..')
from tools import gen_logger
from tqdm import tqdm


def img_gen(dir_p):
    cases = os.listdir(dir_p)
    for case in tqdm(cases):
        cp = os.path.join(dir_p, case)
        for f in os.listdir(cp):
            if f[-4:] == 'tiff':
                continue
            fp = os.path.join(cp, f)  # fp was printed below but never assigned
            print(fp)
            break
            # try:
            #     preprocess_input(cv2.imread(fp))
            # except TypeError:
            #     logger.info(f'{fp} error')
        else:
            print('pass')


logger = gen_logger('img_test')

if __name__ == "__main__":
    img_gen(sys.argv[1])
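# Example invocation (hedged; the path is a placeholder, and the file name
# img_test.py is assumed from the logger name):
#   python img_test.py /path/to/tiles_root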
            logger.exception(f'{slide_p} encountered errors')


@record_funcs
def collect_properties(dir_p, dst):
    container = {}
    for slide_p in tqdm(get_files(dir_p)):
        try:
            container[get_name(slide_p)] = dict(
                openslide.OpenSlide(slide_p).properties)
        except:
            logger.exception(f'{slide_p} encountered errors')
    save_pickle(container, dst, 'properties')


logger = gen_logger('overview')

if __name__ == "__main__":
    parse = argparse.ArgumentParser(
        description='This script is used for slide info analysis')
    parse.add_argument('dir')
    parse.add_argument(
        '-f',
        help='"dir" for outputting whole imgs in the lowest resolution; '
             '"pow" for outputting power info; '
             '"pro" for outputting a dict of properties in a .pkl file')
    parse.add_argument('-o', required=True)
    command = parse.parse_args()
    func = funcs.get(command.f, collect_properties)
    dir_p = command.dir
    dst = command.o
    func(dir_p, dst)
    # return True
    gray = image.convert("L")
    # 230 is a magic number, and it is not ideal. However, I currently haven't
    # found a better way to select the informative images.
    return np.mean(gray) > 230


def batch_tiling(path, out_dir, logger):
    filter_func = None
    # filter_func = lambda x: get_name(x) not in cache
    slides = list(filter(filter_func, get_files(path)))
    for slide_path in slides:
        name = get_name(slide_path)
        try:
            logger.info(f'start {name}')
            divide(slide_path, out_dir)
        except:
            logger.exception(f'{name} encountered an error in batch')


if __name__ == '__main__':
    logger = gen_logger('tile')
    parse = argparse.ArgumentParser(
        description='A script to separate a large .svs file into 10X 96*96 .tif files.')
    parse.add_argument('i')
    parse.add_argument('o')
    command = parse.parse_args()
    batch_tiling(command.i, command.o, logger)
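# A minimal stand-alone sketch of the brightness filter used above, assuming the
# tiles are RGB image files: a tile whose mean grayscale value exceeds 230 is
# treated as mostly blank background. The function name and threshold parameter
# are illustrative, not the repo's API.
import numpy as np
from PIL import Image

def looks_like_background(tile_path, threshold=230):
    gray = Image.open(tile_path).convert("L")  # 8-bit grayscale
    return float(np.mean(np.asarray(gray))) > threshold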
        dst=dir_p,
        val_sel_num=sel_num,
        d_size=d_size,
        logger=logger,
        aug_time=aug_time,
        train_size_ratio=train_size_ratio)
    logger.info(f'test {dir_p} {d_size}')
    for fn in filter(lambda x: x.endswith('h5'), os.listdir(dir_p)):
        fp = os.path.join(dir_p, fn)
        logger.info(f'now {fp}')
        model.load(fp)
        model.feedback()


if __name__ == "__main__":
    logger = gen_logger('main_val+')
    parse = argparse.ArgumentParser()
    parse.add_argument(
        'i', nargs='+',
        help='the path of the directory that saves model weights for cases')
    parse.add_argument('-s', type=int, default=42,
                       help='the num of imgs used for validation')
    command = parse.parse_args()
    logger.info(f'Begin train on {command}')
    try:
        for dir_p in tqdm(command.i):
            models_val(dir_p, logger, sel_num=command.s)
    except:
    # weights = np.mean(grads_vals[idx, :, :, :], axis=(0, 1))
    # Create the class activation map.
    cam = np.zeros(dtype=np.float32, shape=conv_output.shape[0:2])
    # for i in range(conv_output.shape[-1]):
    #     cam += conv_output[:, :, i]
    cam += np.sum(conv_output, axis=2)
    cam /= np.max(cam)
    cam = cv2.resize(cam, (96, 96))
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    heatmap[np.where(cam < 0.2)] = 0
    img = heatmap * 0.5 + ori_img
    cv2.imwrite(os.path.join(output_path, f"{fn}"), img)


if __name__ == "__main__":
    logger = gen_logger('vis')
    parse = argparse.ArgumentParser()
    parse.add_argument('i', help='the path of directory that saves imgs for cases')
    parse.add_argument('-o', default='..', help='the path for output')
    parse.add_argument('-s', type=int, default=1,
                       help='the num of imgs used for visualization')
    command = parse.parse_args()
    logger.info(f'Begin train on {command}')
    dst = command.o
    os.makedirs(dst, exist_ok=True)
    try:
        visualize_class_activation_map(command.i, dst, sel_num=command.s)
from snas import SNAS
from tools import gen_logger
import argparse
import os

if __name__ == "__main__":
    logger = gen_logger('main+')
    parse = argparse.ArgumentParser()
    parse.add_argument('i', help='the path of directory that saves imgs for cases')
    parse.add_argument('-e', default='data/Target.xlsx',
                       help='the path of event/censor table')
    parse.add_argument('-o', default='..', help='the path for output')
    parse.add_argument('-r', type=float, default=0.8, help='training size')
    parse.add_argument('-m', type=str, default='', help='the path of trained weights')
    parse.add_argument('-t', type=int, default=40, help='epochs')
    parse.add_argument('-v', type=bool, default=False, help='validation only')
    parse.add_argument('-s', type=int, default=42,
                       help='the num of imgs used for validation')
    parse.add_argument('-a', type=int, default=0,
                       help='the time of augmentation during training')
    parse.add_argument('-p', type=bool,
import os
import shutil
from collections import defaultdict
import numpy as np
from tools import load_pickle, gen_logger
import sys

logger = gen_logger(name='collect.txt')


def weird_load(fp):
    container = {}
    with open(fp, 'rb') as file:
        while True:
            chunk = file.read(3408)
            if not chunk:
                break
            with open('_', 'wb') as temp:
                temp.write(chunk)
            data = load_pickle('_')
            container.update(data)
    return container


def to_case(result):
    outcome = defaultdict(list)
    for area in result.keys():
        case_n = area.rsplit('/', 2)[-2]
        outcome[case_n].append(area)
    return outcome
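# A hedged usage sketch of to_case above: given prediction results keyed by tile
# path, it groups the tile paths by the enclosing case directory name. The paths
# below are made up for illustration.
example_result = {
    'root/case_A/tile_001.tif': 0.91,
    'root/case_A/tile_002.tif': 0.64,
    'root/case_B/tile_007.tif': 0.12,
}
grouped = to_case(example_result)
# grouped['case_A'] -> ['root/case_A/tile_001.tif', 'root/case_A/tile_002.tif']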