Example #1
def batch_image_mask(patch_R, patch_C):
    """
    Batch Create mask for image by using mask rcnn
    :param patch_R: patch row
    :param patch_C: patch column
    :return:
    """

    conf = configparser.ConfigParser()
    conf.read(os.path.join(current_path, "..", "sys.ini"))
    image_dir = conf.get("UTILS_MASK", "IMAGE_DIR")
    images = glob.glob(os.path.join(image_dir, "*.png"))
    images = sorted(images)

    info_logger = get_logger(level="info")
    error_logger = get_logger(level="error")

    DEVICE = "/gpu:1"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    with tf.device(DEVICE):
        seg_model = load_maskrcnn_model()
        for image in images:
            try:
                image_mask(image, patch_R, patch_C, seg_model)
                info_logger.info(f"Create mask {image} success")
            except Exception as e:
                error_logger.error(f"Create mask {image} error", exc_info=True)
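These examples call get_logger(level="info") / get_logger(level="error") without showing its definition. Below is a minimal sketch of what such a level-keyed helper might look like on top of the standard logging module; the file names and format are assumptions, not the project's actual implementation:

import logging
import os

_LOGGERS = {}

def get_logger(level="info", log_dir="logs"):
    # Return a cached logger that writes to one file per level, e.g. logs/info.log.
    if level in _LOGGERS:
        return _LOGGERS[level]
    os.makedirs(log_dir, exist_ok=True)
    logger = logging.getLogger(level)
    logger.setLevel(getattr(logging, level.upper()))
    handler = logging.FileHandler(os.path.join(log_dir, level + ".log"))
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    logger.addHandler(handler)
    _LOGGERS[level] = logger
    return logger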
Example #2
def get_logger():
  global _log_path, _logger, _log_file_name

  if _logger is not None:
    return _logger
  _logger = log_utils.get_logger(os.path.join(_log_path, _log_file_name))
  return _logger
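Because the instance is cached in the module-level _logger, every later call returns the same object and shares its handlers. A quick check, assuming the module globals _log_path and _log_file_name are initialized elsewhere:

a = get_logger()
b = get_logger()
assert a is b  # the cached logger is reused after the first call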
Example #3
def cancer_cell_caculating(size_img,image_list,mask_list,coordinate_list,large_flag_list,model,patch_size):
    #print ("Img numbers : "+str(len(image_list)))
    #print ("mask numbers: "+str(len(mask_list)))
    is_success_flag_list = []
    image_result_list = []
 
    for i in range(len(image_list)):
        is_success_flag = 1
        im_input = image_list[i]
        mask_data = mask_list[i]
        large_flag = large_flag_list[i]
        coordinate = coordinate_list[i]
        try:
            result = read_process(size_img, im_input, mask_data, coordinate, large_flag, model, patch_size)
        except Exception:  # if processing fails, log it and record an empty result
            error_logger = get_logger(level="error")
            error_logger.error('Cell Classification Error', exc_info=True)
            is_success_flag = 0
            result = []
            image_result_list.append(result)
            is_success_flag_list.append(is_success_flag)
        else:
            if len(result) == 1 and result[0, 0] == 1:
                is_success_flag = 0
                result = []
            image_result_list.append(result)
            is_success_flag_list.append(is_success_flag)
                 
    
    return image_result_list,is_success_flag_list
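The function returns two parallel lists: per-image results and success flags. A small sketch of consuming them together; the dummy results are illustrative only:

# consuming the two parallel lists returned above (dummy data for illustration)
image_result_list = [[('nucleus', 0.9)], []]   # hypothetical per-image results
is_success_flag_list = [1, 0]                  # 1 = succeeded, 0 = failed/empty
for result, ok in zip(image_result_list, is_success_flag_list):
    if ok:
        print(len(result), "nuclei classified")
    else:
        print("image skipped (classification failed)")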
Example #4
def main():
    args = argument_parser()
    set_seed(args.seed)
    config_module = importlib.import_module("configs." + args.config)
    model_def = importlib.import_module("model." + args.model).model
    dataset = importlib.import_module("dataset." + args.dataset).dataset
    train = importlib.import_module("trainer." + args.trainer).train
    logger = get_logger(args)

    tried_configs = []
    end = False
    while True:
        importlib.reload(config_module)
        configs = config_module.config
        possible_configs = get_configurations(configs)
        for config_idx, config in enumerate(possible_configs):
            if config_idx == len(possible_configs) - 1:
                end = True
            if config in tried_configs:
                continue
            else:
                tried_configs.append(config)
                train(**{
                    'c': config,
                    'm': model_def,
                    'd': dataset,
                    'e': logger
                })
                break
        if end:
            break
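get_configurations is not shown here. Given that it expands a config module into a list of candidate configs, a plausible sketch is a Cartesian-product expansion over list-valued entries; this is an assumption, not the project's actual implementation:

import itertools

def get_configurations(configs):
    # Expand {"lr": [0.1, 0.01], "bs": [32]} into
    # [{"lr": 0.1, "bs": 32}, {"lr": 0.01, "bs": 32}].
    keys = list(configs.keys())
    value_lists = [v if isinstance(v, list) else [v] for v in configs.values()]
    return [dict(zip(keys, combo)) for combo in itertools.product(*value_lists)]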
Example #5
def get_img_create_time(path):
    try:
        with open(path, 'rb') as f:
            tags = exifread.process_file(f)

        dt = tags.get('EXIF DateTimeOriginal')
        if dt is None:
            dt = tags.get('Image DateTime', '')
        dt = str(dt).replace(' ', '_').replace(':', '')
        return dt
    except Exception:
        log_utils.get_logger('.\\rename_error.log').info(
            traceback.format_exc())
        return None
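The returned string (e.g. "20190516_101900") is suitable for timestamp-based renaming. A small usage sketch, where the "photos" directory and the naming scheme are assumptions:

import os

for name in os.listdir("photos"):                      # assumed source directory
    src = os.path.join("photos", name)
    created = get_img_create_time(src)
    if created:                                        # skip images without EXIF dates
        os.rename(src, os.path.join("photos", created + "_" + name))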
Example #6
def walk_file():
    """
    walk through the svs file and prediction them
    :param patch_R: patch row
    :param patch_C: patch column
    :param level: svs file level
    :param patch_size: patch size
    :return:
    """
    #the parameter cc_prob_threshold is no longer in use,I have modified the involved definition in all the methods are related
    # by Bohrium Kwong 2019.01.21   
    #probmat_dir = os.path.join(current_path, "..", "output", "output_probmat")
    #if not os.path.isdir(probmat_dir): os.makedirs(probmat_dir)
    #region_result_dir = os.path.join(current_path, "..", "output", "region_result")
    #if not os.path.isdir(region_result_dir): os.makedirs(region_result_dir)
    # add saving of the region_result variable based on openslide_region_predict
    # by Bohrium Kwong 2019.02.01

   # img_save_dir = os.path.join(current_path, "..", "output", "ori_image_save")
    #if not os.path.isdir(img_save_dir): os.makedirs(img_save_dir)
    # mask_save_dir = os.path.join(current_path, "..", "output", "ballooning")
    mask_save_dir = '/cptjack/totem/barrylee/cut_small_cell/hepat-tri-classification/renew_img/mask_rcnn_mask'
    if not os.path.isdir(mask_save_dir): os.makedirs(mask_save_dir)
    
    info_logger = get_logger(level="info")
    error_logger = get_logger(level="error")

    seg_model = load_maskrcnn_model()
#    cls_model = load_cell_classification_model()
#    region_model,datagen = load_region_classification_model()
    start_time = time.time()
    input_path = '/cptjack/totem/barrylee/cut_small_cell/hepat-tri-classification/renew_img/mask'
    file_name = os.listdir(input_path)
    print(file_name)
    svs_file = sorted(file_name)
#    file_svs_to_flag = int(len(svs_file) // 2)
    for file in svs_file:
        input_name = os.path.join(input_path, file)
        try:
            svs_region_to_probmat_save_img_mask(input_name, seg_model, mask_save_dir, file.split('.')[0])
            info_logger.info("Finished inference %s, needed %.2f sec" % (file, time.time() - start_time))
        except Exception as e:
            print(e)
            error_logger.error('Inference %s Error' % file, exc_info=True)
Example #7
def load_model_config(model_path, log_name=None):
    import json
    from .utils.config_utils import load_json
    config = load_json(model_path)
    if log_name is not None:
        logger = get_logger(log_name)
        logger.info(log_name)
        logger.info("\n" + json.dumps(
            config, sort_keys=True, indent=4, separators=(',', ':')))
    return config
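A usage sketch, where the JSON path and log name are placeholders:

# logs the pretty-printed config under the "train" logger, then returns the dict
config = load_model_config("models/gcforest_config.json", log_name="train")
print(config.get("cascade", {}))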
Example #8
    def __init__(self):
        self._logger = log_utils.get_logger(
            os.path.join(config.APP_CONFIG['log_path'], 'run.log'))
        self._send_wx_users = config.APP_CONFIG['msg_send_users']
        self._check_time = time.time()
        self._check_interval = 48 * 60 * 60
        self._msg_map = {}
        self._redis_utils = redis_utils.get_redis_utils(
            **config.APP_CONFIG['redis_config'])
        self._ding_headers = {
            'Content-Type': 'application/json',
        }
Example #9
import pandas as pd
from sklearn.utils import shuffle
import numpy as np

np.random.seed(1234)
import tensorflow as tf
from tensorflow import keras

tf.set_random_seed(1234)

from utils.log_utils import get_logger
LOGGER = get_logger("train")

from models import get_model
from utils.cleandata import split_sentence, get_the_label

# Global variables
MAX_LENGTH = 0
VOCAB_SIZE = 0
MODEL_NAME = ""
CSV_NAME = "MBTI.csv"
CTYPE = 4
IS_SEQ = False
POS_CATEGORY = ['I', 'N', 'T', 'J']
NEG_CATEGORY = ['E', 'S', 'F', 'P']

# Parse args
import argparse


def parse_args():
Example #10
def svs_region_to_probmat(slide, region_result,patch_R, patch_C, seg_model, class_model,patch_size):
    """
    region_classification_model predict image
    :svs_file: read by method of openslide.OpenSlide
    :param patch_R: patch row
    :param patch_C: patch column
    :param gpu_count: the nums of parallel GPUS that you want to use,each GPU could only afford an patch_R * patch_C image 
    :param seg_model: mask-rcnn segmentation model
    :param class_model: nucleus classification model
    :param patch_size: patch size
    :return:cell classification prediction information
    """
    slide_width, slide_height = slide.get_level_dimension(0)
    N = patch_R // patch_C 
    # N should be no more than 2
    W_ps_NI = slide_width // patch_C
    H_ps_NI = slide_height // patch_R
    widen = patch_size // 2 + 2           # widen bounding
    # Changed the value of widen from the original (patch_size // 2) to (patch_size // 2 + 2)
    # to reduce bugs when calling the cancer_cell_caculating method.
    # by Bohrium Kwong 2019.01.21
    CancerProb_arr = np.zeros((slide_height, slide_width), dtype=np.float16)

    cell_cls_prediction_info = np.zeros((1,8))
    svs_W_H_info = np.zeros((1, 4), dtype=np.uint32)
    for w in range(W_ps_NI):
        for h in range(H_ps_NI):
            bound_C = 1                             # bound column flag
            bound_R = 1                             # bound row flag
            widen_patch_C = patch_C + widen
#            widen_patch_R = patch_R + widen
#            step_patch_R = widen_patch_C
            if (w+1) * patch_C + widen > slide_width:
                widen_patch_C = patch_C
                bound_C = 0
            if (h+1) * patch_R + widen > slide_height:
#                widen_patch_R = patch_R
#                step_patch_R = patch_C
                bound_R = 0
                
            cc_widen_subHIC_list = []
            ul_region_point = []
            bound_list = [(bound_C, bound_R)] * N
            region_raw_tensor = np.zeros((1,patch_C,patch_C,3))
            region_flag = 0
            for g in range(N):
                if region_result[h * N + g,w] > 0:
                    widen_subHIC = np.array(slide.read_region((w * patch_C, h * patch_R + g * patch_C), 0, (widen_patch_C, widen_patch_C)))
                    region_point = (w * patch_C, h * patch_R + g * patch_C)
                    ul_region_point.append(region_point)
                    cc_widen_subHIC_list.append(widen_subHIC[:, :, :3])
                    if region_flag == 0 :
                        region_raw_tensor[0,:,:,:] = widen_subHIC[:patch_C, :patch_C, :3]
                        region_flag = region_flag + 1
                    else:
                        region_raw_tensor = np.row_stack((region_raw_tensor,
                                                              np.expand_dims(widen_subHIC[:patch_C, :patch_C, :3], axis=0)))
            
            if len(ul_region_point) > 0:
                image_tensor_len = region_raw_tensor.shape[0]
                input_data = region_raw_tensor.reshape(image_tensor_len*patch_C,patch_C,3) 
                imagesMask = maskrcnn_detection(seg_model, [input_data])
                imagesMask = imagesMask[0]
                # If there is more than one region to segment, reshape them into a single
                # (N * patch_C, patch_C) image instead of running two separate patches
                # (keeping the Mask R-CNN memory cost under the graphics card's limit),
                # because detecting one tall image is more efficient than detecting two
                # patches. Based on region_flag (1 or 2), the input image size becomes
                # (region_flag * patch_C, patch_C). This saves about 10% of detection time.
                # by Bohrium Kwong 2019.01.23
                imagesMask_list = [imagesMask[l * patch_C : patch_C * (l + 1) , :patch_C, :] for l in range(len(ul_region_point))]
                
                image_result_list, is_success_flag_list = cancer_cell_caculating(
                                            patch_C, cc_widen_subHIC_list, imagesMask_list,
                                            ul_region_point, bound_list, class_model,
                                            patch_size)
                
                del imagesMask, widen_subHIC,imagesMask_list,cc_widen_subHIC_list
                gc.collect()
                    
                valit_flag = 0
                
                for each in range(N):
                    if region_result[h * N + each ,w] > 0 and each - valit_flag < len(image_result_list):  
                        if 1 == is_success_flag_list[each - valit_flag] and each - valit_flag >=0:
                            # storage cell classification prediction information
                            ul_w = w * patch_C
                            ul_h = h * patch_R + each * patch_C
                            temp_svs_W_H_info = np.asarray([[ul_w,
                                                             ul_h,
                                                             (cell_cls_prediction_info.shape[0] - 1),
                                                             (cell_cls_prediction_info.shape[0] - 1 + image_result_list[each - valit_flag].shape[0])]])
                            svs_W_H_info = np.row_stack((svs_W_H_info, temp_svs_W_H_info))
                            cell_cls_prediction_info = np.row_stack((cell_cls_prediction_info, image_result_list[each - valit_flag]))
                            # patch cancer probability
                            h_upper = h * patch_R + each * patch_C
                            h_bottom = h * patch_R + (each+1) * patch_C
                            w_left = w * patch_C
                            w_right = (w+1) * patch_C
                            cell_sum, cancer_cell_count, _, _, _ = nuclei_statistics(image_result_list[each - valit_flag])
                            # nuclei_statistics returns (cell_sum, cancer_cell_count,
                            # fibroblast_cell_count, inflammatory_cell_count, miscellaneous_cell_count)
                            try:
                                cancer_prob = cancer_cell_count / cell_sum
                            except Exception:
                                error_logger = get_logger(level="error")
                                error_logger.error('y: ' + str(h * N + each) + ' x: ' + str(w) + ' division by zero: cell_sum is 0.', exc_info=True)
                                cancer_prob = 0
                            # cancer_prob was originally np.sum(image_result_list[each][:, -2] > cc_prob_threshold) / len(image_result_list[each]);
                            # it is now computed via 'nuclei_statistics'. cell_sum can be zero, hence
                            # the try/except guard. The cc_prob_threshold parameter is no longer used
                            # and has been removed from all related method definitions.
                            # by Bohrium Kwong 2019.01.21
                            CancerProb_arr[h_upper:h_bottom, w_left:w_right] = cancer_prob
                        else:
                            valit_flag = valit_flag + 1    
                            
            # save cell classification prediction information
    cell_cls_prediction_info = np.delete(cell_cls_prediction_info, 0, axis=0)
    svs_W_H_info = np.delete(svs_W_H_info, 0, axis=0)
#    pkl_result = (cell_cls_prediction_info, svs_W_H_info)
#    pkl_thread = threading.Thread(target=cell_cls_prediction_to_pickle, args=(slide.get_basename(), pkl_result,))
#    pkl_thread.start()

    return  CancerProb_arr,cell_cls_prediction_info, svs_W_H_info
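The comments above describe stacking up to N regions into one tall image before Mask R-CNN detection and slicing the masks back out afterwards. Below is a standalone NumPy sketch of that index arithmetic; the shapes are chosen for illustration only:

import numpy as np

patch_C, k = 4, 2                                  # k regions of (patch_C, patch_C, 3)
patches = [np.full((patch_C, patch_C, 3), i) for i in range(k)]
tall = np.concatenate(patches, axis=0)             # one (k * patch_C, patch_C, 3) input
# after detection, slice the tall result back into per-region pieces
masks = [tall[l * patch_C:(l + 1) * patch_C, :patch_C] for l in range(k)]
assert all((masks[l] == patches[l]).all() for l in range(k))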
Example #11
    login_result = itchat.check_login()
    if login_result != '200':
        _logger.error('wx login fail')
        sys.exit(-2)
    return True


def run():
    load_ignore_keywords(self_path)

    # itchat.auto_login(enableCmdQR=2, hotReload=True)
    # check_wx_login()

    # init_send_wx_users()
    smz = smzdm_watcher.SmzdmWatcher()
    smz.run()


if __name__ == '__main__':

    self_path = os.path.split(os.path.realpath(__file__))[0]
    config.APP_CONFIG['log_path'] = os.path.join(self_path, '..', 'logs')
    _logger = log_utils.get_logger(
        os.path.join(config.APP_CONFIG['log_path'], 'run.log'))

    system = platform.system()
    if system == 'Windows':
        _logger.error('soc-web-watcher cannot run on Windows.')
        sys.exit(-1)
    run()
Example #12
def svs_to_probmat(svs, patch_R, patch_C, seg_model, class_model, region_model,patch_size):
    """
    convert svs file to probability matrix
    :param svs: svs file path
    :param patch_R: patch row
    :param patch_C: patch column
    :param seg_model: mask-rcnn segmentation model
    :param class_model: nucleus classification model
    :param patch_size: patch size
    :return:
    """
# The cc_prob_threshold parameter is no longer used; all related method definitions have been updated accordingly.

    slide = Slide(svs)
    slide_width, slide_height = slide.get_level_dimension(0)

    N = patch_R // patch_C
    widen = patch_size // 2 + 2           # widen bounding
    # Changed the value of widen from the original (patch_size // 2) to (patch_size // 2 + 2)
    # to reduce bugs when calling the cancer_cell_caculating method.
    # by Bohrium Kwong 2019.01.21

    W_ps_NI = slide_width // patch_C   # 31782 // 299  = 106
    H_ps_NI = slide_height // patch_R  # 24529 // 598 = 41
    
    CancerProb_arr = np.zeros((slide_height, slide_width), dtype=np.float16)

    cell_ratio = 0.55   # threshold deciding whether a patch is background

    cell_cls_prediction_info = np.zeros((1,8))
    svs_W_H_info = np.zeros((1, 4), dtype=np.uint32)

    # iterate over patch upper-left origins
    for w in range(1 , W_ps_NI - 1):
        for h in range(H_ps_NI):
            bound_C = 1                             # bound column flag
            bound_R = 1                             # bound row flag
            widen_patch_C = patch_C + widen
            widen_patch_R = patch_R + widen
            step_patch_R = widen_patch_C
            if (w+1) * patch_C + widen > slide_width:
                widen_patch_C = patch_C
                bound_C = 0
            if (h+1) * patch_R + widen > slide_height:
                widen_patch_R = patch_R
                step_patch_R = patch_C
                bound_R = 0

            widen_subHIC = np.array(slide.read_region((w * patch_C, h * patch_R), 0, (widen_patch_C, widen_patch_R)))
            widen_subHIC = widen_subHIC[:, :, :3]  # exclude alpha
            cc_widen_subHIC = widen_subHIC.copy()
           
            # print("widen_subHIC.shape: ", widen_subHIC.shape)
            # print("subHIC.shape: ", subHIC.shape)

            # pixels whose three RGB channel values are all >200 or <40 are treated as background
            rgb_s = (abs(widen_subHIC[:, :, 0] - 120) >= 80) & (abs(widen_subHIC[:, :, 1] - 120) >= 80) & (
                    abs(widen_subHIC[:, :, 2] - 120) >= 80)  # >200  <40
#            widen_subHIC_list = []
            cc_widen_subHIC_list = []
            bound_list = [(bound_C, bound_R)] * N
            if np.sum(rgb_s) <= (widen_patch_R * widen_patch_C) * cell_ratio:
                
                region_input_image = widen_subHIC[:patch_R, :patch_C, :]
                # region_input_image.reshape(N, patch_C, patch_C, 3)
                try:
                    region_prob = region_classification(region_model, region_input_image, N, patch_C)
                except Exception:
                    error_logger = get_logger(level="error")
                    error_logger.error('y: ' + str(h * N) + ' x: ' + str(w) + ' region classification failed.', exc_info=True)
                    region_prob = np.zeros((N, 2))
                # Before running Mask R-CNN nuclei segmentation, screen the regions to be detected
                # based on the region_classification result: only regions predicted as cancer
                # proceed to the next step.
                # by Bohrium Kwong 2019.01.21
                ul_region_point = []
                region_flag = 0
                region_raw_tensor = np.zeros((1,patch_C,patch_C,3))
                for i in range(N):
                    if region_prob[i,0] >= 0.5:
                        region_point = (w * patch_C, h * patch_R + i * patch_C)
                        ul_region_point.append(region_point)
                        cc_widen_subHIC_list.append(cc_widen_subHIC[i * patch_C : i * patch_C + step_patch_R, :, :])
                        # get the region upper left point
                        if region_flag == 0 :
                            region_raw_tensor[0,:,:,:] = widen_subHIC[i*patch_C : (i + 1) * patch_C, :patch_C, :3]
                            region_flag = region_flag + 1
                        else:
                            region_raw_tensor = np.row_stack((region_raw_tensor,
                                                              np.expand_dims(widen_subHIC[i*patch_C : (i + 1) * patch_C, :patch_C, :3], axis=0)))
                       
                if len(ul_region_point) > 0:
#                if len(widen_subHIC_list) > 0:
                    image_tensor_len = region_raw_tensor.shape[0]
                    input_data = region_raw_tensor.reshape(image_tensor_len*patch_C,patch_C,3) 
                    imagesMask = maskrcnn_detection(seg_model, [input_data])
                    imagesMask = imagesMask[0]
                    # The original width of the Mask R-CNN input image was patch_C + step_patch_R,
                    # which is unnecessary, so it is limited to ':patch_C'. If there is more than one
                    # region to segment, they are reshaped into a single (N * patch_C, patch_C) image
                    # instead of two separate patches (keeping the Mask R-CNN memory cost under the
                    # graphics card's limit), because detecting one tall image is more efficient than
                    # detecting two patches. Based on region_flag (1 or 2), the input image size
                    # becomes (region_flag * patch_C, patch_C). This saves about 10% of detection time.
                    # by Bohrium Kwong 2019.01.23
                    imagesMask_list = [imagesMask[l * patch_C : patch_C * (l + 1) , :patch_C, :] for l in range(len(ul_region_point))]

                    
                    image_result_list, is_success_flag_list = cancer_cell_caculating(
                                            patch_C, cc_widen_subHIC_list, imagesMask_list,
                                            ul_region_point, bound_list, class_model,
                                            patch_size)
                    
                    del imagesMask, widen_subHIC,imagesMask_list,cc_widen_subHIC_list,cc_widen_subHIC
                    gc.collect()
                    
                    valit_flag = 0
                    # valit_flag counts how many regions were not predicted as cancer regions in the
                    # region_classification result region_prob; subtracting it from the loop variable
                    # 'each' recovers the real index into image_result_list, keeping the one-to-one
                    # match with region_input_image.
                    # by Bohrium Kwong 2019.01.23
                    for each in range(N):
                        if region_prob[each,0] >= 0.5 :    
                            if 1 == is_success_flag_list[each - valit_flag] and each - valit_flag >=0:
                                # storage cell classification prediction information
                                ul_w = w * patch_C
                                ul_h = h * patch_R + each * patch_C
                                temp_svs_W_H_info = np.asarray([[ul_w,
                                                                 ul_h,
                                                                 (cell_cls_prediction_info.shape[0] - 1),
                                                                 (cell_cls_prediction_info.shape[0] - 1 + image_result_list[each - valit_flag].shape[0])]])
                                svs_W_H_info = np.row_stack((svs_W_H_info, temp_svs_W_H_info))
                                cell_cls_prediction_info = np.row_stack((cell_cls_prediction_info, image_result_list[each - valit_flag]))
                                # patch cancer probability
                                h_upper = h * patch_R + each * patch_C
                                h_bottom = h * patch_R + (each+1) * patch_C
                                w_left = w * patch_C
                                w_right = (w+1) * patch_C
                                cell_sum, cancer_cell_count, _, _, _ = nuclei_statistics(image_result_list[each - valit_flag])
                                # nuclei_statistics returns (cell_sum, cancer_cell_count,
                                # fibroblast_cell_count, inflammatory_cell_count, miscellaneous_cell_count)
                                try:
                                    cancer_prob = cancer_cell_count / cell_sum
                                except Exception:
                                    error_logger = get_logger(level="error")
                                    error_logger.error('y: ' + str(h * N + each) + ' x: ' + str(w) + ' division by zero: cell_sum is 0.', exc_info=True)
                                    cancer_prob = 0
                                # cancer_prob was originally np.sum(image_result_list[each][:, -2] > cc_prob_threshold) / len(image_result_list[each]);
                                # it is now computed via 'nuclei_statistics'. cell_sum can be zero,
                                # hence the try/except guard. The cc_prob_threshold parameter is no
                                # longer used and has been removed from all related method definitions.
                                # by Bohrium Kwong 2019.01.21
                                CancerProb_arr[h_upper:h_bottom, w_left:w_right] = cancer_prob
                        else:
                            valit_flag = valit_flag + 1
                            

    # save cell classification prediction information
    cell_cls_prediction_info = np.delete(cell_cls_prediction_info, 0, axis=0)
    svs_W_H_info = np.delete(svs_W_H_info, 0, axis=0)
    pkl_result = (cell_cls_prediction_info, svs_W_H_info)
    pkl_thread = threading.Thread(target=cell_cls_prediction_to_pickle, args=(slide.get_basename(), pkl_result,))
    pkl_thread.start()

    return slide, CancerProb_arr
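The rgb_s test above flags pixels whose three channels are all far from mid-gray (above 200 or below 40) and skips a patch when too many pixels qualify. A toy sketch of the same test; note the cast to int, since on a uint8 slide patch the subtraction channel - 120 would wrap around:

import numpy as np

patch = np.random.randint(0, 256, size=(8, 8, 3), dtype=np.uint8)
rgb_s = ((np.abs(patch[:, :, 0].astype(int) - 120) >= 80) &
         (np.abs(patch[:, :, 1].astype(int) - 120) >= 80) &
         (np.abs(patch[:, :, 2].astype(int) - 120) >= 80))
cell_ratio = 0.55
is_background = rgb_s.sum() > patch.shape[0] * patch.shape[1] * cell_ratio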
Example #13
# -*- coding: utf-8 -*-
"""
@author: cicada
@contact: [email protected]
@file: snipthumb.py
@time: 2019/5/16 10:19 AM
"""
import os
import subprocess

from configs import PVPYTHON_PATH
from utils.log_utils import get_logger

convert_logger = get_logger("convert")


class SnipThumb(object):
    def __init__(self,
                 vf_file,
                 img_file,
                 command=f"{PVPYTHON_PATH} ./sub_snipthumb.py" +
                 " {vf_file} {img_file}"):
        self.vf_file = vf_file
        self.img_file = img_file
        self.command = command.format(vf_file=vf_file, img_file=img_file)

    def start(self):
        try:
            subprocess.run(self.command.split(),
                           cwd=os.path.dirname(os.path.abspath(__file__)))
            return 0
Example #14
"""
Description: A Python 2.7 implementation of gcForest proposed in [1], with a demo implementation of the gcForest library and some demo client scripts demonstrating how to use the code. The implementation is flexible enough to modify the model or fit your own datasets.
Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2)
Requirements: This package was developed with Python 2.7; please make sure all the dependencies specified in requirements.txt are installed.
ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou ([email protected]).
ATTN2: This package was developed by Mr. Ji Feng ([email protected]). The readme file and demo roughly explain how to use the code. For any problem concerning the code, please feel free to contact Mr. Feng.
"""
import numpy as np
from scipy.sparse import issparse

from utils.log_utils import get_logger

LOGGER = get_logger('gcforest.exp_utils')


def load_model_config(model_path, log_name=None):
    import json
    from .utils.config_utils import load_json
    config = load_json(model_path)
    if log_name is not None:
        logger = get_logger(log_name)
        logger.info(log_name)
        logger.info("\n" + json.dumps(
            config, sort_keys=True, indent=4, separators=(',', ':')))
    return config


def concat_datas(datas):
    if type(datas) != list:
        return datas
Example #15
def batch_create_heatmap(pkl_dir, svs_dir, proba_matrix_dir, output_dir, step,
                         cancel_threshold, nr, min_area, n, open_kernel_size,
                         triangle_maxlen, invasive_thickness, region_filter):
    """
    Batch create cancer heatmap
    :param pkl_dir:
    :param svs_dir:
    :param min_ration:
    :param proba_matrix_dir:
    :param singlewhole:
    :return:
    """
    # For the cancer-cell class, a probability matrix file with a fixed suffix can be read to
    # draw the region map, e.g. "{pkl_name}-left-cnm.npy".
    # To draw maps for other classes, change the file suffix read below; it is recommended to
    # create one module per class, e.g. a fibroblast.py for fibroblasts (renaming the
    # corresponding variables as well).
    # To generate the heatmap for one cell class alone, run its .py file directly; for
    # cancer-cell statistics, run this script.
    svss = glob.glob(os.path.join(svs_dir, "*.svs"))
    svss = sorted(svss)

    error_logger = get_logger(level="error")
    info_logger = get_logger(level="info")
    for svs in svss:
        try:
            start_time = time.time()
            info_logger.info("Starting create cancer heatmap %s..." % svs)
            svs_name = os.path.splitext(os.path.basename(svs))[0]
            pkl_file = os.path.join(pkl_dir, svs_name + ".pkl")
            svs_file = os.path.join(svs_dir, svs_name + ".svs")
            matrix_file = os.path.join(
                proba_matrix_dir, f"{svs_name}-region_dead_other_output.npy")
            if os.path.exists(matrix_file) and os.path.exists(pkl_file):
                # generate the binary-classification matrix visualization
                #                matrix_visual_dir = os.path.join(output_dir, 'matrix_visual')
                #                matrix_visual_show(svs_file, matrix_file, svs_name, matrix_visual_dir,xml=True)
                #
                #                # generate maps of cancer, stromal, and invasive regions
                #                cancer_invasive_dir = os.path.join(output_dir, 'cancer_invasive')
                #                cancer_invasive_show(svs_file, matrix_file, svs_name, cancer_invasive_dir, open_kernel_size,
                #                                     triangle_maxlen, invasive_thickness)

                ##                 get the cell-count distribution matrices for tumor, fibroblast, lymphocyte, and miscellaneous cells, plus the tumor, invasive, stromal, and tissue region matrices
                ##                AmountT, AmountF, AmountL, AmountM, AreaT, AreaS, AreaI, AreaM = get_nuclei_invasive_region_matrix(pkl_file,
                ##                                                                                                                   svs_file,
                ##                                                                                                                   step,
                ##                                                                                                                   matrix_file,
                ##                                                                                                                   open_kernel_size,
                ##                                                                                                                   triangle_maxlen,
                ##                                                                                                                   invasive_thickness,
                ##                                                                                                                   region_filter)
                Amount_epithelial, Amount_lymphocyte, Amount_other = get_nuclei_amount_region_matrix(
                    pkl_file, svs_file, step)
                #                # generate density maps for tumor cells, fibroblasts, and lymphocytes
                heatmap_dir = os.path.join(output_dir, 'heatmap')
                create_heatmap(svs_file,
                               Amount_epithelial,
                               svs_name,
                               step,
                               'Cancer',
                               heatmap_dir,
                               xml=True)
                create_heatmap(svs_file,
                               Amount_lymphocyte,
                               svs_name,
                               step,
                               'Lymphocyte',
                               heatmap_dir,
                               xml=True)
                create_heatmap(svs_file,
                               Amount_other,
                               svs_name,
                               step,
                               'Other',
                               heatmap_dir,
                               xml=True)
                #                Amount_epithelial[(Amount_epithelial < Amount_lymphocyte) | (Amount_epithelial < Amount_other)]=0
                Amount_epithelial[Amount_epithelial < Amount_other * 0.5] = 0
                # keep only elements of the tumor-cell matrix that exceed the other cell classes at the same position
                cancel_dead_matrix_visual_dir = os.path.join(
                    output_dir, 'cancel_dead_matrix_visual_dir')
                cancel_dead_matrix_visual_show(svs_file, matrix_file,
                                               Amount_epithelial, step,
                                               cancel_threshold, svs_name,
                                               cancel_dead_matrix_visual_dir)
                #                # generate hotspot maps for tumor cells, fibroblasts, and lymphocytes; when n > 0, draw lines connecting neighboring hotspot regions
                #                hotspot_dir = os.path.join(output_dir, 'hotspot')
                #                create_hotspot(svs_file, AmountT, nr, svs_name, 'cancer', [255, 0, 0, 255], min_area, n, hotspot_dir)
                #                create_hotspot(svs_file, AmountF, nr, svs_name, 'fibroblast', [0, 255, 0, 255], min_area, n, hotspot_dir)
                #                create_hotspot(svs_file, AmountL, nr, svs_name, 'inflammatory', [0, 0, 255, 255], min_area, n, hotspot_dir)

                info_logger.info(
                    "Finished create cancer heatmap %s, needed %.2f sec" %
                    (svs, time.time() - start_time))
        except Exception:
            error_logger.error('Create cancer heatmap %s Error' % svs,
                               exc_info=True)
Example #16

import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import log_loss
import xgboost as xgb
from utils.log_utils import get_logger
from prepare_data import prepare_data
from preprocess_data import preprocess_data
import os
import time

LOG = get_logger('kweonwooj_solution.log')


def main():

    ##################################################################################################################
    # Prepare data
    ##################################################################################################################

    LOG.info('=' * 50)
    LOG.info('# Prepare data..')
    prepare_data(LOG)

    ##################################################################################################################
    # Preprocessing
    ##################################################################################################################
Example #17
def walk_file(patch_R, patch_C, level, patch_size):
    """
    Walk through the svs files and run prediction on them.
    :param patch_R: patch row
    :param patch_C: patch column
    :param level: svs file level
    :param patch_size: patch size
    :return:
    """
    # The cc_prob_threshold parameter is no longer used; all related method definitions
    # have been updated accordingly.
    # by Bohrium Kwong 2019.01.21
    probmat_dir = os.path.join(current_path, "..", "output", "output_probmat")
    if not os.path.isdir(probmat_dir): os.makedirs(probmat_dir)
    region_result_dir = os.path.join(current_path, "..", "output",
                                     "region_result")
    if not os.path.isdir(region_result_dir): os.makedirs(region_result_dir)
    # add saving of the region_result variable based on openslide_region_predict
    # by Bohrium Kwong 2019.02.01

    img_save_dir = os.path.join(current_path, "..", "output", "ori_image_save")
    if not os.path.isdir(img_save_dir): os.makedirs(img_save_dir)
    mask_save_dir = os.path.join(current_path, "..", "output",
                                 "mask_image_save")
    if not os.path.isdir(mask_save_dir): os.makedirs(mask_save_dir)

    info_logger = get_logger(level="info")
    error_logger = get_logger(level="error")

    seg_model = load_maskrcnn_model()
    cls_model1, cls_model2 = load_cell_classification_model()
    #    region_model,datagen = load_region_classification_model()

    svs_file = glob.glob(os.path.join(INPUT_IMAGE_DIR, "*.ndpi"))
    svs_file = sorted(svs_file)

    #    file_svs_to_flag = int(len(svs_file) // 2)
    for i in range(len(svs_file)):
        svs = svs_file[i]
        #        svs_name = os.path.basename(svs).split('.')[0]
        try:
            start_time = time.time()
            info_logger.info("Starting inference %s..." % svs)
            slide = Slide(svs)
            cell_cls_prediction_info, svs_W_H_info = svs_to_probmat(
                slide, patch_R, patch_C, seg_model, cls_model1, cls_model2,
                patch_size, cell_predict)
            pkl_result = (cell_cls_prediction_info, svs_W_H_info)
            pkl_thread = threading.Thread(target=cell_cls_prediction_to_pickle,
                                          args=(
                                              slide.get_basename(),
                                              pkl_result,
                                          ))
            pkl_thread.start()

            #            level_W, level_H = slide.get_level_dimension(level=level)
            #            level_prob_matrix = cv2.resize(result_prob_matrix.astype(np.float32), (level_W, level_H), interpolation=cv2.INTER_AREA)
            #            del result_prob_matrix
            del cell_cls_prediction_info, svs_W_H_info
            gc.collect()
            slide.close()
            info_logger.info("Finished inference %s, needed %.2f sec" %
                             (svs, time.time() - start_time))
        except Exception:
            error_logger.error('Inference %s Error' % svs, exc_info=True)
Example #18
from typing import Dict
from gevent.pywsgi import WSGIServer
from actions import solve_flow
from flask import Flask
from flask_restplus import Resource, Api, Namespace

from actions.compute_domain_flow import ComputeDomainControler
from actions.mesh_flow import MeshControler
from actions.convert_flow import ConvertControler
from actions.solve_flow import stop_solve, SolveController
from actions.su2mesh_flow import SU2MeshControler
from dbs import DB, SlurmDB
from servers import AshuraServer
from utils.log_utils import get_logger

http_server_logger = get_logger("http_server")

ns = Namespace("/", description="Middleware API documentation!")

solve_parser = ns.parser()
solve_parser.add_argument("work-path",
                          type=str,
                          help="workspace of job",
                          location='form')
solve_parser.add_argument("mesh-file-name",
                          type=str,
                          help="the path of mesh",
                          location='form')
solve_parser.add_argument("username",
                          type=str,
                          help="who sends the solve",
Example #19
import numpy as np
from cascade.cascade_classifier import CascadeClassifier
from config import GCTrainConfig
from fgnet import FGNet
from utils.log_utils import get_logger

LOGGER = get_logger("gcforest.gcforest")


class GCForest(object):
    def __init__(self, config):
        self.config = config
        self.train_config = GCTrainConfig(config.get("train", {}))
        if "net" in self.config:
            self.fg = FGNet(self.config["net"], self.train_config.data_cache)
        else:
            self.fg = None
        if "cascade" in self.config:
            self.ca = CascadeClassifier(self.config["cascade"])
        else:
            self.ca = None

    def fit_transform(self,
                      X_train,
                      y_train,
                      X_test=None,
                      y_test=None,
                      train_config=None):
        train_config = train_config or self.train_config
        if X_test is None or y_test is None:
            if "test" in train_config.phases:
Example #20
import pandas as pd
import csv
import sys
import concurrent.futures

sys.path.append('..')
from utils.html_utils import finalize_html_report, ColorDFRows
from utils.email_utils import Emailer
from utils.log_utils import get_logger
from utils.config_utils import get_yaml_params
from utils.bs4_utils import get_ticker_price
from utils.requests_utils import download_hist_prices

params = get_yaml_params()
logger = get_logger(params)

EMAIL_ENABLED = params['email']['enabled']
ATTACHMENTS_ENABLED = params['email']['attachments']
to = params['email']['to']
subject = params['email']['subject']


def convert_to_dict(stockscsv):
    raw_data = {}
    with open(stockscsv, 'r') as f:
        info = csv.reader(f)
        logger.info(
            'Successfully opened and read data from {}'.format(stockscsv))
        for stock_purchase in info:
            vwap = 0
            stock = stock_purchase[0]
Example #21
"""
@author: cicada
@contact: [email protected]
@file: SU2Parser.py
@time: 2019/4/30 10:03 AM
"""
import copy
import json
from typing import Dict

from constants.maps import JSON_2_SU2CONFIG
from utils.log_utils import get_logger
from utils.offset_file import offset_file, def_end_func


parser_logger = get_logger(logger_name="parser")


class SU2Parser(object):

    # TODO: refactor

    def __init__(self, res_file, offset=0):
        self.res_file = res_file

    def res_parse(self, res_dict_tmp, keys, offset=0, end_func=def_end_func):
        line_gen = offset_file(self.res_file, offset=offset, end_func=end_func)
        results = []
        result_pos = 0
        for line, pos in line_gen:
            result = copy.deepcopy(res_dict_tmp)
Example #22
@contact: [email protected]
@file: Slurm.py
@time: 2019/4/24 10:27 AM
"""
import codecs
import copy
import re
import subprocess

from jinja2 import FileSystemLoader, Template
from pathlib import Path
from configs import SCRIPTS_PATH, CORE_NUM_PER_NODE
from schedulers import Scheduler
from utils.log_utils import get_logger

scheduler_logger = get_logger("scheduler")


class Slurm(Scheduler.Scheduler):
    def __init__(self, **kwargs):
        self.core_per_node = CORE_NUM_PER_NODE
        self.param = kwargs

    def job_info(self):
        pass

    @classmethod
    def kill_job(cls, slurm_id):
        try:
            subprocess.run(["scancel", f"{slurm_id}"])
        except subprocess.CalledProcessError:
Example #23
import subprocess

from downloader_wanmen_constants import COURSE_ID
from utils.file_utils import get_dirs
from utils.log_utils import get_logger

logger = get_logger(__name__)


def merge_course_videos(c_id):
    dir_list = get_dirs('video_cache_all\\{0}'.format(c_id))

    for dir_name in dir_list:
        elements = dir_name.split('\\')

        cmd = "cmd.exe /c merge_python.cmd {0} \"{1}\" \"{2}\"".format(elements[1], elements[2], elements[3])
        logger.info("command: {0}".format(cmd))

        ex = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
        out, err = ex.communicate()  # communicate() waits for the process to finish
        status = ex.returncode


if __name__ == "__main__":
    merge_course_videos(COURSE_ID)
Example #24
"""
@author: cicada
@contact: [email protected]
@file: dbs.py
@time: 2019/4/25 11:39 AM
"""
from typing import List

from mysql import connector

from constants.maps import DB_2_JSON
from models.model import *
import json
from utils.log_utils import get_logger

db_logger = get_logger("db")


class DB:

    # TODO: implement the DB class as a context manager

    def __init__(self):
        pass

    @classmethod
    def write_solve(cls, work_path, mesh_file_name, username, solve_app, launch_script, solve_config):
        _session = DBsession()
        try:
            solve = Solve(
                solve_path=work_path,
Example #25
'''
    This file is a Python implementation of Jack (Japan)'s 3rd place solution in the Kaggle
    Santander Product Recommendation competition.
'''

from utils.log_utils import get_logger
from make_data_v3 import make_data
from train_predict import train_predict
from make_submission import make_submission

LOG = get_logger('3rd_place_solution.log')

if __name__ == "__main__":
    LOG.info('=' * 50)
    make_data(LOG)

    LOG.info('=' * 50)
    train_predict(LOG)

    LOG.info('=' * 50)
    #make_submission(LOG)
Example #26
# -*- coding: utf-8 -*-
"""
@author: cicada
@contact: [email protected]
@file: MeshOpt.py
@time: 19-3-28 11:06 AM
"""
import copy
from importlib import import_module
from typing import Dict

from constants.maps import MESH_APP
from utils.log_utils import get_logger

logger = get_logger(logger_name="core")


class MeshOpt:
    def __new__(cls,
                cad_file,
                mesh_dir,
                mesh_config: Dict,
                mesh_app=1,
                **kwargs):
        mdl_name, cls_name = MESH_APP[mesh_app], MESH_APP[mesh_app]
        mdl = import_module(mdl_name, package="core")
        cls: MeshOpt = getattr(mdl, cls_name)
        self = cls._from_parts(cad_file=cad_file,
                               mesh_dir=mesh_dir,
                               mesh_config=mesh_config,
                               mesh_app=mesh_app,
Example #27
    def setup(self):
        args = self.args
        sub_dir = 'input-{}_wot-{}_wtv-{}_reg-{}_nIter-{}_normCood-{}'.format(
            args.crop_size, args.wot, args.wtv, args.reg,
            args.num_of_iter_in_ot, args.norm_cood)

        self.save_dir = os.path.join('ckpts', sub_dir)
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)

        time_str = datetime.strftime(datetime.now(), '%m%d-%H%M%S')
        self.logger = log_utils.get_logger(
            os.path.join(self.save_dir, 'train-{:s}.log'.format(time_str)))
        log_utils.print_config(vars(args), self.logger)

        if torch.cuda.is_available():
            self.device = torch.device("cuda")
            self.device_count = torch.cuda.device_count()
            assert self.device_count == 1
            self.logger.info('using {} gpus'.format(self.device_count))
        else:
            raise Exception("gpu is not available")

        downsample_ratio = 8
        if args.dataset.lower() == 'qnrf':
            self.datasets = {
                x: Crowd_qnrf(os.path.join(args.data_dir, x), args.crop_size,
                              downsample_ratio, x)
                for x in ['train', 'val']
            }
        elif args.dataset.lower() == 'nwpu':
            self.datasets = {
                x: Crowd_nwpu(os.path.join(args.data_dir, x), args.crop_size,
                              downsample_ratio, x)
                for x in ['train', 'val']
            }
        elif args.dataset.lower() == 'sha' or args.dataset.lower() == 'shb':
            self.datasets = {
                'train':
                Crowd_sh(os.path.join(args.data_dir, 'train_data'),
                         args.crop_size, downsample_ratio, 'train'),
                'val':
                Crowd_sh(os.path.join(args.data_dir, 'test_data'),
                         args.crop_size, downsample_ratio, 'val'),
            }
        else:
            raise NotImplementedError

        self.dataloaders = {
            x: DataLoader(self.datasets[x],
                          collate_fn=(train_collate
                                      if x == 'train' else default_collate),
                          batch_size=(args.batch_size if x == 'train' else 1),
                          shuffle=(x == 'train'),
                          num_workers=args.num_workers * self.device_count,
                          pin_memory=(x == 'train'))
            for x in ['train', 'val']
        }
        #self.model = vgg19()
        self.model = TR_CC()
        self.model.to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=args.lr,
                                    weight_decay=args.weight_decay)

        self.start_epoch = 0
        if args.resume:
            self.logger.info('loading pretrained model from ' + args.resume)
            suf = args.resume.rsplit('.', 1)[-1]
            if suf == 'tar':
                checkpoint = torch.load(args.resume, self.device)
                self.model.load_state_dict(checkpoint['model_state_dict'])
                self.optimizer.load_state_dict(
                    checkpoint['optimizer_state_dict'])
                self.start_epoch = checkpoint['epoch'] + 1
            elif suf == 'pth':
                self.model.load_state_dict(torch.load(args.resume,
                                                      self.device))
        else:
            self.logger.info('random initialization')

        self.ot_loss = OT_Loss(args.crop_size, downsample_ratio,
                               args.norm_cood, self.device,
                               args.num_of_iter_in_ot, args.reg)
        self.tv_loss = nn.L1Loss(reduction='none').to(self.device)
        self.mse = nn.MSELoss().to(self.device)
        self.mae = nn.L1Loss().to(self.device)
        self.save_list = Save_Handle(max_num=1)
        self.best_mae = np.inf
        self.best_mse = np.inf
        self.best_count = 0
Example #28
# -*- coding:utf-8 -*-
"""
Description: A Python 2.7 implementation of gcForest proposed in [1], with a demo implementation of the gcForest library and some demo client scripts demonstrating how to use the code. The implementation is flexible enough to modify the model or fit your own datasets.
Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2)
Requirements: This package was developed with Python 2.7; please make sure all the dependencies specified in requirements.txt are installed.
ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou ([email protected]).
ATTN2: This package was developed by Mr. Ji Feng ([email protected]). The readme file and demo roughly explain how to use the code. For any problem concerning the code, please feel free to contact Mr. Feng.
"""
import os, os.path as osp
import numpy as np

from utils.log_utils import get_logger
from utils.cache_utils import name2path

LOGGER = get_logger("gcforest.data_cache")


def check_dir(path):
    """ make sure the dir specified by path got created """
    d = osp.abspath(osp.join(path, osp.pardir))
    if not osp.exists(d):
        os.makedirs(d)


def data_disk_path(cache_dir, phase, data_name):
    data_path = osp.join(cache_dir, phase, name2path(data_name) + ".npy")
    return data_path
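data_disk_path simply joins the cache directory, phase, and hashed data name. A usage sketch, assuming name2path maps "X" to something identity-like:

# e.g. "cache/train/X.npy" when name2path("X") == "X"
path = data_disk_path("cache", "train", "X")
check_dir(path)  # ensure cache/train/ exists before np.save(path, arr)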


class DataCache(object):
Example #29
@author: cicada
@contact: [email protected]
@file: CFMesh.py
@time: 2019/5/21 10:54 AM
"""
import shutil
from typing import Dict
from pathlib import Path

from configs import TEMPLATES_FILES_PATH
from core import MeshOpt
from parsers.CFmeshConfigParsers import cfmesh_config_parser
from parsers.OFDictParser import OFDictParse
from utils.log_utils import get_logger

core_logger = get_logger("core")

# TODO: add logging


class CFMesh(MeshOpt):

    # cfmesh operations on 3D meshes

    def _init(self, **pars):
        self._render_command()
        self.cad_file_name = Path(self.cad_file).name

    def _render_command(self):
        self.command_dict = {"mesh_command": "cartesianMesh"}
Example #30
        yi = labels[i]
        xi = np.mat(xi)
        yi = np.mat(yi)
        res = -1 * yi * (w * xi.T + b)
        if res >= 0:
            errorCnt += 1
    acc_rate = 1 - (errorCnt / m)
    # logg.info(to_magenta("===== tested ====="))
    # logg.info(to_red("accRate: {0}".format(acc_rate)))
    logg.info("===== tested =====")
    logg.info("accRate: {0}".format(acc_rate))
    return acc_rate


if __name__ == "__main__":
    # ===== initialize parameters =====
    parser = argparse.ArgumentParser(description="perceptron")
    parser.add_argument("--eta", default=0.0001, type=float, help="learning rate eta")
    parser.add_argument("--iters", default=100, type=int, help="iters")
    args = parser.parse_args()
    # ===== get the logger =====
    logger = get_logger("perceptron")
    # ===== load the training data =====
    train_datas, train_labels = load_data("../data/mnist_train.csv", logger)
    # ===== train and return the learned weights and bias =====
    w, b = perceptron(train_datas, train_labels, logger, args)
    # ===== load the test data =====
    test_datas, test_labels = load_data("../data/mnist_test.csv", logger)
    # ===== evaluate on the test set =====
    accRate = val(test_datas, test_labels, w, b, logger)