def main():
    logger = get_logger()
    as_server = sys.argv[-1] == "server"
    # When run as a server, keep watching the directory; otherwise,
    # convert the files already in the directory once and exit.
    if as_server:
        print(
            "\033[34m"
            "Welcome to use OCR server\n"
            "Put your image(jpg/jpeg/png/bmp) into `{}`\n\033[0m".format(
                realpath(INPUT_DIR)))
        before = dict([(f, None) for f in os.listdir(INPUT_DIR)])
        try:
            # Poll the directory; convert newly added image files to
            # text through the OCR interface.
            while 1:
                time.sleep(1)
                after = dict([(f, None) for f in os.listdir(INPUT_DIR)])
                added = [f for f in after if f not in before]
                # Only convert images with the expected extensions.
                added_img = [f for f in added if path.splitext(f)[-1] in EXTS]
                if added_img:
                    logger.info("Added file: {}".format(", ".join(added_img)))
                    fullpaths = [realpath(join(INPUT_DIR, f))
                                 for f in added_img]
                    convert2txt(fullpaths)
                before = after
        except KeyboardInterrupt:
            print("\033[34mbye~\033[0m")
            sys.exit(0)
    else:
        files = os.listdir(INPUT_DIR)
        fullpaths = [realpath(join(INPUT_DIR, f)) for f in files]
        convert2txt(fullpaths)

def error_handler(app: Flask):
    status_code = 500
    app_logger = get_logger('app')

    def handle_error_response(error: Exception):
        app_logger.error(request.path)
        app_logger.exception(error)
        _, _, tb = sys.exc_info()
        json_output = {
            'error': error.__message__
                     if isinstance(error, HttpException) else str(error),
        }
        if app.debug:
            json_output['traceback'] = traceback.format_list(
                traceback.extract_tb(tb))
        response = jsonify(json_output)
        if hasattr(error, 'status_code'):
            response.status_code = error.status_code
        elif hasattr(error, 'code') and isinstance(error.code, int):
            response.status_code = error.code
        else:
            response.status_code = status_code
        return response

    app.register_error_handler(Exception, handle_error_response)
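
# A minimal wiring sketch for the handler above. The app creation and the
# route are hypothetical illustrations, not part of the original module;
# only error_handler() itself comes from the source.
from flask import Flask

app = Flask(__name__)
error_handler(app)

@app.route('/boom')
def boom():
    # Any uncaught exception is serialized to JSON by handle_error_response
    raise RuntimeError('something went wrong')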

def __init__(self,
             config=os.getenv("CONFIG",
                              os.path.join(root, "configs", "config.yaml"))):
    with open(config) as f:
        content = f.read().strip()
    metrics_info = yaml.safe_load(content)
    name = metrics_info.get("name", "prometheus_exporter")
    metrics_defs = metrics_info.get("metrics", [])
    self.logger = get_logger(
        os.path.join(root, "logs", "{}.log".format(name)), name)
    self.metrics = dict()
    self.instant_metrics = []
    self.continus_metrics = []
    for item in metrics_defs:
        metric_name = item.get("name")
        metric_desc = item.get("description")
        metric_labels = item.get("labels")
        metric_type = item.get("type", "gauge")
        metric_instant = item.get("instant", True)
        if metric_instant:
            self.instant_metrics.append(metric_name)
        else:
            self.continus_metrics.append(metric_name)
        self.metrics[metric_name] = metrics_classes[metric_type](
            metric_name, metric_desc, metric_labels,
            registry=metrics_registry)
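
# A config.yaml shape the constructor above would accept, inferred from its
# .get() calls; every name and value below is illustrative, not from the
# source.
EXAMPLE_CONFIG = """
name: prometheus_exporter
metrics:
  - name: request_latency_seconds
    description: Latency of handled requests
    labels: [method, endpoint]
    type: gauge
    instant: true
"""
# yaml.safe_load(EXAMPLE_CONFIG) yields the dict the constructor consumes.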

def main(args):
    pid = os.getpid()
    print('Running with PID', pid)
    logger = get_logger('XML-to-MASK-{}'.format(pid))

    output_dir_path = args.output_parent_dir / args.output_folder_name
    logger.info('Creating output directory at %s', str(output_dir_path))
    output_dir_path.mkdir(parents=True, exist_ok=True)

    logger.info('Reading WSI data objects.')
    start = args.data_offset
    wsi_data = parse_dataset(args.data_list_file)[start:]
    count = args.count
    if count > len(wsi_data):
        raise ValueError('Offset and count out of bounds.')
    wsi_data = wsi_data[:count]

    while wsi_data:
        data = wsi_data.pop(0)
        logger.info('Creating mask for %s', data.name)
        reader = mir.MultiResolutionImageReader()
        if not data.tif_path.is_file():
            logger.warning('TIF File not found. Ignoring %s', data.name)
            continue
        mr_image = reader.open(str(data.tif_path))
        annotation_list = mir.AnnotationList()
        xml_repository = mir.XmlRepository(annotation_list)
        if data.label_xml_path is None:
            logger.info('No annotation exists. Ignoring %s', data.name)
            continue
        elif not data.label_xml_path.is_file():
            logger.warning('Label File not found. Ignoring %s', data.name)
            continue
        xml_repository.setSource(str(data.label_xml_path))
        xml_repository.load()
        annotation_mask = mir.AnnotationToMask()
        output_path = output_dir_path / (data.name + '_Mask.tif')
        annotation_mask.convert(
            annotation_list,
            str(output_path),
            mr_image.getDimensions(),
            mr_image.getSpacing(),
            _LABEL_MAP,
        )
        logger.info('Mask saved for %s at %s', data.name, str(output_path))
        del data

def init_queue():
    atexit.register(terminate_worker)
    logger = get_logger('queue')
    logger.info('Starting queue service')
    try:
        os.unlink(SERVER_ADDRESS)
    except OSError:
        if os.path.exists(SERVER_ADDRESS):
            raise
    socket_client.bind(SERVER_ADDRESS)
    socket_client.listen(1)
    return logger
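
# A sketch of how the service above might be driven. The accept loop and
# the 4096-byte read are assumptions; only init_queue(), terminate_worker
# and the module-level socket_client/SERVER_ADDRESS come from the source.
logger = init_queue()
while True:
    connection, _ = socket_client.accept()
    try:
        payload = connection.recv(4096)
        logger.info('Received %d bytes', len(payload))
    finally:
        connection.close()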

def main(args):
    logger = get_logger('XML-to-MASK')

    output_dir_path = args.output_path / args.output_dir_name
    logger.info('Creating output directory at %s', str(output_dir_path))
    output_dir_path.mkdir(parents=True, exist_ok=True)

    logger.info('Reading WSI data objects.')
    wsi_data = read_wsi_list_file(args.data_list_file)
    while wsi_data:
        data = wsi_data.pop(0)
        logger.info('Creating mask for %s', data.name)
        reader = mir.MultiResolutionImageReader()
        if not data.tif_path.is_file():
            logger.warning('TIF File not found. Ignoring %s', data.name)
            continue
        mr_image = reader.open(str(data.tif_path))
        annotation_list = mir.AnnotationList()
        xml_repository = mir.XmlRepository(annotation_list)
        if data.label_path is None:
            logger.info('No annotation exists. Ignoring %s', data.name)
            continue
        elif not data.label_path.is_file():
            logger.warning('Label File not found. Ignoring %s', data.name)
            continue
        xml_repository.setSource(str(data.label_path))
        xml_repository.load()
        annotation_mask = mir.AnnotationToMask()
        output_path = output_dir_path / (data.name + '_Mask.tif')
        annotation_mask.convert(
            annotation_list,
            str(output_path),
            mr_image.getDimensions(),
            mr_image.getSpacing(),
            _LABEL_MAP,
        )
        logger.info('Mask saved for %s at %s', data.name, str(output_path))
        # slide = openslide.OpenSlide(str(output))
        # img = slide.read_region(
        #     location=(0, 0),
        #     level=args.level,
        #     size=slide.level_dimensions[args.level],
        # ).convert('L')
        del data

def send_message(user_id, subject, body):
    try:
        message = UserMessage(user_id=user_id, subject=subject, message=body)
        db.session.add(message)
        db.session.commit()
        db.session.refresh(message)
        emit(
            'USER_WS_MESSAGE',
            {'data': dict(message)},
            namespace='/messages/%s' % user_id,
            broadcast=True
        )
    except sqlalchemy.exc.SQLAlchemyError:
        logger = get_logger('app')
        logger.exception('Could not send message')
        return False
    return True

def __init__(self, get_response):
    self.get_response = get_response
    self.logger = get_logger()
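
# For context, the standard middleware shape this __init__ typically sits
# in; the class name and the __call__ body below are assumptions following
# the conventional get_response pattern, not code from the source.
class LoggingMiddleware:
    def __init__(self, get_response):
        self.get_response = get_response
        self.logger = get_logger()

    def __call__(self, request):
        # Log the request line, then delegate to the rest of the stack.
        self.logger.info('%s %s', request.method, request.path)
        return self.get_response(request)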

from __future__ import annotations

import asyncio
import ipaddress
import json
import sys
import traceback
from argparse import ArgumentParser

from core import AzureAPI, NamingError, ResourceDB, TaggingError, get_logger

############################################################
# Common
############################################################

log = get_logger('validation')


class Result:
    def __init__(self, valid=True):
        self.valid = valid

    def update(self, new_result) -> Result:
        if not new_result.valid:
            self.valid = False
        return self

    def info(self, *args, **kwargs) -> Result:
        log.info(*args, **kwargs)
        return self
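
# A chaining sketch for Result; the validation outcomes below are made up
# purely to show how update() and info() compose and return self.
overall = Result()
overall.update(Result(valid=False)).info('naming check failed')
overall.update(Result(valid=True))
assert overall.valid is False  # one failed sub-result taints the whole run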

# -*- coding: utf-8 -*-
import glob
import os
import shutil
import sys
import fnmatch

from core.Config import config
from core.ProgressBar import ProgressBar
from core import get_logger

pbar = ProgressBar("")
logger = get_logger(__name__)


def findMovieInDir(folder):
    # Return the single movie file in the folder, or None if there is
    # no unambiguous candidate.
    files = []
    for f in os.listdir(folder):
        name, ext = os.path.splitext(f)
        if ext in config.movie_exts:
            files.append(f)
    if len(files) == 1:
        return os.path.join(folder, files[0])
    return None


def isFolderEmpty(folder):
    # Body truncated in the source; a minimal completion, assuming an
    # empty directory listing means "empty":
    return len(os.listdir(folder)) == 0

#!/usr/bin/env python3
import atexit
import sys
from os import path

from core import FileUtils, get_logger
from core.Config import config
from core.Imdb import Imdb
from core.Movie import Movie

logger = get_logger('pymdb-rename')
logger.info("starting")
logger.debug(config)


def usage():
    print("pymdb-rename.py <INPUT_FILE>")


def exit_handler():
    logger.info('quitting')


atexit.register(exit_handler)

if __name__ == "__main__":
    if len(sys.argv) < 2:
        usage()
        exit(1)

from datetime import datetime
from datetime import timedelta

import core
import psh
import re
import settings
import sys

reload(sys)
sys.setdefaultencoding("utf-8")

NAME = __file__.split(".")[-2]
LOG = core.get_logger(NAME)
pipe = psh.Sh()


class Checks(object):
    def __init__(self):
        LOG.debug("Checks initialized. Time period=%s" %
                  settings.UPDATE_PERIOD)

    def load_average(self):
        nproc = pipe.nproc(_defer=False).stdout().strip()
        load_average = pipe.cat("/proc/loadavg").execute().stdout().split()[:3]
        return ", ".join(["{0}/{1}".format(
            load_average[0], nproc)] + load_average[1:])

    def _service_status(self, service):
        process = pipe.systemctl("status", service) | pipe.grep("Active:")
        # Truncated in the source; a plausible continuation following the
        # execute()/stdout() pattern used in load_average():
        return process.execute().stdout().strip()

def __init__(self, request):
    self.request = request
    self.logger = get_logger()

import os
import re
from os import path

from core import FileUtils, get_logger
from core.Config import config


class Movie:
    logger = get_logger(__name__)

    def __init__(self, inpath):
        self.title = None
        self.year = None
        self.in_is_dir = False
        if path.isdir(inpath):
            self.in_is_dir = True
            FileUtils.cleanFolder(inpath)
            inpath = FileUtils.findMovieInDir(inpath)
            if not inpath:
                raise FileNotFoundError(
                    "could not determine input from folder")
        self.path = inpath
        self.dirname = path.dirname(inpath)
        self.filename = path.basename(inpath)
        self.name, self.ext = path.splitext(self.filename)
        self.name = self.__fix_name(self.name)
        self.__parse_fixed_name()
        self.logger.debug(self.__dict__)

    def get_formatted_name(self, withext=False):
        f = config.movie_format.format(n=self.title, y=self.year)
        if withext:
            return f + self.ext
        return f

    def do_output(self, outpath=None):
        if outpath:
            # called specifying an output folder (unit test)
            output = path.join(outpath,
                               path.basename(self.get_formatted_name(True)))
        else:
            output = self.get_formatted_name(True)
        if path.exists(output):
            raise IOError("File exists: {}".format(output))
        if config.action.upper() == "COPY":
            self.__copy(output)
        elif config.action.upper() == "MOVE":
            self.__move(output)
        elif config.action.upper() == "TEST":
            self.__test(output)
        else:
            self.logger.error("unknown action '{}'".format(config.action))
        return None

    def __copy(self, output):
        try:
            self.logger.info("starting data copy")
            self.logger.info("{} -> {}".format(self.path, output))
            FileUtils.copyFile(self.path, output)
        except FileExistsError as e:
            self.logger.error(e)
        except FileNotFoundError as e:
            self.logger.error(e)
        else:
            return output
        return None

    def __move(self, output):
        try:
            os.rename(self.path, output)
            self.logger.info("removing input file")
            if self.in_is_dir:
                if FileUtils.isFolderEmpty(self.dirname):
                    os.rmdir(self.dirname)
                    self.logger.info("removed empty input folder")
        except OSError:
            self.logger.error("input file could not be removed")

    def __test(self, output):
        self.logger.info("{} -> {}".format(self.path, output))

    def __parse_fixed_name(self):
        # Keep everything up to a plausible 4-digit year; group(1) is the year.
        m = re.match(r".*([1-3][0-9]{3})", self.name)
        if m is not None:
            self.name = m.group(0)
            self.year = m.group(1)
        else:
            self.year = None

    def __fix_name(self, name):
        # Strip separators and common release tags before parsing.
        rep_chars = [
            '<', '>', '*', '?', '|', '\\', '/', '"', ':', '.', '[', ']',
            '_', '-', '(', ')', "1080p", "1080", "720p", "720", "4k", "4K",
            "2160p", "BluRay", "WEBRip", "x264", "AAC5.1", "YTS.MX", " "
        ]
        for c in rep_chars:
            name = name.replace(c, " ")
        return name
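
# A usage sketch for the class above. The input path is made up, and
# `title` is assumed to be filled in externally (e.g. from an Imdb lookup)
# before renaming, since the class itself never sets it.
movie = Movie("/downloads/Some.Movie.2019.1080p.BluRay.x264.mkv")
movie.title = "Some Movie"
print(movie.get_formatted_name(withext=True))
movie.do_output()  # honors config.action: COPY, MOVE, or TEST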

def terminate_worker(code=0):
    logger = get_logger('queue')
    logger.info('Terminating service')
    socket_client.close()
    exit(code)

def extract_patches_v2(args):
    # create output dirs
    output_dir = args.output_parent_dir / args.output_folder_name
    output_patches_dir = output_dir / 'data'
    output_labels_dir = output_dir / 'labels'
    output_patches_dir.mkdir(parents=True, exist_ok=True)
    output_labels_dir.mkdir(parents=True, exist_ok=True)

    # initialize normalizers
    # pid = os.getpid()
    # print('Running with PID', pid)
    # lg = get_logger('Extract-Patches-{}'.format(pid))
    lg = get_logger('Extract-Patches')
    sample_dir = Path('../sampled_tiles_from_centers')
    centre_samples = tuple(
        map(
            lambda sample: utils.read_image(str(sample_dir / sample)),
            (
                'centre_0_patient_006_node_1.jpeg',
                'centre_1_patient_024_node_2.jpeg',
                'centre_2_patient_056_node_1.jpeg',
                'centre_3_patient_066_node_1.jpeg',
                'centre_4_patient_097_node_3.jpeg',
            ),
        ))
    normalizers = tuple(stainNorm_Vahadane.Normalizer()
                        for _ in range(len(centre_samples)))
    lg.info('Fitting stain normalizers...')
    for sample, normalizer in zip(centre_samples, normalizers):
        normalizer.fit(sample)
    lg.info('Done fitting.')

    # fetch data set
    lg.info('Reading data list file...')
    wsi_data = parse_dataset(args.data_list_file)
    lg.info('Done reading.')

    # for each slide
    while wsi_data:
        slide = wsi_data.pop(0)
        lg.info('Sampling from %s', slide.name)
        level = args.level_downsample
        ds_level = 5
        side = args.patch_side
        dim = side, side
        wsi_width, wsi_height = slide.get_level_dimension(level)
        large_patch_side = args.large_patch_span * side
        large_patch_dim = large_patch_side, large_patch_side
        upscale_factor = 2**level
        downscale_factor = 2**(ds_level - level)
        roi_patch_cover_count = (_ROI_PATCH_COVER_PERCENTAGE *
                                 (side // downscale_factor)**2)
        roi = slide.get_roi_mask(ds_level)
        # x_lvl, y_lvl -- coords relative to specified level
        for y_lvl in range(0, wsi_height, large_patch_side):
            for x_lvl in range(0, wsi_width, large_patch_side):
                lg.info('Sampling from large region (%s, %s)', x_lvl, y_lvl)
                w_large = min(large_patch_side, wsi_width - x_lvl)
                h_large = min(large_patch_side, wsi_height - y_lvl)
                w_large = (w_large // side) * side
                h_large = (h_large // side) * side
                # w_pad = large_patch_side - w_large
                # h_pad = large_patch_side - h_large
                if w_large * h_large == 0:
                    lg.info(
                        'Insufficient area for a single patch (W:%s, H:%s). '
                        'Skipping...', w_large, h_large)
                    continue
                large_dim = (w_large, h_large)
                x_0 = x_lvl * upscale_factor
                y_0 = y_lvl * upscale_factor
                large_region, large_label = slide.read_region_and_label(
                    (x_0, y_0),
                    level,
                    large_dim,
                )
                # large_region = pad(
                #     large_region,
                #     pad_width=((0, h_pad), (0, w_pad), (0, 0)),
                #     mode='constant',
                # )
                # large_label = pad(
                #     large_label,
                #     pad_width=((0, h_pad), (0, w_pad)),
                #     mode='constant',
                # )
                x_ds_lvl = x_lvl // downscale_factor
                y_ds_lvl = y_lvl // downscale_factor
                large_roi = roi[
                    y_ds_lvl:(y_ds_lvl + h_large // downscale_factor),
                    x_ds_lvl:(x_ds_lvl + w_large // downscale_factor)]
                roi_cover = np.sum(large_roi)
                if roi_cover < roi_patch_cover_count:
                    lg.info(
                        'Insufficient ROI area w/in large patch (%s). '
                        'Skipping...', roi_cover)
                    continue

                # Debug visualization; the `continue` below makes the rest
                # of the loop body unreachable work-in-progress (note that
                # `patch` is never defined in this version).
                fig, axes = plt.subplots(nrows=1, ncols=2)
                axes[0].imshow(large_region)
                axes[1].imshow(large_roi)
                plt.show()
                continue

                normalized_patches = []
                # lg.info('[WSI %s] - Normalizing...', slide.name)
                for n in normalizers:
                    if n == slide.centre:
                        normalized_patches.append(patch)
                    else:
                        normalized_patch = n.transform(patch)
                        normalized_patches.append(normalized_patch)

def extract_patches_v1(args):
    # create output dirs
    output_dir = args.output_parent_dir / args.output_folder_name
    output_patches_dir = output_dir / 'data'
    output_labels_dir = output_dir / 'labels'
    output_patches_dir.mkdir(parents=True, exist_ok=True)
    output_labels_dir.mkdir(parents=True, exist_ok=True)

    # initialize normalizers
    pid = os.getpid()
    print('Running with PID', pid)
    lg = get_logger('Extract-Patches-{}'.format(pid))
    sample_dir = Path('../sampled_tiles_from_centers')
    centre_samples = tuple(
        map(
            lambda sample: utils.read_image(str(sample_dir / sample)),
            (
                'centre_0_patient_006_node_1.jpeg',
                'centre_1_patient_024_node_2.jpeg',
                'centre_2_patient_056_node_1.jpeg',
                'centre_3_patient_066_node_1.jpeg',
                'centre_4_patient_097_node_3.jpeg',
            ),
        ))
    normalizers = tuple(stainNorm_Vahadane.Normalizer()
                        for _ in range(len(centre_samples)))
    lg.info('Fitting stain normalizers...')
    for sample, normalizer in zip(centre_samples, normalizers):
        normalizer.fit(sample)
    lg.info('Done fitting.')

    # fetch data set
    lg.info('Reading data list file...')
    wsi_data = parse_dataset(args.data_list_file)
    lg.info('Done reading.')

    # for each slide
    while wsi_data:
        slide = wsi_data.pop(0)
        # for each pos in slide
        level = args.level_downsample
        side = args.patch_side
        dim = side, side
        pos_args = level, args.stride, args.patch_side
        lg.info('[WSI %s] - Counting patches...', slide.name)
        centre_count = len(centre_samples)
        patch_count = count_patches(slide, *pos_args) * centre_count
        lg.info('[WSI %s] - Extracting %s patches', slide.name, patch_count)
        for i, pos in enumerate(slide.get_roi_patch_positions(*pos_args)):
            idx = (i + 1) * centre_count
            lg.info('[WSI %s] - Patch (%s / %s)', slide.name, idx, patch_count)
            # get patch and label
            patch, label = slide.read_region_and_label(pos, level, dim)
            # ignore when low variance
            gray = np.array(Image.fromarray(patch).convert('L'))
            img_var = gray.var()
            if img_var < 350:
                lg.info('[WSI %s] - Dropping. Low color variance.', slide.name)
                continue
            normalized_patches = []
            # for each normalizer: keep the patch as-is for the slide's own
            # centre, re-stain it for every other centre
            lg.info('[WSI %s] - Normalizing...', slide.name)
            for centre_idx, n in enumerate(normalizers):
                if centre_idx == slide.centre:
                    normalized_patches.append(patch)
                else:
                    # normalize patch
                    normalized_patch = n.transform(patch)
                    normalized_patches.append(normalized_patch)
            lg.info('[WSI %s] - Done normalizing.', slide.name)
            lg.info('[WSI %s] - Writing patches to disk.', slide.name)
            uuid_suffix = str(uuid4()).replace('-', '_')
            stem = slide.name + '_' + uuid_suffix
            for c, normalized_patch in enumerate(normalized_patches):
                centre_stem = stem + '_c' + str(c)
                patch_filename = centre_stem + '.jpeg'
                label_filename = centre_stem + '.mat'
                save_patch(normalized_patch,
                           output_patches_dir / patch_filename)
                save_label(label, output_labels_dir / label_filename)
            lg.info('[WSI %s] - Done writing.', slide.name)

def show_info(args):
    pid = os.getpid()
    print('Running with PID', pid)
    lg = get_logger('Extract-Patches-{}'.format(pid))

    wsi_data = parse_dataset(args.data_list_file)
    slide = wsi_data[0]
    level = args.level_downsample
    side = args.patch_side
    dim = side, side
    patch_count = 0
    pos_args = level, args.stride, args.patch_side

    # Save a handful of sample patches/labels to estimate on-disk sizes.
    for i, pos in enumerate(slide.get_roi_patch_positions(*pos_args)):
        if i < 100:
            continue
        patch, label = slide.read_region_and_label(pos, level, dim)
        patch_filename = _SAMPLE_PATCH_FILENAME_TEMPLATE.format(patch_count)
        label_filename = _SAMPLE_LABEL_FILENAME_TEMPLATE.format(patch_count)
        save_patch(patch, _TMP_PATH / patch_filename)
        save_label(label, _TMP_PATH / label_filename)
        if patch_count == _INFO_SAMPLE_COUNT - 1:
            break
        else:
            patch_count += 1

    avg_patch_size_B = 0
    avg_label_size_B = 0
    for i in range(patch_count):
        patch_filename = _SAMPLE_PATCH_FILENAME_TEMPLATE.format(i)
        label_filename = _SAMPLE_LABEL_FILENAME_TEMPLATE.format(i)
        avg_patch_size_B += os.stat(str(_TMP_PATH / patch_filename)).st_size
        avg_label_size_B += os.stat(str(_TMP_PATH / label_filename)).st_size
    avg_patch_size_B /= patch_count
    avg_label_size_B /= patch_count
    avg_data_B = avg_patch_size_B + avg_label_size_B
    lg.info('Average patch size ------------------------- %s kB',
            int(avg_patch_size_B // 1024))
    lg.info('Average label size ------------------------- %s kB',
            int(avg_label_size_B // 1024))

    total_sz_GB = 0
    total_patch_count = 0
    lg.info('Size of data per WSI:')
    while wsi_data:
        slide = wsi_data.pop(0)
        count = count_patches(slide, *pos_args) * 5
        total_patch_count += count
        sz_GB = count * avg_data_B / 1024**3
        total_sz_GB += sz_GB
        s = ''
        s += ' ' * 2
        s += slide.name
        s += ' ' + ('-' * (40 - len(slide.name))) + ' '
        s += '{} tiles ({:.3f} GB); '.format(count, sz_GB)
        s += '[{}w x {}h]'.format(*slide.get_level_dimension(level))
        lg.info(s)
    lg.info('Total: %s tiles (%s GB)', total_patch_count,
            round(total_sz_GB, 4))

def main():
    args = collect_arguments()
    pid = os.getpid()
    print('Running with PID', pid)
    lg = get_logger('Extract-Patches-Random-{}'.format(pid))

    output_dir = args.output_parent_dir / args.output_folder_name
    output_tumor_dir = output_dir / _OUTPUT_TUMOR_DIRNAME
    output_normal_dir = output_dir / _OUTPUT_NORMAL_DIRNAME
    for od in (output_tumor_dir, output_normal_dir):
        (od / _PATCHES_DIRNAME).mkdir(parents=True, exist_ok=True)
        (od / _LABELS_DIRNAME).mkdir(parents=True, exist_ok=True)

    batch_size = args.normalization_batch_size
    patch_side = args.patch_side
    patch_dim = patch_side, patch_side
    level = args.level

    lg.info('Reading dataset...')
    dataset = parse_dataset(args.data_list_file)
    lg.info('Done reading.')

    lg.info('Initializing normalizers...')
    normalizers = get_normalizers()
    lg.info('Done initializing.')

    while dataset:
        slide = dataset.pop(0)
        lg.info('Sampling from %s.', slide.name)
        positive_roi = slide.get_metastases_mask(_MASK_LEVEL)
        negative_roi = np.bitwise_and(
            slide.get_roi_mask(_MASK_LEVEL),
            np.invert(positive_roi),
        )
        positive_area_0 = np.argwhere(positive_roi).shape[0]
        positive_area_0 = positive_area_0 * (2**_MASK_LEVEL)**2
        patch_area_0 = (patch_side * 2**level)**2
        approx_positive_count = round(positive_area_0 / patch_area_0) * 8
        sampling_kwargs = {
            'patch_side': patch_side,
            'level': level,
            'normalizers': normalizers,
            'slide': slide,
            'batch_size': batch_size,
            'lg': lg,
        }
        if slide.label_xml_path is not None:
            lg.info('[%s] - Sampling tumor patches.', slide.name)
            sample_patches(
                max(
                    _MIN_TUMOR_PATCHES,
                    min(approx_positive_count,
                        _SAMPLE_SIZES['tumor']['positive'])),
                positive_roi,
                output_tumor_dir,
                **sampling_kwargs,
            )
            lg.info('[%s] - Done sampling tumor patches.', slide.name)
            if not slide.is_excluded:
                lg.info('[%s] - Sampling normal patches.', slide.name)
                sample_patches(
                    _SAMPLE_SIZES['tumor']['negative'],
                    negative_roi,
                    output_normal_dir,
                    **sampling_kwargs,
                )
                lg.info('[%s] - Done sampling normal patches.', slide.name)
        else:
            lg.info('[%s] - Sampling normal patches.', slide.name)
            sample_patches(
                _SAMPLE_SIZES['normal']['negative'],
                negative_roi,
                output_normal_dir,
                **sampling_kwargs,
            )
            lg.info('[%s] - Done sampling normal patches.', slide.name)
        lg.info('Done slide %s', slide.name)
        del positive_roi
        del negative_roi
        slide.close()
        lg.info('Freed %s', gc.collect())

import asyncio
import pickle
import time
from datetime import datetime, timedelta

import aioredis
from playhouse.shortcuts import model_to_dict

import settings
from core import get_database_manager, get_logger, get_redis_url
from models import Novel, UserFavorite

objects = get_database_manager()
logger = get_logger()


async def fav_main(redis_pool):
    while True:
        logger.info("process fav_novel start...")
        total = 0
        start_time = time.time()
        favs = await objects.execute(
            UserFavorite.select(UserFavorite.novel_id).group_by(
                UserFavorite.novel_id))
        novel_ids = [fav.novel_id for fav in favs]
        novels = await objects.execute(Novel.select().where(
            Novel.id.in_(novel_ids)))
        for novel in novels:
            async with redis_pool.get() as redis:
                value = pickle.dumps(novel, protocol=-1)

from datetime import datetime
from datetime import timedelta
from logging import DEBUG

import copy
import cPickle as pickle
import os

import core
import settings

PROGRAMNAME = __file__.split(".")[-2]
LOG = core.get_logger(PROGRAMNAME, DEBUG)


class Error(object):
    errors = {
        1000: "Unspecified error",
        1001: "Memo doesn't exist",
        1002: "Memo has been removed",
        1003: "Bad params",
        1004: "Not enough params",
        1005: "Bad query",
        1006: "Memo already completed"
    }

    @staticmethod
    def get(code, expl=None):
        if code in Error.errors:
            er = {
                "ok": 0,
                "code": code,
                # The source is truncated here; presumably the matching
                # message from Error.errors follows:
                "error": Error.errors[code],
            }