def get_adenoma_histology(pm: PathManager):
    """
    Look up tubular, tubulovillous and villous histology on the PathManager

    Only the first element (index 0) of each `pm.get_histology` result is
    returned; the remaining elements are ignored here.
    :param pm: PathManager to query
    :return: tuple of the index-0 entries for (tubular, tubulovillous, villous)
    """
    kinds = (Histology.TUBULAR, Histology.TUBULOVILLOUS, Histology.VILLOUS)
    # query all three histologies first, then pick the leading entry of each
    tubular, tubulovillous, villous = [pm.get_histology(kind) for kind in kinds]
    return tubular[0], tubulovillous[0], villous[0]
def has_large_adenoma_precise(pm: PathManager, cm: CspyManager, min_size=10):
    """
    Decide whether a large adenoma is present, allowing a 'maybe' answer

    Pathology is consulted first; if it does not report a large adenoma on
    its own, adenoma locations from pathology are matched against the
    locations of colonoscopy findings split by `min_size`.
    WARNING: Logic will break down if >= 3 jars with the same location
    :param pm: PathManager supplying adenoma locations
    :param cm: CspyManager supplying colonoscopy findings (size + locations)
    :param min_size: findings with `size >= min_size` are treated as large
    :return: 1=large adenoma, 0=no large adenoma, 9=maybe large adenoma
    """
    if list(pm.get_locations_with_large_adenoma()):
        logger.info('Found size and adenoma-ness in pathology')
        return 1  # early exit: we found size and adenoma-ness

    # NOTE: small adenoma size from pathology is not reliable due to fragments,
    #   so adenomas sized below `min_size` are pooled with the unsized ones
    path_adenoma = list(pm.get_locations_with_unknown_adenoma_size())
    path_adenoma.extend(pm.get_locations_with_adenoma_size(max_size=min_size))

    # bucket every colonoscopy finding location by whether the finding is large
    cspy_large = []
    cspy_small = []
    for finding in cm.get_findings():
        bucket = cspy_large if finding.size >= min_size else cspy_small
        bucket.extend(finding.locations_or_none())

    logger.info(f'Adenoma locations: {[str(ss) for ss in path_adenoma]}')
    logger.info(f'Large polyp locations: {cspy_large}')
    if not cspy_large:
        return 0  # no large polyp

    has_maybe = False
    for aden_loc in path_adenoma:
        if aden_loc not in cspy_large:
            continue  # no large polyp was seen at this adenoma's location
        if aden_loc not in cspy_small:
            return 1  # only large polyps at this location: is large adenoma
        # both large and small polyps share this location: the adenoma might
        # be the small one unless pathology has at least as many adenomas
        # here as colonoscopy has polyps
        n_adenomas = path_adenoma.count(aden_loc)
        n_polyps = cspy_large.count(aden_loc) + cspy_small.count(aden_loc)
        if n_adenomas >= n_polyps:
            return 1
        has_maybe = True
    return 9 if has_maybe else 0
def has_dysplasia(pm: PathManager):
    """
    Report whether the PathManager flags dysplasia (highgrade, per the
    original note on this function)

    :param pm: PathManager whose `has_dysplasia()` result is converted
    :return: 1 if `pm.has_dysplasia()` is truthy, otherwise 0
    """
    found = pm.has_dysplasia()
    return int(bool(found))
def has_large_adenoma_broad(pm: PathManager, cm: CspyManager, min_size=10):
    """
    Broad check: some location has both a large polyp and an adenoma

    Large-polyp locations are gathered from pathology and from colonoscopy
    findings of at least `min_size`; a finding without locations contributes
    `None` (unknown location).
    :param pm: PathManager supplying adenoma locations and sized locations
    :param cm: CspyManager supplying findings of at least `min_size`
    :param min_size: size threshold passed through to both managers
    :return: 1 if a large adenoma is plausible, otherwise 0
    """
    adenoma_locs = set(pm.get_locations_with_adenoma())
    large_locs = set(pm.get_locations_with_size(min_size))
    for finding in cm.get_findings_of_size(min_size):
        if not finding.locations:
            large_locs.add(None)  # large polyp whose location is unknown
        large_locs.update(finding.locations)
    logger.info(f'Adenoma locations: {[str(ss) for ss in adenoma_locs]}')
    logger.info(f'Large polyp locations: {large_locs}')

    if adenoma_locs & large_locs:
        return 1  # both contain same location
    if large_locs and None in adenoma_locs:
        return 1  # unknown adenoma location and a large polyp exists
    if adenoma_locs and None in large_locs:
        return 1  # unknown large polyp location (cspy) and an adenoma exists
    return 0
def get_villous_histology(pm: PathManager, location: Location = Location.ANY,
                          allow_maybe=False):
    """
    Flag villous or tubulovillous histology for the requested location

    :param pm: PathManager to query
    :param location: location to look for
    :param allow_maybe: if False, only include when location is guaranteed
    :return: 1 if the tubulovillous + villous entry for the location is
        truthy, otherwise 0
    :raises ValueError: if `location` is not one of the handled locations
    """
    tbv = pm.get_histology(Histology.TUBULOVILLOUS, allow_maybe=allow_maybe)
    vil = pm.get_histology(Histology.VILLOUS, allow_maybe=allow_maybe)
    # position in this tuple == index into the histology results
    slots = (
        Location.ANY,
        Location.PROXIMAL,
        Location.DISTAL,
        Location.RECTAL,
        Location.UNKNOWN,
    )
    for idx, candidate in enumerate(slots):
        if location == candidate:
            return 1 if tbv[idx] + vil[idx] else 0
    raise ValueError(f'Unrecognized location: {location}')
def process_text(path_text='', cspy_text='',
                 cspy_finding_version=FindingVersion.BROAD):
    """
    Build the dictionary of extracted variables for one pathology /
    colonoscopy text pair

    Pathology-derived variables are added only when the PathManager is
    truthy, colonoscopy-derived variables only when the CspyManager is
    truthy. The result is finally extended with `split_maybe_counters`.
    :param path_text: pathology report text
    :param cspy_text: colonoscopy report text
    :param cspy_finding_version: version passed to CspyManager and to
        `has_large_adenoma`
    :return: dict mapping variable-name constants to extracted values
    """
    pm = PathManager(path_text)
    cm = CspyManager(cspy_text, version=cspy_finding_version)
    data = {}

    if pm:
        specs, _, _ = PathManager.parse_jars(path_text)
        tb, tbv, vl = get_adenoma_histology(pm)

        # Each helper result is unpacked as three values; the names in the
        # original suggest (cutoff, status, count) -- only the last two are
        # kept, and only the last from the jar_count=True variant.
        # overall
        _, adenoma_status, adenoma_count = get_adenoma_count_advanced(pm)
        _, _, jar_adenoma_count = get_adenoma_count_advanced(pm, jar_count=True)
        # distal
        _, aden_dist_status, aden_dist_count = get_adenoma_distal(pm)
        _, _, jar_ad_cnt_dist = get_adenoma_distal(pm, jar_count=True)
        # proximal
        _, aden_prox_status, aden_prox_count = get_adenoma_proximal(pm)
        _, _, jar_ad_cnt_prox = get_adenoma_proximal(pm, jar_count=True)
        # rectal
        _, aden_rect_status, aden_rect_count = get_adenoma_rectal(pm)
        _, _, jar_ad_cnt_rect = get_adenoma_rectal(pm, jar_count=True)
        # unknown location
        _, aden_unk_status, aden_unk_count = get_adenoma_unknown(pm)
        _, _, jar_ad_cnt_unk = get_adenoma_unknown(pm, jar_count=True)

        # histology, dysplasia and simple counts
        data.update({
            ADENOMA_STATUS: get_adenoma_status(specs),
            TUBULAR: tb,
            TUBULOVILLOUS: bool(tbv),
            VILLOUS: bool(vl),
            ANY_VILLOUS: get_villous_histology(pm),
            PROXIMAL_VILLOUS: get_villous_histology(pm, Location.PROXIMAL),
            DISTAL_VILLOUS: get_villous_histology(pm, Location.DISTAL),
            RECTAL_VILLOUS: get_villous_histology(pm, Location.RECTAL),
            UNKNOWN_VILLOUS: get_villous_histology(pm, Location.UNKNOWN),
            SIMPLE_HIGHGRADE_DYSPLASIA: get_highgrade_dysplasia(specs),
            HIGHGRADE_DYSPLASIA: has_dysplasia(pm),
            ADENOMA_COUNT: get_adenoma_count(specs),
            LARGE_ADENOMA: has_large_adenoma(pm, cm, version=cspy_finding_version),
        })
        # advanced adenoma counts, overall and by location
        data.update({
            ADENOMA_COUNT_ADV: adenoma_count,
            JAR_ADENOMA_COUNT_ADV: jar_adenoma_count,
            ADENOMA_STATUS_ADV: adenoma_status,
            ADENOMA_DISTAL: aden_dist_status,
            ADENOMA_DISTAL_COUNT: aden_dist_count,
            JAR_ADENOMA_DISTAL_COUNT: jar_ad_cnt_dist,
            ADENOMA_PROXIMAL: aden_prox_status,
            ADENOMA_PROXIMAL_COUNT: aden_prox_count,
            JAR_ADENOMA_PROXIMAL_COUNT: jar_ad_cnt_prox,
            ADENOMA_RECTAL: aden_rect_status,
            ADENOMA_RECTAL_COUNT: aden_rect_count,
            JAR_ADENOMA_RECTAL_COUNT: jar_ad_cnt_rect,
            ADENOMA_UNKNOWN: aden_unk_status,
            ADENOMA_UNKNOWN_COUNT: aden_unk_count,
            JAR_ADENOMA_UNKNOWN_COUNT: jar_ad_cnt_unk,
        })
        # sessile serrated and carcinoma jar counts
        data.update({
            JAR_SESSILE_SERRATED_ADENOMA_COUNT: get_sessile_serrated_adenoma(pm, jar_count=True),
            CARCINOMA_COUNT: get_carcinomas(pm, jar_count=True),
            CARCINOMA_MAYBE_COUNT: get_carcinomas_maybe(pm, jar_count=True),
            CARCINOMA_POSSIBLE_COUNT: get_carcinomas_possible(pm, jar_count=True),
            CARCINOMA_IN_SITU_COUNT: get_carcinomas_in_situ(pm, jar_count=True),
            CARCINOMA_IN_SITU_MAYBE_COUNT: get_carcinomas_in_situ_maybe(pm, jar_count=True),
            CARCINOMA_IN_SITU_POSSIBLE_COUNT: get_carcinomas_in_situ_possible(pm, jar_count=True),
        })

    if cm:
        data[INDICATION] = cm.indication
        data[NUM_POLYPS] = cm.num_polyps
        data[BOWEL_PREP] = cm.prep
        data[EXTENT] = cm.extent

    # split maybe counters into two separate columns
    extra_columns = split_maybe_counters(data)
    data.update(extra_columns)
    return data
def get_carcinomas_in_situ_possible(pm: PathManager, jar_count=True):
    """
    Fetch the 'maybe' carcinoma in situ count from the PathManager

    `probable_only` is not passed, so that method's own default applies.
    :param pm: PathManager to query
    :param jar_count: forwarded unchanged as the `jar_count` keyword
    :return: result of `pm.get_carcinoma_in_situ_maybe_count`
    """
    count = pm.get_carcinoma_in_situ_maybe_count(jar_count=jar_count)
    return count
def get_carcinomas_in_situ_maybe(pm: PathManager, jar_count=True):
    """
    Fetch the 'maybe' carcinoma in situ count, restricted with
    `probable_only=True`

    :param pm: PathManager to query
    :param jar_count: forwarded unchanged as the `jar_count` keyword
    :return: result of `pm.get_carcinoma_in_situ_maybe_count`
    """
    options = {'jar_count': jar_count, 'probable_only': True}
    return pm.get_carcinoma_in_situ_maybe_count(**options)
def get_carcinomas_in_situ(pm: PathManager, jar_count=True):
    """
    Fetch the carcinoma in situ count from the PathManager

    :param pm: PathManager to query
    :param jar_count: forwarded unchanged as the `jar_count` keyword
    :return: result of `pm.get_carcinoma_in_situ_count`
    """
    count = pm.get_carcinoma_in_situ_count(jar_count=jar_count)
    return count
def get_carcinomas(pm: PathManager, jar_count=True):
    """
    Fetch the carcinoma count from the PathManager

    :param pm: PathManager to query
    :param jar_count: forwarded unchanged as the `jar_count` keyword
    :return: result of `pm.get_carcinoma_count`
    """
    count = pm.get_carcinoma_count(jar_count=jar_count)
    return count
def get_sessile_serrated_adenoma(pm: PathManager, jar_count=True):
    """
    Fetch the sessile serrated count from the PathManager

    :param pm: PathManager to query
    :param jar_count: forwarded unchanged as the `jar_count` keyword
    :return: result of `pm.get_sessile_serrated_count`
    """
    count = pm.get_sessile_serrated_count(jar_count=jar_count)
    return count