def journal2table(journal):
    """Receives the string ``journal``; if its formatting is compatible with a CSV,
    returns it rendered as a text table (via tabulate).
    Otherwise simply returns the string unchanged."""
    try:
        string = json.loads(journal)
        string = listify(string)
        headers = [TABLECOLS.get(s, s) for s in string[0].keys()]
        values = [list(d.values()) for d in string]
    except json.JSONDecodeError:
        table = [
            [r.strip() for r in j.strip().split(",")]
            for j in journal.split("\n")
            if j.strip() != ""
        ]
        if not len(set(len(t) for t in table)) == 1:
            print(
                "The text passed as notes is not formatted correctly to build a table"
            )
            print(
                "The notes field will be sent as the string in its current format, without modification"
            )
            return journal
        values = table[1:]
        headers = table[0]
    return tabulate(values, headers, tablefmt="textile")
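# Minimal usage sketch for journal2table (hypothetical input strings; assumes the
# json, tabulate, listify and TABLECOLS names used above are available in the module).
csv_notes = "name,status\nrelay-01,ok\nrelay-02,failed"
print(journal2table(csv_notes))  # consistent column counts -> rendered by tabulate as a textile table
ragged = "name,status\nrelay-01"
print(journal2table(ragged))     # inconsistent column counts -> returned unchanged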
def mk_action(name: str, value) -> Atom:
    # with Python 3.10: value: np.ndarray | Any
    if isinstance(value, np.ndarray):
        value = listify(value.tolist())
        return ExecutionLink(SchemaNode(name), mk_list(*value))
    return ExecutionLink(SchemaNode(name), mk_node(value))
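# Usage sketch (hypothetical action names and values; assumes an OpenCog AtomSpace
# is initialised and the mk_list / mk_node helpers used above are in scope).
move = mk_action("move", np.array([1.0, 0.5]))  # ndarray -> ExecutionLink over a list of value nodes
grab = mk_action("grab", 1)                     # scalar  -> ExecutionLink over a single node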
def labeled_observation(self, space: Space, obs, sbs="") -> list[Atom]:
    """The main processing block from Gym observations to Atomese.

    Uses Gym's `Space` types to determine what kind of data structure is
    passed and produces a generic Atomese representation from it.
    Use the `sbs` argument to add more into resulting Atom names.

    Returns a list of created Atoms.
    """
    if isinstance(space, sp.Tuple):
        observation: List[Atom] = []
        for s in space:
            idx = len(observation)
            _sbs = sbs + "-" + str(idx) if sbs else str(idx)
            observation.extend(self.labeled_observation(s, obs[idx], _sbs))
        return observation
    elif isinstance(space, sp.Box):
        label = sbs + "-Box" if sbs else "Box"
        return self.transform_percept(label, *obs)
    elif isinstance(space, sp.Discrete):
        label = sbs + "-Discrete" if sbs else "Discrete"
        return self.transform_percept(label, obs)
    elif isinstance(space, sp.Dict):
        observation: List[Atom] = []
        for k in obs.keys():
            label = sbs + "-" + k if sbs else k
            if isinstance(space[k], sp.Discrete):
                observation += self.transform_percept(label, obs[k])
            elif isinstance(space[k], sp.Box):
                l = (obs[k].tolist() if isinstance(obs[k], np.ndarray)
                     else listify(obs[k]))
                observation += self.transform_percept(label, *l)
            elif isinstance(space[k], sp.Tuple):
                _sbs = sbs + "-" + k if sbs else k
                observation.extend(
                    self.labeled_observation(space[k], obs[k], _sbs))
            elif isinstance(space[k], sp.Dict):
                _sbs = sbs + "-" + k if sbs else k
                observation.extend(
                    self.labeled_observation(space[k], obs[k], _sbs))
            else:
                raise NotImplementedError(
                    "ObservationSpace not implemented.")
        return observation
    else:
        raise NotImplementedError("Unknown Observation Space.")
def chain_processed_data(
        problem_preproc_gen_dict: Dict[str, Iterator]) -> Iterator:
    # problem chunk size is 1, return generator directly
    if len(problem_preproc_gen_dict) == 1:
        return next(iter(problem_preproc_gen_dict.values()))

    if get_is_pyspark():
        from pyspark import RDD
        from .pyspark_utils import join_dict_of_rdd
        rdd = join_dict_of_rdd(rdd_dict=problem_preproc_gen_dict)
        return rdd

    logger.warning('Chaining problems with & may consume a lot of memory if'
                   ' data is not pyspark RDD.')
    data_dict = {}
    column_list = []
    for pro in problem_preproc_gen_dict:
        data_dict[pro] = listify(problem_preproc_gen_dict[pro])
        try:
            column_list.append(list(data_dict[pro][0].keys()))
        except IndexError:
            raise IndexError("Problem {} has no data".format(pro))

    # get the intersection of columns and use it to ensure features are the same
    join_key = list(set(column_list[0]).intersection(*column_list[1:]))

    flat_data_list = []
    first_problem = next(iter(problem_preproc_gen_dict.keys()))
    while data_dict[first_problem]:
        d = {}
        for pro in data_dict:
            if not d:
                d = data_dict[pro].pop(0)
            else:
                for k in join_key:
                    assert d[k] == data_dict[pro][0][k], \
                        'At iteration {}, feature {} not aligned. Expected {}, got: {}'.format(
                            len(flat_data_list), k, d[k], data_dict[pro][0][k])
                d.update(data_dict[pro].pop(0))
        flat_data_list.append(d)
    return flat_data_list
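# Usage sketch with hypothetical toy data (assumes a non-pyspark run, so
# get_is_pyspark() returns False): two problems sharing the same join keys are
# merged record by record into one flat list of feature dicts.
toy_gen_dict = {
    "cls_problem": iter([{"record_id": 0, "input_ids": [1, 2], "cls_label": 1}]),
    "tag_problem": iter([{"record_id": 0, "input_ids": [1, 2], "tag_label": [0, 1]}]),
}
merged = chain_processed_data(toy_gen_dict)
# merged[0] holds the union of both dicts; mismatched shared keys trigger the assert.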
def wrapper(ref, action):
    if isinstance(ref.action_space, sp.Discrete):
        if not len(ref.action_list) == ref.action_space.n:
            raise ValueError("Invalid action list.")
        action_name = action.out[0].name
        if action_name not in ref.action_list:
            raise ValueError(
                "Action {} not known in the environment.".format(action_name)
            )
        obs, r, done = step(ref, ref.action_list.index(action_name))
    elif isinstance(ref.action_space, sp.Dict):
        actions = listify(action)
        act_dict = {
            action.out[0].name: to_python(action.out[1]) for action in actions
        }
        obs, r, done = step(ref, act_dict)
    else:
        raise NotImplementedError("Unknown action space.")
    return ref.parse_world_state(ref.observation_space, obs, r, done)
def check_update(
    field: str,
    value,
    dtype,
    values_set: Iterable = None,
    val_text_string: bool = False,
) -> dict:
    """Checks whether `value` is of type `dtype`.
    Optionally checks whether `value` belongs to the set `values_set`.
    Optionally formats `value` with the `value_text_string` function.

    Returns:
        Dictionary in the format expected by the Redmine API:
        {"id": ..., "value": ...}
    """
    if not isinstance(value, dtype):
        raise TypeError(
            f"The field {field} is expected to be of type {dtype}, but {type(value)} was given"
        )
    if values_set is not None and not set(listify(value)).issubset(set(values_set)):
        raise ValueError(
            f"The value for {field}: {value} must belong to the set: {values_set}"
        )
    if val_text_string:
        value = value_text_string(value)
    return {"id": FIELD2ID[field], "value": value}
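# Usage sketch (hypothetical field name and values; assumes FIELD2ID has an entry
# for the field being updated, as in the snippets below).
check_update("tipo_de_processo", "inspecao", str, values_set=("inspecao", "outorga"))
# -> {"id": FIELD2ID["tipo_de_processo"], "value": "inspecao"}
# A value outside values_set raises ValueError; a wrong type raises TypeError.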
def convert_data_to_features(problem: str,
                             data_iter: Iterable,
                             params: Params,
                             label_encoder: Any,
                             tokenizer: Any,
                             mode=TRAIN) -> Iterable[dict]:
    if mode != PREDICT:
        problem_type = params.problem_type[problem]

        # whether this problem is sequential labeling;
        # for sequential labeling, targets need to align with any
        # change of inputs
        is_seq = problem_type in ['seq_tag']
    else:
        problem_type = 'cls'
        is_seq = False

    part_fn = partial(create_multimodal_bert_features,
                      problem=problem,
                      label_encoder=label_encoder,
                      params=params,
                      tokenizer=tokenizer,
                      mode=mode,
                      problem_type=problem_type,
                      is_seq=is_seq)
    preprocess_buffer = params.preprocess_buffer
    data_buffer_list = []
    num_cpus = params.num_cpus if params.num_cpus > 0 else num_cpus()
    # no easy fix for prediction in multiprocessing:
    # phase is not shared between processes
    num_cpus = 1 if mode == PREDICT else num_cpus
    for data_buffer_list in chunked(data_iter, chunk_sz=preprocess_buffer):
        per_cpu_chunk = listify(chunked(data_buffer_list, n_chunks=num_cpus))
        res_gen = Parallel(num_cpus)(delayed(part_fn)(example_list=d_list)
                                     for d_list in per_cpu_chunk)
        for d_list in res_gen:
            for d in d_list:
                yield d
def labeled_observations(self, space, obs, sbs=""):
    if isinstance(space, sp.Tuple):
        obs_list = []
        for s in space:
            idx = len(obs_list)
            _sbs = sbs + "-" + str(idx) if sbs else str(idx)
            obs_list.extend(self.labeled_observations(s, obs[idx], _sbs))
        return obs_list
    elif isinstance(space, sp.Box):
        label = sbs + "-Box" if sbs else "Box"
        return self.convert_percept(label, *obs)
    elif isinstance(space, sp.Discrete):
        label = sbs + "-Discrete" if sbs else "Discrete"
        return self.convert_percept(label, obs)
    elif isinstance(space, sp.Dict):
        obs_list = []
        for k in obs.keys():
            label = sbs + "-" + k if sbs else k
            if isinstance(space[k], sp.Discrete):
                obs_list += self.convert_percept(label, obs[k])
            elif isinstance(space[k], sp.Box):
                l = (listify(obs[k].tolist()) if isinstance(obs[k], np.ndarray)
                     else obs[k])
                obs_list += self.convert_percept(label, *l)
            elif isinstance(space[k], sp.Tuple):
                _sbs = sbs + "-" + k if sbs else k
                obs_list.extend(
                    self.labeled_observations(space[k], obs[k], _sbs))
            elif isinstance(space[k], sp.Dict):
                _sbs = sbs + "-" + k if sbs else k
                obs_list.extend(
                    self.labeled_observations(space[k], obs[k], _sbs))
            else:
                raise NotImplementedError(
                    "ObservationSpace not implemented.")
        return obs_list
    else:
        raise NotImplementedError("Unknown Observation Space.")
if tipo := d.get(key):
    d[key] = check_update(key, tipo, DICT_FIELDS[key], TIPO, True)
key = keys[2]
if not d.get(key):
    raise ValueError("The description field cannot be empty")
key = keys[3]
if fiscal := d.get(key):
    value = check_update(key, fiscal, DICT_FIELDS[key], name2id.keys())
    value["value"] = name2id[value["value"]]
    d[key] = value
key = keys[4]
if fiscais := d.get(key):
    fiscais = listify(fiscais)
    values = check_update(key, fiscais, DICT_FIELDS[key], name2id.keys())
    values["value"] = [name2id[v] for v in values["value"]]
    d[key] = values
key = keys[6]
if (relatorio := d.get(key, None)) is not None:
    dtype = DICT_FIELDS[key]
    if relatorio in (1, "1"):
        chave = keys[5]
        if (html := d.get(chave, None)) is not None:
            html = Path(html)
            if html.exists() and html.is_file():
                d[chave] = check_update(chave, html.read_text(),
                                        DICT_FIELDS[chave])
        d[key] = check_update(key, relatorio, dtype, (1, "1"))
    else:
def mk_action(name, value):
    if isinstance(value, np.ndarray):
        value = listify(value.tolist())
        return ExecutionLink(SchemaNode(name), mk_list(*value))
    return ExecutionLink(SchemaNode(name), mk_node(value))
def slide_rois_no_image(
    slide: Slide,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    ancestors: Optional[Sequence[Patch]] = None,
    offset: Coord = (0, 0),
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
) -> Iterator[Patch]:
    """
    Get patches with coordinates.

    Given a slide, a pyramid level, a patch size in pixels, an interval in
    pixels and an offset in pixels, get patches with their coordinates.
    Does not read any image data at any point.

    Args:
        slide: the slide to patchify.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        ancestors: patches that contain upcoming patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean mask.

    Yields:
        Patch objects with their coordinates.
    """
    psize = convert_coords(psize)
    offset = convert_coords(offset)
    ancestors = ifnone(ancestors, [])
    slide_filters = listify(slide_filters)
    if len(ancestors) > 0:
        mag = slide.level_downsamples[level]
        shape = Coord(ancestors[0].size_0) / mag
        size_0 = psize * mag
        for ancestor in ancestors:
            # ancestor is a patch
            rx, ry = ancestor.position
            prefix = ancestor.id
            k = 0
            for patch_coord in regular_grid(shape, interval, psize):
                k += 1
                idx = "{}#{}".format(prefix, k)
                position = patch_coord * mag + ry
                yield Patch(
                    id=idx,
                    slidename=slide._filename.split("/")[-1],
                    position=position,
                    level=level,
                    size=psize,
                    size_0=size_0,
                    parent=ancestor,
                )
    else:
        shape = Coord(*slide.level_dimensions[level])
        mag = slide.level_downsamples[level]
        thumb = numpy.array(slide.get_thumbnail((thumb_size, thumb_size)))
        mask = apply_slide_filters(thumb, slide_filters)
        k = 0
        for patch_coord in get_coords_from_mask(mask, shape, interval, psize):
            k += 1
            idx = "#{}".format(k)
            position = patch_coord * mag + offset
            size_0 = psize * mag
            yield Patch(
                id=idx,
                slidename=slide._filename.split("/")[-1],
                position=position,
                level=level,
                size=psize,
                size_0=size_0,
            )
def slide_rois(
    slide: Slide,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    ancestors: Optional[Sequence[Patch]] = None,
    offset: Coord = (0, 0),
    filters: Optional[Sequence[Filter]] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
) -> Iterator[Tuple[Patch, NDImage]]:
    """
    Get patches with coordinates.

    Given a slide, a pyramid level, a patch size in pixels, an interval in
    pixels and an offset in pixels, get patches with their coordinates.

    Args:
        slide: the slide to patchify.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        ancestors: patches that contain upcoming patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean mask.

    Yields:
        A tuple containing a Patch object and the corresponding image as ndarray.
    """
    psize = convert_coords(psize)
    offset = convert_coords(offset)
    ancestors = ifnone(ancestors, [])
    filters = listify(filters)
    slide_filters = listify(slide_filters)
    if len(ancestors) > 0:
        mag = slide.level_downsamples[level]
        shape = Coord(ancestors[0].size_0) / mag
        size_0 = psize * mag
        patches = []
        for ancestor in ancestors:
            # ancestor is a patch
            rx, ry = ancestor.position
            prefix = ancestor.id
            k = 0
            for patch_coord in regular_grid(shape, interval, psize):
                k += 1
                idx = "{}#{}".format(prefix, k)
                position = patch_coord * mag + ry
                patches.append(
                    Patch(
                        id=idx,
                        slidename=slide._filename.split("/")[-1],
                        position=position,
                        level=level,
                        size=psize,
                        size_0=size_0,
                        parent=ancestor,
                    ))
        for patch in tqdm(patches, ascii=True):
            try:
                image = slide.read_region(patch.position, patch.level, patch.size)
                image = numpy.array(image)[:, :, 0:3]
                if filter_image(image, filters):
                    yield patch, image
            except openslide.lowlevel.OpenSlideError:
                print(
                    "small failure while reading tile x={}, y={} in {}".format(
                        *patch.position, slide._filename))
    else:
        shape = Coord(*slide.level_dimensions[level])
        mag = slide.level_downsamples[level]
        thumb = numpy.array(slide.get_thumbnail((thumb_size, thumb_size)))
        mask = apply_slide_filters(thumb, slide_filters)
        k = 0
        for patch_coord in get_coords_from_mask(mask, shape, interval, psize):
            k += 1
            idx = "#{}".format(k)
            position = patch_coord * mag + offset
            size_0 = psize * mag
            try:
                image = slide.read_region(position, level, psize)
                image = numpy.array(image)[:, :, 0:3]
                if filter_image(image, filters):
                    yield Patch(
                        id=idx,
                        slidename=slide._filename.split("/")[-1],
                        position=position,
                        level=level,
                        size=psize,
                        size_0=size_0,
                    ), image
            except openslide.lowlevel.OpenSlideError:
                print(
                    "small failure while reading tile x={}, y={} in {}".format(
                        *position, slide._filename))
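# Usage sketch (hypothetical file name and processing step; assumes a pathaia-style
# Slide wrapper around an OpenSlide file and the filter helpers used above).
slide = Slide("example_slide.svs")
for patch, image in slide_rois(slide, level=1, psize=(256, 256), interval=(0, 0)):
    # patch carries id/position/level/size metadata, image is an RGB ndarray
    process(patch, image)  # process() is a placeholder for user code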