Example #1
def journal2table(journal):
    """Recebe a string journal, caso a formatação seja compatível com um csv, retorna este formato como markdown
    Do contrário simplesmente retorna a string inalterada"""
    try:
        # Attempt to parse the string as JSON (a dict or a list of dicts)
        data = listify(json.loads(journal))
        headers = [TABLECOLS.get(s, s) for s in data[0].keys()]
        values = [list(d.values()) for d in data]
    except json.JSONDecodeError:
        # Fall back to treating the text as comma-separated lines (CSV-like)
        table = [
            [r.strip() for r in j.strip().split(",")]
            for j in journal.split("\n")
            if j.strip() != ""
        ]
        if len({len(t) for t in table}) != 1:
            print(
                "The text passed as notes is not formatted correctly to build a table"
            )
            print(
                "The notes field will be sent as the string as-is, without modification"
            )
            return journal
        values = table[1:]
        headers = table[0]

    return tabulate(values, headers, tablefmt="textile")
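Every example on this page calls listify, whose source is not shown. Below is a minimal sketch of what such a helper typically does, inferred from the call sites in these snippets (None becomes an empty list, strings and dicts get wrapped, other iterables become lists, scalars get wrapped); this is an assumption, not the library's actual implementation:

from collections.abc import Iterable, Mapping

def listify(obj):
    # Assumed behavior, inferred from usage: Example #1 expects a list of
    # dicts back, so mappings are wrapped rather than iterated.
    if obj is None:
        return []
    if isinstance(obj, (str, Mapping)):
        return [obj]
    if isinstance(obj, Iterable):
        return list(obj)
    return [obj]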
Example #2
def mk_action(name: str,
              value) -> Atom:  # with Python 3.10 value: np.ndarray | Any
    if isinstance(value, np.ndarray):
        value = listify(value.tolist())
        return ExecutionLink(SchemaNode(name), mk_list(*value))

    return ExecutionLink(SchemaNode(name), mk_node(value))
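A hedged usage sketch for mk_action (assumes OpenCog's Atomese bindings and the mk_list/mk_node helpers referenced above are in scope; the action names and values are made up):

import numpy as np

# ndarray value: flattened to a Python list and wrapped via mk_list
mk_action("set-velocity", np.array([0.5, -1.0]))
# scalar value: a single node is built via mk_node
mk_action("jump", 1)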
Example #3
    def labeled_observation(self, space: Space, obs, sbs="") -> list[Atom]:
        """The main processing block from Gym observations to Atomese

        Uses Gym's `Space` types to determine what kind of data structure was passed
        and produces a generic Atomese representation from it.
        Use the `sbs` argument to prepend extra context to the resulting Atom names.

        Returns a list of created Atoms.
        """

        if isinstance(space, sp.Tuple):
            observation: List[Atom] = []
            for s in space:
                idx = len(observation)
                _sbs = sbs + "-" + str(idx) if sbs else str(idx)
                observation.extend(self.labeled_observation(s, obs[idx], _sbs))
            return observation
        elif isinstance(space, sp.Box):
            label = sbs + "-Box" if sbs else "Box"
            return self.transform_percept(label, *obs)
        elif isinstance(space, sp.Discrete):
            label = sbs + "-Discrete" if sbs else "Discrete"
            return self.transform_percept(label, obs)
        elif isinstance(space, sp.Dict):
            observation: List[Atom] = []
            for k in obs.keys():
                label = sbs + "-" + k if sbs else k
                if isinstance(space[k], sp.Discrete):
                    observation += self.transform_percept(label, obs[k])
                elif isinstance(space[k], sp.Box):
                    vals = (obs[k].tolist() if isinstance(obs[k], np.ndarray)
                            else listify(obs[k]))
                    observation += self.transform_percept(label, *vals)
                elif isinstance(space[k], sp.Tuple):
                    _sbs = sbs + "-" + k if sbs else k
                    observation.extend(
                        self.labeled_observation(space[k], obs[k], _sbs))
                elif isinstance(space[k], sp.Dict):
                    _sbs = sbs + "-" + k if sbs else k
                    observation.extend(
                        self.labeled_observation(space[k], obs[k], _sbs))
                else:
                    raise NotImplementedError(
                        "ObservationSpace not implemented.")
            return observation
        else:
            raise NotImplementedError("Unknown Observation Space.")
Example #4
def chain_processed_data(
        problem_preproc_gen_dict: Dict[str, Iterator]) -> Iterator:
    # only one problem in the dict: return its generator directly
    if len(problem_preproc_gen_dict) == 1:
        return next(iter(problem_preproc_gen_dict.values()))

    if get_is_pyspark():
        from pyspark import RDD
        from .pyspark_utils import join_dict_of_rdd

        rdd = join_dict_of_rdd(rdd_dict=problem_preproc_gen_dict)
        return rdd

    logger.warning('Chaining problems with & may consume a lot of memory if'
                   ' the data is not a pyspark RDD.')
    data_dict = {}
    column_list = []
    for pro in problem_preproc_gen_dict:
        data_dict[pro] = listify(problem_preproc_gen_dict[pro])
        try:
            column_list.append(list(data_dict[pro][0].keys()))
        except IndexError:
            raise IndexError("Problem {} has no data".format(pro))

    # use the intersection of the columns as the join key to ensure features align
    join_key = list(set(column_list[0]).intersection(*column_list[1:]))

    flat_data_list = []
    first_problem = next(iter(problem_preproc_gen_dict.keys()))
    while data_dict[first_problem]:
        d = {}
        for pro in data_dict:
            if not d:
                d = data_dict[pro].pop(0)
            else:
                for k in join_key:
                    assert d[k] == data_dict[pro][0][k], \
                        'At iteration {}, feature {} not aligned. Expected {}, got: {}'.format(
                            len(flat_data_list), k, d[k], data_dict[pro][0][k])
                d.update(data_dict[pro].pop(0))
        flat_data_list.append(d)
    return flat_data_list
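The non-pyspark branch is easiest to see on toy data (made-up problem names and features; this only illustrates the merge semantics and is not part of the library):

gen_dict = {
    "cls": iter([{"uid": 1, "label": 0}, {"uid": 2, "label": 1}]),
    "seq_tag": iter([{"uid": 1, "tags": ["B", "O"]}, {"uid": 2, "tags": ["O"]}]),
}
# join_key = intersection of the columns = {"uid"}; rows are popped in
# order, asserted aligned on "uid", then merged:
# chain_processed_data(gen_dict) ->
#   [{"uid": 1, "label": 0, "tags": ["B", "O"]},
#    {"uid": 2, "label": 1, "tags": ["O"]}]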
Example #5
 def wrapper(ref, action):
     if isinstance(ref.action_space, sp.Discrete):
         if len(ref.action_list) != ref.action_space.n:
             raise ValueError("Invalid action list.")
         action_name = action.out[0].name
         if action_name not in ref.action_list:
             raise ValueError(
                 "Action {} not known in the environment.".format(action_name)
             )
         obs, r, done = step(ref, ref.action_list.index(action_name))
     elif isinstance(ref.action_space, sp.Dict):
         actions = listify(action)
         act_dict = {
             action.out[0].name: to_python(action.out[1]) for action in actions
         }
         obs, r, done = step(ref, act_dict)
     else:
         raise NotImplementedError("Unknown action space.")
     return ref.parse_world_state(ref.observation_space, obs, r, done)
Example #6
def check_update(
    field: str, value, dtype, values_set: Iterable = None, val_text_string: bool = False
) -> dict:
    """checa se `value` é do tipo `dtype`. Opcionalmente checa se `value` pertence ao conjunto `values_set`
    Opcionalmente formata `value` com a função `value_text_string`
    Returns: Dicionário no formato compatível com a API do Redmine {"id" : ... , "value" : ...}
    """
    if not isinstance(value, dtype):
        raise TypeError(
            f"É esperado que o campo {field} seja do tipo {dtype}, o fornecido foi {type(value)}"
        )

    if values_set is not None and not set(listify(value)).issubset(set(values_set)):
        raise ValueError(
            f"O valor para {field} : {value} deve pertencer ao conjunto: {values_set}"
        )

    if val_text_string:
        value = value_text_string(value)

    return {"id": FIELD2ID[field], "value": value}
Example #7
def convert_data_to_features(problem: str,
                             data_iter: Iterable,
                             params: Params,
                             label_encoder: Any,
                             tokenizer: Any,
                             mode=TRAIN) -> Iterable[dict]:

    if mode != PREDICT:
        problem_type = params.problem_type[problem]

        # whether this problem is sequential labeling
        # for sequential labeling, targets needs to align with any
        # change of inputs
        is_seq = problem_type in ['seq_tag']
    else:
        problem_type = 'cls'
        is_seq = False

    part_fn = partial(create_multimodal_bert_features,
                      problem=problem,
                      label_encoder=label_encoder,
                      params=params,
                      tokenizer=tokenizer,
                      mode=mode,
                      problem_type=problem_type,
                      is_seq=is_seq)
    preprocess_buffer = params.preprocess_buffer
    # use a different local name so the num_cpus() helper isn't shadowed
    # (assigning to num_cpus here would raise UnboundLocalError on the call)
    n_cpus = params.num_cpus if params.num_cpus > 0 else num_cpus()
    # no easy fix for prediction in multiprocessing:
    # phase is not shared between processes
    n_cpus = 1 if mode == PREDICT else n_cpus
    for data_buffer_list in chunked(data_iter, chunk_sz=preprocess_buffer):
        per_cpu_chunk = listify(chunked(data_buffer_list, n_chunks=n_cpus))
        res_gen = Parallel(n_cpus)(delayed(part_fn)(example_list=d_list)
                                   for d_list in per_cpu_chunk)
        for d_list in res_gen:
            for d in d_list:
                yield d
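The buffer/fan-out pattern above, sketched with stdlib pieces only (multiprocessing.Pool standing in for joblib's Parallel/delayed; process and chunks are stand-ins for part_fn and chunked):

from multiprocessing import Pool

def process(example_list):
    # stand-in for part_fn: turn a chunk of examples into feature dicts
    return [{"x": x * 2} for x in example_list]

def chunks(seq, n):
    # stand-in for chunked(..., n_chunks=n): split seq into n roughly equal parts
    k = max(1, len(seq) // n)
    return [seq[i:i + k] for i in range(0, len(seq), k)]

if __name__ == "__main__":
    buffer = list(range(8))
    with Pool(2) as pool:
        for d_list in pool.map(process, chunks(buffer, 2)):
            for d in d_list:
                print(d)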
Example #8
 def labeled_observations(self, space, obs, sbs=""):
     if isinstance(space, sp.Tuple):
         obs_list = []
         for s in space:
             idx = len(obs_list)
             _sbs = sbs + "-" + str(idx) if sbs else str(idx)
             obs_list.extend(self.labeled_observations(s, obs[idx], _sbs))
         return obs_list
     elif isinstance(space, sp.Box):
         label = sbs + "-Box" if sbs else "Box"
         return self.convert_percept(label, *obs)
     elif isinstance(space, sp.Discrete):
         label = sbs + "-Discrete" if sbs else "Discrete"
         return self.convert_percept(label, obs)
     elif isinstance(space, sp.Dict):
         obs_list = []
         for k in obs.keys():
             label = sbs + "-" + k if sbs else k
             if isinstance(space[k], sp.Discrete):
                 obs_list += self.convert_percept(label, obs[k])
             elif isinstance(space[k], sp.Box):
                 vals = (listify(obs[k].tolist()) if isinstance(
                     obs[k], np.ndarray) else obs[k])
                 obs_list += self.convert_percept(label, *vals)
             elif isinstance(space[k], sp.Tuple):
                 _sbs = sbs + "-" + k if sbs else k
                 obs_list.extend(
                     self.labeled_observations(space[k], obs[k], _sbs))
             elif isinstance(space[k], sp.Dict):
                 _sbs = sbs + "-" + k if sbs else k
                 obs_list.extend(
                     self.labeled_observations(space[k], obs[k], _sbs))
             else:
                 raise NotImplementedError(
                     "ObservationSpace not implemented.")
         return obs_list
     else:
         raise NotImplementedError("Unknown Observation Space.")
Example #9
    if tipo := d.get(key):
        d[key] = check_update(key, tipo, DICT_FIELDS[key], TIPO, True)

    key = keys[2]
    if not d.get(key):
        raise ValueError("O campo description não pode ficar vazio")

    key = keys[3]
    if fiscal := d.get(key):
        value = check_update(key, fiscal, DICT_FIELDS[key], name2id.keys())
        value["value"] = name2id[value["value"]]
        d[key] = value

    key = keys[4]
    if fiscais := d.get(key):
        fiscais = listify(fiscais)
        values = check_update(key, fiscais, DICT_FIELDS[key], name2id.keys())
        values["value"] = [name2id[v] for v in values["value"]]
        d[key] = values

    key = keys[6]
    if (relatorio := d.get(key, None)) is not None:
        dtype = DICT_FIELDS[key]
        if relatorio in (1, "1"):
            chave = keys[5]
            if (html := d.get(chave, None)) is not None:
                html = Path(html)
                if html.exists() and html.is_file():
                    d[chave] = check_update(chave, html.read_text(), DICT_FIELDS[chave])
                    d[key] = check_update(key, relatorio, dtype, (1, "1"))
                else:
Example #10
def mk_action(name, value):
    if isinstance(value, np.ndarray):
        value = listify(value.tolist())
        return ExecutionLink(SchemaNode(name), mk_list(*value))

    return ExecutionLink(SchemaNode(name), mk_node(value))
Example #11
def slide_rois_no_image(
    slide: Slide,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    ancestors: Optional[Sequence[Patch]] = None,
    offset: Coord = (0, 0),
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
) -> Iterator[Patch]:
    """
    Get patches with coordinates.

    Given a slide, a pyramid level, a patch size in pixels, an interval in pixels
    and an offset in pixels, get patches with their coordinates. Does not export an
    image at any point.

    Args:
        slide: the slide to patchify.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        ancestors: patches that contain upcoming patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean mask.

    Yields:
        Patch objects with their coordinates; no image data is read.

    """
    psize = convert_coords(psize)
    offset = convert_coords(offset)
    ancestors = ifnone(ancestors, [])
    slide_filters = listify(slide_filters)
    if len(ancestors) > 0:
        mag = slide.level_downsamples[level]
        shape = Coord(ancestors[0].size_0) / mag
        size_0 = psize * mag
        for ancestor in ancestors:
            # ancestor is a patch
            rx, ry = ancestor.position
            prefix = ancestor.id
            k = 0
            for patch_coord in regular_grid(shape, interval, psize):
                k += 1
                idx = "{}#{}".format(prefix, k)
                position = patch_coord * mag + ry
                yield Patch(
                    id=idx,
                    slidename=slide._filename.split("/")[-1],
                    position=position,
                    level=level,
                    size=psize,
                    size_0=size_0,
                    parent=ancestor,
                )
    else:
        shape = Coord(*slide.level_dimensions[level])
        mag = slide.level_downsamples[level]
        thumb = numpy.array(slide.get_thumbnail((thumb_size, thumb_size)))
        mask = apply_slide_filters(thumb, slide_filters)
        k = 0
        for patch_coord in get_coords_from_mask(mask, shape, interval, psize):
            k += 1
            idx = "#{}".format(k)
            position = patch_coord * mag + offset
            size_0 = psize * mag
            yield Patch(
                id=idx,
                slidename=slide._filename.split("/")[-1],
                position=position,
                level=level,
                size=psize,
                size_0=size_0,
            )
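A sketch of use (assumes slide is an opened Slide from the same library): since no pixel data is read, this is cheap enough to count and inspect candidate patches up front:

patches = list(slide_rois_no_image(slide, level=1, psize=(256, 256)))
print(len(patches), patches[0].position, patches[0].size)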
Example #12
def slide_rois(
    slide: Slide,
    level: int,
    psize: Coord,
    interval: Coord = (0, 0),
    ancestors: Optional[Sequence[Patch]] = None,
    offset: Coord = (0, 0),
    filters: Optional[Sequence[Filter]] = None,
    thumb_size: int = 512,
    slide_filters: Optional[Sequence[Filter]] = None,
) -> Iterator[Tuple[Patch, NDImage]]:
    """
    Get patches with coordinates.

    Given a slide, a pyramid level, a patch size in pixels, an interval in pixels
    and an offset in pixels, get patches with their coordinates.

    Args:
        slide: the slide to patchify.
        level: pyramid level.
        psize: (w, h) size of the patches (in pixels).
        interval: (x, y) interval between 2 neighboring patches.
        ancestors: patches that contain upcoming patches.
        offset: (x, y) offset in px on x and y axis for patch start.
        filters: filters to accept patches.
        thumb_size: size of thumbnail's longest side. Always preserves aspect ratio.
        slide_filters: list of filters to apply to thumbnail. Should output boolean mask.

    Yields:
        A tuple containing a Patch object and the corresponding image as
        ndarray.

    """
    psize = convert_coords(psize)
    offset = convert_coords(offset)
    ancestors = ifnone(ancestors, [])
    filters = listify(filters)
    slide_filters = listify(slide_filters)
    if len(ancestors) > 0:
        mag = slide.level_downsamples[level]
        shape = Coord(ancestors[0].size_0) / mag
        size_0 = psize * mag
        patches = []
        for ancestor in ancestors:
            # ancestor is a patch
            rx, ry = ancestor.position
            prefix = ancestor.id
            k = 0
            for patch_coord in regular_grid(shape, interval, psize):
                k += 1
                idx = "{}#{}".format(prefix, k)
                position = patch_coord * mag + ry
                patches.append(
                    Patch(
                        id=idx,
                        slidename=slide._filename.split("/")[-1],
                        position=position,
                        level=level,
                        size=psize,
                        size_0=size_0,
                        parent=ancestor,
                    ))
        for patch in tqdm(patches, ascii=True):
            try:
                image = slide.read_region(patch.position, patch.level,
                                          patch.size)
                image = numpy.array(image)[:, :, 0:3]
                if filter_image(image, filters):
                    yield patch, image
            except openslide.lowlevel.OpenSlideError:
                print(
                    "small failure while reading tile x={}, y={} in {}".format(
                        *patch.position, slide._filename))
    else:
        shape = Coord(*slide.level_dimensions[level])
        mag = slide.level_downsamples[level]
        thumb = numpy.array(slide.get_thumbnail((thumb_size, thumb_size)))
        mask = apply_slide_filters(thumb, slide_filters)
        k = 0
        for patch_coord in get_coords_from_mask(mask, shape, interval, psize):
            k += 1
            idx = "#{}".format(k)
            position = patch_coord * mag + offset
            size_0 = psize * mag
            try:
                image = slide.read_region(position, level, psize)
                image = numpy.array(image)[:, :, 0:3]
                if filter_image(image, filters):
                    yield Patch(
                        id=idx,
                        slidename=slide._filename.split("/")[-1],
                        position=position,
                        level=level,
                        size=psize,
                        size_0=size_0,
                    ), image
            except openslide.lowlevel.OpenSlideError:
                print(
                    "small failure while reading tile x={}, y={} in {}".format(
                        *position, slide._filename))
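And typical use of the image-reading variant (same assumptions as above; image is an RGB ndarray because the alpha channel is sliced off):

for patch, image in slide_rois(slide, level=1, psize=(256, 256)):
    assert image.shape == (256, 256, 3)
    # hand (patch, image) to the downstream tiling / training pipeline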