コード例 #1
0
    def load_from_args(self, args: Optional[Namespace]) -> None:
        """Apply command-line argument values to this configuration.

        Walks ``self._args`` (argument name -> full config name), reads each
        argument from ``args`` and, for every value that is not ``None``,
        converts it with ``_get_value_in_correct_type`` and stores it on
        ``self`` at the corresponding config name via ``assign``.

        :param args: parsed arguments; attributes that are missing or
            ``None`` leave the current configuration untouched.
        """
        for arg_name, full_conf_name in self._args.items():
            raw_value = getattr(args, arg_name, None)
            if raw_value is None:
                # Nothing supplied for this argument: keep the current value.
                continue

            converted = self._get_value_in_correct_type(full_conf_name, raw_value)
            assign(self, full_conf_name, converted)
コード例 #2
0
def dict_update(dictionary, path, unic_key, value, value_format_callback=None):
    """Create or update ``unic_key`` inside the dict located at ``path``.

    ``path`` is walked one level at a time (its items are joined with ``.``
    to form glom paths, so they must be strings). Levels that cannot be
    read are created as empty dicts. At the last level:

    * if the level can be read and ``value_format_callback`` is given, the
      value currently stored under ``unic_key`` is replaced by
      ``value_format_callback(current_value)``;
    * if the level cannot be read, or the callback step raises (for example
      because ``unic_key`` is not present yet), ``value`` is stored under
      ``unic_key`` instead.

    NOTE(review): when the last level exists and no callback is given the
    level is re-assigned unchanged and ``value`` is not stored -- confirm
    that this is intended.

    Args:
        dictionary: nested dict to mutate in place.
        path: sequence of string keys leading to the dict that holds
            ``unic_key``.
        unic_key: key to create or update at the end of ``path``.
        value: value stored when ``unic_key`` has to be created.
        value_format_callback: optional callable receiving the current
            value of ``unic_key`` and returning its replacement.

    Returns:
        None. ``dictionary`` is modified in place.
    """
    last_index = len(path) - 1

    for index, _ in enumerate(path):
        is_last_iteration = index == last_index
        step_path = ".".join(path[:index + 1])
        # Stays empty when the lookup below fails, i.e. the level is missing.
        level_value = {}

        try:
            level_value = glom.glom(dictionary, step_path)

            if is_last_iteration:
                if value_format_callback is not None:
                    level_value[unic_key] = value_format_callback(
                        level_value[unic_key])
                dictionary = glom.assign(dictionary, step_path, level_value)
        except Exception:
            # Deliberate best-effort fallback (missing level, missing key or
            # failing callback). ``Exception`` rather than a bare ``except``
            # so KeyboardInterrupt / SystemExit still propagate.
            new_level_value = {}

            if is_last_iteration:
                level_value[unic_key] = value
                new_level_value = level_value

            dictionary = glom.assign(dictionary, step_path, new_level_value)
コード例 #3
0
ファイル: test_gke.py プロジェクト: spotify/klio
def test_apply_labels_to_deployment_config(
    input_config,
    is_ci,
    exp_deployed_by,
    run_pipeline_gke,
    monkeypatch,
    deployment_config,
):
    """Labels written by ``_apply_labels_to_deployment_config`` match the
    expected built-in labels plus every well-formed user label.

    NOTE(review): ``deployment_config.copy()`` is presumably a shallow dict
    copy, in which case the nested ``spec.template.metadata`` level is
    shared between the expected and the actual config -- confirm that the
    final comparison is as strict as intended.
    """
    environ = job_gke.os.environ
    monkeypatch.setitem(environ, "USER", "stub-user")
    monkeypatch.setitem(environ, "CI", is_ci)
    monkeypatch.setattr(job_gke, "klio_cli_version", "stub-version")

    # TODO: patch user config for user labels
    pipeline_options = run_pipeline_gke.klio_config.pipeline_options
    monkeypatch.setattr(
        pipeline_options,
        "labels",
        [
            "label_a=value_a",
            "label-b=value-b",
            "label-c=",
            "labeld",  # invalid, expected to be ignored
        ],
    )

    builtin_labels = {
        "app": "test-job",
        "role": "testjob",
        "klio/deployed_by": exp_deployed_by,
        "klio/klio_cli_version": "stub-version",
    }
    user_label_values = {
        "label_a": "value_a",
        "label-b": "value-b",
        "label-c": "",
    }
    expected_labels = dict(builtin_labels, **user_label_values)

    expected_config = deployment_config.copy()
    glom.assign(
        expected_config, "spec.template.metadata.labels", expected_labels
    )

    run_pipeline_gke._apply_labels_to_deployment_config(deployment_config)
    assert expected_config == deployment_config
コード例 #4
0
    def parse(self, stream, media_type=None, parser_context=None):
        """Parse a multipart request into a single data object.

        The stream is first parsed by ``MultiPartParser``. When form data
        is present it is returned as-is (and mixing it with files is
        rejected). Otherwise the result is built from the uploaded files:
        JSON parts are loaded and merged first, then every other part is
        stored at the glom path given by its field name.

        Raises:
            ParseError: when both form data and files were submitted.
        """
        parsed = MultiPartParser.parse(self, stream, media_type,
                                       parser_context)

        has_data = len(parsed.data) > 0
        has_files = len(parsed.files) > 0
        if has_data:
            if has_files:
                raise ParseError('Either pass data or files')
            return parsed.data

        json_type = 'application/json'
        data = {}

        # First pass: merge the content of every JSON part.
        for _, content in parsed.files.items():
            if content.content_type == json_type:
                data.update(**json.load(content.file))

        # Second pass: attach the remaining parts; the field name is the
        # path into the object to assign.
        for name, content in parsed.files.items():
            if content.content_type != json_type:
                glom.assign(data, name, content)

        return data
コード例 #5
0
def test_assign_missing_unassignable():
    """A failing ``assign`` must not leave the target half-modified.

    The write into the target object is expected to happen last, so when
    an intermediate assignment fails the target keeps its original
    content.
    """
    class Tarjay(object):
        init_count = 0

        def __init__(self):
            type(self).init_count += 1

        @property
        def unassignable(self):
            return None

    original_content = {"preexisting": "ok"}
    target = dict(original_content)
    value = object()

    with pytest.raises(PathAssignError):
        assign(target, 'tarjay.unassignable.a.b.c', value, missing=Tarjay)

    assert target == {'preexisting': 'ok'}

    # Why 3? "c" receives ``value`` itself, while "b", "a" and "tarjay" are
    # each filled with a fresh Tarjay instance. "unassignable" already
    # exists as a read-only property, so writing to it raises the
    # PathAssignError.
    assert Tarjay.init_count == 3
コード例 #6
0
 def _apply_image_to_deployment_config(self, deployment_config):
     """Apply the runtime image tag to the deployment and sanity-check it.

     When ``self.docker_runtime_config.image_tag`` is set, the image of the
     first container in ``deployment_config`` is rewritten to use that tag
     (everything from the first ``:`` of the current image is dropped).
     Afterwards the image found in the deployment is compared with
     ``pipeline_options.worker_harness_container_image`` plus the tag, and
     a warning is logged when they differ.

     Args:
         deployment_config (dict): deployment configuration; mutated in
             place when an image tag is configured.
     """
     image_tag = self.docker_runtime_config.image_tag
     pipeline_options = self.klio_config.pipeline_options
     # Bound unconditionally: the comparison below needs it even when no
     # image tag is configured (it used to raise UnboundLocalError then).
     image_path = "spec.template.spec.containers.0.image"
     if image_tag:
         # TODO: If more than one image deployed,
         #  we need to search for correct container
         image_base = glom.glom(deployment_config, image_path)
         # Strip off existing image tag if any
         image_base = re.split(":", image_base)[0]
         full_image = f"{image_base}:{image_tag}"
         glom.assign(deployment_config, image_path, full_image)
     # Check to see if the kubernetes image to be deployed is the same
     # image that is built
     # NOTE(review): with a falsy image_tag the built image name ends in
     # that falsy value (e.g. ":None") -- confirm whether the check should
     # be skipped in that case.
     k8s_image = glom.glom(deployment_config, image_path)
     built_image_base = pipeline_options.worker_harness_container_image
     built_image = f"{built_image_base}:{image_tag}"
     if built_image != k8s_image:
         logging.warning(
             f"Image deployed by kubernetes {k8s_image} does not match "
             f"the built image {built_image}. "
             "This may result in an `ImagePullBackoff` for the deployment. "
             "If this is not intended, please change "
             "`pipeline_options.worker_harness_container_image` "
             "and rebuild  or change the container image"
             "set in kubernetes/deployment.yaml file.")
コード例 #7
0
    def action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        """Run the external command on a dump and record its classification.

        The command line is built from ``self.command_line`` using the
        command pathname, the kill timeout and the dump file found in
        ``raw_dumps[self.dump_field]``. The output and return code reported
        by ``execute_external_process`` are stored in ``processed_crash``
        under ``classifications.jit``; missing levels are created as dicts.
        """
        format_args = dict(
            command_pathname=self.command_pathname,
            kill_timeout=self.kill_timeout,
            dump_file_pathname=raw_dumps[self.dump_field],
        )
        command_line = self.command_line.format(**format_args)

        output, return_code = execute_external_process(
            command_pathname=self.command_pathname,
            command_line=command_line,
            processor_meta=processor_meta,
            interpret_output=self._interpret_output,
        )

        results = (
            ("classifications.jit.category", output),
            ("classifications.jit.category_return_code", return_code),
        )
        for result_path, result_value in results:
            glom.assign(processed_crash,
                        result_path,
                        val=result_value,
                        missing=dict)
コード例 #8
0
def test_invalid_assign_op_target():
    """``assign`` rejects a ``T`` spec that ends in a function call."""
    def afunc(x):
        return 'hi %s' % x

    target = {'afunc': afunc}
    call_spec = T['afunc'](x=1)

    with pytest.raises(ValueError):
        assign(target, call_spec, None)
コード例 #9
0
def make_nested(path_to_value: t.Dict[t.Tuple, t.Any]) -> dict:
    """Build a nested dict from a mapping of key-path tuples to values.

    Entries are applied shortest path first. Each path is joined with ``.``
    and written with ``glom.assign``; missing intermediate levels are
    created as dicts.
    """
    def path_depth(item):
        # item is a (path, value) pair; order by number of path segments.
        return len(item[0])

    nested = {}
    ordered_items = sorted(path_to_value.items(), key=path_depth)
    for path, value in ordered_items:
        dotted_path = ".".join(path)
        glom.assign(nested, dotted_path, value, missing=dict)
    return nested
コード例 #10
0
    def load_from_env(self) -> None:
        """Apply environment variable values to this configuration.

        Walks ``self._envs`` (variable name -> full config name). Every
        variable that is set in the environment is converted with
        ``_get_value_in_correct_type`` and stored on ``self`` at the
        corresponding config name via ``assign``.
        """
        for env_name, full_conf_name in self._envs.items():
            raw_value = os.getenv(env_name)
            if raw_value is None:
                # Variable not set: keep the current value.
                continue

            converted = self._get_value_in_correct_type(full_conf_name, raw_value)
            assign(self, full_conf_name, converted)
コード例 #11
0
ファイル: dy_trace.py プロジェクト: p15r/distributey
def __camouflage_nested_dict(args_and_values: dict, keypaths: List[str]):
    """Overwrite sensitive values of a nested dict with ``CAMOUFLAGE_SIGN``.

    For every key path containing ``priv_`` the path is cut at the end of
    the segment in which ``priv_`` first occurs (for example
    ``path.to.priv_key.subkey`` becomes ``path.to.priv_key``) and the value
    at that shortened path is replaced in ``args_and_values``. Failures are
    logged and skipped, so processing always continues with the next path.
    """
    for keypath in keypaths:
        pos_start = keypath.find('priv_')
        if pos_start == -1:
            # Nothing sensitive in this path.
            continue

        # Keep everything up to (not including) the first "." that follows
        # "priv_"; when there is none the whole key path is kept.
        sensitive_segment, _, _ = keypath[pos_start:].partition('.')
        priv_keypath = keypath[:pos_start] + sensitive_segment

        try:
            glom.assign(args_and_values, priv_keypath, CAMOUFLAGE_SIGN)
        except Exception as exc:
            # Keep the sensitive value in the log instead of aborting
            # logging altogether.
            logger.critical(
                'Failed to camouflage sensitive argument '
                'for path "%s".'
                'Exception: "%s"', keypath, exc
            )
コード例 #12
0
def test_assign_missing_object():
    """``assign`` with ``missing`` creates attribute levels on objects."""
    class Container(object):
        pass

    sentinel = object()
    existing_a = Container()
    root = Container()
    root.a = existing_a

    assign(root, 'a.b.c.d', sentinel, missing=Container)

    # The level that was already on the path must not have been replaced.
    assert root.a is existing_a
    assert root.a.b.c.d is sentinel
コード例 #13
0
def parse_multipart_resources_spec(forms, files):
    """Build a resources spec from multipart form fields and uploads.

    Args:
        forms: mapping of form field names to values.
        files: mapping of multipart names to uploaded file objects (each
            is read with ``.read()`` and may be asked for ``.filename``).

    Returns:
        tuple: ``(json_spec, None)`` on success, ``(None, error)`` on
        failure. Errors built here are dicts whose ``"error"`` key is
        ``INVALID_RESOURCES_JSON`` or ``MISSING_MULTIPART_FILE``.
    """
    json_spec = {}

    # The compiler choice is copied verbatim when supplied.
    if "compiler" in forms:
        json_spec["compiler"] = forms["compiler"]

    # Every field whose name contains "options." is written at that dotted
    # path, creating intermediate dicts on the way.
    for form_key in forms:
        if "options." in form_key:
            glom.assign(json_spec, form_key, forms[form_key], missing=dict)

    if "resources" not in forms:
        # No explicit specification: reconstruct one from the uploaded
        # files (TODO: best guess of one main tex file plus the other non
        # tex resources).
        resources, error = construct_resources_specification_from_files(
            files)
        json_spec["resources"] = resources
        if error:
            return None, error
        logger.info("Reconstructed resource spec: %s",
                    pprint.pformat(json_spec["resources"]))
    else:
        try:
            json_spec["resources"] = json.loads(forms["resources"])
        except json.decoder.JSONDecodeError as jde:
            failure = {
                "error": "INVALID_RESOURCES_JSON",
                "exception_content": str(jde),
            }
            return None, failure

    # Replace multipart references by the content of the uploaded files.
    for resource in json_spec["resources"]:
        if "multipart" not in resource:
            continue

        upload_name = resource["multipart"]
        if upload_name not in files:
            failure = {
                "error": "MISSING_MULTIPART_FILE",
                "filename": upload_name,
            }
            return None, failure

        upload = files[upload_name]
        # File content is carried base64-encoded.
        resource["file"] = base64.b64encode(upload.read())
        if "path" not in resource:
            resource["path"] = upload.filename

    return json_spec, None
コード例 #14
0
def test_assign_missing_dict():
    """``assign`` with a ``missing`` factory builds every absent dict level."""
    def debugdict():
        # Plain factory; a convenient hook for instrumenting created dicts.
        return dict()

    sentinel = object()
    target = {}

    assign(target, 'a.b.c.d', sentinel, missing=debugdict)

    expected = {'a': {'b': {'c': {'d': sentinel}}}}
    assert target == expected
コード例 #15
0
def update_table_with_new_entry(main_data: dict, new_entry: dict, config: dict,
                                fidelity: dict) -> dict:
    """Store a function evaluation entry in the nested benchmark table.

    The table is a nested dict keyed, level by level, by the values of
    ``config`` (in its iteration order; strings kept as ``str``, anything
    else converted to ``np.float32``), then the values of ``fidelity`` (as
    ``np.float32``) and finally the seed read from
    ``new_entry['info']['seed']``. Each level that is absent (or holds
    ``None``) is created as an empty dict before ``new_entry`` is written
    at the full key path. ``glom`` is used because the depth of the table
    varies with the size of the parameter space.

    Returns:
        dict: ``main_data``, mutated in place.
    """
    seed = new_entry['info']['seed']

    def _ordered_keys():
        # Produced lazily so each key is computed right before its level is
        # created.
        for conf_value in config.values():
            if isinstance(conf_value, str):
                yield str(conf_value)
            else:
                yield np.float32(conf_value)
        for fidelity_value in fidelity.values():
            yield np.float32(fidelity_value)
        yield seed

    key_nest = []
    for key in _ordered_keys():
        key_nest.append(key)
        level_path = glom.Path(*key_nest)
        if glom.glom(main_data, level_path, default=None) is None:
            glom.assign(main_data, level_path, dict())

    glom.assign(main_data, glom.Path(*key_nest), new_entry)
    return main_data
コード例 #16
0
def test_assign():
    """Exercise ``Assign``/``assign`` on dicts, objects and cyclic targets,
    plus argument validation and ``repr`` round-tripping."""
    class Foo(object):
        pass

    def cyclic():
        # A dict whose only key points back at the dict itself.
        loop = {}
        loop['r'] = loop
        return loop

    # Dict targets, addressed with T specs and with string paths.
    assert glom({}, Assign(T['a'], 1)) == {'a': 1}
    assert glom({}, Assign('a', 1)) == {'a': 1}
    assert glom({'a': {}}, Assign(T['a']['a'], 1)) == {'a': {'a': 1}}
    assert glom({'a': {}}, Assign('a.a', 1)) == {'a': {'a': 1}}

    # Object targets (attribute assignment).
    assert glom(Foo(), Assign(T.a, 1)).a == 1
    assert glom(Foo(), Assign('a', 1)).a == 1
    assert glom({'a': Foo()}, Assign('a.a', 1))['a'].a == 1

    # Cyclic targets: any depth resolves to the same dict.
    collapsed = {'r': 1}
    assert glom(cyclic(), Assign('r.r.r.r.r.r.r.r.r', 1)) == collapsed
    assert glom(cyclic(), Assign(T['r']['r']['r']['r'], 1)) == collapsed
    assert glom(cyclic(), Assign(Path('r', 'r', T['r']), 1)) == collapsed
    assert assign(cyclic(), Path('r', 'r', T['r']), 1) == collapsed

    # Argument validation.
    with pytest.raises(TypeError, match='path argument must be'):
        Assign(1, 'a')
    with pytest.raises(ValueError,
                       match='path must have at least one element'):
        Assign(T, 1)

    # repr output, including a round trip through eval.
    assert repr(Assign(T.a, 1)) == 'Assign(T.a, 1)'
    assign_spec = Assign(T.a, 1, missing=dict)
    spec_repr = repr(assign_spec)
    assert spec_repr == "Assign(T.a, 1, missing=dict)"
    assert repr(eval(spec_repr)) == repr(assign_spec)
コード例 #17
0
ファイル: meta.py プロジェクト: a-domingu/tbcnn
    def set(meta, spec=None, value=None, missing=dict) -> dict:
        """
        Return metadata with ``value`` stored at ``spec``
        :param meta: Meta data used as the starting point; it is deep-copied,
        never modified
        :param spec: path to the key to be modified. When None, ``value``
        itself is returned
        :param value: value to store at ``spec``
        :param missing: factory passed to ``assign`` for absent levels
        :return: the updated copy of ``meta``, or ``value`` when no spec
        is given
        """
        if spec is None:
            return value

        updated = copy.deepcopy(meta)
        assign(updated, spec, value, missing=missing)
        return updated
コード例 #18
0
def test_sequence_assign():
    """Assigning by index works inside a list and fails past its end."""
    target = {'alist': [0, 1, 2]}

    assign(target, 'alist.2', 3)
    assert target['alist'][2] == 3

    with pytest.raises(PathAssignError, match='could not assign') as exc_info:
        assign(target, 'alist.3', 4)

    # Only the start and end of the repr are checked because the IndexError
    # message in the middle differs between interpreters:
    #   pypy:    IndexError('list index out of range',)
    #   CPython: IndexError('list assignment index out of range',)
    exc_repr = repr(exc_info.value)
    expected_start, expected_end = 'PathAssignError(', "'3')"
    assert exc_repr.startswith(expected_start)
    assert exc_repr.endswith(expected_end)
コード例 #19
0
def test_bad_assign_target():
    """Failures while assigning or traversing surface as glom path errors."""
    class BadTarget(object):
        def __setattr__(self, name, val):
            raise Exception("and you trusted me?")

    spec = Assign('a', 'b')

    # Sanity check: the same spec works on a target accepting attributes.
    def ok_target():
        return None

    glom(ok_target, spec)
    assert ok_target.a == 'b'

    # A target refusing attribute writes yields an assignment error ...
    with pytest.raises(PathAssignError, match='could not assign'):
        glom(BadTarget(), spec)

    # ... while a missing intermediate level yields an access error.
    with pytest.raises(PathAccessError, match='could not access'):
        assign({}, 'a.b.c', 'moot')
コード例 #20
0
ファイル: json_api.py プロジェクト: m4nu75/latex-on-http
def parse_json_resources_spec(json_payload):
    """Build a resources spec from a JSON payload.

    Keys listed in ``PAYLOAD_KEYS_TO_COPY`` are copied as-is. Keys whose
    name contains ``options.`` are spread into nested dicts following their
    dotted path.

    Returns:
        tuple: ``(json_spec, None)``; no error is produced here.
    """
    # Select / copy several keys.
    json_spec = {
        key: json_payload[key]
        for key in json_payload
        if key in PAYLOAD_KEYS_TO_COPY
    }

    # Auto-spread "options." entries.
    for key in json_payload:
        if "options." in key:
            glom.assign(json_spec, key, json_payload[key], missing=dict)

    return json_spec, None
コード例 #21
0
ファイル: utils.py プロジェクト: willkg/socorro-siggen
def override_values(crash_data, values):
    """
    Yield a copy of the crash data with the given paths overridden.

    The overrides are applied to a deep copy, so the ``crash_data`` passed
    in is never modified; missing levels along a path are created as
    dicts. The function yields exactly once (presumably it is wrapped as a
    context manager by a decorator outside this view -- confirm).

    :arg crash_data: the crash data that conforms to the schema
    :arg values: dict of path -> value to override

    :yields: dict with overridden values

    """
    overridden = copy.deepcopy(crash_data)

    for path in values:
        assign(overridden, path, val=values[path], missing=dict)

    yield overridden
コード例 #22
0
ファイル: test_gke.py プロジェクト: spotify/klio
def test_apply_labels_to_deployment_config_overrides(run_pipeline_gke,
                                                     monkeypatch,
                                                     deployment_config):
    """Labels written by ``_apply_labels_to_deployment_config`` match the
    expected ``app``/``role`` values and deployment metadata labels.

    NOTE(review): ``deployment_config.copy()`` is presumably a shallow dict
    copy, in which case the nested ``spec.template.metadata`` level is
    shared between the expected and the actual config -- confirm that the
    final comparison is as strict as intended.
    """
    monkeypatch.setitem(job_gke.os.environ, "USER", "stub-user")
    monkeypatch.setattr(job_gke, "klio_cli_version", "stub-version")

    expected_labels = dict([
        ("app", "different-app-name"),
        ("role", "differentappname"),
        ("klio/deployed_by", "stub-user"),
        ("klio/klio_cli_version", "stub-version"),
    ])

    expected_config = deployment_config.copy()
    glom.assign(
        expected_config, "spec.template.metadata.labels", expected_labels
    )

    run_pipeline_gke._apply_labels_to_deployment_config(deployment_config)
    assert expected_config == deployment_config
コード例 #23
0
    def _set_config(self, target, value):
        """Assign ``value`` at the dotted path ``target`` in the config.

        ``glom.assign`` creates missing dict levels but cannot grow a list.
        When the assignment fails with an error mentioning ``IndexError``,
        each numeric path segment is checked against the list it indexes;
        a list for which that index is out of range gets one empty dict
        appended, and the assignment is retried once.

        Lists in which the index already exists are left untouched.
        Previously a placeholder was inserted for every numeric segment, at
        a position derived from the path depth, which could shift or
        overwrite existing list entries.

        Args:
            target (str): dotted glom path into ``self.config_data``.
            value: value to store at ``target``.

        Raises:
            glom.mutation.PathAssignError: when the assignment fails for
                another reason, or still fails after the retry.
        """
        try:
            glom.assign(self.config_data, target, value, missing=dict)
            return
        except glom.mutation.PathAssignError as e:
            if "IndexError" not in str(e):
                raise

        # Handle the user trying to append to a list, which glom does not
        # do on its own.
        stems = target.split(".")
        for position, stem in enumerate(stems):
            try:
                list_index = int(stem)
            except ValueError:
                # Not a numeric segment.
                continue

            container = glom.glom(self.config_data,
                                  ".".join(stems[:position]))
            if not isinstance(container, list):
                # Numeric-looking key of a non-list level: nothing to grow.
                continue
            if -len(container) <= list_index < len(container):
                # Element already exists; do not disturb the list.
                continue
            container.append({})

        glom.assign(self.config_data, target, value, missing=dict)
コード例 #24
0
    def _apply_overrides(raw_config, overrides):
        """Applies overrides to raw klio config.
        If a key already exists in the raw config,
        it will be updated with the override value provided in the overrides dict.
        If a key does not yet exist in the raw config,
        it will be created and assigned the override value.
        Example formats include:
            RAW = {
                "allow_non_klio_messages": False,
                "events": {
                    "inputs": {
                        "file0": {
                            "type": "file",
                            "location": "gs://sigint-output/yesterday.txt",
                        },
                        "file1": {
                            "type": "file",
                            "location": "gs://sigint-output/today.txt",
                        }
                    }
                }
            }

            OVER = {
                "allow_non_klio_messages": True,  # Non-nested key
                "events.inputs.file1.location": "gs://sigint-output/01-01-2020.txt",
                "events.inputs.file2.location": "gs://sigint-output/01-02-2020.txt",
                "events.inputs.file2.type": "file"
            }

        Args:
            raw_config (dict): raw klio config dict
            overrides (dict): override field to override value
        Return:
            config (dict): config dict with overrides applied
        """  # NOQA E501

        for path, value in overrides.items():
            glom.assign(raw_config, path, value, missing=dict)
        return raw_config
コード例 #25
0
def test_assign_missing_with_extant_keys():
    """``assign`` with ``missing`` keeps keys the factory already provides.

    The structure returned by the ``missing`` factory already contains the
    rest of the path, so those levels must be reused rather than replaced,
    and the factory must be invoked only once.
    """
    default_struct = {'b': {'c': {}}}
    factory_calls = []

    def _get_default_struct():
        factory_calls.append(True)
        return default_struct

    value = object()
    target = {}

    assign(target, 'a.b.c', value, missing=_get_default_struct)

    assert target['a']['b']['c'] is value
    assert target['a']['b'] is default_struct['b']
    assert len(factory_calls) == 1
コード例 #26
0
    def _edit_deployment(self,
                         deployment_config,
                         replica_count=None,
                         image_tag=None):
        """Update the replica count and/or image tag of a deployment.

        ``deployment_config`` is mutated in place. What was changed is
        logged once all requested changes have been applied, followed by
        the deployment's UI link.

        Args:
            deployment_config(dict): deployment configuration dict
                that will get mutated with updated fields
            replica_count (int): Number of replicas to set. Left unchanged
                when ``None``.
            image_tag (str): Tag to put on the image of the first
                container, replacing everything from the first ``:`` of
                the current image. Left unchanged when falsy.
        """
        pending_logs = []

        if replica_count is not None:
            glom.assign(deployment_config, "spec.replicas", replica_count)
            pending_logs.append(f"Scaled deployment to {replica_count}")

        if image_tag:
            image_path = "spec.template.spec.containers.0.image"
            current_image = glom.glom(deployment_config, image_path)
            # Strip off existing image tag if present
            untagged_image = re.split(":", current_image)[0]
            glom.assign(deployment_config, image_path,
                        f"{untagged_image}:{image_tag}")
            pending_logs.append(
                f"Update deployment with image tag {image_tag}")

        for message in pending_logs:
            logging.info(message)

        ui_link = self._build_ui_link_from_current_context(deployment_config)
        logging.info(f"Deployment details: {ui_link}")
コード例 #27
0
ファイル: profiler.py プロジェクト: midhunsuraj/Optimus
    def _count_data_types(self, df, columns, infer=False, mismatch=None):
        """
        Find the dominant data type of each column and a category for it
        :param df: Dataframe to be processed
        :param columns: Columns to be processed
        :param infer: passed to ``df.cols.count_by_dtypes``
        :param mismatch: passed to ``df.cols.count_by_dtypes``

        :return: dict keyed by column name with ``dtype`` (data type with the
        greatest count, "mismatch" excluded), ``type`` (its category, or None
        when unknown) and ``stats`` (the full counts, "mismatch" included)
        """
        # Category reported for each dominant data type.
        category_by_dtype = {
            "string": "categorical",
            "boolean": "categorical",
            "int": "numeric",
            "decimal": "numeric",
            "date": "date",
            "array": "array",
            "binary": "binary",
            "null": "null",
        }

        columns = parse_columns(df, columns)

        count_by_data_type = df.cols.count_by_dtypes(columns,
                                                     infer=infer,
                                                     mismatch=mismatch)
        # Working copy from which the "mismatch" counts are removed.
        count_by_data_type_no_mismatch = copy.deepcopy(count_by_data_type)

        # Info from all the columns
        type_details = {}

        for col_name in columns:
            counts = count_by_data_type_no_mismatch[col_name]
            # Mismatches do not take part in choosing the dominant type.
            counts.pop("mismatch", None)

            # Data type with the greatest count for this column.
            dominant_dtype = max(counts, key=counts.get)
            category = category_by_dtype.get(dominant_dtype)

            assign(type_details, col_name + ".dtype", dominant_dtype, dict)
            assign(type_details, col_name + ".type", category, dict)
            assign(type_details, col_name + ".stats",
                   count_by_data_type[col_name], dict)

        return type_details
コード例 #28
0
ファイル: extension.py プロジェクト: ariosramirez/Optimus
def set_meta(self, spec=None, value=None, missing=dict):
    """
    Store metadata on the last field of the dataframe schema
    :param self: dataframe whose ``schema[-1].metadata`` is replaced
    :param spec: path to the key to be modified inside the current metadata
    (as returned by ``get_meta``). When None, ``value`` becomes the whole
    metadata
    :param value: value to store
    :param missing: factory passed to ``assign`` for absent levels
    :return: the same dataframe
    """
    if spec is None:
        data = value
    else:
        data = assign(self.get_meta(), spec, value, missing=missing)

    self.schema[-1].metadata = data
    return self
コード例 #29
0
def unglom(d: T_StrAnyMapping, path: str,
           value: typing.Any) -> T_StrAnyMapping:
    """Create nested dictionary structure given a glom compatible path.

    A thin wrapper around :func:`glom.assign` that also copes with paths
    whose intermediate levels do not exist yet: whenever the assignment
    raises ``KeyError``, the last path segment is folded into the value
    (``{segment: value}``) and the assignment is retried one level up.

        >>> unglom({}, 'foo.bar.baz', 'spam')
        {'foo': {'bar': {'baz': 'spam'}}}

    Args:
        d (:obj:`dict`): The target dictionary.
        path (str): The key path.
        value: Any value.

    Returns:
        :obj:`dict`: The original, now mutated dictionary.

    """
    while True:
        try:
            return glom.assign(d, path, value)
        except KeyError:
            # Move one level up and wrap the value accordingly.
            path, child = path.rsplit(".", 1)
            value = {child: value}
コード例 #30
0
ファイル: profiler.py プロジェクト: midhunsuraj/Optimus
    def columns_stats(self,
                      df,
                      columns,
                      buckets=10,
                      infer=False,
                      relative_error=RELATIVE_ERROR,
                      approx_count=True,
                      mismatch=None):
        """
        Collect statistics about the given columns
        :param df: Dataframe to be processed
        :param columns: Columns that you want to profile
        :param buckets: passed to the aggregation and frequency calculations
        :param infer: passed to the data type count
        :param relative_error: passed to ``Profiler.columns_agg``
        :param approx_count: passed to ``Profiler.columns_agg``
        :param mismatch: passed to the data type count
        :return: dict with ``count_types``, ``total_count_dtypes``,
        ``dtypes_list`` and, under ``columns``, one info dict per column
        """
        if self.rows_count is None:
            self.rows_count = df.count()
        columns = parse_columns(df, columns)

        # Initialize Objects
        logger.print("Processing Stats For columns...")

        # Data types per column; needed for the pertinent calculations.
        type_details = self._count_data_types(df, columns, infer, mismatch)

        # How many columns have each dominant data type.
        count_types = {}
        for detail in type_details.values():
            dtype_name = detail["dtype"]
            count_types[dtype_name] = count_types.get(dtype_name, 0) + 1

        # Data types present in this data set, and how many there are.
        dtypes = [name for name, count in count_types.items() if count > 0]
        total = len(dtypes)

        count_types = fill_missing_col_types(count_types)

        columns_info = {
            "count_types": count_types,
            "total_count_dtypes": total,
            "dtypes_list": dtypes,
            "columns": type_details,
        }

        # Aggregation
        stats = Profiler.columns_agg(df, columns, buckets, relative_error,
                                     approx_count)

        # Calculate Frequency
        logger.print("Processing Frequency ...")
        df_freq = df.cols.select("*",
                                 data_type=PYSPARK_NUMERIC_TYPES,
                                 invert=True)
        if df_freq is not None:
            freq = df_freq.cols.frequency("*", buckets, True, self.rows_count)
        else:
            freq = None

        # Build the info dict of every column.
        copied_details = (
            ("column_dtype", 'dtype'),
            ("dtypes_stats", 'stats'),
            ("column_type", 'type'),
        )
        for col_name in columns:
            col_info = {}
            assign(col_info, "stats", stats[col_name], dict)

            if freq is not None and col_name in freq:
                assign(col_info, "frequency", freq[col_name])

            col_info["stats"].update(
                self.extra_columns_stats(df, col_name, stats))
            assign(col_info, "name", col_name)

            # Read the type details before the entry is replaced below.
            for info_key, detail_key in copied_details:
                assign(col_info, info_key,
                       columns_info["columns"][col_name][detail_key])
            assign(columns_info, "columns." + col_name, col_info, dict)

            # col_info is stored by reference, so this late addition is
            # visible in columns_info as well.
            assign(col_info, "id", df.cols.get_meta(col_name, "id"))

        return columns_info