Beispiel #1
0
def buckets_grouping(*buckets: float) -> \
        Callable[[float], Tuple[Optional[int], Optional[str]]]:
    """
    Build a function that maps a value to the 1-based index of its bucket.

    The boundaries split the values into ``len(buckets) + 1`` intervals:
    everything up to and including the first boundary, then one half-open
    interval ``(previous, current]`` per following boundary, and finally
    everything strictly above the last boundary. Intervals are tested in the
    order the boundaries were supplied and the first match wins.

    :param buckets: interval boundaries; none of them may be None
    :return: callable ``(x, e=None) -> (index, error)``. A None ``x`` yields
        ``(None, e)``, a match yields ``(index, e)`` and a value that fits no
        interval yields ``(None, "bucket not found for <x>")``. When no
        boundary is supplied, or one of them is None, the result of
        ``fixed_input(None, "buckets not provided")`` is returned instead.
    """
    if not buckets or any(boundary is None for boundary in buckets):
        return fixed_input(None, "buckets not provided")

    # None on either end stands for the open side of the two outer intervals.
    edges = [None, *buckets, None]
    intervals = list(zip(edges, edges[1:], range(1, len(edges))))

    def _app(x, e=None) -> Tuple[Optional[int], Optional[str]]:
        if x is None:
            return None, e
        for low, high, position in intervals:
            above_low = low is None or low < x
            if above_low and (high is None or x <= high):
                return position, e
        return None, "bucket not found for {}".format(x)

    return _app
Beispiel #2
0
def map_to_csv(fields, delimiter=","):
    """
    Build a function that serializes a mapping into a single CSV line.

    :param fields: ordered field names; defines the column order of the line
    :param delimiter: column separator handed to ``csv.writer``
    :return: callable ``(current_tuple, e=None) -> (csv_line, error)``.
        An incoming error is passed through as ``(None, e)``. When a field is
        missing from ``current_tuple`` the result is
        ``(None, {"output": "expected field <name> not found"})``. When
        ``fields`` is None or empty, the result of ``fixed_input(None, ...)``
        is returned instead.
    """

    def _list_to_csv(values):
        """
        Render one row with the csv module through an in-memory buffer.

        :param values: column values, already in output order
        :return: the CSV line, without a line terminator
        """
        with io.StringIO() as buffer:
            writer = csv.writer(buffer, quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator='', delimiter=delimiter)
            writer.writerow(values)
            return buffer.getvalue()

    def _app(current_tuple, e=None):
        if e is not None:
            return None, e
        row = []
        for field in fields:
            if field not in current_tuple:
                # ``e`` is always None at this point, so the error mapping has
                # to be created here; updating ``e`` in place would raise
                # AttributeError instead of reporting the missing field.
                return None, {"output": "expected field {} not found".format(field)}
            row.append(current_tuple[field])
        return _list_to_csv(row), None

    if fields is None or len(fields) == 0:
        return fixed_input(None, "no fields provided, cannot proceed without order")
    return _app
Beispiel #3
0
def csv_to_map(fields, delimiter=','):
    """
    Build a function that parses one CSV line into a dict keyed by ``fields``.

    :param fields: ordered field names, matched positionally to the columns
    :param delimiter: column separator handed to ``csv.reader``
    :return: callable ``(current_tuple, e=None) -> (mapping, error)``.
        None or empty input yields ``(None, "no input")``. A column count
        different from ``len(fields)`` yields ``(None, {"input": ...})``.
        Otherwise the mapping is returned together with the incoming ``e``.
        When ``fields`` is None or empty, the result of
        ``fixed_input(None, ...)`` is returned instead.
    """
    if fields is None or len(fields) == 0:
        return fixed_input(None, "no fields provided, cannot proceed without order")

    def _first_row(text):
        """
        Parse ``text`` with the csv module through an in-memory buffer.

        :param text: CSV content
        :return: the column values of the first row
        """
        return next(csv.reader(io.StringIO(text), delimiter=delimiter))

    def _app(current_tuple, e=None):
        if current_tuple is None or len(current_tuple) == 0:
            return None, "no input"
        values = _first_row(current_tuple)
        obtained, expected = len(values), len(fields)
        if obtained != expected:
            message = "unexpected number of fields {} obtained {} expected".format(obtained, expected)
            return None, {"input": message}
        return dict(zip(fields, values)), e

    return _app
Beispiel #4
0
def std_score_normalization(average: float, std_deviation: float) -> \
        Callable[[float], Tuple[Optional[float], Optional[str]]]:
    """
    Build a function that standardizes a value (z-score).

    The returned function computes ``(x - average) / std_deviation``.

    :param average: value subtracted from every input; required
    :param std_deviation: divisor applied after centering; required, != 0
    :return: callable ``(x, e=None) -> (score, None)``. A None ``x`` yields
        ``(None, None)``. The incoming ``e`` is accepted but not propagated.
        When a parameter is missing or ``std_deviation`` is 0, the result of
        ``fixed_input(None, <reason>)`` is returned instead.
    """
    if average is None:
        return fixed_input(None, "average is required")
    if std_deviation is None:
        return fixed_input(None, "std deviation is required")
    if std_deviation == 0:
        return fixed_input(None, "std deviation must be != 0")

    def _app(x: float, e: str = None) -> Tuple[Optional[float], Optional[str]]:
        if x is not None:
            centered = x - average
            return centered / std_deviation, None
        return None, None

    return _app
Beispiel #5
0
def min_max_normalization(min_value: float, max_value: float) -> \
        Callable[[float], Tuple[Optional[float], Optional[str]]]:
    """
    Build a function that rescales a value relative to a fixed range.

    The returned function computes
    ``(x - min_value) / (max_value - min_value)``, so ``min_value`` maps to 0
    and ``max_value`` maps to 1.

    :param min_value: lower end of the range; required
    :param max_value: upper end of the range; required, must be greater than
        ``min_value``
    :return: callable ``(x, e=None) -> (rescaled, None)``. A None ``x`` yields
        ``(None, None)``. The incoming ``e`` is accepted but not propagated.
        When a bound is missing, or ``max_value <= min_value`` (reported as
        "Min > Max"), the result of ``fixed_input(None, <reason>)`` is
        returned instead.
    """
    if min_value is None:
        return fixed_input(None, "Min value required")
    if max_value is None:
        return fixed_input(None, "Max value required")
    if max_value <= min_value:
        return fixed_input(None, "Min > Max")

    def _app(x: float, e: str = None) -> Tuple[Optional[float], Optional[str]]:
        if x is not None:
            offset = x - min_value
            return offset / (max_value - min_value), None
        return None, None

    return _app
Beispiel #6
0
def type_enforcer(enforcer_function: Callable[[T], U]) -> \
        Callable[[T], Tuple[Optional[U], Optional[str]]]:
    """
    Build a function that converts a value with ``enforcer_function``.

    :param enforcer_function: conversion applied to every non-None input
    :return: callable ``(x, e=None) -> (converted, error)``. A None ``x``
        yields ``(None, None)``. Any ``Exception`` raised by the conversion is
        reported as ``(None, "can't cast <x> to enforced type <exception>")``.
        The incoming ``e`` is accepted but not propagated. When
        ``enforcer_function`` is None, the result of
        ``fixed_input(None, "a enforcer function is required")`` is returned
        instead.
    """
    if enforcer_function is None:
        return fixed_input(None, "a enforcer function is required")

    def _app(x: T, e: str = None) -> Tuple[Optional[U], Optional[str]]:
        if x is None:
            return None, None
        try:
            converted = enforcer_function(x)
        except Exception as failure:
            return None, "can't cast {} to enforced type {}".format(x, failure)
        return converted, None

    return _app
Beispiel #7
0
def linear_category(categories: List[T]) -> \
        Callable[[T], Tuple[Optional[int], Optional[str]]]:
    """
    Build a function that replaces a category by its 1-based position.

    Positions follow the order of ``categories``; when a category is listed
    more than once, its last position is the one used.

    :param categories: known categories, in the order that defines the numbers
    :return: callable ``(x, e=None) -> (position, error)``. A None ``x``
        yields ``(None, None)`` and an unknown value yields
        ``(None, "value <x> not found on categories")``. The incoming ``e`` is
        accepted but not propagated. When ``categories`` is None or empty, the
        result of ``fixed_input(None, "no categories supplied")`` is returned
        instead.
    """
    if categories is None or len(categories) == 0:
        return fixed_input(None, "no categories supplied")

    category_map = {category: position
                    for position, category in enumerate(categories, start=1)}

    def _app(x: T, e: str = None) -> Tuple[Optional[int], Optional[str]]:
        if x is None:
            return None, None
        if x in category_map:
            return category_map[x], None
        return None, "value {} not found on categories".format(x)

    return _app
Beispiel #8
0
def column_category(categories: List[T]) -> \
        Callable[[T], Tuple[Optional[dict], Optional[str]]]:
    """
    Build a function that one-hot encodes a category.

    The encoded value is an ``OrderedDict`` holding every category, in the
    order of ``categories``, mapped to 1 for the supplied value and 0 for all
    the others.

    :param categories: known categories, in output order
    :return: callable ``(x, e=None) -> (encoded, error)``. A None ``x``
        yields ``(None, None)`` and an unknown value yields
        ``(None, "value <x> not found on categories")``. The incoming ``e`` is
        accepted but not propagated. When ``categories`` is None or empty, the
        result of ``fixed_input(None, "no categories supplied")`` is returned
        instead.
    """
    if categories is None or len(categories) == 0:
        return fixed_input(None, "no categories supplied")

    def _app(x: T, e: str = None) -> Tuple[Optional[dict], Optional[str]]:
        if x is None:
            return None, None
        if x not in categories:
            return None, "value {} not found on categories".format(x)
        encoded = OrderedDict(
            (category, 0 if category != x else 1) for category in categories)
        return encoded, None

    return _app
Beispiel #9
0
def map_to_csv(fields):
    """
    Build a function that serializes a mapping into a single CSV line.

    The row is rendered by the ``_list_to_csv`` helper, which is defined
    outside this function.

    :param fields: ordered field names; defines the column order of the line
    :return: callable ``(current_tuple, e=None) -> (csv_line, error)``.
        An incoming error is passed through as ``(None, e)``. When a field is
        missing from ``current_tuple`` the result is
        ``(None, {"output": "expected field <name> not found"})``. When
        ``fields`` is None or empty, the result of ``fixed_input(None, ...)``
        is returned instead.
    """
    def _app(current_tuple, e=None):
        if e is not None:
            return None, e
        row = []
        for field in fields:
            if field not in current_tuple:
                # ``e`` is always None at this point, so the error mapping has
                # to be created here; updating ``e`` in place would raise
                # AttributeError instead of reporting the missing field.
                return None, {
                    "output": "expected field {} not found".format(field)
                }
            row.append(current_tuple[field])
        return _list_to_csv(row), None

    if fields is None or len(fields) == 0:
        return fixed_input(None,
                           "no fields provided, cannot proceed without order")
    return _app
Beispiel #10
0
def csv_to_map(fields):
    """
    Build a function that parses one CSV line into a dict keyed by ``fields``.

    The line is split by the ``_csv_to_list`` helper, which is defined outside
    this function.

    :param fields: ordered field names, matched positionally to the columns
    :return: callable ``(current_tuple, e=None) -> (mapping, error)``.
        None or empty input yields ``(None, "no input")``. A column count
        different from ``len(fields)`` yields ``(None, {"input": ...})``.
        Otherwise the mapping is returned together with the incoming ``e``.
        When ``fields`` is None or empty, the result of
        ``fixed_input(None, ...)`` is returned instead.
    """
    if fields is None or len(fields) == 0:
        return fixed_input(None,
                           "no fields provided, cannot proceed without order")

    def _app(current_tuple, e=None):
        if current_tuple is None or len(current_tuple) == 0:
            return None, "no input"
        values = _csv_to_list(current_tuple)
        obtained, expected = len(values), len(fields)
        if obtained != expected:
            message = "unexpected number of fields {} obtained {} expected".format(
                obtained, expected)
            return None, {"input": message}
        return dict(zip(fields, values)), e

    return _app