def buckets_grouping(*buckets: float) -> \
        Callable[[float], Tuple[Optional[int], Optional[str]]]:
    """
    Build a function that maps a value to the 1-based index of its bucket.

    The given bounds split the number line into ``len(buckets) + 1``
    intervals, each open on the left and closed on the right:
    ``(-inf, b1]``, ``(b1, b2]``, ..., ``(bn, +inf)``. The bounds are used
    in the order given; they are not sorted here.

    :param buckets: interval bounds; at least one, none of them ``None``
    :return: a callable ``(x, e=None) -> (index, error)``; when no bounds
        (or a ``None`` bound) are supplied, the result of
        ``fixed_input(None, "buckets not provided")`` instead
    """
    if len(buckets) <= 0 or any(bound is None for bound in buckets):
        return fixed_input(None, "buckets not provided")

    bounds = list(buckets)
    # Pair each bound with its successor; ``None`` marks the open ends.
    intervals = list(zip([None] + bounds,
                         bounds + [None],
                         range(1, len(bounds) + 2)))

    def _app(x, e=None) -> Tuple[Optional[int], Optional[str]]:
        # A missing value has no bucket; the incoming error is passed along.
        if x is None:
            return None, e
        for lower, upper, index in intervals:
            if lower is None:
                matched = x <= upper
            elif upper is None:
                matched = lower < x
            else:
                matched = lower < x <= upper
            if matched:
                return index, e
        # Only values that fail every comparison (e.g. NaN) end up here.
        return None, "bucket not found for {}".format(x)

    return _app
def map_to_csv(fields, delimiter=","):
    """
    Build a function that serializes a dict as a single CSV line.

    The values are written in the order given by ``fields`` using
    ``csv.QUOTE_NONNUMERIC`` and no line terminator.

    :param fields: ordered field names to extract from each input dict
    :param delimiter: column separator handed to ``csv.writer``
    :return: a callable ``(current_tuple, e=None) -> (csv_line, error)``;
        when ``fields`` is ``None`` or empty, the result of ``fixed_input``
        carrying a "no fields provided" error instead
    """

    def _list_to_csv(values):
        """
        Render one row to a string through an in-memory buffer, so no
        real file is needed.

        :param values: the row values, already in output order
        :return: the CSV text for that row
        """
        io_file = io.StringIO()
        writer = csv.writer(io_file, quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator='', delimiter=delimiter)
        writer.writerow(values)
        return io_file.getvalue()

    def _app(current_tuple, e=None):
        # An upstream error short-circuits the conversion untouched.
        if e is not None:
            return None, e
        csv_list = []
        for f in fields:
            if f not in current_tuple:
                # ``e`` is necessarily None at this point, so the error must
                # be a new dict; calling ``e.update`` here used to raise
                # AttributeError instead of reporting the missing field.
                return None, {
                    "output": "expected field {} not found".format(f)
                }
            csv_list.append(current_tuple[f])
        return _list_to_csv(csv_list), e

    if fields is None or len(fields) == 0:
        return fixed_input(None, "no fields provided, cannot proceed without order")
    return _app
def csv_to_map(fields, delimiter=','):
    """
    Build a function that parses one CSV line into a dict keyed by
    ``fields``.

    :param fields: ordered field names, one per expected CSV column
    :param delimiter: column separator handed to ``csv.reader``
    :return: a callable ``(current_tuple, e=None) -> (mapping, error)``;
        when ``fields`` is ``None`` or empty, the result of ``fixed_input``
        carrying a "no fields provided" error instead
    """
    if fields is None or len(fields) == 0:
        return fixed_input(None, "no fields provided, cannot proceed without order")

    def _first_row(csv_input):
        """
        Parse the text through an in-memory buffer and return its first
        row, so no real file is needed.

        :param csv_input: CSV text
        :return: list of column values of the first row
        """
        reader = csv.reader(io.StringIO(csv_input), delimiter=delimiter)
        return next(reader)

    def _app(current_tuple, e=None):
        if current_tuple is None or len(current_tuple) == 0:
            return None, "no input"
        values = _first_row(current_tuple)
        obtained, expected = len(values), len(fields)
        if obtained != expected:
            message = "unexpected number of fields {} obtained {} expected".format(
                obtained, expected)
            return None, {"input": message}
        # The incoming error, if any, travels along with the parsed mapping.
        return dict(zip(fields, values)), e

    return _app
def std_score_normalization(average: float, std_deviation: float) -> \
        Callable[[float], Tuple[Optional[float], Optional[str]]]:
    """
    Build a standard-score normalizer.

    The returned function computes ``(x - average) / std_deviation``.

    :param average: mean of the reference values; required
    :param std_deviation: standard deviation of the reference values;
        required and different from zero
    :return: a callable ``(x, e=None) -> (normalized, None)``; when a
        parameter is missing or the deviation is zero, the result of
        ``fixed_input`` carrying the matching error message instead
    """
    if average is None:
        return fixed_input(None, "average is required")
    if std_deviation is None:
        return fixed_input(None, "std deviation is required")
    if std_deviation == 0:
        return fixed_input(None, "std deviation must be != 0")

    def _app(x: float, e: str = None) -> Tuple[Optional[float], Optional[str]]:
        # The incoming error is not propagated by this step.
        if x is None:
            return None, None
        distance_from_mean = x - average
        return distance_from_mean / std_deviation, None

    return _app
def min_max_normalization(min_value: float, max_value: float) -> \
        Callable[[float], Tuple[Optional[float], Optional[str]]]:
    """
    Build a min-max (rescaling) normalizer.

    The returned function computes
    ``(x - min_value) / (max_value - min_value)``.

    :param min_value: lower reference value; required
    :param max_value: upper reference value; required and strictly greater
        than ``min_value``
    :return: a callable ``(x, e=None) -> (rescaled, None)``; when a bound is
        missing or ``max_value <= min_value``, the result of ``fixed_input``
        carrying the matching error message instead
    """
    if min_value is None:
        return fixed_input(None, "Min value required")
    if max_value is None:
        return fixed_input(None, "Max value required")
    if max_value <= min_value:
        return fixed_input(None, "Min > Max")

    def _app(x: float, e: str = None) -> Tuple[Optional[float], Optional[str]]:
        # The incoming error is not propagated by this step.
        if x is None:
            return None, None
        offset = x - min_value
        span = max_value - min_value
        return offset / span, None

    return _app
def type_enforcer(enforcer_function: Callable[[T], U]) -> \
        Callable[[T], Tuple[Optional[U], Optional[str]]]:
    """
    Build a function that converts a value through ``enforcer_function``.

    Any exception raised by the conversion is caught and reported as an
    error message that embeds the value and the exception text.

    :param enforcer_function: the conversion to apply, e.g. ``int``
    :return: a callable ``(x, e=None) -> (converted, error)``; when no
        conversion function is supplied, the result of ``fixed_input``
        carrying an "enforcer function is required" error instead
    """
    if enforcer_function is None:
        return fixed_input(None, "a enforcer function is required")

    def _app(x: T, e: str = None) -> Tuple[Optional[U], Optional[str]]:
        # The incoming error is not propagated by this step.
        if x is None:
            return None, None
        try:
            converted = enforcer_function(x)
        except Exception as failure:
            message = "can't cast {} to enforced type {}".format(x, failure)
            return None, message
        else:
            return converted, None

    return _app
def linear_category(categories: List[T]) -> \
        Callable[[T], Tuple[Optional[int], Optional[str]]]:
    """
    Build a function that replaces a value by its numeric category.

    Each category is numbered by its 1-based position in ``categories``;
    if a category appears more than once, its last position is used.

    :param categories: the known category values, in numbering order
    :return: a callable ``(x, e=None) -> (number, error)``; when
        ``categories`` is ``None`` or empty, the result of ``fixed_input``
        carrying a "no categories supplied" error instead
    """
    if categories is None or len(categories) == 0:
        return fixed_input(None, "no categories supplied")

    category_map = {}
    for position, value in enumerate(categories, start=1):
        category_map[value] = position

    def _app(x: T, e: str = None) -> Tuple[Optional[int], Optional[str]]:
        # The incoming error is not propagated by this step.
        if x is None:
            return None, None
        if x in category_map:
            return category_map[x], None
        return None, "value {} not found on categories".format(x)

    return _app
def column_category(categories: List[T]) -> \
        Callable[[T], Tuple[Optional[dict], Optional[str]]]:
    """
    Build a function that turns a value into a one-hot mapping.

    The result is an ``OrderedDict`` with one entry per category, in the
    order of ``categories``; the entry for the supplied value is 1 and
    every other entry is 0.

    :param categories: the known category values, in column order
    :return: a callable ``(x, e=None) -> (one_hot, error)``; when
        ``categories`` is ``None`` or empty, the result of ``fixed_input``
        carrying a "no categories supplied" error instead
    """
    if categories is None or len(categories) == 0:
        return fixed_input(None, "no categories supplied")

    def _app(x: T, e: str = None) -> Tuple[Optional[dict], Optional[str]]:
        # The incoming error is not propagated by this step.
        if x is None:
            return None, None
        if x not in categories:
            return None, "value {} not found on categories".format(x)
        one_hot = OrderedDict()
        for category in categories:
            one_hot[category] = 0 if category != x else 1
        return one_hot, None

    return _app
def map_to_csv(fields):
    """
    Build a function that serializes a dict as a single CSV line.

    The values are collected in the order given by ``fields`` and handed
    to the ``_list_to_csv`` helper, which is defined outside this function.

    :param fields: ordered field names to extract from each input dict
    :return: a callable ``(current_tuple, e=None) -> (csv_line, error)``;
        when ``fields`` is ``None`` or empty, the result of ``fixed_input``
        carrying a "no fields provided" error instead
    """

    def _app(current_tuple, e=None):
        # An upstream error short-circuits the conversion untouched.
        if e is not None:
            return None, e
        csv_list = []
        for f in fields:
            if f not in current_tuple:
                # ``e`` is necessarily None at this point, so the error must
                # be a new dict; calling ``e.update`` here used to raise
                # AttributeError instead of reporting the missing field.
                return None, {
                    "output": "expected field {} not found".format(f)
                }
            csv_list.append(current_tuple[f])
        return _list_to_csv(csv_list), e

    if fields is None or len(fields) == 0:
        return fixed_input(None, "no fields provided, cannot proceed without order")
    return _app
def csv_to_map(fields):
    """
    Build a function that parses one CSV line into a dict keyed by
    ``fields``.

    The line is split by the ``_csv_to_list`` helper, which is defined
    outside this function.

    :param fields: ordered field names, one per expected CSV column
    :return: a callable ``(current_tuple, e=None) -> (mapping, error)``;
        when ``fields`` is ``None`` or empty, the result of ``fixed_input``
        carrying a "no fields provided" error instead
    """
    if fields is None or len(fields) == 0:
        return fixed_input(None, "no fields provided, cannot proceed without order")

    def _app(current_tuple, e=None):
        if current_tuple is None or len(current_tuple) == 0:
            return None, "no input"
        values = _csv_to_list(current_tuple)
        obtained, expected = len(values), len(fields)
        if obtained != expected:
            message = "unexpected number of fields {} obtained {} expected".format(
                obtained, expected)
            return None, {"input": message}
        # The incoming error, if any, travels along with the parsed mapping.
        return dict(zip(fields, values)), e

    return _app