def join(delimiter, iterable, **kwargs):
    """Concatenate strings from ``iterable``, putting ``delimiter``
    between them.

    :param delimiter: Delimiter to put between strings
    :param iterable: Iterable to join

    Optional keyword arguments control how non-string items are treated:

    :param errors: Policy for non-strings found in the input.
                   Possible values include:

                     * ``'ignore'`` (or ``None``) -- skip non-strings
                     * ``'cast'`` (or ``False``) -- convert non-strings
                       by calling the class of ``delimiter`` on them
                     * ``'raise'`` (or ``True``) -- pass every item through
                       ``ensure_string``; this is the default
                     * ``'replace'`` -- substitute non-strings
                       using the ``with_`` argument

    :param with_: Replacement used when ``errors == 'replace'``.
                  Either a string, or a callable that takes the offending
                  value and returns a string replacement.

    :return: Result of ``delimiter.join`` over the processed items

    :raise ValueError: If ``errors == 'replace'`` but ``with_`` is missing
    :raise TypeError: If ``with_`` is neither a string nor a callable,
                      or if ``errors`` is not a recognized policy

    .. versionadded:: 0.0.3
       Allow to specify error handling policy through ``errors`` parameter
    """
    ensure_string(delimiter)
    ensure_iterable(iterable)
    ensure_keyword_args(kwargs, optional=('errors', 'with_'))

    policy = kwargs.get('errors', True)

    # every branch only builds a lazy pipeline; items are actually processed
    # when ``delimiter.join`` consumes it at the end
    if policy in ('raise', True):
        items = (ensure_string(item) for item in iterable)
    elif policy in ('ignore', None):
        items = (item for item in iterable if is_string(item))
    elif policy in ('cast', False):
        to_string = delimiter.__class__
        items = (to_string(item) for item in iterable)
    elif policy == 'replace':
        if 'with_' not in kwargs:
            raise ValueError("'replace' error policy requires specifying "
                             "replacement through with_=")
        with_ = kwargs['with_']

        # normalize the replacement to a one-argument function
        if is_string(with_):
            def replacement(_):
                return with_
        elif callable(with_):
            replacement = with_
        else:
            raise TypeError("error replacement must be a string or function, "
                            "got %s" % type(with_).__name__)

        # the replacement's result is itself checked to be a string
        items = (item if is_string(item) else ensure_string(replacement(item))
                 for item in iterable)
    else:
        raise TypeError(
            "%r is not a valid error handling policy for join()" % (policy,))

    return delimiter.join(items)
def join(delimiter, iterable, **kwargs):
    """Concatenate strings from ``iterable``, putting ``delimiter``
    between them.

    :param delimiter: Delimiter to put between strings
    :param iterable: Iterable to join

    Optional keyword arguments control how non-string items are treated:

    :param errors: Policy for non-strings found in the input.
                   Possible values include:

                     * ``'ignore'`` (or ``None``) -- skip non-strings
                     * ``'cast'`` (or ``False``) -- convert non-strings
                       by calling the class of ``delimiter`` on them
                     * ``'raise'`` (or ``True``) -- pass every item through
                       ``ensure_string``; this is the default
                     * ``'replace'`` -- substitute non-strings
                       using the ``with_`` argument

    :param with_: Replacement used when ``errors == 'replace'``.
                  Either a string, or a callable that takes the offending
                  value and returns a string replacement.

    :return: Result of ``delimiter.join`` over the processed items

    :raise ValueError: If ``errors == 'replace'`` but ``with_`` is missing
    :raise TypeError: If ``with_`` is neither a string nor a callable,
                      or if ``errors`` is not a recognized policy

    .. versionadded:: 0.0.3
       Allow to specify error handling policy through ``errors`` parameter
    """
    ensure_string(delimiter)
    ensure_iterable(iterable)
    ensure_keyword_args(kwargs, optional=('errors', 'with_'))

    policy = kwargs.get('errors', True)

    # every branch only builds a lazy pipeline; items are actually processed
    # when ``delimiter.join`` consumes it at the end
    if policy in ('raise', True):
        items = (ensure_string(item) for item in iterable)
    elif policy in ('ignore', None):
        items = (item for item in iterable if is_string(item))
    elif policy in ('cast', False):
        to_string = delimiter.__class__
        items = (to_string(item) for item in iterable)
    elif policy == 'replace':
        if 'with_' not in kwargs:
            raise ValueError("'replace' error policy requires specifying "
                             "replacement through with_=")
        with_ = kwargs['with_']

        # normalize the replacement to a one-argument function
        if is_string(with_):
            def replacement(_):
                return with_
        elif callable(with_):
            replacement = with_
        else:
            raise TypeError("error replacement must be a string or function, "
                            "got %s" % type(with_).__name__)

        # the replacement's result is itself checked to be a string
        items = (item if is_string(item) else ensure_string(replacement(item))
                 for item in iterable)
    else:
        raise TypeError(
            "%r is not a valid error handling policy for join()" % (policy,))

    return delimiter.join(items)
def replace(needle, with_=None, in_=None):
    """Replace occurrences of string(s) with other string(s) in (a) string(s).

    Unlike the built in :meth:`str.replace` method, this function provides
    clean API that clearly distinguishes the "needle" (string to replace),
    the replacement string, and the target string to perform replacement in
    (the "haystack").

    Additionally, a simultaneous replacement of several needles is possible.
    Note that this is different from performing multiple separate
    replacements one after another.

    Examples::

        replace('foo', with_='bar', in_=some_text)
        replace('foo', with_='bar').in_(other_text)
        replace('foo').with_('bar').in_(another_text)
        replace(['foo', 'bar']).with_('baz').in_(perhaps_a_long_text)
        replace({'foo': 'bar', 'baz': 'qud'}).in_(even_longer_text)

    :param needle: String to replace, iterable thereof,
                   or a mapping from needles to corresponding replacements
    :param with_: Replacement string, if ``needle`` was not a mapping
    :param in_: Optional string to perform replacement in

    :return: If ``in_`` was provided, result of performing the replacement
             in it. Otherwise, a :class:`Replacer` object is returned,
             allowing e.g. to perform the same replacements
             in many haystacks.

    :raise TypeError: If ``needle`` is ``None``, or is a non-mapping iterable
                      whose items are neither all pairs nor all strings
    :raise ValueError: If ``needle`` is empty
    """
    if needle is None:
        raise TypeError("replacement needle cannot be None")
    if not needle:
        raise ValueError("replacement needle cannot be empty")

    if is_string(needle):
        replacer = Replacer((needle,))
    else:
        ensure_iterable(needle)
        if not is_mapping(needle):
            # A one-shot iterator would be exhausted by the first validation
            # pass below, leaving nothing for ``dict()`` or the second pass,
            # and it always looks truthy to the emptiness check above.
            # Materialize it so that it behaves like any other collection.
            if iter(needle) is needle:
                needle = list(needle)
                if not needle:
                    raise ValueError("replacement needle cannot be empty")

            if all(imap(is_pair, needle)):
                # iterable of (needle, replacement) pairs acts as a mapping
                needle = dict(needle)
            elif not all(imap(is_string, needle)):
                raise TypeError("invalid replacement needle")
        replacer = Replacer(needle)

    if with_ is not None:
        ensure_string(with_)
        replacer = replacer.with_(with_)

    if in_ is not None:
        ensure_string(in_)
        return replacer.in_(in_)

    return replacer
def replace(needle, with_=None, in_=None):
    """Replace occurrences of string(s) with other string(s) in (a) string(s).

    Unlike the built in :meth:`str.replace` method, this function provides
    clean API that clearly distinguishes the "needle" (string to replace),
    the replacement string, and the target string to perform replacement in
    (the "haystack").

    Additionally, a simultaneous replacement of several needles is possible.
    Note that this is different from performing multiple separate
    replacements one after another.

    Examples::

        replace('foo', with_='bar', in_=some_text)
        replace('foo', with_='bar').in_(other_text)
        replace('foo').with_('bar').in_(another_text)
        replace(['foo', 'bar']).with_('baz').in_(perhaps_a_long_text)
        replace({'foo': 'bar', 'baz': 'qud'}).in_(even_longer_text)

    :param needle: String to replace, iterable thereof,
                   or a mapping from needles to corresponding replacements
    :param with_: Replacement string, if ``needle`` was not a mapping
    :param in_: Optional string to perform replacement in

    :return: If ``in_`` was provided, result of performing the replacement
             in it. Otherwise, a :class:`Replacer` object is returned,
             allowing e.g. to perform the same replacements
             in many haystacks.

    :raise TypeError: If ``needle`` is ``None``, or is a non-mapping iterable
                      whose items are neither all pairs nor all strings
    :raise ValueError: If ``needle`` is empty
    """
    if needle is None:
        raise TypeError("replacement needle cannot be None")
    if not needle:
        raise ValueError("replacement needle cannot be empty")

    if is_string(needle):
        replacer = Replacer((needle,))
    else:
        ensure_iterable(needle)
        if not is_mapping(needle):
            # A one-shot iterator would be exhausted by the first validation
            # pass below, leaving nothing for ``dict()`` or the second pass,
            # and it always looks truthy to the emptiness check above.
            # Materialize it so that it behaves like any other collection.
            if iter(needle) is needle:
                needle = list(needle)
                if not needle:
                    raise ValueError("replacement needle cannot be empty")

            if all(imap(is_pair, needle)):
                # iterable of (needle, replacement) pairs acts as a mapping
                needle = dict(needle)
            elif not all(imap(is_string, needle)):
                raise TypeError("invalid replacement needle")
        replacer = Replacer(needle)

    if with_ is not None:
        ensure_string(with_)
        replacer = replacer.with_(with_)

    if in_ is not None:
        ensure_string(in_)
        return replacer.in_(in_)

    return replacer
def split(s, by=None, maxsplit=None):
    """Split a string based on given delimiter(s).
    Delimiters can be either strings or compiled regular expression objects.

    :param s: String to split
    :param by: A delimiter, or iterable thereof.
    :param maxsplit: Maximum number of splits to perform.
                     ``None`` means no limit,
                     while 0 does not perform a split at all.

    :return: List of words in the string ``s``
             that were separated by delimiter(s)

    :raise ValueError: If the separator is an empty string or regex,
                       or an empty iterable of delimiters
    :raise TypeError: If ``by`` is not a string, regex, or iterable
    """
    ensure_string(s)

    # TODO(xion): Consider introducing a case for ``split('')``
    # to make it return ``['']`` rather than default ``[]`` thru ``str.split``.
    # It's the so-called "whitespace split" that normally eliminates
    # empty strings from result. However, ``split(s)`` for any other ``s``
    # always returns ``[s]`` so these two approaches are at odds here.
    # (Possibly refer to split functions in other languages for comparison).

    # string delimiter are handled by appropriate standard function
    if by is None or is_string(by):
        return s.split(by) if maxsplit is None else s.split(by, maxsplit)

    # regex delimiters have certain special cases handled explicitly below,
    # so that we do the same things that ``str.split`` does
    if is_regex(by):
        if not by.pattern:
            return s.split('')  # will fail with proper exception & message
        if maxsplit == 0:
            return [s]
        # regex ``split`` uses 0 (not ``None``) to mean "no limit"
        return by.split(s, maxsplit=maxsplit or 0)

    # multiple delimiters are handled by regex that matches them all
    if is_iterable(by):
        # Materialize the delimiters *before* testing for emptiness:
        # a one-shot iterator is always truthy, so checking it directly
        # would let an empty one through.
        by = list(imap(ensure_string, by))
        if not by:
            raise ValueError("empty separator list")
        if not s:
            return ['']  # quickly eliminate trivial case
        or_ = s.__class__('|')
        regex = join(or_, imap(re.escape, by))
        return split(s, by=re.compile(regex), maxsplit=maxsplit)

    raise TypeError("invalid separator")
def assertAny(self, arg, iterable=ABSENT, msg=None):
    """Assert that some element of an iterable is truthy,
    or that some element satisfies given predicate.

    :param arg: Predicate, or iterable of elements to check for truthiness
    :param iterable: Iterable of predicate arguments
                     (if predicate was given)
    :param msg: Optional message passed to the failure helper

    Examples::

        # check if any element satisfies a predicate
        self.assertAny(is_valid, iterable)

        # check if any element is already truthy
        self.assertAny(iterable_of_maybe_truthies)
    """
    if not callable(arg):
        # single-iterable form: ``arg`` holds the elements themselves
        self.__fail_unless_iterable(arg)

        # shift arguments to the left: here the second positional argument,
        # if present, is really the message
        if msg is None and iterable is not ABSENT:
            msg = iterable

        if not any(arg):
            self.__fail(msg, "no truthy elements found")
        return

    # predicate form: ``arg`` is applied to every element of ``iterable``
    self.__fail_unless_iterable(iterable)
    satisfied = any(arg(element) for element in iterable)
    if not satisfied:
        self.__fail(msg, "predicate not satisfied for any element")
def in_(self, haystack):
    """Apply the stored replacements to given string.

    :param haystack: String to perform replacements in

    :return: ``haystack`` after the replacements

    :raise ReplacementError: If the stored replacements are not a mapping,
                             i.e. no replacement(s) have been provided yet
    """
    from taipan.collections import dicts

    ensure_string(haystack)

    replacements = self._replacements
    if not is_mapping(replacements):
        raise ReplacementError("string replacements not provided")

    # handle special cases: nothing to replace, or a single needle
    # for which plain ``replace`` method of the string suffices
    if not replacements:
        return haystack
    if len(replacements) == 1:
        only_item = dicts.peekitem(replacements)
        return haystack.replace(*only_item)

    # construct a regex matching any of the needles in the order
    # of descending length (to prevent issues if they contain each other)
    needles = sorted(replacements, key=len, reverse=True)
    alternation = haystack.__class__('|')
    regex = join(alternation, imap(re.escape, needles))

    # do the substitution, looking up the replacement for every match
    def substitute(match):
        return self._replacements[match.group()]

    return re.sub(regex, substitute, haystack)
def nor(*fs):
    """Combine functions into one that is true for given arguments
    iff each of the functions evaluates to false for those arguments.

    :param fs: Functions to combine

    :return: Short-circuiting function performing logical NOR operation
             on results of ``fs`` applied to its arguments
    """
    ensure_argcount(fs, min_=1)
    fs = [ensure_callable(f) for f in fs]

    count = len(fs)
    if count == 1:
        return not_(fs[0])

    # dedicated closures for the small cases avoid looping over a list
    if count == 2:
        first, second = fs
        return lambda *args, **kwargs: not (
            first(*args, **kwargs) or second(*args, **kwargs))
    if count == 3:
        first, second, third = fs
        return lambda *args, **kwargs: not (
            first(*args, **kwargs) or
            second(*args, **kwargs) or
            third(*args, **kwargs))

    def g(*args, **kwargs):
        # ``any`` stops at the first truthy result, just like chained ``or``
        return not any(f(*args, **kwargs) for f in fs)

    return g
def compose(*fs):
    """Build a composition of the functions passed in.

    :param fs: One-argument functions, with the possible exception of last
               one that can accept arbitrary arguments

    :return: Function returning a result of functions from ``fs``
             applied consecutively to the argument(s), in reverse order
    """
    ensure_argcount(fs, min_=1)
    fs = [ensure_callable(f) for f in fs]

    count = len(fs)
    if count == 1:
        return fs[0]

    # dedicated closures for the small cases avoid looping over a list
    if count == 2:
        outer, inner = fs
        return lambda *args, **kwargs: outer(inner(*args, **kwargs))
    if count == 3:
        outer, middle, inner = fs
        return lambda *args, **kwargs: outer(middle(inner(*args, **kwargs)))

    # the last function receives the original arguments; the remaining ones
    # are then applied right-to-left to the intermediate result
    innermost = fs[-1]
    remaining = fs[-2::-1]

    def g(*args, **kwargs):
        result = innermost(*args, **kwargs)
        for f in remaining:
            result = f(result)
        return result

    return g
def in_(self, haystack):
    """Apply the stored replacements to given string.

    :param haystack: String to perform replacements in

    :return: ``haystack`` after the replacements

    :raise ReplacementError: If the stored replacements are not a mapping,
                             i.e. no replacement(s) have been provided yet
    """
    from taipan.collections import dicts

    ensure_string(haystack)

    replacements = self._replacements
    if not is_mapping(replacements):
        raise ReplacementError("string replacements not provided")

    # handle special cases: nothing to replace, or a single needle
    # for which plain ``replace`` method of the string suffices
    if not replacements:
        return haystack
    if len(replacements) == 1:
        only_item = dicts.peekitem(replacements)
        return haystack.replace(*only_item)

    # construct a regex matching any of the needles in the order
    # of descending length (to prevent issues if they contain each other)
    needles = sorted(replacements, key=len, reverse=True)
    alternation = haystack.__class__('|')
    regex = join(alternation, imap(re.escape, needles))

    # do the substitution, looking up the replacement for every match
    def substitute(match):
        return self._replacements[match.group()]

    return re.sub(regex, substitute, haystack)
def batch(iterable, n, fillvalue=None):
    """Group the elements of given iterable into batches.

    Resulting iterable will yield tuples containing at most ``n`` elements
    (might be less if ``fillvalue`` isn't specified).

    :param iterable: Iterable whose elements are to be batched
    :param n: Number of items in every batch
    :param fillvalue: Value to fill the last batch with. If None, last batch
                      might be shorter than ``n`` elements

    :return: Iterable of batches

    :raise TypeError: If ``n`` is not an integer
    :raise ValueError: If ``n`` is not positive

    .. note::

        This is an extended version of grouper() recipe
        from the :module:`itertools` module documentation.
    """
    ensure_iterable(iterable)
    if not isinstance(n, Integral):
        raise TypeError("invalid number of elements in a batch")
    if not (n > 0):
        raise ValueError("number of elements in a batch must be positive")

    # since we must use ``izip_longest``
    # (``izip`` fails if ``n`` is greater than length of ``iterable``),
    # we will apply some 'trimming' to resulting tuples if necessary
    padding = fillvalue
    if padding is None:
        # unique sentinel pads the last batch and is stripped out afterwards
        padding = object()

        def trimmer(chunk):
            return tuple(x for x in chunk if x is not padding)
    else:
        trimmer = identity()

    # ``n`` references to the very same iterator make ``izip_longest``
    # pull ``n`` consecutive elements into each resulting tuple
    source = iter(iterable)
    zipped = izip_longest(*([source] * n), fillvalue=padding)
    return imap(trimmer, zipped)
def concat(list_):
    """Concatenate a list of lists into a single resulting list.

    :param list_: Iterable of sequences to concatenate

    :return: New list with the elements of all sequences, in order
    """
    ensure_iterable(list_)

    # we don't use ``itertools.chain.from_iterable``, because that would
    # inadvertently allow strings, treating them as lists of characters
    # and potentially producing very difficult bugs
    result = []
    for item in list_:
        # plain ``+`` (rather than ``extend``) is deliberate: it only accepts
        # operands that a list can be concatenated with
        result = result + ensure_sequence(item)
    return result
def _harmonize_subset_types(set_, subset_tuples):
    """Adapt an iterable of subset tuples to the type of the original "set".

    :param set_: The original collection the subsets were taken from
    :param subset_tuples: Iterable of tuples, each being a subset of ``set_``

    :return: ``subset_tuples`` unchanged if ``set_`` is a tuple;
             otherwise a lazy iterable of subsets converted to the class
             of ``set_`` (if it is a set) or to plain tuples
    """
    # tuples in, tuples out: nothing to convert
    if is_tuple(set_):
        return subset_tuples

    # if argument is originally a set, return subsets also as sets;
    # otherwise (for non-set collection), return subsets as tuples
    if is_set(set_):
        subset_class = set_.__class__
    else:
        subset_class = tuple
    return imap(subset_class, subset_tuples)
def extend(dict_, *dicts, **kwargs):
    """Extend a dictionary with keys and values from other dictionaries.

    :param dict_: Dictionary to extend
    :param dicts: Dictionaries to take keys and values from

    Optional keyword arguments allow to control the exact way
    in which ``dict_`` will be extended.

    :param overwrite:

        Whether repeated keys should have their values overwritten,
        retaining the last value, as per given order of dictionaries.
        This is the default behavior (equivalent to ``overwrite=True``).
        If ``overwrite=False``, repeated keys are simply ignored.

        Example::

            >> foo = {'a': 1}
            >> extend(foo, {'a': 10, 'b': 2}, overwrite=True)
            {'a': 10, 'b': 2}
            >> foo = {'a': 1}
            >> extend(foo, {'a': 10, 'b': 2}, overwrite=False)
            {'a': 1, 'b': 2}

    :param deep:

        Whether extending should proceed recursively, and cause
        corresponding subdictionaries to be merged into each other.
        By default, this does not happen (equivalent to ``deep=False``).

        Example::

            >> foo = {'a': {'b': 1}}
            >> extend(foo, {'a': {'c': 2}}, deep=False)
            {'a': {'c': 2}}
            >> foo = {'a': {'b': 1}}
            >> extend(foo, {'a': {'c': 2}}, deep=True)
            {'a': {'b': 1, 'c': 2}}

    :return: Extended ``dict_``

    .. versionadded:: 0.0.2
    """
    ensure_mapping(dict_)
    sources = [ensure_mapping(d) for d in dicts]
    ensure_keyword_args(kwargs, optional=('deep', 'overwrite'))

    deep = kwargs.get('deep', False)
    overwrite = kwargs.get('overwrite', True)

    # ``dict_`` goes first and ``copy=False`` is passed along with it
    return _nary_dict_update(
        [dict_] + sources, copy=False, deep=deep, overwrite=overwrite)
def intertwine(*iterables):
    """Construct an iterable which intertwines given iterables.

    The resulting iterable will return an item from first sequence,
    then from second, etc. until the last one - and then another item from
    first, then from second, etc. - up until all iterables are exhausted.

    :param iterables: Iterables to intertwine

    :return: Lazy iterable over the intertwined items
    """
    iterables = tuple(ensure_iterable(it) for it in iterables)

    # unique sentinel marks the "holes" left by iterables that ran out early
    empty = object()
    rounds = izip_longest(*iterables, fillvalue=empty)

    def intertwined():
        for round_ in rounds:
            for item in round_:
                if item is not empty:
                    yield item

    return intertwined()
def merge(*dicts, **kwargs):
    r"""Merges two or more dictionaries into a single one.

    :param dicts: Dictionaries to merge (at least one is required)

    Optional keyword arguments allow to control the exact way
    in which the dictionaries will be merged.

    :param overwrite:

        Whether repeated keys should have their values overwritten,
        retaining the last value, as per given order of dictionaries.
        This is the default behavior (equivalent to ``overwrite=True``).
        If ``overwrite=False``, repeated keys are simply ignored.

        Example::

            >> merge({'a': 1}, {'a': 10, 'b': 2}, overwrite=True)
            {'a': 10, 'b': 2}
            >> merge({'a': 1}, {'a': 10, 'b': 2}, overwrite=False)
            {'a': 1, 'b': 2}

    :param deep:

        Whether merging should proceed recursively, and cause
        corresponding subdictionaries to be merged into each other.
        By default, this does not happen (equivalent to ``deep=False``).

        Example::

            >> merge({'a': {'b': 1}}, {'a': {'c': 2}}, deep=False)
            {'a': {'c': 2}}
            >> merge({'a': {'b': 1}}, {'a': {'c': 2}}, deep=True)
            {'a': {'b': 1, 'c': 2}}

    :return: Merged dictionary

    .. note:: For ``dict``\ s ``a`` and ``b``, ``merge(a, b)`` is equivalent
              to ``extend({}, a, b)``.

    .. versionadded:: 0.0.2
       The ``overwrite`` keyword argument.
    """
    # NOTE: the docstring above is a raw string, because the reST markup
    # ``\ `` is not a valid Python escape sequence in a regular literal
    ensure_argcount(dicts, min_=1)
    dicts = list(imap(ensure_mapping, dicts))
    ensure_keyword_args(kwargs, optional=('deep', 'overwrite'))

    # ``copy=True`` is what distinguishes this call from the one in ``extend``
    return _nary_dict_update(dicts, copy=True,
                             deep=kwargs.get('deep', False),
                             overwrite=kwargs.get('overwrite', True))
def _get_terminators(self, ctor_kwargs):
    """Collect names of fluent terminators from decorator's arguments.

    Both the ``'terminator'`` and ``'terminators'`` keys are consulted;
    each may hold a single name, or an iterable of names.

    :param ctor_kwargs: Keyword arguments to look the terminators up in

    :return: Frozen set of terminator names

    :raise TypeError: If a value is neither a string nor an iterable
    """
    names = []
    for key in ('terminator', 'terminators'):
        if key in ctor_kwargs:
            value = ctor_kwargs[key]
            if is_string(value):
                # a single name
                names.append(value)
            elif is_iterable(value):
                # several names, each of which is checked with ensure_string
                names.extend(ensure_string(name) for name in value)
            else:
                raise TypeError(
                    "expected name or list of names of terminator methods; "
                    "got %r instead" % type(value))
    return frozenset(names)
def mapitems(function, dict_):
    """Build a new dictionary whose keys and values come from applying
    ``function`` to key-value pairs from given dictionary.

    .. warning::

        If ``function`` returns a key-value pair with the same key
        more than once, it is undefined which value will be chosen
        for that key in the resulting dictionary.

    :param function: Function taking a key-value pair as a single argument,
                     and returning a new key-value pair; or None
                     (corresponding to identity function)
    :param dict_: Dictionary to take the key-value pairs from

    :return: New dictionary of the same class as ``dict_``

    .. versionadded:: 0.0.2
    """
    ensure_mapping(dict_)

    if function is None:
        function = identity()
    else:
        function = ensure_callable(function)

    # result is built by the class of the input dictionary
    new_items = (function(item) for item in iteritems(dict_))
    return dict_.__class__(new_items)
def merge(arg, *rest, **kwargs):
    """Merge a collection of functions into a single function that takes
    a collection and maps its items through the corresponding functions.

    :param arg: A collection of functions, such as list, tuple, or dictionary
    :param rest: Optional further functions. If given, ``arg`` must be
                 a function as well, and the merged function takes multiple
                 arguments rather than a single collection
    :param default: Optional default function to use for items within merged
                    function's arguments that do not have corresponding
                    functions in ``arg``

    Example with two-element tuple::

        >> dict_ = {'Alice': -5, 'Bob': 4}
        >> func = merge((str.upper, abs))
        >> dict(map(func, dict_.items()))
        {'ALICE': 5, 'BOB': 4}

    Example with a dictionary::

        >> func = merge({'id': int, 'name': str.split})
        >> data = [
            {'id': '1', 'name': "John Doe"},
            {'id': '2', 'name': "Anne Arbor"},
        ]
        >> list(map(func, data))
        [{'id': 1, 'name': ['John', 'Doe']},
         {'id': 2, 'name': ['Anne', 'Arbor']}]

    :return: Merged function

    .. versionadded:: 0.0.2
    """
    ensure_keyword_args(kwargs, optional=('default',))
    has_default = 'default' in kwargs
    default = ensure_callable(kwargs['default']) if has_default else None

    # if more than one argument was given, they must all be functions;
    # result will be a function that takes multiple arguments (rather than
    # a single collection) and returns a tuple
    if rest:
        fs = (ensure_callable(arg),) + tuple(ensure_callable(f) for f in rest)
    else:
        fs = arg
    unary_result = not rest

    if is_mapping(fs):
        # items are matched with functions by key;
        # result is built by the class of the function mapping
        if has_default:
            def merged_by_key(arg_):
                return fs.__class__(
                    (k, fs.get(k, default)(arg_[k])) for k in arg_)
        else:
            def merged_by_key(arg_):
                return fs.__class__((k, fs[k](arg_[k])) for k in arg_)
        return merged_by_key

    ensure_sequence(fs)
    if has_default:
        # we cannot use ``izip_longest(fs, arg_, fillvalue=default)``,
        # because we want to terminate the generator
        # only when ``arg_`` is exhausted (not when just ``fs`` is)
        def func(arg_):
            return fs.__class__(
                (fs[i] if i < len(fs) else default)(x)
                for i, x in enumerate(arg_))
    else:
        # we cannot use ``izip(fs, arg_)`` because it would short-circuit
        # if ``arg_`` is longer than ``fs``, rather than raising
        # the required ``IndexError``
        def func(arg_):
            return fs.__class__(fs[i](x) for i, x in enumerate(arg_))

    if unary_result:
        return func
    # multi-argument form: pack positional arguments into a single tuple
    return lambda *args: func(args)
def __fail_unless_strings(self, arg):
    """Fail the test unless argument is a string or iterable of strings.

    :param arg: Object to check
    """
    # a single string is fine
    if is_string(arg):
        return
    # ... and so is an iterable consisting only of strings
    if is_iterable(arg) and all(is_string(item) for item in arg):
        return
    self.fail("%r is not a string or iterable of strings" % (arg,))
def __fail_unless_strings(self, arg):
    """Fail the test unless argument is a string or iterable of strings.

    :param arg: Object to check
    """
    # a single string is fine
    if is_string(arg):
        return
    # ... and so is an iterable consisting only of strings
    if is_iterable(arg) and all(is_string(item) for item in arg):
        return
    self.fail("%r is not a string or iterable of strings" % (arg,))