def str_lower(x): """Converts string samples to lower case. :returns: an expression containing the converted strings. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.lower() Expression = str_lower(text) Length: 5 dtype: str (expression) --------------------------------- 0 something 1 very pretty 2 is coming 3 our 4 way. """ sl = _to_string_sequence(x).lower() return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_lstrip(x, to_strip=None): """Remove leading characters from a string sample. :param str to_strip: The string to be removed :returns: an expression containing the modified string column. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.lstrip(to_strip='very ') Expression = str_lstrip(text, to_strip='very ') Length: 5 dtype: str (expression) --------------------------------- 0 Something 1 pretty 2 is coming 3 our 4 way. """ # in c++ we give empty string the same meaning as None sl = _to_string_sequence(x).lstrip('' if to_strip is None else to_strip) if to_strip != '' else x return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_center(x, width, fillchar=' '): """ Fills the left and right side of the strings with additional characters, such that the sample has a total of `width` characters. :param int width: The total number of characters of the resulting string sample. :param str fillchar: The character used for filling. :returns: an expression containing the filled strings. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.center(width=11, fillchar='!') Expression = str_center(text, width=11, fillchar='!') Length: 5 dtype: str (expression) --------------------------------- 0 !Something! 1 very pretty 2 !is coming! 3 !!!!our!!!! 4 !!!!way.!!! """ sl = _to_string_sequence(x).pad(width, fillchar, True, True) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_ljust(x, width, fillchar=' '): """Fills the right side of string samples with a specified character such that the strings are right-hand justified. :param int width: The minimal width of the strings. :param str fillchar: The character used for filling. :returns: an expression containing the filled strings. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.ljust(width=10, fillchar='!') Expression = str_ljust(text, width=10, fillchar='!') Length: 5 dtype: str (expression) --------------------------------- 0 Something! 1 very pretty 2 is coming! 3 our!!!!!!! 4 way.!!!!!! """ sl = _to_string_sequence(x).pad(width, fillchar, False, True) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_zfill(x, width): """Pad strings in a column by prepanding "0" characters. :param int width: The minimum length of the resulting string. Strings shorter less than `width` will be prepended with zeros. :returns: an expression containing the modified strings. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.zfill(width=12) Expression = str_zfill(text, width=12) Length: 5 dtype: str (expression) --------------------------------- 0 000Something 1 0very pretty 2 000is coming 3 000000000our 4 00000000way. """ sl = _to_string_sequence(x).pad(width, '0', True, False) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_capitalize(x): """Capitalize the first letter of a string sample. :returns: an expression containing the capitalized strings. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.capitalize() Expression = str_capitalize(text) Length: 5 dtype: str (expression) --------------------------------- 0 Something 1 Very pretty 2 Is coming 3 Our 4 Way. """ sl = _to_string_sequence(x).capitalize() return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_upper(x): """Converts all strings in a column to uppercase. :returns: an expression containing the converted strings. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.upper() Expression = str_upper(text) Length: 5 dtype: str (expression) --------------------------------- 0 SOMETHING 1 VERY PRETTY 2 IS COMING 3 OUR 4 WAY. """ sl = _to_string_sequence(x).upper() return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_replace(x, pat, repl, n=-1, flags=0, regex=False): """Replace occurences of a pattern/regex in a column with some other string. :param str pattern: string or a regex pattern :param str replace: a replacement string :param int n: number of replacements to be made from the start. If -1 make all replacements. :param int flags: ?? :param bool regex: If True, ...? :returns: an expression containing the string replacements. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.replace(pat='et', repl='__') Expression = str_replace(text, pat='et', repl='__') Length: 5 dtype: str (expression) --------------------------------- 0 Som__hing 1 very pr__ty 2 is coming 3 our 4 way. """ sl = _to_string_sequence(x).replace(pat, repl, n, flags, regex) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_repeat(x, repeats): """Duplicate each string in a column. :param int repeats: number of times each string sample is to be duplicated. :returns: an expression containing the duplicated strings Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.repeat(3) Expression = str_repeat(text, 3) Length: 5 dtype: str (expression) --------------------------------- 0 SomethingSomethingSomething 1 very prettyvery prettyvery pretty 2 is comingis comingis coming 3 ourourour 4 way.way.way. """ sl = _to_string_sequence(x).repeat(repeats) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_pad(x, width, side='left', fillchar=' '): """Pad strings in a given column. :param int width: The total width of the string :param str side: If 'left' than pad on the left, if 'right' than pad on the right side the string. :param str fillchar: The character used for padding. :returns: an expression containing the padded strings. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.pad(width=10, side='left', fillchar='!') Expression = str_pad(text, width=10, side='left', fillchar='!') Length: 5 dtype: str (expression) --------------------------------- 0 !Something 1 very pretty 2 !is coming 3 !!!!!!!our 4 !!!!!!way. """ sl = _to_string_sequence(x).pad(width, fillchar, side in ['left', 'both'], side in ['right', 'both']) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_strip(x, to_strip=None): """Removes leading and trailing characters. Strips whitespaces (including new lines), or a set of specified characters from each string saple in a column, both from the left right sides. :param str to_strip: The characters to be removed. All combinations of the characters will be removed. If None, it removes whitespaces. :param returns: an expression containing the modified string samples. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.strip(to_strip='very') Expression = str_strip(text, to_strip='very') Length: 5 dtype: str (expression) --------------------------------- 0 Something 1 prett 2 is coming 3 ou 4 way. """ # in c++ we give empty string the same meaning as None sl = _to_string_sequence(x).strip('' if to_strip is None else to_strip) if to_strip != '' else x return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_get(x, i): """Extract a character from each sample at the specified position from a string column. Note that if the specified position is out of bound of the string sample, this method returns '', while pandas retunrs nan. :param int i: The index location, at which to extract the character. :returns: an expression containing the extracted characters. Example: >>> import vaex >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.'] >>> df = vaex.from_arrays(text=text) >>> df # text 0 Something 1 very pretty 2 is coming 3 our 4 way. >>> df.text.str.get(5) Expression = str_get(text, 5) Length: 5 dtype: str (expression) --------------------------------- 0 h 1 p 2 m 3 4 """ x = _to_string_sequence(x) if i == -1: sl = x.slice_string_end(-1) else: sl = x.slice_string(i, i+1) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_join(x, sep): """Same as find (difference with pandas is that it does not raise a ValueError)""" sl = _to_string_list_sequence(x).join(sep) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def format(x, format): """Uses http://www.cplusplus.com/reference/string/to_string/ for formatting""" # don't change the dtype, otherwise for each block the dtype may be different (string length) sl = vaex.strings.format(x, format) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def to_string(x): # don't change the dtype, otherwise for each block the dtype may be different (string length) sl = vaex.strings.to_string(x) return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)