Ejemplos de tokenize en Python, ejemplos de humanfriendly.text.tokenize en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: __init__.py Proyecto: yangsongx/doc

def parse_length(length):
    """
    Parse a human readable length and return the number of metres.

    :param length: The human readable length to parse (a string).
    :returns: The corresponding length in metres (a float).
    :raises: :exc:`InvalidLength` when the input can't be parsed.

    Some examples:

    >>> from humanfriendly import parse_length
    >>> parse_length('42')
    42
    >>> parse_length('1 km')
    1000
    >>> parse_length('5mm')
    0.005
    >>> parse_length('15.3cm')
    0.153
    """
    tokens = tokenize(length)
    if tokens and isinstance(tokens[0], numbers.Number):
        # If the input contains only a number, it's assumed to be the number of metres.
        if len(tokens) == 1:
            return int(tokens[0])
        # Otherwise we expect to find two tokens: A number and a unit.
        if len(tokens) == 2 and is_string(tokens[1]):
            normalized_unit = tokens[1].lower()
            # Try to match the first letter of the unit.
            for unit in length_size_units:
                if normalized_unit.startswith(unit['prefix']):
                    return tokens[0] * unit['divider']
    # We failed to parse the length specification.
    msg = "Failed to parse length! (input %r was tokenized as %r)"
    raise InvalidLength(msg % (length, tokens))

Ejemplo n.º 2

0

Mostrar archivo

Archivo: __init__.py Proyecto: strategic-zjc/Data-Science-Term-Project

def parse_length(length):
    """
    Parse a human readable length and return the number of metres.

    :param length: The human readable length to parse (a string).
    :returns: The corresponding length in metres (a float).
    :raises: :exc:`InvalidLength` when the input can't be parsed.

    Some examples:

    >>> from humanfriendly import parse_length
    >>> parse_length('42')
    42
    >>> parse_length('1 km')
    1000
    >>> parse_length('5mm')
    0.005
    >>> parse_length('15.3cm')
    0.153
    """
    tokens = tokenize(length)
    if tokens and isinstance(tokens[0], numbers.Number):
        # If the input contains only a number, it's assumed to be the number of metres.
        if len(tokens) == 1:
            return int(tokens[0])
        # Otherwise we expect to find two tokens: A number and a unit.
        if len(tokens) == 2 and is_string(tokens[1]):
            normalized_unit = tokens[1].lower()
            # Try to match the first letter of the unit.
            for unit in length_size_units:
                if normalized_unit.startswith(unit['prefix']):
                    return tokens[0] * unit['divider']
    # We failed to parse the length specification.
    msg = "Failed to parse length! (input %r was tokenized as %r)"
    raise InvalidLength(msg % (length, tokens))

Ejemplo n.º 3

0

Mostrar archivo

Archivo: __init__.py Proyecto: yangsongx/doc

def parse_size(size):
    """
    Parse a human readable data size and return the number of bytes.

    :param size: The human readable file size to parse (a string).
    :returns: The corresponding size in bytes (an integer).
    :raises: :exc:`InvalidSize` when the input can't be parsed.

    Some examples:

    >>> from humanfriendly import parse_size
    >>> parse_size('42')
    42
    >>> parse_size('1 KB')
    1024
    >>> parse_size('5 kilobyte')
    5120
    >>> parse_size('1.5 GB')
    1610612736
    """
    tokens = tokenize(size)
    if tokens and isinstance(tokens[0], numbers.Number):
        # If the input contains only a number, it's assumed to be the number of bytes.
        if len(tokens) == 1:
            return int(tokens[0])
        # Otherwise we expect to find two tokens: A number and a unit.
        if len(tokens) == 2 and is_string(tokens[1]):
            normalized_unit = tokens[1].lower()
            # Try to match the first letter of the unit.
            for unit in disk_size_units:
                if normalized_unit.startswith(unit['prefix']):
                    return int(tokens[0] * unit['divider'])
    # We failed to parse the size specification.
    msg = "Failed to parse size! (input %r was tokenized as %r)"
    raise InvalidSize(msg % (size, tokens))

Ejemplo n.º 4

0

Mostrar archivo

Archivo: __init__.py Proyecto: strategic-zjc/Data-Science-Term-Project

def parse_size(size):
    """
    Parse a human readable data size and return the number of bytes.

    :param size: The human readable file size to parse (a string).
    :returns: The corresponding size in bytes (an integer).
    :raises: :exc:`InvalidSize` when the input can't be parsed.

    Some examples:

    >>> from humanfriendly import parse_size
    >>> parse_size('42')
    42
    >>> parse_size('1 KB')
    1024
    >>> parse_size('5 kilobyte')
    5120
    >>> parse_size('1.5 GB')
    1610612736
    """
    tokens = tokenize(size)
    if tokens and isinstance(tokens[0], numbers.Number):
        # If the input contains only a number, it's assumed to be the number of bytes.
        if len(tokens) == 1:
            return int(tokens[0])
        # Otherwise we expect to find two tokens: A number and a unit.
        if len(tokens) == 2 and is_string(tokens[1]):
            normalized_unit = tokens[1].lower()
            # Try to match the first letter of the unit.
            for unit in disk_size_units:
                if normalized_unit.startswith(unit['prefix']):
                    return int(tokens[0] * unit['divider'])
    # We failed to parse the size specification.
    msg = "Failed to parse size! (input %r was tokenized as %r)"
    raise InvalidSize(msg % (size, tokens))

Ejemplo n.º 5

0

Mostrar archivo

def parse_size(size, binary=False):
    """
    Parse a human readable data size and return the number of bytes.

    :param size: The human readable file size to parse (a string).
    :param binary: :data:`True` to use binary multiples of bytes (base-2) for
                   ambiguous unit symbols and names, :data:`False` to use
                   decimal multiples of bytes (base-10).
    :returns: The corresponding size in bytes (an integer).
    :raises: :exc:`InvalidSize` when the input can't be parsed.

    This function knows how to parse sizes in bytes, kilobytes, megabytes,
    gigabytes, terabytes and petabytes. Some examples:

    >>> from humanfriendly import parse_size
    >>> parse_size('42')
    42
    >>> parse_size('13b')
    13
    >>> parse_size('5 bytes')
    5
    >>> parse_size('1 KB')
    1000
    >>> parse_size('1 kilobyte')
    1000
    >>> parse_size('1 KiB')
    1024
    >>> parse_size('1 KB', binary=True)
    1024
    >>> parse_size('1.5 GB')
    1500000000
    >>> parse_size('1.5 GB', binary=True)
    1610612736
    """
    tokens = tokenize(size)
    if tokens and isinstance(tokens[0], numbers.Number):
        # Get the normalized unit (if any) from the tokenized input.
        normalized_unit = tokens[1].lower() if len(tokens) == 2 and is_string(tokens[1]) else ''
        # If the input contains only a number, it's assumed to be the number of
        # bytes. The second token can also explicitly reference the unit bytes.
        if len(tokens) == 1 or normalized_unit.startswith('b'):
            return int(tokens[0])
        # Otherwise we expect two tokens: A number and a unit.
        if normalized_unit:
            for unit in disk_size_units:
                # First we check for unambiguous symbols (KiB, MiB, GiB, etc)
                # and names (kibibyte, mebibyte, gibibyte, etc) because their
                # handling is always the same.
                if normalized_unit in (unit.binary.symbol.lower(), unit.binary.name.lower()):
                    return int(tokens[0] * unit.binary.divider)
                # Now we will deal with ambiguous prefixes (K, M, G, etc),
                # symbols (KB, MB, GB, etc) and names (kilobyte, megabyte,
                # gigabyte, etc) according to the caller's preference.
                if (normalized_unit in (unit.decimal.symbol.lower(), unit.decimal.name.lower()) or
                        normalized_unit.startswith(unit.decimal.symbol[0].lower())):
                    return int(tokens[0] * (unit.binary.divider if binary else unit.decimal.divider))
    # We failed to parse the size specification.
    msg = "Failed to parse size! (input %r was tokenized as %r)"
    raise InvalidSize(format(msg, size, tokens))

Ejemplo n.º 6

0

Mostrar archivo

Archivo: __init__.py Proyecto: MrSsunlight/moniter

def parse_timespan(timespan):
    """
    Parse a "human friendly" timespan into the number of seconds.

    :param value: A string like ``5h`` (5 hours), ``10m`` (10 minutes) or
                  ``42s`` (42 seconds).
    :returns: The number of seconds as a floating point number.
    :raises: :exc:`InvalidTimespan` when the input can't be parsed.

    Note that the :func:`parse_timespan()` function is not meant to be the
    "mirror image" of the :func:`format_timespan()` function. Instead it's
    meant to allow humans to easily and succinctly specify a timespan with a
    minimal amount of typing. It's very useful to accept easy to write time
    spans as e.g. command line arguments to programs.

    The time units (and abbreviations) supported by this function are:

    - ms, millisecond, milliseconds
    - s, sec, secs, second, seconds
    - m, min, mins, minute, minutes
    - h, hour, hours
    - d, day, days
    - w, week, weeks
    - y, year, years

    Some examples:

    >>> from humanfriendly import parse_timespan
    >>> parse_timespan('42')
    42.0
    >>> parse_timespan('42s')
    42.0
    >>> parse_timespan('1m')
    60.0
    >>> parse_timespan('1h')
    3600.0
    >>> parse_timespan('1d')
    86400.0
    """
    tokens = tokenize(timespan)
    if tokens and isinstance(tokens[0], numbers.Number):
        # If the input contains only a number, it's assumed to be the number of seconds.
        if len(tokens) == 1:
            return float(tokens[0])
        # Otherwise we expect to find two tokens: A number and a unit.
        if len(tokens) == 2 and is_string(tokens[1]):
            normalized_unit = tokens[1].lower()
            for unit in time_units:
                if (normalized_unit == unit['singular']
                        or normalized_unit == unit['plural']
                        or normalized_unit in unit['abbreviations']):
                    return float(tokens[0]) * unit['divider']
    # We failed to parse the timespan specification.
    msg = "Failed to parse timespan! (input %r was tokenized as %r)"
    raise InvalidTimespan(format(msg, timespan, tokens))

Ejemplo n.º 7

0

Mostrar archivo

Archivo: __init__.py Proyecto: xolox/python-humanfriendly

def parse_timespan(timespan):
    """
    Parse a "human friendly" timespan into the number of seconds.

    :param value: A string like ``5h`` (5 hours), ``10m`` (10 minutes) or
                  ``42s`` (42 seconds).
    :returns: The number of seconds as a floating point number.
    :raises: :exc:`InvalidTimespan` when the input can't be parsed.

    Note that the :func:`parse_timespan()` function is not meant to be the
    "mirror image" of the :func:`format_timespan()` function. Instead it's
    meant to allow humans to easily and succinctly specify a timespan with a
    minimal amount of typing. It's very useful to accept easy to write time
    spans as e.g. command line arguments to programs.

    The time units (and abbreviations) supported by this function are:

    - ms, millisecond, milliseconds
    - s, sec, secs, second, seconds
    - m, min, mins, minute, minutes
    - h, hour, hours
    - d, day, days
    - w, week, weeks
    - y, year, years

    Some examples:

    >>> from humanfriendly import parse_timespan
    >>> parse_timespan('42')
    42.0
    >>> parse_timespan('42s')
    42.0
    >>> parse_timespan('1m')
    60.0
    >>> parse_timespan('1h')
    3600.0
    >>> parse_timespan('1d')
    86400.0
    """
    tokens = tokenize(timespan)
    if tokens and isinstance(tokens[0], numbers.Number):
        # If the input contains only a number, it's assumed to be the number of seconds.
        if len(tokens) == 1:
            return float(tokens[0])
        # Otherwise we expect to find two tokens: A number and a unit.
        if len(tokens) == 2 and is_string(tokens[1]):
            normalized_unit = tokens[1].lower()
            for unit in time_units:
                if (normalized_unit == unit['singular'] or
                        normalized_unit == unit['plural'] or
                        normalized_unit in unit['abbreviations']):
                    return float(tokens[0]) * unit['divider']
    # We failed to parse the timespan specification.
    msg = "Failed to parse timespan! (input %r was tokenized as %r)"
    raise InvalidTimespan(format(msg, timespan, tokens))

Ejemplo n.º 8

0

Mostrar archivo

Archivo: __init__.py Proyecto: douglas-larocca/python-humanfriendly

def parse_timespan(timespan):
    """
    Parse a "human friendly" timespan into the number of seconds.

    :param value: A string like ``5h`` (5 hours), ``10m`` (10 minutes) or
                  ``42s`` (42 seconds).
    :returns: The number of seconds as a floating point number.
    :raises: :exc:`InvalidTimespan` when the input can't be parsed.

    Note that the :func:`parse_timespan()` function is not meant to be the
    "mirror image" of the :func:`format_timespan()` function. Instead it's
    meant to allow humans to easily and succinctly specify a timespan with a
    minimal amount of typing. It's very useful to accept easy to write time
    spans as e.g. command line arguments to programs.

    Some examples:

    >>> from humanfriendly import parse_timespan
    >>> parse_timespan('42')
    42.0
    >>> parse_timespan('42s')
    42.0
    >>> parse_timespan('1m')
    60.0
    >>> parse_timespan('1h')
    3600.0
    >>> parse_timespan('1d')
    86400.0
    """
    tokens = tokenize(timespan)
    if tokens and isinstance(tokens[0], numbers.Number):
        # If the input contains only a number, it's assumed to be the number of seconds.
        if len(tokens) == 1:
            return float(tokens[0])
        # Otherwise we expect to find two tokens: A number and a unit.
        if len(tokens) == 2 and isinstance(tokens[1], string_types):
            normalized_unit = tokens[1].lower()
            # Try to match the first letter of the unit.
            for unit in time_units:
                # All of the first letters of the time units are unique, so
                # although this check is not very strict I believe it to be
                # sufficient.
                if normalized_unit.startswith(unit['singular'][0]):
                    return float(tokens[0]) * unit['divider']
    # We failed to parse the timespan specification.
    msg = "Failed to parse timespan! (input %r was tokenized as %r)"
    raise InvalidTimespan(msg % (timespan, tokens))

Ejemplo n.º 9

0

Mostrar archivo

Archivo: __init__.py Proyecto: pomidoroshev/python-humanfriendly

def parse_timespan(timespan):
    """
    Parse a "human friendly" timespan into the number of seconds.

    :param value: A string like ``5h`` (5 hours), ``10m`` (10 minutes),
                  ``42s`` (42 seconds), ``5h2d`` (5 hours 2 days) or
                  ``10m30s`` (10 minutes 30 seconds).
    :returns: The number of seconds as a floating point number.
    :raises: :exc:`InvalidTimespan` when the input can't be parsed.

    Note that the :func:`parse_timespan()` function is not meant to be the
    "mirror image" of the :func:`format_timespan()` function. Instead it's
    meant to allow humans to easily and succinctly specify a timespan with a
    minimal amount of typing. It's very useful to accept easy to write time
    spans as e.g. command line arguments to programs.

    The time units (and abbreviations) supported by this function are:

    - ms, millisecond, milliseconds
    - s, sec, secs, second, seconds
    - m, min, mins, minute, minutes
    - h, hour, hours
    - d, day, days
    - w, week, weeks
    - y, year, years

    Some examples:

    >>> from humanfriendly import parse_timespan
    >>> parse_timespan('42')
    42.0
    >>> parse_timespan('42s')
    42.0
    >>> parse_timespan('1m')
    60.0
    >>> parse_timespan('1h')
    3600.0
    >>> parse_timespan('1d')
    86400.0
    """
    def _find_divider(unit):
        normalized_unit = unit.lower()
        for time_unit in time_units:
            if (normalized_unit == time_unit['singular']
                    or normalized_unit == time_unit['plural']
                    or normalized_unit in time_unit['abbreviations']):
                return time_unit['divider']
        return None

    def _to_seconds(pair):
        """
        Convert pair of tokens to seconds.
        """
        if not is_string(pair[1]):
            raise ValueError

        divider = _find_divider(pair[1])
        if divider is None:
            raise ValueError

        return float(pair[0]) * divider

    tokens = tokenize(timespan)
    if tokens and isinstance(tokens[0], numbers.Number):
        # If the input contains only a number, it's assumed to be the number of seconds.
        if len(tokens) == 1:
            return float(tokens[0])
        # Otherwise we expect to find an even number of pairs of numbers and units.
        if len(tokens) % 2 == 0:
            try:
                return round(
                    sum(map(_to_seconds, zip(tokens[::2], tokens[1::2]))), 9)
            except ValueError:
                pass

    # We failed to parse the timespan specification.
    msg = "Failed to parse timespan! (input %r was tokenized as %r)"
    raise InvalidTimespan(format(msg, timespan, tokens))

Ejemplo n.º 10

-1

Mostrar archivo

Archivo: __init__.py Proyecto: xolox/python-humanfriendly

def parse_size(size, binary=False):
    """
    Parse a human readable data size and return the number of bytes.

    :param size: The human readable file size to parse (a string).
    :param binary: :data:`True` to use binary multiples of bytes (base-2) for
                   ambiguous unit symbols and names, :data:`False` to use
                   decimal multiples of bytes (base-10).
    :returns: The corresponding size in bytes (an integer).
    :raises: :exc:`InvalidSize` when the input can't be parsed.

    This function knows how to parse sizes in bytes, kilobytes, megabytes,
    gigabytes, terabytes and petabytes. Some examples:

    >>> from humanfriendly import parse_size
    >>> parse_size('42')
    42
    >>> parse_size('13b')
    13
    >>> parse_size('5 bytes')
    5
    >>> parse_size('1 KB')
    1000
    >>> parse_size('1 kilobyte')
    1000
    >>> parse_size('1 KiB')
    1024
    >>> parse_size('1 KB', binary=True)
    1024
    >>> parse_size('1.5 GB')
    1500000000
    >>> parse_size('1.5 GB', binary=True)
    1610612736
    """
    tokens = tokenize(size)
    if tokens and isinstance(tokens[0], numbers.Number):
        # Get the normalized unit (if any) from the tokenized input.
        normalized_unit = tokens[1].lower() if len(tokens) == 2 and is_string(tokens[1]) else ''
        # If the input contains only a number, it's assumed to be the number of
        # bytes. The second token can also explicitly reference the unit bytes.
        if len(tokens) == 1 or normalized_unit.startswith('b'):
            return int(tokens[0])
        # Otherwise we expect two tokens: A number and a unit.
        if normalized_unit:
            for unit in disk_size_units:
                # First we check for unambiguous symbols (KiB, MiB, GiB, etc)
                # and names (kibibyte, mebibyte, gibibyte, etc) because their
                # handling is always the same.
                if normalized_unit in (unit.binary.symbol.lower(), unit.binary.name.lower()):
                    return int(tokens[0] * unit.binary.divider)
                # Now we will deal with ambiguous prefixes (K, M, G, etc),
                # symbols (KB, MB, GB, etc) and names (kilobyte, megabyte,
                # gigabyte, etc) according to the caller's preference.
                if (normalized_unit in (unit.decimal.symbol.lower(), unit.decimal.name.lower()) or
                        normalized_unit.startswith(unit.decimal.symbol[0].lower())):
                    return int(tokens[0] * (unit.binary.divider if binary else unit.decimal.divider))
    # We failed to parse the size specification.
    msg = "Failed to parse size! (input %r was tokenized as %r)"
    raise InvalidSize(format(msg, size, tokens))