Ejemplo n.º 1
0
    def join(cls, phrases, language, country=None):

        if not hasattr(phrases, '__iter__'):
            raise ValueError('Param phrases must be iterable')

        values, probs = address_config.alternative_probabilities(
            cls.key, language, country=country)
        phrase, props = weighted_choice(values, probs)

        whitespace = props.get('whitespace', True)
        whitespace_phrase = six.u(' ') if whitespace else six.u('')

        phrases = [safe_decode(p) for p in phrases]

        max_phrase_join = props.get('max_phrase_join', 2)
        if len(phrases) > max_phrase_join:
            default_join = safe_decode(
                props.get(
                    'default_join', cls.DEFAULT_WHITESPACE_JOIN
                    if whitespace else cls.DEFAULT_NON_WHITESPACE_JOIN))
            prefix = default_join.join(phrases[:-max_phrase_join] +
                                       [six.u('')])
        else:
            prefix = six.u('')

        if whitespace:
            phrase = six.u('{}{}{}').format(whitespace_phrase, phrase,
                                            whitespace_phrase)
        joined_phrase = phrase.join(phrases[-max_phrase_join:])

        return six.u('').join([prefix, joined_phrase])
Ejemplo n.º 2
0
    def join(cls, phrases, language, country=None):

        if not hasattr(phrases, '__iter__'):
            raise ValueError('Param phrases must be iterable')

        values, probs = address_config.alternative_probabilities(cls.key, language, country=country)
        phrase, props = weighted_choice(values, probs)

        whitespace = props.get('whitespace', True)
        whitespace_phrase = six.u(' ') if whitespace else six.u('')

        phrases = [safe_decode(p) for p in phrases]

        max_phrase_join = props.get('max_phrase_join', 2)
        if len(phrases) > max_phrase_join:
            default_join = safe_decode(props.get('default_join', cls.DEFAULT_WHITESPACE_JOIN if whitespace else cls.DEFAULT_NON_WHITESPACE_JOIN))
            prefix = default_join.join(phrases[:-max_phrase_join] + [six.u('')])
        else:
            prefix = six.u('')

        if whitespace:
            phrase = six.u('{}{}{}').format(whitespace_phrase, phrase, whitespace_phrase)
        joined_phrase = phrase.join(phrases[-max_phrase_join:])

        return six.u('').join([prefix, joined_phrase])
Ejemplo n.º 3
0
 def phrase(cls, language, country=None):
     values, probs = address_config.alternative_probabilities(
         'cross_streets.intersection', language, country=country)
     if not values:
         return None
     phrase, props = weighted_choice(values, probs)
     return phrase
Ejemplo n.º 4
0
    def phrase(cls, unit, language, country=None, zone=None):
        if unit is not None:
            key = 'units.alphanumeric' if zone is None else 'units.zones.{}'.format(zone)

            if not address_config.get_property(key, language, country=country):
                return None

            is_alpha = safe_decode(unit).isalpha()

            direction_unit = None
            add_direction = address_config.get_property('{}.add_direction'.format(key), language, country=country)
            if add_direction:
                direction_unit = cls.add_direction(key, unit, language, country=country)

            if direction_unit and direction_unit != unit:
                unit = direction_unit
                is_alpha = False
            else:
                add_quadrant = address_config.get_property('{}.add_quadrant'.format(key), language, country=country)
                if add_quadrant:
                    unit = cls.add_quadrant(key, unit, language, country=country)
                    is_alpha = False

            return cls.numeric_phrase(key, safe_decode(unit), language,
                                      dictionaries=['unit_types_numbered'], country=country, is_alpha=is_alpha)
        else:
            key = 'units.standalone'
            values, probs = address_config.alternative_probabilities(key, language,
                                                                     dictionaries=['unit_types_standalone'],
                                                                     country=country)
            if values is None:
                return None
            phrase, phrase_props = weighted_choice(values, probs)
            return phrase.title()
Ejemplo n.º 5
0
    def pick_phrase_and_type(cls, number, language, country=None):
        values, probs = address_config.alternative_probabilities(
            cls.key, language, dictionaries=cls.dictionaries, country=country)
        if not values:
            return None, safe_decode(
                number) if number is not None else None, None

        phrase, phrase_props = weighted_choice(values, probs)

        values = []
        probs = []

        for num_type in (cls.NUMERIC, cls.NUMERIC_AFFIX):
            key = '{}_probability'.format(num_type)
            prob = phrase_props.get(key, None)
            if prob is not None:
                values.append(num_type)
                probs.append(prob)

        if not probs:
            num_type = cls.NUMERIC
        else:
            probs = cdf(probs)
            num_type = weighted_choice(values, probs)

        return num_type, phrase, phrase_props[num_type]
Ejemplo n.º 6
0
    def phrase(cls, unit, language, country=None, zone=None):
        if unit is not None:
            key = 'units.alphanumeric' if zone is None else 'units.zones.{}'.format(
                zone)

            if not address_config.get_property(key, language, country=country):
                return None

            is_alpha = safe_decode(unit).isalpha()

            direction_unit = None
            add_direction = address_config.get_property(
                '{}.add_direction'.format(key), language, country=country)
            if add_direction:
                direction_unit = cls.add_direction(key,
                                                   unit,
                                                   language,
                                                   country=country)

            if direction_unit and direction_unit != unit:
                unit = direction_unit
                is_alpha = False
            else:
                add_quadrant = address_config.get_property(
                    '{}.add_quadrant'.format(key), language, country=country)
                if add_quadrant:
                    unit = cls.add_quadrant(key,
                                            unit,
                                            language,
                                            country=country)
                    is_alpha = False

            return cls.numeric_phrase(key,
                                      safe_decode(unit),
                                      language,
                                      dictionaries=['unit_types_numbered'],
                                      country=country,
                                      is_alpha=is_alpha)
        else:
            key = 'units.standalone'
            values, probs = address_config.alternative_probabilities(
                key,
                language,
                dictionaries=['unit_types_standalone'],
                country=country)
            if values is None:
                return None
            phrase, phrase_props = weighted_choice(values, probs)
            return phrase.title()
Ejemplo n.º 7
0
    def phrase(cls, language, key, value, is_plural=False, country=None):
        category_phrase = category_config.get_phrase(language,
                                                     key,
                                                     value,
                                                     is_plural=is_plural)
        if not category_phrase:
            return NULL_CATEGORY_QUERY

        category_phrase = safe_decode(category_phrase)

        prep_phrase_type = CategoryPreposition.random(language,
                                                      country=country)

        if prep_phrase_type in (None, CategoryPreposition.NULL):
            return CategoryQuery(category_phrase,
                                 prep=None,
                                 add_place_name=True,
                                 add_address=True)

        values, probs = address_config.alternative_probabilities(
            'categories.{}'.format(prep_phrase_type),
            language,
            country=country)
        if not values:
            return CategoryQuery(category_phrase,
                                 prep=None,
                                 add_place_name=True,
                                 add_address=True)

        prep_phrase, prep_phrase_props = weighted_choice(values, probs)
        prep_phrase = safe_decode(prep_phrase)

        add_address = prep_phrase_type not in (CategoryPreposition.NEARBY,
                                               CategoryPreposition.NEAR_ME,
                                               CategoryPreposition.IN)
        add_place_name = prep_phrase_type not in (CategoryPreposition.NEARBY,
                                                  CategoryPreposition.NEAR_ME)

        return CategoryQuery(category_phrase,
                             prep=prep_phrase,
                             add_place_name=add_place_name,
                             add_address=add_address)
Ejemplo n.º 8
0
    def phrase(cls, chain, language, country=None):
        if not chain:
            return NULL_CHAIN_QUERY

        chain_phrase = safe_decode(chain)

        prep_phrase_type = CategoryPreposition.random(language, country=country)

        if prep_phrase_type in (None, CategoryPreposition.NULL):
            return ChainQuery(chain_phrase, prep=None, add_place_name=True, add_address=True)

        values, probs = address_config.alternative_probabilities('categories.{}'.format(prep_phrase_type), language, country=country)
        if not values:
            return ChainQuery(chain_phrase, prep=None, add_place_name=True, add_address=True)

        prep_phrase, prep_phrase_props = weighted_choice(values, probs)
        prep_phrase = safe_decode(prep_phrase)

        add_address = prep_phrase_type not in (CategoryPreposition.NEARBY, CategoryPreposition.NEAR_ME, CategoryPreposition.IN)
        add_place_name = prep_phrase_type not in (CategoryPreposition.NEARBY, CategoryPreposition.NEAR_ME)

        return ChainQuery(chain_phrase, prep=prep_phrase, add_place_name=add_place_name, add_address=add_address)
Ejemplo n.º 9
0
    def phrase(cls, chain, language, country=None):
        if not chain:
            return NULL_CHAIN_QUERY

        chain_phrase = safe_decode(chain)

        prep_phrase_type = CategoryPreposition.random(language,
                                                      country=country)

        if prep_phrase_type in (None, CategoryPreposition.NULL):
            return ChainQuery(chain_phrase,
                              prep=None,
                              add_place_name=True,
                              add_address=True)

        values, probs = address_config.alternative_probabilities(
            'categories.{}'.format(prep_phrase_type),
            language,
            country=country)
        if not values:
            return ChainQuery(chain_phrase,
                              prep=None,
                              add_place_name=True,
                              add_address=True)

        prep_phrase, prep_phrase_props = weighted_choice(values, probs)
        prep_phrase = safe_decode(prep_phrase)

        add_address = prep_phrase_type not in (CategoryPreposition.NEARBY,
                                               CategoryPreposition.NEAR_ME,
                                               CategoryPreposition.IN)
        add_place_name = prep_phrase_type not in (CategoryPreposition.NEARBY,
                                                  CategoryPreposition.NEAR_ME)

        return ChainQuery(chain_phrase,
                          prep=prep_phrase,
                          add_place_name=add_place_name,
                          add_address=add_address)
Ejemplo n.º 10
0
    def phrase(cls, language, key, value, is_plural=False, country=None):
        category_phrase = category_config.get_phrase(language, key, value, is_plural=is_plural)
        if not category_phrase:
            return NULL_CATEGORY_QUERY

        category_phrase = safe_decode(category_phrase)

        prep_phrase_type = CategoryPreposition.random(language, country=country)

        if prep_phrase_type in (None, CategoryPreposition.NULL):
            return CategoryQuery(category_phrase, prep=None, add_place_name=True, add_address=True)

        values, probs = address_config.alternative_probabilities('categories.{}'.format(prep_phrase_type), language, country=country)
        if not values:
            return CategoryQuery(category_phrase, prep=None, add_place_name=True, add_address=True)

        prep_phrase, prep_phrase_props = weighted_choice(values, probs)
        prep_phrase = safe_decode(prep_phrase)

        add_address = prep_phrase_type not in (CategoryPreposition.NEARBY, CategoryPreposition.NEAR_ME, CategoryPreposition.IN)
        add_place_name = prep_phrase_type not in (CategoryPreposition.NEARBY, CategoryPreposition.NEAR_ME)

        return CategoryQuery(category_phrase, prep=prep_phrase, add_place_name=add_place_name, add_address=add_address)
Ejemplo n.º 11
0
    def pick_phrase_and_type(cls, number, language, country=None):
        values, probs = address_config.alternative_probabilities(cls.key, language, dictionaries=cls.dictionaries, country=country)
        if not values:
            return None, safe_decode(number) if number is not None else None, None

        phrase, phrase_props = weighted_choice(values, probs)

        values = []
        probs = []

        for num_type in (cls.NUMERIC, cls.NUMERIC_AFFIX):
            key = '{}_probability'.format(num_type)
            prob = phrase_props.get(key, None)
            if prob is not None:
                values.append(num_type)
                probs.append(prob)

        if not probs:
            num_type = cls.NUMERIC
        else:
            probs = cdf(probs)
            num_type = weighted_choice(values, probs)

        return num_type, phrase, phrase_props[num_type]
Ejemplo n.º 12
0
    def numeric_phrase(cls, key, num, language, country=None, dictionaries=(), strict_numeric=False, is_alpha=False):
        has_alpha = False
        has_numeric = True
        is_integer = False
        is_none = False
        if num is not None:
            try:
                num_int = int(num)
                is_integer = True
            except ValueError:
                try:
                    num_float = float(num)
                except ValueError:
                    tokens = tokenize(safe_decode(num))
                    has_numeric = False
                    for t, c in tokens:
                        if c == token_types.NUMERIC:
                            has_numeric = True
                        if any((ch.isalpha() for ch in t)):
                            has_alpha = True

                    if strict_numeric and has_alpha:
                        return safe_decode(num)

        else:
            is_none = True

        values, probs = None, None

        if is_alpha:
            values, probs = address_config.alternative_probabilities('{}.alpha'.format(key), language, dictionaries=dictionaries, country=country)

        # Pick a phrase given the probability distribution from the config
        if values is None:
            values, probs = address_config.alternative_probabilities(key, language, dictionaries=dictionaries, country=country)

        if not values:
            return safe_decode(num) if not is_none else None

        phrase, phrase_props = weighted_choice(values, probs)

        values = []
        probs = []

        # Dictionaries are lowercased, so title case here
        if phrase_props.get('title_case', True):
            phrase = phrase.title()

        '''
        There are a few ways we can express the number itself

        1. Alias it as some standalone word like basement (for floor "-1")
        2. Use the number itself, so "Floor 2"
        3. Append/prepend an affix e.g. 2/F for second floor
        4. As an ordinal expression e.g. "2nd Floor"
        '''
        have_standalone = False
        have_null = False
        for num_type in ('standalone', 'null', 'numeric', 'numeric_affix', 'ordinal'):
            key = '{}_probability'.format(num_type)
            prob = phrase_props.get(key)
            if prob is not None:
                if num_type == 'standalone':
                    have_standalone = True
                elif num_type == 'null':
                    have_null = True
                values.append(num_type)
                probs.append(prob)
            elif num_type in phrase_props:
                values.append(num_type)
                probs.append(1.0)
                break

        if not probs or is_none:
            return phrase

        # If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
        if has_alpha:
            values, probs = zip(*[(v, p) for v, p in zip(values, probs) if v in ('numeric', 'null', 'standalone')])
            total = float(sum(probs))
            if isclose(total, 0.0):
                return None

            probs = [p / total for p in probs]

        probs = cdf(probs)

        if len(values) < 2:
            if have_standalone:
                num_type = 'standalone'
            elif have_null:
                num_type = 'null'
            else:
                num_type = 'numeric'
        else:
            num_type = weighted_choice(values, probs)

        if num_type == 'standalone':
            return phrase
        elif num_type == 'null':
            return safe_decode(num)

        props = phrase_props[num_type]

        if is_integer:
            num_int = int(num)
            if phrase_props.get('number_abs_value', False):
                num_int = abs(num_int)
                num = num_int

            if 'number_min_abs_value' in phrase_props and num_int < phrase_props['number_min_abs_value']:
                return None

            if 'number_max_abs_value' in phrase_props and num_int > phrase_props['number_max_abs_value']:
                return None

            if phrase_props.get('number_subtract_abs_value'):
                num_int -= phrase_props['number_subtract_abs_value']
                num = num_int

        num = safe_decode(num)
        digits_props = props.get('digits')
        if digits_props:
            # Inherit the gender and category e.g. for ordinals
            for k in ('gender', 'category'):
                if k in props:
                    digits_props[k] = props[k]
            num = Digits.rewrite(num, language, digits_props, num_type=Digits.CARDINAL if num_type != 'ordinal' else Digits.ORDINAL)

        # Do we add the numeric phrase e.g. Floor No 1
        add_number_phrase = props.get('add_number_phrase', False)
        if add_number_phrase and random.random() < props['add_number_phrase_probability']:
            num = Number.phrase(num, language, country=country)

        whitespace_default = True

        if num_type == 'numeric_affix':
            phrase = props['affix']
            if props.get('upper_case', True):
                phrase = phrase.upper()
            if 'zero_pad' in props and num.isdigit():
                num = num.rjust(props['zero_pad'], props.get('zero_char', '0'))
            whitespace_default = False
        elif num_type == 'ordinal' and safe_decode(num).isdigit():
            ordinal_expression = ordinal_expressions.suffixed_number(num, language, gender=props.get('gender', None))

            if ordinal_expression is not None:
                num = ordinal_expression

        if 'null_phrase_probability' in props and (num_type == 'ordinal' or (has_alpha and (has_numeric or 'null_phrase_alpha_only' in props))):
            if random.random() < props['null_phrase_probability']:
                return num

        direction = props['direction']
        whitespace = props.get('whitespace', whitespace_default)

        whitespace_probability = props.get('whitespace_probability')
        if whitespace_probability is not None:
            whitespace = random.random() < whitespace_probability

        # Occasionally switch up if direction_probability is specified
        if random.random() > props.get('direction_probability', 1.0):
            if direction == 'left':
                direction = 'right'
            elif direction == 'right':
                direction = 'left'

        whitespace_phrase = six.u(' ') if whitespace else six.u('')
        # Phrase goes to the left of hte number
        if direction == 'left':
            return six.u('{}{}{}').format(phrase, whitespace_phrase, num)
        # Phrase goes to the right of the number
        elif direction == 'right':
            return six.u('{}{}{}').format(num, whitespace_phrase, phrase)
        # Need to specify a direction, otherwise return naked number
        else:
            return safe_decode(num)
Ejemplo n.º 13
0
    def numeric_phrase(cls,
                       key,
                       num,
                       language,
                       country=None,
                       dictionaries=(),
                       strict_numeric=False,
                       is_alpha=False):
        has_alpha = False
        has_numeric = True
        is_integer = False
        is_none = False
        if num is not None:
            try:
                num_int = int(num)
                is_integer = True
            except ValueError:
                try:
                    num_float = float(num)
                except ValueError:
                    tokens = tokenize(safe_decode(num))
                    has_numeric = False
                    for t, c in tokens:
                        if c == token_types.NUMERIC:
                            has_numeric = True
                        if any((ch.isalpha() for ch in t)):
                            has_alpha = True

                    if strict_numeric and has_alpha:
                        return safe_decode(num)

        else:
            is_none = True

        values, probs = None, None

        if is_alpha:
            values, probs = address_config.alternative_probabilities(
                '{}.alpha'.format(key),
                language,
                dictionaries=dictionaries,
                country=country)

        # Pick a phrase given the probability distribution from the config
        if values is None:
            values, probs = address_config.alternative_probabilities(
                key, language, dictionaries=dictionaries, country=country)

        if not values:
            return safe_decode(num) if not is_none else None

        phrase, phrase_props = weighted_choice(values, probs)

        values = []
        probs = []

        # Dictionaries are lowercased, so title case here
        if phrase_props.get('title_case', True):
            phrase = phrase.title()
        '''
        There are a few ways we can express the number itself

        1. Alias it as some standalone word like basement (for floor "-1")
        2. Use the number itself, so "Floor 2"
        3. Append/prepend an affix e.g. 2/F for second floor
        4. As an ordinal expression e.g. "2nd Floor"
        '''
        have_standalone = False
        have_null = False
        for num_type in ('standalone', 'null', 'numeric', 'numeric_affix',
                         'ordinal'):
            key = '{}_probability'.format(num_type)
            prob = phrase_props.get(key)
            if prob is not None:
                if num_type == 'standalone':
                    have_standalone = True
                elif num_type == 'null':
                    have_null = True
                values.append(num_type)
                probs.append(prob)
            elif num_type in phrase_props:
                values.append(num_type)
                probs.append(1.0)
                break

        if not probs or is_none:
            return phrase

        # If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
        if has_alpha:
            values, probs = zip(*[(v, p) for v, p in zip(values, probs)
                                  if v in ('numeric', 'null', 'standalone')])
            total = float(sum(probs))
            if isclose(total, 0.0):
                return None

            probs = [p / total for p in probs]

        probs = cdf(probs)

        if len(values) < 2:
            if have_standalone:
                num_type = 'standalone'
            elif have_null:
                num_type = 'null'
            else:
                num_type = 'numeric'
        else:
            num_type = weighted_choice(values, probs)

        if num_type == 'standalone':
            return phrase
        elif num_type == 'null':
            return safe_decode(num)

        props = phrase_props[num_type]

        if is_integer:
            num_int = int(num)
            if phrase_props.get('number_abs_value', False):
                num_int = abs(num_int)
                num = num_int

            if 'number_min_abs_value' in phrase_props and num_int < phrase_props[
                    'number_min_abs_value']:
                return None

            if 'number_max_abs_value' in phrase_props and num_int > phrase_props[
                    'number_max_abs_value']:
                return None

            if phrase_props.get('number_subtract_abs_value'):
                num_int -= phrase_props['number_subtract_abs_value']
                num = num_int

        num = safe_decode(num)
        digits_props = props.get('digits')
        if digits_props:
            # Inherit the gender and category e.g. for ordinals
            for k in ('gender', 'category'):
                if k in props:
                    digits_props[k] = props[k]
            num = Digits.rewrite(num,
                                 language,
                                 digits_props,
                                 num_type=Digits.CARDINAL
                                 if num_type != 'ordinal' else Digits.ORDINAL)

        # Do we add the numeric phrase e.g. Floor No 1
        add_number_phrase = props.get('add_number_phrase', False)
        if add_number_phrase and random.random(
        ) < props['add_number_phrase_probability']:
            num = Number.phrase(num, language, country=country)

        whitespace_default = True

        if num_type == 'numeric_affix':
            phrase = props['affix']
            if props.get('upper_case', True):
                phrase = phrase.upper()
            if 'zero_pad' in props and num.isdigit():
                num = num.rjust(props['zero_pad'], props.get('zero_char', '0'))
            whitespace_default = False
        elif num_type == 'ordinal' and safe_decode(num).isdigit():
            ordinal_expression = ordinal_expressions.suffixed_number(
                num, language, gender=props.get('gender', None))

            if ordinal_expression is not None:
                num = ordinal_expression

        if 'null_phrase_probability' in props and (
                num_type == 'ordinal' or
            (has_alpha and
             (has_numeric or 'null_phrase_alpha_only' in props))):
            if random.random() < props['null_phrase_probability']:
                return num

        direction = props['direction']
        whitespace = props.get('whitespace', whitespace_default)

        whitespace_probability = props.get('whitespace_probability')
        if whitespace_probability is not None:
            whitespace = random.random() < whitespace_probability

        # Occasionally switch up if direction_probability is specified
        if random.random() > props.get('direction_probability', 1.0):
            if direction == 'left':
                direction = 'right'
            elif direction == 'right':
                direction = 'left'

        whitespace_phrase = six.u(' ') if whitespace else six.u('')
        # Phrase goes to the left of hte number
        if direction == 'left':
            return six.u('{}{}{}').format(phrase, whitespace_phrase, num)
        # Phrase goes to the right of the number
        elif direction == 'right':
            return six.u('{}{}{}').format(num, whitespace_phrase, phrase)
        # Need to specify a direction, otherwise return naked number
        else:
            return safe_decode(num)
Ejemplo n.º 14
0
 def phrase(cls, language, country=None):
     values, probs = address_config.alternative_probabilities('cross_streets.intersection', language, country=country)
     if not values:
         return None
     phrase, props = weighted_choice(values, probs)
     return phrase