def to_valid_latitude(latitude):
    '''Validate a latitude and pull exact-pole values slightly inside the poles.

    A latitude close to +90.0 is replaced by 89.9999 and one close to -90.0
    by -89.9999; any other valid latitude is returned unchanged.

    NOTE(review): presumably the poles are nudged because downstream
    geohash encoding cannot handle exactly +/-90 -- confirm with callers.

    :param latitude: latitude value accepted by ``is_valid_latitude``
    :return: the latitude, adjusted only when it sits on a pole
    :raises ValueError: if ``is_valid_latitude(latitude)`` is falsy
    '''
    if not is_valid_latitude(latitude):
        raise ValueError('Invalid latitude {}'.format(latitude))

    # (pole, replacement) pairs, checked north first and then south.
    pole_replacements = ((90.0, 89.9999), (-90.0, -89.9999))
    for pole, replacement in pole_replacements:
        if isclose(latitude, pole):
            return replacement
    return latitude
def near_dupe_hashes(cls, address, geohash_precision=DEFAULT_GEOHASH_PRECISION,
                     use_latlon=True, use_city=False, use_postal_code=False):
    '''Yield near-duplicate blocking keys for an address record.

    Every key is a ``|``-joined unicode string made of a key prefix, one
    geographic qualifier, and one element taken from each entry of
    ``cls.component_expansions(address)`` (cartesian product). Up to three
    families of keys are produced, in this order:

    * geohash keys (``cls.GEOHASH_KEY_PREFIX``): the geohash of the
      coordinates truncated to ``geohash_precision`` characters, plus its
      neighbors as returned by ``geohash.neighbors``;
    * postal-code keys (``cls.POSTCODE_KEY_PREFIX``): each expansion of
      the postal code;
    * city keys (``cls.CITY_KEY_PREFIX``): each expansion of the city.

    Nothing is yielded when no entry of the component expansions is truthy.

    :param address: mapping read with ``.get`` for the latitude, longitude,
        postal code and city keys
    :param geohash_precision: number of leading geohash characters to keep
    :param use_latlon: emit geohash keys when usable coordinates exist
    :param use_city: emit city keys when the city is non-empty
    :param use_postal_code: emit postal-code keys when the postal code is
        non-empty
    :return: generator of unicode key strings
    '''
    address_expansions = cls.component_expansions(address)

    lat = address.get(Coordinates.LATITUDE)
    lon = address.get(Coordinates.LONGITUDE)
    postcode = safe_decode(address.get(AddressComponents.POSTAL_CODE, u'')).strip()
    city = safe_decode(address.get(AddressComponents.CITY, u'')).strip()

    if not any(address_expansions):
        return

    # Test for "missing" explicitly instead of relying on truthiness:
    # 0.0 is falsy, so ``lat and lon`` used to drop every address lying on
    # the equator or the prime meridian. Only the (0, 0) placeholder and
    # the poles are meant to be excluded, which the checks below handle.
    has_coordinates = all(
        value is not None and value != u'' for value in (lat, lon)
    )

    if use_latlon and has_coordinates:
        is_null_island = isclose(lat, 0.0) and isclose(lon, 0.0)
        is_at_or_beyond_pole = lat >= 90.0 or lat <= -90.0

        if not (is_null_island or is_at_or_beyond_pole):
            geo = geohash.encode(lat, lon)[:geohash_precision]
            # Include adjacent cells so points near a cell border still
            # share at least one key.
            geohash_neighbors = [geo] + geohash.neighbors(geo)

            base_key = cls.GEOHASH_KEY_PREFIX
            for keys in six.itertools.product(geohash_neighbors, *address_expansions):
                yield u'{}|{}'.format(base_key, u'|'.join(keys))

    if postcode and use_postal_code:
        postcode_expansions = expand_address(
            postcode, address_components=ADDRESS_POSTAL_CODE)

        base_key = cls.POSTCODE_KEY_PREFIX
        for keys in six.itertools.product(postcode_expansions, *address_expansions):
            yield u'{}|{}'.format(base_key, u'|'.join(keys))

    if city and use_city:
        city_expansions = expand_address(
            city, address_components=ADDRESS_TOPONYM)

        base_key = cls.CITY_KEY_PREFIX
        for keys in six.itertools.product(city_expansions, *address_expansions):
            yield u'{}|{}'.format(base_key, u'|'.join(keys))
def normalized_vector_l2(cls, vector):
    '''Scale ``vector`` by its Euclidean (L2) norm.

    When the norm is (close to) zero the vector cannot be scaled; a uniform
    vector of ``1 / len(vector)`` entries is returned instead. Note that
    this fallback sums to 1 rather than having unit L2 norm. An empty
    vector yields an empty list.

    :param vector: sequence of numbers
    :return: new list of floats with the same length as ``vector``
    '''
    norm = math.sqrt(sum(s ** 2 for s in vector))

    if isclose(norm, 0.0):
        size = len(vector)
        # An empty vector also has norm 0; without this guard the uniform
        # fallback below would evaluate 1. / 0 and raise ZeroDivisionError.
        if size == 0:
            return []
        return [1. / size] * size

    return [s / norm for s in vector]
def normalized_vector_l1(cls, vector):
    '''Scale ``vector`` so that its entries sum to 1.

    The divisor is the plain sum of the entries (not the sum of absolute
    values), so this equals L1 normalization only for vectors without
    negative entries. When the sum is (close to) zero a uniform vector of
    ``1 / len(vector)`` entries is returned instead. An empty vector
    yields an empty list.

    :param vector: sequence of numbers
    :return: new list of floats with the same length as ``vector``
    '''
    total = float(sum(vector))

    if isclose(total, 0.0):
        size = len(vector)
        # An empty vector also sums to 0; without this guard the uniform
        # fallback below would evaluate 1. / 0 and raise ZeroDivisionError.
        if size == 0:
            return []
        return [1. / size] * size

    return [s / total for s in vector]
def normalized_tfidf_vector(cls, tfidf_vector):
    '''Divide every score of a ``(word, score)`` vector by the L2 norm.

    The norm is the square root of the sum of squared scores. When it is
    (close to) zero the input object itself is returned untouched;
    otherwise a new list of ``(word, score / norm)`` tuples is built.

    :param tfidf_vector: iterable of ``(word, score)`` pairs
    :return: the input itself (zero norm) or a new list of scaled pairs
    '''
    squared_scores = [score ** 2 for _, score in tfidf_vector]
    magnitude = math.sqrt(sum(squared_scores))

    # Nothing to scale by: hand the caller's vector back as-is.
    if isclose(magnitude, 0.0):
        return tfidf_vector

    scaled = []
    for word, score in tfidf_vector:
        scaled.append((word, score / magnitude))
    return scaled