def have_hash_in_common(self, address1, address2, **kw):
    """Assert that two addresses produce at least one identical near-dupe hash.

    Each address is a mapping; its keys and values are passed to
    ``near_dupe_hashes`` as the first and second positional arguments.
    Any extra keyword arguments are forwarded unchanged to both calls.
    """
    first_hashes, second_hashes = [
        near_dupe_hashes(addr.keys(), addr.values(), **kw)
        for addr in (address1, address2)
    ]
    shared = set(first_hashes).intersection(second_hashes)
    # An empty intersection is falsy, which fails the assertion.
    self.assertTrue(shared)
def contained_in_hashes(self, address, output, **kw):
    """Assert that ``output`` is among the near-dupe hashes of ``address``.

    ``address`` is a mapping; its keys and values are passed to
    ``near_dupe_hashes`` as the first and second positional arguments,
    and any extra keyword arguments are forwarded unchanged.

    The check fails if no hashes are produced at all, or if ``output``
    is not one of them.
    """
    hashes = near_dupe_hashes(address.keys(), address.values(), **kw)
    # Distinguish "nothing was generated" from "generated, but not a match".
    self.assertTrue(hashes)

    # Membership must be tested against the hashes computed above.
    self.assertIn(output, set(hashes))
def near_dupe_hashes(cls, address, languages=None, with_address=True, with_unit=False,
                     with_city_or_equivalent=False, with_small_containing_boundaries=False,
                     with_postal_code=False, with_latlon=True,
                     geohash_precision=DEFAULT_GEOHASH_PRECISION,
                     name_and_address_keys=None, name_only_keys=None,
                     address_only_keys=None):
    """Compute near-dupe hashes for an address mapping.

    The latitude and longitude are read from ``address`` under
    ``Coordinates.LATITUDE`` / ``Coordinates.LONGITUDE``.  If either is
    missing, both fall back to ``0.0`` and ``with_latlon`` is forced to
    ``False`` for the underlying call.

    :param address: mapping of address components; must support ``.get``.
    :param languages: languages passed through to the hashing call; when
        ``None`` they are derived via ``cls.address_minus_name`` and
        ``cls.combined_place_languages``.
    :param with_address, with_unit, with_city_or_equivalent,
        with_small_containing_boundaries, with_postal_code, with_latlon,
        geohash_precision: forwarded to the hashing call.
    :param name_and_address_keys, name_only_keys, address_only_keys:
        forwarded to the hashing call; each defaults to the class
        attribute of the same name when ``None``.
    :return: whatever the underlying ``near_dupe_hashes(labels, values, ...)``
        call returns (presumably a list of hash strings; confirm against
        the library).
    """
    lat = address.get(Coordinates.LATITUDE)
    lon = address.get(Coordinates.LONGITUDE)
    if lat is None or lon is None:
        # Without a full coordinate pair, pass placeholder values and
        # switch off lat/lon hashing.
        lat = 0.0
        lon = 0.0
        with_latlon = False

    if languages is None:
        address_minus_name = cls.address_minus_name(address)
        languages = cls.combined_place_languages(address, address_minus_name)

    labels, values = cls.address_labels_and_values(address)

    if name_only_keys is None:
        name_only_keys = cls.name_only_keys

    if name_and_address_keys is None:
        name_and_address_keys = cls.name_and_address_keys

    if address_only_keys is None:
        address_only_keys = cls.address_only_keys

    # NOTE(review): this name is expected to resolve to the module-level
    # hashing function, not to this method.
    return near_dupe_hashes(
        labels,
        values,
        languages=languages,
        with_name=cls.with_name,
        with_address=with_address,
        with_unit=with_unit,
        with_city_or_equivalent=with_city_or_equivalent,
        with_small_containing_boundaries=with_small_containing_boundaries,
        with_postal_code=with_postal_code,
        with_latlon=with_latlon,
        latitude=lat,
        longitude=lon,
        geohash_precision=geohash_precision,
        name_and_address_keys=name_and_address_keys,
        name_only_keys=name_only_keys,
        # Pass the resolved value so a caller-supplied override is honoured.
        address_only_keys=address_only_keys
    )
def near_dupe_hashes(cls, address, languages=None, with_address=True, with_unit=False,
                     with_city_or_equivalent=False, with_small_containing_boundaries=False,
                     with_postal_code=False, with_zip5=False, with_latlon=True,
                     geohash_precision=None, name_and_address_keys=None,
                     name_only_keys=None, address_only_keys=None):
    """Compute near-dupe hashes for an address, including a base-house-number variant.

    The latitude and longitude are read from ``address`` under
    ``Coordinates.LATITUDE`` / ``Coordinates.LONGITUDE``.  If either is
    missing, both fall back to ``0.0`` and ``with_latlon`` is forced to
    ``False`` for the underlying calls.

    When ``address`` contains ``AddressComponents.HOUSE_NUMBER_BASE``, that
    key is removed from the hashed address and a second variant is hashed
    in which ``AddressComponents.HOUSE_NUMBER`` is replaced by the base
    house number.  Hashes from both variants are returned in one list.

    :param address: mapping of address components; must support ``.get``
        and ``in``.
    :param languages: languages passed to the hashing call; when ``None``
        they come from ``cls.address_languages(address)``.
    :param with_zip5: forwarded to ``cls.address_labels_and_values`` as
        ``use_zip5``.
    :param geohash_precision: defaults to ``cls.DEFAULT_GEOHASH_PRECISION``
        when ``None``.
    :param name_and_address_keys, name_only_keys, address_only_keys:
        forwarded to the hashing call; each defaults to the class
        attribute of the same name when ``None``.
    :param with_address, with_unit, with_city_or_equivalent,
        with_small_containing_boundaries, with_postal_code, with_latlon:
        forwarded to the hashing call.
    :return: list of hashes; hashes already produced by an earlier variant
        are not repeated.  Returns ``[]`` as soon as any variant yields
        empty or length-mismatched labels/values.
    """
    lat = address.get(Coordinates.LATITUDE)
    lon = address.get(Coordinates.LONGITUDE)
    if lat is None or lon is None:
        # Without a full coordinate pair, pass placeholder values and
        # switch off lat/lon hashing.
        lat = 0.0
        lon = 0.0
        with_latlon = False

    if geohash_precision is None:
        geohash_precision = cls.DEFAULT_GEOHASH_PRECISION

    if languages is None:
        # Derived from the address as given, before the base house number
        # key is stripped below.
        languages = cls.address_languages(address)

    base_house_number = None
    address_with_base_house_number = None

    if AddressComponents.HOUSE_NUMBER_BASE in address:
        base_house_number = address[AddressComponents.HOUSE_NUMBER_BASE]
        # Build a new dict rather than mutating the caller's mapping.
        address = {k: v for k, v in six.iteritems(address)
                   if k != AddressComponents.HOUSE_NUMBER_BASE}
        address_with_base_house_number = address.copy()
        address_with_base_house_number[AddressComponents.HOUSE_NUMBER] = base_house_number

    if name_only_keys is None:
        name_only_keys = cls.name_only_keys

    if name_and_address_keys is None:
        name_and_address_keys = cls.name_and_address_keys

    if address_only_keys is None:
        address_only_keys = cls.address_only_keys

    all_hashes = []
    all_hashes_set = set()

    for candidate in (address, address_with_base_house_number):
        if candidate is None:
            # No base house number was present, so there is no second variant.
            continue

        labels, values = cls.address_labels_and_values(candidate, use_zip5=with_zip5)

        if not (labels and values and len(labels) == len(values)):
            return []

        # NOTE(review): this name is expected to resolve to the module-level
        # hashing function, not to this method.
        hashes = near_dupe_hashes(
            labels,
            values,
            languages=languages,
            with_name=cls.with_name,
            with_address=with_address,
            with_unit=with_unit,
            with_city_or_equivalent=with_city_or_equivalent,
            with_small_containing_boundaries=with_small_containing_boundaries,
            with_postal_code=with_postal_code,
            with_latlon=with_latlon,
            latitude=lat,
            longitude=lon,
            geohash_precision=geohash_precision,
            name_and_address_keys=name_and_address_keys,
            name_only_keys=name_only_keys,
            # Pass the resolved value so a caller-supplied override is honoured.
            address_only_keys=address_only_keys
        )

        # Keep first-seen order while skipping hashes from earlier variants.
        all_hashes.extend([h for h in hashes if h not in all_hashes_set])
        all_hashes_set.update(hashes)

    return all_hashes