def test_strip_spaces(self):
    """Run the fixture address through the whole pipeline up to strip_spaces.

    The address is flattened with single_line, normalised, split from its
    postcode (only element 0 of the split is kept), has its abbreviations
    expanded and its stop words removed, and is finally passed to
    strip_spaces. The outcome must equal the expected literal below.
    """
    normalised = normalise(single_line(self.address))
    street_part = separate_postcode(normalised)[0]
    expanded = expand_abbreviations(street_part)
    without_stopwords = remove_stopwords(expanded)
    self.assertEqual(
        strip_spaces(without_stopwords), '3BRISLEEAVENUENORTHSHIELDS')
from addressutils import remove_stopwords
from addressutils import strip_spaces
from addressutils import jaccard_index

# Command-line entry point: takes one address on the command line, selects
# candidate records from the `addresses` collection, and prepares the state
# for picking the best match.
if __name__ == '__main__':
    # Validate the command line first so that a missing argument yields the
    # usage message instead of a configuration or connection error.
    if len(sys.argv) < 2:
        print('>> PLEASE PROVIDE AN ADDRESS TO MATCH')
        # NOTE(review): exit status 0 is kept for backward compatibility,
        # although a usage error would conventionally be non-zero.
        sys.exit(0)

    # Connection settings come from the [DATABASE] section of
    # addressutils.cfg (keys dbURI and dbName).
    config = configparser.ConfigParser()
    config.read('addressutils.cfg')
    client = MongoClient(config['DATABASE']['dbURI'])
    db = client[config['DATABASE']['dbName']]

    # Element 0 of the split is the address text, element 1 the postcode.
    res = separate_postcode(normalise(sys.argv[1]))
    to_match = res[0]
    postcode = res[1]

    if postcode:
        # A postcode was found: candidates are the records sharing it.
        addresses = db.addresses.find({'postcode': postcode})
    else:
        # No postcode: fall back to records whose `phonetic` field starts
        # with the phonetic form of the first (up to) four tokens.
        # Raw string avoids the invalid '\W' escape in a normal literal.
        tokens = re.split(r'\W+', to_match)
        address = ' '.join(tokens[:4])
        # NOTE(review): the phonetic() result is used as a regex unescaped;
        # confirm it can never contain regex metacharacters.
        addresses = db.addresses.find(
            {'phonetic': {'$regex': '^' + phonetic(address)}})

    # Reduce the query to the compact form: abbreviations expanded, stop
    # words removed, spaces stripped.
    to_match = strip_spaces(remove_stopwords(expand_abbreviations(to_match)))

    # Initial best-match state; presumably updated by the code that follows.
    best_jaccard = 0
    best_match = []
def test_expand_abbreviations(self):
    """Check expand_abbreviations on the postcode-free part of the fixture.

    The fixture address is flattened with single_line, normalised and split
    by separate_postcode; element 0 of that split is expanded and compared
    with the expected literal below.
    """
    normalised = normalise(single_line(self.address))
    street_part = separate_postcode(normalised)[0]
    self.assertEqual(
        expand_abbreviations(street_part),
        '3 THE BRISLEE AVENUE NORTH SHIELDS')
def test_separate_postcode(self):
    """Check both elements returned by separate_postcode for the fixture.

    The fixture address is flattened with single_line and normalised before
    being split. Element 0 and element 1 of the result are compared, in
    that order, with the expected literals below.
    """
    parts = separate_postcode(normalise(single_line(self.address)))
    expected = ('3 THE BRISLEE AVE NORTH SHIELDS', 'NE30 2SQ')
    # Index and assert one element at a time, in order.
    for position, wanted in enumerate(expected):
        self.assertEqual(parts[position], wanted)