Esempio n. 1
0
 def test_strip_spaces(self):
     """Run the fixture through every step up to strip_spaces.

     The steps are applied in order: single_line, normalise,
     separate_postcode (first element only), expand_abbreviations,
     remove_stopwords and finally strip_spaces.
     """
     flattened = single_line(self.address)
     first_part = separate_postcode(normalise(flattened))[0]
     expanded = expand_abbreviations(first_part)
     filtered = remove_stopwords(expanded)
     compacted = strip_spaces(filtered)
     self.assertEqual(compacted, '3BRISLEEAVENUENORTHSHIELDS')
Esempio n. 2
0
    # Select the candidate records: an exact lookup on the postcode when one
    # is available, otherwise a prefix search on the 'phonetic' field.
    if postcode:
        addresses = db.addresses.find({'postcode': postcode})
    else:
        # Only the first (at most) four tokens of the raw to_match string,
        # split on runs of non-word characters, feed the phonetic prefix.
        # NOTE(review): '\W+' is not a raw string, so Python reports an
        # invalid escape sequence warning for it -- consider r'\W+'.
        res = re.split('\W+', to_match)
        address = ' '.join(res[:min(len(res), 4)])
        addresses = db.addresses.find(
            {'phonetic': {
                '$regex': '^' + phonetic(address)
            }})

    # Apply to the query the same expand / remove-stopwords / strip-spaces
    # steps that each candidate goes through inside the loop below.
    to_match = strip_spaces(remove_stopwords(expand_abbreviations(to_match)))
    best_jaccard = 0
    best_match = list()
    for address in addresses:
        lines = paf_to_lines(address)
        line = separate_postcode(normalise(single_line(lines)))
        # Only the first element returned by separate_postcode is compared.
        line = strip_spaces(remove_stopwords(expand_abbreviations(line[0])))
        idx = jaccard_index(to_match, line)
        if idx > best_jaccard:
            # Strictly better score: discard earlier candidates and restart
            # the list with this one.
            best_jaccard = idx
            best_match = list()
            best_match.append(address)
        elif idx == best_jaccard:
            # Tie with the current best score: keep this candidate as well.
            # Because best_jaccard starts at 0, candidates scoring exactly 0
            # are collected here until a higher score is seen.
            best_match.append(address)

    # Report the outcome: a single winner, nothing at all, or several ties.
    if len(best_match) == 1:
        print(">> BEST MATCH")
        print(best_match[0])
    elif len(best_match) == 0:
        print(">> NO MATCH")
    else:
Esempio n. 3
0
 def test_expand_abbreviations(self):
     """Check expand_abbreviations on the first element of separate_postcode."""
     parts = separate_postcode(normalise(single_line(self.address)))
     expanded = expand_abbreviations(parts[0])
     expected = '3 THE BRISLEE AVENUE NORTH SHIELDS'
     self.assertEqual(expanded, expected)
Esempio n. 4
0
 def test_separate_postcode(self):
     """Check both elements returned by separate_postcode for the fixture."""
     normalised = normalise(single_line(self.address))
     parts = separate_postcode(normalised)
     # Index 0 and index 1 of the result are each checked in turn.
     self.assertEqual(parts[0], '3 THE BRISLEE AVE NORTH SHIELDS')
     self.assertEqual(parts[1], 'NE30 2SQ')
Esempio n. 5
0
 def test_normalise(self):
     """Check normalise applied to the single-line form of the fixture."""
     flattened = single_line(self.address)
     expected = '3 THE BRISLEE AVE NORTH SHIELDS NE30 2SQ'
     self.assertEqual(normalise(flattened), expected)
Esempio n. 6
0
 def test_single_line(self):
     """Check the string single_line produces for the fixture address."""
     # The expected value keeps the leading space and the repeated spaces.
     expected = ' 3 The Brislee    Ave, North    Shields ne30 2sq'
     flattened = single_line(self.address)
     self.assertEqual(flattened, expected)