Esempio n. 1
0
 def test_strip_spaces(self):
     """Run the whole clean-up chain and check the space-free result."""
     flattened = single_line(self.address)
     # Element 0 of the separate_postcode() result is the address part.
     address_part = separate_postcode(normalise(flattened))[0]
     expanded = expand_abbreviations(address_part)
     without_stopwords = remove_stopwords(expanded)
     self.assertEqual(
         strip_spaces(without_stopwords), '3BRISLEEAVENUENORTHSHIELDS')
Esempio n. 2
0
    def test_jaccard_index(self):
        """Check jaccard_index on equal, empty and partially equal input."""

        def prepare(raw):
            # Same preprocessing chain applied to every sample address.
            return strip_spaces(
                remove_stopwords(expand_abbreviations(normalise(raw))))

        full_address = 'Flat 22, 8 St. Andrews Cross, PLYMOUTH'

        # Two identically processed addresses must score exactly 1.
        reference = prepare(full_address)
        duplicate = prepare(full_address)
        self.assertEqual(jaccard_index(reference, duplicate), 1)

        # Comparing against an empty string must score exactly 0.
        self.assertEqual(jaccard_index(reference, ''), 0)

        # Dropping only the town should still score above 0.7.
        without_town = prepare('Flat 22, 8 St. Andrews Cross')
        self.assertGreater(jaccard_index(reference, without_town), 0.7)

        # Keeping only the flat number should score below 0.3.
        flat_only = prepare('Flat 22')
        self.assertLess(jaccard_index(reference, flat_only), 0.3)
Esempio n. 3
0
from addressutils import remove_stopwords
from addressutils import strip_spaces
from addressutils import jaccard_index

if __name__ == '__main__':
    # Command-line entry point: take the address given as the first argument,
    # normalise it, and fetch candidate records from the `addresses`
    # collection so they can be scored against it.

    # Validate the command line first so that a missing argument is reported
    # before the configuration file is read or the database client is built.
    if len(sys.argv) < 2:
        print('>> PLEASE PROVIDE AN ADDRESS TO MATCH')
        sys.exit(0)

    config = configparser.ConfigParser()
    config.read('addressutils.cfg')
    client = MongoClient(config['DATABASE']['dbURI'])
    db = client[config['DATABASE']['dbName']]

    to_match = sys.argv[1]
    # separate_postcode() result: index 0 is the address text, index 1 the
    # postcode (falsy when none was found, see the branch below).
    res = separate_postcode(normalise(to_match))
    postcode = res[1]
    to_match = res[0]
    if postcode:
        # A postcode was extracted: restrict candidates to that postcode.
        addresses = db.addresses.find({'postcode': postcode})
    else:
        # No postcode: split on runs of non-word characters and use at most
        # the first four tokens to build a phonetic prefix query.
        res = re.split(r'\W+', to_match)
        address = ' '.join(res[:4])
        # NOTE(review): the phonetic key is interpolated into the regex
        # unescaped - confirm phonetic() never returns regex metacharacters.
        addresses = db.addresses.find(
            {'phonetic': {
                '$regex': '^' + phonetic(address)
            }})

    to_match = strip_spaces(remove_stopwords(expand_abbreviations(to_match)))
    # Running best score and the matches that achieved it.
    best_jaccard = 0
    best_match = []
Esempio n. 4
0
 def test_expand_abbreviations(self):
     """Check the address part after abbreviation expansion."""
     cleaned = normalise(single_line(self.address))
     # Element 0 of the separate_postcode() result is the address part.
     address_part = separate_postcode(cleaned)[0]
     self.assertEqual(
         expand_abbreviations(address_part),
         '3 THE BRISLEE AVENUE NORTH SHIELDS')
Esempio n. 5
0
 def test_separate_postcode(self):
     """Check both elements returned for the normalised fixture address."""
     cleaned = normalise(single_line(self.address))
     parts = separate_postcode(cleaned)
     # Index 0 carries the address text, index 1 the postcode.
     self.assertEqual(parts[0], '3 THE BRISLEE AVE NORTH SHIELDS')
     self.assertEqual(parts[1], 'NE30 2SQ')
Esempio n. 6
0
 def test_normalise(self):
     """Check normalise() output for the single-line fixture address."""
     flattened = single_line(self.address)
     self.assertEqual(
         normalise(flattened), '3 THE BRISLEE AVE NORTH SHIELDS NE30 2SQ')