def testHighwayAbbreviations(self):
    """HWY and EXPY are spelled out as Highway and Expressway."""
    # Sample address noted alongside this test (not exercised here):
    # 3 IN 1 KITCHEN: 4902 FORT HAMILTON PARKWAY BROOKLYN, NY
    cases = (
        ("238 KINGS HWY BROOKLYN, NY",
         "238 KINGS Highway BROOKLYN, NY"),
        ("3050 WHITESTONE EXPY QUEENS, NY",
         "3050 WHITESTONE Expressway QUEENS, NY"),
    )
    # Cases are checked in order; the first mismatch stops the test.
    for raw, wanted in cases:
        first = parser.parse(raw)[0]
        self.assertEqual(first, wanted)
def testAptAndSuite(self):
    """APT. and STE. unit markers come back as Apt and Suite."""
    cases = (
        ("35 WEST 89TH STREET APT. 1A NEW YORK, NY",
         "35 WEST 89TH STREET Apt 1A Manhattan, NY"),
        ("35 WEST 89TH STREET STE. 1A NEW YORK, NY",
         "35 WEST 89TH STREET Suite 1A Manhattan, NY"),
    )
    # Cases are checked in order; the first mismatch stops the test.
    for raw, wanted in cases:
        first = parser.parse(raw)[0]
        self.assertEqual(first, wanted)
def __init__(self, source, verbose=False):
    """Remember the source text and the first address parsed from it.

    ``source`` is stored unchanged on ``self.source``.  It is handed to
    ``parser.parse`` together with ``verbose``; ``self.address`` becomes
    the first item of the result, or ``None`` when the result is empty.
    """
    self.source = source
    found = parser.parse(source, verbose=verbose)
    self.address = found[0] if found else None
def testAddressWithMultipleCity(self):
    """When several city/state pairs trail a street, the first address wins."""
    raw = "11 W. 19th Street, NY, NY 10011 , New York, NY"
    wanted = "11 W 19th Street, Manhattan, NY"
    first = parser.parse(raw)[0]
    self.assertEqual(wanted, first)
def testStreetNamePreTypeAveOfAmericas(self):
    """'Avenue Of The Americas' is picked out of surrounding filler text."""
    wanted = "131 Avenue Of The Americas Manhattan, NY"
    # Bury the address in noise so the parser has to locate it.
    padded = 'blab blah bleu %s foo fe hu' % wanted
    results = parser.parse(padded)
    self.assertIn(results[0], [wanted])
def checkExpectation(self, sample, expect, verbose=False):
    """Parse a sample file and compare the result with an expectation file.

    ``sample`` and ``expect`` are file names relative to ``self.datadir``.
    The expectation file holds one expected address per line (surrounding
    whitespace is stripped).  The sample file is read as UTF-8 and reduced
    to ASCII, dropping characters that cannot be encoded, before it is
    passed to ``parser.parse``.

    Every parsed address must appear in the expectation list, and every
    expected line must be matched by exactly one parsed address; otherwise
    the test fails.  With ``verbose`` set, both lists are printed first.
    """
    source = os.path.join(self.datadir, sample)
    expectation = os.path.join(self.datadir, expect)

    # Context managers make sure both files are closed even when reading
    # fails; the handles used to be left for the garbage collector.
    with open(expectation) as handle:
        expected = [line.strip() for line in handle]
    with codecs.open(source, 'r', encoding='utf8') as handle:
        text = handle.read().encode('ascii', 'ignore')

    addresses = parser.parse(text, verbose)

    if verbose:
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('expect:\t')
        if isinstance(expected, list):
            print('')
            for e in expected:
                print('\t%s' % e)
        else:
            print('%s' % expected)
        print('')
        print('got \t:')
        if isinstance(addresses, list):
            print('')
            for e in addresses:
                print('\t%s' % e)
        else:
            print('%s' % addresses)

    # Consume one expected entry per parsed address so duplicates and
    # leftovers are both detected.
    for loc in addresses:
        self.assertIn(loc, expected)
        expected.remove(loc)
    self.assertEqual(expected, [])
def checkExpectation(self, source, expected, verbose=False):
    """Check that parsing ``source`` yields exactly the ``expected`` addresses.

    ``source`` is passed to ``parser.parse`` together with ``verbose``.
    Every parsed address must be present in ``expected``, and every entry
    of ``expected`` must be matched by exactly one parsed address;
    otherwise the test fails.  With ``verbose`` set, the source, the
    expectation and the parse result are printed first.

    The caller's ``expected`` sequence is left untouched: matching is done
    on a private copy, so the same list can be reused across calls.
    """
    addresses = parser.parse(source, verbose)

    if verbose:
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('source: %s' % source)
        print('expected: %s' % expected)
        print('got: %s' % addresses)

    # Work on a copy; removing from the argument itself would silently
    # empty the list the caller passed in.
    remaining = list(expected)
    for loc in addresses:
        self.assertIn(loc, remaining)
        remaining.remove(loc)
    self.assertEqual(remaining, [])
def testSaintNotStreet(self):
    """'St.' before a name (701 St. Anns) should mean Saint, not Street."""
    # This is unimplemented
    candidates = ['701 St. Anns Avenue Bronx, NY']
    for address in candidates:
        print(address)
        # Bury the address in noise so the parser has to locate it.
        padded = 'blab blah bleu %s foo fe hu' % address
        first = parser.parse(padded, verbose=True)[0]
        self.assertIn(first, candidates)
def testInitials(self):
    """Dotted initials in a street name come back as separate letters."""
    raw = ''' 1180 Reverend J.A. Polite Ave. Bronx, NY. '''
    accepted = ['1180 Reverend J A Polite Avenue Bronx, NY']
    first = parser.parse(raw)[0]
    self.assertIn(first, accepted)
def testStreetNamePreTypes(self):
    """Streets whose type precedes the name ('Avenue X') are recognised."""
    candidates = [
        "1600 Avenue L Brooklyn, NY",
        "3000 Avenue X Brooklyn, NY",
        "50 Avenue X Brooklyn, NY",
    ]
    for address in candidates:
        # Bury the address in noise so the parser has to locate it.
        padded = 'blab blah bleu %s foo fe hu' % address
        first = parser.parse(padded)[0]
        self.assertIn(first, candidates)
def testPeriodBetweenDirectionAndStreet(self):
    """A period gluing the direction to the street ('14W.23rd') is handled."""
    raw = "Decker Design: 14W.23rd Street 3rd Floor, New York, NY"
    wanted = "14 W 23rd Street 3rd Floor, Manhattan, NY"
    first = parser.parse(raw)[0]
    self.assertEqual(first, wanted)
def testInferredStreet(self):
    """A Manhattan address with no street type gets 'Street' filled in."""
    raw = "10 W 15th , New York, NY"
    wanted = "10 W 15th Street, Manhattan, NY"
    first = parser.parse(raw)[0]
    self.assertEqual(first, wanted)
def testSaintAnneAvenue(self):
    """A street name containing an apostrophe survives parsing."""
    accepted = [u"600 Saint Ann's Avenue Bronx, NY"]
    raw = "Academy of Science: 600 Saint Ann's Avenue Bronx, NY"
    first = parser.parse(raw)[0]
    self.assertIn(first, accepted)