Example #1
0
    def x_test_errors(self):
        """Re-parse the addresses of the 'errors' businesses partition into a CSV.

        Loads the sandiego.gov businesses bundle from a hard-coded local path,
        runs every row's ``address`` through the address parser and writes the
        parsed fields, plus the stripped input and the parser's string form,
        to ``../support/business_addresses.out.csv`` next to this file.

        The ``x_`` prefix keeps test runners from collecting this method.
        """
        from ambry.geo.address import Parser
        import csv
        import imp
        import os

        address_parser = Parser()

        # The bundle module is loaded straight from a developer checkout.
        bundle_module = imp.load_source(
            'bundle',
            '/Users/eric/proj/Bundles/src/civicdata/sandiego.gov/sandiego.gov-businesses-orig/bundle.py')
        partition = bundle_module.Bundle().partitions.find(table='businesses', grain='errors')

        here = os.path.dirname(__file__)
        out_path = os.path.join(here, '../support', 'business_addresses.out.csv')

        with open(out_path, 'w') as out_file:
            csv_writer = csv.DictWriter(out_file, self.header)
            csv_writer.writeheader()

            for record in p_rows(partition) if False else partition.query("SELECT * FROM businesses"):
                raw_address = record['address']
                parsed = address_parser.parse(raw_address)

                fields = parsed.as_dict()
                fields['input'] = raw_address.strip()
                fields['output'] = str(parsed)
                csv_writer.writerow(fields)
Example #2
0
    def test_address_files(self):
        """Parse every line of the crime_addresses support file and report results.

        Reads ``../support/crime_addresses.txt`` (relative to this file), parses
        each line with the address parser and writes one CSV row per parsed
        address to ``../support/crime_addresses.out.csv``, using ``self.header``
        as the column list.

        A line counts as a failure when the parser raises, returns a falsy
        result, or yields a result without a city. The final summary line
        prints the counts and ``rate``, which is the failure percentage.
        """
        import os
        from ambry.geo.address import Parser
        import csv

        parser = Parser()

        success = 0
        failure = 0
        total = 0
        filename = "crime_addresses"
        support_dir = os.path.join(os.path.dirname(__file__), '../support')
        f_input = os.path.join(support_dir, filename + '.txt')
        f_output = os.path.join(support_dir, filename + '.out.csv')

        with open(f_output, 'w') as out:
            writer = csv.DictWriter(out, self.header)
            writer.writeheader()

            with open(f_input) as f:
                for line in f:
                    total += 1

                    print('----')
                    print(line.strip())

                    # Only the parse call is guarded, so errors raised while
                    # writing the CSV are not mistaken for parse failures.
                    try:
                        ps = parser.parse(line)
                    except Exception as e:
                        print("ERROR %s" % (e,))
                        failure += 1
                        continue

                    if not ps:
                        failure += 1
                        continue

                    d = ps.as_dict()
                    d['input'] = line.strip()
                    d['output'] = str(ps)
                    writer.writerow(d)

                    # A parse without a city is still written to the CSV, but
                    # is counted as a failure and dumped for inspection.
                    if not ps.city:
                        failure += 1
                        print(d)
                        print(ps)
                        print('')
                    else:
                        success += 1

            # An empty input file leaves total at 0; report a 0.0 rate instead
            # of raising ZeroDivisionError.
            if total:
                rate = round(float(failure) / float(total) * 100, 3)
            else:
                rate = 0.0

            print('')
            print("total={} success={} failure={} rate={}".format(total, success, failure, rate))
Example #3
0
    def __init__(self,partition):
        """Keep the partition and set up an empty address cache and a parser.

        :param partition: stored as ``self.p``.
        """
        from ambry.geo.address import Parser

        # Partition handle, kept under the short attribute name ``p``.
        self.p = partition
        # Starts empty.
        self.address_cache = dict()
        # One parser instance, created once per object.
        self.parser = Parser()
Example #4
0
class Geocoder(object):
    """Resolve address strings to address ids using a geocoder table.

    Addresses are parsed with ``ambry.geo.address.Parser`` and looked up in
    the partition passed to the constructor. Lookup results, including
    misses, are cached per parsed-address hash.
    """

    def __init__(self,partition):
        """Keep the partition and set up an empty cache and an address parser.

        :param partition: object whose ``query()`` method is used by
            :meth:`geocode`.
        """
        from ambry.geo.address import Parser

        self.p = partition

        # Maps a parsed address's ``hash`` to its address id, or to None when
        # the lookup found nothing.
        self.address_cache = {}

        self.parser =  Parser()

    def parse_and_code(self, addrstr, city=None, state=None, zip=None):
        """Parse an address string and look up its address id.

        :param addrstr: the address text handed to the parser.
        :param city: passed through to the parser.
        :param state: passed through to the parser.
        :param zip: passed through to the parser.
        :return: tuple ``(address_id, adr)`` where ``adr`` is the parser's
            result and ``address_id`` is None when no record was found.
        """
        adr = self.parser.parse(addrstr, city=city, state=state, zip=zip)

        key = adr.hash
        if key not in self.address_cache:
            r = self.geocode(**adr.args)
            # Misses are cached as None so the same address is not queried
            # again.
            self.address_cache[key] = r['address_id'] if r else None

        return self.address_cache[key], adr

    def geocode(self, number, name, direction=None,
                suffix=None, city=None, state=None, zip=None):
        """Return a record from the geocoder table.

        This function expects a partition, p, that holds a table named
        'geocoder', of the same structure as used in
        clarinova.com-geocode-casnd.

        Candidates must match ``name`` and ``direction``, have a house number
        within 100 of ``number``, and match at least two of city, zip and
        suffix (each match scores 10, and the score must reach 20). Of those,
        the row whose number is closest to ``number`` is returned.

        Missing ``direction``, ``suffix`` and ``city`` are replaced by '-',
        and a missing or non-numeric ``zip`` by -1, before querying.
        ``state`` is passed to the query as a parameter, but the SQL does not
        reference it.

        :return: whatever ``first()`` on the query result yields.
        """
        direction = direction or '-'
        suffix = suffix or '-'
        city = city or '-'

        # Values such as '92101-1234' or other non-numeric input fall back to
        # -1 rather than failing the lookup.
        try:
            zip = int(zip) if zip else -1
        except (TypeError, ValueError):
            zip = -1

        # state defaults to None; only upper-case it when a value was given.
        if state is not None:
            state = state.upper()

        q = """
        SELECT
            *,
            (
                CASE WHEN city = :city THEN 10 ELSE 0 END +
                CASE WHEN zip = :zip THEN 10 ELSE 0 END +
                CASE WHEN suffix = :suffix THEN 10 ELSE 0 END
            ) AS score
        FROM geocoder
        WHERE  name = :name AND direction = :direction
        AND score >= 20
        AND number BETWEEN (:number-100) AND (:number+100)
        ORDER BY ABS(number - :number) LIMIT 1;
        """

        return self.p.query(q, number=number, name=name, direction=direction,
                            suffix=suffix, city=city.title(), state=state,
                            zip=zip).first()