# Column indexes into the 22-column output row.  The other indexes used
# below (STREET, CITY, STATE, POSTAL, CUSIP6) are not part of this excerpt;
# presumably they are defined just above -- TODO confirm.
COMPANY_NAME = 9
COUNTRY = 17

print("Opening %s to read..." % args.input)
print("Opening %s to output..." % args.output)

# The Python 2 csv module requires its files to be opened in binary mode;
# text mode lets the platform translate newlines, which produces "\r\r\n"
# row endings on Windows.
with open(args.input, "rb") as csv_in, open(args.output, "wb") as csv_out:
    reader = csv.DictReader(csv_in)
    writer = csv.writer(csv_out)
    for in_row in reader:
        # Start from an all-empty row; only the columns set below are filled.
        out_row = [''] * 22
        addr = in_row['ADDRESS']
        if addr:
            print("Cleaning address...")
            print("Input: " + addr)
            # clean() yields a (street, city, state, postal, country) tuple,
            # or a falsy value when the address could not be resolved.
            addr = clean(addr)
            if addr:
                street, city, state, postal, country = addr
                print("---- %s, %s, %s, %s, %s -----" % addr)
                # Missing components are written as empty strings.
                out_row[STREET] = street or ''
                out_row[CITY] = city or ''
                out_row[STATE] = state or ''
                out_row[POSTAL] = postal or ''
                out_row[COUNTRY] = country or ''
            else:
                print("NO MATCHING ADDRESS")
        # Identifier columns are copied through for every input row, whether
        # or not an address was resolved.
        out_row[CUSIP6] = in_row['CUSIP6']
        out_row[COMPANY_NAME] = in_row['NAME']
        writer.writerow(out_row)
def test_clean_ireland_address(self):
    """clean() resolves an upper-case Dublin address with no postal code."""
    raw_address = "70 SIR JOHN ROGERSON'S QUAY, Dublin 2, L2 2, Ireland"
    expected = (
        u"70 Sir John Rogerson's Quay",
        u'Dublin',
        u'Dublin',
        None,
        u'Ireland',
    )
    actual = clean(raw_address)
    self.assertEqual(expected, actual)
# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# UTF-8 can be set as the implicit str/unicode codec.
# NOTE(review): presumably needed because clean() returns unicode values that
# the csv writer must convert to byte strings -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')

parser = argparse.ArgumentParser(description='Geocode csv file.')
parser.add_argument("path", help="Path to csv")
parser.add_argument("field", help="Name of address field")
args = parser.parse_args()


def read_fieldnames(path):
    """Return the first row of the CSV file at *path* as a list of strings.

    Raises StopIteration if the file contains no rows.
    """
    # Binary mode is what the Python 2 csv module requires.
    with open(path, "rb") as path_csv:
        return next(csv.reader(path_csv))


# Output columns: every input column followed by the geocoded components.
fieldnames = read_fieldnames(args.path) + [
    'STREET', 'CITY', 'STATE', 'POSTAL', 'COUNTRY']

# Both files are opened in binary mode as the Python 2 csv module requires;
# text mode yields "\r\r\n" row endings on Windows.
with open(args.path, "rb") as path_csv:
    reader = csv.DictReader(path_csv)
    with open("geocoded.csv", "wb") as geocoded_out:
        writer = csv.DictWriter(geocoded_out, fieldnames)
        # NOTE(review): no header row is written to geocoded.csv; confirm
        # whether that is intentional.
        for row in reader:
            address = row[args.field]
            print("Geocoding " + address)
            geo_address = clean(address)
            if geo_address:
                # clean() yields a 5-tuple matching the appended columns.
                (row['STREET'], row['CITY'], row['STATE'], row['POSTAL'],
                 row['COUNTRY']) = geo_address
            # Rows that could not be geocoded are still written; DictWriter
            # leaves their missing columns empty.
            writer.writerow(row)
def test_clean_weird_chinese_address(self):
    """clean() gives no result (None) for this Beijing address."""
    raw_address = (
        'M5 1 Jiuxianqiao East Road, Chaoyang District, '
        'Beijing 100016, People s Republic of China'
    )
    self.assertEqual(None, clean(raw_address))
import argparse
import sys

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# UTF-8 can be set as the implicit str/unicode codec.
# NOTE(review): presumably needed because clean() returns unicode values that
# the csv writer must convert to byte strings -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')

parser = argparse.ArgumentParser(description='Geocode csv file.')
parser.add_argument("path", help="Path to csv")
parser.add_argument("field", help="Name of address field")
args = parser.parse_args()


def read_fieldnames(path):
    """Return the first row of the CSV file at *path* as a list of strings.

    Raises StopIteration if the file contains no rows.
    """
    # Binary mode is what the Python 2 csv module requires.
    with open(path, "rb") as path_csv:
        return next(csv.reader(path_csv))


# Output columns: every input column followed by the geocoded components.
fieldnames = read_fieldnames(args.path) + [
    'STREET', 'CITY', 'STATE', 'POSTAL', 'COUNTRY']

# Both files are opened in binary mode as the Python 2 csv module requires;
# text mode yields "\r\r\n" row endings on Windows.
with open(args.path, "rb") as path_csv:
    reader = csv.DictReader(path_csv)
    with open("geocoded.csv", "wb") as geocoded_out:
        writer = csv.DictWriter(geocoded_out, fieldnames)
        # NOTE(review): no header row is written to geocoded.csv; confirm
        # whether that is intentional.
        for row in reader:
            address = row[args.field]
            print("Geocoding " + address)
            geo_address = clean(address)
            if geo_address:
                # clean() yields a 5-tuple matching the appended columns.
                (row['STREET'], row['CITY'], row['STATE'], row['POSTAL'],
                 row['COUNTRY']) = geo_address
            # Rows that could not be geocoded are still written; DictWriter
            # leaves their missing columns empty.
            writer.writerow(row)