def run_on(dirs, pts, states=None,
           base_dir="/Users/mike/Dropbox/Projects/dc2j/"):
    """Compute a coverage value for each crawled newspaper directory.

    For every (directory, point, state) triple the function collects the
    newspaper web pages under ``base_dir + directory``, extracts place
    names from them, resolves those names to coordinates, picks the
    candidate nearest to the point for each name and passes the result to
    ``coverage``.

    Args:
        dirs: directory names (one per newspaper site), relative to
            ``base_dir``; surrounding whitespace is stripped.
        pts: reference points, one per directory; the visible callers pass
            ``(lat, lng)`` tuples.
        states: optional per-directory state strings.  They are only used
            for diagnostics when the coverage computation fails.  When
            omitted, a ``None`` placeholder is used for every directory.
        base_dir: prefix prepended verbatim to each directory name.

    Returns:
        A ``(centres, covs)`` tuple: ``centres`` is the list of points that
        were processed and ``covs`` the matching list of coverage values
        (``(0, 0)`` for directories that hit the "no placenames" branch).

    Raises:
        Whatever ``nearest`` or ``coverage`` raise; the failure context is
        logged before the exception is re-raised.
    """
    if states is None:
        # Callers that have no state information still get one entry per
        # directory so the zip below does not truncate to nothing.
        states = [None] * len(dirs)

    places = parse_location_file()
    place_re = placefinder.make_re(places)
    logging.info('Created place regexp.')

    covs = []
    centres = []
    for dir_name, pt, state in zip(dirs, pts, states):
        centres.append(pt)
        site_dir = base_dir + dir_name.strip()

        logging.info("getting files for directory: %s", site_dir)
        files = get_newspaper_webpages(site_dir)

        logging.info("getting placenames for directory: %s", site_dir)
        # NOTE(review): the meaning of the literal 100 is defined by
        # get_placenames, which is not visible here - confirm.
        names = get_placenames(files, place_re, 100)
        logging.info("found %s placenames in directory: %s",
                     len(names), site_dir)

        # NOTE(review): the message says "no placenames" but the test is
        # for exactly one entry; kept as-is because get_placenames is not
        # visible here - confirm whether this should be an emptiness check.
        if len(names) == 1:
            logging.warning("found no placenames in %s", site_dir)
            # TODO make a better default here!
            covs.append((0, 0))
            continue

        c = coordinates(names, places)
        try:
            c_nearest = [nearest(pt, ci) for ci in c]
            cov = coverage(pt, c_nearest)
        except Exception:
            # Record which input failed, then let the error propagate.
            logging.error("coverage failed for point %s (state %s)",
                          pt, state)
            if state is not None:
                logging.error("state resolves to %s", addr2latlon(state))
            raise
        covs.append(cov)

    return centres, covs
def main():
    """Run the coverage computation on a fixed sample of newspaper sites.

    Each directory name is paired positionally with the coordinates of the
    paper's home town and with that town's state abbreviation.  The result
    of ``run_on`` is discarded.
    """
    dirs = [
        'www.broomfieldenterprise.com',
        'www.poughkeepsiejournal.com',
        'www.aspendailynews.com',
        'www.suntimes.com',
        'www.southbendtribune.com',
    ]
    pts = [
        (39.9205411, -105.0866504),
        (41.7003713, -73.9209701),
        (39.1910983, -106.8175387),
        (41.887610, -87.636057),
        (41.6725, -86.255278),
    ]
    # run_on takes one state per directory; it was previously called
    # without them, which fails with a TypeError.
    states = ['CO', 'NY', 'CO', 'IL', 'IN']
    run_on(dirs, pts, states)


if __name__ == "__main__":
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    # Geocode the paper by name and state, then compute coverage for it.
    lat, lng = addr2latlon(
        "%s, %s, Newspaper" % ("Moline Daily Dispatch", "IL"))
    covs = run_on(['www.qconline.com'], [(lat, lng)], ['IL'])
    # Single-argument parenthesised form prints the same text on Python 2
    # and is also valid on Python 3.
    print("-------\n" + str(covs))