Example #1
0
def run_on(dirs, pts, states=None,
           base_dir="/Users/mike/Dropbox/Projects/dc2j/"):
    """Compute a coverage value for each newspaper directory.

    For every (directory, centre point) pair the pages found by
    ``get_newspaper_webpages`` are scanned with ``get_placenames``, the
    names are turned into coordinate candidates by ``coordinates``, one
    candidate per name is selected with ``nearest(pt, ...)`` and the
    result is passed to ``coverage(pt, ...)``.

    Args:
        dirs: Directory names, relative to ``base_dir``. Surrounding
            whitespace is stripped from each entry.
        pts: Centre points, one per directory; handed unchanged to
            ``nearest`` and ``coverage``.
        states: Optional per-directory values used only for the diagnostic
            output printed when the coverage computation fails. May be
            omitted, in which case the state lookup is skipped.
        base_dir: Prefix prepended to every directory name. Defaults to
            the location the original script was written against.

    Returns:
        A ``(centres, covs)`` tuple: ``centres`` lists the centre points
        that were processed and ``covs`` the matching coverage values
        (``(0, 0)`` where the placeholder branch below was taken).

    Raises:
        Any exception raised by ``nearest`` or ``coverage`` is re-raised
        after the offending inputs have been printed.
    """
    if states is None:
        # Callers that have no state information still need a value per
        # directory so the three sequences can be zipped together.
        dirs = list(dirs)
        states = [None] * len(dirs)

    places = parse_location_file()
    place_re = placefinder.make_re(places)
    logging.info('Created place regexp.')

    covs = []
    centres = []
    for name, pt, state in zip(dirs, pts, states):
        centres.append(pt)
        path = base_dir + name.strip()
        logging.info("getting files for directory: %s", path)
        files = get_newspaper_webpages(path)
        logging.info("getting placenames for directory: %s", path)
        names = get_placenames(files, place_re, 100)
        logging.info("found %s placenames in directory: %s", len(names), path)
        # NOTE(review): the message says "no placenames" but the test is
        # for exactly one entry. Presumably get_placenames always returns
        # one sentinel element -- confirm, otherwise this should be an
        # emptiness check.
        if len(names) == 1:
            logging.warning("found no placenames in %s", path)
            # TODO make a better default here!
            covs.append((0, 0))
            continue
        c = coordinates(names, places)
        try:
            c_nearest = [nearest(pt, ci) for ci in c]
            cov = coverage(pt, c_nearest)
            covs.append(cov)
        except Exception:
            # Dump the inputs that triggered the failure before re-raising.
            print(pt)
            print(state)
            if state is not None:
                # Only geocode when a state was supplied, so the diagnostic
                # itself cannot fail on a missing value and hide the error.
                print(addr2latlon(state))
            raise
    return centres, covs
Example #2
0
        except:
            print pt
            print state
            print addr2latlon(state)
            raise
    return centres, covs

def main():
    """Run the coverage computation over a fixed set of newspaper sites.

    Returns:
        The ``(centres, covs)`` tuple produced by ``run_on``.
    """
    dirs = [
      'www.broomfieldenterprise.com',
      'www.poughkeepsiejournal.com',
      'www.aspendailynews.com',
      'www.suntimes.com',
      'www.southbendtribune.com'
    ]
    pts = [
      (39.9205411,-105.0866504),
      (41.7003713,-73.9209701),
      (39.1910983,-106.8175387),
      (41.887610,-87.636057),
      (41.6725, -86.255278)
    ]
    # run_on takes one state per directory (it only uses it for diagnostic
    # output on failure); the call previously omitted this argument.
    # NOTE(review): two-letter codes mirror the "IL" used by the script
    # entry point -- confirm this is the format addr2latlon expects.
    states = [
      'CO',
      'NY',
      'CO',
      'IL',
      'IN'
    ]
    return run_on(dirs, pts, states)

if __name__ == "__main__":
    # Send log output to stdout so it interleaves with the printed result.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # Geocode the newspaper to obtain the centre point for the run.
    state = "IL"
    lat, lng = addr2latlon("%s, %s, Newspaper" % ("Moline Daily Dispatch", state))
    # run_on expects a state per directory; the call previously passed only
    # two arguments. The result is the full (centres, covs) tuple.
    result = run_on(['www.qconline.com'], [(lat, lng)], [state])
    print("-------\n" + str(result))