def handle(self, *args, **options):
    """Fetch the latest Boston demo data (events, news feeds, police reports,
    building permits, GeoReport issues and Flickr photos), then rebuild the
    aggregates."""
    import os
    HERE = os.getcwd()
    print "Working directory is", HERE

    print "Adding latest events..."
    from ebdata.scrapers.us.ma.boston.events.retrieval import main as events_main
    events_main()

    print "Adding latest news..."
    from ebdata.scrapers.general.georss.retrieval import main as news_main
    news_main(["http://search.boston.com/search/api?q=*&sort=-articleprintpublicationdate&subject=massachusetts&scope=bonzai"])
    # More feeds from Joel. Local blog news:
    # ... this one times out.
    #news_main(["http://search.boston.com/search/api?q=*&sort=-articleprintpublicationdate&scope=blogs&count=250&subject=massachusetts&format=atom"])

    print "Adding police reports..."
    from ebdata.scrapers.us.ma.boston.police_reports.retrieval import main as pr_main
    pr_main()

    print "Adding building permits..."
    from ebdata.scrapers.us.ma.boston.building_permits.retrieval import PermitScraper
    PermitScraper().update()

    print "Adding GeoReport issues..."
    from ebdata.scrapers.general.open311.georeportv2 import main as georeport_main
    georeport_main(['--html-url-template=http://seeclickfix.com/issues/{id}',
                    '--days-prior=6',
                    'http://seeclicktest.com/boston/open311/v2'])

    # TODO: fix traceback: ebdata.blobs.scrapers.NoSeedYet: You need to add a
    # Seed with the URL 'http://www.cityofboston.gov/news/'
    #echo Adding press releases...
    #python everyblock/everyblock/cities/boston/city_press_releases/retrieval.py || die

    print "Adding flickr photos..."
    from ebdata.scrapers.general.flickr import flickr_retrieval
    flickr_retrieval.main(['--days=3'])

    print "Updating aggregates, see ebpub/README.txt..."
    from ebpub.db.bin.update_aggregates import update_all_aggregates
    update_all_aggregates()

    print """
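# Usage sketch: assuming the handle() above belongs to a BaseCommand subclass
# in some app's management/commands/ directory, the command name comes from
# that module's filename, which is not shown here; 'import_boston_news' below
# is a hypothetical placeholder. It could then be run from code:
#
#     from django.core.management import call_command
#     call_command('import_boston_news')
#
# or, equivalently, from the shell:
#
#     django-admin.py import_boston_news --settings=obdemo.settings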
def do_events(**kwargs):
    from obdemo.scrapers.add_events import main
    return main()
def do_news(**kwargs):
    from obdemo.scrapers.add_news import main
    main(["http://search.boston.com/search/api?q=*&sort=-articleprintpublicationdate&subject=massachusetts&scope=bonzai&count=400"])
    main(["http://search.boston.com/search/api?q=*&sort=-articleprintpublicationdate&scope=blogs&count=400&subject=massachusetts&format=atom"])
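# A hypothetical example: adding another GeoRSS/Atom source to do_news() is
# just another main() call with the feed URL (the URL below is made up):
#
#     main(["http://example.com/boston/neighborhood-news.atom"])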
def do_flickr(**kwargs):
    from ebdata.scrapers.general.flickr.flickr_retrieval import main
    return main()
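# A minimal stand-alone runner, an assumption about how the do_* tasks above
# might be driven (the original scheduling config is not shown here): it runs
# each task in sequence so one failing scraper does not abort the rest.
import traceback

def run_all_tasks(**kwargs):
    for task in (do_events, do_news, do_flickr):
        print "Running %s..." % task.__name__
        try:
            task(**kwargs)
        except Exception:
            # Log and keep going; one broken feed shouldn't stop the others.
            traceback.print_exc()

if __name__ == '__main__':
    run_all_tasks()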