#!/usr/bin/python import time import locale from spacebrewInterface.spacebrew import Spacebrew from lxml.html import parse from lxml.cssselect import CSSSelector # configure the spacebrew client name = "Casual Scraper" server = "sandbox.spacebrew.cc" brew = Spacebrew(name, server=server) brew.addPublisher("image", "string") print("Starting spacebrew"); brew.start() base_urls = [ "http://newyork.craigslist.org/search/aap?hasPic=1", "http://philadelphia.craigslist.org/search/aap?hasPic=1", "http://sfbay.craigslist.org/search/aap?hasPic=1"] history = [] try: while 1: for base_url in base_urls: print "scraping %s" % base_url doc = parse(base_url).getroot() links = doc.cssselect('span.pl a') if links: for link in links: subdoc_url = link.get('href')
brew2.addSubscriber("sub1","range") brew2.addSubscriber("sub2","boolean") brew2.addSubscriber("sub3") # For any subscriber, you can define any number of functions # that will get called with the sent value when a message arrives. # Here's a simple example of a function that recieves a value. def example(value): print "Got",value,type(value) # We call "subscribe" to associate a function with a subscriber. brew2.subscribe("sub1",example) brew2.subscribe("sub2",example) brew2.subscribe("sub3",example) # Calling start on a brew starts it running in a separate thread. brew1.start() brew2.start() # We'll publish a value every three seconds. While this is running, # go to your admin interface and connect the subscriber to the publisher # to see the values. try: while True: time.sleep(3) # The publish method sends a value from the specified # publisher. brew1.publish('pub','rub') except (KeyboardInterrupt, SystemExit) as e: # Calling stop on a brew disconnects it and waits for its # associated thread to finish.