예제 #1
0
#!/usr/bin/python

import time
import locale
from spacebrewInterface.spacebrew import Spacebrew
from lxml.html import parse
from lxml.cssselect import CSSSelector

# Configure the Spacebrew client: a client named "Casual Scraper" pointed at
# the "sandbox.spacebrew.cc" server, with one publisher called "image" that
# is declared with the "string" type.
name = "Casual Scraper"
server = "sandbox.spacebrew.cc"
brew = Spacebrew(name, server=server)
brew.addPublisher("image", "string")

# Announce start-up on stdout, then start the client.
print("Starting spacebrew")
brew.start()


# Craigslist search pages to scrape, one entry per city site. Every URL
# carries the same "hasPic=1" query parameter.
base_urls = [
    "http://newyork.craigslist.org/search/aap?hasPic=1",
    "http://philadelphia.craigslist.org/search/aap?hasPic=1",
    "http://sfbay.craigslist.org/search/aap?hasPic=1",
]

# Starts out empty; how it is filled is not visible in this excerpt.
history = []
try:
	while 1:
		for base_url in base_urls:
			print "scraping %s" % base_url
			doc = parse(base_url).getroot()
			links = doc.cssselect('span.pl a')
			if links:
				for link in links:
					subdoc_url = link.get('href')
# Declare three subscribers on brew2. The second argument looks like the
# subscriber's data type (TODO confirm against the Spacebrew client API);
# "sub3" omits it and therefore gets whatever default addSubscriber defines.
brew2.addSubscriber("sub1", "range")
brew2.addSubscriber("sub2", "boolean")
brew2.addSubscriber("sub3")

# For any subscriber, you can define any number of functions
# that will get called with the sent value when a message arrives.
# Here's a simple example of a function that receives a value.
def example(value):
    """Print a received value together with its type.

    Meant to be used as a subscriber callback: it is called with the value
    that arrived and writes one line of the form ``Got <value> <type>`` to
    standard output.

    Args:
        value: The received value; any object is accepted.

    Returns:
        None.
    """
    # A single pre-formatted argument keeps the output text identical to the
    # old Python 2 print statement while also being valid Python 3 syntax.
    # The explicit 2-tuple on the right of % keeps tuple-valued inputs safe.
    print("Got %s %s" % (value, type(value)))
# "subscribe" ties a callback to a named subscriber; here the same callback,
# example, is attached to each of brew2's three subscribers.
brew2.subscribe("sub1", example)
brew2.subscribe("sub2", example)
brew2.subscribe("sub3", example)

# Start both brews. Each start() call sets that brew running in a separate
# thread.
brew1.start()
brew2.start()

# We'll publish a value every three seconds. While this is running,
# go to your admin interface and connect the subscriber to the publisher
# to see the values.
try:
    while True:
        time.sleep(3)
        # The publish method sends a value from the specified
        # publisher.
        brew1.publish('pub','rub')

except (KeyboardInterrupt, SystemExit) as e:
    # Calling stop on a brew disconnects it and waits for its
    # associated thread to finish.