コード例 #1
0
  def execute(self):
    """Store feed entries that do not yet exist as ``Item`` rows.

    Parses the ``art.atom`` feed for the configured account. For each
    entry it derives an image URL, a link and a title, and saves a new
    ``Item`` unless one with the same ``itemurl`` is already present.
    Prints one line per item added. Returns nothing.
    """
    account = '******'
    feed_url = 'http://assets.redbubble.com/people/%s/art.atom' % account
    utm_query = "?utm_campaign=feed&utm_medium=feed&utm_source=RB"

    for entry in feedparser.parse(feed_url).entries:
      # Rewrite the '300x300' token in the first enclosure's URL to
      # '550x550' (presumably selects a larger rendition of the image).
      image = entry.enclosures[0].href.replace('300x300,075', '550x550,075')
      # Strip the utm_* query string so the stored link is stable.
      link = entry.link.replace(utm_query, "")
      title = entry.title

      already_stored = Item.objects.filter(itemurl=link).count() > 0
      if already_stored:
        continue

      print("Adding " + title)
      Item(itemimg=image, itemurl=link, itemname=title).save()
コード例 #2
0
def scraperedbubble(pagenum=1, user='******', artist='Roger Barnes'):
    """Scrape one page of a Redbubble portfolio and store unseen works.

    Fetches ``http://www.redbubble.com/people/<user>/art?page=<pagenum>``,
    prints the prettified HTML, then walks every ``<span class="work-info">``
    element. For each one it builds an image URL, an absolute work URL and
    a title, and saves a new ``Item`` unless one with the same ``itemurl``
    already exists.

    Args:
        pagenum: Page number appended to the ``page`` query parameter.
        user: Account name inserted into the portfolio URL.
        artist: Display name the listing appends to each link title as
            " by <artist>"; that suffix is removed to get the work title.

    Returns:
        None. Results are written through ``Item.save()`` and progress
        messages are printed.
    """
    url = 'http://www.redbubble.com/people/' + user + '/art?page='

    mech = Browser()
    # To route requests through a local proxy while debugging:
    #     mech.set_proxies({"http": "localhost:3129"})

    page = mech.open(url + str(pagenum))
    try:
        html = page.read()
    finally:
        # Release the HTTP response even when reading fails.
        page.close()
    soup = BeautifulSoup(html)

    # Listing markup this parser was written against (abridged sample):
    #
    #   <div id="works" class="images">
    #     <ul>
    #       <li>
    #         <span class="work-cell">
    #           <span class="work-info">
    #             <a href="/people/roger/art/5251040-1-wentworth-park-wandering"
    #                title="Wentworth Park Wandering by Roger Barnes">
    #               <img alt="Wentworth Park Wandering by Roger Barnes"
    #                    src="http://ih0.redbubble.net/work.5251040.1.flat,135x135,075,f.jpg" />
    #             </a>
    #             <span class="title">
    #               <a href="/people/roger/art/5251040-1-wentworth-park-wandering"
    #                  title="Wentworth Park Wandering">Wentworth Park Wa...</a>
    #             </span>
    #           </span>
    #         </span>
    #       </li>
    #       <li> ... </li>

    # NOTE(review): this dumps the whole page on every call; it looks like
    # leftover debug output. Kept so existing output is unchanged.
    print(soup.prettify())

    title_suffix = " by " + artist
    for entry in soup.findAll("span", attrs={"class": "work-info"}):
        print("found an entry")
        link = entry.a  # first <a> inside the work-info span
        # Rewrite the thumbnail size token in the image URL to 550x550.
        imageurl = link.img['src'].replace('135x135,075,f', '550x550,075,f')
        # hrefs in the listing are site-relative; make them absolute.
        itemurl = "http://www.redbubble.com" + link['href']
        itemname = link['title'].replace(title_suffix, "")

        if not Item.objects.filter(itemurl=itemurl).count():
            print("adding an entry")
            Item(itemimg=imageurl, itemurl=itemurl, itemname=itemname).save()