Esempio n. 1
0
class FlipkartTrendsPipeline(object):
  """Item pipeline that stores scraped Flipkart books through a DB session.

  Items coming from spiders listed in ``_SPIDER_NAMES`` are turned into
  ``Books`` rows and committed one at a time; items from any other spider
  are passed through untouched.
  """

  # Names of the spiders whose items this pipeline persists.
  _SPIDER_NAMES = ('FlipkartSpider',)

  def __init__(self):
    """Open the CSV writer and the database session used by the pipeline."""
    # Keep a reference to the file object so close_spider() can release it;
    # wrapping open() directly in csv.writer() leaked the handle.
    self._csv_file = open("books.csv", "w")
    # NOTE(review): nothing in this class writes through newcsv at the moment.
    self.newcsv = csv.writer(self._csv_file)
    self.session = Session()

  def process_item(self, item, spider):
    """Persist one scraped book and hand the item on to the next pipeline.

    Args:
      item: mapping with 'title', 'author' and 'price' entries; the first
        element of each is used.
      spider: the spider that produced the item; only its ``name`` is read.

    Returns:
      The unmodified ``item``.

    Raises:
      Whatever the session raises while adding/committing; the session is
      rolled back before the exception propagates.
    """
    if spider.name not in self._SPIDER_NAMES:
      return item

    log.msg(item['title'], level=log.DEBUG)

    # Tag the book with the highest round currently stored in Meta.
    # NOTE(review): first() yields None when Meta has no rows, in which case
    # the attribute access below raises AttributeError.
    round_number = self.session.query(Meta).order_by(desc(Meta.round)).first()

    # The price is taken as the third space-separated token of the first
    # price string.
    # NOTE(review): presumably text like "Rs. 123" with padding - confirm.
    price = item['price'][0].split(' ')[2]
    book = Books(unicode(round_number.round),
                 unicode(item['title'][0]),
                 unicode(item['author'][0]),
                 flipkart=unicode(price))

    try:
      self.session.add(book)
      # commit() flushes pending changes itself, so no extra flush() is needed.
      self.session.commit()
    except Exception:
      # Without a rollback the session stays in a failed state and every
      # later item processed by this pipeline would fail as well.
      self.session.rollback()
      raise
    return item

  def close_spider(self, spider):
    """Release the CSV file and the session when a handled spider closes.

    Args:
      spider: the spider being closed; ignored unless its ``name`` is one
        of ``_SPIDER_NAMES``.
    """
    if spider.name not in self._SPIDER_NAMES:
      return
    self._csv_file.close()
    self.session.close()
Esempio n. 2
0
class FlipkartTrendsPipeline(object):
    """Item pipeline that stores scraped Flipkart books through a DB session.

    Items coming from spiders listed in ``_SPIDER_NAMES`` are turned into
    ``Books`` rows and committed one at a time; items from any other spider
    are passed through untouched.
    """

    # Names of the spiders whose items this pipeline persists.
    _SPIDER_NAMES = ('FlipkartSpider',)

    def __init__(self):
        """Open the CSV writer and the database session for the pipeline."""
        # Keep a reference to the file object so close_spider() can release
        # it; wrapping open() directly in csv.writer() leaked the handle.
        self._csv_file = open("books.csv", "w")
        # NOTE(review): nothing in this class writes through newcsv for now.
        self.newcsv = csv.writer(self._csv_file)
        self.session = Session()

    def process_item(self, item, spider):
        """Persist one scraped book and hand the item to the next pipeline.

        Args:
            item: mapping with 'title', 'author' and 'price' entries; the
                first element of each is used.
            spider: the spider that produced the item; only its ``name``
                is read.

        Returns:
            The unmodified ``item``.

        Raises:
            Whatever the session raises while adding/committing; the
            session is rolled back before the exception propagates.
        """
        if spider.name not in self._SPIDER_NAMES:
            return item

        log.msg(item['title'], level=log.DEBUG)

        # Tag the book with the highest round currently stored in Meta.
        # NOTE(review): first() yields None when Meta has no rows, in which
        # case the attribute access below raises AttributeError.
        latest_round = self.session.query(Meta).order_by(
            desc(Meta.round)).first()

        # The price is taken as the third space-separated token of the
        # first price string.
        # NOTE(review): presumably text like "Rs. 123" with padding - confirm.
        price = item['price'][0].split(' ')[2]
        book = Books(unicode(latest_round.round),
                     unicode(item['title'][0]),
                     unicode(item['author'][0]),
                     flipkart=unicode(price))

        try:
            self.session.add(book)
            # commit() flushes pending changes itself; the extra flush()
            # that used to follow it was redundant.
            self.session.commit()
        except Exception:
            # Without a rollback the session stays in a failed state and
            # every later item processed by this pipeline would fail too.
            self.session.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the CSV file and the session when a handled spider closes.

        Args:
            spider: the spider being closed; ignored unless its ``name``
                is one of ``_SPIDER_NAMES``.
        """
        if spider.name not in self._SPIDER_NAMES:
            return
        self._csv_file.close()
        self.session.close()
Esempio n. 3
0
class FlipkartSpider(BaseSpider):
  """Spider that scrapes title, author and price from Flipkart new releases.

  Creating the spider also records a new crawl round: a ``Meta`` row whose
  round is one higher than the highest stored round, or 0 when no round
  exists yet.
  """

  name = "FlipkartSpider"
  # allowed_domains expects bare domain names; the previous
  # "http://flipkart.com" entry was a URL and could never match a host.
  allowed_domains = ["flipkart.com", "www.flipkart.com"]
  start_urls = [
      'http://www.flipkart.com/view-books/0/new-releases'
      ]

  # One node per book in the listing page.
  _ITEM_XPATH = '//div[@class="fk-srch-item fk-inf-scroll-item"]'
  # The following paths are relative to a single book node.
  _TITLE_XPATH = (
      'div[@class="line fksd-bodytext "]'
      '/div[@class="line bmargin10"]'
      '/h2[@class="fk-srch-item-title fksd-bodytext"]'
      '/a/text()')
  _AUTHOR_XPATH = (
      'div[@class="line fksd-bodytext "]'
      '/div[@class="line bmargin10"]'
      '/span[@class="fk-item-authorinfo-text fksd-smalltext"]'
      '/a/text()')
  _PRICE_XPATH = (
      'div[@class="line fksd-bodytext "]'
      '/div[@class="unit fk-sitem-info-section"]'
      '/div[@class="line fk-itemdetail-info fksd-bodytext"]'
      '/div[@class="line dlvry-det"]'
      '/div[@class="line fk-srch-pricing fksd-smalltext"]'
      '/b[@class="fksd-bodytext price final-price"]'
      '/text()')

  def __init__(self, *args, **kwargs):
    """Initialise the spider and register a new crawl round in ``Meta``.

    Args:
      *args, **kwargs: forwarded unchanged to ``BaseSpider.__init__`` so
        spider arguments keep working.

    Raises:
      Any error raised by the session while querying or committing; the
      session is rolled back and closed before it propagates.
    """
    BaseSpider.__init__(self, *args, **kwargs)
    self.session = Session()
    try:
      round_info = self.session.query(Meta).order_by(desc(Meta.round)).first()
      # Single-argument print(...) behaves the same as the print statement.
      print(round_info)
      # An empty Meta table means this is the first round. The old bare
      # "except:" used any failure (including real database errors) as the
      # signal for that case and then retried on the same broken session.
      next_round = 0 if round_info is None else round_info.round + 1
      self.session.add(Meta(next_round))
      # commit() flushes pending changes itself, so no extra flush() call.
      self.session.commit()
    except Exception:
      self.session.rollback()
      raise
    finally:
      self.session.close()

  def parse(self, response):
    """Extract one ``Book`` item per listing entry found in ``response``.

    Args:
      response: the downloaded listing page.

    Returns:
      A list of ``Book`` items whose 'title', 'author' and 'price' fields
      hold the lists of text nodes extracted for that entry.
    """
    hxs = HtmlXPathSelector(response)
    sites = hxs.select(self._ITEM_XPATH)
    print(len(sites))

    items = []
    for site in sites:
      item = Book()
      item['title'] = site.select(self._TITLE_XPATH).extract()
      item['author'] = site.select(self._AUTHOR_XPATH).extract()
      item['price'] = site.select(self._PRICE_XPATH).extract()
      items.append(item)
    return items