Example #1
 def __init__(self):
     self.newcsv = csv.writer(open("books.csv", "w"))
     #self.engine = create_engine('mysql connection')
     #self.Session = sessionmaker(bind=self.engine)
     #self.dt = datetime
     #Base.metadata.create_all(self.engine)
     self.session = Session()
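
The commented-out lines above hint at where the module-level Session comes from. A minimal sketch of that setup, assuming a models module (the module name and the connection URL are placeholders, since the original only says 'mysql connection'), using SQLAlchemy's create_engine, sessionmaker and a declarative Base:

# models.py (hypothetical name): the shared Session factory these snippets import.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()
engine = create_engine('mysql://user:password@localhost/bookdb')  # placeholder URL
Session = sessionmaker(bind=engine)

# Once the mapped classes (Books, Meta) are defined against Base:
# Base.metadata.create_all(engine)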
Example #2
class InfibeamSpider(BaseSpider):
  name = "InfibeamSpider"
  allowed_domains = ["http://infibeam.com","www.infibeam.com"]

  def __init__(self):
    self.session = Session()
  
  def start_requests(self):
    round_info = self.session.query(Meta).order_by(desc(Meta.round)).first()
    for row in self.session.query(Books).filter(Books.round==round_info.round):
      # log.msg("Looking for "+row.title,level=log.DEBUG)
      ebayurl="http://www.ebay.in/sch/i.html?_from=R40&_npmv=3&_trksid=m570&_nkw=Anupam Kher&_sacat=267"
      url = "http://www.infibeam.com/search?q="+row.title+" ,"+row.author
      yield Request(url,callback=self.parse_lookup, encoding='utf-8')
  
  def parse_lookup(self,response):
    hxs = HtmlXPathSelector(response)
#    log.msg("here",level=log.DEBUG)
    result_title = hxs.select('//div[@id="bd"]/div[@id="yui-main"]/div[@class="yui-b"]/div[@class="yui-g"]/div[@id="search_result"]/ul[@class="search_result"]/li/span[@class="title"]/h2[@class="simple"]/a/text()').extract()[0]
    result_price = hxs.select('//div[@id="bd"]/div[@id="yui-main"]/div[@class="yui-b"]/div[@class="yui-g"]/div[@id="search_result"]/ul[@class="search_result"]/li/div[@class="price"]/b/text()').extract()[0]
    item = Book()
    item['title']=result_title
    item['author']="None"
    item['price']= result_price
    return item
Example #3
class InfibeamSpider(BaseSpider):
    name = "InfibeamSpider"
    allowed_domains = ["infibeam.com", "www.infibeam.com"]

    def __init__(self):
        self.session = Session()

    def start_requests(self):
        round_info = self.session.query(Meta).order_by(desc(
            Meta.round)).first()
        for row in self.session.query(Books).filter(
                Books.round == round_info.round):
            # log.msg("Looking for "+row.title,level=log.DEBUG)
            ebayurl = "http://www.ebay.in/sch/i.html?_from=R40&_npmv=3&_trksid=m570&_nkw=Anupam Kher&_sacat=267"
            url = "http://www.infibeam.com/search?q=" + row.title + " ," + row.author
            yield Request(url, callback=self.parse_lookup, encoding='utf-8')

    def parse_lookup(self, response):
        hxs = HtmlXPathSelector(response)
        #    log.msg("here",level=log.DEBUG)
        result_title = hxs.select(
            '//div[@id="bd"]/div[@id="yui-main"]/div[@class="yui-b"]/div[@class="yui-g"]/div[@id="search_result"]/ul[@class="search_result"]/li/span[@class="title"]/h2[@class="simple"]/a/text()'
        ).extract()[0]
        result_price = hxs.select(
            '//div[@id="bd"]/div[@id="yui-main"]/div[@class="yui-b"]/div[@class="yui-g"]/div[@id="search_result"]/ul[@class="search_result"]/li/div[@class="price"]/b/text()'
        ).extract()[0]
        item = Book()
        item['title'] = result_title
        item['author'] = "None"
        item['price'] = result_price
        return item
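
Both spiders fill a Book item with title, author and price keys, but the item class itself never appears in these snippets. A minimal sketch of what it presumably looks like as a Scrapy Item (only the field names are grounded in the examples):

# items.py sketch: the Book item assumed by parse() and parse_lookup().
from scrapy.item import Item, Field

class Book(Item):
    title = Field()
    author = Field()
    price = Field()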
Example #4
 def __init__(self):
   self.newcsv=csv.writer(open("books.csv","w"))
   #self.engine = create_engine('mysql connection')
   #self.Session = sessionmaker(bind=self.engine)
   #self.dt = datetime
   #Base.metadata.create_all(self.engine)
   self.session = Session()
Example #5
class FlipkartTrendsPipeline(object):

  def __init__(self):
    self.newcsv=csv.writer(open("books.csv","w"))
    #self.engine = create_engine('mysql connection')
    #self.Session = sessionmaker(bind=self.engine)
    #self.dt = datetime
    #Base.metadata.create_all(self.engine)
    self.session = Session()

  def process_item(self, item, spider):
    use_for = ['FlipkartSpider']
    if spider.name in use_for:
      log.msg(item['title'], level=log.DEBUG)
      round_number = self.session.query(Meta).order_by(desc(Meta.round)).first()
      #self.session.refresh(round_number)
      #self.newcsv.writerow([item['author'][0],item['title'][0],item['price'][0]])
      book = Books(unicode(round_number.round),unicode(item['title'][0]),unicode(item['author'][0]),flipkart=unicode(item['price'][0].split(' ')[2]))
      self.session.add(book)
      self.session.commit()
      self.session.flush()
      #session.close()
      return item
    else:
      return item
Example #6
class FlipkartTrendsPipeline(object):
    def __init__(self):
        self.newcsv = csv.writer(open("books.csv", "w"))
        #self.engine = create_engine('mysql connection')
        #self.Session = sessionmaker(bind=self.engine)
        #self.dt = datetime
        #Base.metadata.create_all(self.engine)
        self.session = Session()

    def process_item(self, item, spider):
        use_for = ['FlipkartSpider']
        if spider.name in use_for:
            log.msg(item['title'], level=log.DEBUG)
            round_number = self.session.query(Meta).order_by(desc(
                Meta.round)).first()
            #self.session.refresh(round_number)
            #self.newcsv.writerow([item['author'][0],item['title'][0],item['price'][0]])
            book = Books(unicode(round_number.round),
                         unicode(item['title'][0]),
                         unicode(item['author'][0]),
                         flipkart=unicode(item['price'][0].split(' ')[2]))
            self.session.add(book)
            self.session.commit()
            self.session.flush()
            #session.close()
            return item
        else:
            return item
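
The pipeline constructs Books(round, title, author, flipkart=price) and the spiders sort on Meta.round, yet neither mapped class is shown. A hedged sketch of declarative models consistent with those calls; the column types and the extra Infibeam price column are assumptions:

# Guessed mapped classes; column names follow the Books(...) and Meta(...) calls above.
from sqlalchemy import Column, Integer, Unicode
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()  # in the real project this would be the shared Base

class Meta(Base):
    __tablename__ = 'meta'
    id = Column(Integer, primary_key=True)
    round = Column(Integer)

    def __init__(self, round_number):
        self.round = round_number

class Books(Base):
    __tablename__ = 'books'
    id = Column(Integer, primary_key=True)
    round = Column(Integer)          # the pipeline wraps it in unicode(); MySQL coerces the string on insert
    title = Column(Unicode(255))
    author = Column(Unicode(255))
    flipkart = Column(Unicode(64))   # price string scraped by FlipkartSpider
    infibeam = Column(Unicode(64))   # assumed twin column for InfibeamSpider prices

    def __init__(self, round_number, title, author, flipkart=None, infibeam=None):
        self.round = round_number
        self.title = title
        self.author = author
        self.flipkart = flipkart
        self.infibeam = infibeam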
Example #7
 def __init__(self):
   #self.engine = create_engine('mysql connection')
   #self.Session = sessionmaker(bind=self.engine)
   #Base.metadata.create_all(self.engine)
   self.session = Session()
   try:
     round_info = self.session.query(Meta).order_by(desc(Meta.round)).first()
     print round_info
     new_round = Meta(round_info.round+1)
     self.session.add(new_round)
     self.session.commit()
     self.session.flush()
     self.session.close()
   except:  # Meta table is empty: first() returned None, so seed round 0
     new_round = Meta(0)
     self.session.add(new_round)
     self.session.commit()
     self.session.flush()
     self.session.close()
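
The bare except above doubles as the "Meta table is still empty" branch: first() returns None, the attribute access raises, and round 0 gets seeded. For comparison, a sketch of the same round bump written with an explicit None check (not taken from the original project; Session and Meta are the same objects used throughout these examples):

from sqlalchemy import desc

def start_new_round(session):
    # Read the highest round so far; None means the table has no rows yet.
    latest = session.query(Meta).order_by(desc(Meta.round)).first()
    new_round = Meta(latest.round + 1 if latest is not None else 0)
    session.add(new_round)
    session.commit()
    return new_round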
Example #8
 def __init__(self):
   self.session = Session()
Example #9
class FlipkartSpider(BaseSpider):

  name = "FlipkartSpider"
  allowed_domains = ["flipkart.com", "www.flipkart.com"]
  start_urls = [
      'http://www.flipkart.com/view-books/0/new-releases'
      ]

  def __init__(self):
    #self.engine = create_engine('mysql connection')
    #self.Session = sessionmaker(bind=self.engine)
    #Base.metadata.create_all(self.engine)
    self.session = Session()
    try:
      round_info = self.session.query(Meta).order_by(desc(Meta.round)).first()
      print round_info
      new_round = Meta(round_info.round+1)
      self.session.add(new_round)
      self.session.commit()
      self.session.flush()
      self.session.close()
    except:  # Meta table is empty: first() returned None, so seed round 0
      new_round = Meta(0)
      self.session.add(new_round)
      self.session.commit()
      self.session.flush()
      self.session.close()
      

  def parse(self, response):
    #filename = response.url.split("/")[-2]
    #open(filename, 'wb').write(response.body)
    hxs = HtmlXPathSelector(response)
    #hxs.select('//div[@class="line bmargin10"]/h2[@class="fk-srch-item-title fksd-bodytext"]/a/text()').extract()
    sites = hxs.select('//div[@class="fk-srch-item fk-inf-scroll-item"]')
    #sites = hxs.select('//div[@class="lastUnit"]/div[@id="search_results"]')
    items = []
    print len(sites)
    for site in sites:
      #print site
      item = Book()
      item['title']= site.select('div[@class="line fksd-bodytext "]/div[@class="line bmargin10"]/h2[@class="fk-srch-item-title fksd-bodytext"]/a/text()').extract()
      item['author'] = site.select('div[@class="line fksd-bodytext "]/div[@class="line bmargin10"]/span[@class="fk-item-authorinfo-text fksd-smalltext"]/a/text()').extract()
      item['price'] = site.select('div[@class="line fksd-bodytext "]/div[@class="unit fk-sitem-info-section"]/div[@class="line fk-itemdetail-info fksd-bodytext"]/div[@class="line dlvry-det"]/div[@class="line fk-srch-pricing fksd-smalltext"]/b[@class="fksd-bodytext price final-price"]/text()').extract()
      items.append(item)
      #print item
    return items
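
process_item() only acts when spider.name is listed in use_for, which suggests the pipeline is enabled project-wide and does its own per-spider filtering. A sketch of the corresponding settings.py entry, using the list form accepted by the old Scrapy releases (BaseSpider/HtmlXPathSelector era) these examples target; the dotted module path is a guessed project layout:

# settings.py sketch: register the pipeline for every spider in the project.
ITEM_PIPELINES = [
    'flipkarttrends.pipelines.FlipkartTrendsPipeline',
]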
Example #10
 def __init__(self):
     self.session = Session()