def _get_brands(self, session, insert):
    """Scrape the Topshop A-to-Z brand page and optionally persist the brands.

    The page is loaded in Firefox inside a virtual display, and the brand
    anchors are extracted from the rendered HTML with lxml. An ``orm.Brand``
    is built for every anchor. When ``insert`` is ``True`` the brand and its
    ``orm.StoreBrand`` link for ``self.storeid`` are added to ``session``
    unless a matching row already exists.

    NOTE: only the women's (Topshop) listing is scraped. The Topman listing
    that used to be fetched here as well is disabled.

    :param session: database session used for lookups and inserts
        (presumably a SQLAlchemy session -- confirm against the caller).
    :param insert: rows are only written when this is exactly ``True``.
    :returns: list with one freshly built ``orm.Brand`` per brand link found,
        including brands that were already present in the database.
    """
    brands = []
    gender = "female"
    femaleBrandsUrl = "http://eu.topshop.com/en/tseu/category/brands-a-to-z-4070022/home?TS=1422011935571"
    olog.log(
        "TopshopTracker._get_brands > Calling <b>" + femaleBrandsUrl + "</b>",
        'info')

    # Always release the browser and the virtual display, even when loading
    # the page fails; otherwise the Firefox/display processes are leaked.
    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        browser = webdriver.Firefox()
        try:
            browser.get(femaleBrandsUrl)
            data = browser.page_source
        finally:
            browser.quit()
    finally:
        display.stop()

    tree = lxml.html.fromstring(data)
    brand_links = tree.cssselect(
        'div[class*="a-to-z"] div[id*="jsonList"] div[class*="columns"] '
        'div div[class*="items"] a')

    for link in brand_links:
        shop_url = link.attrib['href']
        # Title-case the name and turn non-ASCII characters into XML
        # character references so the stored name is plain ASCII.
        name = unicode(link.attrib['title'].title()).encode(
            'ascii', 'xmlcharrefreplace')
        uuid = str(shortuuid.uuid(name))
        # No logo URLs are available on this page.
        br = orm.Brand(name, None, None, uuid)
        olog.log(
            "TopshopTracker._get_brands << Found brand <b>" + str(br) + "</b>",
            'debug')

        if insert is True:
            brand_in_db = session.query(orm.Brand).filter_by(
                name=unicode(br.name)).first()
            if brand_in_db is None:
                session.add(br)
                session.flush()  # flush so that br.id is populated
                brandid = br.id
                olog.log(
                    "TopshopTracker._get_brands >>> Inserted brand <b>" +
                    br.name + "</b> with id <b>" + str(brandid) + "</b>",
                    "warning")
            else:
                brandid = brand_in_db.id
                olog.log(
                    "Brand <b>" + brand_in_db.name +
                    "</b> already in database with id <b>" + str(brandid) +
                    "</b>", "debug")

            storebrand_in_db = session.query(orm.StoreBrand).filter_by(
                storeid=unicode(self.storeid)).filter_by(
                    brandid=brandid).first()
            if storebrand_in_db is None:
                sb = orm.StoreBrand(None, self.storeid, brandid, gender,
                                    shop_url)
                olog.log(
                    "TopshopTracker._get_brands >>> Inserted <b>" + str(sb) +
                    "</b>", "warning")
                session.add(sb)
                session.flush()

        brands.append(br)

    # Committed unconditionally, as before; nothing is pending when
    # ``insert`` is not True.
    session.commit()
    return brands
def _parse_price(text):
    """Return the leading number of a price string like ``"12,50 EUR"`` as a float.

    A decimal comma is accepted. Anything that cannot be parsed yields ``0``.
    """
    try:
        return float(text.split(' ', 1)[0].replace(',', '.').strip())
    except (AttributeError, TypeError, ValueError):
        return 0


def create_message(session, user, date, products):
    """Build the HTML table of price changes that is e-mailed to ``user``.

    :param session: unused; kept for interface compatibility.
    :param user: object whose ``name`` is only used for logging.
    :param date: unused; kept for interface compatibility.
    :param products: iterable of ``(product, prices)`` pairs. ``product``
        exposes ``link`` and ``title``; ``prices`` is a sequence of objects
        with a ``price`` string where ``prices[0]`` is the newest price and
        ``prices[1]`` the previous one. An empty ``price`` string is treated
        as "not available".
    :returns: the HTML fragment as a string.
    """
    olog.log('Creating message for ' + str(user.name), 'info')

    # Column order is: Product, Change, Old price, New price.
    row_template = (
        ' <tr> <td><a href="%(link)s">%(title)s</a></td>'
        ' <td color="%(color)s">%(change)s</td>'
        ' <td>%(old)s</td> <td>%(new)s</td> </tr>\r\n ')

    message = ('\r\n <table class="table"> <tr> <th>Product</th>'
               ' <th>Change</th> <th>Old price</th> <th>New price</th>\r\n')

    for p in products:
        product = p[0]
        prices = p[1]
        olog.log(' ' + str(product.title) + " " + str(product.link), 'debug')

        if len(prices) > 1:
            new_price = prices[0].price
            old_price = prices[1].price

            if old_price != "" and new_price != "":
                # Both prices known: classify the change.
                pnew = _parse_price(new_price)
                pold = _parse_price(old_price)
                msg_str = "Old price <b>" + str(old_price) + "</b>, "
                msg_str += "new price <b>" + str(new_price) + "</b>"
                if pnew > pold:
                    clr = "red"
                    change = "Increase"
                elif pnew < pold:
                    clr = "green"
                    change = "Decrease"
                else:
                    clr = "yellow"
                    change = "No change"
                message += row_template % {
                    'link': str(product.link),
                    'title': str(product.title),
                    'color': clr,
                    'change': change,
                    'old': str(old_price),
                    'new': str(new_price),
                }
                olog.log(msg_str, pType=None, color=clr)
            elif old_price == "" and new_price != "":
                # Previously unavailable, now priced again.
                olog.log('Updated price for ' + str(prices[0]), color="purple")
                message += row_template % {
                    'link': str(product.link),
                    'title': str(product.title),
                    'color': "blue",
                    'change': "Back available",
                    'old': "",
                    'new': str(new_price),
                }
            elif old_price != "" and new_price == "":
                # Was priced, now unavailable. The original template emitted
                # stray '+ and +' characters around both price cells; those
                # are removed here.
                olog.log('N.A.. Old price is ' + str(prices[1]), color="blue")
                message += row_template % {
                    'link': str(product.link),
                    'title': str(product.title),
                    'color': "yellow",
                    'change': "N.A.",
                    'old': str(old_price),
                    'new': str(new_price),
                }
        elif len(prices) == 1:
            # Only one price on record: the product is new.
            message += ('<tr><td><a href="' + str(product.link) + '">' +
                        str(product.title) +
                        '</a></td><td>New</td><td></td><td>' +
                        str(prices[0].price) + '</td></tr>')

    message += '</table>'
    return message
def _set_items_for_brand(self, brand, session, insert):
    """Collect all products on a Clarks listing page and optionally store them.

    The listing at ``brand.url`` is opened in Firefox inside a virtual
    display and scrolled to the bottom repeatedly until the page height
    stops growing. Every product link in the resulting HTML is then looked
    up by item id and ``self.storeid``. Unknown items are fetched through
    ``self._get_item`` and, when ``insert`` is ``True``, stored together with
    their images and current price.

    :param brand: object exposing ``url``; also passed to ``self._get_item``.
    :param session: database session used for lookups and inserts
        (presumably a SQLAlchemy session -- confirm against the caller).
    :param insert: rows are only written/committed when this is exactly
        ``True``.
    :returns: list of ``orm.Item`` objects found on the page (both newly
        created and already stored ones) up to the point where processing
        stopped.
    """
    items = []
    olog.log(
        "ClarksTracker._get_items_for_brand > Calling <b>" + brand.url +
        "</b>", "info")

    page_height_js = "return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight )"

    # Always release the browser and the virtual display, even when loading
    # or scrolling fails; otherwise the Firefox/display processes are leaked.
    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        browser = webdriver.Firefox()
        try:
            browser.get(brand.url)
            last_height = browser.execute_script(page_height_js)
            # Keep scrolling to the bottom until the document stops growing.
            while True:
                browser.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                print("Scrolling down...")
                time.sleep(10)  # give the page time to load more products
                new_height = browser.execute_script(page_height_js)
                if new_height == last_height:
                    break
                last_height = new_height
            data = browser.page_source
        finally:
            browser.quit()
    finally:
        display.stop()

    try:
        tree = lxml.html.fromstring(data)
        product_links = tree.cssselect(
            'ul[id*="prod-list"] li[class*="product-list-item"] '
            'p a[href*="\\/p\\/"]')
        for link in product_links:
            if link is None:
                continue
            date = time.strftime('%Y-%m-%d %H:%M:%S')
            # The item id is whatever follows "p/" in the product link.
            iid = link.attrib['href'].split('p/')[1]
            i = session.query(orm.Item).filter_by(itemid=iid).filter_by(
                storeid=self.storeid).first()
            if i is None:
                item = self._get_item(brand, iid)
                i = orm.Item(item['storeid'], item['itemid'], item['brandid'],
                             item['link'], item['color'], item['title'],
                             item['category'], item['gender'], item['uuid'])
                olog.log(
                    "ClarksTracker._get_items_for_brand >>> Inserted item <b>"
                    + str(i) + "</b>", "warning")
                if insert is True:
                    session.add(i)
                    session.flush()  # flush so that i.id is populated
                    itemid = i.id
                    for imageurl in item['images']:
                        ii = orm.ItemImage(itemid, imageurl)
                        olog.log(
                            "ClarksTracker._get_items_for_brand >>>> Inserted image <b>"
                            + str(ii) + "</b>", "warning")
                        session.add(ii)
                    ip = orm.ItemPrice(itemid, item['price'],
                                       item['currency'], date)
                    olog.log(
                        "ClarksTracker._get_items_for_brand >>>> Inserted price <b>"
                        + str(ip) + "</b>", "warning")
                    session.add(ip)
            else:
                itemid = i.id
                olog.log(
                    "ClarksTracker._get_items_for_brand <<< <b>" + i.title +
                    "</b> already in database with id <b>" + str(itemid) +
                    "</b>", "info")
            items.append(i)
    except Exception as exc:
        # Best effort: keep whatever was collected so far, but do not hide
        # the failure as the former bare ``except: pass`` did.
        olog.log(
            "ClarksTracker._get_items_for_brand <<< Error processing items: "
            + str(exc), "error")

    if insert is True:
        session.commit()

    olog.log(
        "ClarksTracker._get_items_for_brand < Found <b>" + str(len(items)) +
        " products</b>", "info")
    return items
def _set_brands(self, session, insert):
    """Register the fixed 'Clarks' brand for the men's and women's listings.

    The site only sells its own brand, so nothing is scraped. For each
    gender an ``orm.Brand`` is built and looked up by name; an
    ``orm.StoreBrand`` linking ``self.storeid``, the brand id, the gender
    and the listing URL is created when no such row exists. Rows are only
    added and committed when ``insert`` is ``True``.

    :param session: database session used for lookups and inserts
        (presumably a SQLAlchemy session -- confirm against the caller).
    :param insert: rows are only written/committed when this is exactly
        ``True``.
    :returns: list with two freshly built ``orm.Brand`` objects, one per
        gender (Male first, then Female).
    """
    brand_name = 'Clarks'  # Fixed. Website only sells Clarks
    listings = (
        ('Male', 'http://www.clarks.nl/c/heren-alle-stijlen'),
        ('Female', 'http://www.clarks.nl/c/dames-alle-stijlen'),
    )

    brands = []
    for gender, shop_url in listings:
        uuid = str(shortuuid.uuid(brand_name))
        # No logo URLs are known for this brand.
        br = orm.Brand(brand_name, None, None, uuid)
        olog.log(
            "ClarksTracker._set_brands << Found brand <b>" + str(br) + "</b>",
            'debug')

        brand_in_db = session.query(orm.Brand).filter_by(
            name=unicode(br.name)).first()
        # Default for a dry run on an unknown brand: previously ``brandid``
        # could be unbound (or stale from the previous gender) in that case.
        brandid = None
        if brand_in_db is not None:
            brandid = brand_in_db.id
            olog.log(
                "ClarksTracker._set_brands << Brand <b>" + brand_in_db.name +
                "</b> already in database with id <b>" + str(brandid) + "</b>",
                "info")
        elif insert is True:
            session.add(br)
            session.flush()  # flush so that br.id is populated
            brandid = br.id
            olog.log(
                "ClarksTracker._set_brands >>> Inserted brand <b>" + br.name +
                "</b> with id <b>" + str(brandid) + "</b>", "warning")

        storebrand_in_db = session.query(orm.StoreBrand).filter_by(
            storeid=unicode(self.storeid)).filter_by(
                brandid=brandid).filter_by(gender=gender).first()
        if storebrand_in_db is None:
            sb = orm.StoreBrand(None, self.storeid, brandid, gender, shop_url)
            olog.log(
                "ClarksTracker._set_brands << Inserted <b>" + str(sb) + "</b>",
                "warning")
            if insert is True:
                session.add(sb)
                session.flush()
        else:
            olog.log(
                "ClarksTracker._set_brands << StoreBrand <b>" +
                str(storebrand_in_db) + "</b> already in database with id <b>"
                + str(storebrand_in_db.id) + "</b>", "info")

        brands.append(br)

    if insert is True:
        session.commit()
    return brands
def _set_items_for_brand(self, brand, session, insert):
    """Collect all products on a Net-a-Porter listing page and optionally store them.

    The listing at ``brand.url`` is downloaded with ``urllib2`` (using the
    module-level ``hdr`` request headers) and parsed with lxml. Every
    product link is looked up by item id and ``self.storeid``. Unknown
    items are fetched through ``self._get_item``, which is handed a Firefox
    instance and the virtual display it runs in. When ``insert`` is ``True``
    the item, its images and its current price are added to ``session`` and
    committed.

    :param brand: object exposing ``url``; also passed to ``self._get_item``.
    :param session: database session used for lookups and inserts
        (presumably a SQLAlchemy session -- confirm against the caller).
    :param insert: rows are only written/committed when this is exactly
        ``True``.
    :returns: list of ``orm.Item`` objects found on the page (newly created
        and already stored ones). Items whose details could not be fetched
        are logged and left out.
    """
    items = []
    date = time.strftime('%Y-%m-%d %H:%M:%S')

    # Always release the browser and the virtual display, even when the
    # download, the parsing or the database raises; otherwise the
    # Firefox/display processes are leaked.
    display = Display(visible=0, size=(1920, 1080))
    display.start()
    try:
        browser = webdriver.Firefox()
        try:
            olog.log(
                "NetaporterTracker._set_items_for_brand >>> Get articles <b>"
                + brand.url + "</b>", 'debug')
            req = urllib2.Request(brand.url, headers=hdr)
            data = urllib2.urlopen(req).read()
            tree = lxml.html.fromstring(data)
            product_links = tree.cssselect(
                'div[id="product-list"] div[class="product-images"] '
                'div[class*="product-image"] a')

            for link in product_links:
                href = link.attrib['href']
                # The item id is the path segment following "/product/".
                iid = re.search(r'/product/(.*?)/', href).group(1)
                i = session.query(orm.Item).filter_by(itemid=iid).filter_by(
                    storeid=self.storeid).first()
                if i is not None:
                    olog.log(
                        "NetaporterTracker._set_items_for_brand <<< <b>" +
                        str(i) + "</b> already in database</b>", "info")
                    items.append(i)
                    continue

                item = self._get_item(brand, href, browser, display)
                if not item:
                    # Previously a ``None`` entry was appended to the result
                    # here; the failed item is now skipped instead.
                    olog.log(
                        "NetaporterTracker._set_items_for_brand <<<< Error getting item",
                        "error")
                    continue

                i = orm.Item(item['storeid'], item['itemid'], item['brandid'],
                             item['link'], item['color'], item['title'],
                             item['category'], item['gender'], item['uuid'])
                olog.log(
                    "NetaporterTracker._set_items_for_brand <<< Inserted item <b>"
                    + str(i) + "</b>>", "warning")
                if insert is True:
                    session.add(i)
                    session.flush()  # flush so that i.id is populated
                itemid = i.id
                for imageurl in item['images']:
                    ii = orm.ItemImage(itemid, imageurl)
                    olog.log(
                        "NetaporterTracker._set_items_for_brand <<<< Inserted image <b>"
                        + str(ii) + "</b>", "warning")
                    if insert is True:
                        session.add(ii)
                ip = orm.ItemPrice(itemid, item['price'], item['currency'],
                                   date)
                olog.log(
                    "NetaporterTracker._set_items_for_brand <<<< Inserted price <b>"
                    + str(ip) + "</b>", "warning")
                if insert is True:
                    session.add(ip)
                items.append(i)

            if insert is True:
                session.commit()
        finally:
            browser.quit()
    finally:
        display.stop()

    return items