def _set_brands(self, session, insert):
    """Scrape the NET-A-PORTER designer A-Z page and register its brands.

    For every designer link found on the page, a Brand row is looked up by
    name and created when missing; a StoreBrand row linking the brand to
    this store (gender "female") is created when missing.

    Args:
        session: database session used for the lookups and inserts.
        insert: only when this is exactly True are new rows added and
            flushed; otherwise new objects are built and logged but never
            attached to the session (dry run).

    Returns:
        A list with one orm.Brand per designer link: the existing row when
        the brand is already stored, otherwise the newly built object.
    """
    global hdr
    brands = []
    brandsUrl = "https://www.net-a-porter.com/de/en/Shop/AZDesigners?cm_sp=topnav-_-designers-_-designera-z"
    gender = "female"
    olog.log("NetaporterTracker._set_brands > Calling <b>" + brandsUrl + "</b>", 'info')

    req = urllib2.Request(brandsUrl, headers=hdr)
    response = urllib2.urlopen(req)
    try:
        data = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    tree = lxml.html.fromstring(data)
    brand_links = tree.cssselect('div[class="designer_list_col"] ul li[class!="top-letter"] a')
    for link in brand_links:
        shop_url = ('http://www.net-a-porter.com' + link.attrib['href'] +
                    "?pn=1&npp=view_all&image_view=product&dScroll=0")
        # Non-ASCII characters are stored as XML character references.
        name = unicode(link.attrib['title'].title()).encode('ascii', 'xmlcharrefreplace')

        brand_in_db = session.query(orm.Brand).filter_by(name=unicode(name)).first()
        if brand_in_db is None:
            brand_uuid = str(shortuuid.uuid(name))
            # No logo URLs are available on this page.
            br = orm.Brand(name, None, None, brand_uuid)
            if insert is True:
                session.add(br)
                session.flush()
            # Read and log the id only after the flush; in a dry run the
            # id stays None and the message shows what would be inserted.
            brandid = br.id
            olog.log("NetaporterTracker._set_brands <<< Inserted brand <b>" + str(br) +
                     "</b> with id <b>" + str(brandid) + "</b>", "warning")
        else:
            br = brand_in_db
            olog.log("NetaporterTracker._set_brands <<< Brand <b>" + str(brand_in_db) +
                     "</b> already in database", "info")
            brandid = brand_in_db.id

        storebrand_in_db = session.query(orm.StoreBrand).filter_by(
            storeid=unicode(self.storeid)).filter_by(
            brandid=brandid).filter_by(gender=gender).first()
        if storebrand_in_db is None:
            # This store exposes no per-brand key, hence None.
            sb = orm.StoreBrand(None, self.storeid, brandid, gender, shop_url)
            olog.log("NetaporterTracker._set_brands <<< Inserted <b>" + str(sb) + "</b>", "warning")
            if insert is True:
                session.add(sb)
                session.flush()
        else:
            olog.log("NetaporterTracker._set_brands <<< <b>" + str(storebrand_in_db) +
                     "</b> already in database", "info")
        brands.append(br)

    session.commit()
    return brands
def _set_brands(self, session, insert):
    """Register the fixed 'Ted Baker' brand for the men's and women's shops.

    The site sells a single brand, so nothing is scraped: for each gender
    section a Brand row is looked up by name (and created when missing) and
    a StoreBrand row linking it to this store and gender is created when
    missing.

    Args:
        session: database session used for the lookups and inserts.
        insert: only when this is exactly True are new rows added, flushed
            and finally committed; otherwise nothing is written (dry run).

    Returns:
        A list with two orm.Brand objects (male section first, then
        female). These are the freshly built objects, not the stored rows.
    """
    brands = []
    # Fixed. Website only sells Ted Baker
    brand_name = 'Ted Baker'
    sections = (
        ('Male', 'http://www.tedbaker.com/nl/Mens/c/category_mens'),
        ('Female', 'http://www.tedbaker.com/nl/Womens/c/category_womens'),
    )

    for gender, shop_url in sections:
        brand_uuid = str(shortuuid.uuid(brand_name))
        # No logo URLs are known for this store.
        br = orm.Brand(brand_name, None, None, brand_uuid)
        olog.log("TedBakerTracker._set_brands << Found brand <b>" + str(br) + "</b>", 'debug')

        brand_in_db = session.query(orm.Brand).filter_by(name=unicode(br.name)).first()
        if brand_in_db is None:
            if insert is True:
                session.add(br)
                session.flush()
            # Always bind brandid so the StoreBrand lookup below cannot hit
            # an unbound name; it is None when nothing was flushed.
            brandid = br.id
            olog.log("TedBakerTracker._set_brands >>> Inserted brand <b>" + br.name +
                     "</b> with id <b>" + str(brandid) + "</b>", "warning")
        else:
            brandid = brand_in_db.id
            olog.log("TedBakerTracker._set_brands << Brand <b>" + brand_in_db.name +
                     "</b> already in database with id <b>" + str(brandid) + "</b>", "info")

        storebrand_in_db = session.query(orm.StoreBrand).filter_by(
            storeid=unicode(self.storeid)).filter_by(
            brandid=brandid).filter_by(gender=gender).first()
        if storebrand_in_db is None:
            # This store has no per-brand key, hence None.
            sb = orm.StoreBrand(None, self.storeid, brandid, gender, shop_url)
            olog.log("TedBakerTracker._set_brands << Inserted <b>" + str(sb) + "</b>", "warning")
            if insert is True:
                session.add(sb)
                session.flush()
        else:
            olog.log("TedBakerTracker._set_brands << StoreBrand <b>" + str(storebrand_in_db) +
                     "</b> already in database with id <b>" + str(storebrand_in_db.id) + "</b>",
                     "info")

        # NOTE(review): the freshly built object is returned even when a
        # stored row exists - confirm callers do not need the stored row.
        brands.append(br)

    if insert is True:
        session.commit()
    return brands
def _set_brands(self, session, insert):
    """Scrape Sarenza's men's and women's brand lists and register them.

    Both listing pages are rendered with a headless Firefox up front, and
    the browser and virtual display are always shut down before the brands
    are processed. For each brand link, a site search and the first product
    page it returns are downloaded to find the brand logo. A Brand row is
    then looked up by name (created when missing) and a StoreBrand row for
    this store and gender is created when missing.

    Args:
        session: database session used for the lookups and inserts.
        insert: only when this is exactly True are new rows added and
            flushed; otherwise new objects are built and logged only.

    Returns:
        A list with one freshly built orm.Brand per brand link, male
        brands first, then female brands.

    Raises:
        IndexError: when a search returns no product, or a product page
            lacks the expected logo or shop link element.
    """
    brands = []
    maleBrandsUrl = "http://www.sarenza.nl/herenschoenen"
    femaleBrandsUrl = 'http://www.sarenza.nl/alle-damesschoenen'

    def fetch_tree(url):
        # Download url with the shared request headers and parse the HTML,
        # closing the response whatever happens.
        response = urllib2.urlopen(urllib2.Request(url, headers=hdr))
        try:
            return lxml.html.fromstring(response.read())
        finally:
            response.close()

    # Render both listing pages first so the browser and the virtual
    # display are released even if the per-brand processing below fails.
    pages = []
    display = Display(visible=0, size=(1920, 1080))
    display.start()
    try:
        browser = webdriver.Firefox()
        try:
            for gender, url in (("male", maleBrandsUrl), ("female", femaleBrandsUrl)):
                olog.log("SarenzaTracker._set_brands > Calling <b>" + url + "</b>", 'info')
                browser.get(url)
                pages.append((gender, browser.page_source))
        finally:
            browser.quit()
    finally:
        display.stop()

    brand_key_re = re.compile(r'Brand=([0-9]*)')
    for gender, page_source in pages:
        tree = lxml.html.fromstring(page_source)
        for link in tree.cssselect('ul[class*="search-list"] li a'):
            # Strip for both genders so the same brand gets the same name
            # (and therefore the same row) whichever list it appears in.
            name = unicode(link.text_content()).encode('ascii', 'xmlcharrefreplace').strip()

            # NOTE(review): "dames" (women) is searched for male brands
            # too; the result is used for the logo and, in the female
            # pass, the shop link - confirm this is intended.
            search_url = "http://sarenza.nl/Search.aspx?Ftq=" + name + "%20dames"
            search_tree = fetch_tree(search_url.replace(' ', '%20'))
            product_links = search_tree.cssselect('ul[class*="vignettes"] li a')
            product_tree = fetch_tree(product_links[0].attrib['href'])
            logo_large_url = product_tree.cssselect(
                'img[id*="ImgBrandName"]')[0].attrib['src'].split('?')[0]

            if gender == "male":
                shop_url = "http://www.sarenza.nl" + link.attrib['href']
            else:
                shop_url = product_tree.cssselect(
                    'div[class*="row-fl"] div[class*="item"] a')[0].attrib['href']

            # The store's brand key is the number after "Brand=" in the URL.
            key = None
            match = brand_key_re.search(shop_url)
            if match:
                key = match.group(1)

            brand_uuid = str(shortuuid.uuid(name))
            br = orm.Brand(name, None, logo_large_url, brand_uuid)
            brand_in_db = session.query(orm.Brand).filter_by(name=unicode(br.name)).first()
            if brand_in_db is None:
                if insert is True:
                    session.add(br)
                    session.flush()
                # Read and log the id only after the flush; it is None in
                # a dry run.
                brandid = br.id
                olog.log("SarenzaTracker._set_brands <<< Inserted brand <b>" + str(br) +
                         "</b> with id <b>" + str(brandid) + "</b>", "warning")
            else:
                brandid = brand_in_db.id
                olog.log("Brand <b>" + str(brand_in_db) +
                         "</b> already in database with id <b>" + str(brandid) + "</b>", "info")

            storebrand_in_db = session.query(orm.StoreBrand).filter_by(
                storeid=unicode(self.storeid)).filter_by(
                brandid=brandid).filter_by(gender=gender).first()
            if storebrand_in_db is None:
                sb = orm.StoreBrand(key, self.storeid, brandid, gender, shop_url)
                olog.log("SarenzaTracker._set_brands <<< Inserted <b>" + str(sb) + "</b>",
                         "warning")
                if insert is True:
                    session.add(sb)
                    session.flush()
            else:
                olog.log("SarenzaTracker._set_brands <<< <b>" + str(storebrand_in_db) +
                         "</b> already in database", "info")
            brands.append(br)

    session.commit()
    return brands
def _get_brands(self, session, insert):
    """Scrape the Topshop brands A-Z page and optionally register the brands.

    The page is rendered with a headless Firefox; the browser and virtual
    display are always shut down, even when loading the page fails. One
    Brand object is built per brand link. When insert is True, missing
    Brand rows and missing StoreBrand rows (gender "female") are added.

    Args:
        session: database session used for the lookups and inserts.
        insert: only when this is exactly True is the database queried and
            written; otherwise brands are just collected and logged.

    Returns:
        A list with one freshly built orm.Brand per brand link.
    """
    brands = []

    # NOTE: the male (Topman) pass is disabled. It fetched
    # http://eu.topman.com/en/tmeu/category/brands-617803/view-all-brands-1700863
    # with urllib2 and read the links under
    # 'div[class*="categoryBlock"] ul li a'.

    # Female
    femaleBrandsUrl = "http://eu.topshop.com/en/tseu/category/brands-a-to-z-4070022/home?TS=1422011935571"
    gender = "female"
    olog.log("TopshopTracker._get_brands > Calling <b>" + femaleBrandsUrl + "</b>", 'info')

    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        browser = webdriver.Firefox()
        try:
            browser.get(femaleBrandsUrl)
            data = browser.page_source
        finally:
            browser.quit()
    finally:
        display.stop()

    tree = lxml.html.fromstring(data)
    brand_links = tree.cssselect(
        'div[class*="a-to-z"] div[id*="jsonList"] div[class*="columns"] div div[class*="items"] a')
    for link in brand_links:
        shop_url = link.attrib['href']
        # Non-ASCII characters are stored as XML character references.
        name = unicode(link.attrib['title'].title()).encode('ascii', 'xmlcharrefreplace')
        brand_uuid = str(shortuuid.uuid(name))
        # No logo URLs are available on this page.
        br = orm.Brand(name, None, None, brand_uuid)
        olog.log("TopshopTracker._get_brands << Found brand <b>" + str(br) + "</b>", 'debug')

        if insert is True:
            brand_in_db = session.query(orm.Brand).filter_by(name=unicode(br.name)).first()
            if brand_in_db is None:
                session.add(br)
                session.flush()
                brandid = br.id
                olog.log("TopshopTracker._get_brands >>> Inserted brand <b>" + br.name +
                         "</b> with id <b>" + str(brandid) + "</b>", "warning")
            else:
                brandid = brand_in_db.id
                olog.log("Brand <b>" + brand_in_db.name +
                         "</b> already in database with id <b>" + str(brandid) + "</b>", "debug")

            # NOTE(review): this lookup has no gender filter, so an existing
            # row of any gender suppresses the insert - confirm intended.
            storebrand_in_db = session.query(orm.StoreBrand).filter_by(
                storeid=unicode(self.storeid)).filter_by(
                brandid=brandid).first()
            if storebrand_in_db is None:
                # This page exposes no per-brand key, hence None.
                sb = orm.StoreBrand(None, self.storeid, brandid, gender, shop_url)
                olog.log("TopshopTracker._get_brands >>> Inserted <b>" + str(sb) + "</b>",
                         "warning")
                session.add(sb)
                session.flush()
        brands.append(br)

    session.commit()
    return brands