Beispiel #1
0
    def _get_brands(self, session, insert):
        """Scrape the Topshop brands A-to-Z page and return the brands found.

        The page is loaded in Firefox behind a non-visible ``Display`` and
        the rendered HTML is parsed with lxml.

        Args:
            session: Database session used to look up and, when ``insert``
                is exactly ``True``, to add ``orm.Brand`` and
                ``orm.StoreBrand`` rows.
            insert: Rows are only added when this is exactly ``True``.

        Returns:
            list: One newly constructed ``orm.Brand`` per brand link found
            on the page, whether or not that brand was already stored.
        """
        brands = []

        # NOTE: scraping of the male brands (eu.topman.com "view all brands"
        # page) used to live here as commented-out code; it is disabled and
        # only the female Topshop listing below is processed.
        femaleBrandsUrl = "http://eu.topshop.com/en/tseu/category/brands-a-to-z-4070022/home?TS=1422011935571"

        gender = "female"
        olog.log(
            "TopshopTracker._get_brands > Calling <b>" + femaleBrandsUrl +
            "</b>", 'info')

        # Always release the browser and the display, even when loading the
        # page fails, so no Firefox process or display is left behind.
        display = Display(visible=0, size=(800, 600))
        display.start()
        try:
            browser = webdriver.Firefox()
            try:
                browser.get(femaleBrandsUrl)
                data = browser.page_source
            finally:
                browser.quit()
        finally:
            display.stop()

        tree = lxml.html.fromstring(data)

        brand_links = tree.cssselect(
            'div[class*="a-to-z"] div[id*="jsonList"] '
            'div[class*="columns"] div div[class*="items"] a'
        )

        for anchor in brand_links:
            shop_url = anchor.attrib['href']
            # Title-case the name and replace non-ASCII characters by XML
            # character references so the stored name is plain ASCII.
            name = unicode(anchor.attrib['title'].title()).encode(
                'ascii', 'xmlcharrefreplace')
            brand_uuid = str(shortuuid.uuid(name))
            # Logo URLs are not available on this page, hence the Nones.
            br = orm.Brand(name, None, None, brand_uuid)
            olog.log(
                "TopshopTracker._get_brands << Found brand <b>" + str(br) +
                "</b>", 'debug')

            if insert is True:
                brand_in_db = session.query(
                    orm.Brand).filter_by(name=unicode(br.name)).first()
                if brand_in_db is None:
                    session.add(br)
                    # Flush so the new row gets its id assigned.
                    session.flush()
                    brandid = br.id
                    olog.log(
                        "TopshopTracker._get_brands >>> Inserted brand <b>" +
                        br.name + "</b> with id <b>" + str(brandid) + "</b>",
                        "warning")
                else:
                    brandid = brand_in_db.id
                    olog.log(
                        "Brand <b>" + brand_in_db.name +
                        "</b> already in database with id <b>" + str(brandid) +
                        "</b>", "debug")

                # Link the brand to this store unless the link exists.
                storebrand_in_db = session.query(orm.StoreBrand).filter_by(
                    storeid=unicode(self.storeid)).filter_by(
                        brandid=brandid).first()
                if storebrand_in_db is None:
                    sb = orm.StoreBrand(None, self.storeid, brandid,
                                        gender, shop_url)
                    olog.log(
                        "TopshopTracker._get_brands >>> Inserted <b>" +
                        str(sb) + "</b>", "warning")
                    session.add(sb)
                    session.flush()

            brands.append(br)

        session.commit()

        return brands
Beispiel #2
0
def _price_to_float(price_text):
    """Return the leading number of a price string as a float.

    Only the part before the first space is parsed and a decimal comma is
    accepted (``"12,95 EUR"`` -> ``12.95``).  Anything that cannot be
    parsed yields ``0``.
    """
    try:
        amount = price_text.split(' ', 1)[0]
        return float(amount.replace(',', '.').strip())
    except (AttributeError, TypeError, ValueError):
        return 0


def create_message(session, user, date, products):
    """Build the HTML table of price changes for a user's e-mail.

    Args:
        session: Not used by this function.
        user: Object whose ``name`` is written to the log.
        date: Not used by this function.
        products: Iterable of sequences whose first element is a product
            (with ``title`` and ``link``) and whose second element is that
            product's price records, newest first.  Each record has a
            ``price`` string; an empty string is treated as "no price
            available".

    Returns:
        str: An HTML ``<table>`` with a header row and one row per product
        that has something to report (changed price, back available, no
        longer available, or new).  Products without price records, or
        whose two latest prices are both empty, produce no row.
    """
    olog.log('Creating message for ' +str(user.name), 'info')
    # Collect the fragments and join once at the end.
    parts = ["""\r\n
                <table class="table">
                  <tr>
                    <th>Product</th>
                    <th>Change</th>
                    <th>Old price</th>
                    <th>New price</th>
                  </tr>\r\n"""]

    for entry in products:
        product = entry[0]
        prices = entry[1]
        olog.log(' ' +str(product.title)+ " " +str(product.link), 'debug')
        count = len(prices)
        if count == 0:
            continue

        link = str(product.link)
        title = str(product.title)
        newest = prices[0]

        if count == 1:
            # Only one price on record: the product is new.
            parts.append('<tr><td><a href="'+link+'">'+title+'</a></td><td>New</td><td></td><td>'+str(newest.price)+'</td></tr>')
            continue

        previous = prices[1]
        if previous.price != "" and newest.price != "":
            pnew = _price_to_float(newest.price)
            pold = _price_to_float(previous.price)

            if pnew > pold:
                clr = "red"
                change = "Increase"
            elif pnew < pold:
                clr = "green"
                change = "Decrease"
            else:
                clr = "yellow"
                change = "No change"

            parts.append("""
                            <tr>
                              <td><a href="%(link)s">%(title)s</a></td>
                              <td color="%(color)s">%(change)s</td>
                              <td>%(priceold)s</td>
                              <td>%(pricenew)s</td>
                            </tr>\r\n
                              """ % {'link': link,
                                     'title': title,
                                     'color': clr,
                                     'change': change,
                                     'priceold': str(previous.price),
                                     'pricenew': str(newest.price)})

            msg_str = "Old price <b>"+str(previous.price)+"</b>, "
            msg_str += "new price <b>"+str(newest.price)+"</b>"
            olog.log(msg_str, pType=None, color=clr)
        elif previous.price == "" and newest.price != "":
            # Previously unavailable, now priced again.
            olog.log('Updated price for '+str(newest), color="purple")
            parts.append("""
                    <tr>
                        <td><a href="%(link)s">%(title)s</a></td>
                        <td color="blue">Back available</td>
                        <td></td>
                        <td>%(pricenew)s</td>
                    </tr>\r\n
                    """ % {'link': link,
                           'title': title,
                           'pricenew': str(newest.price)})
        elif previous.price != "" and newest.price == "":
            # Had a price before, none now.
            olog.log('N.A.. Old price is '+str(previous), color="blue")
            parts.append("""
                <tr>
                    <td><a href="%(link)s">%(title)s</a></td>
                    <td color="yellow">N.A.</td>
                    <td>%(priceold)s</td>
                    <td>%(pricenew)s</td>
                </tr>\r\n
                """ % {'link': link,
                       'title': title,
                       'priceold': str(previous.price),
                       'pricenew': str(newest.price)})

    parts.append('</table>')
    return ''.join(parts)
Beispiel #3
0
    def _set_items_for_brand(self, brand, session, insert):
        """Collect the products listed on a Clarks brand page.

        The page at ``brand.url`` is opened in Firefox behind a non-visible
        ``Display`` and scrolled to the bottom repeatedly until its height
        stops growing, so that lazily loaded products end up in the HTML.
        Every product link is then matched against the database; unknown
        products are fetched through ``self._get_item``.

        Args:
            brand: Object with a ``url`` attribute; also passed on to
                ``self._get_item``.
            session: Database session used for lookups and inserts.
            insert: New items, their images and their price are only added
                (and the session committed) when this is exactly ``True``.

        Returns:
            list: ``orm.Item`` objects, both existing and newly built, for
            the product links processed before any error occurred.
        """
        items = []

        olog.log(
            "ClarksTracker._get_items_for_brand > Calling <b>" + brand.url +
            "</b>", "info")

        height_script = (
            "return Math.max( document.body.scrollHeight, "
            "document.body.offsetHeight, "
            "document.documentElement.clientHeight, "
            "document.documentElement.scrollHeight, "
            "document.documentElement.offsetHeight )"
        )

        # Always release the browser and the display, even when loading or
        # scrolling the page fails.
        display = Display(visible=0, size=(800, 600))
        display.start()
        try:
            browser = webdriver.Firefox()
            try:
                browser.get(brand.url)

                last_height = browser.execute_script(height_script)
                while True:
                    browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);")
                    print("Scrolling down...")
                    # Give the page time to load the next batch of products.
                    time.sleep(10)
                    new_height = browser.execute_script(height_script)
                    if new_height == last_height:
                        break
                    last_height = new_height

                data = browser.page_source
            finally:
                browser.quit()
        finally:
            display.stop()

        try:
            tree = lxml.html.fromstring(data)
            # Raw string: the selector contains backslash-escaped slashes.
            all_items = tree.cssselect(
                r'ul[id*="prod-list"] li[class*="product-list-item"] p a[href*="\/p\/"]'
            )

            for it in all_items:
                date = time.strftime('%Y-%m-%d %H:%M:%S')
                # The item id is whatever follows "p/" in the link.
                iid = it.attrib['href'].split('p/')[1]
                i = session.query(orm.Item).filter_by(
                    itemid=iid).filter_by(storeid=self.storeid).first()
                if i is None:
                    item = self._get_item(brand, iid)
                    i = orm.Item(item['storeid'], item['itemid'],
                                 item['brandid'], item['link'],
                                 item['color'], item['title'],
                                 item['category'], item['gender'],
                                 item['uuid'])
                    olog.log(
                        "ClarksTracker._get_items_for_brand >>> Inserted item <b>"
                        + str(i) + "</b>", "warning")
                    if insert is True:
                        session.add(i)
                        # Flush so the new row gets its id assigned.
                        session.flush()
                        itemid = i.id

                        for imageurl in item['images']:
                            ii = orm.ItemImage(itemid, imageurl)
                            olog.log(
                                "ClarksTracker._get_items_for_brand >>>> Inserted image <b>"
                                + str(ii) + "</b>", "warning")
                            session.add(ii)
                        ip = orm.ItemPrice(itemid, item['price'],
                                           item['currency'], date)
                        olog.log(
                            "ClarksTracker._get_items_for_brand >>>> Inserted price <b>"
                            + str(ip) + "</b>", "warning")
                        session.add(ip)
                else:
                    itemid = i.id
                    olog.log(
                        "ClarksTracker._get_items_for_brand <<< <b>" +
                        i.title + "</b> already in database with id <b>" +
                        str(itemid) + "</b>", "info")
                items.append(i)
        except Exception as exc:
            # Best effort: keep what was collected so far, but record the
            # failure instead of hiding it.
            olog.log(
                "ClarksTracker._get_items_for_brand <<< Error processing items: "
                + str(exc), "error")

        if insert is True:
            session.commit()

        olog.log(
            "ClarksTracker._get_items_for_brand < Found <b>" +
            str(len(items)) + " products</b>", "info")

        return items
Beispiel #4
0
    def _set_brands(self, session, insert):
        """Register the fixed 'Clarks' brand for the male and female listings.

        For each of the two listing URLs the brand is looked up by name and
        added when missing (only if ``insert`` is exactly ``True``); the
        same is done for the store-brand link identified by store id, brand
        id and gender.

        Args:
            session: Database session used for lookups and inserts.
            insert: Rows are only added, and the session committed, when
                this is exactly ``True``.  Otherwise the method only looks
                up and logs.

        Returns:
            list: Two newly constructed ``orm.Brand`` objects, one per
            listing (male first, then female).
        """
        brands = []

        name = 'Clarks'  # Fixed. Website only sells Clarks
        listings = (
            ('Male', 'http://www.clarks.nl/c/heren-alle-stijlen'),
            ('Female', 'http://www.clarks.nl/c/dames-alle-stijlen'),
        )

        for gender, shop_url in listings:
            uuid = str(shortuuid.uuid(name))
            # No logo URLs are known for this brand, hence the Nones.
            br = orm.Brand(name, None, None, uuid)
            olog.log(
                "ClarksTracker._set_brands << Found brand <b>" + str(br) +
                "</b>", 'debug')

            # Stays None when the brand is unknown and must not be inserted,
            # so the code below never reads an unbound name.
            brandid = None
            brand_in_db = session.query(
                orm.Brand).filter_by(name=unicode(br.name)).first()
            if brand_in_db is not None:
                brandid = brand_in_db.id
                olog.log(
                    "ClarksTracker._set_brands << Brand <b>" +
                    brand_in_db.name +
                    "</b> already in database with id <b>" + str(brandid) +
                    "</b>", "info")
            elif insert is True:
                session.add(br)
                # Flush so the new row gets its id assigned.
                session.flush()
                brandid = br.id
                olog.log(
                    "ClarksTracker._set_brands >>> Inserted brand <b>" +
                    br.name + "</b> with id <b>" + str(brandid) + "</b>",
                    "warning")

            if brandid is None:
                # Without a brand id there cannot be a stored link yet.
                storebrand_in_db = None
            else:
                storebrand_in_db = session.query(orm.StoreBrand).filter_by(
                    storeid=unicode(self.storeid)).filter_by(
                        brandid=brandid).filter_by(gender=gender).first()

            if storebrand_in_db is None:
                sb = orm.StoreBrand(None, self.storeid, brandid, gender,
                                    shop_url)
                olog.log(
                    "ClarksTracker._set_brands << Inserted <b>" + str(sb) +
                    "</b>", "warning")
                if insert is True:
                    session.add(sb)
                    session.flush()
            else:
                olog.log(
                    "ClarksTracker._set_brands << StoreBrand <b>" +
                    str(storebrand_in_db) +
                    "</b> already in database with id <b>" +
                    str(storebrand_in_db.id) + "</b>", "info")

            brands.append(br)

        if insert is True:
            session.commit()

        return brands
Beispiel #5
0
    def _set_items_for_brand(self, brand, session, insert):
        """Collect the products listed on a Net-a-Porter brand page.

        The listing at ``brand.url`` is downloaded with ``urllib2`` (using
        the module-level ``hdr`` request headers) and parsed with lxml.
        Product links not yet in the database are fetched through
        ``self._get_item``, which is handed a Firefox browser and its
        ``Display``; both are started here and always shut down again.

        Args:
            brand: Object with a ``url`` attribute; also passed on to
                ``self._get_item``.
            session: Database session used for lookups and inserts.
            insert: New items, their images and their price are only added
                (and the session committed) when this is exactly ``True``.

        Returns:
            list: ``orm.Item`` objects, both existing and newly built.
            Links without a product id and items that could not be fetched
            are logged and left out.
        """
        items = []

        display = Display(visible=0, size=(1920, 1080))
        display.start()
        try:
            browser = webdriver.Firefox()
            try:
                date = time.strftime('%Y-%m-%d %H:%M:%S')
                olog.log("NetaporterTracker._set_items_for_brand >>> Get articles <b>"+brand.url+"</b>", 'debug')
                req = urllib2.Request(brand.url, headers=hdr)
                data = urllib2.urlopen(req).read()
                tree = lxml.html.fromstring(data)

                items_data = tree.cssselect('div[id="product-list"] div[class="product-images"] div[class*="product-image"] a')
                for it in items_data:
                    href = it.attrib['href']
                    match = re.search(r'/product/(.*?)/', href)
                    if match is None:
                        # Not a product link; skip it rather than crash.
                        olog.log("NetaporterTracker._set_items_for_brand <<<< No product id in link <b>"+str(href)+"</b>", "error")
                        continue
                    iid = match.group(1)

                    i = session.query(orm.Item).filter_by(itemid=iid).filter_by(storeid=self.storeid).first()
                    if i is not None:
                        olog.log("NetaporterTracker._set_items_for_brand <<< <b>"+str(i)+"</b> already in database</b>", "info")
                        items.append(i)
                        continue

                    item = self._get_item(brand, href, browser, display)
                    if not item:
                        olog.log("NetaporterTracker._set_items_for_brand <<<< Error getting item", "error")
                        # Nothing to return for this link; never append None.
                        continue

                    i = orm.Item(item['storeid'], item['itemid'], item['brandid'], item['link'], item['color'], item['title'], item['category'], item['gender'], item['uuid'])
                    olog.log("NetaporterTracker._set_items_for_brand <<< Inserted item <b>"+str(i)+"</b>>", "warning")
                    if insert is True:
                        session.add(i)
                        # Flush so the new row gets its id assigned.
                        session.flush()
                        itemid = i.id
                        for imageurl in item['images']:
                            ii = orm.ItemImage(itemid, imageurl)
                            olog.log("NetaporterTracker._set_items_for_brand <<<< Inserted image <b>"+str(ii)+"</b>", "warning")
                            session.add(ii)
                        ip = orm.ItemPrice(itemid, item['price'], item['currency'], date)
                        olog.log("NetaporterTracker._set_items_for_brand <<<< Inserted price <b>"+str(ip)+"</b>", "warning")
                        session.add(ip)
                    items.append(i)

                if insert is True:
                    session.commit()
            finally:
                # Release the browser even when downloading, parsing or the
                # database work above fails.
                browser.quit()
        finally:
            display.stop()

        return items