Beispiel #1
0
    def updateRegularProducts(self, productRegular):
        """Store a new price record for every product whose price is new or changed.

        Loads all rows of ``product_price``, orders them by ``id`` and, within
        one id, by ``date`` descending, and passes them through
        ``Utils.deleteDuplicates`` (presumably leaving the most recent row per
        id -- confirm against Utils). Each entry of ``productRegular`` is then
        looked up by id; a row is inserted when the id is unknown or the
        stored price differs from the scraped one.

        :param productRegular: iterable of dicts providing the keys ``id``,
            ``date``, ``price``, ``category`` and ``url``.

        On a database error the message is printed and the process exits
        with status 1.
        """
        print('Updating prices for regular products...')
        con = None
        try:
            con = mdb.connect(Utils.getConfig()['host'], Utils.getConfig()['user'],
                              Utils.getConfig()['passwd'], Utils.getConfig()['dbname'])
            cur = con.cursor(mdb.cursors.DictCursor)

            # The whole table is fetched and reduced in Python.
            # NOTE(review): a server-side "latest row per id" self-join was
            # tried here earlier and left disabled.
            start = time.time()
            cur.execute("SELECT * FROM product_price")
            productRegularDB = cur.fetchall()
            end = time.time()
            print("Query time: {}".format(end - start))

            # Two stable sorts: newest first, then grouped by product id, so
            # every id group starts with its most recent record.
            productRegularDB = sorted(productRegularDB, key=lambda k: k['date'], reverse=True)
            productRegularDB = sorted(productRegularDB, key=lambda k: k['id'])
            Utils.deleteDuplicates(productRegularDB)
            idArray = Utils.buildArray(productRegularDB)

            for product in productRegular:
                index = Utils.binarySearch(idArray, product['id'])
                # insert a new product record if it hasn't occurred before or the price has changed
                if index == -1 or productRegularDB[index]['price'] != product['price']:
                    # Values come from scraped pages, so let the driver quote
                    # them instead of formatting them into the statement.
                    cur.execute(
                        "INSERT INTO product_price (id, date, price, category, url) "
                        "VALUES (%s, %s, %s, %s, %s)",
                        (product['id'], product['date'], product['price'],
                         product['category'], product['url']))
            # DB-API connections do not autocommit by default; without this the
            # inserts can be lost on transactional storage engines.
            con.commit()
        except mdb.Error as e:
            print("Error %d: %s" % (e.args[0], e.args[1]))
            sys.exit(1)
        finally:
            if con is not None:
                con.close()
Beispiel #2
0
 def getProducts(self):
     """Collect the products of all subcategories and persist the results.

     The subcategory JSON file is parsed and handed to
     ``getProductsByCat``. Afterwards ``self.items`` is ordered by ``id``,
     the subcategory data is written back to its file, and the item list is
     saved to the product file once ``Utils.deleteDuplicates`` has been
     applied to it.
     """
     subcatPath = Utils.getConfig()['subcatFile']
     with open(subcatPath) as subcatHandle:
         subcategories = json.load(subcatHandle)

     self.getProductsByCat(subcategories)

     # Order by product id before duplicates are removed.
     itemsById = list(self.items)
     itemsById.sort(key=lambda item: item['id'])
     self.items = itemsById

     # The parsed subcategory data is saved again after the crawl
     # (presumably getProductsByCat updates it in place -- confirm).
     Utils.saveJsonFile(Utils.getConfig()['subcatFile'], subcategories)

     Utils.deleteDuplicates(self.items)
     Utils.saveJsonFile(Utils.getConfig()['productFile'], self.items)
Beispiel #3
0
 def getProducts(self):
     """Crawl every subcategory page by page and save the collected products.

     Loads the subcategory list into ``self.jsonData`` and, for each entry,
     requests ``<url>/I-Page<n>_40`` for n = 1, 2, ... through
     ``self.parse`` until a page cannot be processed. Finally
     ``self.items`` is sorted by ``id``, passed through
     ``Utils.deleteDuplicates`` and written to the configured product file.
     """
     with open(Utils.getConfig()['subcatFile']) as json_file:
         self.jsonData = json.load(json_file)
     for cat in self.jsonData:
         print(cat['url'])
         page = 0
         while True:
             page += 1
             url = cat['url'].encode('utf-8') + "/I-Page{}_40".format(page)
             print(url)
             if not self._parseWithRetry(cat['subId'], url):
                 break
     self.items = sorted(self.items, key=lambda k: (k['id']))
     Utils.deleteDuplicates(self.items)
     Utils.saveJsonFile(Utils.getConfig()['productFile'], self.items)

 def _parseWithRetry(self, subId, url, retries=1):
     """Parse one listing page, retrying on HTTP 5xx; return False to stop paging.

     The retry used to run outside any ``try`` block, so a second server
     error -- or the ``IndexError`` that ends a category -- raised during
     the retry aborted the whole crawl. Here every attempt is guarded.

     :param subId: subcategory id forwarded to ``self.parse``.
     :param url: page URL forwarded to ``self.parse``.
     :param retries: additional attempts allowed after a 5xx response.
     :return: True when the page was parsed, False when paging of this
         category should end.
     """
     attempt = 0
     while True:
         try:
             self.parse(subId, url)
             return True
         except urllib2.HTTPError as httpError:
             print(httpError)
             isServerError = 500 <= httpError.code < 600
             if not isServerError or attempt >= retries:
                 return False
             attempt += 1
         except IndexError:
             # Treated as the end of the category (presumably raised by
             # self.parse for a page without products -- confirm).
             return False
Beispiel #4
0
    def preparePromoList(self):
        """Write the promoted-product list to ``self.mdFile`` and fill ``self.products``.

        Steps:

        1. Load all ``product_price`` rows (sorted by date, then price, and
           passed through ``Utils.deleteDuplicates``) as the price history.
        2. Load ``product_promo`` rows whose operation is not
           ``ProcessData.PROD_WITHDRAW`` and whose ``last_date`` is later
           than ``self.dateTime``.
        3. For every subcategory in ``self.subcatData`` that has at least one
           promo row, download each product page, skip products without a
           name element or marked out of stock, derive a sex/age code
           (``M``/``F``/``J``/``U``) from keywords, collect available sizes,
           and print one markdown line per product. A dict with the keys
           ``sz``, ``sx``, ``nm``, ``sc``, ``pr``, ``op``, ``dc``, ``pp``,
           ``or`` (plus ``rp``/``rd`` when the price history holds a lower
           price) is appended to ``self.products``.

        On a database error the message is printed and the process exits
        with status 1. Errors from fetching a product page are not handled
        here and propagate to the caller.
        """
        male = ["męsk", "mesk"]
        female = ["damsk", "kobie"]
        junior = ["junior", "dziec"]
        juniorYear = ["lat", "ans"]

        def mentionsAny(keywords, *texts):
            # True when any keyword is a substring of any of the given texts.
            return any(keyword in text for text in texts for keyword in keywords)

        con = None
        try:
            con = mdb.connect(
                Utils.getConfig()["host"],
                Utils.getConfig()["user"],
                Utils.getConfig()["passwd"],
                Utils.getConfig()["dbname"],
            )
            cur = con.cursor(mdb.cursors.DictCursor)
            # get all products from DB
            cur.execute("SELECT * FROM product_price")
            productPriceDB = cur.fetchall()
            productPriceDB = sorted(productPriceDB, key=lambda k: (k["date"], k["price"]))
            Utils.deleteDuplicates(productPriceDB)
            # get promoted products added today but skip discontinued items
            cur.execute(
                "SELECT * FROM product_promo WHERE operation != %s AND last_date > %s "
                "ORDER BY operation ASC, discount DESC",
                (ProcessData.PROD_WITHDRAW, self.dateTime),
            )
            productPromoDB = cur.fetchall()

            print(
                "#decapromolist lista promowanych produktów (delta {}-{}):".format(
                    self.datePrevProcFormatted, self.dateFormatted
                ),
                file=self.mdFile,
            )

            for cat in self.subcatData:
                # skip subcategories without any promoted product
                if not any(row["category"] == cat["subId"] for row in productPromoDB):
                    continue
                # Category header; printed lazily right before the first
                # product that is actually listed, then cleared.
                catStr = "\nKategoria: " + cat["name"].encode("utf-8") + "->" + cat["subName"].encode("utf-8")

                # process promoted items
                for row in productPromoDB:
                    # when a product doesn't belong to considered subcategory skip to the next one
                    if row["category"] != cat["subId"]:
                        continue
                    product = {}

                    url = Utils.getConfig()["siteURL"] + row["url"]
                    pageHandle = urllib2.urlopen(url)
                    try:
                        content = pageHandle.read()
                    finally:
                        # release the HTTP connection even if reading fails
                        pageHandle.close()
                    response = html.fromstring(content)

                    # Lower-cased slice of the page from the first "tc_vars"
                    # up to the next "/*"; used as an extra keyword source.
                    nameCheck = ""
                    namePosStart = content.find("tc_vars")
                    if namePosStart != -1:
                        namePosEnd = content.find("/*", namePosStart)
                        nameCheck = content[namePosStart:namePosEnd].lower()

                    # get the product name
                    try:
                        name = response.xpath('//span[@id="productName"]')[0].text
                    except IndexError:
                        # row is a dict: format it instead of concatenating,
                        # which raised TypeError on this very path.
                        print("\nInvalid product: {}".format(row))
                        continue
                    # all-caps names are converted to title case
                    if name == name.upper():
                        name = name.title().encode("utf-8")
                    else:
                        name = name.encode("utf-8")
                    print(name + " " + url.encode("utf-8"))

                    # when a product is out of stock then skip to the next one
                    if response.xpath('//link[@href="http://schema.org/OutOfStock"]'):
                        print("Out of stock")
                        continue

                    # quite vague method to determine the sex
                    # (in most cases it works just fine, i.e. when the description is correct)
                    label = ""
                    labelPosStart = content.find('tc_vars["product_breadcrumb_label"]')
                    if labelPosStart != -1:
                        labelPosEnd = content.find('");', labelPosStart)
                        # NOTE(review): 49 presumably skips the key plus the
                        # assignment prefix in the page script -- confirm.
                        label = content[labelPosStart + 49 : labelPosEnd].lower()
                    nameLower = name.lower()
                    # precedence: male, then female, then junior, else unknown
                    if mentionsAny(male, label, nameLower, nameCheck):
                        sex = "M"
                    elif mentionsAny(female, label, nameLower, nameCheck):
                        sex = "F"
                    elif mentionsAny(junior, label, nameLower, nameCheck):
                        sex = "J"
                    else:
                        sex = "U"

                    # get list of available sizes
                    sizes = [
                        size.xpath("a")[0].text.strip()
                        for size in response.xpath('//li[@class=" available"]')
                    ]
                    sizeList = ", ".join(sizes)
                    # sizes containing one of the juniorYear keywords override the keyword-based guess
                    if mentionsAny(juniorYear, sizeList.lower()):
                        sex = "J"

                    text = "{} [{}]({}) ".format(sex, name, url)
                    product["sz"] = " " + sizeList
                    product["sx"] = sex
                    product["nm"] = '<a href="' + url.encode("utf-8") + '">' + name + "</a>"
                    product["sc"] = cat["name"].encode("utf-8") + "->" + cat["subName"].encode("utf-8")
                    # discounts of 60% and more are wrapped in "**" (markdown bold)
                    if row["discount"] >= 60:
                        text += "**"
                    text = text + "{}->{} ({}%) [{}]".format(
                        row["old_price"],
                        row["price"],
                        row["discount"],
                        self.operationToDescr(row["operation"], str(row["prev_price"])),
                    )
                    product["pr"] = row["price"]
                    product["op"] = row["old_price"]
                    product["dc"] = row["discount"]
                    product["pp"] = row["prev_price"]
                    product["or"] = self.operationToDescr(row["operation"], str(row["prev_price"]))
                    if row["discount"] >= 60:
                        text += "**"
                    if sizes:
                        try:
                            text = text + " [Rozmiary: {}]".format(sizeList)
                        except UnicodeEncodeError:
                            # best effort: the size note is left out when it
                            # cannot be formatted into the text
                            pass
                    # additional check to be sure that the current price is the lowest to this day
                    # (checking "price history")
                    prodLowestPrice = next(
                        (
                            record
                            for record in productPriceDB
                            if record["id"] == row["id"] and record["price"] < row["price"]
                        ),
                        None,
                    )
                    if prodLowestPrice is not None:
                        text = text + " [Regularna cena była niższa {} w dn. {}]".format(
                            prodLowestPrice["price"], prodLowestPrice["date"]
                        )
                        product["rp"] = prodLowestPrice["price"]
                        product["rd"] = prodLowestPrice["date"].strftime("%d.%m.%Y")

                    if catStr != "":
                        print(catStr + self.SPACES, file=self.mdFile)
                        catStr = ""
                    print(text + self.SPACES, file=self.mdFile)
                    self.products.append(product)
        except mdb.Error as e:
            print("Error %d: %s" % (e.args[0], e.args[1]))
            sys.exit(1)
        finally:
            if con is not None:
                con.close()