예제 #1
0
    def parse_item(self, item, index, package_type):
        '''
        Record one entry of the main cigarauctioneer listing in the database.

        The auction is looked up by its id.  When it has not been seen before,
        a new Auction (and, if needed, a new Brand) is created from the form
        inputs embedded in the entry.  In every case the current price and
        the observation timestamp are refreshed, so the price of an auction
        that is still open keeps being tracked, and the session is committed.

        item -- markup node of a single listing entry (presumably a
                BeautifulSoup tag; only findChild is used on it).
        index -- suffix appended to the form input names of this entry,
                 e.g. frmAuctionID<index>.
        package_type -- stored as the auction type; 'singles' forces a
                        quantity of 1, any other value reads the quantity
                        from the frmItemsPerPack<index> input.
        '''
        def field(name):
            # Every form input of an entry is named <name><index>.
            return item.findChild('input', {'name': '%s%s' % (name, index)}).get('value')

        aid = field('frmAuctionID')
        bid = field('frmBrandCode')
        auction = Auction.query.filter_by(aid=aid, site='cigarauctioneer').first()
        brand = Brand.query.filter_by(ca_id=bid).first()

        if not brand:
            # First time this brand code shows up: register it.
            brand = Brand()
            brand.name = field('frmBrandDesc')
            brand.ca_id = bid
            db.session.add(brand)

        if not auction:
            # As we haven't seen this auction before, gather all of the usual
            # information and store it in a new Auction database object.
            auction = Auction()
            auction.type = package_type
            auction.name = field('frmItemDesc')
            auction.aid = aid
            auction.site = 'cigarauctioneer'
            time_left = item.findChild('div', text='Time Left:').findNext('div').text
            auction.close = self.timestamp_gen(time_left)
            auction.link = item.findChild('a', {'itemprop': 'url'}).get('href')
            # Compare by value: an identity test ("is not") against a string
            # literal only works when both strings happen to be interned.
            if package_type != 'singles':
                auction.quantity = int(field('frmItemsPerPack'))
            else:
                auction.quantity = 1
            brand.auctions.append(auction)
            db.session.add(auction)

        # Refresh the current price and the time of this observation.
        auction.price = float(item.findChild('div', {'itemprop': 'price'}).text.strip('$'))
        auction.timestamp = datetime.now()

        # Persist everything that was added or changed above.
        db.session.commit()
예제 #2
0
파일: cbid.py 프로젝트: griu/bidhistory
    def parse_item(self, item):
        '''
        Record one entry of the main cbid listing in the database.

        The auction is looked up by its id.  When it has not been seen before,
        a new Auction is created and its type and quantity are derived from
        the title and category of the entry.  Entries for which no quantity
        can be derived are treated as non-cigar merchandise: they are logged
        and skipped without touching the database.  For every other entry the
        current price (when one is shown) and the observation timestamp are
        refreshed, so the price of an auction that is still open keeps being
        tracked, and the session is committed.

        item -- markup node of a single listing row (presumably a
                BeautifulSoup tag; only find/findChild/findNext are used).

        Returns None.
        '''
        keywords = [
            'toro', 'robusto', 'belicoso', 'connecticut', 'maduro', 'churchill',
            'torpedo', 'corona', 'single', 'lonsdale', 'corojo', 'sumatra',
            'magnum', 'maestro', 'brillantes', 'series \'a\'', 'imperial',
            'box-press', 'gigante', 'shorty',
        ]
        aid = item.findChild('td', {'class': 'cb_wcb cb_colsm'}).findNext('span').get('data-id')
        auction = Auction.query.filter_by(aid=aid, site='cbid').first()

        if not auction:
            # As we haven't seen this auction before, gather all of the usual
            # information and store it in a new Auction database object.
            anchor = item.findNext('a')
            link = 'http://www.cigarbid.com%s' % anchor.get('href')
            title = anchor.text
            category = anchor.findNext('span').text
            lowered = title.lower()
            auction = Auction()

            # Not all boxes are listed in the "Boxes" category, so rely on
            # the common "<name> (<count>)" title format to detect them.
            boxed = re.findall(r'^([\W\w]+) \((\d{1,3})\)', title)
            if boxed:
                auction.name = boxed[0][0]
                # Stored as an int, like every other quantity set below.
                auction.quantity = int(boxed[0][1])
                auction.type = 'box'
            else:
                auction.name = title
                if category == '5-Packs':
                    auction.quantity = 5
                    auction.type = '5-pack'
                elif category == 'Singles':
                    # Singles should always be a quantity of 1.
                    auction.quantity = 1
                    auction.type = 'single'

                if not auction.quantity:
                    # The remaining categories are catch-alls, so the title
                    # itself has to be inspected.  Checks run from the most
                    # to the least specific wording; the first hit wins.
                    pack_sizes = re.findall(r'(\d{1,3})-pack', lowered)
                    if pack_sizes:
                        auction.type = '%s-pack' % pack_sizes[0]
                        auction.quantity = int(pack_sizes[0])
                    elif re.search(r'(?<!\d)5 cigars', lowered):
                        # The digit guard keeps "15 cigars" from being read
                        # as a 5-pack.
                        auction.type = '5-pack'
                        auction.quantity = 5
                    elif re.search(r'(?<!\d)10 cigars', lowered):
                        auction.type = '10-pack'
                        auction.quantity = 10
                    elif '-pack' in lowered:
                        # Kept from the previous flow: a "-pack" title with
                        # no leading count skips the remaining title checks
                        # and is left to the keyword fallback below.
                        pass
                    elif re.search(r'pack of 5(?!\d)', lowered):
                        # The digit guard keeps "pack of 50" from being read
                        # as a 5-pack.
                        auction.type = '5-pack'
                        auction.quantity = 5
                    elif re.search(r'pack of 10(?!\d)', lowered):
                        auction.type = '10-pack'
                        auction.quantity = 10
                    elif ' cigars' in lowered:
                        counts = re.findall(r'(\d{1,3})[ -]cigars', lowered)
                        if counts:
                            auction.type = 'bundle'
                            auction.quantity = int(counts[0])
                    elif 'single' in lowered:
                        auction.type = 'single'
                        auction.quantity = 1
                    elif 'sampler' in lowered:
                        # No quantity is known for samplers; the keyword
                        # fallback below decides whether the entry is kept.
                        auction.type = 'sampler'
                    else:
                        # Generic "<container> of <count>" wording.
                        containers = re.findall(r'(\w+) of (\d{1,3})', lowered)
                        if containers:
                            auction.type = containers[0][0]
                            auction.quantity = int(containers[0][1])
                            if auction.type in ['brick']:
                                auction.type = 'bundle'

                    # If all else has failed, a title that mentions any of
                    # the known cigar keywords is considered a single.
                    if not auction.quantity and any(word in lowered for word in keywords):
                        auction.quantity = 1

                if not auction.quantity:
                    # None of the above matched, so this doesn't appear to
                    # be a cigar listing.  Log it and skip the entry.
                    logging.debug('ABORTING %s:%s:%s', aid, category, title)
                    return
                logging.debug('CREATED %s:%s:%s:%s', aid, category, auction.quantity, title)

            auction.site = 'cbid'
            auction.link = link
            auction.aid = item.find('span', {'class': 'add'}).get('data-id')
            auction.close = self.timestamp_gen(item.find('td', {'class': 'cb_product_timeleft'}).text)
            db.session.add(auction)

        # Refresh the current price and the time of this observation.
        cprice = item.find('td', {'class': 'cb_product_current_price'}).findNext('span').text.strip('$')
        # Test by value, not identity: an empty price must never reach
        # float(), which would raise ValueError.
        if cprice:
            auction.price = float(cprice)
        auction.timestamp = datetime.now()

        # Persist everything that was added or changed above.
        db.session.commit()
예제 #3
0
파일: cbid.py 프로젝트: ferranc/bidhistory
    def parse_item(self, item):
        """
        Record one entry of the main cbid listing in the database.

        The auction is looked up by its id.  When it has not been seen before,
        a new Auction is created and its type and quantity are derived from
        the title and category of the entry.  Entries for which no quantity
        can be derived are treated as non-cigar merchandise: they are logged
        and skipped without touching the database.  For every other entry the
        current price (when one is shown) and the observation timestamp are
        refreshed, so the price of an auction that is still open keeps being
        tracked, and the session is committed.

        item -- markup node of a single listing row (presumably a
                BeautifulSoup tag; only find/findChild/findNext are used).

        Returns None.
        """
        keywords = [
            "toro",
            "robusto",
            "belicoso",
            "connecticut",
            "maduro",
            "churchill",
            "torpedo",
            "corona",
            "single",
            "lonsdale",
            "corojo",
            "sumatra",
            "magnum",
            "maestro",
            "brillantes",
            "series 'a'",
            "imperial",
            "box-press",
            "gigante",
            "shorty",
        ]
        aid = item.findChild("td", {"class": "cb_wcb cb_colsm"}).findNext("span").get("data-id")
        auction = Auction.query.filter_by(aid=aid, site="cbid").first()

        if not auction:
            # As we haven't seen this auction before, gather all of the usual
            # information and store it in a new Auction database object.
            anchor = item.findNext("a")
            link = "http://www.cigarbid.com%s" % anchor.get("href")
            title = anchor.text
            category = anchor.findNext("span").text
            lowered = title.lower()
            auction = Auction()

            # Not all boxes are listed in the "Boxes" category, so rely on
            # the common "<name> (<count>)" title format to detect them.
            boxed = re.findall(r"^([\W\w]+) \((\d{1,3})\)", title)
            if boxed:
                auction.name = boxed[0][0]
                # Stored as an int, like every other quantity set below.
                auction.quantity = int(boxed[0][1])
                auction.type = "box"
            else:
                auction.name = title
                if category == "5-Packs":
                    auction.quantity = 5
                    auction.type = "5-pack"
                elif category == "Singles":
                    # Singles should always be a quantity of 1.
                    auction.quantity = 1
                    auction.type = "single"

                if not auction.quantity:
                    # The remaining categories are catch-alls, so the title
                    # itself has to be inspected.  Checks run from the most
                    # to the least specific wording; the first hit wins.
                    pack_sizes = re.findall(r"(\d{1,3})-pack", lowered)
                    if pack_sizes:
                        auction.type = "%s-pack" % pack_sizes[0]
                        auction.quantity = int(pack_sizes[0])
                    elif re.search(r"(?<!\d)5 cigars", lowered):
                        # The digit guard keeps "15 cigars" from being read
                        # as a 5-pack.
                        auction.type = "5-pack"
                        auction.quantity = 5
                    elif re.search(r"(?<!\d)10 cigars", lowered):
                        auction.type = "10-pack"
                        auction.quantity = 10
                    elif "-pack" in lowered:
                        # Kept from the previous flow: a "-pack" title with
                        # no leading count skips the remaining title checks
                        # and is left to the keyword fallback below.
                        pass
                    elif re.search(r"pack of 5(?!\d)", lowered):
                        # The digit guard keeps "pack of 50" from being read
                        # as a 5-pack.
                        auction.type = "5-pack"
                        auction.quantity = 5
                    elif re.search(r"pack of 10(?!\d)", lowered):
                        auction.type = "10-pack"
                        auction.quantity = 10
                    elif " cigars" in lowered:
                        counts = re.findall(r"(\d{1,3})[ -]cigars", lowered)
                        if counts:
                            auction.type = "bundle"
                            auction.quantity = int(counts[0])
                    elif "single" in lowered:
                        auction.type = "single"
                        auction.quantity = 1
                    elif "sampler" in lowered:
                        # No quantity is known for samplers; the keyword
                        # fallback below decides whether the entry is kept.
                        auction.type = "sampler"
                    else:
                        # Generic "<container> of <count>" wording.
                        containers = re.findall(r"(\w+) of (\d{1,3})", lowered)
                        if containers:
                            auction.type = containers[0][0]
                            auction.quantity = int(containers[0][1])
                            if auction.type in ["brick"]:
                                auction.type = "bundle"

                    # If all else has failed, a title that mentions any of
                    # the known cigar keywords is considered a single.
                    if not auction.quantity and any(word in lowered for word in keywords):
                        auction.quantity = 1

                if not auction.quantity:
                    # None of the above matched, so this doesn't appear to
                    # be a cigar listing.  Log it and skip the entry.
                    logging.debug("ABORTING %s:%s:%s", aid, category, title)
                    return
                logging.debug("CREATED %s:%s:%s:%s", aid, category, auction.quantity, title)

            auction.site = "cbid"
            auction.link = link
            auction.aid = item.find("span", {"class": "add"}).get("data-id")
            auction.close = self.timestamp_gen(item.find("td", {"class": "cb_product_timeleft"}).text)
            db.session.add(auction)

        # Refresh the current price and the time of this observation.
        cprice = item.find("td", {"class": "cb_product_current_price"}).findNext("span").text.strip("$")
        # Test by value, not identity: an empty price must never reach
        # float(), which would raise ValueError.
        if cprice:
            auction.price = float(cprice)
        auction.timestamp = datetime.now()

        # Persist everything that was added or changed above.
        db.session.commit()