starttime = re.findall("[0-9]{1,2}:[0-9]{2}\s*[aApP][mM]", time)[0] #This extracts the time, including am/pm
        try:
            price = bsObj.find("h3", {"class":"price-range"}).get_text().strip() # Pulls the price, which could be a price range
        except:  # Let's hope that it's free if it doesn't have an h3 w/ a class of "price-range"
            price = "Free!"
        price = price.replace("DONATIONS AT THE DOOR","Donations at the Door")
        artist = bsObj.find("h1", {"class":"headliners summary"}).get_text() # Event / top artist name
        artist = artist.replace("VINYL LOUNGE OPEN MIC", "Vinyl Lounge Open Mic") # Eliminate annoying all-caps, if applicable
        artist = artist.replace(", VINYL LOUNGE", "") # Eliminate 'bonus' info about artist being @ Vinyl
        artist = artist.replace("Gordon Sterling Presents:", "")
        if "gypsy sally's jam" in artist.lower():
            genre = "Potpourri"
        if "closed" in artist.lower() in artist: # Skip closed private events
            continue
        localList = scraperLibrary.getLocalList()
        if scraperLibrary.compactWord(artist) in localList:
            local = "Yes"
        else:
            local = ""
        try:
            artistweb = bsObj.find("li", {"class":"web"}).find("a").attrs["href"]  #THIS finds the first instance of a li with a class of "web", then digs deeper, finding the first instance w/in that li of a child a, and pulls the href.  BUT - since some artists may not have link, using try/except
        except:
            artistweb = ""
        try: # There isn't always a description...
            description = bsObj.find("div", {"class":"bio"}).get_text() # Get the description, which does include a lot of breaks - will it be a mess?
        except:
            description = ""

        [description, readmore] = scraperLibrary.descriptionTrim(description, [], 800, artistweb, newhtml)

        descriptionJammed = description.replace(" ","") # Create a string with no spaces
Esempio n. 2
0
                                          '%Y-%m-%d').weekday() == 3:
                mrHenrys.genre = "Americana"  #Mr. Henry's almost always has Americana on Thursdays...
            else:
                mrHenrys.genre = "Jazz & Blues"
            # Add link to list, paired with event date and today's date
            pageanddate.add((newPage, mrHenrys.date, datetoday))

            # Event title comes from the page's single-event heading.
            mrHenrys.artist = bsObj.find(
                "h1", {"class": "tribe-events-single-event-title"}
            ).get_text().strip()
            if mrHenrys.artist == "Smith Jackson":
                mrHenrys.artist = "SmithJackson"
            mrHenrys.artist = mrHenrys.artist.replace("Double Header", "")

            # Flag the act as local when its compacted name is on the local list.
            localList = scraperLibrary.getLocalList()
            if scraperLibrary.compactWord(mrHenrys.artist) in localList:
                mrHenrys.local = "Yes"
            else:
                mrHenrys.local = ""

            # Pull the first h:mm am/pm time out of the event's start-date text.
            # Raw string keeps the same pattern without invalid string escapes;
            # [0] raises IndexError when the text contains no such time.
            longtime = bsObj.find(
                "span", {"class": "tribe-event-date-start"}
            ).get_text().strip()
            starttime = re.findall(r"[0-9]{1,2}\:[0-5][0-9]\s[aApP][mM]", longtime)[0]
            if starttime == "6:00 pm":
                # events with 6:00 start time actually have 6:00 doors but 2 different event times
                mrHenrys.starttime = ["7:30 pm", "9:45 pm"]
            else:
                mrHenrys.starttime = [starttime]
            try:  # Most events are free, but 1 or 2 events a month require tickets
                mrHenrys.price = bsObj.find("span", {