def _fetchEventfulSoup(url, timeout):
    """Download *url* and return the page parsed by BeautifulSoup/html5lib."""
    # The context manager closes the HTTP response even when read() fails.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        html = response.read()
    return BeautifulSoup(html, "html5lib")


def getEventfulDocument(link, timeout=5):
    """Scrape an Eventful event page into a ``NewDocument``.

    The page at *link* is downloaded and the title, date, location,
    description, genres, artists and image URL are copied from its markup
    into a fresh ``NewDocument``. When the page names a performer with a
    link, that performer page is downloaded as well and the text of its
    first ``<h5>`` is recorded as an additional genre.

    Elements that are absent from the page are skipped instead of raising,
    so a partially filled document is returned for incomplete pages.

    Args:
        link: URL of the event page; it is also appended to ``document.urls``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The populated ``NewDocument``.

    Raises:
        urllib.error.URLError: If the event page or the performer page
            cannot be fetched (timeouts included). Network failures are
            deliberately not swallowed here.
    """
    from urllib.parse import urljoin

    eventSoup = _fetchEventfulSoup(link, timeout)
    document = NewDocument()

    # Title: concatenated text of every <span> inside the first <h1>.
    heading = eventSoup.h1
    titleSpans = heading.find_all('span') if heading is not None else []
    document.title = ''.join(span.text for span in titleSpans)

    # Date: concatenated text of every itemprop="startDate" element.
    dateTags = eventSoup.find_all(attrs={"itemprop": "startDate"})
    document.date = ''.join(tag.text for tag in dateTags)

    # Location: text of the first <p> of each itemprop="location" element.
    locationTags = eventSoup.find_all(attrs={"itemprop": "location"})
    document.location = ''.join(
        tag.p.text for tag in locationTags if tag.p is not None
    )

    # The description block supplies the description (its first <p>) and a
    # genre (the link inside its last <p>), so it is looked up only once.
    descriptionBlock = eventSoup.find(
        attrs={"class": "section-block description"}
    )
    if descriptionBlock is not None:
        paragraphs = descriptionBlock.find_all('p')
        if paragraphs:
            # .string is None when the paragraph holds nested markup; in
            # that case the document's existing description is left alone.
            summary = paragraphs[0].string
            if summary is not None:
                document.description = summary
            genreAnchor = paragraphs[-1].a
            if genreAnchor is not None:
                document.genres.append(genreAnchor.text)

    performer = eventSoup.find(attrs={"itemprop": "performer"})
    if performer is not None:
        nameTag = performer.span
        if nameTag is not None and nameTag.string is not None:
            document.artists.append(nameTag.string)

        artistAnchor = performer.a
        href = artistAnchor.get('href') if artistAnchor is not None else None
        if href:
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the event page.
            artistSoup = _fetchEventfulSoup(urljoin(link, href), timeout)
            genreTag = artistSoup.h5
            if genreTag is not None and genreTag.string is not None:
                document.genres.append(genreTag.string)

    imageViewer = eventSoup.find(attrs={"class": "image-viewer-open"})
    if imageViewer is not None and imageViewer.img is not None:
        source = imageViewer.img.get('src')
        if source:
            document.imageUrls.append(source)

    document.urls.append(link)
    return document
def getSongkickDocument(link, timeout=5):
    """Scrape a Songkick event page into a ``NewDocument``.

    The page at *link* is downloaded and the title, description, date,
    location, line-up and profile picture URL are copied from its markup
    into a fresh ``NewDocument``.

    This is a best-effort scraper: if the download fails or the page does
    not have the expected structure, the failure is reported on stdout and
    ``None`` is returned instead of raising.

    Args:
        link: URL of the event page; it is also appended to ``document.urls``.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The populated ``NewDocument``, or ``None`` when scraping failed.
    """
    try:
        # The context manager closes the HTTP response even when read() fails.
        with urllib.request.urlopen(link, timeout=timeout) as response:
            html = response.read()
        eventSoup = BeautifulSoup(html, "html5lib")
        document = NewDocument()

        # Title: concatenated text of the links inside <h1><span>.
        titleLinks = eventSoup.h1.span.find_all('a')
        document.title = ''.join(str(anchor.text) for anchor in titleLinks)

        # Description: the single-string children of the details container.
        # Children whose .string is None (nested markup) are skipped so the
        # literal text "None" never ends up in the description.
        detailsContainer = eventSoup.find(
            attrs={"class": 'additional-details-container'}
        )
        if detailsContainer is None:
            document.description = ''
        else:
            document.description = ''.join(
                str(child.string)
                for child in detailsContainer
                if child.string is not None
            )

        document.date = eventSoup.h5.string

        locationBlocks = eventSoup.find_all("div", {"class": 'location'})
        document.location = ''.join(
            str(block.text) for block in locationBlocks
        )

        # Line-up: text of the first link of every "line-up" element, stored
        # as one artists entry.
        lineUps = eventSoup.find_all(attrs={"class": 'line-up'})
        document.artists.append(
            ''.join(str(block.a.text) for block in lineUps)
        )

        image = eventSoup.find(attrs={"class": "profile-picture-wrapper"})
        if image is not None:
            document.imageUrls.append(image.img['src'])

        document.urls.append(link)
        return document
    except Exception as error:
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate, so a long crawl can still be interrupted.
        print("An ERROR occured for this document!")
        print("  {}: {!r}".format(link, error))
        return None