예제 #1
0
def _fetchEventfulSoup(url, timeout=5):
    """Download ``url`` and parse the body with BeautifulSoup (html5lib).

    The HTTP response is closed as soon as its body has been read.
    Errors raised by urllib (invalid URL, network failure, timeout)
    propagate to the caller.
    """
    request = urllib.request.Request(url)
    # A timeout keeps a stalled server from blocking the scraper forever.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return BeautifulSoup(response.read(), "html5lib")


def getEventfulDocument(link):
    """Scrape the event page at ``link`` into a ``NewDocument``.

    Fills ``title``, ``date``, ``location``, ``description``, ``genres``,
    ``artists``, ``imageUrls`` and ``urls`` from the page markup. When the
    page links to a performer page, that page is fetched as well and the
    text of its first ``<h5>`` is appended to ``genres``.

    Every element is treated as optional: a missing tag leaves the
    corresponding field empty/untouched instead of raising.

    Args:
        link: Absolute URL of the event page.

    Returns:
        The populated ``NewDocument``.

    Raises:
        ValueError, urllib.error.URLError, OSError: if either page cannot
            be requested or read; these are not caught here.
    """
    eventSoup = _fetchEventfulSoup(link)
    document = NewDocument()

    # Title: concatenated text of every <span> inside the first <h1>.
    heading = eventSoup.h1
    titleSpans = heading.find_all('span') if heading is not None else []
    document.title = ''.join(str(span.text) for span in titleSpans)

    # Date: concatenated text of all itemprop="startDate" elements.
    dateTags = eventSoup.find_all(attrs={"itemprop": "startDate"})
    document.date = ''.join(str(tag.text) for tag in dateTags)

    # Location: text of the first <p> of each itemprop="location" element;
    # elements without a <p> are skipped.
    locationParts = []
    for locationTag in eventSoup.find_all(attrs={"itemprop": "location"}):
        paragraph = locationTag.p
        if paragraph is not None:
            locationParts.append(paragraph.text)
    document.location = ''.join(locationParts)

    # The description block carries both the description (first <p>) and
    # the genre link (inside the last <p>), so look it up only once.
    descriptionBlock = eventSoup.find(
        attrs={"class": "section-block description"})
    if descriptionBlock is not None:
        paragraphs = descriptionBlock.find_all('p')
        if paragraphs:
            document.description = paragraphs[0].string
            genreAnchor = paragraphs[-1].a
            if genreAnchor is not None:
                document.genres.append(genreAnchor.text)

    performer = eventSoup.find(attrs={"itemprop": "performer"})
    if performer is not None:
        if performer.span is not None:
            document.artists.append(performer.span.string)

        # Follow the performer link (when present) to pick up one more
        # genre from the artist page.
        artistAnchor = performer.a
        artistLink = artistAnchor.get('href') if artistAnchor is not None else None
        if artistLink:
            artistSoup = _fetchEventfulSoup(artistLink)
            if artistSoup.h5 is not None:
                document.genres.append(artistSoup.h5.string)

    image = eventSoup.find(attrs={"class": "image-viewer-open"})
    if image is not None and image.img is not None:
        imageSource = image.img.get('src')
        if imageSource:
            document.imageUrls.append(imageSource)

    document.urls.append(link)
    return document
예제 #2
0
def getSongkickDocument(link):
    """Scrape the event page at ``link`` into a ``NewDocument``.

    Fills ``title``, ``description``, ``date``, ``location``, ``artists``,
    ``imageUrls`` and ``urls`` from the page markup.

    This is best-effort: if the page cannot be fetched or does not have
    the expected structure, a message is printed and ``None`` is returned
    instead of raising.

    Args:
        link: Absolute URL of the event page.

    Returns:
        The populated ``NewDocument``, or ``None`` on any failure.
    """
    try:
        eventRequest = urllib.request.Request(link)
        # The context manager closes the HTTP response once it is read.
        with urllib.request.urlopen(eventRequest, timeout=5) as eventResponse:
            eventSoup = BeautifulSoup(eventResponse.read(), "html5lib")

        document = NewDocument()

        # Title: concatenated text of the links inside <h1><span>.
        titleLinks = eventSoup.h1.span.find_all('a')
        document.title = ''.join(str(a.text) for a in titleLinks)

        # Description: iterating a tag yields its direct children. A child
        # with mixed content has ``.string`` of None; skip those so the
        # literal text "None" never ends up in the description.
        detailParts = []
        detailsContainer = eventSoup.find(
            attrs={"class": 'additional-details-container'})
        if detailsContainer is not None:
            for child in detailsContainer:
                childText = child.string
                if childText is not None:
                    detailParts.append(str(childText))
        document.description = ''.join(detailParts)

        document.date = eventSoup.h5.string

        locationTags = eventSoup.find_all("div", {"class": 'location'})
        document.location = ''.join(str(tag.text) for tag in locationTags)

        # All line-up entries are merged into a single artist string.
        lineUpTags = eventSoup.find_all(attrs={"class": 'line-up'})
        performer = ''.join(str(tag.a.text) for tag in lineUpTags)
        document.artists.append(performer)

        image = eventSoup.find(attrs={"class": "profile-picture-wrapper"})
        if image is not None:
            document.imageUrls.append(image.img['src'])

        document.urls.append(link)
        return document
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still stop the program.
        print("An ERROR occured for this document!")
        return None