def extract(url):
    """Scrape one active.com event page and store the event in ``table``.

    The page's ``h1`` title, summary text, ``h5`` date and address text are
    copied onto a new ``Event``. The event is written with
    ``table.put_item`` only when ``Event.addressFinder`` or, failing that,
    ``Event.addressFinderBasic`` yields a latitude that is not a ``str``.
    Fetch and parse errors are reported on stdout rather than raised.

    Args:
        url: Event page URL, either absolute (starting with "http") or
            relative to https://www.active.com.

    Returns:
        None.
    """
    base = "https://www.active.com"
    # The previous check `url[:3] == "http"` compared three characters with
    # four and was always False, so absolute URLs got the base prepended.
    full_url = url if url.startswith("http") else base + url
    sleep(0.2)  # brief pause before every request

    try:
        req = Request(full_url, headers={'User-Agent': 'Mozilla/5.0'})
        # Close the HTTP response as soon as the body has been read.
        with urlopen(req) as response:
            s = response.read()
        soup = bs.BeautifulSoup(s, 'lxml')
        info = soup.find(id='body-container')
    except Exception:
        print("Page error", full_url)
        return

    try:
        event = Event()
        alphabet = string.ascii_letters + string.digits
        event.id = "".join(random.choice(alphabet) for _ in range(14))
        event.title = info.h1.text
        event.link = full_url
        print("Title: ", event.title)
        event.description = info.find(class_='asset-summary span8').text
        event.date = event.dateFinder(info.h5.text)
        location = info.find(class_='ed-address-text').text

        event.address, event.city, event.lat, event.lng = event.addressFinder(
            location)
        success_message = "SUCCESS"
        # A str latitude is treated as a failed lookup; retry with the
        # basic finder before giving up.
        if isinstance(event.lat, str):
            event.address, event.city, event.lat, event.lng = (
                event.addressFinderBasic(location))
            success_message = "Success"
        if isinstance(event.lat, str):
            print("address failure")
            return

        # Coordinates are stored as strings.
        event.lat = str(event.lat)
        event.lng = str(event.lng)
        print(success_message)
        table.put_item(Item=event.toJSON())
    except Exception:
        # Covers a missing page element (info is None or find() returned
        # None) as well as failures in the Event helpers or the table write.
        print("Error")
def extract(url):
    """Scrape one event detail page and store the event in ``table``.

    The event is read from the page element with class
    ``mn-section mn-event-detail-listing``: title, description, day
    (parsed as e.g. "March 5, 2019" and stored as ``YYYY-MM-DD``) and
    location name. The event is written with ``table.put_item`` only when
    ``Event.addressFinder`` or, failing that, ``Event.addressFinderBasic``
    yields a latitude that is not a ``str``. Fetch and parse errors are
    reported on stdout rather than raised.

    Args:
        url: Absolute URL of the event detail page.

    Returns:
        None.
    """
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        print("Extracting...")
        # Close the HTTP response as soon as the body has been read.
        with urlopen(req) as response:
            s = response.read()
        soup = bs.BeautifulSoup(s, 'lxml')
        info = soup.find(class_='mn-section mn-event-detail-listing')
    except Exception:
        print("Page error", url, "\n")
        return

    try:
        event = Event()
        alphabet = string.ascii_letters + string.digits
        event.id = "".join(random.choice(alphabet) for _ in range(14))
        event.title = info.find(class_='mn-event-content').text
        event.link = url
        event.description = info.find(itemprop='description').text
        day_text = info.find(class_='mn-event-day').text
        event.date = datetime.strptime(day_text, '%B %d, %Y').strftime(
            '%Y-%m-%d')
        location = info.find(itemprop='name').text

        event.address, event.city, event.lat, event.lng = event.addressFinder(
            location)
        success_message = "SUCCESS\n"
        # A str latitude is treated as a failed lookup; retry with the
        # basic finder before giving up.
        if isinstance(event.lat, str):
            event.address, event.city, event.lat, event.lng = (
                event.addressFinderBasic(location))
            success_message = "Success\n"
        if isinstance(event.lat, str):
            print("address failure\n")
            return

        # Coordinates are stored as strings.
        event.lat = str(event.lat)
        event.lng = str(event.lng)
        print(success_message)
        table.put_item(Item=event.toJSON())
    except Exception:
        # Covers a missing page element (info is None or find() returned
        # None), an unparseable date, and failures in the Event helpers or
        # the table write.
        print("Event error", url, "\n")