def extract(url):
    """Scrape one active.com event page and store the event via ``table``.

    Args:
        url: Address of the event page. A value starting with ``"http"`` is
            fetched as-is; anything else is treated as a site-relative path
            and appended to ``https://www.active.com``.

    Returns:
        None. Progress and failures are reported with ``print``; fetch and
        parse errors are caught here rather than propagated.
    """
    base = "https://www.active.com"
    # The previous check compared url[:3] (three characters) with "http",
    # which can never match, so absolute URLs were always prefixed with base.
    page_url = url if url.startswith("http") else base + url

    sleep(0.2)  # brief pause before each request
    try:
        req = Request(page_url, headers={'User-Agent': 'Mozilla/5.0'})
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(req) as response:
            html = response.read()
        soup = bs.BeautifulSoup(html, 'lxml')
        info = soup.find(id='body-container')
        try:
            event = Event()
            # 14-character random alphanumeric identifier.
            event.id = "".join(
                random.choice(string.ascii_letters + string.digits)
                for _ in range(14)
            )
            event.title = info.h1.text
            event.link = page_url
            print("Title: ", event.title)
            event.description = info.find(class_='asset-summary span8').text
            event.date = event.dateFinder(info.h5.text)
            location = info.find(class_='ed-address-text').text

            # A str latitude is treated as "lookup failed" (presumably the
            # finders return string placeholders on failure -- confirm
            # against Event). On failure, retry once with the basic finder.
            event.address, event.city, event.lat, event.lng = (
                event.addressFinder(location)
            )
            outcome = "SUCCESS"
            if isinstance(event.lat, str):
                event.address, event.city, event.lat, event.lng = (
                    event.addressFinderBasic(location)
                )
                outcome = "Success"

            if isinstance(event.lat, str):
                print("address failure")
            else:
                # Coordinates are stored as strings.
                event.lat = str(event.lat)
                event.lng = str(event.lng)
                print(outcome)
                table.put_item(Item=event.toJSON())
        except Exception:
            # Any failure while parsing or storing the event lands here.
            print("Error")
    except Exception:
        # Request construction, download, or HTML parsing failed.
        print("Page error", page_url)
def extract(url):
    """Scrape one event detail page and store the event via ``table``.

    Every event is stored with the same hard-coded venue address and
    coordinates; only the id, title, link, description and date come from
    the page.

    Args:
        url: Absolute URL of the event detail page.

    Returns:
        None. Progress and failures are reported with ``print``; fetch and
        parse errors are caught here rather than propagated.
    """
    sleep(0.2)  # brief pause before each request
    try:
        # The original branched on url[:3] == "http" (never true) and built
        # the exact same Request in both branches, so the branch is removed.
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(req) as response:
            html = response.read()
        soup = bs.BeautifulSoup(html, 'lxml')
        info = soup.find(class_='full m-event-detail clearfix')
        try:
            event = Event()
            # 14-character random alphanumeric identifier.
            event.id = "".join(
                random.choice(string.ascii_letters + string.digits)
                for _ in range(14)
            )
            event.title = info.h1.text
            event.link = url
            event.description = info.find(
                class_='m-event-detail-description').text
            date = info.find(class_="m-date__singleDate").text
            event.date = event.dateFinder(date)

            # Fixed venue for every event scraped by this function.
            event.lng = "-105.2048546"
            event.lat = "39.6664666"
            event.city = "Morrison"
            # NOTE(review): the original literal was "8300 W Alameda Pkwy",
            # which disagreed with the commented-out full address
            # "18300 W Alameda Pkwy, Morrison, CO 80465" -- corrected; confirm.
            event.address = "18300 W Alameda Pkwy"

            print("\n\n Item: ", event.title)
            table.put_item(Item=event.toJSON())
        except Exception:
            # Any failure while parsing or storing the event lands here.
            print("Info error", url)
    except Exception:
        # Request construction, download, or HTML parsing failed.
        print("Page error", url)
def extract(url):
    """Scrape one ``mn-event`` detail page and store the event via ``table``.

    Args:
        url: Absolute URL of the event detail page.

    Returns:
        None. Progress and failures are reported with ``print``; fetch and
        parse errors are caught here rather than propagated.
    """
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        print("Extracting...")
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(req) as response:
            html = response.read()
        soup = bs.BeautifulSoup(html, 'lxml')
        info = soup.find(class_='mn-section mn-event-detail-listing')
        try:
            event = Event()
            # 14-character random alphanumeric identifier.
            event.id = "".join(
                random.choice(string.ascii_letters + string.digits)
                for _ in range(14)
            )
            event.title = info.find(class_='mn-event-content').text
            event.link = url
            event.description = info.find(itemprop='description').text

            # Page shows e.g. "January 5, 2024"; stored as "2024-01-05".
            # Strip first: whitespace around the text in the HTML would
            # otherwise make strptime reject an otherwise valid date.
            day_text = info.find(class_='mn-event-day').text.strip()
            event.date = datetime.strptime(
                day_text, '%B %d, %Y').strftime('%Y-%m-%d')

            location = info.find(itemprop='name').text

            # A str latitude is treated as "lookup failed" (presumably the
            # finders return string placeholders on failure -- confirm
            # against Event). On failure, retry once with the basic finder.
            event.address, event.city, event.lat, event.lng = (
                event.addressFinder(location)
            )
            outcome = "SUCCESS\n"
            if isinstance(event.lat, str):
                event.address, event.city, event.lat, event.lng = (
                    event.addressFinderBasic(location)
                )
                outcome = "Success\n"

            if isinstance(event.lat, str):
                print("address failure\n")
            else:
                # Coordinates are stored as strings.
                event.lat = str(event.lat)
                event.lng = str(event.lng)
                print(outcome)
                table.put_item(Item=event.toJSON())
        except Exception:
            # Any failure while parsing or storing the event lands here.
            print("Event error", url, "\n")
    except Exception:
        # Request construction, download, or HTML parsing failed.
        print("Page error", url, "\n")
now = datetime.datetime.now()


def random_date(start, end):
    """Return a random datetime between `start` and `end`.

    The result is `start` plus a whole number of seconds, drawn uniformly
    with ``random.randint`` from 0 up to the truncated length of the
    interval in seconds.
    """
    span_seconds = int((end - start).total_seconds())
    offset = random.randint(0, span_seconds)
    return start + datetime.timedelta(seconds=offset)


# File names an event image is picked from.
images = ["one.jpg", "two.jpg", "three.jpg", "four.jpg"]

# Create 50 events filled with generated values and write each to `table`.
for i in range(50):
    event = Event()

    # 14-character random alphanumeric identifier.
    myID = "".join(
        random.choice(string.ascii_letters + string.digits)
        for _ in range(14)
    )
    event.id = myID

    event.title = text.sentence()
    event.link = "https://google.com"
    event.description = text.quote()
    # Date falls somewhere in the 600 days following `now`.
    event.date = str(random_date(now, now + datetime.timedelta(600)))
    event.category = text.words(quantity=4)
    event.address = address.address()
    event.city = address.city()
    # Coordinates are stored as strings rounded to 7 decimal places.
    event.lat = str(round(random.uniform(37, 40.8), 7))
    event.lng = str(round(random.uniform(-108.9, -102.2), 7))
    event.image = random.choice(images)

    ans = event.toJSON()
    table.put_item(Item=ans)