예제 #1
0
def extract(url):
    """Scrape one event-detail page and store the resulting event in ``table``.

    The page is fetched with a browser-like User-Agent, parsed with
    BeautifulSoup/lxml, and the title, description and date are read from the
    ``full m-event-detail clearfix`` container. The venue fields (address,
    city, latitude, longitude) are hard-coded, so every event stored by this
    function gets the same location.

    Ordinary failures are reported with ``print`` and not raised:
    "Page error" when fetching or parsing the page fails, "Info error" when a
    field cannot be extracted or the write fails. ``KeyboardInterrupt`` and
    ``SystemExit`` are allowed to propagate so a crawl can be stopped.

    Args:
        url: URL of the event page; it is passed to ``Request`` unchanged and
            stored as the event link.

    Returns:
        None.
    """
    # Pause before every request (presumably to rate-limit the crawl).
    sleep(0.2)

    try:
        # The original branched on ``url[:3] == "http"`` (never true: three
        # characters cannot equal a four-character string) but built the very
        # same request in both branches, so a single request is equivalent.
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(req) as response:
            html = response.read()
        soup = bs.BeautifulSoup(html, 'lxml')
        info = soup.find(class_='full m-event-detail clearfix')
    except Exception as exc:
        print("Page error", url, exc)
        return

    try:
        event = Event()
        # 14-character random alphanumeric identifier.
        alphabet = string.ascii_letters + string.digits
        event.id = "".join(random.choice(alphabet) for _ in range(14))

        # ``info`` is None when the container is missing; the resulting
        # AttributeError is reported as an "Info error" below.
        event.title = info.h1.text
        event.link = url
        event.description = info.find(class_='m-event-detail-description').text
        date_text = info.find(class_="m-date__singleDate").text
        event.date = event.dateFinder(date_text)

        # Fixed venue: 18300 W Alameda Pkwy, Morrison, CO 80465.
        # The previously stored street number ("8300") had lost the leading
        # digit present in the original location note.
        event.lng = "-105.2048546"
        event.lat = "39.6664666"
        event.city = "Morrison"
        event.address = "18300 W Alameda Pkwy"

        print("\n\n Item: ", event.title)
        table.put_item(Item=event.toJSON())
    except Exception as exc:
        print("Info error", url, exc)
예제 #2
0
def extract(url):
    """Scrape one active.com event page and store the event in ``table``.

    ``url`` may be absolute (starting with "http") or a path relative to
    ``https://www.active.com``. The page is fetched with a browser-like
    User-Agent, parsed with BeautifulSoup/lxml, and the title, description,
    date and address text are read from the ``body-container`` element.

    The address text is resolved with ``event.addressFinder`` and, if that
    yields a string latitude, with ``event.addressFinderBasic``. The event is
    written with ``table.put_item`` only when one of them returns a
    non-string latitude; otherwise "address failure" is printed and nothing
    is stored.

    Ordinary failures are reported with ``print`` and not raised:
    "Page error" for fetch/parse problems, "Error" for extraction or write
    problems. ``KeyboardInterrupt`` and ``SystemExit`` propagate.

    Args:
        url: Absolute URL or site-relative path of the event page.

    Returns:
        None.
    """
    base = "https://www.active.com"
    # The original test ``url[:3] == "http"`` could never be true (a
    # three-character slice vs. a four-character string), so absolute URLs
    # were wrongly prefixed with ``base``. Resolve the URL once and use it
    # for the request, the stored link and the error message alike.
    full_url = url if url.startswith("http") else base + url

    # Pause before every request (presumably to rate-limit the crawl).
    sleep(0.2)

    try:
        req = Request(full_url, headers={'User-Agent': 'Mozilla/5.0'})
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(req) as response:
            html = response.read()
        soup = bs.BeautifulSoup(html, 'lxml')
        info = soup.find(id='body-container')
    except Exception as exc:
        print("Page error", full_url, exc)
        return

    try:
        event = Event()
        # 14-character random alphanumeric identifier.
        alphabet = string.ascii_letters + string.digits
        event.id = "".join(random.choice(alphabet) for _ in range(14))

        # ``info`` is None when the container is missing; the resulting
        # AttributeError is reported as "Error" below.
        event.title = info.h1.text
        event.link = full_url
        print("Title: ", event.title)
        event.description = info.find(class_='asset-summary span8').text
        event.date = event.dateFinder(info.h5.text)
        location = info.find(class_='ed-address-text').text

        # Try the primary address finder first, then the basic one. A string
        # latitude is treated as "not resolved" (presumably the finders return
        # numeric coordinates on success -- confirm against Event).
        attempts = (
            (event.addressFinder, "SUCCESS"),
            (event.addressFinderBasic, "Success"),
        )
        for finder, success_message in attempts:
            event.address, event.city, event.lat, event.lng = finder(location)
            if not isinstance(event.lat, str):
                # Coordinates are stored as strings.
                event.lat = str(event.lat)
                event.lng = str(event.lng)
                print(success_message)
                table.put_item(Item=event.toJSON())
                break
        else:
            print("address failure")
    except Exception as exc:
        print("Error", exc)