Beispiel #1
0
    def handle(self, *args, **options):
        """Record today's Brixton Academy show as an event if it is sold out.

        Downloads the venue's list view and inspects every table row. A row
        produces a new ``models.Event`` of type 'academy' when all of these
        hold: it has a date cell, the show's date is today, today is not a
        Sunday, it has an artist cell, its status icon reads 'Sold Out', and
        no event with the same guid is stored yet.

        ``args`` and ``options`` are accepted but not read.
        """
        print("Scraping")
        now = datetime.now()
        listing_url = 'http://www.o2academybrixton.co.uk/?t=list'
        html = urllib.urlopen(listing_url)
        page = BeautifulSoup.BeautifulSoup(html)
        rows = page.find('div', {'class': 'eventViewList'}).findAll('tr')
        for row in rows:
            # Date and time: rows without a date cell are ignored.
            datetime_td = row.find('td', {'class': 'eventViewListDate'})
            if datetime_td is None:
                continue
            # Every <div> fragment is prefixed with a space, which is why the
            # format string below starts with one.
            datetime_scraped = ''.join(
                ' ' + div.text for div in datetime_td.findAll('div'))
            event_datetime = datetime.strptime(
                datetime_scraped, " %d %b '%y-%a %I.%M%p")

            # Is it today and not a Sunday? Compare calendar dates: the
            # ``.days`` of (event - now) is -1 as soon as the start time has
            # passed and 0 for anything within the next 24 hours, so it
            # missed shows already under way and matched tomorrow's.
            if event_datetime.date() != now.date() or now.weekday() == 6:
                continue

            # Artist
            artist_td = row.find('td', {'class': 'eventViewListName'})
            if artist_td is None:
                continue
            main_link = artist_td.find('a', {'class': 'main'})
            artist_name = main_link.text
            info_link = main_link['href']
            # guid is artist + month + year, i.e. a multi-night run within
            # one month is only saved once.
            guid = artist_name.replace(' ', '') + event_datetime.strftime('%m%y')

            # Is it sold out? A row without a links cell has no status to
            # read, so it is skipped instead of raising AttributeError.
            links_td = row.find('td', {'class': 'eventViewListLinks'})
            if links_td is None:
                continue
            status_div = links_td.find('div', {'class': 'icon'})
            if status_div is None or status_div.text != 'Sold Out':
                continue

            # Already stored?
            if len(models.Event.objects.filter(guid=guid)) != 0:
                continue

            event = models.Event()
            event.message = 'The academy sold out tonight: %s' % artist_name
            event.event_type = models.EventType.objects.get(short_name='academy')
            event.info_link = info_link
            event.guid = guid
            event.occured = event_datetime
            event.address = "Brixton Academy, 211 Stockwell Road, SW9 9SL"
            event.lng = -0.11497
            event.lat = 51.46526
            event.data = {'artist_name': artist_name}
            event.save()
            print("saved %s " % event.message)
Beispiel #2
0
    def handle(self, *args, **options):
        """Store this year's local food hygiene inspections as events.

        Loads the ratings.food.gov.uk enhanced-search JSON result (the URL
        queries 'sw9'), and for every establishment whose ``RatingDate``
        contains the current year, that ``geo.is_local`` accepts and whose
        guid (business URL + rating date) is not stored yet, saves a
        ``models.Event`` of type 'foodratings'.

        Establishments whose ``RatingValue`` is not an integer are skipped
        with a printed note instead of aborting the whole run.

        ``args`` and ``options`` are accepted but not read.
        """
        print("Getting JSON")

        now = datetime.now()
        data = json.load(
            urllib.urlopen(
                'http://ratings.food.gov.uk/enhanced-search/en-GB/%5E/sw9/desc_rating/0/522/%5E/1/1/500/json'
            ))
        if not data:
            return
        collection = data['FHRSEstablishment'].get('EstablishmentCollection')
        if not collection:
            return

        this_year = now.strftime('%Y')
        for venue in collection['EstablishmentDetail']:
            # Only inspections dated in the current year are of interest.
            if this_year not in venue['RatingDate']:
                continue
            rating_date = datetime.strptime(venue['RatingDate'], '%Y-%m-%d')

            latlng = {
                'lat': float(venue['Geocode']['Latitude']),
                'lng': float(venue['Geocode']['Longitude'])
            }
            if not geo.is_local(latlng):
                continue

            # Has it been added before? The guid includes the rating date,
            # so a re-inspection of the same business is a new event.
            venue_url = 'http://ratings.food.gov.uk/business/en-GB/%s' % venue[
                'FHRSID']
            guid = venue_url + venue['RatingDate']
            if len(models.Event.objects.filter(guid=guid)) != 0:
                continue

            # Convert the rating before building the event so one bad value
            # cannot stop the remaining venues from being processed.
            # NOTE(review): the feed is believed to carry non-numeric values
            # such as 'Exempt' - confirm against the FHRS documentation.
            try:
                rating_value = int(venue['RatingValue'])
            except (TypeError, ValueError):
                print("skipped %s: rating %r is not a number" % (
                    venue['BusinessName'], venue['RatingValue']))
                continue

            event = models.Event()
            event.message = "%s was inspected for food hygiene. It got %s out of 5." % (
                venue['BusinessName'], venue['RatingValue'])
            event.event_type = models.EventType.objects.get(
                short_name='foodratings')
            event.info_link = venue_url
            event.guid = guid
            event.occured = rating_date
            event.address = "%s, %s %s" % (
                venue['AddressLine1'], venue['AddressLine2'],
                venue['PostCode'])
            event.lng = latlng['lng']
            event.lat = latlng['lat']
            event.data = {
                'business_name': venue['BusinessName'],
                'business_type': venue['BusinessType'],
                'rating_value': rating_value
            }
            event.save()
            print("saved %s " % event.message)
Beispiel #3
0
    def handle(self, *args, **options):
        """Store recent edits to Wikipedia articles about nearby places.

        Asks api.wikilocation.org for up to 25 articles around a fixed
        coordinate, keeps those ``geo.is_local`` accepts, reads each
        article's revision-history Atom feed and saves a ``models.Event`` of
        type 'wikipedia' for every revision updated after the date three
        days ago whose link is not already used as a guid.

        ``args`` and ``options`` are accepted but not read.
        """
        nearby_url = 'http://api.wikilocation.org/articles?lat=%s&lng=%s&limit=25&format=json' % (
            '51.46238', '-0.1145')
        nearby = json.load(urllib.urlopen(nearby_url))
        for article in nearby['articles']:
            article_id = article['id']
            title = article['title']

            # Check if local
            position = {
                'lat': float(article['lat']),
                'lng': float(article['lng'])
            }
            if not geo.is_local(position):
                continue

            # Revision history of the article as an Atom feed
            feed_url = 'http://en.wikipedia.org/w/index.php?curid=%s&action=history&feed=atom' % article_id
            history = feedparser.parse(feed_url)
            for entry in history.entries:
                datetime_updated = dateutil.parser.parse(entry['updated'])
                # Whatever follows the last ':' of the entry title is used
                # as the description of the change.
                change_title = entry['title'].split(':')[-1]
                message = "The Wikipedia article about %s was edited - %s" % (
                    title, change_title)

                # Check if recent: strictly later than the date 3 days ago
                cutoff = date.today() - timedelta(3)
                if datetime_updated.date() <= cutoff:
                    continue

                # The first link of the entry doubles as guid and info link
                revision_link = entry['links'][0]['href']
                if len(models.Event.objects.filter(guid=revision_link)) != 0:
                    continue

                print("save")
                event = models.Event()
                event.message = message
                event.event_type = models.EventType.objects.get(
                    short_name='wikipedia')
                event.info_link = revision_link
                event.guid = revision_link
                event.address = ''
                event.lng = article['lng']
                event.lat = article['lat']
                event.data = article
                event.save()
Beispiel #4
0
    def handle(self, *args, **options):
        """Store new local licence applications listed on lambeth.gov.uk.

        Scrapes the council's current-applications page. Every ``<li>``
        inside a ``div.infoBox`` is treated as one application; its link is
        used as both guid and info link. An application that is not stored
        yet and whose postcode ``geo.is_local`` accepts is saved as a
        ``models.Event`` of type 'licence'.

        ``args`` and ``options`` are accepted but not read.
        """
        print("Starting to scrape")
        html = urllib.urlopen(
            'http://www.lambeth.gov.uk/Services/Business/LicencesStreetTrading/AlcoholEntertainmentLateNightRefreshment/CurrentApplications.htm'
        )
        page = BeautifulSoup.BeautifulSoup(html)

        # Find rows
        for infobox in page.findAll('div', {'class': 'infoBox'}):
            for list_item in infobox.findAll('li'):
                link = list_item.find('a')
                address = link.string
                # The fifth child node of the <li> is read as the free-text
                # description - NOTE(review): depends on the page markup.
                # Everything from 'last date for representations' onwards is
                # dropped. rstrip() strips character sets, not suffixes:
                # first trailing commas/spaces, then trailing spaces/hyphens.
                details = list_item.contents[4].split(
                    'last date for representations')[0].rstrip(', ').rstrip(
                        ' - ')
                application_pdf_link = 'http://www.lambeth.gov.uk/' + link['href']

                # Check for an existing event first so applications that are
                # already stored are not geocoded again on every run.
                if len(models.Event.objects.filter(
                        guid=application_pdf_link)) != 0:
                    continue

                postcode = geo.extract_gb_postcode(address)
                latlng = geo.postcode_latlng(postcode)
                if not geo.is_local(latlng):
                    continue

                # The applicant is the part of the address before the first
                # comma.
                applicant = address.split(',')[0]
                application_type = details.replace('Application for ', '')
                message = "%s %s " % (applicant, application_type)

                event = models.Event()
                event.message = self.humanize(message)
                event.event_type = models.EventType.objects.get(
                    short_name='licence')
                event.info_link = application_pdf_link
                event.guid = application_pdf_link
                event.address = address
                event.lng = latlng['lng']
                event.lat = latlng['lat']
                event.data = {
                    'applicant': applicant,
                    'application_type': application_type
                }
                event.save()
Beispiel #5
0
    def handle(self, *args, **options):
        """Store an event when a steam train is listed for Brixton.

        Scrapes beholder.co.uk/steam, reads the second cell of the row with
        id 'beholder_row_BRX' and, if that cell has child elements, follows
        the first child's link to read the service name. A ``models.Event``
        of type 'steamtrain' is saved unless one with the same guid exists.

        ``args`` and ``options`` are accepted but not read.
        """
        print("Scraping")
        now = datetime.now()
        url = 'http://www.beholder.co.uk/steam/'
        html = urllib.urlopen(url)
        page = BeautifulSoup.BeautifulSoup(html)
        brixton_row = page.find('tr', {'id': 'beholder_row_BRX'})
        values = brixton_row.findAll('td')[1].findChildren()
        if not values:
            print("NO TRAINS :(")
            return
        print("TRAINS!")

        # Get the times. The previous loop reassigned ``times`` on every
        # pass, so only the last value survived; collect all of them.
        # NOTE(review): findChildren() is recursive - if the cell nests
        # tags, the same text can appear more than once; confirm markup.
        times = ' '.join(value.text for value in values)

        # Try and get details of the train from the first linked page
        html = urllib.urlopen(values[0]['href'])
        page = BeautifulSoup.BeautifulSoup(html)
        service_name = page.find('table').find('td').find('font').find(
            'b').text.title()

        # guid is the page URL plus the current date and hour, so at most
        # one event is stored per hour.
        guid = "%s/%s" % (url, now.strftime('%Y-%m-%d:%H'))
        if len(models.Event.objects.filter(guid=guid)) != 0:
            return

        event = models.Event()
        event.message = "%s will be steaming through Brixton Station at %s" % (
            service_name, times)
        event.event_type = models.EventType.objects.get(
            short_name='steamtrain')
        event.info_link = url
        event.guid = guid
        event.address = "32 Brixton Station Road, Brixton, London, SW9 8PE"
        event.lng = -0.11399
        event.lat = 51.46327
        event.data = {'service_name': service_name, 'time': times}
        event.save()
        print("saved %s " % event.message)
Beispiel #6
0
    def handle(self, *args, **options):
        """Store new local planning applications from openlylocal.com.

        Loads the planning-applications JSON for council 12 and, for every
        application whose URL is not already used as a guid and whose
        coordinates ``geo.is_local`` accepts, saves a ``models.Event`` of
        type 'planning'. The message is built from the agent (or 'Someone'
        when ``self.get_agent`` returns None), the action guessed by
        ``self.guess_type`` and the address up to ' London'.

        ``args`` and ``options`` are accepted but not read.
        """
        print("Getting JSON")
        data = json.load(
            urllib.urlopen(
                'http://openlylocal.com/councils/12/planning_applications.json'
            ))
        for application in data['planning_applications']:
            # Already stored?
            if len(models.Event.objects.filter(
                    guid=application['url'])) != 0:
                continue

            position = {
                'lat': application['lat'],
                'lng': application['lng']
            }
            if not geo.is_local(position):
                continue

            address = application['address']
            # Everything before ' London' is used as the building name.
            building_name = address.split(' London')[0]
            action = self.guess_type(application['description'])
            agent = self.get_agent(application)
            # Identity test: only a missing agent falls back to 'Someone'.
            if agent is None:
                agent = 'Someone'

            message = "%s %s %s" % (agent, action, building_name)

            event = models.Event()
            event.message = message
            event.event_type = models.EventType.objects.get(
                short_name='planning')
            event.info_link = application['url']
            event.guid = application['url']
            event.address = address
            event.lng = application['lng']
            event.lat = application['lat']
            event.data = {
                'agent': agent,
                'description': application['description'],
                'application type': application['application_type']
            }
            event.save()