예제 #1
0
 def update(self, feed):
     """Re-fetch ``feed`` and save an ``Item`` for every entry it contains.

     The feed's ``last_modified`` is taken from the parsed feed's
     ``updated_parsed`` value.  If that cannot be read or converted, the
     ``last-modified`` entry of ``feed_data.headers`` is parsed instead,
     defaulting to the current time when that entry is missing.  The feed
     is then saved.

     Each entry is written to an ``Item`` created for its id; entries that
     repeat an id already seen during this call reuse the same ``Item``.

     Returns the set of ``Item`` objects saved during this call.
     """
     feed_data = feedparser.parse(feed.rss_url)
     try:
         feed.last_modified = struct_to_datetime(feed_data.feed.updated_parsed)
     except Exception:
         # Best-effort fallback.  ``Exception`` rather than a bare ``except``
         # so KeyboardInterrupt / SystemExit are not swallowed.
         now_header = datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000")
         feed.last_modified = parse_date(feed_data.headers.get('last-modified', now_header))

     feed.save()

     items = set()
     items_by_guid = {}  # guid -> Item already handled in this call
     for x_item in feed_data.entries:
         guid = x_item.id
         # Only the first six struct_time fields are date/time components;
         # the seventh is the weekday and must not be passed as microseconds.
         last_modified = datetime(*x_item.date_parsed[:6])

         item = items_by_guid.get(guid)
         if item is None:
             item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)
             items_by_guid[guid] = item

         # NOTE(review): entries are rewritten unconditionally.  Skipping
         # entries whose stored last_modified is already current was disabled
         # here; confirm the intent before re-enabling that comparison.
         item.title = x_item.title
         item.description = sanitise_html(x_item.description)
         item.link = x_item.link
         item.last_modified = last_modified
         item.save()

         items.add(item)

     return items
    def handle_noargs(self, **options):
        """Synchronise stored RSS items with the Daily Info event feeds.

        For each feed in ``RSSFeed.events`` whose URL starts with
        ``http://www.dailyinfo.co.uk/``, the feed is fetched and every entry
        is written to an ``RSSItem`` (the first stored item with the same
        guid is reused, otherwise a new one is created).  Stored items whose
        guid does not appear in the fetched feed are then deleted.

        Venue details for an entry are resolved in this order:

        1. the per-run cache ``location_data`` keyed by venue id;
        2. an ``Entity`` looked up through ``daily_info_ids``;
        3. the venue page at the entry's ``xcal_url``, whose result is
           stored in the cache.
        """
        # venue id -> (name, address, point) for venues scraped in this run
        location_data = {}

        for feed in RSSFeed.events.all():
            if not feed.rss_url.startswith('http://www.dailyinfo.co.uk/'):
                continue

            feed_data = feedparser.parse(feed.rss_url)
            items = list(feed.rssitem_set.all())
            guids = set()

            # Index the stored items once; setdefault keeps the first item
            # for a guid, matching a front-to-back scan of ``items``.
            items_by_guid = {}
            for stored in items:
                items_by_guid.setdefault(stored.guid, stored)

            for x_item in feed_data.entries:
                guid = x_item.id
                # Only the first six struct_time fields are date/time
                # components; the seventh is the weekday and must not be
                # passed as microseconds.
                last_modified = datetime(*x_item.date_parsed[:6])

                print(x_item.items())

                item = items_by_guid.get(guid)
                if item is None:
                    item = RSSItem(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)

                # NOTE(review): entries are rewritten unconditionally.
                # Skipping entries whose stored last_modified is already
                # current was disabled here; confirm before re-enabling.

                # Drop the prefix before the first ': '; a title without
                # one is kept whole instead of raising IndexError.
                item.title = x_item.title.split(': ', 1)[-1]

                try:
                    summary = Command.SUMMARY_RE.match(x_item.summary).groups(0)[0]
                    item.description = sanitise_html(summary)
                except Exception:
                    # Best-effort: fall back to the whole summary when the
                    # pattern does not match or the extract cannot be cleaned.
                    item.description = sanitise_html(x_item.summary)

                item.link = x_item.link
                item.last_modified = last_modified
                item.dt_start = dateutil.parser.parse(x_item.xcal_dtstart)
                item.dt_end = dateutil.parser.parse(x_item.xcal_dtend)

                item.location_url = x_item.xcal_url

                venue_id = int(Command.DAILY_INFO_VENUE_ID_RE.match(x_item.xcal_url).groups(0)[0])

                try:
                    item.location_name, item.location_address, item.location_point = location_data[venue_id]
                except KeyError:
                    try:
                        source, entity_id = daily_info_ids[venue_id]
                        entity_type = iter(EntityType.objects.filter(source=source)).next()
                        entity = Entity.objects.get(**{str(entity_type.id_field): entity_id})
                        item.location_entity = entity
                        item.location_point = entity.location
                        item.location_name = entity.title
                    except (KeyError, Entity.DoesNotExist):
                        # No known entity for this venue: read the details
                        # from the venue page instead.
                        response = urllib.urlopen(item.location_url)
                        try:
                            venue_et = ES.parse(response)
                        finally:
                            response.close()

                        headings = [e for e in venue_et.findall('.//div')
                                    if e.attrib.get('class') == 'heading']
                        item.location_name = headings[0].text.strip()

                        try:
                            item.location_point = Point(float(x_item.geo_long),
                                                        float(x_item.geo_lat))
                            print('%s %s' % (x_item.geo_lat, x_item.geo_long))
                        except (AttributeError, ValueError):
                            # Entry has no usable coordinates: use the first
                            # link on the page that matches
                            # GOOGLE_MAPS_LINK_RE, if there is one.
                            for link in venue_et.findall('.//a'):
                                match = Command.GOOGLE_MAPS_LINK_RE.match(link.attrib.get('href', ''))
                                if match:
                                    item.location_point = self.postcode_to_point(match.groups(0)[0])
                                    break
                            else:
                                item.location_point = None

                        # Only the second <p> on the page is consulted for
                        # the address; later paragraphs are never read.
                        paragraphs = venue_et.findall('.//p')[1:]
                        if paragraphs:
                            address = (paragraphs[0].text or '').strip()
                            item.location_address = Command.WHITESPACE_RE.sub(' ', address)
                            if not item.location_point:
                                match = Command.POSTCODE_RE.search(item.location_address)
                                if match:
                                    item.location_point = self.postcode_to_point(match.groups(0)[0])
                                    print(item.location_point)

                        location_data[venue_id] = item.location_name, item.location_address, item.location_point

                item.save()

                guids.add(guid)

            # Remove stored items that are absent from the fetched feed.
            for item in items:
                if item.guid not in guids:
                    item.delete()