Python decode_htmlentities Examples

Programming Language: Python

Namespace/Package Name: htmldecode

Method/Function: decode_htmlentities

Examples at hotexamples.com: 2

Python decode_htmlentities - 2 examples found. These are the top-rated real-world Python examples of htmldecode.decode_htmlentities, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: icser_sr.py Project: vad/trentevent

 def get_events(self):
     """Yield one ``Event`` per dated paragraph of the page's main cell.

     The ``<p>`` elements found inside ``<td class="main">`` of ``self.doc``
     are scanned in order.  The first non-empty paragraph only supplies the
     year (its last space-separated word).  Every later paragraph is
     expected to look like ``DD/MM description`` and becomes an event with
     a ``dtstart;value=date`` of ``YYYYMMDD``, the module-level ``LOCATION``
     and the description as summary.  Paragraphs that cannot be parsed are
     skipped silently.
     """
     from hashlib import md5
     paragraphs = self.doc.find('td', {"class": "main"}).findAll('p')

     year = 0
     for paragraph in paragraphs:
         # NOTE(review): presumably ``.string`` is None for paragraphs with
         # nested markup (BeautifulSoup behaviour) - confirm.  Skip them
         # instead of handing None to decode_htmlentities().
         if paragraph.string is None:
             continue

         p = decode_htmlentities(paragraph.string).strip()
         if not p:
             continue

         if not year:
             year = p.split(' ')[-1]
             continue

         event = Event()

         try:
             words = p.split(' ')
             date = words[0].split('/')
             desc = ' '.join(words[1:])

             event.add('location', LOCATION)
             event.add('dtstart;value=date', "%s%.2d%.2d" % (
                 int(year), int(date[1]), int(date[0])))
             event.add('summary', desc)

             #TODO: add other info like the date!!
             md5text = desc
             # md5() needs bytes: hashing text with non-ASCII characters
             # would raise and the event would be dropped without notice.
             if not isinstance(md5text, bytes):
                 md5text = md5text.encode('utf-8')

             event['uid'] = md5(md5text).hexdigest() + '@supercinemarovereto.it'
         except Exception:
             # Best effort: an unparsable paragraph must not stop the scan.
             continue

         # Yield outside the try block: a bare ``except`` around ``yield``
         # swallows GeneratorExit and makes close() fail with
         # "generator ignored GeneratorExit".
         yield event

Example #2

Show file

File: icser_isdd.py Project: Morail/trentevent

    def get_events(self):
        import re

        events_desc = str(self.doc).split('];')

        # we append the local timezone to each time so that icalendar will convert
        # to UTC in the output
        lt = LocalTimezone()

        base_url = 'http://www.isuonidelledolomiti.it'

        re_url = re.compile('/IT/.*/\?s=\d+')
        re_hours = re.compile('ore (\d{1,2})')
        
        for e in events_desc:

            print e

            location, description, summary = '', '', ''
            date_, hour_ = None, None

            if not e.startswith('Eventi['):
                print 'Not an ISDD event'
                continue

            a = e.split(' = ')

            p = remove_html_tags(decode_htmlentities(a[1])).strip().decode("utf-8")
            if not p:
                continue

            list_ = p[1:-1].split('\',')
            #print list, len(list)

            event = Event()
        
            try:

                date_ = list_[2][1:].split('/')
                print date_

                if date_:

                    try:
                        print list_[4][1:]
                        hours_match = re.search(re_hours, list_[4][1:])
                        if hours_match:
                            print 'MATCH!', hours_match.group()
                            hour_ = hours_match.group().split(' ')[1]
                            print hour_
                        else:
                            hour_ = 0
                    except ValueError:
                        print "ValueError while retrieving hour"
                        hour_ = 0

                    datestart = datetime(int(date_[2]), int(date_[1]), int(date_[0]), int(hour_), tzinfo=lt)
                    dateend = datestart + timedelta(hours=default_event_duration)

                else:
                    print "NON TROVO DATA", p
                    continue

                print "DateStart: ", datestart
                print "DateEnd: ", dateend

                location = list_[-1][1:]
                #print "location: ", location
                summary = list_[1][1:]
                #print "summary: ", summary
                description = list_[-2][2:]
                #print "description: ", description

                url_match = re.search(re_url, p)
                if url_match:
                    url = base_url + url_match.group()
                    if description:
                        description = description + " - " + url
                    else :
                        description = url

                event.add('dtstart', datestart)
                event.add('dtstamp', datestart) # maybe it's better to use NOW()
                event.add('dtend', dateend)

                #print summary, location, description

                event.add('location', location)
                event.add('description', description)
                event.add('summary', summary)
                
                #TODO: add other info like the date!!
                
                event['uid'] = list_[0][1:]

                yield event
            except:
                print 'ERRORE', sys.exc_info()[0]
                print p
                print
                continue