Ejemplo n.º 1
0
 def _cleanlist(self, listvids):
     """Normalize scraped video dicts into a clean result list.

     For each dict in *listvids*: HTML-unescape the ``url``, ``thumb``
     and ``label`` fields, resolve relative URLs against the
     scheme+netloc of ``self.url``, and keep only entries whose
     thumbnail ends in a known image extension.

     Returns a new list of dicts with keys ``url``/``thumb``/``label``;
     the input dicts are not modified.
     """
     resultlist = []
     # Hoist loop-invariant work: one entity parser and one base URL
     # (the originals rebuilt both on every iteration).
     parser = HTMLParser()
     upr = urlparse.urlparse(self.url)
     vbase = upr.scheme + '://' + upr.netloc + '/'
     for vid in listvids:
         assert isinstance(vid, dict)
         # NOTE(review): the former vid.setdefault(vid.keys()[0]) /
         # newvid.setdefault(...) calls were no-ops (the key always
         # existed) and crashed on an empty dict, so they were dropped.
         url = parser.unescape(vid.get('url'))
         thumb = parser.unescape(vid.get('thumb'))
         label = parser.unescape(vid.get('label'))
         if not url.startswith('http'):
             url = urlparse.urlparse(vbase + url.lstrip('/')).geturl()
         if not thumb.startswith('http'):
             thumb = urlparse.urlparse(vbase + thumb.lstrip('/')).geturl()
         # endswith() accepts a tuple: one call instead of an or-chain.
         if thumb.endswith(('.jpg', '.png', '.jpeg')):
             resultlist.append(dict(url=url, thumb=thumb, label=label))
     return resultlist
Ejemplo n.º 2
0
def clean_html(value):
    """Clean an HTML snippet so appy.pod can render it.

    Wraps bare content in ``<p>...</p>``, normalizes it through
    BeautifulSoup, unescapes special entities (like &#xa0;) and runs
    the result through the lxml Cleaner, stripping the ``<div>``
    wrapper that Cleaner adds.  Falsy input is returned unchanged.
    """
    # BUG FIX: the original guard was `if clean_html and value:` --
    # `clean_html` is this very function, which is always truthy, so
    # the condition reduces to `if value:`.
    if value:
        # we need a surrounding <p></p> or the content is not generated by appy.pod
        if not value.startswith(u'<p>') or not value.endswith(u'</p>'):
            value = u'<p>%s</p>' % value
        soup = BeautifulSoup(safe_unicode(value))
        soup_contents = soup.renderContents()
        # renderContents may hand back bytes; force unicode before parsing
        if not isinstance(soup_contents, unicode):
            soup_contents = safe_unicode(soup_contents)
        # clean HTML with HTMLParser, it will remove special entities like &#xa0;
        soup_contents = HTMLParser().unescape(soup_contents)
        # clean HTML with lxml Cleaner
        cleaner = Cleaner()
        soup_contents = cleaner.clean_html(soup_contents)
        # clean_html surrounds the cleaned HTML with <div>...</div>... removes it!
        if soup_contents.startswith(u'<div>') and soup_contents.endswith(
                u'</div>'):
            soup_contents = soup_contents[5:-6]
        # (the former `if not soup_contents == value` guard was an
        # unconditional assignment in disguise)
        value = soup_contents
    return value
Ejemplo n.º 3
0
def populate_restaurants(c):
    print 'Populating Restaurants table...'

    if not (os.access('restaurants', os.R_OK)
            and os.path.isdir('restaurants')):
        print >> sys.stderr, "Error: cannot access raw data directory 'restaurants'"
        sys.exit(1)

    if not (os.access('suburbs.txt', os.R_OK)
            and os.path.isfile('suburbs.txt')):
        print >> sys.stderr, "Error: cannot access raw data file 'suburbs.txt'"
        sys.exit(1)

    #get postcodes from file and cache in dict
    suburbs = open('suburbs.txt').readlines()
    postcodes = {}
    for suburb in suburbs:
        lat, lng, pst, sub = suburb.strip().split('\t')
        postcodes[sub] = pst
    postcodes['CBD'] = 2000  #special case not in data file

    users = c.execute('SELECT username FROM Users').fetchall()
    num_users = c.execute('SELECT COUNT(*) FROM Users').fetchone()[0]

    i = 0
    for restaurant in glob.glob('restaurants/*'):
        r = open(restaurant).readlines()

        #extract info from file
        try:
            name = r[0].strip()
            name = HTMLParser().unescape(name)
            address = r[1].strip()
            address = HTMLParser().unescape(address)
            address = re.sub(r'nsw', 'NSW', address, flags=re.I)
            if not address.endswith(', NSW'):
                address = address + ', NSW'
            suburb = re.match(r'.*, (.+), Sydney', r[1]).group(1)
            suburb = HTMLParser().unescape(suburb)
            phone = r[2].strip().replace('(', '').replace(')', '')
            if re.match('Not available', phone):
                phone = 'Not provided'
            hours = r[3].strip()
            hours = re.sub(r'\s*,\s*', ', ', hours)
            hours = HTMLParser().unescape(hours)
            cuisine = r[4].strip()
            cuisine = HTMLParser().unescape(cuisine)
            cost = r[5].strip()
            image = r[6].strip()
        except:
            print >> sys.stderr, "Error: skipping '%s'" % restaurant
            continue

        #lookup postcode using suburb
        postcode = ''
        if not suburb in postcodes:
            continue
        else:
            postcode = postcodes[suburb]

        #and append it to the address
        address = address + ' ' + str(postcode)

        #chose a random protocol for the website
        protocol = 'http://'
        if random.randint(0, 1) == 1:
            protocol = 'https://'

        #make site of the form protocol://www.lowercase.name.of.restaurant.fake.com
        website = name.replace('  ', ' ').replace(' ', '.').replace(
            '-', '').strip() + '.fake.com'
        website = HTMLParser().unescape(website)
        website = urllib.quote(website)  #encode as url
        website = protocol + 'www.' + website  #avoid encoding the protocol
        website = website.lower().replace('..', '.')

        #ensure only some restaurants have owners
        owner = None
        if random.randint(0, 3) == 0:
            owner = users[random.randint(0, num_users - 1)][0]

        i += 1
        data = (i, name, suburb, address, postcode, phone, hours, cuisine,
                owner, website, cost, image)
        c.execute(
            '''INSERT INTO Restaurants
				(id, name, suburb, address, postcode, phone, hours, cuisine, owner, website, cost, image)
				VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', data)