def get_url(url, time=20):
    '''Fetch url through wget and return the body that was read.

    url  -- address handed to wget.
    time -- value passed to wget as its timeout keyword (default 20).

    Returns whatever read() on the wget response yields. When the fetch
    raises urllib2.HTTPError the error code is printed and None is returned;
    any other exception propagates to the caller.
    '''
    try:
        # The original stored the body in a local and dropped it, so callers
        # could never see what was downloaded.
        return wget(url, timeout=time).read()
    except urllib2.HTTPError as e:
        # Best-effort fetch: report the HTTP error code instead of raising.
        print(e.code)
        return None
def populateList():
    '''Collect every pokemon from the National Pokedex listing page.

    The page at URL + "/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"
    is fetched with wget and parsed with bs. For each row after the header of
    each scanned table, the link inside the fourth cell supplies the name and
    the url suffix. There is also a regional dex number, which is not kept here.

    Returns a list of (name, url_suffix) tuples, in first-seen order and
    without duplicates.
    '''
    path = URL + "/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"
    page = wget(path)
    soup = bs(page.read(), 'html.parser')
    tables = soup.findAll('table')

    # - tables[1] is the list of kanto (kdex) pokemon.
    # - tables[2] is the list of johto (jdex) pokemon.
    # - tables[3] is the list of hoenn (hdex) pokemon.
    # - tables[4] is the list of sinnoh (sdex) pokemon.
    # - tables[5] is the list of unova (udex) pokemon.
    # - tables[6] is the list of kalos pokemon. kalos is special because the region is
    #     split into 3 sub regions, central (cekdex), coastal (cokdex), and mountain (mokdex).
    # - tables[7] is the list of alola (adex) pokemon. it is not populated, as the region
    #     is part of the gen VII game release (not released yet).
    #
    # NOTE(review): the slice below also takes in tables[0], which the list
    # above does not describe -- confirm that scanning it is intended.

    pokemon = []    # kept as a list so the result stays in page order.
    seen = set()    # constant-time duplicate check alongside the ordered list.
    for table in tables[:7]:    # ignoring alola region for now
        # findAll searches the tag's own subtree, so the table does not need
        # to be serialised and parsed a second time.
        rows = table.findAll('tr')
        for row in rows[1:]:    # rows[0] defines column headers.
            info = row.findAll('td')[3]
            poke = (info.a.contents[0], info.a['href'])
            # there are duplicate entries. some pokemon have different "states".
            if poke not in seen:
                seen.add(poke)
                pokemon.append(poke)

    return pokemon
Esempio n. 3
0
 def download_http_photo(self, url, user_profile):
     """Fetch the photo at ``url`` and save it on ``user_profile.photo``.

     Nothing happens when ``url`` is None. Any exception raised while
     fetching or saving is reported through ``logger.error`` and is not
     re-raised, so the method always returns None.
     """
     if url is None:
         return
     try:
         remote = wget(url)
         # The url is passed as the first argument to save(); per the
         # original note, that argument is what supplies the extension.
         save_photo = user_profile.photo.save
         save_photo(url, ContentFile(remote.read()))
     except Exception as err:
         logger.error(
             "Unable to download photo from url %s for user %s because %s",
             url, user_profile.username, err)
def cullPokemonData(pokeTuple):
    '''Grabs data for a single pokemon and inserts it into the pokemon table.

    pokeTuple -- sequence whose element [1] is the url suffix appended to URL
                 (presumably the (name, url_suffix) pairs built by
                 populateList; verify against the caller).

    The page is fetched with wget and parsed with bs; name, category,
    national dex number and type(s) are read from the first table with class
    "roundy". A "." is written to stdout at three points as a progress
    marker. The row is inserted through the module-level cursor and
    committed on db; if the insert fails the transaction is rolled back and
    the failure is reported on stderr. Returns None.
    '''
    path  = URL + pokeTuple[1]
    page  = wget(path)
    sys.stdout.write(".")
    soup  = bs(page.read(), 'html.parser')
    table = soup.find('table', {'class':'roundy'})

    # at this point, I have the right table. need to parse out the following values.
    element  = table.find("td", {"width" : "50%"})
    name     = element.big.big.b.contents[0]
    if "Nidoran" in name:
        # drop the trailing character of the Nidoran names.
        name = name[:-1]

    # to account for inline explain spans
    if len(element.a.span.contents) > 1:
        category = element.a.span.span.contents[0] + element.a.span.contents[1]
    else:
        category = element.a.span.contents[0]
    # replace the accented e with a plain one.
    category = re.sub("\xe9", "e", category)

    sys.stdout.write(".")

    element = table.find("th", {"width" : "25%", "class" : "roundy", "style" : "background:#FFF;"})
    natdex  = element.big.big.a.span.contents[0]

    _type = ""
    element = table.find("td", {"class" : "roundy", "colspan" : "4"})
    types = element.findAll("td")
    if types[0].a.span.b is None:
        # the colspan=4 cell did not hold a type label; use the colspan=2 cell.
        element = table.find("td", {"class" : "roundy", "colspan" : "2"})
    element = element.table.tr.td.table.tr
    types = element.findAll("td")
    for t in types:
        label = t.a.span.b.contents[0]
        if label != "Unknown":
            # each type is followed by a space, so the stored value keeps a
            # trailing space exactly as before.
            _type += label + " "

    sys.stdout.write(".")
    # NOTE(review): the scraped values are interpolated straight into the SQL
    # text, so a value containing a double quote breaks the statement. Switch
    # to the driver's parameter binding once its paramstyle is confirmed.
    script = 'INSERT INTO pokemon(name, category, natdex, type) VALUES ("%s", "%s", "%s", "%s")' % (name, category, natdex, _type)
    try:
        cursor.execute(script)
        out = cursor.fetchone()
        if out:
            print(out)
        db.commit()
    except Exception as e:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through. The insert stays best-effort, but the failure is no longer
        # silent; %r keeps the message ASCII-safe for non-ASCII names.
        db.rollback()
        sys.stderr.write("insert failed for %r: %r\n" % (name, e))