Esempio n. 1
0
def scrape_thread(thread):
    """Scrape a forum thread's first post for an eveboard character sheet.

    The first post is searched for a link containing ``eveboard.com/pilot/``.
    When one is found, the post text is scanned with every pattern in
    ``RS_PWD`` for candidate passwords, and ``scrape_character`` is tried with
    each candidate (plus the common default ``'1234'``) before finally being
    tried without a password.

    Args:
        thread: Mapping with at least a ``'threadID'`` key, which is
            interpolated into ``THREAD_URL``.

    Returns:
        The truthy result of ``scrape_character`` updated with ``'charname'``
        and ``'password'`` keys, or ``None`` when the first post or the
        eveboard link is missing or no attempt produced a result.
    """
    logger.debug("Scraping thread {0}".format(thread))
    response = urllib2.urlopen(FORUM_URL + THREAD_URL % thread['threadID'])
    try:
        html = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    thread_soup = BeautifulSoup(html)
    posts = thread_soup.findAll(
        'div',
        attrs={'id': 'forum_ctl00_MessageList_ctl00_DisplayPost1_MessagePost1'})
    if not posts:
        # Page layout differs (deleted thread, error page, ...): nothing to
        # scrape, so report "no result" instead of raising IndexError.
        logger.debug("Could not find first post")
        return None
    first_post = posts[0]

    eveboard_link = first_post.find(
        'a', href=re.compile('.*eveboard.com/pilot/.*'))
    if not eveboard_link:
        logger.debug("Could not find eveboard link")
        return None

    logger.debug("Found eveboard link {0}".format(eveboard_link))
    pilot_name = eveboard_link['href'].split('/pilot/')[1]

    # Clean up tags and bbcode so the password patterns see plain text.
    for tag_name in ('a', 'img'):
        for tag in first_post(tag_name):
            tag.extract()
    post_text = first_post.prettify()
    for old, new in (('<br />', ' '), ('\n', ''), ('<i>', ''), ('</i>', ''),
                     ('<b>', ''), ('</b>', '')):
        post_text = post_text.replace(old, new)

    # Collect unique password candidates in the order they were found.
    passwords = []
    for pattern in RS_PWD:
        for match in pattern.finditer(post_text):
            candidate = match.group(1)
            if candidate not in passwords:
                logger.debug(
                    "Found eveboard password {0}".format(candidate))
                passwords.append(candidate)

    if passwords:
        # Also try the most commonly used password, but only once.
        if '1234' not in passwords:
            passwords.append('1234')
        for password in passwords:
            scraped_info = scrape_character(pilot_name, password)
            if scraped_info:
                scraped_info.update(
                    {'charname': pilot_name, 'password': password})
                return scraped_info
        logger.debug("Password didin't work trying without")
    else:
        logger.debug("No passwords found trying without")

    # Last resort: the sheet may not be password protected at all.
    scraped_info = scrape_character(pilot_name, None)
    if scraped_info:
        scraped_info.update({'charname': pilot_name, 'password': None})
        return scraped_info
    return None
 def refresh_characters(self, limit, staleness):
     """Re-scrape stale characters attached to recently updated threads.

     A character is refreshed when its ``last_update`` is unset or at
     least ``staleness`` days old, and it belongs to a non-blacklisted
     thread whose ``last_update`` falls within the last ``limit`` days.
     Characters for which ``scrape_character`` returns a falsy result
     are left untouched.

     Args:
         limit: Thread recency window, in days.
         staleness: Minimum age of a character's data, in days.

     Raises:
         IndexError: If a scraped skill has no matching ``Skill`` row.
     """
     current_time = now()
     staledate = current_time - timedelta(days=staleness)
     update_limit = current_time - timedelta(days=limit)
     updated_threads = Thread.objects.filter(last_update__gte=update_limit,
                                             blacklisted=False)
     stale_characters = Character.objects.filter(
         Q(last_update__lte=staledate) | Q(last_update=None),
         thread__in=updated_threads)
     for character in stale_characters:
         logger.debug("Updating stale character %s" % character.name)
         scraped_info = scrape_character(character.name, character.password)
         if not scraped_info:
             continue
         new_sp_total = 0
         for skill in scraped_info['skills']:
             # Each scraped skill is indexed as (name, level, skill points).
             skill_name, level, skill_points = skill[0], skill[1], skill[2]
             # Resolve renamed skills *before* looking for an existing
             # row; otherwise a skill scraped under its old name never
             # matches and a duplicate CharSkill is created every run.
             if skill_name in STUPID_OLDNAMELOOKUP:
                 skill_name = STUPID_OLDNAMELOOKUP[skill_name]
             matches = list(
                 character.skills.filter(skill__name=skill_name))
             if matches:
                 char_skill = matches[0]
                 char_skill.skill_points = skill_points
                 char_skill.level = level
                 char_skill.save()
             else:
                 char_skill = CharSkill()
                 char_skill.character = character
                 char_skill.skill = Skill.objects.filter(
                     name=skill_name)[0]
                 char_skill.level = level
                 char_skill.skill_points = skill_points
                 char_skill.typeID = char_skill.skill.typeID
                 char_skill.save()
                 character.skills.add(char_skill)
             new_sp_total += skill_points
         character.remaps = scraped_info['stats']['remaps']
         character.unspent_skillpoints = scraped_info['stats'][
             'unallocated_sp']
         character.total_sp = new_sp_total
         character.last_update = now()
         character.save()
         logger.debug("Update of stale character %s complete" %
                      character.name)
 def refresh_characters(self, limit, staleness):
     """Refresh skill data for stale characters of recently active threads.

     Selects characters whose ``last_update`` is ``None`` or no newer than
     ``staleness`` days ago, restricted to non-blacklisted threads updated
     within the last ``limit`` days, and re-scrapes each one. A falsy result
     from ``scrape_character`` leaves that character unchanged.

     Args:
         limit: How many days back a thread update still counts as recent.
         staleness: Age in days after which character data is considered stale.

     Raises:
         IndexError: If no ``Skill`` row exists for a scraped skill name.
     """
     reference_time = now()
     staledate = reference_time - timedelta(days=staleness)
     update_limit = reference_time - timedelta(days=limit)
     updated_threads = Thread.objects.filter(last_update__gte=update_limit, blacklisted=False)
     stale_characters = Character.objects.filter(
         Q(last_update__lte=staledate) | Q(last_update=None), thread__in=updated_threads)
     for character in stale_characters:
         logger.debug("Updating stale character %s" % character.name)
         scraped_info = scrape_character(character.name, character.password)
         if not scraped_info:
             continue
         new_sp_total = 0
         for skill in scraped_info['skills']:
             # Scraped skills are indexed as [0]=name, [1]=level, [2]=skill points.
             resolved_name = skill[0]
             # Map legacy names first so the lookup below and the creation branch agree;
             # looking up by the legacy name would miss the row and create a duplicate.
             if resolved_name in STUPID_OLDNAMELOOKUP:
                 resolved_name = STUPID_OLDNAMELOOKUP[resolved_name]
             existing_rows = list(character.skills.filter(skill__name=resolved_name))
             if existing_rows:
                 cs = existing_rows[0]
                 cs.skill_points = skill[2]
                 cs.level = skill[1]
                 cs.save()
             else:
                 cs = CharSkill()
                 cs.character = character
                 cs.skill = Skill.objects.filter(name=resolved_name)[0]
                 cs.level = skill[1]
                 cs.skill_points = skill[2]
                 cs.typeID = cs.skill.typeID
                 cs.save()
                 character.skills.add(cs)
             new_sp_total += skill[2]
         character.remaps = scraped_info['stats']['remaps']
         character.unspent_skillpoints = scraped_info['stats']['unallocated_sp']
         character.total_sp = new_sp_total
         character.last_update = now()
         character.save()
         logger.debug("Update of stale character %s complete" % character.name)