def scrape_thread(thread):
    """Scrape the eveboard character advertised in a forum thread's first post.

    The thread page is downloaded, the first post is located by its element
    id, and an eveboard ``/pilot/`` link is searched for inside it. Candidate
    passwords are pulled out of the post text with the ``RS_PWD`` patterns
    (group 1 of each match) and tried in order against ``scrape_character``,
    followed by ``'1234'`` and finally no password at all.

    Args:
        thread: Mapping with a ``'threadID'`` entry used to build the URL.

    Returns:
        The value returned by ``scrape_character`` for the first attempt that
        succeeds, updated with ``'charname'`` and ``'password'`` keys, or
        ``None`` when the first post or the eveboard link is missing, or when
        every attempt fails.
    """
    logger.debug("Scraping thread {0}".format(thread))
    response = urllib2.urlopen(FORUM_URL + THREAD_URL % thread['threadID'])
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    thread_soup = BeautifulSoup(html)
    # find() yields None instead of raising IndexError when the page has no
    # first-post container (e.g. an error page or a layout change).
    first_post = thread_soup.find(
        'div',
        attrs={'id': 'forum_ctl00_MessageList_ctl00_DisplayPost1_MessagePost1'})
    if first_post is None:
        logger.debug("Could not find first post")
        return None

    eveboard_link = first_post.find(
        'a', href=re.compile('.*eveboard.com/pilot/.*'))
    if not eveboard_link:
        logger.debug("Could not find eveboard link")
        return None

    logger.debug("Found eveboard link {0}".format(eveboard_link))
    pilot_name = eveboard_link['href'].split('/pilot/')[1]

    # clean up tags and bbcode
    for tag_name in ('a', 'img'):
        for tag in first_post(tag_name):
            tag.extract()
    post_text = first_post.prettify()
    for markup, replacement in (('<br />', ' '), ('\n', ''),
                                ('<i>', ''), ('</i>', ''),
                                ('<b>', ''), ('</b>', '')):
        post_text = post_text.replace(markup, replacement)

    # find them passwords!
    passwords = []
    for pattern in RS_PWD:
        for match in pattern.finditer(post_text):
            candidate = match.group(1)
            if candidate not in passwords:
                logger.debug(
                    "Found eveboard password {0}".format(candidate))
                passwords.append(candidate)

    if passwords and '1234' not in passwords:
        # append the most commonly used password (unless the post already
        # supplied it, in which case retrying it would be a wasted request)
        passwords.append('1234')

    for password in passwords:
        scraped_info = scrape_character(pilot_name, password)
        if scraped_info:
            break
    else:
        # Reached when no password was found or none of them worked:
        # fall back to an unauthenticated scrape.
        if passwords:
            logger.debug("Password didin't work trying without")
        else:
            logger.debug("No passwords found trying without")
        password = None
        scraped_info = scrape_character(pilot_name, None)

    if not scraped_info:
        return None
    scraped_info.update({'charname': pilot_name, 'password': password})
    return scraped_info
def refresh_characters(self, limit, staleness):
    """Re-scrape stale characters attached to recently updated threads.

    A character is selected when its ``last_update`` is unset or at least
    ``staleness`` days old and its thread is among the non-blacklisted
    threads whose ``last_update`` falls within the last ``limit`` days. Each
    selected character is scraped again with ``scrape_character``; on
    success its skills, remaps, unspent skill points, total skill points and
    ``last_update`` are rewritten and saved.

    Args:
        limit: Maximum age, in days, of a thread's last update.
        staleness: Minimum age, in days, of a character's last update.
    """
    current_time = now()
    staledate = current_time - timedelta(days=staleness)
    update_limit = current_time - timedelta(days=limit)

    updated_threads = Thread.objects.filter(
        last_update__gte=update_limit, blacklisted=False)
    stale_characters = Character.objects.filter(
        Q(last_update__lte=staledate) | Q(last_update=None),
        thread__in=updated_threads)

    for character in stale_characters:
        logger.debug("Updating stale character %s", character.name)
        scraped_info = scrape_character(character.name, character.password)
        if not scraped_info:
            continue

        new_sp_total = 0
        for skill in scraped_info['skills']:
            scraped_name, level, skill_points = skill[0], skill[1], skill[2]

            # Resolve renamed skills up front so the lookup of an existing
            # row uses the same name the row was created under; otherwise a
            # renamed skill never matches and is re-created on every run.
            if scraped_name in STUPID_OLDNAMELOOKUP:
                skill_name = STUPID_OLDNAMELOOKUP[scraped_name]
            else:
                skill_name = scraped_name

            # Fetch at most one row and keep a single reference to it so the
            # attribute writes and the save() hit the same instance.
            existing = list(character.skills.filter(skill__name=skill_name)[:1])
            if existing:
                char_skill = existing[0]
                char_skill.skill_points = skill_points
                char_skill.level = level
                char_skill.save()
            else:
                char_skill = CharSkill()
                char_skill.character = character
                char_skill.skill = Skill.objects.filter(name=skill_name)[0]
                char_skill.level = level
                char_skill.skill_points = skill_points
                char_skill.typeID = char_skill.skill.typeID
                char_skill.save()
                character.skills.add(char_skill)
            new_sp_total += skill_points

        stats = scraped_info['stats']
        character.remaps = stats['remaps']
        character.unspent_skillpoints = stats['unallocated_sp']
        character.total_sp = new_sp_total
        character.last_update = now()
        character.save()
        logger.debug("Update of stale character %s complete", character.name)
def refresh_characters(self, limit, staleness):
    """Re-scrape stale characters attached to recently updated threads.

    A character is selected when its ``last_update`` is unset or at least
    ``staleness`` days old and its thread is among the non-blacklisted
    threads whose ``last_update`` falls within the last ``limit`` days. Each
    selected character is scraped again with ``scrape_character``; on
    success its skills, remaps, unspent skill points, total skill points and
    ``last_update`` are rewritten and saved.

    Args:
        limit: Maximum age, in days, of a thread's last update.
        staleness: Minimum age, in days, of a character's last update.
    """
    current_time = now()
    staledate = current_time - timedelta(days=staleness)
    update_limit = current_time - timedelta(days=limit)

    updated_threads = Thread.objects.filter(
        last_update__gte=update_limit, blacklisted=False)
    stale_characters = Character.objects.filter(
        Q(last_update__lte=staledate) | Q(last_update=None),
        thread__in=updated_threads)

    for character in stale_characters:
        logger.debug("Updating stale character %s", character.name)
        scraped_info = scrape_character(character.name, character.password)
        if not scraped_info:
            continue

        new_sp_total = 0
        for skill in scraped_info['skills']:
            scraped_name, level, skill_points = skill[0], skill[1], skill[2]

            # Resolve renamed skills up front so the lookup of an existing
            # row uses the same name the row was created under; otherwise a
            # renamed skill never matches and is re-created on every run.
            if scraped_name in STUPID_OLDNAMELOOKUP:
                skill_name = STUPID_OLDNAMELOOKUP[scraped_name]
            else:
                skill_name = scraped_name

            # Fetch at most one row and keep a single reference to it so the
            # attribute writes and the save() hit the same instance.
            existing = list(character.skills.filter(skill__name=skill_name)[:1])
            if existing:
                char_skill = existing[0]
                char_skill.skill_points = skill_points
                char_skill.level = level
                char_skill.save()
            else:
                char_skill = CharSkill()
                char_skill.character = character
                char_skill.skill = Skill.objects.filter(name=skill_name)[0]
                char_skill.level = level
                char_skill.skill_points = skill_points
                char_skill.typeID = char_skill.skill.typeID
                char_skill.save()
                character.skills.add(char_skill)
            new_sp_total += skill_points

        stats = scraped_info['stats']
        character.remaps = stats['remaps']
        character.unspent_skillpoints = stats['unallocated_sp']
        character.total_sp = new_sp_total
        character.last_update = now()
        character.save()
        logger.debug("Update of stale character %s complete", character.name)