def _tags_with_class(node, css_class):
    """Return every tag under *node* whose attrs contain ``(u'class', css_class)``.

    The comparison is against the complete attribute value, so a tag with
    several classes only matches when *css_class* spells out the whole value.
    """
    return node.findAll(lambda tag: (u'class', css_class) in tag.attrs)


def process_soup(soup):
    """Store the matches listed on an overview page that are not yet saved.

    For every ``recent_date`` tag in *soup* the linked match page is fetched
    with ``get_url`` and parsed for the event name, both player ids, both
    scores and both races.  Players without a local ``Player`` row are handed
    to ``./sc2cp.py`` first.  A ``Match`` is saved unless an equal result
    (in either player order) already exists within two days of the date.

    Args:
        soup: parsed overview page; only ``findAll`` with a predicate is used.

    Returns:
        A tuple ``(nm, False)`` where ``nm`` is the number of matches saved by
        this call.  The second element is always ``False`` here; its meaning
        is defined by the caller.

    Raises:
        IndexError, ValueError, KeyError, ...: when the overview page or a
        match page lacks the expected tags (only the player-id parsing and
        the database section are guarded).
    """
    # Function-scope import: the rest of the file's imports are not visible
    # from this block.
    import subprocess

    dates = _tags_with_class(soup, u'recent_date')
    scores = _tags_with_class(soup, u'recent_score')

    nm = 0
    for i, date_tag in enumerate(dates):
        url = date_tag.parent['href']
        # The match id is the second-to-last path component of the link.
        match_id = int(url.split('/')[-2])

        # NOTE: print is always called with one parenthesised argument so the
        # output is identical under the Python 2 print statement.
        if len(scores[i].contents) < 5:
            print('--- Skipped match %i (score missing on overview page)' % match_id)
            continue

        # Reverse the dot-separated parts and prefix the century, e.g.
        # presumably 'dd.mm.yy' -> '20yy-mm-dd' (format not shown here).
        date = '20' + '-'.join(str(date_tag.contents[0]).split('.')[::-1])

        ns = BeautifulSoup(get_url(url))

        setting = _tags_with_class(ns, u'match_coverage')[0].contents[0].strip()

        logos = _tags_with_class(ns, u'edb_logo_img')
        try:
            player_a = int(logos[0].parent['href'].split('/')[-2].split('-')[0])
            player_b = int(logos[1].parent['href'].split('/')[-2].split('-')[0])
        except Exception:
            # Exception rather than a bare except, so Ctrl-C still stops the
            # scrape instead of being reported as a parse failure.
            print('--- Skipped match %i (couldn\'t parse players)' % match_id)
            continue

        result = _tags_with_class(ns, u'match_score match_score_finished')
        player_a_score = int(result[0].contents[1].contents[0])
        player_b_score = int(result[0].contents[3].contents[0])

        exp_races = [t['alt'].upper() for t in _tags_with_class(ns, u'content_race')[0:2]]

        # Make sure both players exist locally before building the match.
        for sc2c_id, slot in ((player_a, 0), (player_b, 1)):
            try:
                Player.objects.get(sc2c_id=sc2c_id)
            except Exception:
                # The race text comes from scraped HTML, so pass an argument
                # list instead of a shell string: nothing in it can be
                # interpreted by a shell.
                try:
                    subprocess.call(['./sc2cp.py', str(sc2c_id), exp_races[slot]])
                except OSError:
                    # Script missing or not executable.  The lookup below
                    # fails for this player and reports the skip.
                    pass

        try:
            pa = Player.objects.get(sc2c_id=player_a)
            pb = Player.objects.get(sc2c_id=player_b)

            # The scraped date is bound as a query parameter rather than
            # spliced into the SQL text.
            near_date = ['abs(datediff(date,%s)) < 2']
            same_order = Match.objects.filter(
                pla=pa, plb=pb, sca=player_a_score, scb=player_b_score
            ).extra(where=near_date, params=[date])
            swapped = Match.objects.filter(
                pla=pb, plb=pa, sca=player_b_score, scb=player_a_score
            ).extra(where=near_date, params=[date])
            if same_order.exists() or swapped.exists():
                print('--- Skipped match %i (possible duplicate in database)' % match_id)
                continue

            m = Match(pla=pa, plb=pb, sca=player_a_score, scb=player_b_score, date=date)
            m.event = setting + ' (SC2C)'
            m.set_period()
            m.save()
            nm += 1

            print('%i: %s %i-%i %s (%s)' % (m.period.id, pa.tag, player_a_score,
                                           player_b_score, pb.tag, setting))
        except Exception:
            # NOTE(review): this also catches failures of the duplicate check
            # and of saving, which are then reported with the message below.
            # Kept as best-effort so one bad match does not abort the run.
            print('--- Skipped match %i (players not in local database: %i, %i)'
                  % (match_id, player_a, player_b))

    return (nm, False)