def get_public_problems(profile, verbose=True):
    """Extract public submissions from the given profile.

    Walks the paginated public profile listing (requested sorted by
    ascending run id), converts every table row into a ``Problem`` and
    gathers them into a single data frame.

    Args:
        profile: Profile identifier interpolated into the profile URL.
        verbose: Forwarded to ``htmlopen`` and ``urlopen``.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed submission, using
        ``problem_fields`` as column names.

    Raises:
        RuntimeError: Re-raised from ``htmlopen`` unless its first argument
            is 404, which is treated as the end of the listing.
        IndexError: If a fetched page contains no ``table/tbody`` element.
    """
    # Number of rows that marks a page as complete. A shorter page is either
    # the last one or (presumably) a stale cached copy, so it is re-fetched
    # once before being accepted.
    full_page_size = 28

    problems = []
    urlbase = ('https://www.urionlinejudge.com.br/judge/pt/profile/'
               '%s/page:%s/sort:run_id/direction:asc')
    refreshed = set()
    i = 0
    while True:
        i += 1
        url = urlbase % (profile, i)

        # Read html or stop when an error page (404) is encountered.
        try:
            html = htmlopen(url, verbose=verbose)
        except RuntimeError as ex:
            # Check ``args`` is non-empty so an argument-less RuntimeError is
            # re-raised as-is instead of being masked by an IndexError.
            if ex.args and ex.args[0] == 404:
                break
            raise

        # Process table
        tbody = html.xpath('//table/tbody')[0]
        transaction = []
        for tr in tbody:
            data = [x.text_content().strip() for x in tr]
            # Stop at the first row that is not a submission: a row without
            # cells, a single-cell row, or a row whose first cell is empty.
            # The length test comes first so ``data[0]`` is never evaluated
            # on an empty row.
            if len(data) <= 1 or not data[0]:
                break

            # Convert some columns..
            data[0] = int(data[0])  # problem profile
            data[2] = _ranking(data[2])  # ranking
            data[5] = float(data[5])  # time
            data[6] = _todatetime(data[6])  # date
            transaction.append(Problem(*data))

        # Add transaction to problem list
        if len(transaction) == full_page_size:
            problems.extend(transaction)
        elif url not in refreshed:
            # Force refresh, then step the counter back so the same page is
            # read again on the next iteration.
            urlopen(url, verbose=verbose, expires=120)
            refreshed.add(url)
            i -= 1
        else:
            # Already refreshed once: accept the short page as the last one.
            problems.extend(transaction)
            break

    # Create dataframe
    return pd.DataFrame(problems, columns=problem_fields)
def get_public_profile(profile, verbose=True):
    """View all non-problem related information in the public profile.

    Fetches the first page of the public profile, reads the username and the
    ``label: value`` entries of the ``pb-information`` list, and converts
    them into keyword arguments for ``Profile``.

    Args:
        profile: Profile identifier interpolated into the profile URL.
        verbose: Forwarded to ``htmlopen``.

    Returns:
        A ``Profile`` built from ``username``, ``date``, ``ranking``,
        ``solved``, ``submissions``, ``tried``, ``country`` and
        ``university``, plus any other entry found in the information list
        (keyed by its lower-cased label).

    Raises:
        IndexError: If the username element is not present in the page.
        KeyError: If one of the expected labels is missing from the page.
        ValueError: If a numeric or date field cannot be converted.
    """
    url = ('https://www.urionlinejudge.com.br/judge/pt/profile/'
           '%s/page:1/sort:run_id/direction:asc') % profile
    html = htmlopen(url, verbose=verbose)
    username = html.xpath('//div[@class="pb-username"]')[0].text_content()
    info = html.xpath('//ul[@class="pb-information"]/li')

    # Split every list item at its first colon. Both sides are stripped here
    # so no field keeps the whitespace surrounding the colon; previously
    # only the university value was stripped.
    data = {}
    for item in info:
        label, _, value = item.text_content().strip().partition(':')
        data[label.strip().lower()] = value.strip()
    data['username'] = username

    # Make conversions (the page labels are in Portuguese).
    dd, mm, yyyy = map(int, data.pop('desde').split('/'))
    data['date'] = datetime.date(yyyy, mm, dd)
    # The last character of the position is dropped before conversion
    # (presumably an ordinal marker -- confirm against the page).
    data['ranking'] = int(data.pop('posição')[:-1])
    data['solved'] = int(data.pop('resolvido'))
    data['submissions'] = int(data.pop('submissões'))
    data['tried'] = int(data.pop('tentado'))
    data['country'] = data.pop('país')
    data['university'] = data.pop('universidade')
    return Profile(**data)
def __htmlopen(self, url, **kwds):
    """Open ``url`` through ``htmlopen``, using the session when needed.

    When ``url`` is not found in ``urlcache()`` the call is made with
    ``session=self.session``; otherwise the session is not passed. All
    extra keyword arguments are forwarded to ``htmlopen`` unchanged.
    """
    if url not in urlcache():
        return htmlopen(url, session=self.session, **kwds)
    return htmlopen(url, **kwds)