def get_remote_oj(cls):
    """Rebuild the stored language entries for every remote OJ.

    Fetches ``/util/remoteOJs`` from ``cls.host`` and, for each entry of the
    JSON response, calls ``Language.delete_oj`` with the entry's name and
    then ``Language.create`` once per item of the entry's ``languages``
    mapping.
    """
    endpoint = 'https://{}/util/remoteOJs'.format(cls.host)
    remote_ojs = SpiderHttp().get(url=endpoint).json()
    # Only the entries matter; the top-level keys of the response are unused.
    for oj in remote_ojs.values():
        Language.delete_oj(oj['name'])
        for lang_key, lang_value in oj['languages'].items():
            Language.create(oj=oj['name'], key=lang_key, value=lang_value)
def get_user_info(self, oj_username):
    """Scrape the HDU user-status page for ``oj_username.oj_username``.

    Returns the digit strings captured from ``p(<digits>...);`` occurrences
    in the text of the first ``<p align="left">`` element (presumably the
    solved problem ids -- confirm against the page markup).

    Raises:
        IndexError: if the page contains no ``<p align="left">`` element.
    """
    handle = oj_username.oj_username
    page = SpiderHttp().get(
        url='http://acm.hdu.edu.cn/userstatus.php?user={}'.format(handle))
    document = BeautifulSoup(page.text, 'lxml')
    first_left_paragraph = document.findAll('p', {'align': 'left'})[0]
    return re.findall(r'p\((\d+).*?\);', first_left_paragraph.text)
def _get_id_by_username(username):
    """Resolve a Nowcoder username to a profile id via the site search.

    Returns the digits of the first ``/profile/<digits>`` link found in the
    search result page (as a string), or ``None`` when the page has no such
    link.
    """
    search_url = 'https://www.nowcoder.com/search?type=all&query={}'.format(
        username)
    page = SpiderHttp().get(url=search_url)
    first_hit = re.search(r'/profile/(\d+)', page.text)
    return first_hit.group(1) if first_hit else None
def get_user_info(self, oj_username):
    """Scrape the LOJ profile page of ``oj_username.oj_username``.

    The username is first translated to a numeric id with
    ``LojSpider._get_user_id``; the profile page of that id is then fetched
    and every ``<a href="/problem/<digits>">`` link on it is collected.

    Returns:
        List of problem id strings, in page order.
    """
    handle = oj_username.oj_username
    user_id = LojSpider._get_user_id(handle)
    profile = SpiderHttp().get(url='https://loj.ac/user/{}'.format(user_id))
    return re.findall(r'<a href="/problem/(\d+)">', profile.text)
def _change_problem_id(problem_id):
    """Map a Jisuanke problem id to the id found in the response URL.

    The mapping is looked up with ``get_value('jisuanke-<id>')`` first. On a
    miss (falsy value) the problem page is requested, the id is taken from
    the part of ``res.url`` after ``//nanti.jisuanke.com/t/`` (presumably the
    URL after redirects -- confirm against SpiderHttp), stored with
    ``set_value`` and returned.

    Raises:
        IndexError: if the response URL does not contain the expected prefix.
    """
    cache_key = 'jisuanke-{}'.format(problem_id)
    cached_id = get_value(cache_key)
    if cached_id:
        return cached_id

    res = SpiderHttp().get(
        url='http://nanti.jisuanke.com/t/{}'.format(problem_id))
    resolved_id = re.findall(r'//nanti.jisuanke.com/t/(.*)', res.url)[0]
    set_value(cache_key, resolved_id)
    return resolved_id
def get_user_info(self, oj_username):
    """Fetch the accepted problems of a VJudge user.

    POSTs to the ``solveDetail`` endpoint for ``oj_username.oj_username`` and
    flattens the ``acRecords`` mapping of the JSON response (OJ name ->
    problem ids) into ``'<oj_name>-<problem_id>'`` strings.

    Returns:
        List of ``'<oj_name>-<problem_id>'`` strings.
    """
    handle = oj_username.oj_username
    response = SpiderHttp().post(
        url='http://vjudge.net/user/solveDetail/{}'.format(handle))
    ac_records = json.loads(response.text)['acRecords']
    return [
        '{}-{}'.format(oj_name, problem_id)
        for oj_name in ac_records
        for problem_id in ac_records[oj_name]
    ]
def get_problem_info(self, problem_id):
    """Estimate the rating of a LOJ problem from its search-result row.

    Searches LOJ for ``problem_id`` and takes the first pair of numeric
    ``<td>`` cells on consecutive lines as (accepted, total). The rating is
    ``int(calculate_problem_rating(total, accept))``.

    Returns:
        ``{'rating': rating}``; ``DEFAULT_PROBLEM_RATING`` is used when the
        request, the parsing or the calculation fails.
    """
    try:
        url = 'http://loj.ac/problems/search?keyword={}'.format(problem_id)
        res = SpiderHttp().get(url=url)
        data = re.findall(r'<td>(\d+)</td>\n.*<td>(\d+)</td>', res.text)[0]
        accept = int(data[0])
        total = int(data[1])
        rating = int(calculate_problem_rating(total, accept))
    except Exception:
        # Best effort: any ordinary failure falls back to the default, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def get_problem_info(self, problem_id):
    """Derive the rating of a Nowcoder problem from its star count.

    Searches the Nowcoder problem list for ``problem_id``, reads the first
    ``<digits>星`` table cell and uses the number as an index into
    ``star_rating``.

    Returns:
        ``{'rating': rating}``; ``DEFAULT_PROBLEM_RATING`` is used when the
        request or parsing fails or the star count is outside the table.
    """
    star_rating = [DEFAULT_PROBLEM_RATING, 800, 1200, 1600, 2000, 2400]
    try:
        url = 'https://ac.nowcoder.com/acm/problem/list?keyword={}'.format(
            problem_id)
        res = SpiderHttp().get(url=url)
        data = re.findall(r'<td>\n(\d+)星\n</td>', res.text)
        # The pattern has a single group, so each match is the digit string
        # itself; parse all of it rather than only its first character.
        star = int(data[0])
        rating = star_rating[star]
    except Exception:
        # Best effort: fall back to the default on any ordinary failure
        # without swallowing KeyboardInterrupt/SystemExit.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def __init__(self, remote_user):
    """Create a spider session bound to ``remote_user``.

    Copies the username and password from ``remote_user``, creates a
    ``SpiderHttp`` session and seeds it with the user's stored cookies when
    there are any. If ``check_login_status()`` is false afterwards,
    ``login()`` is attempted; after a successful login the status is checked
    again and the session cookies are written back through
    ``remote_user.modify``.

    Raises:
        Exception: ``'remote user login error'`` when ``login()`` fails.
    """
    self.username = remote_user.username
    self.password = remote_user.password
    self.request = SpiderHttp()
    if remote_user.cookies:
        self.request.sess.cookies.update(remote_user.cookies)

    # Stored cookies are still valid: nothing more to do.
    if self.check_login_status():
        return

    if not self.login():
        raise Exception('remote user login error')
    assert self.check_login_status()
    remote_user.modify(cookies=self.request.sess.cookies.get_dict())
def get_problem_info(self, problem_id):
    """Estimate the rating of a Jisuanke problem from its statistics.

    Fetches the problem page and reads the accepted and total submission
    counts from the ``通过 N 人次 / 提交 M 人次`` text. The rating is
    ``int(calculate_problem_rating(total, accept))``.

    Returns:
        ``{'rating': rating}``; ``DEFAULT_PROBLEM_RATING`` is used when the
        request, the parsing or the calculation fails.
    """
    try:
        url = 'https://nanti.jisuanke.com/t/{}'.format(problem_id)
        res = SpiderHttp().get(url=url)
        data = re.findall(r'通过 (\d+) 人次 / 提交 (\d+) 人次', res.text)
        accept = int(data[0][0])
        total = int(data[0][1])
        rating = int(calculate_problem_rating(total, accept))
    except Exception:
        # Best effort: fall back to the default on any ordinary failure
        # without swallowing KeyboardInterrupt/SystemExit.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def _get_gym_constest_rating(contest_id):
    """Return the rating of a Codeforces gym contest from its star count.

    The star count is looked up with ``mapping.get_value('gym-<id>')``
    first. On a miss (``None``) the gym list is searched for ``contest_id``
    (a CSRF token is read from the page and sent with the search form), the
    ``<img>`` tags inside the contest's table row are counted as stars, and
    the count is stored with ``mapping.set_value``.

    Returns:
        The ``star_rating`` entry for the star count.
    """
    star_rating = [DEFAULT_PROBLEM_RATING, 1200, 1600, 2000, 2400, 2800]
    cache_key = 'gym-{}'.format(contest_id)

    cached_stars = mapping.get_value(cache_key)
    if cached_stars is not None:
        return star_rating[int(cached_stars)]

    gyms_url = 'https://codeforces.com/gyms'
    http = SpiderHttp()

    # The search form needs the CSRF token from the same session.
    landing_page = BeautifulSoup(http.get(url=gyms_url).text, 'lxml')
    csrf_token = landing_page.find('input', {'name': 'csrf_token'})['value']

    search_form = {
        'csrf_token': csrf_token,
        'searchByNameOrIdQuery': contest_id,
        'searchByProblem': False,
    }
    result_page = BeautifulSoup(
        http.post(url=gyms_url, data=search_form).text, 'lxml')
    contest_row = result_page.find('tr', {'data-contestid': contest_id})
    star_count = len(contest_row.findAll('img'))

    mapping.set_value(cache_key, str(star_count))
    return star_rating[star_count]
def get_problem_info(self, problem_id):
    """Estimate the rating of an HDU problem from its submission counts.

    Fetches the problem page and parses the ``Total Submission(s)`` and
    ``Accepted Submission(s)`` numbers, which are passed to
    ``calculate_problem_rating(total, accept)``.

    Returns:
        ``{'rating': rating}``; ``DEFAULT_PROBLEM_RATING`` is used when the
        parsing or the calculation fails. Errors raised by the HTTP request
        itself propagate.
    """
    url = 'http://acm.hdu.edu.cn/showproblem.php?pid={}'.format(problem_id)
    res = SpiderHttp().get(url=url)
    try:
        re_res = re.search(
            r'<br>Total Submission\(s\): (\d+)( ){4}Accepted Submission\(s\): (\d+)<br>',
            res.text)
        total = int(re_res.group(1))
        # Group 2 is the separator, so the accepted count is group 3.
        accept = int(re_res.group(3))
        rating = calculate_problem_rating(total, accept)
    except Exception:
        # Best effort: fall back to the default on any ordinary failure
        # without swallowing KeyboardInterrupt/SystemExit.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def _get_problem_info(self, problem_id):
    """Translate a NIT problem id into an ``'<oj>-<id>'`` identifier.

    Fetches the NIT problem page and reads an OJ name and a remote problem
    id from the ``#conditions`` element. When a remote problem id is present
    the result is ``'<oj_name>-<remote_problem_id>'``; otherwise, or when
    parsing fails, it is ``'nit-<problem_id>'``. Errors raised by the HTTP
    request itself propagate.
    """
    url = 'https://www.nitacm.com/problem_show.php?pid={}'.format(
        problem_id)
    res = SpiderHttp().get(url=url)
    try:
        selector = Selector(res.text)
        oj_name = selector.xpath(
            '//*[@id="conditions"]/span[1]/text()').get()
        remote_problem_id = selector.xpath(
            '//*[@id="conditions"]/a/text()').get()
        if remote_problem_id:
            return oj_name + "-" + remote_problem_id
        return "nit-" + problem_id
    except Exception:
        # Best effort: fall back to the local id on any ordinary failure
        # without swallowing KeyboardInterrupt/SystemExit.
        return "nit-" + problem_id
def get_user_info(self, oj_username):
    """Fetch the accepted problems of a Codeforces user.

    Calls the ``user.status`` API method for ``oj_username.oj_username`` and
    collects every submission whose verdict is ``'OK'``.

    Returns:
        List of ``'<contestId>-<index>'`` strings (one per accepted
        submission, duplicates included), or ``[]`` when the API status is
        not ``'OK'``.
    """
    username = oj_username.oj_username
    url = 'http://codeforces.com/api/user.status?handle={}'.format(
        username)
    res = json.loads(SpiderHttp().get(url=url).text)
    if res['status'] != 'OK':
        return []

    accept_problem_list = []
    for rec in res['result']:
        # ``verdict`` is optional in the API response; a submission without
        # one must not abort the whole crawl.
        if rec.get('verdict') != 'OK':
            continue
        problem = rec['problem']
        contest_id = problem.get('contestId')
        if contest_id is None:
            # ``contestId`` is optional too; without it no id can be built.
            continue
        accept_problem_list.append(
            '{}-{}'.format(contest_id, problem['index']))
    return accept_problem_list
def get_problem_info(self, problem_id):
    """Estimate the rating of a POJ problem from its submission counts.

    Fetches the problem page and parses the ``Total Submissions`` and
    ``Accepted`` table cells, which are passed to
    ``calculate_problem_rating(total, accept)``.

    Returns:
        ``{'rating': rating}``; ``DEFAULT_PROBLEM_RATING`` is used when the
        parsing or the calculation fails. Errors raised by the HTTP request
        itself propagate.
    """
    url = 'http://poj.org/problem?id={}'.format(problem_id)
    res = SpiderHttp().get(url=url)
    try:
        total = int(
            re.search(r'<td><b>Total Submissions:</b> (\d+)</td>',
                      res.text).group(1))
        accept = int(
            re.search(r'<td><b>Accepted:</b> (\d+)</td>',
                      res.text).group(1))
        rating = calculate_problem_rating(total, accept)
    except Exception:
        # Best effort: fall back to the default on any ordinary failure
        # without swallowing KeyboardInterrupt/SystemExit.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def get_user_info(self, oj_username):
    """Scrape the NIT user page of ``oj_username.oj_username``.

    Collects the ``.text`` of every child of the ``#userac`` element
    (children without a ``text`` attribute are skipped) and maps each one
    through ``self._get_problem_info``.

    Returns:
        List of the values returned by ``_get_problem_info``, in page order.
    """
    handle = oj_username.oj_username
    page = SpiderHttp().get(
        url='https://www.nitacm.com/userinfo.php?name={}'.format(handle))
    document = BeautifulSoup(page.text, 'lxml')

    local_ids = []
    for child in document.find(id='userac'):
        try:
            local_ids.append(child.text)
        except AttributeError:
            continue

    # Resolve only after the whole page has been walked, as before.
    return [self._get_problem_info(local_id) for local_id in local_ids]
def get_problem_info(self, problem_id):
    """Estimate the rating of a NIT problem from its statistics page.

    Fetches ``problem_stat.php`` and reads the link text in the first and
    second rows of the ``#probstat`` table as the total and accepted counts,
    which are passed to ``calculate_problem_rating(total, accept)``.

    Returns:
        ``{'rating': rating}``; ``DEFAULT_PROBLEM_RATING`` is used when the
        parsing or the calculation fails. Errors raised by the HTTP request
        itself propagate.
    """
    url = 'https://www.nitacm.com/problem_stat.php?pid={}'.format(
        problem_id)
    res = SpiderHttp().get(url=url)
    try:
        selector = Selector(res.text)
        total = int(
            selector.xpath(
                '//*[@id="probstat"]/tbody/tr[1]/td/a/text()').get())
        accept = int(
            selector.xpath(
                '//*[@id="probstat"]/tbody/tr[2]/td/a/text()').get())
        rating = calculate_problem_rating(total, accept)
    except Exception:
        # Best effort: fall back to the default on any ordinary failure
        # without swallowing KeyboardInterrupt/SystemExit.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def get_problem_info(self, problem_id):
    """Look up the rating of a Codeforces problem or gym problem.

    ``problem_id`` is split into a numeric contest id and a problem index
    (letters optionally followed by digits). Contest ids below 100000 are
    read from the problemset page's ``Difficulty`` tag; all others are
    treated as gym contests and rated with ``_get_gym_constest_rating``.

    Returns:
        ``{'rating': rating}``; ``DEFAULT_PROBLEM_RATING`` is used when the
        id is malformed or the lookup fails. Errors raised by the problemset
        HTTP request itself propagate.
    """
    p = re.match('^([0-9]+)([a-zA-Z]+[0-9]*)$', problem_id)
    if p is None:
        # A malformed id is handled like every other lookup failure instead
        # of raising AttributeError on ``None.group``.
        return {'rating': DEFAULT_PROBLEM_RATING}
    problem_id_1 = p.group(1)
    problem_id_2 = p.group(2)

    if int(problem_id_1) < 100000:
        # Regular problemset problem.
        url = 'https://codeforces.com/problemset/problem/{}/{}'.format(
            problem_id_1, problem_id_2)
        res = SpiderHttp().get(url=url)
        try:
            rating = int(
                re.search(r'title="Difficulty">\s*\*(\d+)\s*</span>',
                          res.text).group(1))
        except Exception:
            rating = DEFAULT_PROBLEM_RATING
    else:
        # Gym contest.
        try:
            rating = self._get_gym_constest_rating(problem_id_1)
        except Exception:
            rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def get_user_info(self, oj_username):
    """Collect problem ids from a Nowcoder user's practice pages.

    The username is replaced by the profile id found by
    ``NowcoderSpider._get_id_by_username`` when there is one. Result pages
    are then fetched one after another, gathering every
    ``/acm/problem/<digits>`` id, until a page yields exactly the same ids
    as the page before it (an empty first page stops immediately).

    Returns:
        List of problem id strings across all fetched pages.
    """
    profile = oj_username.oj_username
    resolved_id = NowcoderSpider._get_id_by_username(profile)
    if resolved_id:
        profile = resolved_id

    collected = []
    previous_page_ids = []
    page_no = 1
    while True:
        url = 'https://ac.nowcoder.com/acm/contest/profile/{}/practice-coding?pageSize=200&statusTypeFilter=5&page={}'.format(
            profile, page_no)
        page = SpiderHttp().get(url=url)
        page_ids = re.findall(r'/acm/problem/(\d+)', page.text)
        # A repeated page means we ran past the last page of results.
        if page_ids == previous_page_ids:
            break
        collected.extend(page_ids)
        previous_page_ids = page_ids
        page_no += 1
    return collected
def get_problem_info(self, problem_id):
    """Estimate the rating of a ZOJ problem from its status page.

    The status page is requested with ``int(problem_id) - 1000`` as its
    ``problemId``. The total count is read from the tenth cell and the
    accepted count from the ``<n>(<p>%)`` text of the first cell of the
    table's second row; both are passed to
    ``calculate_problem_rating(total, accept)``.

    Returns:
        ``{'rating': rating}``; ``DEFAULT_PROBLEM_RATING`` is used when the
        parsing or the calculation fails.

    Raises:
        ValueError: if ``problem_id`` is not an integer string (the
            conversion happens before the guarded section).
    """
    problem_id = int(problem_id) - 1000
    url = 'http://acm.zju.edu.cn/onlinejudge/showProblemStatus.do?problemId={}'.format(
        problem_id)
    res = SpiderHttp().get(url=url)
    try:
        selector = Selector(res.text)
        total = int(
            selector.xpath(
                '//*[@id="content_body"]/div[2]/table/tr[2]/td[10]/a/text()'
            ).get())
        accept_tmp = selector.xpath(
            '//*[@id="content_body"]/div[2]/table/tr[2]/td[1]/a/text()'
        ).get()
        accept = int(re.search(r'(\d+)\(\d+%+\)', accept_tmp).group(1))
        rating = calculate_problem_rating(total, accept)
    except Exception:
        # Best effort: fall back to the default on any ordinary failure
        # without swallowing KeyboardInterrupt/SystemExit.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def get_user_info(self, oj_username):
    """Collect problem ids from a Jisuanke user's timeline.

    Timeline pages are fetched starting at page 1 until a page is not valid
    JSON or has an empty/missing ``data`` field. For each entry the id after
    ``//nanti.jisuanke.com/t/`` in its ``url`` is extracted and passed
    through ``JisuankeSpider._change_problem_id``.

    Returns:
        List of the ids returned by ``_change_problem_id``.

    Raises:
        IndexError: if an entry's ``url`` lacks the expected prefix.
    """
    username = oj_username.oj_username
    page = 1
    accept_list = []
    while True:
        url = 'https://i.jisuanke.com/timeline/nanti/{}?page={}'.format(
            username, page)
        res = SpiderHttp().get(url=url)
        try:
            res_json = json.loads(res.text)
        except (TypeError, ValueError):
            # A non-JSON body ends the pagination. json.JSONDecodeError is a
            # ValueError; KeyboardInterrupt/SystemExit are not swallowed.
            break
        entries = res_json.get('data', dict())
        if not entries:
            break
        for data in entries:
            problem_id = re.findall('//nanti.jisuanke.com/t/(.*)',
                                    data['url'])[0]
            problem_id = JisuankeSpider._change_problem_id(problem_id)
            accept_list.append(problem_id)
        page += 1
    return accept_list
def get_user_info(self, oj_username):
    """Fetch a ZOJ user's solved list from the npuacm crawler API.

    Returns:
        The ``data.solvedList`` value of the JSON response, or an empty list
        when ``data`` or ``solvedList`` is missing.
    """
    handle = oj_username.oj_username
    response = SpiderHttp().get(
        url='http://new.npuacm.info/api/crawlers/zoj/{}'.format(handle))
    payload = json.loads(response.text)
    data = payload.get('data', dict())
    return data.get('solvedList', list())
def get_user_info(self, oj_username):
    """Scrape the POJ user-status page of ``oj_username.oj_username``.

    Returns:
        List of the digit strings found in ``p(<digits>)`` occurrences on
        the page (presumably the solved problem ids -- confirm against the
        page markup).
    """
    handle = oj_username.oj_username
    page = SpiderHttp().get(
        url='http://poj.org/userstatus?user_id={}'.format(handle))
    return re.findall(r'p\((\d+)\)', page.text)
def _get_user_id(username):
    """Resolve a LOJ nickname to its numeric user id.

    Requests ``find_user`` for ``username`` and takes the first
    ``user/<digits>`` occurrence in the response's URL (presumably the URL
    after redirects -- confirm against SpiderHttp).

    Returns:
        The user id as a string.

    Raises:
        IndexError: if the response URL contains no ``user/<digits>`` part.
    """
    lookup_url = 'http://loj.ac/find_user?nickname={}'.format(username)
    response = SpiderHttp().get(url=lookup_url)
    return re.findall(r'user/(\d+)', response.url)[0]