예제 #1
0
 async def fetch_status(self, first):
     """Scrape one page of this account's submission table.

     The page URL is ``self.status_prefix`` formatted with the account
     nickname and ``first``; it is loaded with the session cookie.

     Returns:
         A list of submit-status dicts, one per data row; ``False`` when
         ``load_page`` yields nothing; ``None`` when any step raises (the
         error is logged instead of propagated).
     """
     page_url = self.status_prefix.format(self.account.nickname, first)
     records = []
     try:
         response = await self.load_page(page_url, {'Cookie': self.cookie})
         if not response:
             return False
         document = self.get_lxml_bs4(response.body)
         table = document.find('table', class_='table_text')
         for node in table.children:
             # Only <tr> children are turned into records.
             if node.name != 'tr':
                 continue
             # Rows carrying the ``table_header`` class are skipped.
             if node.get('class') and 'table_header' in node.get('class'):
                 continue
             cells = [child.text for child in node.children]
             records.append(dict(
                 type=DataType.Submit,
                 account=self.account,
                 status=submit.SubmitStatus.BROKEN,
                 run_id=cells[0],
                 submit_time=cells[1],
                 result=cells[2],
                 pro_id=cells[3],
                 run_time=cells[4][:-2],  # last two characters dropped
                 memory=cells[5][:-1],  # last character dropped
                 lang=cells[7],
                 code=None,
             ))
         return records
     except Exception as ex:
         logger.error(ex)
         message = '{} fetch status account: {} first: {}'.format(
             self.TAG, self.account, first)
         logger.error(message)
예제 #2
0
 async def get_status(self, handle, start=1, length=50):
     """Load submissions for ``handle`` from the JSON status endpoint.

     ``self.status_prefix`` is formatted with ``handle``, ``start`` and
     ``length``. Entries whose contest id has six or more digits are
     skipped (the original code labelled these "gym").

     Returns:
         A list of submit-status dicts; ``False`` when there is no response
         or the payload's ``status`` is not ``'OK'``; ``None`` when any step
         raises (the error is logged).
     """
     url = self.status_prefix.format(handle, start, length)
     try:
         response = await self.load_page(url)
         if not response:
             return False
         payload = json.loads(response.body.decode())
         if payload['status'] != 'OK':
             return False
         submissions = []
         for entry in payload['result']:
             if len(str(entry['contestId'])) >= 6:
                 continue
             problem_id = '{0}{1}'.format(
                 entry['contestId'], entry['problem']['index'])
             submitted_at = datetime.fromtimestamp(
                 entry['creationTimeSeconds'])
             submissions.append(dict(
                 type=DataType.Submit,
                 account=self.account,
                 status=submit.SubmitStatus.BROKEN,
                 pro_id=problem_id,
                 run_id=entry['id'],
                 submit_time=submitted_at,
                 run_time=entry['timeConsumedMillis'],
                 # bytes -> KiB via integer division
                 memory=entry['memoryConsumedBytes'] // 1024,
                 lang=entry['programmingLanguage'],
                 code=None,
                 result=entry['verdict'],
             ))
         return submissions
     except Exception as e:
         logger.error(e)
예제 #3
0
 async def fetch_status(self, first=''):
     """Scrape one page of this account's submission table.

     The page URL is ``self.status_prefix`` formatted with the account
     nickname and ``first``.

     Returns:
         A list of submit-status dicts, one per data row; ``False`` when
         ``load_page`` yields nothing; ``None`` when any step raises (the
         error is logged instead of propagated).
     """
     page_url = self.status_prefix.format(self.account.nickname, first)
     records = []
     try:
         response = await self.load_page(page_url)
         if not response:
             return False
         document = self.get_lxml_bs4(response.body)
         table = document.find('table', class_='a')
         for node in table.children:
             # Only <tr> children are turned into records.
             if node.name != 'tr':
                 continue
             # Rows carrying the ``in`` class are skipped.
             if node.get('class') and 'in' in node.get('class'):
                 continue
             cells = [child.text for child in node.children
                      if child.name == 'td']
             # An empty value after trimming falls back to '-1'.
             elapsed = cells[5][:-2] or '-1'
             mem_used = cells[4][:-1] or '-1'
             records.append(dict(
                 type=DataType.Submit,
                 account=self.account,
                 status=submit.SubmitStatus.BROKEN,
                 run_id=cells[0],
                 submit_time=cells[8],
                 result=cells[3],
                 pro_id=cells[2],
                 run_time=elapsed,
                 memory=mem_used,
                 lang=cells[6],
                 code=None,
             ))
         return records
     except Exception as ex:
         logger.error(ex)
         message = '{} fetch status => user_id: {} top: {}'.format(
             self.TAG, self.account.nickname, first)
         logger.error(message)
예제 #4
0
 async def fetch_status(self, first):
     """Collect the submissions listed on one status page of this account.

     Builds the URL from ``self.status_prefix`` with the account nickname
     and ``first``, fetches it with the session cookie and converts every
     non-header ``<tr>`` of the ``table_text`` table into a dict.

     Returns:
         list of dicts on success, ``False`` if no response was obtained,
         ``None`` if an exception was raised (it is logged, not re-raised).
     """
     target = self.status_prefix.format(self.account.nickname, first)
     collected = []
     try:
         response = await self.load_page(target, {'Cookie': self.cookie})
         if not response:
             return False
         table = self.get_lxml_bs4(response.body).find(
             'table', class_='table_text')
         for tr in table.children:
             if tr.name != 'tr':
                 continue
             css_classes = tr.get('class')
             if css_classes and 'table_header' in css_classes:
                 continue
             columns = [item.text for item in tr.children]
             entry = {
                 'type': DataType.Submit,
                 'account': self.account,
                 'status': submit.SubmitStatus.BROKEN,
                 'run_id': columns[0],
                 'submit_time': columns[1],
                 'result': columns[2],
                 'pro_id': columns[3],
                 # trailing two / one characters are cut off these fields
                 'run_time': columns[4][:-2],
                 'memory': columns[5][:-1],
                 'lang': columns[7],
                 'code': None,
             }
             collected.append(entry)
         return collected
     except Exception as ex:
         logger.error(ex)
         logger.error(
             '{} fetch status account: {} first: {}'.format(
                 self.TAG, self.account, first))
예제 #5
0
 async def fetch_status(self, first=''):
     """Collect the submissions listed on one status page of this account.

     Builds the URL from ``self.status_prefix`` with the account nickname
     and ``first`` and converts every ``<tr>`` of the table with class
     ``a`` (except rows with class ``in``) into a dict.

     Returns:
         list of dicts on success, ``False`` if no response was obtained,
         ``None`` if an exception was raised (it is logged, not re-raised).
     """
     target = self.status_prefix.format(self.account.nickname, first)
     collected = []
     try:
         response = await self.load_page(target)
         if not response:
             return False
         table = self.get_lxml_bs4(response.body).find('table', class_='a')
         for tr in table.children:
             if tr.name != 'tr':
                 continue
             css_classes = tr.get('class')
             if css_classes and 'in' in css_classes:
                 continue
             columns = [item.text for item in tr.children
                        if item.name == 'td']
             # '-1' stands in when trimming leaves an empty string.
             run_time = columns[5][:-2] or '-1'
             memory = columns[4][:-1] or '-1'
             entry = {
                 'type': DataType.Submit,
                 'account': self.account,
                 'status': submit.SubmitStatus.BROKEN,
                 'run_id': columns[0],
                 'submit_time': columns[8],
                 'result': columns[3],
                 'pro_id': columns[2],
                 'run_time': run_time,
                 'memory': memory,
                 'lang': columns[6],
                 'code': None,
             }
             collected.append(entry)
         return collected
     except Exception as ex:
         logger.error(ex)
         logger.error(
             '{} fetch status => user_id: {} top: {}'.format(
                 self.TAG, self.account.nickname, first))
예제 #6
0
 def init_http_client():
     """Configure ``AsyncHTTPClient`` to use the curl-based implementation.

     Success is logged at info level; any exception raised while
     configuring is logged at error level and not propagated.
     """
     curl_impl = "tornado.curl_httpclient.CurlAsyncHTTPClient"
     try:
         httpclient.AsyncHTTPClient.configure(curl_impl)
         logger.info('[ACM-Spider] 配置 CurlAsyncHTTPClient 成功')
     except Exception as ex:
         failure = '[ACM-Spider] 配置 CurlAsyncHTTPClient 失败: {}'.format(ex)
         logger.error(failure)
예제 #7
0
 async def load_page(url, headers=None, **kwargs):
     """Fetch ``url`` through ``Spider.fetch`` and return the response.

     Args:
         url: address to load.
         headers: optional request headers, forwarded as ``headers=``.
         **kwargs: forwarded unchanged to ``Spider.fetch``.

     Returns:
         The response object, or ``None`` when the fetch raised
         ``httpclient.HTTPError`` (the failure is logged).

     Any other exception propagates to the caller.
     """
     # The previous version ended with ``finally: return response``. A
     # ``return`` inside ``finally`` discards whatever exception is in
     # flight, so the exception raised in the handler never reached callers
     # and every unrelated error was silently turned into ``None``.
     # Returning ``None`` on HTTPError keeps the result that callers test
     # with ``if not response`` while no longer hiding other failures.
     try:
         return await Spider.fetch(url, headers=headers, **kwargs)
     except httpclient.HTTPError as ex:
         logger.error('加载 {} 失败: {}'.format(url, ex))
         return None
예제 #8
0
 async def load_page(url, headers=None, **kwargs):
     """Load ``url`` via ``Spider.fetch``.

     Args:
         url: address to load.
         headers: optional request headers, forwarded as ``headers=``.
         **kwargs: extra keyword arguments handed to ``Spider.fetch``.

     Returns:
         The fetched response, or ``None`` if ``httpclient.HTTPError`` was
         raised during the fetch (logged at error level).

     Exceptions other than ``httpclient.HTTPError`` are not caught here.
     """
     try:
         response = await Spider.fetch(url, headers=headers, **kwargs)
     except httpclient.HTTPError as ex:
         logger.error('加载 {} 失败: {}'.format(url, ex))
         # Callers check ``if not response``; report the failure as None.
         # (Formerly a ``return`` in a ``finally`` clause produced this
         # value, which also swallowed the exception raised here and any
         # other in-flight exception.)
         return None
     return response
예제 #9
0
 async def get_code(self, run_id, **kwargs):
     """Download the source code of submission ``run_id``.

     The page at ``self.code_url_prefix.format(run_id)`` is loaded with the
     session cookie and certificate validation disabled; the text of the
     ``<pre class="sh-c">`` element is returned.

     Args:
         run_id: identifier formatted into the code URL.
         **kwargs: accepted but not used by this implementation.

     Returns:
         The code text, or ``None`` when the page could not be loaded or
         parsing raised (the problem is logged).
     """
     url = self.code_url_prefix.format(run_id)
     try:
         response = await self.load_page(url, {'cookie': self.cookie},
                                         validate_cert=False)
         if not response:
             # Handle a failed load explicitly instead of tripping over
             # ``None.body`` and logging an unrelated AttributeError.
             logger.error('get_code: no response for {}'.format(url))
             return None
         soup = self.get_lxml_bs4(response.body)
         code = soup.find('pre', class_='sh-c').text
         return code
     except Exception as e:
         logger.error(e)
         logger.error(traceback.format_exc())
예제 #10
0
 async def get_code(self, run_id, **kwargs):
     """Fetch the submitted source for ``run_id``.

     Loads ``self.code_url_prefix.format(run_id)`` (cookie header set,
     ``validate_cert=False``) and extracts the text of the first
     ``<pre>`` element with class ``sh-c``.

     Args:
         run_id: identifier formatted into the code URL.
         **kwargs: accepted but not used by this implementation.

     Returns:
         The code text on success; ``None`` if there was no response or an
         exception occurred (both cases are logged).
     """
     url = self.code_url_prefix.format(run_id)
     try:
         response = await self.load_page(url, {'cookie': self.cookie},
                                         validate_cert=False)
         if not response:
             # Without this guard a failed load surfaced only as an
             # AttributeError on ``response.body``.
             logger.error('get_code: no response for {}'.format(url))
             return None
         soup = self.get_lxml_bs4(response.body)
         return soup.find('pre', class_='sh-c').text
     except Exception as e:
         logger.error(e)
         logger.error(traceback.format_exc())
예제 #11
0
 async def get_code(self, run_id, **kwargs):
     """Fetch the source code of submission ``run_id``.

     ``kwargs['pro_id']`` is required; its leading digits are used as the
     contest id when formatting ``self.code_prefix``. Both the key lookup
     and the digit match happen outside the ``try`` block, so a missing key
     or a ``pro_id`` without leading digits raises to the caller.

     Returns:
         The text of the ``<pre class="program-source">`` element, or
         ``None`` when there is no response or loading/parsing raises
         (the error is logged).
     """
     problem_id = kwargs['pro_id']
     contest_id = re.match(r'^\d+', problem_id).group()
     url = self.code_prefix.format(contest_id, run_id)
     try:
         response = await self.load_page(url)
         if not response:
             return None
         document = self.get_lxml_bs4(response.body)
         source_node = document.find('pre', class_='program-source')
         return source_node.text
     except Exception as e:
         logger.error(e)
0
 async def get_rating(self):
     """Fetch the account's rating history and summarise it.

     Returns:
         ``{'rating': <last entry's rating>, 'maxRating': <highest rating>}``
         when the decoded JSON is non-empty; ``False`` when there is no
         response; ``None`` when the history is empty or any step raises
         (errors are logged).
     """
     url = self.rating_api_prefix.format(self.account.nickname)
     try:
         response = await self.fetch(url)
         if not response:
             return False
         history = json.loads(response.body.decode())
         if len(history) == 0:
             return None
         best = max(history, key=lambda x: x['rating'])
         return {
             'rating': history[-1]['rating'],
             'maxRating': best['rating'],
         }
     except Exception as ex:
         logger.error(ex)
         message = '{} {} get Rating error'.format(self.TAG, self.account)
         logger.error(message)
예제 #13
0
 async def get_rating(self):
     """Return the current and the maximum rating of this account.

     The JSON list served at ``self.rating_api_prefix`` (formatted with the
     account nickname) is decoded; the last element supplies ``rating`` and
     the element with the largest ``rating`` supplies ``maxRating``.

     Returns:
         A dict with ``rating`` and ``maxRating``; ``False`` if nothing was
         fetched; ``None`` if the list is empty or an exception was logged.
     """
     url = self.rating_api_prefix.format(self.account.nickname)
     try:
         response = await self.fetch(url)
         if not response:
             return False
         entries = json.loads(response.body.decode())
         if len(entries) > 0:
             peak = max(entries, key=lambda x: x['rating'])
             latest = entries[-1]
             return {'rating': latest['rating'],
                     'maxRating': peak['rating']}
         return None
     except Exception as ex:
         logger.error(ex)
         logger.error(
             '{} {} get Rating error'.format(self.TAG, self.account))
예제 #14
0
 async def get_code(self, run_id, **kwargs):
     """Fetch the source code shown on the page for ``run_id``.

     Args:
         run_id: identifier formatted into ``self.source_code_prefix``.
         **kwargs: accepted but not used by this implementation.

     Returns:
         Text of the first ``<pre>`` element; ``False`` when there is no
         response or no ``<pre>``; ``None`` when an exception was logged.
     """
     url = self.source_code_prefix.format(run_id)
     try:
         response = await self.load_page(url, {'cookie': self.cookie})
         if not response:
             return False
         pre_node = self.get_lxml_bs4(response.body).find('pre')
         if pre_node:
             logger.debug("{} fetch {}\'s code {} success".format(self.TAG, self.account, run_id))
             return pre_node.text
         return False
     except Exception as ex:
         logger.error(ex)
         logger.error('{} fetch {}\'s {} code error'.format(self.TAG, self.account, run_id))
예제 #15
0
 async def get_code(self, run_id, **kwargs):
     """Load the code page of ``run_id`` and return the ``<pre>`` text.

     Args:
         run_id: identifier formatted into ``self.source_code_prefix``.
         **kwargs: accepted but not used by this implementation.

     Returns:
         The code text on success, ``False`` if the page or its ``<pre>``
         element is missing, ``None`` if an exception occurred (logged).
     """
     code_url = self.source_code_prefix.format(run_id)
     try:
         page = await self.load_page(code_url, {'cookie': self.cookie})
         if not page:
             return False
         document = self.get_lxml_bs4(page.body)
         code_node = document.find('pre')
         if not code_node:
             return False
         success = "{} fetch {}\'s code {} success".format(
             self.TAG, self.account, run_id)
         logger.debug(success)
         return code_node.text
     except Exception as ex:
         logger.error(ex)
         failure = '{} fetch {}\'s {} code error'.format(
             self.TAG, self.account, run_id)
         logger.error(failure)
예제 #16
0
 async def get_solved(self):
     """Read the solved and submitted counters from the user page.

     Returns:
         ``{'solved': ..., 'submitted': ...}`` as strings taken from the
         page, or ``False`` when the page yields no response.

     Raises:
         Exception: whatever loading or parsing raised, after logging it.
     """
     url = self.user_url_prefix.format(self.account.nickname)
     try:
         response = await self.load_page(url, {'cookie': self.cookie})
         if not response:
             return False
         soup = self.get_lxml_bs4(response.body)
         # The solved count is the text node right before the #showac button.
         show_ac_button = soup.find('button', id='showac')
         solved = show_ac_button.previous_sibling.string.strip()
         # The submitted count is the text of the user's status link.
         status_href = 'status.php?showname={}'.format(self.account.nickname)
         submitted = soup.find('a', href=status_href).text
         return dict(solved=solved, submitted=submitted)
     except Exception as ex:
         logger.error('{} get Solved/Submitted error {}: {}'.format(self.TAG, self.account, ex))
         raise ex
예제 #17
0
 async def get_solved(self):
     """Fetch the user-info JSON and return its rating fields.

     Returns:
         ``{'rating': ..., 'maxRating': ...}`` taken from the first element
         of the payload's ``result`` list; ``False`` when there is no
         response or ``status`` is not ``'OK'``; ``None`` when an exception
         was logged.
     """
     url = self.user_info_prefix.format(self.account.nickname)
     try:
         response = await self.load_page(url)
         if not response:
             return False
         payload = json.loads(response.body.decode())
         if payload['status'] != 'OK':
             return False
         profile = payload['result'][0]
         return dict(
             rating=profile['rating'],
             maxRating=profile['maxRating'],
         )
     except Exception as e:
         logger.error(e)
예제 #18
0
 async def get_code_zip(self, min, max):
     """Download a zip of submissions and enqueue each file's contents.

     ``self.code_zip_url`` is formatted with ``min`` and ``max`` (names kept
     because they are part of the signature, although they shadow
     builtins). For every archive member, the third piece of its name after
     splitting on ``/`` or ``_`` is used as the run id, and one
     ``DataType.Code`` item is passed to ``self.put_queue``.

     Returns:
         ``None``. Any exception is logged together with its traceback.
     """
     url = self.code_zip_url.format(min, max)
     try:
         response = await self.fetch(
             url,
             method=HttpMethod.GET,
             headers={'cookie': self.cookie},
             validate_cert=False,
         )
         with ZipFile(response.buffer) as archive:
             for member in archive.namelist():
                 run_id = re.split(r'/|_', member)[2]
                 with archive.open(member) as member_fp:
                     source = member_fp.read()
                 item = dict(type=DataType.Code, account=self.account,
                             run_id=run_id, code=source)
                 await self.put_queue([item])
     except Exception as e:
         logger.error(e)
         logger.error(traceback.format_exc())
예제 #19
0
    async def get_solved(self):
        """Read the solved / submitted counters from the user's page.

        Returns:
            ``{'solved': ..., 'submitted': ...}`` holding the link texts
            found on the page, or ``False`` when the page yields no
            response.

        Raises:
            Exception: any error from loading or parsing is logged and then
                re-raised unchanged.
        """
        url = self.user_url_prefix.format(self.account.nickname)
        try:
            response = await self.load_page(url)
            if not response:
                return False
            soup = self.get_lxml_bs4(response.body)
            # Raw strings: ``\?`` inside a plain literal is an invalid escape
            # sequence and produces a warning on current Python versions.
            # The compiled patterns are unchanged.
            solved_count = soup.find('a', href=re.compile(r'^status\?result=0')).text
            submitted_count = soup.find('a', href=re.compile(r'^status\?user_id')).text

            return {
                'solved': solved_count,
                'submitted': submitted_count,
            }
        except Exception as ex:
            logger.error('{} {} get Solved/Submitted error: {}'.format(self.TAG, self.account, ex))
            # Bare ``raise`` re-raises the active exception as-is.
            raise
예제 #20
0
    async def get_solved(self):
        """Return the solved and submitted counts shown on the user page.

        Returns:
            A dict with keys ``solved`` and ``submitted`` (text of the
            matching links), or ``False`` if no response was obtained.

        Raises:
            Exception: errors raised while loading or parsing are logged
                and propagated to the caller.
        """
        url = self.user_url_prefix.format(self.account.nickname)
        try:
            response = await self.load_page(url)
            if not response:
                return False
            soup = self.get_lxml_bs4(response.body)
            # The patterns are written as raw strings so the backslash
            # before ``?`` is not treated as a (invalid) string escape.
            solved_link = soup.find('a', href=re.compile(r'^status\?result=0'))
            solved_count = solved_link.text
            submitted_link = soup.find('a', href=re.compile(r'^status\?user_id'))
            submitted_count = submitted_link.text

            return {
                'solved': solved_count,
                'submitted': submitted_count,
            }
        except Exception as ex:
            logger.error('{} {} get Solved/Submitted error: {}'.format(self.TAG, self.account, ex))
            raise
예제 #21
0
async def spider_runner(idx):
    """Worker loop: take accounts off ``AccountQueue`` and run a spider.

    For each account a spider is taken from ``SpiderFactory`` under the
    account's ``oj_name``, bound to the account and run. The account status
    is set to UPDATING before the run and to NORMAL, ACCOUNT_ERROR (login
    failure) or UPDATE_ERROR (any other exception) afterwards; the account
    is saved in every case. The loop never returns.

    Args:
        idx: value used only to label this runner's log lines.
    """
    logger.info('[SpiderRunner #{0}] 开始运行 ...'.format(idx))
    while True:
        acct = await AccountQueue.get()
        picked_msg = '[SpiderRunner #{0}] {1} <=== account_queue(size={2})'.format(
            idx, acct, AccountQueue.qsize())
        logger.info(picked_msg)
        # Take a spider for this account's OJ and bind the account to it.
        spider = await SpiderFactory[acct.oj_name].get()
        spider.account = acct

        try:
            acct.set_status(account.AccountStatus.UPDATING)
            acct.save()
            await spider.run()
            acct.set_status(account.AccountStatus.NORMAL)
        except LoginException as login_err:
            logger.error(login_err)
            acct.set_status(account.AccountStatus.ACCOUNT_ERROR)
        except Exception as err:
            logger.error(err)
            logger.error(traceback.format_exc())
            acct.set_status(account.AccountStatus.UPDATE_ERROR)
        finally:
            # Persist whichever status was set last.
            acct.save()

        # Mark both queue items as processed, then hand the spider back.
        done_msg = '[SpiderRunner #{0}] {1} work done'.format(idx, acct)
        logger.info(done_msg)
        SpiderFactory[acct.oj_name].task_done()
        AccountQueue.task_done()
        await SpiderFactory[acct.oj_name].put(spider)
예제 #22
0
async def spider_runner(idx):
    """Worker loop: take accounts off ``AccountQueue`` and run a spider.

    For each account a spider is taken from ``SpiderFactory`` under the
    account's ``oj_name``, bound to the account and run. The account status
    is set to UPDATING before the run and to NORMAL, ACCOUNT_ERROR (login
    failure) or UPDATE_ERROR (any other exception) afterwards; after either
    failure the runner sleeps for two minutes. The account is saved in
    every case. The loop never returns.

    Args:
        idx: value used only to label this runner's log lines.
    """
    logger.info('[SpiderRunner #{0}] 开始运行 ...'.format(idx))
    while True:
        acct = await AccountQueue.get()
        picked_msg = '[SpiderRunner #{0}] {1} <=== account_queue(size={2})'.format(
            idx, acct, AccountQueue.qsize())
        logger.info(picked_msg)
        # Take a spider for this account's OJ and bind the account to it.
        spider = await SpiderFactory[acct.oj_name].get()
        spider.account = acct

        try:
            acct.set_status(account.AccountStatus.UPDATING)
            acct.save()
            await spider.run()
            acct.set_status(account.AccountStatus.NORMAL)
        except LoginException as login_err:
            logger.error(login_err)
            acct.set_status(account.AccountStatus.ACCOUNT_ERROR)
            await gen.sleep(60 * 2)  # pause 120 seconds after a failure
        except Exception as err:
            logger.error(err)
            logger.error(traceback.format_exc())
            acct.set_status(account.AccountStatus.UPDATE_ERROR)
            await gen.sleep(60 * 2)  # pause 120 seconds after a failure
        finally:
            # Persist whichever status was set last.
            acct.save()

        # Mark both queue items as processed, then hand the spider back.
        done_msg = '[SpiderRunner #{0}] {1} work done'.format(idx, acct)
        logger.info(done_msg)
        SpiderFactory[acct.oj_name].task_done()
        AccountQueue.task_done()
        await SpiderFactory[acct.oj_name].put(spider)
예제 #23
0
    async def get_solved(self):
        """Read the submitted and solved counters from the user page.

        The page is searched for ``<td>`` cells whose text is
        ``Problems Submitted`` or ``Problems Solved``; the first match is
        treated as the submitted label and the second as the solved label,
        and each count is the text of the label's next sibling.

        Returns:
            ``{'solved': ..., 'submitted': ...}``, or ``False`` when the
            page yields no response.

        Raises:
            Exception: whatever loading or parsing raised, after logging it.
        """
        url = self.user_url_prefix.format(self.account.nickname)
        try:
            response = await self.load_page(url, {'Cookie': self.cookie})
            if not response:
                return False
            document = self.get_lxml_bs4(response.body)
            wanted_labels = ['Problems Submitted', 'Problems Solved']
            label_cells = document.find_all('td', text=wanted_labels)
            submitted_count = label_cells[0].next_sibling.text
            solved_count = label_cells[1].next_sibling.text
            return dict(solved=solved_count, submitted=submitted_count)
        except Exception as ex:
            logger.error('{} {} get Solved/Submitted error: {}'.format(self.TAG, self.account, ex))
            raise ex
예제 #24
0
 async def get_code_zip(self, min, max):
     """Fetch a zip archive of submissions and queue every file in it.

     The archive URL is ``self.code_zip_url`` formatted with ``min`` and
     ``max`` (parameter names are part of the signature and therefore kept,
     even though they shadow builtins). The run id of each member is the
     third element obtained by splitting its name on ``/`` or ``_``.

     Returns:
         ``None``. Exceptions are logged with a traceback and swallowed.
     """
     archive_url = self.code_zip_url.format(min, max)
     try:
         request_options = dict(method=HttpMethod.GET,
                                headers={'cookie': self.cookie},
                                validate_cert=False)
         response = await self.fetch(archive_url, **request_options)
         zip_bytes = response.buffer
         with ZipFile(zip_bytes) as bundle:
             for entry_name in bundle.namelist():
                 run_id = re.split(r'/|_', entry_name)[2]
                 with bundle.open(entry_name) as handle:
                     content = handle.read()
                 await self.put_queue([{
                     'type': DataType.Code,
                     'account': self.account,
                     'run_id': run_id,
                     'code': content,
                 }])
     except Exception as e:
         logger.error(e)
         logger.error(traceback.format_exc())
예제 #25
0
    async def get_solved(self):
        """Return the solved / submitted counts found on the user page.

        ``<td>`` cells with text ``Problems Submitted`` or
        ``Problems Solved`` are located; the first hit supplies the
        submitted count and the second the solved count, each read from the
        cell's next sibling.

        Returns:
            A dict with keys ``solved`` and ``submitted``, or ``False`` if
            no response was obtained.

        Raises:
            Exception: errors from loading or parsing are logged and
                re-raised.
        """
        url = self.user_url_prefix.format(self.account.nickname)
        try:
            response = await self.load_page(url, {'Cookie': self.cookie})
            if not response:
                return False
            soup = self.get_lxml_bs4(response.body)
            labels = soup.find_all(
                'td', text=['Problems Submitted', 'Problems Solved'])
            submitted_cell = labels[0].next_sibling
            submitted_count = submitted_cell.text
            solved_cell = labels[1].next_sibling
            solved_count = solved_cell.text
            return {'solved': solved_count, 'submitted': submitted_count}
        except Exception as ex:
            error_line = '{} {} get Solved/Submitted error: {}'.format(
                self.TAG, self.account, ex)
            logger.error(error_line)
            raise ex
예제 #26
0
 async def get_code(self, run_id, **kwargs):
     """Fetch the source code of ``run_id`` from its code page.

     Args:
         run_id: identifier formatted into ``self.source_code_prefix``.
         **kwargs: accepted but not used by this implementation.

     Returns:
         Text of the ``<textarea id="usercode">`` element; ``False`` when
         there is no response or the element is missing; ``None`` when an
         exception was logged.
     """
     url = self.source_code_prefix.format(run_id)
     try:
         response = await self.load_page(url, {'Cookie': self.cookie})
         if not response:
             return False
         code_area = self.get_lxml_bs4(response.body).find('textarea', id='usercode')
         if code_area:
             code = code_area.text
             logger.debug('{} {} Success to load code {} page'.format(self.TAG, self.account, run_id))
             return code
         # No code element on the page: report it and signal failure.
         logger.error('{} {} Fail to load code {} page'.format(self.TAG, self.account, run_id))
         logger.error('{}: {}'.format(self.TAG, code_area))
         return False
     except Exception as ex:
         logger.error(ex)
         logger.error('{} fetch {}\'s {} code error'.format(self.TAG, self.account, run_id))
예제 #27
0
 async def get_code(self, run_id, **kwargs):
     """Load the code page of ``run_id`` and return the submitted source.

     Args:
         run_id: identifier formatted into ``self.source_code_prefix``.
         **kwargs: accepted but not used by this implementation.

     Returns:
         The text of the ``usercode`` textarea on success, ``False`` if the
         page or the textarea is missing, ``None`` if an exception occurred
         (it is logged).
     """
     code_url = self.source_code_prefix.format(run_id)
     try:
         page = await self.load_page(code_url, {'Cookie': self.cookie})
         if not page:
             return False
         document = self.get_lxml_bs4(page.body)
         textarea = document.find('textarea', id='usercode')
         if not textarea:
             missing = '{} {} Fail to load code {} page'.format(
                 self.TAG, self.account, run_id)
             logger.error(missing)
             logger.error('{}: {}'.format(self.TAG, textarea))
             return False
         source = textarea.text
         loaded = '{} {} Success to load code {} page'.format(
             self.TAG, self.account, run_id)
         logger.debug(loaded)
         return source
     except Exception as ex:
         logger.error(ex)
         failure = '{} fetch {}\'s {} code error'.format(
             self.TAG, self.account, run_id)
         logger.error(failure)
예제 #28
0
 async def get_code(self, run_id, **kwargs):
     """Fetch the source code of ``run_id`` from the JSON code endpoint.

     Args:
         run_id: identifier formatted into ``self.code_prefix``.
         **kwargs: accepted but not used by this implementation.

     Returns:
         The payload's ``source`` field passed through ``unescape``;
         ``False`` when there is no response; ``None`` when an exception
         was logged.
     """
     url = self.code_prefix.format(run_id)
     try:
         response = await self.load_page(url, {'cookie': self.cookie})
         if response:
             payload = json.loads(response.body.decode('utf-8'))
             source = payload['source']
             logger.debug('{} {} Success to load code {} page'.format(self.TAG, self.account, run_id))
             return unescape(source)
         # Nothing came back: log the failure and the falsy response value.
         logger.error('{} {} Fail to load code {} page'.format(self.TAG, self.account, run_id))
         logger.error('{}: response => {}'.format(self.TAG, response))
         return False
     except Exception as ex:
         logger.error('{} fetch {}\'s {} code error {}'.format(self.TAG, self.account, run_id, ex))
예제 #29
0
        async def wrapper(*args, **kwargs):
            """Call ``function`` up to ``times`` times until it returns a truthy value.

            ``times``, ``duration`` and ``function`` come from the enclosing
            scope. A falsy result (``False``, ``None``, empty container, ...)
            counts as a failed attempt and is followed by a
            ``gen.sleep(duration * 60)`` pause (presumably ``duration`` is in
            minutes - confirm at the decorator definition). An exception is
            logged with its traceback and the next attempt starts without a
            pause.

            Returns:
                The last value returned by ``function``; ``None`` if every
                attempt raised.
            """
            left_times = times
            call_state, ret = False, None
            while left_times > 0 and call_state is False:
                try:
                    if left_times != times:
                        # ``Logger.warn`` is a deprecated alias; use
                        # ``warning``.
                        logger.warning('[重试第 {0} 次] ===> {1}({2})'.format(
                            times - left_times, function.__name__, args))

                    ret = await function(*args, **kwargs)
                    call_state = bool(ret)
                    if not call_state:
                        await gen.sleep(duration * 60)
                except Exception as e:
                    logger.error(e)
                    logger.error(traceback.format_exc())
                finally:
                    # Count the attempt whether it returned or raised.
                    left_times -= 1
            if call_state is False:
                message = '[已经重试 {0} 次] def {1}({2}) call fail'.format(times, function.__name__, args)
                logger.error(message)
            return ret
예제 #30
0
        async def wrapper(*args, **kwargs):
            """Retry ``function`` until it yields a truthy result.

            At most ``times`` attempts are made (``times``, ``duration`` and
            ``function`` are taken from the enclosing scope). After an
            attempt that returns a falsy value the coroutine sleeps for
            ``duration * 60`` via ``gen.sleep``; after an attempt that raises,
            the exception and traceback are logged and no sleep happens.

            Returns:
                Whatever the most recent completed call returned, or ``None``
                when no call completed without raising.
            """
            left_times = times
            call_state, ret = False, None
            while left_times > 0 and call_state is False:
                try:
                    if left_times != times:
                        # ``warning`` replaces the deprecated ``warn`` alias.
                        logger.warning('[重试第 {0} 次] ===> {1}({2})'.format(
                            times - left_times, function.__name__, args))

                    ret = await function(*args, **kwargs)
                    call_state = bool(ret)
                    if not call_state:
                        await gen.sleep(duration * 60)
                except Exception as e:
                    logger.error(e)
                    logger.error(traceback.format_exc())
                finally:
                    # Every attempt, successful or not, uses up one try.
                    left_times -= 1
            if call_state is False:
                message = '[已经重试 {0} 次] def {1}({2}) call fail'.format(
                    times, function.__name__, args)
                logger.error(message)
            return ret
예제 #31
0
 def init_http_client():
     """Switch ``AsyncHTTPClient`` to the curl-backed implementation.

     The outcome is logged: info on success, error (with the exception
     text) on failure. Exceptions are not propagated.
     """
     implementation = "tornado.curl_httpclient.CurlAsyncHTTPClient"
     try:
         httpclient.AsyncHTTPClient.configure(implementation)
         logger.info('[ACM-Spider] 配置 CurlAsyncHTTPClient 成功')
     except Exception as ex:
         logger.error(
             '[ACM-Spider] 配置 CurlAsyncHTTPClient 失败: {}'.format(ex))