Esempio n. 1
0
    def get_problem(self, *args, **kwargs):
        """Fetch a WUST problem page and parse it into a ``Problem``.

        Args:
            **kwargs: must contain ``pid``, the remote problem id.

        Returns:
            The populated ``Problem``, or ``Problem.PROBLEM_NOT_FOUND`` when
            the page cannot be fetched or a required field cannot be parsed.

        Raises:
            KeyError: if ``pid`` is missing from ``kwargs`` (the URL is built
                before the guarded section).
        """
        url = 'http://acm.wust.edu.cn/problem.php?id=' + str(kwargs['pid']) + '&soj=0'
        problem = Problem()
        try:
            website_data = Spider.get_data(url, self.code_type)

            problem.remote_id = kwargs['pid']
            problem.remote_url = url
            problem.remote_oj = 'WUST'
            # Title, limits and the special-judge flag are scraped straight
            # from the raw markup; a missing match raises AttributeError and
            # is reported as "not found" below.
            problem.title = re.search(r': ([\s\S]*?)</h2>', website_data).group(1)
            problem.time_limit = re.search(r'(\d* Sec)', website_data).group(1)
            problem.memory_limit = re.search(r'(\d* MB)', website_data).group(1)
            problem.special_judge = re.search(r'class=red>Special Judge</span>', website_data) is not None
            soup = BeautifulSoup(website_data, 'lxml')

            # case:problem.picture=self.parse_html("img", soup, website_data)
            # Each titled section is extracted by self.parse_html, keyed by
            # the section heading.
            problem.description = self.parse_html("Description", soup, website_data)
            problem.input = self.parse_html("Input", soup, website_data)
            problem.output = self.parse_html("Output", soup, website_data)
            input_data = self.parse_html("Sample Input", soup, website_data)
            output_data = self.parse_html("Sample Output", soup, website_data)
            problem.hint = self.parse_html("HINT", soup, website_data)
            problem.author = self.parse_html("Author", soup, website_data)
            problem.source = self.parse_html("Source", soup, website_data)
            problem.sample = [
                {'input': input_data,
                 'output': output_data}]
        except Exception:
            # Best-effort scraping: any fetch or parse error means the problem
            # is unavailable.  ``Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit still propagate.
            return Problem.PROBLEM_NOT_FOUND
        return problem
Esempio n. 2
0
    def get_problem(self, *args, **kwargs):
        """Fetch an HDU problem page and parse it into a ``Problem``.

        Title, limits, description, input and output are required; sample
        data, author and hint are filled in only when present on the page.

        Args:
            **kwargs: must contain ``pid``, the remote problem id.

        Returns:
            The populated ``Problem``, or ``Problem.PROBLEM_NOT_FOUND`` when
            the page cannot be fetched or a required field cannot be parsed.

        Raises:
            KeyError: if ``pid`` is missing from ``kwargs`` (the URL is built
                before the guarded section).
        """
        url = 'http://acm.hdu.edu.cn/showproblem.php?pid=' + str(kwargs['pid'])
        problem = Problem()
        try:
            website_data = Spider.get_data(url, self.code_type)

            problem.remote_id = kwargs['pid']
            problem.remote_url = url
            problem.remote_oj = 'HDU'
            problem.title = re.search(r'color:#1A5CC8\'>([\s\S]*?)</h1>',
                                      website_data).group(1)
            problem.time_limit = re.search(r'(\d* MS)', website_data).group(1)
            problem.memory_limit = re.search(r'/(\d* K)',
                                             website_data).group(1)
            problem.special_judge = re.search(
                r'color=red>Special Judge</font>', website_data) is not None
            problem.description = re.search(
                r'>Problem Description</div>[\s\S]*?panel_content>([\s\S]*?)</div>',
                website_data).group(1)
            problem.input = re.search(
                r'>Input</div>[\s\S]*?panel_content>([\s\S]*?)</div>',
                website_data).group(1)
            problem.output = re.search(
                r'>Output</div>[\s\S]*?panel_content>([\s\S]*?)</div>',
                website_data).group(1)

            # Sample input: drop an optional leading '<pre><div ...>' wrapper.
            input_data = ''
            match_group = re.search(
                r'>Sample Input</div>[\s\S]*?panel_content>([\s\S]*?)</div',
                website_data)
            if match_group:
                input_data = re.search(r'(<pre><div[\s\S]*?>)?([\s\S]*)',
                                       match_group.group(1)).group(2)

            # Sample output: same wrapper handling, then cut at the first
            # nested '<div' that follows the sample text.
            output_data = ''
            match_group = re.search(
                r'>Sample Output</div>[\s\S]*?panel_content>([\s\S]*?)</div',
                website_data)
            if match_group:
                output_data = re.search(r'(<pre><div[\s\S]*?>)?([\s\S]*)',
                                        match_group.group(1)).group(2)
                if '<div' in output_data:
                    output_data = output_data.split('<div', 1)[0]
            problem.sample = [{'input': input_data, 'output': output_data}]

            # Author and hint are optional sections.
            match_group = re.search(
                r'>Author</div>[\s\S]*?panel_content>([\s\S]*?)</div>',
                website_data)
            if match_group:
                problem.author = match_group.group(1)
            match_group = re.search(
                r'<i>Hint</i>[\s\S]*?/div>[\s]*([\s\S]+?)</div>', website_data)
            if match_group:
                problem.hint = match_group.group(1)
        except Exception:
            # Best-effort scraping: any fetch or parse error means the problem
            # is unavailable.  ``Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit still propagate.
            return Problem.PROBLEM_NOT_FOUND
        return problem
    def get_problem(self, *args, **kwargs):
        """Fetch a Codeforces problem page and parse it into a ``Problem``.

        Args:
            **kwargs: ``pid`` is the problem id: contest number followed by a
                one-character problem index (the last character is split off
                to build the URL).

        Returns:
            The populated ``Problem``, or ``None`` when the page cannot be
            fetched or a required field cannot be parsed.  The note section
            is optional; ``problem.hint`` is only set when it is present.
        """
        pid = kwargs.get('pid')
        # NOTE(review): splitting off exactly one trailing character assumes a
        # single-character problem index; an index such as 'A1' would be
        # split incorrectly -- confirm against callers.
        url = 'http://codeforces.com/problemset/problem/' + pid[:-1] + '/' + pid[-1:]
        problem = Problem()
        try:
            website_data = self.req.get(url)
            page = website_data.text
            problem.remote_id = kwargs.get('pid')
            problem.remote_url = url
            problem.remote_oj = 'CODEFORCES'
            problem.title = re.search(r'class="title">([\s\S]*?)</div>',
                                      page).group(1)

            # Limits are stored as '<property title>: <value>'.
            temp_result = re.search(
                r'time-limit"><div class="property-title">([\s\S]*?)</div>([\s\S]*?)</div>',
                page)
            problem.time_limit = temp_result.group(1) + ': ' + temp_result.group(2)
            temp_result = re.search(
                r'class="memory-limit"><div class="property-title">([\s\S]*?)</div>([\s\S]*?)</div>',
                page)
            problem.memory_limit = temp_result.group(1) + ': ' + temp_result.group(2)
            problem.special_judge = None

            # Every fragment is a raw string so '\s' / '\S' reach the regex
            # engine unchanged instead of being invalid string escapes.
            problem.description = re.search(
                r'class="property-title">'
                r'([\s\S]*?)</div>([\s\S]*?)</div>'
                r'([\s\S]*?)<div>([\s\S]*?)</div>', page).group(4)
            problem.input = re.search(
                r'class="section-title">Input</div>([\s\S]*?)</div>',
                page).group(1)
            problem.output = re.search(
                r'class="section-title">Output</div>([\s\S]*?)</div>',
                page).group(1)

            # All sample blocks are concatenated, one '<pre>' element per
            # line, into a single input string and a single output string.
            soup = BeautifulSoup(page, 'lxml')
            input_data = ''.join(
                str(block.find('pre')) + '\n'
                for block in soup.find_all('div', attrs={'class': 'input'}))
            output_data = ''.join(
                str(block.find('pre')) + '\n'
                for block in soup.find_all('div', attrs={'class': 'output'}))
            problem.sample = [{'input': input_data, 'output': output_data}]

            # The note is optional: a page without one must not fail the
            # whole parse.
            note = re.search(
                r'div class="note">([\s\S]*?)</div>([\s\S]*?)</div>', page)
            if note:
                problem.hint = note.group(2)
        except Exception:
            # Best-effort scraping: any fetch or parse error yields None.
            # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
            # / SystemExit still propagate.
            return None
        # The parsed problem must be handed back on success; without this
        # return the method would always yield None.
        return problem
Esempio n. 4
0
    def get_problem(self, *args, **kwargs):
        """Fetch a POJ problem page and parse it into a ``Problem``.

        Title, limits, description, input and output are required; sample
        data, hint and source are filled in only when present on the page.

        Args:
            **kwargs: must contain ``pid``, the remote problem id.

        Returns:
            The populated ``Problem``, or ``None`` when the page cannot be
            fetched or a required field cannot be parsed.

        Raises:
            KeyError: if ``pid`` is missing from ``kwargs`` (the URL is built
                before the guarded section).
        """
        url = 'http://poj.org/problem?id=' + str(kwargs['pid'])
        problem = Problem()
        try:
            res = self.req.get(url=url)
            website_data = res.text
            problem.remote_id = kwargs['pid']
            problem.remote_url = url
            problem.remote_oj = 'POJ'
            problem.title = re.search(r'ptt" lang="en-US">([\s\S]*?)</div>',
                                      website_data).group(1)
            problem.time_limit = re.search(r'(\d*MS)', website_data).group(1)
            problem.memory_limit = re.search(
                r'Memory Limit:</b> ([\s\S]*?)</td>', website_data).group(1)

            problem.special_judge = re.search(r'red;">Special Judge</td>',
                                              website_data) is not None
            problem.description = re.search(
                r'>Description</p>[\s\S]*?lang="en-US">([\s\S]*?)</div>',
                website_data).group(1)
            problem.input = re.search(
                r'>Input</p>[\s\S]*?lang="en-US">([\s\S]*?)</div>',
                website_data).group(1)
            problem.output = re.search(
                r'>Output</p>[\s\S]*?lang="en-US">([\s\S]*?)</div>',
                website_data).group(1)

            # Sample text is the content of the '<pre class="sio">' element
            # inside each sample section.  Raw strings keep '\s' / '\S' from
            # being invalid string escapes.
            input_data = ''
            match_group = re.search(r'>Sample Input</p>([\s\S]*?)<p class',
                                    website_data)
            if match_group:
                input_data = re.search(r'"sio">([\s\S]*?)</pre>',
                                       match_group.group(1)).group(1)

            output_data = ''
            match_group = re.search(r'>Sample Output</p>([\s\S]*?)<p class',
                                    website_data)
            if match_group:
                output_data = re.search(r'"sio">([\s\S]*?)</pre>',
                                        match_group.group(1)).group(1)
            problem.sample = [{'input': input_data, 'output': output_data}]

            # Hint and source are optional sections.
            match_group = re.search(
                r'>Hint</p>[\s\S]*?"en-US">([\s\S]*?)</div>', website_data)
            if match_group:
                problem.hint = match_group.group(1)
            match_group = re.search(
                r'>Source</p>[\s\S]*?"en-US">([\s\S]*?)</div>', website_data)
            if match_group:
                problem.source = match_group.group(1)
        except Exception:
            # Best-effort scraping: any fetch or parse error yields None.
            # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
            # / SystemExit still propagate.
            return None
        return problem
Esempio n. 5
0
    def get_problem(self, *args, **kwargs):
        """Fetch an FZU problem page and parse it into a ``Problem``.

        Title, limits and description are required; input, output, sample
        data, hint and source are filled in only when present on the page.

        Args:
            **kwargs: must contain ``pid``, the remote problem id.

        Returns:
            The populated ``Problem``, or ``Problem.PROBLEM_NOT_FOUND`` when
            the page cannot be fetched or a required field cannot be parsed.

        Raises:
            KeyError: if ``pid`` is missing from ``kwargs`` (the URL is built
                before the guarded section).
        """
        url = 'http://acm.fzu.edu.cn/problem.php?pid=' + str(kwargs['pid'])
        problem = Problem()
        try:
            website_data = Spider.get_data(url, self.code_type)
            soup = BeautifulSoup(website_data, 'lxml')
            problem.remote_id = kwargs['pid']
            problem.remote_url = url
            problem.remote_oj = 'FZU'
            problem.title = re.search(r'<b> Problem [\d]* ([\s\S]*?)</b>',
                                      website_data).group(1)
            problem.time_limit = re.search(r'(\d* mSec)',
                                           website_data).group(1)
            problem.memory_limit = re.search(r'(\d* KB)',
                                             website_data).group(1)
            problem.special_judge = re.search(
                r'<font color="blue">Special Judge</font>',
                website_data) is not None

            # 'pro_desc' elements are taken positionally:
            # description, then input, then output.
            pro_desc = soup.find_all(attrs={"class": 'pro_desc'})
            problem.description = pro_desc[0].get_text()
            if len(pro_desc) >= 2:
                problem.input = pro_desc[1].get_text()
            if len(pro_desc) >= 3:
                problem.output = pro_desc[2].get_text()

            # Default to empty samples so a page with fewer than two 'data'
            # elements does not hit an unbound local and get misreported as
            # "not found".
            input_data = ''
            output_data = ''
            data = soup.find_all(attrs={"class": 'data'})
            if len(data) > 1:
                input_data = data[0].get_text()
                output_data = data[1].get_text()
            problem.sample = [{'input': input_data, 'output': output_data}]

            # Only the last two headings are inspected for Hint / Source;
            # the node following the heading is stored as-is.
            for h2 in soup.find_all('h2')[-2:]:
                heading = h2.get_text().strip()
                if heading == 'Hint':
                    problem.hint = h2.next_sibling
                if heading == 'Source':
                    problem.source = h2.next_sibling

        except Exception:
            # Best-effort scraping: any fetch or parse error means the problem
            # is unavailable.
            return Problem.PROBLEM_NOT_FOUND
        return problem