コード例 #1
0
 def get_cities_from_wikipedia(url='https://en.wikipedia.org/wiki/List_of_United_States_cities_by_population'):
     """
     Fetch `url` through an anonymized AnonBrowser, feed the HTML to a
     CityParser and return the parser's `cities` as a sorted list.
     """
     browser = AnonBrowser()
     browser.anonymize()
     markup = browser.open(url).read()

     city_parser = CityParser()
     city_parser.feed(markup)

     # list() + in-place sort yields the same result as sorted().
     cities = list(city_parser.cities)
     cities.sort()
     return cities
コード例 #2
0
ファイル: marmosetbrowser.py プロジェクト: j3doucet/marmoset
    def __init__(self, username=None, password=None, **kwargs):
        """
        Creates the AnonBrowser used for navigation and stores the
        credentials on the instance.

        @param self: The marmoset instance
        @param username: The username stored on the instance
        @param password: The password stored on the instance
        @param kwargs: Extra keyword arguments; not used by this method
        @return: marmoset
        """
        browser_options = {
            'user_agents': DESKTOP_USER_AGENTS,
            'cookiefile': self.cookiefile,
        }
        self.browser = AnonBrowser(**browser_options)
        self.username, self.password = username, password
コード例 #3
0
def mirror_images(url, dir):
    """
    Download every image referenced by the page at `url` into `dir`.

    The page is fetched with an AnonBrowser, image links are extracted
    with ImageParser.get_images_from_html, and each image is written to
    a file named after its link: the URL scheme is dropped and every
    '/' is replaced by '_'.

    Prints a message and returns None without downloading anything
    when `dir` is not an existing directory.
    """
    if not os.path.isdir(dir):
        print("No such directory: %s" % dir)
        return

    ab = AnonBrowser()
    html = ab.open(url).read()

    for image_link in ImageParser.get_images_from_html(html):
        # Remove the scheme as a prefix.  str.lstrip('http://') treats its
        # argument as a set of characters and would also eat leading
        # 'h', 't', 'p', ':' and '/' characters of the host name.
        filename = image_link
        for scheme in ('http://', 'https://'):
            if filename.startswith(scheme):
                filename = filename[len(scheme):]
                break
        filename = os.path.join(dir, filename.replace('/', '_'))

        data = ab.open(image_link).read()
        print('[*] Saving %s' % filename)
        with open(filename, 'wb') as f:
            f.write(data)
コード例 #4
0
class Marmoset():
    """
    The marmoset class manages interaction with the Marmoset Submission
    Server through the AnonBrowser.

    @ivar base_url: The base url for the marmoset submission server.
    @ivar cookiefile: Path of the file handed to the browser for cookies.
    """
    base_url = "http://marmoset.student.cs.uwaterloo.ca"
    cookiefile = os.path.join(tempfile.gettempdir(),
                              'marmoset.session.cookies')

    def __init__(self, username=None, password=None, **kwargs):
        """
        Initializes the marmoset browser.  Handles navigation and parsing
        of pages.

        @param self: The marmoset instance
        @param username: The username stored for later authentication
        @param password: The password stored for later authentication
        @param kwargs: Extra keyword arguments; not used by this method
        @return: marmoset
        """
        self.browser = AnonBrowser(user_agents=DESKTOP_USER_AGENTS,
                                   cookiefile=self.cookiefile)
        self.username = username
        self.password = password

    def _require_authentication(self):
        """
        Authenticates with the stored credentials.

        @param self: The marmoset instance
        @return: None
        @raise BrowserException: If authenticate() returns False
        """
        if not self.authenticate():
            raise BrowserException(
                "Invalid username/password combination for %s" % self.username)

    def _current_soup(self):
        """
        Reloads the current page and parses it.

        @param self: The marmoset instance
        @return: BeautifulSoup
        """
        return BeautifulSoup(self.browser.reload().read())

    def _browser_link(self, href):
        """
        Finds the first link known to the browser with the given href.

        @param self: The marmoset instance
        @param href: The href attribute to look for
        @return: The matching browser link, or None if there is none
        """
        return next((l for l in self.browser.links()
                     if dict(l.attrs).get('href') == href), None)

    def _submission_details(self, soup):
        """
        Collects the text of the second-to-last cell of every table row
        that carries a class attribute; runs of tabs and newlines are
        collapsed into a single space.

        @param self: The marmoset instance
        @param soup: The parsed page
        @return: list of strings, in page order
        """
        details = []
        for row in soup.find_all('tr'):
            if not row.has_attr('class'):
                continue
            cells = row.find_all('td')
            if len(cells) < 2:
                # No second-to-last cell to read; nothing to record.
                continue
            details.append(re.sub(r'[\t\n\r]+', ' ', cells[-2].text))
        return details

    def authenticate(self, username=None, password=None):
        """
        Attempts to authenticate the user using the given username/password
        combination, falling back to the stored credentials.

        @param self: The marmoset instance
        @param username: The username to use (defaults to self.username)
        @param password: The password to use (defaults to self.password)
        @return: boolean, True when a form for the user was found and
                 submitted
        """
        response = self.browser.open(self.base_url)
        # A url containing "cas" is treated as the login page.
        if "cas" in self.browser.geturl():
            self.browser.select_form(nr=0)

            if not username and not password:
                username, password = self.login()

            self.browser.form['username'] = username
            self.browser.form['password'] = password
            response = self.browser.submit()

            # Still on the login page: the credentials were rejected.
            if "cas" in self.browser.geturl():
                return False

            self.browser.save_cookies()

        # With a still-valid cookie session the login branch is skipped,
        # so the username has to come from the instance.
        if username is None:
            username = self.username

        user = '******'
        soup = BeautifulSoup(response.read())

        # nr must be the position of the form among ALL forms on the page,
        # because that is how the browser's select_form(nr=...) counts.
        for nr, form in enumerate(soup.find_all('form')):
            inp = form.find(lambda tag: dict(tag.attrs).get('name') == user)
            if inp is not None and dict(inp.attrs).get('value') == username:
                self.browser.select_form(nr=nr)
                self.browser.submit()
                return True

        return False

    def login(self, username=None, password=None):
        """
        Stores the given credentials (keeping the current ones for any
        value not supplied) and clears the browser's cookies.

        @param self: The marmoset instance
        @param username: New username, if any
        @param password: New password, if any
        @return: tuple (username, password) now stored on the instance
        """
        self.username = username if username else self.username
        self.password = password if password else self.password
        self.browser.delete_cookies()
        self.browser.clear_cookies(self.cookiefile)

        return self.username, self.password

    def select_course(self, course):
        """
        Follows every link on the current page whose text matches the
        course name (case-insensitive) followed by whitespace, '(', ':'
        or the end of the text.

        @param self: The marmoset instance
        @param course: The course; used verbatim inside a regular
                       expression
        @return: None
        """
        # Compiled once; the pattern does not depend on the link.
        pattern = re.compile('.*' + course + r'(\s+|\(|:|$)', re.I)
        for link in self.browser.links():
            # Links without text cannot match and would make match() raise.
            if link.text and pattern.match(link.text):
                self.browser.follow_link(link)

    def select_and_follow(self, patt, target):
        """
        Finds the table row whose first link text equals patt
        (case-insensitive) and follows the link found inside the first
        element of that row whose text contains target.

        @param self: The marmoset instance
        @param patt: The pattern to look for
        @param target: Text identifying the link to follow in the row
        @return: None
        @raise NoMatchingQueryException: If no such link could be followed
        """
        soup = self._current_soup()
        wanted = patt.lower()

        for group in soup.find_all('tr'):
            anchors = group.find_all('a')
            if not anchors or anchors[0].text.strip().lower() != wanted:
                continue

            cell = group.find(lambda tag: target in tag.text)
            anchor = cell.find('a') if cell is not None else None
            if anchor is None:
                continue

            link = self._browser_link(anchor.attrs['href'])
            if link is None:
                continue

            # The remaining rows belong to the page we are leaving.
            self.browser.follow_link(link)
            return

        raise NoMatchingQueryException(patt)

    def submit(self, course, assignment, files, zipname=None):
        """
        Submits a file to the specified assignment and course.

        @param self: The marmoset instance
        @param course: The course
        @param assignment: The assignment
        @param files: A single filename, or a list/tuple of filenames that
                      is zipped with write_zip before submitting
        @param zipname: Name of the zip file to create; defaults to
                        self.zipname if set, else "<assignment>.zip"
        @return: boolean, True when the browser ends up on the project page
        @raise BrowserException: If authentication fails
        """
        self._require_authentication()

        if not zipname:
            zipname = getattr(self, 'zipname', "%s.zip" % assignment)

        self.select_course(course)
        self.select_and_follow(assignment, 'submit')
        self.browser.select_form(nr=0)

        # If multiple files, zip and submit
        if isinstance(files, (list, tuple)):
            filename = write_zip(zipname, list(files))
        else:
            filename = files

        # Binary mode: the upload may be a zip.  The handle stays open
        # until the form has been submitted and is then closed.
        with open(filename, 'rb') as upload:
            self.browser.form.add_file(upload, 'application/octet-stream',
                                       filename)
            self.browser.submit()

        return "/view/project.jsp?projectPK=" in self.browser.geturl()

    def get_num_release_tokens(self, course, assignment, submission=None):
        """
        Gets the number of release tokens the user has available.

        @param self: The Marmoset instance
        @param course: The name of the course
        @param assignment: The assignment to look at
        @param submission: The submission number (see find_submission)
        @return: int
        @raise BrowserException: If authentication fails or the page
                                 shows no token count
        """
        self._require_authentication()

        self.select_course(course)
        self.select_and_follow(assignment, 'view')
        self.find_submission(submission)

        soup = self._current_soup()
        notices = [tag.text for tag in soup.find_all('p')
                   if re.search('^You currently have', tag.text)]

        if not notices:
            raise BrowserException("This submission has no release tests.")

        count = re.search("[0-9]+", " ".join(notices))
        if count is None:
            raise BrowserException(
                "Could not determine the number of release tokens.")
        return int(count.group(0))

    def release(self, course, assignment, submission=None):
        """
        Release tests the specified submission for the assignment.

        @param self: The marmoset instance
        @param course: The name of the course
        @param assignment: The name of the assignment
        @param submission: The submission number
        @return: boolean, True when the release form was submitted
        @raise BrowserException: If no tokens are left or no release link
                                 is available
        """
        tokens = self.get_num_release_tokens(course, assignment, submission)
        soup = self._current_soup()
        release_links = soup.find_all(
            lambda tag: tag.name == 'a' and
            'Click here to release' in tag.text)

        if release_links and tokens > 0:
            link = self._browser_link(release_links[0].attrs['href'])
            if link is not None:
                self.browser.follow_link(link)
                self.browser.select_form(nr=0)
                self.browser.submit()
                return True
        elif tokens == 0:
            raise BrowserException(
                "Can't release test %s, %s tokens available." %
                (assignment, tokens))

        raise BrowserException("Can't release test %s at the moment." %
                               (assignment))

    def long(self, course, assignment, submission=None):
        """
        Gets the detailed test results for a specified assignment based on
        the submission number.

        @param self: The marmoset instance
        @param course: The name of the course
        @param assignment: The name of the assignment
        @param submission: The submission number
        @return: list of OrderedDict (see get_table)
        @raise BrowserException: If authentication fails
        """
        self._require_authentication()

        self.select_course(course)
        self.select_and_follow(assignment, 'view')
        self.find_submission(submission)
        return self.get_table(course, assignment)

    def fetch(self, course, assignment, submissions=5):
        """
        Fetches the n most recent short results for a specified assignment.

        @param self: The marmoset instance
        @param course: The name of the course
        @param assignment: The name of the assignment
        @param submissions: How many results to fetch; a falsy value
                            means 5
        @return: list of OrderedDict (see get_table)
        @raise BrowserException: If authentication fails
        """
        self._require_authentication()

        submissions = int(submissions) if submissions else 5
        self.select_course(course)
        self.select_and_follow(assignment, 'view')
        # One extra row is requested on top of the submissions themselves.
        return self.get_table(course, assignment, submissions + 1)

    def download(self, course, assignment, submission=None, zipname=None):
        """
        Downloads the specified submission.  Defaults to the most recent
        submission.

        @param self: The marmoset instance
        @param course: The name of the course
        @param assignment: The name of the assignment
        @param submission: The submission number
        @param zipname: The name of the zipfile to create
        @return: The first element of what browser.retrieve returns
        @raise BrowserException: If authentication fails or the
                                 submission cannot be downloaded
        """
        self._require_authentication()

        self.select_course(course)
        self.select_and_follow(assignment, 'view')

        soup = self._current_soup()
        details = self._submission_details(soup)
        links = [tag.attrs['href'] for tag in soup.find_all('a')
                 if 'download' in tag.text]
        submission = int(submission) if submission else len(links)

        # Same numbering as find_submission: submission k is the k-th
        # entry counted from the end of the page.
        index = -submission

        # check possible error conditions
        if len(links) == 0:
            raise BrowserException("No submissions available.")
        elif submission <= 0:
            raise BrowserException("No submission %s" % submission)
        elif submission > len(links) and submission > len(details):
            raise BrowserException("Invalid submission number %s" % submission)
        elif submission > len(links):
            raise BrowserException("Submission # %s %s " %
                                   (submission, details[index]))

        link = self.base_url + links[index]
        zipname = zipname if zipname else assignment + '.zip'
        return self.browser.retrieve(link, zipname)[0]

    def find_submission(self, submission=None):
        """
        Finds the specified submission on the current page and follows the
        link.  Defaults to last submission.

        @param self: The marmoset instance
        @param submission: The submission number
        @return: None
        @raise BrowserException: If the submission does not exist
        """
        soup = self._current_soup()
        details = self._submission_details(soup)
        hrefs = [tag.attrs['href'] for tag in soup.find_all('a')
                 if 'view' in tag.text]

        submission = int(submission) if submission else len(details)
        index = -submission

        # check possible error conditions
        if len(hrefs) == 0:
            raise BrowserException("No submissions available.")
        elif submission <= 0:
            raise BrowserException("No submission #%s" % submission)
        elif submission > len(hrefs) and submission > len(details):
            raise BrowserException("Invalid submission number %s" % submission)
        elif submission > len(hrefs):
            raise BrowserException("Submission # %s %s " %
                                   (submission, details[index]))

        candidates = [l for l in self.browser.links()
                      if dict(l.attrs).get('href') in hrefs]
        if submission > len(candidates):
            raise BrowserException("Invalid submission number %s" % submission)
        self.browser.follow_link(candidates[index])

    def get_assignment_deadline(self, course, assignment):
        """
        Gets the deadline of an assignment as the first three
        whitespace-separated words of the first paragraph containing
        "Deadline".

        @param self: The marmoset instance
        @param course: The course we're looking at
        @param assignment: The assignment we're looking at
        @return: list of three strings, or an empty list when no such
                 paragraph exists or it has fewer than three words
        @raise BrowserException: If authentication fails
        """
        self._require_authentication()

        self.select_course(course)
        self.select_and_follow(assignment, 'view')
        soup = self._current_soup()

        paragraph = next((tag for tag in soup.find_all('p')
                          if "Deadline" in tag.text), None)
        if paragraph is None:
            return []

        words = paragraph.text.strip().split()
        return [] if len(words) < 3 else words[:3]

    def get_table(self, course, assignment, number_of_rows=sys.maxsize):
        """
        Reads the table rows of the current page.  Header cells seen so
        far are paired, in order, with the data cells of each row.  A cell
        containing a link is represented by base_url plus the link's href,
        and a cell with a colspan is repeated colspan times.

        @param self: The marmoset instance
        @param course: The course we're looking at (not used here)
        @param assignment: The assignment we're looking at (not used here)
        @param number_of_rows: The maximum number of table rows to read
        @return: list of OrderedDict, one per row that has data cells
        """
        soup = self._current_soup()

        headers, data = [], []
        for row in soup.find_all('tr')[:max(0, number_of_rows)]:
            for col in row.find_all('th'):
                words = col.text.split()
                if words:
                    headers.append(" ".join(words))

            td = []
            for col in row.find_all('td'):
                label = " ".join(col.text.split())
                anchor = col.find('a')
                if anchor is not None:
                    td.append(self.base_url + anchor.attrs['href'])
                elif 'colspan' in col.attrs:
                    td.extend([label] * int(col.attrs['colspan']))
                else:
                    td.append(label)

            if td:
                data.append(OrderedDict(zip(headers, td)))

        return data
コード例 #5
0
ファイル: marmosetbrowser.py プロジェクト: hkpeprah/marmoset
class Marmoset():
    """
    The marmoset class manages interaction with the Marmoset Submission
    Server through the AnonBrowser.

    @ivar base_url: The base url for the marmoset submission server.
    @ivar cookiefile: Path of the file handed to the browser for cookies.
    """
    base_url = "http://marmoset.student.cs.uwaterloo.ca"
    cookiefile = os.path.join(tempfile.gettempdir(), 'marmoset.session.cookies')

    def __init__(self, username=None, password=None, **kwargs):
        """
        Initializes the marmoset browser.  Handles navigation and parsing
        of pages.

        @param self: The marmoset instance
        @param username: The username stored for later authentication
        @param password: The password stored for later authentication
        @param kwargs: Extra keyword arguments; not used by this method
        @return: marmoset
        """
        self.browser = AnonBrowser(user_agents=DESKTOP_USER_AGENTS, cookiefile=self.cookiefile)
        self.username = username
        self.password = password

    def _require_authentication(self):
        """
        Authenticates with the stored credentials.

        @param self: The marmoset instance
        @return: None
        @raise BrowserException: If authenticate() returns False
        """
        if not self.authenticate():
            raise BrowserException("Invalid username/password combination for %s" % self.username)

    def _current_soup(self):
        """
        Reloads the current page and parses it.

        @param self: The marmoset instance
        @return: BeautifulSoup
        """
        return BeautifulSoup(self.browser.reload().read())

    def _browser_link(self, href):
        """
        Finds the first link known to the browser with the given href.

        @param self: The marmoset instance
        @param href: The href attribute to look for
        @return: The matching browser link, or None if there is none
        """
        return next((l for l in self.browser.links() if dict(l.attrs).get('href') == href), None)

    def _submission_details(self, soup):
        """
        Collects the text of the second-to-last cell of every table row
        that carries a class attribute; runs of tabs and newlines are
        collapsed into a single space.

        @param self: The marmoset instance
        @param soup: The parsed page
        @return: list of strings, in page order
        """
        details = []
        for row in soup.find_all('tr'):
            if not row.has_attr('class'):
                continue
            cells = row.find_all('td')
            if len(cells) < 2:
                # No second-to-last cell to read; nothing to record.
                continue
            details.append(re.sub(r'[\t\n\r]+', ' ', cells[-2].text))
        return details

    def authenticate(self, username=None, password=None):
        """
        Attempts to authenticate the user using the given username/password
        combination, falling back to the stored credentials.

        @param self: The marmoset instance
        @param username: The username to use (defaults to self.username)
        @param password: The password to use (defaults to self.password)
        @return: boolean, True when a form for the user was found and
                 submitted
        """
        response = self.browser.open(self.base_url)

        # A url containing "cas" is treated as the login page.
        if "cas" in self.browser.geturl():
            if username is None and password is None:
                username, password = self.login()

            self.browser.select_form(nr=0)
            self.browser.form['username'] = username
            self.browser.form['password'] = password

            response = self.browser.submit()

            # Still on the login page: the credentials were rejected.
            if "cas" in self.browser.geturl():
                return False

            self.browser.save_cookies()

        # With a still-valid cookie session the login branch is skipped,
        # so the username has to come from the instance.
        if username is None:
            username = self.username

        user = '******'
        soup = BeautifulSoup(response.read())

        # nr must be the position of the form among ALL forms on the page,
        # because that is how the browser's select_form(nr=...) counts.
        for nr, form in enumerate(soup.find_all('form')):
            inp = form.find(lambda tag: dict(tag.attrs).get('name') == user)
            if inp is not None and dict(inp.attrs).get('value') == username:
                self.browser.select_form(nr=nr)
                self.browser.submit()
                return True

        return False

    def login(self, username=None, password=None):
        """
        Stores the given credentials (keeping the current ones for any
        value not supplied) and clears the browser's cookies.

        @param self: The marmoset instance
        @param username: New username, if any
        @param password: New password, if any
        @return: tuple (username, password) now stored on the instance
        """
        self.username = username if username else self.username
        self.password = password if password else self.password
        self.browser.delete_cookies()
        self.browser.clear_cookies(self.cookiefile)

        return self.username, self.password

    def select_course(self, course):
        """
        Follows every link on the current page whose text matches the
        course name (case-insensitive) followed by whitespace, '(', ':'
        or the end of the text.

        @param self: The marmoset instance
        @param course: The course; used verbatim inside a regular
                       expression
        @return: None
        """
        # Compiled once; the pattern does not depend on the link.
        pattern = re.compile('.*' + course + r'(\s+|\(|:|$)', re.I)
        for link in self.browser.links():
            # Links without text cannot match and would make match() raise.
            if link.text and pattern.match(link.text):
                self.browser.follow_link(link)

    def select_and_follow(self, patt, target):
        """
        Finds the table row whose first link text equals patt
        (case-insensitive) and follows the link found inside the first
        element of that row whose text contains target.

        @param self: The marmoset instance
        @param patt: The pattern to look for
        @param target: Text identifying the link to follow in the row
        @return: None
        @raise NoMatchingQueryException: If no such link could be followed
        """
        soup = self._current_soup()
        wanted = patt.lower()

        for group in soup.find_all('tr'):
            anchors = group.find_all('a')
            if not anchors or anchors[0].text.strip().lower() != wanted:
                continue

            cell = group.find(lambda tag: target in tag.text)
            anchor = cell.find('a') if cell is not None else None
            if anchor is None:
                continue

            link = self._browser_link(anchor.attrs['href'])
            if link is None:
                continue

            # The remaining rows belong to the page we are leaving.
            self.browser.follow_link(link)
            return

        raise NoMatchingQueryException(patt)

    def submit(self, course, assignment, files, zipname=None):
        """
        Submits a file to the specified assignment and course.

        @param self: The marmoset instance
        @param course: The course
        @param assignment: The assignment
        @param files: A single filename, or a list/tuple of filenames that
                      is zipped with write_zip before submitting
        @param zipname: Name of the zip file to create; defaults to
                        self.zipname if set, else "<assignment>.zip"
        @return: boolean, True when the browser ends up on the project page
        @raise BrowserException: If authentication fails
        """
        self._require_authentication()

        if not zipname:
            zipname = getattr(self, 'zipname', "%s.zip" % assignment)

        self.select_course(course)
        self.select_and_follow(assignment, 'submit')
        self.browser.select_form(nr=0)

        # If multiple files, zip and submit
        if isinstance(files, (list, tuple)):
            filename = write_zip(zipname, list(files))
        else:
            filename = files

        # The handle stays open until the form has been submitted and is
        # then closed instead of being leaked.
        with open(filename, 'rb') as upload:
            self.browser.form.add_file(upload, 'application/octet-stream', filename)
            self.browser.submit()

        return "/view/project.jsp?projectPK=" in self.browser.geturl()

    def get_num_release_tokens(self, course, assignment, submission=None):
        """
        Gets the number of release tokens the user has available.

        @param self: The Marmoset instance
        @param course: The name of the course
        @param assignment: The assignment to look at
        @param submission: The submission number (see find_submission)
        @return: int
        @raise BrowserException: If authentication fails or the page
                                 shows no token count
        """
        self._require_authentication()

        self.select_course(course)
        self.select_and_follow(assignment, 'view')
        self.find_submission(submission)

        soup = self._current_soup()
        notices = [tag.text for tag in soup.find_all('p')
                   if re.search('^You currently have', tag.text)]

        if not notices:
            raise BrowserException("This submission has no release tests.")

        count = re.search("[0-9]+", " ".join(notices))
        if count is None:
            raise BrowserException("Could not determine the number of release tokens.")
        return int(count.group(0))

    def release(self, course, assignment, submission=None):
        """
        Release tests the specified submission for the assignment.

        @param self: The marmoset instance
        @param course: The name of the course
        @param assignment: The name of the assignment
        @param submission: The submission number
        @return: boolean, True when the release form was submitted
        @raise BrowserException: If no tokens are left or no release link
                                 is available
        """
        tokens = self.get_num_release_tokens(course, assignment, submission)
        soup = self._current_soup()
        release_links = soup.find_all(
            lambda tag: tag.name == 'a' and 'Click here to release' in tag.text)

        if release_links and tokens > 0:
            link = self._browser_link(release_links[0].attrs['href'])
            if link is not None:
                self.browser.follow_link(link)
                self.browser.select_form(nr=0)
                self.browser.submit()
                return True
        elif tokens == 0:
            raise BrowserException("Can't release test %s, %s tokens available."%(assignment, tokens))

        raise BrowserException("Can't release test %s at the moment."%(assignment))

    def long(self, course, assignment, submission=None):
        """
        Gets the detailed test results for a specified assignment based on
        the submission number.

        @param self: The marmoset instance
        @param course: The name of the course
        @param assignment: The name of the assignment
        @param submission: The submission number
        @return: list of OrderedDict (see get_table)
        @raise BrowserException: If authentication fails
        """
        self._require_authentication()

        self.select_course(course)
        self.select_and_follow(assignment, 'view')
        self.find_submission(submission)
        return self.get_table(course, assignment)

    def fetch(self, course, assignment, submissions=5):
        """
        Fetches the n most recent short results for a specified assignment.

        @param self: The marmoset instance
        @param course: The name of the course
        @param assignment: The name of the assignment
        @param submissions: How many results to fetch; a falsy value
                            means 5
        @return: list of OrderedDict (see get_table)
        @raise BrowserException: If authentication fails
        """
        self._require_authentication()

        submissions = int(submissions) if submissions else 5
        self.select_course(course)
        self.select_and_follow(assignment, 'view')
        # One extra row is requested on top of the submissions themselves.
        return self.get_table(course, assignment, submissions + 1)

    def download(self, course, assignment, submission=None, zipname=None):
        """
        Downloads the specified submission.  Defaults to the most recent
        submission.

        @param self: The marmoset instance
        @param course: The name of the course
        @param assignment: The name of the assignment
        @param submission: The submission number
        @param zipname: The name of the zipfile to create
        @return: The first element of what browser.retrieve returns
        @raise BrowserException: If authentication fails or the
                                 submission cannot be downloaded
        """
        self._require_authentication()

        self.select_course(course)
        self.select_and_follow(assignment, 'view')

        soup = self._current_soup()
        details = self._submission_details(soup)
        links = [tag.attrs['href'] for tag in soup.find_all('a')
                 if 'download' in tag.text]
        submission = int(submission) if submission else len(links)

        # Same numbering as find_submission: submission k is the k-th
        # entry counted from the end of the page.
        index = -submission

        # check possible error conditions
        if len(links) == 0:
            raise BrowserException("No submissions available.")
        elif submission <= 0:
            raise BrowserException("No submission %s" % submission)
        elif submission > len(links) and submission > len(details):
            raise BrowserException("Invalid submission number %s" % submission)
        elif submission > len(links):
            raise BrowserException("Submission # %s %s "%(submission, details[index]))

        link = self.base_url + links[index]
        zipname = zipname if zipname else assignment + '.zip'
        return self.browser.retrieve(link, zipname)[0]

    def find_submission(self, submission=None):
        """
        Finds the specified submission on the current page and follows the
        link.  Defaults to last submission.

        @param self: The marmoset instance
        @param submission: The submission number
        @return: None
        @raise BrowserException: If the submission does not exist
        """
        soup = self._current_soup()
        details = self._submission_details(soup)
        hrefs = [tag.attrs['href'] for tag in soup.find_all('a')
                 if 'view' in tag.text]

        submission = int(submission) if submission else len(details)
        index = -submission

        # check possible error conditions
        if len(hrefs) == 0:
            raise BrowserException("No submissions available.")
        elif submission <= 0:
            raise BrowserException("No submission #%s"%submission)
        elif submission > len(hrefs) and submission > len(details):
            raise BrowserException("Invalid submission number %s" % submission)
        elif submission > len(hrefs):
            raise BrowserException("Submission # %s %s "%(submission, details[index]))

        candidates = [l for l in self.browser.links()
                      if dict(l.attrs).get('href') in hrefs]
        if submission > len(candidates):
            raise BrowserException("Invalid submission number %s" % submission)
        self.browser.follow_link(candidates[index])

    def get_assignment_deadline(self, course, assignment):
        """
        Gets the deadline of an assignment as the first three
        whitespace-separated words of the first paragraph containing
        "Deadline".

        @param self: The marmoset instance
        @param course: The course we're looking at
        @param assignment: The assignment we're looking at
        @return: list of three strings, or an empty list when no such
                 paragraph exists or it has fewer than three words
        @raise BrowserException: If authentication fails
        """
        self._require_authentication()

        self.select_course(course)
        self.select_and_follow(assignment, 'view')
        soup = self._current_soup()

        paragraph = next((tag for tag in soup.find_all('p')
                          if "Deadline" in tag.text), None)
        if paragraph is None:
            return []

        words = paragraph.text.strip().split()
        return [] if len(words) < 3 else words[:3]

    def get_table(self, course, assignment, number_of_rows=sys.maxsize):
        """
        Reads the table rows of the current page.  Header cells seen so
        far are paired, in order, with the data cells of each row.  A cell
        containing a link is represented by base_url plus the link's href,
        and a cell with a colspan is repeated colspan times.

        @param self: The marmoset instance
        @param course: The course we're looking at (not used here)
        @param assignment: The assignment we're looking at (not used here)
        @param number_of_rows: The maximum number of table rows to read
        @return: list of OrderedDict, one per row that has data cells
        """
        soup = self._current_soup()

        headers, data = [], []
        for row in soup.find_all('tr')[:max(0, number_of_rows)]:
            for col in row.find_all('th'):
                words = col.text.split()
                if words:
                    headers.append(" ".join(words))

            td = []
            for col in row.find_all('td'):
                label = " ".join(col.text.split())
                anchor = col.find('a')
                if anchor is not None:
                    td.append(self.base_url + anchor.attrs['href'])
                elif 'colspan' in col.attrs:
                    td.extend([label] * int(col.attrs['colspan']))
                else:
                    td.append(label)

            if td:
                data.append(OrderedDict(zip(headers, td)))

        return data
コード例 #6
0
def parse_links(url):
    """
    Fetch `url` through an anonymized AnonBrowser and return whatever
    LinkParser.get_links_from_html extracts from the page's HTML.
    """
    browser = AnonBrowser()
    browser.anonymize()
    markup = browser.open(url).read()
    links = LinkParser.get_links_from_html(markup)
    return links
コード例 #7
0
from anonbrowser import AnonBrowser
from sys import argv

if __name__ == "__main__":
    # Manual smoke test: "rotate [N]" rotates the IP N times (default 3);
    # anything else fetches a page and prints it.
    test_browser = AnonBrowser(use_soup=True)
    rotate_requested = len(argv) > 1 and argv[1] == "rotate"
    if not rotate_requested:
        page = test_browser.get("http://cadelwatson.com/")
        print(page)
    else:
        print("Starting IP: {}".format(test_browser.check_ip()))
        if len(argv) > 2:
            limit = int(argv[2])
        else:
            limit = 3
        for _ in range(limit):
            test_browser._rotate_ip()