Example #1
0
    def get_basic_info(self):
        """
        Collects and stores basic information about the target

        The API index is requested once; the decoded document is cached in
        ``self.basic_info`` and returned directly on later calls.
        ``self.name``, ``self.description`` and ``self.has_v2`` are set from
        the index when the corresponding keys are present.

        :return: the decoded API index (a dict)
        :raises NoWordpressApi: if the request fails, the status code is
            >= 400, or the body is not a JSON object
        """
        # Serve the cached copy before doing any other work.
        if self.basic_info is not None:
            return self.basic_info

        rest_url = url_path_join(self.url, self.api_path)
        try:
            req = self.s.get(rest_url)
        except Exception as err:
            raise NoWordpressApi from err
        if req.status_code >= 400:
            raise NoWordpressApi

        # A body that is not JSON (for instance an HTML page served with a
        # success status) means there is no usable API at this location.
        try:
            basic_info = req.json()
        except ValueError as err:
            raise NoWordpressApi from err
        # Only a JSON object can carry the keys read below; do not cache
        # anything else.
        if not isinstance(basic_info, dict):
            raise NoWordpressApi
        self.basic_info = basic_info

        if 'name' in basic_info:
            self.name = basic_info['name']

        if 'description' in basic_info:
            self.description = basic_info['description']

        if 'wp/v2' in basic_info.get('namespaces', ()):
            self.has_v2 = True

        return basic_info
Example #2
0
    def crawl_pages(self, url):
        """
        Crawls all pages while there is at least one result for the given
        endpoint

        :param url: endpoint path relative to the API root; it is formatted
            with ``%`` and the page number, so it must contain one integer
            placeholder (e.g. ``%d``)
        :return: list with the entries of every page, in request order
        :raises WordPressApiNotV2: if a request fails for any reason other
            than HTTPError400
        """
        page = 1
        total_entries = 0
        total_pages = 0
        entries = []
        # The search filter is kept apart from the page template: urlencode()
        # emits percent-escapes (e.g. "%26") that would be misread as format
        # directives if they went through the "%" page substitution.
        search_query = ''
        if self.search_terms is not None:
            separator = '&' if '?' in url else '?'
            search_query = separator + urlencode({'search': self.search_terms})
        while True:
            rest_url = url_path_join(self.url, self.api_path,
                                     (url % page) + search_query)
            try:
                req = self.s.get(rest_url)
                # Totals are only read from the first response.
                if page == 1 and 'X-WP-Total' in req.headers:
                    total_entries = int(req.headers['X-WP-Total'])
                    total_pages = int(req.headers['X-WP-TotalPages'])
                    print("Number of entries: %d" % total_entries)
            except HTTPError400:
                # presumably raised when paging past the last page - the
                # entries gathered so far are still returned
                break
            except Exception as err:
                raise WordPressApiNotV2 from err

            try:
                json_content = get_content_as_json(req)
            except JSONDecodeError:
                break
            # Anything other than a non-empty list ends the crawl.
            if not isinstance(json_content, list) or not json_content:
                break

            entries += json_content
            if total_entries > 0:
                print_progress_bar(page, total_pages, length=70)
            page += 1

        return entries
Example #3
0
    def crawl_single_page(self, url):
        """
            Fetches one endpoint and returns its content decoded as JSON

            Returns None when the request raises HTTPError400 or
            HTTPError404, or when decoding the response raises
            JSONDecodeError. Any other request failure is re-raised as
            WordPressApiNotV2.
        """
        target = url_path_join(self.url, self.api_path, url)
        try:
            response = self.s.get(target)
        except (HTTPError400, HTTPError404):
            return None
        except Exception:
            raise WordPressApiNotV2

        try:
            return get_content_as_json(response)
        except JSONDecodeError:
            return None
Example #4
0
 def crawl_namespaces(self, ns):
     """
     Crawls all accessible get routes defined for the specified namespace.

     A route is requested when it declares both a namespace and endpoints,
     its URL has no ``(?P<`` pattern group and is not itself a namespace
     root, and at least one of its endpoints accepts GET without any
     required argument.

     :param ns: namespace to crawl, or "all" for every namespace; routes of
         the 'wp/v2' and empty namespaces are skipped in either case
     :return: dict mapping each crawled route URL to its decoded JSON body
     :raises NSNotFoundException: if ns is not "all" and is not one of the
         namespaces returned by get_namespaces()
     """
     namespaces = self.get_namespaces()
     routes = self.get_routes()
     ns_data = {}
     if ns != "all" and ns not in namespaces:
         raise NSNotFoundException
     for url, route in routes.items():
         if 'namespace' not in route or 'endpoints' not in route:
             continue
         # Skip routes with pattern groups and namespace root routes.
         if '(?P<' in url or url.lstrip('/') in namespaces:
             continue
         route_ns = route['namespace']
         # Explicit grouping of the original and/or mix: 'wp/v2' and ''
         # are excluded whatever ns is.
         if (ns != 'all' and route_ns != ns) or route_ns in ('wp/v2', ''):
             continue
         for endpoint in route['endpoints']:
             # An endpoint without a method list cannot be known to accept
             # GET, so it is skipped rather than aborting the crawl.
             if 'GET' not in endpoint.get('methods', ()):
                 continue
             args = endpoint.get('args')
             # A missing 'required' flag is treated as "not required".
             if isinstance(args, dict) and any(
                     isinstance(arg, dict) and arg.get('required')
                     for arg in args.values()):
                 continue
             rest_url = url_path_join(self.url, self.api_path, url)
             try:
                 ns_request = self.s.get(rest_url)
                 ns_data[url] = ns_request.json()
             except Exception:
                 # Best effort: a route that cannot be fetched or decoded
                 # is left out of the result.
                 continue
     return ns_data
Example #5
0
    def crawl_pages(self,
                    url,
                    start=None,
                    num=None,
                    search_terms=None,
                    display_progress=True):
        """
        Crawls all pages while there is at least one result for the given
        endpoint or tries to get pages from start to end

        :param url: endpoint path relative to the API root; it is formatted
            with ``%`` and the page number, so it must contain one integer
            placeholder (e.g. ``%d``)
        :param start: index of the first entry to collect, or None to begin
            with page 1
        :param num: maximum number of entries to collect, or None to apply
            no explicit cap
        :param search_terms: value for the ``search`` query parameter;
            ``self.search_terms`` is used when None
        :param display_progress: whether to draw a progress bar
        :return: tuple ``(entries, total_entries)``; total_entries comes from
            the X-WP-Total header and stays 0 if that header was never read
        :raises WordPressApiNotV2: if a request fails for any reason other
            than HTTPError400
        """
        if search_terms is None:
            search_terms = self.search_terms
        page = 1
        total_entries = 0
        total_pages = 0
        more_entries = True
        entries = []
        entries_left = 1
        # Page size used for the start/num arithmetic; it is only sent to
        # the server when start is given (otherwise the server default is
        # assumed to match - TODO confirm).
        per_page = 10
        # The search filter is kept apart from the page template: urlencode()
        # emits percent-escapes (e.g. "%26") that would be misread as format
        # directives if they went through the "%" page substitution.
        search_query = ''
        if search_terms is not None:
            separator = '&' if '?' in url else '?'
            search_query = separator + urlencode({'search': search_terms})
        if start is not None:
            page = math.floor(start / per_page) + 1
        if num is not None:
            entries_left = num
        while more_entries and entries_left > 0:
            rest_url = url_path_join(self.url, self.api_path,
                                     (url % page) + search_query)
            if start is not None:
                # Start a query string if the URL does not have one yet.
                rest_url += ('&' if '?' in rest_url else '?') + \
                    "per_page=%d" % per_page
            try:
                req = self.s.get(rest_url)
                # Totals are read from the first page that is requested.
                if (page == 1 or start is not None and page == math.floor(
                        start / per_page) + 1) and 'X-WP-Total' in req.headers:
                    total_entries = int(req.headers['X-WP-Total'])
                    total_pages = int(req.headers['X-WP-TotalPages'])
                    print("Total number of entries: %d" % total_entries)
                    # Clamp a start index that lies beyond the reported total.
                    if start is not None and total_entries < start:
                        start = total_entries - 1
            except HTTPError400:
                # presumably raised when paging past the last page - the
                # entries gathered so far are still returned
                break
            except Exception as err:
                raise WordPressApiNotV2 from err
            try:
                json_content = get_content_as_json(req)
                if type(json_content) is list and len(json_content) > 0:
                    if (start is None or start is not None and page >
                            math.floor(start / per_page) + 1) and num is None:
                        # No cap: take the whole page.
                        entries += json_content
                        # NOTE(review): with start set and num None,
                        # entries_left begins at 1, so this subtraction ends
                        # the loop after the first full page - confirm that
                        # this is intended.
                        if start is not None:
                            entries_left -= len(json_content)
                    elif start is not None and page == math.floor(
                            start / per_page) + 1:
                        # Page containing the start index: drop the entries
                        # in front of it.
                        if num is None or num is not None and len(
                                json_content[start % per_page:]) < num:
                            entries += json_content[start % per_page:]
                            if num is not None:
                                entries_left -= len(json_content[start %
                                                                 per_page:])
                        else:
                            entries += json_content[start %
                                                    per_page:(start %
                                                              per_page) + num]
                            entries_left = 0
                    else:
                        # Capped crawl on any other page: take at most
                        # entries_left entries.
                        if num is not None and entries_left > len(
                                json_content):
                            entries += json_content
                            entries_left -= len(json_content)
                        else:
                            entries += json_content[:entries_left]
                            entries_left = 0

                    if display_progress:
                        if num is None and start is None and total_entries >= 0:
                            print_progress_bar(page, total_pages, length=70)
                        elif num is None and start is not None and total_entries >= 0:
                            print_progress_bar(total_entries - start -
                                               entries_left,
                                               total_entries - start,
                                               length=70)
                        elif num is not None and total_entries > 0:
                            print_progress_bar(num - entries_left,
                                               num,
                                               length=70)
                else:
                    more_entries = False
            except JSONDecodeError:
                more_entries = False

            page += 1

        return (entries, total_entries)