def search(self, query, page=None, detailed=False):
    """Send a POST request and return the apps matching a search query.

    :param query: search term(s); passed through ``quote_plus``
    :param page: zero-based page number used to index ``self._pagtok``
        (0 when None)
    :param detailed: if True, return ``self._parse_multiple_apps`` of the
        response; otherwise parse the result cards found on the page
    :return: a list of apps matching the search terms
    :raises ValueError: if ``page`` is negative or past the last page token
    """
    page = 0 if page is None else int(page)

    # Negative pages must be rejected as well: a negative index would
    # silently wrap around and select a token from the end of the list.
    if not 0 <= page < len(self._pagtok):
        raise ValueError(
            'Parameter \'page\' ({page}) must be between 0 and 12.'.format(
                page=page))

    pagtok = self._pagtok[page]
    data = generate_post_data(0, 0, pagtok)

    # Work on a copy so the search-only keys do not leak into the shared
    # ``self.params`` that later requests reuse.
    params = dict(self.params)
    params.update({
        'q': quote_plus(query),
        'c': 'apps',
    })

    response = send_request('POST', self._search_url, data, params)

    if detailed:
        return self._parse_multiple_apps(response)

    # Parse the HTML only when the cards are actually needed.
    soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
    return [
        parse_card_info(app)
        for app in soup.select('div[data-uitype="500"]')
    ]
Beispiel #2
0
    def developer(self, developer, results=None, page=None, detailed=False):
        """Fetch a developer's app listing with a POST request.

        :param developer: the developer name (a string that is not made up
            of digits only), e.g. 'Disney'
        :param results: number of results to request; ``s.DEV_RESULTS`` when
            None
        :param page: page number; 0 when None
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response instead of the parsed result cards
        :return: a list of apps
        :raises ValueError: if ``developer`` is not a non-numeric string, or
            if ``(results // 20) * page`` falls outside 0..12
        """
        looks_like_name = (isinstance(developer, basestring)
                           and not developer.isdigit())
        if not looks_like_name:
            raise ValueError('Parameter \'developer\' must be the developer name, not the developer id.')

        if results is None:
            results = s.DEV_RESULTS
        if page is None:
            page = 0

        # The page token is picked by this derived index, not by ``page``.
        token_index = (results // 20) * page
        if not 0 <= token_index <= 12:
            raise ValueError('Page out of range. (results // 20) * page must be between 0 - 12')
        token = self._pagtok[token_index]

        target = build_url('developer', developer)
        payload = generate_post_data(results, 0, token)
        response = send_request('POST', target, payload, self.params)

        if detailed:
            return self._parse_multiple_apps(response)

        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        cards = soup.select('div[data-uitype="500"]')
        return [parse_card_info(card) for card in cards]
    def search(self, query, page=None, detailed=False):
        """Send a POST request and return the apps matching a search query.

        :param query: search term(s); passed through ``quote_plus``
        :param page: zero-based page number used to index ``self._pagtok``
            (0 when None)
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response; otherwise parse the cluster cards found on the page
        :return: a list of apps matching the search terms
        :raises ValueError: if ``page`` is negative or past the last page
            token
        """
        page = 0 if page is None else int(page)

        # A negative index would silently wrap around to the end of the
        # token list, so reject it together with too-large pages.
        if not 0 <= page < len(self._pagtok):
            raise ValueError(
                "Parameter 'page' ({page}) must be between 0 and 12.".format(
                    page=page))

        pagtok = self._pagtok[page]
        data = generate_post_data(0, 0, pagtok)

        # Per-request copy: keeps 'q' and 'c' out of the shared self.params.
        params = dict(self.params)
        params.update({"q": quote_plus(query), "c": "apps"})

        response = send_request("POST", self._search_url, data, params)

        if detailed:
            return self._parse_multiple_apps(response)

        # Parse the HTML only when the cards are actually needed.
        soup = BeautifulSoup(response.content, "lxml", from_encoding="utf8")
        return [
            parse_cluster_card_info(app)
            for app in soup.select("div.Vpfmgd")
        ]
Beispiel #4
0
    def categories(self, ignore_promotions=True):
        """Send a GET request to the store base URL and collect the
        categories linked from its dropdown menu.

        :param ignore_promotions: if True, skip links whose URL does not
            contain '/store/apps/category/'
        :return: a dict keyed by category id; each value is a dict with the
            keys 'name', 'url' and 'category_id'
        """
        categories = {}

        response = send_request('GET', s.BASE_URL, params=self.params)
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')

        category_links = soup.select(
            'div[id*="action-dropdown-children"] a[href*="category"]')
        age_query = '?age='

        for cat in category_links:
            url = urljoin(s.BASE_URL, cat.attrs['href'])
            category_id = url.split('/')[-1]
            # ``cat.string`` is None when the link holds more than one child
            # node, which made ``.strip()`` crash; gather the text instead.
            name = cat.get_text(' ', strip=True)

            # Links carrying an age query are all folded into one FAMILY
            # entry with the query string removed from the URL.
            if age_query in category_id:
                category_id = 'FAMILY'
                url = url.split('?')[0]
                name = 'Family'

            # The first link seen for an id wins.
            if category_id not in categories:
                if ignore_promotions and '/store/apps/category/' not in url:
                    continue

                categories[category_id] = {
                    'name': name,
                    'url': url,
                    'category_id': category_id
                }

        return categories
Beispiel #5
0
    def categories(self):
        """Send a GET request to the store base URL and collect the
        categories linked from its submenu.

        Note: May contain some promotions, e.g. "Popular Characters"

        :return: a dict keyed by category id; each value is a dict with the
            keys 'name', 'url' and 'category_id'
        """
        categories = {}
        # Only the submenu anchors are parsed out of the page.
        strainer = SoupStrainer('a', {'class': 'child-submenu-link'})

        response = send_request('GET', s.BASE_URL)
        soup = BeautifulSoup(response.content,
                             'lxml',
                             from_encoding='utf8',
                             parse_only=strainer)
        category_links = soup.select('a.child-submenu-link')
        age = '?age='

        for cat in category_links:
            url = urljoin(s.BASE_URL, cat.attrs['href'])
            category_id = url.split('/')[-1]
            # ``cat.string`` is None when the link holds more than one child
            # node, which made ``.strip()`` crash; gather the text instead.
            name = cat.get_text(' ', strip=True)

            # Links carrying an age query are all folded into one FAMILY
            # entry with the query string removed from the URL.
            if age in category_id:
                category_id = 'FAMILY'
                url = url.split('?')[0]
                name = 'Family'

            # The first link seen for an id wins.
            if category_id not in categories:
                categories[category_id] = {
                    'name': name,
                    'url': url,
                    'category_id': category_id}

        return categories
    def categories(self, ignore_promotions=True):
        """Send a GET request to the store base URL and collect the
        categories linked from its dropdown menu.

        :param ignore_promotions: if True, skip links whose URL does not
            contain "/store/apps/category/"
        :return: a dict keyed by category id; each value is a dict with the
            keys "name", "url" and "category_id"
        """
        categories = {}

        response = send_request("GET", s.BASE_URL, params=self.params)
        soup = BeautifulSoup(response.content, "lxml", from_encoding="utf8")

        category_links = soup.select(
            'div[id*="action-dropdown-children"] a[href*="category"]')
        age_query = "?age="

        for cat in category_links:
            url = urljoin(s.BASE_URL, cat.attrs["href"])
            category_id = url.split("/")[-1]
            # ``cat.string`` is None for links with more than one child node,
            # so ``.strip()`` on it would raise; collect the text instead.
            name = cat.get_text(" ", strip=True)

            # Age-filtered links all collapse into a single FAMILY entry.
            if age_query in category_id:
                category_id = "FAMILY"
                url = url.split("?")[0]
                name = "Family"

            # Keep the first link seen for each id.
            if category_id not in categories:
                if ignore_promotions and "/store/apps/category/" not in url:
                    continue

                categories[category_id] = {
                    "name": name,
                    "url": url,
                    "category_id": category_id,
                }

        return categories
    def developer(self, developer, results=None, page=None, detailed=False):
        """POST to the developer URL and return that developer's apps.

        :param developer: developer name; must be a string that is not
            purely numeric
        :param results: result count sent with the request
            (``s.DEV_RESULTS`` when None)
        :param page: page number (0 when None)
        :param detailed: if True, the response is handed to
            ``self._parse_multiple_apps``; otherwise result cards are parsed
        :return: a list of apps
        :raises ValueError: on a non-string or all-digit ``developer``, or
            when ``(results // 20) * page`` is not within 0..12
        """
        name_error = 'Parameter \'developer\' must be the developer name, not the developer id.'
        range_error = 'Page out of range. (results // 20) * page must be between 0 - 12'

        if not isinstance(developer, basestring) or developer.isdigit():
            raise ValueError(name_error)

        results = results if results is not None else s.DEV_RESULTS
        page = page if page is not None else 0

        index = (results // 20) * page
        if not 0 <= index <= 12:
            raise ValueError(range_error)
        page_token = self._pagtok[index]

        dev_url = build_url('developer', developer)
        post_data = generate_post_data(results, 0, page_token)
        response = send_request('POST', dev_url, post_data, self.params)

        if not detailed:
            markup = BeautifulSoup(response.content,
                                   'lxml',
                                   from_encoding='utf8')
            return [
                parse_card_info(card)
                for card in markup.select('div[data-uitype="500"]')
            ]

        return self._parse_multiple_apps(response)
Beispiel #8
0
    def search(self, query, page=None, detailed=False):
        """Sends a POST request and retrieves a list of applications matching
        the query term(s).

        :param query: search query term(s) to retrieve matching apps
        :param page: zero-based page number used to index ``self._pagtok``
            (0 when None)
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response; otherwise parse the result cards found on the page
        :return: a list of apps matching search terms
        :raises ValueError: if ``page`` is negative or past the last page
            token
        """
        page = 0 if page is None else int(page)

        # Negative values would index the token list from its end, so they
        # are rejected along with pages beyond the last token.
        if not 0 <= page < len(self._pagtok):
            raise ValueError('Parameter \'page\' ({page}) must be between 0 and 12.'.format(
                page=page))

        pagtok = self._pagtok[page]
        data = generate_post_data(0, 0, pagtok)

        # Copy the shared params so 'q' and 'c' stay local to this request.
        params = dict(self.params)
        params.update({
            'q': quote_plus(query),
            'c': 'apps',
        })

        response = send_request('POST', self._search_url, data, params)

        if detailed:
            return self._parse_multiple_apps(response)

        # Parse the HTML only when the cards are actually needed.
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        return [parse_card_info(app)
                for app in soup.select('div[data-uitype="500"]')]
Beispiel #9
0
    def developer(self, developer, results=None, page=None, detailed=False):
        """Sends a POST request and retrieves a list of the developer's
        published applications on the Play Store.

        :param developer: developer name to retrieve apps from, e.g. 'Disney'
        :param results: the number of app results to retrieve
            (``s.DEV_RESULTS`` when None)
        :param page: the page number to retrieve (0 when None)
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response instead of the parsed result cards
        :return: a list of app dictionaries
        :raises ValueError: if ``developer`` is not a non-numeric string, or
            if ``(results // 20) * page`` falls outside 0..12
        """
        if not isinstance(developer, basestring) or developer.isdigit():
            raise ValueError('Parameter \'developer\' must be the developer name, not the developer id.')

        results = s.DEV_RESULTS if results is None else results
        page = 0 if page is None else page
        page_num = (results // 20) * page
        if not 0 <= page_num <= 12:
            raise ValueError('Page out of range. (results // 20) * page must be between 0 - 12')
        pagtok = self._pagtok[page_num]

        url = build_url('developer', developer)
        data = generate_post_data(results, 0, pagtok)
        response = send_request('POST', url, data, self.params)

        if detailed:
            apps = self._parse_multiple_apps(response)
        else:
            soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
            # The attribute value must be quoted: a bare 500 is not a valid
            # CSS identifier and the selector is rejected as malformed.
            apps = [parse_card_info(app)
                    for app in soup.select('div[data-uitype="500"]')]

        return apps
Beispiel #10
0
    def category_items(self, category, detailed=False):
        """Return the apps linked from a category's main page.

        :param category: category passed to ``build_category_url``
        :param detailed: if True, return the result of
            ``multi_futures_app_request`` for the collected ids; otherwise
            return one ``{'app_id': ...}`` dict per id
        :return: a list of ``{'app_id': ...}`` dicts, or whatever
            ``multi_futures_app_request`` returns when ``detailed`` is True
        """
        response = send_request("GET", build_category_url(category))
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')

        # Fall back to the promo selector when the regular one finds nothing.
        links = (soup.select(self.list_item_selector)
                 or soup.select(self.list_item_promo_selector))

        app_ids = []
        for link in links:
            query = get_query_params(link['href'])
            app_ids.append(query['id'][0])

        if detailed:
            return multi_futures_app_request(app_ids=app_ids)

        return [{'app_id': found_id} for found_id in app_ids]
    def categories(self, ignore_promotions=True):
        """Send a GET request to the store base URL and collect the
        categories linked from its submenu.

        :param ignore_promotions: if True, skip links whose URL does not
            contain '/store/apps/category/'
        :return: a dict keyed by category id; each value is a dict with the
            keys 'name', 'url' and 'category_id'
        """
        categories = {}
        # Only the submenu lists are parsed out of the page.
        strainer = SoupStrainer('ul', {'class': 'submenu-item-wrapper'})

        response = send_request('GET', s.BASE_URL, params=self.params)
        soup = BeautifulSoup(response.content,
                             'lxml',
                             from_encoding='utf8',
                             parse_only=strainer)
        category_links = soup.select('a.child-submenu-link')
        category_links += soup.select('a.parent-submenu-link')
        age_query = '?age='

        for cat in category_links:
            url = urljoin(s.BASE_URL, cat.attrs['href'])
            category_id = url.split('/')[-1]
            # ``cat.string`` is None when the link holds more than one child
            # node, which made ``.strip()`` crash; gather the text instead.
            name = cat.get_text(' ', strip=True)

            # Age-filtered links all collapse into a single FAMILY entry.
            if age_query in category_id:
                category_id = 'FAMILY'
                url = url.split('?')[0]
                name = 'Family'

            # Keep the first link seen for each id.
            if category_id not in categories:
                if ignore_promotions and '/store/apps/category/' not in url:
                    continue

                categories[category_id] = {
                    'name': name,
                    'url': url,
                    'category_id': category_id
                }

        return categories
Beispiel #12
0
    def cluster_items(self, gsr, detailed=False):
        """Return the apps linked from a cluster page.

        Example cluster page:
        https://play.google.com/store/apps/collection/cluster?clp=0g4cChoKFHRvcHNlbGxpbmdfZnJlZV9HQU1FEAcYAw%3D%3D:S:ANO1ljJ_Y5U&gsr=Ch_SDhwKGgoUdG9wc2VsbGluZ19mcmVlX0dBTUUQBxgD:S:ANO1ljL4b8c

        :param gsr: cluster id passed to ``build_cluster_url``
        :param detailed: if True, return the result of
            ``multi_futures_app_request`` for the collected ids; otherwise
            return one ``{'app_id': ...}`` dict per id
        :return: a list of ``{'app_id': ...}`` dicts, or whatever
            ``multi_futures_app_request`` returns when ``detailed`` is True
        """
        page = send_request("GET", build_cluster_url(gsr=gsr))
        soup = BeautifulSoup(page.content, 'lxml', from_encoding='utf8')

        anchors = soup.select(self.list_item_selector)
        if not anchors:
            # Nothing matched the regular selector; try the promo layout.
            anchors = soup.select(self.list_item_promo_selector)

        app_ids = [get_query_params(a['href'])['id'][0] for a in anchors]

        if detailed:
            return multi_futures_app_request(app_ids=app_ids)

        return [{'app_id': found_id} for found_id in app_ids]
Beispiel #13
0
    def search(self, query, page=None, detailed=False):
        """Sends a POST request and retrieves a list of applications matching
        the query term(s).

        :param query: search query term(s) to retrieve matching apps
        :param page: zero-based page number used to index ``self._pagtok``
            (0 when None)
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response; otherwise parse the result cards found on the page
        :return: a list of apps matching search terms
        :raises ValueError: if ``page`` is negative or past the last page
            token
        """
        page = 0 if page is None else int(page)

        # Negative values would index the token list from its end, so they
        # are rejected along with pages beyond the last token.
        if not 0 <= page < len(self._pagtok):
            raise ValueError('Parameter \'page\' ({page}) must be between 0 and 12.'.format(
                page=page))

        pagtok = self._pagtok[page]
        data = generate_post_data(0, 0, pagtok)

        # Copy the shared params so 'q' and 'c' stay local to this request.
        params = dict(self.params)
        params.update({
            'q': quote_plus(query),
            'c': 'apps',
        })

        response = send_request('POST', self._search_url, data, params)

        if detailed:
            return self._parse_multiple_apps(response)

        # Parse the HTML only when the cards are actually needed. The
        # attribute value is quoted because a bare 500 is not a valid CSS
        # identifier and the selector is rejected as malformed.
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        return [parse_card_info(app)
                for app in soup.select('div[data-uitype="500"]')]
Beispiel #14
0
    def collection(
        self,
        collection_id,
        category_id=None,
        results=None,
        page=None,
        age=None,
        detailed=False,
    ):
        """Sends a POST request and fetches a list of applications belonging to
        the collection and an optional category.

        :param collection_id: the collection id, e.g. 'NEW_FREE'; must be a
            key of ``COLLECTIONS`` or start with "promotion".
        :param category_id: (optional) the category id, e.g. 'GAME_ACTION';
            must be a key of ``CATEGORIES`` when given.
        :param results: the number of apps to retrieve at a time (at most
            120; ``s.NUM_RESULTS`` when None).
        :param page: page number to retrieve; limitation: page * results <= 500.
        :param age: key of ``AGE_RANGE`` to filter by (only applied when the
            category starts with "FAMILY")
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response instead of the parsed result cards
        :return: a list of app dictionaries
        :raises ValueError: on an unknown collection or category id, more
            than 120 results, or page * results above 500
        """
        if collection_id not in COLLECTIONS and not collection_id.startswith(
                "promotion"):
            raise ValueError("Invalid collection_id '{collection}'.".format(
                collection=collection_id))
        collection_name = COLLECTIONS.get(collection_id) or collection_id

        # No category maps to the empty string; an unknown id maps to None.
        category = "" if category_id is None else CATEGORIES.get(category_id)
        if category is None:
            raise ValueError("Invalid category_id '{category}'.".format(
                category=category_id))

        results = s.NUM_RESULTS if results is None else results
        if results > 120:
            raise ValueError("Number of results cannot be more than 120.")

        page = 0 if page is None else page
        if page * results > 500:
            raise ValueError(
                "Start (page * results) cannot be greater than 500.")

        # Use a per-request copy: writing the age filter into self.params
        # would keep applying it to every later request.
        params = dict(self.params)
        if category.startswith("FAMILY") and age is not None:
            params["age"] = AGE_RANGE[age]

        url = build_collection_url(category, collection_name)
        data = generate_post_data(results, page)
        response = send_request("POST", url, data, params)

        if detailed:
            apps = self._parse_multiple_apps(response)
        else:
            soup = BeautifulSoup(response.content,
                                 "lxml",
                                 from_encoding="utf8")
            apps = [
                parse_card_info(app_card)
                for app_card in soup.select('div[data-uitype="500"]')
            ]

        return apps
Beispiel #15
0
    def test_request_with_params(self):
        """A GET with a params dict returns 200 and a URL carrying the
        plus-encoded query string."""
        query = {'q': 'google play store'}
        response = send_request('GET', self.url, params=query)
        wanted_url = '{0}?q=google+play+store'.format(self.url)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.url, wanted_url)
    def test_request_with_params(self):
        """Check that query params are appended to the request URL and the
        request succeeds with status 200."""
        response = send_request("GET",
                                self.url,
                                params={"q": "google play store"})
        url_with_query = "{0}{1}".format(self.url, "?q=google+play+store")

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.url, url_with_query)
Beispiel #17
0
    def reviews(self, app_id, page=1):
        """Sends a POST request and retrieves a list of reviews for
        the specified app.

        :param app_id: the app to retrieve details from, e.g. 'com.nintendo.zaaa'
        :param page: the page number to retrieve; max is 10
        :return: a list of review dicts with the keys 'review_id',
            'review_permalink', 'author_name', 'review_date',
            'current_rating', 'review_title', 'review_body' and, when the
            avatar element has a style attribute, 'author_image'
        :raises ValueError: if the response does not contain the expected
            '[["ecr"' payload marker or the payload is not valid JSON
        """
        data = {
            'reviewType': 0,
            'pageNum': page,
            'id': app_id,
            'reviewSortOrder': 4,
            'xhr': 1,
            'hl': self.language
        }
        # Per-request copy so 'authuser' does not stick to self.params.
        params = dict(self.params)
        params['authuser'] = '******'

        response = send_request('POST', s.REVIEW_URL, data, params)
        content = response.text

        # str.find returns -1 on a miss; slicing from -1 would quietly keep
        # only the last character, so fail loudly instead.
        start = content.find('[["ecr"')
        if start == -1:
            raise ValueError(
                'Unexpected reviews response for {app}: '
                'payload marker not found.'.format(app=app_id))

        payload = json.loads(content[start:].strip())
        html = payload[0][2]
        # The markup comes out of decoded JSON, so it is text already and
        # no from_encoding is passed (it only applies to byte input).
        soup = BeautifulSoup(html, 'lxml')

        reviews = []
        for element in soup.select('.single-review'):
            review = {}

            # The avatar URL is embedded in the element's inline CSS.
            avatar_style = element.select_one('.author-image').get('style')
            if avatar_style:
                sheet = cssutils.css.CSSStyleSheet()
                sheet.add('tmp { %s }' % avatar_style)
                review['author_image'] = list(cssutils.getUrls(sheet))[0]

            review_header = element.select_one('.review-header')
            review['review_id'] = review_header.get('data-reviewid', '')
            review['review_permalink'] = review_header.select_one('.reviews-permalink').get('href')

            review['author_name'] = review_header.select_one('.author-name').text
            review['review_date'] = review_header.select_one('.review-date').text

            # The rating is read from a CSS width percentage; every 20
            # percent counts as one rating point.
            curr_rating = review_header.select_one('.current-rating').get('style')
            width_percent = int(str(cssutils.parseStyle(curr_rating).width).replace('%', ''))
            review['current_rating'] = width_percent // 20

            body_elem = element.select_one('.review-body')
            # Pull the title out and drop the link so that the remaining
            # text of the body element is the review body only.
            review_title = body_elem.select_one('.review-title').extract()
            body_elem.select_one('.review-link').decompose()
            review['review_title'] = review_title.text
            review['review_body'] = body_elem.text

            reviews.append(review)

        return reviews
    def suggestions(self, query):
        """Send a GET request and return autocomplete suggestions for a query.

        :param query: search term(s) to get suggestions for; must be truthy
        :return: a list with the 's' value of each item in the JSON response
        :raises ValueError: if ``query`` is empty
        """
        if not query:
            raise ValueError("Cannot get suggestions for an empty query.")

        # Build the params for this request only; updating self.params in
        # place would leak 'json', 'c' and 'query' into later requests.
        params = dict(self.params)
        params.update({
            'json': 1,
            'c': 0,
            'query': query,
        })

        response = send_request('GET',
                                self._suggestion_url,
                                params=params)
        suggestions = [q['s'] for q in response.json()]
        return suggestions
Beispiel #19
0
    def suggestions(self, query):
        """Fetch autocomplete suggestions for a query with a GET request.

        :param query: search term(s) to get suggestions for; must be truthy
        :return: a list with the 's' value of each item in the JSON response
        :raises ValueError: if ``query`` is empty
        """
        if not query:
            raise ValueError("Cannot get suggestions for an empty query.")

        # Fixed request parameters plus the caller's query.
        request_params = {
            'json': 1,
            'c': 0,
            'hl': 'en',
            'gl': 'us',
            'query': query,
        }
        payload = send_request(
            'GET', self._suggestion_url, params=request_params).json()

        return [entry['s'] for entry in payload]
Beispiel #20
0
    def collection(self, collection_id, category_id=None, results=None,
                   page=None, age=None, detailed=False):
        """Sends a POST request and fetches a list of applications belonging to
        the collection and an optional category.

        :param collection_id: the collection id, e.g. 'NEW_FREE'; must be a
            key of ``COLLECTIONS`` or start with 'promotion'.
        :param category_id: (optional) the category id, e.g. 'GAME_ACTION';
            must be a key of ``CATEGORIES`` when given.
        :param results: the number of apps to retrieve at a time (at most
            120; ``s.NUM_RESULTS`` when None).
        :param page: page number to retrieve; limitation: page * results <= 500.
        :param age: key of ``AGE_RANGE`` to filter by (only applied when the
            category starts with 'FAMILY')
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response instead of the parsed result cards
        :return: a list of app dictionaries
        :raises ValueError: on an unknown collection or category id, more
            than 120 results, or page * results above 500
        """
        if (collection_id not in COLLECTIONS and
                not collection_id.startswith('promotion')):
            raise ValueError('Invalid collection_id \'{collection}\'.'.format(
                collection=collection_id))
        collection_name = COLLECTIONS.get(collection_id) or collection_id

        # No category maps to the empty string; an unknown id maps to None.
        category = '' if category_id is None else CATEGORIES.get(category_id)
        if category is None:
            raise ValueError('Invalid category_id \'{category}\'.'.format(
                category=category_id))

        results = s.NUM_RESULTS if results is None else results
        if results > 120:
            raise ValueError('Number of results cannot be more than 120.')

        page = 0 if page is None else page
        if page * results > 500:
            raise ValueError('Start (page * results) cannot be greater than 500.')

        # Use a per-request copy: writing the age filter into self.params
        # would keep applying it to every later request.
        params = dict(self.params)
        if category.startswith('FAMILY') and age is not None:
            params['age'] = AGE_RANGE[age]

        url = build_collection_url(category, collection_name)
        data = generate_post_data(results, page)
        response = send_request('POST', url, data, params)

        if detailed:
            apps = self._parse_multiple_apps(response)
        else:
            soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
            apps = [parse_card_info(app_card)
                    for app_card in soup.select('div[data-uitype="500"]')]

        return apps
    def collection(self,
                   collection_id,
                   category_id=None,
                   results=None,
                   page=None,
                   age=None,
                   detailed=False):
        """Send a POST request and return the apps of a collection,
        optionally restricted to a category.

        :param collection_id: key of ``COLLECTIONS``, or an id starting with
            'promotion'
        :param category_id: (optional) key of ``CATEGORIES``
        :param results: number of apps to request, at most 120
            (``s.NUM_RESULTS`` when None)
        :param page: page number (0 when None); page * results may not
            exceed 500
        :param age: key of ``AGE_RANGE``; only applied when the category
            starts with 'FAMILY'
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response instead of the parsed result cards
        :return: a list of apps
        :raises ValueError: on an unknown collection or category id, more
            than 120 results, or page * results above 500
        """
        if (collection_id not in COLLECTIONS
                and not collection_id.startswith('promotion')):
            raise ValueError('Invalid collection_id \'{collection}\'.'.format(
                collection=collection_id))
        collection_name = COLLECTIONS.get(collection_id) or collection_id

        # No category maps to the empty string; an unknown id maps to None.
        category = '' if category_id is None else CATEGORIES.get(category_id)
        if category is None:
            raise ValueError('Invalid category_id \'{category}\'.'.format(
                category=category_id))

        results = s.NUM_RESULTS if results is None else results
        if results > 120:
            raise ValueError('Number of results cannot be more than 120.')

        page = 0 if page is None else page
        if page * results > 500:
            raise ValueError(
                'Start (page * results) cannot be greater than 500.')

        # Use a per-request copy: writing the age filter into self.params
        # would keep applying it to every later request.
        params = dict(self.params)
        if category.startswith('FAMILY') and age is not None:
            params['age'] = AGE_RANGE[age]

        url = build_collection_url(category, collection_name)
        data = generate_post_data(results, page)
        response = send_request('POST', url, data, params)

        if detailed:
            apps = self._parse_multiple_apps(response)
        else:
            soup = BeautifulSoup(response.content,
                                 'lxml',
                                 from_encoding='utf8')
            apps = [
                parse_card_info(app_card)
                for app_card in soup.select('div[data-uitype="500"]')
            ]

        return apps
Beispiel #22
0
    def suggestions(self, query):
        """Sends a GET request and retrieves a list of autocomplete suggestions
        matching the query term(s).

        :param query: search query term(s) to retrieve autocomplete
            suggestions; must be truthy
        :return: a list with the "s" value of each item in the JSON response
        :raises ValueError: if ``query`` is empty
        """
        if not query:
            raise ValueError("Cannot get suggestions for an empty query.")

        # Per-request copy: updating self.params in place would leak
        # "json", "c" and "query" into later requests.
        params = dict(self.params)
        params.update({"json": 1, "c": 0, "query": query})

        response = send_request("GET",
                                self._suggestion_url,
                                params=params)
        suggestions = [q["s"] for q in response.json()]
        return suggestions
    def details(self, app_id):
        """GET an app's details page and return its parsed details.

        :param app_id: id passed to ``build_url('details', ...)``
        :return: the result of ``parse_app_details`` with 'app_id' and 'url'
            added
        :raises ValueError: if the request raises an HTTPError
        """
        page_url = build_url('details', app_id)

        try:
            page = send_request('GET', page_url, params=self.params)
            markup = BeautifulSoup(page.content,
                                   'lxml',
                                   from_encoding='utf8')
        except requests.exceptions.HTTPError as err:
            # HTTP failures are reported to the caller as a bad app id.
            message = 'Invalid application ID: {app}. {error}'.format(
                app=app_id, error=err)
            raise ValueError(message)

        parsed = parse_app_details(markup)
        parsed.update(app_id=app_id, url=page_url)
        return parsed
    def similar(self, app_id, detailed=False, **kwargs):
        """Send a GET request, following redirects, and return the apps
        listed as similar to the given app.

        :param app_id: id passed to ``build_url('similar', ...)``
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response; otherwise parse the result cards found on the page
        :param kwargs: accepted for compatibility; not used here
        :return: a list of similar apps
        """
        url = build_url('similar', app_id)
        response = send_request('GET',
                                url,
                                params=self.params,
                                allow_redirects=True)

        if detailed:
            return self._parse_multiple_apps(response)

        # Parse the HTML only when the cards are actually needed.
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        return [
            parse_card_info(app)
            for app in soup.select('div[data-uitype="500"]')
        ]
Beispiel #25
0
    def similar(self, app_id, detailed=False):
        """Sends a GET request, follows the redirect, and retrieves a list of
        applications similar to the specified app.

        :param app_id: the app to retrieve details from, e.g. 'com.nintendo.zaaa'
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response; otherwise parse the result cards found on the page
        :return: a list of similar apps
        """
        url = build_url('similar', app_id)
        response = send_request('GET', url, allow_redirects=True)

        if detailed:
            return self._parse_multiple_apps(response)

        # Parse the HTML only when the cards are actually needed. The
        # attribute value is quoted because a bare 500 is not a valid CSS
        # identifier and the selector is rejected as malformed.
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        return [self._parse_card_info(app)
                for app in soup.select('div[data-uitype="500"]')]
Beispiel #26
0
    def details(self, app_id):
        """Fetch and parse the details page of one application.

        :param app_id: the app to look up, e.g. 'com.nintendo.zaaa'
        :return: the dict from ``parse_app_details`` extended with "app_id"
            and "url"
        :raises ValueError: if the request raises an HTTPError
        """
        details_url = build_url("details", app_id)

        try:
            reply = send_request("GET", details_url, params=self.params)
            soup = BeautifulSoup(reply.content, "lxml", from_encoding="utf8")
        except requests.exceptions.HTTPError as http_error:
            raise ValueError(
                "Invalid application ID: {app}. {error}".format(
                    app=app_id, error=http_error))

        result = parse_app_details(soup)
        # Record which app and URL the details came from.
        extras = {"app_id": app_id, "url": details_url}
        result.update(extras)
        return result
Beispiel #27
0
    def suggestions(self, query):
        """Sends a GET request and retrieves a list of autocomplete suggestions
        matching the query term(s).

        :param query: search query term(s) to retrieve autocomplete
            suggestions; must be truthy
        :return: a list with the 's' value of each item in the JSON response
        :raises ValueError: if ``query`` is empty
        """
        if not query:
            raise ValueError("Cannot get suggestions for an empty query.")

        # Per-request copy: updating self.params in place would leak
        # 'json', 'c' and 'query' into later requests.
        params = dict(self.params)
        params.update({
            'json': 1,
            'c': 0,
            'query': query,
        })

        response = send_request('GET',
                                self._suggestion_url,
                                params=params)
        suggestions = [q['s'] for q in response.json()]
        return suggestions
Beispiel #28
0
    def details(self, app_id):
        """Request an app's details page and return the parsed result.

        :param app_id: the app to look up, e.g. 'com.nintendo.zaaa'
        :return: the dict from ``parse_app_details`` with 'app_id' and 'url'
            set
        :raises ValueError: if the request raises an HTTPError
        """
        url = build_url('details', app_id)

        try:
            html = send_request('GET', url, params=self.params).content
            soup = BeautifulSoup(html, 'lxml', from_encoding='utf8')
        except requests.exceptions.HTTPError as exc:
            # Surface HTTP failures as an invalid-id error.
            raise ValueError('Invalid application ID: {app}. {error}'.format(
                app=app_id, error=exc))

        details = parse_app_details(soup)
        details.update({'app_id': app_id, 'url': url})
        return details
Beispiel #29
0
    def details(self, app_id):
        """Look up one application and return its parsed details.

        :param app_id: the app to look up, e.g. 'com.nintendo.zaaa'
        :return: the dict from ``parse_app_details``, updated with the
            'app_id' and the 'url' that was requested
        :raises ValueError: if the request raises an HTTPError
        """
        target = build_url('details', app_id)
        error_template = 'Invalid application ID: {app}. {error}'

        try:
            response = send_request('GET', target, params=self.params)
            document = BeautifulSoup(
                response.content, 'lxml', from_encoding='utf8')
        except requests.exceptions.HTTPError as failure:
            raise ValueError(error_template.format(app=app_id, error=failure))

        info = parse_app_details(document)
        info.update(app_id=app_id, url=target)
        return info
Beispiel #30
0
    def similar(self, app_id, detailed=False, **kwargs):
        """Sends a GET request, follows the redirect, and retrieves a list of
        applications similar to the specified app.

        :param app_id: app to retrieve details from, e.g. 'com.nintendo.zaaa'
        :param detailed: if True, return ``self._parse_multiple_apps`` of the
            response; otherwise parse the result cards found on the page
        :param kwargs: accepted for compatibility; not used here
        :return: a list of similar apps
        """
        url = build_url('similar', app_id)
        response = send_request('GET',
                                url,
                                params=self.params,
                                allow_redirects=True)

        if detailed:
            return self._parse_multiple_apps(response)

        # Parse the HTML only when the cards are actually needed.
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        return [parse_card_info(app)
                for app in soup.select('div[data-uitype="500"]')]
Beispiel #31
0
    def category_clusters(self, category):
        """Fetch a category page and map each cluster title to its 'gsr' value.

        The page is requested without `self.params`.

        :param category: the category, passed on to `build_category_url`
        :return: a dictionary whose keys are the `<h2>` text of every matched
            cluster link and whose values are the first 'gsr' entry that
            `get_query_params` yields for that link's href
        """
        response = send_request("GET", build_category_url(category=category))
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')

        # Anchors that wrap a cluster heading on the category page.
        anchors = soup.select(
            "c-wiz > c-wiz > div > div.Z3lOXb > div.xwY9Zc > a")

        clusters = {}
        for anchor in anchors:
            heading = anchor.h2.text
            # presumably get_query_params returns lists of values per key
            # (hence the [0]) -- verify against its definition
            clusters[heading] = get_query_params(anchor['href'])['gsr'][0]

        return clusters
Beispiel #32
0
    def categories(self, ignore_promotions=True):
        """Sends a GET request to the front page (app store base url) and
        collects the categories linked from its submenu.

        :param ignore_promotions: if True, skip links whose URL does not
            contain '/store/apps/category/'
        :return: a dictionary keyed by category id; each value is a dict
            holding that category's 'name', 'url' and 'category_id'
        """
        found = {}
        # Only the submenu lists are parsed; the rest of the page is ignored.
        menu_only = SoupStrainer('ul', {'class': 'submenu-item-wrapper'})

        response = send_request('GET', s.BASE_URL, params=self.params)
        soup = BeautifulSoup(response.content,
                             'lxml',
                             from_encoding='utf8',
                             parse_only=menu_only)
        links = (soup.select('a.child-submenu-link') +
                 soup.select('a.parent-submenu-link'))

        for link in links:
            url = urljoin(s.BASE_URL, link.attrs['href'])
            category_id = url.rsplit('/', 1)[-1]
            name = link.string.strip()

            # Age-filtered links are all folded into a single 'FAMILY' entry
            # whose URL has the query string removed.
            if '?age=' in category_id:
                category_id, name = 'FAMILY', 'Family'
                url = url.partition('?')[0]

            # The first link seen for a given id wins.
            if category_id in found:
                continue
            if ignore_promotions and '/store/apps/category/' not in url:
                continue

            found[category_id] = {
                'name': name,
                'url': url,
                'category_id': category_id}

        return found
Beispiel #33
0
    def similar(self, app_id, detailed=False, **kwargs):
        """Sends a GET request, follows the redirect, and retrieves a list of
        applications similar to the specified app.

        :param app_id: app to retrieve details from, e.g. 'com.nintendo.zaaa'
        :param detailed: if True, sends request per app for its full detail
        :param kwargs: extra keyword arguments are accepted but not used by
            this method
        :return: a list of similar apps
        """
        url = build_url("similar", app_id)
        response = send_request("GET",
                                url,
                                params=self.params,
                                allow_redirects=True)

        if detailed:
            # The raw response is handed over as-is, so building a soup for
            # it here would be wasted work.
            return self._parse_multiple_apps(response)

        soup = BeautifulSoup(response.content, "lxml", from_encoding="utf8")
        return [
            parse_cluster_card_info(app)
            for app in soup.select("div.Vpfmgd")
        ]
    def reviews(self, app_id_list, page=0):
        """Sends one POST request per app and parses a page of its reviews.

        Note that every request sets `self.params['authuser']`, and that
        change stays on the instance afterwards.

        :param app_id_list: the app ids to fetch reviews for
        :param page: the review page number requested for every app
        :return: a single flat list of review dictionaries covering all the
            given apps. Each has the keys 'rev_app_id', 'review_id',
            'review_permalink', 'author_name', 'review_date',
            'current_rating', 'review_title' and 'review_body', plus
            'author_image' when the avatar element has an inline style.
        """
        all_reviews = []
        for app_id in app_id_list:
            post_data = {
                'reviewType': 0,
                'pageNum': page,
                'id': app_id,
                'reviewSortOrder': 4,
                'xhr': 1,
                'hl': self.language
            }
            self.params['authuser'] = '******'
            response = send_request('POST', s.REVIEW_URL, post_data,
                                    self.params)

            # Drop whatever precedes the '[["ecr"' marker before decoding.
            content = response.text
            content = content[content.find('[["ecr"'):].strip()
            payload = json.loads(content)
            html = payload[0][2]
            # `html` comes out of json.loads as decoded text, for which
            # BeautifulSoup ignores `from_encoding` (and warns), so it is
            # not passed here.
            soup = BeautifulSoup(html, 'lxml')

            for element in soup.select('.single-review'):
                review = {'rev_app_id': app_id}

                avatar_style = element.select_one('.author-image').get('style')
                if avatar_style:
                    # Wrap the inline style in a dummy rule so cssutils can
                    # extract the URL it references.
                    sheet = cssutils.css.CSSStyleSheet()
                    sheet.add('tmp { %s }' % avatar_style)
                    review['author_image'] = list(cssutils.getUrls(sheet))[0]

                review_header = element.select_one('.review-header')
                review['review_id'] = review_header.get('data-reviewid', '')
                review['review_permalink'] = review_header.select_one(
                    '.reviews-permalink').get('href')
                review['author_name'] = review_header.select_one(
                    '.author-name').text
                review['review_date'] = review_header.select_one(
                    '.review-date').text

                # The rating is read from the element's width percentage;
                # presumably 100% corresponds to 5 -- confirm against the
                # page markup.
                rating_style = review_header.select_one(
                    '.current-rating').get('style')
                width = str(cssutils.parseStyle(rating_style).width)
                review['current_rating'] = int(width.replace('%', '')) // 20

                # Pull the title and the link out of the body element so
                # that the text left behind is only the review body.
                body_elem = element.select_one('.review-body')
                review_title = body_elem.select_one('.review-title').extract()
                body_elem.select_one('.review-link').decompose()
                review['review_title'] = review_title.text
                review['review_body'] = body_elem.text

                all_reviews.append(review)
        return all_reviews
Beispiel #35
0
    def test_send_normal_request(self):
        """A plain GET returns status 200 and reports the requested URL."""
        response = send_request('GET', self.url)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.url, self.url)
    def test_send_normal_request(self):
        """A plain GET returns status 200 and reports the requested URL."""
        response = send_request("GET", self.url)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.url, self.url)