Beispiel #1
0
    def getCompleteAppInfo(self, id: str) -> Optional[AppItem]:
        """
        Load the complete record of a single app.

        Only rows flagged with isPartialInfo=0 are considered. Permissions and
        categories are fetched through getAppPermissions / getAppCategories.

        :param id: the app id to look up
        :return: a populated AppItem, or None when no complete row matches
        """
        self.__cursor.execute(
            "SELECT name,rating,num_reviews,install_fee,inAppPurchases,app_icon,containsAds FROM App WHERE id=:id and isPartialInfo=0",
            {"id": id})
        row = self.__cursor.fetchone()
        if row is None:
            return None

        permissions = self.getAppPermissions(id)
        categories = self.getAppCategories(id)
        assert permissions is not None
        assert categories is not None

        # Same order as the SELECT column list above.
        columns = ('name', 'rating', 'num_reviews', 'install_fee',
                   'inAppPurchases', 'app_icon', 'containsAds')

        appItem = AppItem()
        appItem['id'] = id
        for column, value in zip(columns, row):
            appItem[column] = value
        appItem['permissions'] = permissions
        appItem['categories'] = categories
        return appItem
Beispiel #2
0
def test_searchResultUpperBound(websiteUrl, dbFilePath):
    """
    Check that the search API returns at most MAX_SELECT apps.

    Inserts MAX_SELECT + 1 apps whose name is the search keyword, calls the
    website's search endpoint, and always deletes the inserted rows afterwards.

    :param websiteUrl: base URL of the website under test
    :param dbFilePath: path handed to sqlite3.connect
    """
    connection = sqlite3.connect(dbFilePath)
    cursor = connection.cursor()
    try:
        appAccessor = GooglePlayAdvancedSearch.DBUtils.AppAccessor()
        maxSelect = GooglePlayAdvancedSearch.DBUtils.MAX_SELECT
        insertedCount = maxSelect + 1
        for index in range(insertedCount):
            app = AppItem()
            app['id'] = 'GooglePlayAdvancedSearch.testApp' + str(index)
            app['name'] = 'matched keyword'
            app['rating'] = 0
            app['install_fee'] = 0
            app['app_icon'] = ''
            appAccessor.insertOrUpdateApp(app)
        # NOTE(review): presumably dropping the accessor makes its writes
        # visible to the separate connection used below -- confirm.
        del appAccessor

        cursor.execute(
            "select count(*) from App where id like 'GooglePlayAdvancedSearch.testApp%'"
        )
        storedCount = int(cursor.fetchone()[0])
        assert storedCount >= insertedCount, f"failed to insert {insertedCount} rows."

        response = requests.get(websiteUrl + '/Api/Search?q=matched%20keyword',
                                verify=True)
        apps = response.json()['apps']
        assert len(apps) <= maxSelect, f"At most returns {maxSelect}, actually returns {len(apps)}."
    finally:
        # Remove the test rows whether or not the assertions passed.
        cursor.execute('delete from App where id like :id',
                       {'id': 'GooglePlayAdvancedSearch.testApp%'})
        connection.commit()
        connection.close()
Beispiel #3
0
    def searchApps(self, namePattern: str) -> List[AppItem]:
        """
        Search apps whose name column contains the given pattern.

        At most MAX_SELECT rows are read, so the result is not guaranteed to
        be complete.

        :param namePattern: the text to look for (usually the search keyword)
        :return: a list of AppItem; empty when nothing matches
        """
        # Same order as the SELECT column list below.
        columns = ('id', 'name', 'rating', 'num_reviews', 'install_fee',
                   'inAppPurchases', 'app_icon')

        self.__cursor.execute(
            "SELECT id,name,rating,num_reviews,install_fee,inAppPurchases,app_icon FROM App WHERE name LIKE :namePattern Limit "
            + str(MAX_SELECT), {"namePattern": '%' + namePattern + '%'})

        appList = []
        for row in self.__cursor.fetchall():
            appItem = AppItem()
            for column, value in zip(columns, row):
                appItem[column] = value
            appList.append(appItem)
        return appList
Beispiel #4
0
def test_getCompleteAppInfoPartial():
    """
    getCompleteAppInfo must return None for an app saved with only
    name, id, rating, install_fee and app_icon.
    """
    fields = (
        ('name', 'test app'),
        ('id', 'testid'),
        ('rating', 1),
        ('install_fee', 0),
        ('app_icon', ''),
    )
    partialApp = AppItem()
    for field, value in fields:
        partialApp[field] = value

    accessor = AppAccessor()
    accessor.insertOrUpdateApp(partialApp)

    completeInfo = accessor.getCompleteAppInfo('testid')
    assert completeInfo is None, "Database only has partial info."
	def parse(self, response):
		"""
		Parse an app detail page into an AppItem and request its permissions.

		Fields that cannot be extracted fall back to defaults: rating becomes 0
		and num_reviews becomes None. install_fee is left unset when the fee
		button is missing or its label cannot be parsed.

		:param response: response of the app detail page; its url must carry an ``id`` query parameter
		:return: yields one FormRequest to the batchexecute endpoint; the AppItem travels in
			``meta['appInfo']`` to permissions_retrieved / permissions_errback
		"""
		appInfo = AppItem()
		appInfo['id'] = urlParse.parse_qs(urlParse.urlparse(response.url).query)['id'][0]

		h1 = response.css("h1[itemprop=name]")
		appInfo['name'] = h1.css("*::text").get()

		parentBox = h1.xpath('../..')
		c1 = parentBox.css("a[itemprop=genre]")
		appInfo['categories'] = c1.css("*::text").getall()
		appInfo['inAppPurchases'] = parentBox.xpath("div[text()[contains(.,'Offers in-app purchases')]]").get() is not None
		appInfo['containsAds'] = parentBox.xpath("div[text()[contains(.,'Contains Ads')]]").get() is not None
		try:
			# the first match is the rating box.
			ariaLabel = response.css('c-wiz div[aria-label][role=img]::attr(aria-label)').get()
			appInfo['rating'] = float(re.search(r'\d\.\d', ariaLabel)[0])
		except Exception:
			# No label, or no "d.d" number inside it.
			appInfo['rating'] = 0

		try:
			ariaLabel_review = parentBox.css('span[aria-label]::attr(aria-label)').get()
			appInfo['num_reviews'] = int(ariaLabel_review.split(' ')[0].replace(',', ''))
		except Exception:
			# No label, or its first word is not an integer.
			appInfo['num_reviews'] = None

		feeElement = parentBox.xpath('following-sibling::*').css('span button[aria-label]')
		# .css() returns a (possibly empty) SelectorList, never None, so test for emptiness.
		if not feeElement:
			self.logger.error("Install fee is not found.")
		elif feeElement.css('::attr(data-is-free)').get() == "true":
			appInfo['install_fee'] = 0
		else:
			try:
				appInfo['install_fee'] = float(re.search(r'\d+\.\d*', feeElement.css('::attr(aria-label)').get())[0])
			except Exception:
				self.logger.error(f"Unexpected install fee. feeElement: {feeElement.get()}")
				# Take a screenshot of the page for later inspection, unless a
				# previous attempt already reported Selenium as unavailable.
				if self.__seleniumAvailable != False:
					# NOTE(review): dirname() has no trailing separator, so this yields
					# '<dir>../../../../screenshots/' -- confirm the intended location.
					baseDir = os.path.dirname(os.path.abspath(__file__)) + '../../../../screenshots/'
					os.makedirs(baseDir, exist_ok=True)
					self.__seleniumAvailable = getPageWithSelenium(response.url, baseDir + appInfo['id'] + '.png', 'c-wiz[data-view-instance-id]')

		ariaLabel_icon = response.css("img[itemprop=image][alt='Cover art']::attr(src)").get()
		appInfo['app_icon'] = ariaLabel_icon

		r = scrapy.FormRequest(r'https://play.google.com/_/PlayStoreUi/data/batchexecute?rpcids=xdSrCf&hl=en',
							   headers={"Content-Type": "application/x-www-form-urlencoded;charset=UTF-8"},
							   formdata={'f.req': r'[[["xdSrCf","[[null,[\"' + appInfo['id'] + r'\",7],[]]]",null,"1"]]]'},
							   # not use cb_kwargs because it's only passed to callback, no errback.
							   meta={'appInfo': appInfo},
							   callback=self.permissions_retrieved,
							   errback=self.permissions_errback)
		yield r
Beispiel #6
0
def searchGooglePlay(keyword) -> List[AppItem]:
	"""
	Search Google Play for a keyword, save every app found and return them.

	The first result page is read from the AF_initDataCallback script embedded
	in the search page; further pages are requested from the batchexecute
	endpoint for as long as the response carries a page token.

	Errors raised while walking the result pages are printed together with the
	data being processed, and the apps collected so far are returned.

	:param keyword: the search keyword; it is URL-encoded before use
	:return: the apps found (name, id, rating, app_icon, install_fee), without duplicates
	"""
	url = 'https://play.google.com/store/search?q=%s&c=apps' % urllib.parse.quote_plus(keyword)
	page = requests.get(url, verify=True)

	# "key: 'ds:3'" is not reliable.
	matches = re.findall(r'<script.*?>AF_initDataCallback\((.+?)\)\s*;\s*</script>', page.text, flags=re.DOTALL)

	data = None
	# Typically the target segment is the last match, so scan from the end.
	# reversed() also visits matches[0], which range(len - 1, 0, -1) skipped.
	for m in reversed(matches):
		if 'googleusercontent.com' not in m:
			continue
		try:
			startIndex = m.find('data:')
			endIndex = m.rfind(']', startIndex + 1)
			# Assign only once both the parse and the lookup succeeded, so a
			# half-processed segment never ends up in `data`.
			data = jsonLoads(m[startIndex + len('data:'):endIndex + 1])[0][1]
			break
		except Exception:
			# Not the segment we are looking for; try the previous one.
			continue

	if not data:
		print("We couldn't find anything for your search.")
		return []

	appInfos = []
	# Ids already collected, for constant-time duplicate detection.
	seenIds = set()

	appSaver = GooglePlayAdvancedSearch.DBUtils.AppAccessor()
	appsData = None
	try:
		while True:
			appsData = data[0][0][0]
			print(f'Load {len(appsData)} apps.')
			for app in appsData:
				appId = app[12][0]
				if appId in seenIds:
					print(f'Duplicate app id {appId}.')
					continue

				appInfo = AppItem()
				appInfo['name'] = app[2]
				appInfo['id'] = appId
				appInfo['rating'] = app[6][0][2][1][1] if app[6] is not None else 0
				appInfo['app_icon'] = app[1][1][0][3][2]
				if app[7]:
					appInfo['install_fee'] = float(re.search(r'\d+\.\d*', app[7][0][3][2][1][0][2])[0])
				else:
					appInfo['install_fee'] = 0
				print(appInfo['id'])

				appSaver.insertOrUpdateApp(appInfo)

				appInfos.append(appInfo)
				seenIds.add(appId)

			# A falsy entry here means there is no further page.
			if data[0][0][-2]:
				pageToken = data[0][0][-2][1]
			else:
				break

			print('continue searching')
			response = requests.post(r'https://play.google.com/_/PlayStoreUi/data/batchexecute?rpcids=qnKhOb&hl=en',
									 headers={"Content-Type": "application/x-www-form-urlencoded;charset=UTF-8"},
									 data={'f.req': r'[[["qnKhOb","[[null,[[10,[10,50]],true,null,[96,27,4,8,57,30,110,79,11,16,49,1,3,9,12,104,55,56,51,10,34,31,77],[null,null,null,[[[[7,31],[[1,52,43,112,92,58,69,31,19,96]]]]]]],null,\"'
													+ pageToken
													+ r'\"]]",null,"generic"]]]'},
									 verify=True)
			# The JSON payload starts after the first line of the response body.
			package = jsonLoads(response.text[response.text.index('\n') + 1:])
			data = jsonLoads(package[0][2])

	except Exception as e:
		# Best effort: report what failed and return what was collected so far.
		print(str(e))
		if appsData is None:
			print(f'data:\n{data}')
		else:
			print(f'appsData:\n{appsData}')
	return appInfos
Beispiel #7
0
def searchGooglePlay(keyword) -> List[AppItem]:
    """
    Search Google Play for a keyword, save every app found and return them.

    The first result page is read from the last AF_initDataCallback script
    embedded in the search page; further pages are requested from the
    batchexecute endpoint for as long as the response carries a page token.

    :param keyword: the search keyword; it is URL-encoded before use
    :return: the apps found, without duplicates; an empty list when the page
        contains no result data
    """
    # Function-scope import so the block does not rely on a file-level import.
    from urllib.parse import quote_plus

    # Encode the keyword so characters such as '&' or '#' cannot alter the query.
    url = 'https://play.google.com/store/search?q=%s&c=apps' % quote_plus(keyword)
    page = requests.get(url, verify=True)

    # "key: 'ds:3'" is not reliable.
    matches = re.findall(
        r'<script.*?>AF_initDataCallback\(\s*{.*?data:function\(\){return\s+(\[.+?\])\s*}\s*}\s*\)\s*;\s*</script>',
        page.text,
        flags=re.DOTALL)
    if not matches:
        # Without this guard matches[-1] raises IndexError.
        print("We couldn't find anything for your search.")
        return []

    data = jsonLoads(matches[-1])
    data = data[0][1]

    if not data:
        print("We couldn't find anything for your search.")
        return []

    appInfos = []
    # Ids already collected, for constant-time duplicate detection.
    seenIds = set()

    appSaver = GooglePlayAdvancedSearch.DBUtils.AppAccessor(1)
    while True:
        appsData = data[0][0][0]
        print(f'Load {len(appsData)} apps.')
        for app in appsData:
            appId = app[12][0]
            if appId in seenIds:
                print(f'Duplicate app id {appId}.')
                continue

            appInfo = AppItem()
            # NOTE(review): other blocks store the name under 'name'; confirm
            # that AppItem declares 'appName' for this code path.
            appInfo['appName'] = app[2]
            appInfo['id'] = appId
            # Entries without rating data carry None here; fall back to 0.
            appInfo['rating'] = app[6][0][2][1][1] if app[6] is not None else 0
            appInfo['app_icon'] = app[1][1][0][3][2]
            if app[7]:
                appInfo['install_fee'] = float(
                    re.search(r'\d+\.\d*', app[7][0][3][2][1][0][2])[0])
            else:
                appInfo['install_fee'] = 0
            print(appInfo['id'])

            appSaver.insertOrUpdateApp(appInfo)

            appInfos.append(appInfo)
            seenIds.add(appId)

        # A falsy entry here means there is no further page.
        if data[0][0][-2]:
            pageToken = data[0][0][-2][1]
        else:
            break

        print('continue searching')
        response = requests.post(
            r'https://play.google.com/_/PlayStoreUi/data/batchexecute?rpcids=qnKhOb&hl=en',
            headers={
                "Content-Type":
                "application/x-www-form-urlencoded;charset=UTF-8"
            },
            data={
                'f.req':
                r'[[["qnKhOb","[[null,[[10,[10,50]],true,null,[96,27,4,8,57,30,110,79,11,16,49,1,3,9,12,104,55,56,51,10,34,31,77],[null,null,null,[[[[7,31],[[1,52,43,112,92,58,69,31,19,96]]]]]]],null,\"'
                + pageToken + r'\"]]",null,"generic"]]]'
            },
            verify=True)
        # The JSON payload starts after the first line of the response body.
        package = jsonLoads(response.text[response.text.index('\n') + 1:])
        data = jsonLoads(package[0][2])
    return appInfos