Beispiel #1
0
def test_getCompleteAppInfoPartial():
    """An app saved with only its summary fields must not come back as complete app info."""
    # Only these fields are stored; nothing else is set on the item.
    partialFields = {
        'name': 'test app',
        'id': 'testid',
        'rating': 1,
        'install_fee': 0,
        'app_icon': '',
    }
    app = AppItem()
    for field, value in partialFields.items():
        app[field] = value

    accessor = AppAccessor()
    accessor.insertOrUpdateApp(app)

    completeInfo = accessor.getCompleteAppInfo('testid')
    assert completeInfo is None, "Database only has partial info."
Beispiel #2
0
def getCompleteAppInfo(app_ids: List[str]) -> List[AppItem]:
	"""
	Look up the complete app info of every app id, scraping Google Play for the ids the database lacks.

	First pass: when the database holds at least one app, every id is looked up through AppAccessor.
	Second pass: the ids that came back as None are handed to the scraper program in a single run and
	are then looked up again. An id that is still missing afterwards is represented by {'id': app_id}.

	:param app_ids: list of app_id
	:return:    list with one entry per element of app_ids, in the same order (duplicates included).
	            Each entry is the complete app info, or a dict that holds only 'id' when the scraper
	            could not provide the app.
	:raises requests.exceptions.SSLError: when the scraper exits with GooglePlayAdvancedSearch.Errors.sslErrorCode
	"""
	# Local import: the file-level import block is not part of this function.
	import subprocess

	if not app_ids:
		return []

	appAccessor = AppAccessor()
	with connection.cursor() as cursor:
		# very important!
		# connection.cursor() gives django cursor, connection.cursor().connection.cursor() gives underlying sqlite cursor.
		# django cursor and sqlite cursor uses different parameter style!
		if GooglePlayAdvancedSearch.DBUtils.getAppCountInDatabase(cursor) > 0:
			# First pass. Apps missing from the database stay None and are filled in by the second pass.
			app_infos = {app_id: appAccessor.getCompleteAppInfo(app_id) for app_id in app_ids}
		else:
			app_infos = {app_id: None for app_id in app_ids}

	# Second pass: run the scraper for everything the first pass did not find.
	appsMissingInDatabase = [app_id for app_id, info in app_infos.items() if info is None]
	if appsMissingInDatabase:
		# An argument list (no shell) keeps app ids from being interpreted as shell syntax.
		command = ['../scraper/Program.py', '-p', ','.join(appsMissingInDatabase)]
		if sys.platform == 'win32':
			command.insert(0, 'python')
		try:
			# returncode is the plain exit status on every platform, so no os.WEXITSTATUS decoding is needed.
			exitCode = subprocess.run(command).returncode
		except OSError as e:
			# A scraper that cannot be started counts as a scraper failure for every id; it must not crash the caller.
			print(f'Failed to start scraper: {e}', file=sys.stderr)
			exitCode = None
		if exitCode == GooglePlayAdvancedSearch.Errors.sslErrorCode:
			raise requests.exceptions.SSLError()

		# A new accessor is created for the re-read, as before (presumably so the rows the scraper just wrote are visible - confirm).
		appAccessor = AppAccessor()
		scraper_fail_id = []
		for app_id in appsMissingInDatabase:
			info = appAccessor.getCompleteAppInfo(app_id)
			if info:
				app_infos[app_id] = info
			else:
				app_infos[app_id] = {'id': app_id}  # if scraper fails, just pass "id" to appDetails to display
				scraper_fail_id.append(app_id)

		print("Scraper failed %d times: %s" % (len(scraper_fail_id), ", ".join(scraper_fail_id)))
		print("There were %d ids not in our database or stale. %d are now added" % (len(appsMissingInDatabase), len(appsMissingInDatabase) - len(scraper_fail_id)))

	print(f'total results: {len(app_ids)}')
	assert None not in app_infos.values(), "Every app id returned from Google should have an app detail."
	# Rebuild from app_ids so the result lines up with the input even when an id occurs more than once.
	return [app_infos[app_id] for app_id in app_ids]
Beispiel #3
0
def search(request: django.http.HttpRequest):
    """
    Handle a search request and answer with the matching apps as JSON.

    Query parameters read here: q (keyword), pids and cids (comma-separated integer ids handed to
    filterApps), sort (rlh, rhl, plh or phl).

    :param request: the incoming request; its cookies and GET parameters are read.
    :return: JsonResponse with an 'apps' list on success, or with an 'error' message when the
             rate limit is hit or an SSLError is raised during the search.
    """
    # Clients carrying the _gaload cookie skip this limiter; Nginx is expected to rate-limit them instead.
    gaLoaded = request.COOKIES.get('_gaload')
    if not gaLoaded and limitRate(getClientIP(request)):
        return JsonResponse({'error': 'Rate limit reached. Wait 60 seconds.'})

    keyword = request.GET.get('q', '').strip()
    with connection.cursor() as cursor:
        try:
            logSearch(cursor, keyword, request)
        except django.db.utils.OperationalError:
            # Run the create-table statement, then retry once. A failing retry is only printed.
            cursor.execute(apiHelper.getSqlCreateTableSearch())
            try:
                logSearch(cursor, keyword, request)
            except Exception as retryError:
                print(str(retryError))

    def parseIdList(paramName):
        # Comma-separated integers; empty segments are skipped.
        rawValue = request.GET.get(paramName, '')
        return [int(token) for token in rawValue.split(',') if token != '']

    excludedPIds = parseIdList('pids')
    excludedCIds = parseIdList('cids')

    # sort parameter -> (sort key, descending?)
    sortSpecs = {
        'rlh': (lambda a: a['rating'], False),  # rating low to high
        'rhl': (lambda a: a['rating'], True),  # rating high to low
        'plh': (lambda a: len(a['permissions']), False),  # number of permissions low to high
        'phl': (lambda a: len(a['permissions']), True),  # number of permissions high to low
    }

    try:
        appInfos = AppAccessor().searchApps(keyword)
        needCompleteInfo = determineAppInfoCompleteness(request)

        if needCompleteInfo:
            appInfos = getCompleteAppInfo([a['id'] for a in appInfos])

        appInfos = filterApps(appInfos, excludedCIds, excludedPIds, request)

        # If we cannot find 200 matches from our database, we try to find more matches from Google.
        cacheKey = 'searchkey-' + keyword
        if len(appInfos) < 200 and cache.get(cacheKey) is None:
            # do not search the same keyword in 5 minutes
            cache.set(cacheKey, '', timeout=60 * 5)
            googleApps = apiHelper.searchGooglePlay(keyword)
            if needCompleteInfo:
                googleApps = getCompleteAppInfo([a['id'] for a in googleApps])

            googleApps = filterApps(googleApps, excludedCIds, excludedPIds,
                                    request)

            # Append only the Google results that the database search did not already return.
            knownIds = {a['id'] for a in appInfos}
            for candidate in googleApps:
                if candidate['id'] not in knownIds:
                    appInfos.append(candidate)

        sortSpec = sortSpecs.get(request.GET.get('sort'))
        if sortSpec is not None:
            sortKey, descending = sortSpec
            appInfos = sorted(appInfos, key=sortKey, reverse=descending)

        payload = {'apps': [dict(a) for a in appInfos]}
        response = JsonResponse(payload, safe=False)
        response['Cache-Control'] = "public, max-age=3600"
        return response
    except requests.exceptions.SSLError as e:
        # In getCompleteAppInfo, we throw our own SSLError where we don't have request object.
        if e.request:
            url = urlparse(e.request.url)
            message = f'Searching is aborted because secure connection to https://{url.netloc} is compromised.\nAttacker is attacking us, but we didn\'t leak your data!'
        else:
            message = f'Searching is aborted because secure connection is compromised.\nAttacker is attacking us, but we didn\'t leak your data!'
        return JsonResponse({'error': message})
Beispiel #4
0
def _filterExcludedApps(appInfos, excludedPIds, excludedCIds):
    """Return appInfos without the apps that isExcluded rejects for the given permission or category ids."""
    # isExcluded is defined outside this block; it is expected to return a bool.
    if excludedPIds:
        appInfos = [
            a for a in appInfos
            if not isExcluded(a['permissions'], excludedPIds)
        ]
    if excludedCIds:
        appInfos = [
            a for a in appInfos
            if not isExcluded(a['categories'], excludedCIds)
        ]
    return appInfos


def search(request):
    """
    Handle a search request and answer with the matching apps as JSON.

    Query parameters read here: q (keyword), pids and cids (comma-separated integer ids of
    permissions and categories to exclude), sort (rlh, rhl, plh or phl).

    :param request: the incoming request; only its GET parameters are read here.
    :return: JsonResponse with 'executionSeconds' and 'apps' on success, or with an 'error'
             message when an SSLError is raised during the search.
    """
    startTime = time.time()

    # A missing q is treated as an empty keyword instead of raising on request.GET['q'].
    keyword = request.GET.get('q', '').strip()
    excludedPIds = [
        int(n) for n in request.GET.get('pids', '').split(',') if n != ''
    ]

    excludedCIds = [
        int(n) for n in request.GET.get('cids', '').split(',') if n != ''
    ]

    try:
        # NOTE(review): the meaning of the argument 1 is not visible from this block.
        appAccessor = AppAccessor(1)
        appInfos = appAccessor.searchApps(keyword)

        needCompleteInfo = determineAppInfoCompleteness(request)

        if needCompleteInfo:
            appInfos = getCompleteAppInfo([a['id'] for a in appInfos])

        appInfos = _filterExcludedApps(appInfos, excludedPIds, excludedCIds)

        # If we cannot find 200 matches from our database, we try to find more matches from Google.
        if len(appInfos) < 200:
            appInfos2 = searchGooglePlay(keyword)
            if needCompleteInfo:
                appInfos2 = getCompleteAppInfo([a['id'] for a in appInfos2])
            appInfos2 = _filterExcludedApps(appInfos2, excludedPIds,
                                            excludedCIds)

            # Append only the Google results that the database search did not already return.
            appInfoIds = {a['id'] for a in appInfos}
            appInfos.extend(
                [a for a in appInfos2 if a['id'] not in appInfoIds])

        sortType = request.GET.get('sort')
        if sortType == 'rlh':  # rating low to high
            appInfos = sorted(appInfos, key=lambda a: a['rating'])
        elif sortType == 'rhl':  # rating high to low
            appInfos = sorted(appInfos,
                              key=lambda a: a['rating'],
                              reverse=True)
        elif sortType == 'plh':  # number of permissions low to high
            appInfos = sorted(appInfos, key=lambda a: len(a['permissions']))
        elif sortType == 'phl':  # number of permissions high to low
            appInfos = sorted(appInfos,
                              key=lambda a: len(a['permissions']),
                              reverse=True)

        response = JsonResponse(
            {
                'executionSeconds': time.time() - startTime,
                'apps': [dict(a) for a in appInfos]
            },
            safe=False)
        response['Cache-Control'] = "private, max-age=3600"
        return response
    except requests.exceptions.SSLError as e:
        # In getCompleteAppInfo, we throw our own SSLError where we don't have request object.
        if e.request:
            url = urlparse(e.request.url)
            return JsonResponse({
                'error':
                f'Searching is aborted because secure connection to https://{url.netloc} is compromised.\nAttacker is attacking us, but we didn\'t leak your data!'
            })
        else:
            return JsonResponse({
                'error':
                f'Searching is aborted because secure connection is compromised.\nAttacker is attacking us, but we didn\'t leak your data!'
            })