def getReviewRatings(dbHandle, appUrlList):
    """Scrape the review rating of every app URL and persist it.

    For each URL the page is fetched with a ``Mozilla/5.0`` User-Agent, the
    rating is read from the children of the elements whose class is ``score``
    (defaulting to 0.0 when none is found), stored under the package name in
    ``appRating.json`` and written to the database through ``insertInDB``.

    An HTTP 404 flags the app as no longer in the store
    (``still_in_googleplaystore`` = 0); any other HTTP error is only reported.

    :param dbHandle: database handle forwarded to the DB helpers.
    :param appUrlList: iterable of app URLs; the package name is taken to be
        the text after the first ``=`` in each URL.
    """
    for appUrl in appUrlList:
        # Context managers make sure the JSON file is closed before it is
        # re-opened for writing further down.
        with open('appRating.json', 'r') as ratingFile:
            app_dict = json.loads(ratingFile.read())
        app_pkg_name = appUrl.split('=')[1]
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = urllib2.Request(appUrl, None, headers)
        try:
            review_rating = 0.0
            page = urllib2.urlopen(req).read()
            soup = bs(''.join(page))
            for div in soup.findAll(attrs={'class': 'score'}):
                for child in div.children:
                    if not child.string == ' ':
                        # NOTE(review): this used to be eval(child.string).
                        # The text comes straight from a downloaded web page,
                        # so it is parsed with float() instead of being
                        # executed as Python code.
                        review_rating = round(float(child.string), 1)
            app_dict[app_pkg_name] = review_rating
            insertInDB(dbHandle, review_rating, app_pkg_name)
            with open('appRating.json', 'w') as ratingFile:
                ratingFile.write(
                    json.dumps(app_dict, indent=4, sort_keys=True))
        except urllib2.HTTPError as e:
            if str(e.code) == '404':
                sqlStatement = "UPDATE `appdata` SET `still_in_googleplaystore`= 0 WHERE `app_pkg_name` = '" + app_pkg_name + "';"
                print(' '.join(['HTTPError =', str(e.code), 'for app:',
                                app_pkg_name, sqlStatement]))
                logging.debug('HTTPError =' + str(e.code) + 'for app:' +
                              app_pkg_name + " statement: " + sqlStatement)
                databaseHandler.dbManipulateData(dbHandle, sqlStatement)
            else:
                print(' '.join(['HTTPError =', str(e.code), 'for app:',
                                app_pkg_name]))
                logging.debug('HTTPError =' + str(e.code) + 'for app:' +
                              app_pkg_name)
def insertInDB(app_pkg_name, paid):
    """Store the ``paid`` value of one app in the ``appdata`` table.

    Opens its own connection through ``databaseHandler.dbConnectionCheck()``
    and always closes it again, even when the update raises.

    :param app_pkg_name: package name identifying the row to update.
    :param paid: value written to the ``paid`` column (stringified as-is).
    """
    dbHandle = databaseHandler.dbConnectionCheck()  # DB Open
    try:
        sqlStatement = "UPDATE `appdata` SET `paid`= " + str(paid) + " WHERE `app_pkg_name` = '" + app_pkg_name + "';"
        print(sqlStatement)
        logging.debug("Statement: " + sqlStatement)
        databaseHandler.dbManipulateData(dbHandle, sqlStatement)
    finally:
        # Previously the handle leaked whenever the update failed.
        dbHandle.close()  # DB Close
def extractPermissionsInfo(dbHandle, pkgName, GSFId):
    """Fetch the permissions of one package and link them to the app.

    Nothing is done when ``isAPKPermissionsAlreadyInTable`` reports the package
    as already processed.  Otherwise every permission returned by
    ``permissions.getPackagePermission`` is resolved to an id with
    ``getPermissionId`` and, when the app has a row in ``appdata``, an
    ``appperm`` row is inserted for the (app, permission) pair.

    :param dbHandle: database handle used for the "already in table" check.
    :param pkgName: package name of the app.
    :param GSFId: id forwarded to the permissions API call.
    """
    if isAPKPermissionsAlreadyInTable(dbHandle, pkgName) == True:
        print("Moving on to extracting permissions for the next app. This one is already in the database.")
        return

    # API call to unofficial Google Play API written in Python by egirault
    requestedPermissions = permissions.getPackagePermission([pkgName], GSFId)
    for permissionName in requestedPermissions:
        dbHandle = databaseHandler.dbConnectionCheck()

        # Look the permission up (getPermissionId handles the missing case).
        permissionLookup = "".join([
            "SELECT id FROM `permissions` WHERE `name` = '",
            permissionName, "';"])
        permissionId = getPermissionId(dbHandle, permissionLookup,
                                       permissionName)

        # The app is expected to be in appdata already; if it is not, this
        # permission is skipped.
        appLookup = "".join([
            "SELECT id FROM `appdata` WHERE `app_pkg_name` = '",
            pkgName, "';"])
        appId = getAppId(dbHandle, appLookup, pkgName)
        if not appId > 0:
            print("Moving on to the next app. This app has not been extracted from Google Play Store.")
            continue

        # Link the app to the permission.
        linkStatement = "".join([
            "INSERT INTO `appperm`(`app_id`,`perm_id`) VALUES (",
            str(appId), ",", str(permissionId), ");"])
        print(linkStatement)
        databaseHandler.dbManipulateData(dbHandle, linkStatement)
def downloadAPKFromPhone():
    """Copy the APK of every store-listed app installed on the phone.

    The installed packages are listed with ``adb shell pm list packages``.
    For each package accepted by ``verifyPresentInAppMarket`` the function

    * records it in ``appurls`` with ``downloaded`` = 1 (or, when the insert
      hits an ``IntegrityError``, marks the existing rows as downloaded),
    * asks ``adb shell pm path`` for the APK location and hands it to
      ``downloadAPK`` together with an ``apps`` directory under the current
      working directory,
    * renames the copied file to PACKAGE.apk, replacing an existing one.

    Calls ``sys.exit(1)`` on platforms other than Windows and Linux.
    """
    urlPrefix = "https://play.google.com/store/apps/details?id="
    listOfPackages = subprocess.check_output(
        ["adb", "shell", "pm", "list", "packages"])
    for line in listOfPackages.split():
        package = line.strip().split(":")[-1]
        urlExtract = urlPrefix + package
        if not verifyPresentInAppMarket(urlExtract):
            continue

        dbHandle = databaseHandler.dbConnectionCheck()  # DB Open
        try:
            sqlStatement = "INSERT INTO `appurls`(`app_pkg_name`,`app_url`,`downloaded`) VALUES('" + package + "', '" + urlExtract + "', 1);"
            try:
                databaseHandler.dbManipulateData(dbHandle, sqlStatement)
            except _mysql_exceptions.IntegrityError:
                print("package %s was present, updating now" % (package,))
                sqlStatement = "SELECT `id` FROM `appurls` WHERE app_pkg_name = '" + package + "';"
                cursor = dbHandle.cursor()
                try:
                    cursor.execute(sqlStatement)
                    queryOutput = cursor.fetchall()
                except Exception:
                    print("Unexpected error: %s" % (sys.exc_info()[0],))
                    raise
                for row in queryOutput:
                    updateDownloaded(dbHandle, row[0])
        finally:
            # Close the connection even when one of the statements fails.
            dbHandle.close()  # DB Close

        # Get the path of the apk and extract it
        path = subprocess.check_output(
            ["adb", "shell", "pm", "path", package]).strip().split(":")[-1]

        currentDirectory = os.getcwd()
        osInfo = platform.system()
        if osInfo == 'Windows':
            appsDownloadDirectory = currentDirectory + "\\apps\\"
        elif osInfo == 'Linux':
            appsDownloadDirectory = currentDirectory + "/apps/"
        else:
            sys.stderr.write(
                'The current os not supported at the moment.\n')
            sys.exit(1)
        # presumably downloadAPK creates the directory when it is missing --
        # TODO confirm against its definition
        downloadAPK(appsDownloadDirectory, path)

        copiedFromPhoneAPKName = appsDownloadDirectory + path.split("/")[-1]
        realPackageBasedAPKName = appsDownloadDirectory + package + ".apk"
        try:
            os.rename(copiedFromPhoneAPKName, realPackageBasedAPKName)
        except OSError:
            # The file already exists, we should copy the new apk over it.
            # OSError is caught instead of WindowsError: WindowsError is a
            # subclass of OSError and is not even defined outside Windows, so
            # naming it here raised NameError on other platforms.
            os.remove(realPackageBasedAPKName)
            os.rename(copiedFromPhoneAPKName, realPackageBasedAPKName)
        # Restore the working directory captured above
        # (presumably downloadAPK changes it -- TODO confirm).
        os.chdir(currentDirectory)
def insertInDB(dbHandle, review_rating, app_pkg_name):
    """Write the review rating of one app to the ``appdata`` table.

    :param dbHandle: accepted for the caller's convenience; see the note below.
    :param review_rating: value written to the ``review_rating`` column.
    :param app_pkg_name: package name identifying the row to update.
    """
    # NOTE(review): the handle passed in is immediately replaced by the result
    # of dbConnectionCheck(), so the caller's handle is never used here.
    dbHandle = databaseHandler.dbConnectionCheck()  # DB Open
    updateStatement = "".join([
        "UPDATE `appdata` SET `review_rating`= ",
        str(review_rating),
        " WHERE `app_pkg_name` = '",
        app_pkg_name,
        "';",
    ])
    print(updateStatement)
    logging.debug("Statement: " + updateStatement)
    databaseHandler.dbManipulateData(dbHandle, updateStatement)
def insertIntoDB(name, protectionLevel, permissionGroup="NULL", permissionFlags="NULL"):
    """Insert a permission row, or refresh it when the key already exists.

    An empty ``permissionFlags`` string is stored as SQL NULL; every other
    value is written as a quoted string.

    :param name: permission name.
    :param protectionLevel: value for ``protection_level``.
    :param permissionGroup: value for ``permission_group``.
    :param permissionFlags: value for ``permission_flags``.
    """
    # NOTE(review): the "NULL" defaults end up quoted, i.e. as the string
    # 'NULL' rather than SQL NULL -- confirm that this is intended.
    if permissionFlags == '':
        flagsLiteral = "NULL"
    else:
        flagsLiteral = "'" + permissionFlags + "'"

    sqlStatement = "".join([
        "insert into permissions(name, protection_level, permission_group, permission_flags) values('",
        name, "', '", protectionLevel, "', '", permissionGroup, "', ",
        flagsLiteral,
        ") on duplicate key update protection_level = '", protectionLevel,
        "', permission_group = '", permissionGroup,
        "', permission_flags = ", flagsLiteral, ";",
    ])
    connection = databaseHandler.dbConnectionCheck()
    databaseHandler.dbManipulateData(connection, sqlStatement)
def getDataForAppList(dbHandle):
    """Insert an ``appurls`` row for every app named in ``applist.json``.

    The file must hold a JSON object whose ``appNames`` entry is the list of
    package names.  For each one the Play Store URL and the privacygrade.org
    URL are derived from the package name and stored.

    :param dbHandle: database handle passed to ``dbManipulateData``.
    """
    # Read through a context manager so the file handle is released.
    with open('applist.json', 'r') as appListFile:
        appList = json.loads(appListFile.read())['appNames']
    appUrlPrefix = 'https://play.google.com/store/apps/details?id='
    privacyGradePrefix = 'http://privacygrade.org/apps/'
    for app in appList:
        appUrl = appUrlPrefix + app
        privacy_grade_url = privacyGradePrefix + app + '.html'
        sqlStatement = "INSERT INTO `appurls`(`app_pkg_name`,`app_url`,`privacy_grade_url`) VALUES ('" + app + "','" + appUrl + "','" + privacy_grade_url + "')"
        databaseHandler.dbManipulateData(dbHandle, sqlStatement)
def extractMoreURLsAndStore(dbHandle, urlExtract):
    """Harvest further app links from one page and store them in ``appurls``.

    Every element with class ``card-click-target`` contributes one row: its
    ``href`` is appended to ``https://play.google.com`` and the package name
    is the text between the first and second ``=`` of that URL.  HTTP errors
    are reported on stdout and otherwise ignored.

    :param dbHandle: database handle passed to ``dbManipulateData``.
    :param urlExtract: URL of the page to scan.
    """
    request = urllib2.Request(urlExtract, None,
                              {'User-Agent': 'Mozilla/5.0'})
    try:
        html = urllib2.urlopen(request).read()
        document = BeautifulSoup(''.join(html))
        for card in document.findAll(attrs={'class': 'card-click-target'}):
            url = "https://play.google.com" + card['href']
            urlParts = url.split("=")
            insertStatement = "".join([
                "INSERT INTO `appurls`(`app_pkg_name`,`app_url`) VALUES('",
                urlParts[1], "', '", url, "');"])
            databaseHandler.dbManipulateData(dbHandle, insertStatement)
    except urllib2.HTTPError as e:
        print(' '.join(['HTTPError = ', str(e.code)]))
def getDeveloperId(dbHandle, app_dict):
    """Return the id of the app's developer, inserting the developer if new.

    The developer is looked up by its escaped name.  When the lookup reports
    rows, the id of the first one is returned; otherwise a new ``developer``
    row is inserted and the result of ``dbManipulateData`` is returned.

    :param dbHandle: database handle.
    :param app_dict: app metadata; ``developer_name`` is required, while
        ``dev_website``, ``dev_email`` and ``dev_location`` default to "".
    :raises: any error from the lookup or insert, after printing its type.
    """
    cursor = dbHandle.cursor()
    devName = conversion.MySQLConverter().escape(app_dict['developer_name'])
    devWeb, devEmail, devLocation = [
        conversion.MySQLConverter().escape(app_dict.get(field, ""))
        for field in ('dev_website', 'dev_email', 'dev_location')
    ]

    lookupStatement = "".join(
        ["SELECT `id` FROM `developer` WHERE `name` = '", devName, "';"])
    try:
        cursor.execute(lookupStatement)
        if not cursor.rowcount > 0:
            insertStatement = "".join([
                "INSERT into `developer`(`name`,`website`,`email`,`country`) VALUES('",
                "','".join([devName, devWeb, devEmail, devLocation]),
                "');"])
            return databaseHandler.dbManipulateData(dbHandle, insertStatement)
        for row in cursor.fetchall():
            return row[0]
    except:
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        raise
def getDeveloperId(dbHandle, app_dict):
    """Look up the developer of an app by name, creating the row if needed.

    :param dbHandle: database handle.
    :param app_dict: app metadata; ``developer_name`` must be present, the
        keys ``dev_website``, ``dev_email`` and ``dev_location`` are optional
        and fall back to an empty string.
    :returns: the id from the first matching ``developer`` row, or whatever
        ``dbManipulateData`` returns for the insert of a new developer.
    :raises: any error from the lookup or insert, after printing its type.
    """
    cursor = dbHandle.cursor()
    developerName = conversion.MySQLConverter().escape(
        app_dict['developer_name'])

    # Escape the optional contact fields, substituting "" for missing ones.
    escapedOptional = []
    for optionalKey in ('dev_website', 'dev_email', 'dev_location'):
        rawValue = app_dict[optionalKey] if optionalKey in app_dict else ""
        escapedOptional.append(conversion.MySQLConverter().escape(rawValue))
    website, email, location = escapedOptional

    selectStatement = "SELECT `id` FROM `developer` WHERE `name` = '" + developerName + "';"
    try:
        cursor.execute(selectStatement)
        match = cursor.fetchone()
        if match is None:
            # Unknown developer: insert it and hand back the helper's result.
            insertStatement = (
                "INSERT into `developer`(`name`,`website`,`email`,`country`) VALUES('"
                + developerName + "','" + website + "','" + email + "','"
                + location + "');")
            print(insertStatement)
            return databaseHandler.dbManipulateData(dbHandle, insertStatement)
        return match[0]
    except:
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        raise
def extractPermissionInfo(dbHandle, appJSONDownloadFileLocation, pkgName):
    """Link an app to the permissions listed in its downloaded JSON metadata.

    The permission list is read from ``details -> app_details -> permission``
    of the JSON document; when any of these keys is missing nothing is done.
    Each permission is sanitized, resolved to an id with ``getPermissionId``
    and, when the app has a row in ``appdata``, stored in ``appperm``.

    :param dbHandle: database handle.
    :param appJSONDownloadFileLocation: path of the UTF-8 encoded JSON file.
    :param pkgName: package name of the app.
    """
    # Read through a context manager so the file handle is released.
    with open(appJSONDownloadFileLocation, 'r') as appJSONFile:
        appInfoDict = json.loads(appJSONFile.read().decode('utf8'))

    # Walk down to the permission list, giving up when a level is missing.
    permissionList = appInfoDict
    for key in ('details', 'app_details', 'permission'):
        if key not in permissionList:
            return
        permissionList = permissionList[key]

    for permissionName in permissionList:
        permissionName = sanitizeInput(permissionName)
        # See if the permission is in the table if not insert it and get its id
        sqlStatementPermName = "SELECT id FROM `permissions` WHERE `name` = '" + permissionName + "';"
        permissionId = getPermissionId(dbHandle, sqlStatementPermName,
                                       permissionName)
        # Find the App's Id in the DB.
        # Assumption is that the crawlURL has already extracted all
        # information about the app and the same is in the appdata table.
        # If that is not true this step fails and the permission is skipped.
        sqlStatementAppPkgName = "SELECT id FROM `appdata` WHERE `app_pkg_name` = '" + pkgName + "';"
        appId = getAppId(dbHandle, sqlStatementAppPkgName, pkgName)
        if appId > 0:
            # Insert the App_Id and corresponding Perm_Id in to the DB
            sqlStatement = ("INSERT INTO `appperm`(`app_id`,`perm_id`) VALUES ("
                            + str(appId) + "," + str(permissionId)
                            + ") ON DUPLICATE KEY UPDATE `app_id`='"
                            + str(appId) + "',`perm_id`='"
                            + str(permissionId) + "';")
            print(sqlStatement)
            databaseHandler.dbManipulateData(dbHandle, sqlStatement)
        else:
            print("Moving on to the next app. This app has not been extracted from Google Play Store.")
def getPermissionId(dbHandle, sqlStatement, permissionName):
    """Return the id of a permission, inserting the permission if unknown.

    :param dbHandle: database handle.
    :param sqlStatement: SELECT statement that looks the permission up.
    :param permissionName: name used for the insert when the lookup is empty.
    :returns: the id from the last row of the lookup, or the result of
        ``dbManipulateData`` for the insert of a new permission.
    :raises: any error from the lookup or insert, after printing its type.
    """
    cursor = dbHandle.cursor()
    try:
        cursor.execute(sqlStatement)
        alreadyKnown = cursor.rowcount > 0
        if not alreadyKnown:
            # New permissions are stored with protection level "signature"
            # by default.  The data quality can be improved further by
            # analyzing the apks or the platform's AndroidManifest.xml.
            insertStatement = "".join([
                "INSERT INTO `permissions`(`name`,`protection_level`) VALUES ('",
                permissionName, "','signature');"])
            permissionId = databaseHandler.dbManipulateData(
                dbHandle, insertStatement)
        else:
            # The permission exists: take its id from the result set.
            for row in cursor.fetchall():
                permissionId = row[0]
    except:
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        raise
    return permissionId
def updatePermExtracted(dbHandle, tableId):
    """Set ``perm_extracted`` to 1 on the ``appurls`` row with the given id.

    :param dbHandle: database handle passed to ``dbManipulateData``.
    :param tableId: id of the ``appurls`` row (stringified into the query).
    """
    markExtracted = "UPDATE `appurls` SET `perm_extracted`=1 WHERE `id`=%s;" % str(tableId)
    databaseHandler.dbManipulateData(dbHandle, markExtracted)
# Store an annotation row for every app whose normalized store category agrees
# with the category recorded in appDict (or whose recorded category is empty).
# Apps that disagree go to mismatchList; apps whose category lookup does not
# return exactly one row go to errorList.
errorList = []
mismatchList = []
playCategory = ''
for app in appList:
    sqlStatement = "SELECT c.url as category FROM appdata a, appcategories c WHERE a.app_category_id = c.id AND a.app_pkg_name = '" + app + "';"
    try:
        cursor.execute(sqlStatement)
        if cursor.rowcount != 1:
            errorList.append(app)
            continue

        # Reduce the category URL to the short, lower-case category token.
        playCategory = str(cursor.fetchone()[0])
        playCategory = playCategory.replace(
            "https://play.google.com/store/apps/category/", "")
        playCategory = playCategory.lower()
        playCategory = playCategory.replace("_and_", "_n_")
        playCategory = playCategory.replace("game_", "")

        annotatedCategory = str(appDict[app]["annotated_category"])
        googlePlayCategory = appDict[app]["google_play_category"]
        if playCategory == googlePlayCategory or googlePlayCategory == '':
            insertStatement = "INSERT INTO annotations (app_pkg_name, google_play_category, annotated_category) VALUES ('" + app + "', '" + playCategory + "', '" + annotatedCategory + "');"
            rowid = db.dbManipulateData(dbHandle, insertStatement)
            print("Inserted data for %s row: %s" % (app, rowid))
        else:
            mismatchList.append(app)
    except:
        print('Unexpected error: ' + str(sys.exc_info()[0]))
def updateURLs(dbHandle, app_pkg_name, app_url, playdrone_metadata_url, playdrone_apk_url):
    """Insert an ``appurls`` row, or refresh its two playdrone URLs.

    When the insert hits a duplicate key only ``playdrone_metadata_url`` and
    ``playdrone_apk_url`` are updated.

    :param dbHandle: database handle passed to ``dbManipulateData``.
    :param app_pkg_name: package name of the app.
    :param app_url: store URL of the app.
    :param playdrone_metadata_url: playdrone metadata URL.
    :param playdrone_apk_url: playdrone APK URL.
    """
    insertedValues = "','".join(
        [app_pkg_name, app_url, playdrone_metadata_url, playdrone_apk_url])
    upsertStatement = (
        "INSERT INTO `appurls` (`app_pkg_name`,`app_url`,`playdrone_metadata_url`,`playdrone_apk_url`) VALUES('"
        + insertedValues
        + "') ON DUPLICATE KEY UPDATE `playdrone_metadata_url`='"
        + playdrone_metadata_url
        + "',`playdrone_apk_url`='"
        + playdrone_apk_url
        + "';")
    print(upsertStatement)
    databaseHandler.dbManipulateData(dbHandle, upsertStatement)
def runSQLFile(sqlScriptPath, dbHandle):
    """Execute every statement of a SQL script file, one at a time.

    The script is split on ``;`` and each non-blank piece is passed to
    ``databaseHandler.dbManipulateData``.

    NOTE(review): the split is purely textual, so a ``;`` inside a string
    literal or comment of the script also separates statements.

    :param sqlScriptPath: path of the SQL script.
    :param dbHandle: database handle passed to ``dbManipulateData``.
    """
    # open() in a context manager replaces the Python-2-only file() builtin
    # and closes the handle deterministically.
    with open(sqlScriptPath) as sqlScript:
        scriptText = sqlScript.read()
    for sqlStatement in scriptText.split(';'):
        # The text after the last ';' (usually just a newline) is not a
        # statement; skip blank pieces instead of sending empty queries.
        if not sqlStatement.strip():
            continue
        databaseHandler.dbManipulateData(dbHandle, sqlStatement)
def updateParsed(dbHandle, tableId):
    """Set ``parsed_privacy_grade`` to 1 on the ``appurls`` row with this id.

    :param dbHandle: database handle passed to ``dbManipulateData``.
    :param tableId: id of the ``appurls`` row (stringified into the query).
    """
    statementParts = [
        "UPDATE `appurls` SET `parsed_privacy_grade`=1 WHERE `id`=",
        str(tableId),
        ";",
    ]
    databaseHandler.dbManipulateData(dbHandle, "".join(statementParts))
def updateDownloaded(dbHandle, tableId):
    """Set ``downloaded`` to 1 on the ``appurls`` row with the given id.

    :param dbHandle: database handle passed to ``dbManipulateData``.
    :param tableId: id of the ``appurls`` row (stringified into the query).
    """
    rowId = str(tableId)
    markDownloaded = "UPDATE `appurls` SET `downloaded`=1 WHERE `id`={0};".format(rowId)
    databaseHandler.dbManipulateData(dbHandle, markDownloaded)
def updateDownloaded(dbHandle, tableId):
    """Flag one ``appurls`` row as downloaded.

    :param dbHandle: database handle passed to ``dbManipulateData``.
    :param tableId: id of the row to flag (stringified into the query).
    """
    databaseHandler.dbManipulateData(
        dbHandle,
        "UPDATE appurls SET downloaded=1 WHERE id=%s;" % str(tableId))
def createSQLStatementAndInsert(dbHandle, app_dict):
    """Insert one app's metadata into the ``appdata`` table.

    Apps without an ``app_name`` entry are skipped.  The developer and
    category ids are resolved through ``getDeveloperId`` and
    ``getCategoryId``; every optional field falls back to a default when it
    is missing from ``app_dict``.

    :param dbHandle: database handle.
    :param app_dict: app metadata; ``app_name`` and ``app_pkg_name`` are
        required for an insert to happen.
    """
    # Without a name there is nothing sensible to store.  The original code
    # only bound ``app_name`` inside this check, so make the skip explicit
    # instead of relying on a later unbound-variable error.
    if 'app_name' not in app_dict:
        return

    def escaped(value):
        # Same escaping the name/description already received; it is applied
        # to every text field that ends up inside quotes in the statement.
        return conversion.MySQLConverter().escape(value)

    app_name = escaped(app_dict['app_name'])
    app_pkg_name = escaped(app_dict['app_pkg_name'])
    developer_id = getDeveloperId(dbHandle, app_dict)
    app_category_id = getCategoryId(dbHandle, app_dict)

    review_rating = app_dict.get('review_rating', 0.0)
    review_count = app_dict.get('review_count', 0)
    escaped_text_desc = escaped(app_dict.get('app_desc', ''))
    escaped_text_whats_new = escaped(app_dict.get('whats_new', ''))
    # '1984-08-31' is the placeholder date used when no update date is known.
    updated = escaped(app_dict.get('Updated', '1984-08-31'))
    installs = app_dict.get('Installs', 0)
    version = escaped(app_dict.get('Current_Version', ''))
    android_reqd = escaped(app_dict.get('Requires_Android', ''))
    content_rating = escaped(app_dict.get('Content_Rating', ''))

    sqlStatement = (
        "INSERT INTO `appdata`(`app_pkg_name`,`app_name`,`developer_id`,`app_category_id`,`review_rating`,`review_count`,`desc`,`whats_new`,`updated`,`installs`,`version`,`android_reqd`,`content_rating`) VALUES('"
        + app_pkg_name + "','" + app_name + "',"
        + str(developer_id) + "," + str(app_category_id) + ","
        + str(review_rating) + "," + str(review_count)
        + ",'" + escaped_text_desc + "','" + escaped_text_whats_new
        + "','" + updated + "'," + str(installs)
        + ",'" + version + "','" + android_reqd
        + "','" + content_rating + "');")
    print(sqlStatement)
    databaseHandler.dbManipulateData(dbHandle, sqlStatement)