Example #1
0
def getReviewRatings(dbHandle, appUrlList):
    for appUrl in appUrlList:
        app_dict = json.loads(open('appRating.json', 'r').read())
        app_pkg_name = appUrl.split('=')[1]
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = urllib2.Request(appUrl, None, headers)
        try:
            review_rating = 0.0
            page = urllib2.urlopen(req).read()
            soup = bs(''.join(page))
            for div in soup.findAll(attrs={'class': 'score'}):
                for child in div.children:
                    if not child.string == ' ':
                        review_rating = round(eval(child.string), 1)
                    app_dict[app_pkg_name] = review_rating
            insertInDB(dbHandle, review_rating, app_pkg_name)
            open('appRating.json',
                 'w').write(json.dumps(app_dict, indent=4, sort_keys=True))
        except urllib2.HTTPError, e:
            if str(e.code) == '404':
                sqlStatement = "UPDATE `appdata` SET `still_in_googleplaystore`= 0 WHERE `app_pkg_name` = '" + app_pkg_name + "';"
                print 'HTTPError =', str(
                    e.code), 'for app:', app_pkg_name, sqlStatement
                logging.debug('HTTPError =' + str(e.code) + 'for app:' +
                              app_pkg_name + " statement: " + sqlStatement)
                databaseHandler.dbManipulateData(dbHandle, sqlStatement)
            else:
                print 'HTTPError =', str(e.code), 'for app:', app_pkg_name
                logging.debug('HTTPError =' + str(e.code) + 'for app:' +
                              app_pkg_name)
Example #2
0
def insertInDB(app_pkg_name,paid):
	dbHandle = databaseHandler.dbConnectionCheck() # DB Open
	sqlStatement = "UPDATE `appdata` SET `paid`= "+str(paid)+" WHERE `app_pkg_name` = '"+app_pkg_name+"';"
	print sqlStatement
	logging.debug("Statement: "+sqlStatement)
	databaseHandler.dbManipulateData(dbHandle, sqlStatement)
	dbHandle.close() #DB Close
def extractPermissionsInfo(dbHandle, pkgName, GSFId):
    if isAPKPermissionsAlreadyInTable(dbHandle, pkgName) == True:
        print "Moving on to extracting permissions for the next app. This one is already in the database."
    else:
        # Extract permissions using the API and store in the DB
        pkgNameList = []
        pkgNameList.append(pkgName)
        # API call to unofficial Google Play API written in Python by egirault
        listOfPermissions = permissions.getPackagePermission(
            pkgNameList, GSFId)
        #		print listOfPermissions

        for permissionName in listOfPermissions:
            dbHandle = databaseHandler.dbConnectionCheck()

            # See if the permission is in the table if not insert it and get its id
            sqlStatementPermName = "SELECT id FROM `permissions` WHERE `name` = '" + permissionName + "';"
            permissionId = getPermissionId(dbHandle, sqlStatementPermName,
                                           permissionName)

            # Find the App's Id in the DB
            # Assumption is that the crawlURL has already extracted all information about the app and the same is in the appdata table
            # If that is not true this step will fail and we will have to skip and go to the next app
            sqlStatementAppPkgName = "SELECT id FROM `appdata` WHERE `app_pkg_name` = '" + pkgName + "';"
            appId = getAppId(dbHandle, sqlStatementAppPkgName, pkgName)

            if appId > 0:
                # Insert the App_Id and corresponding Perm_Id in to the DB
                sqlStatement = "INSERT INTO `appperm`(`app_id`,`perm_id`) VALUES (" + str(
                    appId) + "," + str(permissionId) + ");"
                print sqlStatement
                databaseHandler.dbManipulateData(dbHandle, sqlStatement)
            else:
                print "Moving on to the next app. This app has not been extracted from Google Play Store."
def downloadAPKFromPhone():
    urlPrefix = "https://play.google.com/store/apps/details?id="
    listOfPackages = subprocess.check_output(
        ["adb", "shell", "pm", "list", "packages"])
    #	This is a brilliant Python line I learnt from AK
    #	 lines = [line.strip() for line in listOfPackages.split()]
    for line in listOfPackages.split():
        package = line.strip().split(":")[-1]
        urlPrefix = "https://play.google.com/store/apps/details?id="
        urlExtract = urlPrefix + package
        if verifyPresentInAppMarket(urlExtract):
            dbHandle = databaseHandler.dbConnectionCheck()  # DB Open

            sqlStatement = "INSERT INTO `appurls`(`app_pkg_name`,`app_url`,`downloaded`) VALUES('" + package + "', '" + urlExtract + "', 1);"
            try:
                databaseHandler.dbManipulateData(dbHandle, sqlStatement)
            except _mysql_exceptions.IntegrityError:
                print "package", package, "was present, updating now"
                sqlStatement = "SELECT `id` FROM `appurls` WHERE app_pkg_name = '" + package + "';"
                cursor = dbHandle.cursor()
                try:
                    cursor.execute(sqlStatement)
                    queryOutput = cursor.fetchall()
                except:
                    print "Unexpected error:", sys.exc_info()[0]
                    raise
                for row in queryOutput:
                    updateDownloaded(dbHandle, row[0])

            dbHandle.close()  #DB Close

            # Get the path of the apk and extract it
            path = subprocess.check_output(
                ["adb", "shell", "pm", "path", package]).strip().split(":")[-1]

            # If the apps download directory doesn't exist just create it
            currentDirectory = os.getcwd()

            osInfo = platform.system()
            if osInfo == 'Windows':
                appsDownloadDirectory = currentDirectory + "\\apps\\"
                downloadAPK(appsDownloadDirectory, path)
            elif osInfo == 'Linux':
                appsDownloadDirectory = currentDirectory + "/apps/"
                downloadAPK(appsDownloadDirectory, path)
            else:
                sys.stderr.write(
                    'The current os not supported at the moment.\n')
                sys.exit(1)
            copiedFromPhoneAPKName = appsDownloadDirectory + path.split(
                "/")[-1]
            realPackageBasedAPKName = appsDownloadDirectory + package + ".apk"
            try:
                os.rename(copiedFromPhoneAPKName, realPackageBasedAPKName)
            except WindowsError:
                # The file already exists, we should copy the new apk over it
                os.remove(realPackageBasedAPKName)
                os.rename(copiedFromPhoneAPKName, realPackageBasedAPKName)
            os.chdir(currentDirectory)
Example #5
0
def insertInDB(dbHandle, review_rating, app_pkg_name):
    dbHandle = databaseHandler.dbConnectionCheck()  # DB Open
    # app_dict = json.loads(open('appRating.json','r').read())
    # for app_pkg_name, review_rating in app_dict.iteritems():
    sqlStatement = "UPDATE `appdata` SET `review_rating`= " + str(
        review_rating) + " WHERE `app_pkg_name` = '" + app_pkg_name + "';"
    print sqlStatement
    logging.debug("Statement: " + sqlStatement)
    databaseHandler.dbManipulateData(dbHandle, sqlStatement)
Example #6
0
def insertIntoDB(name,
                 protectionLevel,
                 permissionGroup="NULL",
                 permissionFlags="NULL"):
    """Insert a permission row, or update it when the key already exists.

    An empty ``permissionFlags`` string is stored as SQL ``NULL``; every
    other value (including the default ``"NULL"``) is written as a quoted
    string literal.

    :param name: permission name.
    :param protectionLevel: value for ``protection_level``.
    :param permissionGroup: value for ``permission_group``.
    :param permissionFlags: value for ``permission_flags``.
    """
    if permissionFlags == '':
        # No flags: store a real NULL instead of an empty string.
        sqlStatement = (
            "insert into permissions(name, protection_level, permission_group, permission_flags) values('"
            + name + "', '" + protectionLevel + "', '" + permissionGroup
            + "', NULL) on duplicate key update protection_level = '"
            + protectionLevel + "', permission_group = '" + permissionGroup
            + "', permission_flags = NULL;")
    else:
        sqlStatement = (
            "insert into permissions(name, protection_level, permission_group, permission_flags) values('"
            + name + "', '" + protectionLevel + "', '" + permissionGroup
            + "', '" + permissionFlags
            + "') on duplicate key update protection_level = '"
            + protectionLevel + "', permission_group = '" + permissionGroup
            + "', permission_flags = '" + permissionFlags + "';")

    connection = databaseHandler.dbConnectionCheck()
    databaseHandler.dbManipulateData(connection, sqlStatement)
Example #7
0
def getDataForAppList(dbHandle):
    """Insert an ``appurls`` row for every app named in ``applist.json``.

    The file is read from the current working directory and must contain a
    JSON object with an ``appNames`` list.  For each name the Play Store URL
    and the privacygrade.org URL are derived and stored.

    :param dbHandle: database handle passed to
        ``databaseHandler.dbManipulateData``.
    """
    # Close the file deterministically instead of relying on the GC.
    with open('applist.json', 'r') as appListFile:
        appList = json.loads(appListFile.read())['appNames']
    appUrlPrefix = 'https://play.google.com/store/apps/details?id='
    privacyGradePrefix = 'http://privacygrade.org/apps/'
    for app in appList:
        appUrl = appUrlPrefix + app
        privacy_grade_url = privacyGradePrefix + app + '.html'
        sqlStatement = "INSERT INTO `appurls`(`app_pkg_name`,`app_url`,`privacy_grade_url`) VALUES ('" + app + "','" + appUrl + "','" + privacy_grade_url + "')"
        databaseHandler.dbManipulateData(dbHandle, sqlStatement)
def extractMoreURLsAndStore(dbHandle, urlExtract):
	headers = { 'User-Agent' : 'Mozilla/5.0' }
	req = urllib2.Request(urlExtract, None, headers)
	try: 
		page = urllib2.urlopen(req).read()
		soup = BeautifulSoup(''.join(page))
		data = soup.findAll(attrs={'class': 'card-click-target'})
	
		for chunk in data:
			url = "https://play.google.com"+chunk['href']
			packageName = url.split("=")
			sqlStatement = "INSERT INTO `appurls`(`app_pkg_name`,`app_url`) VALUES('"+packageName[1]+"', '"+url+"');"
			databaseHandler.dbManipulateData(dbHandle, sqlStatement)
	except urllib2.HTTPError, e:
		print 'HTTPError = ', str(e.code)
Example #9
0
def getDeveloperId(dbHandle, app_dict):
    """Return the id of the app's developer, inserting the developer if new.

    :param dbHandle: database handle; a cursor is created from it.
    :param app_dict: mapping with a mandatory ``developer_name`` and optional
        ``dev_website``, ``dev_email`` and ``dev_location`` entries.
    :returns: the first column of the first matching ``developer`` row, or
        the result of ``databaseHandler.dbManipulateData`` for the INSERT
        (presumably the new row id - confirm).
    :raises: any error from the lookup is printed and re-raised.
    """
    cursor = dbHandle.cursor()

    def _escapedOptional(key):
        # Missing optional fields are stored as empty strings.
        return conversion.MySQLConverter().escape(app_dict.get(key, ""))

    dev_name = conversion.MySQLConverter().escape(app_dict['developer_name'])
    dev_web = _escapedOptional('dev_website')
    dev_email = _escapedOptional('dev_email')
    dev_loc = _escapedOptional('dev_location')

    lookupSql = "SELECT `id` FROM `developer` WHERE `name` = '" + dev_name + "';"
    try:
        cursor.execute(lookupSql)
        if not cursor.rowcount > 0:
            # Unknown developer: create the row.
            insertSql = "INSERT into `developer`(`name`,`website`,`email`,`country`) VALUES('" + dev_name + "','" + dev_web + "','" + dev_email + "','" + dev_loc + "');"
            return databaseHandler.dbManipulateData(dbHandle, insertSql)
        for row in cursor.fetchall():
            return row[0]
    except:
        print "Unexpected error:", sys.exc_info()[0]
        raise
Example #10
0
def getDeveloperId(dbHandle, app_dict):
    cursor = dbHandle.cursor()
    dev_name = app_dict['developer_name']
    dev_name = conversion.MySQLConverter().escape(dev_name)
    if 'dev_website' in app_dict:
        dev_web = app_dict['dev_website']
    else:
        dev_web = ""
    dev_web = conversion.MySQLConverter().escape(dev_web)
    if 'dev_email' in app_dict:
        dev_email = app_dict['dev_email']
    else:
        dev_email = ""
    dev_email = conversion.MySQLConverter().escape(dev_email)
    if 'dev_location' in app_dict:
        dev_loc = app_dict['dev_location']
    else:
        dev_loc = ""
    dev_loc = conversion.MySQLConverter().escape(dev_loc)
    sqlStatementdDevId = "SELECT `id` FROM `developer` WHERE `name` = '" + dev_name + "';"
    try:
        cursor.execute(sqlStatementdDevId)
        data = cursor.fetchone()
        if data is not None:
            return data[0]
        else:
            #If the developer id was not found we will not execute the while loop and execute the following code
            sqlStatementdDevIdInsert = "INSERT into `developer`(`name`,`website`,`email`,`country`) VALUES('" + dev_name + "','" + dev_web + "','" + dev_email + "','" + dev_loc + "');"
            print sqlStatementdDevIdInsert
            return databaseHandler.dbManipulateData(dbHandle,
                                                    sqlStatementdDevIdInsert)
    except:
        print "Unexpected error:", sys.exc_info()[0]
        raise
def extractPermissionInfo(dbHandle, appJSONDownloadFileLocation, pkgName):
    appInfoDict = json.loads(
        open(appJSONDownloadFileLocation, 'r').read().decode('utf8'))
    if 'details' in appInfoDict:
        details = appInfoDict['details']
        if 'app_details' in details:
            app_details = details['app_details']
            if 'permission' in app_details:
                permissionList = app_details['permission']
                for permissionName in permissionList:
                    permissionName = sanitizeInput(permissionName)
                    # See if the permission is in the table if not insert it and get its id
                    sqlStatementPermName = "SELECT id FROM `permissions` WHERE `name` = '" + permissionName + "';"
                    permissionId = getPermissionId(dbHandle,
                                                   sqlStatementPermName,
                                                   permissionName)

                    # Find the App's Id in the DB
                    # Assumption is that the crawlURL has already extracted all information about the app and the same is in the appdata table
                    # If that is not true this step will fail and we will have to skip and go to the next app
                    sqlStatementAppPkgName = "SELECT id FROM `appdata` WHERE `app_pkg_name` = '" + pkgName + "';"
                    appId = getAppId(dbHandle, sqlStatementAppPkgName, pkgName)

                    if appId > 0:
                        # Insert the App_Id and corresponding Perm_Id in to the DB
                        sqlStatement = "INSERT INTO `appperm`(`app_id`,`perm_id`) VALUES (" + str(
                            appId) + "," + str(
                                permissionId
                            ) + ") ON DUPLICATE KEY UPDATE `app_id`='" + str(
                                appId) + "',`perm_id`='" + str(
                                    permissionId) + "';"
                        print sqlStatement
                        databaseHandler.dbManipulateData(
                            dbHandle, sqlStatement)
                    else:
                        print "Moving on to the next app. This app has not been extracted from Google Play Store."
def getPermissionId(dbHandle, sqlStatement, permissionName):
    """Return the id of a permission, inserting the permission if unknown.

    :param dbHandle: database handle; a cursor is created from it.
    :param sqlStatement: SELECT statement that looks the permission up.
    :param permissionName: name used for the INSERT when the lookup finds
        no row.
    :returns: the first column of the last row found, or the result of
        ``databaseHandler.dbManipulateData`` for the INSERT (presumably the
        new row id - confirm).
    :raises: any error is printed and re-raised.
    """
    cursor = dbHandle.cursor()
    try:
        cursor.execute(sqlStatement)
        if not cursor.rowcount > 0:
            # Unknown permission: add it with protection level 'signature'
            # as the default.  The data quality can be improved further by
            # analyzing the apks or by post-processing the
            # `AndroidManifest.xml file <https://raw.githubusercontent.com/android/platform_frameworks_base/master/core/res/AndroidManifest.xml>`_
            insertSql = "INSERT INTO `permissions`(`name`,`protection_level`) VALUES ('" + permissionName + "','signature');"
            permissionId = databaseHandler.dbManipulateData(
                dbHandle, insertSql)
        else:
            # Known permission: take its id from the result set.
            for foundRow in cursor.fetchall():
                permissionId = foundRow[0]
    except:
        print "Unexpected error:", sys.exc_info()[0]
        raise
    return permissionId
def updatePermExtracted(dbHandle, tableId):
    """Set ``perm_extracted`` to 1 on the ``appurls`` row with this id.

    :param dbHandle: database handle passed to
        ``databaseHandler.dbManipulateData``.
    :param tableId: value of the row's ``id`` column (converted with str()).
    """
    statementParts = [
        "UPDATE `appurls` SET `perm_extracted`=1 WHERE `id`=",
        str(tableId),
        ";",
    ]
    databaseHandler.dbManipulateData(dbHandle, "".join(statementParts))
Example #14
0
errorList = []
mismatchList = []
playCategory = ''
for app in appList:
    sqlStatement = "SELECT c.url as category FROM appdata a, appcategories c WHERE a.app_category_id = c.id AND a.app_pkg_name = '" + app + "';"
    try:
        cursor.execute(sqlStatement)
        if cursor.rowcount == 1:
            playCategory = str(cursor.fetchone()[0])
            playCategory = (((playCategory.replace(
                "https://play.google.com/store/apps/category/",
                "")).lower()).replace("_and_", "_n_")).replace("game_", "")
            annotatedCategory = str(appDict[app]["annotated_category"])
            if playCategory == appDict[app]["google_play_category"]:
                insertStatement = "INSERT INTO annotations (app_pkg_name, google_play_category, annotated_category) VALUES ('" + app + "', '" + playCategory + "', '" + annotatedCategory + "');"
                rowid = db.dbManipulateData(dbHandle, insertStatement)
                print "Inserted data for", app, "row:", rowid
            else:
                if appDict[app]["google_play_category"] == '':
                    insertStatement = "INSERT INTO annotations (app_pkg_name, google_play_category, annotated_category) VALUES ('" + app + "', '" + playCategory + "', '" + annotatedCategory + "');"
                    rowid = db.dbManipulateData(dbHandle, insertStatement)
                    print "Inserted data for", app, "row:", rowid
                else:
                    #					print "Mismatch occurred: ", app, playCategory, appDict[app]["google_play_category"]
                    #					print cursor.rowcount
                    mismatchList.append(app)
        else:
            errorList.append(app)
    except:
        print('Unexpected error: ' + str(sys.exc_info()[0]))
def updateURLs(dbHandle, app_pkg_name, app_url, playdrone_metadata_url,
               playdrone_apk_url):
    sqlStatement = "INSERT INTO `appurls` (`app_pkg_name`,`app_url`,`playdrone_metadata_url`,`playdrone_apk_url`) VALUES('" + app_pkg_name + "','" + app_url + "','" + playdrone_metadata_url + "','" + playdrone_apk_url + "') ON DUPLICATE KEY UPDATE `playdrone_metadata_url`='" + playdrone_metadata_url + "',`playdrone_apk_url`='" + playdrone_apk_url + "';"
    print sqlStatement
    databaseHandler.dbManipulateData(dbHandle, sqlStatement)
Example #16
0
def runSQLFile(sqlScriptPath, dbHandle):
    """Execute every statement of a SQL script file.

    The file content is split on ``;`` and each non-blank piece is passed to
    ``databaseHandler.dbManipulateData``.  Blank pieces, such as the text
    after the final ``;``, are skipped.

    NOTE: the split is purely textual, so a ``;`` inside a string literal or
    comment also ends a statement.

    :param sqlScriptPath: path of the script file.
    :param dbHandle: database handle passed to
        ``databaseHandler.dbManipulateData``.
    """
    # open() in a with-block closes the file; the old file() call did not.
    with open(sqlScriptPath) as sqlScript:
        scriptText = sqlScript.read()
    for sqlStatement in scriptText.split(';'):
        if not sqlStatement.strip():
            continue
        databaseHandler.dbManipulateData(dbHandle, sqlStatement)
Example #17
0
def updateParsed(dbHandle, tableId):
    """Set ``parsed_privacy_grade`` to 1 on the ``appurls`` row with this id.

    :param dbHandle: database handle passed to
        ``databaseHandler.dbManipulateData``.
    :param tableId: value of the row's ``id`` column (converted with str()).
    """
    rowIdText = str(tableId)
    updateSql = ("UPDATE `appurls` SET `parsed_privacy_grade`=1 WHERE `id`="
                 + rowIdText + ";")
    databaseHandler.dbManipulateData(dbHandle, updateSql)
def updateDownloaded(dbHandle, tableId):
	"""Set ``downloaded`` to 1 on the ``appurls`` row with this id.

	:param dbHandle: database handle passed to
		``databaseHandler.dbManipulateData``.
	:param tableId: value of the row's ``id`` column (converted with str()).
	"""
	statementParts = ["UPDATE `appurls` SET `downloaded`=1 WHERE `id`=", str(tableId), ";"]
	databaseHandler.dbManipulateData(dbHandle, "".join(statementParts))
def updateDownloaded(dbHandle, tableId):
    """Mark the ``appurls`` row with the given id as downloaded.

    :param dbHandle: database handle passed to
        ``databaseHandler.dbManipulateData``.
    :param tableId: value of the row's ``id`` column (converted with str()).
    """
    rowIdText = str(tableId)
    updateSql = "UPDATE appurls SET downloaded=1 WHERE id=" + rowIdText + ";"
    databaseHandler.dbManipulateData(dbHandle, updateSql)
Example #20
0
def createSQLStatementAndInsert(dbHandle, app_dict):
    if 'app_name' in app_dict:
        app_name = app_dict['app_name']
        app_name = conversion.MySQLConverter().escape(app_name)
        #print app_name

        app_pkg_name = app_dict['app_pkg_name']
        developer_id = getDeveloperId(dbHandle, app_dict)
        app_category_id = getCategoryId(dbHandle, app_dict)

        if 'review_rating' in app_dict:
            review_rating = app_dict['review_rating']
        else:
            review_rating = 0.0

        if 'review_count' in app_dict:
            review_count = app_dict['review_count']
        else:
            review_count = 0

        if 'app_desc' in app_dict:
            app_desc = app_dict['app_desc']
        else:
            app_desc = ''
        escaped_text_desc = conversion.MySQLConverter().escape(app_desc)

        if 'whats_new' in app_dict:
            whats_new = app_dict['whats_new']
        else:
            whats_new = ''
        escaped_text_whats_new = conversion.MySQLConverter().escape(whats_new)

        if 'Updated' in app_dict:
            updated = app_dict['Updated']
        else:
            updated = '1984-08-31'

        if 'Installs' in app_dict:
            installs = app_dict['Installs']
        else:
            installs = 0

        if 'Current_Version' in app_dict:
            version = app_dict['Current_Version']
        else:
            version = ''

        if 'Requires_Android' in app_dict:
            android_reqd = app_dict['Requires_Android']
        else:
            android_reqd = ''

        if 'Content_Rating' in app_dict:
            content_rating = app_dict['Content_Rating']
        else:
            content_rating = ''

        sqlStatement = "INSERT INTO `appdata`(`app_pkg_name`,`app_name`,`developer_id`,`app_category_id`,`review_rating`,`review_count`,`desc`,`whats_new`,`updated`,`installs`,`version`,`android_reqd`,`content_rating`) VALUES('" + app_pkg_name + "','" + app_name + "'," + str(
            developer_id
        ) + "," + str(app_category_id) + "," + str(review_rating) + "," + str(
            review_count
        ) + ",'" + escaped_text_desc + "','" + escaped_text_whats_new + "','" + updated + "'," + str(
            installs
        ) + ",'" + version + "','" + android_reqd + "','" + content_rating + "');"
        print sqlStatement
        databaseHandler.dbManipulateData(dbHandle, sqlStatement)