def getPostIdsFromPostCollection(): databaseName = "VineDatabase" collectionName = "CollectedUserPosts" documents = mongoOperations.findAllDataFromCollection(databaseName, collectionName) postidList = [] for document in documents: postidList.append(str(document['postId'])) return postidList
def getUserIdsFromBioCollection(): databaseName = "VineDatabase" collectionName = "CollectedUserBio" documents = mongoOperations.findAllDataFromCollection(databaseName, collectionName) useridList = [] for document in documents: useridList.append(str(document['userId'])) return useridList
def getInstagramBio(): databaseName = "VineDatabase" collectionName = "CollectedUserBio" documents = mongoOperations.findAllDataFromCollection(databaseName, collectionName) f = open("NathanInstagramFile.txt","w") count = 0 for document in documents: if "instagram" in str(document["description"]).lower(): vineUsername = "******"+str(document["username"]) vineUserID = "Vine UserID:"+str(document["userId"]) vineDescription = "Description:"+str(document["description"]) f.write(vineUsername+"\n") f.write(vineUserID+"\n") f.write(vineDescription+"\n") count = count + 1 print str(count)+" users done" f.close()
def writeAllPostIdsToFile(): databaseName = "VineDatabase" collectionName = "CollectedUserPosts" documents = mongoOperations.findAllDataFromCollection(databaseName, collectionName) count = 0 fileCount = 1 totalCount = 0 g = open("vinePostIdFileNumber"+str(fileCount)+".txt","a") for document in documents: g.write(str(document['postId'])+"\n") count = count + 1 totalCount = totalCount + 1 print "total:"+str(totalCount) if count == 100000: count = 0 g.close() fileCount = fileCount + 1 g = open("vinePostIdFileNumber"+str(fileCount)+".txt","a") print "newfile!yay!!!!!!!!!!!!!!!!!!!!!!!" g.close()
def locationUserAverageStats(): databaseName = "VineDatabase" collectionName = "CollectedUserBio" documents = mongoOperations.findAllDataFromCollection(databaseName, collectionName) locationcount = 0 nonLocationcount = 0 locationFollowerCount = 0 nonLocationFollowerCount = 0 locationFollowingCount = 0 nonLocationFollowingCount = 0 locationLoopCount = 0 nonLocationLoopCount = 0 locationPostCount = 0 nonLocationPostCount = 0 locationAuthorCount = 0 nonLocationAuthorCount = 0 locationExplicitCount = 0 nonLocationExplicitCount = 0 locationTwitterCount = 0 nonLocationTwitterCount = 0 locationInstagramCount = 0 nonLocationInstagramCount = 0 locationYoutubeCount = 0 nonLocationYoutubeCount = 0 for document in documents: if len(str(document["location"])) > 0: locationcount = locationcount + 1 locationAuthorCount = locationAuthorCount + int(str(document["authoredPostCount"])) locationExplicitCount = locationExplicitCount + int(str(document["explicitContent"])) locationPostCount = locationPostCount + int(str(document["postCount"])) locationLoopCount = locationLoopCount + int(str(document["loopCount"])) locationFollowingCount = locationFollowingCount + int(str(document["followingCount"])) locationFollowerCount = locationFollowerCount + int(str(document["followerCount"])) if "twitter" in str(document["description"]).lower(): locationTwitterCount = locationTwitterCount + 1 if "instagram" in str(document["description"]).lower(): locationInstagramCount = locationInstagramCount + 1 if "youtube" in str(document["description"]).lower(): locationYoutubeCount = locationYoutubeCount + 1 else: nonLocationcount = nonLocationcount + 1 nonLocationAuthorCount = nonLocationAuthorCount + int(str(document["authoredPostCount"])) nonLocationExplicitCount = nonLocationExplicitCount + int(str(document["explicitContent"])) nonLocationPostCount = nonLocationPostCount + int(str(document["postCount"])) nonLocationLoopCount = nonLocationLoopCount + int(str(document["loopCount"])) nonLocationFollowingCount = nonLocationFollowingCount + int(str(document["followingCount"])) nonLocationFollowerCount = nonLocationFollowerCount + int(str(document["followerCount"])) if "twitter" in str(document["description"]).lower(): nonLocationTwitterCount = nonLocationTwitterCount + 1 if "instagram" in str(document["description"]).lower(): nonLocationInstagramCount = nonLocationInstagramCount + 1 if "youtube" in str(document["description"]).lower(): nonLocationYoutubeCount = nonLocationYoutubeCount + 1 print "location set users:" print "total explicit content: "+str(locationExplicitCount) print "total explicit content percentage: "+str(float(locationExplicitCount)/float(locationcount)) print "total authored post: "+str(locationAuthorCount) print "average authored post: "+str(locationAuthorCount/locationcount) print "total number of post: "+str(locationPostCount) print "average number of post: "+str(locationPostCount/locationcount) print "total number of loop: "+str(locationLoopCount) print "average number of loop: "+str(locationLoopCount/locationcount) print "total number of following: "+str(locationFollowingCount) print "average number of following: "+str(locationFollowingCount/locationcount) print "total number of follower: "+str(locationFollowerCount) print "average number of follower: "+str(locationFollowerCount/locationcount) print "total number of twitter: "+str(locationTwitterCount) print "percentage of twitter: "+str(float(locationTwitterCount)/float(locationcount)) print "total number of instagram: "+str(locationInstagramCount) print "percentage of instagram: "+str(float(locationInstagramCount)/float(locationcount)) print "total number of youtube: "+str(locationYoutubeCount) print "percentage of youtube: "+str(float(locationYoutubeCount)/float(locationcount)) print "non location users:" print "total explicit content: "+str(nonLocationExplicitCount) print "total explicit content percentage: "+str(float(nonLocationExplicitCount)/float(nonLocationcount)) print "total authored post: "+str(nonLocationAuthorCount) print "average authored post: "+str(nonLocationAuthorCount/nonLocationcount) print "total number of post: "+str(nonLocationPostCount) print "average number of post: "+str(nonLocationPostCount/nonLocationcount) print "total number of loop: "+str(nonLocationLoopCount) print "average number of loop: "+str(nonLocationLoopCount/nonLocationcount) print "total number of following: "+str(nonLocationFollowingCount) print "average number of following: "+str(nonLocationFollowingCount/nonLocationcount) print "total number of follower: "+str(nonLocationFollowerCount) print "average number of follower: "+str(nonLocationFollowerCount/nonLocationcount) print "total number of twitter: "+str(nonLocationTwitterCount) print "percentage of twitter: "+str(float(nonLocationTwitterCount)/float(nonLocationcount)) print "total number of instagram: "+str(nonLocationInstagramCount) print "percentage of instagram: "+str(float(nonLocationInstagramCount)/float(nonLocationcount)) print "total number of youtube: "+str(nonLocationYoutubeCount) print "percentage of youtube: "+str(float(nonLocationYoutubeCount)/float(nonLocationcount))
def verifiedUserAverageStats(): databaseName = "VineDatabase" collectionName = "CollectedUserBio" documents = mongoOperations.findAllDataFromCollection(databaseName, collectionName) verifiedcount = 0 nonVerifiedcount = 0 verifiedFollowerCount = 0 nonVerifiedFollowerCount = 0 verifiedFollowingCount = 0 nonVerifiedFollowingCount = 0 verifiedLoopCount = 0 nonVerifiedLoopCount = 0 verifiedPostCount = 0 nonVerifiedPostCount = 0 verifiedAuthorCount = 0 nonVerifiedAuthorCount = 0 verifiedExplicitCount = 0 nonVerifiedExplicitCount = 0 verifiedLocationCount = 0 nonVerifiedLocationCount = 0 verifiedTwitterCount = 0 nonVerifiedTwitterCount = 0 verifiedInstagramCount = 0 nonVerifiedInstagramCount = 0 verifiedYoutubeCount = 0 nonVerifiedYoutubeCount = 0 for document in documents: if str(document["verified"]) == "1": verifiedcount = verifiedcount + 1 verifiedAuthorCount = verifiedAuthorCount + int(str(document["authoredPostCount"])) verifiedExplicitCount = verifiedExplicitCount + int(str(document["explicitContent"])) verifiedPostCount = verifiedPostCount + int(str(document["postCount"])) verifiedLoopCount = verifiedLoopCount + int(str(document["loopCount"])) verifiedFollowingCount = verifiedFollowingCount + int(str(document["followingCount"])) verifiedFollowerCount = verifiedFollowerCount + int(str(document["followerCount"])) if len(str(document["location"])) > 0: verifiedLocationCount = verifiedLocationCount + 1 if "twitter" in str(document["description"]).lower(): verifiedTwitterCount = verifiedTwitterCount + 1 if "instagram" in str(document["description"]).lower(): verifiedInstagramCount = verifiedInstagramCount + 1 if "youtube" in str(document["description"]).lower(): verifiedYoutubeCount = verifiedYoutubeCount + 1 else: nonVerifiedcount = nonVerifiedcount + 1 nonVerifiedAuthorCount = nonVerifiedAuthorCount + int(str(document["authoredPostCount"])) nonVerifiedExplicitCount = nonVerifiedExplicitCount + int(str(document["explicitContent"])) nonVerifiedPostCount = nonVerifiedPostCount + int(str(document["postCount"])) nonVerifiedLoopCount = nonVerifiedLoopCount + int(str(document["loopCount"])) nonVerifiedFollowingCount = nonVerifiedFollowingCount + int(str(document["followingCount"])) nonVerifiedFollowerCount = nonVerifiedFollowerCount + int(str(document["followerCount"])) if len(str(document["location"])) > 0: nonVerifiedLocationCount = nonVerifiedLocationCount + 1 if "twitter" in str(document["description"]).lower(): nonVerifiedTwitterCount = nonVerifiedTwitterCount + 1 if "instagram" in str(document["description"]).lower(): nonVerifiedInstagramCount = nonVerifiedInstagramCount + 1 if "youtube" in str(document["description"]).lower(): nonVerifiedYoutubeCount = nonVerifiedYoutubeCount + 1 print "verified users:" print "total explicit content: "+str(verifiedExplicitCount) print "total explicit content percentage: "+str(float(verifiedExplicitCount)/float(verifiedcount)) print "total authored post: "+str(verifiedAuthorCount) print "average authored post: "+str(verifiedAuthorCount/verifiedcount) print "total number of post: "+str(verifiedPostCount) print "average number of post: "+str(verifiedPostCount/verifiedcount) print "total number of loop: "+str(verifiedLoopCount) print "average number of loop: "+str(verifiedLoopCount/verifiedcount) print "total number of following: "+str(verifiedFollowingCount) print "average number of following: "+str(verifiedFollowingCount/verifiedcount) print "total number of follower: "+str(verifiedFollowerCount) print "average number of follower: "+str(verifiedFollowerCount/verifiedcount) print "total number of location: "+str(verifiedLocationCount) print "percentage of location: "+str(float(verifiedLocationCount)/float(verifiedcount)) print "total number of twitter: "+str(verifiedTwitterCount) print "percentage of twitter: "+str(float(verifiedTwitterCount)/float(verifiedcount)) print "total number of instagram: "+str(verifiedInstagramCount) print "percentage of instagram: "+str(float(verifiedInstagramCount)/float(verifiedcount)) print "total number of youtube: "+str(verifiedYoutubeCount) print "percentage of youtube: "+str(float(verifiedYoutubeCount)/float(verifiedcount)) print "non verified users:" print "total explicit content: "+str(nonVerifiedExplicitCount) print "total explicit content percentage: "+str(float(nonVerifiedExplicitCount)/float(nonVerifiedcount)) print "total authored post: "+str(nonVerifiedAuthorCount) print "average authored post: "+str(nonVerifiedAuthorCount/nonVerifiedcount) print "total number of post: "+str(nonVerifiedPostCount) print "average number of post: "+str(nonVerifiedPostCount/nonVerifiedcount) print "total number of loop: "+str(nonVerifiedLoopCount) print "average number of loop: "+str(nonVerifiedLoopCount/nonVerifiedcount) print "total number of following: "+str(nonVerifiedFollowingCount) print "average number of following: "+str(nonVerifiedFollowingCount/nonVerifiedcount) print "total number of follower: "+str(nonVerifiedFollowerCount) print "average number of follower: "+str(nonVerifiedFollowerCount/nonVerifiedcount) print "total number of location: "+str(nonVerifiedLocationCount) print "percentage of location: "+str(float(nonVerifiedLocationCount)/float(nonVerifiedcount)) print "total number of twitter: "+str(nonVerifiedTwitterCount) print "percentage of twitter: "+str(float(nonVerifiedTwitterCount)/float(nonVerifiedcount)) print "total number of instagram: "+str(nonVerifiedInstagramCount) print "percentage of instagram: "+str(float(nonVerifiedInstagramCount)/float(nonVerifiedcount)) print "total number of youtube: "+str(nonVerifiedYoutubeCount) print "percentage of youtube: "+str(float(nonVerifiedYoutubeCount)/float(nonVerifiedcount))