コード例 #1
0
def process(inputdir,outfilename,brtime1,brtime2):
    """Aggregate per-user behavior-response counts and write them to a CSV file.

    Every file name returned by ``tweetproc.jsonindir(inputdir)`` is joined
    onto ``inputdir`` and handed to ``tweetproc.tweetcodingbresponse`` together
    with ``brtime1`` and ``brtime2``.  Each call is expected to return a
    dictionary mapping a user id to a dictionary of behavior-response counts;
    counts for a user that shows up in several files are added together.

    The output file contains, in this order:

    * one line per user, sorted by user id: the user id followed by that
      user's counts, ordered by behavior-response name;
    * one line per behavior response, sorted by name: the name, the count
      summed over all users, and that sum per 100 users rounded to two
      decimals (0.0 when there are no users);
    * a final ``sum,<number of users>`` line.

    Args:
        inputdir: Directory holding the input files.
        outfilename: Path of the CSV file to write (UTF-8 encoded).
        brtime1: Passed through unchanged to
            ``tweetproc.tweetcodingbresponse`` (presumably the start of the
            time window -- confirm against tweetproc).
        brtime2: Passed through unchanged to
            ``tweetproc.tweetcodingbresponse`` (presumably the end of the
            time window -- confirm against tweetproc).
    """
    # user id -> {behavior response name: count}, merged over all input files
    userlist = {}

    for filename in tweetproc.jsonindir(inputdir):
        userbrcounts = tweetproc.tweetcodingbresponse(
            os.path.join(inputdir, filename), brtime1, brtime2)

        for user, brcounts in userbrcounts.items():
            if user in userlist:
                merged = userlist[user]
                for br, count in brcounts.items():
                    # .get() tolerates a response that earlier files did not
                    # report for this user
                    merged[br] = merged.get(br, 0) + count
            else:
                # Copy, so that later merges never modify the dictionary
                # returned by tweetproc
                userlist[user] = dict(brcounts)

    print("Processing %s" % (outfilename,))

    # Totals per behavior response over all users.  The known responses are
    # pre-seeded so they are reported even when their count is zero.
    brsummary = {
        'handwash': 0,
        'handsanitize': 0,
        'cough': 0,
        'avoidgathering': 0,
        'avoidschool': 0,
        'total': 0,
    }
    for brcounts in userlist.values():
        for br, count in brcounts.items():
            brsummary[br] = brsummary.get(br, 0) + count
    usercount = len(userlist)

    # io.open() in text mode only accepts unicode text, hence the u"" literals
    with io.open(outfilename, 'w', encoding="utf-8", errors='ignore') as outfile:
        # One row per user; keys are sorted so the column order is stable
        for user in sorted(userlist):
            brcounts = userlist[user]
            columns = [u"%s" % (user,)]
            columns.extend(u"%s" % (brcounts[br],) for br in sorted(brcounts))
            outfile.write(u",".join(columns) + u"\n")

        # One row per behavior response: name, total, total per 100 users
        for br in sorted(brsummary):
            if usercount > 0:
                percent = float(brsummary[br] * 100) / float(usercount)
            else:
                percent = 0.0
            outfile.write(
                u"%s,%s,%s\n" % (br, brsummary[br], str(round(percent, 2))))

        # Then write out the number of users
        outfile.write(u"sum,%s\n" % (usercount,))
コード例 #2
0
def process(inputdir,outfilename,tictime1,tictime2):
    """Count tweets per user and write the counts plus a histogram to CSV.

    Every file name returned by ``tweetproc.jsonindir(inputdir)`` is joined
    onto ``inputdir`` and handed to ``tweetproc.tic`` together with
    ``tictime1`` and ``tictime2``.  Each call is expected to return a
    dictionary mapping a user id to an integer count; counts for a user that
    shows up in several files are added together.

    The output file contains, in this order:

    * one ``<user>,<count>`` line per user, sorted by user id;
    * six histogram lines ``<bucket>,<users>,<percent>`` for the buckets
      0, 1, 2, 3, 4 and 5, where bucket 5 collects every user with a count
      of five or more and ``percent`` is the share of all users in that
      bucket, rounded to two decimals (0.0 when there are no users);
    * a final ``sum,<number of users>`` line.

    Args:
        inputdir: Directory holding the input files.
        outfilename: Path of the CSV file to write (UTF-8 encoded).
        tictime1: Passed through unchanged to ``tweetproc.tic`` (presumably
            the start of the time window -- confirm against tweetproc).
        tictime2: Passed through unchanged to ``tweetproc.tic`` (presumably
            the end of the time window -- confirm against tweetproc).
    """
    # user id -> count, accumulated over all input files
    userlist = {}

    for filename in tweetproc.jsonindir(inputdir):
        usercounts = tweetproc.tic(
            os.path.join(inputdir, filename), tictime1, tictime2)
        for user, count in usercounts.items():
            userlist[user] = userlist.get(user, 0) + count

    print("Processing %s" % (outfilename,))

    # counts[i] is the number of users with exactly i tweets for i < 5;
    # counts[5] is the number of users with five or more
    counts = [0, 0, 0, 0, 0, 0]
    for value in userlist.values():
        counts[min(value, 5)] += 1
    usercount = len(userlist)

    # io.open() in text mode only accepts unicode text, hence the u"" literals
    with io.open(outfilename, 'w', encoding="utf-8", errors='ignore') as outfile:
        # One row per user
        for user in sorted(userlist):
            outfile.write(u"%s,%s\n" % (user, userlist[user]))

        # One row per bucket: bucket, number of users, percentage of users
        for bucket, bucketusers in enumerate(counts):
            if usercount > 0:
                percent = float(bucketusers * 100) / float(usercount)
            else:
                percent = 0.0
            outfile.write(
                u"%s,%s,%s\n" % (bucket, bucketusers, str(round(percent, 2))))

        # Then write out the number of users
        outfile.write(u"sum,%s\n" % (usercount,))
コード例 #3
0
#!/usr/bin/python

import tweetproc 
import os
import io

# Directory holding the input files.
# Alternative data set: "data/geoebola-sites-Kent-25000"
inputdir="data/geoebola"

outfilename="csv/out.geoebola.csv"

# Concatenate the CSV text that tweetproc.geocsv produces for every input file.
# Joining onto a unicode string keeps the result unicode even when there are no
# input files; io.open() in text mode rejects byte strings under Python 2.
filelist=tweetproc.jsonindir(inputdir)
outputstring=u"".join(
    tweetproc.geocsv(os.path.join(inputdir,file)) for file in filelist
)

with io.open(outfilename,'w',encoding="utf-8",errors='ignore') as outfile:
    outfile.write(outputstring)

コード例 #4
0
def process(inputdir,outfilename,tctime1,tctime2):
    """Aggregate per-user tweet-coding counts and write them to a CSV file.

    Every file name returned by ``tweetproc.jsonindir(inputdir)`` is joined
    onto ``inputdir`` and handed to ``tweetproc.tweetcoding`` together with
    ``tctime1`` and ``tctime2``.  Each call is expected to return a
    dictionary mapping a user id to a dictionary of per-category counts;
    counts for a user that shows up in several files are added together.

    The output file contains, in this order:

    * one line per user, sorted by user id: the user id followed by that
      user's counts, ordered by category name;
    * one line per category, sorted by name: the name, the count summed over
      all users, and that sum per 100 users rounded to two decimals (0.0
      when there are no users);
    * a final ``sum,<number of users>`` line.

    The category and ``sum`` lines are written even when no user was found.

    Args:
        inputdir: Directory holding the input files.
        outfilename: Path of the CSV file to write (UTF-8 encoded).
        tctime1: Passed through unchanged to ``tweetproc.tweetcoding``
            (presumably the start of the time window -- confirm against
            tweetproc).
        tctime2: Passed through unchanged to ``tweetproc.tweetcoding``
            (presumably the end of the time window -- confirm against
            tweetproc).
    """
    # user id -> {category name: count}, merged over all input files
    userlist = {}

    for filename in tweetproc.jsonindir(inputdir):
        usertccounts = tweetproc.tweetcoding(
            os.path.join(inputdir, filename), tctime1, tctime2)

        for user, tccounts in usertccounts.items():
            if user in userlist:
                merged = userlist[user]
                for tc, count in tccounts.items():
                    # .get() tolerates a category that earlier files did not
                    # report for this user
                    merged[tc] = merged.get(tc, 0) + count
            else:
                # Copy, so that later merges never modify the dictionary
                # returned by tweetproc
                userlist[user] = dict(tccounts)

    print("Processing %s" % (outfilename,))

    # Totals per category over all users.  Built independently of the user
    # rows so it exists even when there are no users; the known categories
    # are pre-seeded so they are reported even when their count is zero.
    tcsummary = {
        'concern': 0,
        'experience': 0,
        'opinion': 0,
        'sarcasm': 0,
        'relief': 0,
        'downplay': 0,
        'frustration': 0,
        'total': 0,
    }
    for tccounts in userlist.values():
        for tc, count in tccounts.items():
            tcsummary[tc] = tcsummary.get(tc, 0) + count
    usercount = len(userlist)

    # io.open() in text mode only accepts unicode text, hence the u"" literals
    with io.open(outfilename, 'w', encoding="utf-8", errors='ignore') as outfile:
        # One row per user; keys are sorted so the column order is stable
        for user in sorted(userlist):
            tccounts = userlist[user]
            columns = [u"%s" % (user,)]
            columns.extend(u"%s" % (tccounts[tc],) for tc in sorted(tccounts))
            outfile.write(u",".join(columns) + u"\n")

        # One row per category: name, total, total per 100 users
        for tc in sorted(tcsummary):
            if usercount > 0:
                percent = float(tcsummary[tc] * 100) / float(usercount)
            else:
                percent = 0.0
            outfile.write(
                u"%s,%s,%s\n" % (tc, tcsummary[tc], str(round(percent, 2))))

        # Then write out the number of users
        outfile.write(u"sum,%s\n" % (usercount,))