Example #1
0
                      "--user",
                      dest="user",
                      help=u"User (screen name in Twitter)",
                      metavar="USER",
                      default='')

    (options, args) = parser.parse_args()

    if not (options.input and options.iteractions and options.output
            and options.type):
        parser.print_usage()
        exit(1)

    (user_relevance, content_relevance, content_to_user,
     user_photo) = compute_relevance(options.input, int(options.iteractions),
                                     float(options.damping_factor),
                                     options.user)

    #(user_num_retweets,user_average_num_retweets,user_num_retweeters,user_sum_num_retweets_retweeter) = compute_user_statistics(input_file_name)

    user_relevance_table = {}

    with open(options.output, 'w') as output_file:

        #output_file.write("user,influence,retweets,avg retweets,retweeters,sum retweeters\n")

        for user in xrange(0, len(user_relevance)):
            user_relevance_table[user_relevance[user]
                                 [0]] = user_relevance[user][1]

            if options.type == "USER":
Example #2
0
def main():
	"""Parse the command line, compute relevance scores and write them as TSV.

	The reference date (``--date``, default: today at midnight) and the start
	date (reference minus ``--days``) are handed to ``read_data`` together with
	the input dir; the resulting iterator is ranked by ``compute_relevance``.
	One tab-separated row per ranked item is written to
	``<output>/<first letter of type>_<yyyymmdd of reference>``:

	* ``USER``: user, relevance * 1000000, photo. The user given with
	  ``--user`` is left out. A user whose row cannot be built (no photo
	  entry, or text that cannot be formatted/encoded) is logged and skipped.
	* ``CONTENT``: content id, relevance * 1000000, photo, user, date,
	  status URL, entities.

	Raises:
		SystemExit: with status 1, after printing the usage line, when the
			input dir, the number of iteractions, the output dir or a valid
			type (``USER`` or ``CONTENT``) is missing.
	"""
	parser = OptionParser()
	parser.add_option("-i", "--input-dir", dest="input_dir",
					help=u"Input dir containing CSV files."
						u"Files must contain 1 line per tweet and fields username, tweet.id, retweet.id, "
						u"tweet.create_date (yyyy-mm-ddThh:MM:ss), entities (comma-delimited, optional) and "
						u"user.foto. File names must be in the format yyyymmdd. ",
					metavar="INPUT")
	parser.add_option("-n", "--iteractions", dest="iteractions", help=u"Number of iteractions (default = 20)",
					metavar="ITERACTIONS", default=20)
	parser.add_option("-d", "--damping-factor", dest="damping_factor", help=u"Damping factor (default: 0.85)",
					metavar="DAMPING", default='0.85')
	parser.add_option("-o", "--output", dest="output",
					help=u"Output dir. Files will be named after type (CONTENT or USER) and date",
					metavar="OUPUT")
	parser.add_option("-t", "--type", dest="type", help=u"Type (USER or CONTENT)", metavar="TYPE")
	parser.add_option("-u", "--user", dest="user", help=u"User (screen name in Twitter). Not used anymore.",
					metavar="USER", default='')
	parser.add_option("", "--date", dest="date", help=u"Reference date. Defaults to today, format yyyymmdd",
					metavar="Date")
	parser.add_option("-r", "--days", dest="days",
					help=u"Number of days to consider. The range goes from [date - days -1, date]",
					metavar="DAYS", default='60')

	warnings.simplefilter("ignore", SparseEfficiencyWarning)

	(options, _) = parser.parse_args()

	# The output dir is part of the file name built below, so it is required
	# as well; an unknown type would otherwise only produce an empty file.
	if not (options.input_dir and options.iteractions and options.output
			and options.type in ("USER", "CONTENT")):
		parser.print_usage()
		raise SystemExit(1)

	if options.date:
		reference = datetime.datetime.strptime(options.date, '%Y%m%d')
	else:
		reference = datetime.datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
	start = reference - datetime.timedelta(days=int(options.days))

	data_iterator = read_data(options.input_dir, start, reference)
	(user_relevance, content_relevance, content_to_user,
		user_photo, content_to_date, content_to_entities) = compute_relevance(data_iterator,
					int(options.iteractions), float(options.damping_factor), options.user, reference, start)

	outfile = '{0}/{1}_{2}'.format(options.output, options.type[0], reference.strftime('%Y%m%d'))

	with open(outfile, 'w') as output_file:
		if options.type == "USER":
			for entry in user_relevance:
				name, score = entry[0], entry[1]
				if options.user == name:
					continue
				try:
					line = u'{0}\t{1}\t{2}'.format(name, score * 1000000, user_photo[name]).encode('utf8')
				except (UnicodeError, KeyError) as exc:
					# Best effort: one unusable row must not abort the report,
					# but say which user was dropped and why.
					logger.warning('Skipping user %r: %r', name, exc)
					continue
				# Written outside the try block so I/O errors are not swallowed.
				output_file.write(line + '\n')
		else:
			for entry in content_relevance:
				content_id, score = entry[0], entry[1]
				author = content_to_user[content_id]
				created = content_to_date[content_id]
				entities = content_to_entities[content_id]
				line = '{0}\t{1}\t{2}\t{3}\t{4}\thttp://twitter.com/{3}/status/{0}\t{5}'.format(
					content_id, score * 1000000, user_photo[author], author, created, entities)
				output_file.write(line + '\n')

	logger.info('All done. Thanks for flying with us. Check output file %s', outfile)
Example #3
0
if __name__ == '__main__':
	# Script entry point: read the options, run compute_relevance on the
	# input file and, for type USER, write one "user,relevance" line per
	# ranked user to the output file.
	parser = OptionParser()
	parser.add_option(
		"-i", "--input",
		dest="input", help=u"Input file", metavar="INPUT")
	parser.add_option(
		"-n", "--iteractions",
		dest="iteractions", help=u"Number of iteractions", metavar="ITERACTIONS")
	parser.add_option(
		"-d", "--damping-factor",
		dest="damping_factor", help=u"Damping factor", metavar="DAMPING", default='0.85')
	parser.add_option(
		"-o", "--output",
		dest="output", help=u"Output file", metavar="OUPUT")
	parser.add_option(
		"-t", "--type",
		dest="type", help=u"Type (USER or CONTENT)", metavar="TYPE")
	parser.add_option(
		"-u", "--user",
		dest="user", help=u"User (screen name in Twitter)", metavar="USER", default='')

	(options, args) = parser.parse_args()

	# Input, iteractions, output and type are all mandatory.
	if (not options.input or not options.iteractions
			or not options.output or not options.type):
		parser.print_usage()
		exit(1)

	(user_relevance, content_relevance,
		content_to_user, user_photo) = compute_relevance(
			options.input,
			int(options.iteractions),
			float(options.damping_factor),
			options.user)

	# NOTE: a CSV header row and per-user retweet statistics
	# (compute_user_statistics) were sketched here once but are disabled.

	# Maps each ranked user (first item of an entry) to its relevance (second item).
	user_relevance_table = {}

	with open(options.output, 'w') as output_file:
		for user, entry in enumerate(user_relevance):
			user_relevance_table[entry[0]] = entry[1]

			# Only USER runs emit rows, and the user passed with -u is left out.
			if options.type == "USER" and options.user != entry[0]:
				output_file.write('{0},{1}'.format(entry[0], entry[1]) + '\n')