def plot_data(self, pdf_file, image_filename):
    """Draw a pie chart of the place-type distribution and save it twice.

    Rows are read from ``self._datafile`` through ``Utils.get_data_vectors``
    using a single-space separator. Column 0 of each row is used as the
    slice label and column 1 as its integer reference count. The
    ``'NotAvailable'`` row is left out of the pie; its share of all
    references is reported in the title instead.

    Args:
        pdf_file: Target handed to ``savefig`` with ``format='pdf'``.
        image_filename: Target handed to ``savefig`` with no explicit format.

    Raises:
        ZeroDivisionError: If the counts sum to zero, either before or
            after the ``'NotAvailable'`` row has been removed.
    """
    place_types = []
    reference_counts = []
    for data_row in Utils.get_data_vectors(self._datafile, ' '):
        place_types.append(data_row[0])
        reference_counts.append(int(data_row[1]))

    figure(1, figsize=(6, 6))
    # Called for its side effect of creating the axes the pie is drawn on.
    axes([0.1, 0.1, 0.8, 0.8])

    total_references = sum(reference_counts)

    # Remove the 'NotAvailable' row from both parallel lists. A data file
    # without such a row is treated as having zero unavailable references
    # rather than aborting the plot.
    if 'NotAvailable' in place_types:
        na_index = place_types.index('NotAvailable')
        place_types.pop(na_index)
        na_count = reference_counts.pop(na_index)
    else:
        na_count = 0
    na_percentage = 100 * na_count / float(total_references)
    total_references -= na_count

    fracs = [count / float(total_references) for count in reference_counts]
    # pie() requires one explode offset per slice, so size it from the data
    # instead of assuming a fixed number of place types.
    explode = (0,) * len(fracs)

    mpl.rcParams['font.size'] = 9.0
    # startangle=0 is passed explicitly; slices are laid out in list order.
    pie(fracs, explode=explode, labels=place_types, autopct='%1.1f%%',
        shadow=False, startangle=0)
    title('Place Type Distribution: ' + str(na_percentage) + '% tweets dont have a place type, showing the remaining tweets',
          bbox={'facecolor': '0.8', 'pad': 5})

    savefig(pdf_file, format='pdf')
    savefig(image_filename)
    # Clear the shared figure so the next plot starts from a blank canvas.
    clf()
def plot_data(self, pdf_file, image_filename):
    """Draw a pie chart of tweet counts per category and save it twice.

    Rows are read from ``self._datafile`` through ``Utils.get_data_vectors``
    using a single-space separator. Column 0 of each row is used as the
    slice label and column 1 as its integer tweet count. The title reports
    the sum of all counts.

    Args:
        pdf_file: Target handed to ``savefig`` with ``format='pdf'``.
        image_filename: Target handed to ``savefig`` with no explicit format.

    Raises:
        ZeroDivisionError: If there is at least one row and the counts sum
            to zero.
    """
    categories = []
    tweet_count = []
    for data_row in Utils.get_data_vectors(self._datafile, ' '):
        categories.append(data_row[0])
        tweet_count.append(int(data_row[1]))

    figure(1, figsize=(6, 6))
    # Called for its side effect of creating the axes the pie is drawn on.
    axes([0.1, 0.1, 0.8, 0.8])

    total_references = sum(tweet_count)
    fracs = [count / float(total_references) for count in tweet_count]
    # pie() requires one explode offset per slice, so size it from the data
    # instead of assuming exactly two categories.
    explode = (0,) * len(fracs)

    mpl.rcParams['font.size'] = 12.0
    # startangle=0 is passed explicitly; slices are laid out in list order.
    pie(fracs, explode=explode, labels=categories, autopct='%1.1f%%',
        shadow=False, startangle=0)
    title('Tweet Distribution for ' + str(total_references) + ' tweets.',
          bbox={'facecolor': '0.8', 'pad': 5})

    savefig(pdf_file, format='pdf')
    savefig(image_filename)
    # Clear the shared figure so the next plot starts from a blank canvas.
    clf()
def plot_data(self, pdf_file, image_filename):
    """Plot a red bar chart of per-bin percentages and save it twice.

    Rows come from ``Utils.get_data_vectors(self._datafile, ' ')``. Both
    columns are parsed as floats and truncated to ints: column 0 is the bar
    position and column 1 the raw value. Each bar's height is its value
    expressed as a percentage of the sum of all values, and bars are drawn
    in ascending order of position.

    Args:
        pdf_file: Target handed to ``P.savefig`` with ``format='pdf'``.
        image_filename: Target handed to ``P.savefig`` with no explicit
            format.
    """
    parsed_rows = [
        (int(float(row[0])), int(float(row[1])))
        for row in Utils.get_data_vectors(self._datafile, ' ')
    ]
    grand_total = sum(value for _, value in parsed_rows)

    # Stable in-place sort on the position column only.
    parsed_rows.sort(key=itemgetter(0))
    positions = [position for position, _ in parsed_rows]
    percentages = [100 * value / float(grand_total) for _, value in parsed_rows]

    P.bar(positions, percentages, width=1, color='r')
    # The font size is set after the bars are drawn and before the labels.
    mpl.rcParams['font.size'] = 10.0
    P.xlabel('Geotagged tweet percentage')
    P.ylabel('Users percentage')

    P.savefig(pdf_file, format='pdf')
    P.savefig(image_filename)
    P.clf()
def plot_data(self, pdf_file, image_filename):
    """Scatter-plot log(user count) against log(tweet count) and save it twice.

    Rows come from ``Utils.get_data_vectors(self._datafile, ' ')``. Column 0
    is parsed as an integer tweet count and column 1 as the integer number
    of users with that count. Points are ordered by tweet count and drawn as
    red circles, with log(user count) on the x-axis and log(tweet count) on
    the y-axis.

    Args:
        pdf_file: Target handed to ``P.savefig`` with ``format='pdf'``.
        image_filename: Target handed to ``P.savefig`` with no explicit
            format.
    """
    data_points = [
        (int(data_row[0]), int(data_row[1]))
        for data_row in Utils.get_data_vectors(self._datafile, ' ')
    ]

    log_tweet_counts = []
    log_user_counts = []
    for num_tweets, user_count in sorted(data_points, key=itemgetter(0)):
        log_tweet_counts.append(log(num_tweets))
        log_user_counts.append(log(user_count))

    # Named plot_axes so the local does not shadow a module-level axes().
    _, plot_axes = P.subplots()
    plot_axes.plot(log_user_counts, log_tweet_counts, 'ro')
    plot_axes.set_ylabel('log(GeoTweet Count)')
    plot_axes.set_xlabel('log(User Count)')

    P.savefig(pdf_file, format='pdf')
    P.savefig(image_filename)
    P.clf()
def plot_data(self, pdf_file, image_filename):
    """Draw a pie chart of the largest tweet sources and save it twice.

    Rows come from ``Utils.get_data_vectors(self._datafile, '|')``. Column 0
    is the source (application) name and column 1 its integer tweet count;
    a later row with the same name overwrites an earlier one. Sources are
    ranked by count in descending order, a fixed selection of ranks is
    plotted, and the title reports what percentage of all tweets the
    plotted sources account for.

    Args:
        pdf_file: Target handed to ``savefig`` with ``format='pdf'``.
        image_filename: Target handed to ``savefig`` with no explicit format.
    """
    source_counts = {}
    for data_row in Utils.get_data_vectors(self._datafile, '|'):
        source_counts[data_row[0]] = int(data_row[1])

    figure(1, figsize=(6, 6))
    # Called for its side effect of creating the axes the pie is drawn on.
    axes([0.2, 0.2, 0.6, 0.6])

    # items() instead of iteritems() so this runs on Python 2 and Python 3.
    ranked_sources = sorted(source_counts.items(), key=itemgetter(1), reverse=True)
    total_tweet_count_original = sum(float(count) for _, count in ranked_sources)

    # NOTE(review): this selection reorders the ranked list and skips ranks
    # 5, 9 and 12 (0-based), so at most 12 sources are plotted even though
    # the title says "top 15". Presumably the reordering keeps small slices
    # from sitting next to each other -- confirm whether the skipped ranks
    # are intentional.
    shown_sources = (ranked_sources[0:5] + ranked_sources[10:12]
                     + ranked_sources[6:9] + ranked_sources[13:15])

    total_tweet_count = sum(float(count) for _, count in shown_sources)
    application_names = [name for name, _ in shown_sources]
    tweet_percentage = 100 * (total_tweet_count / float(total_tweet_count_original))
    fracs = [float(count) / total_tweet_count for _, count in shown_sources]

    mpl.rcParams['font.size'] = 6.0
    pie(fracs, labels=application_names, autopct='%1.1f%%', shadow=False,
        startangle=0)
    title('Tweet sources Distribution: Showing top 15 tweet sources which generate ' + str(tweet_percentage) + '% of the total tweets',
          bbox={'facecolor': '0.8', 'pad': 5})

    savefig(pdf_file, format='pdf')
    savefig(image_filename)
    # Clear the shared figure so the next plot starts from a blank canvas.
    clf()