def main():
    """Count how often each word was tweeted and write a report.

    Parses two positional command line arguments: ``infile`` (text file
    containing tweets, opened for reading) and ``outfile`` (opened for
    writing). Every line of ``infile`` is passed to a ``TweetParse`` counter.
    Afterwards one line per word is written to ``outfile`` in the order
    returned by ``TweetParse.alltweetedwords()``: the word left-aligned in a
    column one character wider than the longest word, a tab, then the count
    right-aligned in at least three characters.

    Both files are closed on exit, even if processing fails part-way. An
    input that yields no words produces an empty output instead of an error.
    """
    argparser = argparse.ArgumentParser(
        description='Calculates the total number of times each word has been tweeted.')
    argparser.add_argument('infile', type=argparse.FileType('r'),
                           help='text file containing tweets')
    argparser.add_argument('outfile', type=argparse.FileType('w'),
                           help='output file showing the words counts')
    args = argparser.parse_args()

    # the context managers guarantee both files get closed, even on error
    with args.infile as infile, args.outfile as outfile:
        # TweetParse object for finding and counting the words per tweet
        tp = TweetParse()
        for line in infile:
            tp.addtowordscounter(line)

        # all the words found in all the tweets, in report order
        words = tp.alltweetedwords()

        # column width for the word field; max() rejects an empty sequence,
        # so fall back to 0 when no words were found
        lengths = [len(word) for word in words]
        width = max(lengths) + 1 if lengths else 0

        for word in words:
            outfile.write('{w:<{l}}\t{c:3d}\n'.format(w=word, l=width, c=tp.getwordcount(word)))
def test_words_sort(self):
    """ Test the word sort order """
    tp = TweetParse()
    # single-character words listed in the reverse of the expected order;
    # the backslash is written as '\\' because '\ ' is not a valid escape
    # sequence (the resulting string value is unchanged)
    tweet = '~ } | { z a ` _ ^ ] \\ [ @ ? > = < ; : 9 0 / . - , + * ) ( \' & % $ # " ! '
    tp.addtowordscounter(tweet)
    self.assertEqual(tp.alltweetedwords()[0], '!')
    # reverse the test tweet for the expected order
    revtweet = tweet.split()
    revtweet.reverse()
    self.assertEqual(''.join(tp.alltweetedwords()), ''.join(revtweet))
def test_word_count(self):
    """ Word counts accumulate across successive tweets """
    counter = TweetParse()

    # first tweet: 'a' appears twice, 'c' once, 'd' not at all
    counter.addtowordscounter('a b a c')
    for word, expected in (('a', 2), ('c', 1), ('d', 0)):
        self.assertEqual(counter.getwordcount(word), expected)

    # second tweet adds to the running totals; 'ad' is counted separately from 'a'
    counter.addtowordscounter('ad a')
    for word, expected in (('a', 3), ('ad', 1)):
        self.assertEqual(counter.getwordcount(word), expected)
def test_median_unique(self):
    """ The median of the unique word counts is updated after each tweet """
    parser = TweetParse()
    # (tweet, expected median once that tweet has been added)
    steps = (('a b a c', 3), ('123', 2))
    for tweet, expected_median in steps:
        parser.addtouniquecounts(tweet)
        self.assertEqual(parser.getmedianunique(), expected_median)
def main():
    """Write the running median number of unique words per tweet.

    Parses two positional command line arguments: ``infile`` (text file
    containing tweets, opened for reading) and ``outfile`` (opened for
    writing). For every line of ``infile`` the tweet is added to a
    ``TweetParse`` object via ``addtouniquecounts`` and the median reported by
    ``getmedianunique`` is written to ``outfile`` on its own line, formatted
    with one decimal place.

    Both files are closed on exit, even if processing fails part-way.
    """
    argparser = argparse.ArgumentParser(
        description="Calculates the median number of unique words per tweet.")
    argparser.add_argument("infile", type=argparse.FileType("r"),
                           help="text file containing tweets")
    argparser.add_argument("outfile", type=argparse.FileType("w"),
                           help="output file the median number writes to")
    args = argparser.parse_args()

    # TweetParse object for finding the number of unique words per tweet
    # and storing the numbers for calculating the median number
    tp = TweetParse()

    # the context managers guarantee both files get closed, even on error
    with args.infile as infile, args.outfile as outfile:
        for line in infile:
            tp.addtouniquecounts(line)
            # emit the median as it stands after this tweet
            outfile.write("{:.1f}\n".format(tp.getmedianunique()))
def test_num_unique(self):
    """ A repeated word is counted only once """
    # 'a' occurs twice, so the distinct words are a, b and c
    unique_total = TweetParse.numunique('a b a c')
    self.assertEqual(unique_total, 3)
def test_white_space(self):
    """ Words are delimited by white space """
    expected_words = ['a', 'b', 'c']
    actual_words = TweetParse.split('a b c')
    self.assertEqual(actual_words, expected_words)