コード例 #1
0
def main():
    """
    Write the total number of times each word has been tweeted.

    Reads tweets line by line from the `infile` command line argument,
    feeds each line to a TweetParse object, then writes one line per word
    to `outfile`: the word left-aligned and padded to a common width, a tab,
    and its count.
    """

    # add and parse the command line arguments requirement, infile and outfile
    argparser = argparse.ArgumentParser(description='Calculates the total number of times each word has been tweeted.')
    argparser.add_argument('infile', type=argparse.FileType('r'), help='text file containing tweets')
    argparser.add_argument('outfile', type=argparse.FileType('w'), help='output file showing the words counts')
    args = argparser.parse_args()

    # the context managers close both files even when reading or writing fails
    with args.infile as infile, args.outfile as outfile:
        # TweetParse object for finding and counting the words per tweet
        tp = TweetParse()

        # read each tweet
        # use the TweetParse function to find and update the frequency of words tweeted
        for line in infile:
            tp.addtowordscounter(line)

        # get all the words found in all the tweets
        # (presumably already sorted by TweetParse; verify against its implementation)
        words = tp.alltweetedwords()

        # get the length of the longest word, use for formatting the output;
        # max() raises ValueError on an empty sequence, so an input without any
        # words falls back to a width of 0 (nothing is written in that case)
        maxwordlen = len(max(words, key=len)) if words else 0

        # print out each word and its number of times tweeted
        for word in words:
            outfile.write('{w:<{l}}\t{c:3d}\n'.format(w=word, l=maxwordlen+1, c=tp.getwordcount(word)))
コード例 #2
0
 def test_words_sort(self):
     """
     Test the word sort order

     The tweet lists its one-character words in the reverse of the order
     expected from alltweetedwords(), so the expected result is the tweet's
     words reversed.
     """
     tp = TweetParse()
     # the backslash is spelled '\\' because '\ ' is an invalid escape sequence;
     # the resulting string is unchanged
     tweet = '~ } | { z a ` _ ^ ] \\ [ @ ? > = < ; : 9 0 / . - , + * ) ( \' & % $ # " ! '
     tp.addtowordscounter(tweet)
     self.assertEqual(tp.alltweetedwords()[0], '!')
     # reverse the test tweet for the expected order
     expected = tweet.split()
     expected.reverse()
     # compare word by word: joining into one string would hide words that
     # were merged or split incorrectly
     self.assertEqual(list(tp.alltweetedwords()), expected)
コード例 #3
0
 def test_word_count(self):
     """
     Check the per-word totals after a first tweet and again after a second
     """
     parser = TweetParse()

     # first tweet: 'a' appears twice, 'c' once, 'd' not at all
     parser.addtowordscounter('a b a c')
     for word, expected in (('a', 2), ('c', 1), ('d', 0)):
         self.assertEqual(parser.getwordcount(word), expected)

     # second tweet: the total for 'a' grows and 'ad' is counted as its own word
     parser.addtowordscounter('ad a')
     for word, expected in (('a', 3), ('ad', 1)):
         self.assertEqual(parser.getwordcount(word), expected)
コード例 #4
0
 def test_median_unique(self):
     """
     Check the median reported after each tweet is added
     """
     parser = TweetParse()

     # each step: the tweet to add and the median expected right afterwards
     steps = (
         ('a b a c', 3),
         ('123', 2),
     )
     for tweet, expected_median in steps:
         parser.addtouniquecounts(tweet)
         self.assertEqual(parser.getmedianunique(), expected_median)
コード例 #5
0
def main():
    """
    Write the running median number of unique words per tweet.

    Reads tweets line by line from the `infile` command line argument and,
    after each tweet, writes the current median reported by the TweetParse
    object to `outfile`, formatted with one decimal place.
    """

    # add and parse the command line arguments requirement, infile and outfile
    argparser = argparse.ArgumentParser(description="Calculates the median number of unique words per tweet.")
    argparser.add_argument("infile", type=argparse.FileType("r"), help="text file containing tweets")
    argparser.add_argument("outfile", type=argparse.FileType("w"), help="output file the median number writes to")
    args = argparser.parse_args()

    # the context managers close both files even when reading or writing fails
    with args.infile as infile, args.outfile as outfile:
        # TweetParse object for finding the number of unique words per tweet
        # and storing the numbers for calculating the median number
        tp = TweetParse()

        # read each tweet
        # add the number of unique words to the TweetParse object
        # get the new median number from the TweetParse object
        # print result to the outfile
        for line in infile:
            tp.addtouniquecounts(line)
            newmedian = tp.getmedianunique()
            outfile.write("{:.1f}\n".format(newmedian))
コード例 #6
0
 def test_num_unique(self):
     """
     Check that a repeated word is counted only once
     """
     tweet = 'a b a c'
     expected = 3
     self.assertEqual(TweetParse.numunique(tweet), expected)
コード例 #7
0
 def test_white_space(self):
     """
     Check that consecutive spaces do not produce empty words
     """
     tokens = TweetParse.split('a b  c')
     self.assertEqual(tokens, ['a', 'b', 'c'])