Example #1
0
 def test_textoutput(self):
     """
     Convert ``self.infile`` to CSV with only the ``text`` field and assert
     that the generated file is reported identical to the reference file.
     """
     expected_path = os.path.join(
         self.subdir, 'tweets.20150430-223406.text.csv.ref')
     with TemporaryDirectory() as workdir:
         produced_path = os.path.join(
             workdir, 'tweets.20150430-223406.text.csv')
         json2csv(self.infile, produced_path, ['text'], gzip_compress=False)
         same = are_files_identical(produced_path, expected_path)
         self.assertTrue(same, msg=self.msg)
Example #2
0
    def test_user_metadata(self):
        """
        Convert ``self.infile`` to CSV with a mix of tweet and nested
        ``user.*`` fields and assert the output matches the reference file.
        """
        wanted_fields = [
            'id',
            'text',
            'user.id',
            'user.followers_count',
            'user.friends_count',
        ]
        expected_path = os.path.join(
            self.subdir, 'tweets.20150430-223406.user.csv.ref')

        with TemporaryDirectory() as workdir:
            produced_path = os.path.join(
                workdir, 'tweets.20150430-223406.user.csv')
            json2csv(self.infile, produced_path, wanted_fields,
                     gzip_compress=False)
            same = are_files_identical(produced_path, expected_path)
            self.assertTrue(same, msg=self.msg)
Example #3
0
 def test_file_is_wrong(self):
     """
     Negative control for the file comparison: the ``text``-only output is
     compared with the *retweet* reference file, so the comparison is
     expected to report a mismatch.
     """
     mismatched_ref = os.path.join(
         self.subdir, 'tweets.20150430-223406.retweet.csv.ref')
     with TemporaryDirectory() as workdir:
         produced_path = os.path.join(
             workdir, 'tweets.20150430-223406.text.csv')
         json2csv(self.infile, produced_path, ['text'], gzip_compress=False)
         same = are_files_identical(produced_path, mismatched_ref)
         self.assertFalse(same, msg=self.msg)
Example #4
0
 def test_file_is_wrong(self):
     """
     Guard against false positives in the file comparison.

     The output written here contains only the ``text`` field, while the
     reference it is checked against is the retweet one; the assertion
     therefore requires the comparison to come back false.
     """
     unrelated_reference = os.path.join(
         self.subdir, 'tweets.20150430-223406.retweet.csv.ref')
     with TemporaryDirectory() as scratch:
         generated = os.path.join(scratch, 'tweets.20150430-223406.text.csv')
         json2csv(self.infile, generated, ['text'], gzip_compress=False)
         identical = are_files_identical(generated, unrelated_reference)
         self.assertFalse(identical, msg=self.msg)
Example #5
0
    def test_tweet_metadata(self):
        """
        Convert ``self.infile`` to CSV with a selection of tweet-level
        fields and assert the output matches the reference file.
        """
        wanted_fields = [
            'created_at',
            'favorite_count',
            'id',
            'in_reply_to_status_id',
            'in_reply_to_user_id',
            'retweet_count',
            'retweeted',
            'text',
            'truncated',
            'user.id',
        ]
        expected_path = os.path.join(
            self.subdir, 'tweets.20150430-223406.tweet.csv.ref')

        with TemporaryDirectory() as workdir:
            produced_path = os.path.join(
                workdir, 'tweets.20150430-223406.tweet.csv')
            json2csv(self.infile, produced_path, wanted_fields,
                     gzip_compress=False)
            same = are_files_identical(produced_path, expected_path)
            self.assertTrue(same, msg=self.msg)
Example #6
0
 def convert_csv_tweet_file(self,
                            input_file,
                            args=None):
     """
     Convert a tweet file to CSV and return the CSV content as lines.

     The CSV is written to ``path + 'tweets_text.csv'`` (``path`` is a name
     defined outside this method) and then read back.

     :param input_file: name of the file to open and hand to ``json2csv``.
     :param args: list of field names passed to ``json2csv``; when omitted,
         a default selection of tweet fields is used.
     :return: the lines of the generated CSV file, as given by
         ``readlines()``.
     """
     # Build the default per call instead of sharing one list object
     # between all invocations (mutable-default-argument pitfall).
     if args is None:
         args = [
             'created_at', 'favorite_count', 'id',
             'in_reply_to_status_id',
             'in_reply_to_user_id', 'retweet_count',
             'text', 'truncated', 'user.id'
         ]
     output_file = path + 'tweets_text.csv'
     with open(input_file) as file:
         json2csv(file, output_file, args)
     # Read the result inside a context manager so the handle is closed
     # deterministically rather than left to the garbage collector.
     with open(output_file, 'r') as converted:
         return converted.readlines()
Example #7
0
    def test_user_metadata(self):
        """
        Check CSV conversion when nested ``user.*`` fields are requested
        alongside top-level tweet fields, using the user reference file.
        """
        reference = os.path.join(
            self.subdir, 'tweets.20150430-223406.user.csv.ref')
        columns = ['id', 'text', 'user.id']
        columns += ['user.followers_count', 'user.friends_count']

        with TemporaryDirectory() as scratch:
            generated = os.path.join(
                scratch, 'tweets.20150430-223406.user.csv')
            json2csv(self.infile, generated, columns, gzip_compress=False)
            identical = are_files_identical(generated, reference)
            self.assertTrue(identical, msg=self.msg)
Example #8
0
    def test_tweet_metadata(self):
        """
        Check CSV conversion for a selection of tweet-level fields against
        the tweet reference file.
        """
        reference = os.path.join(
            self.subdir, 'tweets.20150430-223406.tweet.csv.ref')
        columns = [
            'created_at', 'favorite_count', 'id',
            'in_reply_to_status_id', 'in_reply_to_user_id',
            'retweet_count', 'retweeted',
            'text', 'truncated', 'user.id',
        ]

        with TemporaryDirectory() as scratch:
            generated = os.path.join(
                scratch, 'tweets.20150430-223406.tweet.csv')
            json2csv(self.infile, generated, columns, gzip_compress=False)
            identical = are_files_identical(generated, reference)
            self.assertTrue(identical, msg=self.msg)
Example #9
0
from nltk.twitter.util import json2csv
# Name of the tweet file that is opened and handed to json2csv.
input_file = "tweets.20150430-223406.json"

# Request only the 'text' field; the CSV goes to 'tweets_text.csv' in the
# current working directory.
with open(input_file) as fp:
    json2csv(
        fp,
        "tweets_text.csv",
        ["text"],
    )
Example #10
0
 def test_textoutput(self):
     """
     Check CSV conversion when only the ``text`` field is requested,
     comparing the generated file with the text reference file.
     """
     reference = os.path.join(
         self.subdir, 'tweets.20150430-223406.text.csv.ref')
     with TemporaryDirectory() as scratch:
         generated = os.path.join(scratch, 'tweets.20150430-223406.text.csv')
         json2csv(self.infile, generated, ['text'], gzip_compress=False)
         identical = are_files_identical(generated, reference)
         self.assertTrue(identical, msg=self.msg)