Python json2csv_entities Exemples, nltk.twitter.common.json2csv_entities Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : yuanlanda/nltk

    def test_retweet_original_tweet(self):
        """Check the retweet CSV written by json2csv_entities against its reference file."""
        expected_csv = os.path.join(
            self.subdir, 'tweets.20150430-223406.retweet.csv.ref'
        )
        main_fields = ['id']
        entity_fields = [
            'created_at',
            'favorite_count',
            'id',
            'in_reply_to_status_id',
            'in_reply_to_user_id',
            'retweet_count',
            'text',
            'truncated',
            'user.id',
        ]
        with TemporaryDirectory() as workdir:
            produced_csv = os.path.join(workdir, 'tweets.20150430-223406.retweet.csv')
            json2csv_entities(
                self.infile, produced_csv, main_fields, 'retweeted_status',
                entity_fields, gzip_compress=False,
            )

            # Compare inside the ``with`` so the temporary output still exists.
            same = are_files_identical(produced_csv, expected_csv)
            self.assertTrue(same, msg=self.msg)

Exemple #2

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : CPHB-FKMP/book-extractor

 def test_tweet_usermention(self):
     """Check the user-mention CSV written by json2csv_entities against its reference file."""
     expected_csv = os.path.join(self.subdir, 'tweets.20150430-223406.usermention.csv.ref')
     main_fields = ['id', 'text']
     entity_fields = ['id', 'screen_name']
     with TemporaryDirectory() as workdir:
         produced_csv = os.path.join(workdir, 'tweets.20150430-223406.usermention.csv')
         json2csv_entities(
             self.infile,
             produced_csv,
             main_fields,
             'user_mentions',
             entity_fields,
             gzip_compress=False,
         )
         # Compare inside the ``with`` so the temporary output still exists.
         same = are_files_identical(produced_csv, expected_csv)
         self.assertTrue(same, msg=self.msg)

Exemple #3

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : CPHB-FKMP/book-extractor

    def test_tweet_url(self):
        """Check the URL CSV written by json2csv_entities against its reference file."""
        expected_csv = os.path.join(self.subdir, 'tweets.20150430-223406.url.csv.ref')
        main_fields = ['id']
        entity_fields = ['url', 'expanded_url']
        with TemporaryDirectory() as workdir:
            produced_csv = os.path.join(workdir, 'tweets.20150430-223406.url.csv')
            json2csv_entities(
                self.infile,
                produced_csv,
                main_fields,
                'urls',
                entity_fields,
                gzip_compress=False,
            )

            # Compare inside the ``with`` so the temporary output still exists.
            same = are_files_identical(produced_csv, expected_csv)
            self.assertTrue(same, msg=self.msg)

Exemple #4

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : CPHB-FKMP/book-extractor

    def test_tweet_place_boundingbox(self):
        """Check the place bounding-box CSV written by json2csv_entities against its reference file."""
        expected_csv = os.path.join(
            self.subdir, 'tweets.20150430-223406.placeboundingbox.csv.ref'
        )
        main_fields = ['id', 'name']
        entity_fields = ['coordinates']
        with TemporaryDirectory() as workdir:
            produced_csv = os.path.join(
                workdir, 'tweets.20150430-223406.placeboundingbox.csv'
            )
            json2csv_entities(
                self.infile,
                produced_csv,
                main_fields,
                'place.bounding_box',
                entity_fields,
                gzip_compress=False,
            )

            # Compare inside the ``with`` so the temporary output still exists.
            same = are_files_identical(produced_csv, expected_csv)
            self.assertTrue(same, msg=self.msg)

Exemple #5

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Weiming-Hu/text-based-six-degree

    def test_retweet_original_tweet(self):
        """Check the retweet CSV written by json2csv_entities against its reference file."""
        expected_csv = os.path.join(self.subdir, 'tweets.20150430-223406.retweet.csv.ref')
        main_fields = ['id']
        entity_fields = [
            'created_at',
            'favorite_count',
            'id',
            'in_reply_to_status_id',
            'in_reply_to_user_id',
            'retweet_count',
            'text',
            'truncated',
            'user.id',
        ]
        with TemporaryDirectory() as workdir:
            produced_csv = os.path.join(workdir, 'tweets.20150430-223406.retweet.csv')
            json2csv_entities(
                self.infile,
                produced_csv,
                main_fields,
                'retweeted_status',
                entity_fields,
                gzip_compress=False,
            )

            # Compare inside the ``with`` so the temporary output still exists.
            same = are_files_identical(produced_csv, expected_csv)
            self.assertTrue(same, msg=self.msg)

Exemple #6

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Geolem/nltk

def test_tweet_usermention(tmp_path, infile):
    """Check the user-mention CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / 'tweets.20150430-223406.usermention.csv.ref'
    produced_csv = tmp_path / 'tweets.20150430-223406.usermention.csv'
    main_fields = ['id', 'text']
    entity_fields = ['id', 'screen_name']
    json2csv_entities(
        infile, produced_csv, main_fields, 'user_mentions', entity_fields,
        gzip_compress=False,
    )
    assert files_are_identical(produced_csv, expected_csv)

Exemple #7

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Geolem/nltk

def test_tweet_hashtag(tmp_path, infile):
    """Check the hashtag CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / 'tweets.20150430-223406.hashtag.csv.ref'
    produced_csv = tmp_path / 'tweets.20150430-223406.hashtag.csv'
    main_fields = ['id', 'text']
    entity_fields = ['text']
    json2csv_entities(
        infile, produced_csv, main_fields, 'hashtags', entity_fields,
        gzip_compress=False,
    )
    assert files_are_identical(produced_csv, expected_csv)

Exemple #8

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : vishalbelsare/nltk

def test_tweet_hashtag(tmp_path, infile):
    """Check the hashtag CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / "tweets.20150430-223406.hashtag.csv.ref"
    produced_csv = tmp_path / "tweets.20150430-223406.hashtag.csv"
    main_fields = ["id", "text"]
    entity_fields = ["text"]
    json2csv_entities(
        infile, produced_csv, main_fields, "hashtags", entity_fields,
        gzip_compress=False,
    )
    assert files_are_identical(produced_csv, expected_csv)

Exemple #9

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : vishalbelsare/nltk

def test_tweet_usermention(tmp_path, infile):
    """Check the user-mention CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / "tweets.20150430-223406.usermention.csv.ref"
    produced_csv = tmp_path / "tweets.20150430-223406.usermention.csv"
    main_fields = ["id", "text"]
    entity_fields = ["id", "screen_name"]
    json2csv_entities(
        infile, produced_csv, main_fields, "user_mentions", entity_fields,
        gzip_compress=False,
    )
    assert files_are_identical(produced_csv, expected_csv)

Exemple #10

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : vishalbelsare/nltk

def test_tweet_media(tmp_path, infile):
    """Check the media CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / "tweets.20150430-223406.media.csv.ref"
    produced_csv = tmp_path / "tweets.20150430-223406.media.csv"
    main_fields = ["id"]
    entity_fields = ["media_url", "url"]
    json2csv_entities(
        infile, produced_csv, main_fields, "media", entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #11

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Leehon008/Social-Media-traker

 def test_tweet_hashtag(self):
     """Check the hashtag CSV written by json2csv_entities against its reference file."""
     expected_csv = os.path.join(self.subdir, 'tweets.20150430-223406.hashtag.csv.ref')
     main_fields = ['id', 'text']
     entity_fields = ['text']
     with TemporaryDirectory() as workdir:
         produced_csv = os.path.join(workdir, 'tweets.20150430-223406.hashtag.csv')
         json2csv_entities(
             self.infile, produced_csv, main_fields, 'hashtags', entity_fields,
             gzip_compress=False,
         )
         # Compare inside the ``with`` so the temporary output still exists.
         same = are_files_identical(produced_csv, expected_csv)
         self.assertTrue(same, msg=self.msg)

Exemple #12

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Geolem/nltk

def test_tweet_place_boundingbox(tmp_path, infile):
    """Check the place bounding-box CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / 'tweets.20150430-223406.placeboundingbox.csv.ref'
    produced_csv = tmp_path / 'tweets.20150430-223406.placeboundingbox.csv'
    main_fields = ['id', 'name']
    entity_fields = ['coordinates']
    json2csv_entities(
        infile, produced_csv, main_fields, 'place.bounding_box', entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #13

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : vishalbelsare/nltk

def test_tweet_place_boundingbox(tmp_path, infile):
    """Check the place bounding-box CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / "tweets.20150430-223406.placeboundingbox.csv.ref"
    produced_csv = tmp_path / "tweets.20150430-223406.placeboundingbox.csv"
    main_fields = ["id", "name"]
    entity_fields = ["coordinates"]
    json2csv_entities(
        infile, produced_csv, main_fields, "place.bounding_box", entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #14

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Geolem/nltk

def test_userurl(tmp_path, infile):
    """Check the user-URL CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / 'tweets.20150430-223406.userurl.csv.ref'
    produced_csv = tmp_path / 'tweets.20150430-223406.userurl.csv'
    main_fields = ['id', 'screen_name']
    entity_fields = ['url', 'expanded_url']
    json2csv_entities(
        infile, produced_csv, main_fields, 'user.urls', entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #15

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Geolem/nltk

def test_tweet_media(tmp_path, infile):
    """Check the media CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / 'tweets.20150430-223406.media.csv.ref'
    produced_csv = tmp_path / 'tweets.20150430-223406.media.csv'
    main_fields = ['id']
    entity_fields = ['media_url', 'url']
    json2csv_entities(
        infile, produced_csv, main_fields, 'media', entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #16

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : vishalbelsare/nltk

def test_userurl(tmp_path, infile):
    """Check the user-URL CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / "tweets.20150430-223406.userurl.csv.ref"
    produced_csv = tmp_path / "tweets.20150430-223406.userurl.csv"
    main_fields = ["id", "screen_name"]
    entity_fields = ["url", "expanded_url"]
    json2csv_entities(
        infile, produced_csv, main_fields, "user.urls", entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #17

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : vishalbelsare/nltk

def test_tweet_place(tmp_path, infile):
    """Check the place CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / "tweets.20150430-223406.place.csv.ref"
    produced_csv = tmp_path / "tweets.20150430-223406.place.csv"
    main_fields = ["id", "text"]
    entity_fields = ["name", "country"]
    json2csv_entities(
        infile, produced_csv, main_fields, "place", entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #18

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Geolem/nltk

def test_tweet_place(tmp_path, infile):
    """Check the place CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / 'tweets.20150430-223406.place.csv.ref'
    produced_csv = tmp_path / 'tweets.20150430-223406.place.csv'
    main_fields = ['id', 'text']
    entity_fields = ['name', 'country']
    json2csv_entities(
        infile, produced_csv, main_fields, 'place', entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #19

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : rmalouf/nltk

 def test_tweet_hashtag(self):
     """Check the hashtag CSV written by json2csv_entities against its reference file."""
     expected_csv = os.path.join(self.subdir, 'tweets.20150430-223406.hashtag.csv.ref')
     main_fields = ['id', 'text']
     entity_fields = ['text']
     with TemporaryDirectory() as workdir:
         produced_csv = os.path.join(workdir, 'tweets.20150430-223406.hashtag.csv')
         json2csv_entities(
             self.infile, produced_csv, main_fields, 'hashtags', entity_fields,
             gzip_compress=False,
         )
         # Compare inside the ``with`` so the temporary output still exists.
         same = are_files_identical(produced_csv, expected_csv)
         self.assertTrue(same, msg=self.msg)

Exemple #20

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : Geolem/nltk

def test_retweet_original_tweet(tmp_path, infile):
    """Check the retweet CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / 'tweets.20150430-223406.retweet.csv.ref'
    produced_csv = tmp_path / 'tweets.20150430-223406.retweet.csv'
    main_fields = ['id']
    entity_fields = [
        'created_at', 'favorite_count', 'id',
        'in_reply_to_status_id', 'in_reply_to_user_id',
        'retweet_count', 'text', 'truncated', 'user.id',
    ]
    json2csv_entities(
        infile, produced_csv, main_fields, 'retweeted_status', entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #21

0

Afficher le fichier

Fichier : test_json2csv_corpus.py Projet : vishalbelsare/nltk

def test_retweet_original_tweet(tmp_path, infile):
    """Check the retweet CSV written by json2csv_entities against its reference file."""
    expected_csv = subdir / "tweets.20150430-223406.retweet.csv.ref"
    produced_csv = tmp_path / "tweets.20150430-223406.retweet.csv"
    main_fields = ["id"]
    entity_fields = [
        "created_at", "favorite_count", "id",
        "in_reply_to_status_id", "in_reply_to_user_id",
        "retweet_count", "text", "truncated", "user.id",
    ]
    json2csv_entities(
        infile, produced_csv, main_fields, "retweeted_status", entity_fields,
        gzip_compress=False,
    )

    assert files_are_identical(produced_csv, expected_csv)

Exemple #22

0

Afficher le fichier

Fichier : twitter-donaldtrump.py Projet : eddiesherlock/twitter

    json2csv(fp, 'tweets_text.csv', ['text'])

# Read the tweet-text CSV back in and print every entry of its ``text`` column.
data = pd.read_csv('tweets_text.csv')
for line in data.text:
    print('Trump tweets content: ')
    print(line)

# Tokenize the sample file and show the first five tokenized tweets.
tokenized = twitter_samples.tokenized(input_file)
for tok in tokenized[:5]:
    print('tokenized: ')
    print(tok)

# Tweet data processing: one CSV export per entity type.
# Each row is (output file, main fields, entity type, entity fields),
# passed to json2csv_entities in that order after the open file handle.
entity_exports = (
    ('tweets.20180726-155316.hashtags.csv',
     ['id', 'text'], 'hashtags', ['text']),
    ('tweets.20180726-155316.user_mentions.csv',
     ['id', 'text'], 'user_mentions', ['id', 'screen_name']),
    ('tweets.20180726-155316.media.csv',
     ['id'], 'media', ['media_url', 'url']),
    ('tweets.20180726-155316.urls.csv',
     ['id'], 'urls', ['url', 'expanded_url']),
)
for out_csv, main_fields, entity_type, entity_fields in entity_exports:
    # Re-open the input for every export so each pass starts at the beginning.
    with open(input_file) as fp:
        json2csv_entities(fp, out_csv, main_fields, entity_type, entity_fields)