# -*- coding: utf-8 -*-
"""
Created on Sat Oct  1 17:09:48 2016

@author: Admin
"""
import nltk

from nltk.corpus import twitter_samples
# Load the raw contents of the positive-tweets corpus file as a single string.
positive = twitter_samples.raw("positive_tweets.json")

# NOTE(review): raw() returns the file text unparsed, so the tokens below
# presumably include JSON syntax and metadata fields, not only tweet text --
# confirm whether twitter_samples.strings() was intended instead.
postwts = nltk.word_tokenize(positive)

# Number of distinct tokens (this is NOT the number of tweets). Printed so the
# value is visible when run as a script instead of being silently discarded.
print(len(set(postwts)))

from nltk.corpus import twitter_samples
# Load the raw contents of the negative-tweets corpus file as a single string.
tweet = twitter_samples.raw("negative_tweets.json")

# NOTE(review): raw() returns the file text unparsed, so the tokens below
# presumably include JSON syntax and metadata fields, not only tweet text --
# confirm whether twitter_samples.strings() was intended instead.
tokens = nltk.word_tokenize(tweet)

# Number of distinct tokens (this is NOT the number of tweets). Printed so the
# value is visible when run as a script instead of being silently discarded.
print(len(set(tokens)))
# Example #2
# 0
import nltk
from nltk.corpus import twitter_samples
from nltk.tokenize import PunktSentenceTokenizer

# Raw text of two corpus files: the first is handed to the sentence tokenizer
# as training text, the second is the text that gets split into sentences.
train_text, sample_text = (
    twitter_samples.raw(fileid)
    for fileid in ("negativepositive.json", "Final Tokens.json")
)

# Sentence tokenizer constructed from the training text.
custom_sent_tokenizer = PunktSentenceTokenizer(train_text)

# Sentences of the sample text; read by process_content().
tokenized = custom_sent_tokenizer.tokenize(sample_text)


def process_content():
    """Print the part-of-speech tags for every sentence in ``tokenized``.

    Each sentence is split into word tokens, the tokens are tagged, and the
    tagged result is printed. Any exception raised along the way ends the
    loop; its message is printed instead of propagating to the caller.
    """
    try:
        for sentence in tokenized:
            print(nltk.pos_tag(nltk.word_tokenize(sentence)))
    except Exception as err:
        # Best-effort script: report the problem and carry on.
        print(err)


process_content()
# Example #3
# 0
from nltk.corpus import inaugural, twitter_samples
from nltk.tokenize import sent_tokenize
# Reading files from the NLTK corpora.

# Split one inaugural address into sentences and show a small slice of them.
sample = inaugural.raw("2009-Obama.txt")
tok = sent_tokenize(sample)
print(tok[25:30])

# Address the tweet files by their corpus file ids rather than by an absolute
# path into one user's home directory, so the script works wherever the
# twitter_samples corpus is installed.
tweet_pos = twitter_samples.raw("positive_tweets.json")
tweet_neg = twitter_samples.raw("negative_tweets.json")
print(tweet_neg)
print(tweet_pos)

# Reading custom data.
textpath = '/home/iamukasa/Downloads/analyze.txt'
# Read inside a context manager so the file handle is closed as soon as the
# contents are loaded, instead of being left open until garbage collection.
with open(textpath, "r") as text_file:
    Alltext = text_file.read()
print(Alltext)
# Example #4
# 0
 def __init__(self):
     """Record the source identifiers and load every twitter_samples file.

     Sets ``number_id`` to 41 and ``source_id`` to "twitter_samples", stores
     the corpus file ids in ``titles``, and stores the raw text of each of
     those files, in the same order, in ``data``.
     """
     self.number_id = 41
     self.source_id = "twitter_samples"
     # One entry per file id reported by the corpus reader.
     self.titles = list(twitter_samples.fileids())
     # Raw contents of each file, index-aligned with self.titles.
     self.data = list(map(twitter_samples.raw, self.titles))